diff options
author | Stephen Hines <srhines@google.com> | 2014-12-01 14:51:49 -0800 |
---|---|---|
committer | Stephen Hines <srhines@google.com> | 2014-12-02 16:08:10 -0800 |
commit | 37ed9c199ca639565f6ce88105f9e39e898d82d0 (patch) | |
tree | 8fb36d3910e3ee4c4e1b7422f4f017108efc52f5 /lib/Target/NVPTX | |
parent | d2327b22152ced7bc46dc629fc908959e8a52d03 (diff) | |
download | external_llvm-37ed9c199ca639565f6ce88105f9e39e898d82d0.zip external_llvm-37ed9c199ca639565f6ce88105f9e39e898d82d0.tar.gz external_llvm-37ed9c199ca639565f6ce88105f9e39e898d82d0.tar.bz2 |
Update aosp/master LLVM for rebase to r222494.
Change-Id: Ic787f5e0124df789bd26f3f24680f45e678eef2d
Diffstat (limited to 'lib/Target/NVPTX')
43 files changed, 7103 insertions, 1059 deletions
diff --git a/lib/Target/NVPTX/CMakeLists.txt b/lib/Target/NVPTX/CMakeLists.txt index 4e35b18..3a4a19d 100644 --- a/lib/Target/NVPTX/CMakeLists.txt +++ b/lib/Target/NVPTX/CMakeLists.txt @@ -9,26 +9,28 @@ tablegen(LLVM NVPTXGenSubtargetInfo.inc -gen-subtarget) add_public_tablegen_target(NVPTXCommonTableGen) set(NVPTXCodeGen_sources + NVPTXAllocaHoisting.cpp + NVPTXAsmPrinter.cpp + NVPTXAssignValidGlobalNames.cpp NVPTXFavorNonGenericAddrSpaces.cpp NVPTXFrameLowering.cpp - NVPTXInstrInfo.cpp + NVPTXGenericToNVVM.cpp NVPTXISelDAGToDAG.cpp NVPTXISelLowering.cpp + NVPTXImageOptimizer.cpp + NVPTXInstrInfo.cpp + NVPTXLowerAggrCopies.cpp + NVPTXLowerStructArgs.cpp + NVPTXMCExpr.cpp + NVPTXPrologEpilogPass.cpp NVPTXRegisterInfo.cpp + NVPTXReplaceImageHandles.cpp NVPTXSubtarget.cpp NVPTXTargetMachine.cpp - NVPTXLowerAggrCopies.cpp - NVPTXutil.cpp - NVPTXAllocaHoisting.cpp - NVPTXAsmPrinter.cpp + NVPTXTargetTransformInfo.cpp NVPTXUtilities.cpp + NVPTXutil.cpp NVVMReflect.cpp - NVPTXGenericToNVVM.cpp - NVPTXAssignValidGlobalNames.cpp - NVPTXPrologEpilogPass.cpp - NVPTXMCExpr.cpp - NVPTXReplaceImageHandles.cpp - NVPTXImageOptimizer.cpp ) add_llvm_target(NVPTXCodeGen ${NVPTXCodeGen_sources}) diff --git a/lib/Target/NVPTX/InstPrinter/NVPTXInstPrinter.cpp b/lib/Target/NVPTX/InstPrinter/NVPTXInstPrinter.cpp index 9618896..80b2f62 100644 --- a/lib/Target/NVPTX/InstPrinter/NVPTXInstPrinter.cpp +++ b/lib/Target/NVPTX/InstPrinter/NVPTXInstPrinter.cpp @@ -57,13 +57,13 @@ void NVPTXInstPrinter::printRegName(raw_ostream &OS, unsigned RegNo) const { OS << "%r"; break; case 4: - OS << "%rl"; + OS << "%rd"; break; case 5: OS << "%f"; break; case 6: - OS << "%fl"; + OS << "%fd"; break; } diff --git a/lib/Target/NVPTX/InstPrinter/NVPTXInstPrinter.h b/lib/Target/NVPTX/InstPrinter/NVPTXInstPrinter.h index 1fb3c57..0496964 100644 --- a/lib/Target/NVPTX/InstPrinter/NVPTXInstPrinter.h +++ b/lib/Target/NVPTX/InstPrinter/NVPTXInstPrinter.h @@ -11,8 +11,8 @@ // 
//===----------------------------------------------------------------------===// -#ifndef NVPTX_INST_PRINTER_H -#define NVPTX_INST_PRINTER_H +#ifndef LLVM_LIB_TARGET_NVPTX_INSTPRINTER_NVPTXINSTPRINTER_H +#define LLVM_LIB_TARGET_NVPTX_INSTPRINTER_NVPTXINSTPRINTER_H #include "llvm/MC/MCInstPrinter.h" #include "llvm/Support/raw_ostream.h" diff --git a/lib/Target/NVPTX/LLVMBuild.txt b/lib/Target/NVPTX/LLVMBuild.txt index e805aba..bc8d82e 100644 --- a/lib/Target/NVPTX/LLVMBuild.txt +++ b/lib/Target/NVPTX/LLVMBuild.txt @@ -28,5 +28,5 @@ has_asmprinter = 1 type = Library name = NVPTXCodeGen parent = NVPTX -required_libraries = Analysis AsmPrinter CodeGen Core MC NVPTXAsmPrinter NVPTXDesc NVPTXInfo SelectionDAG Support Target +required_libraries = Analysis AsmPrinter CodeGen Core MC NVPTXAsmPrinter NVPTXDesc NVPTXInfo Scalar SelectionDAG Support Target add_to_library_groups = NVPTX diff --git a/lib/Target/NVPTX/MCTargetDesc/NVPTXBaseInfo.h b/lib/Target/NVPTX/MCTargetDesc/NVPTXBaseInfo.h index ddb122f..a72ae2e 100644 --- a/lib/Target/NVPTX/MCTargetDesc/NVPTXBaseInfo.h +++ b/lib/Target/NVPTX/MCTargetDesc/NVPTXBaseInfo.h @@ -14,8 +14,8 @@ // //===----------------------------------------------------------------------===// -#ifndef NVPTXBASEINFO_H -#define NVPTXBASEINFO_H +#ifndef LLVM_LIB_TARGET_NVPTX_MCTARGETDESC_NVPTXBASEINFO_H +#define LLVM_LIB_TARGET_NVPTX_MCTARGETDESC_NVPTXBASEINFO_H namespace llvm { @@ -84,6 +84,17 @@ __attribute__((unused)) #endif static const char *NamedMDForAnnotations = "nvvm.annotations"; +namespace NVPTXII { +enum { + // These must be kept in sync with TSFlags in NVPTXInstrFormats.td + IsTexFlag = 0x80, + IsSuldMask = 0x300, + IsSuldShift = 8, + IsSustFlag = 0x400, + IsSurfTexQueryFlag = 0x800, + IsTexModeUnifiedFlag = 0x1000 +}; +} } #endif diff --git a/lib/Target/NVPTX/MCTargetDesc/NVPTXMCAsmInfo.cpp b/lib/Target/NVPTX/MCTargetDesc/NVPTXMCAsmInfo.cpp index 366341a..4fd5bdd 100644 --- a/lib/Target/NVPTX/MCTargetDesc/NVPTXMCAsmInfo.cpp +++ 
b/lib/Target/NVPTX/MCTargetDesc/NVPTXMCAsmInfo.cpp @@ -25,7 +25,7 @@ static cl::opt<bool> CompileForDebugging("debug-compile", void NVPTXMCAsmInfo::anchor() {} -NVPTXMCAsmInfo::NVPTXMCAsmInfo(const StringRef &TT) { +NVPTXMCAsmInfo::NVPTXMCAsmInfo(StringRef TT) { Triple TheTriple(TT); if (TheTriple.getArch() == Triple::nvptx64) { PointerSize = CalleeSaveStackSlotSize = 8; @@ -33,8 +33,6 @@ NVPTXMCAsmInfo::NVPTXMCAsmInfo(const StringRef &TT) { CommentString = "//"; - HasSetDirective = false; - HasSingleParameterDotFile = false; InlineAsmStart = " inline asm"; diff --git a/lib/Target/NVPTX/MCTargetDesc/NVPTXMCAsmInfo.h b/lib/Target/NVPTX/MCTargetDesc/NVPTXMCAsmInfo.h index 7d1633f..c324286 100644 --- a/lib/Target/NVPTX/MCTargetDesc/NVPTXMCAsmInfo.h +++ b/lib/Target/NVPTX/MCTargetDesc/NVPTXMCAsmInfo.h @@ -11,8 +11,8 @@ // //===----------------------------------------------------------------------===// -#ifndef NVPTX_MCASM_INFO_H -#define NVPTX_MCASM_INFO_H +#ifndef LLVM_LIB_TARGET_NVPTX_MCTARGETDESC_NVPTXMCASMINFO_H +#define LLVM_LIB_TARGET_NVPTX_MCTARGETDESC_NVPTXMCASMINFO_H #include "llvm/MC/MCAsmInfo.h" @@ -23,8 +23,8 @@ class StringRef; class NVPTXMCAsmInfo : public MCAsmInfo { virtual void anchor(); public: - explicit NVPTXMCAsmInfo(const StringRef &TT); + explicit NVPTXMCAsmInfo(StringRef TT); }; } // namespace llvm -#endif // NVPTX_MCASM_INFO_H +#endif diff --git a/lib/Target/NVPTX/MCTargetDesc/NVPTXMCTargetDesc.h b/lib/Target/NVPTX/MCTargetDesc/NVPTXMCTargetDesc.h index af95c76..98821d2 100644 --- a/lib/Target/NVPTX/MCTargetDesc/NVPTXMCTargetDesc.h +++ b/lib/Target/NVPTX/MCTargetDesc/NVPTXMCTargetDesc.h @@ -11,8 +11,8 @@ // //===----------------------------------------------------------------------===// -#ifndef NVPTXMCTARGETDESC_H -#define NVPTXMCTARGETDESC_H +#ifndef LLVM_LIB_TARGET_NVPTX_MCTARGETDESC_NVPTXMCTARGETDESC_H +#define LLVM_LIB_TARGET_NVPTX_MCTARGETDESC_NVPTXMCTARGETDESC_H namespace llvm { class Target; diff --git 
a/lib/Target/NVPTX/ManagedStringPool.h b/lib/Target/NVPTX/ManagedStringPool.h index f9fb059..a2d670f 100644 --- a/lib/Target/NVPTX/ManagedStringPool.h +++ b/lib/Target/NVPTX/ManagedStringPool.h @@ -12,8 +12,8 @@ // //===----------------------------------------------------------------------===// -#ifndef LLVM_SUPPORT_MANAGED_STRING_H -#define LLVM_SUPPORT_MANAGED_STRING_H +#ifndef LLVM_LIB_TARGET_NVPTX_MANAGEDSTRINGPOOL_H +#define LLVM_LIB_TARGET_NVPTX_MANAGEDSTRINGPOOL_H #include "llvm/ADT/SmallVector.h" #include <string> diff --git a/lib/Target/NVPTX/NVPTX.h b/lib/Target/NVPTX/NVPTX.h index e74c808..13ba57e 100644 --- a/lib/Target/NVPTX/NVPTX.h +++ b/lib/Target/NVPTX/NVPTX.h @@ -12,8 +12,8 @@ // //===----------------------------------------------------------------------===// -#ifndef LLVM_TARGET_NVPTX_H -#define LLVM_TARGET_NVPTX_H +#ifndef LLVM_LIB_TARGET_NVPTX_NVPTX_H +#define LLVM_LIB_TARGET_NVPTX_NVPTX_H #include "MCTargetDesc/NVPTXBaseInfo.h" #include "llvm/ADT/StringMap.h" @@ -59,6 +59,7 @@ inline static const char *NVPTXCondCodeToString(NVPTXCC::CondCodes CC) { llvm_unreachable("Unknown condition code"); } +ImmutablePass *createNVPTXTargetTransformInfoPass(const NVPTXTargetMachine *TM); FunctionPass * createNVPTXISelDag(NVPTXTargetMachine &TM, llvm::CodeGenOpt::Level OptLevel); ModulePass *createNVPTXAssignValidGlobalNamesPass(); @@ -69,6 +70,7 @@ ModulePass *createNVVMReflectPass(const StringMap<int>& Mapping); MachineFunctionPass *createNVPTXPrologEpilogPass(); MachineFunctionPass *createNVPTXReplaceImageHandlesPass(); FunctionPass *createNVPTXImageOptimizerPass(); +FunctionPass *createNVPTXLowerStructArgsPass(); bool isImageOrSamplerVal(const Value *, const Module *); diff --git a/lib/Target/NVPTX/NVPTXAllocaHoisting.h b/lib/Target/NVPTX/NVPTXAllocaHoisting.h index 5b61068..69fc86e 100644 --- a/lib/Target/NVPTX/NVPTXAllocaHoisting.h +++ b/lib/Target/NVPTX/NVPTXAllocaHoisting.h @@ -11,8 +11,8 @@ // 
//===----------------------------------------------------------------------===// -#ifndef NVPTX_ALLOCA_HOISTING_H_ -#define NVPTX_ALLOCA_HOISTING_H_ +#ifndef LLVM_LIB_TARGET_NVPTX_NVPTXALLOCAHOISTING_H +#define LLVM_LIB_TARGET_NVPTX_NVPTXALLOCAHOISTING_H #include "llvm/CodeGen/MachineFunctionAnalysis.h" #include "llvm/IR/DataLayout.h" @@ -47,4 +47,4 @@ extern FunctionPass *createAllocaHoisting(); } // end namespace llvm -#endif // NVPTX_ALLOCA_HOISTING_H_ +#endif diff --git a/lib/Target/NVPTX/NVPTXAsmPrinter.cpp b/lib/Target/NVPTX/NVPTXAsmPrinter.cpp index decf02a..35ba4f1 100644 --- a/lib/Target/NVPTX/NVPTXAsmPrinter.cpp +++ b/lib/Target/NVPTX/NVPTXAsmPrinter.cpp @@ -88,12 +88,9 @@ void VisitGlobalVariableForEmission( return; // Do we have a circular dependency? - if (Visiting.count(GV)) + if (!Visiting.insert(GV).second) report_fatal_error("Circular dependency found in global variable set"); - // Start visiting this global - Visiting.insert(GV); - // Make sure we visit all dependents first DenseSet<const GlobalVariable *> Others; for (unsigned i = 0, e = GV->getNumOperands(); i != e; ++i) @@ -140,7 +137,8 @@ const MCExpr *nvptx::LowerConstant(const Constant *CV, AsmPrinter &AP) { // If the code isn't optimized, there may be outstanding folding // opportunities. Attempt to fold the expression using DataLayout as a // last resort before giving up. 
- if (Constant *C = ConstantFoldConstantExpression(CE, AP.TM.getDataLayout())) + if (Constant *C = ConstantFoldConstantExpression( + CE, AP.TM.getSubtargetImpl()->getDataLayout())) if (C != CE) return LowerConstant(C, AP); @@ -169,7 +167,7 @@ const MCExpr *nvptx::LowerConstant(const Constant *CV, AsmPrinter &AP) { report_fatal_error(OS.str()); } case Instruction::GetElementPtr: { - const DataLayout &TD = *AP.TM.getDataLayout(); + const DataLayout &TD = *AP.TM.getSubtargetImpl()->getDataLayout(); // Generate a symbolic expression for the byte address APInt OffsetAI(TD.getPointerSizeInBits(), 0); cast<GEPOperator>(CE)->accumulateConstantOffset(TD, OffsetAI); @@ -193,7 +191,7 @@ const MCExpr *nvptx::LowerConstant(const Constant *CV, AsmPrinter &AP) { return LowerConstant(CE->getOperand(0), AP); case Instruction::IntToPtr: { - const DataLayout &TD = *AP.TM.getDataLayout(); + const DataLayout &TD = *AP.TM.getSubtargetImpl()->getDataLayout(); // Handle casts to pointers by changing them into casts to the appropriate // integer type. This promotes constant folding and simplifies this code. Constant *Op = CE->getOperand(0); @@ -203,7 +201,7 @@ const MCExpr *nvptx::LowerConstant(const Constant *CV, AsmPrinter &AP) { } case Instruction::PtrToInt: { - const DataLayout &TD = *AP.TM.getDataLayout(); + const DataLayout &TD = *AP.TM.getSubtargetImpl()->getDataLayout(); // Support only foldable casts to/from pointers that can be eliminated by // changing the pointer to the appropriately sized integer type. 
Constant *Op = CE->getOperand(0); @@ -330,253 +328,51 @@ void NVPTXAsmPrinter::EmitInstruction(const MachineInstr *MI) { bool NVPTXAsmPrinter::lowerImageHandleOperand(const MachineInstr *MI, unsigned OpNo, MCOperand &MCOp) { const MachineOperand &MO = MI->getOperand(OpNo); + const MCInstrDesc &MCID = MI->getDesc(); - switch (MI->getOpcode()) { - default: return false; - case NVPTX::TEX_1D_F32_I32: - case NVPTX::TEX_1D_F32_F32: - case NVPTX::TEX_1D_F32_F32_LEVEL: - case NVPTX::TEX_1D_F32_F32_GRAD: - case NVPTX::TEX_1D_I32_I32: - case NVPTX::TEX_1D_I32_F32: - case NVPTX::TEX_1D_I32_F32_LEVEL: - case NVPTX::TEX_1D_I32_F32_GRAD: - case NVPTX::TEX_1D_ARRAY_F32_I32: - case NVPTX::TEX_1D_ARRAY_F32_F32: - case NVPTX::TEX_1D_ARRAY_F32_F32_LEVEL: - case NVPTX::TEX_1D_ARRAY_F32_F32_GRAD: - case NVPTX::TEX_1D_ARRAY_I32_I32: - case NVPTX::TEX_1D_ARRAY_I32_F32: - case NVPTX::TEX_1D_ARRAY_I32_F32_LEVEL: - case NVPTX::TEX_1D_ARRAY_I32_F32_GRAD: - case NVPTX::TEX_2D_F32_I32: - case NVPTX::TEX_2D_F32_F32: - case NVPTX::TEX_2D_F32_F32_LEVEL: - case NVPTX::TEX_2D_F32_F32_GRAD: - case NVPTX::TEX_2D_I32_I32: - case NVPTX::TEX_2D_I32_F32: - case NVPTX::TEX_2D_I32_F32_LEVEL: - case NVPTX::TEX_2D_I32_F32_GRAD: - case NVPTX::TEX_2D_ARRAY_F32_I32: - case NVPTX::TEX_2D_ARRAY_F32_F32: - case NVPTX::TEX_2D_ARRAY_F32_F32_LEVEL: - case NVPTX::TEX_2D_ARRAY_F32_F32_GRAD: - case NVPTX::TEX_2D_ARRAY_I32_I32: - case NVPTX::TEX_2D_ARRAY_I32_F32: - case NVPTX::TEX_2D_ARRAY_I32_F32_LEVEL: - case NVPTX::TEX_2D_ARRAY_I32_F32_GRAD: - case NVPTX::TEX_3D_F32_I32: - case NVPTX::TEX_3D_F32_F32: - case NVPTX::TEX_3D_F32_F32_LEVEL: - case NVPTX::TEX_3D_F32_F32_GRAD: - case NVPTX::TEX_3D_I32_I32: - case NVPTX::TEX_3D_I32_F32: - case NVPTX::TEX_3D_I32_F32_LEVEL: - case NVPTX::TEX_3D_I32_F32_GRAD: - { + if (MCID.TSFlags & NVPTXII::IsTexFlag) { // This is a texture fetch, so operand 4 is a texref and operand 5 is // a samplerref - if (OpNo == 4) { + if (OpNo == 4 && MO.isImm()) { lowerImageHandleSymbol(MO.getImm(), 
MCOp); return true; } - if (OpNo == 5) { + if (OpNo == 5 && MO.isImm() && !(MCID.TSFlags & NVPTXII::IsTexModeUnifiedFlag)) { lowerImageHandleSymbol(MO.getImm(), MCOp); return true; } return false; - } - case NVPTX::SULD_1D_I8_TRAP: - case NVPTX::SULD_1D_I16_TRAP: - case NVPTX::SULD_1D_I32_TRAP: - case NVPTX::SULD_1D_ARRAY_I8_TRAP: - case NVPTX::SULD_1D_ARRAY_I16_TRAP: - case NVPTX::SULD_1D_ARRAY_I32_TRAP: - case NVPTX::SULD_2D_I8_TRAP: - case NVPTX::SULD_2D_I16_TRAP: - case NVPTX::SULD_2D_I32_TRAP: - case NVPTX::SULD_2D_ARRAY_I8_TRAP: - case NVPTX::SULD_2D_ARRAY_I16_TRAP: - case NVPTX::SULD_2D_ARRAY_I32_TRAP: - case NVPTX::SULD_3D_I8_TRAP: - case NVPTX::SULD_3D_I16_TRAP: - case NVPTX::SULD_3D_I32_TRAP: { - // This is a V1 surface load, so operand 1 is a surfref - if (OpNo == 1) { - lowerImageHandleSymbol(MO.getImm(), MCOp); - return true; - } + } else if (MCID.TSFlags & NVPTXII::IsSuldMask) { + unsigned VecSize = + 1 << (((MCID.TSFlags & NVPTXII::IsSuldMask) >> NVPTXII::IsSuldShift) - 1); - return false; - } - case NVPTX::SULD_1D_V2I8_TRAP: - case NVPTX::SULD_1D_V2I16_TRAP: - case NVPTX::SULD_1D_V2I32_TRAP: - case NVPTX::SULD_1D_ARRAY_V2I8_TRAP: - case NVPTX::SULD_1D_ARRAY_V2I16_TRAP: - case NVPTX::SULD_1D_ARRAY_V2I32_TRAP: - case NVPTX::SULD_2D_V2I8_TRAP: - case NVPTX::SULD_2D_V2I16_TRAP: - case NVPTX::SULD_2D_V2I32_TRAP: - case NVPTX::SULD_2D_ARRAY_V2I8_TRAP: - case NVPTX::SULD_2D_ARRAY_V2I16_TRAP: - case NVPTX::SULD_2D_ARRAY_V2I32_TRAP: - case NVPTX::SULD_3D_V2I8_TRAP: - case NVPTX::SULD_3D_V2I16_TRAP: - case NVPTX::SULD_3D_V2I32_TRAP: { - // This is a V2 surface load, so operand 2 is a surfref - if (OpNo == 2) { + // For a surface load of vector size N, the Nth operand will be the surfref + if (OpNo == VecSize && MO.isImm()) { lowerImageHandleSymbol(MO.getImm(), MCOp); return true; } return false; - } - case NVPTX::SULD_1D_V4I8_TRAP: - case NVPTX::SULD_1D_V4I16_TRAP: - case NVPTX::SULD_1D_V4I32_TRAP: - case NVPTX::SULD_1D_ARRAY_V4I8_TRAP: - case 
NVPTX::SULD_1D_ARRAY_V4I16_TRAP: - case NVPTX::SULD_1D_ARRAY_V4I32_TRAP: - case NVPTX::SULD_2D_V4I8_TRAP: - case NVPTX::SULD_2D_V4I16_TRAP: - case NVPTX::SULD_2D_V4I32_TRAP: - case NVPTX::SULD_2D_ARRAY_V4I8_TRAP: - case NVPTX::SULD_2D_ARRAY_V4I16_TRAP: - case NVPTX::SULD_2D_ARRAY_V4I32_TRAP: - case NVPTX::SULD_3D_V4I8_TRAP: - case NVPTX::SULD_3D_V4I16_TRAP: - case NVPTX::SULD_3D_V4I32_TRAP: { - // This is a V4 surface load, so operand 4 is a surfref - if (OpNo == 4) { - lowerImageHandleSymbol(MO.getImm(), MCOp); - return true; - } - - return false; - } - case NVPTX::SUST_B_1D_B8_TRAP: - case NVPTX::SUST_B_1D_B16_TRAP: - case NVPTX::SUST_B_1D_B32_TRAP: - case NVPTX::SUST_B_1D_V2B8_TRAP: - case NVPTX::SUST_B_1D_V2B16_TRAP: - case NVPTX::SUST_B_1D_V2B32_TRAP: - case NVPTX::SUST_B_1D_V4B8_TRAP: - case NVPTX::SUST_B_1D_V4B16_TRAP: - case NVPTX::SUST_B_1D_V4B32_TRAP: - case NVPTX::SUST_B_1D_ARRAY_B8_TRAP: - case NVPTX::SUST_B_1D_ARRAY_B16_TRAP: - case NVPTX::SUST_B_1D_ARRAY_B32_TRAP: - case NVPTX::SUST_B_1D_ARRAY_V2B8_TRAP: - case NVPTX::SUST_B_1D_ARRAY_V2B16_TRAP: - case NVPTX::SUST_B_1D_ARRAY_V2B32_TRAP: - case NVPTX::SUST_B_1D_ARRAY_V4B8_TRAP: - case NVPTX::SUST_B_1D_ARRAY_V4B16_TRAP: - case NVPTX::SUST_B_1D_ARRAY_V4B32_TRAP: - case NVPTX::SUST_B_2D_B8_TRAP: - case NVPTX::SUST_B_2D_B16_TRAP: - case NVPTX::SUST_B_2D_B32_TRAP: - case NVPTX::SUST_B_2D_V2B8_TRAP: - case NVPTX::SUST_B_2D_V2B16_TRAP: - case NVPTX::SUST_B_2D_V2B32_TRAP: - case NVPTX::SUST_B_2D_V4B8_TRAP: - case NVPTX::SUST_B_2D_V4B16_TRAP: - case NVPTX::SUST_B_2D_V4B32_TRAP: - case NVPTX::SUST_B_2D_ARRAY_B8_TRAP: - case NVPTX::SUST_B_2D_ARRAY_B16_TRAP: - case NVPTX::SUST_B_2D_ARRAY_B32_TRAP: - case NVPTX::SUST_B_2D_ARRAY_V2B8_TRAP: - case NVPTX::SUST_B_2D_ARRAY_V2B16_TRAP: - case NVPTX::SUST_B_2D_ARRAY_V2B32_TRAP: - case NVPTX::SUST_B_2D_ARRAY_V4B8_TRAP: - case NVPTX::SUST_B_2D_ARRAY_V4B16_TRAP: - case NVPTX::SUST_B_2D_ARRAY_V4B32_TRAP: - case NVPTX::SUST_B_3D_B8_TRAP: - case NVPTX::SUST_B_3D_B16_TRAP: - 
case NVPTX::SUST_B_3D_B32_TRAP: - case NVPTX::SUST_B_3D_V2B8_TRAP: - case NVPTX::SUST_B_3D_V2B16_TRAP: - case NVPTX::SUST_B_3D_V2B32_TRAP: - case NVPTX::SUST_B_3D_V4B8_TRAP: - case NVPTX::SUST_B_3D_V4B16_TRAP: - case NVPTX::SUST_B_3D_V4B32_TRAP: - case NVPTX::SUST_P_1D_B8_TRAP: - case NVPTX::SUST_P_1D_B16_TRAP: - case NVPTX::SUST_P_1D_B32_TRAP: - case NVPTX::SUST_P_1D_V2B8_TRAP: - case NVPTX::SUST_P_1D_V2B16_TRAP: - case NVPTX::SUST_P_1D_V2B32_TRAP: - case NVPTX::SUST_P_1D_V4B8_TRAP: - case NVPTX::SUST_P_1D_V4B16_TRAP: - case NVPTX::SUST_P_1D_V4B32_TRAP: - case NVPTX::SUST_P_1D_ARRAY_B8_TRAP: - case NVPTX::SUST_P_1D_ARRAY_B16_TRAP: - case NVPTX::SUST_P_1D_ARRAY_B32_TRAP: - case NVPTX::SUST_P_1D_ARRAY_V2B8_TRAP: - case NVPTX::SUST_P_1D_ARRAY_V2B16_TRAP: - case NVPTX::SUST_P_1D_ARRAY_V2B32_TRAP: - case NVPTX::SUST_P_1D_ARRAY_V4B8_TRAP: - case NVPTX::SUST_P_1D_ARRAY_V4B16_TRAP: - case NVPTX::SUST_P_1D_ARRAY_V4B32_TRAP: - case NVPTX::SUST_P_2D_B8_TRAP: - case NVPTX::SUST_P_2D_B16_TRAP: - case NVPTX::SUST_P_2D_B32_TRAP: - case NVPTX::SUST_P_2D_V2B8_TRAP: - case NVPTX::SUST_P_2D_V2B16_TRAP: - case NVPTX::SUST_P_2D_V2B32_TRAP: - case NVPTX::SUST_P_2D_V4B8_TRAP: - case NVPTX::SUST_P_2D_V4B16_TRAP: - case NVPTX::SUST_P_2D_V4B32_TRAP: - case NVPTX::SUST_P_2D_ARRAY_B8_TRAP: - case NVPTX::SUST_P_2D_ARRAY_B16_TRAP: - case NVPTX::SUST_P_2D_ARRAY_B32_TRAP: - case NVPTX::SUST_P_2D_ARRAY_V2B8_TRAP: - case NVPTX::SUST_P_2D_ARRAY_V2B16_TRAP: - case NVPTX::SUST_P_2D_ARRAY_V2B32_TRAP: - case NVPTX::SUST_P_2D_ARRAY_V4B8_TRAP: - case NVPTX::SUST_P_2D_ARRAY_V4B16_TRAP: - case NVPTX::SUST_P_2D_ARRAY_V4B32_TRAP: - case NVPTX::SUST_P_3D_B8_TRAP: - case NVPTX::SUST_P_3D_B16_TRAP: - case NVPTX::SUST_P_3D_B32_TRAP: - case NVPTX::SUST_P_3D_V2B8_TRAP: - case NVPTX::SUST_P_3D_V2B16_TRAP: - case NVPTX::SUST_P_3D_V2B32_TRAP: - case NVPTX::SUST_P_3D_V4B8_TRAP: - case NVPTX::SUST_P_3D_V4B16_TRAP: - case NVPTX::SUST_P_3D_V4B32_TRAP: { + } else if (MCID.TSFlags & NVPTXII::IsSustFlag) { // This is a 
surface store, so operand 0 is a surfref - if (OpNo == 0) { + if (OpNo == 0 && MO.isImm()) { lowerImageHandleSymbol(MO.getImm(), MCOp); return true; } return false; - } - case NVPTX::TXQ_CHANNEL_ORDER: - case NVPTX::TXQ_CHANNEL_DATA_TYPE: - case NVPTX::TXQ_WIDTH: - case NVPTX::TXQ_HEIGHT: - case NVPTX::TXQ_DEPTH: - case NVPTX::TXQ_ARRAY_SIZE: - case NVPTX::TXQ_NUM_SAMPLES: - case NVPTX::TXQ_NUM_MIPMAP_LEVELS: - case NVPTX::SUQ_CHANNEL_ORDER: - case NVPTX::SUQ_CHANNEL_DATA_TYPE: - case NVPTX::SUQ_WIDTH: - case NVPTX::SUQ_HEIGHT: - case NVPTX::SUQ_DEPTH: - case NVPTX::SUQ_ARRAY_SIZE: { + } else if (MCID.TSFlags & NVPTXII::IsSurfTexQueryFlag) { // This is a query, so operand 1 is a surfref/texref - if (OpNo == 1) { + if (OpNo == 1 && MO.isImm()) { lowerImageHandleSymbol(MO.getImm(), MCOp); return true; } return false; } - } + + return false; } void NVPTXAsmPrinter::lowerImageHandleSymbol(unsigned Index, MCOperand &MCOp) { @@ -704,8 +500,8 @@ MCOperand NVPTXAsmPrinter::GetSymbolRef(const MCSymbol *Symbol) { } void NVPTXAsmPrinter::printReturnValStr(const Function *F, raw_ostream &O) { - const DataLayout *TD = TM.getDataLayout(); - const TargetLowering *TLI = TM.getTargetLowering(); + const DataLayout *TD = TM.getSubtargetImpl()->getDataLayout(); + const TargetLowering *TLI = TM.getSubtargetImpl()->getTargetLowering(); Type *Ty = F->getReturnType(); @@ -828,13 +624,14 @@ void NVPTXAsmPrinter::EmitFunctionBodyEnd() { void NVPTXAsmPrinter::emitImplicitDef(const MachineInstr *MI) const { unsigned RegNo = MI->getOperand(0).getReg(); - const TargetRegisterInfo *TRI = TM.getRegisterInfo(); + const TargetRegisterInfo *TRI = TM.getSubtargetImpl()->getRegisterInfo(); if (TRI->isVirtualRegister(RegNo)) { OutStreamer.AddComment(Twine("implicit-def: ") + getVirtualRegisterName(RegNo)); } else { - OutStreamer.AddComment(Twine("implicit-def: ") + - TM.getRegisterInfo()->getName(RegNo)); + OutStreamer.AddComment( + Twine("implicit-def: ") + + 
TM.getSubtargetImpl()->getRegisterInfo()->getName(RegNo)); } OutStreamer.AddBlankLine(); } @@ -1155,7 +952,7 @@ bool NVPTXAsmPrinter::doInitialization(Module &M) { const_cast<TargetLoweringObjectFile &>(getObjFileLowering()) .Initialize(OutContext, TM); - Mang = new Mangler(TM.getDataLayout()); + Mang = new Mangler(TM.getSubtargetImpl()->getDataLayout()); // Emit header before any dwarf directives are emitted below. emitHeader(M, OS1); @@ -1356,7 +1153,7 @@ void NVPTXAsmPrinter::printModuleLevelGV(const GlobalVariable *GVar, GVar->getName().startswith("nvvm.")) return; - const DataLayout *TD = TM.getDataLayout(); + const DataLayout *TD = TM.getSubtargetImpl()->getDataLayout(); // GlobalVariables are always constant pointers themselves. const PointerType *PTy = GVar->getType(); @@ -1659,7 +1456,7 @@ NVPTXAsmPrinter::getPTXFundamentalTypeStr(const Type *Ty, bool useB4PTR) const { void NVPTXAsmPrinter::emitPTXGlobalVariable(const GlobalVariable *GVar, raw_ostream &O) { - const DataLayout *TD = TM.getDataLayout(); + const DataLayout *TD = TM.getSubtargetImpl()->getDataLayout(); // GlobalVariables are always constant pointers themselves. const PointerType *PTy = GVar->getType(); @@ -1780,9 +1577,9 @@ void NVPTXAsmPrinter::printParamName(int paramIndex, raw_ostream &O) { } void NVPTXAsmPrinter::emitFunctionParamList(const Function *F, raw_ostream &O) { - const DataLayout *TD = TM.getDataLayout(); + const DataLayout *TD = TM.getSubtargetImpl()->getDataLayout(); const AttributeSet &PAL = F->getAttributes(); - const TargetLowering *TLI = TM.getTargetLowering(); + const TargetLowering *TLI = TM.getSubtargetImpl()->getTargetLowering(); Function::const_arg_iterator I, E; unsigned paramIndex = 0; bool first = true; @@ -1973,7 +1770,7 @@ void NVPTXAsmPrinter::setAndEmitFunctionVirtualRegisters( // Map the global virtual register number to a register class specific // virtual register number starting from 1 with that class. 
- const TargetRegisterInfo *TRI = MF.getTarget().getRegisterInfo(); + const TargetRegisterInfo *TRI = MF.getSubtarget().getRegisterInfo(); //unsigned numRegClasses = TRI->getNumRegClasses(); // Emit the Fake Stack Object @@ -2010,9 +1807,9 @@ void NVPTXAsmPrinter::setAndEmitFunctionVirtualRegisters( // O << "\t.reg .s16 %rc<" << NVPTXNumRegisters << ">;\n"; // O << "\t.reg .s16 %rs<" << NVPTXNumRegisters << ">;\n"; // O << "\t.reg .s32 %r<" << NVPTXNumRegisters << ">;\n"; - // O << "\t.reg .s64 %rl<" << NVPTXNumRegisters << ">;\n"; + // O << "\t.reg .s64 %rd<" << NVPTXNumRegisters << ">;\n"; // O << "\t.reg .f32 %f<" << NVPTXNumRegisters << ">;\n"; - // O << "\t.reg .f64 %fl<" << NVPTXNumRegisters << ">;\n"; + // O << "\t.reg .f64 %fd<" << NVPTXNumRegisters << ">;\n"; // Emit declaration of the virtual registers or 'physical' registers for // each register class @@ -2113,7 +1910,7 @@ void NVPTXAsmPrinter::printScalarConstant(const Constant *CPV, raw_ostream &O) { void NVPTXAsmPrinter::bufferLEByte(const Constant *CPV, int Bytes, AggBuffer *aggBuffer) { - const DataLayout *TD = TM.getDataLayout(); + const DataLayout *TD = TM.getSubtargetImpl()->getDataLayout(); if (isa<UndefValue>(CPV) || CPV->isNullValue()) { int s = TD->getTypeAllocSize(CPV->getType()); @@ -2237,7 +2034,7 @@ void NVPTXAsmPrinter::bufferLEByte(const Constant *CPV, int Bytes, void NVPTXAsmPrinter::bufferAggregateConstant(const Constant *CPV, AggBuffer *aggBuffer) { - const DataLayout *TD = TM.getDataLayout(); + const DataLayout *TD = TM.getSubtargetImpl()->getDataLayout(); int Bytes; // Old constants diff --git a/lib/Target/NVPTX/NVPTXAsmPrinter.h b/lib/Target/NVPTX/NVPTXAsmPrinter.h index a9f9bdd..83fa5d3 100644 --- a/lib/Target/NVPTX/NVPTXAsmPrinter.h +++ b/lib/Target/NVPTX/NVPTXAsmPrinter.h @@ -12,8 +12,8 @@ // //===----------------------------------------------------------------------===// -#ifndef NVPTXASMPRINTER_H -#define NVPTXASMPRINTER_H +#ifndef LLVM_LIB_TARGET_NVPTX_NVPTXASMPRINTER_H 
+#define LLVM_LIB_TARGET_NVPTX_NVPTXASMPRINTER_H #include "NVPTX.h" #include "NVPTXSubtarget.h" @@ -86,13 +86,13 @@ class LLVM_LIBRARY_VISIBILITY NVPTXAsmPrinter : public AsmPrinter { // Once we have this AggBuffer setup, we can choose how to print // it out. public: - unsigned size; // size of the buffer in bytes - unsigned char *buffer; // the buffer unsigned numSymbols; // number of symbol addresses - SmallVector<unsigned, 4> symbolPosInBuffer; - SmallVector<const Value *, 4> Symbols; private: + const unsigned size; // size of the buffer in bytes + std::vector<unsigned char> buffer; // the buffer + SmallVector<unsigned, 4> symbolPosInBuffer; + SmallVector<const Value *, 4> Symbols; unsigned curpos; raw_ostream &O; NVPTXAsmPrinter &AP; @@ -100,14 +100,11 @@ class LLVM_LIBRARY_VISIBILITY NVPTXAsmPrinter : public AsmPrinter { public: AggBuffer(unsigned _size, raw_ostream &_O, NVPTXAsmPrinter &_AP) - : O(_O), AP(_AP) { - buffer = new unsigned char[_size]; - size = _size; + : size(_size), buffer(_size), O(_O), AP(_AP) { curpos = 0; numSymbols = 0; EmitGeneric = AP.EmitGeneric; } - ~AggBuffer() { delete[] buffer; } unsigned addBytes(unsigned char *Ptr, int Num, int Bytes) { assert((curpos + Num) <= size); assert((curpos + Bytes) <= size); @@ -179,9 +176,9 @@ class LLVM_LIBRARY_VISIBILITY NVPTXAsmPrinter : public AsmPrinter { else nextSymbolPos = symbolPosInBuffer[nSym]; } else if (nBytes == 4) - O << *(unsigned int *)(buffer + pos); + O << *(unsigned int *)(&buffer[pos]); else - O << *(unsigned long long *)(buffer + pos); + O << *(unsigned long long *)(&buffer[pos]); } } } diff --git a/lib/Target/NVPTX/NVPTXFrameLowering.cpp b/lib/Target/NVPTX/NVPTXFrameLowering.cpp index 8b088412..314df38 100644 --- a/lib/Target/NVPTX/NVPTXFrameLowering.cpp +++ b/lib/Target/NVPTX/NVPTXFrameLowering.cpp @@ -48,20 +48,20 @@ void NVPTXFrameLowering::emitPrologue(MachineFunction &MF) const { if (is64bit) { unsigned LocalReg = MRI.createVirtualRegister(&NVPTX::Int64RegsRegClass); 
MachineInstr *MI = - BuildMI(MBB, MBBI, dl, - MF.getTarget().getInstrInfo()->get(NVPTX::cvta_local_yes_64), + BuildMI(MBB, MBBI, dl, MF.getSubtarget().getInstrInfo()->get( + NVPTX::cvta_local_yes_64), NVPTX::VRFrame).addReg(LocalReg); BuildMI(MBB, MI, dl, - MF.getTarget().getInstrInfo()->get(NVPTX::MOV_DEPOT_ADDR_64), + MF.getSubtarget().getInstrInfo()->get(NVPTX::MOV_DEPOT_ADDR_64), LocalReg).addImm(MF.getFunctionNumber()); } else { unsigned LocalReg = MRI.createVirtualRegister(&NVPTX::Int32RegsRegClass); MachineInstr *MI = BuildMI(MBB, MBBI, dl, - MF.getTarget().getInstrInfo()->get(NVPTX::cvta_local_yes), + MF.getSubtarget().getInstrInfo()->get(NVPTX::cvta_local_yes), NVPTX::VRFrame).addReg(LocalReg); BuildMI(MBB, MI, dl, - MF.getTarget().getInstrInfo()->get(NVPTX::MOV_DEPOT_ADDR), + MF.getSubtarget().getInstrInfo()->get(NVPTX::MOV_DEPOT_ADDR), LocalReg).addImm(MF.getFunctionNumber()); } } diff --git a/lib/Target/NVPTX/NVPTXFrameLowering.h b/lib/Target/NVPTX/NVPTXFrameLowering.h index 56fb673..0846b78 100644 --- a/lib/Target/NVPTX/NVPTXFrameLowering.h +++ b/lib/Target/NVPTX/NVPTXFrameLowering.h @@ -11,8 +11,8 @@ // //===----------------------------------------------------------------------===// -#ifndef NVPTX_FRAMELOWERING_H -#define NVPTX_FRAMELOWERING_H +#ifndef LLVM_LIB_TARGET_NVPTX_NVPTXFRAMELOWERING_H +#define LLVM_LIB_TARGET_NVPTX_NVPTXFRAMELOWERING_H #include "llvm/Target/TargetFrameLowering.h" diff --git a/lib/Target/NVPTX/NVPTXGenericToNVVM.cpp b/lib/Target/NVPTX/NVPTXGenericToNVVM.cpp index faa9fdb..58fa95b 100644 --- a/lib/Target/NVPTX/NVPTXGenericToNVVM.cpp +++ b/lib/Target/NVPTX/NVPTXGenericToNVVM.cpp @@ -140,20 +140,23 @@ bool GenericToNVVM::runOnModule(Module &M) { for (GVMapTy::iterator I = GVMap.begin(), E = GVMap.end(); I != E;) { GlobalVariable *GV = I->first; GlobalVariable *NewGV = I->second; - ++I; + + // Remove GV from the map so that it can be RAUWed. Note that + // DenseMap::erase() won't invalidate any iterators but this one. 
+ auto Next = std::next(I); + GVMap.erase(I); + I = Next; + Constant *BitCastNewGV = ConstantExpr::getPointerCast(NewGV, GV->getType()); // At this point, the remaining uses of GV should be found only in global // variable initializers, as other uses have been already been removed // while walking through the instructions in function definitions. - for (Value::use_iterator UI = GV->use_begin(), UE = GV->use_end(); - UI != UE;) - (UI++)->set(BitCastNewGV); + GV->replaceAllUsesWith(BitCastNewGV); std::string Name = GV->getName(); - GV->removeDeadConstantUsers(); GV->eraseFromParent(); NewGV->setName(Name); } - GVMap.clear(); + assert(GVMap.empty() && "Expected it to be empty by now"); return true; } diff --git a/lib/Target/NVPTX/NVPTXISelDAGToDAG.cpp b/lib/Target/NVPTX/NVPTXISelDAGToDAG.cpp index 0dfbf10..cd0422d 100644 --- a/lib/Target/NVPTX/NVPTXISelDAGToDAG.cpp +++ b/lib/Target/NVPTX/NVPTXISelDAGToDAG.cpp @@ -24,19 +24,10 @@ using namespace llvm; #define DEBUG_TYPE "nvptx-isel" -unsigned FMAContractLevel = 0; - -static cl::opt<unsigned, true> -FMAContractLevelOpt("nvptx-fma-level", cl::ZeroOrMore, cl::Hidden, - cl::desc("NVPTX Specific: FMA contraction (0: don't do it" - " 1: do it 2: do it aggressively"), - cl::location(FMAContractLevel), - cl::init(2)); - static cl::opt<int> UsePrecDivF32( "nvptx-prec-divf32", cl::ZeroOrMore, cl::Hidden, cl::desc("NVPTX Specifies: 0 use div.approx, 1 use div.full, 2 use" - " IEEE Compliant F32 div.rnd if avaiable."), + " IEEE Compliant F32 div.rnd if available."), cl::init(2)); static cl::opt<bool> @@ -61,16 +52,6 @@ NVPTXDAGToDAGISel::NVPTXDAGToDAGISel(NVPTXTargetMachine &tm, CodeGenOpt::Level OptLevel) : SelectionDAGISel(tm, OptLevel), Subtarget(tm.getSubtarget<NVPTXSubtarget>()) { - - doFMAF32 = (OptLevel > 0) && Subtarget.hasFMAF32() && (FMAContractLevel >= 1); - doFMAF64 = (OptLevel > 0) && Subtarget.hasFMAF64() && (FMAContractLevel >= 1); - doFMAF32AGG = - (OptLevel > 0) && Subtarget.hasFMAF32() && (FMAContractLevel == 2); 
- doFMAF64AGG = - (OptLevel > 0) && Subtarget.hasFMAF64() && (FMAContractLevel == 2); - - allowFMA = (FMAContractLevel >= 1); - doMulWide = (OptLevel > 0); } @@ -116,6 +97,11 @@ bool NVPTXDAGToDAGISel::useF32FTZ() const { } } +bool NVPTXDAGToDAGISel::allowFMA() const { + const NVPTXTargetLowering *TL = Subtarget.getTargetLowering(); + return TL->allowFMA(*MF, OptLevel); +} + /// Select - Select instructions not customized! Used for /// expanded, promoted and normal instructions. SDNode *NVPTXDAGToDAGISel::Select(SDNode *N) { @@ -170,93 +156,341 @@ SDNode *NVPTXDAGToDAGISel::Select(SDNode *N) { case ISD::INTRINSIC_W_CHAIN: ResNode = SelectIntrinsicChain(N); break; - case NVPTXISD::Tex1DFloatI32: + case NVPTXISD::Tex1DFloatS32: case NVPTXISD::Tex1DFloatFloat: case NVPTXISD::Tex1DFloatFloatLevel: case NVPTXISD::Tex1DFloatFloatGrad: - case NVPTXISD::Tex1DI32I32: - case NVPTXISD::Tex1DI32Float: - case NVPTXISD::Tex1DI32FloatLevel: - case NVPTXISD::Tex1DI32FloatGrad: - case NVPTXISD::Tex1DArrayFloatI32: + case NVPTXISD::Tex1DS32S32: + case NVPTXISD::Tex1DS32Float: + case NVPTXISD::Tex1DS32FloatLevel: + case NVPTXISD::Tex1DS32FloatGrad: + case NVPTXISD::Tex1DU32S32: + case NVPTXISD::Tex1DU32Float: + case NVPTXISD::Tex1DU32FloatLevel: + case NVPTXISD::Tex1DU32FloatGrad: + case NVPTXISD::Tex1DArrayFloatS32: case NVPTXISD::Tex1DArrayFloatFloat: case NVPTXISD::Tex1DArrayFloatFloatLevel: case NVPTXISD::Tex1DArrayFloatFloatGrad: - case NVPTXISD::Tex1DArrayI32I32: - case NVPTXISD::Tex1DArrayI32Float: - case NVPTXISD::Tex1DArrayI32FloatLevel: - case NVPTXISD::Tex1DArrayI32FloatGrad: - case NVPTXISD::Tex2DFloatI32: + case NVPTXISD::Tex1DArrayS32S32: + case NVPTXISD::Tex1DArrayS32Float: + case NVPTXISD::Tex1DArrayS32FloatLevel: + case NVPTXISD::Tex1DArrayS32FloatGrad: + case NVPTXISD::Tex1DArrayU32S32: + case NVPTXISD::Tex1DArrayU32Float: + case NVPTXISD::Tex1DArrayU32FloatLevel: + case NVPTXISD::Tex1DArrayU32FloatGrad: + case NVPTXISD::Tex2DFloatS32: case 
NVPTXISD::Tex2DFloatFloat: case NVPTXISD::Tex2DFloatFloatLevel: case NVPTXISD::Tex2DFloatFloatGrad: - case NVPTXISD::Tex2DI32I32: - case NVPTXISD::Tex2DI32Float: - case NVPTXISD::Tex2DI32FloatLevel: - case NVPTXISD::Tex2DI32FloatGrad: - case NVPTXISD::Tex2DArrayFloatI32: + case NVPTXISD::Tex2DS32S32: + case NVPTXISD::Tex2DS32Float: + case NVPTXISD::Tex2DS32FloatLevel: + case NVPTXISD::Tex2DS32FloatGrad: + case NVPTXISD::Tex2DU32S32: + case NVPTXISD::Tex2DU32Float: + case NVPTXISD::Tex2DU32FloatLevel: + case NVPTXISD::Tex2DU32FloatGrad: + case NVPTXISD::Tex2DArrayFloatS32: case NVPTXISD::Tex2DArrayFloatFloat: case NVPTXISD::Tex2DArrayFloatFloatLevel: case NVPTXISD::Tex2DArrayFloatFloatGrad: - case NVPTXISD::Tex2DArrayI32I32: - case NVPTXISD::Tex2DArrayI32Float: - case NVPTXISD::Tex2DArrayI32FloatLevel: - case NVPTXISD::Tex2DArrayI32FloatGrad: - case NVPTXISD::Tex3DFloatI32: + case NVPTXISD::Tex2DArrayS32S32: + case NVPTXISD::Tex2DArrayS32Float: + case NVPTXISD::Tex2DArrayS32FloatLevel: + case NVPTXISD::Tex2DArrayS32FloatGrad: + case NVPTXISD::Tex2DArrayU32S32: + case NVPTXISD::Tex2DArrayU32Float: + case NVPTXISD::Tex2DArrayU32FloatLevel: + case NVPTXISD::Tex2DArrayU32FloatGrad: + case NVPTXISD::Tex3DFloatS32: case NVPTXISD::Tex3DFloatFloat: case NVPTXISD::Tex3DFloatFloatLevel: case NVPTXISD::Tex3DFloatFloatGrad: - case NVPTXISD::Tex3DI32I32: - case NVPTXISD::Tex3DI32Float: - case NVPTXISD::Tex3DI32FloatLevel: - case NVPTXISD::Tex3DI32FloatGrad: + case NVPTXISD::Tex3DS32S32: + case NVPTXISD::Tex3DS32Float: + case NVPTXISD::Tex3DS32FloatLevel: + case NVPTXISD::Tex3DS32FloatGrad: + case NVPTXISD::Tex3DU32S32: + case NVPTXISD::Tex3DU32Float: + case NVPTXISD::Tex3DU32FloatLevel: + case NVPTXISD::Tex3DU32FloatGrad: + case NVPTXISD::TexCubeFloatFloat: + case NVPTXISD::TexCubeFloatFloatLevel: + case NVPTXISD::TexCubeS32Float: + case NVPTXISD::TexCubeS32FloatLevel: + case NVPTXISD::TexCubeU32Float: + case NVPTXISD::TexCubeU32FloatLevel: + case 
NVPTXISD::TexCubeArrayFloatFloat: + case NVPTXISD::TexCubeArrayFloatFloatLevel: + case NVPTXISD::TexCubeArrayS32Float: + case NVPTXISD::TexCubeArrayS32FloatLevel: + case NVPTXISD::TexCubeArrayU32Float: + case NVPTXISD::TexCubeArrayU32FloatLevel: + case NVPTXISD::Tld4R2DFloatFloat: + case NVPTXISD::Tld4G2DFloatFloat: + case NVPTXISD::Tld4B2DFloatFloat: + case NVPTXISD::Tld4A2DFloatFloat: + case NVPTXISD::Tld4R2DS64Float: + case NVPTXISD::Tld4G2DS64Float: + case NVPTXISD::Tld4B2DS64Float: + case NVPTXISD::Tld4A2DS64Float: + case NVPTXISD::Tld4R2DU64Float: + case NVPTXISD::Tld4G2DU64Float: + case NVPTXISD::Tld4B2DU64Float: + case NVPTXISD::Tld4A2DU64Float: + case NVPTXISD::TexUnified1DFloatS32: + case NVPTXISD::TexUnified1DFloatFloat: + case NVPTXISD::TexUnified1DFloatFloatLevel: + case NVPTXISD::TexUnified1DFloatFloatGrad: + case NVPTXISD::TexUnified1DS32S32: + case NVPTXISD::TexUnified1DS32Float: + case NVPTXISD::TexUnified1DS32FloatLevel: + case NVPTXISD::TexUnified1DS32FloatGrad: + case NVPTXISD::TexUnified1DU32S32: + case NVPTXISD::TexUnified1DU32Float: + case NVPTXISD::TexUnified1DU32FloatLevel: + case NVPTXISD::TexUnified1DU32FloatGrad: + case NVPTXISD::TexUnified1DArrayFloatS32: + case NVPTXISD::TexUnified1DArrayFloatFloat: + case NVPTXISD::TexUnified1DArrayFloatFloatLevel: + case NVPTXISD::TexUnified1DArrayFloatFloatGrad: + case NVPTXISD::TexUnified1DArrayS32S32: + case NVPTXISD::TexUnified1DArrayS32Float: + case NVPTXISD::TexUnified1DArrayS32FloatLevel: + case NVPTXISD::TexUnified1DArrayS32FloatGrad: + case NVPTXISD::TexUnified1DArrayU32S32: + case NVPTXISD::TexUnified1DArrayU32Float: + case NVPTXISD::TexUnified1DArrayU32FloatLevel: + case NVPTXISD::TexUnified1DArrayU32FloatGrad: + case NVPTXISD::TexUnified2DFloatS32: + case NVPTXISD::TexUnified2DFloatFloat: + case NVPTXISD::TexUnified2DFloatFloatLevel: + case NVPTXISD::TexUnified2DFloatFloatGrad: + case NVPTXISD::TexUnified2DS32S32: + case NVPTXISD::TexUnified2DS32Float: + case 
NVPTXISD::TexUnified2DS32FloatLevel: + case NVPTXISD::TexUnified2DS32FloatGrad: + case NVPTXISD::TexUnified2DU32S32: + case NVPTXISD::TexUnified2DU32Float: + case NVPTXISD::TexUnified2DU32FloatLevel: + case NVPTXISD::TexUnified2DU32FloatGrad: + case NVPTXISD::TexUnified2DArrayFloatS32: + case NVPTXISD::TexUnified2DArrayFloatFloat: + case NVPTXISD::TexUnified2DArrayFloatFloatLevel: + case NVPTXISD::TexUnified2DArrayFloatFloatGrad: + case NVPTXISD::TexUnified2DArrayS32S32: + case NVPTXISD::TexUnified2DArrayS32Float: + case NVPTXISD::TexUnified2DArrayS32FloatLevel: + case NVPTXISD::TexUnified2DArrayS32FloatGrad: + case NVPTXISD::TexUnified2DArrayU32S32: + case NVPTXISD::TexUnified2DArrayU32Float: + case NVPTXISD::TexUnified2DArrayU32FloatLevel: + case NVPTXISD::TexUnified2DArrayU32FloatGrad: + case NVPTXISD::TexUnified3DFloatS32: + case NVPTXISD::TexUnified3DFloatFloat: + case NVPTXISD::TexUnified3DFloatFloatLevel: + case NVPTXISD::TexUnified3DFloatFloatGrad: + case NVPTXISD::TexUnified3DS32S32: + case NVPTXISD::TexUnified3DS32Float: + case NVPTXISD::TexUnified3DS32FloatLevel: + case NVPTXISD::TexUnified3DS32FloatGrad: + case NVPTXISD::TexUnified3DU32S32: + case NVPTXISD::TexUnified3DU32Float: + case NVPTXISD::TexUnified3DU32FloatLevel: + case NVPTXISD::TexUnified3DU32FloatGrad: + case NVPTXISD::TexUnifiedCubeFloatFloat: + case NVPTXISD::TexUnifiedCubeFloatFloatLevel: + case NVPTXISD::TexUnifiedCubeS32Float: + case NVPTXISD::TexUnifiedCubeS32FloatLevel: + case NVPTXISD::TexUnifiedCubeU32Float: + case NVPTXISD::TexUnifiedCubeU32FloatLevel: + case NVPTXISD::TexUnifiedCubeArrayFloatFloat: + case NVPTXISD::TexUnifiedCubeArrayFloatFloatLevel: + case NVPTXISD::TexUnifiedCubeArrayS32Float: + case NVPTXISD::TexUnifiedCubeArrayS32FloatLevel: + case NVPTXISD::TexUnifiedCubeArrayU32Float: + case NVPTXISD::TexUnifiedCubeArrayU32FloatLevel: + case NVPTXISD::Tld4UnifiedR2DFloatFloat: + case NVPTXISD::Tld4UnifiedG2DFloatFloat: + case NVPTXISD::Tld4UnifiedB2DFloatFloat: + case 
NVPTXISD::Tld4UnifiedA2DFloatFloat: + case NVPTXISD::Tld4UnifiedR2DS64Float: + case NVPTXISD::Tld4UnifiedG2DS64Float: + case NVPTXISD::Tld4UnifiedB2DS64Float: + case NVPTXISD::Tld4UnifiedA2DS64Float: + case NVPTXISD::Tld4UnifiedR2DU64Float: + case NVPTXISD::Tld4UnifiedG2DU64Float: + case NVPTXISD::Tld4UnifiedB2DU64Float: + case NVPTXISD::Tld4UnifiedA2DU64Float: ResNode = SelectTextureIntrinsic(N); break; + case NVPTXISD::Suld1DI8Clamp: + case NVPTXISD::Suld1DI16Clamp: + case NVPTXISD::Suld1DI32Clamp: + case NVPTXISD::Suld1DI64Clamp: + case NVPTXISD::Suld1DV2I8Clamp: + case NVPTXISD::Suld1DV2I16Clamp: + case NVPTXISD::Suld1DV2I32Clamp: + case NVPTXISD::Suld1DV2I64Clamp: + case NVPTXISD::Suld1DV4I8Clamp: + case NVPTXISD::Suld1DV4I16Clamp: + case NVPTXISD::Suld1DV4I32Clamp: + case NVPTXISD::Suld1DArrayI8Clamp: + case NVPTXISD::Suld1DArrayI16Clamp: + case NVPTXISD::Suld1DArrayI32Clamp: + case NVPTXISD::Suld1DArrayI64Clamp: + case NVPTXISD::Suld1DArrayV2I8Clamp: + case NVPTXISD::Suld1DArrayV2I16Clamp: + case NVPTXISD::Suld1DArrayV2I32Clamp: + case NVPTXISD::Suld1DArrayV2I64Clamp: + case NVPTXISD::Suld1DArrayV4I8Clamp: + case NVPTXISD::Suld1DArrayV4I16Clamp: + case NVPTXISD::Suld1DArrayV4I32Clamp: + case NVPTXISD::Suld2DI8Clamp: + case NVPTXISD::Suld2DI16Clamp: + case NVPTXISD::Suld2DI32Clamp: + case NVPTXISD::Suld2DI64Clamp: + case NVPTXISD::Suld2DV2I8Clamp: + case NVPTXISD::Suld2DV2I16Clamp: + case NVPTXISD::Suld2DV2I32Clamp: + case NVPTXISD::Suld2DV2I64Clamp: + case NVPTXISD::Suld2DV4I8Clamp: + case NVPTXISD::Suld2DV4I16Clamp: + case NVPTXISD::Suld2DV4I32Clamp: + case NVPTXISD::Suld2DArrayI8Clamp: + case NVPTXISD::Suld2DArrayI16Clamp: + case NVPTXISD::Suld2DArrayI32Clamp: + case NVPTXISD::Suld2DArrayI64Clamp: + case NVPTXISD::Suld2DArrayV2I8Clamp: + case NVPTXISD::Suld2DArrayV2I16Clamp: + case NVPTXISD::Suld2DArrayV2I32Clamp: + case NVPTXISD::Suld2DArrayV2I64Clamp: + case NVPTXISD::Suld2DArrayV4I8Clamp: + case NVPTXISD::Suld2DArrayV4I16Clamp: + case 
NVPTXISD::Suld2DArrayV4I32Clamp: + case NVPTXISD::Suld3DI8Clamp: + case NVPTXISD::Suld3DI16Clamp: + case NVPTXISD::Suld3DI32Clamp: + case NVPTXISD::Suld3DI64Clamp: + case NVPTXISD::Suld3DV2I8Clamp: + case NVPTXISD::Suld3DV2I16Clamp: + case NVPTXISD::Suld3DV2I32Clamp: + case NVPTXISD::Suld3DV2I64Clamp: + case NVPTXISD::Suld3DV4I8Clamp: + case NVPTXISD::Suld3DV4I16Clamp: + case NVPTXISD::Suld3DV4I32Clamp: case NVPTXISD::Suld1DI8Trap: case NVPTXISD::Suld1DI16Trap: case NVPTXISD::Suld1DI32Trap: + case NVPTXISD::Suld1DI64Trap: case NVPTXISD::Suld1DV2I8Trap: case NVPTXISD::Suld1DV2I16Trap: case NVPTXISD::Suld1DV2I32Trap: + case NVPTXISD::Suld1DV2I64Trap: case NVPTXISD::Suld1DV4I8Trap: case NVPTXISD::Suld1DV4I16Trap: case NVPTXISD::Suld1DV4I32Trap: case NVPTXISD::Suld1DArrayI8Trap: case NVPTXISD::Suld1DArrayI16Trap: case NVPTXISD::Suld1DArrayI32Trap: + case NVPTXISD::Suld1DArrayI64Trap: case NVPTXISD::Suld1DArrayV2I8Trap: case NVPTXISD::Suld1DArrayV2I16Trap: case NVPTXISD::Suld1DArrayV2I32Trap: + case NVPTXISD::Suld1DArrayV2I64Trap: case NVPTXISD::Suld1DArrayV4I8Trap: case NVPTXISD::Suld1DArrayV4I16Trap: case NVPTXISD::Suld1DArrayV4I32Trap: case NVPTXISD::Suld2DI8Trap: case NVPTXISD::Suld2DI16Trap: case NVPTXISD::Suld2DI32Trap: + case NVPTXISD::Suld2DI64Trap: case NVPTXISD::Suld2DV2I8Trap: case NVPTXISD::Suld2DV2I16Trap: case NVPTXISD::Suld2DV2I32Trap: + case NVPTXISD::Suld2DV2I64Trap: case NVPTXISD::Suld2DV4I8Trap: case NVPTXISD::Suld2DV4I16Trap: case NVPTXISD::Suld2DV4I32Trap: case NVPTXISD::Suld2DArrayI8Trap: case NVPTXISD::Suld2DArrayI16Trap: case NVPTXISD::Suld2DArrayI32Trap: + case NVPTXISD::Suld2DArrayI64Trap: case NVPTXISD::Suld2DArrayV2I8Trap: case NVPTXISD::Suld2DArrayV2I16Trap: case NVPTXISD::Suld2DArrayV2I32Trap: + case NVPTXISD::Suld2DArrayV2I64Trap: case NVPTXISD::Suld2DArrayV4I8Trap: case NVPTXISD::Suld2DArrayV4I16Trap: case NVPTXISD::Suld2DArrayV4I32Trap: case NVPTXISD::Suld3DI8Trap: case NVPTXISD::Suld3DI16Trap: case NVPTXISD::Suld3DI32Trap: + case 
NVPTXISD::Suld3DI64Trap: case NVPTXISD::Suld3DV2I8Trap: case NVPTXISD::Suld3DV2I16Trap: case NVPTXISD::Suld3DV2I32Trap: + case NVPTXISD::Suld3DV2I64Trap: case NVPTXISD::Suld3DV4I8Trap: case NVPTXISD::Suld3DV4I16Trap: case NVPTXISD::Suld3DV4I32Trap: + case NVPTXISD::Suld1DI8Zero: + case NVPTXISD::Suld1DI16Zero: + case NVPTXISD::Suld1DI32Zero: + case NVPTXISD::Suld1DI64Zero: + case NVPTXISD::Suld1DV2I8Zero: + case NVPTXISD::Suld1DV2I16Zero: + case NVPTXISD::Suld1DV2I32Zero: + case NVPTXISD::Suld1DV2I64Zero: + case NVPTXISD::Suld1DV4I8Zero: + case NVPTXISD::Suld1DV4I16Zero: + case NVPTXISD::Suld1DV4I32Zero: + case NVPTXISD::Suld1DArrayI8Zero: + case NVPTXISD::Suld1DArrayI16Zero: + case NVPTXISD::Suld1DArrayI32Zero: + case NVPTXISD::Suld1DArrayI64Zero: + case NVPTXISD::Suld1DArrayV2I8Zero: + case NVPTXISD::Suld1DArrayV2I16Zero: + case NVPTXISD::Suld1DArrayV2I32Zero: + case NVPTXISD::Suld1DArrayV2I64Zero: + case NVPTXISD::Suld1DArrayV4I8Zero: + case NVPTXISD::Suld1DArrayV4I16Zero: + case NVPTXISD::Suld1DArrayV4I32Zero: + case NVPTXISD::Suld2DI8Zero: + case NVPTXISD::Suld2DI16Zero: + case NVPTXISD::Suld2DI32Zero: + case NVPTXISD::Suld2DI64Zero: + case NVPTXISD::Suld2DV2I8Zero: + case NVPTXISD::Suld2DV2I16Zero: + case NVPTXISD::Suld2DV2I32Zero: + case NVPTXISD::Suld2DV2I64Zero: + case NVPTXISD::Suld2DV4I8Zero: + case NVPTXISD::Suld2DV4I16Zero: + case NVPTXISD::Suld2DV4I32Zero: + case NVPTXISD::Suld2DArrayI8Zero: + case NVPTXISD::Suld2DArrayI16Zero: + case NVPTXISD::Suld2DArrayI32Zero: + case NVPTXISD::Suld2DArrayI64Zero: + case NVPTXISD::Suld2DArrayV2I8Zero: + case NVPTXISD::Suld2DArrayV2I16Zero: + case NVPTXISD::Suld2DArrayV2I32Zero: + case NVPTXISD::Suld2DArrayV2I64Zero: + case NVPTXISD::Suld2DArrayV4I8Zero: + case NVPTXISD::Suld2DArrayV4I16Zero: + case NVPTXISD::Suld2DArrayV4I32Zero: + case NVPTXISD::Suld3DI8Zero: + case NVPTXISD::Suld3DI16Zero: + case NVPTXISD::Suld3DI32Zero: + case NVPTXISD::Suld3DI64Zero: + case NVPTXISD::Suld3DV2I8Zero: + case 
NVPTXISD::Suld3DV2I16Zero: + case NVPTXISD::Suld3DV2I32Zero: + case NVPTXISD::Suld3DV2I64Zero: + case NVPTXISD::Suld3DV4I8Zero: + case NVPTXISD::Suld3DV4I16Zero: + case NVPTXISD::Suld3DV4I32Zero: ResNode = SelectSurfaceIntrinsic(N); break; case ISD::AND: @@ -2781,16 +3015,14 @@ SDNode *NVPTXDAGToDAGISel::SelectStoreParam(SDNode *N) { SDNode *NVPTXDAGToDAGISel::SelectTextureIntrinsic(SDNode *N) { SDValue Chain = N->getOperand(0); - SDValue TexRef = N->getOperand(1); - SDValue SampRef = N->getOperand(2); SDNode *Ret = nullptr; unsigned Opc = 0; SmallVector<SDValue, 8> Ops; switch (N->getOpcode()) { default: return nullptr; - case NVPTXISD::Tex1DFloatI32: - Opc = NVPTX::TEX_1D_F32_I32; + case NVPTXISD::Tex1DFloatS32: + Opc = NVPTX::TEX_1D_F32_S32; break; case NVPTXISD::Tex1DFloatFloat: Opc = NVPTX::TEX_1D_F32_F32; @@ -2801,20 +3033,32 @@ SDNode *NVPTXDAGToDAGISel::SelectTextureIntrinsic(SDNode *N) { case NVPTXISD::Tex1DFloatFloatGrad: Opc = NVPTX::TEX_1D_F32_F32_GRAD; break; - case NVPTXISD::Tex1DI32I32: - Opc = NVPTX::TEX_1D_I32_I32; + case NVPTXISD::Tex1DS32S32: + Opc = NVPTX::TEX_1D_S32_S32; + break; + case NVPTXISD::Tex1DS32Float: + Opc = NVPTX::TEX_1D_S32_F32; + break; + case NVPTXISD::Tex1DS32FloatLevel: + Opc = NVPTX::TEX_1D_S32_F32_LEVEL; break; - case NVPTXISD::Tex1DI32Float: - Opc = NVPTX::TEX_1D_I32_F32; + case NVPTXISD::Tex1DS32FloatGrad: + Opc = NVPTX::TEX_1D_S32_F32_GRAD; break; - case NVPTXISD::Tex1DI32FloatLevel: - Opc = NVPTX::TEX_1D_I32_F32_LEVEL; + case NVPTXISD::Tex1DU32S32: + Opc = NVPTX::TEX_1D_U32_S32; break; - case NVPTXISD::Tex1DI32FloatGrad: - Opc = NVPTX::TEX_1D_I32_F32_GRAD; + case NVPTXISD::Tex1DU32Float: + Opc = NVPTX::TEX_1D_U32_F32; break; - case NVPTXISD::Tex1DArrayFloatI32: - Opc = NVPTX::TEX_1D_ARRAY_F32_I32; + case NVPTXISD::Tex1DU32FloatLevel: + Opc = NVPTX::TEX_1D_U32_F32_LEVEL; + break; + case NVPTXISD::Tex1DU32FloatGrad: + Opc = NVPTX::TEX_1D_U32_F32_GRAD; + break; + case NVPTXISD::Tex1DArrayFloatS32: + Opc = 
NVPTX::TEX_1D_ARRAY_F32_S32; break; case NVPTXISD::Tex1DArrayFloatFloat: Opc = NVPTX::TEX_1D_ARRAY_F32_F32; @@ -2825,20 +3069,32 @@ SDNode *NVPTXDAGToDAGISel::SelectTextureIntrinsic(SDNode *N) { case NVPTXISD::Tex1DArrayFloatFloatGrad: Opc = NVPTX::TEX_1D_ARRAY_F32_F32_GRAD; break; - case NVPTXISD::Tex1DArrayI32I32: - Opc = NVPTX::TEX_1D_ARRAY_I32_I32; + case NVPTXISD::Tex1DArrayS32S32: + Opc = NVPTX::TEX_1D_ARRAY_S32_S32; + break; + case NVPTXISD::Tex1DArrayS32Float: + Opc = NVPTX::TEX_1D_ARRAY_S32_F32; break; - case NVPTXISD::Tex1DArrayI32Float: - Opc = NVPTX::TEX_1D_ARRAY_I32_F32; + case NVPTXISD::Tex1DArrayS32FloatLevel: + Opc = NVPTX::TEX_1D_ARRAY_S32_F32_LEVEL; break; - case NVPTXISD::Tex1DArrayI32FloatLevel: - Opc = NVPTX::TEX_1D_ARRAY_I32_F32_LEVEL; + case NVPTXISD::Tex1DArrayS32FloatGrad: + Opc = NVPTX::TEX_1D_ARRAY_S32_F32_GRAD; break; - case NVPTXISD::Tex1DArrayI32FloatGrad: - Opc = NVPTX::TEX_1D_ARRAY_I32_F32_GRAD; + case NVPTXISD::Tex1DArrayU32S32: + Opc = NVPTX::TEX_1D_ARRAY_U32_S32; break; - case NVPTXISD::Tex2DFloatI32: - Opc = NVPTX::TEX_2D_F32_I32; + case NVPTXISD::Tex1DArrayU32Float: + Opc = NVPTX::TEX_1D_ARRAY_U32_F32; + break; + case NVPTXISD::Tex1DArrayU32FloatLevel: + Opc = NVPTX::TEX_1D_ARRAY_U32_F32_LEVEL; + break; + case NVPTXISD::Tex1DArrayU32FloatGrad: + Opc = NVPTX::TEX_1D_ARRAY_U32_F32_GRAD; + break; + case NVPTXISD::Tex2DFloatS32: + Opc = NVPTX::TEX_2D_F32_S32; break; case NVPTXISD::Tex2DFloatFloat: Opc = NVPTX::TEX_2D_F32_F32; @@ -2849,20 +3105,32 @@ SDNode *NVPTXDAGToDAGISel::SelectTextureIntrinsic(SDNode *N) { case NVPTXISD::Tex2DFloatFloatGrad: Opc = NVPTX::TEX_2D_F32_F32_GRAD; break; - case NVPTXISD::Tex2DI32I32: - Opc = NVPTX::TEX_2D_I32_I32; + case NVPTXISD::Tex2DS32S32: + Opc = NVPTX::TEX_2D_S32_S32; + break; + case NVPTXISD::Tex2DS32Float: + Opc = NVPTX::TEX_2D_S32_F32; + break; + case NVPTXISD::Tex2DS32FloatLevel: + Opc = NVPTX::TEX_2D_S32_F32_LEVEL; + break; + case NVPTXISD::Tex2DS32FloatGrad: + Opc = 
NVPTX::TEX_2D_S32_F32_GRAD; break; - case NVPTXISD::Tex2DI32Float: - Opc = NVPTX::TEX_2D_I32_F32; + case NVPTXISD::Tex2DU32S32: + Opc = NVPTX::TEX_2D_U32_S32; break; - case NVPTXISD::Tex2DI32FloatLevel: - Opc = NVPTX::TEX_2D_I32_F32_LEVEL; + case NVPTXISD::Tex2DU32Float: + Opc = NVPTX::TEX_2D_U32_F32; break; - case NVPTXISD::Tex2DI32FloatGrad: - Opc = NVPTX::TEX_2D_I32_F32_GRAD; + case NVPTXISD::Tex2DU32FloatLevel: + Opc = NVPTX::TEX_2D_U32_F32_LEVEL; break; - case NVPTXISD::Tex2DArrayFloatI32: - Opc = NVPTX::TEX_2D_ARRAY_F32_I32; + case NVPTXISD::Tex2DU32FloatGrad: + Opc = NVPTX::TEX_2D_U32_F32_GRAD; + break; + case NVPTXISD::Tex2DArrayFloatS32: + Opc = NVPTX::TEX_2D_ARRAY_F32_S32; break; case NVPTXISD::Tex2DArrayFloatFloat: Opc = NVPTX::TEX_2D_ARRAY_F32_F32; @@ -2873,20 +3141,32 @@ SDNode *NVPTXDAGToDAGISel::SelectTextureIntrinsic(SDNode *N) { case NVPTXISD::Tex2DArrayFloatFloatGrad: Opc = NVPTX::TEX_2D_ARRAY_F32_F32_GRAD; break; - case NVPTXISD::Tex2DArrayI32I32: - Opc = NVPTX::TEX_2D_ARRAY_I32_I32; + case NVPTXISD::Tex2DArrayS32S32: + Opc = NVPTX::TEX_2D_ARRAY_S32_S32; + break; + case NVPTXISD::Tex2DArrayS32Float: + Opc = NVPTX::TEX_2D_ARRAY_S32_F32; + break; + case NVPTXISD::Tex2DArrayS32FloatLevel: + Opc = NVPTX::TEX_2D_ARRAY_S32_F32_LEVEL; break; - case NVPTXISD::Tex2DArrayI32Float: - Opc = NVPTX::TEX_2D_ARRAY_I32_F32; + case NVPTXISD::Tex2DArrayS32FloatGrad: + Opc = NVPTX::TEX_2D_ARRAY_S32_F32_GRAD; break; - case NVPTXISD::Tex2DArrayI32FloatLevel: - Opc = NVPTX::TEX_2D_ARRAY_I32_F32_LEVEL; + case NVPTXISD::Tex2DArrayU32S32: + Opc = NVPTX::TEX_2D_ARRAY_U32_S32; break; - case NVPTXISD::Tex2DArrayI32FloatGrad: - Opc = NVPTX::TEX_2D_ARRAY_I32_F32_GRAD; + case NVPTXISD::Tex2DArrayU32Float: + Opc = NVPTX::TEX_2D_ARRAY_U32_F32; break; - case NVPTXISD::Tex3DFloatI32: - Opc = NVPTX::TEX_3D_F32_I32; + case NVPTXISD::Tex2DArrayU32FloatLevel: + Opc = NVPTX::TEX_2D_ARRAY_U32_F32_LEVEL; + break; + case NVPTXISD::Tex2DArrayU32FloatGrad: + Opc = 
NVPTX::TEX_2D_ARRAY_U32_F32_GRAD; + break; + case NVPTXISD::Tex3DFloatS32: + Opc = NVPTX::TEX_3D_F32_S32; break; case NVPTXISD::Tex3DFloatFloat: Opc = NVPTX::TEX_3D_F32_F32; @@ -2897,25 +3177,358 @@ SDNode *NVPTXDAGToDAGISel::SelectTextureIntrinsic(SDNode *N) { case NVPTXISD::Tex3DFloatFloatGrad: Opc = NVPTX::TEX_3D_F32_F32_GRAD; break; - case NVPTXISD::Tex3DI32I32: - Opc = NVPTX::TEX_3D_I32_I32; + case NVPTXISD::Tex3DS32S32: + Opc = NVPTX::TEX_3D_S32_S32; + break; + case NVPTXISD::Tex3DS32Float: + Opc = NVPTX::TEX_3D_S32_F32; + break; + case NVPTXISD::Tex3DS32FloatLevel: + Opc = NVPTX::TEX_3D_S32_F32_LEVEL; + break; + case NVPTXISD::Tex3DS32FloatGrad: + Opc = NVPTX::TEX_3D_S32_F32_GRAD; + break; + case NVPTXISD::Tex3DU32S32: + Opc = NVPTX::TEX_3D_U32_S32; + break; + case NVPTXISD::Tex3DU32Float: + Opc = NVPTX::TEX_3D_U32_F32; + break; + case NVPTXISD::Tex3DU32FloatLevel: + Opc = NVPTX::TEX_3D_U32_F32_LEVEL; + break; + case NVPTXISD::Tex3DU32FloatGrad: + Opc = NVPTX::TEX_3D_U32_F32_GRAD; + break; + case NVPTXISD::TexCubeFloatFloat: + Opc = NVPTX::TEX_CUBE_F32_F32; + break; + case NVPTXISD::TexCubeFloatFloatLevel: + Opc = NVPTX::TEX_CUBE_F32_F32_LEVEL; + break; + case NVPTXISD::TexCubeS32Float: + Opc = NVPTX::TEX_CUBE_S32_F32; + break; + case NVPTXISD::TexCubeS32FloatLevel: + Opc = NVPTX::TEX_CUBE_S32_F32_LEVEL; + break; + case NVPTXISD::TexCubeU32Float: + Opc = NVPTX::TEX_CUBE_U32_F32; + break; + case NVPTXISD::TexCubeU32FloatLevel: + Opc = NVPTX::TEX_CUBE_U32_F32_LEVEL; + break; + case NVPTXISD::TexCubeArrayFloatFloat: + Opc = NVPTX::TEX_CUBE_ARRAY_F32_F32; + break; + case NVPTXISD::TexCubeArrayFloatFloatLevel: + Opc = NVPTX::TEX_CUBE_ARRAY_F32_F32_LEVEL; + break; + case NVPTXISD::TexCubeArrayS32Float: + Opc = NVPTX::TEX_CUBE_ARRAY_S32_F32; + break; + case NVPTXISD::TexCubeArrayS32FloatLevel: + Opc = NVPTX::TEX_CUBE_ARRAY_S32_F32_LEVEL; + break; + case NVPTXISD::TexCubeArrayU32Float: + Opc = NVPTX::TEX_CUBE_ARRAY_U32_F32; + break; + case 
NVPTXISD::TexCubeArrayU32FloatLevel: + Opc = NVPTX::TEX_CUBE_ARRAY_U32_F32_LEVEL; + break; + case NVPTXISD::Tld4R2DFloatFloat: + Opc = NVPTX::TLD4_R_2D_F32_F32; + break; + case NVPTXISD::Tld4G2DFloatFloat: + Opc = NVPTX::TLD4_G_2D_F32_F32; + break; + case NVPTXISD::Tld4B2DFloatFloat: + Opc = NVPTX::TLD4_B_2D_F32_F32; + break; + case NVPTXISD::Tld4A2DFloatFloat: + Opc = NVPTX::TLD4_A_2D_F32_F32; + break; + case NVPTXISD::Tld4R2DS64Float: + Opc = NVPTX::TLD4_R_2D_S32_F32; + break; + case NVPTXISD::Tld4G2DS64Float: + Opc = NVPTX::TLD4_G_2D_S32_F32; + break; + case NVPTXISD::Tld4B2DS64Float: + Opc = NVPTX::TLD4_B_2D_S32_F32; + break; + case NVPTXISD::Tld4A2DS64Float: + Opc = NVPTX::TLD4_A_2D_S32_F32; + break; + case NVPTXISD::Tld4R2DU64Float: + Opc = NVPTX::TLD4_R_2D_U32_F32; + break; + case NVPTXISD::Tld4G2DU64Float: + Opc = NVPTX::TLD4_G_2D_U32_F32; + break; + case NVPTXISD::Tld4B2DU64Float: + Opc = NVPTX::TLD4_B_2D_U32_F32; + break; + case NVPTXISD::Tld4A2DU64Float: + Opc = NVPTX::TLD4_A_2D_U32_F32; + break; + case NVPTXISD::TexUnified1DFloatS32: + Opc = NVPTX::TEX_UNIFIED_1D_F32_S32; + break; + case NVPTXISD::TexUnified1DFloatFloat: + Opc = NVPTX::TEX_UNIFIED_1D_F32_F32; + break; + case NVPTXISD::TexUnified1DFloatFloatLevel: + Opc = NVPTX::TEX_UNIFIED_1D_F32_F32_LEVEL; + break; + case NVPTXISD::TexUnified1DFloatFloatGrad: + Opc = NVPTX::TEX_UNIFIED_1D_F32_F32_GRAD; + break; + case NVPTXISD::TexUnified1DS32S32: + Opc = NVPTX::TEX_UNIFIED_1D_S32_S32; + break; + case NVPTXISD::TexUnified1DS32Float: + Opc = NVPTX::TEX_UNIFIED_1D_S32_F32; + break; + case NVPTXISD::TexUnified1DS32FloatLevel: + Opc = NVPTX::TEX_UNIFIED_1D_S32_F32_LEVEL; + break; + case NVPTXISD::TexUnified1DS32FloatGrad: + Opc = NVPTX::TEX_UNIFIED_1D_S32_F32_GRAD; + break; + case NVPTXISD::TexUnified1DU32S32: + Opc = NVPTX::TEX_UNIFIED_1D_U32_S32; + break; + case NVPTXISD::TexUnified1DU32Float: + Opc = NVPTX::TEX_UNIFIED_1D_U32_F32; + break; + case NVPTXISD::TexUnified1DU32FloatLevel: + Opc = 
NVPTX::TEX_UNIFIED_1D_U32_F32_LEVEL; + break; + case NVPTXISD::TexUnified1DU32FloatGrad: + Opc = NVPTX::TEX_UNIFIED_1D_U32_F32_GRAD; + break; + case NVPTXISD::TexUnified1DArrayFloatS32: + Opc = NVPTX::TEX_UNIFIED_1D_ARRAY_F32_S32; + break; + case NVPTXISD::TexUnified1DArrayFloatFloat: + Opc = NVPTX::TEX_UNIFIED_1D_ARRAY_F32_F32; + break; + case NVPTXISD::TexUnified1DArrayFloatFloatLevel: + Opc = NVPTX::TEX_UNIFIED_1D_ARRAY_F32_F32_LEVEL; + break; + case NVPTXISD::TexUnified1DArrayFloatFloatGrad: + Opc = NVPTX::TEX_UNIFIED_1D_ARRAY_F32_F32_GRAD; + break; + case NVPTXISD::TexUnified1DArrayS32S32: + Opc = NVPTX::TEX_UNIFIED_1D_ARRAY_S32_S32; + break; + case NVPTXISD::TexUnified1DArrayS32Float: + Opc = NVPTX::TEX_UNIFIED_1D_ARRAY_S32_F32; + break; + case NVPTXISD::TexUnified1DArrayS32FloatLevel: + Opc = NVPTX::TEX_UNIFIED_1D_ARRAY_S32_F32_LEVEL; + break; + case NVPTXISD::TexUnified1DArrayS32FloatGrad: + Opc = NVPTX::TEX_UNIFIED_1D_ARRAY_S32_F32_GRAD; + break; + case NVPTXISD::TexUnified1DArrayU32S32: + Opc = NVPTX::TEX_UNIFIED_1D_ARRAY_U32_S32; + break; + case NVPTXISD::TexUnified1DArrayU32Float: + Opc = NVPTX::TEX_UNIFIED_1D_ARRAY_U32_F32; + break; + case NVPTXISD::TexUnified1DArrayU32FloatLevel: + Opc = NVPTX::TEX_UNIFIED_1D_ARRAY_U32_F32_LEVEL; + break; + case NVPTXISD::TexUnified1DArrayU32FloatGrad: + Opc = NVPTX::TEX_UNIFIED_1D_ARRAY_U32_F32_GRAD; + break; + case NVPTXISD::TexUnified2DFloatS32: + Opc = NVPTX::TEX_UNIFIED_2D_F32_S32; + break; + case NVPTXISD::TexUnified2DFloatFloat: + Opc = NVPTX::TEX_UNIFIED_2D_F32_F32; + break; + case NVPTXISD::TexUnified2DFloatFloatLevel: + Opc = NVPTX::TEX_UNIFIED_2D_F32_F32_LEVEL; + break; + case NVPTXISD::TexUnified2DFloatFloatGrad: + Opc = NVPTX::TEX_UNIFIED_2D_F32_F32_GRAD; + break; + case NVPTXISD::TexUnified2DS32S32: + Opc = NVPTX::TEX_UNIFIED_2D_S32_S32; + break; + case NVPTXISD::TexUnified2DS32Float: + Opc = NVPTX::TEX_UNIFIED_2D_S32_F32; + break; + case NVPTXISD::TexUnified2DS32FloatLevel: + Opc = 
NVPTX::TEX_UNIFIED_2D_S32_F32_LEVEL; + break; + case NVPTXISD::TexUnified2DS32FloatGrad: + Opc = NVPTX::TEX_UNIFIED_2D_S32_F32_GRAD; + break; + case NVPTXISD::TexUnified2DU32S32: + Opc = NVPTX::TEX_UNIFIED_2D_U32_S32; + break; + case NVPTXISD::TexUnified2DU32Float: + Opc = NVPTX::TEX_UNIFIED_2D_U32_F32; + break; + case NVPTXISD::TexUnified2DU32FloatLevel: + Opc = NVPTX::TEX_UNIFIED_2D_U32_F32_LEVEL; + break; + case NVPTXISD::TexUnified2DU32FloatGrad: + Opc = NVPTX::TEX_UNIFIED_2D_U32_F32_GRAD; + break; + case NVPTXISD::TexUnified2DArrayFloatS32: + Opc = NVPTX::TEX_UNIFIED_2D_ARRAY_F32_S32; + break; + case NVPTXISD::TexUnified2DArrayFloatFloat: + Opc = NVPTX::TEX_UNIFIED_2D_ARRAY_F32_F32; + break; + case NVPTXISD::TexUnified2DArrayFloatFloatLevel: + Opc = NVPTX::TEX_UNIFIED_2D_ARRAY_F32_F32_LEVEL; + break; + case NVPTXISD::TexUnified2DArrayFloatFloatGrad: + Opc = NVPTX::TEX_UNIFIED_2D_ARRAY_F32_F32_GRAD; + break; + case NVPTXISD::TexUnified2DArrayS32S32: + Opc = NVPTX::TEX_UNIFIED_2D_ARRAY_S32_S32; + break; + case NVPTXISD::TexUnified2DArrayS32Float: + Opc = NVPTX::TEX_UNIFIED_2D_ARRAY_S32_F32; break; - case NVPTXISD::Tex3DI32Float: - Opc = NVPTX::TEX_3D_I32_F32; + case NVPTXISD::TexUnified2DArrayS32FloatLevel: + Opc = NVPTX::TEX_UNIFIED_2D_ARRAY_S32_F32_LEVEL; break; - case NVPTXISD::Tex3DI32FloatLevel: - Opc = NVPTX::TEX_3D_I32_F32_LEVEL; + case NVPTXISD::TexUnified2DArrayS32FloatGrad: + Opc = NVPTX::TEX_UNIFIED_2D_ARRAY_S32_F32_GRAD; break; - case NVPTXISD::Tex3DI32FloatGrad: - Opc = NVPTX::TEX_3D_I32_F32_GRAD; + case NVPTXISD::TexUnified2DArrayU32S32: + Opc = NVPTX::TEX_UNIFIED_2D_ARRAY_U32_S32; + break; + case NVPTXISD::TexUnified2DArrayU32Float: + Opc = NVPTX::TEX_UNIFIED_2D_ARRAY_U32_F32; + break; + case NVPTXISD::TexUnified2DArrayU32FloatLevel: + Opc = NVPTX::TEX_UNIFIED_2D_ARRAY_U32_F32_LEVEL; + break; + case NVPTXISD::TexUnified2DArrayU32FloatGrad: + Opc = NVPTX::TEX_UNIFIED_2D_ARRAY_U32_F32_GRAD; + break; + case NVPTXISD::TexUnified3DFloatS32: + Opc = 
NVPTX::TEX_UNIFIED_3D_F32_S32; + break; + case NVPTXISD::TexUnified3DFloatFloat: + Opc = NVPTX::TEX_UNIFIED_3D_F32_F32; + break; + case NVPTXISD::TexUnified3DFloatFloatLevel: + Opc = NVPTX::TEX_UNIFIED_3D_F32_F32_LEVEL; + break; + case NVPTXISD::TexUnified3DFloatFloatGrad: + Opc = NVPTX::TEX_UNIFIED_3D_F32_F32_GRAD; + break; + case NVPTXISD::TexUnified3DS32S32: + Opc = NVPTX::TEX_UNIFIED_3D_S32_S32; + break; + case NVPTXISD::TexUnified3DS32Float: + Opc = NVPTX::TEX_UNIFIED_3D_S32_F32; + break; + case NVPTXISD::TexUnified3DS32FloatLevel: + Opc = NVPTX::TEX_UNIFIED_3D_S32_F32_LEVEL; + break; + case NVPTXISD::TexUnified3DS32FloatGrad: + Opc = NVPTX::TEX_UNIFIED_3D_S32_F32_GRAD; + break; + case NVPTXISD::TexUnified3DU32S32: + Opc = NVPTX::TEX_UNIFIED_3D_U32_S32; + break; + case NVPTXISD::TexUnified3DU32Float: + Opc = NVPTX::TEX_UNIFIED_3D_U32_F32; + break; + case NVPTXISD::TexUnified3DU32FloatLevel: + Opc = NVPTX::TEX_UNIFIED_3D_U32_F32_LEVEL; + break; + case NVPTXISD::TexUnified3DU32FloatGrad: + Opc = NVPTX::TEX_UNIFIED_3D_U32_F32_GRAD; + break; + case NVPTXISD::TexUnifiedCubeFloatFloat: + Opc = NVPTX::TEX_UNIFIED_CUBE_F32_F32; + break; + case NVPTXISD::TexUnifiedCubeFloatFloatLevel: + Opc = NVPTX::TEX_UNIFIED_CUBE_F32_F32_LEVEL; + break; + case NVPTXISD::TexUnifiedCubeS32Float: + Opc = NVPTX::TEX_UNIFIED_CUBE_S32_F32; + break; + case NVPTXISD::TexUnifiedCubeS32FloatLevel: + Opc = NVPTX::TEX_UNIFIED_CUBE_S32_F32_LEVEL; + break; + case NVPTXISD::TexUnifiedCubeU32Float: + Opc = NVPTX::TEX_UNIFIED_CUBE_U32_F32; + break; + case NVPTXISD::TexUnifiedCubeU32FloatLevel: + Opc = NVPTX::TEX_UNIFIED_CUBE_U32_F32_LEVEL; + break; + case NVPTXISD::TexUnifiedCubeArrayFloatFloat: + Opc = NVPTX::TEX_UNIFIED_CUBE_ARRAY_F32_F32; + break; + case NVPTXISD::TexUnifiedCubeArrayFloatFloatLevel: + Opc = NVPTX::TEX_UNIFIED_CUBE_ARRAY_F32_F32_LEVEL; + break; + case NVPTXISD::TexUnifiedCubeArrayS32Float: + Opc = NVPTX::TEX_UNIFIED_CUBE_ARRAY_S32_F32; + break; + case 
NVPTXISD::TexUnifiedCubeArrayS32FloatLevel: + Opc = NVPTX::TEX_UNIFIED_CUBE_ARRAY_S32_F32_LEVEL; + break; + case NVPTXISD::TexUnifiedCubeArrayU32Float: + Opc = NVPTX::TEX_UNIFIED_CUBE_ARRAY_U32_F32; + break; + case NVPTXISD::TexUnifiedCubeArrayU32FloatLevel: + Opc = NVPTX::TEX_UNIFIED_CUBE_ARRAY_U32_F32_LEVEL; + break; + case NVPTXISD::Tld4UnifiedR2DFloatFloat: + Opc = NVPTX::TLD4_UNIFIED_R_2D_F32_F32; + break; + case NVPTXISD::Tld4UnifiedG2DFloatFloat: + Opc = NVPTX::TLD4_UNIFIED_G_2D_F32_F32; + break; + case NVPTXISD::Tld4UnifiedB2DFloatFloat: + Opc = NVPTX::TLD4_UNIFIED_B_2D_F32_F32; + break; + case NVPTXISD::Tld4UnifiedA2DFloatFloat: + Opc = NVPTX::TLD4_UNIFIED_A_2D_F32_F32; + break; + case NVPTXISD::Tld4UnifiedR2DS64Float: + Opc = NVPTX::TLD4_UNIFIED_R_2D_S32_F32; + break; + case NVPTXISD::Tld4UnifiedG2DS64Float: + Opc = NVPTX::TLD4_UNIFIED_G_2D_S32_F32; + break; + case NVPTXISD::Tld4UnifiedB2DS64Float: + Opc = NVPTX::TLD4_UNIFIED_B_2D_S32_F32; + break; + case NVPTXISD::Tld4UnifiedA2DS64Float: + Opc = NVPTX::TLD4_UNIFIED_A_2D_S32_F32; + break; + case NVPTXISD::Tld4UnifiedR2DU64Float: + Opc = NVPTX::TLD4_UNIFIED_R_2D_U32_F32; + break; + case NVPTXISD::Tld4UnifiedG2DU64Float: + Opc = NVPTX::TLD4_UNIFIED_G_2D_U32_F32; + break; + case NVPTXISD::Tld4UnifiedB2DU64Float: + Opc = NVPTX::TLD4_UNIFIED_B_2D_U32_F32; + break; + case NVPTXISD::Tld4UnifiedA2DU64Float: + Opc = NVPTX::TLD4_UNIFIED_A_2D_U32_F32; break; } - Ops.push_back(TexRef); - Ops.push_back(SampRef); - - // Copy over indices - for (unsigned i = 3; i < N->getNumOperands(); ++i) { + // Copy over operands + for (unsigned i = 1; i < N->getNumOperands(); ++i) { Ops.push_back(N->getOperand(i)); } @@ -2932,6 +3545,402 @@ SDNode *NVPTXDAGToDAGISel::SelectSurfaceIntrinsic(SDNode *N) { SmallVector<SDValue, 8> Ops; switch (N->getOpcode()) { default: return nullptr; + case NVPTXISD::Suld1DI8Clamp: + Opc = NVPTX::SULD_1D_I8_CLAMP; + Ops.push_back(TexHandle); + Ops.push_back(N->getOperand(2)); + Ops.push_back(Chain); + 
break; + case NVPTXISD::Suld1DI16Clamp: + Opc = NVPTX::SULD_1D_I16_CLAMP; + Ops.push_back(TexHandle); + Ops.push_back(N->getOperand(2)); + Ops.push_back(Chain); + break; + case NVPTXISD::Suld1DI32Clamp: + Opc = NVPTX::SULD_1D_I32_CLAMP; + Ops.push_back(TexHandle); + Ops.push_back(N->getOperand(2)); + Ops.push_back(Chain); + break; + case NVPTXISD::Suld1DI64Clamp: + Opc = NVPTX::SULD_1D_I64_CLAMP; + Ops.push_back(TexHandle); + Ops.push_back(N->getOperand(2)); + Ops.push_back(Chain); + break; + case NVPTXISD::Suld1DV2I8Clamp: + Opc = NVPTX::SULD_1D_V2I8_CLAMP; + Ops.push_back(TexHandle); + Ops.push_back(N->getOperand(2)); + Ops.push_back(Chain); + break; + case NVPTXISD::Suld1DV2I16Clamp: + Opc = NVPTX::SULD_1D_V2I16_CLAMP; + Ops.push_back(TexHandle); + Ops.push_back(N->getOperand(2)); + Ops.push_back(Chain); + break; + case NVPTXISD::Suld1DV2I32Clamp: + Opc = NVPTX::SULD_1D_V2I32_CLAMP; + Ops.push_back(TexHandle); + Ops.push_back(N->getOperand(2)); + Ops.push_back(Chain); + break; + case NVPTXISD::Suld1DV2I64Clamp: + Opc = NVPTX::SULD_1D_V2I64_CLAMP; + Ops.push_back(TexHandle); + Ops.push_back(N->getOperand(2)); + Ops.push_back(Chain); + break; + case NVPTXISD::Suld1DV4I8Clamp: + Opc = NVPTX::SULD_1D_V4I8_CLAMP; + Ops.push_back(TexHandle); + Ops.push_back(N->getOperand(2)); + Ops.push_back(Chain); + break; + case NVPTXISD::Suld1DV4I16Clamp: + Opc = NVPTX::SULD_1D_V4I16_CLAMP; + Ops.push_back(TexHandle); + Ops.push_back(N->getOperand(2)); + Ops.push_back(Chain); + break; + case NVPTXISD::Suld1DV4I32Clamp: + Opc = NVPTX::SULD_1D_V4I32_CLAMP; + Ops.push_back(TexHandle); + Ops.push_back(N->getOperand(2)); + Ops.push_back(Chain); + break; + case NVPTXISD::Suld1DArrayI8Clamp: + Opc = NVPTX::SULD_1D_ARRAY_I8_CLAMP; + Ops.push_back(TexHandle); + Ops.push_back(N->getOperand(2)); + Ops.push_back(N->getOperand(3)); + Ops.push_back(Chain); + break; + case NVPTXISD::Suld1DArrayI16Clamp: + Opc = NVPTX::SULD_1D_ARRAY_I16_CLAMP; + Ops.push_back(TexHandle); + 
Ops.push_back(N->getOperand(2)); + Ops.push_back(N->getOperand(3)); + Ops.push_back(Chain); + break; + case NVPTXISD::Suld1DArrayI32Clamp: + Opc = NVPTX::SULD_1D_ARRAY_I32_CLAMP; + Ops.push_back(TexHandle); + Ops.push_back(N->getOperand(2)); + Ops.push_back(N->getOperand(3)); + Ops.push_back(Chain); + break; + case NVPTXISD::Suld1DArrayI64Clamp: + Opc = NVPTX::SULD_1D_ARRAY_I64_CLAMP; + Ops.push_back(TexHandle); + Ops.push_back(N->getOperand(2)); + Ops.push_back(N->getOperand(3)); + Ops.push_back(Chain); + break; + case NVPTXISD::Suld1DArrayV2I8Clamp: + Opc = NVPTX::SULD_1D_ARRAY_V2I8_CLAMP; + Ops.push_back(TexHandle); + Ops.push_back(N->getOperand(2)); + Ops.push_back(N->getOperand(3)); + Ops.push_back(Chain); + break; + case NVPTXISD::Suld1DArrayV2I16Clamp: + Opc = NVPTX::SULD_1D_ARRAY_V2I16_CLAMP; + Ops.push_back(TexHandle); + Ops.push_back(N->getOperand(2)); + Ops.push_back(N->getOperand(3)); + Ops.push_back(Chain); + break; + case NVPTXISD::Suld1DArrayV2I32Clamp: + Opc = NVPTX::SULD_1D_ARRAY_V2I32_CLAMP; + Ops.push_back(TexHandle); + Ops.push_back(N->getOperand(2)); + Ops.push_back(N->getOperand(3)); + Ops.push_back(Chain); + break; + case NVPTXISD::Suld1DArrayV2I64Clamp: + Opc = NVPTX::SULD_1D_ARRAY_V2I64_CLAMP; + Ops.push_back(TexHandle); + Ops.push_back(N->getOperand(2)); + Ops.push_back(N->getOperand(3)); + Ops.push_back(Chain); + break; + case NVPTXISD::Suld1DArrayV4I8Clamp: + Opc = NVPTX::SULD_1D_ARRAY_V4I8_CLAMP; + Ops.push_back(TexHandle); + Ops.push_back(N->getOperand(2)); + Ops.push_back(N->getOperand(3)); + Ops.push_back(Chain); + break; + case NVPTXISD::Suld1DArrayV4I16Clamp: + Opc = NVPTX::SULD_1D_ARRAY_V4I16_CLAMP; + Ops.push_back(TexHandle); + Ops.push_back(N->getOperand(2)); + Ops.push_back(N->getOperand(3)); + Ops.push_back(Chain); + break; + case NVPTXISD::Suld1DArrayV4I32Clamp: + Opc = NVPTX::SULD_1D_ARRAY_V4I32_CLAMP; + Ops.push_back(TexHandle); + Ops.push_back(N->getOperand(2)); + Ops.push_back(N->getOperand(3)); + Ops.push_back(Chain); + 
break; + case NVPTXISD::Suld2DI8Clamp: + Opc = NVPTX::SULD_2D_I8_CLAMP; + Ops.push_back(TexHandle); + Ops.push_back(N->getOperand(2)); + Ops.push_back(N->getOperand(3)); + Ops.push_back(Chain); + break; + case NVPTXISD::Suld2DI16Clamp: + Opc = NVPTX::SULD_2D_I16_CLAMP; + Ops.push_back(TexHandle); + Ops.push_back(N->getOperand(2)); + Ops.push_back(N->getOperand(3)); + Ops.push_back(Chain); + break; + case NVPTXISD::Suld2DI32Clamp: + Opc = NVPTX::SULD_2D_I32_CLAMP; + Ops.push_back(TexHandle); + Ops.push_back(N->getOperand(2)); + Ops.push_back(N->getOperand(3)); + Ops.push_back(Chain); + break; + case NVPTXISD::Suld2DI64Clamp: + Opc = NVPTX::SULD_2D_I64_CLAMP; + Ops.push_back(TexHandle); + Ops.push_back(N->getOperand(2)); + Ops.push_back(N->getOperand(3)); + Ops.push_back(Chain); + break; + case NVPTXISD::Suld2DV2I8Clamp: + Opc = NVPTX::SULD_2D_V2I8_CLAMP; + Ops.push_back(TexHandle); + Ops.push_back(N->getOperand(2)); + Ops.push_back(N->getOperand(3)); + Ops.push_back(Chain); + break; + case NVPTXISD::Suld2DV2I16Clamp: + Opc = NVPTX::SULD_2D_V2I16_CLAMP; + Ops.push_back(TexHandle); + Ops.push_back(N->getOperand(2)); + Ops.push_back(N->getOperand(3)); + Ops.push_back(Chain); + break; + case NVPTXISD::Suld2DV2I32Clamp: + Opc = NVPTX::SULD_2D_V2I32_CLAMP; + Ops.push_back(TexHandle); + Ops.push_back(N->getOperand(2)); + Ops.push_back(N->getOperand(3)); + Ops.push_back(Chain); + break; + case NVPTXISD::Suld2DV2I64Clamp: + Opc = NVPTX::SULD_2D_V2I64_CLAMP; + Ops.push_back(TexHandle); + Ops.push_back(N->getOperand(2)); + Ops.push_back(N->getOperand(3)); + Ops.push_back(Chain); + break; + case NVPTXISD::Suld2DV4I8Clamp: + Opc = NVPTX::SULD_2D_V4I8_CLAMP; + Ops.push_back(TexHandle); + Ops.push_back(N->getOperand(2)); + Ops.push_back(N->getOperand(3)); + Ops.push_back(Chain); + break; + case NVPTXISD::Suld2DV4I16Clamp: + Opc = NVPTX::SULD_2D_V4I16_CLAMP; + Ops.push_back(TexHandle); + Ops.push_back(N->getOperand(2)); + Ops.push_back(N->getOperand(3)); + Ops.push_back(Chain); + 
break; + case NVPTXISD::Suld2DV4I32Clamp: + Opc = NVPTX::SULD_2D_V4I32_CLAMP; + Ops.push_back(TexHandle); + Ops.push_back(N->getOperand(2)); + Ops.push_back(N->getOperand(3)); + Ops.push_back(Chain); + break; + case NVPTXISD::Suld2DArrayI8Clamp: + Opc = NVPTX::SULD_2D_ARRAY_I8_CLAMP; + Ops.push_back(TexHandle); + Ops.push_back(N->getOperand(2)); + Ops.push_back(N->getOperand(3)); + Ops.push_back(N->getOperand(4)); + Ops.push_back(Chain); + break; + case NVPTXISD::Suld2DArrayI16Clamp: + Opc = NVPTX::SULD_2D_ARRAY_I16_CLAMP; + Ops.push_back(TexHandle); + Ops.push_back(N->getOperand(2)); + Ops.push_back(N->getOperand(3)); + Ops.push_back(N->getOperand(4)); + Ops.push_back(Chain); + break; + case NVPTXISD::Suld2DArrayI32Clamp: + Opc = NVPTX::SULD_2D_ARRAY_I32_CLAMP; + Ops.push_back(TexHandle); + Ops.push_back(N->getOperand(2)); + Ops.push_back(N->getOperand(3)); + Ops.push_back(N->getOperand(4)); + Ops.push_back(Chain); + break; + case NVPTXISD::Suld2DArrayI64Clamp: + Opc = NVPTX::SULD_2D_ARRAY_I64_CLAMP; + Ops.push_back(TexHandle); + Ops.push_back(N->getOperand(2)); + Ops.push_back(N->getOperand(3)); + Ops.push_back(N->getOperand(4)); + Ops.push_back(Chain); + break; + case NVPTXISD::Suld2DArrayV2I8Clamp: + Opc = NVPTX::SULD_2D_ARRAY_V2I8_CLAMP; + Ops.push_back(TexHandle); + Ops.push_back(N->getOperand(2)); + Ops.push_back(N->getOperand(3)); + Ops.push_back(N->getOperand(4)); + Ops.push_back(Chain); + break; + case NVPTXISD::Suld2DArrayV2I16Clamp: + Opc = NVPTX::SULD_2D_ARRAY_V2I16_CLAMP; + Ops.push_back(TexHandle); + Ops.push_back(N->getOperand(2)); + Ops.push_back(N->getOperand(3)); + Ops.push_back(N->getOperand(4)); + Ops.push_back(Chain); + break; + case NVPTXISD::Suld2DArrayV2I32Clamp: + Opc = NVPTX::SULD_2D_ARRAY_V2I32_CLAMP; + Ops.push_back(TexHandle); + Ops.push_back(N->getOperand(2)); + Ops.push_back(N->getOperand(3)); + Ops.push_back(N->getOperand(4)); + Ops.push_back(Chain); + break; + case NVPTXISD::Suld2DArrayV2I64Clamp: + Opc = 
NVPTX::SULD_2D_ARRAY_V2I64_CLAMP; + Ops.push_back(TexHandle); + Ops.push_back(N->getOperand(2)); + Ops.push_back(N->getOperand(3)); + Ops.push_back(N->getOperand(4)); + Ops.push_back(Chain); + break; + case NVPTXISD::Suld2DArrayV4I8Clamp: + Opc = NVPTX::SULD_2D_ARRAY_V4I8_CLAMP; + Ops.push_back(TexHandle); + Ops.push_back(N->getOperand(2)); + Ops.push_back(N->getOperand(3)); + Ops.push_back(N->getOperand(4)); + Ops.push_back(Chain); + break; + case NVPTXISD::Suld2DArrayV4I16Clamp: + Opc = NVPTX::SULD_2D_ARRAY_V4I16_CLAMP; + Ops.push_back(TexHandle); + Ops.push_back(N->getOperand(2)); + Ops.push_back(N->getOperand(3)); + Ops.push_back(N->getOperand(4)); + Ops.push_back(Chain); + break; + case NVPTXISD::Suld2DArrayV4I32Clamp: + Opc = NVPTX::SULD_2D_ARRAY_V4I32_CLAMP; + Ops.push_back(TexHandle); + Ops.push_back(N->getOperand(2)); + Ops.push_back(N->getOperand(3)); + Ops.push_back(N->getOperand(4)); + Ops.push_back(Chain); + break; + case NVPTXISD::Suld3DI8Clamp: + Opc = NVPTX::SULD_3D_I8_CLAMP; + Ops.push_back(TexHandle); + Ops.push_back(N->getOperand(2)); + Ops.push_back(N->getOperand(3)); + Ops.push_back(N->getOperand(4)); + Ops.push_back(Chain); + break; + case NVPTXISD::Suld3DI16Clamp: + Opc = NVPTX::SULD_3D_I16_CLAMP; + Ops.push_back(TexHandle); + Ops.push_back(N->getOperand(2)); + Ops.push_back(N->getOperand(3)); + Ops.push_back(N->getOperand(4)); + Ops.push_back(Chain); + break; + case NVPTXISD::Suld3DI32Clamp: + Opc = NVPTX::SULD_3D_I32_CLAMP; + Ops.push_back(TexHandle); + Ops.push_back(N->getOperand(2)); + Ops.push_back(N->getOperand(3)); + Ops.push_back(N->getOperand(4)); + Ops.push_back(Chain); + break; + case NVPTXISD::Suld3DI64Clamp: + Opc = NVPTX::SULD_3D_I64_CLAMP; + Ops.push_back(TexHandle); + Ops.push_back(N->getOperand(2)); + Ops.push_back(N->getOperand(3)); + Ops.push_back(N->getOperand(4)); + Ops.push_back(Chain); + break; + case NVPTXISD::Suld3DV2I8Clamp: + Opc = NVPTX::SULD_3D_V2I8_CLAMP; + Ops.push_back(TexHandle); + 
Ops.push_back(N->getOperand(2)); + Ops.push_back(N->getOperand(3)); + Ops.push_back(N->getOperand(4)); + Ops.push_back(Chain); + break; + case NVPTXISD::Suld3DV2I16Clamp: + Opc = NVPTX::SULD_3D_V2I16_CLAMP; + Ops.push_back(TexHandle); + Ops.push_back(N->getOperand(2)); + Ops.push_back(N->getOperand(3)); + Ops.push_back(N->getOperand(4)); + Ops.push_back(Chain); + break; + case NVPTXISD::Suld3DV2I32Clamp: + Opc = NVPTX::SULD_3D_V2I32_CLAMP; + Ops.push_back(TexHandle); + Ops.push_back(N->getOperand(2)); + Ops.push_back(N->getOperand(3)); + Ops.push_back(N->getOperand(4)); + Ops.push_back(Chain); + break; + case NVPTXISD::Suld3DV2I64Clamp: + Opc = NVPTX::SULD_3D_V2I64_CLAMP; + Ops.push_back(TexHandle); + Ops.push_back(N->getOperand(2)); + Ops.push_back(N->getOperand(3)); + Ops.push_back(N->getOperand(4)); + Ops.push_back(Chain); + break; + case NVPTXISD::Suld3DV4I8Clamp: + Opc = NVPTX::SULD_3D_V4I8_CLAMP; + Ops.push_back(TexHandle); + Ops.push_back(N->getOperand(2)); + Ops.push_back(N->getOperand(3)); + Ops.push_back(N->getOperand(4)); + Ops.push_back(Chain); + break; + case NVPTXISD::Suld3DV4I16Clamp: + Opc = NVPTX::SULD_3D_V4I16_CLAMP; + Ops.push_back(TexHandle); + Ops.push_back(N->getOperand(2)); + Ops.push_back(N->getOperand(3)); + Ops.push_back(N->getOperand(4)); + Ops.push_back(Chain); + break; + case NVPTXISD::Suld3DV4I32Clamp: + Opc = NVPTX::SULD_3D_V4I32_CLAMP; + Ops.push_back(TexHandle); + Ops.push_back(N->getOperand(2)); + Ops.push_back(N->getOperand(3)); + Ops.push_back(N->getOperand(4)); + Ops.push_back(Chain); + break; case NVPTXISD::Suld1DI8Trap: Opc = NVPTX::SULD_1D_I8_TRAP; Ops.push_back(TexHandle); @@ -2950,6 +3959,12 @@ SDNode *NVPTXDAGToDAGISel::SelectSurfaceIntrinsic(SDNode *N) { Ops.push_back(N->getOperand(2)); Ops.push_back(Chain); break; + case NVPTXISD::Suld1DI64Trap: + Opc = NVPTX::SULD_1D_I64_TRAP; + Ops.push_back(TexHandle); + Ops.push_back(N->getOperand(2)); + Ops.push_back(Chain); + break; case NVPTXISD::Suld1DV2I8Trap: Opc = 
NVPTX::SULD_1D_V2I8_TRAP; Ops.push_back(TexHandle); @@ -2968,6 +3983,12 @@ SDNode *NVPTXDAGToDAGISel::SelectSurfaceIntrinsic(SDNode *N) { Ops.push_back(N->getOperand(2)); Ops.push_back(Chain); break; + case NVPTXISD::Suld1DV2I64Trap: + Opc = NVPTX::SULD_1D_V2I64_TRAP; + Ops.push_back(TexHandle); + Ops.push_back(N->getOperand(2)); + Ops.push_back(Chain); + break; case NVPTXISD::Suld1DV4I8Trap: Opc = NVPTX::SULD_1D_V4I8_TRAP; Ops.push_back(TexHandle); @@ -3007,6 +4028,13 @@ SDNode *NVPTXDAGToDAGISel::SelectSurfaceIntrinsic(SDNode *N) { Ops.push_back(N->getOperand(3)); Ops.push_back(Chain); break; + case NVPTXISD::Suld1DArrayI64Trap: + Opc = NVPTX::SULD_1D_ARRAY_I64_TRAP; + Ops.push_back(TexHandle); + Ops.push_back(N->getOperand(2)); + Ops.push_back(N->getOperand(3)); + Ops.push_back(Chain); + break; case NVPTXISD::Suld1DArrayV2I8Trap: Opc = NVPTX::SULD_1D_ARRAY_V2I8_TRAP; Ops.push_back(TexHandle); @@ -3028,6 +4056,13 @@ SDNode *NVPTXDAGToDAGISel::SelectSurfaceIntrinsic(SDNode *N) { Ops.push_back(N->getOperand(3)); Ops.push_back(Chain); break; + case NVPTXISD::Suld1DArrayV2I64Trap: + Opc = NVPTX::SULD_1D_ARRAY_V2I64_TRAP; + Ops.push_back(TexHandle); + Ops.push_back(N->getOperand(2)); + Ops.push_back(N->getOperand(3)); + Ops.push_back(Chain); + break; case NVPTXISD::Suld1DArrayV4I8Trap: Opc = NVPTX::SULD_1D_ARRAY_V4I8_TRAP; Ops.push_back(TexHandle); @@ -3070,6 +4105,13 @@ SDNode *NVPTXDAGToDAGISel::SelectSurfaceIntrinsic(SDNode *N) { Ops.push_back(N->getOperand(3)); Ops.push_back(Chain); break; + case NVPTXISD::Suld2DI64Trap: + Opc = NVPTX::SULD_2D_I64_TRAP; + Ops.push_back(TexHandle); + Ops.push_back(N->getOperand(2)); + Ops.push_back(N->getOperand(3)); + Ops.push_back(Chain); + break; case NVPTXISD::Suld2DV2I8Trap: Opc = NVPTX::SULD_2D_V2I8_TRAP; Ops.push_back(TexHandle); @@ -3091,6 +4133,13 @@ SDNode *NVPTXDAGToDAGISel::SelectSurfaceIntrinsic(SDNode *N) { Ops.push_back(N->getOperand(3)); Ops.push_back(Chain); break; + case NVPTXISD::Suld2DV2I64Trap: + Opc = 
NVPTX::SULD_2D_V2I64_TRAP; + Ops.push_back(TexHandle); + Ops.push_back(N->getOperand(2)); + Ops.push_back(N->getOperand(3)); + Ops.push_back(Chain); + break; case NVPTXISD::Suld2DV4I8Trap: Opc = NVPTX::SULD_2D_V4I8_TRAP; Ops.push_back(TexHandle); @@ -3136,6 +4185,14 @@ SDNode *NVPTXDAGToDAGISel::SelectSurfaceIntrinsic(SDNode *N) { Ops.push_back(N->getOperand(4)); Ops.push_back(Chain); break; + case NVPTXISD::Suld2DArrayI64Trap: + Opc = NVPTX::SULD_2D_ARRAY_I64_TRAP; + Ops.push_back(TexHandle); + Ops.push_back(N->getOperand(2)); + Ops.push_back(N->getOperand(3)); + Ops.push_back(N->getOperand(4)); + Ops.push_back(Chain); + break; case NVPTXISD::Suld2DArrayV2I8Trap: Opc = NVPTX::SULD_2D_ARRAY_V2I8_TRAP; Ops.push_back(TexHandle); @@ -3160,6 +4217,14 @@ SDNode *NVPTXDAGToDAGISel::SelectSurfaceIntrinsic(SDNode *N) { Ops.push_back(N->getOperand(4)); Ops.push_back(Chain); break; + case NVPTXISD::Suld2DArrayV2I64Trap: + Opc = NVPTX::SULD_2D_ARRAY_V2I64_TRAP; + Ops.push_back(TexHandle); + Ops.push_back(N->getOperand(2)); + Ops.push_back(N->getOperand(3)); + Ops.push_back(N->getOperand(4)); + Ops.push_back(Chain); + break; case NVPTXISD::Suld2DArrayV4I8Trap: Opc = NVPTX::SULD_2D_ARRAY_V4I8_TRAP; Ops.push_back(TexHandle); @@ -3208,6 +4273,14 @@ SDNode *NVPTXDAGToDAGISel::SelectSurfaceIntrinsic(SDNode *N) { Ops.push_back(N->getOperand(4)); Ops.push_back(Chain); break; + case NVPTXISD::Suld3DI64Trap: + Opc = NVPTX::SULD_3D_I64_TRAP; + Ops.push_back(TexHandle); + Ops.push_back(N->getOperand(2)); + Ops.push_back(N->getOperand(3)); + Ops.push_back(N->getOperand(4)); + Ops.push_back(Chain); + break; case NVPTXISD::Suld3DV2I8Trap: Opc = NVPTX::SULD_3D_V2I8_TRAP; Ops.push_back(TexHandle); @@ -3232,6 +4305,14 @@ SDNode *NVPTXDAGToDAGISel::SelectSurfaceIntrinsic(SDNode *N) { Ops.push_back(N->getOperand(4)); Ops.push_back(Chain); break; + case NVPTXISD::Suld3DV2I64Trap: + Opc = NVPTX::SULD_3D_V2I64_TRAP; + Ops.push_back(TexHandle); + Ops.push_back(N->getOperand(2)); + 
Ops.push_back(N->getOperand(3)); + Ops.push_back(N->getOperand(4)); + Ops.push_back(Chain); + break; case NVPTXISD::Suld3DV4I8Trap: Opc = NVPTX::SULD_3D_V4I8_TRAP; Ops.push_back(TexHandle); @@ -3256,11 +4337,408 @@ SDNode *NVPTXDAGToDAGISel::SelectSurfaceIntrinsic(SDNode *N) { Ops.push_back(N->getOperand(4)); Ops.push_back(Chain); break; + case NVPTXISD::Suld1DI8Zero: + Opc = NVPTX::SULD_1D_I8_ZERO; + Ops.push_back(TexHandle); + Ops.push_back(N->getOperand(2)); + Ops.push_back(Chain); + break; + case NVPTXISD::Suld1DI16Zero: + Opc = NVPTX::SULD_1D_I16_ZERO; + Ops.push_back(TexHandle); + Ops.push_back(N->getOperand(2)); + Ops.push_back(Chain); + break; + case NVPTXISD::Suld1DI32Zero: + Opc = NVPTX::SULD_1D_I32_ZERO; + Ops.push_back(TexHandle); + Ops.push_back(N->getOperand(2)); + Ops.push_back(Chain); + break; + case NVPTXISD::Suld1DI64Zero: + Opc = NVPTX::SULD_1D_I64_ZERO; + Ops.push_back(TexHandle); + Ops.push_back(N->getOperand(2)); + Ops.push_back(Chain); + break; + case NVPTXISD::Suld1DV2I8Zero: + Opc = NVPTX::SULD_1D_V2I8_ZERO; + Ops.push_back(TexHandle); + Ops.push_back(N->getOperand(2)); + Ops.push_back(Chain); + break; + case NVPTXISD::Suld1DV2I16Zero: + Opc = NVPTX::SULD_1D_V2I16_ZERO; + Ops.push_back(TexHandle); + Ops.push_back(N->getOperand(2)); + Ops.push_back(Chain); + break; + case NVPTXISD::Suld1DV2I32Zero: + Opc = NVPTX::SULD_1D_V2I32_ZERO; + Ops.push_back(TexHandle); + Ops.push_back(N->getOperand(2)); + Ops.push_back(Chain); + break; + case NVPTXISD::Suld1DV2I64Zero: + Opc = NVPTX::SULD_1D_V2I64_ZERO; + Ops.push_back(TexHandle); + Ops.push_back(N->getOperand(2)); + Ops.push_back(Chain); + break; + case NVPTXISD::Suld1DV4I8Zero: + Opc = NVPTX::SULD_1D_V4I8_ZERO; + Ops.push_back(TexHandle); + Ops.push_back(N->getOperand(2)); + Ops.push_back(Chain); + break; + case NVPTXISD::Suld1DV4I16Zero: + Opc = NVPTX::SULD_1D_V4I16_ZERO; + Ops.push_back(TexHandle); + Ops.push_back(N->getOperand(2)); + Ops.push_back(Chain); + break; + case 
NVPTXISD::Suld1DV4I32Zero: + Opc = NVPTX::SULD_1D_V4I32_ZERO; + Ops.push_back(TexHandle); + Ops.push_back(N->getOperand(2)); + Ops.push_back(Chain); + break; + case NVPTXISD::Suld1DArrayI8Zero: + Opc = NVPTX::SULD_1D_ARRAY_I8_ZERO; + Ops.push_back(TexHandle); + Ops.push_back(N->getOperand(2)); + Ops.push_back(N->getOperand(3)); + Ops.push_back(Chain); + break; + case NVPTXISD::Suld1DArrayI16Zero: + Opc = NVPTX::SULD_1D_ARRAY_I16_ZERO; + Ops.push_back(TexHandle); + Ops.push_back(N->getOperand(2)); + Ops.push_back(N->getOperand(3)); + Ops.push_back(Chain); + break; + case NVPTXISD::Suld1DArrayI32Zero: + Opc = NVPTX::SULD_1D_ARRAY_I32_ZERO; + Ops.push_back(TexHandle); + Ops.push_back(N->getOperand(2)); + Ops.push_back(N->getOperand(3)); + Ops.push_back(Chain); + break; + case NVPTXISD::Suld1DArrayI64Zero: + Opc = NVPTX::SULD_1D_ARRAY_I64_ZERO; + Ops.push_back(TexHandle); + Ops.push_back(N->getOperand(2)); + Ops.push_back(N->getOperand(3)); + Ops.push_back(Chain); + break; + case NVPTXISD::Suld1DArrayV2I8Zero: + Opc = NVPTX::SULD_1D_ARRAY_V2I8_ZERO; + Ops.push_back(TexHandle); + Ops.push_back(N->getOperand(2)); + Ops.push_back(N->getOperand(3)); + Ops.push_back(Chain); + break; + case NVPTXISD::Suld1DArrayV2I16Zero: + Opc = NVPTX::SULD_1D_ARRAY_V2I16_ZERO; + Ops.push_back(TexHandle); + Ops.push_back(N->getOperand(2)); + Ops.push_back(N->getOperand(3)); + Ops.push_back(Chain); + break; + case NVPTXISD::Suld1DArrayV2I32Zero: + Opc = NVPTX::SULD_1D_ARRAY_V2I32_ZERO; + Ops.push_back(TexHandle); + Ops.push_back(N->getOperand(2)); + Ops.push_back(N->getOperand(3)); + Ops.push_back(Chain); + break; + case NVPTXISD::Suld1DArrayV2I64Zero: + Opc = NVPTX::SULD_1D_ARRAY_V2I64_ZERO; + Ops.push_back(TexHandle); + Ops.push_back(N->getOperand(2)); + Ops.push_back(N->getOperand(3)); + Ops.push_back(Chain); + break; + case NVPTXISD::Suld1DArrayV4I8Zero: + Opc = NVPTX::SULD_1D_ARRAY_V4I8_ZERO; + Ops.push_back(TexHandle); + Ops.push_back(N->getOperand(2)); + 
Ops.push_back(N->getOperand(3)); + Ops.push_back(Chain); + break; + case NVPTXISD::Suld1DArrayV4I16Zero: + Opc = NVPTX::SULD_1D_ARRAY_V4I16_ZERO; + Ops.push_back(TexHandle); + Ops.push_back(N->getOperand(2)); + Ops.push_back(N->getOperand(3)); + Ops.push_back(Chain); + break; + case NVPTXISD::Suld1DArrayV4I32Zero: + Opc = NVPTX::SULD_1D_ARRAY_V4I32_ZERO; + Ops.push_back(TexHandle); + Ops.push_back(N->getOperand(2)); + Ops.push_back(N->getOperand(3)); + Ops.push_back(Chain); + break; + case NVPTXISD::Suld2DI8Zero: + Opc = NVPTX::SULD_2D_I8_ZERO; + Ops.push_back(TexHandle); + Ops.push_back(N->getOperand(2)); + Ops.push_back(N->getOperand(3)); + Ops.push_back(Chain); + break; + case NVPTXISD::Suld2DI16Zero: + Opc = NVPTX::SULD_2D_I16_ZERO; + Ops.push_back(TexHandle); + Ops.push_back(N->getOperand(2)); + Ops.push_back(N->getOperand(3)); + Ops.push_back(Chain); + break; + case NVPTXISD::Suld2DI32Zero: + Opc = NVPTX::SULD_2D_I32_ZERO; + Ops.push_back(TexHandle); + Ops.push_back(N->getOperand(2)); + Ops.push_back(N->getOperand(3)); + Ops.push_back(Chain); + break; + case NVPTXISD::Suld2DI64Zero: + Opc = NVPTX::SULD_2D_I64_ZERO; + Ops.push_back(TexHandle); + Ops.push_back(N->getOperand(2)); + Ops.push_back(N->getOperand(3)); + Ops.push_back(Chain); + break; + case NVPTXISD::Suld2DV2I8Zero: + Opc = NVPTX::SULD_2D_V2I8_ZERO; + Ops.push_back(TexHandle); + Ops.push_back(N->getOperand(2)); + Ops.push_back(N->getOperand(3)); + Ops.push_back(Chain); + break; + case NVPTXISD::Suld2DV2I16Zero: + Opc = NVPTX::SULD_2D_V2I16_ZERO; + Ops.push_back(TexHandle); + Ops.push_back(N->getOperand(2)); + Ops.push_back(N->getOperand(3)); + Ops.push_back(Chain); + break; + case NVPTXISD::Suld2DV2I32Zero: + Opc = NVPTX::SULD_2D_V2I32_ZERO; + Ops.push_back(TexHandle); + Ops.push_back(N->getOperand(2)); + Ops.push_back(N->getOperand(3)); + Ops.push_back(Chain); + break; + case NVPTXISD::Suld2DV2I64Zero: + Opc = NVPTX::SULD_2D_V2I64_ZERO; + Ops.push_back(TexHandle); + Ops.push_back(N->getOperand(2)); 
+ Ops.push_back(N->getOperand(3)); + Ops.push_back(Chain); + break; + case NVPTXISD::Suld2DV4I8Zero: + Opc = NVPTX::SULD_2D_V4I8_ZERO; + Ops.push_back(TexHandle); + Ops.push_back(N->getOperand(2)); + Ops.push_back(N->getOperand(3)); + Ops.push_back(Chain); + break; + case NVPTXISD::Suld2DV4I16Zero: + Opc = NVPTX::SULD_2D_V4I16_ZERO; + Ops.push_back(TexHandle); + Ops.push_back(N->getOperand(2)); + Ops.push_back(N->getOperand(3)); + Ops.push_back(Chain); + break; + case NVPTXISD::Suld2DV4I32Zero: + Opc = NVPTX::SULD_2D_V4I32_ZERO; + Ops.push_back(TexHandle); + Ops.push_back(N->getOperand(2)); + Ops.push_back(N->getOperand(3)); + Ops.push_back(Chain); + break; + case NVPTXISD::Suld2DArrayI8Zero: + Opc = NVPTX::SULD_2D_ARRAY_I8_ZERO; + Ops.push_back(TexHandle); + Ops.push_back(N->getOperand(2)); + Ops.push_back(N->getOperand(3)); + Ops.push_back(N->getOperand(4)); + Ops.push_back(Chain); + break; + case NVPTXISD::Suld2DArrayI16Zero: + Opc = NVPTX::SULD_2D_ARRAY_I16_ZERO; + Ops.push_back(TexHandle); + Ops.push_back(N->getOperand(2)); + Ops.push_back(N->getOperand(3)); + Ops.push_back(N->getOperand(4)); + Ops.push_back(Chain); + break; + case NVPTXISD::Suld2DArrayI32Zero: + Opc = NVPTX::SULD_2D_ARRAY_I32_ZERO; + Ops.push_back(TexHandle); + Ops.push_back(N->getOperand(2)); + Ops.push_back(N->getOperand(3)); + Ops.push_back(N->getOperand(4)); + Ops.push_back(Chain); + break; + case NVPTXISD::Suld2DArrayI64Zero: + Opc = NVPTX::SULD_2D_ARRAY_I64_ZERO; + Ops.push_back(TexHandle); + Ops.push_back(N->getOperand(2)); + Ops.push_back(N->getOperand(3)); + Ops.push_back(N->getOperand(4)); + Ops.push_back(Chain); + break; + case NVPTXISD::Suld2DArrayV2I8Zero: + Opc = NVPTX::SULD_2D_ARRAY_V2I8_ZERO; + Ops.push_back(TexHandle); + Ops.push_back(N->getOperand(2)); + Ops.push_back(N->getOperand(3)); + Ops.push_back(N->getOperand(4)); + Ops.push_back(Chain); + break; + case NVPTXISD::Suld2DArrayV2I16Zero: + Opc = NVPTX::SULD_2D_ARRAY_V2I16_ZERO; + Ops.push_back(TexHandle); + 
Ops.push_back(N->getOperand(2)); + Ops.push_back(N->getOperand(3)); + Ops.push_back(N->getOperand(4)); + Ops.push_back(Chain); + break; + case NVPTXISD::Suld2DArrayV2I32Zero: + Opc = NVPTX::SULD_2D_ARRAY_V2I32_ZERO; + Ops.push_back(TexHandle); + Ops.push_back(N->getOperand(2)); + Ops.push_back(N->getOperand(3)); + Ops.push_back(N->getOperand(4)); + Ops.push_back(Chain); + break; + case NVPTXISD::Suld2DArrayV2I64Zero: + Opc = NVPTX::SULD_2D_ARRAY_V2I64_ZERO; + Ops.push_back(TexHandle); + Ops.push_back(N->getOperand(2)); + Ops.push_back(N->getOperand(3)); + Ops.push_back(N->getOperand(4)); + Ops.push_back(Chain); + break; + case NVPTXISD::Suld2DArrayV4I8Zero: + Opc = NVPTX::SULD_2D_ARRAY_V4I8_ZERO; + Ops.push_back(TexHandle); + Ops.push_back(N->getOperand(2)); + Ops.push_back(N->getOperand(3)); + Ops.push_back(N->getOperand(4)); + Ops.push_back(Chain); + break; + case NVPTXISD::Suld2DArrayV4I16Zero: + Opc = NVPTX::SULD_2D_ARRAY_V4I16_ZERO; + Ops.push_back(TexHandle); + Ops.push_back(N->getOperand(2)); + Ops.push_back(N->getOperand(3)); + Ops.push_back(N->getOperand(4)); + Ops.push_back(Chain); + break; + case NVPTXISD::Suld2DArrayV4I32Zero: + Opc = NVPTX::SULD_2D_ARRAY_V4I32_ZERO; + Ops.push_back(TexHandle); + Ops.push_back(N->getOperand(2)); + Ops.push_back(N->getOperand(3)); + Ops.push_back(N->getOperand(4)); + Ops.push_back(Chain); + break; + case NVPTXISD::Suld3DI8Zero: + Opc = NVPTX::SULD_3D_I8_ZERO; + Ops.push_back(TexHandle); + Ops.push_back(N->getOperand(2)); + Ops.push_back(N->getOperand(3)); + Ops.push_back(N->getOperand(4)); + Ops.push_back(Chain); + break; + case NVPTXISD::Suld3DI16Zero: + Opc = NVPTX::SULD_3D_I16_ZERO; + Ops.push_back(TexHandle); + Ops.push_back(N->getOperand(2)); + Ops.push_back(N->getOperand(3)); + Ops.push_back(N->getOperand(4)); + Ops.push_back(Chain); + break; + case NVPTXISD::Suld3DI32Zero: + Opc = NVPTX::SULD_3D_I32_ZERO; + Ops.push_back(TexHandle); + Ops.push_back(N->getOperand(2)); + Ops.push_back(N->getOperand(3)); + 
Ops.push_back(N->getOperand(4)); + Ops.push_back(Chain); + break; + case NVPTXISD::Suld3DI64Zero: + Opc = NVPTX::SULD_3D_I64_ZERO; + Ops.push_back(TexHandle); + Ops.push_back(N->getOperand(2)); + Ops.push_back(N->getOperand(3)); + Ops.push_back(N->getOperand(4)); + Ops.push_back(Chain); + break; + case NVPTXISD::Suld3DV2I8Zero: + Opc = NVPTX::SULD_3D_V2I8_ZERO; + Ops.push_back(TexHandle); + Ops.push_back(N->getOperand(2)); + Ops.push_back(N->getOperand(3)); + Ops.push_back(N->getOperand(4)); + Ops.push_back(Chain); + break; + case NVPTXISD::Suld3DV2I16Zero: + Opc = NVPTX::SULD_3D_V2I16_ZERO; + Ops.push_back(TexHandle); + Ops.push_back(N->getOperand(2)); + Ops.push_back(N->getOperand(3)); + Ops.push_back(N->getOperand(4)); + Ops.push_back(Chain); + break; + case NVPTXISD::Suld3DV2I32Zero: + Opc = NVPTX::SULD_3D_V2I32_ZERO; + Ops.push_back(TexHandle); + Ops.push_back(N->getOperand(2)); + Ops.push_back(N->getOperand(3)); + Ops.push_back(N->getOperand(4)); + Ops.push_back(Chain); + break; + case NVPTXISD::Suld3DV2I64Zero: + Opc = NVPTX::SULD_3D_V2I64_ZERO; + Ops.push_back(TexHandle); + Ops.push_back(N->getOperand(2)); + Ops.push_back(N->getOperand(3)); + Ops.push_back(N->getOperand(4)); + Ops.push_back(Chain); + break; + case NVPTXISD::Suld3DV4I8Zero: + Opc = NVPTX::SULD_3D_V4I8_ZERO; + Ops.push_back(TexHandle); + Ops.push_back(N->getOperand(2)); + Ops.push_back(N->getOperand(3)); + Ops.push_back(N->getOperand(4)); + Ops.push_back(Chain); + break; + case NVPTXISD::Suld3DV4I16Zero: + Opc = NVPTX::SULD_3D_V4I16_ZERO; + Ops.push_back(TexHandle); + Ops.push_back(N->getOperand(2)); + Ops.push_back(N->getOperand(3)); + Ops.push_back(N->getOperand(4)); + Ops.push_back(Chain); + break; + case NVPTXISD::Suld3DV4I32Zero: + Opc = NVPTX::SULD_3D_V4I32_ZERO; + Ops.push_back(TexHandle); + Ops.push_back(N->getOperand(2)); + Ops.push_back(N->getOperand(3)); + Ops.push_back(N->getOperand(4)); + Ops.push_back(Chain); + break; } Ret = CurDAG->getMachineNode(Opc, SDLoc(N), N->getVTList(), 
Ops); return Ret; } + /// SelectBFE - Look for instruction sequences that can be made more efficient /// by using the 'bfe' (bit-field extract) PTX instruction SDNode *NVPTXDAGToDAGISel::SelectBFE(SDNode *N) { @@ -3563,17 +5041,10 @@ bool NVPTXDAGToDAGISel::SelectADDRri64(SDNode *OpNode, SDValue Addr, bool NVPTXDAGToDAGISel::ChkMemSDNodeAddressSpace(SDNode *N, unsigned int spN) const { const Value *Src = nullptr; - // Even though MemIntrinsicSDNode is a subclas of MemSDNode, - // the classof() for MemSDNode does not include MemIntrinsicSDNode - // (See SelectionDAGNodes.h). So we need to check for both. if (MemSDNode *mN = dyn_cast<MemSDNode>(N)) { if (spN == 0 && mN->getMemOperand()->getPseudoValue()) return true; Src = mN->getMemOperand()->getValue(); - } else if (MemSDNode *mN = dyn_cast<MemIntrinsicSDNode>(N)) { - if (spN == 0 && mN->getMemOperand()->getPseudoValue()) - return true; - Src = mN->getMemOperand()->getValue(); } if (!Src) return false; diff --git a/lib/Target/NVPTX/NVPTXISelDAGToDAG.h b/lib/Target/NVPTX/NVPTXISelDAGToDAG.h index c44ccb2..69afcd7 100644 --- a/lib/Target/NVPTX/NVPTXISelDAGToDAG.h +++ b/lib/Target/NVPTX/NVPTXISelDAGToDAG.h @@ -11,6 +11,9 @@ // //===----------------------------------------------------------------------===// +#ifndef LLVM_LIB_TARGET_NVPTX_NVPTXISELDAGTODAG_H +#define LLVM_LIB_TARGET_NVPTX_NVPTXISELDAGTODAG_H + #include "NVPTX.h" #include "NVPTXISelLowering.h" #include "NVPTXRegisterInfo.h" @@ -24,20 +27,13 @@ namespace { class LLVM_LIBRARY_VISIBILITY NVPTXDAGToDAGISel : public SelectionDAGISel { - // If true, generate corresponding FPCONTRACT. This is - // language dependent (i.e. CUDA and OpenCL works differently). 
- bool doFMAF64; - bool doFMAF32; - bool doFMAF64AGG; - bool doFMAF32AGG; - bool allowFMA; - // If true, generate mul.wide from sext and mul bool doMulWide; int getDivF32Level() const; bool usePrecSqrtF32() const; bool useF32FTZ() const; + bool allowFMA() const; public: explicit NVPTXDAGToDAGISel(NVPTXTargetMachine &tm, @@ -99,3 +95,5 @@ private: }; } + +#endif diff --git a/lib/Target/NVPTX/NVPTXISelLowering.cpp b/lib/Target/NVPTX/NVPTXISelLowering.cpp index cb452ff..0b0b536 100644 --- a/lib/Target/NVPTX/NVPTXISelLowering.cpp +++ b/lib/Target/NVPTX/NVPTXISelLowering.cpp @@ -48,6 +48,12 @@ static cl::opt<bool> sched4reg( "nvptx-sched4reg", cl::desc("NVPTX Specific: schedule for register pressue"), cl::init(false)); +static cl::opt<unsigned> +FMAContractLevelOpt("nvptx-fma-level", cl::ZeroOrMore, cl::Hidden, + cl::desc("NVPTX Specific: FMA contraction (0: don't do it" + " 1: do it 2: do it aggressively"), + cl::init(2)); + static bool IsPTXVectorType(MVT VT) { switch (VT.SimpleTy) { default: @@ -100,8 +106,8 @@ static void ComputePTXValueVTs(const TargetLowering &TLI, Type *Ty, } // NVPTXTargetLowering Constructor. -NVPTXTargetLowering::NVPTXTargetLowering(NVPTXTargetMachine &TM) - : TargetLowering(TM, new NVPTXTargetObjectFile()), nvTM(&TM), +NVPTXTargetLowering::NVPTXTargetLowering(const NVPTXTargetMachine &TM) + : TargetLowering(TM), nvTM(&TM), nvptxSubtarget(TM.getSubtarget<NVPTXSubtarget>()) { // always lower memset, memcpy, and memmove intrinsics to load/store @@ -197,8 +203,11 @@ NVPTXTargetLowering::NVPTXTargetLowering(NVPTXTargetMachine &TM) setOperationAction(ISD::INTRINSIC_W_CHAIN, MVT::Other, Custom); // Turn FP extload into load/fextend + setLoadExtAction(ISD::EXTLOAD, MVT::f16, Expand); setLoadExtAction(ISD::EXTLOAD, MVT::f32, Expand); // Turn FP truncstore into trunc + store. 
+ setTruncStoreAction(MVT::f32, MVT::f16, Expand); + setTruncStoreAction(MVT::f64, MVT::f16, Expand); setTruncStoreAction(MVT::f64, MVT::f32, Expand); // PTX does not support load / store predicate registers @@ -360,73 +369,379 @@ const char *NVPTXTargetLowering::getTargetNodeName(unsigned Opcode) const { return "NVPTXISD::MUL_WIDE_SIGNED"; case NVPTXISD::MUL_WIDE_UNSIGNED: return "NVPTXISD::MUL_WIDE_UNSIGNED"; - case NVPTXISD::Tex1DFloatI32: return "NVPTXISD::Tex1DFloatI32"; + case NVPTXISD::Tex1DFloatS32: return "NVPTXISD::Tex1DFloatS32"; case NVPTXISD::Tex1DFloatFloat: return "NVPTXISD::Tex1DFloatFloat"; case NVPTXISD::Tex1DFloatFloatLevel: return "NVPTXISD::Tex1DFloatFloatLevel"; case NVPTXISD::Tex1DFloatFloatGrad: return "NVPTXISD::Tex1DFloatFloatGrad"; - case NVPTXISD::Tex1DI32I32: return "NVPTXISD::Tex1DI32I32"; - case NVPTXISD::Tex1DI32Float: return "NVPTXISD::Tex1DI32Float"; - case NVPTXISD::Tex1DI32FloatLevel: - return "NVPTXISD::Tex1DI32FloatLevel"; - case NVPTXISD::Tex1DI32FloatGrad: - return "NVPTXISD::Tex1DI32FloatGrad"; - case NVPTXISD::Tex1DArrayFloatI32: return "NVPTXISD::Tex2DArrayFloatI32"; - case NVPTXISD::Tex1DArrayFloatFloat: return "NVPTXISD::Tex2DArrayFloatFloat"; + case NVPTXISD::Tex1DS32S32: return "NVPTXISD::Tex1DS32S32"; + case NVPTXISD::Tex1DS32Float: return "NVPTXISD::Tex1DS32Float"; + case NVPTXISD::Tex1DS32FloatLevel: + return "NVPTXISD::Tex1DS32FloatLevel"; + case NVPTXISD::Tex1DS32FloatGrad: + return "NVPTXISD::Tex1DS32FloatGrad"; + case NVPTXISD::Tex1DU32S32: return "NVPTXISD::Tex1DU32S32"; + case NVPTXISD::Tex1DU32Float: return "NVPTXISD::Tex1DU32Float"; + case NVPTXISD::Tex1DU32FloatLevel: + return "NVPTXISD::Tex1DU32FloatLevel"; + case NVPTXISD::Tex1DU32FloatGrad: + return "NVPTXISD::Tex1DU32FloatGrad"; + case NVPTXISD::Tex1DArrayFloatS32: return "NVPTXISD::Tex1DArrayFloatS32"; + case NVPTXISD::Tex1DArrayFloatFloat: return "NVPTXISD::Tex1DArrayFloatFloat"; case NVPTXISD::Tex1DArrayFloatFloatLevel: - return 
"NVPTXISD::Tex2DArrayFloatFloatLevel"; + return "NVPTXISD::Tex1DArrayFloatFloatLevel"; case NVPTXISD::Tex1DArrayFloatFloatGrad: - return "NVPTXISD::Tex2DArrayFloatFloatGrad"; - case NVPTXISD::Tex1DArrayI32I32: return "NVPTXISD::Tex2DArrayI32I32"; - case NVPTXISD::Tex1DArrayI32Float: return "NVPTXISD::Tex2DArrayI32Float"; - case NVPTXISD::Tex1DArrayI32FloatLevel: - return "NVPTXISD::Tex2DArrayI32FloatLevel"; - case NVPTXISD::Tex1DArrayI32FloatGrad: - return "NVPTXISD::Tex2DArrayI32FloatGrad"; - case NVPTXISD::Tex2DFloatI32: return "NVPTXISD::Tex2DFloatI32"; + return "NVPTXISD::Tex1DArrayFloatFloatGrad"; + case NVPTXISD::Tex1DArrayS32S32: return "NVPTXISD::Tex1DArrayS32S32"; + case NVPTXISD::Tex1DArrayS32Float: return "NVPTXISD::Tex1DArrayS32Float"; + case NVPTXISD::Tex1DArrayS32FloatLevel: + return "NVPTXISD::Tex1DArrayS32FloatLevel"; + case NVPTXISD::Tex1DArrayS32FloatGrad: + return "NVPTXISD::Tex1DArrayS32FloatGrad"; + case NVPTXISD::Tex1DArrayU32S32: return "NVPTXISD::Tex1DArrayU32S32"; + case NVPTXISD::Tex1DArrayU32Float: return "NVPTXISD::Tex1DArrayU32Float"; + case NVPTXISD::Tex1DArrayU32FloatLevel: + return "NVPTXISD::Tex1DArrayU32FloatLevel"; + case NVPTXISD::Tex1DArrayU32FloatGrad: + return "NVPTXISD::Tex1DArrayU32FloatGrad"; + case NVPTXISD::Tex2DFloatS32: return "NVPTXISD::Tex2DFloatS32"; case NVPTXISD::Tex2DFloatFloat: return "NVPTXISD::Tex2DFloatFloat"; case NVPTXISD::Tex2DFloatFloatLevel: return "NVPTXISD::Tex2DFloatFloatLevel"; case NVPTXISD::Tex2DFloatFloatGrad: return "NVPTXISD::Tex2DFloatFloatGrad"; - case NVPTXISD::Tex2DI32I32: return "NVPTXISD::Tex2DI32I32"; - case NVPTXISD::Tex2DI32Float: return "NVPTXISD::Tex2DI32Float"; - case NVPTXISD::Tex2DI32FloatLevel: - return "NVPTXISD::Tex2DI32FloatLevel"; - case NVPTXISD::Tex2DI32FloatGrad: - return "NVPTXISD::Tex2DI32FloatGrad"; - case NVPTXISD::Tex2DArrayFloatI32: return "NVPTXISD::Tex2DArrayFloatI32"; + case NVPTXISD::Tex2DS32S32: return "NVPTXISD::Tex2DS32S32"; + case NVPTXISD::Tex2DS32Float: 
return "NVPTXISD::Tex2DS32Float"; + case NVPTXISD::Tex2DS32FloatLevel: + return "NVPTXISD::Tex2DS32FloatLevel"; + case NVPTXISD::Tex2DS32FloatGrad: + return "NVPTXISD::Tex2DS32FloatGrad"; + case NVPTXISD::Tex2DU32S32: return "NVPTXISD::Tex2DU32S32"; + case NVPTXISD::Tex2DU32Float: return "NVPTXISD::Tex2DU32Float"; + case NVPTXISD::Tex2DU32FloatLevel: + return "NVPTXISD::Tex2DU32FloatLevel"; + case NVPTXISD::Tex2DU32FloatGrad: + return "NVPTXISD::Tex2DU32FloatGrad"; + case NVPTXISD::Tex2DArrayFloatS32: return "NVPTXISD::Tex2DArrayFloatS32"; case NVPTXISD::Tex2DArrayFloatFloat: return "NVPTXISD::Tex2DArrayFloatFloat"; case NVPTXISD::Tex2DArrayFloatFloatLevel: return "NVPTXISD::Tex2DArrayFloatFloatLevel"; case NVPTXISD::Tex2DArrayFloatFloatGrad: return "NVPTXISD::Tex2DArrayFloatFloatGrad"; - case NVPTXISD::Tex2DArrayI32I32: return "NVPTXISD::Tex2DArrayI32I32"; - case NVPTXISD::Tex2DArrayI32Float: return "NVPTXISD::Tex2DArrayI32Float"; - case NVPTXISD::Tex2DArrayI32FloatLevel: - return "NVPTXISD::Tex2DArrayI32FloatLevel"; - case NVPTXISD::Tex2DArrayI32FloatGrad: - return "NVPTXISD::Tex2DArrayI32FloatGrad"; - case NVPTXISD::Tex3DFloatI32: return "NVPTXISD::Tex3DFloatI32"; + case NVPTXISD::Tex2DArrayS32S32: return "NVPTXISD::Tex2DArrayS32S32"; + case NVPTXISD::Tex2DArrayS32Float: return "NVPTXISD::Tex2DArrayS32Float"; + case NVPTXISD::Tex2DArrayS32FloatLevel: + return "NVPTXISD::Tex2DArrayS32FloatLevel"; + case NVPTXISD::Tex2DArrayS32FloatGrad: + return "NVPTXISD::Tex2DArrayS32FloatGrad"; + case NVPTXISD::Tex2DArrayU32S32: return "NVPTXISD::Tex2DArrayU32S32"; + case NVPTXISD::Tex2DArrayU32Float: return "NVPTXISD::Tex2DArrayU32Float"; + case NVPTXISD::Tex2DArrayU32FloatLevel: + return "NVPTXISD::Tex2DArrayU32FloatLevel"; + case NVPTXISD::Tex2DArrayU32FloatGrad: + return "NVPTXISD::Tex2DArrayU32FloatGrad"; + case NVPTXISD::Tex3DFloatS32: return "NVPTXISD::Tex3DFloatS32"; case NVPTXISD::Tex3DFloatFloat: return "NVPTXISD::Tex3DFloatFloat"; case 
NVPTXISD::Tex3DFloatFloatLevel: return "NVPTXISD::Tex3DFloatFloatLevel"; case NVPTXISD::Tex3DFloatFloatGrad: return "NVPTXISD::Tex3DFloatFloatGrad"; - case NVPTXISD::Tex3DI32I32: return "NVPTXISD::Tex3DI32I32"; - case NVPTXISD::Tex3DI32Float: return "NVPTXISD::Tex3DI32Float"; - case NVPTXISD::Tex3DI32FloatLevel: - return "NVPTXISD::Tex3DI32FloatLevel"; - case NVPTXISD::Tex3DI32FloatGrad: - return "NVPTXISD::Tex3DI32FloatGrad"; + case NVPTXISD::Tex3DS32S32: return "NVPTXISD::Tex3DS32S32"; + case NVPTXISD::Tex3DS32Float: return "NVPTXISD::Tex3DS32Float"; + case NVPTXISD::Tex3DS32FloatLevel: + return "NVPTXISD::Tex3DS32FloatLevel"; + case NVPTXISD::Tex3DS32FloatGrad: + return "NVPTXISD::Tex3DS32FloatGrad"; + case NVPTXISD::Tex3DU32S32: return "NVPTXISD::Tex3DU32S32"; + case NVPTXISD::Tex3DU32Float: return "NVPTXISD::Tex3DU32Float"; + case NVPTXISD::Tex3DU32FloatLevel: + return "NVPTXISD::Tex3DU32FloatLevel"; + case NVPTXISD::Tex3DU32FloatGrad: + return "NVPTXISD::Tex3DU32FloatGrad"; + case NVPTXISD::TexCubeFloatFloat: return "NVPTXISD::TexCubeFloatFloat"; + case NVPTXISD::TexCubeFloatFloatLevel: + return "NVPTXISD::TexCubeFloatFloatLevel"; + case NVPTXISD::TexCubeS32Float: return "NVPTXISD::TexCubeS32Float"; + case NVPTXISD::TexCubeS32FloatLevel: + return "NVPTXISD::TexCubeS32FloatLevel"; + case NVPTXISD::TexCubeU32Float: return "NVPTXISD::TexCubeU32Float"; + case NVPTXISD::TexCubeU32FloatLevel: + return "NVPTXISD::TexCubeU32FloatLevel"; + case NVPTXISD::TexCubeArrayFloatFloat: + return "NVPTXISD::TexCubeArrayFloatFloat"; + case NVPTXISD::TexCubeArrayFloatFloatLevel: + return "NVPTXISD::TexCubeArrayFloatFloatLevel"; + case NVPTXISD::TexCubeArrayS32Float: + return "NVPTXISD::TexCubeArrayS32Float"; + case NVPTXISD::TexCubeArrayS32FloatLevel: + return "NVPTXISD::TexCubeArrayS32FloatLevel"; + case NVPTXISD::TexCubeArrayU32Float: + return "NVPTXISD::TexCubeArrayU32Float"; + case NVPTXISD::TexCubeArrayU32FloatLevel: + return "NVPTXISD::TexCubeArrayU32FloatLevel"; + case 
NVPTXISD::Tld4R2DFloatFloat: + return "NVPTXISD::Tld4R2DFloatFloat"; + case NVPTXISD::Tld4G2DFloatFloat: + return "NVPTXISD::Tld4G2DFloatFloat"; + case NVPTXISD::Tld4B2DFloatFloat: + return "NVPTXISD::Tld4B2DFloatFloat"; + case NVPTXISD::Tld4A2DFloatFloat: + return "NVPTXISD::Tld4A2DFloatFloat"; + case NVPTXISD::Tld4R2DS64Float: + return "NVPTXISD::Tld4R2DS64Float"; + case NVPTXISD::Tld4G2DS64Float: + return "NVPTXISD::Tld4G2DS64Float"; + case NVPTXISD::Tld4B2DS64Float: + return "NVPTXISD::Tld4B2DS64Float"; + case NVPTXISD::Tld4A2DS64Float: + return "NVPTXISD::Tld4A2DS64Float"; + case NVPTXISD::Tld4R2DU64Float: + return "NVPTXISD::Tld4R2DU64Float"; + case NVPTXISD::Tld4G2DU64Float: + return "NVPTXISD::Tld4G2DU64Float"; + case NVPTXISD::Tld4B2DU64Float: + return "NVPTXISD::Tld4B2DU64Float"; + case NVPTXISD::Tld4A2DU64Float: + return "NVPTXISD::Tld4A2DU64Float"; + + case NVPTXISD::TexUnified1DFloatS32: + return "NVPTXISD::TexUnified1DFloatS32"; + case NVPTXISD::TexUnified1DFloatFloat: + return "NVPTXISD::TexUnified1DFloatFloat"; + case NVPTXISD::TexUnified1DFloatFloatLevel: + return "NVPTXISD::TexUnified1DFloatFloatLevel"; + case NVPTXISD::TexUnified1DFloatFloatGrad: + return "NVPTXISD::TexUnified1DFloatFloatGrad"; + case NVPTXISD::TexUnified1DS32S32: + return "NVPTXISD::TexUnified1DS32S32"; + case NVPTXISD::TexUnified1DS32Float: + return "NVPTXISD::TexUnified1DS32Float"; + case NVPTXISD::TexUnified1DS32FloatLevel: + return "NVPTXISD::TexUnified1DS32FloatLevel"; + case NVPTXISD::TexUnified1DS32FloatGrad: + return "NVPTXISD::TexUnified1DS32FloatGrad"; + case NVPTXISD::TexUnified1DU32S32: + return "NVPTXISD::TexUnified1DU32S32"; + case NVPTXISD::TexUnified1DU32Float: + return "NVPTXISD::TexUnified1DU32Float"; + case NVPTXISD::TexUnified1DU32FloatLevel: + return "NVPTXISD::TexUnified1DU32FloatLevel"; + case NVPTXISD::TexUnified1DU32FloatGrad: + return "NVPTXISD::TexUnified1DU32FloatGrad"; + case NVPTXISD::TexUnified1DArrayFloatS32: + return 
"NVPTXISD::TexUnified1DArrayFloatS32"; + case NVPTXISD::TexUnified1DArrayFloatFloat: + return "NVPTXISD::TexUnified1DArrayFloatFloat"; + case NVPTXISD::TexUnified1DArrayFloatFloatLevel: + return "NVPTXISD::TexUnified1DArrayFloatFloatLevel"; + case NVPTXISD::TexUnified1DArrayFloatFloatGrad: + return "NVPTXISD::TexUnified1DArrayFloatFloatGrad"; + case NVPTXISD::TexUnified1DArrayS32S32: + return "NVPTXISD::TexUnified1DArrayS32S32"; + case NVPTXISD::TexUnified1DArrayS32Float: + return "NVPTXISD::TexUnified1DArrayS32Float"; + case NVPTXISD::TexUnified1DArrayS32FloatLevel: + return "NVPTXISD::TexUnified1DArrayS32FloatLevel"; + case NVPTXISD::TexUnified1DArrayS32FloatGrad: + return "NVPTXISD::TexUnified1DArrayS32FloatGrad"; + case NVPTXISD::TexUnified1DArrayU32S32: + return "NVPTXISD::TexUnified1DArrayU32S32"; + case NVPTXISD::TexUnified1DArrayU32Float: + return "NVPTXISD::TexUnified1DArrayU32Float"; + case NVPTXISD::TexUnified1DArrayU32FloatLevel: + return "NVPTXISD::TexUnified1DArrayU32FloatLevel"; + case NVPTXISD::TexUnified1DArrayU32FloatGrad: + return "NVPTXISD::TexUnified1DArrayU32FloatGrad"; + case NVPTXISD::TexUnified2DFloatS32: + return "NVPTXISD::TexUnified2DFloatS32"; + case NVPTXISD::TexUnified2DFloatFloat: + return "NVPTXISD::TexUnified2DFloatFloat"; + case NVPTXISD::TexUnified2DFloatFloatLevel: + return "NVPTXISD::TexUnified2DFloatFloatLevel"; + case NVPTXISD::TexUnified2DFloatFloatGrad: + return "NVPTXISD::TexUnified2DFloatFloatGrad"; + case NVPTXISD::TexUnified2DS32S32: + return "NVPTXISD::TexUnified2DS32S32"; + case NVPTXISD::TexUnified2DS32Float: + return "NVPTXISD::TexUnified2DS32Float"; + case NVPTXISD::TexUnified2DS32FloatLevel: + return "NVPTXISD::TexUnified2DS32FloatLevel"; + case NVPTXISD::TexUnified2DS32FloatGrad: + return "NVPTXISD::TexUnified2DS32FloatGrad"; + case NVPTXISD::TexUnified2DU32S32: + return "NVPTXISD::TexUnified2DU32S32"; + case NVPTXISD::TexUnified2DU32Float: + return "NVPTXISD::TexUnified2DU32Float"; + case 
NVPTXISD::TexUnified2DU32FloatLevel: + return "NVPTXISD::TexUnified2DU32FloatLevel"; + case NVPTXISD::TexUnified2DU32FloatGrad: + return "NVPTXISD::TexUnified2DU32FloatGrad"; + case NVPTXISD::TexUnified2DArrayFloatS32: + return "NVPTXISD::TexUnified2DArrayFloatS32"; + case NVPTXISD::TexUnified2DArrayFloatFloat: + return "NVPTXISD::TexUnified2DArrayFloatFloat"; + case NVPTXISD::TexUnified2DArrayFloatFloatLevel: + return "NVPTXISD::TexUnified2DArrayFloatFloatLevel"; + case NVPTXISD::TexUnified2DArrayFloatFloatGrad: + return "NVPTXISD::TexUnified2DArrayFloatFloatGrad"; + case NVPTXISD::TexUnified2DArrayS32S32: + return "NVPTXISD::TexUnified2DArrayS32S32"; + case NVPTXISD::TexUnified2DArrayS32Float: + return "NVPTXISD::TexUnified2DArrayS32Float"; + case NVPTXISD::TexUnified2DArrayS32FloatLevel: + return "NVPTXISD::TexUnified2DArrayS32FloatLevel"; + case NVPTXISD::TexUnified2DArrayS32FloatGrad: + return "NVPTXISD::TexUnified2DArrayS32FloatGrad"; + case NVPTXISD::TexUnified2DArrayU32S32: + return "NVPTXISD::TexUnified2DArrayU32S32"; + case NVPTXISD::TexUnified2DArrayU32Float: + return "NVPTXISD::TexUnified2DArrayU32Float"; + case NVPTXISD::TexUnified2DArrayU32FloatLevel: + return "NVPTXISD::TexUnified2DArrayU32FloatLevel"; + case NVPTXISD::TexUnified2DArrayU32FloatGrad: + return "NVPTXISD::TexUnified2DArrayU32FloatGrad"; + case NVPTXISD::TexUnified3DFloatS32: + return "NVPTXISD::TexUnified3DFloatS32"; + case NVPTXISD::TexUnified3DFloatFloat: + return "NVPTXISD::TexUnified3DFloatFloat"; + case NVPTXISD::TexUnified3DFloatFloatLevel: + return "NVPTXISD::TexUnified3DFloatFloatLevel"; + case NVPTXISD::TexUnified3DFloatFloatGrad: + return "NVPTXISD::TexUnified3DFloatFloatGrad"; + case NVPTXISD::TexUnified3DS32S32: + return "NVPTXISD::TexUnified3DS32S32"; + case NVPTXISD::TexUnified3DS32Float: + return "NVPTXISD::TexUnified3DS32Float"; + case NVPTXISD::TexUnified3DS32FloatLevel: + return "NVPTXISD::TexUnified3DS32FloatLevel"; + case NVPTXISD::TexUnified3DS32FloatGrad: + return 
"NVPTXISD::TexUnified3DS32FloatGrad"; + case NVPTXISD::TexUnified3DU32S32: + return "NVPTXISD::TexUnified3DU32S32"; + case NVPTXISD::TexUnified3DU32Float: + return "NVPTXISD::TexUnified3DU32Float"; + case NVPTXISD::TexUnified3DU32FloatLevel: + return "NVPTXISD::TexUnified3DU32FloatLevel"; + case NVPTXISD::TexUnified3DU32FloatGrad: + return "NVPTXISD::TexUnified3DU32FloatGrad"; + case NVPTXISD::TexUnifiedCubeFloatFloat: + return "NVPTXISD::TexUnifiedCubeFloatFloat"; + case NVPTXISD::TexUnifiedCubeFloatFloatLevel: + return "NVPTXISD::TexUnifiedCubeFloatFloatLevel"; + case NVPTXISD::TexUnifiedCubeS32Float: + return "NVPTXISD::TexUnifiedCubeS32Float"; + case NVPTXISD::TexUnifiedCubeS32FloatLevel: + return "NVPTXISD::TexUnifiedCubeS32FloatLevel"; + case NVPTXISD::TexUnifiedCubeU32Float: + return "NVPTXISD::TexUnifiedCubeU32Float"; + case NVPTXISD::TexUnifiedCubeU32FloatLevel: + return "NVPTXISD::TexUnifiedCubeU32FloatLevel"; + case NVPTXISD::TexUnifiedCubeArrayFloatFloat: + return "NVPTXISD::TexUnifiedCubeArrayFloatFloat"; + case NVPTXISD::TexUnifiedCubeArrayFloatFloatLevel: + return "NVPTXISD::TexUnifiedCubeArrayFloatFloatLevel"; + case NVPTXISD::TexUnifiedCubeArrayS32Float: + return "NVPTXISD::TexUnifiedCubeArrayS32Float"; + case NVPTXISD::TexUnifiedCubeArrayS32FloatLevel: + return "NVPTXISD::TexUnifiedCubeArrayS32FloatLevel"; + case NVPTXISD::TexUnifiedCubeArrayU32Float: + return "NVPTXISD::TexUnifiedCubeArrayU32Float"; + case NVPTXISD::TexUnifiedCubeArrayU32FloatLevel: + return "NVPTXISD::TexUnifiedCubeArrayU32FloatLevel"; + case NVPTXISD::Tld4UnifiedR2DFloatFloat: + return "NVPTXISD::Tld4UnifiedR2DFloatFloat"; + case NVPTXISD::Tld4UnifiedG2DFloatFloat: + return "NVPTXISD::Tld4UnifiedG2DFloatFloat"; + case NVPTXISD::Tld4UnifiedB2DFloatFloat: + return "NVPTXISD::Tld4UnifiedB2DFloatFloat"; + case NVPTXISD::Tld4UnifiedA2DFloatFloat: + return "NVPTXISD::Tld4UnifiedA2DFloatFloat"; + case NVPTXISD::Tld4UnifiedR2DS64Float: + return "NVPTXISD::Tld4UnifiedR2DS64Float"; + 
case NVPTXISD::Tld4UnifiedG2DS64Float: + return "NVPTXISD::Tld4UnifiedG2DS64Float"; + case NVPTXISD::Tld4UnifiedB2DS64Float: + return "NVPTXISD::Tld4UnifiedB2DS64Float"; + case NVPTXISD::Tld4UnifiedA2DS64Float: + return "NVPTXISD::Tld4UnifiedA2DS64Float"; + case NVPTXISD::Tld4UnifiedR2DU64Float: + return "NVPTXISD::Tld4UnifiedR2DU64Float"; + case NVPTXISD::Tld4UnifiedG2DU64Float: + return "NVPTXISD::Tld4UnifiedG2DU64Float"; + case NVPTXISD::Tld4UnifiedB2DU64Float: + return "NVPTXISD::Tld4UnifiedB2DU64Float"; + case NVPTXISD::Tld4UnifiedA2DU64Float: + return "NVPTXISD::Tld4UnifiedA2DU64Float"; + + case NVPTXISD::Suld1DI8Clamp: return "NVPTXISD::Suld1DI8Clamp"; + case NVPTXISD::Suld1DI16Clamp: return "NVPTXISD::Suld1DI16Clamp"; + case NVPTXISD::Suld1DI32Clamp: return "NVPTXISD::Suld1DI32Clamp"; + case NVPTXISD::Suld1DI64Clamp: return "NVPTXISD::Suld1DI64Clamp"; + case NVPTXISD::Suld1DV2I8Clamp: return "NVPTXISD::Suld1DV2I8Clamp"; + case NVPTXISD::Suld1DV2I16Clamp: return "NVPTXISD::Suld1DV2I16Clamp"; + case NVPTXISD::Suld1DV2I32Clamp: return "NVPTXISD::Suld1DV2I32Clamp"; + case NVPTXISD::Suld1DV2I64Clamp: return "NVPTXISD::Suld1DV2I64Clamp"; + case NVPTXISD::Suld1DV4I8Clamp: return "NVPTXISD::Suld1DV4I8Clamp"; + case NVPTXISD::Suld1DV4I16Clamp: return "NVPTXISD::Suld1DV4I16Clamp"; + case NVPTXISD::Suld1DV4I32Clamp: return "NVPTXISD::Suld1DV4I32Clamp"; + + case NVPTXISD::Suld1DArrayI8Clamp: return "NVPTXISD::Suld1DArrayI8Clamp"; + case NVPTXISD::Suld1DArrayI16Clamp: return "NVPTXISD::Suld1DArrayI16Clamp"; + case NVPTXISD::Suld1DArrayI32Clamp: return "NVPTXISD::Suld1DArrayI32Clamp"; + case NVPTXISD::Suld1DArrayI64Clamp: return "NVPTXISD::Suld1DArrayI64Clamp"; + case NVPTXISD::Suld1DArrayV2I8Clamp: return "NVPTXISD::Suld1DArrayV2I8Clamp"; + case NVPTXISD::Suld1DArrayV2I16Clamp:return "NVPTXISD::Suld1DArrayV2I16Clamp"; + case NVPTXISD::Suld1DArrayV2I32Clamp:return "NVPTXISD::Suld1DArrayV2I32Clamp"; + case NVPTXISD::Suld1DArrayV2I64Clamp:return 
"NVPTXISD::Suld1DArrayV2I64Clamp"; + case NVPTXISD::Suld1DArrayV4I8Clamp: return "NVPTXISD::Suld1DArrayV4I8Clamp"; + case NVPTXISD::Suld1DArrayV4I16Clamp:return "NVPTXISD::Suld1DArrayV4I16Clamp"; + case NVPTXISD::Suld1DArrayV4I32Clamp:return "NVPTXISD::Suld1DArrayV4I32Clamp"; + + case NVPTXISD::Suld2DI8Clamp: return "NVPTXISD::Suld2DI8Clamp"; + case NVPTXISD::Suld2DI16Clamp: return "NVPTXISD::Suld2DI16Clamp"; + case NVPTXISD::Suld2DI32Clamp: return "NVPTXISD::Suld2DI32Clamp"; + case NVPTXISD::Suld2DI64Clamp: return "NVPTXISD::Suld2DI64Clamp"; + case NVPTXISD::Suld2DV2I8Clamp: return "NVPTXISD::Suld2DV2I8Clamp"; + case NVPTXISD::Suld2DV2I16Clamp: return "NVPTXISD::Suld2DV2I16Clamp"; + case NVPTXISD::Suld2DV2I32Clamp: return "NVPTXISD::Suld2DV2I32Clamp"; + case NVPTXISD::Suld2DV2I64Clamp: return "NVPTXISD::Suld2DV2I64Clamp"; + case NVPTXISD::Suld2DV4I8Clamp: return "NVPTXISD::Suld2DV4I8Clamp"; + case NVPTXISD::Suld2DV4I16Clamp: return "NVPTXISD::Suld2DV4I16Clamp"; + case NVPTXISD::Suld2DV4I32Clamp: return "NVPTXISD::Suld2DV4I32Clamp"; + + case NVPTXISD::Suld2DArrayI8Clamp: return "NVPTXISD::Suld2DArrayI8Clamp"; + case NVPTXISD::Suld2DArrayI16Clamp: return "NVPTXISD::Suld2DArrayI16Clamp"; + case NVPTXISD::Suld2DArrayI32Clamp: return "NVPTXISD::Suld2DArrayI32Clamp"; + case NVPTXISD::Suld2DArrayI64Clamp: return "NVPTXISD::Suld2DArrayI64Clamp"; + case NVPTXISD::Suld2DArrayV2I8Clamp: return "NVPTXISD::Suld2DArrayV2I8Clamp"; + case NVPTXISD::Suld2DArrayV2I16Clamp:return "NVPTXISD::Suld2DArrayV2I16Clamp"; + case NVPTXISD::Suld2DArrayV2I32Clamp:return "NVPTXISD::Suld2DArrayV2I32Clamp"; + case NVPTXISD::Suld2DArrayV2I64Clamp:return "NVPTXISD::Suld2DArrayV2I64Clamp"; + case NVPTXISD::Suld2DArrayV4I8Clamp: return "NVPTXISD::Suld2DArrayV4I8Clamp"; + case NVPTXISD::Suld2DArrayV4I16Clamp:return "NVPTXISD::Suld2DArrayV4I16Clamp"; + case NVPTXISD::Suld2DArrayV4I32Clamp:return "NVPTXISD::Suld2DArrayV4I32Clamp"; + + case NVPTXISD::Suld3DI8Clamp: return "NVPTXISD::Suld3DI8Clamp"; + 
case NVPTXISD::Suld3DI16Clamp: return "NVPTXISD::Suld3DI16Clamp"; + case NVPTXISD::Suld3DI32Clamp: return "NVPTXISD::Suld3DI32Clamp"; + case NVPTXISD::Suld3DI64Clamp: return "NVPTXISD::Suld3DI64Clamp"; + case NVPTXISD::Suld3DV2I8Clamp: return "NVPTXISD::Suld3DV2I8Clamp"; + case NVPTXISD::Suld3DV2I16Clamp: return "NVPTXISD::Suld3DV2I16Clamp"; + case NVPTXISD::Suld3DV2I32Clamp: return "NVPTXISD::Suld3DV2I32Clamp"; + case NVPTXISD::Suld3DV2I64Clamp: return "NVPTXISD::Suld3DV2I64Clamp"; + case NVPTXISD::Suld3DV4I8Clamp: return "NVPTXISD::Suld3DV4I8Clamp"; + case NVPTXISD::Suld3DV4I16Clamp: return "NVPTXISD::Suld3DV4I16Clamp"; + case NVPTXISD::Suld3DV4I32Clamp: return "NVPTXISD::Suld3DV4I32Clamp"; case NVPTXISD::Suld1DI8Trap: return "NVPTXISD::Suld1DI8Trap"; case NVPTXISD::Suld1DI16Trap: return "NVPTXISD::Suld1DI16Trap"; case NVPTXISD::Suld1DI32Trap: return "NVPTXISD::Suld1DI32Trap"; + case NVPTXISD::Suld1DI64Trap: return "NVPTXISD::Suld1DI64Trap"; case NVPTXISD::Suld1DV2I8Trap: return "NVPTXISD::Suld1DV2I8Trap"; case NVPTXISD::Suld1DV2I16Trap: return "NVPTXISD::Suld1DV2I16Trap"; case NVPTXISD::Suld1DV2I32Trap: return "NVPTXISD::Suld1DV2I32Trap"; + case NVPTXISD::Suld1DV2I64Trap: return "NVPTXISD::Suld1DV2I64Trap"; case NVPTXISD::Suld1DV4I8Trap: return "NVPTXISD::Suld1DV4I8Trap"; case NVPTXISD::Suld1DV4I16Trap: return "NVPTXISD::Suld1DV4I16Trap"; case NVPTXISD::Suld1DV4I32Trap: return "NVPTXISD::Suld1DV4I32Trap"; @@ -434,9 +749,11 @@ const char *NVPTXTargetLowering::getTargetNodeName(unsigned Opcode) const { case NVPTXISD::Suld1DArrayI8Trap: return "NVPTXISD::Suld1DArrayI8Trap"; case NVPTXISD::Suld1DArrayI16Trap: return "NVPTXISD::Suld1DArrayI16Trap"; case NVPTXISD::Suld1DArrayI32Trap: return "NVPTXISD::Suld1DArrayI32Trap"; + case NVPTXISD::Suld1DArrayI64Trap: return "NVPTXISD::Suld1DArrayI64Trap"; case NVPTXISD::Suld1DArrayV2I8Trap: return "NVPTXISD::Suld1DArrayV2I8Trap"; case NVPTXISD::Suld1DArrayV2I16Trap: return "NVPTXISD::Suld1DArrayV2I16Trap"; case 
NVPTXISD::Suld1DArrayV2I32Trap: return "NVPTXISD::Suld1DArrayV2I32Trap"; + case NVPTXISD::Suld1DArrayV2I64Trap: return "NVPTXISD::Suld1DArrayV2I64Trap"; case NVPTXISD::Suld1DArrayV4I8Trap: return "NVPTXISD::Suld1DArrayV4I8Trap"; case NVPTXISD::Suld1DArrayV4I16Trap: return "NVPTXISD::Suld1DArrayV4I16Trap"; case NVPTXISD::Suld1DArrayV4I32Trap: return "NVPTXISD::Suld1DArrayV4I32Trap"; @@ -444,9 +761,11 @@ const char *NVPTXTargetLowering::getTargetNodeName(unsigned Opcode) const { case NVPTXISD::Suld2DI8Trap: return "NVPTXISD::Suld2DI8Trap"; case NVPTXISD::Suld2DI16Trap: return "NVPTXISD::Suld2DI16Trap"; case NVPTXISD::Suld2DI32Trap: return "NVPTXISD::Suld2DI32Trap"; + case NVPTXISD::Suld2DI64Trap: return "NVPTXISD::Suld2DI64Trap"; case NVPTXISD::Suld2DV2I8Trap: return "NVPTXISD::Suld2DV2I8Trap"; case NVPTXISD::Suld2DV2I16Trap: return "NVPTXISD::Suld2DV2I16Trap"; case NVPTXISD::Suld2DV2I32Trap: return "NVPTXISD::Suld2DV2I32Trap"; + case NVPTXISD::Suld2DV2I64Trap: return "NVPTXISD::Suld2DV2I64Trap"; case NVPTXISD::Suld2DV4I8Trap: return "NVPTXISD::Suld2DV4I8Trap"; case NVPTXISD::Suld2DV4I16Trap: return "NVPTXISD::Suld2DV4I16Trap"; case NVPTXISD::Suld2DV4I32Trap: return "NVPTXISD::Suld2DV4I32Trap"; @@ -454,9 +773,11 @@ const char *NVPTXTargetLowering::getTargetNodeName(unsigned Opcode) const { case NVPTXISD::Suld2DArrayI8Trap: return "NVPTXISD::Suld2DArrayI8Trap"; case NVPTXISD::Suld2DArrayI16Trap: return "NVPTXISD::Suld2DArrayI16Trap"; case NVPTXISD::Suld2DArrayI32Trap: return "NVPTXISD::Suld2DArrayI32Trap"; + case NVPTXISD::Suld2DArrayI64Trap: return "NVPTXISD::Suld2DArrayI64Trap"; case NVPTXISD::Suld2DArrayV2I8Trap: return "NVPTXISD::Suld2DArrayV2I8Trap"; case NVPTXISD::Suld2DArrayV2I16Trap: return "NVPTXISD::Suld2DArrayV2I16Trap"; case NVPTXISD::Suld2DArrayV2I32Trap: return "NVPTXISD::Suld2DArrayV2I32Trap"; + case NVPTXISD::Suld2DArrayV2I64Trap: return "NVPTXISD::Suld2DArrayV2I64Trap"; case NVPTXISD::Suld2DArrayV4I8Trap: return "NVPTXISD::Suld2DArrayV4I8Trap"; case 
NVPTXISD::Suld2DArrayV4I16Trap: return "NVPTXISD::Suld2DArrayV4I16Trap"; case NVPTXISD::Suld2DArrayV4I32Trap: return "NVPTXISD::Suld2DArrayV4I32Trap"; @@ -464,12 +785,74 @@ const char *NVPTXTargetLowering::getTargetNodeName(unsigned Opcode) const { case NVPTXISD::Suld3DI8Trap: return "NVPTXISD::Suld3DI8Trap"; case NVPTXISD::Suld3DI16Trap: return "NVPTXISD::Suld3DI16Trap"; case NVPTXISD::Suld3DI32Trap: return "NVPTXISD::Suld3DI32Trap"; + case NVPTXISD::Suld3DI64Trap: return "NVPTXISD::Suld3DI64Trap"; case NVPTXISD::Suld3DV2I8Trap: return "NVPTXISD::Suld3DV2I8Trap"; case NVPTXISD::Suld3DV2I16Trap: return "NVPTXISD::Suld3DV2I16Trap"; case NVPTXISD::Suld3DV2I32Trap: return "NVPTXISD::Suld3DV2I32Trap"; + case NVPTXISD::Suld3DV2I64Trap: return "NVPTXISD::Suld3DV2I64Trap"; case NVPTXISD::Suld3DV4I8Trap: return "NVPTXISD::Suld3DV4I8Trap"; case NVPTXISD::Suld3DV4I16Trap: return "NVPTXISD::Suld3DV4I16Trap"; case NVPTXISD::Suld3DV4I32Trap: return "NVPTXISD::Suld3DV4I32Trap"; + + case NVPTXISD::Suld1DI8Zero: return "NVPTXISD::Suld1DI8Zero"; + case NVPTXISD::Suld1DI16Zero: return "NVPTXISD::Suld1DI16Zero"; + case NVPTXISD::Suld1DI32Zero: return "NVPTXISD::Suld1DI32Zero"; + case NVPTXISD::Suld1DI64Zero: return "NVPTXISD::Suld1DI64Zero"; + case NVPTXISD::Suld1DV2I8Zero: return "NVPTXISD::Suld1DV2I8Zero"; + case NVPTXISD::Suld1DV2I16Zero: return "NVPTXISD::Suld1DV2I16Zero"; + case NVPTXISD::Suld1DV2I32Zero: return "NVPTXISD::Suld1DV2I32Zero"; + case NVPTXISD::Suld1DV2I64Zero: return "NVPTXISD::Suld1DV2I64Zero"; + case NVPTXISD::Suld1DV4I8Zero: return "NVPTXISD::Suld1DV4I8Zero"; + case NVPTXISD::Suld1DV4I16Zero: return "NVPTXISD::Suld1DV4I16Zero"; + case NVPTXISD::Suld1DV4I32Zero: return "NVPTXISD::Suld1DV4I32Zero"; + + case NVPTXISD::Suld1DArrayI8Zero: return "NVPTXISD::Suld1DArrayI8Zero"; + case NVPTXISD::Suld1DArrayI16Zero: return "NVPTXISD::Suld1DArrayI16Zero"; + case NVPTXISD::Suld1DArrayI32Zero: return "NVPTXISD::Suld1DArrayI32Zero"; + case NVPTXISD::Suld1DArrayI64Zero: 
return "NVPTXISD::Suld1DArrayI64Zero"; + case NVPTXISD::Suld1DArrayV2I8Zero: return "NVPTXISD::Suld1DArrayV2I8Zero"; + case NVPTXISD::Suld1DArrayV2I16Zero: return "NVPTXISD::Suld1DArrayV2I16Zero"; + case NVPTXISD::Suld1DArrayV2I32Zero: return "NVPTXISD::Suld1DArrayV2I32Zero"; + case NVPTXISD::Suld1DArrayV2I64Zero: return "NVPTXISD::Suld1DArrayV2I64Zero"; + case NVPTXISD::Suld1DArrayV4I8Zero: return "NVPTXISD::Suld1DArrayV4I8Zero"; + case NVPTXISD::Suld1DArrayV4I16Zero: return "NVPTXISD::Suld1DArrayV4I16Zero"; + case NVPTXISD::Suld1DArrayV4I32Zero: return "NVPTXISD::Suld1DArrayV4I32Zero"; + + case NVPTXISD::Suld2DI8Zero: return "NVPTXISD::Suld2DI8Zero"; + case NVPTXISD::Suld2DI16Zero: return "NVPTXISD::Suld2DI16Zero"; + case NVPTXISD::Suld2DI32Zero: return "NVPTXISD::Suld2DI32Zero"; + case NVPTXISD::Suld2DI64Zero: return "NVPTXISD::Suld2DI64Zero"; + case NVPTXISD::Suld2DV2I8Zero: return "NVPTXISD::Suld2DV2I8Zero"; + case NVPTXISD::Suld2DV2I16Zero: return "NVPTXISD::Suld2DV2I16Zero"; + case NVPTXISD::Suld2DV2I32Zero: return "NVPTXISD::Suld2DV2I32Zero"; + case NVPTXISD::Suld2DV2I64Zero: return "NVPTXISD::Suld2DV2I64Zero"; + case NVPTXISD::Suld2DV4I8Zero: return "NVPTXISD::Suld2DV4I8Zero"; + case NVPTXISD::Suld2DV4I16Zero: return "NVPTXISD::Suld2DV4I16Zero"; + case NVPTXISD::Suld2DV4I32Zero: return "NVPTXISD::Suld2DV4I32Zero"; + + case NVPTXISD::Suld2DArrayI8Zero: return "NVPTXISD::Suld2DArrayI8Zero"; + case NVPTXISD::Suld2DArrayI16Zero: return "NVPTXISD::Suld2DArrayI16Zero"; + case NVPTXISD::Suld2DArrayI32Zero: return "NVPTXISD::Suld2DArrayI32Zero"; + case NVPTXISD::Suld2DArrayI64Zero: return "NVPTXISD::Suld2DArrayI64Zero"; + case NVPTXISD::Suld2DArrayV2I8Zero: return "NVPTXISD::Suld2DArrayV2I8Zero"; + case NVPTXISD::Suld2DArrayV2I16Zero: return "NVPTXISD::Suld2DArrayV2I16Zero"; + case NVPTXISD::Suld2DArrayV2I32Zero: return "NVPTXISD::Suld2DArrayV2I32Zero"; + case NVPTXISD::Suld2DArrayV2I64Zero: return "NVPTXISD::Suld2DArrayV2I64Zero"; + case 
NVPTXISD::Suld2DArrayV4I8Zero: return "NVPTXISD::Suld2DArrayV4I8Zero"; + case NVPTXISD::Suld2DArrayV4I16Zero: return "NVPTXISD::Suld2DArrayV4I16Zero"; + case NVPTXISD::Suld2DArrayV4I32Zero: return "NVPTXISD::Suld2DArrayV4I32Zero"; + + case NVPTXISD::Suld3DI8Zero: return "NVPTXISD::Suld3DI8Zero"; + case NVPTXISD::Suld3DI16Zero: return "NVPTXISD::Suld3DI16Zero"; + case NVPTXISD::Suld3DI32Zero: return "NVPTXISD::Suld3DI32Zero"; + case NVPTXISD::Suld3DI64Zero: return "NVPTXISD::Suld3DI64Zero"; + case NVPTXISD::Suld3DV2I8Zero: return "NVPTXISD::Suld3DV2I8Zero"; + case NVPTXISD::Suld3DV2I16Zero: return "NVPTXISD::Suld3DV2I16Zero"; + case NVPTXISD::Suld3DV2I32Zero: return "NVPTXISD::Suld3DV2I32Zero"; + case NVPTXISD::Suld3DV2I64Zero: return "NVPTXISD::Suld3DV2I64Zero"; + case NVPTXISD::Suld3DV4I8Zero: return "NVPTXISD::Suld3DV4I8Zero"; + case NVPTXISD::Suld3DV4I16Zero: return "NVPTXISD::Suld3DV4I16Zero"; + case NVPTXISD::Suld3DV4I32Zero: return "NVPTXISD::Suld3DV4I32Zero"; } } @@ -972,7 +1355,12 @@ SDValue NVPTXTargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI, // .param .align 16 .b8 retval0[<size-in-bytes>], or // .param .b<size-in-bits> retval0 unsigned resultsz = TD->getTypeAllocSizeInBits(retTy); - if (retTy->isSingleValueType()) { + // Emit ".param .b<size-in-bits> retval0" instead of byte arrays only for + // these three types to match the logic in + // NVPTXAsmPrinter::printReturnValStr and NVPTXTargetLowering::getPrototype. + // Plus, this behavior is consistent with nvcc's. 
+ if (retTy->isFloatingPointTy() || retTy->isIntegerTy() || + retTy->isPointerTy()) { // Scalar needs to be at least 32bit wide if (resultsz < 32) resultsz = 32; @@ -1068,8 +1456,8 @@ SDValue NVPTXTargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI, EVT ObjectVT = getValueType(retTy); unsigned NumElts = ObjectVT.getVectorNumElements(); EVT EltVT = ObjectVT.getVectorElementType(); - assert(nvTM->getTargetLowering()->getNumRegisters(F->getContext(), - ObjectVT) == NumElts && + assert(nvTM->getSubtargetImpl()->getTargetLowering()->getNumRegisters( + F->getContext(), ObjectVT) == NumElts && "Vector was not scalarized"); unsigned sz = EltVT.getSizeInBits(); bool needTruncate = sz < 8 ? true : false; @@ -1494,6 +1882,21 @@ NVPTXTargetLowering::LowerSTOREVector(SDValue Op, SelectionDAG &DAG) const { break; } + MemSDNode *MemSD = cast<MemSDNode>(N); + const DataLayout *TD = getDataLayout(); + + unsigned Align = MemSD->getAlignment(); + unsigned PrefAlign = + TD->getPrefTypeAlignment(ValVT.getTypeForEVT(*DAG.getContext())); + if (Align < PrefAlign) { + // This store is not sufficiently aligned, so bail out and let this vector + // store be scalarized. Note that we may still be able to emit smaller + // vector stores. For example, if we are storing a <4 x float> with an + // alignment of 8, this check will fail but the legalizer will try again + // with 2 x <2 x float>, which will succeed with an alignment of 8. 
+ return SDValue(); + } + unsigned Opcode = 0; EVT EltVT = ValVT.getVectorElementType(); unsigned NumElts = ValVT.getVectorNumElements(); @@ -1536,8 +1939,6 @@ NVPTXTargetLowering::LowerSTOREVector(SDValue Op, SelectionDAG &DAG) const { Ops.push_back(N->getOperand(i)); } - MemSDNode *MemSD = cast<MemSDNode>(N); - SDValue NewSt = DAG.getMemIntrinsicNode( Opcode, DL, DAG.getVTList(MVT::Other), Ops, MemSD->getMemoryVT(), MemSD->getMemOperand()); @@ -1632,7 +2033,7 @@ SDValue NVPTXTargetLowering::LowerFormalArguments( const Function *F = MF.getFunction(); const AttributeSet &PAL = F->getAttributes(); - const TargetLowering *TLI = DAG.getTarget().getTargetLowering(); + const TargetLowering *TLI = DAG.getSubtarget().getTargetLowering(); SDValue Root = DAG.getRoot(); std::vector<SDValue> OutChains; @@ -1746,7 +2147,7 @@ SDValue NVPTXTargetLowering::LowerFormalArguments( ISD::SEXTLOAD : ISD::ZEXTLOAD; p = DAG.getExtLoad(ExtOp, dl, Ins[InsIdx].VT, Root, srcAddr, MachinePointerInfo(srcValue), partVT, false, - false, partAlign); + false, false, partAlign); } else { p = DAG.getLoad(partVT, dl, Root, srcAddr, MachinePointerInfo(srcValue), false, false, false, @@ -1767,7 +2168,6 @@ SDValue NVPTXTargetLowering::LowerFormalArguments( unsigned NumElts = ObjectVT.getVectorNumElements(); assert(TLI->getNumRegisters(F->getContext(), ObjectVT) == NumElts && "Vector was not scalarized"); - unsigned Ofst = 0; EVT EltVT = ObjectVT.getVectorElementType(); // V1 load @@ -1776,10 +2176,8 @@ SDValue NVPTXTargetLowering::LowerFormalArguments( // We only have one element, so just directly load it Value *SrcValue = Constant::getNullValue(PointerType::get( EltVT.getTypeForEVT(F->getContext()), llvm::ADDRESS_SPACE_PARAM)); - SDValue SrcAddr = DAG.getNode(ISD::ADD, dl, getPointerTy(), Arg, - DAG.getConstant(Ofst, getPointerTy())); SDValue P = DAG.getLoad( - EltVT, dl, Root, SrcAddr, MachinePointerInfo(SrcValue), false, + EltVT, dl, Root, Arg, MachinePointerInfo(SrcValue), false, false, true, 
TD->getABITypeAlignment(EltVT.getTypeForEVT(F->getContext()))); if (P.getNode()) @@ -1788,7 +2186,6 @@ SDValue NVPTXTargetLowering::LowerFormalArguments( if (Ins[InsIdx].VT.getSizeInBits() > EltVT.getSizeInBits()) P = DAG.getNode(ISD::ANY_EXTEND, dl, Ins[InsIdx].VT, P); InVals.push_back(P); - Ofst += TD->getTypeAllocSize(EltVT.getTypeForEVT(F->getContext())); ++InsIdx; } else if (NumElts == 2) { // V2 load @@ -1796,10 +2193,8 @@ SDValue NVPTXTargetLowering::LowerFormalArguments( EVT VecVT = EVT::getVectorVT(F->getContext(), EltVT, 2); Value *SrcValue = Constant::getNullValue(PointerType::get( VecVT.getTypeForEVT(F->getContext()), llvm::ADDRESS_SPACE_PARAM)); - SDValue SrcAddr = DAG.getNode(ISD::ADD, dl, getPointerTy(), Arg, - DAG.getConstant(Ofst, getPointerTy())); SDValue P = DAG.getLoad( - VecVT, dl, Root, SrcAddr, MachinePointerInfo(SrcValue), false, + VecVT, dl, Root, Arg, MachinePointerInfo(SrcValue), false, false, true, TD->getABITypeAlignment(VecVT.getTypeForEVT(F->getContext()))); if (P.getNode()) @@ -1817,7 +2212,6 @@ SDValue NVPTXTargetLowering::LowerFormalArguments( InVals.push_back(Elt0); InVals.push_back(Elt1); - Ofst += TD->getTypeAllocSize(VecVT.getTypeForEVT(F->getContext())); InsIdx += 2; } else { // V4 loads @@ -1835,6 +2229,7 @@ SDValue NVPTXTargetLowering::LowerFormalArguments( VecSize = 2; } EVT VecVT = EVT::getVectorVT(F->getContext(), EltVT, VecSize); + unsigned Ofst = 0; for (unsigned i = 0; i < NumElts; i += VecSize) { Value *SrcValue = Constant::getNullValue( PointerType::get(VecVT.getTypeForEVT(F->getContext()), @@ -1879,6 +2274,7 @@ SDValue NVPTXTargetLowering::LowerFormalArguments( ISD::SEXTLOAD : ISD::ZEXTLOAD; p = DAG.getExtLoad(ExtOp, dl, Ins[InsIdx].VT, Root, Arg, MachinePointerInfo(srcValue), ObjectVT, false, false, + false, TD->getABITypeAlignment(ObjectVT.getTypeForEVT(F->getContext()))); } else { p = DAG.getLoad(Ins[InsIdx].VT, dl, Root, Arg, @@ -2132,90 +2528,357 @@ static unsigned getOpcForTextureInstr(unsigned Intrinsic) { 
default: return 0; - case Intrinsic::nvvm_tex_1d_v4f32_i32: - return NVPTXISD::Tex1DFloatI32; + case Intrinsic::nvvm_tex_1d_v4f32_s32: + return NVPTXISD::Tex1DFloatS32; case Intrinsic::nvvm_tex_1d_v4f32_f32: return NVPTXISD::Tex1DFloatFloat; case Intrinsic::nvvm_tex_1d_level_v4f32_f32: return NVPTXISD::Tex1DFloatFloatLevel; case Intrinsic::nvvm_tex_1d_grad_v4f32_f32: return NVPTXISD::Tex1DFloatFloatGrad; - case Intrinsic::nvvm_tex_1d_v4i32_i32: - return NVPTXISD::Tex1DI32I32; - case Intrinsic::nvvm_tex_1d_v4i32_f32: - return NVPTXISD::Tex1DI32Float; - case Intrinsic::nvvm_tex_1d_level_v4i32_f32: - return NVPTXISD::Tex1DI32FloatLevel; - case Intrinsic::nvvm_tex_1d_grad_v4i32_f32: - return NVPTXISD::Tex1DI32FloatGrad; - - case Intrinsic::nvvm_tex_1d_array_v4f32_i32: - return NVPTXISD::Tex1DArrayFloatI32; + case Intrinsic::nvvm_tex_1d_v4s32_s32: + return NVPTXISD::Tex1DS32S32; + case Intrinsic::nvvm_tex_1d_v4s32_f32: + return NVPTXISD::Tex1DS32Float; + case Intrinsic::nvvm_tex_1d_level_v4s32_f32: + return NVPTXISD::Tex1DS32FloatLevel; + case Intrinsic::nvvm_tex_1d_grad_v4s32_f32: + return NVPTXISD::Tex1DS32FloatGrad; + case Intrinsic::nvvm_tex_1d_v4u32_s32: + return NVPTXISD::Tex1DU32S32; + case Intrinsic::nvvm_tex_1d_v4u32_f32: + return NVPTXISD::Tex1DU32Float; + case Intrinsic::nvvm_tex_1d_level_v4u32_f32: + return NVPTXISD::Tex1DU32FloatLevel; + case Intrinsic::nvvm_tex_1d_grad_v4u32_f32: + return NVPTXISD::Tex1DU32FloatGrad; + + case Intrinsic::nvvm_tex_1d_array_v4f32_s32: + return NVPTXISD::Tex1DArrayFloatS32; case Intrinsic::nvvm_tex_1d_array_v4f32_f32: return NVPTXISD::Tex1DArrayFloatFloat; case Intrinsic::nvvm_tex_1d_array_level_v4f32_f32: return NVPTXISD::Tex1DArrayFloatFloatLevel; case Intrinsic::nvvm_tex_1d_array_grad_v4f32_f32: return NVPTXISD::Tex1DArrayFloatFloatGrad; - case Intrinsic::nvvm_tex_1d_array_v4i32_i32: - return NVPTXISD::Tex1DArrayI32I32; - case Intrinsic::nvvm_tex_1d_array_v4i32_f32: - return NVPTXISD::Tex1DArrayI32Float; - case 
Intrinsic::nvvm_tex_1d_array_level_v4i32_f32: - return NVPTXISD::Tex1DArrayI32FloatLevel; - case Intrinsic::nvvm_tex_1d_array_grad_v4i32_f32: - return NVPTXISD::Tex1DArrayI32FloatGrad; - - case Intrinsic::nvvm_tex_2d_v4f32_i32: - return NVPTXISD::Tex2DFloatI32; + case Intrinsic::nvvm_tex_1d_array_v4s32_s32: + return NVPTXISD::Tex1DArrayS32S32; + case Intrinsic::nvvm_tex_1d_array_v4s32_f32: + return NVPTXISD::Tex1DArrayS32Float; + case Intrinsic::nvvm_tex_1d_array_level_v4s32_f32: + return NVPTXISD::Tex1DArrayS32FloatLevel; + case Intrinsic::nvvm_tex_1d_array_grad_v4s32_f32: + return NVPTXISD::Tex1DArrayS32FloatGrad; + case Intrinsic::nvvm_tex_1d_array_v4u32_s32: + return NVPTXISD::Tex1DArrayU32S32; + case Intrinsic::nvvm_tex_1d_array_v4u32_f32: + return NVPTXISD::Tex1DArrayU32Float; + case Intrinsic::nvvm_tex_1d_array_level_v4u32_f32: + return NVPTXISD::Tex1DArrayU32FloatLevel; + case Intrinsic::nvvm_tex_1d_array_grad_v4u32_f32: + return NVPTXISD::Tex1DArrayU32FloatGrad; + + case Intrinsic::nvvm_tex_2d_v4f32_s32: + return NVPTXISD::Tex2DFloatS32; case Intrinsic::nvvm_tex_2d_v4f32_f32: return NVPTXISD::Tex2DFloatFloat; case Intrinsic::nvvm_tex_2d_level_v4f32_f32: return NVPTXISD::Tex2DFloatFloatLevel; case Intrinsic::nvvm_tex_2d_grad_v4f32_f32: return NVPTXISD::Tex2DFloatFloatGrad; - case Intrinsic::nvvm_tex_2d_v4i32_i32: - return NVPTXISD::Tex2DI32I32; - case Intrinsic::nvvm_tex_2d_v4i32_f32: - return NVPTXISD::Tex2DI32Float; - case Intrinsic::nvvm_tex_2d_level_v4i32_f32: - return NVPTXISD::Tex2DI32FloatLevel; - case Intrinsic::nvvm_tex_2d_grad_v4i32_f32: - return NVPTXISD::Tex2DI32FloatGrad; - - case Intrinsic::nvvm_tex_2d_array_v4f32_i32: - return NVPTXISD::Tex2DArrayFloatI32; + case Intrinsic::nvvm_tex_2d_v4s32_s32: + return NVPTXISD::Tex2DS32S32; + case Intrinsic::nvvm_tex_2d_v4s32_f32: + return NVPTXISD::Tex2DS32Float; + case Intrinsic::nvvm_tex_2d_level_v4s32_f32: + return NVPTXISD::Tex2DS32FloatLevel; + case Intrinsic::nvvm_tex_2d_grad_v4s32_f32: + return 
NVPTXISD::Tex2DS32FloatGrad; + case Intrinsic::nvvm_tex_2d_v4u32_s32: + return NVPTXISD::Tex2DU32S32; + case Intrinsic::nvvm_tex_2d_v4u32_f32: + return NVPTXISD::Tex2DU32Float; + case Intrinsic::nvvm_tex_2d_level_v4u32_f32: + return NVPTXISD::Tex2DU32FloatLevel; + case Intrinsic::nvvm_tex_2d_grad_v4u32_f32: + return NVPTXISD::Tex2DU32FloatGrad; + + case Intrinsic::nvvm_tex_2d_array_v4f32_s32: + return NVPTXISD::Tex2DArrayFloatS32; case Intrinsic::nvvm_tex_2d_array_v4f32_f32: return NVPTXISD::Tex2DArrayFloatFloat; case Intrinsic::nvvm_tex_2d_array_level_v4f32_f32: return NVPTXISD::Tex2DArrayFloatFloatLevel; case Intrinsic::nvvm_tex_2d_array_grad_v4f32_f32: return NVPTXISD::Tex2DArrayFloatFloatGrad; - case Intrinsic::nvvm_tex_2d_array_v4i32_i32: - return NVPTXISD::Tex2DArrayI32I32; - case Intrinsic::nvvm_tex_2d_array_v4i32_f32: - return NVPTXISD::Tex2DArrayI32Float; - case Intrinsic::nvvm_tex_2d_array_level_v4i32_f32: - return NVPTXISD::Tex2DArrayI32FloatLevel; - case Intrinsic::nvvm_tex_2d_array_grad_v4i32_f32: - return NVPTXISD::Tex2DArrayI32FloatGrad; - - case Intrinsic::nvvm_tex_3d_v4f32_i32: - return NVPTXISD::Tex3DFloatI32; + case Intrinsic::nvvm_tex_2d_array_v4s32_s32: + return NVPTXISD::Tex2DArrayS32S32; + case Intrinsic::nvvm_tex_2d_array_v4s32_f32: + return NVPTXISD::Tex2DArrayS32Float; + case Intrinsic::nvvm_tex_2d_array_level_v4s32_f32: + return NVPTXISD::Tex2DArrayS32FloatLevel; + case Intrinsic::nvvm_tex_2d_array_grad_v4s32_f32: + return NVPTXISD::Tex2DArrayS32FloatGrad; + case Intrinsic::nvvm_tex_2d_array_v4u32_s32: + return NVPTXISD::Tex2DArrayU32S32; + case Intrinsic::nvvm_tex_2d_array_v4u32_f32: + return NVPTXISD::Tex2DArrayU32Float; + case Intrinsic::nvvm_tex_2d_array_level_v4u32_f32: + return NVPTXISD::Tex2DArrayU32FloatLevel; + case Intrinsic::nvvm_tex_2d_array_grad_v4u32_f32: + return NVPTXISD::Tex2DArrayU32FloatGrad; + + case Intrinsic::nvvm_tex_3d_v4f32_s32: + return NVPTXISD::Tex3DFloatS32; case Intrinsic::nvvm_tex_3d_v4f32_f32: return 
NVPTXISD::Tex3DFloatFloat; case Intrinsic::nvvm_tex_3d_level_v4f32_f32: return NVPTXISD::Tex3DFloatFloatLevel; case Intrinsic::nvvm_tex_3d_grad_v4f32_f32: return NVPTXISD::Tex3DFloatFloatGrad; - case Intrinsic::nvvm_tex_3d_v4i32_i32: - return NVPTXISD::Tex3DI32I32; - case Intrinsic::nvvm_tex_3d_v4i32_f32: - return NVPTXISD::Tex3DI32Float; - case Intrinsic::nvvm_tex_3d_level_v4i32_f32: - return NVPTXISD::Tex3DI32FloatLevel; - case Intrinsic::nvvm_tex_3d_grad_v4i32_f32: - return NVPTXISD::Tex3DI32FloatGrad; + case Intrinsic::nvvm_tex_3d_v4s32_s32: + return NVPTXISD::Tex3DS32S32; + case Intrinsic::nvvm_tex_3d_v4s32_f32: + return NVPTXISD::Tex3DS32Float; + case Intrinsic::nvvm_tex_3d_level_v4s32_f32: + return NVPTXISD::Tex3DS32FloatLevel; + case Intrinsic::nvvm_tex_3d_grad_v4s32_f32: + return NVPTXISD::Tex3DS32FloatGrad; + case Intrinsic::nvvm_tex_3d_v4u32_s32: + return NVPTXISD::Tex3DU32S32; + case Intrinsic::nvvm_tex_3d_v4u32_f32: + return NVPTXISD::Tex3DU32Float; + case Intrinsic::nvvm_tex_3d_level_v4u32_f32: + return NVPTXISD::Tex3DU32FloatLevel; + case Intrinsic::nvvm_tex_3d_grad_v4u32_f32: + return NVPTXISD::Tex3DU32FloatGrad; + + case Intrinsic::nvvm_tex_cube_v4f32_f32: + return NVPTXISD::TexCubeFloatFloat; + case Intrinsic::nvvm_tex_cube_level_v4f32_f32: + return NVPTXISD::TexCubeFloatFloatLevel; + case Intrinsic::nvvm_tex_cube_v4s32_f32: + return NVPTXISD::TexCubeS32Float; + case Intrinsic::nvvm_tex_cube_level_v4s32_f32: + return NVPTXISD::TexCubeS32FloatLevel; + case Intrinsic::nvvm_tex_cube_v4u32_f32: + return NVPTXISD::TexCubeU32Float; + case Intrinsic::nvvm_tex_cube_level_v4u32_f32: + return NVPTXISD::TexCubeU32FloatLevel; + + case Intrinsic::nvvm_tex_cube_array_v4f32_f32: + return NVPTXISD::TexCubeArrayFloatFloat; + case Intrinsic::nvvm_tex_cube_array_level_v4f32_f32: + return NVPTXISD::TexCubeArrayFloatFloatLevel; + case Intrinsic::nvvm_tex_cube_array_v4s32_f32: + return NVPTXISD::TexCubeArrayS32Float; + case 
Intrinsic::nvvm_tex_cube_array_level_v4s32_f32: + return NVPTXISD::TexCubeArrayS32FloatLevel; + case Intrinsic::nvvm_tex_cube_array_v4u32_f32: + return NVPTXISD::TexCubeArrayU32Float; + case Intrinsic::nvvm_tex_cube_array_level_v4u32_f32: + return NVPTXISD::TexCubeArrayU32FloatLevel; + + case Intrinsic::nvvm_tld4_r_2d_v4f32_f32: + return NVPTXISD::Tld4R2DFloatFloat; + case Intrinsic::nvvm_tld4_g_2d_v4f32_f32: + return NVPTXISD::Tld4G2DFloatFloat; + case Intrinsic::nvvm_tld4_b_2d_v4f32_f32: + return NVPTXISD::Tld4B2DFloatFloat; + case Intrinsic::nvvm_tld4_a_2d_v4f32_f32: + return NVPTXISD::Tld4A2DFloatFloat; + case Intrinsic::nvvm_tld4_r_2d_v4s32_f32: + return NVPTXISD::Tld4R2DS64Float; + case Intrinsic::nvvm_tld4_g_2d_v4s32_f32: + return NVPTXISD::Tld4G2DS64Float; + case Intrinsic::nvvm_tld4_b_2d_v4s32_f32: + return NVPTXISD::Tld4B2DS64Float; + case Intrinsic::nvvm_tld4_a_2d_v4s32_f32: + return NVPTXISD::Tld4A2DS64Float; + case Intrinsic::nvvm_tld4_r_2d_v4u32_f32: + return NVPTXISD::Tld4R2DU64Float; + case Intrinsic::nvvm_tld4_g_2d_v4u32_f32: + return NVPTXISD::Tld4G2DU64Float; + case Intrinsic::nvvm_tld4_b_2d_v4u32_f32: + return NVPTXISD::Tld4B2DU64Float; + case Intrinsic::nvvm_tld4_a_2d_v4u32_f32: + return NVPTXISD::Tld4A2DU64Float; + + case Intrinsic::nvvm_tex_unified_1d_v4f32_s32: + return NVPTXISD::TexUnified1DFloatS32; + case Intrinsic::nvvm_tex_unified_1d_v4f32_f32: + return NVPTXISD::TexUnified1DFloatFloat; + case Intrinsic::nvvm_tex_unified_1d_level_v4f32_f32: + return NVPTXISD::TexUnified1DFloatFloatLevel; + case Intrinsic::nvvm_tex_unified_1d_grad_v4f32_f32: + return NVPTXISD::TexUnified1DFloatFloatGrad; + case Intrinsic::nvvm_tex_unified_1d_v4s32_s32: + return NVPTXISD::TexUnified1DS32S32; + case Intrinsic::nvvm_tex_unified_1d_v4s32_f32: + return NVPTXISD::TexUnified1DS32Float; + case Intrinsic::nvvm_tex_unified_1d_level_v4s32_f32: + return NVPTXISD::TexUnified1DS32FloatLevel; + case Intrinsic::nvvm_tex_unified_1d_grad_v4s32_f32: + return 
NVPTXISD::TexUnified1DS32FloatGrad; + case Intrinsic::nvvm_tex_unified_1d_v4u32_s32: + return NVPTXISD::TexUnified1DU32S32; + case Intrinsic::nvvm_tex_unified_1d_v4u32_f32: + return NVPTXISD::TexUnified1DU32Float; + case Intrinsic::nvvm_tex_unified_1d_level_v4u32_f32: + return NVPTXISD::TexUnified1DU32FloatLevel; + case Intrinsic::nvvm_tex_unified_1d_grad_v4u32_f32: + return NVPTXISD::TexUnified1DU32FloatGrad; + + case Intrinsic::nvvm_tex_unified_1d_array_v4f32_s32: + return NVPTXISD::TexUnified1DArrayFloatS32; + case Intrinsic::nvvm_tex_unified_1d_array_v4f32_f32: + return NVPTXISD::TexUnified1DArrayFloatFloat; + case Intrinsic::nvvm_tex_unified_1d_array_level_v4f32_f32: + return NVPTXISD::TexUnified1DArrayFloatFloatLevel; + case Intrinsic::nvvm_tex_unified_1d_array_grad_v4f32_f32: + return NVPTXISD::TexUnified1DArrayFloatFloatGrad; + case Intrinsic::nvvm_tex_unified_1d_array_v4s32_s32: + return NVPTXISD::TexUnified1DArrayS32S32; + case Intrinsic::nvvm_tex_unified_1d_array_v4s32_f32: + return NVPTXISD::TexUnified1DArrayS32Float; + case Intrinsic::nvvm_tex_unified_1d_array_level_v4s32_f32: + return NVPTXISD::TexUnified1DArrayS32FloatLevel; + case Intrinsic::nvvm_tex_unified_1d_array_grad_v4s32_f32: + return NVPTXISD::TexUnified1DArrayS32FloatGrad; + case Intrinsic::nvvm_tex_unified_1d_array_v4u32_s32: + return NVPTXISD::TexUnified1DArrayU32S32; + case Intrinsic::nvvm_tex_unified_1d_array_v4u32_f32: + return NVPTXISD::TexUnified1DArrayU32Float; + case Intrinsic::nvvm_tex_unified_1d_array_level_v4u32_f32: + return NVPTXISD::TexUnified1DArrayU32FloatLevel; + case Intrinsic::nvvm_tex_unified_1d_array_grad_v4u32_f32: + return NVPTXISD::TexUnified1DArrayU32FloatGrad; + + case Intrinsic::nvvm_tex_unified_2d_v4f32_s32: + return NVPTXISD::TexUnified2DFloatS32; + case Intrinsic::nvvm_tex_unified_2d_v4f32_f32: + return NVPTXISD::TexUnified2DFloatFloat; + case Intrinsic::nvvm_tex_unified_2d_level_v4f32_f32: + return NVPTXISD::TexUnified2DFloatFloatLevel; + case 
Intrinsic::nvvm_tex_unified_2d_grad_v4f32_f32: + return NVPTXISD::TexUnified2DFloatFloatGrad; + case Intrinsic::nvvm_tex_unified_2d_v4s32_s32: + return NVPTXISD::TexUnified2DS32S32; + case Intrinsic::nvvm_tex_unified_2d_v4s32_f32: + return NVPTXISD::TexUnified2DS32Float; + case Intrinsic::nvvm_tex_unified_2d_level_v4s32_f32: + return NVPTXISD::TexUnified2DS32FloatLevel; + case Intrinsic::nvvm_tex_unified_2d_grad_v4s32_f32: + return NVPTXISD::TexUnified2DS32FloatGrad; + case Intrinsic::nvvm_tex_unified_2d_v4u32_s32: + return NVPTXISD::TexUnified2DU32S32; + case Intrinsic::nvvm_tex_unified_2d_v4u32_f32: + return NVPTXISD::TexUnified2DU32Float; + case Intrinsic::nvvm_tex_unified_2d_level_v4u32_f32: + return NVPTXISD::TexUnified2DU32FloatLevel; + case Intrinsic::nvvm_tex_unified_2d_grad_v4u32_f32: + return NVPTXISD::TexUnified2DU32FloatGrad; + + case Intrinsic::nvvm_tex_unified_2d_array_v4f32_s32: + return NVPTXISD::TexUnified2DArrayFloatS32; + case Intrinsic::nvvm_tex_unified_2d_array_v4f32_f32: + return NVPTXISD::TexUnified2DArrayFloatFloat; + case Intrinsic::nvvm_tex_unified_2d_array_level_v4f32_f32: + return NVPTXISD::TexUnified2DArrayFloatFloatLevel; + case Intrinsic::nvvm_tex_unified_2d_array_grad_v4f32_f32: + return NVPTXISD::TexUnified2DArrayFloatFloatGrad; + case Intrinsic::nvvm_tex_unified_2d_array_v4s32_s32: + return NVPTXISD::TexUnified2DArrayS32S32; + case Intrinsic::nvvm_tex_unified_2d_array_v4s32_f32: + return NVPTXISD::TexUnified2DArrayS32Float; + case Intrinsic::nvvm_tex_unified_2d_array_level_v4s32_f32: + return NVPTXISD::TexUnified2DArrayS32FloatLevel; + case Intrinsic::nvvm_tex_unified_2d_array_grad_v4s32_f32: + return NVPTXISD::TexUnified2DArrayS32FloatGrad; + case Intrinsic::nvvm_tex_unified_2d_array_v4u32_s32: + return NVPTXISD::TexUnified2DArrayU32S32; + case Intrinsic::nvvm_tex_unified_2d_array_v4u32_f32: + return NVPTXISD::TexUnified2DArrayU32Float; + case Intrinsic::nvvm_tex_unified_2d_array_level_v4u32_f32: + return 
NVPTXISD::TexUnified2DArrayU32FloatLevel; + case Intrinsic::nvvm_tex_unified_2d_array_grad_v4u32_f32: + return NVPTXISD::TexUnified2DArrayU32FloatGrad; + + case Intrinsic::nvvm_tex_unified_3d_v4f32_s32: + return NVPTXISD::TexUnified3DFloatS32; + case Intrinsic::nvvm_tex_unified_3d_v4f32_f32: + return NVPTXISD::TexUnified3DFloatFloat; + case Intrinsic::nvvm_tex_unified_3d_level_v4f32_f32: + return NVPTXISD::TexUnified3DFloatFloatLevel; + case Intrinsic::nvvm_tex_unified_3d_grad_v4f32_f32: + return NVPTXISD::TexUnified3DFloatFloatGrad; + case Intrinsic::nvvm_tex_unified_3d_v4s32_s32: + return NVPTXISD::TexUnified3DS32S32; + case Intrinsic::nvvm_tex_unified_3d_v4s32_f32: + return NVPTXISD::TexUnified3DS32Float; + case Intrinsic::nvvm_tex_unified_3d_level_v4s32_f32: + return NVPTXISD::TexUnified3DS32FloatLevel; + case Intrinsic::nvvm_tex_unified_3d_grad_v4s32_f32: + return NVPTXISD::TexUnified3DS32FloatGrad; + case Intrinsic::nvvm_tex_unified_3d_v4u32_s32: + return NVPTXISD::TexUnified3DU32S32; + case Intrinsic::nvvm_tex_unified_3d_v4u32_f32: + return NVPTXISD::TexUnified3DU32Float; + case Intrinsic::nvvm_tex_unified_3d_level_v4u32_f32: + return NVPTXISD::TexUnified3DU32FloatLevel; + case Intrinsic::nvvm_tex_unified_3d_grad_v4u32_f32: + return NVPTXISD::TexUnified3DU32FloatGrad; + + case Intrinsic::nvvm_tex_unified_cube_v4f32_f32: + return NVPTXISD::TexUnifiedCubeFloatFloat; + case Intrinsic::nvvm_tex_unified_cube_level_v4f32_f32: + return NVPTXISD::TexUnifiedCubeFloatFloatLevel; + case Intrinsic::nvvm_tex_unified_cube_v4s32_f32: + return NVPTXISD::TexUnifiedCubeS32Float; + case Intrinsic::nvvm_tex_unified_cube_level_v4s32_f32: + return NVPTXISD::TexUnifiedCubeS32FloatLevel; + case Intrinsic::nvvm_tex_unified_cube_v4u32_f32: + return NVPTXISD::TexUnifiedCubeU32Float; + case Intrinsic::nvvm_tex_unified_cube_level_v4u32_f32: + return NVPTXISD::TexUnifiedCubeU32FloatLevel; + + case Intrinsic::nvvm_tex_unified_cube_array_v4f32_f32: + return 
NVPTXISD::TexUnifiedCubeArrayFloatFloat; + case Intrinsic::nvvm_tex_unified_cube_array_level_v4f32_f32: + return NVPTXISD::TexUnifiedCubeArrayFloatFloatLevel; + case Intrinsic::nvvm_tex_unified_cube_array_v4s32_f32: + return NVPTXISD::TexUnifiedCubeArrayS32Float; + case Intrinsic::nvvm_tex_unified_cube_array_level_v4s32_f32: + return NVPTXISD::TexUnifiedCubeArrayS32FloatLevel; + case Intrinsic::nvvm_tex_unified_cube_array_v4u32_f32: + return NVPTXISD::TexUnifiedCubeArrayU32Float; + case Intrinsic::nvvm_tex_unified_cube_array_level_v4u32_f32: + return NVPTXISD::TexUnifiedCubeArrayU32FloatLevel; + + case Intrinsic::nvvm_tld4_unified_r_2d_v4f32_f32: + return NVPTXISD::Tld4UnifiedR2DFloatFloat; + case Intrinsic::nvvm_tld4_unified_g_2d_v4f32_f32: + return NVPTXISD::Tld4UnifiedG2DFloatFloat; + case Intrinsic::nvvm_tld4_unified_b_2d_v4f32_f32: + return NVPTXISD::Tld4UnifiedB2DFloatFloat; + case Intrinsic::nvvm_tld4_unified_a_2d_v4f32_f32: + return NVPTXISD::Tld4UnifiedA2DFloatFloat; + case Intrinsic::nvvm_tld4_unified_r_2d_v4s32_f32: + return NVPTXISD::Tld4UnifiedR2DS64Float; + case Intrinsic::nvvm_tld4_unified_g_2d_v4s32_f32: + return NVPTXISD::Tld4UnifiedG2DS64Float; + case Intrinsic::nvvm_tld4_unified_b_2d_v4s32_f32: + return NVPTXISD::Tld4UnifiedB2DS64Float; + case Intrinsic::nvvm_tld4_unified_a_2d_v4s32_f32: + return NVPTXISD::Tld4UnifiedA2DS64Float; + case Intrinsic::nvvm_tld4_unified_r_2d_v4u32_f32: + return NVPTXISD::Tld4UnifiedR2DU64Float; + case Intrinsic::nvvm_tld4_unified_g_2d_v4u32_f32: + return NVPTXISD::Tld4UnifiedG2DU64Float; + case Intrinsic::nvvm_tld4_unified_b_2d_v4u32_f32: + return NVPTXISD::Tld4UnifiedB2DU64Float; + case Intrinsic::nvvm_tld4_unified_a_2d_v4u32_f32: + return NVPTXISD::Tld4UnifiedA2DU64Float; } } @@ -2223,18 +2886,132 @@ static unsigned getOpcForSurfaceInstr(unsigned Intrinsic) { switch (Intrinsic) { default: return 0; + case Intrinsic::nvvm_suld_1d_i8_clamp: + return NVPTXISD::Suld1DI8Clamp; + case Intrinsic::nvvm_suld_1d_i16_clamp: + 
return NVPTXISD::Suld1DI16Clamp; + case Intrinsic::nvvm_suld_1d_i32_clamp: + return NVPTXISD::Suld1DI32Clamp; + case Intrinsic::nvvm_suld_1d_i64_clamp: + return NVPTXISD::Suld1DI64Clamp; + case Intrinsic::nvvm_suld_1d_v2i8_clamp: + return NVPTXISD::Suld1DV2I8Clamp; + case Intrinsic::nvvm_suld_1d_v2i16_clamp: + return NVPTXISD::Suld1DV2I16Clamp; + case Intrinsic::nvvm_suld_1d_v2i32_clamp: + return NVPTXISD::Suld1DV2I32Clamp; + case Intrinsic::nvvm_suld_1d_v2i64_clamp: + return NVPTXISD::Suld1DV2I64Clamp; + case Intrinsic::nvvm_suld_1d_v4i8_clamp: + return NVPTXISD::Suld1DV4I8Clamp; + case Intrinsic::nvvm_suld_1d_v4i16_clamp: + return NVPTXISD::Suld1DV4I16Clamp; + case Intrinsic::nvvm_suld_1d_v4i32_clamp: + return NVPTXISD::Suld1DV4I32Clamp; + case Intrinsic::nvvm_suld_1d_array_i8_clamp: + return NVPTXISD::Suld1DArrayI8Clamp; + case Intrinsic::nvvm_suld_1d_array_i16_clamp: + return NVPTXISD::Suld1DArrayI16Clamp; + case Intrinsic::nvvm_suld_1d_array_i32_clamp: + return NVPTXISD::Suld1DArrayI32Clamp; + case Intrinsic::nvvm_suld_1d_array_i64_clamp: + return NVPTXISD::Suld1DArrayI64Clamp; + case Intrinsic::nvvm_suld_1d_array_v2i8_clamp: + return NVPTXISD::Suld1DArrayV2I8Clamp; + case Intrinsic::nvvm_suld_1d_array_v2i16_clamp: + return NVPTXISD::Suld1DArrayV2I16Clamp; + case Intrinsic::nvvm_suld_1d_array_v2i32_clamp: + return NVPTXISD::Suld1DArrayV2I32Clamp; + case Intrinsic::nvvm_suld_1d_array_v2i64_clamp: + return NVPTXISD::Suld1DArrayV2I64Clamp; + case Intrinsic::nvvm_suld_1d_array_v4i8_clamp: + return NVPTXISD::Suld1DArrayV4I8Clamp; + case Intrinsic::nvvm_suld_1d_array_v4i16_clamp: + return NVPTXISD::Suld1DArrayV4I16Clamp; + case Intrinsic::nvvm_suld_1d_array_v4i32_clamp: + return NVPTXISD::Suld1DArrayV4I32Clamp; + case Intrinsic::nvvm_suld_2d_i8_clamp: + return NVPTXISD::Suld2DI8Clamp; + case Intrinsic::nvvm_suld_2d_i16_clamp: + return NVPTXISD::Suld2DI16Clamp; + case Intrinsic::nvvm_suld_2d_i32_clamp: + return NVPTXISD::Suld2DI32Clamp; + case 
Intrinsic::nvvm_suld_2d_i64_clamp: + return NVPTXISD::Suld2DI64Clamp; + case Intrinsic::nvvm_suld_2d_v2i8_clamp: + return NVPTXISD::Suld2DV2I8Clamp; + case Intrinsic::nvvm_suld_2d_v2i16_clamp: + return NVPTXISD::Suld2DV2I16Clamp; + case Intrinsic::nvvm_suld_2d_v2i32_clamp: + return NVPTXISD::Suld2DV2I32Clamp; + case Intrinsic::nvvm_suld_2d_v2i64_clamp: + return NVPTXISD::Suld2DV2I64Clamp; + case Intrinsic::nvvm_suld_2d_v4i8_clamp: + return NVPTXISD::Suld2DV4I8Clamp; + case Intrinsic::nvvm_suld_2d_v4i16_clamp: + return NVPTXISD::Suld2DV4I16Clamp; + case Intrinsic::nvvm_suld_2d_v4i32_clamp: + return NVPTXISD::Suld2DV4I32Clamp; + case Intrinsic::nvvm_suld_2d_array_i8_clamp: + return NVPTXISD::Suld2DArrayI8Clamp; + case Intrinsic::nvvm_suld_2d_array_i16_clamp: + return NVPTXISD::Suld2DArrayI16Clamp; + case Intrinsic::nvvm_suld_2d_array_i32_clamp: + return NVPTXISD::Suld2DArrayI32Clamp; + case Intrinsic::nvvm_suld_2d_array_i64_clamp: + return NVPTXISD::Suld2DArrayI64Clamp; + case Intrinsic::nvvm_suld_2d_array_v2i8_clamp: + return NVPTXISD::Suld2DArrayV2I8Clamp; + case Intrinsic::nvvm_suld_2d_array_v2i16_clamp: + return NVPTXISD::Suld2DArrayV2I16Clamp; + case Intrinsic::nvvm_suld_2d_array_v2i32_clamp: + return NVPTXISD::Suld2DArrayV2I32Clamp; + case Intrinsic::nvvm_suld_2d_array_v2i64_clamp: + return NVPTXISD::Suld2DArrayV2I64Clamp; + case Intrinsic::nvvm_suld_2d_array_v4i8_clamp: + return NVPTXISD::Suld2DArrayV4I8Clamp; + case Intrinsic::nvvm_suld_2d_array_v4i16_clamp: + return NVPTXISD::Suld2DArrayV4I16Clamp; + case Intrinsic::nvvm_suld_2d_array_v4i32_clamp: + return NVPTXISD::Suld2DArrayV4I32Clamp; + case Intrinsic::nvvm_suld_3d_i8_clamp: + return NVPTXISD::Suld3DI8Clamp; + case Intrinsic::nvvm_suld_3d_i16_clamp: + return NVPTXISD::Suld3DI16Clamp; + case Intrinsic::nvvm_suld_3d_i32_clamp: + return NVPTXISD::Suld3DI32Clamp; + case Intrinsic::nvvm_suld_3d_i64_clamp: + return NVPTXISD::Suld3DI64Clamp; + case Intrinsic::nvvm_suld_3d_v2i8_clamp: + return 
NVPTXISD::Suld3DV2I8Clamp; + case Intrinsic::nvvm_suld_3d_v2i16_clamp: + return NVPTXISD::Suld3DV2I16Clamp; + case Intrinsic::nvvm_suld_3d_v2i32_clamp: + return NVPTXISD::Suld3DV2I32Clamp; + case Intrinsic::nvvm_suld_3d_v2i64_clamp: + return NVPTXISD::Suld3DV2I64Clamp; + case Intrinsic::nvvm_suld_3d_v4i8_clamp: + return NVPTXISD::Suld3DV4I8Clamp; + case Intrinsic::nvvm_suld_3d_v4i16_clamp: + return NVPTXISD::Suld3DV4I16Clamp; + case Intrinsic::nvvm_suld_3d_v4i32_clamp: + return NVPTXISD::Suld3DV4I32Clamp; case Intrinsic::nvvm_suld_1d_i8_trap: return NVPTXISD::Suld1DI8Trap; case Intrinsic::nvvm_suld_1d_i16_trap: return NVPTXISD::Suld1DI16Trap; case Intrinsic::nvvm_suld_1d_i32_trap: return NVPTXISD::Suld1DI32Trap; + case Intrinsic::nvvm_suld_1d_i64_trap: + return NVPTXISD::Suld1DI64Trap; case Intrinsic::nvvm_suld_1d_v2i8_trap: return NVPTXISD::Suld1DV2I8Trap; case Intrinsic::nvvm_suld_1d_v2i16_trap: return NVPTXISD::Suld1DV2I16Trap; case Intrinsic::nvvm_suld_1d_v2i32_trap: return NVPTXISD::Suld1DV2I32Trap; + case Intrinsic::nvvm_suld_1d_v2i64_trap: + return NVPTXISD::Suld1DV2I64Trap; case Intrinsic::nvvm_suld_1d_v4i8_trap: return NVPTXISD::Suld1DV4I8Trap; case Intrinsic::nvvm_suld_1d_v4i16_trap: @@ -2247,12 +3024,16 @@ static unsigned getOpcForSurfaceInstr(unsigned Intrinsic) { return NVPTXISD::Suld1DArrayI16Trap; case Intrinsic::nvvm_suld_1d_array_i32_trap: return NVPTXISD::Suld1DArrayI32Trap; + case Intrinsic::nvvm_suld_1d_array_i64_trap: + return NVPTXISD::Suld1DArrayI64Trap; case Intrinsic::nvvm_suld_1d_array_v2i8_trap: return NVPTXISD::Suld1DArrayV2I8Trap; case Intrinsic::nvvm_suld_1d_array_v2i16_trap: return NVPTXISD::Suld1DArrayV2I16Trap; case Intrinsic::nvvm_suld_1d_array_v2i32_trap: return NVPTXISD::Suld1DArrayV2I32Trap; + case Intrinsic::nvvm_suld_1d_array_v2i64_trap: + return NVPTXISD::Suld1DArrayV2I64Trap; case Intrinsic::nvvm_suld_1d_array_v4i8_trap: return NVPTXISD::Suld1DArrayV4I8Trap; case Intrinsic::nvvm_suld_1d_array_v4i16_trap: @@ -2265,12 +3046,16 
@@ static unsigned getOpcForSurfaceInstr(unsigned Intrinsic) { return NVPTXISD::Suld2DI16Trap; case Intrinsic::nvvm_suld_2d_i32_trap: return NVPTXISD::Suld2DI32Trap; + case Intrinsic::nvvm_suld_2d_i64_trap: + return NVPTXISD::Suld2DI64Trap; case Intrinsic::nvvm_suld_2d_v2i8_trap: return NVPTXISD::Suld2DV2I8Trap; case Intrinsic::nvvm_suld_2d_v2i16_trap: return NVPTXISD::Suld2DV2I16Trap; case Intrinsic::nvvm_suld_2d_v2i32_trap: return NVPTXISD::Suld2DV2I32Trap; + case Intrinsic::nvvm_suld_2d_v2i64_trap: + return NVPTXISD::Suld2DV2I64Trap; case Intrinsic::nvvm_suld_2d_v4i8_trap: return NVPTXISD::Suld2DV4I8Trap; case Intrinsic::nvvm_suld_2d_v4i16_trap: @@ -2283,12 +3068,16 @@ static unsigned getOpcForSurfaceInstr(unsigned Intrinsic) { return NVPTXISD::Suld2DArrayI16Trap; case Intrinsic::nvvm_suld_2d_array_i32_trap: return NVPTXISD::Suld2DArrayI32Trap; + case Intrinsic::nvvm_suld_2d_array_i64_trap: + return NVPTXISD::Suld2DArrayI64Trap; case Intrinsic::nvvm_suld_2d_array_v2i8_trap: return NVPTXISD::Suld2DArrayV2I8Trap; case Intrinsic::nvvm_suld_2d_array_v2i16_trap: return NVPTXISD::Suld2DArrayV2I16Trap; case Intrinsic::nvvm_suld_2d_array_v2i32_trap: return NVPTXISD::Suld2DArrayV2I32Trap; + case Intrinsic::nvvm_suld_2d_array_v2i64_trap: + return NVPTXISD::Suld2DArrayV2I64Trap; case Intrinsic::nvvm_suld_2d_array_v4i8_trap: return NVPTXISD::Suld2DArrayV4I8Trap; case Intrinsic::nvvm_suld_2d_array_v4i16_trap: @@ -2301,18 +3090,132 @@ static unsigned getOpcForSurfaceInstr(unsigned Intrinsic) { return NVPTXISD::Suld3DI16Trap; case Intrinsic::nvvm_suld_3d_i32_trap: return NVPTXISD::Suld3DI32Trap; + case Intrinsic::nvvm_suld_3d_i64_trap: + return NVPTXISD::Suld3DI64Trap; case Intrinsic::nvvm_suld_3d_v2i8_trap: return NVPTXISD::Suld3DV2I8Trap; case Intrinsic::nvvm_suld_3d_v2i16_trap: return NVPTXISD::Suld3DV2I16Trap; case Intrinsic::nvvm_suld_3d_v2i32_trap: return NVPTXISD::Suld3DV2I32Trap; + case Intrinsic::nvvm_suld_3d_v2i64_trap: + return NVPTXISD::Suld3DV2I64Trap; case 
Intrinsic::nvvm_suld_3d_v4i8_trap: return NVPTXISD::Suld3DV4I8Trap; case Intrinsic::nvvm_suld_3d_v4i16_trap: return NVPTXISD::Suld3DV4I16Trap; case Intrinsic::nvvm_suld_3d_v4i32_trap: return NVPTXISD::Suld3DV4I32Trap; + case Intrinsic::nvvm_suld_1d_i8_zero: + return NVPTXISD::Suld1DI8Zero; + case Intrinsic::nvvm_suld_1d_i16_zero: + return NVPTXISD::Suld1DI16Zero; + case Intrinsic::nvvm_suld_1d_i32_zero: + return NVPTXISD::Suld1DI32Zero; + case Intrinsic::nvvm_suld_1d_i64_zero: + return NVPTXISD::Suld1DI64Zero; + case Intrinsic::nvvm_suld_1d_v2i8_zero: + return NVPTXISD::Suld1DV2I8Zero; + case Intrinsic::nvvm_suld_1d_v2i16_zero: + return NVPTXISD::Suld1DV2I16Zero; + case Intrinsic::nvvm_suld_1d_v2i32_zero: + return NVPTXISD::Suld1DV2I32Zero; + case Intrinsic::nvvm_suld_1d_v2i64_zero: + return NVPTXISD::Suld1DV2I64Zero; + case Intrinsic::nvvm_suld_1d_v4i8_zero: + return NVPTXISD::Suld1DV4I8Zero; + case Intrinsic::nvvm_suld_1d_v4i16_zero: + return NVPTXISD::Suld1DV4I16Zero; + case Intrinsic::nvvm_suld_1d_v4i32_zero: + return NVPTXISD::Suld1DV4I32Zero; + case Intrinsic::nvvm_suld_1d_array_i8_zero: + return NVPTXISD::Suld1DArrayI8Zero; + case Intrinsic::nvvm_suld_1d_array_i16_zero: + return NVPTXISD::Suld1DArrayI16Zero; + case Intrinsic::nvvm_suld_1d_array_i32_zero: + return NVPTXISD::Suld1DArrayI32Zero; + case Intrinsic::nvvm_suld_1d_array_i64_zero: + return NVPTXISD::Suld1DArrayI64Zero; + case Intrinsic::nvvm_suld_1d_array_v2i8_zero: + return NVPTXISD::Suld1DArrayV2I8Zero; + case Intrinsic::nvvm_suld_1d_array_v2i16_zero: + return NVPTXISD::Suld1DArrayV2I16Zero; + case Intrinsic::nvvm_suld_1d_array_v2i32_zero: + return NVPTXISD::Suld1DArrayV2I32Zero; + case Intrinsic::nvvm_suld_1d_array_v2i64_zero: + return NVPTXISD::Suld1DArrayV2I64Zero; + case Intrinsic::nvvm_suld_1d_array_v4i8_zero: + return NVPTXISD::Suld1DArrayV4I8Zero; + case Intrinsic::nvvm_suld_1d_array_v4i16_zero: + return NVPTXISD::Suld1DArrayV4I16Zero; + case Intrinsic::nvvm_suld_1d_array_v4i32_zero: + 
return NVPTXISD::Suld1DArrayV4I32Zero; + case Intrinsic::nvvm_suld_2d_i8_zero: + return NVPTXISD::Suld2DI8Zero; + case Intrinsic::nvvm_suld_2d_i16_zero: + return NVPTXISD::Suld2DI16Zero; + case Intrinsic::nvvm_suld_2d_i32_zero: + return NVPTXISD::Suld2DI32Zero; + case Intrinsic::nvvm_suld_2d_i64_zero: + return NVPTXISD::Suld2DI64Zero; + case Intrinsic::nvvm_suld_2d_v2i8_zero: + return NVPTXISD::Suld2DV2I8Zero; + case Intrinsic::nvvm_suld_2d_v2i16_zero: + return NVPTXISD::Suld2DV2I16Zero; + case Intrinsic::nvvm_suld_2d_v2i32_zero: + return NVPTXISD::Suld2DV2I32Zero; + case Intrinsic::nvvm_suld_2d_v2i64_zero: + return NVPTXISD::Suld2DV2I64Zero; + case Intrinsic::nvvm_suld_2d_v4i8_zero: + return NVPTXISD::Suld2DV4I8Zero; + case Intrinsic::nvvm_suld_2d_v4i16_zero: + return NVPTXISD::Suld2DV4I16Zero; + case Intrinsic::nvvm_suld_2d_v4i32_zero: + return NVPTXISD::Suld2DV4I32Zero; + case Intrinsic::nvvm_suld_2d_array_i8_zero: + return NVPTXISD::Suld2DArrayI8Zero; + case Intrinsic::nvvm_suld_2d_array_i16_zero: + return NVPTXISD::Suld2DArrayI16Zero; + case Intrinsic::nvvm_suld_2d_array_i32_zero: + return NVPTXISD::Suld2DArrayI32Zero; + case Intrinsic::nvvm_suld_2d_array_i64_zero: + return NVPTXISD::Suld2DArrayI64Zero; + case Intrinsic::nvvm_suld_2d_array_v2i8_zero: + return NVPTXISD::Suld2DArrayV2I8Zero; + case Intrinsic::nvvm_suld_2d_array_v2i16_zero: + return NVPTXISD::Suld2DArrayV2I16Zero; + case Intrinsic::nvvm_suld_2d_array_v2i32_zero: + return NVPTXISD::Suld2DArrayV2I32Zero; + case Intrinsic::nvvm_suld_2d_array_v2i64_zero: + return NVPTXISD::Suld2DArrayV2I64Zero; + case Intrinsic::nvvm_suld_2d_array_v4i8_zero: + return NVPTXISD::Suld2DArrayV4I8Zero; + case Intrinsic::nvvm_suld_2d_array_v4i16_zero: + return NVPTXISD::Suld2DArrayV4I16Zero; + case Intrinsic::nvvm_suld_2d_array_v4i32_zero: + return NVPTXISD::Suld2DArrayV4I32Zero; + case Intrinsic::nvvm_suld_3d_i8_zero: + return NVPTXISD::Suld3DI8Zero; + case Intrinsic::nvvm_suld_3d_i16_zero: + return 
NVPTXISD::Suld3DI16Zero; + case Intrinsic::nvvm_suld_3d_i32_zero: + return NVPTXISD::Suld3DI32Zero; + case Intrinsic::nvvm_suld_3d_i64_zero: + return NVPTXISD::Suld3DI64Zero; + case Intrinsic::nvvm_suld_3d_v2i8_zero: + return NVPTXISD::Suld3DV2I8Zero; + case Intrinsic::nvvm_suld_3d_v2i16_zero: + return NVPTXISD::Suld3DV2I16Zero; + case Intrinsic::nvvm_suld_3d_v2i32_zero: + return NVPTXISD::Suld3DV2I32Zero; + case Intrinsic::nvvm_suld_3d_v2i64_zero: + return NVPTXISD::Suld3DV2I64Zero; + case Intrinsic::nvvm_suld_3d_v4i8_zero: + return NVPTXISD::Suld3DV4I8Zero; + case Intrinsic::nvvm_suld_3d_v4i16_zero: + return NVPTXISD::Suld3DV4I16Zero; + case Intrinsic::nvvm_suld_3d_v4i32_zero: + return NVPTXISD::Suld3DV4I32Zero; } } @@ -2366,16 +3269,7 @@ bool NVPTXTargetLowering::getTgtMemIntrinsic( Info.vol = 0; Info.readMem = true; Info.writeMem = false; - - // alignment is available as metadata. - // Grab it and set the alignment. - assert(I.hasMetadataOtherThanDebugLoc() && "Must have alignment metadata"); - MDNode *AlignMD = I.getMetadata("align"); - assert(AlignMD && "Must have a non-null MDNode"); - assert(AlignMD->getNumOperands() == 1 && "Must have a single operand"); - Value *Align = AlignMD->getOperand(0); - int64_t Alignment = cast<ConstantInt>(Align)->getZExtValue(); - Info.align = Alignment; + Info.align = cast<ConstantInt>(I.getArgOperand(1))->getZExtValue(); return true; } @@ -2395,42 +3289,69 @@ bool NVPTXTargetLowering::getTgtMemIntrinsic( Info.vol = 0; Info.readMem = true; Info.writeMem = false; - - // alignment is available as metadata. - // Grab it and set the alignment. 
- assert(I.hasMetadataOtherThanDebugLoc() && "Must have alignment metadata"); - MDNode *AlignMD = I.getMetadata("align"); - assert(AlignMD && "Must have a non-null MDNode"); - assert(AlignMD->getNumOperands() == 1 && "Must have a single operand"); - Value *Align = AlignMD->getOperand(0); - int64_t Alignment = cast<ConstantInt>(Align)->getZExtValue(); - Info.align = Alignment; + Info.align = cast<ConstantInt>(I.getArgOperand(1))->getZExtValue(); return true; } - case Intrinsic::nvvm_tex_1d_v4f32_i32: + case Intrinsic::nvvm_tex_1d_v4f32_s32: case Intrinsic::nvvm_tex_1d_v4f32_f32: case Intrinsic::nvvm_tex_1d_level_v4f32_f32: case Intrinsic::nvvm_tex_1d_grad_v4f32_f32: - case Intrinsic::nvvm_tex_1d_array_v4f32_i32: + case Intrinsic::nvvm_tex_1d_array_v4f32_s32: case Intrinsic::nvvm_tex_1d_array_v4f32_f32: case Intrinsic::nvvm_tex_1d_array_level_v4f32_f32: case Intrinsic::nvvm_tex_1d_array_grad_v4f32_f32: - case Intrinsic::nvvm_tex_2d_v4f32_i32: + case Intrinsic::nvvm_tex_2d_v4f32_s32: case Intrinsic::nvvm_tex_2d_v4f32_f32: case Intrinsic::nvvm_tex_2d_level_v4f32_f32: case Intrinsic::nvvm_tex_2d_grad_v4f32_f32: - case Intrinsic::nvvm_tex_2d_array_v4f32_i32: + case Intrinsic::nvvm_tex_2d_array_v4f32_s32: case Intrinsic::nvvm_tex_2d_array_v4f32_f32: case Intrinsic::nvvm_tex_2d_array_level_v4f32_f32: case Intrinsic::nvvm_tex_2d_array_grad_v4f32_f32: - case Intrinsic::nvvm_tex_3d_v4f32_i32: + case Intrinsic::nvvm_tex_3d_v4f32_s32: case Intrinsic::nvvm_tex_3d_v4f32_f32: case Intrinsic::nvvm_tex_3d_level_v4f32_f32: - case Intrinsic::nvvm_tex_3d_grad_v4f32_f32: { + case Intrinsic::nvvm_tex_3d_grad_v4f32_f32: + case Intrinsic::nvvm_tex_cube_v4f32_f32: + case Intrinsic::nvvm_tex_cube_level_v4f32_f32: + case Intrinsic::nvvm_tex_cube_array_v4f32_f32: + case Intrinsic::nvvm_tex_cube_array_level_v4f32_f32: + case Intrinsic::nvvm_tld4_r_2d_v4f32_f32: + case Intrinsic::nvvm_tld4_g_2d_v4f32_f32: + case Intrinsic::nvvm_tld4_b_2d_v4f32_f32: + case Intrinsic::nvvm_tld4_a_2d_v4f32_f32: + 
case Intrinsic::nvvm_tex_unified_1d_v4f32_s32: + case Intrinsic::nvvm_tex_unified_1d_v4f32_f32: + case Intrinsic::nvvm_tex_unified_1d_level_v4f32_f32: + case Intrinsic::nvvm_tex_unified_1d_grad_v4f32_f32: + case Intrinsic::nvvm_tex_unified_1d_array_v4f32_s32: + case Intrinsic::nvvm_tex_unified_1d_array_v4f32_f32: + case Intrinsic::nvvm_tex_unified_1d_array_level_v4f32_f32: + case Intrinsic::nvvm_tex_unified_1d_array_grad_v4f32_f32: + case Intrinsic::nvvm_tex_unified_2d_v4f32_s32: + case Intrinsic::nvvm_tex_unified_2d_v4f32_f32: + case Intrinsic::nvvm_tex_unified_2d_level_v4f32_f32: + case Intrinsic::nvvm_tex_unified_2d_grad_v4f32_f32: + case Intrinsic::nvvm_tex_unified_2d_array_v4f32_s32: + case Intrinsic::nvvm_tex_unified_2d_array_v4f32_f32: + case Intrinsic::nvvm_tex_unified_2d_array_level_v4f32_f32: + case Intrinsic::nvvm_tex_unified_2d_array_grad_v4f32_f32: + case Intrinsic::nvvm_tex_unified_3d_v4f32_s32: + case Intrinsic::nvvm_tex_unified_3d_v4f32_f32: + case Intrinsic::nvvm_tex_unified_3d_level_v4f32_f32: + case Intrinsic::nvvm_tex_unified_3d_grad_v4f32_f32: + case Intrinsic::nvvm_tex_unified_cube_v4f32_f32: + case Intrinsic::nvvm_tex_unified_cube_level_v4f32_f32: + case Intrinsic::nvvm_tex_unified_cube_array_v4f32_f32: + case Intrinsic::nvvm_tex_unified_cube_array_level_v4f32_f32: + case Intrinsic::nvvm_tld4_unified_r_2d_v4f32_f32: + case Intrinsic::nvvm_tld4_unified_g_2d_v4f32_f32: + case Intrinsic::nvvm_tld4_unified_b_2d_v4f32_f32: + case Intrinsic::nvvm_tld4_unified_a_2d_v4f32_f32: { Info.opc = getOpcForTextureInstr(Intrinsic); - Info.memVT = MVT::f32; + Info.memVT = MVT::v4f32; Info.ptrVal = nullptr; Info.offset = 0; Info.vol = 0; @@ -2439,28 +3360,120 @@ bool NVPTXTargetLowering::getTgtMemIntrinsic( Info.align = 16; return true; } - case Intrinsic::nvvm_tex_1d_v4i32_i32: - case Intrinsic::nvvm_tex_1d_v4i32_f32: - case Intrinsic::nvvm_tex_1d_level_v4i32_f32: - case Intrinsic::nvvm_tex_1d_grad_v4i32_f32: - case Intrinsic::nvvm_tex_1d_array_v4i32_i32: - 
case Intrinsic::nvvm_tex_1d_array_v4i32_f32: - case Intrinsic::nvvm_tex_1d_array_level_v4i32_f32: - case Intrinsic::nvvm_tex_1d_array_grad_v4i32_f32: - case Intrinsic::nvvm_tex_2d_v4i32_i32: - case Intrinsic::nvvm_tex_2d_v4i32_f32: - case Intrinsic::nvvm_tex_2d_level_v4i32_f32: - case Intrinsic::nvvm_tex_2d_grad_v4i32_f32: - case Intrinsic::nvvm_tex_2d_array_v4i32_i32: - case Intrinsic::nvvm_tex_2d_array_v4i32_f32: - case Intrinsic::nvvm_tex_2d_array_level_v4i32_f32: - case Intrinsic::nvvm_tex_2d_array_grad_v4i32_f32: - case Intrinsic::nvvm_tex_3d_v4i32_i32: - case Intrinsic::nvvm_tex_3d_v4i32_f32: - case Intrinsic::nvvm_tex_3d_level_v4i32_f32: - case Intrinsic::nvvm_tex_3d_grad_v4i32_f32: { + case Intrinsic::nvvm_tex_1d_v4s32_s32: + case Intrinsic::nvvm_tex_1d_v4s32_f32: + case Intrinsic::nvvm_tex_1d_level_v4s32_f32: + case Intrinsic::nvvm_tex_1d_grad_v4s32_f32: + case Intrinsic::nvvm_tex_1d_array_v4s32_s32: + case Intrinsic::nvvm_tex_1d_array_v4s32_f32: + case Intrinsic::nvvm_tex_1d_array_level_v4s32_f32: + case Intrinsic::nvvm_tex_1d_array_grad_v4s32_f32: + case Intrinsic::nvvm_tex_2d_v4s32_s32: + case Intrinsic::nvvm_tex_2d_v4s32_f32: + case Intrinsic::nvvm_tex_2d_level_v4s32_f32: + case Intrinsic::nvvm_tex_2d_grad_v4s32_f32: + case Intrinsic::nvvm_tex_2d_array_v4s32_s32: + case Intrinsic::nvvm_tex_2d_array_v4s32_f32: + case Intrinsic::nvvm_tex_2d_array_level_v4s32_f32: + case Intrinsic::nvvm_tex_2d_array_grad_v4s32_f32: + case Intrinsic::nvvm_tex_3d_v4s32_s32: + case Intrinsic::nvvm_tex_3d_v4s32_f32: + case Intrinsic::nvvm_tex_3d_level_v4s32_f32: + case Intrinsic::nvvm_tex_3d_grad_v4s32_f32: + case Intrinsic::nvvm_tex_cube_v4s32_f32: + case Intrinsic::nvvm_tex_cube_level_v4s32_f32: + case Intrinsic::nvvm_tex_cube_array_v4s32_f32: + case Intrinsic::nvvm_tex_cube_array_level_v4s32_f32: + case Intrinsic::nvvm_tex_cube_v4u32_f32: + case Intrinsic::nvvm_tex_cube_level_v4u32_f32: + case Intrinsic::nvvm_tex_cube_array_v4u32_f32: + case 
Intrinsic::nvvm_tex_cube_array_level_v4u32_f32: + case Intrinsic::nvvm_tex_1d_v4u32_s32: + case Intrinsic::nvvm_tex_1d_v4u32_f32: + case Intrinsic::nvvm_tex_1d_level_v4u32_f32: + case Intrinsic::nvvm_tex_1d_grad_v4u32_f32: + case Intrinsic::nvvm_tex_1d_array_v4u32_s32: + case Intrinsic::nvvm_tex_1d_array_v4u32_f32: + case Intrinsic::nvvm_tex_1d_array_level_v4u32_f32: + case Intrinsic::nvvm_tex_1d_array_grad_v4u32_f32: + case Intrinsic::nvvm_tex_2d_v4u32_s32: + case Intrinsic::nvvm_tex_2d_v4u32_f32: + case Intrinsic::nvvm_tex_2d_level_v4u32_f32: + case Intrinsic::nvvm_tex_2d_grad_v4u32_f32: + case Intrinsic::nvvm_tex_2d_array_v4u32_s32: + case Intrinsic::nvvm_tex_2d_array_v4u32_f32: + case Intrinsic::nvvm_tex_2d_array_level_v4u32_f32: + case Intrinsic::nvvm_tex_2d_array_grad_v4u32_f32: + case Intrinsic::nvvm_tex_3d_v4u32_s32: + case Intrinsic::nvvm_tex_3d_v4u32_f32: + case Intrinsic::nvvm_tex_3d_level_v4u32_f32: + case Intrinsic::nvvm_tex_3d_grad_v4u32_f32: + case Intrinsic::nvvm_tld4_r_2d_v4s32_f32: + case Intrinsic::nvvm_tld4_g_2d_v4s32_f32: + case Intrinsic::nvvm_tld4_b_2d_v4s32_f32: + case Intrinsic::nvvm_tld4_a_2d_v4s32_f32: + case Intrinsic::nvvm_tld4_r_2d_v4u32_f32: + case Intrinsic::nvvm_tld4_g_2d_v4u32_f32: + case Intrinsic::nvvm_tld4_b_2d_v4u32_f32: + case Intrinsic::nvvm_tld4_a_2d_v4u32_f32: + case Intrinsic::nvvm_tex_unified_1d_v4s32_s32: + case Intrinsic::nvvm_tex_unified_1d_v4s32_f32: + case Intrinsic::nvvm_tex_unified_1d_level_v4s32_f32: + case Intrinsic::nvvm_tex_unified_1d_grad_v4s32_f32: + case Intrinsic::nvvm_tex_unified_1d_array_v4s32_s32: + case Intrinsic::nvvm_tex_unified_1d_array_v4s32_f32: + case Intrinsic::nvvm_tex_unified_1d_array_level_v4s32_f32: + case Intrinsic::nvvm_tex_unified_1d_array_grad_v4s32_f32: + case Intrinsic::nvvm_tex_unified_2d_v4s32_s32: + case Intrinsic::nvvm_tex_unified_2d_v4s32_f32: + case Intrinsic::nvvm_tex_unified_2d_level_v4s32_f32: + case Intrinsic::nvvm_tex_unified_2d_grad_v4s32_f32: + case 
Intrinsic::nvvm_tex_unified_2d_array_v4s32_s32: + case Intrinsic::nvvm_tex_unified_2d_array_v4s32_f32: + case Intrinsic::nvvm_tex_unified_2d_array_level_v4s32_f32: + case Intrinsic::nvvm_tex_unified_2d_array_grad_v4s32_f32: + case Intrinsic::nvvm_tex_unified_3d_v4s32_s32: + case Intrinsic::nvvm_tex_unified_3d_v4s32_f32: + case Intrinsic::nvvm_tex_unified_3d_level_v4s32_f32: + case Intrinsic::nvvm_tex_unified_3d_grad_v4s32_f32: + case Intrinsic::nvvm_tex_unified_1d_v4u32_s32: + case Intrinsic::nvvm_tex_unified_1d_v4u32_f32: + case Intrinsic::nvvm_tex_unified_1d_level_v4u32_f32: + case Intrinsic::nvvm_tex_unified_1d_grad_v4u32_f32: + case Intrinsic::nvvm_tex_unified_1d_array_v4u32_s32: + case Intrinsic::nvvm_tex_unified_1d_array_v4u32_f32: + case Intrinsic::nvvm_tex_unified_1d_array_level_v4u32_f32: + case Intrinsic::nvvm_tex_unified_1d_array_grad_v4u32_f32: + case Intrinsic::nvvm_tex_unified_2d_v4u32_s32: + case Intrinsic::nvvm_tex_unified_2d_v4u32_f32: + case Intrinsic::nvvm_tex_unified_2d_level_v4u32_f32: + case Intrinsic::nvvm_tex_unified_2d_grad_v4u32_f32: + case Intrinsic::nvvm_tex_unified_2d_array_v4u32_s32: + case Intrinsic::nvvm_tex_unified_2d_array_v4u32_f32: + case Intrinsic::nvvm_tex_unified_2d_array_level_v4u32_f32: + case Intrinsic::nvvm_tex_unified_2d_array_grad_v4u32_f32: + case Intrinsic::nvvm_tex_unified_3d_v4u32_s32: + case Intrinsic::nvvm_tex_unified_3d_v4u32_f32: + case Intrinsic::nvvm_tex_unified_3d_level_v4u32_f32: + case Intrinsic::nvvm_tex_unified_3d_grad_v4u32_f32: + case Intrinsic::nvvm_tex_unified_cube_v4s32_f32: + case Intrinsic::nvvm_tex_unified_cube_level_v4s32_f32: + case Intrinsic::nvvm_tex_unified_cube_array_v4s32_f32: + case Intrinsic::nvvm_tex_unified_cube_array_level_v4s32_f32: + case Intrinsic::nvvm_tex_unified_cube_v4u32_f32: + case Intrinsic::nvvm_tex_unified_cube_level_v4u32_f32: + case Intrinsic::nvvm_tex_unified_cube_array_v4u32_f32: + case Intrinsic::nvvm_tex_unified_cube_array_level_v4u32_f32: + case 
Intrinsic::nvvm_tld4_unified_r_2d_v4s32_f32: + case Intrinsic::nvvm_tld4_unified_g_2d_v4s32_f32: + case Intrinsic::nvvm_tld4_unified_b_2d_v4s32_f32: + case Intrinsic::nvvm_tld4_unified_a_2d_v4s32_f32: + case Intrinsic::nvvm_tld4_unified_r_2d_v4u32_f32: + case Intrinsic::nvvm_tld4_unified_g_2d_v4u32_f32: + case Intrinsic::nvvm_tld4_unified_b_2d_v4u32_f32: + case Intrinsic::nvvm_tld4_unified_a_2d_v4u32_f32: { Info.opc = getOpcForTextureInstr(Intrinsic); - Info.memVT = MVT::i32; + Info.memVT = MVT::v4i32; Info.ptrVal = nullptr; Info.offset = 0; Info.vol = 0; @@ -2469,6 +3482,21 @@ bool NVPTXTargetLowering::getTgtMemIntrinsic( Info.align = 16; return true; } + case Intrinsic::nvvm_suld_1d_i8_clamp: + case Intrinsic::nvvm_suld_1d_v2i8_clamp: + case Intrinsic::nvvm_suld_1d_v4i8_clamp: + case Intrinsic::nvvm_suld_1d_array_i8_clamp: + case Intrinsic::nvvm_suld_1d_array_v2i8_clamp: + case Intrinsic::nvvm_suld_1d_array_v4i8_clamp: + case Intrinsic::nvvm_suld_2d_i8_clamp: + case Intrinsic::nvvm_suld_2d_v2i8_clamp: + case Intrinsic::nvvm_suld_2d_v4i8_clamp: + case Intrinsic::nvvm_suld_2d_array_i8_clamp: + case Intrinsic::nvvm_suld_2d_array_v2i8_clamp: + case Intrinsic::nvvm_suld_2d_array_v4i8_clamp: + case Intrinsic::nvvm_suld_3d_i8_clamp: + case Intrinsic::nvvm_suld_3d_v2i8_clamp: + case Intrinsic::nvvm_suld_3d_v4i8_clamp: case Intrinsic::nvvm_suld_1d_i8_trap: case Intrinsic::nvvm_suld_1d_v2i8_trap: case Intrinsic::nvvm_suld_1d_v4i8_trap: @@ -2483,7 +3511,22 @@ bool NVPTXTargetLowering::getTgtMemIntrinsic( case Intrinsic::nvvm_suld_2d_array_v4i8_trap: case Intrinsic::nvvm_suld_3d_i8_trap: case Intrinsic::nvvm_suld_3d_v2i8_trap: - case Intrinsic::nvvm_suld_3d_v4i8_trap: { + case Intrinsic::nvvm_suld_3d_v4i8_trap: + case Intrinsic::nvvm_suld_1d_i8_zero: + case Intrinsic::nvvm_suld_1d_v2i8_zero: + case Intrinsic::nvvm_suld_1d_v4i8_zero: + case Intrinsic::nvvm_suld_1d_array_i8_zero: + case Intrinsic::nvvm_suld_1d_array_v2i8_zero: + case Intrinsic::nvvm_suld_1d_array_v4i8_zero: + 
case Intrinsic::nvvm_suld_2d_i8_zero: + case Intrinsic::nvvm_suld_2d_v2i8_zero: + case Intrinsic::nvvm_suld_2d_v4i8_zero: + case Intrinsic::nvvm_suld_2d_array_i8_zero: + case Intrinsic::nvvm_suld_2d_array_v2i8_zero: + case Intrinsic::nvvm_suld_2d_array_v4i8_zero: + case Intrinsic::nvvm_suld_3d_i8_zero: + case Intrinsic::nvvm_suld_3d_v2i8_zero: + case Intrinsic::nvvm_suld_3d_v4i8_zero: { Info.opc = getOpcForSurfaceInstr(Intrinsic); Info.memVT = MVT::i8; Info.ptrVal = nullptr; @@ -2494,6 +3537,21 @@ bool NVPTXTargetLowering::getTgtMemIntrinsic( Info.align = 16; return true; } + case Intrinsic::nvvm_suld_1d_i16_clamp: + case Intrinsic::nvvm_suld_1d_v2i16_clamp: + case Intrinsic::nvvm_suld_1d_v4i16_clamp: + case Intrinsic::nvvm_suld_1d_array_i16_clamp: + case Intrinsic::nvvm_suld_1d_array_v2i16_clamp: + case Intrinsic::nvvm_suld_1d_array_v4i16_clamp: + case Intrinsic::nvvm_suld_2d_i16_clamp: + case Intrinsic::nvvm_suld_2d_v2i16_clamp: + case Intrinsic::nvvm_suld_2d_v4i16_clamp: + case Intrinsic::nvvm_suld_2d_array_i16_clamp: + case Intrinsic::nvvm_suld_2d_array_v2i16_clamp: + case Intrinsic::nvvm_suld_2d_array_v4i16_clamp: + case Intrinsic::nvvm_suld_3d_i16_clamp: + case Intrinsic::nvvm_suld_3d_v2i16_clamp: + case Intrinsic::nvvm_suld_3d_v4i16_clamp: case Intrinsic::nvvm_suld_1d_i16_trap: case Intrinsic::nvvm_suld_1d_v2i16_trap: case Intrinsic::nvvm_suld_1d_v4i16_trap: @@ -2508,7 +3566,22 @@ bool NVPTXTargetLowering::getTgtMemIntrinsic( case Intrinsic::nvvm_suld_2d_array_v4i16_trap: case Intrinsic::nvvm_suld_3d_i16_trap: case Intrinsic::nvvm_suld_3d_v2i16_trap: - case Intrinsic::nvvm_suld_3d_v4i16_trap: { + case Intrinsic::nvvm_suld_3d_v4i16_trap: + case Intrinsic::nvvm_suld_1d_i16_zero: + case Intrinsic::nvvm_suld_1d_v2i16_zero: + case Intrinsic::nvvm_suld_1d_v4i16_zero: + case Intrinsic::nvvm_suld_1d_array_i16_zero: + case Intrinsic::nvvm_suld_1d_array_v2i16_zero: + case Intrinsic::nvvm_suld_1d_array_v4i16_zero: + case Intrinsic::nvvm_suld_2d_i16_zero: + case 
Intrinsic::nvvm_suld_2d_v2i16_zero: + case Intrinsic::nvvm_suld_2d_v4i16_zero: + case Intrinsic::nvvm_suld_2d_array_i16_zero: + case Intrinsic::nvvm_suld_2d_array_v2i16_zero: + case Intrinsic::nvvm_suld_2d_array_v4i16_zero: + case Intrinsic::nvvm_suld_3d_i16_zero: + case Intrinsic::nvvm_suld_3d_v2i16_zero: + case Intrinsic::nvvm_suld_3d_v4i16_zero: { Info.opc = getOpcForSurfaceInstr(Intrinsic); Info.memVT = MVT::i16; Info.ptrVal = nullptr; @@ -2519,6 +3592,21 @@ bool NVPTXTargetLowering::getTgtMemIntrinsic( Info.align = 16; return true; } + case Intrinsic::nvvm_suld_1d_i32_clamp: + case Intrinsic::nvvm_suld_1d_v2i32_clamp: + case Intrinsic::nvvm_suld_1d_v4i32_clamp: + case Intrinsic::nvvm_suld_1d_array_i32_clamp: + case Intrinsic::nvvm_suld_1d_array_v2i32_clamp: + case Intrinsic::nvvm_suld_1d_array_v4i32_clamp: + case Intrinsic::nvvm_suld_2d_i32_clamp: + case Intrinsic::nvvm_suld_2d_v2i32_clamp: + case Intrinsic::nvvm_suld_2d_v4i32_clamp: + case Intrinsic::nvvm_suld_2d_array_i32_clamp: + case Intrinsic::nvvm_suld_2d_array_v2i32_clamp: + case Intrinsic::nvvm_suld_2d_array_v4i32_clamp: + case Intrinsic::nvvm_suld_3d_i32_clamp: + case Intrinsic::nvvm_suld_3d_v2i32_clamp: + case Intrinsic::nvvm_suld_3d_v4i32_clamp: case Intrinsic::nvvm_suld_1d_i32_trap: case Intrinsic::nvvm_suld_1d_v2i32_trap: case Intrinsic::nvvm_suld_1d_v4i32_trap: @@ -2533,7 +3621,22 @@ bool NVPTXTargetLowering::getTgtMemIntrinsic( case Intrinsic::nvvm_suld_2d_array_v4i32_trap: case Intrinsic::nvvm_suld_3d_i32_trap: case Intrinsic::nvvm_suld_3d_v2i32_trap: - case Intrinsic::nvvm_suld_3d_v4i32_trap: { + case Intrinsic::nvvm_suld_3d_v4i32_trap: + case Intrinsic::nvvm_suld_1d_i32_zero: + case Intrinsic::nvvm_suld_1d_v2i32_zero: + case Intrinsic::nvvm_suld_1d_v4i32_zero: + case Intrinsic::nvvm_suld_1d_array_i32_zero: + case Intrinsic::nvvm_suld_1d_array_v2i32_zero: + case Intrinsic::nvvm_suld_1d_array_v4i32_zero: + case Intrinsic::nvvm_suld_2d_i32_zero: + case Intrinsic::nvvm_suld_2d_v2i32_zero: + case 
Intrinsic::nvvm_suld_2d_v4i32_zero: + case Intrinsic::nvvm_suld_2d_array_i32_zero: + case Intrinsic::nvvm_suld_2d_array_v2i32_zero: + case Intrinsic::nvvm_suld_2d_array_v4i32_zero: + case Intrinsic::nvvm_suld_3d_i32_zero: + case Intrinsic::nvvm_suld_3d_v2i32_zero: + case Intrinsic::nvvm_suld_3d_v4i32_zero: { Info.opc = getOpcForSurfaceInstr(Intrinsic); Info.memVT = MVT::i32; Info.ptrVal = nullptr; @@ -2544,7 +3647,46 @@ bool NVPTXTargetLowering::getTgtMemIntrinsic( Info.align = 16; return true; } - + case Intrinsic::nvvm_suld_1d_i64_clamp: + case Intrinsic::nvvm_suld_1d_v2i64_clamp: + case Intrinsic::nvvm_suld_1d_array_i64_clamp: + case Intrinsic::nvvm_suld_1d_array_v2i64_clamp: + case Intrinsic::nvvm_suld_2d_i64_clamp: + case Intrinsic::nvvm_suld_2d_v2i64_clamp: + case Intrinsic::nvvm_suld_2d_array_i64_clamp: + case Intrinsic::nvvm_suld_2d_array_v2i64_clamp: + case Intrinsic::nvvm_suld_3d_i64_clamp: + case Intrinsic::nvvm_suld_3d_v2i64_clamp: + case Intrinsic::nvvm_suld_1d_i64_trap: + case Intrinsic::nvvm_suld_1d_v2i64_trap: + case Intrinsic::nvvm_suld_1d_array_i64_trap: + case Intrinsic::nvvm_suld_1d_array_v2i64_trap: + case Intrinsic::nvvm_suld_2d_i64_trap: + case Intrinsic::nvvm_suld_2d_v2i64_trap: + case Intrinsic::nvvm_suld_2d_array_i64_trap: + case Intrinsic::nvvm_suld_2d_array_v2i64_trap: + case Intrinsic::nvvm_suld_3d_i64_trap: + case Intrinsic::nvvm_suld_3d_v2i64_trap: + case Intrinsic::nvvm_suld_1d_i64_zero: + case Intrinsic::nvvm_suld_1d_v2i64_zero: + case Intrinsic::nvvm_suld_1d_array_i64_zero: + case Intrinsic::nvvm_suld_1d_array_v2i64_zero: + case Intrinsic::nvvm_suld_2d_i64_zero: + case Intrinsic::nvvm_suld_2d_v2i64_zero: + case Intrinsic::nvvm_suld_2d_array_i64_zero: + case Intrinsic::nvvm_suld_2d_array_v2i64_zero: + case Intrinsic::nvvm_suld_3d_i64_zero: + case Intrinsic::nvvm_suld_3d_v2i64_zero: { + Info.opc = getOpcForSurfaceInstr(Intrinsic); + Info.memVT = MVT::i64; + Info.ptrVal = nullptr; + Info.offset = 0; + Info.vol = 0; + Info.readMem = 
true; + Info.writeMem = false; + Info.align = 16; + return true; + } } return false; } @@ -2648,7 +3790,31 @@ unsigned NVPTXTargetLowering::getFunctionAlignment(const Function *) const { // NVPTX DAG Combining //===----------------------------------------------------------------------===// -extern unsigned FMAContractLevel; +bool NVPTXTargetLowering::allowFMA(MachineFunction &MF, + CodeGenOpt::Level OptLevel) const { + const Function *F = MF.getFunction(); + const TargetOptions &TO = MF.getTarget().Options; + + // Always honor command-line argument + if (FMAContractLevelOpt.getNumOccurrences() > 0) { + return FMAContractLevelOpt > 0; + } else if (OptLevel == 0) { + // Do not contract if we're not optimizing the code + return false; + } else if (TO.AllowFPOpFusion == FPOpFusion::Fast || TO.UnsafeFPMath) { + // Honor TargetOptions flags that explicitly say fusion is okay + return true; + } else if (F->hasFnAttribute("unsafe-fp-math")) { + // Check for unsafe-fp-math=true coming from Clang + Attribute Attr = F->getFnAttribute("unsafe-fp-math"); + StringRef Val = Attr.getValueAsString(); + if (Val == "true") + return true; + } + + // We did not have a clear indication that fusion is allowed, so assume not + return false; +} /// PerformADDCombineWithOperands - Try DAG combinations for an ADD with /// operands N0 and N1. 
This is a helper for PerformADDCombine that is @@ -2682,7 +3848,9 @@ static SDValue PerformADDCombineWithOperands(SDNode *N, SDValue N0, SDValue N1, } else if (N0.getOpcode() == ISD::FMUL) { if (VT == MVT::f32 || VT == MVT::f64) { - if (FMAContractLevel == 0) + const auto *TLI = static_cast<const NVPTXTargetLowering *>( + &DAG.getTargetLoweringInfo()); + if (!TLI->allowFMA(DAG.getMachineFunction(), OptLevel)) return SDValue(); // For floating point: @@ -2867,13 +4035,13 @@ static bool IsMulWideOperandDemotable(SDValue Op, if (Op.getOpcode() == ISD::SIGN_EXTEND || Op.getOpcode() == ISD::SIGN_EXTEND_INREG) { EVT OrigVT = Op.getOperand(0).getValueType(); - if (OrigVT.getSizeInBits() == OptSize) { + if (OrigVT.getSizeInBits() <= OptSize) { S = Signed; return true; } } else if (Op.getOpcode() == ISD::ZERO_EXTEND) { EVT OrigVT = Op.getOperand(0).getValueType(); - if (OrigVT.getSizeInBits() == OptSize) { + if (OrigVT.getSizeInBits() <= OptSize) { S = Unsigned; return true; } @@ -3027,8 +4195,7 @@ static SDValue PerformSHLCombine(SDNode *N, SDValue NVPTXTargetLowering::PerformDAGCombine(SDNode *N, DAGCombinerInfo &DCI) const { - // FIXME: Get this from the DAG somehow - CodeGenOpt::Level OptLevel = CodeGenOpt::Aggressive; + CodeGenOpt::Level OptLevel = getTargetMachine().getOptLevel(); switch (N->getOpcode()) { default: break; case ISD::ADD: @@ -3046,6 +4213,7 @@ SDValue NVPTXTargetLowering::PerformDAGCombine(SDNode *N, /// ReplaceVectorLoad - Convert vector loads into multi-output scalar loads. 
static void ReplaceLoadVector(SDNode *N, SelectionDAG &DAG, + const DataLayout *TD, SmallVectorImpl<SDValue> &Results) { EVT ResVT = N->getValueType(0); SDLoc DL(N); @@ -3073,6 +4241,20 @@ static void ReplaceLoadVector(SDNode *N, SelectionDAG &DAG, break; } + LoadSDNode *LD = cast<LoadSDNode>(N); + + unsigned Align = LD->getAlignment(); + unsigned PrefAlign = + TD->getPrefTypeAlignment(ResVT.getTypeForEVT(*DAG.getContext())); + if (Align < PrefAlign) { + // This load is not sufficiently aligned, so bail out and let this vector + // load be scalarized. Note that we may still be able to emit smaller + // vector loads. For example, if we are loading a <4 x float> with an + // alignment of 8, this check will fail but the legalizer will try again + // with 2 x <2 x float>, which will succeed with an alignment of 8. + return; + } + EVT EltVT = ResVT.getVectorElementType(); unsigned NumElts = ResVT.getVectorNumElements(); @@ -3109,8 +4291,6 @@ static void ReplaceLoadVector(SDNode *N, SelectionDAG &DAG, for (unsigned i = 0, e = N->getNumOperands(); i != e; ++i) OtherOps.push_back(N->getOperand(i)); - LoadSDNode *LD = cast<LoadSDNode>(N); - // The select routine does not have access to the LoadSDNode instance, so // pass along the extension information OtherOps.push_back(DAG.getIntPtrConstant(LD->getExtensionType())); @@ -3283,7 +4463,7 @@ void NVPTXTargetLowering::ReplaceNodeResults( default: report_fatal_error("Unhandled custom legalization"); case ISD::LOAD: - ReplaceLoadVector(N, DAG, Results); + ReplaceLoadVector(N, DAG, getDataLayout(), Results); return; case ISD::INTRINSIC_W_CHAIN: ReplaceINTRINSIC_W_CHAIN(N, DAG, Results); @@ -3316,3 +4496,10 @@ NVPTXTargetObjectFile::~NVPTXTargetObjectFile() { delete DwarfRangesSection; delete DwarfMacroInfoSection; } + +const MCSection * +NVPTXTargetObjectFile::SelectSectionForGlobal(const GlobalValue *GV, + SectionKind Kind, Mangler &Mang, + const TargetMachine &TM) const { + return getDataSection(); +} diff --git 
a/lib/Target/NVPTX/NVPTXISelLowering.h b/lib/Target/NVPTX/NVPTXISelLowering.h index 7b4026d..d66d81a 100644 --- a/lib/Target/NVPTX/NVPTXISelLowering.h +++ b/lib/Target/NVPTX/NVPTXISelLowering.h @@ -12,8 +12,8 @@ // //===----------------------------------------------------------------------===// -#ifndef NVPTXISELLOWERING_H -#define NVPTXISELLOWERING_H +#ifndef LLVM_LIB_TARGET_NVPTX_NVPTXISELLOWERING_H +#define LLVM_LIB_TARGET_NVPTX_NVPTXISELLOWERING_H #include "NVPTX.h" #include "llvm/CodeGen/SelectionDAG.h" @@ -77,54 +77,244 @@ enum NodeType { StoreRetvalV4, // Texture intrinsics - Tex1DFloatI32, + Tex1DFloatS32, Tex1DFloatFloat, Tex1DFloatFloatLevel, Tex1DFloatFloatGrad, - Tex1DI32I32, - Tex1DI32Float, - Tex1DI32FloatLevel, - Tex1DI32FloatGrad, - Tex1DArrayFloatI32, + Tex1DS32S32, + Tex1DS32Float, + Tex1DS32FloatLevel, + Tex1DS32FloatGrad, + Tex1DU32S32, + Tex1DU32Float, + Tex1DU32FloatLevel, + Tex1DU32FloatGrad, + Tex1DArrayFloatS32, Tex1DArrayFloatFloat, Tex1DArrayFloatFloatLevel, Tex1DArrayFloatFloatGrad, - Tex1DArrayI32I32, - Tex1DArrayI32Float, - Tex1DArrayI32FloatLevel, - Tex1DArrayI32FloatGrad, - Tex2DFloatI32, + Tex1DArrayS32S32, + Tex1DArrayS32Float, + Tex1DArrayS32FloatLevel, + Tex1DArrayS32FloatGrad, + Tex1DArrayU32S32, + Tex1DArrayU32Float, + Tex1DArrayU32FloatLevel, + Tex1DArrayU32FloatGrad, + Tex2DFloatS32, Tex2DFloatFloat, Tex2DFloatFloatLevel, Tex2DFloatFloatGrad, - Tex2DI32I32, - Tex2DI32Float, - Tex2DI32FloatLevel, - Tex2DI32FloatGrad, - Tex2DArrayFloatI32, + Tex2DS32S32, + Tex2DS32Float, + Tex2DS32FloatLevel, + Tex2DS32FloatGrad, + Tex2DU32S32, + Tex2DU32Float, + Tex2DU32FloatLevel, + Tex2DU32FloatGrad, + Tex2DArrayFloatS32, Tex2DArrayFloatFloat, Tex2DArrayFloatFloatLevel, Tex2DArrayFloatFloatGrad, - Tex2DArrayI32I32, - Tex2DArrayI32Float, - Tex2DArrayI32FloatLevel, - Tex2DArrayI32FloatGrad, - Tex3DFloatI32, + Tex2DArrayS32S32, + Tex2DArrayS32Float, + Tex2DArrayS32FloatLevel, + Tex2DArrayS32FloatGrad, + Tex2DArrayU32S32, + Tex2DArrayU32Float, + 
Tex2DArrayU32FloatLevel, + Tex2DArrayU32FloatGrad, + Tex3DFloatS32, Tex3DFloatFloat, Tex3DFloatFloatLevel, Tex3DFloatFloatGrad, - Tex3DI32I32, - Tex3DI32Float, - Tex3DI32FloatLevel, - Tex3DI32FloatGrad, + Tex3DS32S32, + Tex3DS32Float, + Tex3DS32FloatLevel, + Tex3DS32FloatGrad, + Tex3DU32S32, + Tex3DU32Float, + Tex3DU32FloatLevel, + Tex3DU32FloatGrad, + TexCubeFloatFloat, + TexCubeFloatFloatLevel, + TexCubeS32Float, + TexCubeS32FloatLevel, + TexCubeU32Float, + TexCubeU32FloatLevel, + TexCubeArrayFloatFloat, + TexCubeArrayFloatFloatLevel, + TexCubeArrayS32Float, + TexCubeArrayS32FloatLevel, + TexCubeArrayU32Float, + TexCubeArrayU32FloatLevel, + Tld4R2DFloatFloat, + Tld4G2DFloatFloat, + Tld4B2DFloatFloat, + Tld4A2DFloatFloat, + Tld4R2DS64Float, + Tld4G2DS64Float, + Tld4B2DS64Float, + Tld4A2DS64Float, + Tld4R2DU64Float, + Tld4G2DU64Float, + Tld4B2DU64Float, + Tld4A2DU64Float, + TexUnified1DFloatS32, + TexUnified1DFloatFloat, + TexUnified1DFloatFloatLevel, + TexUnified1DFloatFloatGrad, + TexUnified1DS32S32, + TexUnified1DS32Float, + TexUnified1DS32FloatLevel, + TexUnified1DS32FloatGrad, + TexUnified1DU32S32, + TexUnified1DU32Float, + TexUnified1DU32FloatLevel, + TexUnified1DU32FloatGrad, + TexUnified1DArrayFloatS32, + TexUnified1DArrayFloatFloat, + TexUnified1DArrayFloatFloatLevel, + TexUnified1DArrayFloatFloatGrad, + TexUnified1DArrayS32S32, + TexUnified1DArrayS32Float, + TexUnified1DArrayS32FloatLevel, + TexUnified1DArrayS32FloatGrad, + TexUnified1DArrayU32S32, + TexUnified1DArrayU32Float, + TexUnified1DArrayU32FloatLevel, + TexUnified1DArrayU32FloatGrad, + TexUnified2DFloatS32, + TexUnified2DFloatFloat, + TexUnified2DFloatFloatLevel, + TexUnified2DFloatFloatGrad, + TexUnified2DS32S32, + TexUnified2DS32Float, + TexUnified2DS32FloatLevel, + TexUnified2DS32FloatGrad, + TexUnified2DU32S32, + TexUnified2DU32Float, + TexUnified2DU32FloatLevel, + TexUnified2DU32FloatGrad, + TexUnified2DArrayFloatS32, + TexUnified2DArrayFloatFloat, + TexUnified2DArrayFloatFloatLevel, + 
TexUnified2DArrayFloatFloatGrad, + TexUnified2DArrayS32S32, + TexUnified2DArrayS32Float, + TexUnified2DArrayS32FloatLevel, + TexUnified2DArrayS32FloatGrad, + TexUnified2DArrayU32S32, + TexUnified2DArrayU32Float, + TexUnified2DArrayU32FloatLevel, + TexUnified2DArrayU32FloatGrad, + TexUnified3DFloatS32, + TexUnified3DFloatFloat, + TexUnified3DFloatFloatLevel, + TexUnified3DFloatFloatGrad, + TexUnified3DS32S32, + TexUnified3DS32Float, + TexUnified3DS32FloatLevel, + TexUnified3DS32FloatGrad, + TexUnified3DU32S32, + TexUnified3DU32Float, + TexUnified3DU32FloatLevel, + TexUnified3DU32FloatGrad, + TexUnifiedCubeFloatFloat, + TexUnifiedCubeFloatFloatLevel, + TexUnifiedCubeS32Float, + TexUnifiedCubeS32FloatLevel, + TexUnifiedCubeU32Float, + TexUnifiedCubeU32FloatLevel, + TexUnifiedCubeArrayFloatFloat, + TexUnifiedCubeArrayFloatFloatLevel, + TexUnifiedCubeArrayS32Float, + TexUnifiedCubeArrayS32FloatLevel, + TexUnifiedCubeArrayU32Float, + TexUnifiedCubeArrayU32FloatLevel, + Tld4UnifiedR2DFloatFloat, + Tld4UnifiedG2DFloatFloat, + Tld4UnifiedB2DFloatFloat, + Tld4UnifiedA2DFloatFloat, + Tld4UnifiedR2DS64Float, + Tld4UnifiedG2DS64Float, + Tld4UnifiedB2DS64Float, + Tld4UnifiedA2DS64Float, + Tld4UnifiedR2DU64Float, + Tld4UnifiedG2DU64Float, + Tld4UnifiedB2DU64Float, + Tld4UnifiedA2DU64Float, // Surface intrinsics + Suld1DI8Clamp, + Suld1DI16Clamp, + Suld1DI32Clamp, + Suld1DI64Clamp, + Suld1DV2I8Clamp, + Suld1DV2I16Clamp, + Suld1DV2I32Clamp, + Suld1DV2I64Clamp, + Suld1DV4I8Clamp, + Suld1DV4I16Clamp, + Suld1DV4I32Clamp, + + Suld1DArrayI8Clamp, + Suld1DArrayI16Clamp, + Suld1DArrayI32Clamp, + Suld1DArrayI64Clamp, + Suld1DArrayV2I8Clamp, + Suld1DArrayV2I16Clamp, + Suld1DArrayV2I32Clamp, + Suld1DArrayV2I64Clamp, + Suld1DArrayV4I8Clamp, + Suld1DArrayV4I16Clamp, + Suld1DArrayV4I32Clamp, + + Suld2DI8Clamp, + Suld2DI16Clamp, + Suld2DI32Clamp, + Suld2DI64Clamp, + Suld2DV2I8Clamp, + Suld2DV2I16Clamp, + Suld2DV2I32Clamp, + Suld2DV2I64Clamp, + Suld2DV4I8Clamp, + Suld2DV4I16Clamp, + 
Suld2DV4I32Clamp, + + Suld2DArrayI8Clamp, + Suld2DArrayI16Clamp, + Suld2DArrayI32Clamp, + Suld2DArrayI64Clamp, + Suld2DArrayV2I8Clamp, + Suld2DArrayV2I16Clamp, + Suld2DArrayV2I32Clamp, + Suld2DArrayV2I64Clamp, + Suld2DArrayV4I8Clamp, + Suld2DArrayV4I16Clamp, + Suld2DArrayV4I32Clamp, + + Suld3DI8Clamp, + Suld3DI16Clamp, + Suld3DI32Clamp, + Suld3DI64Clamp, + Suld3DV2I8Clamp, + Suld3DV2I16Clamp, + Suld3DV2I32Clamp, + Suld3DV2I64Clamp, + Suld3DV4I8Clamp, + Suld3DV4I16Clamp, + Suld3DV4I32Clamp, + Suld1DI8Trap, Suld1DI16Trap, Suld1DI32Trap, + Suld1DI64Trap, Suld1DV2I8Trap, Suld1DV2I16Trap, Suld1DV2I32Trap, + Suld1DV2I64Trap, Suld1DV4I8Trap, Suld1DV4I16Trap, Suld1DV4I32Trap, @@ -132,9 +322,11 @@ enum NodeType { Suld1DArrayI8Trap, Suld1DArrayI16Trap, Suld1DArrayI32Trap, + Suld1DArrayI64Trap, Suld1DArrayV2I8Trap, Suld1DArrayV2I16Trap, Suld1DArrayV2I32Trap, + Suld1DArrayV2I64Trap, Suld1DArrayV4I8Trap, Suld1DArrayV4I16Trap, Suld1DArrayV4I32Trap, @@ -142,9 +334,11 @@ enum NodeType { Suld2DI8Trap, Suld2DI16Trap, Suld2DI32Trap, + Suld2DI64Trap, Suld2DV2I8Trap, Suld2DV2I16Trap, Suld2DV2I32Trap, + Suld2DV2I64Trap, Suld2DV4I8Trap, Suld2DV4I16Trap, Suld2DV4I32Trap, @@ -152,9 +346,11 @@ enum NodeType { Suld2DArrayI8Trap, Suld2DArrayI16Trap, Suld2DArrayI32Trap, + Suld2DArrayI64Trap, Suld2DArrayV2I8Trap, Suld2DArrayV2I16Trap, Suld2DArrayV2I32Trap, + Suld2DArrayV2I64Trap, Suld2DArrayV4I8Trap, Suld2DArrayV4I16Trap, Suld2DArrayV4I32Trap, @@ -162,12 +358,74 @@ enum NodeType { Suld3DI8Trap, Suld3DI16Trap, Suld3DI32Trap, + Suld3DI64Trap, Suld3DV2I8Trap, Suld3DV2I16Trap, Suld3DV2I32Trap, + Suld3DV2I64Trap, Suld3DV4I8Trap, Suld3DV4I16Trap, - Suld3DV4I32Trap + Suld3DV4I32Trap, + + Suld1DI8Zero, + Suld1DI16Zero, + Suld1DI32Zero, + Suld1DI64Zero, + Suld1DV2I8Zero, + Suld1DV2I16Zero, + Suld1DV2I32Zero, + Suld1DV2I64Zero, + Suld1DV4I8Zero, + Suld1DV4I16Zero, + Suld1DV4I32Zero, + + Suld1DArrayI8Zero, + Suld1DArrayI16Zero, + Suld1DArrayI32Zero, + Suld1DArrayI64Zero, + Suld1DArrayV2I8Zero, + 
Suld1DArrayV2I16Zero, + Suld1DArrayV2I32Zero, + Suld1DArrayV2I64Zero, + Suld1DArrayV4I8Zero, + Suld1DArrayV4I16Zero, + Suld1DArrayV4I32Zero, + + Suld2DI8Zero, + Suld2DI16Zero, + Suld2DI32Zero, + Suld2DI64Zero, + Suld2DV2I8Zero, + Suld2DV2I16Zero, + Suld2DV2I32Zero, + Suld2DV2I64Zero, + Suld2DV4I8Zero, + Suld2DV4I16Zero, + Suld2DV4I32Zero, + + Suld2DArrayI8Zero, + Suld2DArrayI16Zero, + Suld2DArrayI32Zero, + Suld2DArrayI64Zero, + Suld2DArrayV2I8Zero, + Suld2DArrayV2I16Zero, + Suld2DArrayV2I32Zero, + Suld2DArrayV2I64Zero, + Suld2DArrayV4I8Zero, + Suld2DArrayV4I16Zero, + Suld2DArrayV4I32Zero, + + Suld3DI8Zero, + Suld3DI16Zero, + Suld3DI32Zero, + Suld3DI64Zero, + Suld3DV2I8Zero, + Suld3DV2I16Zero, + Suld3DV2I32Zero, + Suld3DV2I64Zero, + Suld3DV4I8Zero, + Suld3DV4I16Zero, + Suld3DV4I32Zero }; } @@ -178,7 +436,7 @@ class NVPTXSubtarget; //===--------------------------------------------------------------------===// class NVPTXTargetLowering : public TargetLowering { public: - explicit NVPTXTargetLowering(NVPTXTargetMachine &TM); + explicit NVPTXTargetLowering(const NVPTXTargetMachine &TM); SDValue LowerOperation(SDValue Op, SelectionDAG &DAG) const override; SDValue LowerGlobalAddress(SDValue Op, SelectionDAG &DAG) const; @@ -237,7 +495,7 @@ public: std::vector<SDValue> &Ops, SelectionDAG &DAG) const override; - NVPTXTargetMachine *nvTM; + const NVPTXTargetMachine *nvTM; // PTX always uses 32-bit shift amounts MVT getScalarShiftAmountTy(EVT LHSTy) const override { return MVT::i32; } @@ -245,6 +503,10 @@ public: TargetLoweringBase::LegalizeTypeAction getPreferredVectorAction(EVT VT) const override; + bool allowFMA(MachineFunction &MF, CodeGenOpt::Level OptLevel) const; + + bool isFMAFasterThanFMulAndFAdd(EVT) const override { return true; } + private: const NVPTXSubtarget &nvptxSubtarget; // cache the subtarget here @@ -274,4 +536,4 @@ private: }; } // namespace llvm -#endif // NVPTXISELLOWERING_H +#endif diff --git a/lib/Target/NVPTX/NVPTXInstrFormats.td 
b/lib/Target/NVPTX/NVPTXInstrFormats.td index f11f1b8..ffcb5d5 100644 --- a/lib/Target/NVPTX/NVPTXInstrFormats.td +++ b/lib/Target/NVPTX/NVPTXInstrFormats.td @@ -36,8 +36,24 @@ class NVPTXInst<dag outs, dag ins, string asmstr, list<dag> pattern> bit IsLoad = 0; bit IsStore = 0; - let TSFlags{3-0} = VecInstType; - let TSFlags{4-4} = IsSimpleMove; - let TSFlags{5-5} = IsLoad; - let TSFlags{6-6} = IsStore; + bit IsTex = 0; + bit IsSust = 0; + bit IsSurfTexQuery = 0; + bit IsTexModeUnified = 0; + + // The following field is encoded as log2 of the vector size minus one, + // with 0 meaning the operation is not a surface instruction. For example, + // if IsSuld == 2, then the instruction is a suld instruction with vector size + // 2**(2-1) = 2. + bits<2> IsSuld = 0; + + let TSFlags{3-0} = VecInstType; + let TSFlags{4-4} = IsSimpleMove; + let TSFlags{5-5} = IsLoad; + let TSFlags{6-6} = IsStore; + let TSFlags{7} = IsTex; + let TSFlags{9-8} = IsSuld; + let TSFlags{10} = IsSust; + let TSFlags{11} = IsSurfTexQuery; + let TSFlags{12} = IsTexModeUnified; } diff --git a/lib/Target/NVPTX/NVPTXInstrInfo.h b/lib/Target/NVPTX/NVPTXInstrInfo.h index 2ac2974..6de7536 100644 --- a/lib/Target/NVPTX/NVPTXInstrInfo.h +++ b/lib/Target/NVPTX/NVPTXInstrInfo.h @@ -11,8 +11,8 @@ // //===----------------------------------------------------------------------===// -#ifndef NVPTXINSTRUCTIONINFO_H -#define NVPTXINSTRUCTIONINFO_H +#ifndef LLVM_LIB_TARGET_NVPTX_NVPTXINSTRINFO_H +#define LLVM_LIB_TARGET_NVPTX_NVPTXINSTRINFO_H #include "NVPTX.h" #include "NVPTXRegisterInfo.h" diff --git a/lib/Target/NVPTX/NVPTXInstrInfo.td b/lib/Target/NVPTX/NVPTXInstrInfo.td index d2c0373..9900b8c 100644 --- a/lib/Target/NVPTX/NVPTXInstrInfo.td +++ b/lib/Target/NVPTX/NVPTXInstrInfo.td @@ -139,17 +139,10 @@ def hasGenericLdSt : Predicate<"Subtarget.hasGenericLdSt()">; def doF32FTZ : Predicate<"useF32FTZ()">; def doNoF32FTZ : Predicate<"!useF32FTZ()">; -def doFMAF32 : Predicate<"doFMAF32">; -def doFMAF32_ftz : 
Predicate<"(doFMAF32 && useF32FTZ())">; -def doFMAF32AGG : Predicate<"doFMAF32AGG">; -def doFMAF32AGG_ftz : Predicate<"(doFMAF32AGG && useF32FTZ())">; -def doFMAF64 : Predicate<"doFMAF64">; -def doFMAF64AGG : Predicate<"doFMAF64AGG">; - def doMulWide : Predicate<"doMulWide">; -def allowFMA : Predicate<"allowFMA">; -def allowFMA_ftz : Predicate<"(allowFMA && useF32FTZ())">; +def allowFMA : Predicate<"allowFMA()">; +def noFMA : Predicate<"!allowFMA()">; def do_DIVF32_APPROX : Predicate<"getDivF32Level()==0">; def do_DIVF32_FULL : Predicate<"getDivF32Level()==1">; @@ -222,13 +215,13 @@ multiclass F3<string OpcStr, SDNode OpNode> { !strconcat(OpcStr, ".ftz.f32 \t$dst, $a, $b;"), [(set Float32Regs:$dst, (OpNode Float32Regs:$a, Float32Regs:$b))]>, - Requires<[allowFMA_ftz]>; + Requires<[allowFMA, doF32FTZ]>; def f32ri_ftz : NVPTXInst<(outs Float32Regs:$dst), (ins Float32Regs:$a, f32imm:$b), !strconcat(OpcStr, ".ftz.f32 \t$dst, $a, $b;"), [(set Float32Regs:$dst, (OpNode Float32Regs:$a, fpimm:$b))]>, - Requires<[allowFMA_ftz]>; + Requires<[allowFMA, doF32FTZ]>; def f32rr : NVPTXInst<(outs Float32Regs:$dst), (ins Float32Regs:$a, Float32Regs:$b), !strconcat(OpcStr, ".f32 \t$dst, $a, $b;"), @@ -248,34 +241,38 @@ multiclass F3_rn<string OpcStr, SDNode OpNode> { (ins Float64Regs:$a, Float64Regs:$b), !strconcat(OpcStr, ".rn.f64 \t$dst, $a, $b;"), [(set Float64Regs:$dst, - (OpNode Float64Regs:$a, Float64Regs:$b))]>; + (OpNode Float64Regs:$a, Float64Regs:$b))]>, + Requires<[noFMA]>; def f64ri : NVPTXInst<(outs Float64Regs:$dst), (ins Float64Regs:$a, f64imm:$b), !strconcat(OpcStr, ".rn.f64 \t$dst, $a, $b;"), [(set Float64Regs:$dst, - (OpNode Float64Regs:$a, fpimm:$b))]>; + (OpNode Float64Regs:$a, fpimm:$b))]>, + Requires<[noFMA]>; def f32rr_ftz : NVPTXInst<(outs Float32Regs:$dst), (ins Float32Regs:$a, Float32Regs:$b), !strconcat(OpcStr, ".rn.ftz.f32 \t$dst, $a, $b;"), [(set Float32Regs:$dst, (OpNode Float32Regs:$a, Float32Regs:$b))]>, - Requires<[doF32FTZ]>; + Requires<[noFMA, 
doF32FTZ]>; def f32ri_ftz : NVPTXInst<(outs Float32Regs:$dst), (ins Float32Regs:$a, f32imm:$b), !strconcat(OpcStr, ".rn.ftz.f32 \t$dst, $a, $b;"), [(set Float32Regs:$dst, (OpNode Float32Regs:$a, fpimm:$b))]>, - Requires<[doF32FTZ]>; + Requires<[noFMA, doF32FTZ]>; def f32rr : NVPTXInst<(outs Float32Regs:$dst), (ins Float32Regs:$a, Float32Regs:$b), !strconcat(OpcStr, ".rn.f32 \t$dst, $a, $b;"), [(set Float32Regs:$dst, - (OpNode Float32Regs:$a, Float32Regs:$b))]>; + (OpNode Float32Regs:$a, Float32Regs:$b))]>, + Requires<[noFMA]>; def f32ri : NVPTXInst<(outs Float32Regs:$dst), (ins Float32Regs:$a, f32imm:$b), !strconcat(OpcStr, ".rn.f32 \t$dst, $a, $b;"), [(set Float32Regs:$dst, - (OpNode Float32Regs:$a, fpimm:$b))]>; + (OpNode Float32Regs:$a, fpimm:$b))]>, + Requires<[noFMA]>; } multiclass F2<string OpcStr, SDNode OpNode> { @@ -919,8 +916,8 @@ multiclass FPCONTRACT64<string OpcStr, Predicate Pred> { } defm FMA32_ftz : FPCONTRACT32<"fma.rn.ftz.f32", doF32FTZ>; -defm FMA32 : FPCONTRACT32<"fma.rn.f32", doNoF32FTZ>; -defm FMA64 : FPCONTRACT64<"fma.rn.f64", doNoF32FTZ>; +defm FMA32 : FPCONTRACT32<"fma.rn.f32", true>; +defm FMA64 : FPCONTRACT64<"fma.rn.f64", true>; def SINF: NVPTXInst<(outs Float32Regs:$dst), (ins Float32Regs:$src), "sin.approx.f32 \t$dst, $src;", @@ -1917,7 +1914,7 @@ def StoreParamV2I8 : StoreParamV2Inst<Int16Regs, ".b8">; def StoreParamV4I32 : NVPTXInst<(outs), (ins Int32Regs:$val, Int32Regs:$val2, Int32Regs:$val3, Int32Regs:$val4, i32imm:$a, i32imm:$b), - "st.param.b32\t[param$a+$b], {{$val, $val2, $val3, $val4}};", + "st.param.v4.b32\t[param$a+$b], {{$val, $val2, $val3, $val4}};", []>; def StoreParamV4I16 : NVPTXInst<(outs), (ins Int16Regs:$val, Int16Regs:$val2, diff --git a/lib/Target/NVPTX/NVPTXIntrinsics.td b/lib/Target/NVPTX/NVPTXIntrinsics.td index 0ad3dfa..14e51aa 100644 --- a/lib/Target/NVPTX/NVPTXIntrinsics.td +++ b/lib/Target/NVPTX/NVPTXIntrinsics.td @@ -792,13 +792,18 @@ def INT_NVVM_H2F : F_MATH_1<!strconcat("{{\n\t", "}}")))), Float32Regs, 
Int16Regs, int_nvvm_h2f>; -def : Pat<(f32 (f16_to_f32 Int16Regs:$a)), +def : Pat<(f32 (f16_to_fp Int16Regs:$a)), (CVT_f32_f16 Int16Regs:$a, CvtNONE)>; -def : Pat<(i16 (f32_to_f16 Float32Regs:$a)), +def : Pat<(i16 (fp_to_f16 Float32Regs:$a)), (CVT_f16_f32 Float32Regs:$a, CvtRN_FTZ)>, Requires<[doF32FTZ]>; -def : Pat<(i16 (f32_to_f16 Float32Regs:$a)), +def : Pat<(i16 (fp_to_f16 Float32Regs:$a)), (CVT_f16_f32 Float32Regs:$a, CvtRN)>; +def : Pat<(f64 (f16_to_fp Int16Regs:$a)), + (CVT_f64_f16 Int16Regs:$a, CvtNONE)>; +def : Pat<(i16 (fp_to_f16 Float64Regs:$a)), + (CVT_f16_f64 Float64Regs:$a, CvtRN)>; + // // Bitcast // @@ -1936,9 +1941,10 @@ def : Pat<(int_nvvm_rotate_right_b64 Int64Regs:$src, Int32Regs:$amt), // NOTE: For Fermi support, any new texture/surface/sampler intrinsics must be // also defined in NVPTXReplaceImageHandles.cpp - +// texmode_independent +let IsTex = 1, IsTexModeUnified = 0 in { // Texture fetch instructions using handles -def TEX_1D_F32_I32 +def TEX_1D_F32_S32 : NVPTXInst<(outs Float32Regs:$r, Float32Regs:$g, Float32Regs:$b, Float32Regs:$a), (ins Int64Regs:$t, Int64Regs:$s, Int32Regs:$x), @@ -1965,19 +1971,19 @@ def TEX_1D_F32_F32_GRAD "tex.grad.1d.v4.f32.f32\t\\{$r, $g, $b, $a\\}, " "[$t, $s, \\{$x\\}], \\{$gradx\\}, \\{$grady\\};", []>; -def TEX_1D_I32_I32 +def TEX_1D_S32_S32 : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a), (ins Int64Regs:$t, Int64Regs:$s, Int32Regs:$x), "tex.1d.v4.s32.s32\t\\{$r, $g, $b, $a\\}, [$t, $s, \\{$x\\}];", []>; -def TEX_1D_I32_F32 +def TEX_1D_S32_F32 : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a), (ins Int64Regs:$t, Int64Regs:$s, Float32Regs:$x), "tex.1d.v4.s32.f32\t\\{$r, $g, $b, $a\\}, [$t, $s, \\{$x\\}];", []>; -def TEX_1D_I32_F32_LEVEL +def TEX_1D_S32_F32_LEVEL : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a), (ins Int64Regs:$t, Int64Regs:$s, Float32Regs:$x, @@ -1985,7 +1991,7 @@ def TEX_1D_I32_F32_LEVEL "tex.level.1d.v4.s32.f32\t\\{$r, 
$g, $b, $a\\}, " "[$t, $s, \\{$x\\}], $lod;", []>; -def TEX_1D_I32_F32_GRAD +def TEX_1D_S32_F32_GRAD : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a), (ins Int64Regs:$t, Int64Regs:$s, Float32Regs:$x, @@ -1993,8 +1999,36 @@ def TEX_1D_I32_F32_GRAD "tex.grad.1d.v4.s32.f32\t\\{$r, $g, $b, $a\\}, " "[$t, $s, \\{$x\\}], \\{$gradx\\}, \\{$grady\\};", []>; +def TEX_1D_U32_S32 + : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g, + Int32Regs:$b, Int32Regs:$a), + (ins Int64Regs:$t, Int64Regs:$s, Int32Regs:$x), + "tex.1d.v4.u32.s32\t\\{$r, $g, $b, $a\\}, [$t, $s, \\{$x\\}];", + []>; +def TEX_1D_U32_F32 + : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g, + Int32Regs:$b, Int32Regs:$a), + (ins Int64Regs:$t, Int64Regs:$s, Float32Regs:$x), + "tex.1d.v4.u32.f32\t\\{$r, $g, $b, $a\\}, [$t, $s, \\{$x\\}];", + []>; +def TEX_1D_U32_F32_LEVEL + : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g, + Int32Regs:$b, Int32Regs:$a), + (ins Int64Regs:$t, Int64Regs:$s, Float32Regs:$x, + Float32Regs:$lod), + "tex.level.1d.v4.u32.f32\t\\{$r, $g, $b, $a\\}, " + "[$t, $s, \\{$x\\}], $lod;", + []>; +def TEX_1D_U32_F32_GRAD + : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g, + Int32Regs:$b, Int32Regs:$a), + (ins Int64Regs:$t, Int64Regs:$s, Float32Regs:$x, + Float32Regs:$gradx, Float32Regs:$grady), + "tex.grad.1d.v4.u32.f32\t\\{$r, $g, $b, $a\\}, " + "[$t, $s, \\{$x\\}], \\{$gradx\\}, \\{$grady\\};", + []>; -def TEX_1D_ARRAY_F32_I32 +def TEX_1D_ARRAY_F32_S32 : NVPTXInst<(outs Float32Regs:$r, Float32Regs:$g, Float32Regs:$b, Float32Regs:$a), (ins Int64Regs:$t, Int64Regs:$s, Int32Regs:$l, Int32Regs:$x), @@ -2024,21 +2058,21 @@ def TEX_1D_ARRAY_F32_F32_GRAD "tex.grad.a1d.v4.f32.f32\t\\{$r, $g, $b, $a\\}, " "[$t, $s, \\{$l, $x\\}], \\{$gradx\\}, \\{$grady\\};", []>; -def TEX_1D_ARRAY_I32_I32 +def TEX_1D_ARRAY_S32_S32 : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a), (ins Int64Regs:$t, Int64Regs:$s, Int32Regs:$l, Int32Regs:$x), "tex.a1d.v4.s32.s32\t\\{$r, $g, $b, $a\\}, " "[$t, 
$s, \\{$l, $x\\}];", []>; -def TEX_1D_ARRAY_I32_F32 +def TEX_1D_ARRAY_S32_F32 : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a), (ins Int64Regs:$t, Int64Regs:$s, Int32Regs:$l, Float32Regs:$x), "tex.a1d.v4.s32.f32\t\\{$r, $g, $b, $a\\}, " "[$t, $s, \\{$l, $x\\}];", []>; -def TEX_1D_ARRAY_I32_F32_LEVEL +def TEX_1D_ARRAY_S32_F32_LEVEL : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a), (ins Int64Regs:$t, Int64Regs:$s, Int32Regs:$l, Float32Regs:$x, @@ -2046,7 +2080,7 @@ def TEX_1D_ARRAY_I32_F32_LEVEL "tex.level.a1d.v4.s32.f32\t\\{$r, $g, $b, $a\\}, " "[$t, $s, \\{$l, $x\\}], $lod;", []>; -def TEX_1D_ARRAY_I32_F32_GRAD +def TEX_1D_ARRAY_S32_F32_GRAD : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a), (ins Int64Regs:$t, Int64Regs:$s, Int32Regs:$l, Float32Regs:$x, @@ -2054,8 +2088,38 @@ def TEX_1D_ARRAY_I32_F32_GRAD "tex.grad.a1d.v4.s32.f32\t\\{$r, $g, $b, $a\\}, " "[$t, $s, \\{$l, $x\\}], \\{$gradx\\}, \\{$grady\\};", []>; +def TEX_1D_ARRAY_U32_S32 + : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g, + Int32Regs:$b, Int32Regs:$a), + (ins Int64Regs:$t, Int64Regs:$s, Int32Regs:$l, Int32Regs:$x), + "tex.a1d.v4.u32.s32\t\\{$r, $g, $b, $a\\}, " + "[$t, $s, \\{$l, $x\\}];", + []>; +def TEX_1D_ARRAY_U32_F32 + : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g, + Int32Regs:$b, Int32Regs:$a), + (ins Int64Regs:$t, Int64Regs:$s, Int32Regs:$l, Float32Regs:$x), + "tex.a1d.v4.u32.f32\t\\{$r, $g, $b, $a\\}, " + "[$t, $s, \\{$l, $x\\}];", + []>; +def TEX_1D_ARRAY_U32_F32_LEVEL + : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g, + Int32Regs:$b, Int32Regs:$a), + (ins Int64Regs:$t, Int64Regs:$s, Int32Regs:$l, Float32Regs:$x, + Float32Regs:$lod), + "tex.level.a1d.v4.u32.f32\t\\{$r, $g, $b, $a\\}, " + "[$t, $s, \\{$l, $x\\}], $lod;", + []>; +def TEX_1D_ARRAY_U32_F32_GRAD + : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g, + Int32Regs:$b, Int32Regs:$a), + (ins Int64Regs:$t, Int64Regs:$s, Int32Regs:$l, Float32Regs:$x, + Float32Regs:$gradx, 
Float32Regs:$grady), + "tex.grad.a1d.v4.u32.f32\t\\{$r, $g, $b, $a\\}, " + "[$t, $s, \\{$l, $x\\}], \\{$gradx\\}, \\{$grady\\};", + []>; -def TEX_2D_F32_I32 +def TEX_2D_F32_S32 : NVPTXInst<(outs Float32Regs:$r, Float32Regs:$g, Float32Regs:$b, Float32Regs:$a), (ins Int64Regs:$t, Int64Regs:$s, Int32Regs:$x, Int32Regs:$y), @@ -2087,21 +2151,21 @@ def TEX_2D_F32_F32_GRAD "[$t, $s, \\{$x, $y\\}], \\{$gradx0, $gradx1\\}, " "\\{$grady0, $grady1\\};", []>; -def TEX_2D_I32_I32 +def TEX_2D_S32_S32 : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a), (ins Int64Regs:$t, Int64Regs:$s, Int32Regs:$x, Int32Regs:$y), "tex.2d.v4.s32.s32\t\\{$r, $g, $b, $a\\}, " "[$t, $s, \\{$x, $y\\}];", []>; -def TEX_2D_I32_F32 +def TEX_2D_S32_F32 : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a), (ins Int64Regs:$t, Int64Regs:$s, Float32Regs:$x, Float32Regs:$y), "tex.2d.v4.s32.f32\t\\{$r, $g, $b, $a\\}, " "[$t, $s, \\{$x, $y\\}];", []>; -def TEX_2D_I32_F32_LEVEL +def TEX_2D_S32_F32_LEVEL : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a), (ins Int64Regs:$t, Int64Regs:$s, Float32Regs:$x, Float32Regs:$y, @@ -2109,7 +2173,7 @@ def TEX_2D_I32_F32_LEVEL "tex.level.2d.v4.s32.f32\t\\{$r, $g, $b, $a\\}, " "[$t, $s, \\{$x, $y\\}], $lod;", []>; -def TEX_2D_I32_F32_GRAD +def TEX_2D_S32_F32_GRAD : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a), (ins Int64Regs:$t, Int64Regs:$s, Float32Regs:$x, Float32Regs:$y, @@ -2119,8 +2183,40 @@ def TEX_2D_I32_F32_GRAD "[$t, $s, \\{$x, $y\\}], \\{$gradx0, $gradx1\\}, " "\\{$grady0, $grady1\\};", []>; +def TEX_2D_U32_S32 + : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g, + Int32Regs:$b, Int32Regs:$a), + (ins Int64Regs:$t, Int64Regs:$s, Int32Regs:$x, Int32Regs:$y), + "tex.2d.v4.u32.s32\t\\{$r, $g, $b, $a\\}, " + "[$t, $s, \\{$x, $y\\}];", + []>; +def TEX_2D_U32_F32 + : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g, + Int32Regs:$b, Int32Regs:$a), + (ins Int64Regs:$t, Int64Regs:$s, 
Float32Regs:$x, Float32Regs:$y), + "tex.2d.v4.u32.f32\t\\{$r, $g, $b, $a\\}, " + "[$t, $s, \\{$x, $y\\}];", + []>; +def TEX_2D_U32_F32_LEVEL + : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g, + Int32Regs:$b, Int32Regs:$a), + (ins Int64Regs:$t, Int64Regs:$s, Float32Regs:$x, Float32Regs:$y, + Float32Regs:$lod), + "tex.level.2d.v4.u32.f32\t\\{$r, $g, $b, $a\\}, " + "[$t, $s, \\{$x, $y\\}], $lod;", + []>; +def TEX_2D_U32_F32_GRAD + : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g, + Int32Regs:$b, Int32Regs:$a), + (ins Int64Regs:$t, Int64Regs:$s, Float32Regs:$x, Float32Regs:$y, + Float32Regs:$gradx0, Float32Regs:$gradx1, + Float32Regs:$grady0, Float32Regs:$grady1), + "tex.grad.2d.v4.u32.f32\t\\{$r, $g, $b, $a\\}, " + "[$t, $s, \\{$x, $y\\}], \\{$gradx0, $gradx1\\}, " + "\\{$grady0, $grady1\\};", + []>; -def TEX_2D_ARRAY_F32_I32 +def TEX_2D_ARRAY_F32_S32 : NVPTXInst<(outs Float32Regs:$r, Float32Regs:$g, Float32Regs:$b, Float32Regs:$a), (ins Int64Regs:$t, Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, @@ -2154,7 +2250,7 @@ def TEX_2D_ARRAY_F32_F32_GRAD "[$t, $s, \\{$l, $x, $y, $y\\}], \\{$gradx0, $gradx1\\}, " "\\{$grady0, $grady1\\};", []>; -def TEX_2D_ARRAY_I32_I32 +def TEX_2D_ARRAY_S32_S32 : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a), (ins Int64Regs:$t, Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, @@ -2162,7 +2258,7 @@ def TEX_2D_ARRAY_I32_I32 "tex.a2d.v4.s32.s32\t\\{$r, $g, $b, $a\\}, " "[$t, $s, \\{$l, $x, $y, $y\\}];", []>; -def TEX_2D_ARRAY_I32_F32 +def TEX_2D_ARRAY_S32_F32 : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a), (ins Int64Regs:$t, Int64Regs:$s, Int32Regs:$l, Float32Regs:$x, @@ -2170,7 +2266,7 @@ def TEX_2D_ARRAY_I32_F32 "tex.a2d.v4.s32.f32\t\\{$r, $g, $b, $a\\}, " "[$t, $s, \\{$l, $x, $y, $y\\}];", []>; -def TEX_2D_ARRAY_I32_F32_LEVEL +def TEX_2D_ARRAY_S32_F32_LEVEL : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a), (ins Int64Regs:$t, Int64Regs:$s, Int32Regs:$l, Float32Regs:$x, @@ -2178,7 
+2274,7 @@ def TEX_2D_ARRAY_I32_F32_LEVEL "tex.level.a2d.v4.s32.f32\t\\{$r, $g, $b, $a\\}, " "[$t, $s, \\{$l, $x, $y, $y\\}], $lod;", []>; -def TEX_2D_ARRAY_I32_F32_GRAD +def TEX_2D_ARRAY_S32_F32_GRAD : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a), (ins Int64Regs:$t, Int64Regs:$s, Int32Regs:$l, Float32Regs:$x, @@ -2189,8 +2285,43 @@ def TEX_2D_ARRAY_I32_F32_GRAD "[$t, $s, \\{$l, $x, $y, $y\\}], \\{$gradx0, $gradx1\\}, " "\\{$grady0, $grady1\\};", []>; +def TEX_2D_ARRAY_U32_S32 + : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g, + Int32Regs:$b, Int32Regs:$a), + (ins Int64Regs:$t, Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, + Int32Regs:$y), + "tex.a2d.v4.u32.s32\t\\{$r, $g, $b, $a\\}, " + "[$t, $s, \\{$l, $x, $y, $y\\}];", + []>; +def TEX_2D_ARRAY_U32_F32 + : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g, + Int32Regs:$b, Int32Regs:$a), + (ins Int64Regs:$t, Int64Regs:$s, Int32Regs:$l, Float32Regs:$x, + Float32Regs:$y), + "tex.a2d.v4.u32.f32\t\\{$r, $g, $b, $a\\}, " + "[$t, $s, \\{$l, $x, $y, $y\\}];", + []>; +def TEX_2D_ARRAY_U32_F32_LEVEL + : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g, + Int32Regs:$b, Int32Regs:$a), + (ins Int64Regs:$t, Int64Regs:$s, Int32Regs:$l, Float32Regs:$x, + Float32Regs:$y, Float32Regs:$lod), + "tex.level.a2d.v4.u32.f32\t\\{$r, $g, $b, $a\\}, " + "[$t, $s, \\{$l, $x, $y, $y\\}], $lod;", + []>; +def TEX_2D_ARRAY_U32_F32_GRAD + : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g, + Int32Regs:$b, Int32Regs:$a), + (ins Int64Regs:$t, Int64Regs:$s, Int32Regs:$l, Float32Regs:$x, + Float32Regs:$y, + Float32Regs:$gradx0, Float32Regs:$gradx1, + Float32Regs:$grady0, Float32Regs:$grady1), + "tex.grad.a2d.v4.u32.f32\t\\{$r, $g, $b, $a\\}, " + "[$t, $s, \\{$l, $x, $y, $y\\}], \\{$gradx0, $gradx1\\}, " + "\\{$grady0, $grady1\\};", + []>; -def TEX_3D_F32_I32 +def TEX_3D_F32_S32 : NVPTXInst<(outs Float32Regs:$r, Float32Regs:$g, Float32Regs:$b, Float32Regs:$a), (ins Int64Regs:$t, Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, @@ -2227,7 +2358,7 @@ def 
TEX_3D_F32_F32_GRAD "\\{$gradx0, $gradx1, $gradx2, $gradx2\\}, " "\\{$grady0, $grady1, $grady2, $grady2\\};", []>; -def TEX_3D_I32_I32 +def TEX_3D_S32_S32 : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a), (ins Int64Regs:$t, Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, @@ -2235,7 +2366,7 @@ def TEX_3D_I32_I32 "tex.3d.v4.s32.s32\t\\{$r, $g, $b, $a\\}, " "[$t, $s, \\{$x, $y, $z, $z\\}];", []>; -def TEX_3D_I32_F32 +def TEX_3D_S32_F32 : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a), (ins Int64Regs:$t, Int64Regs:$s, Float32Regs:$x, Float32Regs:$y, @@ -2243,7 +2374,7 @@ def TEX_3D_I32_F32 "tex.3d.v4.s32.f32\t\\{$r, $g, $b, $a\\}, " "[$t, $s, \\{$x, $y, $z, $z\\}];", []>; -def TEX_3D_I32_F32_LEVEL +def TEX_3D_S32_F32_LEVEL : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a), (ins Int64Regs:$t, Int64Regs:$s, Float32Regs:$x, Float32Regs:$y, @@ -2251,7 +2382,7 @@ def TEX_3D_I32_F32_LEVEL "tex.level.3d.v4.s32.f32\t\\{$r, $g, $b, $a\\}, " "[$t, $s, \\{$x, $y, $z, $z\\}], $lod;", []>; -def TEX_3D_I32_F32_GRAD +def TEX_3D_S32_F32_GRAD : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a), (ins Int64Regs:$t, Int64Regs:$s, Float32Regs:$x, Float32Regs:$y, @@ -2264,104 +2395,1276 @@ def TEX_3D_I32_F32_GRAD "\\{$gradx0, $gradx1, $gradx2, $gradx2\\}, " "\\{$grady0, $grady1, $grady2, $grady2\\};", []>; +def TEX_3D_U32_S32 + : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g, + Int32Regs:$b, Int32Regs:$a), + (ins Int64Regs:$t, Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, + Int32Regs:$z), + "tex.3d.v4.u32.s32\t\\{$r, $g, $b, $a\\}, " + "[$t, $s, \\{$x, $y, $z, $z\\}];", + []>; +def TEX_3D_U32_F32 + : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g, + Int32Regs:$b, Int32Regs:$a), + (ins Int64Regs:$t, Int64Regs:$s, Float32Regs:$x, Float32Regs:$y, + Float32Regs:$z), + "tex.3d.v4.u32.f32\t\\{$r, $g, $b, $a\\}, " + "[$t, $s, \\{$x, $y, $z, $z\\}];", + []>; +def TEX_3D_U32_F32_LEVEL + : NVPTXInst<(outs Int32Regs:$r, 
Int32Regs:$g, + Int32Regs:$b, Int32Regs:$a), + (ins Int64Regs:$t, Int64Regs:$s, Float32Regs:$x, Float32Regs:$y, + Float32Regs:$z, Float32Regs:$lod), + "tex.level.3d.v4.u32.f32\t\\{$r, $g, $b, $a\\}, " + "[$t, $s, \\{$x, $y, $z, $z\\}], $lod;", + []>; +def TEX_3D_U32_F32_GRAD + : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g, + Int32Regs:$b, Int32Regs:$a), + (ins Int64Regs:$t, Int64Regs:$s, Float32Regs:$x, Float32Regs:$y, + Float32Regs:$z, + Float32Regs:$gradx0, Float32Regs:$gradx1, + Float32Regs:$gradx2, Float32Regs:$grady0, + Float32Regs:$grady1, Float32Regs:$grady2), + "tex.grad.3d.v4.u32.f32\t\\{$r, $g, $b, $a\\}, " + "[$t, $s, \\{$x, $y, $z, $z\\}], " + "\\{$gradx0, $gradx1, $gradx2, $gradx2\\}, " + "\\{$grady0, $grady1, $grady2, $grady2\\};", + []>; +def TEX_CUBE_F32_F32 + : NVPTXInst<(outs Float32Regs:$r, Float32Regs:$g, + Float32Regs:$b, Float32Regs:$a), + (ins Int64Regs:$t, Int64Regs:$s, + Float32Regs:$x, Float32Regs:$y, Float32Regs:$z), + "tex.cube.v4.f32.f32\t\\{$r, $g, $b, $a\\}, " + "[$t, $s, \\{$x, $y, $z, $z\\}];", + []>; +def TEX_CUBE_F32_F32_LEVEL + : NVPTXInst<(outs Float32Regs:$r, Float32Regs:$g, + Float32Regs:$b, Float32Regs:$a), + (ins Int64Regs:$t, Int64Regs:$s, + Float32Regs:$x, Float32Regs:$y, Float32Regs:$z, + Float32Regs:$lod), + "tex.level.cube.v4.f32.f32\t\\{$r, $g, $b, $a\\}, " + "[$t, $s, \\{$x, $y, $z, $z\\}], $lod;", + []>; +def TEX_CUBE_S32_F32 + : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g, + Int32Regs:$b, Int32Regs:$a), + (ins Int64Regs:$t, Int64Regs:$s, + Float32Regs:$x, Float32Regs:$y, Float32Regs:$z), + "tex.cube.v4.s32.f32\t\\{$r, $g, $b, $a\\}, " + "[$t, $s, \\{$x, $y, $z, $z\\}];", + []>; +def TEX_CUBE_S32_F32_LEVEL + : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g, + Int32Regs:$b, Int32Regs:$a), + (ins Int64Regs:$t, Int64Regs:$s, + Float32Regs:$x, Float32Regs:$y, Float32Regs:$z, + Float32Regs:$lod), + "tex.level.cube.v4.s32.f32\t\\{$r, $g, $b, $a\\}, " + "[$t, $s, \\{$x, $y, $z, $z\\}], $lod;", + []>; +def TEX_CUBE_U32_F32 + : 
NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g, + Int32Regs:$b, Int32Regs:$a), + (ins Int64Regs:$t, Int64Regs:$s, + Float32Regs:$x, Float32Regs:$y, Float32Regs:$z), + "tex.cube.v4.u32.f32\t\\{$r, $g, $b, $a\\}, " + "[$t, $s, \\{$x, $y, $z, $z\\}];", + []>; +def TEX_CUBE_U32_F32_LEVEL + : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g, + Int32Regs:$b, Int32Regs:$a), + (ins Int64Regs:$t, Int64Regs:$s, + Float32Regs:$x, Float32Regs:$y, Float32Regs:$z, + Float32Regs:$lod), + "tex.level.cube.v4.u32.f32\t\\{$r, $g, $b, $a\\}, " + "[$t, $s, \\{$x, $y, $z, $z\\}], $lod;", + []>; -// Surface load instructions -def SULD_1D_I8_TRAP +def TEX_CUBE_ARRAY_F32_F32 + : NVPTXInst<(outs Float32Regs:$r, Float32Regs:$g, + Float32Regs:$b, Float32Regs:$a), + (ins Int64Regs:$t, Int64Regs:$s, Int32Regs:$l, + Float32Regs:$x, Float32Regs:$y, Float32Regs:$z), + "tex.acube.v4.f32.f32\t\\{$r, $g, $b, $a\\}, " + "[$t, $s, \\{$l, $x, $y, $z\\}];", + []>; +def TEX_CUBE_ARRAY_F32_F32_LEVEL + : NVPTXInst<(outs Float32Regs:$r, Float32Regs:$g, + Float32Regs:$b, Float32Regs:$a), + (ins Int64Regs:$t, Int64Regs:$s, Int32Regs:$l, + Float32Regs:$x, Float32Regs:$y, Float32Regs:$z, + Float32Regs:$lod), + "tex.level.acube.v4.f32.f32\t\\{$r, $g, $b, $a\\}, " + "[$t, $s, \\{$l, $x, $y, $z\\}], $lod;", + []>; +def TEX_CUBE_ARRAY_S32_F32 + : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g, + Int32Regs:$b, Int32Regs:$a), + (ins Int64Regs:$t, Int64Regs:$s, Int32Regs:$l, + Float32Regs:$x, Float32Regs:$y, Float32Regs:$z), + "tex.acube.v4.s32.f32\t\\{$r, $g, $b, $a\\}, " + "[$t, $s, \\{$l, $x, $y, $z\\}];", + []>; +def TEX_CUBE_ARRAY_S32_F32_LEVEL + : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g, + Int32Regs:$b, Int32Regs:$a), + (ins Int64Regs:$t, Int64Regs:$s, Int32Regs:$l, + Float32Regs:$x, Float32Regs:$y, Float32Regs:$z, + Float32Regs:$lod), + "tex.level.acube.v4.s32.f32\t\\{$r, $g, $b, $a\\}, " + "[$t, $s, \\{$l, $x, $y, $z\\}], $lod;", + []>; +def TEX_CUBE_ARRAY_U32_F32 + : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g, + 
Int32Regs:$b, Int32Regs:$a), + (ins Int64Regs:$t, Int64Regs:$s, Int32Regs:$l, + Float32Regs:$x, Float32Regs:$y, Float32Regs:$z), + "tex.acube.v4.u32.f32\t\\{$r, $g, $b, $a\\}, " + "[$t, $s, \\{$l, $x, $y, $z\\}];", + []>; +def TEX_CUBE_ARRAY_U32_F32_LEVEL + : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g, + Int32Regs:$b, Int32Regs:$a), + (ins Int64Regs:$t, Int64Regs:$s, Int32Regs:$l, + Float32Regs:$x, Float32Regs:$y, Float32Regs:$z, + Float32Regs:$lod), + "tex.level.acube.v4.u32.f32\t\\{$r, $g, $b, $a\\}, " + "[$t, $s, \\{$l, $x, $y, $z\\}], $lod;", + []>; + +def TLD4_R_2D_F32_F32 + : NVPTXInst<(outs Float32Regs:$v0, Float32Regs:$v1, + Float32Regs:$v2, Float32Regs:$v3), + (ins Int64Regs:$t, Int64Regs:$s, Float32Regs:$x, Float32Regs:$y), + "tld4.r.2d.v4.f32.f32\t\\{$v0, $v1, $v2, $v3\\}, " + "[$t, $s, \\{$x, $y\\}];", + []>; +def TLD4_G_2D_F32_F32 + : NVPTXInst<(outs Float32Regs:$v0, Float32Regs:$v1, + Float32Regs:$v2, Float32Regs:$v3), + (ins Int64Regs:$t, Int64Regs:$s, Float32Regs:$x, Float32Regs:$y), + "tld4.g.2d.v4.f32.f32\t\\{$v0, $v1, $v2, $v3\\}, " + "[$t, $s, \\{$x, $y\\}];", + []>; +def TLD4_B_2D_F32_F32 + : NVPTXInst<(outs Float32Regs:$v0, Float32Regs:$v1, + Float32Regs:$v2, Float32Regs:$v3), + (ins Int64Regs:$t, Int64Regs:$s, Float32Regs:$x, Float32Regs:$y), + "tld4.b.2d.v4.f32.f32\t\\{$v0, $v1, $v2, $v3\\}, " + "[$t, $s, \\{$x, $y\\}];", + []>; +def TLD4_A_2D_F32_F32 + : NVPTXInst<(outs Float32Regs:$v0, Float32Regs:$v1, + Float32Regs:$v2, Float32Regs:$v3), + (ins Int64Regs:$t, Int64Regs:$s, Float32Regs:$x, Float32Regs:$y), + "tld4.a.2d.v4.f32.f32\t\\{$v0, $v1, $v2, $v3\\}, " + "[$t, $s, \\{$x, $y\\}];", + []>; +def TLD4_R_2D_S32_F32 + : NVPTXInst<(outs Int32Regs:$v0, Int32Regs:$v1, + Int32Regs:$v2, Int32Regs:$v3), + (ins Int64Regs:$t, Int64Regs:$s, Float32Regs:$x, Float32Regs:$y), + "tld4.r.2d.v4.s32.f32\t\\{$v0, $v1, $v2, $v3\\}, " + "[$t, $s, \\{$x, $y\\}];", + []>; +def TLD4_G_2D_S32_F32 + : NVPTXInst<(outs Int32Regs:$v0, Int32Regs:$v1, + Int32Regs:$v2, 
Int32Regs:$v3), + (ins Int64Regs:$t, Int64Regs:$s, Float32Regs:$x, Float32Regs:$y), + "tld4.g.2d.v4.s32.f32\t\\{$v0, $v1, $v2, $v3\\}, " + "[$t, $s, \\{$x, $y\\}];", + []>; +def TLD4_B_2D_S32_F32 + : NVPTXInst<(outs Int32Regs:$v0, Int32Regs:$v1, + Int32Regs:$v2, Int32Regs:$v3), + (ins Int64Regs:$t, Int64Regs:$s, Float32Regs:$x, Float32Regs:$y), + "tld4.b.2d.v4.s32.f32\t\\{$v0, $v1, $v2, $v3\\}, " + "[$t, $s, \\{$x, $y\\}];", + []>; +def TLD4_A_2D_S32_F32 + : NVPTXInst<(outs Int32Regs:$v0, Int32Regs:$v1, + Int32Regs:$v2, Int32Regs:$v3), + (ins Int64Regs:$t, Int64Regs:$s, Float32Regs:$x, Float32Regs:$y), + "tld4.a.2d.v4.s32.f32\t\\{$v0, $v1, $v2, $v3\\}, " + "[$t, $s, \\{$x, $y\\}];", + []>; +def TLD4_R_2D_U32_F32 + : NVPTXInst<(outs Int32Regs:$v0, Int32Regs:$v1, + Int32Regs:$v2, Int32Regs:$v3), + (ins Int64Regs:$t, Int64Regs:$s, Float32Regs:$x, Float32Regs:$y), + "tld4.r.2d.v4.u32.f32\t\\{$v0, $v1, $v2, $v3\\}, " + "[$t, $s, \\{$x, $y\\}];", + []>; +def TLD4_G_2D_U32_F32 + : NVPTXInst<(outs Int32Regs:$v0, Int32Regs:$v1, + Int32Regs:$v2, Int32Regs:$v3), + (ins Int64Regs:$t, Int64Regs:$s, Float32Regs:$x, Float32Regs:$y), + "tld4.g.2d.v4.u32.f32\t\\{$v0, $v1, $v2, $v3\\}, " + "[$t, $s, \\{$x, $y\\}];", + []>; +def TLD4_B_2D_U32_F32 + : NVPTXInst<(outs Int32Regs:$v0, Int32Regs:$v1, + Int32Regs:$v2, Int32Regs:$v3), + (ins Int64Regs:$t, Int64Regs:$s, Float32Regs:$x, Float32Regs:$y), + "tld4.b.2d.v4.u32.f32\t\\{$v0, $v1, $v2, $v3\\}, " + "[$t, $s, \\{$x, $y\\}];", + []>; +def TLD4_A_2D_U32_F32 + : NVPTXInst<(outs Int32Regs:$v0, Int32Regs:$v1, + Int32Regs:$v2, Int32Regs:$v3), + (ins Int64Regs:$t, Int64Regs:$s, Float32Regs:$x, Float32Regs:$y), + "tld4.a.2d.v4.u32.f32\t\\{$v0, $v1, $v2, $v3\\}, " + "[$t, $s, \\{$x, $y\\}];", + []>; +} + + +// texmode_unified +let IsTex = 1, IsTexModeUnified = 1 in { +// Texture fetch instructions using handles +def TEX_UNIFIED_1D_F32_S32 + : NVPTXInst<(outs Float32Regs:$r, Float32Regs:$g, + Float32Regs:$b, Float32Regs:$a), + (ins 
Int64Regs:$t, Int32Regs:$x), + "tex.1d.v4.f32.s32\t\\{$r, $g, $b, $a\\}, [$t, \\{$x\\}];", + []>; +def TEX_UNIFIED_1D_F32_F32 + : NVPTXInst<(outs Float32Regs:$r, Float32Regs:$g, + Float32Regs:$b, Float32Regs:$a), + (ins Int64Regs:$t, Float32Regs:$x), + "tex.1d.v4.f32.f32\t\\{$r, $g, $b, $a\\}, [$t, \\{$x\\}];", + []>; +def TEX_UNIFIED_1D_F32_F32_LEVEL + : NVPTXInst<(outs Float32Regs:$r, Float32Regs:$g, + Float32Regs:$b, Float32Regs:$a), + (ins Int64Regs:$t, Float32Regs:$x, Float32Regs:$lod), + "tex.level.1d.v4.f32.f32\t\\{$r, $g, $b, $a\\}, " + "[$t, \\{$x\\}], $lod;", + []>; +def TEX_UNIFIED_1D_F32_F32_GRAD + : NVPTXInst<(outs Float32Regs:$r, Float32Regs:$g, + Float32Regs:$b, Float32Regs:$a), + (ins Int64Regs:$t, Float32Regs:$x, + Float32Regs:$gradx, Float32Regs:$grady), + "tex.grad.1d.v4.f32.f32\t\\{$r, $g, $b, $a\\}, " + "[$t, \\{$x\\}], \\{$gradx\\}, \\{$grady\\};", + []>; +def TEX_UNIFIED_1D_S32_S32 + : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g, + Int32Regs:$b, Int32Regs:$a), + (ins Int64Regs:$t, Int32Regs:$x), + "tex.1d.v4.s32.s32\t\\{$r, $g, $b, $a\\}, [$t, \\{$x\\}];", + []>; +def TEX_UNIFIED_1D_S32_F32 + : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g, + Int32Regs:$b, Int32Regs:$a), + (ins Int64Regs:$t, Float32Regs:$x), + "tex.1d.v4.s32.f32\t\\{$r, $g, $b, $a\\}, [$t, \\{$x\\}];", + []>; +def TEX_UNIFIED_1D_S32_F32_LEVEL + : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g, + Int32Regs:$b, Int32Regs:$a), + (ins Int64Regs:$t, Float32Regs:$x, + Float32Regs:$lod), + "tex.level.1d.v4.s32.f32\t\\{$r, $g, $b, $a\\}, " + "[$t, \\{$x\\}], $lod;", + []>; +def TEX_UNIFIED_1D_S32_F32_GRAD + : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g, + Int32Regs:$b, Int32Regs:$a), + (ins Int64Regs:$t, Float32Regs:$x, + Float32Regs:$gradx, Float32Regs:$grady), + "tex.grad.1d.v4.s32.f32\t\\{$r, $g, $b, $a\\}, " + "[$t, \\{$x\\}], \\{$gradx\\}, \\{$grady\\};", + []>; +def TEX_UNIFIED_1D_U32_S32 + : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g, + Int32Regs:$b, Int32Regs:$a), + (ins Int64Regs:$t, 
Int32Regs:$x), + "tex.1d.v4.u32.s32\t\\{$r, $g, $b, $a\\}, [$t, \\{$x\\}];", + []>; +def TEX_UNIFIED_1D_U32_F32 + : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g, + Int32Regs:$b, Int32Regs:$a), + (ins Int64Regs:$t, Float32Regs:$x), + "tex.1d.v4.u32.f32\t\\{$r, $g, $b, $a\\}, [$t, \\{$x\\}];", + []>; +def TEX_UNIFIED_1D_U32_F32_LEVEL + : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g, + Int32Regs:$b, Int32Regs:$a), + (ins Int64Regs:$t, Float32Regs:$x, + Float32Regs:$lod), + "tex.level.1d.v4.u32.f32\t\\{$r, $g, $b, $a\\}, " + "[$t, \\{$x\\}], $lod;", + []>; +def TEX_UNIFIED_1D_U32_F32_GRAD + : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g, + Int32Regs:$b, Int32Regs:$a), + (ins Int64Regs:$t, Float32Regs:$x, + Float32Regs:$gradx, Float32Regs:$grady), + "tex.grad.1d.v4.u32.f32\t\\{$r, $g, $b, $a\\}, " + "[$t, \\{$x\\}], \\{$gradx\\}, \\{$grady\\};", + []>; + +def TEX_UNIFIED_1D_ARRAY_F32_S32 + : NVPTXInst<(outs Float32Regs:$r, Float32Regs:$g, + Float32Regs:$b, Float32Regs:$a), + (ins Int64Regs:$t, Int32Regs:$l, Int32Regs:$x), + "tex.a1d.v4.f32.s32\t\\{$r, $g, $b, $a\\}, " + "[$t, \\{$l, $x\\}];", + []>; +def TEX_UNIFIED_1D_ARRAY_F32_F32 + : NVPTXInst<(outs Float32Regs:$r, Float32Regs:$g, + Float32Regs:$b, Float32Regs:$a), + (ins Int64Regs:$t, Int32Regs:$l, Float32Regs:$x), + "tex.a1d.v4.f32.f32\t\\{$r, $g, $b, $a\\}, " + "[$t, \\{$l, $x\\}];", + []>; +def TEX_UNIFIED_1D_ARRAY_F32_F32_LEVEL + : NVPTXInst<(outs Float32Regs:$r, Float32Regs:$g, + Float32Regs:$b, Float32Regs:$a), + (ins Int64Regs:$t, Int32Regs:$l, Float32Regs:$x, + Float32Regs:$lod), + "tex.level.a1d.v4.f32.f32\t\\{$r, $g, $b, $a\\}, " + "[$t, \\{$l, $x\\}], $lod;", + []>; +def TEX_UNIFIED_1D_ARRAY_F32_F32_GRAD + : NVPTXInst<(outs Float32Regs:$r, Float32Regs:$g, + Float32Regs:$b, Float32Regs:$a), + (ins Int64Regs:$t, Int32Regs:$l, Float32Regs:$x, + Float32Regs:$gradx, Float32Regs:$grady), + "tex.grad.a1d.v4.f32.f32\t\\{$r, $g, $b, $a\\}, " + "[$t, \\{$l, $x\\}], \\{$gradx\\}, \\{$grady\\};", + []>; +def 
TEX_UNIFIED_1D_ARRAY_S32_S32 + : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g, + Int32Regs:$b, Int32Regs:$a), + (ins Int64Regs:$t, Int32Regs:$l, Int32Regs:$x), + "tex.a1d.v4.s32.s32\t\\{$r, $g, $b, $a\\}, " + "[$t, \\{$l, $x\\}];", + []>; +def TEX_UNIFIED_1D_ARRAY_S32_F32 + : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g, + Int32Regs:$b, Int32Regs:$a), + (ins Int64Regs:$t, Int32Regs:$l, Float32Regs:$x), + "tex.a1d.v4.s32.f32\t\\{$r, $g, $b, $a\\}, " + "[$t, \\{$l, $x\\}];", + []>; +def TEX_UNIFIED_1D_ARRAY_S32_F32_LEVEL + : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g, + Int32Regs:$b, Int32Regs:$a), + (ins Int64Regs:$t, Int32Regs:$l, Float32Regs:$x, + Float32Regs:$lod), + "tex.level.a1d.v4.s32.f32\t\\{$r, $g, $b, $a\\}, " + "[$t, \\{$l, $x\\}], $lod;", + []>; +def TEX_UNIFIED_1D_ARRAY_S32_F32_GRAD + : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g, + Int32Regs:$b, Int32Regs:$a), + (ins Int64Regs:$t, Int32Regs:$l, Float32Regs:$x, + Float32Regs:$gradx, Float32Regs:$grady), + "tex.grad.a1d.v4.s32.f32\t\\{$r, $g, $b, $a\\}, " + "[$t, \\{$l, $x\\}], \\{$gradx\\}, \\{$grady\\};", + []>; +def TEX_UNIFIED_1D_ARRAY_U32_S32 + : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g, + Int32Regs:$b, Int32Regs:$a), + (ins Int64Regs:$t, Int32Regs:$l, Int32Regs:$x), + "tex.a1d.v4.u32.s32\t\\{$r, $g, $b, $a\\}, " + "[$t, \\{$l, $x\\}];", + []>; +def TEX_UNIFIED_1D_ARRAY_U32_F32 + : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g, + Int32Regs:$b, Int32Regs:$a), + (ins Int64Regs:$t, Int32Regs:$l, Float32Regs:$x), + "tex.a1d.v4.u32.f32\t\\{$r, $g, $b, $a\\}, " + "[$t, \\{$l, $x\\}];", + []>; +def TEX_UNIFIED_1D_ARRAY_U32_F32_LEVEL + : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g, + Int32Regs:$b, Int32Regs:$a), + (ins Int64Regs:$t, Int32Regs:$l, Float32Regs:$x, + Float32Regs:$lod), + "tex.level.a1d.v4.u32.f32\t\\{$r, $g, $b, $a\\}, " + "[$t, \\{$l, $x\\}], $lod;", + []>; +def TEX_UNIFIED_1D_ARRAY_U32_F32_GRAD + : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g, + Int32Regs:$b, Int32Regs:$a), + (ins Int64Regs:$t, 
Int32Regs:$l, Float32Regs:$x, + Float32Regs:$gradx, Float32Regs:$grady), + "tex.grad.a1d.v4.u32.f32\t\\{$r, $g, $b, $a\\}, " + "[$t, \\{$l, $x\\}], \\{$gradx\\}, \\{$grady\\};", + []>; + +def TEX_UNIFIED_2D_F32_S32 + : NVPTXInst<(outs Float32Regs:$r, Float32Regs:$g, + Float32Regs:$b, Float32Regs:$a), + (ins Int64Regs:$t, Int32Regs:$x, Int32Regs:$y), + "tex.2d.v4.f32.s32\t\\{$r, $g, $b, $a\\}, " + "[$t, \\{$x, $y\\}];", + []>; +def TEX_UNIFIED_2D_F32_F32 + : NVPTXInst<(outs Float32Regs:$r, Float32Regs:$g, + Float32Regs:$b, Float32Regs:$a), + (ins Int64Regs:$t, Float32Regs:$x, Float32Regs:$y), + "tex.2d.v4.f32.f32\t\\{$r, $g, $b, $a\\}, " + "[$t, \\{$x, $y\\}];", + []>; +def TEX_UNIFIED_2D_F32_F32_LEVEL + : NVPTXInst<(outs Float32Regs:$r, Float32Regs:$g, + Float32Regs:$b, Float32Regs:$a), + (ins Int64Regs:$t, Float32Regs:$x, Float32Regs:$y, + Float32Regs:$lod), + "tex.level.2d.v4.f32.f32\t\\{$r, $g, $b, $a\\}, " + "[$t, \\{$x, $y\\}], $lod;", + []>; +def TEX_UNIFIED_2D_F32_F32_GRAD + : NVPTXInst<(outs Float32Regs:$r, Float32Regs:$g, + Float32Regs:$b, Float32Regs:$a), + (ins Int64Regs:$t, Float32Regs:$x, Float32Regs:$y, + Float32Regs:$gradx0, Float32Regs:$gradx1, + Float32Regs:$grady0, Float32Regs:$grady1), + "tex.grad.2d.v4.f32.f32\t\\{$r, $g, $b, $a\\}, " + "[$t, \\{$x, $y\\}], \\{$gradx0, $gradx1\\}, " + "\\{$grady0, $grady1\\};", + []>; +def TEX_UNIFIED_2D_S32_S32 + : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g, + Int32Regs:$b, Int32Regs:$a), + (ins Int64Regs:$t, Int32Regs:$x, Int32Regs:$y), + "tex.2d.v4.s32.s32\t\\{$r, $g, $b, $a\\}, " + "[$t, \\{$x, $y\\}];", + []>; +def TEX_UNIFIED_2D_S32_F32 + : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g, + Int32Regs:$b, Int32Regs:$a), + (ins Int64Regs:$t, Float32Regs:$x, Float32Regs:$y), + "tex.2d.v4.s32.f32\t\\{$r, $g, $b, $a\\}, " + "[$t, \\{$x, $y\\}];", + []>; +def TEX_UNIFIED_2D_S32_F32_LEVEL + : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g, + Int32Regs:$b, Int32Regs:$a), + (ins Int64Regs:$t, Float32Regs:$x, Float32Regs:$y, 
+ Float32Regs:$lod), + "tex.level.2d.v4.s32.f32\t\\{$r, $g, $b, $a\\}, " + "[$t, \\{$x, $y\\}], $lod;", + []>; +def TEX_UNIFIED_2D_S32_F32_GRAD + : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g, + Int32Regs:$b, Int32Regs:$a), + (ins Int64Regs:$t, Float32Regs:$x, Float32Regs:$y, + Float32Regs:$gradx0, Float32Regs:$gradx1, + Float32Regs:$grady0, Float32Regs:$grady1), + "tex.grad.2d.v4.s32.f32\t\\{$r, $g, $b, $a\\}, " + "[$t, \\{$x, $y\\}], \\{$gradx0, $gradx1\\}, " + "\\{$grady0, $grady1\\};", + []>; +def TEX_UNIFIED_2D_U32_S32 + : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g, + Int32Regs:$b, Int32Regs:$a), + (ins Int64Regs:$t, Int32Regs:$x, Int32Regs:$y), + "tex.2d.v4.u32.s32\t\\{$r, $g, $b, $a\\}, " + "[$t, \\{$x, $y\\}];", + []>; +def TEX_UNIFIED_2D_U32_F32 + : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g, + Int32Regs:$b, Int32Regs:$a), + (ins Int64Regs:$t, Float32Regs:$x, Float32Regs:$y), + "tex.2d.v4.u32.f32\t\\{$r, $g, $b, $a\\}, " + "[$t, \\{$x, $y\\}];", + []>; +def TEX_UNIFIED_2D_U32_F32_LEVEL + : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g, + Int32Regs:$b, Int32Regs:$a), + (ins Int64Regs:$t, Float32Regs:$x, Float32Regs:$y, + Float32Regs:$lod), + "tex.level.2d.v4.u32.f32\t\\{$r, $g, $b, $a\\}, " + "[$t, \\{$x, $y\\}], $lod;", + []>; +def TEX_UNIFIED_2D_U32_F32_GRAD + : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g, + Int32Regs:$b, Int32Regs:$a), + (ins Int64Regs:$t, Float32Regs:$x, Float32Regs:$y, + Float32Regs:$gradx0, Float32Regs:$gradx1, + Float32Regs:$grady0, Float32Regs:$grady1), + "tex.grad.2d.v4.u32.f32\t\\{$r, $g, $b, $a\\}, " + "[$t, \\{$x, $y\\}], \\{$gradx0, $gradx1\\}, " + "\\{$grady0, $grady1\\};", + []>; + +def TEX_UNIFIED_2D_ARRAY_F32_S32 + : NVPTXInst<(outs Float32Regs:$r, Float32Regs:$g, + Float32Regs:$b, Float32Regs:$a), + (ins Int64Regs:$t, Int32Regs:$l, Int32Regs:$x, + Int32Regs:$y), + "tex.a2d.v4.f32.s32\t\\{$r, $g, $b, $a\\}, " + "[$t, \\{$l, $x, $y, $y\\}];", + []>; +def TEX_UNIFIED_2D_ARRAY_F32_F32 + : NVPTXInst<(outs Float32Regs:$r, 
Float32Regs:$g, + Float32Regs:$b, Float32Regs:$a), + (ins Int64Regs:$t, Int32Regs:$l, Float32Regs:$x, + Float32Regs:$y), + "tex.a2d.v4.f32.f32\t\\{$r, $g, $b, $a\\}, " + "[$t, \\{$l, $x, $y, $y\\}];", + []>; +def TEX_UNIFIED_2D_ARRAY_F32_F32_LEVEL + : NVPTXInst<(outs Float32Regs:$r, Float32Regs:$g, + Float32Regs:$b, Float32Regs:$a), + (ins Int64Regs:$t, Int32Regs:$l, Float32Regs:$x, + Float32Regs:$y, Float32Regs:$lod), + "tex.level.a2d.v4.f32.f32\t\\{$r, $g, $b, $a\\}, " + "[$t, \\{$l, $x, $y, $y\\}], $lod;", + []>; +def TEX_UNIFIED_2D_ARRAY_F32_F32_GRAD + : NVPTXInst<(outs Float32Regs:$r, Float32Regs:$g, + Float32Regs:$b, Float32Regs:$a), + (ins Int64Regs:$t, Int32Regs:$l, Float32Regs:$x, + Float32Regs:$y, Float32Regs:$gradx0, Float32Regs:$gradx1, + Float32Regs:$grady0, Float32Regs:$grady1), + "tex.grad.a2d.v4.f32.f32\t\\{$r, $g, $b, $a\\}, " + "[$t, \\{$l, $x, $y, $y\\}], \\{$gradx0, $gradx1\\}, " + "\\{$grady0, $grady1\\};", + []>; +def TEX_UNIFIED_2D_ARRAY_S32_S32 + : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g, + Int32Regs:$b, Int32Regs:$a), + (ins Int64Regs:$t, Int32Regs:$l, Int32Regs:$x, + Int32Regs:$y), + "tex.a2d.v4.s32.s32\t\\{$r, $g, $b, $a\\}, " + "[$t, \\{$l, $x, $y, $y\\}];", + []>; +def TEX_UNIFIED_2D_ARRAY_S32_F32 + : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g, + Int32Regs:$b, Int32Regs:$a), + (ins Int64Regs:$t, Int32Regs:$l, Float32Regs:$x, + Float32Regs:$y), + "tex.a2d.v4.s32.f32\t\\{$r, $g, $b, $a\\}, " + "[$t, \\{$l, $x, $y, $y\\}];", + []>; +def TEX_UNIFIED_2D_ARRAY_S32_F32_LEVEL + : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g, + Int32Regs:$b, Int32Regs:$a), + (ins Int64Regs:$t, Int32Regs:$l, Float32Regs:$x, + Float32Regs:$y, Float32Regs:$lod), + "tex.level.a2d.v4.s32.f32\t\\{$r, $g, $b, $a\\}, " + "[$t, \\{$l, $x, $y, $y\\}], $lod;", + []>; +def TEX_UNIFIED_2D_ARRAY_S32_F32_GRAD + : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g, + Int32Regs:$b, Int32Regs:$a), + (ins Int64Regs:$t, Int32Regs:$l, Float32Regs:$x, + Float32Regs:$y, + 
Float32Regs:$gradx0, Float32Regs:$gradx1, + Float32Regs:$grady0, Float32Regs:$grady1), + "tex.grad.a2d.v4.s32.f32\t\\{$r, $g, $b, $a\\}, " + "[$t, \\{$l, $x, $y, $y\\}], \\{$gradx0, $gradx1\\}, " + "\\{$grady0, $grady1\\};", + []>; +def TEX_UNIFIED_2D_ARRAY_U32_S32 + : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g, + Int32Regs:$b, Int32Regs:$a), + (ins Int64Regs:$t, Int32Regs:$l, Int32Regs:$x, + Int32Regs:$y), + "tex.a2d.v4.u32.s32\t\\{$r, $g, $b, $a\\}, " + "[$t, \\{$l, $x, $y, $y\\}];", + []>; +def TEX_UNIFIED_2D_ARRAY_U32_F32 + : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g, + Int32Regs:$b, Int32Regs:$a), + (ins Int64Regs:$t, Int32Regs:$l, Float32Regs:$x, + Float32Regs:$y), + "tex.a2d.v4.u32.f32\t\\{$r, $g, $b, $a\\}, " + "[$t, \\{$l, $x, $y, $y\\}];", + []>; +def TEX_UNIFIED_2D_ARRAY_U32_F32_LEVEL + : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g, + Int32Regs:$b, Int32Regs:$a), + (ins Int64Regs:$t, Int32Regs:$l, Float32Regs:$x, + Float32Regs:$y, Float32Regs:$lod), + "tex.level.a2d.v4.u32.f32\t\\{$r, $g, $b, $a\\}, " + "[$t, \\{$l, $x, $y, $y\\}], $lod;", + []>; +def TEX_UNIFIED_2D_ARRAY_U32_F32_GRAD + : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g, + Int32Regs:$b, Int32Regs:$a), + (ins Int64Regs:$t, Int32Regs:$l, Float32Regs:$x, + Float32Regs:$y, + Float32Regs:$gradx0, Float32Regs:$gradx1, + Float32Regs:$grady0, Float32Regs:$grady1), + "tex.grad.a2d.v4.u32.f32\t\\{$r, $g, $b, $a\\}, " + "[$t, \\{$l, $x, $y, $y\\}], \\{$gradx0, $gradx1\\}, " + "\\{$grady0, $grady1\\};", + []>; + +def TEX_UNIFIED_3D_F32_S32 + : NVPTXInst<(outs Float32Regs:$r, Float32Regs:$g, + Float32Regs:$b, Float32Regs:$a), + (ins Int64Regs:$t, Int32Regs:$x, Int32Regs:$y, + Int32Regs:$z), + "tex.3d.v4.f32.s32\t\\{$r, $g, $b, $a\\}, " + "[$t, \\{$x, $y, $z, $z\\}];", + []>; +def TEX_UNIFIED_3D_F32_F32 + : NVPTXInst<(outs Float32Regs:$r, Float32Regs:$g, + Float32Regs:$b, Float32Regs:$a), + (ins Int64Regs:$t, Float32Regs:$x, Float32Regs:$y, + Float32Regs:$z), + "tex.3d.v4.f32.f32\t\\{$r, $g, $b, $a\\}, " + 
"[$t, \\{$x, $y, $z, $z\\}];", + []>; +def TEX_UNIFIED_3D_F32_F32_LEVEL + : NVPTXInst<(outs Float32Regs:$r, Float32Regs:$g, + Float32Regs:$b, Float32Regs:$a), + (ins Int64Regs:$t, Float32Regs:$x, Float32Regs:$y, + Float32Regs:$z, Float32Regs:$lod), + "tex.level.3d.v4.f32.f32\t\\{$r, $g, $b, $a\\}, " + "[$t, \\{$x, $y, $z, $z\\}], $lod;", + []>; +def TEX_UNIFIED_3D_F32_F32_GRAD + : NVPTXInst<(outs Float32Regs:$r, Float32Regs:$g, + Float32Regs:$b, Float32Regs:$a), + (ins Int64Regs:$t, Float32Regs:$x, Float32Regs:$y, + Float32Regs:$z, + Float32Regs:$gradx0, Float32Regs:$gradx1, + Float32Regs:$gradx2, Float32Regs:$grady0, + Float32Regs:$grady1, Float32Regs:$grady2), + "tex.grad.3d.v4.f32.f32\t\\{$r, $g, $b, $a\\}, " + "[$t, \\{$x, $y, $z, $z\\}], " + "\\{$gradx0, $gradx1, $gradx2, $gradx2\\}, " + "\\{$grady0, $grady1, $grady2, $grady2\\};", + []>; +def TEX_UNIFIED_3D_S32_S32 + : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g, + Int32Regs:$b, Int32Regs:$a), + (ins Int64Regs:$t, Int32Regs:$x, Int32Regs:$y, + Int32Regs:$z), + "tex.3d.v4.s32.s32\t\\{$r, $g, $b, $a\\}, " + "[$t, \\{$x, $y, $z, $z\\}];", + []>; +def TEX_UNIFIED_3D_S32_F32 + : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g, + Int32Regs:$b, Int32Regs:$a), + (ins Int64Regs:$t, Float32Regs:$x, Float32Regs:$y, + Float32Regs:$z), + "tex.3d.v4.s32.f32\t\\{$r, $g, $b, $a\\}, " + "[$t, \\{$x, $y, $z, $z\\}];", + []>; +def TEX_UNIFIED_3D_S32_F32_LEVEL + : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g, + Int32Regs:$b, Int32Regs:$a), + (ins Int64Regs:$t, Float32Regs:$x, Float32Regs:$y, + Float32Regs:$z, Float32Regs:$lod), + "tex.level.3d.v4.s32.f32\t\\{$r, $g, $b, $a\\}, " + "[$t, \\{$x, $y, $z, $z\\}], $lod;", + []>; +def TEX_UNIFIED_3D_S32_F32_GRAD + : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g, + Int32Regs:$b, Int32Regs:$a), + (ins Int64Regs:$t, Float32Regs:$x, Float32Regs:$y, + Float32Regs:$z, + Float32Regs:$gradx0, Float32Regs:$gradx1, + Float32Regs:$gradx2, Float32Regs:$grady0, + Float32Regs:$grady1, Float32Regs:$grady2), + 
"tex.grad.3d.v4.s32.f32\t\\{$r, $g, $b, $a\\}, " + "[$t, \\{$x, $y, $z, $z\\}], " + "\\{$gradx0, $gradx1, $gradx2, $gradx2\\}, " + "\\{$grady0, $grady1, $grady2, $grady2\\};", + []>; +def TEX_UNIFIED_3D_U32_S32 + : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g, + Int32Regs:$b, Int32Regs:$a), + (ins Int64Regs:$t, Int32Regs:$x, Int32Regs:$y, + Int32Regs:$z), + "tex.3d.v4.u32.s32\t\\{$r, $g, $b, $a\\}, " + "[$t, \\{$x, $y, $z, $z\\}];", + []>; +def TEX_UNIFIED_3D_U32_F32 + : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g, + Int32Regs:$b, Int32Regs:$a), + (ins Int64Regs:$t, Float32Regs:$x, Float32Regs:$y, + Float32Regs:$z), + "tex.3d.v4.u32.f32\t\\{$r, $g, $b, $a\\}, " + "[$t, \\{$x, $y, $z, $z\\}];", + []>; +def TEX_UNIFIED_3D_U32_F32_LEVEL + : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g, + Int32Regs:$b, Int32Regs:$a), + (ins Int64Regs:$t, Float32Regs:$x, Float32Regs:$y, + Float32Regs:$z, Float32Regs:$lod), + "tex.level.3d.v4.u32.f32\t\\{$r, $g, $b, $a\\}, " + "[$t, \\{$x, $y, $z, $z\\}], $lod;", + []>; +def TEX_UNIFIED_3D_U32_F32_GRAD + : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g, + Int32Regs:$b, Int32Regs:$a), + (ins Int64Regs:$t, Float32Regs:$x, Float32Regs:$y, + Float32Regs:$z, + Float32Regs:$gradx0, Float32Regs:$gradx1, + Float32Regs:$gradx2, Float32Regs:$grady0, + Float32Regs:$grady1, Float32Regs:$grady2), + "tex.grad.3d.v4.u32.f32\t\\{$r, $g, $b, $a\\}, " + "[$t, \\{$x, $y, $z, $z\\}], " + "\\{$gradx0, $gradx1, $gradx2, $gradx2\\}, " + "\\{$grady0, $grady1, $grady2, $grady2\\};", + []>; + +def TEX_UNIFIED_CUBE_F32_F32 + : NVPTXInst<(outs Float32Regs:$r, Float32Regs:$g, + Float32Regs:$b, Float32Regs:$a), + (ins Int64Regs:$t, + Float32Regs:$x, Float32Regs:$y, Float32Regs:$z), + "tex.cube.v4.f32.f32\t\\{$r, $g, $b, $a\\}, " + "[$t, \\{$x, $y, $z, $z\\}];", + []>; +def TEX_UNIFIED_CUBE_F32_F32_LEVEL + : NVPTXInst<(outs Float32Regs:$r, Float32Regs:$g, + Float32Regs:$b, Float32Regs:$a), + (ins Int64Regs:$t, + Float32Regs:$x, Float32Regs:$y, Float32Regs:$z, + 
Float32Regs:$lod), + "tex.level.cube.v4.f32.f32\t\\{$r, $g, $b, $a\\}, " + "[$t, \\{$x, $y, $z, $z\\}], $lod;", + []>; +def TEX_UNIFIED_CUBE_S32_F32 + : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g, + Int32Regs:$b, Int32Regs:$a), + (ins Int64Regs:$t, + Float32Regs:$x, Float32Regs:$y, Float32Regs:$z), + "tex.cube.v4.s32.f32\t\\{$r, $g, $b, $a\\}, " + "[$t, \\{$x, $y, $z, $z\\}];", + []>; +def TEX_UNIFIED_CUBE_S32_F32_LEVEL + : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g, + Int32Regs:$b, Int32Regs:$a), + (ins Int64Regs:$t, + Float32Regs:$x, Float32Regs:$y, Float32Regs:$z, + Float32Regs:$lod), + "tex.level.cube.v4.s32.f32\t\\{$r, $g, $b, $a\\}, " + "[$t, \\{$x, $y, $z, $z\\}], $lod;", + []>; +def TEX_UNIFIED_CUBE_U32_F32 + : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g, + Int32Regs:$b, Int32Regs:$a), + (ins Int64Regs:$t, + Float32Regs:$x, Float32Regs:$y, Float32Regs:$z), + "tex.cube.v4.u32.f32\t\\{$r, $g, $b, $a\\}, " + "[$t, \\{$x, $y, $z, $z\\}];", + []>; +def TEX_UNIFIED_CUBE_U32_F32_LEVEL + : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g, + Int32Regs:$b, Int32Regs:$a), + (ins Int64Regs:$t, + Float32Regs:$x, Float32Regs:$y, Float32Regs:$z, + Float32Regs:$lod), + "tex.level.cube.v4.u32.f32\t\\{$r, $g, $b, $a\\}, " + "[$t, \\{$x, $y, $z, $z\\}], $lod;", + []>; + +def TEX_UNIFIED_CUBE_ARRAY_F32_F32 + : NVPTXInst<(outs Float32Regs:$r, Float32Regs:$g, + Float32Regs:$b, Float32Regs:$a), + (ins Int64Regs:$t, Int32Regs:$l, + Float32Regs:$x, Float32Regs:$y, Float32Regs:$z), + "tex.acube.v4.f32.f32\t\\{$r, $g, $b, $a\\}, " + "[$t, \\{$l, $x, $y, $z\\}];", + []>; +def TEX_UNIFIED_CUBE_ARRAY_F32_F32_LEVEL + : NVPTXInst<(outs Float32Regs:$r, Float32Regs:$g, + Float32Regs:$b, Float32Regs:$a), + (ins Int64Regs:$t, Int32Regs:$l, + Float32Regs:$x, Float32Regs:$y, Float32Regs:$z, + Float32Regs:$lod), + "tex.level.acube.v4.f32.f32\t\\{$r, $g, $b, $a\\}, " + "[$t, \\{$l, $x, $y, $z\\}], $lod;", + []>; +def TEX_UNIFIED_CUBE_ARRAY_S32_F32 + : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g, + 
Int32Regs:$b, Int32Regs:$a), + (ins Int64Regs:$t, Int32Regs:$l, + Float32Regs:$x, Float32Regs:$y, Float32Regs:$z), + "tex.acube.v4.s32.f32\t\\{$r, $g, $b, $a\\}, " + "[$t, \\{$l, $x, $y, $z\\}];", + []>; +def TEX_UNIFIED_CUBE_ARRAY_S32_F32_LEVEL + : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g, + Int32Regs:$b, Int32Regs:$a), + (ins Int64Regs:$t, Int32Regs:$l, + Float32Regs:$x, Float32Regs:$y, Float32Regs:$z, + Float32Regs:$lod), + "tex.level.acube.v4.s32.f32\t\\{$r, $g, $b, $a\\}, " + "[$t, \\{$l, $x, $y, $z\\}], $lod;", + []>; +def TEX_UNIFIED_CUBE_ARRAY_U32_F32 + : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g, + Int32Regs:$b, Int32Regs:$a), + (ins Int64Regs:$t, Int32Regs:$l, + Float32Regs:$x, Float32Regs:$y, Float32Regs:$z), + "tex.acube.v4.u32.f32\t\\{$r, $g, $b, $a\\}, " + "[$t, \\{$l, $x, $y, $z\\}];", + []>; +def TEX_UNIFIED_CUBE_ARRAY_U32_F32_LEVEL + : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g, + Int32Regs:$b, Int32Regs:$a), + (ins Int64Regs:$t, Int32Regs:$l, + Float32Regs:$x, Float32Regs:$y, Float32Regs:$z, + Float32Regs:$lod), + "tex.level.acube.v4.u32.f32\t\\{$r, $g, $b, $a\\}, " + "[$t, \\{$l, $x, $y, $z\\}], $lod;", + []>; + +def TLD4_UNIFIED_R_2D_F32_F32 + : NVPTXInst<(outs Float32Regs:$v0, Float32Regs:$v1, + Float32Regs:$v2, Float32Regs:$v3), + (ins Int64Regs:$t, Float32Regs:$x, Float32Regs:$y), + "tld4.r.2d.v4.f32.f32\t\\{$v0, $v1, $v2, $v3\\}, " + "[$t, \\{$x, $y\\}];", + []>; +def TLD4_UNIFIED_G_2D_F32_F32 + : NVPTXInst<(outs Float32Regs:$v0, Float32Regs:$v1, + Float32Regs:$v2, Float32Regs:$v3), + (ins Int64Regs:$t, Float32Regs:$x, Float32Regs:$y), + "tld4.g.2d.v4.f32.f32\t\\{$v0, $v1, $v2, $v3\\}, " + "[$t, \\{$x, $y\\}];", + []>; +def TLD4_UNIFIED_B_2D_F32_F32 + : NVPTXInst<(outs Float32Regs:$v0, Float32Regs:$v1, + Float32Regs:$v2, Float32Regs:$v3), + (ins Int64Regs:$t, Float32Regs:$x, Float32Regs:$y), + "tld4.b.2d.v4.f32.f32\t\\{$v0, $v1, $v2, $v3\\}, " + "[$t, \\{$x, $y\\}];", + []>; +def TLD4_UNIFIED_A_2D_F32_F32 + : NVPTXInst<(outs 
Float32Regs:$v0, Float32Regs:$v1, + Float32Regs:$v2, Float32Regs:$v3), + (ins Int64Regs:$t, Float32Regs:$x, Float32Regs:$y), + "tld4.a.2d.v4.f32.f32\t\\{$v0, $v1, $v2, $v3\\}, " + "[$t, \\{$x, $y\\}];", + []>; +def TLD4_UNIFIED_R_2D_S32_F32 + : NVPTXInst<(outs Int32Regs:$v0, Int32Regs:$v1, + Int32Regs:$v2, Int32Regs:$v3), + (ins Int64Regs:$t, Float32Regs:$x, Float32Regs:$y), + "tld4.r.2d.v4.s32.f32\t\\{$v0, $v1, $v2, $v3\\}, " + "[$t, \\{$x, $y\\}];", + []>; +def TLD4_UNIFIED_G_2D_S32_F32 + : NVPTXInst<(outs Int32Regs:$v0, Int32Regs:$v1, + Int32Regs:$v2, Int32Regs:$v3), + (ins Int64Regs:$t, Float32Regs:$x, Float32Regs:$y), + "tld4.g.2d.v4.s32.f32\t\\{$v0, $v1, $v2, $v3\\}, " + "[$t, \\{$x, $y\\}];", + []>; +def TLD4_UNIFIED_B_2D_S32_F32 + : NVPTXInst<(outs Int32Regs:$v0, Int32Regs:$v1, + Int32Regs:$v2, Int32Regs:$v3), + (ins Int64Regs:$t, Float32Regs:$x, Float32Regs:$y), + "tld4.b.2d.v4.s32.f32\t\\{$v0, $v1, $v2, $v3\\}, " + "[$t, \\{$x, $y\\}];", + []>; +def TLD4_UNIFIED_A_2D_S32_F32 + : NVPTXInst<(outs Int32Regs:$v0, Int32Regs:$v1, + Int32Regs:$v2, Int32Regs:$v3), + (ins Int64Regs:$t, Float32Regs:$x, Float32Regs:$y), + "tld4.a.2d.v4.s32.f32\t\\{$v0, $v1, $v2, $v3\\}, " + "[$t, \\{$x, $y\\}];", + []>; +def TLD4_UNIFIED_R_2D_U32_F32 + : NVPTXInst<(outs Int32Regs:$v0, Int32Regs:$v1, + Int32Regs:$v2, Int32Regs:$v3), + (ins Int64Regs:$t, Float32Regs:$x, Float32Regs:$y), + "tld4.r.2d.v4.u32.f32\t\\{$v0, $v1, $v2, $v3\\}, " + "[$t, \\{$x, $y\\}];", + []>; +def TLD4_UNIFIED_G_2D_U32_F32 + : NVPTXInst<(outs Int32Regs:$v0, Int32Regs:$v1, + Int32Regs:$v2, Int32Regs:$v3), + (ins Int64Regs:$t, Float32Regs:$x, Float32Regs:$y), + "tld4.g.2d.v4.u32.f32\t\\{$v0, $v1, $v2, $v3\\}, " + "[$t, \\{$x, $y\\}];", + []>; +def TLD4_UNIFIED_B_2D_U32_F32 + : NVPTXInst<(outs Int32Regs:$v0, Int32Regs:$v1, + Int32Regs:$v2, Int32Regs:$v3), + (ins Int64Regs:$t, Float32Regs:$x, Float32Regs:$y), + "tld4.b.2d.v4.u32.f32\t\\{$v0, $v1, $v2, $v3\\}, " + "[$t, \\{$x, $y\\}];", + []>; +def 
TLD4_UNIFIED_A_2D_U32_F32 + : NVPTXInst<(outs Int32Regs:$v0, Int32Regs:$v1, + Int32Regs:$v2, Int32Regs:$v3), + (ins Int64Regs:$t, Float32Regs:$x, Float32Regs:$y), + "tld4.a.2d.v4.u32.f32\t\\{$v0, $v1, $v2, $v3\\}, " + "[$t, \\{$x, $y\\}];", + []>; +} + + + +//=== Surface load instructions +// .clamp variant +let IsSuld = 1 in { +def SULD_1D_I8_CLAMP : NVPTXInst<(outs Int16Regs:$r), (ins Int64Regs:$s, Int32Regs:$x), - "suld.b.1d.b8.trap \\{$r\\}, [$s, \\{$x\\}];", + "suld.b.1d.b8.clamp \\{$r\\}, [$s, \\{$x\\}];", []>; -def SULD_1D_I16_TRAP +def SULD_1D_I16_CLAMP : NVPTXInst<(outs Int16Regs:$r), (ins Int64Regs:$s, Int32Regs:$x), - "suld.b.1d.b16.trap \\{$r\\}, [$s, \\{$x\\}];", + "suld.b.1d.b16.clamp \\{$r\\}, [$s, \\{$x\\}];", []>; -def SULD_1D_I32_TRAP +def SULD_1D_I32_CLAMP : NVPTXInst<(outs Int32Regs:$r), (ins Int64Regs:$s, Int32Regs:$x), - "suld.b.1d.b32.trap \\{$r\\}, [$s, \\{$x\\}];", + "suld.b.1d.b32.clamp \\{$r\\}, [$s, \\{$x\\}];", []>; -def SULD_1D_V2I8_TRAP - : NVPTXInst<(outs Int16Regs:$r, Int16Regs:$g), +def SULD_1D_I64_CLAMP + : NVPTXInst<(outs Int64Regs:$r), (ins Int64Regs:$s, Int32Regs:$x), - "suld.b.1d.v2.b8.trap \\{$r, $g\\}, [$s, \\{$x\\}];", + "suld.b.1d.b64.clamp \\{$r\\}, [$s, \\{$x\\}];", []>; -def SULD_1D_V2I16_TRAP - : NVPTXInst<(outs Int16Regs:$r, Int16Regs:$g), - (ins Int64Regs:$s, Int32Regs:$x), - "suld.b.1d.v2.b16.trap \\{$r, $g\\}, [$s, \\{$x\\}];", + +def SULD_1D_ARRAY_I8_CLAMP + : NVPTXInst<(outs Int16Regs:$r), + (ins Int64Regs:$s, Int32Regs:$l, Int32Regs:$x), + "suld.b.a1d.b8.clamp \\{$r\\}, [$s, \\{$l, $x\\}];", []>; -def SULD_1D_V2I32_TRAP - : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g), +def SULD_1D_ARRAY_I16_CLAMP + : NVPTXInst<(outs Int16Regs:$r), + (ins Int64Regs:$s, Int32Regs:$l, Int32Regs:$x), + "suld.b.a1d.b16.clamp \\{$r\\}, [$s, \\{$l, $x\\}];", + []>; +def SULD_1D_ARRAY_I32_CLAMP + : NVPTXInst<(outs Int32Regs:$r), + (ins Int64Regs:$s, Int32Regs:$l, Int32Regs:$x), + "suld.b.a1d.b32.clamp \\{$r\\}, [$s, \\{$l, $x\\}];", + 
[]>; +def SULD_1D_ARRAY_I64_CLAMP + : NVPTXInst<(outs Int64Regs:$r), + (ins Int64Regs:$s, Int32Regs:$l, Int32Regs:$x), + "suld.b.a1d.b64.clamp \\{$r\\}, [$s, \\{$l, $x\\}];", + []>; + +def SULD_2D_I8_CLAMP + : NVPTXInst<(outs Int16Regs:$r), + (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y), + "suld.b.2d.b8.clamp \\{$r\\}, [$s, \\{$x, $y\\}];", + []>; +def SULD_2D_I16_CLAMP + : NVPTXInst<(outs Int16Regs:$r), + (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y), + "suld.b.2d.b16.clamp \\{$r\\}, [$s, \\{$x, $y\\}];", + []>; +def SULD_2D_I32_CLAMP + : NVPTXInst<(outs Int32Regs:$r), + (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y), + "suld.b.2d.b32.clamp \\{$r\\}, [$s, \\{$x, $y\\}];", + []>; +def SULD_2D_I64_CLAMP + : NVPTXInst<(outs Int64Regs:$r), + (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y), + "suld.b.2d.b64.clamp \\{$r\\}, [$s, \\{$x, $y\\}];", + []>; + +def SULD_2D_ARRAY_I8_CLAMP + : NVPTXInst<(outs Int16Regs:$r), + (ins Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y), + "suld.b.a2d.b8.clamp \\{$r\\}, [$s, \\{$l, $x, $y, $y\\}];", + []>; +def SULD_2D_ARRAY_I16_CLAMP + : NVPTXInst<(outs Int16Regs:$r), + (ins Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y), + "suld.b.a2d.b16.clamp \\{$r\\}, [$s, \\{$l, $x, $y, $y\\}];", + []>; +def SULD_2D_ARRAY_I32_CLAMP + : NVPTXInst<(outs Int32Regs:$r), + (ins Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y), + "suld.b.a2d.b32.clamp \\{$r\\}, [$s, \\{$l, $x, $y, $y\\}];", + []>; +def SULD_2D_ARRAY_I64_CLAMP + : NVPTXInst<(outs Int64Regs:$r), + (ins Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y), + "suld.b.a2d.b64.clamp \\{$r\\}, [$s, \\{$l, $x, $y, $y\\}];", + []>; + +def SULD_3D_I8_CLAMP + : NVPTXInst<(outs Int16Regs:$r), + (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z), + "suld.b.3d.b8.clamp \\{$r\\}, [$s, \\{$x, $y, $z, $z\\}];", + []>; +def SULD_3D_I16_CLAMP + : NVPTXInst<(outs Int16Regs:$r), + (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z), + "suld.b.3d.b16.clamp 
\\{$r\\}, [$s, \\{$x, $y, $z, $z\\}];", + []>; +def SULD_3D_I32_CLAMP + : NVPTXInst<(outs Int32Regs:$r), + (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z), + "suld.b.3d.b32.clamp \\{$r\\}, [$s, \\{$x, $y, $z, $z\\}];", + []>; +def SULD_3D_I64_CLAMP + : NVPTXInst<(outs Int64Regs:$r), + (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z), + "suld.b.3d.b64.clamp \\{$r\\}, [$s, \\{$x, $y, $z, $z\\}];", + []>; +} + +let IsSuld = 2 in { +def SULD_1D_V2I8_CLAMP + : NVPTXInst<(outs Int16Regs:$r, Int16Regs:$g), (ins Int64Regs:$s, Int32Regs:$x), - "suld.b.1d.v2.b32.trap \\{$r, $g\\}, [$s, \\{$x\\}];", + "suld.b.1d.v2.b8.clamp \\{$r, $g\\}, [$s, \\{$x\\}];", []>; -def SULD_1D_V4I8_TRAP - : NVPTXInst<(outs Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a), +def SULD_1D_V2I16_CLAMP + : NVPTXInst<(outs Int16Regs:$r, Int16Regs:$g), (ins Int64Regs:$s, Int32Regs:$x), - "suld.b.1d.v4.b8.trap \\{$r, $g, $b, $a\\}, [$s, \\{$x\\}];", + "suld.b.1d.v2.b16.clamp \\{$r, $g\\}, [$s, \\{$x\\}];", []>; -def SULD_1D_V4I16_TRAP - : NVPTXInst<(outs Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a), +def SULD_1D_V2I32_CLAMP + : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g), (ins Int64Regs:$s, Int32Regs:$x), - "suld.b.1d.v4.b16.trap \\{$r, $g, $b, $a\\}, [$s, \\{$x\\}];", + "suld.b.1d.v2.b32.clamp \\{$r, $g\\}, [$s, \\{$x\\}];", []>; -def SULD_1D_V4I32_TRAP - : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a), +def SULD_1D_V2I64_CLAMP + : NVPTXInst<(outs Int64Regs:$r, Int64Regs:$g), (ins Int64Regs:$s, Int32Regs:$x), - "suld.b.1d.v4.b32.trap \\{$r, $g, $b, $a\\}, [$s, \\{$x\\}];", + "suld.b.1d.v2.b64.clamp \\{$r, $g\\}, [$s, \\{$x\\}];", []>; -def SULD_1D_ARRAY_I8_TRAP - : NVPTXInst<(outs Int16Regs:$r), +def SULD_1D_ARRAY_V2I8_CLAMP + : NVPTXInst<(outs Int16Regs:$r, Int16Regs:$g), (ins Int64Regs:$s, Int32Regs:$l, Int32Regs:$x), - "suld.b.a1d.b8.trap \\{$r\\}, [$s, \\{$l, $x\\}];", + "suld.b.a1d.v2.b8.clamp \\{$r, $g\\}, [$s, \\{$l, $x\\}];", 
[]>; -def SULD_1D_ARRAY_I16_TRAP - : NVPTXInst<(outs Int16Regs:$r), +def SULD_1D_ARRAY_V2I16_CLAMP + : NVPTXInst<(outs Int16Regs:$r, Int16Regs:$g), (ins Int64Regs:$s, Int32Regs:$l, Int32Regs:$x), - "suld.b.a1d.b16.trap \\{$r\\}, [$s, \\{$l, $x\\}];", + "suld.b.a1d.v2.b16.clamp \\{$r, $g\\}, [$s, \\{$l, $x\\}];", []>; -def SULD_1D_ARRAY_I32_TRAP - : NVPTXInst<(outs Int32Regs:$r), +def SULD_1D_ARRAY_V2I32_CLAMP + : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g), (ins Int64Regs:$s, Int32Regs:$l, Int32Regs:$x), - "suld.b.a1d.b32.trap \\{$r\\}, [$s, \\{$l, $x\\}];", + "suld.b.a1d.v2.b32.clamp \\{$r, $g\\}, [$s, \\{$l, $x\\}];", []>; -def SULD_1D_ARRAY_V2I8_TRAP - : NVPTXInst<(outs Int16Regs:$r, Int16Regs:$g), +def SULD_1D_ARRAY_V2I64_CLAMP + : NVPTXInst<(outs Int64Regs:$r, Int64Regs:$g), (ins Int64Regs:$s, Int32Regs:$l, Int32Regs:$x), - "suld.b.a1d.v2.b8.trap \\{$r, $g\\}, [$s, \\{$l, $x\\}];", + "suld.b.a1d.v2.b64.clamp \\{$r, $g\\}, [$s, \\{$l, $x\\}];", []>; -def SULD_1D_ARRAY_V2I16_TRAP + +def SULD_2D_V2I8_CLAMP : NVPTXInst<(outs Int16Regs:$r, Int16Regs:$g), - (ins Int64Regs:$s, Int32Regs:$l, Int32Regs:$x), - "suld.b.a1d.v2.b16.trap \\{$r, $g\\}, [$s, \\{$l, $x\\}];", + (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y), + "suld.b.2d.v2.b8.clamp \\{$r, $g\\}, [$s, \\{$x, $y\\}];", []>; -def SULD_1D_ARRAY_V2I32_TRAP +def SULD_2D_V2I16_CLAMP + : NVPTXInst<(outs Int16Regs:$r, Int16Regs:$g), + (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y), + "suld.b.2d.v2.b16.clamp \\{$r, $g\\}, [$s, \\{$x, $y\\}];", + []>; +def SULD_2D_V2I32_CLAMP : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g), - (ins Int64Regs:$s, Int32Regs:$l, Int32Regs:$x), - "suld.b.a1d.v2.b32.trap \\{$r, $g\\}, [$s, \\{$l, $x\\}];", + (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y), + "suld.b.2d.v2.b32.clamp \\{$r, $g\\}, [$s, \\{$x, $y\\}];", []>; -def SULD_1D_ARRAY_V4I8_TRAP +def SULD_2D_V2I64_CLAMP + : NVPTXInst<(outs Int64Regs:$r, Int64Regs:$g), + (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y), + 
"suld.b.2d.v2.b64.clamp \\{$r, $g\\}, [$s, \\{$x, $y\\}];", + []>; + +def SULD_2D_ARRAY_V2I8_CLAMP + : NVPTXInst<(outs Int16Regs:$r, Int16Regs:$g), + (ins Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y), + "suld.b.a2d.v2.b8.clamp \\{$r, $g\\}, " + "[$s, \\{$l, $x, $y, $y\\}];", + []>; +def SULD_2D_ARRAY_V2I16_CLAMP + : NVPTXInst<(outs Int16Regs:$r, Int16Regs:$g), + (ins Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y), + "suld.b.a2d.v2.b16.clamp \\{$r, $g\\}, " + "[$s, \\{$l, $x, $y, $y\\}];", + []>; +def SULD_2D_ARRAY_V2I32_CLAMP + : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g), + (ins Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y), + "suld.b.a2d.v2.b32.clamp \\{$r, $g\\}, " + "[$s, \\{$l, $x, $y, $y\\}];", + []>; +def SULD_2D_ARRAY_V2I64_CLAMP + : NVPTXInst<(outs Int64Regs:$r, Int64Regs:$g), + (ins Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y), + "suld.b.a2d.v2.b64.clamp \\{$r, $g\\}, " + "[$s, \\{$l, $x, $y, $y\\}];", + []>; + +def SULD_3D_V2I8_CLAMP + : NVPTXInst<(outs Int16Regs:$r, Int16Regs:$g), + (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z), + "suld.b.3d.v2.b8.clamp \\{$r, $g\\}, [$s, \\{$x, $y, $z, $z\\}];", + []>; +def SULD_3D_V2I16_CLAMP + : NVPTXInst<(outs Int16Regs:$r, Int16Regs:$g), + (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z), + "suld.b.3d.v2.b16.clamp \\{$r, $g\\}, [$s, \\{$x, $y, $z, $z\\}];", + []>; +def SULD_3D_V2I32_CLAMP + : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g), + (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z), + "suld.b.3d.v2.b32.clamp \\{$r, $g\\}, [$s, \\{$x, $y, $z, $z\\}];", + []>; +def SULD_3D_V2I64_CLAMP + : NVPTXInst<(outs Int64Regs:$r, Int64Regs:$g), + (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z), + "suld.b.3d.v2.b64.clamp \\{$r, $g\\}, [$s, \\{$x, $y, $z, $z\\}];", + []>; +} + +let IsSuld = 3 in { +def SULD_1D_V4I8_CLAMP + : NVPTXInst<(outs Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a), + (ins Int64Regs:$s, Int32Regs:$x), + 
"suld.b.1d.v4.b8.clamp \\{$r, $g, $b, $a\\}, [$s, \\{$x\\}];", + []>; +def SULD_1D_V4I16_CLAMP + : NVPTXInst<(outs Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a), + (ins Int64Regs:$s, Int32Regs:$x), + "suld.b.1d.v4.b16.clamp \\{$r, $g, $b, $a\\}, [$s, \\{$x\\}];", + []>; +def SULD_1D_V4I32_CLAMP + : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a), + (ins Int64Regs:$s, Int32Regs:$x), + "suld.b.1d.v4.b32.clamp \\{$r, $g, $b, $a\\}, [$s, \\{$x\\}];", + []>; + +def SULD_1D_ARRAY_V4I8_CLAMP : NVPTXInst<(outs Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a), (ins Int64Regs:$s, Int32Regs:$l, Int32Regs:$x), - "suld.b.a1d.v4.b8.trap \\{$r, $g, $b, $a\\}, " + "suld.b.a1d.v4.b8.clamp \\{$r, $g, $b, $a\\}, " "[$s, \\{$l, $x\\}];", []>; -def SULD_1D_ARRAY_V4I16_TRAP +def SULD_1D_ARRAY_V4I16_CLAMP : NVPTXInst<(outs Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a), (ins Int64Regs:$s, Int32Regs:$l, Int32Regs:$x), - "suld.b.a1d.v4.b16.trap \\{$r, $g, $b, $a\\}, " + "suld.b.a1d.v4.b16.clamp \\{$r, $g, $b, $a\\}, " "[$s, \\{$l, $x\\}];", []>; -def SULD_1D_ARRAY_V4I32_TRAP +def SULD_1D_ARRAY_V4I32_CLAMP : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a), (ins Int64Regs:$s, Int32Regs:$l, Int32Regs:$x), - "suld.b.a1d.v4.b32.trap \\{$r, $g, $b, $a\\}, " + "suld.b.a1d.v4.b32.clamp \\{$r, $g, $b, $a\\}, " "[$s, \\{$l, $x\\}];", []>; +def SULD_2D_V4I8_CLAMP + : NVPTXInst<(outs Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a), + (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y), + "suld.b.2d.v4.b8.clamp \\{$r, $g, $b, $a\\}, [$s, \\{$x, $y\\}];", + []>; +def SULD_2D_V4I16_CLAMP + : NVPTXInst<(outs Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a), + (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y), + "suld.b.2d.v4.b16.clamp \\{$r, $g, $b, $a\\}, [$s, \\{$x, $y\\}];", + []>; +def SULD_2D_V4I32_CLAMP + : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a), + (ins Int64Regs:$s, Int32Regs:$x, 
Int32Regs:$y), + "suld.b.2d.v4.b32.clamp \\{$r, $g, $b, $a\\}, [$s, \\{$x, $y\\}];", + []>; + +def SULD_2D_ARRAY_V4I8_CLAMP + : NVPTXInst<(outs Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a), + (ins Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y), + "suld.b.a2d.v4.b8.clamp \\{$r, $g, $b, $a\\}, " + "[$s, \\{$l, $x, $y, $y\\}];", + []>; +def SULD_2D_ARRAY_V4I16_CLAMP + : NVPTXInst<(outs Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a), + (ins Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y), + "suld.b.a2d.v4.b16.clamp \\{$r, $g, $b, $a\\}, " + "[$s, \\{$l, $x, $y, $y\\}];", + []>; +def SULD_2D_ARRAY_V4I32_CLAMP + : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a), + (ins Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y), + "suld.b.a2d.v4.b32.clamp \\{$r, $g, $b, $a\\}, " + "[$s, \\{$l, $x, $y, $y\\}];", + []>; + + +def SULD_3D_V4I8_CLAMP + : NVPTXInst<(outs Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a), + (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z), + "suld.b.3d.v4.b8.clamp \\{$r, $g, $b, $a\\}, " + "[$s, \\{$x, $y, $z, $z\\}];", + []>; +def SULD_3D_V4I16_CLAMP + : NVPTXInst<(outs Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a), + (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z), + "suld.b.3d.v4.b16.clamp \\{$r, $g, $b, $a\\}, " + "[$s, \\{$x, $y, $z, $z\\}];", + []>; +def SULD_3D_V4I32_CLAMP + : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a), + (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z), + "suld.b.3d.v4.b32.clamp \\{$r, $g, $b, $a\\}, " + "[$s, \\{$x, $y, $z, $z\\}];", + []>; +} + + +// .trap variant +let IsSuld = 1 in { +def SULD_1D_I8_TRAP + : NVPTXInst<(outs Int16Regs:$r), + (ins Int64Regs:$s, Int32Regs:$x), + "suld.b.1d.b8.trap \\{$r\\}, [$s, \\{$x\\}];", + []>; +def SULD_1D_I16_TRAP + : NVPTXInst<(outs Int16Regs:$r), + (ins Int64Regs:$s, Int32Regs:$x), + "suld.b.1d.b16.trap \\{$r\\}, [$s, \\{$x\\}];", + []>; +def 
SULD_1D_I32_TRAP + : NVPTXInst<(outs Int32Regs:$r), + (ins Int64Regs:$s, Int32Regs:$x), + "suld.b.1d.b32.trap \\{$r\\}, [$s, \\{$x\\}];", + []>; +def SULD_1D_I64_TRAP + : NVPTXInst<(outs Int64Regs:$r), + (ins Int64Regs:$s, Int32Regs:$x), + "suld.b.1d.b64.trap \\{$r\\}, [$s, \\{$x\\}];", + []>; + +def SULD_1D_ARRAY_I8_TRAP + : NVPTXInst<(outs Int16Regs:$r), + (ins Int64Regs:$s, Int32Regs:$l, Int32Regs:$x), + "suld.b.a1d.b8.trap \\{$r\\}, [$s, \\{$l, $x\\}];", + []>; +def SULD_1D_ARRAY_I16_TRAP + : NVPTXInst<(outs Int16Regs:$r), + (ins Int64Regs:$s, Int32Regs:$l, Int32Regs:$x), + "suld.b.a1d.b16.trap \\{$r\\}, [$s, \\{$l, $x\\}];", + []>; +def SULD_1D_ARRAY_I32_TRAP + : NVPTXInst<(outs Int32Regs:$r), + (ins Int64Regs:$s, Int32Regs:$l, Int32Regs:$x), + "suld.b.a1d.b32.trap \\{$r\\}, [$s, \\{$l, $x\\}];", + []>; +def SULD_1D_ARRAY_I64_TRAP + : NVPTXInst<(outs Int64Regs:$r), + (ins Int64Regs:$s, Int32Regs:$l, Int32Regs:$x), + "suld.b.a1d.b64.trap \\{$r\\}, [$s, \\{$l, $x\\}];", + []>; + def SULD_2D_I8_TRAP : NVPTXInst<(outs Int16Regs:$r), (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y), @@ -2377,35 +3680,10 @@ def SULD_2D_I32_TRAP (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y), "suld.b.2d.b32.trap \\{$r\\}, [$s, \\{$x, $y\\}];", []>; -def SULD_2D_V2I8_TRAP - : NVPTXInst<(outs Int16Regs:$r, Int16Regs:$g), - (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y), - "suld.b.2d.v2.b8.trap \\{$r, $g\\}, [$s, \\{$x, $y\\}];", - []>; -def SULD_2D_V2I16_TRAP - : NVPTXInst<(outs Int16Regs:$r, Int16Regs:$g), - (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y), - "suld.b.2d.v2.b16.trap \\{$r, $g\\}, [$s, \\{$x, $y\\}];", - []>; -def SULD_2D_V2I32_TRAP - : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g), - (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y), - "suld.b.2d.v2.b32.trap \\{$r, $g\\}, [$s, \\{$x, $y\\}];", - []>; -def SULD_2D_V4I8_TRAP - : NVPTXInst<(outs Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a), - (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y), - "suld.b.2d.v4.b8.trap \\{$r, 
$g, $b, $a\\}, [$s, \\{$x, $y\\}];", - []>; -def SULD_2D_V4I16_TRAP - : NVPTXInst<(outs Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a), - (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y), - "suld.b.2d.v4.b16.trap \\{$r, $g, $b, $a\\}, [$s, \\{$x, $y\\}];", - []>; -def SULD_2D_V4I32_TRAP - : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a), +def SULD_2D_I64_TRAP + : NVPTXInst<(outs Int64Regs:$r), (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y), - "suld.b.2d.v4.b32.trap \\{$r, $g, $b, $a\\}, [$s, \\{$x, $y\\}];", + "suld.b.2d.b64.trap \\{$r\\}, [$s, \\{$x, $y\\}];", []>; def SULD_2D_ARRAY_I8_TRAP @@ -2423,6 +3701,98 @@ def SULD_2D_ARRAY_I32_TRAP (ins Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y), "suld.b.a2d.b32.trap \\{$r\\}, [$s, \\{$l, $x, $y, $y\\}];", []>; +def SULD_2D_ARRAY_I64_TRAP + : NVPTXInst<(outs Int64Regs:$r), + (ins Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y), + "suld.b.a2d.b64.trap \\{$r\\}, [$s, \\{$l, $x, $y, $y\\}];", + []>; + +def SULD_3D_I8_TRAP + : NVPTXInst<(outs Int16Regs:$r), + (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z), + "suld.b.3d.b8.trap \\{$r\\}, [$s, \\{$x, $y, $z, $z\\}];", + []>; +def SULD_3D_I16_TRAP + : NVPTXInst<(outs Int16Regs:$r), + (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z), + "suld.b.3d.b16.trap \\{$r\\}, [$s, \\{$x, $y, $z, $z\\}];", + []>; +def SULD_3D_I32_TRAP + : NVPTXInst<(outs Int32Regs:$r), + (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z), + "suld.b.3d.b32.trap \\{$r\\}, [$s, \\{$x, $y, $z, $z\\}];", + []>; +def SULD_3D_I64_TRAP + : NVPTXInst<(outs Int64Regs:$r), + (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z), + "suld.b.3d.b64.trap \\{$r\\}, [$s, \\{$x, $y, $z, $z\\}];", + []>; +} + +let IsSuld = 2 in { +def SULD_1D_V2I8_TRAP + : NVPTXInst<(outs Int16Regs:$r, Int16Regs:$g), + (ins Int64Regs:$s, Int32Regs:$x), + "suld.b.1d.v2.b8.trap \\{$r, $g\\}, [$s, \\{$x\\}];", + []>; +def SULD_1D_V2I16_TRAP + : 
NVPTXInst<(outs Int16Regs:$r, Int16Regs:$g), + (ins Int64Regs:$s, Int32Regs:$x), + "suld.b.1d.v2.b16.trap \\{$r, $g\\}, [$s, \\{$x\\}];", + []>; +def SULD_1D_V2I32_TRAP + : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g), + (ins Int64Regs:$s, Int32Regs:$x), + "suld.b.1d.v2.b32.trap \\{$r, $g\\}, [$s, \\{$x\\}];", + []>; +def SULD_1D_V2I64_TRAP + : NVPTXInst<(outs Int64Regs:$r, Int64Regs:$g), + (ins Int64Regs:$s, Int32Regs:$x), + "suld.b.1d.v2.b64.trap \\{$r, $g\\}, [$s, \\{$x\\}];", + []>; + +def SULD_1D_ARRAY_V2I8_TRAP + : NVPTXInst<(outs Int16Regs:$r, Int16Regs:$g), + (ins Int64Regs:$s, Int32Regs:$l, Int32Regs:$x), + "suld.b.a1d.v2.b8.trap \\{$r, $g\\}, [$s, \\{$l, $x\\}];", + []>; +def SULD_1D_ARRAY_V2I16_TRAP + : NVPTXInst<(outs Int16Regs:$r, Int16Regs:$g), + (ins Int64Regs:$s, Int32Regs:$l, Int32Regs:$x), + "suld.b.a1d.v2.b16.trap \\{$r, $g\\}, [$s, \\{$l, $x\\}];", + []>; +def SULD_1D_ARRAY_V2I32_TRAP + : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g), + (ins Int64Regs:$s, Int32Regs:$l, Int32Regs:$x), + "suld.b.a1d.v2.b32.trap \\{$r, $g\\}, [$s, \\{$l, $x\\}];", + []>; +def SULD_1D_ARRAY_V2I64_TRAP + : NVPTXInst<(outs Int64Regs:$r, Int64Regs:$g), + (ins Int64Regs:$s, Int32Regs:$l, Int32Regs:$x), + "suld.b.a1d.v2.b64.trap \\{$r, $g\\}, [$s, \\{$l, $x\\}];", + []>; + +def SULD_2D_V2I8_TRAP + : NVPTXInst<(outs Int16Regs:$r, Int16Regs:$g), + (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y), + "suld.b.2d.v2.b8.trap \\{$r, $g\\}, [$s, \\{$x, $y\\}];", + []>; +def SULD_2D_V2I16_TRAP + : NVPTXInst<(outs Int16Regs:$r, Int16Regs:$g), + (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y), + "suld.b.2d.v2.b16.trap \\{$r, $g\\}, [$s, \\{$x, $y\\}];", + []>; +def SULD_2D_V2I32_TRAP + : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g), + (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y), + "suld.b.2d.v2.b32.trap \\{$r, $g\\}, [$s, \\{$x, $y\\}];", + []>; +def SULD_2D_V2I64_TRAP + : NVPTXInst<(outs Int64Regs:$r, Int64Regs:$g), + (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y), + 
"suld.b.2d.v2.b64.trap \\{$r, $g\\}, [$s, \\{$x, $y\\}];", + []>; + def SULD_2D_ARRAY_V2I8_TRAP : NVPTXInst<(outs Int16Regs:$r, Int16Regs:$g), (ins Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y), @@ -2441,6 +3811,87 @@ def SULD_2D_ARRAY_V2I32_TRAP "suld.b.a2d.v2.b32.trap \\{$r, $g\\}, " "[$s, \\{$l, $x, $y, $y\\}];", []>; +def SULD_2D_ARRAY_V2I64_TRAP + : NVPTXInst<(outs Int64Regs:$r, Int64Regs:$g), + (ins Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y), + "suld.b.a2d.v2.b64.trap \\{$r, $g\\}, " + "[$s, \\{$l, $x, $y, $y\\}];", + []>; + +def SULD_3D_V2I8_TRAP + : NVPTXInst<(outs Int16Regs:$r, Int16Regs:$g), + (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z), + "suld.b.3d.v2.b8.trap \\{$r, $g\\}, [$s, \\{$x, $y, $z, $z\\}];", + []>; +def SULD_3D_V2I16_TRAP + : NVPTXInst<(outs Int16Regs:$r, Int16Regs:$g), + (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z), + "suld.b.3d.v2.b16.trap \\{$r, $g\\}, [$s, \\{$x, $y, $z, $z\\}];", + []>; +def SULD_3D_V2I32_TRAP + : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g), + (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z), + "suld.b.3d.v2.b32.trap \\{$r, $g\\}, [$s, \\{$x, $y, $z, $z\\}];", + []>; +def SULD_3D_V2I64_TRAP + : NVPTXInst<(outs Int64Regs:$r, Int64Regs:$g), + (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z), + "suld.b.3d.v2.b64.trap \\{$r, $g\\}, [$s, \\{$x, $y, $z, $z\\}];", + []>; +} + +let IsSuld = 3 in { +def SULD_1D_V4I8_TRAP + : NVPTXInst<(outs Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a), + (ins Int64Regs:$s, Int32Regs:$x), + "suld.b.1d.v4.b8.trap \\{$r, $g, $b, $a\\}, [$s, \\{$x\\}];", + []>; +def SULD_1D_V4I16_TRAP + : NVPTXInst<(outs Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a), + (ins Int64Regs:$s, Int32Regs:$x), + "suld.b.1d.v4.b16.trap \\{$r, $g, $b, $a\\}, [$s, \\{$x\\}];", + []>; +def SULD_1D_V4I32_TRAP + : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a), + (ins Int64Regs:$s, Int32Regs:$x), + 
"suld.b.1d.v4.b32.trap \\{$r, $g, $b, $a\\}, [$s, \\{$x\\}];", + []>; + +def SULD_1D_ARRAY_V4I8_TRAP + : NVPTXInst<(outs Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a), + (ins Int64Regs:$s, Int32Regs:$l, Int32Regs:$x), + "suld.b.a1d.v4.b8.trap \\{$r, $g, $b, $a\\}, " + "[$s, \\{$l, $x\\}];", + []>; +def SULD_1D_ARRAY_V4I16_TRAP + : NVPTXInst<(outs Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a), + (ins Int64Regs:$s, Int32Regs:$l, Int32Regs:$x), + "suld.b.a1d.v4.b16.trap \\{$r, $g, $b, $a\\}, " + "[$s, \\{$l, $x\\}];", + []>; +def SULD_1D_ARRAY_V4I32_TRAP + : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a), + (ins Int64Regs:$s, Int32Regs:$l, Int32Regs:$x), + "suld.b.a1d.v4.b32.trap \\{$r, $g, $b, $a\\}, " + "[$s, \\{$l, $x\\}];", + []>; + +def SULD_2D_V4I8_TRAP + : NVPTXInst<(outs Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a), + (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y), + "suld.b.2d.v4.b8.trap \\{$r, $g, $b, $a\\}, [$s, \\{$x, $y\\}];", + []>; +def SULD_2D_V4I16_TRAP + : NVPTXInst<(outs Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a), + (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y), + "suld.b.2d.v4.b16.trap \\{$r, $g, $b, $a\\}, [$s, \\{$x, $y\\}];", + []>; +def SULD_2D_V4I32_TRAP + : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a), + (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y), + "suld.b.2d.v4.b32.trap \\{$r, $g, $b, $a\\}, [$s, \\{$x, $y\\}];", + []>; + def SULD_2D_ARRAY_V4I8_TRAP : NVPTXInst<(outs Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a), (ins Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y), @@ -2460,59 +3911,343 @@ def SULD_2D_ARRAY_V4I32_TRAP "[$s, \\{$l, $x, $y, $y\\}];", []>; -def SULD_3D_I8_TRAP + +def SULD_3D_V4I8_TRAP + : NVPTXInst<(outs Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a), + (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z), + "suld.b.3d.v4.b8.trap \\{$r, $g, $b, $a\\}, " + "[$s, \\{$x, $y, $z, $z\\}];", + []>; 
+def SULD_3D_V4I16_TRAP + : NVPTXInst<(outs Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a), + (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z), + "suld.b.3d.v4.b16.trap \\{$r, $g, $b, $a\\}, " + "[$s, \\{$x, $y, $z, $z\\}];", + []>; +def SULD_3D_V4I32_TRAP + : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a), + (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z), + "suld.b.3d.v4.b32.trap \\{$r, $g, $b, $a\\}, " + "[$s, \\{$x, $y, $z, $z\\}];", + []>; +} + +// .zero variant +let IsSuld = 1 in { +def SULD_1D_I8_ZERO + : NVPTXInst<(outs Int16Regs:$r), + (ins Int64Regs:$s, Int32Regs:$x), + "suld.b.1d.b8.zero \\{$r\\}, [$s, \\{$x\\}];", + []>; +def SULD_1D_I16_ZERO + : NVPTXInst<(outs Int16Regs:$r), + (ins Int64Regs:$s, Int32Regs:$x), + "suld.b.1d.b16.zero \\{$r\\}, [$s, \\{$x\\}];", + []>; +def SULD_1D_I32_ZERO + : NVPTXInst<(outs Int32Regs:$r), + (ins Int64Regs:$s, Int32Regs:$x), + "suld.b.1d.b32.zero \\{$r\\}, [$s, \\{$x\\}];", + []>; +def SULD_1D_I64_ZERO + : NVPTXInst<(outs Int64Regs:$r), + (ins Int64Regs:$s, Int32Regs:$x), + "suld.b.1d.b64.zero \\{$r\\}, [$s, \\{$x\\}];", + []>; + +def SULD_1D_ARRAY_I8_ZERO + : NVPTXInst<(outs Int16Regs:$r), + (ins Int64Regs:$s, Int32Regs:$l, Int32Regs:$x), + "suld.b.a1d.b8.zero \\{$r\\}, [$s, \\{$l, $x\\}];", + []>; +def SULD_1D_ARRAY_I16_ZERO + : NVPTXInst<(outs Int16Regs:$r), + (ins Int64Regs:$s, Int32Regs:$l, Int32Regs:$x), + "suld.b.a1d.b16.zero \\{$r\\}, [$s, \\{$l, $x\\}];", + []>; +def SULD_1D_ARRAY_I32_ZERO + : NVPTXInst<(outs Int32Regs:$r), + (ins Int64Regs:$s, Int32Regs:$l, Int32Regs:$x), + "suld.b.a1d.b32.zero \\{$r\\}, [$s, \\{$l, $x\\}];", + []>; +def SULD_1D_ARRAY_I64_ZERO + : NVPTXInst<(outs Int64Regs:$r), + (ins Int64Regs:$s, Int32Regs:$l, Int32Regs:$x), + "suld.b.a1d.b64.zero \\{$r\\}, [$s, \\{$l, $x\\}];", + []>; + +def SULD_2D_I8_ZERO + : NVPTXInst<(outs Int16Regs:$r), + (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y), + "suld.b.2d.b8.zero \\{$r\\}, [$s, 
\\{$x, $y\\}];", + []>; +def SULD_2D_I16_ZERO + : NVPTXInst<(outs Int16Regs:$r), + (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y), + "suld.b.2d.b16.zero \\{$r\\}, [$s, \\{$x, $y\\}];", + []>; +def SULD_2D_I32_ZERO + : NVPTXInst<(outs Int32Regs:$r), + (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y), + "suld.b.2d.b32.zero \\{$r\\}, [$s, \\{$x, $y\\}];", + []>; +def SULD_2D_I64_ZERO + : NVPTXInst<(outs Int64Regs:$r), + (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y), + "suld.b.2d.b64.zero \\{$r\\}, [$s, \\{$x, $y\\}];", + []>; + +def SULD_2D_ARRAY_I8_ZERO + : NVPTXInst<(outs Int16Regs:$r), + (ins Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y), + "suld.b.a2d.b8.zero \\{$r\\}, [$s, \\{$l, $x, $y, $y\\}];", + []>; +def SULD_2D_ARRAY_I16_ZERO + : NVPTXInst<(outs Int16Regs:$r), + (ins Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y), + "suld.b.a2d.b16.zero \\{$r\\}, [$s, \\{$l, $x, $y, $y\\}];", + []>; +def SULD_2D_ARRAY_I32_ZERO + : NVPTXInst<(outs Int32Regs:$r), + (ins Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y), + "suld.b.a2d.b32.zero \\{$r\\}, [$s, \\{$l, $x, $y, $y\\}];", + []>; +def SULD_2D_ARRAY_I64_ZERO + : NVPTXInst<(outs Int64Regs:$r), + (ins Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y), + "suld.b.a2d.b64.zero \\{$r\\}, [$s, \\{$l, $x, $y, $y\\}];", + []>; + +def SULD_3D_I8_ZERO : NVPTXInst<(outs Int16Regs:$r), (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z), - "suld.b.3d.b8.trap \\{$r\\}, [$s, \\{$x, $y, $z, $z\\}];", + "suld.b.3d.b8.zero \\{$r\\}, [$s, \\{$x, $y, $z, $z\\}];", []>; -def SULD_3D_I16_TRAP +def SULD_3D_I16_ZERO : NVPTXInst<(outs Int16Regs:$r), (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z), - "suld.b.3d.b16.trap \\{$r\\}, [$s, \\{$x, $y, $z, $z\\}];", + "suld.b.3d.b16.zero \\{$r\\}, [$s, \\{$x, $y, $z, $z\\}];", []>; -def SULD_3D_I32_TRAP +def SULD_3D_I32_ZERO : NVPTXInst<(outs Int32Regs:$r), (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z), - "suld.b.3d.b32.trap \\{$r\\}, 
[$s, \\{$x, $y, $z, $z\\}];", + "suld.b.3d.b32.zero \\{$r\\}, [$s, \\{$x, $y, $z, $z\\}];", []>; -def SULD_3D_V2I8_TRAP +def SULD_3D_I64_ZERO + : NVPTXInst<(outs Int64Regs:$r), + (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z), + "suld.b.3d.b64.zero \\{$r\\}, [$s, \\{$x, $y, $z, $z\\}];", + []>; +} + +let IsSuld = 2 in { +def SULD_1D_V2I8_ZERO + : NVPTXInst<(outs Int16Regs:$r, Int16Regs:$g), + (ins Int64Regs:$s, Int32Regs:$x), + "suld.b.1d.v2.b8.zero \\{$r, $g\\}, [$s, \\{$x\\}];", + []>; +def SULD_1D_V2I16_ZERO + : NVPTXInst<(outs Int16Regs:$r, Int16Regs:$g), + (ins Int64Regs:$s, Int32Regs:$x), + "suld.b.1d.v2.b16.zero \\{$r, $g\\}, [$s, \\{$x\\}];", + []>; +def SULD_1D_V2I32_ZERO + : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g), + (ins Int64Regs:$s, Int32Regs:$x), + "suld.b.1d.v2.b32.zero \\{$r, $g\\}, [$s, \\{$x\\}];", + []>; +def SULD_1D_V2I64_ZERO + : NVPTXInst<(outs Int64Regs:$r, Int64Regs:$g), + (ins Int64Regs:$s, Int32Regs:$x), + "suld.b.1d.v2.b64.zero \\{$r, $g\\}, [$s, \\{$x\\}];", + []>; + +def SULD_1D_ARRAY_V2I8_ZERO + : NVPTXInst<(outs Int16Regs:$r, Int16Regs:$g), + (ins Int64Regs:$s, Int32Regs:$l, Int32Regs:$x), + "suld.b.a1d.v2.b8.zero \\{$r, $g\\}, [$s, \\{$l, $x\\}];", + []>; +def SULD_1D_ARRAY_V2I16_ZERO + : NVPTXInst<(outs Int16Regs:$r, Int16Regs:$g), + (ins Int64Regs:$s, Int32Regs:$l, Int32Regs:$x), + "suld.b.a1d.v2.b16.zero \\{$r, $g\\}, [$s, \\{$l, $x\\}];", + []>; +def SULD_1D_ARRAY_V2I32_ZERO + : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g), + (ins Int64Regs:$s, Int32Regs:$l, Int32Regs:$x), + "suld.b.a1d.v2.b32.zero \\{$r, $g\\}, [$s, \\{$l, $x\\}];", + []>; +def SULD_1D_ARRAY_V2I64_ZERO + : NVPTXInst<(outs Int64Regs:$r, Int64Regs:$g), + (ins Int64Regs:$s, Int32Regs:$l, Int32Regs:$x), + "suld.b.a1d.v2.b64.zero \\{$r, $g\\}, [$s, \\{$l, $x\\}];", + []>; + +def SULD_2D_V2I8_ZERO + : NVPTXInst<(outs Int16Regs:$r, Int16Regs:$g), + (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y), + "suld.b.2d.v2.b8.zero \\{$r, $g\\}, [$s, \\{$x, 
$y\\}];", + []>; +def SULD_2D_V2I16_ZERO + : NVPTXInst<(outs Int16Regs:$r, Int16Regs:$g), + (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y), + "suld.b.2d.v2.b16.zero \\{$r, $g\\}, [$s, \\{$x, $y\\}];", + []>; +def SULD_2D_V2I32_ZERO + : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g), + (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y), + "suld.b.2d.v2.b32.zero \\{$r, $g\\}, [$s, \\{$x, $y\\}];", + []>; +def SULD_2D_V2I64_ZERO + : NVPTXInst<(outs Int64Regs:$r, Int64Regs:$g), + (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y), + "suld.b.2d.v2.b64.zero \\{$r, $g\\}, [$s, \\{$x, $y\\}];", + []>; + +def SULD_2D_ARRAY_V2I8_ZERO + : NVPTXInst<(outs Int16Regs:$r, Int16Regs:$g), + (ins Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y), + "suld.b.a2d.v2.b8.zero \\{$r, $g\\}, " + "[$s, \\{$l, $x, $y, $y\\}];", + []>; +def SULD_2D_ARRAY_V2I16_ZERO + : NVPTXInst<(outs Int16Regs:$r, Int16Regs:$g), + (ins Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y), + "suld.b.a2d.v2.b16.zero \\{$r, $g\\}, " + "[$s, \\{$l, $x, $y, $y\\}];", + []>; +def SULD_2D_ARRAY_V2I32_ZERO + : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g), + (ins Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y), + "suld.b.a2d.v2.b32.zero \\{$r, $g\\}, " + "[$s, \\{$l, $x, $y, $y\\}];", + []>; +def SULD_2D_ARRAY_V2I64_ZERO + : NVPTXInst<(outs Int64Regs:$r, Int64Regs:$g), + (ins Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y), + "suld.b.a2d.v2.b64.zero \\{$r, $g\\}, " + "[$s, \\{$l, $x, $y, $y\\}];", + []>; + +def SULD_3D_V2I8_ZERO : NVPTXInst<(outs Int16Regs:$r, Int16Regs:$g), (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z), - "suld.b.3d.v2.b8.trap \\{$r, $g\\}, [$s, \\{$x, $y, $z, $z\\}];", + "suld.b.3d.v2.b8.zero \\{$r, $g\\}, [$s, \\{$x, $y, $z, $z\\}];", []>; -def SULD_3D_V2I16_TRAP +def SULD_3D_V2I16_ZERO : NVPTXInst<(outs Int16Regs:$r, Int16Regs:$g), (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z), - "suld.b.3d.v2.b16.trap \\{$r, $g\\}, [$s, \\{$x, $y, $z, $z\\}];", + 
"suld.b.3d.v2.b16.zero \\{$r, $g\\}, [$s, \\{$x, $y, $z, $z\\}];", []>; -def SULD_3D_V2I32_TRAP +def SULD_3D_V2I32_ZERO : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g), (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z), - "suld.b.3d.v2.b32.trap \\{$r, $g\\}, [$s, \\{$x, $y, $z, $z\\}];", + "suld.b.3d.v2.b32.zero \\{$r, $g\\}, [$s, \\{$x, $y, $z, $z\\}];", []>; -def SULD_3D_V4I8_TRAP +def SULD_3D_V2I64_ZERO + : NVPTXInst<(outs Int64Regs:$r, Int64Regs:$g), + (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z), + "suld.b.3d.v2.b64.zero \\{$r, $g\\}, [$s, \\{$x, $y, $z, $z\\}];", + []>; +} + +let IsSuld = 3 in { +def SULD_1D_V4I8_ZERO + : NVPTXInst<(outs Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a), + (ins Int64Regs:$s, Int32Regs:$x), + "suld.b.1d.v4.b8.zero \\{$r, $g, $b, $a\\}, [$s, \\{$x\\}];", + []>; +def SULD_1D_V4I16_ZERO + : NVPTXInst<(outs Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a), + (ins Int64Regs:$s, Int32Regs:$x), + "suld.b.1d.v4.b16.zero \\{$r, $g, $b, $a\\}, [$s, \\{$x\\}];", + []>; +def SULD_1D_V4I32_ZERO + : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a), + (ins Int64Regs:$s, Int32Regs:$x), + "suld.b.1d.v4.b32.zero \\{$r, $g, $b, $a\\}, [$s, \\{$x\\}];", + []>; + +def SULD_1D_ARRAY_V4I8_ZERO + : NVPTXInst<(outs Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a), + (ins Int64Regs:$s, Int32Regs:$l, Int32Regs:$x), + "suld.b.a1d.v4.b8.zero \\{$r, $g, $b, $a\\}, " + "[$s, \\{$l, $x\\}];", + []>; +def SULD_1D_ARRAY_V4I16_ZERO + : NVPTXInst<(outs Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a), + (ins Int64Regs:$s, Int32Regs:$l, Int32Regs:$x), + "suld.b.a1d.v4.b16.zero \\{$r, $g, $b, $a\\}, " + "[$s, \\{$l, $x\\}];", + []>; +def SULD_1D_ARRAY_V4I32_ZERO + : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a), + (ins Int64Regs:$s, Int32Regs:$l, Int32Regs:$x), + "suld.b.a1d.v4.b32.zero \\{$r, $g, $b, $a\\}, " + "[$s, \\{$l, $x\\}];", + []>; + +def 
SULD_2D_V4I8_ZERO + : NVPTXInst<(outs Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a), + (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y), + "suld.b.2d.v4.b8.zero \\{$r, $g, $b, $a\\}, [$s, \\{$x, $y\\}];", + []>; +def SULD_2D_V4I16_ZERO + : NVPTXInst<(outs Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a), + (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y), + "suld.b.2d.v4.b16.zero \\{$r, $g, $b, $a\\}, [$s, \\{$x, $y\\}];", + []>; +def SULD_2D_V4I32_ZERO + : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a), + (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y), + "suld.b.2d.v4.b32.zero \\{$r, $g, $b, $a\\}, [$s, \\{$x, $y\\}];", + []>; + +def SULD_2D_ARRAY_V4I8_ZERO + : NVPTXInst<(outs Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a), + (ins Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y), + "suld.b.a2d.v4.b8.zero \\{$r, $g, $b, $a\\}, " + "[$s, \\{$l, $x, $y, $y\\}];", + []>; +def SULD_2D_ARRAY_V4I16_ZERO + : NVPTXInst<(outs Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a), + (ins Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y), + "suld.b.a2d.v4.b16.zero \\{$r, $g, $b, $a\\}, " + "[$s, \\{$l, $x, $y, $y\\}];", + []>; +def SULD_2D_ARRAY_V4I32_ZERO + : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a), + (ins Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y), + "suld.b.a2d.v4.b32.zero \\{$r, $g, $b, $a\\}, " + "[$s, \\{$l, $x, $y, $y\\}];", + []>; + + +def SULD_3D_V4I8_ZERO : NVPTXInst<(outs Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a), (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z), - "suld.b.3d.v4.b8.trap \\{$r, $g, $b, $a\\}, " + "suld.b.3d.v4.b8.zero \\{$r, $g, $b, $a\\}, " "[$s, \\{$x, $y, $z, $z\\}];", []>; -def SULD_3D_V4I16_TRAP +def SULD_3D_V4I16_ZERO : NVPTXInst<(outs Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a), (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z), - "suld.b.3d.v4.b16.trap \\{$r, $g, $b, $a\\}, " + 
"suld.b.3d.v4.b16.zero \\{$r, $g, $b, $a\\}, " "[$s, \\{$x, $y, $z, $z\\}];", []>; -def SULD_3D_V4I32_TRAP +def SULD_3D_V4I32_ZERO : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a), (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z), - "suld.b.3d.v4.b32.trap \\{$r, $g, $b, $a\\}, " + "suld.b.3d.v4.b32.zero \\{$r, $g, $b, $a\\}, " "[$s, \\{$x, $y, $z, $z\\}];", []>; - +} //----------------------------------- // Texture Query Intrinsics //----------------------------------- + +let IsSurfTexQuery = 1 in { def TXQ_CHANNEL_ORDER : NVPTXInst<(outs Int32Regs:$d), (ins Int64Regs:$a), "txq.channel_order.b32 \t$d, [$a];", @@ -2545,6 +4280,7 @@ def TXQ_NUM_MIPMAP_LEVELS : NVPTXInst<(outs Int32Regs:$d), (ins Int64Regs:$a), "txq.num_mipmap_levels.b32 \t$d, [$a];", []>; +} def : Pat<(int_nvvm_txq_channel_order Int64Regs:$a), (TXQ_CHANNEL_ORDER Int64Regs:$a)>; @@ -2567,6 +4303,8 @@ def : Pat<(int_nvvm_txq_num_mipmap_levels Int64Regs:$a), //----------------------------------- // Surface Query Intrinsics //----------------------------------- + +let IsSurfTexQuery = 1 in { def SUQ_CHANNEL_ORDER : NVPTXInst<(outs Int32Regs:$d), (ins Int64Regs:$a), "suq.channel_order.b32 \t$d, [$a];", @@ -2591,6 +4329,7 @@ def SUQ_ARRAY_SIZE : NVPTXInst<(outs Int32Regs:$d), (ins Int64Regs:$a), "suq.array_size.b32 \t$d, [$a];", []>; +} def : Pat<(int_nvvm_suq_channel_order Int64Regs:$a), (SUQ_CHANNEL_ORDER Int64Regs:$a)>; @@ -2624,8 +4363,354 @@ def ISTYPEP_TEXTURE //===- Surface Stores -----------------------------------------------------===// +let IsSust = 1 in { // Unformatted +// .clamp variant +def SUST_B_1D_B8_CLAMP + : NVPTXInst<(outs), + (ins Int64Regs:$s, Int32Regs:$x, Int16Regs:$r), + "sust.b.1d.b8.clamp \t[$s, \\{$x\\}], \\{$r\\};", + []>; +def SUST_B_1D_B16_CLAMP + : NVPTXInst<(outs), + (ins Int64Regs:$s, Int32Regs:$x, Int16Regs:$r), + "sust.b.1d.b16.clamp \t[$s, \\{$x\\}], \\{$r\\};", + []>; +def SUST_B_1D_B32_CLAMP + : NVPTXInst<(outs), + (ins 
Int64Regs:$s, Int32Regs:$x, Int32Regs:$r), + "sust.b.1d.b32.clamp \t[$s, \\{$x\\}], \\{$r\\};", + []>; +def SUST_B_1D_B64_CLAMP + : NVPTXInst<(outs), + (ins Int64Regs:$s, Int32Regs:$x, Int64Regs:$r), + "sust.b.1d.b64.clamp \t[$s, \\{$x\\}], \\{$r\\};", + []>; +def SUST_B_1D_V2B8_CLAMP + : NVPTXInst<(outs), + (ins Int64Regs:$s, Int32Regs:$x, Int16Regs:$r, Int16Regs:$g), + "sust.b.1d.v2.b8.clamp \t[$s, \\{$x\\}], \\{$r, $g\\};", + []>; +def SUST_B_1D_V2B16_CLAMP + : NVPTXInst<(outs), + (ins Int64Regs:$s, Int32Regs:$x, Int16Regs:$r, Int16Regs:$g), + "sust.b.1d.v2.b16.clamp \t[$s, \\{$x\\}], \\{$r, $g\\};", + []>; +def SUST_B_1D_V2B32_CLAMP + : NVPTXInst<(outs), + (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$r, Int32Regs:$g), + "sust.b.1d.v2.b32.clamp \t[$s, \\{$x\\}], \\{$r, $g\\};", + []>; +def SUST_B_1D_V2B64_CLAMP + : NVPTXInst<(outs), + (ins Int64Regs:$s, Int32Regs:$x, Int64Regs:$r, Int64Regs:$g), + "sust.b.1d.v2.b64.clamp \t[$s, \\{$x\\}], \\{$r, $g\\};", + []>; +def SUST_B_1D_V4B8_CLAMP + : NVPTXInst<(outs), + (ins Int64Regs:$s, Int32Regs:$x, Int16Regs:$r, Int16Regs:$g, + Int16Regs:$b, Int16Regs:$a), + "sust.b.1d.v4.b8.clamp \t[$s, \\{$x\\}], \\{$r, $g, $b, $a\\};", + []>; +def SUST_B_1D_V4B16_CLAMP + : NVPTXInst<(outs), + (ins Int64Regs:$s, Int32Regs:$x, Int16Regs:$r, Int16Regs:$g, + Int16Regs:$b, Int16Regs:$a), + "sust.b.1d.v4.b16.clamp \t[$s, \\{$x\\}], \\{$r, $g, $b, $a\\};", + []>; +def SUST_B_1D_V4B32_CLAMP + : NVPTXInst<(outs), + (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$r, Int32Regs:$g, + Int32Regs:$b, Int32Regs:$a), + "sust.b.1d.v4.b32.clamp \t[$s, \\{$x\\}], \\{$r, $g, $b, $a\\};", + []>; + + +def SUST_B_1D_ARRAY_B8_CLAMP + : NVPTXInst<(outs), + (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int16Regs:$r), + "sust.b.a1d.b8.clamp \t[$s, \\{$idx, $x\\}], \\{$r\\};", + []>; +def SUST_B_1D_ARRAY_B16_CLAMP + : NVPTXInst<(outs), + (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int16Regs:$r), + "sust.b.a1d.b16.clamp \t[$s, \\{$idx, $x\\}], \\{$r\\};", + 
[]>; +def SUST_B_1D_ARRAY_B32_CLAMP + : NVPTXInst<(outs), + (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int32Regs:$r), + "sust.b.a1d.b32.clamp \t[$s, \\{$idx, $x\\}], \\{$r\\};", + []>; +def SUST_B_1D_ARRAY_B64_CLAMP + : NVPTXInst<(outs), + (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int64Regs:$r), + "sust.b.a1d.b64.clamp \t[$s, \\{$idx, $x\\}], \\{$r\\};", + []>; +def SUST_B_1D_ARRAY_V2B8_CLAMP + : NVPTXInst<(outs), + (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int16Regs:$r, + Int16Regs:$g), + "sust.b.a1d.v2.b8.clamp \t[$s, \\{$idx, $x\\}], \\{$r, $g\\};", + []>; +def SUST_B_1D_ARRAY_V2B16_CLAMP + : NVPTXInst<(outs), + (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int16Regs:$r, + Int16Regs:$g), + "sust.b.a1d.v2.b16.clamp \t[$s, \\{$idx, $x\\}], \\{$r, $g\\};", + []>; +def SUST_B_1D_ARRAY_V2B32_CLAMP + : NVPTXInst<(outs), + (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int32Regs:$r, + Int32Regs:$g), + "sust.b.a1d.v2.b32.clamp \t[$s, \\{$idx, $x\\}], \\{$r, $g\\};", + []>; +def SUST_B_1D_ARRAY_V2B64_CLAMP + : NVPTXInst<(outs), + (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int64Regs:$r, + Int64Regs:$g), + "sust.b.a1d.v2.b64.clamp \t[$s, \\{$idx, $x\\}], \\{$r, $g\\};", + []>; +def SUST_B_1D_ARRAY_V4B8_CLAMP + : NVPTXInst<(outs), + (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int16Regs:$r, + Int16Regs:$g, Int16Regs:$b, Int16Regs:$a), + "sust.b.a1d.v4.b8.clamp \t[$s, \\{$idx, $x\\}], " + "\\{$r, $g, $b, $a\\};", + []>; +def SUST_B_1D_ARRAY_V4B16_CLAMP + : NVPTXInst<(outs), + (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int16Regs:$r, + Int16Regs:$g, Int16Regs:$b, Int16Regs:$a), + "sust.b.a1d.v4.b16.clamp \t[$s, \\{$idx, $x\\}], " + "\\{$r, $g, $b, $a\\};", + []>; +def SUST_B_1D_ARRAY_V4B32_CLAMP + : NVPTXInst<(outs), + (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int32Regs:$r, + Int32Regs:$g, Int32Regs:$b, Int32Regs:$a), + "sust.b.a1d.v4.b32.clamp \t[$s, \\{$idx, $x\\}], " + "\\{$r, $g, $b, $a\\};", + []>; + + +def 
SUST_B_2D_B8_CLAMP + : NVPTXInst<(outs), + (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int16Regs:$r), + "sust.b.2d.b8.clamp \t[$s, \\{$x, $y\\}], \\{$r\\};", + []>; +def SUST_B_2D_B16_CLAMP + : NVPTXInst<(outs), + (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int16Regs:$r), + "sust.b.2d.b16.clamp \t[$s, \\{$x, $y\\}], \\{$r\\};", + []>; +def SUST_B_2D_B32_CLAMP + : NVPTXInst<(outs), + (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$r), + "sust.b.2d.b32.clamp \t[$s, \\{$x, $y\\}], \\{$r\\};", + []>; +def SUST_B_2D_B64_CLAMP + : NVPTXInst<(outs), + (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int64Regs:$r), + "sust.b.2d.b64.clamp \t[$s, \\{$x, $y\\}], \\{$r\\};", + []>; +def SUST_B_2D_V2B8_CLAMP + : NVPTXInst<(outs), + (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int16Regs:$r, + Int16Regs:$g), + "sust.b.2d.v2.b8.clamp \t[$s, \\{$x, $y\\}], \\{$r, $g\\};", + []>; +def SUST_B_2D_V2B16_CLAMP + : NVPTXInst<(outs), + (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int16Regs:$r, + Int16Regs:$g), + "sust.b.2d.v2.b16.clamp \t[$s, \\{$x, $y\\}], \\{$r, $g\\};", + []>; +def SUST_B_2D_V2B32_CLAMP + : NVPTXInst<(outs), + (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$r, + Int32Regs:$g), + "sust.b.2d.v2.b32.clamp \t[$s, \\{$x, $y\\}], \\{$r, $g\\};", + []>; +def SUST_B_2D_V2B64_CLAMP + : NVPTXInst<(outs), + (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int64Regs:$r, + Int64Regs:$g), + "sust.b.2d.v2.b64.clamp \t[$s, \\{$x, $y\\}], \\{$r, $g\\};", + []>; +def SUST_B_2D_V4B8_CLAMP + : NVPTXInst<(outs), + (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int16Regs:$r, + Int16Regs:$g, Int16Regs:$b, Int16Regs:$a), + "sust.b.2d.v4.b8.clamp \t[$s, \\{$x, $y\\}], " + "\\{$r, $g, $b, $a\\};", + []>; +def SUST_B_2D_V4B16_CLAMP + : NVPTXInst<(outs), + (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int16Regs:$r, + Int16Regs:$g, Int16Regs:$b, Int16Regs:$a), + "sust.b.2d.v4.b16.clamp \t[$s, \\{$x, $y\\}], " + "\\{$r, $g, $b, $a\\};", + []>; +def SUST_B_2D_V4B32_CLAMP + 
: NVPTXInst<(outs), + (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$r, + Int32Regs:$g, Int32Regs:$b, Int32Regs:$a), + "sust.b.2d.v4.b32.clamp \t[$s, \\{$x, $y\\}], " + "\\{$r, $g, $b, $a\\};", + []>; + +def SUST_B_2D_ARRAY_B8_CLAMP + : NVPTXInst<(outs), + (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int32Regs:$y, + Int16Regs:$r), + "sust.b.a2d.b8.clamp \t[$s, \\{$idx, $x, $y, $y\\}], \\{$r\\};", + []>; +def SUST_B_2D_ARRAY_B16_CLAMP + : NVPTXInst<(outs), + (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int32Regs:$y, + Int16Regs:$r), + "sust.b.a2d.b16.clamp \t[$s, \\{$idx, $x, $y, $y\\}], \\{$r\\};", + []>; +def SUST_B_2D_ARRAY_B32_CLAMP + : NVPTXInst<(outs), + (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int32Regs:$y, + Int32Regs:$r), + "sust.b.a2d.b32.clamp \t[$s, \\{$idx, $x, $y, $y\\}], \\{$r\\};", + []>; +def SUST_B_2D_ARRAY_B64_CLAMP + : NVPTXInst<(outs), + (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int32Regs:$y, + Int64Regs:$r), + "sust.b.a2d.b64.clamp \t[$s, \\{$idx, $x, $y, $y\\}], \\{$r\\};", + []>; +def SUST_B_2D_ARRAY_V2B8_CLAMP + : NVPTXInst<(outs), + (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int32Regs:$y, + Int16Regs:$r, Int16Regs:$g), + "sust.b.a2d.v2.b8.clamp \t[$s, \\{$idx, $x, $y, $y\\}], " + "\\{$r, $g\\};", + []>; +def SUST_B_2D_ARRAY_V2B16_CLAMP + : NVPTXInst<(outs), + (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int32Regs:$y, + Int16Regs:$r, Int16Regs:$g), + "sust.b.a2d.v2.b16.clamp \t[$s, \\{$idx, $x, $y, $y\\}], " + "\\{$r, $g\\};", + []>; +def SUST_B_2D_ARRAY_V2B32_CLAMP + : NVPTXInst<(outs), + (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int32Regs:$y, + Int32Regs:$r, Int32Regs:$g), + "sust.b.a2d.v2.b32.clamp \t[$s, \\{$idx, $x, $y, $y\\}], " + "\\{$r, $g\\};", + []>; +def SUST_B_2D_ARRAY_V2B64_CLAMP + : NVPTXInst<(outs), + (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int32Regs:$y, + Int64Regs:$r, Int64Regs:$g), + "sust.b.a2d.v2.b64.clamp \t[$s, \\{$idx, $x, $y, $y\\}], " + "\\{$r, $g\\};", + 
[]>; +def SUST_B_2D_ARRAY_V4B8_CLAMP + : NVPTXInst<(outs), + (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int32Regs:$y, + Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a), + "sust.b.a2d.v4.b8.clamp \t[$s, \\{$idx, $x, $y, $y\\}], " + "\\{$r, $g, $b, $a\\};", + []>; +def SUST_B_2D_ARRAY_V4B16_CLAMP + : NVPTXInst<(outs), + (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int32Regs:$y, + Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a), + "sust.b.a2d.v4.b16.clamp \t[$s, \\{$idx, $x, $y, $y\\}], " + "\\{$r, $g, $b, $a\\};", + []>; +def SUST_B_2D_ARRAY_V4B32_CLAMP + : NVPTXInst<(outs), + (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int32Regs:$y, + Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a), + "sust.b.a2d.v4.b32.clamp \t[$s, \\{$idx, $x, $y, $y\\}], " + "\\{$r, $g, $b, $a\\};", + []>; + + +def SUST_B_3D_B8_CLAMP + : NVPTXInst<(outs), + (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z, + Int16Regs:$r), + "sust.b.3d.b8.clamp \t[$s, \\{$x, $y, $z, $z\\}], \\{$r\\};", + []>; +def SUST_B_3D_B16_CLAMP + : NVPTXInst<(outs), + (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z, + Int16Regs:$r), + "sust.b.3d.b16.clamp \t[$s, \\{$x, $y, $z, $z\\}], \\{$r\\};", + []>; +def SUST_B_3D_B32_CLAMP + : NVPTXInst<(outs), + (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z, + Int32Regs:$r), + "sust.b.3d.b32.clamp \t[$s, \\{$x, $y, $z, $z\\}], \\{$r\\};", + []>; +def SUST_B_3D_B64_CLAMP + : NVPTXInst<(outs), + (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z, + Int64Regs:$r), + "sust.b.3d.b64.clamp \t[$s, \\{$x, $y, $z, $z\\}], \\{$r\\};", + []>; +def SUST_B_3D_V2B8_CLAMP + : NVPTXInst<(outs), + (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z, + Int16Regs:$r, Int16Regs:$g), + "sust.b.3d.v2.b8.clamp \t[$s, \\{$x, $y, $z, $z\\}], " + "\\{$r, $g\\};", + []>; +def SUST_B_3D_V2B16_CLAMP + : NVPTXInst<(outs), + (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z, + Int16Regs:$r, Int16Regs:$g), + 
"sust.b.3d.v2.b16.clamp \t[$s, \\{$x, $y, $z, $z\\}], " + "\\{$r, $g\\};", + []>; +def SUST_B_3D_V2B32_CLAMP + : NVPTXInst<(outs), + (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z, + Int32Regs:$r, Int32Regs:$g), + "sust.b.3d.v2.b32.clamp \t[$s, \\{$x, $y, $z, $z\\}], " + "\\{$r, $g\\};", + []>; +def SUST_B_3D_V2B64_CLAMP + : NVPTXInst<(outs), + (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z, + Int64Regs:$r, Int64Regs:$g), + "sust.b.3d.v2.b64.clamp \t[$s, \\{$x, $y, $z, $z\\}], " + "\\{$r, $g\\};", + []>; +def SUST_B_3D_V4B8_CLAMP + : NVPTXInst<(outs), + (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z, + Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a), + "sust.b.3d.v4.b8.clamp \t[$s, \\{$x, $y, $z, $z\\}], " + "\\{$r, $g, $b, $a\\};", + []>; +def SUST_B_3D_V4B16_CLAMP + : NVPTXInst<(outs), + (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z, + Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a), + "sust.b.3d.v4.b16.clamp \t[$s, \\{$x, $y, $z, $z\\}], " + "\\{$r, $g, $b, $a\\};", + []>; +def SUST_B_3D_V4B32_CLAMP + : NVPTXInst<(outs), + (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z, + Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a), + "sust.b.3d.v4.b32.clamp \t[$s, \\{$x, $y, $z, $z\\}], " + "\\{$r, $g, $b, $a\\};", + []>; + + +// .trap variant def SUST_B_1D_B8_TRAP : NVPTXInst<(outs), (ins Int64Regs:$s, Int32Regs:$x, Int16Regs:$r), @@ -2641,6 +4726,11 @@ def SUST_B_1D_B32_TRAP (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$r), "sust.b.1d.b32.trap \t[$s, \\{$x\\}], \\{$r\\};", []>; +def SUST_B_1D_B64_TRAP + : NVPTXInst<(outs), + (ins Int64Regs:$s, Int32Regs:$x, Int64Regs:$r), + "sust.b.1d.b64.trap \t[$s, \\{$x\\}], \\{$r\\};", + []>; def SUST_B_1D_V2B8_TRAP : NVPTXInst<(outs), (ins Int64Regs:$s, Int32Regs:$x, Int16Regs:$r, Int16Regs:$g), @@ -2656,6 +4746,11 @@ def SUST_B_1D_V2B32_TRAP (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$r, Int32Regs:$g), "sust.b.1d.v2.b32.trap \t[$s, \\{$x\\}], 
\\{$r, $g\\};", []>; +def SUST_B_1D_V2B64_TRAP + : NVPTXInst<(outs), + (ins Int64Regs:$s, Int32Regs:$x, Int64Regs:$r, Int64Regs:$g), + "sust.b.1d.v2.b64.trap \t[$s, \\{$x\\}], \\{$r, $g\\};", + []>; def SUST_B_1D_V4B8_TRAP : NVPTXInst<(outs), (ins Int64Regs:$s, Int32Regs:$x, Int16Regs:$r, Int16Regs:$g, @@ -2691,6 +4786,11 @@ def SUST_B_1D_ARRAY_B32_TRAP (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int32Regs:$r), "sust.b.a1d.b32.trap \t[$s, \\{$idx, $x\\}], \\{$r\\};", []>; +def SUST_B_1D_ARRAY_B64_TRAP + : NVPTXInst<(outs), + (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int64Regs:$r), + "sust.b.a1d.b64.trap \t[$s, \\{$idx, $x\\}], \\{$r\\};", + []>; def SUST_B_1D_ARRAY_V2B8_TRAP : NVPTXInst<(outs), (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int16Regs:$r, @@ -2709,6 +4809,12 @@ def SUST_B_1D_ARRAY_V2B32_TRAP Int32Regs:$g), "sust.b.a1d.v2.b32.trap \t[$s, \\{$idx, $x\\}], \\{$r, $g\\};", []>; +def SUST_B_1D_ARRAY_V2B64_TRAP + : NVPTXInst<(outs), + (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int64Regs:$r, + Int64Regs:$g), + "sust.b.a1d.v2.b64.trap \t[$s, \\{$idx, $x\\}], \\{$r, $g\\};", + []>; def SUST_B_1D_ARRAY_V4B8_TRAP : NVPTXInst<(outs), (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int16Regs:$r, @@ -2747,6 +4853,11 @@ def SUST_B_2D_B32_TRAP (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$r), "sust.b.2d.b32.trap \t[$s, \\{$x, $y\\}], \\{$r\\};", []>; +def SUST_B_2D_B64_TRAP + : NVPTXInst<(outs), + (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int64Regs:$r), + "sust.b.2d.b64.trap \t[$s, \\{$x, $y\\}], \\{$r\\};", + []>; def SUST_B_2D_V2B8_TRAP : NVPTXInst<(outs), (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int16Regs:$r, @@ -2765,6 +4876,12 @@ def SUST_B_2D_V2B32_TRAP Int32Regs:$g), "sust.b.2d.v2.b32.trap \t[$s, \\{$x, $y\\}], \\{$r, $g\\};", []>; +def SUST_B_2D_V2B64_TRAP + : NVPTXInst<(outs), + (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int64Regs:$r, + Int64Regs:$g), + "sust.b.2d.v2.b64.trap \t[$s, \\{$x, $y\\}], \\{$r, 
$g\\};", + []>; def SUST_B_2D_V4B8_TRAP : NVPTXInst<(outs), (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int16Regs:$r, @@ -2806,6 +4923,12 @@ def SUST_B_2D_ARRAY_B32_TRAP Int32Regs:$r), "sust.b.a2d.b32.trap \t[$s, \\{$idx, $x, $y, $y\\}], \\{$r\\};", []>; +def SUST_B_2D_ARRAY_B64_TRAP + : NVPTXInst<(outs), + (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int32Regs:$y, + Int64Regs:$r), + "sust.b.a2d.b64.trap \t[$s, \\{$idx, $x, $y, $y\\}], \\{$r\\};", + []>; def SUST_B_2D_ARRAY_V2B8_TRAP : NVPTXInst<(outs), (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int32Regs:$y, @@ -2827,6 +4950,13 @@ def SUST_B_2D_ARRAY_V2B32_TRAP "sust.b.a2d.v2.b32.trap \t[$s, \\{$idx, $x, $y, $y\\}], " "\\{$r, $g\\};", []>; +def SUST_B_2D_ARRAY_V2B64_TRAP + : NVPTXInst<(outs), + (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int32Regs:$y, + Int64Regs:$r, Int64Regs:$g), + "sust.b.a2d.v2.b64.trap \t[$s, \\{$idx, $x, $y, $y\\}], " + "\\{$r, $g\\};", + []>; def SUST_B_2D_ARRAY_V4B8_TRAP : NVPTXInst<(outs), (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int32Regs:$y, @@ -2868,6 +4998,12 @@ def SUST_B_3D_B32_TRAP Int32Regs:$r), "sust.b.3d.b32.trap \t[$s, \\{$x, $y, $z, $z\\}], \\{$r\\};", []>; +def SUST_B_3D_B64_TRAP + : NVPTXInst<(outs), + (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z, + Int64Regs:$r), + "sust.b.3d.b64.trap \t[$s, \\{$x, $y, $z, $z\\}], \\{$r\\};", + []>; def SUST_B_3D_V2B8_TRAP : NVPTXInst<(outs), (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z, @@ -2889,6 +5025,13 @@ def SUST_B_3D_V2B32_TRAP "sust.b.3d.v2.b32.trap \t[$s, \\{$x, $y, $z, $z\\}], " "\\{$r, $g\\};", []>; +def SUST_B_3D_V2B64_TRAP + : NVPTXInst<(outs), + (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z, + Int64Regs:$r, Int64Regs:$g), + "sust.b.3d.v2.b64.trap \t[$s, \\{$x, $y, $z, $z\\}], " + "\\{$r, $g\\};", + []>; def SUST_B_3D_V4B8_TRAP : NVPTXInst<(outs), (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z, @@ -2911,6 +5054,353 @@ def SUST_B_3D_V4B32_TRAP 
"\\{$r, $g, $b, $a\\};", []>; + +// .zero variant +def SUST_B_1D_B8_ZERO + : NVPTXInst<(outs), + (ins Int64Regs:$s, Int32Regs:$x, Int16Regs:$r), + "sust.b.1d.b8.zero \t[$s, \\{$x\\}], \\{$r\\};", + []>; +def SUST_B_1D_B16_ZERO + : NVPTXInst<(outs), + (ins Int64Regs:$s, Int32Regs:$x, Int16Regs:$r), + "sust.b.1d.b16.zero \t[$s, \\{$x\\}], \\{$r\\};", + []>; +def SUST_B_1D_B32_ZERO + : NVPTXInst<(outs), + (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$r), + "sust.b.1d.b32.zero \t[$s, \\{$x\\}], \\{$r\\};", + []>; +def SUST_B_1D_B64_ZERO + : NVPTXInst<(outs), + (ins Int64Regs:$s, Int32Regs:$x, Int64Regs:$r), + "sust.b.1d.b64.zero \t[$s, \\{$x\\}], \\{$r\\};", + []>; +def SUST_B_1D_V2B8_ZERO + : NVPTXInst<(outs), + (ins Int64Regs:$s, Int32Regs:$x, Int16Regs:$r, Int16Regs:$g), + "sust.b.1d.v2.b8.zero \t[$s, \\{$x\\}], \\{$r, $g\\};", + []>; +def SUST_B_1D_V2B16_ZERO + : NVPTXInst<(outs), + (ins Int64Regs:$s, Int32Regs:$x, Int16Regs:$r, Int16Regs:$g), + "sust.b.1d.v2.b16.zero \t[$s, \\{$x\\}], \\{$r, $g\\};", + []>; +def SUST_B_1D_V2B32_ZERO + : NVPTXInst<(outs), + (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$r, Int32Regs:$g), + "sust.b.1d.v2.b32.zero \t[$s, \\{$x\\}], \\{$r, $g\\};", + []>; +def SUST_B_1D_V2B64_ZERO + : NVPTXInst<(outs), + (ins Int64Regs:$s, Int32Regs:$x, Int64Regs:$r, Int64Regs:$g), + "sust.b.1d.v2.b64.zero \t[$s, \\{$x\\}], \\{$r, $g\\};", + []>; +def SUST_B_1D_V4B8_ZERO + : NVPTXInst<(outs), + (ins Int64Regs:$s, Int32Regs:$x, Int16Regs:$r, Int16Regs:$g, + Int16Regs:$b, Int16Regs:$a), + "sust.b.1d.v4.b8.zero \t[$s, \\{$x\\}], \\{$r, $g, $b, $a\\};", + []>; +def SUST_B_1D_V4B16_ZERO + : NVPTXInst<(outs), + (ins Int64Regs:$s, Int32Regs:$x, Int16Regs:$r, Int16Regs:$g, + Int16Regs:$b, Int16Regs:$a), + "sust.b.1d.v4.b16.zero \t[$s, \\{$x\\}], \\{$r, $g, $b, $a\\};", + []>; +def SUST_B_1D_V4B32_ZERO + : NVPTXInst<(outs), + (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$r, Int32Regs:$g, + Int32Regs:$b, Int32Regs:$a), + "sust.b.1d.v4.b32.zero \t[$s, \\{$x\\}], \\{$r, 
$g, $b, $a\\};", + []>; + + +def SUST_B_1D_ARRAY_B8_ZERO + : NVPTXInst<(outs), + (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int16Regs:$r), + "sust.b.a1d.b8.zero \t[$s, \\{$idx, $x\\}], \\{$r\\};", + []>; +def SUST_B_1D_ARRAY_B16_ZERO + : NVPTXInst<(outs), + (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int16Regs:$r), + "sust.b.a1d.b16.zero \t[$s, \\{$idx, $x\\}], \\{$r\\};", + []>; +def SUST_B_1D_ARRAY_B32_ZERO + : NVPTXInst<(outs), + (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int32Regs:$r), + "sust.b.a1d.b32.zero \t[$s, \\{$idx, $x\\}], \\{$r\\};", + []>; +def SUST_B_1D_ARRAY_B64_ZERO + : NVPTXInst<(outs), + (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int64Regs:$r), + "sust.b.a1d.b64.zero \t[$s, \\{$idx, $x\\}], \\{$r\\};", + []>; +def SUST_B_1D_ARRAY_V2B8_ZERO + : NVPTXInst<(outs), + (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int16Regs:$r, + Int16Regs:$g), + "sust.b.a1d.v2.b8.zero \t[$s, \\{$idx, $x\\}], \\{$r, $g\\};", + []>; +def SUST_B_1D_ARRAY_V2B16_ZERO + : NVPTXInst<(outs), + (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int16Regs:$r, + Int16Regs:$g), + "sust.b.a1d.v2.b16.zero \t[$s, \\{$idx, $x\\}], \\{$r, $g\\};", + []>; +def SUST_B_1D_ARRAY_V2B32_ZERO + : NVPTXInst<(outs), + (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int32Regs:$r, + Int32Regs:$g), + "sust.b.a1d.v2.b32.zero \t[$s, \\{$idx, $x\\}], \\{$r, $g\\};", + []>; +def SUST_B_1D_ARRAY_V2B64_ZERO + : NVPTXInst<(outs), + (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int64Regs:$r, + Int64Regs:$g), + "sust.b.a1d.v2.b64.zero \t[$s, \\{$idx, $x\\}], \\{$r, $g\\};", + []>; +def SUST_B_1D_ARRAY_V4B8_ZERO + : NVPTXInst<(outs), + (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int16Regs:$r, + Int16Regs:$g, Int16Regs:$b, Int16Regs:$a), + "sust.b.a1d.v4.b8.zero \t[$s, \\{$idx, $x\\}], " + "\\{$r, $g, $b, $a\\};", + []>; +def SUST_B_1D_ARRAY_V4B16_ZERO + : NVPTXInst<(outs), + (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int16Regs:$r, + Int16Regs:$g, Int16Regs:$b, 
Int16Regs:$a), + "sust.b.a1d.v4.b16.zero \t[$s, \\{$idx, $x\\}], " + "\\{$r, $g, $b, $a\\};", + []>; +def SUST_B_1D_ARRAY_V4B32_ZERO + : NVPTXInst<(outs), + (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int32Regs:$r, + Int32Regs:$g, Int32Regs:$b, Int32Regs:$a), + "sust.b.a1d.v4.b32.zero \t[$s, \\{$idx, $x\\}], " + "\\{$r, $g, $b, $a\\};", + []>; + + +def SUST_B_2D_B8_ZERO + : NVPTXInst<(outs), + (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int16Regs:$r), + "sust.b.2d.b8.zero \t[$s, \\{$x, $y\\}], \\{$r\\};", + []>; +def SUST_B_2D_B16_ZERO + : NVPTXInst<(outs), + (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int16Regs:$r), + "sust.b.2d.b16.zero \t[$s, \\{$x, $y\\}], \\{$r\\};", + []>; +def SUST_B_2D_B32_ZERO + : NVPTXInst<(outs), + (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$r), + "sust.b.2d.b32.zero \t[$s, \\{$x, $y\\}], \\{$r\\};", + []>; +def SUST_B_2D_B64_ZERO + : NVPTXInst<(outs), + (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int64Regs:$r), + "sust.b.2d.b64.zero \t[$s, \\{$x, $y\\}], \\{$r\\};", + []>; +def SUST_B_2D_V2B8_ZERO + : NVPTXInst<(outs), + (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int16Regs:$r, + Int16Regs:$g), + "sust.b.2d.v2.b8.zero \t[$s, \\{$x, $y\\}], \\{$r, $g\\};", + []>; +def SUST_B_2D_V2B16_ZERO + : NVPTXInst<(outs), + (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int16Regs:$r, + Int16Regs:$g), + "sust.b.2d.v2.b16.zero \t[$s, \\{$x, $y\\}], \\{$r, $g\\};", + []>; +def SUST_B_2D_V2B32_ZERO + : NVPTXInst<(outs), + (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$r, + Int32Regs:$g), + "sust.b.2d.v2.b32.zero \t[$s, \\{$x, $y\\}], \\{$r, $g\\};", + []>; +def SUST_B_2D_V2B64_ZERO + : NVPTXInst<(outs), + (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int64Regs:$r, + Int64Regs:$g), + "sust.b.2d.v2.b64.zero \t[$s, \\{$x, $y\\}], \\{$r, $g\\};", + []>; +def SUST_B_2D_V4B8_ZERO + : NVPTXInst<(outs), + (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int16Regs:$r, + Int16Regs:$g, Int16Regs:$b, Int16Regs:$a), + 
"sust.b.2d.v4.b8.zero \t[$s, \\{$x, $y\\}], " + "\\{$r, $g, $b, $a\\};", + []>; +def SUST_B_2D_V4B16_ZERO + : NVPTXInst<(outs), + (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int16Regs:$r, + Int16Regs:$g, Int16Regs:$b, Int16Regs:$a), + "sust.b.2d.v4.b16.zero \t[$s, \\{$x, $y\\}], " + "\\{$r, $g, $b, $a\\};", + []>; +def SUST_B_2D_V4B32_ZERO + : NVPTXInst<(outs), + (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$r, + Int32Regs:$g, Int32Regs:$b, Int32Regs:$a), + "sust.b.2d.v4.b32.zero \t[$s, \\{$x, $y\\}], " + "\\{$r, $g, $b, $a\\};", + []>; + + +def SUST_B_2D_ARRAY_B8_ZERO + : NVPTXInst<(outs), + (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int32Regs:$y, + Int16Regs:$r), + "sust.b.a2d.b8.zero \t[$s, \\{$idx, $x, $y, $y\\}], \\{$r\\};", + []>; +def SUST_B_2D_ARRAY_B16_ZERO + : NVPTXInst<(outs), + (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int32Regs:$y, + Int16Regs:$r), + "sust.b.a2d.b16.zero \t[$s, \\{$idx, $x, $y, $y\\}], \\{$r\\};", + []>; +def SUST_B_2D_ARRAY_B32_ZERO + : NVPTXInst<(outs), + (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int32Regs:$y, + Int32Regs:$r), + "sust.b.a2d.b32.zero \t[$s, \\{$idx, $x, $y, $y\\}], \\{$r\\};", + []>; +def SUST_B_2D_ARRAY_B64_ZERO + : NVPTXInst<(outs), + (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int32Regs:$y, + Int64Regs:$r), + "sust.b.a2d.b64.zero \t[$s, \\{$idx, $x, $y, $y\\}], \\{$r\\};", + []>; +def SUST_B_2D_ARRAY_V2B8_ZERO + : NVPTXInst<(outs), + (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int32Regs:$y, + Int16Regs:$r, Int16Regs:$g), + "sust.b.a2d.v2.b8.zero \t[$s, \\{$idx, $x, $y, $y\\}], " + "\\{$r, $g\\};", + []>; +def SUST_B_2D_ARRAY_V2B16_ZERO + : NVPTXInst<(outs), + (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int32Regs:$y, + Int16Regs:$r, Int16Regs:$g), + "sust.b.a2d.v2.b16.zero \t[$s, \\{$idx, $x, $y, $y\\}], " + "\\{$r, $g\\};", + []>; +def SUST_B_2D_ARRAY_V2B32_ZERO + : NVPTXInst<(outs), + (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int32Regs:$y, + Int32Regs:$r, 
Int32Regs:$g), + "sust.b.a2d.v2.b32.zero \t[$s, \\{$idx, $x, $y, $y\\}], " + "\\{$r, $g\\};", + []>; +def SUST_B_2D_ARRAY_V2B64_ZERO + : NVPTXInst<(outs), + (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int32Regs:$y, + Int64Regs:$r, Int64Regs:$g), + "sust.b.a2d.v2.b64.zero \t[$s, \\{$idx, $x, $y, $y\\}], " + "\\{$r, $g\\};", + []>; +def SUST_B_2D_ARRAY_V4B8_ZERO + : NVPTXInst<(outs), + (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int32Regs:$y, + Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a), + "sust.b.a2d.v4.b8.zero \t[$s, \\{$idx, $x, $y, $y\\}], " + "\\{$r, $g, $b, $a\\};", + []>; +def SUST_B_2D_ARRAY_V4B16_ZERO + : NVPTXInst<(outs), + (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int32Regs:$y, + Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a), + "sust.b.a2d.v4.b16.zero \t[$s, \\{$idx, $x, $y, $y\\}], " + "\\{$r, $g, $b, $a\\};", + []>; +def SUST_B_2D_ARRAY_V4B32_ZERO + : NVPTXInst<(outs), + (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int32Regs:$y, + Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a), + "sust.b.a2d.v4.b32.zero \t[$s, \\{$idx, $x, $y, $y\\}], " + "\\{$r, $g, $b, $a\\};", + []>; + + +def SUST_B_3D_B8_ZERO + : NVPTXInst<(outs), + (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z, + Int16Regs:$r), + "sust.b.3d.b8.zero \t[$s, \\{$x, $y, $z, $z\\}], \\{$r\\};", + []>; +def SUST_B_3D_B16_ZERO + : NVPTXInst<(outs), + (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z, + Int16Regs:$r), + "sust.b.3d.b16.zero \t[$s, \\{$x, $y, $z, $z\\}], \\{$r\\};", + []>; +def SUST_B_3D_B32_ZERO + : NVPTXInst<(outs), + (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z, + Int32Regs:$r), + "sust.b.3d.b32.zero \t[$s, \\{$x, $y, $z, $z\\}], \\{$r\\};", + []>; +def SUST_B_3D_B64_ZERO + : NVPTXInst<(outs), + (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z, + Int64Regs:$r), + "sust.b.3d.b64.zero \t[$s, \\{$x, $y, $z, $z\\}], \\{$r\\};", + []>; +def SUST_B_3D_V2B8_ZERO + : NVPTXInst<(outs), + (ins 
Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z, + Int16Regs:$r, Int16Regs:$g), + "sust.b.3d.v2.b8.zero \t[$s, \\{$x, $y, $z, $z\\}], " + "\\{$r, $g\\};", + []>; +def SUST_B_3D_V2B16_ZERO + : NVPTXInst<(outs), + (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z, + Int16Regs:$r, Int16Regs:$g), + "sust.b.3d.v2.b16.zero \t[$s, \\{$x, $y, $z, $z\\}], " + "\\{$r, $g\\};", + []>; +def SUST_B_3D_V2B32_ZERO + : NVPTXInst<(outs), + (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z, + Int32Regs:$r, Int32Regs:$g), + "sust.b.3d.v2.b32.zero \t[$s, \\{$x, $y, $z, $z\\}], " + "\\{$r, $g\\};", + []>; +def SUST_B_3D_V2B64_ZERO + : NVPTXInst<(outs), + (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z, + Int64Regs:$r, Int64Regs:$g), + "sust.b.3d.v2.b64.zero \t[$s, \\{$x, $y, $z, $z\\}], " + "\\{$r, $g\\};", + []>; +def SUST_B_3D_V4B8_ZERO + : NVPTXInst<(outs), + (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z, + Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a), + "sust.b.3d.v4.b8.zero \t[$s, \\{$x, $y, $z, $z\\}], " + "\\{$r, $g, $b, $a\\};", + []>; +def SUST_B_3D_V4B16_ZERO + : NVPTXInst<(outs), + (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z, + Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a), + "sust.b.3d.v4.b16.zero \t[$s, \\{$x, $y, $z, $z\\}], " + "\\{$r, $g, $b, $a\\};", + []>; +def SUST_B_3D_V4B32_ZERO + : NVPTXInst<(outs), + (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z, + Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a), + "sust.b.3d.v4.b32.zero \t[$s, \\{$x, $y, $z, $z\\}], " + "\\{$r, $g, $b, $a\\};", + []>; + + + // Formatted def SUST_P_1D_B8_TRAP @@ -3197,12 +5687,341 @@ def SUST_P_3D_V4B32_TRAP "sust.p.3d.v4.b32.trap \t[$s, \\{$x, $y, $z, $z\\}], " "\\{$r, $g, $b, $a\\};", []>; - +} // Surface store instruction patterns // I'm not sure why we can't just include these in the instruction definitions, // but TableGen complains of type errors :( +// .clamp variant +def : 
Pat<(int_nvvm_sust_b_1d_i8_clamp + Int64Regs:$s, Int32Regs:$x, Int16Regs:$r), + (SUST_B_1D_B8_CLAMP Int64Regs:$s, Int32Regs:$x, Int16Regs:$r)>; + +def : Pat<(int_nvvm_sust_b_1d_i16_clamp + Int64Regs:$s, Int32Regs:$x, Int16Regs:$r), + (SUST_B_1D_B16_CLAMP Int64Regs:$s, Int32Regs:$x, Int16Regs:$r)>; + +def : Pat<(int_nvvm_sust_b_1d_i32_clamp + Int64Regs:$s, Int32Regs:$x, Int32Regs:$r), + (SUST_B_1D_B32_CLAMP Int64Regs:$s, Int32Regs:$x, Int32Regs:$r)>; + +def : Pat<(int_nvvm_sust_b_1d_i64_clamp + Int64Regs:$s, Int32Regs:$x, Int64Regs:$r), + (SUST_B_1D_B64_CLAMP Int64Regs:$s, Int32Regs:$x, Int64Regs:$r)>; + +def : Pat<(int_nvvm_sust_b_1d_v2i8_clamp + Int64Regs:$s, Int32Regs:$x, Int16Regs:$r, Int16Regs:$g), + (SUST_B_1D_V2B8_CLAMP Int64Regs:$s, Int32Regs:$x, + Int16Regs:$r, Int16Regs:$g)>; + +def : Pat<(int_nvvm_sust_b_1d_v2i16_clamp + Int64Regs:$s, Int32Regs:$x, Int16Regs:$r, Int16Regs:$g), + (SUST_B_1D_V2B16_CLAMP Int64Regs:$s, Int32Regs:$x, + Int16Regs:$r, Int16Regs:$g)>; + +def : Pat<(int_nvvm_sust_b_1d_v2i32_clamp + Int64Regs:$s, Int32Regs:$x, Int32Regs:$r, Int32Regs:$g), + (SUST_B_1D_V2B32_CLAMP Int64Regs:$s, Int32Regs:$x, + Int32Regs:$r, Int32Regs:$g)>; + +def : Pat<(int_nvvm_sust_b_1d_v2i64_clamp + Int64Regs:$s, Int32Regs:$x, Int64Regs:$r, Int64Regs:$g), + (SUST_B_1D_V2B64_CLAMP Int64Regs:$s, Int32Regs:$x, + Int64Regs:$r, Int64Regs:$g)>; + +def : Pat<(int_nvvm_sust_b_1d_v4i8_clamp + Int64Regs:$s, Int32Regs:$x, + Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a), + (SUST_B_1D_V4B8_CLAMP Int64Regs:$s, Int32Regs:$x, + Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a)>; + +def : Pat<(int_nvvm_sust_b_1d_v4i16_clamp + Int64Regs:$s, Int32Regs:$x, + Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a), + (SUST_B_1D_V4B16_CLAMP Int64Regs:$s, Int32Regs:$x, + Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a)>; + +def : Pat<(int_nvvm_sust_b_1d_v4i32_clamp + Int64Regs:$s, Int32Regs:$x, + Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a), + 
(SUST_B_1D_V4B32_CLAMP Int64Regs:$s, Int32Regs:$x, + Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a)>; + + + +def : Pat<(int_nvvm_sust_b_1d_array_i8_clamp + Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int16Regs:$r), + (SUST_B_1D_ARRAY_B8_CLAMP Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, + Int16Regs:$r)>; + +def : Pat<(int_nvvm_sust_b_1d_array_i16_clamp + Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int16Regs:$r), + (SUST_B_1D_ARRAY_B16_CLAMP Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, + Int16Regs:$r)>; + +def : Pat<(int_nvvm_sust_b_1d_array_i32_clamp + Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$r), + (SUST_B_1D_ARRAY_B32_CLAMP Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, + Int32Regs:$r)>; + +def : Pat<(int_nvvm_sust_b_1d_array_i64_clamp + Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int64Regs:$r), + (SUST_B_1D_ARRAY_B64_CLAMP Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, + Int64Regs:$r)>; + +def : Pat<(int_nvvm_sust_b_1d_array_v2i8_clamp + Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int16Regs:$r, Int16Regs:$g), + (SUST_B_1D_ARRAY_V2B8_CLAMP Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, + Int16Regs:$r, Int16Regs:$g)>; + +def : Pat<(int_nvvm_sust_b_1d_array_v2i16_clamp + Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int16Regs:$r, Int16Regs:$g), + (SUST_B_1D_ARRAY_V2B16_CLAMP Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, + Int16Regs:$r, Int16Regs:$g)>; + +def : Pat<(int_nvvm_sust_b_1d_array_v2i32_clamp + Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$r, Int32Regs:$g), + (SUST_B_1D_ARRAY_V2B32_CLAMP Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, + Int32Regs:$r, Int32Regs:$g)>; + +def : Pat<(int_nvvm_sust_b_1d_array_v2i64_clamp + Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int64Regs:$r, Int64Regs:$g), + (SUST_B_1D_ARRAY_V2B64_CLAMP Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, + Int64Regs:$r, Int64Regs:$g)>; + +def : Pat<(int_nvvm_sust_b_1d_array_v4i8_clamp + Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, + Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a), + (SUST_B_1D_ARRAY_V4B8_CLAMP 
Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, + Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a)>; + +def : Pat<(int_nvvm_sust_b_1d_array_v4i16_clamp + Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, + Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a), + (SUST_B_1D_ARRAY_V4B16_CLAMP Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, + Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a)>; + +def : Pat<(int_nvvm_sust_b_1d_array_v4i32_clamp + Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, + Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a), + (SUST_B_1D_ARRAY_V4B32_CLAMP Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, + Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a)>; + + + +def : Pat<(int_nvvm_sust_b_2d_i8_clamp + Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int16Regs:$r), + (SUST_B_2D_B8_CLAMP Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, + Int16Regs:$r)>; + +def : Pat<(int_nvvm_sust_b_2d_i16_clamp + Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int16Regs:$r), + (SUST_B_2D_B16_CLAMP Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, + Int16Regs:$r)>; + +def : Pat<(int_nvvm_sust_b_2d_i32_clamp + Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$r), + (SUST_B_2D_B32_CLAMP Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, + Int32Regs:$r)>; + +def : Pat<(int_nvvm_sust_b_2d_i64_clamp + Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int64Regs:$r), + (SUST_B_2D_B64_CLAMP Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, + Int64Regs:$r)>; + +def : Pat<(int_nvvm_sust_b_2d_v2i8_clamp + Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int16Regs:$r, Int16Regs:$g), + (SUST_B_2D_V2B8_CLAMP Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, + Int16Regs:$r, Int16Regs:$g)>; + +def : Pat<(int_nvvm_sust_b_2d_v2i16_clamp + Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int16Regs:$r, Int16Regs:$g), + (SUST_B_2D_V2B16_CLAMP Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, + Int16Regs:$r, Int16Regs:$g)>; + +def : Pat<(int_nvvm_sust_b_2d_v2i32_clamp + Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$r, Int32Regs:$g), + (SUST_B_2D_V2B32_CLAMP 
Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, + Int32Regs:$r, Int32Regs:$g)>; + +def : Pat<(int_nvvm_sust_b_2d_v2i64_clamp + Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int64Regs:$r, Int64Regs:$g), + (SUST_B_2D_V2B64_CLAMP Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, + Int64Regs:$r, Int64Regs:$g)>; + +def : Pat<(int_nvvm_sust_b_2d_v4i8_clamp + Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, + Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a), + (SUST_B_2D_V4B8_CLAMP Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, + Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a)>; + +def : Pat<(int_nvvm_sust_b_2d_v4i16_clamp + Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, + Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a), + (SUST_B_2D_V4B16_CLAMP Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, + Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a)>; + +def : Pat<(int_nvvm_sust_b_2d_v4i32_clamp + Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, + Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a), + (SUST_B_2D_V4B32_CLAMP Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, + Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a)>; + + + +def : Pat<(int_nvvm_sust_b_2d_array_i8_clamp + Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y, Int16Regs:$r), + (SUST_B_2D_ARRAY_B8_CLAMP Int64Regs:$s, + Int32Regs:$l, Int32Regs:$x, Int32Regs:$y, + Int16Regs:$r)>; + +def : Pat<(int_nvvm_sust_b_2d_array_i16_clamp + Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y, Int16Regs:$r), + (SUST_B_2D_ARRAY_B16_CLAMP Int64Regs:$s, + Int32Regs:$l, Int32Regs:$x, Int32Regs:$y, + Int16Regs:$r)>; + +def : Pat<(int_nvvm_sust_b_2d_array_i32_clamp + Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y, Int32Regs:$r), + (SUST_B_2D_ARRAY_B32_CLAMP Int64Regs:$s, + Int32Regs:$l, Int32Regs:$x, Int32Regs:$y, + Int32Regs:$r)>; + +def : Pat<(int_nvvm_sust_b_2d_array_i64_clamp + Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y, Int64Regs:$r), + (SUST_B_2D_ARRAY_B64_CLAMP Int64Regs:$s, + Int32Regs:$l, Int32Regs:$x, 
Int32Regs:$y, + Int64Regs:$r)>; + +def : Pat<(int_nvvm_sust_b_2d_array_v2i8_clamp + Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y, + Int16Regs:$r, Int16Regs:$g), + (SUST_B_2D_ARRAY_V2B8_CLAMP Int64Regs:$s, Int32Regs:$l, + Int32Regs:$x, Int32Regs:$y, + Int16Regs:$r, Int16Regs:$g)>; + +def : Pat<(int_nvvm_sust_b_2d_array_v2i16_clamp + Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y, + Int16Regs:$r, Int16Regs:$g), + (SUST_B_2D_ARRAY_V2B16_CLAMP Int64Regs:$s, Int32Regs:$l, + Int32Regs:$x, Int32Regs:$y, + Int16Regs:$r, Int16Regs:$g)>; + +def : Pat<(int_nvvm_sust_b_2d_array_v2i32_clamp + Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y, Int32Regs:$r, + Int32Regs:$g), + (SUST_B_2D_ARRAY_V2B32_CLAMP Int64Regs:$s, Int32Regs:$l, + Int32Regs:$x, Int32Regs:$y, Int32Regs:$r, Int32Regs:$g)>; + +def : Pat<(int_nvvm_sust_b_2d_array_v2i64_clamp + Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y, Int64Regs:$r, + Int64Regs:$g), + (SUST_B_2D_ARRAY_V2B64_CLAMP Int64Regs:$s, Int32Regs:$l, + Int32Regs:$x, Int32Regs:$y, Int64Regs:$r, Int64Regs:$g)>; + +def : Pat<(int_nvvm_sust_b_2d_array_v4i8_clamp + Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y, + Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a), + (SUST_B_2D_ARRAY_V4B8_CLAMP Int64Regs:$s, + Int32Regs:$l, Int32Regs:$x, Int32Regs:$y, + Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a)>; + +def : Pat<(int_nvvm_sust_b_2d_array_v4i16_clamp + Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y, + Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a), + (SUST_B_2D_ARRAY_V4B16_CLAMP Int64Regs:$s, + Int32Regs:$l, Int32Regs:$x, Int32Regs:$y, + Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a)>; + +def : Pat<(int_nvvm_sust_b_2d_array_v4i32_clamp + Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y, + Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a), + (SUST_B_2D_ARRAY_V4B32_CLAMP Int64Regs:$s, Int32Regs:$l, + Int32Regs:$x, Int32Regs:$y, + Int32Regs:$r, Int32Regs:$g, 
Int32Regs:$b, Int32Regs:$a)>; + + + +def : Pat<(int_nvvm_sust_b_3d_i8_clamp + Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z, + Int16Regs:$r), + (SUST_B_3D_B8_CLAMP Int64Regs:$s, + Int32Regs:$x, Int32Regs:$y, Int32Regs:$z, + Int16Regs:$r)>; + +def : Pat<(int_nvvm_sust_b_3d_i16_clamp + Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z, + Int16Regs:$r), + (SUST_B_3D_B16_CLAMP Int64Regs:$s, + Int32Regs:$x, Int32Regs:$y, Int32Regs:$z, + Int16Regs:$r)>; + +def : Pat<(int_nvvm_sust_b_3d_i32_clamp + Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z, + Int32Regs:$r), + (SUST_B_3D_B32_CLAMP Int64Regs:$s, + Int32Regs:$x, Int32Regs:$y, Int32Regs:$z, + Int32Regs:$r)>; + +def : Pat<(int_nvvm_sust_b_3d_i64_clamp + Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z, + Int64Regs:$r), + (SUST_B_3D_B64_CLAMP Int64Regs:$s, + Int32Regs:$x, Int32Regs:$y, Int32Regs:$z, + Int64Regs:$r)>; + +def : Pat<(int_nvvm_sust_b_3d_v2i8_clamp + Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z, + Int16Regs:$r, Int16Regs:$g), + (SUST_B_3D_V2B8_CLAMP Int64Regs:$s, + Int32Regs:$x, Int32Regs:$y, Int32Regs:$z, + Int16Regs:$r, Int16Regs:$g)>; + +def : Pat<(int_nvvm_sust_b_3d_v2i16_clamp + Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z, + Int16Regs:$r, Int16Regs:$g), + (SUST_B_3D_V2B16_CLAMP Int64Regs:$s, + Int32Regs:$x, Int32Regs:$y, Int32Regs:$z, + Int16Regs:$r, Int16Regs:$g)>; + +def : Pat<(int_nvvm_sust_b_3d_v2i32_clamp + Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z, + Int32Regs:$r, Int32Regs:$g), + (SUST_B_3D_V2B32_CLAMP Int64Regs:$s, + Int32Regs:$x, Int32Regs:$y, Int32Regs:$z, + Int32Regs:$r, Int32Regs:$g)>; + +def : Pat<(int_nvvm_sust_b_3d_v2i64_clamp + Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z, + Int64Regs:$r, Int64Regs:$g), + (SUST_B_3D_V2B64_CLAMP Int64Regs:$s, + Int32Regs:$x, Int32Regs:$y, Int32Regs:$z, + Int64Regs:$r, Int64Regs:$g)>; + +def : Pat<(int_nvvm_sust_b_3d_v4i8_clamp + Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z, + 
Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a), + (SUST_B_3D_V4B8_CLAMP Int64Regs:$s, + Int32Regs:$x, Int32Regs:$y, Int32Regs:$z, + Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a)>; + +def : Pat<(int_nvvm_sust_b_3d_v4i16_clamp + Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z, + Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a), + (SUST_B_3D_V4B16_CLAMP Int64Regs:$s, + Int32Regs:$x, Int32Regs:$y, Int32Regs:$z, + Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a)>; + +def : Pat<(int_nvvm_sust_b_3d_v4i32_clamp + Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z, + Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a), + (SUST_B_3D_V4B32_CLAMP Int64Regs:$s, + Int32Regs:$x, Int32Regs:$y, Int32Regs:$z, + Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a)>; + + +// .trap variant def : Pat<(int_nvvm_sust_b_1d_i8_trap Int64Regs:$s, Int32Regs:$x, Int16Regs:$r), (SUST_B_1D_B8_TRAP Int64Regs:$s, Int32Regs:$x, Int16Regs:$r)>; @@ -3215,6 +6034,10 @@ def : Pat<(int_nvvm_sust_b_1d_i32_trap Int64Regs:$s, Int32Regs:$x, Int32Regs:$r), (SUST_B_1D_B32_TRAP Int64Regs:$s, Int32Regs:$x, Int32Regs:$r)>; +def : Pat<(int_nvvm_sust_b_1d_i64_trap + Int64Regs:$s, Int32Regs:$x, Int64Regs:$r), + (SUST_B_1D_B64_TRAP Int64Regs:$s, Int32Regs:$x, Int64Regs:$r)>; + def : Pat<(int_nvvm_sust_b_1d_v2i8_trap Int64Regs:$s, Int32Regs:$x, Int16Regs:$r, Int16Regs:$g), (SUST_B_1D_V2B8_TRAP Int64Regs:$s, Int32Regs:$x, @@ -3230,6 +6053,11 @@ def : Pat<(int_nvvm_sust_b_1d_v2i32_trap (SUST_B_1D_V2B32_TRAP Int64Regs:$s, Int32Regs:$x, Int32Regs:$r, Int32Regs:$g)>; +def : Pat<(int_nvvm_sust_b_1d_v2i64_trap + Int64Regs:$s, Int32Regs:$x, Int64Regs:$r, Int64Regs:$g), + (SUST_B_1D_V2B64_TRAP Int64Regs:$s, Int32Regs:$x, + Int64Regs:$r, Int64Regs:$g)>; + def : Pat<(int_nvvm_sust_b_1d_v4i8_trap Int64Regs:$s, Int32Regs:$x, Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a), @@ -3265,6 +6093,11 @@ def : Pat<(int_nvvm_sust_b_1d_array_i32_trap (SUST_B_1D_ARRAY_B32_TRAP 
Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$r)>; +def : Pat<(int_nvvm_sust_b_1d_array_i64_trap + Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int64Regs:$r), + (SUST_B_1D_ARRAY_B64_TRAP Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, + Int64Regs:$r)>; + def : Pat<(int_nvvm_sust_b_1d_array_v2i8_trap Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int16Regs:$r, Int16Regs:$g), (SUST_B_1D_ARRAY_V2B8_TRAP Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, @@ -3280,6 +6113,11 @@ def : Pat<(int_nvvm_sust_b_1d_array_v2i32_trap (SUST_B_1D_ARRAY_V2B32_TRAP Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$r, Int32Regs:$g)>; +def : Pat<(int_nvvm_sust_b_1d_array_v2i64_trap + Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int64Regs:$r, Int64Regs:$g), + (SUST_B_1D_ARRAY_V2B64_TRAP Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, + Int64Regs:$r, Int64Regs:$g)>; + def : Pat<(int_nvvm_sust_b_1d_array_v4i8_trap Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a), @@ -3315,6 +6153,11 @@ def : Pat<(int_nvvm_sust_b_2d_i32_trap (SUST_B_2D_B32_TRAP Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$r)>; +def : Pat<(int_nvvm_sust_b_2d_i64_trap + Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int64Regs:$r), + (SUST_B_2D_B64_TRAP Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, + Int64Regs:$r)>; + def : Pat<(int_nvvm_sust_b_2d_v2i8_trap Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int16Regs:$r, Int16Regs:$g), (SUST_B_2D_V2B8_TRAP Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, @@ -3330,6 +6173,11 @@ def : Pat<(int_nvvm_sust_b_2d_v2i32_trap (SUST_B_2D_V2B32_TRAP Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$r, Int32Regs:$g)>; +def : Pat<(int_nvvm_sust_b_2d_v2i64_trap + Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int64Regs:$r, Int64Regs:$g), + (SUST_B_2D_V2B64_TRAP Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, + Int64Regs:$r, Int64Regs:$g)>; + def : Pat<(int_nvvm_sust_b_2d_v4i8_trap Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a), @@ 
-3368,6 +6216,12 @@ def : Pat<(int_nvvm_sust_b_2d_array_i32_trap Int32Regs:$l, Int32Regs:$x, Int32Regs:$y, Int32Regs:$r)>; +def : Pat<(int_nvvm_sust_b_2d_array_i64_trap + Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y, Int64Regs:$r), + (SUST_B_2D_ARRAY_B64_TRAP Int64Regs:$s, + Int32Regs:$l, Int32Regs:$x, Int32Regs:$y, + Int64Regs:$r)>; + def : Pat<(int_nvvm_sust_b_2d_array_v2i8_trap Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y, Int16Regs:$r, Int16Regs:$g), @@ -3388,6 +6242,12 @@ def : Pat<(int_nvvm_sust_b_2d_array_v2i32_trap (SUST_B_2D_ARRAY_V2B32_TRAP Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y, Int32Regs:$r, Int32Regs:$g)>; +def : Pat<(int_nvvm_sust_b_2d_array_v2i64_trap + Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y, Int64Regs:$r, + Int64Regs:$g), + (SUST_B_2D_ARRAY_V2B64_TRAP Int64Regs:$s, Int32Regs:$l, + Int32Regs:$x, Int32Regs:$y, Int64Regs:$r, Int64Regs:$g)>; + def : Pat<(int_nvvm_sust_b_2d_array_v4i8_trap Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y, Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a), @@ -3432,6 +6292,13 @@ def : Pat<(int_nvvm_sust_b_3d_i32_trap Int32Regs:$x, Int32Regs:$y, Int32Regs:$z, Int32Regs:$r)>; +def : Pat<(int_nvvm_sust_b_3d_i64_trap + Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z, + Int64Regs:$r), + (SUST_B_3D_B64_TRAP Int64Regs:$s, + Int32Regs:$x, Int32Regs:$y, Int32Regs:$z, + Int64Regs:$r)>; + def : Pat<(int_nvvm_sust_b_3d_v2i8_trap Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z, Int16Regs:$r, Int16Regs:$g), @@ -3453,6 +6320,13 @@ def : Pat<(int_nvvm_sust_b_3d_v2i32_trap Int32Regs:$x, Int32Regs:$y, Int32Regs:$z, Int32Regs:$r, Int32Regs:$g)>; +def : Pat<(int_nvvm_sust_b_3d_v2i64_trap + Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z, + Int64Regs:$r, Int64Regs:$g), + (SUST_B_3D_V2B64_TRAP Int64Regs:$s, + Int32Regs:$x, Int32Regs:$y, Int32Regs:$z, + Int64Regs:$r, Int64Regs:$g)>; + def : Pat<(int_nvvm_sust_b_3d_v4i8_trap Int64Regs:$s, Int32Regs:$x, 
Int32Regs:$y, Int32Regs:$z, Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a), @@ -3475,6 +6349,334 @@ def : Pat<(int_nvvm_sust_b_3d_v4i32_trap Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a)>; +// .zero variant +def : Pat<(int_nvvm_sust_b_1d_i8_zero + Int64Regs:$s, Int32Regs:$x, Int16Regs:$r), + (SUST_B_1D_B8_ZERO Int64Regs:$s, Int32Regs:$x, Int16Regs:$r)>; + +def : Pat<(int_nvvm_sust_b_1d_i16_zero + Int64Regs:$s, Int32Regs:$x, Int16Regs:$r), + (SUST_B_1D_B16_ZERO Int64Regs:$s, Int32Regs:$x, Int16Regs:$r)>; + +def : Pat<(int_nvvm_sust_b_1d_i32_zero + Int64Regs:$s, Int32Regs:$x, Int32Regs:$r), + (SUST_B_1D_B32_ZERO Int64Regs:$s, Int32Regs:$x, Int32Regs:$r)>; + +def : Pat<(int_nvvm_sust_b_1d_i64_zero + Int64Regs:$s, Int32Regs:$x, Int64Regs:$r), + (SUST_B_1D_B64_ZERO Int64Regs:$s, Int32Regs:$x, Int64Regs:$r)>; + +def : Pat<(int_nvvm_sust_b_1d_v2i8_zero + Int64Regs:$s, Int32Regs:$x, Int16Regs:$r, Int16Regs:$g), + (SUST_B_1D_V2B8_ZERO Int64Regs:$s, Int32Regs:$x, + Int16Regs:$r, Int16Regs:$g)>; + +def : Pat<(int_nvvm_sust_b_1d_v2i16_zero + Int64Regs:$s, Int32Regs:$x, Int16Regs:$r, Int16Regs:$g), + (SUST_B_1D_V2B16_ZERO Int64Regs:$s, Int32Regs:$x, + Int16Regs:$r, Int16Regs:$g)>; + +def : Pat<(int_nvvm_sust_b_1d_v2i32_zero + Int64Regs:$s, Int32Regs:$x, Int32Regs:$r, Int32Regs:$g), + (SUST_B_1D_V2B32_ZERO Int64Regs:$s, Int32Regs:$x, + Int32Regs:$r, Int32Regs:$g)>; + +def : Pat<(int_nvvm_sust_b_1d_v2i64_zero + Int64Regs:$s, Int32Regs:$x, Int64Regs:$r, Int64Regs:$g), + (SUST_B_1D_V2B64_ZERO Int64Regs:$s, Int32Regs:$x, + Int64Regs:$r, Int64Regs:$g)>; + +def : Pat<(int_nvvm_sust_b_1d_v4i8_zero + Int64Regs:$s, Int32Regs:$x, + Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a), + (SUST_B_1D_V4B8_ZERO Int64Regs:$s, Int32Regs:$x, + Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a)>; + +def : Pat<(int_nvvm_sust_b_1d_v4i16_zero + Int64Regs:$s, Int32Regs:$x, + Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a), + (SUST_B_1D_V4B16_ZERO Int64Regs:$s, 
Int32Regs:$x, + Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a)>; + +def : Pat<(int_nvvm_sust_b_1d_v4i32_zero + Int64Regs:$s, Int32Regs:$x, + Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a), + (SUST_B_1D_V4B32_ZERO Int64Regs:$s, Int32Regs:$x, + Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a)>; + + + +def : Pat<(int_nvvm_sust_b_1d_array_i8_zero + Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int16Regs:$r), + (SUST_B_1D_ARRAY_B8_ZERO Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, + Int16Regs:$r)>; + +def : Pat<(int_nvvm_sust_b_1d_array_i16_zero + Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int16Regs:$r), + (SUST_B_1D_ARRAY_B16_ZERO Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, + Int16Regs:$r)>; + +def : Pat<(int_nvvm_sust_b_1d_array_i32_zero + Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$r), + (SUST_B_1D_ARRAY_B32_ZERO Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, + Int32Regs:$r)>; + +def : Pat<(int_nvvm_sust_b_1d_array_i64_zero + Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int64Regs:$r), + (SUST_B_1D_ARRAY_B64_ZERO Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, + Int64Regs:$r)>; + +def : Pat<(int_nvvm_sust_b_1d_array_v2i8_zero + Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int16Regs:$r, Int16Regs:$g), + (SUST_B_1D_ARRAY_V2B8_ZERO Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, + Int16Regs:$r, Int16Regs:$g)>; + +def : Pat<(int_nvvm_sust_b_1d_array_v2i16_zero + Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int16Regs:$r, Int16Regs:$g), + (SUST_B_1D_ARRAY_V2B16_ZERO Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, + Int16Regs:$r, Int16Regs:$g)>; + +def : Pat<(int_nvvm_sust_b_1d_array_v2i32_zero + Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$r, Int32Regs:$g), + (SUST_B_1D_ARRAY_V2B32_ZERO Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, + Int32Regs:$r, Int32Regs:$g)>; + +def : Pat<(int_nvvm_sust_b_1d_array_v2i64_zero + Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int64Regs:$r, Int64Regs:$g), + (SUST_B_1D_ARRAY_V2B64_ZERO Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, + Int64Regs:$r, 
Int64Regs:$g)>; + +def : Pat<(int_nvvm_sust_b_1d_array_v4i8_zero + Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, + Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a), + (SUST_B_1D_ARRAY_V4B8_ZERO Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, + Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a)>; + +def : Pat<(int_nvvm_sust_b_1d_array_v4i16_zero + Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, + Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a), + (SUST_B_1D_ARRAY_V4B16_ZERO Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, + Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a)>; + +def : Pat<(int_nvvm_sust_b_1d_array_v4i32_zero + Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, + Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a), + (SUST_B_1D_ARRAY_V4B32_ZERO Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, + Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a)>; + + + +def : Pat<(int_nvvm_sust_b_2d_i8_zero + Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int16Regs:$r), + (SUST_B_2D_B8_ZERO Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, + Int16Regs:$r)>; + +def : Pat<(int_nvvm_sust_b_2d_i16_zero + Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int16Regs:$r), + (SUST_B_2D_B16_ZERO Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, + Int16Regs:$r)>; + +def : Pat<(int_nvvm_sust_b_2d_i32_zero + Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$r), + (SUST_B_2D_B32_ZERO Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, + Int32Regs:$r)>; + +def : Pat<(int_nvvm_sust_b_2d_i64_zero + Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int64Regs:$r), + (SUST_B_2D_B64_ZERO Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, + Int64Regs:$r)>; + +def : Pat<(int_nvvm_sust_b_2d_v2i8_zero + Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int16Regs:$r, Int16Regs:$g), + (SUST_B_2D_V2B8_ZERO Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, + Int16Regs:$r, Int16Regs:$g)>; + +def : Pat<(int_nvvm_sust_b_2d_v2i16_zero + Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int16Regs:$r, Int16Regs:$g), + (SUST_B_2D_V2B16_ZERO Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, 
+ Int16Regs:$r, Int16Regs:$g)>; + +def : Pat<(int_nvvm_sust_b_2d_v2i32_zero + Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$r, Int32Regs:$g), + (SUST_B_2D_V2B32_ZERO Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, + Int32Regs:$r, Int32Regs:$g)>; + +def : Pat<(int_nvvm_sust_b_2d_v2i64_zero + Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int64Regs:$r, Int64Regs:$g), + (SUST_B_2D_V2B64_ZERO Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, + Int64Regs:$r, Int64Regs:$g)>; + +def : Pat<(int_nvvm_sust_b_2d_v4i8_zero + Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, + Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a), + (SUST_B_2D_V4B8_ZERO Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, + Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a)>; + +def : Pat<(int_nvvm_sust_b_2d_v4i16_zero + Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, + Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a), + (SUST_B_2D_V4B16_ZERO Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, + Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a)>; + +def : Pat<(int_nvvm_sust_b_2d_v4i32_zero + Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, + Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a), + (SUST_B_2D_V4B32_ZERO Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, + Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a)>; + + + +def : Pat<(int_nvvm_sust_b_2d_array_i8_zero + Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y, Int16Regs:$r), + (SUST_B_2D_ARRAY_B8_ZERO Int64Regs:$s, + Int32Regs:$l, Int32Regs:$x, Int32Regs:$y, + Int16Regs:$r)>; + +def : Pat<(int_nvvm_sust_b_2d_array_i16_zero + Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y, Int16Regs:$r), + (SUST_B_2D_ARRAY_B16_ZERO Int64Regs:$s, + Int32Regs:$l, Int32Regs:$x, Int32Regs:$y, + Int16Regs:$r)>; + +def : Pat<(int_nvvm_sust_b_2d_array_i32_zero + Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y, Int32Regs:$r), + (SUST_B_2D_ARRAY_B32_ZERO Int64Regs:$s, + Int32Regs:$l, Int32Regs:$x, Int32Regs:$y, + Int32Regs:$r)>; + +def : 
Pat<(int_nvvm_sust_b_2d_array_i64_zero + Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y, Int64Regs:$r), + (SUST_B_2D_ARRAY_B64_ZERO Int64Regs:$s, + Int32Regs:$l, Int32Regs:$x, Int32Regs:$y, + Int64Regs:$r)>; + +def : Pat<(int_nvvm_sust_b_2d_array_v2i8_zero + Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y, + Int16Regs:$r, Int16Regs:$g), + (SUST_B_2D_ARRAY_V2B8_ZERO Int64Regs:$s, Int32Regs:$l, + Int32Regs:$x, Int32Regs:$y, + Int16Regs:$r, Int16Regs:$g)>; + +def : Pat<(int_nvvm_sust_b_2d_array_v2i16_zero + Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y, + Int16Regs:$r, Int16Regs:$g), + (SUST_B_2D_ARRAY_V2B16_ZERO Int64Regs:$s, Int32Regs:$l, + Int32Regs:$x, Int32Regs:$y, + Int16Regs:$r, Int16Regs:$g)>; + +def : Pat<(int_nvvm_sust_b_2d_array_v2i32_zero + Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y, Int32Regs:$r, + Int32Regs:$g), + (SUST_B_2D_ARRAY_V2B32_ZERO Int64Regs:$s, Int32Regs:$l, + Int32Regs:$x, Int32Regs:$y, Int32Regs:$r, Int32Regs:$g)>; + +def : Pat<(int_nvvm_sust_b_2d_array_v2i64_zero + Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y, Int64Regs:$r, + Int64Regs:$g), + (SUST_B_2D_ARRAY_V2B64_ZERO Int64Regs:$s, Int32Regs:$l, + Int32Regs:$x, Int32Regs:$y, Int64Regs:$r, Int64Regs:$g)>; + +def : Pat<(int_nvvm_sust_b_2d_array_v4i8_zero + Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y, + Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a), + (SUST_B_2D_ARRAY_V4B8_ZERO Int64Regs:$s, + Int32Regs:$l, Int32Regs:$x, Int32Regs:$y, + Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a)>; + +def : Pat<(int_nvvm_sust_b_2d_array_v4i16_zero + Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y, + Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a), + (SUST_B_2D_ARRAY_V4B16_ZERO Int64Regs:$s, + Int32Regs:$l, Int32Regs:$x, Int32Regs:$y, + Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a)>; + +def : Pat<(int_nvvm_sust_b_2d_array_v4i32_zero + Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y, + Int32Regs:$r, 
Int32Regs:$g, Int32Regs:$b, Int32Regs:$a), + (SUST_B_2D_ARRAY_V4B32_ZERO Int64Regs:$s, Int32Regs:$l, + Int32Regs:$x, Int32Regs:$y, + Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a)>; + + + +def : Pat<(int_nvvm_sust_b_3d_i8_zero + Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z, + Int16Regs:$r), + (SUST_B_3D_B8_ZERO Int64Regs:$s, + Int32Regs:$x, Int32Regs:$y, Int32Regs:$z, + Int16Regs:$r)>; + +def : Pat<(int_nvvm_sust_b_3d_i16_zero + Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z, + Int16Regs:$r), + (SUST_B_3D_B16_ZERO Int64Regs:$s, + Int32Regs:$x, Int32Regs:$y, Int32Regs:$z, + Int16Regs:$r)>; + +def : Pat<(int_nvvm_sust_b_3d_i32_zero + Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z, + Int32Regs:$r), + (SUST_B_3D_B32_ZERO Int64Regs:$s, + Int32Regs:$x, Int32Regs:$y, Int32Regs:$z, + Int32Regs:$r)>; + +def : Pat<(int_nvvm_sust_b_3d_i64_zero + Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z, + Int64Regs:$r), + (SUST_B_3D_B64_ZERO Int64Regs:$s, + Int32Regs:$x, Int32Regs:$y, Int32Regs:$z, + Int64Regs:$r)>; + +def : Pat<(int_nvvm_sust_b_3d_v2i8_zero + Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z, + Int16Regs:$r, Int16Regs:$g), + (SUST_B_3D_V2B8_ZERO Int64Regs:$s, + Int32Regs:$x, Int32Regs:$y, Int32Regs:$z, + Int16Regs:$r, Int16Regs:$g)>; + +def : Pat<(int_nvvm_sust_b_3d_v2i16_zero + Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z, + Int16Regs:$r, Int16Regs:$g), + (SUST_B_3D_V2B16_ZERO Int64Regs:$s, + Int32Regs:$x, Int32Regs:$y, Int32Regs:$z, + Int16Regs:$r, Int16Regs:$g)>; + +def : Pat<(int_nvvm_sust_b_3d_v2i32_zero + Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z, + Int32Regs:$r, Int32Regs:$g), + (SUST_B_3D_V2B32_ZERO Int64Regs:$s, + Int32Regs:$x, Int32Regs:$y, Int32Regs:$z, + Int32Regs:$r, Int32Regs:$g)>; + +def : Pat<(int_nvvm_sust_b_3d_v2i64_zero + Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z, + Int64Regs:$r, Int64Regs:$g), + (SUST_B_3D_V2B64_ZERO Int64Regs:$s, + Int32Regs:$x, Int32Regs:$y, 
Int32Regs:$z, + Int64Regs:$r, Int64Regs:$g)>; + +def : Pat<(int_nvvm_sust_b_3d_v4i8_zero + Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z, + Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a), + (SUST_B_3D_V4B8_ZERO Int64Regs:$s, + Int32Regs:$x, Int32Regs:$y, Int32Regs:$z, + Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a)>; + +def : Pat<(int_nvvm_sust_b_3d_v4i16_zero + Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z, + Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a), + (SUST_B_3D_V4B16_ZERO Int64Regs:$s, + Int32Regs:$x, Int32Regs:$y, Int32Regs:$z, + Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a)>; + +def : Pat<(int_nvvm_sust_b_3d_v4i32_zero + Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z, + Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a), + (SUST_B_3D_V4B32_ZERO Int64Regs:$s, + Int32Regs:$x, Int32Regs:$y, Int32Regs:$z, + Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a)>; + + def : Pat<(int_nvvm_sust_p_1d_i8_trap diff --git a/lib/Target/NVPTX/NVPTXLowerAggrCopies.h b/lib/Target/NVPTX/NVPTXLowerAggrCopies.h index 5ec1fc9..8759406 100644 --- a/lib/Target/NVPTX/NVPTXLowerAggrCopies.h +++ b/lib/Target/NVPTX/NVPTXLowerAggrCopies.h @@ -12,8 +12,8 @@ // //===----------------------------------------------------------------------===// -#ifndef NVPTX_LOWER_AGGR_COPIES_H -#define NVPTX_LOWER_AGGR_COPIES_H +#ifndef LLVM_LIB_TARGET_NVPTX_NVPTXLOWERAGGRCOPIES_H +#define LLVM_LIB_TARGET_NVPTX_NVPTXLOWERAGGRCOPIES_H #include "llvm/CodeGen/MachineFunctionAnalysis.h" #include "llvm/IR/DataLayout.h" diff --git a/lib/Target/NVPTX/NVPTXLowerStructArgs.cpp b/lib/Target/NVPTX/NVPTXLowerStructArgs.cpp new file mode 100644 index 0000000..3149399 --- /dev/null +++ b/lib/Target/NVPTX/NVPTXLowerStructArgs.cpp @@ -0,0 +1,134 @@ +//===-- NVPTXLowerStructArgs.cpp - Copy struct args to local memory =====--===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source 
+// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// Copy struct args to local memory. This is needed for kernel functions only. +// This is a preparation for handling cases like +// +// kernel void foo(struct A arg, ...) +// { +// struct A *p = &arg; +// ... +// ... = p->field1 ... (there is no generic address for .param) +// p->field2 = ... (there is no write access to .param) +// } +// +//===----------------------------------------------------------------------===// + +#include "NVPTX.h" +#include "NVPTXUtilities.h" +#include "llvm/IR/Function.h" +#include "llvm/IR/Instructions.h" +#include "llvm/IR/IntrinsicInst.h" +#include "llvm/IR/Module.h" +#include "llvm/IR/Type.h" +#include "llvm/Pass.h" + +using namespace llvm; + +namespace llvm { +void initializeNVPTXLowerStructArgsPass(PassRegistry &); +} + +class LLVM_LIBRARY_VISIBILITY NVPTXLowerStructArgs : public FunctionPass { + bool runOnFunction(Function &F) override; + + void handleStructPtrArgs(Function &); + void handleParam(Argument *); + +public: + static char ID; // Pass identification, replacement for typeid + NVPTXLowerStructArgs() : FunctionPass(ID) {} + const char *getPassName() const override { + return "Copy structure (byval *) arguments to stack"; + } +}; + +char NVPTXLowerStructArgs::ID = 1; + +INITIALIZE_PASS(NVPTXLowerStructArgs, "nvptx-lower-struct-args", + "Lower structure arguments (NVPTX)", false, false) + +void NVPTXLowerStructArgs::handleParam(Argument *Arg) { + Function *Func = Arg->getParent(); + Instruction *FirstInst = &(Func->getEntryBlock().front()); + PointerType *PType = dyn_cast<PointerType>(Arg->getType()); + + assert(PType && "Expecting pointer type in handleParam"); + + Type *StructType = PType->getElementType(); + AllocaInst *AllocA = new AllocaInst(StructType, Arg->getName(), FirstInst); + + /* Set the alignment to alignment of the byval parameter.
This is because, + * later load/stores assume that alignment, and we are going to replace + * the use of the byval parameter with this alloca instruction. + */ + AllocA->setAlignment(Func->getParamAlignment(Arg->getArgNo() + 1)); + + Arg->replaceAllUsesWith(AllocA); + + // Get the cvt.gen.to.param intrinsic + Type *CvtTypes[] = { + Type::getInt8PtrTy(Func->getParent()->getContext(), ADDRESS_SPACE_PARAM), + Type::getInt8PtrTy(Func->getParent()->getContext(), + ADDRESS_SPACE_GENERIC)}; + Function *CvtFunc = Intrinsic::getDeclaration( + Func->getParent(), Intrinsic::nvvm_ptr_gen_to_param, CvtTypes); + + Value *BitcastArgs[] = { + new BitCastInst(Arg, Type::getInt8PtrTy(Func->getParent()->getContext(), + ADDRESS_SPACE_GENERIC), + Arg->getName(), FirstInst)}; + CallInst *CallCVT = + CallInst::Create(CvtFunc, BitcastArgs, "cvt_to_param", FirstInst); + + BitCastInst *BitCast = new BitCastInst( + CallCVT, PointerType::get(StructType, ADDRESS_SPACE_PARAM), + Arg->getName(), FirstInst); + LoadInst *LI = new LoadInst(BitCast, Arg->getName(), FirstInst); + new StoreInst(LI, AllocA, FirstInst); +} + +// ============================================================================= +// If the function had a struct ptr arg, say foo(%struct.x *byval %d), then +// add the following instructions to the first basic block : +// +// %temp = alloca %struct.x, align 8 +// %tt1 = bitcast %struct.x * %d to i8 * +// %tt2 = llvm.nvvm.cvt.gen.to.param %tt1 +// %tempd = bitcast i8 addrspace(101) * %tt2 to %struct.x addrspace(101) * +// %tv = load %struct.x addrspace(101) * %tempd +// store %struct.x %tv, %struct.x * %temp, align 8 +// +// The above code allocates some space in the stack and copies the incoming +// struct from param space to local space. +// Then replace all occurrences of %d by %temp.
+// ============================================================================= +void NVPTXLowerStructArgs::handleStructPtrArgs(Function &F) { + for (Argument &Arg : F.args()) { + if (Arg.getType()->isPointerTy() && Arg.hasByValAttr()) { + handleParam(&Arg); + } + } +} + +// ============================================================================= +// Main function for this pass. +// ============================================================================= +bool NVPTXLowerStructArgs::runOnFunction(Function &F) { + // Skip non-kernels. See the comments at the top of this file. + if (!isKernelFunction(F)) + return false; + + handleStructPtrArgs(F); + return true; +} + +FunctionPass *llvm::createNVPTXLowerStructArgsPass() { + return new NVPTXLowerStructArgs(); +} diff --git a/lib/Target/NVPTX/NVPTXMCExpr.h b/lib/Target/NVPTX/NVPTXMCExpr.h index 5547649..d39a394 100644 --- a/lib/Target/NVPTX/NVPTXMCExpr.h +++ b/lib/Target/NVPTX/NVPTXMCExpr.h @@ -9,8 +9,8 @@ // Modeled after ARMMCExpr -#ifndef NVPTXMCEXPR_H -#define NVPTXMCEXPR_H +#ifndef LLVM_LIB_TARGET_NVPTX_NVPTXMCEXPR_H +#define LLVM_LIB_TARGET_NVPTX_NVPTXMCEXPR_H #include "llvm/ADT/APFloat.h" #include "llvm/MC/MCExpr.h" @@ -63,7 +63,8 @@ public: void PrintImpl(raw_ostream &OS) const override; bool EvaluateAsRelocatableImpl(MCValue &Res, - const MCAsmLayout *Layout) const override { + const MCAsmLayout *Layout, + const MCFixup *Fixup) const override { return false; } void visitUsedExpr(MCStreamer &Streamer) const override {}; diff --git a/lib/Target/NVPTX/NVPTXMachineFunctionInfo.h b/lib/Target/NVPTX/NVPTXMachineFunctionInfo.h index 67fb390..10f1135 100644 --- a/lib/Target/NVPTX/NVPTXMachineFunctionInfo.h +++ b/lib/Target/NVPTX/NVPTXMachineFunctionInfo.h @@ -12,6 +12,9 @@ // //===----------------------------------------------------------------------===// +#ifndef LLVM_LIB_TARGET_NVPTX_NVPTXMACHINEFUNCTIONINFO_H +#define LLVM_LIB_TARGET_NVPTX_NVPTXMACHINEFUNCTIONINFO_H + #include 
"llvm/CodeGen/MachineFunction.h" namespace llvm { @@ -44,3 +47,5 @@ public: } }; } + +#endif diff --git a/lib/Target/NVPTX/NVPTXPrologEpilogPass.cpp b/lib/Target/NVPTX/NVPTXPrologEpilogPass.cpp index 348ab0c..a1e1b9e 100644 --- a/lib/Target/NVPTX/NVPTXPrologEpilogPass.cpp +++ b/lib/Target/NVPTX/NVPTXPrologEpilogPass.cpp @@ -22,6 +22,7 @@ #include "llvm/Support/raw_ostream.h" #include "llvm/Target/TargetFrameLowering.h" #include "llvm/Target/TargetRegisterInfo.h" +#include "llvm/Target/TargetSubtargetInfo.h" using namespace llvm; @@ -48,8 +49,8 @@ char NVPTXPrologEpilogPass::ID = 0; bool NVPTXPrologEpilogPass::runOnMachineFunction(MachineFunction &MF) { const TargetMachine &TM = MF.getTarget(); - const TargetFrameLowering &TFI = *TM.getFrameLowering(); - const TargetRegisterInfo &TRI = *TM.getRegisterInfo(); + const TargetFrameLowering &TFI = *TM.getSubtargetImpl()->getFrameLowering(); + const TargetRegisterInfo &TRI = *TM.getSubtargetImpl()->getRegisterInfo(); bool Modified = false; calculateFrameObjectOffsets(MF); @@ -108,8 +109,8 @@ AdjustStackOffset(MachineFrameInfo *MFI, int FrameIdx, void NVPTXPrologEpilogPass::calculateFrameObjectOffsets(MachineFunction &Fn) { - const TargetFrameLowering &TFI = *Fn.getTarget().getFrameLowering(); - const TargetRegisterInfo *RegInfo = Fn.getTarget().getRegisterInfo(); + const TargetFrameLowering &TFI = *Fn.getSubtarget().getFrameLowering(); + const TargetRegisterInfo *RegInfo = Fn.getSubtarget().getRegisterInfo(); bool StackGrowsDown = TFI.getStackGrowthDirection() == TargetFrameLowering::StackGrowsDown; diff --git a/lib/Target/NVPTX/NVPTXRegisterInfo.cpp b/lib/Target/NVPTX/NVPTXRegisterInfo.cpp index 62f288b..358ccce 100644 --- a/lib/Target/NVPTX/NVPTXRegisterInfo.cpp +++ b/lib/Target/NVPTX/NVPTXRegisterInfo.cpp @@ -53,9 +53,9 @@ std::string getNVPTXRegClassStr(TargetRegisterClass const *RC) { return "%f"; } if (RC == &NVPTX::Float64RegsRegClass) { - return "%fl"; + return "%fd"; } else if (RC == &NVPTX::Int64RegsRegClass) { 
- return "%rl"; + return "%rd"; } else if (RC == &NVPTX::Int32RegsRegClass) { return "%r"; } else if (RC == &NVPTX::Int16RegsRegClass) { diff --git a/lib/Target/NVPTX/NVPTXRegisterInfo.h b/lib/Target/NVPTX/NVPTXRegisterInfo.h index a7594be..d2e6733 100644 --- a/lib/Target/NVPTX/NVPTXRegisterInfo.h +++ b/lib/Target/NVPTX/NVPTXRegisterInfo.h @@ -11,8 +11,8 @@ // //===----------------------------------------------------------------------===// -#ifndef NVPTXREGISTERINFO_H -#define NVPTXREGISTERINFO_H +#ifndef LLVM_LIB_TARGET_NVPTX_NVPTXREGISTERINFO_H +#define LLVM_LIB_TARGET_NVPTX_NVPTXREGISTERINFO_H #include "ManagedStringPool.h" #include "llvm/Target/TargetRegisterInfo.h" diff --git a/lib/Target/NVPTX/NVPTXRegisterInfo.td b/lib/Target/NVPTX/NVPTXRegisterInfo.td index 3482248..efcee6b 100644 --- a/lib/Target/NVPTX/NVPTXRegisterInfo.td +++ b/lib/Target/NVPTX/NVPTXRegisterInfo.td @@ -35,9 +35,9 @@ foreach i = 0-4 in { def P#i : NVPTXReg<"%p"#i>; // Predicate def RS#i : NVPTXReg<"%rs"#i>; // 16-bit def R#i : NVPTXReg<"%r"#i>; // 32-bit - def RL#i : NVPTXReg<"%rl"#i>; // 64-bit + def RL#i : NVPTXReg<"%rd"#i>; // 64-bit def F#i : NVPTXReg<"%f"#i>; // 32-bit float - def FL#i : NVPTXReg<"%fl"#i>; // 64-bit float + def FL#i : NVPTXReg<"%fd"#i>; // 64-bit float // Arguments def ia#i : NVPTXReg<"%ia"#i>; diff --git a/lib/Target/NVPTX/NVPTXReplaceImageHandles.cpp b/lib/Target/NVPTX/NVPTXReplaceImageHandles.cpp index afd53a6..324420d 100644 --- a/lib/Target/NVPTX/NVPTXReplaceImageHandles.cpp +++ b/lib/Target/NVPTX/NVPTXReplaceImageHandles.cpp @@ -15,6 +15,7 @@ #include "NVPTX.h" #include "NVPTXMachineFunctionInfo.h" +#include "NVPTXSubtarget.h" #include "llvm/CodeGen/MachineFunction.h" #include "llvm/CodeGen/MachineFunctionPass.h" #include "llvm/CodeGen/MachineRegisterInfo.h" @@ -33,9 +34,15 @@ public: NVPTXReplaceImageHandles(); bool runOnMachineFunction(MachineFunction &MF) override; + + const char *getPassName() const override { + return "NVPTX Replace Image Handles"; + } 
private: bool processInstr(MachineInstr &MI); void replaceImageHandle(MachineOperand &Op, MachineFunction &MF); + bool findIndexForHandle(MachineOperand &Op, MachineFunction &MF, + unsigned &Idx); }; } @@ -65,242 +72,43 @@ bool NVPTXReplaceImageHandles::runOnMachineFunction(MachineFunction &MF) { E = InstrsToRemove.end(); I != E; ++I) { (*I)->eraseFromParent(); } - return Changed; } bool NVPTXReplaceImageHandles::processInstr(MachineInstr &MI) { MachineFunction &MF = *MI.getParent()->getParent(); - // Check if we have a surface/texture instruction - switch (MI.getOpcode()) { - default: return false; - case NVPTX::TEX_1D_F32_I32: - case NVPTX::TEX_1D_F32_F32: - case NVPTX::TEX_1D_F32_F32_LEVEL: - case NVPTX::TEX_1D_F32_F32_GRAD: - case NVPTX::TEX_1D_I32_I32: - case NVPTX::TEX_1D_I32_F32: - case NVPTX::TEX_1D_I32_F32_LEVEL: - case NVPTX::TEX_1D_I32_F32_GRAD: - case NVPTX::TEX_1D_ARRAY_F32_I32: - case NVPTX::TEX_1D_ARRAY_F32_F32: - case NVPTX::TEX_1D_ARRAY_F32_F32_LEVEL: - case NVPTX::TEX_1D_ARRAY_F32_F32_GRAD: - case NVPTX::TEX_1D_ARRAY_I32_I32: - case NVPTX::TEX_1D_ARRAY_I32_F32: - case NVPTX::TEX_1D_ARRAY_I32_F32_LEVEL: - case NVPTX::TEX_1D_ARRAY_I32_F32_GRAD: - case NVPTX::TEX_2D_F32_I32: - case NVPTX::TEX_2D_F32_F32: - case NVPTX::TEX_2D_F32_F32_LEVEL: - case NVPTX::TEX_2D_F32_F32_GRAD: - case NVPTX::TEX_2D_I32_I32: - case NVPTX::TEX_2D_I32_F32: - case NVPTX::TEX_2D_I32_F32_LEVEL: - case NVPTX::TEX_2D_I32_F32_GRAD: - case NVPTX::TEX_2D_ARRAY_F32_I32: - case NVPTX::TEX_2D_ARRAY_F32_F32: - case NVPTX::TEX_2D_ARRAY_F32_F32_LEVEL: - case NVPTX::TEX_2D_ARRAY_F32_F32_GRAD: - case NVPTX::TEX_2D_ARRAY_I32_I32: - case NVPTX::TEX_2D_ARRAY_I32_F32: - case NVPTX::TEX_2D_ARRAY_I32_F32_LEVEL: - case NVPTX::TEX_2D_ARRAY_I32_F32_GRAD: - case NVPTX::TEX_3D_F32_I32: - case NVPTX::TEX_3D_F32_F32: - case NVPTX::TEX_3D_F32_F32_LEVEL: - case NVPTX::TEX_3D_F32_F32_GRAD: - case NVPTX::TEX_3D_I32_I32: - case NVPTX::TEX_3D_I32_F32: - case NVPTX::TEX_3D_I32_F32_LEVEL: - case 
NVPTX::TEX_3D_I32_F32_GRAD: { + const MCInstrDesc &MCID = MI.getDesc(); + + if (MCID.TSFlags & NVPTXII::IsTexFlag) { // This is a texture fetch, so operand 4 is a texref and operand 5 is // a samplerref MachineOperand &TexHandle = MI.getOperand(4); - MachineOperand &SampHandle = MI.getOperand(5); - replaceImageHandle(TexHandle, MF); - replaceImageHandle(SampHandle, MF); - - return true; - } - case NVPTX::SULD_1D_I8_TRAP: - case NVPTX::SULD_1D_I16_TRAP: - case NVPTX::SULD_1D_I32_TRAP: - case NVPTX::SULD_1D_ARRAY_I8_TRAP: - case NVPTX::SULD_1D_ARRAY_I16_TRAP: - case NVPTX::SULD_1D_ARRAY_I32_TRAP: - case NVPTX::SULD_2D_I8_TRAP: - case NVPTX::SULD_2D_I16_TRAP: - case NVPTX::SULD_2D_I32_TRAP: - case NVPTX::SULD_2D_ARRAY_I8_TRAP: - case NVPTX::SULD_2D_ARRAY_I16_TRAP: - case NVPTX::SULD_2D_ARRAY_I32_TRAP: - case NVPTX::SULD_3D_I8_TRAP: - case NVPTX::SULD_3D_I16_TRAP: - case NVPTX::SULD_3D_I32_TRAP: { - // This is a V1 surface load, so operand 1 is a surfref - MachineOperand &SurfHandle = MI.getOperand(1); - replaceImageHandle(SurfHandle, MF); + if (!(MCID.TSFlags & NVPTXII::IsTexModeUnifiedFlag)) { + MachineOperand &SampHandle = MI.getOperand(5); + replaceImageHandle(SampHandle, MF); + } return true; - } - case NVPTX::SULD_1D_V2I8_TRAP: - case NVPTX::SULD_1D_V2I16_TRAP: - case NVPTX::SULD_1D_V2I32_TRAP: - case NVPTX::SULD_1D_ARRAY_V2I8_TRAP: - case NVPTX::SULD_1D_ARRAY_V2I16_TRAP: - case NVPTX::SULD_1D_ARRAY_V2I32_TRAP: - case NVPTX::SULD_2D_V2I8_TRAP: - case NVPTX::SULD_2D_V2I16_TRAP: - case NVPTX::SULD_2D_V2I32_TRAP: - case NVPTX::SULD_2D_ARRAY_V2I8_TRAP: - case NVPTX::SULD_2D_ARRAY_V2I16_TRAP: - case NVPTX::SULD_2D_ARRAY_V2I32_TRAP: - case NVPTX::SULD_3D_V2I8_TRAP: - case NVPTX::SULD_3D_V2I16_TRAP: - case NVPTX::SULD_3D_V2I32_TRAP: { - // This is a V2 surface load, so operand 2 is a surfref - MachineOperand &SurfHandle = MI.getOperand(2); - - replaceImageHandle(SurfHandle, MF); + } else if (MCID.TSFlags & NVPTXII::IsSuldMask) { + unsigned VecSize = + 1 << 
(((MCID.TSFlags & NVPTXII::IsSuldMask) >> NVPTXII::IsSuldShift) - 1); - return true; - } - case NVPTX::SULD_1D_V4I8_TRAP: - case NVPTX::SULD_1D_V4I16_TRAP: - case NVPTX::SULD_1D_V4I32_TRAP: - case NVPTX::SULD_1D_ARRAY_V4I8_TRAP: - case NVPTX::SULD_1D_ARRAY_V4I16_TRAP: - case NVPTX::SULD_1D_ARRAY_V4I32_TRAP: - case NVPTX::SULD_2D_V4I8_TRAP: - case NVPTX::SULD_2D_V4I16_TRAP: - case NVPTX::SULD_2D_V4I32_TRAP: - case NVPTX::SULD_2D_ARRAY_V4I8_TRAP: - case NVPTX::SULD_2D_ARRAY_V4I16_TRAP: - case NVPTX::SULD_2D_ARRAY_V4I32_TRAP: - case NVPTX::SULD_3D_V4I8_TRAP: - case NVPTX::SULD_3D_V4I16_TRAP: - case NVPTX::SULD_3D_V4I32_TRAP: { - // This is a V4 surface load, so operand 4 is a surfref - MachineOperand &SurfHandle = MI.getOperand(4); + // For a surface load of vector size N, the Nth operand will be the surfref + MachineOperand &SurfHandle = MI.getOperand(VecSize); replaceImageHandle(SurfHandle, MF); return true; - } - case NVPTX::SUST_B_1D_B8_TRAP: - case NVPTX::SUST_B_1D_B16_TRAP: - case NVPTX::SUST_B_1D_B32_TRAP: - case NVPTX::SUST_B_1D_V2B8_TRAP: - case NVPTX::SUST_B_1D_V2B16_TRAP: - case NVPTX::SUST_B_1D_V2B32_TRAP: - case NVPTX::SUST_B_1D_V4B8_TRAP: - case NVPTX::SUST_B_1D_V4B16_TRAP: - case NVPTX::SUST_B_1D_V4B32_TRAP: - case NVPTX::SUST_B_1D_ARRAY_B8_TRAP: - case NVPTX::SUST_B_1D_ARRAY_B16_TRAP: - case NVPTX::SUST_B_1D_ARRAY_B32_TRAP: - case NVPTX::SUST_B_1D_ARRAY_V2B8_TRAP: - case NVPTX::SUST_B_1D_ARRAY_V2B16_TRAP: - case NVPTX::SUST_B_1D_ARRAY_V2B32_TRAP: - case NVPTX::SUST_B_1D_ARRAY_V4B8_TRAP: - case NVPTX::SUST_B_1D_ARRAY_V4B16_TRAP: - case NVPTX::SUST_B_1D_ARRAY_V4B32_TRAP: - case NVPTX::SUST_B_2D_B8_TRAP: - case NVPTX::SUST_B_2D_B16_TRAP: - case NVPTX::SUST_B_2D_B32_TRAP: - case NVPTX::SUST_B_2D_V2B8_TRAP: - case NVPTX::SUST_B_2D_V2B16_TRAP: - case NVPTX::SUST_B_2D_V2B32_TRAP: - case NVPTX::SUST_B_2D_V4B8_TRAP: - case NVPTX::SUST_B_2D_V4B16_TRAP: - case NVPTX::SUST_B_2D_V4B32_TRAP: - case NVPTX::SUST_B_2D_ARRAY_B8_TRAP: - case 
NVPTX::SUST_B_2D_ARRAY_B16_TRAP: - case NVPTX::SUST_B_2D_ARRAY_B32_TRAP: - case NVPTX::SUST_B_2D_ARRAY_V2B8_TRAP: - case NVPTX::SUST_B_2D_ARRAY_V2B16_TRAP: - case NVPTX::SUST_B_2D_ARRAY_V2B32_TRAP: - case NVPTX::SUST_B_2D_ARRAY_V4B8_TRAP: - case NVPTX::SUST_B_2D_ARRAY_V4B16_TRAP: - case NVPTX::SUST_B_2D_ARRAY_V4B32_TRAP: - case NVPTX::SUST_B_3D_B8_TRAP: - case NVPTX::SUST_B_3D_B16_TRAP: - case NVPTX::SUST_B_3D_B32_TRAP: - case NVPTX::SUST_B_3D_V2B8_TRAP: - case NVPTX::SUST_B_3D_V2B16_TRAP: - case NVPTX::SUST_B_3D_V2B32_TRAP: - case NVPTX::SUST_B_3D_V4B8_TRAP: - case NVPTX::SUST_B_3D_V4B16_TRAP: - case NVPTX::SUST_B_3D_V4B32_TRAP: - case NVPTX::SUST_P_1D_B8_TRAP: - case NVPTX::SUST_P_1D_B16_TRAP: - case NVPTX::SUST_P_1D_B32_TRAP: - case NVPTX::SUST_P_1D_V2B8_TRAP: - case NVPTX::SUST_P_1D_V2B16_TRAP: - case NVPTX::SUST_P_1D_V2B32_TRAP: - case NVPTX::SUST_P_1D_V4B8_TRAP: - case NVPTX::SUST_P_1D_V4B16_TRAP: - case NVPTX::SUST_P_1D_V4B32_TRAP: - case NVPTX::SUST_P_1D_ARRAY_B8_TRAP: - case NVPTX::SUST_P_1D_ARRAY_B16_TRAP: - case NVPTX::SUST_P_1D_ARRAY_B32_TRAP: - case NVPTX::SUST_P_1D_ARRAY_V2B8_TRAP: - case NVPTX::SUST_P_1D_ARRAY_V2B16_TRAP: - case NVPTX::SUST_P_1D_ARRAY_V2B32_TRAP: - case NVPTX::SUST_P_1D_ARRAY_V4B8_TRAP: - case NVPTX::SUST_P_1D_ARRAY_V4B16_TRAP: - case NVPTX::SUST_P_1D_ARRAY_V4B32_TRAP: - case NVPTX::SUST_P_2D_B8_TRAP: - case NVPTX::SUST_P_2D_B16_TRAP: - case NVPTX::SUST_P_2D_B32_TRAP: - case NVPTX::SUST_P_2D_V2B8_TRAP: - case NVPTX::SUST_P_2D_V2B16_TRAP: - case NVPTX::SUST_P_2D_V2B32_TRAP: - case NVPTX::SUST_P_2D_V4B8_TRAP: - case NVPTX::SUST_P_2D_V4B16_TRAP: - case NVPTX::SUST_P_2D_V4B32_TRAP: - case NVPTX::SUST_P_2D_ARRAY_B8_TRAP: - case NVPTX::SUST_P_2D_ARRAY_B16_TRAP: - case NVPTX::SUST_P_2D_ARRAY_B32_TRAP: - case NVPTX::SUST_P_2D_ARRAY_V2B8_TRAP: - case NVPTX::SUST_P_2D_ARRAY_V2B16_TRAP: - case NVPTX::SUST_P_2D_ARRAY_V2B32_TRAP: - case NVPTX::SUST_P_2D_ARRAY_V4B8_TRAP: - case NVPTX::SUST_P_2D_ARRAY_V4B16_TRAP: - case 
NVPTX::SUST_P_2D_ARRAY_V4B32_TRAP: - case NVPTX::SUST_P_3D_B8_TRAP: - case NVPTX::SUST_P_3D_B16_TRAP: - case NVPTX::SUST_P_3D_B32_TRAP: - case NVPTX::SUST_P_3D_V2B8_TRAP: - case NVPTX::SUST_P_3D_V2B16_TRAP: - case NVPTX::SUST_P_3D_V2B32_TRAP: - case NVPTX::SUST_P_3D_V4B8_TRAP: - case NVPTX::SUST_P_3D_V4B16_TRAP: - case NVPTX::SUST_P_3D_V4B32_TRAP: { + } else if (MCID.TSFlags & NVPTXII::IsSustFlag) { // This is a surface store, so operand 0 is a surfref MachineOperand &SurfHandle = MI.getOperand(0); replaceImageHandle(SurfHandle, MF); return true; - } - case NVPTX::TXQ_CHANNEL_ORDER: - case NVPTX::TXQ_CHANNEL_DATA_TYPE: - case NVPTX::TXQ_WIDTH: - case NVPTX::TXQ_HEIGHT: - case NVPTX::TXQ_DEPTH: - case NVPTX::TXQ_ARRAY_SIZE: - case NVPTX::TXQ_NUM_SAMPLES: - case NVPTX::TXQ_NUM_MIPMAP_LEVELS: - case NVPTX::SUQ_CHANNEL_ORDER: - case NVPTX::SUQ_CHANNEL_DATA_TYPE: - case NVPTX::SUQ_WIDTH: - case NVPTX::SUQ_HEIGHT: - case NVPTX::SUQ_DEPTH: - case NVPTX::SUQ_ARRAY_SIZE: { + } else if (MCID.TSFlags & NVPTXII::IsSurfTexQueryFlag) { // This is a query, so operand 1 is a surfref/texref MachineOperand &Handle = MI.getOperand(1); @@ -308,22 +116,38 @@ bool NVPTXReplaceImageHandles::processInstr(MachineInstr &MI) { return true; } - } + + return false; } void NVPTXReplaceImageHandles:: replaceImageHandle(MachineOperand &Op, MachineFunction &MF) { + unsigned Idx; + if (findIndexForHandle(Op, MF, Idx)) { + Op.ChangeToImmediate(Idx); + } +} + +bool NVPTXReplaceImageHandles:: +findIndexForHandle(MachineOperand &Op, MachineFunction &MF, unsigned &Idx) { const MachineRegisterInfo &MRI = MF.getRegInfo(); NVPTXMachineFunctionInfo *MFI = MF.getInfo<NVPTXMachineFunctionInfo>(); + + assert(Op.isReg() && "Handle is not in a reg?"); + // Which instruction defines the handle? 
- MachineInstr *MI = MRI.getVRegDef(Op.getReg()); - assert(MI && "No def for image handle vreg?"); - MachineInstr &TexHandleDef = *MI; + MachineInstr &TexHandleDef = *MRI.getVRegDef(Op.getReg()); switch (TexHandleDef.getOpcode()) { case NVPTX::LD_i64_avar: { // The handle is a parameter value being loaded, replace with the // parameter symbol + const NVPTXSubtarget &ST = MF.getTarget().getSubtarget<NVPTXSubtarget>(); + if (ST.getDrvInterface() == NVPTX::CUDA) { + // For CUDA, we preserve the param loads coming from function arguments + return false; + } + assert(TexHandleDef.getOperand(6).isSymbol() && "Load is not a symbol!"); StringRef Sym = TexHandleDef.getOperand(6).getSymbolName(); std::string ParamBaseName = MF.getName(); @@ -333,19 +157,27 @@ replaceImageHandle(MachineOperand &Op, MachineFunction &MF) { std::string NewSym; raw_string_ostream NewSymStr(NewSym); NewSymStr << MF.getFunction()->getName() << "_param_" << Param; - Op.ChangeToImmediate( - MFI->getImageHandleSymbolIndex(NewSymStr.str().c_str())); + InstrsToRemove.insert(&TexHandleDef); - break; + Idx = MFI->getImageHandleSymbolIndex(NewSymStr.str().c_str()); + return true; } case NVPTX::texsurf_handles: { // The handle is a global variable, replace with the global variable name assert(TexHandleDef.getOperand(1).isGlobal() && "Load is not a global!"); const GlobalValue *GV = TexHandleDef.getOperand(1).getGlobal(); assert(GV->hasName() && "Global sampler must be named!"); - Op.ChangeToImmediate(MFI->getImageHandleSymbolIndex(GV->getName().data())); InstrsToRemove.insert(&TexHandleDef); - break; + Idx = MFI->getImageHandleSymbolIndex(GV->getName().data()); + return true; + } + case NVPTX::nvvm_move_i64: + case TargetOpcode::COPY: { + bool Res = findIndexForHandle(TexHandleDef.getOperand(1), MF, Idx); + if (Res) { + InstrsToRemove.insert(&TexHandleDef); + } + return Res; } default: llvm_unreachable("Unknown instruction operating on handle"); diff --git a/lib/Target/NVPTX/NVPTXSection.h 
b/lib/Target/NVPTX/NVPTXSection.h index aa0436b..f1d3cb4 100644 --- a/lib/Target/NVPTX/NVPTXSection.h +++ b/lib/Target/NVPTX/NVPTXSection.h @@ -11,8 +11,8 @@ // //===----------------------------------------------------------------------===// -#ifndef LLVM_NVPTXSECTION_H -#define LLVM_NVPTXSECTION_H +#ifndef LLVM_LIB_TARGET_NVPTX_NVPTXSECTION_H +#define LLVM_LIB_TARGET_NVPTX_NVPTXSECTION_H #include "llvm/IR/GlobalVariable.h" #include "llvm/MC/MCSection.h" diff --git a/lib/Target/NVPTX/NVPTXSubtarget.cpp b/lib/Target/NVPTX/NVPTXSubtarget.cpp index d5cded2..3d52532 100644 --- a/lib/Target/NVPTX/NVPTXSubtarget.cpp +++ b/lib/Target/NVPTX/NVPTXSubtarget.cpp @@ -59,7 +59,8 @@ NVPTXSubtarget::NVPTXSubtarget(const std::string &TT, const std::string &CPU, : NVPTXGenSubtargetInfo(TT, CPU, FS), Is64Bit(is64Bit), PTXVersion(0), SmVersion(20), DL(computeDataLayout(is64Bit)), InstrInfo(initializeSubtargetDependencies(CPU, FS)), - TLInfo((NVPTXTargetMachine &)TM), TSInfo(&DL), FrameLowering(*this) { + TLInfo((const NVPTXTargetMachine &)TM), TSInfo(&DL), + FrameLowering(*this) { Triple T(TT); diff --git a/lib/Target/NVPTX/NVPTXSubtarget.h b/lib/Target/NVPTX/NVPTXSubtarget.h index 3ed5747..fb2d404 100644 --- a/lib/Target/NVPTX/NVPTXSubtarget.h +++ b/lib/Target/NVPTX/NVPTXSubtarget.h @@ -11,8 +11,8 @@ // //===----------------------------------------------------------------------===// -#ifndef NVPTXSUBTARGET_H -#define NVPTXSUBTARGET_H +#ifndef LLVM_LIB_TARGET_NVPTX_NVPTXSUBTARGET_H +#define LLVM_LIB_TARGET_NVPTX_NVPTXSUBTARGET_H #include "NVPTX.h" #include "NVPTXFrameLowering.h" @@ -57,14 +57,20 @@ public: NVPTXSubtarget(const std::string &TT, const std::string &CPU, const std::string &FS, const TargetMachine &TM, bool is64Bit); - const TargetFrameLowering *getFrameLowering() const { return &FrameLowering; } - const NVPTXInstrInfo *getInstrInfo() const { return &InstrInfo; } - const DataLayout *getDataLayout() const { return &DL; } - const NVPTXRegisterInfo *getRegisterInfo() const { 
+ const TargetFrameLowering *getFrameLowering() const override { + return &FrameLowering; + } + const NVPTXInstrInfo *getInstrInfo() const override { return &InstrInfo; } + const DataLayout *getDataLayout() const override { return &DL; } + const NVPTXRegisterInfo *getRegisterInfo() const override { return &InstrInfo.getRegisterInfo(); } - const NVPTXTargetLowering *getTargetLowering() const { return &TLInfo; } - const TargetSelectionDAGInfo *getSelectionDAGInfo() const { return &TSInfo; } + const NVPTXTargetLowering *getTargetLowering() const override { + return &TLInfo; + } + const TargetSelectionDAGInfo *getSelectionDAGInfo() const override { + return &TSInfo; + } bool hasBrkPt() const { return SmVersion >= 11; } bool hasAtomRedG32() const { return SmVersion >= 11; } @@ -91,7 +97,12 @@ public: inline bool hasROT64() const { return SmVersion >= 20; } bool hasImageHandles() const { - // Currently disabled + // Enable handles for Kepler+, where CUDA supports indirect surfaces and + // textures + if (getDrvInterface() == NVPTX::CUDA) + return (SmVersion >= 30); + + // Disabled, otherwise return false; } bool is64Bit() const { return Is64Bit; } @@ -108,4 +119,4 @@ public: } // End llvm namespace -#endif // NVPTXSUBTARGET_H +#endif diff --git a/lib/Target/NVPTX/NVPTXTargetMachine.cpp b/lib/Target/NVPTX/NVPTXTargetMachine.cpp index 069a1b9..d87693f 100644 --- a/lib/Target/NVPTX/NVPTXTargetMachine.cpp +++ b/lib/Target/NVPTX/NVPTXTargetMachine.cpp @@ -16,6 +16,7 @@ #include "NVPTX.h" #include "NVPTXAllocaHoisting.h" #include "NVPTXLowerAggrCopies.h" +#include "NVPTXTargetObjectFile.h" #include "llvm/Analysis/Passes.h" #include "llvm/CodeGen/AsmPrinter.h" #include "llvm/CodeGen/MachineFunctionAnalysis.h" @@ -50,6 +51,7 @@ void initializeNVVMReflectPass(PassRegistry&); void initializeGenericToNVVMPass(PassRegistry&); void initializeNVPTXAssignValidGlobalNamesPass(PassRegistry&); void initializeNVPTXFavorNonGenericAddrSpacesPass(PassRegistry &); +void 
initializeNVPTXLowerStructArgsPass(PassRegistry &); } extern "C" void LLVMInitializeNVPTXTarget() { @@ -64,6 +66,7 @@ extern "C" void LLVMInitializeNVPTXTarget() { initializeNVPTXAssignValidGlobalNamesPass(*PassRegistry::getPassRegistry()); initializeNVPTXFavorNonGenericAddrSpacesPass( *PassRegistry::getPassRegistry()); + initializeNVPTXLowerStructArgsPass(*PassRegistry::getPassRegistry()); } NVPTXTargetMachine::NVPTXTargetMachine(const Target &T, StringRef TT, @@ -72,10 +75,13 @@ NVPTXTargetMachine::NVPTXTargetMachine(const Target &T, StringRef TT, Reloc::Model RM, CodeModel::Model CM, CodeGenOpt::Level OL, bool is64bit) : LLVMTargetMachine(T, TT, CPU, FS, Options, RM, CM, OL), + TLOF(make_unique<NVPTXTargetObjectFile>()), Subtarget(TT, CPU, FS, *this, is64bit) { initAsmInfo(); } +NVPTXTargetMachine::~NVPTXTargetMachine() {} + void NVPTXTargetMachine32::anchor() {} NVPTXTargetMachine32::NVPTXTargetMachine32( @@ -119,6 +125,14 @@ TargetPassConfig *NVPTXTargetMachine::createPassConfig(PassManagerBase &PM) { return PassConfig; } +void NVPTXTargetMachine::addAnalysisPasses(PassManagerBase &PM) { + // Add first the target-independent BasicTTI pass, then our NVPTX pass. This + // allows the NVPTX pass to delegate to the target independent layer when + // appropriate. + PM.add(createBasicTargetTransformInfoPass(this)); + PM.add(createNVPTXTargetTransformInfoPass(this)); +} + void NVPTXPassConfig::addIRPasses() { // The following passes are known to not play well with virtual regs hanging // around after register allocation (which in our case, is *all* registers). 
diff --git a/lib/Target/NVPTX/NVPTXTargetMachine.h b/lib/Target/NVPTX/NVPTXTargetMachine.h index a7a1c8f..a726bd1 100644 --- a/lib/Target/NVPTX/NVPTXTargetMachine.h +++ b/lib/Target/NVPTX/NVPTXTargetMachine.h @@ -11,8 +11,8 @@ // //===----------------------------------------------------------------------===// -#ifndef NVPTX_TARGETMACHINE_H -#define NVPTX_TARGETMACHINE_H +#ifndef LLVM_LIB_TARGET_NVPTX_NVPTXTARGETMACHINE_H +#define LLVM_LIB_TARGET_NVPTX_NVPTXTARGETMACHINE_H #include "NVPTXSubtarget.h" #include "ManagedStringPool.h" @@ -25,6 +25,7 @@ namespace llvm { /// NVPTXTargetMachine /// class NVPTXTargetMachine : public LLVMTargetMachine { + std::unique_ptr<TargetLoweringObjectFile> TLOF; NVPTXSubtarget Subtarget; // Hold Strings that can be free'd all together with NVPTXTargetMachine @@ -35,27 +36,9 @@ public: const TargetOptions &Options, Reloc::Model RM, CodeModel::Model CM, CodeGenOpt::Level OP, bool is64bit); - const TargetFrameLowering *getFrameLowering() const override { - return getSubtargetImpl()->getFrameLowering(); - } - const NVPTXInstrInfo *getInstrInfo() const override { - return getSubtargetImpl()->getInstrInfo(); - } - const DataLayout *getDataLayout() const override { - return getSubtargetImpl()->getDataLayout(); - } - const NVPTXSubtarget *getSubtargetImpl() const override { return &Subtarget; } - const NVPTXRegisterInfo *getRegisterInfo() const override { - return getSubtargetImpl()->getRegisterInfo(); - } - - const NVPTXTargetLowering *getTargetLowering() const override { - return getSubtargetImpl()->getTargetLowering(); - } + ~NVPTXTargetMachine() override; - const TargetSelectionDAGInfo *getSelectionDAGInfo() const override { - return getSubtargetImpl()->getSelectionDAGInfo(); - } + const NVPTXSubtarget *getSubtargetImpl() const override { return &Subtarget; } ManagedStringPool *getManagedStrPool() const { return const_cast<ManagedStringPool *>(&ManagedStrPool); @@ -63,17 +46,17 @@ public: TargetPassConfig *createPassConfig(PassManagerBase 
&PM) override; - // Emission of machine code through JITCodeEmitter is not supported. - bool addPassesToEmitMachineCode(PassManagerBase &, JITCodeEmitter &, - bool = true) override { - return true; - } - // Emission of machine code through MCJIT is not supported. bool addPassesToEmitMC(PassManagerBase &, MCContext *&, raw_ostream &, bool = true) override { return true; } + TargetLoweringObjectFile *getObjFileLowering() const override { + return TLOF.get(); + } + + /// \brief Register NVPTX analysis passes with a pass manager. + void addAnalysisPasses(PassManagerBase &PM) override; }; // NVPTXTargetMachine. diff --git a/lib/Target/NVPTX/NVPTXTargetObjectFile.h b/lib/Target/NVPTX/NVPTXTargetObjectFile.h index 0b438c5..00ceca5 100644 --- a/lib/Target/NVPTX/NVPTXTargetObjectFile.h +++ b/lib/Target/NVPTX/NVPTXTargetObjectFile.h @@ -7,8 +7,8 @@ // //===----------------------------------------------------------------------===// -#ifndef LLVM_TARGET_NVPTX_TARGETOBJECTFILE_H -#define LLVM_TARGET_NVPTX_TARGETOBJECTFILE_H +#ifndef LLVM_LIB_TARGET_NVPTX_NVPTXTARGETOBJECTFILE_H +#define LLVM_LIB_TARGET_NVPTX_NVPTXTARGETOBJECTFILE_H #include "NVPTXSection.h" #include "llvm/Target/TargetLoweringObjectFile.h" @@ -87,7 +87,8 @@ public: new NVPTXSection(MCSection::SV_ELF, SectionKind::getMetadata()); } - const MCSection *getSectionForConstant(SectionKind Kind) const override { + const MCSection *getSectionForConstant(SectionKind Kind, + const Constant *C) const override { return ReadOnlySection; } @@ -97,6 +98,9 @@ public: return DataSection; } + const MCSection * + SelectSectionForGlobal(const GlobalValue *GV, SectionKind Kind, Mangler &Mang, + const TargetMachine &TM) const override; }; } // end namespace llvm diff --git a/lib/Target/NVPTX/NVPTXTargetTransformInfo.cpp b/lib/Target/NVPTX/NVPTXTargetTransformInfo.cpp new file mode 100644 index 0000000..b09d0d4 --- /dev/null +++ b/lib/Target/NVPTX/NVPTXTargetTransformInfo.cpp @@ -0,0 +1,115 @@ +//===-- NVPTXTargetTransformInfo.cpp - 
NVPTX specific TTI pass ---------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// \file +// This file implements a TargetTransformInfo analysis pass specific to the +// NVPTX target machine. It uses the target's detailed information to provide +// more precise answers to certain TTI queries, while letting the target +// independent and default TTI implementations handle the rest. +// +//===----------------------------------------------------------------------===// + +#include "NVPTXTargetMachine.h" +#include "llvm/Analysis/LoopInfo.h" +#include "llvm/Analysis/TargetTransformInfo.h" +#include "llvm/Analysis/ValueTracking.h" +#include "llvm/Support/Debug.h" +#include "llvm/Target/CostTable.h" +#include "llvm/Target/TargetLowering.h" +using namespace llvm; + +#define DEBUG_TYPE "NVPTXtti" + +// Declare the pass initialization routine locally as target-specific passes +// don't have a target-wide initialization entry point, and so we rely on the +// pass constructor initialization. +namespace llvm { +void initializeNVPTXTTIPass(PassRegistry &); +} + +namespace { + +class NVPTXTTI final : public ImmutablePass, public TargetTransformInfo { + const NVPTXTargetLowering *TLI; +public: + NVPTXTTI() : ImmutablePass(ID), TLI(nullptr) { + llvm_unreachable("This pass cannot be directly constructed"); + } + + NVPTXTTI(const NVPTXTargetMachine *TM) + : ImmutablePass(ID), TLI(TM->getSubtargetImpl()->getTargetLowering()) { + initializeNVPTXTTIPass(*PassRegistry::getPassRegistry()); + } + + void initializePass() override { pushTTIStack(this); } + + void getAnalysisUsage(AnalysisUsage &AU) const override { + TargetTransformInfo::getAnalysisUsage(AU); + } + + /// Pass identification. + static char ID; + + /// Provide necessary pointer adjustments for the two base classes. 
+ void *getAdjustedAnalysisPointer(const void *ID) override { + if (ID == &TargetTransformInfo::ID) + return (TargetTransformInfo *)this; + return this; + } + + bool hasBranchDivergence() const override; + + unsigned getArithmeticInstrCost( + unsigned Opcode, Type *Ty, OperandValueKind Opd1Info = OK_AnyValue, + OperandValueKind Opd2Info = OK_AnyValue, + OperandValueProperties Opd1PropInfo = OP_None, + OperandValueProperties Opd2PropInfo = OP_None) const override; +}; + +} // end anonymous namespace + +INITIALIZE_AG_PASS(NVPTXTTI, TargetTransformInfo, "NVPTXtti", + "NVPTX Target Transform Info", true, true, false) +char NVPTXTTI::ID = 0; + +ImmutablePass * +llvm::createNVPTXTargetTransformInfoPass(const NVPTXTargetMachine *TM) { + return new NVPTXTTI(TM); +} + +bool NVPTXTTI::hasBranchDivergence() const { return true; } + +unsigned NVPTXTTI::getArithmeticInstrCost( + unsigned Opcode, Type *Ty, OperandValueKind Opd1Info, + OperandValueKind Opd2Info, OperandValueProperties Opd1PropInfo, + OperandValueProperties Opd2PropInfo) const { + // Legalize the type. + std::pair<unsigned, MVT> LT = TLI->getTypeLegalizationCost(Ty); + + int ISD = TLI->InstructionOpcodeToISD(Opcode); + + switch (ISD) { + default: + return TargetTransformInfo::getArithmeticInstrCost( + Opcode, Ty, Opd1Info, Opd2Info, Opd1PropInfo, Opd2PropInfo); + case ISD::ADD: + case ISD::MUL: + case ISD::XOR: + case ISD::OR: + case ISD::AND: + // The machine code (SASS) simulates an i64 with two i32. Therefore, we + // estimate that arithmetic operations on i64 are twice as expensive as + // those on types that can fit into one machine register. + if (LT.second.SimpleTy == MVT::i64) + return 2 * LT.first; + // Delegate other cases to the basic TTI. 
+ return TargetTransformInfo::getArithmeticInstrCost( + Opcode, Ty, Opd1Info, Opd2Info, Opd1PropInfo, Opd2PropInfo); + } +} diff --git a/lib/Target/NVPTX/NVPTXUtilities.cpp b/lib/Target/NVPTX/NVPTXUtilities.cpp index a9fd190b..5caa8bd 100644 --- a/lib/Target/NVPTX/NVPTXUtilities.cpp +++ b/lib/Target/NVPTX/NVPTXUtilities.cpp @@ -90,11 +90,11 @@ static void cacheAnnotationFromMD(const Module *m, const GlobalValue *gv) { return; if ((*annotationCache).find(m) != (*annotationCache).end()) - (*annotationCache)[m][gv] = tmp; + (*annotationCache)[m][gv] = std::move(tmp); else { global_val_annot_t tmp1; - tmp1[gv] = tmp; - (*annotationCache)[m] = tmp1; + tmp1[gv] = std::move(tmp); + (*annotationCache)[m] = std::move(tmp1); } } diff --git a/lib/Target/NVPTX/NVPTXUtilities.h b/lib/Target/NVPTX/NVPTXUtilities.h index 446bfa1..7e2ce73 100644 --- a/lib/Target/NVPTX/NVPTXUtilities.h +++ b/lib/Target/NVPTX/NVPTXUtilities.h @@ -11,8 +11,8 @@ // //===----------------------------------------------------------------------===// -#ifndef NVPTXUTILITIES_H -#define NVPTXUTILITIES_H +#ifndef LLVM_LIB_TARGET_NVPTX_NVPTXUTILITIES_H +#define LLVM_LIB_TARGET_NVPTX_NVPTXUTILITIES_H #include "llvm/IR/Function.h" #include "llvm/IR/GlobalVariable.h" diff --git a/lib/Target/NVPTX/NVPTXutil.h b/lib/Target/NVPTX/NVPTXutil.h index d1d1171..1915dac 100644 --- a/lib/Target/NVPTX/NVPTXutil.h +++ b/lib/Target/NVPTX/NVPTXutil.h @@ -11,8 +11,8 @@ // //===----------------------------------------------------------------------===// -#ifndef LLVM_TARGET_NVPTX_UTIL_H -#define LLVM_TARGET_NVPTX_UTIL_H +#ifndef LLVM_LIB_TARGET_NVPTX_NVPTXUTIL_H +#define LLVM_LIB_TARGET_NVPTX_NVPTXUTIL_H #include "llvm/CodeGen/MachineFunction.h" #include "llvm/CodeGen/MachineInstr.h" |