author    Stephen Hines <srhines@google.com>  2014-05-29 02:49:00 -0700
committer Stephen Hines <srhines@google.com>  2014-05-29 02:49:00 -0700
commit    dce4a407a24b04eebc6a376f8e62b41aaa7b071f (patch)
tree      dcebc53f2b182f145a2e659393bf9a0472cedf23 /lib/Target/NVPTX
parent    220b921aed042f9e520c26cffd8282a94c66c3d5 (diff)
Update LLVM for 3.5 rebase (r209712).
Change-Id: I149556c940fb7dc92d075273c87ff584f400941f
Diffstat (limited to 'lib/Target/NVPTX')
-rw-r--r--  lib/Target/NVPTX/CMakeLists.txt                      |    3
-rw-r--r--  lib/Target/NVPTX/InstPrinter/NVPTXInstPrinter.cpp    |    3
-rw-r--r--  lib/Target/NVPTX/InstPrinter/NVPTXInstPrinter.h      |   14
-rw-r--r--  lib/Target/NVPTX/MCTargetDesc/NVPTXBaseInfo.h        |    6
-rw-r--r--  lib/Target/NVPTX/MCTargetDesc/NVPTXMCTargetDesc.cpp  |    6
-rw-r--r--  lib/Target/NVPTX/NVPTX.h                             |    3
-rw-r--r--  lib/Target/NVPTX/NVPTXAllocaHoisting.h               |    6
-rw-r--r--  lib/Target/NVPTX/NVPTXAsmPrinter.cpp                 |  382
-rw-r--r--  lib/Target/NVPTX/NVPTXAsmPrinter.h                   |   66
-rw-r--r--  lib/Target/NVPTX/NVPTXAssignValidGlobalNames.cpp     |    2
-rw-r--r--  lib/Target/NVPTX/NVPTXFavorNonGenericAddrSpaces.cpp  |  195
-rw-r--r--  lib/Target/NVPTX/NVPTXFrameLowering.h                |   10
-rw-r--r--  lib/Target/NVPTX/NVPTXGenericToNVVM.cpp              |   10
-rw-r--r--  lib/Target/NVPTX/NVPTXISelDAGToDAG.cpp               |  826
-rw-r--r--  lib/Target/NVPTX/NVPTXISelDAGToDAG.h                 |   15
-rw-r--r--  lib/Target/NVPTX/NVPTXISelLowering.cpp               |  552
-rw-r--r--  lib/Target/NVPTX/NVPTXISelLowering.h                 |  139
-rw-r--r--  lib/Target/NVPTX/NVPTXImageOptimizer.cpp             |  178
-rw-r--r--  lib/Target/NVPTX/NVPTXInstrInfo.cpp                  |    7
-rw-r--r--  lib/Target/NVPTX/NVPTXInstrInfo.h                    |   16
-rw-r--r--  lib/Target/NVPTX/NVPTXIntrinsics.td                  | 1823
-rw-r--r--  lib/Target/NVPTX/NVPTXLowerAggrCopies.h              |    6
-rw-r--r--  lib/Target/NVPTX/NVPTXMCExpr.cpp                     |    3
-rw-r--r--  lib/Target/NVPTX/NVPTXMCExpr.h                       |   12
-rw-r--r--  lib/Target/NVPTX/NVPTXMachineFunctionInfo.h          |   46
-rw-r--r--  lib/Target/NVPTX/NVPTXPrologEpilogPass.cpp           |    6
-rw-r--r--  lib/Target/NVPTX/NVPTXRegisterInfo.cpp               |   21
-rw-r--r--  lib/Target/NVPTX/NVPTXRegisterInfo.h                 |   23
-rw-r--r--  lib/Target/NVPTX/NVPTXReplaceImageHandles.cpp        |  357
-rw-r--r--  lib/Target/NVPTX/NVPTXSection.h                      |   16
-rw-r--r--  lib/Target/NVPTX/NVPTXSubtarget.cpp                  |    8
-rw-r--r--  lib/Target/NVPTX/NVPTXSubtarget.h                    |    7
-rw-r--r--  lib/Target/NVPTX/NVPTXTargetMachine.cpp              |   47
-rw-r--r--  lib/Target/NVPTX/NVPTXTargetMachine.h                |   24
-rw-r--r--  lib/Target/NVPTX/NVPTXTargetObjectFile.h             |   38
-rw-r--r--  lib/Target/NVPTX/NVPTXUtilities.cpp                  |   67
-rw-r--r--  lib/Target/NVPTX/NVPTXUtilities.h                    |    4
-rw-r--r--  lib/Target/NVPTX/NVVMReflect.cpp                     |   14
38 files changed, 4556 insertions(+), 405 deletions(-)
diff --git a/lib/Target/NVPTX/CMakeLists.txt b/lib/Target/NVPTX/CMakeLists.txt
index 029118a..4e35b18 100644
--- a/lib/Target/NVPTX/CMakeLists.txt
+++ b/lib/Target/NVPTX/CMakeLists.txt
@@ -9,6 +9,7 @@ tablegen(LLVM NVPTXGenSubtargetInfo.inc -gen-subtarget)
add_public_tablegen_target(NVPTXCommonTableGen)
set(NVPTXCodeGen_sources
+ NVPTXFavorNonGenericAddrSpaces.cpp
NVPTXFrameLowering.cpp
NVPTXInstrInfo.cpp
NVPTXISelDAGToDAG.cpp
@@ -26,6 +27,8 @@ set(NVPTXCodeGen_sources
NVPTXAssignValidGlobalNames.cpp
NVPTXPrologEpilogPass.cpp
NVPTXMCExpr.cpp
+ NVPTXReplaceImageHandles.cpp
+ NVPTXImageOptimizer.cpp
)
add_llvm_target(NVPTXCodeGen ${NVPTXCodeGen_sources})
diff --git a/lib/Target/NVPTX/InstPrinter/NVPTXInstPrinter.cpp b/lib/Target/NVPTX/InstPrinter/NVPTXInstPrinter.cpp
index cf165be..9618896 100644
--- a/lib/Target/NVPTX/InstPrinter/NVPTXInstPrinter.cpp
+++ b/lib/Target/NVPTX/InstPrinter/NVPTXInstPrinter.cpp
@@ -11,7 +11,6 @@
//
//===----------------------------------------------------------------------===//
-#define DEBUG_TYPE "asm-printer"
#include "InstPrinter/NVPTXInstPrinter.h"
#include "MCTargetDesc/NVPTXBaseInfo.h"
#include "NVPTX.h"
@@ -25,6 +24,8 @@
#include <cctype>
using namespace llvm;
+#define DEBUG_TYPE "asm-printer"
+
#include "NVPTXGenAsmWriter.inc"
diff --git a/lib/Target/NVPTX/InstPrinter/NVPTXInstPrinter.h b/lib/Target/NVPTX/InstPrinter/NVPTXInstPrinter.h
index 93029ae..1fb3c57 100644
--- a/lib/Target/NVPTX/InstPrinter/NVPTXInstPrinter.h
+++ b/lib/Target/NVPTX/InstPrinter/NVPTXInstPrinter.h
@@ -27,8 +27,8 @@ public:
NVPTXInstPrinter(const MCAsmInfo &MAI, const MCInstrInfo &MII,
const MCRegisterInfo &MRI, const MCSubtargetInfo &STI);
- virtual void printRegName(raw_ostream &OS, unsigned RegNo) const;
- virtual void printInst(const MCInst *MI, raw_ostream &OS, StringRef Annot);
+ void printRegName(raw_ostream &OS, unsigned RegNo) const override;
+ void printInst(const MCInst *MI, raw_ostream &OS, StringRef Annot) override;
// Autogenerated by tblgen.
void printInstruction(const MCInst *MI, raw_ostream &O);
@@ -37,15 +37,15 @@ public:
void printOperand(const MCInst *MI, unsigned OpNo, raw_ostream &O);
void printCvtMode(const MCInst *MI, int OpNum, raw_ostream &O,
- const char *Modifier = 0);
+ const char *Modifier = nullptr);
void printCmpMode(const MCInst *MI, int OpNum, raw_ostream &O,
- const char *Modifier = 0);
+ const char *Modifier = nullptr);
void printLdStCode(const MCInst *MI, int OpNum,
- raw_ostream &O, const char *Modifier = 0);
+ raw_ostream &O, const char *Modifier = nullptr);
void printMemOperand(const MCInst *MI, int OpNum,
- raw_ostream &O, const char *Modifier = 0);
+ raw_ostream &O, const char *Modifier = nullptr);
void printProtoIdent(const MCInst *MI, int OpNum,
- raw_ostream &O, const char *Modifier = 0);
+ raw_ostream &O, const char *Modifier = nullptr);
};
}
diff --git a/lib/Target/NVPTX/MCTargetDesc/NVPTXBaseInfo.h b/lib/Target/NVPTX/MCTargetDesc/NVPTXBaseInfo.h
index edf4a80..ddb122f 100644
--- a/lib/Target/NVPTX/MCTargetDesc/NVPTXBaseInfo.h
+++ b/lib/Target/NVPTX/MCTargetDesc/NVPTXBaseInfo.h
@@ -43,14 +43,16 @@ enum PropertyAnnotation {
PROPERTY_ISSAMPLER,
PROPERTY_ISREADONLY_IMAGE_PARAM,
PROPERTY_ISWRITEONLY_IMAGE_PARAM,
+ PROPERTY_ISREADWRITE_IMAGE_PARAM,
PROPERTY_ISKERNEL_FUNCTION,
PROPERTY_ALIGN,
+ PROPERTY_MANAGED,
// last property
PROPERTY_LAST
};
-const unsigned AnnotationNameLen = 8; // length of each annotation name
+const unsigned AnnotationNameLen = 9; // length of each annotation name
const char PropertyAnnotationNames[PROPERTY_LAST + 1][AnnotationNameLen + 1] = {
"maxntidx", // PROPERTY_MAXNTID_X
"maxntidy", // PROPERTY_MAXNTID_Y
@@ -64,8 +66,10 @@ const char PropertyAnnotationNames[PROPERTY_LAST + 1][AnnotationNameLen + 1] = {
"sampler", // PROPERTY_ISSAMPLER
"rdoimage", // PROPERTY_ISREADONLY_IMAGE_PARAM
"wroimage", // PROPERTY_ISWRITEONLY_IMAGE_PARAM
+ "rdwrimage", // PROPERTY_ISREADWRITE_IMAGE_PARAM
"kernel", // PROPERTY_ISKERNEL_FUNCTION
"align", // PROPERTY_ALIGN
+ "managed", // PROPERTY_MANAGED
// last property
"proplast", // PROPERTY_LAST
diff --git a/lib/Target/NVPTX/MCTargetDesc/NVPTXMCTargetDesc.cpp b/lib/Target/NVPTX/MCTargetDesc/NVPTXMCTargetDesc.cpp
index 3cf6e4b..158ca90 100644
--- a/lib/Target/NVPTX/MCTargetDesc/NVPTXMCTargetDesc.cpp
+++ b/lib/Target/NVPTX/MCTargetDesc/NVPTXMCTargetDesc.cpp
@@ -20,6 +20,8 @@
#include "llvm/MC/MCSubtargetInfo.h"
#include "llvm/Support/TargetRegistry.h"
+using namespace llvm;
+
#define GET_INSTRINFO_MC_DESC
#include "NVPTXGenInstrInfo.inc"
@@ -29,8 +31,6 @@
#define GET_REGINFO_MC_DESC
#include "NVPTXGenRegisterInfo.inc"
-using namespace llvm;
-
static MCInstrInfo *createNVPTXMCInstrInfo() {
MCInstrInfo *X = new MCInstrInfo();
InitNVPTXMCInstrInfo(X);
@@ -66,7 +66,7 @@ static MCInstPrinter *createNVPTXMCInstPrinter(const Target &T,
const MCSubtargetInfo &STI) {
if (SyntaxVariant == 0)
return new NVPTXInstPrinter(MAI, MII, MRI, STI);
- return 0;
+ return nullptr;
}
// Force static initialization.
diff --git a/lib/Target/NVPTX/NVPTX.h b/lib/Target/NVPTX/NVPTX.h
index 8cbdd47..e74c808 100644
--- a/lib/Target/NVPTX/NVPTX.h
+++ b/lib/Target/NVPTX/NVPTX.h
@@ -63,9 +63,12 @@ FunctionPass *
createNVPTXISelDag(NVPTXTargetMachine &TM, llvm::CodeGenOpt::Level OptLevel);
ModulePass *createNVPTXAssignValidGlobalNamesPass();
ModulePass *createGenericToNVVMPass();
+FunctionPass *createNVPTXFavorNonGenericAddrSpacesPass();
ModulePass *createNVVMReflectPass();
ModulePass *createNVVMReflectPass(const StringMap<int>& Mapping);
MachineFunctionPass *createNVPTXPrologEpilogPass();
+MachineFunctionPass *createNVPTXReplaceImageHandlesPass();
+FunctionPass *createNVPTXImageOptimizerPass();
bool isImageOrSamplerVal(const Value *, const Module *);
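(Editor's note: the factory functions added above are wired into the backend pipeline by NVPTXTargetMachine.cpp, also touched in this commit per the diffstat. Purely as an illustration, and assuming the LLVM 3.5-era legacy pass manager, the two new IR-level FunctionPasses could be exercised in isolation like this:

#include "NVPTX.h"
#include "llvm/IR/LegacyPassManager.h"
#include "llvm/IR/Module.h"

using namespace llvm;

// Hypothetical driver: run the two new IR passes over every function in M.
static void runNewNVPTXIRPasses(Module &M) {
  legacy::FunctionPassManager FPM(&M);
  FPM.add(createNVPTXImageOptimizerPass());            // declared above
  FPM.add(createNVPTXFavorNonGenericAddrSpacesPass()); // declared above
  FPM.doInitialization();
  for (Function &F : M)
    FPM.run(F);
  FPM.doFinalization();
}
)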
diff --git a/lib/Target/NVPTX/NVPTXAllocaHoisting.h b/lib/Target/NVPTX/NVPTXAllocaHoisting.h
index 22404b7..5b61068 100644
--- a/lib/Target/NVPTX/NVPTXAllocaHoisting.h
+++ b/lib/Target/NVPTX/NVPTXAllocaHoisting.h
@@ -30,17 +30,17 @@ public:
static char ID; // Pass ID
NVPTXAllocaHoisting() : FunctionPass(ID) {}
- void getAnalysisUsage(AnalysisUsage &AU) const {
+ void getAnalysisUsage(AnalysisUsage &AU) const override {
AU.addRequired<DataLayoutPass>();
AU.addPreserved("stack-protector");
AU.addPreserved<MachineFunctionAnalysis>();
}
- virtual const char *getPassName() const {
+ const char *getPassName() const override {
return "NVPTX specific alloca hoisting";
}
- virtual bool runOnFunction(Function &function);
+ bool runOnFunction(Function &function) override;
};
extern FunctionPass *createAllocaHoisting();
diff --git a/lib/Target/NVPTX/NVPTXAsmPrinter.cpp b/lib/Target/NVPTX/NVPTXAsmPrinter.cpp
index 97e2cc6..4ec575f 100644
--- a/lib/Target/NVPTX/NVPTXAsmPrinter.cpp
+++ b/lib/Target/NVPTX/NVPTXAsmPrinter.cpp
@@ -17,6 +17,7 @@
#include "MCTargetDesc/NVPTXMCAsmInfo.h"
#include "NVPTX.h"
#include "NVPTXInstrInfo.h"
+#include "NVPTXMachineFunctionInfo.h"
#include "NVPTXMCExpr.h"
#include "NVPTXRegisterInfo.h"
#include "NVPTXTargetMachine.h"
@@ -131,7 +132,7 @@ const MCExpr *nvptx::LowerConstant(const Constant *CV, AsmPrinter &AP) {
return MCSymbolRefExpr::Create(AP.GetBlockAddressSymbol(BA), Ctx);
const ConstantExpr *CE = dyn_cast<ConstantExpr>(CV);
- if (CE == 0)
+ if (!CE)
llvm_unreachable("Unknown constant value to lower!");
switch (CE->getOpcode()) {
@@ -149,9 +150,24 @@ const MCExpr *nvptx::LowerConstant(const Constant *CV, AsmPrinter &AP) {
raw_string_ostream OS(S);
OS << "Unsupported expression in static initializer: ";
CE->printAsOperand(OS, /*PrintType=*/ false,
- !AP.MF ? 0 : AP.MF->getFunction()->getParent());
+ !AP.MF ? nullptr : AP.MF->getFunction()->getParent());
report_fatal_error(OS.str());
}
+ case Instruction::AddrSpaceCast: {
+ // Strip any addrspace(1)->addrspace(0) addrspace casts. These will be
+ // handled by the generic() logic in the MCExpr printer
+ PointerType *DstTy = cast<PointerType>(CE->getType());
+ PointerType *SrcTy = cast<PointerType>(CE->getOperand(0)->getType());
+ if (SrcTy->getAddressSpace() == 1 && DstTy->getAddressSpace() == 0) {
+ return LowerConstant(cast<const Constant>(CE->getOperand(0)), AP);
+ }
+ std::string S;
+ raw_string_ostream OS(S);
+ OS << "Unsupported expression in static initializer: ";
+ CE->printAsOperand(OS, /*PrintType=*/ false,
+ !AP.MF ? nullptr : AP.MF->getFunction()->getParent());
+ report_fatal_error(OS.str());
+ }
case Instruction::GetElementPtr: {
const DataLayout &TD = *AP.TM.getDataLayout();
// Generate a symbolic expression for the byte address
@@ -310,13 +326,279 @@ void NVPTXAsmPrinter::EmitInstruction(const MachineInstr *MI) {
EmitToStreamer(OutStreamer, Inst);
}
+// Handle symbol backtracking for targets that do not support image handles
+bool NVPTXAsmPrinter::lowerImageHandleOperand(const MachineInstr *MI,
+ unsigned OpNo, MCOperand &MCOp) {
+ const MachineOperand &MO = MI->getOperand(OpNo);
+
+ switch (MI->getOpcode()) {
+ default: return false;
+ case NVPTX::TEX_1D_F32_I32:
+ case NVPTX::TEX_1D_F32_F32:
+ case NVPTX::TEX_1D_F32_F32_LEVEL:
+ case NVPTX::TEX_1D_F32_F32_GRAD:
+ case NVPTX::TEX_1D_I32_I32:
+ case NVPTX::TEX_1D_I32_F32:
+ case NVPTX::TEX_1D_I32_F32_LEVEL:
+ case NVPTX::TEX_1D_I32_F32_GRAD:
+ case NVPTX::TEX_1D_ARRAY_F32_I32:
+ case NVPTX::TEX_1D_ARRAY_F32_F32:
+ case NVPTX::TEX_1D_ARRAY_F32_F32_LEVEL:
+ case NVPTX::TEX_1D_ARRAY_F32_F32_GRAD:
+ case NVPTX::TEX_1D_ARRAY_I32_I32:
+ case NVPTX::TEX_1D_ARRAY_I32_F32:
+ case NVPTX::TEX_1D_ARRAY_I32_F32_LEVEL:
+ case NVPTX::TEX_1D_ARRAY_I32_F32_GRAD:
+ case NVPTX::TEX_2D_F32_I32:
+ case NVPTX::TEX_2D_F32_F32:
+ case NVPTX::TEX_2D_F32_F32_LEVEL:
+ case NVPTX::TEX_2D_F32_F32_GRAD:
+ case NVPTX::TEX_2D_I32_I32:
+ case NVPTX::TEX_2D_I32_F32:
+ case NVPTX::TEX_2D_I32_F32_LEVEL:
+ case NVPTX::TEX_2D_I32_F32_GRAD:
+ case NVPTX::TEX_2D_ARRAY_F32_I32:
+ case NVPTX::TEX_2D_ARRAY_F32_F32:
+ case NVPTX::TEX_2D_ARRAY_F32_F32_LEVEL:
+ case NVPTX::TEX_2D_ARRAY_F32_F32_GRAD:
+ case NVPTX::TEX_2D_ARRAY_I32_I32:
+ case NVPTX::TEX_2D_ARRAY_I32_F32:
+ case NVPTX::TEX_2D_ARRAY_I32_F32_LEVEL:
+ case NVPTX::TEX_2D_ARRAY_I32_F32_GRAD:
+ case NVPTX::TEX_3D_F32_I32:
+ case NVPTX::TEX_3D_F32_F32:
+ case NVPTX::TEX_3D_F32_F32_LEVEL:
+ case NVPTX::TEX_3D_F32_F32_GRAD:
+ case NVPTX::TEX_3D_I32_I32:
+ case NVPTX::TEX_3D_I32_F32:
+ case NVPTX::TEX_3D_I32_F32_LEVEL:
+ case NVPTX::TEX_3D_I32_F32_GRAD:
+ {
+ // This is a texture fetch, so operand 4 is a texref and operand 5 is
+ // a samplerref
+ if (OpNo == 4) {
+ lowerImageHandleSymbol(MO.getImm(), MCOp);
+ return true;
+ }
+ if (OpNo == 5) {
+ lowerImageHandleSymbol(MO.getImm(), MCOp);
+ return true;
+ }
+
+ return false;
+ }
+ case NVPTX::SULD_1D_I8_TRAP:
+ case NVPTX::SULD_1D_I16_TRAP:
+ case NVPTX::SULD_1D_I32_TRAP:
+ case NVPTX::SULD_1D_ARRAY_I8_TRAP:
+ case NVPTX::SULD_1D_ARRAY_I16_TRAP:
+ case NVPTX::SULD_1D_ARRAY_I32_TRAP:
+ case NVPTX::SULD_2D_I8_TRAP:
+ case NVPTX::SULD_2D_I16_TRAP:
+ case NVPTX::SULD_2D_I32_TRAP:
+ case NVPTX::SULD_2D_ARRAY_I8_TRAP:
+ case NVPTX::SULD_2D_ARRAY_I16_TRAP:
+ case NVPTX::SULD_2D_ARRAY_I32_TRAP:
+ case NVPTX::SULD_3D_I8_TRAP:
+ case NVPTX::SULD_3D_I16_TRAP:
+ case NVPTX::SULD_3D_I32_TRAP: {
+ // This is a V1 surface load, so operand 1 is a surfref
+ if (OpNo == 1) {
+ lowerImageHandleSymbol(MO.getImm(), MCOp);
+ return true;
+ }
+
+ return false;
+ }
+ case NVPTX::SULD_1D_V2I8_TRAP:
+ case NVPTX::SULD_1D_V2I16_TRAP:
+ case NVPTX::SULD_1D_V2I32_TRAP:
+ case NVPTX::SULD_1D_ARRAY_V2I8_TRAP:
+ case NVPTX::SULD_1D_ARRAY_V2I16_TRAP:
+ case NVPTX::SULD_1D_ARRAY_V2I32_TRAP:
+ case NVPTX::SULD_2D_V2I8_TRAP:
+ case NVPTX::SULD_2D_V2I16_TRAP:
+ case NVPTX::SULD_2D_V2I32_TRAP:
+ case NVPTX::SULD_2D_ARRAY_V2I8_TRAP:
+ case NVPTX::SULD_2D_ARRAY_V2I16_TRAP:
+ case NVPTX::SULD_2D_ARRAY_V2I32_TRAP:
+ case NVPTX::SULD_3D_V2I8_TRAP:
+ case NVPTX::SULD_3D_V2I16_TRAP:
+ case NVPTX::SULD_3D_V2I32_TRAP: {
+ // This is a V2 surface load, so operand 2 is a surfref
+ if (OpNo == 2) {
+ lowerImageHandleSymbol(MO.getImm(), MCOp);
+ return true;
+ }
+
+ return false;
+ }
+ case NVPTX::SULD_1D_V4I8_TRAP:
+ case NVPTX::SULD_1D_V4I16_TRAP:
+ case NVPTX::SULD_1D_V4I32_TRAP:
+ case NVPTX::SULD_1D_ARRAY_V4I8_TRAP:
+ case NVPTX::SULD_1D_ARRAY_V4I16_TRAP:
+ case NVPTX::SULD_1D_ARRAY_V4I32_TRAP:
+ case NVPTX::SULD_2D_V4I8_TRAP:
+ case NVPTX::SULD_2D_V4I16_TRAP:
+ case NVPTX::SULD_2D_V4I32_TRAP:
+ case NVPTX::SULD_2D_ARRAY_V4I8_TRAP:
+ case NVPTX::SULD_2D_ARRAY_V4I16_TRAP:
+ case NVPTX::SULD_2D_ARRAY_V4I32_TRAP:
+ case NVPTX::SULD_3D_V4I8_TRAP:
+ case NVPTX::SULD_3D_V4I16_TRAP:
+ case NVPTX::SULD_3D_V4I32_TRAP: {
+ // This is a V4 surface load, so operand 4 is a surfref
+ if (OpNo == 4) {
+ lowerImageHandleSymbol(MO.getImm(), MCOp);
+ return true;
+ }
+
+ return false;
+ }
+ case NVPTX::SUST_B_1D_B8_TRAP:
+ case NVPTX::SUST_B_1D_B16_TRAP:
+ case NVPTX::SUST_B_1D_B32_TRAP:
+ case NVPTX::SUST_B_1D_V2B8_TRAP:
+ case NVPTX::SUST_B_1D_V2B16_TRAP:
+ case NVPTX::SUST_B_1D_V2B32_TRAP:
+ case NVPTX::SUST_B_1D_V4B8_TRAP:
+ case NVPTX::SUST_B_1D_V4B16_TRAP:
+ case NVPTX::SUST_B_1D_V4B32_TRAP:
+ case NVPTX::SUST_B_1D_ARRAY_B8_TRAP:
+ case NVPTX::SUST_B_1D_ARRAY_B16_TRAP:
+ case NVPTX::SUST_B_1D_ARRAY_B32_TRAP:
+ case NVPTX::SUST_B_1D_ARRAY_V2B8_TRAP:
+ case NVPTX::SUST_B_1D_ARRAY_V2B16_TRAP:
+ case NVPTX::SUST_B_1D_ARRAY_V2B32_TRAP:
+ case NVPTX::SUST_B_1D_ARRAY_V4B8_TRAP:
+ case NVPTX::SUST_B_1D_ARRAY_V4B16_TRAP:
+ case NVPTX::SUST_B_1D_ARRAY_V4B32_TRAP:
+ case NVPTX::SUST_B_2D_B8_TRAP:
+ case NVPTX::SUST_B_2D_B16_TRAP:
+ case NVPTX::SUST_B_2D_B32_TRAP:
+ case NVPTX::SUST_B_2D_V2B8_TRAP:
+ case NVPTX::SUST_B_2D_V2B16_TRAP:
+ case NVPTX::SUST_B_2D_V2B32_TRAP:
+ case NVPTX::SUST_B_2D_V4B8_TRAP:
+ case NVPTX::SUST_B_2D_V4B16_TRAP:
+ case NVPTX::SUST_B_2D_V4B32_TRAP:
+ case NVPTX::SUST_B_2D_ARRAY_B8_TRAP:
+ case NVPTX::SUST_B_2D_ARRAY_B16_TRAP:
+ case NVPTX::SUST_B_2D_ARRAY_B32_TRAP:
+ case NVPTX::SUST_B_2D_ARRAY_V2B8_TRAP:
+ case NVPTX::SUST_B_2D_ARRAY_V2B16_TRAP:
+ case NVPTX::SUST_B_2D_ARRAY_V2B32_TRAP:
+ case NVPTX::SUST_B_2D_ARRAY_V4B8_TRAP:
+ case NVPTX::SUST_B_2D_ARRAY_V4B16_TRAP:
+ case NVPTX::SUST_B_2D_ARRAY_V4B32_TRAP:
+ case NVPTX::SUST_B_3D_B8_TRAP:
+ case NVPTX::SUST_B_3D_B16_TRAP:
+ case NVPTX::SUST_B_3D_B32_TRAP:
+ case NVPTX::SUST_B_3D_V2B8_TRAP:
+ case NVPTX::SUST_B_3D_V2B16_TRAP:
+ case NVPTX::SUST_B_3D_V2B32_TRAP:
+ case NVPTX::SUST_B_3D_V4B8_TRAP:
+ case NVPTX::SUST_B_3D_V4B16_TRAP:
+ case NVPTX::SUST_B_3D_V4B32_TRAP:
+ case NVPTX::SUST_P_1D_B8_TRAP:
+ case NVPTX::SUST_P_1D_B16_TRAP:
+ case NVPTX::SUST_P_1D_B32_TRAP:
+ case NVPTX::SUST_P_1D_V2B8_TRAP:
+ case NVPTX::SUST_P_1D_V2B16_TRAP:
+ case NVPTX::SUST_P_1D_V2B32_TRAP:
+ case NVPTX::SUST_P_1D_V4B8_TRAP:
+ case NVPTX::SUST_P_1D_V4B16_TRAP:
+ case NVPTX::SUST_P_1D_V4B32_TRAP:
+ case NVPTX::SUST_P_1D_ARRAY_B8_TRAP:
+ case NVPTX::SUST_P_1D_ARRAY_B16_TRAP:
+ case NVPTX::SUST_P_1D_ARRAY_B32_TRAP:
+ case NVPTX::SUST_P_1D_ARRAY_V2B8_TRAP:
+ case NVPTX::SUST_P_1D_ARRAY_V2B16_TRAP:
+ case NVPTX::SUST_P_1D_ARRAY_V2B32_TRAP:
+ case NVPTX::SUST_P_1D_ARRAY_V4B8_TRAP:
+ case NVPTX::SUST_P_1D_ARRAY_V4B16_TRAP:
+ case NVPTX::SUST_P_1D_ARRAY_V4B32_TRAP:
+ case NVPTX::SUST_P_2D_B8_TRAP:
+ case NVPTX::SUST_P_2D_B16_TRAP:
+ case NVPTX::SUST_P_2D_B32_TRAP:
+ case NVPTX::SUST_P_2D_V2B8_TRAP:
+ case NVPTX::SUST_P_2D_V2B16_TRAP:
+ case NVPTX::SUST_P_2D_V2B32_TRAP:
+ case NVPTX::SUST_P_2D_V4B8_TRAP:
+ case NVPTX::SUST_P_2D_V4B16_TRAP:
+ case NVPTX::SUST_P_2D_V4B32_TRAP:
+ case NVPTX::SUST_P_2D_ARRAY_B8_TRAP:
+ case NVPTX::SUST_P_2D_ARRAY_B16_TRAP:
+ case NVPTX::SUST_P_2D_ARRAY_B32_TRAP:
+ case NVPTX::SUST_P_2D_ARRAY_V2B8_TRAP:
+ case NVPTX::SUST_P_2D_ARRAY_V2B16_TRAP:
+ case NVPTX::SUST_P_2D_ARRAY_V2B32_TRAP:
+ case NVPTX::SUST_P_2D_ARRAY_V4B8_TRAP:
+ case NVPTX::SUST_P_2D_ARRAY_V4B16_TRAP:
+ case NVPTX::SUST_P_2D_ARRAY_V4B32_TRAP:
+ case NVPTX::SUST_P_3D_B8_TRAP:
+ case NVPTX::SUST_P_3D_B16_TRAP:
+ case NVPTX::SUST_P_3D_B32_TRAP:
+ case NVPTX::SUST_P_3D_V2B8_TRAP:
+ case NVPTX::SUST_P_3D_V2B16_TRAP:
+ case NVPTX::SUST_P_3D_V2B32_TRAP:
+ case NVPTX::SUST_P_3D_V4B8_TRAP:
+ case NVPTX::SUST_P_3D_V4B16_TRAP:
+ case NVPTX::SUST_P_3D_V4B32_TRAP: {
+ // This is a surface store, so operand 0 is a surfref
+ if (OpNo == 0) {
+ lowerImageHandleSymbol(MO.getImm(), MCOp);
+ return true;
+ }
+
+ return false;
+ }
+ case NVPTX::TXQ_CHANNEL_ORDER:
+ case NVPTX::TXQ_CHANNEL_DATA_TYPE:
+ case NVPTX::TXQ_WIDTH:
+ case NVPTX::TXQ_HEIGHT:
+ case NVPTX::TXQ_DEPTH:
+ case NVPTX::TXQ_ARRAY_SIZE:
+ case NVPTX::TXQ_NUM_SAMPLES:
+ case NVPTX::TXQ_NUM_MIPMAP_LEVELS:
+ case NVPTX::SUQ_CHANNEL_ORDER:
+ case NVPTX::SUQ_CHANNEL_DATA_TYPE:
+ case NVPTX::SUQ_WIDTH:
+ case NVPTX::SUQ_HEIGHT:
+ case NVPTX::SUQ_DEPTH:
+ case NVPTX::SUQ_ARRAY_SIZE: {
+ // This is a query, so operand 1 is a surfref/texref
+ if (OpNo == 1) {
+ lowerImageHandleSymbol(MO.getImm(), MCOp);
+ return true;
+ }
+
+ return false;
+ }
+ }
+}
+
+void NVPTXAsmPrinter::lowerImageHandleSymbol(unsigned Index, MCOperand &MCOp) {
+ // Ewwww
+ TargetMachine &TM = const_cast<TargetMachine&>(MF->getTarget());
+ NVPTXTargetMachine &nvTM = static_cast<NVPTXTargetMachine&>(TM);
+ const NVPTXMachineFunctionInfo *MFI = MF->getInfo<NVPTXMachineFunctionInfo>();
+ const char *Sym = MFI->getImageHandleSymbol(Index);
+ std::string *SymNamePtr =
+ nvTM.getManagedStrPool()->getManagedString(Sym);
+ MCOp = GetSymbolRef(OutContext.GetOrCreateSymbol(
+ StringRef(SymNamePtr->c_str())));
+}
+
void NVPTXAsmPrinter::lowerToMCInst(const MachineInstr *MI, MCInst &OutMI) {
OutMI.setOpcode(MI->getOpcode());
+ const NVPTXSubtarget &ST = TM.getSubtarget<NVPTXSubtarget>();
// Special: Do not mangle symbol operand of CALL_PROTOTYPE
if (MI->getOpcode() == NVPTX::CALL_PROTOTYPE) {
const MachineOperand &MO = MI->getOperand(0);
- OutMI.addOperand(GetSymbolRef(MO,
+ OutMI.addOperand(GetSymbolRef(
OutContext.GetOrCreateSymbol(Twine(MO.getSymbolName()))));
return;
}
@@ -325,6 +607,13 @@ void NVPTXAsmPrinter::lowerToMCInst(const MachineInstr *MI, MCInst &OutMI) {
const MachineOperand &MO = MI->getOperand(i);
MCOperand MCOp;
+ if (!ST.hasImageHandles()) {
+ if (lowerImageHandleOperand(MI, i, MCOp)) {
+ OutMI.addOperand(MCOp);
+ continue;
+ }
+ }
+
if (lowerOperand(MO, MCOp))
OutMI.addOperand(MCOp);
}
@@ -345,10 +634,10 @@ bool NVPTXAsmPrinter::lowerOperand(const MachineOperand &MO,
MO.getMBB()->getSymbol(), OutContext));
break;
case MachineOperand::MO_ExternalSymbol:
- MCOp = GetSymbolRef(MO, GetExternalSymbolSymbol(MO.getSymbolName()));
+ MCOp = GetSymbolRef(GetExternalSymbolSymbol(MO.getSymbolName()));
break;
case MachineOperand::MO_GlobalAddress:
- MCOp = GetSymbolRef(MO, getSymbol(MO.getGlobal()));
+ MCOp = GetSymbolRef(getSymbol(MO.getGlobal()));
break;
case MachineOperand::MO_FPImmediate: {
const ConstantFP *Cnt = MO.getFPImm();
@@ -407,8 +696,7 @@ unsigned NVPTXAsmPrinter::encodeVirtualRegister(unsigned Reg) {
}
}
-MCOperand NVPTXAsmPrinter::GetSymbolRef(const MachineOperand &MO,
- const MCSymbol *Symbol) {
+MCOperand NVPTXAsmPrinter::GetSymbolRef(const MCSymbol *Symbol) {
const MCExpr *Expr;
Expr = MCSymbolRefExpr::Create(Symbol, MCSymbolRefExpr::VK_None,
OutContext);
@@ -750,7 +1038,7 @@ static bool canDemoteGlobalVar(const GlobalVariable *gv, Function const *&f) {
if (Pty->getAddressSpace() != llvm::ADDRESS_SPACE_SHARED)
return false;
- const Function *oneFunc = 0;
+ const Function *oneFunc = nullptr;
bool flag = usedInOneFunc(gv, oneFunc);
if (flag == false)
@@ -1010,6 +1298,8 @@ bool NVPTXAsmPrinter::doFinalization(Module &M) {
for (i = 0; i < n; i++)
global_list.insert(global_list.end(), gv_array[i]);
+ clearAnnotationCache(&M);
+
delete[] gv_array;
return ret;
@@ -1105,10 +1395,10 @@ void NVPTXAsmPrinter::printModuleLevelGV(const GlobalVariable *GVar,
if (llvm::isSampler(*GVar)) {
O << ".global .samplerref " << llvm::getSamplerName(*GVar);
- const Constant *Initializer = NULL;
+ const Constant *Initializer = nullptr;
if (GVar->hasInitializer())
Initializer = GVar->getInitializer();
- const ConstantInt *CI = NULL;
+ const ConstantInt *CI = nullptr;
if (Initializer)
CI = dyn_cast<ConstantInt>(Initializer);
if (CI) {
@@ -1175,7 +1465,7 @@ void NVPTXAsmPrinter::printModuleLevelGV(const GlobalVariable *GVar,
return;
}
- const Function *demotedFunc = 0;
+ const Function *demotedFunc = nullptr;
if (!processDemoted && canDemoteGlobalVar(GVar, demotedFunc)) {
O << "// " << GVar->getName().str() << " has been demoted\n";
if (localDecls.find(demotedFunc) != localDecls.end())
@@ -1347,7 +1637,7 @@ NVPTXAsmPrinter::getPTXFundamentalTypeStr(const Type *Ty, bool useB4PTR) const {
return "u32";
}
llvm_unreachable("unexpected type");
- return NULL;
+ return nullptr;
}
void NVPTXAsmPrinter::emitPTXGlobalVariable(const GlobalVariable *GVar,
@@ -1495,19 +1785,33 @@ void NVPTXAsmPrinter::emitFunctionParamList(const Function *F, raw_ostream &O) {
first = false;
// Handle image/sampler parameters
- if (llvm::isSampler(*I) || llvm::isImage(*I)) {
- if (llvm::isImage(*I)) {
- std::string sname = I->getName();
- if (llvm::isImageWriteOnly(*I))
- O << "\t.param .surfref " << *getSymbol(F) << "_param_"
- << paramIndex;
- else // Default image is read_only
- O << "\t.param .texref " << *getSymbol(F) << "_param_"
- << paramIndex;
- } else // Should be llvm::isSampler(*I)
- O << "\t.param .samplerref " << *getSymbol(F) << "_param_"
- << paramIndex;
- continue;
+ if (isKernelFunction(*F)) {
+ if (isSampler(*I) || isImage(*I)) {
+ if (isImage(*I)) {
+ std::string sname = I->getName();
+ if (isImageWriteOnly(*I) || isImageReadWrite(*I)) {
+ if (nvptxSubtarget.hasImageHandles())
+ O << "\t.param .u64 .ptr .surfref ";
+ else
+ O << "\t.param .surfref ";
+ O << *CurrentFnSym << "_param_" << paramIndex;
+ }
+ else { // Default image is read_only
+ if (nvptxSubtarget.hasImageHandles())
+ O << "\t.param .u64 .ptr .texref ";
+ else
+ O << "\t.param .texref ";
+ O << *CurrentFnSym << "_param_" << paramIndex;
+ }
+ } else {
+ if (nvptxSubtarget.hasImageHandles())
+ O << "\t.param .u64 .ptr .samplerref ";
+ else
+ O << "\t.param .samplerref ";
+ O << *CurrentFnSym << "_param_" << paramIndex;
+ }
+ continue;
+ }
}
if (PAL.hasAttribute(paramIndex + 1, Attribute::ByVal) == false) {
@@ -1752,13 +2056,35 @@ void NVPTXAsmPrinter::printScalarConstant(const Constant *CPV, raw_ostream &O) {
return;
}
if (const GlobalValue *GVar = dyn_cast<GlobalValue>(CPV)) {
- O << *getSymbol(GVar);
+ PointerType *PTy = dyn_cast<PointerType>(GVar->getType());
+ bool IsNonGenericPointer = false;
+ if (PTy && PTy->getAddressSpace() != 0) {
+ IsNonGenericPointer = true;
+ }
+ if (EmitGeneric && !isa<Function>(CPV) && !IsNonGenericPointer) {
+ O << "generic(";
+ O << *getSymbol(GVar);
+ O << ")";
+ } else {
+ O << *getSymbol(GVar);
+ }
return;
}
if (const ConstantExpr *Cexpr = dyn_cast<ConstantExpr>(CPV)) {
const Value *v = Cexpr->stripPointerCasts();
+ PointerType *PTy = dyn_cast<PointerType>(Cexpr->getType());
+ bool IsNonGenericPointer = false;
+ if (PTy && PTy->getAddressSpace() != 0) {
+ IsNonGenericPointer = true;
+ }
if (const GlobalValue *GVar = dyn_cast<GlobalValue>(v)) {
- O << *getSymbol(GVar);
+ if (EmitGeneric && !isa<Function>(v) && !IsNonGenericPointer) {
+ O << "generic(";
+ O << *getSymbol(GVar);
+ O << ")";
+ } else {
+ O << *getSymbol(GVar);
+ }
return;
} else {
O << *LowerConstant(CPV, *this);
@@ -2121,7 +2447,7 @@ void NVPTXAsmPrinter::emitSrcInText(StringRef filename, unsigned line) {
}
LineReader *NVPTXAsmPrinter::getReader(std::string filename) {
- if (reader == NULL) {
+ if (!reader) {
reader = new LineReader(filename);
}
diff --git a/lib/Target/NVPTX/NVPTXAsmPrinter.h b/lib/Target/NVPTX/NVPTXAsmPrinter.h
index 7162420..a9f9bdd 100644
--- a/lib/Target/NVPTX/NVPTXAsmPrinter.h
+++ b/lib/Target/NVPTX/NVPTXAsmPrinter.h
@@ -96,6 +96,7 @@ class LLVM_LIBRARY_VISIBILITY NVPTXAsmPrinter : public AsmPrinter {
unsigned curpos;
raw_ostream &O;
NVPTXAsmPrinter &AP;
+ bool EmitGeneric;
public:
AggBuffer(unsigned _size, raw_ostream &_O, NVPTXAsmPrinter &_AP)
@@ -104,6 +105,7 @@ class LLVM_LIBRARY_VISIBILITY NVPTXAsmPrinter : public AsmPrinter {
size = _size;
curpos = 0;
numSymbols = 0;
+ EmitGeneric = AP.EmitGeneric;
}
~AggBuffer() { delete[] buffer; }
unsigned addBytes(unsigned char *Ptr, int Num, int Bytes) {
@@ -155,7 +157,18 @@ class LLVM_LIBRARY_VISIBILITY NVPTXAsmPrinter : public AsmPrinter {
const Value *v = Symbols[nSym];
if (const GlobalValue *GVar = dyn_cast<GlobalValue>(v)) {
MCSymbol *Name = AP.getSymbol(GVar);
- O << *Name;
+ PointerType *PTy = dyn_cast<PointerType>(GVar->getType());
+ bool IsNonGenericPointer = false;
+ if (PTy && PTy->getAddressSpace() != 0) {
+ IsNonGenericPointer = true;
+ }
+ if (EmitGeneric && !isa<Function>(v) && !IsNonGenericPointer) {
+ O << "generic(";
+ O << *Name;
+ O << ")";
+ } else {
+ O << *Name;
+ }
} else if (const ConstantExpr *Cexpr = dyn_cast<ConstantExpr>(v)) {
O << *nvptx::LowerConstant(Cexpr, AP);
} else
@@ -176,31 +189,31 @@ class LLVM_LIBRARY_VISIBILITY NVPTXAsmPrinter : public AsmPrinter {
friend class AggBuffer;
- virtual void emitSrcInText(StringRef filename, unsigned line);
+ void emitSrcInText(StringRef filename, unsigned line);
private:
- virtual const char *getPassName() const { return "NVPTX Assembly Printer"; }
+ const char *getPassName() const override { return "NVPTX Assembly Printer"; }
const Function *F;
std::string CurrentFnName;
- void EmitFunctionEntryLabel();
- void EmitFunctionBodyStart();
- void EmitFunctionBodyEnd();
- void emitImplicitDef(const MachineInstr *MI) const;
+ void EmitFunctionEntryLabel() override;
+ void EmitFunctionBodyStart() override;
+ void EmitFunctionBodyEnd() override;
+ void emitImplicitDef(const MachineInstr *MI) const override;
- void EmitInstruction(const MachineInstr *);
+ void EmitInstruction(const MachineInstr *) override;
void lowerToMCInst(const MachineInstr *MI, MCInst &OutMI);
bool lowerOperand(const MachineOperand &MO, MCOperand &MCOp);
- MCOperand GetSymbolRef(const MachineOperand &MO, const MCSymbol *Symbol);
+ MCOperand GetSymbolRef(const MCSymbol *Symbol);
unsigned encodeVirtualRegister(unsigned Reg);
- void EmitAlignment(unsigned NumBits, const GlobalValue *GV = 0) const {}
+ void EmitAlignment(unsigned NumBits, const GlobalValue *GV = nullptr) const {}
void printVecModifiedImmediate(const MachineOperand &MO, const char *Modifier,
raw_ostream &O);
void printMemOperand(const MachineInstr *MI, int opNum, raw_ostream &O,
- const char *Modifier = 0);
+ const char *Modifier = nullptr);
void printImplicitDef(const MachineInstr *MI, raw_ostream &O) const;
void printModuleLevelGV(const GlobalVariable *GVar, raw_ostream &O,
bool = false);
@@ -221,15 +234,15 @@ private:
void printReturnValStr(const MachineFunction &MF, raw_ostream &O);
bool PrintAsmOperand(const MachineInstr *MI, unsigned OpNo,
unsigned AsmVariant, const char *ExtraCode,
- raw_ostream &);
+ raw_ostream &) override;
void printOperand(const MachineInstr *MI, int opNum, raw_ostream &O,
- const char *Modifier = 0);
+ const char *Modifier = nullptr);
bool PrintAsmMemoryOperand(const MachineInstr *MI, unsigned OpNo,
unsigned AsmVariant, const char *ExtraCode,
- raw_ostream &);
+ raw_ostream &) override;
protected:
- bool doInitialization(Module &M);
- bool doFinalization(Module &M);
+ bool doInitialization(Module &M) override;
+ bool doFinalization(Module &M) override;
private:
std::string CurrentBankselLabelInBasicBlock;
@@ -274,14 +287,33 @@ private:
static const char *getRegisterName(unsigned RegNo);
void emitDemotedVars(const Function *, raw_ostream &);
+ bool lowerImageHandleOperand(const MachineInstr *MI, unsigned OpNo,
+ MCOperand &MCOp);
+ void lowerImageHandleSymbol(unsigned Index, MCOperand &MCOp);
+
LineReader *reader;
LineReader *getReader(std::string);
+
+ // Used to control the need to emit .generic() in the initializer of
+ // module scope variables.
+ // Although ptx supports the hybrid mode like the following,
+ // .global .u32 a;
+ // .global .u32 b;
+ // .global .u32 addr[] = {a, generic(b)}
+ // we have difficulty representing the difference in the NVVM IR.
+ //
+ // Since the address value should always be generic in CUDA C and always
+ // be specific in OpenCL, we use this simple control here.
+ //
+ bool EmitGeneric;
+
public:
NVPTXAsmPrinter(TargetMachine &TM, MCStreamer &Streamer)
: AsmPrinter(TM, Streamer),
nvptxSubtarget(TM.getSubtarget<NVPTXSubtarget>()) {
CurrentBankselLabelInBasicBlock = "";
- reader = NULL;
+ reader = nullptr;
+ EmitGeneric = (nvptxSubtarget.getDrvInterface() == NVPTX::CUDA);
}
~NVPTXAsmPrinter() {
diff --git a/lib/Target/NVPTX/NVPTXAssignValidGlobalNames.cpp b/lib/Target/NVPTX/NVPTXAssignValidGlobalNames.cpp
index 158c482..962b123 100644
--- a/lib/Target/NVPTX/NVPTXAssignValidGlobalNames.cpp
+++ b/lib/Target/NVPTX/NVPTXAssignValidGlobalNames.cpp
@@ -33,7 +33,7 @@ public:
static char ID;
NVPTXAssignValidGlobalNames() : ModulePass(ID) {}
- virtual bool runOnModule(Module &M);
+ bool runOnModule(Module &M) override;
/// \brief Clean up the name to remove symbols invalid in PTX.
std::string cleanUpName(StringRef Name);
diff --git a/lib/Target/NVPTX/NVPTXFavorNonGenericAddrSpaces.cpp b/lib/Target/NVPTX/NVPTXFavorNonGenericAddrSpaces.cpp
new file mode 100644
index 0000000..f3a095d
--- /dev/null
+++ b/lib/Target/NVPTX/NVPTXFavorNonGenericAddrSpaces.cpp
@@ -0,0 +1,195 @@
+//===-- NVPTXFavorNonGenericAddrSpace.cpp - ---------------------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// When a load/store accesses the generic address space, checks whether the
+// address is casted from a non-generic address space. If so, remove this
+// addrspacecast because accessing non-generic address spaces is typically
+// faster. Besides seeking addrspacecasts, this optimization also traces into
+// the base pointer of a GEP.
+//
+// For instance, the code below loads a float from an array allocated in
+// addrspace(3).
+//
+// %0 = addrspacecast [10 x float] addrspace(3)* @a to [10 x float]*
+// %1 = gep [10 x float]* %0, i64 0, i64 %i
+// %2 = load float* %1 ; emits ld.f32
+//
+// First, function hoistAddrSpaceCastFromGEP reorders the addrspacecast
+// and the GEP to expose more optimization opportunities to function
+// optimizeMemoryInst. The intermediate code looks like:
+//
+// %0 = gep [10 x float] addrspace(3)* @a, i64 0, i64 %i
+// %1 = addrspacecast float addrspace(3)* %0 to float*
+// %2 = load float* %1 ; still emits ld.f32, but will be optimized shortly
+//
+// Then, function optimizeMemoryInstruction detects a load from addrspacecast'ed
+// generic pointers, and folds the load and the addrspacecast into a load from
+// the original address space. The final code looks like:
+//
+// %0 = gep [10 x float] addrspace(3)* @a, i64 0, i64 %i
+// %2 = load float addrspace(3)* %0 ; emits ld.shared.f32
+//
+// This pass may remove an addrspacecast in a different BB. Therefore, we
+// implement it as a FunctionPass.
+//
+//===----------------------------------------------------------------------===//
+
+#include "NVPTX.h"
+#include "llvm/IR/Function.h"
+#include "llvm/IR/Instructions.h"
+#include "llvm/IR/Operator.h"
+#include "llvm/Support/CommandLine.h"
+
+using namespace llvm;
+
+// An option to disable this optimization. Enable it by default.
+static cl::opt<bool> DisableFavorNonGeneric(
+ "disable-nvptx-favor-non-generic",
+ cl::init(false),
+ cl::desc("Do not convert generic address space usage "
+ "to non-generic address space usage"),
+ cl::Hidden);
+
+namespace {
+/// \brief NVPTXFavorNonGenericAddrSpaces
+class NVPTXFavorNonGenericAddrSpaces : public FunctionPass {
+public:
+ static char ID;
+ NVPTXFavorNonGenericAddrSpaces() : FunctionPass(ID) {}
+
+ bool runOnFunction(Function &F) override;
+
+ /// Optimizes load/store instructions. Idx is the index of the pointer operand
+ /// (0 for load, and 1 for store). Returns true if it changes anything.
+ bool optimizeMemoryInstruction(Instruction *I, unsigned Idx);
+ /// Transforms "gep (addrspacecast X), indices" into "addrspacecast (gep X,
+ /// indices)". This reordering exposes to optimizeMemoryInstruction more
+ /// optimization opportunities on loads and stores. Returns true if it changes
+ /// the program.
+ bool hoistAddrSpaceCastFromGEP(GEPOperator *GEP);
+};
+}
+
+char NVPTXFavorNonGenericAddrSpaces::ID = 0;
+
+namespace llvm {
+void initializeNVPTXFavorNonGenericAddrSpacesPass(PassRegistry &);
+}
+INITIALIZE_PASS(NVPTXFavorNonGenericAddrSpaces, "nvptx-favor-non-generic",
+ "Remove unnecessary non-generic-to-generic addrspacecasts",
+ false, false)
+
+// Decides whether removing Cast is valid and beneficial. Cast can be an
+// instruction or a constant expression.
+static bool IsEliminableAddrSpaceCast(Operator *Cast) {
+ // Returns false if not even an addrspacecast.
+ if (Cast->getOpcode() != Instruction::AddrSpaceCast)
+ return false;
+
+ Value *Src = Cast->getOperand(0);
+ PointerType *SrcTy = cast<PointerType>(Src->getType());
+ PointerType *DestTy = cast<PointerType>(Cast->getType());
+ // TODO: For now, we only handle the case where the addrspacecast only changes
+ // the address space but not the type. If the type also changes, we could
+ // still get rid of the addrspacecast by adding an extra bitcast, but we
+ // rarely see such scenarios.
+ if (SrcTy->getElementType() != DestTy->getElementType())
+ return false;
+
+ // Checks whether the addrspacecast is from a non-generic address space to the
+ // generic address space.
+ return (SrcTy->getAddressSpace() != AddressSpace::ADDRESS_SPACE_GENERIC &&
+ DestTy->getAddressSpace() == AddressSpace::ADDRESS_SPACE_GENERIC);
+}
+
+bool NVPTXFavorNonGenericAddrSpaces::hoistAddrSpaceCastFromGEP(
+ GEPOperator *GEP) {
+ Operator *Cast = dyn_cast<Operator>(GEP->getPointerOperand());
+ if (!Cast)
+ return false;
+
+ if (!IsEliminableAddrSpaceCast(Cast))
+ return false;
+
+ SmallVector<Value *, 8> Indices(GEP->idx_begin(), GEP->idx_end());
+ if (Instruction *GEPI = dyn_cast<Instruction>(GEP)) {
+ // %1 = gep (addrspacecast X), indices
+ // =>
+ // %0 = gep X, indices
+ // %1 = addrspacecast %0
+ GetElementPtrInst *NewGEPI = GetElementPtrInst::Create(Cast->getOperand(0),
+ Indices,
+ GEP->getName(),
+ GEPI);
+ NewGEPI->setIsInBounds(GEP->isInBounds());
+ GEP->replaceAllUsesWith(
+ new AddrSpaceCastInst(NewGEPI, GEP->getType(), "", GEPI));
+ } else {
+ // GEP is a constant expression.
+ Constant *NewGEPCE = ConstantExpr::getGetElementPtr(
+ cast<Constant>(Cast->getOperand(0)),
+ Indices,
+ GEP->isInBounds());
+ GEP->replaceAllUsesWith(
+ ConstantExpr::getAddrSpaceCast(NewGEPCE, GEP->getType()));
+ }
+
+ return true;
+}
+
+bool NVPTXFavorNonGenericAddrSpaces::optimizeMemoryInstruction(Instruction *MI,
+ unsigned Idx) {
+ // If the pointer operand is a GEP, hoist the addrspacecast if any from the
+ // GEP to expose more optimization opportunites.
+ if (GEPOperator *GEP = dyn_cast<GEPOperator>(MI->getOperand(Idx))) {
+ hoistAddrSpaceCastFromGEP(GEP);
+ }
+
+ // load/store (addrspacecast X) => load/store X if shortcutting the
+ // addrspacecast is valid and can improve performance.
+ //
+ // e.g.,
+ // %1 = addrspacecast float addrspace(3)* %0 to float*
+ // %2 = load float* %1
+ // ->
+ // %2 = load float addrspace(3)* %0
+ //
+ // Note: the addrspacecast can also be a constant expression.
+ if (Operator *Cast = dyn_cast<Operator>(MI->getOperand(Idx))) {
+ if (IsEliminableAddrSpaceCast(Cast)) {
+ MI->setOperand(Idx, Cast->getOperand(0));
+ return true;
+ }
+ }
+
+ return false;
+}
+
+bool NVPTXFavorNonGenericAddrSpaces::runOnFunction(Function &F) {
+ if (DisableFavorNonGeneric)
+ return false;
+
+ bool Changed = false;
+ for (Function::iterator B = F.begin(), BE = F.end(); B != BE; ++B) {
+ for (BasicBlock::iterator I = B->begin(), IE = B->end(); I != IE; ++I) {
+ if (isa<LoadInst>(I)) {
+ // V = load P
+ Changed |= optimizeMemoryInstruction(I, 0);
+ } else if (isa<StoreInst>(I)) {
+ // store V, P
+ Changed |= optimizeMemoryInstruction(I, 1);
+ }
+ }
+ }
+ return Changed;
+}
+
+FunctionPass *llvm::createNVPTXFavorNonGenericAddrSpacesPass() {
+ return new NVPTXFavorNonGenericAddrSpaces();
+}
diff --git a/lib/Target/NVPTX/NVPTXFrameLowering.h b/lib/Target/NVPTX/NVPTXFrameLowering.h
index 819f1dd..2ae6d72 100644
--- a/lib/Target/NVPTX/NVPTXFrameLowering.h
+++ b/lib/Target/NVPTX/NVPTXFrameLowering.h
@@ -28,13 +28,13 @@ public:
: TargetFrameLowering(TargetFrameLowering::StackGrowsUp, 8, 0), tm(_tm),
is64bit(_is64bit) {}
- virtual bool hasFP(const MachineFunction &MF) const;
- virtual void emitPrologue(MachineFunction &MF) const;
- virtual void emitEpilogue(MachineFunction &MF, MachineBasicBlock &MBB) const;
+ bool hasFP(const MachineFunction &MF) const override;
+ void emitPrologue(MachineFunction &MF) const override;
+ void emitEpilogue(MachineFunction &MF, MachineBasicBlock &MBB) const override;
void eliminateCallFramePseudoInstr(MachineFunction &MF,
- MachineBasicBlock &MBB,
- MachineBasicBlock::iterator I) const;
+ MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator I) const override;
};
} // End llvm namespace
diff --git a/lib/Target/NVPTX/NVPTXGenericToNVVM.cpp b/lib/Target/NVPTX/NVPTXGenericToNVVM.cpp
index 45f0734..023dd5e 100644
--- a/lib/Target/NVPTX/NVPTXGenericToNVVM.cpp
+++ b/lib/Target/NVPTX/NVPTXGenericToNVVM.cpp
@@ -40,10 +40,9 @@ public:
GenericToNVVM() : ModulePass(ID) {}
- virtual bool runOnModule(Module &M);
+ bool runOnModule(Module &M) override;
- virtual void getAnalysisUsage(AnalysisUsage &AU) const {
- }
+ void getAnalysisUsage(AnalysisUsage &AU) const override {}
private:
Value *getOrInsertCVTA(Module *M, Function *F, GlobalVariable *GV,
@@ -88,7 +87,8 @@ bool GenericToNVVM::runOnModule(Module &M) {
!GV->getName().startswith("llvm.")) {
GlobalVariable *NewGV = new GlobalVariable(
M, GV->getType()->getElementType(), GV->isConstant(),
- GV->getLinkage(), GV->hasInitializer() ? GV->getInitializer() : NULL,
+ GV->getLinkage(),
+ GV->hasInitializer() ? GV->getInitializer() : nullptr,
"", GV, GV->getThreadLocalMode(), llvm::ADDRESS_SPACE_GLOBAL);
NewGV->copyAttributesFrom(GV);
GVMap[GV] = NewGV;
@@ -162,7 +162,7 @@ Value *GenericToNVVM::getOrInsertCVTA(Module *M, Function *F,
GlobalVariable *GV,
IRBuilder<> &Builder) {
PointerType *GVType = GV->getType();
- Value *CVTA = NULL;
+ Value *CVTA = nullptr;
// See if the address space conversion requires the operand to be bitcast
// to i8 addrspace(n)* first.
diff --git a/lib/Target/NVPTX/NVPTXISelDAGToDAG.cpp b/lib/Target/NVPTX/NVPTXISelDAGToDAG.cpp
index bd08d2d..cd30880 100644
--- a/lib/Target/NVPTX/NVPTXISelDAGToDAG.cpp
+++ b/lib/Target/NVPTX/NVPTXISelDAGToDAG.cpp
@@ -20,11 +20,10 @@
#include "llvm/Support/raw_ostream.h"
#include "llvm/Target/TargetIntrinsicInfo.h"
-#undef DEBUG_TYPE
-#define DEBUG_TYPE "nvptx-isel"
-
using namespace llvm;
+#define DEBUG_TYPE "nvptx-isel"
+
static cl::opt<int>
FMAContractLevel("nvptx-fma-level", cl::ZeroOrMore, cl::Hidden,
cl::desc("NVPTX Specific: FMA contraction (0: don't do it"
@@ -120,10 +119,10 @@ SDNode *NVPTXDAGToDAGISel::Select(SDNode *N) {
if (N->isMachineOpcode()) {
N->setNodeId(-1);
- return NULL; // Already selected.
+ return nullptr; // Already selected.
}
- SDNode *ResNode = NULL;
+ SDNode *ResNode = nullptr;
switch (N->getOpcode()) {
case ISD::LOAD:
ResNode = SelectLoad(N);
@@ -162,6 +161,98 @@ SDNode *NVPTXDAGToDAGISel::Select(SDNode *N) {
case NVPTXISD::StoreParamU32:
ResNode = SelectStoreParam(N);
break;
+ case ISD::INTRINSIC_WO_CHAIN:
+ ResNode = SelectIntrinsicNoChain(N);
+ break;
+ case NVPTXISD::Tex1DFloatI32:
+ case NVPTXISD::Tex1DFloatFloat:
+ case NVPTXISD::Tex1DFloatFloatLevel:
+ case NVPTXISD::Tex1DFloatFloatGrad:
+ case NVPTXISD::Tex1DI32I32:
+ case NVPTXISD::Tex1DI32Float:
+ case NVPTXISD::Tex1DI32FloatLevel:
+ case NVPTXISD::Tex1DI32FloatGrad:
+ case NVPTXISD::Tex1DArrayFloatI32:
+ case NVPTXISD::Tex1DArrayFloatFloat:
+ case NVPTXISD::Tex1DArrayFloatFloatLevel:
+ case NVPTXISD::Tex1DArrayFloatFloatGrad:
+ case NVPTXISD::Tex1DArrayI32I32:
+ case NVPTXISD::Tex1DArrayI32Float:
+ case NVPTXISD::Tex1DArrayI32FloatLevel:
+ case NVPTXISD::Tex1DArrayI32FloatGrad:
+ case NVPTXISD::Tex2DFloatI32:
+ case NVPTXISD::Tex2DFloatFloat:
+ case NVPTXISD::Tex2DFloatFloatLevel:
+ case NVPTXISD::Tex2DFloatFloatGrad:
+ case NVPTXISD::Tex2DI32I32:
+ case NVPTXISD::Tex2DI32Float:
+ case NVPTXISD::Tex2DI32FloatLevel:
+ case NVPTXISD::Tex2DI32FloatGrad:
+ case NVPTXISD::Tex2DArrayFloatI32:
+ case NVPTXISD::Tex2DArrayFloatFloat:
+ case NVPTXISD::Tex2DArrayFloatFloatLevel:
+ case NVPTXISD::Tex2DArrayFloatFloatGrad:
+ case NVPTXISD::Tex2DArrayI32I32:
+ case NVPTXISD::Tex2DArrayI32Float:
+ case NVPTXISD::Tex2DArrayI32FloatLevel:
+ case NVPTXISD::Tex2DArrayI32FloatGrad:
+ case NVPTXISD::Tex3DFloatI32:
+ case NVPTXISD::Tex3DFloatFloat:
+ case NVPTXISD::Tex3DFloatFloatLevel:
+ case NVPTXISD::Tex3DFloatFloatGrad:
+ case NVPTXISD::Tex3DI32I32:
+ case NVPTXISD::Tex3DI32Float:
+ case NVPTXISD::Tex3DI32FloatLevel:
+ case NVPTXISD::Tex3DI32FloatGrad:
+ ResNode = SelectTextureIntrinsic(N);
+ break;
+ case NVPTXISD::Suld1DI8Trap:
+ case NVPTXISD::Suld1DI16Trap:
+ case NVPTXISD::Suld1DI32Trap:
+ case NVPTXISD::Suld1DV2I8Trap:
+ case NVPTXISD::Suld1DV2I16Trap:
+ case NVPTXISD::Suld1DV2I32Trap:
+ case NVPTXISD::Suld1DV4I8Trap:
+ case NVPTXISD::Suld1DV4I16Trap:
+ case NVPTXISD::Suld1DV4I32Trap:
+ case NVPTXISD::Suld1DArrayI8Trap:
+ case NVPTXISD::Suld1DArrayI16Trap:
+ case NVPTXISD::Suld1DArrayI32Trap:
+ case NVPTXISD::Suld1DArrayV2I8Trap:
+ case NVPTXISD::Suld1DArrayV2I16Trap:
+ case NVPTXISD::Suld1DArrayV2I32Trap:
+ case NVPTXISD::Suld1DArrayV4I8Trap:
+ case NVPTXISD::Suld1DArrayV4I16Trap:
+ case NVPTXISD::Suld1DArrayV4I32Trap:
+ case NVPTXISD::Suld2DI8Trap:
+ case NVPTXISD::Suld2DI16Trap:
+ case NVPTXISD::Suld2DI32Trap:
+ case NVPTXISD::Suld2DV2I8Trap:
+ case NVPTXISD::Suld2DV2I16Trap:
+ case NVPTXISD::Suld2DV2I32Trap:
+ case NVPTXISD::Suld2DV4I8Trap:
+ case NVPTXISD::Suld2DV4I16Trap:
+ case NVPTXISD::Suld2DV4I32Trap:
+ case NVPTXISD::Suld2DArrayI8Trap:
+ case NVPTXISD::Suld2DArrayI16Trap:
+ case NVPTXISD::Suld2DArrayI32Trap:
+ case NVPTXISD::Suld2DArrayV2I8Trap:
+ case NVPTXISD::Suld2DArrayV2I16Trap:
+ case NVPTXISD::Suld2DArrayV2I32Trap:
+ case NVPTXISD::Suld2DArrayV4I8Trap:
+ case NVPTXISD::Suld2DArrayV4I16Trap:
+ case NVPTXISD::Suld2DArrayV4I32Trap:
+ case NVPTXISD::Suld3DI8Trap:
+ case NVPTXISD::Suld3DI16Trap:
+ case NVPTXISD::Suld3DI32Trap:
+ case NVPTXISD::Suld3DV2I8Trap:
+ case NVPTXISD::Suld3DV2I16Trap:
+ case NVPTXISD::Suld3DV2I32Trap:
+ case NVPTXISD::Suld3DV4I8Trap:
+ case NVPTXISD::Suld3DV4I16Trap:
+ case NVPTXISD::Suld3DV4I32Trap:
+ ResNode = SelectSurfaceIntrinsic(N);
+ break;
case ISD::ADDRSPACECAST:
ResNode = SelectAddrSpaceCast(N);
break;
@@ -175,7 +266,7 @@ SDNode *NVPTXDAGToDAGISel::Select(SDNode *N) {
static unsigned int getCodeAddrSpace(MemSDNode *N,
const NVPTXSubtarget &Subtarget) {
- const Value *Src = N->getSrcValue();
+ const Value *Src = N->getMemOperand()->getValue();
if (!Src)
return NVPTX::PTXLdStInstCode::GENERIC;
@@ -194,6 +285,24 @@ static unsigned int getCodeAddrSpace(MemSDNode *N,
return NVPTX::PTXLdStInstCode::GENERIC;
}
+SDNode *NVPTXDAGToDAGISel::SelectIntrinsicNoChain(SDNode *N) {
+ unsigned IID = cast<ConstantSDNode>(N->getOperand(0))->getZExtValue();
+ switch (IID) {
+ default:
+ return nullptr;
+ case Intrinsic::nvvm_texsurf_handle_internal:
+ return SelectTexSurfHandle(N);
+ }
+}
+
+SDNode *NVPTXDAGToDAGISel::SelectTexSurfHandle(SDNode *N) {
+ // Op 0 is the intrinsic ID
+ SDValue Wrapper = N->getOperand(1);
+ SDValue GlobalVal = Wrapper.getOperand(0);
+ return CurDAG->getMachineNode(NVPTX::texsurf_handles, SDLoc(N), MVT::i64,
+ GlobalVal);
+}
+
SDNode *NVPTXDAGToDAGISel::SelectAddrSpaceCast(SDNode *N) {
SDValue Src = N->getOperand(0);
AddrSpaceCastSDNode *CastN = cast<AddrSpaceCastSDNode>(N);
@@ -258,14 +367,14 @@ SDNode *NVPTXDAGToDAGISel::SelectLoad(SDNode *N) {
SDLoc dl(N);
LoadSDNode *LD = cast<LoadSDNode>(N);
EVT LoadedVT = LD->getMemoryVT();
- SDNode *NVPTXLD = NULL;
+ SDNode *NVPTXLD = nullptr;
// do not support pre/post inc/dec
if (LD->isIndexed())
- return NULL;
+ return nullptr;
if (!LoadedVT.isSimple())
- return NULL;
+ return nullptr;
// Address Space Setting
unsigned int codeAddrSpace = getCodeAddrSpace(LD, Subtarget);
@@ -288,7 +397,7 @@ SDNode *NVPTXDAGToDAGISel::SelectLoad(SDNode *N) {
else if (num == 4)
vecType = NVPTX::PTXLdStInstCode::V4;
else
- return NULL;
+ return nullptr;
}
// Type Setting: fromType + fromTypeWidth
@@ -337,7 +446,7 @@ SDNode *NVPTXDAGToDAGISel::SelectLoad(SDNode *N) {
Opcode = NVPTX::LD_f64_avar;
break;
default:
- return NULL;
+ return nullptr;
}
SDValue Ops[] = { getI32Imm(isVolatile), getI32Imm(codeAddrSpace),
getI32Imm(vecType), getI32Imm(fromType),
@@ -366,7 +475,7 @@ SDNode *NVPTXDAGToDAGISel::SelectLoad(SDNode *N) {
Opcode = NVPTX::LD_f64_asi;
break;
default:
- return NULL;
+ return nullptr;
}
SDValue Ops[] = { getI32Imm(isVolatile), getI32Imm(codeAddrSpace),
getI32Imm(vecType), getI32Imm(fromType),
@@ -396,7 +505,7 @@ SDNode *NVPTXDAGToDAGISel::SelectLoad(SDNode *N) {
Opcode = NVPTX::LD_f64_ari_64;
break;
default:
- return NULL;
+ return nullptr;
}
} else {
switch (TargetVT) {
@@ -419,7 +528,7 @@ SDNode *NVPTXDAGToDAGISel::SelectLoad(SDNode *N) {
Opcode = NVPTX::LD_f64_ari;
break;
default:
- return NULL;
+ return nullptr;
}
}
SDValue Ops[] = { getI32Imm(isVolatile), getI32Imm(codeAddrSpace),
@@ -448,7 +557,7 @@ SDNode *NVPTXDAGToDAGISel::SelectLoad(SDNode *N) {
Opcode = NVPTX::LD_f64_areg_64;
break;
default:
- return NULL;
+ return nullptr;
}
} else {
switch (TargetVT) {
@@ -471,7 +580,7 @@ SDNode *NVPTXDAGToDAGISel::SelectLoad(SDNode *N) {
Opcode = NVPTX::LD_f64_areg;
break;
default:
- return NULL;
+ return nullptr;
}
}
SDValue Ops[] = { getI32Imm(isVolatile), getI32Imm(codeAddrSpace),
@@ -480,7 +589,7 @@ SDNode *NVPTXDAGToDAGISel::SelectLoad(SDNode *N) {
NVPTXLD = CurDAG->getMachineNode(Opcode, dl, TargetVT, MVT::Other, Ops);
}
- if (NVPTXLD != NULL) {
+ if (NVPTXLD) {
MachineSDNode::mmo_iterator MemRefs0 = MF->allocateMemRefsArray(1);
MemRefs0[0] = cast<MemSDNode>(N)->getMemOperand();
cast<MachineSDNode>(NVPTXLD)->setMemRefs(MemRefs0, MemRefs0 + 1);
@@ -501,7 +610,7 @@ SDNode *NVPTXDAGToDAGISel::SelectLoadVector(SDNode *N) {
EVT LoadedVT = MemSD->getMemoryVT();
if (!LoadedVT.isSimple())
- return NULL;
+ return nullptr;
// Address Space Setting
unsigned int CodeAddrSpace = getCodeAddrSpace(MemSD, Subtarget);
@@ -547,7 +656,7 @@ SDNode *NVPTXDAGToDAGISel::SelectLoadVector(SDNode *N) {
VecType = NVPTX::PTXLdStInstCode::V4;
break;
default:
- return NULL;
+ return nullptr;
}
EVT EltVT = N->getValueType(0);
@@ -555,11 +664,11 @@ SDNode *NVPTXDAGToDAGISel::SelectLoadVector(SDNode *N) {
if (SelectDirectAddr(Op1, Addr)) {
switch (N->getOpcode()) {
default:
- return NULL;
+ return nullptr;
case NVPTXISD::LoadV2:
switch (EltVT.getSimpleVT().SimpleTy) {
default:
- return NULL;
+ return nullptr;
case MVT::i8:
Opcode = NVPTX::LDV_i8_v2_avar;
break;
@@ -583,7 +692,7 @@ SDNode *NVPTXDAGToDAGISel::SelectLoadVector(SDNode *N) {
case NVPTXISD::LoadV4:
switch (EltVT.getSimpleVT().SimpleTy) {
default:
- return NULL;
+ return nullptr;
case MVT::i8:
Opcode = NVPTX::LDV_i8_v4_avar;
break;
@@ -609,11 +718,11 @@ SDNode *NVPTXDAGToDAGISel::SelectLoadVector(SDNode *N) {
: SelectADDRsi(Op1.getNode(), Op1, Base, Offset)) {
switch (N->getOpcode()) {
default:
- return NULL;
+ return nullptr;
case NVPTXISD::LoadV2:
switch (EltVT.getSimpleVT().SimpleTy) {
default:
- return NULL;
+ return nullptr;
case MVT::i8:
Opcode = NVPTX::LDV_i8_v2_asi;
break;
@@ -637,7 +746,7 @@ SDNode *NVPTXDAGToDAGISel::SelectLoadVector(SDNode *N) {
case NVPTXISD::LoadV4:
switch (EltVT.getSimpleVT().SimpleTy) {
default:
- return NULL;
+ return nullptr;
case MVT::i8:
Opcode = NVPTX::LDV_i8_v4_asi;
break;
@@ -664,11 +773,11 @@ SDNode *NVPTXDAGToDAGISel::SelectLoadVector(SDNode *N) {
if (Subtarget.is64Bit()) {
switch (N->getOpcode()) {
default:
- return NULL;
+ return nullptr;
case NVPTXISD::LoadV2:
switch (EltVT.getSimpleVT().SimpleTy) {
default:
- return NULL;
+ return nullptr;
case MVT::i8:
Opcode = NVPTX::LDV_i8_v2_ari_64;
break;
@@ -692,7 +801,7 @@ SDNode *NVPTXDAGToDAGISel::SelectLoadVector(SDNode *N) {
case NVPTXISD::LoadV4:
switch (EltVT.getSimpleVT().SimpleTy) {
default:
- return NULL;
+ return nullptr;
case MVT::i8:
Opcode = NVPTX::LDV_i8_v4_ari_64;
break;
@@ -711,11 +820,11 @@ SDNode *NVPTXDAGToDAGISel::SelectLoadVector(SDNode *N) {
} else {
switch (N->getOpcode()) {
default:
- return NULL;
+ return nullptr;
case NVPTXISD::LoadV2:
switch (EltVT.getSimpleVT().SimpleTy) {
default:
- return NULL;
+ return nullptr;
case MVT::i8:
Opcode = NVPTX::LDV_i8_v2_ari;
break;
@@ -739,7 +848,7 @@ SDNode *NVPTXDAGToDAGISel::SelectLoadVector(SDNode *N) {
case NVPTXISD::LoadV4:
switch (EltVT.getSimpleVT().SimpleTy) {
default:
- return NULL;
+ return nullptr;
case MVT::i8:
Opcode = NVPTX::LDV_i8_v4_ari;
break;
@@ -766,11 +875,11 @@ SDNode *NVPTXDAGToDAGISel::SelectLoadVector(SDNode *N) {
if (Subtarget.is64Bit()) {
switch (N->getOpcode()) {
default:
- return NULL;
+ return nullptr;
case NVPTXISD::LoadV2:
switch (EltVT.getSimpleVT().SimpleTy) {
default:
- return NULL;
+ return nullptr;
case MVT::i8:
Opcode = NVPTX::LDV_i8_v2_areg_64;
break;
@@ -794,7 +903,7 @@ SDNode *NVPTXDAGToDAGISel::SelectLoadVector(SDNode *N) {
case NVPTXISD::LoadV4:
switch (EltVT.getSimpleVT().SimpleTy) {
default:
- return NULL;
+ return nullptr;
case MVT::i8:
Opcode = NVPTX::LDV_i8_v4_areg_64;
break;
@@ -813,11 +922,11 @@ SDNode *NVPTXDAGToDAGISel::SelectLoadVector(SDNode *N) {
} else {
switch (N->getOpcode()) {
default:
- return NULL;
+ return nullptr;
case NVPTXISD::LoadV2:
switch (EltVT.getSimpleVT().SimpleTy) {
default:
- return NULL;
+ return nullptr;
case MVT::i8:
Opcode = NVPTX::LDV_i8_v2_areg;
break;
@@ -841,7 +950,7 @@ SDNode *NVPTXDAGToDAGISel::SelectLoadVector(SDNode *N) {
case NVPTXISD::LoadV4:
switch (EltVT.getSimpleVT().SimpleTy) {
default:
- return NULL;
+ return nullptr;
case MVT::i8:
Opcode = NVPTX::LDV_i8_v4_areg;
break;
@@ -887,11 +996,11 @@ SDNode *NVPTXDAGToDAGISel::SelectLDGLDUVector(SDNode *N) {
if (SelectDirectAddr(Op1, Addr)) {
switch (N->getOpcode()) {
default:
- return NULL;
+ return nullptr;
case NVPTXISD::LDGV2:
switch (EltVT.getSimpleVT().SimpleTy) {
default:
- return NULL;
+ return nullptr;
case MVT::i8:
Opcode = NVPTX::INT_PTX_LDG_G_v2i8_ELE_avar;
break;
@@ -915,7 +1024,7 @@ SDNode *NVPTXDAGToDAGISel::SelectLDGLDUVector(SDNode *N) {
case NVPTXISD::LDUV2:
switch (EltVT.getSimpleVT().SimpleTy) {
default:
- return NULL;
+ return nullptr;
case MVT::i8:
Opcode = NVPTX::INT_PTX_LDU_G_v2i8_ELE_avar;
break;
@@ -939,7 +1048,7 @@ SDNode *NVPTXDAGToDAGISel::SelectLDGLDUVector(SDNode *N) {
case NVPTXISD::LDGV4:
switch (EltVT.getSimpleVT().SimpleTy) {
default:
- return NULL;
+ return nullptr;
case MVT::i8:
Opcode = NVPTX::INT_PTX_LDG_G_v4i8_ELE_avar;
break;
@@ -957,7 +1066,7 @@ SDNode *NVPTXDAGToDAGISel::SelectLDGLDUVector(SDNode *N) {
case NVPTXISD::LDUV4:
switch (EltVT.getSimpleVT().SimpleTy) {
default:
- return NULL;
+ return nullptr;
case MVT::i8:
Opcode = NVPTX::INT_PTX_LDU_G_v4i8_ELE_avar;
break;
@@ -975,19 +1084,18 @@ SDNode *NVPTXDAGToDAGISel::SelectLDGLDUVector(SDNode *N) {
}
SDValue Ops[] = { Addr, Chain };
- LD = CurDAG->getMachineNode(Opcode, DL, N->getVTList(),
- ArrayRef<SDValue>(Ops, 2));
+ LD = CurDAG->getMachineNode(Opcode, DL, N->getVTList(), Ops);
} else if (Subtarget.is64Bit()
? SelectADDRri64(Op1.getNode(), Op1, Base, Offset)
: SelectADDRri(Op1.getNode(), Op1, Base, Offset)) {
if (Subtarget.is64Bit()) {
switch (N->getOpcode()) {
default:
- return NULL;
+ return nullptr;
case NVPTXISD::LDGV2:
switch (EltVT.getSimpleVT().SimpleTy) {
default:
- return NULL;
+ return nullptr;
case MVT::i8:
Opcode = NVPTX::INT_PTX_LDG_G_v2i8_ELE_ari64;
break;
@@ -1011,7 +1119,7 @@ SDNode *NVPTXDAGToDAGISel::SelectLDGLDUVector(SDNode *N) {
case NVPTXISD::LDUV2:
switch (EltVT.getSimpleVT().SimpleTy) {
default:
- return NULL;
+ return nullptr;
case MVT::i8:
Opcode = NVPTX::INT_PTX_LDU_G_v2i8_ELE_ari64;
break;
@@ -1035,7 +1143,7 @@ SDNode *NVPTXDAGToDAGISel::SelectLDGLDUVector(SDNode *N) {
case NVPTXISD::LDGV4:
switch (EltVT.getSimpleVT().SimpleTy) {
default:
- return NULL;
+ return nullptr;
case MVT::i8:
Opcode = NVPTX::INT_PTX_LDG_G_v4i8_ELE_ari64;
break;
@@ -1053,7 +1161,7 @@ SDNode *NVPTXDAGToDAGISel::SelectLDGLDUVector(SDNode *N) {
case NVPTXISD::LDUV4:
switch (EltVT.getSimpleVT().SimpleTy) {
default:
- return NULL;
+ return nullptr;
case MVT::i8:
Opcode = NVPTX::INT_PTX_LDU_G_v4i8_ELE_ari64;
break;
@@ -1072,11 +1180,11 @@ SDNode *NVPTXDAGToDAGISel::SelectLDGLDUVector(SDNode *N) {
} else {
switch (N->getOpcode()) {
default:
- return NULL;
+ return nullptr;
case NVPTXISD::LDGV2:
switch (EltVT.getSimpleVT().SimpleTy) {
default:
- return NULL;
+ return nullptr;
case MVT::i8:
Opcode = NVPTX::INT_PTX_LDG_G_v2i8_ELE_ari32;
break;
@@ -1100,7 +1208,7 @@ SDNode *NVPTXDAGToDAGISel::SelectLDGLDUVector(SDNode *N) {
case NVPTXISD::LDUV2:
switch (EltVT.getSimpleVT().SimpleTy) {
default:
- return NULL;
+ return nullptr;
case MVT::i8:
Opcode = NVPTX::INT_PTX_LDU_G_v2i8_ELE_ari32;
break;
@@ -1124,7 +1232,7 @@ SDNode *NVPTXDAGToDAGISel::SelectLDGLDUVector(SDNode *N) {
case NVPTXISD::LDGV4:
switch (EltVT.getSimpleVT().SimpleTy) {
default:
- return NULL;
+ return nullptr;
case MVT::i8:
Opcode = NVPTX::INT_PTX_LDG_G_v4i8_ELE_ari32;
break;
@@ -1142,7 +1250,7 @@ SDNode *NVPTXDAGToDAGISel::SelectLDGLDUVector(SDNode *N) {
case NVPTXISD::LDUV4:
switch (EltVT.getSimpleVT().SimpleTy) {
default:
- return NULL;
+ return nullptr;
case MVT::i8:
Opcode = NVPTX::INT_PTX_LDU_G_v4i8_ELE_ari32;
break;
@@ -1162,17 +1270,16 @@ SDNode *NVPTXDAGToDAGISel::SelectLDGLDUVector(SDNode *N) {
SDValue Ops[] = { Base, Offset, Chain };
- LD = CurDAG->getMachineNode(Opcode, DL, N->getVTList(),
- ArrayRef<SDValue>(Ops, 3));
+ LD = CurDAG->getMachineNode(Opcode, DL, N->getVTList(), Ops);
} else {
if (Subtarget.is64Bit()) {
switch (N->getOpcode()) {
default:
- return NULL;
+ return nullptr;
case NVPTXISD::LDGV2:
switch (EltVT.getSimpleVT().SimpleTy) {
default:
- return NULL;
+ return nullptr;
case MVT::i8:
Opcode = NVPTX::INT_PTX_LDG_G_v2i8_ELE_areg64;
break;
@@ -1196,7 +1303,7 @@ SDNode *NVPTXDAGToDAGISel::SelectLDGLDUVector(SDNode *N) {
case NVPTXISD::LDUV2:
switch (EltVT.getSimpleVT().SimpleTy) {
default:
- return NULL;
+ return nullptr;
case MVT::i8:
Opcode = NVPTX::INT_PTX_LDU_G_v2i8_ELE_areg64;
break;
@@ -1220,7 +1327,7 @@ SDNode *NVPTXDAGToDAGISel::SelectLDGLDUVector(SDNode *N) {
case NVPTXISD::LDGV4:
switch (EltVT.getSimpleVT().SimpleTy) {
default:
- return NULL;
+ return nullptr;
case MVT::i8:
Opcode = NVPTX::INT_PTX_LDG_G_v4i8_ELE_areg64;
break;
@@ -1238,7 +1345,7 @@ SDNode *NVPTXDAGToDAGISel::SelectLDGLDUVector(SDNode *N) {
case NVPTXISD::LDUV4:
switch (EltVT.getSimpleVT().SimpleTy) {
default:
- return NULL;
+ return nullptr;
case MVT::i8:
Opcode = NVPTX::INT_PTX_LDU_G_v4i8_ELE_areg64;
break;
@@ -1257,11 +1364,11 @@ SDNode *NVPTXDAGToDAGISel::SelectLDGLDUVector(SDNode *N) {
} else {
switch (N->getOpcode()) {
default:
- return NULL;
+ return nullptr;
case NVPTXISD::LDGV2:
switch (EltVT.getSimpleVT().SimpleTy) {
default:
- return NULL;
+ return nullptr;
case MVT::i8:
Opcode = NVPTX::INT_PTX_LDG_G_v2i8_ELE_areg32;
break;
@@ -1285,7 +1392,7 @@ SDNode *NVPTXDAGToDAGISel::SelectLDGLDUVector(SDNode *N) {
case NVPTXISD::LDUV2:
switch (EltVT.getSimpleVT().SimpleTy) {
default:
- return NULL;
+ return nullptr;
case MVT::i8:
Opcode = NVPTX::INT_PTX_LDU_G_v2i8_ELE_areg32;
break;
@@ -1309,7 +1416,7 @@ SDNode *NVPTXDAGToDAGISel::SelectLDGLDUVector(SDNode *N) {
case NVPTXISD::LDGV4:
switch (EltVT.getSimpleVT().SimpleTy) {
default:
- return NULL;
+ return nullptr;
case MVT::i8:
Opcode = NVPTX::INT_PTX_LDG_G_v4i8_ELE_areg32;
break;
@@ -1327,7 +1434,7 @@ SDNode *NVPTXDAGToDAGISel::SelectLDGLDUVector(SDNode *N) {
case NVPTXISD::LDUV4:
switch (EltVT.getSimpleVT().SimpleTy) {
default:
- return NULL;
+ return nullptr;
case MVT::i8:
Opcode = NVPTX::INT_PTX_LDU_G_v4i8_ELE_areg32;
break;
@@ -1346,8 +1453,7 @@ SDNode *NVPTXDAGToDAGISel::SelectLDGLDUVector(SDNode *N) {
}
SDValue Ops[] = { Op1, Chain };
- LD = CurDAG->getMachineNode(Opcode, DL, N->getVTList(),
- ArrayRef<SDValue>(Ops, 2));
+ LD = CurDAG->getMachineNode(Opcode, DL, N->getVTList(), Ops);
}
MachineSDNode::mmo_iterator MemRefs0 = MF->allocateMemRefsArray(1);
@@ -1361,14 +1467,14 @@ SDNode *NVPTXDAGToDAGISel::SelectStore(SDNode *N) {
SDLoc dl(N);
StoreSDNode *ST = cast<StoreSDNode>(N);
EVT StoreVT = ST->getMemoryVT();
- SDNode *NVPTXST = NULL;
+ SDNode *NVPTXST = nullptr;
// do not support pre/post inc/dec
if (ST->isIndexed())
- return NULL;
+ return nullptr;
if (!StoreVT.isSimple())
- return NULL;
+ return nullptr;
// Address Space Setting
unsigned int codeAddrSpace = getCodeAddrSpace(ST, Subtarget);
@@ -1391,7 +1497,7 @@ SDNode *NVPTXDAGToDAGISel::SelectStore(SDNode *N) {
else if (num == 4)
vecType = NVPTX::PTXLdStInstCode::V4;
else
- return NULL;
+ return nullptr;
}
// Type Setting: toType + toTypeWidth
@@ -1435,7 +1541,7 @@ SDNode *NVPTXDAGToDAGISel::SelectStore(SDNode *N) {
Opcode = NVPTX::ST_f64_avar;
break;
default:
- return NULL;
+ return nullptr;
}
SDValue Ops[] = { N1, getI32Imm(isVolatile), getI32Imm(codeAddrSpace),
getI32Imm(vecType), getI32Imm(toType),
@@ -1464,7 +1570,7 @@ SDNode *NVPTXDAGToDAGISel::SelectStore(SDNode *N) {
Opcode = NVPTX::ST_f64_asi;
break;
default:
- return NULL;
+ return nullptr;
}
SDValue Ops[] = { N1, getI32Imm(isVolatile), getI32Imm(codeAddrSpace),
getI32Imm(vecType), getI32Imm(toType),
@@ -1494,7 +1600,7 @@ SDNode *NVPTXDAGToDAGISel::SelectStore(SDNode *N) {
Opcode = NVPTX::ST_f64_ari_64;
break;
default:
- return NULL;
+ return nullptr;
}
} else {
switch (SourceVT) {
@@ -1517,7 +1623,7 @@ SDNode *NVPTXDAGToDAGISel::SelectStore(SDNode *N) {
Opcode = NVPTX::ST_f64_ari;
break;
default:
- return NULL;
+ return nullptr;
}
}
SDValue Ops[] = { N1, getI32Imm(isVolatile), getI32Imm(codeAddrSpace),
@@ -1546,7 +1652,7 @@ SDNode *NVPTXDAGToDAGISel::SelectStore(SDNode *N) {
Opcode = NVPTX::ST_f64_areg_64;
break;
default:
- return NULL;
+ return nullptr;
}
} else {
switch (SourceVT) {
@@ -1569,7 +1675,7 @@ SDNode *NVPTXDAGToDAGISel::SelectStore(SDNode *N) {
Opcode = NVPTX::ST_f64_areg;
break;
default:
- return NULL;
+ return nullptr;
}
}
SDValue Ops[] = { N1, getI32Imm(isVolatile), getI32Imm(codeAddrSpace),
@@ -1578,7 +1684,7 @@ SDNode *NVPTXDAGToDAGISel::SelectStore(SDNode *N) {
NVPTXST = CurDAG->getMachineNode(Opcode, dl, MVT::Other, Ops);
}
- if (NVPTXST != NULL) {
+ if (NVPTXST) {
MachineSDNode::mmo_iterator MemRefs0 = MF->allocateMemRefsArray(1);
MemRefs0[0] = cast<MemSDNode>(N)->getMemOperand();
cast<MachineSDNode>(NVPTXST)->setMemRefs(MemRefs0, MemRefs0 + 1);
@@ -1645,7 +1751,7 @@ SDNode *NVPTXDAGToDAGISel::SelectStoreVector(SDNode *N) {
N2 = N->getOperand(5);
break;
default:
- return NULL;
+ return nullptr;
}
StOps.push_back(getI32Imm(IsVolatile));
@@ -1657,11 +1763,11 @@ SDNode *NVPTXDAGToDAGISel::SelectStoreVector(SDNode *N) {
if (SelectDirectAddr(N2, Addr)) {
switch (N->getOpcode()) {
default:
- return NULL;
+ return nullptr;
case NVPTXISD::StoreV2:
switch (EltVT.getSimpleVT().SimpleTy) {
default:
- return NULL;
+ return nullptr;
case MVT::i8:
Opcode = NVPTX::STV_i8_v2_avar;
break;
@@ -1685,7 +1791,7 @@ SDNode *NVPTXDAGToDAGISel::SelectStoreVector(SDNode *N) {
case NVPTXISD::StoreV4:
switch (EltVT.getSimpleVT().SimpleTy) {
default:
- return NULL;
+ return nullptr;
case MVT::i8:
Opcode = NVPTX::STV_i8_v4_avar;
break;
@@ -1707,11 +1813,11 @@ SDNode *NVPTXDAGToDAGISel::SelectStoreVector(SDNode *N) {
: SelectADDRsi(N2.getNode(), N2, Base, Offset)) {
switch (N->getOpcode()) {
default:
- return NULL;
+ return nullptr;
case NVPTXISD::StoreV2:
switch (EltVT.getSimpleVT().SimpleTy) {
default:
- return NULL;
+ return nullptr;
case MVT::i8:
Opcode = NVPTX::STV_i8_v2_asi;
break;
@@ -1735,7 +1841,7 @@ SDNode *NVPTXDAGToDAGISel::SelectStoreVector(SDNode *N) {
case NVPTXISD::StoreV4:
switch (EltVT.getSimpleVT().SimpleTy) {
default:
- return NULL;
+ return nullptr;
case MVT::i8:
Opcode = NVPTX::STV_i8_v4_asi;
break;
@@ -1759,11 +1865,11 @@ SDNode *NVPTXDAGToDAGISel::SelectStoreVector(SDNode *N) {
if (Subtarget.is64Bit()) {
switch (N->getOpcode()) {
default:
- return NULL;
+ return nullptr;
case NVPTXISD::StoreV2:
switch (EltVT.getSimpleVT().SimpleTy) {
default:
- return NULL;
+ return nullptr;
case MVT::i8:
Opcode = NVPTX::STV_i8_v2_ari_64;
break;
@@ -1787,7 +1893,7 @@ SDNode *NVPTXDAGToDAGISel::SelectStoreVector(SDNode *N) {
case NVPTXISD::StoreV4:
switch (EltVT.getSimpleVT().SimpleTy) {
default:
- return NULL;
+ return nullptr;
case MVT::i8:
Opcode = NVPTX::STV_i8_v4_ari_64;
break;
@@ -1806,11 +1912,11 @@ SDNode *NVPTXDAGToDAGISel::SelectStoreVector(SDNode *N) {
} else {
switch (N->getOpcode()) {
default:
- return NULL;
+ return nullptr;
case NVPTXISD::StoreV2:
switch (EltVT.getSimpleVT().SimpleTy) {
default:
- return NULL;
+ return nullptr;
case MVT::i8:
Opcode = NVPTX::STV_i8_v2_ari;
break;
@@ -1834,7 +1940,7 @@ SDNode *NVPTXDAGToDAGISel::SelectStoreVector(SDNode *N) {
case NVPTXISD::StoreV4:
switch (EltVT.getSimpleVT().SimpleTy) {
default:
- return NULL;
+ return nullptr;
case MVT::i8:
Opcode = NVPTX::STV_i8_v4_ari;
break;
@@ -1857,11 +1963,11 @@ SDNode *NVPTXDAGToDAGISel::SelectStoreVector(SDNode *N) {
if (Subtarget.is64Bit()) {
switch (N->getOpcode()) {
default:
- return NULL;
+ return nullptr;
case NVPTXISD::StoreV2:
switch (EltVT.getSimpleVT().SimpleTy) {
default:
- return NULL;
+ return nullptr;
case MVT::i8:
Opcode = NVPTX::STV_i8_v2_areg_64;
break;
@@ -1885,7 +1991,7 @@ SDNode *NVPTXDAGToDAGISel::SelectStoreVector(SDNode *N) {
case NVPTXISD::StoreV4:
switch (EltVT.getSimpleVT().SimpleTy) {
default:
- return NULL;
+ return nullptr;
case MVT::i8:
Opcode = NVPTX::STV_i8_v4_areg_64;
break;
@@ -1904,11 +2010,11 @@ SDNode *NVPTXDAGToDAGISel::SelectStoreVector(SDNode *N) {
} else {
switch (N->getOpcode()) {
default:
- return NULL;
+ return nullptr;
case NVPTXISD::StoreV2:
switch (EltVT.getSimpleVT().SimpleTy) {
default:
- return NULL;
+ return nullptr;
case MVT::i8:
Opcode = NVPTX::STV_i8_v2_areg;
break;
@@ -1932,7 +2038,7 @@ SDNode *NVPTXDAGToDAGISel::SelectStoreVector(SDNode *N) {
case NVPTXISD::StoreV4:
switch (EltVT.getSimpleVT().SimpleTy) {
default:
- return NULL;
+ return nullptr;
case MVT::i8:
Opcode = NVPTX::STV_i8_v4_areg;
break;
@@ -1973,7 +2079,7 @@ SDNode *NVPTXDAGToDAGISel::SelectLoadParam(SDNode *Node) {
unsigned VecSize;
switch (Node->getOpcode()) {
default:
- return NULL;
+ return nullptr;
case NVPTXISD::LoadParam:
VecSize = 1;
break;
@@ -1992,11 +2098,11 @@ SDNode *NVPTXDAGToDAGISel::SelectLoadParam(SDNode *Node) {
switch (VecSize) {
default:
- return NULL;
+ return nullptr;
case 1:
switch (MemVT.getSimpleVT().SimpleTy) {
default:
- return NULL;
+ return nullptr;
case MVT::i1:
Opc = NVPTX::LoadParamMemI8;
break;
@@ -2023,7 +2129,7 @@ SDNode *NVPTXDAGToDAGISel::SelectLoadParam(SDNode *Node) {
case 2:
switch (MemVT.getSimpleVT().SimpleTy) {
default:
- return NULL;
+ return nullptr;
case MVT::i1:
Opc = NVPTX::LoadParamMemV2I8;
break;
@@ -2050,7 +2156,7 @@ SDNode *NVPTXDAGToDAGISel::SelectLoadParam(SDNode *Node) {
case 4:
switch (MemVT.getSimpleVT().SimpleTy) {
default:
- return NULL;
+ return nullptr;
case MVT::i1:
Opc = NVPTX::LoadParamMemV4I8;
break;
@@ -2077,7 +2183,7 @@ SDNode *NVPTXDAGToDAGISel::SelectLoadParam(SDNode *Node) {
VTs = CurDAG->getVTList(EltVT, EltVT, MVT::Other, MVT::Glue);
} else {
EVT EVTs[] = { EltVT, EltVT, EltVT, EltVT, MVT::Other, MVT::Glue };
- VTs = CurDAG->getVTList(&EVTs[0], array_lengthof(EVTs));
+ VTs = CurDAG->getVTList(EVTs);
}
unsigned OffsetVal = cast<ConstantSDNode>(Offset)->getZExtValue();
@@ -2103,7 +2209,7 @@ SDNode *NVPTXDAGToDAGISel::SelectStoreRetval(SDNode *N) {
unsigned NumElts = 1;
switch (N->getOpcode()) {
default:
- return NULL;
+ return nullptr;
case NVPTXISD::StoreRetval:
NumElts = 1;
break;
@@ -2128,11 +2234,11 @@ SDNode *NVPTXDAGToDAGISel::SelectStoreRetval(SDNode *N) {
unsigned Opcode = 0;
switch (NumElts) {
default:
- return NULL;
+ return nullptr;
case 1:
switch (Mem->getMemoryVT().getSimpleVT().SimpleTy) {
default:
- return NULL;
+ return nullptr;
case MVT::i1:
Opcode = NVPTX::StoreRetvalI8;
break;
@@ -2159,7 +2265,7 @@ SDNode *NVPTXDAGToDAGISel::SelectStoreRetval(SDNode *N) {
case 2:
switch (Mem->getMemoryVT().getSimpleVT().SimpleTy) {
default:
- return NULL;
+ return nullptr;
case MVT::i1:
Opcode = NVPTX::StoreRetvalV2I8;
break;
@@ -2186,7 +2292,7 @@ SDNode *NVPTXDAGToDAGISel::SelectStoreRetval(SDNode *N) {
case 4:
switch (Mem->getMemoryVT().getSimpleVT().SimpleTy) {
default:
- return NULL;
+ return nullptr;
case MVT::i1:
Opcode = NVPTX::StoreRetvalV4I8;
break;
@@ -2229,7 +2335,7 @@ SDNode *NVPTXDAGToDAGISel::SelectStoreParam(SDNode *N) {
unsigned NumElts = 1;
switch (N->getOpcode()) {
default:
- return NULL;
+ return nullptr;
case NVPTXISD::StoreParamU32:
case NVPTXISD::StoreParamS32:
case NVPTXISD::StoreParam:
@@ -2260,11 +2366,11 @@ SDNode *NVPTXDAGToDAGISel::SelectStoreParam(SDNode *N) {
default:
switch (NumElts) {
default:
- return NULL;
+ return nullptr;
case 1:
switch (Mem->getMemoryVT().getSimpleVT().SimpleTy) {
default:
- return NULL;
+ return nullptr;
case MVT::i1:
Opcode = NVPTX::StoreParamI8;
break;
@@ -2291,7 +2397,7 @@ SDNode *NVPTXDAGToDAGISel::SelectStoreParam(SDNode *N) {
case 2:
switch (Mem->getMemoryVT().getSimpleVT().SimpleTy) {
default:
- return NULL;
+ return nullptr;
case MVT::i1:
Opcode = NVPTX::StoreParamV2I8;
break;
@@ -2318,7 +2424,7 @@ SDNode *NVPTXDAGToDAGISel::SelectStoreParam(SDNode *N) {
case 4:
switch (Mem->getMemoryVT().getSimpleVT().SimpleTy) {
default:
- return NULL;
+ return nullptr;
case MVT::i1:
Opcode = NVPTX::StoreParamV4I8;
break;
@@ -2371,6 +2477,488 @@ SDNode *NVPTXDAGToDAGISel::SelectStoreParam(SDNode *N) {
return Ret;
}
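+// For illustration: SelectTextureIntrinsic matches the NVPTXISD texture
+// nodes produced during lowering and maps each one onto the corresponding
+// TEX_* machine instruction, rebuilding the operand list as
+// { texture ref, sampler ref, coordinates..., chain }.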
+SDNode *NVPTXDAGToDAGISel::SelectTextureIntrinsic(SDNode *N) {
+ SDValue Chain = N->getOperand(0);
+ SDValue TexRef = N->getOperand(1);
+ SDValue SampRef = N->getOperand(2);
+ SDNode *Ret = nullptr;
+ unsigned Opc = 0;
+ SmallVector<SDValue, 8> Ops;
+
+ switch (N->getOpcode()) {
+ default: return nullptr;
+ case NVPTXISD::Tex1DFloatI32:
+ Opc = NVPTX::TEX_1D_F32_I32;
+ break;
+ case NVPTXISD::Tex1DFloatFloat:
+ Opc = NVPTX::TEX_1D_F32_F32;
+ break;
+ case NVPTXISD::Tex1DFloatFloatLevel:
+ Opc = NVPTX::TEX_1D_F32_F32_LEVEL;
+ break;
+ case NVPTXISD::Tex1DFloatFloatGrad:
+ Opc = NVPTX::TEX_1D_F32_F32_GRAD;
+ break;
+ case NVPTXISD::Tex1DI32I32:
+ Opc = NVPTX::TEX_1D_I32_I32;
+ break;
+ case NVPTXISD::Tex1DI32Float:
+ Opc = NVPTX::TEX_1D_I32_F32;
+ break;
+ case NVPTXISD::Tex1DI32FloatLevel:
+ Opc = NVPTX::TEX_1D_I32_F32_LEVEL;
+ break;
+ case NVPTXISD::Tex1DI32FloatGrad:
+ Opc = NVPTX::TEX_1D_I32_F32_GRAD;
+ break;
+ case NVPTXISD::Tex1DArrayFloatI32:
+ Opc = NVPTX::TEX_1D_ARRAY_F32_I32;
+ break;
+ case NVPTXISD::Tex1DArrayFloatFloat:
+ Opc = NVPTX::TEX_1D_ARRAY_F32_F32;
+ break;
+ case NVPTXISD::Tex1DArrayFloatFloatLevel:
+ Opc = NVPTX::TEX_1D_ARRAY_F32_F32_LEVEL;
+ break;
+ case NVPTXISD::Tex1DArrayFloatFloatGrad:
+ Opc = NVPTX::TEX_1D_ARRAY_F32_F32_GRAD;
+ break;
+ case NVPTXISD::Tex1DArrayI32I32:
+ Opc = NVPTX::TEX_1D_ARRAY_I32_I32;
+ break;
+ case NVPTXISD::Tex1DArrayI32Float:
+ Opc = NVPTX::TEX_1D_ARRAY_I32_F32;
+ break;
+ case NVPTXISD::Tex1DArrayI32FloatLevel:
+ Opc = NVPTX::TEX_1D_ARRAY_I32_F32_LEVEL;
+ break;
+ case NVPTXISD::Tex1DArrayI32FloatGrad:
+ Opc = NVPTX::TEX_1D_ARRAY_I32_F32_GRAD;
+ break;
+ case NVPTXISD::Tex2DFloatI32:
+ Opc = NVPTX::TEX_2D_F32_I32;
+ break;
+ case NVPTXISD::Tex2DFloatFloat:
+ Opc = NVPTX::TEX_2D_F32_F32;
+ break;
+ case NVPTXISD::Tex2DFloatFloatLevel:
+ Opc = NVPTX::TEX_2D_F32_F32_LEVEL;
+ break;
+ case NVPTXISD::Tex2DFloatFloatGrad:
+ Opc = NVPTX::TEX_2D_F32_F32_GRAD;
+ break;
+ case NVPTXISD::Tex2DI32I32:
+ Opc = NVPTX::TEX_2D_I32_I32;
+ break;
+ case NVPTXISD::Tex2DI32Float:
+ Opc = NVPTX::TEX_2D_I32_F32;
+ break;
+ case NVPTXISD::Tex2DI32FloatLevel:
+ Opc = NVPTX::TEX_2D_I32_F32_LEVEL;
+ break;
+ case NVPTXISD::Tex2DI32FloatGrad:
+ Opc = NVPTX::TEX_2D_I32_F32_GRAD;
+ break;
+ case NVPTXISD::Tex2DArrayFloatI32:
+ Opc = NVPTX::TEX_2D_ARRAY_F32_I32;
+ break;
+ case NVPTXISD::Tex2DArrayFloatFloat:
+ Opc = NVPTX::TEX_2D_ARRAY_F32_F32;
+ break;
+ case NVPTXISD::Tex2DArrayFloatFloatLevel:
+ Opc = NVPTX::TEX_2D_ARRAY_F32_F32_LEVEL;
+ break;
+ case NVPTXISD::Tex2DArrayFloatFloatGrad:
+ Opc = NVPTX::TEX_2D_ARRAY_F32_F32_GRAD;
+ break;
+ case NVPTXISD::Tex2DArrayI32I32:
+ Opc = NVPTX::TEX_2D_ARRAY_I32_I32;
+ break;
+ case NVPTXISD::Tex2DArrayI32Float:
+ Opc = NVPTX::TEX_2D_ARRAY_I32_F32;
+ break;
+ case NVPTXISD::Tex2DArrayI32FloatLevel:
+ Opc = NVPTX::TEX_2D_ARRAY_I32_F32_LEVEL;
+ break;
+ case NVPTXISD::Tex2DArrayI32FloatGrad:
+ Opc = NVPTX::TEX_2D_ARRAY_I32_F32_GRAD;
+ break;
+ case NVPTXISD::Tex3DFloatI32:
+ Opc = NVPTX::TEX_3D_F32_I32;
+ break;
+ case NVPTXISD::Tex3DFloatFloat:
+ Opc = NVPTX::TEX_3D_F32_F32;
+ break;
+ case NVPTXISD::Tex3DFloatFloatLevel:
+ Opc = NVPTX::TEX_3D_F32_F32_LEVEL;
+ break;
+ case NVPTXISD::Tex3DFloatFloatGrad:
+ Opc = NVPTX::TEX_3D_F32_F32_GRAD;
+ break;
+ case NVPTXISD::Tex3DI32I32:
+ Opc = NVPTX::TEX_3D_I32_I32;
+ break;
+ case NVPTXISD::Tex3DI32Float:
+ Opc = NVPTX::TEX_3D_I32_F32;
+ break;
+ case NVPTXISD::Tex3DI32FloatLevel:
+ Opc = NVPTX::TEX_3D_I32_F32_LEVEL;
+ break;
+ case NVPTXISD::Tex3DI32FloatGrad:
+ Opc = NVPTX::TEX_3D_I32_F32_GRAD;
+ break;
+ }
+
+ Ops.push_back(TexRef);
+ Ops.push_back(SampRef);
+
+ // Copy over indices
+ for (unsigned i = 3; i < N->getNumOperands(); ++i) {
+ Ops.push_back(N->getOperand(i));
+ }
+
+ Ops.push_back(Chain);
+ Ret = CurDAG->getMachineNode(Opc, SDLoc(N), N->getVTList(), Ops);
+ return Ret;
+}
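+
+// A minimal sketch of the IR this path consumes (the handle-based signature
+// is recalled from this era's IntrinsicsNVVM.td; treat it as an assumption):
+//
+//   %v = call <4 x float> @llvm.nvvm.tex.1d.v4f32.f32(i64 %tex, i64 %samp,
+//                                                     float %x)
+//
+// Lowering turns the call into NVPTXISD::Tex1DFloatFloat, and the switch
+// above then selects NVPTX::TEX_1D_F32_F32 for it.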
+
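+// For illustration: SelectSurfaceIntrinsic does the same job for surface
+// loads; each case below pushes the surface handle, then one coordinate per
+// dimension (plus the array index for *_ARRAY variants), then the chain.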
+SDNode *NVPTXDAGToDAGISel::SelectSurfaceIntrinsic(SDNode *N) {
+ SDValue Chain = N->getOperand(0);
+ SDValue TexHandle = N->getOperand(1);
+ SDNode *Ret = nullptr;
+ unsigned Opc = 0;
+ SmallVector<SDValue, 8> Ops;
+ switch (N->getOpcode()) {
+ default: return nullptr;
+ case NVPTXISD::Suld1DI8Trap:
+ Opc = NVPTX::SULD_1D_I8_TRAP;
+ Ops.push_back(TexHandle);
+ Ops.push_back(N->getOperand(2));
+ Ops.push_back(Chain);
+ break;
+ case NVPTXISD::Suld1DI16Trap:
+ Opc = NVPTX::SULD_1D_I16_TRAP;
+ Ops.push_back(TexHandle);
+ Ops.push_back(N->getOperand(2));
+ Ops.push_back(Chain);
+ break;
+ case NVPTXISD::Suld1DI32Trap:
+ Opc = NVPTX::SULD_1D_I32_TRAP;
+ Ops.push_back(TexHandle);
+ Ops.push_back(N->getOperand(2));
+ Ops.push_back(Chain);
+ break;
+ case NVPTXISD::Suld1DV2I8Trap:
+ Opc = NVPTX::SULD_1D_V2I8_TRAP;
+ Ops.push_back(TexHandle);
+ Ops.push_back(N->getOperand(2));
+ Ops.push_back(Chain);
+ break;
+ case NVPTXISD::Suld1DV2I16Trap:
+ Opc = NVPTX::SULD_1D_V2I16_TRAP;
+ Ops.push_back(TexHandle);
+ Ops.push_back(N->getOperand(2));
+ Ops.push_back(Chain);
+ break;
+ case NVPTXISD::Suld1DV2I32Trap:
+ Opc = NVPTX::SULD_1D_V2I32_TRAP;
+ Ops.push_back(TexHandle);
+ Ops.push_back(N->getOperand(2));
+ Ops.push_back(Chain);
+ break;
+ case NVPTXISD::Suld1DV4I8Trap:
+ Opc = NVPTX::SULD_1D_V4I8_TRAP;
+ Ops.push_back(TexHandle);
+ Ops.push_back(N->getOperand(2));
+ Ops.push_back(Chain);
+ break;
+ case NVPTXISD::Suld1DV4I16Trap:
+ Opc = NVPTX::SULD_1D_V4I16_TRAP;
+ Ops.push_back(TexHandle);
+ Ops.push_back(N->getOperand(2));
+ Ops.push_back(Chain);
+ break;
+ case NVPTXISD::Suld1DV4I32Trap:
+ Opc = NVPTX::SULD_1D_V4I32_TRAP;
+ Ops.push_back(TexHandle);
+ Ops.push_back(N->getOperand(2));
+ Ops.push_back(Chain);
+ break;
+ case NVPTXISD::Suld1DArrayI8Trap:
+ Opc = NVPTX::SULD_1D_ARRAY_I8_TRAP;
+ Ops.push_back(TexHandle);
+ Ops.push_back(N->getOperand(2));
+ Ops.push_back(N->getOperand(3));
+ Ops.push_back(Chain);
+ break;
+ case NVPTXISD::Suld1DArrayI16Trap:
+ Opc = NVPTX::SULD_1D_ARRAY_I16_TRAP;
+ Ops.push_back(TexHandle);
+ Ops.push_back(N->getOperand(2));
+ Ops.push_back(N->getOperand(3));
+ Ops.push_back(Chain);
+ break;
+ case NVPTXISD::Suld1DArrayI32Trap:
+ Opc = NVPTX::SULD_1D_ARRAY_I32_TRAP;
+ Ops.push_back(TexHandle);
+ Ops.push_back(N->getOperand(2));
+ Ops.push_back(N->getOperand(3));
+ Ops.push_back(Chain);
+ break;
+ case NVPTXISD::Suld1DArrayV2I8Trap:
+ Opc = NVPTX::SULD_1D_ARRAY_V2I8_TRAP;
+ Ops.push_back(TexHandle);
+ Ops.push_back(N->getOperand(2));
+ Ops.push_back(N->getOperand(3));
+ Ops.push_back(Chain);
+ break;
+ case NVPTXISD::Suld1DArrayV2I16Trap:
+ Opc = NVPTX::SULD_1D_ARRAY_V2I16_TRAP;
+ Ops.push_back(TexHandle);
+ Ops.push_back(N->getOperand(2));
+ Ops.push_back(N->getOperand(3));
+ Ops.push_back(Chain);
+ break;
+ case NVPTXISD::Suld1DArrayV2I32Trap:
+ Opc = NVPTX::SULD_1D_ARRAY_V2I32_TRAP;
+ Ops.push_back(TexHandle);
+ Ops.push_back(N->getOperand(2));
+ Ops.push_back(N->getOperand(3));
+ Ops.push_back(Chain);
+ break;
+ case NVPTXISD::Suld1DArrayV4I8Trap:
+ Opc = NVPTX::SULD_1D_ARRAY_V4I8_TRAP;
+ Ops.push_back(TexHandle);
+ Ops.push_back(N->getOperand(2));
+ Ops.push_back(N->getOperand(3));
+ Ops.push_back(Chain);
+ break;
+ case NVPTXISD::Suld1DArrayV4I16Trap:
+ Opc = NVPTX::SULD_1D_ARRAY_V4I16_TRAP;
+ Ops.push_back(TexHandle);
+ Ops.push_back(N->getOperand(2));
+ Ops.push_back(N->getOperand(3));
+ Ops.push_back(Chain);
+ break;
+ case NVPTXISD::Suld1DArrayV4I32Trap:
+ Opc = NVPTX::SULD_1D_ARRAY_V4I32_TRAP;
+ Ops.push_back(TexHandle);
+ Ops.push_back(N->getOperand(2));
+ Ops.push_back(N->getOperand(3));
+ Ops.push_back(Chain);
+ break;
+ case NVPTXISD::Suld2DI8Trap:
+ Opc = NVPTX::SULD_2D_I8_TRAP;
+ Ops.push_back(TexHandle);
+ Ops.push_back(N->getOperand(2));
+ Ops.push_back(N->getOperand(3));
+ Ops.push_back(Chain);
+ break;
+ case NVPTXISD::Suld2DI16Trap:
+ Opc = NVPTX::SULD_2D_I16_TRAP;
+ Ops.push_back(TexHandle);
+ Ops.push_back(N->getOperand(2));
+ Ops.push_back(N->getOperand(3));
+ Ops.push_back(Chain);
+ break;
+ case NVPTXISD::Suld2DI32Trap:
+ Opc = NVPTX::SULD_2D_I32_TRAP;
+ Ops.push_back(TexHandle);
+ Ops.push_back(N->getOperand(2));
+ Ops.push_back(N->getOperand(3));
+ Ops.push_back(Chain);
+ break;
+ case NVPTXISD::Suld2DV2I8Trap:
+ Opc = NVPTX::SULD_2D_V2I8_TRAP;
+ Ops.push_back(TexHandle);
+ Ops.push_back(N->getOperand(2));
+ Ops.push_back(N->getOperand(3));
+ Ops.push_back(Chain);
+ break;
+ case NVPTXISD::Suld2DV2I16Trap:
+ Opc = NVPTX::SULD_2D_V2I16_TRAP;
+ Ops.push_back(TexHandle);
+ Ops.push_back(N->getOperand(2));
+ Ops.push_back(N->getOperand(3));
+ Ops.push_back(Chain);
+ break;
+ case NVPTXISD::Suld2DV2I32Trap:
+ Opc = NVPTX::SULD_2D_V2I32_TRAP;
+ Ops.push_back(TexHandle);
+ Ops.push_back(N->getOperand(2));
+ Ops.push_back(N->getOperand(3));
+ Ops.push_back(Chain);
+ break;
+ case NVPTXISD::Suld2DV4I8Trap:
+ Opc = NVPTX::SULD_2D_V4I8_TRAP;
+ Ops.push_back(TexHandle);
+ Ops.push_back(N->getOperand(2));
+ Ops.push_back(N->getOperand(3));
+ Ops.push_back(Chain);
+ break;
+ case NVPTXISD::Suld2DV4I16Trap:
+ Opc = NVPTX::SULD_2D_V4I16_TRAP;
+ Ops.push_back(TexHandle);
+ Ops.push_back(N->getOperand(2));
+ Ops.push_back(N->getOperand(3));
+ Ops.push_back(Chain);
+ break;
+ case NVPTXISD::Suld2DV4I32Trap:
+ Opc = NVPTX::SULD_2D_V4I32_TRAP;
+ Ops.push_back(TexHandle);
+ Ops.push_back(N->getOperand(2));
+ Ops.push_back(N->getOperand(3));
+ Ops.push_back(Chain);
+ break;
+ case NVPTXISD::Suld2DArrayI8Trap:
+ Opc = NVPTX::SULD_2D_ARRAY_I8_TRAP;
+ Ops.push_back(TexHandle);
+ Ops.push_back(N->getOperand(2));
+ Ops.push_back(N->getOperand(3));
+ Ops.push_back(N->getOperand(4));
+ Ops.push_back(Chain);
+ break;
+ case NVPTXISD::Suld2DArrayI16Trap:
+ Opc = NVPTX::SULD_2D_ARRAY_I16_TRAP;
+ Ops.push_back(TexHandle);
+ Ops.push_back(N->getOperand(2));
+ Ops.push_back(N->getOperand(3));
+ Ops.push_back(N->getOperand(4));
+ Ops.push_back(Chain);
+ break;
+ case NVPTXISD::Suld2DArrayI32Trap:
+ Opc = NVPTX::SULD_2D_ARRAY_I32_TRAP;
+ Ops.push_back(TexHandle);
+ Ops.push_back(N->getOperand(2));
+ Ops.push_back(N->getOperand(3));
+ Ops.push_back(N->getOperand(4));
+ Ops.push_back(Chain);
+ break;
+ case NVPTXISD::Suld2DArrayV2I8Trap:
+ Opc = NVPTX::SULD_2D_ARRAY_V2I8_TRAP;
+ Ops.push_back(TexHandle);
+ Ops.push_back(N->getOperand(2));
+ Ops.push_back(N->getOperand(3));
+ Ops.push_back(N->getOperand(4));
+ Ops.push_back(Chain);
+ break;
+ case NVPTXISD::Suld2DArrayV2I16Trap:
+ Opc = NVPTX::SULD_2D_ARRAY_V2I16_TRAP;
+ Ops.push_back(TexHandle);
+ Ops.push_back(N->getOperand(2));
+ Ops.push_back(N->getOperand(3));
+ Ops.push_back(N->getOperand(4));
+ Ops.push_back(Chain);
+ break;
+ case NVPTXISD::Suld2DArrayV2I32Trap:
+ Opc = NVPTX::SULD_2D_ARRAY_V2I32_TRAP;
+ Ops.push_back(TexHandle);
+ Ops.push_back(N->getOperand(2));
+ Ops.push_back(N->getOperand(3));
+ Ops.push_back(N->getOperand(4));
+ Ops.push_back(Chain);
+ break;
+ case NVPTXISD::Suld2DArrayV4I8Trap:
+ Opc = NVPTX::SULD_2D_ARRAY_V4I8_TRAP;
+ Ops.push_back(TexHandle);
+ Ops.push_back(N->getOperand(2));
+ Ops.push_back(N->getOperand(3));
+ Ops.push_back(N->getOperand(4));
+ Ops.push_back(Chain);
+ break;
+ case NVPTXISD::Suld2DArrayV4I16Trap:
+ Opc = NVPTX::SULD_2D_ARRAY_V4I16_TRAP;
+ Ops.push_back(TexHandle);
+ Ops.push_back(N->getOperand(2));
+ Ops.push_back(N->getOperand(3));
+ Ops.push_back(N->getOperand(4));
+ Ops.push_back(Chain);
+ break;
+ case NVPTXISD::Suld2DArrayV4I32Trap:
+ Opc = NVPTX::SULD_2D_ARRAY_V4I32_TRAP;
+ Ops.push_back(TexHandle);
+ Ops.push_back(N->getOperand(2));
+ Ops.push_back(N->getOperand(3));
+ Ops.push_back(N->getOperand(4));
+ Ops.push_back(Chain);
+ break;
+ case NVPTXISD::Suld3DI8Trap:
+ Opc = NVPTX::SULD_3D_I8_TRAP;
+ Ops.push_back(TexHandle);
+ Ops.push_back(N->getOperand(2));
+ Ops.push_back(N->getOperand(3));
+ Ops.push_back(N->getOperand(4));
+ Ops.push_back(Chain);
+ break;
+ case NVPTXISD::Suld3DI16Trap:
+ Opc = NVPTX::SULD_3D_I16_TRAP;
+ Ops.push_back(TexHandle);
+ Ops.push_back(N->getOperand(2));
+ Ops.push_back(N->getOperand(3));
+ Ops.push_back(N->getOperand(4));
+ Ops.push_back(Chain);
+ break;
+ case NVPTXISD::Suld3DI32Trap:
+ Opc = NVPTX::SULD_3D_I32_TRAP;
+ Ops.push_back(TexHandle);
+ Ops.push_back(N->getOperand(2));
+ Ops.push_back(N->getOperand(3));
+ Ops.push_back(N->getOperand(4));
+ Ops.push_back(Chain);
+ break;
+ case NVPTXISD::Suld3DV2I8Trap:
+ Opc = NVPTX::SULD_3D_V2I8_TRAP;
+ Ops.push_back(TexHandle);
+ Ops.push_back(N->getOperand(2));
+ Ops.push_back(N->getOperand(3));
+ Ops.push_back(N->getOperand(4));
+ Ops.push_back(Chain);
+ break;
+ case NVPTXISD::Suld3DV2I16Trap:
+ Opc = NVPTX::SULD_3D_V2I16_TRAP;
+ Ops.push_back(TexHandle);
+ Ops.push_back(N->getOperand(2));
+ Ops.push_back(N->getOperand(3));
+ Ops.push_back(N->getOperand(4));
+ Ops.push_back(Chain);
+ break;
+ case NVPTXISD::Suld3DV2I32Trap:
+ Opc = NVPTX::SULD_3D_V2I32_TRAP;
+ Ops.push_back(TexHandle);
+ Ops.push_back(N->getOperand(2));
+ Ops.push_back(N->getOperand(3));
+ Ops.push_back(N->getOperand(4));
+ Ops.push_back(Chain);
+ break;
+ case NVPTXISD::Suld3DV4I8Trap:
+ Opc = NVPTX::SULD_3D_V4I8_TRAP;
+ Ops.push_back(TexHandle);
+ Ops.push_back(N->getOperand(2));
+ Ops.push_back(N->getOperand(3));
+ Ops.push_back(N->getOperand(4));
+ Ops.push_back(Chain);
+ break;
+ case NVPTXISD::Suld3DV4I16Trap:
+ Opc = NVPTX::SULD_3D_V4I16_TRAP;
+ Ops.push_back(TexHandle);
+ Ops.push_back(N->getOperand(2));
+ Ops.push_back(N->getOperand(3));
+ Ops.push_back(N->getOperand(4));
+ Ops.push_back(Chain);
+ break;
+ case NVPTXISD::Suld3DV4I32Trap:
+ Opc = NVPTX::SULD_3D_V4I32_TRAP;
+ Ops.push_back(TexHandle);
+ Ops.push_back(N->getOperand(2));
+ Ops.push_back(N->getOperand(3));
+ Ops.push_back(N->getOperand(4));
+ Ops.push_back(Chain);
+ break;
+ }
+ Ret = CurDAG->getMachineNode(Opc, SDLoc(N), N->getVTList(), Ops);
+ return Ret;
+}
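+
+// A minimal sketch of a matching IR call (return and operand types recalled
+// from this era's IntrinsicsNVVM.td; treat them as an assumption):
+//
+//   %p = call i8 @llvm.nvvm.suld.1d.i8.trap(i64 %surf, i32 %x)
+//
+// which reaches this routine as NVPTXISD::Suld1DI8Trap and selects
+// NVPTX::SULD_1D_I8_TRAP.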
+
// SelectDirectAddr - Match a direct address for DAG.
// A direct address could be a globaladdress or externalsymbol.
bool NVPTXDAGToDAGISel::SelectDirectAddr(SDValue N, SDValue &Address) {
@@ -2464,14 +3052,18 @@ bool NVPTXDAGToDAGISel::SelectADDRri64(SDNode *OpNode, SDValue Addr,
bool NVPTXDAGToDAGISel::ChkMemSDNodeAddressSpace(SDNode *N,
unsigned int spN) const {
- const Value *Src = NULL;
+ const Value *Src = nullptr;
  // Even though MemIntrinsicSDNode is a subclass of MemSDNode,
// the classof() for MemSDNode does not include MemIntrinsicSDNode
// (See SelectionDAGNodes.h). So we need to check for both.
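+    // Pseudo source values (checked below) carry no IR Value to inspect,
+    // so for the generic address space (spN == 0) they are conservatively
+    // treated as a match.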
if (MemSDNode *mN = dyn_cast<MemSDNode>(N)) {
- Src = mN->getSrcValue();
+ if (spN == 0 && mN->getMemOperand()->getPseudoValue())
+ return true;
+ Src = mN->getMemOperand()->getValue();
} else if (MemSDNode *mN = dyn_cast<MemIntrinsicSDNode>(N)) {
- Src = mN->getSrcValue();
+ if (spN == 0 && mN->getMemOperand()->getPseudoValue())
+ return true;
+ Src = mN->getMemOperand()->getValue();
}
if (!Src)
return false;
diff --git a/lib/Target/NVPTX/NVPTXISelDAGToDAG.h b/lib/Target/NVPTX/NVPTXISelDAGToDAG.h
index 93ad169..11f92e7 100644
--- a/lib/Target/NVPTX/NVPTXISelDAGToDAG.h
+++ b/lib/Target/NVPTX/NVPTXISelDAGToDAG.h
@@ -11,8 +11,6 @@
//
//===----------------------------------------------------------------------===//
-#define DEBUG_TYPE "nvptx-isel"
-
#include "NVPTX.h"
#include "NVPTXISelLowering.h"
#include "NVPTXRegisterInfo.h"
@@ -46,19 +44,22 @@ public:
CodeGenOpt::Level OptLevel);
// Pass Name
- virtual const char *getPassName() const {
+ const char *getPassName() const override {
return "NVPTX DAG->DAG Pattern Instruction Selection";
}
const NVPTXSubtarget &Subtarget;
- virtual bool SelectInlineAsmMemoryOperand(
- const SDValue &Op, char ConstraintCode, std::vector<SDValue> &OutOps);
+ bool SelectInlineAsmMemoryOperand(const SDValue &Op,
+ char ConstraintCode,
+ std::vector<SDValue> &OutOps) override;
private:
// Include the pieces autogenerated from the target description.
#include "NVPTXGenDAGISel.inc"
- SDNode *Select(SDNode *N);
+ SDNode *Select(SDNode *N) override;
+ SDNode *SelectIntrinsicNoChain(SDNode *N);
+ SDNode *SelectTexSurfHandle(SDNode *N);
SDNode *SelectLoad(SDNode *N);
SDNode *SelectLoadVector(SDNode *N);
SDNode *SelectLDGLDUVector(SDNode *N);
@@ -68,6 +69,8 @@ private:
SDNode *SelectStoreRetval(SDNode *N);
SDNode *SelectStoreParam(SDNode *N);
SDNode *SelectAddrSpaceCast(SDNode *N);
+ SDNode *SelectTextureIntrinsic(SDNode *N);
+ SDNode *SelectSurfaceIntrinsic(SDNode *N);
inline SDValue getI32Imm(unsigned Imm) {
return CurDAG->getTargetConstant(Imm, MVT::i32);
diff --git a/lib/Target/NVPTX/NVPTXISelLowering.cpp b/lib/Target/NVPTX/NVPTXISelLowering.cpp
index 8e25a65..b0943be 100644
--- a/lib/Target/NVPTX/NVPTXISelLowering.cpp
+++ b/lib/Target/NVPTX/NVPTXISelLowering.cpp
@@ -75,7 +75,7 @@ static bool IsPTXVectorType(MVT VT) {
/// LowerCall, and LowerReturn.
static void ComputePTXValueVTs(const TargetLowering &TLI, Type *Ty,
SmallVectorImpl<EVT> &ValueVTs,
- SmallVectorImpl<uint64_t> *Offsets = 0,
+ SmallVectorImpl<uint64_t> *Offsets = nullptr,
uint64_t StartingOffset = 0) {
SmallVector<EVT, 16> TempVTs;
SmallVector<uint64_t, 16> TempOffsets;
@@ -245,7 +245,7 @@ NVPTXTargetLowering::NVPTXTargetLowering(NVPTXTargetMachine &TM)
const char *NVPTXTargetLowering::getTargetNodeName(unsigned Opcode) const {
switch (Opcode) {
default:
- return 0;
+ return nullptr;
case NVPTXISD::CALL:
return "NVPTXISD::CALL";
case NVPTXISD::RET_FLAG:
@@ -328,6 +328,116 @@ const char *NVPTXTargetLowering::getTargetNodeName(unsigned Opcode) const {
return "NVPTXISD::StoreV2";
case NVPTXISD::StoreV4:
return "NVPTXISD::StoreV4";
+ case NVPTXISD::Tex1DFloatI32: return "NVPTXISD::Tex1DFloatI32";
+ case NVPTXISD::Tex1DFloatFloat: return "NVPTXISD::Tex1DFloatFloat";
+ case NVPTXISD::Tex1DFloatFloatLevel:
+ return "NVPTXISD::Tex1DFloatFloatLevel";
+ case NVPTXISD::Tex1DFloatFloatGrad:
+ return "NVPTXISD::Tex1DFloatFloatGrad";
+ case NVPTXISD::Tex1DI32I32: return "NVPTXISD::Tex1DI32I32";
+ case NVPTXISD::Tex1DI32Float: return "NVPTXISD::Tex1DI32Float";
+ case NVPTXISD::Tex1DI32FloatLevel:
+ return "NVPTXISD::Tex1DI32FloatLevel";
+ case NVPTXISD::Tex1DI32FloatGrad:
+ return "NVPTXISD::Tex1DI32FloatGrad";
+  case NVPTXISD::Tex1DArrayFloatI32: return "NVPTXISD::Tex1DArrayFloatI32";
+  case NVPTXISD::Tex1DArrayFloatFloat: return "NVPTXISD::Tex1DArrayFloatFloat";
+  case NVPTXISD::Tex1DArrayFloatFloatLevel:
+    return "NVPTXISD::Tex1DArrayFloatFloatLevel";
+  case NVPTXISD::Tex1DArrayFloatFloatGrad:
+    return "NVPTXISD::Tex1DArrayFloatFloatGrad";
+  case NVPTXISD::Tex1DArrayI32I32: return "NVPTXISD::Tex1DArrayI32I32";
+  case NVPTXISD::Tex1DArrayI32Float: return "NVPTXISD::Tex1DArrayI32Float";
+  case NVPTXISD::Tex1DArrayI32FloatLevel:
+    return "NVPTXISD::Tex1DArrayI32FloatLevel";
+  case NVPTXISD::Tex1DArrayI32FloatGrad:
+    return "NVPTXISD::Tex1DArrayI32FloatGrad";
+ case NVPTXISD::Tex2DFloatI32: return "NVPTXISD::Tex2DFloatI32";
+ case NVPTXISD::Tex2DFloatFloat: return "NVPTXISD::Tex2DFloatFloat";
+ case NVPTXISD::Tex2DFloatFloatLevel:
+ return "NVPTXISD::Tex2DFloatFloatLevel";
+ case NVPTXISD::Tex2DFloatFloatGrad:
+ return "NVPTXISD::Tex2DFloatFloatGrad";
+ case NVPTXISD::Tex2DI32I32: return "NVPTXISD::Tex2DI32I32";
+ case NVPTXISD::Tex2DI32Float: return "NVPTXISD::Tex2DI32Float";
+ case NVPTXISD::Tex2DI32FloatLevel:
+ return "NVPTXISD::Tex2DI32FloatLevel";
+ case NVPTXISD::Tex2DI32FloatGrad:
+ return "NVPTXISD::Tex2DI32FloatGrad";
+ case NVPTXISD::Tex2DArrayFloatI32: return "NVPTXISD::Tex2DArrayFloatI32";
+ case NVPTXISD::Tex2DArrayFloatFloat: return "NVPTXISD::Tex2DArrayFloatFloat";
+ case NVPTXISD::Tex2DArrayFloatFloatLevel:
+ return "NVPTXISD::Tex2DArrayFloatFloatLevel";
+ case NVPTXISD::Tex2DArrayFloatFloatGrad:
+ return "NVPTXISD::Tex2DArrayFloatFloatGrad";
+ case NVPTXISD::Tex2DArrayI32I32: return "NVPTXISD::Tex2DArrayI32I32";
+ case NVPTXISD::Tex2DArrayI32Float: return "NVPTXISD::Tex2DArrayI32Float";
+ case NVPTXISD::Tex2DArrayI32FloatLevel:
+ return "NVPTXISD::Tex2DArrayI32FloatLevel";
+ case NVPTXISD::Tex2DArrayI32FloatGrad:
+ return "NVPTXISD::Tex2DArrayI32FloatGrad";
+ case NVPTXISD::Tex3DFloatI32: return "NVPTXISD::Tex3DFloatI32";
+ case NVPTXISD::Tex3DFloatFloat: return "NVPTXISD::Tex3DFloatFloat";
+ case NVPTXISD::Tex3DFloatFloatLevel:
+ return "NVPTXISD::Tex3DFloatFloatLevel";
+ case NVPTXISD::Tex3DFloatFloatGrad:
+ return "NVPTXISD::Tex3DFloatFloatGrad";
+ case NVPTXISD::Tex3DI32I32: return "NVPTXISD::Tex3DI32I32";
+ case NVPTXISD::Tex3DI32Float: return "NVPTXISD::Tex3DI32Float";
+ case NVPTXISD::Tex3DI32FloatLevel:
+ return "NVPTXISD::Tex3DI32FloatLevel";
+ case NVPTXISD::Tex3DI32FloatGrad:
+ return "NVPTXISD::Tex3DI32FloatGrad";
+
+ case NVPTXISD::Suld1DI8Trap: return "NVPTXISD::Suld1DI8Trap";
+ case NVPTXISD::Suld1DI16Trap: return "NVPTXISD::Suld1DI16Trap";
+ case NVPTXISD::Suld1DI32Trap: return "NVPTXISD::Suld1DI32Trap";
+ case NVPTXISD::Suld1DV2I8Trap: return "NVPTXISD::Suld1DV2I8Trap";
+ case NVPTXISD::Suld1DV2I16Trap: return "NVPTXISD::Suld1DV2I16Trap";
+ case NVPTXISD::Suld1DV2I32Trap: return "NVPTXISD::Suld1DV2I32Trap";
+ case NVPTXISD::Suld1DV4I8Trap: return "NVPTXISD::Suld1DV4I8Trap";
+ case NVPTXISD::Suld1DV4I16Trap: return "NVPTXISD::Suld1DV4I16Trap";
+ case NVPTXISD::Suld1DV4I32Trap: return "NVPTXISD::Suld1DV4I32Trap";
+
+ case NVPTXISD::Suld1DArrayI8Trap: return "NVPTXISD::Suld1DArrayI8Trap";
+ case NVPTXISD::Suld1DArrayI16Trap: return "NVPTXISD::Suld1DArrayI16Trap";
+ case NVPTXISD::Suld1DArrayI32Trap: return "NVPTXISD::Suld1DArrayI32Trap";
+ case NVPTXISD::Suld1DArrayV2I8Trap: return "NVPTXISD::Suld1DArrayV2I8Trap";
+ case NVPTXISD::Suld1DArrayV2I16Trap: return "NVPTXISD::Suld1DArrayV2I16Trap";
+ case NVPTXISD::Suld1DArrayV2I32Trap: return "NVPTXISD::Suld1DArrayV2I32Trap";
+ case NVPTXISD::Suld1DArrayV4I8Trap: return "NVPTXISD::Suld1DArrayV4I8Trap";
+ case NVPTXISD::Suld1DArrayV4I16Trap: return "NVPTXISD::Suld1DArrayV4I16Trap";
+ case NVPTXISD::Suld1DArrayV4I32Trap: return "NVPTXISD::Suld1DArrayV4I32Trap";
+
+ case NVPTXISD::Suld2DI8Trap: return "NVPTXISD::Suld2DI8Trap";
+ case NVPTXISD::Suld2DI16Trap: return "NVPTXISD::Suld2DI16Trap";
+ case NVPTXISD::Suld2DI32Trap: return "NVPTXISD::Suld2DI32Trap";
+ case NVPTXISD::Suld2DV2I8Trap: return "NVPTXISD::Suld2DV2I8Trap";
+ case NVPTXISD::Suld2DV2I16Trap: return "NVPTXISD::Suld2DV2I16Trap";
+ case NVPTXISD::Suld2DV2I32Trap: return "NVPTXISD::Suld2DV2I32Trap";
+ case NVPTXISD::Suld2DV4I8Trap: return "NVPTXISD::Suld2DV4I8Trap";
+ case NVPTXISD::Suld2DV4I16Trap: return "NVPTXISD::Suld2DV4I16Trap";
+ case NVPTXISD::Suld2DV4I32Trap: return "NVPTXISD::Suld2DV4I32Trap";
+
+ case NVPTXISD::Suld2DArrayI8Trap: return "NVPTXISD::Suld2DArrayI8Trap";
+ case NVPTXISD::Suld2DArrayI16Trap: return "NVPTXISD::Suld2DArrayI16Trap";
+ case NVPTXISD::Suld2DArrayI32Trap: return "NVPTXISD::Suld2DArrayI32Trap";
+ case NVPTXISD::Suld2DArrayV2I8Trap: return "NVPTXISD::Suld2DArrayV2I8Trap";
+ case NVPTXISD::Suld2DArrayV2I16Trap: return "NVPTXISD::Suld2DArrayV2I16Trap";
+ case NVPTXISD::Suld2DArrayV2I32Trap: return "NVPTXISD::Suld2DArrayV2I32Trap";
+ case NVPTXISD::Suld2DArrayV4I8Trap: return "NVPTXISD::Suld2DArrayV4I8Trap";
+ case NVPTXISD::Suld2DArrayV4I16Trap: return "NVPTXISD::Suld2DArrayV4I16Trap";
+ case NVPTXISD::Suld2DArrayV4I32Trap: return "NVPTXISD::Suld2DArrayV4I32Trap";
+
+ case NVPTXISD::Suld3DI8Trap: return "NVPTXISD::Suld3DI8Trap";
+ case NVPTXISD::Suld3DI16Trap: return "NVPTXISD::Suld3DI16Trap";
+ case NVPTXISD::Suld3DI32Trap: return "NVPTXISD::Suld3DI32Trap";
+ case NVPTXISD::Suld3DV2I8Trap: return "NVPTXISD::Suld3DV2I8Trap";
+ case NVPTXISD::Suld3DV2I16Trap: return "NVPTXISD::Suld3DV2I16Trap";
+ case NVPTXISD::Suld3DV2I32Trap: return "NVPTXISD::Suld3DV2I32Trap";
+ case NVPTXISD::Suld3DV4I8Trap: return "NVPTXISD::Suld3DV4I8Trap";
+ case NVPTXISD::Suld3DV4I16Trap: return "NVPTXISD::Suld3DV4I16Trap";
+ case NVPTXISD::Suld3DV4I32Trap: return "NVPTXISD::Suld3DV4I32Trap";
}
}
@@ -526,7 +636,7 @@ SDValue NVPTXTargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI,
SDValue Chain = CLI.Chain;
SDValue Callee = CLI.Callee;
bool &isTailCall = CLI.IsTailCall;
- ArgListTy &Args = CLI.Args;
+ ArgListTy &Args = CLI.getArgs();
Type *retTy = CLI.RetTy;
ImmutableCallSite *CS = CLI.CS;
@@ -575,7 +685,7 @@ SDValue NVPTXTargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI,
DAG.getConstant(paramCount, MVT::i32),
DAG.getConstant(sz, MVT::i32), InFlag };
Chain = DAG.getNode(NVPTXISD::DeclareParam, dl, DeclareParamVTs,
- DeclareParamOps, 5);
+ DeclareParamOps);
InFlag = Chain.getValue(1);
unsigned curOffset = 0;
for (unsigned j = 0, je = vtparts.size(); j != je; ++j) {
@@ -599,7 +709,7 @@ SDValue NVPTXTargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI,
DAG.getConstant(curOffset, MVT::i32),
StVal, InFlag };
Chain = DAG.getMemIntrinsicNode(NVPTXISD::StoreParam, dl,
- CopyParamVTs, &CopyParamOps[0], 5,
+ CopyParamVTs, CopyParamOps,
elemtype, MachinePointerInfo());
InFlag = Chain.getValue(1);
curOffset += sz / 8;
@@ -621,7 +731,7 @@ SDValue NVPTXTargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI,
DAG.getConstant(paramCount, MVT::i32),
DAG.getConstant(sz, MVT::i32), InFlag };
Chain = DAG.getNode(NVPTXISD::DeclareParam, dl, DeclareParamVTs,
- DeclareParamOps, 5);
+ DeclareParamOps);
InFlag = Chain.getValue(1);
unsigned NumElts = ObjectVT.getVectorNumElements();
EVT EltVT = ObjectVT.getVectorElementType();
@@ -644,7 +754,7 @@ SDValue NVPTXTargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI,
DAG.getConstant(0, MVT::i32), Elt,
InFlag };
Chain = DAG.getMemIntrinsicNode(NVPTXISD::StoreParam, dl,
- CopyParamVTs, &CopyParamOps[0], 5,
+ CopyParamVTs, CopyParamOps,
MemVT, MachinePointerInfo());
InFlag = Chain.getValue(1);
} else if (NumElts == 2) {
@@ -661,7 +771,7 @@ SDValue NVPTXTargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI,
DAG.getConstant(0, MVT::i32), Elt0, Elt1,
InFlag };
Chain = DAG.getMemIntrinsicNode(NVPTXISD::StoreParamV2, dl,
- CopyParamVTs, &CopyParamOps[0], 6,
+ CopyParamVTs, CopyParamOps,
MemVT, MachinePointerInfo());
InFlag = Chain.getValue(1);
} else {
@@ -735,9 +845,8 @@ SDValue NVPTXTargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI,
Ops.push_back(InFlag);
SDVTList CopyParamVTs = DAG.getVTList(MVT::Other, MVT::Glue);
- Chain = DAG.getMemIntrinsicNode(Opc, dl, CopyParamVTs, &Ops[0],
- Ops.size(), MemVT,
- MachinePointerInfo());
+ Chain = DAG.getMemIntrinsicNode(Opc, dl, CopyParamVTs, Ops,
+ MemVT, MachinePointerInfo());
InFlag = Chain.getValue(1);
curOffset += PerStoreOffset;
}
@@ -762,7 +871,7 @@ SDValue NVPTXTargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI,
DAG.getConstant(sz, MVT::i32),
DAG.getConstant(0, MVT::i32), InFlag };
Chain = DAG.getNode(NVPTXISD::DeclareScalarParam, dl, DeclareParamVTs,
- DeclareParamOps, 5);
+ DeclareParamOps);
InFlag = Chain.getValue(1);
SDValue OutV = OutVals[OIdx];
if (needExtend) {
@@ -781,7 +890,7 @@ SDValue NVPTXTargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI,
opcode = NVPTXISD::StoreParamU32;
else if (Outs[OIdx].Flags.isSExt())
opcode = NVPTXISD::StoreParamS32;
- Chain = DAG.getMemIntrinsicNode(opcode, dl, CopyParamVTs, CopyParamOps, 5,
+ Chain = DAG.getMemIntrinsicNode(opcode, dl, CopyParamVTs, CopyParamOps,
VT, MachinePointerInfo());
InFlag = Chain.getValue(1);
@@ -806,7 +915,7 @@ SDValue NVPTXTargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI,
InFlag
};
Chain = DAG.getNode(NVPTXISD::DeclareParam, dl, DeclareParamVTs,
- DeclareParamOps, 5);
+ DeclareParamOps);
InFlag = Chain.getValue(1);
unsigned curOffset = 0;
for (unsigned j = 0, je = vtparts.size(); j != je; ++j) {
@@ -834,7 +943,7 @@ SDValue NVPTXTargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI,
DAG.getConstant(curOffset, MVT::i32), theVal,
InFlag };
Chain = DAG.getMemIntrinsicNode(NVPTXISD::StoreParam, dl, CopyParamVTs,
- CopyParamOps, 5, elemtype,
+ CopyParamOps, elemtype,
MachinePointerInfo());
InFlag = Chain.getValue(1);
@@ -865,7 +974,7 @@ SDValue NVPTXTargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI,
DAG.getConstant(resultsz, MVT::i32),
DAG.getConstant(0, MVT::i32), InFlag };
Chain = DAG.getNode(NVPTXISD::DeclareRet, dl, DeclareRetVTs,
- DeclareRetOps, 5);
+ DeclareRetOps);
InFlag = Chain.getValue(1);
} else {
retAlignment = getArgumentAlignment(Callee, CS, retTy, 0);
@@ -875,7 +984,7 @@ SDValue NVPTXTargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI,
DAG.getConstant(resultsz / 8, MVT::i32),
DAG.getConstant(0, MVT::i32), InFlag };
Chain = DAG.getNode(NVPTXISD::DeclareRetParam, dl, DeclareRetVTs,
- DeclareRetOps, 5);
+ DeclareRetOps);
InFlag = Chain.getValue(1);
}
}
@@ -895,7 +1004,7 @@ SDValue NVPTXTargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI,
SDValue ProtoOps[] = {
Chain, DAG.getTargetExternalSymbol(ProtoStr, MVT::i32), InFlag,
};
- Chain = DAG.getNode(NVPTXISD::CallPrototype, dl, ProtoVTs, &ProtoOps[0], 3);
+ Chain = DAG.getNode(NVPTXISD::CallPrototype, dl, ProtoVTs, ProtoOps);
InFlag = Chain.getValue(1);
}
// Op to just print "call"
@@ -904,20 +1013,20 @@ SDValue NVPTXTargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI,
Chain, DAG.getConstant((Ins.size() == 0) ? 0 : 1, MVT::i32), InFlag
};
Chain = DAG.getNode(Func ? (NVPTXISD::PrintCallUni) : (NVPTXISD::PrintCall),
- dl, PrintCallVTs, PrintCallOps, 3);
+ dl, PrintCallVTs, PrintCallOps);
InFlag = Chain.getValue(1);
// Ops to print out the function name
SDVTList CallVoidVTs = DAG.getVTList(MVT::Other, MVT::Glue);
SDValue CallVoidOps[] = { Chain, Callee, InFlag };
- Chain = DAG.getNode(NVPTXISD::CallVoid, dl, CallVoidVTs, CallVoidOps, 3);
+ Chain = DAG.getNode(NVPTXISD::CallVoid, dl, CallVoidVTs, CallVoidOps);
InFlag = Chain.getValue(1);
// Ops to print out the param list
SDVTList CallArgBeginVTs = DAG.getVTList(MVT::Other, MVT::Glue);
SDValue CallArgBeginOps[] = { Chain, InFlag };
Chain = DAG.getNode(NVPTXISD::CallArgBegin, dl, CallArgBeginVTs,
- CallArgBeginOps, 2);
+ CallArgBeginOps);
InFlag = Chain.getValue(1);
for (unsigned i = 0, e = paramCount; i != e; ++i) {
@@ -929,21 +1038,20 @@ SDValue NVPTXTargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI,
SDVTList CallArgVTs = DAG.getVTList(MVT::Other, MVT::Glue);
SDValue CallArgOps[] = { Chain, DAG.getConstant(1, MVT::i32),
DAG.getConstant(i, MVT::i32), InFlag };
- Chain = DAG.getNode(opcode, dl, CallArgVTs, CallArgOps, 4);
+ Chain = DAG.getNode(opcode, dl, CallArgVTs, CallArgOps);
InFlag = Chain.getValue(1);
}
SDVTList CallArgEndVTs = DAG.getVTList(MVT::Other, MVT::Glue);
SDValue CallArgEndOps[] = { Chain, DAG.getConstant(Func ? 1 : 0, MVT::i32),
InFlag };
- Chain =
- DAG.getNode(NVPTXISD::CallArgEnd, dl, CallArgEndVTs, CallArgEndOps, 3);
+ Chain = DAG.getNode(NVPTXISD::CallArgEnd, dl, CallArgEndVTs, CallArgEndOps);
InFlag = Chain.getValue(1);
if (!Func) {
SDVTList PrototypeVTs = DAG.getVTList(MVT::Other, MVT::Glue);
SDValue PrototypeOps[] = { Chain, DAG.getConstant(uniqueCallSite, MVT::i32),
InFlag };
- Chain = DAG.getNode(NVPTXISD::Prototype, dl, PrototypeVTs, PrototypeOps, 3);
+ Chain = DAG.getNode(NVPTXISD::Prototype, dl, PrototypeVTs, PrototypeOps);
InFlag = Chain.getValue(1);
}
@@ -962,7 +1070,7 @@ SDValue NVPTXTargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI,
if (NumElts == 1) {
// Just a simple load
- std::vector<EVT> LoadRetVTs;
+ SmallVector<EVT, 4> LoadRetVTs;
if (needTruncate) {
// If loading i1 result, generate
// load i16
@@ -972,15 +1080,14 @@ SDValue NVPTXTargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI,
LoadRetVTs.push_back(EltVT);
LoadRetVTs.push_back(MVT::Other);
LoadRetVTs.push_back(MVT::Glue);
- std::vector<SDValue> LoadRetOps;
+ SmallVector<SDValue, 4> LoadRetOps;
LoadRetOps.push_back(Chain);
LoadRetOps.push_back(DAG.getConstant(1, MVT::i32));
LoadRetOps.push_back(DAG.getConstant(0, MVT::i32));
LoadRetOps.push_back(InFlag);
SDValue retval = DAG.getMemIntrinsicNode(
NVPTXISD::LoadParam, dl,
- DAG.getVTList(&LoadRetVTs[0], LoadRetVTs.size()), &LoadRetOps[0],
- LoadRetOps.size(), EltVT, MachinePointerInfo());
+ DAG.getVTList(LoadRetVTs), LoadRetOps, EltVT, MachinePointerInfo());
Chain = retval.getValue(1);
InFlag = retval.getValue(2);
SDValue Ret0 = retval;
@@ -989,7 +1096,7 @@ SDValue NVPTXTargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI,
InVals.push_back(Ret0);
} else if (NumElts == 2) {
// LoadV2
- std::vector<EVT> LoadRetVTs;
+ SmallVector<EVT, 4> LoadRetVTs;
if (needTruncate) {
// If loading i1 result, generate
// load i16
@@ -1002,15 +1109,14 @@ SDValue NVPTXTargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI,
}
LoadRetVTs.push_back(MVT::Other);
LoadRetVTs.push_back(MVT::Glue);
- std::vector<SDValue> LoadRetOps;
+ SmallVector<SDValue, 4> LoadRetOps;
LoadRetOps.push_back(Chain);
LoadRetOps.push_back(DAG.getConstant(1, MVT::i32));
LoadRetOps.push_back(DAG.getConstant(0, MVT::i32));
LoadRetOps.push_back(InFlag);
SDValue retval = DAG.getMemIntrinsicNode(
NVPTXISD::LoadParamV2, dl,
- DAG.getVTList(&LoadRetVTs[0], LoadRetVTs.size()), &LoadRetOps[0],
- LoadRetOps.size(), EltVT, MachinePointerInfo());
+ DAG.getVTList(LoadRetVTs), LoadRetOps, EltVT, MachinePointerInfo());
Chain = retval.getValue(2);
InFlag = retval.getValue(3);
SDValue Ret0 = retval.getValue(0);
@@ -1054,8 +1160,8 @@ SDValue NVPTXTargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI,
LoadRetOps.push_back(DAG.getConstant(Ofst, MVT::i32));
LoadRetOps.push_back(InFlag);
SDValue retval = DAG.getMemIntrinsicNode(
- Opc, dl, DAG.getVTList(&LoadRetVTs[0], LoadRetVTs.size()),
- &LoadRetOps[0], LoadRetOps.size(), EltVT, MachinePointerInfo());
+ Opc, dl, DAG.getVTList(LoadRetVTs),
+ LoadRetOps, EltVT, MachinePointerInfo());
if (VecSize == 2) {
Chain = retval.getValue(2);
InFlag = retval.getValue(3);
@@ -1110,8 +1216,8 @@ SDValue NVPTXTargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI,
LoadRetOps.push_back(InFlag);
SDValue retval = DAG.getMemIntrinsicNode(
NVPTXISD::LoadParam, dl,
- DAG.getVTList(&LoadRetVTs[0], LoadRetVTs.size()), &LoadRetOps[0],
- LoadRetOps.size(), TheLoadType, MachinePointerInfo());
+ DAG.getVTList(LoadRetVTs), LoadRetOps,
+ TheLoadType, MachinePointerInfo());
Chain = retval.getValue(1);
InFlag = retval.getValue(2);
SDValue Ret0 = retval.getValue(0);
@@ -1153,8 +1259,7 @@ NVPTXTargetLowering::LowerCONCAT_VECTORS(SDValue Op, SelectionDAG &DAG) const {
DAG.getIntPtrConstant(j)));
}
}
- return DAG.getNode(ISD::BUILD_VECTOR, dl, Node->getValueType(0), &Ops[0],
- Ops.size());
+ return DAG.getNode(ISD::BUILD_VECTOR, dl, Node->getValueType(0), Ops);
}
SDValue
@@ -1209,7 +1314,7 @@ SDValue NVPTXTargetLowering::LowerLOADi1(SDValue Op, SelectionDAG &DAG) const {
// load, so we build a MergeValues node for it. See ExpandUnalignedLoad()
// in LegalizeDAG.cpp which also uses MergeValues.
SDValue Ops[] = { result, LD->getChain() };
- return DAG.getMergeValues(Ops, 2, dl);
+ return DAG.getMergeValues(Ops, dl);
}
SDValue NVPTXTargetLowering::LowerSTORE(SDValue Op, SelectionDAG &DAG) const {
@@ -1297,7 +1402,7 @@ NVPTXTargetLowering::LowerSTOREVector(SDValue Op, SelectionDAG &DAG) const {
MemSDNode *MemSD = cast<MemSDNode>(N);
SDValue NewSt = DAG.getMemIntrinsicNode(
- Opcode, DL, DAG.getVTList(MVT::Other), &Ops[0], Ops.size(),
+ Opcode, DL, DAG.getVTList(MVT::Other), Ops,
MemSD->getMemoryVT(), MemSD->getMemOperand());
//return DCI.CombineTo(N, NewSt, true);
@@ -1429,7 +1534,7 @@ SDValue NVPTXTargetLowering::LowerFormalArguments(
if (isImageOrSamplerVal(
theArgs[i],
(theArgs[i]->getParent() ? theArgs[i]->getParent()->getParent()
- : 0))) {
+ : nullptr))) {
assert(isKernel && "Only kernels can have image/sampler params");
InVals.push_back(DAG.getConstant(i + 1, MVT::i32));
continue;
@@ -1683,8 +1788,7 @@ SDValue NVPTXTargetLowering::LowerFormalArguments(
//}
if (!OutChains.empty())
- DAG.setRoot(DAG.getNode(ISD::TokenFactor, dl, MVT::Other, &OutChains[0],
- OutChains.size()));
+ DAG.setRoot(DAG.getNode(ISD::TokenFactor, dl, MVT::Other, OutChains));
return Chain;
}
@@ -1726,7 +1830,7 @@ NVPTXTargetLowering::LowerReturn(SDValue Chain, CallingConv::ID CallConv,
StoreVal = DAG.getNode(ISD::ZERO_EXTEND, dl, MVT::i16, StoreVal);
SDValue Ops[] = { Chain, DAG.getConstant(0, MVT::i32), StoreVal };
Chain = DAG.getMemIntrinsicNode(NVPTXISD::StoreRetval, dl,
- DAG.getVTList(MVT::Other), &Ops[0], 3,
+ DAG.getVTList(MVT::Other), Ops,
EltVT, MachinePointerInfo());
} else if (NumElts == 2) {
@@ -1742,7 +1846,7 @@ NVPTXTargetLowering::LowerReturn(SDValue Chain, CallingConv::ID CallConv,
SDValue Ops[] = { Chain, DAG.getConstant(0, MVT::i32), StoreVal0,
StoreVal1 };
Chain = DAG.getMemIntrinsicNode(NVPTXISD::StoreRetvalV2, dl,
- DAG.getVTList(MVT::Other), &Ops[0], 4,
+ DAG.getVTList(MVT::Other), Ops,
EltVT, MachinePointerInfo());
} else {
// V4 stores
@@ -1814,8 +1918,8 @@ NVPTXTargetLowering::LowerReturn(SDValue Chain, CallingConv::ID CallConv,
// Chain = DAG.getNode(Opc, dl, MVT::Other, &Ops[0], Ops.size());
Chain =
- DAG.getMemIntrinsicNode(Opc, dl, DAG.getVTList(MVT::Other), &Ops[0],
- Ops.size(), EltVT, MachinePointerInfo());
+ DAG.getMemIntrinsicNode(Opc, dl, DAG.getVTList(MVT::Other), Ops,
+ EltVT, MachinePointerInfo());
Offset += PerStoreOffset;
}
}
@@ -1852,8 +1956,8 @@ NVPTXTargetLowering::LowerReturn(SDValue Chain, CallingConv::ID CallConv,
SDValue Ops[] = { Chain, DAG.getConstant(SizeSoFar, MVT::i32), TmpVal };
Chain = DAG.getMemIntrinsicNode(NVPTXISD::StoreRetval, dl,
- DAG.getVTList(MVT::Other), &Ops[0],
- 3, TheStoreType,
+ DAG.getVTList(MVT::Other), Ops,
+ TheStoreType,
MachinePointerInfo());
if(TheValType.isVector())
SizeSoFar +=
@@ -1891,6 +1995,195 @@ bool NVPTXTargetLowering::isTypeSupportedInIntrinsic(MVT VT) const {
return false;
}
+static unsigned getOpcForTextureInstr(unsigned Intrinsic) {
+ switch (Intrinsic) {
+ default:
+ return 0;
+
+ case Intrinsic::nvvm_tex_1d_v4f32_i32:
+ return NVPTXISD::Tex1DFloatI32;
+ case Intrinsic::nvvm_tex_1d_v4f32_f32:
+ return NVPTXISD::Tex1DFloatFloat;
+ case Intrinsic::nvvm_tex_1d_level_v4f32_f32:
+ return NVPTXISD::Tex1DFloatFloatLevel;
+ case Intrinsic::nvvm_tex_1d_grad_v4f32_f32:
+ return NVPTXISD::Tex1DFloatFloatGrad;
+ case Intrinsic::nvvm_tex_1d_v4i32_i32:
+ return NVPTXISD::Tex1DI32I32;
+ case Intrinsic::nvvm_tex_1d_v4i32_f32:
+ return NVPTXISD::Tex1DI32Float;
+ case Intrinsic::nvvm_tex_1d_level_v4i32_f32:
+ return NVPTXISD::Tex1DI32FloatLevel;
+ case Intrinsic::nvvm_tex_1d_grad_v4i32_f32:
+ return NVPTXISD::Tex1DI32FloatGrad;
+
+ case Intrinsic::nvvm_tex_1d_array_v4f32_i32:
+ return NVPTXISD::Tex1DArrayFloatI32;
+ case Intrinsic::nvvm_tex_1d_array_v4f32_f32:
+ return NVPTXISD::Tex1DArrayFloatFloat;
+ case Intrinsic::nvvm_tex_1d_array_level_v4f32_f32:
+ return NVPTXISD::Tex1DArrayFloatFloatLevel;
+ case Intrinsic::nvvm_tex_1d_array_grad_v4f32_f32:
+ return NVPTXISD::Tex1DArrayFloatFloatGrad;
+ case Intrinsic::nvvm_tex_1d_array_v4i32_i32:
+ return NVPTXISD::Tex1DArrayI32I32;
+ case Intrinsic::nvvm_tex_1d_array_v4i32_f32:
+ return NVPTXISD::Tex1DArrayI32Float;
+ case Intrinsic::nvvm_tex_1d_array_level_v4i32_f32:
+ return NVPTXISD::Tex1DArrayI32FloatLevel;
+ case Intrinsic::nvvm_tex_1d_array_grad_v4i32_f32:
+ return NVPTXISD::Tex1DArrayI32FloatGrad;
+
+ case Intrinsic::nvvm_tex_2d_v4f32_i32:
+ return NVPTXISD::Tex2DFloatI32;
+ case Intrinsic::nvvm_tex_2d_v4f32_f32:
+ return NVPTXISD::Tex2DFloatFloat;
+ case Intrinsic::nvvm_tex_2d_level_v4f32_f32:
+ return NVPTXISD::Tex2DFloatFloatLevel;
+ case Intrinsic::nvvm_tex_2d_grad_v4f32_f32:
+ return NVPTXISD::Tex2DFloatFloatGrad;
+ case Intrinsic::nvvm_tex_2d_v4i32_i32:
+ return NVPTXISD::Tex2DI32I32;
+ case Intrinsic::nvvm_tex_2d_v4i32_f32:
+ return NVPTXISD::Tex2DI32Float;
+ case Intrinsic::nvvm_tex_2d_level_v4i32_f32:
+ return NVPTXISD::Tex2DI32FloatLevel;
+ case Intrinsic::nvvm_tex_2d_grad_v4i32_f32:
+ return NVPTXISD::Tex2DI32FloatGrad;
+
+ case Intrinsic::nvvm_tex_2d_array_v4f32_i32:
+ return NVPTXISD::Tex2DArrayFloatI32;
+ case Intrinsic::nvvm_tex_2d_array_v4f32_f32:
+ return NVPTXISD::Tex2DArrayFloatFloat;
+ case Intrinsic::nvvm_tex_2d_array_level_v4f32_f32:
+ return NVPTXISD::Tex2DArrayFloatFloatLevel;
+ case Intrinsic::nvvm_tex_2d_array_grad_v4f32_f32:
+ return NVPTXISD::Tex2DArrayFloatFloatGrad;
+ case Intrinsic::nvvm_tex_2d_array_v4i32_i32:
+ return NVPTXISD::Tex2DArrayI32I32;
+ case Intrinsic::nvvm_tex_2d_array_v4i32_f32:
+ return NVPTXISD::Tex2DArrayI32Float;
+ case Intrinsic::nvvm_tex_2d_array_level_v4i32_f32:
+ return NVPTXISD::Tex2DArrayI32FloatLevel;
+ case Intrinsic::nvvm_tex_2d_array_grad_v4i32_f32:
+ return NVPTXISD::Tex2DArrayI32FloatGrad;
+
+ case Intrinsic::nvvm_tex_3d_v4f32_i32:
+ return NVPTXISD::Tex3DFloatI32;
+ case Intrinsic::nvvm_tex_3d_v4f32_f32:
+ return NVPTXISD::Tex3DFloatFloat;
+ case Intrinsic::nvvm_tex_3d_level_v4f32_f32:
+ return NVPTXISD::Tex3DFloatFloatLevel;
+ case Intrinsic::nvvm_tex_3d_grad_v4f32_f32:
+ return NVPTXISD::Tex3DFloatFloatGrad;
+ case Intrinsic::nvvm_tex_3d_v4i32_i32:
+ return NVPTXISD::Tex3DI32I32;
+ case Intrinsic::nvvm_tex_3d_v4i32_f32:
+ return NVPTXISD::Tex3DI32Float;
+ case Intrinsic::nvvm_tex_3d_level_v4i32_f32:
+ return NVPTXISD::Tex3DI32FloatLevel;
+ case Intrinsic::nvvm_tex_3d_grad_v4i32_f32:
+ return NVPTXISD::Tex3DI32FloatGrad;
+ }
+}
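+
+// For illustration, the mapping follows a naming convention: intrinsic
+// nvvm_tex_<dim>[_array][_level|_grad]_v4<ret>_<coord> becomes
+// NVPTXISD::Tex<Dim>[Array]<Ret><Coord>[Level|Grad], e.g.
+// nvvm_tex_2d_level_v4f32_f32 -> NVPTXISD::Tex2DFloatFloatLevel.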
+
+static unsigned getOpcForSurfaceInstr(unsigned Intrinsic) {
+ switch (Intrinsic) {
+ default:
+ return 0;
+ case Intrinsic::nvvm_suld_1d_i8_trap:
+ return NVPTXISD::Suld1DI8Trap;
+ case Intrinsic::nvvm_suld_1d_i16_trap:
+ return NVPTXISD::Suld1DI16Trap;
+ case Intrinsic::nvvm_suld_1d_i32_trap:
+ return NVPTXISD::Suld1DI32Trap;
+ case Intrinsic::nvvm_suld_1d_v2i8_trap:
+ return NVPTXISD::Suld1DV2I8Trap;
+ case Intrinsic::nvvm_suld_1d_v2i16_trap:
+ return NVPTXISD::Suld1DV2I16Trap;
+ case Intrinsic::nvvm_suld_1d_v2i32_trap:
+ return NVPTXISD::Suld1DV2I32Trap;
+ case Intrinsic::nvvm_suld_1d_v4i8_trap:
+ return NVPTXISD::Suld1DV4I8Trap;
+ case Intrinsic::nvvm_suld_1d_v4i16_trap:
+ return NVPTXISD::Suld1DV4I16Trap;
+ case Intrinsic::nvvm_suld_1d_v4i32_trap:
+ return NVPTXISD::Suld1DV4I32Trap;
+ case Intrinsic::nvvm_suld_1d_array_i8_trap:
+ return NVPTXISD::Suld1DArrayI8Trap;
+ case Intrinsic::nvvm_suld_1d_array_i16_trap:
+ return NVPTXISD::Suld1DArrayI16Trap;
+ case Intrinsic::nvvm_suld_1d_array_i32_trap:
+ return NVPTXISD::Suld1DArrayI32Trap;
+ case Intrinsic::nvvm_suld_1d_array_v2i8_trap:
+ return NVPTXISD::Suld1DArrayV2I8Trap;
+ case Intrinsic::nvvm_suld_1d_array_v2i16_trap:
+ return NVPTXISD::Suld1DArrayV2I16Trap;
+ case Intrinsic::nvvm_suld_1d_array_v2i32_trap:
+ return NVPTXISD::Suld1DArrayV2I32Trap;
+ case Intrinsic::nvvm_suld_1d_array_v4i8_trap:
+ return NVPTXISD::Suld1DArrayV4I8Trap;
+ case Intrinsic::nvvm_suld_1d_array_v4i16_trap:
+ return NVPTXISD::Suld1DArrayV4I16Trap;
+ case Intrinsic::nvvm_suld_1d_array_v4i32_trap:
+ return NVPTXISD::Suld1DArrayV4I32Trap;
+ case Intrinsic::nvvm_suld_2d_i8_trap:
+ return NVPTXISD::Suld2DI8Trap;
+ case Intrinsic::nvvm_suld_2d_i16_trap:
+ return NVPTXISD::Suld2DI16Trap;
+ case Intrinsic::nvvm_suld_2d_i32_trap:
+ return NVPTXISD::Suld2DI32Trap;
+ case Intrinsic::nvvm_suld_2d_v2i8_trap:
+ return NVPTXISD::Suld2DV2I8Trap;
+ case Intrinsic::nvvm_suld_2d_v2i16_trap:
+ return NVPTXISD::Suld2DV2I16Trap;
+ case Intrinsic::nvvm_suld_2d_v2i32_trap:
+ return NVPTXISD::Suld2DV2I32Trap;
+ case Intrinsic::nvvm_suld_2d_v4i8_trap:
+ return NVPTXISD::Suld2DV4I8Trap;
+ case Intrinsic::nvvm_suld_2d_v4i16_trap:
+ return NVPTXISD::Suld2DV4I16Trap;
+ case Intrinsic::nvvm_suld_2d_v4i32_trap:
+ return NVPTXISD::Suld2DV4I32Trap;
+ case Intrinsic::nvvm_suld_2d_array_i8_trap:
+ return NVPTXISD::Suld2DArrayI8Trap;
+ case Intrinsic::nvvm_suld_2d_array_i16_trap:
+ return NVPTXISD::Suld2DArrayI16Trap;
+ case Intrinsic::nvvm_suld_2d_array_i32_trap:
+ return NVPTXISD::Suld2DArrayI32Trap;
+ case Intrinsic::nvvm_suld_2d_array_v2i8_trap:
+ return NVPTXISD::Suld2DArrayV2I8Trap;
+ case Intrinsic::nvvm_suld_2d_array_v2i16_trap:
+ return NVPTXISD::Suld2DArrayV2I16Trap;
+ case Intrinsic::nvvm_suld_2d_array_v2i32_trap:
+ return NVPTXISD::Suld2DArrayV2I32Trap;
+ case Intrinsic::nvvm_suld_2d_array_v4i8_trap:
+ return NVPTXISD::Suld2DArrayV4I8Trap;
+ case Intrinsic::nvvm_suld_2d_array_v4i16_trap:
+ return NVPTXISD::Suld2DArrayV4I16Trap;
+ case Intrinsic::nvvm_suld_2d_array_v4i32_trap:
+ return NVPTXISD::Suld2DArrayV4I32Trap;
+ case Intrinsic::nvvm_suld_3d_i8_trap:
+ return NVPTXISD::Suld3DI8Trap;
+ case Intrinsic::nvvm_suld_3d_i16_trap:
+ return NVPTXISD::Suld3DI16Trap;
+ case Intrinsic::nvvm_suld_3d_i32_trap:
+ return NVPTXISD::Suld3DI32Trap;
+ case Intrinsic::nvvm_suld_3d_v2i8_trap:
+ return NVPTXISD::Suld3DV2I8Trap;
+ case Intrinsic::nvvm_suld_3d_v2i16_trap:
+ return NVPTXISD::Suld3DV2I16Trap;
+ case Intrinsic::nvvm_suld_3d_v2i32_trap:
+ return NVPTXISD::Suld3DV2I32Trap;
+ case Intrinsic::nvvm_suld_3d_v4i8_trap:
+ return NVPTXISD::Suld3DV4I8Trap;
+ case Intrinsic::nvvm_suld_3d_v4i16_trap:
+ return NVPTXISD::Suld3DV4I16Trap;
+ case Intrinsic::nvvm_suld_3d_v4i32_trap:
+ return NVPTXISD::Suld3DV4I32Trap;
+ }
+}
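+
+// As of this change, only the .trap clamp mode is handled: each
+// nvvm_suld_*_trap intrinsic maps onto the matching NVPTXISD::Suld*Trap
+// node.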
+
// llvm.ptx.memcpy.const and llvm.ptx.memmove.const need to be modeled as
// TgtMemIntrinsic
// because we need the information that is only available in the "Value" type
@@ -1944,6 +2237,142 @@ bool NVPTXTargetLowering::getTgtMemIntrinsic(
Info.align = 0;
return true;
+ case Intrinsic::nvvm_tex_1d_v4f32_i32:
+ case Intrinsic::nvvm_tex_1d_v4f32_f32:
+ case Intrinsic::nvvm_tex_1d_level_v4f32_f32:
+ case Intrinsic::nvvm_tex_1d_grad_v4f32_f32:
+ case Intrinsic::nvvm_tex_1d_array_v4f32_i32:
+ case Intrinsic::nvvm_tex_1d_array_v4f32_f32:
+ case Intrinsic::nvvm_tex_1d_array_level_v4f32_f32:
+ case Intrinsic::nvvm_tex_1d_array_grad_v4f32_f32:
+ case Intrinsic::nvvm_tex_2d_v4f32_i32:
+ case Intrinsic::nvvm_tex_2d_v4f32_f32:
+ case Intrinsic::nvvm_tex_2d_level_v4f32_f32:
+ case Intrinsic::nvvm_tex_2d_grad_v4f32_f32:
+ case Intrinsic::nvvm_tex_2d_array_v4f32_i32:
+ case Intrinsic::nvvm_tex_2d_array_v4f32_f32:
+ case Intrinsic::nvvm_tex_2d_array_level_v4f32_f32:
+ case Intrinsic::nvvm_tex_2d_array_grad_v4f32_f32:
+ case Intrinsic::nvvm_tex_3d_v4f32_i32:
+ case Intrinsic::nvvm_tex_3d_v4f32_f32:
+ case Intrinsic::nvvm_tex_3d_level_v4f32_f32:
+ case Intrinsic::nvvm_tex_3d_grad_v4f32_f32: {
+ Info.opc = getOpcForTextureInstr(Intrinsic);
+ Info.memVT = MVT::f32;
+ Info.ptrVal = nullptr;
+ Info.offset = 0;
+ Info.vol = 0;
+ Info.readMem = true;
+ Info.writeMem = false;
+ Info.align = 16;
+ return true;
+ }
+ case Intrinsic::nvvm_tex_1d_v4i32_i32:
+ case Intrinsic::nvvm_tex_1d_v4i32_f32:
+ case Intrinsic::nvvm_tex_1d_level_v4i32_f32:
+ case Intrinsic::nvvm_tex_1d_grad_v4i32_f32:
+ case Intrinsic::nvvm_tex_1d_array_v4i32_i32:
+ case Intrinsic::nvvm_tex_1d_array_v4i32_f32:
+ case Intrinsic::nvvm_tex_1d_array_level_v4i32_f32:
+ case Intrinsic::nvvm_tex_1d_array_grad_v4i32_f32:
+ case Intrinsic::nvvm_tex_2d_v4i32_i32:
+ case Intrinsic::nvvm_tex_2d_v4i32_f32:
+ case Intrinsic::nvvm_tex_2d_level_v4i32_f32:
+ case Intrinsic::nvvm_tex_2d_grad_v4i32_f32:
+ case Intrinsic::nvvm_tex_2d_array_v4i32_i32:
+ case Intrinsic::nvvm_tex_2d_array_v4i32_f32:
+ case Intrinsic::nvvm_tex_2d_array_level_v4i32_f32:
+ case Intrinsic::nvvm_tex_2d_array_grad_v4i32_f32:
+ case Intrinsic::nvvm_tex_3d_v4i32_i32:
+ case Intrinsic::nvvm_tex_3d_v4i32_f32:
+ case Intrinsic::nvvm_tex_3d_level_v4i32_f32:
+ case Intrinsic::nvvm_tex_3d_grad_v4i32_f32: {
+ Info.opc = getOpcForTextureInstr(Intrinsic);
+ Info.memVT = MVT::i32;
+ Info.ptrVal = nullptr;
+ Info.offset = 0;
+ Info.vol = 0;
+ Info.readMem = true;
+ Info.writeMem = false;
+ Info.align = 16;
+ return true;
+ }
+ case Intrinsic::nvvm_suld_1d_i8_trap:
+ case Intrinsic::nvvm_suld_1d_v2i8_trap:
+ case Intrinsic::nvvm_suld_1d_v4i8_trap:
+ case Intrinsic::nvvm_suld_1d_array_i8_trap:
+ case Intrinsic::nvvm_suld_1d_array_v2i8_trap:
+ case Intrinsic::nvvm_suld_1d_array_v4i8_trap:
+ case Intrinsic::nvvm_suld_2d_i8_trap:
+ case Intrinsic::nvvm_suld_2d_v2i8_trap:
+ case Intrinsic::nvvm_suld_2d_v4i8_trap:
+ case Intrinsic::nvvm_suld_2d_array_i8_trap:
+ case Intrinsic::nvvm_suld_2d_array_v2i8_trap:
+ case Intrinsic::nvvm_suld_2d_array_v4i8_trap:
+ case Intrinsic::nvvm_suld_3d_i8_trap:
+ case Intrinsic::nvvm_suld_3d_v2i8_trap:
+ case Intrinsic::nvvm_suld_3d_v4i8_trap: {
+ Info.opc = getOpcForSurfaceInstr(Intrinsic);
+ Info.memVT = MVT::i8;
+ Info.ptrVal = nullptr;
+ Info.offset = 0;
+ Info.vol = 0;
+ Info.readMem = true;
+ Info.writeMem = false;
+ Info.align = 16;
+ return true;
+ }
+ case Intrinsic::nvvm_suld_1d_i16_trap:
+ case Intrinsic::nvvm_suld_1d_v2i16_trap:
+ case Intrinsic::nvvm_suld_1d_v4i16_trap:
+ case Intrinsic::nvvm_suld_1d_array_i16_trap:
+ case Intrinsic::nvvm_suld_1d_array_v2i16_trap:
+ case Intrinsic::nvvm_suld_1d_array_v4i16_trap:
+ case Intrinsic::nvvm_suld_2d_i16_trap:
+ case Intrinsic::nvvm_suld_2d_v2i16_trap:
+ case Intrinsic::nvvm_suld_2d_v4i16_trap:
+ case Intrinsic::nvvm_suld_2d_array_i16_trap:
+ case Intrinsic::nvvm_suld_2d_array_v2i16_trap:
+ case Intrinsic::nvvm_suld_2d_array_v4i16_trap:
+ case Intrinsic::nvvm_suld_3d_i16_trap:
+ case Intrinsic::nvvm_suld_3d_v2i16_trap:
+ case Intrinsic::nvvm_suld_3d_v4i16_trap: {
+ Info.opc = getOpcForSurfaceInstr(Intrinsic);
+ Info.memVT = MVT::i16;
+ Info.ptrVal = nullptr;
+ Info.offset = 0;
+ Info.vol = 0;
+ Info.readMem = true;
+ Info.writeMem = false;
+ Info.align = 16;
+ return true;
+ }
+ case Intrinsic::nvvm_suld_1d_i32_trap:
+ case Intrinsic::nvvm_suld_1d_v2i32_trap:
+ case Intrinsic::nvvm_suld_1d_v4i32_trap:
+ case Intrinsic::nvvm_suld_1d_array_i32_trap:
+ case Intrinsic::nvvm_suld_1d_array_v2i32_trap:
+ case Intrinsic::nvvm_suld_1d_array_v4i32_trap:
+ case Intrinsic::nvvm_suld_2d_i32_trap:
+ case Intrinsic::nvvm_suld_2d_v2i32_trap:
+ case Intrinsic::nvvm_suld_2d_v4i32_trap:
+ case Intrinsic::nvvm_suld_2d_array_i32_trap:
+ case Intrinsic::nvvm_suld_2d_array_v2i32_trap:
+ case Intrinsic::nvvm_suld_2d_array_v4i32_trap:
+ case Intrinsic::nvvm_suld_3d_i32_trap:
+ case Intrinsic::nvvm_suld_3d_v2i32_trap:
+ case Intrinsic::nvvm_suld_3d_v4i32_trap: {
+ Info.opc = getOpcForSurfaceInstr(Intrinsic);
+ Info.memVT = MVT::i32;
+ Info.ptrVal = nullptr;
+ Info.offset = 0;
+ Info.vol = 0;
+ Info.readMem = true;
+ Info.writeMem = false;
+ Info.align = 16;
+ return true;
+ }
+
}
return false;
}
@@ -2094,7 +2523,7 @@ static void ReplaceLoadVector(SDNode *N, SelectionDAG &DAG,
case 4: {
Opcode = NVPTXISD::LoadV4;
EVT ListVTs[] = { EltVT, EltVT, EltVT, EltVT, MVT::Other };
- LdResVTs = DAG.getVTList(ListVTs, 5);
+ LdResVTs = DAG.getVTList(ListVTs);
break;
}
}
@@ -2111,8 +2540,8 @@ static void ReplaceLoadVector(SDNode *N, SelectionDAG &DAG,
// pass along the extension information
OtherOps.push_back(DAG.getIntPtrConstant(LD->getExtensionType()));
- SDValue NewLD = DAG.getMemIntrinsicNode(Opcode, DL, LdResVTs, &OtherOps[0],
- OtherOps.size(), LD->getMemoryVT(),
+ SDValue NewLD = DAG.getMemIntrinsicNode(Opcode, DL, LdResVTs, OtherOps,
+ LD->getMemoryVT(),
LD->getMemOperand());
SmallVector<SDValue, 4> ScalarRes;
@@ -2126,8 +2555,7 @@ static void ReplaceLoadVector(SDNode *N, SelectionDAG &DAG,
SDValue LoadChain = NewLD.getValue(NumElts);
- SDValue BuildVec =
- DAG.getNode(ISD::BUILD_VECTOR, DL, ResVT, &ScalarRes[0], NumElts);
+ SDValue BuildVec = DAG.getNode(ISD::BUILD_VECTOR, DL, ResVT, ScalarRes);
Results.push_back(BuildVec);
Results.push_back(LoadChain);
@@ -2207,7 +2635,7 @@ static void ReplaceINTRINSIC_W_CHAIN(SDNode *N, SelectionDAG &DAG,
break;
}
EVT ListVTs[] = { EltVT, EltVT, EltVT, EltVT, MVT::Other };
- LdResVTs = DAG.getVTList(ListVTs, 5);
+ LdResVTs = DAG.getVTList(ListVTs);
break;
}
}
@@ -2224,9 +2652,9 @@ static void ReplaceINTRINSIC_W_CHAIN(SDNode *N, SelectionDAG &DAG,
MemIntrinsicSDNode *MemSD = cast<MemIntrinsicSDNode>(N);
- SDValue NewLD = DAG.getMemIntrinsicNode(
- Opcode, DL, LdResVTs, &OtherOps[0], OtherOps.size(),
- MemSD->getMemoryVT(), MemSD->getMemOperand());
+ SDValue NewLD = DAG.getMemIntrinsicNode(Opcode, DL, LdResVTs, OtherOps,
+ MemSD->getMemoryVT(),
+ MemSD->getMemOperand());
SmallVector<SDValue, 4> ScalarRes;
@@ -2241,7 +2669,7 @@ static void ReplaceINTRINSIC_W_CHAIN(SDNode *N, SelectionDAG &DAG,
SDValue LoadChain = NewLD.getValue(NumElts);
SDValue BuildVec =
- DAG.getNode(ISD::BUILD_VECTOR, DL, ResVT, &ScalarRes[0], NumElts);
+ DAG.getNode(ISD::BUILD_VECTOR, DL, ResVT, ScalarRes);
Results.push_back(BuildVec);
Results.push_back(LoadChain);
@@ -2263,8 +2691,8 @@ static void ReplaceINTRINSIC_W_CHAIN(SDNode *N, SelectionDAG &DAG,
// We make sure the memory type is i8, which will be used during isel
// to select the proper instruction.
SDValue NewLD =
- DAG.getMemIntrinsicNode(ISD::INTRINSIC_W_CHAIN, DL, LdResVTs, &Ops[0],
- Ops.size(), MVT::i8, MemSD->getMemOperand());
+ DAG.getMemIntrinsicNode(ISD::INTRINSIC_W_CHAIN, DL, LdResVTs, Ops,
+ MVT::i8, MemSD->getMemOperand());
Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, MVT::i8,
NewLD.getValue(0)));
diff --git a/lib/Target/NVPTX/NVPTXISelLowering.h b/lib/Target/NVPTX/NVPTXISelLowering.h
index c1e8c21..7bad8a2 100644
--- a/lib/Target/NVPTX/NVPTXISelLowering.h
+++ b/lib/Target/NVPTX/NVPTXISelLowering.h
@@ -70,7 +70,100 @@ enum NodeType {
    StoreParamU32, // to zext and store a <32-bit value; not used currently
StoreRetval,
StoreRetvalV2,
- StoreRetvalV4
+ StoreRetvalV4,
+
+ // Texture intrinsics
+ Tex1DFloatI32,
+ Tex1DFloatFloat,
+ Tex1DFloatFloatLevel,
+ Tex1DFloatFloatGrad,
+ Tex1DI32I32,
+ Tex1DI32Float,
+ Tex1DI32FloatLevel,
+ Tex1DI32FloatGrad,
+ Tex1DArrayFloatI32,
+ Tex1DArrayFloatFloat,
+ Tex1DArrayFloatFloatLevel,
+ Tex1DArrayFloatFloatGrad,
+ Tex1DArrayI32I32,
+ Tex1DArrayI32Float,
+ Tex1DArrayI32FloatLevel,
+ Tex1DArrayI32FloatGrad,
+ Tex2DFloatI32,
+ Tex2DFloatFloat,
+ Tex2DFloatFloatLevel,
+ Tex2DFloatFloatGrad,
+ Tex2DI32I32,
+ Tex2DI32Float,
+ Tex2DI32FloatLevel,
+ Tex2DI32FloatGrad,
+ Tex2DArrayFloatI32,
+ Tex2DArrayFloatFloat,
+ Tex2DArrayFloatFloatLevel,
+ Tex2DArrayFloatFloatGrad,
+ Tex2DArrayI32I32,
+ Tex2DArrayI32Float,
+ Tex2DArrayI32FloatLevel,
+ Tex2DArrayI32FloatGrad,
+ Tex3DFloatI32,
+ Tex3DFloatFloat,
+ Tex3DFloatFloatLevel,
+ Tex3DFloatFloatGrad,
+ Tex3DI32I32,
+ Tex3DI32Float,
+ Tex3DI32FloatLevel,
+ Tex3DI32FloatGrad,
+
+ // Surface intrinsics
+ Suld1DI8Trap,
+ Suld1DI16Trap,
+ Suld1DI32Trap,
+ Suld1DV2I8Trap,
+ Suld1DV2I16Trap,
+ Suld1DV2I32Trap,
+ Suld1DV4I8Trap,
+ Suld1DV4I16Trap,
+ Suld1DV4I32Trap,
+
+ Suld1DArrayI8Trap,
+ Suld1DArrayI16Trap,
+ Suld1DArrayI32Trap,
+ Suld1DArrayV2I8Trap,
+ Suld1DArrayV2I16Trap,
+ Suld1DArrayV2I32Trap,
+ Suld1DArrayV4I8Trap,
+ Suld1DArrayV4I16Trap,
+ Suld1DArrayV4I32Trap,
+
+ Suld2DI8Trap,
+ Suld2DI16Trap,
+ Suld2DI32Trap,
+ Suld2DV2I8Trap,
+ Suld2DV2I16Trap,
+ Suld2DV2I32Trap,
+ Suld2DV4I8Trap,
+ Suld2DV4I16Trap,
+ Suld2DV4I32Trap,
+
+ Suld2DArrayI8Trap,
+ Suld2DArrayI16Trap,
+ Suld2DArrayI32Trap,
+ Suld2DArrayV2I8Trap,
+ Suld2DArrayV2I16Trap,
+ Suld2DArrayV2I32Trap,
+ Suld2DArrayV4I8Trap,
+ Suld2DArrayV4I16Trap,
+ Suld2DArrayV4I32Trap,
+
+ Suld3DI8Trap,
+ Suld3DI16Trap,
+ Suld3DI32Trap,
+ Suld3DV2I8Trap,
+ Suld3DV2I16Trap,
+ Suld3DV2I32Trap,
+ Suld3DV4I8Trap,
+ Suld3DV4I16Trap,
+ Suld3DV4I32Trap
};
}
@@ -80,68 +173,70 @@ enum NodeType {
class NVPTXTargetLowering : public TargetLowering {
public:
explicit NVPTXTargetLowering(NVPTXTargetMachine &TM);
- virtual SDValue LowerOperation(SDValue Op, SelectionDAG &DAG) const;
+ SDValue LowerOperation(SDValue Op, SelectionDAG &DAG) const override;
SDValue LowerGlobalAddress(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerGlobalAddress(const GlobalValue *GV, int64_t Offset,
SelectionDAG &DAG) const;
- virtual const char *getTargetNodeName(unsigned Opcode) const;
+ const char *getTargetNodeName(unsigned Opcode) const override;
bool isTypeSupportedInIntrinsic(MVT VT) const;
bool getTgtMemIntrinsic(IntrinsicInfo &Info, const CallInst &I,
- unsigned Intrinsic) const;
+ unsigned Intrinsic) const override;
/// isLegalAddressingMode - Return true if the addressing mode represented
/// by AM is legal for this target, for a load/store of the specified type
/// Used to guide target specific optimizations, like loop strength
/// reduction (LoopStrengthReduce.cpp) and memory optimization for
/// address mode (CodeGenPrepare.cpp)
- virtual bool isLegalAddressingMode(const AddrMode &AM, Type *Ty) const;
+ bool isLegalAddressingMode(const AddrMode &AM, Type *Ty) const override;
/// getFunctionAlignment - Return the Log2 alignment of this function.
- virtual unsigned getFunctionAlignment(const Function *F) const;
+ unsigned getFunctionAlignment(const Function *F) const;
- virtual EVT getSetCCResultType(LLVMContext &, EVT VT) const {
+ EVT getSetCCResultType(LLVMContext &, EVT VT) const override {
if (VT.isVector())
return MVT::getVectorVT(MVT::i1, VT.getVectorNumElements());
return MVT::i1;
}
- ConstraintType getConstraintType(const std::string &Constraint) const;
+ ConstraintType
+ getConstraintType(const std::string &Constraint) const override;
std::pair<unsigned, const TargetRegisterClass *>
- getRegForInlineAsmConstraint(const std::string &Constraint, MVT VT) const;
+ getRegForInlineAsmConstraint(const std::string &Constraint,
+ MVT VT) const override;
- virtual SDValue LowerFormalArguments(
+ SDValue LowerFormalArguments(
SDValue Chain, CallingConv::ID CallConv, bool isVarArg,
const SmallVectorImpl<ISD::InputArg> &Ins, SDLoc dl, SelectionDAG &DAG,
- SmallVectorImpl<SDValue> &InVals) const;
+ SmallVectorImpl<SDValue> &InVals) const override;
- virtual SDValue
- LowerCall(CallLoweringInfo &CLI, SmallVectorImpl<SDValue> &InVals) const;
+ SDValue LowerCall(CallLoweringInfo &CLI,
+ SmallVectorImpl<SDValue> &InVals) const override;
std::string getPrototype(Type *, const ArgListTy &,
const SmallVectorImpl<ISD::OutputArg> &,
unsigned retAlignment,
const ImmutableCallSite *CS) const;
- virtual SDValue
+ SDValue
LowerReturn(SDValue Chain, CallingConv::ID CallConv, bool isVarArg,
const SmallVectorImpl<ISD::OutputArg> &Outs,
const SmallVectorImpl<SDValue> &OutVals, SDLoc dl,
- SelectionDAG &DAG) const;
+ SelectionDAG &DAG) const override;
- virtual void LowerAsmOperandForConstraint(SDValue Op, std::string &Constraint,
- std::vector<SDValue> &Ops,
- SelectionDAG &DAG) const;
+ void LowerAsmOperandForConstraint(SDValue Op, std::string &Constraint,
+ std::vector<SDValue> &Ops,
+ SelectionDAG &DAG) const override;
NVPTXTargetMachine *nvTM;
// PTX always uses 32-bit shift amounts
- virtual MVT getScalarShiftAmountTy(EVT LHSTy) const { return MVT::i32; }
+ MVT getScalarShiftAmountTy(EVT LHSTy) const override { return MVT::i32; }
- virtual bool shouldSplitVectorType(EVT VT) const override;
+ bool shouldSplitVectorType(EVT VT) const override;
private:
const NVPTXSubtarget &nvptxSubtarget; // cache the subtarget here
@@ -160,8 +255,8 @@ private:
SDValue LowerSTOREi1(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerSTOREVector(SDValue Op, SelectionDAG &DAG) const;
- virtual void ReplaceNodeResults(SDNode *N, SmallVectorImpl<SDValue> &Results,
- SelectionDAG &DAG) const;
+ void ReplaceNodeResults(SDNode *N, SmallVectorImpl<SDValue> &Results,
+ SelectionDAG &DAG) const override;
unsigned getArgumentAlignment(SDValue Callee, const ImmutableCallSite *CS,
Type *Ty, unsigned Idx) const;
diff --git a/lib/Target/NVPTX/NVPTXImageOptimizer.cpp b/lib/Target/NVPTX/NVPTXImageOptimizer.cpp
new file mode 100644
index 0000000..397f4bc
--- /dev/null
+++ b/lib/Target/NVPTX/NVPTXImageOptimizer.cpp
@@ -0,0 +1,178 @@
+//===-- NVPTXImageOptimizer.cpp - Image optimization pass -----------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This pass implements IR-level optimizations of image access code,
+// including:
+//
+// 1. Eliminate istypep intrinsics when image access qualifier is known
+//
+//===----------------------------------------------------------------------===//
+
+#include "NVPTX.h"
+#include "NVPTXUtilities.h"
+#include "llvm/IR/Instructions.h"
+#include "llvm/IR/Intrinsics.h"
+#include "llvm/IR/Module.h"
+#include "llvm/Pass.h"
+#include "llvm/Analysis/ConstantFolding.h"
+
+using namespace llvm;
+
+namespace {
+class NVPTXImageOptimizer : public FunctionPass {
+private:
+ static char ID;
+ SmallVector<Instruction*, 4> InstrToDelete;
+
+public:
+ NVPTXImageOptimizer();
+
+ bool runOnFunction(Function &F) override;
+
+private:
+ bool replaceIsTypePSampler(Instruction &I);
+ bool replaceIsTypePSurface(Instruction &I);
+ bool replaceIsTypePTexture(Instruction &I);
+ Value *cleanupValue(Value *V);
+ void replaceWith(Instruction *From, ConstantInt *To);
+};
+}
+
+char NVPTXImageOptimizer::ID = 0;
+
+NVPTXImageOptimizer::NVPTXImageOptimizer()
+ : FunctionPass(ID) {}
+
+bool NVPTXImageOptimizer::runOnFunction(Function &F) {
+ bool Changed = false;
+ InstrToDelete.clear();
+
+ // Look for call instructions in the function
+ for (Function::iterator BI = F.begin(), BE = F.end(); BI != BE;
+ ++BI) {
+ for (BasicBlock::iterator I = (*BI).begin(), E = (*BI).end();
+ I != E; ++I) {
+ Instruction &Instr = *I;
+ if (CallInst *CI = dyn_cast<CallInst>(I)) {
+ Function *CalledF = CI->getCalledFunction();
+ if (CalledF && CalledF->isIntrinsic()) {
+          // This is an intrinsic function call; check if it's an istypep
+ switch (CalledF->getIntrinsicID()) {
+ default: break;
+ case Intrinsic::nvvm_istypep_sampler:
+ Changed |= replaceIsTypePSampler(Instr);
+ break;
+ case Intrinsic::nvvm_istypep_surface:
+ Changed |= replaceIsTypePSurface(Instr);
+ break;
+ case Intrinsic::nvvm_istypep_texture:
+ Changed |= replaceIsTypePTexture(Instr);
+ break;
+ }
+ }
+ }
+ }
+ }
+
+ // Delete any istypep instances we replaced in the IR
+ for (unsigned i = 0, e = InstrToDelete.size(); i != e; ++i)
+ InstrToDelete[i]->eraseFromParent();
+
+ return Changed;
+}
+
+bool NVPTXImageOptimizer::replaceIsTypePSampler(Instruction &I) {
+ Value *TexHandle = cleanupValue(I.getOperand(0));
+ if (isSampler(*TexHandle)) {
+ // This is an OpenCL sampler, so it must be a samplerref
+ replaceWith(&I, ConstantInt::getTrue(I.getContext()));
+ return true;
+ } else if (isImageWriteOnly(*TexHandle) ||
+ isImageReadWrite(*TexHandle) ||
+ isImageReadOnly(*TexHandle)) {
+ // This is an OpenCL image, so it cannot be a samplerref
+ replaceWith(&I, ConstantInt::getFalse(I.getContext()));
+ return true;
+ } else {
+ // The image type is unknown, so we cannot eliminate the intrinsic
+ return false;
+ }
+}
+
+bool NVPTXImageOptimizer::replaceIsTypePSurface(Instruction &I) {
+ Value *TexHandle = cleanupValue(I.getOperand(0));
+ if (isImageReadWrite(*TexHandle) ||
+ isImageWriteOnly(*TexHandle)) {
+    // This is an OpenCL read-write/write-only image, so it must be a surfref
+ replaceWith(&I, ConstantInt::getTrue(I.getContext()));
+ return true;
+ } else if (isImageReadOnly(*TexHandle) ||
+ isSampler(*TexHandle)) {
+    // This is an OpenCL read-only image or a sampler, so it cannot be
+ // a surfref
+ replaceWith(&I, ConstantInt::getFalse(I.getContext()));
+ return true;
+ } else {
+ // The image type is unknown, so we cannot eliminate the intrinsic
+ return false;
+ }
+}
+
+bool NVPTXImageOptimizer::replaceIsTypePTexture(Instruction &I) {
+ Value *TexHandle = cleanupValue(I.getOperand(0));
+ if (isImageReadOnly(*TexHandle)) {
+ // This is an OpenCL read-only image, so it must be a texref
+ replaceWith(&I, ConstantInt::getTrue(I.getContext()));
+ return true;
+ } else if (isImageWriteOnly(*TexHandle) ||
+ isImageReadWrite(*TexHandle) ||
+ isSampler(*TexHandle)) {
+ // This is an OpenCL read-write/write-only image or a sampler, so it
+ // cannot be a texref
+ replaceWith(&I, ConstantInt::getFalse(I.getContext()));
+ return true;
+ } else {
+ // The image type is unknown, so we cannot eliminate the intrinsic
+ return false;
+ }
+}
+
+void NVPTXImageOptimizer::replaceWith(Instruction *From, ConstantInt *To) {
+ // We implement "poor man's DCE" here to make sure any code that is no longer
+ // live is actually unreachable and can be trivially eliminated by the
+  // unreachable block elimination pass.
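+  // For example (illustrative IR):
+  //   %p = call i1 @llvm.nvvm.istypep.texture(i64 %h)
+  //   br i1 %p, label %is.tex, label %not.tex
+  // Folding %p to 'true' also rewrites the branch to 'br label %is.tex',
+  // leaving %not.tex unreachable.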
+ for (CallInst::use_iterator UI = From->use_begin(), UE = From->use_end();
+ UI != UE; ++UI) {
+ if (BranchInst *BI = dyn_cast<BranchInst>(*UI)) {
+ if (BI->isUnconditional()) continue;
+ BasicBlock *Dest;
+ if (To->isZero())
+ // Get false block
+ Dest = BI->getSuccessor(1);
+ else
+ // Get true block
+ Dest = BI->getSuccessor(0);
+ BranchInst::Create(Dest, BI);
+ InstrToDelete.push_back(BI);
+ }
+ }
+ From->replaceAllUsesWith(To);
+ InstrToDelete.push_back(From);
+}
+
+Value *NVPTXImageOptimizer::cleanupValue(Value *V) {
+ if (ExtractValueInst *EVI = dyn_cast<ExtractValueInst>(V)) {
+ return cleanupValue(EVI->getAggregateOperand());
+ }
+ return V;
+}
+
+FunctionPass *llvm::createNVPTXImageOptimizerPass() {
+ return new NVPTXImageOptimizer();
+}
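
A minimal usage sketch (assumed driver code, not part of this patch): the new
pass can be scheduled with the legacy PassManager of this LLVM version and run
over a module. The helper name and the standalone setting are illustrative.

    #include "NVPTX.h"            // declares llvm::createNVPTXImageOptimizerPass()
    #include "llvm/IR/Module.h"
    #include "llvm/PassManager.h" // legacy pass manager header at this revision

    // Illustrative helper: fold istypep intrinsics in every function of M.
    static void foldIsTypePIntrinsics(llvm::Module &M) {
      llvm::PassManager PM;
      PM.add(llvm::createNVPTXImageOptimizerPass());
      PM.run(M); // the FunctionPass is run on each function in the module
    }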
diff --git a/lib/Target/NVPTX/NVPTXInstrInfo.cpp b/lib/Target/NVPTX/NVPTXInstrInfo.cpp
index 86ddd38..cdc8088 100644
--- a/lib/Target/NVPTX/NVPTXInstrInfo.cpp
+++ b/lib/Target/NVPTX/NVPTXInstrInfo.cpp
@@ -14,8 +14,6 @@
#include "NVPTX.h"
#include "NVPTXInstrInfo.h"
#include "NVPTXTargetMachine.h"
-#define GET_INSTRINFO_CTOR_DTOR
-#include "NVPTXGenInstrInfo.inc"
#include "llvm/IR/Function.h"
#include "llvm/ADT/STLExtras.h"
#include "llvm/CodeGen/MachineFunction.h"
@@ -24,6 +22,9 @@
using namespace llvm;
+#define GET_INSTRINFO_CTOR_DTOR
+#include "NVPTXGenInstrInfo.inc"
+
// Pin the vtable to this file.
void NVPTXInstrInfo::anchor() {}
@@ -256,7 +257,7 @@ unsigned NVPTXInstrInfo::InsertBranch(
"NVPTX branch conditions have two components!");
// One-way branch.
- if (FBB == 0) {
+ if (!FBB) {
if (Cond.empty()) // Unconditional branch
BuildMI(&MBB, DL, get(NVPTX::GOTO)).addMBB(TBB);
else // Conditional branch
diff --git a/lib/Target/NVPTX/NVPTXInstrInfo.h b/lib/Target/NVPTX/NVPTXInstrInfo.h
index 600fc5c..88a9e45 100644
--- a/lib/Target/NVPTX/NVPTXInstrInfo.h
+++ b/lib/Target/NVPTX/NVPTXInstrInfo.h
@@ -30,7 +30,7 @@ class NVPTXInstrInfo : public NVPTXGenInstrInfo {
public:
explicit NVPTXInstrInfo(NVPTXTargetMachine &TM);
- virtual const NVPTXRegisterInfo &getRegisterInfo() const { return RegInfo; }
+ const NVPTXRegisterInfo &getRegisterInfo() const { return RegInfo; }
/* The following virtual functions are used in register allocation.
* They are not implemented because the existing interface and the logic
@@ -50,9 +50,9 @@ public:
* const TargetRegisterClass *RC) const;
*/
- virtual void copyPhysReg(
+ void copyPhysReg(
MachineBasicBlock &MBB, MachineBasicBlock::iterator I, DebugLoc DL,
- unsigned DestReg, unsigned SrcReg, bool KillSrc) const;
+ unsigned DestReg, unsigned SrcReg, bool KillSrc) const override;
virtual bool isMoveInstr(const MachineInstr &MI, unsigned &SrcReg,
unsigned &DestReg) const;
bool isLoadInstr(const MachineInstr &MI, unsigned &AddrSpace) const;
@@ -61,13 +61,13 @@ public:
virtual bool CanTailMerge(const MachineInstr *MI) const;
// Branch analysis.
- virtual bool AnalyzeBranch(
+ bool AnalyzeBranch(
MachineBasicBlock &MBB, MachineBasicBlock *&TBB, MachineBasicBlock *&FBB,
- SmallVectorImpl<MachineOperand> &Cond, bool AllowModify) const;
- virtual unsigned RemoveBranch(MachineBasicBlock &MBB) const;
- virtual unsigned InsertBranch(
+ SmallVectorImpl<MachineOperand> &Cond, bool AllowModify) const override;
+ unsigned RemoveBranch(MachineBasicBlock &MBB) const override;
+ unsigned InsertBranch(
MachineBasicBlock &MBB, MachineBasicBlock *TBB, MachineBasicBlock *FBB,
- const SmallVectorImpl<MachineOperand> &Cond, DebugLoc DL) const;
+ const SmallVectorImpl<MachineOperand> &Cond, DebugLoc DL) const override;
unsigned getLdStCodeAddrSpace(const MachineInstr &MI) const {
return MI.getOperand(2).getImm();
}
diff --git a/lib/Target/NVPTX/NVPTXIntrinsics.td b/lib/Target/NVPTX/NVPTXIntrinsics.td
index 14049b1..5e228fc 100644
--- a/lib/Target/NVPTX/NVPTXIntrinsics.td
+++ b/lib/Target/NVPTX/NVPTXIntrinsics.td
@@ -1666,6 +1666,9 @@ def : Pat<(i32 (int_nvvm_ptr_gen_to_local (int_nvvm_ptr_local_to_gen
(MoveParam texternalsym:$src)))),
(nvvm_move_ptr32 texternalsym:$src)>;
+def texsurf_handles
+ : NVPTXInst<(outs Int64Regs:$result), (ins imem:$src),
+ "mov.u64 \t$result, $src;", []>;
//-----------------------------------
// Compiler Error Warn
@@ -1686,6 +1689,1826 @@ def INT_NVVM_COMPILER_ERROR_64 : NVPTXInst<(outs), (ins Int64Regs:$a),
[(int_nvvm_compiler_error Int64Regs:$a)]>;
+//-----------------------------------
+// Texture Intrinsics
+//-----------------------------------
+
+// NOTE: For Fermi support, any new texture/surface/sampler intrinsics must
+// also be defined in NVPTXReplaceImageHandles.cpp
+
+
+// Texture fetch instructions using handles
+def TEX_1D_F32_I32
+ : NVPTXInst<(outs Float32Regs:$r, Float32Regs:$g,
+ Float32Regs:$b, Float32Regs:$a),
+ (ins Int64Regs:$t, Int64Regs:$s, Int32Regs:$x),
+ "tex.1d.v4.f32.s32\t\\{$r, $g, $b, $a\\}, [$t, $s, \\{$x\\}];",
+ []>;
+def TEX_1D_F32_F32
+ : NVPTXInst<(outs Float32Regs:$r, Float32Regs:$g,
+ Float32Regs:$b, Float32Regs:$a),
+ (ins Int64Regs:$t, Int64Regs:$s, Float32Regs:$x),
+ "tex.1d.v4.f32.f32\t\\{$r, $g, $b, $a\\}, [$t, $s, \\{$x\\}];",
+ []>;
+def TEX_1D_F32_F32_LEVEL
+ : NVPTXInst<(outs Float32Regs:$r, Float32Regs:$g,
+ Float32Regs:$b, Float32Regs:$a),
+ (ins Int64Regs:$t, Int64Regs:$s, Float32Regs:$x, Float32Regs:$lod),
+ "tex.level.1d.v4.f32.f32\t\\{$r, $g, $b, $a\\}, "
+ "[$t, $s, \\{$x\\}], $lod;",
+ []>;
+def TEX_1D_F32_F32_GRAD
+ : NVPTXInst<(outs Float32Regs:$r, Float32Regs:$g,
+ Float32Regs:$b, Float32Regs:$a),
+ (ins Int64Regs:$t, Int64Regs:$s, Float32Regs:$x,
+ Float32Regs:$gradx, Float32Regs:$grady),
+ "tex.grad.1d.v4.f32.f32\t\\{$r, $g, $b, $a\\}, "
+ "[$t, $s, \\{$x\\}], \\{$gradx\\}, \\{$grady\\};",
+ []>;
+def TEX_1D_I32_I32
+ : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g,
+ Int32Regs:$b, Int32Regs:$a),
+ (ins Int64Regs:$t, Int64Regs:$s, Int32Regs:$x),
+ "tex.1d.v4.s32.s32\t\\{$r, $g, $b, $a\\}, [$t, $s, \\{$x\\}];",
+ []>;
+def TEX_1D_I32_F32
+ : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g,
+ Int32Regs:$b, Int32Regs:$a),
+ (ins Int64Regs:$t, Int64Regs:$s, Float32Regs:$x),
+ "tex.1d.v4.s32.f32\t\\{$r, $g, $b, $a\\}, [$t, $s, \\{$x\\}];",
+ []>;
+def TEX_1D_I32_F32_LEVEL
+ : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g,
+ Int32Regs:$b, Int32Regs:$a),
+ (ins Int64Regs:$t, Int64Regs:$s, Float32Regs:$x,
+ Float32Regs:$lod),
+ "tex.level.1d.v4.s32.f32\t\\{$r, $g, $b, $a\\}, "
+ "[$t, $s, \\{$x\\}], $lod;",
+ []>;
+def TEX_1D_I32_F32_GRAD
+ : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g,
+ Int32Regs:$b, Int32Regs:$a),
+ (ins Int64Regs:$t, Int64Regs:$s, Float32Regs:$x,
+ Float32Regs:$gradx, Float32Regs:$grady),
+ "tex.grad.1d.v4.s32.f32\t\\{$r, $g, $b, $a\\}, "
+ "[$t, $s, \\{$x\\}], \\{$gradx\\}, \\{$grady\\};",
+ []>;
+
+def TEX_1D_ARRAY_F32_I32
+ : NVPTXInst<(outs Float32Regs:$r, Float32Regs:$g,
+ Float32Regs:$b, Float32Regs:$a),
+ (ins Int64Regs:$t, Int64Regs:$s, Int32Regs:$l, Int32Regs:$x),
+ "tex.a1d.v4.f32.s32\t\\{$r, $g, $b, $a\\}, "
+ "[$t, $s, \\{$l, $x\\}];",
+ []>;
+def TEX_1D_ARRAY_F32_F32
+ : NVPTXInst<(outs Float32Regs:$r, Float32Regs:$g,
+ Float32Regs:$b, Float32Regs:$a),
+ (ins Int64Regs:$t, Int64Regs:$s, Int32Regs:$l, Float32Regs:$x),
+ "tex.a1d.v4.f32.f32\t\\{$r, $g, $b, $a\\}, "
+ "[$t, $s, \\{$l, $x\\}];",
+ []>;
+def TEX_1D_ARRAY_F32_F32_LEVEL
+ : NVPTXInst<(outs Float32Regs:$r, Float32Regs:$g,
+ Float32Regs:$b, Float32Regs:$a),
+ (ins Int64Regs:$t, Int64Regs:$s, Int32Regs:$l, Float32Regs:$x,
+ Float32Regs:$lod),
+ "tex.level.a1d.v4.f32.f32\t\\{$r, $g, $b, $a\\}, "
+ "[$t, $s, \\{$l, $x\\}], $lod;",
+ []>;
+def TEX_1D_ARRAY_F32_F32_GRAD
+ : NVPTXInst<(outs Float32Regs:$r, Float32Regs:$g,
+ Float32Regs:$b, Float32Regs:$a),
+ (ins Int64Regs:$t, Int64Regs:$s, Int32Regs:$l, Float32Regs:$x,
+ Float32Regs:$gradx, Float32Regs:$grady),
+ "tex.grad.a1d.v4.f32.f32\t\\{$r, $g, $b, $a\\}, "
+ "[$t, $s, \\{$l, $x\\}], \\{$gradx\\}, \\{$grady\\};",
+ []>;
+def TEX_1D_ARRAY_I32_I32
+ : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g,
+ Int32Regs:$b, Int32Regs:$a),
+ (ins Int64Regs:$t, Int64Regs:$s, Int32Regs:$l, Int32Regs:$x),
+ "tex.a1d.v4.s32.s32\t\\{$r, $g, $b, $a\\}, "
+ "[$t, $s, \\{$l, $x\\}];",
+ []>;
+def TEX_1D_ARRAY_I32_F32
+ : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g,
+ Int32Regs:$b, Int32Regs:$a),
+ (ins Int64Regs:$t, Int64Regs:$s, Int32Regs:$l, Float32Regs:$x),
+ "tex.a1d.v4.s32.f32\t\\{$r, $g, $b, $a\\}, "
+ "[$t, $s, \\{$l, $x\\}];",
+ []>;
+def TEX_1D_ARRAY_I32_F32_LEVEL
+ : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g,
+ Int32Regs:$b, Int32Regs:$a),
+ (ins Int64Regs:$t, Int64Regs:$s, Int32Regs:$l, Float32Regs:$x,
+ Float32Regs:$lod),
+ "tex.level.a1d.v4.s32.f32\t\\{$r, $g, $b, $a\\}, "
+ "[$t, $s, \\{$l, $x\\}], $lod;",
+ []>;
+def TEX_1D_ARRAY_I32_F32_GRAD
+ : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g,
+ Int32Regs:$b, Int32Regs:$a),
+ (ins Int64Regs:$t, Int64Regs:$s, Int32Regs:$l, Float32Regs:$x,
+ Float32Regs:$gradx, Float32Regs:$grady),
+ "tex.grad.a1d.v4.s32.f32\t\\{$r, $g, $b, $a\\}, "
+ "[$t, $s, \\{$l, $x\\}], \\{$gradx\\}, \\{$grady\\};",
+ []>;
+
+def TEX_2D_F32_I32
+ : NVPTXInst<(outs Float32Regs:$r, Float32Regs:$g,
+ Float32Regs:$b, Float32Regs:$a),
+ (ins Int64Regs:$t, Int64Regs:$s, Int32Regs:$x, Int32Regs:$y),
+ "tex.2d.v4.f32.s32\t\\{$r, $g, $b, $a\\}, "
+ "[$t, $s, \\{$x, $y\\}];",
+ []>;
+def TEX_2D_F32_F32
+ : NVPTXInst<(outs Float32Regs:$r, Float32Regs:$g,
+ Float32Regs:$b, Float32Regs:$a),
+ (ins Int64Regs:$t, Int64Regs:$s, Float32Regs:$x, Float32Regs:$y),
+ "tex.2d.v4.f32.f32\t\\{$r, $g, $b, $a\\}, "
+ "[$t, $s, \\{$x, $y\\}];",
+ []>;
+def TEX_2D_F32_F32_LEVEL
+ : NVPTXInst<(outs Float32Regs:$r, Float32Regs:$g,
+ Float32Regs:$b, Float32Regs:$a),
+ (ins Int64Regs:$t, Int64Regs:$s, Float32Regs:$x, Float32Regs:$y,
+ Float32Regs:$lod),
+ "tex.level.2d.v4.f32.f32\t\\{$r, $g, $b, $a\\}, "
+ "[$t, $s, \\{$x, $y\\}], $lod;",
+ []>;
+def TEX_2D_F32_F32_GRAD
+ : NVPTXInst<(outs Float32Regs:$r, Float32Regs:$g,
+ Float32Regs:$b, Float32Regs:$a),
+ (ins Int64Regs:$t, Int64Regs:$s, Float32Regs:$x, Float32Regs:$y,
+ Float32Regs:$gradx0, Float32Regs:$gradx1,
+ Float32Regs:$grady0, Float32Regs:$grady1),
+ "tex.grad.2d.v4.f32.f32\t\\{$r, $g, $b, $a\\}, "
+ "[$t, $s, \\{$x, $y\\}], \\{$gradx0, $gradx1\\}, "
+ "\\{$grady0, $grady1\\};",
+ []>;
+def TEX_2D_I32_I32
+ : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g,
+ Int32Regs:$b, Int32Regs:$a),
+ (ins Int64Regs:$t, Int64Regs:$s, Int32Regs:$x, Int32Regs:$y),
+ "tex.2d.v4.s32.s32\t\\{$r, $g, $b, $a\\}, "
+ "[$t, $s, \\{$x, $y\\}];",
+ []>;
+def TEX_2D_I32_F32
+ : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g,
+ Int32Regs:$b, Int32Regs:$a),
+ (ins Int64Regs:$t, Int64Regs:$s, Float32Regs:$x, Float32Regs:$y),
+ "tex.2d.v4.s32.f32\t\\{$r, $g, $b, $a\\}, "
+ "[$t, $s, \\{$x, $y\\}];",
+ []>;
+def TEX_2D_I32_F32_LEVEL
+ : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g,
+ Int32Regs:$b, Int32Regs:$a),
+ (ins Int64Regs:$t, Int64Regs:$s, Float32Regs:$x, Float32Regs:$y,
+ Float32Regs:$lod),
+ "tex.level.2d.v4.s32.f32\t\\{$r, $g, $b, $a\\}, "
+ "[$t, $s, \\{$x, $y\\}], $lod;",
+ []>;
+def TEX_2D_I32_F32_GRAD
+ : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g,
+ Int32Regs:$b, Int32Regs:$a),
+ (ins Int64Regs:$t, Int64Regs:$s, Float32Regs:$x, Float32Regs:$y,
+ Float32Regs:$gradx0, Float32Regs:$gradx1,
+ Float32Regs:$grady0, Float32Regs:$grady1),
+ "tex.grad.2d.v4.s32.f32\t\\{$r, $g, $b, $a\\}, "
+ "[$t, $s, \\{$x, $y\\}], \\{$gradx0, $gradx1\\}, "
+ "\\{$grady0, $grady1\\};",
+ []>;
+
+def TEX_2D_ARRAY_F32_I32
+ : NVPTXInst<(outs Float32Regs:$r, Float32Regs:$g,
+ Float32Regs:$b, Float32Regs:$a),
+ (ins Int64Regs:$t, Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
+ Int32Regs:$y),
+ "tex.a2d.v4.f32.s32\t\\{$r, $g, $b, $a\\}, "
+ "[$t, $s, \\{$l, $x, $y, $y\\}];",
+ []>;
+def TEX_2D_ARRAY_F32_F32
+ : NVPTXInst<(outs Float32Regs:$r, Float32Regs:$g,
+ Float32Regs:$b, Float32Regs:$a),
+ (ins Int64Regs:$t, Int64Regs:$s, Int32Regs:$l, Float32Regs:$x,
+ Float32Regs:$y),
+ "tex.a2d.v4.f32.f32\t\\{$r, $g, $b, $a\\}, "
+ "[$t, $s, \\{$l, $x, $y, $y\\}];",
+ []>;
+def TEX_2D_ARRAY_F32_F32_LEVEL
+ : NVPTXInst<(outs Float32Regs:$r, Float32Regs:$g,
+ Float32Regs:$b, Float32Regs:$a),
+ (ins Int64Regs:$t, Int64Regs:$s, Int32Regs:$l, Float32Regs:$x,
+ Float32Regs:$y, Float32Regs:$lod),
+ "tex.level.a2d.v4.f32.f32\t\\{$r, $g, $b, $a\\}, "
+ "[$t, $s, \\{$l, $x, $y, $y\\}], $lod;",
+ []>;
+def TEX_2D_ARRAY_F32_F32_GRAD
+ : NVPTXInst<(outs Float32Regs:$r, Float32Regs:$g,
+ Float32Regs:$b, Float32Regs:$a),
+ (ins Int64Regs:$t, Int64Regs:$s, Int32Regs:$l, Float32Regs:$x,
+ Float32Regs:$y, Float32Regs:$gradx0, Float32Regs:$gradx1,
+ Float32Regs:$grady0, Float32Regs:$grady1),
+ "tex.grad.a2d.v4.f32.f32\t\\{$r, $g, $b, $a\\}, "
+ "[$t, $s, \\{$l, $x, $y, $y\\}], \\{$gradx0, $gradx1\\}, "
+ "\\{$grady0, $grady1\\};",
+ []>;
+def TEX_2D_ARRAY_I32_I32
+ : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g,
+ Int32Regs:$b, Int32Regs:$a),
+ (ins Int64Regs:$t, Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
+ Int32Regs:$y),
+ "tex.a2d.v4.s32.s32\t\\{$r, $g, $b, $a\\}, "
+ "[$t, $s, \\{$l, $x, $y, $y\\}];",
+ []>;
+def TEX_2D_ARRAY_I32_F32
+ : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g,
+ Int32Regs:$b, Int32Regs:$a),
+ (ins Int64Regs:$t, Int64Regs:$s, Int32Regs:$l, Float32Regs:$x,
+ Float32Regs:$y),
+ "tex.a2d.v4.s32.f32\t\\{$r, $g, $b, $a\\}, "
+ "[$t, $s, \\{$l, $x, $y, $y\\}];",
+ []>;
+def TEX_2D_ARRAY_I32_F32_LEVEL
+ : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g,
+ Int32Regs:$b, Int32Regs:$a),
+ (ins Int64Regs:$t, Int64Regs:$s, Int32Regs:$l, Float32Regs:$x,
+ Float32Regs:$y, Float32Regs:$lod),
+ "tex.level.a2d.v4.s32.f32\t\\{$r, $g, $b, $a\\}, "
+ "[$t, $s, \\{$l, $x, $y, $y\\}], $lod;",
+ []>;
+def TEX_2D_ARRAY_I32_F32_GRAD
+ : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g,
+ Int32Regs:$b, Int32Regs:$a),
+ (ins Int64Regs:$t, Int64Regs:$s, Int32Regs:$l, Float32Regs:$x,
+ Float32Regs:$y,
+ Float32Regs:$gradx0, Float32Regs:$gradx1,
+ Float32Regs:$grady0, Float32Regs:$grady1),
+ "tex.grad.a2d.v4.s32.f32\t\\{$r, $g, $b, $a\\}, "
+ "[$t, $s, \\{$l, $x, $y, $y\\}], \\{$gradx0, $gradx1\\}, "
+ "\\{$grady0, $grady1\\};",
+ []>;
+
+def TEX_3D_F32_I32
+ : NVPTXInst<(outs Float32Regs:$r, Float32Regs:$g,
+ Float32Regs:$b, Float32Regs:$a),
+ (ins Int64Regs:$t, Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
+ Int32Regs:$z),
+ "tex.3d.v4.f32.s32\t\\{$r, $g, $b, $a\\}, "
+ "[$t, $s, \\{$x, $y, $z, $z\\}];",
+ []>;
+def TEX_3D_F32_F32
+ : NVPTXInst<(outs Float32Regs:$r, Float32Regs:$g,
+ Float32Regs:$b, Float32Regs:$a),
+ (ins Int64Regs:$t, Int64Regs:$s, Float32Regs:$x, Float32Regs:$y,
+ Float32Regs:$z),
+ "tex.3d.v4.f32.f32\t\\{$r, $g, $b, $a\\}, "
+ "[$t, $s, \\{$x, $y, $z, $z\\}];",
+ []>;
+def TEX_3D_F32_F32_LEVEL
+ : NVPTXInst<(outs Float32Regs:$r, Float32Regs:$g,
+ Float32Regs:$b, Float32Regs:$a),
+ (ins Int64Regs:$t, Int64Regs:$s, Float32Regs:$x, Float32Regs:$y,
+ Float32Regs:$z, Float32Regs:$lod),
+ "tex.level.3d.v4.f32.f32\t\\{$r, $g, $b, $a\\}, "
+ "[$t, $s, \\{$x, $y, $z, $z\\}], $lod;",
+ []>;
+def TEX_3D_F32_F32_GRAD
+ : NVPTXInst<(outs Float32Regs:$r, Float32Regs:$g,
+ Float32Regs:$b, Float32Regs:$a),
+ (ins Int64Regs:$t, Int64Regs:$s, Float32Regs:$x, Float32Regs:$y,
+ Float32Regs:$z,
+ Float32Regs:$gradx0, Float32Regs:$gradx1,
+ Float32Regs:$gradx2, Float32Regs:$grady0,
+ Float32Regs:$grady1, Float32Regs:$grady2),
+ "tex.grad.3d.v4.f32.f32\t\\{$r, $g, $b, $a\\}, "
+ "[$t, $s, \\{$x, $y, $z, $z\\}], "
+ "\\{$gradx0, $gradx1, $gradx2, $gradx2\\}, "
+ "\\{$grady0, $grady1, $grady2, $grady2\\};",
+ []>;
+def TEX_3D_I32_I32
+ : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g,
+ Int32Regs:$b, Int32Regs:$a),
+ (ins Int64Regs:$t, Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
+ Int32Regs:$z),
+ "tex.3d.v4.s32.s32\t\\{$r, $g, $b, $a\\}, "
+ "[$t, $s, \\{$x, $y, $z, $z\\}];",
+ []>;
+def TEX_3D_I32_F32
+ : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g,
+ Int32Regs:$b, Int32Regs:$a),
+ (ins Int64Regs:$t, Int64Regs:$s, Float32Regs:$x, Float32Regs:$y,
+ Float32Regs:$z),
+ "tex.3d.v4.s32.f32\t\\{$r, $g, $b, $a\\}, "
+ "[$t, $s, \\{$x, $y, $z, $z\\}];",
+ []>;
+def TEX_3D_I32_F32_LEVEL
+ : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g,
+ Int32Regs:$b, Int32Regs:$a),
+ (ins Int64Regs:$t, Int64Regs:$s, Float32Regs:$x, Float32Regs:$y,
+ Float32Regs:$z, Float32Regs:$lod),
+ "tex.level.3d.v4.s32.f32\t\\{$r, $g, $b, $a\\}, "
+ "[$t, $s, \\{$x, $y, $z, $z\\}], $lod;",
+ []>;
+def TEX_3D_I32_F32_GRAD
+ : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g,
+ Int32Regs:$b, Int32Regs:$a),
+ (ins Int64Regs:$t, Int64Regs:$s, Float32Regs:$x, Float32Regs:$y,
+ Float32Regs:$z,
+ Float32Regs:$gradx0, Float32Regs:$gradx1,
+ Float32Regs:$gradx2, Float32Regs:$grady0,
+ Float32Regs:$grady1, Float32Regs:$grady2),
+ "tex.grad.3d.v4.s32.f32\t\\{$r, $g, $b, $a\\}, "
+ "[$t, $s, \\{$x, $y, $z, $z\\}], "
+ "\\{$gradx0, $gradx1, $gradx2, $gradx2\\}, "
+ "\\{$grady0, $grady1, $grady2, $grady2\\};",
+ []>;
+
+
+// Surface load instructions
+def SULD_1D_I8_TRAP
+ : NVPTXInst<(outs Int16Regs:$r),
+ (ins Int64Regs:$s, Int32Regs:$x),
+ "suld.b.1d.b8.trap \\{$r\\}, [$s, \\{$x\\}];",
+ []>;
+def SULD_1D_I16_TRAP
+ : NVPTXInst<(outs Int16Regs:$r),
+ (ins Int64Regs:$s, Int32Regs:$x),
+ "suld.b.1d.b16.trap \\{$r\\}, [$s, \\{$x\\}];",
+ []>;
+def SULD_1D_I32_TRAP
+ : NVPTXInst<(outs Int32Regs:$r),
+ (ins Int64Regs:$s, Int32Regs:$x),
+ "suld.b.1d.b32.trap \\{$r\\}, [$s, \\{$x\\}];",
+ []>;
+def SULD_1D_V2I8_TRAP
+ : NVPTXInst<(outs Int16Regs:$r, Int16Regs:$g),
+ (ins Int64Regs:$s, Int32Regs:$x),
+ "suld.b.1d.v2.b8.trap \\{$r, $g\\}, [$s, \\{$x\\}];",
+ []>;
+def SULD_1D_V2I16_TRAP
+ : NVPTXInst<(outs Int16Regs:$r, Int16Regs:$g),
+ (ins Int64Regs:$s, Int32Regs:$x),
+ "suld.b.1d.v2.b16.trap \\{$r, $g\\}, [$s, \\{$x\\}];",
+ []>;
+def SULD_1D_V2I32_TRAP
+ : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g),
+ (ins Int64Regs:$s, Int32Regs:$x),
+ "suld.b.1d.v2.b32.trap \\{$r, $g\\}, [$s, \\{$x\\}];",
+ []>;
+def SULD_1D_V4I8_TRAP
+ : NVPTXInst<(outs Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
+ (ins Int64Regs:$s, Int32Regs:$x),
+ "suld.b.1d.v4.b8.trap \\{$r, $g, $b, $a\\}, [$s, \\{$x\\}];",
+ []>;
+def SULD_1D_V4I16_TRAP
+ : NVPTXInst<(outs Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
+ (ins Int64Regs:$s, Int32Regs:$x),
+ "suld.b.1d.v4.b16.trap \\{$r, $g, $b, $a\\}, [$s, \\{$x\\}];",
+ []>;
+def SULD_1D_V4I32_TRAP
+ : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a),
+ (ins Int64Regs:$s, Int32Regs:$x),
+ "suld.b.1d.v4.b32.trap \\{$r, $g, $b, $a\\}, [$s, \\{$x\\}];",
+ []>;
+
+def SULD_1D_ARRAY_I8_TRAP
+ : NVPTXInst<(outs Int16Regs:$r),
+ (ins Int64Regs:$s, Int32Regs:$l, Int32Regs:$x),
+ "suld.b.a1d.b8.trap \\{$r\\}, [$s, \\{$l, $x\\}];",
+ []>;
+def SULD_1D_ARRAY_I16_TRAP
+ : NVPTXInst<(outs Int16Regs:$r),
+ (ins Int64Regs:$s, Int32Regs:$l, Int32Regs:$x),
+ "suld.b.a1d.b16.trap \\{$r\\}, [$s, \\{$l, $x\\}];",
+ []>;
+def SULD_1D_ARRAY_I32_TRAP
+ : NVPTXInst<(outs Int32Regs:$r),
+ (ins Int64Regs:$s, Int32Regs:$l, Int32Regs:$x),
+ "suld.b.a1d.b32.trap \\{$r\\}, [$s, \\{$l, $x\\}];",
+ []>;
+def SULD_1D_ARRAY_V2I8_TRAP
+ : NVPTXInst<(outs Int16Regs:$r, Int16Regs:$g),
+ (ins Int64Regs:$s, Int32Regs:$l, Int32Regs:$x),
+ "suld.b.a1d.v2.b8.trap \\{$r, $g\\}, [$s, \\{$l, $x\\}];",
+ []>;
+def SULD_1D_ARRAY_V2I16_TRAP
+ : NVPTXInst<(outs Int16Regs:$r, Int16Regs:$g),
+ (ins Int64Regs:$s, Int32Regs:$l, Int32Regs:$x),
+ "suld.b.a1d.v2.b16.trap \\{$r, $g\\}, [$s, \\{$l, $x\\}];",
+ []>;
+def SULD_1D_ARRAY_V2I32_TRAP
+ : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g),
+ (ins Int64Regs:$s, Int32Regs:$l, Int32Regs:$x),
+ "suld.b.a1d.v2.b32.trap \\{$r, $g\\}, [$s, \\{$l, $x\\}];",
+ []>;
+def SULD_1D_ARRAY_V4I8_TRAP
+ : NVPTXInst<(outs Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
+ (ins Int64Regs:$s, Int32Regs:$l, Int32Regs:$x),
+ "suld.b.a1d.v4.b8.trap \\{$r, $g, $b, $a\\}, "
+ "[$s, \\{$l, $x\\}];",
+ []>;
+def SULD_1D_ARRAY_V4I16_TRAP
+ : NVPTXInst<(outs Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
+ (ins Int64Regs:$s, Int32Regs:$l, Int32Regs:$x),
+ "suld.b.a1d.v4.b16.trap \\{$r, $g, $b, $a\\}, "
+ "[$s, \\{$l, $x\\}];",
+ []>;
+def SULD_1D_ARRAY_V4I32_TRAP
+ : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a),
+ (ins Int64Regs:$s, Int32Regs:$l, Int32Regs:$x),
+ "suld.b.a1d.v4.b32.trap \\{$r, $g, $b, $a\\}, "
+ "[$s, \\{$l, $x\\}];",
+ []>;
+
+def SULD_2D_I8_TRAP
+ : NVPTXInst<(outs Int16Regs:$r),
+ (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y),
+ "suld.b.2d.b8.trap \\{$r\\}, [$s, \\{$x, $y\\}];",
+ []>;
+def SULD_2D_I16_TRAP
+ : NVPTXInst<(outs Int16Regs:$r),
+ (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y),
+ "suld.b.2d.b16.trap \\{$r\\}, [$s, \\{$x, $y\\}];",
+ []>;
+def SULD_2D_I32_TRAP
+ : NVPTXInst<(outs Int32Regs:$r),
+ (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y),
+ "suld.b.2d.b32.trap \\{$r\\}, [$s, \\{$x, $y\\}];",
+ []>;
+def SULD_2D_V2I8_TRAP
+ : NVPTXInst<(outs Int16Regs:$r, Int16Regs:$g),
+ (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y),
+ "suld.b.2d.v2.b8.trap \\{$r, $g\\}, [$s, \\{$x, $y\\}];",
+ []>;
+def SULD_2D_V2I16_TRAP
+ : NVPTXInst<(outs Int16Regs:$r, Int16Regs:$g),
+ (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y),
+ "suld.b.2d.v2.b16.trap \\{$r, $g\\}, [$s, \\{$x, $y\\}];",
+ []>;
+def SULD_2D_V2I32_TRAP
+ : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g),
+ (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y),
+ "suld.b.2d.v2.b32.trap \\{$r, $g\\}, [$s, \\{$x, $y\\}];",
+ []>;
+def SULD_2D_V4I8_TRAP
+ : NVPTXInst<(outs Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
+ (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y),
+ "suld.b.2d.v4.b8.trap \\{$r, $g, $b, $a\\}, [$s, \\{$x, $y\\}];",
+ []>;
+def SULD_2D_V4I16_TRAP
+ : NVPTXInst<(outs Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
+ (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y),
+ "suld.b.2d.v4.b16.trap \\{$r, $g, $b, $a\\}, [$s, \\{$x, $y\\}];",
+ []>;
+def SULD_2D_V4I32_TRAP
+ : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a),
+ (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y),
+ "suld.b.2d.v4.b32.trap \\{$r, $g, $b, $a\\}, [$s, \\{$x, $y\\}];",
+ []>;
+
+def SULD_2D_ARRAY_I8_TRAP
+ : NVPTXInst<(outs Int16Regs:$r),
+ (ins Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y),
+ "suld.b.a2d.b8.trap \\{$r\\}, [$s, \\{$l, $x, $y, $y\\}];",
+ []>;
+def SULD_2D_ARRAY_I16_TRAP
+ : NVPTXInst<(outs Int16Regs:$r),
+ (ins Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y),
+ "suld.b.a2d.b16.trap \\{$r\\}, [$s, \\{$l, $x, $y, $y\\}];",
+ []>;
+def SULD_2D_ARRAY_I32_TRAP
+ : NVPTXInst<(outs Int32Regs:$r),
+ (ins Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y),
+ "suld.b.a2d.b32.trap \\{$r\\}, [$s, \\{$l, $x, $y, $y\\}];",
+ []>;
+def SULD_2D_ARRAY_V2I8_TRAP
+ : NVPTXInst<(outs Int16Regs:$r, Int16Regs:$g),
+ (ins Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y),
+ "suld.b.a2d.v2.b8.trap \\{$r, $g\\}, "
+ "[$s, \\{$l, $x, $y, $y\\}];",
+ []>;
+def SULD_2D_ARRAY_V2I16_TRAP
+ : NVPTXInst<(outs Int16Regs:$r, Int16Regs:$g),
+ (ins Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y),
+ "suld.b.a2d.v2.b16.trap \\{$r, $g\\}, "
+ "[$s, \\{$l, $x, $y, $y\\}];",
+ []>;
+def SULD_2D_ARRAY_V2I32_TRAP
+ : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g),
+ (ins Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y),
+ "suld.b.a2d.v2.b32.trap \\{$r, $g\\}, "
+ "[$s, \\{$l, $x, $y, $y\\}];",
+ []>;
+def SULD_2D_ARRAY_V4I8_TRAP
+ : NVPTXInst<(outs Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
+ (ins Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y),
+ "suld.b.a2d.v4.b8.trap \\{$r, $g, $b, $a\\}, "
+ "[$s, \\{$l, $x, $y, $y\\}];",
+ []>;
+def SULD_2D_ARRAY_V4I16_TRAP
+ : NVPTXInst<(outs Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
+ (ins Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y),
+ "suld.b.a2d.v4.b16.trap \\{$r, $g, $b, $a\\}, "
+ "[$s, \\{$l, $x, $y, $y\\}];",
+ []>;
+def SULD_2D_ARRAY_V4I32_TRAP
+ : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a),
+ (ins Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y),
+ "suld.b.a2d.v4.b32.trap \\{$r, $g, $b, $a\\}, "
+ "[$s, \\{$l, $x, $y, $y\\}];",
+ []>;
+
+def SULD_3D_I8_TRAP
+ : NVPTXInst<(outs Int16Regs:$r),
+ (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z),
+ "suld.b.3d.b8.trap \\{$r\\}, [$s, \\{$x, $y, $z, $z\\}];",
+ []>;
+def SULD_3D_I16_TRAP
+ : NVPTXInst<(outs Int16Regs:$r),
+ (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z),
+ "suld.b.3d.b16.trap \\{$r\\}, [$s, \\{$x, $y, $z, $z\\}];",
+ []>;
+def SULD_3D_I32_TRAP
+ : NVPTXInst<(outs Int32Regs:$r),
+ (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z),
+ "suld.b.3d.b32.trap \\{$r\\}, [$s, \\{$x, $y, $z, $z\\}];",
+ []>;
+def SULD_3D_V2I8_TRAP
+ : NVPTXInst<(outs Int16Regs:$r, Int16Regs:$g),
+ (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z),
+ "suld.b.3d.v2.b8.trap \\{$r, $g\\}, [$s, \\{$x, $y, $z, $z\\}];",
+ []>;
+def SULD_3D_V2I16_TRAP
+ : NVPTXInst<(outs Int16Regs:$r, Int16Regs:$g),
+ (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z),
+ "suld.b.3d.v2.b16.trap \\{$r, $g\\}, [$s, \\{$x, $y, $z, $z\\}];",
+ []>;
+def SULD_3D_V2I32_TRAP
+ : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g),
+ (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z),
+ "suld.b.3d.v2.b32.trap \\{$r, $g\\}, [$s, \\{$x, $y, $z, $z\\}];",
+ []>;
+def SULD_3D_V4I8_TRAP
+ : NVPTXInst<(outs Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
+ (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z),
+ "suld.b.3d.v4.b8.trap \\{$r, $g, $b, $a\\}, "
+ "[$s, \\{$x, $y, $z, $z\\}];",
+ []>;
+def SULD_3D_V4I16_TRAP
+ : NVPTXInst<(outs Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
+ (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z),
+ "suld.b.3d.v4.b16.trap \\{$r, $g, $b, $a\\}, "
+ "[$s, \\{$x, $y, $z, $z\\}];",
+ []>;
+def SULD_3D_V4I32_TRAP
+ : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a),
+ (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z),
+ "suld.b.3d.v4.b32.trap \\{$r, $g, $b, $a\\}, "
+ "[$s, \\{$x, $y, $z, $z\\}];",
+ []>;
+
+
+//-----------------------------------
+// Texture Query Intrinsics
+//-----------------------------------
+def TXQ_CHANNEL_ORDER
+ : NVPTXInst<(outs Int32Regs:$d), (ins Int64Regs:$a),
+ "txq.channel_order.b32 \t$d, [$a];",
+ []>;
+def TXQ_CHANNEL_DATA_TYPE
+ : NVPTXInst<(outs Int32Regs:$d), (ins Int64Regs:$a),
+ "txq.channel_data_type.b32 \t$d, [$a];",
+ []>;
+def TXQ_WIDTH
+ : NVPTXInst<(outs Int32Regs:$d), (ins Int64Regs:$a),
+ "txq.width.b32 \t$d, [$a];",
+ []>;
+def TXQ_HEIGHT
+ : NVPTXInst<(outs Int32Regs:$d), (ins Int64Regs:$a),
+ "txq.height.b32 \t$d, [$a];",
+ []>;
+def TXQ_DEPTH
+ : NVPTXInst<(outs Int32Regs:$d), (ins Int64Regs:$a),
+ "txq.depth.b32 \t$d, [$a];",
+ []>;
+def TXQ_ARRAY_SIZE
+ : NVPTXInst<(outs Int32Regs:$d), (ins Int64Regs:$a),
+ "txq.array_size.b32 \t$d, [$a];",
+ []>;
+def TXQ_NUM_SAMPLES
+ : NVPTXInst<(outs Int32Regs:$d), (ins Int64Regs:$a),
+ "txq.num_samples.b32 \t$d, [$a];",
+ []>;
+def TXQ_NUM_MIPMAP_LEVELS
+ : NVPTXInst<(outs Int32Regs:$d), (ins Int64Regs:$a),
+ "txq.num_mipmap_levels.b32 \t$d, [$a];",
+ []>;
+
+def : Pat<(int_nvvm_txq_channel_order Int64Regs:$a),
+ (TXQ_CHANNEL_ORDER Int64Regs:$a)>;
+def : Pat<(int_nvvm_txq_channel_data_type Int64Regs:$a),
+ (TXQ_CHANNEL_DATA_TYPE Int64Regs:$a)>;
+def : Pat<(int_nvvm_txq_width Int64Regs:$a),
+ (TXQ_WIDTH Int64Regs:$a)>;
+def : Pat<(int_nvvm_txq_height Int64Regs:$a),
+ (TXQ_HEIGHT Int64Regs:$a)>;
+def : Pat<(int_nvvm_txq_depth Int64Regs:$a),
+ (TXQ_DEPTH Int64Regs:$a)>;
+def : Pat<(int_nvvm_txq_array_size Int64Regs:$a),
+ (TXQ_ARRAY_SIZE Int64Regs:$a)>;
+def : Pat<(int_nvvm_txq_num_samples Int64Regs:$a),
+ (TXQ_NUM_SAMPLES Int64Regs:$a)>;
+def : Pat<(int_nvvm_txq_num_mipmap_levels Int64Regs:$a),
+ (TXQ_NUM_MIPMAP_LEVELS Int64Regs:$a)>;
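+
+// Example (illustrative): IR such as
+//   %w = call i32 @llvm.nvvm.txq.width(i64 %tex)
+// matches the int_nvvm_txq_width pattern above and is emitted as
+//   txq.width.b32  %w, [%tex];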
+
+
+//-----------------------------------
+// Surface Query Intrinsics
+//-----------------------------------
+def SUQ_CHANNEL_ORDER
+ : NVPTXInst<(outs Int32Regs:$d), (ins Int64Regs:$a),
+ "suq.channel_order.b32 \t$d, [$a];",
+ []>;
+def SUQ_CHANNEL_DATA_TYPE
+ : NVPTXInst<(outs Int32Regs:$d), (ins Int64Regs:$a),
+ "suq.channel_data_type.b32 \t$d, [$a];",
+ []>;
+def SUQ_WIDTH
+ : NVPTXInst<(outs Int32Regs:$d), (ins Int64Regs:$a),
+ "suq.width.b32 \t$d, [$a];",
+ []>;
+def SUQ_HEIGHT
+ : NVPTXInst<(outs Int32Regs:$d), (ins Int64Regs:$a),
+ "suq.height.b32 \t$d, [$a];",
+ []>;
+def SUQ_DEPTH
+ : NVPTXInst<(outs Int32Regs:$d), (ins Int64Regs:$a),
+ "suq.depth.b32 \t$d, [$a];",
+ []>;
+def SUQ_ARRAY_SIZE
+ : NVPTXInst<(outs Int32Regs:$d), (ins Int64Regs:$a),
+ "suq.array_size.b32 \t$d, [$a];",
+ []>;
+
+def : Pat<(int_nvvm_suq_channel_order Int64Regs:$a),
+ (SUQ_CHANNEL_ORDER Int64Regs:$a)>;
+def : Pat<(int_nvvm_suq_channel_data_type Int64Regs:$a),
+ (SUQ_CHANNEL_DATA_TYPE Int64Regs:$a)>;
+def : Pat<(int_nvvm_suq_width Int64Regs:$a),
+ (SUQ_WIDTH Int64Regs:$a)>;
+def : Pat<(int_nvvm_suq_height Int64Regs:$a),
+ (SUQ_HEIGHT Int64Regs:$a)>;
+def : Pat<(int_nvvm_suq_depth Int64Regs:$a),
+ (SUQ_DEPTH Int64Regs:$a)>;
+def : Pat<(int_nvvm_suq_array_size Int64Regs:$a),
+ (SUQ_ARRAY_SIZE Int64Regs:$a)>;
+
+
+//===- Handle Query -------------------------------------------------------===//
+
+// TODO: These intrinsics are not yet finalized, pending PTX ISA design work
+def ISTYPEP_SAMPLER
+ : NVPTXInst<(outs Int1Regs:$d), (ins Int64Regs:$a),
+ "istypep.samplerref \t$d, $a;",
+ [(set Int1Regs:$d, (int_nvvm_istypep_sampler Int64Regs:$a))]>;
+def ISTYPEP_SURFACE
+ : NVPTXInst<(outs Int1Regs:$d), (ins Int64Regs:$a),
+ "istypep.surfref \t$d, $a;",
+ [(set Int1Regs:$d, (int_nvvm_istypep_surface Int64Regs:$a))]>;
+def ISTYPEP_TEXTURE
+ : NVPTXInst<(outs Int1Regs:$d), (ins Int64Regs:$a),
+ "istypep.texref \t$d, $a;",
+ [(set Int1Regs:$d, (int_nvvm_istypep_texture Int64Regs:$a))]>;
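+
+// If NVPTXImageOptimizer cannot fold an istypep call to a constant (the image
+// access qualifier is unknown at compile time), the intrinsic survives to
+// instruction selection and is matched by the patterns on these definitions.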
+
+//===- Surface Stores -----------------------------------------------------===//
+
+// Unformatted
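+// (sust.b stores raw bits without format conversion; the "Formatted" sust.p
+// variants further below convert the stored data to the surface's channel
+// format.)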
+
+def SUST_B_1D_B8_TRAP
+ : NVPTXInst<(outs),
+ (ins Int64Regs:$s, Int32Regs:$x, Int16Regs:$r),
+ "sust.b.1d.b8.trap \t[$s, \\{$x\\}], \\{$r\\};",
+ []>;
+def SUST_B_1D_B16_TRAP
+ : NVPTXInst<(outs),
+ (ins Int64Regs:$s, Int32Regs:$x, Int16Regs:$r),
+ "sust.b.1d.b16.trap \t[$s, \\{$x\\}], \\{$r\\};",
+ []>;
+def SUST_B_1D_B32_TRAP
+ : NVPTXInst<(outs),
+ (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$r),
+ "sust.b.1d.b32.trap \t[$s, \\{$x\\}], \\{$r\\};",
+ []>;
+def SUST_B_1D_V2B8_TRAP
+ : NVPTXInst<(outs),
+ (ins Int64Regs:$s, Int32Regs:$x, Int16Regs:$r, Int16Regs:$g),
+ "sust.b.1d.v2.b8.trap \t[$s, \\{$x\\}], \\{$r, $g\\};",
+ []>;
+def SUST_B_1D_V2B16_TRAP
+ : NVPTXInst<(outs),
+ (ins Int64Regs:$s, Int32Regs:$x, Int16Regs:$r, Int16Regs:$g),
+ "sust.b.1d.v2.b16.trap \t[$s, \\{$x\\}], \\{$r, $g\\};",
+ []>;
+def SUST_B_1D_V2B32_TRAP
+ : NVPTXInst<(outs),
+ (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$r, Int32Regs:$g),
+ "sust.b.1d.v2.b32.trap \t[$s, \\{$x\\}], \\{$r, $g\\};",
+ []>;
+def SUST_B_1D_V4B8_TRAP
+ : NVPTXInst<(outs),
+ (ins Int64Regs:$s, Int32Regs:$x, Int16Regs:$r, Int16Regs:$g,
+ Int16Regs:$b, Int16Regs:$a),
+ "sust.b.1d.v4.b8.trap \t[$s, \\{$x\\}], \\{$r, $g, $b, $a\\};",
+ []>;
+def SUST_B_1D_V4B16_TRAP
+ : NVPTXInst<(outs),
+ (ins Int64Regs:$s, Int32Regs:$x, Int16Regs:$r, Int16Regs:$g,
+ Int16Regs:$b, Int16Regs:$a),
+ "sust.b.1d.v4.b16.trap \t[$s, \\{$x\\}], \\{$r, $g, $b, $a\\};",
+ []>;
+def SUST_B_1D_V4B32_TRAP
+ : NVPTXInst<(outs),
+ (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$r, Int32Regs:$g,
+ Int32Regs:$b, Int32Regs:$a),
+ "sust.b.1d.v4.b32.trap \t[$s, \\{$x\\}], \\{$r, $g, $b, $a\\};",
+ []>;
+
+
+def SUST_B_1D_ARRAY_B8_TRAP
+ : NVPTXInst<(outs),
+ (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int16Regs:$r),
+ "sust.b.a1d.b8.trap \t[$s, \\{$idx, $x\\}], \\{$r\\};",
+ []>;
+def SUST_B_1D_ARRAY_B16_TRAP
+ : NVPTXInst<(outs),
+ (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int16Regs:$r),
+ "sust.b.a1d.b16.trap \t[$s, \\{$idx, $x\\}], \\{$r\\};",
+ []>;
+def SUST_B_1D_ARRAY_B32_TRAP
+ : NVPTXInst<(outs),
+ (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int32Regs:$r),
+ "sust.b.a1d.b32.trap \t[$s, \\{$idx, $x\\}], \\{$r\\};",
+ []>;
+def SUST_B_1D_ARRAY_V2B8_TRAP
+ : NVPTXInst<(outs),
+ (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int16Regs:$r,
+ Int16Regs:$g),
+ "sust.b.a1d.v2.b8.trap \t[$s, \\{$idx, $x\\}], \\{$r, $g\\};",
+ []>;
+def SUST_B_1D_ARRAY_V2B16_TRAP
+ : NVPTXInst<(outs),
+ (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int16Regs:$r,
+ Int16Regs:$g),
+ "sust.b.a1d.v2.b16.trap \t[$s, \\{$idx, $x\\}], \\{$r, $g\\};",
+ []>;
+def SUST_B_1D_ARRAY_V2B32_TRAP
+ : NVPTXInst<(outs),
+ (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int32Regs:$r,
+ Int32Regs:$g),
+ "sust.b.a1d.v2.b32.trap \t[$s, \\{$idx, $x\\}], \\{$r, $g\\};",
+ []>;
+def SUST_B_1D_ARRAY_V4B8_TRAP
+ : NVPTXInst<(outs),
+ (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int16Regs:$r,
+ Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
+ "sust.b.a1d.v4.b8.trap \t[$s, \\{$idx, $x\\}], "
+ "\\{$r, $g, $b, $a\\};",
+ []>;
+def SUST_B_1D_ARRAY_V4B16_TRAP
+ : NVPTXInst<(outs),
+ (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int16Regs:$r,
+ Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
+ "sust.b.a1d.v4.b16.trap \t[$s, \\{$idx, $x\\}], "
+ "\\{$r, $g, $b, $a\\};",
+ []>;
+def SUST_B_1D_ARRAY_V4B32_TRAP
+ : NVPTXInst<(outs),
+ (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int32Regs:$r,
+ Int32Regs:$g, Int32Regs:$b, Int32Regs:$a),
+ "sust.b.a1d.v4.b32.trap \t[$s, \\{$idx, $x\\}], "
+ "\\{$r, $g, $b, $a\\};",
+ []>;
+
+
+def SUST_B_2D_B8_TRAP
+ : NVPTXInst<(outs),
+ (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int16Regs:$r),
+ "sust.b.2d.b8.trap \t[$s, \\{$x, $y\\}], \\{$r\\};",
+ []>;
+def SUST_B_2D_B16_TRAP
+ : NVPTXInst<(outs),
+ (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int16Regs:$r),
+ "sust.b.2d.b16.trap \t[$s, \\{$x, $y\\}], \\{$r\\};",
+ []>;
+def SUST_B_2D_B32_TRAP
+ : NVPTXInst<(outs),
+ (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$r),
+ "sust.b.2d.b32.trap \t[$s, \\{$x, $y\\}], \\{$r\\};",
+ []>;
+def SUST_B_2D_V2B8_TRAP
+ : NVPTXInst<(outs),
+ (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int16Regs:$r,
+ Int16Regs:$g),
+ "sust.b.2d.v2.b8.trap \t[$s, \\{$x, $y\\}], \\{$r, $g\\};",
+ []>;
+def SUST_B_2D_V2B16_TRAP
+ : NVPTXInst<(outs),
+ (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int16Regs:$r,
+ Int16Regs:$g),
+ "sust.b.2d.v2.b16.trap \t[$s, \\{$x, $y\\}], \\{$r, $g\\};",
+ []>;
+def SUST_B_2D_V2B32_TRAP
+ : NVPTXInst<(outs),
+ (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$r,
+ Int32Regs:$g),
+ "sust.b.2d.v2.b32.trap \t[$s, \\{$x, $y\\}], \\{$r, $g\\};",
+ []>;
+def SUST_B_2D_V4B8_TRAP
+ : NVPTXInst<(outs),
+ (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int16Regs:$r,
+ Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
+ "sust.b.2d.v4.b8.trap \t[$s, \\{$x, $y\\}], "
+ "\\{$r, $g, $b, $a\\};",
+ []>;
+def SUST_B_2D_V4B16_TRAP
+ : NVPTXInst<(outs),
+ (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int16Regs:$r,
+ Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
+ "sust.b.2d.v4.b16.trap \t[$s, \\{$x, $y\\}], "
+ "\\{$r, $g, $b, $a\\};",
+ []>;
+def SUST_B_2D_V4B32_TRAP
+ : NVPTXInst<(outs),
+ (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$r,
+ Int32Regs:$g, Int32Regs:$b, Int32Regs:$a),
+ "sust.b.2d.v4.b32.trap \t[$s, \\{$x, $y\\}], "
+ "\\{$r, $g, $b, $a\\};",
+ []>;
+
+
+def SUST_B_2D_ARRAY_B8_TRAP
+ : NVPTXInst<(outs),
+ (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int32Regs:$y,
+ Int16Regs:$r),
+ "sust.b.a2d.b8.trap \t[$s, \\{$idx, $x, $y, $y\\}], \\{$r\\};",
+ []>;
+def SUST_B_2D_ARRAY_B16_TRAP
+ : NVPTXInst<(outs),
+ (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int32Regs:$y,
+ Int16Regs:$r),
+ "sust.b.a2d.b16.trap \t[$s, \\{$idx, $x, $y, $y\\}], \\{$r\\};",
+ []>;
+def SUST_B_2D_ARRAY_B32_TRAP
+ : NVPTXInst<(outs),
+ (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int32Regs:$y,
+ Int32Regs:$r),
+ "sust.b.a2d.b32.trap \t[$s, \\{$idx, $x, $y, $y\\}], \\{$r\\};",
+ []>;
+def SUST_B_2D_ARRAY_V2B8_TRAP
+ : NVPTXInst<(outs),
+ (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int32Regs:$y,
+ Int16Regs:$r, Int16Regs:$g),
+ "sust.b.a2d.v2.b8.trap \t[$s, \\{$idx, $x, $y, $y\\}], "
+ "\\{$r, $g\\};",
+ []>;
+def SUST_B_2D_ARRAY_V2B16_TRAP
+ : NVPTXInst<(outs),
+ (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int32Regs:$y,
+ Int16Regs:$r, Int16Regs:$g),
+ "sust.b.a2d.v2.b16.trap \t[$s, \\{$idx, $x, $y, $y\\}], "
+ "\\{$r, $g\\};",
+ []>;
+def SUST_B_2D_ARRAY_V2B32_TRAP
+ : NVPTXInst<(outs),
+ (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int32Regs:$y,
+ Int32Regs:$r, Int32Regs:$g),
+ "sust.b.a2d.v2.b32.trap \t[$s, \\{$idx, $x, $y, $y\\}], "
+ "\\{$r, $g\\};",
+ []>;
+def SUST_B_2D_ARRAY_V4B8_TRAP
+ : NVPTXInst<(outs),
+ (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int32Regs:$y,
+ Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
+ "sust.b.a2d.v4.b8.trap \t[$s, \\{$idx, $x, $y, $y\\}], "
+ "\\{$r, $g, $b, $a\\};",
+ []>;
+def SUST_B_2D_ARRAY_V4B16_TRAP
+ : NVPTXInst<(outs),
+ (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int32Regs:$y,
+ Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
+ "sust.b.a2d.v4.b16.trap \t[$s, \\{$idx, $x, $y, $y\\}], "
+ "\\{$r, $g, $b, $a\\};",
+ []>;
+def SUST_B_2D_ARRAY_V4B32_TRAP
+ : NVPTXInst<(outs),
+ (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int32Regs:$y,
+ Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a),
+ "sust.b.a2d.v4.b32.trap \t[$s, \\{$idx, $x, $y, $y\\}], "
+ "\\{$r, $g, $b, $a\\};",
+ []>;
+
+
+def SUST_B_3D_B8_TRAP
+ : NVPTXInst<(outs),
+ (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
+ Int16Regs:$r),
+ "sust.b.3d.b8.trap \t[$s, \\{$x, $y, $z, $z\\}], \\{$r\\};",
+ []>;
+def SUST_B_3D_B16_TRAP
+ : NVPTXInst<(outs),
+ (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
+ Int16Regs:$r),
+ "sust.b.3d.b16.trap \t[$s, \\{$x, $y, $z, $z\\}], \\{$r\\};",
+ []>;
+def SUST_B_3D_B32_TRAP
+ : NVPTXInst<(outs),
+ (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
+ Int32Regs:$r),
+ "sust.b.3d.b32.trap \t[$s, \\{$x, $y, $z, $z\\}], \\{$r\\};",
+ []>;
+def SUST_B_3D_V2B8_TRAP
+ : NVPTXInst<(outs),
+ (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
+ Int16Regs:$r, Int16Regs:$g),
+ "sust.b.3d.v2.b8.trap \t[$s, \\{$x, $y, $z, $z\\}], "
+ "\\{$r, $g\\};",
+ []>;
+def SUST_B_3D_V2B16_TRAP
+ : NVPTXInst<(outs),
+ (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
+ Int16Regs:$r, Int16Regs:$g),
+ "sust.b.3d.v2.b16.trap \t[$s, \\{$x, $y, $z, $z\\}], "
+ "\\{$r, $g\\};",
+ []>;
+def SUST_B_3D_V2B32_TRAP
+ : NVPTXInst<(outs),
+ (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
+ Int32Regs:$r, Int32Regs:$g),
+ "sust.b.3d.v2.b32.trap \t[$s, \\{$x, $y, $z, $z\\}], "
+ "\\{$r, $g\\};",
+ []>;
+def SUST_B_3D_V4B8_TRAP
+ : NVPTXInst<(outs),
+ (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
+ Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
+ "sust.b.3d.v4.b8.trap \t[$s, \\{$x, $y, $z, $z\\}], "
+ "\\{$r, $g, $b, $a\\};",
+ []>;
+def SUST_B_3D_V4B16_TRAP
+ : NVPTXInst<(outs),
+ (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
+ Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
+ "sust.b.3d.v4.b16.trap \t[$s, \\{$x, $y, $z, $z\\}], "
+ "\\{$r, $g, $b, $a\\};",
+ []>;
+def SUST_B_3D_V4B32_TRAP
+ : NVPTXInst<(outs),
+ (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
+ Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a),
+ "sust.b.3d.v4.b32.trap \t[$s, \\{$x, $y, $z, $z\\}], "
+ "\\{$r, $g, $b, $a\\};",
+ []>;
+
+// Formatted
+
+def SUST_P_1D_B8_TRAP
+ : NVPTXInst<(outs),
+ (ins Int64Regs:$s, Int32Regs:$x, Int16Regs:$r),
+ "sust.p.1d.b8.trap \t[$s, \\{$x\\}], \\{$r\\};",
+ []>;
+def SUST_P_1D_B16_TRAP
+ : NVPTXInst<(outs),
+ (ins Int64Regs:$s, Int32Regs:$x, Int16Regs:$r),
+ "sust.p.1d.b16.trap \t[$s, \\{$x\\}], \\{$r\\};",
+ []>;
+def SUST_P_1D_B32_TRAP
+ : NVPTXInst<(outs),
+ (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$r),
+ "sust.p.1d.b32.trap \t[$s, \\{$x\\}], \\{$r\\};",
+ []>;
+def SUST_P_1D_V2B8_TRAP
+ : NVPTXInst<(outs),
+ (ins Int64Regs:$s, Int32Regs:$x, Int16Regs:$r, Int16Regs:$g),
+ "sust.p.1d.v2.b8.trap \t[$s, \\{$x\\}], \\{$r, $g\\};",
+ []>;
+def SUST_P_1D_V2B16_TRAP
+ : NVPTXInst<(outs),
+ (ins Int64Regs:$s, Int32Regs:$x, Int16Regs:$r, Int16Regs:$g),
+ "sust.p.1d.v2.b16.trap \t[$s, \\{$x\\}], \\{$r, $g\\};",
+ []>;
+def SUST_P_1D_V2B32_TRAP
+ : NVPTXInst<(outs),
+ (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$r, Int32Regs:$g),
+ "sust.p.1d.v2.b32.trap \t[$s, \\{$x\\}], \\{$r, $g\\};",
+ []>;
+def SUST_P_1D_V4B8_TRAP
+ : NVPTXInst<(outs),
+ (ins Int64Regs:$s, Int32Regs:$x, Int16Regs:$r, Int16Regs:$g,
+ Int16Regs:$b, Int16Regs:$a),
+ "sust.p.1d.v4.b8.trap \t[$s, \\{$x\\}], \\{$r, $g, $b, $a\\};",
+ []>;
+def SUST_P_1D_V4B16_TRAP
+ : NVPTXInst<(outs),
+ (ins Int64Regs:$s, Int32Regs:$x, Int16Regs:$r, Int16Regs:$g,
+ Int16Regs:$b, Int16Regs:$a),
+ "sust.p.1d.v4.b16.trap \t[$s, \\{$x\\}], \\{$r, $g, $b, $a\\};",
+ []>;
+def SUST_P_1D_V4B32_TRAP
+ : NVPTXInst<(outs),
+ (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$r, Int32Regs:$g,
+ Int32Regs:$b, Int32Regs:$a),
+ "sust.p.1d.v4.b32.trap \t[$s, \\{$x\\}], \\{$r, $g, $b, $a\\};",
+ []>;
+
+
+def SUST_P_1D_ARRAY_B8_TRAP
+ : NVPTXInst<(outs),
+ (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int16Regs:$r),
+ "sust.p.a1d.b8.trap \t[$s, \\{$idx, $x\\}], \\{$r\\};",
+ []>;
+def SUST_P_1D_ARRAY_B16_TRAP
+ : NVPTXInst<(outs),
+ (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int16Regs:$r),
+ "sust.p.a1d.b16.trap \t[$s, \\{$idx, $x\\}], \\{$r\\};",
+ []>;
+def SUST_P_1D_ARRAY_B32_TRAP
+ : NVPTXInst<(outs),
+ (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int32Regs:$r),
+ "sust.p.a1d.b32.trap \t[$s, \\{$idx, $x\\}], \\{$r\\};",
+ []>;
+def SUST_P_1D_ARRAY_V2B8_TRAP
+ : NVPTXInst<(outs),
+ (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int16Regs:$r,
+ Int16Regs:$g),
+ "sust.p.a1d.v2.b8.trap \t[$s, \\{$idx, $x\\}], \\{$r, $g\\};",
+ []>;
+def SUST_P_1D_ARRAY_V2B16_TRAP
+ : NVPTXInst<(outs),
+ (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int16Regs:$r,
+ Int16Regs:$g),
+ "sust.p.a1d.v2.b16.trap \t[$s, \\{$idx, $x\\}], \\{$r, $g\\};",
+ []>;
+def SUST_P_1D_ARRAY_V2B32_TRAP
+ : NVPTXInst<(outs),
+ (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int32Regs:$r,
+ Int32Regs:$g),
+ "sust.p.a1d.v2.b32.trap \t[$s, \\{$idx, $x\\}], \\{$r, $g\\};",
+ []>;
+def SUST_P_1D_ARRAY_V4B8_TRAP
+ : NVPTXInst<(outs),
+ (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int16Regs:$r,
+ Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
+ "sust.p.a1d.v4.b8.trap \t[$s, \\{$idx, $x\\}], "
+ "\\{$r, $g, $b, $a\\};",
+ []>;
+def SUST_P_1D_ARRAY_V4B16_TRAP
+ : NVPTXInst<(outs),
+ (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int16Regs:$r,
+ Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
+ "sust.p.a1d.v4.b16.trap \t[$s, \\{$idx, $x\\}], "
+ "\\{$r, $g, $b, $a\\};",
+ []>;
+def SUST_P_1D_ARRAY_V4B32_TRAP
+ : NVPTXInst<(outs),
+ (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int32Regs:$r,
+ Int32Regs:$g, Int32Regs:$b, Int32Regs:$a),
+ "sust.p.a1d.v4.b32.trap \t[$s, \\{$idx, $x\\}], "
+ "\\{$r, $g, $b, $a\\};",
+ []>;
+
+
+def SUST_P_2D_B8_TRAP
+ : NVPTXInst<(outs),
+ (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int16Regs:$r),
+ "sust.p.2d.b8.trap \t[$s, \\{$x, $y\\}], \\{$r\\};",
+ []>;
+def SUST_P_2D_B16_TRAP
+ : NVPTXInst<(outs),
+ (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int16Regs:$r),
+ "sust.p.2d.b16.trap \t[$s, \\{$x, $y\\}], \\{$r\\};",
+ []>;
+def SUST_P_2D_B32_TRAP
+ : NVPTXInst<(outs),
+ (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$r),
+ "sust.p.2d.b32.trap \t[$s, \\{$x, $y\\}], \\{$r\\};",
+ []>;
+def SUST_P_2D_V2B8_TRAP
+ : NVPTXInst<(outs),
+ (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int16Regs:$r,
+ Int16Regs:$g),
+ "sust.p.2d.v2.b8.trap \t[$s, \\{$x, $y\\}], \\{$r, $g\\};",
+ []>;
+def SUST_P_2D_V2B16_TRAP
+ : NVPTXInst<(outs),
+ (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int16Regs:$r,
+ Int16Regs:$g),
+ "sust.p.2d.v2.b16.trap \t[$s, \\{$x, $y\\}], \\{$r, $g\\};",
+ []>;
+def SUST_P_2D_V2B32_TRAP
+ : NVPTXInst<(outs),
+ (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$r,
+ Int32Regs:$g),
+ "sust.p.2d.v2.b32.trap \t[$s, \\{$x, $y\\}], \\{$r, $g\\};",
+ []>;
+def SUST_P_2D_V4B8_TRAP
+ : NVPTXInst<(outs),
+ (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int16Regs:$r,
+ Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
+ "sust.p.2d.v4.b8.trap \t[$s, \\{$x, $y\\}], "
+ "\\{$r, $g, $b, $a\\};",
+ []>;
+def SUST_P_2D_V4B16_TRAP
+ : NVPTXInst<(outs),
+ (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int16Regs:$r,
+ Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
+ "sust.p.2d.v4.b16.trap \t[$s, \\{$x, $y\\}], "
+ "\\{$r, $g, $b, $a\\};",
+ []>;
+def SUST_P_2D_V4B32_TRAP
+ : NVPTXInst<(outs),
+ (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$r,
+ Int32Regs:$g, Int32Regs:$b, Int32Regs:$a),
+ "sust.p.2d.v4.b32.trap \t[$s, \\{$x, $y\\}], "
+ "\\{$r, $g, $b, $a\\};",
+ []>;
+
+
+def SUST_P_2D_ARRAY_B8_TRAP
+ : NVPTXInst<(outs),
+ (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int32Regs:$y,
+ Int16Regs:$r),
+ "sust.p.a2d.b8.trap \t[$s, \\{$idx, $x, $y, $y\\}], \\{$r\\};",
+ []>;
+def SUST_P_2D_ARRAY_B16_TRAP
+ : NVPTXInst<(outs),
+ (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int32Regs:$y,
+ Int16Regs:$r),
+ "sust.p.a2d.b16.trap \t[$s, \\{$idx, $x, $y, $y\\}], \\{$r\\};",
+ []>;
+def SUST_P_2D_ARRAY_B32_TRAP
+ : NVPTXInst<(outs),
+ (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int32Regs:$y,
+ Int32Regs:$r),
+ "sust.p.a2d.b32.trap \t[$s, \\{$idx, $x, $y, $y\\}], \\{$r\\};",
+ []>;
+def SUST_P_2D_ARRAY_V2B8_TRAP
+ : NVPTXInst<(outs),
+ (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int32Regs:$y,
+ Int16Regs:$r, Int16Regs:$g),
+ "sust.p.a2d.v2.b8.trap \t[$s, \\{$idx, $x, $y, $y\\}], "
+ "\\{$r, $g\\};",
+ []>;
+def SUST_P_2D_ARRAY_V2B16_TRAP
+ : NVPTXInst<(outs),
+ (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int32Regs:$y,
+ Int16Regs:$r, Int16Regs:$g),
+ "sust.p.a2d.v2.b16.trap \t[$s, \\{$idx, $x, $y, $y\\}], "
+ "\\{$r, $g\\};",
+ []>;
+def SUST_P_2D_ARRAY_V2B32_TRAP
+ : NVPTXInst<(outs),
+ (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int32Regs:$y,
+ Int32Regs:$r, Int32Regs:$g),
+ "sust.p.a2d.v2.b32.trap \t[$s, \\{$idx, $x, $y, $y\\}], "
+ "\\{$r, $g\\};",
+ []>;
+def SUST_P_2D_ARRAY_V4B8_TRAP
+ : NVPTXInst<(outs),
+ (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int32Regs:$y,
+ Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
+ "sust.p.a2d.v4.b8.trap \t[$s, \\{$idx, $x, $y, $y\\}], "
+ "\\{$r, $g, $b, $a\\};",
+ []>;
+def SUST_P_2D_ARRAY_V4B16_TRAP
+ : NVPTXInst<(outs),
+ (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int32Regs:$y,
+ Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
+ "sust.p.a2d.v4.b16.trap \t[$s, \\{$idx, $x, $y, $y\\}], "
+ "\\{$r, $g, $b, $a\\};",
+ []>;
+def SUST_P_2D_ARRAY_V4B32_TRAP
+ : NVPTXInst<(outs),
+ (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int32Regs:$y,
+ Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a),
+ "sust.p.a2d.v4.b32.trap \t[$s, \\{$idx, $x, $y, $y\\}], "
+ "\\{$r, $g, $b, $a\\};",
+ []>;
+
+
+def SUST_P_3D_B8_TRAP
+ : NVPTXInst<(outs),
+ (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
+ Int16Regs:$r),
+ "sust.p.3d.b8.trap \t[$s, \\{$x, $y, $z, $z\\}], \\{$r\\};",
+ []>;
+def SUST_P_3D_B16_TRAP
+ : NVPTXInst<(outs),
+ (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
+ Int16Regs:$r),
+ "sust.p.3d.b16.trap \t[$s, \\{$x, $y, $z, $z\\}], \\{$r\\};",
+ []>;
+def SUST_P_3D_B32_TRAP
+ : NVPTXInst<(outs),
+ (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
+ Int32Regs:$r),
+ "sust.p.3d.b32.trap \t[$s, \\{$x, $y, $z, $z\\}], \\{$r\\};",
+ []>;
+def SUST_P_3D_V2B8_TRAP
+ : NVPTXInst<(outs),
+ (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
+ Int16Regs:$r, Int16Regs:$g),
+ "sust.p.3d.v2.b8.trap \t[$s, \\{$x, $y, $z, $z\\}], "
+ "\\{$r, $g\\};",
+ []>;
+def SUST_P_3D_V2B16_TRAP
+ : NVPTXInst<(outs),
+ (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
+ Int16Regs:$r, Int16Regs:$g),
+ "sust.p.3d.v2.b16.trap \t[$s, \\{$x, $y, $z, $z\\}], "
+ "\\{$r, $g\\};",
+ []>;
+def SUST_P_3D_V2B32_TRAP
+ : NVPTXInst<(outs),
+ (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
+ Int32Regs:$r, Int32Regs:$g),
+ "sust.p.3d.v2.b32.trap \t[$s, \\{$x, $y, $z, $z\\}], "
+ "\\{$r, $g\\};",
+ []>;
+def SUST_P_3D_V4B8_TRAP
+ : NVPTXInst<(outs),
+ (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
+ Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
+ "sust.p.3d.v4.b8.trap \t[$s, \\{$x, $y, $z, $z\\}], "
+ "\\{$r, $g, $b, $a\\};",
+ []>;
+def SUST_P_3D_V4B16_TRAP
+ : NVPTXInst<(outs),
+ (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
+ Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
+ "sust.p.3d.v4.b16.trap \t[$s, \\{$x, $y, $z, $z\\}], "
+ "\\{$r, $g, $b, $a\\};",
+ []>;
+def SUST_P_3D_V4B32_TRAP
+ : NVPTXInst<(outs),
+ (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
+ Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a),
+ "sust.p.3d.v4.b32.trap \t[$s, \\{$x, $y, $z, $z\\}], "
+ "\\{$r, $g, $b, $a\\};",
+ []>;
+
+
+// Surface store instruction patterns
+// It is not clear why these patterns cannot simply be included in the
+// instruction definitions above; TableGen rejects that form with type errors.
+
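For orientation, a minimal sketch (not part of this patch) of how a front end might emit a call to one of these surface-store intrinsics; instruction selection then matches the call through the Pat<> entries below. The helper name and the Module/IRBuilder plumbing are assumptions for illustration:

    // Sketch: emit a call to llvm.nvvm.sust.b.2d.i32.trap. ISel matches it
    // via the corresponding Pat<> and emits SUST_B_2D_B32_TRAP, i.e. the PTX
    // instruction "sust.b.2d.b32.trap [surf, {x, y}], {r};".
    #include "llvm/IR/IRBuilder.h"
    #include "llvm/IR/Intrinsics.h"
    using namespace llvm;

    void emitSurfStore(Module *M, IRBuilder<> &B, Value *Surf, Value *X,
                       Value *Y, Value *R) {
      Function *Decl =
          Intrinsic::getDeclaration(M, Intrinsic::nvvm_sust_b_2d_i32_trap);
      B.CreateCall4(Decl, Surf, X, Y, R); // args: surfref, x, y, value
    }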
+def : Pat<(int_nvvm_sust_b_1d_i8_trap
+ Int64Regs:$s, Int32Regs:$x, Int16Regs:$r),
+ (SUST_B_1D_B8_TRAP Int64Regs:$s, Int32Regs:$x, Int16Regs:$r)>;
+
+def : Pat<(int_nvvm_sust_b_1d_i16_trap
+ Int64Regs:$s, Int32Regs:$x, Int16Regs:$r),
+ (SUST_B_1D_B16_TRAP Int64Regs:$s, Int32Regs:$x, Int16Regs:$r)>;
+
+def : Pat<(int_nvvm_sust_b_1d_i32_trap
+ Int64Regs:$s, Int32Regs:$x, Int32Regs:$r),
+ (SUST_B_1D_B32_TRAP Int64Regs:$s, Int32Regs:$x, Int32Regs:$r)>;
+
+def : Pat<(int_nvvm_sust_b_1d_v2i8_trap
+ Int64Regs:$s, Int32Regs:$x, Int16Regs:$r, Int16Regs:$g),
+ (SUST_B_1D_V2B8_TRAP Int64Regs:$s, Int32Regs:$x,
+ Int16Regs:$r, Int16Regs:$g)>;
+
+def : Pat<(int_nvvm_sust_b_1d_v2i16_trap
+ Int64Regs:$s, Int32Regs:$x, Int16Regs:$r, Int16Regs:$g),
+ (SUST_B_1D_V2B16_TRAP Int64Regs:$s, Int32Regs:$x,
+ Int16Regs:$r, Int16Regs:$g)>;
+
+def : Pat<(int_nvvm_sust_b_1d_v2i32_trap
+ Int64Regs:$s, Int32Regs:$x, Int32Regs:$r, Int32Regs:$g),
+ (SUST_B_1D_V2B32_TRAP Int64Regs:$s, Int32Regs:$x,
+ Int32Regs:$r, Int32Regs:$g)>;
+
+def : Pat<(int_nvvm_sust_b_1d_v4i8_trap
+ Int64Regs:$s, Int32Regs:$x,
+ Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
+ (SUST_B_1D_V4B8_TRAP Int64Regs:$s, Int32Regs:$x,
+ Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a)>;
+
+def : Pat<(int_nvvm_sust_b_1d_v4i16_trap
+ Int64Regs:$s, Int32Regs:$x,
+ Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
+ (SUST_B_1D_V4B16_TRAP Int64Regs:$s, Int32Regs:$x,
+ Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a)>;
+
+def : Pat<(int_nvvm_sust_b_1d_v4i32_trap
+ Int64Regs:$s, Int32Regs:$x,
+ Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a),
+ (SUST_B_1D_V4B32_TRAP Int64Regs:$s, Int32Regs:$x,
+ Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a)>;
+
+
+
+def : Pat<(int_nvvm_sust_b_1d_array_i8_trap
+ Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int16Regs:$r),
+ (SUST_B_1D_ARRAY_B8_TRAP Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
+ Int16Regs:$r)>;
+
+def : Pat<(int_nvvm_sust_b_1d_array_i16_trap
+ Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int16Regs:$r),
+ (SUST_B_1D_ARRAY_B16_TRAP Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
+ Int16Regs:$r)>;
+
+def : Pat<(int_nvvm_sust_b_1d_array_i32_trap
+ Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$r),
+ (SUST_B_1D_ARRAY_B32_TRAP Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
+ Int32Regs:$r)>;
+
+def : Pat<(int_nvvm_sust_b_1d_array_v2i8_trap
+ Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int16Regs:$r, Int16Regs:$g),
+ (SUST_B_1D_ARRAY_V2B8_TRAP Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
+ Int16Regs:$r, Int16Regs:$g)>;
+
+def : Pat<(int_nvvm_sust_b_1d_array_v2i16_trap
+ Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int16Regs:$r, Int16Regs:$g),
+ (SUST_B_1D_ARRAY_V2B16_TRAP Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
+ Int16Regs:$r, Int16Regs:$g)>;
+
+def : Pat<(int_nvvm_sust_b_1d_array_v2i32_trap
+ Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$r, Int32Regs:$g),
+ (SUST_B_1D_ARRAY_V2B32_TRAP Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
+ Int32Regs:$r, Int32Regs:$g)>;
+
+def : Pat<(int_nvvm_sust_b_1d_array_v4i8_trap
+ Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
+ Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
+ (SUST_B_1D_ARRAY_V4B8_TRAP Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
+ Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a)>;
+
+def : Pat<(int_nvvm_sust_b_1d_array_v4i16_trap
+ Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
+ Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
+ (SUST_B_1D_ARRAY_V4B16_TRAP Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
+ Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a)>;
+
+def : Pat<(int_nvvm_sust_b_1d_array_v4i32_trap
+ Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
+ Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a),
+ (SUST_B_1D_ARRAY_V4B32_TRAP Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
+ Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a)>;
+
+
+
+def : Pat<(int_nvvm_sust_b_2d_i8_trap
+ Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int16Regs:$r),
+ (SUST_B_2D_B8_TRAP Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
+ Int16Regs:$r)>;
+
+def : Pat<(int_nvvm_sust_b_2d_i16_trap
+ Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int16Regs:$r),
+ (SUST_B_2D_B16_TRAP Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
+ Int16Regs:$r)>;
+
+def : Pat<(int_nvvm_sust_b_2d_i32_trap
+ Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$r),
+ (SUST_B_2D_B32_TRAP Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
+ Int32Regs:$r)>;
+
+def : Pat<(int_nvvm_sust_b_2d_v2i8_trap
+ Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int16Regs:$r, Int16Regs:$g),
+ (SUST_B_2D_V2B8_TRAP Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
+ Int16Regs:$r, Int16Regs:$g)>;
+
+def : Pat<(int_nvvm_sust_b_2d_v2i16_trap
+ Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int16Regs:$r, Int16Regs:$g),
+ (SUST_B_2D_V2B16_TRAP Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
+ Int16Regs:$r, Int16Regs:$g)>;
+
+def : Pat<(int_nvvm_sust_b_2d_v2i32_trap
+ Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$r, Int32Regs:$g),
+ (SUST_B_2D_V2B32_TRAP Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
+ Int32Regs:$r, Int32Regs:$g)>;
+
+def : Pat<(int_nvvm_sust_b_2d_v4i8_trap
+ Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
+ Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
+ (SUST_B_2D_V4B8_TRAP Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
+ Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a)>;
+
+def : Pat<(int_nvvm_sust_b_2d_v4i16_trap
+ Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
+ Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
+ (SUST_B_2D_V4B16_TRAP Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
+ Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a)>;
+
+def : Pat<(int_nvvm_sust_b_2d_v4i32_trap
+ Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
+ Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a),
+ (SUST_B_2D_V4B32_TRAP Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
+ Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a)>;
+
+
+
+def : Pat<(int_nvvm_sust_b_2d_array_i8_trap
+ Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y, Int16Regs:$r),
+ (SUST_B_2D_ARRAY_B8_TRAP Int64Regs:$s,
+ Int32Regs:$l, Int32Regs:$x, Int32Regs:$y,
+ Int16Regs:$r)>;
+
+def : Pat<(int_nvvm_sust_b_2d_array_i16_trap
+ Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y, Int16Regs:$r),
+ (SUST_B_2D_ARRAY_B16_TRAP Int64Regs:$s,
+ Int32Regs:$l, Int32Regs:$x, Int32Regs:$y,
+ Int16Regs:$r)>;
+
+def : Pat<(int_nvvm_sust_b_2d_array_i32_trap
+ Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y, Int32Regs:$r),
+ (SUST_B_2D_ARRAY_B32_TRAP Int64Regs:$s,
+ Int32Regs:$l, Int32Regs:$x, Int32Regs:$y,
+ Int32Regs:$r)>;
+
+def : Pat<(int_nvvm_sust_b_2d_array_v2i8_trap
+ Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y,
+ Int16Regs:$r, Int16Regs:$g),
+ (SUST_B_2D_ARRAY_V2B8_TRAP Int64Regs:$s, Int32Regs:$l,
+ Int32Regs:$x, Int32Regs:$y,
+ Int16Regs:$r, Int16Regs:$g)>;
+
+def : Pat<(int_nvvm_sust_b_2d_array_v2i16_trap
+ Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y,
+ Int16Regs:$r, Int16Regs:$g),
+ (SUST_B_2D_ARRAY_V2B16_TRAP Int64Regs:$s, Int32Regs:$l,
+ Int32Regs:$x, Int32Regs:$y,
+ Int16Regs:$r, Int16Regs:$g)>;
+
+def : Pat<(int_nvvm_sust_b_2d_array_v2i32_trap
+ Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y, Int32Regs:$r,
+ Int32Regs:$g),
+ (SUST_B_2D_ARRAY_V2B32_TRAP Int64Regs:$s, Int32Regs:$l,
+ Int32Regs:$x, Int32Regs:$y, Int32Regs:$r, Int32Regs:$g)>;
+
+def : Pat<(int_nvvm_sust_b_2d_array_v4i8_trap
+ Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y,
+ Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
+ (SUST_B_2D_ARRAY_V4B8_TRAP Int64Regs:$s,
+ Int32Regs:$l, Int32Regs:$x, Int32Regs:$y,
+ Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a)>;
+
+def : Pat<(int_nvvm_sust_b_2d_array_v4i16_trap
+ Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y,
+ Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
+ (SUST_B_2D_ARRAY_V4B16_TRAP Int64Regs:$s,
+ Int32Regs:$l, Int32Regs:$x, Int32Regs:$y,
+ Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a)>;
+
+def : Pat<(int_nvvm_sust_b_2d_array_v4i32_trap
+ Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y,
+ Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a),
+ (SUST_B_2D_ARRAY_V4B32_TRAP Int64Regs:$s, Int32Regs:$l,
+ Int32Regs:$x, Int32Regs:$y,
+ Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a)>;
+
+
+
+def : Pat<(int_nvvm_sust_b_3d_i8_trap
+ Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
+ Int16Regs:$r),
+ (SUST_B_3D_B8_TRAP Int64Regs:$s,
+ Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
+ Int16Regs:$r)>;
+
+def : Pat<(int_nvvm_sust_b_3d_i16_trap
+ Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
+ Int16Regs:$r),
+ (SUST_B_3D_B16_TRAP Int64Regs:$s,
+ Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
+ Int16Regs:$r)>;
+
+def : Pat<(int_nvvm_sust_b_3d_i32_trap
+ Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
+ Int32Regs:$r),
+ (SUST_B_3D_B32_TRAP Int64Regs:$s,
+ Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
+ Int32Regs:$r)>;
+
+def : Pat<(int_nvvm_sust_b_3d_v2i8_trap
+ Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
+ Int16Regs:$r, Int16Regs:$g),
+ (SUST_B_3D_V2B8_TRAP Int64Regs:$s,
+ Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
+ Int16Regs:$r, Int16Regs:$g)>;
+
+def : Pat<(int_nvvm_sust_b_3d_v2i16_trap
+ Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
+ Int16Regs:$r, Int16Regs:$g),
+ (SUST_B_3D_V2B16_TRAP Int64Regs:$s,
+ Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
+ Int16Regs:$r, Int16Regs:$g)>;
+
+def : Pat<(int_nvvm_sust_b_3d_v2i32_trap
+ Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
+ Int32Regs:$r, Int32Regs:$g),
+ (SUST_B_3D_V2B32_TRAP Int64Regs:$s,
+ Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
+ Int32Regs:$r, Int32Regs:$g)>;
+
+def : Pat<(int_nvvm_sust_b_3d_v4i8_trap
+ Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
+ Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
+ (SUST_B_3D_V4B8_TRAP Int64Regs:$s,
+ Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
+ Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a)>;
+
+def : Pat<(int_nvvm_sust_b_3d_v4i16_trap
+ Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
+ Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
+ (SUST_B_3D_V4B16_TRAP Int64Regs:$s,
+ Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
+ Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a)>;
+
+def : Pat<(int_nvvm_sust_b_3d_v4i32_trap
+ Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
+ Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a),
+ (SUST_B_3D_V4B32_TRAP Int64Regs:$s,
+ Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
+ Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a)>;
+
+
+
+
+def : Pat<(int_nvvm_sust_p_1d_i8_trap
+ Int64Regs:$s, Int32Regs:$x, Int16Regs:$r),
+ (SUST_P_1D_B8_TRAP Int64Regs:$s, Int32Regs:$x, Int16Regs:$r)>;
+
+def : Pat<(int_nvvm_sust_p_1d_i16_trap
+ Int64Regs:$s, Int32Regs:$x, Int16Regs:$r),
+ (SUST_P_1D_B16_TRAP Int64Regs:$s, Int32Regs:$x, Int16Regs:$r)>;
+
+def : Pat<(int_nvvm_sust_p_1d_i32_trap
+ Int64Regs:$s, Int32Regs:$x, Int32Regs:$r),
+ (SUST_P_1D_B32_TRAP Int64Regs:$s, Int32Regs:$x, Int32Regs:$r)>;
+
+def : Pat<(int_nvvm_sust_p_1d_v2i8_trap
+ Int64Regs:$s, Int32Regs:$x, Int16Regs:$r, Int16Regs:$g),
+ (SUST_P_1D_V2B8_TRAP Int64Regs:$s, Int32Regs:$x,
+ Int16Regs:$r, Int16Regs:$g)>;
+
+def : Pat<(int_nvvm_sust_p_1d_v2i16_trap
+ Int64Regs:$s, Int32Regs:$x, Int16Regs:$r, Int16Regs:$g),
+ (SUST_P_1D_V2B16_TRAP Int64Regs:$s, Int32Regs:$x,
+ Int16Regs:$r, Int16Regs:$g)>;
+
+def : Pat<(int_nvvm_sust_p_1d_v2i32_trap
+ Int64Regs:$s, Int32Regs:$x, Int32Regs:$r, Int32Regs:$g),
+ (SUST_P_1D_V2B32_TRAP Int64Regs:$s, Int32Regs:$x,
+ Int32Regs:$r, Int32Regs:$g)>;
+
+def : Pat<(int_nvvm_sust_p_1d_v4i8_trap
+ Int64Regs:$s, Int32Regs:$x,
+ Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
+ (SUST_P_1D_V4B8_TRAP Int64Regs:$s, Int32Regs:$x,
+ Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a)>;
+
+def : Pat<(int_nvvm_sust_p_1d_v4i16_trap
+ Int64Regs:$s, Int32Regs:$x,
+ Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
+ (SUST_P_1D_V4B16_TRAP Int64Regs:$s, Int32Regs:$x,
+ Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a)>;
+
+def : Pat<(int_nvvm_sust_p_1d_v4i32_trap
+ Int64Regs:$s, Int32Regs:$x,
+ Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a),
+ (SUST_P_1D_V4B32_TRAP Int64Regs:$s, Int32Regs:$x,
+ Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a)>;
+
+
+
+def : Pat<(int_nvvm_sust_p_1d_array_i8_trap
+ Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int16Regs:$r),
+ (SUST_P_1D_ARRAY_B8_TRAP Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
+ Int16Regs:$r)>;
+
+def : Pat<(int_nvvm_sust_p_1d_array_i16_trap
+ Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int16Regs:$r),
+ (SUST_P_1D_ARRAY_B16_TRAP Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
+ Int16Regs:$r)>;
+
+def : Pat<(int_nvvm_sust_p_1d_array_i32_trap
+ Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$r),
+ (SUST_P_1D_ARRAY_B32_TRAP Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
+ Int32Regs:$r)>;
+
+def : Pat<(int_nvvm_sust_p_1d_array_v2i8_trap
+ Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int16Regs:$r, Int16Regs:$g),
+ (SUST_P_1D_ARRAY_V2B8_TRAP Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
+ Int16Regs:$r, Int16Regs:$g)>;
+
+def : Pat<(int_nvvm_sust_p_1d_array_v2i16_trap
+ Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int16Regs:$r, Int16Regs:$g),
+ (SUST_P_1D_ARRAY_V2B16_TRAP Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
+ Int16Regs:$r, Int16Regs:$g)>;
+
+def : Pat<(int_nvvm_sust_p_1d_array_v2i32_trap
+ Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$r, Int32Regs:$g),
+ (SUST_P_1D_ARRAY_V2B32_TRAP Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
+ Int32Regs:$r, Int32Regs:$g)>;
+
+def : Pat<(int_nvvm_sust_p_1d_array_v4i8_trap
+ Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
+ Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
+ (SUST_P_1D_ARRAY_V4B8_TRAP Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
+ Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a)>;
+
+def : Pat<(int_nvvm_sust_p_1d_array_v4i16_trap
+ Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
+ Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
+ (SUST_P_1D_ARRAY_V4B16_TRAP Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
+ Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a)>;
+
+def : Pat<(int_nvvm_sust_p_1d_array_v4i32_trap
+ Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
+ Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a),
+ (SUST_P_1D_ARRAY_V4B32_TRAP Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
+ Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a)>;
+
+
+
+def : Pat<(int_nvvm_sust_p_2d_i8_trap
+ Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int16Regs:$r),
+ (SUST_P_2D_B8_TRAP Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
+ Int16Regs:$r)>;
+
+def : Pat<(int_nvvm_sust_p_2d_i16_trap
+ Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int16Regs:$r),
+ (SUST_P_2D_B16_TRAP Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
+ Int16Regs:$r)>;
+
+def : Pat<(int_nvvm_sust_p_2d_i32_trap
+ Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$r),
+ (SUST_P_2D_B32_TRAP Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
+ Int32Regs:$r)>;
+
+def : Pat<(int_nvvm_sust_p_2d_v2i8_trap
+ Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int16Regs:$r, Int16Regs:$g),
+ (SUST_P_2D_V2B8_TRAP Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
+ Int16Regs:$r, Int16Regs:$g)>;
+
+def : Pat<(int_nvvm_sust_p_2d_v2i16_trap
+ Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int16Regs:$r, Int16Regs:$g),
+ (SUST_P_2D_V2B16_TRAP Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
+ Int16Regs:$r, Int16Regs:$g)>;
+
+def : Pat<(int_nvvm_sust_p_2d_v2i32_trap
+ Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$r, Int32Regs:$g),
+ (SUST_P_2D_V2B32_TRAP Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
+ Int32Regs:$r, Int32Regs:$g)>;
+
+def : Pat<(int_nvvm_sust_p_2d_v4i8_trap
+ Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
+ Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
+ (SUST_P_2D_V4B8_TRAP Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
+ Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a)>;
+
+def : Pat<(int_nvvm_sust_p_2d_v4i16_trap
+ Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
+ Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
+ (SUST_P_2D_V4B16_TRAP Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
+ Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a)>;
+
+def : Pat<(int_nvvm_sust_p_2d_v4i32_trap
+ Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
+ Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a),
+ (SUST_P_2D_V4B32_TRAP Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
+ Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a)>;
+
+
+
+def : Pat<(int_nvvm_sust_p_2d_array_i8_trap
+ Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y, Int16Regs:$r),
+ (SUST_P_2D_ARRAY_B8_TRAP Int64Regs:$s,
+ Int32Regs:$l, Int32Regs:$x, Int32Regs:$y,
+ Int16Regs:$r)>;
+
+def : Pat<(int_nvvm_sust_p_2d_array_i16_trap
+ Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y, Int16Regs:$r),
+ (SUST_P_2D_ARRAY_B16_TRAP Int64Regs:$s,
+ Int32Regs:$l, Int32Regs:$x, Int32Regs:$y,
+ Int16Regs:$r)>;
+
+def : Pat<(int_nvvm_sust_p_2d_array_i32_trap
+ Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y, Int32Regs:$r),
+ (SUST_P_2D_ARRAY_B32_TRAP Int64Regs:$s,
+ Int32Regs:$l, Int32Regs:$x, Int32Regs:$y,
+ Int32Regs:$r)>;
+
+def : Pat<(int_nvvm_sust_p_2d_array_v2i8_trap
+ Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y,
+ Int16Regs:$r, Int16Regs:$g),
+ (SUST_P_2D_ARRAY_V2B8_TRAP Int64Regs:$s, Int32Regs:$l,
+ Int32Regs:$x, Int32Regs:$y,
+ Int16Regs:$r, Int16Regs:$g)>;
+
+def : Pat<(int_nvvm_sust_p_2d_array_v2i16_trap
+ Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y,
+ Int16Regs:$r, Int16Regs:$g),
+ (SUST_P_2D_ARRAY_V2B16_TRAP Int64Regs:$s, Int32Regs:$l,
+ Int32Regs:$x, Int32Regs:$y,
+ Int16Regs:$r, Int16Regs:$g)>;
+
+def : Pat<(int_nvvm_sust_p_2d_array_v2i32_trap
+ Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y, Int32Regs:$r,
+ Int32Regs:$g),
+ (SUST_P_2D_ARRAY_V2B32_TRAP Int64Regs:$s, Int32Regs:$l,
+ Int32Regs:$x, Int32Regs:$y, Int32Regs:$r, Int32Regs:$g)>;
+
+def : Pat<(int_nvvm_sust_p_2d_array_v4i8_trap
+ Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y,
+ Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
+ (SUST_P_2D_ARRAY_V4B8_TRAP Int64Regs:$s,
+ Int32Regs:$l, Int32Regs:$x, Int32Regs:$y,
+ Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a)>;
+
+def : Pat<(int_nvvm_sust_p_2d_array_v4i16_trap
+ Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y,
+ Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
+ (SUST_P_2D_ARRAY_V4B16_TRAP Int64Regs:$s,
+ Int32Regs:$l, Int32Regs:$x, Int32Regs:$y,
+ Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a)>;
+
+def : Pat<(int_nvvm_sust_p_2d_array_v4i32_trap
+ Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y,
+ Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a),
+ (SUST_P_2D_ARRAY_V4B32_TRAP Int64Regs:$s, Int32Regs:$l,
+ Int32Regs:$x, Int32Regs:$y,
+ Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a)>;
+
+
+
+def : Pat<(int_nvvm_sust_p_3d_i8_trap
+ Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
+ Int16Regs:$r),
+ (SUST_P_3D_B8_TRAP Int64Regs:$s,
+ Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
+ Int16Regs:$r)>;
+
+def : Pat<(int_nvvm_sust_p_3d_i16_trap
+ Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
+ Int16Regs:$r),
+ (SUST_P_3D_B16_TRAP Int64Regs:$s,
+ Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
+ Int16Regs:$r)>;
+
+def : Pat<(int_nvvm_sust_p_3d_i32_trap
+ Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
+ Int32Regs:$r),
+ (SUST_P_3D_B32_TRAP Int64Regs:$s,
+ Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
+ Int32Regs:$r)>;
+
+def : Pat<(int_nvvm_sust_p_3d_v2i8_trap
+ Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
+ Int16Regs:$r, Int16Regs:$g),
+ (SUST_P_3D_V2B8_TRAP Int64Regs:$s,
+ Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
+ Int16Regs:$r, Int16Regs:$g)>;
+
+def : Pat<(int_nvvm_sust_p_3d_v2i16_trap
+ Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
+ Int16Regs:$r, Int16Regs:$g),
+ (SUST_P_3D_V2B16_TRAP Int64Regs:$s,
+ Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
+ Int16Regs:$r, Int16Regs:$g)>;
+
+def : Pat<(int_nvvm_sust_p_3d_v2i32_trap
+ Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
+ Int32Regs:$r, Int32Regs:$g),
+ (SUST_P_3D_V2B32_TRAP Int64Regs:$s,
+ Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
+ Int32Regs:$r, Int32Regs:$g)>;
+
+def : Pat<(int_nvvm_sust_p_3d_v4i8_trap
+ Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
+ Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
+ (SUST_P_3D_V4B8_TRAP Int64Regs:$s,
+ Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
+ Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a)>;
+
+def : Pat<(int_nvvm_sust_p_3d_v4i16_trap
+ Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
+ Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
+ (SUST_P_3D_V4B16_TRAP Int64Regs:$s,
+ Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
+ Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a)>;
+
+def : Pat<(int_nvvm_sust_p_3d_v4i32_trap
+ Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
+ Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a),
+ (SUST_P_3D_V4B32_TRAP Int64Regs:$s,
+ Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
+ Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a)>;
+
+
//===-- Old PTX Back-end Intrinsics ---------------------------------------===//
diff --git a/lib/Target/NVPTX/NVPTXLowerAggrCopies.h b/lib/Target/NVPTX/NVPTXLowerAggrCopies.h
index c9aa87d..5ec1fc9 100644
--- a/lib/Target/NVPTX/NVPTXLowerAggrCopies.h
+++ b/lib/Target/NVPTX/NVPTXLowerAggrCopies.h
@@ -27,17 +27,17 @@ struct NVPTXLowerAggrCopies : public FunctionPass {
NVPTXLowerAggrCopies() : FunctionPass(ID) {}
- void getAnalysisUsage(AnalysisUsage &AU) const {
+ void getAnalysisUsage(AnalysisUsage &AU) const override {
AU.addRequired<DataLayoutPass>();
AU.addPreserved("stack-protector");
AU.addPreserved<MachineFunctionAnalysis>();
}
- virtual bool runOnFunction(Function &F);
+ bool runOnFunction(Function &F) override;
static const unsigned MaxAggrCopySize = 128;
- virtual const char *getPassName() const {
+ const char *getPassName() const override {
return "Lower aggregate copies/intrinsics into loops";
}
};
diff --git a/lib/Target/NVPTX/NVPTXMCExpr.cpp b/lib/Target/NVPTX/NVPTXMCExpr.cpp
index ca24764..137248b 100644
--- a/lib/Target/NVPTX/NVPTXMCExpr.cpp
+++ b/lib/Target/NVPTX/NVPTXMCExpr.cpp
@@ -7,13 +7,14 @@
//
//===----------------------------------------------------------------------===//
-#define DEBUG_TYPE "nvptx-mcexpr"
#include "NVPTXMCExpr.h"
#include "llvm/ADT/StringExtras.h"
#include "llvm/MC/MCAssembler.h"
#include "llvm/MC/MCContext.h"
using namespace llvm;
+#define DEBUG_TYPE "nvptx-mcexpr"
+
const NVPTXFloatMCExpr*
NVPTXFloatMCExpr::Create(VariantKind Kind, APFloat Flt, MCContext &Ctx) {
return new (Ctx) NVPTXFloatMCExpr(Kind, Flt);
diff --git a/lib/Target/NVPTX/NVPTXMCExpr.h b/lib/Target/NVPTX/NVPTXMCExpr.h
index 0efb231..0ee018c 100644
--- a/lib/Target/NVPTX/NVPTXMCExpr.h
+++ b/lib/Target/NVPTX/NVPTXMCExpr.h
@@ -61,18 +61,18 @@ public:
/// @}
- void PrintImpl(raw_ostream &OS) const;
+ void PrintImpl(raw_ostream &OS) const override;
bool EvaluateAsRelocatableImpl(MCValue &Res,
- const MCAsmLayout *Layout) const {
+ const MCAsmLayout *Layout) const override {
return false;
}
- void AddValueSymbols(MCAssembler *) const {};
- const MCSection *FindAssociatedSection() const {
- return NULL;
+  void AddValueSymbols(MCAssembler *) const override {}
+ const MCSection *FindAssociatedSection() const override {
+ return nullptr;
}
// There are no TLS NVPTXMCExprs at the moment.
- void fixELFSymbolsInTLSFixups(MCAssembler &Asm) const {}
+ void fixELFSymbolsInTLSFixups(MCAssembler &Asm) const override {}
static bool classof(const MCExpr *E) {
return E->getKind() == MCExpr::Target;
diff --git a/lib/Target/NVPTX/NVPTXMachineFunctionInfo.h b/lib/Target/NVPTX/NVPTXMachineFunctionInfo.h
new file mode 100644
index 0000000..67fb390
--- /dev/null
+++ b/lib/Target/NVPTX/NVPTXMachineFunctionInfo.h
@@ -0,0 +1,46 @@
+//===-- NVPTXMachineFunctionInfo.h - NVPTX-specific Function Info --------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This class is attached to a MachineFunction instance and tracks target-
+// dependent information.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/CodeGen/MachineFunction.h"
+
+namespace llvm {
+class NVPTXMachineFunctionInfo : public MachineFunctionInfo {
+private:
+ /// Stores a mapping from index to symbol name for removing image handles
+ /// on Fermi.
+ SmallVector<std::string, 8> ImageHandleList;
+
+public:
+ NVPTXMachineFunctionInfo(MachineFunction &MF) {}
+
+  /// Returns the index for the symbol \p Symbol. If the symbol was previously
+ /// added, the same index is returned. Otherwise, the symbol is added and the
+ /// new index is returned.
+ unsigned getImageHandleSymbolIndex(const char *Symbol) {
+ // Is the symbol already present?
+ for (unsigned i = 0, e = ImageHandleList.size(); i != e; ++i)
+ if (ImageHandleList[i] == std::string(Symbol))
+ return i;
+    // Not found; insert it
+ ImageHandleList.push_back(Symbol);
+ return ImageHandleList.size()-1;
+ }
+
+ /// Returns the symbol name at the given index.
+ const char *getImageHandleSymbol(unsigned Idx) const {
+ assert(ImageHandleList.size() > Idx && "Bad index");
+ return ImageHandleList[Idx].c_str();
+ }
+};
+}
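As a usage sketch (illustrative, not part of the patch), the interning scheme round-trips like this; the handle symbol name here is made up:

    // Sketch only: the same symbol always yields the same index, and the
    // index maps back to the symbol name stored in ImageHandleList.
    #include "NVPTXMachineFunctionInfo.h"
    #include "llvm/ADT/StringRef.h"
    #include <cassert>

    void internExample(llvm::MachineFunction &MF) {
      llvm::NVPTXMachineFunctionInfo *MFI =
          MF.getInfo<llvm::NVPTXMachineFunctionInfo>();
      unsigned Idx = MFI->getImageHandleSymbolIndex("kernel_param_0");
      // Re-adding the same symbol returns the existing index.
      assert(Idx == MFI->getImageHandleSymbolIndex("kernel_param_0"));
      // The index maps back to the interned symbol.
      assert(llvm::StringRef(MFI->getImageHandleSymbol(Idx)) ==
             "kernel_param_0");
    }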
diff --git a/lib/Target/NVPTX/NVPTXPrologEpilogPass.cpp b/lib/Target/NVPTX/NVPTXPrologEpilogPass.cpp
index d5b042a..348ab0c 100644
--- a/lib/Target/NVPTX/NVPTXPrologEpilogPass.cpp
+++ b/lib/Target/NVPTX/NVPTXPrologEpilogPass.cpp
@@ -25,13 +25,15 @@
using namespace llvm;
+#define DEBUG_TYPE "nvptx-prolog-epilog"
+
namespace {
class NVPTXPrologEpilogPass : public MachineFunctionPass {
public:
static char ID;
NVPTXPrologEpilogPass() : MachineFunctionPass(ID) {}
- virtual bool runOnMachineFunction(MachineFunction &MF);
+ bool runOnMachineFunction(MachineFunction &MF) override;
private:
void calculateFrameObjectOffsets(MachineFunction &Fn);
@@ -58,7 +60,7 @@ bool NVPTXPrologEpilogPass::runOnMachineFunction(MachineFunction &MF) {
for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) {
if (!MI->getOperand(i).isFI())
continue;
- TRI.eliminateFrameIndex(MI, 0, i, NULL);
+ TRI.eliminateFrameIndex(MI, 0, i, nullptr);
Modified = true;
}
}
diff --git a/lib/Target/NVPTX/NVPTXRegisterInfo.cpp b/lib/Target/NVPTX/NVPTXRegisterInfo.cpp
index 4d3a1d9..62f288b 100644
--- a/lib/Target/NVPTX/NVPTXRegisterInfo.cpp
+++ b/lib/Target/NVPTX/NVPTXRegisterInfo.cpp
@@ -11,8 +11,6 @@
//
//===----------------------------------------------------------------------===//
-#define DEBUG_TYPE "nvptx-reg-info"
-
#include "NVPTXRegisterInfo.h"
#include "NVPTX.h"
#include "NVPTXSubtarget.h"
@@ -25,6 +23,8 @@
using namespace llvm;
+#define DEBUG_TYPE "nvptx-reg-info"
+
namespace llvm {
std::string getNVPTXRegClassName(TargetRegisterClass const *RC) {
if (RC == &NVPTX::Float32RegsRegClass) {
@@ -78,19 +78,12 @@ NVPTXRegisterInfo::NVPTXRegisterInfo(const NVPTXSubtarget &st)
#include "NVPTXGenRegisterInfo.inc"
/// NVPTX Callee Saved Registers
-const uint16_t *
+const MCPhysReg *
NVPTXRegisterInfo::getCalleeSavedRegs(const MachineFunction *MF) const {
- static const uint16_t CalleeSavedRegs[] = { 0 };
+ static const MCPhysReg CalleeSavedRegs[] = { 0 };
return CalleeSavedRegs;
}
-// NVPTX Callee Saved Reg Classes
-const TargetRegisterClass *const *
-NVPTXRegisterInfo::getCalleeSavedRegClasses(const MachineFunction *MF) const {
- static const TargetRegisterClass *const CalleeSavedRegClasses[] = { 0 };
- return CalleeSavedRegClasses;
-}
-
BitVector NVPTXRegisterInfo::getReservedRegs(const MachineFunction &MF) const {
BitVector Reserved(getNumRegs());
return Reserved;
@@ -113,12 +106,6 @@ void NVPTXRegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II,
MI.getOperand(FIOperandNum + 1).ChangeToImmediate(Offset);
}
-int NVPTXRegisterInfo::getDwarfRegNum(unsigned RegNum, bool isEH) const {
- return 0;
-}
-
unsigned NVPTXRegisterInfo::getFrameRegister(const MachineFunction &MF) const {
return NVPTX::VRFrame;
}
-
-unsigned NVPTXRegisterInfo::getRARegister() const { return 0; }
diff --git a/lib/Target/NVPTX/NVPTXRegisterInfo.h b/lib/Target/NVPTX/NVPTXRegisterInfo.h
index 0a20f29..a7594be 100644
--- a/lib/Target/NVPTX/NVPTXRegisterInfo.h
+++ b/lib/Target/NVPTX/NVPTXRegisterInfo.h
@@ -16,11 +16,10 @@
#include "ManagedStringPool.h"
#include "llvm/Target/TargetRegisterInfo.h"
+#include <sstream>
#define GET_REGINFO_HEADER
#include "NVPTXGenRegisterInfo.inc"
-#include "llvm/Target/TargetRegisterInfo.h"
-#include <sstream>
namespace llvm {
@@ -42,22 +41,16 @@ public:
//------------------------------------------------------
// NVPTX callee saved registers
- virtual const uint16_t *
- getCalleeSavedRegs(const MachineFunction *MF = 0) const;
-
- // NVPTX callee saved register classes
- virtual const TargetRegisterClass *const *
- getCalleeSavedRegClasses(const MachineFunction *MF) const;
+ const MCPhysReg *
+ getCalleeSavedRegs(const MachineFunction *MF = nullptr) const override;
- virtual BitVector getReservedRegs(const MachineFunction &MF) const;
+ BitVector getReservedRegs(const MachineFunction &MF) const override;
- virtual void eliminateFrameIndex(MachineBasicBlock::iterator MI, int SPAdj,
- unsigned FIOperandNum,
- RegScavenger *RS = NULL) const;
+ void eliminateFrameIndex(MachineBasicBlock::iterator MI, int SPAdj,
+ unsigned FIOperandNum,
+ RegScavenger *RS = nullptr) const override;
- virtual int getDwarfRegNum(unsigned RegNum, bool isEH) const;
- virtual unsigned getFrameRegister(const MachineFunction &MF) const;
- virtual unsigned getRARegister() const;
+ unsigned getFrameRegister(const MachineFunction &MF) const override;
ManagedStringPool *getStrPool() const {
return const_cast<ManagedStringPool *>(&ManagedStrPool);
diff --git a/lib/Target/NVPTX/NVPTXReplaceImageHandles.cpp b/lib/Target/NVPTX/NVPTXReplaceImageHandles.cpp
new file mode 100644
index 0000000..afd53a6
--- /dev/null
+++ b/lib/Target/NVPTX/NVPTXReplaceImageHandles.cpp
@@ -0,0 +1,357 @@
+//===-- NVPTXReplaceImageHandles.cpp - Replace image handles for Fermi ----===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// On Fermi, image handles are not supported. To work around this, we traverse
+// the machine code and replace image handles with concrete symbols. For this
+// to work reliably, all function calls must be inlined.
+//
+//===----------------------------------------------------------------------===//
+
+#include "NVPTX.h"
+#include "NVPTXMachineFunctionInfo.h"
+#include "llvm/CodeGen/MachineFunction.h"
+#include "llvm/CodeGen/MachineFunctionPass.h"
+#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/Support/raw_ostream.h"
+#include "llvm/ADT/DenseSet.h"
+
+using namespace llvm;
+
+namespace {
+class NVPTXReplaceImageHandles : public MachineFunctionPass {
+private:
+ static char ID;
+ DenseSet<MachineInstr *> InstrsToRemove;
+
+public:
+ NVPTXReplaceImageHandles();
+
+ bool runOnMachineFunction(MachineFunction &MF) override;
+private:
+ bool processInstr(MachineInstr &MI);
+ void replaceImageHandle(MachineOperand &Op, MachineFunction &MF);
+};
+}
+
+char NVPTXReplaceImageHandles::ID = 0;
+
+NVPTXReplaceImageHandles::NVPTXReplaceImageHandles()
+ : MachineFunctionPass(ID) {}
+
+bool NVPTXReplaceImageHandles::runOnMachineFunction(MachineFunction &MF) {
+ bool Changed = false;
+ InstrsToRemove.clear();
+
+ for (MachineFunction::iterator BI = MF.begin(), BE = MF.end(); BI != BE;
+ ++BI) {
+ for (MachineBasicBlock::iterator I = (*BI).begin(), E = (*BI).end();
+ I != E; ++I) {
+ MachineInstr &MI = *I;
+ Changed |= processInstr(MI);
+ }
+ }
+
+  // Now clean up any handle-access instructions.
+  // This is needed in debug mode when code cleanup passes are not executed,
+  // but the handle accesses must still be eliminated because they are not
+  // valid instructions when image handles are disabled.
+ for (DenseSet<MachineInstr *>::iterator I = InstrsToRemove.begin(),
+ E = InstrsToRemove.end(); I != E; ++I) {
+ (*I)->eraseFromParent();
+ }
+
+ return Changed;
+}
+
+bool NVPTXReplaceImageHandles::processInstr(MachineInstr &MI) {
+ MachineFunction &MF = *MI.getParent()->getParent();
+ // Check if we have a surface/texture instruction
+ switch (MI.getOpcode()) {
+ default: return false;
+ case NVPTX::TEX_1D_F32_I32:
+ case NVPTX::TEX_1D_F32_F32:
+ case NVPTX::TEX_1D_F32_F32_LEVEL:
+ case NVPTX::TEX_1D_F32_F32_GRAD:
+ case NVPTX::TEX_1D_I32_I32:
+ case NVPTX::TEX_1D_I32_F32:
+ case NVPTX::TEX_1D_I32_F32_LEVEL:
+ case NVPTX::TEX_1D_I32_F32_GRAD:
+ case NVPTX::TEX_1D_ARRAY_F32_I32:
+ case NVPTX::TEX_1D_ARRAY_F32_F32:
+ case NVPTX::TEX_1D_ARRAY_F32_F32_LEVEL:
+ case NVPTX::TEX_1D_ARRAY_F32_F32_GRAD:
+ case NVPTX::TEX_1D_ARRAY_I32_I32:
+ case NVPTX::TEX_1D_ARRAY_I32_F32:
+ case NVPTX::TEX_1D_ARRAY_I32_F32_LEVEL:
+ case NVPTX::TEX_1D_ARRAY_I32_F32_GRAD:
+ case NVPTX::TEX_2D_F32_I32:
+ case NVPTX::TEX_2D_F32_F32:
+ case NVPTX::TEX_2D_F32_F32_LEVEL:
+ case NVPTX::TEX_2D_F32_F32_GRAD:
+ case NVPTX::TEX_2D_I32_I32:
+ case NVPTX::TEX_2D_I32_F32:
+ case NVPTX::TEX_2D_I32_F32_LEVEL:
+ case NVPTX::TEX_2D_I32_F32_GRAD:
+ case NVPTX::TEX_2D_ARRAY_F32_I32:
+ case NVPTX::TEX_2D_ARRAY_F32_F32:
+ case NVPTX::TEX_2D_ARRAY_F32_F32_LEVEL:
+ case NVPTX::TEX_2D_ARRAY_F32_F32_GRAD:
+ case NVPTX::TEX_2D_ARRAY_I32_I32:
+ case NVPTX::TEX_2D_ARRAY_I32_F32:
+ case NVPTX::TEX_2D_ARRAY_I32_F32_LEVEL:
+ case NVPTX::TEX_2D_ARRAY_I32_F32_GRAD:
+ case NVPTX::TEX_3D_F32_I32:
+ case NVPTX::TEX_3D_F32_F32:
+ case NVPTX::TEX_3D_F32_F32_LEVEL:
+ case NVPTX::TEX_3D_F32_F32_GRAD:
+ case NVPTX::TEX_3D_I32_I32:
+ case NVPTX::TEX_3D_I32_F32:
+ case NVPTX::TEX_3D_I32_F32_LEVEL:
+ case NVPTX::TEX_3D_I32_F32_GRAD: {
+ // This is a texture fetch, so operand 4 is a texref and operand 5 is
+ // a samplerref
+ MachineOperand &TexHandle = MI.getOperand(4);
+ MachineOperand &SampHandle = MI.getOperand(5);
+
+ replaceImageHandle(TexHandle, MF);
+ replaceImageHandle(SampHandle, MF);
+
+ return true;
+ }
+ case NVPTX::SULD_1D_I8_TRAP:
+ case NVPTX::SULD_1D_I16_TRAP:
+ case NVPTX::SULD_1D_I32_TRAP:
+ case NVPTX::SULD_1D_ARRAY_I8_TRAP:
+ case NVPTX::SULD_1D_ARRAY_I16_TRAP:
+ case NVPTX::SULD_1D_ARRAY_I32_TRAP:
+ case NVPTX::SULD_2D_I8_TRAP:
+ case NVPTX::SULD_2D_I16_TRAP:
+ case NVPTX::SULD_2D_I32_TRAP:
+ case NVPTX::SULD_2D_ARRAY_I8_TRAP:
+ case NVPTX::SULD_2D_ARRAY_I16_TRAP:
+ case NVPTX::SULD_2D_ARRAY_I32_TRAP:
+ case NVPTX::SULD_3D_I8_TRAP:
+ case NVPTX::SULD_3D_I16_TRAP:
+ case NVPTX::SULD_3D_I32_TRAP: {
+ // This is a V1 surface load, so operand 1 is a surfref
+ MachineOperand &SurfHandle = MI.getOperand(1);
+
+ replaceImageHandle(SurfHandle, MF);
+
+ return true;
+ }
+ case NVPTX::SULD_1D_V2I8_TRAP:
+ case NVPTX::SULD_1D_V2I16_TRAP:
+ case NVPTX::SULD_1D_V2I32_TRAP:
+ case NVPTX::SULD_1D_ARRAY_V2I8_TRAP:
+ case NVPTX::SULD_1D_ARRAY_V2I16_TRAP:
+ case NVPTX::SULD_1D_ARRAY_V2I32_TRAP:
+ case NVPTX::SULD_2D_V2I8_TRAP:
+ case NVPTX::SULD_2D_V2I16_TRAP:
+ case NVPTX::SULD_2D_V2I32_TRAP:
+ case NVPTX::SULD_2D_ARRAY_V2I8_TRAP:
+ case NVPTX::SULD_2D_ARRAY_V2I16_TRAP:
+ case NVPTX::SULD_2D_ARRAY_V2I32_TRAP:
+ case NVPTX::SULD_3D_V2I8_TRAP:
+ case NVPTX::SULD_3D_V2I16_TRAP:
+ case NVPTX::SULD_3D_V2I32_TRAP: {
+ // This is a V2 surface load, so operand 2 is a surfref
+ MachineOperand &SurfHandle = MI.getOperand(2);
+
+ replaceImageHandle(SurfHandle, MF);
+
+ return true;
+ }
+ case NVPTX::SULD_1D_V4I8_TRAP:
+ case NVPTX::SULD_1D_V4I16_TRAP:
+ case NVPTX::SULD_1D_V4I32_TRAP:
+ case NVPTX::SULD_1D_ARRAY_V4I8_TRAP:
+ case NVPTX::SULD_1D_ARRAY_V4I16_TRAP:
+ case NVPTX::SULD_1D_ARRAY_V4I32_TRAP:
+ case NVPTX::SULD_2D_V4I8_TRAP:
+ case NVPTX::SULD_2D_V4I16_TRAP:
+ case NVPTX::SULD_2D_V4I32_TRAP:
+ case NVPTX::SULD_2D_ARRAY_V4I8_TRAP:
+ case NVPTX::SULD_2D_ARRAY_V4I16_TRAP:
+ case NVPTX::SULD_2D_ARRAY_V4I32_TRAP:
+ case NVPTX::SULD_3D_V4I8_TRAP:
+ case NVPTX::SULD_3D_V4I16_TRAP:
+ case NVPTX::SULD_3D_V4I32_TRAP: {
+ // This is a V4 surface load, so operand 4 is a surfref
+ MachineOperand &SurfHandle = MI.getOperand(4);
+
+ replaceImageHandle(SurfHandle, MF);
+
+ return true;
+ }
+ case NVPTX::SUST_B_1D_B8_TRAP:
+ case NVPTX::SUST_B_1D_B16_TRAP:
+ case NVPTX::SUST_B_1D_B32_TRAP:
+ case NVPTX::SUST_B_1D_V2B8_TRAP:
+ case NVPTX::SUST_B_1D_V2B16_TRAP:
+ case NVPTX::SUST_B_1D_V2B32_TRAP:
+ case NVPTX::SUST_B_1D_V4B8_TRAP:
+ case NVPTX::SUST_B_1D_V4B16_TRAP:
+ case NVPTX::SUST_B_1D_V4B32_TRAP:
+ case NVPTX::SUST_B_1D_ARRAY_B8_TRAP:
+ case NVPTX::SUST_B_1D_ARRAY_B16_TRAP:
+ case NVPTX::SUST_B_1D_ARRAY_B32_TRAP:
+ case NVPTX::SUST_B_1D_ARRAY_V2B8_TRAP:
+ case NVPTX::SUST_B_1D_ARRAY_V2B16_TRAP:
+ case NVPTX::SUST_B_1D_ARRAY_V2B32_TRAP:
+ case NVPTX::SUST_B_1D_ARRAY_V4B8_TRAP:
+ case NVPTX::SUST_B_1D_ARRAY_V4B16_TRAP:
+ case NVPTX::SUST_B_1D_ARRAY_V4B32_TRAP:
+ case NVPTX::SUST_B_2D_B8_TRAP:
+ case NVPTX::SUST_B_2D_B16_TRAP:
+ case NVPTX::SUST_B_2D_B32_TRAP:
+ case NVPTX::SUST_B_2D_V2B8_TRAP:
+ case NVPTX::SUST_B_2D_V2B16_TRAP:
+ case NVPTX::SUST_B_2D_V2B32_TRAP:
+ case NVPTX::SUST_B_2D_V4B8_TRAP:
+ case NVPTX::SUST_B_2D_V4B16_TRAP:
+ case NVPTX::SUST_B_2D_V4B32_TRAP:
+ case NVPTX::SUST_B_2D_ARRAY_B8_TRAP:
+ case NVPTX::SUST_B_2D_ARRAY_B16_TRAP:
+ case NVPTX::SUST_B_2D_ARRAY_B32_TRAP:
+ case NVPTX::SUST_B_2D_ARRAY_V2B8_TRAP:
+ case NVPTX::SUST_B_2D_ARRAY_V2B16_TRAP:
+ case NVPTX::SUST_B_2D_ARRAY_V2B32_TRAP:
+ case NVPTX::SUST_B_2D_ARRAY_V4B8_TRAP:
+ case NVPTX::SUST_B_2D_ARRAY_V4B16_TRAP:
+ case NVPTX::SUST_B_2D_ARRAY_V4B32_TRAP:
+ case NVPTX::SUST_B_3D_B8_TRAP:
+ case NVPTX::SUST_B_3D_B16_TRAP:
+ case NVPTX::SUST_B_3D_B32_TRAP:
+ case NVPTX::SUST_B_3D_V2B8_TRAP:
+ case NVPTX::SUST_B_3D_V2B16_TRAP:
+ case NVPTX::SUST_B_3D_V2B32_TRAP:
+ case NVPTX::SUST_B_3D_V4B8_TRAP:
+ case NVPTX::SUST_B_3D_V4B16_TRAP:
+ case NVPTX::SUST_B_3D_V4B32_TRAP:
+ case NVPTX::SUST_P_1D_B8_TRAP:
+ case NVPTX::SUST_P_1D_B16_TRAP:
+ case NVPTX::SUST_P_1D_B32_TRAP:
+ case NVPTX::SUST_P_1D_V2B8_TRAP:
+ case NVPTX::SUST_P_1D_V2B16_TRAP:
+ case NVPTX::SUST_P_1D_V2B32_TRAP:
+ case NVPTX::SUST_P_1D_V4B8_TRAP:
+ case NVPTX::SUST_P_1D_V4B16_TRAP:
+ case NVPTX::SUST_P_1D_V4B32_TRAP:
+ case NVPTX::SUST_P_1D_ARRAY_B8_TRAP:
+ case NVPTX::SUST_P_1D_ARRAY_B16_TRAP:
+ case NVPTX::SUST_P_1D_ARRAY_B32_TRAP:
+ case NVPTX::SUST_P_1D_ARRAY_V2B8_TRAP:
+ case NVPTX::SUST_P_1D_ARRAY_V2B16_TRAP:
+ case NVPTX::SUST_P_1D_ARRAY_V2B32_TRAP:
+ case NVPTX::SUST_P_1D_ARRAY_V4B8_TRAP:
+ case NVPTX::SUST_P_1D_ARRAY_V4B16_TRAP:
+ case NVPTX::SUST_P_1D_ARRAY_V4B32_TRAP:
+ case NVPTX::SUST_P_2D_B8_TRAP:
+ case NVPTX::SUST_P_2D_B16_TRAP:
+ case NVPTX::SUST_P_2D_B32_TRAP:
+ case NVPTX::SUST_P_2D_V2B8_TRAP:
+ case NVPTX::SUST_P_2D_V2B16_TRAP:
+ case NVPTX::SUST_P_2D_V2B32_TRAP:
+ case NVPTX::SUST_P_2D_V4B8_TRAP:
+ case NVPTX::SUST_P_2D_V4B16_TRAP:
+ case NVPTX::SUST_P_2D_V4B32_TRAP:
+ case NVPTX::SUST_P_2D_ARRAY_B8_TRAP:
+ case NVPTX::SUST_P_2D_ARRAY_B16_TRAP:
+ case NVPTX::SUST_P_2D_ARRAY_B32_TRAP:
+ case NVPTX::SUST_P_2D_ARRAY_V2B8_TRAP:
+ case NVPTX::SUST_P_2D_ARRAY_V2B16_TRAP:
+ case NVPTX::SUST_P_2D_ARRAY_V2B32_TRAP:
+ case NVPTX::SUST_P_2D_ARRAY_V4B8_TRAP:
+ case NVPTX::SUST_P_2D_ARRAY_V4B16_TRAP:
+ case NVPTX::SUST_P_2D_ARRAY_V4B32_TRAP:
+ case NVPTX::SUST_P_3D_B8_TRAP:
+ case NVPTX::SUST_P_3D_B16_TRAP:
+ case NVPTX::SUST_P_3D_B32_TRAP:
+ case NVPTX::SUST_P_3D_V2B8_TRAP:
+ case NVPTX::SUST_P_3D_V2B16_TRAP:
+ case NVPTX::SUST_P_3D_V2B32_TRAP:
+ case NVPTX::SUST_P_3D_V4B8_TRAP:
+ case NVPTX::SUST_P_3D_V4B16_TRAP:
+ case NVPTX::SUST_P_3D_V4B32_TRAP: {
+ // This is a surface store, so operand 0 is a surfref
+ MachineOperand &SurfHandle = MI.getOperand(0);
+
+ replaceImageHandle(SurfHandle, MF);
+
+ return true;
+ }
+ case NVPTX::TXQ_CHANNEL_ORDER:
+ case NVPTX::TXQ_CHANNEL_DATA_TYPE:
+ case NVPTX::TXQ_WIDTH:
+ case NVPTX::TXQ_HEIGHT:
+ case NVPTX::TXQ_DEPTH:
+ case NVPTX::TXQ_ARRAY_SIZE:
+ case NVPTX::TXQ_NUM_SAMPLES:
+ case NVPTX::TXQ_NUM_MIPMAP_LEVELS:
+ case NVPTX::SUQ_CHANNEL_ORDER:
+ case NVPTX::SUQ_CHANNEL_DATA_TYPE:
+ case NVPTX::SUQ_WIDTH:
+ case NVPTX::SUQ_HEIGHT:
+ case NVPTX::SUQ_DEPTH:
+ case NVPTX::SUQ_ARRAY_SIZE: {
+ // This is a query, so operand 1 is a surfref/texref
+ MachineOperand &Handle = MI.getOperand(1);
+
+ replaceImageHandle(Handle, MF);
+
+ return true;
+ }
+ }
+}
+
+void NVPTXReplaceImageHandles::
+replaceImageHandle(MachineOperand &Op, MachineFunction &MF) {
+ const MachineRegisterInfo &MRI = MF.getRegInfo();
+ NVPTXMachineFunctionInfo *MFI = MF.getInfo<NVPTXMachineFunctionInfo>();
+ // Which instruction defines the handle?
+ MachineInstr *MI = MRI.getVRegDef(Op.getReg());
+ assert(MI && "No def for image handle vreg?");
+ MachineInstr &TexHandleDef = *MI;
+
+ switch (TexHandleDef.getOpcode()) {
+ case NVPTX::LD_i64_avar: {
+ // The handle is a parameter value being loaded, replace with the
+ // parameter symbol
+ assert(TexHandleDef.getOperand(6).isSymbol() && "Load is not a symbol!");
+ StringRef Sym = TexHandleDef.getOperand(6).getSymbolName();
+ std::string ParamBaseName = MF.getName();
+ ParamBaseName += "_param_";
+ assert(Sym.startswith(ParamBaseName) && "Invalid symbol reference");
+ unsigned Param = atoi(Sym.data()+ParamBaseName.size());
+ std::string NewSym;
+ raw_string_ostream NewSymStr(NewSym);
+ NewSymStr << MF.getFunction()->getName() << "_param_" << Param;
+ Op.ChangeToImmediate(
+ MFI->getImageHandleSymbolIndex(NewSymStr.str().c_str()));
+ InstrsToRemove.insert(&TexHandleDef);
+ break;
+ }
+ case NVPTX::texsurf_handles: {
+ // The handle is a global variable, replace with the global variable name
+ assert(TexHandleDef.getOperand(1).isGlobal() && "Load is not a global!");
+ const GlobalValue *GV = TexHandleDef.getOperand(1).getGlobal();
+ assert(GV->hasName() && "Global sampler must be named!");
+ Op.ChangeToImmediate(MFI->getImageHandleSymbolIndex(GV->getName().data()));
+ InstrsToRemove.insert(&TexHandleDef);
+ break;
+ }
+ default:
+ llvm_unreachable("Unknown instruction operating on handle");
+ }
+}
+
+MachineFunctionPass *llvm::createNVPTXReplaceImageHandlesPass() {
+ return new NVPTXReplaceImageHandles();
+}
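To make the parameter-symbol rewrite in replaceImageHandle concrete, here is a standalone sketch of the index recovery it performs (function and symbol names are illustrative):

    // Given a machine function named "foo" whose handle was loaded from the
    // symbol "foo_param_2", strip the "foo_param_" prefix and parse the
    // trailing integer, mirroring the atoi() logic in replaceImageHandle.
    #include <cassert>
    #include <cstdlib>
    #include <string>

    unsigned paramIndexFromSymbol(const std::string &FnName,
                                  const std::string &Sym) {
      std::string Base = FnName + "_param_";
      assert(Sym.compare(0, Base.size(), Base) == 0 && "unexpected symbol");
      return static_cast<unsigned>(std::atoi(Sym.c_str() + Base.size()));
    }
    // paramIndexFromSymbol("foo", "foo_param_2") == 2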
diff --git a/lib/Target/NVPTX/NVPTXSection.h b/lib/Target/NVPTX/NVPTXSection.h
index f8a692e..aa0436b 100644
--- a/lib/Target/NVPTX/NVPTXSection.h
+++ b/lib/Target/NVPTX/NVPTXSection.h
@@ -31,16 +31,16 @@ public:
/// Override this as NVPTX has its own way of printing switching
/// to a section.
- virtual void PrintSwitchToSection(const MCAsmInfo &MAI,
- raw_ostream &OS,
- const MCExpr *Subsection) const {}
+ void PrintSwitchToSection(const MCAsmInfo &MAI,
+ raw_ostream &OS,
+ const MCExpr *Subsection) const override {}
/// Base address of PTX sections is zero.
- virtual bool isBaseAddressKnownZero() const { return true; }
- virtual bool UseCodeAlign() const { return false; }
- virtual bool isVirtualSection() const { return false; }
- virtual std::string getLabelBeginName() const { return ""; }
- virtual std::string getLabelEndName() const { return ""; }
+ bool isBaseAddressKnownZero() const override { return true; }
+ bool UseCodeAlign() const override { return false; }
+ bool isVirtualSection() const override { return false; }
+ std::string getLabelBeginName() const override { return ""; }
+ std::string getLabelEndName() const override { return ""; }
};
} // end namespace llvm
diff --git a/lib/Target/NVPTX/NVPTXSubtarget.cpp b/lib/Target/NVPTX/NVPTXSubtarget.cpp
index 9771a17..8c7df52 100644
--- a/lib/Target/NVPTX/NVPTXSubtarget.cpp
+++ b/lib/Target/NVPTX/NVPTXSubtarget.cpp
@@ -12,14 +12,16 @@
//===----------------------------------------------------------------------===//
#include "NVPTXSubtarget.h"
+
+using namespace llvm;
+
+#define DEBUG_TYPE "nvptx-subtarget"
+
#define GET_SUBTARGETINFO_ENUM
#define GET_SUBTARGETINFO_TARGET_DESC
#define GET_SUBTARGETINFO_CTOR
#include "NVPTXGenSubtargetInfo.inc"
-using namespace llvm;
-
-
// Pin the vtable to this file.
void NVPTXSubtarget::anchor() {}
diff --git a/lib/Target/NVPTX/NVPTXSubtarget.h b/lib/Target/NVPTX/NVPTXSubtarget.h
index f99bebd..581e5ed 100644
--- a/lib/Target/NVPTX/NVPTXSubtarget.h
+++ b/lib/Target/NVPTX/NVPTXSubtarget.h
@@ -16,12 +16,11 @@
#include "NVPTX.h"
#include "llvm/Target/TargetSubtargetInfo.h"
+#include <string>
#define GET_SUBTARGETINFO_HEADER
#include "NVPTXGenSubtargetInfo.inc"
-#include <string>
-
namespace llvm {
class NVPTXSubtarget : public NVPTXGenSubtargetInfo {
@@ -65,6 +64,10 @@ public:
inline bool hasROT32() const { return hasHWROT32() || hasSWROT32(); }
inline bool hasROT64() const { return SmVersion >= 20; }
+ bool hasImageHandles() const {
+ // Currently disabled
+ return false;
+ }
bool is64Bit() const { return Is64Bit; }
unsigned int getSmVersion() const { return SmVersion; }
diff --git a/lib/Target/NVPTX/NVPTXTargetMachine.cpp b/lib/Target/NVPTX/NVPTXTargetMachine.cpp
index 7d7d793..26a4f84 100644
--- a/lib/Target/NVPTX/NVPTXTargetMachine.cpp
+++ b/lib/Target/NVPTX/NVPTXTargetMachine.cpp
@@ -16,7 +16,6 @@
#include "NVPTX.h"
#include "NVPTXAllocaHoisting.h"
#include "NVPTXLowerAggrCopies.h"
-#include "llvm/ADT/OwningPtr.h"
#include "llvm/Analysis/Passes.h"
#include "llvm/CodeGen/AsmPrinter.h"
#include "llvm/CodeGen/MachineFunctionAnalysis.h"
@@ -50,6 +49,7 @@ namespace llvm {
void initializeNVVMReflectPass(PassRegistry&);
void initializeGenericToNVVMPass(PassRegistry&);
void initializeNVPTXAssignValidGlobalNamesPass(PassRegistry&);
+void initializeNVPTXFavorNonGenericAddrSpacesPass(PassRegistry &);
}
extern "C" void LLVMInitializeNVPTXTarget() {
@@ -62,6 +62,8 @@ extern "C" void LLVMInitializeNVPTXTarget() {
initializeNVVMReflectPass(*PassRegistry::getPassRegistry());
initializeGenericToNVVMPass(*PassRegistry::getPassRegistry());
initializeNVPTXAssignValidGlobalNamesPass(*PassRegistry::getPassRegistry());
+ initializeNVPTXFavorNonGenericAddrSpacesPass(
+ *PassRegistry::getPassRegistry());
}
static std::string computeDataLayout(const NVPTXSubtarget &ST) {
@@ -113,14 +115,14 @@ public:
return getTM<NVPTXTargetMachine>();
}
- virtual void addIRPasses();
- virtual bool addInstSelector();
- virtual bool addPreRegAlloc();
- virtual bool addPostRegAlloc();
+ void addIRPasses() override;
+ bool addInstSelector() override;
+ bool addPreRegAlloc() override;
+ bool addPostRegAlloc() override;
- virtual FunctionPass *createTargetRegisterAllocator(bool) override;
- virtual void addFastRegAlloc(FunctionPass *RegAllocPass);
- virtual void addOptimizedRegAlloc(FunctionPass *RegAllocPass);
+ FunctionPass *createTargetRegisterAllocator(bool) override;
+ void addFastRegAlloc(FunctionPass *RegAllocPass) override;
+ void addOptimizedRegAlloc(FunctionPass *RegAllocPass) override;
};
} // end anonymous namespace
@@ -140,15 +142,42 @@ void NVPTXPassConfig::addIRPasses() {
disablePass(&BranchFolderPassID);
disablePass(&TailDuplicateID);
+ addPass(createNVPTXImageOptimizerPass());
TargetPassConfig::addIRPasses();
addPass(createNVPTXAssignValidGlobalNamesPass());
addPass(createGenericToNVVMPass());
+ addPass(createNVPTXFavorNonGenericAddrSpacesPass());
+ addPass(createSeparateConstOffsetFromGEPPass());
+ // The SeparateConstOffsetFromGEP pass creates variadic bases that can be
+ // shared by multiple GEPs. Run GVN or EarlyCSE afterwards so those bases
+ // are actually reused; GVN generates significantly better code than
+ // EarlyCSE for some of our benchmarks.
+ if (getOptLevel() == CodeGenOpt::Aggressive)
+ addPass(createGVNPass());
+ else
+ addPass(createEarlyCSEPass());
+ // Both FavorNonGenericAddrSpaces and SeparateConstOffsetFromGEP may leave
+ // some dead code. We could remove dead code in an ad-hoc manner, but that
+ // requires manual work and might be error-prone.
+ //
+ // The FavorNonGenericAddrSpaces pass shortcuts unnecessary addrspacecasts
+ // and leaves them unused.
+ //
+ // SeparateConstOffsetFromGEP builds a new index from the old one, so the
+ // old index and some of its intermediate results may become unused.
+ addPass(createDeadCodeEliminationPass());
}
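A condensed sketch of the redundancy-elimination pattern scheduled above, using the LLVM 3.5-era factory functions from llvm/Transforms/Scalar.h (the wrapper function itself is hypothetical, not this patch's code):

#include "llvm/PassManager.h"
#include "llvm/Transforms/Scalar.h"

static void addCSEAndCleanup(llvm::PassManagerBase &PM, bool Aggressive) {
  // GVN reuses the shared GEP bases more aggressively but costs more
  // compile time; EarlyCSE is the cheap choice at lower opt levels.
  if (Aggressive)
    PM.add(llvm::createGVNPass());
  else
    PM.add(llvm::createEarlyCSEPass());
  // Either pass can strand now-unused casts and indices; sweep them up.
  PM.add(llvm::createDeadCodeEliminationPass());
}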
bool NVPTXPassConfig::addInstSelector() {
+ const NVPTXSubtarget &ST =
+ getTM<NVPTXTargetMachine>().getSubtarget<NVPTXSubtarget>();
+
addPass(createLowerAggrCopies());
addPass(createAllocaHoisting());
addPass(createNVPTXISelDag(getNVPTXTargetMachine(), getOptLevel()));
+
+ if (!ST.hasImageHandles())
+ addPass(createNVPTXReplaceImageHandlesPass());
+
return false;
}
@@ -159,7 +188,7 @@ bool NVPTXPassConfig::addPostRegAlloc() {
}
FunctionPass *NVPTXPassConfig::createTargetRegisterAllocator(bool) {
- return 0; // No reg alloc
+ return nullptr; // No reg alloc
}
void NVPTXPassConfig::addFastRegAlloc(FunctionPass *RegAllocPass) {
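Returning nullptr from createTargetRegisterAllocator, as in the hunk above, is how a PassConfig opts out of register allocation entirely: PTX is a virtual ISA, so virtual registers survive to emission and ptxas performs the real allocation. A hedged sketch of the hook against the 3.5 API (class name hypothetical):

#include "llvm/CodeGen/Passes.h"
#include "llvm/PassManager.h"

class PTXLikePassConfig : public llvm::TargetPassConfig {
public:
  PTXLikePassConfig(llvm::TargetMachine *TM, llvm::PassManagerBase &PM)
      : TargetPassConfig(TM, PM) {}

  // No in-LLVM register allocator: leave virtual registers in place and
  // let the downstream assembler assign physical registers.
  llvm::FunctionPass *createTargetRegisterAllocator(bool) override {
    return nullptr;
  }
};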
diff --git a/lib/Target/NVPTX/NVPTXTargetMachine.h b/lib/Target/NVPTX/NVPTXTargetMachine.h
index 5fbcf73..2db7c18 100644
--- a/lib/Target/NVPTX/NVPTXTargetMachine.h
+++ b/lib/Target/NVPTX/NVPTXTargetMachine.h
@@ -51,22 +51,22 @@ public:
const TargetOptions &Options, Reloc::Model RM,
CodeModel::Model CM, CodeGenOpt::Level OP, bool is64bit);
- virtual const TargetFrameLowering *getFrameLowering() const {
+ const TargetFrameLowering *getFrameLowering() const override {
return &FrameLowering;
}
- virtual const NVPTXInstrInfo *getInstrInfo() const { return &InstrInfo; }
- virtual const DataLayout *getDataLayout() const { return &DL; }
- virtual const NVPTXSubtarget *getSubtargetImpl() const { return &Subtarget; }
+ const NVPTXInstrInfo *getInstrInfo() const override { return &InstrInfo; }
+ const DataLayout *getDataLayout() const override { return &DL; }
+ const NVPTXSubtarget *getSubtargetImpl() const override { return &Subtarget; }
- virtual const NVPTXRegisterInfo *getRegisterInfo() const {
+ const NVPTXRegisterInfo *getRegisterInfo() const override {
return &(InstrInfo.getRegisterInfo());
}
- virtual NVPTXTargetLowering *getTargetLowering() const {
+ NVPTXTargetLowering *getTargetLowering() const override {
return const_cast<NVPTXTargetLowering *>(&TLInfo);
}
- virtual const TargetSelectionDAGInfo *getSelectionDAGInfo() const {
+ const TargetSelectionDAGInfo *getSelectionDAGInfo() const override {
return &TSInfo;
}
@@ -79,17 +79,17 @@ public:
return const_cast<ManagedStringPool *>(&ManagedStrPool);
}
- virtual TargetPassConfig *createPassConfig(PassManagerBase &PM);
+ TargetPassConfig *createPassConfig(PassManagerBase &PM) override;
// Emission of machine code through JITCodeEmitter is not supported.
- virtual bool addPassesToEmitMachineCode(PassManagerBase &, JITCodeEmitter &,
- bool = true) {
+ bool addPassesToEmitMachineCode(PassManagerBase &, JITCodeEmitter &,
+ bool = true) override {
return true;
}
// Emission of machine code through MCJIT is not supported.
- virtual bool addPassesToEmitMC(PassManagerBase &, MCContext *&, raw_ostream &,
- bool = true) {
+ bool addPassesToEmitMC(PassManagerBase &, MCContext *&, raw_ostream &,
+ bool = true) override {
return true;
}
diff --git a/lib/Target/NVPTX/NVPTXTargetObjectFile.h b/lib/Target/NVPTX/NVPTXTargetObjectFile.h
index 2a7281e..0b438c5 100644
--- a/lib/Target/NVPTX/NVPTXTargetObjectFile.h
+++ b/lib/Target/NVPTX/NVPTXTargetObjectFile.h
@@ -22,26 +22,26 @@ class NVPTXTargetObjectFile : public TargetLoweringObjectFile {
public:
NVPTXTargetObjectFile() {
- TextSection = 0;
- DataSection = 0;
- BSSSection = 0;
- ReadOnlySection = 0;
+ TextSection = nullptr;
+ DataSection = nullptr;
+ BSSSection = nullptr;
+ ReadOnlySection = nullptr;
- StaticCtorSection = 0;
- StaticDtorSection = 0;
- LSDASection = 0;
- EHFrameSection = 0;
- DwarfAbbrevSection = 0;
- DwarfInfoSection = 0;
- DwarfLineSection = 0;
- DwarfFrameSection = 0;
- DwarfPubTypesSection = 0;
- DwarfDebugInlineSection = 0;
- DwarfStrSection = 0;
- DwarfLocSection = 0;
- DwarfARangesSection = 0;
- DwarfRangesSection = 0;
- DwarfMacroInfoSection = 0;
+ StaticCtorSection = nullptr;
+ StaticDtorSection = nullptr;
+ LSDASection = nullptr;
+ EHFrameSection = nullptr;
+ DwarfAbbrevSection = nullptr;
+ DwarfInfoSection = nullptr;
+ DwarfLineSection = nullptr;
+ DwarfFrameSection = nullptr;
+ DwarfPubTypesSection = nullptr;
+ DwarfDebugInlineSection = nullptr;
+ DwarfStrSection = nullptr;
+ DwarfLocSection = nullptr;
+ DwarfARangesSection = nullptr;
+ DwarfRangesSection = nullptr;
+ DwarfMacroInfoSection = nullptr;
}
virtual ~NVPTXTargetObjectFile();
diff --git a/lib/Target/NVPTX/NVPTXUtilities.cpp b/lib/Target/NVPTX/NVPTXUtilities.cpp
index 60a5173..a9fd190b 100644
--- a/lib/Target/NVPTX/NVPTXUtilities.cpp
+++ b/lib/Target/NVPTX/NVPTXUtilities.cpp
@@ -22,9 +22,9 @@
#include <map>
#include <string>
#include <vector>
-//#include <iostream>
#include "llvm/Support/ManagedStatic.h"
#include "llvm/IR/InstIterator.h"
+#include "llvm/Support/MutexGuard.h"
using namespace llvm;
@@ -33,8 +33,15 @@ typedef std::map<const GlobalValue *, key_val_pair_t> global_val_annot_t;
typedef std::map<const Module *, global_val_annot_t> per_module_annot_t;
ManagedStatic<per_module_annot_t> annotationCache;
+static sys::Mutex Lock;
+
+void llvm::clearAnnotationCache(const llvm::Module *Mod) {
+ MutexGuard Guard(Lock);
+ annotationCache->erase(Mod);
+}
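The annotation cache is now guarded by a single static lock, and every entry point that touches it (clearing, filling, and both lookup paths below) takes the same guard. A self-contained sketch of the same discipline using the standard library, with std::mutex standing in for llvm::sys::Mutex/MutexGuard:

#include <map>
#include <mutex>
#include <string>

static std::mutex CacheLock;
static std::map<const void *, std::map<std::string, unsigned>> Cache;

// Mirrors clearAnnotationCache: safe to call while other threads
// are reading or filling the cache, since all of them hold CacheLock.
void clearCacheFor(const void *Mod) {
  std::lock_guard<std::mutex> Guard(CacheLock); // released at scope exit
  Cache.erase(Mod);
}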
static void cacheAnnotationFromMD(const MDNode *md, key_val_pair_t &retval) {
+ MutexGuard Guard(Lock);
assert(md && "Invalid mdnode for annotation");
assert((md->getNumOperands() % 2) == 1 && "Invalid number of operands");
// start index = 1, to skip the global variable key
@@ -60,6 +67,7 @@ static void cacheAnnotationFromMD(const MDNode *md, key_val_pair_t &retval) {
}
static void cacheAnnotationFromMD(const Module *m, const GlobalValue *gv) {
+ MutexGuard Guard(Lock);
NamedMDNode *NMD = m->getNamedMetadata(llvm::NamedMDForAnnotations);
if (!NMD)
return;
@@ -92,6 +100,7 @@ static void cacheAnnotationFromMD(const Module *m, const GlobalValue *gv) {
bool llvm::findOneNVVMAnnotation(const GlobalValue *gv, std::string prop,
unsigned &retval) {
+ MutexGuard Guard(Lock);
const Module *m = gv->getParent();
if ((*annotationCache).find(m) == (*annotationCache).end())
cacheAnnotationFromMD(m, gv);
@@ -105,6 +114,7 @@ bool llvm::findOneNVVMAnnotation(const GlobalValue *gv, std::string prop,
bool llvm::findAllNVVMAnnotation(const GlobalValue *gv, std::string prop,
std::vector<unsigned> &retval) {
+ MutexGuard Guard(Lock);
const Module *m = gv->getParent();
if ((*annotationCache).find(m) == (*annotationCache).end())
cacheAnnotationFromMD(m, gv);
@@ -195,8 +205,37 @@ bool llvm::isImageWriteOnly(const llvm::Value &val) {
return false;
}
+bool llvm::isImageReadWrite(const llvm::Value &val) {
+ if (const Argument *arg = dyn_cast<Argument>(&val)) {
+ const Function *func = arg->getParent();
+ std::vector<unsigned> annot;
+ if (llvm::findAllNVVMAnnotation(func,
+ llvm::PropertyAnnotationNames[
+ llvm::PROPERTY_ISREADWRITE_IMAGE_PARAM],
+ annot)) {
+ if (std::find(annot.begin(), annot.end(), arg->getArgNo()) != annot.end())
+ return true;
+ }
+ }
+ return false;
+}
+
bool llvm::isImage(const llvm::Value &val) {
- return llvm::isImageReadOnly(val) || llvm::isImageWriteOnly(val);
+ return llvm::isImageReadOnly(val) || llvm::isImageWriteOnly(val) ||
+ llvm::isImageReadWrite(val);
+}
+
+bool llvm::isManaged(const llvm::Value &val) {
+ if (const GlobalValue *gv = dyn_cast<GlobalValue>(&val)) {
+ unsigned annot;
+ if (llvm::findOneNVVMAnnotation(gv,
+ llvm::PropertyAnnotationNames[llvm::PROPERTY_MANAGED],
+ annot)) {
+ assert((annot == 1) && "Unexpected annotation on a managed symbol");
+ return true;
+ }
+ }
+ return false;
}
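The predicates above all funnel through the findOne/findAllNVVMAnnotation queries against the !nvvm.annotations metadata. A hedged usage sketch (the wrapper is illustrative; "kernel" is the key the backend stores for entry functions):

#include "NVPTXUtilities.h"
#include "llvm/IR/Function.h"

// Hypothetical caller: test a single integer-valued annotation.
static bool isAnnotatedKernel(const llvm::Function &F) {
  unsigned V = 0;
  // findOneNVVMAnnotation returns false when no such annotation exists.
  return llvm::findOneNVVMAnnotation(&F, "kernel", V) && V == 1;
}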
std::string llvm::getTextureName(const llvm::Value &val) {
@@ -354,12 +393,12 @@ llvm::skipPointerTransfer(const Value *V, bool ignore_GEP_indices) {
const Value *
llvm::skipPointerTransfer(const Value *V, std::set<const Value *> &processed) {
if (processed.find(V) != processed.end())
- return NULL;
+ return nullptr;
processed.insert(V);
const Value *V2 = V->stripPointerCasts();
if (V2 != V && processed.find(V2) != processed.end())
- return NULL;
+ return nullptr;
processed.insert(V2);
V = V2;
@@ -375,20 +414,20 @@ llvm::skipPointerTransfer(const Value *V, std::set<const Value *> &processed) {
continue;
} else if (const PHINode *PN = dyn_cast<PHINode>(V)) {
if (V != V2 && processed.find(V) != processed.end())
- return NULL;
+ return nullptr;
processed.insert(PN);
- const Value *common = 0;
+ const Value *common = nullptr;
for (unsigned i = 0; i != PN->getNumIncomingValues(); ++i) {
const Value *pv = PN->getIncomingValue(i);
const Value *base = skipPointerTransfer(pv, processed);
if (base) {
- if (common == 0)
+ if (!common)
common = base;
else if (common != base)
return PN;
}
}
- if (common == 0)
+ if (!common)
return PN;
V = common;
}
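The PHI handling above searches every incoming value for a single common base and, when the arms disagree, keeps the PHI itself as the earliest common point. A compact standalone sketch of that reduction, with types simplified to opaque pointers (not the patch's actual code):

#include <vector>

// Reduce candidate bases to one common base; nullptr signals
// disagreement (the real code then returns the PHI node itself).
static const void *commonBase(const std::vector<const void *> &Bases) {
  const void *Common = nullptr;
  for (const void *B : Bases) {
    if (!B)
      continue; // unresolved arm, like a null `base` above
    if (!Common)
      Common = B; // first resolved arm seeds the candidate
    else if (Common != B)
      return nullptr; // two distinct bases: no common ancestor
  }
  return Common;
}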
@@ -406,7 +445,7 @@ BasicBlock *llvm::getParentBlock(Value *v) {
if (Instruction *I = dyn_cast<Instruction>(v))
return I->getParent();
- return 0;
+ return nullptr;
}
Function *llvm::getParentFunction(Value *v) {
@@ -419,13 +458,13 @@ Function *llvm::getParentFunction(Value *v) {
if (BasicBlock *B = dyn_cast<BasicBlock>(v))
return B->getParent();
- return 0;
+ return nullptr;
}
// Dump a block by name
void llvm::dumpBlock(Value *v, char *blockName) {
Function *F = getParentFunction(v);
- if (F == 0)
+ if (!F)
return;
for (Function::iterator it = F->begin(), ie = F->end(); it != ie; ++it) {
@@ -440,8 +479,8 @@ void llvm::dumpBlock(Value *v, char *blockName) {
// Find an instruction by name
Instruction *llvm::getInst(Value *base, char *instName) {
Function *F = getParentFunction(base);
- if (F == 0)
- return 0;
+ if (!F)
+ return nullptr;
for (inst_iterator it = inst_begin(F), ie = inst_end(F); it != ie; ++it) {
Instruction *I = &*it;
@@ -450,7 +489,7 @@ Instruction *llvm::getInst(Value *base, char *instName) {
}
}
- return 0;
+ return nullptr;
}
// Dump an instruction by name
diff --git a/lib/Target/NVPTX/NVPTXUtilities.h b/lib/Target/NVPTX/NVPTXUtilities.h
index a208004..446bfa1 100644
--- a/lib/Target/NVPTX/NVPTXUtilities.h
+++ b/lib/Target/NVPTX/NVPTXUtilities.h
@@ -28,6 +28,8 @@ namespace llvm {
#define NVCL_IMAGE2D_READONLY_FUNCNAME "__is_image2D_readonly"
#define NVCL_IMAGE3D_READONLY_FUNCNAME "__is_image3D_readonly"
+void clearAnnotationCache(const llvm::Module *);
+
bool findOneNVVMAnnotation(const llvm::GlobalValue *, std::string, unsigned &);
bool findAllNVVMAnnotation(const llvm::GlobalValue *, std::string,
std::vector<unsigned> &);
@@ -38,6 +40,8 @@ bool isSampler(const llvm::Value &);
bool isImage(const llvm::Value &);
bool isImageReadOnly(const llvm::Value &);
bool isImageWriteOnly(const llvm::Value &);
+bool isImageReadWrite(const llvm::Value &);
+bool isManaged(const llvm::Value &);
std::string getTextureName(const llvm::Value &);
std::string getSurfaceName(const llvm::Value &);
diff --git a/lib/Target/NVPTX/NVVMReflect.cpp b/lib/Target/NVPTX/NVVMReflect.cpp
index 8b5444a..cb8bd72 100644
--- a/lib/Target/NVPTX/NVVMReflect.cpp
+++ b/lib/Target/NVPTX/NVVMReflect.cpp
@@ -38,6 +38,8 @@
using namespace llvm;
+#define DEBUG_TYPE "nvptx-reflect"
+
namespace llvm { void initializeNVVMReflectPass(PassRegistry &); }
namespace {
@@ -49,13 +51,13 @@ private:
public:
static char ID;
- NVVMReflect() : ModulePass(ID), ReflectFunction(0) {
+ NVVMReflect() : ModulePass(ID), ReflectFunction(nullptr) {
initializeNVVMReflectPass(*PassRegistry::getPassRegistry());
VarMap.clear();
}
NVVMReflect(const StringMap<int> &Mapping)
- : ModulePass(ID), ReflectFunction(0) {
+ : ModulePass(ID), ReflectFunction(nullptr) {
initializeNVVMReflectPass(*PassRegistry::getPassRegistry());
for (StringMap<int>::const_iterator I = Mapping.begin(), E = Mapping.end();
I != E; ++I) {
@@ -63,8 +65,10 @@ public:
}
}
- void getAnalysisUsage(AnalysisUsage &AU) const { AU.setPreservesAll(); }
- virtual bool runOnModule(Module &);
+ void getAnalysisUsage(AnalysisUsage &AU) const override {
+ AU.setPreservesAll();
+ }
+ bool runOnModule(Module &) override;
void setVarMap();
};
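A hedged usage sketch for the mapping constructor above, assuming the createNVVMReflectPass(Mapping) factory that NVPTX.h declares for it; with this mapping, calls to __nvvm_reflect("__CUDA_FTZ") fold to the constant 1:

#include "NVPTX.h"
#include "llvm/ADT/StringMap.h"
#include "llvm/PassManager.h"

static void scheduleReflect(llvm::PassManager &PM) {
  llvm::StringMap<int> Mapping;
  Mapping["__CUDA_FTZ"] = 1; // select the flush-to-zero code paths
  PM.add(llvm::createNVVMReflectPass(Mapping));
}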
@@ -126,7 +130,7 @@ bool NVVMReflect::runOnModule(Module &M) {
// If the reflect function is not used, then there will be
// no entry for it in the module.
- if (ReflectFunction == 0)
+ if (!ReflectFunction)
return false;
// Validate _reflect function