diff options
Diffstat (limited to 'lib/Target')
60 files changed, 22984 insertions, 1 deletions
diff --git a/lib/Target/LLVMBuild.txt b/lib/Target/LLVMBuild.txt index 8ec5673..045ab9e 100644 --- a/lib/Target/LLVMBuild.txt +++ b/lib/Target/LLVMBuild.txt @@ -16,7 +16,7 @@ ;===------------------------------------------------------------------------===; [common] -subdirectories = ARM CellSPU CppBackend Hexagon MBlaze MSP430 Mips PTX PowerPC Sparc X86 XCore +subdirectories = ARM CellSPU CppBackend Hexagon MBlaze MSP430 NVPTX Mips PTX PowerPC Sparc X86 XCore ; This is a special group whose required libraries are extended (by llvm-build) ; with the best execution engine (the native JIT, if available, or the diff --git a/lib/Target/NVPTX/CMakeLists.txt b/lib/Target/NVPTX/CMakeLists.txt new file mode 100644 index 0000000..a32a78a --- /dev/null +++ b/lib/Target/NVPTX/CMakeLists.txt @@ -0,0 +1,33 @@ +set(LLVM_TARGET_DEFINITIONS NVPTX.td) + + +tablegen(LLVM NVPTXGenRegisterInfo.inc -gen-register-info) +tablegen(LLVM NVPTXGenInstrInfo.inc -gen-instr-info) +tablegen(LLVM NVPTXGenAsmWriter.inc -gen-asm-writer) +tablegen(LLVM NVPTXGenDAGISel.inc -gen-dag-isel) +tablegen(LLVM NVPTXGenSubtargetInfo.inc -gen-subtarget) +add_public_tablegen_target(NVPTXCommonTableGen) + +set(NVPTXCodeGen_sources + NVPTXFrameLowering.cpp + NVPTXInstrInfo.cpp + NVPTXISelDAGToDAG.cpp + NVPTXISelLowering.cpp + NVPTXRegisterInfo.cpp + NVPTXSubtarget.cpp + NVPTXTargetMachine.cpp + NVPTXSplitBBatBar.cpp + NVPTXLowerAggrCopies.cpp + NVPTXutil.cpp + NVPTXAllocaHoisting.cpp + NVPTXAsmPrinter.cpp + NVPTXUtilities.cpp + VectorElementize.cpp + ) + +add_llvm_target(NVPTXCodeGen ${NVPTXCodeGen_sources}) + + +add_subdirectory(TargetInfo) +add_subdirectory(InstPrinter) +add_subdirectory(MCTargetDesc) diff --git a/lib/Target/NVPTX/InstPrinter/CMakeLists.txt b/lib/Target/NVPTX/InstPrinter/CMakeLists.txt new file mode 100644 index 0000000..ae4c751 --- /dev/null +++ b/lib/Target/NVPTX/InstPrinter/CMakeLists.txt @@ -0,0 +1,7 @@ +include_directories( ${CMAKE_CURRENT_BINARY_DIR}/.. ${CMAKE_CURRENT_SOURCE_DIR}/.. 
) + +add_llvm_library(LLVMNVPTXAsmPrinter + NVPTXInstPrinter.cpp + ) + +add_dependencies(LLVMNVPTXAsmPrinter NVPTXCommonTableGen) diff --git a/lib/Target/NVPTX/InstPrinter/LLVMBuild.txt b/lib/Target/NVPTX/InstPrinter/LLVMBuild.txt new file mode 100644 index 0000000..032b573 --- /dev/null +++ b/lib/Target/NVPTX/InstPrinter/LLVMBuild.txt @@ -0,0 +1,23 @@ +;===- ./lib/Target/NVPTX/InstPrinter/LLVMBuild.txt -------------*- Conf -*--===; +; +; The LLVM Compiler Infrastructure +; +; This file is distributed under the University of Illinois Open Source +; License. See LICENSE.TXT for details. +; +;===------------------------------------------------------------------------===; +; +; This is an LLVMBuild description file for the components in this subdirectory. +; +; For more information on the LLVMBuild system, please see: +; +; http://llvm.org/docs/LLVMBuild.html +; +;===------------------------------------------------------------------------===; + +[component_0] +type = Library +name = NVPTXAsmPrinter +parent = NVPTX +required_libraries = MC Support +add_to_library_groups = NVPTX diff --git a/lib/Target/NVPTX/InstPrinter/Makefile b/lib/Target/NVPTX/InstPrinter/Makefile new file mode 100644 index 0000000..7b78654 --- /dev/null +++ b/lib/Target/NVPTX/InstPrinter/Makefile @@ -0,0 +1,15 @@ +##===- lib/Target/NVPTX/InstPrinter/Makefile ---------------*- Makefile -*-===## +# +# The LLVM Compiler Infrastructure +# +# This file is distributed under the University of Illinois Open Source +# License. See LICENSE.TXT for details. +# +##===----------------------------------------------------------------------===## +LEVEL = ../../../.. +LIBRARYNAME = LLVMNVPTXAsmPrinter + +# Hack: we need to include 'main' NVPTX target directory to grab private headers +CPP.Flags += -I$(PROJ_OBJ_DIR)/.. -I$(PROJ_SRC_DIR)/.. 
+ +include $(LEVEL)/Makefile.common diff --git a/lib/Target/NVPTX/InstPrinter/NVPTXInstPrinter.cpp b/lib/Target/NVPTX/InstPrinter/NVPTXInstPrinter.cpp new file mode 100644 index 0000000..10051c7 --- /dev/null +++ b/lib/Target/NVPTX/InstPrinter/NVPTXInstPrinter.cpp @@ -0,0 +1 @@ +// Placeholder diff --git a/lib/Target/NVPTX/LLVMBuild.txt b/lib/Target/NVPTX/LLVMBuild.txt new file mode 100644 index 0000000..e2d6ed2 --- /dev/null +++ b/lib/Target/NVPTX/LLVMBuild.txt @@ -0,0 +1,32 @@ +;===- ./lib/Target/NVPTX/LLVMBuild.txt -------------------------*- Conf -*--===; +; +; The LLVM Compiler Infrastructure +; +; This file is distributed under the University of Illinois Open Source +; License. See LICENSE.TXT for details. +; +;===------------------------------------------------------------------------===; +; +; This is an LLVMBuild description file for the components in this subdirectory. +; +; For more information on the LLVMBuild system, please see: +; +; http://llvm.org/docs/LLVMBuild.html +; +;===------------------------------------------------------------------------===; + +[common] +subdirectories = InstPrinter MCTargetDesc TargetInfo + +[component_0] +type = TargetGroup +name = NVPTX +parent = Target +has_asmprinter = 1 + +[component_1] +type = Library +name = NVPTXCodeGen +parent = NVPTX +required_libraries = Analysis AsmPrinter CodeGen Core MC NVPTXDesc NVPTXInfo SelectionDAG Support Target TransformUtils +add_to_library_groups = NVPTX diff --git a/lib/Target/NVPTX/MCTargetDesc/CMakeLists.txt b/lib/Target/NVPTX/MCTargetDesc/CMakeLists.txt new file mode 100644 index 0000000..a030d9f --- /dev/null +++ b/lib/Target/NVPTX/MCTargetDesc/CMakeLists.txt @@ -0,0 +1,9 @@ +add_llvm_library(LLVMNVPTXDesc + NVPTXMCAsmInfo.cpp + NVPTXMCTargetDesc.cpp + ) + +add_dependencies(LLVMNVPTXDesc NVPTXCommonTableGen) + +# Hack: we need to include 'main' target directory to grab private headers +#include_directories(${CMAKE_CURRENT_SOURCE_DIR}/.. ${CMAKE_CURRENT_BINARY_DIR}/..) 
diff --git a/lib/Target/NVPTX/MCTargetDesc/LLVMBuild.txt b/lib/Target/NVPTX/MCTargetDesc/LLVMBuild.txt new file mode 100644 index 0000000..01a051a --- /dev/null +++ b/lib/Target/NVPTX/MCTargetDesc/LLVMBuild.txt @@ -0,0 +1,23 @@ +;===- ./lib/Target/NVPTX/MCTargetDesc/LLVMBuild.txt ------------*- Conf -*--===; +; +; The LLVM Compiler Infrastructure +; +; This file is distributed under the University of Illinois Open Source +; License. See LICENSE.TXT for details. +; +;===------------------------------------------------------------------------===; +; +; This is an LLVMBuild description file for the components in this subdirectory. +; +; For more information on the LLVMBuild system, please see: +; +; http://llvm.org/docs/LLVMBuild.html +; +;===------------------------------------------------------------------------===; + +[component_0] +type = Library +name = NVPTXDesc +parent = NVPTX +required_libraries = MC NVPTXAsmPrinter NVPTXInfo Support +add_to_library_groups = NVPTX diff --git a/lib/Target/NVPTX/MCTargetDesc/Makefile b/lib/Target/NVPTX/MCTargetDesc/Makefile new file mode 100644 index 0000000..31d06cb --- /dev/null +++ b/lib/Target/NVPTX/MCTargetDesc/Makefile @@ -0,0 +1,16 @@ +##===- lib/Target/NVPTX/MCTargetDesc/Makefile --------------*- Makefile -*-===## +# +# The LLVM Compiler Infrastructure +# +# This file is distributed under the University of Illinois Open Source +# License. See LICENSE.TXT for details. +# +##===----------------------------------------------------------------------===## + +LEVEL = ../../../.. +LIBRARYNAME = LLVMNVPTXDesc + +# Hack: we need to include 'main' target directory to grab private headers +CPP.Flags += -I$(PROJ_OBJ_DIR)/.. -I$(PROJ_SRC_DIR)/.. 
+ +include $(LEVEL)/Makefile.common diff --git a/lib/Target/NVPTX/MCTargetDesc/NVPTXBaseInfo.h b/lib/Target/NVPTX/MCTargetDesc/NVPTXBaseInfo.h new file mode 100644 index 0000000..4545838 --- /dev/null +++ b/lib/Target/NVPTX/MCTargetDesc/NVPTXBaseInfo.h @@ -0,0 +1,88 @@ +//===-- NVPTXBaseInfo.h - Top-level definitions for NVPTX -------*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file contains small standalone helper functions and enum definitions for +// the NVPTX target useful for the compiler back-end and the MC libraries. +// As such, it deliberately does not include references to LLVM core +// code gen types, passes, etc.. +// +//===----------------------------------------------------------------------===// + +#ifndef NVPTXBASEINFO_H +#define NVPTXBASEINFO_H + +namespace llvm { + +enum AddressSpace { + ADDRESS_SPACE_GENERIC = 0, + ADDRESS_SPACE_GLOBAL = 1, + ADDRESS_SPACE_CONST_NOT_GEN = 2, // Not part of generic space + ADDRESS_SPACE_SHARED = 3, + ADDRESS_SPACE_CONST = 4, + ADDRESS_SPACE_LOCAL = 5, + + // NVVM Internal + ADDRESS_SPACE_PARAM = 101 +}; + +enum PropertyAnnotation { + PROPERTY_MAXNTID_X = 0, + PROPERTY_MAXNTID_Y, + PROPERTY_MAXNTID_Z, + PROPERTY_REQNTID_X, + PROPERTY_REQNTID_Y, + PROPERTY_REQNTID_Z, + PROPERTY_MINNCTAPERSM, + PROPERTY_ISTEXTURE, + PROPERTY_ISSURFACE, + PROPERTY_ISSAMPLER, + PROPERTY_ISREADONLY_IMAGE_PARAM, + PROPERTY_ISWRITEONLY_IMAGE_PARAM, + PROPERTY_ISKERNEL_FUNCTION, + PROPERTY_ALIGN, + + // last property + PROPERTY_LAST +}; + +const unsigned AnnotationNameLen = 8; // length of each annotation name +const char +PropertyAnnotationNames[PROPERTY_LAST + 1][AnnotationNameLen + 1] = { + "maxntidx", // PROPERTY_MAXNTID_X + "maxntidy", // PROPERTY_MAXNTID_Y + "maxntidz", // PROPERTY_MAXNTID_Z + "reqntidx", 
// PROPERTY_REQNTID_X + "reqntidy", // PROPERTY_REQNTID_Y + "reqntidz", // PROPERTY_REQNTID_Z + "minctasm", // PROPERTY_MINNCTAPERSM + "texture", // PROPERTY_ISTEXTURE + "surface", // PROPERTY_ISSURFACE + "sampler", // PROPERTY_ISSAMPLER + "rdoimage", // PROPERTY_ISREADONLY_IMAGE_PARAM + "wroimage", // PROPERTY_ISWRITEONLY_IMAGE_PARAM + "kernel", // PROPERTY_ISKERNEL_FUNCTION + "align", // PROPERTY_ALIGN + + // last property + "proplast", // PROPERTY_LAST +}; + +// name of named metadata used for global annotations +#if defined(__GNUC__) +// As this is declared to be static but some of the .cpp files that +// include NVVM.h do not use this array, gcc gives a warning when +// compiling those .cpp files, hence __attribute__((unused)). +__attribute__((unused)) +#endif +static const char* NamedMDForAnnotations = "nvvm.annotations"; + +} + + +#endif diff --git a/lib/Target/NVPTX/MCTargetDesc/NVPTXMCAsmInfo.cpp b/lib/Target/NVPTX/MCTargetDesc/NVPTXMCAsmInfo.cpp new file mode 100644 index 0000000..1d41665 --- /dev/null +++ b/lib/Target/NVPTX/MCTargetDesc/NVPTXMCAsmInfo.cpp @@ -0,0 +1,63 @@ +//===-- NVPTXMCAsmInfo.cpp - NVPTX asm properties -------------------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file contains the declarations of the NVPTXMCAsmInfo properties. 
+// +//===----------------------------------------------------------------------===// + +#include "NVPTXMCAsmInfo.h" +#include "llvm/ADT/Triple.h" +#include "llvm/Support/CommandLine.h" + +using namespace llvm; + +bool CompileForDebugging; + +// -debug-compile - Command line option to inform opt and llc passes to +// compile for debugging +static cl::opt<bool, true> +Debug("debug-compile", cl::desc("Compile for debugging"), cl::Hidden, + cl::location(CompileForDebugging), + cl::init(false)); + +void NVPTXMCAsmInfo::anchor() { } + +NVPTXMCAsmInfo::NVPTXMCAsmInfo(const Target &T, const StringRef &TT) { + Triple TheTriple(TT); + if (TheTriple.getArch() == Triple::nvptx64) + PointerSize = 8; + + CommentString = "//"; + + PrivateGlobalPrefix = "$L__"; + + AllowPeriodsInName = false; + + HasSetDirective = false; + + HasSingleParameterDotFile = false; + + InlineAsmStart = " inline asm"; + InlineAsmEnd = " inline asm"; + + SupportsDebugInformation = CompileForDebugging; + HasDotTypeDotSizeDirective = false; + + Data8bitsDirective = " .b8 "; + Data16bitsDirective = " .b16 "; + Data32bitsDirective = " .b32 "; + Data64bitsDirective = " .b64 "; + PrivateGlobalPrefix = ""; + ZeroDirective = " .b8"; + AsciiDirective = " .b8"; + AscizDirective = " .b8"; + + // @TODO: Can we just disable this? + GlobalDirective = "\t// .globl\t"; +} diff --git a/lib/Target/NVPTX/MCTargetDesc/NVPTXMCAsmInfo.h b/lib/Target/NVPTX/MCTargetDesc/NVPTXMCAsmInfo.h new file mode 100644 index 0000000..82097da --- /dev/null +++ b/lib/Target/NVPTX/MCTargetDesc/NVPTXMCAsmInfo.h @@ -0,0 +1,30 @@ +//===-- NVPTXMCAsmInfo.h - NVPTX asm properties ----------------*- C++ -*--===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file contains the declaration of the NVPTXMCAsmInfo class. 
+// +//===----------------------------------------------------------------------===// + +#ifndef NVPTX_MCASM_INFO_H +#define NVPTX_MCASM_INFO_H + +#include "llvm/MC/MCAsmInfo.h" + +namespace llvm { +class Target; +class StringRef; + +class NVPTXMCAsmInfo : public MCAsmInfo { + virtual void anchor(); +public: + explicit NVPTXMCAsmInfo(const Target &T, const StringRef &TT); +}; +} // namespace llvm + +#endif // NVPTX_MCASM_INFO_H diff --git a/lib/Target/NVPTX/MCTargetDesc/NVPTXMCTargetDesc.cpp b/lib/Target/NVPTX/MCTargetDesc/NVPTXMCTargetDesc.cpp new file mode 100644 index 0000000..44aa01c --- /dev/null +++ b/lib/Target/NVPTX/MCTargetDesc/NVPTXMCTargetDesc.cpp @@ -0,0 +1,91 @@ +//===-- NVPTXMCTargetDesc.cpp - NVPTX Target Descriptions -------*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file provides NVPTX specific target descriptions. +// +//===----------------------------------------------------------------------===// + +#include "NVPTXMCTargetDesc.h" +#include "NVPTXMCAsmInfo.h" +#include "llvm/MC/MCCodeGenInfo.h" +#include "llvm/MC/MCInstrInfo.h" +#include "llvm/MC/MCRegisterInfo.h" +#include "llvm/MC/MCSubtargetInfo.h" +#include "llvm/Support/TargetRegistry.h" + +#define GET_INSTRINFO_MC_DESC +#include "NVPTXGenInstrInfo.inc" + +#define GET_SUBTARGETINFO_MC_DESC +#include "NVPTXGenSubtargetInfo.inc" + +#define GET_REGINFO_MC_DESC +#include "NVPTXGenRegisterInfo.inc" + + +using namespace llvm; + +static MCInstrInfo *createNVPTXMCInstrInfo() { + MCInstrInfo *X = new MCInstrInfo(); + InitNVPTXMCInstrInfo(X); + return X; +} + +static MCRegisterInfo *createNVPTXMCRegisterInfo(StringRef TT) { + MCRegisterInfo *X = new MCRegisterInfo(); + // PTX does not have a return address register. 
+ InitNVPTXMCRegisterInfo(X, 0); + return X; +} + +static MCSubtargetInfo *createNVPTXMCSubtargetInfo(StringRef TT, StringRef CPU, + StringRef FS) { + MCSubtargetInfo *X = new MCSubtargetInfo(); + InitNVPTXMCSubtargetInfo(X, TT, CPU, FS); + return X; +} + +static MCCodeGenInfo *createNVPTXMCCodeGenInfo(StringRef TT, Reloc::Model RM, + CodeModel::Model CM, + CodeGenOpt::Level OL) { + MCCodeGenInfo *X = new MCCodeGenInfo(); + X->InitMCCodeGenInfo(RM, CM, OL); + return X; +} + + +// Force static initialization. +extern "C" void LLVMInitializeNVPTXTargetMC() { + // Register the MC asm info. + RegisterMCAsmInfo<NVPTXMCAsmInfo> X(TheNVPTXTarget32); + RegisterMCAsmInfo<NVPTXMCAsmInfo> Y(TheNVPTXTarget64); + + // Register the MC codegen info. + TargetRegistry::RegisterMCCodeGenInfo(TheNVPTXTarget32, + createNVPTXMCCodeGenInfo); + TargetRegistry::RegisterMCCodeGenInfo(TheNVPTXTarget64, + createNVPTXMCCodeGenInfo); + + // Register the MC instruction info. + TargetRegistry::RegisterMCInstrInfo(TheNVPTXTarget32, createNVPTXMCInstrInfo); + TargetRegistry::RegisterMCInstrInfo(TheNVPTXTarget64, createNVPTXMCInstrInfo); + + // Register the MC register info. + TargetRegistry::RegisterMCRegInfo(TheNVPTXTarget32, + createNVPTXMCRegisterInfo); + TargetRegistry::RegisterMCRegInfo(TheNVPTXTarget64, + createNVPTXMCRegisterInfo); + + // Register the MC subtarget info. 
+ TargetRegistry::RegisterMCSubtargetInfo(TheNVPTXTarget32, + createNVPTXMCSubtargetInfo); + TargetRegistry::RegisterMCSubtargetInfo(TheNVPTXTarget64, + createNVPTXMCSubtargetInfo); + +} diff --git a/lib/Target/NVPTX/MCTargetDesc/NVPTXMCTargetDesc.h b/lib/Target/NVPTX/MCTargetDesc/NVPTXMCTargetDesc.h new file mode 100644 index 0000000..af95c76 --- /dev/null +++ b/lib/Target/NVPTX/MCTargetDesc/NVPTXMCTargetDesc.h @@ -0,0 +1,36 @@ +//===-- NVPTXMCTargetDesc.h - NVPTX Target Descriptions ---------*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file provides NVPTX specific target descriptions. +// +//===----------------------------------------------------------------------===// + +#ifndef NVPTXMCTARGETDESC_H +#define NVPTXMCTARGETDESC_H + +namespace llvm { +class Target; + +extern Target TheNVPTXTarget32; +extern Target TheNVPTXTarget64; + +} // End llvm namespace + +// Defines symbolic names for PTX registers. +#define GET_REGINFO_ENUM +#include "NVPTXGenRegisterInfo.inc" + +// Defines symbolic names for the PTX instructions. +#define GET_INSTRINFO_ENUM +#include "NVPTXGenInstrInfo.inc" + +#define GET_SUBTARGETINFO_ENUM +#include "NVPTXGenSubtargetInfo.inc" + +#endif diff --git a/lib/Target/NVPTX/Makefile b/lib/Target/NVPTX/Makefile new file mode 100644 index 0000000..8db20eb --- /dev/null +++ b/lib/Target/NVPTX/Makefile @@ -0,0 +1,23 @@ +##===- lib/Target/NVPTX/Makefile ---------------------------*- Makefile -*-===## +# +# The LLVM Compiler Infrastructure +# +# This file is distributed under the University of Illinois Open Source +# License. See LICENSE.TXT for details. +# +##===----------------------------------------------------------------------===## + +LEVEL = ../../.. 
+LIBRARYNAME = LLVMNVPTXCodeGen +TARGET = NVPTX + +# Make sure that tblgen is run, first thing. +BUILT_SOURCES = NVPTXGenAsmWriter.inc \ + NVPTXGenDAGISel.inc \ + NVPTXGenInstrInfo.inc \ + NVPTXGenRegisterInfo.inc \ + NVPTXGenSubtargetInfo.inc + +DIRS = InstPrinter TargetInfo MCTargetDesc + +include $(LEVEL)/Makefile.common diff --git a/lib/Target/NVPTX/ManagedStringPool.h b/lib/Target/NVPTX/ManagedStringPool.h new file mode 100644 index 0000000..b568488 --- /dev/null +++ b/lib/Target/NVPTX/ManagedStringPool.h @@ -0,0 +1,49 @@ +//===-- ManagedStringPool.h - Managed String Pool ---------------*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// The strings allocated from a managed string pool are owned by the string +// pool and will be deleted together with the managed string pool. +// +//===----------------------------------------------------------------------===// + + +#ifndef LLVM_SUPPORT_MANAGED_STRING_H +#define LLVM_SUPPORT_MANAGED_STRING_H + +#include "llvm/ADT/SmallVector.h" +#include <string> + +namespace llvm { + +/// ManagedStringPool - The strings allocated from a managed string pool are +/// owned by the string pool and will be deleted together with the managed +/// string pool. 
+class ManagedStringPool { + SmallVector<std::string *, 8> Pool; + +public: + ManagedStringPool() {} + ~ManagedStringPool() { + SmallVector<std::string *, 8>::iterator Current = Pool.begin(); + while (Current != Pool.end()) { + delete *Current; + Current++; + } + } + + std::string *getManagedString(const char *S) { + std::string *Str = new std::string(S); + Pool.push_back(Str); + return Str; + } +}; + +} + +#endif diff --git a/lib/Target/NVPTX/NVPTX.h b/lib/Target/NVPTX/NVPTX.h new file mode 100644 index 0000000..ebdf423 --- /dev/null +++ b/lib/Target/NVPTX/NVPTX.h @@ -0,0 +1,137 @@ +//===-- NVPTX.h - Top-level interface for NVPTX representation --*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file contains the entry points for global functions defined in +// the LLVM NVPTX back-end. 
+// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_TARGET_NVPTX_H +#define LLVM_TARGET_NVPTX_H + +#include <cassert> +#include <iosfwd> +#include "llvm/Value.h" +#include "llvm/Module.h" +#include "llvm/Support/ErrorHandling.h" +#include "llvm/Target/TargetMachine.h" +#include "MCTargetDesc/NVPTXBaseInfo.h" + +namespace llvm { +class NVPTXTargetMachine; +class FunctionPass; +class formatted_raw_ostream; + +namespace NVPTXCC { +enum CondCodes { + EQ, + NE, + LT, + LE, + GT, + GE +}; +} + +inline static const char *NVPTXCondCodeToString(NVPTXCC::CondCodes CC) { + switch (CC) { + default: assert(0 && "Unknown condition code"); + case NVPTXCC::NE: return "ne"; + case NVPTXCC::EQ: return "eq"; + case NVPTXCC::LT: return "lt"; + case NVPTXCC::LE: return "le"; + case NVPTXCC::GT: return "gt"; + case NVPTXCC::GE: return "ge"; + } +} + +FunctionPass *createNVPTXISelDag(NVPTXTargetMachine &TM, + llvm::CodeGenOpt::Level OptLevel); +FunctionPass *createVectorElementizePass(NVPTXTargetMachine &); +FunctionPass *createLowerStructArgsPass(NVPTXTargetMachine &); +FunctionPass *createNVPTXReMatPass(NVPTXTargetMachine &); +FunctionPass *createNVPTXReMatBlockPass(NVPTXTargetMachine &); + +bool isImageOrSamplerVal(const Value *, const Module *); + +extern Target TheNVPTXTarget32; +extern Target TheNVPTXTarget64; + +namespace NVPTX +{ +enum DrvInterface { + NVCL, + CUDA, + TEST +}; + +// A field inside TSFlags needs a shift and a mask. The usage is +// always as follows : +// ((TSFlags & fieldMask) >> fieldShift) +// The enum keeps the mask, the shift, and all valid values of the +// field in one place. 
+enum VecInstType { + VecInstTypeShift = 0, + VecInstTypeMask = 0xF, + + VecNOP = 0, + VecLoad = 1, + VecStore = 2, + VecBuild = 3, + VecShuffle = 4, + VecExtract = 5, + VecInsert = 6, + VecDest = 7, + VecOther = 15 +}; + +enum SimpleMove { + SimpleMoveMask = 0x10, + SimpleMoveShift = 4 +}; +enum LoadStore { + isLoadMask = 0x20, + isLoadShift = 5, + isStoreMask = 0x40, + isStoreShift = 6 +}; + +namespace PTXLdStInstCode { +enum AddressSpace{ + GENERIC = 0, + GLOBAL = 1, + CONSTANT = 2, + SHARED = 3, + PARAM = 4, + LOCAL = 5 +}; +enum FromType { + Unsigned = 0, + Signed, + Float +}; +enum VecType { + Scalar = 1, + V2 = 2, + V4 = 4 +}; +} +} +} // end namespace llvm; + +// Defines symbolic names for NVPTX registers. This defines a mapping from +// register name to register number. +#define GET_REGINFO_ENUM +#include "NVPTXGenRegisterInfo.inc" + +// Defines symbolic names for the NVPTX instructions. +#define GET_INSTRINFO_ENUM +#include "NVPTXGenInstrInfo.inc" + +#endif diff --git a/lib/Target/NVPTX/NVPTX.td b/lib/Target/NVPTX/NVPTX.td new file mode 100644 index 0000000..ae7710e --- /dev/null +++ b/lib/Target/NVPTX/NVPTX.td @@ -0,0 +1,44 @@ +//===- NVPTX.td - Describe the NVPTX Target Machine -----------*- tblgen -*-==// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// This is the top level entry point for the NVPTX target. 
+//===----------------------------------------------------------------------===// + +//===----------------------------------------------------------------------===// +// Target-independent interfaces +//===----------------------------------------------------------------------===// + +include "llvm/Target/Target.td" + +include "NVPTXRegisterInfo.td" +include "NVPTXInstrInfo.td" + +//===----------------------------------------------------------------------===// +// Subtarget Features. +// - We use the SM version number instead of explicit feature table. +// - Need at least one feature to avoid generating zero sized array by +// TableGen in NVPTXGenSubtargetInfo.inc. +//===----------------------------------------------------------------------===// +def FeatureDummy : SubtargetFeature<"dummy", "dummy", "true", "">; + +//===----------------------------------------------------------------------===// +// NVPTX supported processors. +//===----------------------------------------------------------------------===// + +class Proc<string Name, list<SubtargetFeature> Features> + : Processor<Name, NoItineraries, Features>; + +def : Proc<"sm_10", [FeatureDummy]>; + + +def NVPTXInstrInfo : InstrInfo { +} + +def NVPTX : Target { + let InstructionSet = NVPTXInstrInfo; +} diff --git a/lib/Target/NVPTX/NVPTXAllocaHoisting.cpp b/lib/Target/NVPTX/NVPTXAllocaHoisting.cpp new file mode 100644 index 0000000..2706b0b --- /dev/null +++ b/lib/Target/NVPTX/NVPTXAllocaHoisting.cpp @@ -0,0 +1,48 @@ +//===-- AllocaHoisting.cpp - Hoist allocas to the entry block ---*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// Hoist the alloca instructions in the non-entry blocks to the entry block. 
+// +//===----------------------------------------------------------------------===// + +#include "llvm/Function.h" +#include "llvm/Instructions.h" +#include "llvm/Constants.h" +#include "NVPTXAllocaHoisting.h" + +namespace llvm { + +bool NVPTXAllocaHoisting::runOnFunction(Function &function) { + bool functionModified = false; + Function::iterator I = function.begin(); + TerminatorInst *firstTerminatorInst = (I++)->getTerminator(); + + for (Function::iterator E = function.end(); I != E; ++I) { + for (BasicBlock::iterator BI = I->begin(), BE = I->end(); BI != BE;) { + AllocaInst *allocaInst = dyn_cast<AllocaInst>(BI++); + if (allocaInst && isa<ConstantInt>(allocaInst->getArraySize())) { + allocaInst->moveBefore(firstTerminatorInst); + functionModified = true; + } + } + } + + return functionModified; +} + +char NVPTXAllocaHoisting::ID = 1; +RegisterPass<NVPTXAllocaHoisting> X("alloca-hoisting", + "Hoisting alloca instructsion in non-entry " + "blocks to the entry block"); + +FunctionPass *createAllocaHoisting() { + return new NVPTXAllocaHoisting(); +} + +} // end namespace llvm diff --git a/lib/Target/NVPTX/NVPTXAllocaHoisting.h b/lib/Target/NVPTX/NVPTXAllocaHoisting.h new file mode 100644 index 0000000..24b3bd5 --- /dev/null +++ b/lib/Target/NVPTX/NVPTXAllocaHoisting.h @@ -0,0 +1,49 @@ +//===-- AllocaHoisting.h - Hoist allocas to the entry block -----*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// Hoist the alloca instructions in the non-entry blocks to the entry block. 
+// +//===----------------------------------------------------------------------===// + +#ifndef NVPTX_ALLOCA_HOISTING_H_ +#define NVPTX_ALLOCA_HOISTING_H_ + +#include "llvm/CodeGen/MachineFunctionAnalysis.h" +#include "llvm/Pass.h" +#include "llvm/Target/TargetData.h" + +namespace llvm { + +class FunctionPass; +class Function; + +// Hoisting the alloca instructions in the non-entry blocks to the entry +// block. +class NVPTXAllocaHoisting : public FunctionPass { +public: + static char ID; // Pass ID + NVPTXAllocaHoisting() : FunctionPass(ID) {} + + void getAnalysisUsage(AnalysisUsage &AU) const { + AU.addRequired<TargetData>(); + AU.addPreserved<MachineFunctionAnalysis>(); + } + + virtual const char *getPassName() const { + return "NVPTX specific alloca hoisting"; + } + + virtual bool runOnFunction(Function &function); +}; + +extern FunctionPass *createAllocaHoisting(); + +} // end namespace llvm + +#endif // NVPTX_ALLOCA_HOISTING_H_ diff --git a/lib/Target/NVPTX/NVPTXAsmPrinter.cpp b/lib/Target/NVPTX/NVPTXAsmPrinter.cpp new file mode 100644 index 0000000..f268b4a --- /dev/null +++ b/lib/Target/NVPTX/NVPTXAsmPrinter.cpp @@ -0,0 +1,2068 @@ +//===-- NVPTXAsmPrinter.cpp - NVPTX LLVM assembly writer ------------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file contains a printer that converts from our internal representation +// of machine-dependent LLVM code to NVPTX assembly language. 
+// +//===----------------------------------------------------------------------===// + +#include "NVPTX.h" +#include "NVPTXInstrInfo.h" +#include "NVPTXTargetMachine.h" +#include "NVPTXRegisterInfo.h" +#include "NVPTXAsmPrinter.h" +#include "MCTargetDesc/NVPTXMCAsmInfo.h" +#include "NVPTXNumRegisters.h" +#include "../lib/CodeGen/AsmPrinter/DwarfDebug.h" +#include "llvm/ADT/StringExtras.h" +#include "llvm/GlobalVariable.h" +#include "llvm/Function.h" +#include "llvm/Module.h" +#include "llvm/CodeGen/Analysis.h" +#include "llvm/CodeGen/MachineRegisterInfo.h" +#include "llvm/CodeGen/MachineFrameInfo.h" +#include "llvm/CodeGen/MachineModuleInfo.h" +#include "llvm/MC/MCStreamer.h" +#include "llvm/MC/MCSymbol.h" +#include "llvm/Target/Mangler.h" +#include "llvm/Target/TargetLoweringObjectFile.h" +#include "llvm/Support/TargetRegistry.h" +#include "llvm/Support/ErrorHandling.h" +#include "llvm/Support/FormattedStream.h" +#include "llvm/DerivedTypes.h" +#include "NVPTXUtilities.h" +#include "llvm/Support/TimeValue.h" +#include <sstream> +#include "llvm/Support/CommandLine.h" +#include "llvm/Analysis/DebugInfo.h" +#include "llvm/Analysis/ConstantFolding.h" +#include "llvm/Support/Path.h" +#include "llvm/Assembly/Writer.h" +#include "cl_common_defines.h" + + +using namespace llvm; + + +#include "NVPTXGenAsmWriter.inc" + +bool RegAllocNilUsed = true; + +#define DEPOTNAME "__local_depot" + +static cl::opt<bool> +EmitLineNumbers("nvptx-emit-line-numbers", + cl::desc("NVPTX Specific: Emit Line numbers even without -G"), + cl::init(true)); + +namespace llvm { +bool InterleaveSrcInPtx = false; +} + +static cl::opt<bool, true>InterleaveSrc("nvptx-emit-src", + cl::ZeroOrMore, + cl::desc("NVPTX Specific: Emit source line in ptx file"), + cl::location(llvm::InterleaveSrcInPtx)); + + + + +// @TODO: This is a copy from AsmPrinter.cpp. The function is static, so we +// cannot just link to the existing version. +/// LowerConstant - Lower the specified LLVM Constant to an MCExpr. 
+/// +using namespace nvptx; +const MCExpr *nvptx::LowerConstant(const Constant *CV, AsmPrinter &AP) { + MCContext &Ctx = AP.OutContext; + + if (CV->isNullValue() || isa<UndefValue>(CV)) + return MCConstantExpr::Create(0, Ctx); + + if (const ConstantInt *CI = dyn_cast<ConstantInt>(CV)) + return MCConstantExpr::Create(CI->getZExtValue(), Ctx); + + if (const GlobalValue *GV = dyn_cast<GlobalValue>(CV)) + return MCSymbolRefExpr::Create(AP.Mang->getSymbol(GV), Ctx); + + if (const BlockAddress *BA = dyn_cast<BlockAddress>(CV)) + return MCSymbolRefExpr::Create(AP.GetBlockAddressSymbol(BA), Ctx); + + const ConstantExpr *CE = dyn_cast<ConstantExpr>(CV); + if (CE == 0) + llvm_unreachable("Unknown constant value to lower!"); + + + switch (CE->getOpcode()) { + default: + // If the code isn't optimized, there may be outstanding folding + // opportunities. Attempt to fold the expression using TargetData as a + // last resort before giving up. + if (Constant *C = + ConstantFoldConstantExpression(CE, AP.TM.getTargetData())) + if (C != CE) + return LowerConstant(C, AP); + + // Otherwise report the problem to the user. + { + std::string S; + raw_string_ostream OS(S); + OS << "Unsupported expression in static initializer: "; + WriteAsOperand(OS, CE, /*PrintType=*/false, + !AP.MF ? 0 : AP.MF->getFunction()->getParent()); + report_fatal_error(OS.str()); + } + case Instruction::GetElementPtr: { + const TargetData &TD = *AP.TM.getTargetData(); + // Generate a symbolic expression for the byte address + const Constant *PtrVal = CE->getOperand(0); + SmallVector<Value*, 8> IdxVec(CE->op_begin()+1, CE->op_end()); + int64_t Offset = TD.getIndexedOffset(PtrVal->getType(), IdxVec); + + const MCExpr *Base = LowerConstant(CE->getOperand(0), AP); + if (Offset == 0) + return Base; + + // Truncate/sext the offset to the pointer size. 
+ if (TD.getPointerSizeInBits() != 64) { + int SExtAmount = 64-TD.getPointerSizeInBits(); + Offset = (Offset << SExtAmount) >> SExtAmount; + } + + return MCBinaryExpr::CreateAdd(Base, MCConstantExpr::Create(Offset, Ctx), + Ctx); + } + + case Instruction::Trunc: + // We emit the value and depend on the assembler to truncate the generated + // expression properly. This is important for differences between + // blockaddress labels. Since the two labels are in the same function, it + // is reasonable to treat their delta as a 32-bit value. + // FALL THROUGH. + case Instruction::BitCast: + return LowerConstant(CE->getOperand(0), AP); + + case Instruction::IntToPtr: { + const TargetData &TD = *AP.TM.getTargetData(); + // Handle casts to pointers by changing them into casts to the appropriate + // integer type. This promotes constant folding and simplifies this code. + Constant *Op = CE->getOperand(0); + Op = ConstantExpr::getIntegerCast(Op, TD.getIntPtrType(CV->getContext()), + false/*ZExt*/); + return LowerConstant(Op, AP); + } + + case Instruction::PtrToInt: { + const TargetData &TD = *AP.TM.getTargetData(); + // Support only foldable casts to/from pointers that can be eliminated by + // changing the pointer to the appropriately sized integer type. + Constant *Op = CE->getOperand(0); + Type *Ty = CE->getType(); + + const MCExpr *OpExpr = LowerConstant(Op, AP); + + // We can emit the pointer value into this slot if the slot is an + // integer slot equal to the size of the pointer. + if (TD.getTypeAllocSize(Ty) == TD.getTypeAllocSize(Op->getType())) + return OpExpr; + + // Otherwise the pointer is smaller than the resultant integer, mask off + // the high bits so we are sure to get a proper truncation if the input is + // a constant expr. 
+ unsigned InBits = TD.getTypeAllocSizeInBits(Op->getType()); + const MCExpr *MaskExpr = MCConstantExpr::Create(~0ULL >> (64-InBits), Ctx); + return MCBinaryExpr::CreateAnd(OpExpr, MaskExpr, Ctx); + } + + // The MC library also has a right-shift operator, but it isn't consistently + // signed or unsigned between different targets. + case Instruction::Add: + case Instruction::Sub: + case Instruction::Mul: + case Instruction::SDiv: + case Instruction::SRem: + case Instruction::Shl: + case Instruction::And: + case Instruction::Or: + case Instruction::Xor: { + const MCExpr *LHS = LowerConstant(CE->getOperand(0), AP); + const MCExpr *RHS = LowerConstant(CE->getOperand(1), AP); + switch (CE->getOpcode()) { + default: llvm_unreachable("Unknown binary operator constant cast expr"); + case Instruction::Add: return MCBinaryExpr::CreateAdd(LHS, RHS, Ctx); + case Instruction::Sub: return MCBinaryExpr::CreateSub(LHS, RHS, Ctx); + case Instruction::Mul: return MCBinaryExpr::CreateMul(LHS, RHS, Ctx); + case Instruction::SDiv: return MCBinaryExpr::CreateDiv(LHS, RHS, Ctx); + case Instruction::SRem: return MCBinaryExpr::CreateMod(LHS, RHS, Ctx); + case Instruction::Shl: return MCBinaryExpr::CreateShl(LHS, RHS, Ctx); + case Instruction::And: return MCBinaryExpr::CreateAnd(LHS, RHS, Ctx); + case Instruction::Or: return MCBinaryExpr::CreateOr (LHS, RHS, Ctx); + case Instruction::Xor: return MCBinaryExpr::CreateXor(LHS, RHS, Ctx); + } + } + } +} + + +void NVPTXAsmPrinter::emitLineNumberAsDotLoc(const MachineInstr &MI) +{ + if (!EmitLineNumbers) + return; + if (ignoreLoc(MI)) + return; + + DebugLoc curLoc = MI.getDebugLoc(); + + if (prevDebugLoc.isUnknown() && curLoc.isUnknown()) + return; + + if (prevDebugLoc == curLoc) + return; + + prevDebugLoc = curLoc; + + if (curLoc.isUnknown()) + return; + + + const MachineFunction *MF = MI.getParent()->getParent(); + //const TargetMachine &TM = MF->getTarget(); + + const LLVMContext &ctx = MF->getFunction()->getContext(); + DIScope 
Scope(curLoc.getScope(ctx)); + + if (!Scope.Verify()) + return; + + StringRef fileName(Scope.getFilename()); + StringRef dirName(Scope.getDirectory()); + SmallString<128> FullPathName = dirName; + if (!dirName.empty() && !sys::path::is_absolute(fileName)) { + sys::path::append(FullPathName, fileName); + fileName = FullPathName.str(); + } + + if (filenameMap.find(fileName.str()) == filenameMap.end()) + return; + + + // Emit the line from the source file. + if (llvm::InterleaveSrcInPtx) + this->emitSrcInText(fileName.str(), curLoc.getLine()); + + std::stringstream temp; + temp << "\t.loc " << filenameMap[fileName.str()] + << " " << curLoc.getLine() << " " << curLoc.getCol(); + OutStreamer.EmitRawText(Twine(temp.str().c_str())); +} + +void NVPTXAsmPrinter::EmitInstruction(const MachineInstr *MI) { + SmallString<128> Str; + raw_svector_ostream OS(Str); + if (nvptxSubtarget.getDrvInterface() == NVPTX::CUDA) + emitLineNumberAsDotLoc(*MI); + printInstruction(MI, OS); + OutStreamer.EmitRawText(OS.str()); +} + +void NVPTXAsmPrinter::printReturnValStr(const Function *F, + raw_ostream &O) +{ + const TargetData *TD = TM.getTargetData(); + const TargetLowering *TLI = TM.getTargetLowering(); + + Type *Ty = F->getReturnType(); + + bool isABI = (nvptxSubtarget.getSmVersion() >= 20); + + if (Ty->getTypeID() == Type::VoidTyID) + return; + + O << " ("; + + if (isABI) { + if (Ty->isPrimitiveType() || Ty->isIntegerTy()) { + unsigned size = 0; + if (const IntegerType *ITy = dyn_cast<IntegerType>(Ty)) { + size = ITy->getBitWidth(); + if (size < 32) size = 32; + } else { + assert(Ty->isFloatingPointTy() && + "Floating point type expected here"); + size = Ty->getPrimitiveSizeInBits(); + } + + O << ".param .b" << size << " func_retval0"; + } + else if (isa<PointerType>(Ty)) { + O << ".param .b" << TLI->getPointerTy().getSizeInBits() + << " func_retval0"; + } else { + if ((Ty->getTypeID() == Type::StructTyID) || + isa<VectorType>(Ty)) { + SmallVector<EVT, 16> vtparts; + ComputeValueVTs(*TLI, 
Ty, vtparts); + unsigned totalsz = 0; + for (unsigned i=0,e=vtparts.size(); i!=e; ++i) { + unsigned elems = 1; + EVT elemtype = vtparts[i]; + if (vtparts[i].isVector()) { + elems = vtparts[i].getVectorNumElements(); + elemtype = vtparts[i].getVectorElementType(); + } + for (unsigned j=0, je=elems; j!=je; ++j) { + unsigned sz = elemtype.getSizeInBits(); + if (elemtype.isInteger() && (sz < 8)) sz = 8; + totalsz += sz/8; + } + } + unsigned retAlignment = 0; + if (!llvm::getAlign(*F, 0, retAlignment)) + retAlignment = TD->getABITypeAlignment(Ty); + O << ".param .align " + << retAlignment + << " .b8 func_retval0[" + << totalsz << "]"; + } else + assert(false && + "Unknown return type"); + } + } else { + SmallVector<EVT, 16> vtparts; + ComputeValueVTs(*TLI, Ty, vtparts); + unsigned idx = 0; + for (unsigned i=0,e=vtparts.size(); i!=e; ++i) { + unsigned elems = 1; + EVT elemtype = vtparts[i]; + if (vtparts[i].isVector()) { + elems = vtparts[i].getVectorNumElements(); + elemtype = vtparts[i].getVectorElementType(); + } + + for (unsigned j=0, je=elems; j!=je; ++j) { + unsigned sz = elemtype.getSizeInBits(); + if (elemtype.isInteger() && (sz < 32)) sz = 32; + O << ".reg .b" << sz << " func_retval" << idx; + if (j<je-1) O << ", "; + ++idx; + } + if (i < e-1) + O << ", "; + } + } + O << ") "; + return; +} + +void NVPTXAsmPrinter::printReturnValStr(const MachineFunction &MF, + raw_ostream &O) { + const Function *F = MF.getFunction(); + printReturnValStr(F, O); +} + +void NVPTXAsmPrinter::EmitFunctionEntryLabel() { + SmallString<128> Str; + raw_svector_ostream O(Str); + + // Set up + MRI = &MF->getRegInfo(); + F = MF->getFunction(); + emitLinkageDirective(F,O); + if (llvm::isKernelFunction(*F)) + O << ".entry "; + else { + O << ".func "; + printReturnValStr(*MF, O); + } + + O << *CurrentFnSym; + + emitFunctionParamList(*MF, O); + + if (llvm::isKernelFunction(*F)) + emitKernelFunctionDirectives(*F, O); + + OutStreamer.EmitRawText(O.str()); + + prevDebugLoc = DebugLoc(); +} + +void 
NVPTXAsmPrinter::EmitFunctionBodyStart() { + const TargetRegisterInfo &TRI = *TM.getRegisterInfo(); + unsigned numRegClasses = TRI.getNumRegClasses(); + VRidGlobal2LocalMap = new std::map<unsigned, unsigned>[numRegClasses+1]; + OutStreamer.EmitRawText(StringRef("{\n")); + setAndEmitFunctionVirtualRegisters(*MF); + + SmallString<128> Str; + raw_svector_ostream O(Str); + emitDemotedVars(MF->getFunction(), O); + OutStreamer.EmitRawText(O.str()); +} + +void NVPTXAsmPrinter::EmitFunctionBodyEnd() { + OutStreamer.EmitRawText(StringRef("}\n")); + delete []VRidGlobal2LocalMap; +} + + +void +NVPTXAsmPrinter::emitKernelFunctionDirectives(const Function& F, + raw_ostream &O) const { + // If the NVVM IR has some of reqntid* specified, then output + // the reqntid directive, and set the unspecified ones to 1. + // If none of reqntid* is specified, don't output reqntid directive. + unsigned reqntidx, reqntidy, reqntidz; + bool specified = false; + if (llvm::getReqNTIDx(F, reqntidx) == false) reqntidx = 1; + else specified = true; + if (llvm::getReqNTIDy(F, reqntidy) == false) reqntidy = 1; + else specified = true; + if (llvm::getReqNTIDz(F, reqntidz) == false) reqntidz = 1; + else specified = true; + + if (specified) + O << ".reqntid " << reqntidx << ", " + << reqntidy << ", " << reqntidz << "\n"; + + // If the NVVM IR has some of maxntid* specified, then output + // the maxntid directive, and set the unspecified ones to 1. + // If none of maxntid* is specified, don't output maxntid directive. 
+ unsigned maxntidx, maxntidy, maxntidz; + specified = false; + if (llvm::getMaxNTIDx(F, maxntidx) == false) maxntidx = 1; + else specified = true; + if (llvm::getMaxNTIDy(F, maxntidy) == false) maxntidy = 1; + else specified = true; + if (llvm::getMaxNTIDz(F, maxntidz) == false) maxntidz = 1; + else specified = true; + + if (specified) + O << ".maxntid " << maxntidx << ", " + << maxntidy << ", " << maxntidz << "\n"; + + unsigned mincta; + if (llvm::getMinCTASm(F, mincta)) + O << ".minnctapersm " << mincta << "\n"; +} + +void +NVPTXAsmPrinter::getVirtualRegisterName(unsigned vr, bool isVec, + raw_ostream &O) { + const TargetRegisterClass * RC = MRI->getRegClass(vr); + unsigned id = RC->getID(); + + std::map<unsigned, unsigned> ®map = VRidGlobal2LocalMap[id]; + unsigned mapped_vr = regmap[vr]; + + if (!isVec) { + O << getNVPTXRegClassStr(RC) << mapped_vr; + return; + } + // Vector virtual register + if (getNVPTXVectorSize(RC) == 4) + O << "{" + << getNVPTXRegClassStr(RC) << mapped_vr << "_0, " + << getNVPTXRegClassStr(RC) << mapped_vr << "_1, " + << getNVPTXRegClassStr(RC) << mapped_vr << "_2, " + << getNVPTXRegClassStr(RC) << mapped_vr << "_3" + << "}"; + else if (getNVPTXVectorSize(RC) == 2) + O << "{" + << getNVPTXRegClassStr(RC) << mapped_vr << "_0, " + << getNVPTXRegClassStr(RC) << mapped_vr << "_1" + << "}"; + else + assert(0 && "Unsupported vector size"); +} + +void +NVPTXAsmPrinter::emitVirtualRegister(unsigned int vr, bool isVec, + raw_ostream &O) { + getVirtualRegisterName(vr, isVec, O); +} + +void NVPTXAsmPrinter::printVecModifiedImmediate(const MachineOperand &MO, + const char *Modifier, + raw_ostream &O) { +char vecelem[] = {'0', '1', '2', '3', '0', '1', '2', '3'}; + int Imm = (int)MO.getImm(); + if(0 == strcmp(Modifier, "vecelem")) + O << "_" << vecelem[Imm]; + else if(0 == strcmp(Modifier, "vecv4comm1")) { + if((Imm < 0) || (Imm > 3)) + O << "//"; + } + else if(0 == strcmp(Modifier, "vecv4comm2")) { + if((Imm < 4) || (Imm > 7)) + O << "//"; + } + else 
if(0 == strcmp(Modifier, "vecv4pos")) { + if(Imm < 0) Imm = 0; + O << "_" << vecelem[Imm%4]; + } + else if(0 == strcmp(Modifier, "vecv2comm1")) { + if((Imm < 0) || (Imm > 1)) + O << "//"; + } + else if(0 == strcmp(Modifier, "vecv2comm2")) { + if((Imm < 2) || (Imm > 3)) + O << "//"; + } + else if(0 == strcmp(Modifier, "vecv2pos")) { + if(Imm < 0) Imm = 0; + O << "_" << vecelem[Imm%2]; + } + else + assert(0 && "Unknown Modifier on immediate operand"); +} + +void NVPTXAsmPrinter::printOperand(const MachineInstr *MI, int opNum, + raw_ostream &O, const char *Modifier) { + const MachineOperand &MO = MI->getOperand(opNum); + switch (MO.getType()) { + case MachineOperand::MO_Register: + if (TargetRegisterInfo::isPhysicalRegister(MO.getReg())) { + if (MO.getReg() == NVPTX::VRDepot) + O << DEPOTNAME << getFunctionNumber(); + else + O << getRegisterName(MO.getReg()); + } else { + if (!Modifier) + emitVirtualRegister(MO.getReg(), false, O); + else { + if (strcmp(Modifier, "vecfull") == 0) + emitVirtualRegister(MO.getReg(), true, O); + else + assert(0 && + "Don't know how to handle the modifier on virtual register."); + } + } + return; + + case MachineOperand::MO_Immediate: + if (!Modifier) + O << MO.getImm(); + else if (strstr(Modifier, "vec") == Modifier) + printVecModifiedImmediate(MO, Modifier, O); + else + assert(0 && "Don't know how to handle modifier on immediate operand"); + return; + + case MachineOperand::MO_FPImmediate: + printFPConstant(MO.getFPImm(), O); + break; + + case MachineOperand::MO_GlobalAddress: + O << *Mang->getSymbol(MO.getGlobal()); + break; + + case MachineOperand::MO_ExternalSymbol: { + const char * symbname = MO.getSymbolName(); + if (strstr(symbname, ".PARAM") == symbname) { + unsigned index; + sscanf(symbname+6, "%u[];", &index); + printParamName(index, O); + } + else if (strstr(symbname, ".HLPPARAM") == symbname) { + unsigned index; + sscanf(symbname+9, "%u[];", &index); + O << *CurrentFnSym << "_param_" << index << "_offset"; + } + else + O << 
symbname; + break; + } + + case MachineOperand::MO_MachineBasicBlock: + O << *MO.getMBB()->getSymbol(); + return; + + default: + assert(0 && " Operand type not supported."); + } +} + +void NVPTXAsmPrinter:: +printImplicitDef(const MachineInstr *MI, raw_ostream &O) const { +#ifndef __OPTIMIZE__ + O << "\t// Implicit def :"; + //printOperand(MI, 0); + O << "\n"; +#endif +} + +void NVPTXAsmPrinter::printMemOperand(const MachineInstr *MI, int opNum, + raw_ostream &O, const char *Modifier) { + printOperand(MI, opNum, O); + + if (Modifier && !strcmp(Modifier, "add")) { + O << ", "; + printOperand(MI, opNum+1, O); + } else { + if (MI->getOperand(opNum+1).isImm() && + MI->getOperand(opNum+1).getImm() == 0) + return; // don't print ',0' or '+0' + O << "+"; + printOperand(MI, opNum+1, O); + } +} + +void NVPTXAsmPrinter::printLdStCode(const MachineInstr *MI, int opNum, + raw_ostream &O, const char *Modifier) +{ + if (Modifier) { + const MachineOperand &MO = MI->getOperand(opNum); + int Imm = (int)MO.getImm(); + if (!strcmp(Modifier, "volatile")) { + if (Imm) + O << ".volatile"; + } else if (!strcmp(Modifier, "addsp")) { + switch (Imm) { + case NVPTX::PTXLdStInstCode::GLOBAL: O << ".global"; break; + case NVPTX::PTXLdStInstCode::SHARED: O << ".shared"; break; + case NVPTX::PTXLdStInstCode::LOCAL: O << ".local"; break; + case NVPTX::PTXLdStInstCode::PARAM: O << ".param"; break; + case NVPTX::PTXLdStInstCode::CONSTANT: O << ".const"; break; + case NVPTX::PTXLdStInstCode::GENERIC: + if (!nvptxSubtarget.hasGenericLdSt()) + O << ".global"; + break; + default: + assert("wrong value"); + } + } + else if (!strcmp(Modifier, "sign")) { + if (Imm==NVPTX::PTXLdStInstCode::Signed) + O << "s"; + else if (Imm==NVPTX::PTXLdStInstCode::Unsigned) + O << "u"; + else + O << "f"; + } + else if (!strcmp(Modifier, "vec")) { + if (Imm==NVPTX::PTXLdStInstCode::V2) + O << ".v2"; + else if (Imm==NVPTX::PTXLdStInstCode::V4) + O << ".v4"; + } + else + assert("unknown modifier"); + } + else + 
assert("unknown modifier"); +} + +void NVPTXAsmPrinter::emitDeclaration (const Function *F, raw_ostream &O) { + + emitLinkageDirective(F,O); + if (llvm::isKernelFunction(*F)) + O << ".entry "; + else + O << ".func "; + printReturnValStr(F, O); + O << *CurrentFnSym << "\n"; + emitFunctionParamList(F, O); + O << ";\n"; +} + +static bool usedInGlobalVarDef(const Constant *C) +{ + if (!C) + return false; + + if (const GlobalVariable *GV = dyn_cast<GlobalVariable>(C)) { + if (GV->getName().str() == "llvm.used") + return false; + return true; + } + + for (Value::const_use_iterator ui=C->use_begin(), ue=C->use_end(); + ui!=ue; ++ui) { + const Constant *C = dyn_cast<Constant>(*ui); + if (usedInGlobalVarDef(C)) + return true; + } + return false; +} + +static bool usedInOneFunc(const User *U, Function const *&oneFunc) +{ + if (const GlobalVariable *othergv = dyn_cast<GlobalVariable>(U)) { + if (othergv->getName().str() == "llvm.used") + return true; + } + + if (const Instruction *instr = dyn_cast<Instruction>(U)) { + if (instr->getParent() && instr->getParent()->getParent()) { + const Function *curFunc = instr->getParent()->getParent(); + if (oneFunc && (curFunc != oneFunc)) + return false; + oneFunc = curFunc; + return true; + } + else + return false; + } + + if (const MDNode *md = dyn_cast<MDNode>(U)) + if (md->hasName() && ((md->getName().str() == "llvm.dbg.gv") || + (md->getName().str() == "llvm.dbg.sp"))) + return true; + + + for (User::const_use_iterator ui=U->use_begin(), ue=U->use_end(); + ui!=ue; ++ui) { + if (usedInOneFunc(*ui, oneFunc) == false) + return false; + } + return true; +} + +/* Find out if a global variable can be demoted to local scope. + * Currently, this is valid for CUDA shared variables, which have local + * scope and global lifetime. So the conditions to check are : + * 1. Is the global variable in shared address space? + * 2. Does it have internal linkage? + * 3. Is the global variable referenced only in one function? 
+ */ +static bool canDemoteGlobalVar(const GlobalVariable *gv, Function const *&f) { + if (gv->hasInternalLinkage() == false) + return false; + const PointerType *Pty = gv->getType(); + if (Pty->getAddressSpace() != llvm::ADDRESS_SPACE_SHARED) + return false; + + const Function *oneFunc = 0; + + bool flag = usedInOneFunc(gv, oneFunc); + if (flag == false) + return false; + if (!oneFunc) + return false; + f = oneFunc; + return true; +} + +static bool useFuncSeen(const Constant *C, + llvm::DenseMap<const Function *, bool> &seenMap) { + for (Value::const_use_iterator ui=C->use_begin(), ue=C->use_end(); + ui!=ue; ++ui) { + if (const Constant *cu = dyn_cast<Constant>(*ui)) { + if (useFuncSeen(cu, seenMap)) + return true; + } else if (const Instruction *I = dyn_cast<Instruction>(*ui)) { + const BasicBlock *bb = I->getParent(); + if (!bb) continue; + const Function *caller = bb->getParent(); + if (!caller) continue; + if (seenMap.find(caller) != seenMap.end()) + return true; + } + } + return false; +} + +void NVPTXAsmPrinter::emitDeclarations (Module &M, raw_ostream &O) { + llvm::DenseMap<const Function *, bool> seenMap; + for (Module::const_iterator FI=M.begin(), FE=M.end(); + FI!=FE; ++FI) { + const Function *F = FI; + + if (F->isDeclaration()) { + if (F->use_empty()) + continue; + if (F->getIntrinsicID()) + continue; + CurrentFnSym = Mang->getSymbol(F); + emitDeclaration(F, O); + continue; + } + for (Value::const_use_iterator iter=F->use_begin(), + iterEnd=F->use_end(); iter!=iterEnd; ++iter) { + if (const Constant *C = dyn_cast<Constant>(*iter)) { + if (usedInGlobalVarDef(C)) { + // The use is in the initialization of a global variable + // that is a function pointer, so print a declaration + // for the original function + CurrentFnSym = Mang->getSymbol(F); + emitDeclaration(F, O); + break; + } + // Emit a declaration of this function if the function that + // uses this constant expr has already been seen. 
+ if (useFuncSeen(C, seenMap)) { + CurrentFnSym = Mang->getSymbol(F); + emitDeclaration(F, O); + break; + } + } + + if (!isa<Instruction>(*iter)) continue; + const Instruction *instr = cast<Instruction>(*iter); + const BasicBlock *bb = instr->getParent(); + if (!bb) continue; + const Function *caller = bb->getParent(); + if (!caller) continue; + + // If a caller has already been seen, then the caller is + // appearing in the module before the callee. so print out + // a declaration for the callee. + if (seenMap.find(caller) != seenMap.end()) { + CurrentFnSym = Mang->getSymbol(F); + emitDeclaration(F, O); + break; + } + } + seenMap[F] = true; + } +} + +void NVPTXAsmPrinter::recordAndEmitFilenames(Module &M) { + DebugInfoFinder DbgFinder; + DbgFinder.processModule(M); + + unsigned i=1; + for (DebugInfoFinder::iterator I = DbgFinder.compile_unit_begin(), + E = DbgFinder.compile_unit_end(); I != E; ++I) { + DICompileUnit DIUnit(*I); + StringRef Filename(DIUnit.getFilename()); + StringRef Dirname(DIUnit.getDirectory()); + SmallString<128> FullPathName = Dirname; + if (!Dirname.empty() && !sys::path::is_absolute(Filename)) { + sys::path::append(FullPathName, Filename); + Filename = FullPathName.str(); + } + if (filenameMap.find(Filename.str()) != filenameMap.end()) + continue; + filenameMap[Filename.str()] = i; + OutStreamer.EmitDwarfFileDirective(i, "", Filename.str()); + ++i; + } + + for (DebugInfoFinder::iterator I = DbgFinder.subprogram_begin(), + E = DbgFinder.subprogram_end(); I != E; ++I) { + DISubprogram SP(*I); + StringRef Filename(SP.getFilename()); + StringRef Dirname(SP.getDirectory()); + SmallString<128> FullPathName = Dirname; + if (!Dirname.empty() && !sys::path::is_absolute(Filename)) { + sys::path::append(FullPathName, Filename); + Filename = FullPathName.str(); + } + if (filenameMap.find(Filename.str()) != filenameMap.end()) + continue; + filenameMap[Filename.str()] = i; + ++i; + } +} + +bool NVPTXAsmPrinter::doInitialization (Module &M) { + + 
SmallString<128> Str1; + raw_svector_ostream OS1(Str1); + + MMI = getAnalysisIfAvailable<MachineModuleInfo>(); + MMI->AnalyzeModule(M); + + // We need to call the parent's one explicitly. + //bool Result = AsmPrinter::doInitialization(M); + + // Initialize TargetLoweringObjectFile. + const_cast<TargetLoweringObjectFile&>(getObjFileLowering()) + .Initialize(OutContext, TM); + + Mang = new Mangler(OutContext, *TM.getTargetData()); + + // Emit header before any dwarf directives are emitted below. + emitHeader(M, OS1); + OutStreamer.EmitRawText(OS1.str()); + + + // Already commented out + //bool Result = AsmPrinter::doInitialization(M); + + + if (nvptxSubtarget.getDrvInterface() == NVPTX::CUDA) + recordAndEmitFilenames(M); + + SmallString<128> Str2; + raw_svector_ostream OS2(Str2); + + emitDeclarations(M, OS2); + + // Print out module-level global variables here. + for (Module::global_iterator I = M.global_begin(), E = M.global_end(); + I != E; ++I) + printModuleLevelGV(I, OS2); + + OS2 << '\n'; + + OutStreamer.EmitRawText(OS2.str()); + return false; // success +} + +void NVPTXAsmPrinter::emitHeader (Module &M, raw_ostream &O) { + O << "//\n"; + O << "// Generated by LLVM NVPTX Back-End\n"; + O << "//\n"; + O << "\n"; + + O << ".version 3.0\n"; + + O << ".target "; + O << nvptxSubtarget.getTargetName(); + + if (nvptxSubtarget.getDrvInterface() == NVPTX::NVCL) + O << ", texmode_independent"; + if (nvptxSubtarget.getDrvInterface() == NVPTX::CUDA) { + if (!nvptxSubtarget.hasDouble()) + O << ", map_f64_to_f32"; + } + + if (MAI->doesSupportDebugInformation()) + O << ", debug"; + + O << "\n"; + + O << ".address_size "; + if (nvptxSubtarget.is64Bit()) + O << "64"; + else + O << "32"; + O << "\n"; + + O << "\n"; +} + +bool NVPTXAsmPrinter::doFinalization(Module &M) { + // XXX Temproarily remove global variables so that doFinalization() will not + // emit them again (global variables are emitted at beginning). 
+ + Module::GlobalListType &global_list = M.getGlobalList(); + int i, n = global_list.size(); + GlobalVariable **gv_array = new GlobalVariable* [n]; + + // first, back-up GlobalVariable in gv_array + i = 0; + for (Module::global_iterator I = global_list.begin(), E = global_list.end(); + I != E; ++I) + gv_array[i++] = &*I; + + // second, empty global_list + while (!global_list.empty()) + global_list.remove(global_list.begin()); + + // call doFinalization + bool ret = AsmPrinter::doFinalization(M); + + // now we restore global variables + for (i = 0; i < n; i ++) + global_list.insert(global_list.end(), gv_array[i]); + + delete[] gv_array; + return ret; + + + //bool Result = AsmPrinter::doFinalization(M); + // Instead of calling the parents doFinalization, we may + // clone parents doFinalization and customize here. + // Currently, we if NVISA out the EmitGlobals() in + // parent's doFinalization, which is too intrusive. + // + // Same for the doInitialization. + //return Result; +} + +// This function emits appropriate linkage directives for +// functions and global variables. +// +// extern function declaration -> .extern +// extern function definition -> .visible +// external global variable with init -> .visible +// external without init -> .extern +// appending -> not allowed, assert. 
+ +void NVPTXAsmPrinter::emitLinkageDirective(const GlobalValue* V, raw_ostream &O) +{ + if (nvptxSubtarget.getDrvInterface() == NVPTX::CUDA) { + if (V->hasExternalLinkage()) { + if (isa<GlobalVariable>(V)) { + const GlobalVariable *GVar = cast<GlobalVariable>(V); + if (GVar) { + if (GVar->hasInitializer()) + O << ".visible "; + else + O << ".extern "; + } + } else if (V->isDeclaration()) + O << ".extern "; + else + O << ".visible "; + } else if (V->hasAppendingLinkage()) { + std::string msg; + msg.append("Error: "); + msg.append("Symbol "); + if (V->hasName()) + msg.append(V->getName().str()); + msg.append("has unsupported appending linkage type"); + llvm_unreachable(msg.c_str()); + } + } +} + + +void NVPTXAsmPrinter::printModuleLevelGV(GlobalVariable* GVar, raw_ostream &O, + bool processDemoted) { + + // Skip meta data + if (GVar->hasSection()) { + if (GVar->getSection() == "llvm.metadata") + return; + } + + const TargetData *TD = TM.getTargetData(); + + // GlobalVariables are always constant pointers themselves. + const PointerType *PTy = GVar->getType(); + Type *ETy = PTy->getElementType(); + + if (GVar->hasExternalLinkage()) { + if (GVar->hasInitializer()) + O << ".visible "; + else + O << ".extern "; + } + + if (llvm::isTexture(*GVar)) { + O << ".global .texref " << llvm::getTextureName(*GVar) << ";\n"; + return; + } + + if (llvm::isSurface(*GVar)) { + O << ".global .surfref " << llvm::getSurfaceName(*GVar) << ";\n"; + return; + } + + if (GVar->isDeclaration()) { + // (extern) declarations, no definition or initializer + // Currently the only known declaration is for an automatic __local + // (.shared) promoted to global. 
+ emitPTXGlobalVariable(GVar, O); + O << ";\n"; + return; + } + + if (llvm::isSampler(*GVar)) { + O << ".global .samplerref " << llvm::getSamplerName(*GVar); + + Constant *Initializer = NULL; + if (GVar->hasInitializer()) + Initializer = GVar->getInitializer(); + ConstantInt *CI = NULL; + if (Initializer) + CI = dyn_cast<ConstantInt>(Initializer); + if (CI) { + unsigned sample=CI->getZExtValue(); + + O << " = { "; + + for (int i =0, addr=((sample & __CLK_ADDRESS_MASK ) >> + __CLK_ADDRESS_BASE) ; i < 3 ; i++) { + O << "addr_mode_" << i << " = "; + switch (addr) { + case 0: O << "wrap"; break; + case 1: O << "clamp_to_border"; break; + case 2: O << "clamp_to_edge"; break; + case 3: O << "wrap"; break; + case 4: O << "mirror"; break; + } + O <<", "; + } + O << "filter_mode = "; + switch (( sample & __CLK_FILTER_MASK ) >> __CLK_FILTER_BASE ) { + case 0: O << "nearest"; break; + case 1: O << "linear"; break; + case 2: assert ( 0 && "Anisotropic filtering is not supported"); + default: O << "nearest"; break; + } + if (!(( sample &__CLK_NORMALIZED_MASK ) >> __CLK_NORMALIZED_BASE)) { + O << ", force_unnormalized_coords = 1"; + } + O << " }"; + } + + O << ";\n"; + return; + } + + if (GVar->hasPrivateLinkage()) { + + if (!strncmp(GVar->getName().data(), "unrollpragma", 12)) + return; + + // FIXME - need better way (e.g. 
Metadata) to avoid generating this global + if (!strncmp(GVar->getName().data(), "filename", 8)) + return; + if (GVar->use_empty()) + return; + } + + const Function *demotedFunc = 0; + if (!processDemoted && canDemoteGlobalVar(GVar, demotedFunc)) { + O << "// " << GVar->getName().str() << " has been demoted\n"; + if (localDecls.find(demotedFunc) != localDecls.end()) + localDecls[demotedFunc].push_back(GVar); + else { + std::vector<GlobalVariable *> temp; + temp.push_back(GVar); + localDecls[demotedFunc] = temp; + } + return; + } + + O << "."; + emitPTXAddressSpace(PTy->getAddressSpace(), O); + if (GVar->getAlignment() == 0) + O << " .align " << (int) TD->getPrefTypeAlignment(ETy); + else + O << " .align " << GVar->getAlignment(); + + + if (ETy->isPrimitiveType() || ETy->isIntegerTy() || isa<PointerType>(ETy)) { + O << " ."; + O << getPTXFundamentalTypeStr(ETy, false); + O << " "; + O << *Mang->getSymbol(GVar); + + // Ptx allows variable initilization only for constant and global state + // spaces. + if (((PTy->getAddressSpace() == llvm::ADDRESS_SPACE_GLOBAL) || + (PTy->getAddressSpace() == llvm::ADDRESS_SPACE_CONST_NOT_GEN) || + (PTy->getAddressSpace() == llvm::ADDRESS_SPACE_CONST)) + && GVar->hasInitializer()) { + Constant *Initializer = GVar->getInitializer(); + if (!Initializer->isNullValue()) { + O << " = " ; + printScalarConstant(Initializer, O); + } + } + } else { + unsigned int ElementSize =0; + + // Although PTX has direct support for struct type and array type and + // LLVM IR is very similar to PTX, the LLVM CodeGen does not support for + // targets that support these high level field accesses. Structs, arrays + // and vectors are lowered into arrays of bytes. + switch (ETy->getTypeID()) { + case Type::StructTyID: + case Type::ArrayTyID: + case Type::VectorTyID: + ElementSize = TD->getTypeStoreSize(ETy); + // Ptx allows variable initilization only for constant and + // global state spaces. 
+ if (((PTy->getAddressSpace() == llvm::ADDRESS_SPACE_GLOBAL) || + (PTy->getAddressSpace() == llvm::ADDRESS_SPACE_CONST_NOT_GEN) || + (PTy->getAddressSpace() == llvm::ADDRESS_SPACE_CONST)) + && GVar->hasInitializer()) { + Constant *Initializer = GVar->getInitializer(); + if (!isa<UndefValue>(Initializer) && + !Initializer->isNullValue()) { + AggBuffer aggBuffer(ElementSize, O, *this); + bufferAggregateConstant(Initializer, &aggBuffer); + if (aggBuffer.numSymbols) { + if (nvptxSubtarget.is64Bit()) { + O << " .u64 " << *Mang->getSymbol(GVar) <<"[" ; + O << ElementSize/8; + } + else { + O << " .u32 " << *Mang->getSymbol(GVar) <<"[" ; + O << ElementSize/4; + } + O << "]"; + } + else { + O << " .b8 " << *Mang->getSymbol(GVar) <<"[" ; + O << ElementSize; + O << "]"; + } + O << " = {" ; + aggBuffer.print(); + O << "}"; + } + else { + O << " .b8 " << *Mang->getSymbol(GVar) ; + if (ElementSize) { + O <<"[" ; + O << ElementSize; + O << "]"; + } + } + } + else { + O << " .b8 " << *Mang->getSymbol(GVar); + if (ElementSize) { + O <<"[" ; + O << ElementSize; + O << "]"; + } + } + break; + default: + assert( 0 && "type not supported yet"); + } + + } + O << ";\n"; +} + +void NVPTXAsmPrinter::emitDemotedVars(const Function *f, raw_ostream &O) { + if (localDecls.find(f) == localDecls.end()) + return; + + std::vector<GlobalVariable *> &gvars = localDecls[f]; + + for (unsigned i=0, e=gvars.size(); i!=e; ++i) { + O << "\t// demoted variable\n\t"; + printModuleLevelGV(gvars[i], O, true); + } +} + +void NVPTXAsmPrinter::emitPTXAddressSpace(unsigned int AddressSpace, + raw_ostream &O) const { + switch (AddressSpace) { + case llvm::ADDRESS_SPACE_LOCAL: + O << "local" ; + break; + case llvm::ADDRESS_SPACE_GLOBAL: + O << "global" ; + break; + case llvm::ADDRESS_SPACE_CONST: + // This logic should be consistent with that in + // getCodeAddrSpace() (NVPTXISelDATToDAT.cpp) + if (nvptxSubtarget.hasGenericLdSt()) + O << "global" ; + else + O << "const" ; + break; + case 
llvm::ADDRESS_SPACE_CONST_NOT_GEN: + O << "const" ; + break; + case llvm::ADDRESS_SPACE_SHARED: + O << "shared" ; + break; + default: + assert(0 && "unexpected address space"); + } +} + +std::string NVPTXAsmPrinter::getPTXFundamentalTypeStr(const Type *Ty, + bool useB4PTR) const { + switch (Ty->getTypeID()) { + default: + llvm_unreachable("unexpected type"); + break; + case Type::IntegerTyID: { + unsigned NumBits = cast<IntegerType>(Ty)->getBitWidth(); + if (NumBits == 1) + return "pred"; + else if (NumBits <= 64) { + std::string name = "u"; + return name + utostr(NumBits); + } else { + llvm_unreachable("Integer too large"); + break; + } + break; + } + case Type::FloatTyID: + return "f32"; + case Type::DoubleTyID: + return "f64"; + case Type::PointerTyID: + if (nvptxSubtarget.is64Bit()) + if (useB4PTR) return "b64"; + else return "u64"; + else + if (useB4PTR) return "b32"; + else return "u32"; + } + llvm_unreachable("unexpected type"); + return NULL; +} + +void NVPTXAsmPrinter::emitPTXGlobalVariable(const GlobalVariable* GVar, + raw_ostream &O) { + + const TargetData *TD = TM.getTargetData(); + + // GlobalVariables are always constant pointers themselves. + const PointerType *PTy = GVar->getType(); + Type *ETy = PTy->getElementType(); + + O << "."; + emitPTXAddressSpace(PTy->getAddressSpace(), O); + if (GVar->getAlignment() == 0) + O << " .align " << (int) TD->getPrefTypeAlignment(ETy); + else + O << " .align " << GVar->getAlignment(); + + if (ETy->isPrimitiveType() || ETy->isIntegerTy() || isa<PointerType>(ETy)) { + O << " ."; + O << getPTXFundamentalTypeStr(ETy); + O << " "; + O << *Mang->getSymbol(GVar); + return; + } + + int64_t ElementSize =0; + + // Although PTX has direct support for struct type and array type and LLVM IR + // is very similar to PTX, the LLVM CodeGen does not support for targets that + // support these high level field accesses. Structs and arrays are lowered + // into arrays of bytes. 
+ switch (ETy->getTypeID()) { + case Type::StructTyID: + case Type::ArrayTyID: + case Type::VectorTyID: + ElementSize = TD->getTypeStoreSize(ETy); + O << " .b8 " << *Mang->getSymbol(GVar) <<"[" ; + if (ElementSize) { + O << itostr(ElementSize) ; + } + O << "]"; + break; + default: + assert( 0 && "type not supported yet"); + } + return ; +} + + +static unsigned int +getOpenCLAlignment(const TargetData *TD, + Type *Ty) { + if (Ty->isPrimitiveType() || Ty->isIntegerTy() || isa<PointerType>(Ty)) + return TD->getPrefTypeAlignment(Ty); + + const ArrayType *ATy = dyn_cast<ArrayType>(Ty); + if (ATy) + return getOpenCLAlignment(TD, ATy->getElementType()); + + const VectorType *VTy = dyn_cast<VectorType>(Ty); + if (VTy) { + Type *ETy = VTy->getElementType(); + unsigned int numE = VTy->getNumElements(); + unsigned int alignE = TD->getPrefTypeAlignment(ETy); + if (numE == 3) + return 4*alignE; + else + return numE*alignE; + } + + const StructType *STy = dyn_cast<StructType>(Ty); + if (STy) { + unsigned int alignStruct = 1; + // Go through each element of the struct and find the + // largest alignment. 
+ for (unsigned i=0, e=STy->getNumElements(); i != e; i++) { + Type *ETy = STy->getElementType(i); + unsigned int align = getOpenCLAlignment(TD, ETy); + if (align > alignStruct) + alignStruct = align; + } + return alignStruct; + } + + const FunctionType *FTy = dyn_cast<FunctionType>(Ty); + if (FTy) + return TD->getPointerPrefAlignment(); + return TD->getPrefTypeAlignment(Ty); +} + +void NVPTXAsmPrinter::printParamName(Function::const_arg_iterator I, + int paramIndex, raw_ostream &O) { + if ((nvptxSubtarget.getDrvInterface() == NVPTX::NVCL) || + (nvptxSubtarget.getDrvInterface() == NVPTX::CUDA)) + O << *CurrentFnSym << "_param_" << paramIndex; + else { + std::string argName = I->getName(); + const char *p = argName.c_str(); + while (*p) { + if (*p == '.') + O << "_"; + else + O << *p; + p++; + } + } +} + +void NVPTXAsmPrinter::printParamName(int paramIndex, raw_ostream &O) { + Function::const_arg_iterator I, E; + int i = 0; + + if ((nvptxSubtarget.getDrvInterface() == NVPTX::NVCL) || + (nvptxSubtarget.getDrvInterface() == NVPTX::CUDA)) { + O << *CurrentFnSym << "_param_" << paramIndex; + return; + } + + for (I = F->arg_begin(), E = F->arg_end(); I != E; ++I, i++) { + if (i==paramIndex) { + printParamName(I, paramIndex, O); + return; + } + } + llvm_unreachable("paramIndex out of bound"); +} + +void NVPTXAsmPrinter::emitFunctionParamList(const Function *F, + raw_ostream &O) { + const TargetData *TD = TM.getTargetData(); + const AttrListPtr &PAL = F->getAttributes(); + const TargetLowering *TLI = TM.getTargetLowering(); + Function::const_arg_iterator I, E; + unsigned paramIndex = 0; + bool first = true; + bool isKernelFunc = llvm::isKernelFunction(*F); + bool isABI = (nvptxSubtarget.getSmVersion() >= 20); + MVT thePointerTy = TLI->getPointerTy(); + + O << "(\n"; + + for (I = F->arg_begin(), E = F->arg_end(); I != E; ++I, paramIndex++) { + const Type *Ty = I->getType(); + + if (!first) + O << ",\n"; + + first = false; + + // Handle image/sampler parameters + if 
(llvm::isSampler(*I) || llvm::isImage(*I)) { + if (llvm::isImage(*I)) { + std::string sname = I->getName(); + if (llvm::isImageWriteOnly(*I)) + O << "\t.param .surfref " << *CurrentFnSym << "_param_" << paramIndex; + else // Default image is read_only + O << "\t.param .texref " << *CurrentFnSym << "_param_" << paramIndex; + } + else // Should be llvm::isSampler(*I) + O << "\t.param .samplerref " << *CurrentFnSym << "_param_" + << paramIndex; + continue; + } + + if (PAL.paramHasAttr(paramIndex+1, Attribute::ByVal) == false) { + // Just a scalar + const PointerType *PTy = dyn_cast<PointerType>(Ty); + if (isKernelFunc) { + if (PTy) { + // Special handling for pointer arguments to kernel + O << "\t.param .u" << thePointerTy.getSizeInBits() << " "; + + if (nvptxSubtarget.getDrvInterface() != NVPTX::CUDA) { + Type *ETy = PTy->getElementType(); + int addrSpace = PTy->getAddressSpace(); + switch(addrSpace) { + default: + O << ".ptr "; + break; + case llvm::ADDRESS_SPACE_CONST_NOT_GEN: + O << ".ptr .const "; + break; + case llvm::ADDRESS_SPACE_SHARED: + O << ".ptr .shared "; + break; + case llvm::ADDRESS_SPACE_GLOBAL: + case llvm::ADDRESS_SPACE_CONST: + O << ".ptr .global "; + break; + } + O << ".align " << (int)getOpenCLAlignment(TD, ETy) << " "; + } + printParamName(I, paramIndex, O); + continue; + } + + // non-pointer scalar to kernel func + O << "\t.param ." + << getPTXFundamentalTypeStr(Ty) << " "; + printParamName(I, paramIndex, O); + continue; + } + // Non-kernel function, just print .param .b<size> for ABI + // and .reg .b<size> for non ABY + unsigned sz = 0; + if (isa<IntegerType>(Ty)) { + sz = cast<IntegerType>(Ty)->getBitWidth(); + if (sz < 32) sz = 32; + } + else if (isa<PointerType>(Ty)) + sz = thePointerTy.getSizeInBits(); + else + sz = Ty->getPrimitiveSizeInBits(); + if (isABI) + O << "\t.param .b" << sz << " "; + else + O << "\t.reg .b" << sz << " "; + printParamName(I, paramIndex, O); + continue; + } + + // param has byVal attribute. 
So should be a pointer + const PointerType *PTy = dyn_cast<PointerType>(Ty); + assert(PTy && + "Param with byval attribute should be a pointer type"); + Type *ETy = PTy->getElementType(); + + if (isABI || isKernelFunc) { + // Just print .param .b8 .align <a> .param[size]; + // <a> = PAL.getparamalignment + // size = typeallocsize of element type + unsigned align = PAL.getParamAlignment(paramIndex+1); + unsigned sz = TD->getTypeAllocSize(ETy); + O << "\t.param .align " << align + << " .b8 "; + printParamName(I, paramIndex, O); + O << "[" << sz << "]"; + continue; + } else { + // Split the ETy into constituent parts and + // print .param .b<size> <name> for each part. + // Further, if a part is vector, print the above for + // each vector element. + SmallVector<EVT, 16> vtparts; + ComputeValueVTs(*TLI, ETy, vtparts); + for (unsigned i=0,e=vtparts.size(); i!=e; ++i) { + unsigned elems = 1; + EVT elemtype = vtparts[i]; + if (vtparts[i].isVector()) { + elems = vtparts[i].getVectorNumElements(); + elemtype = vtparts[i].getVectorElementType(); + } + + for (unsigned j=0,je=elems; j!=je; ++j) { + unsigned sz = elemtype.getSizeInBits(); + if (elemtype.isInteger() && (sz < 32)) sz = 32; + O << "\t.reg .b" << sz << " "; + printParamName(I, paramIndex, O); + if (j<je-1) O << ",\n"; + ++paramIndex; + } + if (i<e-1) + O << ",\n"; + } + --paramIndex; + continue; + } + } + + O << "\n)\n"; +} + +void NVPTXAsmPrinter::emitFunctionParamList(const MachineFunction &MF, + raw_ostream &O) { + const Function *F = MF.getFunction(); + emitFunctionParamList(F, O); +} + + +void NVPTXAsmPrinter:: +setAndEmitFunctionVirtualRegisters(const MachineFunction &MF) { + SmallString<128> Str; + raw_svector_ostream O(Str); + + // Map the global virtual register number to a register class specific + // virtual register number starting from 1 with that class. 
+ const TargetRegisterInfo *TRI = MF.getTarget().getRegisterInfo(); + //unsigned numRegClasses = TRI->getNumRegClasses(); + + // Emit the Fake Stack Object + const MachineFrameInfo *MFI = MF.getFrameInfo(); + int NumBytes = (int) MFI->getStackSize(); + if (NumBytes) { + O << "\t.local .align " << MFI->getMaxAlignment() << " .b8 \t" + << DEPOTNAME + << getFunctionNumber() << "[" << NumBytes << "];\n"; + if (nvptxSubtarget.is64Bit()) { + O << "\t.reg .b64 \t%SP;\n"; + O << "\t.reg .b64 \t%SPL;\n"; + } + else { + O << "\t.reg .b32 \t%SP;\n"; + O << "\t.reg .b32 \t%SPL;\n"; + } + } + + // Go through all virtual registers to establish the mapping between the + // global virtual + // register number and the per class virtual register number. + // We use the per class virtual register number in the ptx output. + unsigned int numVRs = MRI->getNumVirtRegs(); + for (unsigned i=0; i< numVRs; i++) { + unsigned int vr = TRI->index2VirtReg(i); + const TargetRegisterClass *RC = MRI->getRegClass(vr); + std::map<unsigned, unsigned> ®map = VRidGlobal2LocalMap[RC->getID()]; + int n = regmap.size(); + regmap.insert(std::make_pair(vr, n+1)); + } + + // Emit register declarations + // @TODO: Extract out the real register usage + O << "\t.reg .pred %p<" << NVPTXNumRegisters << ">;\n"; + O << "\t.reg .s16 %rc<" << NVPTXNumRegisters << ">;\n"; + O << "\t.reg .s16 %rs<" << NVPTXNumRegisters << ">;\n"; + O << "\t.reg .s32 %r<" << NVPTXNumRegisters << ">;\n"; + O << "\t.reg .s64 %rl<" << NVPTXNumRegisters << ">;\n"; + O << "\t.reg .f32 %f<" << NVPTXNumRegisters << ">;\n"; + O << "\t.reg .f64 %fl<" << NVPTXNumRegisters << ">;\n"; + + // Emit declaration of the virtual registers or 'physical' registers for + // each register class + //for (unsigned i=0; i< numRegClasses; i++) { + // std::map<unsigned, unsigned> ®map = VRidGlobal2LocalMap[i]; + // const TargetRegisterClass *RC = TRI->getRegClass(i); + // std::string rcname = getNVPTXRegClassName(RC); + // std::string rcStr = 
getNVPTXRegClassStr(RC); + // //int n = regmap.size(); + // if (!isNVPTXVectorRegClass(RC)) { + // O << "\t.reg " << rcname << " \t" << rcStr << "<" + // << NVPTXNumRegisters << ">;\n"; + // } + + // Only declare those registers that may be used. And do not emit vector + // registers as + // they are all elementized to scalar registers. + //if (n && !isNVPTXVectorRegClass(RC)) { + // if (RegAllocNilUsed) { + // O << "\t.reg " << rcname << " \t" << rcStr << "<" << (n+1) + // << ">;\n"; + // } + // else { + // O << "\t.reg " << rcname << " \t" << StrToUpper(rcStr) + // << "<" << 32 << ">;\n"; + // } + //} + //} + + OutStreamer.EmitRawText(O.str()); +} + + +void NVPTXAsmPrinter::printFPConstant(const ConstantFP *Fp, raw_ostream &O) { + APFloat APF = APFloat(Fp->getValueAPF()); // make a copy + bool ignored; + unsigned int numHex; + const char *lead; + + if (Fp->getType()->getTypeID()==Type::FloatTyID) { + numHex = 8; + lead = "0f"; + APF.convert(APFloat::IEEEsingle, APFloat::rmNearestTiesToEven, + &ignored); + } else if (Fp->getType()->getTypeID() == Type::DoubleTyID) { + numHex = 16; + lead = "0d"; + APF.convert(APFloat::IEEEdouble, APFloat::rmNearestTiesToEven, + &ignored); + } else + llvm_unreachable("unsupported fp type"); + + APInt API = APF.bitcastToAPInt(); + std::string hexstr(utohexstr(API.getZExtValue())); + O << lead; + if (hexstr.length() < numHex) + O << std::string(numHex - hexstr.length(), '0'); + O << utohexstr(API.getZExtValue()); +} + +void NVPTXAsmPrinter::printScalarConstant(Constant *CPV, raw_ostream &O) { + if (ConstantInt *CI = dyn_cast<ConstantInt>(CPV)) { + O << CI->getValue(); + return; + } + if (ConstantFP *CFP = dyn_cast<ConstantFP>(CPV)) { + printFPConstant(CFP, O); + return; + } + if (isa<ConstantPointerNull>(CPV)) { + O << "0"; + return; + } + if (GlobalValue *GVar = dyn_cast<GlobalValue>(CPV)) { + O << *Mang->getSymbol(GVar); + return; + } + if (ConstantExpr *Cexpr = dyn_cast<ConstantExpr>(CPV)) { + Value *v = 
Cexpr->stripPointerCasts(); + if (GlobalValue *GVar = dyn_cast<GlobalValue>(v)) { + O << *Mang->getSymbol(GVar); + return; + } else { + O << *LowerConstant(CPV, *this); + return; + } + } + llvm_unreachable("Not scalar type found in printScalarConstant()"); +} + + +void NVPTXAsmPrinter::bufferLEByte(Constant *CPV, int Bytes, + AggBuffer *aggBuffer) { + + const TargetData *TD = TM.getTargetData(); + + if (isa<UndefValue>(CPV) || CPV->isNullValue()) { + int s = TD->getTypeAllocSize(CPV->getType()); + if (s<Bytes) + s = Bytes; + aggBuffer->addZeros(s); + return; + } + + unsigned char *ptr; + switch (CPV->getType()->getTypeID()) { + + case Type::IntegerTyID: { + const Type *ETy = CPV->getType(); + if ( ETy == Type::getInt8Ty(CPV->getContext()) ){ + unsigned char c = + (unsigned char)(dyn_cast<ConstantInt>(CPV))->getZExtValue(); + ptr = &c; + aggBuffer->addBytes(ptr, 1, Bytes); + } else if ( ETy == Type::getInt16Ty(CPV->getContext()) ) { + short int16 = + (short)(dyn_cast<ConstantInt>(CPV))->getZExtValue(); + ptr = (unsigned char*)&int16; + aggBuffer->addBytes(ptr, 2, Bytes); + } else if ( ETy == Type::getInt32Ty(CPV->getContext()) ) { + if (ConstantInt *constInt = dyn_cast<ConstantInt>(CPV)) { + int int32 =(int)(constInt->getZExtValue()); + ptr = (unsigned char*)&int32; + aggBuffer->addBytes(ptr, 4, Bytes); + break; + } + else if (ConstantExpr *Cexpr = dyn_cast<ConstantExpr>(CPV)) { + if (ConstantInt *constInt = + dyn_cast<ConstantInt>(ConstantFoldConstantExpression( + Cexpr, TD))) { + int int32 =(int)(constInt->getZExtValue()); + ptr = (unsigned char*)&int32; + aggBuffer->addBytes(ptr, 4, Bytes); + break; + } + if (Cexpr->getOpcode() == Instruction::PtrToInt) { + Value *v = Cexpr->getOperand(0)->stripPointerCasts(); + aggBuffer->addSymbol(v); + aggBuffer->addZeros(4); + break; + } + } + assert(0 && "unsupported integer const type"); + } else if (ETy == Type::getInt64Ty(CPV->getContext()) ) { + if (ConstantInt *constInt = dyn_cast<ConstantInt>(CPV)) { + long long int64 
=(long long)(constInt->getZExtValue()); + ptr = (unsigned char*)&int64; + aggBuffer->addBytes(ptr, 8, Bytes); + break; + } + else if (ConstantExpr *Cexpr = dyn_cast<ConstantExpr>(CPV)) { + if (ConstantInt *constInt = dyn_cast<ConstantInt>( + ConstantFoldConstantExpression(Cexpr, TD))) { + long long int64 =(long long)(constInt->getZExtValue()); + ptr = (unsigned char*)&int64; + aggBuffer->addBytes(ptr, 8, Bytes); + break; + } + if (Cexpr->getOpcode() == Instruction::PtrToInt) { + Value *v = Cexpr->getOperand(0)->stripPointerCasts(); + aggBuffer->addSymbol(v); + aggBuffer->addZeros(8); + break; + } + } + llvm_unreachable("unsupported integer const type"); + } + else + llvm_unreachable("unsupported integer const type"); + break; + } + case Type::FloatTyID: + case Type::DoubleTyID: { + ConstantFP *CFP = dyn_cast<ConstantFP>(CPV); + const Type* Ty = CFP->getType(); + if (Ty == Type::getFloatTy(CPV->getContext())) { + float float32 = (float)CFP->getValueAPF().convertToFloat(); + ptr = (unsigned char*)&float32; + aggBuffer->addBytes(ptr, 4, Bytes); + } else if (Ty == Type::getDoubleTy(CPV->getContext())) { + double float64 = CFP->getValueAPF().convertToDouble(); + ptr = (unsigned char*)&float64; + aggBuffer->addBytes(ptr, 8, Bytes); + } + else { + llvm_unreachable("unsupported fp const type"); + } + break; + } + case Type::PointerTyID: { + if (GlobalValue *GVar = dyn_cast<GlobalValue>(CPV)) { + aggBuffer->addSymbol(GVar); + } + else if (ConstantExpr *Cexpr = dyn_cast<ConstantExpr>(CPV)) { + Value *v = Cexpr->stripPointerCasts(); + aggBuffer->addSymbol(v); + } + unsigned int s = TD->getTypeAllocSize(CPV->getType()); + aggBuffer->addZeros(s); + break; + } + + case Type::ArrayTyID: + case Type::VectorTyID: + case Type::StructTyID: { + if (isa<ConstantArray>(CPV) || isa<ConstantVector>(CPV) || + isa<ConstantStruct>(CPV)) { + int ElementSize = TD->getTypeAllocSize(CPV->getType()); + bufferAggregateConstant(CPV, aggBuffer); + if ( Bytes > ElementSize ) + 
aggBuffer->addZeros(Bytes-ElementSize); + } + else if (isa<ConstantAggregateZero>(CPV)) + aggBuffer->addZeros(Bytes); + else + llvm_unreachable("Unexpected Constant type"); + break; + } + + default: + llvm_unreachable("unsupported type"); + } +} + +void NVPTXAsmPrinter::bufferAggregateConstant(Constant *CPV, + AggBuffer *aggBuffer) { + const TargetData *TD = TM.getTargetData(); + int Bytes; + + // Old constants + if (isa<ConstantArray>(CPV) || isa<ConstantVector>(CPV)) { + if (CPV->getNumOperands()) + for (unsigned i = 0, e = CPV->getNumOperands(); i != e; ++i) + bufferLEByte(cast<Constant>(CPV->getOperand(i)), 0, aggBuffer); + return; + } + + if (const ConstantDataSequential *CDS = + dyn_cast<ConstantDataSequential>(CPV)) { + if (CDS->getNumElements()) + for (unsigned i = 0; i < CDS->getNumElements(); ++i) + bufferLEByte(cast<Constant>(CDS->getElementAsConstant(i)), 0, + aggBuffer); + return; + } + + + if (isa<ConstantStruct>(CPV)) { + if (CPV->getNumOperands()) { + StructType *ST = cast<StructType>(CPV->getType()); + for (unsigned i = 0, e = CPV->getNumOperands(); i != e; ++i) { + if ( i == (e - 1)) + Bytes = TD->getStructLayout(ST)->getElementOffset(0) + + TD->getTypeAllocSize(ST) + - TD->getStructLayout(ST)->getElementOffset(i); + else + Bytes = TD->getStructLayout(ST)->getElementOffset(i+1) - + TD->getStructLayout(ST)->getElementOffset(i); + bufferLEByte(cast<Constant>(CPV->getOperand(i)), Bytes, + aggBuffer); + } + } + return; + } + assert(0 && "unsupported constant type in printAggregateConstant()"); +} + +// buildTypeNameMap - Run through symbol table looking for type names. 
+// + + +bool NVPTXAsmPrinter::isImageType(const Type *Ty) { + + std::map<const Type *, std::string>::iterator PI = TypeNameMap.find(Ty); + + if (PI != TypeNameMap.end() && + (!PI->second.compare("struct._image1d_t") || + !PI->second.compare("struct._image2d_t") || + !PI->second.compare("struct._image3d_t"))) + return true; + + return false; +} + +/// PrintAsmOperand - Print out an operand for an inline asm expression. +/// +bool NVPTXAsmPrinter::PrintAsmOperand(const MachineInstr *MI, unsigned OpNo, + unsigned AsmVariant, + const char *ExtraCode, + raw_ostream &O) { + if (ExtraCode && ExtraCode[0]) { + if (ExtraCode[1] != 0) return true; // Unknown modifier. + + switch (ExtraCode[0]) { + default: return true; // Unknown modifier. + case 'r': + break; + } + } + + printOperand(MI, OpNo, O); + + return false; +} + +bool NVPTXAsmPrinter::PrintAsmMemoryOperand(const MachineInstr *MI, + unsigned OpNo, + unsigned AsmVariant, + const char *ExtraCode, + raw_ostream &O) { + if (ExtraCode && ExtraCode[0]) + return true; // Unknown modifier + + O << '['; + printMemOperand(MI, OpNo, O); + O << ']'; + + return false; +} + +bool NVPTXAsmPrinter::ignoreLoc(const MachineInstr &MI) +{ + switch(MI.getOpcode()) { + default: + return false; + case NVPTX::CallArgBeginInst: case NVPTX::CallArgEndInst0: + case NVPTX::CallArgEndInst1: case NVPTX::CallArgF32: + case NVPTX::CallArgF64: case NVPTX::CallArgI16: + case NVPTX::CallArgI32: case NVPTX::CallArgI32imm: + case NVPTX::CallArgI64: case NVPTX::CallArgI8: + case NVPTX::CallArgParam: case NVPTX::CallVoidInst: + case NVPTX::CallVoidInstReg: case NVPTX::Callseq_End: + case NVPTX::CallVoidInstReg64: + case NVPTX::DeclareParamInst: case NVPTX::DeclareRetMemInst: + case NVPTX::DeclareRetRegInst: case NVPTX::DeclareRetScalarInst: + case NVPTX::DeclareScalarParamInst: case NVPTX::DeclareScalarRegInst: + case NVPTX::StoreParamF32: case NVPTX::StoreParamF64: + case NVPTX::StoreParamI16: case NVPTX::StoreParamI32: + case NVPTX::StoreParamI64: case 
NVPTX::StoreParamI8: + case NVPTX::StoreParamS32I8: case NVPTX::StoreParamU32I8: + case NVPTX::StoreParamS32I16: case NVPTX::StoreParamU32I16: + case NVPTX::StoreParamScalar2F32: case NVPTX::StoreParamScalar2F64: + case NVPTX::StoreParamScalar2I16: case NVPTX::StoreParamScalar2I32: + case NVPTX::StoreParamScalar2I64: case NVPTX::StoreParamScalar2I8: + case NVPTX::StoreParamScalar4F32: case NVPTX::StoreParamScalar4I16: + case NVPTX::StoreParamScalar4I32: case NVPTX::StoreParamScalar4I8: + case NVPTX::StoreParamV2F32: case NVPTX::StoreParamV2F64: + case NVPTX::StoreParamV2I16: case NVPTX::StoreParamV2I32: + case NVPTX::StoreParamV2I64: case NVPTX::StoreParamV2I8: + case NVPTX::StoreParamV4F32: case NVPTX::StoreParamV4I16: + case NVPTX::StoreParamV4I32: case NVPTX::StoreParamV4I8: + case NVPTX::StoreRetvalF32: case NVPTX::StoreRetvalF64: + case NVPTX::StoreRetvalI16: case NVPTX::StoreRetvalI32: + case NVPTX::StoreRetvalI64: case NVPTX::StoreRetvalI8: + case NVPTX::StoreRetvalScalar2F32: case NVPTX::StoreRetvalScalar2F64: + case NVPTX::StoreRetvalScalar2I16: case NVPTX::StoreRetvalScalar2I32: + case NVPTX::StoreRetvalScalar2I64: case NVPTX::StoreRetvalScalar2I8: + case NVPTX::StoreRetvalScalar4F32: case NVPTX::StoreRetvalScalar4I16: + case NVPTX::StoreRetvalScalar4I32: case NVPTX::StoreRetvalScalar4I8: + case NVPTX::StoreRetvalV2F32: case NVPTX::StoreRetvalV2F64: + case NVPTX::StoreRetvalV2I16: case NVPTX::StoreRetvalV2I32: + case NVPTX::StoreRetvalV2I64: case NVPTX::StoreRetvalV2I8: + case NVPTX::StoreRetvalV4F32: case NVPTX::StoreRetvalV4I16: + case NVPTX::StoreRetvalV4I32: case NVPTX::StoreRetvalV4I8: + case NVPTX::LastCallArgF32: case NVPTX::LastCallArgF64: + case NVPTX::LastCallArgI16: case NVPTX::LastCallArgI32: + case NVPTX::LastCallArgI32imm: case NVPTX::LastCallArgI64: + case NVPTX::LastCallArgI8: case NVPTX::LastCallArgParam: + case NVPTX::LoadParamMemF32: case NVPTX::LoadParamMemF64: + case NVPTX::LoadParamMemI16: case NVPTX::LoadParamMemI32: + case 
NVPTX::LoadParamMemI64: case NVPTX::LoadParamMemI8: + case NVPTX::LoadParamRegF32: case NVPTX::LoadParamRegF64: + case NVPTX::LoadParamRegI16: case NVPTX::LoadParamRegI32: + case NVPTX::LoadParamRegI64: case NVPTX::LoadParamRegI8: + case NVPTX::LoadParamScalar2F32: case NVPTX::LoadParamScalar2F64: + case NVPTX::LoadParamScalar2I16: case NVPTX::LoadParamScalar2I32: + case NVPTX::LoadParamScalar2I64: case NVPTX::LoadParamScalar2I8: + case NVPTX::LoadParamScalar4F32: case NVPTX::LoadParamScalar4I16: + case NVPTX::LoadParamScalar4I32: case NVPTX::LoadParamScalar4I8: + case NVPTX::LoadParamV2F32: case NVPTX::LoadParamV2F64: + case NVPTX::LoadParamV2I16: case NVPTX::LoadParamV2I32: + case NVPTX::LoadParamV2I64: case NVPTX::LoadParamV2I8: + case NVPTX::LoadParamV4F32: case NVPTX::LoadParamV4I16: + case NVPTX::LoadParamV4I32: case NVPTX::LoadParamV4I8: + case NVPTX::PrototypeInst: case NVPTX::DBG_VALUE: + return true; + } + return false; +} + +// Force static initialization. +extern "C" void LLVMInitializeNVPTXBackendAsmPrinter() { + RegisterAsmPrinter<NVPTXAsmPrinter> X(TheNVPTXTarget32); + RegisterAsmPrinter<NVPTXAsmPrinter> Y(TheNVPTXTarget64); +} + + +void NVPTXAsmPrinter::emitSrcInText(StringRef filename, unsigned line) { + std::stringstream temp; + LineReader * reader = this->getReader(filename.str()); + temp << "\n//"; + temp << filename.str(); + temp << ":"; + temp << line; + temp << " "; + temp << reader->readLine(line); + temp << "\n"; + this->OutStreamer.EmitRawText(Twine(temp.str())); +} + + +LineReader *NVPTXAsmPrinter::getReader(std::string filename) { + if (reader == NULL) { + reader = new LineReader(filename); + } + + if (reader->fileName() != filename) { + delete reader; + reader = new LineReader(filename); + } + + return reader; +} + + +std::string +LineReader::readLine(unsigned lineNum) { + if (lineNum < theCurLine) { + theCurLine = 0; + fstr.seekg(0,std::ios::beg); + } + while (theCurLine < lineNum) { + fstr.getline(buff,500); + theCurLine++; + } + 
return buff; +} + +// Force static initialization. +extern "C" void LLVMInitializeNVPTXAsmPrinter() { + RegisterAsmPrinter<NVPTXAsmPrinter> X(TheNVPTXTarget32); + RegisterAsmPrinter<NVPTXAsmPrinter> Y(TheNVPTXTarget64); +} diff --git a/lib/Target/NVPTX/NVPTXAsmPrinter.h b/lib/Target/NVPTX/NVPTXAsmPrinter.h new file mode 100644 index 0000000..a035299 --- /dev/null +++ b/lib/Target/NVPTX/NVPTXAsmPrinter.h @@ -0,0 +1,318 @@ +//===-- NVPTXAsmPrinter.h - NVPTX LLVM assembly writer --------------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file contains a printer that converts from our internal representation +// of machine-dependent LLVM code to NVPTX assembly language. +// +//===----------------------------------------------------------------------===// + +#ifndef NVPTXASMPRINTER_H +#define NVPTXASMPRINTER_H + +#include "NVPTX.h" +#include "NVPTXTargetMachine.h" +#include "NVPTXSubtarget.h" +#include "llvm/Function.h" +#include "llvm/CodeGen/AsmPrinter.h" +#include "llvm/Support/CommandLine.h" +#include "llvm/MC/MCAsmInfo.h" +#include "llvm/MC/MCExpr.h" +#include "llvm/Target/TargetMachine.h" +#include "llvm/MC/MCSymbol.h" +#include "llvm/Support/FormattedStream.h" +#include "llvm/Target/Mangler.h" +#include "llvm/ADT/SmallString.h" +#include "llvm/ADT/StringExtras.h" +#include <fstream> + +// The ptx syntax and format is very different from that usually seem in a .s +// file, +// therefore we are not able to use the MCAsmStreamer interface here. +// +// We are handcrafting the output method here. +// +// A better approach is to clone the MCAsmStreamer to a MCPTXAsmStreamer +// (subclass of MCStreamer). + +// This is defined in AsmPrinter.cpp. +// Used to process the constant expressions in initializers. 
+namespace nvptx { +const llvm::MCExpr *LowerConstant(const llvm::Constant *CV, + llvm::AsmPrinter &AP) ; +} + +namespace llvm { + +class LineReader { +private: + unsigned theCurLine ; + std::ifstream fstr; + char buff[512]; + std::string theFileName; + SmallVector<unsigned, 32> lineOffset; +public: + LineReader(std::string filename) { + theCurLine = 0; + fstr.open(filename.c_str()); + theFileName = filename; + } + std::string fileName() { return theFileName; } + ~LineReader() { + fstr.close(); + } + std::string readLine(unsigned line); +}; + + + +class LLVM_LIBRARY_VISIBILITY NVPTXAsmPrinter : public AsmPrinter { + + + class AggBuffer { + // Used to buffer the emitted string for initializing global + // aggregates. + // + // Normally an aggregate (array, vector or structure) is emitted + // as a u8[]. However, if one element/field of the aggregate + // is a non-NULL address, then the aggregate is emitted as u32[] + // or u64[]. + // + // We first layout the aggregate in 'buffer' in bytes, except for + // those symbol addresses. For the i-th symbol address in the + //aggregate, its corresponding 4-byte or 8-byte elements in 'buffer' + // are filled with 0s. symbolPosInBuffer[i-1] records its position + // in 'buffer', and Symbols[i-1] records the Value*. + // + // Once we have this AggBuffer setup, we can choose how to print + // it out. 
+ public: + unsigned size; // size of the buffer in bytes + unsigned char *buffer; // the buffer + unsigned numSymbols; // number of symbol addresses + SmallVector<unsigned, 4> symbolPosInBuffer; + SmallVector<Value *, 4> Symbols; + + private: + unsigned curpos; + raw_ostream &O; + NVPTXAsmPrinter &AP; + + public: + AggBuffer(unsigned _size, raw_ostream &_O, NVPTXAsmPrinter &_AP) + :O(_O),AP(_AP) { + buffer = new unsigned char[_size]; + size = _size; + curpos = 0; + numSymbols = 0; + } + ~AggBuffer() { + delete [] buffer; + } + unsigned addBytes(unsigned char *Ptr, int Num, int Bytes) { + assert((curpos+Num) <= size); + assert((curpos+Bytes) <= size); + for ( int i= 0; i < Num; ++i) { + buffer[curpos] = Ptr[i]; + curpos ++; + } + for ( int i=Num; i < Bytes ; ++i) { + buffer[curpos] = 0; + curpos ++; + } + return curpos; + } + unsigned addZeros(int Num) { + assert((curpos+Num) <= size); + for ( int i= 0; i < Num; ++i) { + buffer[curpos] = 0; + curpos ++; + } + return curpos; + } + void addSymbol(Value *GVar) { + symbolPosInBuffer.push_back(curpos); + Symbols.push_back(GVar); + numSymbols++; + } + void print() { + if (numSymbols == 0) { + // print out in bytes + for (unsigned i=0; i<size; i++) { + if (i) + O << ", "; + O << (unsigned int)buffer[i]; + } + } + else { + // print out in 4-bytes or 8-bytes + unsigned int pos = 0; + unsigned int nSym = 0; + unsigned int nextSymbolPos = symbolPosInBuffer[nSym]; + unsigned int nBytes = 4; + if (AP.nvptxSubtarget.is64Bit()) + nBytes = 8; + for (pos=0; pos<size; pos+=nBytes) { + if (pos) + O << ", "; + if (pos == nextSymbolPos) { + Value *v = Symbols[nSym]; + if (GlobalValue *GVar = dyn_cast<GlobalValue>(v)) { + MCSymbol *Name = AP.Mang->getSymbol(GVar); + O << *Name; + } + else if (ConstantExpr *Cexpr = + dyn_cast<ConstantExpr>(v)) { + O << *nvptx::LowerConstant(Cexpr, AP); + } + else + assert(0 && "symbol type unknown"); + nSym++; + if (nSym >= numSymbols) + nextSymbolPos = size+1; + else + nextSymbolPos = 
symbolPosInBuffer[nSym]; + } + else + if (nBytes == 4) + O << *(unsigned int*)(buffer+pos); + else + O << *(unsigned long long*)(buffer+pos); + } + } + } + }; + + friend class AggBuffer; + + virtual void emitSrcInText(StringRef filename, unsigned line); + +private : + virtual const char *getPassName() const { + return "NVPTX Assembly Printer"; + } + + const Function *F; + std::string CurrentFnName; + + void EmitFunctionEntryLabel(); + void EmitFunctionBodyStart(); + void EmitFunctionBodyEnd(); + + void EmitInstruction(const MachineInstr *); + + void EmitAlignment(unsigned NumBits, const GlobalValue *GV = 0) const {} + + void printGlobalVariable(const GlobalVariable *GVar); + void printOperand(const MachineInstr *MI, int opNum, raw_ostream &O, + const char *Modifier=0); + void printLdStCode(const MachineInstr *MI, int opNum, raw_ostream &O, + const char *Modifier=0); + void printVecModifiedImmediate(const MachineOperand &MO, + const char *Modifier, raw_ostream &O); + void printMemOperand(const MachineInstr *MI, int opNum, raw_ostream &O, + const char *Modifier=0); + void printImplicitDef(const MachineInstr *MI, raw_ostream &O) const; + // definition autogenerated. 
+ void printInstruction(const MachineInstr *MI, raw_ostream &O); + void printModuleLevelGV(GlobalVariable* GVar, raw_ostream &O, + bool=false); + void printParamName(int paramIndex, raw_ostream &O); + void printParamName(Function::const_arg_iterator I, int paramIndex, + raw_ostream &O); + void emitHeader(Module &M, raw_ostream &O); + void emitKernelFunctionDirectives(const Function& F, + raw_ostream &O) const; + void emitVirtualRegister(unsigned int vr, bool isVec, raw_ostream &O); + void emitFunctionExternParamList(const MachineFunction &MF); + void emitFunctionParamList(const Function *, raw_ostream &O); + void emitFunctionParamList(const MachineFunction &MF, raw_ostream &O); + void setAndEmitFunctionVirtualRegisters(const MachineFunction &MF); + void emitFunctionTempData(const MachineFunction &MF, + unsigned &FrameSize); + bool isImageType(const Type *Ty); + bool PrintAsmOperand(const MachineInstr *MI, unsigned OpNo, + unsigned AsmVariant, const char *ExtraCode, + raw_ostream &); + bool PrintAsmMemoryOperand(const MachineInstr *MI, unsigned OpNo, + unsigned AsmVariant, const char *ExtraCode, + raw_ostream &); + void printReturnValStr(const Function *, raw_ostream &O); + void printReturnValStr(const MachineFunction &MF, raw_ostream &O); + +protected: + bool doInitialization(Module &M); + bool doFinalization(Module &M); + +private: + std::string CurrentBankselLabelInBasicBlock; + + // This is specific per MachineFunction. + const MachineRegisterInfo *MRI; + // The contents are specific for each + // MachineFunction. But the size of the + // array is not. + std::map<unsigned, unsigned> *VRidGlobal2LocalMap; + // cache the subtarget here. + const NVPTXSubtarget &nvptxSubtarget; + // Build the map between type name and ID based on module's type + // symbol table. + std::map<const Type *, std::string> TypeNameMap; + + // List of variables demoted to a function scope. 
+ std::map<const Function *, std::vector<GlobalVariable *> > localDecls; + + // To record filename to ID mapping + std::map<std::string, unsigned> filenameMap; + void recordAndEmitFilenames(Module &); + + void emitPTXGlobalVariable(const GlobalVariable *GVar, raw_ostream &O); + void emitPTXAddressSpace(unsigned int AddressSpace, + raw_ostream &O) const; + std::string getPTXFundamentalTypeStr(const Type *Ty, bool=true) const ; + void printScalarConstant(Constant *CPV, raw_ostream &O) ; + void printFPConstant(const ConstantFP *Fp, raw_ostream &O) ; + void bufferLEByte(Constant *CPV, int Bytes, AggBuffer *aggBuffer) ; + void bufferAggregateConstant(Constant *CV, AggBuffer *aggBuffer) ; + + void printOperandProper(const MachineOperand &MO); + + void emitLinkageDirective(const GlobalValue* V, raw_ostream &O); + void emitDeclarations(Module &, raw_ostream &O); + void emitDeclaration(const Function *, raw_ostream &O); + + static const char *getRegisterName(unsigned RegNo); + void emitDemotedVars(const Function *, raw_ostream &); + + LineReader *reader; + LineReader *getReader(std::string); +public: + NVPTXAsmPrinter(TargetMachine &TM, + MCStreamer &Streamer) + : AsmPrinter(TM, Streamer), + nvptxSubtarget(TM.getSubtarget<NVPTXSubtarget>()) { + CurrentBankselLabelInBasicBlock = ""; + VRidGlobal2LocalMap = NULL; + reader = NULL; + } + + ~NVPTXAsmPrinter() { + if (!reader) + delete reader; + } + + bool ignoreLoc(const MachineInstr &); + + virtual void getVirtualRegisterName(unsigned, bool, raw_ostream &); + + DebugLoc prevDebugLoc; + void emitLineNumberAsDotLoc(const MachineInstr &); +}; +} // end of namespace + +#endif diff --git a/lib/Target/NVPTX/NVPTXFrameLowering.cpp b/lib/Target/NVPTX/NVPTXFrameLowering.cpp new file mode 100644 index 0000000..a9abc00 --- /dev/null +++ b/lib/Target/NVPTX/NVPTXFrameLowering.cpp @@ -0,0 +1,76 @@ +//=======- NVPTXFrameLowering.cpp - NVPTX Frame Information ---*- C++ -*-=====// +// +// The LLVM Compiler Infrastructure +// +// This file is 
distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file contains the NVPTX implementation of TargetFrameLowering class. +// +//===----------------------------------------------------------------------===// + +#include "NVPTXFrameLowering.h" +#include "NVPTX.h" +#include "NVPTXRegisterInfo.h" +#include "NVPTXSubtarget.h" +#include "NVPTXTargetMachine.h" +#include "llvm/ADT/BitVector.h" +#include "llvm/CodeGen/MachineInstrBuilder.h" +#include "llvm/CodeGen/MachineFunction.h" +#include "llvm/CodeGen/MachineFrameInfo.h" +#include "llvm/MC/MachineLocation.h" +#include "llvm/Target/TargetInstrInfo.h" + +using namespace llvm; + +bool NVPTXFrameLowering::hasFP(const MachineFunction &MF) const { + return true; +} + +void NVPTXFrameLowering::emitPrologue(MachineFunction &MF) const { + if (MF.getFrameInfo()->hasStackObjects()) { + MachineBasicBlock &MBB = MF.front(); + // Insert "mov.u32 %SP, %Depot" + MachineBasicBlock::iterator MBBI = MBB.begin(); + // This instruction really occurs before first instruction + // in the BB, so giving it no debug location. 
+ DebugLoc dl = DebugLoc(); + + if (tm.getSubtargetImpl()->hasGenericLdSt()) { + // mov %SPL, %depot; + // cvta.local %SP, %SPL; + if (is64bit) { + MachineInstr *MI = BuildMI(MBB, MBBI, dl, + tm.getInstrInfo()->get(NVPTX::cvta_local_yes_64), + NVPTX::VRFrame).addReg(NVPTX::VRFrameLocal); + BuildMI(MBB, MI, dl, + tm.getInstrInfo()->get(NVPTX::IMOV64rr), NVPTX::VRFrameLocal) + .addReg(NVPTX::VRDepot); + } else { + MachineInstr *MI = BuildMI(MBB, MBBI, dl, + tm.getInstrInfo()->get(NVPTX::cvta_local_yes), + NVPTX::VRFrame).addReg(NVPTX::VRFrameLocal); + BuildMI(MBB, MI, dl, + tm.getInstrInfo()->get(NVPTX::IMOV32rr), NVPTX::VRFrameLocal) + .addReg(NVPTX::VRDepot); + } + } + else { + // mov %SP, %depot; + if (is64bit) + BuildMI(MBB, MBBI, dl, + tm.getInstrInfo()->get(NVPTX::IMOV64rr), NVPTX::VRFrame) + .addReg(NVPTX::VRDepot); + else + BuildMI(MBB, MBBI, dl, + tm.getInstrInfo()->get(NVPTX::IMOV32rr), NVPTX::VRFrame) + .addReg(NVPTX::VRDepot); + } + } +} + +void NVPTXFrameLowering::emitEpilogue(MachineFunction &MF, + MachineBasicBlock &MBB) const { +} diff --git a/lib/Target/NVPTX/NVPTXFrameLowering.h b/lib/Target/NVPTX/NVPTXFrameLowering.h new file mode 100644 index 0000000..ee87b39 --- /dev/null +++ b/lib/Target/NVPTX/NVPTXFrameLowering.h @@ -0,0 +1,40 @@ +//===--- NVPTXFrameLowering.h - Define frame lowering for NVPTX -*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. 
+// +//===----------------------------------------------------------------------===// +// +// +// +//===----------------------------------------------------------------------===// + +#ifndef NVPTX_FRAMELOWERING_H +#define NVPTX_FRAMELOWERING_H + +#include "llvm/Target/TargetFrameLowering.h" + + +namespace llvm { +class NVPTXTargetMachine; + +class NVPTXFrameLowering : public TargetFrameLowering { + NVPTXTargetMachine &tm; + bool is64bit; + +public: + explicit NVPTXFrameLowering(NVPTXTargetMachine &_tm, bool _is64bit) + : TargetFrameLowering(TargetFrameLowering::StackGrowsUp, 8, 0), + tm(_tm), is64bit(_is64bit) {} + + virtual bool hasFP(const MachineFunction &MF) const; + virtual void emitPrologue(MachineFunction &MF) const; + virtual void emitEpilogue(MachineFunction &MF, + MachineBasicBlock &MBB) const; +}; + +} // End llvm namespace + +#endif diff --git a/lib/Target/NVPTX/NVPTXISelDAGToDAG.cpp b/lib/Target/NVPTX/NVPTXISelDAGToDAG.cpp new file mode 100644 index 0000000..e26f9e4 --- /dev/null +++ b/lib/Target/NVPTX/NVPTXISelDAGToDAG.cpp @@ -0,0 +1,681 @@ +//===-- NVPTXISelDAGToDAG.cpp - A dag to dag inst selector for NVPTX ------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file defines an instruction selector for the NVPTX target. 
+// +//===----------------------------------------------------------------------===// + + +#include "llvm/Instructions.h" +#include "llvm/Support/raw_ostream.h" +#include "NVPTXISelDAGToDAG.h" +#include "llvm/Support/Debug.h" +#include "llvm/Support/ErrorHandling.h" +#include "llvm/Support/CommandLine.h" +#include "llvm/Target/TargetIntrinsicInfo.h" +#include "llvm/GlobalValue.h" + +#undef DEBUG_TYPE +#define DEBUG_TYPE "nvptx-isel" + +using namespace llvm; + + +static cl::opt<bool> +UseFMADInstruction("nvptx-mad-enable", + cl::ZeroOrMore, + cl::desc("NVPTX Specific: Enable generating FMAD instructions"), + cl::init(false)); + +static cl::opt<int> +FMAContractLevel("nvptx-fma-level", + cl::ZeroOrMore, + cl::desc("NVPTX Specific: FMA contraction (0: don't do it" + " 1: do it 2: do it aggressively"), + cl::init(2)); + + +static cl::opt<int> +UsePrecDivF32("nvptx-prec-divf32", + cl::ZeroOrMore, + cl::desc("NVPTX Specifies: 0 use div.approx, 1 use div.full, 2 use" + " IEEE Compliant F32 div.rnd if avaiable."), + cl::init(2)); + +/// createNVPTXISelDag - This pass converts a legalized DAG into a +/// NVPTX-specific DAG, ready for instruction scheduling. +FunctionPass *llvm::createNVPTXISelDag(NVPTXTargetMachine &TM, + llvm::CodeGenOpt::Level OptLevel) { + return new NVPTXDAGToDAGISel(TM, OptLevel); +} + + +NVPTXDAGToDAGISel::NVPTXDAGToDAGISel(NVPTXTargetMachine &tm, + CodeGenOpt::Level OptLevel) +: SelectionDAGISel(tm, OptLevel), + Subtarget(tm.getSubtarget<NVPTXSubtarget>()) +{ + // Always do fma.f32 fpcontract if the target supports the instruction. + // Always do fma.f64 fpcontract if the target supports the instruction. + // Do mad.f32 is nvptx-mad-enable is specified and the target does not + // support fma.f32. 
+ + doFMADF32 = (OptLevel > 0) && UseFMADInstruction && !Subtarget.hasFMAF32(); + doFMAF32 = (OptLevel > 0) && Subtarget.hasFMAF32() && + (FMAContractLevel>=1); + doFMAF64 = (OptLevel > 0) && Subtarget.hasFMAF64() && + (FMAContractLevel>=1); + doFMAF32AGG = (OptLevel > 0) && Subtarget.hasFMAF32() && + (FMAContractLevel==2); + doFMAF64AGG = (OptLevel > 0) && Subtarget.hasFMAF64() && + (FMAContractLevel==2); + + allowFMA = (FMAContractLevel >= 1) || UseFMADInstruction; + UseF32FTZ = false; // Was never initialized; default to no .ftz modifier. + doMulWide = (OptLevel > 0); + + // Decide how to translate f32 div + do_DIVF32_PREC = UsePrecDivF32; + // sm less than sm_20 does not support div.rnd. Use div.full. + if (do_DIVF32_PREC == 2 && !Subtarget.reqPTX20()) + do_DIVF32_PREC = 1; + +} + +/// Select - Select instructions not customized! Used for +/// expanded, promoted and normal instructions. +SDNode* NVPTXDAGToDAGISel::Select(SDNode *N) { + + if (N->isMachineOpcode()) + return NULL; // Already selected. + + SDNode *ResNode = NULL; + switch (N->getOpcode()) { + case ISD::LOAD: + ResNode = SelectLoad(N); + break; + case ISD::STORE: + ResNode = SelectStore(N); + break; + } + if (ResNode) + return ResNode; + return SelectCode(N); +} + + +static unsigned int +getCodeAddrSpace(MemSDNode *N, const NVPTXSubtarget &Subtarget) +{ + const Value *Src = N->getSrcValue(); + if (!Src) + return NVPTX::PTXLdStInstCode::LOCAL; + + if (const PointerType *PT = dyn_cast<PointerType>(Src->getType())) { + switch (PT->getAddressSpace()) { + case llvm::ADDRESS_SPACE_LOCAL: return NVPTX::PTXLdStInstCode::LOCAL; + case llvm::ADDRESS_SPACE_GLOBAL: return NVPTX::PTXLdStInstCode::GLOBAL; + case llvm::ADDRESS_SPACE_SHARED: return NVPTX::PTXLdStInstCode::SHARED; + case llvm::ADDRESS_SPACE_CONST_NOT_GEN: + return NVPTX::PTXLdStInstCode::CONSTANT; + case llvm::ADDRESS_SPACE_GENERIC: return NVPTX::PTXLdStInstCode::GENERIC; + case llvm::ADDRESS_SPACE_PARAM: return NVPTX::PTXLdStInstCode::PARAM; + case llvm::ADDRESS_SPACE_CONST: + // If the arch supports generic address 
space, translate it to GLOBAL + // for correctness. + // If the arch does not support generic address space, then the arch + // does not really support ADDRESS_SPACE_CONST, translate it to + // to CONSTANT for better performance. + if (Subtarget.hasGenericLdSt()) + return NVPTX::PTXLdStInstCode::GLOBAL; + else + return NVPTX::PTXLdStInstCode::CONSTANT; + default: break; + } + } + return NVPTX::PTXLdStInstCode::LOCAL; +} + + +SDNode* NVPTXDAGToDAGISel::SelectLoad(SDNode *N) { + DebugLoc dl = N->getDebugLoc(); + LoadSDNode *LD = cast<LoadSDNode>(N); + EVT LoadedVT = LD->getMemoryVT(); + SDNode *NVPTXLD= NULL; + + // do not support pre/post inc/dec + if (LD->isIndexed()) + return NULL; + + if (!LoadedVT.isSimple()) + return NULL; + + // Address Space Setting + unsigned int codeAddrSpace = getCodeAddrSpace(LD, Subtarget); + + // Volatile Setting + // - .volatile is only availalble for .global and .shared + bool isVolatile = LD->isVolatile(); + if (codeAddrSpace != NVPTX::PTXLdStInstCode::GLOBAL && + codeAddrSpace != NVPTX::PTXLdStInstCode::SHARED && + codeAddrSpace != NVPTX::PTXLdStInstCode::GENERIC) + isVolatile = false; + + // Vector Setting + MVT SimpleVT = LoadedVT.getSimpleVT(); + unsigned vecType = NVPTX::PTXLdStInstCode::Scalar; + if (SimpleVT.isVector()) { + unsigned num = SimpleVT.getVectorNumElements(); + if (num == 2) + vecType = NVPTX::PTXLdStInstCode::V2; + else if (num == 4) + vecType = NVPTX::PTXLdStInstCode::V4; + else + return NULL; + } + + // Type Setting: fromType + fromTypeWidth + // + // Sign : ISD::SEXTLOAD + // Unsign : ISD::ZEXTLOAD, ISD::NON_EXTLOAD or ISD::EXTLOAD and the + // type is integer + // Float : ISD::NON_EXTLOAD or ISD::EXTLOAD and the type is float + MVT ScalarVT = SimpleVT.getScalarType(); + unsigned fromTypeWidth = ScalarVT.getSizeInBits(); + unsigned int fromType; + if ((LD->getExtensionType() == ISD::SEXTLOAD)) + fromType = NVPTX::PTXLdStInstCode::Signed; + else if (ScalarVT.isFloatingPoint()) + fromType = 
NVPTX::PTXLdStInstCode::Float; + else + fromType = NVPTX::PTXLdStInstCode::Unsigned; + + // Create the machine instruction DAG + SDValue Chain = N->getOperand(0); + SDValue N1 = N->getOperand(1); + SDValue Addr; + SDValue Offset, Base; + unsigned Opcode; + MVT::SimpleValueType TargetVT = LD->getValueType(0).getSimpleVT().SimpleTy; + + if (SelectDirectAddr(N1, Addr)) { + switch (TargetVT) { + case MVT::i8: Opcode = NVPTX::LD_i8_avar; break; + case MVT::i16: Opcode = NVPTX::LD_i16_avar; break; + case MVT::i32: Opcode = NVPTX::LD_i32_avar; break; + case MVT::i64: Opcode = NVPTX::LD_i64_avar; break; + case MVT::f32: Opcode = NVPTX::LD_f32_avar; break; + case MVT::f64: Opcode = NVPTX::LD_f64_avar; break; + case MVT::v2i8: Opcode = NVPTX::LD_v2i8_avar; break; + case MVT::v2i16: Opcode = NVPTX::LD_v2i16_avar; break; + case MVT::v2i32: Opcode = NVPTX::LD_v2i32_avar; break; + case MVT::v2i64: Opcode = NVPTX::LD_v2i64_avar; break; + case MVT::v2f32: Opcode = NVPTX::LD_v2f32_avar; break; + case MVT::v2f64: Opcode = NVPTX::LD_v2f64_avar; break; + case MVT::v4i8: Opcode = NVPTX::LD_v4i8_avar; break; + case MVT::v4i16: Opcode = NVPTX::LD_v4i16_avar; break; + case MVT::v4i32: Opcode = NVPTX::LD_v4i32_avar; break; + case MVT::v4f32: Opcode = NVPTX::LD_v4f32_avar; break; + default: return NULL; + } + SDValue Ops[] = { getI32Imm(isVolatile), + getI32Imm(codeAddrSpace), + getI32Imm(vecType), + getI32Imm(fromType), + getI32Imm(fromTypeWidth), + Addr, Chain }; + NVPTXLD = CurDAG->getMachineNode(Opcode, dl, TargetVT, + MVT::Other, Ops, 7); + } else if (Subtarget.is64Bit()? 
+ SelectADDRsi64(N1.getNode(), N1, Base, Offset): + SelectADDRsi(N1.getNode(), N1, Base, Offset)) { + switch (TargetVT) { + case MVT::i8: Opcode = NVPTX::LD_i8_asi; break; + case MVT::i16: Opcode = NVPTX::LD_i16_asi; break; + case MVT::i32: Opcode = NVPTX::LD_i32_asi; break; + case MVT::i64: Opcode = NVPTX::LD_i64_asi; break; + case MVT::f32: Opcode = NVPTX::LD_f32_asi; break; + case MVT::f64: Opcode = NVPTX::LD_f64_asi; break; + case MVT::v2i8: Opcode = NVPTX::LD_v2i8_asi; break; + case MVT::v2i16: Opcode = NVPTX::LD_v2i16_asi; break; + case MVT::v2i32: Opcode = NVPTX::LD_v2i32_asi; break; + case MVT::v2i64: Opcode = NVPTX::LD_v2i64_asi; break; + case MVT::v2f32: Opcode = NVPTX::LD_v2f32_asi; break; + case MVT::v2f64: Opcode = NVPTX::LD_v2f64_asi; break; + case MVT::v4i8: Opcode = NVPTX::LD_v4i8_asi; break; + case MVT::v4i16: Opcode = NVPTX::LD_v4i16_asi; break; + case MVT::v4i32: Opcode = NVPTX::LD_v4i32_asi; break; + case MVT::v4f32: Opcode = NVPTX::LD_v4f32_asi; break; + default: return NULL; + } + SDValue Ops[] = { getI32Imm(isVolatile), + getI32Imm(codeAddrSpace), + getI32Imm(vecType), + getI32Imm(fromType), + getI32Imm(fromTypeWidth), + Base, Offset, Chain }; + NVPTXLD = CurDAG->getMachineNode(Opcode, dl, TargetVT, + MVT::Other, Ops, 8); + } else if (Subtarget.is64Bit()? 
+ SelectADDRri64(N1.getNode(), N1, Base, Offset): + SelectADDRri(N1.getNode(), N1, Base, Offset)) { + switch (TargetVT) { + case MVT::i8: Opcode = NVPTX::LD_i8_ari; break; + case MVT::i16: Opcode = NVPTX::LD_i16_ari; break; + case MVT::i32: Opcode = NVPTX::LD_i32_ari; break; + case MVT::i64: Opcode = NVPTX::LD_i64_ari; break; + case MVT::f32: Opcode = NVPTX::LD_f32_ari; break; + case MVT::f64: Opcode = NVPTX::LD_f64_ari; break; + case MVT::v2i8: Opcode = NVPTX::LD_v2i8_ari; break; + case MVT::v2i16: Opcode = NVPTX::LD_v2i16_ari; break; + case MVT::v2i32: Opcode = NVPTX::LD_v2i32_ari; break; + case MVT::v2i64: Opcode = NVPTX::LD_v2i64_ari; break; + case MVT::v2f32: Opcode = NVPTX::LD_v2f32_ari; break; + case MVT::v2f64: Opcode = NVPTX::LD_v2f64_ari; break; + case MVT::v4i8: Opcode = NVPTX::LD_v4i8_ari; break; + case MVT::v4i16: Opcode = NVPTX::LD_v4i16_ari; break; + case MVT::v4i32: Opcode = NVPTX::LD_v4i32_ari; break; + case MVT::v4f32: Opcode = NVPTX::LD_v4f32_ari; break; + default: return NULL; + } + SDValue Ops[] = { getI32Imm(isVolatile), + getI32Imm(codeAddrSpace), + getI32Imm(vecType), + getI32Imm(fromType), + getI32Imm(fromTypeWidth), + Base, Offset, Chain }; + NVPTXLD = CurDAG->getMachineNode(Opcode, dl, TargetVT, + MVT::Other, Ops, 8); + } + else { + switch (TargetVT) { + case MVT::i8: Opcode = NVPTX::LD_i8_areg; break; + case MVT::i16: Opcode = NVPTX::LD_i16_areg; break; + case MVT::i32: Opcode = NVPTX::LD_i32_areg; break; + case MVT::i64: Opcode = NVPTX::LD_i64_areg; break; + case MVT::f32: Opcode = NVPTX::LD_f32_areg; break; + case MVT::f64: Opcode = NVPTX::LD_f64_areg; break; + case MVT::v2i8: Opcode = NVPTX::LD_v2i8_areg; break; + case MVT::v2i16: Opcode = NVPTX::LD_v2i16_areg; break; + case MVT::v2i32: Opcode = NVPTX::LD_v2i32_areg; break; + case MVT::v2i64: Opcode = NVPTX::LD_v2i64_areg; break; + case MVT::v2f32: Opcode = NVPTX::LD_v2f32_areg; break; + case MVT::v2f64: Opcode = NVPTX::LD_v2f64_areg; break; + case MVT::v4i8: Opcode = 
NVPTX::LD_v4i8_areg; break; + case MVT::v4i16: Opcode = NVPTX::LD_v4i16_areg; break; + case MVT::v4i32: Opcode = NVPTX::LD_v4i32_areg; break; + case MVT::v4f32: Opcode = NVPTX::LD_v4f32_areg; break; + default: return NULL; + } + SDValue Ops[] = { getI32Imm(isVolatile), + getI32Imm(codeAddrSpace), + getI32Imm(vecType), + getI32Imm(fromType), + getI32Imm(fromTypeWidth), + N1, Chain }; + NVPTXLD = CurDAG->getMachineNode(Opcode, dl, TargetVT, + MVT::Other, Ops, 7); + } + + if (NVPTXLD != NULL) { + MachineSDNode::mmo_iterator MemRefs0 = MF->allocateMemRefsArray(1); + MemRefs0[0] = cast<MemSDNode>(N)->getMemOperand(); + cast<MachineSDNode>(NVPTXLD)->setMemRefs(MemRefs0, MemRefs0 + 1); + } + + return NVPTXLD; +} + +SDNode* NVPTXDAGToDAGISel::SelectStore(SDNode *N) { + DebugLoc dl = N->getDebugLoc(); + StoreSDNode *ST = cast<StoreSDNode>(N); + EVT StoreVT = ST->getMemoryVT(); + SDNode *NVPTXST = NULL; + + // do not support pre/post inc/dec + if (ST->isIndexed()) + return NULL; + + if (!StoreVT.isSimple()) + return NULL; + + // Address Space Setting + unsigned int codeAddrSpace = getCodeAddrSpace(ST, Subtarget); + + // Volatile Setting + // - .volatile is only availalble for .global and .shared + bool isVolatile = ST->isVolatile(); + if (codeAddrSpace != NVPTX::PTXLdStInstCode::GLOBAL && + codeAddrSpace != NVPTX::PTXLdStInstCode::SHARED && + codeAddrSpace != NVPTX::PTXLdStInstCode::GENERIC) + isVolatile = false; + + // Vector Setting + MVT SimpleVT = StoreVT.getSimpleVT(); + unsigned vecType = NVPTX::PTXLdStInstCode::Scalar; + if (SimpleVT.isVector()) { + unsigned num = SimpleVT.getVectorNumElements(); + if (num == 2) + vecType = NVPTX::PTXLdStInstCode::V2; + else if (num == 4) + vecType = NVPTX::PTXLdStInstCode::V4; + else + return NULL; + } + + // Type Setting: toType + toTypeWidth + // - for integer type, always use 'u' + // + MVT ScalarVT = SimpleVT.getScalarType(); + unsigned toTypeWidth = ScalarVT.getSizeInBits(); + unsigned int toType; + if 
(ScalarVT.isFloatingPoint()) + toType = NVPTX::PTXLdStInstCode::Float; + else + toType = NVPTX::PTXLdStInstCode::Unsigned; + + // Create the machine instruction DAG + SDValue Chain = N->getOperand(0); + SDValue N1 = N->getOperand(1); + SDValue N2 = N->getOperand(2); + SDValue Addr; + SDValue Offset, Base; + unsigned Opcode; + MVT::SimpleValueType SourceVT = + N1.getNode()->getValueType(0).getSimpleVT().SimpleTy; + + if (SelectDirectAddr(N2, Addr)) { + switch (SourceVT) { + case MVT::i8: Opcode = NVPTX::ST_i8_avar; break; + case MVT::i16: Opcode = NVPTX::ST_i16_avar; break; + case MVT::i32: Opcode = NVPTX::ST_i32_avar; break; + case MVT::i64: Opcode = NVPTX::ST_i64_avar; break; + case MVT::f32: Opcode = NVPTX::ST_f32_avar; break; + case MVT::f64: Opcode = NVPTX::ST_f64_avar; break; + case MVT::v2i8: Opcode = NVPTX::ST_v2i8_avar; break; + case MVT::v2i16: Opcode = NVPTX::ST_v2i16_avar; break; + case MVT::v2i32: Opcode = NVPTX::ST_v2i32_avar; break; + case MVT::v2i64: Opcode = NVPTX::ST_v2i64_avar; break; + case MVT::v2f32: Opcode = NVPTX::ST_v2f32_avar; break; + case MVT::v2f64: Opcode = NVPTX::ST_v2f64_avar; break; + case MVT::v4i8: Opcode = NVPTX::ST_v4i8_avar; break; + case MVT::v4i16: Opcode = NVPTX::ST_v4i16_avar; break; + case MVT::v4i32: Opcode = NVPTX::ST_v4i32_avar; break; + case MVT::v4f32: Opcode = NVPTX::ST_v4f32_avar; break; + default: return NULL; + } + SDValue Ops[] = { N1, + getI32Imm(isVolatile), + getI32Imm(codeAddrSpace), + getI32Imm(vecType), + getI32Imm(toType), + getI32Imm(toTypeWidth), + Addr, Chain }; + NVPTXST = CurDAG->getMachineNode(Opcode, dl, + MVT::Other, Ops, 8); + } else if (Subtarget.is64Bit()? 
+ SelectADDRsi64(N2.getNode(), N2, Base, Offset): + SelectADDRsi(N2.getNode(), N2, Base, Offset)) { + switch (SourceVT) { + case MVT::i8: Opcode = NVPTX::ST_i8_asi; break; + case MVT::i16: Opcode = NVPTX::ST_i16_asi; break; + case MVT::i32: Opcode = NVPTX::ST_i32_asi; break; + case MVT::i64: Opcode = NVPTX::ST_i64_asi; break; + case MVT::f32: Opcode = NVPTX::ST_f32_asi; break; + case MVT::f64: Opcode = NVPTX::ST_f64_asi; break; + case MVT::v2i8: Opcode = NVPTX::ST_v2i8_asi; break; + case MVT::v2i16: Opcode = NVPTX::ST_v2i16_asi; break; + case MVT::v2i32: Opcode = NVPTX::ST_v2i32_asi; break; + case MVT::v2i64: Opcode = NVPTX::ST_v2i64_asi; break; + case MVT::v2f32: Opcode = NVPTX::ST_v2f32_asi; break; + case MVT::v2f64: Opcode = NVPTX::ST_v2f64_asi; break; + case MVT::v4i8: Opcode = NVPTX::ST_v4i8_asi; break; + case MVT::v4i16: Opcode = NVPTX::ST_v4i16_asi; break; + case MVT::v4i32: Opcode = NVPTX::ST_v4i32_asi; break; + case MVT::v4f32: Opcode = NVPTX::ST_v4f32_asi; break; + default: return NULL; + } + SDValue Ops[] = { N1, + getI32Imm(isVolatile), + getI32Imm(codeAddrSpace), + getI32Imm(vecType), + getI32Imm(toType), + getI32Imm(toTypeWidth), + Base, Offset, Chain }; + NVPTXST = CurDAG->getMachineNode(Opcode, dl, + MVT::Other, Ops, 9); + } else if (Subtarget.is64Bit()? 
+ SelectADDRri64(N2.getNode(), N2, Base, Offset): + SelectADDRri(N2.getNode(), N2, Base, Offset)) { + switch (SourceVT) { + case MVT::i8: Opcode = NVPTX::ST_i8_ari; break; + case MVT::i16: Opcode = NVPTX::ST_i16_ari; break; + case MVT::i32: Opcode = NVPTX::ST_i32_ari; break; + case MVT::i64: Opcode = NVPTX::ST_i64_ari; break; + case MVT::f32: Opcode = NVPTX::ST_f32_ari; break; + case MVT::f64: Opcode = NVPTX::ST_f64_ari; break; + case MVT::v2i8: Opcode = NVPTX::ST_v2i8_ari; break; + case MVT::v2i16: Opcode = NVPTX::ST_v2i16_ari; break; + case MVT::v2i32: Opcode = NVPTX::ST_v2i32_ari; break; + case MVT::v2i64: Opcode = NVPTX::ST_v2i64_ari; break; + case MVT::v2f32: Opcode = NVPTX::ST_v2f32_ari; break; + case MVT::v2f64: Opcode = NVPTX::ST_v2f64_ari; break; + case MVT::v4i8: Opcode = NVPTX::ST_v4i8_ari; break; + case MVT::v4i16: Opcode = NVPTX::ST_v4i16_ari; break; + case MVT::v4i32: Opcode = NVPTX::ST_v4i32_ari; break; + case MVT::v4f32: Opcode = NVPTX::ST_v4f32_ari; break; + default: return NULL; + } + SDValue Ops[] = { N1, + getI32Imm(isVolatile), + getI32Imm(codeAddrSpace), + getI32Imm(vecType), + getI32Imm(toType), + getI32Imm(toTypeWidth), + Base, Offset, Chain }; + NVPTXST = CurDAG->getMachineNode(Opcode, dl, + MVT::Other, Ops, 9); + } else { + switch (SourceVT) { + case MVT::i8: Opcode = NVPTX::ST_i8_areg; break; + case MVT::i16: Opcode = NVPTX::ST_i16_areg; break; + case MVT::i32: Opcode = NVPTX::ST_i32_areg; break; + case MVT::i64: Opcode = NVPTX::ST_i64_areg; break; + case MVT::f32: Opcode = NVPTX::ST_f32_areg; break; + case MVT::f64: Opcode = NVPTX::ST_f64_areg; break; + case MVT::v2i8: Opcode = NVPTX::ST_v2i8_areg; break; + case MVT::v2i16: Opcode = NVPTX::ST_v2i16_areg; break; + case MVT::v2i32: Opcode = NVPTX::ST_v2i32_areg; break; + case MVT::v2i64: Opcode = NVPTX::ST_v2i64_areg; break; + case MVT::v2f32: Opcode = NVPTX::ST_v2f32_areg; break; + case MVT::v2f64: Opcode = NVPTX::ST_v2f64_areg; break; + case MVT::v4i8: Opcode = NVPTX::ST_v4i8_areg; 
break; + case MVT::v4i16: Opcode = NVPTX::ST_v4i16_areg; break; + case MVT::v4i32: Opcode = NVPTX::ST_v4i32_areg; break; + case MVT::v4f32: Opcode = NVPTX::ST_v4f32_areg; break; + default: return NULL; + } + SDValue Ops[] = { N1, + getI32Imm(isVolatile), + getI32Imm(codeAddrSpace), + getI32Imm(vecType), + getI32Imm(toType), + getI32Imm(toTypeWidth), + N2, Chain }; + NVPTXST = CurDAG->getMachineNode(Opcode, dl, + MVT::Other, Ops, 8); + } + + if (NVPTXST != NULL) { + MachineSDNode::mmo_iterator MemRefs0 = MF->allocateMemRefsArray(1); + MemRefs0[0] = cast<MemSDNode>(N)->getMemOperand(); + cast<MachineSDNode>(NVPTXST)->setMemRefs(MemRefs0, MemRefs0 + 1); + } + + return NVPTXST; +} + +// SelectDirectAddr - Match a direct address for DAG. +// A direct address could be a globaladdress or externalsymbol. +bool NVPTXDAGToDAGISel::SelectDirectAddr(SDValue N, SDValue &Address) { + // Return true if TGA or ES. + if (N.getOpcode() == ISD::TargetGlobalAddress + || N.getOpcode() == ISD::TargetExternalSymbol) { + Address = N; + return true; + } + if (N.getOpcode() == NVPTXISD::Wrapper) { + Address = N.getOperand(0); + return true; + } + if (N.getOpcode() == ISD::INTRINSIC_WO_CHAIN) { + unsigned IID = cast<ConstantSDNode>(N.getOperand(0))->getZExtValue(); + if (IID == Intrinsic::nvvm_ptr_gen_to_param) + if (N.getOperand(1).getOpcode() == NVPTXISD::MoveParam) + return (SelectDirectAddr(N.getOperand(1).getOperand(0), Address)); + } + return false; +} + +// symbol+offset +bool NVPTXDAGToDAGISel::SelectADDRsi_imp(SDNode *OpNode, SDValue Addr, + SDValue &Base, SDValue &Offset, + MVT mvt) { + if (Addr.getOpcode() == ISD::ADD) { + if (ConstantSDNode *CN = dyn_cast<ConstantSDNode>(Addr.getOperand(1))) { + SDValue base=Addr.getOperand(0); + if (SelectDirectAddr(base, Base)) { + Offset = CurDAG->getTargetConstant(CN->getZExtValue(), mvt); + return true; + } + } + } + return false; +} + +// symbol+offset +bool NVPTXDAGToDAGISel::SelectADDRsi(SDNode *OpNode, SDValue Addr, + SDValue &Base, 
SDValue &Offset) { + return SelectADDRsi_imp(OpNode, Addr, Base, Offset, MVT::i32); +} + +// symbol+offset, with the offset built as an i64 target constant +bool NVPTXDAGToDAGISel::SelectADDRsi64(SDNode *OpNode, SDValue Addr, + SDValue &Base, SDValue &Offset) { + return SelectADDRsi_imp(OpNode, Addr, Base, Offset, MVT::i64); +} + +// register+offset: frame index, or base + constant; mvt types Base/Offset +bool NVPTXDAGToDAGISel::SelectADDRri_imp(SDNode *OpNode, SDValue Addr, + SDValue &Base, SDValue &Offset, + MVT mvt) { + if (FrameIndexSDNode *FIN = dyn_cast<FrameIndexSDNode>(Addr)) { + Base = CurDAG->getTargetFrameIndex(FIN->getIndex(), mvt); + Offset = CurDAG->getTargetConstant(0, mvt); + return true; + } + if (Addr.getOpcode() == ISD::TargetExternalSymbol || + Addr.getOpcode() == ISD::TargetGlobalAddress) + return false; // direct calls. + + if (Addr.getOpcode() == ISD::ADD) { + if (SelectDirectAddr(Addr.getOperand(0), Addr)) { + return false; // direct-address base; presumably left to symbol+offset + } + if (ConstantSDNode *CN = dyn_cast<ConstantSDNode>(Addr.getOperand(1))) { + if (FrameIndexSDNode *FIN = + dyn_cast<FrameIndexSDNode>(Addr.getOperand(0))) + // Constant offset from frame ref. + Base = CurDAG->getTargetFrameIndex(FIN->getIndex(), mvt); + else + Base = Addr.getOperand(0); + Offset = CurDAG->getTargetConstant(CN->getZExtValue(), mvt); + return true; + } + } + return false; +} + +// register+offset, with frame index and offset built as i32 +bool NVPTXDAGToDAGISel::SelectADDRri(SDNode *OpNode, SDValue Addr, + SDValue &Base, SDValue &Offset) { + return SelectADDRri_imp(OpNode, Addr, Base, Offset, MVT::i32); +} + +// register+offset, with frame index and offset built as i64 +bool NVPTXDAGToDAGISel::SelectADDRri64(SDNode *OpNode, SDValue Addr, + SDValue &Base, SDValue &Offset) { + return SelectADDRri_imp(OpNode, Addr, Base, Offset, MVT::i64); +} + +bool NVPTXDAGToDAGISel::ChkMemSDNodeAddressSpace(SDNode *N, + unsigned int spN) const { + const Value *Src = NULL; + // Even though MemIntrinsicSDNode is a subclass of MemSDNode, + // the classof() for MemSDNode does not include MemIntrinsicSDNode + // (See SelectionDAGNodes.h). So we need to check for both. 
+ if (MemSDNode *mN = dyn_cast<MemSDNode>(N)) { + Src = mN->getSrcValue(); + } + else if (MemSDNode *mN = dyn_cast<MemIntrinsicSDNode>(N)) { + Src = mN->getSrcValue(); + } + if (!Src) + return false; + if (const PointerType *PT = dyn_cast<PointerType>(Src->getType())) + return (PT->getAddressSpace() == spN); + return false; +} + +/// SelectInlineAsmMemoryOperand - Implement addressing mode selection for +/// inline asm expressions. +bool NVPTXDAGToDAGISel::SelectInlineAsmMemoryOperand(const SDValue &Op, + char ConstraintCode, + std::vector<SDValue> &OutOps) { + SDValue Op0, Op1; + switch (ConstraintCode) { + default: return true; + case 'm': // memory + if (SelectDirectAddr(Op, Op0)) { + OutOps.push_back(Op0); + OutOps.push_back(CurDAG->getTargetConstant(0, MVT::i32)); + return false; + } + if (SelectADDRri(Op.getNode(), Op, Op0, Op1)) { + OutOps.push_back(Op0); + OutOps.push_back(Op1); + return false; + } + break; + } + return true; +} + +// Return true if N is a undef or a constant. +// If N was undef, return a (i8imm 0) in Retval +// If N was imm, convert it to i8imm and return in Retval +// Note: The convert to i8imm is required, otherwise the +// pattern matcher inserts a bunch of IMOVi8rr to convert +// the imm to i8imm, and this causes instruction selection +// to fail. 
+bool NVPTXDAGToDAGISel::UndefOrImm(SDValue Op, SDValue N, + SDValue &Retval) { + if (!(N.getOpcode() == ISD::UNDEF) && + !(N.getOpcode() == ISD::Constant)) + return false; + + if (N.getOpcode() == ISD::UNDEF) + Retval = CurDAG->getTargetConstant(0, MVT::i8); + else { + ConstantSDNode *cn = cast<ConstantSDNode>(N.getNode()); + unsigned retval = cn->getZExtValue(); + Retval = CurDAG->getTargetConstant(retval, MVT::i8); + } + return true; +} diff --git a/lib/Target/NVPTX/NVPTXISelDAGToDAG.h b/lib/Target/NVPTX/NVPTXISelDAGToDAG.h new file mode 100644 index 0000000..ccd69b2 --- /dev/null +++ b/lib/Target/NVPTX/NVPTXISelDAGToDAG.h @@ -0,0 +1,105 @@ +//===-- NVPTXISelDAGToDAG.h - A dag to dag inst selector for NVPTX --------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file defines an instruction selector for the NVPTX target. +// +//===----------------------------------------------------------------------===// + +#define DEBUG_TYPE "nvptx-isel" + +#include "NVPTX.h" +#include "NVPTXISelLowering.h" +#include "NVPTXRegisterInfo.h" +#include "NVPTXTargetMachine.h" +#include "llvm/CodeGen/SelectionDAGISel.h" +#include "llvm/Support/Compiler.h" +#include "llvm/Intrinsics.h" +using namespace llvm; + +namespace { + +class LLVM_LIBRARY_VISIBILITY NVPTXDAGToDAGISel : public SelectionDAGISel { + + // If true, generate corresponding FPCONTRACT. This is + // language dependent (i.e. CUDA and OpenCL works differently). + bool doFMADF32; + bool doFMAF64; + bool doFMAF32; + bool doFMAF64AGG; + bool doFMAF32AGG; + bool allowFMA; + + // 0: use div.approx + // 1: use div.full + // 2: For sm_20 and later, ieee-compliant div.rnd.f32 can be generated; + // Otherwise, use div.full + int do_DIVF32_PREC; + + // If true, add .ftz to f32 instructions. 
+ // This is only meaningful for sm_20 and later, as the default + // is not ftz. + // For sm earlier than sm_20, f32 denorms are always ftz by the + // hardware. + // We always add the .ftz modifier regardless of the sm value + // when Use32FTZ is true. + bool UseF32FTZ; + + // If true, generate mul.wide from sext and mul + bool doMulWide; + +public: + explicit NVPTXDAGToDAGISel(NVPTXTargetMachine &tm, + CodeGenOpt::Level OptLevel); + + // Pass Name + virtual const char *getPassName() const { + return "NVPTX DAG->DAG Pattern Instruction Selection"; + } + + const NVPTXSubtarget &Subtarget; + + virtual bool SelectInlineAsmMemoryOperand(const SDValue &Op, + char ConstraintCode, + std::vector<SDValue> &OutOps); +private: + // Include the pieces autogenerated from the target description. +#include "NVPTXGenDAGISel.inc" + + SDNode *Select(SDNode *N); + SDNode* SelectLoad(SDNode *N); + SDNode* SelectStore(SDNode *N); + + inline SDValue getI32Imm(unsigned Imm) { + return CurDAG->getTargetConstant(Imm, MVT::i32); + } + + // Match direct address complex pattern. 
+ bool SelectDirectAddr(SDValue N, SDValue &Address); + + bool SelectADDRri_imp(SDNode *OpNode, SDValue Addr, SDValue &Base, + SDValue &Offset, MVT mvt); + bool SelectADDRri(SDNode *OpNode, SDValue Addr, SDValue &Base, + SDValue &Offset); + bool SelectADDRri64(SDNode *OpNode, SDValue Addr, SDValue &Base, + SDValue &Offset); + + bool SelectADDRsi_imp(SDNode *OpNode, SDValue Addr, SDValue &Base, + SDValue &Offset, MVT mvt); + bool SelectADDRsi(SDNode *OpNode, SDValue Addr, SDValue &Base, + SDValue &Offset); + bool SelectADDRsi64(SDNode *OpNode, SDValue Addr, SDValue &Base, + SDValue &Offset); + + + bool ChkMemSDNodeAddressSpace(SDNode *N, unsigned int spN) const; + + bool UndefOrImm(SDValue Op, SDValue N, SDValue &Retval); + +}; +} diff --git a/lib/Target/NVPTX/NVPTXISelLowering.cpp b/lib/Target/NVPTX/NVPTXISelLowering.cpp new file mode 100644 index 0000000..5f925ff --- /dev/null +++ b/lib/Target/NVPTX/NVPTXISelLowering.cpp @@ -0,0 +1,1294 @@ +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file defines the interfaces that NVPTX uses to lower LLVM code into a +// selection DAG. 
+// +//===----------------------------------------------------------------------===// + + +#include "NVPTX.h" +#include "NVPTXISelLowering.h" +#include "NVPTXTargetMachine.h" +#include "NVPTXTargetObjectFile.h" +#include "NVPTXUtilities.h" +#include "llvm/Intrinsics.h" +#include "llvm/IntrinsicInst.h" +#include "llvm/Support/CommandLine.h" +#include "llvm/DerivedTypes.h" +#include "llvm/GlobalValue.h" +#include "llvm/Module.h" +#include "llvm/Function.h" +#include "llvm/CodeGen/Analysis.h" +#include "llvm/CodeGen/MachineFrameInfo.h" +#include "llvm/CodeGen/MachineFunction.h" +#include "llvm/CodeGen/MachineInstrBuilder.h" +#include "llvm/CodeGen/MachineRegisterInfo.h" +#include "llvm/Support/CallSite.h" +#include "llvm/Support/ErrorHandling.h" +#include "llvm/Support/Debug.h" +#include "llvm/Support/raw_ostream.h" +#include "llvm/CodeGen/TargetLoweringObjectFileImpl.h" +#include "llvm/MC/MCSectionELF.h" +#include <sstream> + +#undef DEBUG_TYPE +#define DEBUG_TYPE "nvptx-lower" + +using namespace llvm; + +static unsigned int uniqueCallSite = 0; + +static cl::opt<bool> +RetainVectorOperands("nvptx-codegen-vectors", + cl::desc("NVPTX Specific: Retain LLVM's vectors and generate PTX vectors"), + cl::init(true)); + +static cl::opt<bool> +sched4reg("nvptx-sched4reg", + cl::desc("NVPTX Specific: schedule for register pressue"), + cl::init(false)); + +// NVPTXTargetLowering Constructor. +NVPTXTargetLowering::NVPTXTargetLowering(NVPTXTargetMachine &TM) +: TargetLowering(TM, new NVPTXTargetObjectFile()), + nvTM(&TM), + nvptxSubtarget(TM.getSubtarget<NVPTXSubtarget>()) { + + // always lower memset, memcpy, and memmove intrinsics to load/store + // instructions, rather + // then generating calls to memset, mempcy or memmove. + maxStoresPerMemset = (unsigned)0xFFFFFFFF; + maxStoresPerMemcpy = (unsigned)0xFFFFFFFF; + maxStoresPerMemmove = (unsigned)0xFFFFFFFF; + + setBooleanContents(ZeroOrNegativeOneBooleanContent); + + // Jump is Expensive. 
Don't create extra control flow for 'and', 'or' + // condition branches. + setJumpIsExpensive(true); + + // By default, use the Source scheduling + if (sched4reg) + setSchedulingPreference(Sched::RegPressure); + else + setSchedulingPreference(Sched::Source); + + addRegisterClass(MVT::i1, &NVPTX::Int1RegsRegClass); + addRegisterClass(MVT::i8, &NVPTX::Int8RegsRegClass); + addRegisterClass(MVT::i16, &NVPTX::Int16RegsRegClass); + addRegisterClass(MVT::i32, &NVPTX::Int32RegsRegClass); + addRegisterClass(MVT::i64, &NVPTX::Int64RegsRegClass); + addRegisterClass(MVT::f32, &NVPTX::Float32RegsRegClass); + addRegisterClass(MVT::f64, &NVPTX::Float64RegsRegClass); + + if (RetainVectorOperands) { + addRegisterClass(MVT::v2f32, &NVPTX::V2F32RegsRegClass); + addRegisterClass(MVT::v4f32, &NVPTX::V4F32RegsRegClass); + addRegisterClass(MVT::v2i32, &NVPTX::V2I32RegsRegClass); + addRegisterClass(MVT::v4i32, &NVPTX::V4I32RegsRegClass); + addRegisterClass(MVT::v2f64, &NVPTX::V2F64RegsRegClass); + addRegisterClass(MVT::v2i64, &NVPTX::V2I64RegsRegClass); + addRegisterClass(MVT::v2i16, &NVPTX::V2I16RegsRegClass); + addRegisterClass(MVT::v4i16, &NVPTX::V4I16RegsRegClass); + addRegisterClass(MVT::v2i8, &NVPTX::V2I8RegsRegClass); + addRegisterClass(MVT::v4i8, &NVPTX::V4I8RegsRegClass); + + setOperationAction(ISD::BUILD_VECTOR, MVT::v4i32 , Custom); + setOperationAction(ISD::BUILD_VECTOR, MVT::v4f32 , Custom); + setOperationAction(ISD::BUILD_VECTOR, MVT::v4i16 , Custom); + setOperationAction(ISD::BUILD_VECTOR, MVT::v4i8 , Custom); + setOperationAction(ISD::BUILD_VECTOR, MVT::v2i64 , Custom); + setOperationAction(ISD::BUILD_VECTOR, MVT::v2f64 , Custom); + setOperationAction(ISD::BUILD_VECTOR, MVT::v2i32 , Custom); + setOperationAction(ISD::BUILD_VECTOR, MVT::v2f32 , Custom); + setOperationAction(ISD::BUILD_VECTOR, MVT::v2i16 , Custom); + setOperationAction(ISD::BUILD_VECTOR, MVT::v2i8 , Custom); + + setOperationAction(ISD::EXTRACT_SUBVECTOR, MVT::v4i32 , Custom); + 
setOperationAction(ISD::EXTRACT_SUBVECTOR, MVT::v4f32 , Custom); + setOperationAction(ISD::EXTRACT_SUBVECTOR, MVT::v4i16 , Custom); + setOperationAction(ISD::EXTRACT_SUBVECTOR, MVT::v4i8 , Custom); + setOperationAction(ISD::EXTRACT_SUBVECTOR, MVT::v2i64 , Custom); + setOperationAction(ISD::EXTRACT_SUBVECTOR, MVT::v2f64 , Custom); + setOperationAction(ISD::EXTRACT_SUBVECTOR, MVT::v2i32 , Custom); + setOperationAction(ISD::EXTRACT_SUBVECTOR, MVT::v2f32 , Custom); + setOperationAction(ISD::EXTRACT_SUBVECTOR, MVT::v2i16 , Custom); + setOperationAction(ISD::EXTRACT_SUBVECTOR, MVT::v2i8 , Custom); + } + + // Operations not directly supported by NVPTX. + setOperationAction(ISD::SELECT_CC, MVT::Other, Expand); + setOperationAction(ISD::BR_CC, MVT::Other, Expand); + setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i64, Expand); + setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i32, Expand); + setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i16, Expand); + setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i8 , Expand); + setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i1 , Expand); + + if (nvptxSubtarget.hasROT64()) { + setOperationAction(ISD::ROTL , MVT::i64, Legal); + setOperationAction(ISD::ROTR , MVT::i64, Legal); + } + else { + setOperationAction(ISD::ROTL , MVT::i64, Expand); + setOperationAction(ISD::ROTR , MVT::i64, Expand); + } + if (nvptxSubtarget.hasROT32()) { + setOperationAction(ISD::ROTL , MVT::i32, Legal); + setOperationAction(ISD::ROTR , MVT::i32, Legal); + } + else { + setOperationAction(ISD::ROTL , MVT::i32, Expand); + setOperationAction(ISD::ROTR , MVT::i32, Expand); + } + + setOperationAction(ISD::ROTL , MVT::i16, Expand); + setOperationAction(ISD::ROTR , MVT::i16, Expand); + setOperationAction(ISD::ROTL , MVT::i8, Expand); + setOperationAction(ISD::ROTR , MVT::i8, Expand); + setOperationAction(ISD::BSWAP , MVT::i16, Expand); + setOperationAction(ISD::BSWAP , MVT::i32, Expand); + setOperationAction(ISD::BSWAP , MVT::i64, Expand); + + // Indirect branch is 
not supported. + // This also disables Jump Table creation. + setOperationAction(ISD::BR_JT, MVT::Other, Expand); + setOperationAction(ISD::BRIND, MVT::Other, Expand); + + setOperationAction(ISD::GlobalAddress , MVT::i32 , Custom); + setOperationAction(ISD::GlobalAddress , MVT::i64 , Custom); + + // We want to legalize constant related memmove and memcopy + // intrinsics. + setOperationAction(ISD::INTRINSIC_W_CHAIN, MVT::Other, Custom); + + // Turn FP extload into load/fextend + setLoadExtAction(ISD::EXTLOAD, MVT::f32, Expand); + // Turn FP truncstore into trunc + store. + setTruncStoreAction(MVT::f64, MVT::f32, Expand); + + // PTX does not support load / store predicate registers + setOperationAction(ISD::LOAD, MVT::i1, Expand); + setLoadExtAction(ISD::SEXTLOAD, MVT::i1, Promote); + setLoadExtAction(ISD::ZEXTLOAD, MVT::i1, Promote); + setOperationAction(ISD::STORE, MVT::i1, Expand); + setTruncStoreAction(MVT::i64, MVT::i1, Expand); + setTruncStoreAction(MVT::i32, MVT::i1, Expand); + setTruncStoreAction(MVT::i16, MVT::i1, Expand); + setTruncStoreAction(MVT::i8, MVT::i1, Expand); + + // This is legal in NVPTX + setOperationAction(ISD::ConstantFP, MVT::f64, Legal); + setOperationAction(ISD::ConstantFP, MVT::f32, Legal); + + // TRAP can be lowered to PTX trap + setOperationAction(ISD::TRAP, MVT::Other, Legal); + + // By default, CONCAT_VECTORS is implemented via store/load + // through stack. It is slow and uses local memory. We need + // to custom-lowering them. 
+ setOperationAction(ISD::CONCAT_VECTORS, MVT::v4i32 , Custom); + setOperationAction(ISD::CONCAT_VECTORS, MVT::v4f32 , Custom); + setOperationAction(ISD::CONCAT_VECTORS, MVT::v4i16 , Custom); + setOperationAction(ISD::CONCAT_VECTORS, MVT::v4i8 , Custom); + setOperationAction(ISD::CONCAT_VECTORS, MVT::v2i64 , Custom); + setOperationAction(ISD::CONCAT_VECTORS, MVT::v2f64 , Custom); + setOperationAction(ISD::CONCAT_VECTORS, MVT::v2i32 , Custom); + setOperationAction(ISD::CONCAT_VECTORS, MVT::v2f32 , Custom); + setOperationAction(ISD::CONCAT_VECTORS, MVT::v2i16 , Custom); + setOperationAction(ISD::CONCAT_VECTORS, MVT::v2i8 , Custom); + + // Expand vector int to float and float to int conversions + // - For SINT_TO_FP and UINT_TO_FP, the src type + // (Node->getOperand(0).getValueType()) + // is used to determine the action, while for FP_TO_UINT and FP_TO_SINT, + // the dest type (Node->getValueType(0)) is used. + // + // See VectorLegalizer::LegalizeOp() (LegalizeVectorOps.cpp) for the vector + // case, and + // SelectionDAGLegalize::LegalizeOp() (LegalizeDAG.cpp) for the scalar case. + // + // That is why v4i32 or v2i32 are used here. + // + // The expansion for vectors happens in VectorLegalizer::LegalizeOp() + // (LegalizeVectorOps.cpp). 
+ setOperationAction(ISD::SINT_TO_FP, MVT::v4i32, Expand); + setOperationAction(ISD::SINT_TO_FP, MVT::v2i32, Expand); + setOperationAction(ISD::UINT_TO_FP, MVT::v4i32, Expand); + setOperationAction(ISD::UINT_TO_FP, MVT::v2i32, Expand); + setOperationAction(ISD::FP_TO_SINT, MVT::v2i32, Expand); + setOperationAction(ISD::FP_TO_SINT, MVT::v4i32, Expand); + setOperationAction(ISD::FP_TO_UINT, MVT::v2i32, Expand); + setOperationAction(ISD::FP_TO_UINT, MVT::v4i32, Expand); + + // Now deduce the information based on the above mentioned + // actions + computeRegisterProperties(); +} + + +const char *NVPTXTargetLowering::getTargetNodeName(unsigned Opcode) const { + switch (Opcode) { + default: return 0; + case NVPTXISD::CALL: return "NVPTXISD::CALL"; + case NVPTXISD::RET_FLAG: return "NVPTXISD::RET_FLAG"; + case NVPTXISD::Wrapper: return "NVPTXISD::Wrapper"; + case NVPTXISD::NVBuiltin: return "NVPTXISD::NVBuiltin"; + case NVPTXISD::DeclareParam: return "NVPTXISD::DeclareParam"; + case NVPTXISD::DeclareScalarParam: + return "NVPTXISD::DeclareScalarParam"; + case NVPTXISD::DeclareRet: return "NVPTXISD::DeclareRet"; + case NVPTXISD::DeclareRetParam: return "NVPTXISD::DeclareRetParam"; + case NVPTXISD::PrintCall: return "NVPTXISD::PrintCall"; + case NVPTXISD::LoadParam: return "NVPTXISD::LoadParam"; + case NVPTXISD::StoreParam: return "NVPTXISD::StoreParam"; + case NVPTXISD::StoreParamS32: return "NVPTXISD::StoreParamS32"; + case NVPTXISD::StoreParamU32: return "NVPTXISD::StoreParamU32"; + case NVPTXISD::MoveToParam: return "NVPTXISD::MoveToParam"; + case NVPTXISD::CallArgBegin: return "NVPTXISD::CallArgBegin"; + case NVPTXISD::CallArg: return "NVPTXISD::CallArg"; + case NVPTXISD::LastCallArg: return "NVPTXISD::LastCallArg"; + case NVPTXISD::CallArgEnd: return "NVPTXISD::CallArgEnd"; + case NVPTXISD::CallVoid: return "NVPTXISD::CallVoid"; + case NVPTXISD::CallVal: return "NVPTXISD::CallVal"; + case NVPTXISD::CallSymbol: return "NVPTXISD::CallSymbol"; + case NVPTXISD::Prototype: 
return "NVPTXISD::Prototype"; + case NVPTXISD::MoveParam: return "NVPTXISD::MoveParam"; + case NVPTXISD::MoveRetval: return "NVPTXISD::MoveRetval"; + case NVPTXISD::MoveToRetval: return "NVPTXISD::MoveToRetval"; + case NVPTXISD::StoreRetval: return "NVPTXISD::StoreRetval"; + case NVPTXISD::PseudoUseParam: return "NVPTXISD::PseudoUseParam"; + case NVPTXISD::RETURN: return "NVPTXISD::RETURN"; + case NVPTXISD::CallSeqBegin: return "NVPTXISD::CallSeqBegin"; + case NVPTXISD::CallSeqEnd: return "NVPTXISD::CallSeqEnd"; + } +} + + +SDValue +NVPTXTargetLowering::LowerGlobalAddress(SDValue Op, SelectionDAG &DAG) const { + DebugLoc dl = Op.getDebugLoc(); + const GlobalValue *GV = cast<GlobalAddressSDNode>(Op)->getGlobal(); + Op = DAG.getTargetGlobalAddress(GV, dl, getPointerTy()); + return DAG.getNode(NVPTXISD::Wrapper, dl, getPointerTy(), Op); +} + +std::string NVPTXTargetLowering::getPrototype(Type *retTy, + const ArgListTy &Args, + const SmallVectorImpl<ISD::OutputArg> &Outs, + unsigned retAlignment) const { + + bool isABI = (nvptxSubtarget.getSmVersion() >= 20); + + std::stringstream O; + O << "prototype_" << uniqueCallSite << " : .callprototype "; + + if (retTy->getTypeID() == Type::VoidTyID) + O << "()"; + else { + O << "("; + if (isABI) { + if (retTy->isPrimitiveType() || retTy->isIntegerTy()) { + unsigned size = 0; + if (const IntegerType *ITy = dyn_cast<IntegerType>(retTy)) { + size = ITy->getBitWidth(); + if (size < 32) size = 32; + } + else { + assert(retTy->isFloatingPointTy() && + "Floating point type expected here"); + size = retTy->getPrimitiveSizeInBits(); + } + + O << ".param .b" << size << " _"; + } + else if (isa<PointerType>(retTy)) + O << ".param .b" << getPointerTy().getSizeInBits() + << " _"; + else { + if ((retTy->getTypeID() == Type::StructTyID) || + isa<VectorType>(retTy)) { + SmallVector<EVT, 16> vtparts; + ComputeValueVTs(*this, retTy, vtparts); + unsigned totalsz = 0; + for (unsigned i=0,e=vtparts.size(); i!=e; ++i) { + unsigned elems = 1; + EVT 
elemtype = vtparts[i]; + if (vtparts[i].isVector()) { + elems = vtparts[i].getVectorNumElements(); + elemtype = vtparts[i].getVectorElementType(); + } + for (unsigned j=0, je=elems; j!=je; ++j) { + unsigned sz = elemtype.getSizeInBits(); + if (elemtype.isInteger() && (sz < 8)) sz = 8; + totalsz += sz/8; + } + } + O << ".param .align " + << retAlignment + << " .b8 _[" + << totalsz << "]"; + } + else { + assert(false && + "Unknown return type"); + } + } + } + else { + SmallVector<EVT, 16> vtparts; + ComputeValueVTs(*this, retTy, vtparts); + unsigned idx = 0; + for (unsigned i=0,e=vtparts.size(); i!=e; ++i) { + unsigned elems = 1; + EVT elemtype = vtparts[i]; + if (vtparts[i].isVector()) { + elems = vtparts[i].getVectorNumElements(); + elemtype = vtparts[i].getVectorElementType(); + } + + for (unsigned j=0, je=elems; j!=je; ++j) { + unsigned sz = elemtype.getSizeInBits(); + if (elemtype.isInteger() && (sz < 32)) sz = 32; + O << ".reg .b" << sz << " _"; + if (j<je-1) O << ", "; + ++idx; + } + if (i < e-1) + O << ", "; + } + } + O << ") "; + } + O << "_ ("; + + bool first = true; + MVT thePointerTy = getPointerTy(); + + for (unsigned i=0,e=Args.size(); i!=e; ++i) { + const Type *Ty = Args[i].Ty; + if (!first) { + O << ", "; + } + first = false; + + if (Outs[i].Flags.isByVal() == false) { + unsigned sz = 0; + if (isa<IntegerType>(Ty)) { + sz = cast<IntegerType>(Ty)->getBitWidth(); + if (sz < 32) sz = 32; + } + else if (isa<PointerType>(Ty)) + sz = thePointerTy.getSizeInBits(); + else + sz = Ty->getPrimitiveSizeInBits(); + if (isABI) + O << ".param .b" << sz << " "; + else + O << ".reg .b" << sz << " "; + O << "_"; + continue; + } + const PointerType *PTy = dyn_cast<PointerType>(Ty); + assert(PTy && + "Param with byval attribute should be a pointer type"); + Type *ETy = PTy->getElementType(); + + if (isABI) { + unsigned align = Outs[i].Flags.getByValAlign(); + unsigned sz = getTargetData()->getTypeAllocSize(ETy); + O << ".param .align " << align + << " .b8 "; + O << "_"; 
+ O << "[" << sz << "]"; + continue; + } + else { + SmallVector<EVT, 16> vtparts; + ComputeValueVTs(*this, ETy, vtparts); + for (unsigned i=0,e=vtparts.size(); i!=e; ++i) { + unsigned elems = 1; + EVT elemtype = vtparts[i]; + if (vtparts[i].isVector()) { + elems = vtparts[i].getVectorNumElements(); + elemtype = vtparts[i].getVectorElementType(); + } + + for (unsigned j=0,je=elems; j!=je; ++j) { + unsigned sz = elemtype.getSizeInBits(); + if (elemtype.isInteger() && (sz < 32)) sz = 32; + O << ".reg .b" << sz << " "; + O << "_"; + if (j<je-1) O << ", "; + } + if (i<e-1) + O << ", "; + } + continue; + } + } + O << ");"; + return O.str(); +} + + +#if 0 +SDValue +NVPTXTargetLowering::LowerCall(SDValue Chain, SDValue Callee, + CallingConv::ID CallConv, bool isVarArg, + bool doesNotRet, bool &isTailCall, + const SmallVectorImpl<ISD::OutputArg> &Outs, + const SmallVectorImpl<SDValue> &OutVals, + const SmallVectorImpl<ISD::InputArg> &Ins, + DebugLoc dl, SelectionDAG &DAG, + SmallVectorImpl<SDValue> &InVals, Type *retTy, + const ArgListTy &Args) const { + bool isABI = (nvptxSubtarget.getSmVersion() >= 20); + + SDValue tempChain = Chain; + Chain = DAG.getCALLSEQ_START(Chain, + DAG.getIntPtrConstant(uniqueCallSite, true)); + SDValue InFlag = Chain.getValue(1); + + assert((Outs.size() == Args.size()) && + "Unexpected number of arguments to function call"); + unsigned paramCount = 0; + // Declare the .params or .reg need to pass values + // to the function + for (unsigned i=0, e=Outs.size(); i!=e; ++i) { + EVT VT = Outs[i].VT; + + if (Outs[i].Flags.isByVal() == false) { + // Plain scalar + // for ABI, declare .param .b<size> .param<n>; + // for nonABI, declare .reg .b<size> .param<n>; + unsigned isReg = 1; + if (isABI) + isReg = 0; + unsigned sz = VT.getSizeInBits(); + if (VT.isInteger() && (sz < 32)) sz = 32; + SDVTList DeclareParamVTs = DAG.getVTList(MVT::Other, MVT::Glue); + SDValue DeclareParamOps[] = { Chain, + DAG.getConstant(paramCount, MVT::i32), + DAG.getConstant(sz, 
MVT::i32), + DAG.getConstant(isReg, MVT::i32), + InFlag }; + Chain = DAG.getNode(NVPTXISD::DeclareScalarParam, dl, DeclareParamVTs, + DeclareParamOps, 5); + InFlag = Chain.getValue(1); + SDVTList CopyParamVTs = DAG.getVTList(MVT::Other, MVT::Glue); + SDValue CopyParamOps[] = { Chain, DAG.getConstant(paramCount, MVT::i32), + DAG.getConstant(0, MVT::i32), OutVals[i], InFlag }; + + unsigned opcode = NVPTXISD::StoreParam; + if (isReg) + opcode = NVPTXISD::MoveToParam; + else { + if (Outs[i].Flags.isZExt()) + opcode = NVPTXISD::StoreParamU32; + else if (Outs[i].Flags.isSExt()) + opcode = NVPTXISD::StoreParamS32; + } + Chain = DAG.getNode(opcode, dl, CopyParamVTs, CopyParamOps, 5); + + InFlag = Chain.getValue(1); + ++paramCount; + continue; + } + // struct or vector + SmallVector<EVT, 16> vtparts; + const PointerType *PTy = dyn_cast<PointerType>(Args[i].Ty); + assert(PTy && + "Type of a byval parameter should be pointer"); + ComputeValueVTs(*this, PTy->getElementType(), vtparts); + + if (isABI) { + // declare .param .align 16 .b8 .param<n>[<size>]; + unsigned sz = Outs[i].Flags.getByValSize(); + SDVTList DeclareParamVTs = DAG.getVTList(MVT::Other, MVT::Glue); + // The ByValAlign in the Outs[i].Flags is alway set at this point, so we + // don't need to + // worry about natural alignment or not. 
See TargetLowering::LowerCallTo() + SDValue DeclareParamOps[] = { Chain, + DAG.getConstant(Outs[i].Flags.getByValAlign(), MVT::i32), + DAG.getConstant(paramCount, MVT::i32), + DAG.getConstant(sz, MVT::i32), + InFlag }; + Chain = DAG.getNode(NVPTXISD::DeclareParam, dl, DeclareParamVTs, + DeclareParamOps, 5); + InFlag = Chain.getValue(1); + unsigned curOffset = 0; + for (unsigned j=0,je=vtparts.size(); j!=je; ++j) { + unsigned elems = 1; + EVT elemtype = vtparts[j]; + if (vtparts[j].isVector()) { + elems = vtparts[j].getVectorNumElements(); + elemtype = vtparts[j].getVectorElementType(); + } + for (unsigned k=0,ke=elems; k!=ke; ++k) { + unsigned sz = elemtype.getSizeInBits(); + if (elemtype.isInteger() && (sz < 8)) sz = 8; + SDValue srcAddr = DAG.getNode(ISD::ADD, dl, getPointerTy(), + OutVals[i], + DAG.getConstant(curOffset, + getPointerTy())); + SDValue theVal = DAG.getLoad(elemtype, dl, tempChain, srcAddr, + MachinePointerInfo(), false, false, false, 0); + SDVTList CopyParamVTs = DAG.getVTList(MVT::Other, MVT::Glue); + SDValue CopyParamOps[] = { Chain, DAG.getConstant(paramCount, + MVT::i32), + DAG.getConstant(curOffset, MVT::i32), + theVal, InFlag }; + Chain = DAG.getNode(NVPTXISD::StoreParam, dl, CopyParamVTs, + CopyParamOps, 5); + InFlag = Chain.getValue(1); + curOffset += sz/8; + } + } + ++paramCount; + continue; + } + // Non-abi, struct or vector + // Declare a bunch or .reg .b<size> .param<n> + unsigned curOffset = 0; + for (unsigned j=0,je=vtparts.size(); j!=je; ++j) { + unsigned elems = 1; + EVT elemtype = vtparts[j]; + if (vtparts[j].isVector()) { + elems = vtparts[j].getVectorNumElements(); + elemtype = vtparts[j].getVectorElementType(); + } + for (unsigned k=0,ke=elems; k!=ke; ++k) { + unsigned sz = elemtype.getSizeInBits(); + if (elemtype.isInteger() && (sz < 32)) sz = 32; + SDVTList DeclareParamVTs = DAG.getVTList(MVT::Other, MVT::Glue); + SDValue DeclareParamOps[] = { Chain, DAG.getConstant(paramCount, + MVT::i32), + DAG.getConstant(sz, MVT::i32), + 
DAG.getConstant(1, MVT::i32), + InFlag }; + Chain = DAG.getNode(NVPTXISD::DeclareScalarParam, dl, DeclareParamVTs, + DeclareParamOps, 5); + InFlag = Chain.getValue(1); + SDValue srcAddr = DAG.getNode(ISD::ADD, dl, getPointerTy(), OutVals[i], + DAG.getConstant(curOffset, + getPointerTy())); + SDValue theVal = DAG.getLoad(elemtype, dl, tempChain, srcAddr, + MachinePointerInfo(), false, false, false, 0); + SDVTList CopyParamVTs = DAG.getVTList(MVT::Other, MVT::Glue); + SDValue CopyParamOps[] = { Chain, DAG.getConstant(paramCount, MVT::i32), + DAG.getConstant(0, MVT::i32), theVal, + InFlag }; + Chain = DAG.getNode(NVPTXISD::MoveToParam, dl, CopyParamVTs, + CopyParamOps, 5); + InFlag = Chain.getValue(1); + ++paramCount; + } + } + } + + GlobalAddressSDNode *Func = dyn_cast<GlobalAddressSDNode>(Callee.getNode()); + unsigned retAlignment = 0; + + // Handle Result + unsigned retCount = 0; + if (Ins.size() > 0) { + SmallVector<EVT, 16> resvtparts; + ComputeValueVTs(*this, retTy, resvtparts); + + // Declare one .param .align 16 .b8 func_retval0[<size>] for ABI or + // individual .reg .b<size> func_retval<0..> for non ABI + unsigned resultsz = 0; + for (unsigned i=0,e=resvtparts.size(); i!=e; ++i) { + unsigned elems = 1; + EVT elemtype = resvtparts[i]; + if (resvtparts[i].isVector()) { + elems = resvtparts[i].getVectorNumElements(); + elemtype = resvtparts[i].getVectorElementType(); + } + for (unsigned j=0,je=elems; j!=je; ++j) { + unsigned sz = elemtype.getSizeInBits(); + if (isABI == false) { + if (elemtype.isInteger() && (sz < 32)) sz = 32; + } + else { + if (elemtype.isInteger() && (sz < 8)) sz = 8; + } + if (isABI == false) { + SDVTList DeclareRetVTs = DAG.getVTList(MVT::Other, MVT::Glue); + SDValue DeclareRetOps[] = { Chain, DAG.getConstant(2, MVT::i32), + DAG.getConstant(sz, MVT::i32), + DAG.getConstant(retCount, MVT::i32), + InFlag }; + Chain = DAG.getNode(NVPTXISD::DeclareRet, dl, DeclareRetVTs, + DeclareRetOps, 5); + InFlag = Chain.getValue(1); + ++retCount; + } + 
resultsz += sz; + } + } + if (isABI) { + if (retTy->isPrimitiveType() || retTy->isIntegerTy() || + retTy->isPointerTy() ) { + // Scalar needs to be at least 32bit wide + if (resultsz < 32) + resultsz = 32; + SDVTList DeclareRetVTs = DAG.getVTList(MVT::Other, MVT::Glue); + SDValue DeclareRetOps[] = { Chain, DAG.getConstant(1, MVT::i32), + DAG.getConstant(resultsz, MVT::i32), + DAG.getConstant(0, MVT::i32), InFlag }; + Chain = DAG.getNode(NVPTXISD::DeclareRet, dl, DeclareRetVTs, + DeclareRetOps, 5); + InFlag = Chain.getValue(1); + } + else { + // @TODO: Re-enable getAlign calls. We do not have the + // ImmutableCallSite object here anymore. + //if (Func) { // direct call + //if (!llvm::getAlign(*(CS->getCalledFunction()), 0, retAlignment)) + //retAlignment = TD->getABITypeAlignment(retTy); + //} + //else { // indirect call + //const CallInst *CallI = dyn_cast<CallInst>(CS->getInstruction()); + //if (!llvm::getAlign(*CallI, 0, retAlignment)) + //retAlignment = TD->getABITypeAlignment(retTy); + //} + // @TODO: Remove this hack! + // Functions with explicit alignment metadata will be broken, for now. + retAlignment = 16; + SDVTList DeclareRetVTs = DAG.getVTList(MVT::Other, MVT::Glue); + SDValue DeclareRetOps[] = { Chain, DAG.getConstant(retAlignment, + MVT::i32), + DAG.getConstant(resultsz/8, MVT::i32), + DAG.getConstant(0, MVT::i32), InFlag }; + Chain = DAG.getNode(NVPTXISD::DeclareRetParam, dl, DeclareRetVTs, + DeclareRetOps, 5); + InFlag = Chain.getValue(1); + } + } + } + + if (!Func) { + // This is indirect function call case : PTX requires a prototype of the + // form + // proto_0 : .callprototype(.param .b32 _) _ (.param .b32 _); + // to be emitted, and the label has to used as the last arg of call + // instruction. + // The prototype is embedded in a string and put as the operand for an + // INLINEASM SDNode. 
+ SDVTList InlineAsmVTs = DAG.getVTList(MVT::Other, MVT::Glue); + std::string proto_string = getPrototype(retTy, Args, Outs, retAlignment); + const char *asmstr = nvTM->getManagedStrPool()-> + getManagedString(proto_string.c_str())->c_str(); + SDValue InlineAsmOps[] = { Chain, + DAG.getTargetExternalSymbol(asmstr, + getPointerTy()), + DAG.getMDNode(0), + DAG.getTargetConstant(0, MVT::i32), InFlag }; + Chain = DAG.getNode(ISD::INLINEASM, dl, InlineAsmVTs, InlineAsmOps, 5); + InFlag = Chain.getValue(1); + } + // Op to just print "call" + SDVTList PrintCallVTs = DAG.getVTList(MVT::Other, MVT::Glue); + SDValue PrintCallOps[] = { Chain, + DAG.getConstant(isABI ? ((Ins.size()==0) ? 0 : 1) + : retCount, MVT::i32), + InFlag }; + Chain = DAG.getNode(Func?(NVPTXISD::PrintCallUni):(NVPTXISD::PrintCall), dl, + PrintCallVTs, PrintCallOps, 3); + InFlag = Chain.getValue(1); + + // Ops to print out the function name + SDVTList CallVoidVTs = DAG.getVTList(MVT::Other, MVT::Glue); + SDValue CallVoidOps[] = { Chain, Callee, InFlag }; + Chain = DAG.getNode(NVPTXISD::CallVoid, dl, CallVoidVTs, CallVoidOps, 3); + InFlag = Chain.getValue(1); + + // Ops to print out the param list + SDVTList CallArgBeginVTs = DAG.getVTList(MVT::Other, MVT::Glue); + SDValue CallArgBeginOps[] = { Chain, InFlag }; + Chain = DAG.getNode(NVPTXISD::CallArgBegin, dl, CallArgBeginVTs, + CallArgBeginOps, 2); + InFlag = Chain.getValue(1); + + for (unsigned i=0, e=paramCount; i!=e; ++i) { + unsigned opcode; + if (i==(e-1)) + opcode = NVPTXISD::LastCallArg; + else + opcode = NVPTXISD::CallArg; + SDVTList CallArgVTs = DAG.getVTList(MVT::Other, MVT::Glue); + SDValue CallArgOps[] = { Chain, DAG.getConstant(1, MVT::i32), + DAG.getConstant(i, MVT::i32), + InFlag }; + Chain = DAG.getNode(opcode, dl, CallArgVTs, CallArgOps, 4); + InFlag = Chain.getValue(1); + } + SDVTList CallArgEndVTs = DAG.getVTList(MVT::Other, MVT::Glue); + SDValue CallArgEndOps[] = { Chain, + DAG.getConstant(Func ? 
1 : 0, MVT::i32), + InFlag }; + Chain = DAG.getNode(NVPTXISD::CallArgEnd, dl, CallArgEndVTs, CallArgEndOps, + 3); + InFlag = Chain.getValue(1); + + if (!Func) { + SDVTList PrototypeVTs = DAG.getVTList(MVT::Other, MVT::Glue); + SDValue PrototypeOps[] = { Chain, + DAG.getConstant(uniqueCallSite, MVT::i32), + InFlag }; + Chain = DAG.getNode(NVPTXISD::Prototype, dl, PrototypeVTs, PrototypeOps, 3); + InFlag = Chain.getValue(1); + } + + // Generate loads from param memory/moves from registers for result + if (Ins.size() > 0) { + if (isABI) { + unsigned resoffset = 0; + for (unsigned i=0,e=Ins.size(); i!=e; ++i) { + unsigned sz = Ins[i].VT.getSizeInBits(); + if (Ins[i].VT.isInteger() && (sz < 8)) sz = 8; + std::vector<EVT> LoadRetVTs; + LoadRetVTs.push_back(Ins[i].VT); + LoadRetVTs.push_back(MVT::Other); LoadRetVTs.push_back(MVT::Glue); + std::vector<SDValue> LoadRetOps; + LoadRetOps.push_back(Chain); + LoadRetOps.push_back(DAG.getConstant(1, MVT::i32)); + LoadRetOps.push_back(DAG.getConstant(resoffset, MVT::i32)); + LoadRetOps.push_back(InFlag); + SDValue retval = DAG.getNode(NVPTXISD::LoadParam, dl, LoadRetVTs, + &LoadRetOps[0], LoadRetOps.size()); + Chain = retval.getValue(1); + InFlag = retval.getValue(2); + InVals.push_back(retval); + resoffset += sz/8; + } + } + else { + SmallVector<EVT, 16> resvtparts; + ComputeValueVTs(*this, retTy, resvtparts); + + assert(Ins.size() == resvtparts.size() && + "Unexpected number of return values in non-ABI case"); + unsigned paramNum = 0; + for (unsigned i=0,e=Ins.size(); i!=e; ++i) { + assert(EVT(Ins[i].VT) == resvtparts[i] && + "Unexpected EVT type in non-ABI case"); + unsigned numelems = 1; + EVT elemtype = Ins[i].VT; + if (Ins[i].VT.isVector()) { + numelems = Ins[i].VT.getVectorNumElements(); + elemtype = Ins[i].VT.getVectorElementType(); + } + std::vector<SDValue> tempRetVals; + for (unsigned j=0; j<numelems; ++j) { + std::vector<EVT> MoveRetVTs; + MoveRetVTs.push_back(elemtype); + MoveRetVTs.push_back(MVT::Other); 
MoveRetVTs.push_back(MVT::Glue); + std::vector<SDValue> MoveRetOps; + MoveRetOps.push_back(Chain); + MoveRetOps.push_back(DAG.getConstant(0, MVT::i32)); + MoveRetOps.push_back(DAG.getConstant(paramNum, MVT::i32)); + MoveRetOps.push_back(InFlag); + SDValue retval = DAG.getNode(NVPTXISD::LoadParam, dl, MoveRetVTs, + &MoveRetOps[0], MoveRetOps.size()); + Chain = retval.getValue(1); + InFlag = retval.getValue(2); + tempRetVals.push_back(retval); + ++paramNum; + } + if (Ins[i].VT.isVector()) + InVals.push_back(DAG.getNode(ISD::BUILD_VECTOR, dl, Ins[i].VT, + &tempRetVals[0], tempRetVals.size())); + else + InVals.push_back(tempRetVals[0]); + } + } + } + Chain = DAG.getCALLSEQ_END(Chain, + DAG.getIntPtrConstant(uniqueCallSite, true), + DAG.getIntPtrConstant(uniqueCallSite+1, true), + InFlag); + uniqueCallSite++; + + // set isTailCall to false for now, until we figure out how to express + // tail call optimization in PTX + isTailCall = false; + return Chain; +} +#endif + +// By default CONCAT_VECTORS is lowered by ExpandVectorBuildThroughStack() +// (see LegalizeDAG.cpp). This is slow and uses local memory. 
+// We use extract/insert/build vector just as what LegalizeOp() does in llvm 2.5 +SDValue NVPTXTargetLowering:: +LowerCONCAT_VECTORS(SDValue Op, SelectionDAG &DAG) const { + SDNode *Node = Op.getNode(); + DebugLoc dl = Node->getDebugLoc(); + SmallVector<SDValue, 8> Ops; + unsigned NumOperands = Node->getNumOperands(); + for (unsigned i=0; i < NumOperands; ++i) { + SDValue SubOp = Node->getOperand(i); + EVT VVT = SubOp.getNode()->getValueType(0); + EVT EltVT = VVT.getVectorElementType(); + unsigned NumSubElem = VVT.getVectorNumElements(); + for (unsigned j=0; j < NumSubElem; ++j) { + Ops.push_back(DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, EltVT, SubOp, + DAG.getIntPtrConstant(j))); + } + } + return DAG.getNode(ISD::BUILD_VECTOR, dl, Node->getValueType(0), + &Ops[0], Ops.size()); +} + +SDValue NVPTXTargetLowering:: +LowerOperation(SDValue Op, SelectionDAG &DAG) const { + switch (Op.getOpcode()) { + case ISD::RETURNADDR: return SDValue(); + case ISD::FRAMEADDR: return SDValue(); + case ISD::GlobalAddress: return LowerGlobalAddress(Op, DAG); + case ISD::INTRINSIC_W_CHAIN: return Op; + case ISD::BUILD_VECTOR: + case ISD::EXTRACT_SUBVECTOR: + return Op; + case ISD::CONCAT_VECTORS: return LowerCONCAT_VECTORS(Op, DAG); + default: + assert(0 && "Custom lowering not defined for operation"); + } +} + +SDValue +NVPTXTargetLowering::getExtSymb(SelectionDAG &DAG, const char *inname, int idx, + EVT v) const { + std::string *name = nvTM->getManagedStrPool()->getManagedString(inname); + std::stringstream suffix; + suffix << idx; + *name += suffix.str(); + return DAG.getTargetExternalSymbol(name->c_str(), v); +} + +SDValue +NVPTXTargetLowering::getParamSymbol(SelectionDAG &DAG, int idx, EVT v) const { + return getExtSymb(DAG, ".PARAM", idx, v); +} + +SDValue +NVPTXTargetLowering::getParamHelpSymbol(SelectionDAG &DAG, int idx) { + return getExtSymb(DAG, ".HLPPARAM", idx); +} + +// Check to see if the kernel argument is image*_t or sampler_t + +bool llvm::isImageOrSamplerVal(const 
Value *arg, const Module *context) { + const char *specialTypes[] = { + "struct._image2d_t", + "struct._image3d_t", + "struct._sampler_t" + }; + + const Type *Ty = arg->getType(); + const PointerType *PTy = dyn_cast<PointerType>(Ty); + + if (!PTy) + return false; + + if (!context) + return false; + + const StructType *STy = dyn_cast<StructType>(PTy->getElementType()); + const std::string TypeName = STy ? STy->getName() : ""; + + for (int i=0, e=sizeof(specialTypes)/sizeof(specialTypes[0]); i!=e; ++i) + if (TypeName == specialTypes[i]) + return true; + + return false; +} + +SDValue +NVPTXTargetLowering::LowerFormalArguments(SDValue Chain, + CallingConv::ID CallConv, bool isVarArg, + const SmallVectorImpl<ISD::InputArg> &Ins, + DebugLoc dl, SelectionDAG &DAG, + SmallVectorImpl<SDValue> &InVals) const { + MachineFunction &MF = DAG.getMachineFunction(); + const TargetData *TD = getTargetData(); + + const Function *F = MF.getFunction(); + const AttrListPtr &PAL = F->getAttributes(); + + SDValue Root = DAG.getRoot(); + std::vector<SDValue> OutChains; + + bool isKernel = llvm::isKernelFunction(*F); + bool isABI = (nvptxSubtarget.getSmVersion() >= 20); + + std::vector<Type *> argTypes; + std::vector<const Argument *> theArgs; + for (Function::const_arg_iterator I = F->arg_begin(), E = F->arg_end(); + I != E; ++I) { + theArgs.push_back(I); + argTypes.push_back(I->getType()); + } + assert(argTypes.size() == Ins.size() && + "Ins types and function types did not match"); + + int idx = 0; + for (unsigned i=0, e=Ins.size(); i!=e; ++i, ++idx) { + Type *Ty = argTypes[i]; + EVT ObjectVT = getValueType(Ty); + assert(ObjectVT == Ins[i].VT && + "Ins type did not match function type"); + + // If the kernel argument is image*_t or sampler_t, convert it to + // a i32 constant holding the parameter position. This can later + // matched in the AsmPrinter to output the correct mangled name. + if (isImageOrSamplerVal(theArgs[i], + (theArgs[i]->getParent() ? 
+ theArgs[i]->getParent()->getParent() : 0))) { + assert(isKernel && "Only kernels can have image/sampler params"); + InVals.push_back(DAG.getConstant(i+1, MVT::i32)); + continue; + } + + if (theArgs[i]->use_empty()) { + // argument is dead + InVals.push_back(DAG.getNode(ISD::UNDEF, dl, ObjectVT)); + continue; + } + + // In the following cases, assign a node order of "idx+1" + // to newly created nodes. The SDNOdes for params have to + // appear in the same order as their order of appearance + // in the original function. "idx+1" holds that order. + if (PAL.paramHasAttr(i+1, Attribute::ByVal) == false) { + // A plain scalar. + if (isABI || isKernel) { + // If ABI, load from the param symbol + SDValue Arg = getParamSymbol(DAG, idx); + Value *srcValue = new Argument(PointerType::get(ObjectVT.getTypeForEVT( + F->getContext()), + llvm::ADDRESS_SPACE_PARAM)); + SDValue p = DAG.getLoad(ObjectVT, dl, Root, Arg, + MachinePointerInfo(srcValue), false, false, + false, + TD->getABITypeAlignment(ObjectVT.getTypeForEVT( + F->getContext()))); + if (p.getNode()) + DAG.AssignOrdering(p.getNode(), idx+1); + InVals.push_back(p); + } + else { + // If no ABI, just move the param symbol + SDValue Arg = getParamSymbol(DAG, idx, ObjectVT); + SDValue p = DAG.getNode(NVPTXISD::MoveParam, dl, ObjectVT, Arg); + if (p.getNode()) + DAG.AssignOrdering(p.getNode(), idx+1); + InVals.push_back(p); + } + continue; + } + + // Param has ByVal attribute + if (isABI || isKernel) { + // Return MoveParam(param symbol). + // Ideally, the param symbol can be returned directly, + // but when SDNode builder decides to use it in a CopyToReg(), + // machine instruction fails because TargetExternalSymbol + // (not lowered) is target dependent, and CopyToReg assumes + // the source is lowered. 
+ SDValue Arg = getParamSymbol(DAG, idx, getPointerTy()); + SDValue p = DAG.getNode(NVPTXISD::MoveParam, dl, ObjectVT, Arg); + if (p.getNode()) + DAG.AssignOrdering(p.getNode(), idx+1); + if (isKernel) + InVals.push_back(p); + else { + SDValue p2 = DAG.getNode(ISD::INTRINSIC_WO_CHAIN, dl, ObjectVT, + DAG.getConstant(Intrinsic::nvvm_ptr_local_to_gen, MVT::i32), + p); + InVals.push_back(p2); + } + } else { + // Have to move a set of param symbols to registers and + // store them locally and return the local pointer in InVals + const PointerType *elemPtrType = dyn_cast<PointerType>(argTypes[i]); + assert(elemPtrType && + "Byval parameter should be a pointer type"); + Type *elemType = elemPtrType->getElementType(); + // Compute the constituent parts + SmallVector<EVT, 16> vtparts; + SmallVector<uint64_t, 16> offsets; + ComputeValueVTs(*this, elemType, vtparts, &offsets, 0); + unsigned totalsize = 0; + for (unsigned j=0, je=vtparts.size(); j!=je; ++j) + totalsize += vtparts[j].getStoreSizeInBits(); + SDValue localcopy = DAG.getFrameIndex(MF.getFrameInfo()-> + CreateStackObject(totalsize/8, 16, false), + getPointerTy()); + unsigned sizesofar = 0; + std::vector<SDValue> theChains; + for (unsigned j=0, je=vtparts.size(); j!=je; ++j) { + unsigned numElems = 1; + if (vtparts[j].isVector()) numElems = vtparts[j].getVectorNumElements(); + for (unsigned k=0, ke=numElems; k!=ke; ++k) { + EVT tmpvt = vtparts[j]; + if (tmpvt.isVector()) tmpvt = tmpvt.getVectorElementType(); + SDValue arg = DAG.getNode(NVPTXISD::MoveParam, dl, tmpvt, + getParamSymbol(DAG, idx, tmpvt)); + SDValue addr = DAG.getNode(ISD::ADD, dl, getPointerTy(), localcopy, + DAG.getConstant(sizesofar, getPointerTy())); + theChains.push_back(DAG.getStore(Chain, dl, arg, addr, + MachinePointerInfo(), false, false, 0)); + sizesofar += tmpvt.getStoreSizeInBits()/8; + ++idx; + } + } + --idx; + Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, &theChains[0], + theChains.size()); + InVals.push_back(localcopy); + } + } + 
+ // Clang will check explicit VarArg and issue error if any. However, Clang + // will let code with + // implicit var arg like f() pass. + // We treat this case as if the arg list is empty. + //if (F.isVarArg()) { + // assert(0 && "VarArg not supported yet!"); + //} + + if (!OutChains.empty()) + DAG.setRoot(DAG.getNode(ISD::TokenFactor, dl, MVT::Other, + &OutChains[0], OutChains.size())); + + return Chain; +} + +SDValue +NVPTXTargetLowering::LowerReturn(SDValue Chain, CallingConv::ID CallConv, + bool isVarArg, + const SmallVectorImpl<ISD::OutputArg> &Outs, + const SmallVectorImpl<SDValue> &OutVals, + DebugLoc dl, SelectionDAG &DAG) const { + + bool isABI = (nvptxSubtarget.getSmVersion() >= 20); + + unsigned sizesofar = 0; + unsigned idx = 0; + for (unsigned i=0, e=Outs.size(); i!=e; ++i) { + SDValue theVal = OutVals[i]; + EVT theValType = theVal.getValueType(); + unsigned numElems = 1; + if (theValType.isVector()) numElems = theValType.getVectorNumElements(); + for (unsigned j=0,je=numElems; j!=je; ++j) { + SDValue tmpval = theVal; + if (theValType.isVector()) + tmpval = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, + theValType.getVectorElementType(), + tmpval, DAG.getIntPtrConstant(j)); + Chain = DAG.getNode(isABI ? NVPTXISD::StoreRetval :NVPTXISD::MoveToRetval, + dl, MVT::Other, + Chain, + DAG.getConstant(isABI ? 
sizesofar : idx, MVT::i32), + tmpval); + if (theValType.isVector()) + sizesofar += theValType.getVectorElementType().getStoreSizeInBits()/8; + else + sizesofar += theValType.getStoreSizeInBits()/8; + ++idx; + } + } + + return DAG.getNode(NVPTXISD::RET_FLAG, dl, MVT::Other, Chain); +} + +void +NVPTXTargetLowering::LowerAsmOperandForConstraint(SDValue Op, + std::string &Constraint, + std::vector<SDValue> &Ops, + SelectionDAG &DAG) const +{ + if (Constraint.length() > 1) + return; + else + TargetLowering::LowerAsmOperandForConstraint(Op, Constraint, Ops, DAG); +} + +// NVPTX suuport vector of legal types of any length in Intrinsics because the +// NVPTX specific type legalizer +// will legalize them to the PTX supported length. +bool +NVPTXTargetLowering::isTypeSupportedInIntrinsic(MVT VT) const { + if (isTypeLegal(VT)) + return true; + if (VT.isVector()) { + MVT eVT = VT.getVectorElementType(); + if (isTypeLegal(eVT)) + return true; + } + return false; +} + + +// llvm.ptx.memcpy.const and llvm.ptx.memmove.const need to be modeled as +// TgtMemIntrinsic +// because we need the information that is only available in the "Value" type +// of destination +// pointer. In particular, the address space information. 
+bool +NVPTXTargetLowering::getTgtMemIntrinsic(IntrinsicInfo& Info, const CallInst &I, + unsigned Intrinsic) const { + switch (Intrinsic) { + default: + return false; + + case Intrinsic::nvvm_atomic_load_add_f32: + Info.opc = ISD::INTRINSIC_W_CHAIN; + Info.memVT = MVT::f32; + Info.ptrVal = I.getArgOperand(0); + Info.offset = 0; + Info.vol = 0; + Info.readMem = true; + Info.writeMem = true; + Info.align = 0; + return true; + + case Intrinsic::nvvm_atomic_load_inc_32: + case Intrinsic::nvvm_atomic_load_dec_32: + Info.opc = ISD::INTRINSIC_W_CHAIN; + Info.memVT = MVT::i32; + Info.ptrVal = I.getArgOperand(0); + Info.offset = 0; + Info.vol = 0; + Info.readMem = true; + Info.writeMem = true; + Info.align = 0; + return true; + + case Intrinsic::nvvm_ldu_global_i: + case Intrinsic::nvvm_ldu_global_f: + case Intrinsic::nvvm_ldu_global_p: + + Info.opc = ISD::INTRINSIC_W_CHAIN; + if (Intrinsic == Intrinsic::nvvm_ldu_global_i) + Info.memVT = MVT::i32; + else if (Intrinsic == Intrinsic::nvvm_ldu_global_p) + Info.memVT = getPointerTy(); + else + Info.memVT = MVT::f32; + Info.ptrVal = I.getArgOperand(0); + Info.offset = 0; + Info.vol = 0; + Info.readMem = true; + Info.writeMem = false; + Info.align = 0; + return true; + + } + return false; +} + +/// isLegalAddressingMode - Return true if the addressing mode represented +/// by AM is legal for this target, for a load/store of the specified type. 
+/// Used to guide target specific optimizations, like loop strength reduction +/// (LoopStrengthReduce.cpp) and memory optimization for address mode +/// (CodeGenPrepare.cpp) +bool +NVPTXTargetLowering::isLegalAddressingMode(const AddrMode &AM, + Type *Ty) const { + + // AddrMode - This represents an addressing mode of: + // BaseGV + BaseOffs + BaseReg + Scale*ScaleReg + // + // The legal address modes are + // - [avar] + // - [areg] + // - [areg+immoff] + // - [immAddr] + + if (AM.BaseGV) { + if (AM.BaseOffs || AM.HasBaseReg || AM.Scale) + return false; + return true; + } + + switch (AM.Scale) { + case 0: // "r", "r+i" or "i" is allowed + break; + case 1: + if (AM.HasBaseReg) // "r+r+i" or "r+r" is not allowed. + return false; + // Otherwise we have r+i. + break; + default: + // No scale > 1 is allowed + return false; + } + return true; +} + +//===----------------------------------------------------------------------===// +// NVPTX Inline Assembly Support +//===----------------------------------------------------------------------===// + +/// getConstraintType - Given a constraint letter, return the type of +/// constraint it is for this target. 
+NVPTXTargetLowering::ConstraintType +NVPTXTargetLowering::getConstraintType(const std::string &Constraint) const { + if (Constraint.size() == 1) { + switch (Constraint[0]) { + default: + break; + case 'r': + case 'h': + case 'c': + case 'l': + case 'f': + case 'd': + case '0': + case 'N': + return C_RegisterClass; + } + } + return TargetLowering::getConstraintType(Constraint); +} + + +std::pair<unsigned, const TargetRegisterClass*> +NVPTXTargetLowering::getRegForInlineAsmConstraint(const std::string &Constraint, + EVT VT) const { + if (Constraint.size() == 1) { + switch (Constraint[0]) { + case 'c': + return std::make_pair(0U, &NVPTX::Int8RegsRegClass); + case 'h': + return std::make_pair(0U, &NVPTX::Int16RegsRegClass); + case 'r': + return std::make_pair(0U, &NVPTX::Int32RegsRegClass); + case 'l': + case 'N': + return std::make_pair(0U, &NVPTX::Int64RegsRegClass); + case 'f': + return std::make_pair(0U, &NVPTX::Float32RegsRegClass); + case 'd': + return std::make_pair(0U, &NVPTX::Float64RegsRegClass); + } + } + return TargetLowering::getRegForInlineAsmConstraint(Constraint, VT); +} + + + +/// getFunctionAlignment - Return the Log2 alignment of this function. +unsigned NVPTXTargetLowering::getFunctionAlignment(const Function *) const { + return 4; +} diff --git a/lib/Target/NVPTX/NVPTXISelLowering.h b/lib/Target/NVPTX/NVPTXISelLowering.h new file mode 100644 index 0000000..5707ad7 --- /dev/null +++ b/lib/Target/NVPTX/NVPTXISelLowering.h @@ -0,0 +1,153 @@ +//===-- NVPTXISelLowering.h - NVPTX DAG Lowering Interface ------*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file defines the interfaces that NVPTX uses to lower LLVM code into a +// selection DAG. 
//
//===----------------------------------------------------------------------===//

#ifndef NVPTXISELLOWERING_H
#define NVPTXISELLOWERING_H

#include "NVPTX.h"
#include "NVPTXSubtarget.h"
#include "llvm/CodeGen/SelectionDAG.h"
#include "llvm/Target/TargetLowering.h"

namespace llvm {
namespace NVPTXISD {
// NVPTX-specific SelectionDAG node opcodes. They are numbered directly after
// the generic ISD opcodes so the two ranges never overlap.
enum NodeType {
  // Start the numbering from where ISD NodeType finishes.
  FIRST_NUMBER = ISD::BUILTIN_OP_END,
  Wrapper,
  CALL,
  RET_FLAG,
  LOAD_PARAM,
  NVBuiltin,
  DeclareParam,
  DeclareScalarParam,
  DeclareRetParam,
  DeclareRet,
  DeclareScalarRet,
  LoadParam,
  StoreParam,
  StoreParamS32, // to sext and store a <32bit value, not used currently
  StoreParamU32, // to zext and store a <32bit value, not used currently
  MoveToParam,
  PrintCall,
  PrintCallUni,
  CallArgBegin,
  CallArg,
  LastCallArg,
  CallArgEnd,
  CallVoid,
  CallVal,
  CallSymbol,
  Prototype,
  MoveParam,
  MoveRetval,
  MoveToRetval,
  StoreRetval,
  PseudoUseParam,
  RETURN,
  CallSeqBegin,
  CallSeqEnd,
  Dummy
};
}

//===--------------------------------------------------------------------===//
// TargetLowering Implementation
//===--------------------------------------------------------------------===//
// NVPTX's implementation of the TargetLowering interface.
class NVPTXTargetLowering : public TargetLowering {
public:
  explicit NVPTXTargetLowering(NVPTXTargetMachine &TM);
  virtual SDValue LowerOperation(SDValue Op, SelectionDAG &DAG) const;

  SDValue LowerGlobalAddress(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerGlobalAddress(const GlobalValue *GV, int64_t Offset,
                             SelectionDAG &DAG) const;

  virtual const char *getTargetNodeName(unsigned Opcode) const;

  // Returns true for legal types and for vectors whose element type is legal.
  bool isTypeSupportedInIntrinsic(MVT VT) const;

  // Fills Info for the NVVM atomic add/inc/dec and ldu.global intrinsics and
  // returns true; returns false for every other intrinsic.
  bool getTgtMemIntrinsic(IntrinsicInfo& Info, const CallInst &I,
                          unsigned Intrinsic) const;

  /// isLegalAddressingMode - Return true if the addressing mode represented
  /// by AM is legal for this target, for a load/store of the specified type.
  /// Used to guide target specific optimizations, like loop strength
  /// reduction (LoopStrengthReduce.cpp) and memory optimization for
  /// address mode (CodeGenPrepare.cpp)
  virtual bool isLegalAddressingMode(const AddrMode &AM, Type *Ty) const;

  /// getFunctionAlignment - Return the Log2 alignment of this function.
  virtual unsigned getFunctionAlignment(const Function *F) const;

  // The result of a setcc is always i1, whatever the operand type.
  virtual EVT getSetCCResultType(EVT VT) const {
    return MVT::i1;
  }

  // Inline-assembly constraint handling.
  ConstraintType getConstraintType(const std::string &Constraint) const;
  std::pair<unsigned, const TargetRegisterClass*>
  getRegForInlineAsmConstraint(const std::string &Constraint, EVT VT) const;

  virtual SDValue
  LowerFormalArguments(SDValue Chain, CallingConv::ID CallConv, bool isVarArg,
                       const SmallVectorImpl<ISD::InputArg> &Ins, DebugLoc dl,
                       SelectionDAG &DAG,
                       SmallVectorImpl<SDValue> &InVals) const;

  // This will be re-added once the necessary changes to LowerCallTo are
  // upstreamed.
  // virtual SDValue
  // LowerCall(SDValue Chain, SDValue Callee, CallingConv::ID CallConv,
  // bool isVarArg, bool doesNotRet, bool &isTailCall,
  // const SmallVectorImpl<ISD::OutputArg> &Outs,
  // const SmallVectorImpl<SDValue> &OutVals,
  // const SmallVectorImpl<ISD::InputArg> &Ins,
  // DebugLoc dl, SelectionDAG &DAG,
  // SmallVectorImpl<SDValue> &InVals,
  // Type *retTy, const ArgListTy &Args) const;

  std::string getPrototype(Type *, const ArgListTy &,
                           const SmallVectorImpl<ISD::OutputArg> &,
                           unsigned retAlignment) const;

  virtual SDValue
  LowerReturn(SDValue Chain, CallingConv::ID CallConv, bool isVarArg,
              const SmallVectorImpl<ISD::OutputArg> &Outs,
              const SmallVectorImpl<SDValue> &OutVals, DebugLoc dl,
              SelectionDAG &DAG) const;

  // Constraints longer than one character are ignored by the implementation;
  // single-letter ones are forwarded to TargetLowering.
  virtual void LowerAsmOperandForConstraint(SDValue Op, std::string &Constraint,
                                            std::vector<SDValue> &Ops,
                                            SelectionDAG &DAG) const;

  // NOTE(review): public raw pointer to the target machine; presumably
  // non-owning and set by the constructor - confirm in the .cpp.
  NVPTXTargetMachine *nvTM;

  // PTX always uses 32-bit shift amounts
  virtual MVT getShiftAmountTy(EVT LHSTy) const {
    return MVT::i32;
  }

private:
  const NVPTXSubtarget &nvptxSubtarget;  // cache the subtarget here

  SDValue getExtSymb(SelectionDAG &DAG, const char *name, int idx, EVT =
                     MVT::i32) const;
  SDValue getParamSymbol(SelectionDAG &DAG, int idx, EVT = MVT::i32) const;
  // NOTE(review): unlike its siblings this helper is not const-qualified -
  // confirm that is intentional.
  SDValue getParamHelpSymbol(SelectionDAG &DAG, int idx);

  SDValue LowerCONCAT_VECTORS(SDValue Op, SelectionDAG &DAG) const;
};
} // namespace llvm

#endif // NVPTXISELLOWERING_H
diff --git a/lib/Target/NVPTX/NVPTXInstrFormats.td b/lib/Target/NVPTX/NVPTXInstrFormats.td
new file mode 100644
index 0000000..f11f1b8
--- /dev/null
+++ b/lib/Target/NVPTX/NVPTXInstrFormats.td
@@ -0,0 +1,43 @@
//===- NVPTXInstrFormats.td - NVPTX Instruction Formats-------*- tblgen -*-===//
//
//                     The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//

//===----------------------------------------------------------------------===//
//  Describe NVPTX instructions format
//
//===----------------------------------------------------------------------===//

// Vector instruction type enum
// Each record wraps a 4-bit tag; the tag is stored in an instruction's
// VecInstType field and from there in TSFlags{3-0}.
class VecInstTypeEnum<bits<4> val> {
  bits<4> Value=val;
}
def VecNOP : VecInstTypeEnum<0>;

// Generic NVPTX Format

class NVPTXInst<dag outs, dag ins, string asmstr, list<dag> pattern>
  : Instruction {
  field bits<14> Inst;

  let Namespace = "NVPTX";
  dag OutOperandList = outs;
  dag InOperandList = ins;
  let AsmString = asmstr;
  let Pattern = pattern;

  // TSFlagFields
  // Defaults: not a vector instruction, not a simple move, not a load/store.
  bits<4> VecInstType = VecNOP.Value;
  bit IsSimpleMove = 0;
  bit IsLoad = 0;
  bit IsStore = 0;

  // TSFlags layout: bits 3-0 vector instruction type, bit 4 simple move,
  // bit 5 load, bit 6 store.
  let TSFlags{3-0} = VecInstType;
  let TSFlags{4-4} = IsSimpleMove;
  let TSFlags{5-5} = IsLoad;
  let TSFlags{6-6} = IsStore;
}
diff --git a/lib/Target/NVPTX/NVPTXInstrInfo.cpp b/lib/Target/NVPTX/NVPTXInstrInfo.cpp
new file mode 100644
index 0000000..b57592d
--- /dev/null
+++ 
b/lib/Target/NVPTX/NVPTXInstrInfo.cpp @@ -0,0 +1,326 @@ +//===- NVPTXInstrInfo.cpp - NVPTX Instruction Information -----------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file contains the NVPTX implementation of the TargetInstrInfo class. +// +//===----------------------------------------------------------------------===// + +#include "NVPTX.h" +#include "NVPTXInstrInfo.h" +#include "NVPTXTargetMachine.h" +#define GET_INSTRINFO_CTOR +#include "NVPTXGenInstrInfo.inc" +#include "llvm/Function.h" +#include "llvm/ADT/STLExtras.h" +#include "llvm/CodeGen/MachineFunction.h" +#include "llvm/CodeGen/MachineInstrBuilder.h" +#include "llvm/CodeGen/MachineRegisterInfo.h" +#include <cstdio> + + +using namespace llvm; + +// FIXME: Add the subtarget support on this constructor. 
+NVPTXInstrInfo::NVPTXInstrInfo(NVPTXTargetMachine &tm) +: NVPTXGenInstrInfo(), + TM(tm), + RegInfo(*this, *TM.getSubtargetImpl()) {} + + +void NVPTXInstrInfo::copyPhysReg (MachineBasicBlock &MBB, + MachineBasicBlock::iterator I, DebugLoc DL, + unsigned DestReg, unsigned SrcReg, + bool KillSrc) const { + if (NVPTX::Int32RegsRegClass.contains(DestReg) && + NVPTX::Int32RegsRegClass.contains(SrcReg)) + BuildMI(MBB, I, DL, get(NVPTX::IMOV32rr), DestReg) + .addReg(SrcReg, getKillRegState(KillSrc)); + else if (NVPTX::Int8RegsRegClass.contains(DestReg) && + NVPTX::Int8RegsRegClass.contains(SrcReg)) + BuildMI(MBB, I, DL, get(NVPTX::IMOV8rr), DestReg) + .addReg(SrcReg, getKillRegState(KillSrc)); + else if (NVPTX::Int1RegsRegClass.contains(DestReg) && + NVPTX::Int1RegsRegClass.contains(SrcReg)) + BuildMI(MBB, I, DL, get(NVPTX::IMOV1rr), DestReg) + .addReg(SrcReg, getKillRegState(KillSrc)); + else if (NVPTX::Float32RegsRegClass.contains(DestReg) && + NVPTX::Float32RegsRegClass.contains(SrcReg)) + BuildMI(MBB, I, DL, get(NVPTX::FMOV32rr), DestReg) + .addReg(SrcReg, getKillRegState(KillSrc)); + else if (NVPTX::Int16RegsRegClass.contains(DestReg) && + NVPTX::Int16RegsRegClass.contains(SrcReg)) + BuildMI(MBB, I, DL, get(NVPTX::IMOV16rr), DestReg) + .addReg(SrcReg, getKillRegState(KillSrc)); + else if (NVPTX::Int64RegsRegClass.contains(DestReg) && + NVPTX::Int64RegsRegClass.contains(SrcReg)) + BuildMI(MBB, I, DL, get(NVPTX::IMOV64rr), DestReg) + .addReg(SrcReg, getKillRegState(KillSrc)); + else if (NVPTX::Float64RegsRegClass.contains(DestReg) && + NVPTX::Float64RegsRegClass.contains(SrcReg)) + BuildMI(MBB, I, DL, get(NVPTX::FMOV64rr), DestReg) + .addReg(SrcReg, getKillRegState(KillSrc)); + else if (NVPTX::V4F32RegsRegClass.contains(DestReg) && + NVPTX::V4F32RegsRegClass.contains(SrcReg)) + BuildMI(MBB, I, DL, get(NVPTX::V4f32Mov), DestReg) + .addReg(SrcReg, getKillRegState(KillSrc)); + else if (NVPTX::V4I32RegsRegClass.contains(DestReg) && + 
NVPTX::V4I32RegsRegClass.contains(SrcReg)) + BuildMI(MBB, I, DL, get(NVPTX::V4i32Mov), DestReg) + .addReg(SrcReg, getKillRegState(KillSrc)); + else if (NVPTX::V2F32RegsRegClass.contains(DestReg) && + NVPTX::V2F32RegsRegClass.contains(SrcReg)) + BuildMI(MBB, I, DL, get(NVPTX::V2f32Mov), DestReg) + .addReg(SrcReg, getKillRegState(KillSrc)); + else if (NVPTX::V2I32RegsRegClass.contains(DestReg) && + NVPTX::V2I32RegsRegClass.contains(SrcReg)) + BuildMI(MBB, I, DL, get(NVPTX::V2i32Mov), DestReg) + .addReg(SrcReg, getKillRegState(KillSrc)); + else if (NVPTX::V4I8RegsRegClass.contains(DestReg) && + NVPTX::V4I8RegsRegClass.contains(SrcReg)) + BuildMI(MBB, I, DL, get(NVPTX::V4i8Mov), DestReg) + .addReg(SrcReg, getKillRegState(KillSrc)); + else if (NVPTX::V2I8RegsRegClass.contains(DestReg) && + NVPTX::V2I8RegsRegClass.contains(SrcReg)) + BuildMI(MBB, I, DL, get(NVPTX::V2i8Mov), DestReg) + .addReg(SrcReg, getKillRegState(KillSrc)); + else if (NVPTX::V4I16RegsRegClass.contains(DestReg) && + NVPTX::V4I16RegsRegClass.contains(SrcReg)) + BuildMI(MBB, I, DL, get(NVPTX::V4i16Mov), DestReg) + .addReg(SrcReg, getKillRegState(KillSrc)); + else if (NVPTX::V2I16RegsRegClass.contains(DestReg) && + NVPTX::V2I16RegsRegClass.contains(SrcReg)) + BuildMI(MBB, I, DL, get(NVPTX::V2i16Mov), DestReg) + .addReg(SrcReg, getKillRegState(KillSrc)); + else if (NVPTX::V2I64RegsRegClass.contains(DestReg) && + NVPTX::V2I64RegsRegClass.contains(SrcReg)) + BuildMI(MBB, I, DL, get(NVPTX::V2i64Mov), DestReg) + .addReg(SrcReg, getKillRegState(KillSrc)); + else if (NVPTX::V2F64RegsRegClass.contains(DestReg) && + NVPTX::V2F64RegsRegClass.contains(SrcReg)) + BuildMI(MBB, I, DL, get(NVPTX::V2f64Mov), DestReg) + .addReg(SrcReg, getKillRegState(KillSrc)); + else { + assert(0 && "Don't know how to copy a register"); + } +} + +bool NVPTXInstrInfo::isMoveInstr(const MachineInstr &MI, + unsigned &SrcReg, + unsigned &DestReg) const { + // Look for the appropriate part of TSFlags + bool isMove = false; + + unsigned 
TSFlags = (MI.getDesc().TSFlags & NVPTX::SimpleMoveMask) >> + NVPTX::SimpleMoveShift; + isMove = (TSFlags == 1); + + if (isMove) { + MachineOperand dest = MI.getOperand(0); + MachineOperand src = MI.getOperand(1); + assert(dest.isReg() && "dest of a movrr is not a reg"); + assert(src.isReg() && "src of a movrr is not a reg"); + + SrcReg = src.getReg(); + DestReg = dest.getReg(); + return true; + } + + return false; +} + +bool NVPTXInstrInfo::isReadSpecialReg(MachineInstr &MI) const +{ + switch (MI.getOpcode()) { + default: return false; + case NVPTX::INT_PTX_SREG_NTID_X: + case NVPTX::INT_PTX_SREG_NTID_Y: + case NVPTX::INT_PTX_SREG_NTID_Z: + case NVPTX::INT_PTX_SREG_TID_X: + case NVPTX::INT_PTX_SREG_TID_Y: + case NVPTX::INT_PTX_SREG_TID_Z: + case NVPTX::INT_PTX_SREG_CTAID_X: + case NVPTX::INT_PTX_SREG_CTAID_Y: + case NVPTX::INT_PTX_SREG_CTAID_Z: + case NVPTX::INT_PTX_SREG_NCTAID_X: + case NVPTX::INT_PTX_SREG_NCTAID_Y: + case NVPTX::INT_PTX_SREG_NCTAID_Z: + case NVPTX::INT_PTX_SREG_WARPSIZE: + return true; + } +} + + +bool NVPTXInstrInfo::isLoadInstr(const MachineInstr &MI, + unsigned &AddrSpace) const { + bool isLoad = false; + unsigned TSFlags = (MI.getDesc().TSFlags & NVPTX::isLoadMask) >> + NVPTX::isLoadShift; + isLoad = (TSFlags == 1); + if (isLoad) + AddrSpace = getLdStCodeAddrSpace(MI); + return isLoad; +} + +bool NVPTXInstrInfo::isStoreInstr(const MachineInstr &MI, + unsigned &AddrSpace) const { + bool isStore = false; + unsigned TSFlags = (MI.getDesc().TSFlags & NVPTX::isStoreMask) >> + NVPTX::isStoreShift; + isStore = (TSFlags == 1); + if (isStore) + AddrSpace = getLdStCodeAddrSpace(MI); + return isStore; +} + + +bool NVPTXInstrInfo::CanTailMerge(const MachineInstr *MI) const { + unsigned addrspace = 0; + if (MI->getOpcode() == NVPTX::INT_CUDA_SYNCTHREADS) + return false; + if (isLoadInstr(*MI, addrspace)) + if (addrspace == NVPTX::PTXLdStInstCode::SHARED) + return false; + if (isStoreInstr(*MI, addrspace)) + if (addrspace == 
NVPTX::PTXLdStInstCode::SHARED) + return false; + return true; +} + + +/// AnalyzeBranch - Analyze the branching code at the end of MBB, returning +/// true if it cannot be understood (e.g. it's a switch dispatch or isn't +/// implemented for a target). Upon success, this returns false and returns +/// with the following information in various cases: +/// +/// 1. If this block ends with no branches (it just falls through to its succ) +/// just return false, leaving TBB/FBB null. +/// 2. If this block ends with only an unconditional branch, it sets TBB to be +/// the destination block. +/// 3. If this block ends with an conditional branch and it falls through to +/// an successor block, it sets TBB to be the branch destination block and a +/// list of operands that evaluate the condition. These +/// operands can be passed to other TargetInstrInfo methods to create new +/// branches. +/// 4. If this block ends with an conditional branch and an unconditional +/// block, it returns the 'true' destination in TBB, the 'false' destination +/// in FBB, and a list of operands that evaluate the condition. These +/// operands can be passed to other TargetInstrInfo methods to create new +/// branches. +/// +/// Note that RemoveBranch and InsertBranch must be implemented to support +/// cases where this method returns success. +/// +bool NVPTXInstrInfo::AnalyzeBranch(MachineBasicBlock &MBB, + MachineBasicBlock *&TBB, + MachineBasicBlock *&FBB, + SmallVectorImpl<MachineOperand> &Cond, + bool AllowModify) const { + // If the block has no terminators, it just falls into the block after it. + MachineBasicBlock::iterator I = MBB.end(); + if (I == MBB.begin() || !isUnpredicatedTerminator(--I)) + return false; + + // Get the last instruction in the block. + MachineInstr *LastInst = I; + + // If there is only one terminator instruction, process it. 
+ if (I == MBB.begin() || !isUnpredicatedTerminator(--I)) { + if (LastInst->getOpcode() == NVPTX::GOTO) { + TBB = LastInst->getOperand(0).getMBB(); + return false; + } else if (LastInst->getOpcode() == NVPTX::CBranch) { + // Block ends with fall-through condbranch. + TBB = LastInst->getOperand(1).getMBB(); + Cond.push_back(LastInst->getOperand(0)); + return false; + } + // Otherwise, don't know what this is. + return true; + } + + // Get the instruction before it if it's a terminator. + MachineInstr *SecondLastInst = I; + + // If there are three terminators, we don't know what sort of block this is. + if (SecondLastInst && I != MBB.begin() && + isUnpredicatedTerminator(--I)) + return true; + + // If the block ends with NVPTX::GOTO and NVPTX:CBranch, handle it. + if (SecondLastInst->getOpcode() == NVPTX::CBranch && + LastInst->getOpcode() == NVPTX::GOTO) { + TBB = SecondLastInst->getOperand(1).getMBB(); + Cond.push_back(SecondLastInst->getOperand(0)); + FBB = LastInst->getOperand(0).getMBB(); + return false; + } + + // If the block ends with two NVPTX:GOTOs, handle it. The second one is not + // executed, so remove it. + if (SecondLastInst->getOpcode() == NVPTX::GOTO && + LastInst->getOpcode() == NVPTX::GOTO) { + TBB = SecondLastInst->getOperand(0).getMBB(); + I = LastInst; + if (AllowModify) + I->eraseFromParent(); + return false; + } + + // Otherwise, can't handle this. + return true; +} + +unsigned NVPTXInstrInfo::RemoveBranch(MachineBasicBlock &MBB) const { + MachineBasicBlock::iterator I = MBB.end(); + if (I == MBB.begin()) return 0; + --I; + if (I->getOpcode() != NVPTX::GOTO && I->getOpcode() != NVPTX::CBranch) + return 0; + + // Remove the branch. + I->eraseFromParent(); + + I = MBB.end(); + + if (I == MBB.begin()) return 1; + --I; + if (I->getOpcode() != NVPTX::CBranch) + return 1; + + // Remove the branch. 
+ I->eraseFromParent(); + return 2; +} + +unsigned +NVPTXInstrInfo::InsertBranch(MachineBasicBlock &MBB, MachineBasicBlock *TBB, + MachineBasicBlock *FBB, + const SmallVectorImpl<MachineOperand> &Cond, + DebugLoc DL) const { + // Shouldn't be a fall through. + assert(TBB && "InsertBranch must not be told to insert a fallthrough"); + assert((Cond.size() == 1 || Cond.size() == 0) && + "NVPTX branch conditions have two components!"); + + // One-way branch. + if (FBB == 0) { + if (Cond.empty()) // Unconditional branch + BuildMI(&MBB, DL, get(NVPTX::GOTO)).addMBB(TBB); + else // Conditional branch + BuildMI(&MBB, DL, get(NVPTX::CBranch)) + .addReg(Cond[0].getReg()).addMBB(TBB); + return 1; + } + + // Two-way Conditional Branch. + BuildMI(&MBB, DL, get(NVPTX::CBranch)) + .addReg(Cond[0].getReg()).addMBB(TBB); + BuildMI(&MBB, DL, get(NVPTX::GOTO)).addMBB(FBB); + return 2; +} diff --git a/lib/Target/NVPTX/NVPTXInstrInfo.h b/lib/Target/NVPTX/NVPTXInstrInfo.h new file mode 100644 index 0000000..7b8e218 --- /dev/null +++ b/lib/Target/NVPTX/NVPTXInstrInfo.h @@ -0,0 +1,83 @@ +//===- NVPTXInstrInfo.h - NVPTX Instruction Information----------*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the niversity of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file contains the NVPTX implementation of the TargetInstrInfo class. 
//
//===----------------------------------------------------------------------===//

#ifndef NVPTXINSTRUCTIONINFO_H
#define NVPTXINSTRUCTIONINFO_H

#include "NVPTX.h"
#include "NVPTXRegisterInfo.h"
#include "llvm/Target/TargetInstrInfo.h"

#define GET_INSTRINFO_HEADER
#include "NVPTXGenInstrInfo.inc"

namespace llvm {

// NVPTX instruction information, layered on the TableGen-generated
// NVPTXGenInstrInfo. It holds the target's register info object by value.
class NVPTXInstrInfo : public NVPTXGenInstrInfo
{
  NVPTXTargetMachine &TM;
  const NVPTXRegisterInfo RegInfo;
public:
  explicit NVPTXInstrInfo(NVPTXTargetMachine &TM);

  virtual const NVPTXRegisterInfo &getRegisterInfo() const { return RegInfo; }

  /* The following virtual functions are used in register allocation.
   * They are not implemented because the existing interface and the logic
   * at the caller side do not work for the elementized vector load and store.
   *
   * virtual unsigned isLoadFromStackSlot(const MachineInstr *MI,
   *                                  int &FrameIndex) const;
   * virtual unsigned isStoreToStackSlot(const MachineInstr *MI,
   *                                 int &FrameIndex) const;
   * virtual void storeRegToStackSlot(MachineBasicBlock &MBB,
   *                              MachineBasicBlock::iterator MBBI,
   *                             unsigned SrcReg, bool isKill, int FrameIndex,
   *                              const TargetRegisterClass *RC) const;
   * virtual void loadRegFromStackSlot(MachineBasicBlock &MBB,
   *                               MachineBasicBlock::iterator MBBI,
   *                               unsigned DestReg, int FrameIndex,
   *                               const TargetRegisterClass *RC) const;
   */

  // Emits a move between two registers of the same register class.
  virtual void copyPhysReg(MachineBasicBlock &MBB,
                           MachineBasicBlock::iterator I, DebugLoc DL,
                           unsigned DestReg, unsigned SrcReg,
                           bool KillSrc) const ;
  // On success SrcReg/DestReg receive the registers of operand 1 / operand 0.
  virtual bool isMoveInstr(const MachineInstr &MI,
                           unsigned &SrcReg,
                           unsigned &DestReg) const;
  // AddrSpace is written only when the function returns true.
  bool isLoadInstr(const MachineInstr &MI, unsigned &AddrSpace) const;
  bool isStoreInstr(const MachineInstr &MI, unsigned &AddrSpace) const;
  bool isReadSpecialReg(MachineInstr &MI) const;

  virtual bool CanTailMerge(const MachineInstr *MI) const ;
  // Branch analysis.
  virtual bool AnalyzeBranch(MachineBasicBlock &MBB, MachineBasicBlock *&TBB,
                             MachineBasicBlock *&FBB,
                             SmallVectorImpl<MachineOperand> &Cond,
                             bool AllowModify) const;
  virtual unsigned RemoveBranch(MachineBasicBlock &MBB) const;
  virtual unsigned InsertBranch(MachineBasicBlock &MBB,MachineBasicBlock *TBB,
                                MachineBasicBlock *FBB,
                                const SmallVectorImpl<MachineOperand> &Cond,
                                DebugLoc DL) const;
  // Returns the immediate held in operand 2 of MI.
  // NOTE(review): assumes MI is a load/store whose operand 2 is the
  // address-space code - confirm against the instruction definitions.
  unsigned getLdStCodeAddrSpace(const MachineInstr &MI) const {
    return  MI.getOperand(2).getImm();
  }

};

} // namespace llvm

#endif
diff --git a/lib/Target/NVPTX/NVPTXInstrInfo.td b/lib/Target/NVPTX/NVPTXInstrInfo.td
new file mode 100644
index 0000000..1ed206b
--- /dev/null
+++ b/lib/Target/NVPTX/NVPTXInstrInfo.td
@@ -0,0 +1,2837 @@
//===- NVPTXInstrInfo.td - NVPTX Instruction defs -------------*- tblgen-*-===//
//
//                     The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
// This file describes the PTX instructions in TableGen format.
//
//===----------------------------------------------------------------------===//

include "NVPTXInstrFormats.td"

// A NOP instruction
// (no operands, empty assembly string, no selection pattern)
def NOP : NVPTXInst<(outs), (ins), "", []>;

// List of vector specific properties
// Each def is a 4-bit VecInstTypeEnum tag.
def isVecLD      : VecInstTypeEnum<1>;
def isVecST      : VecInstTypeEnum<2>;
def isVecBuild   : VecInstTypeEnum<3>;
def isVecShuffle : VecInstTypeEnum<4>;
def isVecExtract : VecInstTypeEnum<5>;
def isVecInsert  : VecInstTypeEnum<6>;
def isVecDest    : VecInstTypeEnum<7>;
def isVecOther   : VecInstTypeEnum<15>;

//===----------------------------------------------------------------------===//
// NVPTX Operand Definitions.
+//===----------------------------------------------------------------------===// + +def brtarget : Operand<OtherVT>; + +//===----------------------------------------------------------------------===// +// NVPTX Instruction Predicate Definitions +//===----------------------------------------------------------------------===// + + +def hasAtomRedG32 : Predicate<"Subtarget.hasAtomRedG32()">; +def hasAtomRedS32 : Predicate<"Subtarget.hasAtomRedS32()">; +def hasAtomRedGen32 : Predicate<"Subtarget.hasAtomRedGen32()">; +def useAtomRedG32forGen32 : + Predicate<"!Subtarget.hasAtomRedGen32() && Subtarget.hasAtomRedG32()">; +def hasBrkPt : Predicate<"Subtarget.hasBrkPt()">; +def hasAtomRedG64 : Predicate<"Subtarget.hasAtomRedG64()">; +def hasAtomRedS64 : Predicate<"Subtarget.hasAtomRedS64()">; +def hasAtomRedGen64 : Predicate<"Subtarget.hasAtomRedGen64()">; +def useAtomRedG64forGen64 : + Predicate<"!Subtarget.hasAtomRedGen64() && Subtarget.hasAtomRedG64()">; +def hasAtomAddF32 : Predicate<"Subtarget.hasAtomAddF32()">; +def hasVote : Predicate<"Subtarget.hasVote()">; +def hasDouble : Predicate<"Subtarget.hasDouble()">; +def reqPTX20 : Predicate<"Subtarget.reqPTX20()">; +def hasLDU : Predicate<"Subtarget.hasLDU()">; +def hasGenericLdSt : Predicate<"Subtarget.hasGenericLdSt()">; + +def doF32FTZ : Predicate<"UseF32FTZ">; + +def doFMAF32 : Predicate<"doFMAF32">; +def doFMAF32_ftz : Predicate<"(doFMAF32 && UseF32FTZ)">; +def doFMAF32AGG : Predicate<"doFMAF32AGG">; +def doFMAF32AGG_ftz : Predicate<"(doFMAF32AGG && UseF32FTZ)">; +def doFMAF64 : Predicate<"doFMAF64">; +def doFMAF64AGG : Predicate<"doFMAF64AGG">; +def doFMADF32 : Predicate<"doFMADF32">; +def doFMADF32_ftz : Predicate<"(doFMADF32 && UseF32FTZ)">; + +def doMulWide : Predicate<"doMulWide">; + +def allowFMA : Predicate<"allowFMA">; +def allowFMA_ftz : Predicate<"(allowFMA && UseF32FTZ)">; + +def do_DIVF32_APPROX : Predicate<"do_DIVF32_PREC==0">; +def do_DIVF32_FULL : Predicate<"do_DIVF32_PREC==1">; + +def hasHWROT32 : 
Predicate<"Subtarget.hasHWROT32()">; + +def true : Predicate<"1">; + +//===----------------------------------------------------------------------===// +// Special Handling for 8-bit Operands and Operations +// +// PTX supports 8-bit signed and unsigned types, but does not support 8-bit +// operations (like add, shift, etc) except for ld/st/cvt. SASS does not have +// 8-bit registers. +// +// PTX ld, st and cvt instructions permit source and destination data operands +// to be wider than the instruction-type size, so that narrow values may be +// loaded, stored, and converted using regular-width registers. +// +// So in PTX generation, we +// - always use 16-bit registers in place of 8-bit registers. +// (8-bit variables should stay as 8-bit as they represent memory layout.) +// - for the following 8-bit operations, we sign-ext/zero-ext the 8-bit values +// before operation +// . div +// . rem +// . neg (sign) +// . set, setp +// . shr +// +// We are patching the operations by inserting the cvt instructions in the +// asm strings of the affected instructions. +// +// Since vector operations, except for ld/st, are eventually elementized, we +// do not need to special-handle the vector 8-bit operations. 
+// +// +//===----------------------------------------------------------------------===// + +// Generate string block like +// { +// .reg .s16 %temp1; +// .reg .s16 %temp2; +// cvt.s16.s8 %temp1, %a; +// cvt.s16.s8 %temp2, %b; +// opc.s16 %dst, %temp1, %temp2; +// } +// when OpcStr=opc.s TypeStr=s16 CVTStr=cvt.s16.s8 +class Handle_i8rr<string OpcStr, string TypeStr, string CVTStr> { + string s = !strconcat("{{\n\t", + !strconcat(".reg .", !strconcat(TypeStr, + !strconcat(" \t%temp1;\n\t", + !strconcat(".reg .", !strconcat(TypeStr, + !strconcat(" \t%temp2;\n\t", + !strconcat(CVTStr, !strconcat(" \t%temp1, $a;\n\t", + !strconcat(CVTStr, !strconcat(" \t%temp2, $b;\n\t", + !strconcat(OpcStr, "16 \t$dst, %temp1, %temp2;\n\t}}")))))))))))); +} + +// Generate string block like +// { +// .reg .s16 %temp1; +// .reg .s16 %temp2; +// cvt.s16.s8 %temp1, %a; +// mov.b16 %temp2, %b; +// cvt.s16.s8 %temp2, %temp2; +// opc.s16 %dst, %temp1, %temp2; +// } +// when OpcStr=opc.s TypeStr=s16 CVTStr=cvt.s16.s8 +class Handle_i8ri<string OpcStr, string TypeStr, string CVTStr> { + string s = !strconcat("{{\n\t", + !strconcat(".reg .", !strconcat(TypeStr, + !strconcat(" \t%temp1;\n\t", + !strconcat(".reg .", + !strconcat(TypeStr, !strconcat(" \t%temp2;\n\t", + !strconcat(CVTStr, !strconcat(" \t%temp1, $a;\n\t", + !strconcat("mov.b16 \t%temp2, $b;\n\t", + !strconcat(CVTStr, !strconcat(" \t%temp2, %temp2;\n\t", + !strconcat(OpcStr, "16 \t$dst, %temp1, %temp2;\n\t}}"))))))))))))); +} + +// Generate string block like +// { +// .reg .s16 %temp1; +// .reg .s16 %temp2; +// mov.b16 %temp1, %a; +// cvt.s16.s8 %temp1, %temp1; +// cvt.s16.s8 %temp2, %b; +// opc.s16 %dst, %temp1, %temp2; +// } +// when OpcStr=opc.s TypeStr=s16 CVTStr=cvt.s16.s8 +class Handle_i8ir<string OpcStr, string TypeStr, string CVTStr> { + string s = !strconcat("{{\n\t", + !strconcat(".reg .", !strconcat(TypeStr, + !strconcat(" \t%temp1;\n\t", + !strconcat(".reg .", !strconcat(TypeStr, + !strconcat(" \t%temp2;\n\t", + 
!strconcat("mov.b16 \t%temp1, $a;\n\t", + !strconcat(CVTStr, !strconcat(" \t%temp1, %temp1;\n\t", + !strconcat(CVTStr, !strconcat(" \t%temp2, $b;\n\t", + !strconcat(OpcStr, "16 \t$dst, %temp1, %temp2;\n\t}}"))))))))))))); +} + + +//===----------------------------------------------------------------------===// +// Some Common Instruction Class Templates +//===----------------------------------------------------------------------===// + +multiclass I3<string OpcStr, SDNode OpNode> { + def i64rr : NVPTXInst<(outs Int64Regs:$dst), (ins Int64Regs:$a, Int64Regs:$b), + !strconcat(OpcStr, "64 \t$dst, $a, $b;"), + [(set Int64Regs:$dst, (OpNode Int64Regs:$a, + Int64Regs:$b))]>; + def i64ri : NVPTXInst<(outs Int64Regs:$dst), (ins Int64Regs:$a, i64imm:$b), + !strconcat(OpcStr, "64 \t$dst, $a, $b;"), + [(set Int64Regs:$dst, (OpNode Int64Regs:$a, imm:$b))]>; + def i32rr : NVPTXInst<(outs Int32Regs:$dst), (ins Int32Regs:$a, Int32Regs:$b), + !strconcat(OpcStr, "32 \t$dst, $a, $b;"), + [(set Int32Regs:$dst, (OpNode Int32Regs:$a, + Int32Regs:$b))]>; + def i32ri : NVPTXInst<(outs Int32Regs:$dst), (ins Int32Regs:$a, i32imm:$b), + !strconcat(OpcStr, "32 \t$dst, $a, $b;"), + [(set Int32Regs:$dst, (OpNode Int32Regs:$a, imm:$b))]>; + def i16rr : NVPTXInst<(outs Int16Regs:$dst), (ins Int16Regs:$a, Int16Regs:$b), + !strconcat(OpcStr, "16 \t$dst, $a, $b;"), + [(set Int16Regs:$dst, (OpNode Int16Regs:$a, + Int16Regs:$b))]>; + def i16ri : NVPTXInst<(outs Int16Regs:$dst), (ins Int16Regs:$a, i16imm:$b), + !strconcat(OpcStr, "16 \t$dst, $a, $b;"), + [(set Int16Regs:$dst, (OpNode Int16Regs:$a, (imm):$b))]>; + def i8rr : NVPTXInst<(outs Int8Regs:$dst), (ins Int8Regs:$a, Int8Regs:$b), + !strconcat(OpcStr, "16 \t$dst, $a, $b;"), + [(set Int8Regs:$dst, (OpNode Int8Regs:$a, Int8Regs:$b))]>; + def i8ri : NVPTXInst<(outs Int8Regs:$dst), (ins Int8Regs:$a, i8imm:$b), + !strconcat(OpcStr, "16 \t$dst, $a, $b;"), + [(set Int8Regs:$dst, (OpNode Int8Regs:$a, (imm):$b))]>; +} + +multiclass I3_i8<string OpcStr, 
SDNode OpNode, string TypeStr, string CVTStr> { + def i64rr : NVPTXInst<(outs Int64Regs:$dst), (ins Int64Regs:$a, Int64Regs:$b), + !strconcat(OpcStr, "64 \t$dst, $a, $b;"), + [(set Int64Regs:$dst, (OpNode Int64Regs:$a, + Int64Regs:$b))]>; + def i64ri : NVPTXInst<(outs Int64Regs:$dst), (ins Int64Regs:$a, i64imm:$b), + !strconcat(OpcStr, "64 \t$dst, $a, $b;"), + [(set Int64Regs:$dst, (OpNode Int64Regs:$a, imm:$b))]>; + def i32rr : NVPTXInst<(outs Int32Regs:$dst), (ins Int32Regs:$a, Int32Regs:$b), + !strconcat(OpcStr, "32 \t$dst, $a, $b;"), + [(set Int32Regs:$dst, (OpNode Int32Regs:$a, + Int32Regs:$b))]>; + def i32ri : NVPTXInst<(outs Int32Regs:$dst), (ins Int32Regs:$a, i32imm:$b), + !strconcat(OpcStr, "32 \t$dst, $a, $b;"), + [(set Int32Regs:$dst, (OpNode Int32Regs:$a, imm:$b))]>; + def i16rr : NVPTXInst<(outs Int16Regs:$dst), (ins Int16Regs:$a, Int16Regs:$b), + !strconcat(OpcStr, "16 \t$dst, $a, $b;"), + [(set Int16Regs:$dst, (OpNode Int16Regs:$a, + Int16Regs:$b))]>; + def i16ri : NVPTXInst<(outs Int16Regs:$dst), (ins Int16Regs:$a, i16imm:$b), + !strconcat(OpcStr, "16 \t$dst, $a, $b;"), + [(set Int16Regs:$dst, (OpNode Int16Regs:$a, (imm):$b))]>; + def i8rr : NVPTXInst<(outs Int8Regs:$dst), (ins Int8Regs:$a, Int8Regs:$b), + Handle_i8rr<OpcStr, TypeStr, CVTStr>.s, + [(set Int8Regs:$dst, (OpNode Int8Regs:$a, Int8Regs:$b))]>; + def i8ri : NVPTXInst<(outs Int8Regs:$dst), (ins Int8Regs:$a, i8imm:$b), + Handle_i8ri<OpcStr, TypeStr, CVTStr>.s, + [(set Int8Regs:$dst, (OpNode Int8Regs:$a, (imm):$b))]>; +} + +multiclass I3_noi8<string OpcStr, SDNode OpNode> { + def i64rr : NVPTXInst<(outs Int64Regs:$dst), (ins Int64Regs:$a, Int64Regs:$b), + !strconcat(OpcStr, "64 \t$dst, $a, $b;"), + [(set Int64Regs:$dst, (OpNode Int64Regs:$a, + Int64Regs:$b))]>; + def i64ri : NVPTXInst<(outs Int64Regs:$dst), (ins Int64Regs:$a, i64imm:$b), + !strconcat(OpcStr, "64 \t$dst, $a, $b;"), + [(set Int64Regs:$dst, (OpNode Int64Regs:$a, imm:$b))]>; + def i32rr : NVPTXInst<(outs Int32Regs:$dst), (ins 
Int32Regs:$a, Int32Regs:$b), + !strconcat(OpcStr, "32 \t$dst, $a, $b;"), + [(set Int32Regs:$dst, (OpNode Int32Regs:$a, + Int32Regs:$b))]>; + def i32ri : NVPTXInst<(outs Int32Regs:$dst), (ins Int32Regs:$a, i32imm:$b), + !strconcat(OpcStr, "32 \t$dst, $a, $b;"), + [(set Int32Regs:$dst, (OpNode Int32Regs:$a, imm:$b))]>; + def i16rr : NVPTXInst<(outs Int16Regs:$dst), (ins Int16Regs:$a, Int16Regs:$b), + !strconcat(OpcStr, "16 \t$dst, $a, $b;"), + [(set Int16Regs:$dst, (OpNode Int16Regs:$a, + Int16Regs:$b))]>; + def i16ri : NVPTXInst<(outs Int16Regs:$dst), (ins Int16Regs:$a, i16imm:$b), + !strconcat(OpcStr, "16 \t$dst, $a, $b;"), + [(set Int16Regs:$dst, (OpNode Int16Regs:$a, (imm):$b))]>; +} + +multiclass ADD_SUB_INT_32<string OpcStr, SDNode OpNode> { + def i32rr : NVPTXInst<(outs Int32Regs:$dst), (ins Int32Regs:$a, + Int32Regs:$b), + !strconcat(OpcStr, ".s32 \t$dst, $a, $b;"), + [(set Int32Regs:$dst, (OpNode Int32Regs:$a, + Int32Regs:$b))]>; + def i32ri : NVPTXInst<(outs Int32Regs:$dst), (ins Int32Regs:$a, i32imm:$b), + !strconcat(OpcStr, ".s32 \t$dst, $a, $b;"), + [(set Int32Regs:$dst, (OpNode Int32Regs:$a, imm:$b))]>; +} + +multiclass F3<string OpcStr, SDNode OpNode> { + def f64rr : NVPTXInst<(outs Float64Regs:$dst), + (ins Float64Regs:$a, Float64Regs:$b), + !strconcat(OpcStr, ".f64 \t$dst, $a, $b;"), + [(set Float64Regs:$dst, + (OpNode Float64Regs:$a, Float64Regs:$b))]>, + Requires<[allowFMA]>; + def f64ri : NVPTXInst<(outs Float64Regs:$dst), + (ins Float64Regs:$a, f64imm:$b), + !strconcat(OpcStr, ".f64 \t$dst, $a, $b;"), + [(set Float64Regs:$dst, + (OpNode Float64Regs:$a, fpimm:$b))]>, + Requires<[allowFMA]>; + def f32rr_ftz : NVPTXInst<(outs Float32Regs:$dst), + (ins Float32Regs:$a, Float32Regs:$b), + !strconcat(OpcStr, ".ftz.f32 \t$dst, $a, $b;"), + [(set Float32Regs:$dst, + (OpNode Float32Regs:$a, Float32Regs:$b))]>, + Requires<[allowFMA_ftz]>; + def f32ri_ftz : NVPTXInst<(outs Float32Regs:$dst), + (ins Float32Regs:$a, f32imm:$b), + !strconcat(OpcStr, ".ftz.f32 
\t$dst, $a, $b;"), + [(set Float32Regs:$dst, + (OpNode Float32Regs:$a, fpimm:$b))]>, + Requires<[allowFMA_ftz]>; + def f32rr : NVPTXInst<(outs Float32Regs:$dst), + (ins Float32Regs:$a, Float32Regs:$b), + !strconcat(OpcStr, ".f32 \t$dst, $a, $b;"), + [(set Float32Regs:$dst, + (OpNode Float32Regs:$a, Float32Regs:$b))]>, + Requires<[allowFMA]>; + def f32ri : NVPTXInst<(outs Float32Regs:$dst), + (ins Float32Regs:$a, f32imm:$b), + !strconcat(OpcStr, ".f32 \t$dst, $a, $b;"), + [(set Float32Regs:$dst, + (OpNode Float32Regs:$a, fpimm:$b))]>, + Requires<[allowFMA]>; +} + +multiclass F3_rn<string OpcStr, SDNode OpNode> { + def f64rr : NVPTXInst<(outs Float64Regs:$dst), + (ins Float64Regs:$a, Float64Regs:$b), + !strconcat(OpcStr, ".rn.f64 \t$dst, $a, $b;"), + [(set Float64Regs:$dst, + (OpNode Float64Regs:$a, Float64Regs:$b))]>; + def f64ri : NVPTXInst<(outs Float64Regs:$dst), + (ins Float64Regs:$a, f64imm:$b), + !strconcat(OpcStr, ".rn.f64 \t$dst, $a, $b;"), + [(set Float64Regs:$dst, + (OpNode Float64Regs:$a, fpimm:$b))]>; + def f32rr_ftz : NVPTXInst<(outs Float32Regs:$dst), + (ins Float32Regs:$a, Float32Regs:$b), + !strconcat(OpcStr, ".rn.ftz.f32 \t$dst, $a, $b;"), + [(set Float32Regs:$dst, + (OpNode Float32Regs:$a, Float32Regs:$b))]>, + Requires<[doF32FTZ]>; + def f32ri_ftz : NVPTXInst<(outs Float32Regs:$dst), + (ins Float32Regs:$a, f32imm:$b), + !strconcat(OpcStr, ".rn.ftz.f32 \t$dst, $a, $b;"), + [(set Float32Regs:$dst, + (OpNode Float32Regs:$a, fpimm:$b))]>, + Requires<[doF32FTZ]>; + def f32rr : NVPTXInst<(outs Float32Regs:$dst), + (ins Float32Regs:$a, Float32Regs:$b), + !strconcat(OpcStr, ".rn.f32 \t$dst, $a, $b;"), + [(set Float32Regs:$dst, + (OpNode Float32Regs:$a, Float32Regs:$b))]>; + def f32ri : NVPTXInst<(outs Float32Regs:$dst), + (ins Float32Regs:$a, f32imm:$b), + !strconcat(OpcStr, ".rn.f32 \t$dst, $a, $b;"), + [(set Float32Regs:$dst, + (OpNode Float32Regs:$a, fpimm:$b))]>; +} + +multiclass F2<string OpcStr, SDNode OpNode> { + def f64 : NVPTXInst<(outs 
Float64Regs:$dst), (ins Float64Regs:$a), + !strconcat(OpcStr, ".f64 \t$dst, $a;"), + [(set Float64Regs:$dst, (OpNode Float64Regs:$a))]>; + def f32_ftz : NVPTXInst<(outs Float32Regs:$dst), (ins Float32Regs:$a), + !strconcat(OpcStr, ".ftz.f32 \t$dst, $a;"), + [(set Float32Regs:$dst, (OpNode Float32Regs:$a))]>, + Requires<[doF32FTZ]>; + def f32 : NVPTXInst<(outs Float32Regs:$dst), (ins Float32Regs:$a), + !strconcat(OpcStr, ".f32 \t$dst, $a;"), + [(set Float32Regs:$dst, (OpNode Float32Regs:$a))]>; +} + +//===----------------------------------------------------------------------===// +// NVPTX Instructions. +//===----------------------------------------------------------------------===// + +//----------------------------------- +// Integer Arithmetic +//----------------------------------- + +multiclass ADD_SUB_i1<SDNode OpNode> { + def _rr: NVPTXInst<(outs Int1Regs:$dst), (ins Int1Regs:$a, Int1Regs:$b), + "xor.pred \t$dst, $a, $b;", + [(set Int1Regs:$dst, (OpNode Int1Regs:$a, Int1Regs:$b))]>; + def _ri: NVPTXInst<(outs Int1Regs:$dst), (ins Int1Regs:$a, i1imm:$b), + "xor.pred \t$dst, $a, $b;", + [(set Int1Regs:$dst, (OpNode Int1Regs:$a, (imm):$b))]>; +} + +defm ADD_i1 : ADD_SUB_i1<add>; +defm SUB_i1 : ADD_SUB_i1<sub>; + + +defm ADD : I3<"add.s", add>; +defm SUB : I3<"sub.s", sub>; + +defm ADDCC : ADD_SUB_INT_32<"add.cc", addc>; +defm SUBCC : ADD_SUB_INT_32<"sub.cc", subc>; + +defm ADDCCC : ADD_SUB_INT_32<"addc.cc", adde>; +defm SUBCCC : ADD_SUB_INT_32<"subc.cc", sube>; + +//mul.wide PTX instruction +def SInt32Const : PatLeaf<(imm), [{ + const APInt &v = N->getAPIntValue(); + if (v.isSignedIntN(32)) + return true; + return false; +}]>; + +def UInt32Const : PatLeaf<(imm), [{ + const APInt &v = N->getAPIntValue(); + if (v.isIntN(32)) + return true; + return false; +}]>; + +def SInt16Const : PatLeaf<(imm), [{ + const APInt &v = N->getAPIntValue(); + if (v.isSignedIntN(16)) + return true; + return false; +}]>; + +def UInt16Const : PatLeaf<(imm), [{ + const APInt &v = 
N->getAPIntValue(); + if (v.isIntN(16)) + return true; + return false; +}]>; + +def Int5Const : PatLeaf<(imm), [{ + const APInt &v = N->getAPIntValue(); + // Check if 0 <= v < 32 + // Only then the result from (x << v) will be i32 + if (v.sge(0) && v.slt(32)) + return true; + return false; +}]>; + +def Int4Const : PatLeaf<(imm), [{ + const APInt &v = N->getAPIntValue(); + // Check if 0 <= v < 16 + // Only then the result from (x << v) will be i16 + if (v.sge(0) && v.slt(16)) + return true; + return false; +}]>; + +def SHL2MUL32 : SDNodeXForm<imm, [{ + const APInt &v = N->getAPIntValue(); + APInt temp(32, 1); + return CurDAG->getTargetConstant(temp.shl(v), MVT::i32); +}]>; + +def SHL2MUL16 : SDNodeXForm<imm, [{ + const APInt &v = N->getAPIntValue(); + APInt temp(16, 1); + return CurDAG->getTargetConstant(temp.shl(v), MVT::i16); +}]>; + +def MULWIDES64 : NVPTXInst<(outs Int64Regs:$dst), + (ins Int32Regs:$a, Int32Regs:$b), + "mul.wide.s32 \t$dst, $a, $b;", []>; +def MULWIDES64Imm : NVPTXInst<(outs Int64Regs:$dst), + (ins Int32Regs:$a, i64imm:$b), + "mul.wide.s32 \t$dst, $a, $b;", []>; + +def MULWIDEU64 : NVPTXInst<(outs Int64Regs:$dst), + (ins Int32Regs:$a, Int32Regs:$b), + "mul.wide.u32 \t$dst, $a, $b;", []>; +def MULWIDEU64Imm : NVPTXInst<(outs Int64Regs:$dst), + (ins Int32Regs:$a, i64imm:$b), + "mul.wide.u32 \t$dst, $a, $b;", []>; + +def MULWIDES32 : NVPTXInst<(outs Int32Regs:$dst), + (ins Int16Regs:$a, Int16Regs:$b), + "mul.wide.s16 \t$dst, $a, $b;", []>; +def MULWIDES32Imm : NVPTXInst<(outs Int32Regs:$dst), + (ins Int16Regs:$a, i32imm:$b), + "mul.wide.s16 \t$dst, $a, $b;", []>; + +def MULWIDEU32 : NVPTXInst<(outs Int32Regs:$dst), + (ins Int16Regs:$a, Int16Regs:$b), + "mul.wide.u16 \t$dst, $a, $b;", []>; +def MULWIDEU32Imm : NVPTXInst<(outs Int32Regs:$dst), + (ins Int16Regs:$a, i32imm:$b), + "mul.wide.u16 \t$dst, $a, $b;", []>; + +def : Pat<(shl (sext Int32Regs:$a), (i32 Int5Const:$b)), + (MULWIDES64Imm Int32Regs:$a, (SHL2MUL32 node:$b))>, + Requires<[doMulWide]>; 
+def : Pat<(shl (zext Int32Regs:$a), (i32 Int5Const:$b)), + (MULWIDEU64Imm Int32Regs:$a, (SHL2MUL32 node:$b))>, + Requires<[doMulWide]>; + +def : Pat<(shl (sext Int16Regs:$a), (i16 Int4Const:$b)), + (MULWIDES32Imm Int16Regs:$a, (SHL2MUL16 node:$b))>, + Requires<[doMulWide]>; +def : Pat<(shl (zext Int16Regs:$a), (i16 Int4Const:$b)), + (MULWIDEU32Imm Int16Regs:$a, (SHL2MUL16 node:$b))>, + Requires<[doMulWide]>; + +def : Pat<(mul (sext Int32Regs:$a), (sext Int32Regs:$b)), + (MULWIDES64 Int32Regs:$a, Int32Regs:$b)>, + Requires<[doMulWide]>; +def : Pat<(mul (sext Int32Regs:$a), (i64 SInt32Const:$b)), + (MULWIDES64Imm Int32Regs:$a, (i64 SInt32Const:$b))>, + Requires<[doMulWide]>; + +def : Pat<(mul (zext Int32Regs:$a), (zext Int32Regs:$b)), + (MULWIDEU64 Int32Regs:$a, Int32Regs:$b)>, Requires<[doMulWide]>; +def : Pat<(mul (zext Int32Regs:$a), (i64 UInt32Const:$b)), + (MULWIDEU64Imm Int32Regs:$a, (i64 UInt32Const:$b))>, + Requires<[doMulWide]>; + +def : Pat<(mul (sext Int16Regs:$a), (sext Int16Regs:$b)), + (MULWIDES32 Int16Regs:$a, Int16Regs:$b)>, Requires<[doMulWide]>; +def : Pat<(mul (sext Int16Regs:$a), (i32 SInt16Const:$b)), + (MULWIDES32Imm Int16Regs:$a, (i32 SInt16Const:$b))>, + Requires<[doMulWide]>; + +def : Pat<(mul (zext Int16Regs:$a), (zext Int16Regs:$b)), + (MULWIDEU32 Int16Regs:$a, Int16Regs:$b)>, Requires<[doMulWide]>; +def : Pat<(mul (zext Int16Regs:$a), (i32 UInt16Const:$b)), + (MULWIDEU32Imm Int16Regs:$a, (i32 UInt16Const:$b))>, + Requires<[doMulWide]>; + +defm MULT : I3<"mul.lo.s", mul>; + +defm MULTHS : I3_noi8<"mul.hi.s", mulhs>; +defm MULTHU : I3_noi8<"mul.hi.u", mulhu>; +def MULTHSi8rr : NVPTXInst<(outs Int8Regs:$dst), (ins Int8Regs:$a, Int8Regs:$b), + !strconcat("{{ \n\t", + !strconcat(".reg \t.s16 temp1; \n\t", + !strconcat(".reg \t.s16 temp2; \n\t", + !strconcat("cvt.s16.s8 \ttemp1, $a; \n\t", + !strconcat("cvt.s16.s8 \ttemp2, $b; \n\t", + !strconcat("mul.lo.s16 \t$dst, temp1, temp2; \n\t", + !strconcat("shr.s16 \t$dst, $dst, 8; \n\t", + 
!strconcat("}}", "")))))))), + [(set Int8Regs:$dst, (mulhs Int8Regs:$a, Int8Regs:$b))]>; +def MULTHSi8ri : NVPTXInst<(outs Int8Regs:$dst), (ins Int8Regs:$a, i8imm:$b), + !strconcat("{{ \n\t", + !strconcat(".reg \t.s16 temp1; \n\t", + !strconcat(".reg \t.s16 temp2; \n\t", + !strconcat("cvt.s16.s8 \ttemp1, $a; \n\t", + !strconcat("mov.b16 \ttemp2, $b; \n\t", + !strconcat("cvt.s16.s8 \ttemp2, temp2; \n\t", + !strconcat("mul.lo.s16 \t$dst, temp1, temp2; \n\t", + !strconcat("shr.s16 \t$dst, $dst, 8; \n\t", + !strconcat("}}", ""))))))))), + [(set Int8Regs:$dst, (mulhs Int8Regs:$a, imm:$b))]>; +def MULTHUi8rr : NVPTXInst<(outs Int8Regs:$dst), (ins Int8Regs:$a, Int8Regs:$b), + !strconcat("{{ \n\t", + !strconcat(".reg \t.u16 temp1; \n\t", + !strconcat(".reg \t.u16 temp2; \n\t", + !strconcat("cvt.u16.u8 \ttemp1, $a; \n\t", + !strconcat("cvt.u16.u8 \ttemp2, $b; \n\t", + !strconcat("mul.lo.u16 \t$dst, temp1, temp2; \n\t", + !strconcat("shr.u16 \t$dst, $dst, 8; \n\t", + !strconcat("}}", "")))))))), + [(set Int8Regs:$dst, (mulhu Int8Regs:$a, Int8Regs:$b))]>; +def MULTHUi8ri : NVPTXInst<(outs Int8Regs:$dst), (ins Int8Regs:$a, i8imm:$b), + !strconcat("{{ \n\t", + !strconcat(".reg \t.u16 temp1; \n\t", + !strconcat(".reg \t.u16 temp2; \n\t", + !strconcat("cvt.u16.u8 \ttemp1, $a; \n\t", + !strconcat("mov.b16 \ttemp2, $b; \n\t", + !strconcat("cvt.u16.u8 \ttemp2, temp2; \n\t", + !strconcat("mul.lo.u16 \t$dst, temp1, temp2; \n\t", + !strconcat("shr.u16 \t$dst, $dst, 8; \n\t", + !strconcat("}}", ""))))))))), + [(set Int8Regs:$dst, (mulhu Int8Regs:$a, imm:$b))]>; + + +defm SDIV : I3_i8<"div.s", sdiv, "s16", "cvt.s16.s8">; +defm UDIV : I3_i8<"div.u", udiv, "u16", "cvt.u16.u8">; + +defm SREM : I3_i8<"rem.s", srem, "s16", "cvt.s16.s8">; +// The ri version will not be selected as DAGCombiner::visitSREM will lower it. +defm UREM : I3_i8<"rem.u", urem, "u16", "cvt.u16.u8">; +// The ri version will not be selected as DAGCombiner::visitUREM will lower it. 
+ +def MAD8rrr : NVPTXInst<(outs Int8Regs:$dst), + (ins Int8Regs:$a, Int8Regs:$b, Int8Regs:$c), + "mad.lo.s16 \t$dst, $a, $b, $c;", + [(set Int8Regs:$dst, (add (mul Int8Regs:$a, Int8Regs:$b), + Int8Regs:$c))]>; +def MAD8rri : NVPTXInst<(outs Int8Regs:$dst), + (ins Int8Regs:$a, Int8Regs:$b, i8imm:$c), + "mad.lo.s16 \t$dst, $a, $b, $c;", + [(set Int8Regs:$dst, (add (mul Int8Regs:$a, Int8Regs:$b), + imm:$c))]>; +def MAD8rir : NVPTXInst<(outs Int8Regs:$dst), + (ins Int8Regs:$a, i8imm:$b, Int8Regs:$c), + "mad.lo.s16 \t$dst, $a, $b, $c;", + [(set Int8Regs:$dst, (add (mul Int8Regs:$a, imm:$b), + Int8Regs:$c))]>; +def MAD8rii : NVPTXInst<(outs Int8Regs:$dst), + (ins Int8Regs:$a, i8imm:$b, i8imm:$c), + "mad.lo.s16 \t$dst, $a, $b, $c;", + [(set Int8Regs:$dst, (add (mul Int8Regs:$a, imm:$b), + imm:$c))]>; + +def MAD16rrr : NVPTXInst<(outs Int16Regs:$dst), + (ins Int16Regs:$a, Int16Regs:$b, Int16Regs:$c), + "mad.lo.s16 \t$dst, $a, $b, $c;", + [(set Int16Regs:$dst, (add + (mul Int16Regs:$a, Int16Regs:$b), Int16Regs:$c))]>; +def MAD16rri : NVPTXInst<(outs Int16Regs:$dst), + (ins Int16Regs:$a, Int16Regs:$b, i16imm:$c), + "mad.lo.s16 \t$dst, $a, $b, $c;", + [(set Int16Regs:$dst, (add + (mul Int16Regs:$a, Int16Regs:$b), imm:$c))]>; +def MAD16rir : NVPTXInst<(outs Int16Regs:$dst), + (ins Int16Regs:$a, i16imm:$b, Int16Regs:$c), + "mad.lo.s16 \t$dst, $a, $b, $c;", + [(set Int16Regs:$dst, (add + (mul Int16Regs:$a, imm:$b), Int16Regs:$c))]>; +def MAD16rii : NVPTXInst<(outs Int16Regs:$dst), + (ins Int16Regs:$a, i16imm:$b, i16imm:$c), + "mad.lo.s16 \t$dst, $a, $b, $c;", + [(set Int16Regs:$dst, (add (mul Int16Regs:$a, imm:$b), + imm:$c))]>; + +def MAD32rrr : NVPTXInst<(outs Int32Regs:$dst), + (ins Int32Regs:$a, Int32Regs:$b, Int32Regs:$c), + "mad.lo.s32 \t$dst, $a, $b, $c;", + [(set Int32Regs:$dst, (add + (mul Int32Regs:$a, Int32Regs:$b), Int32Regs:$c))]>; +def MAD32rri : NVPTXInst<(outs Int32Regs:$dst), + (ins Int32Regs:$a, Int32Regs:$b, i32imm:$c), + "mad.lo.s32 \t$dst, $a, $b, $c;", + 
[(set Int32Regs:$dst, (add + (mul Int32Regs:$a, Int32Regs:$b), imm:$c))]>; +def MAD32rir : NVPTXInst<(outs Int32Regs:$dst), + (ins Int32Regs:$a, i32imm:$b, Int32Regs:$c), + "mad.lo.s32 \t$dst, $a, $b, $c;", + [(set Int32Regs:$dst, (add + (mul Int32Regs:$a, imm:$b), Int32Regs:$c))]>; +def MAD32rii : NVPTXInst<(outs Int32Regs:$dst), + (ins Int32Regs:$a, i32imm:$b, i32imm:$c), + "mad.lo.s32 \t$dst, $a, $b, $c;", + [(set Int32Regs:$dst, (add + (mul Int32Regs:$a, imm:$b), imm:$c))]>; + +def MAD64rrr : NVPTXInst<(outs Int64Regs:$dst), + (ins Int64Regs:$a, Int64Regs:$b, Int64Regs:$c), + "mad.lo.s64 \t$dst, $a, $b, $c;", + [(set Int64Regs:$dst, (add + (mul Int64Regs:$a, Int64Regs:$b), Int64Regs:$c))]>; +def MAD64rri : NVPTXInst<(outs Int64Regs:$dst), + (ins Int64Regs:$a, Int64Regs:$b, i64imm:$c), + "mad.lo.s64 \t$dst, $a, $b, $c;", + [(set Int64Regs:$dst, (add + (mul Int64Regs:$a, Int64Regs:$b), imm:$c))]>; +def MAD64rir : NVPTXInst<(outs Int64Regs:$dst), + (ins Int64Regs:$a, i64imm:$b, Int64Regs:$c), + "mad.lo.s64 \t$dst, $a, $b, $c;", + [(set Int64Regs:$dst, (add + (mul Int64Regs:$a, imm:$b), Int64Regs:$c))]>; +def MAD64rii : NVPTXInst<(outs Int64Regs:$dst), + (ins Int64Regs:$a, i64imm:$b, i64imm:$c), + "mad.lo.s64 \t$dst, $a, $b, $c;", + [(set Int64Regs:$dst, (add + (mul Int64Regs:$a, imm:$b), imm:$c))]>; + + +def INEG8 : NVPTXInst<(outs Int8Regs:$dst), (ins Int8Regs:$src), + !strconcat("cvt.s16.s8 \t$dst, $src;\n\t", + "neg.s16 \t$dst, $dst;"), + [(set Int8Regs:$dst, (ineg Int8Regs:$src))]>; +def INEG16 : NVPTXInst<(outs Int16Regs:$dst), (ins Int16Regs:$src), + "neg.s16 \t$dst, $src;", + [(set Int16Regs:$dst, (ineg Int16Regs:$src))]>; +def INEG32 : NVPTXInst<(outs Int32Regs:$dst), (ins Int32Regs:$src), + "neg.s32 \t$dst, $src;", + [(set Int32Regs:$dst, (ineg Int32Regs:$src))]>; +def INEG64 : NVPTXInst<(outs Int64Regs:$dst), (ins Int64Regs:$src), + "neg.s64 \t$dst, $src;", + [(set Int64Regs:$dst, (ineg Int64Regs:$src))]>; + +//----------------------------------- +// 
Floating Point Arithmetic +//----------------------------------- + +// Constant 1.0f +def FloatConst1 : PatLeaf<(fpimm), [{ + if (&(N->getValueAPF().getSemantics()) != &llvm::APFloat::IEEEsingle) + return false; + float f = (float)N->getValueAPF().convertToFloat(); + return (f==1.0f); +}]>; +// Constant (double)1.0 +def DoubleConst1 : PatLeaf<(fpimm), [{ + if (&(N->getValueAPF().getSemantics()) != &llvm::APFloat::IEEEdouble) + return false; + double d = (double)N->getValueAPF().convertToDouble(); + return (d==1.0); +}]>; + +defm FADD : F3<"add", fadd>; +defm FSUB : F3<"sub", fsub>; +defm FMUL : F3<"mul", fmul>; + +defm FADD_rn : F3_rn<"add", fadd>; +defm FSUB_rn : F3_rn<"sub", fsub>; +defm FMUL_rn : F3_rn<"mul", fmul>; + +defm FABS : F2<"abs", fabs>; +defm FNEG : F2<"neg", fneg>; +defm FSQRT : F2<"sqrt.rn", fsqrt>; + +// +// F64 division +// +def FDIV641r : NVPTXInst<(outs Float64Regs:$dst), + (ins f64imm:$a, Float64Regs:$b), + "rcp.rn.f64 \t$dst, $b;", + [(set Float64Regs:$dst, + (fdiv DoubleConst1:$a, Float64Regs:$b))]>; +def FDIV64rr : NVPTXInst<(outs Float64Regs:$dst), + (ins Float64Regs:$a, Float64Regs:$b), + "div.rn.f64 \t$dst, $a, $b;", + [(set Float64Regs:$dst, + (fdiv Float64Regs:$a, Float64Regs:$b))]>; +def FDIV64ri : NVPTXInst<(outs Float64Regs:$dst), + (ins Float64Regs:$a, f64imm:$b), + "div.rn.f64 \t$dst, $a, $b;", + [(set Float64Regs:$dst, + (fdiv Float64Regs:$a, fpimm:$b))]>; + +// +// F32 Approximate reciprocal +// +def FDIV321r_ftz : NVPTXInst<(outs Float32Regs:$dst), + (ins f32imm:$a, Float32Regs:$b), + "rcp.approx.ftz.f32 \t$dst, $b;", + [(set Float32Regs:$dst, + (fdiv FloatConst1:$a, Float32Regs:$b))]>, + Requires<[do_DIVF32_APPROX, doF32FTZ]>; +def FDIV321r : NVPTXInst<(outs Float32Regs:$dst), + (ins f32imm:$a, Float32Regs:$b), + "rcp.approx.f32 \t$dst, $b;", + [(set Float32Regs:$dst, + (fdiv FloatConst1:$a, Float32Regs:$b))]>, + Requires<[do_DIVF32_APPROX]>; +// +// F32 Approximate division +// +def FDIV32approxrr_ftz : NVPTXInst<(outs 
Float32Regs:$dst), + (ins Float32Regs:$a, Float32Regs:$b), + "div.approx.ftz.f32 \t$dst, $a, $b;", + [(set Float32Regs:$dst, + (fdiv Float32Regs:$a, Float32Regs:$b))]>, + Requires<[do_DIVF32_APPROX, doF32FTZ]>; +def FDIV32approxrr : NVPTXInst<(outs Float32Regs:$dst), + (ins Float32Regs:$a, Float32Regs:$b), + "div.approx.f32 \t$dst, $a, $b;", + [(set Float32Regs:$dst, + (fdiv Float32Regs:$a, Float32Regs:$b))]>, + Requires<[do_DIVF32_APPROX]>; +// +// F32 Semi-accurate reciprocal +// +// rcp.approx gives the same result as div.full(1.0f, a) and is faster. +// +def FDIV321r_approx_ftz : NVPTXInst<(outs Float32Regs:$dst), + (ins f32imm:$a, Float32Regs:$b), + "rcp.approx.ftz.f32 \t$dst, $b;", + [(set Float32Regs:$dst, + (fdiv FloatConst1:$a, Float32Regs:$b))]>, + Requires<[do_DIVF32_FULL, doF32FTZ]>; +def FDIV321r_approx : NVPTXInst<(outs Float32Regs:$dst), + (ins f32imm:$a, Float32Regs:$b), + "rcp.approx.f32 \t$dst, $b;", + [(set Float32Regs:$dst, + (fdiv FloatConst1:$a, Float32Regs:$b))]>, + Requires<[do_DIVF32_FULL]>; +// +// F32 Semi-accurate division +// +def FDIV32rr_ftz : NVPTXInst<(outs Float32Regs:$dst), + (ins Float32Regs:$a, Float32Regs:$b), + "div.full.ftz.f32 \t$dst, $a, $b;", + [(set Float32Regs:$dst, + (fdiv Float32Regs:$a, Float32Regs:$b))]>, + Requires<[do_DIVF32_FULL, doF32FTZ]>; +def FDIV32ri_ftz : NVPTXInst<(outs Float32Regs:$dst), + (ins Float32Regs:$a, f32imm:$b), + "div.full.ftz.f32 \t$dst, $a, $b;", + [(set Float32Regs:$dst, + (fdiv Float32Regs:$a, fpimm:$b))]>, + Requires<[do_DIVF32_FULL, doF32FTZ]>; +def FDIV32rr : NVPTXInst<(outs Float32Regs:$dst), + (ins Float32Regs:$a, Float32Regs:$b), + "div.full.f32 \t$dst, $a, $b;", + [(set Float32Regs:$dst, + (fdiv Float32Regs:$a, Float32Regs:$b))]>, + Requires<[do_DIVF32_FULL]>; +def FDIV32ri : NVPTXInst<(outs Float32Regs:$dst), + (ins Float32Regs:$a, f32imm:$b), + "div.full.f32 \t$dst, $a, $b;", + [(set Float32Regs:$dst, + (fdiv Float32Regs:$a, fpimm:$b))]>, + Requires<[do_DIVF32_FULL]>; +// +// F32 
Accurate reciprocal +// +def FDIV321r_prec_ftz : NVPTXInst<(outs Float32Regs:$dst), + (ins f32imm:$a, Float32Regs:$b), + "rcp.rn.ftz.f32 \t$dst, $b;", + [(set Float32Regs:$dst, + (fdiv FloatConst1:$a, Float32Regs:$b))]>, + Requires<[reqPTX20, doF32FTZ]>; +def FDIV321r_prec : NVPTXInst<(outs Float32Regs:$dst), + (ins f32imm:$a, Float32Regs:$b), + "rcp.rn.f32 \t$dst, $b;", + [(set Float32Regs:$dst, + (fdiv FloatConst1:$a, Float32Regs:$b))]>, + Requires<[reqPTX20]>; +// +// F32 Accurate division +// +def FDIV32rr_prec_ftz : NVPTXInst<(outs Float32Regs:$dst), + (ins Float32Regs:$a, Float32Regs:$b), + "div.rn.ftz.f32 \t$dst, $a, $b;", + [(set Float32Regs:$dst, + (fdiv Float32Regs:$a, Float32Regs:$b))]>, + Requires<[doF32FTZ, reqPTX20]>; +def FDIV32ri_prec_ftz : NVPTXInst<(outs Float32Regs:$dst), + (ins Float32Regs:$a, f32imm:$b), + "div.rn.ftz.f32 \t$dst, $a, $b;", + [(set Float32Regs:$dst, + (fdiv Float32Regs:$a, fpimm:$b))]>, + Requires<[doF32FTZ, reqPTX20]>; +def FDIV32rr_prec : NVPTXInst<(outs Float32Regs:$dst), + (ins Float32Regs:$a, Float32Regs:$b), + "div.rn.f32 \t$dst, $a, $b;", + [(set Float32Regs:$dst, + (fdiv Float32Regs:$a, Float32Regs:$b))]>, + Requires<[reqPTX20]>; +def FDIV32ri_prec : NVPTXInst<(outs Float32Regs:$dst), + (ins Float32Regs:$a, f32imm:$b), + "div.rn.f32 \t$dst, $a, $b;", + [(set Float32Regs:$dst, + (fdiv Float32Regs:$a, fpimm:$b))]>, + Requires<[reqPTX20]>; + + +multiclass FPCONTRACT32<string OpcStr, Predicate Pred> { + def rrr : NVPTXInst<(outs Float32Regs:$dst), + (ins Float32Regs:$a, Float32Regs:$b, Float32Regs:$c), + !strconcat(OpcStr, " \t$dst, $a, $b, $c;"), + [(set Float32Regs:$dst, (fadd + (fmul Float32Regs:$a, Float32Regs:$b), + Float32Regs:$c))]>, Requires<[Pred]>; + // This is to work around a weird bug in TableGen that does not automatically + // generate the following permuted rule rrr2 from the above rrr. + // So we explicitly add it here. This happens to FMA32 only. + // See the comments at FMAD32 and FMA32 for more information. 
+ def rrr2 : NVPTXInst<(outs Float32Regs:$dst), + (ins Float32Regs:$a, Float32Regs:$b, Float32Regs:$c), + !strconcat(OpcStr, " \t$dst, $a, $b, $c;"), + [(set Float32Regs:$dst, (fadd Float32Regs:$c, + (fmul Float32Regs:$a, Float32Regs:$b)))]>, + Requires<[Pred]>; + def rri : NVPTXInst<(outs Float32Regs:$dst), + (ins Float32Regs:$a, Float32Regs:$b, f32imm:$c), + !strconcat(OpcStr, " \t$dst, $a, $b, $c;"), + [(set Float32Regs:$dst, (fadd + (fmul Float32Regs:$a, Float32Regs:$b), fpimm:$c))]>, + Requires<[Pred]>; + def rir : NVPTXInst<(outs Float32Regs:$dst), + (ins Float32Regs:$a, f32imm:$b, Float32Regs:$c), + !strconcat(OpcStr, " \t$dst, $a, $b, $c;"), + [(set Float32Regs:$dst, (fadd + (fmul Float32Regs:$a, fpimm:$b), Float32Regs:$c))]>, + Requires<[Pred]>; + def rii : NVPTXInst<(outs Float32Regs:$dst), + (ins Float32Regs:$a, f32imm:$b, f32imm:$c), + !strconcat(OpcStr, " \t$dst, $a, $b, $c;"), + [(set Float32Regs:$dst, (fadd + (fmul Float32Regs:$a, fpimm:$b), fpimm:$c))]>, + Requires<[Pred]>; +} + +multiclass FPCONTRACT64<string OpcStr, Predicate Pred> { + def rrr : NVPTXInst<(outs Float64Regs:$dst), + (ins Float64Regs:$a, Float64Regs:$b, Float64Regs:$c), + !strconcat(OpcStr, " \t$dst, $a, $b, $c;"), + [(set Float64Regs:$dst, (fadd + (fmul Float64Regs:$a, Float64Regs:$b), + Float64Regs:$c))]>, Requires<[Pred]>; + def rri : NVPTXInst<(outs Float64Regs:$dst), + (ins Float64Regs:$a, Float64Regs:$b, f64imm:$c), + !strconcat(OpcStr, " \t$dst, $a, $b, $c;"), + [(set Float64Regs:$dst, (fadd (fmul Float64Regs:$a, + Float64Regs:$b), fpimm:$c))]>, Requires<[Pred]>; + def rir : NVPTXInst<(outs Float64Regs:$dst), + (ins Float64Regs:$a, f64imm:$b, Float64Regs:$c), + !strconcat(OpcStr, " \t$dst, $a, $b, $c;"), + [(set Float64Regs:$dst, (fadd + (fmul Float64Regs:$a, fpimm:$b), Float64Regs:$c))]>, + Requires<[Pred]>; + def rii : NVPTXInst<(outs Float64Regs:$dst), + (ins Float64Regs:$a, f64imm:$b, f64imm:$c), + !strconcat(OpcStr, " \t$dst, $a, $b, $c;"), + [(set Float64Regs:$dst, 
(fadd + (fmul Float64Regs:$a, fpimm:$b), fpimm:$c))]>, + Requires<[Pred]>; +} + +// Due to an unknown reason (most likely a bug in tablegen), tablegen does not +// automatically generate the rrr2 rule from +// the rrr rule (see FPCONTRACT32) for FMA32, though it does for FMAD32. +// If we reverse the order of the following two lines, then the rrr2 rule will be +// generated for FMA32, but not for rrr. +// Therefore, we manually write the rrr2 rule in FPCONTRACT32. +defm FMAD32_ftz : FPCONTRACT32<"mad.ftz.f32", doFMADF32_ftz>; +defm FMAD32 : FPCONTRACT32<"mad.f32", doFMADF32>; +defm FMA32_ftz : FPCONTRACT32<"fma.rn.ftz.f32", doFMAF32_ftz>; +defm FMA32 : FPCONTRACT32<"fma.rn.f32", doFMAF32>; +defm FMA64 : FPCONTRACT64<"fma.rn.f64", doFMAF64>; + +// b*c-a => fmad(b, c, -a) +multiclass FPCONTRACT32_SUB_PAT_MAD<NVPTXInst Inst, Predicate Pred> { + def : Pat<(fsub (fmul Float32Regs:$b, Float32Regs:$c), Float32Regs:$a), + (Inst Float32Regs:$b, Float32Regs:$c, (FNEGf32 Float32Regs:$a))>, + Requires<[Pred]>; +} + +// a-b*c => fmad(-b,c, a) +// - legal because a-b*c <=> a+(-b*c) <=> a+(-b)*c +// b*c-a => fmad(b, c, -a) +// - legal because b*c-a <=> b*c+(-a) +multiclass FPCONTRACT32_SUB_PAT<NVPTXInst Inst, Predicate Pred> { + def : Pat<(fsub Float32Regs:$a, (fmul Float32Regs:$b, Float32Regs:$c)), + (Inst (FNEGf32 Float32Regs:$b), Float32Regs:$c, Float32Regs:$a)>, + Requires<[Pred]>; + def : Pat<(fsub (fmul Float32Regs:$b, Float32Regs:$c), Float32Regs:$a), + (Inst Float32Regs:$b, Float32Regs:$c, (FNEGf32 Float32Regs:$a))>, + Requires<[Pred]>; +} + +// a-b*c => fmad(-b,c, a) +// b*c-a => fmad(b, c, -a) +multiclass FPCONTRACT64_SUB_PAT<NVPTXInst Inst, Predicate Pred> { + def : Pat<(fsub Float64Regs:$a, (fmul Float64Regs:$b, Float64Regs:$c)), + (Inst (FNEGf64 Float64Regs:$b), Float64Regs:$c, Float64Regs:$a)>, + Requires<[Pred]>; + + def : Pat<(fsub (fmul Float64Regs:$b, Float64Regs:$c), Float64Regs:$a), + (Inst Float64Regs:$b, Float64Regs:$c, (FNEGf64 Float64Regs:$a))>, + 
Requires<[Pred]>; +} + +defm FMAF32ext_ftz : FPCONTRACT32_SUB_PAT<FMA32_ftzrrr, doFMAF32AGG_ftz>; +defm FMAF32ext : FPCONTRACT32_SUB_PAT<FMA32rrr, doFMAF32AGG>; +defm FMADF32ext_ftz : FPCONTRACT32_SUB_PAT_MAD<FMAD32_ftzrrr, doFMADF32_ftz>; +defm FMADF32ext : FPCONTRACT32_SUB_PAT_MAD<FMAD32rrr, doFMADF32>; +defm FMAF64ext : FPCONTRACT64_SUB_PAT<FMA64rrr, doFMAF64AGG>; + +def SINF: NVPTXInst<(outs Float32Regs:$dst), (ins Float32Regs:$src), + "sin.approx.f32 \t$dst, $src;", + [(set Float32Regs:$dst, (fsin Float32Regs:$src))]>; +def COSF: NVPTXInst<(outs Float32Regs:$dst), (ins Float32Regs:$src), + "cos.approx.f32 \t$dst, $src;", + [(set Float32Regs:$dst, (fcos Float32Regs:$src))]>; + +//----------------------------------- +// Logical Arithmetic +//----------------------------------- + +multiclass LOG_FORMAT<string OpcStr, SDNode OpNode> { + def b1rr: NVPTXInst<(outs Int1Regs:$dst), (ins Int1Regs:$a, Int1Regs:$b), + !strconcat(OpcStr, ".pred \t$dst, $a, $b;"), + [(set Int1Regs:$dst, (OpNode Int1Regs:$a, Int1Regs:$b))]>; + def b1ri: NVPTXInst<(outs Int1Regs:$dst), (ins Int1Regs:$a, i1imm:$b), + !strconcat(OpcStr, ".pred \t$dst, $a, $b;"), + [(set Int1Regs:$dst, (OpNode Int1Regs:$a, imm:$b))]>; + def b8rr: NVPTXInst<(outs Int8Regs:$dst), (ins Int8Regs:$a, Int8Regs:$b), + !strconcat(OpcStr, ".b16 \t$dst, $a, $b;"), + [(set Int8Regs:$dst, (OpNode Int8Regs:$a, Int8Regs:$b))]>; + def b8ri: NVPTXInst<(outs Int8Regs:$dst), (ins Int8Regs:$a, i8imm:$b), + !strconcat(OpcStr, ".b16 \t$dst, $a, $b;"), + [(set Int8Regs:$dst, (OpNode Int8Regs:$a, imm:$b))]>; + def b16rr: NVPTXInst<(outs Int16Regs:$dst), (ins Int16Regs:$a, Int16Regs:$b), + !strconcat(OpcStr, ".b16 \t$dst, $a, $b;"), + [(set Int16Regs:$dst, (OpNode Int16Regs:$a, + Int16Regs:$b))]>; + def b16ri: NVPTXInst<(outs Int16Regs:$dst), (ins Int16Regs:$a, i16imm:$b), + !strconcat(OpcStr, ".b16 \t$dst, $a, $b;"), + [(set Int16Regs:$dst, (OpNode Int16Regs:$a, imm:$b))]>; + def b32rr: NVPTXInst<(outs Int32Regs:$dst), (ins 
Int32Regs:$a, Int32Regs:$b), + !strconcat(OpcStr, ".b32 \t$dst, $a, $b;"), + [(set Int32Regs:$dst, (OpNode Int32Regs:$a, + Int32Regs:$b))]>; + def b32ri: NVPTXInst<(outs Int32Regs:$dst), (ins Int32Regs:$a, i32imm:$b), + !strconcat(OpcStr, ".b32 \t$dst, $a, $b;"), + [(set Int32Regs:$dst, (OpNode Int32Regs:$a, imm:$b))]>; + def b64rr: NVPTXInst<(outs Int64Regs:$dst), (ins Int64Regs:$a, Int64Regs:$b), + !strconcat(OpcStr, ".b64 \t$dst, $a, $b;"), + [(set Int64Regs:$dst, (OpNode Int64Regs:$a, + Int64Regs:$b))]>; + def b64ri: NVPTXInst<(outs Int64Regs:$dst), (ins Int64Regs:$a, i64imm:$b), + !strconcat(OpcStr, ".b64 \t$dst, $a, $b;"), + [(set Int64Regs:$dst, (OpNode Int64Regs:$a, imm:$b))]>; +} + +defm OR : LOG_FORMAT<"or", or>; +defm AND : LOG_FORMAT<"and", and>; +defm XOR : LOG_FORMAT<"xor", xor>; + +def NOT1: NVPTXInst<(outs Int1Regs:$dst), (ins Int1Regs:$src), + "not.pred \t$dst, $src;", + [(set Int1Regs:$dst, (not Int1Regs:$src))]>; +def NOT8: NVPTXInst<(outs Int8Regs:$dst), (ins Int8Regs:$src), + "not.b16 \t$dst, $src;", + [(set Int8Regs:$dst, (not Int8Regs:$src))]>; +def NOT16: NVPTXInst<(outs Int16Regs:$dst), (ins Int16Regs:$src), + "not.b16 \t$dst, $src;", + [(set Int16Regs:$dst, (not Int16Regs:$src))]>; +def NOT32: NVPTXInst<(outs Int32Regs:$dst), (ins Int32Regs:$src), + "not.b32 \t$dst, $src;", + [(set Int32Regs:$dst, (not Int32Regs:$src))]>; +def NOT64: NVPTXInst<(outs Int64Regs:$dst), (ins Int64Regs:$src), + "not.b64 \t$dst, $src;", + [(set Int64Regs:$dst, (not Int64Regs:$src))]>; + +// For shifts, the second src operand must be 32-bit value +multiclass LSHIFT_FORMAT<string OpcStr, SDNode OpNode> { + def i64rr : NVPTXInst<(outs Int64Regs:$dst), (ins Int64Regs:$a, + Int32Regs:$b), + !strconcat(OpcStr, "64 \t$dst, $a, $b;"), + [(set Int64Regs:$dst, (OpNode Int64Regs:$a, + Int32Regs:$b))]>; + def i64ri : NVPTXInst<(outs Int64Regs:$dst), (ins Int64Regs:$a, i32imm:$b), + !strconcat(OpcStr, "64 \t$dst, $a, $b;"), + [(set Int64Regs:$dst, (OpNode Int64Regs:$a, + 
(i32 imm:$b)))]>; + def i32rr : NVPTXInst<(outs Int32Regs:$dst), (ins Int32Regs:$a, + Int32Regs:$b), + !strconcat(OpcStr, "32 \t$dst, $a, $b;"), + [(set Int32Regs:$dst, (OpNode Int32Regs:$a, + Int32Regs:$b))]>; + def i32ri : NVPTXInst<(outs Int32Regs:$dst), (ins Int32Regs:$a, i32imm:$b), + !strconcat(OpcStr, "32 \t$dst, $a, $b;"), + [(set Int32Regs:$dst, (OpNode Int32Regs:$a, + (i32 imm:$b)))]>; + def i32ii : NVPTXInst<(outs Int32Regs:$dst), (ins i32imm:$a, i32imm:$b), + !strconcat(OpcStr, "32 \t$dst, $a, $b;"), + [(set Int32Regs:$dst, (OpNode (i32 imm:$a), + (i32 imm:$b)))]>; + def i16rr : NVPTXInst<(outs Int16Regs:$dst), (ins Int16Regs:$a, + Int32Regs:$b), + !strconcat(OpcStr, "16 \t$dst, $a, $b;"), + [(set Int16Regs:$dst, (OpNode Int16Regs:$a, + Int32Regs:$b))]>; + def i16ri : NVPTXInst<(outs Int16Regs:$dst), (ins Int16Regs:$a, i32imm:$b), + !strconcat(OpcStr, "16 \t$dst, $a, $b;"), + [(set Int16Regs:$dst, (OpNode Int16Regs:$a, + (i32 imm:$b)))]>; + def i8rr : NVPTXInst<(outs Int8Regs:$dst), (ins Int8Regs:$a, Int32Regs:$b), + !strconcat(OpcStr, "16 \t$dst, $a, $b;"), + [(set Int8Regs:$dst, (OpNode Int8Regs:$a, + Int32Regs:$b))]>; + def i8ri : NVPTXInst<(outs Int8Regs:$dst), (ins Int8Regs:$a, i32imm:$b), + !strconcat(OpcStr, "16 \t$dst, $a, $b;"), + [(set Int8Regs:$dst, (OpNode Int8Regs:$a, + (i32 imm:$b)))]>; +} + +defm SHL : LSHIFT_FORMAT<"shl.b", shl>; + +// For shifts, the second src operand must be 32-bit value +// Need to add cvt for the 8-bits. 
+// Right-shift instruction family, instantiated below for sra and srl.
+//   OpcStr - PTX opcode prefix; the operand width ("64", "32" or "16") is
+//            appended to it.
+//   OpNode - DAG node matched by every variant.
+//   CVTStr - PTX cvt opcode emitted ahead of the shift in the 8-bit variants.
+// The shift amount is always a 32-bit register or a 32-bit immediate.
+multiclass RSHIFT_FORMAT<string OpcStr, SDNode OpNode, string CVTStr> {
+  // ---- 64-bit value ----
+  def i64rr : NVPTXInst<(outs Int64Regs:$dst),
+                        (ins Int64Regs:$a, Int32Regs:$b),
+                        !strconcat(OpcStr, "64 \t$dst, $a, $b;"),
+                        [(set Int64Regs:$dst,
+                           (OpNode Int64Regs:$a, Int32Regs:$b))]>;
+  def i64ri : NVPTXInst<(outs Int64Regs:$dst),
+                        (ins Int64Regs:$a, i32imm:$b),
+                        !strconcat(OpcStr, "64 \t$dst, $a, $b;"),
+                        [(set Int64Regs:$dst,
+                           (OpNode Int64Regs:$a, (i32 imm:$b)))]>;
+
+  // ---- 32-bit value (register/register, register/imm, imm/imm) ----
+  def i32rr : NVPTXInst<(outs Int32Regs:$dst),
+                        (ins Int32Regs:$a, Int32Regs:$b),
+                        !strconcat(OpcStr, "32 \t$dst, $a, $b;"),
+                        [(set Int32Regs:$dst,
+                           (OpNode Int32Regs:$a, Int32Regs:$b))]>;
+  def i32ri : NVPTXInst<(outs Int32Regs:$dst),
+                        (ins Int32Regs:$a, i32imm:$b),
+                        !strconcat(OpcStr, "32 \t$dst, $a, $b;"),
+                        [(set Int32Regs:$dst,
+                           (OpNode Int32Regs:$a, (i32 imm:$b)))]>;
+  def i32ii : NVPTXInst<(outs Int32Regs:$dst),
+                        (ins i32imm:$a, i32imm:$b),
+                        !strconcat(OpcStr, "32 \t$dst, $a, $b;"),
+                        [(set Int32Regs:$dst,
+                           (OpNode (i32 imm:$a), (i32 imm:$b)))]>;
+
+  // ---- 16-bit value ----
+  def i16rr : NVPTXInst<(outs Int16Regs:$dst),
+                        (ins Int16Regs:$a, Int32Regs:$b),
+                        !strconcat(OpcStr, "16 \t$dst, $a, $b;"),
+                        [(set Int16Regs:$dst,
+                           (OpNode Int16Regs:$a, Int32Regs:$b))]>;
+  def i16ri : NVPTXInst<(outs Int16Regs:$dst),
+                        (ins Int16Regs:$a, i32imm:$b),
+                        !strconcat(OpcStr, "16 \t$dst, $a, $b;"),
+                        [(set Int16Regs:$dst,
+                           (OpNode Int16Regs:$a, (i32 imm:$b)))]>;
+
+  // ---- 8-bit value ----
+  // Two PTX statements are printed: CVTStr converts $a into $dst, then the
+  // 16-bit form of the shift is applied to $dst in place.
+  def i8rr : NVPTXInst<(outs Int8Regs:$dst),
+                       (ins Int8Regs:$a, Int32Regs:$b),
+                       !strconcat(!strconcat(CVTStr, " \t$dst, $a;\n\t"),
+                                  !strconcat(OpcStr, "16 \t$dst, $dst, $b;")),
+                       [(set Int8Regs:$dst,
+                          (OpNode Int8Regs:$a, Int32Regs:$b))]>;
+  def i8ri : NVPTXInst<(outs Int8Regs:$dst),
+                       (ins Int8Regs:$a, i32imm:$b),
+                       !strconcat(!strconcat(CVTStr, " \t$dst, $a;\n\t"),
+                                  !strconcat(OpcStr, "16 \t$dst, $dst, $b;")),
+                       [(set Int8Regs:$dst,
+                          (OpNode Int8Regs:$a, (i32 imm:$b)))]>;
+}
+
+// Arithmetic shift right pairs with the signed cvt; logical shift right with
+// the unsigned cvt.
+defm SRA : RSHIFT_FORMAT<"shr.s", sra, "cvt.s16.s8">;
+defm SRL : RSHIFT_FORMAT<"shr.u", srl, "cvt.u16.u8">;
+
+// 32bit +def ROT32imm_sw : NVPTXInst<(outs Int32Regs:$dst), + (ins Int32Regs:$src, i32imm:$amt1, i32imm:$amt2), + !strconcat("{{\n\t", + !strconcat(".reg .b32 %lhs;\n\t", + !strconcat(".reg .b32 %rhs;\n\t", + !strconcat("shl.b32 \t%lhs, $src, $amt1;\n\t", + !strconcat("shr.b32 \t%rhs, $src, $amt2;\n\t", + !strconcat("add.u32 \t$dst, %lhs, %rhs;\n\t", + !strconcat("}}", ""))))))), + []>; + +def SUB_FRM_32 : SDNodeXForm<imm, [{ + return CurDAG->getTargetConstant(32-N->getZExtValue(), MVT::i32); +}]>; + +def : Pat<(rotl Int32Regs:$src, (i32 imm:$amt)), + (ROT32imm_sw Int32Regs:$src, imm:$amt, (SUB_FRM_32 node:$amt))>; +def : Pat<(rotr Int32Regs:$src, (i32 imm:$amt)), + (ROT32imm_sw Int32Regs:$src, (SUB_FRM_32 node:$amt), imm:$amt)>; + +def ROTL32reg_sw : NVPTXInst<(outs Int32Regs:$dst), (ins Int32Regs:$src, + Int32Regs:$amt), + !strconcat("{{\n\t", + !strconcat(".reg .b32 %lhs;\n\t", + !strconcat(".reg .b32 %rhs;\n\t", + !strconcat(".reg .b32 %amt2;\n\t", + !strconcat("shl.b32 \t%lhs, $src, $amt;\n\t", + !strconcat("sub.s32 \t%amt2, 32, $amt;\n\t", + !strconcat("shr.b32 \t%rhs, $src, %amt2;\n\t", + !strconcat("add.u32 \t$dst, %lhs, %rhs;\n\t", + !strconcat("}}", ""))))))))), + [(set Int32Regs:$dst, (rotl Int32Regs:$src, Int32Regs:$amt))]>; + +def ROTR32reg_sw : NVPTXInst<(outs Int32Regs:$dst), (ins Int32Regs:$src, + Int32Regs:$amt), + !strconcat("{{\n\t", + !strconcat(".reg .b32 %lhs;\n\t", + !strconcat(".reg .b32 %rhs;\n\t", + !strconcat(".reg .b32 %amt2;\n\t", + !strconcat("shr.b32 \t%lhs, $src, $amt;\n\t", + !strconcat("sub.s32 \t%amt2, 32, $amt;\n\t", + !strconcat("shl.b32 \t%rhs, $src, %amt2;\n\t", + !strconcat("add.u32 \t$dst, %lhs, %rhs;\n\t", + !strconcat("}}", ""))))))))), + [(set Int32Regs:$dst, (rotr Int32Regs:$src, Int32Regs:$amt))]>; + +// 64bit +def ROT64imm_sw : NVPTXInst<(outs Int64Regs:$dst), (ins Int64Regs:$src, + i32imm:$amt1, i32imm:$amt2), + !strconcat("{{\n\t", + !strconcat(".reg .b64 %lhs;\n\t", + !strconcat(".reg .b64 %rhs;\n\t", + 
!strconcat("shl.b64 \t%lhs, $src, $amt1;\n\t", + !strconcat("shr.b64 \t%rhs, $src, $amt2;\n\t", + !strconcat("add.u64 \t$dst, %lhs, %rhs;\n\t", + !strconcat("}}", ""))))))), + []>; + +def SUB_FRM_64 : SDNodeXForm<imm, [{ + return CurDAG->getTargetConstant(64-N->getZExtValue(), MVT::i32); +}]>; + +def : Pat<(rotl Int64Regs:$src, (i32 imm:$amt)), + (ROT64imm_sw Int64Regs:$src, imm:$amt, (SUB_FRM_64 node:$amt))>; +def : Pat<(rotr Int64Regs:$src, (i32 imm:$amt)), + (ROT64imm_sw Int64Regs:$src, (SUB_FRM_64 node:$amt), imm:$amt)>; + +def ROTL64reg_sw : NVPTXInst<(outs Int64Regs:$dst), (ins Int64Regs:$src, + Int32Regs:$amt), + !strconcat("{{\n\t", + !strconcat(".reg .b64 %lhs;\n\t", + !strconcat(".reg .b64 %rhs;\n\t", + !strconcat(".reg .u32 %amt2;\n\t", + !strconcat("shl.b64 \t%lhs, $src, $amt;\n\t", + !strconcat("sub.u32 \t%amt2, 64, $amt;\n\t", + !strconcat("shr.b64 \t%rhs, $src, %amt2;\n\t", + !strconcat("add.u64 \t$dst, %lhs, %rhs;\n\t", + !strconcat("}}", ""))))))))), + [(set Int64Regs:$dst, (rotl Int64Regs:$src, Int32Regs:$amt))]>; + +def ROTR64reg_sw : NVPTXInst<(outs Int64Regs:$dst), (ins Int64Regs:$src, + Int32Regs:$amt), + !strconcat("{{\n\t", + !strconcat(".reg .b64 %lhs;\n\t", + !strconcat(".reg .b64 %rhs;\n\t", + !strconcat(".reg .u32 %amt2;\n\t", + !strconcat("shr.b64 \t%lhs, $src, $amt;\n\t", + !strconcat("sub.u32 \t%amt2, 64, $amt;\n\t", + !strconcat("shl.b64 \t%rhs, $src, %amt2;\n\t", + !strconcat("add.u64 \t$dst, %lhs, %rhs;\n\t", + !strconcat("}}", ""))))))))), + [(set Int64Regs:$dst, (rotr Int64Regs:$src, Int32Regs:$amt))]>; + + +//----------------------------------- +// Data Movement (Load / Store, Move) +//----------------------------------- + +def ADDRri : ComplexPattern<i32, 2, "SelectADDRri", [frameindex], + [SDNPWantRoot]>; +def ADDRri64 : ComplexPattern<i64, 2, "SelectADDRri64", [frameindex], + [SDNPWantRoot]>; + +def MEMri : Operand<i32> { + let PrintMethod = "printMemOperand"; + let MIOperandInfo = (ops Int32Regs, i32imm); +} +def MEMri64 : 
Operand<i64> { + let PrintMethod = "printMemOperand"; + let MIOperandInfo = (ops Int64Regs, i64imm); +} + +def imem : Operand<iPTR> { + let PrintMethod = "printOperand"; +} + +def imemAny : Operand<iPTRAny> { + let PrintMethod = "printOperand"; +} + +def LdStCode : Operand<i32> { + let PrintMethod = "printLdStCode"; +} + +def SDTWrapper : SDTypeProfile<1, 1, [SDTCisSameAs<0, 1>, SDTCisPtrTy<0>]>; +def Wrapper : SDNode<"NVPTXISD::Wrapper", SDTWrapper>; + +def MOV_ADDR : NVPTXInst<(outs Int32Regs:$dst), (ins imem:$a), + "mov.u32 \t$dst, $a;", + [(set Int32Regs:$dst, (Wrapper tglobaladdr:$a))]>; + +def MOV_ADDR64 : NVPTXInst<(outs Int64Regs:$dst), (ins imem:$a), + "mov.u64 \t$dst, $a;", + [(set Int64Regs:$dst, (Wrapper tglobaladdr:$a))]>; + +// copyPhysreg is hard-coded in NVPTXInstrInfo.cpp +let IsSimpleMove=1 in { +def IMOV1rr: NVPTXInst<(outs Int1Regs:$dst), (ins Int1Regs:$sss), + "mov.pred \t$dst, $sss;", []>; +def IMOV8rr: NVPTXInst<(outs Int8Regs:$dst), (ins Int8Regs:$sss), + "mov.u16 \t$dst, $sss;", []>; +def IMOV16rr: NVPTXInst<(outs Int16Regs:$dst), (ins Int16Regs:$sss), + "mov.u16 \t$dst, $sss;", []>; +def IMOV32rr: NVPTXInst<(outs Int32Regs:$dst), (ins Int32Regs:$sss), + "mov.u32 \t$dst, $sss;", []>; +def IMOV64rr: NVPTXInst<(outs Int64Regs:$dst), (ins Int64Regs:$sss), + "mov.u64 \t$dst, $sss;", []>; + +def FMOV32rr: NVPTXInst<(outs Float32Regs:$dst), (ins Float32Regs:$src), + "mov.f32 \t$dst, $src;", []>; +def FMOV64rr: NVPTXInst<(outs Float64Regs:$dst), (ins Float64Regs:$src), + "mov.f64 \t$dst, $src;", []>; +} +def IMOV1ri: NVPTXInst<(outs Int1Regs:$dst), (ins i1imm:$src), + "mov.pred \t$dst, $src;", + [(set Int1Regs:$dst, imm:$src)]>; +def IMOV8ri: NVPTXInst<(outs Int8Regs:$dst), (ins i8imm:$src), + "mov.u16 \t$dst, $src;", + [(set Int8Regs:$dst, imm:$src)]>; +def IMOV16ri: NVPTXInst<(outs Int16Regs:$dst), (ins i16imm:$src), + "mov.u16 \t$dst, $src;", + [(set Int16Regs:$dst, imm:$src)]>; +def IMOV32ri: NVPTXInst<(outs Int32Regs:$dst), (ins i32imm:$src), 
+ "mov.u32 \t$dst, $src;", + [(set Int32Regs:$dst, imm:$src)]>; +def IMOV64i: NVPTXInst<(outs Int64Regs:$dst), (ins i64imm:$src), + "mov.u64 \t$dst, $src;", + [(set Int64Regs:$dst, imm:$src)]>; + +def FMOV32ri: NVPTXInst<(outs Float32Regs:$dst), (ins f32imm:$src), + "mov.f32 \t$dst, $src;", + [(set Float32Regs:$dst, fpimm:$src)]>; +def FMOV64ri: NVPTXInst<(outs Float64Regs:$dst), (ins f64imm:$src), + "mov.f64 \t$dst, $src;", + [(set Float64Regs:$dst, fpimm:$src)]>; + +def : Pat<(i32 (Wrapper texternalsym:$dst)), (IMOV32ri texternalsym:$dst)>; + +//---- Copy Frame Index ---- +def LEA_ADDRi : NVPTXInst<(outs Int32Regs:$dst), (ins MEMri:$addr), + "add.u32 \t$dst, ${addr:add};", + [(set Int32Regs:$dst, ADDRri:$addr)]>; +def LEA_ADDRi64 : NVPTXInst<(outs Int64Regs:$dst), (ins MEMri64:$addr), + "add.u64 \t$dst, ${addr:add};", + [(set Int64Regs:$dst, ADDRri64:$addr)]>; + +//----------------------------------- +// Comparison and Selection +//----------------------------------- + +// Generate string block like +// { +// .reg .pred p; +// setp.gt.s16 p, %a, %b; +// selp.s16 %dst, -1, 0, p; +// } +// when OpcStr=setp.gt.s sz1=16 sz2=16 d=%dst a=%a b=%b +class Set_Str<string OpcStr, string sz1, string sz2, string d, string a, + string b> { + string t1 = "{{\n\t.reg .pred p;\n\t"; + string t2 = !strconcat(t1 , OpcStr); + string t3 = !strconcat(t2 , sz1); + string t4 = !strconcat(t3 , " \tp, "); + string t5 = !strconcat(t4 , a); + string t6 = !strconcat(t5 , ", "); + string t7 = !strconcat(t6 , b); + string t8 = !strconcat(t7 , ";\n\tselp.s"); + string t9 = !strconcat(t8 , sz2); + string t10 = !strconcat(t9, " \t"); + string t11 = !strconcat(t10, d); + string s = !strconcat(t11, ", -1, 0, p;\n\t}}"); +} + +// Generate string block like +// { +// .reg .pred p; +// .reg .s16 %temp1; +// .reg .s16 %temp2; +// cvt.s16.s8 %temp1, %a; +// cvt.s16.s8 %temp2, %b; +// setp.gt.s16 p, %temp1, %temp2; +// selp.s16 %dst, -1, 0, p; +// } +// when OpcStr=setp.gt.s d=%dst a=%a b=%b type=s16 
cvt=cvt.s16.s8 +class Set_Stri8<string OpcStr, string d, string a, string b, string type, + string cvt> { + string t1 = "{{\n\t.reg .pred p;\n\t"; + string t2 = !strconcat(t1, ".reg ."); + string t3 = !strconcat(t2, type); + string t4 = !strconcat(t3, " %temp1;\n\t"); + string t5 = !strconcat(t4, ".reg ."); + string t6 = !strconcat(t5, type); + string t7 = !strconcat(t6, " %temp2;\n\t"); + string t8 = !strconcat(t7, cvt); + string t9 = !strconcat(t8, " \t%temp1, "); + string t10 = !strconcat(t9, a); + string t11 = !strconcat(t10, ";\n\t"); + string t12 = !strconcat(t11, cvt); + string t13 = !strconcat(t12, " \t%temp2, "); + string t14 = !strconcat(t13, b); + string t15 = !strconcat(t14, ";\n\t"); + string t16 = !strconcat(t15, OpcStr); + string t17 = !strconcat(t16, "16"); + string t18 = !strconcat(t17, " \tp, %temp1, %temp2;\n\t"); + string t19 = !strconcat(t18, "selp.s16 \t"); + string t20 = !strconcat(t19, d); + string s = !strconcat(t20, ", -1, 0, p;\n\t}}"); +} + +multiclass ISET_FORMAT<string OpcStr, string OpcStr_u32, PatFrag OpNode, + string TypeStr, string CVTStr> { + def i8rr_toi8: NVPTXInst<(outs Int8Regs:$dst), (ins Int8Regs:$a, Int8Regs:$b), + Set_Stri8<OpcStr, "$dst", "$a", "$b", TypeStr, CVTStr>.s, + []>; + def i16rr_toi16: NVPTXInst<(outs Int16Regs:$dst), (ins Int16Regs:$a, + Int16Regs:$b), + Set_Str<OpcStr, "16", "16", "$dst", "$a", "$b">.s, + []>; + def i32rr_toi32: NVPTXInst<(outs Int32Regs:$dst), (ins Int32Regs:$a, + Int32Regs:$b), + Set_Str<OpcStr, "32", "32", "$dst", "$a", "$b">.s, + []>; + def i64rr_toi64: NVPTXInst<(outs Int64Regs:$dst), (ins Int64Regs:$a, + Int64Regs:$b), + Set_Str<OpcStr, "64", "64", "$dst", "$a", "$b">.s, + []>; + + def i8rr_p: NVPTXInst<(outs Int1Regs:$dst), (ins Int8Regs:$a, Int8Regs:$b), + Handle_i8rr<OpcStr, TypeStr, CVTStr>.s, + [(set Int1Regs:$dst, (OpNode Int8Regs:$a, Int8Regs:$b))]>; + def i8ri_p: NVPTXInst<(outs Int1Regs:$dst), (ins Int8Regs:$a, i8imm:$b), + Handle_i8ri<OpcStr, TypeStr, CVTStr>.s, + [(set 
Int1Regs:$dst, (OpNode Int8Regs:$a, imm:$b))]>; + def i8ir_p: NVPTXInst<(outs Int1Regs:$dst), (ins i8imm:$a, Int8Regs:$b), + Handle_i8ir<OpcStr, TypeStr, CVTStr>.s, + [(set Int1Regs:$dst, (OpNode imm:$a, Int8Regs:$b))]>; + def i16rr_p: NVPTXInst<(outs Int1Regs:$dst), (ins Int16Regs:$a, Int16Regs:$b), + !strconcat(OpcStr, "16 \t$dst, $a, $b;"), + [(set Int1Regs:$dst, (OpNode Int16Regs:$a, Int16Regs:$b))]>; + def i16ri_p: NVPTXInst<(outs Int1Regs:$dst), (ins Int16Regs:$a, i16imm:$b), + !strconcat(OpcStr, "16 \t$dst, $a, $b;"), + [(set Int1Regs:$dst, (OpNode Int16Regs:$a, imm:$b))]>; + def i16ir_p: NVPTXInst<(outs Int1Regs:$dst), (ins i16imm:$a, Int16Regs:$b), + !strconcat(OpcStr, "16 \t$dst, $a, $b;"), + [(set Int1Regs:$dst, (OpNode imm:$a, Int16Regs:$b))]>; + def i32rr_p: NVPTXInst<(outs Int1Regs:$dst), (ins Int32Regs:$a, Int32Regs:$b), + !strconcat(OpcStr, "32 \t$dst, $a, $b;"), + [(set Int1Regs:$dst, (OpNode Int32Regs:$a, Int32Regs:$b))]>; + def i32ri_p: NVPTXInst<(outs Int1Regs:$dst), (ins Int32Regs:$a, i32imm:$b), + !strconcat(OpcStr, "32 \t$dst, $a, $b;"), + [(set Int1Regs:$dst, (OpNode Int32Regs:$a, imm:$b))]>; + def i32ir_p: NVPTXInst<(outs Int1Regs:$dst), (ins i32imm:$a, Int32Regs:$b), + !strconcat(OpcStr, "32 \t$dst, $a, $b;"), + [(set Int1Regs:$dst, (OpNode imm:$a, Int32Regs:$b))]>; + def i64rr_p: NVPTXInst<(outs Int1Regs:$dst), (ins Int64Regs:$a, Int64Regs:$b), + !strconcat(OpcStr, "64 \t$dst, $a, $b;"), + [(set Int1Regs:$dst, (OpNode Int64Regs:$a, Int64Regs:$b))]>; + def i64ri_p: NVPTXInst<(outs Int1Regs:$dst), (ins Int64Regs:$a, i64imm:$b), + !strconcat(OpcStr, "64 \t$dst, $a, $b;"), + [(set Int1Regs:$dst, (OpNode Int64Regs:$a, imm:$b))]>; + def i64ir_p: NVPTXInst<(outs Int1Regs:$dst), (ins i64imm:$a, Int64Regs:$b), + !strconcat(OpcStr, "64 \t$dst, $a, $b;"), + [(set Int1Regs:$dst, (OpNode imm:$a, Int64Regs:$b))]>; + + def i8rr_u32: NVPTXInst<(outs Int32Regs:$dst), (ins Int8Regs:$a, Int8Regs:$b), + Handle_i8rr<OpcStr_u32, TypeStr, CVTStr>.s, + [(set 
Int32Regs:$dst, (OpNode Int8Regs:$a, Int8Regs:$b))]>; + def i8ri_u32: NVPTXInst<(outs Int32Regs:$dst), (ins Int8Regs:$a, i8imm:$b), + Handle_i8ri<OpcStr_u32, TypeStr, CVTStr>.s, + [(set Int32Regs:$dst, (OpNode Int8Regs:$a, imm:$b))]>; + def i8ir_u32: NVPTXInst<(outs Int32Regs:$dst), (ins i8imm:$a, Int8Regs:$b), + Handle_i8ir<OpcStr_u32, TypeStr, CVTStr>.s, + [(set Int32Regs:$dst, (OpNode imm:$a, Int8Regs:$b))]>; + def i16rr_u32: NVPTXInst<(outs Int32Regs:$dst), (ins Int16Regs:$a, + Int16Regs:$b), + !strconcat(OpcStr_u32, "16 \t$dst, $a, $b;"), + [(set Int32Regs:$dst, (OpNode Int16Regs:$a, Int16Regs:$b))]>; + def i16ri_u32: NVPTXInst<(outs Int32Regs:$dst), (ins Int16Regs:$a, i16imm:$b), + !strconcat(OpcStr_u32, "16 \t$dst, $a, $b;"), + [(set Int32Regs:$dst, (OpNode Int16Regs:$a, imm:$b))]>; + def i16ir_u32: NVPTXInst<(outs Int32Regs:$dst), (ins i16imm:$a, Int16Regs:$b), + !strconcat(OpcStr_u32, "16 \t$dst, $a, $b;"), + [(set Int32Regs:$dst, (OpNode imm:$a, Int16Regs:$b))]>; + def i32rr_u32: NVPTXInst<(outs Int32Regs:$dst), (ins Int32Regs:$a, + Int32Regs:$b), + !strconcat(OpcStr_u32, "32 \t$dst, $a, $b;"), + [(set Int32Regs:$dst, (OpNode Int32Regs:$a, Int32Regs:$b))]>; + def i32ri_u32: NVPTXInst<(outs Int32Regs:$dst), (ins Int32Regs:$a, i32imm:$b), + !strconcat(OpcStr_u32, "32 \t$dst, $a, $b;"), + [(set Int32Regs:$dst, (OpNode Int32Regs:$a, imm:$b))]>; + def i32ir_u32: NVPTXInst<(outs Int32Regs:$dst), (ins i32imm:$a, Int32Regs:$b), + !strconcat(OpcStr_u32, "32 \t$dst, $a, $b;"), + [(set Int32Regs:$dst, (OpNode imm:$a, Int32Regs:$b))]>; + def i64rr_u32: NVPTXInst<(outs Int32Regs:$dst), (ins Int64Regs:$a, + Int64Regs:$b), + !strconcat(OpcStr_u32, "64 \t$dst, $a, $b;"), + [(set Int32Regs:$dst, (OpNode Int64Regs:$a, Int64Regs:$b))]>; + def i64ri_u32: NVPTXInst<(outs Int32Regs:$dst), (ins Int64Regs:$a, i64imm:$b), + !strconcat(OpcStr_u32, "64 \t$dst, $a, $b;"), + [(set Int32Regs:$dst, (OpNode Int64Regs:$a, imm:$b))]>; + def i64ir_u32: NVPTXInst<(outs Int32Regs:$dst), (ins 
i64imm:$a, Int64Regs:$b), + !strconcat(OpcStr_u32, "64 \t$dst, $a, $b;"), + [(set Int32Regs:$dst, (OpNode imm:$a, Int64Regs:$b))]>; +} + +multiclass FSET_FORMAT<string OpcStr, string OpcStr_u32, PatFrag OpNode> { + def f32rr_toi32_ftz: NVPTXInst<(outs Int32Regs:$dst), (ins Float32Regs:$a, + Float32Regs:$b), + Set_Str<OpcStr, "ftz.f32", "32", "$dst", "$a", "$b">.s, + []>, Requires<[doF32FTZ]>; + def f32rr_toi32: NVPTXInst<(outs Int32Regs:$dst), (ins Float32Regs:$a, + Float32Regs:$b), + Set_Str<OpcStr, "f32", "32", "$dst", "$a", "$b">.s, + []>; + def f64rr_toi64: NVPTXInst<(outs Int64Regs:$dst), (ins Float64Regs:$a, + Float64Regs:$b), + Set_Str<OpcStr, "f64", "64", "$dst", "$a", "$b">.s, + []>; + def f64rr_toi32: NVPTXInst<(outs Int32Regs:$dst), (ins Float64Regs:$a, + Float64Regs:$b), + Set_Str<OpcStr, "f64", "32", "$dst", "$a", "$b">.s, + []>; + + def f32rr_p_ftz: NVPTXInst<(outs Int1Regs:$dst), (ins Float32Regs:$a + , Float32Regs:$b), + !strconcat(OpcStr, "ftz.f32 \t$dst, $a, $b;"), + [(set Int1Regs:$dst, (OpNode Float32Regs:$a, Float32Regs:$b))]> + , Requires<[doF32FTZ]>; + def f32rr_p: NVPTXInst<(outs Int1Regs:$dst), + (ins Float32Regs:$a, Float32Regs:$b), + !strconcat(OpcStr, "f32 \t$dst, $a, $b;"), + [(set Int1Regs:$dst, (OpNode Float32Regs:$a, Float32Regs:$b))]>; + def f32ri_p_ftz: NVPTXInst<(outs Int1Regs:$dst), + (ins Float32Regs:$a, f32imm:$b), + !strconcat(OpcStr, "ftz.f32 \t$dst, $a, $b;"), + [(set Int1Regs:$dst, (OpNode Float32Regs:$a, fpimm:$b))]>, + Requires<[doF32FTZ]>; + def f32ri_p: NVPTXInst<(outs Int1Regs:$dst), (ins Float32Regs:$a, f32imm:$b), + !strconcat(OpcStr, "f32 \t$dst, $a, $b;"), + [(set Int1Regs:$dst, (OpNode Float32Regs:$a, fpimm:$b))]>; + def f32ir_p_ftz: NVPTXInst<(outs Int1Regs:$dst), + (ins f32imm:$a, Float32Regs:$b), + !strconcat(OpcStr, "ftz.f32 \t$dst, $a, $b;"), + [(set Int1Regs:$dst, (OpNode fpimm:$a, Float32Regs:$b))]>, + Requires<[doF32FTZ]>; + def f32ir_p: NVPTXInst<(outs Int1Regs:$dst), (ins f32imm:$a, Float32Regs:$b), + 
!strconcat(OpcStr, "f32 \t$dst, $a, $b;"), + [(set Int1Regs:$dst, (OpNode fpimm:$a, Float32Regs:$b))]>; + def f64rr_p: NVPTXInst<(outs Int1Regs:$dst), + (ins Float64Regs:$a, Float64Regs:$b), + !strconcat(OpcStr, "f64 \t$dst, $a, $b;"), + [(set Int1Regs:$dst, (OpNode Float64Regs:$a, Float64Regs:$b))]>; + def f64ri_p: NVPTXInst<(outs Int1Regs:$dst), (ins Float64Regs:$a, f64imm:$b), + !strconcat(OpcStr, "f64 \t$dst, $a, $b;"), + [(set Int1Regs:$dst, (OpNode Float64Regs:$a, fpimm:$b))]>; + def f64ir_p: NVPTXInst<(outs Int1Regs:$dst), (ins f64imm:$a, Float64Regs:$b), + !strconcat(OpcStr, "f64 \t$dst, $a, $b;"), + [(set Int1Regs:$dst, (OpNode fpimm:$a, Float64Regs:$b))]>; + + // Comparisons producing a 32-bit result. The .ftz forms are gated on + // doF32FTZ, like the *_p_ftz and *_toi32_ftz definitions of this multiclass; + // ungated, they would compete unconditionally with the non-FTZ forms, which + // carry the identical selection pattern. + def f32rr_u32_ftz: NVPTXInst<(outs Int32Regs:$dst), + (ins Float32Regs:$a, Float32Regs:$b), + !strconcat(OpcStr_u32, "ftz.f32 \t$dst, $a, $b;"), + [(set Int32Regs:$dst, (OpNode Float32Regs:$a, Float32Regs:$b))]>, + Requires<[doF32FTZ]>; + def f32rr_u32: NVPTXInst<(outs Int32Regs:$dst), + (ins Float32Regs:$a, Float32Regs:$b), + !strconcat(OpcStr_u32, "f32 \t$dst, $a, $b;"), + [(set Int32Regs:$dst, (OpNode Float32Regs:$a, Float32Regs:$b))]>; + def f32ri_u32_ftz: NVPTXInst<(outs Int32Regs:$dst), + (ins Float32Regs:$a, f32imm:$b), + !strconcat(OpcStr_u32, "ftz.f32 \t$dst, $a, $b;"), + [(set Int32Regs:$dst, (OpNode Float32Regs:$a, fpimm:$b))]>, + Requires<[doF32FTZ]>; + def f32ri_u32: NVPTXInst<(outs Int32Regs:$dst), + (ins Float32Regs:$a, f32imm:$b), + !strconcat(OpcStr_u32, "f32 \t$dst, $a, $b;"), + [(set Int32Regs:$dst, (OpNode Float32Regs:$a, fpimm:$b))]>; + def f32ir_u32_ftz: NVPTXInst<(outs Int32Regs:$dst), + (ins f32imm:$a, Float32Regs:$b), + !strconcat(OpcStr_u32, "ftz.f32 \t$dst, $a, $b;"), + [(set Int32Regs:$dst, (OpNode fpimm:$a, Float32Regs:$b))]>, + Requires<[doF32FTZ]>; + def f32ir_u32: NVPTXInst<(outs Int32Regs:$dst), + (ins f32imm:$a, Float32Regs:$b), + !strconcat(OpcStr_u32, "f32 \t$dst, $a, $b;"), + [(set Int32Regs:$dst, (OpNode fpimm:$a, Float32Regs:$b))]>; + def f64rr_u32: NVPTXInst<(outs Int32Regs:$dst), + (ins Float64Regs:$a, Float64Regs:$b), + 
!strconcat(OpcStr_u32, "f64 \t$dst, $a, $b;"), + [(set Int32Regs:$dst, (OpNode Float64Regs:$a, Float64Regs:$b))]>; + def f64ri_u32: NVPTXInst<(outs Int32Regs:$dst), + (ins Float64Regs:$a, f64imm:$b), + !strconcat(OpcStr_u32, "f64 \t$dst, $a, $b;"), + [(set Int32Regs:$dst, (OpNode Float64Regs:$a, fpimm:$b))]>; + def f64ir_u32: NVPTXInst<(outs Int32Regs:$dst), + (ins f64imm:$a, Float64Regs:$b), + !strconcat(OpcStr_u32, "f64 \t$dst, $a, $b;"), + [(set Int32Regs:$dst, (OpNode fpimm:$a, Float64Regs:$b))]>; +} + +defm ISetSGT +: ISET_FORMAT<"setp.gt.s", "set.gt.u32.s", setgt, "s16", "cvt.s16.s8">; +defm ISetUGT +: ISET_FORMAT<"setp.gt.u", "set.gt.u32.u", setugt, "u16", "cvt.u16.u8">; +defm ISetSLT +: ISET_FORMAT<"setp.lt.s", "set.lt.u32.s", setlt, "s16", "cvt.s16.s8">; +defm ISetULT +: ISET_FORMAT<"setp.lt.u", "set.lt.u32.u", setult, "u16", "cvt.u16.u8">; +defm ISetSGE +: ISET_FORMAT<"setp.ge.s", "set.ge.u32.s", setge, "s16", "cvt.s16.s8">; +defm ISetUGE +: ISET_FORMAT<"setp.ge.u", "set.ge.u32.u", setuge, "u16", "cvt.u16.u8">; +defm ISetSLE +: ISET_FORMAT<"setp.le.s", "set.le.u32.s", setle, "s16", "cvt.s16.s8">; +defm ISetULE +: ISET_FORMAT<"setp.le.u", "set.le.u32.u", setule, "u16", "cvt.u16.u8">; +defm ISetSEQ +: ISET_FORMAT<"setp.eq.s", "set.eq.u32.s", seteq, "s16", "cvt.s16.s8">; +defm ISetUEQ +: ISET_FORMAT<"setp.eq.u", "set.eq.u32.u", setueq, "u16", "cvt.u16.u8">; +defm ISetSNE +: ISET_FORMAT<"setp.ne.s", "set.ne.u32.s", setne, "s16", "cvt.s16.s8">; +defm ISetUNE +: ISET_FORMAT<"setp.ne.u", "set.ne.u32.u", setune, "u16", "cvt.u16.u8">; + +def ISetSNEi1rr_p : NVPTXInst<(outs Int1Regs:$dst), + (ins Int1Regs:$a, Int1Regs:$b), + "xor.pred \t$dst, $a, $b;", + [(set Int1Regs:$dst, (setne Int1Regs:$a, Int1Regs:$b))]>; +def ISetUNEi1rr_p : NVPTXInst<(outs Int1Regs:$dst), + (ins Int1Regs:$a, Int1Regs:$b), + "xor.pred \t$dst, $a, $b;", + [(set Int1Regs:$dst, (setune Int1Regs:$a, Int1Regs:$b))]>; +def ISetSEQi1rr_p : NVPTXInst<(outs Int1Regs:$dst), + (ins Int1Regs:$a, 
Int1Regs:$b), + !strconcat("{{\n\t", + !strconcat(".reg .pred temp;\n\t", + !strconcat("xor.pred \ttemp, $a, $b;\n\t", + !strconcat("not.pred \t$dst, temp;\n\t}}","")))), + [(set Int1Regs:$dst, (seteq Int1Regs:$a, Int1Regs:$b))]>; +def ISetUEQi1rr_p : NVPTXInst<(outs Int1Regs:$dst), + (ins Int1Regs:$a, Int1Regs:$b), + !strconcat("{{\n\t", + !strconcat(".reg .pred temp;\n\t", + !strconcat("xor.pred \ttemp, $a, $b;\n\t", + !strconcat("not.pred \t$dst, temp;\n\t}}","")))), + [(set Int1Regs:$dst, (setueq Int1Regs:$a, Int1Regs:$b))]>; + +// Compare 2 i1's and produce a u32 +def ISETSNEi1rr_u32 : NVPTXInst<(outs Int32Regs:$dst), + (ins Int1Regs:$a, Int1Regs:$b), + !strconcat("{{\n\t", + !strconcat(".reg .pred temp;\n\t", + !strconcat("xor.pred \ttemp, $a, $b;\n\t", + !strconcat("selp.u32 \t$dst, -1, 0, temp;", "\n\t}}")))), + [(set Int32Regs:$dst, (setne Int1Regs:$a, Int1Regs:$b))]>; +def ISETSEQi1rr_u32 : NVPTXInst<(outs Int32Regs:$dst), + (ins Int1Regs:$a, Int1Regs:$b), + !strconcat("{{\n\t", + !strconcat(".reg .pred temp;\n\t", + !strconcat("xor.pred \ttemp, $a, $b;\n\t", + !strconcat("selp.u32 \t$dst, 0, -1, temp;", "\n\t}}")))), + [(set Int32Regs:$dst, (seteq Int1Regs:$a, Int1Regs:$b))]>; + +defm FSetGT : FSET_FORMAT<"setp.gt.", "set.gt.u32.", setogt>; +defm FSetLT : FSET_FORMAT<"setp.lt.", "set.lt.u32.", setolt>; +defm FSetGE : FSET_FORMAT<"setp.ge.", "set.ge.u32.", setoge>; +defm FSetLE : FSET_FORMAT<"setp.le.", "set.le.u32.", setole>; +defm FSetEQ : FSET_FORMAT<"setp.eq.", "set.eq.u32.", setoeq>; +defm FSetNE : FSET_FORMAT<"setp.ne.", "set.ne.u32.", setone>; + +defm FSetUGT : FSET_FORMAT<"setp.gtu.", "set.gtu.u32.", setugt>; +defm FSetULT : FSET_FORMAT<"setp.ltu.", "set.ltu.u32.",setult>; +defm FSetUGE : FSET_FORMAT<"setp.geu.", "set.geu.u32.",setuge>; +defm FSetULE : FSET_FORMAT<"setp.leu.", "set.leu.u32.",setule>; +defm FSetUEQ : FSET_FORMAT<"setp.equ.", "set.equ.u32.",setueq>; +defm FSetUNE : FSET_FORMAT<"setp.neu.", "set.neu.u32.",setune>; + +defm FSetNUM : 
FSET_FORMAT<"setp.num.", "set.num.u32.",seto>; +defm FSetNAN : FSET_FORMAT<"setp.nan.", "set.nan.u32.",setuo>; + +def SELECTi1rr : Pat<(i1 (select Int1Regs:$p, Int1Regs:$a, Int1Regs:$b)), + (ORb1rr (ANDb1rr Int1Regs:$p, Int1Regs:$a), + (ANDb1rr (NOT1 Int1Regs:$p), Int1Regs:$b))>; +def SELECTi8rr : NVPTXInst<(outs Int8Regs:$dst), + (ins Int8Regs:$a, Int8Regs:$b, Int1Regs:$p), + "selp.b16 \t$dst, $a, $b, $p;", + [(set Int8Regs:$dst, (select Int1Regs:$p, Int8Regs:$a, Int8Regs:$b))]>; +def SELECTi8ri : NVPTXInst<(outs Int8Regs:$dst), + (ins Int8Regs:$a, i8imm:$b, Int1Regs:$p), + "selp.b16 \t$dst, $a, $b, $p;", + [(set Int8Regs:$dst, (select Int1Regs:$p, Int8Regs:$a, imm:$b))]>; +def SELECTi8ir : NVPTXInst<(outs Int8Regs:$dst), + (ins i8imm:$a, Int8Regs:$b, Int1Regs:$p), + "selp.b16 \t$dst, $a, $b, $p;", + [(set Int8Regs:$dst, (select Int1Regs:$p, imm:$a, Int8Regs:$b))]>; +def SELECTi8ii : NVPTXInst<(outs Int8Regs:$dst), + (ins i8imm:$a, i8imm:$b, Int1Regs:$p), + "selp.b16 \t$dst, $a, $b, $p;", + [(set Int8Regs:$dst, (select Int1Regs:$p, imm:$a, imm:$b))]>; + +def SELECTi16rr : NVPTXInst<(outs Int16Regs:$dst), + (ins Int16Regs:$a, Int16Regs:$b, Int1Regs:$p), + "selp.b16 \t$dst, $a, $b, $p;", + [(set Int16Regs:$dst, (select Int1Regs:$p, Int16Regs:$a, Int16Regs:$b))]>; +def SELECTi16ri : NVPTXInst<(outs Int16Regs:$dst), + (ins Int16Regs:$a, i16imm:$b, Int1Regs:$p), + "selp.b16 \t$dst, $a, $b, $p;", + [(set Int16Regs:$dst, (select Int1Regs:$p, Int16Regs:$a, imm:$b))]>; +def SELECTi16ir : NVPTXInst<(outs Int16Regs:$dst), + (ins i16imm:$a, Int16Regs:$b, Int1Regs:$p), + "selp.b16 \t$dst, $a, $b, $p;", + [(set Int16Regs:$dst, (select Int1Regs:$p, imm:$a, Int16Regs:$b))]>; +def SELECTi16ii : NVPTXInst<(outs Int16Regs:$dst), + (ins i16imm:$a, i16imm:$b, Int1Regs:$p), + "selp.b16 \t$dst, $a, $b, $p;", + [(set Int16Regs:$dst, (select Int1Regs:$p, imm:$a, imm:$b))]>; + +def SELECTi32rr : NVPTXInst<(outs Int32Regs:$dst), + (ins Int32Regs:$a, Int32Regs:$b, Int1Regs:$p), + "selp.b32 
\t$dst, $a, $b, $p;", + [(set Int32Regs:$dst, (select Int1Regs:$p, Int32Regs:$a, Int32Regs:$b))]>; +def SELECTi32ri : NVPTXInst<(outs Int32Regs:$dst), + (ins Int32Regs:$a, i32imm:$b, Int1Regs:$p), + "selp.b32 \t$dst, $a, $b, $p;", + [(set Int32Regs:$dst, (select Int1Regs:$p, Int32Regs:$a, imm:$b))]>; +def SELECTi32ir : NVPTXInst<(outs Int32Regs:$dst), + (ins i32imm:$a, Int32Regs:$b, Int1Regs:$p), + "selp.b32 \t$dst, $a, $b, $p;", + [(set Int32Regs:$dst, (select Int1Regs:$p, imm:$a, Int32Regs:$b))]>; +def SELECTi32ii : NVPTXInst<(outs Int32Regs:$dst), + (ins i32imm:$a, i32imm:$b, Int1Regs:$p), + "selp.b32 \t$dst, $a, $b, $p;", + [(set Int32Regs:$dst, (select Int1Regs:$p, imm:$a, imm:$b))]>; + +def SELECTi64rr : NVPTXInst<(outs Int64Regs:$dst), + (ins Int64Regs:$a, Int64Regs:$b, Int1Regs:$p), + "selp.b64 \t$dst, $a, $b, $p;", + [(set Int64Regs:$dst, (select Int1Regs:$p, Int64Regs:$a, Int64Regs:$b))]>; +def SELECTi64ri : NVPTXInst<(outs Int64Regs:$dst), + (ins Int64Regs:$a, i64imm:$b, Int1Regs:$p), + "selp.b64 \t$dst, $a, $b, $p;", + [(set Int64Regs:$dst, (select Int1Regs:$p, Int64Regs:$a, imm:$b))]>; +def SELECTi64ir : NVPTXInst<(outs Int64Regs:$dst), + (ins i64imm:$a, Int64Regs:$b, Int1Regs:$p), + "selp.b64 \t$dst, $a, $b, $p;", + [(set Int64Regs:$dst, (select Int1Regs:$p, imm:$a, Int64Regs:$b))]>; +def SELECTi64ii : NVPTXInst<(outs Int64Regs:$dst), + (ins i64imm:$a, i64imm:$b, Int1Regs:$p), + "selp.b64 \t$dst, $a, $b, $p;", + [(set Int64Regs:$dst, (select Int1Regs:$p, imm:$a, imm:$b))]>; + +def SELECTf32rr : NVPTXInst<(outs Float32Regs:$dst), + (ins Float32Regs:$a, Float32Regs:$b, Int1Regs:$p), + "selp.f32 \t$dst, $a, $b, $p;", + [(set Float32Regs:$dst, + (select Int1Regs:$p, Float32Regs:$a, Float32Regs:$b))]>; +def SELECTf32ri : NVPTXInst<(outs Float32Regs:$dst), + (ins Float32Regs:$a, f32imm:$b, Int1Regs:$p), + "selp.f32 \t$dst, $a, $b, $p;", + [(set Float32Regs:$dst, (select Int1Regs:$p, Float32Regs:$a, fpimm:$b))]>; +def SELECTf32ir : NVPTXInst<(outs 
Float32Regs:$dst), + (ins f32imm:$a, Float32Regs:$b, Int1Regs:$p), + "selp.f32 \t$dst, $a, $b, $p;", + [(set Float32Regs:$dst, (select Int1Regs:$p, fpimm:$a, Float32Regs:$b))]>; +def SELECTf32ii : NVPTXInst<(outs Float32Regs:$dst), + (ins f32imm:$a, f32imm:$b, Int1Regs:$p), + "selp.f32 \t$dst, $a, $b, $p;", + [(set Float32Regs:$dst, (select Int1Regs:$p, fpimm:$a, fpimm:$b))]>; + +def SELECTf64rr : NVPTXInst<(outs Float64Regs:$dst), + (ins Float64Regs:$a, Float64Regs:$b, Int1Regs:$p), + "selp.f64 \t$dst, $a, $b, $p;", + [(set Float64Regs:$dst, + (select Int1Regs:$p, Float64Regs:$a, Float64Regs:$b))]>; +def SELECTf64ri : NVPTXInst<(outs Float64Regs:$dst), + (ins Float64Regs:$a, f64imm:$b, Int1Regs:$p), + "selp.f64 \t$dst, $a, $b, $p;", + [(set Float64Regs:$dst, (select Int1Regs:$p, Float64Regs:$a, fpimm:$b))]>; +def SELECTf64ir : NVPTXInst<(outs Float64Regs:$dst), + (ins f64imm:$a, Float64Regs:$b, Int1Regs:$p), + "selp.f64 \t$dst, $a, $b, $p;", + [(set Float64Regs:$dst, (select Int1Regs:$p, fpimm:$a, Float64Regs:$b))]>; +def SELECTf64ii : NVPTXInst<(outs Float64Regs:$dst), + (ins f64imm:$a, f64imm:$b, Int1Regs:$p), + "selp.f64 \t $dst, $a, $b, $p;", + [(set Float64Regs:$dst, (select Int1Regs:$p, fpimm:$a, fpimm:$b))]>; + +//def ld_param : SDNode<"NVPTXISD::LOAD_PARAM", SDTLoad, +// [SDNPHasChain, SDNPMayLoad, SDNPMemOperand]>; + +def SDTDeclareParamProfile : SDTypeProfile<0, 3, [SDTCisInt<0>, SDTCisInt<1>, + SDTCisInt<2>]>; +def SDTDeclareScalarParamProfile : SDTypeProfile<0, 3, [SDTCisInt<0>, + SDTCisInt<1>, SDTCisInt<2>]>; +def SDTLoadParamProfile : SDTypeProfile<1, 2, [SDTCisInt<1>, SDTCisInt<2>]>; +def SDTPrintCallProfile : SDTypeProfile<0, 1, [SDTCisInt<0>]>; +def SDTPrintCallUniProfile : SDTypeProfile<0, 1, [SDTCisInt<0>]>; +def SDTStoreParamProfile : SDTypeProfile<0, 3, [SDTCisInt<0>, SDTCisInt<1>]>; +def SDTStoreParam32Profile : SDTypeProfile<0, 3, [SDTCisInt<0>, SDTCisInt<1>]>; +def SDTCallArgProfile : SDTypeProfile<0, 2, [SDTCisInt<0>]>; +def 
SDTCallArgMarkProfile : SDTypeProfile<0, 0, []>; +def SDTCallVoidProfile : SDTypeProfile<0, 1, []>; +def SDTCallValProfile : SDTypeProfile<1, 0, []>; +def SDTMoveParamProfile : SDTypeProfile<1, 1, []>; +def SDTMoveRetvalProfile : SDTypeProfile<0, 1, []>; +def SDTStoreRetvalProfile : SDTypeProfile<0, 2, [SDTCisInt<0>]>; +def SDTPseudoUseParamProfile : SDTypeProfile<0, 1, []>; + +def DeclareParam : SDNode<"NVPTXISD::DeclareParam", SDTDeclareParamProfile, + [SDNPHasChain, SDNPOutGlue, SDNPInGlue, SDNPSideEffect]>; +def DeclareScalarParam : SDNode<"NVPTXISD::DeclareScalarParam", + SDTDeclareScalarParamProfile, + [SDNPHasChain, SDNPOutGlue, SDNPInGlue, SDNPSideEffect]>; +def DeclareRetParam : SDNode<"NVPTXISD::DeclareRetParam", + SDTDeclareParamProfile, + [SDNPHasChain, SDNPOutGlue, SDNPInGlue, SDNPSideEffect]>; +def DeclareRet : SDNode<"NVPTXISD::DeclareRet", SDTDeclareScalarParamProfile, + [SDNPHasChain, SDNPOutGlue, SDNPInGlue, SDNPSideEffect]>; +def LoadParam : SDNode<"NVPTXISD::LoadParam", SDTLoadParamProfile, + [SDNPHasChain, SDNPMayLoad, SDNPOutGlue, SDNPInGlue]>; +def PrintCall : SDNode<"NVPTXISD::PrintCall", SDTPrintCallProfile, + [SDNPHasChain, SDNPOutGlue, SDNPInGlue, SDNPSideEffect]>; +def PrintCallUni : SDNode<"NVPTXISD::PrintCallUni", SDTPrintCallUniProfile, + [SDNPHasChain, SDNPOutGlue, SDNPInGlue, SDNPSideEffect]>; +def StoreParam : SDNode<"NVPTXISD::StoreParam", SDTStoreParamProfile, + [SDNPHasChain, SDNPOutGlue, SDNPInGlue, SDNPSideEffect]>; +def StoreParamU32 : SDNode<"NVPTXISD::StoreParamU32", SDTStoreParam32Profile, + [SDNPHasChain, SDNPOutGlue, SDNPInGlue, SDNPSideEffect]>; +def StoreParamS32 : SDNode<"NVPTXISD::StoreParamS32", SDTStoreParam32Profile, + [SDNPHasChain, SDNPOutGlue, SDNPInGlue, SDNPSideEffect]>; +def MoveToParam : SDNode<"NVPTXISD::MoveToParam", SDTStoreParamProfile, + [SDNPHasChain, SDNPOutGlue, SDNPInGlue, SDNPSideEffect]>; +def CallArgBegin : SDNode<"NVPTXISD::CallArgBegin", SDTCallArgMarkProfile, + [SDNPHasChain, SDNPOutGlue, 
SDNPInGlue, SDNPSideEffect]>; +def CallArg : SDNode<"NVPTXISD::CallArg", SDTCallArgProfile, + [SDNPHasChain, SDNPOutGlue, SDNPInGlue, SDNPSideEffect]>; +def LastCallArg : SDNode<"NVPTXISD::LastCallArg", SDTCallArgProfile, + [SDNPHasChain, SDNPOutGlue, SDNPInGlue, SDNPSideEffect]>; +def CallArgEnd : SDNode<"NVPTXISD::CallArgEnd", SDTCallVoidProfile, + [SDNPHasChain, SDNPOutGlue, SDNPInGlue, SDNPSideEffect]>; +def CallVoid : SDNode<"NVPTXISD::CallVoid", SDTCallVoidProfile, + [SDNPHasChain, SDNPOutGlue, SDNPInGlue, SDNPSideEffect]>; +def Prototype : SDNode<"NVPTXISD::Prototype", SDTCallVoidProfile, + [SDNPHasChain, SDNPOutGlue, SDNPInGlue, SDNPSideEffect]>; +def CallVal : SDNode<"NVPTXISD::CallVal", SDTCallValProfile, + [SDNPHasChain, SDNPOutGlue, SDNPInGlue, SDNPSideEffect]>; +def MoveParam : SDNode<"NVPTXISD::MoveParam", SDTMoveParamProfile, + []>; +def MoveRetval : SDNode<"NVPTXISD::MoveRetval", SDTMoveRetvalProfile, + [SDNPHasChain, SDNPSideEffect]>; +def StoreRetval : SDNode<"NVPTXISD::StoreRetval", SDTStoreRetvalProfile, + [SDNPHasChain, SDNPSideEffect]>; +def MoveToRetval : SDNode<"NVPTXISD::MoveToRetval", SDTStoreRetvalProfile, + [SDNPHasChain, SDNPSideEffect]>; +def PseudoUseParam : SDNode<"NVPTXISD::PseudoUseParam", + SDTPseudoUseParamProfile, + [SDNPHasChain, SDNPOutGlue, SDNPInGlue, SDNPSideEffect]>; +def RETURNNode : SDNode<"NVPTXISD::RETURN", SDTCallArgMarkProfile, + [SDNPHasChain, SDNPSideEffect]>; + +class LoadParamMemInst<NVPTXRegClass regclass, string opstr> : + NVPTXInst<(outs regclass:$dst), (ins i32imm:$b), + !strconcat(!strconcat("ld.param", opstr), + "\t$dst, [retval0+$b];"), + [(set regclass:$dst, (LoadParam (i32 1), (i32 imm:$b)))]>; + +class LoadParamRegInst<NVPTXRegClass regclass, string opstr> : + NVPTXInst<(outs regclass:$dst), (ins i32imm:$b), + !strconcat(!strconcat("mov", opstr), + "\t$dst, retval$b;"), + [(set regclass:$dst, (LoadParam (i32 0), (i32 imm:$b)))]>; + +class StoreParamInst<NVPTXRegClass regclass, string opstr> : + 
NVPTXInst<(outs), (ins regclass:$val, i32imm:$a, i32imm:$b), + !strconcat(!strconcat("st.param", opstr), + "\t[param$a+$b], $val;"), + [(StoreParam (i32 imm:$a), (i32 imm:$b), regclass:$val)]>; + +class MoveToParamInst<NVPTXRegClass regclass, string opstr> : + NVPTXInst<(outs), (ins regclass:$val, i32imm:$a, i32imm:$b), + !strconcat(!strconcat("mov", opstr), + "\tparam$a, $val;"), + [(MoveToParam (i32 imm:$a), (i32 imm:$b), regclass:$val)]>; + +class StoreRetvalInst<NVPTXRegClass regclass, string opstr> : + NVPTXInst<(outs), (ins regclass:$val, i32imm:$a), + !strconcat(!strconcat("st.param", opstr), + "\t[func_retval0+$a], $val;"), + [(StoreRetval (i32 imm:$a), regclass:$val)]>; + +class MoveToRetvalInst<NVPTXRegClass regclass, string opstr> : + NVPTXInst<(outs), (ins i32imm:$num, regclass:$val), + !strconcat(!strconcat("mov", opstr), + "\tfunc_retval$num, $val;"), + [(MoveToRetval (i32 imm:$num), regclass:$val)]>; + +class MoveRetvalInst<NVPTXRegClass regclass, string opstr> : + NVPTXInst<(outs), (ins regclass:$val), + !strconcat(!strconcat("mov", opstr), + "\tfunc_retval0, $val;"), + [(MoveRetval regclass:$val)]>; + +def PrintCallRetInst1 : NVPTXInst<(outs), (ins), +"call (retval0), ", + [(PrintCall (i32 1))]>; +def PrintCallRetInst2 : NVPTXInst<(outs), (ins), +"call (retval0, retval1), ", + [(PrintCall (i32 2))]>; +def PrintCallRetInst3 : NVPTXInst<(outs), (ins), +"call (retval0, retval1, retval2), ", + [(PrintCall (i32 3))]>; +def PrintCallRetInst4 : NVPTXInst<(outs), (ins), +"call (retval0, retval1, retval2, retval3), ", + [(PrintCall (i32 4))]>; +def PrintCallRetInst5 : NVPTXInst<(outs), (ins), +"call (retval0, retval1, retval2, retval3, retval4), ", + [(PrintCall (i32 5))]>; +def PrintCallRetInst6 : NVPTXInst<(outs), (ins), +"call (retval0, retval1, retval2, retval3, retval4, retval5), ", + [(PrintCall (i32 6))]>; +def PrintCallRetInst7 : NVPTXInst<(outs), (ins), +"call (retval0, retval1, retval2, retval3, retval4, retval5, retval6), ", + [(PrintCall (i32 
7))]>; +def PrintCallRetInst8 : NVPTXInst<(outs), (ins), +!strconcat("call (retval0, retval1, retval2, retval3, retval4", + ", retval5, retval6, retval7), "), + [(PrintCall (i32 8))]>; + +def PrintCallNoRetInst : NVPTXInst<(outs), (ins), "call ", + [(PrintCall (i32 0))]>; + +def PrintCallUniRetInst1 : NVPTXInst<(outs), (ins), +"call.uni (retval0), ", + [(PrintCallUni (i32 1))]>; +def PrintCallUniRetInst2 : NVPTXInst<(outs), (ins), +"call.uni (retval0, retval1), ", + [(PrintCallUni (i32 2))]>; +def PrintCallUniRetInst3 : NVPTXInst<(outs), (ins), +"call.uni (retval0, retval1, retval2), ", + [(PrintCallUni (i32 3))]>; +def PrintCallUniRetInst4 : NVPTXInst<(outs), (ins), +"call.uni (retval0, retval1, retval2, retval3), ", + [(PrintCallUni (i32 4))]>; +def PrintCallUniRetInst5 : NVPTXInst<(outs), (ins), +"call.uni (retval0, retval1, retval2, retval3, retval4), ", + [(PrintCallUni (i32 5))]>; +def PrintCallUniRetInst6 : NVPTXInst<(outs), (ins), +"call.uni (retval0, retval1, retval2, retval3, retval4, retval5), ", + [(PrintCallUni (i32 6))]>; +def PrintCallUniRetInst7 : NVPTXInst<(outs), (ins), +"call.uni (retval0, retval1, retval2, retval3, retval4, retval5, retval6), ", + [(PrintCallUni (i32 7))]>; +def PrintCallUniRetInst8 : NVPTXInst<(outs), (ins), +!strconcat("call.uni (retval0, retval1, retval2, retval3, retval4", + ", retval5, retval6, retval7), "), + [(PrintCallUni (i32 8))]>; + +def PrintCallUniNoRetInst : NVPTXInst<(outs), (ins), "call.uni ", + [(PrintCallUni (i32 0))]>; + +def LoadParamMemI64 : LoadParamMemInst<Int64Regs, ".b64">; +def LoadParamMemI32 : LoadParamMemInst<Int32Regs, ".b32">; +def LoadParamMemI16 : LoadParamMemInst<Int16Regs, ".b16">; +def LoadParamMemI8 : LoadParamMemInst<Int8Regs, ".b8">; + +//def LoadParamMemI16 : NVPTXInst<(outs Int16Regs:$dst), (ins i32imm:$b), +// !strconcat("ld.param.b32\ttemp_param_reg, [retval0+$b];\n\t", +// "cvt.u16.u32\t$dst, temp_param_reg;"), +// [(set Int16Regs:$dst, (LoadParam (i32 1), (i32 imm:$b)))]>; +//def 
LoadParamMemI8 : NVPTXInst<(outs Int8Regs:$dst), (ins i32imm:$b), +// !strconcat("ld.param.b32\ttemp_param_reg, [retval0+$b];\n\t", +// "cvt.u16.u32\t$dst, temp_param_reg;"), +// [(set Int8Regs:$dst, (LoadParam (i32 1), (i32 imm:$b)))]>; + +def LoadParamMemF32 : LoadParamMemInst<Float32Regs, ".f32">; +def LoadParamMemF64 : LoadParamMemInst<Float64Regs, ".f64">; + +def LoadParamRegI64 : LoadParamRegInst<Int64Regs, ".b64">; +def LoadParamRegI32 : LoadParamRegInst<Int32Regs, ".b32">; +def LoadParamRegI16 : NVPTXInst<(outs Int16Regs:$dst), (ins i32imm:$b), + "cvt.u16.u32\t$dst, retval$b;", + [(set Int16Regs:$dst, + (LoadParam (i32 0), (i32 imm:$b)))]>; +def LoadParamRegI8 : NVPTXInst<(outs Int8Regs:$dst), (ins i32imm:$b), + "cvt.u16.u32\t$dst, retval$b;", + [(set Int8Regs:$dst, + (LoadParam (i32 0), (i32 imm:$b)))]>; + +def LoadParamRegF32 : LoadParamRegInst<Float32Regs, ".f32">; +def LoadParamRegF64 : LoadParamRegInst<Float64Regs, ".f64">; + +def StoreParamI64 : StoreParamInst<Int64Regs, ".b64">; +def StoreParamI32 : StoreParamInst<Int32Regs, ".b32">; + +def StoreParamI16 : NVPTXInst<(outs), + (ins Int16Regs:$val, i32imm:$a, i32imm:$b), + "st.param.b16\t[param$a+$b], $val;", + [(StoreParam (i32 imm:$a), (i32 imm:$b), Int16Regs:$val)]>; + +def StoreParamI8 : NVPTXInst<(outs), + (ins Int8Regs:$val, i32imm:$a, i32imm:$b), + "st.param.b8\t[param$a+$b], $val;", + [(StoreParam + (i32 imm:$a), (i32 imm:$b), Int8Regs:$val)]>; + +def StoreParamS32I16 : NVPTXInst<(outs), + (ins Int16Regs:$val, i32imm:$a, i32imm:$b), + !strconcat("cvt.s32.s16\ttemp_param_reg, $val;\n\t", + "st.param.b32\t[param$a+$b], temp_param_reg;"), + [(StoreParamS32 (i32 imm:$a), (i32 imm:$b), Int16Regs:$val)]>; +def StoreParamU32I16 : NVPTXInst<(outs), + (ins Int16Regs:$val, i32imm:$a, i32imm:$b), + !strconcat("cvt.u32.u16\ttemp_param_reg, $val;\n\t", + "st.param.b32\t[param$a+$b], temp_param_reg;"), + [(StoreParamU32 (i32 imm:$a), (i32 imm:$b), Int16Regs:$val)]>; + +def StoreParamU32I8 : NVPTXInst<(outs), 
+ (ins Int8Regs:$val, i32imm:$a, i32imm:$b), + !strconcat("cvt.u32.u8\ttemp_param_reg, $val;\n\t", + "st.param.b32\t[param$a+$b], temp_param_reg;"), + [(StoreParamU32 (i32 imm:$a), (i32 imm:$b), Int8Regs:$val)]>; +def StoreParamS32I8 : NVPTXInst<(outs), + (ins Int8Regs:$val, i32imm:$a, i32imm:$b), + !strconcat("cvt.s32.s8\ttemp_param_reg, $val;\n\t", + "st.param.b32\t[param$a+$b], temp_param_reg;"), + [(StoreParamS32 (i32 imm:$a), (i32 imm:$b), Int8Regs:$val)]>; + +def StoreParamF32 : StoreParamInst<Float32Regs, ".f32">; +def StoreParamF64 : StoreParamInst<Float64Regs, ".f64">; + +def MoveToParamI64 : MoveToParamInst<Int64Regs, ".b64">; +def MoveToParamI32 : MoveToParamInst<Int32Regs, ".b32">; +def MoveToParamF64 : MoveToParamInst<Float64Regs, ".f64">; +def MoveToParamF32 : MoveToParamInst<Float32Regs, ".f32">; +def MoveToParamI16 : NVPTXInst<(outs), + (ins Int16Regs:$val, i32imm:$a, i32imm:$b), + !strconcat("cvt.u32.u16\ttemp_param_reg, $val;\n\t", + "mov.b32\tparam$a, temp_param_reg;"), + [(MoveToParam (i32 imm:$a), (i32 imm:$b), Int16Regs:$val)]>; +def MoveToParamI8 : NVPTXInst<(outs), + (ins Int8Regs:$val, i32imm:$a, i32imm:$b), + !strconcat("cvt.u32.u16\ttemp_param_reg, $val;\n\t", + "mov.b32\tparam$a, temp_param_reg;"), + [(MoveToParam (i32 imm:$a), (i32 imm:$b), Int8Regs:$val)]>; + +def StoreRetvalI64 : StoreRetvalInst<Int64Regs, ".b64">; +def StoreRetvalI32 : StoreRetvalInst<Int32Regs, ".b32">; +def StoreRetvalI16 : StoreRetvalInst<Int16Regs, ".b16">; +def StoreRetvalI8 : StoreRetvalInst<Int8Regs, ".b8">; + +//def StoreRetvalI16 : NVPTXInst<(outs), (ins Int16Regs:$val, i32imm:$a), +// !strconcat("\{\n\t", +// !strconcat(".reg .b32 temp_retval_reg;\n\t", +// !strconcat("cvt.u32.u16\ttemp_retval_reg, $val;\n\t", +// "st.param.b32\t[func_retval0+$a], temp_retval_reg;\n\t\}"))), +// [(StoreRetval (i32 imm:$a), Int16Regs:$val)]>; +//def StoreRetvalI8 : NVPTXInst<(outs), (ins Int8Regs:$val, i32imm:$a), +// !strconcat("\{\n\t", +// !strconcat(".reg .b32 
temp_retval_reg;\n\t", +// !strconcat("cvt.u32.u16\ttemp_retval_reg, $val;\n\t", +// "st.param.b32\t[func_retval0+$a], temp_retval_reg;\n\t\}"))), +// [(StoreRetval (i32 imm:$a), Int8Regs:$val)]>; + +def StoreRetvalF64 : StoreRetvalInst<Float64Regs, ".f64">; +def StoreRetvalF32 : StoreRetvalInst<Float32Regs, ".f32">; + +def MoveRetvalI64 : MoveRetvalInst<Int64Regs, ".b64">; +def MoveRetvalI32 : MoveRetvalInst<Int32Regs, ".b32">; +def MoveRetvalI16 : MoveRetvalInst<Int16Regs, ".b16">; +def MoveRetvalI8 : MoveRetvalInst<Int8Regs, ".b8">; +def MoveRetvalF64 : MoveRetvalInst<Float64Regs, ".f64">; +def MoveRetvalF32 : MoveRetvalInst<Float32Regs, ".f32">; + +def MoveToRetvalI64 : MoveToRetvalInst<Int64Regs, ".b64">; +def MoveToRetvalI32 : MoveToRetvalInst<Int32Regs, ".b32">; +def MoveToRetvalF64 : MoveToRetvalInst<Float64Regs, ".f64">; +def MoveToRetvalF32 : MoveToRetvalInst<Float32Regs, ".f32">; +def MoveToRetvalI16 : NVPTXInst<(outs), (ins i32imm:$num, Int16Regs:$val), + "cvt.u32.u16\tfunc_retval$num, $val;", + [(MoveToRetval (i32 imm:$num), Int16Regs:$val)]>; +def MoveToRetvalI8 : NVPTXInst<(outs), (ins i32imm:$num, Int8Regs:$val), + "cvt.u32.u16\tfunc_retval$num, $val;", + [(MoveToRetval (i32 imm:$num), Int8Regs:$val)]>; + +def CallArgBeginInst : NVPTXInst<(outs), (ins), "(", [(CallArgBegin)]>; +def CallArgEndInst1 : NVPTXInst<(outs), (ins), ");", [(CallArgEnd (i32 1))]>; +def CallArgEndInst0 : NVPTXInst<(outs), (ins), ")", [(CallArgEnd (i32 0))]>; +def RETURNInst : NVPTXInst<(outs), (ins), "ret;", [(RETURNNode)]>; + +class CallArgInst<NVPTXRegClass regclass> : + NVPTXInst<(outs), (ins regclass:$a), "$a, ", + [(CallArg (i32 0), regclass:$a)]>; + +class LastCallArgInst<NVPTXRegClass regclass> : + NVPTXInst<(outs), (ins regclass:$a), "$a", + [(LastCallArg (i32 0), regclass:$a)]>; + +def CallArgI64 : CallArgInst<Int64Regs>; +def CallArgI32 : CallArgInst<Int32Regs>; +def CallArgI16 : CallArgInst<Int16Regs>; +def CallArgI8 : CallArgInst<Int8Regs>; + +def CallArgF64 : 
CallArgInst<Float64Regs>; +def CallArgF32 : CallArgInst<Float32Regs>; + +def LastCallArgI64 : LastCallArgInst<Int64Regs>; +def LastCallArgI32 : LastCallArgInst<Int32Regs>; +def LastCallArgI16 : LastCallArgInst<Int16Regs>; +def LastCallArgI8 : LastCallArgInst<Int8Regs>; + +def LastCallArgF64 : LastCallArgInst<Float64Regs>; +def LastCallArgF32 : LastCallArgInst<Float32Regs>; + +def CallArgI32imm : NVPTXInst<(outs), (ins i32imm:$a), "$a, ", + [(CallArg (i32 0), (i32 imm:$a))]>; +def LastCallArgI32imm : NVPTXInst<(outs), (ins i32imm:$a), "$a", + [(LastCallArg (i32 0), (i32 imm:$a))]>; + +def CallArgParam : NVPTXInst<(outs), (ins i32imm:$a), "param$a, ", + [(CallArg (i32 1), (i32 imm:$a))]>; +def LastCallArgParam : NVPTXInst<(outs), (ins i32imm:$a), "param$a", + [(LastCallArg (i32 1), (i32 imm:$a))]>; + +def CallVoidInst : NVPTXInst<(outs), (ins imem:$addr), + "$addr, ", + [(CallVoid (Wrapper tglobaladdr:$addr))]>; +def CallVoidInstReg : NVPTXInst<(outs), (ins Int32Regs:$addr), + "$addr, ", + [(CallVoid Int32Regs:$addr)]>; +def CallVoidInstReg64 : NVPTXInst<(outs), (ins Int64Regs:$addr), + "$addr, ", + [(CallVoid Int64Regs:$addr)]>; +def PrototypeInst : NVPTXInst<(outs), (ins i32imm:$val), + ", prototype_$val;", + [(Prototype (i32 imm:$val))]>; + +def DeclareRetMemInst : NVPTXInst<(outs), + (ins i32imm:$align, i32imm:$size, i32imm:$num), + ".param .align $align .b8 retval$num[$size];", + [(DeclareRetParam (i32 imm:$align), (i32 imm:$size), (i32 imm:$num))]>; +def DeclareRetScalarInst : NVPTXInst<(outs), (ins i32imm:$size, i32imm:$num), + ".param .b$size retval$num;", + [(DeclareRet (i32 1), (i32 imm:$size), (i32 imm:$num))]>; +def DeclareRetRegInst : NVPTXInst<(outs), (ins i32imm:$size, i32imm:$num), + ".reg .b$size retval$num;", + [(DeclareRet (i32 2), (i32 imm:$size), (i32 imm:$num))]>; + +def DeclareParamInst : NVPTXInst<(outs), + (ins i32imm:$align, i32imm:$a, i32imm:$size), + ".param .align $align .b8 param$a[$size];", + [(DeclareParam (i32 imm:$align), (i32 
imm:$a), (i32 imm:$size))]>; +def DeclareScalarParamInst : NVPTXInst<(outs), (ins i32imm:$a, i32imm:$size), + ".param .b$size param$a;", + [(DeclareScalarParam (i32 imm:$a), (i32 imm:$size), (i32 0))]>; +def DeclareScalarRegInst : NVPTXInst<(outs), (ins i32imm:$a, i32imm:$size), + ".reg .b$size param$a;", + [(DeclareScalarParam (i32 imm:$a), (i32 imm:$size), (i32 1))]>; + +class MoveParamInst<NVPTXRegClass regclass, string asmstr> : + NVPTXInst<(outs regclass:$dst), (ins regclass:$src), + !strconcat(!strconcat("mov", asmstr), "\t$dst, $src;"), + [(set regclass:$dst, (MoveParam regclass:$src))]>; + +def MoveParamI64 : MoveParamInst<Int64Regs, ".b64">; +def MoveParamI32 : MoveParamInst<Int32Regs, ".b32">; +def MoveParamI16 : NVPTXInst<(outs Int16Regs:$dst), (ins Int16Regs:$src), + "cvt.u16.u32\t$dst, $src;", + [(set Int16Regs:$dst, (MoveParam Int16Regs:$src))]>; +def MoveParamI8 : NVPTXInst<(outs Int8Regs:$dst), (ins Int8Regs:$src), + "cvt.u16.u32\t$dst, $src;", + [(set Int8Regs:$dst, (MoveParam Int8Regs:$src))]>; +def MoveParamF64 : MoveParamInst<Float64Regs, ".f64">; +def MoveParamF32 : MoveParamInst<Float32Regs, ".f32">; + +class PseudoUseParamInst<NVPTXRegClass regclass> : + NVPTXInst<(outs), (ins regclass:$src), + "// Pseudo use of $src", + [(PseudoUseParam regclass:$src)]>; + +def PseudoUseParamI64 : PseudoUseParamInst<Int64Regs>; +def PseudoUseParamI32 : PseudoUseParamInst<Int32Regs>; +def PseudoUseParamI16 : PseudoUseParamInst<Int16Regs>; +def PseudoUseParamI8 : PseudoUseParamInst<Int8Regs>; +def PseudoUseParamF64 : PseudoUseParamInst<Float64Regs>; +def PseudoUseParamF32 : PseudoUseParamInst<Float32Regs>; + + +// +// Load / Store Handling +// +multiclass LD<NVPTXRegClass regclass> { + def _avar : NVPTXInst<(outs regclass:$dst), + (ins LdStCode:$isVol, LdStCode:$addsp, LdStCode:$Vec, LdStCode:$Sign, + i32imm:$fromWidth, imem:$addr), +!strconcat("ld${isVol:volatile}${addsp:addsp}${Vec:vec}.${Sign:sign}", + "$fromWidth \t$dst, [$addr];"), []>; + def _areg : 
NVPTXInst<(outs regclass:$dst), + (ins LdStCode:$isVol, LdStCode:$addsp, LdStCode:$Vec, LdStCode:$Sign, + i32imm:$fromWidth, Int32Regs:$addr), +!strconcat("ld${isVol:volatile}${addsp:addsp}${Vec:vec}.${Sign:sign}", + "$fromWidth \t$dst, [$addr];"), []>; + def _ari : NVPTXInst<(outs regclass:$dst), + (ins LdStCode:$isVol, LdStCode:$addsp, LdStCode:$Vec, LdStCode:$Sign, + i32imm:$fromWidth, Int32Regs:$addr, i32imm:$offset), +!strconcat("ld${isVol:volatile}${addsp:addsp}${Vec:vec}.${Sign:sign}", + "$fromWidth \t$dst, [$addr+$offset];"), []>; + def _asi : NVPTXInst<(outs regclass:$dst), + (ins LdStCode:$isVol, LdStCode:$addsp, LdStCode:$Vec, LdStCode:$Sign, + i32imm:$fromWidth, imem:$addr, i32imm:$offset), +!strconcat("ld${isVol:volatile}${addsp:addsp}${Vec:vec}.${Sign:sign}", + "$fromWidth \t$dst, [$addr+$offset];"), []>; +} + +let mayLoad=1, neverHasSideEffects=1 in { +defm LD_i8 : LD<Int8Regs>; +defm LD_i16 : LD<Int16Regs>; +defm LD_i32 : LD<Int32Regs>; +defm LD_i64 : LD<Int64Regs>; +defm LD_f32 : LD<Float32Regs>; +defm LD_f64 : LD<Float64Regs>; +} + +let VecInstType=isVecLD.Value, mayLoad=1, neverHasSideEffects=1 in { +defm LD_v2i8 : LD<V2I8Regs>; +defm LD_v4i8 : LD<V4I8Regs>; +defm LD_v2i16 : LD<V2I16Regs>; +defm LD_v4i16 : LD<V4I16Regs>; +defm LD_v2i32 : LD<V2I32Regs>; +defm LD_v4i32 : LD<V4I32Regs>; +defm LD_v2f32 : LD<V2F32Regs>; +defm LD_v4f32 : LD<V4F32Regs>; +defm LD_v2i64 : LD<V2I64Regs>; +defm LD_v2f64 : LD<V2F64Regs>; +} + +multiclass ST<NVPTXRegClass regclass> { + def _avar : NVPTXInst<(outs), + (ins regclass:$src, LdStCode:$isVol, LdStCode:$addsp, LdStCode:$Vec, + LdStCode:$Sign, i32imm:$toWidth, imem:$addr), +!strconcat("st${isVol:volatile}${addsp:addsp}${Vec:vec}.${Sign:sign}$toWidth", + " \t[$addr], $src;"), []>; + def _areg : NVPTXInst<(outs), + (ins regclass:$src, LdStCode:$isVol, LdStCode:$addsp, LdStCode:$Vec, + LdStCode:$Sign, i32imm:$toWidth, Int32Regs:$addr), +!strconcat("st${isVol:volatile}${addsp:addsp}${Vec:vec}.${Sign:sign}$toWidth", + " 
\t[$addr], $src;"), []>; + def _ari : NVPTXInst<(outs), + (ins regclass:$src, LdStCode:$isVol, LdStCode:$addsp, LdStCode:$Vec, + LdStCode:$Sign, i32imm:$toWidth, Int32Regs:$addr, i32imm:$offset), +!strconcat("st${isVol:volatile}${addsp:addsp}${Vec:vec}.${Sign:sign}$toWidth", + " \t[$addr+$offset], $src;"), []>; + def _asi : NVPTXInst<(outs), + (ins regclass:$src, LdStCode:$isVol, LdStCode:$addsp, LdStCode:$Vec, + LdStCode:$Sign, i32imm:$toWidth, imem:$addr, i32imm:$offset), +!strconcat("st${isVol:volatile}${addsp:addsp}${Vec:vec}.${Sign:sign}$toWidth", + " \t[$addr+$offset], $src;"), []>; +} + +let mayStore=1, neverHasSideEffects=1 in { +defm ST_i8 : ST<Int8Regs>; +defm ST_i16 : ST<Int16Regs>; +defm ST_i32 : ST<Int32Regs>; +defm ST_i64 : ST<Int64Regs>; +defm ST_f32 : ST<Float32Regs>; +defm ST_f64 : ST<Float64Regs>; +} + +let VecInstType=isVecST.Value, mayStore=1, neverHasSideEffects=1 in { +defm ST_v2i8 : ST<V2I8Regs>; +defm ST_v4i8 : ST<V4I8Regs>; +defm ST_v2i16 : ST<V2I16Regs>; +defm ST_v4i16 : ST<V4I16Regs>; +defm ST_v2i32 : ST<V2I32Regs>; +defm ST_v4i32 : ST<V4I32Regs>; +defm ST_v2f32 : ST<V2F32Regs>; +defm ST_v4f32 : ST<V4F32Regs>; +defm ST_v2i64 : ST<V2I64Regs>; +defm ST_v2f64 : ST<V2F64Regs>; +} + +// The following is used only in and after vector elementizations. +// Vector elementization happens at the machine instruction level, so the +// following instruction +// never appears in the DAG. 
+multiclass LD_VEC<NVPTXRegClass regclass> { // Multi-result vector loads: _v2_* defs write two regclass registers, _v4_* defs write four. The suffix selects the address operands: _avar = imem:$addr, _areg = Int32Regs:$addr, _ari = Int32Regs:$addr plus i32imm:$offset, _asi = imem:$addr plus i32imm:$offset. Every def has an empty pattern list ([]).
+ def _v2_avar : NVPTXInst<(outs regclass:$dst1, regclass:$dst2), + (ins LdStCode:$isVol, LdStCode:$addsp, LdStCode:$Vec, LdStCode:$Sign, + i32imm:$fromWidth, imem:$addr), + !strconcat("ld${isVol:volatile}${addsp:addsp}${Vec:vec}.${Sign:sign}", + "$fromWidth \t{{$dst1, $dst2}}, [$addr];"), []>; + def _v2_areg : NVPTXInst<(outs regclass:$dst1, regclass:$dst2), + (ins LdStCode:$isVol, LdStCode:$addsp, LdStCode:$Vec, LdStCode:$Sign, + i32imm:$fromWidth, Int32Regs:$addr), + !strconcat("ld${isVol:volatile}${addsp:addsp}${Vec:vec}.${Sign:sign}", + "$fromWidth \t{{$dst1, $dst2}}, [$addr];"), []>; + def _v2_ari : NVPTXInst<(outs regclass:$dst1, regclass:$dst2), + (ins LdStCode:$isVol, LdStCode:$addsp, LdStCode:$Vec, LdStCode:$Sign, + i32imm:$fromWidth, Int32Regs:$addr, i32imm:$offset), + !strconcat("ld${isVol:volatile}${addsp:addsp}${Vec:vec}.${Sign:sign}", + "$fromWidth \t{{$dst1, $dst2}}, [$addr+$offset];"), []>; + def _v2_asi : NVPTXInst<(outs regclass:$dst1, regclass:$dst2), + (ins LdStCode:$isVol, LdStCode:$addsp, LdStCode:$Vec, LdStCode:$Sign, + i32imm:$fromWidth, imem:$addr, i32imm:$offset), + !strconcat("ld${isVol:volatile}${addsp:addsp}${Vec:vec}.${Sign:sign}", + "$fromWidth \t{{$dst1, $dst2}}, [$addr+$offset];"), []>; + def _v4_avar : NVPTXInst<(outs regclass:$dst1, regclass:$dst2, + regclass:$dst3, regclass:$dst4), + (ins LdStCode:$isVol, LdStCode:$addsp, LdStCode:$Vec, LdStCode:$Sign, + i32imm:$fromWidth, imem:$addr), + !strconcat("ld${isVol:volatile}${addsp:addsp}${Vec:vec}.${Sign:sign}", + "$fromWidth \t{{$dst1, $dst2, $dst3, $dst4}}, [$addr];"), []>; + def _v4_areg : NVPTXInst<(outs regclass:$dst1, regclass:$dst2, regclass:$dst3, + regclass:$dst4), + (ins LdStCode:$isVol, LdStCode:$addsp, LdStCode:$Vec, LdStCode:$Sign, + i32imm:$fromWidth, Int32Regs:$addr), + !strconcat("ld${isVol:volatile}${addsp:addsp}${Vec:vec}.${Sign:sign}", + "$fromWidth \t{{$dst1, $dst2, $dst3, $dst4}}, [$addr];"), []>; + def _v4_ari : 
NVPTXInst<(outs regclass:$dst1, regclass:$dst2, regclass:$dst3, + regclass:$dst4), + (ins LdStCode:$isVol, LdStCode:$addsp, LdStCode:$Vec, LdStCode:$Sign, + i32imm:$fromWidth, Int32Regs:$addr, i32imm:$offset), + !strconcat("ld${isVol:volatile}${addsp:addsp}${Vec:vec}.${Sign:sign}", + "$fromWidth \t{{$dst1, $dst2, $dst3, $dst4}}, [$addr+$offset];"), + []>; + def _v4_asi : NVPTXInst<(outs regclass:$dst1, regclass:$dst2, regclass:$dst3, + regclass:$dst4), + (ins LdStCode:$isVol, LdStCode:$addsp, LdStCode:$Vec, LdStCode:$Sign, + i32imm:$fromWidth, imem:$addr, i32imm:$offset), + !strconcat("ld${isVol:volatile}${addsp:addsp}${Vec:vec}.${Sign:sign}", + "$fromWidth \t{{$dst1, $dst2, $dst3, $dst4}}, [$addr+$offset];"), + []>; +} +let mayLoad=1, neverHasSideEffects=1 in { // One LD_VEC instantiation per scalar register class (i8/i16/i32/i64/f32/f64), all flagged mayLoad.
+defm LDV_i8 : LD_VEC<Int8Regs>; +defm LDV_i16 : LD_VEC<Int16Regs>; +defm LDV_i32 : LD_VEC<Int32Regs>; +defm LDV_i64 : LD_VEC<Int64Regs>; +defm LDV_f32 : LD_VEC<Float32Regs>; +defm LDV_f64 : LD_VEC<Float64Regs>; +} + +multiclass ST_VEC<NVPTXRegClass regclass> { // Multi-source vector stores with the same _v2/_v4 x _avar/_areg/_ari/_asi layout as LD_VEC; the value registers come first in the (ins) list and every pattern list is empty. NOTE(review): the width operand is named $fromWidth here, whereas the scalar ST multiclass names it $toWidth.
+ def _v2_avar : NVPTXInst<(outs), + (ins regclass:$src1, regclass:$src2, LdStCode:$isVol, LdStCode:$addsp, + LdStCode:$Vec, LdStCode:$Sign, i32imm:$fromWidth, imem:$addr), + !strconcat("st${isVol:volatile}${addsp:addsp}${Vec:vec}.${Sign:sign}", + "$fromWidth \t[$addr], {{$src1, $src2}};"), []>; + def _v2_areg : NVPTXInst<(outs), + (ins regclass:$src1, regclass:$src2, LdStCode:$isVol, LdStCode:$addsp, + LdStCode:$Vec, LdStCode:$Sign, i32imm:$fromWidth, Int32Regs:$addr), + !strconcat("st${isVol:volatile}${addsp:addsp}${Vec:vec}.${Sign:sign}", + "$fromWidth \t[$addr], {{$src1, $src2}};"), []>; + def _v2_ari : NVPTXInst<(outs), + (ins regclass:$src1, regclass:$src2, LdStCode:$isVol, LdStCode:$addsp, + LdStCode:$Vec, LdStCode:$Sign, i32imm:$fromWidth, Int32Regs:$addr, + i32imm:$offset), + !strconcat("st${isVol:volatile}${addsp:addsp}${Vec:vec}.${Sign:sign}", + "$fromWidth \t[$addr+$offset], {{$src1, $src2}};"), []>; + def _v2_asi : NVPTXInst<(outs), + 
(ins regclass:$src1, regclass:$src2, LdStCode:$isVol, LdStCode:$addsp, + LdStCode:$Vec, LdStCode:$Sign, i32imm:$fromWidth, imem:$addr, + i32imm:$offset), + !strconcat("st${isVol:volatile}${addsp:addsp}${Vec:vec}.${Sign:sign}", + "$fromWidth \t[$addr+$offset], {{$src1, $src2}};"), []>; + def _v4_avar : NVPTXInst<(outs), + (ins regclass:$src1, regclass:$src2, regclass:$src3, regclass:$src4, + LdStCode:$isVol, LdStCode:$addsp, LdStCode:$Vec, LdStCode:$Sign, + i32imm:$fromWidth, imem:$addr), + !strconcat("st${isVol:volatile}${addsp:addsp}${Vec:vec}.${Sign:sign}", + "$fromWidth \t[$addr], {{$src1, $src2, $src3, $src4}};"), []>; + def _v4_areg : NVPTXInst<(outs), + (ins regclass:$src1, regclass:$src2, regclass:$src3, regclass:$src4, + LdStCode:$isVol, LdStCode:$addsp, LdStCode:$Vec, LdStCode:$Sign, + i32imm:$fromWidth, Int32Regs:$addr), + !strconcat("st${isVol:volatile}${addsp:addsp}${Vec:vec}.${Sign:sign}", + "$fromWidth \t[$addr], {{$src1, $src2, $src3, $src4}};"), []>; + def _v4_ari : NVPTXInst<(outs), + (ins regclass:$src1, regclass:$src2, regclass:$src3, regclass:$src4, + LdStCode:$isVol, LdStCode:$addsp, LdStCode:$Vec, LdStCode:$Sign, + i32imm:$fromWidth, Int32Regs:$addr, i32imm:$offset), + !strconcat("st${isVol:volatile}${addsp:addsp}${Vec:vec}.${Sign:sign}", + "$fromWidth \t[$addr+$offset], {{$src1, $src2, $src3, $src4}};"), + []>; + def _v4_asi : NVPTXInst<(outs), + (ins regclass:$src1, regclass:$src2, regclass:$src3, regclass:$src4, + LdStCode:$isVol, LdStCode:$addsp, LdStCode:$Vec, LdStCode:$Sign, + i32imm:$fromWidth, imem:$addr, i32imm:$offset), + !strconcat("st${isVol:volatile}${addsp:addsp}${Vec:vec}.${Sign:sign}", + "$fromWidth \t[$addr+$offset], {{$src1, $src2, $src3, $src4}};"), + []>; +} +let mayStore=1, neverHasSideEffects=1 in { // One ST_VEC instantiation per scalar register class, all flagged mayStore.
+defm STV_i8 : ST_VEC<Int8Regs>; +defm STV_i16 : ST_VEC<Int16Regs>; +defm STV_i32 : ST_VEC<Int32Regs>; +defm STV_i64 : ST_VEC<Int64Regs>; +defm STV_f32 : ST_VEC<Float32Regs>; +defm STV_f64 : ST_VEC<Float64Regs>; +} + + +//---- 
Conversion ---- +
+// CVT_INT_TO_FP: integer -> f32/f64 conversion instructions for one signedness.
+//   OpStr   : "s" or "u"; spliced into the cvt source type (e.g. cvt.rn.f32.s32).
+//   OpNode  : DAG node the patterns match (sint_to_fp / uint_to_fp in the defms).
+//   TrueVal : floating-point literal produced for a set i1 source. i1 sources
+//             are lowered with selp instead of cvt. A signed i1 'true' is the
+//             value -1, so the signed instantiation must pass "-1.0"; the
+//             default "1.0" keeps the unsigned result and any two-argument use
+//             of this multiclass unchanged.
+multiclass CVT_INT_TO_FP <string OpStr, SDNode OpNode, string TrueVal = "1.0"> { +// FIXME: need to add f16 support +// def CVTf16i8 : +// NVPTXInst<(outs Float16Regs:$d), (ins Int8Regs:$a), +// !strconcat(!strconcat("cvt.rn.f16.", OpStr), "8 \t$d, $a;"), +// [(set Float16Regs:$d, (OpNode Int8Regs:$a))]>; +// def CVTf16i16 : +// NVPTXInst<(outs Float16Regs:$d), (ins Int16Regs:$a), +// !strconcat(!strconcat("cvt.rn.f16.", OpStr), "16 \t$d, $a;"), +// [(set Float16Regs:$d, (OpNode Int16Regs:$a))]>; +// def CVTf16i32 : +// NVPTXInst<(outs Float16Regs:$d), (ins Int32Regs:$a), +// !strconcat(!strconcat("cvt.rn.f16.", OpStr), "32 \t$d, $a;"), +// [(set Float16Regs:$d, (OpNode Int32Regs:$a))]>; +// def CVTf16i64: +// NVPTXInst<(outs Float16Regs:$d), (ins Int64Regs:$a), +// !strconcat(!strconcat("cvt.rn.f16.", OpStr), "64 \t$d, $a;"), +// [(set Float16Regs:$d, (OpNode Int64Regs:$a))]>; + + def CVTf32i1 : + NVPTXInst<(outs Float32Regs:$d), (ins Int1Regs:$a), + !strconcat(!strconcat("selp.f32 \t$d, ", TrueVal), ", 0.0, $a;"), + [(set Float32Regs:$d, (OpNode Int1Regs:$a))]>; + def CVTf32i8 : + NVPTXInst<(outs Float32Regs:$d), (ins Int8Regs:$a), + !strconcat(!strconcat("cvt.rn.f32.", OpStr), "8 \t$d, $a;"), + [(set Float32Regs:$d, (OpNode Int8Regs:$a))]>; + def CVTf32i16 : + NVPTXInst<(outs Float32Regs:$d), (ins Int16Regs:$a), + !strconcat(!strconcat("cvt.rn.f32.", OpStr), "16 \t$d, $a;"), + [(set Float32Regs:$d, (OpNode Int16Regs:$a))]>; + def CVTf32i32 : + NVPTXInst<(outs Float32Regs:$d), (ins Int32Regs:$a), + !strconcat(!strconcat("cvt.rn.f32.", OpStr), "32 \t$d, $a;"), + [(set Float32Regs:$d, (OpNode Int32Regs:$a))]>; + def CVTf32i64: + NVPTXInst<(outs Float32Regs:$d), (ins Int64Regs:$a), + !strconcat(!strconcat("cvt.rn.f32.", OpStr), "64 \t$d, $a;"), + [(set Float32Regs:$d, (OpNode Int64Regs:$a))]>; + + def CVTf64i1 : + NVPTXInst<(outs Float64Regs:$d), (ins Int1Regs:$a), + !strconcat(!strconcat("selp.f64 \t$d, ", TrueVal), ", 0.0, $a;"), + [(set Float64Regs:$d, (OpNode Int1Regs:$a))]>; + def CVTf64i8 : + NVPTXInst<(outs Float64Regs:$d), (ins 
Int8Regs:$a), + !strconcat(!strconcat("cvt.rn.f64.", OpStr), "8 \t$d, $a;"), + [(set Float64Regs:$d, (OpNode Int8Regs:$a))]>; + def CVTf64i16 : + NVPTXInst<(outs Float64Regs:$d), (ins Int16Regs:$a), + !strconcat(!strconcat("cvt.rn.f64.", OpStr), "16 \t$d, $a;"), + [(set Float64Regs:$d, (OpNode Int16Regs:$a))]>; + def CVTf64i32 : + NVPTXInst<(outs Float64Regs:$d), (ins Int32Regs:$a), + !strconcat(!strconcat("cvt.rn.f64.", OpStr), "32 \t$d, $a;"), + [(set Float64Regs:$d, (OpNode Int32Regs:$a))]>; + def CVTf64i64: + NVPTXInst<(outs Float64Regs:$d), (ins Int64Regs:$a), + !strconcat(!strconcat("cvt.rn.f64.", OpStr), "64 \t$d, $a;"), + [(set Float64Regs:$d, (OpNode Int64Regs:$a))]>; +} +
+// Signed i1 'true' converts to -1.0; unsigned keeps the default 1.0.
+defm Sint_to_fp : CVT_INT_TO_FP <"s", sint_to_fp, "-1.0">; +defm Uint_to_fp : CVT_INT_TO_FP <"u", uint_to_fp>; +
+// CVT_FP_TO_INT: f32/f64 -> integer conversions emitted as cvt.rzi; OpStr ("s" or
+// "u") supplies the destination signedness. Each f32 source has an extra _ftz
+// def (cvt.rzi.ftz) predicated on doF32FTZ, listed before the plain form.
+// Int8Regs destinations use the 16-bit destination type in the asm string.
+multiclass CVT_FP_TO_INT <string OpStr, SDNode OpNode> { +// FIXME: need to add f16 support +// def CVTi8f16: +// NVPTXInst<(outs Int8Regs:$d), (ins Float16Regs:$a), +// !strconcat(!strconcat("cvt.rzi.", OpStr), "8.f16 $d, $a;"), +// [(set Int8Regs:$d, (OpNode Float16Regs:$a))]>; + def CVTi8f32_ftz: + NVPTXInst<(outs Int8Regs:$d), (ins Float32Regs:$a), + !strconcat(!strconcat("cvt.rzi.ftz.", OpStr), "16.f32 \t$d, $a;"), + [(set Int8Regs:$d, (OpNode Float32Regs:$a))]>, Requires<[doF32FTZ]>; + def CVTi8f32: + NVPTXInst<(outs Int8Regs:$d), (ins Float32Regs:$a), + !strconcat(!strconcat("cvt.rzi.", OpStr), "16.f32 \t$d, $a;"), + [(set Int8Regs:$d, (OpNode Float32Regs:$a))]>; + def CVTi8f64: + NVPTXInst<(outs Int8Regs:$d), (ins Float64Regs:$a), + !strconcat(!strconcat("cvt.rzi.", OpStr), "16.f64 \t$d, $a;"), + [(set Int8Regs:$d, (OpNode Float64Regs:$a))]>; + +// FIXME: need to add f16 support +// def CVTi16f16: +// NVPTXInst<(outs Int16Regs:$d), (ins Float16Regs:$a), +// !strconcat(!strconcat("cvt.rzi.", OpStr), "16.f16 \t$d, $a;"), +// [(set Int16Regs:$d, (OpNode Float16Regs:$a))]>; + def CVTi16f32_ftz: + NVPTXInst<(outs Int16Regs:$d), (ins Float32Regs:$a), + 
!strconcat(!strconcat("cvt.rzi.ftz.", OpStr), "16.f32 \t$d, $a;"), + [(set Int16Regs:$d, (OpNode Float32Regs:$a))]>, Requires<[doF32FTZ]>; + def CVTi16f32: + NVPTXInst<(outs Int16Regs:$d), (ins Float32Regs:$a), + !strconcat(!strconcat("cvt.rzi.", OpStr), "16.f32 \t$d, $a;"), + [(set Int16Regs:$d, (OpNode Float32Regs:$a))]>; + def CVTi16f64: + NVPTXInst<(outs Int16Regs:$d), (ins Float64Regs:$a), + !strconcat(!strconcat("cvt.rzi.", OpStr), "16.f64 \t$d, $a;"), + [(set Int16Regs:$d, (OpNode Float64Regs:$a))]>; + +// FIXME: need to add f16 support +// def CVTi32f16: +// NVPTXInst<(outs Int32Regs:$d), (ins Float16Regs:$a), +// !strconcat(!strconcat("cvt.rzi.", OpStr), "32.f16 \t$d, $a;"), +// [(set Int32Regs:$d, (OpNode Float16Regs:$a))]>; + def CVTi32f32_ftz: + NVPTXInst<(outs Int32Regs:$d), (ins Float32Regs:$a), + !strconcat(!strconcat("cvt.rzi.ftz.", OpStr), "32.f32 \t$d, $a;"), + [(set Int32Regs:$d, (OpNode Float32Regs:$a))]>, Requires<[doF32FTZ]>; + def CVTi32f32: + NVPTXInst<(outs Int32Regs:$d), (ins Float32Regs:$a), + !strconcat(!strconcat("cvt.rzi.", OpStr), "32.f32 \t$d, $a;"), + [(set Int32Regs:$d, (OpNode Float32Regs:$a))]>; + def CVTi32f64: + NVPTXInst<(outs Int32Regs:$d), (ins Float64Regs:$a), + !strconcat(!strconcat("cvt.rzi.", OpStr), "32.f64 \t$d, $a;"), + [(set Int32Regs:$d, (OpNode Float64Regs:$a))]>; + +// FIXME: need to add f16 support +// def CVTi64f16: +// NVPTXInst<(outs Int64Regs:$d), (ins Float16Regs:$a), +// !strconcat(!strconcat("cvt.rzi.", OpStr), "64.f16 \t$d, $a;"), +// [(set Int64Regs:$d, (OpNode Float16Regs:$a))]>; + def CVTi64f32_ftz: + NVPTXInst<(outs Int64Regs:$d), (ins Float32Regs:$a), + !strconcat(!strconcat("cvt.rzi.ftz.", OpStr), "64.f32 \t$d, $a;"), + [(set Int64Regs:$d, (OpNode Float32Regs:$a))]>, Requires<[doF32FTZ]>; + def CVTi64f32: + NVPTXInst<(outs Int64Regs:$d), (ins Float32Regs:$a), + !strconcat(!strconcat("cvt.rzi.", OpStr), "64.f32 \t$d, $a;"), + [(set Int64Regs:$d, (OpNode Float32Regs:$a))]>; + def 
CVTi64f64: + NVPTXInst<(outs Int64Regs:$d), (ins Float64Regs:$a), + !strconcat(!strconcat("cvt.rzi.", OpStr), "64.f64 \t$d, $a;"), + [(set Int64Regs:$d, (OpNode Float64Regs:$a))]>; +} + +defm Fp_to_sint : CVT_FP_TO_INT <"s", fp_to_sint>; +defm Fp_to_uint : CVT_FP_TO_INT <"u", fp_to_uint>; + +multiclass INT_EXTEND_UNSIGNED_1 <SDNode OpNode> { + def ext1to8: + NVPTXInst<(outs Int8Regs:$d), (ins Int1Regs:$a), + "selp.u16 \t$d, 1, 0, $a;", + [(set Int8Regs:$d, (OpNode Int1Regs:$a))]>; + def ext1to16: + NVPTXInst<(outs Int16Regs:$d), (ins Int1Regs:$a), + "selp.u16 \t$d, 1, 0, $a;", + [(set Int16Regs:$d, (OpNode Int1Regs:$a))]>; + def ext1to32: + NVPTXInst<(outs Int32Regs:$d), (ins Int1Regs:$a), + "selp.u32 \t$d, 1, 0, $a;", + [(set Int32Regs:$d, (OpNode Int1Regs:$a))]>; + def ext1to64: + NVPTXInst<(outs Int64Regs:$d), (ins Int1Regs:$a), + "selp.u64 \t$d, 1, 0, $a;", + [(set Int64Regs:$d, (OpNode Int1Regs:$a))]>; +} + +multiclass INT_EXTEND_SIGNED_1 <SDNode OpNode> { + def ext1to8: + NVPTXInst<(outs Int8Regs:$d), (ins Int1Regs:$a), + "selp.s16 \t$d, -1, 0, $a;", + [(set Int8Regs:$d, (OpNode Int1Regs:$a))]>; + def ext1to16: + NVPTXInst<(outs Int16Regs:$d), (ins Int1Regs:$a), + "selp.s16 \t$d, -1, 0, $a;", + [(set Int16Regs:$d, (OpNode Int1Regs:$a))]>; + def ext1to32: + NVPTXInst<(outs Int32Regs:$d), (ins Int1Regs:$a), + "selp.s32 \t$d, -1, 0, $a;", + [(set Int32Regs:$d, (OpNode Int1Regs:$a))]>; + def ext1to64: + NVPTXInst<(outs Int64Regs:$d), (ins Int1Regs:$a), + "selp.s64 \t$d, -1, 0, $a;", + [(set Int64Regs:$d, (OpNode Int1Regs:$a))]>; +} + +multiclass INT_EXTEND <string OpStr, SDNode OpNode> { + // All Int8Regs are emitted as 16-bit registers in ptx. + // And there is no selp.u8 in ptx. 
+ def ext8to16: + NVPTXInst<(outs Int16Regs:$d), (ins Int8Regs:$a), + !strconcat("cvt.", !strconcat(OpStr, !strconcat("16.", + !strconcat(OpStr, "8 \t$d, $a;")))), + [(set Int16Regs:$d, (OpNode Int8Regs:$a))]>; + def ext8to32: + NVPTXInst<(outs Int32Regs:$d), (ins Int8Regs:$a), + !strconcat("cvt.", !strconcat(OpStr, !strconcat("32.", + !strconcat(OpStr, "8 \t$d, $a;")))), + [(set Int32Regs:$d, (OpNode Int8Regs:$a))]>; + def ext8to64: + NVPTXInst<(outs Int64Regs:$d), (ins Int8Regs:$a), + !strconcat("cvt.", !strconcat(OpStr, !strconcat("64.", + !strconcat(OpStr, "8 \t$d, $a;")))), + [(set Int64Regs:$d, (OpNode Int8Regs:$a))]>; + def ext16to32: + NVPTXInst<(outs Int32Regs:$d), (ins Int16Regs:$a), + !strconcat("cvt.", !strconcat(OpStr, !strconcat("32.", + !strconcat(OpStr, "16 \t$d, $a;")))), + [(set Int32Regs:$d, (OpNode Int16Regs:$a))]>; + def ext16to64: + NVPTXInst<(outs Int64Regs:$d), (ins Int16Regs:$a), + !strconcat("cvt.", !strconcat(OpStr, !strconcat("64.", + !strconcat(OpStr, "16 \t$d, $a;")))), + [(set Int64Regs:$d, (OpNode Int16Regs:$a))]>; + def ext32to64: + NVPTXInst<(outs Int64Regs:$d), (ins Int32Regs:$a), + !strconcat("cvt.", !strconcat(OpStr, !strconcat("64.", + !strconcat(OpStr, "32 \t$d, $a;")))), + [(set Int64Regs:$d, (OpNode Int32Regs:$a))]>; +} + +defm Sint_extend_1 : INT_EXTEND_SIGNED_1<sext>; +defm Zint_extend_1 : INT_EXTEND_UNSIGNED_1<zext>; +defm Aint_extend_1 : INT_EXTEND_UNSIGNED_1<anyext>; + +defm Sint_extend : INT_EXTEND <"s", sext>; +defm Zint_extend : INT_EXTEND <"u", zext>; +defm Aint_extend : INT_EXTEND <"u", anyext>; + +class TRUNC_to1_asm<string sz> { + string s = !strconcat("{{\n\t", + !strconcat(".reg ", + !strconcat(sz, + !strconcat(" temp;\n\t", + !strconcat("and", + !strconcat(sz, + !strconcat("\t temp, $a, 1;\n\t", + !strconcat("setp", + !strconcat(sz, ".eq \t $d, temp, 1;\n\t}}"))))))))); +} + +def TRUNC_64to32 : NVPTXInst<(outs Int32Regs:$d), (ins Int64Regs:$a), + "cvt.u32.u64 \t$d, $a;", + [(set Int32Regs:$d, (trunc 
Int64Regs:$a))]>; +def TRUNC_64to16 : NVPTXInst<(outs Int16Regs:$d), (ins Int64Regs:$a), + "cvt.u16.u64 \t$d, $a;", + [(set Int16Regs:$d, (trunc Int64Regs:$a))]>; +def TRUNC_64to8 : NVPTXInst<(outs Int8Regs:$d), (ins Int64Regs:$a), + "cvt.u8.u64 \t$d, $a;", + [(set Int8Regs:$d, (trunc Int64Regs:$a))]>; +def TRUNC_32to16 : NVPTXInst<(outs Int16Regs:$d), (ins Int32Regs:$a), + "cvt.u16.u32 \t$d, $a;", + [(set Int16Regs:$d, (trunc Int32Regs:$a))]>; +def TRUNC_32to8 : NVPTXInst<(outs Int8Regs:$d), (ins Int32Regs:$a), + "cvt.u8.u32 \t$d, $a;", + [(set Int8Regs:$d, (trunc Int32Regs:$a))]>; +def TRUNC_16to8 : NVPTXInst<(outs Int8Regs:$d), (ins Int16Regs:$a), + "cvt.u8.u16 \t$d, $a;", + [(set Int8Regs:$d, (trunc Int16Regs:$a))]>; +def TRUNC_64to1 : NVPTXInst<(outs Int1Regs:$d), (ins Int64Regs:$a), + TRUNC_to1_asm<".b64">.s, + [(set Int1Regs:$d, (trunc Int64Regs:$a))]>; +def TRUNC_32to1 : NVPTXInst<(outs Int1Regs:$d), (ins Int32Regs:$a), + TRUNC_to1_asm<".b32">.s, + [(set Int1Regs:$d, (trunc Int32Regs:$a))]>; +def TRUNC_16to1 : NVPTXInst<(outs Int1Regs:$d), (ins Int16Regs:$a), + TRUNC_to1_asm<".b16">.s, + [(set Int1Regs:$d, (trunc Int16Regs:$a))]>; +def TRUNC_8to1 : NVPTXInst<(outs Int1Regs:$d), (ins Int8Regs:$a), + TRUNC_to1_asm<".b16">.s, + [(set Int1Regs:$d, (trunc Int8Regs:$a))]>; + +// Select instructions +def : Pat<(select Int32Regs:$pred, Int8Regs:$a, Int8Regs:$b), + (SELECTi8rr Int8Regs:$a, Int8Regs:$b, (TRUNC_32to1 Int32Regs:$pred))>; +def : Pat<(select Int32Regs:$pred, Int16Regs:$a, Int16Regs:$b), + (SELECTi16rr Int16Regs:$a, Int16Regs:$b, + (TRUNC_32to1 Int32Regs:$pred))>; +def : Pat<(select Int32Regs:$pred, Int32Regs:$a, Int32Regs:$b), + (SELECTi32rr Int32Regs:$a, Int32Regs:$b, + (TRUNC_32to1 Int32Regs:$pred))>; +def : Pat<(select Int32Regs:$pred, Int64Regs:$a, Int64Regs:$b), + (SELECTi64rr Int64Regs:$a, Int64Regs:$b, + (TRUNC_32to1 Int32Regs:$pred))>; +def : Pat<(select Int32Regs:$pred, Float32Regs:$a, Float32Regs:$b), + (SELECTf32rr Float32Regs:$a, 
Float32Regs:$b, + (TRUNC_32to1 Int32Regs:$pred))>; +def : Pat<(select Int32Regs:$pred, Float64Regs:$a, Float64Regs:$b), + (SELECTf64rr Float64Regs:$a, Float64Regs:$b, + (TRUNC_32to1 Int32Regs:$pred))>; + +class F_BITCONVERT<string SzStr, NVPTXRegClass regclassIn, + NVPTXRegClass regclassOut> : + NVPTXInst<(outs regclassOut:$d), (ins regclassIn:$a), + !strconcat("mov.b", !strconcat(SzStr, " \t $d, $a;")), + [(set regclassOut:$d, (bitconvert regclassIn:$a))]>; + +def BITCONVERT_32_I2F : F_BITCONVERT<"32", Int32Regs, Float32Regs>; +def BITCONVERT_32_F2I : F_BITCONVERT<"32", Float32Regs, Int32Regs>; +def BITCONVERT_64_I2F : F_BITCONVERT<"64", Int64Regs, Float64Regs>; +def BITCONVERT_64_F2I : F_BITCONVERT<"64", Float64Regs, Int64Regs>; + +// pack a set of smaller int registers to a larger int register +def V4I8toI32 : NVPTXInst<(outs Int32Regs:$d), + (ins Int8Regs:$s1, Int8Regs:$s2, + Int8Regs:$s3, Int8Regs:$s4), + !strconcat("{{\n\t.reg .b8\t%t<4>;", + !strconcat("\n\tcvt.u8.u8\t%t0, $s1;", + !strconcat("\n\tcvt.u8.u8\t%t1, $s2;", + !strconcat("\n\tcvt.u8.u8\t%t2, $s3;", + !strconcat("\n\tcvt.u8.u8\t%t3, $s4;", + "\n\tmov.b32\t$d, {%t0, %t1, %t2, %t3};\n\t}}"))))), + []>; +def V4I16toI64 : NVPTXInst<(outs Int64Regs:$d), + (ins Int16Regs:$s1, Int16Regs:$s2, + Int16Regs:$s3, Int16Regs:$s4), + "mov.b64\t$d, {{$s1, $s2, $s3, $s4}};", + []>; +def V2I8toI16 : NVPTXInst<(outs Int16Regs:$d), + (ins Int8Regs:$s1, Int8Regs:$s2), + !strconcat("{{\n\t.reg .b8\t%t<2>;", + !strconcat("\n\tcvt.u8.u8\t%t0, $s1;", + !strconcat("\n\tcvt.u8.u8\t%t1, $s2;", + "\n\tmov.b16\t$d, {%t0, %t1};\n\t}}"))), + []>; +def V2I16toI32 : NVPTXInst<(outs Int32Regs:$d), + (ins Int16Regs:$s1, Int16Regs:$s2), + "mov.b32\t$d, {{$s1, $s2}};", + []>; +def V2I32toI64 : NVPTXInst<(outs Int64Regs:$d), + (ins Int32Regs:$s1, Int32Regs:$s2), + "mov.b64\t$d, {{$s1, $s2}};", + []>; +def V2F32toF64 : NVPTXInst<(outs Float64Regs:$d), + (ins Float32Regs:$s1, Float32Regs:$s2), + "mov.b64\t$d, {{$s1, $s2}};", + []>; + +// 
unpack a larger int register to a set of smaller int registers +def I32toV4I8 : NVPTXInst<(outs Int8Regs:$d1, Int8Regs:$d2, + Int8Regs:$d3, Int8Regs:$d4), + (ins Int32Regs:$s), + !strconcat("{{\n\t.reg .b8\t%t<4>;", + !strconcat("\n\tmov.b32\t{%t0, %t1, %t2, %t3}, $s;", + !strconcat("\n\tcvt.u8.u8\t$d1, %t0;", + !strconcat("\n\tcvt.u8.u8\t$d2, %t1;", + !strconcat("\n\tcvt.u8.u8\t$d3, %t2;", + "\n\tcvt.u8.u8\t$d4, %t3;\n\t}}"))))), + []>; +def I64toV4I16 : NVPTXInst<(outs Int16Regs:$d1, Int16Regs:$d2, + Int16Regs:$d3, Int16Regs:$d4), + (ins Int64Regs:$s), + "mov.b64\t{{$d1, $d2, $d3, $d4}}, $s;", + []>; +def I16toV2I8 : NVPTXInst<(outs Int8Regs:$d1, Int8Regs:$d2), + (ins Int16Regs:$s), + !strconcat("{{\n\t.reg .b8\t%t<2>;", + !strconcat("\n\tmov.b16\t{%t0, %t1}, $s;", + !strconcat("\n\tcvt.u8.u8\t$d1, %t0;", + "\n\tcvt.u8.u8\t$d2, %t1;\n\t}}"))), + []>; +def I32toV2I16 : NVPTXInst<(outs Int16Regs:$d1, Int16Regs:$d2), + (ins Int32Regs:$s), + "mov.b32\t{{$d1, $d2}}, $s;", + []>; +def I64toV2I32 : NVPTXInst<(outs Int32Regs:$d1, Int32Regs:$d2), + (ins Int64Regs:$s), + "mov.b64\t{{$d1, $d2}}, $s;", + []>; +def F64toV2F32 : NVPTXInst<(outs Float32Regs:$d1, Float32Regs:$d2), + (ins Float64Regs:$s), + "mov.b64\t{{$d1, $d2}}, $s;", + []>; + +def FPRound_ftz : NVPTXInst<(outs Float32Regs:$d), (ins Float64Regs:$a), + "cvt.rn.ftz.f32.f64 \t$d, $a;", + [(set Float32Regs:$d, (fround Float64Regs:$a))]>, Requires<[doF32FTZ]>; + +def FPRound : NVPTXInst<(outs Float32Regs:$d), (ins Float64Regs:$a), + "cvt.rn.f32.f64 \t$d, $a;", + [(set Float32Regs:$d, (fround Float64Regs:$a))]>; + +def FPExtend_ftz : NVPTXInst<(outs Float64Regs:$d), (ins Float32Regs:$a), + "cvt.ftz.f64.f32 \t$d, $a;", + [(set Float64Regs:$d, (fextend Float32Regs:$a))]>, Requires<[doF32FTZ]>; + +def FPExtend : NVPTXInst<(outs Float64Regs:$d), (ins Float32Regs:$a), + "cvt.f64.f32 \t$d, $a;", + [(set Float64Regs:$d, (fextend Float32Regs:$a))]>; + +def retflag : SDNode<"NVPTXISD::RET_FLAG", SDTNone, + [SDNPHasChain, 
SDNPOptInGlue]>; + +//----------------------------------- +// Control-flow +//----------------------------------- + +let isTerminator=1 in { + let isReturn=1, isBarrier=1 in + def Return : NVPTXInst<(outs), (ins), "ret;", [(retflag)]>; + + let isBranch=1 in + def CBranch : NVPTXInst<(outs), (ins Int1Regs:$a, brtarget:$target), + "@$a bra \t$target;", + [(brcond Int1Regs:$a, bb:$target)]>; + let isBranch=1 in + def CBranchOther : NVPTXInst<(outs), (ins Int1Regs:$a, brtarget:$target), + "@!$a bra \t$target;", + []>; + + let isBranch=1, isBarrier=1 in + def GOTO : NVPTXInst<(outs), (ins brtarget:$target), + "bra.uni \t$target;", + [(br bb:$target)]>; +} + +def : Pat<(brcond Int32Regs:$a, bb:$target), (CBranch + (ISetUNEi32ri_p Int32Regs:$a, 0), bb:$target)>; + +// SelectionDAGBuilder::visitSwitchCase() will invert the condition of a +// conditional branch if +// the target block is the next block so that the code can fall through to the +// target block. +// The inversion is done by 'xor condition, 1', which will be translated to +// (setne condition, -1). +// Since ptx supports '@!pred bra target', we should use it. 
+def : Pat<(brcond (i1 (setne Int1Regs:$a, -1)), bb:$target), + (CBranchOther Int1Regs:$a, bb:$target)>; + +// Call +def SDT_NVPTXCallSeqStart : SDCallSeqStart<[ SDTCisVT<0, i32> ]>; +def SDT_NVPTXCallSeqEnd : SDCallSeqEnd<[ SDTCisVT<0, i32>, + SDTCisVT<1, i32> ]>; + +def callseq_start : SDNode<"ISD::CALLSEQ_START", SDT_NVPTXCallSeqStart, + [SDNPHasChain, SDNPOutGlue, SDNPSideEffect]>; +def callseq_end : SDNode<"ISD::CALLSEQ_END", SDT_NVPTXCallSeqEnd, + [SDNPHasChain, SDNPOptInGlue, SDNPOutGlue, + SDNPSideEffect]>; + +def SDT_NVPTXCall : SDTypeProfile<0, 1, [SDTCisVT<0, i32>]>; +def call : SDNode<"NVPTXISD::CALL", SDT_NVPTXCall, + [SDNPHasChain, SDNPOptInGlue, SDNPOutGlue]>; +def calltarget : Operand<i32>; +let isCall=1 in { + def CALL : NVPTXInst<(outs), (ins calltarget:$dst), + "call \t$dst, (1);", []>; +} + +def : Pat<(call tglobaladdr:$dst), + (CALL tglobaladdr:$dst)>; +def : Pat<(call texternalsym:$dst), + (CALL texternalsym:$dst)>; + +// Pseudo instructions. +class Pseudo<dag outs, dag ins, string asmstr, list<dag> pattern> + : NVPTXInst<outs, ins, asmstr, pattern>; + +// @TODO: We use some tricks here to emit curly braces. Can we clean this up +// a bit without TableGen modifications? +def Callseq_Start : NVPTXInst<(outs), (ins i32imm:$amt), + "// Callseq Start $amt\n\t{{\n\t.reg .b32 temp_param_reg;\n\t// <end>}}", + [(callseq_start timm:$amt)]>; +def Callseq_End : NVPTXInst<(outs), (ins i32imm:$amt1, i32imm:$amt2), + "\n\t//{{\n\t}}// Callseq End $amt1", + [(callseq_end timm:$amt1, timm:$amt2)]>; + +// trap instruction + +def trapinst : NVPTXInst<(outs), (ins), + "trap;", + [(trap)]>; + +include "NVPTXVector.td" + +include "NVPTXIntrinsics.td" + + +//----------------------------------- +// Notes +//----------------------------------- +// BSWAP is currently expanded. 
The following is a more efficient +// - for < sm_20, use vector scalar mov, as Tesla supports native 16-bit registers +// - for sm_20, use prmt (use vector scalar mov to get the pack and +// unpack). sm_20 supports native 32-bit registers, but not native 16-bit +// registers. diff --git a/lib/Target/NVPTX/NVPTXIntrinsics.td b/lib/Target/NVPTX/NVPTXIntrinsics.td new file mode 100644 index 0000000..028a94b --- /dev/null +++ b/lib/Target/NVPTX/NVPTXIntrinsics.td @@ -0,0 +1,1675 @@ +//===- NVPTXIntrinsics.td - PTX Intrinsics Instructions -------*- tblgen -*-==// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// + +def immFloat0 : PatLeaf<(fpimm), [{ + float f = (float)N->getValueAPF().convertToFloat(); + return (f==0.0f); +}]>; + +def immFloat1 : PatLeaf<(fpimm), [{ + float f = (float)N->getValueAPF().convertToFloat(); + return (f==1.0f); +}]>; + +def immDouble0 : PatLeaf<(fpimm), [{ + double d = (double)N->getValueAPF().convertToDouble(); + return (d==0.0); +}]>; + +def immDouble1 : PatLeaf<(fpimm), [{ + double d = (double)N->getValueAPF().convertToDouble(); + return (d==1.0); +}]>; + + + +//----------------------------------- +// Synchronization Functions +//----------------------------------- +def INT_CUDA_SYNCTHREADS : NVPTXInst<(outs), (ins), + "bar.sync \t0;", + [(int_cuda_syncthreads)]>; +def INT_BARRIER0 : NVPTXInst<(outs), (ins), + "bar.sync \t0;", + [(int_nvvm_barrier0)]>; +def INT_BARRIER0_POPC : NVPTXInst<(outs Int32Regs:$dst), (ins Int32Regs:$pred), + !strconcat("{{ \n\t", + !strconcat(".reg .pred \t%p1; \n\t", + !strconcat("setp.ne.u32 \t%p1, $pred, 0; \n\t", + !strconcat("bar.red.popc.u32 \t$dst, 0, %p1; \n\t", + !strconcat("}}", ""))))), + [(set Int32Regs:$dst, (int_nvvm_barrier0_popc Int32Regs:$pred))]>; +def INT_BARRIER0_AND : NVPTXInst<(outs Int32Regs:$dst), 
(ins Int32Regs:$pred), + !strconcat("{{ \n\t", + !strconcat(".reg .pred \t%p1; \n\t", + !strconcat(".reg .pred \t%p2; \n\t", + !strconcat("setp.ne.u32 \t%p1, $pred, 0; \n\t", + !strconcat("bar.red.and.pred \t%p2, 0, %p1; \n\t", + !strconcat("selp.u32 \t$dst, 1, 0, %p2; \n\t", + !strconcat("}}", ""))))))), + [(set Int32Regs:$dst, (int_nvvm_barrier0_and Int32Regs:$pred))]>; +def INT_BARRIER0_OR : NVPTXInst<(outs Int32Regs:$dst), (ins Int32Regs:$pred), + !strconcat("{{ \n\t", + !strconcat(".reg .pred \t%p1; \n\t", + !strconcat(".reg .pred \t%p2; \n\t", + !strconcat("setp.ne.u32 \t%p1, $pred, 0; \n\t", + !strconcat("bar.red.or.pred \t%p2, 0, %p1; \n\t", + !strconcat("selp.u32 \t$dst, 1, 0, %p2; \n\t", + !strconcat("}}", ""))))))), + [(set Int32Regs:$dst, (int_nvvm_barrier0_or Int32Regs:$pred))]>; + + +//----------------------------------- +// Explicit Memory Fence Functions +//----------------------------------- +class MEMBAR<string StrOp, Intrinsic IntOP> : + NVPTXInst<(outs), (ins), + StrOp, [(IntOP)]>; + +def INT_MEMBAR_CTA : MEMBAR<"membar.cta;", int_nvvm_membar_cta>; +def INT_MEMBAR_GL : MEMBAR<"membar.gl;", int_nvvm_membar_gl>; +def INT_MEMBAR_SYS : MEMBAR<"membar.sys;", int_nvvm_membar_sys>; + + +//----------------------------------- +// Math Functions +//----------------------------------- + +// Map min(1.0, max(0.0, x)) to sat(x) +multiclass SAT<NVPTXRegClass regclass, Operand fimm, Intrinsic IntMinOp, + Intrinsic IntMaxOp, PatLeaf f0, PatLeaf f1, string OpStr> { + + // fmin(1.0, fmax(0.0, x)) => sat(x) + def SAT11 : NVPTXInst<(outs regclass:$dst), + (ins fimm:$srcf0, fimm:$srcf1, regclass:$src), + OpStr, + [(set regclass:$dst, (IntMinOp f1:$srcf0 , + (IntMaxOp f0:$srcf1, regclass:$src)))]>; + + // fmin(1.0, fmax(x, 0.0)) => sat(x) + def SAT12 : NVPTXInst<(outs regclass:$dst), + (ins fimm:$srcf0, fimm:$srcf1, regclass:$src), + OpStr, + [(set regclass:$dst, (IntMinOp f1:$srcf0 , + (IntMaxOp regclass:$src, f0:$srcf1)))]>; + + // fmin(fmax(0.0, x), 1.0) => sat(x) 
+ def SAT13 : NVPTXInst<(outs regclass:$dst), + (ins fimm:$srcf0, fimm:$srcf1, regclass:$src), + OpStr, + [(set regclass:$dst, (IntMinOp + (IntMaxOp f0:$srcf0, regclass:$src), f1:$srcf1))]>; + + // fmin(fmax(x, 0.0), 1.0) => sat(x) + def SAT14 : NVPTXInst<(outs regclass:$dst), + (ins fimm:$srcf0, fimm:$srcf1, regclass:$src), + OpStr, + [(set regclass:$dst, (IntMinOp + (IntMaxOp regclass:$src, f0:$srcf0), f1:$srcf1))]>; + +} +// Note that max(0.0, min(x, 1.0)) cannot be mapped to sat(x) because when x +// is NaN +// max(0.0, min(x, 1.0)) is 1.0 while sat(x) is 0. +// Same story for fmax, fmin. + +defm SAT_fmin_fmax_f : SAT<Float32Regs, f32imm, int_nvvm_fmin_f, + int_nvvm_fmax_f, immFloat0, immFloat1, + "cvt.sat.f32.f32 \t$dst, $src; \n">; +defm SAT_fmin_fmax_d : SAT<Float64Regs, f64imm, int_nvvm_fmin_d, + int_nvvm_fmax_d, immDouble0, immDouble1, + "cvt.sat.f64.f64 \t$dst, $src; \n">; + + +// We need a full string for OpcStr here because we need to deal with case like +// INT_PTX_RECIP. +class F_MATH_1<string OpcStr, NVPTXRegClass target_regclass, + NVPTXRegClass src_regclass, Intrinsic IntOP> + : NVPTXInst<(outs target_regclass:$dst), (ins src_regclass:$src0), + OpcStr, + [(set target_regclass:$dst, (IntOP src_regclass:$src0))]>; + +// We need a full string for OpcStr here because we need to deal with the case +// like INT_PTX_NATIVE_POWR_F. 
+class F_MATH_2<string OpcStr, NVPTXRegClass t_regclass, + NVPTXRegClass s0_regclass, NVPTXRegClass s1_regclass, Intrinsic IntOP> + : NVPTXInst<(outs t_regclass:$dst), + (ins s0_regclass:$src0, s1_regclass:$src1), + OpcStr, + [(set t_regclass:$dst, (IntOP s0_regclass:$src0, s1_regclass:$src1))]>; + +class F_MATH_3<string OpcStr, NVPTXRegClass t_regclass, + NVPTXRegClass s0_regclass, NVPTXRegClass s1_regclass, + NVPTXRegClass s2_regclass, Intrinsic IntOP> + : NVPTXInst<(outs t_regclass:$dst), + (ins s0_regclass:$src0, s1_regclass:$src1, s2_regclass:$src2), + OpcStr, + [(set t_regclass:$dst, + (IntOP s0_regclass:$src0, s1_regclass:$src1, s2_regclass:$src2))]>; + +// +// MISC +// + +def INT_NVVM_CLZ_I : F_MATH_1<"clz.b32 \t$dst, $src0;", Int32Regs, Int32Regs, + int_nvvm_clz_i>; +def INT_NVVM_CLZ_LL : F_MATH_1<"clz.b64 \t$dst, $src0;", Int32Regs, Int64Regs, + int_nvvm_clz_ll>; + +def INT_NVVM_POPC_I : F_MATH_1<"popc.b32 \t$dst, $src0;", Int32Regs, Int32Regs, + int_nvvm_popc_i>; +def INT_NVVM_POPC_LL : F_MATH_1<"popc.b64 \t$dst, $src0;", Int32Regs, Int64Regs, + int_nvvm_popc_ll>; + +def INT_NVVM_PRMT : F_MATH_3<"prmt.b32 \t$dst, $src0, $src1, $src2;", Int32Regs, + Int32Regs, Int32Regs, Int32Regs, int_nvvm_prmt>; + +// +// Min Max +// + +def INT_NVVM_MIN_I : F_MATH_2<"min.s32 \t$dst, $src0, $src1;", Int32Regs, + Int32Regs, Int32Regs, int_nvvm_min_i>; +def INT_NVVM_MIN_UI : F_MATH_2<"min.u32 \t$dst, $src0, $src1;", Int32Regs, + Int32Regs, Int32Regs, int_nvvm_min_ui>; + +def INT_NVVM_MIN_LL : F_MATH_2<"min.s64 \t$dst, $src0, $src1;", Int64Regs, + Int64Regs, Int64Regs, int_nvvm_min_ll>; +def INT_NVVM_MIN_ULL : F_MATH_2<"min.u64 \t$dst, $src0, $src1;", Int64Regs, + Int64Regs, Int64Regs, int_nvvm_min_ull>; + +def INT_NVVM_MAX_I : F_MATH_2<"max.s32 \t$dst, $src0, $src1;", Int32Regs, + Int32Regs, Int32Regs, int_nvvm_max_i>; +def INT_NVVM_MAX_UI : F_MATH_2<"max.u32 \t$dst, $src0, $src1;", Int32Regs, + Int32Regs, Int32Regs, int_nvvm_max_ui>; + +def INT_NVVM_MAX_LL : 
F_MATH_2<"max.s64 \t$dst, $src0, $src1;", Int64Regs, + Int64Regs, Int64Regs, int_nvvm_max_ll>; +def INT_NVVM_MAX_ULL : F_MATH_2<"max.u64 \t$dst, $src0, $src1;", Int64Regs, + Int64Regs, Int64Regs, int_nvvm_max_ull>; + +def INT_NVVM_FMIN_F : F_MATH_2<"min.f32 \t$dst, $src0, $src1;", Float32Regs, + Float32Regs, Float32Regs, int_nvvm_fmin_f>; +def INT_NVVM_FMIN_FTZ_F : F_MATH_2<"min.ftz.f32 \t$dst, $src0, $src1;", + Float32Regs, Float32Regs, Float32Regs, int_nvvm_fmin_ftz_f>; + +def INT_NVVM_FMAX_F : F_MATH_2<"max.f32 \t$dst, $src0, $src1;", Float32Regs, + Float32Regs, Float32Regs, int_nvvm_fmax_f>; +def INT_NVVM_FMAX_FTZ_F : F_MATH_2<"max.ftz.f32 \t$dst, $src0, $src1;", + Float32Regs, Float32Regs, Float32Regs, int_nvvm_fmax_ftz_f>; + +def INT_NVVM_FMIN_D : F_MATH_2<"min.f64 \t$dst, $src0, $src1;", Float64Regs, + Float64Regs, Float64Regs, int_nvvm_fmin_d>; +def INT_NVVM_FMAX_D : F_MATH_2<"max.f64 \t$dst, $src0, $src1;", Float64Regs, + Float64Regs, Float64Regs, int_nvvm_fmax_d>; + +// +// Multiplication +// + +def INT_NVVM_MULHI_I : F_MATH_2<"mul.hi.s32 \t$dst, $src0, $src1;", Int32Regs, + Int32Regs, Int32Regs, int_nvvm_mulhi_i>; +def INT_NVVM_MULHI_UI : F_MATH_2<"mul.hi.u32 \t$dst, $src0, $src1;", Int32Regs, + Int32Regs, Int32Regs, int_nvvm_mulhi_ui>; + +def INT_NVVM_MULHI_LL : F_MATH_2<"mul.hi.s64 \t$dst, $src0, $src1;", Int64Regs, + Int64Regs, Int64Regs, int_nvvm_mulhi_ll>; +def INT_NVVM_MULHI_ULL : F_MATH_2<"mul.hi.u64 \t$dst, $src0, $src1;", Int64Regs, + Int64Regs, Int64Regs, int_nvvm_mulhi_ull>; + +def INT_NVVM_MUL_RN_FTZ_F : F_MATH_2<"mul.rn.ftz.f32 \t$dst, $src0, $src1;", + Float32Regs, Float32Regs, Float32Regs, int_nvvm_mul_rn_ftz_f>; +def INT_NVVM_MUL_RN_F : F_MATH_2<"mul.rn.f32 \t$dst, $src0, $src1;", + Float32Regs, Float32Regs, Float32Regs, int_nvvm_mul_rn_f>; +def INT_NVVM_MUL_RZ_FTZ_F : F_MATH_2<"mul.rz.ftz.f32 \t$dst, $src0, $src1;", + Float32Regs, Float32Regs, Float32Regs, int_nvvm_mul_rz_ftz_f>; +def INT_NVVM_MUL_RZ_F : F_MATH_2<"mul.rz.f32 \t$dst, 
$src0, $src1;", + Float32Regs, Float32Regs, Float32Regs, int_nvvm_mul_rz_f>; +def INT_NVVM_MUL_RM_FTZ_F : F_MATH_2<"mul.rm.ftz.f32 \t$dst, $src0, $src1;", + Float32Regs, Float32Regs, Float32Regs, int_nvvm_mul_rm_ftz_f>; +def INT_NVVM_MUL_RM_F : F_MATH_2<"mul.rm.f32 \t$dst, $src0, $src1;", + Float32Regs, Float32Regs, Float32Regs, int_nvvm_mul_rm_f>; +def INT_NVVM_MUL_RP_FTZ_F : F_MATH_2<"mul.rp.ftz.f32 \t$dst, $src0, $src1;", + Float32Regs, Float32Regs, Float32Regs, int_nvvm_mul_rp_ftz_f>; +def INT_NVVM_MUL_RP_F : F_MATH_2<"mul.rp.f32 \t$dst, $src0, $src1;", + Float32Regs, Float32Regs, Float32Regs, int_nvvm_mul_rp_f>; + +def INT_NVVM_MUL_RN_D : F_MATH_2<"mul.rn.f64 \t$dst, $src0, $src1;", + Float64Regs, Float64Regs, Float64Regs, int_nvvm_mul_rn_d>; +def INT_NVVM_MUL_RZ_D : F_MATH_2<"mul.rz.f64 \t$dst, $src0, $src1;", + Float64Regs, Float64Regs, Float64Regs, int_nvvm_mul_rz_d>; +def INT_NVVM_MUL_RM_D : F_MATH_2<"mul.rm.f64 \t$dst, $src0, $src1;", + Float64Regs, Float64Regs, Float64Regs, int_nvvm_mul_rm_d>; +def INT_NVVM_MUL_RP_D : F_MATH_2<"mul.rp.f64 \t$dst, $src0, $src1;", + Float64Regs, Float64Regs, Float64Regs, int_nvvm_mul_rp_d>; + +def INT_NVVM_MUL24_I : F_MATH_2<"mul24.lo.s32 \t$dst, $src0, $src1;", + Int32Regs, Int32Regs, Int32Regs, int_nvvm_mul24_i>; +def INT_NVVM_MUL24_UI : F_MATH_2<"mul24.lo.u32 \t$dst, $src0, $src1;", + Int32Regs, Int32Regs, Int32Regs, int_nvvm_mul24_ui>; + +// +// Div +// + +def INT_NVVM_DIV_APPROX_FTZ_F + : F_MATH_2<"div.approx.ftz.f32 \t$dst, $src0, $src1;", Float32Regs, + Float32Regs, Float32Regs, int_nvvm_div_approx_ftz_f>; +def INT_NVVM_DIV_APPROX_F : F_MATH_2<"div.approx.f32 \t$dst, $src0, $src1;", + Float32Regs, Float32Regs, Float32Regs, int_nvvm_div_approx_f>; + +def INT_NVVM_DIV_RN_FTZ_F : F_MATH_2<"div.rn.ftz.f32 \t$dst, $src0, $src1;", + Float32Regs, Float32Regs, Float32Regs, int_nvvm_div_rn_ftz_f>; +def INT_NVVM_DIV_RN_F : F_MATH_2<"div.rn.f32 \t$dst, $src0, $src1;", + Float32Regs, Float32Regs, Float32Regs, 
int_nvvm_div_rn_f>; +def INT_NVVM_DIV_RZ_FTZ_F : F_MATH_2<"div.rz.ftz.f32 \t$dst, $src0, $src1;", + Float32Regs, Float32Regs, Float32Regs, int_nvvm_div_rz_ftz_f>; +def INT_NVVM_DIV_RZ_F : F_MATH_2<"div.rz.f32 \t$dst, $src0, $src1;", + Float32Regs, Float32Regs, Float32Regs, int_nvvm_div_rz_f>; +def INT_NVVM_DIV_RM_FTZ_F : F_MATH_2<"div.rm.ftz.f32 \t$dst, $src0, $src1;", + Float32Regs, Float32Regs, Float32Regs, int_nvvm_div_rm_ftz_f>; +def INT_NVVM_DIV_RM_F : F_MATH_2<"div.rm.f32 \t$dst, $src0, $src1;", + Float32Regs, Float32Regs, Float32Regs, int_nvvm_div_rm_f>; +def INT_NVVM_DIV_RP_FTZ_F : F_MATH_2<"div.rp.ftz.f32 \t$dst, $src0, $src1;", + Float32Regs, Float32Regs, Float32Regs, int_nvvm_div_rp_ftz_f>; +def INT_NVVM_DIV_RP_F : F_MATH_2<"div.rp.f32 \t$dst, $src0, $src1;", + Float32Regs, Float32Regs, Float32Regs, int_nvvm_div_rp_f>; + +def INT_NVVM_DIV_RN_D : F_MATH_2<"div.rn.f64 \t$dst, $src0, $src1;", + Float64Regs, Float64Regs, Float64Regs, int_nvvm_div_rn_d>; +def INT_NVVM_DIV_RZ_D : F_MATH_2<"div.rz.f64 \t$dst, $src0, $src1;", + Float64Regs, Float64Regs, Float64Regs, int_nvvm_div_rz_d>; +def INT_NVVM_DIV_RM_D : F_MATH_2<"div.rm.f64 \t$dst, $src0, $src1;", + Float64Regs, Float64Regs, Float64Regs, int_nvvm_div_rm_d>; +def INT_NVVM_DIV_RP_D : F_MATH_2<"div.rp.f64 \t$dst, $src0, $src1;", + Float64Regs, Float64Regs, Float64Regs, int_nvvm_div_rp_d>; + +// +// Brev +// + +def INT_NVVM_BREV32 : F_MATH_1<"brev.b32 \t$dst, $src0;", Int32Regs, Int32Regs, + int_nvvm_brev32>; +def INT_NVVM_BREV64 : F_MATH_1<"brev.b64 \t$dst, $src0;", Int64Regs, Int64Regs, + int_nvvm_brev64>; + +// +// Sad +// + +def INT_NVVM_SAD_I : F_MATH_3<"sad.s32 \t$dst, $src0, $src1, $src2;", + Int32Regs, Int32Regs, Int32Regs, Int32Regs, int_nvvm_sad_i>; +def INT_NVVM_SAD_UI : F_MATH_3<"sad.u32 \t$dst, $src0, $src1, $src2;", + Int32Regs, Int32Regs, Int32Regs, Int32Regs, int_nvvm_sad_ui>; + +// +// Floor Ceil +// + +def INT_NVVM_FLOOR_FTZ_F : F_MATH_1<"cvt.rmi.ftz.f32.f32 \t$dst, $src0;", + Float32Regs, 
Float32Regs, int_nvvm_floor_ftz_f>; +def INT_NVVM_FLOOR_F : F_MATH_1<"cvt.rmi.f32.f32 \t$dst, $src0;", + Float32Regs, Float32Regs, int_nvvm_floor_f>; +def INT_NVVM_FLOOR_D : F_MATH_1<"cvt.rmi.f64.f64 \t$dst, $src0;", + Float64Regs, Float64Regs, int_nvvm_floor_d>; + +def INT_NVVM_CEIL_FTZ_F : F_MATH_1<"cvt.rpi.ftz.f32.f32 \t$dst, $src0;", + Float32Regs, Float32Regs, int_nvvm_ceil_ftz_f>; +def INT_NVVM_CEIL_F : F_MATH_1<"cvt.rpi.f32.f32 \t$dst, $src0;", + Float32Regs, Float32Regs, int_nvvm_ceil_f>; +def INT_NVVM_CEIL_D : F_MATH_1<"cvt.rpi.f64.f64 \t$dst, $src0;", + Float64Regs, Float64Regs, int_nvvm_ceil_d>; + +// +// Abs +// + +def INT_NVVM_ABS_I : F_MATH_1<"abs.s32 \t$dst, $src0;", Int32Regs, Int32Regs, + int_nvvm_abs_i>; +def INT_NVVM_ABS_LL : F_MATH_1<"abs.s64 \t$dst, $src0;", Int64Regs, Int64Regs, + int_nvvm_abs_ll>; + +def INT_NVVM_FABS_FTZ_F : F_MATH_1<"abs.ftz.f32 \t$dst, $src0;", Float32Regs, + Float32Regs, int_nvvm_fabs_ftz_f>; +def INT_NVVM_FABS_F : F_MATH_1<"abs.f32 \t$dst, $src0;", Float32Regs, + Float32Regs, int_nvvm_fabs_f>; + +def INT_NVVM_FABS_D : F_MATH_1<"abs.f64 \t$dst, $src0;", Float64Regs, + Float64Regs, int_nvvm_fabs_d>; + +// +// Round +// + +def INT_NVVM_ROUND_FTZ_F : F_MATH_1<"cvt.rni.ftz.f32.f32 \t$dst, $src0;", + Float32Regs, Float32Regs, int_nvvm_round_ftz_f>; +def INT_NVVM_ROUND_F : F_MATH_1<"cvt.rni.f32.f32 \t$dst, $src0;", Float32Regs, + Float32Regs, int_nvvm_round_f>; + +def INT_NVVM_ROUND_D : F_MATH_1<"cvt.rni.f64.f64 \t$dst, $src0;", Float64Regs, + Float64Regs, int_nvvm_round_d>; + +// +// Trunc +// + +def INT_NVVM_TRUNC_FTZ_F : F_MATH_1<"cvt.rzi.ftz.f32.f32 \t$dst, $src0;", + Float32Regs, Float32Regs, int_nvvm_trunc_ftz_f>; +def INT_NVVM_TRUNC_F : F_MATH_1<"cvt.rzi.f32.f32 \t$dst, $src0;", Float32Regs, + Float32Regs, int_nvvm_trunc_f>; + +def INT_NVVM_TRUNC_D : F_MATH_1<"cvt.rzi.f64.f64 \t$dst, $src0;", Float64Regs, + Float64Regs, int_nvvm_trunc_d>; + +// +// Saturate +// + +def INT_NVVM_SATURATE_FTZ_F : 
F_MATH_1<"cvt.sat.ftz.f32.f32 \t$dst, $src0;", + Float32Regs, Float32Regs, int_nvvm_saturate_ftz_f>; +def INT_NVVM_SATURATE_F : F_MATH_1<"cvt.sat.f32.f32 \t$dst, $src0;", + Float32Regs, Float32Regs, int_nvvm_saturate_f>; + +def INT_NVVM_SATURATE_D : F_MATH_1<"cvt.sat.f64.f64 \t$dst, $src0;", + Float64Regs, Float64Regs, int_nvvm_saturate_d>; + +// +// Exp2 Log2 +// + +def INT_NVVM_EX2_APPROX_FTZ_F : F_MATH_1<"ex2.approx.ftz.f32 \t$dst, $src0;", + Float32Regs, Float32Regs, int_nvvm_ex2_approx_ftz_f>; +def INT_NVVM_EX2_APPROX_F : F_MATH_1<"ex2.approx.f32 \t$dst, $src0;", + Float32Regs, Float32Regs, int_nvvm_ex2_approx_f>; +def INT_NVVM_EX2_APPROX_D : F_MATH_1<"ex2.approx.f64 \t$dst, $src0;", + Float64Regs, Float64Regs, int_nvvm_ex2_approx_d>; + +def INT_NVVM_LG2_APPROX_FTZ_F : F_MATH_1<"lg2.approx.ftz.f32 \t$dst, $src0;", + Float32Regs, Float32Regs, int_nvvm_lg2_approx_ftz_f>; +def INT_NVVM_LG2_APPROX_F : F_MATH_1<"lg2.approx.f32 \t$dst, $src0;", + Float32Regs, Float32Regs, int_nvvm_lg2_approx_f>; +def INT_NVVM_LG2_APPROX_D : F_MATH_1<"lg2.approx.f64 \t$dst, $src0;", + Float64Regs, Float64Regs, int_nvvm_lg2_approx_d>; + +// +// Sin Cos +// + +def INT_NVVM_SIN_APPROX_FTZ_F : F_MATH_1<"sin.approx.ftz.f32 \t$dst, $src0;", + Float32Regs, Float32Regs, int_nvvm_sin_approx_ftz_f>; +def INT_NVVM_SIN_APPROX_F : F_MATH_1<"sin.approx.f32 \t$dst, $src0;", + Float32Regs, Float32Regs, int_nvvm_sin_approx_f>; + +def INT_NVVM_COS_APPROX_FTZ_F : F_MATH_1<"cos.approx.ftz.f32 \t$dst, $src0;", + Float32Regs, Float32Regs, int_nvvm_cos_approx_ftz_f>; +def INT_NVVM_COS_APPROX_F : F_MATH_1<"cos.approx.f32 \t$dst, $src0;", + Float32Regs, Float32Regs, int_nvvm_cos_approx_f>; + +// +// Fma +// + +def INT_NVVM_FMA_RN_FTZ_F + : F_MATH_3<"fma.rn.ftz.f32 \t$dst, $src0, $src1, $src2;", Float32Regs, + Float32Regs, Float32Regs, Float32Regs, int_nvvm_fma_rn_ftz_f>; +def INT_NVVM_FMA_RN_F : F_MATH_3<"fma.rn.f32 \t$dst, $src0, $src1, $src2;", + Float32Regs, Float32Regs, Float32Regs, Float32Regs, 
int_nvvm_fma_rn_f>; +def INT_NVVM_FMA_RZ_FTZ_F + : F_MATH_3<"fma.rz.ftz.f32 \t$dst, $src0, $src1, $src2;", Float32Regs, + Float32Regs, Float32Regs, Float32Regs, int_nvvm_fma_rz_ftz_f>; +def INT_NVVM_FMA_RZ_F : F_MATH_3<"fma.rz.f32 \t$dst, $src0, $src1, $src2;", + Float32Regs, Float32Regs, Float32Regs, Float32Regs, int_nvvm_fma_rz_f>; +def INT_NVVM_FMA_RM_FTZ_F + : F_MATH_3<"fma.rm.ftz.f32 \t$dst, $src0, $src1, $src2;", Float32Regs, + Float32Regs, Float32Regs, Float32Regs, int_nvvm_fma_rm_ftz_f>; +def INT_NVVM_FMA_RM_F : F_MATH_3<"fma.rm.f32 \t$dst, $src0, $src1, $src2;", + Float32Regs, Float32Regs, Float32Regs, Float32Regs, int_nvvm_fma_rm_f>; +def INT_NVVM_FMA_RP_FTZ_F + : F_MATH_3<"fma.rp.ftz.f32 \t$dst, $src0, $src1, $src2;", Float32Regs, + Float32Regs, Float32Regs, Float32Regs, int_nvvm_fma_rp_ftz_f>; +def INT_NVVM_FMA_RP_F : F_MATH_3<"fma.rp.f32 \t$dst, $src0, $src1, $src2;", + Float32Regs, Float32Regs, Float32Regs, Float32Regs, int_nvvm_fma_rp_f>; + +def INT_NVVM_FMA_RN_D : F_MATH_3<"fma.rn.f64 \t$dst, $src0, $src1, $src2;", + Float64Regs, Float64Regs, Float64Regs, Float64Regs, int_nvvm_fma_rn_d>; +def INT_NVVM_FMA_RZ_D : F_MATH_3<"fma.rz.f64 \t$dst, $src0, $src1, $src2;", + Float64Regs, Float64Regs, Float64Regs, Float64Regs, int_nvvm_fma_rz_d>; +def INT_NVVM_FMA_RM_D : F_MATH_3<"fma.rm.f64 \t$dst, $src0, $src1, $src2;", + Float64Regs, Float64Regs, Float64Regs, Float64Regs, int_nvvm_fma_rm_d>; +def INT_NVVM_FMA_RP_D : F_MATH_3<"fma.rp.f64 \t$dst, $src0, $src1, $src2;", + Float64Regs, Float64Regs, Float64Regs, Float64Regs, int_nvvm_fma_rp_d>; + +// +// Rcp +// + +def INT_NVVM_RCP_RN_FTZ_F : F_MATH_1<"rcp.rn.ftz.f32 \t$dst, $src0;", + Float32Regs, Float32Regs, int_nvvm_rcp_rn_ftz_f>; +def INT_NVVM_RCP_RN_F : F_MATH_1<"rcp.rn.f32 \t$dst, $src0;", + Float32Regs, Float32Regs, int_nvvm_rcp_rn_f>; +def INT_NVVM_RCP_RZ_FTZ_F : F_MATH_1<"rcp.rz.ftz.f32 \t$dst, $src0;", + Float32Regs, Float32Regs, int_nvvm_rcp_rz_ftz_f>; +def INT_NVVM_RCP_RZ_F : F_MATH_1<"rcp.rz.f32 
\t$dst, $src0;", + Float32Regs, Float32Regs, int_nvvm_rcp_rz_f>; +def INT_NVVM_RCP_RM_FTZ_F : F_MATH_1<"rcp.rm.ftz.f32 \t$dst, $src0;", + Float32Regs, Float32Regs, int_nvvm_rcp_rm_ftz_f>; +def INT_NVVM_RCP_RM_F : F_MATH_1<"rcp.rm.f32 \t$dst, $src0;", + Float32Regs, Float32Regs, int_nvvm_rcp_rm_f>; +def INT_NVVM_RCP_RP_FTZ_F : F_MATH_1<"rcp.rp.ftz.f32 \t$dst, $src0;", + Float32Regs, Float32Regs, int_nvvm_rcp_rp_ftz_f>; +def INT_NVVM_RCP_RP_F : F_MATH_1<"rcp.rp.f32 \t$dst, $src0;", + Float32Regs, Float32Regs, int_nvvm_rcp_rp_f>; + +def INT_NVVM_RCP_RN_D : F_MATH_1<"rcp.rn.f64 \t$dst, $src0;", Float64Regs, + Float64Regs, int_nvvm_rcp_rn_d>; +def INT_NVVM_RCP_RZ_D : F_MATH_1<"rcp.rz.f64 \t$dst, $src0;", Float64Regs, + Float64Regs, int_nvvm_rcp_rz_d>; +def INT_NVVM_RCP_RM_D : F_MATH_1<"rcp.rm.f64 \t$dst, $src0;", Float64Regs, + Float64Regs, int_nvvm_rcp_rm_d>; +def INT_NVVM_RCP_RP_D : F_MATH_1<"rcp.rp.f64 \t$dst, $src0;", Float64Regs, + Float64Regs, int_nvvm_rcp_rp_d>; + +def INT_NVVM_RCP_APPROX_FTZ_D : F_MATH_1<"rcp.approx.ftz.f64 \t$dst, $src0;", + Float64Regs, Float64Regs, int_nvvm_rcp_approx_ftz_d>; + +// +// Sqrt +// + +def INT_NVVM_SQRT_RN_FTZ_F : F_MATH_1<"sqrt.rn.ftz.f32 \t$dst, $src0;", + Float32Regs, Float32Regs, int_nvvm_sqrt_rn_ftz_f>; +def INT_NVVM_SQRT_RN_F : F_MATH_1<"sqrt.rn.f32 \t$dst, $src0;", Float32Regs, + Float32Regs, int_nvvm_sqrt_rn_f>; +def INT_NVVM_SQRT_RZ_FTZ_F : F_MATH_1<"sqrt.rz.ftz.f32 \t$dst, $src0;", + Float32Regs, Float32Regs, int_nvvm_sqrt_rz_ftz_f>; +def INT_NVVM_SQRT_RZ_F : F_MATH_1<"sqrt.rz.f32 \t$dst, $src0;", Float32Regs, + Float32Regs, int_nvvm_sqrt_rz_f>; +def INT_NVVM_SQRT_RM_FTZ_F : F_MATH_1<"sqrt.rm.ftz.f32 \t$dst, $src0;", + Float32Regs, Float32Regs, int_nvvm_sqrt_rm_ftz_f>; +def INT_NVVM_SQRT_RM_F : F_MATH_1<"sqrt.rm.f32 \t$dst, $src0;", Float32Regs, + Float32Regs, int_nvvm_sqrt_rm_f>; +def INT_NVVM_SQRT_RP_FTZ_F : F_MATH_1<"sqrt.rp.ftz.f32 \t$dst, $src0;", + Float32Regs, Float32Regs, int_nvvm_sqrt_rp_ftz_f>; +def 
INT_NVVM_SQRT_RP_F : F_MATH_1<"sqrt.rp.f32 \t$dst, $src0;", Float32Regs, + Float32Regs, int_nvvm_sqrt_rp_f>; +def INT_NVVM_SQRT_APPROX_FTZ_F : F_MATH_1<"sqrt.approx.ftz.f32 \t$dst, $src0;", + Float32Regs, Float32Regs, int_nvvm_sqrt_approx_ftz_f>; +def INT_NVVM_SQRT_APPROX_F : F_MATH_1<"sqrt.approx.f32 \t$dst, $src0;", + Float32Regs, Float32Regs, int_nvvm_sqrt_approx_f>; + +def INT_NVVM_SQRT_RN_D : F_MATH_1<"sqrt.rn.f64 \t$dst, $src0;", Float64Regs, + Float64Regs, int_nvvm_sqrt_rn_d>; +def INT_NVVM_SQRT_RZ_D : F_MATH_1<"sqrt.rz.f64 \t$dst, $src0;", Float64Regs, + Float64Regs, int_nvvm_sqrt_rz_d>; +def INT_NVVM_SQRT_RM_D : F_MATH_1<"sqrt.rm.f64 \t$dst, $src0;", Float64Regs, + Float64Regs, int_nvvm_sqrt_rm_d>; +def INT_NVVM_SQRT_RP_D : F_MATH_1<"sqrt.rp.f64 \t$dst, $src0;", Float64Regs, + Float64Regs, int_nvvm_sqrt_rp_d>; + +// +// Rsqrt +// + +def INT_NVVM_RSQRT_APPROX_FTZ_F + : F_MATH_1<"rsqrt.approx.ftz.f32 \t$dst, $src0;", Float32Regs, Float32Regs, + int_nvvm_rsqrt_approx_ftz_f>; +def INT_NVVM_RSQRT_APPROX_F : F_MATH_1<"rsqrt.approx.f32 \t$dst, $src0;", + Float32Regs, Float32Regs, int_nvvm_rsqrt_approx_f>; +def INT_NVVM_RSQRT_APPROX_D : F_MATH_1<"rsqrt.approx.f64 \t$dst, $src0;", + Float64Regs, Float64Regs, int_nvvm_rsqrt_approx_d>; + +// +// Add +// + +def INT_NVVM_ADD_RN_FTZ_F : F_MATH_2<"add.rn.ftz.f32 \t$dst, $src0, $src1;", + Float32Regs, Float32Regs, Float32Regs, int_nvvm_add_rn_ftz_f>; +def INT_NVVM_ADD_RN_F : F_MATH_2<"add.rn.f32 \t$dst, $src0, $src1;", + Float32Regs, Float32Regs, Float32Regs, int_nvvm_add_rn_f>; +def INT_NVVM_ADD_RZ_FTZ_F : F_MATH_2<"add.rz.ftz.f32 \t$dst, $src0, $src1;", + Float32Regs, Float32Regs, Float32Regs, int_nvvm_add_rz_ftz_f>; +def INT_NVVM_ADD_RZ_F : F_MATH_2<"add.rz.f32 \t$dst, $src0, $src1;", + Float32Regs, Float32Regs, Float32Regs, int_nvvm_add_rz_f>; +def INT_NVVM_ADD_RM_FTZ_F : F_MATH_2<"add.rm.ftz.f32 \t$dst, $src0, $src1;", + Float32Regs, Float32Regs, Float32Regs, int_nvvm_add_rm_ftz_f>; +def INT_NVVM_ADD_RM_F : 
F_MATH_2<"add.rm.f32 \t$dst, $src0, $src1;", + Float32Regs, Float32Regs, Float32Regs, int_nvvm_add_rm_f>; +def INT_NVVM_ADD_RP_FTZ_F : F_MATH_2<"add.rp.ftz.f32 \t$dst, $src0, $src1;", + Float32Regs, Float32Regs, Float32Regs, int_nvvm_add_rp_ftz_f>; +def INT_NVVM_ADD_RP_F : F_MATH_2<"add.rp.f32 \t$dst, $src0, $src1;", + Float32Regs, Float32Regs, Float32Regs, int_nvvm_add_rp_f>; + +def INT_NVVM_ADD_RN_D : F_MATH_2<"add.rn.f64 \t$dst, $src0, $src1;", + Float64Regs, Float64Regs, Float64Regs, int_nvvm_add_rn_d>; +def INT_NVVM_ADD_RZ_D : F_MATH_2<"add.rz.f64 \t$dst, $src0, $src1;", + Float64Regs, Float64Regs, Float64Regs, int_nvvm_add_rz_d>; +def INT_NVVM_ADD_RM_D : F_MATH_2<"add.rm.f64 \t$dst, $src0, $src1;", + Float64Regs, Float64Regs, Float64Regs, int_nvvm_add_rm_d>; +def INT_NVVM_ADD_RP_D : F_MATH_2<"add.rp.f64 \t$dst, $src0, $src1;", + Float64Regs, Float64Regs, Float64Regs, int_nvvm_add_rp_d>; + +// +// Convert +// + +def INT_NVVM_D2F_RN_FTZ : F_MATH_1<"cvt.rn.ftz.f32.f64 \t$dst, $src0;", + Float32Regs, Float64Regs, int_nvvm_d2f_rn_ftz>; +def INT_NVVM_D2F_RN : F_MATH_1<"cvt.rn.f32.f64 \t$dst, $src0;", + Float32Regs, Float64Regs, int_nvvm_d2f_rn>; +def INT_NVVM_D2F_RZ_FTZ : F_MATH_1<"cvt.rz.ftz.f32.f64 \t$dst, $src0;", + Float32Regs, Float64Regs, int_nvvm_d2f_rz_ftz>; +def INT_NVVM_D2F_RZ : F_MATH_1<"cvt.rz.f32.f64 \t$dst, $src0;", + Float32Regs, Float64Regs, int_nvvm_d2f_rz>; +def INT_NVVM_D2F_RM_FTZ : F_MATH_1<"cvt.rm.ftz.f32.f64 \t$dst, $src0;", + Float32Regs, Float64Regs, int_nvvm_d2f_rm_ftz>; +def INT_NVVM_D2F_RM : F_MATH_1<"cvt.rm.f32.f64 \t$dst, $src0;", + Float32Regs, Float64Regs, int_nvvm_d2f_rm>; +def INT_NVVM_D2F_RP_FTZ : F_MATH_1<"cvt.rp.ftz.f32.f64 \t$dst, $src0;", + Float32Regs, Float64Regs, int_nvvm_d2f_rp_ftz>; +def INT_NVVM_D2F_RP : F_MATH_1<"cvt.rp.f32.f64 \t$dst, $src0;", + Float32Regs, Float64Regs, int_nvvm_d2f_rp>; + +def INT_NVVM_D2I_RN : F_MATH_1<"cvt.rni.s32.f64 \t$dst, $src0;", + Int32Regs, Float64Regs, int_nvvm_d2i_rn>; +def INT_NVVM_D2I_RZ 
: F_MATH_1<"cvt.rzi.s32.f64 \t$dst, $src0;", + Int32Regs, Float64Regs, int_nvvm_d2i_rz>; +def INT_NVVM_D2I_RM : F_MATH_1<"cvt.rmi.s32.f64 \t$dst, $src0;", + Int32Regs, Float64Regs, int_nvvm_d2i_rm>; +def INT_NVVM_D2I_RP : F_MATH_1<"cvt.rpi.s32.f64 \t$dst, $src0;", + Int32Regs, Float64Regs, int_nvvm_d2i_rp>; + +def INT_NVVM_D2UI_RN : F_MATH_1<"cvt.rni.u32.f64 \t$dst, $src0;", + Int32Regs, Float64Regs, int_nvvm_d2ui_rn>; +def INT_NVVM_D2UI_RZ : F_MATH_1<"cvt.rzi.u32.f64 \t$dst, $src0;", + Int32Regs, Float64Regs, int_nvvm_d2ui_rz>; +def INT_NVVM_D2UI_RM : F_MATH_1<"cvt.rmi.u32.f64 \t$dst, $src0;", + Int32Regs, Float64Regs, int_nvvm_d2ui_rm>; +def INT_NVVM_D2UI_RP : F_MATH_1<"cvt.rpi.u32.f64 \t$dst, $src0;", + Int32Regs, Float64Regs, int_nvvm_d2ui_rp>; + +def INT_NVVM_I2D_RN : F_MATH_1<"cvt.rn.f64.s32 \t$dst, $src0;", + Float64Regs, Int32Regs, int_nvvm_i2d_rn>; +def INT_NVVM_I2D_RZ : F_MATH_1<"cvt.rz.f64.s32 \t$dst, $src0;", + Float64Regs, Int32Regs, int_nvvm_i2d_rz>; +def INT_NVVM_I2D_RM : F_MATH_1<"cvt.rm.f64.s32 \t$dst, $src0;", + Float64Regs, Int32Regs, int_nvvm_i2d_rm>; +def INT_NVVM_I2D_RP : F_MATH_1<"cvt.rp.f64.s32 \t$dst, $src0;", + Float64Regs, Int32Regs, int_nvvm_i2d_rp>; + +def INT_NVVM_UI2D_RN : F_MATH_1<"cvt.rn.f64.u32 \t$dst, $src0;", + Float64Regs, Int32Regs, int_nvvm_ui2d_rn>; +def INT_NVVM_UI2D_RZ : F_MATH_1<"cvt.rz.f64.u32 \t$dst, $src0;", + Float64Regs, Int32Regs, int_nvvm_ui2d_rz>; +def INT_NVVM_UI2D_RM : F_MATH_1<"cvt.rm.f64.u32 \t$dst, $src0;", + Float64Regs, Int32Regs, int_nvvm_ui2d_rm>; +def INT_NVVM_UI2D_RP : F_MATH_1<"cvt.rp.f64.u32 \t$dst, $src0;", + Float64Regs, Int32Regs, int_nvvm_ui2d_rp>; + +def INT_NVVM_F2I_RN_FTZ : F_MATH_1<"cvt.rni.ftz.s32.f32 \t$dst, $src0;", + Int32Regs, Float32Regs, int_nvvm_f2i_rn_ftz>; +def INT_NVVM_F2I_RN : F_MATH_1<"cvt.rni.s32.f32 \t$dst, $src0;", Int32Regs, + Float32Regs, int_nvvm_f2i_rn>; +def INT_NVVM_F2I_RZ_FTZ : F_MATH_1<"cvt.rzi.ftz.s32.f32 \t$dst, $src0;", + Int32Regs, Float32Regs, int_nvvm_f2i_rz_ftz>; 
+def INT_NVVM_F2I_RZ : F_MATH_1<"cvt.rzi.s32.f32 \t$dst, $src0;", Int32Regs,
+    Float32Regs, int_nvvm_f2i_rz>;
+def INT_NVVM_F2I_RM_FTZ : F_MATH_1<"cvt.rmi.ftz.s32.f32 \t$dst, $src0;",
+    Int32Regs, Float32Regs, int_nvvm_f2i_rm_ftz>;
+def INT_NVVM_F2I_RM : F_MATH_1<"cvt.rmi.s32.f32 \t$dst, $src0;", Int32Regs,
+    Float32Regs, int_nvvm_f2i_rm>;
+def INT_NVVM_F2I_RP_FTZ : F_MATH_1<"cvt.rpi.ftz.s32.f32 \t$dst, $src0;",
+    Int32Regs, Float32Regs, int_nvvm_f2i_rp_ftz>;
+def INT_NVVM_F2I_RP : F_MATH_1<"cvt.rpi.s32.f32 \t$dst, $src0;", Int32Regs,
+    Float32Regs, int_nvvm_f2i_rp>;
+
+// f32 -> u32 conversions. The RN/RZ/RM/RP part of each def name pairs with
+// the .rni/.rzi/.rmi/.rpi modifier in its asm string, and the _FTZ defs add
+// the .ftz modifier. The result lives in Int32Regs for both the signed and
+// the unsigned forms; only the PTX type suffix (.s32 vs .u32) differs.
+def INT_NVVM_F2UI_RN_FTZ : F_MATH_1<"cvt.rni.ftz.u32.f32 \t$dst, $src0;",
+    Int32Regs, Float32Regs, int_nvvm_f2ui_rn_ftz>;
+def INT_NVVM_F2UI_RN : F_MATH_1<"cvt.rni.u32.f32 \t$dst, $src0;", Int32Regs,
+    Float32Regs, int_nvvm_f2ui_rn>;
+def INT_NVVM_F2UI_RZ_FTZ : F_MATH_1<"cvt.rzi.ftz.u32.f32 \t$dst, $src0;",
+    Int32Regs, Float32Regs, int_nvvm_f2ui_rz_ftz>;
+def INT_NVVM_F2UI_RZ : F_MATH_1<"cvt.rzi.u32.f32 \t$dst, $src0;", Int32Regs,
+    Float32Regs, int_nvvm_f2ui_rz>;
+def INT_NVVM_F2UI_RM_FTZ : F_MATH_1<"cvt.rmi.ftz.u32.f32 \t$dst, $src0;",
+    Int32Regs, Float32Regs, int_nvvm_f2ui_rm_ftz>;
+def INT_NVVM_F2UI_RM : F_MATH_1<"cvt.rmi.u32.f32 \t$dst, $src0;", Int32Regs,
+    Float32Regs, int_nvvm_f2ui_rm>;
+def INT_NVVM_F2UI_RP_FTZ : F_MATH_1<"cvt.rpi.ftz.u32.f32 \t$dst, $src0;",
+    Int32Regs, Float32Regs, int_nvvm_f2ui_rp_ftz>;
+def INT_NVVM_F2UI_RP : F_MATH_1<"cvt.rpi.u32.f32 \t$dst, $src0;", Int32Regs,
+    Float32Regs, int_nvvm_f2ui_rp>;
+
+// s32 -> f32 conversions, one def per rounding modifier (.rn/.rz/.rm/.rp).
+def INT_NVVM_I2F_RN : F_MATH_1<"cvt.rn.f32.s32 \t$dst, $src0;", Float32Regs,
+    Int32Regs, int_nvvm_i2f_rn>;
+def INT_NVVM_I2F_RZ : F_MATH_1<"cvt.rz.f32.s32 \t$dst, $src0;", Float32Regs,
+    Int32Regs, int_nvvm_i2f_rz>;
+def INT_NVVM_I2F_RM : F_MATH_1<"cvt.rm.f32.s32 \t$dst, $src0;", Float32Regs,
+    Int32Regs, int_nvvm_i2f_rm>;
+def INT_NVVM_I2F_RP : F_MATH_1<"cvt.rp.f32.s32 \t$dst, $src0;", Float32Regs,
+    Int32Regs, int_nvvm_i2f_rp>;
+
+// u32 -> f32 conversions, one def per rounding modifier.
+def INT_NVVM_UI2F_RN : F_MATH_1<"cvt.rn.f32.u32 \t$dst, $src0;", Float32Regs,
+    Int32Regs, int_nvvm_ui2f_rn>;
+def INT_NVVM_UI2F_RZ : F_MATH_1<"cvt.rz.f32.u32 \t$dst, $src0;", Float32Regs,
+    Int32Regs, int_nvvm_ui2f_rz>;
+def INT_NVVM_UI2F_RM : F_MATH_1<"cvt.rm.f32.u32 \t$dst, $src0;", Float32Regs,
+    Int32Regs, int_nvvm_ui2f_rm>;
+def INT_NVVM_UI2F_RP : F_MATH_1<"cvt.rp.f32.u32 \t$dst, $src0;", Float32Regs,
+    Int32Regs, int_nvvm_ui2f_rp>;
+
+// Assemble a Float64Regs value from two Int32Regs values with one mov.b64
+// whose source operand is the brace-enclosed pair of $src0 and $src1.
+// NOTE(review): the "lohi" name suggests $src0 is the low word and $src1 the
+// high word -- confirm against the PTX ISA rules for vector operands.
+def INT_NVVM_LOHI_I2D : F_MATH_2<"mov.b64 \t$dst, {{$src0, $src1}};",
+    Float64Regs, Int32Regs, Int32Regs, int_nvvm_lohi_i2d>;
+
+// Split a Float64Regs value into a 32-bit half. Both defs open a local PTX
+// scope, declare a scratch .b32 register %temp there, and unpack $src0 with
+// mov.b64 into a two-element destination: D2I_LO puts $dst in the first slot
+// and D2I_HI puts $dst in the second slot, with %temp taking the other half.
+def INT_NVVM_D2I_LO : F_MATH_1<!strconcat("{{\n\t",
+    !strconcat(".reg .b32 %temp; \n\t",
+    !strconcat("mov.b64 \t{$dst, %temp}, $src0;\n\t",
+    "}}"))),
+    Int32Regs, Float64Regs, int_nvvm_d2i_lo>;
+def INT_NVVM_D2I_HI : F_MATH_1<!strconcat("{{\n\t",
+    !strconcat(".reg .b32 %temp; \n\t",
+    !strconcat("mov.b64 \t{%temp, $dst}, $src0;\n\t",
+    "}}"))),
+    Int32Regs, Float64Regs, int_nvvm_d2i_hi>;
+
+// f32 -> s64 conversions (result in Int64Regs), with and without .ftz.
+def INT_NVVM_F2LL_RN_FTZ : F_MATH_1<"cvt.rni.ftz.s64.f32 \t$dst, $src0;",
+    Int64Regs, Float32Regs, int_nvvm_f2ll_rn_ftz>;
+def INT_NVVM_F2LL_RN : F_MATH_1<"cvt.rni.s64.f32 \t$dst, $src0;", Int64Regs,
+    Float32Regs, int_nvvm_f2ll_rn>;
+def INT_NVVM_F2LL_RZ_FTZ : F_MATH_1<"cvt.rzi.ftz.s64.f32 \t$dst, $src0;",
+    Int64Regs, Float32Regs, int_nvvm_f2ll_rz_ftz>;
+def INT_NVVM_F2LL_RZ : F_MATH_1<"cvt.rzi.s64.f32 \t$dst, $src0;", Int64Regs,
+    Float32Regs, int_nvvm_f2ll_rz>;
+def INT_NVVM_F2LL_RM_FTZ : F_MATH_1<"cvt.rmi.ftz.s64.f32 \t$dst, $src0;",
+    Int64Regs, Float32Regs, int_nvvm_f2ll_rm_ftz>;
+def INT_NVVM_F2LL_RM : F_MATH_1<"cvt.rmi.s64.f32 \t$dst, $src0;", Int64Regs,
+    Float32Regs, int_nvvm_f2ll_rm>;
+def INT_NVVM_F2LL_RP_FTZ : F_MATH_1<"cvt.rpi.ftz.s64.f32 \t$dst, $src0;",
+    Int64Regs, Float32Regs, int_nvvm_f2ll_rp_ftz>;
+def INT_NVVM_F2LL_RP : F_MATH_1<"cvt.rpi.s64.f32 \t$dst, $src0;", Int64Regs,
+    Float32Regs, int_nvvm_f2ll_rp>;
+
+// f32 -> u64 conversions (result in Int64Regs), with and without .ftz.
+def INT_NVVM_F2ULL_RN_FTZ : F_MATH_1<"cvt.rni.ftz.u64.f32 \t$dst, $src0;",
+    Int64Regs, Float32Regs, int_nvvm_f2ull_rn_ftz>;
+def INT_NVVM_F2ULL_RN : F_MATH_1<"cvt.rni.u64.f32 \t$dst, $src0;", Int64Regs,
+    Float32Regs, int_nvvm_f2ull_rn>;
+def INT_NVVM_F2ULL_RZ_FTZ : F_MATH_1<"cvt.rzi.ftz.u64.f32 \t$dst, $src0;",
+    Int64Regs, Float32Regs, int_nvvm_f2ull_rz_ftz>;
+def INT_NVVM_F2ULL_RZ : F_MATH_1<"cvt.rzi.u64.f32 \t$dst, $src0;", Int64Regs,
+    Float32Regs, int_nvvm_f2ull_rz>;
+def INT_NVVM_F2ULL_RM_FTZ : F_MATH_1<"cvt.rmi.ftz.u64.f32 \t$dst, $src0;",
+    Int64Regs, Float32Regs, int_nvvm_f2ull_rm_ftz>;
+def INT_NVVM_F2ULL_RM : F_MATH_1<"cvt.rmi.u64.f32 \t$dst, $src0;", Int64Regs,
+    Float32Regs, int_nvvm_f2ull_rm>;
+def INT_NVVM_F2ULL_RP_FTZ : F_MATH_1<"cvt.rpi.ftz.u64.f32 \t$dst, $src0;",
+    Int64Regs, Float32Regs, int_nvvm_f2ull_rp_ftz>;
+def INT_NVVM_F2ULL_RP : F_MATH_1<"cvt.rpi.u64.f32 \t$dst, $src0;", Int64Regs,
+    Float32Regs, int_nvvm_f2ull_rp>;
+
+// f64 -> s64 conversions. No _FTZ defs are provided for the f64 source.
+def INT_NVVM_D2LL_RN : F_MATH_1<"cvt.rni.s64.f64 \t$dst, $src0;", Int64Regs,
+    Float64Regs, int_nvvm_d2ll_rn>;
+def INT_NVVM_D2LL_RZ : F_MATH_1<"cvt.rzi.s64.f64 \t$dst, $src0;", Int64Regs,
+    Float64Regs, int_nvvm_d2ll_rz>;
+def INT_NVVM_D2LL_RM : F_MATH_1<"cvt.rmi.s64.f64 \t$dst, $src0;", Int64Regs,
+    Float64Regs, int_nvvm_d2ll_rm>;
+def INT_NVVM_D2LL_RP : F_MATH_1<"cvt.rpi.s64.f64 \t$dst, $src0;", Int64Regs,
+    Float64Regs, int_nvvm_d2ll_rp>;
+
+// f64 -> u64 conversions.
+def INT_NVVM_D2ULL_RN : F_MATH_1<"cvt.rni.u64.f64 \t$dst, $src0;", Int64Regs,
+    Float64Regs, int_nvvm_d2ull_rn>;
+def INT_NVVM_D2ULL_RZ : F_MATH_1<"cvt.rzi.u64.f64 \t$dst, $src0;", Int64Regs,
+    Float64Regs, int_nvvm_d2ull_rz>;
+def INT_NVVM_D2ULL_RM : F_MATH_1<"cvt.rmi.u64.f64 \t$dst, $src0;", Int64Regs,
+    Float64Regs, int_nvvm_d2ull_rm>;
+def INT_NVVM_D2ULL_RP : F_MATH_1<"cvt.rpi.u64.f64 \t$dst, $src0;", Int64Regs,
+    Float64Regs, int_nvvm_d2ull_rp>;
+
+// s64 -> f32 (LL2F) followed by u64 -> f32 (ULL2F) conversions.
+def INT_NVVM_LL2F_RN : F_MATH_1<"cvt.rn.f32.s64 \t$dst, $src0;", Float32Regs,
+    Int64Regs, int_nvvm_ll2f_rn>;
+def INT_NVVM_LL2F_RZ : F_MATH_1<"cvt.rz.f32.s64 \t$dst, $src0;", Float32Regs,
+    Int64Regs, int_nvvm_ll2f_rz>;
+def INT_NVVM_LL2F_RM : F_MATH_1<"cvt.rm.f32.s64 \t$dst, $src0;", Float32Regs,
+    Int64Regs, int_nvvm_ll2f_rm>;
+def INT_NVVM_LL2F_RP : F_MATH_1<"cvt.rp.f32.s64 \t$dst, $src0;", Float32Regs,
+    Int64Regs, int_nvvm_ll2f_rp>;
+def INT_NVVM_ULL2F_RN : F_MATH_1<"cvt.rn.f32.u64 \t$dst, $src0;", Float32Regs,
+    Int64Regs, int_nvvm_ull2f_rn>;
+def INT_NVVM_ULL2F_RZ : F_MATH_1<"cvt.rz.f32.u64 \t$dst, $src0;", Float32Regs,
+    Int64Regs, int_nvvm_ull2f_rz>;
+def INT_NVVM_ULL2F_RM : F_MATH_1<"cvt.rm.f32.u64 \t$dst, $src0;", Float32Regs,
+    Int64Regs, int_nvvm_ull2f_rm>;
+def INT_NVVM_ULL2F_RP : F_MATH_1<"cvt.rp.f32.u64 \t$dst, $src0;", Float32Regs,
+    Int64Regs, int_nvvm_ull2f_rp>;
+
+// s64 -> f64 (LL2D) followed by u64 -> f64 (ULL2D) conversions.
+def INT_NVVM_LL2D_RN : F_MATH_1<"cvt.rn.f64.s64 \t$dst, $src0;", Float64Regs,
+    Int64Regs, int_nvvm_ll2d_rn>;
+def INT_NVVM_LL2D_RZ : F_MATH_1<"cvt.rz.f64.s64 \t$dst, $src0;", Float64Regs,
+    Int64Regs, int_nvvm_ll2d_rz>;
+def INT_NVVM_LL2D_RM : F_MATH_1<"cvt.rm.f64.s64 \t$dst, $src0;", Float64Regs,
+    Int64Regs, int_nvvm_ll2d_rm>;
+def INT_NVVM_LL2D_RP : F_MATH_1<"cvt.rp.f64.s64 \t$dst, $src0;", Float64Regs,
+    Int64Regs, int_nvvm_ll2d_rp>;
+def INT_NVVM_ULL2D_RN : F_MATH_1<"cvt.rn.f64.u64 \t$dst, $src0;", Float64Regs,
+    Int64Regs, int_nvvm_ull2d_rn>;
+def INT_NVVM_ULL2D_RZ : F_MATH_1<"cvt.rz.f64.u64 \t$dst, $src0;", Float64Regs,
+    Int64Regs, int_nvvm_ull2d_rz>;
+def INT_NVVM_ULL2D_RM : F_MATH_1<"cvt.rm.f64.u64 \t$dst, $src0;", Float64Regs,
+    Int64Regs, int_nvvm_ull2d_rm>;
+def INT_NVVM_ULL2D_RP : F_MATH_1<"cvt.rp.f64.u64 \t$dst, $src0;", Float64Regs,
+    Int64Regs, int_nvvm_ull2d_rp>;
+
+// f32 -> f16, with the half value delivered in an Int16Regs register. The
+// cvt writes a scratch .b16 register %temp declared inside a local PTX scope,
+// and mov.b16 then copies %temp into $dst.
+def INT_NVVM_F2H_RN_FTZ : F_MATH_1<!strconcat("{{\n\t",
+    !strconcat(".reg .b16 %temp;\n\t",
+    !strconcat("cvt.rn.ftz.f16.f32 \t%temp, $src0;\n\t",
+    !strconcat("mov.b16 \t$dst, %temp;\n",
+    "}}")))),
+    Int16Regs, Float32Regs, int_nvvm_f2h_rn_ftz>;
+def INT_NVVM_F2H_RN : F_MATH_1<!strconcat("{{\n\t",
+    !strconcat(".reg .b16 %temp;\n\t",
+    !strconcat("cvt.rn.f16.f32 \t%temp, $src0;\n\t",
+    !strconcat("mov.b16 \t$dst, %temp;\n",
+    "}}")))),
+    Int16Regs, Float32Regs, int_nvvm_f2h_rn>;
+
+// f16 (held in Int16Regs) -> f32: the mirror image of the F2H defs. $src0 is
+// first moved into the scratch .b16 register, then cvt.f32.f16 writes $dst.
+def INT_NVVM_H2F : F_MATH_1<!strconcat("{{\n\t",
+    !strconcat(".reg .b16 %temp;\n\t",
+    !strconcat("mov.b16 \t%temp, $src0;\n\t",
+    !strconcat("cvt.f32.f16 \t$dst, %temp;\n\t",
+    "}}")))),
+    Float32Regs, Int16Regs, int_nvvm_h2f>;
+
+//
+// Bitcast
+//
+// Same-width register moves between the integer and floating-point register
+// classes: mov.b32 for the 32-bit pair and mov.b64 for the 64-bit pair.
+
+def INT_NVVM_BITCAST_F2I : F_MATH_1<"mov.b32 \t$dst, $src0;", Int32Regs,
+    Float32Regs, int_nvvm_bitcast_f2i>;
+def INT_NVVM_BITCAST_I2F : F_MATH_1<"mov.b32 \t$dst, $src0;", Float32Regs,
+    Int32Regs, int_nvvm_bitcast_i2f>;
+
+def INT_NVVM_BITCAST_LL2D : F_MATH_1<"mov.b64 \t$dst, $src0;", Float64Regs,
+    Int64Regs, int_nvvm_bitcast_ll2d>;
+def INT_NVVM_BITCAST_D2LL : F_MATH_1<"mov.b64 \t$dst, $src0;", Int64Regs,
+    Float64Regs, int_nvvm_bitcast_d2ll>;
+
+//-----------------------------------
+// Atomic Functions
+//-----------------------------------
+
+// PatFrag wrappers that attach an address-space predicate to an atomic
+// pattern. Each predicate returns the result of ChkMemSDNodeAddressSpace for
+// the matched node N and one address-space constant (GLOBAL, SHARED or
+// GENERIC). NOTE(review): the helper is defined outside this file;
+// presumably it accepts N only when its memory operand lives in that space.
+class ATOMIC_GLOBAL_CHK <dag ops, dag frag>
+ : PatFrag<ops, frag, [{
+   return ChkMemSDNodeAddressSpace(N, llvm::ADDRESS_SPACE_GLOBAL);
+}]>;
+class ATOMIC_SHARED_CHK <dag ops, dag frag>
+ : PatFrag<ops, frag, [{
+   return ChkMemSDNodeAddressSpace(N, llvm::ADDRESS_SPACE_SHARED);
+}]>;
+class ATOMIC_GENERIC_CHK <dag ops, dag frag>
+ : PatFrag<ops, frag, [{
+   return ChkMemSDNodeAddressSpace(N, llvm::ADDRESS_SPACE_GENERIC);
+}]>;
+
+// Two-operand atomic (address + one value) for a single pointer register
+// class. The asm string is "atom" + SpaceStr + OpcStr + TypeStr followed by
+// "$dst, [$addr], $b;". Two instructions are produced: `reg` takes $b from
+// regclass, `imm` takes $b as an IMMType operand matched through the IMM
+// node. Both are gated by Requires<[Pred]>.
+multiclass F_ATOMIC_2_imp<NVPTXRegClass ptrclass, NVPTXRegClass regclass,
+  string SpaceStr, string TypeStr, string OpcStr, PatFrag IntOp,
+  Operand IMMType, SDNode IMM, Predicate Pred> {
+  def reg : NVPTXInst<(outs regclass:$dst), (ins ptrclass:$addr, regclass:$b),
+    !strconcat("atom",
+    !strconcat(SpaceStr,
+    !strconcat(OpcStr,
+    !strconcat(TypeStr,
+    !strconcat(" \t$dst, [$addr], $b;", ""))))),
+    [(set regclass:$dst, (IntOp ptrclass:$addr, regclass:$b))]>,
+    Requires<[Pred]>;
+  def imm : NVPTXInst<(outs regclass:$dst), (ins ptrclass:$addr, IMMType:$b),
+    !strconcat("atom",
+    !strconcat(SpaceStr,
+    !strconcat(OpcStr,
+    !strconcat(TypeStr,
+    !strconcat(" \t$dst, [$addr], $b;", ""))))),
+    [(set regclass:$dst, (IntOp ptrclass:$addr, IMM:$b))]>,
+    Requires<[Pred]>;
+}
+// Instantiates F_ATOMIC_2_imp twice: p32 with the address in Int32Regs and
+// p64 with the address in Int64Regs.
+multiclass F_ATOMIC_2<NVPTXRegClass regclass, string SpaceStr, string TypeStr,
+  string OpcStr, PatFrag IntOp, Operand IMMType, SDNode IMM, Predicate Pred> {
+  defm p32 : F_ATOMIC_2_imp<Int32Regs, regclass, SpaceStr, TypeStr, OpcStr,
+    IntOp, IMMType, IMM, Pred>;
+  defm p64 : F_ATOMIC_2_imp<Int64Regs, regclass, SpaceStr, TypeStr, OpcStr,
+    IntOp, IMMType, IMM, Pred>;
+}
+
+// has 2 operands, neg the second one
+// The emitted text is a local PTX scope that declares a scratch register
+// `temp` of type .s<TypeStr>, negates $b into it with neg.s<TypeStr>, and
+// then issues atom<SpaceStr><OpcStr>.u<TypeStr> on [$addr] with `temp`.
+// TypeStr is therefore a bare width such as "32", not a dotted type suffix.
+// Only a register form is defined; IMMType is accepted but not referenced.
+multiclass F_ATOMIC_2_NEG_imp<NVPTXRegClass ptrclass, NVPTXRegClass regclass,
+  string SpaceStr, string TypeStr, string OpcStr, PatFrag IntOp,
+  Operand IMMType, Predicate Pred> {
+  def reg : NVPTXInst<(outs regclass:$dst), (ins ptrclass:$addr, regclass:$b),
+    !strconcat("{{ \n\t",
+    !strconcat(".reg \t.s",
+    !strconcat(TypeStr,
+    !strconcat(" temp; \n\t",
+    !strconcat("neg.s",
+    !strconcat(TypeStr,
+    !strconcat(" \ttemp, $b; \n\t",
+    !strconcat("atom",
+    !strconcat(SpaceStr,
+    !strconcat(OpcStr,
+    !strconcat(".u",
+    !strconcat(TypeStr,
+    !strconcat(" \t$dst, [$addr], temp; \n\t",
+    !strconcat("}}", "")))))))))))))),
+    [(set regclass:$dst, (IntOp ptrclass:$addr, regclass:$b))]>,
+    Requires<[Pred]>;
+}
+// Instantiates F_ATOMIC_2_NEG_imp for 32-bit (p32) and 64-bit (p64)
+// address registers.
+multiclass F_ATOMIC_2_NEG<NVPTXRegClass regclass, string SpaceStr,
+  string TypeStr, string OpcStr, PatFrag IntOp, Operand IMMType,
+  Predicate Pred> {
+  defm p32: F_ATOMIC_2_NEG_imp<Int32Regs, regclass, SpaceStr, TypeStr, OpcStr,
+    IntOp, IMMType, Pred> ;
+  defm p64: F_ATOMIC_2_NEG_imp<Int64Regs, regclass, SpaceStr, TypeStr, OpcStr,
+    IntOp, IMMType, Pred> ;
+}
+
+// has 3 operands
+// Three-operand atomic (address + $b + $c), printed as
+// "atom" + SpaceStr + OpcStr + TypeStr + " $dst, [$addr], $b, $c;".
+// Four instructions cover the register/immediate combinations of the two
+// value operands: reg (both registers), imm1 ($b immediate), imm2 ($c
+// immediate) and imm3 (both immediate). The immediate patterns match with
+// the `imm` node directly rather than through a template parameter.
+multiclass F_ATOMIC_3_imp<NVPTXRegClass ptrclass, NVPTXRegClass regclass,
+  string SpaceStr, string TypeStr, string OpcStr, PatFrag IntOp,
+  Operand IMMType, Predicate Pred> {
+  def reg : NVPTXInst<(outs regclass:$dst),
+    (ins ptrclass:$addr, regclass:$b, regclass:$c),
+    !strconcat("atom",
+    !strconcat(SpaceStr,
+    !strconcat(OpcStr,
+    !strconcat(TypeStr,
+    !strconcat(" \t$dst, [$addr], $b, $c;", ""))))),
+    [(set regclass:$dst,
+      (IntOp ptrclass:$addr, regclass:$b, regclass:$c))]>,
+    Requires<[Pred]>;
+  def imm1 : NVPTXInst<(outs regclass:$dst),
+    (ins ptrclass:$addr, IMMType:$b, regclass:$c),
+    !strconcat("atom",
+    !strconcat(SpaceStr,
+    !strconcat(OpcStr,
+    !strconcat(TypeStr,
+    !strconcat(" \t$dst, [$addr], $b, $c;", ""))))),
+    [(set regclass:$dst, (IntOp ptrclass:$addr, imm:$b, regclass:$c))]>,
+    Requires<[Pred]>;
+  def imm2 : NVPTXInst<(outs regclass:$dst),
+    (ins ptrclass:$addr, regclass:$b, IMMType:$c),
+    !strconcat("atom",
+    !strconcat(SpaceStr,
+    !strconcat(OpcStr,
+    !strconcat(TypeStr,
+    !strconcat(" \t$dst, [$addr], $b, $c;", ""))))),
+    [(set regclass:$dst, (IntOp ptrclass:$addr, regclass:$b, imm:$c))]>,
+    Requires<[Pred]>;
+  def imm3 : NVPTXInst<(outs regclass:$dst),
+    (ins ptrclass:$addr, IMMType:$b, IMMType:$c),
+    !strconcat("atom",
+    !strconcat(SpaceStr,
+    !strconcat(OpcStr,
+    !strconcat(TypeStr,
+    !strconcat(" \t$dst, [$addr], $b, $c;", ""))))),
+    [(set regclass:$dst, (IntOp ptrclass:$addr, imm:$b, imm:$c))]>,
+    Requires<[Pred]>;
+}
+// Instantiates F_ATOMIC_3_imp for 32-bit (p32) and 64-bit (p64) address
+// registers.
+multiclass F_ATOMIC_3<NVPTXRegClass regclass, string SpaceStr, string TypeStr,
+  string OpcStr, PatFrag IntOp, Operand IMMType, Predicate Pred> {
+  defm p32 : F_ATOMIC_3_imp<Int32Regs, regclass, SpaceStr, TypeStr, OpcStr,
+    IntOp, IMMType, Pred>;
+  defm p64 : F_ATOMIC_3_imp<Int64Regs, regclass, SpaceStr, TypeStr, OpcStr,
+    IntOp, IMMType, Pred>;
+}
+
+// atom_add
+// The _g / _s / _gen PatFrags restrict one atomic-add node to the global,
+// shared or generic address-space check. The integer fragments wrap
+// atomic_load_add_32 / atomic_load_add_64; the f32 fragments wrap the
+// int_nvvm_atomic_load_add_f32 intrinsic.
+
+def atomic_load_add_32_g: ATOMIC_GLOBAL_CHK<(ops node:$a, node:$b),
+  (atomic_load_add_32 node:$a, node:$b)>;
+def atomic_load_add_32_s: ATOMIC_SHARED_CHK<(ops node:$a, node:$b),
+  (atomic_load_add_32 node:$a, node:$b)>;
+def atomic_load_add_32_gen: ATOMIC_GENERIC_CHK<(ops node:$a, node:$b),
+  (atomic_load_add_32 node:$a, node:$b)>;
+def atomic_load_add_64_g: ATOMIC_GLOBAL_CHK<(ops node:$a, node:$b),
+  (atomic_load_add_64 node:$a, node:$b)>;
+def atomic_load_add_64_s: ATOMIC_SHARED_CHK<(ops node:$a, node:$b),
+  (atomic_load_add_64 node:$a, node:$b)>;
+def atomic_load_add_64_gen: ATOMIC_GENERIC_CHK<(ops node:$a, node:$b),
+  (atomic_load_add_64 node:$a, node:$b)>;
+def atomic_load_add_f32_g: ATOMIC_GLOBAL_CHK<(ops node:$a, node:$b),
+  (int_nvvm_atomic_load_add_f32 node:$a, node:$b)>;
+def atomic_load_add_f32_s: ATOMIC_SHARED_CHK<(ops node:$a, node:$b),
+  (int_nvvm_atomic_load_add_f32 node:$a, node:$b)>;
+def atomic_load_add_f32_gen: ATOMIC_GENERIC_CHK<(ops node:$a, node:$b),
+  (int_nvvm_atomic_load_add_f32 node:$a, node:$b)>;
+
+// An empty SpaceStr prints a bare "atom.add..." for the generic fragments.
+// The *_USE_G defs match the same generic fragment but print ".global", and
+// are selected under useAtomRedG32forGen32 instead of hasAtomRedGen32.
+defm INT_PTX_ATOM_ADD_G_32 : F_ATOMIC_2<Int32Regs, ".global", ".u32", ".add",
+  atomic_load_add_32_g, i32imm, imm, hasAtomRedG32>;
+defm INT_PTX_ATOM_ADD_S_32 : F_ATOMIC_2<Int32Regs, ".shared", ".u32", ".add",
+  atomic_load_add_32_s, i32imm, imm, hasAtomRedS32>;
+defm INT_PTX_ATOM_ADD_GEN_32 : F_ATOMIC_2<Int32Regs, "", ".u32", ".add",
+  atomic_load_add_32_gen, i32imm, imm, hasAtomRedGen32>;
+defm INT_PTX_ATOM_ADD_GEN_32_USE_G : F_ATOMIC_2<Int32Regs, ".global", ".u32",
+  ".add", atomic_load_add_32_gen, i32imm, imm, useAtomRedG32forGen32>;
+
+// 64-bit integer add: same layout as the 32-bit group, using the *64
+// predicates and i64imm immediates.
+defm INT_PTX_ATOM_ADD_G_64 : F_ATOMIC_2<Int64Regs, ".global", ".u64", ".add",
+  atomic_load_add_64_g, i64imm, imm, hasAtomRedG64>;
+defm INT_PTX_ATOM_ADD_S_64 : F_ATOMIC_2<Int64Regs, ".shared", ".u64", ".add",
+  atomic_load_add_64_s, i64imm, imm, hasAtomRedS64>;
+defm INT_PTX_ATOM_ADD_GEN_64 : F_ATOMIC_2<Int64Regs, "", ".u64", ".add",
+  atomic_load_add_64_gen, i64imm, imm, hasAtomRedGen64>;
+defm INT_PTX_ATOM_ADD_GEN_64_USE_G : F_ATOMIC_2<Int64Regs, ".global", ".u64",
+  ".add", atomic_load_add_64_gen, i64imm, imm, useAtomRedG64forGen64>;
+
+// f32 add: immediates use f32imm / fpimm, and all three address spaces share
+// the single hasAtomAddF32 predicate. There is no *_USE_G variant here.
+defm INT_PTX_ATOM_ADD_G_F32 : F_ATOMIC_2<Float32Regs, ".global", ".f32", ".add",
+  atomic_load_add_f32_g, f32imm, fpimm, hasAtomAddF32>;
+defm INT_PTX_ATOM_ADD_S_F32 : F_ATOMIC_2<Float32Regs, ".shared", ".f32", ".add",
+  atomic_load_add_f32_s, f32imm, fpimm, hasAtomAddF32>;
+defm INT_PTX_ATOM_ADD_GEN_F32 : F_ATOMIC_2<Float32Regs, "", ".f32", ".add",
+  atomic_load_add_f32_gen, f32imm, fpimm, hasAtomAddF32>;
+
+// atom_sub
+// Subtraction is emitted through F_ATOMIC_2_NEG with OpcStr ".add": the
+// operand is negated into a scratch register and then atomically added.
+
+def atomic_load_sub_32_g: ATOMIC_GLOBAL_CHK<(ops node:$a, node:$b),
+  (atomic_load_sub_32 node:$a, node:$b)>;
+def atomic_load_sub_32_s: ATOMIC_SHARED_CHK<(ops node:$a, node:$b),
+  (atomic_load_sub_32 node:$a, node:$b)>;
+def atomic_load_sub_32_gen: ATOMIC_GENERIC_CHK<(ops node:$a, node:$b),
+  (atomic_load_sub_32 node:$a, node:$b)>;
+def atomic_load_sub_64_g: ATOMIC_GLOBAL_CHK<(ops node:$a, node:$b),
+  (atomic_load_sub_64 node:$a, node:$b)>;
+def atomic_load_sub_64_s: ATOMIC_SHARED_CHK<(ops node:$a, node:$b),
+  (atomic_load_sub_64 node:$a, node:$b)>;
+def atomic_load_sub_64_gen: ATOMIC_GENERIC_CHK<(ops node:$a, node:$b),
+  (atomic_load_sub_64 node:$a, node:$b)>;
+
+// TypeStr is passed as "32" / "64" (no leading dot or sign letter) because
+// F_ATOMIC_2_NEG_imp prepends ".s" and ".u" itself.
+defm INT_PTX_ATOM_SUB_G_32 : F_ATOMIC_2_NEG<Int32Regs, ".global", "32", ".add",
+  atomic_load_sub_32_g, i32imm, hasAtomRedG32>;
+defm INT_PTX_ATOM_SUB_G_64 : F_ATOMIC_2_NEG<Int64Regs, ".global", "64", ".add",
+  atomic_load_sub_64_g, i64imm, hasAtomRedG64>;
+defm INT_PTX_ATOM_SUB_GEN_32 : F_ATOMIC_2_NEG<Int32Regs, "", "32", ".add",
+  atomic_load_sub_32_gen, i32imm, hasAtomRedGen32>;
+defm INT_PTX_ATOM_SUB_GEN_32_USE_G : F_ATOMIC_2_NEG<Int32Regs, ".global", "32",
+  ".add", atomic_load_sub_32_gen, i32imm, useAtomRedG32forGen32>;
+defm INT_PTX_ATOM_SUB_S_32 : F_ATOMIC_2_NEG<Int32Regs, ".shared", "32", ".add",
+  atomic_load_sub_32_s, i32imm, hasAtomRedS32>;
+defm INT_PTX_ATOM_SUB_S_64 : F_ATOMIC_2_NEG<Int64Regs, ".shared", "64", ".add",
+  atomic_load_sub_64_s, i64imm, hasAtomRedS64>;
+defm INT_PTX_ATOM_SUB_GEN_64 : F_ATOMIC_2_NEG<Int64Regs, "", "64", ".add",
+  atomic_load_sub_64_gen, i64imm, hasAtomRedGen64>;
+defm INT_PTX_ATOM_SUB_GEN_64_USE_G : F_ATOMIC_2_NEG<Int64Regs, ".global", "64",
+  ".add", atomic_load_sub_64_gen, i64imm, useAtomRedG64forGen64>;
+
+// atom_swap
+
+def atomic_swap_32_g: ATOMIC_GLOBAL_CHK<(ops node:$a, node:$b),
+  (atomic_swap_32 node:$a, node:$b)>;
+def atomic_swap_32_s: ATOMIC_SHARED_CHK<(ops node:$a, node:$b),
+  (atomic_swap_32 node:$a, node:$b)>;
+def atomic_swap_32_gen: ATOMIC_GENERIC_CHK<(ops node:$a, node:$b), + (atomic_swap_32 node:$a, node:$b)>; +def atomic_swap_64_g: ATOMIC_GLOBAL_CHK<(ops node:$a, node:$b), + (atomic_swap_64 node:$a, node:$b)>; +def atomic_swap_64_s: ATOMIC_SHARED_CHK<(ops node:$a, node:$b), + (atomic_swap_64 node:$a, node:$b)>; +def atomic_swap_64_gen: ATOMIC_GENERIC_CHK<(ops node:$a, node:$b), + (atomic_swap_64 node:$a, node:$b)>; + +defm INT_PTX_ATOM_SWAP_G_32 : F_ATOMIC_2<Int32Regs, ".global", ".b32", ".exch", + atomic_swap_32_g, i32imm, imm, hasAtomRedG32>; +defm INT_PTX_ATOM_SWAP_S_32 : F_ATOMIC_2<Int32Regs, ".shared", ".b32", ".exch", + atomic_swap_32_s, i32imm, imm, hasAtomRedS32>; +defm INT_PTX_ATOM_SWAP_GEN_32 : F_ATOMIC_2<Int32Regs, "", ".b32", ".exch", + atomic_swap_32_gen, i32imm, imm, hasAtomRedGen32>; +defm INT_PTX_ATOM_SWAP_GEN_32_USE_G : F_ATOMIC_2<Int32Regs, ".global", ".b32", + ".exch", atomic_swap_32_gen, i32imm, imm, useAtomRedG32forGen32>; +defm INT_PTX_ATOM_SWAP_G_64 : F_ATOMIC_2<Int64Regs, ".global", ".b64", ".exch", + atomic_swap_64_g, i64imm, imm, hasAtomRedG64>; +defm INT_PTX_ATOM_SWAP_S_64 : F_ATOMIC_2<Int64Regs, ".shared", ".b64", ".exch", + atomic_swap_64_s, i64imm, imm, hasAtomRedS64>; +defm INT_PTX_ATOM_SWAP_GEN_64 : F_ATOMIC_2<Int64Regs, "", ".b64", ".exch", + atomic_swap_64_gen, i64imm, imm, hasAtomRedGen64>; +defm INT_PTX_ATOM_SWAP_GEN_64_USE_G : F_ATOMIC_2<Int64Regs, ".global", ".b64", + ".exch", atomic_swap_64_gen, i64imm, imm, useAtomRedG64forGen64>; + +// atom_max + +def atomic_load_max_32_g: ATOMIC_GLOBAL_CHK<(ops node:$a, node:$b) + , (atomic_load_max_32 node:$a, node:$b)>; +def atomic_load_max_32_s: ATOMIC_SHARED_CHK<(ops node:$a, node:$b), + (atomic_load_max_32 node:$a, node:$b)>; +def atomic_load_max_32_gen: ATOMIC_GENERIC_CHK<(ops node:$a, node:$b), + (atomic_load_max_32 node:$a, node:$b)>; +def atomic_load_umax_32_g: ATOMIC_GLOBAL_CHK<(ops node:$a, node:$b), + (atomic_load_umax_32 node:$a, node:$b)>; +def atomic_load_umax_32_s: 
ATOMIC_SHARED_CHK<(ops node:$a, node:$b), + (atomic_load_umax_32 node:$a, node:$b)>; +def atomic_load_umax_32_gen: ATOMIC_GENERIC_CHK<(ops node:$a, node:$b), + (atomic_load_umax_32 node:$a, node:$b)>; + +defm INT_PTX_ATOM_LOAD_MAX_G_32 : F_ATOMIC_2<Int32Regs, ".global", ".s32", + ".max", atomic_load_max_32_g, i32imm, imm, hasAtomRedG32>; +defm INT_PTX_ATOM_LOAD_MAX_S_32 : F_ATOMIC_2<Int32Regs, ".shared", ".s32", + ".max", atomic_load_max_32_s, i32imm, imm, hasAtomRedS32>; +defm INT_PTX_ATOM_LOAD_MAX_GEN_32 : F_ATOMIC_2<Int32Regs, "", ".s32", ".max", + atomic_load_max_32_gen, i32imm, imm, hasAtomRedGen32>; +defm INT_PTX_ATOM_LOAD_MAX_GEN_32_USE_G : F_ATOMIC_2<Int32Regs, ".global", + ".s32", ".max", atomic_load_max_32_gen, i32imm, imm, useAtomRedG32forGen32>; +defm INT_PTX_ATOM_LOAD_UMAX_G_32 : F_ATOMIC_2<Int32Regs, ".global", ".u32", + ".max", atomic_load_umax_32_g, i32imm, imm, hasAtomRedG32>; +defm INT_PTX_ATOM_LOAD_UMAX_S_32 : F_ATOMIC_2<Int32Regs, ".shared", ".u32", + ".max", atomic_load_umax_32_s, i32imm, imm, hasAtomRedS32>; +defm INT_PTX_ATOM_LOAD_UMAX_GEN_32 : F_ATOMIC_2<Int32Regs, "", ".u32", ".max", + atomic_load_umax_32_gen, i32imm, imm, hasAtomRedGen32>; +defm INT_PTX_ATOM_LOAD_UMAX_GEN_32_USE_G : F_ATOMIC_2<Int32Regs, ".global", + ".u32", ".max", atomic_load_umax_32_gen, i32imm, imm, useAtomRedG32forGen32>; + +// atom_min + +def atomic_load_min_32_g: ATOMIC_GLOBAL_CHK<(ops node:$a, node:$b), + (atomic_load_min_32 node:$a, node:$b)>; +def atomic_load_min_32_s: ATOMIC_SHARED_CHK<(ops node:$a, node:$b), + (atomic_load_min_32 node:$a, node:$b)>; +def atomic_load_min_32_gen: ATOMIC_GENERIC_CHK<(ops node:$a, node:$b), + (atomic_load_min_32 node:$a, node:$b)>; +def atomic_load_umin_32_g: ATOMIC_GLOBAL_CHK<(ops node:$a, node:$b), + (atomic_load_umin_32 node:$a, node:$b)>; +def atomic_load_umin_32_s: ATOMIC_SHARED_CHK<(ops node:$a, node:$b), + (atomic_load_umin_32 node:$a, node:$b)>; +def atomic_load_umin_32_gen: ATOMIC_GENERIC_CHK<(ops node:$a, node:$b), + 
(atomic_load_umin_32 node:$a, node:$b)>; + +defm INT_PTX_ATOM_LOAD_MIN_G_32 : F_ATOMIC_2<Int32Regs, ".global", ".s32", + ".min", atomic_load_min_32_g, i32imm, imm, hasAtomRedG32>; +defm INT_PTX_ATOM_LOAD_MIN_S_32 : F_ATOMIC_2<Int32Regs, ".shared", ".s32", + ".min", atomic_load_min_32_s, i32imm, imm, hasAtomRedS32>; +defm INT_PTX_ATOM_LOAD_MIN_GEN_32 : F_ATOMIC_2<Int32Regs, "", ".s32", ".min", + atomic_load_min_32_gen, i32imm, imm, hasAtomRedGen32>; +defm INT_PTX_ATOM_LOAD_MIN_GEN_32_USE_G : F_ATOMIC_2<Int32Regs, ".global", + ".s32", ".min", atomic_load_min_32_gen, i32imm, imm, useAtomRedG32forGen32>; +defm INT_PTX_ATOM_LOAD_UMIN_G_32 : F_ATOMIC_2<Int32Regs, ".global", ".u32", + ".min", atomic_load_umin_32_g, i32imm, imm, hasAtomRedG32>; +defm INT_PTX_ATOM_LOAD_UMIN_S_32 : F_ATOMIC_2<Int32Regs, ".shared", ".u32", + ".min", atomic_load_umin_32_s, i32imm, imm, hasAtomRedS32>; +defm INT_PTX_ATOM_LOAD_UMIN_GEN_32 : F_ATOMIC_2<Int32Regs, "", ".u32", ".min", + atomic_load_umin_32_gen, i32imm, imm, hasAtomRedGen32>; +defm INT_PTX_ATOM_LOAD_UMIN_GEN_32_USE_G : F_ATOMIC_2<Int32Regs, ".global", + ".u32", ".min", atomic_load_umin_32_gen, i32imm, imm, useAtomRedG32forGen32>; + +// atom_inc atom_dec + +def atomic_load_inc_32_g: ATOMIC_GLOBAL_CHK<(ops node:$a, node:$b), + (int_nvvm_atomic_load_inc_32 node:$a, node:$b)>; +def atomic_load_inc_32_s: ATOMIC_SHARED_CHK<(ops node:$a, node:$b), + (int_nvvm_atomic_load_inc_32 node:$a, node:$b)>; +def atomic_load_inc_32_gen: ATOMIC_GENERIC_CHK<(ops node:$a, node:$b), + (int_nvvm_atomic_load_inc_32 node:$a, node:$b)>; +def atomic_load_dec_32_g: ATOMIC_GLOBAL_CHK<(ops node:$a, node:$b), + (int_nvvm_atomic_load_dec_32 node:$a, node:$b)>; +def atomic_load_dec_32_s: ATOMIC_SHARED_CHK<(ops node:$a, node:$b), + (int_nvvm_atomic_load_dec_32 node:$a, node:$b)>; +def atomic_load_dec_32_gen: ATOMIC_GENERIC_CHK<(ops node:$a, node:$b), + (int_nvvm_atomic_load_dec_32 node:$a, node:$b)>; + +defm INT_PTX_ATOM_INC_G_32 : F_ATOMIC_2<Int32Regs, ".global", 
".u32", ".inc", + atomic_load_inc_32_g, i32imm, imm, hasAtomRedG32>; +defm INT_PTX_ATOM_INC_S_32 : F_ATOMIC_2<Int32Regs, ".shared", ".u32", ".inc", + atomic_load_inc_32_s, i32imm, imm, hasAtomRedS32>; +defm INT_PTX_ATOM_INC_GEN_32 : F_ATOMIC_2<Int32Regs, "", ".u32", ".inc", + atomic_load_inc_32_gen, i32imm, imm, hasAtomRedGen32>; +defm INT_PTX_ATOM_INC_GEN_32_USE_G : F_ATOMIC_2<Int32Regs, ".global", ".u32", + ".inc", atomic_load_inc_32_gen, i32imm, imm, useAtomRedG32forGen32>; +defm INT_PTX_ATOM_DEC_G_32 : F_ATOMIC_2<Int32Regs, ".global", ".u32", ".dec", + atomic_load_dec_32_g, i32imm, imm, hasAtomRedG32>; +defm INT_PTX_ATOM_DEC_S_32 : F_ATOMIC_2<Int32Regs, ".shared", ".u32", ".dec", + atomic_load_dec_32_s, i32imm, imm, hasAtomRedS32>; +defm INT_PTX_ATOM_DEC_GEN_32 : F_ATOMIC_2<Int32Regs, "", ".u32", ".dec", + atomic_load_dec_32_gen, i32imm, imm, hasAtomRedGen32>; +defm INT_PTX_ATOM_DEC_GEN_32_USE_G : F_ATOMIC_2<Int32Regs, ".global", ".u32", + ".dec", atomic_load_dec_32_gen, i32imm, imm, useAtomRedG32forGen32>; + +// atom_and + +def atomic_load_and_32_g: ATOMIC_GLOBAL_CHK<(ops node:$a, node:$b), + (atomic_load_and_32 node:$a, node:$b)>; +def atomic_load_and_32_s: ATOMIC_SHARED_CHK<(ops node:$a, node:$b), + (atomic_load_and_32 node:$a, node:$b)>; +def atomic_load_and_32_gen: ATOMIC_GENERIC_CHK<(ops node:$a, node:$b), + (atomic_load_and_32 node:$a, node:$b)>; + +defm INT_PTX_ATOM_AND_G_32 : F_ATOMIC_2<Int32Regs, ".global", ".b32", ".and", + atomic_load_and_32_g, i32imm, imm, hasAtomRedG32>; +defm INT_PTX_ATOM_AND_S_32 : F_ATOMIC_2<Int32Regs, ".shared", ".b32", ".and", + atomic_load_and_32_s, i32imm, imm, hasAtomRedS32>; +defm INT_PTX_ATOM_AND_GEN_32 : F_ATOMIC_2<Int32Regs, "", ".b32", ".and", + atomic_load_and_32_gen, i32imm, imm, hasAtomRedGen32>; +defm INT_PTX_ATOM_AND_GEN_32_USE_G : F_ATOMIC_2<Int32Regs, ".global", ".b32", + ".and", atomic_load_and_32_gen, i32imm, imm, useAtomRedG32forGen32>; + +// atom_or + +def atomic_load_or_32_g: ATOMIC_GLOBAL_CHK<(ops 
node:$a, node:$b), + (atomic_load_or_32 node:$a, node:$b)>; +def atomic_load_or_32_s: ATOMIC_SHARED_CHK<(ops node:$a, node:$b), + (atomic_load_or_32 node:$a, node:$b)>; +def atomic_load_or_32_gen: ATOMIC_GENERIC_CHK<(ops node:$a, node:$b), + (atomic_load_or_32 node:$a, node:$b)>; + +defm INT_PTX_ATOM_OR_G_32 : F_ATOMIC_2<Int32Regs, ".global", ".b32", ".or", + atomic_load_or_32_g, i32imm, imm, hasAtomRedG32>; +defm INT_PTX_ATOM_OR_GEN_32 : F_ATOMIC_2<Int32Regs, "", ".b32", ".or", + atomic_load_or_32_gen, i32imm, imm, hasAtomRedGen32>; +defm INT_PTX_ATOM_OR_GEN_32_USE_G : F_ATOMIC_2<Int32Regs, ".global", ".b32", + ".or", atomic_load_or_32_gen, i32imm, imm, useAtomRedG32forGen32>; +defm INT_PTX_ATOM_OR_S_32 : F_ATOMIC_2<Int32Regs, ".shared", ".b32", ".or", + atomic_load_or_32_s, i32imm, imm, hasAtomRedS32>; + +// atom_xor + +def atomic_load_xor_32_g: ATOMIC_GLOBAL_CHK<(ops node:$a, node:$b), + (atomic_load_xor_32 node:$a, node:$b)>; +def atomic_load_xor_32_s: ATOMIC_SHARED_CHK<(ops node:$a, node:$b), + (atomic_load_xor_32 node:$a, node:$b)>; +def atomic_load_xor_32_gen: ATOMIC_GENERIC_CHK<(ops node:$a, node:$b), + (atomic_load_xor_32 node:$a, node:$b)>; + +defm INT_PTX_ATOM_XOR_G_32 : F_ATOMIC_2<Int32Regs, ".global", ".b32", ".xor", + atomic_load_xor_32_g, i32imm, imm, hasAtomRedG32>; +defm INT_PTX_ATOM_XOR_S_32 : F_ATOMIC_2<Int32Regs, ".shared", ".b32", ".xor", + atomic_load_xor_32_s, i32imm, imm, hasAtomRedS32>; +defm INT_PTX_ATOM_XOR_GEN_32 : F_ATOMIC_2<Int32Regs, "", ".b32", ".xor", + atomic_load_xor_32_gen, i32imm, imm, hasAtomRedGen32>; +defm INT_PTX_ATOM_XOR_GEN_32_USE_G : F_ATOMIC_2<Int32Regs, ".global", ".b32", + ".xor", atomic_load_xor_32_gen, i32imm, imm, useAtomRedG32forGen32>; + +// atom_cas + +def atomic_cmp_swap_32_g: ATOMIC_GLOBAL_CHK<(ops node:$a, node:$b, node:$c), + (atomic_cmp_swap_32 node:$a, node:$b, node:$c)>; +def atomic_cmp_swap_32_s: ATOMIC_SHARED_CHK<(ops node:$a, node:$b, node:$c), + (atomic_cmp_swap_32 node:$a, node:$b, node:$c)>; +def 
atomic_cmp_swap_32_gen: ATOMIC_GENERIC_CHK<(ops node:$a, node:$b, node:$c), + (atomic_cmp_swap_32 node:$a, node:$b, node:$c)>; +def atomic_cmp_swap_64_g: ATOMIC_GLOBAL_CHK<(ops node:$a, node:$b, node:$c), + (atomic_cmp_swap_64 node:$a, node:$b, node:$c)>; +def atomic_cmp_swap_64_s: ATOMIC_SHARED_CHK<(ops node:$a, node:$b, node:$c), + (atomic_cmp_swap_64 node:$a, node:$b, node:$c)>; +def atomic_cmp_swap_64_gen: ATOMIC_GENERIC_CHK<(ops node:$a, node:$b, node:$c), + (atomic_cmp_swap_64 node:$a, node:$b, node:$c)>; + +defm INT_PTX_ATOM_CAS_G_32 : F_ATOMIC_3<Int32Regs, ".global", ".b32", ".cas", + atomic_cmp_swap_32_g, i32imm, hasAtomRedG32>; +defm INT_PTX_ATOM_CAS_S_32 : F_ATOMIC_3<Int32Regs, ".shared", ".b32", ".cas", + atomic_cmp_swap_32_s, i32imm, hasAtomRedS32>; +defm INT_PTX_ATOM_CAS_GEN_32 : F_ATOMIC_3<Int32Regs, "", ".b32", ".cas", + atomic_cmp_swap_32_gen, i32imm, hasAtomRedGen32>; +defm INT_PTX_ATOM_CAS_GEN_32_USE_G : F_ATOMIC_3<Int32Regs, ".global", ".b32", + ".cas", atomic_cmp_swap_32_gen, i32imm, useAtomRedG32forGen32>; +defm INT_PTX_ATOM_CAS_G_64 : F_ATOMIC_3<Int64Regs, ".global", ".b64", ".cas", + atomic_cmp_swap_64_g, i64imm, hasAtomRedG64>; +defm INT_PTX_ATOM_CAS_S_64 : F_ATOMIC_3<Int64Regs, ".shared", ".b64", ".cas", + atomic_cmp_swap_64_s, i64imm, hasAtomRedS64>; +defm INT_PTX_ATOM_CAS_GEN_64 : F_ATOMIC_3<Int64Regs, "", ".b64", ".cas", + atomic_cmp_swap_64_gen, i64imm, hasAtomRedGen64>; +defm INT_PTX_ATOM_CAS_GEN_64_USE_G : F_ATOMIC_3<Int64Regs, ".global", ".b64", + ".cas", atomic_cmp_swap_64_gen, i64imm, useAtomRedG64forGen64>; + + +//----------------------------------- +// Read Special Registers +//----------------------------------- +class F_SREG<string OpStr, NVPTXRegClass regclassOut, Intrinsic IntOp> : + NVPTXInst<(outs regclassOut:$dst), (ins), + OpStr, + [(set regclassOut:$dst, (IntOp))]>; + +def INT_PTX_SREG_TID_X : F_SREG<"mov.u32 \t$dst, %tid.x;", Int32Regs, + int_nvvm_read_ptx_sreg_tid_x>; +def INT_PTX_SREG_TID_Y : F_SREG<"mov.u32 \t$dst, 
%tid.y;", Int32Regs, + int_nvvm_read_ptx_sreg_tid_y>; +def INT_PTX_SREG_TID_Z : F_SREG<"mov.u32 \t$dst, %tid.z;", Int32Regs, + int_nvvm_read_ptx_sreg_tid_z>; + +def INT_PTX_SREG_NTID_X : F_SREG<"mov.u32 \t$dst, %ntid.x;", Int32Regs, + int_nvvm_read_ptx_sreg_ntid_x>; +def INT_PTX_SREG_NTID_Y : F_SREG<"mov.u32 \t$dst, %ntid.y;", Int32Regs, + int_nvvm_read_ptx_sreg_ntid_y>; +def INT_PTX_SREG_NTID_Z : F_SREG<"mov.u32 \t$dst, %ntid.z;", Int32Regs, + int_nvvm_read_ptx_sreg_ntid_z>; + +def INT_PTX_SREG_CTAID_X : F_SREG<"mov.u32 \t$dst, %ctaid.x;", Int32Regs, + int_nvvm_read_ptx_sreg_ctaid_x>; +def INT_PTX_SREG_CTAID_Y : F_SREG<"mov.u32 \t$dst, %ctaid.y;", Int32Regs, + int_nvvm_read_ptx_sreg_ctaid_y>; +def INT_PTX_SREG_CTAID_Z : F_SREG<"mov.u32 \t$dst, %ctaid.z;", Int32Regs, + int_nvvm_read_ptx_sreg_ctaid_z>; + +def INT_PTX_SREG_NCTAID_X : F_SREG<"mov.u32 \t$dst, %nctaid.x;", Int32Regs, + int_nvvm_read_ptx_sreg_nctaid_x>; +def INT_PTX_SREG_NCTAID_Y : F_SREG<"mov.u32 \t$dst, %nctaid.y;", Int32Regs, + int_nvvm_read_ptx_sreg_nctaid_y>; +def INT_PTX_SREG_NCTAID_Z : F_SREG<"mov.u32 \t$dst, %nctaid.z;", Int32Regs, + int_nvvm_read_ptx_sreg_nctaid_z>; + +def INT_PTX_SREG_WARPSIZE : F_SREG<"mov.u32 \t$dst, WARP_SZ;", Int32Regs, + int_nvvm_read_ptx_sreg_warpsize>; + + +//----------------------------------- +// Support for ldu on sm_20 or later +//----------------------------------- + +// Scalar +// @TODO: Revisit this, Changed imemAny to imem +multiclass LDU_G<string TyStr, NVPTXRegClass regclass, Intrinsic IntOp> { + def areg: NVPTXInst<(outs regclass:$result), (ins Int32Regs:$src), + !strconcat("ldu.global.", TyStr), + [(set regclass:$result, (IntOp Int32Regs:$src))]>, Requires<[hasLDU]>; + def areg64: NVPTXInst<(outs regclass:$result), (ins Int64Regs:$src), + !strconcat("ldu.global.", TyStr), + [(set regclass:$result, (IntOp Int64Regs:$src))]>, Requires<[hasLDU]>; + def avar: NVPTXInst<(outs regclass:$result), (ins imem:$src), + !strconcat("ldu.global.", TyStr), + [(set 
regclass:$result, (IntOp (Wrapper tglobaladdr:$src)))]>, + Requires<[hasLDU]>; + def ari : NVPTXInst<(outs regclass:$result), (ins MEMri:$src), + !strconcat("ldu.global.", TyStr), + [(set regclass:$result, (IntOp ADDRri:$src))]>, Requires<[hasLDU]>; + def ari64 : NVPTXInst<(outs regclass:$result), (ins MEMri64:$src), + !strconcat("ldu.global.", TyStr), + [(set regclass:$result, (IntOp ADDRri64:$src))]>, Requires<[hasLDU]>; +} + +defm INT_PTX_LDU_GLOBAL_i8 : LDU_G<"u8 \t$result, [$src];", Int8Regs, +int_nvvm_ldu_global_i>; +defm INT_PTX_LDU_GLOBAL_i16 : LDU_G<"u16 \t$result, [$src];", Int16Regs, +int_nvvm_ldu_global_i>; +defm INT_PTX_LDU_GLOBAL_i32 : LDU_G<"u32 \t$result, [$src];", Int32Regs, +int_nvvm_ldu_global_i>; +defm INT_PTX_LDU_GLOBAL_i64 : LDU_G<"u64 \t$result, [$src];", Int64Regs, +int_nvvm_ldu_global_i>; +defm INT_PTX_LDU_GLOBAL_f32 : LDU_G<"f32 \t$result, [$src];", Float32Regs, +int_nvvm_ldu_global_f>; +defm INT_PTX_LDU_GLOBAL_f64 : LDU_G<"f64 \t$result, [$src];", Float64Regs, +int_nvvm_ldu_global_f>; +defm INT_PTX_LDU_GLOBAL_p32 : LDU_G<"u32 \t$result, [$src];", Int32Regs, +int_nvvm_ldu_global_p>; +defm INT_PTX_LDU_GLOBAL_p64 : LDU_G<"u64 \t$result, [$src];", Int64Regs, +int_nvvm_ldu_global_p>; + +// vector + +// Elementized vector ldu +multiclass VLDU_G_ELE_V2<string TyStr, NVPTXRegClass regclass> { + def _32: NVPTXInst<(outs regclass:$dst1, regclass:$dst2), + (ins Int32Regs:$src), + !strconcat("ldu.global.", TyStr), []>; + def _64: NVPTXInst<(outs regclass:$dst1, regclass:$dst2), + (ins Int64Regs:$src), + !strconcat("ldu.global.", TyStr), []>; +} + +multiclass VLDU_G_ELE_V4<string TyStr, NVPTXRegClass regclass> { + def _32: NVPTXInst<(outs regclass:$dst1, regclass:$dst2, regclass:$dst3, + regclass:$dst4), (ins Int32Regs:$src), + !strconcat("ldu.global.", TyStr), []>; + def _64: NVPTXInst<(outs regclass:$dst1, regclass:$dst2, regclass:$dst3, + regclass:$dst4), (ins Int64Regs:$src), + !strconcat("ldu.global.", TyStr), []>; +} + +defm 
INT_PTX_LDU_G_v2i8_ELE + : VLDU_G_ELE_V2<"v2.u8 \t{{$dst1, $dst2}}, [$src];", Int8Regs>; +defm INT_PTX_LDU_G_v2i16_ELE + : VLDU_G_ELE_V2<"v2.u16 \t{{$dst1, $dst2}}, [$src];", Int16Regs>; +defm INT_PTX_LDU_G_v2i32_ELE + : VLDU_G_ELE_V2<"v2.u32 \t{{$dst1, $dst2}}, [$src];", Int32Regs>; +defm INT_PTX_LDU_G_v2f32_ELE + : VLDU_G_ELE_V2<"v2.f32 \t{{$dst1, $dst2}}, [$src];", Float32Regs>; +defm INT_PTX_LDU_G_v2i64_ELE + : VLDU_G_ELE_V2<"v2.u64 \t{{$dst1, $dst2}}, [$src];", Int64Regs>; +defm INT_PTX_LDU_G_v2f64_ELE + : VLDU_G_ELE_V2<"v2.f64 \t{{$dst1, $dst2}}, [$src];", Float64Regs>; +defm INT_PTX_LDU_G_v4i8_ELE + : VLDU_G_ELE_V4<"v4.u8 \t{{$dst1, $dst2, $dst3, $dst4}}, [$src];", Int8Regs>; +defm INT_PTX_LDU_G_v4i16_ELE + : VLDU_G_ELE_V4<"v4.u16 \t{{$dst1, $dst2, $dst3, $dst4}}, [$src];", + Int16Regs>; +defm INT_PTX_LDU_G_v4i32_ELE + : VLDU_G_ELE_V4<"v4.u32 \t{{$dst1, $dst2, $dst3, $dst4}}, [$src];", + Int32Regs>; +defm INT_PTX_LDU_G_v4f32_ELE + : VLDU_G_ELE_V4<"v4.f32 \t{{$dst1, $dst2, $dst3, $dst4}}, [$src];", + Float32Regs>; + +// Vector ldu +multiclass VLDU_G<string TyStr, NVPTXRegClass regclass, Intrinsic IntOp, + NVPTXInst eleInst, NVPTXInst eleInst64> { + def _32: NVPTXVecInst<(outs regclass:$result), (ins Int32Regs:$src), + !strconcat("ldu.global.", TyStr), + [(set regclass:$result, (IntOp Int32Regs:$src))], eleInst>, + Requires<[hasLDU]>; + def _64: NVPTXVecInst<(outs regclass:$result), (ins Int64Regs:$src), + !strconcat("ldu.global.", TyStr), + [(set regclass:$result, (IntOp Int64Regs:$src))], eleInst64>, + Requires<[hasLDU]>; +} + +let VecInstType=isVecLD.Value in { +defm INT_PTX_LDU_G_v2i8 : VLDU_G<"v2.u8 \t${result:vecfull}, [$src];", + V2I8Regs, int_nvvm_ldu_global_i, INT_PTX_LDU_G_v2i8_ELE_32, + INT_PTX_LDU_G_v2i8_ELE_64>; +defm INT_PTX_LDU_G_v4i8 : VLDU_G<"v4.u8 \t${result:vecfull}, [$src];", + V4I8Regs, int_nvvm_ldu_global_i, INT_PTX_LDU_G_v4i8_ELE_32, + INT_PTX_LDU_G_v4i8_ELE_64>; +defm INT_PTX_LDU_G_v2i16 : VLDU_G<"v2.u16 \t${result:vecfull}, [$src];", + 
V2I16Regs, int_nvvm_ldu_global_i, INT_PTX_LDU_G_v2i16_ELE_32, + INT_PTX_LDU_G_v2i16_ELE_64>; +defm INT_PTX_LDU_G_v4i16 : VLDU_G<"v4.u16 \t${result:vecfull}, [$src];", + V4I16Regs, int_nvvm_ldu_global_i, INT_PTX_LDU_G_v4i16_ELE_32, + INT_PTX_LDU_G_v4i16_ELE_64>; +defm INT_PTX_LDU_G_v2i32 : VLDU_G<"v2.u32 \t${result:vecfull}, [$src];", + V2I32Regs, int_nvvm_ldu_global_i, INT_PTX_LDU_G_v2i32_ELE_32, + INT_PTX_LDU_G_v2i32_ELE_64>; +defm INT_PTX_LDU_G_v4i32 : VLDU_G<"v4.u32 \t${result:vecfull}, [$src];", + V4I32Regs, int_nvvm_ldu_global_i, INT_PTX_LDU_G_v4i32_ELE_32, + INT_PTX_LDU_G_v4i32_ELE_64>; +defm INT_PTX_LDU_G_v2f32 : VLDU_G<"v2.f32 \t${result:vecfull}, [$src];", + V2F32Regs, int_nvvm_ldu_global_f, INT_PTX_LDU_G_v2f32_ELE_32, + INT_PTX_LDU_G_v2f32_ELE_64>; +defm INT_PTX_LDU_G_v4f32 : VLDU_G<"v4.f32 \t${result:vecfull}, [$src];", + V4F32Regs, int_nvvm_ldu_global_f, INT_PTX_LDU_G_v4f32_ELE_32, + INT_PTX_LDU_G_v4f32_ELE_64>; +defm INT_PTX_LDU_G_v2i64 : VLDU_G<"v2.u64 \t${result:vecfull}, [$src];", + V2I64Regs, int_nvvm_ldu_global_i, INT_PTX_LDU_G_v2i64_ELE_32, + INT_PTX_LDU_G_v2i64_ELE_64>; +defm INT_PTX_LDU_G_v2f64 : VLDU_G<"v2.f64 \t${result:vecfull}, [$src];", + V2F64Regs, int_nvvm_ldu_global_f, INT_PTX_LDU_G_v2f64_ELE_32, + INT_PTX_LDU_G_v2f64_ELE_64>; +} + + + +multiclass NG_TO_G<string Str, Intrinsic Intrin> { + def _yes : NVPTXInst<(outs Int32Regs:$result), (ins Int32Regs:$src), + !strconcat("cvta.", !strconcat(Str, ".u32 \t$result, $src;")), + [(set Int32Regs:$result, (Intrin Int32Regs:$src))]>, + Requires<[hasGenericLdSt]>; + def _yes_64 : NVPTXInst<(outs Int64Regs:$result), (ins Int64Regs:$src), + !strconcat("cvta.", !strconcat(Str, ".u64 \t$result, $src;")), + [(set Int64Regs:$result, (Intrin Int64Regs:$src))]>, + Requires<[hasGenericLdSt]>; + +// @TODO: Are these actually needed? I believe global addresses will be copied +// to register values anyway. 
+ /*def __addr_yes : NVPTXInst<(outs Int32Regs:$result), (ins imemAny:$src), + !strconcat("cvta.", !strconcat(Str, ".u32 \t$result, $src;")), + [(set Int32Regs:$result, (Intrin (Wrapper tglobaladdr:$src)))]>, + Requires<[hasGenericLdSt]>; + def __addr_yes_64 : NVPTXInst<(outs Int64Regs:$result), (ins imemAny:$src), + !strconcat("cvta.", !strconcat(Str, ".u64 \t$result, $src;")), + [(set Int64Regs:$result, (Intrin (Wrapper tglobaladdr:$src)))]>, + Requires<[hasGenericLdSt]>;*/ + + def _no : NVPTXInst<(outs Int32Regs:$result), (ins Int32Regs:$src), + "mov.u32 \t$result, $src;", + [(set Int32Regs:$result, (Intrin Int32Regs:$src))]>; + def _no_64 : NVPTXInst<(outs Int64Regs:$result), (ins Int64Regs:$src), + "mov.u64 \t$result, $src;", + [(set Int64Regs:$result, (Intrin Int64Regs:$src))]>; + +// @TODO: Are these actually needed? I believe global addresses will be copied +// to register values anyway. + /*def _addr_no : NVPTXInst<(outs Int32Regs:$result), (ins imem:$src), + "mov.u32 \t$result, $src;", + [(set Int32Regs:$result, (Intrin (Wrapper tglobaladdr:$src)))]>; + def _addr_no_64 : NVPTXInst<(outs Int64Regs:$result), (ins imem:$src), + "mov.u64 \t$result, $src;", + [(set Int64Regs:$result, (Intrin (Wrapper tglobaladdr:$src)))]>;*/ +} + +multiclass G_TO_NG<string Str, Intrinsic Intrin> { + def _yes : NVPTXInst<(outs Int32Regs:$result), (ins Int32Regs:$src), + !strconcat("cvta.to.", !strconcat(Str, ".u32 \t$result, $src;")), + [(set Int32Regs:$result, (Intrin Int32Regs:$src))]>, + Requires<[hasGenericLdSt]>; + def _yes_64 : NVPTXInst<(outs Int64Regs:$result), (ins Int64Regs:$src), + !strconcat("cvta.to.", !strconcat(Str, ".u64 \t$result, $src;")), + [(set Int64Regs:$result, (Intrin Int64Regs:$src))]>, + Requires<[hasGenericLdSt]>; + def _no : NVPTXInst<(outs Int32Regs:$result), (ins Int32Regs:$src), + "mov.u32 \t$result, $src;", + [(set Int32Regs:$result, (Intrin Int32Regs:$src))]>; + def _no_64 : NVPTXInst<(outs Int64Regs:$result), (ins Int64Regs:$src), + "mov.u64 
\t$result, $src;", + [(set Int64Regs:$result, (Intrin Int64Regs:$src))]>; +} + +defm cvta_local : NG_TO_G<"local", int_nvvm_ptr_local_to_gen>; +defm cvta_shared : NG_TO_G<"shared", int_nvvm_ptr_shared_to_gen>; +defm cvta_global : NG_TO_G<"global", int_nvvm_ptr_global_to_gen>; + +defm cvta_to_local : G_TO_NG<"local", int_nvvm_ptr_gen_to_local>; +defm cvta_to_shared : G_TO_NG<"shared", int_nvvm_ptr_gen_to_shared>; +defm cvta_to_global : G_TO_NG<"global", int_nvvm_ptr_gen_to_global>; + +def cvta_const : NVPTXInst<(outs Int32Regs:$result), (ins Int32Regs:$src), + "mov.u32 \t$result, $src;", + [(set Int32Regs:$result, (int_nvvm_ptr_constant_to_gen Int32Regs:$src))]>; +def cvta_const_64 : NVPTXInst<(outs Int64Regs:$result), (ins Int64Regs:$src), + "mov.u64 \t$result, $src;", + [(set Int64Regs:$result, (int_nvvm_ptr_constant_to_gen Int64Regs:$src))]>; + + + +// @TODO: Revisit this. There is a type +// contradiction between iPTRAny and iPTR for the def. +/*def cvta_const_addr : NVPTXInst<(outs Int32Regs:$result), (ins imemAny:$src), + "mov.u32 \t$result, $src;", + [(set Int32Regs:$result, (int_nvvm_ptr_constant_to_gen + (Wrapper tglobaladdr:$src)))]>; +def cvta_const_addr_64 : NVPTXInst<(outs Int64Regs:$result), (ins imemAny:$src), + "mov.u64 \t$result, $src;", + [(set Int64Regs:$result, (int_nvvm_ptr_constant_to_gen + (Wrapper tglobaladdr:$src)))]>;*/ + + +def cvta_to_const : NVPTXInst<(outs Int32Regs:$result), (ins Int32Regs:$src), + "mov.u32 \t$result, $src;", + [(set Int32Regs:$result, (int_nvvm_ptr_gen_to_constant Int32Regs:$src))]>; +def cvta_to_const_64 : NVPTXInst<(outs Int64Regs:$result), (ins Int64Regs:$src), + "mov.u64 \t$result, $src;", + [(set Int64Regs:$result, (int_nvvm_ptr_gen_to_constant Int64Regs:$src))]>; + + +// nvvm.ptr.gen.to.param +def nvvm_ptr_gen_to_param : NVPTXInst<(outs Int32Regs:$result), + (ins Int32Regs:$src), + "mov.u32 \t$result, $src;", + [(set Int32Regs:$result, + (int_nvvm_ptr_gen_to_param Int32Regs:$src))]>; +def 
nvvm_ptr_gen_to_param_64 : NVPTXInst<(outs Int64Regs:$result), + (ins Int64Regs:$src), + "mov.u64 \t$result, $src;", + [(set Int64Regs:$result, + (int_nvvm_ptr_gen_to_param Int64Regs:$src))]>; + + +// nvvm.move intrinsicc +def nvvm_move_i8 : NVPTXInst<(outs Int8Regs:$r), (ins Int8Regs:$s), + "mov.b16 \t$r, $s;", + [(set Int8Regs:$r, + (int_nvvm_move_i8 Int8Regs:$s))]>; +def nvvm_move_i16 : NVPTXInst<(outs Int16Regs:$r), (ins Int16Regs:$s), + "mov.b16 \t$r, $s;", + [(set Int16Regs:$r, + (int_nvvm_move_i16 Int16Regs:$s))]>; +def nvvm_move_i32 : NVPTXInst<(outs Int32Regs:$r), (ins Int32Regs:$s), + "mov.b32 \t$r, $s;", + [(set Int32Regs:$r, + (int_nvvm_move_i32 Int32Regs:$s))]>; +def nvvm_move_i64 : NVPTXInst<(outs Int64Regs:$r), (ins Int64Regs:$s), + "mov.b64 \t$r, $s;", + [(set Int64Regs:$r, + (int_nvvm_move_i64 Int64Regs:$s))]>; +def nvvm_move_float : NVPTXInst<(outs Float32Regs:$r), (ins Float32Regs:$s), + "mov.f32 \t$r, $s;", + [(set Float32Regs:$r, + (int_nvvm_move_float Float32Regs:$s))]>; +def nvvm_move_double : NVPTXInst<(outs Float64Regs:$r), (ins Float64Regs:$s), + "mov.f64 \t$r, $s;", + [(set Float64Regs:$r, + (int_nvvm_move_double Float64Regs:$s))]>; +def nvvm_move_ptr32 : NVPTXInst<(outs Int32Regs:$r), (ins Int32Regs:$s), + "mov.u32 \t$r, $s;", + [(set Int32Regs:$r, + (int_nvvm_move_ptr Int32Regs:$s))]>; +def nvvm_move_ptr64 : NVPTXInst<(outs Int64Regs:$r), (ins Int64Regs:$s), + "mov.u64 \t$r, $s;", + [(set Int64Regs:$r, + (int_nvvm_move_ptr Int64Regs:$s))]>; + +// @TODO: Are these actually needed, or will we always just see symbols +// copied to registers first? 
+/*def nvvm_move_sym32 : NVPTXInst<(outs Int32Regs:$r), (ins imem:$s),
+  "mov.u32 \t$r, $s;",
+  [(set Int32Regs:$r,
+  (int_nvvm_move_ptr texternalsym:$s))]>;
+def nvvm_move_sym64 : NVPTXInst<(outs Int64Regs:$r), (ins imem:$s),
+  "mov.u64 \t$r, $s;",
+  [(set Int64Regs:$r,
+  (int_nvvm_move_ptr texternalsym:$s))]>;*/
+
+
+// MoveParam %r1, param
+// ptr_local_to_gen %r2, %r1
+// ptr_gen_to_local %r3, %r2
+// ->
+// mov %r1, param
+
+// @TODO: Revisit this. There is a type
+// contradiction between iPTRAny and iPTR for the addr defs, so the move_sym
+// instructions are not currently defined. However, we can use the ptr
+// variants and the asm printer will do the right thing.
+//
+// The two patterns below implement the fold sketched above: a gen->local
+// conversion applied to a local->gen conversion of a MoveParam of an external
+// symbol is selected as a single nvvm_move_ptr64 / nvvm_move_ptr32, chosen by
+// the result type (i64 / i32) of the outer conversion.
+def : Pat<(i64 (int_nvvm_ptr_gen_to_local (int_nvvm_ptr_local_to_gen
+  (MoveParam texternalsym:$src)))),
+  (nvvm_move_ptr64 texternalsym:$src)>;
+def : Pat<(i32 (int_nvvm_ptr_gen_to_local (int_nvvm_ptr_local_to_gen
+  (MoveParam texternalsym:$src)))),
+  (nvvm_move_ptr32 texternalsym:$src)>;
+
+
+//-----------------------------------
+// Compiler Error Warn
+// - Just ignore them in codegen
+//-----------------------------------
+
+// Each def matches the warn/error intrinsic with one 32-bit or 64-bit register
+// operand and has no outputs. The asm string starts with "//", so what is
+// printed for the instruction is only a comment naming the intrinsic.
+def INT_NVVM_COMPILER_WARN_32 : NVPTXInst<(outs), (ins Int32Regs:$a),
+  "// llvm.nvvm.compiler.warn()",
+  [(int_nvvm_compiler_warn Int32Regs:$a)]>;
+def INT_NVVM_COMPILER_WARN_64 : NVPTXInst<(outs), (ins Int64Regs:$a),
+  "// llvm.nvvm.compiler.warn()",
+  [(int_nvvm_compiler_warn Int64Regs:$a)]>;
+def INT_NVVM_COMPILER_ERROR_32 : NVPTXInst<(outs), (ins Int32Regs:$a),
+  "// llvm.nvvm.compiler.error()",
+  [(int_nvvm_compiler_error Int32Regs:$a)]>;
+def INT_NVVM_COMPILER_ERROR_64 : NVPTXInst<(outs), (ins Int64Regs:$a),
+  "// llvm.nvvm.compiler.error()",
+  [(int_nvvm_compiler_error Int64Regs:$a)]>;
+
+
+
+//===-- Old PTX Back-end Intrinsics ---------------------------------------===//
+
+// These intrinsics are handled to retain compatibility with the old backend.
+
+// PTX Special Purpose Register Accessor Intrinsics
+
+// Instruction class for reading a special register into a 64-bit register.
+//   regname - register name without the leading '%'; the asm string is built
+//             as "mov.u64\t$d, %" # regname # ";".
+//   intop   - zero-operand intrinsic whose result is selected to this
+//             instruction.
+class PTX_READ_SPECIAL_REGISTER_R64<string regname, Intrinsic intop>
+  : NVPTXInst<(outs Int64Regs:$d), (ins),
+  !strconcat(!strconcat("mov.u64\t$d, %", regname), ";"),
+  [(set Int64Regs:$d, (intop))]>;
+
+// Same as PTX_READ_SPECIAL_REGISTER_R64, but the destination is a 32-bit
+// register and the instruction printed is mov.u32.
+class PTX_READ_SPECIAL_REGISTER_R32<string regname, Intrinsic intop>
+  : NVPTXInst<(outs Int32Regs:$d), (ins),
+  !strconcat(!strconcat("mov.u32\t$d, %", regname), ";"),
+  [(set Int32Regs:$d, (intop))]>;
+
+// TODO Add read vector-version of special registers
+
+// One def per int_ptx_read_* intrinsic; the string argument is the register
+// name that ends up after '%' in the printed mov.
+def PTX_READ_TID_X : PTX_READ_SPECIAL_REGISTER_R32<"tid.x",
+  int_ptx_read_tid_x>;
+def PTX_READ_TID_Y : PTX_READ_SPECIAL_REGISTER_R32<"tid.y",
+  int_ptx_read_tid_y>;
+def PTX_READ_TID_Z : PTX_READ_SPECIAL_REGISTER_R32<"tid.z",
+  int_ptx_read_tid_z>;
+def PTX_READ_TID_W : PTX_READ_SPECIAL_REGISTER_R32<"tid.w",
+  int_ptx_read_tid_w>;
+
+def PTX_READ_NTID_X : PTX_READ_SPECIAL_REGISTER_R32<"ntid.x",
+  int_ptx_read_ntid_x>;
+def PTX_READ_NTID_Y : PTX_READ_SPECIAL_REGISTER_R32<"ntid.y",
+  int_ptx_read_ntid_y>;
+def PTX_READ_NTID_Z : PTX_READ_SPECIAL_REGISTER_R32<"ntid.z",
+  int_ptx_read_ntid_z>;
+def PTX_READ_NTID_W : PTX_READ_SPECIAL_REGISTER_R32<"ntid.w",
+  int_ptx_read_ntid_w>;
+
+def PTX_READ_LANEID : PTX_READ_SPECIAL_REGISTER_R32<"laneid",
+  int_ptx_read_laneid>;
+def PTX_READ_WARPID : PTX_READ_SPECIAL_REGISTER_R32<"warpid",
+  int_ptx_read_warpid>;
+def PTX_READ_NWARPID : PTX_READ_SPECIAL_REGISTER_R32<"nwarpid",
+  int_ptx_read_nwarpid>;
+
+def PTX_READ_CTAID_X : PTX_READ_SPECIAL_REGISTER_R32<"ctaid.x",
+  int_ptx_read_ctaid_x>;
+def PTX_READ_CTAID_Y : PTX_READ_SPECIAL_REGISTER_R32<"ctaid.y",
+  int_ptx_read_ctaid_y>;
+def PTX_READ_CTAID_Z : PTX_READ_SPECIAL_REGISTER_R32<"ctaid.z",
+  int_ptx_read_ctaid_z>;
+def PTX_READ_CTAID_W : PTX_READ_SPECIAL_REGISTER_R32<"ctaid.w",
+  int_ptx_read_ctaid_w>;
+
+def PTX_READ_NCTAID_X : PTX_READ_SPECIAL_REGISTER_R32<"nctaid.x",
+  int_ptx_read_nctaid_x>;
+def PTX_READ_NCTAID_Y : PTX_READ_SPECIAL_REGISTER_R32<"nctaid.y",
+  int_ptx_read_nctaid_y>;
+def PTX_READ_NCTAID_Z : PTX_READ_SPECIAL_REGISTER_R32<"nctaid.z",
+  int_ptx_read_nctaid_z>;
+def PTX_READ_NCTAID_W : PTX_READ_SPECIAL_REGISTER_R32<"nctaid.w",
+  int_ptx_read_nctaid_w>;
+
+def PTX_READ_SMID : PTX_READ_SPECIAL_REGISTER_R32<"smid",
+  int_ptx_read_smid>;
+def PTX_READ_NSMID : PTX_READ_SPECIAL_REGISTER_R32<"nsmid",
+  int_ptx_read_nsmid>;
+def PTX_READ_GRIDID : PTX_READ_SPECIAL_REGISTER_R32<"gridid",
+  int_ptx_read_gridid>;
+
+def PTX_READ_LANEMASK_EQ
+  : PTX_READ_SPECIAL_REGISTER_R32<"lanemask_eq", int_ptx_read_lanemask_eq>;
+def PTX_READ_LANEMASK_LE
+  : PTX_READ_SPECIAL_REGISTER_R32<"lanemask_le", int_ptx_read_lanemask_le>;
+def PTX_READ_LANEMASK_LT
+  : PTX_READ_SPECIAL_REGISTER_R32<"lanemask_lt", int_ptx_read_lanemask_lt>;
+def PTX_READ_LANEMASK_GE
+  : PTX_READ_SPECIAL_REGISTER_R32<"lanemask_ge", int_ptx_read_lanemask_ge>;
+def PTX_READ_LANEMASK_GT
+  : PTX_READ_SPECIAL_REGISTER_R32<"lanemask_gt", int_ptx_read_lanemask_gt>;
+
+// clock64 is the only register in this list read through the 64-bit class.
+def PTX_READ_CLOCK
+  : PTX_READ_SPECIAL_REGISTER_R32<"clock", int_ptx_read_clock>;
+def PTX_READ_CLOCK64
+  : PTX_READ_SPECIAL_REGISTER_R64<"clock64", int_ptx_read_clock64>;
+
+def PTX_READ_PM0 : PTX_READ_SPECIAL_REGISTER_R32<"pm0", int_ptx_read_pm0>;
+def PTX_READ_PM1 : PTX_READ_SPECIAL_REGISTER_R32<"pm1", int_ptx_read_pm1>;
+def PTX_READ_PM2 : PTX_READ_SPECIAL_REGISTER_R32<"pm2", int_ptx_read_pm2>;
+def PTX_READ_PM3 : PTX_READ_SPECIAL_REGISTER_R32<"pm3", int_ptx_read_pm3>;
+
+// PTX Parallel Synchronization and Communication Intrinsics
+
+// Matches int_ptx_bar_sync with an immediate operand and prints
+// "bar.sync\t<imm>;". Only the immediate form is defined here.
+def PTX_BAR_SYNC : NVPTXInst<(outs), (ins i32imm:$i), "bar.sync\t$i;",
+  [(int_ptx_bar_sync imm:$i)]>;
diff --git a/lib/Target/NVPTX/NVPTXLowerAggrCopies.cpp b/lib/Target/NVPTX/NVPTXLowerAggrCopies.cpp
new file mode 100644
index 0000000..84c7232
--- /dev/null
+++ b/lib/Target/NVPTX/NVPTXLowerAggrCopies.cpp
@@ -0,0 +1,208 @@
+//===- NVPTXLowerAggrCopies.cpp - ------------------------------*- C++ -*--===//
+//
+// The LLVM
Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// Lower aggregate copies, memset, memcpy, memmov intrinsics into loops when +// the size is large or is not a compile-time constant. +// +//===----------------------------------------------------------------------===// + +#include "llvm/Function.h" +#include "llvm/Constants.h" +#include "llvm/Module.h" +#include "llvm/Instructions.h" +#include "llvm/Intrinsics.h" +#include "llvm/IntrinsicInst.h" +#include "llvm/Support/InstIterator.h" +#include "llvm/Support/IRBuilder.h" +#include "NVPTXLowerAggrCopies.h" +#include "llvm/Target/TargetData.h" +#include "llvm/LLVMContext.h" + +using namespace llvm; + +namespace llvm { +FunctionPass *createLowerAggrCopies(); +} + +char NVPTXLowerAggrCopies::ID = 0; + +// Lower MemTransferInst or load-store pair to loop +static void convertTransferToLoop(Instruction *splitAt, Value *srcAddr, + Value *dstAddr, Value *len, + //unsigned numLoads, + bool srcVolatile, bool dstVolatile, + LLVMContext &Context, Function &F) { + Type *indType = len->getType(); + + BasicBlock *origBB = splitAt->getParent(); + BasicBlock *newBB = splitAt->getParent()->splitBasicBlock(splitAt, "split"); + BasicBlock *loopBB = BasicBlock::Create(Context, "loadstoreloop", &F, newBB); + + origBB->getTerminator()->setSuccessor(0, loopBB); + IRBuilder<> builder(origBB, origBB->getTerminator()); + + // srcAddr and dstAddr are expected to be pointer types, + // so no check is made here. 
+ unsigned srcAS = + dyn_cast<PointerType>(srcAddr->getType())->getAddressSpace(); + unsigned dstAS = + dyn_cast<PointerType>(dstAddr->getType())->getAddressSpace(); + + // Cast pointers to (char *) + srcAddr = builder.CreateBitCast(srcAddr, Type::getInt8PtrTy(Context, srcAS)); + dstAddr = builder.CreateBitCast(dstAddr, Type::getInt8PtrTy(Context, dstAS)); + + IRBuilder<> loop(loopBB); + // The loop index (ind) is a phi node. + PHINode *ind = loop.CreatePHI(indType, 0); + // Incoming value for ind is 0 + ind->addIncoming(ConstantInt::get(indType, 0), origBB); + + // load from srcAddr+ind + Value *val = loop.CreateLoad(loop.CreateGEP(srcAddr, ind), srcVolatile); + // store at dstAddr+ind + loop.CreateStore(val, loop.CreateGEP(dstAddr, ind), dstVolatile); + + // The value for ind coming from backedge is (ind + 1) + Value *newind = loop.CreateAdd(ind, ConstantInt::get(indType, 1)); + ind->addIncoming(newind, loopBB); + + loop.CreateCondBr(loop.CreateICmpULT(newind, len), loopBB, newBB); +} + +// Lower MemSetInst to loop +static void convertMemSetToLoop(Instruction *splitAt, Value *dstAddr, + Value *len, Value *val, LLVMContext &Context, + Function &F) { + BasicBlock *origBB = splitAt->getParent(); + BasicBlock *newBB = splitAt->getParent()->splitBasicBlock(splitAt, "split"); + BasicBlock *loopBB = BasicBlock::Create(Context, "loadstoreloop", &F, newBB); + + origBB->getTerminator()->setSuccessor(0, loopBB); + IRBuilder<> builder(origBB, origBB->getTerminator()); + + unsigned dstAS = + dyn_cast<PointerType>(dstAddr->getType())->getAddressSpace(); + + // Cast pointer to the type of value getting stored + dstAddr = builder.CreateBitCast(dstAddr, + PointerType::get(val->getType(), dstAS)); + + IRBuilder<> loop(loopBB); + PHINode *ind = loop.CreatePHI(len->getType(), 0); + ind->addIncoming(ConstantInt::get(len->getType(), 0), origBB); + + loop.CreateStore(val, loop.CreateGEP(dstAddr, ind), false); + + Value *newind = loop.CreateAdd(ind, ConstantInt::get(len->getType(), 1)); 
+ ind->addIncoming(newind, loopBB); + + loop.CreateCondBr(loop.CreateICmpULT(newind, len), loopBB, newBB); +} + +bool NVPTXLowerAggrCopies::runOnFunction(Function &F) { + SmallVector<LoadInst *, 4> aggrLoads; + SmallVector<MemTransferInst *, 4> aggrMemcpys; + SmallVector<MemSetInst *, 4> aggrMemsets; + + TargetData *TD = &getAnalysis<TargetData>(); + LLVMContext &Context = F.getParent()->getContext(); + + // + // Collect all the aggrLoads, aggrMemcpys and addrMemsets. + // + //const BasicBlock *firstBB = &F.front(); // first BB in F + for (Function::iterator BI = F.begin(), BE = F.end(); BI != BE; ++BI) { + //BasicBlock *bb = BI; + for (BasicBlock::iterator II = BI->begin(), IE = BI->end(); II != IE; + ++II) { + if (LoadInst * load = dyn_cast<LoadInst>(II)) { + + if (load->hasOneUse() == false) continue; + + if (TD->getTypeStoreSize(load->getType()) < MaxAggrCopySize) continue; + + User *use = *(load->use_begin()); + if (StoreInst * store = dyn_cast<StoreInst>(use)) { + if (store->getOperand(0) != load) //getValueOperand + continue; + aggrLoads.push_back(load); + } + } else if (MemTransferInst * intr = dyn_cast<MemTransferInst>(II)) { + Value *len = intr->getLength(); + // If the number of elements being copied is greater + // than MaxAggrCopySize, lower it to a loop + if (ConstantInt * len_int = dyn_cast < ConstantInt > (len)) { + if (len_int->getZExtValue() >= MaxAggrCopySize) { + aggrMemcpys.push_back(intr); + } + } else { + // turn variable length memcpy/memmov into loop + aggrMemcpys.push_back(intr); + } + } else if (MemSetInst * memsetintr = dyn_cast<MemSetInst>(II)) { + Value *len = memsetintr->getLength(); + if (ConstantInt * len_int = dyn_cast<ConstantInt>(len)) { + if (len_int->getZExtValue() >= MaxAggrCopySize) { + aggrMemsets.push_back(memsetintr); + } + } else { + // turn variable length memset into loop + aggrMemsets.push_back(memsetintr); + } + } + } + } + if ((aggrLoads.size() == 0) && (aggrMemcpys.size() == 0) + && (aggrMemsets.size() == 0)) return 
false; + + // + // Do the transformation of an aggr load/copy/set to a loop + // + for (unsigned i = 0, e = aggrLoads.size(); i != e; ++i) { + LoadInst *load = aggrLoads[i]; + StoreInst *store = dyn_cast<StoreInst>(*load->use_begin()); + Value *srcAddr = load->getOperand(0); + Value *dstAddr = store->getOperand(1); + unsigned numLoads = TD->getTypeStoreSize(load->getType()); + Value *len = ConstantInt::get(Type::getInt32Ty(Context), numLoads); + + convertTransferToLoop(store, srcAddr, dstAddr, len, load->isVolatile(), + store->isVolatile(), Context, F); + + store->eraseFromParent(); + load->eraseFromParent(); + } + + for (unsigned i = 0, e = aggrMemcpys.size(); i != e; ++i) { + MemTransferInst *cpy = aggrMemcpys[i]; + Value *len = cpy->getLength(); + // llvm 2.7 version of memcpy does not have volatile + // operand yet. So always making it non-volatile + // optimistically, so that we don't see unnecessary + // st.volatile in ptx + convertTransferToLoop(cpy, cpy->getSource(), cpy->getDest(), len, false, + false, Context, F); + cpy->eraseFromParent(); + } + + for (unsigned i = 0, e = aggrMemsets.size(); i != e; ++i) { + MemSetInst *memsetinst = aggrMemsets[i]; + Value *len = memsetinst->getLength(); + Value *val = memsetinst->getValue(); + convertMemSetToLoop(memsetinst, memsetinst->getDest(), len, val, Context, + F); + memsetinst->eraseFromParent(); + } + + return true; +} + +FunctionPass *llvm::createLowerAggrCopies() { + return new NVPTXLowerAggrCopies(); +} diff --git a/lib/Target/NVPTX/NVPTXLowerAggrCopies.h b/lib/Target/NVPTX/NVPTXLowerAggrCopies.h new file mode 100644 index 0000000..ac7f150 --- /dev/null +++ b/lib/Target/NVPTX/NVPTXLowerAggrCopies.h @@ -0,0 +1,47 @@ +//===-- llvm/lib/Target/NVPTX/NVPTXLowerAggrCopies.h ------------*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. 
+// +//===----------------------------------------------------------------------===// +// +// This file contains the declaration of the NVIDIA specific lowering of +// aggregate copies +// +//===----------------------------------------------------------------------===// + +#ifndef NVPTX_LOWER_AGGR_COPIES_H +#define NVPTX_LOWER_AGGR_COPIES_H + +#include "llvm/Pass.h" +#include "llvm/CodeGen/MachineFunctionAnalysis.h" +#include "llvm/Target/TargetData.h" + +namespace llvm { + +// actual analysis class, which is a functionpass +struct NVPTXLowerAggrCopies : public FunctionPass { + static char ID; + + NVPTXLowerAggrCopies() : FunctionPass(ID) {} + + void getAnalysisUsage(AnalysisUsage &AU) const { + AU.addRequired<TargetData>(); + AU.addPreserved<MachineFunctionAnalysis>(); + } + + virtual bool runOnFunction(Function &F); + + static const unsigned MaxAggrCopySize = 128; + + virtual const char *getPassName() const { + return "Lower aggregate copies/intrinsics into loops"; + } +}; + +extern FunctionPass *createLowerAggrCopies(); +} + +#endif diff --git a/lib/Target/NVPTX/NVPTXNumRegisters.h b/lib/Target/NVPTX/NVPTXNumRegisters.h new file mode 100644 index 0000000..b4a4dbc --- /dev/null +++ b/lib/Target/NVPTX/NVPTXNumRegisters.h @@ -0,0 +1,20 @@ + +//===-- NVPTXNumRegisters.h - PTX Register Info ---------------------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. 
+// +//===----------------------------------------------------------------------===// + +#ifndef NVPTX_NUM_REGISTERS_H +#define NVPTX_NUM_REGISTERS_H + +namespace llvm { + +const unsigned NVPTXNumRegisters = 396; + +} + +#endif diff --git a/lib/Target/NVPTX/NVPTXRegisterInfo.cpp b/lib/Target/NVPTX/NVPTXRegisterInfo.cpp new file mode 100644 index 0000000..2728309 --- /dev/null +++ b/lib/Target/NVPTX/NVPTXRegisterInfo.cpp @@ -0,0 +1,332 @@ +//===- NVPTXRegisterInfo.cpp - NVPTX Register Information -----------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file contains the NVPTX implementation of the TargetRegisterInfo class. +// +//===----------------------------------------------------------------------===// + +#define DEBUG_TYPE "nvptx-reg-info" + +#include "NVPTX.h" +#include "NVPTXRegisterInfo.h" +#include "NVPTXSubtarget.h" +#include "llvm/ADT/BitVector.h" +#include "llvm/CodeGen/MachineInstrBuilder.h" +#include "llvm/CodeGen/MachineFunction.h" +#include "llvm/CodeGen/MachineFrameInfo.h" +#include "llvm/MC/MachineLocation.h" +#include "llvm/Target/TargetInstrInfo.h" + + +using namespace llvm; + +namespace llvm +{ +std::string getNVPTXRegClassName (TargetRegisterClass const *RC) { + if (RC == &NVPTX::Float32RegsRegClass) { + return ".f32"; + } + if (RC == &NVPTX::Float64RegsRegClass) { + return ".f64"; + } + else if (RC == &NVPTX::Int64RegsRegClass) { + return ".s64"; + } + else if (RC == &NVPTX::Int32RegsRegClass) { + return ".s32"; + } + else if (RC == &NVPTX::Int16RegsRegClass) { + return ".s16"; + } + // Int8Regs become 16-bit registers in PTX + else if (RC == &NVPTX::Int8RegsRegClass) { + return ".s16"; + } + else if (RC == &NVPTX::Int1RegsRegClass) { + return ".pred"; + } + else if (RC == &NVPTX::SpecialRegsRegClass) { + return 
"!Special!"; + } + else if (RC == &NVPTX::V2F32RegsRegClass) { + return ".v2.f32"; + } + else if (RC == &NVPTX::V4F32RegsRegClass) { + return ".v4.f32"; + } + else if (RC == &NVPTX::V2I32RegsRegClass) { + return ".v2.s32"; + } + else if (RC == &NVPTX::V4I32RegsRegClass) { + return ".v4.s32"; + } + else if (RC == &NVPTX::V2F64RegsRegClass) { + return ".v2.f64"; + } + else if (RC == &NVPTX::V2I64RegsRegClass) { + return ".v2.s64"; + } + else if (RC == &NVPTX::V2I16RegsRegClass) { + return ".v2.s16"; + } + else if (RC == &NVPTX::V4I16RegsRegClass) { + return ".v4.s16"; + } + else if (RC == &NVPTX::V2I8RegsRegClass) { + return ".v2.s16"; + } + else if (RC == &NVPTX::V4I8RegsRegClass) { + return ".v4.s16"; + } + else { + return "INTERNAL"; + } + return ""; +} + +std::string getNVPTXRegClassStr (TargetRegisterClass const *RC) { + if (RC == &NVPTX::Float32RegsRegClass) { + return "%f"; + } + if (RC == &NVPTX::Float64RegsRegClass) { + return "%fd"; + } + else if (RC == &NVPTX::Int64RegsRegClass) { + return "%rd"; + } + else if (RC == &NVPTX::Int32RegsRegClass) { + return "%r"; + } + else if (RC == &NVPTX::Int16RegsRegClass) { + return "%rs"; + } + else if (RC == &NVPTX::Int8RegsRegClass) { + return "%rc"; + } + else if (RC == &NVPTX::Int1RegsRegClass) { + return "%p"; + } + else if (RC == &NVPTX::SpecialRegsRegClass) { + return "!Special!"; + } + else if (RC == &NVPTX::V2F32RegsRegClass) { + return "%v2f"; + } + else if (RC == &NVPTX::V4F32RegsRegClass) { + return "%v4f"; + } + else if (RC == &NVPTX::V2I32RegsRegClass) { + return "%v2r"; + } + else if (RC == &NVPTX::V4I32RegsRegClass) { + return "%v4r"; + } + else if (RC == &NVPTX::V2F64RegsRegClass) { + return "%v2fd"; + } + else if (RC == &NVPTX::V2I64RegsRegClass) { + return "%v2rd"; + } + else if (RC == &NVPTX::V2I16RegsRegClass) { + return "%v2s"; + } + else if (RC == &NVPTX::V4I16RegsRegClass) { + return "%v4rs"; + } + else if (RC == &NVPTX::V2I8RegsRegClass) { + return "%v2rc"; + } + else if (RC == 
&NVPTX::V4I8RegsRegClass) { + return "%v4rc"; + } + else { + return "INTERNAL"; + } + return ""; +} + +bool isNVPTXVectorRegClass(TargetRegisterClass const *RC) { + if (RC->getID() == NVPTX::V2F32RegsRegClassID) + return true; + if (RC->getID() == NVPTX::V2F64RegsRegClassID) + return true; + if (RC->getID() == NVPTX::V2I16RegsRegClassID) + return true; + if (RC->getID() == NVPTX::V2I32RegsRegClassID) + return true; + if (RC->getID() == NVPTX::V2I64RegsRegClassID) + return true; + if (RC->getID() == NVPTX::V2I8RegsRegClassID) + return true; + if (RC->getID() == NVPTX::V4F32RegsRegClassID) + return true; + if (RC->getID() == NVPTX::V4I16RegsRegClassID) + return true; + if (RC->getID() == NVPTX::V4I32RegsRegClassID) + return true; + if (RC->getID() == NVPTX::V4I8RegsRegClassID) + return true; + return false; +} + +std::string getNVPTXElemClassName(TargetRegisterClass const *RC) { + if (RC->getID() == NVPTX::V2F32RegsRegClassID) + return getNVPTXRegClassName(&NVPTX::Float32RegsRegClass); + if (RC->getID() == NVPTX::V2F64RegsRegClassID) + return getNVPTXRegClassName(&NVPTX::Float64RegsRegClass); + if (RC->getID() == NVPTX::V2I16RegsRegClassID) + return getNVPTXRegClassName(&NVPTX::Int16RegsRegClass); + if (RC->getID() == NVPTX::V2I32RegsRegClassID) + return getNVPTXRegClassName(&NVPTX::Int32RegsRegClass); + if (RC->getID() == NVPTX::V2I64RegsRegClassID) + return getNVPTXRegClassName(&NVPTX::Int64RegsRegClass); + if (RC->getID() == NVPTX::V2I8RegsRegClassID) + return getNVPTXRegClassName(&NVPTX::Int8RegsRegClass); + if (RC->getID() == NVPTX::V4F32RegsRegClassID) + return getNVPTXRegClassName(&NVPTX::Float32RegsRegClass); + if (RC->getID() == NVPTX::V4I16RegsRegClassID) + return getNVPTXRegClassName(&NVPTX::Int16RegsRegClass); + if (RC->getID() == NVPTX::V4I32RegsRegClassID) + return getNVPTXRegClassName(&NVPTX::Int32RegsRegClass); + if (RC->getID() == NVPTX::V4I8RegsRegClassID) + return getNVPTXRegClassName(&NVPTX::Int8RegsRegClass); + assert(0 && "Not a vector register 
class"); + return "Unsupported"; +} + +const TargetRegisterClass *getNVPTXElemClass(TargetRegisterClass const *RC) { + if (RC->getID() == NVPTX::V2F32RegsRegClassID) + return (&NVPTX::Float32RegsRegClass); + if (RC->getID() == NVPTX::V2F64RegsRegClassID) + return (&NVPTX::Float64RegsRegClass); + if (RC->getID() == NVPTX::V2I16RegsRegClassID) + return (&NVPTX::Int16RegsRegClass); + if (RC->getID() == NVPTX::V2I32RegsRegClassID) + return (&NVPTX::Int32RegsRegClass); + if (RC->getID() == NVPTX::V2I64RegsRegClassID) + return (&NVPTX::Int64RegsRegClass); + if (RC->getID() == NVPTX::V2I8RegsRegClassID) + return (&NVPTX::Int8RegsRegClass); + if (RC->getID() == NVPTX::V4F32RegsRegClassID) + return (&NVPTX::Float32RegsRegClass); + if (RC->getID() == NVPTX::V4I16RegsRegClassID) + return (&NVPTX::Int16RegsRegClass); + if (RC->getID() == NVPTX::V4I32RegsRegClassID) + return (&NVPTX::Int32RegsRegClass); + if (RC->getID() == NVPTX::V4I8RegsRegClassID) + return (&NVPTX::Int8RegsRegClass); + assert(0 && "Not a vector register class"); + return 0; +} + +int getNVPTXVectorSize(TargetRegisterClass const *RC) { + if (RC->getID() == NVPTX::V2F32RegsRegClassID) + return 2; + if (RC->getID() == NVPTX::V2F64RegsRegClassID) + return 2; + if (RC->getID() == NVPTX::V2I16RegsRegClassID) + return 2; + if (RC->getID() == NVPTX::V2I32RegsRegClassID) + return 2; + if (RC->getID() == NVPTX::V2I64RegsRegClassID) + return 2; + if (RC->getID() == NVPTX::V2I8RegsRegClassID) + return 2; + if (RC->getID() == NVPTX::V4F32RegsRegClassID) + return 4; + if (RC->getID() == NVPTX::V4I16RegsRegClassID) + return 4; + if (RC->getID() == NVPTX::V4I32RegsRegClassID) + return 4; + if (RC->getID() == NVPTX::V4I8RegsRegClassID) + return 4; + assert(0 && "Not a vector register class"); + return -1; +} +} + +NVPTXRegisterInfo::NVPTXRegisterInfo(const TargetInstrInfo &tii, + const NVPTXSubtarget &st) +: NVPTXGenRegisterInfo(0), + TII(tii), + ST(st) { + Is64Bit = st.is64Bit(); +} + + +#define GET_REGINFO_TARGET_DESC 
+#include "NVPTXGenRegisterInfo.inc" + +/// NVPTX Callee Saved Registers +const uint16_t* NVPTXRegisterInfo:: +getCalleeSavedRegs(const MachineFunction *MF) const { + static const uint16_t CalleeSavedRegs[] = { 0 }; + return CalleeSavedRegs; +} + +// NVPTX Callee Saved Reg Classes +const TargetRegisterClass* const* +NVPTXRegisterInfo::getCalleeSavedRegClasses(const MachineFunction *MF) const { + static const TargetRegisterClass * const CalleeSavedRegClasses[] = { 0 }; + return CalleeSavedRegClasses; +} + +BitVector NVPTXRegisterInfo::getReservedRegs(const MachineFunction &MF) const { + BitVector Reserved(getNumRegs()); + return Reserved; +} + +void NVPTXRegisterInfo:: +eliminateFrameIndex(MachineBasicBlock::iterator II, + int SPAdj, + RegScavenger *RS) const { + assert(SPAdj == 0 && "Unexpected"); + + unsigned i = 0; + MachineInstr &MI = *II; + while (!MI.getOperand(i).isFI()) { + ++i; + assert(i < MI.getNumOperands() && + "Instr doesn't have FrameIndex operand!"); + } + + int FrameIndex = MI.getOperand(i).getIndex(); + + MachineFunction &MF = *MI.getParent()->getParent(); + int Offset = MF.getFrameInfo()->getObjectOffset(FrameIndex) + + MI.getOperand(i+1).getImm(); + + // Using I0 as the frame pointer + MI.getOperand(i).ChangeToRegister(NVPTX::VRFrame, false); + MI.getOperand(i+1).ChangeToImmediate(Offset); +} + + +int NVPTXRegisterInfo:: +getDwarfRegNum(unsigned RegNum, bool isEH) const { + return 0; +} + +unsigned NVPTXRegisterInfo::getFrameRegister(const MachineFunction &MF) const { + return NVPTX::VRFrame; +} + +unsigned NVPTXRegisterInfo::getRARegister() const { + return 0; +} + +// This function eliminates ADJCALLSTACKDOWN, +// ADJCALLSTACKUP pseudo instructions +void NVPTXRegisterInfo:: +eliminateCallFramePseudoInstr(MachineFunction &MF, MachineBasicBlock &MBB, + MachineBasicBlock::iterator I) const { + // Simply discard ADJCALLSTACKDOWN, + // ADJCALLSTACKUP instructions. 
+ MBB.erase(I); +} diff --git a/lib/Target/NVPTX/NVPTXRegisterInfo.h b/lib/Target/NVPTX/NVPTXRegisterInfo.h new file mode 100644 index 0000000..e8b587c --- /dev/null +++ b/lib/Target/NVPTX/NVPTXRegisterInfo.h @@ -0,0 +1,94 @@ +//===- NVPTXRegisterInfo.h - NVPTX Register Information Impl ----*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file contains the NVPTX implementation of the TargetRegisterInfo class. +// +//===----------------------------------------------------------------------===// + +#ifndef NVPTXREGISTERINFO_H +#define NVPTXREGISTERINFO_H + +#include "ManagedStringPool.h" +#include "llvm/Target/TargetRegisterInfo.h" + + +#define GET_REGINFO_HEADER +#include "NVPTXGenRegisterInfo.inc" +#include "llvm/Target/TargetRegisterInfo.h" +#include <sstream> + +namespace llvm { + +// Forward Declarations. 
+class TargetInstrInfo; +class NVPTXSubtarget; + +class NVPTXRegisterInfo : public NVPTXGenRegisterInfo { +private: + const TargetInstrInfo &TII; + const NVPTXSubtarget &ST; + bool Is64Bit; + // Hold Strings that can be free'd all together with NVPTXRegisterInfo + ManagedStringPool ManagedStrPool; + +public: + NVPTXRegisterInfo(const TargetInstrInfo &tii, + const NVPTXSubtarget &st); + + + //------------------------------------------------------ + // Pure virtual functions from TargetRegisterInfo + //------------------------------------------------------ + + // NVPTX callee saved registers + virtual const uint16_t* + getCalleeSavedRegs(const MachineFunction *MF = 0) const; + + // NVPTX callee saved register classes + virtual const TargetRegisterClass* const * + getCalleeSavedRegClasses(const MachineFunction *MF) const; + + virtual BitVector getReservedRegs(const MachineFunction &MF) const; + + virtual void eliminateFrameIndex(MachineBasicBlock::iterator MI, + int SPAdj, + RegScavenger *RS=NULL) const; + + void eliminateCallFramePseudoInstr(MachineFunction &MF, + MachineBasicBlock &MBB, + MachineBasicBlock::iterator I) const; + + virtual int getDwarfRegNum(unsigned RegNum, bool isEH) const; + virtual unsigned getFrameRegister(const MachineFunction &MF) const; + virtual unsigned getRARegister() const; + + ManagedStringPool *getStrPool() const { + return const_cast<ManagedStringPool *>(&ManagedStrPool); + } + + const char *getName(unsigned RegNo) const { + std::stringstream O; + O << "reg" << RegNo; + return getStrPool()->getManagedString(O.str().c_str())->c_str(); + } + +}; + + +std::string getNVPTXRegClassName (const TargetRegisterClass *RC); +std::string getNVPTXRegClassStr (const TargetRegisterClass *RC); +bool isNVPTXVectorRegClass (const TargetRegisterClass *RC); +std::string getNVPTXElemClassName (const TargetRegisterClass *RC); +int getNVPTXVectorSize (const TargetRegisterClass *RC); +const TargetRegisterClass *getNVPTXElemClass(const TargetRegisterClass *RC); 
+ +} // end namespace llvm + + +#endif diff --git a/lib/Target/NVPTX/NVPTXRegisterInfo.td b/lib/Target/NVPTX/NVPTXRegisterInfo.td new file mode 100644 index 0000000..6859ce4 --- /dev/null +++ b/lib/Target/NVPTX/NVPTXRegisterInfo.td @@ -0,0 +1,7235 @@ +//===-- NVPTXRegisterInfo.td - NVPTX Register defs ---------*- tablegen -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// + +//===----------------------------------------------------------------------===// +// Declarations that describe the PTX register file +//===----------------------------------------------------------------------===// + +class NVPTXReg<string n> : Register<n> { + let Namespace = "NVPTX"; +} + +class NVPTXRegClass<list<ValueType> regTypes, int alignment, dag regList> + : RegisterClass <"NVPTX", regTypes, alignment, regList>; + +//===----------------------------------------------------------------------===// +// Registers +//===----------------------------------------------------------------------===// + +// Special Registers used as stack pointer +def VRFrame : NVPTXReg<"%SP">; +def VRFrameLocal : NVPTXReg<"%SPL">; + +// Special Registers used as the stack +def VRDepot : NVPTXReg<"%Depot">; + +//===--- Predicate --------------------------------------------------------===// +def P0 : NVPTXReg<"%p0">; +def P1 : NVPTXReg<"%p1">; +def P2 : NVPTXReg<"%p2">; +def P3 : NVPTXReg<"%p3">; +def P4 : NVPTXReg<"%p4">; +def P5 : NVPTXReg<"%p5">; +def P6 : NVPTXReg<"%p6">; +def P7 : NVPTXReg<"%p7">; +def P8 : NVPTXReg<"%p8">; +def P9 : NVPTXReg<"%p9">; +def P10 : NVPTXReg<"%p10">; +def P11 : NVPTXReg<"%p11">; +def P12 : NVPTXReg<"%p12">; +def P13 : NVPTXReg<"%p13">; +def P14 : NVPTXReg<"%p14">; +def P15 : NVPTXReg<"%p15">; +def P16 : NVPTXReg<"%p16">; +def P17 : NVPTXReg<"%p17">; +def P18 : NVPTXReg<"%p18">; 
+def P19 : NVPTXReg<"%p19">; +def P20 : NVPTXReg<"%p20">; +def P21 : NVPTXReg<"%p21">; +def P22 : NVPTXReg<"%p22">; +def P23 : NVPTXReg<"%p23">; +def P24 : NVPTXReg<"%p24">; +def P25 : NVPTXReg<"%p25">; +def P26 : NVPTXReg<"%p26">; +def P27 : NVPTXReg<"%p27">; +def P28 : NVPTXReg<"%p28">; +def P29 : NVPTXReg<"%p29">; +def P30 : NVPTXReg<"%p30">; +def P31 : NVPTXReg<"%p31">; +def P32 : NVPTXReg<"%p32">; +def P33 : NVPTXReg<"%p33">; +def P34 : NVPTXReg<"%p34">; +def P35 : NVPTXReg<"%p35">; +def P36 : NVPTXReg<"%p36">; +def P37 : NVPTXReg<"%p37">; +def P38 : NVPTXReg<"%p38">; +def P39 : NVPTXReg<"%p39">; +def P40 : NVPTXReg<"%p40">; +def P41 : NVPTXReg<"%p41">; +def P42 : NVPTXReg<"%p42">; +def P43 : NVPTXReg<"%p43">; +def P44 : NVPTXReg<"%p44">; +def P45 : NVPTXReg<"%p45">; +def P46 : NVPTXReg<"%p46">; +def P47 : NVPTXReg<"%p47">; +def P48 : NVPTXReg<"%p48">; +def P49 : NVPTXReg<"%p49">; +def P50 : NVPTXReg<"%p50">; +def P51 : NVPTXReg<"%p51">; +def P52 : NVPTXReg<"%p52">; +def P53 : NVPTXReg<"%p53">; +def P54 : NVPTXReg<"%p54">; +def P55 : NVPTXReg<"%p55">; +def P56 : NVPTXReg<"%p56">; +def P57 : NVPTXReg<"%p57">; +def P58 : NVPTXReg<"%p58">; +def P59 : NVPTXReg<"%p59">; +def P60 : NVPTXReg<"%p60">; +def P61 : NVPTXReg<"%p61">; +def P62 : NVPTXReg<"%p62">; +def P63 : NVPTXReg<"%p63">; +def P64 : NVPTXReg<"%p64">; +def P65 : NVPTXReg<"%p65">; +def P66 : NVPTXReg<"%p66">; +def P67 : NVPTXReg<"%p67">; +def P68 : NVPTXReg<"%p68">; +def P69 : NVPTXReg<"%p69">; +def P70 : NVPTXReg<"%p70">; +def P71 : NVPTXReg<"%p71">; +def P72 : NVPTXReg<"%p72">; +def P73 : NVPTXReg<"%p73">; +def P74 : NVPTXReg<"%p74">; +def P75 : NVPTXReg<"%p75">; +def P76 : NVPTXReg<"%p76">; +def P77 : NVPTXReg<"%p77">; +def P78 : NVPTXReg<"%p78">; +def P79 : NVPTXReg<"%p79">; +def P80 : NVPTXReg<"%p80">; +def P81 : NVPTXReg<"%p81">; +def P82 : NVPTXReg<"%p82">; +def P83 : NVPTXReg<"%p83">; +def P84 : NVPTXReg<"%p84">; +def P85 : NVPTXReg<"%p85">; +def P86 : NVPTXReg<"%p86">; +def P87 : 
NVPTXReg<"%p87">; +def P88 : NVPTXReg<"%p88">; +def P89 : NVPTXReg<"%p89">; +def P90 : NVPTXReg<"%p90">; +def P91 : NVPTXReg<"%p91">; +def P92 : NVPTXReg<"%p92">; +def P93 : NVPTXReg<"%p93">; +def P94 : NVPTXReg<"%p94">; +def P95 : NVPTXReg<"%p95">; +def P96 : NVPTXReg<"%p96">; +def P97 : NVPTXReg<"%p97">; +def P98 : NVPTXReg<"%p98">; +def P99 : NVPTXReg<"%p99">; +def P100 : NVPTXReg<"%p100">; +def P101 : NVPTXReg<"%p101">; +def P102 : NVPTXReg<"%p102">; +def P103 : NVPTXReg<"%p103">; +def P104 : NVPTXReg<"%p104">; +def P105 : NVPTXReg<"%p105">; +def P106 : NVPTXReg<"%p106">; +def P107 : NVPTXReg<"%p107">; +def P108 : NVPTXReg<"%p108">; +def P109 : NVPTXReg<"%p109">; +def P110 : NVPTXReg<"%p110">; +def P111 : NVPTXReg<"%p111">; +def P112 : NVPTXReg<"%p112">; +def P113 : NVPTXReg<"%p113">; +def P114 : NVPTXReg<"%p114">; +def P115 : NVPTXReg<"%p115">; +def P116 : NVPTXReg<"%p116">; +def P117 : NVPTXReg<"%p117">; +def P118 : NVPTXReg<"%p118">; +def P119 : NVPTXReg<"%p119">; +def P120 : NVPTXReg<"%p120">; +def P121 : NVPTXReg<"%p121">; +def P122 : NVPTXReg<"%p122">; +def P123 : NVPTXReg<"%p123">; +def P124 : NVPTXReg<"%p124">; +def P125 : NVPTXReg<"%p125">; +def P126 : NVPTXReg<"%p126">; +def P127 : NVPTXReg<"%p127">; +def P128 : NVPTXReg<"%p128">; +def P129 : NVPTXReg<"%p129">; +def P130 : NVPTXReg<"%p130">; +def P131 : NVPTXReg<"%p131">; +def P132 : NVPTXReg<"%p132">; +def P133 : NVPTXReg<"%p133">; +def P134 : NVPTXReg<"%p134">; +def P135 : NVPTXReg<"%p135">; +def P136 : NVPTXReg<"%p136">; +def P137 : NVPTXReg<"%p137">; +def P138 : NVPTXReg<"%p138">; +def P139 : NVPTXReg<"%p139">; +def P140 : NVPTXReg<"%p140">; +def P141 : NVPTXReg<"%p141">; +def P142 : NVPTXReg<"%p142">; +def P143 : NVPTXReg<"%p143">; +def P144 : NVPTXReg<"%p144">; +def P145 : NVPTXReg<"%p145">; +def P146 : NVPTXReg<"%p146">; +def P147 : NVPTXReg<"%p147">; +def P148 : NVPTXReg<"%p148">; +def P149 : NVPTXReg<"%p149">; +def P150 : NVPTXReg<"%p150">; +def P151 : NVPTXReg<"%p151">; +def P152 : 
NVPTXReg<"%p152">; +def P153 : NVPTXReg<"%p153">; +def P154 : NVPTXReg<"%p154">; +def P155 : NVPTXReg<"%p155">; +def P156 : NVPTXReg<"%p156">; +def P157 : NVPTXReg<"%p157">; +def P158 : NVPTXReg<"%p158">; +def P159 : NVPTXReg<"%p159">; +def P160 : NVPTXReg<"%p160">; +def P161 : NVPTXReg<"%p161">; +def P162 : NVPTXReg<"%p162">; +def P163 : NVPTXReg<"%p163">; +def P164 : NVPTXReg<"%p164">; +def P165 : NVPTXReg<"%p165">; +def P166 : NVPTXReg<"%p166">; +def P167 : NVPTXReg<"%p167">; +def P168 : NVPTXReg<"%p168">; +def P169 : NVPTXReg<"%p169">; +def P170 : NVPTXReg<"%p170">; +def P171 : NVPTXReg<"%p171">; +def P172 : NVPTXReg<"%p172">; +def P173 : NVPTXReg<"%p173">; +def P174 : NVPTXReg<"%p174">; +def P175 : NVPTXReg<"%p175">; +def P176 : NVPTXReg<"%p176">; +def P177 : NVPTXReg<"%p177">; +def P178 : NVPTXReg<"%p178">; +def P179 : NVPTXReg<"%p179">; +def P180 : NVPTXReg<"%p180">; +def P181 : NVPTXReg<"%p181">; +def P182 : NVPTXReg<"%p182">; +def P183 : NVPTXReg<"%p183">; +def P184 : NVPTXReg<"%p184">; +def P185 : NVPTXReg<"%p185">; +def P186 : NVPTXReg<"%p186">; +def P187 : NVPTXReg<"%p187">; +def P188 : NVPTXReg<"%p188">; +def P189 : NVPTXReg<"%p189">; +def P190 : NVPTXReg<"%p190">; +def P191 : NVPTXReg<"%p191">; +def P192 : NVPTXReg<"%p192">; +def P193 : NVPTXReg<"%p193">; +def P194 : NVPTXReg<"%p194">; +def P195 : NVPTXReg<"%p195">; +def P196 : NVPTXReg<"%p196">; +def P197 : NVPTXReg<"%p197">; +def P198 : NVPTXReg<"%p198">; +def P199 : NVPTXReg<"%p199">; +def P200 : NVPTXReg<"%p200">; +def P201 : NVPTXReg<"%p201">; +def P202 : NVPTXReg<"%p202">; +def P203 : NVPTXReg<"%p203">; +def P204 : NVPTXReg<"%p204">; +def P205 : NVPTXReg<"%p205">; +def P206 : NVPTXReg<"%p206">; +def P207 : NVPTXReg<"%p207">; +def P208 : NVPTXReg<"%p208">; +def P209 : NVPTXReg<"%p209">; +def P210 : NVPTXReg<"%p210">; +def P211 : NVPTXReg<"%p211">; +def P212 : NVPTXReg<"%p212">; +def P213 : NVPTXReg<"%p213">; +def P214 : NVPTXReg<"%p214">; +def P215 : NVPTXReg<"%p215">; +def P216 : 
NVPTXReg<"%p216">; +def P217 : NVPTXReg<"%p217">; +def P218 : NVPTXReg<"%p218">; +def P219 : NVPTXReg<"%p219">; +def P220 : NVPTXReg<"%p220">; +def P221 : NVPTXReg<"%p221">; +def P222 : NVPTXReg<"%p222">; +def P223 : NVPTXReg<"%p223">; +def P224 : NVPTXReg<"%p224">; +def P225 : NVPTXReg<"%p225">; +def P226 : NVPTXReg<"%p226">; +def P227 : NVPTXReg<"%p227">; +def P228 : NVPTXReg<"%p228">; +def P229 : NVPTXReg<"%p229">; +def P230 : NVPTXReg<"%p230">; +def P231 : NVPTXReg<"%p231">; +def P232 : NVPTXReg<"%p232">; +def P233 : NVPTXReg<"%p233">; +def P234 : NVPTXReg<"%p234">; +def P235 : NVPTXReg<"%p235">; +def P236 : NVPTXReg<"%p236">; +def P237 : NVPTXReg<"%p237">; +def P238 : NVPTXReg<"%p238">; +def P239 : NVPTXReg<"%p239">; +def P240 : NVPTXReg<"%p240">; +def P241 : NVPTXReg<"%p241">; +def P242 : NVPTXReg<"%p242">; +def P243 : NVPTXReg<"%p243">; +def P244 : NVPTXReg<"%p244">; +def P245 : NVPTXReg<"%p245">; +def P246 : NVPTXReg<"%p246">; +def P247 : NVPTXReg<"%p247">; +def P248 : NVPTXReg<"%p248">; +def P249 : NVPTXReg<"%p249">; +def P250 : NVPTXReg<"%p250">; +def P251 : NVPTXReg<"%p251">; +def P252 : NVPTXReg<"%p252">; +def P253 : NVPTXReg<"%p253">; +def P254 : NVPTXReg<"%p254">; +def P255 : NVPTXReg<"%p255">; +def P256 : NVPTXReg<"%p256">; +def P257 : NVPTXReg<"%p257">; +def P258 : NVPTXReg<"%p258">; +def P259 : NVPTXReg<"%p259">; +def P260 : NVPTXReg<"%p260">; +def P261 : NVPTXReg<"%p261">; +def P262 : NVPTXReg<"%p262">; +def P263 : NVPTXReg<"%p263">; +def P264 : NVPTXReg<"%p264">; +def P265 : NVPTXReg<"%p265">; +def P266 : NVPTXReg<"%p266">; +def P267 : NVPTXReg<"%p267">; +def P268 : NVPTXReg<"%p268">; +def P269 : NVPTXReg<"%p269">; +def P270 : NVPTXReg<"%p270">; +def P271 : NVPTXReg<"%p271">; +def P272 : NVPTXReg<"%p272">; +def P273 : NVPTXReg<"%p273">; +def P274 : NVPTXReg<"%p274">; +def P275 : NVPTXReg<"%p275">; +def P276 : NVPTXReg<"%p276">; +def P277 : NVPTXReg<"%p277">; +def P278 : NVPTXReg<"%p278">; +def P279 : NVPTXReg<"%p279">; +def P280 : 
NVPTXReg<"%p280">; +def P281 : NVPTXReg<"%p281">; +def P282 : NVPTXReg<"%p282">; +def P283 : NVPTXReg<"%p283">; +def P284 : NVPTXReg<"%p284">; +def P285 : NVPTXReg<"%p285">; +def P286 : NVPTXReg<"%p286">; +def P287 : NVPTXReg<"%p287">; +def P288 : NVPTXReg<"%p288">; +def P289 : NVPTXReg<"%p289">; +def P290 : NVPTXReg<"%p290">; +def P291 : NVPTXReg<"%p291">; +def P292 : NVPTXReg<"%p292">; +def P293 : NVPTXReg<"%p293">; +def P294 : NVPTXReg<"%p294">; +def P295 : NVPTXReg<"%p295">; +def P296 : NVPTXReg<"%p296">; +def P297 : NVPTXReg<"%p297">; +def P298 : NVPTXReg<"%p298">; +def P299 : NVPTXReg<"%p299">; +def P300 : NVPTXReg<"%p300">; +def P301 : NVPTXReg<"%p301">; +def P302 : NVPTXReg<"%p302">; +def P303 : NVPTXReg<"%p303">; +def P304 : NVPTXReg<"%p304">; +def P305 : NVPTXReg<"%p305">; +def P306 : NVPTXReg<"%p306">; +def P307 : NVPTXReg<"%p307">; +def P308 : NVPTXReg<"%p308">; +def P309 : NVPTXReg<"%p309">; +def P310 : NVPTXReg<"%p310">; +def P311 : NVPTXReg<"%p311">; +def P312 : NVPTXReg<"%p312">; +def P313 : NVPTXReg<"%p313">; +def P314 : NVPTXReg<"%p314">; +def P315 : NVPTXReg<"%p315">; +def P316 : NVPTXReg<"%p316">; +def P317 : NVPTXReg<"%p317">; +def P318 : NVPTXReg<"%p318">; +def P319 : NVPTXReg<"%p319">; +def P320 : NVPTXReg<"%p320">; +def P321 : NVPTXReg<"%p321">; +def P322 : NVPTXReg<"%p322">; +def P323 : NVPTXReg<"%p323">; +def P324 : NVPTXReg<"%p324">; +def P325 : NVPTXReg<"%p325">; +def P326 : NVPTXReg<"%p326">; +def P327 : NVPTXReg<"%p327">; +def P328 : NVPTXReg<"%p328">; +def P329 : NVPTXReg<"%p329">; +def P330 : NVPTXReg<"%p330">; +def P331 : NVPTXReg<"%p331">; +def P332 : NVPTXReg<"%p332">; +def P333 : NVPTXReg<"%p333">; +def P334 : NVPTXReg<"%p334">; +def P335 : NVPTXReg<"%p335">; +def P336 : NVPTXReg<"%p336">; +def P337 : NVPTXReg<"%p337">; +def P338 : NVPTXReg<"%p338">; +def P339 : NVPTXReg<"%p339">; +def P340 : NVPTXReg<"%p340">; +def P341 : NVPTXReg<"%p341">; +def P342 : NVPTXReg<"%p342">; +def P343 : NVPTXReg<"%p343">; +def P344 : 
NVPTXReg<"%p344">; +def P345 : NVPTXReg<"%p345">; +def P346 : NVPTXReg<"%p346">; +def P347 : NVPTXReg<"%p347">; +def P348 : NVPTXReg<"%p348">; +def P349 : NVPTXReg<"%p349">; +def P350 : NVPTXReg<"%p350">; +def P351 : NVPTXReg<"%p351">; +def P352 : NVPTXReg<"%p352">; +def P353 : NVPTXReg<"%p353">; +def P354 : NVPTXReg<"%p354">; +def P355 : NVPTXReg<"%p355">; +def P356 : NVPTXReg<"%p356">; +def P357 : NVPTXReg<"%p357">; +def P358 : NVPTXReg<"%p358">; +def P359 : NVPTXReg<"%p359">; +def P360 : NVPTXReg<"%p360">; +def P361 : NVPTXReg<"%p361">; +def P362 : NVPTXReg<"%p362">; +def P363 : NVPTXReg<"%p363">; +def P364 : NVPTXReg<"%p364">; +def P365 : NVPTXReg<"%p365">; +def P366 : NVPTXReg<"%p366">; +def P367 : NVPTXReg<"%p367">; +def P368 : NVPTXReg<"%p368">; +def P369 : NVPTXReg<"%p369">; +def P370 : NVPTXReg<"%p370">; +def P371 : NVPTXReg<"%p371">; +def P372 : NVPTXReg<"%p372">; +def P373 : NVPTXReg<"%p373">; +def P374 : NVPTXReg<"%p374">; +def P375 : NVPTXReg<"%p375">; +def P376 : NVPTXReg<"%p376">; +def P377 : NVPTXReg<"%p377">; +def P378 : NVPTXReg<"%p378">; +def P379 : NVPTXReg<"%p379">; +def P380 : NVPTXReg<"%p380">; +def P381 : NVPTXReg<"%p381">; +def P382 : NVPTXReg<"%p382">; +def P383 : NVPTXReg<"%p383">; +def P384 : NVPTXReg<"%p384">; +def P385 : NVPTXReg<"%p385">; +def P386 : NVPTXReg<"%p386">; +def P387 : NVPTXReg<"%p387">; +def P388 : NVPTXReg<"%p388">; +def P389 : NVPTXReg<"%p389">; +def P390 : NVPTXReg<"%p390">; +def P391 : NVPTXReg<"%p391">; +def P392 : NVPTXReg<"%p392">; +def P393 : NVPTXReg<"%p393">; +def P394 : NVPTXReg<"%p394">; +def P395 : NVPTXReg<"%p395">; + +//===--- 8-bit ------------------------------------------------------------===// +def RC0 : NVPTXReg<"%rc0">; +def RC1 : NVPTXReg<"%rc1">; +def RC2 : NVPTXReg<"%rc2">; +def RC3 : NVPTXReg<"%rc3">; +def RC4 : NVPTXReg<"%rc4">; +def RC5 : NVPTXReg<"%rc5">; +def RC6 : NVPTXReg<"%rc6">; +def RC7 : NVPTXReg<"%rc7">; +def RC8 : NVPTXReg<"%rc8">; +def RC9 : NVPTXReg<"%rc9">; +def RC10 : 
NVPTXReg<"%rc10">; +def RC11 : NVPTXReg<"%rc11">; +def RC12 : NVPTXReg<"%rc12">; +def RC13 : NVPTXReg<"%rc13">; +def RC14 : NVPTXReg<"%rc14">; +def RC15 : NVPTXReg<"%rc15">; +def RC16 : NVPTXReg<"%rc16">; +def RC17 : NVPTXReg<"%rc17">; +def RC18 : NVPTXReg<"%rc18">; +def RC19 : NVPTXReg<"%rc19">; +def RC20 : NVPTXReg<"%rc20">; +def RC21 : NVPTXReg<"%rc21">; +def RC22 : NVPTXReg<"%rc22">; +def RC23 : NVPTXReg<"%rc23">; +def RC24 : NVPTXReg<"%rc24">; +def RC25 : NVPTXReg<"%rc25">; +def RC26 : NVPTXReg<"%rc26">; +def RC27 : NVPTXReg<"%rc27">; +def RC28 : NVPTXReg<"%rc28">; +def RC29 : NVPTXReg<"%rc29">; +def RC30 : NVPTXReg<"%rc30">; +def RC31 : NVPTXReg<"%rc31">; +def RC32 : NVPTXReg<"%rc32">; +def RC33 : NVPTXReg<"%rc33">; +def RC34 : NVPTXReg<"%rc34">; +def RC35 : NVPTXReg<"%rc35">; +def RC36 : NVPTXReg<"%rc36">; +def RC37 : NVPTXReg<"%rc37">; +def RC38 : NVPTXReg<"%rc38">; +def RC39 : NVPTXReg<"%rc39">; +def RC40 : NVPTXReg<"%rc40">; +def RC41 : NVPTXReg<"%rc41">; +def RC42 : NVPTXReg<"%rc42">; +def RC43 : NVPTXReg<"%rc43">; +def RC44 : NVPTXReg<"%rc44">; +def RC45 : NVPTXReg<"%rc45">; +def RC46 : NVPTXReg<"%rc46">; +def RC47 : NVPTXReg<"%rc47">; +def RC48 : NVPTXReg<"%rc48">; +def RC49 : NVPTXReg<"%rc49">; +def RC50 : NVPTXReg<"%rc50">; +def RC51 : NVPTXReg<"%rc51">; +def RC52 : NVPTXReg<"%rc52">; +def RC53 : NVPTXReg<"%rc53">; +def RC54 : NVPTXReg<"%rc54">; +def RC55 : NVPTXReg<"%rc55">; +def RC56 : NVPTXReg<"%rc56">; +def RC57 : NVPTXReg<"%rc57">; +def RC58 : NVPTXReg<"%rc58">; +def RC59 : NVPTXReg<"%rc59">; +def RC60 : NVPTXReg<"%rc60">; +def RC61 : NVPTXReg<"%rc61">; +def RC62 : NVPTXReg<"%rc62">; +def RC63 : NVPTXReg<"%rc63">; +def RC64 : NVPTXReg<"%rc64">; +def RC65 : NVPTXReg<"%rc65">; +def RC66 : NVPTXReg<"%rc66">; +def RC67 : NVPTXReg<"%rc67">; +def RC68 : NVPTXReg<"%rc68">; +def RC69 : NVPTXReg<"%rc69">; +def RC70 : NVPTXReg<"%rc70">; +def RC71 : NVPTXReg<"%rc71">; +def RC72 : NVPTXReg<"%rc72">; +def RC73 : NVPTXReg<"%rc73">; +def RC74 : 
NVPTXReg<"%rc74">; +def RC75 : NVPTXReg<"%rc75">; +def RC76 : NVPTXReg<"%rc76">; +def RC77 : NVPTXReg<"%rc77">; +def RC78 : NVPTXReg<"%rc78">; +def RC79 : NVPTXReg<"%rc79">; +def RC80 : NVPTXReg<"%rc80">; +def RC81 : NVPTXReg<"%rc81">; +def RC82 : NVPTXReg<"%rc82">; +def RC83 : NVPTXReg<"%rc83">; +def RC84 : NVPTXReg<"%rc84">; +def RC85 : NVPTXReg<"%rc85">; +def RC86 : NVPTXReg<"%rc86">; +def RC87 : NVPTXReg<"%rc87">; +def RC88 : NVPTXReg<"%rc88">; +def RC89 : NVPTXReg<"%rc89">; +def RC90 : NVPTXReg<"%rc90">; +def RC91 : NVPTXReg<"%rc91">; +def RC92 : NVPTXReg<"%rc92">; +def RC93 : NVPTXReg<"%rc93">; +def RC94 : NVPTXReg<"%rc94">; +def RC95 : NVPTXReg<"%rc95">; +def RC96 : NVPTXReg<"%rc96">; +def RC97 : NVPTXReg<"%rc97">; +def RC98 : NVPTXReg<"%rc98">; +def RC99 : NVPTXReg<"%rc99">; +def RC100 : NVPTXReg<"%rc100">; +def RC101 : NVPTXReg<"%rc101">; +def RC102 : NVPTXReg<"%rc102">; +def RC103 : NVPTXReg<"%rc103">; +def RC104 : NVPTXReg<"%rc104">; +def RC105 : NVPTXReg<"%rc105">; +def RC106 : NVPTXReg<"%rc106">; +def RC107 : NVPTXReg<"%rc107">; +def RC108 : NVPTXReg<"%rc108">; +def RC109 : NVPTXReg<"%rc109">; +def RC110 : NVPTXReg<"%rc110">; +def RC111 : NVPTXReg<"%rc111">; +def RC112 : NVPTXReg<"%rc112">; +def RC113 : NVPTXReg<"%rc113">; +def RC114 : NVPTXReg<"%rc114">; +def RC115 : NVPTXReg<"%rc115">; +def RC116 : NVPTXReg<"%rc116">; +def RC117 : NVPTXReg<"%rc117">; +def RC118 : NVPTXReg<"%rc118">; +def RC119 : NVPTXReg<"%rc119">; +def RC120 : NVPTXReg<"%rc120">; +def RC121 : NVPTXReg<"%rc121">; +def RC122 : NVPTXReg<"%rc122">; +def RC123 : NVPTXReg<"%rc123">; +def RC124 : NVPTXReg<"%rc124">; +def RC125 : NVPTXReg<"%rc125">; +def RC126 : NVPTXReg<"%rc126">; +def RC127 : NVPTXReg<"%rc127">; +def RC128 : NVPTXReg<"%rc128">; +def RC129 : NVPTXReg<"%rc129">; +def RC130 : NVPTXReg<"%rc130">; +def RC131 : NVPTXReg<"%rc131">; +def RC132 : NVPTXReg<"%rc132">; +def RC133 : NVPTXReg<"%rc133">; +def RC134 : NVPTXReg<"%rc134">; +def RC135 : NVPTXReg<"%rc135">; +def RC136 : 
NVPTXReg<"%rc136">; +def RC137 : NVPTXReg<"%rc137">; +def RC138 : NVPTXReg<"%rc138">; +def RC139 : NVPTXReg<"%rc139">; +def RC140 : NVPTXReg<"%rc140">; +def RC141 : NVPTXReg<"%rc141">; +def RC142 : NVPTXReg<"%rc142">; +def RC143 : NVPTXReg<"%rc143">; +def RC144 : NVPTXReg<"%rc144">; +def RC145 : NVPTXReg<"%rc145">; +def RC146 : NVPTXReg<"%rc146">; +def RC147 : NVPTXReg<"%rc147">; +def RC148 : NVPTXReg<"%rc148">; +def RC149 : NVPTXReg<"%rc149">; +def RC150 : NVPTXReg<"%rc150">; +def RC151 : NVPTXReg<"%rc151">; +def RC152 : NVPTXReg<"%rc152">; +def RC153 : NVPTXReg<"%rc153">; +def RC154 : NVPTXReg<"%rc154">; +def RC155 : NVPTXReg<"%rc155">; +def RC156 : NVPTXReg<"%rc156">; +def RC157 : NVPTXReg<"%rc157">; +def RC158 : NVPTXReg<"%rc158">; +def RC159 : NVPTXReg<"%rc159">; +def RC160 : NVPTXReg<"%rc160">; +def RC161 : NVPTXReg<"%rc161">; +def RC162 : NVPTXReg<"%rc162">; +def RC163 : NVPTXReg<"%rc163">; +def RC164 : NVPTXReg<"%rc164">; +def RC165 : NVPTXReg<"%rc165">; +def RC166 : NVPTXReg<"%rc166">; +def RC167 : NVPTXReg<"%rc167">; +def RC168 : NVPTXReg<"%rc168">; +def RC169 : NVPTXReg<"%rc169">; +def RC170 : NVPTXReg<"%rc170">; +def RC171 : NVPTXReg<"%rc171">; +def RC172 : NVPTXReg<"%rc172">; +def RC173 : NVPTXReg<"%rc173">; +def RC174 : NVPTXReg<"%rc174">; +def RC175 : NVPTXReg<"%rc175">; +def RC176 : NVPTXReg<"%rc176">; +def RC177 : NVPTXReg<"%rc177">; +def RC178 : NVPTXReg<"%rc178">; +def RC179 : NVPTXReg<"%rc179">; +def RC180 : NVPTXReg<"%rc180">; +def RC181 : NVPTXReg<"%rc181">; +def RC182 : NVPTXReg<"%rc182">; +def RC183 : NVPTXReg<"%rc183">; +def RC184 : NVPTXReg<"%rc184">; +def RC185 : NVPTXReg<"%rc185">; +def RC186 : NVPTXReg<"%rc186">; +def RC187 : NVPTXReg<"%rc187">; +def RC188 : NVPTXReg<"%rc188">; +def RC189 : NVPTXReg<"%rc189">; +def RC190 : NVPTXReg<"%rc190">; +def RC191 : NVPTXReg<"%rc191">; +def RC192 : NVPTXReg<"%rc192">; +def RC193 : NVPTXReg<"%rc193">; +def RC194 : NVPTXReg<"%rc194">; +def RC195 : NVPTXReg<"%rc195">; +def RC196 : NVPTXReg<"%rc196">; 
+def RC197 : NVPTXReg<"%rc197">; +def RC198 : NVPTXReg<"%rc198">; +def RC199 : NVPTXReg<"%rc199">; +def RC200 : NVPTXReg<"%rc200">; +def RC201 : NVPTXReg<"%rc201">; +def RC202 : NVPTXReg<"%rc202">; +def RC203 : NVPTXReg<"%rc203">; +def RC204 : NVPTXReg<"%rc204">; +def RC205 : NVPTXReg<"%rc205">; +def RC206 : NVPTXReg<"%rc206">; +def RC207 : NVPTXReg<"%rc207">; +def RC208 : NVPTXReg<"%rc208">; +def RC209 : NVPTXReg<"%rc209">; +def RC210 : NVPTXReg<"%rc210">; +def RC211 : NVPTXReg<"%rc211">; +def RC212 : NVPTXReg<"%rc212">; +def RC213 : NVPTXReg<"%rc213">; +def RC214 : NVPTXReg<"%rc214">; +def RC215 : NVPTXReg<"%rc215">; +def RC216 : NVPTXReg<"%rc216">; +def RC217 : NVPTXReg<"%rc217">; +def RC218 : NVPTXReg<"%rc218">; +def RC219 : NVPTXReg<"%rc219">; +def RC220 : NVPTXReg<"%rc220">; +def RC221 : NVPTXReg<"%rc221">; +def RC222 : NVPTXReg<"%rc222">; +def RC223 : NVPTXReg<"%rc223">; +def RC224 : NVPTXReg<"%rc224">; +def RC225 : NVPTXReg<"%rc225">; +def RC226 : NVPTXReg<"%rc226">; +def RC227 : NVPTXReg<"%rc227">; +def RC228 : NVPTXReg<"%rc228">; +def RC229 : NVPTXReg<"%rc229">; +def RC230 : NVPTXReg<"%rc230">; +def RC231 : NVPTXReg<"%rc231">; +def RC232 : NVPTXReg<"%rc232">; +def RC233 : NVPTXReg<"%rc233">; +def RC234 : NVPTXReg<"%rc234">; +def RC235 : NVPTXReg<"%rc235">; +def RC236 : NVPTXReg<"%rc236">; +def RC237 : NVPTXReg<"%rc237">; +def RC238 : NVPTXReg<"%rc238">; +def RC239 : NVPTXReg<"%rc239">; +def RC240 : NVPTXReg<"%rc240">; +def RC241 : NVPTXReg<"%rc241">; +def RC242 : NVPTXReg<"%rc242">; +def RC243 : NVPTXReg<"%rc243">; +def RC244 : NVPTXReg<"%rc244">; +def RC245 : NVPTXReg<"%rc245">; +def RC246 : NVPTXReg<"%rc246">; +def RC247 : NVPTXReg<"%rc247">; +def RC248 : NVPTXReg<"%rc248">; +def RC249 : NVPTXReg<"%rc249">; +def RC250 : NVPTXReg<"%rc250">; +def RC251 : NVPTXReg<"%rc251">; +def RC252 : NVPTXReg<"%rc252">; +def RC253 : NVPTXReg<"%rc253">; +def RC254 : NVPTXReg<"%rc254">; +def RC255 : NVPTXReg<"%rc255">; +def RC256 : NVPTXReg<"%rc256">; +def RC257 : 
NVPTXReg<"%rc257">; +def RC258 : NVPTXReg<"%rc258">; +def RC259 : NVPTXReg<"%rc259">; +def RC260 : NVPTXReg<"%rc260">; +def RC261 : NVPTXReg<"%rc261">; +def RC262 : NVPTXReg<"%rc262">; +def RC263 : NVPTXReg<"%rc263">; +def RC264 : NVPTXReg<"%rc264">; +def RC265 : NVPTXReg<"%rc265">; +def RC266 : NVPTXReg<"%rc266">; +def RC267 : NVPTXReg<"%rc267">; +def RC268 : NVPTXReg<"%rc268">; +def RC269 : NVPTXReg<"%rc269">; +def RC270 : NVPTXReg<"%rc270">; +def RC271 : NVPTXReg<"%rc271">; +def RC272 : NVPTXReg<"%rc272">; +def RC273 : NVPTXReg<"%rc273">; +def RC274 : NVPTXReg<"%rc274">; +def RC275 : NVPTXReg<"%rc275">; +def RC276 : NVPTXReg<"%rc276">; +def RC277 : NVPTXReg<"%rc277">; +def RC278 : NVPTXReg<"%rc278">; +def RC279 : NVPTXReg<"%rc279">; +def RC280 : NVPTXReg<"%rc280">; +def RC281 : NVPTXReg<"%rc281">; +def RC282 : NVPTXReg<"%rc282">; +def RC283 : NVPTXReg<"%rc283">; +def RC284 : NVPTXReg<"%rc284">; +def RC285 : NVPTXReg<"%rc285">; +def RC286 : NVPTXReg<"%rc286">; +def RC287 : NVPTXReg<"%rc287">; +def RC288 : NVPTXReg<"%rc288">; +def RC289 : NVPTXReg<"%rc289">; +def RC290 : NVPTXReg<"%rc290">; +def RC291 : NVPTXReg<"%rc291">; +def RC292 : NVPTXReg<"%rc292">; +def RC293 : NVPTXReg<"%rc293">; +def RC294 : NVPTXReg<"%rc294">; +def RC295 : NVPTXReg<"%rc295">; +def RC296 : NVPTXReg<"%rc296">; +def RC297 : NVPTXReg<"%rc297">; +def RC298 : NVPTXReg<"%rc298">; +def RC299 : NVPTXReg<"%rc299">; +def RC300 : NVPTXReg<"%rc300">; +def RC301 : NVPTXReg<"%rc301">; +def RC302 : NVPTXReg<"%rc302">; +def RC303 : NVPTXReg<"%rc303">; +def RC304 : NVPTXReg<"%rc304">; +def RC305 : NVPTXReg<"%rc305">; +def RC306 : NVPTXReg<"%rc306">; +def RC307 : NVPTXReg<"%rc307">; +def RC308 : NVPTXReg<"%rc308">; +def RC309 : NVPTXReg<"%rc309">; +def RC310 : NVPTXReg<"%rc310">; +def RC311 : NVPTXReg<"%rc311">; +def RC312 : NVPTXReg<"%rc312">; +def RC313 : NVPTXReg<"%rc313">; +def RC314 : NVPTXReg<"%rc314">; +def RC315 : NVPTXReg<"%rc315">; +def RC316 : NVPTXReg<"%rc316">; +def RC317 : NVPTXReg<"%rc317">; 
+def RC318 : NVPTXReg<"%rc318">; +def RC319 : NVPTXReg<"%rc319">; +def RC320 : NVPTXReg<"%rc320">; +def RC321 : NVPTXReg<"%rc321">; +def RC322 : NVPTXReg<"%rc322">; +def RC323 : NVPTXReg<"%rc323">; +def RC324 : NVPTXReg<"%rc324">; +def RC325 : NVPTXReg<"%rc325">; +def RC326 : NVPTXReg<"%rc326">; +def RC327 : NVPTXReg<"%rc327">; +def RC328 : NVPTXReg<"%rc328">; +def RC329 : NVPTXReg<"%rc329">; +def RC330 : NVPTXReg<"%rc330">; +def RC331 : NVPTXReg<"%rc331">; +def RC332 : NVPTXReg<"%rc332">; +def RC333 : NVPTXReg<"%rc333">; +def RC334 : NVPTXReg<"%rc334">; +def RC335 : NVPTXReg<"%rc335">; +def RC336 : NVPTXReg<"%rc336">; +def RC337 : NVPTXReg<"%rc337">; +def RC338 : NVPTXReg<"%rc338">; +def RC339 : NVPTXReg<"%rc339">; +def RC340 : NVPTXReg<"%rc340">; +def RC341 : NVPTXReg<"%rc341">; +def RC342 : NVPTXReg<"%rc342">; +def RC343 : NVPTXReg<"%rc343">; +def RC344 : NVPTXReg<"%rc344">; +def RC345 : NVPTXReg<"%rc345">; +def RC346 : NVPTXReg<"%rc346">; +def RC347 : NVPTXReg<"%rc347">; +def RC348 : NVPTXReg<"%rc348">; +def RC349 : NVPTXReg<"%rc349">; +def RC350 : NVPTXReg<"%rc350">; +def RC351 : NVPTXReg<"%rc351">; +def RC352 : NVPTXReg<"%rc352">; +def RC353 : NVPTXReg<"%rc353">; +def RC354 : NVPTXReg<"%rc354">; +def RC355 : NVPTXReg<"%rc355">; +def RC356 : NVPTXReg<"%rc356">; +def RC357 : NVPTXReg<"%rc357">; +def RC358 : NVPTXReg<"%rc358">; +def RC359 : NVPTXReg<"%rc359">; +def RC360 : NVPTXReg<"%rc360">; +def RC361 : NVPTXReg<"%rc361">; +def RC362 : NVPTXReg<"%rc362">; +def RC363 : NVPTXReg<"%rc363">; +def RC364 : NVPTXReg<"%rc364">; +def RC365 : NVPTXReg<"%rc365">; +def RC366 : NVPTXReg<"%rc366">; +def RC367 : NVPTXReg<"%rc367">; +def RC368 : NVPTXReg<"%rc368">; +def RC369 : NVPTXReg<"%rc369">; +def RC370 : NVPTXReg<"%rc370">; +def RC371 : NVPTXReg<"%rc371">; +def RC372 : NVPTXReg<"%rc372">; +def RC373 : NVPTXReg<"%rc373">; +def RC374 : NVPTXReg<"%rc374">; +def RC375 : NVPTXReg<"%rc375">; +def RC376 : NVPTXReg<"%rc376">; +def RC377 : NVPTXReg<"%rc377">; +def RC378 : 
NVPTXReg<"%rc378">; +def RC379 : NVPTXReg<"%rc379">; +def RC380 : NVPTXReg<"%rc380">; +def RC381 : NVPTXReg<"%rc381">; +def RC382 : NVPTXReg<"%rc382">; +def RC383 : NVPTXReg<"%rc383">; +def RC384 : NVPTXReg<"%rc384">; +def RC385 : NVPTXReg<"%rc385">; +def RC386 : NVPTXReg<"%rc386">; +def RC387 : NVPTXReg<"%rc387">; +def RC388 : NVPTXReg<"%rc388">; +def RC389 : NVPTXReg<"%rc389">; +def RC390 : NVPTXReg<"%rc390">; +def RC391 : NVPTXReg<"%rc391">; +def RC392 : NVPTXReg<"%rc392">; +def RC393 : NVPTXReg<"%rc393">; +def RC394 : NVPTXReg<"%rc394">; +def RC395 : NVPTXReg<"%rc395">; + +//===--- 16-bit -----------------------------------------------------------===// +def RS0 : NVPTXReg<"%rs0">; +def RS1 : NVPTXReg<"%rs1">; +def RS2 : NVPTXReg<"%rs2">; +def RS3 : NVPTXReg<"%rs3">; +def RS4 : NVPTXReg<"%rs4">; +def RS5 : NVPTXReg<"%rs5">; +def RS6 : NVPTXReg<"%rs6">; +def RS7 : NVPTXReg<"%rs7">; +def RS8 : NVPTXReg<"%rs8">; +def RS9 : NVPTXReg<"%rs9">; +def RS10 : NVPTXReg<"%rs10">; +def RS11 : NVPTXReg<"%rs11">; +def RS12 : NVPTXReg<"%rs12">; +def RS13 : NVPTXReg<"%rs13">; +def RS14 : NVPTXReg<"%rs14">; +def RS15 : NVPTXReg<"%rs15">; +def RS16 : NVPTXReg<"%rs16">; +def RS17 : NVPTXReg<"%rs17">; +def RS18 : NVPTXReg<"%rs18">; +def RS19 : NVPTXReg<"%rs19">; +def RS20 : NVPTXReg<"%rs20">; +def RS21 : NVPTXReg<"%rs21">; +def RS22 : NVPTXReg<"%rs22">; +def RS23 : NVPTXReg<"%rs23">; +def RS24 : NVPTXReg<"%rs24">; +def RS25 : NVPTXReg<"%rs25">; +def RS26 : NVPTXReg<"%rs26">; +def RS27 : NVPTXReg<"%rs27">; +def RS28 : NVPTXReg<"%rs28">; +def RS29 : NVPTXReg<"%rs29">; +def RS30 : NVPTXReg<"%rs30">; +def RS31 : NVPTXReg<"%rs31">; +def RS32 : NVPTXReg<"%rs32">; +def RS33 : NVPTXReg<"%rs33">; +def RS34 : NVPTXReg<"%rs34">; +def RS35 : NVPTXReg<"%rs35">; +def RS36 : NVPTXReg<"%rs36">; +def RS37 : NVPTXReg<"%rs37">; +def RS38 : NVPTXReg<"%rs38">; +def RS39 : NVPTXReg<"%rs39">; +def RS40 : NVPTXReg<"%rs40">; +def RS41 : NVPTXReg<"%rs41">; +def RS42 : NVPTXReg<"%rs42">; +def RS43 : 
NVPTXReg<"%rs43">; +def RS44 : NVPTXReg<"%rs44">; +def RS45 : NVPTXReg<"%rs45">; +def RS46 : NVPTXReg<"%rs46">; +def RS47 : NVPTXReg<"%rs47">; +def RS48 : NVPTXReg<"%rs48">; +def RS49 : NVPTXReg<"%rs49">; +def RS50 : NVPTXReg<"%rs50">; +def RS51 : NVPTXReg<"%rs51">; +def RS52 : NVPTXReg<"%rs52">; +def RS53 : NVPTXReg<"%rs53">; +def RS54 : NVPTXReg<"%rs54">; +def RS55 : NVPTXReg<"%rs55">; +def RS56 : NVPTXReg<"%rs56">; +def RS57 : NVPTXReg<"%rs57">; +def RS58 : NVPTXReg<"%rs58">; +def RS59 : NVPTXReg<"%rs59">; +def RS60 : NVPTXReg<"%rs60">; +def RS61 : NVPTXReg<"%rs61">; +def RS62 : NVPTXReg<"%rs62">; +def RS63 : NVPTXReg<"%rs63">; +def RS64 : NVPTXReg<"%rs64">; +def RS65 : NVPTXReg<"%rs65">; +def RS66 : NVPTXReg<"%rs66">; +def RS67 : NVPTXReg<"%rs67">; +def RS68 : NVPTXReg<"%rs68">; +def RS69 : NVPTXReg<"%rs69">; +def RS70 : NVPTXReg<"%rs70">; +def RS71 : NVPTXReg<"%rs71">; +def RS72 : NVPTXReg<"%rs72">; +def RS73 : NVPTXReg<"%rs73">; +def RS74 : NVPTXReg<"%rs74">; +def RS75 : NVPTXReg<"%rs75">; +def RS76 : NVPTXReg<"%rs76">; +def RS77 : NVPTXReg<"%rs77">; +def RS78 : NVPTXReg<"%rs78">; +def RS79 : NVPTXReg<"%rs79">; +def RS80 : NVPTXReg<"%rs80">; +def RS81 : NVPTXReg<"%rs81">; +def RS82 : NVPTXReg<"%rs82">; +def RS83 : NVPTXReg<"%rs83">; +def RS84 : NVPTXReg<"%rs84">; +def RS85 : NVPTXReg<"%rs85">; +def RS86 : NVPTXReg<"%rs86">; +def RS87 : NVPTXReg<"%rs87">; +def RS88 : NVPTXReg<"%rs88">; +def RS89 : NVPTXReg<"%rs89">; +def RS90 : NVPTXReg<"%rs90">; +def RS91 : NVPTXReg<"%rs91">; +def RS92 : NVPTXReg<"%rs92">; +def RS93 : NVPTXReg<"%rs93">; +def RS94 : NVPTXReg<"%rs94">; +def RS95 : NVPTXReg<"%rs95">; +def RS96 : NVPTXReg<"%rs96">; +def RS97 : NVPTXReg<"%rs97">; +def RS98 : NVPTXReg<"%rs98">; +def RS99 : NVPTXReg<"%rs99">; +def RS100 : NVPTXReg<"%rs100">; +def RS101 : NVPTXReg<"%rs101">; +def RS102 : NVPTXReg<"%rs102">; +def RS103 : NVPTXReg<"%rs103">; +def RS104 : NVPTXReg<"%rs104">; +def RS105 : NVPTXReg<"%rs105">; +def RS106 : NVPTXReg<"%rs106">; +def RS107 : 
NVPTXReg<"%rs107">; +def RS108 : NVPTXReg<"%rs108">; +def RS109 : NVPTXReg<"%rs109">; +def RS110 : NVPTXReg<"%rs110">; +def RS111 : NVPTXReg<"%rs111">; +def RS112 : NVPTXReg<"%rs112">; +def RS113 : NVPTXReg<"%rs113">; +def RS114 : NVPTXReg<"%rs114">; +def RS115 : NVPTXReg<"%rs115">; +def RS116 : NVPTXReg<"%rs116">; +def RS117 : NVPTXReg<"%rs117">; +def RS118 : NVPTXReg<"%rs118">; +def RS119 : NVPTXReg<"%rs119">; +def RS120 : NVPTXReg<"%rs120">; +def RS121 : NVPTXReg<"%rs121">; +def RS122 : NVPTXReg<"%rs122">; +def RS123 : NVPTXReg<"%rs123">; +def RS124 : NVPTXReg<"%rs124">; +def RS125 : NVPTXReg<"%rs125">; +def RS126 : NVPTXReg<"%rs126">; +def RS127 : NVPTXReg<"%rs127">; +def RS128 : NVPTXReg<"%rs128">; +def RS129 : NVPTXReg<"%rs129">; +def RS130 : NVPTXReg<"%rs130">; +def RS131 : NVPTXReg<"%rs131">; +def RS132 : NVPTXReg<"%rs132">; +def RS133 : NVPTXReg<"%rs133">; +def RS134 : NVPTXReg<"%rs134">; +def RS135 : NVPTXReg<"%rs135">; +def RS136 : NVPTXReg<"%rs136">; +def RS137 : NVPTXReg<"%rs137">; +def RS138 : NVPTXReg<"%rs138">; +def RS139 : NVPTXReg<"%rs139">; +def RS140 : NVPTXReg<"%rs140">; +def RS141 : NVPTXReg<"%rs141">; +def RS142 : NVPTXReg<"%rs142">; +def RS143 : NVPTXReg<"%rs143">; +def RS144 : NVPTXReg<"%rs144">; +def RS145 : NVPTXReg<"%rs145">; +def RS146 : NVPTXReg<"%rs146">; +def RS147 : NVPTXReg<"%rs147">; +def RS148 : NVPTXReg<"%rs148">; +def RS149 : NVPTXReg<"%rs149">; +def RS150 : NVPTXReg<"%rs150">; +def RS151 : NVPTXReg<"%rs151">; +def RS152 : NVPTXReg<"%rs152">; +def RS153 : NVPTXReg<"%rs153">; +def RS154 : NVPTXReg<"%rs154">; +def RS155 : NVPTXReg<"%rs155">; +def RS156 : NVPTXReg<"%rs156">; +def RS157 : NVPTXReg<"%rs157">; +def RS158 : NVPTXReg<"%rs158">; +def RS159 : NVPTXReg<"%rs159">; +def RS160 : NVPTXReg<"%rs160">; +def RS161 : NVPTXReg<"%rs161">; +def RS162 : NVPTXReg<"%rs162">; +def RS163 : NVPTXReg<"%rs163">; +def RS164 : NVPTXReg<"%rs164">; +def RS165 : NVPTXReg<"%rs165">; +def RS166 : NVPTXReg<"%rs166">; +def RS167 : NVPTXReg<"%rs167">; 
+def RS168 : NVPTXReg<"%rs168">; +def RS169 : NVPTXReg<"%rs169">; +def RS170 : NVPTXReg<"%rs170">; +def RS171 : NVPTXReg<"%rs171">; +def RS172 : NVPTXReg<"%rs172">; +def RS173 : NVPTXReg<"%rs173">; +def RS174 : NVPTXReg<"%rs174">; +def RS175 : NVPTXReg<"%rs175">; +def RS176 : NVPTXReg<"%rs176">; +def RS177 : NVPTXReg<"%rs177">; +def RS178 : NVPTXReg<"%rs178">; +def RS179 : NVPTXReg<"%rs179">; +def RS180 : NVPTXReg<"%rs180">; +def RS181 : NVPTXReg<"%rs181">; +def RS182 : NVPTXReg<"%rs182">; +def RS183 : NVPTXReg<"%rs183">; +def RS184 : NVPTXReg<"%rs184">; +def RS185 : NVPTXReg<"%rs185">; +def RS186 : NVPTXReg<"%rs186">; +def RS187 : NVPTXReg<"%rs187">; +def RS188 : NVPTXReg<"%rs188">; +def RS189 : NVPTXReg<"%rs189">; +def RS190 : NVPTXReg<"%rs190">; +def RS191 : NVPTXReg<"%rs191">; +def RS192 : NVPTXReg<"%rs192">; +def RS193 : NVPTXReg<"%rs193">; +def RS194 : NVPTXReg<"%rs194">; +def RS195 : NVPTXReg<"%rs195">; +def RS196 : NVPTXReg<"%rs196">; +def RS197 : NVPTXReg<"%rs197">; +def RS198 : NVPTXReg<"%rs198">; +def RS199 : NVPTXReg<"%rs199">; +def RS200 : NVPTXReg<"%rs200">; +def RS201 : NVPTXReg<"%rs201">; +def RS202 : NVPTXReg<"%rs202">; +def RS203 : NVPTXReg<"%rs203">; +def RS204 : NVPTXReg<"%rs204">; +def RS205 : NVPTXReg<"%rs205">; +def RS206 : NVPTXReg<"%rs206">; +def RS207 : NVPTXReg<"%rs207">; +def RS208 : NVPTXReg<"%rs208">; +def RS209 : NVPTXReg<"%rs209">; +def RS210 : NVPTXReg<"%rs210">; +def RS211 : NVPTXReg<"%rs211">; +def RS212 : NVPTXReg<"%rs212">; +def RS213 : NVPTXReg<"%rs213">; +def RS214 : NVPTXReg<"%rs214">; +def RS215 : NVPTXReg<"%rs215">; +def RS216 : NVPTXReg<"%rs216">; +def RS217 : NVPTXReg<"%rs217">; +def RS218 : NVPTXReg<"%rs218">; +def RS219 : NVPTXReg<"%rs219">; +def RS220 : NVPTXReg<"%rs220">; +def RS221 : NVPTXReg<"%rs221">; +def RS222 : NVPTXReg<"%rs222">; +def RS223 : NVPTXReg<"%rs223">; +def RS224 : NVPTXReg<"%rs224">; +def RS225 : NVPTXReg<"%rs225">; +def RS226 : NVPTXReg<"%rs226">; +def RS227 : NVPTXReg<"%rs227">; +def RS228 : 
NVPTXReg<"%rs228">; +def RS229 : NVPTXReg<"%rs229">; +def RS230 : NVPTXReg<"%rs230">; +def RS231 : NVPTXReg<"%rs231">; +def RS232 : NVPTXReg<"%rs232">; +def RS233 : NVPTXReg<"%rs233">; +def RS234 : NVPTXReg<"%rs234">; +def RS235 : NVPTXReg<"%rs235">; +def RS236 : NVPTXReg<"%rs236">; +def RS237 : NVPTXReg<"%rs237">; +def RS238 : NVPTXReg<"%rs238">; +def RS239 : NVPTXReg<"%rs239">; +def RS240 : NVPTXReg<"%rs240">; +def RS241 : NVPTXReg<"%rs241">; +def RS242 : NVPTXReg<"%rs242">; +def RS243 : NVPTXReg<"%rs243">; +def RS244 : NVPTXReg<"%rs244">; +def RS245 : NVPTXReg<"%rs245">; +def RS246 : NVPTXReg<"%rs246">; +def RS247 : NVPTXReg<"%rs247">; +def RS248 : NVPTXReg<"%rs248">; +def RS249 : NVPTXReg<"%rs249">; +def RS250 : NVPTXReg<"%rs250">; +def RS251 : NVPTXReg<"%rs251">; +def RS252 : NVPTXReg<"%rs252">; +def RS253 : NVPTXReg<"%rs253">; +def RS254 : NVPTXReg<"%rs254">; +def RS255 : NVPTXReg<"%rs255">; +def RS256 : NVPTXReg<"%rs256">; +def RS257 : NVPTXReg<"%rs257">; +def RS258 : NVPTXReg<"%rs258">; +def RS259 : NVPTXReg<"%rs259">; +def RS260 : NVPTXReg<"%rs260">; +def RS261 : NVPTXReg<"%rs261">; +def RS262 : NVPTXReg<"%rs262">; +def RS263 : NVPTXReg<"%rs263">; +def RS264 : NVPTXReg<"%rs264">; +def RS265 : NVPTXReg<"%rs265">; +def RS266 : NVPTXReg<"%rs266">; +def RS267 : NVPTXReg<"%rs267">; +def RS268 : NVPTXReg<"%rs268">; +def RS269 : NVPTXReg<"%rs269">; +def RS270 : NVPTXReg<"%rs270">; +def RS271 : NVPTXReg<"%rs271">; +def RS272 : NVPTXReg<"%rs272">; +def RS273 : NVPTXReg<"%rs273">; +def RS274 : NVPTXReg<"%rs274">; +def RS275 : NVPTXReg<"%rs275">; +def RS276 : NVPTXReg<"%rs276">; +def RS277 : NVPTXReg<"%rs277">; +def RS278 : NVPTXReg<"%rs278">; +def RS279 : NVPTXReg<"%rs279">; +def RS280 : NVPTXReg<"%rs280">; +def RS281 : NVPTXReg<"%rs281">; +def RS282 : NVPTXReg<"%rs282">; +def RS283 : NVPTXReg<"%rs283">; +def RS284 : NVPTXReg<"%rs284">; +def RS285 : NVPTXReg<"%rs285">; +def RS286 : NVPTXReg<"%rs286">; +def RS287 : NVPTXReg<"%rs287">; +def RS288 : NVPTXReg<"%rs288">; 
+def RS289 : NVPTXReg<"%rs289">; +def RS290 : NVPTXReg<"%rs290">; +def RS291 : NVPTXReg<"%rs291">; +def RS292 : NVPTXReg<"%rs292">; +def RS293 : NVPTXReg<"%rs293">; +def RS294 : NVPTXReg<"%rs294">; +def RS295 : NVPTXReg<"%rs295">; +def RS296 : NVPTXReg<"%rs296">; +def RS297 : NVPTXReg<"%rs297">; +def RS298 : NVPTXReg<"%rs298">; +def RS299 : NVPTXReg<"%rs299">; +def RS300 : NVPTXReg<"%rs300">; +def RS301 : NVPTXReg<"%rs301">; +def RS302 : NVPTXReg<"%rs302">; +def RS303 : NVPTXReg<"%rs303">; +def RS304 : NVPTXReg<"%rs304">; +def RS305 : NVPTXReg<"%rs305">; +def RS306 : NVPTXReg<"%rs306">; +def RS307 : NVPTXReg<"%rs307">; +def RS308 : NVPTXReg<"%rs308">; +def RS309 : NVPTXReg<"%rs309">; +def RS310 : NVPTXReg<"%rs310">; +def RS311 : NVPTXReg<"%rs311">; +def RS312 : NVPTXReg<"%rs312">; +def RS313 : NVPTXReg<"%rs313">; +def RS314 : NVPTXReg<"%rs314">; +def RS315 : NVPTXReg<"%rs315">; +def RS316 : NVPTXReg<"%rs316">; +def RS317 : NVPTXReg<"%rs317">; +def RS318 : NVPTXReg<"%rs318">; +def RS319 : NVPTXReg<"%rs319">; +def RS320 : NVPTXReg<"%rs320">; +def RS321 : NVPTXReg<"%rs321">; +def RS322 : NVPTXReg<"%rs322">; +def RS323 : NVPTXReg<"%rs323">; +def RS324 : NVPTXReg<"%rs324">; +def RS325 : NVPTXReg<"%rs325">; +def RS326 : NVPTXReg<"%rs326">; +def RS327 : NVPTXReg<"%rs327">; +def RS328 : NVPTXReg<"%rs328">; +def RS329 : NVPTXReg<"%rs329">; +def RS330 : NVPTXReg<"%rs330">; +def RS331 : NVPTXReg<"%rs331">; +def RS332 : NVPTXReg<"%rs332">; +def RS333 : NVPTXReg<"%rs333">; +def RS334 : NVPTXReg<"%rs334">; +def RS335 : NVPTXReg<"%rs335">; +def RS336 : NVPTXReg<"%rs336">; +def RS337 : NVPTXReg<"%rs337">; +def RS338 : NVPTXReg<"%rs338">; +def RS339 : NVPTXReg<"%rs339">; +def RS340 : NVPTXReg<"%rs340">; +def RS341 : NVPTXReg<"%rs341">; +def RS342 : NVPTXReg<"%rs342">; +def RS343 : NVPTXReg<"%rs343">; +def RS344 : NVPTXReg<"%rs344">; +def RS345 : NVPTXReg<"%rs345">; +def RS346 : NVPTXReg<"%rs346">; +def RS347 : NVPTXReg<"%rs347">; +def RS348 : NVPTXReg<"%rs348">; +def RS349 : 
NVPTXReg<"%rs349">; +def RS350 : NVPTXReg<"%rs350">; +def RS351 : NVPTXReg<"%rs351">; +def RS352 : NVPTXReg<"%rs352">; +def RS353 : NVPTXReg<"%rs353">; +def RS354 : NVPTXReg<"%rs354">; +def RS355 : NVPTXReg<"%rs355">; +def RS356 : NVPTXReg<"%rs356">; +def RS357 : NVPTXReg<"%rs357">; +def RS358 : NVPTXReg<"%rs358">; +def RS359 : NVPTXReg<"%rs359">; +def RS360 : NVPTXReg<"%rs360">; +def RS361 : NVPTXReg<"%rs361">; +def RS362 : NVPTXReg<"%rs362">; +def RS363 : NVPTXReg<"%rs363">; +def RS364 : NVPTXReg<"%rs364">; +def RS365 : NVPTXReg<"%rs365">; +def RS366 : NVPTXReg<"%rs366">; +def RS367 : NVPTXReg<"%rs367">; +def RS368 : NVPTXReg<"%rs368">; +def RS369 : NVPTXReg<"%rs369">; +def RS370 : NVPTXReg<"%rs370">; +def RS371 : NVPTXReg<"%rs371">; +def RS372 : NVPTXReg<"%rs372">; +def RS373 : NVPTXReg<"%rs373">; +def RS374 : NVPTXReg<"%rs374">; +def RS375 : NVPTXReg<"%rs375">; +def RS376 : NVPTXReg<"%rs376">; +def RS377 : NVPTXReg<"%rs377">; +def RS378 : NVPTXReg<"%rs378">; +def RS379 : NVPTXReg<"%rs379">; +def RS380 : NVPTXReg<"%rs380">; +def RS381 : NVPTXReg<"%rs381">; +def RS382 : NVPTXReg<"%rs382">; +def RS383 : NVPTXReg<"%rs383">; +def RS384 : NVPTXReg<"%rs384">; +def RS385 : NVPTXReg<"%rs385">; +def RS386 : NVPTXReg<"%rs386">; +def RS387 : NVPTXReg<"%rs387">; +def RS388 : NVPTXReg<"%rs388">; +def RS389 : NVPTXReg<"%rs389">; +def RS390 : NVPTXReg<"%rs390">; +def RS391 : NVPTXReg<"%rs391">; +def RS392 : NVPTXReg<"%rs392">; +def RS393 : NVPTXReg<"%rs393">; +def RS394 : NVPTXReg<"%rs394">; +def RS395 : NVPTXReg<"%rs395">; + +//===--- 32-bit -----------------------------------------------------------===// +def R0 : NVPTXReg<"%r0">; +def R1 : NVPTXReg<"%r1">; +def R2 : NVPTXReg<"%r2">; +def R3 : NVPTXReg<"%r3">; +def R4 : NVPTXReg<"%r4">; +def R5 : NVPTXReg<"%r5">; +def R6 : NVPTXReg<"%r6">; +def R7 : NVPTXReg<"%r7">; +def R8 : NVPTXReg<"%r8">; +def R9 : NVPTXReg<"%r9">; +def R10 : NVPTXReg<"%r10">; +def R11 : NVPTXReg<"%r11">; +def R12 : NVPTXReg<"%r12">; +def R13 : 
NVPTXReg<"%r13">; +def R14 : NVPTXReg<"%r14">; +def R15 : NVPTXReg<"%r15">; +def R16 : NVPTXReg<"%r16">; +def R17 : NVPTXReg<"%r17">; +def R18 : NVPTXReg<"%r18">; +def R19 : NVPTXReg<"%r19">; +def R20 : NVPTXReg<"%r20">; +def R21 : NVPTXReg<"%r21">; +def R22 : NVPTXReg<"%r22">; +def R23 : NVPTXReg<"%r23">; +def R24 : NVPTXReg<"%r24">; +def R25 : NVPTXReg<"%r25">; +def R26 : NVPTXReg<"%r26">; +def R27 : NVPTXReg<"%r27">; +def R28 : NVPTXReg<"%r28">; +def R29 : NVPTXReg<"%r29">; +def R30 : NVPTXReg<"%r30">; +def R31 : NVPTXReg<"%r31">; +def R32 : NVPTXReg<"%r32">; +def R33 : NVPTXReg<"%r33">; +def R34 : NVPTXReg<"%r34">; +def R35 : NVPTXReg<"%r35">; +def R36 : NVPTXReg<"%r36">; +def R37 : NVPTXReg<"%r37">; +def R38 : NVPTXReg<"%r38">; +def R39 : NVPTXReg<"%r39">; +def R40 : NVPTXReg<"%r40">; +def R41 : NVPTXReg<"%r41">; +def R42 : NVPTXReg<"%r42">; +def R43 : NVPTXReg<"%r43">; +def R44 : NVPTXReg<"%r44">; +def R45 : NVPTXReg<"%r45">; +def R46 : NVPTXReg<"%r46">; +def R47 : NVPTXReg<"%r47">; +def R48 : NVPTXReg<"%r48">; +def R49 : NVPTXReg<"%r49">; +def R50 : NVPTXReg<"%r50">; +def R51 : NVPTXReg<"%r51">; +def R52 : NVPTXReg<"%r52">; +def R53 : NVPTXReg<"%r53">; +def R54 : NVPTXReg<"%r54">; +def R55 : NVPTXReg<"%r55">; +def R56 : NVPTXReg<"%r56">; +def R57 : NVPTXReg<"%r57">; +def R58 : NVPTXReg<"%r58">; +def R59 : NVPTXReg<"%r59">; +def R60 : NVPTXReg<"%r60">; +def R61 : NVPTXReg<"%r61">; +def R62 : NVPTXReg<"%r62">; +def R63 : NVPTXReg<"%r63">; +def R64 : NVPTXReg<"%r64">; +def R65 : NVPTXReg<"%r65">; +def R66 : NVPTXReg<"%r66">; +def R67 : NVPTXReg<"%r67">; +def R68 : NVPTXReg<"%r68">; +def R69 : NVPTXReg<"%r69">; +def R70 : NVPTXReg<"%r70">; +def R71 : NVPTXReg<"%r71">; +def R72 : NVPTXReg<"%r72">; +def R73 : NVPTXReg<"%r73">; +def R74 : NVPTXReg<"%r74">; +def R75 : NVPTXReg<"%r75">; +def R76 : NVPTXReg<"%r76">; +def R77 : NVPTXReg<"%r77">; +def R78 : NVPTXReg<"%r78">; +def R79 : NVPTXReg<"%r79">; +def R80 : NVPTXReg<"%r80">; +def R81 : NVPTXReg<"%r81">; +def R82 
: NVPTXReg<"%r82">; +def R83 : NVPTXReg<"%r83">; +def R84 : NVPTXReg<"%r84">; +def R85 : NVPTXReg<"%r85">; +def R86 : NVPTXReg<"%r86">; +def R87 : NVPTXReg<"%r87">; +def R88 : NVPTXReg<"%r88">; +def R89 : NVPTXReg<"%r89">; +def R90 : NVPTXReg<"%r90">; +def R91 : NVPTXReg<"%r91">; +def R92 : NVPTXReg<"%r92">; +def R93 : NVPTXReg<"%r93">; +def R94 : NVPTXReg<"%r94">; +def R95 : NVPTXReg<"%r95">; +def R96 : NVPTXReg<"%r96">; +def R97 : NVPTXReg<"%r97">; +def R98 : NVPTXReg<"%r98">; +def R99 : NVPTXReg<"%r99">; +def R100 : NVPTXReg<"%r100">; +def R101 : NVPTXReg<"%r101">; +def R102 : NVPTXReg<"%r102">; +def R103 : NVPTXReg<"%r103">; +def R104 : NVPTXReg<"%r104">; +def R105 : NVPTXReg<"%r105">; +def R106 : NVPTXReg<"%r106">; +def R107 : NVPTXReg<"%r107">; +def R108 : NVPTXReg<"%r108">; +def R109 : NVPTXReg<"%r109">; +def R110 : NVPTXReg<"%r110">; +def R111 : NVPTXReg<"%r111">; +def R112 : NVPTXReg<"%r112">; +def R113 : NVPTXReg<"%r113">; +def R114 : NVPTXReg<"%r114">; +def R115 : NVPTXReg<"%r115">; +def R116 : NVPTXReg<"%r116">; +def R117 : NVPTXReg<"%r117">; +def R118 : NVPTXReg<"%r118">; +def R119 : NVPTXReg<"%r119">; +def R120 : NVPTXReg<"%r120">; +def R121 : NVPTXReg<"%r121">; +def R122 : NVPTXReg<"%r122">; +def R123 : NVPTXReg<"%r123">; +def R124 : NVPTXReg<"%r124">; +def R125 : NVPTXReg<"%r125">; +def R126 : NVPTXReg<"%r126">; +def R127 : NVPTXReg<"%r127">; +def R128 : NVPTXReg<"%r128">; +def R129 : NVPTXReg<"%r129">; +def R130 : NVPTXReg<"%r130">; +def R131 : NVPTXReg<"%r131">; +def R132 : NVPTXReg<"%r132">; +def R133 : NVPTXReg<"%r133">; +def R134 : NVPTXReg<"%r134">; +def R135 : NVPTXReg<"%r135">; +def R136 : NVPTXReg<"%r136">; +def R137 : NVPTXReg<"%r137">; +def R138 : NVPTXReg<"%r138">; +def R139 : NVPTXReg<"%r139">; +def R140 : NVPTXReg<"%r140">; +def R141 : NVPTXReg<"%r141">; +def R142 : NVPTXReg<"%r142">; +def R143 : NVPTXReg<"%r143">; +def R144 : NVPTXReg<"%r144">; +def R145 : NVPTXReg<"%r145">; +def R146 : NVPTXReg<"%r146">; +def R147 : 
NVPTXReg<"%r147">; +def R148 : NVPTXReg<"%r148">; +def R149 : NVPTXReg<"%r149">; +def R150 : NVPTXReg<"%r150">; +def R151 : NVPTXReg<"%r151">; +def R152 : NVPTXReg<"%r152">; +def R153 : NVPTXReg<"%r153">; +def R154 : NVPTXReg<"%r154">; +def R155 : NVPTXReg<"%r155">; +def R156 : NVPTXReg<"%r156">; +def R157 : NVPTXReg<"%r157">; +def R158 : NVPTXReg<"%r158">; +def R159 : NVPTXReg<"%r159">; +def R160 : NVPTXReg<"%r160">; +def R161 : NVPTXReg<"%r161">; +def R162 : NVPTXReg<"%r162">; +def R163 : NVPTXReg<"%r163">; +def R164 : NVPTXReg<"%r164">; +def R165 : NVPTXReg<"%r165">; +def R166 : NVPTXReg<"%r166">; +def R167 : NVPTXReg<"%r167">; +def R168 : NVPTXReg<"%r168">; +def R169 : NVPTXReg<"%r169">; +def R170 : NVPTXReg<"%r170">; +def R171 : NVPTXReg<"%r171">; +def R172 : NVPTXReg<"%r172">; +def R173 : NVPTXReg<"%r173">; +def R174 : NVPTXReg<"%r174">; +def R175 : NVPTXReg<"%r175">; +def R176 : NVPTXReg<"%r176">; +def R177 : NVPTXReg<"%r177">; +def R178 : NVPTXReg<"%r178">; +def R179 : NVPTXReg<"%r179">; +def R180 : NVPTXReg<"%r180">; +def R181 : NVPTXReg<"%r181">; +def R182 : NVPTXReg<"%r182">; +def R183 : NVPTXReg<"%r183">; +def R184 : NVPTXReg<"%r184">; +def R185 : NVPTXReg<"%r185">; +def R186 : NVPTXReg<"%r186">; +def R187 : NVPTXReg<"%r187">; +def R188 : NVPTXReg<"%r188">; +def R189 : NVPTXReg<"%r189">; +def R190 : NVPTXReg<"%r190">; +def R191 : NVPTXReg<"%r191">; +def R192 : NVPTXReg<"%r192">; +def R193 : NVPTXReg<"%r193">; +def R194 : NVPTXReg<"%r194">; +def R195 : NVPTXReg<"%r195">; +def R196 : NVPTXReg<"%r196">; +def R197 : NVPTXReg<"%r197">; +def R198 : NVPTXReg<"%r198">; +def R199 : NVPTXReg<"%r199">; +def R200 : NVPTXReg<"%r200">; +def R201 : NVPTXReg<"%r201">; +def R202 : NVPTXReg<"%r202">; +def R203 : NVPTXReg<"%r203">; +def R204 : NVPTXReg<"%r204">; +def R205 : NVPTXReg<"%r205">; +def R206 : NVPTXReg<"%r206">; +def R207 : NVPTXReg<"%r207">; +def R208 : NVPTXReg<"%r208">; +def R209 : NVPTXReg<"%r209">; +def R210 : NVPTXReg<"%r210">; +def R211 : 
NVPTXReg<"%r211">; +def R212 : NVPTXReg<"%r212">; +def R213 : NVPTXReg<"%r213">; +def R214 : NVPTXReg<"%r214">; +def R215 : NVPTXReg<"%r215">; +def R216 : NVPTXReg<"%r216">; +def R217 : NVPTXReg<"%r217">; +def R218 : NVPTXReg<"%r218">; +def R219 : NVPTXReg<"%r219">; +def R220 : NVPTXReg<"%r220">; +def R221 : NVPTXReg<"%r221">; +def R222 : NVPTXReg<"%r222">; +def R223 : NVPTXReg<"%r223">; +def R224 : NVPTXReg<"%r224">; +def R225 : NVPTXReg<"%r225">; +def R226 : NVPTXReg<"%r226">; +def R227 : NVPTXReg<"%r227">; +def R228 : NVPTXReg<"%r228">; +def R229 : NVPTXReg<"%r229">; +def R230 : NVPTXReg<"%r230">; +def R231 : NVPTXReg<"%r231">; +def R232 : NVPTXReg<"%r232">; +def R233 : NVPTXReg<"%r233">; +def R234 : NVPTXReg<"%r234">; +def R235 : NVPTXReg<"%r235">; +def R236 : NVPTXReg<"%r236">; +def R237 : NVPTXReg<"%r237">; +def R238 : NVPTXReg<"%r238">; +def R239 : NVPTXReg<"%r239">; +def R240 : NVPTXReg<"%r240">; +def R241 : NVPTXReg<"%r241">; +def R242 : NVPTXReg<"%r242">; +def R243 : NVPTXReg<"%r243">; +def R244 : NVPTXReg<"%r244">; +def R245 : NVPTXReg<"%r245">; +def R246 : NVPTXReg<"%r246">; +def R247 : NVPTXReg<"%r247">; +def R248 : NVPTXReg<"%r248">; +def R249 : NVPTXReg<"%r249">; +def R250 : NVPTXReg<"%r250">; +def R251 : NVPTXReg<"%r251">; +def R252 : NVPTXReg<"%r252">; +def R253 : NVPTXReg<"%r253">; +def R254 : NVPTXReg<"%r254">; +def R255 : NVPTXReg<"%r255">; +def R256 : NVPTXReg<"%r256">; +def R257 : NVPTXReg<"%r257">; +def R258 : NVPTXReg<"%r258">; +def R259 : NVPTXReg<"%r259">; +def R260 : NVPTXReg<"%r260">; +def R261 : NVPTXReg<"%r261">; +def R262 : NVPTXReg<"%r262">; +def R263 : NVPTXReg<"%r263">; +def R264 : NVPTXReg<"%r264">; +def R265 : NVPTXReg<"%r265">; +def R266 : NVPTXReg<"%r266">; +def R267 : NVPTXReg<"%r267">; +def R268 : NVPTXReg<"%r268">; +def R269 : NVPTXReg<"%r269">; +def R270 : NVPTXReg<"%r270">; +def R271 : NVPTXReg<"%r271">; +def R272 : NVPTXReg<"%r272">; +def R273 : NVPTXReg<"%r273">; +def R274 : NVPTXReg<"%r274">; +def R275 : 
NVPTXReg<"%r275">; +def R276 : NVPTXReg<"%r276">; +def R277 : NVPTXReg<"%r277">; +def R278 : NVPTXReg<"%r278">; +def R279 : NVPTXReg<"%r279">; +def R280 : NVPTXReg<"%r280">; +def R281 : NVPTXReg<"%r281">; +def R282 : NVPTXReg<"%r282">; +def R283 : NVPTXReg<"%r283">; +def R284 : NVPTXReg<"%r284">; +def R285 : NVPTXReg<"%r285">; +def R286 : NVPTXReg<"%r286">; +def R287 : NVPTXReg<"%r287">; +def R288 : NVPTXReg<"%r288">; +def R289 : NVPTXReg<"%r289">; +def R290 : NVPTXReg<"%r290">; +def R291 : NVPTXReg<"%r291">; +def R292 : NVPTXReg<"%r292">; +def R293 : NVPTXReg<"%r293">; +def R294 : NVPTXReg<"%r294">; +def R295 : NVPTXReg<"%r295">; +def R296 : NVPTXReg<"%r296">; +def R297 : NVPTXReg<"%r297">; +def R298 : NVPTXReg<"%r298">; +def R299 : NVPTXReg<"%r299">; +def R300 : NVPTXReg<"%r300">; +def R301 : NVPTXReg<"%r301">; +def R302 : NVPTXReg<"%r302">; +def R303 : NVPTXReg<"%r303">; +def R304 : NVPTXReg<"%r304">; +def R305 : NVPTXReg<"%r305">; +def R306 : NVPTXReg<"%r306">; +def R307 : NVPTXReg<"%r307">; +def R308 : NVPTXReg<"%r308">; +def R309 : NVPTXReg<"%r309">; +def R310 : NVPTXReg<"%r310">; +def R311 : NVPTXReg<"%r311">; +def R312 : NVPTXReg<"%r312">; +def R313 : NVPTXReg<"%r313">; +def R314 : NVPTXReg<"%r314">; +def R315 : NVPTXReg<"%r315">; +def R316 : NVPTXReg<"%r316">; +def R317 : NVPTXReg<"%r317">; +def R318 : NVPTXReg<"%r318">; +def R319 : NVPTXReg<"%r319">; +def R320 : NVPTXReg<"%r320">; +def R321 : NVPTXReg<"%r321">; +def R322 : NVPTXReg<"%r322">; +def R323 : NVPTXReg<"%r323">; +def R324 : NVPTXReg<"%r324">; +def R325 : NVPTXReg<"%r325">; +def R326 : NVPTXReg<"%r326">; +def R327 : NVPTXReg<"%r327">; +def R328 : NVPTXReg<"%r328">; +def R329 : NVPTXReg<"%r329">; +def R330 : NVPTXReg<"%r330">; +def R331 : NVPTXReg<"%r331">; +def R332 : NVPTXReg<"%r332">; +def R333 : NVPTXReg<"%r333">; +def R334 : NVPTXReg<"%r334">; +def R335 : NVPTXReg<"%r335">; +def R336 : NVPTXReg<"%r336">; +def R337 : NVPTXReg<"%r337">; +def R338 : NVPTXReg<"%r338">; +def R339 : 
NVPTXReg<"%r339">; +def R340 : NVPTXReg<"%r340">; +def R341 : NVPTXReg<"%r341">; +def R342 : NVPTXReg<"%r342">; +def R343 : NVPTXReg<"%r343">; +def R344 : NVPTXReg<"%r344">; +def R345 : NVPTXReg<"%r345">; +def R346 : NVPTXReg<"%r346">; +def R347 : NVPTXReg<"%r347">; +def R348 : NVPTXReg<"%r348">; +def R349 : NVPTXReg<"%r349">; +def R350 : NVPTXReg<"%r350">; +def R351 : NVPTXReg<"%r351">; +def R352 : NVPTXReg<"%r352">; +def R353 : NVPTXReg<"%r353">; +def R354 : NVPTXReg<"%r354">; +def R355 : NVPTXReg<"%r355">; +def R356 : NVPTXReg<"%r356">; +def R357 : NVPTXReg<"%r357">; +def R358 : NVPTXReg<"%r358">; +def R359 : NVPTXReg<"%r359">; +def R360 : NVPTXReg<"%r360">; +def R361 : NVPTXReg<"%r361">; +def R362 : NVPTXReg<"%r362">; +def R363 : NVPTXReg<"%r363">; +def R364 : NVPTXReg<"%r364">; +def R365 : NVPTXReg<"%r365">; +def R366 : NVPTXReg<"%r366">; +def R367 : NVPTXReg<"%r367">; +def R368 : NVPTXReg<"%r368">; +def R369 : NVPTXReg<"%r369">; +def R370 : NVPTXReg<"%r370">; +def R371 : NVPTXReg<"%r371">; +def R372 : NVPTXReg<"%r372">; +def R373 : NVPTXReg<"%r373">; +def R374 : NVPTXReg<"%r374">; +def R375 : NVPTXReg<"%r375">; +def R376 : NVPTXReg<"%r376">; +def R377 : NVPTXReg<"%r377">; +def R378 : NVPTXReg<"%r378">; +def R379 : NVPTXReg<"%r379">; +def R380 : NVPTXReg<"%r380">; +def R381 : NVPTXReg<"%r381">; +def R382 : NVPTXReg<"%r382">; +def R383 : NVPTXReg<"%r383">; +def R384 : NVPTXReg<"%r384">; +def R385 : NVPTXReg<"%r385">; +def R386 : NVPTXReg<"%r386">; +def R387 : NVPTXReg<"%r387">; +def R388 : NVPTXReg<"%r388">; +def R389 : NVPTXReg<"%r389">; +def R390 : NVPTXReg<"%r390">; +def R391 : NVPTXReg<"%r391">; +def R392 : NVPTXReg<"%r392">; +def R393 : NVPTXReg<"%r393">; +def R394 : NVPTXReg<"%r394">; +def R395 : NVPTXReg<"%r395">; + +//===--- 64-bit -----------------------------------------------------------===// +def RL0 : NVPTXReg<"%rl0">; +def RL1 : NVPTXReg<"%rl1">; +def RL2 : NVPTXReg<"%rl2">; +def RL3 : NVPTXReg<"%rl3">; +def RL4 : NVPTXReg<"%rl4">; +def RL5 : 
NVPTXReg<"%rl5">; +def RL6 : NVPTXReg<"%rl6">; +def RL7 : NVPTXReg<"%rl7">; +def RL8 : NVPTXReg<"%rl8">; +def RL9 : NVPTXReg<"%rl9">; +def RL10 : NVPTXReg<"%rl10">; +def RL11 : NVPTXReg<"%rl11">; +def RL12 : NVPTXReg<"%rl12">; +def RL13 : NVPTXReg<"%rl13">; +def RL14 : NVPTXReg<"%rl14">; +def RL15 : NVPTXReg<"%rl15">; +def RL16 : NVPTXReg<"%rl16">; +def RL17 : NVPTXReg<"%rl17">; +def RL18 : NVPTXReg<"%rl18">; +def RL19 : NVPTXReg<"%rl19">; +def RL20 : NVPTXReg<"%rl20">; +def RL21 : NVPTXReg<"%rl21">; +def RL22 : NVPTXReg<"%rl22">; +def RL23 : NVPTXReg<"%rl23">; +def RL24 : NVPTXReg<"%rl24">; +def RL25 : NVPTXReg<"%rl25">; +def RL26 : NVPTXReg<"%rl26">; +def RL27 : NVPTXReg<"%rl27">; +def RL28 : NVPTXReg<"%rl28">; +def RL29 : NVPTXReg<"%rl29">; +def RL30 : NVPTXReg<"%rl30">; +def RL31 : NVPTXReg<"%rl31">; +def RL32 : NVPTXReg<"%rl32">; +def RL33 : NVPTXReg<"%rl33">; +def RL34 : NVPTXReg<"%rl34">; +def RL35 : NVPTXReg<"%rl35">; +def RL36 : NVPTXReg<"%rl36">; +def RL37 : NVPTXReg<"%rl37">; +def RL38 : NVPTXReg<"%rl38">; +def RL39 : NVPTXReg<"%rl39">; +def RL40 : NVPTXReg<"%rl40">; +def RL41 : NVPTXReg<"%rl41">; +def RL42 : NVPTXReg<"%rl42">; +def RL43 : NVPTXReg<"%rl43">; +def RL44 : NVPTXReg<"%rl44">; +def RL45 : NVPTXReg<"%rl45">; +def RL46 : NVPTXReg<"%rl46">; +def RL47 : NVPTXReg<"%rl47">; +def RL48 : NVPTXReg<"%rl48">; +def RL49 : NVPTXReg<"%rl49">; +def RL50 : NVPTXReg<"%rl50">; +def RL51 : NVPTXReg<"%rl51">; +def RL52 : NVPTXReg<"%rl52">; +def RL53 : NVPTXReg<"%rl53">; +def RL54 : NVPTXReg<"%rl54">; +def RL55 : NVPTXReg<"%rl55">; +def RL56 : NVPTXReg<"%rl56">; +def RL57 : NVPTXReg<"%rl57">; +def RL58 : NVPTXReg<"%rl58">; +def RL59 : NVPTXReg<"%rl59">; +def RL60 : NVPTXReg<"%rl60">; +def RL61 : NVPTXReg<"%rl61">; +def RL62 : NVPTXReg<"%rl62">; +def RL63 : NVPTXReg<"%rl63">; +def RL64 : NVPTXReg<"%rl64">; +def RL65 : NVPTXReg<"%rl65">; +def RL66 : NVPTXReg<"%rl66">; +def RL67 : NVPTXReg<"%rl67">; +def RL68 : NVPTXReg<"%rl68">; +def RL69 : NVPTXReg<"%rl69">; +def 
RL70 : NVPTXReg<"%rl70">; +def RL71 : NVPTXReg<"%rl71">; +def RL72 : NVPTXReg<"%rl72">; +def RL73 : NVPTXReg<"%rl73">; +def RL74 : NVPTXReg<"%rl74">; +def RL75 : NVPTXReg<"%rl75">; +def RL76 : NVPTXReg<"%rl76">; +def RL77 : NVPTXReg<"%rl77">; +def RL78 : NVPTXReg<"%rl78">; +def RL79 : NVPTXReg<"%rl79">; +def RL80 : NVPTXReg<"%rl80">; +def RL81 : NVPTXReg<"%rl81">; +def RL82 : NVPTXReg<"%rl82">; +def RL83 : NVPTXReg<"%rl83">; +def RL84 : NVPTXReg<"%rl84">; +def RL85 : NVPTXReg<"%rl85">; +def RL86 : NVPTXReg<"%rl86">; +def RL87 : NVPTXReg<"%rl87">; +def RL88 : NVPTXReg<"%rl88">; +def RL89 : NVPTXReg<"%rl89">; +def RL90 : NVPTXReg<"%rl90">; +def RL91 : NVPTXReg<"%rl91">; +def RL92 : NVPTXReg<"%rl92">; +def RL93 : NVPTXReg<"%rl93">; +def RL94 : NVPTXReg<"%rl94">; +def RL95 : NVPTXReg<"%rl95">; +def RL96 : NVPTXReg<"%rl96">; +def RL97 : NVPTXReg<"%rl97">; +def RL98 : NVPTXReg<"%rl98">; +def RL99 : NVPTXReg<"%rl99">; +def RL100 : NVPTXReg<"%rl100">; +def RL101 : NVPTXReg<"%rl101">; +def RL102 : NVPTXReg<"%rl102">; +def RL103 : NVPTXReg<"%rl103">; +def RL104 : NVPTXReg<"%rl104">; +def RL105 : NVPTXReg<"%rl105">; +def RL106 : NVPTXReg<"%rl106">; +def RL107 : NVPTXReg<"%rl107">; +def RL108 : NVPTXReg<"%rl108">; +def RL109 : NVPTXReg<"%rl109">; +def RL110 : NVPTXReg<"%rl110">; +def RL111 : NVPTXReg<"%rl111">; +def RL112 : NVPTXReg<"%rl112">; +def RL113 : NVPTXReg<"%rl113">; +def RL114 : NVPTXReg<"%rl114">; +def RL115 : NVPTXReg<"%rl115">; +def RL116 : NVPTXReg<"%rl116">; +def RL117 : NVPTXReg<"%rl117">; +def RL118 : NVPTXReg<"%rl118">; +def RL119 : NVPTXReg<"%rl119">; +def RL120 : NVPTXReg<"%rl120">; +def RL121 : NVPTXReg<"%rl121">; +def RL122 : NVPTXReg<"%rl122">; +def RL123 : NVPTXReg<"%rl123">; +def RL124 : NVPTXReg<"%rl124">; +def RL125 : NVPTXReg<"%rl125">; +def RL126 : NVPTXReg<"%rl126">; +def RL127 : NVPTXReg<"%rl127">; +def RL128 : NVPTXReg<"%rl128">; +def RL129 : NVPTXReg<"%rl129">; +def RL130 : NVPTXReg<"%rl130">; +def RL131 : NVPTXReg<"%rl131">; +def RL132 : 
NVPTXReg<"%rl132">; +def RL133 : NVPTXReg<"%rl133">; +def RL134 : NVPTXReg<"%rl134">; +def RL135 : NVPTXReg<"%rl135">; +def RL136 : NVPTXReg<"%rl136">; +def RL137 : NVPTXReg<"%rl137">; +def RL138 : NVPTXReg<"%rl138">; +def RL139 : NVPTXReg<"%rl139">; +def RL140 : NVPTXReg<"%rl140">; +def RL141 : NVPTXReg<"%rl141">; +def RL142 : NVPTXReg<"%rl142">; +def RL143 : NVPTXReg<"%rl143">; +def RL144 : NVPTXReg<"%rl144">; +def RL145 : NVPTXReg<"%rl145">; +def RL146 : NVPTXReg<"%rl146">; +def RL147 : NVPTXReg<"%rl147">; +def RL148 : NVPTXReg<"%rl148">; +def RL149 : NVPTXReg<"%rl149">; +def RL150 : NVPTXReg<"%rl150">; +def RL151 : NVPTXReg<"%rl151">; +def RL152 : NVPTXReg<"%rl152">; +def RL153 : NVPTXReg<"%rl153">; +def RL154 : NVPTXReg<"%rl154">; +def RL155 : NVPTXReg<"%rl155">; +def RL156 : NVPTXReg<"%rl156">; +def RL157 : NVPTXReg<"%rl157">; +def RL158 : NVPTXReg<"%rl158">; +def RL159 : NVPTXReg<"%rl159">; +def RL160 : NVPTXReg<"%rl160">; +def RL161 : NVPTXReg<"%rl161">; +def RL162 : NVPTXReg<"%rl162">; +def RL163 : NVPTXReg<"%rl163">; +def RL164 : NVPTXReg<"%rl164">; +def RL165 : NVPTXReg<"%rl165">; +def RL166 : NVPTXReg<"%rl166">; +def RL167 : NVPTXReg<"%rl167">; +def RL168 : NVPTXReg<"%rl168">; +def RL169 : NVPTXReg<"%rl169">; +def RL170 : NVPTXReg<"%rl170">; +def RL171 : NVPTXReg<"%rl171">; +def RL172 : NVPTXReg<"%rl172">; +def RL173 : NVPTXReg<"%rl173">; +def RL174 : NVPTXReg<"%rl174">; +def RL175 : NVPTXReg<"%rl175">; +def RL176 : NVPTXReg<"%rl176">; +def RL177 : NVPTXReg<"%rl177">; +def RL178 : NVPTXReg<"%rl178">; +def RL179 : NVPTXReg<"%rl179">; +def RL180 : NVPTXReg<"%rl180">; +def RL181 : NVPTXReg<"%rl181">; +def RL182 : NVPTXReg<"%rl182">; +def RL183 : NVPTXReg<"%rl183">; +def RL184 : NVPTXReg<"%rl184">; +def RL185 : NVPTXReg<"%rl185">; +def RL186 : NVPTXReg<"%rl186">; +def RL187 : NVPTXReg<"%rl187">; +def RL188 : NVPTXReg<"%rl188">; +def RL189 : NVPTXReg<"%rl189">; +def RL190 : NVPTXReg<"%rl190">; +def RL191 : NVPTXReg<"%rl191">; +def RL192 : NVPTXReg<"%rl192">; 
+def RL193 : NVPTXReg<"%rl193">; +def RL194 : NVPTXReg<"%rl194">; +def RL195 : NVPTXReg<"%rl195">; +def RL196 : NVPTXReg<"%rl196">; +def RL197 : NVPTXReg<"%rl197">; +def RL198 : NVPTXReg<"%rl198">; +def RL199 : NVPTXReg<"%rl199">; +def RL200 : NVPTXReg<"%rl200">; +def RL201 : NVPTXReg<"%rl201">; +def RL202 : NVPTXReg<"%rl202">; +def RL203 : NVPTXReg<"%rl203">; +def RL204 : NVPTXReg<"%rl204">; +def RL205 : NVPTXReg<"%rl205">; +def RL206 : NVPTXReg<"%rl206">; +def RL207 : NVPTXReg<"%rl207">; +def RL208 : NVPTXReg<"%rl208">; +def RL209 : NVPTXReg<"%rl209">; +def RL210 : NVPTXReg<"%rl210">; +def RL211 : NVPTXReg<"%rl211">; +def RL212 : NVPTXReg<"%rl212">; +def RL213 : NVPTXReg<"%rl213">; +def RL214 : NVPTXReg<"%rl214">; +def RL215 : NVPTXReg<"%rl215">; +def RL216 : NVPTXReg<"%rl216">; +def RL217 : NVPTXReg<"%rl217">; +def RL218 : NVPTXReg<"%rl218">; +def RL219 : NVPTXReg<"%rl219">; +def RL220 : NVPTXReg<"%rl220">; +def RL221 : NVPTXReg<"%rl221">; +def RL222 : NVPTXReg<"%rl222">; +def RL223 : NVPTXReg<"%rl223">; +def RL224 : NVPTXReg<"%rl224">; +def RL225 : NVPTXReg<"%rl225">; +def RL226 : NVPTXReg<"%rl226">; +def RL227 : NVPTXReg<"%rl227">; +def RL228 : NVPTXReg<"%rl228">; +def RL229 : NVPTXReg<"%rl229">; +def RL230 : NVPTXReg<"%rl230">; +def RL231 : NVPTXReg<"%rl231">; +def RL232 : NVPTXReg<"%rl232">; +def RL233 : NVPTXReg<"%rl233">; +def RL234 : NVPTXReg<"%rl234">; +def RL235 : NVPTXReg<"%rl235">; +def RL236 : NVPTXReg<"%rl236">; +def RL237 : NVPTXReg<"%rl237">; +def RL238 : NVPTXReg<"%rl238">; +def RL239 : NVPTXReg<"%rl239">; +def RL240 : NVPTXReg<"%rl240">; +def RL241 : NVPTXReg<"%rl241">; +def RL242 : NVPTXReg<"%rl242">; +def RL243 : NVPTXReg<"%rl243">; +def RL244 : NVPTXReg<"%rl244">; +def RL245 : NVPTXReg<"%rl245">; +def RL246 : NVPTXReg<"%rl246">; +def RL247 : NVPTXReg<"%rl247">; +def RL248 : NVPTXReg<"%rl248">; +def RL249 : NVPTXReg<"%rl249">; +def RL250 : NVPTXReg<"%rl250">; +def RL251 : NVPTXReg<"%rl251">; +def RL252 : NVPTXReg<"%rl252">; +def RL253 : 
NVPTXReg<"%rl253">; +def RL254 : NVPTXReg<"%rl254">; +def RL255 : NVPTXReg<"%rl255">; +def RL256 : NVPTXReg<"%rl256">; +def RL257 : NVPTXReg<"%rl257">; +def RL258 : NVPTXReg<"%rl258">; +def RL259 : NVPTXReg<"%rl259">; +def RL260 : NVPTXReg<"%rl260">; +def RL261 : NVPTXReg<"%rl261">; +def RL262 : NVPTXReg<"%rl262">; +def RL263 : NVPTXReg<"%rl263">; +def RL264 : NVPTXReg<"%rl264">; +def RL265 : NVPTXReg<"%rl265">; +def RL266 : NVPTXReg<"%rl266">; +def RL267 : NVPTXReg<"%rl267">; +def RL268 : NVPTXReg<"%rl268">; +def RL269 : NVPTXReg<"%rl269">; +def RL270 : NVPTXReg<"%rl270">; +def RL271 : NVPTXReg<"%rl271">; +def RL272 : NVPTXReg<"%rl272">; +def RL273 : NVPTXReg<"%rl273">; +def RL274 : NVPTXReg<"%rl274">; +def RL275 : NVPTXReg<"%rl275">; +def RL276 : NVPTXReg<"%rl276">; +def RL277 : NVPTXReg<"%rl277">; +def RL278 : NVPTXReg<"%rl278">; +def RL279 : NVPTXReg<"%rl279">; +def RL280 : NVPTXReg<"%rl280">; +def RL281 : NVPTXReg<"%rl281">; +def RL282 : NVPTXReg<"%rl282">; +def RL283 : NVPTXReg<"%rl283">; +def RL284 : NVPTXReg<"%rl284">; +def RL285 : NVPTXReg<"%rl285">; +def RL286 : NVPTXReg<"%rl286">; +def RL287 : NVPTXReg<"%rl287">; +def RL288 : NVPTXReg<"%rl288">; +def RL289 : NVPTXReg<"%rl289">; +def RL290 : NVPTXReg<"%rl290">; +def RL291 : NVPTXReg<"%rl291">; +def RL292 : NVPTXReg<"%rl292">; +def RL293 : NVPTXReg<"%rl293">; +def RL294 : NVPTXReg<"%rl294">; +def RL295 : NVPTXReg<"%rl295">; +def RL296 : NVPTXReg<"%rl296">; +def RL297 : NVPTXReg<"%rl297">; +def RL298 : NVPTXReg<"%rl298">; +def RL299 : NVPTXReg<"%rl299">; +def RL300 : NVPTXReg<"%rl300">; +def RL301 : NVPTXReg<"%rl301">; +def RL302 : NVPTXReg<"%rl302">; +def RL303 : NVPTXReg<"%rl303">; +def RL304 : NVPTXReg<"%rl304">; +def RL305 : NVPTXReg<"%rl305">; +def RL306 : NVPTXReg<"%rl306">; +def RL307 : NVPTXReg<"%rl307">; +def RL308 : NVPTXReg<"%rl308">; +def RL309 : NVPTXReg<"%rl309">; +def RL310 : NVPTXReg<"%rl310">; +def RL311 : NVPTXReg<"%rl311">; +def RL312 : NVPTXReg<"%rl312">; +def RL313 : NVPTXReg<"%rl313">; 
+def RL314 : NVPTXReg<"%rl314">; +def RL315 : NVPTXReg<"%rl315">; +def RL316 : NVPTXReg<"%rl316">; +def RL317 : NVPTXReg<"%rl317">; +def RL318 : NVPTXReg<"%rl318">; +def RL319 : NVPTXReg<"%rl319">; +def RL320 : NVPTXReg<"%rl320">; +def RL321 : NVPTXReg<"%rl321">; +def RL322 : NVPTXReg<"%rl322">; +def RL323 : NVPTXReg<"%rl323">; +def RL324 : NVPTXReg<"%rl324">; +def RL325 : NVPTXReg<"%rl325">; +def RL326 : NVPTXReg<"%rl326">; +def RL327 : NVPTXReg<"%rl327">; +def RL328 : NVPTXReg<"%rl328">; +def RL329 : NVPTXReg<"%rl329">; +def RL330 : NVPTXReg<"%rl330">; +def RL331 : NVPTXReg<"%rl331">; +def RL332 : NVPTXReg<"%rl332">; +def RL333 : NVPTXReg<"%rl333">; +def RL334 : NVPTXReg<"%rl334">; +def RL335 : NVPTXReg<"%rl335">; +def RL336 : NVPTXReg<"%rl336">; +def RL337 : NVPTXReg<"%rl337">; +def RL338 : NVPTXReg<"%rl338">; +def RL339 : NVPTXReg<"%rl339">; +def RL340 : NVPTXReg<"%rl340">; +def RL341 : NVPTXReg<"%rl341">; +def RL342 : NVPTXReg<"%rl342">; +def RL343 : NVPTXReg<"%rl343">; +def RL344 : NVPTXReg<"%rl344">; +def RL345 : NVPTXReg<"%rl345">; +def RL346 : NVPTXReg<"%rl346">; +def RL347 : NVPTXReg<"%rl347">; +def RL348 : NVPTXReg<"%rl348">; +def RL349 : NVPTXReg<"%rl349">; +def RL350 : NVPTXReg<"%rl350">; +def RL351 : NVPTXReg<"%rl351">; +def RL352 : NVPTXReg<"%rl352">; +def RL353 : NVPTXReg<"%rl353">; +def RL354 : NVPTXReg<"%rl354">; +def RL355 : NVPTXReg<"%rl355">; +def RL356 : NVPTXReg<"%rl356">; +def RL357 : NVPTXReg<"%rl357">; +def RL358 : NVPTXReg<"%rl358">; +def RL359 : NVPTXReg<"%rl359">; +def RL360 : NVPTXReg<"%rl360">; +def RL361 : NVPTXReg<"%rl361">; +def RL362 : NVPTXReg<"%rl362">; +def RL363 : NVPTXReg<"%rl363">; +def RL364 : NVPTXReg<"%rl364">; +def RL365 : NVPTXReg<"%rl365">; +def RL366 : NVPTXReg<"%rl366">; +def RL367 : NVPTXReg<"%rl367">; +def RL368 : NVPTXReg<"%rl368">; +def RL369 : NVPTXReg<"%rl369">; +def RL370 : NVPTXReg<"%rl370">; +def RL371 : NVPTXReg<"%rl371">; +def RL372 : NVPTXReg<"%rl372">; +def RL373 : NVPTXReg<"%rl373">; +def RL374 : 
NVPTXReg<"%rl374">; +def RL375 : NVPTXReg<"%rl375">; +def RL376 : NVPTXReg<"%rl376">; +def RL377 : NVPTXReg<"%rl377">; +def RL378 : NVPTXReg<"%rl378">; +def RL379 : NVPTXReg<"%rl379">; +def RL380 : NVPTXReg<"%rl380">; +def RL381 : NVPTXReg<"%rl381">; +def RL382 : NVPTXReg<"%rl382">; +def RL383 : NVPTXReg<"%rl383">; +def RL384 : NVPTXReg<"%rl384">; +def RL385 : NVPTXReg<"%rl385">; +def RL386 : NVPTXReg<"%rl386">; +def RL387 : NVPTXReg<"%rl387">; +def RL388 : NVPTXReg<"%rl388">; +def RL389 : NVPTXReg<"%rl389">; +def RL390 : NVPTXReg<"%rl390">; +def RL391 : NVPTXReg<"%rl391">; +def RL392 : NVPTXReg<"%rl392">; +def RL393 : NVPTXReg<"%rl393">; +def RL394 : NVPTXReg<"%rl394">; +def RL395 : NVPTXReg<"%rl395">; + +//===--- 32-bit float -----------------------------------------------------===// +def F0 : NVPTXReg<"%f0">; +def F1 : NVPTXReg<"%f1">; +def F2 : NVPTXReg<"%f2">; +def F3 : NVPTXReg<"%f3">; +def F4 : NVPTXReg<"%f4">; +def F5 : NVPTXReg<"%f5">; +def F6 : NVPTXReg<"%f6">; +def F7 : NVPTXReg<"%f7">; +def F8 : NVPTXReg<"%f8">; +def F9 : NVPTXReg<"%f9">; +def F10 : NVPTXReg<"%f10">; +def F11 : NVPTXReg<"%f11">; +def F12 : NVPTXReg<"%f12">; +def F13 : NVPTXReg<"%f13">; +def F14 : NVPTXReg<"%f14">; +def F15 : NVPTXReg<"%f15">; +def F16 : NVPTXReg<"%f16">; +def F17 : NVPTXReg<"%f17">; +def F18 : NVPTXReg<"%f18">; +def F19 : NVPTXReg<"%f19">; +def F20 : NVPTXReg<"%f20">; +def F21 : NVPTXReg<"%f21">; +def F22 : NVPTXReg<"%f22">; +def F23 : NVPTXReg<"%f23">; +def F24 : NVPTXReg<"%f24">; +def F25 : NVPTXReg<"%f25">; +def F26 : NVPTXReg<"%f26">; +def F27 : NVPTXReg<"%f27">; +def F28 : NVPTXReg<"%f28">; +def F29 : NVPTXReg<"%f29">; +def F30 : NVPTXReg<"%f30">; +def F31 : NVPTXReg<"%f31">; +def F32 : NVPTXReg<"%f32">; +def F33 : NVPTXReg<"%f33">; +def F34 : NVPTXReg<"%f34">; +def F35 : NVPTXReg<"%f35">; +def F36 : NVPTXReg<"%f36">; +def F37 : NVPTXReg<"%f37">; +def F38 : NVPTXReg<"%f38">; +def F39 : NVPTXReg<"%f39">; +def F40 : NVPTXReg<"%f40">; +def F41 : NVPTXReg<"%f41">; +def 
F42 : NVPTXReg<"%f42">; +def F43 : NVPTXReg<"%f43">; +def F44 : NVPTXReg<"%f44">; +def F45 : NVPTXReg<"%f45">; +def F46 : NVPTXReg<"%f46">; +def F47 : NVPTXReg<"%f47">; +def F48 : NVPTXReg<"%f48">; +def F49 : NVPTXReg<"%f49">; +def F50 : NVPTXReg<"%f50">; +def F51 : NVPTXReg<"%f51">; +def F52 : NVPTXReg<"%f52">; +def F53 : NVPTXReg<"%f53">; +def F54 : NVPTXReg<"%f54">; +def F55 : NVPTXReg<"%f55">; +def F56 : NVPTXReg<"%f56">; +def F57 : NVPTXReg<"%f57">; +def F58 : NVPTXReg<"%f58">; +def F59 : NVPTXReg<"%f59">; +def F60 : NVPTXReg<"%f60">; +def F61 : NVPTXReg<"%f61">; +def F62 : NVPTXReg<"%f62">; +def F63 : NVPTXReg<"%f63">; +def F64 : NVPTXReg<"%f64">; +def F65 : NVPTXReg<"%f65">; +def F66 : NVPTXReg<"%f66">; +def F67 : NVPTXReg<"%f67">; +def F68 : NVPTXReg<"%f68">; +def F69 : NVPTXReg<"%f69">; +def F70 : NVPTXReg<"%f70">; +def F71 : NVPTXReg<"%f71">; +def F72 : NVPTXReg<"%f72">; +def F73 : NVPTXReg<"%f73">; +def F74 : NVPTXReg<"%f74">; +def F75 : NVPTXReg<"%f75">; +def F76 : NVPTXReg<"%f76">; +def F77 : NVPTXReg<"%f77">; +def F78 : NVPTXReg<"%f78">; +def F79 : NVPTXReg<"%f79">; +def F80 : NVPTXReg<"%f80">; +def F81 : NVPTXReg<"%f81">; +def F82 : NVPTXReg<"%f82">; +def F83 : NVPTXReg<"%f83">; +def F84 : NVPTXReg<"%f84">; +def F85 : NVPTXReg<"%f85">; +def F86 : NVPTXReg<"%f86">; +def F87 : NVPTXReg<"%f87">; +def F88 : NVPTXReg<"%f88">; +def F89 : NVPTXReg<"%f89">; +def F90 : NVPTXReg<"%f90">; +def F91 : NVPTXReg<"%f91">; +def F92 : NVPTXReg<"%f92">; +def F93 : NVPTXReg<"%f93">; +def F94 : NVPTXReg<"%f94">; +def F95 : NVPTXReg<"%f95">; +def F96 : NVPTXReg<"%f96">; +def F97 : NVPTXReg<"%f97">; +def F98 : NVPTXReg<"%f98">; +def F99 : NVPTXReg<"%f99">; +def F100 : NVPTXReg<"%f100">; +def F101 : NVPTXReg<"%f101">; +def F102 : NVPTXReg<"%f102">; +def F103 : NVPTXReg<"%f103">; +def F104 : NVPTXReg<"%f104">; +def F105 : NVPTXReg<"%f105">; +def F106 : NVPTXReg<"%f106">; +def F107 : NVPTXReg<"%f107">; +def F108 : NVPTXReg<"%f108">; +def F109 : NVPTXReg<"%f109">; +def F110 : 
NVPTXReg<"%f110">; +def F111 : NVPTXReg<"%f111">; +def F112 : NVPTXReg<"%f112">; +def F113 : NVPTXReg<"%f113">; +def F114 : NVPTXReg<"%f114">; +def F115 : NVPTXReg<"%f115">; +def F116 : NVPTXReg<"%f116">; +def F117 : NVPTXReg<"%f117">; +def F118 : NVPTXReg<"%f118">; +def F119 : NVPTXReg<"%f119">; +def F120 : NVPTXReg<"%f120">; +def F121 : NVPTXReg<"%f121">; +def F122 : NVPTXReg<"%f122">; +def F123 : NVPTXReg<"%f123">; +def F124 : NVPTXReg<"%f124">; +def F125 : NVPTXReg<"%f125">; +def F126 : NVPTXReg<"%f126">; +def F127 : NVPTXReg<"%f127">; +def F128 : NVPTXReg<"%f128">; +def F129 : NVPTXReg<"%f129">; +def F130 : NVPTXReg<"%f130">; +def F131 : NVPTXReg<"%f131">; +def F132 : NVPTXReg<"%f132">; +def F133 : NVPTXReg<"%f133">; +def F134 : NVPTXReg<"%f134">; +def F135 : NVPTXReg<"%f135">; +def F136 : NVPTXReg<"%f136">; +def F137 : NVPTXReg<"%f137">; +def F138 : NVPTXReg<"%f138">; +def F139 : NVPTXReg<"%f139">; +def F140 : NVPTXReg<"%f140">; +def F141 : NVPTXReg<"%f141">; +def F142 : NVPTXReg<"%f142">; +def F143 : NVPTXReg<"%f143">; +def F144 : NVPTXReg<"%f144">; +def F145 : NVPTXReg<"%f145">; +def F146 : NVPTXReg<"%f146">; +def F147 : NVPTXReg<"%f147">; +def F148 : NVPTXReg<"%f148">; +def F149 : NVPTXReg<"%f149">; +def F150 : NVPTXReg<"%f150">; +def F151 : NVPTXReg<"%f151">; +def F152 : NVPTXReg<"%f152">; +def F153 : NVPTXReg<"%f153">; +def F154 : NVPTXReg<"%f154">; +def F155 : NVPTXReg<"%f155">; +def F156 : NVPTXReg<"%f156">; +def F157 : NVPTXReg<"%f157">; +def F158 : NVPTXReg<"%f158">; +def F159 : NVPTXReg<"%f159">; +def F160 : NVPTXReg<"%f160">; +def F161 : NVPTXReg<"%f161">; +def F162 : NVPTXReg<"%f162">; +def F163 : NVPTXReg<"%f163">; +def F164 : NVPTXReg<"%f164">; +def F165 : NVPTXReg<"%f165">; +def F166 : NVPTXReg<"%f166">; +def F167 : NVPTXReg<"%f167">; +def F168 : NVPTXReg<"%f168">; +def F169 : NVPTXReg<"%f169">; +def F170 : NVPTXReg<"%f170">; +def F171 : NVPTXReg<"%f171">; +def F172 : NVPTXReg<"%f172">; +def F173 : NVPTXReg<"%f173">; +def F174 : 
NVPTXReg<"%f174">; +def F175 : NVPTXReg<"%f175">; +def F176 : NVPTXReg<"%f176">; +def F177 : NVPTXReg<"%f177">; +def F178 : NVPTXReg<"%f178">; +def F179 : NVPTXReg<"%f179">; +def F180 : NVPTXReg<"%f180">; +def F181 : NVPTXReg<"%f181">; +def F182 : NVPTXReg<"%f182">; +def F183 : NVPTXReg<"%f183">; +def F184 : NVPTXReg<"%f184">; +def F185 : NVPTXReg<"%f185">; +def F186 : NVPTXReg<"%f186">; +def F187 : NVPTXReg<"%f187">; +def F188 : NVPTXReg<"%f188">; +def F189 : NVPTXReg<"%f189">; +def F190 : NVPTXReg<"%f190">; +def F191 : NVPTXReg<"%f191">; +def F192 : NVPTXReg<"%f192">; +def F193 : NVPTXReg<"%f193">; +def F194 : NVPTXReg<"%f194">; +def F195 : NVPTXReg<"%f195">; +def F196 : NVPTXReg<"%f196">; +def F197 : NVPTXReg<"%f197">; +def F198 : NVPTXReg<"%f198">; +def F199 : NVPTXReg<"%f199">; +def F200 : NVPTXReg<"%f200">; +def F201 : NVPTXReg<"%f201">; +def F202 : NVPTXReg<"%f202">; +def F203 : NVPTXReg<"%f203">; +def F204 : NVPTXReg<"%f204">; +def F205 : NVPTXReg<"%f205">; +def F206 : NVPTXReg<"%f206">; +def F207 : NVPTXReg<"%f207">; +def F208 : NVPTXReg<"%f208">; +def F209 : NVPTXReg<"%f209">; +def F210 : NVPTXReg<"%f210">; +def F211 : NVPTXReg<"%f211">; +def F212 : NVPTXReg<"%f212">; +def F213 : NVPTXReg<"%f213">; +def F214 : NVPTXReg<"%f214">; +def F215 : NVPTXReg<"%f215">; +def F216 : NVPTXReg<"%f216">; +def F217 : NVPTXReg<"%f217">; +def F218 : NVPTXReg<"%f218">; +def F219 : NVPTXReg<"%f219">; +def F220 : NVPTXReg<"%f220">; +def F221 : NVPTXReg<"%f221">; +def F222 : NVPTXReg<"%f222">; +def F223 : NVPTXReg<"%f223">; +def F224 : NVPTXReg<"%f224">; +def F225 : NVPTXReg<"%f225">; +def F226 : NVPTXReg<"%f226">; +def F227 : NVPTXReg<"%f227">; +def F228 : NVPTXReg<"%f228">; +def F229 : NVPTXReg<"%f229">; +def F230 : NVPTXReg<"%f230">; +def F231 : NVPTXReg<"%f231">; +def F232 : NVPTXReg<"%f232">; +def F233 : NVPTXReg<"%f233">; +def F234 : NVPTXReg<"%f234">; +def F235 : NVPTXReg<"%f235">; +def F236 : NVPTXReg<"%f236">; +def F237 : NVPTXReg<"%f237">; +def F238 : 
NVPTXReg<"%f238">; +def F239 : NVPTXReg<"%f239">; +def F240 : NVPTXReg<"%f240">; +def F241 : NVPTXReg<"%f241">; +def F242 : NVPTXReg<"%f242">; +def F243 : NVPTXReg<"%f243">; +def F244 : NVPTXReg<"%f244">; +def F245 : NVPTXReg<"%f245">; +def F246 : NVPTXReg<"%f246">; +def F247 : NVPTXReg<"%f247">; +def F248 : NVPTXReg<"%f248">; +def F249 : NVPTXReg<"%f249">; +def F250 : NVPTXReg<"%f250">; +def F251 : NVPTXReg<"%f251">; +def F252 : NVPTXReg<"%f252">; +def F253 : NVPTXReg<"%f253">; +def F254 : NVPTXReg<"%f254">; +def F255 : NVPTXReg<"%f255">; +def F256 : NVPTXReg<"%f256">; +def F257 : NVPTXReg<"%f257">; +def F258 : NVPTXReg<"%f258">; +def F259 : NVPTXReg<"%f259">; +def F260 : NVPTXReg<"%f260">; +def F261 : NVPTXReg<"%f261">; +def F262 : NVPTXReg<"%f262">; +def F263 : NVPTXReg<"%f263">; +def F264 : NVPTXReg<"%f264">; +def F265 : NVPTXReg<"%f265">; +def F266 : NVPTXReg<"%f266">; +def F267 : NVPTXReg<"%f267">; +def F268 : NVPTXReg<"%f268">; +def F269 : NVPTXReg<"%f269">; +def F270 : NVPTXReg<"%f270">; +def F271 : NVPTXReg<"%f271">; +def F272 : NVPTXReg<"%f272">; +def F273 : NVPTXReg<"%f273">; +def F274 : NVPTXReg<"%f274">; +def F275 : NVPTXReg<"%f275">; +def F276 : NVPTXReg<"%f276">; +def F277 : NVPTXReg<"%f277">; +def F278 : NVPTXReg<"%f278">; +def F279 : NVPTXReg<"%f279">; +def F280 : NVPTXReg<"%f280">; +def F281 : NVPTXReg<"%f281">; +def F282 : NVPTXReg<"%f282">; +def F283 : NVPTXReg<"%f283">; +def F284 : NVPTXReg<"%f284">; +def F285 : NVPTXReg<"%f285">; +def F286 : NVPTXReg<"%f286">; +def F287 : NVPTXReg<"%f287">; +def F288 : NVPTXReg<"%f288">; +def F289 : NVPTXReg<"%f289">; +def F290 : NVPTXReg<"%f290">; +def F291 : NVPTXReg<"%f291">; +def F292 : NVPTXReg<"%f292">; +def F293 : NVPTXReg<"%f293">; +def F294 : NVPTXReg<"%f294">; +def F295 : NVPTXReg<"%f295">; +def F296 : NVPTXReg<"%f296">; +def F297 : NVPTXReg<"%f297">; +def F298 : NVPTXReg<"%f298">; +def F299 : NVPTXReg<"%f299">; +def F300 : NVPTXReg<"%f300">; +def F301 : NVPTXReg<"%f301">; +def F302 : 
NVPTXReg<"%f302">; +def F303 : NVPTXReg<"%f303">; +def F304 : NVPTXReg<"%f304">; +def F305 : NVPTXReg<"%f305">; +def F306 : NVPTXReg<"%f306">; +def F307 : NVPTXReg<"%f307">; +def F308 : NVPTXReg<"%f308">; +def F309 : NVPTXReg<"%f309">; +def F310 : NVPTXReg<"%f310">; +def F311 : NVPTXReg<"%f311">; +def F312 : NVPTXReg<"%f312">; +def F313 : NVPTXReg<"%f313">; +def F314 : NVPTXReg<"%f314">; +def F315 : NVPTXReg<"%f315">; +def F316 : NVPTXReg<"%f316">; +def F317 : NVPTXReg<"%f317">; +def F318 : NVPTXReg<"%f318">; +def F319 : NVPTXReg<"%f319">; +def F320 : NVPTXReg<"%f320">; +def F321 : NVPTXReg<"%f321">; +def F322 : NVPTXReg<"%f322">; +def F323 : NVPTXReg<"%f323">; +def F324 : NVPTXReg<"%f324">; +def F325 : NVPTXReg<"%f325">; +def F326 : NVPTXReg<"%f326">; +def F327 : NVPTXReg<"%f327">; +def F328 : NVPTXReg<"%f328">; +def F329 : NVPTXReg<"%f329">; +def F330 : NVPTXReg<"%f330">; +def F331 : NVPTXReg<"%f331">; +def F332 : NVPTXReg<"%f332">; +def F333 : NVPTXReg<"%f333">; +def F334 : NVPTXReg<"%f334">; +def F335 : NVPTXReg<"%f335">; +def F336 : NVPTXReg<"%f336">; +def F337 : NVPTXReg<"%f337">; +def F338 : NVPTXReg<"%f338">; +def F339 : NVPTXReg<"%f339">; +def F340 : NVPTXReg<"%f340">; +def F341 : NVPTXReg<"%f341">; +def F342 : NVPTXReg<"%f342">; +def F343 : NVPTXReg<"%f343">; +def F344 : NVPTXReg<"%f344">; +def F345 : NVPTXReg<"%f345">; +def F346 : NVPTXReg<"%f346">; +def F347 : NVPTXReg<"%f347">; +def F348 : NVPTXReg<"%f348">; +def F349 : NVPTXReg<"%f349">; +def F350 : NVPTXReg<"%f350">; +def F351 : NVPTXReg<"%f351">; +def F352 : NVPTXReg<"%f352">; +def F353 : NVPTXReg<"%f353">; +def F354 : NVPTXReg<"%f354">; +def F355 : NVPTXReg<"%f355">; +def F356 : NVPTXReg<"%f356">; +def F357 : NVPTXReg<"%f357">; +def F358 : NVPTXReg<"%f358">; +def F359 : NVPTXReg<"%f359">; +def F360 : NVPTXReg<"%f360">; +def F361 : NVPTXReg<"%f361">; +def F362 : NVPTXReg<"%f362">; +def F363 : NVPTXReg<"%f363">; +def F364 : NVPTXReg<"%f364">; +def F365 : NVPTXReg<"%f365">; +def F366 : 
NVPTXReg<"%f366">; +def F367 : NVPTXReg<"%f367">; +def F368 : NVPTXReg<"%f368">; +def F369 : NVPTXReg<"%f369">; +def F370 : NVPTXReg<"%f370">; +def F371 : NVPTXReg<"%f371">; +def F372 : NVPTXReg<"%f372">; +def F373 : NVPTXReg<"%f373">; +def F374 : NVPTXReg<"%f374">; +def F375 : NVPTXReg<"%f375">; +def F376 : NVPTXReg<"%f376">; +def F377 : NVPTXReg<"%f377">; +def F378 : NVPTXReg<"%f378">; +def F379 : NVPTXReg<"%f379">; +def F380 : NVPTXReg<"%f380">; +def F381 : NVPTXReg<"%f381">; +def F382 : NVPTXReg<"%f382">; +def F383 : NVPTXReg<"%f383">; +def F384 : NVPTXReg<"%f384">; +def F385 : NVPTXReg<"%f385">; +def F386 : NVPTXReg<"%f386">; +def F387 : NVPTXReg<"%f387">; +def F388 : NVPTXReg<"%f388">; +def F389 : NVPTXReg<"%f389">; +def F390 : NVPTXReg<"%f390">; +def F391 : NVPTXReg<"%f391">; +def F392 : NVPTXReg<"%f392">; +def F393 : NVPTXReg<"%f393">; +def F394 : NVPTXReg<"%f394">; +def F395 : NVPTXReg<"%f395">; + +//===--- 64-bit float -----------------------------------------------------===// +def FL0 : NVPTXReg<"%fl0">; +def FL1 : NVPTXReg<"%fl1">; +def FL2 : NVPTXReg<"%fl2">; +def FL3 : NVPTXReg<"%fl3">; +def FL4 : NVPTXReg<"%fl4">; +def FL5 : NVPTXReg<"%fl5">; +def FL6 : NVPTXReg<"%fl6">; +def FL7 : NVPTXReg<"%fl7">; +def FL8 : NVPTXReg<"%fl8">; +def FL9 : NVPTXReg<"%fl9">; +def FL10 : NVPTXReg<"%fl10">; +def FL11 : NVPTXReg<"%fl11">; +def FL12 : NVPTXReg<"%fl12">; +def FL13 : NVPTXReg<"%fl13">; +def FL14 : NVPTXReg<"%fl14">; +def FL15 : NVPTXReg<"%fl15">; +def FL16 : NVPTXReg<"%fl16">; +def FL17 : NVPTXReg<"%fl17">; +def FL18 : NVPTXReg<"%fl18">; +def FL19 : NVPTXReg<"%fl19">; +def FL20 : NVPTXReg<"%fl20">; +def FL21 : NVPTXReg<"%fl21">; +def FL22 : NVPTXReg<"%fl22">; +def FL23 : NVPTXReg<"%fl23">; +def FL24 : NVPTXReg<"%fl24">; +def FL25 : NVPTXReg<"%fl25">; +def FL26 : NVPTXReg<"%fl26">; +def FL27 : NVPTXReg<"%fl27">; +def FL28 : NVPTXReg<"%fl28">; +def FL29 : NVPTXReg<"%fl29">; +def FL30 : NVPTXReg<"%fl30">; +def FL31 : NVPTXReg<"%fl31">; +def FL32 : 
NVPTXReg<"%fl32">; +def FL33 : NVPTXReg<"%fl33">; +def FL34 : NVPTXReg<"%fl34">; +def FL35 : NVPTXReg<"%fl35">; +def FL36 : NVPTXReg<"%fl36">; +def FL37 : NVPTXReg<"%fl37">; +def FL38 : NVPTXReg<"%fl38">; +def FL39 : NVPTXReg<"%fl39">; +def FL40 : NVPTXReg<"%fl40">; +def FL41 : NVPTXReg<"%fl41">; +def FL42 : NVPTXReg<"%fl42">; +def FL43 : NVPTXReg<"%fl43">; +def FL44 : NVPTXReg<"%fl44">; +def FL45 : NVPTXReg<"%fl45">; +def FL46 : NVPTXReg<"%fl46">; +def FL47 : NVPTXReg<"%fl47">; +def FL48 : NVPTXReg<"%fl48">; +def FL49 : NVPTXReg<"%fl49">; +def FL50 : NVPTXReg<"%fl50">; +def FL51 : NVPTXReg<"%fl51">; +def FL52 : NVPTXReg<"%fl52">; +def FL53 : NVPTXReg<"%fl53">; +def FL54 : NVPTXReg<"%fl54">; +def FL55 : NVPTXReg<"%fl55">; +def FL56 : NVPTXReg<"%fl56">; +def FL57 : NVPTXReg<"%fl57">; +def FL58 : NVPTXReg<"%fl58">; +def FL59 : NVPTXReg<"%fl59">; +def FL60 : NVPTXReg<"%fl60">; +def FL61 : NVPTXReg<"%fl61">; +def FL62 : NVPTXReg<"%fl62">; +def FL63 : NVPTXReg<"%fl63">; +def FL64 : NVPTXReg<"%fl64">; +def FL65 : NVPTXReg<"%fl65">; +def FL66 : NVPTXReg<"%fl66">; +def FL67 : NVPTXReg<"%fl67">; +def FL68 : NVPTXReg<"%fl68">; +def FL69 : NVPTXReg<"%fl69">; +def FL70 : NVPTXReg<"%fl70">; +def FL71 : NVPTXReg<"%fl71">; +def FL72 : NVPTXReg<"%fl72">; +def FL73 : NVPTXReg<"%fl73">; +def FL74 : NVPTXReg<"%fl74">; +def FL75 : NVPTXReg<"%fl75">; +def FL76 : NVPTXReg<"%fl76">; +def FL77 : NVPTXReg<"%fl77">; +def FL78 : NVPTXReg<"%fl78">; +def FL79 : NVPTXReg<"%fl79">; +def FL80 : NVPTXReg<"%fl80">; +def FL81 : NVPTXReg<"%fl81">; +def FL82 : NVPTXReg<"%fl82">; +def FL83 : NVPTXReg<"%fl83">; +def FL84 : NVPTXReg<"%fl84">; +def FL85 : NVPTXReg<"%fl85">; +def FL86 : NVPTXReg<"%fl86">; +def FL87 : NVPTXReg<"%fl87">; +def FL88 : NVPTXReg<"%fl88">; +def FL89 : NVPTXReg<"%fl89">; +def FL90 : NVPTXReg<"%fl90">; +def FL91 : NVPTXReg<"%fl91">; +def FL92 : NVPTXReg<"%fl92">; +def FL93 : NVPTXReg<"%fl93">; +def FL94 : NVPTXReg<"%fl94">; +def FL95 : NVPTXReg<"%fl95">; +def FL96 : 
NVPTXReg<"%fl96">; +def FL97 : NVPTXReg<"%fl97">; +def FL98 : NVPTXReg<"%fl98">; +def FL99 : NVPTXReg<"%fl99">; +def FL100 : NVPTXReg<"%fl100">; +def FL101 : NVPTXReg<"%fl101">; +def FL102 : NVPTXReg<"%fl102">; +def FL103 : NVPTXReg<"%fl103">; +def FL104 : NVPTXReg<"%fl104">; +def FL105 : NVPTXReg<"%fl105">; +def FL106 : NVPTXReg<"%fl106">; +def FL107 : NVPTXReg<"%fl107">; +def FL108 : NVPTXReg<"%fl108">; +def FL109 : NVPTXReg<"%fl109">; +def FL110 : NVPTXReg<"%fl110">; +def FL111 : NVPTXReg<"%fl111">; +def FL112 : NVPTXReg<"%fl112">; +def FL113 : NVPTXReg<"%fl113">; +def FL114 : NVPTXReg<"%fl114">; +def FL115 : NVPTXReg<"%fl115">; +def FL116 : NVPTXReg<"%fl116">; +def FL117 : NVPTXReg<"%fl117">; +def FL118 : NVPTXReg<"%fl118">; +def FL119 : NVPTXReg<"%fl119">; +def FL120 : NVPTXReg<"%fl120">; +def FL121 : NVPTXReg<"%fl121">; +def FL122 : NVPTXReg<"%fl122">; +def FL123 : NVPTXReg<"%fl123">; +def FL124 : NVPTXReg<"%fl124">; +def FL125 : NVPTXReg<"%fl125">; +def FL126 : NVPTXReg<"%fl126">; +def FL127 : NVPTXReg<"%fl127">; +def FL128 : NVPTXReg<"%fl128">; +def FL129 : NVPTXReg<"%fl129">; +def FL130 : NVPTXReg<"%fl130">; +def FL131 : NVPTXReg<"%fl131">; +def FL132 : NVPTXReg<"%fl132">; +def FL133 : NVPTXReg<"%fl133">; +def FL134 : NVPTXReg<"%fl134">; +def FL135 : NVPTXReg<"%fl135">; +def FL136 : NVPTXReg<"%fl136">; +def FL137 : NVPTXReg<"%fl137">; +def FL138 : NVPTXReg<"%fl138">; +def FL139 : NVPTXReg<"%fl139">; +def FL140 : NVPTXReg<"%fl140">; +def FL141 : NVPTXReg<"%fl141">; +def FL142 : NVPTXReg<"%fl142">; +def FL143 : NVPTXReg<"%fl143">; +def FL144 : NVPTXReg<"%fl144">; +def FL145 : NVPTXReg<"%fl145">; +def FL146 : NVPTXReg<"%fl146">; +def FL147 : NVPTXReg<"%fl147">; +def FL148 : NVPTXReg<"%fl148">; +def FL149 : NVPTXReg<"%fl149">; +def FL150 : NVPTXReg<"%fl150">; +def FL151 : NVPTXReg<"%fl151">; +def FL152 : NVPTXReg<"%fl152">; +def FL153 : NVPTXReg<"%fl153">; +def FL154 : NVPTXReg<"%fl154">; +def FL155 : NVPTXReg<"%fl155">; +def FL156 : NVPTXReg<"%fl156">; +def 
FL157 : NVPTXReg<"%fl157">; +def FL158 : NVPTXReg<"%fl158">; +def FL159 : NVPTXReg<"%fl159">; +def FL160 : NVPTXReg<"%fl160">; +def FL161 : NVPTXReg<"%fl161">; +def FL162 : NVPTXReg<"%fl162">; +def FL163 : NVPTXReg<"%fl163">; +def FL164 : NVPTXReg<"%fl164">; +def FL165 : NVPTXReg<"%fl165">; +def FL166 : NVPTXReg<"%fl166">; +def FL167 : NVPTXReg<"%fl167">; +def FL168 : NVPTXReg<"%fl168">; +def FL169 : NVPTXReg<"%fl169">; +def FL170 : NVPTXReg<"%fl170">; +def FL171 : NVPTXReg<"%fl171">; +def FL172 : NVPTXReg<"%fl172">; +def FL173 : NVPTXReg<"%fl173">; +def FL174 : NVPTXReg<"%fl174">; +def FL175 : NVPTXReg<"%fl175">; +def FL176 : NVPTXReg<"%fl176">; +def FL177 : NVPTXReg<"%fl177">; +def FL178 : NVPTXReg<"%fl178">; +def FL179 : NVPTXReg<"%fl179">; +def FL180 : NVPTXReg<"%fl180">; +def FL181 : NVPTXReg<"%fl181">; +def FL182 : NVPTXReg<"%fl182">; +def FL183 : NVPTXReg<"%fl183">; +def FL184 : NVPTXReg<"%fl184">; +def FL185 : NVPTXReg<"%fl185">; +def FL186 : NVPTXReg<"%fl186">; +def FL187 : NVPTXReg<"%fl187">; +def FL188 : NVPTXReg<"%fl188">; +def FL189 : NVPTXReg<"%fl189">; +def FL190 : NVPTXReg<"%fl190">; +def FL191 : NVPTXReg<"%fl191">; +def FL192 : NVPTXReg<"%fl192">; +def FL193 : NVPTXReg<"%fl193">; +def FL194 : NVPTXReg<"%fl194">; +def FL195 : NVPTXReg<"%fl195">; +def FL196 : NVPTXReg<"%fl196">; +def FL197 : NVPTXReg<"%fl197">; +def FL198 : NVPTXReg<"%fl198">; +def FL199 : NVPTXReg<"%fl199">; +def FL200 : NVPTXReg<"%fl200">; +def FL201 : NVPTXReg<"%fl201">; +def FL202 : NVPTXReg<"%fl202">; +def FL203 : NVPTXReg<"%fl203">; +def FL204 : NVPTXReg<"%fl204">; +def FL205 : NVPTXReg<"%fl205">; +def FL206 : NVPTXReg<"%fl206">; +def FL207 : NVPTXReg<"%fl207">; +def FL208 : NVPTXReg<"%fl208">; +def FL209 : NVPTXReg<"%fl209">; +def FL210 : NVPTXReg<"%fl210">; +def FL211 : NVPTXReg<"%fl211">; +def FL212 : NVPTXReg<"%fl212">; +def FL213 : NVPTXReg<"%fl213">; +def FL214 : NVPTXReg<"%fl214">; +def FL215 : NVPTXReg<"%fl215">; +def FL216 : NVPTXReg<"%fl216">; +def FL217 : 
NVPTXReg<"%fl217">; +def FL218 : NVPTXReg<"%fl218">; +def FL219 : NVPTXReg<"%fl219">; +def FL220 : NVPTXReg<"%fl220">; +def FL221 : NVPTXReg<"%fl221">; +def FL222 : NVPTXReg<"%fl222">; +def FL223 : NVPTXReg<"%fl223">; +def FL224 : NVPTXReg<"%fl224">; +def FL225 : NVPTXReg<"%fl225">; +def FL226 : NVPTXReg<"%fl226">; +def FL227 : NVPTXReg<"%fl227">; +def FL228 : NVPTXReg<"%fl228">; +def FL229 : NVPTXReg<"%fl229">; +def FL230 : NVPTXReg<"%fl230">; +def FL231 : NVPTXReg<"%fl231">; +def FL232 : NVPTXReg<"%fl232">; +def FL233 : NVPTXReg<"%fl233">; +def FL234 : NVPTXReg<"%fl234">; +def FL235 : NVPTXReg<"%fl235">; +def FL236 : NVPTXReg<"%fl236">; +def FL237 : NVPTXReg<"%fl237">; +def FL238 : NVPTXReg<"%fl238">; +def FL239 : NVPTXReg<"%fl239">; +def FL240 : NVPTXReg<"%fl240">; +def FL241 : NVPTXReg<"%fl241">; +def FL242 : NVPTXReg<"%fl242">; +def FL243 : NVPTXReg<"%fl243">; +def FL244 : NVPTXReg<"%fl244">; +def FL245 : NVPTXReg<"%fl245">; +def FL246 : NVPTXReg<"%fl246">; +def FL247 : NVPTXReg<"%fl247">; +def FL248 : NVPTXReg<"%fl248">; +def FL249 : NVPTXReg<"%fl249">; +def FL250 : NVPTXReg<"%fl250">; +def FL251 : NVPTXReg<"%fl251">; +def FL252 : NVPTXReg<"%fl252">; +def FL253 : NVPTXReg<"%fl253">; +def FL254 : NVPTXReg<"%fl254">; +def FL255 : NVPTXReg<"%fl255">; +def FL256 : NVPTXReg<"%fl256">; +def FL257 : NVPTXReg<"%fl257">; +def FL258 : NVPTXReg<"%fl258">; +def FL259 : NVPTXReg<"%fl259">; +def FL260 : NVPTXReg<"%fl260">; +def FL261 : NVPTXReg<"%fl261">; +def FL262 : NVPTXReg<"%fl262">; +def FL263 : NVPTXReg<"%fl263">; +def FL264 : NVPTXReg<"%fl264">; +def FL265 : NVPTXReg<"%fl265">; +def FL266 : NVPTXReg<"%fl266">; +def FL267 : NVPTXReg<"%fl267">; +def FL268 : NVPTXReg<"%fl268">; +def FL269 : NVPTXReg<"%fl269">; +def FL270 : NVPTXReg<"%fl270">; +def FL271 : NVPTXReg<"%fl271">; +def FL272 : NVPTXReg<"%fl272">; +def FL273 : NVPTXReg<"%fl273">; +def FL274 : NVPTXReg<"%fl274">; +def FL275 : NVPTXReg<"%fl275">; +def FL276 : NVPTXReg<"%fl276">; +def FL277 : NVPTXReg<"%fl277">; 
+def FL278 : NVPTXReg<"%fl278">; +def FL279 : NVPTXReg<"%fl279">; +def FL280 : NVPTXReg<"%fl280">; +def FL281 : NVPTXReg<"%fl281">; +def FL282 : NVPTXReg<"%fl282">; +def FL283 : NVPTXReg<"%fl283">; +def FL284 : NVPTXReg<"%fl284">; +def FL285 : NVPTXReg<"%fl285">; +def FL286 : NVPTXReg<"%fl286">; +def FL287 : NVPTXReg<"%fl287">; +def FL288 : NVPTXReg<"%fl288">; +def FL289 : NVPTXReg<"%fl289">; +def FL290 : NVPTXReg<"%fl290">; +def FL291 : NVPTXReg<"%fl291">; +def FL292 : NVPTXReg<"%fl292">; +def FL293 : NVPTXReg<"%fl293">; +def FL294 : NVPTXReg<"%fl294">; +def FL295 : NVPTXReg<"%fl295">; +def FL296 : NVPTXReg<"%fl296">; +def FL297 : NVPTXReg<"%fl297">; +def FL298 : NVPTXReg<"%fl298">; +def FL299 : NVPTXReg<"%fl299">; +def FL300 : NVPTXReg<"%fl300">; +def FL301 : NVPTXReg<"%fl301">; +def FL302 : NVPTXReg<"%fl302">; +def FL303 : NVPTXReg<"%fl303">; +def FL304 : NVPTXReg<"%fl304">; +def FL305 : NVPTXReg<"%fl305">; +def FL306 : NVPTXReg<"%fl306">; +def FL307 : NVPTXReg<"%fl307">; +def FL308 : NVPTXReg<"%fl308">; +def FL309 : NVPTXReg<"%fl309">; +def FL310 : NVPTXReg<"%fl310">; +def FL311 : NVPTXReg<"%fl311">; +def FL312 : NVPTXReg<"%fl312">; +def FL313 : NVPTXReg<"%fl313">; +def FL314 : NVPTXReg<"%fl314">; +def FL315 : NVPTXReg<"%fl315">; +def FL316 : NVPTXReg<"%fl316">; +def FL317 : NVPTXReg<"%fl317">; +def FL318 : NVPTXReg<"%fl318">; +def FL319 : NVPTXReg<"%fl319">; +def FL320 : NVPTXReg<"%fl320">; +def FL321 : NVPTXReg<"%fl321">; +def FL322 : NVPTXReg<"%fl322">; +def FL323 : NVPTXReg<"%fl323">; +def FL324 : NVPTXReg<"%fl324">; +def FL325 : NVPTXReg<"%fl325">; +def FL326 : NVPTXReg<"%fl326">; +def FL327 : NVPTXReg<"%fl327">; +def FL328 : NVPTXReg<"%fl328">; +def FL329 : NVPTXReg<"%fl329">; +def FL330 : NVPTXReg<"%fl330">; +def FL331 : NVPTXReg<"%fl331">; +def FL332 : NVPTXReg<"%fl332">; +def FL333 : NVPTXReg<"%fl333">; +def FL334 : NVPTXReg<"%fl334">; +def FL335 : NVPTXReg<"%fl335">; +def FL336 : NVPTXReg<"%fl336">; +def FL337 : NVPTXReg<"%fl337">; +def FL338 : 
NVPTXReg<"%fl338">; +def FL339 : NVPTXReg<"%fl339">; +def FL340 : NVPTXReg<"%fl340">; +def FL341 : NVPTXReg<"%fl341">; +def FL342 : NVPTXReg<"%fl342">; +def FL343 : NVPTXReg<"%fl343">; +def FL344 : NVPTXReg<"%fl344">; +def FL345 : NVPTXReg<"%fl345">; +def FL346 : NVPTXReg<"%fl346">; +def FL347 : NVPTXReg<"%fl347">; +def FL348 : NVPTXReg<"%fl348">; +def FL349 : NVPTXReg<"%fl349">; +def FL350 : NVPTXReg<"%fl350">; +def FL351 : NVPTXReg<"%fl351">; +def FL352 : NVPTXReg<"%fl352">; +def FL353 : NVPTXReg<"%fl353">; +def FL354 : NVPTXReg<"%fl354">; +def FL355 : NVPTXReg<"%fl355">; +def FL356 : NVPTXReg<"%fl356">; +def FL357 : NVPTXReg<"%fl357">; +def FL358 : NVPTXReg<"%fl358">; +def FL359 : NVPTXReg<"%fl359">; +def FL360 : NVPTXReg<"%fl360">; +def FL361 : NVPTXReg<"%fl361">; +def FL362 : NVPTXReg<"%fl362">; +def FL363 : NVPTXReg<"%fl363">; +def FL364 : NVPTXReg<"%fl364">; +def FL365 : NVPTXReg<"%fl365">; +def FL366 : NVPTXReg<"%fl366">; +def FL367 : NVPTXReg<"%fl367">; +def FL368 : NVPTXReg<"%fl368">; +def FL369 : NVPTXReg<"%fl369">; +def FL370 : NVPTXReg<"%fl370">; +def FL371 : NVPTXReg<"%fl371">; +def FL372 : NVPTXReg<"%fl372">; +def FL373 : NVPTXReg<"%fl373">; +def FL374 : NVPTXReg<"%fl374">; +def FL375 : NVPTXReg<"%fl375">; +def FL376 : NVPTXReg<"%fl376">; +def FL377 : NVPTXReg<"%fl377">; +def FL378 : NVPTXReg<"%fl378">; +def FL379 : NVPTXReg<"%fl379">; +def FL380 : NVPTXReg<"%fl380">; +def FL381 : NVPTXReg<"%fl381">; +def FL382 : NVPTXReg<"%fl382">; +def FL383 : NVPTXReg<"%fl383">; +def FL384 : NVPTXReg<"%fl384">; +def FL385 : NVPTXReg<"%fl385">; +def FL386 : NVPTXReg<"%fl386">; +def FL387 : NVPTXReg<"%fl387">; +def FL388 : NVPTXReg<"%fl388">; +def FL389 : NVPTXReg<"%fl389">; +def FL390 : NVPTXReg<"%fl390">; +def FL391 : NVPTXReg<"%fl391">; +def FL392 : NVPTXReg<"%fl392">; +def FL393 : NVPTXReg<"%fl393">; +def FL394 : NVPTXReg<"%fl394">; +def FL395 : NVPTXReg<"%fl395">; + +//===--- Vector -----------------------------------------------------------===// +def v2b8_0 : 
NVPTXReg<"%v2b8_0">; +def v2b8_1 : NVPTXReg<"%v2b8_1">; +def v2b8_2 : NVPTXReg<"%v2b8_2">; +def v2b8_3 : NVPTXReg<"%v2b8_3">; +def v2b8_4 : NVPTXReg<"%v2b8_4">; +def v2b8_5 : NVPTXReg<"%v2b8_5">; +def v2b8_6 : NVPTXReg<"%v2b8_6">; +def v2b8_7 : NVPTXReg<"%v2b8_7">; +def v2b8_8 : NVPTXReg<"%v2b8_8">; +def v2b8_9 : NVPTXReg<"%v2b8_9">; +def v2b8_10 : NVPTXReg<"%v2b8_10">; +def v2b8_11 : NVPTXReg<"%v2b8_11">; +def v2b8_12 : NVPTXReg<"%v2b8_12">; +def v2b8_13 : NVPTXReg<"%v2b8_13">; +def v2b8_14 : NVPTXReg<"%v2b8_14">; +def v2b8_15 : NVPTXReg<"%v2b8_15">; +def v2b8_16 : NVPTXReg<"%v2b8_16">; +def v2b8_17 : NVPTXReg<"%v2b8_17">; +def v2b8_18 : NVPTXReg<"%v2b8_18">; +def v2b8_19 : NVPTXReg<"%v2b8_19">; +def v2b8_20 : NVPTXReg<"%v2b8_20">; +def v2b8_21 : NVPTXReg<"%v2b8_21">; +def v2b8_22 : NVPTXReg<"%v2b8_22">; +def v2b8_23 : NVPTXReg<"%v2b8_23">; +def v2b8_24 : NVPTXReg<"%v2b8_24">; +def v2b8_25 : NVPTXReg<"%v2b8_25">; +def v2b8_26 : NVPTXReg<"%v2b8_26">; +def v2b8_27 : NVPTXReg<"%v2b8_27">; +def v2b8_28 : NVPTXReg<"%v2b8_28">; +def v2b8_29 : NVPTXReg<"%v2b8_29">; +def v2b8_30 : NVPTXReg<"%v2b8_30">; +def v2b8_31 : NVPTXReg<"%v2b8_31">; +def v2b8_32 : NVPTXReg<"%v2b8_32">; +def v2b8_33 : NVPTXReg<"%v2b8_33">; +def v2b8_34 : NVPTXReg<"%v2b8_34">; +def v2b8_35 : NVPTXReg<"%v2b8_35">; +def v2b8_36 : NVPTXReg<"%v2b8_36">; +def v2b8_37 : NVPTXReg<"%v2b8_37">; +def v2b8_38 : NVPTXReg<"%v2b8_38">; +def v2b8_39 : NVPTXReg<"%v2b8_39">; +def v2b8_40 : NVPTXReg<"%v2b8_40">; +def v2b8_41 : NVPTXReg<"%v2b8_41">; +def v2b8_42 : NVPTXReg<"%v2b8_42">; +def v2b8_43 : NVPTXReg<"%v2b8_43">; +def v2b8_44 : NVPTXReg<"%v2b8_44">; +def v2b8_45 : NVPTXReg<"%v2b8_45">; +def v2b8_46 : NVPTXReg<"%v2b8_46">; +def v2b8_47 : NVPTXReg<"%v2b8_47">; +def v2b8_48 : NVPTXReg<"%v2b8_48">; +def v2b8_49 : NVPTXReg<"%v2b8_49">; +def v2b8_50 : NVPTXReg<"%v2b8_50">; +def v2b8_51 : NVPTXReg<"%v2b8_51">; +def v2b8_52 : NVPTXReg<"%v2b8_52">; +def v2b8_53 : NVPTXReg<"%v2b8_53">; +def v2b8_54 : 
NVPTXReg<"%v2b8_54">; +def v2b8_55 : NVPTXReg<"%v2b8_55">; +def v2b8_56 : NVPTXReg<"%v2b8_56">; +def v2b8_57 : NVPTXReg<"%v2b8_57">; +def v2b8_58 : NVPTXReg<"%v2b8_58">; +def v2b8_59 : NVPTXReg<"%v2b8_59">; +def v2b8_60 : NVPTXReg<"%v2b8_60">; +def v2b8_61 : NVPTXReg<"%v2b8_61">; +def v2b8_62 : NVPTXReg<"%v2b8_62">; +def v2b8_63 : NVPTXReg<"%v2b8_63">; +def v2b8_64 : NVPTXReg<"%v2b8_64">; +def v2b8_65 : NVPTXReg<"%v2b8_65">; +def v2b8_66 : NVPTXReg<"%v2b8_66">; +def v2b8_67 : NVPTXReg<"%v2b8_67">; +def v2b8_68 : NVPTXReg<"%v2b8_68">; +def v2b8_69 : NVPTXReg<"%v2b8_69">; +def v2b8_70 : NVPTXReg<"%v2b8_70">; +def v2b8_71 : NVPTXReg<"%v2b8_71">; +def v2b8_72 : NVPTXReg<"%v2b8_72">; +def v2b8_73 : NVPTXReg<"%v2b8_73">; +def v2b8_74 : NVPTXReg<"%v2b8_74">; +def v2b8_75 : NVPTXReg<"%v2b8_75">; +def v2b8_76 : NVPTXReg<"%v2b8_76">; +def v2b8_77 : NVPTXReg<"%v2b8_77">; +def v2b8_78 : NVPTXReg<"%v2b8_78">; +def v2b8_79 : NVPTXReg<"%v2b8_79">; +def v2b8_80 : NVPTXReg<"%v2b8_80">; +def v2b8_81 : NVPTXReg<"%v2b8_81">; +def v2b8_82 : NVPTXReg<"%v2b8_82">; +def v2b8_83 : NVPTXReg<"%v2b8_83">; +def v2b8_84 : NVPTXReg<"%v2b8_84">; +def v2b8_85 : NVPTXReg<"%v2b8_85">; +def v2b8_86 : NVPTXReg<"%v2b8_86">; +def v2b8_87 : NVPTXReg<"%v2b8_87">; +def v2b8_88 : NVPTXReg<"%v2b8_88">; +def v2b8_89 : NVPTXReg<"%v2b8_89">; +def v2b8_90 : NVPTXReg<"%v2b8_90">; +def v2b8_91 : NVPTXReg<"%v2b8_91">; +def v2b8_92 : NVPTXReg<"%v2b8_92">; +def v2b8_93 : NVPTXReg<"%v2b8_93">; +def v2b8_94 : NVPTXReg<"%v2b8_94">; +def v2b8_95 : NVPTXReg<"%v2b8_95">; +def v2b8_96 : NVPTXReg<"%v2b8_96">; +def v2b8_97 : NVPTXReg<"%v2b8_97">; +def v2b8_98 : NVPTXReg<"%v2b8_98">; +def v2b8_99 : NVPTXReg<"%v2b8_99">; +def v2b8_100 : NVPTXReg<"%v2b8_100">; +def v2b8_101 : NVPTXReg<"%v2b8_101">; +def v2b8_102 : NVPTXReg<"%v2b8_102">; +def v2b8_103 : NVPTXReg<"%v2b8_103">; +def v2b8_104 : NVPTXReg<"%v2b8_104">; +def v2b8_105 : NVPTXReg<"%v2b8_105">; +def v2b8_106 : NVPTXReg<"%v2b8_106">; +def v2b8_107 : NVPTXReg<"%v2b8_107">; 
+def v2b8_108 : NVPTXReg<"%v2b8_108">; +def v2b8_109 : NVPTXReg<"%v2b8_109">; +def v2b8_110 : NVPTXReg<"%v2b8_110">; +def v2b8_111 : NVPTXReg<"%v2b8_111">; +def v2b8_112 : NVPTXReg<"%v2b8_112">; +def v2b8_113 : NVPTXReg<"%v2b8_113">; +def v2b8_114 : NVPTXReg<"%v2b8_114">; +def v2b8_115 : NVPTXReg<"%v2b8_115">; +def v2b8_116 : NVPTXReg<"%v2b8_116">; +def v2b8_117 : NVPTXReg<"%v2b8_117">; +def v2b8_118 : NVPTXReg<"%v2b8_118">; +def v2b8_119 : NVPTXReg<"%v2b8_119">; +def v2b8_120 : NVPTXReg<"%v2b8_120">; +def v2b8_121 : NVPTXReg<"%v2b8_121">; +def v2b8_122 : NVPTXReg<"%v2b8_122">; +def v2b8_123 : NVPTXReg<"%v2b8_123">; +def v2b8_124 : NVPTXReg<"%v2b8_124">; +def v2b8_125 : NVPTXReg<"%v2b8_125">; +def v2b8_126 : NVPTXReg<"%v2b8_126">; +def v2b8_127 : NVPTXReg<"%v2b8_127">; +def v2b8_128 : NVPTXReg<"%v2b8_128">; +def v2b8_129 : NVPTXReg<"%v2b8_129">; +def v2b8_130 : NVPTXReg<"%v2b8_130">; +def v2b8_131 : NVPTXReg<"%v2b8_131">; +def v2b8_132 : NVPTXReg<"%v2b8_132">; +def v2b8_133 : NVPTXReg<"%v2b8_133">; +def v2b8_134 : NVPTXReg<"%v2b8_134">; +def v2b8_135 : NVPTXReg<"%v2b8_135">; +def v2b8_136 : NVPTXReg<"%v2b8_136">; +def v2b8_137 : NVPTXReg<"%v2b8_137">; +def v2b8_138 : NVPTXReg<"%v2b8_138">; +def v2b8_139 : NVPTXReg<"%v2b8_139">; +def v2b8_140 : NVPTXReg<"%v2b8_140">; +def v2b8_141 : NVPTXReg<"%v2b8_141">; +def v2b8_142 : NVPTXReg<"%v2b8_142">; +def v2b8_143 : NVPTXReg<"%v2b8_143">; +def v2b8_144 : NVPTXReg<"%v2b8_144">; +def v2b8_145 : NVPTXReg<"%v2b8_145">; +def v2b8_146 : NVPTXReg<"%v2b8_146">; +def v2b8_147 : NVPTXReg<"%v2b8_147">; +def v2b8_148 : NVPTXReg<"%v2b8_148">; +def v2b8_149 : NVPTXReg<"%v2b8_149">; +def v2b8_150 : NVPTXReg<"%v2b8_150">; +def v2b8_151 : NVPTXReg<"%v2b8_151">; +def v2b8_152 : NVPTXReg<"%v2b8_152">; +def v2b8_153 : NVPTXReg<"%v2b8_153">; +def v2b8_154 : NVPTXReg<"%v2b8_154">; +def v2b8_155 : NVPTXReg<"%v2b8_155">; +def v2b8_156 : NVPTXReg<"%v2b8_156">; +def v2b8_157 : NVPTXReg<"%v2b8_157">; +def v2b8_158 : NVPTXReg<"%v2b8_158">; +def 
v2b8_159 : NVPTXReg<"%v2b8_159">; +def v2b8_160 : NVPTXReg<"%v2b8_160">; +def v2b8_161 : NVPTXReg<"%v2b8_161">; +def v2b8_162 : NVPTXReg<"%v2b8_162">; +def v2b8_163 : NVPTXReg<"%v2b8_163">; +def v2b8_164 : NVPTXReg<"%v2b8_164">; +def v2b8_165 : NVPTXReg<"%v2b8_165">; +def v2b8_166 : NVPTXReg<"%v2b8_166">; +def v2b8_167 : NVPTXReg<"%v2b8_167">; +def v2b8_168 : NVPTXReg<"%v2b8_168">; +def v2b8_169 : NVPTXReg<"%v2b8_169">; +def v2b8_170 : NVPTXReg<"%v2b8_170">; +def v2b8_171 : NVPTXReg<"%v2b8_171">; +def v2b8_172 : NVPTXReg<"%v2b8_172">; +def v2b8_173 : NVPTXReg<"%v2b8_173">; +def v2b8_174 : NVPTXReg<"%v2b8_174">; +def v2b8_175 : NVPTXReg<"%v2b8_175">; +def v2b8_176 : NVPTXReg<"%v2b8_176">; +def v2b8_177 : NVPTXReg<"%v2b8_177">; +def v2b8_178 : NVPTXReg<"%v2b8_178">; +def v2b8_179 : NVPTXReg<"%v2b8_179">; +def v2b8_180 : NVPTXReg<"%v2b8_180">; +def v2b8_181 : NVPTXReg<"%v2b8_181">; +def v2b8_182 : NVPTXReg<"%v2b8_182">; +def v2b8_183 : NVPTXReg<"%v2b8_183">; +def v2b8_184 : NVPTXReg<"%v2b8_184">; +def v2b8_185 : NVPTXReg<"%v2b8_185">; +def v2b8_186 : NVPTXReg<"%v2b8_186">; +def v2b8_187 : NVPTXReg<"%v2b8_187">; +def v2b8_188 : NVPTXReg<"%v2b8_188">; +def v2b8_189 : NVPTXReg<"%v2b8_189">; +def v2b8_190 : NVPTXReg<"%v2b8_190">; +def v2b8_191 : NVPTXReg<"%v2b8_191">; +def v2b8_192 : NVPTXReg<"%v2b8_192">; +def v2b8_193 : NVPTXReg<"%v2b8_193">; +def v2b8_194 : NVPTXReg<"%v2b8_194">; +def v2b8_195 : NVPTXReg<"%v2b8_195">; +def v2b8_196 : NVPTXReg<"%v2b8_196">; +def v2b8_197 : NVPTXReg<"%v2b8_197">; +def v2b8_198 : NVPTXReg<"%v2b8_198">; +def v2b8_199 : NVPTXReg<"%v2b8_199">; +def v2b8_200 : NVPTXReg<"%v2b8_200">; +def v2b8_201 : NVPTXReg<"%v2b8_201">; +def v2b8_202 : NVPTXReg<"%v2b8_202">; +def v2b8_203 : NVPTXReg<"%v2b8_203">; +def v2b8_204 : NVPTXReg<"%v2b8_204">; +def v2b8_205 : NVPTXReg<"%v2b8_205">; +def v2b8_206 : NVPTXReg<"%v2b8_206">; +def v2b8_207 : NVPTXReg<"%v2b8_207">; +def v2b8_208 : NVPTXReg<"%v2b8_208">; +def v2b8_209 : NVPTXReg<"%v2b8_209">; +def v2b8_210 : 
NVPTXReg<"%v2b8_210">; +def v2b8_211 : NVPTXReg<"%v2b8_211">; +def v2b8_212 : NVPTXReg<"%v2b8_212">; +def v2b8_213 : NVPTXReg<"%v2b8_213">; +def v2b8_214 : NVPTXReg<"%v2b8_214">; +def v2b8_215 : NVPTXReg<"%v2b8_215">; +def v2b8_216 : NVPTXReg<"%v2b8_216">; +def v2b8_217 : NVPTXReg<"%v2b8_217">; +def v2b8_218 : NVPTXReg<"%v2b8_218">; +def v2b8_219 : NVPTXReg<"%v2b8_219">; +def v2b8_220 : NVPTXReg<"%v2b8_220">; +def v2b8_221 : NVPTXReg<"%v2b8_221">; +def v2b8_222 : NVPTXReg<"%v2b8_222">; +def v2b8_223 : NVPTXReg<"%v2b8_223">; +def v2b8_224 : NVPTXReg<"%v2b8_224">; +def v2b8_225 : NVPTXReg<"%v2b8_225">; +def v2b8_226 : NVPTXReg<"%v2b8_226">; +def v2b8_227 : NVPTXReg<"%v2b8_227">; +def v2b8_228 : NVPTXReg<"%v2b8_228">; +def v2b8_229 : NVPTXReg<"%v2b8_229">; +def v2b8_230 : NVPTXReg<"%v2b8_230">; +def v2b8_231 : NVPTXReg<"%v2b8_231">; +def v2b8_232 : NVPTXReg<"%v2b8_232">; +def v2b8_233 : NVPTXReg<"%v2b8_233">; +def v2b8_234 : NVPTXReg<"%v2b8_234">; +def v2b8_235 : NVPTXReg<"%v2b8_235">; +def v2b8_236 : NVPTXReg<"%v2b8_236">; +def v2b8_237 : NVPTXReg<"%v2b8_237">; +def v2b8_238 : NVPTXReg<"%v2b8_238">; +def v2b8_239 : NVPTXReg<"%v2b8_239">; +def v2b8_240 : NVPTXReg<"%v2b8_240">; +def v2b8_241 : NVPTXReg<"%v2b8_241">; +def v2b8_242 : NVPTXReg<"%v2b8_242">; +def v2b8_243 : NVPTXReg<"%v2b8_243">; +def v2b8_244 : NVPTXReg<"%v2b8_244">; +def v2b8_245 : NVPTXReg<"%v2b8_245">; +def v2b8_246 : NVPTXReg<"%v2b8_246">; +def v2b8_247 : NVPTXReg<"%v2b8_247">; +def v2b8_248 : NVPTXReg<"%v2b8_248">; +def v2b8_249 : NVPTXReg<"%v2b8_249">; +def v2b8_250 : NVPTXReg<"%v2b8_250">; +def v2b8_251 : NVPTXReg<"%v2b8_251">; +def v2b8_252 : NVPTXReg<"%v2b8_252">; +def v2b8_253 : NVPTXReg<"%v2b8_253">; +def v2b8_254 : NVPTXReg<"%v2b8_254">; +def v2b8_255 : NVPTXReg<"%v2b8_255">; +def v2b8_256 : NVPTXReg<"%v2b8_256">; +def v2b8_257 : NVPTXReg<"%v2b8_257">; +def v2b8_258 : NVPTXReg<"%v2b8_258">; +def v2b8_259 : NVPTXReg<"%v2b8_259">; +def v2b8_260 : NVPTXReg<"%v2b8_260">; +def v2b8_261 : 
NVPTXReg<"%v2b8_261">; +def v2b8_262 : NVPTXReg<"%v2b8_262">; +def v2b8_263 : NVPTXReg<"%v2b8_263">; +def v2b8_264 : NVPTXReg<"%v2b8_264">; +def v2b8_265 : NVPTXReg<"%v2b8_265">; +def v2b8_266 : NVPTXReg<"%v2b8_266">; +def v2b8_267 : NVPTXReg<"%v2b8_267">; +def v2b8_268 : NVPTXReg<"%v2b8_268">; +def v2b8_269 : NVPTXReg<"%v2b8_269">; +def v2b8_270 : NVPTXReg<"%v2b8_270">; +def v2b8_271 : NVPTXReg<"%v2b8_271">; +def v2b8_272 : NVPTXReg<"%v2b8_272">; +def v2b8_273 : NVPTXReg<"%v2b8_273">; +def v2b8_274 : NVPTXReg<"%v2b8_274">; +def v2b8_275 : NVPTXReg<"%v2b8_275">; +def v2b8_276 : NVPTXReg<"%v2b8_276">; +def v2b8_277 : NVPTXReg<"%v2b8_277">; +def v2b8_278 : NVPTXReg<"%v2b8_278">; +def v2b8_279 : NVPTXReg<"%v2b8_279">; +def v2b8_280 : NVPTXReg<"%v2b8_280">; +def v2b8_281 : NVPTXReg<"%v2b8_281">; +def v2b8_282 : NVPTXReg<"%v2b8_282">; +def v2b8_283 : NVPTXReg<"%v2b8_283">; +def v2b8_284 : NVPTXReg<"%v2b8_284">; +def v2b8_285 : NVPTXReg<"%v2b8_285">; +def v2b8_286 : NVPTXReg<"%v2b8_286">; +def v2b8_287 : NVPTXReg<"%v2b8_287">; +def v2b8_288 : NVPTXReg<"%v2b8_288">; +def v2b8_289 : NVPTXReg<"%v2b8_289">; +def v2b8_290 : NVPTXReg<"%v2b8_290">; +def v2b8_291 : NVPTXReg<"%v2b8_291">; +def v2b8_292 : NVPTXReg<"%v2b8_292">; +def v2b8_293 : NVPTXReg<"%v2b8_293">; +def v2b8_294 : NVPTXReg<"%v2b8_294">; +def v2b8_295 : NVPTXReg<"%v2b8_295">; +def v2b8_296 : NVPTXReg<"%v2b8_296">; +def v2b8_297 : NVPTXReg<"%v2b8_297">; +def v2b8_298 : NVPTXReg<"%v2b8_298">; +def v2b8_299 : NVPTXReg<"%v2b8_299">; +def v2b8_300 : NVPTXReg<"%v2b8_300">; +def v2b8_301 : NVPTXReg<"%v2b8_301">; +def v2b8_302 : NVPTXReg<"%v2b8_302">; +def v2b8_303 : NVPTXReg<"%v2b8_303">; +def v2b8_304 : NVPTXReg<"%v2b8_304">; +def v2b8_305 : NVPTXReg<"%v2b8_305">; +def v2b8_306 : NVPTXReg<"%v2b8_306">; +def v2b8_307 : NVPTXReg<"%v2b8_307">; +def v2b8_308 : NVPTXReg<"%v2b8_308">; +def v2b8_309 : NVPTXReg<"%v2b8_309">; +def v2b8_310 : NVPTXReg<"%v2b8_310">; +def v2b8_311 : NVPTXReg<"%v2b8_311">; +def v2b8_312 : 
NVPTXReg<"%v2b8_312">; +def v2b8_313 : NVPTXReg<"%v2b8_313">; +def v2b8_314 : NVPTXReg<"%v2b8_314">; +def v2b8_315 : NVPTXReg<"%v2b8_315">; +def v2b8_316 : NVPTXReg<"%v2b8_316">; +def v2b8_317 : NVPTXReg<"%v2b8_317">; +def v2b8_318 : NVPTXReg<"%v2b8_318">; +def v2b8_319 : NVPTXReg<"%v2b8_319">; +def v2b8_320 : NVPTXReg<"%v2b8_320">; +def v2b8_321 : NVPTXReg<"%v2b8_321">; +def v2b8_322 : NVPTXReg<"%v2b8_322">; +def v2b8_323 : NVPTXReg<"%v2b8_323">; +def v2b8_324 : NVPTXReg<"%v2b8_324">; +def v2b8_325 : NVPTXReg<"%v2b8_325">; +def v2b8_326 : NVPTXReg<"%v2b8_326">; +def v2b8_327 : NVPTXReg<"%v2b8_327">; +def v2b8_328 : NVPTXReg<"%v2b8_328">; +def v2b8_329 : NVPTXReg<"%v2b8_329">; +def v2b8_330 : NVPTXReg<"%v2b8_330">; +def v2b8_331 : NVPTXReg<"%v2b8_331">; +def v2b8_332 : NVPTXReg<"%v2b8_332">; +def v2b8_333 : NVPTXReg<"%v2b8_333">; +def v2b8_334 : NVPTXReg<"%v2b8_334">; +def v2b8_335 : NVPTXReg<"%v2b8_335">; +def v2b8_336 : NVPTXReg<"%v2b8_336">; +def v2b8_337 : NVPTXReg<"%v2b8_337">; +def v2b8_338 : NVPTXReg<"%v2b8_338">; +def v2b8_339 : NVPTXReg<"%v2b8_339">; +def v2b8_340 : NVPTXReg<"%v2b8_340">; +def v2b8_341 : NVPTXReg<"%v2b8_341">; +def v2b8_342 : NVPTXReg<"%v2b8_342">; +def v2b8_343 : NVPTXReg<"%v2b8_343">; +def v2b8_344 : NVPTXReg<"%v2b8_344">; +def v2b8_345 : NVPTXReg<"%v2b8_345">; +def v2b8_346 : NVPTXReg<"%v2b8_346">; +def v2b8_347 : NVPTXReg<"%v2b8_347">; +def v2b8_348 : NVPTXReg<"%v2b8_348">; +def v2b8_349 : NVPTXReg<"%v2b8_349">; +def v2b8_350 : NVPTXReg<"%v2b8_350">; +def v2b8_351 : NVPTXReg<"%v2b8_351">; +def v2b8_352 : NVPTXReg<"%v2b8_352">; +def v2b8_353 : NVPTXReg<"%v2b8_353">; +def v2b8_354 : NVPTXReg<"%v2b8_354">; +def v2b8_355 : NVPTXReg<"%v2b8_355">; +def v2b8_356 : NVPTXReg<"%v2b8_356">; +def v2b8_357 : NVPTXReg<"%v2b8_357">; +def v2b8_358 : NVPTXReg<"%v2b8_358">; +def v2b8_359 : NVPTXReg<"%v2b8_359">; +def v2b8_360 : NVPTXReg<"%v2b8_360">; +def v2b8_361 : NVPTXReg<"%v2b8_361">; +def v2b8_362 : NVPTXReg<"%v2b8_362">; +def v2b8_363 : 
NVPTXReg<"%v2b8_363">; +def v2b8_364 : NVPTXReg<"%v2b8_364">; +def v2b8_365 : NVPTXReg<"%v2b8_365">; +def v2b8_366 : NVPTXReg<"%v2b8_366">; +def v2b8_367 : NVPTXReg<"%v2b8_367">; +def v2b8_368 : NVPTXReg<"%v2b8_368">; +def v2b8_369 : NVPTXReg<"%v2b8_369">; +def v2b8_370 : NVPTXReg<"%v2b8_370">; +def v2b8_371 : NVPTXReg<"%v2b8_371">; +def v2b8_372 : NVPTXReg<"%v2b8_372">; +def v2b8_373 : NVPTXReg<"%v2b8_373">; +def v2b8_374 : NVPTXReg<"%v2b8_374">; +def v2b8_375 : NVPTXReg<"%v2b8_375">; +def v2b8_376 : NVPTXReg<"%v2b8_376">; +def v2b8_377 : NVPTXReg<"%v2b8_377">; +def v2b8_378 : NVPTXReg<"%v2b8_378">; +def v2b8_379 : NVPTXReg<"%v2b8_379">; +def v2b8_380 : NVPTXReg<"%v2b8_380">; +def v2b8_381 : NVPTXReg<"%v2b8_381">; +def v2b8_382 : NVPTXReg<"%v2b8_382">; +def v2b8_383 : NVPTXReg<"%v2b8_383">; +def v2b8_384 : NVPTXReg<"%v2b8_384">; +def v2b8_385 : NVPTXReg<"%v2b8_385">; +def v2b8_386 : NVPTXReg<"%v2b8_386">; +def v2b8_387 : NVPTXReg<"%v2b8_387">; +def v2b8_388 : NVPTXReg<"%v2b8_388">; +def v2b8_389 : NVPTXReg<"%v2b8_389">; +def v2b8_390 : NVPTXReg<"%v2b8_390">; +def v2b8_391 : NVPTXReg<"%v2b8_391">; +def v2b8_392 : NVPTXReg<"%v2b8_392">; +def v2b8_393 : NVPTXReg<"%v2b8_393">; +def v2b8_394 : NVPTXReg<"%v2b8_394">; +def v2b8_395 : NVPTXReg<"%v2b8_395">; +def v2b16_0 : NVPTXReg<"%v2b16_0">; +def v2b16_1 : NVPTXReg<"%v2b16_1">; +def v2b16_2 : NVPTXReg<"%v2b16_2">; +def v2b16_3 : NVPTXReg<"%v2b16_3">; +def v2b16_4 : NVPTXReg<"%v2b16_4">; +def v2b16_5 : NVPTXReg<"%v2b16_5">; +def v2b16_6 : NVPTXReg<"%v2b16_6">; +def v2b16_7 : NVPTXReg<"%v2b16_7">; +def v2b16_8 : NVPTXReg<"%v2b16_8">; +def v2b16_9 : NVPTXReg<"%v2b16_9">; +def v2b16_10 : NVPTXReg<"%v2b16_10">; +def v2b16_11 : NVPTXReg<"%v2b16_11">; +def v2b16_12 : NVPTXReg<"%v2b16_12">; +def v2b16_13 : NVPTXReg<"%v2b16_13">; +def v2b16_14 : NVPTXReg<"%v2b16_14">; +def v2b16_15 : NVPTXReg<"%v2b16_15">; +def v2b16_16 : NVPTXReg<"%v2b16_16">; +def v2b16_17 : NVPTXReg<"%v2b16_17">; +def v2b16_18 : NVPTXReg<"%v2b16_18">; +def 
v2b16_19 : NVPTXReg<"%v2b16_19">; +def v2b16_20 : NVPTXReg<"%v2b16_20">; +def v2b16_21 : NVPTXReg<"%v2b16_21">; +def v2b16_22 : NVPTXReg<"%v2b16_22">; +def v2b16_23 : NVPTXReg<"%v2b16_23">; +def v2b16_24 : NVPTXReg<"%v2b16_24">; +def v2b16_25 : NVPTXReg<"%v2b16_25">; +def v2b16_26 : NVPTXReg<"%v2b16_26">; +def v2b16_27 : NVPTXReg<"%v2b16_27">; +def v2b16_28 : NVPTXReg<"%v2b16_28">; +def v2b16_29 : NVPTXReg<"%v2b16_29">; +def v2b16_30 : NVPTXReg<"%v2b16_30">; +def v2b16_31 : NVPTXReg<"%v2b16_31">; +def v2b16_32 : NVPTXReg<"%v2b16_32">; +def v2b16_33 : NVPTXReg<"%v2b16_33">; +def v2b16_34 : NVPTXReg<"%v2b16_34">; +def v2b16_35 : NVPTXReg<"%v2b16_35">; +def v2b16_36 : NVPTXReg<"%v2b16_36">; +def v2b16_37 : NVPTXReg<"%v2b16_37">; +def v2b16_38 : NVPTXReg<"%v2b16_38">; +def v2b16_39 : NVPTXReg<"%v2b16_39">; +def v2b16_40 : NVPTXReg<"%v2b16_40">; +def v2b16_41 : NVPTXReg<"%v2b16_41">; +def v2b16_42 : NVPTXReg<"%v2b16_42">; +def v2b16_43 : NVPTXReg<"%v2b16_43">; +def v2b16_44 : NVPTXReg<"%v2b16_44">; +def v2b16_45 : NVPTXReg<"%v2b16_45">; +def v2b16_46 : NVPTXReg<"%v2b16_46">; +def v2b16_47 : NVPTXReg<"%v2b16_47">; +def v2b16_48 : NVPTXReg<"%v2b16_48">; +def v2b16_49 : NVPTXReg<"%v2b16_49">; +def v2b16_50 : NVPTXReg<"%v2b16_50">; +def v2b16_51 : NVPTXReg<"%v2b16_51">; +def v2b16_52 : NVPTXReg<"%v2b16_52">; +def v2b16_53 : NVPTXReg<"%v2b16_53">; +def v2b16_54 : NVPTXReg<"%v2b16_54">; +def v2b16_55 : NVPTXReg<"%v2b16_55">; +def v2b16_56 : NVPTXReg<"%v2b16_56">; +def v2b16_57 : NVPTXReg<"%v2b16_57">; +def v2b16_58 : NVPTXReg<"%v2b16_58">; +def v2b16_59 : NVPTXReg<"%v2b16_59">; +def v2b16_60 : NVPTXReg<"%v2b16_60">; +def v2b16_61 : NVPTXReg<"%v2b16_61">; +def v2b16_62 : NVPTXReg<"%v2b16_62">; +def v2b16_63 : NVPTXReg<"%v2b16_63">; +def v2b16_64 : NVPTXReg<"%v2b16_64">; +def v2b16_65 : NVPTXReg<"%v2b16_65">; +def v2b16_66 : NVPTXReg<"%v2b16_66">; +def v2b16_67 : NVPTXReg<"%v2b16_67">; +def v2b16_68 : NVPTXReg<"%v2b16_68">; +def v2b16_69 : NVPTXReg<"%v2b16_69">; +def v2b16_70 : 
NVPTXReg<"%v2b16_70">; +def v2b16_71 : NVPTXReg<"%v2b16_71">; +def v2b16_72 : NVPTXReg<"%v2b16_72">; +def v2b16_73 : NVPTXReg<"%v2b16_73">; +def v2b16_74 : NVPTXReg<"%v2b16_74">; +def v2b16_75 : NVPTXReg<"%v2b16_75">; +def v2b16_76 : NVPTXReg<"%v2b16_76">; +def v2b16_77 : NVPTXReg<"%v2b16_77">; +def v2b16_78 : NVPTXReg<"%v2b16_78">; +def v2b16_79 : NVPTXReg<"%v2b16_79">; +def v2b16_80 : NVPTXReg<"%v2b16_80">; +def v2b16_81 : NVPTXReg<"%v2b16_81">; +def v2b16_82 : NVPTXReg<"%v2b16_82">; +def v2b16_83 : NVPTXReg<"%v2b16_83">; +def v2b16_84 : NVPTXReg<"%v2b16_84">; +def v2b16_85 : NVPTXReg<"%v2b16_85">; +def v2b16_86 : NVPTXReg<"%v2b16_86">; +def v2b16_87 : NVPTXReg<"%v2b16_87">; +def v2b16_88 : NVPTXReg<"%v2b16_88">; +def v2b16_89 : NVPTXReg<"%v2b16_89">; +def v2b16_90 : NVPTXReg<"%v2b16_90">; +def v2b16_91 : NVPTXReg<"%v2b16_91">; +def v2b16_92 : NVPTXReg<"%v2b16_92">; +def v2b16_93 : NVPTXReg<"%v2b16_93">; +def v2b16_94 : NVPTXReg<"%v2b16_94">; +def v2b16_95 : NVPTXReg<"%v2b16_95">; +def v2b16_96 : NVPTXReg<"%v2b16_96">; +def v2b16_97 : NVPTXReg<"%v2b16_97">; +def v2b16_98 : NVPTXReg<"%v2b16_98">; +def v2b16_99 : NVPTXReg<"%v2b16_99">; +def v2b16_100 : NVPTXReg<"%v2b16_100">; +def v2b16_101 : NVPTXReg<"%v2b16_101">; +def v2b16_102 : NVPTXReg<"%v2b16_102">; +def v2b16_103 : NVPTXReg<"%v2b16_103">; +def v2b16_104 : NVPTXReg<"%v2b16_104">; +def v2b16_105 : NVPTXReg<"%v2b16_105">; +def v2b16_106 : NVPTXReg<"%v2b16_106">; +def v2b16_107 : NVPTXReg<"%v2b16_107">; +def v2b16_108 : NVPTXReg<"%v2b16_108">; +def v2b16_109 : NVPTXReg<"%v2b16_109">; +def v2b16_110 : NVPTXReg<"%v2b16_110">; +def v2b16_111 : NVPTXReg<"%v2b16_111">; +def v2b16_112 : NVPTXReg<"%v2b16_112">; +def v2b16_113 : NVPTXReg<"%v2b16_113">; +def v2b16_114 : NVPTXReg<"%v2b16_114">; +def v2b16_115 : NVPTXReg<"%v2b16_115">; +def v2b16_116 : NVPTXReg<"%v2b16_116">; +def v2b16_117 : NVPTXReg<"%v2b16_117">; +def v2b16_118 : NVPTXReg<"%v2b16_118">; +def v2b16_119 : NVPTXReg<"%v2b16_119">; +def v2b16_120 : 
NVPTXReg<"%v2b16_120">; +def v2b16_121 : NVPTXReg<"%v2b16_121">; +def v2b16_122 : NVPTXReg<"%v2b16_122">; +def v2b16_123 : NVPTXReg<"%v2b16_123">; +def v2b16_124 : NVPTXReg<"%v2b16_124">; +def v2b16_125 : NVPTXReg<"%v2b16_125">; +def v2b16_126 : NVPTXReg<"%v2b16_126">; +def v2b16_127 : NVPTXReg<"%v2b16_127">; +def v2b16_128 : NVPTXReg<"%v2b16_128">; +def v2b16_129 : NVPTXReg<"%v2b16_129">; +def v2b16_130 : NVPTXReg<"%v2b16_130">; +def v2b16_131 : NVPTXReg<"%v2b16_131">; +def v2b16_132 : NVPTXReg<"%v2b16_132">; +def v2b16_133 : NVPTXReg<"%v2b16_133">; +def v2b16_134 : NVPTXReg<"%v2b16_134">; +def v2b16_135 : NVPTXReg<"%v2b16_135">; +def v2b16_136 : NVPTXReg<"%v2b16_136">; +def v2b16_137 : NVPTXReg<"%v2b16_137">; +def v2b16_138 : NVPTXReg<"%v2b16_138">; +def v2b16_139 : NVPTXReg<"%v2b16_139">; +def v2b16_140 : NVPTXReg<"%v2b16_140">; +def v2b16_141 : NVPTXReg<"%v2b16_141">; +def v2b16_142 : NVPTXReg<"%v2b16_142">; +def v2b16_143 : NVPTXReg<"%v2b16_143">; +def v2b16_144 : NVPTXReg<"%v2b16_144">; +def v2b16_145 : NVPTXReg<"%v2b16_145">; +def v2b16_146 : NVPTXReg<"%v2b16_146">; +def v2b16_147 : NVPTXReg<"%v2b16_147">; +def v2b16_148 : NVPTXReg<"%v2b16_148">; +def v2b16_149 : NVPTXReg<"%v2b16_149">; +def v2b16_150 : NVPTXReg<"%v2b16_150">; +def v2b16_151 : NVPTXReg<"%v2b16_151">; +def v2b16_152 : NVPTXReg<"%v2b16_152">; +def v2b16_153 : NVPTXReg<"%v2b16_153">; +def v2b16_154 : NVPTXReg<"%v2b16_154">; +def v2b16_155 : NVPTXReg<"%v2b16_155">; +def v2b16_156 : NVPTXReg<"%v2b16_156">; +def v2b16_157 : NVPTXReg<"%v2b16_157">; +def v2b16_158 : NVPTXReg<"%v2b16_158">; +def v2b16_159 : NVPTXReg<"%v2b16_159">; +def v2b16_160 : NVPTXReg<"%v2b16_160">; +def v2b16_161 : NVPTXReg<"%v2b16_161">; +def v2b16_162 : NVPTXReg<"%v2b16_162">; +def v2b16_163 : NVPTXReg<"%v2b16_163">; +def v2b16_164 : NVPTXReg<"%v2b16_164">; +def v2b16_165 : NVPTXReg<"%v2b16_165">; +def v2b16_166 : NVPTXReg<"%v2b16_166">; +def v2b16_167 : NVPTXReg<"%v2b16_167">; +def v2b16_168 : NVPTXReg<"%v2b16_168">; +def 
v2b16_169 : NVPTXReg<"%v2b16_169">; +def v2b16_170 : NVPTXReg<"%v2b16_170">; +def v2b16_171 : NVPTXReg<"%v2b16_171">; +def v2b16_172 : NVPTXReg<"%v2b16_172">; +def v2b16_173 : NVPTXReg<"%v2b16_173">; +def v2b16_174 : NVPTXReg<"%v2b16_174">; +def v2b16_175 : NVPTXReg<"%v2b16_175">; +def v2b16_176 : NVPTXReg<"%v2b16_176">; +def v2b16_177 : NVPTXReg<"%v2b16_177">; +def v2b16_178 : NVPTXReg<"%v2b16_178">; +def v2b16_179 : NVPTXReg<"%v2b16_179">; +def v2b16_180 : NVPTXReg<"%v2b16_180">; +def v2b16_181 : NVPTXReg<"%v2b16_181">; +def v2b16_182 : NVPTXReg<"%v2b16_182">; +def v2b16_183 : NVPTXReg<"%v2b16_183">; +def v2b16_184 : NVPTXReg<"%v2b16_184">; +def v2b16_185 : NVPTXReg<"%v2b16_185">; +def v2b16_186 : NVPTXReg<"%v2b16_186">; +def v2b16_187 : NVPTXReg<"%v2b16_187">; +def v2b16_188 : NVPTXReg<"%v2b16_188">; +def v2b16_189 : NVPTXReg<"%v2b16_189">; +def v2b16_190 : NVPTXReg<"%v2b16_190">; +def v2b16_191 : NVPTXReg<"%v2b16_191">; +def v2b16_192 : NVPTXReg<"%v2b16_192">; +def v2b16_193 : NVPTXReg<"%v2b16_193">; +def v2b16_194 : NVPTXReg<"%v2b16_194">; +def v2b16_195 : NVPTXReg<"%v2b16_195">; +def v2b16_196 : NVPTXReg<"%v2b16_196">; +def v2b16_197 : NVPTXReg<"%v2b16_197">; +def v2b16_198 : NVPTXReg<"%v2b16_198">; +def v2b16_199 : NVPTXReg<"%v2b16_199">; +def v2b16_200 : NVPTXReg<"%v2b16_200">; +def v2b16_201 : NVPTXReg<"%v2b16_201">; +def v2b16_202 : NVPTXReg<"%v2b16_202">; +def v2b16_203 : NVPTXReg<"%v2b16_203">; +def v2b16_204 : NVPTXReg<"%v2b16_204">; +def v2b16_205 : NVPTXReg<"%v2b16_205">; +def v2b16_206 : NVPTXReg<"%v2b16_206">; +def v2b16_207 : NVPTXReg<"%v2b16_207">; +def v2b16_208 : NVPTXReg<"%v2b16_208">; +def v2b16_209 : NVPTXReg<"%v2b16_209">; +def v2b16_210 : NVPTXReg<"%v2b16_210">; +def v2b16_211 : NVPTXReg<"%v2b16_211">; +def v2b16_212 : NVPTXReg<"%v2b16_212">; +def v2b16_213 : NVPTXReg<"%v2b16_213">; +def v2b16_214 : NVPTXReg<"%v2b16_214">; +def v2b16_215 : NVPTXReg<"%v2b16_215">; +def v2b16_216 : NVPTXReg<"%v2b16_216">; +def v2b16_217 : 
NVPTXReg<"%v2b16_217">; +def v2b16_218 : NVPTXReg<"%v2b16_218">; +def v2b16_219 : NVPTXReg<"%v2b16_219">; +def v2b16_220 : NVPTXReg<"%v2b16_220">; +def v2b16_221 : NVPTXReg<"%v2b16_221">; +def v2b16_222 : NVPTXReg<"%v2b16_222">; +def v2b16_223 : NVPTXReg<"%v2b16_223">; +def v2b16_224 : NVPTXReg<"%v2b16_224">; +def v2b16_225 : NVPTXReg<"%v2b16_225">; +def v2b16_226 : NVPTXReg<"%v2b16_226">; +def v2b16_227 : NVPTXReg<"%v2b16_227">; +def v2b16_228 : NVPTXReg<"%v2b16_228">; +def v2b16_229 : NVPTXReg<"%v2b16_229">; +def v2b16_230 : NVPTXReg<"%v2b16_230">; +def v2b16_231 : NVPTXReg<"%v2b16_231">; +def v2b16_232 : NVPTXReg<"%v2b16_232">; +def v2b16_233 : NVPTXReg<"%v2b16_233">; +def v2b16_234 : NVPTXReg<"%v2b16_234">; +def v2b16_235 : NVPTXReg<"%v2b16_235">; +def v2b16_236 : NVPTXReg<"%v2b16_236">; +def v2b16_237 : NVPTXReg<"%v2b16_237">; +def v2b16_238 : NVPTXReg<"%v2b16_238">; +def v2b16_239 : NVPTXReg<"%v2b16_239">; +def v2b16_240 : NVPTXReg<"%v2b16_240">; +def v2b16_241 : NVPTXReg<"%v2b16_241">; +def v2b16_242 : NVPTXReg<"%v2b16_242">; +def v2b16_243 : NVPTXReg<"%v2b16_243">; +def v2b16_244 : NVPTXReg<"%v2b16_244">; +def v2b16_245 : NVPTXReg<"%v2b16_245">; +def v2b16_246 : NVPTXReg<"%v2b16_246">; +def v2b16_247 : NVPTXReg<"%v2b16_247">; +def v2b16_248 : NVPTXReg<"%v2b16_248">; +def v2b16_249 : NVPTXReg<"%v2b16_249">; +def v2b16_250 : NVPTXReg<"%v2b16_250">; +def v2b16_251 : NVPTXReg<"%v2b16_251">; +def v2b16_252 : NVPTXReg<"%v2b16_252">; +def v2b16_253 : NVPTXReg<"%v2b16_253">; +def v2b16_254 : NVPTXReg<"%v2b16_254">; +def v2b16_255 : NVPTXReg<"%v2b16_255">; +def v2b16_256 : NVPTXReg<"%v2b16_256">; +def v2b16_257 : NVPTXReg<"%v2b16_257">; +def v2b16_258 : NVPTXReg<"%v2b16_258">; +def v2b16_259 : NVPTXReg<"%v2b16_259">; +def v2b16_260 : NVPTXReg<"%v2b16_260">; +def v2b16_261 : NVPTXReg<"%v2b16_261">; +def v2b16_262 : NVPTXReg<"%v2b16_262">; +def v2b16_263 : NVPTXReg<"%v2b16_263">; +def v2b16_264 : NVPTXReg<"%v2b16_264">; +def v2b16_265 : NVPTXReg<"%v2b16_265">; +def 
v2b16_266 : NVPTXReg<"%v2b16_266">; +def v2b16_267 : NVPTXReg<"%v2b16_267">; +def v2b16_268 : NVPTXReg<"%v2b16_268">; +def v2b16_269 : NVPTXReg<"%v2b16_269">; +def v2b16_270 : NVPTXReg<"%v2b16_270">; +def v2b16_271 : NVPTXReg<"%v2b16_271">; +def v2b16_272 : NVPTXReg<"%v2b16_272">; +def v2b16_273 : NVPTXReg<"%v2b16_273">; +def v2b16_274 : NVPTXReg<"%v2b16_274">; +def v2b16_275 : NVPTXReg<"%v2b16_275">; +def v2b16_276 : NVPTXReg<"%v2b16_276">; +def v2b16_277 : NVPTXReg<"%v2b16_277">; +def v2b16_278 : NVPTXReg<"%v2b16_278">; +def v2b16_279 : NVPTXReg<"%v2b16_279">; +def v2b16_280 : NVPTXReg<"%v2b16_280">; +def v2b16_281 : NVPTXReg<"%v2b16_281">; +def v2b16_282 : NVPTXReg<"%v2b16_282">; +def v2b16_283 : NVPTXReg<"%v2b16_283">; +def v2b16_284 : NVPTXReg<"%v2b16_284">; +def v2b16_285 : NVPTXReg<"%v2b16_285">; +def v2b16_286 : NVPTXReg<"%v2b16_286">; +def v2b16_287 : NVPTXReg<"%v2b16_287">; +def v2b16_288 : NVPTXReg<"%v2b16_288">; +def v2b16_289 : NVPTXReg<"%v2b16_289">; +def v2b16_290 : NVPTXReg<"%v2b16_290">; +def v2b16_291 : NVPTXReg<"%v2b16_291">; +def v2b16_292 : NVPTXReg<"%v2b16_292">; +def v2b16_293 : NVPTXReg<"%v2b16_293">; +def v2b16_294 : NVPTXReg<"%v2b16_294">; +def v2b16_295 : NVPTXReg<"%v2b16_295">; +def v2b16_296 : NVPTXReg<"%v2b16_296">; +def v2b16_297 : NVPTXReg<"%v2b16_297">; +def v2b16_298 : NVPTXReg<"%v2b16_298">; +def v2b16_299 : NVPTXReg<"%v2b16_299">; +def v2b16_300 : NVPTXReg<"%v2b16_300">; +def v2b16_301 : NVPTXReg<"%v2b16_301">; +def v2b16_302 : NVPTXReg<"%v2b16_302">; +def v2b16_303 : NVPTXReg<"%v2b16_303">; +def v2b16_304 : NVPTXReg<"%v2b16_304">; +def v2b16_305 : NVPTXReg<"%v2b16_305">; +def v2b16_306 : NVPTXReg<"%v2b16_306">; +def v2b16_307 : NVPTXReg<"%v2b16_307">; +def v2b16_308 : NVPTXReg<"%v2b16_308">; +def v2b16_309 : NVPTXReg<"%v2b16_309">; +def v2b16_310 : NVPTXReg<"%v2b16_310">; +def v2b16_311 : NVPTXReg<"%v2b16_311">; +def v2b16_312 : NVPTXReg<"%v2b16_312">; +def v2b16_313 : NVPTXReg<"%v2b16_313">; +def v2b16_314 : 
NVPTXReg<"%v2b16_314">; +def v2b16_315 : NVPTXReg<"%v2b16_315">; +def v2b16_316 : NVPTXReg<"%v2b16_316">; +def v2b16_317 : NVPTXReg<"%v2b16_317">; +def v2b16_318 : NVPTXReg<"%v2b16_318">; +def v2b16_319 : NVPTXReg<"%v2b16_319">; +def v2b16_320 : NVPTXReg<"%v2b16_320">; +def v2b16_321 : NVPTXReg<"%v2b16_321">; +def v2b16_322 : NVPTXReg<"%v2b16_322">; +def v2b16_323 : NVPTXReg<"%v2b16_323">; +def v2b16_324 : NVPTXReg<"%v2b16_324">; +def v2b16_325 : NVPTXReg<"%v2b16_325">; +def v2b16_326 : NVPTXReg<"%v2b16_326">; +def v2b16_327 : NVPTXReg<"%v2b16_327">; +def v2b16_328 : NVPTXReg<"%v2b16_328">; +def v2b16_329 : NVPTXReg<"%v2b16_329">; +def v2b16_330 : NVPTXReg<"%v2b16_330">; +def v2b16_331 : NVPTXReg<"%v2b16_331">; +def v2b16_332 : NVPTXReg<"%v2b16_332">; +def v2b16_333 : NVPTXReg<"%v2b16_333">; +def v2b16_334 : NVPTXReg<"%v2b16_334">; +def v2b16_335 : NVPTXReg<"%v2b16_335">; +def v2b16_336 : NVPTXReg<"%v2b16_336">; +def v2b16_337 : NVPTXReg<"%v2b16_337">; +def v2b16_338 : NVPTXReg<"%v2b16_338">; +def v2b16_339 : NVPTXReg<"%v2b16_339">; +def v2b16_340 : NVPTXReg<"%v2b16_340">; +def v2b16_341 : NVPTXReg<"%v2b16_341">; +def v2b16_342 : NVPTXReg<"%v2b16_342">; +def v2b16_343 : NVPTXReg<"%v2b16_343">; +def v2b16_344 : NVPTXReg<"%v2b16_344">; +def v2b16_345 : NVPTXReg<"%v2b16_345">; +def v2b16_346 : NVPTXReg<"%v2b16_346">; +def v2b16_347 : NVPTXReg<"%v2b16_347">; +def v2b16_348 : NVPTXReg<"%v2b16_348">; +def v2b16_349 : NVPTXReg<"%v2b16_349">; +def v2b16_350 : NVPTXReg<"%v2b16_350">; +def v2b16_351 : NVPTXReg<"%v2b16_351">; +def v2b16_352 : NVPTXReg<"%v2b16_352">; +def v2b16_353 : NVPTXReg<"%v2b16_353">; +def v2b16_354 : NVPTXReg<"%v2b16_354">; +def v2b16_355 : NVPTXReg<"%v2b16_355">; +def v2b16_356 : NVPTXReg<"%v2b16_356">; +def v2b16_357 : NVPTXReg<"%v2b16_357">; +def v2b16_358 : NVPTXReg<"%v2b16_358">; +def v2b16_359 : NVPTXReg<"%v2b16_359">; +def v2b16_360 : NVPTXReg<"%v2b16_360">; +def v2b16_361 : NVPTXReg<"%v2b16_361">; +def v2b16_362 : NVPTXReg<"%v2b16_362">; +def 
v2b16_363 : NVPTXReg<"%v2b16_363">; +def v2b16_364 : NVPTXReg<"%v2b16_364">; +def v2b16_365 : NVPTXReg<"%v2b16_365">; +def v2b16_366 : NVPTXReg<"%v2b16_366">; +def v2b16_367 : NVPTXReg<"%v2b16_367">; +def v2b16_368 : NVPTXReg<"%v2b16_368">; +def v2b16_369 : NVPTXReg<"%v2b16_369">; +def v2b16_370 : NVPTXReg<"%v2b16_370">; +def v2b16_371 : NVPTXReg<"%v2b16_371">; +def v2b16_372 : NVPTXReg<"%v2b16_372">; +def v2b16_373 : NVPTXReg<"%v2b16_373">; +def v2b16_374 : NVPTXReg<"%v2b16_374">; +def v2b16_375 : NVPTXReg<"%v2b16_375">; +def v2b16_376 : NVPTXReg<"%v2b16_376">; +def v2b16_377 : NVPTXReg<"%v2b16_377">; +def v2b16_378 : NVPTXReg<"%v2b16_378">; +def v2b16_379 : NVPTXReg<"%v2b16_379">; +def v2b16_380 : NVPTXReg<"%v2b16_380">; +def v2b16_381 : NVPTXReg<"%v2b16_381">; +def v2b16_382 : NVPTXReg<"%v2b16_382">; +def v2b16_383 : NVPTXReg<"%v2b16_383">; +def v2b16_384 : NVPTXReg<"%v2b16_384">; +def v2b16_385 : NVPTXReg<"%v2b16_385">; +def v2b16_386 : NVPTXReg<"%v2b16_386">; +def v2b16_387 : NVPTXReg<"%v2b16_387">; +def v2b16_388 : NVPTXReg<"%v2b16_388">; +def v2b16_389 : NVPTXReg<"%v2b16_389">; +def v2b16_390 : NVPTXReg<"%v2b16_390">; +def v2b16_391 : NVPTXReg<"%v2b16_391">; +def v2b16_392 : NVPTXReg<"%v2b16_392">; +def v2b16_393 : NVPTXReg<"%v2b16_393">; +def v2b16_394 : NVPTXReg<"%v2b16_394">; +def v2b16_395 : NVPTXReg<"%v2b16_395">; +def v2b32_0 : NVPTXReg<"%v2b32_0">; +def v2b32_1 : NVPTXReg<"%v2b32_1">; +def v2b32_2 : NVPTXReg<"%v2b32_2">; +def v2b32_3 : NVPTXReg<"%v2b32_3">; +def v2b32_4 : NVPTXReg<"%v2b32_4">; +def v2b32_5 : NVPTXReg<"%v2b32_5">; +def v2b32_6 : NVPTXReg<"%v2b32_6">; +def v2b32_7 : NVPTXReg<"%v2b32_7">; +def v2b32_8 : NVPTXReg<"%v2b32_8">; +def v2b32_9 : NVPTXReg<"%v2b32_9">; +def v2b32_10 : NVPTXReg<"%v2b32_10">; +def v2b32_11 : NVPTXReg<"%v2b32_11">; +def v2b32_12 : NVPTXReg<"%v2b32_12">; +def v2b32_13 : NVPTXReg<"%v2b32_13">; +def v2b32_14 : NVPTXReg<"%v2b32_14">; +def v2b32_15 : NVPTXReg<"%v2b32_15">; +def v2b32_16 : NVPTXReg<"%v2b32_16">; +def 
v2b32_17 : NVPTXReg<"%v2b32_17">; +def v2b32_18 : NVPTXReg<"%v2b32_18">; +def v2b32_19 : NVPTXReg<"%v2b32_19">; +def v2b32_20 : NVPTXReg<"%v2b32_20">; +def v2b32_21 : NVPTXReg<"%v2b32_21">; +def v2b32_22 : NVPTXReg<"%v2b32_22">; +def v2b32_23 : NVPTXReg<"%v2b32_23">; +def v2b32_24 : NVPTXReg<"%v2b32_24">; +def v2b32_25 : NVPTXReg<"%v2b32_25">; +def v2b32_26 : NVPTXReg<"%v2b32_26">; +def v2b32_27 : NVPTXReg<"%v2b32_27">; +def v2b32_28 : NVPTXReg<"%v2b32_28">; +def v2b32_29 : NVPTXReg<"%v2b32_29">; +def v2b32_30 : NVPTXReg<"%v2b32_30">; +def v2b32_31 : NVPTXReg<"%v2b32_31">; +def v2b32_32 : NVPTXReg<"%v2b32_32">; +def v2b32_33 : NVPTXReg<"%v2b32_33">; +def v2b32_34 : NVPTXReg<"%v2b32_34">; +def v2b32_35 : NVPTXReg<"%v2b32_35">; +def v2b32_36 : NVPTXReg<"%v2b32_36">; +def v2b32_37 : NVPTXReg<"%v2b32_37">; +def v2b32_38 : NVPTXReg<"%v2b32_38">; +def v2b32_39 : NVPTXReg<"%v2b32_39">; +def v2b32_40 : NVPTXReg<"%v2b32_40">; +def v2b32_41 : NVPTXReg<"%v2b32_41">; +def v2b32_42 : NVPTXReg<"%v2b32_42">; +def v2b32_43 : NVPTXReg<"%v2b32_43">; +def v2b32_44 : NVPTXReg<"%v2b32_44">; +def v2b32_45 : NVPTXReg<"%v2b32_45">; +def v2b32_46 : NVPTXReg<"%v2b32_46">; +def v2b32_47 : NVPTXReg<"%v2b32_47">; +def v2b32_48 : NVPTXReg<"%v2b32_48">; +def v2b32_49 : NVPTXReg<"%v2b32_49">; +def v2b32_50 : NVPTXReg<"%v2b32_50">; +def v2b32_51 : NVPTXReg<"%v2b32_51">; +def v2b32_52 : NVPTXReg<"%v2b32_52">; +def v2b32_53 : NVPTXReg<"%v2b32_53">; +def v2b32_54 : NVPTXReg<"%v2b32_54">; +def v2b32_55 : NVPTXReg<"%v2b32_55">; +def v2b32_56 : NVPTXReg<"%v2b32_56">; +def v2b32_57 : NVPTXReg<"%v2b32_57">; +def v2b32_58 : NVPTXReg<"%v2b32_58">; +def v2b32_59 : NVPTXReg<"%v2b32_59">; +def v2b32_60 : NVPTXReg<"%v2b32_60">; +def v2b32_61 : NVPTXReg<"%v2b32_61">; +def v2b32_62 : NVPTXReg<"%v2b32_62">; +def v2b32_63 : NVPTXReg<"%v2b32_63">; +def v2b32_64 : NVPTXReg<"%v2b32_64">; +def v2b32_65 : NVPTXReg<"%v2b32_65">; +def v2b32_66 : NVPTXReg<"%v2b32_66">; +def v2b32_67 : NVPTXReg<"%v2b32_67">; +def v2b32_68 : 
NVPTXReg<"%v2b32_68">; +def v2b32_69 : NVPTXReg<"%v2b32_69">; +def v2b32_70 : NVPTXReg<"%v2b32_70">; +def v2b32_71 : NVPTXReg<"%v2b32_71">; +def v2b32_72 : NVPTXReg<"%v2b32_72">; +def v2b32_73 : NVPTXReg<"%v2b32_73">; +def v2b32_74 : NVPTXReg<"%v2b32_74">; +def v2b32_75 : NVPTXReg<"%v2b32_75">; +def v2b32_76 : NVPTXReg<"%v2b32_76">; +def v2b32_77 : NVPTXReg<"%v2b32_77">; +def v2b32_78 : NVPTXReg<"%v2b32_78">; +def v2b32_79 : NVPTXReg<"%v2b32_79">; +def v2b32_80 : NVPTXReg<"%v2b32_80">; +def v2b32_81 : NVPTXReg<"%v2b32_81">; +def v2b32_82 : NVPTXReg<"%v2b32_82">; +def v2b32_83 : NVPTXReg<"%v2b32_83">; +def v2b32_84 : NVPTXReg<"%v2b32_84">; +def v2b32_85 : NVPTXReg<"%v2b32_85">; +def v2b32_86 : NVPTXReg<"%v2b32_86">; +def v2b32_87 : NVPTXReg<"%v2b32_87">; +def v2b32_88 : NVPTXReg<"%v2b32_88">; +def v2b32_89 : NVPTXReg<"%v2b32_89">; +def v2b32_90 : NVPTXReg<"%v2b32_90">; +def v2b32_91 : NVPTXReg<"%v2b32_91">; +def v2b32_92 : NVPTXReg<"%v2b32_92">; +def v2b32_93 : NVPTXReg<"%v2b32_93">; +def v2b32_94 : NVPTXReg<"%v2b32_94">; +def v2b32_95 : NVPTXReg<"%v2b32_95">; +def v2b32_96 : NVPTXReg<"%v2b32_96">; +def v2b32_97 : NVPTXReg<"%v2b32_97">; +def v2b32_98 : NVPTXReg<"%v2b32_98">; +def v2b32_99 : NVPTXReg<"%v2b32_99">; +def v2b32_100 : NVPTXReg<"%v2b32_100">; +def v2b32_101 : NVPTXReg<"%v2b32_101">; +def v2b32_102 : NVPTXReg<"%v2b32_102">; +def v2b32_103 : NVPTXReg<"%v2b32_103">; +def v2b32_104 : NVPTXReg<"%v2b32_104">; +def v2b32_105 : NVPTXReg<"%v2b32_105">; +def v2b32_106 : NVPTXReg<"%v2b32_106">; +def v2b32_107 : NVPTXReg<"%v2b32_107">; +def v2b32_108 : NVPTXReg<"%v2b32_108">; +def v2b32_109 : NVPTXReg<"%v2b32_109">; +def v2b32_110 : NVPTXReg<"%v2b32_110">; +def v2b32_111 : NVPTXReg<"%v2b32_111">; +def v2b32_112 : NVPTXReg<"%v2b32_112">; +def v2b32_113 : NVPTXReg<"%v2b32_113">; +def v2b32_114 : NVPTXReg<"%v2b32_114">; +def v2b32_115 : NVPTXReg<"%v2b32_115">; +def v2b32_116 : NVPTXReg<"%v2b32_116">; +def v2b32_117 : NVPTXReg<"%v2b32_117">; +def v2b32_118 : 
NVPTXReg<"%v2b32_118">; +def v2b32_119 : NVPTXReg<"%v2b32_119">; +def v2b32_120 : NVPTXReg<"%v2b32_120">; +def v2b32_121 : NVPTXReg<"%v2b32_121">; +def v2b32_122 : NVPTXReg<"%v2b32_122">; +def v2b32_123 : NVPTXReg<"%v2b32_123">; +def v2b32_124 : NVPTXReg<"%v2b32_124">; +def v2b32_125 : NVPTXReg<"%v2b32_125">; +def v2b32_126 : NVPTXReg<"%v2b32_126">; +def v2b32_127 : NVPTXReg<"%v2b32_127">; +def v2b32_128 : NVPTXReg<"%v2b32_128">; +def v2b32_129 : NVPTXReg<"%v2b32_129">; +def v2b32_130 : NVPTXReg<"%v2b32_130">; +def v2b32_131 : NVPTXReg<"%v2b32_131">; +def v2b32_132 : NVPTXReg<"%v2b32_132">; +def v2b32_133 : NVPTXReg<"%v2b32_133">; +def v2b32_134 : NVPTXReg<"%v2b32_134">; +def v2b32_135 : NVPTXReg<"%v2b32_135">; +def v2b32_136 : NVPTXReg<"%v2b32_136">; +def v2b32_137 : NVPTXReg<"%v2b32_137">; +def v2b32_138 : NVPTXReg<"%v2b32_138">; +def v2b32_139 : NVPTXReg<"%v2b32_139">; +def v2b32_140 : NVPTXReg<"%v2b32_140">; +def v2b32_141 : NVPTXReg<"%v2b32_141">; +def v2b32_142 : NVPTXReg<"%v2b32_142">; +def v2b32_143 : NVPTXReg<"%v2b32_143">; +def v2b32_144 : NVPTXReg<"%v2b32_144">; +def v2b32_145 : NVPTXReg<"%v2b32_145">; +def v2b32_146 : NVPTXReg<"%v2b32_146">; +def v2b32_147 : NVPTXReg<"%v2b32_147">; +def v2b32_148 : NVPTXReg<"%v2b32_148">; +def v2b32_149 : NVPTXReg<"%v2b32_149">; +def v2b32_150 : NVPTXReg<"%v2b32_150">; +def v2b32_151 : NVPTXReg<"%v2b32_151">; +def v2b32_152 : NVPTXReg<"%v2b32_152">; +def v2b32_153 : NVPTXReg<"%v2b32_153">; +def v2b32_154 : NVPTXReg<"%v2b32_154">; +def v2b32_155 : NVPTXReg<"%v2b32_155">; +def v2b32_156 : NVPTXReg<"%v2b32_156">; +def v2b32_157 : NVPTXReg<"%v2b32_157">; +def v2b32_158 : NVPTXReg<"%v2b32_158">; +def v2b32_159 : NVPTXReg<"%v2b32_159">; +def v2b32_160 : NVPTXReg<"%v2b32_160">; +def v2b32_161 : NVPTXReg<"%v2b32_161">; +def v2b32_162 : NVPTXReg<"%v2b32_162">; +def v2b32_163 : NVPTXReg<"%v2b32_163">; +def v2b32_164 : NVPTXReg<"%v2b32_164">; +def v2b32_165 : NVPTXReg<"%v2b32_165">; +def v2b32_166 : NVPTXReg<"%v2b32_166">; +def 
v2b32_167 : NVPTXReg<"%v2b32_167">; +def v2b32_168 : NVPTXReg<"%v2b32_168">; +def v2b32_169 : NVPTXReg<"%v2b32_169">; +def v2b32_170 : NVPTXReg<"%v2b32_170">; +def v2b32_171 : NVPTXReg<"%v2b32_171">; +def v2b32_172 : NVPTXReg<"%v2b32_172">; +def v2b32_173 : NVPTXReg<"%v2b32_173">; +def v2b32_174 : NVPTXReg<"%v2b32_174">; +def v2b32_175 : NVPTXReg<"%v2b32_175">; +def v2b32_176 : NVPTXReg<"%v2b32_176">; +def v2b32_177 : NVPTXReg<"%v2b32_177">; +def v2b32_178 : NVPTXReg<"%v2b32_178">; +def v2b32_179 : NVPTXReg<"%v2b32_179">; +def v2b32_180 : NVPTXReg<"%v2b32_180">; +def v2b32_181 : NVPTXReg<"%v2b32_181">; +def v2b32_182 : NVPTXReg<"%v2b32_182">; +def v2b32_183 : NVPTXReg<"%v2b32_183">; +def v2b32_184 : NVPTXReg<"%v2b32_184">; +def v2b32_185 : NVPTXReg<"%v2b32_185">; +def v2b32_186 : NVPTXReg<"%v2b32_186">; +def v2b32_187 : NVPTXReg<"%v2b32_187">; +def v2b32_188 : NVPTXReg<"%v2b32_188">; +def v2b32_189 : NVPTXReg<"%v2b32_189">; +def v2b32_190 : NVPTXReg<"%v2b32_190">; +def v2b32_191 : NVPTXReg<"%v2b32_191">; +def v2b32_192 : NVPTXReg<"%v2b32_192">; +def v2b32_193 : NVPTXReg<"%v2b32_193">; +def v2b32_194 : NVPTXReg<"%v2b32_194">; +def v2b32_195 : NVPTXReg<"%v2b32_195">; +def v2b32_196 : NVPTXReg<"%v2b32_196">; +def v2b32_197 : NVPTXReg<"%v2b32_197">; +def v2b32_198 : NVPTXReg<"%v2b32_198">; +def v2b32_199 : NVPTXReg<"%v2b32_199">; +def v2b32_200 : NVPTXReg<"%v2b32_200">; +def v2b32_201 : NVPTXReg<"%v2b32_201">; +def v2b32_202 : NVPTXReg<"%v2b32_202">; +def v2b32_203 : NVPTXReg<"%v2b32_203">; +def v2b32_204 : NVPTXReg<"%v2b32_204">; +def v2b32_205 : NVPTXReg<"%v2b32_205">; +def v2b32_206 : NVPTXReg<"%v2b32_206">; +def v2b32_207 : NVPTXReg<"%v2b32_207">; +def v2b32_208 : NVPTXReg<"%v2b32_208">; +def v2b32_209 : NVPTXReg<"%v2b32_209">; +def v2b32_210 : NVPTXReg<"%v2b32_210">; +def v2b32_211 : NVPTXReg<"%v2b32_211">; +def v2b32_212 : NVPTXReg<"%v2b32_212">; +def v2b32_213 : NVPTXReg<"%v2b32_213">; +def v2b32_214 : NVPTXReg<"%v2b32_214">; +def v2b32_215 : 
NVPTXReg<"%v2b32_215">; +def v2b32_216 : NVPTXReg<"%v2b32_216">; +def v2b32_217 : NVPTXReg<"%v2b32_217">; +def v2b32_218 : NVPTXReg<"%v2b32_218">; +def v2b32_219 : NVPTXReg<"%v2b32_219">; +def v2b32_220 : NVPTXReg<"%v2b32_220">; +def v2b32_221 : NVPTXReg<"%v2b32_221">; +def v2b32_222 : NVPTXReg<"%v2b32_222">; +def v2b32_223 : NVPTXReg<"%v2b32_223">; +def v2b32_224 : NVPTXReg<"%v2b32_224">; +def v2b32_225 : NVPTXReg<"%v2b32_225">; +def v2b32_226 : NVPTXReg<"%v2b32_226">; +def v2b32_227 : NVPTXReg<"%v2b32_227">; +def v2b32_228 : NVPTXReg<"%v2b32_228">; +def v2b32_229 : NVPTXReg<"%v2b32_229">; +def v2b32_230 : NVPTXReg<"%v2b32_230">; +def v2b32_231 : NVPTXReg<"%v2b32_231">; +def v2b32_232 : NVPTXReg<"%v2b32_232">; +def v2b32_233 : NVPTXReg<"%v2b32_233">; +def v2b32_234 : NVPTXReg<"%v2b32_234">; +def v2b32_235 : NVPTXReg<"%v2b32_235">; +def v2b32_236 : NVPTXReg<"%v2b32_236">; +def v2b32_237 : NVPTXReg<"%v2b32_237">; +def v2b32_238 : NVPTXReg<"%v2b32_238">; +def v2b32_239 : NVPTXReg<"%v2b32_239">; +def v2b32_240 : NVPTXReg<"%v2b32_240">; +def v2b32_241 : NVPTXReg<"%v2b32_241">; +def v2b32_242 : NVPTXReg<"%v2b32_242">; +def v2b32_243 : NVPTXReg<"%v2b32_243">; +def v2b32_244 : NVPTXReg<"%v2b32_244">; +def v2b32_245 : NVPTXReg<"%v2b32_245">; +def v2b32_246 : NVPTXReg<"%v2b32_246">; +def v2b32_247 : NVPTXReg<"%v2b32_247">; +def v2b32_248 : NVPTXReg<"%v2b32_248">; +def v2b32_249 : NVPTXReg<"%v2b32_249">; +def v2b32_250 : NVPTXReg<"%v2b32_250">; +def v2b32_251 : NVPTXReg<"%v2b32_251">; +def v2b32_252 : NVPTXReg<"%v2b32_252">; +def v2b32_253 : NVPTXReg<"%v2b32_253">; +def v2b32_254 : NVPTXReg<"%v2b32_254">; +def v2b32_255 : NVPTXReg<"%v2b32_255">; +def v2b32_256 : NVPTXReg<"%v2b32_256">; +def v2b32_257 : NVPTXReg<"%v2b32_257">; +def v2b32_258 : NVPTXReg<"%v2b32_258">; +def v2b32_259 : NVPTXReg<"%v2b32_259">; +def v2b32_260 : NVPTXReg<"%v2b32_260">; +def v2b32_261 : NVPTXReg<"%v2b32_261">; +def v2b32_262 : NVPTXReg<"%v2b32_262">; +def v2b32_263 : NVPTXReg<"%v2b32_263">; +def 
v2b32_264 : NVPTXReg<"%v2b32_264">; +def v2b32_265 : NVPTXReg<"%v2b32_265">; +def v2b32_266 : NVPTXReg<"%v2b32_266">; +def v2b32_267 : NVPTXReg<"%v2b32_267">; +def v2b32_268 : NVPTXReg<"%v2b32_268">; +def v2b32_269 : NVPTXReg<"%v2b32_269">; +def v2b32_270 : NVPTXReg<"%v2b32_270">; +def v2b32_271 : NVPTXReg<"%v2b32_271">; +def v2b32_272 : NVPTXReg<"%v2b32_272">; +def v2b32_273 : NVPTXReg<"%v2b32_273">; +def v2b32_274 : NVPTXReg<"%v2b32_274">; +def v2b32_275 : NVPTXReg<"%v2b32_275">; +def v2b32_276 : NVPTXReg<"%v2b32_276">; +def v2b32_277 : NVPTXReg<"%v2b32_277">; +def v2b32_278 : NVPTXReg<"%v2b32_278">; +def v2b32_279 : NVPTXReg<"%v2b32_279">; +def v2b32_280 : NVPTXReg<"%v2b32_280">; +def v2b32_281 : NVPTXReg<"%v2b32_281">; +def v2b32_282 : NVPTXReg<"%v2b32_282">; +def v2b32_283 : NVPTXReg<"%v2b32_283">; +def v2b32_284 : NVPTXReg<"%v2b32_284">; +def v2b32_285 : NVPTXReg<"%v2b32_285">; +def v2b32_286 : NVPTXReg<"%v2b32_286">; +def v2b32_287 : NVPTXReg<"%v2b32_287">; +def v2b32_288 : NVPTXReg<"%v2b32_288">; +def v2b32_289 : NVPTXReg<"%v2b32_289">; +def v2b32_290 : NVPTXReg<"%v2b32_290">; +def v2b32_291 : NVPTXReg<"%v2b32_291">; +def v2b32_292 : NVPTXReg<"%v2b32_292">; +def v2b32_293 : NVPTXReg<"%v2b32_293">; +def v2b32_294 : NVPTXReg<"%v2b32_294">; +def v2b32_295 : NVPTXReg<"%v2b32_295">; +def v2b32_296 : NVPTXReg<"%v2b32_296">; +def v2b32_297 : NVPTXReg<"%v2b32_297">; +def v2b32_298 : NVPTXReg<"%v2b32_298">; +def v2b32_299 : NVPTXReg<"%v2b32_299">; +def v2b32_300 : NVPTXReg<"%v2b32_300">; +def v2b32_301 : NVPTXReg<"%v2b32_301">; +def v2b32_302 : NVPTXReg<"%v2b32_302">; +def v2b32_303 : NVPTXReg<"%v2b32_303">; +def v2b32_304 : NVPTXReg<"%v2b32_304">; +def v2b32_305 : NVPTXReg<"%v2b32_305">; +def v2b32_306 : NVPTXReg<"%v2b32_306">; +def v2b32_307 : NVPTXReg<"%v2b32_307">; +def v2b32_308 : NVPTXReg<"%v2b32_308">; +def v2b32_309 : NVPTXReg<"%v2b32_309">; +def v2b32_310 : NVPTXReg<"%v2b32_310">; +def v2b32_311 : NVPTXReg<"%v2b32_311">; +def v2b32_312 : 
NVPTXReg<"%v2b32_312">; +def v2b32_313 : NVPTXReg<"%v2b32_313">; +def v2b32_314 : NVPTXReg<"%v2b32_314">; +def v2b32_315 : NVPTXReg<"%v2b32_315">; +def v2b32_316 : NVPTXReg<"%v2b32_316">; +def v2b32_317 : NVPTXReg<"%v2b32_317">; +def v2b32_318 : NVPTXReg<"%v2b32_318">; +def v2b32_319 : NVPTXReg<"%v2b32_319">; +def v2b32_320 : NVPTXReg<"%v2b32_320">; +def v2b32_321 : NVPTXReg<"%v2b32_321">; +def v2b32_322 : NVPTXReg<"%v2b32_322">; +def v2b32_323 : NVPTXReg<"%v2b32_323">; +def v2b32_324 : NVPTXReg<"%v2b32_324">; +def v2b32_325 : NVPTXReg<"%v2b32_325">; +def v2b32_326 : NVPTXReg<"%v2b32_326">; +def v2b32_327 : NVPTXReg<"%v2b32_327">; +def v2b32_328 : NVPTXReg<"%v2b32_328">; +def v2b32_329 : NVPTXReg<"%v2b32_329">; +def v2b32_330 : NVPTXReg<"%v2b32_330">; +def v2b32_331 : NVPTXReg<"%v2b32_331">; +def v2b32_332 : NVPTXReg<"%v2b32_332">; +def v2b32_333 : NVPTXReg<"%v2b32_333">; +def v2b32_334 : NVPTXReg<"%v2b32_334">; +def v2b32_335 : NVPTXReg<"%v2b32_335">; +def v2b32_336 : NVPTXReg<"%v2b32_336">; +def v2b32_337 : NVPTXReg<"%v2b32_337">; +def v2b32_338 : NVPTXReg<"%v2b32_338">; +def v2b32_339 : NVPTXReg<"%v2b32_339">; +def v2b32_340 : NVPTXReg<"%v2b32_340">; +def v2b32_341 : NVPTXReg<"%v2b32_341">; +def v2b32_342 : NVPTXReg<"%v2b32_342">; +def v2b32_343 : NVPTXReg<"%v2b32_343">; +def v2b32_344 : NVPTXReg<"%v2b32_344">; +def v2b32_345 : NVPTXReg<"%v2b32_345">; +def v2b32_346 : NVPTXReg<"%v2b32_346">; +def v2b32_347 : NVPTXReg<"%v2b32_347">; +def v2b32_348 : NVPTXReg<"%v2b32_348">; +def v2b32_349 : NVPTXReg<"%v2b32_349">; +def v2b32_350 : NVPTXReg<"%v2b32_350">; +def v2b32_351 : NVPTXReg<"%v2b32_351">; +def v2b32_352 : NVPTXReg<"%v2b32_352">; +def v2b32_353 : NVPTXReg<"%v2b32_353">; +def v2b32_354 : NVPTXReg<"%v2b32_354">; +def v2b32_355 : NVPTXReg<"%v2b32_355">; +def v2b32_356 : NVPTXReg<"%v2b32_356">; +def v2b32_357 : NVPTXReg<"%v2b32_357">; +def v2b32_358 : NVPTXReg<"%v2b32_358">; +def v2b32_359 : NVPTXReg<"%v2b32_359">; +def v2b32_360 : NVPTXReg<"%v2b32_360">; +def 
v2b32_361 : NVPTXReg<"%v2b32_361">; +def v2b32_362 : NVPTXReg<"%v2b32_362">; +def v2b32_363 : NVPTXReg<"%v2b32_363">; +def v2b32_364 : NVPTXReg<"%v2b32_364">; +def v2b32_365 : NVPTXReg<"%v2b32_365">; +def v2b32_366 : NVPTXReg<"%v2b32_366">; +def v2b32_367 : NVPTXReg<"%v2b32_367">; +def v2b32_368 : NVPTXReg<"%v2b32_368">; +def v2b32_369 : NVPTXReg<"%v2b32_369">; +def v2b32_370 : NVPTXReg<"%v2b32_370">; +def v2b32_371 : NVPTXReg<"%v2b32_371">; +def v2b32_372 : NVPTXReg<"%v2b32_372">; +def v2b32_373 : NVPTXReg<"%v2b32_373">; +def v2b32_374 : NVPTXReg<"%v2b32_374">; +def v2b32_375 : NVPTXReg<"%v2b32_375">; +def v2b32_376 : NVPTXReg<"%v2b32_376">; +def v2b32_377 : NVPTXReg<"%v2b32_377">; +def v2b32_378 : NVPTXReg<"%v2b32_378">; +def v2b32_379 : NVPTXReg<"%v2b32_379">; +def v2b32_380 : NVPTXReg<"%v2b32_380">; +def v2b32_381 : NVPTXReg<"%v2b32_381">; +def v2b32_382 : NVPTXReg<"%v2b32_382">; +def v2b32_383 : NVPTXReg<"%v2b32_383">; +def v2b32_384 : NVPTXReg<"%v2b32_384">; +def v2b32_385 : NVPTXReg<"%v2b32_385">; +def v2b32_386 : NVPTXReg<"%v2b32_386">; +def v2b32_387 : NVPTXReg<"%v2b32_387">; +def v2b32_388 : NVPTXReg<"%v2b32_388">; +def v2b32_389 : NVPTXReg<"%v2b32_389">; +def v2b32_390 : NVPTXReg<"%v2b32_390">; +def v2b32_391 : NVPTXReg<"%v2b32_391">; +def v2b32_392 : NVPTXReg<"%v2b32_392">; +def v2b32_393 : NVPTXReg<"%v2b32_393">; +def v2b32_394 : NVPTXReg<"%v2b32_394">; +def v2b32_395 : NVPTXReg<"%v2b32_395">; +def v2b64_0 : NVPTXReg<"%v2b64_0">; +def v2b64_1 : NVPTXReg<"%v2b64_1">; +def v2b64_2 : NVPTXReg<"%v2b64_2">; +def v2b64_3 : NVPTXReg<"%v2b64_3">; +def v2b64_4 : NVPTXReg<"%v2b64_4">; +def v2b64_5 : NVPTXReg<"%v2b64_5">; +def v2b64_6 : NVPTXReg<"%v2b64_6">; +def v2b64_7 : NVPTXReg<"%v2b64_7">; +def v2b64_8 : NVPTXReg<"%v2b64_8">; +def v2b64_9 : NVPTXReg<"%v2b64_9">; +def v2b64_10 : NVPTXReg<"%v2b64_10">; +def v2b64_11 : NVPTXReg<"%v2b64_11">; +def v2b64_12 : NVPTXReg<"%v2b64_12">; +def v2b64_13 : NVPTXReg<"%v2b64_13">; +def v2b64_14 : NVPTXReg<"%v2b64_14">; +def 
v2b64_15 : NVPTXReg<"%v2b64_15">; +def v2b64_16 : NVPTXReg<"%v2b64_16">; +def v2b64_17 : NVPTXReg<"%v2b64_17">; +def v2b64_18 : NVPTXReg<"%v2b64_18">; +def v2b64_19 : NVPTXReg<"%v2b64_19">; +def v2b64_20 : NVPTXReg<"%v2b64_20">; +def v2b64_21 : NVPTXReg<"%v2b64_21">; +def v2b64_22 : NVPTXReg<"%v2b64_22">; +def v2b64_23 : NVPTXReg<"%v2b64_23">; +def v2b64_24 : NVPTXReg<"%v2b64_24">; +def v2b64_25 : NVPTXReg<"%v2b64_25">; +def v2b64_26 : NVPTXReg<"%v2b64_26">; +def v2b64_27 : NVPTXReg<"%v2b64_27">; +def v2b64_28 : NVPTXReg<"%v2b64_28">; +def v2b64_29 : NVPTXReg<"%v2b64_29">; +def v2b64_30 : NVPTXReg<"%v2b64_30">; +def v2b64_31 : NVPTXReg<"%v2b64_31">; +def v2b64_32 : NVPTXReg<"%v2b64_32">; +def v2b64_33 : NVPTXReg<"%v2b64_33">; +def v2b64_34 : NVPTXReg<"%v2b64_34">; +def v2b64_35 : NVPTXReg<"%v2b64_35">; +def v2b64_36 : NVPTXReg<"%v2b64_36">; +def v2b64_37 : NVPTXReg<"%v2b64_37">; +def v2b64_38 : NVPTXReg<"%v2b64_38">; +def v2b64_39 : NVPTXReg<"%v2b64_39">; +def v2b64_40 : NVPTXReg<"%v2b64_40">; +def v2b64_41 : NVPTXReg<"%v2b64_41">; +def v2b64_42 : NVPTXReg<"%v2b64_42">; +def v2b64_43 : NVPTXReg<"%v2b64_43">; +def v2b64_44 : NVPTXReg<"%v2b64_44">; +def v2b64_45 : NVPTXReg<"%v2b64_45">; +def v2b64_46 : NVPTXReg<"%v2b64_46">; +def v2b64_47 : NVPTXReg<"%v2b64_47">; +def v2b64_48 : NVPTXReg<"%v2b64_48">; +def v2b64_49 : NVPTXReg<"%v2b64_49">; +def v2b64_50 : NVPTXReg<"%v2b64_50">; +def v2b64_51 : NVPTXReg<"%v2b64_51">; +def v2b64_52 : NVPTXReg<"%v2b64_52">; +def v2b64_53 : NVPTXReg<"%v2b64_53">; +def v2b64_54 : NVPTXReg<"%v2b64_54">; +def v2b64_55 : NVPTXReg<"%v2b64_55">; +def v2b64_56 : NVPTXReg<"%v2b64_56">; +def v2b64_57 : NVPTXReg<"%v2b64_57">; +def v2b64_58 : NVPTXReg<"%v2b64_58">; +def v2b64_59 : NVPTXReg<"%v2b64_59">; +def v2b64_60 : NVPTXReg<"%v2b64_60">; +def v2b64_61 : NVPTXReg<"%v2b64_61">; +def v2b64_62 : NVPTXReg<"%v2b64_62">; +def v2b64_63 : NVPTXReg<"%v2b64_63">; +def v2b64_64 : NVPTXReg<"%v2b64_64">; +def v2b64_65 : NVPTXReg<"%v2b64_65">; +def v2b64_66 : 
NVPTXReg<"%v2b64_66">; +def v2b64_67 : NVPTXReg<"%v2b64_67">; +def v2b64_68 : NVPTXReg<"%v2b64_68">; +def v2b64_69 : NVPTXReg<"%v2b64_69">; +def v2b64_70 : NVPTXReg<"%v2b64_70">; +def v2b64_71 : NVPTXReg<"%v2b64_71">; +def v2b64_72 : NVPTXReg<"%v2b64_72">; +def v2b64_73 : NVPTXReg<"%v2b64_73">; +def v2b64_74 : NVPTXReg<"%v2b64_74">; +def v2b64_75 : NVPTXReg<"%v2b64_75">; +def v2b64_76 : NVPTXReg<"%v2b64_76">; +def v2b64_77 : NVPTXReg<"%v2b64_77">; +def v2b64_78 : NVPTXReg<"%v2b64_78">; +def v2b64_79 : NVPTXReg<"%v2b64_79">; +def v2b64_80 : NVPTXReg<"%v2b64_80">; +def v2b64_81 : NVPTXReg<"%v2b64_81">; +def v2b64_82 : NVPTXReg<"%v2b64_82">; +def v2b64_83 : NVPTXReg<"%v2b64_83">; +def v2b64_84 : NVPTXReg<"%v2b64_84">; +def v2b64_85 : NVPTXReg<"%v2b64_85">; +def v2b64_86 : NVPTXReg<"%v2b64_86">; +def v2b64_87 : NVPTXReg<"%v2b64_87">; +def v2b64_88 : NVPTXReg<"%v2b64_88">; +def v2b64_89 : NVPTXReg<"%v2b64_89">; +def v2b64_90 : NVPTXReg<"%v2b64_90">; +def v2b64_91 : NVPTXReg<"%v2b64_91">; +def v2b64_92 : NVPTXReg<"%v2b64_92">; +def v2b64_93 : NVPTXReg<"%v2b64_93">; +def v2b64_94 : NVPTXReg<"%v2b64_94">; +def v2b64_95 : NVPTXReg<"%v2b64_95">; +def v2b64_96 : NVPTXReg<"%v2b64_96">; +def v2b64_97 : NVPTXReg<"%v2b64_97">; +def v2b64_98 : NVPTXReg<"%v2b64_98">; +def v2b64_99 : NVPTXReg<"%v2b64_99">; +def v2b64_100 : NVPTXReg<"%v2b64_100">; +def v2b64_101 : NVPTXReg<"%v2b64_101">; +def v2b64_102 : NVPTXReg<"%v2b64_102">; +def v2b64_103 : NVPTXReg<"%v2b64_103">; +def v2b64_104 : NVPTXReg<"%v2b64_104">; +def v2b64_105 : NVPTXReg<"%v2b64_105">; +def v2b64_106 : NVPTXReg<"%v2b64_106">; +def v2b64_107 : NVPTXReg<"%v2b64_107">; +def v2b64_108 : NVPTXReg<"%v2b64_108">; +def v2b64_109 : NVPTXReg<"%v2b64_109">; +def v2b64_110 : NVPTXReg<"%v2b64_110">; +def v2b64_111 : NVPTXReg<"%v2b64_111">; +def v2b64_112 : NVPTXReg<"%v2b64_112">; +def v2b64_113 : NVPTXReg<"%v2b64_113">; +def v2b64_114 : NVPTXReg<"%v2b64_114">; +def v2b64_115 : NVPTXReg<"%v2b64_115">; +def v2b64_116 : 
NVPTXReg<"%v2b64_116">; +def v2b64_117 : NVPTXReg<"%v2b64_117">; +def v2b64_118 : NVPTXReg<"%v2b64_118">; +def v2b64_119 : NVPTXReg<"%v2b64_119">; +def v2b64_120 : NVPTXReg<"%v2b64_120">; +def v2b64_121 : NVPTXReg<"%v2b64_121">; +def v2b64_122 : NVPTXReg<"%v2b64_122">; +def v2b64_123 : NVPTXReg<"%v2b64_123">; +def v2b64_124 : NVPTXReg<"%v2b64_124">; +def v2b64_125 : NVPTXReg<"%v2b64_125">; +def v2b64_126 : NVPTXReg<"%v2b64_126">; +def v2b64_127 : NVPTXReg<"%v2b64_127">; +def v2b64_128 : NVPTXReg<"%v2b64_128">; +def v2b64_129 : NVPTXReg<"%v2b64_129">; +def v2b64_130 : NVPTXReg<"%v2b64_130">; +def v2b64_131 : NVPTXReg<"%v2b64_131">; +def v2b64_132 : NVPTXReg<"%v2b64_132">; +def v2b64_133 : NVPTXReg<"%v2b64_133">; +def v2b64_134 : NVPTXReg<"%v2b64_134">; +def v2b64_135 : NVPTXReg<"%v2b64_135">; +def v2b64_136 : NVPTXReg<"%v2b64_136">; +def v2b64_137 : NVPTXReg<"%v2b64_137">; +def v2b64_138 : NVPTXReg<"%v2b64_138">; +def v2b64_139 : NVPTXReg<"%v2b64_139">; +def v2b64_140 : NVPTXReg<"%v2b64_140">; +def v2b64_141 : NVPTXReg<"%v2b64_141">; +def v2b64_142 : NVPTXReg<"%v2b64_142">; +def v2b64_143 : NVPTXReg<"%v2b64_143">; +def v2b64_144 : NVPTXReg<"%v2b64_144">; +def v2b64_145 : NVPTXReg<"%v2b64_145">; +def v2b64_146 : NVPTXReg<"%v2b64_146">; +def v2b64_147 : NVPTXReg<"%v2b64_147">; +def v2b64_148 : NVPTXReg<"%v2b64_148">; +def v2b64_149 : NVPTXReg<"%v2b64_149">; +def v2b64_150 : NVPTXReg<"%v2b64_150">; +def v2b64_151 : NVPTXReg<"%v2b64_151">; +def v2b64_152 : NVPTXReg<"%v2b64_152">; +def v2b64_153 : NVPTXReg<"%v2b64_153">; +def v2b64_154 : NVPTXReg<"%v2b64_154">; +def v2b64_155 : NVPTXReg<"%v2b64_155">; +def v2b64_156 : NVPTXReg<"%v2b64_156">; +def v2b64_157 : NVPTXReg<"%v2b64_157">; +def v2b64_158 : NVPTXReg<"%v2b64_158">; +def v2b64_159 : NVPTXReg<"%v2b64_159">; +def v2b64_160 : NVPTXReg<"%v2b64_160">; +def v2b64_161 : NVPTXReg<"%v2b64_161">; +def v2b64_162 : NVPTXReg<"%v2b64_162">; +def v2b64_163 : NVPTXReg<"%v2b64_163">; +def v2b64_164 : NVPTXReg<"%v2b64_164">; +def 
v2b64_165 : NVPTXReg<"%v2b64_165">; +def v2b64_166 : NVPTXReg<"%v2b64_166">; +def v2b64_167 : NVPTXReg<"%v2b64_167">; +def v2b64_168 : NVPTXReg<"%v2b64_168">; +def v2b64_169 : NVPTXReg<"%v2b64_169">; +def v2b64_170 : NVPTXReg<"%v2b64_170">; +def v2b64_171 : NVPTXReg<"%v2b64_171">; +def v2b64_172 : NVPTXReg<"%v2b64_172">; +def v2b64_173 : NVPTXReg<"%v2b64_173">; +def v2b64_174 : NVPTXReg<"%v2b64_174">; +def v2b64_175 : NVPTXReg<"%v2b64_175">; +def v2b64_176 : NVPTXReg<"%v2b64_176">; +def v2b64_177 : NVPTXReg<"%v2b64_177">; +def v2b64_178 : NVPTXReg<"%v2b64_178">; +def v2b64_179 : NVPTXReg<"%v2b64_179">; +def v2b64_180 : NVPTXReg<"%v2b64_180">; +def v2b64_181 : NVPTXReg<"%v2b64_181">; +def v2b64_182 : NVPTXReg<"%v2b64_182">; +def v2b64_183 : NVPTXReg<"%v2b64_183">; +def v2b64_184 : NVPTXReg<"%v2b64_184">; +def v2b64_185 : NVPTXReg<"%v2b64_185">; +def v2b64_186 : NVPTXReg<"%v2b64_186">; +def v2b64_187 : NVPTXReg<"%v2b64_187">; +def v2b64_188 : NVPTXReg<"%v2b64_188">; +def v2b64_189 : NVPTXReg<"%v2b64_189">; +def v2b64_190 : NVPTXReg<"%v2b64_190">; +def v2b64_191 : NVPTXReg<"%v2b64_191">; +def v2b64_192 : NVPTXReg<"%v2b64_192">; +def v2b64_193 : NVPTXReg<"%v2b64_193">; +def v2b64_194 : NVPTXReg<"%v2b64_194">; +def v2b64_195 : NVPTXReg<"%v2b64_195">; +def v2b64_196 : NVPTXReg<"%v2b64_196">; +def v2b64_197 : NVPTXReg<"%v2b64_197">; +def v2b64_198 : NVPTXReg<"%v2b64_198">; +def v2b64_199 : NVPTXReg<"%v2b64_199">; +def v2b64_200 : NVPTXReg<"%v2b64_200">; +def v2b64_201 : NVPTXReg<"%v2b64_201">; +def v2b64_202 : NVPTXReg<"%v2b64_202">; +def v2b64_203 : NVPTXReg<"%v2b64_203">; +def v2b64_204 : NVPTXReg<"%v2b64_204">; +def v2b64_205 : NVPTXReg<"%v2b64_205">; +def v2b64_206 : NVPTXReg<"%v2b64_206">; +def v2b64_207 : NVPTXReg<"%v2b64_207">; +def v2b64_208 : NVPTXReg<"%v2b64_208">; +def v2b64_209 : NVPTXReg<"%v2b64_209">; +def v2b64_210 : NVPTXReg<"%v2b64_210">; +def v2b64_211 : NVPTXReg<"%v2b64_211">; +def v2b64_212 : NVPTXReg<"%v2b64_212">; +def v2b64_213 : 
NVPTXReg<"%v2b64_213">; +def v2b64_214 : NVPTXReg<"%v2b64_214">; +def v2b64_215 : NVPTXReg<"%v2b64_215">; +def v2b64_216 : NVPTXReg<"%v2b64_216">; +def v2b64_217 : NVPTXReg<"%v2b64_217">; +def v2b64_218 : NVPTXReg<"%v2b64_218">; +def v2b64_219 : NVPTXReg<"%v2b64_219">; +def v2b64_220 : NVPTXReg<"%v2b64_220">; +def v2b64_221 : NVPTXReg<"%v2b64_221">; +def v2b64_222 : NVPTXReg<"%v2b64_222">; +def v2b64_223 : NVPTXReg<"%v2b64_223">; +def v2b64_224 : NVPTXReg<"%v2b64_224">; +def v2b64_225 : NVPTXReg<"%v2b64_225">; +def v2b64_226 : NVPTXReg<"%v2b64_226">; +def v2b64_227 : NVPTXReg<"%v2b64_227">; +def v2b64_228 : NVPTXReg<"%v2b64_228">; +def v2b64_229 : NVPTXReg<"%v2b64_229">; +def v2b64_230 : NVPTXReg<"%v2b64_230">; +def v2b64_231 : NVPTXReg<"%v2b64_231">; +def v2b64_232 : NVPTXReg<"%v2b64_232">; +def v2b64_233 : NVPTXReg<"%v2b64_233">; +def v2b64_234 : NVPTXReg<"%v2b64_234">; +def v2b64_235 : NVPTXReg<"%v2b64_235">; +def v2b64_236 : NVPTXReg<"%v2b64_236">; +def v2b64_237 : NVPTXReg<"%v2b64_237">; +def v2b64_238 : NVPTXReg<"%v2b64_238">; +def v2b64_239 : NVPTXReg<"%v2b64_239">; +def v2b64_240 : NVPTXReg<"%v2b64_240">; +def v2b64_241 : NVPTXReg<"%v2b64_241">; +def v2b64_242 : NVPTXReg<"%v2b64_242">; +def v2b64_243 : NVPTXReg<"%v2b64_243">; +def v2b64_244 : NVPTXReg<"%v2b64_244">; +def v2b64_245 : NVPTXReg<"%v2b64_245">; +def v2b64_246 : NVPTXReg<"%v2b64_246">; +def v2b64_247 : NVPTXReg<"%v2b64_247">; +def v2b64_248 : NVPTXReg<"%v2b64_248">; +def v2b64_249 : NVPTXReg<"%v2b64_249">; +def v2b64_250 : NVPTXReg<"%v2b64_250">; +def v2b64_251 : NVPTXReg<"%v2b64_251">; +def v2b64_252 : NVPTXReg<"%v2b64_252">; +def v2b64_253 : NVPTXReg<"%v2b64_253">; +def v2b64_254 : NVPTXReg<"%v2b64_254">; +def v2b64_255 : NVPTXReg<"%v2b64_255">; +def v2b64_256 : NVPTXReg<"%v2b64_256">; +def v2b64_257 : NVPTXReg<"%v2b64_257">; +def v2b64_258 : NVPTXReg<"%v2b64_258">; +def v2b64_259 : NVPTXReg<"%v2b64_259">; +def v2b64_260 : NVPTXReg<"%v2b64_260">; +def v2b64_261 : NVPTXReg<"%v2b64_261">; +def 
v2b64_262 : NVPTXReg<"%v2b64_262">; +def v2b64_263 : NVPTXReg<"%v2b64_263">; +def v2b64_264 : NVPTXReg<"%v2b64_264">; +def v2b64_265 : NVPTXReg<"%v2b64_265">; +def v2b64_266 : NVPTXReg<"%v2b64_266">; +def v2b64_267 : NVPTXReg<"%v2b64_267">; +def v2b64_268 : NVPTXReg<"%v2b64_268">; +def v2b64_269 : NVPTXReg<"%v2b64_269">; +def v2b64_270 : NVPTXReg<"%v2b64_270">; +def v2b64_271 : NVPTXReg<"%v2b64_271">; +def v2b64_272 : NVPTXReg<"%v2b64_272">; +def v2b64_273 : NVPTXReg<"%v2b64_273">; +def v2b64_274 : NVPTXReg<"%v2b64_274">; +def v2b64_275 : NVPTXReg<"%v2b64_275">; +def v2b64_276 : NVPTXReg<"%v2b64_276">; +def v2b64_277 : NVPTXReg<"%v2b64_277">; +def v2b64_278 : NVPTXReg<"%v2b64_278">; +def v2b64_279 : NVPTXReg<"%v2b64_279">; +def v2b64_280 : NVPTXReg<"%v2b64_280">; +def v2b64_281 : NVPTXReg<"%v2b64_281">; +def v2b64_282 : NVPTXReg<"%v2b64_282">; +def v2b64_283 : NVPTXReg<"%v2b64_283">; +def v2b64_284 : NVPTXReg<"%v2b64_284">; +def v2b64_285 : NVPTXReg<"%v2b64_285">; +def v2b64_286 : NVPTXReg<"%v2b64_286">; +def v2b64_287 : NVPTXReg<"%v2b64_287">; +def v2b64_288 : NVPTXReg<"%v2b64_288">; +def v2b64_289 : NVPTXReg<"%v2b64_289">; +def v2b64_290 : NVPTXReg<"%v2b64_290">; +def v2b64_291 : NVPTXReg<"%v2b64_291">; +def v2b64_292 : NVPTXReg<"%v2b64_292">; +def v2b64_293 : NVPTXReg<"%v2b64_293">; +def v2b64_294 : NVPTXReg<"%v2b64_294">; +def v2b64_295 : NVPTXReg<"%v2b64_295">; +def v2b64_296 : NVPTXReg<"%v2b64_296">; +def v2b64_297 : NVPTXReg<"%v2b64_297">; +def v2b64_298 : NVPTXReg<"%v2b64_298">; +def v2b64_299 : NVPTXReg<"%v2b64_299">; +def v2b64_300 : NVPTXReg<"%v2b64_300">; +def v2b64_301 : NVPTXReg<"%v2b64_301">; +def v2b64_302 : NVPTXReg<"%v2b64_302">; +def v2b64_303 : NVPTXReg<"%v2b64_303">; +def v2b64_304 : NVPTXReg<"%v2b64_304">; +def v2b64_305 : NVPTXReg<"%v2b64_305">; +def v2b64_306 : NVPTXReg<"%v2b64_306">; +def v2b64_307 : NVPTXReg<"%v2b64_307">; +def v2b64_308 : NVPTXReg<"%v2b64_308">; +def v2b64_309 : NVPTXReg<"%v2b64_309">; +def v2b64_310 : 
NVPTXReg<"%v2b64_310">; +def v2b64_311 : NVPTXReg<"%v2b64_311">; +def v2b64_312 : NVPTXReg<"%v2b64_312">; +def v2b64_313 : NVPTXReg<"%v2b64_313">; +def v2b64_314 : NVPTXReg<"%v2b64_314">; +def v2b64_315 : NVPTXReg<"%v2b64_315">; +def v2b64_316 : NVPTXReg<"%v2b64_316">; +def v2b64_317 : NVPTXReg<"%v2b64_317">; +def v2b64_318 : NVPTXReg<"%v2b64_318">; +def v2b64_319 : NVPTXReg<"%v2b64_319">; +def v2b64_320 : NVPTXReg<"%v2b64_320">; +def v2b64_321 : NVPTXReg<"%v2b64_321">; +def v2b64_322 : NVPTXReg<"%v2b64_322">; +def v2b64_323 : NVPTXReg<"%v2b64_323">; +def v2b64_324 : NVPTXReg<"%v2b64_324">; +def v2b64_325 : NVPTXReg<"%v2b64_325">; +def v2b64_326 : NVPTXReg<"%v2b64_326">; +def v2b64_327 : NVPTXReg<"%v2b64_327">; +def v2b64_328 : NVPTXReg<"%v2b64_328">; +def v2b64_329 : NVPTXReg<"%v2b64_329">; +def v2b64_330 : NVPTXReg<"%v2b64_330">; +def v2b64_331 : NVPTXReg<"%v2b64_331">; +def v2b64_332 : NVPTXReg<"%v2b64_332">; +def v2b64_333 : NVPTXReg<"%v2b64_333">; +def v2b64_334 : NVPTXReg<"%v2b64_334">; +def v2b64_335 : NVPTXReg<"%v2b64_335">; +def v2b64_336 : NVPTXReg<"%v2b64_336">; +def v2b64_337 : NVPTXReg<"%v2b64_337">; +def v2b64_338 : NVPTXReg<"%v2b64_338">; +def v2b64_339 : NVPTXReg<"%v2b64_339">; +def v2b64_340 : NVPTXReg<"%v2b64_340">; +def v2b64_341 : NVPTXReg<"%v2b64_341">; +def v2b64_342 : NVPTXReg<"%v2b64_342">; +def v2b64_343 : NVPTXReg<"%v2b64_343">; +def v2b64_344 : NVPTXReg<"%v2b64_344">; +def v2b64_345 : NVPTXReg<"%v2b64_345">; +def v2b64_346 : NVPTXReg<"%v2b64_346">; +def v2b64_347 : NVPTXReg<"%v2b64_347">; +def v2b64_348 : NVPTXReg<"%v2b64_348">; +def v2b64_349 : NVPTXReg<"%v2b64_349">; +def v2b64_350 : NVPTXReg<"%v2b64_350">; +def v2b64_351 : NVPTXReg<"%v2b64_351">; +def v2b64_352 : NVPTXReg<"%v2b64_352">; +def v2b64_353 : NVPTXReg<"%v2b64_353">; +def v2b64_354 : NVPTXReg<"%v2b64_354">; +def v2b64_355 : NVPTXReg<"%v2b64_355">; +def v2b64_356 : NVPTXReg<"%v2b64_356">; +def v2b64_357 : NVPTXReg<"%v2b64_357">; +def v2b64_358 : NVPTXReg<"%v2b64_358">; +def 
v2b64_359 : NVPTXReg<"%v2b64_359">; +def v2b64_360 : NVPTXReg<"%v2b64_360">; +def v2b64_361 : NVPTXReg<"%v2b64_361">; +def v2b64_362 : NVPTXReg<"%v2b64_362">; +def v2b64_363 : NVPTXReg<"%v2b64_363">; +def v2b64_364 : NVPTXReg<"%v2b64_364">; +def v2b64_365 : NVPTXReg<"%v2b64_365">; +def v2b64_366 : NVPTXReg<"%v2b64_366">; +def v2b64_367 : NVPTXReg<"%v2b64_367">; +def v2b64_368 : NVPTXReg<"%v2b64_368">; +def v2b64_369 : NVPTXReg<"%v2b64_369">; +def v2b64_370 : NVPTXReg<"%v2b64_370">; +def v2b64_371 : NVPTXReg<"%v2b64_371">; +def v2b64_372 : NVPTXReg<"%v2b64_372">; +def v2b64_373 : NVPTXReg<"%v2b64_373">; +def v2b64_374 : NVPTXReg<"%v2b64_374">; +def v2b64_375 : NVPTXReg<"%v2b64_375">; +def v2b64_376 : NVPTXReg<"%v2b64_376">; +def v2b64_377 : NVPTXReg<"%v2b64_377">; +def v2b64_378 : NVPTXReg<"%v2b64_378">; +def v2b64_379 : NVPTXReg<"%v2b64_379">; +def v2b64_380 : NVPTXReg<"%v2b64_380">; +def v2b64_381 : NVPTXReg<"%v2b64_381">; +def v2b64_382 : NVPTXReg<"%v2b64_382">; +def v2b64_383 : NVPTXReg<"%v2b64_383">; +def v2b64_384 : NVPTXReg<"%v2b64_384">; +def v2b64_385 : NVPTXReg<"%v2b64_385">; +def v2b64_386 : NVPTXReg<"%v2b64_386">; +def v2b64_387 : NVPTXReg<"%v2b64_387">; +def v2b64_388 : NVPTXReg<"%v2b64_388">; +def v2b64_389 : NVPTXReg<"%v2b64_389">; +def v2b64_390 : NVPTXReg<"%v2b64_390">; +def v2b64_391 : NVPTXReg<"%v2b64_391">; +def v2b64_392 : NVPTXReg<"%v2b64_392">; +def v2b64_393 : NVPTXReg<"%v2b64_393">; +def v2b64_394 : NVPTXReg<"%v2b64_394">; +def v2b64_395 : NVPTXReg<"%v2b64_395">; +def v4b8_0 : NVPTXReg<"%v4b8_0">; +def v4b8_1 : NVPTXReg<"%v4b8_1">; +def v4b8_2 : NVPTXReg<"%v4b8_2">; +def v4b8_3 : NVPTXReg<"%v4b8_3">; +def v4b8_4 : NVPTXReg<"%v4b8_4">; +def v4b8_5 : NVPTXReg<"%v4b8_5">; +def v4b8_6 : NVPTXReg<"%v4b8_6">; +def v4b8_7 : NVPTXReg<"%v4b8_7">; +def v4b8_8 : NVPTXReg<"%v4b8_8">; +def v4b8_9 : NVPTXReg<"%v4b8_9">; +def v4b8_10 : NVPTXReg<"%v4b8_10">; +def v4b8_11 : NVPTXReg<"%v4b8_11">; +def v4b8_12 : NVPTXReg<"%v4b8_12">; +def v4b8_13 : 
NVPTXReg<"%v4b8_13">; +def v4b8_14 : NVPTXReg<"%v4b8_14">; +def v4b8_15 : NVPTXReg<"%v4b8_15">; +def v4b8_16 : NVPTXReg<"%v4b8_16">; +def v4b8_17 : NVPTXReg<"%v4b8_17">; +def v4b8_18 : NVPTXReg<"%v4b8_18">; +def v4b8_19 : NVPTXReg<"%v4b8_19">; +def v4b8_20 : NVPTXReg<"%v4b8_20">; +def v4b8_21 : NVPTXReg<"%v4b8_21">; +def v4b8_22 : NVPTXReg<"%v4b8_22">; +def v4b8_23 : NVPTXReg<"%v4b8_23">; +def v4b8_24 : NVPTXReg<"%v4b8_24">; +def v4b8_25 : NVPTXReg<"%v4b8_25">; +def v4b8_26 : NVPTXReg<"%v4b8_26">; +def v4b8_27 : NVPTXReg<"%v4b8_27">; +def v4b8_28 : NVPTXReg<"%v4b8_28">; +def v4b8_29 : NVPTXReg<"%v4b8_29">; +def v4b8_30 : NVPTXReg<"%v4b8_30">; +def v4b8_31 : NVPTXReg<"%v4b8_31">; +def v4b8_32 : NVPTXReg<"%v4b8_32">; +def v4b8_33 : NVPTXReg<"%v4b8_33">; +def v4b8_34 : NVPTXReg<"%v4b8_34">; +def v4b8_35 : NVPTXReg<"%v4b8_35">; +def v4b8_36 : NVPTXReg<"%v4b8_36">; +def v4b8_37 : NVPTXReg<"%v4b8_37">; +def v4b8_38 : NVPTXReg<"%v4b8_38">; +def v4b8_39 : NVPTXReg<"%v4b8_39">; +def v4b8_40 : NVPTXReg<"%v4b8_40">; +def v4b8_41 : NVPTXReg<"%v4b8_41">; +def v4b8_42 : NVPTXReg<"%v4b8_42">; +def v4b8_43 : NVPTXReg<"%v4b8_43">; +def v4b8_44 : NVPTXReg<"%v4b8_44">; +def v4b8_45 : NVPTXReg<"%v4b8_45">; +def v4b8_46 : NVPTXReg<"%v4b8_46">; +def v4b8_47 : NVPTXReg<"%v4b8_47">; +def v4b8_48 : NVPTXReg<"%v4b8_48">; +def v4b8_49 : NVPTXReg<"%v4b8_49">; +def v4b8_50 : NVPTXReg<"%v4b8_50">; +def v4b8_51 : NVPTXReg<"%v4b8_51">; +def v4b8_52 : NVPTXReg<"%v4b8_52">; +def v4b8_53 : NVPTXReg<"%v4b8_53">; +def v4b8_54 : NVPTXReg<"%v4b8_54">; +def v4b8_55 : NVPTXReg<"%v4b8_55">; +def v4b8_56 : NVPTXReg<"%v4b8_56">; +def v4b8_57 : NVPTXReg<"%v4b8_57">; +def v4b8_58 : NVPTXReg<"%v4b8_58">; +def v4b8_59 : NVPTXReg<"%v4b8_59">; +def v4b8_60 : NVPTXReg<"%v4b8_60">; +def v4b8_61 : NVPTXReg<"%v4b8_61">; +def v4b8_62 : NVPTXReg<"%v4b8_62">; +def v4b8_63 : NVPTXReg<"%v4b8_63">; +def v4b8_64 : NVPTXReg<"%v4b8_64">; +def v4b8_65 : NVPTXReg<"%v4b8_65">; +def v4b8_66 : NVPTXReg<"%v4b8_66">; +def v4b8_67 : 
NVPTXReg<"%v4b8_67">; +def v4b8_68 : NVPTXReg<"%v4b8_68">; +def v4b8_69 : NVPTXReg<"%v4b8_69">; +def v4b8_70 : NVPTXReg<"%v4b8_70">; +def v4b8_71 : NVPTXReg<"%v4b8_71">; +def v4b8_72 : NVPTXReg<"%v4b8_72">; +def v4b8_73 : NVPTXReg<"%v4b8_73">; +def v4b8_74 : NVPTXReg<"%v4b8_74">; +def v4b8_75 : NVPTXReg<"%v4b8_75">; +def v4b8_76 : NVPTXReg<"%v4b8_76">; +def v4b8_77 : NVPTXReg<"%v4b8_77">; +def v4b8_78 : NVPTXReg<"%v4b8_78">; +def v4b8_79 : NVPTXReg<"%v4b8_79">; +def v4b8_80 : NVPTXReg<"%v4b8_80">; +def v4b8_81 : NVPTXReg<"%v4b8_81">; +def v4b8_82 : NVPTXReg<"%v4b8_82">; +def v4b8_83 : NVPTXReg<"%v4b8_83">; +def v4b8_84 : NVPTXReg<"%v4b8_84">; +def v4b8_85 : NVPTXReg<"%v4b8_85">; +def v4b8_86 : NVPTXReg<"%v4b8_86">; +def v4b8_87 : NVPTXReg<"%v4b8_87">; +def v4b8_88 : NVPTXReg<"%v4b8_88">; +def v4b8_89 : NVPTXReg<"%v4b8_89">; +def v4b8_90 : NVPTXReg<"%v4b8_90">; +def v4b8_91 : NVPTXReg<"%v4b8_91">; +def v4b8_92 : NVPTXReg<"%v4b8_92">; +def v4b8_93 : NVPTXReg<"%v4b8_93">; +def v4b8_94 : NVPTXReg<"%v4b8_94">; +def v4b8_95 : NVPTXReg<"%v4b8_95">; +def v4b8_96 : NVPTXReg<"%v4b8_96">; +def v4b8_97 : NVPTXReg<"%v4b8_97">; +def v4b8_98 : NVPTXReg<"%v4b8_98">; +def v4b8_99 : NVPTXReg<"%v4b8_99">; +def v4b8_100 : NVPTXReg<"%v4b8_100">; +def v4b8_101 : NVPTXReg<"%v4b8_101">; +def v4b8_102 : NVPTXReg<"%v4b8_102">; +def v4b8_103 : NVPTXReg<"%v4b8_103">; +def v4b8_104 : NVPTXReg<"%v4b8_104">; +def v4b8_105 : NVPTXReg<"%v4b8_105">; +def v4b8_106 : NVPTXReg<"%v4b8_106">; +def v4b8_107 : NVPTXReg<"%v4b8_107">; +def v4b8_108 : NVPTXReg<"%v4b8_108">; +def v4b8_109 : NVPTXReg<"%v4b8_109">; +def v4b8_110 : NVPTXReg<"%v4b8_110">; +def v4b8_111 : NVPTXReg<"%v4b8_111">; +def v4b8_112 : NVPTXReg<"%v4b8_112">; +def v4b8_113 : NVPTXReg<"%v4b8_113">; +def v4b8_114 : NVPTXReg<"%v4b8_114">; +def v4b8_115 : NVPTXReg<"%v4b8_115">; +def v4b8_116 : NVPTXReg<"%v4b8_116">; +def v4b8_117 : NVPTXReg<"%v4b8_117">; +def v4b8_118 : NVPTXReg<"%v4b8_118">; +def v4b8_119 : NVPTXReg<"%v4b8_119">; +def v4b8_120 
: NVPTXReg<"%v4b8_120">; +def v4b8_121 : NVPTXReg<"%v4b8_121">; +def v4b8_122 : NVPTXReg<"%v4b8_122">; +def v4b8_123 : NVPTXReg<"%v4b8_123">; +def v4b8_124 : NVPTXReg<"%v4b8_124">; +def v4b8_125 : NVPTXReg<"%v4b8_125">; +def v4b8_126 : NVPTXReg<"%v4b8_126">; +def v4b8_127 : NVPTXReg<"%v4b8_127">; +def v4b8_128 : NVPTXReg<"%v4b8_128">; +def v4b8_129 : NVPTXReg<"%v4b8_129">; +def v4b8_130 : NVPTXReg<"%v4b8_130">; +def v4b8_131 : NVPTXReg<"%v4b8_131">; +def v4b8_132 : NVPTXReg<"%v4b8_132">; +def v4b8_133 : NVPTXReg<"%v4b8_133">; +def v4b8_134 : NVPTXReg<"%v4b8_134">; +def v4b8_135 : NVPTXReg<"%v4b8_135">; +def v4b8_136 : NVPTXReg<"%v4b8_136">; +def v4b8_137 : NVPTXReg<"%v4b8_137">; +def v4b8_138 : NVPTXReg<"%v4b8_138">; +def v4b8_139 : NVPTXReg<"%v4b8_139">; +def v4b8_140 : NVPTXReg<"%v4b8_140">; +def v4b8_141 : NVPTXReg<"%v4b8_141">; +def v4b8_142 : NVPTXReg<"%v4b8_142">; +def v4b8_143 : NVPTXReg<"%v4b8_143">; +def v4b8_144 : NVPTXReg<"%v4b8_144">; +def v4b8_145 : NVPTXReg<"%v4b8_145">; +def v4b8_146 : NVPTXReg<"%v4b8_146">; +def v4b8_147 : NVPTXReg<"%v4b8_147">; +def v4b8_148 : NVPTXReg<"%v4b8_148">; +def v4b8_149 : NVPTXReg<"%v4b8_149">; +def v4b8_150 : NVPTXReg<"%v4b8_150">; +def v4b8_151 : NVPTXReg<"%v4b8_151">; +def v4b8_152 : NVPTXReg<"%v4b8_152">; +def v4b8_153 : NVPTXReg<"%v4b8_153">; +def v4b8_154 : NVPTXReg<"%v4b8_154">; +def v4b8_155 : NVPTXReg<"%v4b8_155">; +def v4b8_156 : NVPTXReg<"%v4b8_156">; +def v4b8_157 : NVPTXReg<"%v4b8_157">; +def v4b8_158 : NVPTXReg<"%v4b8_158">; +def v4b8_159 : NVPTXReg<"%v4b8_159">; +def v4b8_160 : NVPTXReg<"%v4b8_160">; +def v4b8_161 : NVPTXReg<"%v4b8_161">; +def v4b8_162 : NVPTXReg<"%v4b8_162">; +def v4b8_163 : NVPTXReg<"%v4b8_163">; +def v4b8_164 : NVPTXReg<"%v4b8_164">; +def v4b8_165 : NVPTXReg<"%v4b8_165">; +def v4b8_166 : NVPTXReg<"%v4b8_166">; +def v4b8_167 : NVPTXReg<"%v4b8_167">; +def v4b8_168 : NVPTXReg<"%v4b8_168">; +def v4b8_169 : NVPTXReg<"%v4b8_169">; +def v4b8_170 : NVPTXReg<"%v4b8_170">; +def v4b8_171 : 
NVPTXReg<"%v4b8_171">; +def v4b8_172 : NVPTXReg<"%v4b8_172">; +def v4b8_173 : NVPTXReg<"%v4b8_173">; +def v4b8_174 : NVPTXReg<"%v4b8_174">; +def v4b8_175 : NVPTXReg<"%v4b8_175">; +def v4b8_176 : NVPTXReg<"%v4b8_176">; +def v4b8_177 : NVPTXReg<"%v4b8_177">; +def v4b8_178 : NVPTXReg<"%v4b8_178">; +def v4b8_179 : NVPTXReg<"%v4b8_179">; +def v4b8_180 : NVPTXReg<"%v4b8_180">; +def v4b8_181 : NVPTXReg<"%v4b8_181">; +def v4b8_182 : NVPTXReg<"%v4b8_182">; +def v4b8_183 : NVPTXReg<"%v4b8_183">; +def v4b8_184 : NVPTXReg<"%v4b8_184">; +def v4b8_185 : NVPTXReg<"%v4b8_185">; +def v4b8_186 : NVPTXReg<"%v4b8_186">; +def v4b8_187 : NVPTXReg<"%v4b8_187">; +def v4b8_188 : NVPTXReg<"%v4b8_188">; +def v4b8_189 : NVPTXReg<"%v4b8_189">; +def v4b8_190 : NVPTXReg<"%v4b8_190">; +def v4b8_191 : NVPTXReg<"%v4b8_191">; +def v4b8_192 : NVPTXReg<"%v4b8_192">; +def v4b8_193 : NVPTXReg<"%v4b8_193">; +def v4b8_194 : NVPTXReg<"%v4b8_194">; +def v4b8_195 : NVPTXReg<"%v4b8_195">; +def v4b8_196 : NVPTXReg<"%v4b8_196">; +def v4b8_197 : NVPTXReg<"%v4b8_197">; +def v4b8_198 : NVPTXReg<"%v4b8_198">; +def v4b8_199 : NVPTXReg<"%v4b8_199">; +def v4b8_200 : NVPTXReg<"%v4b8_200">; +def v4b8_201 : NVPTXReg<"%v4b8_201">; +def v4b8_202 : NVPTXReg<"%v4b8_202">; +def v4b8_203 : NVPTXReg<"%v4b8_203">; +def v4b8_204 : NVPTXReg<"%v4b8_204">; +def v4b8_205 : NVPTXReg<"%v4b8_205">; +def v4b8_206 : NVPTXReg<"%v4b8_206">; +def v4b8_207 : NVPTXReg<"%v4b8_207">; +def v4b8_208 : NVPTXReg<"%v4b8_208">; +def v4b8_209 : NVPTXReg<"%v4b8_209">; +def v4b8_210 : NVPTXReg<"%v4b8_210">; +def v4b8_211 : NVPTXReg<"%v4b8_211">; +def v4b8_212 : NVPTXReg<"%v4b8_212">; +def v4b8_213 : NVPTXReg<"%v4b8_213">; +def v4b8_214 : NVPTXReg<"%v4b8_214">; +def v4b8_215 : NVPTXReg<"%v4b8_215">; +def v4b8_216 : NVPTXReg<"%v4b8_216">; +def v4b8_217 : NVPTXReg<"%v4b8_217">; +def v4b8_218 : NVPTXReg<"%v4b8_218">; +def v4b8_219 : NVPTXReg<"%v4b8_219">; +def v4b8_220 : NVPTXReg<"%v4b8_220">; +def v4b8_221 : NVPTXReg<"%v4b8_221">; +def v4b8_222 : 
NVPTXReg<"%v4b8_222">; +def v4b8_223 : NVPTXReg<"%v4b8_223">; +def v4b8_224 : NVPTXReg<"%v4b8_224">; +def v4b8_225 : NVPTXReg<"%v4b8_225">; +def v4b8_226 : NVPTXReg<"%v4b8_226">; +def v4b8_227 : NVPTXReg<"%v4b8_227">; +def v4b8_228 : NVPTXReg<"%v4b8_228">; +def v4b8_229 : NVPTXReg<"%v4b8_229">; +def v4b8_230 : NVPTXReg<"%v4b8_230">; +def v4b8_231 : NVPTXReg<"%v4b8_231">; +def v4b8_232 : NVPTXReg<"%v4b8_232">; +def v4b8_233 : NVPTXReg<"%v4b8_233">; +def v4b8_234 : NVPTXReg<"%v4b8_234">; +def v4b8_235 : NVPTXReg<"%v4b8_235">; +def v4b8_236 : NVPTXReg<"%v4b8_236">; +def v4b8_237 : NVPTXReg<"%v4b8_237">; +def v4b8_238 : NVPTXReg<"%v4b8_238">; +def v4b8_239 : NVPTXReg<"%v4b8_239">; +def v4b8_240 : NVPTXReg<"%v4b8_240">; +def v4b8_241 : NVPTXReg<"%v4b8_241">; +def v4b8_242 : NVPTXReg<"%v4b8_242">; +def v4b8_243 : NVPTXReg<"%v4b8_243">; +def v4b8_244 : NVPTXReg<"%v4b8_244">; +def v4b8_245 : NVPTXReg<"%v4b8_245">; +def v4b8_246 : NVPTXReg<"%v4b8_246">; +def v4b8_247 : NVPTXReg<"%v4b8_247">; +def v4b8_248 : NVPTXReg<"%v4b8_248">; +def v4b8_249 : NVPTXReg<"%v4b8_249">; +def v4b8_250 : NVPTXReg<"%v4b8_250">; +def v4b8_251 : NVPTXReg<"%v4b8_251">; +def v4b8_252 : NVPTXReg<"%v4b8_252">; +def v4b8_253 : NVPTXReg<"%v4b8_253">; +def v4b8_254 : NVPTXReg<"%v4b8_254">; +def v4b8_255 : NVPTXReg<"%v4b8_255">; +def v4b8_256 : NVPTXReg<"%v4b8_256">; +def v4b8_257 : NVPTXReg<"%v4b8_257">; +def v4b8_258 : NVPTXReg<"%v4b8_258">; +def v4b8_259 : NVPTXReg<"%v4b8_259">; +def v4b8_260 : NVPTXReg<"%v4b8_260">; +def v4b8_261 : NVPTXReg<"%v4b8_261">; +def v4b8_262 : NVPTXReg<"%v4b8_262">; +def v4b8_263 : NVPTXReg<"%v4b8_263">; +def v4b8_264 : NVPTXReg<"%v4b8_264">; +def v4b8_265 : NVPTXReg<"%v4b8_265">; +def v4b8_266 : NVPTXReg<"%v4b8_266">; +def v4b8_267 : NVPTXReg<"%v4b8_267">; +def v4b8_268 : NVPTXReg<"%v4b8_268">; +def v4b8_269 : NVPTXReg<"%v4b8_269">; +def v4b8_270 : NVPTXReg<"%v4b8_270">; +def v4b8_271 : NVPTXReg<"%v4b8_271">; +def v4b8_272 : NVPTXReg<"%v4b8_272">; +def v4b8_273 : 
NVPTXReg<"%v4b8_273">; +def v4b8_274 : NVPTXReg<"%v4b8_274">; +def v4b8_275 : NVPTXReg<"%v4b8_275">; +def v4b8_276 : NVPTXReg<"%v4b8_276">; +def v4b8_277 : NVPTXReg<"%v4b8_277">; +def v4b8_278 : NVPTXReg<"%v4b8_278">; +def v4b8_279 : NVPTXReg<"%v4b8_279">; +def v4b8_280 : NVPTXReg<"%v4b8_280">; +def v4b8_281 : NVPTXReg<"%v4b8_281">; +def v4b8_282 : NVPTXReg<"%v4b8_282">; +def v4b8_283 : NVPTXReg<"%v4b8_283">; +def v4b8_284 : NVPTXReg<"%v4b8_284">; +def v4b8_285 : NVPTXReg<"%v4b8_285">; +def v4b8_286 : NVPTXReg<"%v4b8_286">; +def v4b8_287 : NVPTXReg<"%v4b8_287">; +def v4b8_288 : NVPTXReg<"%v4b8_288">; +def v4b8_289 : NVPTXReg<"%v4b8_289">; +def v4b8_290 : NVPTXReg<"%v4b8_290">; +def v4b8_291 : NVPTXReg<"%v4b8_291">; +def v4b8_292 : NVPTXReg<"%v4b8_292">; +def v4b8_293 : NVPTXReg<"%v4b8_293">; +def v4b8_294 : NVPTXReg<"%v4b8_294">; +def v4b8_295 : NVPTXReg<"%v4b8_295">; +def v4b8_296 : NVPTXReg<"%v4b8_296">; +def v4b8_297 : NVPTXReg<"%v4b8_297">; +def v4b8_298 : NVPTXReg<"%v4b8_298">; +def v4b8_299 : NVPTXReg<"%v4b8_299">; +def v4b8_300 : NVPTXReg<"%v4b8_300">; +def v4b8_301 : NVPTXReg<"%v4b8_301">; +def v4b8_302 : NVPTXReg<"%v4b8_302">; +def v4b8_303 : NVPTXReg<"%v4b8_303">; +def v4b8_304 : NVPTXReg<"%v4b8_304">; +def v4b8_305 : NVPTXReg<"%v4b8_305">; +def v4b8_306 : NVPTXReg<"%v4b8_306">; +def v4b8_307 : NVPTXReg<"%v4b8_307">; +def v4b8_308 : NVPTXReg<"%v4b8_308">; +def v4b8_309 : NVPTXReg<"%v4b8_309">; +def v4b8_310 : NVPTXReg<"%v4b8_310">; +def v4b8_311 : NVPTXReg<"%v4b8_311">; +def v4b8_312 : NVPTXReg<"%v4b8_312">; +def v4b8_313 : NVPTXReg<"%v4b8_313">; +def v4b8_314 : NVPTXReg<"%v4b8_314">; +def v4b8_315 : NVPTXReg<"%v4b8_315">; +def v4b8_316 : NVPTXReg<"%v4b8_316">; +def v4b8_317 : NVPTXReg<"%v4b8_317">; +def v4b8_318 : NVPTXReg<"%v4b8_318">; +def v4b8_319 : NVPTXReg<"%v4b8_319">; +def v4b8_320 : NVPTXReg<"%v4b8_320">; +def v4b8_321 : NVPTXReg<"%v4b8_321">; +def v4b8_322 : NVPTXReg<"%v4b8_322">; +def v4b8_323 : NVPTXReg<"%v4b8_323">; +def v4b8_324 : 
NVPTXReg<"%v4b8_324">; +def v4b8_325 : NVPTXReg<"%v4b8_325">; +def v4b8_326 : NVPTXReg<"%v4b8_326">; +def v4b8_327 : NVPTXReg<"%v4b8_327">; +def v4b8_328 : NVPTXReg<"%v4b8_328">; +def v4b8_329 : NVPTXReg<"%v4b8_329">; +def v4b8_330 : NVPTXReg<"%v4b8_330">; +def v4b8_331 : NVPTXReg<"%v4b8_331">; +def v4b8_332 : NVPTXReg<"%v4b8_332">; +def v4b8_333 : NVPTXReg<"%v4b8_333">; +def v4b8_334 : NVPTXReg<"%v4b8_334">; +def v4b8_335 : NVPTXReg<"%v4b8_335">; +def v4b8_336 : NVPTXReg<"%v4b8_336">; +def v4b8_337 : NVPTXReg<"%v4b8_337">; +def v4b8_338 : NVPTXReg<"%v4b8_338">; +def v4b8_339 : NVPTXReg<"%v4b8_339">; +def v4b8_340 : NVPTXReg<"%v4b8_340">; +def v4b8_341 : NVPTXReg<"%v4b8_341">; +def v4b8_342 : NVPTXReg<"%v4b8_342">; +def v4b8_343 : NVPTXReg<"%v4b8_343">; +def v4b8_344 : NVPTXReg<"%v4b8_344">; +def v4b8_345 : NVPTXReg<"%v4b8_345">; +def v4b8_346 : NVPTXReg<"%v4b8_346">; +def v4b8_347 : NVPTXReg<"%v4b8_347">; +def v4b8_348 : NVPTXReg<"%v4b8_348">; +def v4b8_349 : NVPTXReg<"%v4b8_349">; +def v4b8_350 : NVPTXReg<"%v4b8_350">; +def v4b8_351 : NVPTXReg<"%v4b8_351">; +def v4b8_352 : NVPTXReg<"%v4b8_352">; +def v4b8_353 : NVPTXReg<"%v4b8_353">; +def v4b8_354 : NVPTXReg<"%v4b8_354">; +def v4b8_355 : NVPTXReg<"%v4b8_355">; +def v4b8_356 : NVPTXReg<"%v4b8_356">; +def v4b8_357 : NVPTXReg<"%v4b8_357">; +def v4b8_358 : NVPTXReg<"%v4b8_358">; +def v4b8_359 : NVPTXReg<"%v4b8_359">; +def v4b8_360 : NVPTXReg<"%v4b8_360">; +def v4b8_361 : NVPTXReg<"%v4b8_361">; +def v4b8_362 : NVPTXReg<"%v4b8_362">; +def v4b8_363 : NVPTXReg<"%v4b8_363">; +def v4b8_364 : NVPTXReg<"%v4b8_364">; +def v4b8_365 : NVPTXReg<"%v4b8_365">; +def v4b8_366 : NVPTXReg<"%v4b8_366">; +def v4b8_367 : NVPTXReg<"%v4b8_367">; +def v4b8_368 : NVPTXReg<"%v4b8_368">; +def v4b8_369 : NVPTXReg<"%v4b8_369">; +def v4b8_370 : NVPTXReg<"%v4b8_370">; +def v4b8_371 : NVPTXReg<"%v4b8_371">; +def v4b8_372 : NVPTXReg<"%v4b8_372">; +def v4b8_373 : NVPTXReg<"%v4b8_373">; +def v4b8_374 : NVPTXReg<"%v4b8_374">; +def v4b8_375 : 
NVPTXReg<"%v4b8_375">; +def v4b8_376 : NVPTXReg<"%v4b8_376">; +def v4b8_377 : NVPTXReg<"%v4b8_377">; +def v4b8_378 : NVPTXReg<"%v4b8_378">; +def v4b8_379 : NVPTXReg<"%v4b8_379">; +def v4b8_380 : NVPTXReg<"%v4b8_380">; +def v4b8_381 : NVPTXReg<"%v4b8_381">; +def v4b8_382 : NVPTXReg<"%v4b8_382">; +def v4b8_383 : NVPTXReg<"%v4b8_383">; +def v4b8_384 : NVPTXReg<"%v4b8_384">; +def v4b8_385 : NVPTXReg<"%v4b8_385">; +def v4b8_386 : NVPTXReg<"%v4b8_386">; +def v4b8_387 : NVPTXReg<"%v4b8_387">; +def v4b8_388 : NVPTXReg<"%v4b8_388">; +def v4b8_389 : NVPTXReg<"%v4b8_389">; +def v4b8_390 : NVPTXReg<"%v4b8_390">; +def v4b8_391 : NVPTXReg<"%v4b8_391">; +def v4b8_392 : NVPTXReg<"%v4b8_392">; +def v4b8_393 : NVPTXReg<"%v4b8_393">; +def v4b8_394 : NVPTXReg<"%v4b8_394">; +def v4b8_395 : NVPTXReg<"%v4b8_395">; +def v4b16_0 : NVPTXReg<"%v4b16_0">; +def v4b16_1 : NVPTXReg<"%v4b16_1">; +def v4b16_2 : NVPTXReg<"%v4b16_2">; +def v4b16_3 : NVPTXReg<"%v4b16_3">; +def v4b16_4 : NVPTXReg<"%v4b16_4">; +def v4b16_5 : NVPTXReg<"%v4b16_5">; +def v4b16_6 : NVPTXReg<"%v4b16_6">; +def v4b16_7 : NVPTXReg<"%v4b16_7">; +def v4b16_8 : NVPTXReg<"%v4b16_8">; +def v4b16_9 : NVPTXReg<"%v4b16_9">; +def v4b16_10 : NVPTXReg<"%v4b16_10">; +def v4b16_11 : NVPTXReg<"%v4b16_11">; +def v4b16_12 : NVPTXReg<"%v4b16_12">; +def v4b16_13 : NVPTXReg<"%v4b16_13">; +def v4b16_14 : NVPTXReg<"%v4b16_14">; +def v4b16_15 : NVPTXReg<"%v4b16_15">; +def v4b16_16 : NVPTXReg<"%v4b16_16">; +def v4b16_17 : NVPTXReg<"%v4b16_17">; +def v4b16_18 : NVPTXReg<"%v4b16_18">; +def v4b16_19 : NVPTXReg<"%v4b16_19">; +def v4b16_20 : NVPTXReg<"%v4b16_20">; +def v4b16_21 : NVPTXReg<"%v4b16_21">; +def v4b16_22 : NVPTXReg<"%v4b16_22">; +def v4b16_23 : NVPTXReg<"%v4b16_23">; +def v4b16_24 : NVPTXReg<"%v4b16_24">; +def v4b16_25 : NVPTXReg<"%v4b16_25">; +def v4b16_26 : NVPTXReg<"%v4b16_26">; +def v4b16_27 : NVPTXReg<"%v4b16_27">; +def v4b16_28 : NVPTXReg<"%v4b16_28">; +def v4b16_29 : NVPTXReg<"%v4b16_29">; +def v4b16_30 : NVPTXReg<"%v4b16_30">; +def 
v4b16_31 : NVPTXReg<"%v4b16_31">; +def v4b16_32 : NVPTXReg<"%v4b16_32">; +def v4b16_33 : NVPTXReg<"%v4b16_33">; +def v4b16_34 : NVPTXReg<"%v4b16_34">; +def v4b16_35 : NVPTXReg<"%v4b16_35">; +def v4b16_36 : NVPTXReg<"%v4b16_36">; +def v4b16_37 : NVPTXReg<"%v4b16_37">; +def v4b16_38 : NVPTXReg<"%v4b16_38">; +def v4b16_39 : NVPTXReg<"%v4b16_39">; +def v4b16_40 : NVPTXReg<"%v4b16_40">; +def v4b16_41 : NVPTXReg<"%v4b16_41">; +def v4b16_42 : NVPTXReg<"%v4b16_42">; +def v4b16_43 : NVPTXReg<"%v4b16_43">; +def v4b16_44 : NVPTXReg<"%v4b16_44">; +def v4b16_45 : NVPTXReg<"%v4b16_45">; +def v4b16_46 : NVPTXReg<"%v4b16_46">; +def v4b16_47 : NVPTXReg<"%v4b16_47">; +def v4b16_48 : NVPTXReg<"%v4b16_48">; +def v4b16_49 : NVPTXReg<"%v4b16_49">; +def v4b16_50 : NVPTXReg<"%v4b16_50">; +def v4b16_51 : NVPTXReg<"%v4b16_51">; +def v4b16_52 : NVPTXReg<"%v4b16_52">; +def v4b16_53 : NVPTXReg<"%v4b16_53">; +def v4b16_54 : NVPTXReg<"%v4b16_54">; +def v4b16_55 : NVPTXReg<"%v4b16_55">; +def v4b16_56 : NVPTXReg<"%v4b16_56">; +def v4b16_57 : NVPTXReg<"%v4b16_57">; +def v4b16_58 : NVPTXReg<"%v4b16_58">; +def v4b16_59 : NVPTXReg<"%v4b16_59">; +def v4b16_60 : NVPTXReg<"%v4b16_60">; +def v4b16_61 : NVPTXReg<"%v4b16_61">; +def v4b16_62 : NVPTXReg<"%v4b16_62">; +def v4b16_63 : NVPTXReg<"%v4b16_63">; +def v4b16_64 : NVPTXReg<"%v4b16_64">; +def v4b16_65 : NVPTXReg<"%v4b16_65">; +def v4b16_66 : NVPTXReg<"%v4b16_66">; +def v4b16_67 : NVPTXReg<"%v4b16_67">; +def v4b16_68 : NVPTXReg<"%v4b16_68">; +def v4b16_69 : NVPTXReg<"%v4b16_69">; +def v4b16_70 : NVPTXReg<"%v4b16_70">; +def v4b16_71 : NVPTXReg<"%v4b16_71">; +def v4b16_72 : NVPTXReg<"%v4b16_72">; +def v4b16_73 : NVPTXReg<"%v4b16_73">; +def v4b16_74 : NVPTXReg<"%v4b16_74">; +def v4b16_75 : NVPTXReg<"%v4b16_75">; +def v4b16_76 : NVPTXReg<"%v4b16_76">; +def v4b16_77 : NVPTXReg<"%v4b16_77">; +def v4b16_78 : NVPTXReg<"%v4b16_78">; +def v4b16_79 : NVPTXReg<"%v4b16_79">; +def v4b16_80 : NVPTXReg<"%v4b16_80">; +def v4b16_81 : NVPTXReg<"%v4b16_81">; +def v4b16_82 : 
NVPTXReg<"%v4b16_82">; +def v4b16_83 : NVPTXReg<"%v4b16_83">; +def v4b16_84 : NVPTXReg<"%v4b16_84">; +def v4b16_85 : NVPTXReg<"%v4b16_85">; +def v4b16_86 : NVPTXReg<"%v4b16_86">; +def v4b16_87 : NVPTXReg<"%v4b16_87">; +def v4b16_88 : NVPTXReg<"%v4b16_88">; +def v4b16_89 : NVPTXReg<"%v4b16_89">; +def v4b16_90 : NVPTXReg<"%v4b16_90">; +def v4b16_91 : NVPTXReg<"%v4b16_91">; +def v4b16_92 : NVPTXReg<"%v4b16_92">; +def v4b16_93 : NVPTXReg<"%v4b16_93">; +def v4b16_94 : NVPTXReg<"%v4b16_94">; +def v4b16_95 : NVPTXReg<"%v4b16_95">; +def v4b16_96 : NVPTXReg<"%v4b16_96">; +def v4b16_97 : NVPTXReg<"%v4b16_97">; +def v4b16_98 : NVPTXReg<"%v4b16_98">; +def v4b16_99 : NVPTXReg<"%v4b16_99">; +def v4b16_100 : NVPTXReg<"%v4b16_100">; +def v4b16_101 : NVPTXReg<"%v4b16_101">; +def v4b16_102 : NVPTXReg<"%v4b16_102">; +def v4b16_103 : NVPTXReg<"%v4b16_103">; +def v4b16_104 : NVPTXReg<"%v4b16_104">; +def v4b16_105 : NVPTXReg<"%v4b16_105">; +def v4b16_106 : NVPTXReg<"%v4b16_106">; +def v4b16_107 : NVPTXReg<"%v4b16_107">; +def v4b16_108 : NVPTXReg<"%v4b16_108">; +def v4b16_109 : NVPTXReg<"%v4b16_109">; +def v4b16_110 : NVPTXReg<"%v4b16_110">; +def v4b16_111 : NVPTXReg<"%v4b16_111">; +def v4b16_112 : NVPTXReg<"%v4b16_112">; +def v4b16_113 : NVPTXReg<"%v4b16_113">; +def v4b16_114 : NVPTXReg<"%v4b16_114">; +def v4b16_115 : NVPTXReg<"%v4b16_115">; +def v4b16_116 : NVPTXReg<"%v4b16_116">; +def v4b16_117 : NVPTXReg<"%v4b16_117">; +def v4b16_118 : NVPTXReg<"%v4b16_118">; +def v4b16_119 : NVPTXReg<"%v4b16_119">; +def v4b16_120 : NVPTXReg<"%v4b16_120">; +def v4b16_121 : NVPTXReg<"%v4b16_121">; +def v4b16_122 : NVPTXReg<"%v4b16_122">; +def v4b16_123 : NVPTXReg<"%v4b16_123">; +def v4b16_124 : NVPTXReg<"%v4b16_124">; +def v4b16_125 : NVPTXReg<"%v4b16_125">; +def v4b16_126 : NVPTXReg<"%v4b16_126">; +def v4b16_127 : NVPTXReg<"%v4b16_127">; +def v4b16_128 : NVPTXReg<"%v4b16_128">; +def v4b16_129 : NVPTXReg<"%v4b16_129">; +def v4b16_130 : NVPTXReg<"%v4b16_130">; +def v4b16_131 : NVPTXReg<"%v4b16_131">; 
+def v4b16_132 : NVPTXReg<"%v4b16_132">; +def v4b16_133 : NVPTXReg<"%v4b16_133">; +def v4b16_134 : NVPTXReg<"%v4b16_134">; +def v4b16_135 : NVPTXReg<"%v4b16_135">; +def v4b16_136 : NVPTXReg<"%v4b16_136">; +def v4b16_137 : NVPTXReg<"%v4b16_137">; +def v4b16_138 : NVPTXReg<"%v4b16_138">; +def v4b16_139 : NVPTXReg<"%v4b16_139">; +def v4b16_140 : NVPTXReg<"%v4b16_140">; +def v4b16_141 : NVPTXReg<"%v4b16_141">; +def v4b16_142 : NVPTXReg<"%v4b16_142">; +def v4b16_143 : NVPTXReg<"%v4b16_143">; +def v4b16_144 : NVPTXReg<"%v4b16_144">; +def v4b16_145 : NVPTXReg<"%v4b16_145">; +def v4b16_146 : NVPTXReg<"%v4b16_146">; +def v4b16_147 : NVPTXReg<"%v4b16_147">; +def v4b16_148 : NVPTXReg<"%v4b16_148">; +def v4b16_149 : NVPTXReg<"%v4b16_149">; +def v4b16_150 : NVPTXReg<"%v4b16_150">; +def v4b16_151 : NVPTXReg<"%v4b16_151">; +def v4b16_152 : NVPTXReg<"%v4b16_152">; +def v4b16_153 : NVPTXReg<"%v4b16_153">; +def v4b16_154 : NVPTXReg<"%v4b16_154">; +def v4b16_155 : NVPTXReg<"%v4b16_155">; +def v4b16_156 : NVPTXReg<"%v4b16_156">; +def v4b16_157 : NVPTXReg<"%v4b16_157">; +def v4b16_158 : NVPTXReg<"%v4b16_158">; +def v4b16_159 : NVPTXReg<"%v4b16_159">; +def v4b16_160 : NVPTXReg<"%v4b16_160">; +def v4b16_161 : NVPTXReg<"%v4b16_161">; +def v4b16_162 : NVPTXReg<"%v4b16_162">; +def v4b16_163 : NVPTXReg<"%v4b16_163">; +def v4b16_164 : NVPTXReg<"%v4b16_164">; +def v4b16_165 : NVPTXReg<"%v4b16_165">; +def v4b16_166 : NVPTXReg<"%v4b16_166">; +def v4b16_167 : NVPTXReg<"%v4b16_167">; +def v4b16_168 : NVPTXReg<"%v4b16_168">; +def v4b16_169 : NVPTXReg<"%v4b16_169">; +def v4b16_170 : NVPTXReg<"%v4b16_170">; +def v4b16_171 : NVPTXReg<"%v4b16_171">; +def v4b16_172 : NVPTXReg<"%v4b16_172">; +def v4b16_173 : NVPTXReg<"%v4b16_173">; +def v4b16_174 : NVPTXReg<"%v4b16_174">; +def v4b16_175 : NVPTXReg<"%v4b16_175">; +def v4b16_176 : NVPTXReg<"%v4b16_176">; +def v4b16_177 : NVPTXReg<"%v4b16_177">; +def v4b16_178 : NVPTXReg<"%v4b16_178">; +def v4b16_179 : NVPTXReg<"%v4b16_179">; +def v4b16_180 : 
NVPTXReg<"%v4b16_180">; +def v4b16_181 : NVPTXReg<"%v4b16_181">; +def v4b16_182 : NVPTXReg<"%v4b16_182">; +def v4b16_183 : NVPTXReg<"%v4b16_183">; +def v4b16_184 : NVPTXReg<"%v4b16_184">; +def v4b16_185 : NVPTXReg<"%v4b16_185">; +def v4b16_186 : NVPTXReg<"%v4b16_186">; +def v4b16_187 : NVPTXReg<"%v4b16_187">; +def v4b16_188 : NVPTXReg<"%v4b16_188">; +def v4b16_189 : NVPTXReg<"%v4b16_189">; +def v4b16_190 : NVPTXReg<"%v4b16_190">; +def v4b16_191 : NVPTXReg<"%v4b16_191">; +def v4b16_192 : NVPTXReg<"%v4b16_192">; +def v4b16_193 : NVPTXReg<"%v4b16_193">; +def v4b16_194 : NVPTXReg<"%v4b16_194">; +def v4b16_195 : NVPTXReg<"%v4b16_195">; +def v4b16_196 : NVPTXReg<"%v4b16_196">; +def v4b16_197 : NVPTXReg<"%v4b16_197">; +def v4b16_198 : NVPTXReg<"%v4b16_198">; +def v4b16_199 : NVPTXReg<"%v4b16_199">; +def v4b16_200 : NVPTXReg<"%v4b16_200">; +def v4b16_201 : NVPTXReg<"%v4b16_201">; +def v4b16_202 : NVPTXReg<"%v4b16_202">; +def v4b16_203 : NVPTXReg<"%v4b16_203">; +def v4b16_204 : NVPTXReg<"%v4b16_204">; +def v4b16_205 : NVPTXReg<"%v4b16_205">; +def v4b16_206 : NVPTXReg<"%v4b16_206">; +def v4b16_207 : NVPTXReg<"%v4b16_207">; +def v4b16_208 : NVPTXReg<"%v4b16_208">; +def v4b16_209 : NVPTXReg<"%v4b16_209">; +def v4b16_210 : NVPTXReg<"%v4b16_210">; +def v4b16_211 : NVPTXReg<"%v4b16_211">; +def v4b16_212 : NVPTXReg<"%v4b16_212">; +def v4b16_213 : NVPTXReg<"%v4b16_213">; +def v4b16_214 : NVPTXReg<"%v4b16_214">; +def v4b16_215 : NVPTXReg<"%v4b16_215">; +def v4b16_216 : NVPTXReg<"%v4b16_216">; +def v4b16_217 : NVPTXReg<"%v4b16_217">; +def v4b16_218 : NVPTXReg<"%v4b16_218">; +def v4b16_219 : NVPTXReg<"%v4b16_219">; +def v4b16_220 : NVPTXReg<"%v4b16_220">; +def v4b16_221 : NVPTXReg<"%v4b16_221">; +def v4b16_222 : NVPTXReg<"%v4b16_222">; +def v4b16_223 : NVPTXReg<"%v4b16_223">; +def v4b16_224 : NVPTXReg<"%v4b16_224">; +def v4b16_225 : NVPTXReg<"%v4b16_225">; +def v4b16_226 : NVPTXReg<"%v4b16_226">; +def v4b16_227 : NVPTXReg<"%v4b16_227">; +def v4b16_228 : NVPTXReg<"%v4b16_228">; +def 
v4b16_229 : NVPTXReg<"%v4b16_229">; +def v4b16_230 : NVPTXReg<"%v4b16_230">; +def v4b16_231 : NVPTXReg<"%v4b16_231">; +def v4b16_232 : NVPTXReg<"%v4b16_232">; +def v4b16_233 : NVPTXReg<"%v4b16_233">; +def v4b16_234 : NVPTXReg<"%v4b16_234">; +def v4b16_235 : NVPTXReg<"%v4b16_235">; +def v4b16_236 : NVPTXReg<"%v4b16_236">; +def v4b16_237 : NVPTXReg<"%v4b16_237">; +def v4b16_238 : NVPTXReg<"%v4b16_238">; +def v4b16_239 : NVPTXReg<"%v4b16_239">; +def v4b16_240 : NVPTXReg<"%v4b16_240">; +def v4b16_241 : NVPTXReg<"%v4b16_241">; +def v4b16_242 : NVPTXReg<"%v4b16_242">; +def v4b16_243 : NVPTXReg<"%v4b16_243">; +def v4b16_244 : NVPTXReg<"%v4b16_244">; +def v4b16_245 : NVPTXReg<"%v4b16_245">; +def v4b16_246 : NVPTXReg<"%v4b16_246">; +def v4b16_247 : NVPTXReg<"%v4b16_247">; +def v4b16_248 : NVPTXReg<"%v4b16_248">; +def v4b16_249 : NVPTXReg<"%v4b16_249">; +def v4b16_250 : NVPTXReg<"%v4b16_250">; +def v4b16_251 : NVPTXReg<"%v4b16_251">; +def v4b16_252 : NVPTXReg<"%v4b16_252">; +def v4b16_253 : NVPTXReg<"%v4b16_253">; +def v4b16_254 : NVPTXReg<"%v4b16_254">; +def v4b16_255 : NVPTXReg<"%v4b16_255">; +def v4b16_256 : NVPTXReg<"%v4b16_256">; +def v4b16_257 : NVPTXReg<"%v4b16_257">; +def v4b16_258 : NVPTXReg<"%v4b16_258">; +def v4b16_259 : NVPTXReg<"%v4b16_259">; +def v4b16_260 : NVPTXReg<"%v4b16_260">; +def v4b16_261 : NVPTXReg<"%v4b16_261">; +def v4b16_262 : NVPTXReg<"%v4b16_262">; +def v4b16_263 : NVPTXReg<"%v4b16_263">; +def v4b16_264 : NVPTXReg<"%v4b16_264">; +def v4b16_265 : NVPTXReg<"%v4b16_265">; +def v4b16_266 : NVPTXReg<"%v4b16_266">; +def v4b16_267 : NVPTXReg<"%v4b16_267">; +def v4b16_268 : NVPTXReg<"%v4b16_268">; +def v4b16_269 : NVPTXReg<"%v4b16_269">; +def v4b16_270 : NVPTXReg<"%v4b16_270">; +def v4b16_271 : NVPTXReg<"%v4b16_271">; +def v4b16_272 : NVPTXReg<"%v4b16_272">; +def v4b16_273 : NVPTXReg<"%v4b16_273">; +def v4b16_274 : NVPTXReg<"%v4b16_274">; +def v4b16_275 : NVPTXReg<"%v4b16_275">; +def v4b16_276 : NVPTXReg<"%v4b16_276">; +def v4b16_277 : 
NVPTXReg<"%v4b16_277">; +def v4b16_278 : NVPTXReg<"%v4b16_278">; +def v4b16_279 : NVPTXReg<"%v4b16_279">; +def v4b16_280 : NVPTXReg<"%v4b16_280">; +def v4b16_281 : NVPTXReg<"%v4b16_281">; +def v4b16_282 : NVPTXReg<"%v4b16_282">; +def v4b16_283 : NVPTXReg<"%v4b16_283">; +def v4b16_284 : NVPTXReg<"%v4b16_284">; +def v4b16_285 : NVPTXReg<"%v4b16_285">; +def v4b16_286 : NVPTXReg<"%v4b16_286">; +def v4b16_287 : NVPTXReg<"%v4b16_287">; +def v4b16_288 : NVPTXReg<"%v4b16_288">; +def v4b16_289 : NVPTXReg<"%v4b16_289">; +def v4b16_290 : NVPTXReg<"%v4b16_290">; +def v4b16_291 : NVPTXReg<"%v4b16_291">; +def v4b16_292 : NVPTXReg<"%v4b16_292">; +def v4b16_293 : NVPTXReg<"%v4b16_293">; +def v4b16_294 : NVPTXReg<"%v4b16_294">; +def v4b16_295 : NVPTXReg<"%v4b16_295">; +def v4b16_296 : NVPTXReg<"%v4b16_296">; +def v4b16_297 : NVPTXReg<"%v4b16_297">; +def v4b16_298 : NVPTXReg<"%v4b16_298">; +def v4b16_299 : NVPTXReg<"%v4b16_299">; +def v4b16_300 : NVPTXReg<"%v4b16_300">; +def v4b16_301 : NVPTXReg<"%v4b16_301">; +def v4b16_302 : NVPTXReg<"%v4b16_302">; +def v4b16_303 : NVPTXReg<"%v4b16_303">; +def v4b16_304 : NVPTXReg<"%v4b16_304">; +def v4b16_305 : NVPTXReg<"%v4b16_305">; +def v4b16_306 : NVPTXReg<"%v4b16_306">; +def v4b16_307 : NVPTXReg<"%v4b16_307">; +def v4b16_308 : NVPTXReg<"%v4b16_308">; +def v4b16_309 : NVPTXReg<"%v4b16_309">; +def v4b16_310 : NVPTXReg<"%v4b16_310">; +def v4b16_311 : NVPTXReg<"%v4b16_311">; +def v4b16_312 : NVPTXReg<"%v4b16_312">; +def v4b16_313 : NVPTXReg<"%v4b16_313">; +def v4b16_314 : NVPTXReg<"%v4b16_314">; +def v4b16_315 : NVPTXReg<"%v4b16_315">; +def v4b16_316 : NVPTXReg<"%v4b16_316">; +def v4b16_317 : NVPTXReg<"%v4b16_317">; +def v4b16_318 : NVPTXReg<"%v4b16_318">; +def v4b16_319 : NVPTXReg<"%v4b16_319">; +def v4b16_320 : NVPTXReg<"%v4b16_320">; +def v4b16_321 : NVPTXReg<"%v4b16_321">; +def v4b16_322 : NVPTXReg<"%v4b16_322">; +def v4b16_323 : NVPTXReg<"%v4b16_323">; +def v4b16_324 : NVPTXReg<"%v4b16_324">; +def v4b16_325 : NVPTXReg<"%v4b16_325">; +def 
v4b16_326 : NVPTXReg<"%v4b16_326">; +def v4b16_327 : NVPTXReg<"%v4b16_327">; +def v4b16_328 : NVPTXReg<"%v4b16_328">; +def v4b16_329 : NVPTXReg<"%v4b16_329">; +def v4b16_330 : NVPTXReg<"%v4b16_330">; +def v4b16_331 : NVPTXReg<"%v4b16_331">; +def v4b16_332 : NVPTXReg<"%v4b16_332">; +def v4b16_333 : NVPTXReg<"%v4b16_333">; +def v4b16_334 : NVPTXReg<"%v4b16_334">; +def v4b16_335 : NVPTXReg<"%v4b16_335">; +def v4b16_336 : NVPTXReg<"%v4b16_336">; +def v4b16_337 : NVPTXReg<"%v4b16_337">; +def v4b16_338 : NVPTXReg<"%v4b16_338">; +def v4b16_339 : NVPTXReg<"%v4b16_339">; +def v4b16_340 : NVPTXReg<"%v4b16_340">; +def v4b16_341 : NVPTXReg<"%v4b16_341">; +def v4b16_342 : NVPTXReg<"%v4b16_342">; +def v4b16_343 : NVPTXReg<"%v4b16_343">; +def v4b16_344 : NVPTXReg<"%v4b16_344">; +def v4b16_345 : NVPTXReg<"%v4b16_345">; +def v4b16_346 : NVPTXReg<"%v4b16_346">; +def v4b16_347 : NVPTXReg<"%v4b16_347">; +def v4b16_348 : NVPTXReg<"%v4b16_348">; +def v4b16_349 : NVPTXReg<"%v4b16_349">; +def v4b16_350 : NVPTXReg<"%v4b16_350">; +def v4b16_351 : NVPTXReg<"%v4b16_351">; +def v4b16_352 : NVPTXReg<"%v4b16_352">; +def v4b16_353 : NVPTXReg<"%v4b16_353">; +def v4b16_354 : NVPTXReg<"%v4b16_354">; +def v4b16_355 : NVPTXReg<"%v4b16_355">; +def v4b16_356 : NVPTXReg<"%v4b16_356">; +def v4b16_357 : NVPTXReg<"%v4b16_357">; +def v4b16_358 : NVPTXReg<"%v4b16_358">; +def v4b16_359 : NVPTXReg<"%v4b16_359">; +def v4b16_360 : NVPTXReg<"%v4b16_360">; +def v4b16_361 : NVPTXReg<"%v4b16_361">; +def v4b16_362 : NVPTXReg<"%v4b16_362">; +def v4b16_363 : NVPTXReg<"%v4b16_363">; +def v4b16_364 : NVPTXReg<"%v4b16_364">; +def v4b16_365 : NVPTXReg<"%v4b16_365">; +def v4b16_366 : NVPTXReg<"%v4b16_366">; +def v4b16_367 : NVPTXReg<"%v4b16_367">; +def v4b16_368 : NVPTXReg<"%v4b16_368">; +def v4b16_369 : NVPTXReg<"%v4b16_369">; +def v4b16_370 : NVPTXReg<"%v4b16_370">; +def v4b16_371 : NVPTXReg<"%v4b16_371">; +def v4b16_372 : NVPTXReg<"%v4b16_372">; +def v4b16_373 : NVPTXReg<"%v4b16_373">; +def v4b16_374 : 
NVPTXReg<"%v4b16_374">; +def v4b16_375 : NVPTXReg<"%v4b16_375">; +def v4b16_376 : NVPTXReg<"%v4b16_376">; +def v4b16_377 : NVPTXReg<"%v4b16_377">; +def v4b16_378 : NVPTXReg<"%v4b16_378">; +def v4b16_379 : NVPTXReg<"%v4b16_379">; +def v4b16_380 : NVPTXReg<"%v4b16_380">; +def v4b16_381 : NVPTXReg<"%v4b16_381">; +def v4b16_382 : NVPTXReg<"%v4b16_382">; +def v4b16_383 : NVPTXReg<"%v4b16_383">; +def v4b16_384 : NVPTXReg<"%v4b16_384">; +def v4b16_385 : NVPTXReg<"%v4b16_385">; +def v4b16_386 : NVPTXReg<"%v4b16_386">; +def v4b16_387 : NVPTXReg<"%v4b16_387">; +def v4b16_388 : NVPTXReg<"%v4b16_388">; +def v4b16_389 : NVPTXReg<"%v4b16_389">; +def v4b16_390 : NVPTXReg<"%v4b16_390">; +def v4b16_391 : NVPTXReg<"%v4b16_391">; +def v4b16_392 : NVPTXReg<"%v4b16_392">; +def v4b16_393 : NVPTXReg<"%v4b16_393">; +def v4b16_394 : NVPTXReg<"%v4b16_394">; +def v4b16_395 : NVPTXReg<"%v4b16_395">; +def v4b32_0 : NVPTXReg<"%v4b32_0">; +def v4b32_1 : NVPTXReg<"%v4b32_1">; +def v4b32_2 : NVPTXReg<"%v4b32_2">; +def v4b32_3 : NVPTXReg<"%v4b32_3">; +def v4b32_4 : NVPTXReg<"%v4b32_4">; +def v4b32_5 : NVPTXReg<"%v4b32_5">; +def v4b32_6 : NVPTXReg<"%v4b32_6">; +def v4b32_7 : NVPTXReg<"%v4b32_7">; +def v4b32_8 : NVPTXReg<"%v4b32_8">; +def v4b32_9 : NVPTXReg<"%v4b32_9">; +def v4b32_10 : NVPTXReg<"%v4b32_10">; +def v4b32_11 : NVPTXReg<"%v4b32_11">; +def v4b32_12 : NVPTXReg<"%v4b32_12">; +def v4b32_13 : NVPTXReg<"%v4b32_13">; +def v4b32_14 : NVPTXReg<"%v4b32_14">; +def v4b32_15 : NVPTXReg<"%v4b32_15">; +def v4b32_16 : NVPTXReg<"%v4b32_16">; +def v4b32_17 : NVPTXReg<"%v4b32_17">; +def v4b32_18 : NVPTXReg<"%v4b32_18">; +def v4b32_19 : NVPTXReg<"%v4b32_19">; +def v4b32_20 : NVPTXReg<"%v4b32_20">; +def v4b32_21 : NVPTXReg<"%v4b32_21">; +def v4b32_22 : NVPTXReg<"%v4b32_22">; +def v4b32_23 : NVPTXReg<"%v4b32_23">; +def v4b32_24 : NVPTXReg<"%v4b32_24">; +def v4b32_25 : NVPTXReg<"%v4b32_25">; +def v4b32_26 : NVPTXReg<"%v4b32_26">; +def v4b32_27 : NVPTXReg<"%v4b32_27">; +def v4b32_28 : NVPTXReg<"%v4b32_28">; 
+def v4b32_29 : NVPTXReg<"%v4b32_29">; +def v4b32_30 : NVPTXReg<"%v4b32_30">; +def v4b32_31 : NVPTXReg<"%v4b32_31">; +def v4b32_32 : NVPTXReg<"%v4b32_32">; +def v4b32_33 : NVPTXReg<"%v4b32_33">; +def v4b32_34 : NVPTXReg<"%v4b32_34">; +def v4b32_35 : NVPTXReg<"%v4b32_35">; +def v4b32_36 : NVPTXReg<"%v4b32_36">; +def v4b32_37 : NVPTXReg<"%v4b32_37">; +def v4b32_38 : NVPTXReg<"%v4b32_38">; +def v4b32_39 : NVPTXReg<"%v4b32_39">; +def v4b32_40 : NVPTXReg<"%v4b32_40">; +def v4b32_41 : NVPTXReg<"%v4b32_41">; +def v4b32_42 : NVPTXReg<"%v4b32_42">; +def v4b32_43 : NVPTXReg<"%v4b32_43">; +def v4b32_44 : NVPTXReg<"%v4b32_44">; +def v4b32_45 : NVPTXReg<"%v4b32_45">; +def v4b32_46 : NVPTXReg<"%v4b32_46">; +def v4b32_47 : NVPTXReg<"%v4b32_47">; +def v4b32_48 : NVPTXReg<"%v4b32_48">; +def v4b32_49 : NVPTXReg<"%v4b32_49">; +def v4b32_50 : NVPTXReg<"%v4b32_50">; +def v4b32_51 : NVPTXReg<"%v4b32_51">; +def v4b32_52 : NVPTXReg<"%v4b32_52">; +def v4b32_53 : NVPTXReg<"%v4b32_53">; +def v4b32_54 : NVPTXReg<"%v4b32_54">; +def v4b32_55 : NVPTXReg<"%v4b32_55">; +def v4b32_56 : NVPTXReg<"%v4b32_56">; +def v4b32_57 : NVPTXReg<"%v4b32_57">; +def v4b32_58 : NVPTXReg<"%v4b32_58">; +def v4b32_59 : NVPTXReg<"%v4b32_59">; +def v4b32_60 : NVPTXReg<"%v4b32_60">; +def v4b32_61 : NVPTXReg<"%v4b32_61">; +def v4b32_62 : NVPTXReg<"%v4b32_62">; +def v4b32_63 : NVPTXReg<"%v4b32_63">; +def v4b32_64 : NVPTXReg<"%v4b32_64">; +def v4b32_65 : NVPTXReg<"%v4b32_65">; +def v4b32_66 : NVPTXReg<"%v4b32_66">; +def v4b32_67 : NVPTXReg<"%v4b32_67">; +def v4b32_68 : NVPTXReg<"%v4b32_68">; +def v4b32_69 : NVPTXReg<"%v4b32_69">; +def v4b32_70 : NVPTXReg<"%v4b32_70">; +def v4b32_71 : NVPTXReg<"%v4b32_71">; +def v4b32_72 : NVPTXReg<"%v4b32_72">; +def v4b32_73 : NVPTXReg<"%v4b32_73">; +def v4b32_74 : NVPTXReg<"%v4b32_74">; +def v4b32_75 : NVPTXReg<"%v4b32_75">; +def v4b32_76 : NVPTXReg<"%v4b32_76">; +def v4b32_77 : NVPTXReg<"%v4b32_77">; +def v4b32_78 : NVPTXReg<"%v4b32_78">; +def v4b32_79 : NVPTXReg<"%v4b32_79">; +def 
v4b32_80 : NVPTXReg<"%v4b32_80">; +def v4b32_81 : NVPTXReg<"%v4b32_81">; +def v4b32_82 : NVPTXReg<"%v4b32_82">; +def v4b32_83 : NVPTXReg<"%v4b32_83">; +def v4b32_84 : NVPTXReg<"%v4b32_84">; +def v4b32_85 : NVPTXReg<"%v4b32_85">; +def v4b32_86 : NVPTXReg<"%v4b32_86">; +def v4b32_87 : NVPTXReg<"%v4b32_87">; +def v4b32_88 : NVPTXReg<"%v4b32_88">; +def v4b32_89 : NVPTXReg<"%v4b32_89">; +def v4b32_90 : NVPTXReg<"%v4b32_90">; +def v4b32_91 : NVPTXReg<"%v4b32_91">; +def v4b32_92 : NVPTXReg<"%v4b32_92">; +def v4b32_93 : NVPTXReg<"%v4b32_93">; +def v4b32_94 : NVPTXReg<"%v4b32_94">; +def v4b32_95 : NVPTXReg<"%v4b32_95">; +def v4b32_96 : NVPTXReg<"%v4b32_96">; +def v4b32_97 : NVPTXReg<"%v4b32_97">; +def v4b32_98 : NVPTXReg<"%v4b32_98">; +def v4b32_99 : NVPTXReg<"%v4b32_99">; +def v4b32_100 : NVPTXReg<"%v4b32_100">; +def v4b32_101 : NVPTXReg<"%v4b32_101">; +def v4b32_102 : NVPTXReg<"%v4b32_102">; +def v4b32_103 : NVPTXReg<"%v4b32_103">; +def v4b32_104 : NVPTXReg<"%v4b32_104">; +def v4b32_105 : NVPTXReg<"%v4b32_105">; +def v4b32_106 : NVPTXReg<"%v4b32_106">; +def v4b32_107 : NVPTXReg<"%v4b32_107">; +def v4b32_108 : NVPTXReg<"%v4b32_108">; +def v4b32_109 : NVPTXReg<"%v4b32_109">; +def v4b32_110 : NVPTXReg<"%v4b32_110">; +def v4b32_111 : NVPTXReg<"%v4b32_111">; +def v4b32_112 : NVPTXReg<"%v4b32_112">; +def v4b32_113 : NVPTXReg<"%v4b32_113">; +def v4b32_114 : NVPTXReg<"%v4b32_114">; +def v4b32_115 : NVPTXReg<"%v4b32_115">; +def v4b32_116 : NVPTXReg<"%v4b32_116">; +def v4b32_117 : NVPTXReg<"%v4b32_117">; +def v4b32_118 : NVPTXReg<"%v4b32_118">; +def v4b32_119 : NVPTXReg<"%v4b32_119">; +def v4b32_120 : NVPTXReg<"%v4b32_120">; +def v4b32_121 : NVPTXReg<"%v4b32_121">; +def v4b32_122 : NVPTXReg<"%v4b32_122">; +def v4b32_123 : NVPTXReg<"%v4b32_123">; +def v4b32_124 : NVPTXReg<"%v4b32_124">; +def v4b32_125 : NVPTXReg<"%v4b32_125">; +def v4b32_126 : NVPTXReg<"%v4b32_126">; +def v4b32_127 : NVPTXReg<"%v4b32_127">; +def v4b32_128 : NVPTXReg<"%v4b32_128">; +def v4b32_129 : 
NVPTXReg<"%v4b32_129">; +def v4b32_130 : NVPTXReg<"%v4b32_130">; +def v4b32_131 : NVPTXReg<"%v4b32_131">; +def v4b32_132 : NVPTXReg<"%v4b32_132">; +def v4b32_133 : NVPTXReg<"%v4b32_133">; +def v4b32_134 : NVPTXReg<"%v4b32_134">; +def v4b32_135 : NVPTXReg<"%v4b32_135">; +def v4b32_136 : NVPTXReg<"%v4b32_136">; +def v4b32_137 : NVPTXReg<"%v4b32_137">; +def v4b32_138 : NVPTXReg<"%v4b32_138">; +def v4b32_139 : NVPTXReg<"%v4b32_139">; +def v4b32_140 : NVPTXReg<"%v4b32_140">; +def v4b32_141 : NVPTXReg<"%v4b32_141">; +def v4b32_142 : NVPTXReg<"%v4b32_142">; +def v4b32_143 : NVPTXReg<"%v4b32_143">; +def v4b32_144 : NVPTXReg<"%v4b32_144">; +def v4b32_145 : NVPTXReg<"%v4b32_145">; +def v4b32_146 : NVPTXReg<"%v4b32_146">; +def v4b32_147 : NVPTXReg<"%v4b32_147">; +def v4b32_148 : NVPTXReg<"%v4b32_148">; +def v4b32_149 : NVPTXReg<"%v4b32_149">; +def v4b32_150 : NVPTXReg<"%v4b32_150">; +def v4b32_151 : NVPTXReg<"%v4b32_151">; +def v4b32_152 : NVPTXReg<"%v4b32_152">; +def v4b32_153 : NVPTXReg<"%v4b32_153">; +def v4b32_154 : NVPTXReg<"%v4b32_154">; +def v4b32_155 : NVPTXReg<"%v4b32_155">; +def v4b32_156 : NVPTXReg<"%v4b32_156">; +def v4b32_157 : NVPTXReg<"%v4b32_157">; +def v4b32_158 : NVPTXReg<"%v4b32_158">; +def v4b32_159 : NVPTXReg<"%v4b32_159">; +def v4b32_160 : NVPTXReg<"%v4b32_160">; +def v4b32_161 : NVPTXReg<"%v4b32_161">; +def v4b32_162 : NVPTXReg<"%v4b32_162">; +def v4b32_163 : NVPTXReg<"%v4b32_163">; +def v4b32_164 : NVPTXReg<"%v4b32_164">; +def v4b32_165 : NVPTXReg<"%v4b32_165">; +def v4b32_166 : NVPTXReg<"%v4b32_166">; +def v4b32_167 : NVPTXReg<"%v4b32_167">; +def v4b32_168 : NVPTXReg<"%v4b32_168">; +def v4b32_169 : NVPTXReg<"%v4b32_169">; +def v4b32_170 : NVPTXReg<"%v4b32_170">; +def v4b32_171 : NVPTXReg<"%v4b32_171">; +def v4b32_172 : NVPTXReg<"%v4b32_172">; +def v4b32_173 : NVPTXReg<"%v4b32_173">; +def v4b32_174 : NVPTXReg<"%v4b32_174">; +def v4b32_175 : NVPTXReg<"%v4b32_175">; +def v4b32_176 : NVPTXReg<"%v4b32_176">; +def v4b32_177 : NVPTXReg<"%v4b32_177">; +def 
v4b32_178 : NVPTXReg<"%v4b32_178">; +def v4b32_179 : NVPTXReg<"%v4b32_179">; +def v4b32_180 : NVPTXReg<"%v4b32_180">; +def v4b32_181 : NVPTXReg<"%v4b32_181">; +def v4b32_182 : NVPTXReg<"%v4b32_182">; +def v4b32_183 : NVPTXReg<"%v4b32_183">; +def v4b32_184 : NVPTXReg<"%v4b32_184">; +def v4b32_185 : NVPTXReg<"%v4b32_185">; +def v4b32_186 : NVPTXReg<"%v4b32_186">; +def v4b32_187 : NVPTXReg<"%v4b32_187">; +def v4b32_188 : NVPTXReg<"%v4b32_188">; +def v4b32_189 : NVPTXReg<"%v4b32_189">; +def v4b32_190 : NVPTXReg<"%v4b32_190">; +def v4b32_191 : NVPTXReg<"%v4b32_191">; +def v4b32_192 : NVPTXReg<"%v4b32_192">; +def v4b32_193 : NVPTXReg<"%v4b32_193">; +def v4b32_194 : NVPTXReg<"%v4b32_194">; +def v4b32_195 : NVPTXReg<"%v4b32_195">; +def v4b32_196 : NVPTXReg<"%v4b32_196">; +def v4b32_197 : NVPTXReg<"%v4b32_197">; +def v4b32_198 : NVPTXReg<"%v4b32_198">; +def v4b32_199 : NVPTXReg<"%v4b32_199">; +def v4b32_200 : NVPTXReg<"%v4b32_200">; +def v4b32_201 : NVPTXReg<"%v4b32_201">; +def v4b32_202 : NVPTXReg<"%v4b32_202">; +def v4b32_203 : NVPTXReg<"%v4b32_203">; +def v4b32_204 : NVPTXReg<"%v4b32_204">; +def v4b32_205 : NVPTXReg<"%v4b32_205">; +def v4b32_206 : NVPTXReg<"%v4b32_206">; +def v4b32_207 : NVPTXReg<"%v4b32_207">; +def v4b32_208 : NVPTXReg<"%v4b32_208">; +def v4b32_209 : NVPTXReg<"%v4b32_209">; +def v4b32_210 : NVPTXReg<"%v4b32_210">; +def v4b32_211 : NVPTXReg<"%v4b32_211">; +def v4b32_212 : NVPTXReg<"%v4b32_212">; +def v4b32_213 : NVPTXReg<"%v4b32_213">; +def v4b32_214 : NVPTXReg<"%v4b32_214">; +def v4b32_215 : NVPTXReg<"%v4b32_215">; +def v4b32_216 : NVPTXReg<"%v4b32_216">; +def v4b32_217 : NVPTXReg<"%v4b32_217">; +def v4b32_218 : NVPTXReg<"%v4b32_218">; +def v4b32_219 : NVPTXReg<"%v4b32_219">; +def v4b32_220 : NVPTXReg<"%v4b32_220">; +def v4b32_221 : NVPTXReg<"%v4b32_221">; +def v4b32_222 : NVPTXReg<"%v4b32_222">; +def v4b32_223 : NVPTXReg<"%v4b32_223">; +def v4b32_224 : NVPTXReg<"%v4b32_224">; +def v4b32_225 : NVPTXReg<"%v4b32_225">; +def v4b32_226 : 
NVPTXReg<"%v4b32_226">; +def v4b32_227 : NVPTXReg<"%v4b32_227">; +def v4b32_228 : NVPTXReg<"%v4b32_228">; +def v4b32_229 : NVPTXReg<"%v4b32_229">; +def v4b32_230 : NVPTXReg<"%v4b32_230">; +def v4b32_231 : NVPTXReg<"%v4b32_231">; +def v4b32_232 : NVPTXReg<"%v4b32_232">; +def v4b32_233 : NVPTXReg<"%v4b32_233">; +def v4b32_234 : NVPTXReg<"%v4b32_234">; +def v4b32_235 : NVPTXReg<"%v4b32_235">; +def v4b32_236 : NVPTXReg<"%v4b32_236">; +def v4b32_237 : NVPTXReg<"%v4b32_237">; +def v4b32_238 : NVPTXReg<"%v4b32_238">; +def v4b32_239 : NVPTXReg<"%v4b32_239">; +def v4b32_240 : NVPTXReg<"%v4b32_240">; +def v4b32_241 : NVPTXReg<"%v4b32_241">; +def v4b32_242 : NVPTXReg<"%v4b32_242">; +def v4b32_243 : NVPTXReg<"%v4b32_243">; +def v4b32_244 : NVPTXReg<"%v4b32_244">; +def v4b32_245 : NVPTXReg<"%v4b32_245">; +def v4b32_246 : NVPTXReg<"%v4b32_246">; +def v4b32_247 : NVPTXReg<"%v4b32_247">; +def v4b32_248 : NVPTXReg<"%v4b32_248">; +def v4b32_249 : NVPTXReg<"%v4b32_249">; +def v4b32_250 : NVPTXReg<"%v4b32_250">; +def v4b32_251 : NVPTXReg<"%v4b32_251">; +def v4b32_252 : NVPTXReg<"%v4b32_252">; +def v4b32_253 : NVPTXReg<"%v4b32_253">; +def v4b32_254 : NVPTXReg<"%v4b32_254">; +def v4b32_255 : NVPTXReg<"%v4b32_255">; +def v4b32_256 : NVPTXReg<"%v4b32_256">; +def v4b32_257 : NVPTXReg<"%v4b32_257">; +def v4b32_258 : NVPTXReg<"%v4b32_258">; +def v4b32_259 : NVPTXReg<"%v4b32_259">; +def v4b32_260 : NVPTXReg<"%v4b32_260">; +def v4b32_261 : NVPTXReg<"%v4b32_261">; +def v4b32_262 : NVPTXReg<"%v4b32_262">; +def v4b32_263 : NVPTXReg<"%v4b32_263">; +def v4b32_264 : NVPTXReg<"%v4b32_264">; +def v4b32_265 : NVPTXReg<"%v4b32_265">; +def v4b32_266 : NVPTXReg<"%v4b32_266">; +def v4b32_267 : NVPTXReg<"%v4b32_267">; +def v4b32_268 : NVPTXReg<"%v4b32_268">; +def v4b32_269 : NVPTXReg<"%v4b32_269">; +def v4b32_270 : NVPTXReg<"%v4b32_270">; +def v4b32_271 : NVPTXReg<"%v4b32_271">; +def v4b32_272 : NVPTXReg<"%v4b32_272">; +def v4b32_273 : NVPTXReg<"%v4b32_273">; +def v4b32_274 : NVPTXReg<"%v4b32_274">; +def 
v4b32_275 : NVPTXReg<"%v4b32_275">; +def v4b32_276 : NVPTXReg<"%v4b32_276">; +def v4b32_277 : NVPTXReg<"%v4b32_277">; +def v4b32_278 : NVPTXReg<"%v4b32_278">; +def v4b32_279 : NVPTXReg<"%v4b32_279">; +def v4b32_280 : NVPTXReg<"%v4b32_280">; +def v4b32_281 : NVPTXReg<"%v4b32_281">; +def v4b32_282 : NVPTXReg<"%v4b32_282">; +def v4b32_283 : NVPTXReg<"%v4b32_283">; +def v4b32_284 : NVPTXReg<"%v4b32_284">; +def v4b32_285 : NVPTXReg<"%v4b32_285">; +def v4b32_286 : NVPTXReg<"%v4b32_286">; +def v4b32_287 : NVPTXReg<"%v4b32_287">; +def v4b32_288 : NVPTXReg<"%v4b32_288">; +def v4b32_289 : NVPTXReg<"%v4b32_289">; +def v4b32_290 : NVPTXReg<"%v4b32_290">; +def v4b32_291 : NVPTXReg<"%v4b32_291">; +def v4b32_292 : NVPTXReg<"%v4b32_292">; +def v4b32_293 : NVPTXReg<"%v4b32_293">; +def v4b32_294 : NVPTXReg<"%v4b32_294">; +def v4b32_295 : NVPTXReg<"%v4b32_295">; +def v4b32_296 : NVPTXReg<"%v4b32_296">; +def v4b32_297 : NVPTXReg<"%v4b32_297">; +def v4b32_298 : NVPTXReg<"%v4b32_298">; +def v4b32_299 : NVPTXReg<"%v4b32_299">; +def v4b32_300 : NVPTXReg<"%v4b32_300">; +def v4b32_301 : NVPTXReg<"%v4b32_301">; +def v4b32_302 : NVPTXReg<"%v4b32_302">; +def v4b32_303 : NVPTXReg<"%v4b32_303">; +def v4b32_304 : NVPTXReg<"%v4b32_304">; +def v4b32_305 : NVPTXReg<"%v4b32_305">; +def v4b32_306 : NVPTXReg<"%v4b32_306">; +def v4b32_307 : NVPTXReg<"%v4b32_307">; +def v4b32_308 : NVPTXReg<"%v4b32_308">; +def v4b32_309 : NVPTXReg<"%v4b32_309">; +def v4b32_310 : NVPTXReg<"%v4b32_310">; +def v4b32_311 : NVPTXReg<"%v4b32_311">; +def v4b32_312 : NVPTXReg<"%v4b32_312">; +def v4b32_313 : NVPTXReg<"%v4b32_313">; +def v4b32_314 : NVPTXReg<"%v4b32_314">; +def v4b32_315 : NVPTXReg<"%v4b32_315">; +def v4b32_316 : NVPTXReg<"%v4b32_316">; +def v4b32_317 : NVPTXReg<"%v4b32_317">; +def v4b32_318 : NVPTXReg<"%v4b32_318">; +def v4b32_319 : NVPTXReg<"%v4b32_319">; +def v4b32_320 : NVPTXReg<"%v4b32_320">; +def v4b32_321 : NVPTXReg<"%v4b32_321">; +def v4b32_322 : NVPTXReg<"%v4b32_322">; +def v4b32_323 : 
NVPTXReg<"%v4b32_323">; +def v4b32_324 : NVPTXReg<"%v4b32_324">; +def v4b32_325 : NVPTXReg<"%v4b32_325">; +def v4b32_326 : NVPTXReg<"%v4b32_326">; +def v4b32_327 : NVPTXReg<"%v4b32_327">; +def v4b32_328 : NVPTXReg<"%v4b32_328">; +def v4b32_329 : NVPTXReg<"%v4b32_329">; +def v4b32_330 : NVPTXReg<"%v4b32_330">; +def v4b32_331 : NVPTXReg<"%v4b32_331">; +def v4b32_332 : NVPTXReg<"%v4b32_332">; +def v4b32_333 : NVPTXReg<"%v4b32_333">; +def v4b32_334 : NVPTXReg<"%v4b32_334">; +def v4b32_335 : NVPTXReg<"%v4b32_335">; +def v4b32_336 : NVPTXReg<"%v4b32_336">; +def v4b32_337 : NVPTXReg<"%v4b32_337">; +def v4b32_338 : NVPTXReg<"%v4b32_338">; +def v4b32_339 : NVPTXReg<"%v4b32_339">; +def v4b32_340 : NVPTXReg<"%v4b32_340">; +def v4b32_341 : NVPTXReg<"%v4b32_341">; +def v4b32_342 : NVPTXReg<"%v4b32_342">; +def v4b32_343 : NVPTXReg<"%v4b32_343">; +def v4b32_344 : NVPTXReg<"%v4b32_344">; +def v4b32_345 : NVPTXReg<"%v4b32_345">; +def v4b32_346 : NVPTXReg<"%v4b32_346">; +def v4b32_347 : NVPTXReg<"%v4b32_347">; +def v4b32_348 : NVPTXReg<"%v4b32_348">; +def v4b32_349 : NVPTXReg<"%v4b32_349">; +def v4b32_350 : NVPTXReg<"%v4b32_350">; +def v4b32_351 : NVPTXReg<"%v4b32_351">; +def v4b32_352 : NVPTXReg<"%v4b32_352">; +def v4b32_353 : NVPTXReg<"%v4b32_353">; +def v4b32_354 : NVPTXReg<"%v4b32_354">; +def v4b32_355 : NVPTXReg<"%v4b32_355">; +def v4b32_356 : NVPTXReg<"%v4b32_356">; +def v4b32_357 : NVPTXReg<"%v4b32_357">; +def v4b32_358 : NVPTXReg<"%v4b32_358">; +def v4b32_359 : NVPTXReg<"%v4b32_359">; +def v4b32_360 : NVPTXReg<"%v4b32_360">; +def v4b32_361 : NVPTXReg<"%v4b32_361">; +def v4b32_362 : NVPTXReg<"%v4b32_362">; +def v4b32_363 : NVPTXReg<"%v4b32_363">; +def v4b32_364 : NVPTXReg<"%v4b32_364">; +def v4b32_365 : NVPTXReg<"%v4b32_365">; +def v4b32_366 : NVPTXReg<"%v4b32_366">; +def v4b32_367 : NVPTXReg<"%v4b32_367">; +def v4b32_368 : NVPTXReg<"%v4b32_368">; +def v4b32_369 : NVPTXReg<"%v4b32_369">; +def v4b32_370 : NVPTXReg<"%v4b32_370">; +def v4b32_371 : NVPTXReg<"%v4b32_371">; +def 
v4b32_372 : NVPTXReg<"%v4b32_372">; +def v4b32_373 : NVPTXReg<"%v4b32_373">; +def v4b32_374 : NVPTXReg<"%v4b32_374">; +def v4b32_375 : NVPTXReg<"%v4b32_375">; +def v4b32_376 : NVPTXReg<"%v4b32_376">; +def v4b32_377 : NVPTXReg<"%v4b32_377">; +def v4b32_378 : NVPTXReg<"%v4b32_378">; +def v4b32_379 : NVPTXReg<"%v4b32_379">; +def v4b32_380 : NVPTXReg<"%v4b32_380">; +def v4b32_381 : NVPTXReg<"%v4b32_381">; +def v4b32_382 : NVPTXReg<"%v4b32_382">; +def v4b32_383 : NVPTXReg<"%v4b32_383">; +def v4b32_384 : NVPTXReg<"%v4b32_384">; +def v4b32_385 : NVPTXReg<"%v4b32_385">; +def v4b32_386 : NVPTXReg<"%v4b32_386">; +def v4b32_387 : NVPTXReg<"%v4b32_387">; +def v4b32_388 : NVPTXReg<"%v4b32_388">; +def v4b32_389 : NVPTXReg<"%v4b32_389">; +def v4b32_390 : NVPTXReg<"%v4b32_390">; +def v4b32_391 : NVPTXReg<"%v4b32_391">; +def v4b32_392 : NVPTXReg<"%v4b32_392">; +def v4b32_393 : NVPTXReg<"%v4b32_393">; +def v4b32_394 : NVPTXReg<"%v4b32_394">; +def v4b32_395 : NVPTXReg<"%v4b32_395">; + +//===--- Arguments --------------------------------------------------------===// +def ia0 : NVPTXReg<"%ia0">; +def ia1 : NVPTXReg<"%ia1">; +def ia2 : NVPTXReg<"%ia2">; +def ia3 : NVPTXReg<"%ia3">; +def ia4 : NVPTXReg<"%ia4">; +def ia5 : NVPTXReg<"%ia5">; +def ia6 : NVPTXReg<"%ia6">; +def ia7 : NVPTXReg<"%ia7">; +def ia8 : NVPTXReg<"%ia8">; +def ia9 : NVPTXReg<"%ia9">; +def ia10 : NVPTXReg<"%ia10">; +def ia11 : NVPTXReg<"%ia11">; +def ia12 : NVPTXReg<"%ia12">; +def ia13 : NVPTXReg<"%ia13">; +def ia14 : NVPTXReg<"%ia14">; +def ia15 : NVPTXReg<"%ia15">; +def ia16 : NVPTXReg<"%ia16">; +def ia17 : NVPTXReg<"%ia17">; +def ia18 : NVPTXReg<"%ia18">; +def ia19 : NVPTXReg<"%ia19">; +def ia20 : NVPTXReg<"%ia20">; +def ia21 : NVPTXReg<"%ia21">; +def ia22 : NVPTXReg<"%ia22">; +def ia23 : NVPTXReg<"%ia23">; +def ia24 : NVPTXReg<"%ia24">; +def ia25 : NVPTXReg<"%ia25">; +def ia26 : NVPTXReg<"%ia26">; +def ia27 : NVPTXReg<"%ia27">; +def ia28 : NVPTXReg<"%ia28">; +def ia29 : NVPTXReg<"%ia29">; +def ia30 : 
NVPTXReg<"%ia30">; +def ia31 : NVPTXReg<"%ia31">; +def ia32 : NVPTXReg<"%ia32">; +def ia33 : NVPTXReg<"%ia33">; +def ia34 : NVPTXReg<"%ia34">; +def ia35 : NVPTXReg<"%ia35">; +def ia36 : NVPTXReg<"%ia36">; +def ia37 : NVPTXReg<"%ia37">; +def ia38 : NVPTXReg<"%ia38">; +def ia39 : NVPTXReg<"%ia39">; +def ia40 : NVPTXReg<"%ia40">; +def ia41 : NVPTXReg<"%ia41">; +def ia42 : NVPTXReg<"%ia42">; +def ia43 : NVPTXReg<"%ia43">; +def ia44 : NVPTXReg<"%ia44">; +def ia45 : NVPTXReg<"%ia45">; +def ia46 : NVPTXReg<"%ia46">; +def ia47 : NVPTXReg<"%ia47">; +def ia48 : NVPTXReg<"%ia48">; +def ia49 : NVPTXReg<"%ia49">; +def ia50 : NVPTXReg<"%ia50">; +def ia51 : NVPTXReg<"%ia51">; +def ia52 : NVPTXReg<"%ia52">; +def ia53 : NVPTXReg<"%ia53">; +def ia54 : NVPTXReg<"%ia54">; +def ia55 : NVPTXReg<"%ia55">; +def ia56 : NVPTXReg<"%ia56">; +def ia57 : NVPTXReg<"%ia57">; +def ia58 : NVPTXReg<"%ia58">; +def ia59 : NVPTXReg<"%ia59">; +def ia60 : NVPTXReg<"%ia60">; +def ia61 : NVPTXReg<"%ia61">; +def ia62 : NVPTXReg<"%ia62">; +def ia63 : NVPTXReg<"%ia63">; +def ia64 : NVPTXReg<"%ia64">; +def ia65 : NVPTXReg<"%ia65">; +def ia66 : NVPTXReg<"%ia66">; +def ia67 : NVPTXReg<"%ia67">; +def ia68 : NVPTXReg<"%ia68">; +def ia69 : NVPTXReg<"%ia69">; +def ia70 : NVPTXReg<"%ia70">; +def ia71 : NVPTXReg<"%ia71">; +def ia72 : NVPTXReg<"%ia72">; +def ia73 : NVPTXReg<"%ia73">; +def ia74 : NVPTXReg<"%ia74">; +def ia75 : NVPTXReg<"%ia75">; +def ia76 : NVPTXReg<"%ia76">; +def ia77 : NVPTXReg<"%ia77">; +def ia78 : NVPTXReg<"%ia78">; +def ia79 : NVPTXReg<"%ia79">; +def ia80 : NVPTXReg<"%ia80">; +def ia81 : NVPTXReg<"%ia81">; +def ia82 : NVPTXReg<"%ia82">; +def ia83 : NVPTXReg<"%ia83">; +def ia84 : NVPTXReg<"%ia84">; +def ia85 : NVPTXReg<"%ia85">; +def ia86 : NVPTXReg<"%ia86">; +def ia87 : NVPTXReg<"%ia87">; +def ia88 : NVPTXReg<"%ia88">; +def ia89 : NVPTXReg<"%ia89">; +def ia90 : NVPTXReg<"%ia90">; +def ia91 : NVPTXReg<"%ia91">; +def ia92 : NVPTXReg<"%ia92">; +def ia93 : NVPTXReg<"%ia93">; +def ia94 : 
NVPTXReg<"%ia94">; +def ia95 : NVPTXReg<"%ia95">; +def ia96 : NVPTXReg<"%ia96">; +def ia97 : NVPTXReg<"%ia97">; +def ia98 : NVPTXReg<"%ia98">; +def ia99 : NVPTXReg<"%ia99">; +def ia100 : NVPTXReg<"%ia100">; +def ia101 : NVPTXReg<"%ia101">; +def ia102 : NVPTXReg<"%ia102">; +def ia103 : NVPTXReg<"%ia103">; +def ia104 : NVPTXReg<"%ia104">; +def ia105 : NVPTXReg<"%ia105">; +def ia106 : NVPTXReg<"%ia106">; +def ia107 : NVPTXReg<"%ia107">; +def ia108 : NVPTXReg<"%ia108">; +def ia109 : NVPTXReg<"%ia109">; +def ia110 : NVPTXReg<"%ia110">; +def ia111 : NVPTXReg<"%ia111">; +def ia112 : NVPTXReg<"%ia112">; +def ia113 : NVPTXReg<"%ia113">; +def ia114 : NVPTXReg<"%ia114">; +def ia115 : NVPTXReg<"%ia115">; +def ia116 : NVPTXReg<"%ia116">; +def ia117 : NVPTXReg<"%ia117">; +def ia118 : NVPTXReg<"%ia118">; +def ia119 : NVPTXReg<"%ia119">; +def ia120 : NVPTXReg<"%ia120">; +def ia121 : NVPTXReg<"%ia121">; +def ia122 : NVPTXReg<"%ia122">; +def ia123 : NVPTXReg<"%ia123">; +def ia124 : NVPTXReg<"%ia124">; +def ia125 : NVPTXReg<"%ia125">; +def ia126 : NVPTXReg<"%ia126">; +def ia127 : NVPTXReg<"%ia127">; +def ia128 : NVPTXReg<"%ia128">; +def ia129 : NVPTXReg<"%ia129">; +def ia130 : NVPTXReg<"%ia130">; +def ia131 : NVPTXReg<"%ia131">; +def ia132 : NVPTXReg<"%ia132">; +def ia133 : NVPTXReg<"%ia133">; +def ia134 : NVPTXReg<"%ia134">; +def ia135 : NVPTXReg<"%ia135">; +def ia136 : NVPTXReg<"%ia136">; +def ia137 : NVPTXReg<"%ia137">; +def ia138 : NVPTXReg<"%ia138">; +def ia139 : NVPTXReg<"%ia139">; +def ia140 : NVPTXReg<"%ia140">; +def ia141 : NVPTXReg<"%ia141">; +def ia142 : NVPTXReg<"%ia142">; +def ia143 : NVPTXReg<"%ia143">; +def ia144 : NVPTXReg<"%ia144">; +def ia145 : NVPTXReg<"%ia145">; +def ia146 : NVPTXReg<"%ia146">; +def ia147 : NVPTXReg<"%ia147">; +def ia148 : NVPTXReg<"%ia148">; +def ia149 : NVPTXReg<"%ia149">; +def ia150 : NVPTXReg<"%ia150">; +def ia151 : NVPTXReg<"%ia151">; +def ia152 : NVPTXReg<"%ia152">; +def ia153 : NVPTXReg<"%ia153">; +def ia154 : NVPTXReg<"%ia154">; +def ia155 
: NVPTXReg<"%ia155">; +def ia156 : NVPTXReg<"%ia156">; +def ia157 : NVPTXReg<"%ia157">; +def ia158 : NVPTXReg<"%ia158">; +def ia159 : NVPTXReg<"%ia159">; +def ia160 : NVPTXReg<"%ia160">; +def ia161 : NVPTXReg<"%ia161">; +def ia162 : NVPTXReg<"%ia162">; +def ia163 : NVPTXReg<"%ia163">; +def ia164 : NVPTXReg<"%ia164">; +def ia165 : NVPTXReg<"%ia165">; +def ia166 : NVPTXReg<"%ia166">; +def ia167 : NVPTXReg<"%ia167">; +def ia168 : NVPTXReg<"%ia168">; +def ia169 : NVPTXReg<"%ia169">; +def ia170 : NVPTXReg<"%ia170">; +def ia171 : NVPTXReg<"%ia171">; +def ia172 : NVPTXReg<"%ia172">; +def ia173 : NVPTXReg<"%ia173">; +def ia174 : NVPTXReg<"%ia174">; +def ia175 : NVPTXReg<"%ia175">; +def ia176 : NVPTXReg<"%ia176">; +def ia177 : NVPTXReg<"%ia177">; +def ia178 : NVPTXReg<"%ia178">; +def ia179 : NVPTXReg<"%ia179">; +def ia180 : NVPTXReg<"%ia180">; +def ia181 : NVPTXReg<"%ia181">; +def ia182 : NVPTXReg<"%ia182">; +def ia183 : NVPTXReg<"%ia183">; +def ia184 : NVPTXReg<"%ia184">; +def ia185 : NVPTXReg<"%ia185">; +def ia186 : NVPTXReg<"%ia186">; +def ia187 : NVPTXReg<"%ia187">; +def ia188 : NVPTXReg<"%ia188">; +def ia189 : NVPTXReg<"%ia189">; +def ia190 : NVPTXReg<"%ia190">; +def ia191 : NVPTXReg<"%ia191">; +def ia192 : NVPTXReg<"%ia192">; +def ia193 : NVPTXReg<"%ia193">; +def ia194 : NVPTXReg<"%ia194">; +def ia195 : NVPTXReg<"%ia195">; +def ia196 : NVPTXReg<"%ia196">; +def ia197 : NVPTXReg<"%ia197">; +def ia198 : NVPTXReg<"%ia198">; +def ia199 : NVPTXReg<"%ia199">; +def ia200 : NVPTXReg<"%ia200">; +def ia201 : NVPTXReg<"%ia201">; +def ia202 : NVPTXReg<"%ia202">; +def ia203 : NVPTXReg<"%ia203">; +def ia204 : NVPTXReg<"%ia204">; +def ia205 : NVPTXReg<"%ia205">; +def ia206 : NVPTXReg<"%ia206">; +def ia207 : NVPTXReg<"%ia207">; +def ia208 : NVPTXReg<"%ia208">; +def ia209 : NVPTXReg<"%ia209">; +def ia210 : NVPTXReg<"%ia210">; +def ia211 : NVPTXReg<"%ia211">; +def ia212 : NVPTXReg<"%ia212">; +def ia213 : NVPTXReg<"%ia213">; +def ia214 : NVPTXReg<"%ia214">; +def ia215 : 
NVPTXReg<"%ia215">; +def ia216 : NVPTXReg<"%ia216">; +def ia217 : NVPTXReg<"%ia217">; +def ia218 : NVPTXReg<"%ia218">; +def ia219 : NVPTXReg<"%ia219">; +def ia220 : NVPTXReg<"%ia220">; +def ia221 : NVPTXReg<"%ia221">; +def ia222 : NVPTXReg<"%ia222">; +def ia223 : NVPTXReg<"%ia223">; +def ia224 : NVPTXReg<"%ia224">; +def ia225 : NVPTXReg<"%ia225">; +def ia226 : NVPTXReg<"%ia226">; +def ia227 : NVPTXReg<"%ia227">; +def ia228 : NVPTXReg<"%ia228">; +def ia229 : NVPTXReg<"%ia229">; +def ia230 : NVPTXReg<"%ia230">; +def ia231 : NVPTXReg<"%ia231">; +def ia232 : NVPTXReg<"%ia232">; +def ia233 : NVPTXReg<"%ia233">; +def ia234 : NVPTXReg<"%ia234">; +def ia235 : NVPTXReg<"%ia235">; +def ia236 : NVPTXReg<"%ia236">; +def ia237 : NVPTXReg<"%ia237">; +def ia238 : NVPTXReg<"%ia238">; +def ia239 : NVPTXReg<"%ia239">; +def ia240 : NVPTXReg<"%ia240">; +def ia241 : NVPTXReg<"%ia241">; +def ia242 : NVPTXReg<"%ia242">; +def ia243 : NVPTXReg<"%ia243">; +def ia244 : NVPTXReg<"%ia244">; +def ia245 : NVPTXReg<"%ia245">; +def ia246 : NVPTXReg<"%ia246">; +def ia247 : NVPTXReg<"%ia247">; +def ia248 : NVPTXReg<"%ia248">; +def ia249 : NVPTXReg<"%ia249">; +def ia250 : NVPTXReg<"%ia250">; +def ia251 : NVPTXReg<"%ia251">; +def ia252 : NVPTXReg<"%ia252">; +def ia253 : NVPTXReg<"%ia253">; +def ia254 : NVPTXReg<"%ia254">; +def ia255 : NVPTXReg<"%ia255">; +def ia256 : NVPTXReg<"%ia256">; +def ia257 : NVPTXReg<"%ia257">; +def ia258 : NVPTXReg<"%ia258">; +def ia259 : NVPTXReg<"%ia259">; +def ia260 : NVPTXReg<"%ia260">; +def ia261 : NVPTXReg<"%ia261">; +def ia262 : NVPTXReg<"%ia262">; +def ia263 : NVPTXReg<"%ia263">; +def ia264 : NVPTXReg<"%ia264">; +def ia265 : NVPTXReg<"%ia265">; +def ia266 : NVPTXReg<"%ia266">; +def ia267 : NVPTXReg<"%ia267">; +def ia268 : NVPTXReg<"%ia268">; +def ia269 : NVPTXReg<"%ia269">; +def ia270 : NVPTXReg<"%ia270">; +def ia271 : NVPTXReg<"%ia271">; +def ia272 : NVPTXReg<"%ia272">; +def ia273 : NVPTXReg<"%ia273">; +def ia274 : NVPTXReg<"%ia274">; +def ia275 : NVPTXReg<"%ia275">; 
+def ia276 : NVPTXReg<"%ia276">; +def ia277 : NVPTXReg<"%ia277">; +def ia278 : NVPTXReg<"%ia278">; +def ia279 : NVPTXReg<"%ia279">; +def ia280 : NVPTXReg<"%ia280">; +def ia281 : NVPTXReg<"%ia281">; +def ia282 : NVPTXReg<"%ia282">; +def ia283 : NVPTXReg<"%ia283">; +def ia284 : NVPTXReg<"%ia284">; +def ia285 : NVPTXReg<"%ia285">; +def ia286 : NVPTXReg<"%ia286">; +def ia287 : NVPTXReg<"%ia287">; +def ia288 : NVPTXReg<"%ia288">; +def ia289 : NVPTXReg<"%ia289">; +def ia290 : NVPTXReg<"%ia290">; +def ia291 : NVPTXReg<"%ia291">; +def ia292 : NVPTXReg<"%ia292">; +def ia293 : NVPTXReg<"%ia293">; +def ia294 : NVPTXReg<"%ia294">; +def ia295 : NVPTXReg<"%ia295">; +def ia296 : NVPTXReg<"%ia296">; +def ia297 : NVPTXReg<"%ia297">; +def ia298 : NVPTXReg<"%ia298">; +def ia299 : NVPTXReg<"%ia299">; +def ia300 : NVPTXReg<"%ia300">; +def ia301 : NVPTXReg<"%ia301">; +def ia302 : NVPTXReg<"%ia302">; +def ia303 : NVPTXReg<"%ia303">; +def ia304 : NVPTXReg<"%ia304">; +def ia305 : NVPTXReg<"%ia305">; +def ia306 : NVPTXReg<"%ia306">; +def ia307 : NVPTXReg<"%ia307">; +def ia308 : NVPTXReg<"%ia308">; +def ia309 : NVPTXReg<"%ia309">; +def ia310 : NVPTXReg<"%ia310">; +def ia311 : NVPTXReg<"%ia311">; +def ia312 : NVPTXReg<"%ia312">; +def ia313 : NVPTXReg<"%ia313">; +def ia314 : NVPTXReg<"%ia314">; +def ia315 : NVPTXReg<"%ia315">; +def ia316 : NVPTXReg<"%ia316">; +def ia317 : NVPTXReg<"%ia317">; +def ia318 : NVPTXReg<"%ia318">; +def ia319 : NVPTXReg<"%ia319">; +def ia320 : NVPTXReg<"%ia320">; +def ia321 : NVPTXReg<"%ia321">; +def ia322 : NVPTXReg<"%ia322">; +def ia323 : NVPTXReg<"%ia323">; +def ia324 : NVPTXReg<"%ia324">; +def ia325 : NVPTXReg<"%ia325">; +def ia326 : NVPTXReg<"%ia326">; +def ia327 : NVPTXReg<"%ia327">; +def ia328 : NVPTXReg<"%ia328">; +def ia329 : NVPTXReg<"%ia329">; +def ia330 : NVPTXReg<"%ia330">; +def ia331 : NVPTXReg<"%ia331">; +def ia332 : NVPTXReg<"%ia332">; +def ia333 : NVPTXReg<"%ia333">; +def ia334 : NVPTXReg<"%ia334">; +def ia335 : NVPTXReg<"%ia335">; +def ia336 : 
NVPTXReg<"%ia336">; +def ia337 : NVPTXReg<"%ia337">; +def ia338 : NVPTXReg<"%ia338">; +def ia339 : NVPTXReg<"%ia339">; +def ia340 : NVPTXReg<"%ia340">; +def ia341 : NVPTXReg<"%ia341">; +def ia342 : NVPTXReg<"%ia342">; +def ia343 : NVPTXReg<"%ia343">; +def ia344 : NVPTXReg<"%ia344">; +def ia345 : NVPTXReg<"%ia345">; +def ia346 : NVPTXReg<"%ia346">; +def ia347 : NVPTXReg<"%ia347">; +def ia348 : NVPTXReg<"%ia348">; +def ia349 : NVPTXReg<"%ia349">; +def ia350 : NVPTXReg<"%ia350">; +def ia351 : NVPTXReg<"%ia351">; +def ia352 : NVPTXReg<"%ia352">; +def ia353 : NVPTXReg<"%ia353">; +def ia354 : NVPTXReg<"%ia354">; +def ia355 : NVPTXReg<"%ia355">; +def ia356 : NVPTXReg<"%ia356">; +def ia357 : NVPTXReg<"%ia357">; +def ia358 : NVPTXReg<"%ia358">; +def ia359 : NVPTXReg<"%ia359">; +def ia360 : NVPTXReg<"%ia360">; +def ia361 : NVPTXReg<"%ia361">; +def ia362 : NVPTXReg<"%ia362">; +def ia363 : NVPTXReg<"%ia363">; +def ia364 : NVPTXReg<"%ia364">; +def ia365 : NVPTXReg<"%ia365">; +def ia366 : NVPTXReg<"%ia366">; +def ia367 : NVPTXReg<"%ia367">; +def ia368 : NVPTXReg<"%ia368">; +def ia369 : NVPTXReg<"%ia369">; +def ia370 : NVPTXReg<"%ia370">; +def ia371 : NVPTXReg<"%ia371">; +def ia372 : NVPTXReg<"%ia372">; +def ia373 : NVPTXReg<"%ia373">; +def ia374 : NVPTXReg<"%ia374">; +def ia375 : NVPTXReg<"%ia375">; +def ia376 : NVPTXReg<"%ia376">; +def ia377 : NVPTXReg<"%ia377">; +def ia378 : NVPTXReg<"%ia378">; +def ia379 : NVPTXReg<"%ia379">; +def ia380 : NVPTXReg<"%ia380">; +def ia381 : NVPTXReg<"%ia381">; +def ia382 : NVPTXReg<"%ia382">; +def ia383 : NVPTXReg<"%ia383">; +def ia384 : NVPTXReg<"%ia384">; +def ia385 : NVPTXReg<"%ia385">; +def ia386 : NVPTXReg<"%ia386">; +def ia387 : NVPTXReg<"%ia387">; +def ia388 : NVPTXReg<"%ia388">; +def ia389 : NVPTXReg<"%ia389">; +def ia390 : NVPTXReg<"%ia390">; +def ia391 : NVPTXReg<"%ia391">; +def ia392 : NVPTXReg<"%ia392">; +def ia393 : NVPTXReg<"%ia393">; +def ia394 : NVPTXReg<"%ia394">; +def ia395 : NVPTXReg<"%ia395">; +def la0 : NVPTXReg<"%la0">; 
+def la1 : NVPTXReg<"%la1">; +def la2 : NVPTXReg<"%la2">; +def la3 : NVPTXReg<"%la3">; +def la4 : NVPTXReg<"%la4">; +def la5 : NVPTXReg<"%la5">; +def la6 : NVPTXReg<"%la6">; +def la7 : NVPTXReg<"%la7">; +def la8 : NVPTXReg<"%la8">; +def la9 : NVPTXReg<"%la9">; +def la10 : NVPTXReg<"%la10">; +def la11 : NVPTXReg<"%la11">; +def la12 : NVPTXReg<"%la12">; +def la13 : NVPTXReg<"%la13">; +def la14 : NVPTXReg<"%la14">; +def la15 : NVPTXReg<"%la15">; +def la16 : NVPTXReg<"%la16">; +def la17 : NVPTXReg<"%la17">; +def la18 : NVPTXReg<"%la18">; +def la19 : NVPTXReg<"%la19">; +def la20 : NVPTXReg<"%la20">; +def la21 : NVPTXReg<"%la21">; +def la22 : NVPTXReg<"%la22">; +def la23 : NVPTXReg<"%la23">; +def la24 : NVPTXReg<"%la24">; +def la25 : NVPTXReg<"%la25">; +def la26 : NVPTXReg<"%la26">; +def la27 : NVPTXReg<"%la27">; +def la28 : NVPTXReg<"%la28">; +def la29 : NVPTXReg<"%la29">; +def la30 : NVPTXReg<"%la30">; +def la31 : NVPTXReg<"%la31">; +def la32 : NVPTXReg<"%la32">; +def la33 : NVPTXReg<"%la33">; +def la34 : NVPTXReg<"%la34">; +def la35 : NVPTXReg<"%la35">; +def la36 : NVPTXReg<"%la36">; +def la37 : NVPTXReg<"%la37">; +def la38 : NVPTXReg<"%la38">; +def la39 : NVPTXReg<"%la39">; +def la40 : NVPTXReg<"%la40">; +def la41 : NVPTXReg<"%la41">; +def la42 : NVPTXReg<"%la42">; +def la43 : NVPTXReg<"%la43">; +def la44 : NVPTXReg<"%la44">; +def la45 : NVPTXReg<"%la45">; +def la46 : NVPTXReg<"%la46">; +def la47 : NVPTXReg<"%la47">; +def la48 : NVPTXReg<"%la48">; +def la49 : NVPTXReg<"%la49">; +def la50 : NVPTXReg<"%la50">; +def la51 : NVPTXReg<"%la51">; +def la52 : NVPTXReg<"%la52">; +def la53 : NVPTXReg<"%la53">; +def la54 : NVPTXReg<"%la54">; +def la55 : NVPTXReg<"%la55">; +def la56 : NVPTXReg<"%la56">; +def la57 : NVPTXReg<"%la57">; +def la58 : NVPTXReg<"%la58">; +def la59 : NVPTXReg<"%la59">; +def la60 : NVPTXReg<"%la60">; +def la61 : NVPTXReg<"%la61">; +def la62 : NVPTXReg<"%la62">; +def la63 : NVPTXReg<"%la63">; +def la64 : NVPTXReg<"%la64">; +def la65 : NVPTXReg<"%la65">; 
+def la66 : NVPTXReg<"%la66">; +def la67 : NVPTXReg<"%la67">; +def la68 : NVPTXReg<"%la68">; +def la69 : NVPTXReg<"%la69">; +def la70 : NVPTXReg<"%la70">; +def la71 : NVPTXReg<"%la71">; +def la72 : NVPTXReg<"%la72">; +def la73 : NVPTXReg<"%la73">; +def la74 : NVPTXReg<"%la74">; +def la75 : NVPTXReg<"%la75">; +def la76 : NVPTXReg<"%la76">; +def la77 : NVPTXReg<"%la77">; +def la78 : NVPTXReg<"%la78">; +def la79 : NVPTXReg<"%la79">; +def la80 : NVPTXReg<"%la80">; +def la81 : NVPTXReg<"%la81">; +def la82 : NVPTXReg<"%la82">; +def la83 : NVPTXReg<"%la83">; +def la84 : NVPTXReg<"%la84">; +def la85 : NVPTXReg<"%la85">; +def la86 : NVPTXReg<"%la86">; +def la87 : NVPTXReg<"%la87">; +def la88 : NVPTXReg<"%la88">; +def la89 : NVPTXReg<"%la89">; +def la90 : NVPTXReg<"%la90">; +def la91 : NVPTXReg<"%la91">; +def la92 : NVPTXReg<"%la92">; +def la93 : NVPTXReg<"%la93">; +def la94 : NVPTXReg<"%la94">; +def la95 : NVPTXReg<"%la95">; +def la96 : NVPTXReg<"%la96">; +def la97 : NVPTXReg<"%la97">; +def la98 : NVPTXReg<"%la98">; +def la99 : NVPTXReg<"%la99">; +def la100 : NVPTXReg<"%la100">; +def la101 : NVPTXReg<"%la101">; +def la102 : NVPTXReg<"%la102">; +def la103 : NVPTXReg<"%la103">; +def la104 : NVPTXReg<"%la104">; +def la105 : NVPTXReg<"%la105">; +def la106 : NVPTXReg<"%la106">; +def la107 : NVPTXReg<"%la107">; +def la108 : NVPTXReg<"%la108">; +def la109 : NVPTXReg<"%la109">; +def la110 : NVPTXReg<"%la110">; +def la111 : NVPTXReg<"%la111">; +def la112 : NVPTXReg<"%la112">; +def la113 : NVPTXReg<"%la113">; +def la114 : NVPTXReg<"%la114">; +def la115 : NVPTXReg<"%la115">; +def la116 : NVPTXReg<"%la116">; +def la117 : NVPTXReg<"%la117">; +def la118 : NVPTXReg<"%la118">; +def la119 : NVPTXReg<"%la119">; +def la120 : NVPTXReg<"%la120">; +def la121 : NVPTXReg<"%la121">; +def la122 : NVPTXReg<"%la122">; +def la123 : NVPTXReg<"%la123">; +def la124 : NVPTXReg<"%la124">; +def la125 : NVPTXReg<"%la125">; +def la126 : NVPTXReg<"%la126">; +def la127 : NVPTXReg<"%la127">; +def la128 : 
NVPTXReg<"%la128">; +def la129 : NVPTXReg<"%la129">; +def la130 : NVPTXReg<"%la130">; +def la131 : NVPTXReg<"%la131">; +def la132 : NVPTXReg<"%la132">; +def la133 : NVPTXReg<"%la133">; +def la134 : NVPTXReg<"%la134">; +def la135 : NVPTXReg<"%la135">; +def la136 : NVPTXReg<"%la136">; +def la137 : NVPTXReg<"%la137">; +def la138 : NVPTXReg<"%la138">; +def la139 : NVPTXReg<"%la139">; +def la140 : NVPTXReg<"%la140">; +def la141 : NVPTXReg<"%la141">; +def la142 : NVPTXReg<"%la142">; +def la143 : NVPTXReg<"%la143">; +def la144 : NVPTXReg<"%la144">; +def la145 : NVPTXReg<"%la145">; +def la146 : NVPTXReg<"%la146">; +def la147 : NVPTXReg<"%la147">; +def la148 : NVPTXReg<"%la148">; +def la149 : NVPTXReg<"%la149">; +def la150 : NVPTXReg<"%la150">; +def la151 : NVPTXReg<"%la151">; +def la152 : NVPTXReg<"%la152">; +def la153 : NVPTXReg<"%la153">; +def la154 : NVPTXReg<"%la154">; +def la155 : NVPTXReg<"%la155">; +def la156 : NVPTXReg<"%la156">; +def la157 : NVPTXReg<"%la157">; +def la158 : NVPTXReg<"%la158">; +def la159 : NVPTXReg<"%la159">; +def la160 : NVPTXReg<"%la160">; +def la161 : NVPTXReg<"%la161">; +def la162 : NVPTXReg<"%la162">; +def la163 : NVPTXReg<"%la163">; +def la164 : NVPTXReg<"%la164">; +def la165 : NVPTXReg<"%la165">; +def la166 : NVPTXReg<"%la166">; +def la167 : NVPTXReg<"%la167">; +def la168 : NVPTXReg<"%la168">; +def la169 : NVPTXReg<"%la169">; +def la170 : NVPTXReg<"%la170">; +def la171 : NVPTXReg<"%la171">; +def la172 : NVPTXReg<"%la172">; +def la173 : NVPTXReg<"%la173">; +def la174 : NVPTXReg<"%la174">; +def la175 : NVPTXReg<"%la175">; +def la176 : NVPTXReg<"%la176">; +def la177 : NVPTXReg<"%la177">; +def la178 : NVPTXReg<"%la178">; +def la179 : NVPTXReg<"%la179">; +def la180 : NVPTXReg<"%la180">; +def la181 : NVPTXReg<"%la181">; +def la182 : NVPTXReg<"%la182">; +def la183 : NVPTXReg<"%la183">; +def la184 : NVPTXReg<"%la184">; +def la185 : NVPTXReg<"%la185">; +def la186 : NVPTXReg<"%la186">; +def la187 : NVPTXReg<"%la187">; +def la188 : NVPTXReg<"%la188">; 
+def la189 : NVPTXReg<"%la189">; +def la190 : NVPTXReg<"%la190">; +def la191 : NVPTXReg<"%la191">; +def la192 : NVPTXReg<"%la192">; +def la193 : NVPTXReg<"%la193">; +def la194 : NVPTXReg<"%la194">; +def la195 : NVPTXReg<"%la195">; +def la196 : NVPTXReg<"%la196">; +def la197 : NVPTXReg<"%la197">; +def la198 : NVPTXReg<"%la198">; +def la199 : NVPTXReg<"%la199">; +def la200 : NVPTXReg<"%la200">; +def la201 : NVPTXReg<"%la201">; +def la202 : NVPTXReg<"%la202">; +def la203 : NVPTXReg<"%la203">; +def la204 : NVPTXReg<"%la204">; +def la205 : NVPTXReg<"%la205">; +def la206 : NVPTXReg<"%la206">; +def la207 : NVPTXReg<"%la207">; +def la208 : NVPTXReg<"%la208">; +def la209 : NVPTXReg<"%la209">; +def la210 : NVPTXReg<"%la210">; +def la211 : NVPTXReg<"%la211">; +def la212 : NVPTXReg<"%la212">; +def la213 : NVPTXReg<"%la213">; +def la214 : NVPTXReg<"%la214">; +def la215 : NVPTXReg<"%la215">; +def la216 : NVPTXReg<"%la216">; +def la217 : NVPTXReg<"%la217">; +def la218 : NVPTXReg<"%la218">; +def la219 : NVPTXReg<"%la219">; +def la220 : NVPTXReg<"%la220">; +def la221 : NVPTXReg<"%la221">; +def la222 : NVPTXReg<"%la222">; +def la223 : NVPTXReg<"%la223">; +def la224 : NVPTXReg<"%la224">; +def la225 : NVPTXReg<"%la225">; +def la226 : NVPTXReg<"%la226">; +def la227 : NVPTXReg<"%la227">; +def la228 : NVPTXReg<"%la228">; +def la229 : NVPTXReg<"%la229">; +def la230 : NVPTXReg<"%la230">; +def la231 : NVPTXReg<"%la231">; +def la232 : NVPTXReg<"%la232">; +def la233 : NVPTXReg<"%la233">; +def la234 : NVPTXReg<"%la234">; +def la235 : NVPTXReg<"%la235">; +def la236 : NVPTXReg<"%la236">; +def la237 : NVPTXReg<"%la237">; +def la238 : NVPTXReg<"%la238">; +def la239 : NVPTXReg<"%la239">; +def la240 : NVPTXReg<"%la240">; +def la241 : NVPTXReg<"%la241">; +def la242 : NVPTXReg<"%la242">; +def la243 : NVPTXReg<"%la243">; +def la244 : NVPTXReg<"%la244">; +def la245 : NVPTXReg<"%la245">; +def la246 : NVPTXReg<"%la246">; +def la247 : NVPTXReg<"%la247">; +def la248 : NVPTXReg<"%la248">; +def la249 : 
NVPTXReg<"%la249">; +def la250 : NVPTXReg<"%la250">; +def la251 : NVPTXReg<"%la251">; +def la252 : NVPTXReg<"%la252">; +def la253 : NVPTXReg<"%la253">; +def la254 : NVPTXReg<"%la254">; +def la255 : NVPTXReg<"%la255">; +def la256 : NVPTXReg<"%la256">; +def la257 : NVPTXReg<"%la257">; +def la258 : NVPTXReg<"%la258">; +def la259 : NVPTXReg<"%la259">; +def la260 : NVPTXReg<"%la260">; +def la261 : NVPTXReg<"%la261">; +def la262 : NVPTXReg<"%la262">; +def la263 : NVPTXReg<"%la263">; +def la264 : NVPTXReg<"%la264">; +def la265 : NVPTXReg<"%la265">; +def la266 : NVPTXReg<"%la266">; +def la267 : NVPTXReg<"%la267">; +def la268 : NVPTXReg<"%la268">; +def la269 : NVPTXReg<"%la269">; +def la270 : NVPTXReg<"%la270">; +def la271 : NVPTXReg<"%la271">; +def la272 : NVPTXReg<"%la272">; +def la273 : NVPTXReg<"%la273">; +def la274 : NVPTXReg<"%la274">; +def la275 : NVPTXReg<"%la275">; +def la276 : NVPTXReg<"%la276">; +def la277 : NVPTXReg<"%la277">; +def la278 : NVPTXReg<"%la278">; +def la279 : NVPTXReg<"%la279">; +def la280 : NVPTXReg<"%la280">; +def la281 : NVPTXReg<"%la281">; +def la282 : NVPTXReg<"%la282">; +def la283 : NVPTXReg<"%la283">; +def la284 : NVPTXReg<"%la284">; +def la285 : NVPTXReg<"%la285">; +def la286 : NVPTXReg<"%la286">; +def la287 : NVPTXReg<"%la287">; +def la288 : NVPTXReg<"%la288">; +def la289 : NVPTXReg<"%la289">; +def la290 : NVPTXReg<"%la290">; +def la291 : NVPTXReg<"%la291">; +def la292 : NVPTXReg<"%la292">; +def la293 : NVPTXReg<"%la293">; +def la294 : NVPTXReg<"%la294">; +def la295 : NVPTXReg<"%la295">; +def la296 : NVPTXReg<"%la296">; +def la297 : NVPTXReg<"%la297">; +def la298 : NVPTXReg<"%la298">; +def la299 : NVPTXReg<"%la299">; +def la300 : NVPTXReg<"%la300">; +def la301 : NVPTXReg<"%la301">; +def la302 : NVPTXReg<"%la302">; +def la303 : NVPTXReg<"%la303">; +def la304 : NVPTXReg<"%la304">; +def la305 : NVPTXReg<"%la305">; +def la306 : NVPTXReg<"%la306">; +def la307 : NVPTXReg<"%la307">; +def la308 : NVPTXReg<"%la308">; +def la309 : NVPTXReg<"%la309">; 
+def la310 : NVPTXReg<"%la310">; +def la311 : NVPTXReg<"%la311">; +def la312 : NVPTXReg<"%la312">; +def la313 : NVPTXReg<"%la313">; +def la314 : NVPTXReg<"%la314">; +def la315 : NVPTXReg<"%la315">; +def la316 : NVPTXReg<"%la316">; +def la317 : NVPTXReg<"%la317">; +def la318 : NVPTXReg<"%la318">; +def la319 : NVPTXReg<"%la319">; +def la320 : NVPTXReg<"%la320">; +def la321 : NVPTXReg<"%la321">; +def la322 : NVPTXReg<"%la322">; +def la323 : NVPTXReg<"%la323">; +def la324 : NVPTXReg<"%la324">; +def la325 : NVPTXReg<"%la325">; +def la326 : NVPTXReg<"%la326">; +def la327 : NVPTXReg<"%la327">; +def la328 : NVPTXReg<"%la328">; +def la329 : NVPTXReg<"%la329">; +def la330 : NVPTXReg<"%la330">; +def la331 : NVPTXReg<"%la331">; +def la332 : NVPTXReg<"%la332">; +def la333 : NVPTXReg<"%la333">; +def la334 : NVPTXReg<"%la334">; +def la335 : NVPTXReg<"%la335">; +def la336 : NVPTXReg<"%la336">; +def la337 : NVPTXReg<"%la337">; +def la338 : NVPTXReg<"%la338">; +def la339 : NVPTXReg<"%la339">; +def la340 : NVPTXReg<"%la340">; +def la341 : NVPTXReg<"%la341">; +def la342 : NVPTXReg<"%la342">; +def la343 : NVPTXReg<"%la343">; +def la344 : NVPTXReg<"%la344">; +def la345 : NVPTXReg<"%la345">; +def la346 : NVPTXReg<"%la346">; +def la347 : NVPTXReg<"%la347">; +def la348 : NVPTXReg<"%la348">; +def la349 : NVPTXReg<"%la349">; +def la350 : NVPTXReg<"%la350">; +def la351 : NVPTXReg<"%la351">; +def la352 : NVPTXReg<"%la352">; +def la353 : NVPTXReg<"%la353">; +def la354 : NVPTXReg<"%la354">; +def la355 : NVPTXReg<"%la355">; +def la356 : NVPTXReg<"%la356">; +def la357 : NVPTXReg<"%la357">; +def la358 : NVPTXReg<"%la358">; +def la359 : NVPTXReg<"%la359">; +def la360 : NVPTXReg<"%la360">; +def la361 : NVPTXReg<"%la361">; +def la362 : NVPTXReg<"%la362">; +def la363 : NVPTXReg<"%la363">; +def la364 : NVPTXReg<"%la364">; +def la365 : NVPTXReg<"%la365">; +def la366 : NVPTXReg<"%la366">; +def la367 : NVPTXReg<"%la367">; +def la368 : NVPTXReg<"%la368">; +def la369 : NVPTXReg<"%la369">; +def la370 : 
NVPTXReg<"%la370">; +def la371 : NVPTXReg<"%la371">; +def la372 : NVPTXReg<"%la372">; +def la373 : NVPTXReg<"%la373">; +def la374 : NVPTXReg<"%la374">; +def la375 : NVPTXReg<"%la375">; +def la376 : NVPTXReg<"%la376">; +def la377 : NVPTXReg<"%la377">; +def la378 : NVPTXReg<"%la378">; +def la379 : NVPTXReg<"%la379">; +def la380 : NVPTXReg<"%la380">; +def la381 : NVPTXReg<"%la381">; +def la382 : NVPTXReg<"%la382">; +def la383 : NVPTXReg<"%la383">; +def la384 : NVPTXReg<"%la384">; +def la385 : NVPTXReg<"%la385">; +def la386 : NVPTXReg<"%la386">; +def la387 : NVPTXReg<"%la387">; +def la388 : NVPTXReg<"%la388">; +def la389 : NVPTXReg<"%la389">; +def la390 : NVPTXReg<"%la390">; +def la391 : NVPTXReg<"%la391">; +def la392 : NVPTXReg<"%la392">; +def la393 : NVPTXReg<"%la393">; +def la394 : NVPTXReg<"%la394">; +def la395 : NVPTXReg<"%la395">; +def fa0 : NVPTXReg<"%fa0">; +def fa1 : NVPTXReg<"%fa1">; +def fa2 : NVPTXReg<"%fa2">; +def fa3 : NVPTXReg<"%fa3">; +def fa4 : NVPTXReg<"%fa4">; +def fa5 : NVPTXReg<"%fa5">; +def fa6 : NVPTXReg<"%fa6">; +def fa7 : NVPTXReg<"%fa7">; +def fa8 : NVPTXReg<"%fa8">; +def fa9 : NVPTXReg<"%fa9">; +def fa10 : NVPTXReg<"%fa10">; +def fa11 : NVPTXReg<"%fa11">; +def fa12 : NVPTXReg<"%fa12">; +def fa13 : NVPTXReg<"%fa13">; +def fa14 : NVPTXReg<"%fa14">; +def fa15 : NVPTXReg<"%fa15">; +def fa16 : NVPTXReg<"%fa16">; +def fa17 : NVPTXReg<"%fa17">; +def fa18 : NVPTXReg<"%fa18">; +def fa19 : NVPTXReg<"%fa19">; +def fa20 : NVPTXReg<"%fa20">; +def fa21 : NVPTXReg<"%fa21">; +def fa22 : NVPTXReg<"%fa22">; +def fa23 : NVPTXReg<"%fa23">; +def fa24 : NVPTXReg<"%fa24">; +def fa25 : NVPTXReg<"%fa25">; +def fa26 : NVPTXReg<"%fa26">; +def fa27 : NVPTXReg<"%fa27">; +def fa28 : NVPTXReg<"%fa28">; +def fa29 : NVPTXReg<"%fa29">; +def fa30 : NVPTXReg<"%fa30">; +def fa31 : NVPTXReg<"%fa31">; +def fa32 : NVPTXReg<"%fa32">; +def fa33 : NVPTXReg<"%fa33">; +def fa34 : NVPTXReg<"%fa34">; +def fa35 : NVPTXReg<"%fa35">; +def fa36 : NVPTXReg<"%fa36">; +def fa37 : 
NVPTXReg<"%fa37">; +def fa38 : NVPTXReg<"%fa38">; +def fa39 : NVPTXReg<"%fa39">; +def fa40 : NVPTXReg<"%fa40">; +def fa41 : NVPTXReg<"%fa41">; +def fa42 : NVPTXReg<"%fa42">; +def fa43 : NVPTXReg<"%fa43">; +def fa44 : NVPTXReg<"%fa44">; +def fa45 : NVPTXReg<"%fa45">; +def fa46 : NVPTXReg<"%fa46">; +def fa47 : NVPTXReg<"%fa47">; +def fa48 : NVPTXReg<"%fa48">; +def fa49 : NVPTXReg<"%fa49">; +def fa50 : NVPTXReg<"%fa50">; +def fa51 : NVPTXReg<"%fa51">; +def fa52 : NVPTXReg<"%fa52">; +def fa53 : NVPTXReg<"%fa53">; +def fa54 : NVPTXReg<"%fa54">; +def fa55 : NVPTXReg<"%fa55">; +def fa56 : NVPTXReg<"%fa56">; +def fa57 : NVPTXReg<"%fa57">; +def fa58 : NVPTXReg<"%fa58">; +def fa59 : NVPTXReg<"%fa59">; +def fa60 : NVPTXReg<"%fa60">; +def fa61 : NVPTXReg<"%fa61">; +def fa62 : NVPTXReg<"%fa62">; +def fa63 : NVPTXReg<"%fa63">; +def fa64 : NVPTXReg<"%fa64">; +def fa65 : NVPTXReg<"%fa65">; +def fa66 : NVPTXReg<"%fa66">; +def fa67 : NVPTXReg<"%fa67">; +def fa68 : NVPTXReg<"%fa68">; +def fa69 : NVPTXReg<"%fa69">; +def fa70 : NVPTXReg<"%fa70">; +def fa71 : NVPTXReg<"%fa71">; +def fa72 : NVPTXReg<"%fa72">; +def fa73 : NVPTXReg<"%fa73">; +def fa74 : NVPTXReg<"%fa74">; +def fa75 : NVPTXReg<"%fa75">; +def fa76 : NVPTXReg<"%fa76">; +def fa77 : NVPTXReg<"%fa77">; +def fa78 : NVPTXReg<"%fa78">; +def fa79 : NVPTXReg<"%fa79">; +def fa80 : NVPTXReg<"%fa80">; +def fa81 : NVPTXReg<"%fa81">; +def fa82 : NVPTXReg<"%fa82">; +def fa83 : NVPTXReg<"%fa83">; +def fa84 : NVPTXReg<"%fa84">; +def fa85 : NVPTXReg<"%fa85">; +def fa86 : NVPTXReg<"%fa86">; +def fa87 : NVPTXReg<"%fa87">; +def fa88 : NVPTXReg<"%fa88">; +def fa89 : NVPTXReg<"%fa89">; +def fa90 : NVPTXReg<"%fa90">; +def fa91 : NVPTXReg<"%fa91">; +def fa92 : NVPTXReg<"%fa92">; +def fa93 : NVPTXReg<"%fa93">; +def fa94 : NVPTXReg<"%fa94">; +def fa95 : NVPTXReg<"%fa95">; +def fa96 : NVPTXReg<"%fa96">; +def fa97 : NVPTXReg<"%fa97">; +def fa98 : NVPTXReg<"%fa98">; +def fa99 : NVPTXReg<"%fa99">; +def fa100 : NVPTXReg<"%fa100">; +def fa101 : 
NVPTXReg<"%fa101">; +def fa102 : NVPTXReg<"%fa102">; +def fa103 : NVPTXReg<"%fa103">; +def fa104 : NVPTXReg<"%fa104">; +def fa105 : NVPTXReg<"%fa105">; +def fa106 : NVPTXReg<"%fa106">; +def fa107 : NVPTXReg<"%fa107">; +def fa108 : NVPTXReg<"%fa108">; +def fa109 : NVPTXReg<"%fa109">; +def fa110 : NVPTXReg<"%fa110">; +def fa111 : NVPTXReg<"%fa111">; +def fa112 : NVPTXReg<"%fa112">; +def fa113 : NVPTXReg<"%fa113">; +def fa114 : NVPTXReg<"%fa114">; +def fa115 : NVPTXReg<"%fa115">; +def fa116 : NVPTXReg<"%fa116">; +def fa117 : NVPTXReg<"%fa117">; +def fa118 : NVPTXReg<"%fa118">; +def fa119 : NVPTXReg<"%fa119">; +def fa120 : NVPTXReg<"%fa120">; +def fa121 : NVPTXReg<"%fa121">; +def fa122 : NVPTXReg<"%fa122">; +def fa123 : NVPTXReg<"%fa123">; +def fa124 : NVPTXReg<"%fa124">; +def fa125 : NVPTXReg<"%fa125">; +def fa126 : NVPTXReg<"%fa126">; +def fa127 : NVPTXReg<"%fa127">; +def fa128 : NVPTXReg<"%fa128">; +def fa129 : NVPTXReg<"%fa129">; +def fa130 : NVPTXReg<"%fa130">; +def fa131 : NVPTXReg<"%fa131">; +def fa132 : NVPTXReg<"%fa132">; +def fa133 : NVPTXReg<"%fa133">; +def fa134 : NVPTXReg<"%fa134">; +def fa135 : NVPTXReg<"%fa135">; +def fa136 : NVPTXReg<"%fa136">; +def fa137 : NVPTXReg<"%fa137">; +def fa138 : NVPTXReg<"%fa138">; +def fa139 : NVPTXReg<"%fa139">; +def fa140 : NVPTXReg<"%fa140">; +def fa141 : NVPTXReg<"%fa141">; +def fa142 : NVPTXReg<"%fa142">; +def fa143 : NVPTXReg<"%fa143">; +def fa144 : NVPTXReg<"%fa144">; +def fa145 : NVPTXReg<"%fa145">; +def fa146 : NVPTXReg<"%fa146">; +def fa147 : NVPTXReg<"%fa147">; +def fa148 : NVPTXReg<"%fa148">; +def fa149 : NVPTXReg<"%fa149">; +def fa150 : NVPTXReg<"%fa150">; +def fa151 : NVPTXReg<"%fa151">; +def fa152 : NVPTXReg<"%fa152">; +def fa153 : NVPTXReg<"%fa153">; +def fa154 : NVPTXReg<"%fa154">; +def fa155 : NVPTXReg<"%fa155">; +def fa156 : NVPTXReg<"%fa156">; +def fa157 : NVPTXReg<"%fa157">; +def fa158 : NVPTXReg<"%fa158">; +def fa159 : NVPTXReg<"%fa159">; +def fa160 : NVPTXReg<"%fa160">; +def fa161 : NVPTXReg<"%fa161">; 
+def fa162 : NVPTXReg<"%fa162">; +def fa163 : NVPTXReg<"%fa163">; +def fa164 : NVPTXReg<"%fa164">; +def fa165 : NVPTXReg<"%fa165">; +def fa166 : NVPTXReg<"%fa166">; +def fa167 : NVPTXReg<"%fa167">; +def fa168 : NVPTXReg<"%fa168">; +def fa169 : NVPTXReg<"%fa169">; +def fa170 : NVPTXReg<"%fa170">; +def fa171 : NVPTXReg<"%fa171">; +def fa172 : NVPTXReg<"%fa172">; +def fa173 : NVPTXReg<"%fa173">; +def fa174 : NVPTXReg<"%fa174">; +def fa175 : NVPTXReg<"%fa175">; +def fa176 : NVPTXReg<"%fa176">; +def fa177 : NVPTXReg<"%fa177">; +def fa178 : NVPTXReg<"%fa178">; +def fa179 : NVPTXReg<"%fa179">; +def fa180 : NVPTXReg<"%fa180">; +def fa181 : NVPTXReg<"%fa181">; +def fa182 : NVPTXReg<"%fa182">; +def fa183 : NVPTXReg<"%fa183">; +def fa184 : NVPTXReg<"%fa184">; +def fa185 : NVPTXReg<"%fa185">; +def fa186 : NVPTXReg<"%fa186">; +def fa187 : NVPTXReg<"%fa187">; +def fa188 : NVPTXReg<"%fa188">; +def fa189 : NVPTXReg<"%fa189">; +def fa190 : NVPTXReg<"%fa190">; +def fa191 : NVPTXReg<"%fa191">; +def fa192 : NVPTXReg<"%fa192">; +def fa193 : NVPTXReg<"%fa193">; +def fa194 : NVPTXReg<"%fa194">; +def fa195 : NVPTXReg<"%fa195">; +def fa196 : NVPTXReg<"%fa196">; +def fa197 : NVPTXReg<"%fa197">; +def fa198 : NVPTXReg<"%fa198">; +def fa199 : NVPTXReg<"%fa199">; +def fa200 : NVPTXReg<"%fa200">; +def fa201 : NVPTXReg<"%fa201">; +def fa202 : NVPTXReg<"%fa202">; +def fa203 : NVPTXReg<"%fa203">; +def fa204 : NVPTXReg<"%fa204">; +def fa205 : NVPTXReg<"%fa205">; +def fa206 : NVPTXReg<"%fa206">; +def fa207 : NVPTXReg<"%fa207">; +def fa208 : NVPTXReg<"%fa208">; +def fa209 : NVPTXReg<"%fa209">; +def fa210 : NVPTXReg<"%fa210">; +def fa211 : NVPTXReg<"%fa211">; +def fa212 : NVPTXReg<"%fa212">; +def fa213 : NVPTXReg<"%fa213">; +def fa214 : NVPTXReg<"%fa214">; +def fa215 : NVPTXReg<"%fa215">; +def fa216 : NVPTXReg<"%fa216">; +def fa217 : NVPTXReg<"%fa217">; +def fa218 : NVPTXReg<"%fa218">; +def fa219 : NVPTXReg<"%fa219">; +def fa220 : NVPTXReg<"%fa220">; +def fa221 : NVPTXReg<"%fa221">; +def fa222 : 
NVPTXReg<"%fa222">; +def fa223 : NVPTXReg<"%fa223">; +def fa224 : NVPTXReg<"%fa224">; +def fa225 : NVPTXReg<"%fa225">; +def fa226 : NVPTXReg<"%fa226">; +def fa227 : NVPTXReg<"%fa227">; +def fa228 : NVPTXReg<"%fa228">; +def fa229 : NVPTXReg<"%fa229">; +def fa230 : NVPTXReg<"%fa230">; +def fa231 : NVPTXReg<"%fa231">; +def fa232 : NVPTXReg<"%fa232">; +def fa233 : NVPTXReg<"%fa233">; +def fa234 : NVPTXReg<"%fa234">; +def fa235 : NVPTXReg<"%fa235">; +def fa236 : NVPTXReg<"%fa236">; +def fa237 : NVPTXReg<"%fa237">; +def fa238 : NVPTXReg<"%fa238">; +def fa239 : NVPTXReg<"%fa239">; +def fa240 : NVPTXReg<"%fa240">; +def fa241 : NVPTXReg<"%fa241">; +def fa242 : NVPTXReg<"%fa242">; +def fa243 : NVPTXReg<"%fa243">; +def fa244 : NVPTXReg<"%fa244">; +def fa245 : NVPTXReg<"%fa245">; +def fa246 : NVPTXReg<"%fa246">; +def fa247 : NVPTXReg<"%fa247">; +def fa248 : NVPTXReg<"%fa248">; +def fa249 : NVPTXReg<"%fa249">; +def fa250 : NVPTXReg<"%fa250">; +def fa251 : NVPTXReg<"%fa251">; +def fa252 : NVPTXReg<"%fa252">; +def fa253 : NVPTXReg<"%fa253">; +def fa254 : NVPTXReg<"%fa254">; +def fa255 : NVPTXReg<"%fa255">; +def fa256 : NVPTXReg<"%fa256">; +def fa257 : NVPTXReg<"%fa257">; +def fa258 : NVPTXReg<"%fa258">; +def fa259 : NVPTXReg<"%fa259">; +def fa260 : NVPTXReg<"%fa260">; +def fa261 : NVPTXReg<"%fa261">; +def fa262 : NVPTXReg<"%fa262">; +def fa263 : NVPTXReg<"%fa263">; +def fa264 : NVPTXReg<"%fa264">; +def fa265 : NVPTXReg<"%fa265">; +def fa266 : NVPTXReg<"%fa266">; +def fa267 : NVPTXReg<"%fa267">; +def fa268 : NVPTXReg<"%fa268">; +def fa269 : NVPTXReg<"%fa269">; +def fa270 : NVPTXReg<"%fa270">; +def fa271 : NVPTXReg<"%fa271">; +def fa272 : NVPTXReg<"%fa272">; +def fa273 : NVPTXReg<"%fa273">; +def fa274 : NVPTXReg<"%fa274">; +def fa275 : NVPTXReg<"%fa275">; +def fa276 : NVPTXReg<"%fa276">; +def fa277 : NVPTXReg<"%fa277">; +def fa278 : NVPTXReg<"%fa278">; +def fa279 : NVPTXReg<"%fa279">; +def fa280 : NVPTXReg<"%fa280">; +def fa281 : NVPTXReg<"%fa281">; +def fa282 : NVPTXReg<"%fa282">; 
+def fa283 : NVPTXReg<"%fa283">; +def fa284 : NVPTXReg<"%fa284">; +def fa285 : NVPTXReg<"%fa285">; +def fa286 : NVPTXReg<"%fa286">; +def fa287 : NVPTXReg<"%fa287">; +def fa288 : NVPTXReg<"%fa288">; +def fa289 : NVPTXReg<"%fa289">; +def fa290 : NVPTXReg<"%fa290">; +def fa291 : NVPTXReg<"%fa291">; +def fa292 : NVPTXReg<"%fa292">; +def fa293 : NVPTXReg<"%fa293">; +def fa294 : NVPTXReg<"%fa294">; +def fa295 : NVPTXReg<"%fa295">; +def fa296 : NVPTXReg<"%fa296">; +def fa297 : NVPTXReg<"%fa297">; +def fa298 : NVPTXReg<"%fa298">; +def fa299 : NVPTXReg<"%fa299">; +def fa300 : NVPTXReg<"%fa300">; +def fa301 : NVPTXReg<"%fa301">; +def fa302 : NVPTXReg<"%fa302">; +def fa303 : NVPTXReg<"%fa303">; +def fa304 : NVPTXReg<"%fa304">; +def fa305 : NVPTXReg<"%fa305">; +def fa306 : NVPTXReg<"%fa306">; +def fa307 : NVPTXReg<"%fa307">; +def fa308 : NVPTXReg<"%fa308">; +def fa309 : NVPTXReg<"%fa309">; +def fa310 : NVPTXReg<"%fa310">; +def fa311 : NVPTXReg<"%fa311">; +def fa312 : NVPTXReg<"%fa312">; +def fa313 : NVPTXReg<"%fa313">; +def fa314 : NVPTXReg<"%fa314">; +def fa315 : NVPTXReg<"%fa315">; +def fa316 : NVPTXReg<"%fa316">; +def fa317 : NVPTXReg<"%fa317">; +def fa318 : NVPTXReg<"%fa318">; +def fa319 : NVPTXReg<"%fa319">; +def fa320 : NVPTXReg<"%fa320">; +def fa321 : NVPTXReg<"%fa321">; +def fa322 : NVPTXReg<"%fa322">; +def fa323 : NVPTXReg<"%fa323">; +def fa324 : NVPTXReg<"%fa324">; +def fa325 : NVPTXReg<"%fa325">; +def fa326 : NVPTXReg<"%fa326">; +def fa327 : NVPTXReg<"%fa327">; +def fa328 : NVPTXReg<"%fa328">; +def fa329 : NVPTXReg<"%fa329">; +def fa330 : NVPTXReg<"%fa330">; +def fa331 : NVPTXReg<"%fa331">; +def fa332 : NVPTXReg<"%fa332">; +def fa333 : NVPTXReg<"%fa333">; +def fa334 : NVPTXReg<"%fa334">; +def fa335 : NVPTXReg<"%fa335">; +def fa336 : NVPTXReg<"%fa336">; +def fa337 : NVPTXReg<"%fa337">; +def fa338 : NVPTXReg<"%fa338">; +def fa339 : NVPTXReg<"%fa339">; +def fa340 : NVPTXReg<"%fa340">; +def fa341 : NVPTXReg<"%fa341">; +def fa342 : NVPTXReg<"%fa342">; +def fa343 : 
NVPTXReg<"%fa343">; +def fa344 : NVPTXReg<"%fa344">; +def fa345 : NVPTXReg<"%fa345">; +def fa346 : NVPTXReg<"%fa346">; +def fa347 : NVPTXReg<"%fa347">; +def fa348 : NVPTXReg<"%fa348">; +def fa349 : NVPTXReg<"%fa349">; +def fa350 : NVPTXReg<"%fa350">; +def fa351 : NVPTXReg<"%fa351">; +def fa352 : NVPTXReg<"%fa352">; +def fa353 : NVPTXReg<"%fa353">; +def fa354 : NVPTXReg<"%fa354">; +def fa355 : NVPTXReg<"%fa355">; +def fa356 : NVPTXReg<"%fa356">; +def fa357 : NVPTXReg<"%fa357">; +def fa358 : NVPTXReg<"%fa358">; +def fa359 : NVPTXReg<"%fa359">; +def fa360 : NVPTXReg<"%fa360">; +def fa361 : NVPTXReg<"%fa361">; +def fa362 : NVPTXReg<"%fa362">; +def fa363 : NVPTXReg<"%fa363">; +def fa364 : NVPTXReg<"%fa364">; +def fa365 : NVPTXReg<"%fa365">; +def fa366 : NVPTXReg<"%fa366">; +def fa367 : NVPTXReg<"%fa367">; +def fa368 : NVPTXReg<"%fa368">; +def fa369 : NVPTXReg<"%fa369">; +def fa370 : NVPTXReg<"%fa370">; +def fa371 : NVPTXReg<"%fa371">; +def fa372 : NVPTXReg<"%fa372">; +def fa373 : NVPTXReg<"%fa373">; +def fa374 : NVPTXReg<"%fa374">; +def fa375 : NVPTXReg<"%fa375">; +def fa376 : NVPTXReg<"%fa376">; +def fa377 : NVPTXReg<"%fa377">; +def fa378 : NVPTXReg<"%fa378">; +def fa379 : NVPTXReg<"%fa379">; +def fa380 : NVPTXReg<"%fa380">; +def fa381 : NVPTXReg<"%fa381">; +def fa382 : NVPTXReg<"%fa382">; +def fa383 : NVPTXReg<"%fa383">; +def fa384 : NVPTXReg<"%fa384">; +def fa385 : NVPTXReg<"%fa385">; +def fa386 : NVPTXReg<"%fa386">; +def fa387 : NVPTXReg<"%fa387">; +def fa388 : NVPTXReg<"%fa388">; +def fa389 : NVPTXReg<"%fa389">; +def fa390 : NVPTXReg<"%fa390">; +def fa391 : NVPTXReg<"%fa391">; +def fa392 : NVPTXReg<"%fa392">; +def fa393 : NVPTXReg<"%fa393">; +def fa394 : NVPTXReg<"%fa394">; +def fa395 : NVPTXReg<"%fa395">; +def da0 : NVPTXReg<"%da0">; +def da1 : NVPTXReg<"%da1">; +def da2 : NVPTXReg<"%da2">; +def da3 : NVPTXReg<"%da3">; +def da4 : NVPTXReg<"%da4">; +def da5 : NVPTXReg<"%da5">; +def da6 : NVPTXReg<"%da6">; +def da7 : NVPTXReg<"%da7">; +def da8 : NVPTXReg<"%da8">; 
+def da9 : NVPTXReg<"%da9">; +def da10 : NVPTXReg<"%da10">; +def da11 : NVPTXReg<"%da11">; +def da12 : NVPTXReg<"%da12">; +def da13 : NVPTXReg<"%da13">; +def da14 : NVPTXReg<"%da14">; +def da15 : NVPTXReg<"%da15">; +def da16 : NVPTXReg<"%da16">; +def da17 : NVPTXReg<"%da17">; +def da18 : NVPTXReg<"%da18">; +def da19 : NVPTXReg<"%da19">; +def da20 : NVPTXReg<"%da20">; +def da21 : NVPTXReg<"%da21">; +def da22 : NVPTXReg<"%da22">; +def da23 : NVPTXReg<"%da23">; +def da24 : NVPTXReg<"%da24">; +def da25 : NVPTXReg<"%da25">; +def da26 : NVPTXReg<"%da26">; +def da27 : NVPTXReg<"%da27">; +def da28 : NVPTXReg<"%da28">; +def da29 : NVPTXReg<"%da29">; +def da30 : NVPTXReg<"%da30">; +def da31 : NVPTXReg<"%da31">; +def da32 : NVPTXReg<"%da32">; +def da33 : NVPTXReg<"%da33">; +def da34 : NVPTXReg<"%da34">; +def da35 : NVPTXReg<"%da35">; +def da36 : NVPTXReg<"%da36">; +def da37 : NVPTXReg<"%da37">; +def da38 : NVPTXReg<"%da38">; +def da39 : NVPTXReg<"%da39">; +def da40 : NVPTXReg<"%da40">; +def da41 : NVPTXReg<"%da41">; +def da42 : NVPTXReg<"%da42">; +def da43 : NVPTXReg<"%da43">; +def da44 : NVPTXReg<"%da44">; +def da45 : NVPTXReg<"%da45">; +def da46 : NVPTXReg<"%da46">; +def da47 : NVPTXReg<"%da47">; +def da48 : NVPTXReg<"%da48">; +def da49 : NVPTXReg<"%da49">; +def da50 : NVPTXReg<"%da50">; +def da51 : NVPTXReg<"%da51">; +def da52 : NVPTXReg<"%da52">; +def da53 : NVPTXReg<"%da53">; +def da54 : NVPTXReg<"%da54">; +def da55 : NVPTXReg<"%da55">; +def da56 : NVPTXReg<"%da56">; +def da57 : NVPTXReg<"%da57">; +def da58 : NVPTXReg<"%da58">; +def da59 : NVPTXReg<"%da59">; +def da60 : NVPTXReg<"%da60">; +def da61 : NVPTXReg<"%da61">; +def da62 : NVPTXReg<"%da62">; +def da63 : NVPTXReg<"%da63">; +def da64 : NVPTXReg<"%da64">; +def da65 : NVPTXReg<"%da65">; +def da66 : NVPTXReg<"%da66">; +def da67 : NVPTXReg<"%da67">; +def da68 : NVPTXReg<"%da68">; +def da69 : NVPTXReg<"%da69">; +def da70 : NVPTXReg<"%da70">; +def da71 : NVPTXReg<"%da71">; +def da72 : NVPTXReg<"%da72">; +def da73 : 
NVPTXReg<"%da73">; +def da74 : NVPTXReg<"%da74">; +def da75 : NVPTXReg<"%da75">; +def da76 : NVPTXReg<"%da76">; +def da77 : NVPTXReg<"%da77">; +def da78 : NVPTXReg<"%da78">; +def da79 : NVPTXReg<"%da79">; +def da80 : NVPTXReg<"%da80">; +def da81 : NVPTXReg<"%da81">; +def da82 : NVPTXReg<"%da82">; +def da83 : NVPTXReg<"%da83">; +def da84 : NVPTXReg<"%da84">; +def da85 : NVPTXReg<"%da85">; +def da86 : NVPTXReg<"%da86">; +def da87 : NVPTXReg<"%da87">; +def da88 : NVPTXReg<"%da88">; +def da89 : NVPTXReg<"%da89">; +def da90 : NVPTXReg<"%da90">; +def da91 : NVPTXReg<"%da91">; +def da92 : NVPTXReg<"%da92">; +def da93 : NVPTXReg<"%da93">; +def da94 : NVPTXReg<"%da94">; +def da95 : NVPTXReg<"%da95">; +def da96 : NVPTXReg<"%da96">; +def da97 : NVPTXReg<"%da97">; +def da98 : NVPTXReg<"%da98">; +def da99 : NVPTXReg<"%da99">; +def da100 : NVPTXReg<"%da100">; +def da101 : NVPTXReg<"%da101">; +def da102 : NVPTXReg<"%da102">; +def da103 : NVPTXReg<"%da103">; +def da104 : NVPTXReg<"%da104">; +def da105 : NVPTXReg<"%da105">; +def da106 : NVPTXReg<"%da106">; +def da107 : NVPTXReg<"%da107">; +def da108 : NVPTXReg<"%da108">; +def da109 : NVPTXReg<"%da109">; +def da110 : NVPTXReg<"%da110">; +def da111 : NVPTXReg<"%da111">; +def da112 : NVPTXReg<"%da112">; +def da113 : NVPTXReg<"%da113">; +def da114 : NVPTXReg<"%da114">; +def da115 : NVPTXReg<"%da115">; +def da116 : NVPTXReg<"%da116">; +def da117 : NVPTXReg<"%da117">; +def da118 : NVPTXReg<"%da118">; +def da119 : NVPTXReg<"%da119">; +def da120 : NVPTXReg<"%da120">; +def da121 : NVPTXReg<"%da121">; +def da122 : NVPTXReg<"%da122">; +def da123 : NVPTXReg<"%da123">; +def da124 : NVPTXReg<"%da124">; +def da125 : NVPTXReg<"%da125">; +def da126 : NVPTXReg<"%da126">; +def da127 : NVPTXReg<"%da127">; +def da128 : NVPTXReg<"%da128">; +def da129 : NVPTXReg<"%da129">; +def da130 : NVPTXReg<"%da130">; +def da131 : NVPTXReg<"%da131">; +def da132 : NVPTXReg<"%da132">; +def da133 : NVPTXReg<"%da133">; +def da134 : NVPTXReg<"%da134">; +def da135 : 
NVPTXReg<"%da135">; +def da136 : NVPTXReg<"%da136">; +def da137 : NVPTXReg<"%da137">; +def da138 : NVPTXReg<"%da138">; +def da139 : NVPTXReg<"%da139">; +def da140 : NVPTXReg<"%da140">; +def da141 : NVPTXReg<"%da141">; +def da142 : NVPTXReg<"%da142">; +def da143 : NVPTXReg<"%da143">; +def da144 : NVPTXReg<"%da144">; +def da145 : NVPTXReg<"%da145">; +def da146 : NVPTXReg<"%da146">; +def da147 : NVPTXReg<"%da147">; +def da148 : NVPTXReg<"%da148">; +def da149 : NVPTXReg<"%da149">; +def da150 : NVPTXReg<"%da150">; +def da151 : NVPTXReg<"%da151">; +def da152 : NVPTXReg<"%da152">; +def da153 : NVPTXReg<"%da153">; +def da154 : NVPTXReg<"%da154">; +def da155 : NVPTXReg<"%da155">; +def da156 : NVPTXReg<"%da156">; +def da157 : NVPTXReg<"%da157">; +def da158 : NVPTXReg<"%da158">; +def da159 : NVPTXReg<"%da159">; +def da160 : NVPTXReg<"%da160">; +def da161 : NVPTXReg<"%da161">; +def da162 : NVPTXReg<"%da162">; +def da163 : NVPTXReg<"%da163">; +def da164 : NVPTXReg<"%da164">; +def da165 : NVPTXReg<"%da165">; +def da166 : NVPTXReg<"%da166">; +def da167 : NVPTXReg<"%da167">; +def da168 : NVPTXReg<"%da168">; +def da169 : NVPTXReg<"%da169">; +def da170 : NVPTXReg<"%da170">; +def da171 : NVPTXReg<"%da171">; +def da172 : NVPTXReg<"%da172">; +def da173 : NVPTXReg<"%da173">; +def da174 : NVPTXReg<"%da174">; +def da175 : NVPTXReg<"%da175">; +def da176 : NVPTXReg<"%da176">; +def da177 : NVPTXReg<"%da177">; +def da178 : NVPTXReg<"%da178">; +def da179 : NVPTXReg<"%da179">; +def da180 : NVPTXReg<"%da180">; +def da181 : NVPTXReg<"%da181">; +def da182 : NVPTXReg<"%da182">; +def da183 : NVPTXReg<"%da183">; +def da184 : NVPTXReg<"%da184">; +def da185 : NVPTXReg<"%da185">; +def da186 : NVPTXReg<"%da186">; +def da187 : NVPTXReg<"%da187">; +def da188 : NVPTXReg<"%da188">; +def da189 : NVPTXReg<"%da189">; +def da190 : NVPTXReg<"%da190">; +def da191 : NVPTXReg<"%da191">; +def da192 : NVPTXReg<"%da192">; +def da193 : NVPTXReg<"%da193">; +def da194 : NVPTXReg<"%da194">; +def da195 : NVPTXReg<"%da195">; 
+def da196 : NVPTXReg<"%da196">; +def da197 : NVPTXReg<"%da197">; +def da198 : NVPTXReg<"%da198">; +def da199 : NVPTXReg<"%da199">; +def da200 : NVPTXReg<"%da200">; +def da201 : NVPTXReg<"%da201">; +def da202 : NVPTXReg<"%da202">; +def da203 : NVPTXReg<"%da203">; +def da204 : NVPTXReg<"%da204">; +def da205 : NVPTXReg<"%da205">; +def da206 : NVPTXReg<"%da206">; +def da207 : NVPTXReg<"%da207">; +def da208 : NVPTXReg<"%da208">; +def da209 : NVPTXReg<"%da209">; +def da210 : NVPTXReg<"%da210">; +def da211 : NVPTXReg<"%da211">; +def da212 : NVPTXReg<"%da212">; +def da213 : NVPTXReg<"%da213">; +def da214 : NVPTXReg<"%da214">; +def da215 : NVPTXReg<"%da215">; +def da216 : NVPTXReg<"%da216">; +def da217 : NVPTXReg<"%da217">; +def da218 : NVPTXReg<"%da218">; +def da219 : NVPTXReg<"%da219">; +def da220 : NVPTXReg<"%da220">; +def da221 : NVPTXReg<"%da221">; +def da222 : NVPTXReg<"%da222">; +def da223 : NVPTXReg<"%da223">; +def da224 : NVPTXReg<"%da224">; +def da225 : NVPTXReg<"%da225">; +def da226 : NVPTXReg<"%da226">; +def da227 : NVPTXReg<"%da227">; +def da228 : NVPTXReg<"%da228">; +def da229 : NVPTXReg<"%da229">; +def da230 : NVPTXReg<"%da230">; +def da231 : NVPTXReg<"%da231">; +def da232 : NVPTXReg<"%da232">; +def da233 : NVPTXReg<"%da233">; +def da234 : NVPTXReg<"%da234">; +def da235 : NVPTXReg<"%da235">; +def da236 : NVPTXReg<"%da236">; +def da237 : NVPTXReg<"%da237">; +def da238 : NVPTXReg<"%da238">; +def da239 : NVPTXReg<"%da239">; +def da240 : NVPTXReg<"%da240">; +def da241 : NVPTXReg<"%da241">; +def da242 : NVPTXReg<"%da242">; +def da243 : NVPTXReg<"%da243">; +def da244 : NVPTXReg<"%da244">; +def da245 : NVPTXReg<"%da245">; +def da246 : NVPTXReg<"%da246">; +def da247 : NVPTXReg<"%da247">; +def da248 : NVPTXReg<"%da248">; +def da249 : NVPTXReg<"%da249">; +def da250 : NVPTXReg<"%da250">; +def da251 : NVPTXReg<"%da251">; +def da252 : NVPTXReg<"%da252">; +def da253 : NVPTXReg<"%da253">; +def da254 : NVPTXReg<"%da254">; +def da255 : NVPTXReg<"%da255">; +def da256 : 
NVPTXReg<"%da256">; +def da257 : NVPTXReg<"%da257">; +def da258 : NVPTXReg<"%da258">; +def da259 : NVPTXReg<"%da259">; +def da260 : NVPTXReg<"%da260">; +def da261 : NVPTXReg<"%da261">; +def da262 : NVPTXReg<"%da262">; +def da263 : NVPTXReg<"%da263">; +def da264 : NVPTXReg<"%da264">; +def da265 : NVPTXReg<"%da265">; +def da266 : NVPTXReg<"%da266">; +def da267 : NVPTXReg<"%da267">; +def da268 : NVPTXReg<"%da268">; +def da269 : NVPTXReg<"%da269">; +def da270 : NVPTXReg<"%da270">; +def da271 : NVPTXReg<"%da271">; +def da272 : NVPTXReg<"%da272">; +def da273 : NVPTXReg<"%da273">; +def da274 : NVPTXReg<"%da274">; +def da275 : NVPTXReg<"%da275">; +def da276 : NVPTXReg<"%da276">; +def da277 : NVPTXReg<"%da277">; +def da278 : NVPTXReg<"%da278">; +def da279 : NVPTXReg<"%da279">; +def da280 : NVPTXReg<"%da280">; +def da281 : NVPTXReg<"%da281">; +def da282 : NVPTXReg<"%da282">; +def da283 : NVPTXReg<"%da283">; +def da284 : NVPTXReg<"%da284">; +def da285 : NVPTXReg<"%da285">; +def da286 : NVPTXReg<"%da286">; +def da287 : NVPTXReg<"%da287">; +def da288 : NVPTXReg<"%da288">; +def da289 : NVPTXReg<"%da289">; +def da290 : NVPTXReg<"%da290">; +def da291 : NVPTXReg<"%da291">; +def da292 : NVPTXReg<"%da292">; +def da293 : NVPTXReg<"%da293">; +def da294 : NVPTXReg<"%da294">; +def da295 : NVPTXReg<"%da295">; +def da296 : NVPTXReg<"%da296">; +def da297 : NVPTXReg<"%da297">; +def da298 : NVPTXReg<"%da298">; +def da299 : NVPTXReg<"%da299">; +def da300 : NVPTXReg<"%da300">; +def da301 : NVPTXReg<"%da301">; +def da302 : NVPTXReg<"%da302">; +def da303 : NVPTXReg<"%da303">; +def da304 : NVPTXReg<"%da304">; +def da305 : NVPTXReg<"%da305">; +def da306 : NVPTXReg<"%da306">; +def da307 : NVPTXReg<"%da307">; +def da308 : NVPTXReg<"%da308">; +def da309 : NVPTXReg<"%da309">; +def da310 : NVPTXReg<"%da310">; +def da311 : NVPTXReg<"%da311">; +def da312 : NVPTXReg<"%da312">; +def da313 : NVPTXReg<"%da313">; +def da314 : NVPTXReg<"%da314">; +def da315 : NVPTXReg<"%da315">; +def da316 : NVPTXReg<"%da316">; 
+def da317 : NVPTXReg<"%da317">;
+def da318 : NVPTXReg<"%da318">;
+def da319 : NVPTXReg<"%da319">;
+def da320 : NVPTXReg<"%da320">;
+def da321 : NVPTXReg<"%da321">;
+def da322 : NVPTXReg<"%da322">;
+def da323 : NVPTXReg<"%da323">;
+def da324 : NVPTXReg<"%da324">;
+def da325 : NVPTXReg<"%da325">;
+def da326 : NVPTXReg<"%da326">;
+def da327 : NVPTXReg<"%da327">;
+def da328 : NVPTXReg<"%da328">;
+def da329 : NVPTXReg<"%da329">;
+def da330 : NVPTXReg<"%da330">;
+def da331 : NVPTXReg<"%da331">;
+def da332 : NVPTXReg<"%da332">;
+def da333 : NVPTXReg<"%da333">;
+def da334 : NVPTXReg<"%da334">;
+def da335 : NVPTXReg<"%da335">;
+def da336 : NVPTXReg<"%da336">;
+def da337 : NVPTXReg<"%da337">;
+def da338 : NVPTXReg<"%da338">;
+def da339 : NVPTXReg<"%da339">;
+def da340 : NVPTXReg<"%da340">;
+def da341 : NVPTXReg<"%da341">;
+def da342 : NVPTXReg<"%da342">;
+def da343 : NVPTXReg<"%da343">;
+def da344 : NVPTXReg<"%da344">;
+def da345 : NVPTXReg<"%da345">;
+def da346 : NVPTXReg<"%da346">;
+def da347 : NVPTXReg<"%da347">;
+def da348 : NVPTXReg<"%da348">;
+def da349 : NVPTXReg<"%da349">;
+def da350 : NVPTXReg<"%da350">;
+def da351 : NVPTXReg<"%da351">;
+def da352 : NVPTXReg<"%da352">;
+def da353 : NVPTXReg<"%da353">;
+def da354 : NVPTXReg<"%da354">;
+def da355 : NVPTXReg<"%da355">;
+def da356 : NVPTXReg<"%da356">;
+def da357 : NVPTXReg<"%da357">;
+def da358 : NVPTXReg<"%da358">;
+def da359 : NVPTXReg<"%da359">;
+def da360 : NVPTXReg<"%da360">;
+def da361 : NVPTXReg<"%da361">;
+def da362 : NVPTXReg<"%da362">;
+def da363 : NVPTXReg<"%da363">;
+def da364 : NVPTXReg<"%da364">;
+def da365 : NVPTXReg<"%da365">;
+def da366 : NVPTXReg<"%da366">;
+def da367 : NVPTXReg<"%da367">;
+def da368 : NVPTXReg<"%da368">;
+def da369 : NVPTXReg<"%da369">;
+def da370 : NVPTXReg<"%da370">;
+def da371 : NVPTXReg<"%da371">;
+def da372 : NVPTXReg<"%da372">;
+def da373 : NVPTXReg<"%da373">;
+def da374 : NVPTXReg<"%da374">;
+def da375 : NVPTXReg<"%da375">;
+def da376 : NVPTXReg<"%da376">;
+def da377 : NVPTXReg<"%da377">;
+def da378 : NVPTXReg<"%da378">;
+def da379 : NVPTXReg<"%da379">;
+def da380 : NVPTXReg<"%da380">;
+def da381 : NVPTXReg<"%da381">;
+def da382 : NVPTXReg<"%da382">;
+def da383 : NVPTXReg<"%da383">;
+def da384 : NVPTXReg<"%da384">;
+def da385 : NVPTXReg<"%da385">;
+def da386 : NVPTXReg<"%da386">;
+def da387 : NVPTXReg<"%da387">;
+def da388 : NVPTXReg<"%da388">;
+def da389 : NVPTXReg<"%da389">;
+def da390 : NVPTXReg<"%da390">;
+def da391 : NVPTXReg<"%da391">;
+def da392 : NVPTXReg<"%da392">;
+def da393 : NVPTXReg<"%da393">;
+def da394 : NVPTXReg<"%da394">;
+def da395 : NVPTXReg<"%da395">;
+
+//===----------------------------------------------------------------------===//
+// Register classes
+//===----------------------------------------------------------------------===//
+// Each NVPTXRegClass below is given a value-type list, an alignment value and
+// a register list. (sequence "X%u", 0, 395) expands to X0 through X395, so
+// every class in this group contains 396 registers.
+
+// Scalar classes, one per value type: P* (i1), RC* (i8), RS* (i16), R* (i32),
+// RL* (i64), F* (f32) and FL* (f64).
+def Int1Regs : NVPTXRegClass<[i1], 8, (add (sequence "P%u", 0, 395))>;
+def Int8Regs : NVPTXRegClass<[i8], 8, (add (sequence "RC%u", 0, 395))>;
+def Int16Regs : NVPTXRegClass<[i16], 16, (add (sequence "RS%u", 0, 395))>;
+def Int32Regs : NVPTXRegClass<[i32], 32, (add (sequence "R%u", 0, 395))>;
+def Int64Regs : NVPTXRegClass<[i64], 64, (add (sequence "RL%u", 0, 395))>;
+def Float32Regs : NVPTXRegClass<[f32], 32, (add (sequence "F%u", 0, 395))>;
+def Float64Regs : NVPTXRegClass<[f64], 64, (add (sequence "FL%u", 0, 395))>;
+
+// "Arg" classes built from the ia*/la*/fa*/da* registers; they use the same
+// value types and alignments as the i32/i64/f32/f64 scalar classes above.
+// NOTE(review): presumably these hold function arguments -- confirm against
+// the lowering code, which is not visible here.
+def Int32ArgRegs : NVPTXRegClass<[i32], 32, (add (sequence "ia%u", 0, 395))>;
+def Int64ArgRegs : NVPTXRegClass<[i64], 64, (add (sequence "la%u", 0, 395))>;
+def Float32ArgRegs : NVPTXRegClass<[f32], 32, (add (sequence "fa%u", 0, 395))>;
+def Float64ArgRegs : NVPTXRegClass<[f64], 64, (add (sequence "da%u", 0, 395))>;
+
+// Read NVPTXRegisterInfo.cpp to see how VRFrame and VRDepot are used.
+// Register class holding the two special i32 registers VRFrame and VRDepot.
+def SpecialRegs : NVPTXRegClass<[i32], 32, (add VRFrame, VRDepot)>;
+
+// Register class for vector values. regTypes, alignment and regList are
+// forwarded unchanged to NVPTXRegClass; three extra fields are recorded:
+//   scalarClass - the register class passed as sClass (e.g. Float32Regs),
+//   elems       - the integer passed as e (2 or 4 in the definitions below;
+//                 presumably the element count),
+//   name        - the string passed as n (e.g. ".v2.f32").
+// NOTE(review): the consumers of these fields are not visible in this file.
+class NVPTXVecRegClass<list<ValueType> regTypes, int alignment, dag regList,
+                       NVPTXRegClass sClass,
+                       int e,
+                       string n>
+  : NVPTXRegClass<regTypes, alignment, regList>
+{
+  NVPTXRegClass scalarClass=sClass;
+  int elems=e;
+  string name=n;
+}
+
+// Vector register classes. Several classes are built over the same register
+// sequence: V2F32Regs and V2I32Regs both use v2b32_*, V4F32Regs and V4I32Regs
+// both use v4b32_*, and V2F64Regs and V2I64Regs both use v2b64_*.
+def V2F32Regs
+  : NVPTXVecRegClass<[v2f32], 64, (add (sequence "v2b32_%u", 0, 395)),
+                     Float32Regs, 2, ".v2.f32">;
+def V4F32Regs
+  : NVPTXVecRegClass<[v4f32], 128, (add (sequence "v4b32_%u", 0, 395)),
+                     Float32Regs, 4, ".v4.f32">;
+def V2I32Regs
+  : NVPTXVecRegClass<[v2i32], 64, (add (sequence "v2b32_%u", 0, 395)),
+                     Int32Regs, 2, ".v2.u32">;
+def V4I32Regs
+  : NVPTXVecRegClass<[v4i32], 128, (add (sequence "v4b32_%u", 0, 395)),
+                     Int32Regs, 4, ".v4.u32">;
+def V2F64Regs
+  : NVPTXVecRegClass<[v2f64], 128, (add (sequence "v2b64_%u", 0, 395)),
+                     Float64Regs, 2, ".v2.f64">;
+def V2I64Regs
+  : NVPTXVecRegClass<[v2i64], 128, (add (sequence "v2b64_%u", 0, 395)),
+                     Int64Regs, 2, ".v2.u64">;
+def V2I16Regs
+  : NVPTXVecRegClass<[v2i16], 32, (add (sequence "v2b16_%u", 0, 395)),
+                     Int16Regs, 2, ".v2.u16">;
+def V4I16Regs
+  : NVPTXVecRegClass<[v4i16], 64, (add (sequence "v4b16_%u", 0, 395)),
+                     Int16Regs, 4, ".v4.u16">;
+def V2I8Regs
+  : NVPTXVecRegClass<[v2i8], 16, (add (sequence "v2b8_%u", 0, 395)),
+                     Int8Regs, 2, ".v2.u8">;
+def V4I8Regs
+  : NVPTXVecRegClass<[v4i8], 32, (add (sequence "v4b8_%u", 0, 395)),
+                     Int8Regs, 4, ".v4.u8">;
diff --git a/lib/Target/NVPTX/NVPTXSection.h b/lib/Target/NVPTX/NVPTXSection.h
new file mode 100644
index 0000000..72aad7a
--- /dev/null
+++ b/lib/Target/NVPTX/NVPTXSection.h
@@ -0,0 +1,45 @@
+//===- NVPTXSection.h - NVPTX-specific section representation -*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+// +//===----------------------------------------------------------------------===// +// +// This file declares the NVPTXSection class. +// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_NVPTXSECTION_H +#define LLVM_NVPTXSECTION_H + +#include "llvm/MC/MCSection.h" +#include "llvm/GlobalVariable.h" +#include <vector> + +namespace llvm { +/// NVPTXSection - Represents a section in PTX +/// PTX does not have sections. We create this class in order to use +/// the ASMPrint interface. +/// +class NVPTXSection : public MCSection { + +public: + NVPTXSection(SectionVariant V, SectionKind K) : MCSection(V, K) {} + ~NVPTXSection() {}; + + /// Override this as NVPTX has its own way of printing switching + /// to a section. + virtual void PrintSwitchToSection(const MCAsmInfo &MAI, + raw_ostream &OS) const {} + + /// Base address of PTX sections is zero. + virtual bool isBaseAddressKnownZero() const { return true; } + virtual bool UseCodeAlign() const { return false; } + virtual bool isVirtualSection() const { return false; } +}; + +} // end namespace llvm + +#endif diff --git a/lib/Target/NVPTX/NVPTXSplitBBatBar.cpp b/lib/Target/NVPTX/NVPTXSplitBBatBar.cpp new file mode 100644 index 0000000..2836cad --- /dev/null +++ b/lib/Target/NVPTX/NVPTXSplitBBatBar.cpp @@ -0,0 +1,77 @@ +//===- NVPTXSplitBBatBar.cpp - Split BB at Barrier --*- C++ -*--===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// Split basic blocks so that a basic block that contains a barrier instruction +// only contains the barrier instruction. 
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/Function.h"
+#include "llvm/Instructions.h"
+#include "llvm/Intrinsics.h"
+#include "llvm/IntrinsicInst.h"
+#include "llvm/Support/InstIterator.h"
+#include "NVPTXUtilities.h"
+#include "NVPTXSplitBBatBar.h"
+
+using namespace llvm;
+
+namespace llvm {
+FunctionPass *createSplitBBatBarPass();
+}
+
+char NVPTXSplitBBatBar::ID = 0;
+
+/// Split every basic block of F in front of and behind each barrier
+/// intrinsic call, so that a barrier ends up alone in its block (together
+/// with the block terminator).
+///
+/// Every instruction is examined, including the one that directly follows a
+/// barrier, so back-to-back barriers are each isolated as well.
+///
+/// \returns true if at least one block was split.
+bool NVPTXSplitBBatBar::runOnFunction(Function &F) {
+
+  SmallVector<Instruction *, 4> SplitPoints;
+
+  // Collect all the split points in SplitPoints
+  for (Function::iterator BI = F.begin(), BE = F.end(); BI != BE; ++BI) {
+    BasicBlock::iterator IB = BI->begin();
+    BasicBlock::iterator IE = BI->end();
+
+    for (BasicBlock::iterator II = IB; II != IE; ++II) {
+      IntrinsicInst *inst = dyn_cast<IntrinsicInst>(II);
+      if (!inst || !llvm::isBarrierIntrinsic(inst->getIntrinsicID()))
+        continue;
+
+      // Split in front of the barrier. No split is needed if the barrier is
+      // the first instruction of the block, or if the preceding barrier
+      // already recorded this instruction as its "after" split point.
+      if (II != IB && (SplitPoints.empty() || SplitPoints.back() != inst))
+        SplitPoints.push_back(inst);
+
+      // Split behind the barrier, unless only the terminator follows.
+      BasicBlock::iterator Next = II;
+      ++Next;
+      if ((Next != IE) && (!Next->isTerminator()))
+        SplitPoints.push_back(Next);
+    }
+  }
+
+  for (unsigned i = 0, e = SplitPoints.size(); i != e; ++i) {
+    Instruction *inst = SplitPoints[i];
+    inst->getParent()->splitBasicBlock(inst, "bar_split");
+  }
+
+  return !SplitPoints.empty();
+}
+
+// This interface will most likely not be necessary, because this pass will
+// not be invoked by the driver, but will be used as a prerequisite to
+// another pass.
+FunctionPass *llvm::createSplitBBatBarPass() {
+  FunctionPass *SplitPass = new NVPTXSplitBBatBar();
+  return SplitPass;
+}
diff --git a/lib/Target/NVPTX/NVPTXSplitBBatBar.h b/lib/Target/NVPTX/NVPTXSplitBBatBar.h
new file mode 100644
index 0000000..9e4d5a0
--- /dev/null
+++ b/lib/Target/NVPTX/NVPTXSplitBBatBar.h
@@ -0,0 +1,41 @@
+//===-- llvm/lib/Target/NVPTX/NVPTXSplitBBatBar.h ---------------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains the declaration of the NVIDIA specific declarations
+// for splitting basic blocks at barrier instructions.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef NVPTX_SPLIT_BB_AT_BAR_H
+#define NVPTX_SPLIT_BB_AT_BAR_H
+
+#include "llvm/Pass.h"
+#include "llvm/CodeGen/MachineFunctionAnalysis.h"
+
+namespace llvm {
+
+/// Function pass that splits basic blocks at barrier intrinsics.
+struct NVPTXSplitBBatBar : public FunctionPass {
+  static char ID;
+
+  NVPTXSplitBBatBar() : FunctionPass(ID) {}
+
+  virtual const char *getPassName() const {
+    return "Split basic blocks at barrier";
+  }
+
+  /// Declares MachineFunctionAnalysis as preserved by this pass.
+  virtual void getAnalysisUsage(AnalysisUsage &AU) const {
+    AU.addPreserved<MachineFunctionAnalysis>();
+  }
+
+  virtual bool runOnFunction(Function &F);
+};
+
+extern FunctionPass *createSplitBBatBarPass();
+}
+
+#endif //NVPTX_SPLIT_BB_AT_BAR_H
diff --git a/lib/Target/NVPTX/NVPTXSubtarget.cpp b/lib/Target/NVPTX/NVPTXSubtarget.cpp
new file mode 100644
index 0000000..6aadd43
--- /dev/null
+++ b/lib/Target/NVPTX/NVPTXSubtarget.cpp
@@ -0,0 +1,57 @@
+//===- NVPTXSubtarget.cpp - NVPTX Subtarget Information -------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements the NVPTX specific subclass of TargetSubtarget.
+//
+//===----------------------------------------------------------------------===//
+
+#include "NVPTXSubtarget.h"
+#define GET_SUBTARGETINFO_ENUM
+#define GET_SUBTARGETINFO_TARGET_DESC
+#define GET_SUBTARGETINFO_CTOR
+#include "NVPTXGenSubtargetInfo.inc"
+
+using namespace llvm;
+
+// Select Driver Interface
+#include "llvm/Support/CommandLine.h"
+#include <cstdlib>
+namespace {
+cl::opt<NVPTX::DrvInterface>
+DriverInterface(cl::desc("Choose driver interface:"),
+                cl::values(
+                   clEnumValN(NVPTX::NVCL, "drvnvcl", "Nvidia OpenCL driver"),
+                   clEnumValN(NVPTX::CUDA, "drvcuda", "Nvidia CUDA driver"),
+                   clEnumValN(NVPTX::TEST, "drvtest", "Plain Test"),
+                   clEnumValEnd),
+                   cl::init(NVPTX::NVCL));
+}
+
+/// Build the subtarget description.
+///
+/// \param TT      target triple, forwarded to the generated subtarget info.
+/// \param CPU     target name of the form "sm_NN"; "sm_10" is used when both
+///                CPU and FS are empty.
+/// \param FS      feature string; a non-empty FS with an empty CPU is not
+///                supported and hits llvm_unreachable.
+/// \param is64Bit whether this is the 64-bit flavour of the target.
+NVPTXSubtarget::NVPTXSubtarget(const std::string &TT, const std::string &CPU,
+                               const std::string &FS, bool is64Bit)
+:NVPTXGenSubtargetInfo(TT, "", FS), // Don't pass CPU to subtarget,
+                                    // because we don't register all
+                                    // nvptx targets.
+ dummy(false), // Never left indeterminate, even if no feature sets it.
+ Is64Bit(is64Bit) {
+
+  drvInterface = DriverInterface;
+
+  // Provide the default CPU if none
+  std::string defCPU = "sm_10";
+
+  // Get the TargetName from the FS if available
+  if (FS.empty() && CPU.empty())
+    TargetName = defCPU;
+  else if (!CPU.empty())
+    TargetName = CPU;
+  else
+    llvm_unreachable("we are not using FeatureStr");
+
+  // Set up the SmVersion. The number follows the three-character "sm_"
+  // prefix; a name too short to carry one yields version 0 instead of
+  // reading past the end of the string.
+  SmVersion = (TargetName.size() > 3) ? atoi(TargetName.c_str()+3) : 0;
+}
diff --git a/lib/Target/NVPTX/NVPTXSubtarget.h b/lib/Target/NVPTX/NVPTXSubtarget.h
new file mode 100644
index 0000000..8f2a629
--- /dev/null
+++ b/lib/Target/NVPTX/NVPTXSubtarget.h
@@ -0,0 +1,92 @@
+//=====-- NVPTXSubtarget.h - Define Subtarget for the NVPTX ---*- C++ -*--====//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file declares the NVPTX specific subclass of TargetSubtarget.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef NVPTXSUBTARGET_H
+#define NVPTXSUBTARGET_H
+
+#include "llvm/Target/TargetSubtargetInfo.h"
+#include "NVPTX.h"
+
+#define GET_SUBTARGETINFO_HEADER
+#include "NVPTXGenSubtargetInfo.inc"
+
+#include <string>
+
+namespace llvm {
+
+/// Subtarget description for NVPTX. Feature availability is answered by
+/// comparing the numeric SM version against fixed thresholds.
+class NVPTXSubtarget : public NVPTXGenSubtargetInfo {
+
+  unsigned int SmVersion;            // Numeric SM version (e.g. 20).
+  std::string TargetName;            // Target name the version is parsed from.
+  NVPTX::DrvInterface drvInterface;  // Selected driver interface.
+  bool dummy; // For the 'dummy' feature, see NVPTX.td
+  bool Is64Bit;
+
+public:
+  /// This constructor initializes the data members to match that
+  /// of the specified module.
+  ///
+  NVPTXSubtarget(const std::string &TT, const std::string &CPU,
+                 const std::string &FS, bool is64Bit);
+
+  // Feature predicates: each one is true from the given SM version upwards.
+  bool hasBrkPt() const { return SmVersion >= 11; }
+  bool hasAtomRedG32() const { return SmVersion >= 11; }
+  bool hasAtomRedS32() const { return SmVersion >= 12; }
+  bool hasAtomRedG64() const { return SmVersion >= 12; }
+  bool hasAtomRedS64() const { return SmVersion >= 20; }
+  bool hasAtomRedGen32() const { return SmVersion >= 20; }
+  bool hasAtomRedGen64() const { return SmVersion >= 20; }
+  bool hasAtomAddF32() const { return SmVersion >= 20; }
+  bool hasVote() const { return SmVersion >= 12; }
+  bool hasDouble() const { return SmVersion >= 13; }
+  bool reqPTX20() const { return SmVersion >= 20; }
+  bool hasF32FTZ() const { return SmVersion >= 20; }
+  bool hasFMAF32() const { return SmVersion >= 20; }
+  bool hasFMAF64() const { return SmVersion >= 13; }
+  bool hasLDU() const { return SmVersion >= 20; }
+  bool hasGenericLdSt() const { return SmVersion >= 20; }
+  // 32-bit rotate: the hardware variant is reported as unavailable and the
+  // software variant as always available, so hasROT32() is always true.
+  inline bool hasHWROT32() const { return false; }
+  inline bool hasSWROT32() const {
+    return true;
+  }
+  inline bool hasROT32() const { return hasHWROT32() || hasSWROT32() ; }
+  inline bool
hasROT64() const { return SmVersion >= 20; }
+
+
+  bool is64Bit() const { return Is64Bit; }
+
+  unsigned int getSmVersion() const { return SmVersion; }
+  NVPTX::DrvInterface getDrvInterface() const { return drvInterface; }
+  std::string getTargetName() const { return TargetName; }
+
+  void ParseSubtargetFeatures(StringRef CPU, StringRef FS);
+
+  /// Returns the target data layout string. The 32-bit and 64-bit layouts
+  /// differ only in the pointer specification at the front.
+  std::string getDataLayout() const {
+    std::string Layout = is64Bit() ? "e-p:64:64:64-" : "e-p:32:32:32-";
+    Layout += "i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-"
+              "f32:32:32-f64:64:64-v16:16:16-v32:32:32-v64:64:64-v128:128:128-"
+              "n16:32:64";
+    return Layout;
+  }
+
+};
+
+} // End llvm namespace
+
+#endif  // NVPTXSUBTARGET_H
diff --git a/lib/Target/NVPTX/NVPTXTargetMachine.cpp b/lib/Target/NVPTX/NVPTXTargetMachine.cpp
new file mode 100644
index 0000000..826b1dd
--- /dev/null
+++ b/lib/Target/NVPTX/NVPTXTargetMachine.cpp
@@ -0,0 +1,133 @@
+//===-- NVPTXTargetMachine.cpp - Define TargetMachine for NVPTX -----------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// Top-level implementation for the NVPTX target.
+// +//===----------------------------------------------------------------------===// + +#include "NVPTXTargetMachine.h" +#include "NVPTX.h" +#include "NVPTXSplitBBatBar.h" +#include "NVPTXLowerAggrCopies.h" +#include "MCTargetDesc/NVPTXMCAsmInfo.h" +#include "NVPTXAllocaHoisting.h" +#include "llvm/PassManager.h" +#include "llvm/Analysis/Passes.h" +#include "llvm/Analysis/Verifier.h" +#include "llvm/Assembly/PrintModulePass.h" +#include "llvm/ADT/OwningPtr.h" +#include "llvm/CodeGen/AsmPrinter.h" +#include "llvm/CodeGen/MachineFunctionAnalysis.h" +#include "llvm/CodeGen/MachineModuleInfo.h" +#include "llvm/CodeGen/Passes.h" +#include "llvm/MC/MCAsmInfo.h" +#include "llvm/MC/MCInstrInfo.h" +#include "llvm/MC/MCStreamer.h" +#include "llvm/MC/MCSubtargetInfo.h" +#include "llvm/Support/TargetRegistry.h" +#include "llvm/Support/raw_ostream.h" +#include "llvm/Target/TargetData.h" +#include "llvm/Target/TargetInstrInfo.h" +#include "llvm/Target/TargetLowering.h" +#include "llvm/Target/TargetLoweringObjectFile.h" +#include "llvm/Target/TargetMachine.h" +#include "llvm/Target/TargetOptions.h" +#include "llvm/Target/TargetRegisterInfo.h" +#include "llvm/Target/TargetSubtargetInfo.h" +#include "llvm/Transforms/Scalar.h" +#include "llvm/Support/CommandLine.h" +#include "llvm/Support/Debug.h" +#include "llvm/Support/FormattedStream.h" +#include "llvm/Support/TargetRegistry.h" + + +using namespace llvm; + + +extern "C" void LLVMInitializeNVPTXTarget() { + // Register the target. 
+  RegisterTargetMachine<NVPTXTargetMachine32> X(TheNVPTXTarget32);
+  RegisterTargetMachine<NVPTXTargetMachine64> Y(TheNVPTXTarget64);
+
+  RegisterMCAsmInfo<NVPTXMCAsmInfo> A(TheNVPTXTarget32);
+  RegisterMCAsmInfo<NVPTXMCAsmInfo> B(TheNVPTXTarget64);
+
+}
+
+// Common constructor for both pointer widths. The data layout is taken from
+// the subtarget, which therefore has to be constructed before it.
+NVPTXTargetMachine::NVPTXTargetMachine(const Target &T,
+                                       StringRef TT,
+                                       StringRef CPU,
+                                       StringRef FS,
+                                       const TargetOptions& Options,
+                                       Reloc::Model RM,
+                                       CodeModel::Model CM,
+                                       CodeGenOpt::Level OL,
+                                       bool is64bit)
+  : LLVMTargetMachine(T, TT, CPU, FS, Options, RM, CM, OL),
+    Subtarget(TT, CPU, FS, is64bit),
+    DataLayout(Subtarget.getDataLayout()),
+    InstrInfo(*this),
+    TLInfo(*this),
+    TSInfo(*this),
+    FrameLowering(*this,is64bit) {
+}
+
+
+
+void NVPTXTargetMachine32::anchor() {}
+
+// 32-bit flavour: forwards everything to the base with is64bit == false.
+NVPTXTargetMachine32::NVPTXTargetMachine32(const Target &T, StringRef TT,
+                                           StringRef CPU, StringRef FS,
+                                           const TargetOptions &Options,
+                                           Reloc::Model RM, CodeModel::Model CM,
+                                           CodeGenOpt::Level OL)
+  : NVPTXTargetMachine(T, TT, CPU, FS, Options, RM, CM, OL, false) {
+}
+
+void NVPTXTargetMachine64::anchor() {}
+
+// 64-bit flavour: forwards everything to the base with is64bit == true.
+NVPTXTargetMachine64::NVPTXTargetMachine64(const Target &T, StringRef TT,
+                                           StringRef CPU, StringRef FS,
+                                           const TargetOptions &Options,
+                                           Reloc::Model RM, CodeModel::Model CM,
+                                           CodeGenOpt::Level OL)
+  : NVPTXTargetMachine(T, TT, CPU, FS, Options, RM, CM, OL, true) {
+}
+
+
+namespace llvm {
+/// Pass configuration for NVPTX; customizes the instruction selection and
+/// pre-register-allocation hooks.
+class NVPTXPassConfig : public TargetPassConfig {
+public:
+  NVPTXPassConfig(NVPTXTargetMachine *TM, PassManagerBase &PM)
+    : TargetPassConfig(TM, PM) {}
+
+  NVPTXTargetMachine &getNVPTXTargetMachine() const {
+    return getTM<NVPTXTargetMachine>();
+  }
+
+  virtual bool addInstSelector();
+  virtual bool addPreRegAlloc();
+};
+}
+
+TargetPassConfig *NVPTXTargetMachine::createPassConfig(PassManagerBase &PM) {
+  return new NVPTXPassConfig(this, PM);
+}
+
+bool NVPTXPassConfig::addInstSelector() {
+  PM->add(createLowerAggrCopies());
+  PM->add(createSplitBBatBarPass());
+  PM->add(createAllocaHoisting());
+  // Instruction selection itself, followed by the vector elementize pass.
+  PM->add(createNVPTXISelDag(getNVPTXTargetMachine(), getOptLevel()));
+  PM->add(createVectorElementizePass(getNVPTXTargetMachine()));
+  return false;
+}
+
+// No passes are added before register allocation.
+bool NVPTXPassConfig::addPreRegAlloc() {
+  return false;
+}
diff --git a/lib/Target/NVPTX/NVPTXTargetMachine.h b/lib/Target/NVPTX/NVPTXTargetMachine.h
new file mode 100644
index 0000000..1d82e5c
--- /dev/null
+++ b/lib/Target/NVPTX/NVPTXTargetMachine.h
@@ -0,0 +1,131 @@
+//===-- NVPTXTargetMachine.h - Define TargetMachine for NVPTX ---*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file declares the NVPTX specific subclass of TargetMachine.
+//
+//===----------------------------------------------------------------------===//
+
+
+#ifndef NVPTX_TARGETMACHINE_H
+#define NVPTX_TARGETMACHINE_H
+
+#include "NVPTXInstrInfo.h"
+#include "NVPTXISelLowering.h"
+#include "NVPTXRegisterInfo.h"
+#include "NVPTXSubtarget.h"
+#include "NVPTXFrameLowering.h"
+#include "ManagedStringPool.h"
+#include "llvm/Target/TargetData.h"
+#include "llvm/Target/TargetFrameLowering.h"
+#include "llvm/Target/TargetMachine.h"
+#include "llvm/Target/TargetSelectionDAGInfo.h"
+
+namespace llvm {
+
+/// NVPTXTargetMachine - Common base of the 32-bit and 64-bit NVPTX target
+/// machines. It owns the subtarget, data layout, instruction info, lowering
+/// info, selection DAG info, frame lowering and a managed string pool, and
+/// hands out pointers to them through the accessors below.
+///
+class NVPTXTargetMachine : public LLVMTargetMachine {
+  // Member order matters: the constructor initializes DataLayout from
+  // Subtarget.getDataLayout().
+  NVPTXSubtarget        Subtarget;
+  const TargetData      DataLayout;       // Calculates type size & alignment
+  NVPTXInstrInfo        InstrInfo;
+  NVPTXTargetLowering   TLInfo;
+  TargetSelectionDAGInfo   TSInfo;
+
+  // NVPTX does not have any call stack frame, but need a NVPTX specific
+  // FrameLowering class because TargetFrameLowering is abstract.
+  NVPTXFrameLowering       FrameLowering;
+
+  // Hold Strings that can be free'd all together with NVPTXTargetMachine
+  ManagedStringPool     ManagedStrPool;
+
+  //bool addCommonCodeGenPasses(PassManagerBase &, CodeGenOpt::Level,
+  //                            bool DisableVerify, MCContext *&OutCtx);
+
+public:
+  //virtual bool addPassesToEmitFile(PassManagerBase &PM,
+  //                                 formatted_raw_ostream &Out,
+  //                                 CodeGenFileType FileType,
+  //                                 CodeGenOpt::Level OptLevel,
+  //                                 bool DisableVerify = true) ;
+
+  NVPTXTargetMachine(const Target &T, StringRef TT, StringRef CPU,
+                     StringRef FS, const TargetOptions &Options,
+                     Reloc::Model RM, CodeModel::Model CM,
+                     CodeGenOpt::Level OP,
+                     bool is64bit);
+
+  virtual const TargetFrameLowering *getFrameLowering() const {
+    return &FrameLowering;
+  }
+  virtual const NVPTXInstrInfo *getInstrInfo() const  { return &InstrInfo; }
+  virtual const TargetData *getTargetData() const { return &DataLayout;}
+  virtual const NVPTXSubtarget *getSubtargetImpl() const { return &Subtarget;}
+
+  // The register info is owned by the instruction info object.
+  virtual const NVPTXRegisterInfo *getRegisterInfo() const {
+    return &(InstrInfo.getRegisterInfo());
+  }
+
+  // Returns a mutable pointer from a const accessor by casting away the
+  // constness of the TLInfo member.
+  virtual NVPTXTargetLowering *getTargetLowering() const {
+    return const_cast<NVPTXTargetLowering*>(&TLInfo);
+  }
+
+  virtual const TargetSelectionDAGInfo *getSelectionDAGInfo() const {
+    return &TSInfo;
+  }
+
+  //virtual bool addInstSelector(PassManagerBase &PM,
+  //                             CodeGenOpt::Level OptLevel);
+
+  //virtual bool addPreRegAlloc(PassManagerBase &, CodeGenOpt::Level);
+
+  // Like getTargetLowering(), hands out a mutable pointer to a member from a
+  // const accessor.
+  ManagedStringPool *getManagedStrPool() const {
+    return const_cast<ManagedStringPool*>(&ManagedStrPool);
+  }
+
+  virtual TargetPassConfig *createPassConfig(PassManagerBase &PM);
+
+  // Emission of machine code through JITCodeEmitter is not supported.
+  // Always returns true without adding any pass.
+  virtual bool addPassesToEmitMachineCode(PassManagerBase &,
+                                          JITCodeEmitter &,
+                                          bool = true) {
+    return true;
+  }
+
+  // Emission of machine code through MCJIT is not supported.
+  // Always returns true without adding any pass.
+  virtual bool addPassesToEmitMC(PassManagerBase &,
+                                 MCContext *&,
+                                 raw_ostream &,
+                                 bool = true) {
+    return true;
+  }
+
+}; // NVPTXTargetMachine.
+
+/// Target machine that constructs the base with is64bit == false.
+class NVPTXTargetMachine32 : public NVPTXTargetMachine {
+  virtual void anchor();
+public:
+  NVPTXTargetMachine32(const Target &T, StringRef TT, StringRef CPU,
+                       StringRef FS, const TargetOptions &Options,
+                       Reloc::Model RM, CodeModel::Model CM,
+                       CodeGenOpt::Level OL);
+};
+
+/// Target machine that constructs the base with is64bit == true.
+class NVPTXTargetMachine64 : public NVPTXTargetMachine {
+  virtual void anchor();
+public:
+  NVPTXTargetMachine64(const Target &T, StringRef TT, StringRef CPU,
+                       StringRef FS, const TargetOptions &Options,
+                       Reloc::Model RM, CodeModel::Model CM,
+                       CodeGenOpt::Level OL);
+};
+
+
+} // end namespace llvm
+
+#endif
diff --git a/lib/Target/NVPTX/NVPTXTargetObjectFile.h b/lib/Target/NVPTX/NVPTXTargetObjectFile.h
new file mode 100644
index 0000000..5420958
--- /dev/null
+++ b/lib/Target/NVPTX/NVPTXTargetObjectFile.h
@@ -0,0 +1,105 @@
+//===-- NVPTXTargetObjectFile.h - NVPTX Object Info -------------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_TARGET_NVPTX_TARGETOBJECTFILE_H
+#define LLVM_TARGET_NVPTX_TARGETOBJECTFILE_H
+
+#include "NVPTXSection.h"
+#include "llvm/Target/TargetLoweringObjectFile.h"
+#include <string>
+
+namespace llvm {
+class GlobalVariable;
+class Module;
+
+/// Object file lowering for NVPTX. All sections are NVPTXSection
+/// placeholders that are allocated in Initialize() and owned (deleted) by
+/// this object.
+class NVPTXTargetObjectFile : public TargetLoweringObjectFile {
+
+public:
+  /// Null out every section pointer that the destructor deletes, so that
+  /// destroying an object on which Initialize() was never called does not
+  /// delete indeterminate pointers.
+  NVPTXTargetObjectFile() {
+    TextSection = 0;
+    DataSection = 0;
+    BSSSection = 0;
+    ReadOnlySection = 0;
+
+    StaticCtorSection = 0;
+    StaticDtorSection = 0;
+    LSDASection = 0;
+    EHFrameSection = 0;
+    DwarfAbbrevSection = 0;
+    DwarfInfoSection = 0;
+    DwarfLineSection = 0;
+    DwarfFrameSection = 0;
+    DwarfPubTypesSection = 0;
+    DwarfDebugInlineSection = 0;
+    DwarfStrSection = 0;
+    DwarfLocSection = 0;
+    DwarfARangesSection = 0;
+    DwarfRangesSection = 0;
+    DwarfMacroInfoSection = 0;
+  }
+
+  /// Releases the sections created by Initialize(); deleting a null pointer
+  /// is a no-op, so this is safe on a never-initialized object as well.
+  ~NVPTXTargetObjectFile() {
+    delete TextSection;
+    delete DataSection;
+    delete BSSSection;
+    delete ReadOnlySection;
+
+    delete StaticCtorSection;
+    delete StaticDtorSection;
+    delete LSDASection;
+    delete EHFrameSection;
+    delete DwarfAbbrevSection;
+    delete DwarfInfoSection;
+    delete DwarfLineSection;
+    delete DwarfFrameSection;
+    delete DwarfPubTypesSection;
+    delete DwarfDebugInlineSection;
+    delete DwarfStrSection;
+    delete DwarfLocSection;
+    delete DwarfARangesSection;
+    delete DwarfRangesSection;
+    delete DwarfMacroInfoSection;
+  }
+
+  /// Allocates one NVPTXSection per section slot. Text, data, BSS and
+  /// read-only get their matching SectionKind; every other slot is created
+  /// with the metadata kind.
+  virtual void Initialize(MCContext &ctx, const TargetMachine &TM) {
+    TextSection = new NVPTXSection(MCSection::SV_ELF,
+                                   SectionKind::getText());
+    DataSection = new NVPTXSection(MCSection::SV_ELF,
+                                   SectionKind::getDataRel());
+    BSSSection = new NVPTXSection(MCSection::SV_ELF,
+                                  SectionKind::getBSS());
+    ReadOnlySection = new NVPTXSection(MCSection::SV_ELF,
+                                       SectionKind::getReadOnly());
+
+    StaticCtorSection = new NVPTXSection(MCSection::SV_ELF,
+                                         SectionKind::getMetadata());
+    StaticDtorSection = new NVPTXSection(MCSection::SV_ELF,
+                                         SectionKind::getMetadata());
+    LSDASection = new NVPTXSection(MCSection::SV_ELF,
+                                   SectionKind::getMetadata());
+    EHFrameSection = new NVPTXSection(MCSection::SV_ELF,
+                                      SectionKind::getMetadata());
+    DwarfAbbrevSection = new NVPTXSection(MCSection::SV_ELF,
+                                          SectionKind::getMetadata());
+    DwarfInfoSection = new NVPTXSection(MCSection::SV_ELF,
+                                        SectionKind::getMetadata());
+    DwarfLineSection = new NVPTXSection(MCSection::SV_ELF,
+                                        SectionKind::getMetadata());
+    DwarfFrameSection = new NVPTXSection(MCSection::SV_ELF,
+                                         SectionKind::getMetadata());
+    DwarfPubTypesSection = new NVPTXSection(MCSection::SV_ELF,
+                                            SectionKind::getMetadata());
+    DwarfDebugInlineSection = new NVPTXSection(MCSection::SV_ELF,
+                                               SectionKind::getMetadata());
+    DwarfStrSection = new NVPTXSection(MCSection::SV_ELF,
+                                       SectionKind::getMetadata());
+    DwarfLocSection = new NVPTXSection(MCSection::SV_ELF,
+                                       SectionKind::getMetadata());
+    DwarfARangesSection = new NVPTXSection(MCSection::SV_ELF,
+                                           SectionKind::getMetadata());
+    DwarfRangesSection = new NVPTXSection(MCSection::SV_ELF,
+                                          SectionKind::getMetadata());
+    DwarfMacroInfoSection = new NVPTXSection(MCSection::SV_ELF,
+                                             SectionKind::getMetadata());
+  }
+
+  /// Constants always go to the read-only section, whatever their kind.
+  virtual const MCSection *getSectionForConstant(SectionKind Kind) const {
+    return ReadOnlySection;
+  }
+
+  /// Globals with an explicit section always map to the data section.
+  virtual const MCSection *
+  getExplicitSectionGlobal(const GlobalValue *GV, SectionKind Kind,
+                           Mangler *Mang,
+                           const TargetMachine &TM) const {
+    return DataSection;
+  }
+
+};
+
+} // end namespace llvm
+
+#endif
diff --git a/lib/Target/NVPTX/NVPTXUtilities.cpp b/lib/Target/NVPTX/NVPTXUtilities.cpp
new file mode 100644
index 0000000..3f52251
--- /dev/null
+++ b/lib/Target/NVPTX/NVPTXUtilities.cpp
@@ -0,0 +1,514 @@
+//===- NVPTXUtilities.cpp - Utility Functions -----------------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains miscellaneous utility functions
+//===----------------------------------------------------------------------===//
+
+#include "NVPTXUtilities.h"
+#include "NVPTX.h"
+#include "llvm/GlobalVariable.h"
+#include "llvm/Function.h"
+#include "llvm/Module.h"
+#include "llvm/Constants.h"
+#include "llvm/Operator.h"
+#include <algorithm>
+#include <cstring>
+#include <map>
+#include <string>
+#include <vector>
+//#include <iostream>
+#include "llvm/Support/ManagedStatic.h"
+#include "llvm/Support/InstIterator.h"
+
+using namespace llvm;
+
+typedef std::map<std::string, std::vector<unsigned> > key_val_pair_t;
+typedef std::map<const GlobalValue *, key_val_pair_t> global_val_annot_t;
+typedef std::map<const Module *, global_val_annot_t> per_module_annot_t;
+
+ManagedStatic<per_module_annot_t> annotationCache;
+
+
+// Appends the property/value pairs of one annotation node to retval.
+// Operand 0 of the node is the annotated global; operands 1..N come in
+// (property string, constant int value) pairs. Values of a property that
+// occurs more than once are accumulated in operand order.
+static void cacheAnnotationFromMD(const MDNode *md, key_val_pair_t &retval) {
+  assert(md && "Invalid mdnode for annotation");
+  assert((md->getNumOperands() % 2) == 1 && "Invalid number of operands");
+  const unsigned numOps = md->getNumOperands();
+  for (unsigned idx = 1; idx != numOps; idx += 2) {
+    const MDString *key = dyn_cast<MDString>(md->getOperand(idx));
+    assert(key && "Annotation property not a string");
+
+    ConstantInt *num = dyn_cast<ConstantInt>(md->getOperand(idx + 1));
+    assert(num && "Value operand not a constant int");
+
+    // operator[] creates the (empty) value list on first use of a property.
+    retval[key->getString().str()].push_back(num->getZExtValue());
+  }
+}
+
+static void cacheAnnotationFromMD(const Module *m, const GlobalValue *gv) {
+  NamedMDNode *NMD = m->getNamedMetadata(llvm::NamedMDForAnnotations);
+  if
(!NMD)
+    return;
+  key_val_pair_t tmp;
+  for (unsigned i = 0, e = NMD->getNumOperands(); i != e; ++i) {
+    const MDNode *elem = NMD->getOperand(i);
+
+    // The annotated entity may be null due to DCE; skip those nodes and the
+    // ones that describe some other global.
+    Value *entity = elem->getOperand(0);
+    if (!entity || entity != gv)
+      continue;
+
+    // accumulate annotations for entity in tmp
+    cacheAnnotationFromMD(elem, tmp);
+  }
+
+  if (tmp.empty()) // no annotations for this gv
+    return;
+
+  // operator[] creates the per-module map on first use.
+  (*annotationCache)[m][gv] = tmp;
+}
+
+// Returns the cached annotations of gv, scanning the module's annotation
+// metadata if gv has no cache entry yet. An entry is created (possibly
+// empty) if none exists afterwards, so globals without annotations are
+// scanned only once.
+static key_val_pair_t &cachedAnnotationsOf(const GlobalValue *gv) {
+  const Module *m = gv->getParent();
+  per_module_annot_t &cache = *annotationCache;
+  per_module_annot_t::iterator modIt = cache.find(m);
+  if (modIt == cache.end() || modIt->second.find(gv) == modIt->second.end())
+    cacheAnnotationFromMD(m, gv);
+  return cache[m][gv];
+}
+
+// Looks up property prop of gv and stores its first value in retval.
+// Returns false (leaving retval untouched) if gv has no such property.
+bool llvm::findOneNVVMAnnotation(const GlobalValue *gv, std::string prop,
+                                 unsigned &retval) {
+  key_val_pair_t &annots = cachedAnnotationsOf(gv);
+  key_val_pair_t::iterator hit = annots.find(prop);
+  if (hit == annots.end())
+    return false;
+  retval = hit->second[0];
+  return true;
+}
+
+// Looks up property prop of gv and copies all of its values into retval.
+// Returns false (leaving retval untouched) if gv has no such property.
+bool llvm::findAllNVVMAnnotation(const GlobalValue *gv, std::string prop,
+                                 std::vector<unsigned> &retval) {
+  key_val_pair_t &annots = cachedAnnotationsOf(gv);
+  key_val_pair_t::iterator hit = annots.find(prop);
+  if (hit == annots.end())
+    return false;
+  retval = hit->second;
+  return true;
+}
+
+// True if val is a global value carrying the texture annotation.
+bool llvm::isTexture(const llvm::Value &val) {
+  const GlobalValue *gv = dyn_cast<GlobalValue>(&val);
+  if (!gv)
+    return false;
+
+  unsigned annot;
+  if (!llvm::findOneNVVMAnnotation(gv,
+                   llvm::PropertyAnnotationNames[llvm::PROPERTY_ISTEXTURE],
+                                   annot))
+    return false;
+
+  assert((annot == 1) && "Unexpected annotation on a texture symbol");
+  return true;
+}
+
+bool
llvm::isSurface(const llvm::Value &val) {
+  // True if val is a global value carrying the surface annotation.
+  const GlobalValue *gv = dyn_cast<GlobalValue>(&val);
+  if (!gv)
+    return false;
+
+  unsigned annot;
+  if (!llvm::findOneNVVMAnnotation(gv,
+                   llvm::PropertyAnnotationNames[llvm::PROPERTY_ISSURFACE],
+                                   annot))
+    return false;
+
+  assert((annot == 1) && "Unexpected annotation on a surface symbol");
+  return true;
+}
+
+// True if val is a function argument whose argument number is listed among
+// the values of property prop on its parent function.
+static bool isArgListedUnder(const llvm::Value &val, std::string prop) {
+  const Argument *arg = dyn_cast<Argument>(&val);
+  if (!arg)
+    return false;
+
+  std::vector<unsigned> annot;
+  if (!llvm::findAllNVVMAnnotation(arg->getParent(), prop, annot))
+    return false;
+
+  return std::find(annot.begin(), annot.end(), arg->getArgNo()) != annot.end();
+}
+
+// True if val is a global value carrying the sampler annotation, or a
+// function argument listed in its function's sampler annotation.
+bool llvm::isSampler(const llvm::Value &val) {
+  if (const GlobalValue *gv = dyn_cast<GlobalValue>(&val)) {
+    unsigned annot;
+    if (llvm::findOneNVVMAnnotation(gv,
+                    llvm::PropertyAnnotationNames[llvm::PROPERTY_ISSAMPLER],
+                                    annot)) {
+      assert((annot == 1) && "Unexpected annotation on a sampler symbol");
+      return true;
+    }
+  }
+  return isArgListedUnder(val,
+                  llvm::PropertyAnnotationNames[llvm::PROPERTY_ISSAMPLER]);
+}
+
+// True if val is an argument listed as a read-only image parameter.
+bool llvm::isImageReadOnly(const llvm::Value &val) {
+  return isArgListedUnder(val,
+      llvm::PropertyAnnotationNames[llvm::PROPERTY_ISREADONLY_IMAGE_PARAM]);
+}
+
+// True if val is an argument listed as a write-only image parameter.
+bool llvm::isImageWriteOnly(const llvm::Value &val) {
+  return isArgListedUnder(val,
+      llvm::PropertyAnnotationNames[llvm::PROPERTY_ISWRITEONLY_IMAGE_PARAM]);
+}
+
+bool llvm::isImage(const llvm::Value &val) {
+  return llvm::isImageReadOnly(val) ||
+         llvm::isImageWriteOnly(val);
+}
+
+// The three name getters below return the IR name of the value and assert
+// that it has one.
+std::string llvm::getTextureName(const llvm::Value &val) {
+  assert(val.hasName() && "Found texture variable with no name");
+  return val.getName().str();
+}
+
+std::string llvm::getSurfaceName(const llvm::Value &val) {
+  assert(val.hasName() && "Found surface variable with no name");
+  return val.getName().str();
+}
+
+std::string llvm::getSamplerName(const llvm::Value &val) {
+  assert(val.hasName() && "Found sampler variable with no name");
+  return val.getName().str();
+}
+
+// Each getter below reads one annotation of F. It returns true and stores
+// the first annotated value in its out-parameter if the annotation exists,
+// and returns false otherwise.
+bool llvm::getMaxNTIDx(const Function &F, unsigned &x) {
+  const bool found = llvm::findOneNVVMAnnotation(
+      &F, llvm::PropertyAnnotationNames[llvm::PROPERTY_MAXNTID_X], x);
+  return found;
+}
+
+bool llvm::getMaxNTIDy(const Function &F, unsigned &y) {
+  const bool found = llvm::findOneNVVMAnnotation(
+      &F, llvm::PropertyAnnotationNames[llvm::PROPERTY_MAXNTID_Y], y);
+  return found;
+}
+
+bool llvm::getMaxNTIDz(const Function &F, unsigned &z) {
+  const bool found = llvm::findOneNVVMAnnotation(
+      &F, llvm::PropertyAnnotationNames[llvm::PROPERTY_MAXNTID_Z], z);
+  return found;
+}
+
+bool llvm::getReqNTIDx(const Function &F, unsigned &x) {
+  const bool found = llvm::findOneNVVMAnnotation(
+      &F, llvm::PropertyAnnotationNames[llvm::PROPERTY_REQNTID_X], x);
+  return found;
+}
+
+bool llvm::getReqNTIDy(const Function &F, unsigned &y) {
+  const bool found = llvm::findOneNVVMAnnotation(
+      &F, llvm::PropertyAnnotationNames[llvm::PROPERTY_REQNTID_Y], y);
+  return found;
+}
+
+bool llvm::getReqNTIDz(const Function &F, unsigned &z) {
+  const bool found = llvm::findOneNVVMAnnotation(
+      &F, llvm::PropertyAnnotationNames[llvm::PROPERTY_REQNTID_Z], z);
+  return found;
+}
+
+bool llvm::getMinCTASm(const Function &F, unsigned &x) {
+  const bool found = llvm::findOneNVVMAnnotation(
+      &F, llvm::PropertyAnnotationNames[llvm::PROPERTY_MINNCTAPERSM], x);
+  return found;
+}
+
+bool llvm::isKernelFunction(const Function &F) {
+  unsigned x = 0;
+  bool retval = llvm::findOneNVVMAnnotation(&F,
+                llvm::PropertyAnnotationNames[llvm::PROPERTY_ISKERNEL_FUNCTION],
+                                            x);
+  if (retval == false) {
+    // There is no NVVM metadata, check the calling convention
+    if (F.getCallingConv()
== llvm::CallingConv::PTX_Kernel)
+      return true;
+    else
+      return false;
+  }
+  return (x==1);
+}
+
+// Looks up the alignment annotated on F for the given index. Every
+// annotation value packs the index in its upper 16 bits and the alignment
+// in its lower 16 bits. Returns false if no entry matches.
+bool llvm::getAlign(const Function &F, unsigned index, unsigned &align) {
+  std::vector<unsigned> packed;
+  if (!llvm::findAllNVVMAnnotation(&F,
+                       llvm::PropertyAnnotationNames[llvm::PROPERTY_ALIGN],
+                                   packed))
+    return false;
+
+  for (int pos = 0, count = packed.size(); pos < count; ++pos) {
+    const unsigned entry = packed[pos];
+    if ((entry >> 16) != index)
+      continue;
+    align = entry & 0xFFFF;
+    return true;
+  }
+  return false;
+}
+
+// Same lookup on the "callalign" metadata of a call instruction. The scan
+// stops as soon as an entry with a larger index than the requested one is
+// seen; operands that are not constant ints are skipped.
+bool llvm::getAlign(const CallInst &I, unsigned index, unsigned &align) {
+  MDNode *alignNode = I.getMetadata("callalign");
+  if (!alignNode)
+    return false;
+
+  for (int pos = 0, count = alignNode->getNumOperands(); pos < count; ++pos) {
+    const ConstantInt *CI = dyn_cast<ConstantInt>(alignNode->getOperand(pos));
+    if (!CI)
+      continue;
+
+    const unsigned entry = CI->getZExtValue();
+    const unsigned entryIndex = entry >> 16;
+    if (entryIndex == index) {
+      align = entry & 0xFFFF;
+      return true;
+    }
+    if (entryIndex > index)
+      return false;
+  }
+  return false;
+}
+
+// True for the intrinsics that this backend treats as barriers.
+bool llvm::isBarrierIntrinsic(Intrinsic::ID id) {
+  switch (id) {
+  case Intrinsic::nvvm_barrier0:
+  case Intrinsic::nvvm_barrier0_popc:
+  case Intrinsic::nvvm_barrier0_and:
+  case Intrinsic::nvvm_barrier0_or:
+  case Intrinsic::cuda_syncthreads:
+    return true;
+  default:
+    return false;
+  }
+}
+
+// Interface for checking all memory space transfer related intrinsics
+bool llvm::isMemorySpaceTransferIntrinsic(Intrinsic::ID id) {
+  switch (id) {
+  case Intrinsic::nvvm_ptr_local_to_gen:
+  case Intrinsic::nvvm_ptr_shared_to_gen:
+  case Intrinsic::nvvm_ptr_global_to_gen:
+  case Intrinsic::nvvm_ptr_constant_to_gen:
+  case Intrinsic::nvvm_ptr_gen_to_global:
+  case Intrinsic::nvvm_ptr_gen_to_shared:
+  case Intrinsic::nvvm_ptr_gen_to_local:
+  case Intrinsic::nvvm_ptr_gen_to_constant:
+  case Intrinsic::nvvm_ptr_gen_to_param:
+    return true;
+  default:
+    return false;
+  }
+}
+
+// consider several special intrinsics in stripping pointer casts, and
+// provide an option to ignore GEP indices for finding out the base address
only
+// which could be used in simple alias disambiguation.
+//
+// Repeatedly looks through pointer casts and memory space transfer
+// intrinsics (taking their first argument). If ignore_GEP_indices is set,
+// GEP operators are replaced by their pointer operand as well. Returns the
+// first value that none of these rules applies to.
+const Value *llvm::skipPointerTransfer(const Value *V,
+                                       bool ignore_GEP_indices) {
+  V = V->stripPointerCasts();
+  while (true) {
+    if (const IntrinsicInst *IS = dyn_cast<IntrinsicInst>(V)) {
+      if (isMemorySpaceTransferIntrinsic(IS->getIntrinsicID())) {
+        V = IS->getArgOperand(0)->stripPointerCasts();
+        continue;
+      }
+      // Any other intrinsic call ends the walk; the GEP rule below is only
+      // tried for values that are not intrinsic calls.
+    } else if (ignore_GEP_indices)
+      if (const GEPOperator *GEP = dyn_cast<GEPOperator>(V)) {
+        V = GEP->getPointerOperand()->stripPointerCasts();
+        continue;
+      }
+    break;
+  }
+  return V;
+}
+
+// consider several special intrinsics in stripping pointer casts, and
+// - ignore GEP indices for finding out the base address only, and
+// - tracking PHINode
+// which could be used in simple alias disambiguation.
+//
+// 'processed' records the values already visited. NULL is returned when V
+// (or V with its pointer casts stripped) has been visited before, which
+// ends the recursion on cyclic PHI chains.
+const Value *llvm::skipPointerTransfer(const Value *V,
+                                       std::set<const Value *> &processed) {
+  if (processed.find(V) != processed.end())
+    return NULL;
+  processed.insert(V);
+
+  const Value *V2 = V->stripPointerCasts();
+  if (V2 != V && processed.find(V2) != processed.end())
+    return NULL;
+  processed.insert(V2);
+
+  V = V2;
+
+  while (true) {
+    if (const IntrinsicInst *IS = dyn_cast<IntrinsicInst>(V)) {
+      if (isMemorySpaceTransferIntrinsic(IS->getIntrinsicID())) {
+        V = IS->getArgOperand(0)->stripPointerCasts();
+        continue;
+      }
+    } else if (const GEPOperator *GEP = dyn_cast<GEPOperator>(V)) {
+      V = GEP->getPointerOperand()->stripPointerCasts();
+      continue;
+    } else if (const PHINode *PN = dyn_cast<PHINode>(V)) {
+      // A PHI reached by walking (V != V2) that was already visited ends the
+      // search; a PHI that was the starting value has just been inserted
+      // above and is allowed through.
+      if (V != V2 && processed.find(V) != processed.end())
+        return NULL;
+      processed.insert(PN);
+      // Resolve every incoming value recursively. Incoming values that
+      // yield NULL are ignored; if the remaining ones do not agree on a
+      // single base, or none resolves, the PHI itself is the result.
+      const Value *common = 0;
+      for (unsigned i = 0; i != PN->getNumIncomingValues(); ++i) {
+        const Value *pv = PN->getIncomingValue(i);
+        const Value *base = skipPointerTransfer(pv, processed);
+        if (base) {
+          if (common == 0)
+            common = base;
+          else if (common != base)
+            return PN;
+        }
+      }
+      if (common == 0)
+        return PN;
+      // There is no 'continue' here: the common base is returned as is.
+      V = common;
+    }
+    break;
+  }
+  return V;
+}
+
+
+// The following
are some useful utilities for debugging
+
+// Returns v itself if it is a basic block, the parent block if v is an
+// instruction, and null otherwise.
+BasicBlock *llvm::getParentBlock(Value *v) {
+  if (BasicBlock *B = dyn_cast<BasicBlock>(v))
+    return B;
+
+  if (Instruction *I = dyn_cast<Instruction>(v))
+    return I->getParent();
+
+  return 0;
+}
+
+// Returns v itself if it is a function, the enclosing function if v is an
+// instruction or a basic block, and null otherwise (including for an
+// instruction that is not inserted into any block).
+Function *llvm::getParentFunction(Value *v) {
+  if (Function *F = dyn_cast<Function>(v))
+    return F;
+
+  if (Instruction *I = dyn_cast<Instruction>(v)) {
+    BasicBlock *B = I->getParent();
+    return B ? B->getParent() : 0;
+  }
+
+  if (BasicBlock *B = dyn_cast<BasicBlock>(v))
+    return B->getParent();
+
+  return 0;
+}
+
+// Dump a block by name
+void llvm::dumpBlock(Value *v, char *blockName) {
+  Function *F = getParentFunction(v);
+  if (F == 0)
+    return;
+
+  for (Function::iterator it = F->begin(), ie = F->end(); it != ie; ++it) {
+    BasicBlock *B = it;
+    // Compare as StringRef: StringRef::data() is not guaranteed to be
+    // null terminated, so it must not be handed to strcmp.
+    if (B->getName() == blockName) {
+      B->dump();
+      return;
+    }
+  }
+}
+
+// Find an instruction by name
+Instruction *llvm::getInst(Value *base, char *instName) {
+  Function *F = getParentFunction(base);
+  if (F == 0)
+    return 0;
+
+  for (inst_iterator it = inst_begin(F), ie = inst_end(F); it != ie; ++it) {
+    Instruction *I = &*it;
+    // See dumpBlock for why this is not a strcmp on getName().data().
+    if (I->getName() == instName) {
+      return I;
+    }
+  }
+
+  return 0;
+}
+
+// Dump an instruction by name
+void llvm::dumpInst(Value *base, char *instName) {
+  Instruction *I = getInst(base, instName);
+  if (I)
+    I->dump();
+}
+
+// Dump an instruction and all dependent instructions. Operands are dumped
+// before the instruction that uses them; 'visited' prevents dumping an
+// instruction twice and ends the recursion on cyclic dependencies.
+void llvm::dumpInstRec(Value *v, std::set<Instruction *> *visited) {
+  if (Instruction *I = dyn_cast<Instruction>(v)) {
+
+    if (visited->find(I) != visited->end())
+      return;
+
+    visited->insert(I);
+
+    for (unsigned i = 0, e = I->getNumOperands(); i != e; ++i)
+      dumpInstRec(I->getOperand(i), visited);
+
+    I->dump();
+  }
+}
+
+// Dump an instruction and all dependent instructions
+void llvm::dumpInstRec(Value *v) {
+  std::set<Instruction *> visited;
+
+  //BasicBlock *B = getParentBlock(v);
+
+  dumpInstRec(v, &visited);
+}
+
+// Dump the parent for Instruction, block or
function
+//
+// An Instruction dumps its containing block, a BasicBlock dumps its
+// containing function, and a Function dumps its containing module. Nothing
+// is printed for any other value, or when the value has no parent (for
+// example an instruction that has not been inserted into a block).
+void llvm::dumpParent(Value *v) {
+  if (Instruction *I = dyn_cast<Instruction>(v)) {
+    if (BasicBlock *B = I->getParent())
+      B->dump();
+    return;
+  }
+
+  if (BasicBlock *B = dyn_cast<BasicBlock>(v)) {
+    if (Function *F = B->getParent())
+      F->dump();
+    return;
+  }
+
+  if (Function *F = dyn_cast<Function>(v)) {
+    if (Module *M = F->getParent())
+      M->dump();
+    return;
+  }
+}
diff --git a/lib/Target/NVPTX/NVPTXUtilities.h b/lib/Target/NVPTX/NVPTXUtilities.h
new file mode 100644
index 0000000..fe6ad55
--- /dev/null
+++ b/lib/Target/NVPTX/NVPTXUtilities.h
@@ -0,0 +1,94 @@
+//===-- NVPTXUtilities - Utilities -----------------------------*- C++ -*-====//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains the declaration of the NVVM specific utility functions.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef NVPTXUTILITIES_H
+#define NVPTXUTILITIES_H
+
+#include "llvm/Value.h"
+#include "llvm/GlobalVariable.h"
+#include "llvm/Function.h"
+#include "llvm/IntrinsicInst.h"
+#include <cstdarg>
+#include <set>
+#include <string>
+#include <vector>
+
+namespace llvm
+{
+
+#define NVCL_IMAGE2D_READONLY_FUNCNAME "__is_image2D_readonly"
+#define NVCL_IMAGE3D_READONLY_FUNCNAME "__is_image3D_readonly"
+
+bool findOneNVVMAnnotation(const llvm::GlobalValue *, std::string, unsigned &);
+bool findAllNVVMAnnotation(const llvm::GlobalValue *, std::string,
+                           std::vector<unsigned> &);
+
+bool isTexture(const llvm::Value &);
+bool isSurface(const llvm::Value &);
+bool isSampler(const llvm::Value &);
+bool isImage(const llvm::Value &);
+bool isImageReadOnly(const llvm::Value &);
+bool isImageWriteOnly(const llvm::Value &);
+
+std::string getTextureName(const llvm::Value &);
+std::string getSurfaceName(const llvm::Value &);
+std::string getSamplerName(const llvm::Value &);
+
+bool getMaxNTIDx(const llvm::Function &, unsigned &);
+bool getMaxNTIDy(const llvm::Function &, unsigned &);
+bool getMaxNTIDz(const llvm::Function &, unsigned &);
+
+bool getReqNTIDx(const llvm::Function &, unsigned &);
+bool getReqNTIDy(const llvm::Function &, unsigned &);
+bool getReqNTIDz(const llvm::Function &, unsigned &);
+
+bool getMinCTASm(const llvm::Function &, unsigned &);
+bool isKernelFunction(const llvm::Function &);
+
+bool getAlign(const llvm::Function &, unsigned index, unsigned &);
+bool getAlign(const llvm::CallInst &, unsigned index, unsigned &);
+
+bool isBarrierIntrinsic(llvm::Intrinsic::ID);
+
+/// make_vector - Helper function which is useful for building temporary vectors
+/// to pass into type construction of CallInst ctors. This turns a null
+/// terminated list of pointers (or other value types) into a real live vector.
+///
+template<typename T>
+inline std::vector<T> make_vector(T A, ...) {
+  va_list Args;
+  va_start(Args, A);
+  std::vector<T> Result;
+  Result.push_back(A);
+  while (T Val = va_arg(Args, T))
+    Result.push_back(Val);
+  va_end(Args);
+  return Result;
+}
+
+bool isMemorySpaceTransferIntrinsic(Intrinsic::ID id);
+const Value *skipPointerTransfer(const Value *V, bool ignore_GEP_indices);
+const Value *skipPointerTransfer(const Value *V,
+                                 std::set<const Value *> &processed);
+BasicBlock *getParentBlock(Value *v);
+Function *getParentFunction(Value *v);
+void dumpBlock(Value *v, char *blockName);
+Instruction *getInst(Value *base, char *instName);
+void dumpInst(Value *base, char *instName);
+void dumpInstRec(Value *v, std::set<Instruction *> *visited);
+void dumpInstRec(Value *v);
+void dumpParent(Value *v);
+
+}
+
+#endif
diff --git a/lib/Target/NVPTX/NVPTXVector.td b/lib/Target/NVPTX/NVPTXVector.td
new file mode 100644
index 0000000..775df19
--- /dev/null
+++ b/lib/Target/NVPTX/NVPTXVector.td
@@ -0,0 +1,1481 @@
+//===- NVPTXVector.td - NVPTX Vector Specific Instruction defs -*- tblgen-*-==//
+//
+// The LLVM Compiler
Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// + +//----------------------------------- +// Vector Specific +//----------------------------------- + +// +// All vector instructions derive from NVPTXVecInst +// + +class NVPTXVecInst<dag outs, dag ins, string asmstr, list<dag> pattern, + NVPTXInst sInst=NOP> + : NVPTXInst<outs, ins, asmstr, pattern> { + NVPTXInst scalarInst=sInst; +} + +let isAsCheapAsAMove=1, VecInstType=isVecExtract.Value in { +// Extract v2i16 +def V2i16Extract : NVPTXVecInst<(outs Int16Regs:$dst), + (ins V2I16Regs:$src, i8imm:$c), + "mov.u16 \t$dst, $src${c:vecelem};", + [(set Int16Regs:$dst, (vector_extract + (v2i16 V2I16Regs:$src), imm:$c))], + IMOV16rr>; + +// Extract v4i16 +def V4i16Extract : NVPTXVecInst<(outs Int16Regs:$dst), + (ins V4I16Regs:$src, i8imm:$c), + "mov.u16 \t$dst, $src${c:vecelem};", + [(set Int16Regs:$dst, (vector_extract + (v4i16 V4I16Regs:$src), imm:$c))], + IMOV16rr>; + +// Extract v2i8 +def V2i8Extract : NVPTXVecInst<(outs Int8Regs:$dst), + (ins V2I8Regs:$src, i8imm:$c), + "mov.u16 \t$dst, $src${c:vecelem};", + [(set Int8Regs:$dst, (vector_extract + (v2i8 V2I8Regs:$src), imm:$c))], + IMOV8rr>; + +// Extract v4i8 +def V4i8Extract : NVPTXVecInst<(outs Int8Regs:$dst), + (ins V4I8Regs:$src, i8imm:$c), + "mov.u16 \t$dst, $src${c:vecelem};", + [(set Int8Regs:$dst, (vector_extract + (v4i8 V4I8Regs:$src), imm:$c))], + IMOV8rr>; + +// Extract v2i32 +def V2i32Extract : NVPTXVecInst<(outs Int32Regs:$dst), + (ins V2I32Regs:$src, i8imm:$c), + "mov.u32 \t$dst, $src${c:vecelem};", + [(set Int32Regs:$dst, (vector_extract + (v2i32 V2I32Regs:$src), imm:$c))], + IMOV32rr>; + +// Extract v2f32 +def V2f32Extract : NVPTXVecInst<(outs Float32Regs:$dst), + (ins V2F32Regs:$src, i8imm:$c), + "mov.f32 \t$dst, $src${c:vecelem};", + [(set Float32Regs:$dst, (vector_extract + 
(v2f32 V2F32Regs:$src), imm:$c))], + FMOV32rr>; + +// Extract v2i64 +def V2i64Extract : NVPTXVecInst<(outs Int64Regs:$dst), + (ins V2I64Regs:$src, i8imm:$c), + "mov.u64 \t$dst, $src${c:vecelem};", + [(set Int64Regs:$dst, (vector_extract + (v2i64 V2I64Regs:$src), imm:$c))], + IMOV64rr>; + +// Extract v2f64 +def V2f64Extract : NVPTXVecInst<(outs Float64Regs:$dst), + (ins V2F64Regs:$src, i8imm:$c), + "mov.f64 \t$dst, $src${c:vecelem};", + [(set Float64Regs:$dst, (vector_extract + (v2f64 V2F64Regs:$src), imm:$c))], + FMOV64rr>; + +// Extract v4i32 +def V4i32Extract : NVPTXVecInst<(outs Int32Regs:$dst), + (ins V4I32Regs:$src, i8imm:$c), + "mov.u32 \t$dst, $src${c:vecelem};", + [(set Int32Regs:$dst, (vector_extract + (v4i32 V4I32Regs:$src), imm:$c))], + IMOV32rr>; + +// Extract v4f32 +def V4f32Extract : NVPTXVecInst<(outs Float32Regs:$dst), + (ins V4F32Regs:$src, i8imm:$c), + "mov.f32 \t$dst, $src${c:vecelem};", + [(set Float32Regs:$dst, (vector_extract + (v4f32 V4F32Regs:$src), imm:$c))], + FMOV32rr>; +} + +let isAsCheapAsAMove=1, VecInstType=isVecInsert.Value in { +// Insert v2i8 +def V2i8Insert : NVPTXVecInst<(outs V2I8Regs:$dst), + (ins V2I8Regs:$src, Int8Regs:$val, i8imm:$c), + "mov.v2.u16 \t${dst:vecfull}, ${src:vecfull};" + "\n\tmov.u16 \t$dst${c:vecelem}, $val;", + [(set V2I8Regs:$dst, + (vector_insert V2I8Regs:$src, Int8Regs:$val, imm:$c))], + IMOV8rr>; + +// Insert v4i8 +def V4i8Insert : NVPTXVecInst<(outs V4I8Regs:$dst), + (ins V4I8Regs:$src, Int8Regs:$val, i8imm:$c), + "mov.v4.u16 \t${dst:vecfull}, ${src:vecfull};" + "\n\tmov.u16 \t$dst${c:vecelem}, $val;", + [(set V4I8Regs:$dst, + (vector_insert V4I8Regs:$src, Int8Regs:$val, imm:$c))], + IMOV8rr>; + +// Insert v2i16 +def V2i16Insert : NVPTXVecInst<(outs V2I16Regs:$dst), + (ins V2I16Regs:$src, Int16Regs:$val, i8imm:$c), + "mov.v2.u16 \t${dst:vecfull}, ${src:vecfull};" + "\n\tmov.u16 \t$dst${c:vecelem}, $val;", + [(set V2I16Regs:$dst, + (vector_insert V2I16Regs:$src, Int16Regs:$val, imm:$c))], + IMOV16rr>; + 
+// Insert v4i16 +def V4i16Insert : NVPTXVecInst<(outs V4I16Regs:$dst), + (ins V4I16Regs:$src, Int16Regs:$val, i8imm:$c), + "mov.v4.u16 \t${dst:vecfull}, ${src:vecfull};" + "\n\tmov.u16 \t$dst${c:vecelem}, $val;", + [(set V4I16Regs:$dst, + (vector_insert V4I16Regs:$src, Int16Regs:$val, imm:$c))], + IMOV16rr>; + +// Insert v2i32 +def V2i32Insert : NVPTXVecInst<(outs V2I32Regs:$dst), + (ins V2I32Regs:$src, Int32Regs:$val, i8imm:$c), + "mov.v2.u32 \t${dst:vecfull}, ${src:vecfull};" + "\n\tmov.u32 \t$dst${c:vecelem}, $val;", + [(set V2I32Regs:$dst, + (vector_insert V2I32Regs:$src, Int32Regs:$val, imm:$c))], + IMOV32rr>; + +// Insert v2f32 +def V2f32Insert : NVPTXVecInst<(outs V2F32Regs:$dst), + (ins V2F32Regs:$src, Float32Regs:$val, i8imm:$c), + "mov.v2.f32 \t${dst:vecfull}, ${src:vecfull};" + "\n\tmov.f32 \t$dst${c:vecelem}, $val;", + [(set V2F32Regs:$dst, + (vector_insert V2F32Regs:$src, Float32Regs:$val, imm:$c))], + FMOV32rr>; + +// Insert v2i64 +def V2i64Insert : NVPTXVecInst<(outs V2I64Regs:$dst), + (ins V2I64Regs:$src, Int64Regs:$val, i8imm:$c), + "mov.v2.u64 \t${dst:vecfull}, ${src:vecfull};" + "\n\tmov.u64 \t$dst${c:vecelem}, $val;", + [(set V2I64Regs:$dst, + (vector_insert V2I64Regs:$src, Int64Regs:$val, imm:$c))], + IMOV64rr>; + +// Insert v2f64 +def V2f64Insert : NVPTXVecInst<(outs V2F64Regs:$dst), + (ins V2F64Regs:$src, Float64Regs:$val, i8imm:$c), + "mov.v2.f64 \t${dst:vecfull}, ${src:vecfull};" + "\n\tmov.f64 \t$dst${c:vecelem}, $val;", + [(set V2F64Regs:$dst, + (vector_insert V2F64Regs:$src, Float64Regs:$val, imm:$c))], + FMOV64rr>; + +// Insert v4i32 +def V4i32Insert : NVPTXVecInst<(outs V4I32Regs:$dst), + (ins V4I32Regs:$src, Int32Regs:$val, i8imm:$c), + "mov.v4.u32 \t${dst:vecfull}, ${src:vecfull};" + "\n\tmov.u32 \t$dst${c:vecelem}, $val;", + [(set V4I32Regs:$dst, + (vector_insert V4I32Regs:$src, Int32Regs:$val, imm:$c))], + IMOV32rr>; + +// Insert v4f32 +def V4f32Insert : NVPTXVecInst<(outs V4F32Regs:$dst), + (ins V4F32Regs:$src, Float32Regs:$val, 
i8imm:$c), + "mov.v4.f32 \t${dst:vecfull}, ${src:vecfull};" + "\n\tmov.f32 \t$dst${c:vecelem}, $val;", + [(set V4F32Regs:$dst, + (vector_insert V4F32Regs:$src, Float32Regs:$val, imm:$c))], + FMOV32rr>; +} + +class BinOpAsmString<string c> { + string s = c; +} + +class V4AsmStr<string opcode> : BinOpAsmString< + !strconcat(!strconcat(!strconcat(!strconcat( + !strconcat(!strconcat(!strconcat( + opcode, " \t${dst}_0, ${a}_0, ${b}_0;\n\t"), + opcode), " \t${dst}_1, ${a}_1, ${b}_1;\n\t"), + opcode), " \t${dst}_2, ${a}_2, ${b}_2;\n\t"), + opcode), " \t${dst}_3, ${a}_3, ${b}_3;")>; + +class V2AsmStr<string opcode> : BinOpAsmString< + !strconcat(!strconcat(!strconcat( + opcode, " \t${dst}_0, ${a}_0, ${b}_0;\n\t"), + opcode), " \t${dst}_1, ${a}_1, ${b}_1;")>; + +class V4MADStr<string opcode> : BinOpAsmString< + !strconcat(!strconcat(!strconcat(!strconcat( + !strconcat(!strconcat(!strconcat( + opcode, " \t${dst}_0, ${a}_0, ${b}_0, ${c}_0;\n\t"), + opcode), " \t${dst}_1, ${a}_1, ${b}_1, ${c}_1;\n\t"), + opcode), " \t${dst}_2, ${a}_2, ${b}_2, ${c}_2;\n\t"), + opcode), " \t${dst}_3, ${a}_3, ${b}_3, ${c}_3;")>; + +class V2MADStr<string opcode> : BinOpAsmString< + !strconcat(!strconcat(!strconcat( + opcode, " \t${dst}_0, ${a}_0, ${b}_0, ${c}_0;\n\t"), + opcode), " \t${dst}_1, ${a}_1, ${b}_1, ${c}_1;")>; + +class V4UnaryStr<string opcode> : BinOpAsmString< + !strconcat(!strconcat(!strconcat(!strconcat( + !strconcat(!strconcat(!strconcat( + opcode, " \t${dst}_0, ${a}_0;\n\t"), + opcode), " \t${dst}_1, ${a}_1;\n\t"), + opcode), " \t${dst}_2, ${a}_2;\n\t"), + opcode), " \t${dst}_3, ${a}_3;")>; + +class V2UnaryStr<string opcode> : BinOpAsmString< + !strconcat(!strconcat(!strconcat( + opcode, " \t${dst}_0, ${a}_0;\n\t"), + opcode), " \t${dst}_1, ${a}_1;")>; + +class VecBinaryOp<BinOpAsmString asmstr, SDNode OpNode, NVPTXRegClass regclass, + NVPTXInst sInst=NOP> : + NVPTXVecInst<(outs regclass:$dst), (ins regclass:$a, regclass:$b), + asmstr.s, + [(set regclass:$dst, (OpNode regclass:$a, 
regclass:$b))], + sInst>; + +class VecShiftOp<BinOpAsmString asmstr, SDNode OpNode, NVPTXRegClass regclass1, + NVPTXRegClass regclass2, NVPTXInst sInst=NOP> : + NVPTXVecInst<(outs regclass1:$dst), (ins regclass1:$a, regclass2:$b), + asmstr.s, + [(set regclass1:$dst, (OpNode regclass1:$a, regclass2:$b))], + sInst>; + +class VecUnaryOp<BinOpAsmString asmstr, PatFrag OpNode, NVPTXRegClass regclass, + NVPTXInst sInst=NOP> : + NVPTXVecInst<(outs regclass:$dst), (ins regclass:$a), + asmstr.s, + [(set regclass:$dst, (OpNode regclass:$a))], sInst>; + +multiclass IntBinVOp<string asmstr, SDNode OpNode, + NVPTXInst i64op=NOP, NVPTXInst i32op=NOP, NVPTXInst + i16op=NOP, NVPTXInst i8op=NOP> { + def V2I64 : VecBinaryOp<V2AsmStr<!strconcat(asmstr, "64")>, OpNode, V2I64Regs, + i64op>; + def V4I32 : VecBinaryOp<V4AsmStr<!strconcat(asmstr, "32")>, OpNode, V4I32Regs, + i32op>; + def V2I32 : VecBinaryOp<V2AsmStr<!strconcat(asmstr, "32")>, OpNode, V2I32Regs, + i32op>; + def V4I16 : VecBinaryOp<V4AsmStr<!strconcat(asmstr, "16")>, OpNode, V4I16Regs, + i16op>; + def V2I16 : VecBinaryOp<V2AsmStr<!strconcat(asmstr, "16")>, OpNode, V2I16Regs, + i16op>; + def V4I8 : VecBinaryOp<V4AsmStr<!strconcat(asmstr, "16")>, OpNode, V4I8Regs, + i8op>; + def V2I8 : VecBinaryOp<V2AsmStr<!strconcat(asmstr, "16")>, OpNode, V2I8Regs, + i8op>; +} + +multiclass FloatBinVOp<string asmstr, SDNode OpNode, + NVPTXInst f64=NOP, NVPTXInst f32=NOP, + NVPTXInst f32_ftz=NOP> { + def V2F64 : VecBinaryOp<V2AsmStr<!strconcat(asmstr, "f64")>, OpNode, + V2F64Regs, f64>; + def V4F32_ftz : VecBinaryOp<V4AsmStr<!strconcat(asmstr, "ftz.f32")>, OpNode, + V4F32Regs, f32_ftz>, Requires<[doF32FTZ]>; + def V2F32_ftz : VecBinaryOp<V2AsmStr<!strconcat(asmstr, "ftz.f32")>, OpNode, + V2F32Regs, f32_ftz>, Requires<[doF32FTZ]>; + def V4F32 : VecBinaryOp<V4AsmStr<!strconcat(asmstr, "f32")>, OpNode, + V4F32Regs, f32>; + def V2F32 : VecBinaryOp<V2AsmStr<!strconcat(asmstr, "f32")>, OpNode, + V2F32Regs, f32>; +} + +multiclass 
IntUnaryVOp<string asmstr, PatFrag OpNode, + NVPTXInst i64op=NOP, NVPTXInst i32op=NOP, + NVPTXInst i16op=NOP, NVPTXInst i8op=NOP> { + def V2I64 : VecUnaryOp<V2UnaryStr<!strconcat(asmstr, "64")>, OpNode, + V2I64Regs, i64op>; + def V4I32 : VecUnaryOp<V4UnaryStr<!strconcat(asmstr, "32")>, OpNode, + V4I32Regs, i32op>; + def V2I32 : VecUnaryOp<V2UnaryStr<!strconcat(asmstr, "32")>, OpNode, + V2I32Regs, i32op>; + def V4I16 : VecUnaryOp<V4UnaryStr<!strconcat(asmstr, "16")>, OpNode, + V4I16Regs, i16op>; + def V2I16 : VecUnaryOp<V2UnaryStr<!strconcat(asmstr, "16")>, OpNode, + V2I16Regs, i16op>; + def V4I8 : VecUnaryOp<V4UnaryStr<!strconcat(asmstr, "16")>, OpNode, + V4I8Regs, i8op>; + def V2I8 : VecUnaryOp<V2UnaryStr<!strconcat(asmstr, "16")>, OpNode, + V2I8Regs, i8op>; +} + + +// Integer Arithmetic +let VecInstType=isVecOther.Value in { +defm VAdd : IntBinVOp<"add.s", add, ADDi64rr, ADDi32rr, ADDi16rr, ADDi8rr>; +defm VSub : IntBinVOp<"sub.s", sub, SUBi64rr, SUBi32rr, SUBi16rr, SUBi8rr>; + +def AddCCV4I32 : VecBinaryOp<V4AsmStr<"add.cc.s32">, addc, V4I32Regs, + ADDCCi32rr>; +def AddCCV2I32 : VecBinaryOp<V2AsmStr<"add.cc.s32">, addc, V2I32Regs, + ADDCCi32rr>; +def SubCCV4I32 : VecBinaryOp<V4AsmStr<"sub.cc.s32">, subc, V4I32Regs, + SUBCCi32rr>; +def SubCCV2I32 : VecBinaryOp<V2AsmStr<"sub.cc.s32">, subc, V2I32Regs, + SUBCCi32rr>; +def AddCCCV4I32 : VecBinaryOp<V4AsmStr<"addc.cc.s32">, adde, V4I32Regs, + ADDCCCi32rr>; +def AddCCCV2I32 : VecBinaryOp<V2AsmStr<"addc.cc.s32">, adde, V2I32Regs, + ADDCCCi32rr>; +def SubCCCV4I32 : VecBinaryOp<V4AsmStr<"subc.cc.s32">, sube, V4I32Regs, + SUBCCCi32rr>; +def SubCCCV2I32 : VecBinaryOp<V2AsmStr<"subc.cc.s32">, sube, V2I32Regs, + SUBCCCi32rr>; + +def ShiftLV2I64 : VecShiftOp<V2AsmStr<"shl.b64">, shl, V2I64Regs, V2I32Regs, + SHLi64rr>; +def ShiftLV2I32 : VecShiftOp<V2AsmStr<"shl.b32">, shl, V2I32Regs, V2I32Regs, + SHLi32rr>; +def ShiftLV4I32 : VecShiftOp<V4AsmStr<"shl.b32">, shl, V4I32Regs, V4I32Regs, + SHLi32rr>; +def ShiftLV2I16 : 
VecShiftOp<V2AsmStr<"shl.b16">, shl, V2I16Regs, V2I32Regs, + SHLi16rr>; +def ShiftLV4I16 : VecShiftOp<V4AsmStr<"shl.b16">, shl, V4I16Regs, V4I32Regs, + SHLi16rr>; +def ShiftLV2I8 : VecShiftOp<V2AsmStr<"shl.b16">, shl, V2I8Regs, V2I32Regs, + SHLi8rr>; +def ShiftLV4I8 : VecShiftOp<V4AsmStr<"shl.b16">, shl, V4I8Regs, V4I32Regs, + SHLi8rr>; +} + +// cvt to v*i32, helpers for shift +class CVTtoVeci32<NVPTXRegClass inclass, NVPTXRegClass outclass, string asmstr, + NVPTXInst sInst=NOP> : + NVPTXVecInst<(outs outclass:$d), (ins inclass:$s), asmstr, [], sInst>; + +class VecCVTStrHelper<string op, string dest, string src> { + string s=!strconcat(op, !strconcat("\t", + !strconcat(dest, !strconcat(", ", !strconcat(src, ";"))))); +} + +class Vec2CVTStr<string op> { + string s=!strconcat(VecCVTStrHelper<op, "${d}_0", "${s}_0">.s, + !strconcat("\n\t", VecCVTStrHelper<op, "${d}_1", "${s}_1">.s)); +} + +class Vec4CVTStr<string op> { + string s=!strconcat(VecCVTStrHelper<op, "${d}_0", "${s}_0">.s, + !strconcat("\n\t", + !strconcat(VecCVTStrHelper<op, "${d}_1", "${s}_1">.s, + !strconcat("\n\t", + !strconcat(VecCVTStrHelper<op, "${d}_2", "${s}_2">.s, + !strconcat("\n\t", VecCVTStrHelper<op, "${d}_3", "${s}_3">.s)))))); +} + +let VecInstType=isVecOther.Value in { +def CVTv2i8tov2i32 : CVTtoVeci32<V2I8Regs, V2I32Regs, + Vec2CVTStr<"cvt.u32.u16">.s, Zint_extendext8to32>; +def CVTv2i16tov2i32 : CVTtoVeci32<V2I16Regs, V2I32Regs, + Vec2CVTStr<"cvt.u32.u16">.s, Zint_extendext16to32>; +def CVTv4i8tov4i32 : CVTtoVeci32<V4I8Regs, V4I32Regs, + Vec4CVTStr<"cvt.u32.u16">.s, Zint_extendext8to32>; +def CVTv4i16tov4i32 : CVTtoVeci32<V4I16Regs, V4I32Regs, + Vec4CVTStr<"cvt.u32.u16">.s, Zint_extendext16to32>; +def CVTv2i64tov2i32 : CVTtoVeci32<V2I64Regs, V2I32Regs, + Vec2CVTStr<"cvt.u32.u64">.s, TRUNC_64to32>; +} + +def : Pat<(shl V2I16Regs:$src1, V2I16Regs:$src2), + (ShiftLV2I16 V2I16Regs:$src1, (CVTv2i16tov2i32 V2I16Regs:$src2))>; +def : Pat<(shl V2I8Regs:$src1, V2I8Regs:$src2), + (ShiftLV2I8 
V2I8Regs:$src1, (CVTv2i8tov2i32 V2I8Regs:$src2))>; +def : Pat<(shl V2I64Regs:$src1, V2I64Regs:$src2), + (ShiftLV2I64 V2I64Regs:$src1, (CVTv2i64tov2i32 V2I64Regs:$src2))>; + +def : Pat<(shl V4I16Regs:$src1, V4I16Regs:$src2), + (ShiftLV4I16 V4I16Regs:$src1, (CVTv4i16tov4i32 V4I16Regs:$src2))>; +def : Pat<(shl V4I8Regs:$src1, V4I8Regs:$src2), + (ShiftLV4I8 V4I8Regs:$src1, (CVTv4i8tov4i32 V4I8Regs:$src2))>; + +let VecInstType=isVecOther.Value in { +def ShiftRAV2I64 : VecShiftOp<V2AsmStr<"shr.s64">, sra, V2I64Regs, V2I32Regs, + SRAi64rr>; +def ShiftRAV2I32 : VecShiftOp<V2AsmStr<"shr.s32">, sra, V2I32Regs, V2I32Regs, + SRAi32rr>; +def ShiftRAV4I32 : VecShiftOp<V4AsmStr<"shr.s32">, sra, V4I32Regs, V4I32Regs, + SRAi32rr>; +def ShiftRAV2I16 : VecShiftOp<V2AsmStr<"shr.s16">, sra, V2I16Regs, V2I32Regs, + SRAi16rr>; +def ShiftRAV4I16 : VecShiftOp<V4AsmStr<"shr.s16">, sra, V4I16Regs, V4I32Regs, + SRAi16rr>; +def ShiftRAV2I8 : VecShiftOp<V2AsmStr<"shr.s16">, sra, V2I8Regs, V2I32Regs, + SRAi8rr>; +def ShiftRAV4I8 : VecShiftOp<V4AsmStr<"shr.s16">, sra, V4I8Regs, V4I32Regs, + SRAi8rr>; + +def ShiftRLV2I64 : VecShiftOp<V2AsmStr<"shr.u64">, srl, V2I64Regs, V2I32Regs, + SRLi64rr>; +def ShiftRLV2I32 : VecShiftOp<V2AsmStr<"shr.u32">, srl, V2I32Regs, V2I32Regs, + SRLi32rr>; +def ShiftRLV4I32 : VecShiftOp<V4AsmStr<"shr.u32">, srl, V4I32Regs, V4I32Regs, + SRLi32rr>; +def ShiftRLV2I16 : VecShiftOp<V2AsmStr<"shr.u16">, srl, V2I16Regs, V2I32Regs, + SRLi16rr>; +def ShiftRLV4I16 : VecShiftOp<V4AsmStr<"shr.u16">, srl, V4I16Regs, V4I32Regs, + SRLi16rr>; +def ShiftRLV2I8 : VecShiftOp<V2AsmStr<"shr.u16">, srl, V2I8Regs, V2I32Regs, + SRLi8rr>; +def ShiftRLV4I8 : VecShiftOp<V4AsmStr<"shr.u16">, srl, V4I8Regs, V4I32Regs, + SRLi8rr>; + +defm VMult : IntBinVOp<"mul.lo.s", mul, MULTi64rr, MULTi32rr, MULTi16rr, + MULTi8rr>; +defm VMultHS : IntBinVOp<"mul.hi.s", mulhs, MULTHSi64rr, MULTHSi32rr, + MULTHSi16rr, + MULTHSi8rr>; +defm VMultHU : IntBinVOp<"mul.hi.u", mulhu, MULTHUi64rr, MULTHUi32rr, + 
MULTHUi16rr, + MULTHUi8rr>; +defm VSDiv : IntBinVOp<"div.s", sdiv, SDIVi64rr, SDIVi32rr, SDIVi16rr, + SDIVi8rr>; +defm VUDiv : IntBinVOp<"div.u", udiv, UDIVi64rr, UDIVi32rr, UDIVi16rr, + UDIVi8rr>; +defm VSRem : IntBinVOp<"rem.s", srem, SREMi64rr, SREMi32rr, SREMi16rr, + SREMi8rr>; +defm VURem : IntBinVOp<"rem.u", urem, UREMi64rr, UREMi32rr, UREMi16rr, + UREMi8rr>; +} + +def : Pat<(sra V2I16Regs:$src1, V2I16Regs:$src2), + (ShiftRAV2I16 V2I16Regs:$src1, (CVTv2i16tov2i32 V2I16Regs:$src2))>; +def : Pat<(sra V2I8Regs:$src1, V2I8Regs:$src2), + (ShiftRAV2I8 V2I8Regs:$src1, (CVTv2i8tov2i32 V2I8Regs:$src2))>; +def : Pat<(sra V2I64Regs:$src1, V2I64Regs:$src2), + (ShiftRAV2I64 V2I64Regs:$src1, (CVTv2i64tov2i32 V2I64Regs:$src2))>; + +def : Pat<(sra V4I16Regs:$src1, V4I16Regs:$src2), + (ShiftRAV4I16 V4I16Regs:$src1, (CVTv4i16tov4i32 V4I16Regs:$src2))>; +def : Pat<(sra V4I8Regs:$src1, V4I8Regs:$src2), + (ShiftRAV4I8 V4I8Regs:$src1, (CVTv4i8tov4i32 V4I8Regs:$src2))>; + +def : Pat<(srl V2I16Regs:$src1, V2I16Regs:$src2), + (ShiftRLV2I16 V2I16Regs:$src1, (CVTv2i16tov2i32 V2I16Regs:$src2))>; +def : Pat<(srl V2I8Regs:$src1, V2I8Regs:$src2), + (ShiftRLV2I8 V2I8Regs:$src1, (CVTv2i8tov2i32 V2I8Regs:$src2))>; +def : Pat<(srl V2I64Regs:$src1, V2I64Regs:$src2), + (ShiftRLV2I64 V2I64Regs:$src1, (CVTv2i64tov2i32 V2I64Regs:$src2))>; + +def : Pat<(srl V4I16Regs:$src1, V4I16Regs:$src2), + (ShiftRLV4I16 V4I16Regs:$src1, (CVTv4i16tov4i32 V4I16Regs:$src2))>; +def : Pat<(srl V4I8Regs:$src1, V4I8Regs:$src2), + (ShiftRLV4I8 V4I8Regs:$src1, (CVTv4i8tov4i32 V4I8Regs:$src2))>; + +multiclass VMAD<string asmstr, NVPTXRegClass regclassv4, + NVPTXRegClass regclassv2, + SDNode an=add, SDNode mn=mul, NVPTXInst sop=NOP, + Predicate Pred> { + def V4 : NVPTXVecInst<(outs regclassv4:$dst), + (ins regclassv4:$a, regclassv4:$b, regclassv4:$c), + V4MADStr<asmstr>.s, + [(set regclassv4:$dst, + (an (mn regclassv4:$a, regclassv4:$b), regclassv4:$c))], + sop>, + Requires<[Pred]>; + def V2 : NVPTXVecInst<(outs 
regclassv2:$dst), + (ins regclassv2:$a, regclassv2:$b, regclassv2:$c), + V2MADStr<asmstr>.s, + [(set regclassv2:$dst, + (an (mn regclassv2:$a, regclassv2:$b), regclassv2:$c))], + sop>, + Requires<[Pred]>; +} + +multiclass VMADV2Only<string asmstr, NVPTXRegClass regclass, NVPTXInst sop=NOP, + Predicate Pred> { + def V2 : NVPTXVecInst<(outs regclass:$dst), + (ins regclass:$a, regclass:$b, regclass:$c), + V2MADStr<asmstr>.s, + [(set regclass:$dst, (add + (mul regclass:$a, regclass:$b), regclass:$c))], sop>, + Requires<[Pred]>; +} +multiclass VFMADV2Only<string asmstr, NVPTXRegClass regclass, NVPTXInst sop=NOP, + Predicate Pred> { + def V2 : NVPTXVecInst<(outs regclass:$dst), + (ins regclass:$a, regclass:$b, regclass:$c), + V2MADStr<asmstr>.s, + [(set regclass:$dst, (fadd + (fmul regclass:$a, regclass:$b), regclass:$c))], sop>, + Requires<[Pred]>; +} + +let VecInstType=isVecOther.Value in { +defm I8MAD : VMAD<"mad.lo.s16", V4I8Regs, V2I8Regs, add, mul, MAD8rrr, true>; +defm I16MAD : VMAD<"mad.lo.s16", V4I16Regs, V2I16Regs, add, mul, MAD16rrr, + true>; +defm I32MAD : VMAD<"mad.lo.s32", V4I32Regs, V2I32Regs, add, mul, MAD32rrr, + true>; +defm I64MAD : VMADV2Only<"mad.lo.s64", V2I64Regs, MAD64rrr, true>; + +defm VNeg : IntUnaryVOp<"neg.s", ineg, INEG64, INEG32, INEG16, INEG8>; + +defm VAddf : FloatBinVOp<"add.", fadd, FADDf64rr, FADDf32rr, FADDf32rr_ftz>; +defm VSubf : FloatBinVOp<"sub.", fsub, FSUBf64rr, FSUBf32rr, FSUBf32rr_ftz>; +defm VMulf : FloatBinVOp<"mul.", fmul, FMULf64rr, FMULf32rr, FMULf32rr_ftz>; + +defm F32MAD_ftz : VMAD<"mad.ftz.f32", V4F32Regs, V2F32Regs, fadd, fmul, + FMAD32_ftzrrr, doFMADF32_ftz>; +defm F32FMA_ftz : VMAD<"fma.rn.ftz.f32", V4F32Regs, V2F32Regs, fadd, fmul, + FMA32_ftzrrr, doFMAF32_ftz>; +defm F32MAD : VMAD<"mad.f32", V4F32Regs, V2F32Regs, fadd, fmul, FMAD32rrr, + doFMADF32>; +defm F32FMA : VMAD<"fma.rn.f32", V4F32Regs, V2F32Regs, fadd, fmul, FMA32rrr, + doFMAF32>; +defm F64FMA : VFMADV2Only<"fma.rn.f64", V2F64Regs, FMA64rrr, doFMAF64>; +} 
+ +let VecInstType=isVecOther.Value in { +def V4F32Div_prec_ftz : VecBinaryOp<V4AsmStr<"div.rn.ftz.f32">, fdiv, V4F32Regs, + FDIV32rr_prec_ftz>, Requires<[doF32FTZ, reqPTX20]>; +def V2F32Div_prec_ftz : VecBinaryOp<V2AsmStr<"div.rn.ftz.f32">, fdiv, V2F32Regs, + FDIV32rr_prec_ftz>, Requires<[doF32FTZ, reqPTX20]>; +def V4F32Div_prec : VecBinaryOp<V4AsmStr<"div.rn.f32">, fdiv, V4F32Regs, + FDIV32rr_prec>, Requires<[reqPTX20]>; +def V2F32Div_prec : VecBinaryOp<V2AsmStr<"div.rn.f32">, fdiv, V2F32Regs, + FDIV32rr_prec>, Requires<[reqPTX20]>; +def V2F32Div_ftz : VecBinaryOp<V2AsmStr<"div.full.ftz.f32">, fdiv, V2F32Regs, + FDIV32rr_ftz>, Requires<[doF32FTZ]>; +def V4F32Div_ftz : VecBinaryOp<V4AsmStr<"div.full.ftz.f32">, fdiv, V4F32Regs, + FDIV32rr_ftz>, Requires<[doF32FTZ]>; +def V2F32Div : VecBinaryOp<V2AsmStr<"div.full.f32">, fdiv, V2F32Regs, FDIV32rr>; +def V4F32Div : VecBinaryOp<V4AsmStr<"div.full.f32">, fdiv, V4F32Regs, FDIV32rr>; +def V2F64Div : VecBinaryOp<V2AsmStr<"div.rn.f64">, fdiv, V2F64Regs, FDIV64rr>; +} + +def fnegpat : PatFrag<(ops node:$in), (fneg node:$in)>; + +let VecInstType=isVecOther.Value in { +def VNegv2f32_ftz : VecUnaryOp<V2UnaryStr<"neg.ftz.f32">, fnegpat, V2F32Regs, + FNEGf32_ftz>, Requires<[doF32FTZ]>; +def VNegv4f32_ftz : VecUnaryOp<V4UnaryStr<"neg.ftz.f32">, fnegpat, V4F32Regs, + FNEGf32_ftz>, Requires<[doF32FTZ]>; +def VNegv2f32 : VecUnaryOp<V2UnaryStr<"neg.f32">, fnegpat, V2F32Regs, FNEGf32>; +def VNegv4f32 : VecUnaryOp<V4UnaryStr<"neg.f32">, fnegpat, V4F32Regs, FNEGf32>; +def VNegv2f64 : VecUnaryOp<V2UnaryStr<"neg.f64">, fnegpat, V2F64Regs, FNEGf64>; + +// Logical Arithmetic +defm VAnd : IntBinVOp<"and.b", and, ANDb64rr, ANDb32rr, ANDb16rr, ANDb8rr>; +defm VOr : IntBinVOp<"or.b", or, ORb64rr, ORb32rr, ORb16rr, ORb8rr>; +defm VXor : IntBinVOp<"xor.b", xor, XORb64rr, XORb32rr, XORb16rr, XORb8rr>; + +defm VNot : IntUnaryVOp<"not.b", not, NOT64, NOT32, NOT16, NOT8>; +} + + +multiclass V2FPCONTRACT32_SUB_PAT<NVPTXInst Inst, Predicate Pred> { + 
def : Pat<(fsub V2F32Regs:$a, (fmul V2F32Regs:$b, V2F32Regs:$c)), + (Inst (VNegv2f32 V2F32Regs:$b), V2F32Regs:$c, V2F32Regs:$a)>, + Requires<[Pred]>; + + def : Pat<(fsub (fmul V2F32Regs:$a, V2F32Regs:$b), V2F32Regs:$c), + (Inst V2F32Regs:$a, V2F32Regs:$b, (VNegv2f32 V2F32Regs:$c))>, + Requires<[Pred]>; +} + +defm V2FMAF32ext_ftz : V2FPCONTRACT32_SUB_PAT<F32FMA_ftzV2, doFMAF32AGG_ftz>; +defm V2FMADF32ext_ftz : V2FPCONTRACT32_SUB_PAT<F32MAD_ftzV2, doFMADF32_ftz>; +defm V2FMAF32ext : V2FPCONTRACT32_SUB_PAT<F32FMAV2, doFMAF32AGG>; +defm V2FMADF32ext : V2FPCONTRACT32_SUB_PAT<F32MADV2, doFMADF32>; + +multiclass V4FPCONTRACT32_SUB_PAT<NVPTXInst Inst, Predicate Pred> { + def : Pat<(fsub V4F32Regs:$a, (fmul V4F32Regs:$b, V4F32Regs:$c)), + (Inst (VNegv4f32 V4F32Regs:$b), V4F32Regs:$c, V4F32Regs:$a)>, + Requires<[Pred]>; + + def : Pat<(fsub (fmul V4F32Regs:$a, V4F32Regs:$b), V4F32Regs:$c), + (Inst V4F32Regs:$a, V4F32Regs:$b, (VNegv4f32 V4F32Regs:$c))>, + Requires<[Pred]>; +} + +defm V4FMAF32ext_ftz : V4FPCONTRACT32_SUB_PAT<F32FMA_ftzV4, doFMAF32AGG_ftz>; +defm V4FMADF32ext_ftz : V4FPCONTRACT32_SUB_PAT<F32MAD_ftzV4, doFMADF32_ftz>; +defm V4FMAF32ext : V4FPCONTRACT32_SUB_PAT<F32FMAV4, doFMAF32AGG>; +defm V4FMADF32ext : V4FPCONTRACT32_SUB_PAT<F32MADV4, doFMADF32>; + +multiclass V2FPCONTRACT64_SUB_PAT<NVPTXInst Inst, Predicate Pred> { + def : Pat<(fsub V2F64Regs:$a, (fmul V2F64Regs:$b, V2F64Regs:$c)), + (Inst (VNegv2f64 V2F64Regs:$b), V2F64Regs:$c, V2F64Regs:$a)>, + Requires<[Pred]>; + + def : Pat<(fsub (fmul V2F64Regs:$a, V2F64Regs:$b), V2F64Regs:$c), + (Inst V2F64Regs:$a, V2F64Regs:$b, (VNegv2f64 V2F64Regs:$c))>, + Requires<[Pred]>; +} + +defm V2FMAF64ext : V2FPCONTRACT64_SUB_PAT<F64FMAV2, doFMAF64AGG>; + +class VecModStr<string vecsize, string elem, string extra, string l=""> +{ + string t1 = !strconcat("${c", elem); + string t2 = !strconcat(t1, ":vecv"); + string t3 = !strconcat(t2, vecsize); + string t4 = !strconcat(t3, extra); + string t5 = !strconcat(t4, l); + string s = 
!strconcat(t5, "}"); +} +class ShuffleOneLine<string vecsize, string elem, string type> +{ + string t1 = VecModStr<vecsize, elem, "comm", "1">.s; + string t2 = !strconcat(t1, "mov."); + string t3 = !strconcat(t2, type); + string t4 = !strconcat(t3, " \t${dst}_"); + string t5 = !strconcat(t4, elem); + string t6 = !strconcat(t5, ", $src1"); + string t7 = !strconcat(t6, VecModStr<vecsize, elem, "pos">.s); + string t8 = !strconcat(t7, ";\n\t"); + string t9 = !strconcat(t8, VecModStr<vecsize, elem, "comm", "2">.s); + string t10 = !strconcat(t9, "mov."); + string t11 = !strconcat(t10, type); + string t12 = !strconcat(t11, " \t${dst}_"); + string t13 = !strconcat(t12, elem); + string t14 = !strconcat(t13, ", $src2"); + string t15 = !strconcat(t14, VecModStr<vecsize, elem, "pos">.s); + string s = !strconcat(t15, ";"); +} +class ShuffleAsmStr2<string type> +{ + string t1 = ShuffleOneLine<"2", "0", type>.s; + string t2 = !strconcat(t1, "\n\t"); + string s = !strconcat(t2, ShuffleOneLine<"2", "1", type>.s); +} +class ShuffleAsmStr4<string type> +{ + string t1 = ShuffleOneLine<"4", "0", type>.s; + string t2 = !strconcat(t1, "\n\t"); + string t3 = !strconcat(t2, ShuffleOneLine<"4", "1", type>.s); + string t4 = !strconcat(t3, "\n\t"); + string t5 = !strconcat(t4, ShuffleOneLine<"4", "2", type>.s); + string t6 = !strconcat(t5, "\n\t"); + string s = !strconcat(t6, ShuffleOneLine<"4", "3", type>.s); +} + +let neverHasSideEffects=1, VecInstType=isVecShuffle.Value in { +def VecShuffle_v4f32 : NVPTXVecInst<(outs V4F32Regs:$dst), + (ins V4F32Regs:$src1, V4F32Regs:$src2, + i8imm:$c0, i8imm:$c1, i8imm:$c2, i8imm:$c3), + !strconcat("//Mov $dst, $src1, $src2, $c0, $c1, $c2, $c3;\n\t", + ShuffleAsmStr4<"f32">.s), + [], FMOV32rr>; + +def VecShuffle_v4i32 : NVPTXVecInst<(outs V4I32Regs:$dst), + (ins V4I32Regs:$src1, V4I32Regs:$src2, + i8imm:$c0, i8imm:$c1, i8imm:$c2, i8imm:$c3), + !strconcat("//Mov $dst, $src1, $src2, $c0, $c1, $c2, $c3;\n\t", + ShuffleAsmStr4<"u32">.s), + [], IMOV32rr>; + 
+def VecShuffle_v4i16 : NVPTXVecInst<(outs V4I16Regs:$dst), + (ins V4I16Regs:$src1, V4I16Regs:$src2, + i8imm:$c0, i8imm:$c1, i8imm:$c2, i8imm:$c3), + !strconcat("//Mov $dst, $src1, $src2, $c0, $c1, $c2, $c3;\n\t", + ShuffleAsmStr4<"u16">.s), + [], IMOV16rr>; + +def VecShuffle_v4i8 : NVPTXVecInst<(outs V4I8Regs:$dst), + (ins V4I8Regs:$src1, V4I8Regs:$src2, + i8imm:$c0, i8imm:$c1, i8imm:$c2, i8imm:$c3), + !strconcat("//Mov $dst, $src1, $src2, $c0, $c1, $c2, $c3;\n\t", + ShuffleAsmStr4<"u16">.s), + [], IMOV8rr>; + +def VecShuffle_v2f32 : NVPTXVecInst<(outs V2F32Regs:$dst), + (ins V2F32Regs:$src1, V2F32Regs:$src2, + i8imm:$c0, i8imm:$c1), + !strconcat("//Mov $dst, $src1, $src2, $c0, $c1;\n\t", + ShuffleAsmStr2<"f32">.s), + [], FMOV32rr>; + +def VecShuffle_v2i32 : NVPTXVecInst<(outs V2I32Regs:$dst), + (ins V2I32Regs:$src1, V2I32Regs:$src2, + i8imm:$c0, i8imm:$c1), + !strconcat("//Mov $dst, $src1, $src2, $c0, $c1;\n\t", + ShuffleAsmStr2<"u32">.s), + [], IMOV32rr>; + +def VecShuffle_v2i8 : NVPTXVecInst<(outs V2I8Regs:$dst), + (ins V2I8Regs:$src1, V2I8Regs:$src2, + i8imm:$c0, i8imm:$c1), + !strconcat("//Mov $dst, $src1, $src2, $c0, $c1;\n\t", + ShuffleAsmStr2<"u16">.s), + [], IMOV8rr>; + +def VecShuffle_v2i16 : NVPTXVecInst<(outs V2I16Regs:$dst), + (ins V2I16Regs:$src1, V2I16Regs:$src2, + i8imm:$c0, i8imm:$c1), + !strconcat("//Mov $dst, $src1, $src2, $c0, $c1;\n\t", + ShuffleAsmStr2<"u16">.s), + [], IMOV16rr>; + +def VecShuffle_v2f64 : NVPTXVecInst<(outs V2F64Regs:$dst), + (ins V2F64Regs:$src1, V2F64Regs:$src2, + i8imm:$c0, i8imm:$c1), + !strconcat("//Mov $dst, $src1, $src2, $c0, $c1;\n\t", + ShuffleAsmStr2<"f64">.s), + [], FMOV64rr>; + +def VecShuffle_v2i64 : NVPTXVecInst<(outs V2I64Regs:$dst), + (ins V2I64Regs:$src1, V2I64Regs:$src2, + i8imm:$c0, i8imm:$c1), + !strconcat("//Mov $dst, $src1, $src2, $c0, $c1;\n\t", + ShuffleAsmStr2<"u64">.s), + [], IMOV64rr>; +} + +def ShuffleMask0 : SDNodeXForm<vector_shuffle, [{ + ShuffleVectorSDNode *SVOp = cast<ShuffleVectorSDNode>(N); 
+ return CurDAG->getTargetConstant(SVOp->getMaskElt(0), MVT::i32); +}]>; +def ShuffleMask1 : SDNodeXForm<vector_shuffle, [{ + ShuffleVectorSDNode *SVOp = cast<ShuffleVectorSDNode>(N); + return CurDAG->getTargetConstant(SVOp->getMaskElt(1), MVT::i32); +}]>; +def ShuffleMask2 : SDNodeXForm<vector_shuffle, [{ + ShuffleVectorSDNode *SVOp = cast<ShuffleVectorSDNode>(N); + return CurDAG->getTargetConstant(SVOp->getMaskElt(2), MVT::i32); +}]>; +def ShuffleMask3 : SDNodeXForm<vector_shuffle, [{ + ShuffleVectorSDNode *SVOp = cast<ShuffleVectorSDNode>(N); + return CurDAG->getTargetConstant(SVOp->getMaskElt(3), MVT::i32); +}]>; + +// The spurious call is here to silence a compiler warning about N being +// unused. +def vec_shuf : PatFrag<(ops node:$lhs, node:$rhs), + (vector_shuffle node:$lhs, node:$rhs), + [{ N->getGluedNode(); return true; }]>; + +def : Pat<(v2f64 (vec_shuf:$op V2F64Regs:$src1, V2F64Regs:$src2)), + (VecShuffle_v2f64 V2F64Regs:$src1, V2F64Regs:$src2, + (ShuffleMask0 node:$op), (ShuffleMask1 node:$op))>; + +def : Pat<(v4f32 (vec_shuf:$op V4F32Regs:$src1, V4F32Regs:$src2)), + (VecShuffle_v4f32 V4F32Regs:$src1, V4F32Regs:$src2, + (ShuffleMask0 node:$op), (ShuffleMask1 node:$op), + (ShuffleMask2 node:$op), (ShuffleMask3 node:$op))>; + +def : Pat<(v2f32 (vec_shuf:$op V2F32Regs:$src1, V2F32Regs:$src2)), + (VecShuffle_v2f32 V2F32Regs:$src1, V2F32Regs:$src2, + (ShuffleMask0 node:$op), (ShuffleMask1 node:$op))>; + +def : Pat<(v2i64 (vec_shuf:$op V2I64Regs:$src1, V2I64Regs:$src2)), + (VecShuffle_v2i64 V2I64Regs:$src1, V2I64Regs:$src2, + (ShuffleMask0 node:$op), (ShuffleMask1 node:$op))>; + +def : Pat<(v4i32 (vec_shuf:$op V4I32Regs:$src1, V4I32Regs:$src2)), + (VecShuffle_v4i32 V4I32Regs:$src1, V4I32Regs:$src2, + (ShuffleMask0 node:$op), (ShuffleMask1 node:$op), + (ShuffleMask2 node:$op), (ShuffleMask3 node:$op))>; + +def : Pat<(v2i32 (vec_shuf:$op V2I32Regs:$src1, V2I32Regs:$src2)), + (VecShuffle_v2i32 V2I32Regs:$src1, V2I32Regs:$src2, + (ShuffleMask0 node:$op), 
(ShuffleMask1 node:$op))>; + +def : Pat<(v4i16 (vec_shuf:$op V4I16Regs:$src1, V4I16Regs:$src2)), + (VecShuffle_v4i16 V4I16Regs:$src1, V4I16Regs:$src2, + (ShuffleMask0 node:$op), (ShuffleMask1 node:$op), + (ShuffleMask2 node:$op), (ShuffleMask3 node:$op))>; + +def : Pat<(v2i16 (vec_shuf:$op V2I16Regs:$src1, V2I16Regs:$src2)), + (VecShuffle_v2i16 V2I16Regs:$src1, V2I16Regs:$src2, + (ShuffleMask0 node:$op), (ShuffleMask1 node:$op))>; + +def : Pat<(v4i8 (vec_shuf:$op V4I8Regs:$src1, V4I8Regs:$src2)), + (VecShuffle_v4i8 V4I8Regs:$src1, V4I8Regs:$src2, + (ShuffleMask0 node:$op), (ShuffleMask1 node:$op), + (ShuffleMask2 node:$op), (ShuffleMask3 node:$op))>; + +def : Pat<(v2i8 (vec_shuf:$op V2I8Regs:$src1, V2I8Regs:$src2)), + (VecShuffle_v2i8 V2I8Regs:$src1, V2I8Regs:$src2, + (ShuffleMask0 node:$op), (ShuffleMask1 node:$op))>; + +class Build_Vector2<string asmstr, NVPTXRegClass vclass, NVPTXRegClass sclass, + NVPTXInst si> + : NVPTXVecInst<(outs vclass:$dst), + (ins sclass:$a1, sclass:$a2), + !strconcat(asmstr, "\t${dst:vecfull}, {{$a1, $a2}};"), + [(set vclass:$dst, (build_vector sclass:$a1, sclass:$a2))], + si>; +class Build_Vector4<string asmstr, NVPTXRegClass vclass, NVPTXRegClass sclass, + NVPTXInst si> + : NVPTXVecInst<(outs vclass:$dst), + (ins sclass:$a1, sclass:$a2, sclass:$a3, sclass:$a4), + !strconcat(asmstr, "\t${dst:vecfull}, {{$a1, $a2, $a3, $a4}};"), + [(set vclass:$dst, + (build_vector sclass:$a1, sclass:$a2, + sclass:$a3, sclass:$a4))], si>; + +let isAsCheapAsAMove=1, VecInstType=isVecBuild.Value in { +def Build_Vector2_f32 : Build_Vector2<"mov.v2.f32", V2F32Regs, Float32Regs, + FMOV32rr>; +def Build_Vector2_f64 : Build_Vector2<"mov.v2.f64", V2F64Regs, Float64Regs, + FMOV64rr>; + +def Build_Vector2_i32 : Build_Vector2<"mov.v2.u32", V2I32Regs, Int32Regs, + IMOV32rr>; +def Build_Vector2_i64 : Build_Vector2<"mov.v2.u64", V2I64Regs, Int64Regs, + IMOV64rr>; +def Build_Vector2_i16 : Build_Vector2<"mov.v2.u16", V2I16Regs, Int16Regs, + IMOV16rr>; +def 
Build_Vector2_i8 : Build_Vector2<"mov.v2.u16", V2I8Regs, Int8Regs, + IMOV8rr>; + +def Build_Vector4_f32 : Build_Vector4<"mov.v4.f32", V4F32Regs, Float32Regs, + FMOV32rr>; + +def Build_Vector4_i32 : Build_Vector4<"mov.v4.u32", V4I32Regs, Int32Regs, + IMOV32rr>; +def Build_Vector4_i16 : Build_Vector4<"mov.v4.u16", V4I16Regs, Int16Regs, + IMOV16rr>; +def Build_Vector4_i8 : Build_Vector4<"mov.v4.u16", V4I8Regs, Int8Regs, + IMOV8rr>; +} + +class Vec_Move<string asmstr, NVPTXRegClass vclass, NVPTXInst sop=NOP> + : NVPTXVecInst<(outs vclass:$dst), (ins vclass:$src), + !strconcat(asmstr, "\t${dst:vecfull}, ${src:vecfull};"), + [], sop>; + +let isAsCheapAsAMove=1, neverHasSideEffects=1, IsSimpleMove=1, + VecInstType=isVecOther.Value in { +def V4f32Mov : Vec_Move<"mov.v4.f32", V4F32Regs, FMOV32rr>; +def V2f32Mov : Vec_Move<"mov.v2.f32", V2F32Regs, FMOV32rr>; + +def V4i32Mov : Vec_Move<"mov.v4.u32", V4I32Regs, IMOV32rr>; +def V2i32Mov : Vec_Move<"mov.v2.u32", V2I32Regs, IMOV32rr>; + +def V4i16Mov : Vec_Move<"mov.v4.u16", V4I16Regs, IMOV16rr>; +def V2i16Mov : Vec_Move<"mov.v2.u16", V2I16Regs, IMOV16rr>; + +def V4i8Mov : Vec_Move<"mov.v4.u16", V4I8Regs, IMOV8rr>; +def V2i8Mov : Vec_Move<"mov.v2.u16", V2I8Regs, IMOV8rr>; + +def V2f64Mov : Vec_Move<"mov.v2.f64", V2F64Regs, FMOV64rr>; +def V2i64Mov : Vec_Move<"mov.v2.u64", V2I64Regs, IMOV64rr>; +} + +// extract subvector patterns +def extract_subvec : SDNode<"ISD::EXTRACT_SUBVECTOR", + SDTypeProfile<1, 2, [SDTCisPtrTy<2>]>>; + +def : Pat<(v2f32 (extract_subvec V4F32Regs:$src, 0)), + (Build_Vector2_f32 (V4f32Extract V4F32Regs:$src, 0), + (V4f32Extract V4F32Regs:$src, 1))>; +def : Pat<(v2f32 (extract_subvec V4F32Regs:$src, 2)), + (Build_Vector2_f32 (V4f32Extract V4F32Regs:$src, 2), + (V4f32Extract V4F32Regs:$src, 3))>; +def : Pat<(v2i32 (extract_subvec V4I32Regs:$src, 0)), + (Build_Vector2_i32 (V4i32Extract V4I32Regs:$src, 0), + (V4i32Extract V4I32Regs:$src, 1))>; +def : Pat<(v2i32 (extract_subvec V4I32Regs:$src, 2)), + 
(Build_Vector2_i32 (V4i32Extract V4I32Regs:$src, 2), + (V4i32Extract V4I32Regs:$src, 3))>; +def : Pat<(v2i16 (extract_subvec V4I16Regs:$src, 0)), + (Build_Vector2_i16 (V4i16Extract V4I16Regs:$src, 0), + (V4i16Extract V4I16Regs:$src, 1))>; +def : Pat<(v2i16 (extract_subvec V4I16Regs:$src, 2)), + (Build_Vector2_i16 (V4i16Extract V4I16Regs:$src, 2), + (V4i16Extract V4I16Regs:$src, 3))>; +def : Pat<(v2i8 (extract_subvec V4I8Regs:$src, 0)), + (Build_Vector2_i8 (V4i8Extract V4I8Regs:$src, 0), + (V4i8Extract V4I8Regs:$src, 1))>; +def : Pat<(v2i8 (extract_subvec V4I8Regs:$src, 2)), + (Build_Vector2_i8 (V4i8Extract V4I8Regs:$src, 2), + (V4i8Extract V4I8Regs:$src, 3))>; + +// Select instructions +class Select_OneLine<string type, string pos> { + string t1 = !strconcat("selp.", type); + string t2 = !strconcat(t1, " \t${dst}_"); + string t3 = !strconcat(t2, pos); + string t4 = !strconcat(t3, ", ${src1}_"); + string t5 = !strconcat(t4, pos); + string t6 = !strconcat(t5, ", ${src2}_"); + string t7 = !strconcat(t6, pos); + string s = !strconcat(t7, ", $p;"); +} + +class Select_Str2<string type> { + string t1 = Select_OneLine<type, "0">.s; + string t2 = !strconcat(t1, "\n\t"); + string s = !strconcat(t2, Select_OneLine<type, "1">.s); +} + +class Select_Str4<string type> { + string t1 = Select_OneLine<type, "0">.s; + string t2 = !strconcat(t1, "\n\t"); + string t3 = !strconcat(t2, Select_OneLine<type, "1">.s); + string t4 = !strconcat(t3, "\n\t"); + string t5 = !strconcat(t4, Select_OneLine<type, "2">.s); + string t6 = !strconcat(t5, "\n\t"); + string s = !strconcat(t6, Select_OneLine<type, "3">.s); + +} + +class Vec_Select<NVPTXRegClass vclass, string asmstr, NVPTXInst sop> + : NVPTXVecInst<(outs vclass:$dst), + (ins vclass:$src1, vclass:$src2, Int1Regs:$p), + asmstr, + [(set vclass:$dst, (select Int1Regs:$p, vclass:$src1, + vclass:$src2))], + sop>; + +let VecInstType=isVecOther.Value in { +def V2I64_Select : Vec_Select<V2I64Regs, Select_Str2<"b64">.s, SELECTi64rr>; +def 
V4I32_Select : Vec_Select<V4I32Regs, Select_Str4<"b32">.s, SELECTi32rr>; +def V2I32_Select : Vec_Select<V2I32Regs, Select_Str2<"b32">.s, SELECTi32rr>; +def V4I16_Select : Vec_Select<V4I16Regs, Select_Str4<"b16">.s, SELECTi16rr>; +def V2I16_Select : Vec_Select<V2I16Regs, Select_Str2<"b16">.s, SELECTi16rr>; +def V4I8_Select : Vec_Select<V4I8Regs, Select_Str4<"b16">.s, SELECTi8rr>; +def V2I8_Select : Vec_Select<V2I8Regs, Select_Str2<"b16">.s, SELECTi8rr>; + +def V2F64_Select : Vec_Select<V2F64Regs, Select_Str2<"f64">.s, SELECTf64rr>; +def V4F32_Select : Vec_Select<V4F32Regs, Select_Str4<"f32">.s, SELECTf32rr>; +def V2F32_Select : Vec_Select<V2F32Regs, Select_Str2<"f32">.s, SELECTf32rr>; +} + +// Comparison instructions + +// setcc convenience fragments. +def vsetoeq : PatFrag<(ops node:$lhs, node:$rhs), + (setcc node:$lhs, node:$rhs, SETOEQ)>; +def vsetogt : PatFrag<(ops node:$lhs, node:$rhs), + (setcc node:$lhs, node:$rhs, SETOGT)>; +def vsetoge : PatFrag<(ops node:$lhs, node:$rhs), + (setcc node:$lhs, node:$rhs, SETOGE)>; +def vsetolt : PatFrag<(ops node:$lhs, node:$rhs), + (setcc node:$lhs, node:$rhs, SETOLT)>; +def vsetole : PatFrag<(ops node:$lhs, node:$rhs), + (setcc node:$lhs, node:$rhs, SETOLE)>; +def vsetone : PatFrag<(ops node:$lhs, node:$rhs), + (setcc node:$lhs, node:$rhs, SETONE)>; +def vseto : PatFrag<(ops node:$lhs, node:$rhs), + (setcc node:$lhs, node:$rhs, SETO)>; +def vsetuo : PatFrag<(ops node:$lhs, node:$rhs), + (setcc node:$lhs, node:$rhs, SETUO)>; +def vsetueq : PatFrag<(ops node:$lhs, node:$rhs), + (setcc node:$lhs, node:$rhs, SETUEQ)>; +def vsetugt : PatFrag<(ops node:$lhs, node:$rhs), + (setcc node:$lhs, node:$rhs, SETUGT)>; +def vsetuge : PatFrag<(ops node:$lhs, node:$rhs), + (setcc node:$lhs, node:$rhs, SETUGE)>; +def vsetult : PatFrag<(ops node:$lhs, node:$rhs), + (setcc node:$lhs, node:$rhs, SETULT)>; +def vsetule : PatFrag<(ops node:$lhs, node:$rhs), + (setcc node:$lhs, node:$rhs, SETULE)>; +def vsetune : PatFrag<(ops node:$lhs, 
node:$rhs), + (setcc node:$lhs, node:$rhs, SETUNE)>; +def vseteq : PatFrag<(ops node:$lhs, node:$rhs), + (setcc node:$lhs, node:$rhs, SETEQ)>; +def vsetgt : PatFrag<(ops node:$lhs, node:$rhs), + (setcc node:$lhs, node:$rhs, SETGT)>; +def vsetge : PatFrag<(ops node:$lhs, node:$rhs), + (setcc node:$lhs, node:$rhs, SETGE)>; +def vsetlt : PatFrag<(ops node:$lhs, node:$rhs), + (setcc node:$lhs, node:$rhs, SETLT)>; +def vsetle : PatFrag<(ops node:$lhs, node:$rhs), + (setcc node:$lhs, node:$rhs, SETLE)>; +def vsetne : PatFrag<(ops node:$lhs, node:$rhs), + (setcc node:$lhs, node:$rhs, SETNE)>; + +class Vec_Compare<PatFrag op, NVPTXRegClass outrclass, NVPTXRegClass inrclass, + NVPTXInst sop> + : NVPTXVecInst<(outs outrclass:$dst), + (ins inrclass:$a, inrclass:$b), + "Unsupported", + [(set outrclass:$dst, (op inrclass:$a, inrclass:$b))], + sop>; + +multiclass Vec_Compare_All<PatFrag op, + NVPTXInst inst8, + NVPTXInst inst16, + NVPTXInst inst32, + NVPTXInst inst64> +{ + def V2I8 : Vec_Compare<op, V2I8Regs, V2I8Regs, inst8>; + def V4I8 : Vec_Compare<op, V4I8Regs, V4I8Regs, inst8>; + def V2I16 : Vec_Compare<op, V2I16Regs, V2I16Regs, inst16>; + def V4I16 : Vec_Compare<op, V4I16Regs, V4I16Regs, inst16>; + def V2I32 : Vec_Compare<op, V2I32Regs, V2I32Regs, inst32>; + def V4I32 : Vec_Compare<op, V4I32Regs, V4I32Regs, inst32>; + def V2I64 : Vec_Compare<op, V2I64Regs, V2I64Regs, inst64>; +} + +let VecInstType=isVecOther.Value in { + defm VecSGT : Vec_Compare_All<vsetgt, ISetSGTi8rr_toi8, ISetSGTi16rr_toi16, + ISetSGTi32rr_toi32, ISetSGTi64rr_toi64>; + defm VecUGT : Vec_Compare_All<vsetugt, ISetUGTi8rr_toi8, ISetUGTi16rr_toi16, + ISetUGTi32rr_toi32, ISetUGTi64rr_toi64>; + defm VecSLT : Vec_Compare_All<vsetlt, ISetSLTi8rr_toi8, ISetSLTi16rr_toi16, + ISetSLTi32rr_toi32, ISetSLTi64rr_toi64>; + defm VecULT : Vec_Compare_All<vsetult, ISetULTi8rr_toi8, ISetULTi16rr_toi16, + ISetULTi32rr_toi32, ISetULTi64rr_toi64>; + defm VecSGE : Vec_Compare_All<vsetge, ISetSGEi8rr_toi8, ISetSGEi16rr_toi16, 
+ ISetSGEi32rr_toi32, ISetSGEi64rr_toi64>; + defm VecUGE : Vec_Compare_All<vsetuge, ISetUGEi8rr_toi8, ISetUGEi16rr_toi16, + ISetUGEi32rr_toi32, ISetUGEi64rr_toi64>; + defm VecSLE : Vec_Compare_All<vsetle, ISetSLEi8rr_toi8, ISetSLEi16rr_toi16, + ISetSLEi32rr_toi32, ISetSLEi64rr_toi64>; + defm VecULE : Vec_Compare_All<vsetule, ISetULEi8rr_toi8, ISetULEi16rr_toi16, + ISetULEi32rr_toi32, ISetULEi64rr_toi64>; + defm VecSEQ : Vec_Compare_All<vseteq, ISetSEQi8rr_toi8, ISetSEQi16rr_toi16, + ISetSEQi32rr_toi32, ISetSEQi64rr_toi64>; + defm VecUEQ : Vec_Compare_All<vsetueq, ISetUEQi8rr_toi8, ISetUEQi16rr_toi16, + ISetUEQi32rr_toi32, ISetUEQi64rr_toi64>; + defm VecSNE : Vec_Compare_All<vsetne, ISetSNEi8rr_toi8, ISetSNEi16rr_toi16, + ISetSNEi32rr_toi32, ISetSNEi64rr_toi64>; + defm VecUNE : Vec_Compare_All<vsetune, ISetUNEi8rr_toi8, ISetUNEi16rr_toi16, + ISetUNEi32rr_toi32, ISetUNEi64rr_toi64>; +} + +multiclass FVec_Compare_All<PatFrag op, + NVPTXInst instf32, + NVPTXInst instf64> +{ + def V2F32 : Vec_Compare<op, V2I32Regs, V2F32Regs, instf32>; + def V4F32 : Vec_Compare<op, V4I32Regs, V4F32Regs, instf32>; + def V2F64 : Vec_Compare<op, V2I64Regs, V2F64Regs, instf64>; +} + +let VecInstType=isVecOther.Value in { + defm FVecGT : FVec_Compare_All<vsetogt, FSetGTf32rr_toi32, + FSetGTf64rr_toi64>; + defm FVecLT : FVec_Compare_All<vsetolt, FSetLTf32rr_toi32, + FSetLTf64rr_toi64>; + defm FVecGE : FVec_Compare_All<vsetoge, FSetGEf32rr_toi32, + FSetGEf64rr_toi64>; + defm FVecLE : FVec_Compare_All<vsetole, FSetLEf32rr_toi32, + FSetLEf64rr_toi64>; + defm FVecEQ : FVec_Compare_All<vsetoeq, FSetEQf32rr_toi32, + FSetEQf64rr_toi64>; + defm FVecNE : FVec_Compare_All<vsetone, FSetNEf32rr_toi32, + FSetNEf64rr_toi64>; + + defm FVecUGT : FVec_Compare_All<vsetugt, FSetUGTf32rr_toi32, + FSetUGTf64rr_toi64>; + defm FVecULT : FVec_Compare_All<vsetult, FSetULTf32rr_toi32, + FSetULTf64rr_toi64>; + defm FVecUGE : FVec_Compare_All<vsetuge, FSetUGEf32rr_toi32, + FSetUGEf64rr_toi64>; + defm FVecULE : 
FVec_Compare_All<vsetule, FSetULEf32rr_toi32, + FSetULEf64rr_toi64>; + defm FVecUEQ : FVec_Compare_All<vsetueq, FSetUEQf32rr_toi32, + FSetUEQf64rr_toi64>; + defm FVecUNE : FVec_Compare_All<vsetune, FSetUNEf32rr_toi32, + FSetUNEf64rr_toi64>; + + defm FVecNUM : FVec_Compare_All<vseto, FSetNUMf32rr_toi32, + FSetNUMf64rr_toi64>; + defm FVecNAN : FVec_Compare_All<vsetuo, FSetNANf32rr_toi32, + FSetNANf64rr_toi64>; +} + +class LoadParamScalar4Inst<NVPTXRegClass regclass, string opstr> : + NVPTXInst<(outs regclass:$d1, regclass:$d2, regclass:$d3, regclass:$d4), + (ins i32imm:$a, i32imm:$b), + !strconcat(!strconcat("ld.param", opstr), + "\t{{$d1, $d2, $d3, $d4}}, [retval0+$b];"), []>; + +class LoadParamScalar2Inst<NVPTXRegClass regclass, string opstr> : + NVPTXInst<(outs regclass:$d1, regclass:$d2), + (ins i32imm:$a, i32imm:$b), + !strconcat(!strconcat("ld.param", opstr), + "\t{{$d1, $d2}}, [retval0+$b];"), []>; + + +class StoreParamScalar4Inst<NVPTXRegClass regclass, string opstr> : + NVPTXInst<(outs), + (ins regclass:$s1, regclass:$s2, regclass:$s3, regclass:$s4, + i32imm:$a, i32imm:$b), + !strconcat(!strconcat("st.param", opstr), + "\t[param$a+$b], {{$s1, $s2, $s3, $s4}};"), []>; + +class StoreParamScalar2Inst<NVPTXRegClass regclass, string opstr> : + NVPTXInst<(outs), + (ins regclass:$s1, regclass:$s2, i32imm:$a, i32imm:$b), + !strconcat(!strconcat("st.param", opstr), + "\t[param$a+$b], {{$s1, $s2}};"), []>; + +class StoreRetvalScalar4Inst<NVPTXRegClass regclass, string opstr> : + NVPTXInst<(outs), + (ins regclass:$s1, regclass:$s2, regclass:$s3, regclass:$s4, + i32imm:$a), + !strconcat(!strconcat("st.param", opstr), + "\t[func_retval+$a], {{$s1, $s2, $s3, $s4}};"), []>; + +class StoreRetvalScalar2Inst<NVPTXRegClass regclass, string opstr> : + NVPTXInst<(outs), + (ins regclass:$s1, regclass:$s2, i32imm:$a), + !strconcat(!strconcat("st.param", opstr), + "\t[func_retval+$a], {{$s1, $s2}};"), []>; + +def LoadParamScalar4I32 : LoadParamScalar4Inst<Int32Regs, ".v4.b32">; 
+def LoadParamScalar4I16 : LoadParamScalar4Inst<Int16Regs, ".v4.b16">;
+def LoadParamScalar4I8 : LoadParamScalar4Inst<Int8Regs, ".v4.b8">;
+
+// Two-element scalarized parameter loads.  The destination register class
+// has to match the element width named in the opcode suffix, exactly as the
+// StoreParamScalar2* and StoreRetvalScalar2* definitions below do; the i64,
+// i16 and i8 forms previously all used Int32Regs.
+def LoadParamScalar2I64 : LoadParamScalar2Inst<Int64Regs, ".v2.b64">;
+def LoadParamScalar2I32 : LoadParamScalar2Inst<Int32Regs, ".v2.b32">;
+def LoadParamScalar2I16 : LoadParamScalar2Inst<Int16Regs, ".v2.b16">;
+def LoadParamScalar2I8 : LoadParamScalar2Inst<Int8Regs, ".v2.b8">;
+
+// Floating-point scalarized parameter loads.
+def LoadParamScalar4F32 : LoadParamScalar4Inst<Float32Regs, ".v4.f32">;
+def LoadParamScalar2F32 : LoadParamScalar2Inst<Float32Regs, ".v2.f32">;
+def LoadParamScalar2F64 : LoadParamScalar2Inst<Float64Regs, ".v2.f64">;
+
+// Scalarized stores to an outgoing call parameter ([param$a+$b]).
+def StoreParamScalar4I32 : StoreParamScalar4Inst<Int32Regs, ".v4.b32">;
+def StoreParamScalar4I16 : StoreParamScalar4Inst<Int16Regs, ".v4.b16">;
+def StoreParamScalar4I8 : StoreParamScalar4Inst<Int8Regs, ".v4.b8">;
+
+def StoreParamScalar2I64 : StoreParamScalar2Inst<Int64Regs, ".v2.b64">;
+def StoreParamScalar2I32 : StoreParamScalar2Inst<Int32Regs, ".v2.b32">;
+def StoreParamScalar2I16 : StoreParamScalar2Inst<Int16Regs, ".v2.b16">;
+def StoreParamScalar2I8 : StoreParamScalar2Inst<Int8Regs, ".v2.b8">;
+
+def StoreParamScalar4F32 : StoreParamScalar4Inst<Float32Regs, ".v4.f32">;
+def StoreParamScalar2F32 : StoreParamScalar2Inst<Float32Regs, ".v2.f32">;
+def StoreParamScalar2F64 : StoreParamScalar2Inst<Float64Regs, ".v2.f64">;
+
+// Scalarized stores to the function return slot ([func_retval+$a]).
+def StoreRetvalScalar4I32 : StoreRetvalScalar4Inst<Int32Regs, ".v4.b32">;
+def StoreRetvalScalar4I16 : StoreRetvalScalar4Inst<Int16Regs, ".v4.b16">;
+def StoreRetvalScalar4I8 : StoreRetvalScalar4Inst<Int8Regs, ".v4.b8">;
+
+def StoreRetvalScalar2I64 : StoreRetvalScalar2Inst<Int64Regs, ".v2.b64">;
+def StoreRetvalScalar2I32 : StoreRetvalScalar2Inst<Int32Regs, ".v2.b32">;
+def StoreRetvalScalar2I16 : StoreRetvalScalar2Inst<Int16Regs, ".v2.b16">;
+def StoreRetvalScalar2I8 : StoreRetvalScalar2Inst<Int8Regs, ".v2.b8">;
+
+def StoreRetvalScalar4F32 : StoreRetvalScalar4Inst<Float32Regs, ".v4.f32">;
+def
StoreRetvalScalar2F32 : StoreRetvalScalar2Inst<Float32Regs, ".v2.f32">; +def StoreRetvalScalar2F64 : StoreRetvalScalar2Inst<Float64Regs, ".v2.f64">; + +class LoadParamVecInst<NVPTXRegClass regclass, string opstr, NVPTXInst sop=NOP>: + NVPTXVecInst<(outs regclass:$dst), (ins i32imm:$a, i32imm:$b), + "loadparam : $dst <- [$a, $b]", + [(set regclass:$dst, (LoadParam (i32 imm:$a), (i32 imm:$b)))], + sop>; + +class StoreParamVecInst<NVPTXRegClass regclass, string opstr, NVPTXInst sop=NOP> + : NVPTXVecInst<(outs), (ins regclass:$val, i32imm:$a, i32imm:$b), + "storeparam : [$a, $b] <- $val", + [(StoreParam (i32 imm:$a), (i32 imm:$b), regclass:$val)], sop>; + +class StoreRetvalVecInst<NVPTXRegClass regclass, string opstr, + NVPTXInst sop=NOP> + : NVPTXVecInst<(outs), (ins regclass:$val, i32imm:$a), + "storeretval : retval[$a] <- $val", + [(StoreRetval (i32 imm:$a), regclass:$val)], sop>; + +let VecInstType=isVecLD.Value in { +def LoadParamV4I32 : LoadParamVecInst<V4I32Regs, ".v4.b32", + LoadParamScalar4I32>; +def LoadParamV4I16 : LoadParamVecInst<V4I16Regs, ".v4.b16", + LoadParamScalar4I16>; +def LoadParamV4I8 : LoadParamVecInst<V4I8Regs, ".v4.b8", + LoadParamScalar4I8>; + +def LoadParamV2I64 : LoadParamVecInst<V2I64Regs, ".v2.b64", + LoadParamScalar2I64>; +def LoadParamV2I32 : LoadParamVecInst<V2I32Regs, ".v2.b32", + LoadParamScalar2I32>; +def LoadParamV2I16 : LoadParamVecInst<V2I16Regs, ".v2.b16", + LoadParamScalar2I16>; +def LoadParamV2I8 : LoadParamVecInst<V2I8Regs, ".v2.b8", + LoadParamScalar2I8>; + +def LoadParamV4F32 : LoadParamVecInst<V4F32Regs, ".v4.f32", + LoadParamScalar4F32>; +def LoadParamV2F32 : LoadParamVecInst<V2F32Regs, ".v2.f32", + LoadParamScalar2F32>; +def LoadParamV2F64 : LoadParamVecInst<V2F64Regs, ".v2.f64", + LoadParamScalar2F64>; +} + +let VecInstType=isVecST.Value in { +def StoreParamV4I32 : StoreParamVecInst<V4I32Regs, ".v4.b32", + StoreParamScalar4I32>; +def StoreParamV4I16 : StoreParamVecInst<V4I16Regs, ".v4.b16", + StoreParamScalar4I16>; +def 
StoreParamV4I8 : StoreParamVecInst<V4I8Regs, ".v4.b8", + StoreParamScalar4I8>; + +def StoreParamV2I64 : StoreParamVecInst<V2I64Regs, ".v2.b64", + StoreParamScalar2I64>; +def StoreParamV2I32 : StoreParamVecInst<V2I32Regs, ".v2.b32", + StoreParamScalar2I32>; +def StoreParamV2I16 : StoreParamVecInst<V2I16Regs, ".v2.b16", + StoreParamScalar2I16>; +def StoreParamV2I8 : StoreParamVecInst<V2I8Regs, ".v2.b8", + StoreParamScalar2I8>; + +def StoreParamV4F32 : StoreParamVecInst<V4F32Regs, ".v4.f32", + StoreParamScalar4F32>; +def StoreParamV2F32 : StoreParamVecInst<V2F32Regs, ".v2.f32", + StoreParamScalar2F32>; +def StoreParamV2F64 : StoreParamVecInst<V2F64Regs, ".v2.f64", + StoreParamScalar2F64>; + +def StoreRetvalV4I32 : StoreRetvalVecInst<V4I32Regs, ".v4.b32", + StoreRetvalScalar4I32>; +def StoreRetvalV4I16 : StoreRetvalVecInst<V4I16Regs, ".v4.b16", + StoreRetvalScalar4I16>; +def StoreRetvalV4I8 : StoreRetvalVecInst<V4I8Regs, ".v4.b8", + StoreRetvalScalar4I8>; + +def StoreRetvalV2I64 : StoreRetvalVecInst<V2I64Regs, ".v2.b64", + StoreRetvalScalar2I64>; +def StoreRetvalV2I32 : StoreRetvalVecInst<V2I32Regs, ".v2.b32", + StoreRetvalScalar2I32>; +def StoreRetvalV2I16 : StoreRetvalVecInst<V2I16Regs, ".v2.b16", + StoreRetvalScalar2I16>; +def StoreRetvalV2I8 : StoreRetvalVecInst<V2I8Regs, ".v2.b8", + StoreRetvalScalar2I8>; + +def StoreRetvalV4F32 : StoreRetvalVecInst<V4F32Regs, ".v4.f32", + StoreRetvalScalar4F32>; +def StoreRetvalV2F32 : StoreRetvalVecInst<V2F32Regs, ".v2.f32", + StoreRetvalScalar2F32>; +def StoreRetvalV2F64 : StoreRetvalVecInst<V2F64Regs, ".v2.f64", + StoreRetvalScalar2F64>; + +} + + +// Int vector to int scalar bit convert +// v4i8 -> i32 +def : Pat<(i32 (bitconvert V4I8Regs:$s)), + (V4I8toI32 (V4i8Extract V4I8Regs:$s,0), (V4i8Extract V4I8Regs:$s,1), + (V4i8Extract V4I8Regs:$s,2), (V4i8Extract V4I8Regs:$s,3))>; +// v4i16 -> i64 +def : Pat<(i64 (bitconvert V4I16Regs:$s)), + (V4I16toI64 (V4i16Extract V4I16Regs:$s,0), + (V4i16Extract V4I16Regs:$s,1), + (V4i16Extract 
V4I16Regs:$s,2), + (V4i16Extract V4I16Regs:$s,3))>; +// v2i8 -> i16 +def : Pat<(i16 (bitconvert V2I8Regs:$s)), + (V2I8toI16 (V2i8Extract V2I8Regs:$s,0), (V2i8Extract V2I8Regs:$s,1))>; +// v2i16 -> i32 +def : Pat<(i32 (bitconvert V2I16Regs:$s)), + (V2I16toI32 (V2i16Extract V2I16Regs:$s,0), + (V2i16Extract V2I16Regs:$s,1))>; +// v2i32 -> i64 +def : Pat<(i64 (bitconvert V2I32Regs:$s)), + (V2I32toI64 (V2i32Extract V2I32Regs:$s,0), + (V2i32Extract V2I32Regs:$s,1))>; + +// Int scalar to int vector bit convert +let VecInstType=isVecDest.Value in { +// i32 -> v4i8 +def VecI32toV4I8 : NVPTXVecInst<(outs V4I8Regs:$d), (ins Int32Regs:$s), + "Error!", + [(set V4I8Regs:$d, (bitconvert Int32Regs:$s))], + I32toV4I8>; +// i64 -> v4i16 +def VecI64toV4I16 : NVPTXVecInst<(outs V4I16Regs:$d), (ins Int64Regs:$s), + "Error!", + [(set V4I16Regs:$d, (bitconvert Int64Regs:$s))], + I64toV4I16>; +// i16 -> v2i8 +def VecI16toV2I8 : NVPTXVecInst<(outs V2I8Regs:$d), (ins Int16Regs:$s), + "Error!", + [(set V2I8Regs:$d, (bitconvert Int16Regs:$s))], + I16toV2I8>; +// i32 -> v2i16 +def VecI32toV2I16 : NVPTXVecInst<(outs V2I16Regs:$d), (ins Int32Regs:$s), + "Error!", + [(set V2I16Regs:$d, (bitconvert Int32Regs:$s))], + I32toV2I16>; +// i64 -> v2i32 +def VecI64toV2I32 : NVPTXVecInst<(outs V2I32Regs:$d), (ins Int64Regs:$s), + "Error!", + [(set V2I32Regs:$d, (bitconvert Int64Regs:$s))], + I64toV2I32>; +} + +// Int vector to int vector bit convert +// v4i8 -> v2i16 +def : Pat<(v2i16 (bitconvert V4I8Regs:$s)), + (VecI32toV2I16 + (V4I8toI32 (V4i8Extract V4I8Regs:$s,0), (V4i8Extract V4I8Regs:$s,1), + (V4i8Extract V4I8Regs:$s,2), (V4i8Extract V4I8Regs:$s,3)))>; +// v4i16 -> v2i32 +def : Pat<(v2i32 (bitconvert V4I16Regs:$s)), + (VecI64toV2I32 + (V4I16toI64 (V4i16Extract V4I16Regs:$s,0), (V4i16Extract V4I16Regs:$s,1), + (V4i16Extract V4I16Regs:$s,2), (V4i16Extract V4I16Regs:$s,3)))>; +// v2i16 -> v4i8 +def : Pat<(v4i8 (bitconvert V2I16Regs:$s)), + (VecI32toV4I8 + (V2I16toI32 (V2i16Extract V2I16Regs:$s,0), 
(V2i16Extract V2I16Regs:$s,1)))>; +// v2i32 -> v4i16 +def : Pat<(v4i16 (bitconvert V2I32Regs:$s)), + (VecI64toV4I16 + (V2I32toI64 (V2i32Extract V2I32Regs:$s,0), (V2i32Extract V2I32Regs:$s,1)))>; +// v2i64 -> v4i32 +def : Pat<(v4i32 (bitconvert V2I64Regs:$s)), + (Build_Vector4_i32 + (V2i32Extract (VecI64toV2I32 (V2i64Extract V2I64Regs:$s, 0)), 0), + (V2i32Extract (VecI64toV2I32 (V2i64Extract V2I64Regs:$s, 0)), 1), + (V2i32Extract (VecI64toV2I32 (V2i64Extract V2I64Regs:$s, 1)), 0), + (V2i32Extract (VecI64toV2I32 (V2i64Extract V2I64Regs:$s, 1)), 1))>; +// v4i32 -> v2i64 +def : Pat<(v2i64 (bitconvert V4I32Regs:$s)), + (Build_Vector2_i64 + (V2I32toI64 (V4i32Extract V4I32Regs:$s,0), (V4i32Extract V4I32Regs:$s,1)), + (V2I32toI64 (V4i32Extract V4I32Regs:$s,2), (V4i32Extract V4I32Regs:$s,3)))>; + +// Fp scalar to fp vector convert +// f64 -> v2f32 +let VecInstType=isVecDest.Value in { +def VecF64toV2F32 : NVPTXVecInst<(outs V2F32Regs:$d), (ins Float64Regs:$s), + "Error!", + [(set V2F32Regs:$d, (bitconvert Float64Regs:$s))], + F64toV2F32>; +} + +// Fp vector to fp scalar convert +// v2f32 -> f64 +def : Pat<(f64 (bitconvert V2F32Regs:$s)), + (V2F32toF64 (V2f32Extract V2F32Regs:$s,0), (V2f32Extract V2F32Regs:$s,1))>; + +// Fp scalar to int vector convert +// f32 -> v4i8 +def : Pat<(v4i8 (bitconvert Float32Regs:$s)), + (VecI32toV4I8 (BITCONVERT_32_F2I Float32Regs:$s))>; +// f32 -> v2i16 +def : Pat<(v2i16 (bitconvert Float32Regs:$s)), + (VecI32toV2I16 (BITCONVERT_32_F2I Float32Regs:$s))>; +// f64 -> v4i16 +def : Pat<(v4i16 (bitconvert Float64Regs:$s)), + (VecI64toV4I16 (BITCONVERT_64_F2I Float64Regs:$s))>; +// f64 -> v2i32 +def : Pat<(v2i32 (bitconvert Float64Regs:$s)), + (VecI64toV2I32 (BITCONVERT_64_F2I Float64Regs:$s))>; + +// Int vector to fp scalar convert +// v4i8 -> f32 +def : Pat<(f32 (bitconvert V4I8Regs:$s)), + (BITCONVERT_32_I2F + (V4I8toI32 (V4i8Extract V4I8Regs:$s,0), (V4i8Extract V4I8Regs:$s,1), + (V4i8Extract V4I8Regs:$s,2), (V4i8Extract V4I8Regs:$s,3)))>; +// 
v4i16 -> f64 +def : Pat<(f64 (bitconvert V4I16Regs:$s)), + (BITCONVERT_64_I2F + (V4I16toI64 (V4i16Extract V4I16Regs:$s,0), (V4i16Extract V4I16Regs:$s,1), + (V4i16Extract V4I16Regs:$s,2), (V4i16Extract V4I16Regs:$s,3)))>; +// v2i16 -> f32 +def : Pat<(f32 (bitconvert V2I16Regs:$s)), + (BITCONVERT_32_I2F + (V2I16toI32 (V2i16Extract V2I16Regs:$s,0), (V2i16Extract V2I16Regs:$s,1)))>; +// v2i32 -> f64 +def : Pat<(f64 (bitconvert V2I32Regs:$s)), + (BITCONVERT_64_I2F + (V2I32toI64 (V2i32Extract V2I32Regs:$s,0), (V2i32Extract V2I32Regs:$s,1)))>; + +// Int scalar to fp vector convert +// i64 -> v2f32 +def : Pat<(v2f32 (bitconvert Int64Regs:$s)), + (VecF64toV2F32 (BITCONVERT_64_I2F Int64Regs:$s))>; + +// Fp vector to int scalar convert +// v2f32 -> i64 +def : Pat<(i64 (bitconvert V2F32Regs:$s)), + (BITCONVERT_64_F2I + (V2F32toF64 (V2f32Extract V2F32Regs:$s,0), (V2f32Extract V2F32Regs:$s,1)))>; + +// Int vector to fp vector convert +// v2i64 -> v4f32 +def : Pat<(v4f32 (bitconvert V2I64Regs:$s)), + (Build_Vector4_f32 + (BITCONVERT_32_I2F (V2i32Extract (VecI64toV2I32 + (V2i64Extract V2I64Regs:$s, 0)), 0)), + (BITCONVERT_32_I2F (V2i32Extract (VecI64toV2I32 + (V2i64Extract V2I64Regs:$s, 0)), 1)), + (BITCONVERT_32_I2F (V2i32Extract (VecI64toV2I32 + (V2i64Extract V2I64Regs:$s, 1)), 0)), + (BITCONVERT_32_I2F (V2i32Extract (VecI64toV2I32 + (V2i64Extract V2I64Regs:$s, 1)), 1)))>; +// v2i64 -> v2f64 +def : Pat<(v2f64 (bitconvert V2I64Regs:$s)), + (Build_Vector2_f64 + (BITCONVERT_64_I2F (V2i64Extract V2I64Regs:$s,0)), + (BITCONVERT_64_I2F (V2i64Extract V2I64Regs:$s,1)))>; +// v2i32 -> v2f32 +def : Pat<(v2f32 (bitconvert V2I32Regs:$s)), + (Build_Vector2_f32 + (BITCONVERT_32_I2F (V2i32Extract V2I32Regs:$s,0)), + (BITCONVERT_32_I2F (V2i32Extract V2I32Regs:$s,1)))>; +// v4i32 -> v2f64 +def : Pat<(v2f64 (bitconvert V4I32Regs:$s)), + (Build_Vector2_f64 + (BITCONVERT_64_I2F (V2I32toI64 (V4i32Extract V4I32Regs:$s,0), + (V4i32Extract V4I32Regs:$s,1))), + (BITCONVERT_64_I2F (V2I32toI64 
(V4i32Extract V4I32Regs:$s,2), + (V4i32Extract V4I32Regs:$s,3))))>; +// v4i32 -> v4f32 +def : Pat<(v4f32 (bitconvert V4I32Regs:$s)), + (Build_Vector4_f32 + (BITCONVERT_32_I2F (V4i32Extract V4I32Regs:$s,0)), + (BITCONVERT_32_I2F (V4i32Extract V4I32Regs:$s,1)), + (BITCONVERT_32_I2F (V4i32Extract V4I32Regs:$s,2)), + (BITCONVERT_32_I2F (V4i32Extract V4I32Regs:$s,3)))>; +// v4i16 -> v2f32 +def : Pat<(v2f32 (bitconvert V4I16Regs:$s)), + (VecF64toV2F32 (BITCONVERT_64_I2F + (V4I16toI64 (V4i16Extract V4I16Regs:$s,0), + (V4i16Extract V4I16Regs:$s,1), + (V4i16Extract V4I16Regs:$s,2), + (V4i16Extract V4I16Regs:$s,3))))>; + +// Fp vector to int vector convert +// v2i64 <- v4f32 +def : Pat<(v2i64 (bitconvert V4F32Regs:$s)), + (Build_Vector2_i64 + (BITCONVERT_64_F2I (V2F32toF64 (V4f32Extract V4F32Regs:$s,0), + (V4f32Extract V4F32Regs:$s,1))), + (BITCONVERT_64_F2I (V2F32toF64 (V4f32Extract V4F32Regs:$s,2), + (V4f32Extract V4F32Regs:$s,3))))>; +// v2i64 <- v2f64 +def : Pat<(v2i64 (bitconvert V2F64Regs:$s)), + (Build_Vector2_i64 + (BITCONVERT_64_F2I (V2f64Extract V2F64Regs:$s,0)), + (BITCONVERT_64_F2I (V2f64Extract V2F64Regs:$s,1)))>; +// v2i32 <- v2f32 +def : Pat<(v2i32 (bitconvert V2F32Regs:$s)), + (Build_Vector2_i32 + (BITCONVERT_32_F2I (V2f32Extract V2F32Regs:$s,0)), + (BITCONVERT_32_F2I (V2f32Extract V2F32Regs:$s,1)))>; +// v4i32 <- v2f64 +def : Pat<(v4i32 (bitconvert V2F64Regs:$s)), + (Build_Vector4_i32 + (BITCONVERT_32_F2I (V2f32Extract (VecF64toV2F32 + (V2f64Extract V2F64Regs:$s, 0)), 0)), + (BITCONVERT_32_F2I (V2f32Extract (VecF64toV2F32 + (V2f64Extract V2F64Regs:$s, 0)), 1)), + (BITCONVERT_32_F2I (V2f32Extract (VecF64toV2F32 + (V2f64Extract V2F64Regs:$s, 1)), 0)), + (BITCONVERT_32_F2I (V2f32Extract (VecF64toV2F32 + (V2f64Extract V2F64Regs:$s, 1)), 1)))>; +// v4i32 <- v4f32 +def : Pat<(v4i32 (bitconvert V4F32Regs:$s)), + (Build_Vector4_i32 + (BITCONVERT_32_F2I (V4f32Extract V4F32Regs:$s,0)), + (BITCONVERT_32_F2I (V4f32Extract V4F32Regs:$s,1)), + (BITCONVERT_32_F2I 
(V4f32Extract V4F32Regs:$s,2)),
+          (BITCONVERT_32_F2I (V4f32Extract V4F32Regs:$s,3)))>;
+// v4i16 <- v2f32
+def : Pat<(v4i16 (bitconvert V2F32Regs:$s)),
+          (VecI64toV4I16 (BITCONVERT_64_F2I
+          (V2F32toF64 (V2f32Extract V2F32Regs:$s,0),
+                      (V2f32Extract V2F32Regs:$s,1))))>;
diff --git a/lib/Target/NVPTX/NVPTXutil.cpp b/lib/Target/NVPTX/NVPTXutil.cpp
new file mode 100644
index 0000000..de311d2
--- /dev/null
+++ b/lib/Target/NVPTX/NVPTXutil.cpp
@@ -0,0 +1,118 @@
+//===-- NVPTXutil.cpp - Functions exported to CodeGen --*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains the functions that can be used in CodeGen.
+//
+//===----------------------------------------------------------------------===//
+
+#include "NVPTXutil.h"
+#include "NVPTX.h"
+
+#include <cassert>
+#include <cstring>
+
+using namespace llvm;
+
+namespace llvm {
+
+/// Return true if \p MI is an LD_i32_avar or LD_i64_avar instruction whose
+/// operand 2 is an immediate equal to NVPTX::PTXLdStInstCode::PARAM.
+bool isParamLoad(const MachineInstr *MI)
+{
+  if ((MI->getOpcode() != NVPTX::LD_i32_avar) &&
+      (MI->getOpcode() != NVPTX::LD_i64_avar))
+    return false;
+  if (MI->getOperand(2).isImm() == false)
+    return false;
+  if (MI->getOperand(2).getImm() != NVPTX::PTXLdStInstCode::PARAM)
+    return false;
+  return true;
+}
+
+#define DATA_MASK 0x7f
+#define DIGIT_WIDTH 7
+#define MORE_BYTES 0x80
+
+/// Write the unsigned LEB128 encoding of \p val into \p space, which holds
+/// \p splen bytes: 7 payload bits per byte, low bits first, with MORE_BYTES
+/// set on every byte except the last.  On success store the byte count in
+/// \p nbytes and return 0; return 1 (leaving \p nbytes untouched) if the
+/// buffer is too small.
+static int encode_leb128(uint64_t val, int *nbytes,
+                         char *space, int splen)
+{
+  char *a;
+  char *end = space + splen;
+
+  a = space;
+  do {
+    unsigned char uc;
+
+    if (a >= end)
+      return 1;
+    uc = val & DATA_MASK;
+    val >>= DIGIT_WIDTH;
+    if (val != 0)
+      uc |= MORE_BYTES;
+    *a = uc;
+    a++;
+  } while (val);
+  *nbytes = a - space;
+  return 0;
+}
+
+#undef DATA_MASK
+#undef DIGIT_WIDTH
+#undef MORE_BYTES
+
+/// Pack the characters of \p str, last character first, into the bytes of a
+/// 64-bit value, LEB128-encode that value, and return the encoded bytes
+/// packed (in order) into the bytes of a uint64_t.
+///
+/// Both the name and its encoding must fit in 8 bytes.  Violations assert;
+/// in builds without assertions only the bytes that fit are used, so the
+/// 8-byte scratch buffer is never overrun.
+uint64_t encode_leb128(const char *str)
+{
+  union { uint64_t x; char a[8]; } temp64;
+  const size_t capacity = sizeof(temp64.a);
+
+  const size_t len = strlen(str);
+  assert(len <= capacity &&
+         "Cannot support register names longer than 8 characters");
+  // Keep the trailing characters if the name is too long.
+  const size_t used = len < capacity ? len : capacity;
+
+  temp64.x = 0;
+  for (size_t i = 0; i != used; ++i)
+    temp64.a[i] = str[len-1-i];
+
+  char encoded[16];
+  int nbytes = 0;
+
+  int retval = encode_leb128(temp64.x, &nbytes, encoded, 16);
+
+  assert(retval == 0 &&
+         "Encoding to leb128 failed");
+  (void)retval; // only read by the assert above
+
+  assert(nbytes <= 8 &&
+         "Cannot support register names with leb128 encoding > 8 bytes");
+  // A full 64-bit value can encode to 10 bytes; never copy more than fit.
+  if (nbytes > static_cast<int>(capacity))
+    nbytes = static_cast<int>(capacity);
+
+  temp64.x = 0;
+  for (int i=0; i<nbytes; ++i)
+    temp64.a[i] = encoded[i];
+
+  return temp64.x;
+}
+
+} // end namespace llvm
diff --git a/lib/Target/NVPTX/NVPTXutil.h b/lib/Target/NVPTX/NVPTXutil.h
new file mode 100644
index 0000000..d1d1171
--- /dev/null
+++ b/lib/Target/NVPTX/NVPTXutil.h
@@ -0,0 +1,25 @@
+//===-- NVPTXutil.h - Functions exported to CodeGen --*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains the functions that can be used in CodeGen.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_TARGET_NVPTX_UTIL_H
+#define LLVM_TARGET_NVPTX_UTIL_H
+
+#include "llvm/CodeGen/MachineFunction.h"
+#include "llvm/CodeGen/MachineInstr.h"
+
+namespace llvm {
+bool isParamLoad(const MachineInstr *);
+uint64_t encode_leb128(const char *str);
+}
+
+#endif
diff --git a/lib/Target/NVPTX/TargetInfo/CMakeLists.txt b/lib/Target/NVPTX/TargetInfo/CMakeLists.txt
new file mode 100644
index 0000000..0bf1334
--- /dev/null
+++ b/lib/Target/NVPTX/TargetInfo/CMakeLists.txt
@@ -0,0 +1,7 @@
+#include_directories( ${CMAKE_CURRENT_BINARY_DIR}/.. ${CMAKE_CURRENT_SOURCE_DIR}/..
) + +add_llvm_library(LLVMNVPTXInfo + NVPTXTargetInfo.cpp + ) + +add_dependencies(LLVMNVPTXInfo NVPTXCommonTableGen) diff --git a/lib/Target/NVPTX/TargetInfo/LLVMBuild.txt b/lib/Target/NVPTX/TargetInfo/LLVMBuild.txt new file mode 100644 index 0000000..ef12b0e --- /dev/null +++ b/lib/Target/NVPTX/TargetInfo/LLVMBuild.txt @@ -0,0 +1,23 @@ +;===- ./lib/Target/NVPTX/TargetInfo/LLVMBuild.txt --------------*- Conf -*--===; +; +; The LLVM Compiler Infrastructure +; +; This file is distributed under the University of Illinois Open Source +; License. See LICENSE.TXT for details. +; +;===------------------------------------------------------------------------===; +; +; This is an LLVMBuild description file for the components in this subdirectory. +; +; For more information on the LLVMBuild system, please see: +; +; http://llvm.org/docs/LLVMBuild.html +; +;===------------------------------------------------------------------------===; + +[component_0] +type = Library +name = NVPTXInfo +parent = NVPTX +required_libraries = MC Support Target +add_to_library_groups = NVPTX diff --git a/lib/Target/NVPTX/TargetInfo/Makefile b/lib/Target/NVPTX/TargetInfo/Makefile new file mode 100644 index 0000000..8622315 --- /dev/null +++ b/lib/Target/NVPTX/TargetInfo/Makefile @@ -0,0 +1,15 @@ +##===- lib/Target/NVPTX/TargetInfo/Makefile ----------------*- Makefile -*-===## +# +# The LLVM Compiler Infrastructure +# +# This file is distributed under the University of Illinois Open Source +# License. See LICENSE.TXT for details. +# +##===----------------------------------------------------------------------===## +LEVEL = ../../../.. +LIBRARYNAME = LLVMNVPTXInfo + +# Hack: we need to include 'main' target directory to grab private headers +CPPFLAGS = -I$(PROJ_OBJ_DIR)/.. -I$(PROJ_SRC_DIR)/.. 
+ +include $(LEVEL)/Makefile.common diff --git a/lib/Target/NVPTX/TargetInfo/NVPTXTargetInfo.cpp b/lib/Target/NVPTX/TargetInfo/NVPTXTargetInfo.cpp new file mode 100644 index 0000000..f3624b9 --- /dev/null +++ b/lib/Target/NVPTX/TargetInfo/NVPTXTargetInfo.cpp @@ -0,0 +1,23 @@ +//===-- NVPTXTargetInfo.cpp - NVPTX Target Implementation -----------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// + +#include "NVPTX.h" +#include "llvm/Module.h" +#include "llvm/Support/TargetRegistry.h" +using namespace llvm; + +Target llvm::TheNVPTXTarget32; +Target llvm::TheNVPTXTarget64; + +extern "C" void LLVMInitializeNVPTXTargetInfo() { + RegisterTarget<Triple::nvptx> X(TheNVPTXTarget32, "nvptx", + "NVIDIA PTX 32-bit"); + RegisterTarget<Triple::nvptx64> Y(TheNVPTXTarget64, "nvptx64", + "NVIDIA PTX 64-bit"); +} diff --git a/lib/Target/NVPTX/VectorElementize.cpp b/lib/Target/NVPTX/VectorElementize.cpp new file mode 100644 index 0000000..a0152bf --- /dev/null +++ b/lib/Target/NVPTX/VectorElementize.cpp @@ -0,0 +1,1250 @@ +//===-- VectorElementize.cpp - Convert vector ops to element ops ----------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This pass converts operations on vector types to operations on their +// element types. +// +// For generic binary and unary vector instructions, the conversion is simple. +// Suppose we have +// av = bv Vop cv +// where av, bv, and cv are vector virtual registers, and Vop is a vector op.
+// This gets converted to the following : +// a1 = b1 Sop c1 +// a2 = b2 Sop c2 +// +// VectorToScalarMap maintains the vector vreg to scalar vreg mapping. +// For the above example, the map will look as follows: +// av => [a1, a2] +// bv => [b1, b2] +// +// In addition, initVectorInfo creates the following opcode->opcode map. +// Vop => Sop +// OtherVop => OtherSop +// ... +// +// For vector specific instructions like vecbuild, vecshuffle etc, the +// conversion is different. Look at comments near the functions with +// prefix createVec<...>. +// +//===----------------------------------------------------------------------===// + +#include "llvm/CodeGen/Passes.h" +#include "llvm/Constant.h" +#include "llvm/Instructions.h" +#include "llvm/Function.h" +#include "llvm/Pass.h" +#include "llvm/Type.h" +#include "llvm/Support/CommandLine.h" +#include "llvm/CodeGen/MachineFunctionPass.h" +#include "llvm/CodeGen/MachineModuleInfo.h" +#include "llvm/CodeGen/MachineRegisterInfo.h" +#include "llvm/CodeGen/MachineInstrBuilder.h" +#include "llvm/Support/CFG.h" +#include "llvm/Support/Compiler.h" +#include "llvm/Target/TargetInstrInfo.h" +#include "llvm/ADT/DepthFirstIterator.h" +#include "llvm/ADT/SmallPtrSet.h" +#include "NVPTX.h" +#include "NVPTXTargetMachine.h" + +using namespace llvm; + +namespace { + +class LLVM_LIBRARY_VISIBILITY VectorElementize : public MachineFunctionPass { + virtual bool runOnMachineFunction(MachineFunction &F); + + NVPTXTargetMachine &TM; + MachineRegisterInfo *MRI; + const NVPTXRegisterInfo *RegInfo; + const NVPTXInstrInfo *InstrInfo; + + llvm::DenseMap<const TargetRegisterClass *, const TargetRegisterClass *> + RegClassMap; + llvm::DenseMap<unsigned, bool> SimpleMoveMap; + + llvm::DenseMap<unsigned, SmallVector<unsigned, 4> > VectorToScalarMap; + + bool isVectorInstr(MachineInstr *); + + SmallVector<unsigned, 4> getScalarRegisters(unsigned); + unsigned getScalarVersion(unsigned); + unsigned getScalarVersion(MachineInstr *); + + bool 
isVectorRegister(unsigned); + const TargetRegisterClass *getScalarRegClass(const TargetRegisterClass *RC); + unsigned numCopiesNeeded(MachineInstr *); + + void createLoadCopy(MachineFunction&, MachineInstr *, + std::vector<MachineInstr *>&); + void createStoreCopy(MachineFunction&, MachineInstr *, + std::vector<MachineInstr *>&); + + void createVecDest(MachineFunction&, MachineInstr *, + std::vector<MachineInstr *>&); + + void createCopies(MachineFunction&, MachineInstr *, + std::vector<MachineInstr *>&); + + unsigned copyProp(MachineFunction&); + unsigned removeDeadMoves(MachineFunction&); + + void elementize(MachineFunction&); + + bool isSimpleMove(MachineInstr *); + + void createVecShuffle(MachineFunction& F, MachineInstr *Instr, + std::vector<MachineInstr *>& copies); + + void createVecExtract(MachineFunction& F, MachineInstr *Instr, + std::vector<MachineInstr *>& copies); + + void createVecInsert(MachineFunction& F, MachineInstr *Instr, + std::vector<MachineInstr *>& copies); + + void createVecBuild(MachineFunction& F, MachineInstr *Instr, + std::vector<MachineInstr *>& copies); + +public: + + static char ID; // Pass identification, replacement for typeid + VectorElementize(NVPTXTargetMachine &tm) + : MachineFunctionPass(ID), TM(tm) {} + + virtual const char *getPassName() const { + return "Convert LLVM vector types to their element types"; + } +}; + +char VectorElementize::ID = 1; +} + +static cl::opt<bool> +RemoveRedundantMoves("nvptx-remove-redundant-moves", + cl::desc("NVPTX: Remove redundant moves introduced by vector lowering"), + cl::init(true)); + +#define VECINST(x) ((((x)->getDesc().TSFlags) & NVPTX::VecInstTypeMask) \ + >> NVPTX::VecInstTypeShift) +#define ISVECINST(x) (VECINST(x) != NVPTX::VecNOP) +#define ISVECLOAD(x) (VECINST(x) == NVPTX::VecLoad) +#define ISVECSTORE(x) (VECINST(x) == NVPTX::VecStore) +#define ISVECBUILD(x) (VECINST(x) == NVPTX::VecBuild) +#define ISVECSHUFFLE(x) (VECINST(x) == NVPTX::VecShuffle) +#define ISVECEXTRACT(x) 
(VECINST(x) == NVPTX::VecExtract) +#define ISVECINSERT(x) (VECINST(x) == NVPTX::VecInsert) +#define ISVECDEST(x) (VECINST(x) == NVPTX::VecDest) + +bool VectorElementize::isSimpleMove(MachineInstr *mi) { + if (mi->isCopy()) + return true; + unsigned TSFlags = (mi->getDesc().TSFlags & NVPTX::SimpleMoveMask) + >> NVPTX::SimpleMoveShift; + return (TSFlags == 1); +} + +bool VectorElementize::isVectorInstr(MachineInstr *mi) { + if ((mi->getOpcode() == NVPTX::PHI) || + (mi->getOpcode() == NVPTX::IMPLICIT_DEF) || mi->isCopy()) { + MachineOperand dest = mi->getOperand(0); + return isVectorRegister(dest.getReg()); + } + return ISVECINST(mi); +} + +unsigned VectorElementize::getScalarVersion(MachineInstr *mi) { + return getScalarVersion(mi->getOpcode()); +} + +///============================================================================= +///Instr is assumed to be a vector instruction. For most vector instructions, +///the size of the destination vector register gives the number of scalar copies +///needed. For VecStore, size of getOperand(1) gives the number of scalar copies +///needed. For VecExtract, the dest is a scalar. So getOperand(1) gives the +///number of scalar copies needed. 
+///============================================================================= +unsigned VectorElementize::numCopiesNeeded(MachineInstr *Instr) { + unsigned numDefs=0; + unsigned def; + for (unsigned i=0, e=Instr->getNumOperands(); i!=e; ++i) { + MachineOperand oper = Instr->getOperand(i); + + if (!oper.isReg()) continue; + if (!oper.isDef()) continue; + def = i; + numDefs++; + } + assert((numDefs <= 1) && "Only 0 or 1 defs supported"); + + if (numDefs == 1) { + unsigned regnum = Instr->getOperand(def).getReg(); + if (ISVECEXTRACT(Instr)) + regnum = Instr->getOperand(1).getReg(); + return getNVPTXVectorSize(MRI->getRegClass(regnum)); + } + else if (numDefs == 0) { + assert(ISVECSTORE(Instr) + && "Only 0 def instruction supported is vector store"); + + unsigned regnum = Instr->getOperand(0).getReg(); + return getNVPTXVectorSize(MRI->getRegClass(regnum)); + } + return 1; +} + +const TargetRegisterClass *VectorElementize:: +getScalarRegClass(const TargetRegisterClass *RC) { + assert(isNVPTXVectorRegClass(RC) && + "Not a vector register class"); + return getNVPTXElemClass(RC); +} + +bool VectorElementize::isVectorRegister(unsigned reg) { + const TargetRegisterClass *RC=MRI->getRegClass(reg); + return isNVPTXVectorRegClass(RC); +} + +///============================================================================= +///For every vector register 'v' that is not already in the VectorToScalarMap, +///create n scalar registers of the corresponding element type, where n +///is 2 or 4 (getNVPTXVectorSize) and add it VectorToScalarMap. +///============================================================================= +SmallVector<unsigned, 4> VectorElementize::getScalarRegisters(unsigned regnum) { + assert(isVectorRegister(regnum) && "Expecting a vector register here"); + // Create the scalar registers and put them in the map, if not already there. 
+ if (VectorToScalarMap.find(regnum) == VectorToScalarMap.end()) { + const TargetRegisterClass *vecClass = MRI->getRegClass(regnum); + const TargetRegisterClass *scalarClass = getScalarRegClass(vecClass); + + SmallVector<unsigned, 4> temp; + + for (unsigned i=0, e=getNVPTXVectorSize(vecClass); i!=e; ++i) + temp.push_back(MRI->createVirtualRegister(scalarClass)); + + VectorToScalarMap[regnum] = temp; + } + return VectorToScalarMap[regnum]; +} + +///============================================================================= +///For a vector load of the form +///va <= ldv2 [addr] +///the following multi output instruction is created : +///[v1, v2] <= LD [addr] +///Look at NVPTXVector.td for the definitions of multi output loads. +///============================================================================= +void VectorElementize::createLoadCopy(MachineFunction& F, MachineInstr *Instr, + std::vector<MachineInstr *>& copies) { + copies.push_back(F.CloneMachineInstr(Instr)); + + MachineInstr *copy=copies[0]; + copy->setDesc(InstrInfo->get(getScalarVersion(copy))); + + // Remove the dest, that should be a vector operand. 
+ MachineOperand dest = copy->getOperand(0); + unsigned regnum = dest.getReg(); + + SmallVector<unsigned, 4> scalarRegs = getScalarRegisters(regnum); + copy->RemoveOperand(0); + + std::vector<MachineOperand> otherOperands; + for (unsigned i=0, e=copy->getNumOperands(); i!=e; ++i) + otherOperands.push_back(copy->getOperand(i)); + + for (unsigned i=0, e=copy->getNumOperands(); i!=e; ++i) + copy->RemoveOperand(0); + + for (unsigned i=0, e=scalarRegs.size(); i!=e; ++i) { + copy->addOperand(MachineOperand::CreateReg(scalarRegs[i], true)); + } + + for (unsigned i=0, e=otherOperands.size(); i!=e; ++i) + copy->addOperand(otherOperands[i]); + +} + +///============================================================================= +///For a vector store of the form +///stv2 va, [addr] +///the following multi input instruction is created : +///ST v1, v2, [addr] +///Look at NVPTXVector.td for the definitions of multi input stores. +///============================================================================= +void VectorElementize::createStoreCopy(MachineFunction& F, MachineInstr *Instr, + std::vector<MachineInstr *>& copies) { + copies.push_back(F.CloneMachineInstr(Instr)); + + MachineInstr *copy=copies[0]; + copy->setDesc(InstrInfo->get(getScalarVersion(copy))); + + MachineOperand src = copy->getOperand(0); + unsigned regnum = src.getReg(); + + SmallVector<unsigned, 4> scalarRegs = getScalarRegisters(regnum); + copy->RemoveOperand(0); + + std::vector<MachineOperand> otherOperands; + for (unsigned i=0, e=copy->getNumOperands(); i!=e; ++i) + otherOperands.push_back(copy->getOperand(i)); + + for (unsigned i=0, e=copy->getNumOperands(); i!=e; ++i) + copy->RemoveOperand(0); + + for (unsigned i=0, e=scalarRegs.size(); i!=e; ++i) + copy->addOperand(MachineOperand::CreateReg(scalarRegs[i], false)); + + for (unsigned i=0, e=otherOperands.size(); i!=e; ++i) + copy->addOperand(otherOperands[i]); +} + +///============================================================================= 
+///va <= shufflev2 vb, vc, <i1>, <i2> +///gets converted to 2 moves into a1 and a2. The source of the moves depend on +///i1 and i2. i1, i2 can belong to the set {0, 1, 2, 3} for shufflev2. For +///shufflev4 the set is {0,..7}. For example, if i1=3, i2=0, the move +///instructions will be +///a1 <= c2 +///a2 <= b1 +///============================================================================= +void VectorElementize::createVecShuffle(MachineFunction& F, MachineInstr *Instr, + std::vector<MachineInstr *>& copies) { + unsigned numcopies=numCopiesNeeded(Instr); + + unsigned destregnum = Instr->getOperand(0).getReg(); + unsigned src1regnum = Instr->getOperand(1).getReg(); + unsigned src2regnum = Instr->getOperand(2).getReg(); + + SmallVector<unsigned, 4> dest = getScalarRegisters(destregnum); + SmallVector<unsigned, 4> src1 = getScalarRegisters(src1regnum); + SmallVector<unsigned, 4> src2 = getScalarRegisters(src2regnum); + + DebugLoc DL = Instr->getDebugLoc(); + + for (unsigned i=0; i<numcopies; i++) { + MachineInstr *copy = BuildMI(F, DL, + InstrInfo->get(getScalarVersion(Instr)), dest[i]); + MachineOperand which=Instr->getOperand(3+i); + assert(which.isImm() && "Shuffle operand not a constant"); + + int src=which.getImm(); + int elem=src%numcopies; + + if (which.getImm() < numcopies) + copy->addOperand(MachineOperand::CreateReg(src1[elem], false)); + else + copy->addOperand(MachineOperand::CreateReg(src2[elem], false)); + copies.push_back(copy); + } +} + +///============================================================================= +///a <= extractv2 va, <i1> +///gets turned into a simple move to the scalar register a. The source depends +///on i1. 
+///============================================================================= +void VectorElementize::createVecExtract(MachineFunction& F, MachineInstr *Instr, + std::vector<MachineInstr *>& copies) { + unsigned srcregnum = Instr->getOperand(1).getReg(); + + SmallVector<unsigned, 4> src = getScalarRegisters(srcregnum); + + MachineOperand which = Instr->getOperand(2); + assert(which.isImm() && "Extract operand not a constant"); + + DebugLoc DL = Instr->getDebugLoc(); + + MachineInstr *copy = BuildMI(F, DL, InstrInfo->get(getScalarVersion(Instr)), + Instr->getOperand(0).getReg()); + copy->addOperand(MachineOperand::CreateReg(src[which.getImm()], false)); + + copies.push_back(copy); +} + +///============================================================================= +///va <= vecinsertv2 vb, c, <i1> +///This instruction copies all elements of vb to va, except the 'i1'th element. +///The scalar value c becomes the 'i1'th element of va. +///This gets translated to 2 (4 for vecinsertv4) moves. 
+///============================================================================= +void VectorElementize::createVecInsert(MachineFunction& F, MachineInstr *Instr, + std::vector<MachineInstr *>& copies) { + unsigned numcopies=numCopiesNeeded(Instr); + + unsigned destregnum = Instr->getOperand(0).getReg(); + unsigned srcregnum = Instr->getOperand(1).getReg(); + + SmallVector<unsigned, 4> dest = getScalarRegisters(destregnum); + SmallVector<unsigned, 4> src = getScalarRegisters(srcregnum); + + MachineOperand which=Instr->getOperand(3); + assert(which.isImm() && "Insert operand not a constant"); + unsigned int elem=which.getImm(); + + DebugLoc DL = Instr->getDebugLoc(); + + for (unsigned i=0; i<numcopies; i++) { + MachineInstr *copy = BuildMI(F, DL, + InstrInfo->get(getScalarVersion(Instr)), dest[i]); + + if (i != elem) + copy->addOperand(MachineOperand::CreateReg(src[i], false)); + else + copy->addOperand(Instr->getOperand(2)); + + copies.push_back(copy); + } + +} + +///============================================================================= +///va <= buildv2 b1, b2 +///gets translated to +///a1 <= b1 +///a2 <= b2 +///============================================================================= +void VectorElementize::createVecBuild(MachineFunction& F, MachineInstr *Instr, + std::vector<MachineInstr *>& copies) { + unsigned numcopies=numCopiesNeeded(Instr); + + unsigned destregnum = Instr->getOperand(0).getReg(); + + SmallVector<unsigned, 4> dest = getScalarRegisters(destregnum); + + DebugLoc DL = Instr->getDebugLoc(); + + for (unsigned i=0; i<numcopies; i++) { + MachineInstr *copy = BuildMI(F, DL, + InstrInfo->get(getScalarVersion(Instr)), dest[i]); + + copy->addOperand(Instr->getOperand(1+i)); + + copies.push_back(copy); + } + +} + +///============================================================================= +///For a tex inst of the form +///va <= op [scalar operands] +///the following multi output instruction is created : +///[v1, v2] <= op' [scalar 
operands] +///============================================================================= +void VectorElementize::createVecDest(MachineFunction& F, MachineInstr *Instr, + std::vector<MachineInstr *>& copies) { + copies.push_back(F.CloneMachineInstr(Instr)); + + MachineInstr *copy=copies[0]; + copy->setDesc(InstrInfo->get(getScalarVersion(copy))); + + // Remove the dest, that should be a vector operand. + MachineOperand dest = copy->getOperand(0); + unsigned regnum = dest.getReg(); + + SmallVector<unsigned, 4> scalarRegs = getScalarRegisters(regnum); + copy->RemoveOperand(0); + + std::vector<MachineOperand> otherOperands; + for (unsigned i=0, e=copy->getNumOperands(); i!=e; ++i) + otherOperands.push_back(copy->getOperand(i)); + + for (unsigned i=0, e=copy->getNumOperands(); i!=e; ++i) + copy->RemoveOperand(0); + + for (unsigned i=0, e=scalarRegs.size(); i!=e; ++i) + copy->addOperand(MachineOperand::CreateReg(scalarRegs[i], true)); + + for (unsigned i=0, e=otherOperands.size(); i!=e; ++i) + copy->addOperand(otherOperands[i]); +} + +///============================================================================= +///Look at the vector instruction type and dispatch to the createVec<...> +///function that creates the scalar copies. 
+///============================================================================= +void VectorElementize::createCopies(MachineFunction& F, MachineInstr *Instr, + std::vector<MachineInstr *>& copies) { + if (ISVECLOAD(Instr)) { + createLoadCopy(F, Instr, copies); + return; + } + if (ISVECSTORE(Instr)) { + createStoreCopy(F, Instr, copies); + return; + } + if (ISVECSHUFFLE(Instr)) { + createVecShuffle(F, Instr, copies); + return; + } + if (ISVECEXTRACT(Instr)) { + createVecExtract(F, Instr, copies); + return; + } + if (ISVECINSERT(Instr)) { + createVecInsert(F, Instr, copies); + return; + } + if (ISVECDEST(Instr)) { + createVecDest(F, Instr, copies); + return; + } + if (ISVECBUILD(Instr)) { + createVecBuild(F, Instr, copies); + return; + } + + unsigned numcopies=numCopiesNeeded(Instr); + + for (unsigned i=0; i<numcopies; ++i) + copies.push_back(F.CloneMachineInstr(Instr)); + + for (unsigned i=0; i<numcopies; ++i) { + MachineInstr *copy = copies[i]; + + std::vector<MachineOperand> allOperands; + std::vector<bool> isDef; + + for (unsigned j=0, e=copy->getNumOperands(); j!=e; ++j) { + MachineOperand oper = copy->getOperand(j); + allOperands.push_back(oper); + if (oper.isReg()) + isDef.push_back(oper.isDef()); + else + isDef.push_back(false); + } + + for (unsigned j=0, e=copy->getNumOperands(); j!=e; ++j) + copy->RemoveOperand(0); + + copy->setDesc(InstrInfo->get(getScalarVersion(Instr))); + + for (unsigned j=0, e=allOperands.size(); j!=e; ++j) { + MachineOperand oper=allOperands[j]; + if (oper.isReg()) { + unsigned regnum = oper.getReg(); + if (isVectorRegister(regnum)) { + + SmallVector<unsigned, 4> scalarRegs = getScalarRegisters(regnum); + copy->addOperand(MachineOperand::CreateReg(scalarRegs[i], isDef[j])); + } + else + copy->addOperand(oper); + } + else + copy->addOperand(oper); + } + } +} + +///============================================================================= +///Scan through all basic blocks, looking for vector instructions. 
+///For each vector instruction I, insert the scalar copies before I, and +///add I into toRemove vector. Finally remove all instructions in toRemove. +///============================================================================= +void VectorElementize::elementize(MachineFunction &F) { + for (MachineFunction::reverse_iterator BI=F.rbegin(), BE=F.rend(); + BI!=BE; ++BI) { + MachineBasicBlock *BB = &*BI; + + std::vector<MachineInstr *> copies; + std::vector<MachineInstr *> toRemove; + + for (MachineBasicBlock::iterator II=BB->begin(), IE=BB->end(); + II!=IE; ++II) { + MachineInstr *Instr = &*II; + + if (!isVectorInstr(Instr)) + continue; + + copies.clear(); + createCopies(F, Instr, copies); + for (unsigned i=0, e=copies.size(); i!=e; ++i) + BB->insert(II, copies[i]); + + assert((copies.size() > 0) && "Problem in createCopies"); + toRemove.push_back(Instr); + } + for (unsigned i=0, e=toRemove.size(); i!=e; ++i) + F.DeleteMachineInstr(toRemove[i]->getParent()->remove(toRemove[i])); + } +} + +///============================================================================= +///a <= b +///... +///... +///x <= op(a, ...) +///gets converted to +/// +///x <= op(b, ...) +///The original move is still present. This works on SSA form machine code. +///Note that a <= b should be a simple vreg-to-vreg move instruction. +///TBD : I didn't find a function that can do replaceOperand, so I remove +///all operands and add all of them again, replacing the one while adding. +///============================================================================= +unsigned VectorElementize::copyProp(MachineFunction &F) { + unsigned numReplacements = 0; + + for (MachineFunction::reverse_iterator BI=F.rbegin(), BE=F.rend(); BI!=BE; + ++BI) { + MachineBasicBlock *BB = &*BI; + + for (MachineBasicBlock::iterator II=BB->begin(), IE=BB->end(); II!=IE; + ++II) { + MachineInstr *Instr = &*II; + + // Don't do copy propagation on PHI as it will cause unnecessary + // live range overlap. 
+ if ((Instr->getOpcode() == TargetOpcode::PHI) || + (Instr->getOpcode() == TargetOpcode::DBG_VALUE)) + continue; + + bool needsReplacement = false; + + for (unsigned i=0, e=Instr->getNumOperands(); i!=e; ++i) { + MachineOperand oper = Instr->getOperand(i); + if (!oper.isReg()) continue; + if (oper.isDef()) continue; + if (!RegInfo->isVirtualRegister(oper.getReg())) continue; + + MachineInstr *defInstr = MRI->getVRegDef(oper.getReg()); + + if (!defInstr) continue; + + if (!isSimpleMove(defInstr)) continue; + + MachineOperand defSrc = defInstr->getOperand(1); + if (!defSrc.isReg()) continue; + if (!RegInfo->isVirtualRegister(defSrc.getReg())) continue; + + needsReplacement = true; + + } + if (!needsReplacement) continue; + + numReplacements++; + + std::vector<MachineOperand> operands; + + for (unsigned i=0, e=Instr->getNumOperands(); i!=e; ++i) { + MachineOperand oper = Instr->getOperand(i); + bool flag = false; + do { + if (!(oper.isReg())) + break; + if (oper.isDef()) + break; + if (!(RegInfo->isVirtualRegister(oper.getReg()))) + break; + MachineInstr *defInstr = MRI->getVRegDef(oper.getReg()); + if (!(isSimpleMove(defInstr))) + break; + MachineOperand defSrc = defInstr->getOperand(1); + if (!(defSrc.isReg())) + break; + if (!(RegInfo->isVirtualRegister(defSrc.getReg()))) + break; + operands.push_back(defSrc); + flag = true; + } while (0); + if (flag == false) + operands.push_back(oper); + } + + for (unsigned i=0, e=Instr->getNumOperands(); i!=e; ++i) + Instr->RemoveOperand(0); + for (unsigned i=0, e=operands.size(); i!=e; ++i) + Instr->addOperand(operands[i]); + + } + } + return numReplacements; +} + +///============================================================================= +///Look for simple vreg-to-vreg instructions whose use_empty() is true, add +///them to deadMoves vector. Then remove all instructions in deadMoves. 
+///============================================================================= +unsigned VectorElementize::removeDeadMoves(MachineFunction &F) { + std::vector<MachineInstr *> deadMoves; + for (MachineFunction::reverse_iterator BI=F.rbegin(), BE=F.rend(); BI!=BE; + ++BI) { + MachineBasicBlock *BB = &*BI; + + for (MachineBasicBlock::iterator II=BB->begin(), IE=BB->end(); II!=IE; + ++II) { + MachineInstr *Instr = &*II; + + if (!isSimpleMove(Instr)) continue; + + MachineOperand dest = Instr->getOperand(0); + assert(dest.isReg() && "dest of move not a register"); + assert(RegInfo->isVirtualRegister(dest.getReg()) && + "dest of move not a virtual register"); + + if (MRI->use_empty(dest.getReg())) { + deadMoves.push_back(Instr); + } + } + } + + for (unsigned i=0, e=deadMoves.size(); i!=e; ++i) + F.DeleteMachineInstr(deadMoves[i]->getParent()->remove(deadMoves[i])); + + return deadMoves.size(); +} + +///============================================================================= +///Main function for this pass. 
+///============================================================================= +bool VectorElementize::runOnMachineFunction(MachineFunction &F) { + MRI = &F.getRegInfo(); + + RegInfo = TM.getRegisterInfo(); + InstrInfo = TM.getInstrInfo(); + + VectorToScalarMap.clear(); + + elementize(F); + + if (RemoveRedundantMoves) + while (1) { + if (copyProp(F) == 0) break; + removeDeadMoves(F); + } + + return true; +} + +FunctionPass *llvm::createVectorElementizePass(NVPTXTargetMachine &tm) { + return new VectorElementize(tm); +} + +unsigned VectorElementize::getScalarVersion(unsigned opcode) { + if (opcode == NVPTX::PHI) + return opcode; + if (opcode == NVPTX::IMPLICIT_DEF) + return opcode; + switch(opcode) { + default: + assert(0 && "Scalar version not set, fix NVPTXVector.td"); + return 0; + case TargetOpcode::COPY: return TargetOpcode::COPY; + case NVPTX::AddCCCV2I32: return NVPTX::ADDCCCi32rr; + case NVPTX::AddCCCV4I32: return NVPTX::ADDCCCi32rr; + case NVPTX::AddCCV2I32: return NVPTX::ADDCCi32rr; + case NVPTX::AddCCV4I32: return NVPTX::ADDCCi32rr; + case NVPTX::Build_Vector2_f32: return NVPTX::FMOV32rr; + case NVPTX::Build_Vector2_f64: return NVPTX::FMOV64rr; + case NVPTX::Build_Vector2_i16: return NVPTX::IMOV16rr; + case NVPTX::Build_Vector2_i32: return NVPTX::IMOV32rr; + case NVPTX::Build_Vector2_i64: return NVPTX::IMOV64rr; + case NVPTX::Build_Vector2_i8: return NVPTX::IMOV8rr; + case NVPTX::Build_Vector4_f32: return NVPTX::FMOV32rr; + case NVPTX::Build_Vector4_i16: return NVPTX::IMOV16rr; + case NVPTX::Build_Vector4_i32: return NVPTX::IMOV32rr; + case NVPTX::Build_Vector4_i8: return NVPTX::IMOV8rr; + case NVPTX::CVTv2i16tov2i32: return NVPTX::Zint_extendext16to32; + case NVPTX::CVTv2i64tov2i32: return NVPTX::TRUNC_64to32; + case NVPTX::CVTv2i8tov2i32: return NVPTX::Zint_extendext8to32; + case NVPTX::CVTv4i16tov4i32: return NVPTX::Zint_extendext16to32; + case NVPTX::CVTv4i8tov4i32: return NVPTX::Zint_extendext8to32; + case NVPTX::F32MAD_ftzV2: return 
NVPTX::FMAD32_ftzrrr; + case NVPTX::F32MADV2: return NVPTX::FMAD32rrr; + case NVPTX::F32MAD_ftzV4: return NVPTX::FMAD32_ftzrrr; + case NVPTX::F32MADV4: return NVPTX::FMAD32rrr; + case NVPTX::F32FMA_ftzV2: return NVPTX::FMA32_ftzrrr; + case NVPTX::F32FMAV2: return NVPTX::FMA32rrr; + case NVPTX::F32FMA_ftzV4: return NVPTX::FMA32_ftzrrr; + case NVPTX::F32FMAV4: return NVPTX::FMA32rrr; + case NVPTX::F64FMAV2: return NVPTX::FMA64rrr; + case NVPTX::FVecEQV2F32: return NVPTX::FSetEQf32rr_toi32; + case NVPTX::FVecEQV2F64: return NVPTX::FSetEQf64rr_toi64; + case NVPTX::FVecEQV4F32: return NVPTX::FSetEQf32rr_toi32; + case NVPTX::FVecGEV2F32: return NVPTX::FSetGEf32rr_toi32; + case NVPTX::FVecGEV2F64: return NVPTX::FSetGEf64rr_toi64; + case NVPTX::FVecGEV4F32: return NVPTX::FSetGEf32rr_toi32; + case NVPTX::FVecGTV2F32: return NVPTX::FSetGTf32rr_toi32; + case NVPTX::FVecGTV2F64: return NVPTX::FSetGTf64rr_toi64; + case NVPTX::FVecGTV4F32: return NVPTX::FSetGTf32rr_toi32; + case NVPTX::FVecLEV2F32: return NVPTX::FSetLEf32rr_toi32; + case NVPTX::FVecLEV2F64: return NVPTX::FSetLEf64rr_toi64; + case NVPTX::FVecLEV4F32: return NVPTX::FSetLEf32rr_toi32; + case NVPTX::FVecLTV2F32: return NVPTX::FSetLTf32rr_toi32; + case NVPTX::FVecLTV2F64: return NVPTX::FSetLTf64rr_toi64; + case NVPTX::FVecLTV4F32: return NVPTX::FSetLTf32rr_toi32; + case NVPTX::FVecNANV2F32: return NVPTX::FSetNANf32rr_toi32; + case NVPTX::FVecNANV2F64: return NVPTX::FSetNANf64rr_toi64; + case NVPTX::FVecNANV4F32: return NVPTX::FSetNANf32rr_toi32; + case NVPTX::FVecNEV2F32: return NVPTX::FSetNEf32rr_toi32; + case NVPTX::FVecNEV2F64: return NVPTX::FSetNEf64rr_toi64; + case NVPTX::FVecNEV4F32: return NVPTX::FSetNEf32rr_toi32; + case NVPTX::FVecNUMV2F32: return NVPTX::FSetNUMf32rr_toi32; + case NVPTX::FVecNUMV2F64: return NVPTX::FSetNUMf64rr_toi64; + case NVPTX::FVecNUMV4F32: return NVPTX::FSetNUMf32rr_toi32; + case NVPTX::FVecUEQV2F32: return NVPTX::FSetUEQf32rr_toi32; + case NVPTX::FVecUEQV2F64: return 
NVPTX::FSetUEQf64rr_toi64; + case NVPTX::FVecUEQV4F32: return NVPTX::FSetUEQf32rr_toi32; + case NVPTX::FVecUGEV2F32: return NVPTX::FSetUGEf32rr_toi32; + case NVPTX::FVecUGEV2F64: return NVPTX::FSetUGEf64rr_toi64; + case NVPTX::FVecUGEV4F32: return NVPTX::FSetUGEf32rr_toi32; + case NVPTX::FVecUGTV2F32: return NVPTX::FSetUGTf32rr_toi32; + case NVPTX::FVecUGTV2F64: return NVPTX::FSetUGTf64rr_toi64; + case NVPTX::FVecUGTV4F32: return NVPTX::FSetUGTf32rr_toi32; + case NVPTX::FVecULEV2F32: return NVPTX::FSetULEf32rr_toi32; + case NVPTX::FVecULEV2F64: return NVPTX::FSetULEf64rr_toi64; + case NVPTX::FVecULEV4F32: return NVPTX::FSetULEf32rr_toi32; + case NVPTX::FVecULTV2F32: return NVPTX::FSetULTf32rr_toi32; + case NVPTX::FVecULTV2F64: return NVPTX::FSetULTf64rr_toi64; + case NVPTX::FVecULTV4F32: return NVPTX::FSetULTf32rr_toi32; + case NVPTX::FVecUNEV2F32: return NVPTX::FSetUNEf32rr_toi32; + case NVPTX::FVecUNEV2F64: return NVPTX::FSetUNEf64rr_toi64; + case NVPTX::FVecUNEV4F32: return NVPTX::FSetUNEf32rr_toi32; + case NVPTX::I16MADV2: return NVPTX::MAD16rrr; + case NVPTX::I16MADV4: return NVPTX::MAD16rrr; + case NVPTX::I32MADV2: return NVPTX::MAD32rrr; + case NVPTX::I32MADV4: return NVPTX::MAD32rrr; + case NVPTX::I64MADV2: return NVPTX::MAD64rrr; + case NVPTX::I8MADV2: return NVPTX::MAD8rrr; + case NVPTX::I8MADV4: return NVPTX::MAD8rrr; + case NVPTX::ShiftLV2I16: return NVPTX::SHLi16rr; + case NVPTX::ShiftLV2I32: return NVPTX::SHLi32rr; + case NVPTX::ShiftLV2I64: return NVPTX::SHLi64rr; + case NVPTX::ShiftLV2I8: return NVPTX::SHLi8rr; + case NVPTX::ShiftLV4I16: return NVPTX::SHLi16rr; + case NVPTX::ShiftLV4I32: return NVPTX::SHLi32rr; + case NVPTX::ShiftLV4I8: return NVPTX::SHLi8rr; + case NVPTX::ShiftRAV2I16: return NVPTX::SRAi16rr; + case NVPTX::ShiftRAV2I32: return NVPTX::SRAi32rr; + case NVPTX::ShiftRAV2I64: return NVPTX::SRAi64rr; + case NVPTX::ShiftRAV2I8: return NVPTX::SRAi8rr; + case NVPTX::ShiftRAV4I16: return NVPTX::SRAi16rr; + case NVPTX::ShiftRAV4I32: return 
NVPTX::SRAi32rr; + case NVPTX::ShiftRAV4I8: return NVPTX::SRAi8rr; + case NVPTX::ShiftRLV2I16: return NVPTX::SRLi16rr; + case NVPTX::ShiftRLV2I32: return NVPTX::SRLi32rr; + case NVPTX::ShiftRLV2I64: return NVPTX::SRLi64rr; + case NVPTX::ShiftRLV2I8: return NVPTX::SRLi8rr; + case NVPTX::ShiftRLV4I16: return NVPTX::SRLi16rr; + case NVPTX::ShiftRLV4I32: return NVPTX::SRLi32rr; + case NVPTX::ShiftRLV4I8: return NVPTX::SRLi8rr; + case NVPTX::SubCCCV2I32: return NVPTX::SUBCCCi32rr; + case NVPTX::SubCCCV4I32: return NVPTX::SUBCCCi32rr; + case NVPTX::SubCCV2I32: return NVPTX::SUBCCi32rr; + case NVPTX::SubCCV4I32: return NVPTX::SUBCCi32rr; + case NVPTX::V2F32Div_prec_ftz: return NVPTX::FDIV32rr_prec_ftz; + case NVPTX::V2F32Div_prec: return NVPTX::FDIV32rr_prec; + case NVPTX::V2F32Div_ftz: return NVPTX::FDIV32rr_ftz; + case NVPTX::V2F32Div: return NVPTX::FDIV32rr; + case NVPTX::V2F32_Select: return NVPTX::SELECTf32rr; + case NVPTX::V2F64Div: return NVPTX::FDIV64rr; + case NVPTX::V2F64_Select: return NVPTX::SELECTf64rr; + case NVPTX::V2I16_Select: return NVPTX::SELECTi16rr; + case NVPTX::V2I32_Select: return NVPTX::SELECTi32rr; + case NVPTX::V2I64_Select: return NVPTX::SELECTi64rr; + case NVPTX::V2I8_Select: return NVPTX::SELECTi8rr; + case NVPTX::V2f32Extract: return NVPTX::FMOV32rr; + case NVPTX::V2f32Insert: return NVPTX::FMOV32rr; + case NVPTX::V2f32Mov: return NVPTX::FMOV32rr; + case NVPTX::V2f64Extract: return NVPTX::FMOV64rr; + case NVPTX::V2f64Insert: return NVPTX::FMOV64rr; + case NVPTX::V2f64Mov: return NVPTX::FMOV64rr; + case NVPTX::V2i16Extract: return NVPTX::IMOV16rr; + case NVPTX::V2i16Insert: return NVPTX::IMOV16rr; + case NVPTX::V2i16Mov: return NVPTX::IMOV16rr; + case NVPTX::V2i32Extract: return NVPTX::IMOV32rr; + case NVPTX::V2i32Insert: return NVPTX::IMOV32rr; + case NVPTX::V2i32Mov: return NVPTX::IMOV32rr; + case NVPTX::V2i64Extract: return NVPTX::IMOV64rr; + case NVPTX::V2i64Insert: return NVPTX::IMOV64rr; + case NVPTX::V2i64Mov: return NVPTX::IMOV64rr; + 
case NVPTX::V2i8Extract: return NVPTX::IMOV8rr; + case NVPTX::V2i8Insert: return NVPTX::IMOV8rr; + case NVPTX::V2i8Mov: return NVPTX::IMOV8rr; + case NVPTX::V4F32Div_prec_ftz: return NVPTX::FDIV32rr_prec_ftz; + case NVPTX::V4F32Div_prec: return NVPTX::FDIV32rr_prec; + case NVPTX::V4F32Div_ftz: return NVPTX::FDIV32rr_ftz; + case NVPTX::V4F32Div: return NVPTX::FDIV32rr; + case NVPTX::V4F32_Select: return NVPTX::SELECTf32rr; + case NVPTX::V4I16_Select: return NVPTX::SELECTi16rr; + case NVPTX::V4I32_Select: return NVPTX::SELECTi32rr; + case NVPTX::V4I8_Select: return NVPTX::SELECTi8rr; + case NVPTX::V4f32Extract: return NVPTX::FMOV32rr; + case NVPTX::V4f32Insert: return NVPTX::FMOV32rr; + case NVPTX::V4f32Mov: return NVPTX::FMOV32rr; + case NVPTX::V4i16Extract: return NVPTX::IMOV16rr; + case NVPTX::V4i16Insert: return NVPTX::IMOV16rr; + case NVPTX::V4i16Mov: return NVPTX::IMOV16rr; + case NVPTX::V4i32Extract: return NVPTX::IMOV32rr; + case NVPTX::V4i32Insert: return NVPTX::IMOV32rr; + case NVPTX::V4i32Mov: return NVPTX::IMOV32rr; + case NVPTX::V4i8Extract: return NVPTX::IMOV8rr; + case NVPTX::V4i8Insert: return NVPTX::IMOV8rr; + case NVPTX::V4i8Mov: return NVPTX::IMOV8rr; + case NVPTX::VAddV2I16: return NVPTX::ADDi16rr; + case NVPTX::VAddV2I32: return NVPTX::ADDi32rr; + case NVPTX::VAddV2I64: return NVPTX::ADDi64rr; + case NVPTX::VAddV2I8: return NVPTX::ADDi8rr; + case NVPTX::VAddV4I16: return NVPTX::ADDi16rr; + case NVPTX::VAddV4I32: return NVPTX::ADDi32rr; + case NVPTX::VAddV4I8: return NVPTX::ADDi8rr; + case NVPTX::VAddfV2F32: return NVPTX::FADDf32rr; + case NVPTX::VAddfV2F32_ftz: return NVPTX::FADDf32rr_ftz; + case NVPTX::VAddfV2F64: return NVPTX::FADDf64rr; + case NVPTX::VAddfV4F32: return NVPTX::FADDf32rr; + case NVPTX::VAddfV4F32_ftz: return NVPTX::FADDf32rr_ftz; + case NVPTX::VAndV2I16: return NVPTX::ANDb16rr; + case NVPTX::VAndV2I32: return NVPTX::ANDb32rr; + case NVPTX::VAndV2I64: return NVPTX::ANDb64rr; + case NVPTX::VAndV2I8: return NVPTX::ANDb8rr; + case 
NVPTX::VAndV4I16: return NVPTX::ANDb16rr; + case NVPTX::VAndV4I32: return NVPTX::ANDb32rr; + case NVPTX::VAndV4I8: return NVPTX::ANDb8rr; + case NVPTX::VMulfV2F32_ftz: return NVPTX::FMULf32rr_ftz; + case NVPTX::VMulfV2F32: return NVPTX::FMULf32rr; + case NVPTX::VMulfV2F64: return NVPTX::FMULf64rr; + case NVPTX::VMulfV4F32_ftz: return NVPTX::FMULf32rr_ftz; + case NVPTX::VMulfV4F32: return NVPTX::FMULf32rr; + case NVPTX::VMultHSV2I16: return NVPTX::MULTHSi16rr; + case NVPTX::VMultHSV2I32: return NVPTX::MULTHSi32rr; + case NVPTX::VMultHSV2I64: return NVPTX::MULTHSi64rr; + case NVPTX::VMultHSV2I8: return NVPTX::MULTHSi8rr; + case NVPTX::VMultHSV4I16: return NVPTX::MULTHSi16rr; + case NVPTX::VMultHSV4I32: return NVPTX::MULTHSi32rr; + case NVPTX::VMultHSV4I8: return NVPTX::MULTHSi8rr; + case NVPTX::VMultHUV2I16: return NVPTX::MULTHUi16rr; + case NVPTX::VMultHUV2I32: return NVPTX::MULTHUi32rr; + case NVPTX::VMultHUV2I64: return NVPTX::MULTHUi64rr; + case NVPTX::VMultHUV2I8: return NVPTX::MULTHUi8rr; + case NVPTX::VMultHUV4I16: return NVPTX::MULTHUi16rr; + case NVPTX::VMultHUV4I32: return NVPTX::MULTHUi32rr; + case NVPTX::VMultHUV4I8: return NVPTX::MULTHUi8rr; + case NVPTX::VMultV2I16: return NVPTX::MULTi16rr; + case NVPTX::VMultV2I32: return NVPTX::MULTi32rr; + case NVPTX::VMultV2I64: return NVPTX::MULTi64rr; + case NVPTX::VMultV2I8: return NVPTX::MULTi8rr; + case NVPTX::VMultV4I16: return NVPTX::MULTi16rr; + case NVPTX::VMultV4I32: return NVPTX::MULTi32rr; + case NVPTX::VMultV4I8: return NVPTX::MULTi8rr; + case NVPTX::VNegV2I16: return NVPTX::INEG16; + case NVPTX::VNegV2I32: return NVPTX::INEG32; + case NVPTX::VNegV2I64: return NVPTX::INEG64; + case NVPTX::VNegV2I8: return NVPTX::INEG8; + case NVPTX::VNegV4I16: return NVPTX::INEG16; + case NVPTX::VNegV4I32: return NVPTX::INEG32; + case NVPTX::VNegV4I8: return NVPTX::INEG8; + case NVPTX::VNegv2f32: return NVPTX::FNEGf32; + case NVPTX::VNegv2f32_ftz: return NVPTX::FNEGf32_ftz; + case NVPTX::VNegv2f64: return 
NVPTX::FNEGf64; + case NVPTX::VNegv4f32: return NVPTX::FNEGf32; + case NVPTX::VNegv4f32_ftz: return NVPTX::FNEGf32_ftz; + case NVPTX::VNotV2I16: return NVPTX::NOT16; + case NVPTX::VNotV2I32: return NVPTX::NOT32; + case NVPTX::VNotV2I64: return NVPTX::NOT64; + case NVPTX::VNotV2I8: return NVPTX::NOT8; + case NVPTX::VNotV4I16: return NVPTX::NOT16; + case NVPTX::VNotV4I32: return NVPTX::NOT32; + case NVPTX::VNotV4I8: return NVPTX::NOT8; + case NVPTX::VOrV2I16: return NVPTX::ORb16rr; + case NVPTX::VOrV2I32: return NVPTX::ORb32rr; + case NVPTX::VOrV2I64: return NVPTX::ORb64rr; + case NVPTX::VOrV2I8: return NVPTX::ORb8rr; + case NVPTX::VOrV4I16: return NVPTX::ORb16rr; + case NVPTX::VOrV4I32: return NVPTX::ORb32rr; + case NVPTX::VOrV4I8: return NVPTX::ORb8rr; + case NVPTX::VSDivV2I16: return NVPTX::SDIVi16rr; + case NVPTX::VSDivV2I32: return NVPTX::SDIVi32rr; + case NVPTX::VSDivV2I64: return NVPTX::SDIVi64rr; + case NVPTX::VSDivV2I8: return NVPTX::SDIVi8rr; + case NVPTX::VSDivV4I16: return NVPTX::SDIVi16rr; + case NVPTX::VSDivV4I32: return NVPTX::SDIVi32rr; + case NVPTX::VSDivV4I8: return NVPTX::SDIVi8rr; + case NVPTX::VSRemV2I16: return NVPTX::SREMi16rr; + case NVPTX::VSRemV2I32: return NVPTX::SREMi32rr; + case NVPTX::VSRemV2I64: return NVPTX::SREMi64rr; + case NVPTX::VSRemV2I8: return NVPTX::SREMi8rr; + case NVPTX::VSRemV4I16: return NVPTX::SREMi16rr; + case NVPTX::VSRemV4I32: return NVPTX::SREMi32rr; + case NVPTX::VSRemV4I8: return NVPTX::SREMi8rr; + case NVPTX::VSubV2I16: return NVPTX::SUBi16rr; + case NVPTX::VSubV2I32: return NVPTX::SUBi32rr; + case NVPTX::VSubV2I64: return NVPTX::SUBi64rr; + case NVPTX::VSubV2I8: return NVPTX::SUBi8rr; + case NVPTX::VSubV4I16: return NVPTX::SUBi16rr; + case NVPTX::VSubV4I32: return NVPTX::SUBi32rr; + case NVPTX::VSubV4I8: return NVPTX::SUBi8rr; + case NVPTX::VSubfV2F32_ftz: return NVPTX::FSUBf32rr_ftz; + case NVPTX::VSubfV2F32: return NVPTX::FSUBf32rr; + case NVPTX::VSubfV2F64: return NVPTX::FSUBf64rr; + case NVPTX::VSubfV4F32_ftz: 
return NVPTX::FSUBf32rr_ftz; + case NVPTX::VSubfV4F32: return NVPTX::FSUBf32rr; + case NVPTX::VUDivV2I16: return NVPTX::UDIVi16rr; + case NVPTX::VUDivV2I32: return NVPTX::UDIVi32rr; + case NVPTX::VUDivV2I64: return NVPTX::UDIVi64rr; + case NVPTX::VUDivV2I8: return NVPTX::UDIVi8rr; + case NVPTX::VUDivV4I16: return NVPTX::UDIVi16rr; + case NVPTX::VUDivV4I32: return NVPTX::UDIVi32rr; + case NVPTX::VUDivV4I8: return NVPTX::UDIVi8rr; + case NVPTX::VURemV2I16: return NVPTX::UREMi16rr; + case NVPTX::VURemV2I32: return NVPTX::UREMi32rr; + case NVPTX::VURemV2I64: return NVPTX::UREMi64rr; + case NVPTX::VURemV2I8: return NVPTX::UREMi8rr; + case NVPTX::VURemV4I16: return NVPTX::UREMi16rr; + case NVPTX::VURemV4I32: return NVPTX::UREMi32rr; + case NVPTX::VURemV4I8: return NVPTX::UREMi8rr; + case NVPTX::VXorV2I16: return NVPTX::XORb16rr; + case NVPTX::VXorV2I32: return NVPTX::XORb32rr; + case NVPTX::VXorV2I64: return NVPTX::XORb64rr; + case NVPTX::VXorV2I8: return NVPTX::XORb8rr; + case NVPTX::VXorV4I16: return NVPTX::XORb16rr; + case NVPTX::VXorV4I32: return NVPTX::XORb32rr; + case NVPTX::VXorV4I8: return NVPTX::XORb8rr; + case NVPTX::VecSEQV2I16: return NVPTX::ISetSEQi16rr_toi16; + case NVPTX::VecSEQV2I32: return NVPTX::ISetSEQi32rr_toi32; + case NVPTX::VecSEQV2I64: return NVPTX::ISetSEQi64rr_toi64; + case NVPTX::VecSEQV2I8: return NVPTX::ISetSEQi8rr_toi8; + case NVPTX::VecSEQV4I16: return NVPTX::ISetSEQi16rr_toi16; + case NVPTX::VecSEQV4I32: return NVPTX::ISetSEQi32rr_toi32; + case NVPTX::VecSEQV4I8: return NVPTX::ISetSEQi8rr_toi8; + case NVPTX::VecSGEV2I16: return NVPTX::ISetSGEi16rr_toi16; + case NVPTX::VecSGEV2I32: return NVPTX::ISetSGEi32rr_toi32; + case NVPTX::VecSGEV2I64: return NVPTX::ISetSGEi64rr_toi64; + case NVPTX::VecSGEV2I8: return NVPTX::ISetSGEi8rr_toi8; + case NVPTX::VecSGEV4I16: return NVPTX::ISetSGEi16rr_toi16; + case NVPTX::VecSGEV4I32: return NVPTX::ISetSGEi32rr_toi32; + case NVPTX::VecSGEV4I8: return NVPTX::ISetSGEi8rr_toi8; + case NVPTX::VecSGTV2I16: 
return NVPTX::ISetSGTi16rr_toi16; + case NVPTX::VecSGTV2I32: return NVPTX::ISetSGTi32rr_toi32; + case NVPTX::VecSGTV2I64: return NVPTX::ISetSGTi64rr_toi64; + case NVPTX::VecSGTV2I8: return NVPTX::ISetSGTi8rr_toi8; + case NVPTX::VecSGTV4I16: return NVPTX::ISetSGTi16rr_toi16; + case NVPTX::VecSGTV4I32: return NVPTX::ISetSGTi32rr_toi32; + case NVPTX::VecSGTV4I8: return NVPTX::ISetSGTi8rr_toi8; + case NVPTX::VecSLEV2I16: return NVPTX::ISetSLEi16rr_toi16; + case NVPTX::VecSLEV2I32: return NVPTX::ISetSLEi32rr_toi32; + case NVPTX::VecSLEV2I64: return NVPTX::ISetSLEi64rr_toi64; + case NVPTX::VecSLEV2I8: return NVPTX::ISetSLEi8rr_toi8; + case NVPTX::VecSLEV4I16: return NVPTX::ISetSLEi16rr_toi16; + case NVPTX::VecSLEV4I32: return NVPTX::ISetSLEi32rr_toi32; + case NVPTX::VecSLEV4I8: return NVPTX::ISetSLEi8rr_toi8; + case NVPTX::VecSLTV2I16: return NVPTX::ISetSLTi16rr_toi16; + case NVPTX::VecSLTV2I32: return NVPTX::ISetSLTi32rr_toi32; + case NVPTX::VecSLTV2I64: return NVPTX::ISetSLTi64rr_toi64; + case NVPTX::VecSLTV2I8: return NVPTX::ISetSLTi8rr_toi8; + case NVPTX::VecSLTV4I16: return NVPTX::ISetSLTi16rr_toi16; + case NVPTX::VecSLTV4I32: return NVPTX::ISetSLTi32rr_toi32; + case NVPTX::VecSLTV4I8: return NVPTX::ISetSLTi8rr_toi8; + case NVPTX::VecSNEV2I16: return NVPTX::ISetSNEi16rr_toi16; + case NVPTX::VecSNEV2I32: return NVPTX::ISetSNEi32rr_toi32; + case NVPTX::VecSNEV2I64: return NVPTX::ISetSNEi64rr_toi64; + case NVPTX::VecSNEV2I8: return NVPTX::ISetSNEi8rr_toi8; + case NVPTX::VecSNEV4I16: return NVPTX::ISetSNEi16rr_toi16; + case NVPTX::VecSNEV4I32: return NVPTX::ISetSNEi32rr_toi32; + case NVPTX::VecSNEV4I8: return NVPTX::ISetSNEi8rr_toi8; + case NVPTX::VecShuffle_v2f32: return NVPTX::FMOV32rr; + case NVPTX::VecShuffle_v2f64: return NVPTX::FMOV64rr; + case NVPTX::VecShuffle_v2i16: return NVPTX::IMOV16rr; + case NVPTX::VecShuffle_v2i32: return NVPTX::IMOV32rr; + case NVPTX::VecShuffle_v2i64: return NVPTX::IMOV64rr; + case NVPTX::VecShuffle_v2i8: return NVPTX::IMOV8rr; + case 
NVPTX::VecShuffle_v4f32: return NVPTX::FMOV32rr; + case NVPTX::VecShuffle_v4i16: return NVPTX::IMOV16rr; + case NVPTX::VecShuffle_v4i32: return NVPTX::IMOV32rr; + case NVPTX::VecShuffle_v4i8: return NVPTX::IMOV8rr; + case NVPTX::VecUEQV2I16: return NVPTX::ISetUEQi16rr_toi16; + case NVPTX::VecUEQV2I32: return NVPTX::ISetUEQi32rr_toi32; + case NVPTX::VecUEQV2I64: return NVPTX::ISetUEQi64rr_toi64; + case NVPTX::VecUEQV2I8: return NVPTX::ISetUEQi8rr_toi8; + case NVPTX::VecUEQV4I16: return NVPTX::ISetUEQi16rr_toi16; + case NVPTX::VecUEQV4I32: return NVPTX::ISetUEQi32rr_toi32; + case NVPTX::VecUEQV4I8: return NVPTX::ISetUEQi8rr_toi8; + case NVPTX::VecUGEV2I16: return NVPTX::ISetUGEi16rr_toi16; + case NVPTX::VecUGEV2I32: return NVPTX::ISetUGEi32rr_toi32; + case NVPTX::VecUGEV2I64: return NVPTX::ISetUGEi64rr_toi64; + case NVPTX::VecUGEV2I8: return NVPTX::ISetUGEi8rr_toi8; + case NVPTX::VecUGEV4I16: return NVPTX::ISetUGEi16rr_toi16; + case NVPTX::VecUGEV4I32: return NVPTX::ISetUGEi32rr_toi32; + case NVPTX::VecUGEV4I8: return NVPTX::ISetUGEi8rr_toi8; + case NVPTX::VecUGTV2I16: return NVPTX::ISetUGTi16rr_toi16; + case NVPTX::VecUGTV2I32: return NVPTX::ISetUGTi32rr_toi32; + case NVPTX::VecUGTV2I64: return NVPTX::ISetUGTi64rr_toi64; + case NVPTX::VecUGTV2I8: return NVPTX::ISetUGTi8rr_toi8; + case NVPTX::VecUGTV4I16: return NVPTX::ISetUGTi16rr_toi16; + case NVPTX::VecUGTV4I32: return NVPTX::ISetUGTi32rr_toi32; + case NVPTX::VecUGTV4I8: return NVPTX::ISetUGTi8rr_toi8; + case NVPTX::VecULEV2I16: return NVPTX::ISetULEi16rr_toi16; + case NVPTX::VecULEV2I32: return NVPTX::ISetULEi32rr_toi32; + case NVPTX::VecULEV2I64: return NVPTX::ISetULEi64rr_toi64; + case NVPTX::VecULEV2I8: return NVPTX::ISetULEi8rr_toi8; + case NVPTX::VecULEV4I16: return NVPTX::ISetULEi16rr_toi16; + case NVPTX::VecULEV4I32: return NVPTX::ISetULEi32rr_toi32; + case NVPTX::VecULEV4I8: return NVPTX::ISetULEi8rr_toi8; + case NVPTX::VecULTV2I16: return NVPTX::ISetULTi16rr_toi16; + case NVPTX::VecULTV2I32: return 
NVPTX::ISetULTi32rr_toi32; + case NVPTX::VecULTV2I64: return NVPTX::ISetULTi64rr_toi64; + case NVPTX::VecULTV2I8: return NVPTX::ISetULTi8rr_toi8; + case NVPTX::VecULTV4I16: return NVPTX::ISetULTi16rr_toi16; + case NVPTX::VecULTV4I32: return NVPTX::ISetULTi32rr_toi32; + case NVPTX::VecULTV4I8: return NVPTX::ISetULTi8rr_toi8; + case NVPTX::VecUNEV2I16: return NVPTX::ISetUNEi16rr_toi16; + case NVPTX::VecUNEV2I32: return NVPTX::ISetUNEi32rr_toi32; + case NVPTX::VecUNEV2I64: return NVPTX::ISetUNEi64rr_toi64; + case NVPTX::VecUNEV2I8: return NVPTX::ISetUNEi8rr_toi8; + case NVPTX::VecUNEV4I16: return NVPTX::ISetUNEi16rr_toi16; + case NVPTX::VecUNEV4I32: return NVPTX::ISetUNEi32rr_toi32; + case NVPTX::VecUNEV4I8: return NVPTX::ISetUNEi8rr_toi8; + case NVPTX::INT_PTX_LDU_G_v2i8_32: return NVPTX::INT_PTX_LDU_G_v2i8_ELE_32; + case NVPTX::INT_PTX_LDU_G_v4i8_32: return NVPTX::INT_PTX_LDU_G_v4i8_ELE_32; + case NVPTX::INT_PTX_LDU_G_v2i16_32: return NVPTX::INT_PTX_LDU_G_v2i16_ELE_32; + case NVPTX::INT_PTX_LDU_G_v4i16_32: return NVPTX::INT_PTX_LDU_G_v4i16_ELE_32; + case NVPTX::INT_PTX_LDU_G_v2i32_32: return NVPTX::INT_PTX_LDU_G_v2i32_ELE_32; + case NVPTX::INT_PTX_LDU_G_v4i32_32: return NVPTX::INT_PTX_LDU_G_v4i32_ELE_32; + case NVPTX::INT_PTX_LDU_G_v2f32_32: return NVPTX::INT_PTX_LDU_G_v2f32_ELE_32; + case NVPTX::INT_PTX_LDU_G_v4f32_32: return NVPTX::INT_PTX_LDU_G_v4f32_ELE_32; + case NVPTX::INT_PTX_LDU_G_v2i64_32: return NVPTX::INT_PTX_LDU_G_v2i64_ELE_32; + case NVPTX::INT_PTX_LDU_G_v2f64_32: return NVPTX::INT_PTX_LDU_G_v2f64_ELE_32; + case NVPTX::INT_PTX_LDU_G_v2i8_64: return NVPTX::INT_PTX_LDU_G_v2i8_ELE_64; + case NVPTX::INT_PTX_LDU_G_v4i8_64: return NVPTX::INT_PTX_LDU_G_v4i8_ELE_64; + case NVPTX::INT_PTX_LDU_G_v2i16_64: return NVPTX::INT_PTX_LDU_G_v2i16_ELE_64; + case NVPTX::INT_PTX_LDU_G_v4i16_64: return NVPTX::INT_PTX_LDU_G_v4i16_ELE_64; + case NVPTX::INT_PTX_LDU_G_v2i32_64: return NVPTX::INT_PTX_LDU_G_v2i32_ELE_64; + case NVPTX::INT_PTX_LDU_G_v4i32_64: return 
NVPTX::INT_PTX_LDU_G_v4i32_ELE_64; + case NVPTX::INT_PTX_LDU_G_v2f32_64: return NVPTX::INT_PTX_LDU_G_v2f32_ELE_64; + case NVPTX::INT_PTX_LDU_G_v4f32_64: return NVPTX::INT_PTX_LDU_G_v4f32_ELE_64; + case NVPTX::INT_PTX_LDU_G_v2i64_64: return NVPTX::INT_PTX_LDU_G_v2i64_ELE_64; + case NVPTX::INT_PTX_LDU_G_v2f64_64: return NVPTX::INT_PTX_LDU_G_v2f64_ELE_64; + + case NVPTX::LoadParamV4I32: return NVPTX::LoadParamScalar4I32; + case NVPTX::LoadParamV4I16: return NVPTX::LoadParamScalar4I16; + case NVPTX::LoadParamV4I8: return NVPTX::LoadParamScalar4I8; + case NVPTX::LoadParamV2I64: return NVPTX::LoadParamScalar2I64; + case NVPTX::LoadParamV2I32: return NVPTX::LoadParamScalar2I32; + case NVPTX::LoadParamV2I16: return NVPTX::LoadParamScalar2I16; + case NVPTX::LoadParamV2I8: return NVPTX::LoadParamScalar2I8; + case NVPTX::LoadParamV4F32: return NVPTX::LoadParamScalar4F32; + case NVPTX::LoadParamV2F32: return NVPTX::LoadParamScalar2F32; + case NVPTX::LoadParamV2F64: return NVPTX::LoadParamScalar2F64; + case NVPTX::StoreParamV4I32: return NVPTX::StoreParamScalar4I32; + case NVPTX::StoreParamV4I16: return NVPTX::StoreParamScalar4I16; + case NVPTX::StoreParamV4I8: return NVPTX::StoreParamScalar4I8; + case NVPTX::StoreParamV2I64: return NVPTX::StoreParamScalar2I64; + case NVPTX::StoreParamV2I32: return NVPTX::StoreParamScalar2I32; + case NVPTX::StoreParamV2I16: return NVPTX::StoreParamScalar2I16; + case NVPTX::StoreParamV2I8: return NVPTX::StoreParamScalar2I8; + case NVPTX::StoreParamV4F32: return NVPTX::StoreParamScalar4F32; + case NVPTX::StoreParamV2F32: return NVPTX::StoreParamScalar2F32; + case NVPTX::StoreParamV2F64: return NVPTX::StoreParamScalar2F64; + case NVPTX::StoreRetvalV4I32: return NVPTX::StoreRetvalScalar4I32; + case NVPTX::StoreRetvalV4I16: return NVPTX::StoreRetvalScalar4I16; + case NVPTX::StoreRetvalV4I8: return NVPTX::StoreRetvalScalar4I8; + case NVPTX::StoreRetvalV2I64: return NVPTX::StoreRetvalScalar2I64; + case NVPTX::StoreRetvalV2I32: return 
NVPTX::StoreRetvalScalar2I32; + case NVPTX::StoreRetvalV2I16: return NVPTX::StoreRetvalScalar2I16; + case NVPTX::StoreRetvalV2I8: return NVPTX::StoreRetvalScalar2I8; + case NVPTX::StoreRetvalV4F32: return NVPTX::StoreRetvalScalar4F32; + case NVPTX::StoreRetvalV2F32: return NVPTX::StoreRetvalScalar2F32; + case NVPTX::StoreRetvalV2F64: return NVPTX::StoreRetvalScalar2F64; + case NVPTX::VecI32toV4I8: return NVPTX::I32toV4I8; + case NVPTX::VecI64toV4I16: return NVPTX::I64toV4I16; + case NVPTX::VecI16toV2I8: return NVPTX::I16toV2I8; + case NVPTX::VecI32toV2I16: return NVPTX::I32toV2I16; + case NVPTX::VecI64toV2I32: return NVPTX::I64toV2I32; + case NVPTX::VecF64toV2F32: return NVPTX::F64toV2F32; + + case NVPTX::LD_v2i8_avar: return NVPTX::LDV_i8_v2_avar; + case NVPTX::LD_v2i8_areg: return NVPTX::LDV_i8_v2_areg; + case NVPTX::LD_v2i8_ari: return NVPTX::LDV_i8_v2_ari; + case NVPTX::LD_v2i8_asi: return NVPTX::LDV_i8_v2_asi; + case NVPTX::LD_v4i8_avar: return NVPTX::LDV_i8_v4_avar; + case NVPTX::LD_v4i8_areg: return NVPTX::LDV_i8_v4_areg; + case NVPTX::LD_v4i8_ari: return NVPTX::LDV_i8_v4_ari; + case NVPTX::LD_v4i8_asi: return NVPTX::LDV_i8_v4_asi; + + case NVPTX::LD_v2i16_avar: return NVPTX::LDV_i16_v2_avar; + case NVPTX::LD_v2i16_areg: return NVPTX::LDV_i16_v2_areg; + case NVPTX::LD_v2i16_ari: return NVPTX::LDV_i16_v2_ari; + case NVPTX::LD_v2i16_asi: return NVPTX::LDV_i16_v2_asi; + case NVPTX::LD_v4i16_avar: return NVPTX::LDV_i16_v4_avar; + case NVPTX::LD_v4i16_areg: return NVPTX::LDV_i16_v4_areg; + case NVPTX::LD_v4i16_ari: return NVPTX::LDV_i16_v4_ari; + case NVPTX::LD_v4i16_asi: return NVPTX::LDV_i16_v4_asi; + + case NVPTX::LD_v2i32_avar: return NVPTX::LDV_i32_v2_avar; + case NVPTX::LD_v2i32_areg: return NVPTX::LDV_i32_v2_areg; + case NVPTX::LD_v2i32_ari: return NVPTX::LDV_i32_v2_ari; + case NVPTX::LD_v2i32_asi: return NVPTX::LDV_i32_v2_asi; + case NVPTX::LD_v4i32_avar: return NVPTX::LDV_i32_v4_avar; + case NVPTX::LD_v4i32_areg: return NVPTX::LDV_i32_v4_areg; + case 
NVPTX::LD_v4i32_ari: return NVPTX::LDV_i32_v4_ari; + case NVPTX::LD_v4i32_asi: return NVPTX::LDV_i32_v4_asi; + + case NVPTX::LD_v2f32_avar: return NVPTX::LDV_f32_v2_avar; + case NVPTX::LD_v2f32_areg: return NVPTX::LDV_f32_v2_areg; + case NVPTX::LD_v2f32_ari: return NVPTX::LDV_f32_v2_ari; + case NVPTX::LD_v2f32_asi: return NVPTX::LDV_f32_v2_asi; + case NVPTX::LD_v4f32_avar: return NVPTX::LDV_f32_v4_avar; + case NVPTX::LD_v4f32_areg: return NVPTX::LDV_f32_v4_areg; + case NVPTX::LD_v4f32_ari: return NVPTX::LDV_f32_v4_ari; + case NVPTX::LD_v4f32_asi: return NVPTX::LDV_f32_v4_asi; + + case NVPTX::LD_v2i64_avar: return NVPTX::LDV_i64_v2_avar; + case NVPTX::LD_v2i64_areg: return NVPTX::LDV_i64_v2_areg; + case NVPTX::LD_v2i64_ari: return NVPTX::LDV_i64_v2_ari; + case NVPTX::LD_v2i64_asi: return NVPTX::LDV_i64_v2_asi; + case NVPTX::LD_v2f64_avar: return NVPTX::LDV_f64_v2_avar; + case NVPTX::LD_v2f64_areg: return NVPTX::LDV_f64_v2_areg; + case NVPTX::LD_v2f64_ari: return NVPTX::LDV_f64_v2_ari; + case NVPTX::LD_v2f64_asi: return NVPTX::LDV_f64_v2_asi; + + case NVPTX::ST_v2i8_avar: return NVPTX::STV_i8_v2_avar; + case NVPTX::ST_v2i8_areg: return NVPTX::STV_i8_v2_areg; + case NVPTX::ST_v2i8_ari: return NVPTX::STV_i8_v2_ari; + case NVPTX::ST_v2i8_asi: return NVPTX::STV_i8_v2_asi; + case NVPTX::ST_v4i8_avar: return NVPTX::STV_i8_v4_avar; + case NVPTX::ST_v4i8_areg: return NVPTX::STV_i8_v4_areg; + case NVPTX::ST_v4i8_ari: return NVPTX::STV_i8_v4_ari; + case NVPTX::ST_v4i8_asi: return NVPTX::STV_i8_v4_asi; + + case NVPTX::ST_v2i16_avar: return NVPTX::STV_i16_v2_avar; + case NVPTX::ST_v2i16_areg: return NVPTX::STV_i16_v2_areg; + case NVPTX::ST_v2i16_ari: return NVPTX::STV_i16_v2_ari; + case NVPTX::ST_v2i16_asi: return NVPTX::STV_i16_v2_asi; + case NVPTX::ST_v4i16_avar: return NVPTX::STV_i16_v4_avar; + case NVPTX::ST_v4i16_areg: return NVPTX::STV_i16_v4_areg; + case NVPTX::ST_v4i16_ari: return NVPTX::STV_i16_v4_ari; + case NVPTX::ST_v4i16_asi: return NVPTX::STV_i16_v4_asi; + + case 
NVPTX::ST_v2i32_avar: return NVPTX::STV_i32_v2_avar; + case NVPTX::ST_v2i32_areg: return NVPTX::STV_i32_v2_areg; + case NVPTX::ST_v2i32_ari: return NVPTX::STV_i32_v2_ari; + case NVPTX::ST_v2i32_asi: return NVPTX::STV_i32_v2_asi; + case NVPTX::ST_v4i32_avar: return NVPTX::STV_i32_v4_avar; + case NVPTX::ST_v4i32_areg: return NVPTX::STV_i32_v4_areg; + case NVPTX::ST_v4i32_ari: return NVPTX::STV_i32_v4_ari; + case NVPTX::ST_v4i32_asi: return NVPTX::STV_i32_v4_asi; + + case NVPTX::ST_v2f32_avar: return NVPTX::STV_f32_v2_avar; + case NVPTX::ST_v2f32_areg: return NVPTX::STV_f32_v2_areg; + case NVPTX::ST_v2f32_ari: return NVPTX::STV_f32_v2_ari; + case NVPTX::ST_v2f32_asi: return NVPTX::STV_f32_v2_asi; + case NVPTX::ST_v4f32_avar: return NVPTX::STV_f32_v4_avar; + case NVPTX::ST_v4f32_areg: return NVPTX::STV_f32_v4_areg; + case NVPTX::ST_v4f32_ari: return NVPTX::STV_f32_v4_ari; + case NVPTX::ST_v4f32_asi: return NVPTX::STV_f32_v4_asi; + + case NVPTX::ST_v2i64_avar: return NVPTX::STV_i64_v2_avar; + case NVPTX::ST_v2i64_areg: return NVPTX::STV_i64_v2_areg; + case NVPTX::ST_v2i64_ari: return NVPTX::STV_i64_v2_ari; + case NVPTX::ST_v2i64_asi: return NVPTX::STV_i64_v2_asi; + case NVPTX::ST_v2f64_avar: return NVPTX::STV_f64_v2_avar; + case NVPTX::ST_v2f64_areg: return NVPTX::STV_f64_v2_areg; + case NVPTX::ST_v2f64_ari: return NVPTX::STV_f64_v2_ari; + case NVPTX::ST_v2f64_asi: return NVPTX::STV_f64_v2_asi; + } + return 0; +} diff --git a/lib/Target/NVPTX/cl_common_defines.h b/lib/Target/NVPTX/cl_common_defines.h new file mode 100644 index 0000000..a7347ef --- /dev/null +++ b/lib/Target/NVPTX/cl_common_defines.h @@ -0,0 +1,125 @@ +#ifndef __CL_COMMON_DEFINES_H__ +#define __CL_COMMON_DEFINES_H__ +// This file includes defines that are common to both kernel code and +// the NVPTX back-end. 
+ +// +// Common defines for Image intrinsics +// Channel order +// (unnamed enum; which enumerators are present depends on __NV_CL_C_VERSION) +enum { + CLK_R = 0x10B0, + CLK_A = 0x10B1, + CLK_RG = 0x10B2, + CLK_RA = 0x10B3, + CLK_RGB = 0x10B4, + CLK_RGBA = 0x10B5, + CLK_BGRA = 0x10B6, + CLK_ARGB = 0x10B7, + +#if (__NV_CL_C_VERSION == __NV_CL_C_VERSION_1_0) + CLK_xRGB = 0x10B7, // same value as CLK_ARGB +#endif + + CLK_INTENSITY = 0x10B8, + CLK_LUMINANCE = 0x10B9 + +#if (__NV_CL_C_VERSION >= __NV_CL_C_VERSION_1_1) + , + CLK_Rx = 0x10BA, + CLK_RGx = 0x10BB, + CLK_RGBx = 0x10BC +#endif +}; + + +// Channel type codes, followed by internal bookkeeping enumerators +// (the __CLK_* names at the end of the enum). +typedef enum clk_channel_type { + // valid formats for float return types + CLK_SNORM_INT8 = 0x10D0, // four channel RGBA snorm8 + CLK_SNORM_INT16 = 0x10D1, // four channel RGBA snorm16 + CLK_UNORM_INT8 = 0x10D2, // four channel RGBA unorm8 + CLK_UNORM_INT16 = 0x10D3, // four channel RGBA unorm16 + CLK_HALF_FLOAT = 0x10DD, // four channel RGBA half + CLK_FLOAT = 0x10DE, // four channel RGBA float + +#if (__NV_CL_C_VERSION >= __NV_CL_C_VERSION_1_1) + CLK_UNORM_SHORT_565 = 0x10D4, + CLK_UNORM_SHORT_555 = 0x10D5, + CLK_UNORM_INT_101010 = 0x10D6, +#endif + + // valid only for integer return types + CLK_SIGNED_INT8 = 0x10D7, + CLK_SIGNED_INT16 = 0x10D8, + CLK_SIGNED_INT32 = 0x10D9, + CLK_UNSIGNED_INT8 = 0x10DA, + CLK_UNSIGNED_INT16 = 0x10DB, + CLK_UNSIGNED_INT32 = 0x10DC, + + // CI SPI for CPU + // NOTE(review): the next three enumerators have no initializer, so they + // continue from CLK_UNSIGNED_INT32 (0x10DC) and evaluate to 0x10DD, 0x10DE + // and 0x10DF; the first two therefore equal CLK_HALF_FLOAT and CLK_FLOAT. + // Confirm this aliasing is intended before relying on these values. + __CLK_UNORM_INT8888 , // four channel ARGB unorm8 + __CLK_UNORM_INT8888R, // four channel BGRA unorm8 + + __CLK_VALID_IMAGE_TYPE_COUNT, + __CLK_INVALID_IMAGE_TYPE = __CLK_VALID_IMAGE_TYPE_COUNT, + __CLK_VALID_IMAGE_TYPE_MASK_BITS = 4, // number of bits required to + // represent any image type + __CLK_VALID_IMAGE_TYPE_MASK = ( 1 << __CLK_VALID_IMAGE_TYPE_MASK_BITS ) - 1 +}clk_channel_type; + +// Sampler state packed into one integer: addressing mode, normalized- +// coordinates flag, filter mode and mip mode, each shifted to its own +// __CLK_*_BASE position with a matching __CLK_*_MASK / __CLK_*_BITS. +typedef enum clk_sampler_type { + __CLK_ADDRESS_BASE = 0, + CLK_ADDRESS_NONE = 0 << __CLK_ADDRESS_BASE, + CLK_ADDRESS_CLAMP = 1 << __CLK_ADDRESS_BASE, + CLK_ADDRESS_CLAMP_TO_EDGE = 2 << __CLK_ADDRESS_BASE, + CLK_ADDRESS_REPEAT = 3 << __CLK_ADDRESS_BASE, + CLK_ADDRESS_MIRROR = 4 << __CLK_ADDRESS_BASE, +
+#if (__NV_CL_C_VERSION >= __NV_CL_C_VERSION_1_1) + CLK_ADDRESS_MIRRORED_REPEAT = CLK_ADDRESS_MIRROR, +#endif + __CLK_ADDRESS_MASK = CLK_ADDRESS_NONE | CLK_ADDRESS_CLAMP | + CLK_ADDRESS_CLAMP_TO_EDGE | + CLK_ADDRESS_REPEAT | CLK_ADDRESS_MIRROR, + __CLK_ADDRESS_BITS = 3, // number of bits required to + // represent address info + + // normalized-coordinates flag: a single bit placed directly above the + // address bits + __CLK_NORMALIZED_BASE = __CLK_ADDRESS_BITS, + CLK_NORMALIZED_COORDS_FALSE = 0, + CLK_NORMALIZED_COORDS_TRUE = 1 << __CLK_NORMALIZED_BASE, + __CLK_NORMALIZED_MASK = CLK_NORMALIZED_COORDS_FALSE | + CLK_NORMALIZED_COORDS_TRUE, + __CLK_NORMALIZED_BITS = 1, // number of bits required to + // represent normalization + + // filter mode: placed directly above the normalized-coordinates bit + __CLK_FILTER_BASE = __CLK_NORMALIZED_BASE + + __CLK_NORMALIZED_BITS, + CLK_FILTER_NEAREST = 0 << __CLK_FILTER_BASE, + CLK_FILTER_LINEAR = 1 << __CLK_FILTER_BASE, + CLK_FILTER_ANISOTROPIC = 2 << __CLK_FILTER_BASE, + __CLK_FILTER_MASK = CLK_FILTER_NEAREST | CLK_FILTER_LINEAR | + CLK_FILTER_ANISOTROPIC, + __CLK_FILTER_BITS = 2, // number of bits required to + // represent filter info + + // mip mode: placed directly above the filter bits + __CLK_MIP_BASE = __CLK_FILTER_BASE + __CLK_FILTER_BITS, + CLK_MIP_NEAREST = 0 << __CLK_MIP_BASE, + CLK_MIP_LINEAR = 1 << __CLK_MIP_BASE, + CLK_MIP_ANISOTROPIC = 2 << __CLK_MIP_BASE, + __CLK_MIP_MASK = CLK_MIP_NEAREST | CLK_MIP_LINEAR | + CLK_MIP_ANISOTROPIC, + __CLK_MIP_BITS = 2, // number of bits required to + // represent mip info + + // total width and combined mask of the four fields above + __CLK_SAMPLER_BITS = __CLK_MIP_BASE + __CLK_MIP_BITS, + __CLK_SAMPLER_MASK = __CLK_MIP_MASK | __CLK_FILTER_MASK | + __CLK_NORMALIZED_MASK | __CLK_ADDRESS_MASK, + + __CLK_ANISOTROPIC_RATIO_BITS = 5, + // NOTE(review): converting 0x80000000 to int and right-shifting the + // negative result are both implementation-defined; presumably this is + // meant to set the top __CLK_ANISOTROPIC_RATIO_BITS bits of a 32-bit + // word via a sign-extending shift - confirm on every supported compiler. + __CLK_ANISOTROPIC_RATIO_MASK = (int) 0x80000000 >> + (__CLK_ANISOTROPIC_RATIO_BITS-1) +} clk_sampler_type; + +// Memory synchronization +// (bit flags: bit 0 = local memory fence, bit 1 = global memory fence) +#define CLK_LOCAL_MEM_FENCE (1 << 0) +#define CLK_GLOBAL_MEM_FENCE (1 << 1) + +#endif // __CL_COMMON_DEFINES_H__ diff --git a/lib/Target/NVPTX/gen-register-defs.py b/lib/Target/NVPTX/gen-register-defs.py new file mode 100644 index 0000000..ed06668 --- /dev/null +++ b/lib/Target/NVPTX/gen-register-defs.py @@ -0,0 +1,202 @@ +#!/usr/bin/env
#!/usr/bin/env python
"""Generate the NVPTX register description files.

Run as a script, this writes two files into the current directory:

* ``NVPTXRegisterInfo.td`` -- TableGen register and register-class
  definitions for the NVPTX target.
* ``NVPTXNumRegisters.h`` -- a C++ header defining the constant
  ``llvm::NVPTXNumRegisters``.

The text of both files is produced by pure functions
(``render_register_info`` and ``render_num_registers_header``) so it can
be inspected without touching the file system; ``main`` does the writing.
"""

# Number of registers emitted for every register family below.
num_regs = 396

# Fixed text at the top of NVPTXRegisterInfo.td.  The leading newline is
# part of the emitted output.
_TD_PROLOGUE = '''
//===-- NVPTXRegisterInfo.td - NVPTX Register defs ---------*- tablegen -*-===//
//
// The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//

//===----------------------------------------------------------------------===//
// Declarations that describe the PTX register file
//===----------------------------------------------------------------------===//

class NVPTXReg<string n> : Register<n> {
 let Namespace = "NVPTX";
}

class NVPTXRegClass<list<ValueType> regTypes, int alignment, dag regList>
 : RegisterClass <"NVPTX", regTypes, alignment, regList>;

//===----------------------------------------------------------------------===//
// Registers
//===----------------------------------------------------------------------===//

// Special Registers used as stack pointer
def VRFrame : NVPTXReg<"%SP">;
def VRFrameLocal : NVPTXReg<"%SPL">;

// Special Registers used as the stack
def VRDepot : NVPTXReg<"%Depot">;
'''

# (section banner, [(def-name prefix, assembly-name prefix), ...]).
# Order matters: it is the order in which the definitions are emitted.
_REGISTER_SECTIONS = [
    ('//===--- Predicate --------------------------------------------------------===//',
     [('P', 'p')]),
    ('//===--- 8-bit ------------------------------------------------------------===//',
     [('RC', 'rc')]),
    ('//===--- 16-bit -----------------------------------------------------------===//',
     [('RS', 'rs')]),
    ('//===--- 32-bit -----------------------------------------------------------===//',
     [('R', 'r')]),
    ('//===--- 64-bit -----------------------------------------------------------===//',
     [('RL', 'rl')]),
    ('//===--- 32-bit float -----------------------------------------------------===//',
     [('F', 'f')]),
    ('//===--- 64-bit float -----------------------------------------------------===//',
     [('FL', 'fl')]),
    ('//===--- Vector -----------------------------------------------------------===//',
     [('v2b8_', 'v2b8_'), ('v2b16_', 'v2b16_'), ('v2b32_', 'v2b32_'),
      ('v2b64_', 'v2b64_'),
      ('v4b8_', 'v4b8_'), ('v4b16_', 'v4b16_'), ('v4b32_', 'v4b32_')]),
    ('//===--- Arguments --------------------------------------------------------===//',
     [('ia', 'ia'), ('la', 'la'), ('fa', 'fa'), ('da', 'da')]),
]

_REGCLASS_BANNER = '''
//===----------------------------------------------------------------------===//
// Register classes
//===----------------------------------------------------------------------===//
'''

# (class name, value type, alignment, register-name prefix)
_SCALAR_CLASSES = [
    ('Int1Regs', 'i1', 8, 'P'),
    ('Int8Regs', 'i8', 8, 'RC'),
    ('Int16Regs', 'i16', 16, 'RS'),
    ('Int32Regs', 'i32', 32, 'R'),
    ('Int64Regs', 'i64', 64, 'RL'),
    ('Float32Regs', 'f32', 32, 'F'),
    ('Float64Regs', 'f64', 64, 'FL'),
    ('Int32ArgRegs', 'i32', 32, 'ia'),
    ('Int64ArgRegs', 'i64', 64, 'la'),
    ('Float32ArgRegs', 'f32', 32, 'fa'),
    ('Float64ArgRegs', 'f64', 64, 'da'),
]

_SPECIAL_REGS = '''
// Read NVPTXRegisterInfo.cpp to see how VRFrame and VRDepot are used.
def SpecialRegs : NVPTXRegClass<[i32], 32, (add VRFrame, VRDepot)>;
'''

_VEC_CLASS_DEF = '''
class NVPTXVecRegClass<list<ValueType> regTypes, int alignment, dag regList,
 NVPTXRegClass sClass,
 int e,
 string n>
 : NVPTXRegClass<regTypes, alignment, regList>
{
 NVPTXRegClass scalarClass=sClass;
 int elems=e;
 string name=n;
}
'''

# (class name, value type, alignment, register-name prefix,
#  scalar register class, element count, name suffix)
_VECTOR_CLASSES = [
    ('V2F32Regs', 'v2f32', 64, 'v2b32_', 'Float32Regs', 2, '.v2.f32'),
    ('V4F32Regs', 'v4f32', 128, 'v4b32_', 'Float32Regs', 4, '.v4.f32'),
    ('V2I32Regs', 'v2i32', 64, 'v2b32_', 'Int32Regs', 2, '.v2.u32'),
    ('V4I32Regs', 'v4i32', 128, 'v4b32_', 'Int32Regs', 4, '.v4.u32'),
    ('V2F64Regs', 'v2f64', 128, 'v2b64_', 'Float64Regs', 2, '.v2.f64'),
    ('V2I64Regs', 'v2i64', 128, 'v2b64_', 'Int64Regs', 2, '.v2.u64'),
    ('V2I16Regs', 'v2i16', 32, 'v2b16_', 'Int16Regs', 2, '.v2.u16'),
    ('V4I16Regs', 'v4i16', 64, 'v4b16_', 'Int16Regs', 4, '.v4.u16'),
    ('V2I8Regs', 'v2i8', 16, 'v2b8_', 'Int8Regs', 2, '.v2.u8'),
    ('V4I8Regs', 'v4i8', 32, 'v4b8_', 'Int8Regs', 4, '.v4.u8'),
]

# Template for NVPTXNumRegisters.h; the single %d receives the register count.
_NUM_REGISTERS_HEADER = '''
//===-- NVPTXNumRegisters.h - PTX Register Info ---------------------------===//
//
// The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//

#ifndef NVPTX_NUM_REGISTERS_H
#define NVPTX_NUM_REGISTERS_H

namespace llvm {

const unsigned NVPTXNumRegisters = %d;

}

#endif
'''


def render_register_info(count):
    """Return the full text of NVPTXRegisterInfo.td for `count` registers.

    Every register family gets `count` definitions numbered 0..count-1, and
    every register class spans the sequence 0..count-1 of its family.
    """
    parts = [_TD_PROLOGUE]

    # Individual register definitions, grouped under their section banner.
    for banner, families in _REGISTER_SECTIONS:
        parts.append('\n' + banner + '\n')
        for def_prefix, asm_prefix in families:
            for i in range(count):
                # "%%" yields the literal '%' that starts the assembly name.
                parts.append('def %s%d : NVPTXReg<"%%%s%d">;\n'
                             % (def_prefix, i, asm_prefix, i))

    # Scalar register classes.
    parts.append(_REGCLASS_BANNER)
    for name, value_type, align, prefix in _SCALAR_CLASSES:
        # "%%u" stays a literal "%u" placeholder for TableGen's `sequence`.
        parts.append('def %s : NVPTXRegClass<[%s], %d, (add (sequence "%s%%u", 0, %d))>;\n'
                     % (name, value_type, align, prefix, count - 1))

    parts.append(_SPECIAL_REGS)

    # Vector register classes.
    parts.append(_VEC_CLASS_DEF)
    for name, value_type, align, prefix, scalar, elems, suffix in _VECTOR_CLASSES:
        parts.append('def %s\n : NVPTXVecRegClass<[%s], %d, (add (sequence "%s%%u", 0, %d)),\n %s, %d, "%s">;\n'
                     % (name, value_type, align, prefix, count - 1,
                        scalar, elems, suffix))

    return ''.join(parts)


def render_num_registers_header(count):
    """Return the full text of NVPTXNumRegisters.h for `count` registers."""
    return _NUM_REGISTERS_HEADER % count


def main():
    """Write both generated files into the current working directory."""
    # `with` guarantees each file is closed even if a write fails.
    with open('NVPTXRegisterInfo.td', 'w') as td_file:
        td_file.write(render_register_info(num_regs))

    with open('NVPTXNumRegisters.h', 'w') as header_file:
        header_file.write(render_num_registers_header(num_regs))


if __name__ == '__main__':
    main()