diff options
95 files changed, 97 insertions, 9086 deletions
diff --git a/CMakeLists.txt b/CMakeLists.txt index 7f4dea9..5dfb1ac 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -83,7 +83,6 @@ set(LLVM_ALL_TARGETS MSP430 NVPTX PowerPC - PTX Sparc X86 XCore diff --git a/autoconf/configure.ac b/autoconf/configure.ac index 62c01f9..e751059 100644 --- a/autoconf/configure.ac +++ b/autoconf/configure.ac @@ -369,7 +369,6 @@ AC_CACHE_CHECK([target architecture],[llvm_cv_target_arch], msp430-*) llvm_cv_target_arch="MSP430" ;; hexagon-*) llvm_cv_target_arch="Hexagon" ;; mblaze-*) llvm_cv_target_arch="MBlaze" ;; - ptx-*) llvm_cv_target_arch="PTX" ;; nvptx-*) llvm_cv_target_arch="NVPTX" ;; *) llvm_cv_target_arch="Unknown" ;; esac]) @@ -403,7 +402,6 @@ case $host in msp430-*) host_arch="MSP430" ;; hexagon-*) host_arch="Hexagon" ;; mblaze-*) host_arch="MBlaze" ;; - ptx-*) host_arch="PTX" ;; *) host_arch="Unknown" ;; esac @@ -542,7 +540,6 @@ else MSP430) AC_SUBST(TARGET_HAS_JIT,0) ;; Hexagon) AC_SUBST(TARGET_HAS_JIT,0) ;; MBlaze) AC_SUBST(TARGET_HAS_JIT,0) ;; - PTX) AC_SUBST(TARGET_HAS_JIT,0) ;; NVPTX) AC_SUBST(TARGET_HAS_JIT,0) ;; *) AC_SUBST(TARGET_HAS_JIT,0) ;; esac @@ -655,13 +652,13 @@ TARGETS_TO_BUILD="" AC_ARG_ENABLE([targets],AS_HELP_STRING([--enable-targets], [Build specific host targets: all or target1,target2,... Valid targets are: host, x86, x86_64, sparc, powerpc, arm, mips, spu, hexagon, - xcore, msp430, ptx, nvptx, and cpp (default=all)]),, + xcore, msp430, nvptx, and cpp (default=all)]),, enableval=all) if test "$enableval" = host-only ; then enableval=host fi case "$enableval" in - all) TARGETS_TO_BUILD="X86 Sparc PowerPC ARM Mips CellSPU XCore MSP430 CppBackend MBlaze PTX NVPTX Hexagon" ;; + all) TARGETS_TO_BUILD="X86 Sparc PowerPC ARM Mips CellSPU XCore MSP430 CppBackend MBlaze NVPTX Hexagon" ;; *)for a_target in `echo $enableval|sed -e 's/,/ /g' ` ; do case "$a_target" in x86) TARGETS_TO_BUILD="X86 $TARGETS_TO_BUILD" ;; @@ -677,7 +674,6 @@ case "$enableval" in cpp) TARGETS_TO_BUILD="CppBackend $TARGETS_TO_BUILD" ;; hexagon) TARGETS_TO_BUILD="Hexagon $TARGETS_TO_BUILD" ;; mblaze) TARGETS_TO_BUILD="MBlaze $TARGETS_TO_BUILD" ;; - ptx) TARGETS_TO_BUILD="PTX $TARGETS_TO_BUILD" ;; nvptx) TARGETS_TO_BUILD="NVPTX $TARGETS_TO_BUILD" ;; host) case "$llvm_cv_target_arch" in x86) TARGETS_TO_BUILD="X86 $TARGETS_TO_BUILD" ;; @@ -691,7 +687,6 @@ case "$enableval" in XCore) TARGETS_TO_BUILD="XCore $TARGETS_TO_BUILD" ;; MSP430) TARGETS_TO_BUILD="MSP430 $TARGETS_TO_BUILD" ;; Hexagon) TARGETS_TO_BUILD="Hexagon $TARGETS_TO_BUILD" ;; - PTX) TARGETS_TO_BUILD="PTX $TARGETS_TO_BUILD" ;; NVPTX) TARGETS_TO_BUILD="NVPTX $TARGETS_TO_BUILD" ;; *) AC_MSG_ERROR([Can not set target to build]) ;; esac ;; @@ -1419,7 +1419,7 @@ Optional Features: --enable-targets Build specific host targets: all or target1,target2,... Valid targets are: host, x86, x86_64, sparc, powerpc, arm, mips, spu, hexagon, - xcore, msp430, ptx, nvptx, and cpp (default=all) + xcore, msp430, nvptx, and cpp (default=all) --enable-bindings Build specific language bindings: all,auto,none,{binding-name} (default=auto) --enable-libffi Check for the presence of libffi (default is NO) @@ -3901,7 +3901,6 @@ else msp430-*) llvm_cv_target_arch="MSP430" ;; hexagon-*) llvm_cv_target_arch="Hexagon" ;; mblaze-*) llvm_cv_target_arch="MBlaze" ;; - ptx-*) llvm_cv_target_arch="PTX" ;; nvptx-*) llvm_cv_target_arch="NVPTX" ;; *) llvm_cv_target_arch="Unknown" ;; esac @@ -3935,7 +3934,6 @@ case $host in msp430-*) host_arch="MSP430" ;; hexagon-*) host_arch="Hexagon" ;; mblaze-*) host_arch="MBlaze" ;; - ptx-*) host_arch="PTX" ;; *) host_arch="Unknown" ;; esac @@ -5148,8 +5146,6 @@ else ;; MBlaze) TARGET_HAS_JIT=0 ;; - PTX) TARGET_HAS_JIT=0 - ;; NVPTX) TARGET_HAS_JIT=0 ;; *) TARGET_HAS_JIT=0 @@ -5336,7 +5332,7 @@ if test "$enableval" = host-only ; then enableval=host fi case "$enableval" in - all) TARGETS_TO_BUILD="X86 Sparc PowerPC ARM Mips CellSPU XCore MSP430 CppBackend MBlaze PTX NVPTX Hexagon" ;; + all) TARGETS_TO_BUILD="X86 Sparc PowerPC ARM Mips CellSPU XCore MSP430 CppBackend MBlaze NVPTX Hexagon" ;; *)for a_target in `echo $enableval|sed -e 's/,/ /g' ` ; do case "$a_target" in x86) TARGETS_TO_BUILD="X86 $TARGETS_TO_BUILD" ;; @@ -5352,7 +5348,6 @@ case "$enableval" in cpp) TARGETS_TO_BUILD="CppBackend $TARGETS_TO_BUILD" ;; hexagon) TARGETS_TO_BUILD="Hexagon $TARGETS_TO_BUILD" ;; mblaze) TARGETS_TO_BUILD="MBlaze $TARGETS_TO_BUILD" ;; - ptx) TARGETS_TO_BUILD="PTX $TARGETS_TO_BUILD" ;; nvptx) TARGETS_TO_BUILD="NVPTX $TARGETS_TO_BUILD" ;; host) case "$llvm_cv_target_arch" in x86) TARGETS_TO_BUILD="X86 $TARGETS_TO_BUILD" ;; @@ -5366,7 +5361,6 @@ case "$enableval" in XCore) TARGETS_TO_BUILD="XCore $TARGETS_TO_BUILD" ;; MSP430) TARGETS_TO_BUILD="MSP430 $TARGETS_TO_BUILD" ;; Hexagon) TARGETS_TO_BUILD="Hexagon $TARGETS_TO_BUILD" ;; - PTX) TARGETS_TO_BUILD="PTX $TARGETS_TO_BUILD" ;; NVPTX) TARGETS_TO_BUILD="NVPTX $TARGETS_TO_BUILD" ;; *) { { echo "$as_me:$LINENO: error: Can not set target to build" >&5 echo "$as_me: error: Can not set target to build" >&2;} @@ -10349,7 +10343,7 @@ else lt_dlunknown=0; lt_dlno_uscore=1; lt_dlneed_uscore=2 lt_status=$lt_dlunknown cat > conftest.$ac_ext <<EOF -#line 10352 "configure" +#line 10346 "configure" #include "confdefs.h" #if HAVE_DLFCN_H diff --git a/include/llvm/ADT/Triple.h b/include/llvm/ADT/Triple.h index 957c581..6b11c7a 100644 --- a/include/llvm/ADT/Triple.h +++ b/include/llvm/ADT/Triple.h @@ -62,8 +62,6 @@ public: x86_64, // X86-64: amd64, x86_64 xcore, // XCore: xcore mblaze, // MBlaze: mblaze - ptx32, // PTX: ptx (32-bit) - ptx64, // PTX: ptx (64-bit) nvptx, // NVPTX: 32-bit nvptx64, // NVPTX: 64-bit le32, // le32: generic little-endian 32-bit CPU (PNaCl / Emscripten) diff --git a/include/llvm/Intrinsics.td b/include/llvm/Intrinsics.td index 75162bf..794848c 100644 --- a/include/llvm/Intrinsics.td +++ b/include/llvm/Intrinsics.td @@ -445,6 +445,5 @@ include "llvm/IntrinsicsX86.td" include "llvm/IntrinsicsARM.td" include "llvm/IntrinsicsCellSPU.td" include "llvm/IntrinsicsXCore.td" -include "llvm/IntrinsicsPTX.td" include "llvm/IntrinsicsHexagon.td" include "llvm/IntrinsicsNVVM.td" diff --git a/include/llvm/IntrinsicsNVVM.td b/include/llvm/IntrinsicsNVVM.td index 6936778..1853c99 100644 --- a/include/llvm/IntrinsicsNVVM.td +++ b/include/llvm/IntrinsicsNVVM.td @@ -870,3 +870,83 @@ def int_nvvm_compiler_error : Intrinsic<[], [llvm_anyptr_ty], [], "llvm.nvvm.compiler.error">; def int_nvvm_compiler_warn : Intrinsic<[], [llvm_anyptr_ty], [], "llvm.nvvm.compiler.warn">; + + +// Old PTX back-end intrinsics retained here for backwards-compatibility + +multiclass PTXReadSpecialRegisterIntrinsic_v4i32<string prefix> { +// FIXME: Do we need the 128-bit integer type version? +// def _r64 : Intrinsic<[llvm_i128_ty], [], [IntrNoMem]>; + +// FIXME: Enable this once v4i32 support is enabled in back-end. +// def _v4i16 : Intrinsic<[llvm_v4i32_ty], [], [IntrNoMem]>; + + def _x : Intrinsic<[llvm_i32_ty], [], [IntrNoMem]>, + GCCBuiltin<!strconcat(prefix, "_x")>; + def _y : Intrinsic<[llvm_i32_ty], [], [IntrNoMem]>, + GCCBuiltin<!strconcat(prefix, "_y")>; + def _z : Intrinsic<[llvm_i32_ty], [], [IntrNoMem]>, + GCCBuiltin<!strconcat(prefix, "_z")>; + def _w : Intrinsic<[llvm_i32_ty], [], [IntrNoMem]>, + GCCBuiltin<!strconcat(prefix, "_w")>; +} + +class PTXReadSpecialRegisterIntrinsic_r32<string name> + : Intrinsic<[llvm_i32_ty], [], [IntrNoMem]>, + GCCBuiltin<name>; + +class PTXReadSpecialRegisterIntrinsic_r64<string name> + : Intrinsic<[llvm_i64_ty], [], [IntrNoMem]>, + GCCBuiltin<name>; + +defm int_ptx_read_tid : PTXReadSpecialRegisterIntrinsic_v4i32 + <"__builtin_ptx_read_tid">; +defm int_ptx_read_ntid : PTXReadSpecialRegisterIntrinsic_v4i32 + <"__builtin_ptx_read_ntid">; + +def int_ptx_read_laneid : PTXReadSpecialRegisterIntrinsic_r32 + <"__builtin_ptx_read_laneid">; +def int_ptx_read_warpid : PTXReadSpecialRegisterIntrinsic_r32 + <"__builtin_ptx_read_warpid">; +def int_ptx_read_nwarpid : PTXReadSpecialRegisterIntrinsic_r32 + <"__builtin_ptx_read_nwarpid">; + +defm int_ptx_read_ctaid : PTXReadSpecialRegisterIntrinsic_v4i32 + <"__builtin_ptx_read_ctaid">; +defm int_ptx_read_nctaid : PTXReadSpecialRegisterIntrinsic_v4i32 + <"__builtin_ptx_read_nctaid">; + +def int_ptx_read_smid : PTXReadSpecialRegisterIntrinsic_r32 + <"__builtin_ptx_read_smid">; +def int_ptx_read_nsmid : PTXReadSpecialRegisterIntrinsic_r32 + <"__builtin_ptx_read_nsmid">; +def int_ptx_read_gridid : PTXReadSpecialRegisterIntrinsic_r32 + <"__builtin_ptx_read_gridid">; + +def int_ptx_read_lanemask_eq : PTXReadSpecialRegisterIntrinsic_r32 + <"__builtin_ptx_read_lanemask_eq">; +def int_ptx_read_lanemask_le : PTXReadSpecialRegisterIntrinsic_r32 + <"__builtin_ptx_read_lanemask_le">; +def int_ptx_read_lanemask_lt : PTXReadSpecialRegisterIntrinsic_r32 + <"__builtin_ptx_read_lanemask_lt">; +def int_ptx_read_lanemask_ge : PTXReadSpecialRegisterIntrinsic_r32 + <"__builtin_ptx_read_lanemask_ge">; +def int_ptx_read_lanemask_gt : PTXReadSpecialRegisterIntrinsic_r32 + <"__builtin_ptx_read_lanemask_gt">; + +def int_ptx_read_clock : PTXReadSpecialRegisterIntrinsic_r32 + <"__builtin_ptx_read_clock">; +def int_ptx_read_clock64 : PTXReadSpecialRegisterIntrinsic_r64 + <"__builtin_ptx_read_clock64">; + +def int_ptx_read_pm0 : PTXReadSpecialRegisterIntrinsic_r32 + <"__builtin_ptx_read_pm0">; +def int_ptx_read_pm1 : PTXReadSpecialRegisterIntrinsic_r32 + <"__builtin_ptx_read_pm1">; +def int_ptx_read_pm2 : PTXReadSpecialRegisterIntrinsic_r32 + <"__builtin_ptx_read_pm2">; +def int_ptx_read_pm3 : PTXReadSpecialRegisterIntrinsic_r32 + <"__builtin_ptx_read_pm3">; + +def int_ptx_bar_sync : Intrinsic<[], [llvm_i32_ty], []>, + GCCBuiltin<"__builtin_ptx_bar_sync">; diff --git a/include/llvm/IntrinsicsPTX.td b/include/llvm/IntrinsicsPTX.td deleted file mode 100644 index 28379c9..0000000 --- a/include/llvm/IntrinsicsPTX.td +++ /dev/null @@ -1,92 +0,0 @@ -//===- IntrinsicsPTX.td - Defines PTX intrinsics -----------*- tablegen -*-===// -// -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. -// -//===----------------------------------------------------------------------===// -// -// This file defines all of the PTX-specific intrinsics. -// -//===----------------------------------------------------------------------===// - -let TargetPrefix = "ptx" in { - multiclass PTXReadSpecialRegisterIntrinsic_v4i32<string prefix> { -// FIXME: Do we need the 128-bit integer type version? -// def _r64 : Intrinsic<[llvm_i128_ty], [], [IntrNoMem]>; - -// FIXME: Enable this once v4i32 support is enabled in back-end. -// def _v4i16 : Intrinsic<[llvm_v4i32_ty], [], [IntrNoMem]>; - - def _x : Intrinsic<[llvm_i32_ty], [], [IntrNoMem]>, - GCCBuiltin<!strconcat(prefix, "_x")>; - def _y : Intrinsic<[llvm_i32_ty], [], [IntrNoMem]>, - GCCBuiltin<!strconcat(prefix, "_y")>; - def _z : Intrinsic<[llvm_i32_ty], [], [IntrNoMem]>, - GCCBuiltin<!strconcat(prefix, "_z")>; - def _w : Intrinsic<[llvm_i32_ty], [], [IntrNoMem]>, - GCCBuiltin<!strconcat(prefix, "_w")>; - } - - class PTXReadSpecialRegisterIntrinsic_r32<string name> - : Intrinsic<[llvm_i32_ty], [], [IntrNoMem]>, - GCCBuiltin<name>; - - class PTXReadSpecialRegisterIntrinsic_r64<string name> - : Intrinsic<[llvm_i64_ty], [], [IntrNoMem]>, - GCCBuiltin<name>; -} - -defm int_ptx_read_tid : PTXReadSpecialRegisterIntrinsic_v4i32 - <"__builtin_ptx_read_tid">; -defm int_ptx_read_ntid : PTXReadSpecialRegisterIntrinsic_v4i32 - <"__builtin_ptx_read_ntid">; - -def int_ptx_read_laneid : PTXReadSpecialRegisterIntrinsic_r32 - <"__builtin_ptx_read_laneid">; -def int_ptx_read_warpid : PTXReadSpecialRegisterIntrinsic_r32 - <"__builtin_ptx_read_warpid">; -def int_ptx_read_nwarpid : PTXReadSpecialRegisterIntrinsic_r32 - <"__builtin_ptx_read_nwarpid">; - -defm int_ptx_read_ctaid : PTXReadSpecialRegisterIntrinsic_v4i32 - <"__builtin_ptx_read_ctaid">; -defm int_ptx_read_nctaid : PTXReadSpecialRegisterIntrinsic_v4i32 - <"__builtin_ptx_read_nctaid">; - -def int_ptx_read_smid : PTXReadSpecialRegisterIntrinsic_r32 - <"__builtin_ptx_read_smid">; -def int_ptx_read_nsmid : PTXReadSpecialRegisterIntrinsic_r32 - <"__builtin_ptx_read_nsmid">; -def int_ptx_read_gridid : PTXReadSpecialRegisterIntrinsic_r32 - <"__builtin_ptx_read_gridid">; - -def int_ptx_read_lanemask_eq : PTXReadSpecialRegisterIntrinsic_r32 - <"__builtin_ptx_read_lanemask_eq">; -def int_ptx_read_lanemask_le : PTXReadSpecialRegisterIntrinsic_r32 - <"__builtin_ptx_read_lanemask_le">; -def int_ptx_read_lanemask_lt : PTXReadSpecialRegisterIntrinsic_r32 - <"__builtin_ptx_read_lanemask_lt">; -def int_ptx_read_lanemask_ge : PTXReadSpecialRegisterIntrinsic_r32 - <"__builtin_ptx_read_lanemask_ge">; -def int_ptx_read_lanemask_gt : PTXReadSpecialRegisterIntrinsic_r32 - <"__builtin_ptx_read_lanemask_gt">; - -def int_ptx_read_clock : PTXReadSpecialRegisterIntrinsic_r32 - <"__builtin_ptx_read_clock">; -def int_ptx_read_clock64 : PTXReadSpecialRegisterIntrinsic_r64 - <"__builtin_ptx_read_clock64">; - -def int_ptx_read_pm0 : PTXReadSpecialRegisterIntrinsic_r32 - <"__builtin_ptx_read_pm0">; -def int_ptx_read_pm1 : PTXReadSpecialRegisterIntrinsic_r32 - <"__builtin_ptx_read_pm1">; -def int_ptx_read_pm2 : PTXReadSpecialRegisterIntrinsic_r32 - <"__builtin_ptx_read_pm2">; -def int_ptx_read_pm3 : PTXReadSpecialRegisterIntrinsic_r32 - <"__builtin_ptx_read_pm3">; - -let TargetPrefix = "ptx" in - def int_ptx_bar_sync : Intrinsic<[], [llvm_i32_ty], []>, - GCCBuiltin<"__builtin_ptx_bar_sync">; diff --git a/lib/Support/Triple.cpp b/lib/Support/Triple.cpp index 868aa57..822ada7 100644 --- a/lib/Support/Triple.cpp +++ b/lib/Support/Triple.cpp @@ -38,8 +38,6 @@ const char *Triple::getArchTypeName(ArchType Kind) { case x86_64: return "x86_64"; case xcore: return "xcore"; case mblaze: return "mblaze"; - case ptx32: return "ptx32"; - case ptx64: return "ptx64"; case nvptx: return "nvptx"; case nvptx64: return "nvptx64"; case le32: return "le32"; @@ -76,8 +74,6 @@ const char *Triple::getArchTypePrefix(ArchType Kind) { case xcore: return "xcore"; - case ptx32: return "ptx"; - case ptx64: return "ptx"; case nvptx: return "nvptx"; case nvptx64: return "nvptx"; case le32: return "le32"; @@ -164,8 +160,6 @@ Triple::ArchType Triple::getArchTypeForLLVMName(StringRef Name) { .Case("x86", x86) .Case("x86-64", x86_64) .Case("xcore", xcore) - .Case("ptx32", ptx32) - .Case("ptx64", ptx64) .Case("nvptx", nvptx) .Case("nvptx64", nvptx64) .Case("le32", le32) @@ -198,8 +192,6 @@ Triple::ArchType Triple::getArchTypeForDarwinArchName(StringRef Str) { .Cases("arm", "armv4t", "armv5", "armv6", Triple::arm) .Cases("armv7", "armv7f", "armv7k", "armv7s", "xscale", Triple::arm) .Case("r600", Triple::r600) - .Case("ptx32", Triple::ptx32) - .Case("ptx64", Triple::ptx64) .Case("nvptx", Triple::nvptx) .Case("nvptx64", Triple::nvptx64) .Case("amdil", Triple::amdil) @@ -223,8 +215,6 @@ const char *Triple::getArchNameForAssembler() { .Cases("armv6", "thumbv6", "armv6") .Cases("armv7", "thumbv7", "armv7") .Case("r600", "r600") - .Case("ptx32", "ptx32") - .Case("ptx64", "ptx64") .Case("nvptx", "nvptx") .Case("nvptx64", "nvptx64") .Case("le32", "le32") @@ -259,8 +249,6 @@ static Triple::ArchType parseArch(StringRef ArchName) { .Case("sparcv9", Triple::sparcv9) .Case("tce", Triple::tce) .Case("xcore", Triple::xcore) - .Case("ptx32", Triple::ptx32) - .Case("ptx64", Triple::ptx64) .Case("nvptx", Triple::nvptx) .Case("nvptx64", Triple::nvptx64) .Case("le32", Triple::le32) @@ -689,7 +677,6 @@ static unsigned getArchPointerBitWidth(llvm::Triple::ArchType Arch) { case llvm::Triple::mipsel: case llvm::Triple::nvptx: case llvm::Triple::ppc: - case llvm::Triple::ptx32: case llvm::Triple::r600: case llvm::Triple::sparc: case llvm::Triple::tce: @@ -702,7 +689,6 @@ static unsigned getArchPointerBitWidth(llvm::Triple::ArchType Arch) { case llvm::Triple::mips64el: case llvm::Triple::nvptx64: case llvm::Triple::ppc64: - case llvm::Triple::ptx64: case llvm::Triple::sparcv9: case llvm::Triple::x86_64: return 64; @@ -740,7 +726,6 @@ Triple Triple::get32BitArchVariant() const { case Triple::mipsel: case Triple::nvptx: case Triple::ppc: - case Triple::ptx32: case Triple::r600: case Triple::sparc: case Triple::tce: @@ -754,7 +739,6 @@ Triple Triple::get32BitArchVariant() const { case Triple::mips64el: T.setArch(Triple::mipsel); break; case Triple::nvptx64: T.setArch(Triple::nvptx); break; case Triple::ppc64: T.setArch(Triple::ppc); break; - case Triple::ptx64: T.setArch(Triple::ptx32); break; case Triple::sparcv9: T.setArch(Triple::sparc); break; case Triple::x86_64: T.setArch(Triple::x86); break; } @@ -783,7 +767,6 @@ Triple Triple::get64BitArchVariant() const { case Triple::mips64el: case Triple::nvptx64: case Triple::ppc64: - case Triple::ptx64: case Triple::sparcv9: case Triple::x86_64: // Already 64-bit. @@ -793,7 +776,6 @@ Triple Triple::get64BitArchVariant() const { case Triple::mipsel: T.setArch(Triple::mips64el); break; case Triple::nvptx: T.setArch(Triple::nvptx64); break; case Triple::ppc: T.setArch(Triple::ppc64); break; - case Triple::ptx32: T.setArch(Triple::ptx64); break; case Triple::sparc: T.setArch(Triple::sparcv9); break; case Triple::x86: T.setArch(Triple::x86_64); break; } diff --git a/lib/Target/LLVMBuild.txt b/lib/Target/LLVMBuild.txt index 045ab9e..8995080 100644 --- a/lib/Target/LLVMBuild.txt +++ b/lib/Target/LLVMBuild.txt @@ -16,7 +16,7 @@ ;===------------------------------------------------------------------------===; [common] -subdirectories = ARM CellSPU CppBackend Hexagon MBlaze MSP430 NVPTX Mips PTX PowerPC Sparc X86 XCore +subdirectories = ARM CellSPU CppBackend Hexagon MBlaze MSP430 NVPTX Mips PowerPC Sparc X86 XCore ; This is a special group whose required libraries are extended (by llvm-build) ; with the best execution engine (the native JIT, if available, or the diff --git a/lib/Target/PTX/CMakeLists.txt b/lib/Target/PTX/CMakeLists.txt deleted file mode 100644 index a3be342..0000000 --- a/lib/Target/PTX/CMakeLists.txt +++ /dev/null @@ -1,32 +0,0 @@ -set(LLVM_TARGET_DEFINITIONS PTX.td) - -tablegen(LLVM PTXGenAsmWriter.inc -gen-asm-writer) -tablegen(LLVM PTXGenDAGISel.inc -gen-dag-isel) -tablegen(LLVM PTXGenInstrInfo.inc -gen-instr-info) -tablegen(LLVM PTXGenRegisterInfo.inc -gen-register-info) -tablegen(LLVM PTXGenSubtargetInfo.inc -gen-subtarget) -add_public_tablegen_target(PTXCommonTableGen) - -add_llvm_target(PTXCodeGen - PTXAsmPrinter.cpp - PTXISelDAGToDAG.cpp - PTXISelLowering.cpp - PTXInstrInfo.cpp - PTXFPRoundingModePass.cpp - PTXFrameLowering.cpp - PTXMCAsmStreamer.cpp - PTXMCInstLower.cpp - PTXMFInfoExtract.cpp - PTXMachineFunctionInfo.cpp - PTXParamManager.cpp - PTXRegAlloc.cpp - PTXRegisterInfo.cpp - PTXSelectionDAGInfo.cpp - PTXSubtarget.cpp - PTXTargetMachine.cpp - ) - -add_subdirectory(TargetInfo) -add_subdirectory(InstPrinter) -add_subdirectory(MCTargetDesc) - diff --git a/lib/Target/PTX/InstPrinter/CMakeLists.txt b/lib/Target/PTX/InstPrinter/CMakeLists.txt deleted file mode 100644 index b252893..0000000 --- a/lib/Target/PTX/InstPrinter/CMakeLists.txt +++ /dev/null @@ -1,8 +0,0 @@ -include_directories( ${CMAKE_CURRENT_BINARY_DIR}/.. ${CMAKE_CURRENT_SOURCE_DIR}/.. ) - -add_llvm_library(LLVMPTXAsmPrinter - PTXInstPrinter.cpp - ) - -add_dependencies(LLVMPTXAsmPrinter PTXCommonTableGen) - diff --git a/lib/Target/PTX/InstPrinter/LLVMBuild.txt b/lib/Target/PTX/InstPrinter/LLVMBuild.txt deleted file mode 100644 index af5d200..0000000 --- a/lib/Target/PTX/InstPrinter/LLVMBuild.txt +++ /dev/null @@ -1,23 +0,0 @@ -;===- ./lib/Target/PTX/InstPrinter/LLVMBuild.txt ---------------*- Conf -*--===; -; -; The LLVM Compiler Infrastructure -; -; This file is distributed under the University of Illinois Open Source -; License. See LICENSE.TXT for details. -; -;===------------------------------------------------------------------------===; -; -; This is an LLVMBuild description file for the components in this subdirectory. -; -; For more information on the LLVMBuild system, please see: -; -; http://llvm.org/docs/LLVMBuild.html -; -;===------------------------------------------------------------------------===; - -[component_0] -type = Library -name = PTXAsmPrinter -parent = PTX -required_libraries = MC Support -add_to_library_groups = PTX diff --git a/lib/Target/PTX/InstPrinter/Makefile b/lib/Target/PTX/InstPrinter/Makefile deleted file mode 100644 index 0ccfe44..0000000 --- a/lib/Target/PTX/InstPrinter/Makefile +++ /dev/null @@ -1,16 +0,0 @@ -##===- lib/Target/PTX/AsmPrinter/Makefile ------------------*- Makefile -*-===## -# -# The LLVM Compiler Infrastructure -# -# This file is distributed under the University of Illinois Open Source -# License. See LICENSE.TXT for details. -# -##===----------------------------------------------------------------------===## -LEVEL = ../../../.. -LIBRARYNAME = LLVMPTXAsmPrinter - -# Hack: we need to include 'main' ptx target directory to grab private headers -CPP.Flags += -I$(PROJ_OBJ_DIR)/.. -I$(PROJ_SRC_DIR)/.. - -include $(LEVEL)/Makefile.common - diff --git a/lib/Target/PTX/InstPrinter/PTXInstPrinter.cpp b/lib/Target/PTX/InstPrinter/PTXInstPrinter.cpp deleted file mode 100644 index 1830213..0000000 --- a/lib/Target/PTX/InstPrinter/PTXInstPrinter.cpp +++ /dev/null @@ -1,249 +0,0 @@ -//===-- PTXInstPrinter.cpp - Convert PTX MCInst to assembly syntax --------===// -// -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. -// -//===----------------------------------------------------------------------===// -// -// This class prints a PTX MCInst to a .ptx file. -// -//===----------------------------------------------------------------------===// - -#define DEBUG_TYPE "asm-printer" -#include "PTXInstPrinter.h" -#include "MCTargetDesc/PTXBaseInfo.h" -#include "llvm/MC/MCAsmInfo.h" -#include "llvm/MC/MCExpr.h" -#include "llvm/MC/MCInst.h" -#include "llvm/MC/MCSymbol.h" -#include "llvm/MC/MCInstrInfo.h" -#include "llvm/ADT/APFloat.h" -#include "llvm/ADT/StringExtras.h" -#include "llvm/Support/ErrorHandling.h" -#include "llvm/Support/raw_ostream.h" -using namespace llvm; - -#include "PTXGenAsmWriter.inc" - -PTXInstPrinter::PTXInstPrinter(const MCAsmInfo &MAI, - const MCInstrInfo &MII, - const MCRegisterInfo &MRI, - const MCSubtargetInfo &STI) : - MCInstPrinter(MAI, MII, MRI) { - // Initialize the set of available features. - setAvailableFeatures(STI.getFeatureBits()); -} - -void PTXInstPrinter::printRegName(raw_ostream &OS, unsigned RegNo) const { - // Decode the register number into type and offset - unsigned RegSpace = RegNo & 0x7; - unsigned RegType = (RegNo >> 3) & 0x7; - unsigned RegOffset = RegNo >> 6; - - // Print the register - OS << "%"; - - switch (RegSpace) { - default: - llvm_unreachable("Unknown register space!"); - case PTXRegisterSpace::Reg: - switch (RegType) { - default: - llvm_unreachable("Unknown register type!"); - case PTXRegisterType::Pred: - OS << "p"; - break; - case PTXRegisterType::B16: - OS << "rh"; - break; - case PTXRegisterType::B32: - OS << "r"; - break; - case PTXRegisterType::B64: - OS << "rd"; - break; - case PTXRegisterType::F32: - OS << "f"; - break; - case PTXRegisterType::F64: - OS << "fd"; - break; - } - break; - case PTXRegisterSpace::Return: - OS << "ret"; - break; - case PTXRegisterSpace::Argument: - OS << "arg"; - break; - } - - OS << RegOffset; -} - -void PTXInstPrinter::printInst(const MCInst *MI, raw_ostream &O, - StringRef Annot) { - printPredicate(MI, O); - switch (MI->getOpcode()) { - default: - printInstruction(MI, O); - break; - case PTX::CALL: - printCall(MI, O); - } - O << ";"; - printAnnotation(O, Annot); -} - -void PTXInstPrinter::printPredicate(const MCInst *MI, raw_ostream &O) { - // The last two operands are the predicate operands - int RegIndex; - int OpIndex; - - if (MI->getOpcode() == PTX::CALL) { - RegIndex = 0; - OpIndex = 1; - } else { - RegIndex = MI->getNumOperands()-2; - OpIndex = MI->getNumOperands()-1; - } - - int PredOp = MI->getOperand(OpIndex).getImm(); - if (PredOp == PTXPredicate::None) - return; - - if (PredOp == PTXPredicate::Negate) - O << '!'; - else - O << '@'; - - printOperand(MI, RegIndex, O); -} - -void PTXInstPrinter::printCall(const MCInst *MI, raw_ostream &O) { - O << "\tcall.uni\t"; - // The first two operands are the predicate slot - unsigned Index = 2; - unsigned NumRets = MI->getOperand(Index++).getImm(); - - if (NumRets > 0) { - O << "("; - printOperand(MI, Index++, O); - for (unsigned i = 1; i < NumRets; ++i) { - O << ", "; - printOperand(MI, Index++, O); - } - O << "), "; - } - - const MCExpr* Expr = MI->getOperand(Index++).getExpr(); - unsigned NumArgs = MI->getOperand(Index++).getImm(); - - // if the function call is to printf or puts, change to vprintf - if (const MCSymbolRefExpr *SymRefExpr = dyn_cast<MCSymbolRefExpr>(Expr)) { - const MCSymbol &Sym = SymRefExpr->getSymbol(); - if (Sym.getName() == "printf" || Sym.getName() == "puts") { - O << "vprintf"; - } else { - O << Sym.getName(); - } - } else { - O << *Expr; - } - - O << ", ("; - - if (NumArgs > 0) { - printOperand(MI, Index++, O); - for (unsigned i = 1; i < NumArgs; ++i) { - O << ", "; - printOperand(MI, Index++, O); - } - } - O << ")"; -} - -void PTXInstPrinter::printOperand(const MCInst *MI, unsigned OpNo, - raw_ostream &O) { - const MCOperand &Op = MI->getOperand(OpNo); - if (Op.isImm()) { - O << Op.getImm(); - } else if (Op.isFPImm()) { - double Imm = Op.getFPImm(); - APFloat FPImm(Imm); - APInt FPIntImm = FPImm.bitcastToAPInt(); - O << "0D"; - // PTX requires us to output the full 64 bits, even if the number is zero - if (FPIntImm.getZExtValue() > 0) { - O << FPIntImm.toString(16, false); - } else { - O << "0000000000000000"; - } - } else if (Op.isReg()) { - printRegName(O, Op.getReg()); - } else { - assert(Op.isExpr() && "unknown operand kind in printOperand"); - const MCExpr *Expr = Op.getExpr(); - if (const MCSymbolRefExpr *SymRefExpr = dyn_cast<MCSymbolRefExpr>(Expr)) { - const MCSymbol &Sym = SymRefExpr->getSymbol(); - O << Sym.getName(); - } else { - O << *Op.getExpr(); - } - } -} - -void PTXInstPrinter::printMemOperand(const MCInst *MI, unsigned OpNo, - raw_ostream &O) { - // By definition, operand OpNo+1 is an i32imm - const MCOperand &Op2 = MI->getOperand(OpNo+1); - printOperand(MI, OpNo, O); - if (Op2.getImm() == 0) - return; // don't print "+0" - O << "+" << Op2.getImm(); -} - -void PTXInstPrinter::printRoundingMode(const MCInst *MI, unsigned OpNo, - raw_ostream &O) { - const MCOperand &Op = MI->getOperand(OpNo); - assert (Op.isImm() && "Rounding modes must be immediate values"); - switch (Op.getImm()) { - default: - llvm_unreachable("Unknown rounding mode!"); - case PTXRoundingMode::RndDefault: - llvm_unreachable("FP rounding-mode pass did not handle instruction!"); - case PTXRoundingMode::RndNone: - // Do not print anything. - break; - case PTXRoundingMode::RndNearestEven: - O << ".rn"; - break; - case PTXRoundingMode::RndTowardsZero: - O << ".rz"; - break; - case PTXRoundingMode::RndNegInf: - O << ".rm"; - break; - case PTXRoundingMode::RndPosInf: - O << ".rp"; - break; - case PTXRoundingMode::RndApprox: - O << ".approx"; - break; - case PTXRoundingMode::RndNearestEvenInt: - O << ".rni"; - break; - case PTXRoundingMode::RndTowardsZeroInt: - O << ".rzi"; - break; - case PTXRoundingMode::RndNegInfInt: - O << ".rmi"; - break; - case PTXRoundingMode::RndPosInfInt: - O << ".rpi"; - break; - } -} - diff --git a/lib/Target/PTX/InstPrinter/PTXInstPrinter.h b/lib/Target/PTX/InstPrinter/PTXInstPrinter.h deleted file mode 100644 index ea4d504..0000000 --- a/lib/Target/PTX/InstPrinter/PTXInstPrinter.h +++ /dev/null @@ -1,45 +0,0 @@ -//===- PTXInstPrinter.h - Convert PTX MCInst to assembly syntax -*- C++ -*-===// -// -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. -// -//===----------------------------------------------------------------------===// -// -// This class prints n PTX MCInst to a .ptx file. -// -//===----------------------------------------------------------------------===// - -#ifndef PTXINSTPRINTER_H -#define PTXINSTPRINTER_H - -#include "llvm/MC/MCInstPrinter.h" -#include "llvm/MC/MCSubtargetInfo.h" - -namespace llvm { - -class MCOperand; - -class PTXInstPrinter : public MCInstPrinter { -public: - PTXInstPrinter(const MCAsmInfo &MAI, const MCInstrInfo &MII, - const MCRegisterInfo &MRI, const MCSubtargetInfo &STI); - - virtual void printInst(const MCInst *MI, raw_ostream &O, StringRef Annot); - virtual void printRegName(raw_ostream &OS, unsigned RegNo) const; - - // Autogenerated by tblgen. - void printInstruction(const MCInst *MI, raw_ostream &O); - static const char *getRegisterName(unsigned RegNo); - - void printPredicate(const MCInst *MI, raw_ostream &O); - void printCall(const MCInst *MI, raw_ostream &O); - void printOperand(const MCInst *MI, unsigned OpNo, raw_ostream &O); - void printMemOperand(const MCInst *MI, unsigned OpNo, raw_ostream &O); - void printRoundingMode(const MCInst *MI, unsigned OpNo, raw_ostream &O); -}; -} - -#endif - diff --git a/lib/Target/PTX/LLVMBuild.txt b/lib/Target/PTX/LLVMBuild.txt deleted file mode 100644 index 15a1eb5..0000000 --- a/lib/Target/PTX/LLVMBuild.txt +++ /dev/null @@ -1,32 +0,0 @@ -;===- ./lib/Target/PTX/LLVMBuild.txt ---------------------------*- Conf -*--===; -; -; The LLVM Compiler Infrastructure -; -; This file is distributed under the University of Illinois Open Source -; License. See LICENSE.TXT for details. -; -;===------------------------------------------------------------------------===; -; -; This is an LLVMBuild description file for the components in this subdirectory. -; -; For more information on the LLVMBuild system, please see: -; -; http://llvm.org/docs/LLVMBuild.html -; -;===------------------------------------------------------------------------===; - -[common] -subdirectories = InstPrinter MCTargetDesc TargetInfo - -[component_0] -type = TargetGroup -name = PTX -parent = Target -has_asmprinter = 1 - -[component_1] -type = Library -name = PTXCodeGen -parent = PTX -required_libraries = Analysis AsmPrinter CodeGen Core MC PTXDesc PTXInfo SelectionDAG Support Target TransformUtils -add_to_library_groups = PTX diff --git a/lib/Target/PTX/MCTargetDesc/CMakeLists.txt b/lib/Target/PTX/MCTargetDesc/CMakeLists.txt deleted file mode 100644 index d1fd74c..0000000 --- a/lib/Target/PTX/MCTargetDesc/CMakeLists.txt +++ /dev/null @@ -1,6 +0,0 @@ -add_llvm_library(LLVMPTXDesc - PTXMCTargetDesc.cpp - PTXMCAsmInfo.cpp - ) - -add_dependencies(LLVMPTXDesc PTXCommonTableGen) diff --git a/lib/Target/PTX/MCTargetDesc/LLVMBuild.txt b/lib/Target/PTX/MCTargetDesc/LLVMBuild.txt deleted file mode 100644 index 19b80c5..0000000 --- a/lib/Target/PTX/MCTargetDesc/LLVMBuild.txt +++ /dev/null @@ -1,23 +0,0 @@ -;===- ./lib/Target/PTX/MCTargetDesc/LLVMBuild.txt --------------*- Conf -*--===; -; -; The LLVM Compiler Infrastructure -; -; This file is distributed under the University of Illinois Open Source -; License. See LICENSE.TXT for details. -; -;===------------------------------------------------------------------------===; -; -; This is an LLVMBuild description file for the components in this subdirectory. -; -; For more information on the LLVMBuild system, please see: -; -; http://llvm.org/docs/LLVMBuild.html -; -;===------------------------------------------------------------------------===; - -[component_0] -type = Library -name = PTXDesc -parent = PTX -required_libraries = MC PTXAsmPrinter PTXInfo Support -add_to_library_groups = PTX diff --git a/lib/Target/PTX/MCTargetDesc/Makefile b/lib/Target/PTX/MCTargetDesc/Makefile deleted file mode 100644 index 35f5a7b..0000000 --- a/lib/Target/PTX/MCTargetDesc/Makefile +++ /dev/null @@ -1,16 +0,0 @@ -##===- lib/Target/PTX/TargetDesc/Makefile ------------------*- Makefile -*-===## -# -# The LLVM Compiler Infrastructure -# -# This file is distributed under the University of Illinois Open Source -# License. See LICENSE.TXT for details. -# -##===----------------------------------------------------------------------===## - -LEVEL = ../../../.. -LIBRARYNAME = LLVMPTXDesc - -# Hack: we need to include 'main' target directory to grab private headers -CPP.Flags += -I$(PROJ_OBJ_DIR)/.. -I$(PROJ_SRC_DIR)/.. - -include $(LEVEL)/Makefile.common diff --git a/lib/Target/PTX/MCTargetDesc/PTXBaseInfo.h b/lib/Target/PTX/MCTargetDesc/PTXBaseInfo.h deleted file mode 100644 index a3e0f32..0000000 --- a/lib/Target/PTX/MCTargetDesc/PTXBaseInfo.h +++ /dev/null @@ -1,134 +0,0 @@ -//===-- PTXBaseInfo.h - Top level definitions for PTX -------- --*- C++ -*-===// -// -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. -// -//===----------------------------------------------------------------------===// -// -// This file contains small standalone helper functions and enum definitions for -// the PTX target useful for the compiler back-end and the MC libraries. -// As such, it deliberately does not include references to LLVM core -// code gen types, passes, etc.. -// -//===----------------------------------------------------------------------===// - -#ifndef PTXBASEINFO_H -#define PTXBASEINFO_H - -#include "PTXMCTargetDesc.h" -#include "llvm/Support/ErrorHandling.h" -#include "llvm/Support/raw_ostream.h" - -namespace llvm { - namespace PTXStateSpace { - enum { - Global = 0, // default to global state space - Constant = 1, - Local = 2, - Parameter = 3, - Shared = 4 - }; - } // namespace PTXStateSpace - - namespace PTXPredicate { - enum { - Normal = 0, - Negate = 1, - None = 2 - }; - } // namespace PTXPredicate - - /// Namespace to hold all target-specific flags. - namespace PTXRoundingMode { - // Instruction Flags - enum { - // Rounding Mode Flags - RndMask = 15, - RndDefault = 0, // --- - RndNone = 1, // <NONE> - RndNearestEven = 2, // .rn - RndTowardsZero = 3, // .rz - RndNegInf = 4, // .rm - RndPosInf = 5, // .rp - RndApprox = 6, // .approx - RndNearestEvenInt = 7, // .rni - RndTowardsZeroInt = 8, // .rzi - RndNegInfInt = 9, // .rmi - RndPosInfInt = 10 // .rpi - }; - } // namespace PTXII - - namespace PTXRegisterType { - // Register type encoded in MCOperands - enum { - Pred = 0, - B16, - B32, - B64, - F32, - F64 - }; - } // namespace PTXRegisterType - - namespace PTXRegisterSpace { - // Register space encoded in MCOperands - enum { - Reg = 0, - Local, - Param, - Argument, - Return - }; - } - - inline static void decodeRegisterName(raw_ostream &OS, - unsigned EncodedReg) { - OS << "%"; - - unsigned RegSpace = EncodedReg & 0x7; - unsigned RegType = (EncodedReg >> 3) & 0x7; - unsigned RegOffset = EncodedReg >> 6; - - switch (RegSpace) { - default: - llvm_unreachable("Unknown register space!"); - case PTXRegisterSpace::Reg: - switch (RegType) { - default: - llvm_unreachable("Unknown register type!"); - case PTXRegisterType::Pred: - OS << "p"; - break; - case PTXRegisterType::B16: - OS << "rh"; - break; - case PTXRegisterType::B32: - OS << "r"; - break; - case PTXRegisterType::B64: - OS << "rd"; - break; - case PTXRegisterType::F32: - OS << "f"; - break; - case PTXRegisterType::F64: - OS << "fd"; - break; - } - break; - case PTXRegisterSpace::Return: - OS << "ret"; - break; - case PTXRegisterSpace::Argument: - OS << "arg"; - break; - } - - OS << RegOffset; - } -} // namespace llvm - -#endif - diff --git a/lib/Target/PTX/MCTargetDesc/PTXMCAsmInfo.cpp b/lib/Target/PTX/MCTargetDesc/PTXMCAsmInfo.cpp deleted file mode 100644 index cdfbc80..0000000 --- a/lib/Target/PTX/MCTargetDesc/PTXMCAsmInfo.cpp +++ /dev/null @@ -1,37 +0,0 @@ -//===-- PTXMCAsmInfo.cpp - PTX asm properties -----------------------------===// -// -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. -// -//===----------------------------------------------------------------------===// -// -// This file contains the declarations of the PTXMCAsmInfo properties. -// -//===----------------------------------------------------------------------===// - -#include "PTXMCAsmInfo.h" -#include "llvm/ADT/Triple.h" - -using namespace llvm; - -void PTXMCAsmInfo::anchor() { } - -PTXMCAsmInfo::PTXMCAsmInfo(const Target &T, const StringRef &TT) { - Triple TheTriple(TT); - if (TheTriple.getArch() == Triple::ptx64) - PointerSize = 8; - - CommentString = "//"; - - PrivateGlobalPrefix = "$L__"; - - AllowPeriodsInName = false; - - HasSetDirective = false; - - HasDotTypeDotSizeDirective = false; - - HasSingleParameterDotFile = false; -} diff --git a/lib/Target/PTX/MCTargetDesc/PTXMCAsmInfo.h b/lib/Target/PTX/MCTargetDesc/PTXMCAsmInfo.h deleted file mode 100644 index 32ca069..0000000 --- a/lib/Target/PTX/MCTargetDesc/PTXMCAsmInfo.h +++ /dev/null @@ -1,30 +0,0 @@ -//===-- PTXMCAsmInfo.h - PTX asm properties --------------------*- C++ -*--===// -// -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. -// -//===----------------------------------------------------------------------===// -// -// This file contains the declaration of the PTXMCAsmInfo class. -// -//===----------------------------------------------------------------------===// - -#ifndef PTX_MCASM_INFO_H -#define PTX_MCASM_INFO_H - -#include "llvm/MC/MCAsmInfo.h" - -namespace llvm { - class Target; - class StringRef; - - class PTXMCAsmInfo : public MCAsmInfo { - virtual void anchor(); - public: - explicit PTXMCAsmInfo(const Target &T, const StringRef &TT); - }; -} // namespace llvm - -#endif // PTX_MCASM_INFO_H diff --git a/lib/Target/PTX/MCTargetDesc/PTXMCTargetDesc.cpp b/lib/Target/PTX/MCTargetDesc/PTXMCTargetDesc.cpp deleted file mode 100644 index 08fb970..0000000 --- a/lib/Target/PTX/MCTargetDesc/PTXMCTargetDesc.cpp +++ /dev/null @@ -1,98 +0,0 @@ -//===-- PTXMCTargetDesc.cpp - PTX Target Descriptions ---------------------===// -// -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. -// -//===----------------------------------------------------------------------===// -// -// This file provides PTX specific target descriptions. -// -//===----------------------------------------------------------------------===// - -#include "PTXMCTargetDesc.h" -#include "PTXMCAsmInfo.h" -#include "InstPrinter/PTXInstPrinter.h" -#include "llvm/MC/MCCodeGenInfo.h" -#include "llvm/MC/MCInstrInfo.h" -#include "llvm/MC/MCRegisterInfo.h" -#include "llvm/MC/MCSubtargetInfo.h" -#include "llvm/Support/TargetRegistry.h" - -#define GET_INSTRINFO_MC_DESC -#include "PTXGenInstrInfo.inc" - -#define GET_SUBTARGETINFO_MC_DESC -#include "PTXGenSubtargetInfo.inc" - -#define GET_REGINFO_MC_DESC -#include "PTXGenRegisterInfo.inc" - -using namespace llvm; - -static MCInstrInfo *createPTXMCInstrInfo() { - MCInstrInfo *X = new MCInstrInfo(); - InitPTXMCInstrInfo(X); - return X; -} - -static MCRegisterInfo *createPTXMCRegisterInfo(StringRef TT) { - MCRegisterInfo *X = new MCRegisterInfo(); - // PTX does not have a return address register. - InitPTXMCRegisterInfo(X, 0); - return X; -} - -static MCSubtargetInfo *createPTXMCSubtargetInfo(StringRef TT, StringRef CPU, - StringRef FS) { - MCSubtargetInfo *X = new MCSubtargetInfo(); - InitPTXMCSubtargetInfo(X, TT, CPU, FS); - return X; -} - -static MCCodeGenInfo *createPTXMCCodeGenInfo(StringRef TT, Reloc::Model RM, - CodeModel::Model CM, - CodeGenOpt::Level OL) { - MCCodeGenInfo *X = new MCCodeGenInfo(); - X->InitMCCodeGenInfo(RM, CM, OL); - return X; -} - -static MCInstPrinter *createPTXMCInstPrinter(const Target &T, - unsigned SyntaxVariant, - const MCAsmInfo &MAI, - const MCInstrInfo &MII, - const MCRegisterInfo &MRI, - const MCSubtargetInfo &STI) { - assert(SyntaxVariant == 0 && "We only have one syntax variant"); - return new PTXInstPrinter(MAI, MII, MRI, STI); -} - -extern "C" void LLVMInitializePTXTargetMC() { - // Register the MC asm info. - RegisterMCAsmInfo<PTXMCAsmInfo> X(ThePTX32Target); - RegisterMCAsmInfo<PTXMCAsmInfo> Y(ThePTX64Target); - - // Register the MC codegen info. - TargetRegistry::RegisterMCCodeGenInfo(ThePTX32Target, createPTXMCCodeGenInfo); - TargetRegistry::RegisterMCCodeGenInfo(ThePTX64Target, createPTXMCCodeGenInfo); - - // Register the MC instruction info. - TargetRegistry::RegisterMCInstrInfo(ThePTX32Target, createPTXMCInstrInfo); - TargetRegistry::RegisterMCInstrInfo(ThePTX64Target, createPTXMCInstrInfo); - - // Register the MC register info. - TargetRegistry::RegisterMCRegInfo(ThePTX32Target, createPTXMCRegisterInfo); - TargetRegistry::RegisterMCRegInfo(ThePTX64Target, createPTXMCRegisterInfo); - - // Register the MC subtarget info. - TargetRegistry::RegisterMCSubtargetInfo(ThePTX32Target, - createPTXMCSubtargetInfo); - TargetRegistry::RegisterMCSubtargetInfo(ThePTX64Target, - createPTXMCSubtargetInfo); - - // Register the MCInstPrinter. - TargetRegistry::RegisterMCInstPrinter(ThePTX32Target, createPTXMCInstPrinter); - TargetRegistry::RegisterMCInstPrinter(ThePTX64Target, createPTXMCInstPrinter); -} diff --git a/lib/Target/PTX/MCTargetDesc/PTXMCTargetDesc.h b/lib/Target/PTX/MCTargetDesc/PTXMCTargetDesc.h deleted file mode 100644 index 542638a..0000000 --- a/lib/Target/PTX/MCTargetDesc/PTXMCTargetDesc.h +++ /dev/null @@ -1,36 +0,0 @@ -//===-- PTXMCTargetDesc.h - PTX Target Descriptions ------------*- C++ -*-===// -// -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. -// -//===----------------------------------------------------------------------===// -// -// This file provides PTX specific target descriptions. -// -//===----------------------------------------------------------------------===// - -#ifndef PTXMCTARGETDESC_H -#define PTXMCTARGETDESC_H - -namespace llvm { -class Target; - -extern Target ThePTX32Target; -extern Target ThePTX64Target; - -} // End llvm namespace - -// Defines symbolic names for PTX registers. -#define GET_REGINFO_ENUM -#include "PTXGenRegisterInfo.inc" - -// Defines symbolic names for the PTX instructions. -#define GET_INSTRINFO_ENUM -#include "PTXGenInstrInfo.inc" - -#define GET_SUBTARGETINFO_ENUM -#include "PTXGenSubtargetInfo.inc" - -#endif diff --git a/lib/Target/PTX/Makefile b/lib/Target/PTX/Makefile deleted file mode 100644 index fa09634..0000000 --- a/lib/Target/PTX/Makefile +++ /dev/null @@ -1,23 +0,0 @@ -##===- lib/Target/PTX/Makefile -----------------------------*- Makefile -*-===## -# -# The LLVM Compiler Infrastructure -# -# This file is distributed under the University of Illinois Open Source -# License. See LICENSE.TXT for details. -# -##===----------------------------------------------------------------------===## - -LEVEL = ../../.. -LIBRARYNAME = LLVMPTXCodeGen -TARGET = PTX - -# Make sure that tblgen is run, first thing. -BUILT_SOURCES = PTXGenAsmWriter.inc \ - PTXGenDAGISel.inc \ - PTXGenInstrInfo.inc \ - PTXGenRegisterInfo.inc \ - PTXGenSubtargetInfo.inc - -DIRS = InstPrinter TargetInfo MCTargetDesc - -include $(LEVEL)/Makefile.common diff --git a/lib/Target/PTX/PTX.h b/lib/Target/PTX/PTX.h deleted file mode 100644 index ffb92cb..0000000 --- a/lib/Target/PTX/PTX.h +++ /dev/null @@ -1,43 +0,0 @@ -// -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. -// -//===----------------------------------------------------------------------===// -// -// This file contains the entry points for global functions defined in the LLVM -// PTX back-end. -// -//===----------------------------------------------------------------------===// - -#ifndef PTX_H -#define PTX_H - -#include "MCTargetDesc/PTXBaseInfo.h" -#include "llvm/Target/TargetMachine.h" - -namespace llvm { - class MachineInstr; - class MCInst; - class PTXAsmPrinter; - class PTXTargetMachine; - class FunctionPass; - - FunctionPass *createPTXISelDag(PTXTargetMachine &TM, - CodeGenOpt::Level OptLevel); - - FunctionPass *createPTXMFInfoExtract(PTXTargetMachine &TM, - CodeGenOpt::Level OptLevel); - - FunctionPass *createPTXFPRoundingModePass(PTXTargetMachine &TM, - CodeGenOpt::Level OptLevel); - - FunctionPass *createPTXRegisterAllocator(); - - void LowerPTXMachineInstrToMCInst(const MachineInstr *MI, MCInst &OutMI, - PTXAsmPrinter &AP); - -} // namespace llvm; - -#endif // PTX_H diff --git a/lib/Target/PTX/PTX.td b/lib/Target/PTX/PTX.td deleted file mode 100644 index 994a68e..0000000 --- a/lib/Target/PTX/PTX.td +++ /dev/null @@ -1,141 +0,0 @@ -//===-- PTX.td - Describe the PTX Target Machine -----------*- tablegen -*-===// -// -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. -// -//===----------------------------------------------------------------------===// -// This is the top level entry point for the PTX target. -//===----------------------------------------------------------------------===// - -//===----------------------------------------------------------------------===// -// Target-independent interfaces -//===----------------------------------------------------------------------===// - -include "llvm/Target/Target.td" - -//===----------------------------------------------------------------------===// -// Subtarget Features -//===----------------------------------------------------------------------===// - -//===- Architectural Features ---------------------------------------------===// - -def FeatureDouble : SubtargetFeature<"double", "SupportsDouble", "true", - "Do not demote .f64 to .f32">; - -def FeatureNoFMA : SubtargetFeature<"no-fma","SupportsFMA", "false", - "Disable Fused-Multiply Add">; - -//===- PTX Version --------------------------------------------------------===// - -def FeaturePTX20 : SubtargetFeature<"ptx20", "PTXVersion", "PTX_VERSION_2_0", - "Use PTX Language Version 2.0">; - -def FeaturePTX21 : SubtargetFeature<"ptx21", "PTXVersion", "PTX_VERSION_2_1", - "Use PTX Language Version 2.1">; - -def FeaturePTX22 : SubtargetFeature<"ptx22", "PTXVersion", "PTX_VERSION_2_2", - "Use PTX Language Version 2.2">; - -def FeaturePTX23 : SubtargetFeature<"ptx23", "PTXVersion", "PTX_VERSION_2_3", - "Use PTX Language Version 2.3">; - -//===- PTX Target ---------------------------------------------------------===// - -def FeatureSM10 : SubtargetFeature<"sm10", "PTXTarget", "PTX_SM_1_0", - "Use Shader Model 1.0">; -def FeatureSM11 : SubtargetFeature<"sm11", "PTXTarget", "PTX_SM_1_1", - "Use Shader Model 1.1">; -def FeatureSM12 : SubtargetFeature<"sm12", "PTXTarget", "PTX_SM_1_2", - "Use Shader Model 1.2">; -def FeatureSM13 : SubtargetFeature<"sm13", "PTXTarget", "PTX_SM_1_3", - "Use Shader Model 1.3">; -def FeatureSM20 : SubtargetFeature<"sm20", "PTXTarget", "PTX_SM_2_0", - "Use Shader Model 2.0", [FeatureDouble]>; -def FeatureSM21 : SubtargetFeature<"sm21", "PTXTarget", "PTX_SM_2_1", - "Use Shader Model 2.1", [FeatureDouble]>; -def FeatureSM22 : SubtargetFeature<"sm22", "PTXTarget", "PTX_SM_2_2", - "Use Shader Model 2.2", [FeatureDouble]>; -def FeatureSM23 : SubtargetFeature<"sm23", "PTXTarget", "PTX_SM_2_3", - "Use Shader Model 2.3", [FeatureDouble]>; - -def FeatureCOMPUTE10 : SubtargetFeature<"compute10", "PTXTarget", - "PTX_COMPUTE_1_0", - "Use Compute Compatibility 1.0">; -def FeatureCOMPUTE11 : SubtargetFeature<"compute11", "PTXTarget", - "PTX_COMPUTE_1_1", - "Use Compute Compatibility 1.1">; -def FeatureCOMPUTE12 : SubtargetFeature<"compute12", "PTXTarget", - "PTX_COMPUTE_1_2", - "Use Compute Compatibility 1.2">; -def FeatureCOMPUTE13 : SubtargetFeature<"compute13", "PTXTarget", - "PTX_COMPUTE_1_3", - "Use Compute Compatibility 1.3">; -def FeatureCOMPUTE20 : SubtargetFeature<"compute20", "PTXTarget", - "PTX_COMPUTE_2_0", - "Use Compute Compatibility 2.0", - [FeatureDouble]>; - -//===----------------------------------------------------------------------===// -// PTX supported processors -//===----------------------------------------------------------------------===// - -class Proc<string Name, list<SubtargetFeature> Features> - : Processor<Name, NoItineraries, Features>; - -def : Proc<"generic", []>; - -// Processor definitions for compute/shader models -def : Proc<"compute_10", [FeatureCOMPUTE10]>; -def : Proc<"compute_11", [FeatureCOMPUTE11]>; -def : Proc<"compute_12", [FeatureCOMPUTE12]>; -def : Proc<"compute_13", [FeatureCOMPUTE13]>; -def : Proc<"compute_20", [FeatureCOMPUTE20]>; -def : Proc<"sm_10", [FeatureSM10]>; -def : Proc<"sm_11", [FeatureSM11]>; -def : Proc<"sm_12", [FeatureSM12]>; -def : Proc<"sm_13", [FeatureSM13]>; -def : Proc<"sm_20", [FeatureSM20]>; -def : Proc<"sm_21", [FeatureSM21]>; -def : Proc<"sm_22", [FeatureSM22]>; -def : Proc<"sm_23", [FeatureSM23]>; - -// Processor definitions for common GPU architectures -def : Proc<"g80", [FeatureSM10]>; -def : Proc<"gt200", [FeatureSM13]>; -def : Proc<"gf100", [FeatureSM20, FeatureDouble]>; -def : Proc<"fermi", [FeatureSM20, FeatureDouble]>; - -//===----------------------------------------------------------------------===// -// Register File Description -//===----------------------------------------------------------------------===// - -include "PTXRegisterInfo.td" - -//===----------------------------------------------------------------------===// -// Instruction Descriptions -//===----------------------------------------------------------------------===// - -include "PTXInstrInfo.td" - -def PTXInstrInfo : InstrInfo; - -//===----------------------------------------------------------------------===// -// Assembly printer -//===----------------------------------------------------------------------===// -// PTX uses the MC printer for asm output, so make sure the TableGen -// AsmWriter bits get associated with the correct class. -def PTXAsmWriter : AsmWriter { - string AsmWriterClassName = "InstPrinter"; - bit isMCAsmWriter = 1; -} - -//===----------------------------------------------------------------------===// -// Target Declaration -//===----------------------------------------------------------------------===// - -def PTX : Target { - let InstructionSet = PTXInstrInfo; - let AssemblyWriters = [PTXAsmWriter]; -} diff --git a/lib/Target/PTX/PTXAsmPrinter.cpp b/lib/Target/PTX/PTXAsmPrinter.cpp deleted file mode 100644 index 0b6ac7b..0000000 --- a/lib/Target/PTX/PTXAsmPrinter.cpp +++ /dev/null @@ -1,561 +0,0 @@ -//===-- PTXAsmPrinter.cpp - PTX LLVM assembly writer ----------------------===// -// -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. -// -//===----------------------------------------------------------------------===// -// -// This file contains a printer that converts from our internal representation -// of machine-dependent LLVM code to PTX assembly language. -// -//===----------------------------------------------------------------------===// - -#define DEBUG_TYPE "ptx-asm-printer" - -#include "PTXAsmPrinter.h" -#include "PTX.h" -#include "PTXMachineFunctionInfo.h" -#include "PTXParamManager.h" -#include "PTXRegisterInfo.h" -#include "PTXTargetMachine.h" -#include "llvm/Argument.h" -#include "llvm/DerivedTypes.h" -#include "llvm/Function.h" -#include "llvm/Module.h" -#include "llvm/ADT/SmallString.h" -#include "llvm/ADT/Twine.h" -#include "llvm/Analysis/DebugInfo.h" -#include "llvm/CodeGen/AsmPrinter.h" -#include "llvm/CodeGen/MachineFrameInfo.h" -#include "llvm/CodeGen/MachineInstr.h" -#include "llvm/CodeGen/MachineRegisterInfo.h" -#include "llvm/MC/MCContext.h" -#include "llvm/MC/MCExpr.h" -#include "llvm/MC/MCInst.h" -#include "llvm/MC/MCStreamer.h" -#include "llvm/MC/MCSymbol.h" -#include "llvm/Target/Mangler.h" -#include "llvm/Target/TargetLoweringObjectFile.h" -#include "llvm/Support/CommandLine.h" -#include "llvm/Support/Debug.h" -#include "llvm/Support/ErrorHandling.h" -#include "llvm/Support/MathExtras.h" -#include "llvm/Support/Path.h" -#include "llvm/Support/TargetRegistry.h" -#include "llvm/Support/raw_ostream.h" - -using namespace llvm; - -static const char PARAM_PREFIX[] = "__param_"; -static const char RETURN_PREFIX[] = "__ret_"; - -static const char *getRegisterTypeName(unsigned RegType) { - switch (RegType) { - default: - llvm_unreachable("Unknown register type"); - case PTXRegisterType::Pred: - return ".pred"; - case PTXRegisterType::B16: - return ".b16"; - case PTXRegisterType::B32: - return ".b32"; - case PTXRegisterType::B64: - return ".b64"; - case PTXRegisterType::F32: - return ".f32"; - case PTXRegisterType::F64: - return ".f64"; - } -} - -static const char *getStateSpaceName(unsigned addressSpace) { - switch (addressSpace) { - default: llvm_unreachable("Unknown state space"); - case PTXStateSpace::Global: return "global"; - case PTXStateSpace::Constant: return "const"; - case PTXStateSpace::Local: return "local"; - case PTXStateSpace::Parameter: return "param"; - case PTXStateSpace::Shared: return "shared"; - } -} - -static const char *getTypeName(Type* type) { - while (true) { - switch (type->getTypeID()) { - default: llvm_unreachable("Unknown type"); - case Type::FloatTyID: return ".f32"; - case Type::DoubleTyID: return ".f64"; - case Type::IntegerTyID: - switch (type->getPrimitiveSizeInBits()) { - default: llvm_unreachable("Unknown integer bit-width"); - case 16: return ".u16"; - case 32: return ".u32"; - case 64: return ".u64"; - } - case Type::ArrayTyID: - case Type::PointerTyID: - type = dyn_cast<SequentialType>(type)->getElementType(); - break; - } - } - return NULL; -} - -bool PTXAsmPrinter::doFinalization(Module &M) { - // XXX Temproarily remove global variables so that doFinalization() will not - // emit them again (global variables are emitted at beginning). - - Module::GlobalListType &global_list = M.getGlobalList(); - int i, n = global_list.size(); - GlobalVariable **gv_array = new GlobalVariable* [n]; - - // first, back-up GlobalVariable in gv_array - i = 0; - for (Module::global_iterator I = global_list.begin(), E = global_list.end(); - I != E; ++I) - gv_array[i++] = &*I; - - // second, empty global_list - while (!global_list.empty()) - global_list.remove(global_list.begin()); - - // call doFinalization - bool ret = AsmPrinter::doFinalization(M); - - // now we restore global variables - for (i = 0; i < n; i ++) - global_list.insert(global_list.end(), gv_array[i]); - - delete[] gv_array; - return ret; -} - -void PTXAsmPrinter::EmitStartOfAsmFile(Module &M) -{ - const PTXSubtarget& ST = TM.getSubtarget<PTXSubtarget>(); - - // Emit the PTX .version and .target attributes - OutStreamer.EmitRawText(Twine("\t.version ") + ST.getPTXVersionString()); - OutStreamer.EmitRawText(Twine("\t.target ") + ST.getTargetString() + - (ST.supportsDouble() ? "" - : ", map_f64_to_f32")); - // .address_size directive is optional, but it must immediately follow - // the .target directive if present within a module - if (ST.supportsPTX23()) { - const char *addrSize = ST.is64Bit() ? "64" : "32"; - OutStreamer.EmitRawText(Twine("\t.address_size ") + addrSize); - } - - OutStreamer.AddBlankLine(); - - // Define any .file directives - DebugInfoFinder DbgFinder; - DbgFinder.processModule(M); - - for (DebugInfoFinder::iterator I = DbgFinder.compile_unit_begin(), - E = DbgFinder.compile_unit_end(); I != E; ++I) { - DICompileUnit DIUnit(*I); - StringRef FN = DIUnit.getFilename(); - StringRef Dir = DIUnit.getDirectory(); - GetOrCreateSourceID(FN, Dir); - } - - OutStreamer.AddBlankLine(); - - // declare external functions - for (Module::const_iterator i = M.begin(), e = M.end(); - i != e; ++i) - EmitFunctionDeclaration(i); - - // declare global variables - for (Module::const_global_iterator i = M.global_begin(), e = M.global_end(); - i != e; ++i) - EmitVariableDeclaration(i); -} - -void PTXAsmPrinter::EmitFunctionBodyStart() { - OutStreamer.EmitRawText(Twine("{")); - - const PTXMachineFunctionInfo *MFI = MF->getInfo<PTXMachineFunctionInfo>(); - const PTXParamManager &PM = MFI->getParamManager(); - - // Print register definitions - SmallString<128> regDefs; - raw_svector_ostream os(regDefs); - unsigned numRegs; - - // pred - numRegs = MFI->countRegisters(PTXRegisterType::Pred, PTXRegisterSpace::Reg); - if(numRegs > 0) - os << "\t.reg .pred %p<" << numRegs << ">;\n"; - - // i16 - numRegs = MFI->countRegisters(PTXRegisterType::B16, PTXRegisterSpace::Reg); - if(numRegs > 0) - os << "\t.reg .b16 %rh<" << numRegs << ">;\n"; - - // i32 - numRegs = MFI->countRegisters(PTXRegisterType::B32, PTXRegisterSpace::Reg); - if(numRegs > 0) - os << "\t.reg .b32 %r<" << numRegs << ">;\n"; - - // i64 - numRegs = MFI->countRegisters(PTXRegisterType::B64, PTXRegisterSpace::Reg); - if(numRegs > 0) - os << "\t.reg .b64 %rd<" << numRegs << ">;\n"; - - // f32 - numRegs = MFI->countRegisters(PTXRegisterType::F32, PTXRegisterSpace::Reg); - if(numRegs > 0) - os << "\t.reg .f32 %f<" << numRegs << ">;\n"; - - // f64 - numRegs = MFI->countRegisters(PTXRegisterType::F64, PTXRegisterSpace::Reg); - if(numRegs > 0) - os << "\t.reg .f64 %fd<" << numRegs << ">;\n"; - - // Local params - for (PTXParamManager::param_iterator i = PM.local_begin(), e = PM.local_end(); - i != e; ++i) - os << "\t.param .b" << PM.getParamSize(*i) << ' ' << PM.getParamName(*i) - << ";\n"; - - OutStreamer.EmitRawText(os.str()); - - - const MachineFrameInfo* FrameInfo = MF->getFrameInfo(); - DEBUG(dbgs() << "Have " << FrameInfo->getNumObjects() - << " frame object(s)\n"); - for (unsigned i = 0, e = FrameInfo->getNumObjects(); i != e; ++i) { - DEBUG(dbgs() << "Size of object: " << FrameInfo->getObjectSize(i) << "\n"); - if (FrameInfo->getObjectSize(i) > 0) { - OutStreamer.EmitRawText("\t.local .align " + - Twine(FrameInfo->getObjectAlignment(i)) + - " .b8 __local" + - Twine(i) + - "[" + - Twine(FrameInfo->getObjectSize(i)) + - "];"); - } - } - - //unsigned Index = 1; - // Print parameter passing params - //for (PTXMachineFunctionInfo::param_iterator - // i = MFI->paramBegin(), e = MFI->paramEnd(); i != e; ++i) { - // std::string def = "\t.param .b"; - // def += utostr(*i); - // def += " __ret_"; - // def += utostr(Index); - // Index++; - // def += ";"; - // OutStreamer.EmitRawText(Twine(def)); - //} -} - -void PTXAsmPrinter::EmitFunctionBodyEnd() { - OutStreamer.EmitRawText(Twine("}")); -} - -void PTXAsmPrinter::EmitInstruction(const MachineInstr *MI) { - MCInst TmpInst; - LowerPTXMachineInstrToMCInst(MI, TmpInst, *this); - OutStreamer.EmitInstruction(TmpInst); -} - -void PTXAsmPrinter::EmitVariableDeclaration(const GlobalVariable *gv) { - // Check to see if this is a special global used by LLVM, if so, emit it. - if (EmitSpecialLLVMGlobal(gv)) - return; - - MCSymbol *gvsym = Mang->getSymbol(gv); - - assert(gvsym->isUndefined() && "Cannot define a symbol twice!"); - - SmallString<128> decl; - raw_svector_ostream os(decl); - - // check if it is defined in some other translation unit - if (gv->isDeclaration()) - os << ".extern "; - - // state space: e.g., .global - os << '.' << getStateSpaceName(gv->getType()->getAddressSpace()) << ' '; - - // alignment (optional) - unsigned alignment = gv->getAlignment(); - if (alignment != 0) - os << ".align " << gv->getAlignment() << ' '; - - - if (PointerType::classof(gv->getType())) { - PointerType* pointerTy = dyn_cast<PointerType>(gv->getType()); - Type* elementTy = pointerTy->getElementType(); - - if (elementTy->isArrayTy()) { - assert(elementTy->isArrayTy() && "Only pointers to arrays are supported"); - - ArrayType* arrayTy = dyn_cast<ArrayType>(elementTy); - elementTy = arrayTy->getElementType(); - - unsigned numElements = arrayTy->getNumElements(); - - while (elementTy->isArrayTy()) { - arrayTy = dyn_cast<ArrayType>(elementTy); - elementTy = arrayTy->getElementType(); - - numElements *= arrayTy->getNumElements(); - } - - // FIXME: isPrimitiveType() == false for i16? - assert(elementTy->isSingleValueType() && - "Non-primitive types are not handled"); - - // Find the size of the element in bits - unsigned elementSize = elementTy->getPrimitiveSizeInBits(); - - os << ".b" << elementSize << ' ' << gvsym->getName() - << '[' << numElements << ']'; - } else { - os << ".b8" << gvsym->getName() << "[]"; - } - - // handle string constants (assume ConstantArray means string) - if (gv->hasInitializer()) { - const Constant *C = gv->getInitializer(); - if (const ConstantArray *CA = dyn_cast<ConstantArray>(C)) { - os << " = {"; - - for (unsigned i = 0, e = C->getNumOperands(); i != e; ++i) { - if (i > 0) - os << ','; - - os << "0x"; - os.write_hex(cast<ConstantInt>(CA->getOperand(i))->getZExtValue()); - } - - os << '}'; - } - } - } else { - // Note: this is currently the fall-through case and most likely generates - // incorrect code. - os << getTypeName(gv->getType()) << ' ' << gvsym->getName(); - - if (isa<ArrayType>(gv->getType()) || isa<PointerType>(gv->getType())) - os << "[]"; - } - - os << ';'; - - OutStreamer.EmitRawText(os.str()); - OutStreamer.AddBlankLine(); -} - -void PTXAsmPrinter::EmitFunctionEntryLabel() { - // The function label could have already been emitted if two symbols end up - // conflicting due to asm renaming. Detect this and emit an error. - if (!CurrentFnSym->isUndefined()) - report_fatal_error("'" + Twine(CurrentFnSym->getName()) + - "' label emitted multiple times to assembly file"); - - const PTXMachineFunctionInfo *MFI = MF->getInfo<PTXMachineFunctionInfo>(); - const PTXParamManager &PM = MFI->getParamManager(); - const bool isKernel = MFI->isKernel(); - const PTXSubtarget& ST = TM.getSubtarget<PTXSubtarget>(); - - SmallString<128> decl; - raw_svector_ostream os(decl); - os << (isKernel ? ".entry" : ".func"); - - if (!isKernel) { - os << " ("; - if (ST.useParamSpaceForDeviceArgs()) { - for (PTXParamManager::param_iterator i = PM.ret_begin(), e = PM.ret_end(), - b = i; i != e; ++i) { - if (i != b) - os << ", "; - - os << ".param .b" << PM.getParamSize(*i) << ' ' << PM.getParamName(*i); - } - } else { - for (PTXMachineFunctionInfo::reg_iterator - i = MFI->retreg_begin(), e = MFI->retreg_end(), b = i; - i != e; ++i) { - if (i != b) - os << ", "; - - os << ".reg " << getRegisterTypeName(MFI->getRegisterType(*i)) << ' ' - << MFI->getRegisterName(*i); - } - } - os << ')'; - } - - // Print function name - os << ' ' << CurrentFnSym->getName() << " ("; - - const Function *F = MF->getFunction(); - - // Print parameters - if (isKernel || ST.useParamSpaceForDeviceArgs()) { - /*for (PTXParamManager::param_iterator i = PM.arg_begin(), e = PM.arg_end(), - b = i; i != e; ++i) { - if (i != b) - os << ", "; - - os << ".param .b" << PM.getParamSize(*i) << ' ' << PM.getParamName(*i); - }*/ - int Counter = 1; - for (Function::const_arg_iterator i = F->arg_begin(), e = F->arg_end(), - b = i; i != e; ++i) { - if (i != b) - os << ", "; - const Type *ArgType = (*i).getType(); - os << ".param .b"; - if (ArgType->isPointerTy()) { - if (ST.is64Bit()) - os << "64"; - else - os << "32"; - } else { - os << ArgType->getPrimitiveSizeInBits(); - } - if (ArgType->isPointerTy() && ST.emitPtrAttribute()) { - const PointerType *PtrType = dyn_cast<const PointerType>(ArgType); - os << " .ptr"; - switch (PtrType->getAddressSpace()) { - default: - llvm_unreachable("Unknown address space in argument"); - case PTXStateSpace::Global: - os << " .global"; - break; - case PTXStateSpace::Shared: - os << " .shared"; - break; - } - } - os << " __param_" << Counter++; - } - } else { - for (PTXMachineFunctionInfo::reg_iterator - i = MFI->argreg_begin(), e = MFI->argreg_end(), b = i; - i != e; ++i) { - if (i != b) - os << ", "; - - os << ".reg " << getRegisterTypeName(MFI->getRegisterType(*i)) << ' ' - << MFI->getRegisterName(*i); - } - } - os << ')'; - - OutStreamer.EmitRawText(os.str()); -} - -void PTXAsmPrinter::EmitFunctionDeclaration(const Function* func) -{ - const PTXSubtarget& ST = TM.getSubtarget<PTXSubtarget>(); - - std::string decl = ""; - - // hard-coded emission of extern vprintf function - - if (func->getName() == "printf" || func->getName() == "puts") { - decl += ".extern .func (.param .b32 __param_1) vprintf (.param .b"; - if (ST.is64Bit()) - decl += "64"; - else - decl += "32"; - decl += " __param_2, .param .b"; - if (ST.is64Bit()) - decl += "64"; - else - decl += "32"; - decl += " __param_3)\n"; - } - - OutStreamer.EmitRawText(Twine(decl)); -} - -unsigned PTXAsmPrinter::GetOrCreateSourceID(StringRef FileName, - StringRef DirName) { - // If FE did not provide a file name, then assume stdin. - if (FileName.empty()) - return GetOrCreateSourceID("<stdin>", StringRef()); - - // MCStream expects full path name as filename. - if (!DirName.empty() && !sys::path::is_absolute(FileName)) { - SmallString<128> FullPathName = DirName; - sys::path::append(FullPathName, FileName); - // Here FullPathName will be copied into StringMap by GetOrCreateSourceID. - return GetOrCreateSourceID(StringRef(FullPathName), StringRef()); - } - - StringMapEntry<unsigned> &Entry = SourceIdMap.GetOrCreateValue(FileName); - if (Entry.getValue()) - return Entry.getValue(); - - unsigned SrcId = SourceIdMap.size(); - Entry.setValue(SrcId); - - // Print out a .file directive to specify files for .loc directives. - OutStreamer.EmitDwarfFileDirective(SrcId, "", Entry.getKey()); - - return SrcId; -} - -MCOperand PTXAsmPrinter::GetSymbolRef(const MachineOperand &MO, - const MCSymbol *Symbol) { - const MCExpr *Expr; - Expr = MCSymbolRefExpr::Create(Symbol, MCSymbolRefExpr::VK_None, OutContext); - return MCOperand::CreateExpr(Expr); -} - -MCOperand PTXAsmPrinter::lowerOperand(const MachineOperand &MO) { - MCOperand MCOp; - const PTXMachineFunctionInfo *MFI = MF->getInfo<PTXMachineFunctionInfo>(); - unsigned EncodedReg; - switch (MO.getType()) { - default: - llvm_unreachable("Unknown operand type"); - case MachineOperand::MO_Register: - if (MO.getReg() > 0) { - // Encode the register - EncodedReg = MFI->getEncodedRegister(MO.getReg()); - } else { - EncodedReg = 0; - } - MCOp = MCOperand::CreateReg(EncodedReg); - break; - case MachineOperand::MO_Immediate: - MCOp = MCOperand::CreateImm(MO.getImm()); - break; - case MachineOperand::MO_MachineBasicBlock: - MCOp = MCOperand::CreateExpr(MCSymbolRefExpr::Create( - MO.getMBB()->getSymbol(), OutContext)); - break; - case MachineOperand::MO_GlobalAddress: - MCOp = GetSymbolRef(MO, Mang->getSymbol(MO.getGlobal())); - break; - case MachineOperand::MO_ExternalSymbol: - MCOp = GetSymbolRef(MO, GetExternalSymbolSymbol(MO.getSymbolName())); - break; - case MachineOperand::MO_FPImmediate: - APFloat Val = MO.getFPImm()->getValueAPF(); - bool ignored; - Val.convert(APFloat::IEEEdouble, APFloat::rmTowardZero, &ignored); - MCOp = MCOperand::CreateFPImm(Val.convertToDouble()); - break; - } - - return MCOp; -} - -// Force static initialization. -extern "C" void LLVMInitializePTXAsmPrinter() { - RegisterAsmPrinter<PTXAsmPrinter> X(ThePTX32Target); - RegisterAsmPrinter<PTXAsmPrinter> Y(ThePTX64Target); -} diff --git a/lib/Target/PTX/PTXAsmPrinter.h b/lib/Target/PTX/PTXAsmPrinter.h deleted file mode 100644 index 74c8d58..0000000 --- a/lib/Target/PTX/PTXAsmPrinter.h +++ /dev/null @@ -1,57 +0,0 @@ -//===-- PTXAsmPrinter.h - Print machine code to a PTX file ------*- C++ -*-===// -// -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. -// -//===----------------------------------------------------------------------===// -// -// PTX Assembly printer class. -// -//===----------------------------------------------------------------------===// - -#ifndef PTXASMPRINTER_H -#define PTXASMPRINTER_H - -#include "PTX.h" -#include "PTXTargetMachine.h" -#include "llvm/ADT/StringMap.h" -#include "llvm/CodeGen/AsmPrinter.h" -#include "llvm/Support/Compiler.h" - -namespace llvm { - -class MCOperand; - -class LLVM_LIBRARY_VISIBILITY PTXAsmPrinter : public AsmPrinter { -public: - explicit PTXAsmPrinter(TargetMachine &TM, MCStreamer &Streamer) - : AsmPrinter(TM, Streamer) {} - - const char *getPassName() const { return "PTX Assembly Printer"; } - - bool doFinalization(Module &M); - - virtual void EmitStartOfAsmFile(Module &M); - virtual void EmitFunctionBodyStart(); - virtual void EmitFunctionBodyEnd(); - virtual void EmitFunctionEntryLabel(); - virtual void EmitInstruction(const MachineInstr *MI); - - unsigned GetOrCreateSourceID(StringRef FileName, - StringRef DirName); - - MCOperand GetSymbolRef(const MachineOperand &MO, const MCSymbol *Symbol); - MCOperand lowerOperand(const MachineOperand &MO); - -private: - void EmitVariableDeclaration(const GlobalVariable *gv); - void EmitFunctionDeclaration(const Function* func); - - StringMap<unsigned> SourceIdMap; -}; // class PTXAsmPrinter -} // namespace llvm - -#endif - diff --git a/lib/Target/PTX/PTXFPRoundingModePass.cpp b/lib/Target/PTX/PTXFPRoundingModePass.cpp deleted file mode 100644 index a21d172..0000000 --- a/lib/Target/PTX/PTXFPRoundingModePass.cpp +++ /dev/null @@ -1,181 +0,0 @@ -//===-- PTXFPRoundingModePass.cpp - Assign rounding modes pass ------------===// -// -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. -// -//===----------------------------------------------------------------------===// -// -// This file defines a machine function pass that sets appropriate FP rounding -// modes for all relevant instructions. -// -//===----------------------------------------------------------------------===// - -#define DEBUG_TYPE "ptx-fp-rounding-mode" - -#include "PTX.h" -#include "PTXTargetMachine.h" -#include "llvm/ADT/DenseMap.h" -#include "llvm/CodeGen/MachineFunctionPass.h" -#include "llvm/CodeGen/MachineRegisterInfo.h" -#include "llvm/Support/Debug.h" -#include "llvm/Support/ErrorHandling.h" -#include "llvm/Support/raw_ostream.h" - -using namespace llvm; - -// NOTE: PTXFPRoundingModePass should be executed just before emission. - -namespace { - /// PTXFPRoundingModePass - Pass to assign appropriate FP rounding modes to - /// all FP instructions. Essentially, this pass just looks for all FP - /// instructions that have a rounding mode set to RndDefault, and sets an - /// appropriate rounding mode based on the target device. - /// - class PTXFPRoundingModePass : public MachineFunctionPass { - private: - static char ID; - - typedef std::pair<unsigned, unsigned> RndModeDesc; - - PTXTargetMachine& TargetMachine; - DenseMap<unsigned, RndModeDesc> Instrs; - - public: - PTXFPRoundingModePass(PTXTargetMachine &TM, CodeGenOpt::Level OptLevel) - : MachineFunctionPass(ID), - TargetMachine(TM) { - initializeMap(); - } - - virtual bool runOnMachineFunction(MachineFunction &MF); - - virtual const char *getPassName() const { - return "PTX FP Rounding Mode Pass"; - } - - private: - - void initializeMap(); - void processInstruction(MachineInstr &MI); - }; // class PTXFPRoundingModePass -} // end anonymous namespace - -using namespace llvm; - -char PTXFPRoundingModePass::ID = 0; - -bool PTXFPRoundingModePass::runOnMachineFunction(MachineFunction &MF) { - // Look at each basic block - for (MachineFunction::iterator bbi = MF.begin(), bbe = MF.end(); bbi != bbe; - ++bbi) { - MachineBasicBlock &MBB = *bbi; - // Look at each instruction - for (MachineBasicBlock::iterator ii = MBB.begin(), ie = MBB.end(); - ii != ie; ++ii) { - MachineInstr &MI = *ii; - processInstruction(MI); - } - } - return false; -} - -void PTXFPRoundingModePass::initializeMap() { - using namespace PTXRoundingMode; - const PTXSubtarget& ST = TargetMachine.getSubtarget<PTXSubtarget>(); - - // Build a map of default rounding mode for all instructions that need a - // rounding mode. - Instrs[PTX::FADDrr32] = std::make_pair(1U, (unsigned)RndNearestEven); - Instrs[PTX::FADDri32] = std::make_pair(1U, (unsigned)RndNearestEven); - Instrs[PTX::FADDrr64] = std::make_pair(1U, (unsigned)RndNearestEven); - Instrs[PTX::FADDri64] = std::make_pair(1U, (unsigned)RndNearestEven); - Instrs[PTX::FSUBrr32] = std::make_pair(1U, (unsigned)RndNearestEven); - Instrs[PTX::FSUBri32] = std::make_pair(1U, (unsigned)RndNearestEven); - Instrs[PTX::FSUBrr64] = std::make_pair(1U, (unsigned)RndNearestEven); - Instrs[PTX::FSUBri64] = std::make_pair(1U, (unsigned)RndNearestEven); - Instrs[PTX::FMULrr32] = std::make_pair(1U, (unsigned)RndNearestEven); - Instrs[PTX::FMULri32] = std::make_pair(1U, (unsigned)RndNearestEven); - Instrs[PTX::FMULrr64] = std::make_pair(1U, (unsigned)RndNearestEven); - Instrs[PTX::FMULri64] = std::make_pair(1U, (unsigned)RndNearestEven); - - Instrs[PTX::FNEGrr32] = std::make_pair(1U, (unsigned)RndNone); - Instrs[PTX::FNEGri32] = std::make_pair(1U, (unsigned)RndNone); - Instrs[PTX::FNEGrr64] = std::make_pair(1U, (unsigned)RndNone); - Instrs[PTX::FNEGri64] = std::make_pair(1U, (unsigned)RndNone); - - unsigned FDivRndMode = ST.fdivNeedsRoundingMode() ? RndNearestEven : RndNone; - Instrs[PTX::FDIVrr32] = std::make_pair(1U, FDivRndMode); - Instrs[PTX::FDIVri32] = std::make_pair(1U, FDivRndMode); - Instrs[PTX::FDIVrr64] = std::make_pair(1U, FDivRndMode); - Instrs[PTX::FDIVri64] = std::make_pair(1U, FDivRndMode); - - unsigned FMADRndMode = ST.fmadNeedsRoundingMode() ? RndNearestEven : RndNone; - Instrs[PTX::FMADrrr32] = std::make_pair(1U, FMADRndMode); - Instrs[PTX::FMADrri32] = std::make_pair(1U, FMADRndMode); - Instrs[PTX::FMADrii32] = std::make_pair(1U, FMADRndMode); - Instrs[PTX::FMADrrr64] = std::make_pair(1U, FMADRndMode); - Instrs[PTX::FMADrri64] = std::make_pair(1U, FMADRndMode); - Instrs[PTX::FMADrii64] = std::make_pair(1U, FMADRndMode); - - Instrs[PTX::FSQRTrr32] = std::make_pair(1U, (unsigned)RndNearestEven); - Instrs[PTX::FSQRTri32] = std::make_pair(1U, (unsigned)RndNearestEven); - Instrs[PTX::FSQRTrr64] = std::make_pair(1U, (unsigned)RndNearestEven); - Instrs[PTX::FSQRTri64] = std::make_pair(1U, (unsigned)RndNearestEven); - - Instrs[PTX::FSINrr32] = std::make_pair(1U, (unsigned)RndApprox); - Instrs[PTX::FSINri32] = std::make_pair(1U, (unsigned)RndApprox); - Instrs[PTX::FSINrr64] = std::make_pair(1U, (unsigned)RndApprox); - Instrs[PTX::FSINri64] = std::make_pair(1U, (unsigned)RndApprox); - Instrs[PTX::FCOSrr32] = std::make_pair(1U, (unsigned)RndApprox); - Instrs[PTX::FCOSri32] = std::make_pair(1U, (unsigned)RndApprox); - Instrs[PTX::FCOSrr64] = std::make_pair(1U, (unsigned)RndApprox); - Instrs[PTX::FCOSri64] = std::make_pair(1U, (unsigned)RndApprox); - - Instrs[PTX::CVTu16f32] = std::make_pair(1U, (unsigned)RndTowardsZeroInt); - Instrs[PTX::CVTs16f32] = std::make_pair(1U, (unsigned)RndTowardsZeroInt); - Instrs[PTX::CVTu16f64] = std::make_pair(1U, (unsigned)RndTowardsZeroInt); - Instrs[PTX::CVTs16f64] = std::make_pair(1U, (unsigned)RndTowardsZeroInt); - Instrs[PTX::CVTu32f32] = std::make_pair(1U, (unsigned)RndTowardsZeroInt); - Instrs[PTX::CVTs32f32] = std::make_pair(1U, (unsigned)RndTowardsZeroInt); - Instrs[PTX::CVTu32f64] = std::make_pair(1U, (unsigned)RndTowardsZeroInt); - Instrs[PTX::CVTs32f64] = std::make_pair(1U, (unsigned)RndTowardsZeroInt); - Instrs[PTX::CVTu64f32] = std::make_pair(1U, (unsigned)RndTowardsZeroInt); - Instrs[PTX::CVTs64f32] = std::make_pair(1U, (unsigned)RndTowardsZeroInt); - Instrs[PTX::CVTu64f64] = std::make_pair(1U, (unsigned)RndTowardsZeroInt); - Instrs[PTX::CVTs64f64] = std::make_pair(1U, (unsigned)RndTowardsZeroInt); - - Instrs[PTX::CVTf32u16] = std::make_pair(1U, (unsigned)RndNearestEven); - Instrs[PTX::CVTf32s16] = std::make_pair(1U, (unsigned)RndNearestEven); - Instrs[PTX::CVTf32u32] = std::make_pair(1U, (unsigned)RndNearestEven); - Instrs[PTX::CVTf32s32] = std::make_pair(1U, (unsigned)RndNearestEven); - Instrs[PTX::CVTf32u64] = std::make_pair(1U, (unsigned)RndNearestEven); - Instrs[PTX::CVTf32s64] = std::make_pair(1U, (unsigned)RndNearestEven); - Instrs[PTX::CVTf32f64] = std::make_pair(1U, (unsigned)RndNearestEven); - Instrs[PTX::CVTf64u16] = std::make_pair(1U, (unsigned)RndNearestEven); - Instrs[PTX::CVTf64s16] = std::make_pair(1U, (unsigned)RndNearestEven); - Instrs[PTX::CVTf64u32] = std::make_pair(1U, (unsigned)RndNearestEven); - Instrs[PTX::CVTf64s32] = std::make_pair(1U, (unsigned)RndNearestEven); - Instrs[PTX::CVTf64u64] = std::make_pair(1U, (unsigned)RndNearestEven); - Instrs[PTX::CVTf64s64] = std::make_pair(1U, (unsigned)RndNearestEven); -} - -void PTXFPRoundingModePass::processInstruction(MachineInstr &MI) { - // Is this an instruction that needs a rounding mode? - if (Instrs.count(MI.getOpcode())) { - const RndModeDesc &Desc = Instrs[MI.getOpcode()]; - // Get the rounding mode operand - MachineOperand &Op = MI.getOperand(Desc.first); - // Update the rounding mode if needed - if (Op.getImm() == PTXRoundingMode::RndDefault) { - Op.setImm(Desc.second); - } - } -} - -FunctionPass *llvm::createPTXFPRoundingModePass(PTXTargetMachine &TM, - CodeGenOpt::Level OptLevel) { - return new PTXFPRoundingModePass(TM, OptLevel); -} - diff --git a/lib/Target/PTX/PTXFrameLowering.cpp b/lib/Target/PTX/PTXFrameLowering.cpp deleted file mode 100644 index e6e268e..0000000 --- a/lib/Target/PTX/PTXFrameLowering.cpp +++ /dev/null @@ -1,24 +0,0 @@ -//===-- PTXFrameLowering.cpp - PTX Frame Information ----------------------===// -// -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. -// -//===----------------------------------------------------------------------===// -// -// This file contains the PTX implementation of TargetFrameLowering class. -// -//===----------------------------------------------------------------------===// - -#include "PTXFrameLowering.h" -#include "llvm/CodeGen/MachineFunction.h" - -using namespace llvm; - -void PTXFrameLowering::emitPrologue(MachineFunction &MF) const { -} - -void PTXFrameLowering::emitEpilogue(MachineFunction &MF, - MachineBasicBlock &MBB) const { -} diff --git a/lib/Target/PTX/PTXFrameLowering.h b/lib/Target/PTX/PTXFrameLowering.h deleted file mode 100644 index 831e818..0000000 --- a/lib/Target/PTX/PTXFrameLowering.h +++ /dev/null @@ -1,44 +0,0 @@ -//===-- PTXFrameLowering.h - Define frame lowering for PTX -----*- C++ -*--===// -// -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. -// -//===----------------------------------------------------------------------===// -// -// -// -//===----------------------------------------------------------------------===// - -#ifndef PTX_FRAMEINFO_H -#define PTX_FRAMEINFO_H - -#include "PTX.h" -#include "PTXSubtarget.h" -#include "llvm/Target/TargetFrameLowering.h" - -namespace llvm { - class PTXSubtarget; - -class PTXFrameLowering : public TargetFrameLowering { -protected: - const PTXSubtarget &STI; - -public: - explicit PTXFrameLowering(const PTXSubtarget &sti) - : TargetFrameLowering(TargetFrameLowering::StackGrowsDown, 2, -2), - STI(sti) { - } - - /// emitProlog/emitEpilog - These methods insert prolog and epilog code into - /// the function. - void emitPrologue(MachineFunction &MF) const; - void emitEpilogue(MachineFunction &MF, MachineBasicBlock &MBB) const; - - bool hasFP(const MachineFunction &MF) const { return false; } -}; - -} // End llvm namespace - -#endif diff --git a/lib/Target/PTX/PTXISelDAGToDAG.cpp b/lib/Target/PTX/PTXISelDAGToDAG.cpp deleted file mode 100644 index 5c7ee29..0000000 --- a/lib/Target/PTX/PTXISelDAGToDAG.cpp +++ /dev/null @@ -1,356 +0,0 @@ -//===-- PTXISelDAGToDAG.cpp - A dag to dag inst selector for PTX ----------===// -// -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. -// -//===----------------------------------------------------------------------===// -// -// This file defines an instruction selector for the PTX target. -// -//===----------------------------------------------------------------------===// - -#include "PTX.h" -#include "PTXMachineFunctionInfo.h" -#include "PTXTargetMachine.h" -#include "llvm/ADT/StringExtras.h" -#include "llvm/CodeGen/SelectionDAGISel.h" -#include "llvm/DerivedTypes.h" -#include "llvm/Support/Debug.h" -#include "llvm/Support/raw_ostream.h" - -using namespace llvm; - -namespace { -// PTXDAGToDAGISel - PTX specific code to select PTX machine -// instructions for SelectionDAG operations. -class PTXDAGToDAGISel : public SelectionDAGISel { - public: - PTXDAGToDAGISel(PTXTargetMachine &TM, CodeGenOpt::Level OptLevel); - - virtual const char *getPassName() const { - return "PTX DAG->DAG Pattern Instruction Selection"; - } - - SDNode *Select(SDNode *Node); - - // Complex Pattern Selectors. - bool SelectADDRrr(SDValue &Addr, SDValue &R1, SDValue &R2); - bool SelectADDRri(SDValue &Addr, SDValue &Base, SDValue &Offset); - bool SelectADDRii(SDValue &Addr, SDValue &Base, SDValue &Offset); - bool SelectADDRlocal(SDValue &Addr, SDValue &Base, SDValue &Offset); - - // Include the pieces auto'gened from the target description -#include "PTXGenDAGISel.inc" - - private: - // We need this only because we can't match intruction BRAdp - // pattern (PTXbrcond bb:$d, ...) in PTXInstrInfo.td - SDNode *SelectBRCOND(SDNode *Node); - - SDNode *SelectREADPARAM(SDNode *Node); - SDNode *SelectWRITEPARAM(SDNode *Node); - SDNode *SelectFrameIndex(SDNode *Node); - - bool isImm(const SDValue &operand); - bool SelectImm(const SDValue &operand, SDValue &imm); - - const PTXSubtarget& getSubtarget() const; -}; // class PTXDAGToDAGISel -} // namespace - -// createPTXISelDag - This pass converts a legalized DAG into a -// PTX-specific DAG, ready for instruction scheduling -FunctionPass *llvm::createPTXISelDag(PTXTargetMachine &TM, - CodeGenOpt::Level OptLevel) { - return new PTXDAGToDAGISel(TM, OptLevel); -} - -PTXDAGToDAGISel::PTXDAGToDAGISel(PTXTargetMachine &TM, - CodeGenOpt::Level OptLevel) - : SelectionDAGISel(TM, OptLevel) {} - -SDNode *PTXDAGToDAGISel::Select(SDNode *Node) { - switch (Node->getOpcode()) { - case ISD::BRCOND: - return SelectBRCOND(Node); - case PTXISD::READ_PARAM: - return SelectREADPARAM(Node); - case PTXISD::WRITE_PARAM: - return SelectWRITEPARAM(Node); - case ISD::FrameIndex: - return SelectFrameIndex(Node); - default: - return SelectCode(Node); - } -} - -SDNode *PTXDAGToDAGISel::SelectBRCOND(SDNode *Node) { - assert(Node->getNumOperands() >= 3); - - SDValue Chain = Node->getOperand(0); - SDValue Pred = Node->getOperand(1); - SDValue Target = Node->getOperand(2); // branch target - SDValue PredOp = CurDAG->getTargetConstant(PTXPredicate::Normal, MVT::i32); - DebugLoc dl = Node->getDebugLoc(); - - assert(Target.getOpcode() == ISD::BasicBlock); - assert(Pred.getValueType() == MVT::i1); - - // Emit BRAdp - SDValue Ops[] = { Target, Pred, PredOp, Chain }; - return CurDAG->getMachineNode(PTX::BRAdp, dl, MVT::Other, Ops, 4); -} - -SDNode *PTXDAGToDAGISel::SelectREADPARAM(SDNode *Node) { - SDValue Chain = Node->getOperand(0); - SDValue Index = Node->getOperand(1); - - int OpCode; - - // Get the type of parameter we are reading - EVT VT = Node->getValueType(0); - assert(VT.isSimple() && "READ_PARAM only implemented for MVT types"); - - MVT Type = VT.getSimpleVT(); - - if (Type == MVT::i1) - OpCode = PTX::READPARAMPRED; - else if (Type == MVT::i16) - OpCode = PTX::READPARAMI16; - else if (Type == MVT::i32) - OpCode = PTX::READPARAMI32; - else if (Type == MVT::i64) - OpCode = PTX::READPARAMI64; - else if (Type == MVT::f32) - OpCode = PTX::READPARAMF32; - else { - assert(Type == MVT::f64 && "Unexpected type!"); - OpCode = PTX::READPARAMF64; - } - - SDValue Pred = CurDAG->getRegister(PTX::NoRegister, MVT::i1); - SDValue PredOp = CurDAG->getTargetConstant(PTXPredicate::None, MVT::i32); - DebugLoc dl = Node->getDebugLoc(); - - SDValue Ops[] = { Index, Pred, PredOp, Chain }; - return CurDAG->getMachineNode(OpCode, dl, VT, Ops, 4); -} - -SDNode *PTXDAGToDAGISel::SelectWRITEPARAM(SDNode *Node) { - - SDValue Chain = Node->getOperand(0); - SDValue Value = Node->getOperand(1); - - int OpCode; - - //Node->dumpr(CurDAG); - - // Get the type of parameter we are writing - EVT VT = Value->getValueType(0); - assert(VT.isSimple() && "WRITE_PARAM only implemented for MVT types"); - - MVT Type = VT.getSimpleVT(); - - if (Type == MVT::i1) - OpCode = PTX::WRITEPARAMPRED; - else if (Type == MVT::i16) - OpCode = PTX::WRITEPARAMI16; - else if (Type == MVT::i32) - OpCode = PTX::WRITEPARAMI32; - else if (Type == MVT::i64) - OpCode = PTX::WRITEPARAMI64; - else if (Type == MVT::f32) - OpCode = PTX::WRITEPARAMF32; - else if (Type == MVT::f64) - OpCode = PTX::WRITEPARAMF64; - else - llvm_unreachable("Invalid type in SelectWRITEPARAM"); - - SDValue Pred = CurDAG->getRegister(PTX::NoRegister, MVT::i1); - SDValue PredOp = CurDAG->getTargetConstant(PTXPredicate::None, MVT::i32); - DebugLoc dl = Node->getDebugLoc(); - - SDValue Ops[] = { Value, Pred, PredOp, Chain }; - SDNode* Ret = CurDAG->getMachineNode(OpCode, dl, MVT::Other, Ops, 4); - - //dbgs() << "SelectWRITEPARAM produced:\n\t"; - //Ret->dumpr(CurDAG); - - return Ret; -} - -SDNode *PTXDAGToDAGISel::SelectFrameIndex(SDNode *Node) { - int FI = cast<FrameIndexSDNode>(Node)->getIndex(); - //dbgs() << "Selecting FrameIndex at index " << FI << "\n"; - //SDValue TFI = CurDAG->getTargetFrameIndex(FI, Node->getValueType(0)); - - PTXMachineFunctionInfo *MFI = MF->getInfo<PTXMachineFunctionInfo>(); - - SDValue FrameSymbol = CurDAG->getTargetExternalSymbol(MFI->getFrameSymbol(FI), - Node->getValueType(0)); - - return FrameSymbol.getNode(); -} - -// Match memory operand of the form [reg+reg] -bool PTXDAGToDAGISel::SelectADDRrr(SDValue &Addr, SDValue &R1, SDValue &R2) { - if (Addr.getOpcode() != ISD::ADD || Addr.getNumOperands() < 2 || - isImm(Addr.getOperand(0)) || isImm(Addr.getOperand(1))) - return false; - - assert(Addr.getValueType().isSimple() && "Type must be simple"); - - R1 = Addr; - R2 = CurDAG->getTargetConstant(0, Addr.getValueType().getSimpleVT()); - - return true; -} - -// Match memory operand of the form [reg], [imm+reg], and [reg+imm] -bool PTXDAGToDAGISel::SelectADDRri(SDValue &Addr, SDValue &Base, - SDValue &Offset) { - // FrameIndex addresses are handled separately - //errs() << "SelectADDRri: "; - //Addr.getNode()->dumpr(); - if (isa<FrameIndexSDNode>(Addr)) { - //errs() << "Failure\n"; - return false; - } - - if (CurDAG->isBaseWithConstantOffset(Addr)) { - Base = Addr.getOperand(0); - if (isa<FrameIndexSDNode>(Base)) { - //errs() << "Failure\n"; - return false; - } - ConstantSDNode *CN = dyn_cast<ConstantSDNode>(Addr.getOperand(1)); - Offset = CurDAG->getTargetConstant(CN->getZExtValue(), MVT::i32); - //errs() << "Success\n"; - return true; - } - - /*if (Addr.getNumOperands() == 1) { - Base = Addr; - Offset = CurDAG->getTargetConstant(0, Addr.getValueType().getSimpleVT()); - errs() << "Success\n"; - return true; - }*/ - - //errs() << "SelectADDRri fails on: "; - //Addr.getNode()->dumpr(); - - if (isImm(Addr)) { - //errs() << "Failure\n"; - return false; - } - - Base = Addr; - Offset = CurDAG->getTargetConstant(0, Addr.getValueType().getSimpleVT()); - - //errs() << "Success\n"; - return true; - - /*if (Addr.getOpcode() != ISD::ADD) { - // let SelectADDRii handle the [imm] case - if (isImm(Addr)) - return false; - // it is [reg] - - assert(Addr.getValueType().isSimple() && "Type must be simple"); - Base = Addr; - Offset = CurDAG->getTargetConstant(0, Addr.getValueType().getSimpleVT()); - - return true; - } - - if (Addr.getNumOperands() < 2) - return false; - - // let SelectADDRii handle the [imm+imm] case - if (isImm(Addr.getOperand(0)) && isImm(Addr.getOperand(1))) - return false; - - // try [reg+imm] and [imm+reg] - for (int i = 0; i < 2; i ++) - if (SelectImm(Addr.getOperand(1-i), Offset)) { - Base = Addr.getOperand(i); - return true; - } - - // neither [reg+imm] nor [imm+reg] - return false;*/ -} - -// Match memory operand of the form [imm+imm] and [imm] -bool PTXDAGToDAGISel::SelectADDRii(SDValue &Addr, SDValue &Base, - SDValue &Offset) { - // is [imm+imm]? - if (Addr.getOpcode() == ISD::ADD) { - return SelectImm(Addr.getOperand(0), Base) && - SelectImm(Addr.getOperand(1), Offset); - } - - // is [imm]? - if (SelectImm(Addr, Base)) { - assert(Addr.getValueType().isSimple() && "Type must be simple"); - - Offset = CurDAG->getTargetConstant(0, Addr.getValueType().getSimpleVT()); - - return true; - } - - return false; -} - -// Match memory operand of the form [reg], [imm+reg], and [reg+imm] -bool PTXDAGToDAGISel::SelectADDRlocal(SDValue &Addr, SDValue &Base, - SDValue &Offset) { - //errs() << "SelectADDRlocal: "; - //Addr.getNode()->dumpr(); - if (isa<FrameIndexSDNode>(Addr)) { - Base = Addr; - Offset = CurDAG->getTargetConstant(0, Addr.getValueType().getSimpleVT()); - //errs() << "Success\n"; - return true; - } - - if (CurDAG->isBaseWithConstantOffset(Addr)) { - Base = Addr.getOperand(0); - if (!isa<FrameIndexSDNode>(Base)) { - //errs() << "Failure\n"; - return false; - } - ConstantSDNode *CN = dyn_cast<ConstantSDNode>(Addr.getOperand(1)); - Offset = CurDAG->getTargetConstant(CN->getZExtValue(), MVT::i32); - //errs() << "Offset: "; - //Offset.getNode()->dumpr(); - //errs() << "Success\n"; - return true; - } - - //errs() << "Failure\n"; - return false; -} - -bool PTXDAGToDAGISel::isImm(const SDValue &operand) { - return ConstantSDNode::classof(operand.getNode()); -} - -bool PTXDAGToDAGISel::SelectImm(const SDValue &operand, SDValue &imm) { - SDNode *node = operand.getNode(); - if (!ConstantSDNode::classof(node)) - return false; - - ConstantSDNode *CN = cast<ConstantSDNode>(node); - imm = CurDAG->getTargetConstant(*CN->getConstantIntValue(), - operand.getValueType()); - return true; -} - -const PTXSubtarget& PTXDAGToDAGISel::getSubtarget() const -{ - return TM.getSubtarget<PTXSubtarget>(); -} - diff --git a/lib/Target/PTX/PTXISelLowering.cpp b/lib/Target/PTX/PTXISelLowering.cpp deleted file mode 100644 index 4d5e9bf..0000000 --- a/lib/Target/PTX/PTXISelLowering.cpp +++ /dev/null @@ -1,516 +0,0 @@ -//===-- PTXISelLowering.cpp - PTX DAG Lowering Implementation -------------===// -// -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. -// -//===----------------------------------------------------------------------===// -// -// This file implements the PTXTargetLowering class. -// -//===----------------------------------------------------------------------===// - -#include "PTXISelLowering.h" -#include "PTX.h" -#include "PTXMachineFunctionInfo.h" -#include "PTXRegisterInfo.h" -#include "PTXSubtarget.h" -#include "llvm/Function.h" -#include "llvm/Support/ErrorHandling.h" -#include "llvm/CodeGen/CallingConvLower.h" -#include "llvm/CodeGen/MachineFunction.h" -#include "llvm/CodeGen/MachineFrameInfo.h" -#include "llvm/CodeGen/MachineRegisterInfo.h" -#include "llvm/CodeGen/SelectionDAG.h" -#include "llvm/CodeGen/TargetLoweringObjectFileImpl.h" -#include "llvm/Support/Debug.h" -#include "llvm/Support/raw_ostream.h" - -using namespace llvm; - -//===----------------------------------------------------------------------===// -// TargetLowering Implementation -//===----------------------------------------------------------------------===// - -PTXTargetLowering::PTXTargetLowering(TargetMachine &TM) - : TargetLowering(TM, new TargetLoweringObjectFileELF()) { - // Set up the register classes. - addRegisterClass(MVT::i1, &PTX::RegPredRegClass); - addRegisterClass(MVT::i16, &PTX::RegI16RegClass); - addRegisterClass(MVT::i32, &PTX::RegI32RegClass); - addRegisterClass(MVT::i64, &PTX::RegI64RegClass); - addRegisterClass(MVT::f32, &PTX::RegF32RegClass); - addRegisterClass(MVT::f64, &PTX::RegF64RegClass); - - setBooleanContents(ZeroOrOneBooleanContent); - setBooleanVectorContents(ZeroOrOneBooleanContent); // FIXME: Is this correct? - setMinFunctionAlignment(2); - - // Let LLVM use loads/stores for all mem* operations - maxStoresPerMemcpy = 4096; - maxStoresPerMemmove = 4096; - maxStoresPerMemset = 4096; - - //////////////////////////////////// - /////////// Expansion ////////////// - //////////////////////////////////// - - // (any/zero/sign) extload => load + (any/zero/sign) extend - - setLoadExtAction(ISD::EXTLOAD, MVT::i16, Expand); - setLoadExtAction(ISD::ZEXTLOAD, MVT::i16, Expand); - setLoadExtAction(ISD::SEXTLOAD, MVT::i16, Expand); - - // f32 extload => load + fextend - - setLoadExtAction(ISD::EXTLOAD, MVT::f32, Expand); - - // f64 truncstore => trunc + store - - setTruncStoreAction(MVT::f64, MVT::f32, Expand); - - // sign_extend_inreg => sign_extend - - setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i1, Expand); - - // br_cc => brcond - - setOperationAction(ISD::BR_CC, MVT::Other, Expand); - - // select_cc => setcc - - setOperationAction(ISD::SELECT_CC, MVT::Other, Expand); - setOperationAction(ISD::SELECT_CC, MVT::f32, Expand); - setOperationAction(ISD::SELECT_CC, MVT::f64, Expand); - - //////////////////////////////////// - //////////// Legal ///////////////// - //////////////////////////////////// - - setOperationAction(ISD::ConstantFP, MVT::f32, Legal); - setOperationAction(ISD::ConstantFP, MVT::f64, Legal); - - //////////////////////////////////// - //////////// Custom //////////////// - //////////////////////////////////// - - // customise setcc to use bitwise logic if possible - - //setOperationAction(ISD::SETCC, MVT::i1, Custom); - setOperationAction(ISD::SETCC, MVT::i1, Legal); - - // customize translation of memory addresses - - setOperationAction(ISD::GlobalAddress, MVT::i32, Custom); - setOperationAction(ISD::GlobalAddress, MVT::i64, Custom); - - // Compute derived properties from the register classes - computeRegisterProperties(); -} - -EVT PTXTargetLowering::getSetCCResultType(EVT VT) const { - return MVT::i1; -} - -SDValue PTXTargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const { - switch (Op.getOpcode()) { - default: - llvm_unreachable("Unimplemented operand"); - case ISD::SETCC: - return LowerSETCC(Op, DAG); - case ISD::GlobalAddress: - return LowerGlobalAddress(Op, DAG); - } -} - -const char *PTXTargetLowering::getTargetNodeName(unsigned Opcode) const { - switch (Opcode) { - default: - llvm_unreachable("Unknown opcode"); - case PTXISD::COPY_ADDRESS: - return "PTXISD::COPY_ADDRESS"; - case PTXISD::LOAD_PARAM: - return "PTXISD::LOAD_PARAM"; - case PTXISD::STORE_PARAM: - return "PTXISD::STORE_PARAM"; - case PTXISD::READ_PARAM: - return "PTXISD::READ_PARAM"; - case PTXISD::WRITE_PARAM: - return "PTXISD::WRITE_PARAM"; - case PTXISD::EXIT: - return "PTXISD::EXIT"; - case PTXISD::RET: - return "PTXISD::RET"; - case PTXISD::CALL: - return "PTXISD::CALL"; - } -} - -//===----------------------------------------------------------------------===// -// Custom Lower Operation -//===----------------------------------------------------------------------===// - -SDValue PTXTargetLowering::LowerSETCC(SDValue Op, SelectionDAG &DAG) const { - assert(Op.getValueType() == MVT::i1 && "SetCC type must be 1-bit integer"); - SDValue Op0 = Op.getOperand(0); - SDValue Op1 = Op.getOperand(1); - SDValue Op2 = Op.getOperand(2); - DebugLoc dl = Op.getDebugLoc(); - //ISD::CondCode CC = cast<CondCodeSDNode>(Op.getOperand(2))->get(); - - // Look for X == 0, X == 1, X != 0, or X != 1 - // We can simplify these to bitwise logic - - //if (Op1.getOpcode() == ISD::Constant && - // (cast<ConstantSDNode>(Op1)->getZExtValue() == 1 || - // cast<ConstantSDNode>(Op1)->isNullValue()) && - // (CC == ISD::SETEQ || CC == ISD::SETNE)) { - // - // return DAG.getNode(ISD::AND, dl, MVT::i1, Op0, Op1); - //} - - //ConstantSDNode* COp1 = cast<ConstantSDNode>(Op1); - //if(COp1 && COp1->getZExtValue() == 1) { - // if(CC == ISD::SETNE) { - // return DAG.getNode(PTX::XORripreds, dl, MVT::i1, Op0); - // } - //} - - llvm_unreachable("setcc was not matched by a pattern!"); - - return DAG.getNode(ISD::SETCC, dl, MVT::i1, Op0, Op1, Op2); -} - -SDValue PTXTargetLowering:: -LowerGlobalAddress(SDValue Op, SelectionDAG &DAG) const { - EVT PtrVT = getPointerTy(); - DebugLoc dl = Op.getDebugLoc(); - const GlobalValue *GV = cast<GlobalAddressSDNode>(Op)->getGlobal(); - - assert(PtrVT.isSimple() && "Pointer must be to primitive type."); - - SDValue targetGlobal = DAG.getTargetGlobalAddress(GV, dl, PtrVT); - SDValue movInstr = DAG.getNode(PTXISD::COPY_ADDRESS, - dl, - PtrVT.getSimpleVT(), - targetGlobal); - - return movInstr; -} - -//===----------------------------------------------------------------------===// -// Calling Convention Implementation -//===----------------------------------------------------------------------===// - -SDValue PTXTargetLowering:: - LowerFormalArguments(SDValue Chain, - CallingConv::ID CallConv, - bool isVarArg, - const SmallVectorImpl<ISD::InputArg> &Ins, - DebugLoc dl, - SelectionDAG &DAG, - SmallVectorImpl<SDValue> &InVals) const { - if (isVarArg) llvm_unreachable("PTX does not support varargs"); - - MachineFunction &MF = DAG.getMachineFunction(); - const PTXSubtarget& ST = getTargetMachine().getSubtarget<PTXSubtarget>(); - PTXMachineFunctionInfo *MFI = MF.getInfo<PTXMachineFunctionInfo>(); - PTXParamManager &PM = MFI->getParamManager(); - - switch (CallConv) { - default: - llvm_unreachable("Unsupported calling convention"); - case CallingConv::PTX_Kernel: - MFI->setKernel(true); - break; - case CallingConv::PTX_Device: - MFI->setKernel(false); - break; - } - - // We do one of two things here: - // IsKernel || SM >= 2.0 -> Use param space for arguments - // SM < 2.0 -> Use registers for arguments - if (MFI->isKernel() || ST.useParamSpaceForDeviceArgs()) { - // We just need to emit the proper LOAD_PARAM ISDs - for (unsigned i = 0, e = Ins.size(); i != e; ++i) { - assert((!MFI->isKernel() || Ins[i].VT != MVT::i1) && - "Kernels cannot take pred operands"); - - unsigned ParamSize = Ins[i].VT.getStoreSizeInBits(); - unsigned Param = PM.addArgumentParam(ParamSize); - const std::string &ParamName = PM.getParamName(Param); - SDValue ParamValue = DAG.getTargetExternalSymbol(ParamName.c_str(), - MVT::Other); - SDValue ArgValue = DAG.getNode(PTXISD::LOAD_PARAM, dl, Ins[i].VT, Chain, - ParamValue); - InVals.push_back(ArgValue); - } - } - else { - for (unsigned i = 0, e = Ins.size(); i != e; ++i) { - EVT RegVT = Ins[i].VT; - const TargetRegisterClass* TRC = getRegClassFor(RegVT); - unsigned RegType; - - // Determine which register class we need - if (RegVT == MVT::i1) - RegType = PTXRegisterType::Pred; - else if (RegVT == MVT::i16) - RegType = PTXRegisterType::B16; - else if (RegVT == MVT::i32) - RegType = PTXRegisterType::B32; - else if (RegVT == MVT::i64) - RegType = PTXRegisterType::B64; - else if (RegVT == MVT::f32) - RegType = PTXRegisterType::F32; - else if (RegVT == MVT::f64) - RegType = PTXRegisterType::F64; - else - llvm_unreachable("Unknown parameter type"); - - // Use a unique index in the instruction to prevent instruction folding. - // Yes, this is a hack. - SDValue Index = DAG.getTargetConstant(i, MVT::i32); - unsigned Reg = MF.getRegInfo().createVirtualRegister(TRC); - SDValue ArgValue = DAG.getNode(PTXISD::READ_PARAM, dl, RegVT, Chain, - Index); - - InVals.push_back(ArgValue); - - MFI->addRegister(Reg, RegType, PTXRegisterSpace::Argument); - } - } - - return Chain; -} - -SDValue PTXTargetLowering:: - LowerReturn(SDValue Chain, - CallingConv::ID CallConv, - bool isVarArg, - const SmallVectorImpl<ISD::OutputArg> &Outs, - const SmallVectorImpl<SDValue> &OutVals, - DebugLoc dl, - SelectionDAG &DAG) const { - if (isVarArg) llvm_unreachable("PTX does not support varargs"); - - switch (CallConv) { - default: - llvm_unreachable("Unsupported calling convention."); - case CallingConv::PTX_Kernel: - assert(Outs.size() == 0 && "Kernel must return void."); - return DAG.getNode(PTXISD::EXIT, dl, MVT::Other, Chain); - case CallingConv::PTX_Device: - assert(Outs.size() <= 1 && "Can at most return one value."); - break; - } - - MachineFunction& MF = DAG.getMachineFunction(); - PTXMachineFunctionInfo *MFI = MF.getInfo<PTXMachineFunctionInfo>(); - PTXParamManager &PM = MFI->getParamManager(); - - SDValue Flag; - const PTXSubtarget& ST = getTargetMachine().getSubtarget<PTXSubtarget>(); - - if (ST.useParamSpaceForDeviceArgs()) { - assert(Outs.size() < 2 && "Device functions can return at most one value"); - - if (Outs.size() == 1) { - unsigned ParamSize = OutVals[0].getValueType().getSizeInBits(); - unsigned Param = PM.addReturnParam(ParamSize); - const std::string &ParamName = PM.getParamName(Param); - SDValue ParamValue = DAG.getTargetExternalSymbol(ParamName.c_str(), - MVT::Other); - Chain = DAG.getNode(PTXISD::STORE_PARAM, dl, MVT::Other, Chain, - ParamValue, OutVals[0]); - } - } else { - for (unsigned i = 0, e = Outs.size(); i != e; ++i) { - EVT RegVT = Outs[i].VT; - const TargetRegisterClass* TRC; - unsigned RegType; - - // Determine which register class we need - if (RegVT == MVT::i1) { - TRC = &PTX::RegPredRegClass; - RegType = PTXRegisterType::Pred; - } else if (RegVT == MVT::i16) { - TRC = &PTX::RegI16RegClass; - RegType = PTXRegisterType::B16; - } else if (RegVT == MVT::i32) { - TRC = &PTX::RegI32RegClass; - RegType = PTXRegisterType::B32; - } else if (RegVT == MVT::i64) { - TRC = &PTX::RegI64RegClass; - RegType = PTXRegisterType::B64; - } else if (RegVT == MVT::f32) { - TRC = &PTX::RegF32RegClass; - RegType = PTXRegisterType::F32; - } else if (RegVT == MVT::f64) { - TRC = &PTX::RegF64RegClass; - RegType = PTXRegisterType::F64; - } else { - llvm_unreachable("Unknown parameter type"); - } - - unsigned Reg = MF.getRegInfo().createVirtualRegister(TRC); - - SDValue Copy = DAG.getCopyToReg(Chain, dl, Reg, OutVals[i]/*, Flag*/); - SDValue OutReg = DAG.getRegister(Reg, RegVT); - - Chain = DAG.getNode(PTXISD::WRITE_PARAM, dl, MVT::Other, Copy, OutReg); - - MFI->addRegister(Reg, RegType, PTXRegisterSpace::Return); - } - } - - if (Flag.getNode() == 0) { - return DAG.getNode(PTXISD::RET, dl, MVT::Other, Chain); - } - else { - return DAG.getNode(PTXISD::RET, dl, MVT::Other, Chain, Flag); - } -} - -SDValue -PTXTargetLowering::LowerCall(SDValue Chain, SDValue Callee, - CallingConv::ID CallConv, bool isVarArg, - bool doesNotRet, bool &isTailCall, - const SmallVectorImpl<ISD::OutputArg> &Outs, - const SmallVectorImpl<SDValue> &OutVals, - const SmallVectorImpl<ISD::InputArg> &Ins, - DebugLoc dl, SelectionDAG &DAG, - SmallVectorImpl<SDValue> &InVals) const { - - MachineFunction& MF = DAG.getMachineFunction(); - PTXMachineFunctionInfo *PTXMFI = MF.getInfo<PTXMachineFunctionInfo>(); - PTXParamManager &PM = PTXMFI->getParamManager(); - MachineFrameInfo *MFI = MF.getFrameInfo(); - - assert(getTargetMachine().getSubtarget<PTXSubtarget>().callsAreHandled() && - "Calls are not handled for the target device"); - - // Identify the callee function - const GlobalValue *GV = cast<GlobalAddressSDNode>(Callee)->getGlobal(); - const Function *function = cast<Function>(GV); - - // allow non-device calls only for printf - bool isPrintf = function->getName() == "printf" || function->getName() == "puts"; - - assert((isPrintf || function->getCallingConv() == CallingConv::PTX_Device) && - "PTX function calls must be to PTX device functions"); - - unsigned outSize = isPrintf ? 2 : Outs.size(); - - std::vector<SDValue> Ops; - // The layout of the ops will be [Chain, #Ins, Ins, Callee, #Outs, Outs] - Ops.resize(outSize + Ins.size() + 4); - - Ops[0] = Chain; - - // Identify the callee function - Callee = DAG.getTargetGlobalAddress(GV, dl, getPointerTy()); - Ops[Ins.size()+2] = Callee; - - // #Outs - Ops[Ins.size()+3] = DAG.getTargetConstant(outSize, MVT::i32); - - if (isPrintf) { - // first argument is the address of the global string variable in memory - unsigned Param0 = PM.addLocalParam(getPointerTy().getSizeInBits()); - SDValue ParamValue0 = DAG.getTargetExternalSymbol(PM.getParamName(Param0).c_str(), - MVT::Other); - Chain = DAG.getNode(PTXISD::STORE_PARAM, dl, MVT::Other, Chain, - ParamValue0, OutVals[0]); - Ops[Ins.size()+4] = ParamValue0; - - // alignment is the maximum size of all the arguments - unsigned alignment = 0; - for (unsigned i = 1; i < OutVals.size(); ++i) { - alignment = std::max(alignment, - OutVals[i].getValueType().getSizeInBits()); - } - - // size is the alignment multiplied by the number of arguments - unsigned size = alignment * (OutVals.size() - 1); - - // second argument is the address of the stack object (unless no arguments) - unsigned Param1 = PM.addLocalParam(getPointerTy().getSizeInBits()); - SDValue ParamValue1 = DAG.getTargetExternalSymbol(PM.getParamName(Param1).c_str(), - MVT::Other); - Ops[Ins.size()+5] = ParamValue1; - - if (size > 0) - { - // create a local stack object to store the arguments - unsigned StackObject = MFI->CreateStackObject(size / 8, alignment / 8, false); - SDValue FrameIndex = DAG.getFrameIndex(StackObject, getPointerTy()); - - // store each of the arguments to the stack in turn - for (unsigned int i = 1; i != OutVals.size(); i++) { - SDValue FrameAddr = DAG.getNode(ISD::ADD, dl, getPointerTy(), FrameIndex, DAG.getTargetConstant((i - 1) * 8, getPointerTy())); - Chain = DAG.getStore(Chain, dl, OutVals[i], FrameAddr, - MachinePointerInfo(), - false, false, 0); - } - - // copy the address of the local frame index to get the address in non-local space - SDValue genericAddr = DAG.getNode(PTXISD::COPY_ADDRESS, dl, getPointerTy(), FrameIndex); - - // store this address in the second argument - Chain = DAG.getNode(PTXISD::STORE_PARAM, dl, MVT::Other, Chain, ParamValue1, genericAddr); - } - } - else - { - // Generate STORE_PARAM nodes for each function argument. In PTX, function - // arguments are explicitly stored into .param variables and passed as - // arguments. There is no register/stack-based calling convention in PTX. - for (unsigned i = 0; i != OutVals.size(); ++i) { - unsigned Size = OutVals[i].getValueType().getSizeInBits(); - unsigned Param = PM.addLocalParam(Size); - const std::string &ParamName = PM.getParamName(Param); - SDValue ParamValue = DAG.getTargetExternalSymbol(ParamName.c_str(), - MVT::Other); - Chain = DAG.getNode(PTXISD::STORE_PARAM, dl, MVT::Other, Chain, - ParamValue, OutVals[i]); - Ops[i+Ins.size()+4] = ParamValue; - } - } - - std::vector<SDValue> InParams; - - // Generate list of .param variables to hold the return value(s). - Ops[1] = DAG.getTargetConstant(Ins.size(), MVT::i32); - for (unsigned i = 0; i < Ins.size(); ++i) { - unsigned Size = Ins[i].VT.getStoreSizeInBits(); - unsigned Param = PM.addLocalParam(Size); - const std::string &ParamName = PM.getParamName(Param); - SDValue ParamValue = DAG.getTargetExternalSymbol(ParamName.c_str(), - MVT::Other); - Ops[i+2] = ParamValue; - InParams.push_back(ParamValue); - } - - Ops[0] = Chain; - - // Create the CALL node. - Chain = DAG.getNode(PTXISD::CALL, dl, MVT::Other, &Ops[0], Ops.size()); - - // Create the LOAD_PARAM nodes that retrieve the function return value(s). - for (unsigned i = 0; i < Ins.size(); ++i) { - SDValue Load = DAG.getNode(PTXISD::LOAD_PARAM, dl, Ins[i].VT, Chain, - InParams[i]); - InVals.push_back(Load); - } - - return Chain; -} - -unsigned PTXTargetLowering::getNumRegisters(LLVMContext &Context, EVT VT) { - // All arguments consist of one "register," regardless of the type. - return 1; -} - diff --git a/lib/Target/PTX/PTXISelLowering.h b/lib/Target/PTX/PTXISelLowering.h deleted file mode 100644 index 33220f4..0000000 --- a/lib/Target/PTX/PTXISelLowering.h +++ /dev/null @@ -1,82 +0,0 @@ -//===-- PTXISelLowering.h - PTX DAG Lowering Interface ----------*- C++ -*-===// -// -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. -// -//===----------------------------------------------------------------------===// -// -// This file defines the interfaces that PTX uses to lower LLVM code into a -// selection DAG. -// -//===----------------------------------------------------------------------===// - -#ifndef PTX_ISEL_LOWERING_H -#define PTX_ISEL_LOWERING_H - -#include "llvm/Target/TargetLowering.h" - -namespace llvm { - -namespace PTXISD { - enum NodeType { - FIRST_NUMBER = ISD::BUILTIN_OP_END, - LOAD_PARAM, - STORE_PARAM, - READ_PARAM, - WRITE_PARAM, - EXIT, - RET, - COPY_ADDRESS, - CALL - }; -} // namespace PTXISD - -class PTXTargetLowering : public TargetLowering { - public: - explicit PTXTargetLowering(TargetMachine &TM); - - virtual const char *getTargetNodeName(unsigned Opcode) const; - - virtual SDValue LowerOperation(SDValue Op, SelectionDAG &DAG) const; - - virtual SDValue LowerSETCC(SDValue Op, SelectionDAG &DAG) const; - - virtual SDValue - LowerFormalArguments(SDValue Chain, - CallingConv::ID CallConv, - bool isVarArg, - const SmallVectorImpl<ISD::InputArg> &Ins, - DebugLoc dl, - SelectionDAG &DAG, - SmallVectorImpl<SDValue> &InVals) const; - - virtual SDValue - LowerReturn(SDValue Chain, - CallingConv::ID CallConv, - bool isVarArg, - const SmallVectorImpl<ISD::OutputArg> &Outs, - const SmallVectorImpl<SDValue> &OutVals, - DebugLoc dl, - SelectionDAG &DAG) const; - - virtual SDValue - LowerCall(SDValue Chain, SDValue Callee, CallingConv::ID CallConv, - bool isVarArg, bool doesNotRet, bool &isTailCall, - const SmallVectorImpl<ISD::OutputArg> &Outs, - const SmallVectorImpl<SDValue> &OutVals, - const SmallVectorImpl<ISD::InputArg> &Ins, - DebugLoc dl, SelectionDAG &DAG, - SmallVectorImpl<SDValue> &InVals) const; - - virtual EVT getSetCCResultType(EVT VT) const; - - virtual unsigned getNumRegisters(LLVMContext &Context, EVT VT); - - private: - SDValue LowerGlobalAddress(SDValue Op, SelectionDAG &DAG) const; -}; // class PTXTargetLowering -} // namespace llvm - -#endif // PTX_ISEL_LOWERING_H diff --git a/lib/Target/PTX/PTXInstrFormats.td b/lib/Target/PTX/PTXInstrFormats.td deleted file mode 100644 index 267e834..0000000 --- a/lib/Target/PTX/PTXInstrFormats.td +++ /dev/null @@ -1,51 +0,0 @@ -//===-- PTXInstrFormats.td - PTX Instruction Formats -------*- tablegen -*-===// -// -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. -// -//===----------------------------------------------------------------------===// - - -// Rounding Mode Specifier -/*class RoundingMode<bits<3> val> { - bits<3> Value = val; -} - -def RndDefault : RoundingMode<0>; -def RndNearestEven : RoundingMode<1>; -def RndNearestZero : RoundingMode<2>; -def RndNegInf : RoundingMode<3>; -def RndPosInf : RoundingMode<4>; -def RndApprox : RoundingMode<5>;*/ - - -// Rounding Mode Operand -def RndMode : Operand<i32> { - let PrintMethod = "printRoundingMode"; -} - -def RndDefault : PatLeaf<(i32 0)>; - -// PTX Predicate operand, default to (0, 0) = (zero-reg, none). -// Leave PrintMethod empty; predicate printing is defined elsewhere. -def pred : PredicateOperand<OtherVT, (ops RegPred, i32imm), - (ops (i1 zero_reg), (i32 2))>; - -def RndModeOperand : Operand<OtherVT> { - let MIOperandInfo = (ops i32imm); -} - -// Instruction Types -let Namespace = "PTX" in { - - class InstPTX<dag oops, dag iops, string asmstr, list<dag> pattern> - : Instruction { - dag OutOperandList = oops; - dag InOperandList = !con(iops, (ins pred:$_p)); - let AsmString = asmstr; // Predicate printing is defined elsewhere. - let Pattern = pattern; - let isPredicable = 1; - } -} diff --git a/lib/Target/PTX/PTXInstrInfo.cpp b/lib/Target/PTX/PTXInstrInfo.cpp deleted file mode 100644 index 443cd54..0000000 --- a/lib/Target/PTX/PTXInstrInfo.cpp +++ /dev/null @@ -1,359 +0,0 @@ -//===-- PTXInstrInfo.cpp - PTX Instruction Information --------------------===// -// -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. -// -//===----------------------------------------------------------------------===// -// -// This file contains the PTX implementation of the TargetInstrInfo class. -// -//===----------------------------------------------------------------------===// - -#define DEBUG_TYPE "ptx-instrinfo" - -#include "PTXInstrInfo.h" -#include "PTX.h" -#include "llvm/CodeGen/MachineInstrBuilder.h" -#include "llvm/CodeGen/MachineRegisterInfo.h" -#include "llvm/CodeGen/SelectionDAG.h" -#include "llvm/CodeGen/SelectionDAGNodes.h" -#include "llvm/Support/Debug.h" -#include "llvm/Support/TargetRegistry.h" -#include "llvm/Support/raw_ostream.h" - -#define GET_INSTRINFO_CTOR -#include "PTXGenInstrInfo.inc" - -using namespace llvm; - -PTXInstrInfo::PTXInstrInfo(PTXTargetMachine &_TM) - : PTXGenInstrInfo(), - RI(_TM, *this), TM(_TM) {} - -static const struct map_entry { - const TargetRegisterClass *cls; - const int opcode; -} map[] = { - { &PTX::RegI16RegClass, PTX::MOVU16rr }, - { &PTX::RegI32RegClass, PTX::MOVU32rr }, - { &PTX::RegI64RegClass, PTX::MOVU64rr }, - { &PTX::RegF32RegClass, PTX::MOVF32rr }, - { &PTX::RegF64RegClass, PTX::MOVF64rr }, - { &PTX::RegPredRegClass, PTX::MOVPREDrr } -}; - -void PTXInstrInfo::copyPhysReg(MachineBasicBlock &MBB, - MachineBasicBlock::iterator I, DebugLoc DL, - unsigned DstReg, unsigned SrcReg, - bool KillSrc) const { - - const MachineRegisterInfo& MRI = MBB.getParent()->getRegInfo(); - //assert(MRI.getRegClass(SrcReg) == MRI.getRegClass(DstReg) && - // "Invalid register copy between two register classes"); - - for (int i = 0, e = sizeof(map)/sizeof(map[0]); i != e; ++i) { - if (map[i].cls == MRI.getRegClass(DstReg)) { - const MCInstrDesc &MCID = get(map[i].opcode); - MachineInstr *MI = BuildMI(MBB, I, DL, MCID, DstReg). - addReg(SrcReg, getKillRegState(KillSrc)); - AddDefaultPredicate(MI); - return; - } - } - - llvm_unreachable("Impossible reg-to-reg copy"); -} - -bool PTXInstrInfo::copyRegToReg(MachineBasicBlock &MBB, - MachineBasicBlock::iterator I, - unsigned DstReg, unsigned SrcReg, - const TargetRegisterClass *DstRC, - const TargetRegisterClass *SrcRC, - DebugLoc DL) const { - if (DstRC != SrcRC) - return false; - - for (int i = 0, e = sizeof(map)/sizeof(map[0]); i != e; ++ i) - if (DstRC == map[i].cls) { - const MCInstrDesc &MCID = get(map[i].opcode); - MachineInstr *MI = BuildMI(MBB, I, DL, MCID, DstReg).addReg(SrcReg); - AddDefaultPredicate(MI); - return true; - } - - return false; -} - -bool PTXInstrInfo::isMoveInstr(const MachineInstr& MI, - unsigned &SrcReg, unsigned &DstReg, - unsigned &SrcSubIdx, unsigned &DstSubIdx) const { - switch (MI.getOpcode()) { - default: - return false; - case PTX::MOVU16rr: - case PTX::MOVU32rr: - case PTX::MOVU64rr: - case PTX::MOVF32rr: - case PTX::MOVF64rr: - case PTX::MOVPREDrr: - assert(MI.getNumOperands() >= 2 && - MI.getOperand(0).isReg() && MI.getOperand(1).isReg() && - "Invalid register-register move instruction"); - SrcSubIdx = DstSubIdx = 0; // No sub-registers - DstReg = MI.getOperand(0).getReg(); - SrcReg = MI.getOperand(1).getReg(); - return true; - } -} - -// predicate support - -bool PTXInstrInfo::isPredicated(const MachineInstr *MI) const { - int i = MI->findFirstPredOperandIdx(); - return i != -1 && MI->getOperand(i).getReg() != PTX::NoRegister; -} - -bool PTXInstrInfo::isUnpredicatedTerminator(const MachineInstr *MI) const { - return !isPredicated(MI) && MI->isTerminator(); -} - -bool PTXInstrInfo:: -PredicateInstruction(MachineInstr *MI, - const SmallVectorImpl<MachineOperand> &Pred) const { - if (Pred.size() < 2) - llvm_unreachable("lesser than 2 predicate operands are provided"); - - int i = MI->findFirstPredOperandIdx(); - if (i == -1) - llvm_unreachable("missing predicate operand"); - - MI->getOperand(i).setReg(Pred[0].getReg()); - MI->getOperand(i+1).setImm(Pred[1].getImm()); - - return true; -} - -bool PTXInstrInfo:: -SubsumesPredicate(const SmallVectorImpl<MachineOperand> &Pred1, - const SmallVectorImpl<MachineOperand> &Pred2) const { - const MachineOperand &PredReg1 = Pred1[0]; - const MachineOperand &PredReg2 = Pred2[0]; - if (PredReg1.getReg() != PredReg2.getReg()) - return false; - - const MachineOperand &PredOp1 = Pred1[1]; - const MachineOperand &PredOp2 = Pred2[1]; - if (PredOp1.getImm() != PredOp2.getImm()) - return false; - - return true; -} - -bool PTXInstrInfo:: -DefinesPredicate(MachineInstr *MI, - std::vector<MachineOperand> &Pred) const { - // If an instruction sets a predicate register, it defines a predicate. - - // TODO supprot 5-operand format of setp instruction - - if (MI->getNumOperands() < 1) - return false; - - const MachineOperand &MO = MI->getOperand(0); - - if (!MO.isReg() || RI.getRegClass(MO.getReg()) != &PTX::RegPredRegClass) - return false; - - Pred.push_back(MO); - Pred.push_back(MachineOperand::CreateImm(PTXPredicate::None)); - return true; -} - -// branch support - -bool PTXInstrInfo:: -AnalyzeBranch(MachineBasicBlock &MBB, - MachineBasicBlock *&TBB, - MachineBasicBlock *&FBB, - SmallVectorImpl<MachineOperand> &Cond, - bool AllowModify) const { - // TODO implement cases when AllowModify is true - - if (MBB.empty()) - return true; - - MachineBasicBlock::iterator iter = MBB.end(); - const MachineInstr& instLast1 = *--iter; - // for special case that MBB has only 1 instruction - const bool IsSizeOne = MBB.size() == 1; - // if IsSizeOne is true, *--iter and instLast2 are invalid - // we put a dummy value in instLast2 and desc2 since they are used - const MachineInstr& instLast2 = IsSizeOne ? instLast1 : *--iter; - - DEBUG(dbgs() << "\n"); - DEBUG(dbgs() << "AnalyzeBranch: opcode: " << instLast1.getOpcode() << "\n"); - DEBUG(dbgs() << "AnalyzeBranch: MBB: " << MBB.getName().str() << "\n"); - DEBUG(dbgs() << "AnalyzeBranch: TBB: " << TBB << "\n"); - DEBUG(dbgs() << "AnalyzeBranch: FBB: " << FBB << "\n"); - - // this block ends with no branches - if (!IsAnyKindOfBranch(instLast1)) { - DEBUG(dbgs() << "AnalyzeBranch: ends with no branch\n"); - return false; - } - - // this block ends with only an unconditional branch - if (instLast1.isUnconditionalBranch() && - // when IsSizeOne is true, it "absorbs" the evaluation of instLast2 - (IsSizeOne || !IsAnyKindOfBranch(instLast2))) { - DEBUG(dbgs() << "AnalyzeBranch: ends with only uncond branch\n"); - TBB = GetBranchTarget(instLast1); - return false; - } - - // this block ends with a conditional branch and - // it falls through to a successor block - if (instLast1.isConditionalBranch() && - IsAnySuccessorAlsoLayoutSuccessor(MBB)) { - DEBUG(dbgs() << "AnalyzeBranch: ends with cond branch and fall through\n"); - TBB = GetBranchTarget(instLast1); - int i = instLast1.findFirstPredOperandIdx(); - Cond.push_back(instLast1.getOperand(i)); - Cond.push_back(instLast1.getOperand(i+1)); - return false; - } - - // when IsSizeOne is true, we are done - if (IsSizeOne) - return true; - - // this block ends with a conditional branch - // followed by an unconditional branch - if (instLast2.isConditionalBranch() && - instLast1.isUnconditionalBranch()) { - DEBUG(dbgs() << "AnalyzeBranch: ends with cond and uncond branch\n"); - TBB = GetBranchTarget(instLast2); - FBB = GetBranchTarget(instLast1); - int i = instLast2.findFirstPredOperandIdx(); - Cond.push_back(instLast2.getOperand(i)); - Cond.push_back(instLast2.getOperand(i+1)); - return false; - } - - // branch cannot be understood - DEBUG(dbgs() << "AnalyzeBranch: cannot be understood\n"); - return true; -} - -unsigned PTXInstrInfo::RemoveBranch(MachineBasicBlock &MBB) const { - unsigned count = 0; - while (!MBB.empty()) - if (IsAnyKindOfBranch(MBB.back())) { - MBB.pop_back(); - ++count; - } else - break; - DEBUG(dbgs() << "RemoveBranch: MBB: " << MBB.getName().str() << "\n"); - DEBUG(dbgs() << "RemoveBranch: remove " << count << " branch inst\n"); - return count; -} - -unsigned PTXInstrInfo:: -InsertBranch(MachineBasicBlock &MBB, - MachineBasicBlock *TBB, - MachineBasicBlock *FBB, - const SmallVectorImpl<MachineOperand> &Cond, - DebugLoc DL) const { - DEBUG(dbgs() << "InsertBranch: MBB: " << MBB.getName().str() << "\n"); - DEBUG(if (TBB) dbgs() << "InsertBranch: TBB: " << TBB->getName().str() - << "\n"; - else dbgs() << "InsertBranch: TBB: (NULL)\n"); - DEBUG(if (FBB) dbgs() << "InsertBranch: FBB: " << FBB->getName().str() - << "\n"; - else dbgs() << "InsertBranch: FBB: (NULL)\n"); - DEBUG(dbgs() << "InsertBranch: Cond size: " << Cond.size() << "\n"); - - assert(TBB && "TBB is NULL"); - - if (FBB) { - BuildMI(&MBB, DL, get(PTX::BRAdp)) - .addMBB(TBB).addReg(Cond[0].getReg()).addImm(Cond[1].getImm()); - BuildMI(&MBB, DL, get(PTX::BRAd)) - .addMBB(FBB).addReg(PTX::NoRegister).addImm(PTXPredicate::None); - return 2; - } else if (Cond.size()) { - BuildMI(&MBB, DL, get(PTX::BRAdp)) - .addMBB(TBB).addReg(Cond[0].getReg()).addImm(Cond[1].getImm()); - return 1; - } else { - BuildMI(&MBB, DL, get(PTX::BRAd)) - .addMBB(TBB).addReg(PTX::NoRegister).addImm(PTXPredicate::None); - return 1; - } -} - -// Memory operand folding for spills -void PTXInstrInfo::storeRegToStackSlot(MachineBasicBlock &MBB, - MachineBasicBlock::iterator MII, - unsigned SrcReg, bool isKill, int FrameIdx, - const TargetRegisterClass *RC, - const TargetRegisterInfo *TRI) const { - llvm_unreachable("storeRegToStackSlot should not be called for PTX"); -} - -void PTXInstrInfo::loadRegFromStackSlot(MachineBasicBlock &MBB, - MachineBasicBlock::iterator MII, - unsigned DestReg, int FrameIdx, - const TargetRegisterClass *RC, - const TargetRegisterInfo *TRI) const { - llvm_unreachable("loadRegFromStackSlot should not be called for PTX"); -} - -// static helper routines - -MachineSDNode *PTXInstrInfo:: -GetPTXMachineNode(SelectionDAG *DAG, unsigned Opcode, - DebugLoc dl, EVT VT, SDValue Op1) { - SDValue predReg = DAG->getRegister(PTX::NoRegister, MVT::i1); - SDValue predOp = DAG->getTargetConstant(PTXPredicate::None, MVT::i32); - SDValue ops[] = { Op1, predReg, predOp }; - return DAG->getMachineNode(Opcode, dl, VT, ops, array_lengthof(ops)); -} - -MachineSDNode *PTXInstrInfo:: -GetPTXMachineNode(SelectionDAG *DAG, unsigned Opcode, - DebugLoc dl, EVT VT, SDValue Op1, SDValue Op2) { - SDValue predReg = DAG->getRegister(PTX::NoRegister, MVT::i1); - SDValue predOp = DAG->getTargetConstant(PTXPredicate::None, MVT::i32); - SDValue ops[] = { Op1, Op2, predReg, predOp }; - return DAG->getMachineNode(Opcode, dl, VT, ops, array_lengthof(ops)); -} - -void PTXInstrInfo::AddDefaultPredicate(MachineInstr *MI) { - if (MI->findFirstPredOperandIdx() == -1) { - MI->addOperand(MachineOperand::CreateReg(PTX::NoRegister, /*IsDef=*/false)); - MI->addOperand(MachineOperand::CreateImm(PTXPredicate::None)); - } -} - -bool PTXInstrInfo::IsAnyKindOfBranch(const MachineInstr& inst) { - return inst.isTerminator() || inst.isBranch() || inst.isIndirectBranch(); -} - -bool PTXInstrInfo:: -IsAnySuccessorAlsoLayoutSuccessor(const MachineBasicBlock& MBB) { - for (MachineBasicBlock::const_succ_iterator - i = MBB.succ_begin(), e = MBB.succ_end(); i != e; ++i) - if (MBB.isLayoutSuccessor((const MachineBasicBlock*) &*i)) - return true; - return false; -} - -MachineBasicBlock *PTXInstrInfo::GetBranchTarget(const MachineInstr& inst) { - // FIXME So far all branch instructions put destination in 1st operand - const MachineOperand& target = inst.getOperand(0); - assert(target.isMBB() && "FIXME: detect branch target operand"); - return target.getMBB(); -} diff --git a/lib/Target/PTX/PTXInstrInfo.h b/lib/Target/PTX/PTXInstrInfo.h deleted file mode 100644 index fba89c0..0000000 --- a/lib/Target/PTX/PTXInstrInfo.h +++ /dev/null @@ -1,133 +0,0 @@ -//===-- PTXInstrInfo.h - PTX Instruction Information ------------*- C++ -*-===// -// -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. -// -//===----------------------------------------------------------------------===// -// -// This file contains the PTX implementation of the TargetInstrInfo class. -// -//===----------------------------------------------------------------------===// - -#ifndef PTX_INSTR_INFO_H -#define PTX_INSTR_INFO_H - -#include "PTXRegisterInfo.h" -#include "llvm/Target/TargetInstrInfo.h" - -#define GET_INSTRINFO_HEADER -#include "PTXGenInstrInfo.inc" - -namespace llvm { -class PTXTargetMachine; - -class MachineSDNode; -class SDValue; -class SelectionDAG; - -class PTXInstrInfo : public PTXGenInstrInfo { -private: - const PTXRegisterInfo RI; - PTXTargetMachine &TM; - -public: - explicit PTXInstrInfo(PTXTargetMachine &_TM); - - virtual const PTXRegisterInfo &getRegisterInfo() const { return RI; } - - virtual void copyPhysReg(MachineBasicBlock &MBB, - MachineBasicBlock::iterator I, DebugLoc DL, - unsigned DstReg, unsigned SrcReg, - bool KillSrc) const; - - virtual bool copyRegToReg(MachineBasicBlock &MBB, - MachineBasicBlock::iterator I, - unsigned DstReg, unsigned SrcReg, - const TargetRegisterClass *DstRC, - const TargetRegisterClass *SrcRC, - DebugLoc DL) const; - - virtual bool isMoveInstr(const MachineInstr& MI, - unsigned &SrcReg, unsigned &DstReg, - unsigned &SrcSubIdx, unsigned &DstSubIdx) const; - - // predicate support - - virtual bool isPredicated(const MachineInstr *MI) const; - - virtual bool isUnpredicatedTerminator(const MachineInstr *MI) const; - - virtual - bool PredicateInstruction(MachineInstr *MI, - const SmallVectorImpl<MachineOperand> &Pred) const; - - virtual - bool SubsumesPredicate(const SmallVectorImpl<MachineOperand> &Pred1, - const SmallVectorImpl<MachineOperand> &Pred2) const; - - virtual bool DefinesPredicate(MachineInstr *MI, - std::vector<MachineOperand> &Pred) const; - - // PTX is fully-predicable - virtual bool isPredicable(MachineInstr *MI) const { return true; } - - // branch support - - virtual bool AnalyzeBranch(MachineBasicBlock &MBB, MachineBasicBlock *&TBB, - MachineBasicBlock *&FBB, - SmallVectorImpl<MachineOperand> &Cond, - bool AllowModify = false) const; - - virtual unsigned RemoveBranch(MachineBasicBlock &MBB) const; - - virtual unsigned InsertBranch(MachineBasicBlock &MBB, MachineBasicBlock *TBB, - MachineBasicBlock *FBB, - const SmallVectorImpl<MachineOperand> &Cond, - DebugLoc DL) const; - - // Memory operand folding for spills - // TODO: Implement this eventually and get rid of storeRegToStackSlot and - // loadRegFromStackSlot. Doing so will get rid of the "stack" registers - // we currently use to spill, though I doubt the overall effect on ptxas - // output will be large. I have yet to see a case where ptxas is unable - // to see through the "stack" register usage and hence generates - // efficient code anyway. - // virtual MachineInstr* foldMemoryOperandImpl(MachineFunction &MF, - // MachineInstr* MI, - // const SmallVectorImpl<unsigned> &Ops, - // int FrameIndex) const; - - virtual void storeRegToStackSlot(MachineBasicBlock& MBB, - MachineBasicBlock::iterator MII, - unsigned SrcReg, bool isKill, int FrameIndex, - const TargetRegisterClass* RC, - const TargetRegisterInfo* TRI) const; - virtual void loadRegFromStackSlot(MachineBasicBlock &MBB, - MachineBasicBlock::iterator MII, - unsigned DestReg, int FrameIdx, - const TargetRegisterClass *RC, - const TargetRegisterInfo *TRI) const; - - // static helper routines - - static MachineSDNode *GetPTXMachineNode(SelectionDAG *DAG, unsigned Opcode, - DebugLoc dl, EVT VT, - SDValue Op1); - - static MachineSDNode *GetPTXMachineNode(SelectionDAG *DAG, unsigned Opcode, - DebugLoc dl, EVT VT, - SDValue Op1, SDValue Op2); - - static void AddDefaultPredicate(MachineInstr *MI); - - static bool IsAnyKindOfBranch(const MachineInstr& inst); - - static bool IsAnySuccessorAlsoLayoutSuccessor(const MachineBasicBlock& MBB); - - static MachineBasicBlock *GetBranchTarget(const MachineInstr& inst); -}; // class PTXInstrInfo -} // namespace llvm - -#endif // PTX_INSTR_INFO_H diff --git a/lib/Target/PTX/PTXInstrInfo.td b/lib/Target/PTX/PTXInstrInfo.td deleted file mode 100644 index bead428..0000000 --- a/lib/Target/PTX/PTXInstrInfo.td +++ /dev/null @@ -1,1031 +0,0 @@ -//===-- PTXInstrInfo.td - PTX Instruction defs --------------*- tablegen-*-===// -// -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. -// -//===----------------------------------------------------------------------===// -// -// This file describes the PTX instructions in TableGen format. -// -//===----------------------------------------------------------------------===// - -//===----------------------------------------------------------------------===// -// Instruction format superclass -//===----------------------------------------------------------------------===// - -include "PTXInstrFormats.td" - -//===----------------------------------------------------------------------===// -// Code Generation Predicates -//===----------------------------------------------------------------------===// - -// Shader Model Support -def FDivNeedsRoundingMode : Predicate<"getSubtarget().fdivNeedsRoundingMode()">; -def FDivNoRoundingMode : Predicate<"!getSubtarget().fdivNeedsRoundingMode()">; -def FMadNeedsRoundingMode : Predicate<"getSubtarget().fmadNeedsRoundingMode()">; -def FMadNoRoundingMode : Predicate<"!getSubtarget().fmadNeedsRoundingMode()">; - -// PTX Version Support -def SupportsPTX21 : Predicate<"getSubtarget().supportsPTX21()">; -def DoesNotSupportPTX21 : Predicate<"!getSubtarget().supportsPTX21()">; -def SupportsPTX22 : Predicate<"getSubtarget().supportsPTX22()">; -def DoesNotSupportPTX22 : Predicate<"!getSubtarget().supportsPTX22()">; -def SupportsPTX23 : Predicate<"getSubtarget().supportsPTX23()">; -def DoesNotSupportPTX23 : Predicate<"!getSubtarget().supportsPTX23()">; - -// Fused-Multiply Add -def SupportsFMA : Predicate<"getSubtarget().supportsFMA()">; -def DoesNotSupportFMA : Predicate<"!getSubtarget().supportsFMA()">; - - - -// def SDT_PTXCallSeqStart : SDCallSeqStart<[SDTCisVT<0, i32>]>; -// def SDT_PTXCallSeqEnd : SDCallSeqEnd<[SDTCisVT<0, i32>, SDTCisVT<1, i32>]>; - -// def PTXcallseq_start : SDNode<"ISD::CALLSEQ_START", SDT_PTXCallSeqStart, -// [SDNPHasChain, SDNPOutGlue]>; -// def PTXcallseq_end : SDNode<"ISD::CALLSEQ_END", SDT_PTXCallSeqEnd, -// [SDNPHasChain, SDNPOptInGlue, SDNPOutGlue]>; - -def PTXcall : SDNode<"PTXISD::CALL", SDTNone, - [SDNPHasChain, SDNPVariadic, SDNPOptInGlue, SDNPOutGlue]>; - - -// Branch & call targets have OtherVT type. -def brtarget : Operand<OtherVT>; -def calltarget : Operand<i32>; - -//===----------------------------------------------------------------------===// -// PTX Specific Node Definitions -//===----------------------------------------------------------------------===// - -// PTX allow generic 3-reg shifts like shl r0, r1, r2 -def PTXshl : SDNode<"ISD::SHL", SDTIntBinOp>; -def PTXsrl : SDNode<"ISD::SRL", SDTIntBinOp>; -def PTXsra : SDNode<"ISD::SRA", SDTIntBinOp>; - -def PTXexit - : SDNode<"PTXISD::EXIT", SDTNone, [SDNPHasChain]>; -def PTXret - : SDNode<"PTXISD::RET", SDTNone, - [SDNPHasChain, SDNPOptInGlue, SDNPVariadic]>; -def PTXcopyaddress - : SDNode<"PTXISD::COPY_ADDRESS", SDTypeProfile<1, 1, []>, []>; - - - -//===----------------------------------------------------------------------===// -// Instruction Class Templates -//===----------------------------------------------------------------------===// - -// For floating-point instructions, we cannot just embed the pattern into the -// instruction definition since we need to muck around with the rounding mode, -// and I do not know how to insert constants into instructions directly from -// pattern matches. - -//===- Floating-Point Instructions - 2 Operand Form -----------------------===// -multiclass PTX_FLOAT_2OP<string opcstr> { - def rr32 : InstPTX<(outs RegF32:$d), - (ins RndMode:$r, RegF32:$a), - !strconcat(opcstr, "$r.f32\t$d, $a"), []>; - def ri32 : InstPTX<(outs RegF32:$d), - (ins RndMode:$r, f32imm:$a), - !strconcat(opcstr, "$r.f32\t$d, $a"), []>; - def rr64 : InstPTX<(outs RegF64:$d), - (ins RndMode:$r, RegF64:$a), - !strconcat(opcstr, "$r.f64\t$d, $a"), []>; - def ri64 : InstPTX<(outs RegF64:$d), - (ins RndMode:$r, f64imm:$a), - !strconcat(opcstr, "$r.f64\t$d, $a"), []>; -} - -//===- Floating-Point Instructions - 3 Operand Form -----------------------===// -multiclass PTX_FLOAT_3OP<string opcstr> { - def rr32 : InstPTX<(outs RegF32:$d), - (ins RndMode:$r, RegF32:$a, RegF32:$b), - !strconcat(opcstr, "$r.f32\t$d, $a, $b"), []>; - def ri32 : InstPTX<(outs RegF32:$d), - (ins RndMode:$r, RegF32:$a, f32imm:$b), - !strconcat(opcstr, "$r.f32\t$d, $a, $b"), []>; - def rr64 : InstPTX<(outs RegF64:$d), - (ins RndMode:$r, RegF64:$a, RegF64:$b), - !strconcat(opcstr, "$r.f64\t$d, $a, $b"), []>; - def ri64 : InstPTX<(outs RegF64:$d), - (ins RndMode:$r, RegF64:$a, f64imm:$b), - !strconcat(opcstr, "$r.f64\t$d, $a, $b"), []>; -} - -//===- Floating-Point Instructions - 4 Operand Form -----------------------===// -multiclass PTX_FLOAT_4OP<string opcstr> { - def rrr32 : InstPTX<(outs RegF32:$d), - (ins RndMode:$r, RegF32:$a, RegF32:$b, RegF32:$c), - !strconcat(opcstr, "$r.f32\t$d, $a, $b, $c"), []>; - def rri32 : InstPTX<(outs RegF32:$d), - (ins RndMode:$r, RegF32:$a, RegF32:$b, f32imm:$c), - !strconcat(opcstr, "$r.f32\t$d, $a, $b, $c"), []>; - def rii32 : InstPTX<(outs RegF32:$d), - (ins RndMode:$r, RegF32:$a, f32imm:$b, f32imm:$c), - !strconcat(opcstr, "$r.f32\t$d, $a, $b, $c"), []>; - def rrr64 : InstPTX<(outs RegF64:$d), - (ins RndMode:$r, RegF64:$a, RegF64:$b, RegF64:$c), - !strconcat(opcstr, "$r.f64\t$d, $a, $b, $c"), []>; - def rri64 : InstPTX<(outs RegF64:$d), - (ins RndMode:$r, RegF64:$a, RegF64:$b, f64imm:$c), - !strconcat(opcstr, "$r.f64\t$d, $a, $b, $c"), []>; - def rii64 : InstPTX<(outs RegF64:$d), - (ins RndMode:$r, RegF64:$a, f64imm:$b, f64imm:$c), - !strconcat(opcstr, "$r.f64\t$d, $a, $b, $c"), []>; -} - -//===- Integer Instructions - 3 Operand Form ------------------------------===// -multiclass PTX_INT3<string opcstr, SDNode opnode> { - def rr16 : InstPTX<(outs RegI16:$d), - (ins RegI16:$a, RegI16:$b), - !strconcat(opcstr, ".u16\t$d, $a, $b"), - [(set RegI16:$d, (opnode RegI16:$a, RegI16:$b))]>; - def ri16 : InstPTX<(outs RegI16:$d), - (ins RegI16:$a, i16imm:$b), - !strconcat(opcstr, ".u16\t$d, $a, $b"), - [(set RegI16:$d, (opnode RegI16:$a, imm:$b))]>; - def rr32 : InstPTX<(outs RegI32:$d), - (ins RegI32:$a, RegI32:$b), - !strconcat(opcstr, ".u32\t$d, $a, $b"), - [(set RegI32:$d, (opnode RegI32:$a, RegI32:$b))]>; - def ri32 : InstPTX<(outs RegI32:$d), - (ins RegI32:$a, i32imm:$b), - !strconcat(opcstr, ".u32\t$d, $a, $b"), - [(set RegI32:$d, (opnode RegI32:$a, imm:$b))]>; - def rr64 : InstPTX<(outs RegI64:$d), - (ins RegI64:$a, RegI64:$b), - !strconcat(opcstr, ".u64\t$d, $a, $b"), - [(set RegI64:$d, (opnode RegI64:$a, RegI64:$b))]>; - def ri64 : InstPTX<(outs RegI64:$d), - (ins RegI64:$a, i64imm:$b), - !strconcat(opcstr, ".u64\t$d, $a, $b"), - [(set RegI64:$d, (opnode RegI64:$a, imm:$b))]>; -} - -//===- Integer Instructions - 3 Operand Form (Signed) ---------------------===// -multiclass PTX_INT3_SIGNED<string opcstr, SDNode opnode> { - def rr16 : InstPTX<(outs RegI16:$d), - (ins RegI16:$a, RegI16:$b), - !strconcat(opcstr, ".s16\t$d, $a, $b"), - [(set RegI16:$d, (opnode RegI16:$a, RegI16:$b))]>; - def ri16 : InstPTX<(outs RegI16:$d), - (ins RegI16:$a, i16imm:$b), - !strconcat(opcstr, ".s16\t$d, $a, $b"), - [(set RegI16:$d, (opnode RegI16:$a, imm:$b))]>; - def rr32 : InstPTX<(outs RegI32:$d), - (ins RegI32:$a, RegI32:$b), - !strconcat(opcstr, ".s32\t$d, $a, $b"), - [(set RegI32:$d, (opnode RegI32:$a, RegI32:$b))]>; - def ri32 : InstPTX<(outs RegI32:$d), - (ins RegI32:$a, i32imm:$b), - !strconcat(opcstr, ".s32\t$d, $a, $b"), - [(set RegI32:$d, (opnode RegI32:$a, imm:$b))]>; - def rr64 : InstPTX<(outs RegI64:$d), - (ins RegI64:$a, RegI64:$b), - !strconcat(opcstr, ".s64\t$d, $a, $b"), - [(set RegI64:$d, (opnode RegI64:$a, RegI64:$b))]>; - def ri64 : InstPTX<(outs RegI64:$d), - (ins RegI64:$a, i64imm:$b), - !strconcat(opcstr, ".s64\t$d, $a, $b"), - [(set RegI64:$d, (opnode RegI64:$a, imm:$b))]>; -} - -//===- Bitwise Logic Instructions - 3 Operand Form ------------------------===// -multiclass PTX_LOGIC<string opcstr, SDNode opnode> { - def ripreds : InstPTX<(outs RegPred:$d), - (ins RegPred:$a, i1imm:$b), - !strconcat(opcstr, ".pred\t$d, $a, $b"), - [(set RegPred:$d, (opnode RegPred:$a, imm:$b))]>; - def rrpreds : InstPTX<(outs RegPred:$d), - (ins RegPred:$a, RegPred:$b), - !strconcat(opcstr, ".pred\t$d, $a, $b"), - [(set RegPred:$d, (opnode RegPred:$a, RegPred:$b))]>; - def rr16 : InstPTX<(outs RegI16:$d), - (ins RegI16:$a, RegI16:$b), - !strconcat(opcstr, ".b16\t$d, $a, $b"), - [(set RegI16:$d, (opnode RegI16:$a, RegI16:$b))]>; - def ri16 : InstPTX<(outs RegI16:$d), - (ins RegI16:$a, i16imm:$b), - !strconcat(opcstr, ".b16\t$d, $a, $b"), - [(set RegI16:$d, (opnode RegI16:$a, imm:$b))]>; - def rr32 : InstPTX<(outs RegI32:$d), - (ins RegI32:$a, RegI32:$b), - !strconcat(opcstr, ".b32\t$d, $a, $b"), - [(set RegI32:$d, (opnode RegI32:$a, RegI32:$b))]>; - def ri32 : InstPTX<(outs RegI32:$d), - (ins RegI32:$a, i32imm:$b), - !strconcat(opcstr, ".b32\t$d, $a, $b"), - [(set RegI32:$d, (opnode RegI32:$a, imm:$b))]>; - def rr64 : InstPTX<(outs RegI64:$d), - (ins RegI64:$a, RegI64:$b), - !strconcat(opcstr, ".b64\t$d, $a, $b"), - [(set RegI64:$d, (opnode RegI64:$a, RegI64:$b))]>; - def ri64 : InstPTX<(outs RegI64:$d), - (ins RegI64:$a, i64imm:$b), - !strconcat(opcstr, ".b64\t$d, $a, $b"), - [(set RegI64:$d, (opnode RegI64:$a, imm:$b))]>; -} - -//===- Integer Shift Instructions - 3 Operand Form ------------------------===// -multiclass PTX_INT3ntnc<string opcstr, SDNode opnode> { - def rr16 : InstPTX<(outs RegI16:$d), - (ins RegI16:$a, RegI16:$b), - !strconcat(opcstr, "16\t$d, $a, $b"), - [(set RegI16:$d, (opnode RegI16:$a, RegI16:$b))]>; - def rr32 : InstPTX<(outs RegI32:$d), - (ins RegI32:$a, RegI32:$b), - !strconcat(opcstr, "32\t$d, $a, $b"), - [(set RegI32:$d, (opnode RegI32:$a, RegI32:$b))]>; - def rr64 : InstPTX<(outs RegI64:$d), - (ins RegI64:$a, RegI64:$b), - !strconcat(opcstr, "64\t$d, $a, $b"), - [(set RegI64:$d, (opnode RegI64:$a, RegI64:$b))]>; - def ri16 : InstPTX<(outs RegI16:$d), - (ins RegI16:$a, i16imm:$b), - !strconcat(opcstr, "16\t$d, $a, $b"), - [(set RegI16:$d, (opnode RegI16:$a, imm:$b))]>; - def ri32 : InstPTX<(outs RegI32:$d), - (ins RegI32:$a, i32imm:$b), - !strconcat(opcstr, "32\t$d, $a, $b"), - [(set RegI32:$d, (opnode RegI32:$a, imm:$b))]>; - def ri64 : InstPTX<(outs RegI64:$d), - (ins RegI64:$a, i64imm:$b), - !strconcat(opcstr, "64\t$d, $a, $b"), - [(set RegI64:$d, (opnode RegI64:$a, imm:$b))]>; - def ir16 : InstPTX<(outs RegI16:$d), - (ins i16imm:$a, RegI16:$b), - !strconcat(opcstr, "16\t$d, $a, $b"), - [(set RegI16:$d, (opnode imm:$a, RegI16:$b))]>; - def ir32 : InstPTX<(outs RegI32:$d), - (ins i32imm:$a, RegI32:$b), - !strconcat(opcstr, "32\t$d, $a, $b"), - [(set RegI32:$d, (opnode imm:$a, RegI32:$b))]>; - def ir64 : InstPTX<(outs RegI64:$d), - (ins i64imm:$a, RegI64:$b), - !strconcat(opcstr, "64\t$d, $a, $b"), - [(set RegI64:$d, (opnode imm:$a, RegI64:$b))]>; -} - -//===- Set Predicate Instructions (Int) - 3/4 Operand Forms ---------------===// -multiclass PTX_SETP_I<RegisterClass RC, string regclsname, Operand immcls, - CondCode cmp, string cmpstr> { - // TODO support 5-operand format: p|q, a, b, c - - def rr - : InstPTX<(outs RegPred:$p), (ins RC:$a, RC:$b), - !strconcat("setp.", cmpstr, ".", regclsname, "\t$p, $a, $b"), - [(set RegPred:$p, (setcc RC:$a, RC:$b, cmp))]>; - def ri - : InstPTX<(outs RegPred:$p), (ins RC:$a, immcls:$b), - !strconcat("setp.", cmpstr, ".", regclsname, "\t$p, $a, $b"), - [(set RegPred:$p, (setcc RC:$a, imm:$b, cmp))]>; - - def rr_and_r - : InstPTX<(outs RegPred:$p), (ins RC:$a, RC:$b, RegPred:$c), - !strconcat("setp.", cmpstr, ".and.", regclsname, - "\t$p, $a, $b, $c"), - [(set RegPred:$p, (and (setcc RC:$a, RC:$b, cmp), RegPred:$c))]>; - def ri_and_r - : InstPTX<(outs RegPred:$p), (ins RC:$a, immcls:$b, RegPred:$c), - !strconcat("setp.", cmpstr, ".and.", regclsname, - "\t$p, $a, $b, $c"), - [(set RegPred:$p, (and (setcc RC:$a, imm:$b, cmp), - RegPred:$c))]>; - def rr_or_r - : InstPTX<(outs RegPred:$p), (ins RC:$a, RC:$b, RegPred:$c), - !strconcat("setp.", cmpstr, ".or.", regclsname, - "\t$p, $a, $b, $c"), - [(set RegPred:$p, (or (setcc RC:$a, RC:$b, cmp), RegPred:$c))]>; - def ri_or_r - : InstPTX<(outs RegPred:$p), (ins RC:$a, immcls:$b, RegPred:$c), - !strconcat("setp.", cmpstr, ".or.", regclsname, - "\t$p, $a, $b, $c"), - [(set RegPred:$p, (or (setcc RC:$a, imm:$b, cmp), RegPred:$c))]>; - def rr_xor_r - : InstPTX<(outs RegPred:$p), (ins RC:$a, RC:$b, RegPred:$c), - !strconcat("setp.", cmpstr, ".xor.", regclsname, - "\t$p, $a, $b, $c"), - [(set RegPred:$p, (xor (setcc RC:$a, RC:$b, cmp), RegPred:$c))]>; - def ri_xor_r - : InstPTX<(outs RegPred:$p), (ins RC:$a, immcls:$b, RegPred:$c), - !strconcat("setp.", cmpstr, ".xor.", regclsname, - "\t$p, $a, $b, $c"), - [(set RegPred:$p, (xor (setcc RC:$a, imm:$b, cmp), - RegPred:$c))]>; - - def rr_and_not_r - : InstPTX<(outs RegPred:$p), (ins RC:$a, RC:$b, RegPred:$c), - !strconcat("setp.", cmpstr, ".and.", regclsname, - "\t$p, $a, $b, !$c"), - [(set RegPred:$p, (and (setcc RC:$a, RC:$b, cmp), - (not RegPred:$c)))]>; - def ri_and_not_r - : InstPTX<(outs RegPred:$p), (ins RC:$a, immcls:$b, RegPred:$c), - !strconcat("setp.", cmpstr, ".and.", regclsname, - "\t$p, $a, $b, !$c"), - [(set RegPred:$p, (and (setcc RC:$a, imm:$b, cmp), - (not RegPred:$c)))]>; - def rr_or_not_r - : InstPTX<(outs RegPred:$p), (ins RC:$a, RC:$b, RegPred:$c), - !strconcat("setp.", cmpstr, ".or.", regclsname, - "\t$p, $a, $b, !$c"), - [(set RegPred:$p, (or (setcc RC:$a, RC:$b, cmp), - (not RegPred:$c)))]>; - def ri_or_not_r - : InstPTX<(outs RegPred:$p), (ins RC:$a, immcls:$b, RegPred:$c), - !strconcat("setp.", cmpstr, ".or.", regclsname, - "\t$p, $a, $b, !$c"), - [(set RegPred:$p, (or (setcc RC:$a, imm:$b, cmp), - (not RegPred:$c)))]>; - def rr_xor_not_r - : InstPTX<(outs RegPred:$p), (ins RC:$a, RC:$b, RegPred:$c), - !strconcat("setp.", cmpstr, ".xor.", regclsname, - "\t$p, $a, $b, !$c"), - [(set RegPred:$p, (xor (setcc RC:$a, RC:$b, cmp), - (not RegPred:$c)))]>; - def ri_xor_not_r - : InstPTX<(outs RegPred:$p), (ins RC:$a, immcls:$b, RegPred:$c), - !strconcat("setp.", cmpstr, ".xor.", regclsname, - "\t$p, $a, $b, !$c"), - [(set RegPred:$p, (xor (setcc RC:$a, imm:$b, cmp), - (not RegPred:$c)))]>; -} - -//===- Set Predicate Instructions (FP) - 3/4 Operand Form -----------------===// -multiclass PTX_SETP_FP<RegisterClass RC, string regclsname, Operand immcls, - CondCode ucmp, CondCode ocmp, string cmpstr> { - // TODO support 5-operand format: p|q, a, b, c - - def rr_u - : InstPTX<(outs RegPred:$p), (ins RC:$a, RC:$b), - !strconcat("setp.", cmpstr, "u.", regclsname, "\t$p, $a, $b"), - [(set RegPred:$p, (setcc RC:$a, RC:$b, ucmp))]>; - def rr_o - : InstPTX<(outs RegPred:$p), (ins RC:$a, RC:$b), - !strconcat("setp.", cmpstr, ".", regclsname, "\t$p, $a, $b"), - [(set RegPred:$p, (setcc RC:$a, RC:$b, ocmp))]>; - - def ri_u - : InstPTX<(outs RegPred:$p), (ins RC:$a, immcls:$b), - !strconcat("setp.", cmpstr, "u.", regclsname, "\t$p, $a, $b"), - [(set RegPred:$p, (setcc RC:$a, fpimm:$b, ucmp))]>; - def ri_o - : InstPTX<(outs RegPred:$p), (ins RC:$a, immcls:$b), - !strconcat("setp.", cmpstr, ".", regclsname, "\t$p, $a, $b"), - [(set RegPred:$p, (setcc RC:$a, fpimm:$b, ocmp))]>; - - def rr_and_r_u - : InstPTX<(outs RegPred:$p), (ins RC:$a, RC:$b, RegPred:$c), - !strconcat("setp.", cmpstr, "u.and.", regclsname, - "\t$p, $a, $b, $c"), - [(set RegPred:$p, (and (setcc RC:$a, RC:$b, ucmp), - RegPred:$c))]>; - def rr_and_r_o - : InstPTX<(outs RegPred:$p), (ins RC:$a, RC:$b, RegPred:$c), - !strconcat("setp.", cmpstr, ".and.", regclsname, - "\t$p, $a, $b, $c"), - [(set RegPred:$p, (and (setcc RC:$a, RC:$b, ocmp), - RegPred:$c))]>; - - def rr_or_r_u - : InstPTX<(outs RegPred:$p), (ins RC:$a, RC:$b, RegPred:$c), - !strconcat("setp.", cmpstr, "u.or.", regclsname, - "\t$p, $a, $b, $c"), - [(set RegPred:$p, (or (setcc RC:$a, RC:$b, ucmp), RegPred:$c))]>; - def rr_or_r_o - : InstPTX<(outs RegPred:$p), (ins RC:$a, RC:$b, RegPred:$c), - !strconcat("setp.", cmpstr, ".or.", regclsname, - "\t$p, $a, $b, $c"), - [(set RegPred:$p, (or (setcc RC:$a, RC:$b, ocmp), RegPred:$c))]>; - - def rr_xor_r_u - : InstPTX<(outs RegPred:$p), (ins RC:$a, RC:$b, RegPred:$c), - !strconcat("setp.", cmpstr, "u.xor.", regclsname, - "\t$p, $a, $b, $c"), - [(set RegPred:$p, (xor (setcc RC:$a, RC:$b, ucmp), - RegPred:$c))]>; - def rr_xor_r_o - : InstPTX<(outs RegPred:$p), (ins RC:$a, RC:$b, RegPred:$c), - !strconcat("setp.", cmpstr, ".xor.", regclsname, - "\t$p, $a, $b, $c"), - [(set RegPred:$p, (xor (setcc RC:$a, RC:$b, ocmp), - RegPred:$c))]>; - - def rr_and_not_r_u - : InstPTX<(outs RegPred:$p), (ins RC:$a, RC:$b, RegPred:$c), - !strconcat("setp.", cmpstr, "u.and.", regclsname, - "\t$p, $a, $b, !$c"), - [(set RegPred:$p, (and (setcc RC:$a, RC:$b, ucmp), - (not RegPred:$c)))]>; - def rr_and_not_r_o - : InstPTX<(outs RegPred:$p), (ins RC:$a, RC:$b, RegPred:$c), - !strconcat("setp.", cmpstr, ".and.", regclsname, - "\t$p, $a, $b, !$c"), - [(set RegPred:$p, (and (setcc RC:$a, RC:$b, ocmp), - (not RegPred:$c)))]>; - - def rr_or_not_r_u - : InstPTX<(outs RegPred:$p), (ins RC:$a, RC:$b, RegPred:$c), - !strconcat("setp.", cmpstr, "u.or.", regclsname, - "\t$p, $a, $b, !$c"), - [(set RegPred:$p, (or (setcc RC:$a, RC:$b, ucmp), - (not RegPred:$c)))]>; - def rr_or_not_r_o - : InstPTX<(outs RegPred:$p), (ins RC:$a, RC:$b, RegPred:$c), - !strconcat("setp.", cmpstr, ".or.", regclsname, - "\t$p, $a, $b, !$c"), - [(set RegPred:$p, (or (setcc RC:$a, RC:$b, ocmp), - (not RegPred:$c)))]>; - - def rr_xor_not_r_u - : InstPTX<(outs RegPred:$p), (ins RC:$a, RC:$b, RegPred:$c), - !strconcat("setp.", cmpstr, "u.xor.", regclsname, - "\t$p, $a, $b, !$c"), - [(set RegPred:$p, (xor (setcc RC:$a, RC:$b, ucmp), - (not RegPred:$c)))]>; - def rr_xor_not_r_o - : InstPTX<(outs RegPred:$p), (ins RC:$a, RC:$b, RegPred:$c), - !strconcat("setp.", cmpstr, ".xor.", regclsname, - "\t$p, $a, $b, !$c"), - [(set RegPred:$p, (xor (setcc RC:$a, RC:$b, ocmp), - (not RegPred:$c)))]>; -} - -//===- Select Predicate Instructions - 4 Operand Form ---------------------===// -multiclass PTX_SELP<RegisterClass RC, string regclsname, Operand immcls, - SDNode immnode> { - def rr - : InstPTX<(outs RC:$r), (ins RegPred:$a, RC:$b, RC:$c), - !strconcat("selp.", regclsname, "\t$r, $b, $c, $a"), - [(set RC:$r, (select RegPred:$a, RC:$b, RC:$c))]>; - def ri - : InstPTX<(outs RC:$r), (ins RegPred:$a, RC:$b, immcls:$c), - !strconcat("selp.", regclsname, "\t$r, $b, $c, $a"), - [(set RC:$r, (select RegPred:$a, RC:$b, immnode:$c))]>; - def ii - : InstPTX<(outs RC:$r), (ins RegPred:$a, immcls:$b, immcls:$c), - !strconcat("selp.", regclsname, "\t$r, $b, $c, $a"), - [(set RC:$r, (select RegPred:$a, immnode:$b, immnode:$c))]>; -} - - - -//===----------------------------------------------------------------------===// -// Instructions -//===----------------------------------------------------------------------===// - -///===- Integer Arithmetic Instructions -----------------------------------===// - -defm ADD : PTX_INT3<"add", add>; -defm SUB : PTX_INT3<"sub", sub>; -defm MUL : PTX_INT3<"mul.lo", mul>; // FIXME: Allow 32x32 -> 64 multiplies -defm DIV : PTX_INT3<"div", udiv>; -defm SDIV : PTX_INT3_SIGNED<"div", sdiv>; -defm REM : PTX_INT3<"rem", urem>; - -///===- Floating-Point Arithmetic Instructions ----------------------------===// - -// FNEG -defm FNEG : PTX_FLOAT_2OP<"neg">; - -// Standard Binary Operations -defm FADD : PTX_FLOAT_3OP<"add">; -defm FSUB : PTX_FLOAT_3OP<"sub">; -defm FMUL : PTX_FLOAT_3OP<"mul">; -defm FDIV : PTX_FLOAT_3OP<"div">; - -// Multi-operation hybrid instructions -defm FMAD : PTX_FLOAT_4OP<"mad">, Requires<[SupportsFMA]>; - - -///===- Floating-Point Intrinsic Instructions -----------------------------===// - -// SQRT -def FSQRTrr32 : InstPTX<(outs RegF32:$d), (ins RndMode:$r, RegF32:$a), - "sqrt$r.f32\t$d, $a", []>; -def FSQRTri32 : InstPTX<(outs RegF32:$d), (ins RndMode:$r, f32imm:$a), - "sqrt$r.f32\t$d, $a", []>; -def FSQRTrr64 : InstPTX<(outs RegF64:$d), (ins RndMode:$r, RegF64:$a), - "sqrt$r.f64\t$d, $a", []>; -def FSQRTri64 : InstPTX<(outs RegF64:$d), (ins RndMode:$r, f64imm:$a), - "sqrt$r.f64\t$d, $a", []>; - -// SIN -def FSINrr32 : InstPTX<(outs RegF32:$d), (ins RndMode:$r, RegF32:$a), - "sin$r.f32\t$d, $a", []>; -def FSINri32 : InstPTX<(outs RegF32:$d), (ins RndMode:$r, f32imm:$a), - "sin$r.f32\t$d, $a", []>; -def FSINrr64 : InstPTX<(outs RegF64:$d), (ins RndMode:$r, RegF64:$a), - "sin$r.f64\t$d, $a", []>; -def FSINri64 : InstPTX<(outs RegF64:$d), (ins RndMode:$r, f64imm:$a), - "sin$r.f64\t$d, $a", []>; - -// COS -def FCOSrr32 : InstPTX<(outs RegF32:$d), (ins RndMode:$r, RegF32:$a), - "cos$r.f32\t$d, $a", []>; -def FCOSri32 : InstPTX<(outs RegF32:$d), (ins RndMode:$r, f32imm:$a), - "cos$r.f32\t$d, $a", []>; -def FCOSrr64 : InstPTX<(outs RegF64:$d), (ins RndMode:$r, RegF64:$a), - "cos$r.f64\t$d, $a", []>; -def FCOSri64 : InstPTX<(outs RegF64:$d), (ins RndMode:$r, f64imm:$a), - "cos$r.f64\t$d, $a", []>; - - - - -///===- Comparison and Selection Instructions -----------------------------===// - -// .setp - -// Compare u16 - -defm SETPEQu16 : PTX_SETP_I<RegI16, "u16", i16imm, SETEQ, "eq">; -defm SETPNEu16 : PTX_SETP_I<RegI16, "u16", i16imm, SETNE, "ne">; -defm SETPLTu16 : PTX_SETP_I<RegI16, "u16", i16imm, SETULT, "lt">; -defm SETPLEu16 : PTX_SETP_I<RegI16, "u16", i16imm, SETULE, "le">; -defm SETPGTu16 : PTX_SETP_I<RegI16, "u16", i16imm, SETUGT, "gt">; -defm SETPGEu16 : PTX_SETP_I<RegI16, "u16", i16imm, SETUGE, "ge">; -defm SETPLTs16 : PTX_SETP_I<RegI16, "s16", i16imm, SETLT, "lt">; -defm SETPLEs16 : PTX_SETP_I<RegI16, "s16", i16imm, SETLE, "le">; -defm SETPGTs16 : PTX_SETP_I<RegI16, "s16", i16imm, SETGT, "gt">; -defm SETPGEs16 : PTX_SETP_I<RegI16, "s16", i16imm, SETGE, "ge">; - -// Compare u32 - -defm SETPEQu32 : PTX_SETP_I<RegI32, "u32", i32imm, SETEQ, "eq">; -defm SETPNEu32 : PTX_SETP_I<RegI32, "u32", i32imm, SETNE, "ne">; -defm SETPLTu32 : PTX_SETP_I<RegI32, "u32", i32imm, SETULT, "lt">; -defm SETPLEu32 : PTX_SETP_I<RegI32, "u32", i32imm, SETULE, "le">; -defm SETPGTu32 : PTX_SETP_I<RegI32, "u32", i32imm, SETUGT, "gt">; -defm SETPGEu32 : PTX_SETP_I<RegI32, "u32", i32imm, SETUGE, "ge">; -defm SETPLTs32 : PTX_SETP_I<RegI32, "s32", i32imm, SETLT, "lt">; -defm SETPLEs32 : PTX_SETP_I<RegI32, "s32", i32imm, SETLE, "le">; -defm SETPGTs32 : PTX_SETP_I<RegI32, "s32", i32imm, SETGT, "gt">; -defm SETPGEs32 : PTX_SETP_I<RegI32, "s32", i32imm, SETGE, "ge">; - -// Compare u64 - -defm SETPEQu64 : PTX_SETP_I<RegI64, "u64", i64imm, SETEQ, "eq">; -defm SETPNEu64 : PTX_SETP_I<RegI64, "u64", i64imm, SETNE, "ne">; -defm SETPLTu64 : PTX_SETP_I<RegI64, "u64", i64imm, SETULT, "lt">; -defm SETPLEu64 : PTX_SETP_I<RegI64, "u64", i64imm, SETULE, "le">; -defm SETPGTu64 : PTX_SETP_I<RegI64, "u64", i64imm, SETUGT, "gt">; -defm SETPGEu64 : PTX_SETP_I<RegI64, "u64", i64imm, SETUGE, "ge">; -defm SETPLTs64 : PTX_SETP_I<RegI64, "s64", i64imm, SETLT, "lt">; -defm SETPLEs64 : PTX_SETP_I<RegI64, "s64", i64imm, SETLE, "le">; -defm SETPGTs64 : PTX_SETP_I<RegI64, "s64", i64imm, SETGT, "gt">; -defm SETPGEs64 : PTX_SETP_I<RegI64, "s64", i64imm, SETGE, "ge">; - -// Compare f32 - -defm SETPEQf32 : PTX_SETP_FP<RegF32, "f32", f32imm, SETUEQ, SETOEQ, "eq">; -defm SETPNEf32 : PTX_SETP_FP<RegF32, "f32", f32imm, SETUNE, SETONE, "ne">; -defm SETPLTf32 : PTX_SETP_FP<RegF32, "f32", f32imm, SETULT, SETOLT, "lt">; -defm SETPLEf32 : PTX_SETP_FP<RegF32, "f32", f32imm, SETULE, SETOLE, "le">; -defm SETPGTf32 : PTX_SETP_FP<RegF32, "f32", f32imm, SETUGT, SETOGT, "gt">; -defm SETPGEf32 : PTX_SETP_FP<RegF32, "f32", f32imm, SETUGE, SETOGE, "ge">; - -// Compare f64 - -defm SETPEQf64 : PTX_SETP_FP<RegF64, "f64", f64imm, SETUEQ, SETOEQ, "eq">; -defm SETPNEf64 : PTX_SETP_FP<RegF64, "f64", f64imm, SETUNE, SETONE, "ne">; -defm SETPLTf64 : PTX_SETP_FP<RegF64, "f64", f64imm, SETULT, SETOLT, "lt">; -defm SETPLEf64 : PTX_SETP_FP<RegF64, "f64", f64imm, SETULE, SETOLE, "le">; -defm SETPGTf64 : PTX_SETP_FP<RegF64, "f64", f64imm, SETUGT, SETOGT, "gt">; -defm SETPGEf64 : PTX_SETP_FP<RegF64, "f64", f64imm, SETUGE, SETOGE, "ge">; - -// .selp - -defm SELPi16 : PTX_SELP<RegI16, "u16", i16imm, imm>; -defm SELPi32 : PTX_SELP<RegI32, "u32", i32imm, imm>; -defm SELPi64 : PTX_SELP<RegI64, "u64", i64imm, imm>; -defm SELPf32 : PTX_SELP<RegF32, "f32", f32imm, fpimm>; -defm SELPf64 : PTX_SELP<RegF64, "f64", f64imm, fpimm>; - -///===- Logic and Shift Instructions --------------------------------------===// - -defm SHL : PTX_INT3ntnc<"shl.b", PTXshl>; -defm SRL : PTX_INT3ntnc<"shr.u", PTXsrl>; -defm SRA : PTX_INT3ntnc<"shr.s", PTXsra>; - -defm AND : PTX_LOGIC<"and", and>; -defm OR : PTX_LOGIC<"or", or>; -defm XOR : PTX_LOGIC<"xor", xor>; - -///===- Data Movement and Conversion Instructions -------------------------===// - -// any_extend -// Implement the anyext instruction in terms of the PTX cvt instructions. -//def : Pat<(i32 (anyext RegI16:$a)), (CVT_u32_u16 RegI16:$a)>; -//def : Pat<(i64 (anyext RegI16:$a)), (CVT_u64_u16 RegI16:$a)>; -//def : Pat<(i64 (anyext RegI32:$a)), (CVT_u64_u32 RegI32:$a)>; - -// bitconvert -// These instructions implement the bit-wise conversion between integer and -// floating-point types. -def MOVi32f32 - : InstPTX<(outs RegI32:$d), (ins RegF32:$a), "mov.b32\t$d, $a", []>; -def MOVf32i32 - : InstPTX<(outs RegF32:$d), (ins RegI32:$a), "mov.b32\t$d, $a", []>; -def MOVi64f64 - : InstPTX<(outs RegI64:$d), (ins RegF64:$a), "mov.b64\t$d, $a", []>; -def MOVf64i64 - : InstPTX<(outs RegF64:$d), (ins RegI64:$a), "mov.b64\t$d, $a", []>; - -let neverHasSideEffects = 1 in { - def MOVPREDrr - : InstPTX<(outs RegPred:$d), (ins RegPred:$a), "mov.pred\t$d, $a", []>; - def MOVU16rr - : InstPTX<(outs RegI16:$d), (ins RegI16:$a), "mov.u16\t$d, $a", []>; - def MOVU32rr - : InstPTX<(outs RegI32:$d), (ins RegI32:$a), "mov.u32\t$d, $a", []>; - def MOVU64rr - : InstPTX<(outs RegI64:$d), (ins RegI64:$a), "mov.u64\t$d, $a", []>; - def MOVF32rr - : InstPTX<(outs RegF32:$d), (ins RegF32:$a), "mov.f32\t$d, $a", []>; - def MOVF64rr - : InstPTX<(outs RegF64:$d), (ins RegF64:$a), "mov.f64\t$d, $a", []>; -} - -let isReMaterializable = 1, isAsCheapAsAMove = 1 in { - def MOVPREDri - : InstPTX<(outs RegPred:$d), (ins i1imm:$a), "mov.pred\t$d, $a", - [(set RegPred:$d, imm:$a)]>; - def MOVU16ri - : InstPTX<(outs RegI16:$d), (ins i16imm:$a), "mov.u16\t$d, $a", - [(set RegI16:$d, imm:$a)]>; - def MOVU32ri - : InstPTX<(outs RegI32:$d), (ins i32imm:$a), "mov.u32\t$d, $a", - [(set RegI32:$d, imm:$a)]>; - def MOVU64ri - : InstPTX<(outs RegI64:$d), (ins i64imm:$a), "mov.u64\t$d, $a", - [(set RegI64:$d, imm:$a)]>; - def MOVF32ri - : InstPTX<(outs RegF32:$d), (ins f32imm:$a), "mov.f32\t$d, $a", - [(set RegF32:$d, fpimm:$a)]>; - def MOVF64ri - : InstPTX<(outs RegF64:$d), (ins f64imm:$a), "mov.f64\t$d, $a", - [(set RegF64:$d, fpimm:$a)]>; -} - -let isReMaterializable = 1, isAsCheapAsAMove = 1 in { - def MOVaddr32 - : InstPTX<(outs RegI32:$d), (ins i32imm:$a), "mov.u32\t$d, $a", - [(set RegI32:$d, (PTXcopyaddress tglobaladdr:$a))]>; - def MOVaddr64 - : InstPTX<(outs RegI64:$d), (ins i64imm:$a), "mov.u64\t$d, $a", - [(set RegI64:$d, (PTXcopyaddress tglobaladdr:$a))]>; - def MOVframe32 - : InstPTX<(outs RegI32:$d), (ins i32imm:$a), "cvta.local.u32\t$d, $a", - [(set RegI32:$d, (PTXcopyaddress frameindex:$a))]>; - def MOVframe64 - : InstPTX<(outs RegI64:$d), (ins i64imm:$a), "cvta.local.u64\t$d, $a", - [(set RegI64:$d, (PTXcopyaddress frameindex:$a))]>; -} - -// PTX cvt instructions -// Note all of these may actually be used, we just define all possible patterns -// here (that make sense). -// FIXME: Can we collapse this somehow into a multiclass def? - -// To i16 -def CVTu16u32 - : InstPTX<(outs RegI16:$d), (ins RegI32:$a), "cvt.u16.u32\t$d, $a", []>; -def CVTu16u64 - : InstPTX<(outs RegI16:$d), (ins RegI64:$a), "cvt.u16.u64\t$d, $a", []>; -def CVTu16f32 - : InstPTX<(outs RegI16:$d), (ins RndMode:$r, RegF32:$a), - "cvt$r.u16.f32\t$d, $a", []>; -def CVTs16f32 - : InstPTX<(outs RegI16:$d), (ins RndMode:$r, RegF32:$a), - "cvt$r.s16.f32\t$d, $a", []>; -def CVTu16f64 - : InstPTX<(outs RegI16:$d), (ins RndMode:$r, RegF64:$a), - "cvt$r.u16.f64\t$d, $a", []>; -def CVTs16f64 - : InstPTX<(outs RegI16:$d), (ins RndMode:$r, RegF64:$a), - "cvt$r.s16.f64\t$d, $a", []>; - -// To i32 -def CVTu32u16 - : InstPTX<(outs RegI32:$d), (ins RegI16:$a), "cvt.u32.u16\t$d, $a", []>; -def CVTs32s16 - : InstPTX<(outs RegI32:$d), (ins RegI16:$a), "cvt.s32.s16\t$d, $a", []>; -def CVTu32u64 - : InstPTX<(outs RegI32:$d), (ins RegI64:$a), "cvt.u32.u64\t$d, $a", []>; -def CVTu32f32 - : InstPTX<(outs RegI32:$d), (ins RndMode:$r, RegF32:$a), - "cvt$r.u32.f32\t$d, $a", []>; -def CVTs32f32 - : InstPTX<(outs RegI32:$d), (ins RndMode:$r, RegF32:$a), - "cvt$r.s32.f32\t$d, $a", []>; -def CVTu32f64 - : InstPTX<(outs RegI32:$d), (ins RndMode:$r, RegF64:$a), - "cvt$r.u32.f64\t$d, $a", []>; -def CVTs32f64 - : InstPTX<(outs RegI32:$d), (ins RndMode:$r, RegF64:$a), - "cvt$r.s32.f64\t$d, $a", []>; - -// To i64 -def CVTu64u16 - : InstPTX<(outs RegI64:$d), (ins RegI16:$a), "cvt.u64.u16\t$d, $a", []>; -def CVTs64s16 - : InstPTX<(outs RegI64:$d), (ins RegI16:$a), "cvt.s64.s16\t$d, $a", []>; -def CVTu64u32 - : InstPTX<(outs RegI64:$d), (ins RegI32:$a), "cvt.u64.u32\t$d, $a", []>; -def CVTs64s32 - : InstPTX<(outs RegI64:$d), (ins RegI32:$a), "cvt.s64.s32\t$d, $a", []>; -def CVTu64f32 - : InstPTX<(outs RegI64:$d), (ins RndMode:$r, RegF32:$a), - "cvt$r.u64.f32\t$d, $a", []>; -def CVTs64f32 - : InstPTX<(outs RegI64:$d), (ins RndMode:$r, RegF32:$a), - "cvt$r.s64.f32\t$d, $a", []>; -def CVTu64f64 - : InstPTX<(outs RegI64:$d), (ins RndMode:$r, RegF64:$a), - "cvt$r.u64.f64\t$d, $a", []>; -def CVTs64f64 - : InstPTX<(outs RegI64:$d), (ins RndMode:$r, RegF64:$a), - "cvt$r.s64.f64\t$d, $a", []>; - -// To f32 -def CVTf32u16 - : InstPTX<(outs RegF32:$d), (ins RndMode:$r, RegI16:$a), - "cvt$r.f32.u16\t$d, $a", []>; -def CVTf32s16 - : InstPTX<(outs RegF32:$d), (ins RndMode:$r, RegI16:$a), - "cvt$r.f32.s16\t$d, $a", []>; -def CVTf32u32 - : InstPTX<(outs RegF32:$d), (ins RndMode:$r, RegI32:$a), - "cvt$r.f32.u32\t$d, $a", []>; -def CVTf32s32 - : InstPTX<(outs RegF32:$d), (ins RndMode:$r, RegI32:$a), - "cvt$r.f32.s32\t$d, $a", []>; -def CVTf32u64 - : InstPTX<(outs RegF32:$d), (ins RndMode:$r, RegI64:$a), - "cvt$r.f32.u64\t$d, $a", []>; -def CVTf32s64 - : InstPTX<(outs RegF32:$d), (ins RndMode:$r, RegI64:$a), - "cvt$r.f32.s64\t$d, $a", []>; -def CVTf32f64 - : InstPTX<(outs RegF32:$d), (ins RndMode:$r, RegF64:$a), - "cvt$r.f32.f64\t$d, $a", []>; - -// To f64 -def CVTf64u16 - : InstPTX<(outs RegF64:$d), (ins RndMode:$r, RegI16:$a), - "cvt$r.f64.u16\t$d, $a", []>; -def CVTf64s16 - : InstPTX<(outs RegF64:$d), (ins RndMode:$r, RegI16:$a), - "cvt$r.f64.s16\t$d, $a", []>; -def CVTf64u32 - : InstPTX<(outs RegF64:$d), (ins RndMode:$r, RegI32:$a), - "cvt$r.f64.u32\t$d, $a", []>; -def CVTf64s32 - : InstPTX<(outs RegF64:$d), (ins RndMode:$r, RegI32:$a), - "cvt$r.f64.s32\t$d, $a", []>; -def CVTf64u64 - : InstPTX<(outs RegF64:$d), (ins RndMode:$r, RegI64:$a), - "cvt$r.f64.u64\t$d, $a", []>; -def CVTf64s64 - : InstPTX<(outs RegF64:$d), (ins RndMode:$r, RegI64:$a), - "cvt$r.f64.s64\t$d, $a", []>; -def CVTf64f32 - : InstPTX<(outs RegF64:$d), (ins RegF32:$a), "cvt.f64.f32\t$d, $a", []>; - - ///===- Control Flow Instructions -----------------------------------------===// - -let isBranch = 1, isTerminator = 1, isBarrier = 1 in { - def BRAd - : InstPTX<(outs), (ins brtarget:$d), "bra\t$d", [(br bb:$d)]>; -} - -let isBranch = 1, isTerminator = 1 in { - // FIXME: The pattern part is blank because I cannot (or do not yet know - // how to) use the first operand of PredicateOperand (a RegPred register) here - // When this is revisited, make sure to also look at LowerSETCC and try to - // fold it into negated predicates, if possible. - def BRAdp - : InstPTX<(outs), (ins brtarget:$d), "bra\t$d", - [/*(brcond pred:$_p, bb:$d)*/]>; -} - -let isReturn = 1, isTerminator = 1, isBarrier = 1 in { - def EXIT : InstPTX<(outs), (ins), "exit", [(PTXexit)]>; - def RET : InstPTX<(outs), (ins), "ret", [(PTXret)]>; -} - -let hasSideEffects = 1 in { - def CALL : InstPTX<(outs), (ins), "call", [(PTXcall)]>; -} - -///===- Parameter Passing Pseudo-Instructions -----------------------------===// - -def READPARAMPRED : InstPTX<(outs RegPred:$a), (ins i32imm:$b), - "mov.pred\t$a, %arg$b", []>; -def READPARAMI16 : InstPTX<(outs RegI16:$a), (ins i32imm:$b), - "mov.b16\t$a, %arg$b", []>; -def READPARAMI32 : InstPTX<(outs RegI32:$a), (ins i32imm:$b), - "mov.b32\t$a, %arg$b", []>; -def READPARAMI64 : InstPTX<(outs RegI64:$a), (ins i32imm:$b), - "mov.b64\t$a, %arg$b", []>; -def READPARAMF32 : InstPTX<(outs RegF32:$a), (ins i32imm:$b), - "mov.f32\t$a, %arg$b", []>; -def READPARAMF64 : InstPTX<(outs RegF64:$a), (ins i32imm:$b), - "mov.f64\t$a, %arg$b", []>; - -def WRITEPARAMPRED : InstPTX<(outs), (ins RegPred:$a), "//w", []>; -def WRITEPARAMI16 : InstPTX<(outs), (ins RegI16:$a), "//w", []>; -def WRITEPARAMI32 : InstPTX<(outs), (ins RegI32:$a), "//w", []>; -def WRITEPARAMI64 : InstPTX<(outs), (ins RegI64:$a), "//w", []>; -def WRITEPARAMF32 : InstPTX<(outs), (ins RegF32:$a), "//w", []>; -def WRITEPARAMF64 : InstPTX<(outs), (ins RegF64:$a), "//w", []>; - - -//===----------------------------------------------------------------------===// -// Instruction Selection Patterns -//===----------------------------------------------------------------------===// - -// FADD -def : Pat<(f32 (fadd RegF32:$a, RegF32:$b)), - (FADDrr32 RndDefault, RegF32:$a, RegF32:$b)>; -def : Pat<(f32 (fadd RegF32:$a, fpimm:$b)), - (FADDri32 RndDefault, RegF32:$a, fpimm:$b)>; -def : Pat<(f64 (fadd RegF64:$a, RegF64:$b)), - (FADDrr64 RndDefault, RegF64:$a, RegF64:$b)>; -def : Pat<(f64 (fadd RegF64:$a, fpimm:$b)), - (FADDri64 RndDefault, RegF64:$a, fpimm:$b)>; - -// FSUB -def : Pat<(f32 (fsub RegF32:$a, RegF32:$b)), - (FSUBrr32 RndDefault, RegF32:$a, RegF32:$b)>; -def : Pat<(f32 (fsub RegF32:$a, fpimm:$b)), - (FSUBri32 RndDefault, RegF32:$a, fpimm:$b)>; -def : Pat<(f64 (fsub RegF64:$a, RegF64:$b)), - (FSUBrr64 RndDefault, RegF64:$a, RegF64:$b)>; -def : Pat<(f64 (fsub RegF64:$a, fpimm:$b)), - (FSUBri64 RndDefault, RegF64:$a, fpimm:$b)>; - -// FMUL -def : Pat<(f32 (fmul RegF32:$a, RegF32:$b)), - (FMULrr32 RndDefault, RegF32:$a, RegF32:$b)>; -def : Pat<(f32 (fmul RegF32:$a, fpimm:$b)), - (FMULri32 RndDefault, RegF32:$a, fpimm:$b)>; -def : Pat<(f64 (fmul RegF64:$a, RegF64:$b)), - (FMULrr64 RndDefault, RegF64:$a, RegF64:$b)>; -def : Pat<(f64 (fmul RegF64:$a, fpimm:$b)), - (FMULri64 RndDefault, RegF64:$a, fpimm:$b)>; - -// FDIV -def : Pat<(f32 (fdiv RegF32:$a, RegF32:$b)), - (FDIVrr32 RndDefault, RegF32:$a, RegF32:$b)>; -def : Pat<(f32 (fdiv RegF32:$a, fpimm:$b)), - (FDIVri32 RndDefault, RegF32:$a, fpimm:$b)>; -def : Pat<(f64 (fdiv RegF64:$a, RegF64:$b)), - (FDIVrr64 RndDefault, RegF64:$a, RegF64:$b)>; -def : Pat<(f64 (fdiv RegF64:$a, fpimm:$b)), - (FDIVri64 RndDefault, RegF64:$a, fpimm:$b)>; - -// FMUL+FADD -def : Pat<(f32 (fadd (fmul RegF32:$a, RegF32:$b), RegF32:$c)), - (FMADrrr32 RndDefault, RegF32:$a, RegF32:$b, RegF32:$c)>, - Requires<[SupportsFMA]>; -def : Pat<(f32 (fadd (fmul RegF32:$a, RegF32:$b), fpimm:$c)), - (FMADrri32 RndDefault, RegF32:$a, RegF32:$b, fpimm:$c)>, - Requires<[SupportsFMA]>; -def : Pat<(f32 (fadd (fmul RegF32:$a, fpimm:$b), fpimm:$c)), - (FMADrrr32 RndDefault, RegF32:$a, fpimm:$b, fpimm:$c)>, - Requires<[SupportsFMA]>; -def : Pat<(f32 (fadd (fmul RegF32:$a, RegF32:$b), fpimm:$c)), - (FMADrri32 RndDefault, RegF32:$a, RegF32:$b, fpimm:$c)>, - Requires<[SupportsFMA]>; -def : Pat<(f64 (fadd (fmul RegF64:$a, RegF64:$b), RegF64:$c)), - (FMADrrr64 RndDefault, RegF64:$a, RegF64:$b, RegF64:$c)>, - Requires<[SupportsFMA]>; -def : Pat<(f64 (fadd (fmul RegF64:$a, RegF64:$b), fpimm:$c)), - (FMADrri64 RndDefault, RegF64:$a, RegF64:$b, fpimm:$c)>, - Requires<[SupportsFMA]>; -def : Pat<(f64 (fadd (fmul RegF64:$a, fpimm:$b), fpimm:$c)), - (FMADrri64 RndDefault, RegF64:$a, fpimm:$b, fpimm:$c)>, - Requires<[SupportsFMA]>; - -// FNEG -def : Pat<(f32 (fneg RegF32:$a)), (FNEGrr32 RndDefault, RegF32:$a)>; -def : Pat<(f32 (fneg fpimm:$a)), (FNEGri32 RndDefault, fpimm:$a)>; -def : Pat<(f64 (fneg RegF64:$a)), (FNEGrr64 RndDefault, RegF64:$a)>; -def : Pat<(f64 (fneg fpimm:$a)), (FNEGri64 RndDefault, fpimm:$a)>; - -// FSQRT -def : Pat<(f32 (fsqrt RegF32:$a)), (FSQRTrr32 RndDefault, RegF32:$a)>; -def : Pat<(f32 (fsqrt fpimm:$a)), (FSQRTri32 RndDefault, fpimm:$a)>; -def : Pat<(f64 (fsqrt RegF64:$a)), (FSQRTrr64 RndDefault, RegF64:$a)>; -def : Pat<(f64 (fsqrt fpimm:$a)), (FSQRTri64 RndDefault, fpimm:$a)>; - -// FSIN -def : Pat<(f32 (fsin RegF32:$a)), (FSINrr32 RndDefault, RegF32:$a)>; -def : Pat<(f32 (fsin fpimm:$a)), (FSINri32 RndDefault, fpimm:$a)>; -def : Pat<(f64 (fsin RegF64:$a)), (FSINrr64 RndDefault, RegF64:$a)>; -def : Pat<(f64 (fsin fpimm:$a)), (FSINri64 RndDefault, fpimm:$a)>; - -// FCOS -def : Pat<(f32 (fcos RegF32:$a)), (FCOSrr32 RndDefault, RegF32:$a)>; -def : Pat<(f32 (fcos fpimm:$a)), (FCOSri32 RndDefault, fpimm:$a)>; -def : Pat<(f64 (fcos RegF64:$a)), (FCOSrr64 RndDefault, RegF64:$a)>; -def : Pat<(f64 (fcos fpimm:$a)), (FCOSri64 RndDefault, fpimm:$a)>; - -// Type conversion notes: -// - PTX does not directly support converting a predicate to a value, so we -// use a select instruction to select either 0 or 1 (integer or fp) based -// on the truth value of the predicate. -// - PTX does not directly support converting to a predicate type, so we fake it -// by performing a greater-than test between the value and zero. This follows -// the C convention that any non-zero value is equivalent to 'true'. - -// Conversion to pred -def : Pat<(i1 (trunc RegI16:$a)), (SETPGTu16ri RegI16:$a, 0)>; -def : Pat<(i1 (trunc RegI32:$a)), (SETPGTu32ri RegI32:$a, 0)>; -def : Pat<(i1 (trunc RegI64:$a)), (SETPGTu64ri RegI64:$a, 0)>; -def : Pat<(i1 (fp_to_uint RegF32:$a)), (SETPGTu32ri (MOVi32f32 RegF32:$a), 0)>; -def : Pat<(i1 (fp_to_uint RegF64:$a)), (SETPGTu64ri (MOVi64f64 RegF64:$a), 0)>; - -// Conversion to u16 -def : Pat<(i16 (anyext RegPred:$a)), (SELPi16ii RegPred:$a, 1, 0)>; -def : Pat<(i16 (sext RegPred:$a)), (SELPi16ii RegPred:$a, 0xFFFF, 0)>; -def : Pat<(i16 (zext RegPred:$a)), (SELPi16ii RegPred:$a, 1, 0)>; -def : Pat<(i16 (trunc RegI32:$a)), (CVTu16u32 RegI32:$a)>; -def : Pat<(i16 (trunc RegI64:$a)), (CVTu16u64 RegI64:$a)>; -def : Pat<(i16 (fp_to_uint RegF32:$a)), (CVTu16f32 RndDefault, RegF32:$a)>; -def : Pat<(i16 (fp_to_sint RegF32:$a)), (CVTs16f32 RndDefault, RegF32:$a)>; -def : Pat<(i16 (fp_to_uint RegF64:$a)), (CVTu16f64 RndDefault, RegF64:$a)>; -def : Pat<(i16 (fp_to_sint RegF64:$a)), (CVTs16f64 RndDefault, RegF64:$a)>; - -// Conversion to u32 -def : Pat<(i32 (anyext RegPred:$a)), (SELPi32ii RegPred:$a, 1, 0)>; -def : Pat<(i32 (sext RegPred:$a)), (SELPi32ii RegPred:$a, 0xFFFFFFFF, 0)>; -def : Pat<(i32 (zext RegPred:$a)), (SELPi32ii RegPred:$a, 1, 0)>; -def : Pat<(i32 (anyext RegI16:$a)), (CVTu32u16 RegI16:$a)>; -def : Pat<(i32 (sext RegI16:$a)), (CVTs32s16 RegI16:$a)>; -def : Pat<(i32 (zext RegI16:$a)), (CVTu32u16 RegI16:$a)>; -def : Pat<(i32 (trunc RegI64:$a)), (CVTu32u64 RegI64:$a)>; -def : Pat<(i32 (fp_to_uint RegF32:$a)), (CVTu32f32 RndDefault, RegF32:$a)>; -def : Pat<(i32 (fp_to_sint RegF32:$a)), (CVTs32f32 RndDefault, RegF32:$a)>; -def : Pat<(i32 (fp_to_uint RegF64:$a)), (CVTu32f64 RndDefault, RegF64:$a)>; -def : Pat<(i32 (fp_to_sint RegF64:$a)), (CVTs32f64 RndDefault, RegF64:$a)>; -def : Pat<(i32 (bitconvert RegF32:$a)), (MOVi32f32 RegF32:$a)>; - -// Conversion to u64 -def : Pat<(i64 (anyext RegPred:$a)), (SELPi64ii RegPred:$a, 1, 0)>; -def : Pat<(i64 (sext RegPred:$a)), (SELPi64ii RegPred:$a, - 0xFFFFFFFFFFFFFFFF, 0)>; -def : Pat<(i64 (zext RegPred:$a)), (SELPi64ii RegPred:$a, 1, 0)>; -def : Pat<(i64 (anyext RegI16:$a)), (CVTu64u16 RegI16:$a)>; -def : Pat<(i64 (sext RegI16:$a)), (CVTs64s16 RegI16:$a)>; -def : Pat<(i64 (zext RegI16:$a)), (CVTu64u16 RegI16:$a)>; -def : Pat<(i64 (anyext RegI32:$a)), (CVTu64u32 RegI32:$a)>; -def : Pat<(i64 (sext RegI32:$a)), (CVTs64s32 RegI32:$a)>; -def : Pat<(i64 (zext RegI32:$a)), (CVTu64u32 RegI32:$a)>; -def : Pat<(i64 (fp_to_uint RegF32:$a)), (CVTu64f32 RndDefault, RegF32:$a)>; -def : Pat<(i64 (fp_to_sint RegF32:$a)), (CVTs64f32 RndDefault, RegF32:$a)>; -def : Pat<(i64 (fp_to_uint RegF64:$a)), (CVTu64f64 RndDefault, RegF64:$a)>; -def : Pat<(i64 (fp_to_sint RegF64:$a)), (CVTs64f64 RndDefault, RegF64:$a)>; -def : Pat<(i64 (bitconvert RegF64:$a)), (MOVi64f64 RegF64:$a)>; - -// Conversion to f32 -def : Pat<(f32 (uint_to_fp RegPred:$a)), (SELPf32rr RegPred:$a, - (MOVf32i32 0x3F800000), (MOVf32i32 0))>; -def : Pat<(f32 (uint_to_fp RegI16:$a)), (CVTf32u16 RndDefault, RegI16:$a)>; -def : Pat<(f32 (sint_to_fp RegI16:$a)), (CVTf32s16 RndDefault, RegI16:$a)>; -def : Pat<(f32 (uint_to_fp RegI32:$a)), (CVTf32u32 RndDefault, RegI32:$a)>; -def : Pat<(f32 (sint_to_fp RegI32:$a)), (CVTf32s32 RndDefault, RegI32:$a)>; -def : Pat<(f32 (uint_to_fp RegI64:$a)), (CVTf32u64 RndDefault, RegI64:$a)>; -def : Pat<(f32 (sint_to_fp RegI64:$a)), (CVTf32s64 RndDefault, RegI64:$a)>; -def : Pat<(f32 (fround RegF64:$a)), (CVTf32f64 RndDefault, RegF64:$a)>; -def : Pat<(f32 (bitconvert RegI32:$a)), (MOVf32i32 RegI32:$a)>; - -// Conversion to f64 -def : Pat<(f64 (uint_to_fp RegPred:$a)), (SELPf64rr RegPred:$a, - (MOVf64i64 0x3F80000000000000), (MOVf64i64 0))>; -def : Pat<(f64 (uint_to_fp RegI16:$a)), (CVTf64u16 RndDefault, RegI16:$a)>; -def : Pat<(f64 (sint_to_fp RegI16:$a)), (CVTf64s16 RndDefault, RegI16:$a)>; -def : Pat<(f64 (uint_to_fp RegI32:$a)), (CVTf64u32 RndDefault, RegI32:$a)>; -def : Pat<(f64 (sint_to_fp RegI32:$a)), (CVTf64s32 RndDefault, RegI32:$a)>; -def : Pat<(f64 (uint_to_fp RegI64:$a)), (CVTf64u64 RndDefault, RegI64:$a)>; -def : Pat<(f64 (sint_to_fp RegI64:$a)), (CVTf64s64 RndDefault, RegI64:$a)>; -def : Pat<(f64 (fextend RegF32:$a)), (CVTf64f32 RegF32:$a)>; -def : Pat<(f64 (bitconvert RegI64:$a)), (MOVf64i64 RegI64:$a)>; - -// setcc - predicate inversion for branch conditions -def : Pat<(i1 (setcc RegPred:$a, imm:$b, SETNE)), - (XORripreds RegPred:$a, imm:$b)>; - -///===- Intrinsic Instructions --------------------------------------------===// -include "PTXIntrinsicInstrInfo.td" - -///===- Load/Store Instructions -------------------------------------------===// -include "PTXInstrLoadStore.td" - diff --git a/lib/Target/PTX/PTXInstrLoadStore.td b/lib/Target/PTX/PTXInstrLoadStore.td deleted file mode 100644 index 7a62684..0000000 --- a/lib/Target/PTX/PTXInstrLoadStore.td +++ /dev/null @@ -1,278 +0,0 @@ -//===- PTXInstrLoadStore.td - PTX Load/Store Instruction Defs -*- tablegen-*-=// -// -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. -// -//===----------------------------------------------------------------------===// -// -// This file describes the PTX load/store instructions in TableGen format. -// -//===----------------------------------------------------------------------===// - - -// Addressing Predicates -// We have to differentiate between 32- and 64-bit pointer types -def Use32BitAddresses : Predicate<"!getSubtarget().is64Bit()">; -def Use64BitAddresses : Predicate<"getSubtarget().is64Bit()">; - -//===----------------------------------------------------------------------===// -// Pattern Fragments for Loads/Stores -//===----------------------------------------------------------------------===// - -def load_global : PatFrag<(ops node:$ptr), (load node:$ptr), [{ - const Value *Src; - const PointerType *PT; - if ((Src = cast<LoadSDNode>(N)->getSrcValue()) && - (PT = dyn_cast<PointerType>(Src->getType()))) - return PT->getAddressSpace() == PTXStateSpace::Global; - return false; -}]>; - -def load_constant : PatFrag<(ops node:$ptr), (load node:$ptr), [{ - const Value *Src; - const PointerType *PT; - if ((Src = cast<LoadSDNode>(N)->getSrcValue()) && - (PT = dyn_cast<PointerType>(Src->getType()))) - return PT->getAddressSpace() == PTXStateSpace::Constant; - return false; -}]>; - -def load_shared : PatFrag<(ops node:$ptr), (load node:$ptr), [{ - const Value *Src; - const PointerType *PT; - if ((Src = cast<LoadSDNode>(N)->getSrcValue()) && - (PT = dyn_cast<PointerType>(Src->getType()))) - return PT->getAddressSpace() == PTXStateSpace::Shared; - return false; -}]>; - -def store_global - : PatFrag<(ops node:$d, node:$ptr), (store node:$d, node:$ptr), [{ - const Value *Src; - const PointerType *PT; - if ((Src = cast<StoreSDNode>(N)->getSrcValue()) && - (PT = dyn_cast<PointerType>(Src->getType()))) - return PT->getAddressSpace() == PTXStateSpace::Global; - return false; -}]>; - -def store_shared - : PatFrag<(ops node:$d, node:$ptr), (store node:$d, node:$ptr), [{ - const Value *Src; - const PointerType *PT; - if ((Src = cast<StoreSDNode>(N)->getSrcValue()) && - (PT = dyn_cast<PointerType>(Src->getType()))) - return PT->getAddressSpace() == PTXStateSpace::Shared; - return false; -}]>; - -// Addressing modes. -def ADDRrr32 : ComplexPattern<i32, 2, "SelectADDRrr", [], []>; -def ADDRrr64 : ComplexPattern<i64, 2, "SelectADDRrr", [], []>; -def ADDRri32 : ComplexPattern<i32, 2, "SelectADDRri", [], []>; -def ADDRri64 : ComplexPattern<i64, 2, "SelectADDRri", [], []>; -def ADDRii32 : ComplexPattern<i32, 2, "SelectADDRii", [], []>; -def ADDRii64 : ComplexPattern<i64, 2, "SelectADDRii", [], []>; -def ADDRlocal32 : ComplexPattern<i32, 2, "SelectADDRlocal", [], []>; -def ADDRlocal64 : ComplexPattern<i64, 2, "SelectADDRlocal", [], []>; - -// Address operands -def MEMri32 : Operand<i32> { - let PrintMethod = "printMemOperand"; - let MIOperandInfo = (ops RegI32, i32imm); -} -def MEMri64 : Operand<i64> { - let PrintMethod = "printMemOperand"; - let MIOperandInfo = (ops RegI64, i64imm); -} -def LOCALri32 : Operand<i32> { - let PrintMethod = "printMemOperand"; - let MIOperandInfo = (ops i32imm, i32imm); -} -def LOCALri64 : Operand<i64> { - let PrintMethod = "printMemOperand"; - let MIOperandInfo = (ops i64imm, i64imm); -} -def MEMii32 : Operand<i32> { - let PrintMethod = "printMemOperand"; - let MIOperandInfo = (ops i32imm, i32imm); -} -def MEMii64 : Operand<i64> { - let PrintMethod = "printMemOperand"; - let MIOperandInfo = (ops i64imm, i64imm); -} -// The operand here does not correspond to an actual address, so we -// can use i32 in 64-bit address modes. -def MEMpi : Operand<i32> { - let PrintMethod = "printParamOperand"; - let MIOperandInfo = (ops i32imm); -} -def MEMret : Operand<i32> { - let PrintMethod = "printReturnOperand"; - let MIOperandInfo = (ops i32imm); -} - - -// Load/store .param space -def PTXloadparam - : SDNode<"PTXISD::LOAD_PARAM", SDTypeProfile<1, 1, [SDTCisPtrTy<1>]>, - [SDNPHasChain, SDNPOutGlue, SDNPOptInGlue]>; -def PTXstoreparam - : SDNode<"PTXISD::STORE_PARAM", SDTypeProfile<0, 2, [SDTCisVT<0, i32>]>, - [SDNPHasChain, SDNPOutGlue, SDNPOptInGlue]>; - -def PTXreadparam - : SDNode<"PTXISD::READ_PARAM", SDTypeProfile<1, 1, [SDTCisVT<1, i32>]>, - [SDNPHasChain, SDNPOutGlue, SDNPOptInGlue]>; -def PTXwriteparam - : SDNode<"PTXISD::WRITE_PARAM", SDTypeProfile<0, 1, []>, - [SDNPHasChain, SDNPOutGlue, SDNPOptInGlue]>; - - - -//===----------------------------------------------------------------------===// -// Classes for loads/stores -//===----------------------------------------------------------------------===// -multiclass PTX_LD<string opstr, string typestr, - RegisterClass RC, PatFrag pat_load> { - def rr32 : InstPTX<(outs RC:$d), - (ins MEMri32:$a), - !strconcat(opstr, !strconcat(typestr, "\t$d, [$a]")), - [(set RC:$d, (pat_load ADDRrr32:$a))]>, - Requires<[Use32BitAddresses]>; - def rr64 : InstPTX<(outs RC:$d), - (ins MEMri64:$a), - !strconcat(opstr, !strconcat(typestr, "\t$d, [$a]")), - [(set RC:$d, (pat_load ADDRrr64:$a))]>, - Requires<[Use64BitAddresses]>; - def ri32 : InstPTX<(outs RC:$d), - (ins MEMri32:$a), - !strconcat(opstr, !strconcat(typestr, "\t$d, [$a]")), - [(set RC:$d, (pat_load ADDRri32:$a))]>, - Requires<[Use32BitAddresses]>; - def ri64 : InstPTX<(outs RC:$d), - (ins MEMri64:$a), - !strconcat(opstr, !strconcat(typestr, "\t$d, [$a]")), - [(set RC:$d, (pat_load ADDRri64:$a))]>, - Requires<[Use64BitAddresses]>; - def ii32 : InstPTX<(outs RC:$d), - (ins MEMii32:$a), - !strconcat(opstr, !strconcat(typestr, "\t$d, [$a]")), - [(set RC:$d, (pat_load ADDRii32:$a))]>, - Requires<[Use32BitAddresses]>; - def ii64 : InstPTX<(outs RC:$d), - (ins MEMii64:$a), - !strconcat(opstr, !strconcat(typestr, "\t$d, [$a]")), - [(set RC:$d, (pat_load ADDRii64:$a))]>, - Requires<[Use64BitAddresses]>; -} - -multiclass PTX_ST<string opstr, string typestr, RegisterClass RC, - PatFrag pat_store> { - def rr32 : InstPTX<(outs), - (ins RC:$d, MEMri32:$a), - !strconcat(opstr, !strconcat(typestr, "\t[$a], $d")), - [(pat_store RC:$d, ADDRrr32:$a)]>, - Requires<[Use32BitAddresses]>; - def rr64 : InstPTX<(outs), - (ins RC:$d, MEMri64:$a), - !strconcat(opstr, !strconcat(typestr, "\t[$a], $d")), - [(pat_store RC:$d, ADDRrr64:$a)]>, - Requires<[Use64BitAddresses]>; - def ri32 : InstPTX<(outs), - (ins RC:$d, MEMri32:$a), - !strconcat(opstr, !strconcat(typestr, "\t[$a], $d")), - [(pat_store RC:$d, ADDRri32:$a)]>, - Requires<[Use32BitAddresses]>; - def ri64 : InstPTX<(outs), - (ins RC:$d, MEMri64:$a), - !strconcat(opstr, !strconcat(typestr, "\t[$a], $d")), - [(pat_store RC:$d, ADDRri64:$a)]>, - Requires<[Use64BitAddresses]>; - def ii32 : InstPTX<(outs), - (ins RC:$d, MEMii32:$a), - !strconcat(opstr, !strconcat(typestr, "\t[$a], $d")), - [(pat_store RC:$d, ADDRii32:$a)]>, - Requires<[Use32BitAddresses]>; - def ii64 : InstPTX<(outs), - (ins RC:$d, MEMii64:$a), - !strconcat(opstr, !strconcat(typestr, "\t[$a], $d")), - [(pat_store RC:$d, ADDRii64:$a)]>, - Requires<[Use64BitAddresses]>; -} - -multiclass PTX_LOCAL_LD_ST<string typestr, RegisterClass RC> { - def LDri32 : InstPTX<(outs RC:$d), (ins LOCALri32:$a), - !strconcat("ld.local", !strconcat(typestr, "\t$d, [$a]")), - [(set RC:$d, (load_global ADDRlocal32:$a))]>; - def LDri64 : InstPTX<(outs RC:$d), (ins LOCALri64:$a), - !strconcat("ld.local", !strconcat(typestr, "\t$d, [$a]")), - [(set RC:$d, (load_global ADDRlocal64:$a))]>; - def STri32 : InstPTX<(outs), (ins RC:$d, LOCALri32:$a), - !strconcat("st.local", !strconcat(typestr, "\t[$a], $d")), - [(store_global RC:$d, ADDRlocal32:$a)]>; - def STri64 : InstPTX<(outs), (ins RC:$d, LOCALri64:$a), - !strconcat("st.local", !strconcat(typestr, "\t[$a], $d")), - [(store_global RC:$d, ADDRlocal64:$a)]>; -} - -multiclass PTX_PARAM_LD_ST<string typestr, RegisterClass RC> { - let hasSideEffects = 1 in { - def LDpi : InstPTX<(outs RC:$d), (ins i32imm:$a), - !strconcat("ld.param", !strconcat(typestr, "\t$d, [$a]")), - [(set RC:$d, (PTXloadparam texternalsym:$a))]>; - def STpi : InstPTX<(outs), (ins i32imm:$d, RC:$a), - !strconcat("st.param", !strconcat(typestr, "\t[$d], $a")), - [(PTXstoreparam texternalsym:$d, RC:$a)]>; - } -} - -multiclass PTX_LD_ALL<string opstr, PatFrag pat_load> { - defm u16 : PTX_LD<opstr, ".u16", RegI16, pat_load>; - defm u32 : PTX_LD<opstr, ".u32", RegI32, pat_load>; - defm u64 : PTX_LD<opstr, ".u64", RegI64, pat_load>; - defm f32 : PTX_LD<opstr, ".f32", RegF32, pat_load>; - defm f64 : PTX_LD<opstr, ".f64", RegF64, pat_load>; -} - -multiclass PTX_ST_ALL<string opstr, PatFrag pat_store> { - defm u16 : PTX_ST<opstr, ".u16", RegI16, pat_store>; - defm u32 : PTX_ST<opstr, ".u32", RegI32, pat_store>; - defm u64 : PTX_ST<opstr, ".u64", RegI64, pat_store>; - defm f32 : PTX_ST<opstr, ".f32", RegF32, pat_store>; - defm f64 : PTX_ST<opstr, ".f64", RegF64, pat_store>; -} - - - -//===----------------------------------------------------------------------===// -// Instruction definitions for loads/stores -//===----------------------------------------------------------------------===// - -// Global/shared stores -defm STg : PTX_ST_ALL<"st.global", store_global>; -defm STs : PTX_ST_ALL<"st.shared", store_shared>; - -// Global/shared/constant loads -defm LDg : PTX_LD_ALL<"ld.global", load_global>; -defm LDc : PTX_LD_ALL<"ld.const", load_constant>; -defm LDs : PTX_LD_ALL<"ld.shared", load_shared>; - -// Param loads/stores -defm PARAMPRED : PTX_PARAM_LD_ST<".pred", RegPred>; -defm PARAMU16 : PTX_PARAM_LD_ST<".u16", RegI16>; -defm PARAMU32 : PTX_PARAM_LD_ST<".u32", RegI32>; -defm PARAMU64 : PTX_PARAM_LD_ST<".u64", RegI64>; -defm PARAMF32 : PTX_PARAM_LD_ST<".f32", RegF32>; -defm PARAMF64 : PTX_PARAM_LD_ST<".f64", RegF64>; - -// Local loads/stores -defm LOCALPRED : PTX_LOCAL_LD_ST<".pred", RegPred>; -defm LOCALU16 : PTX_LOCAL_LD_ST<".u16", RegI16>; -defm LOCALU32 : PTX_LOCAL_LD_ST<".u32", RegI32>; -defm LOCALU64 : PTX_LOCAL_LD_ST<".u64", RegI64>; -defm LOCALF32 : PTX_LOCAL_LD_ST<".f32", RegF32>; -defm LOCALF64 : PTX_LOCAL_LD_ST<".f64", RegF64>; - diff --git a/lib/Target/PTX/PTXIntrinsicInstrInfo.td b/lib/Target/PTX/PTXIntrinsicInstrInfo.td deleted file mode 100644 index 3416f1c..0000000 --- a/lib/Target/PTX/PTXIntrinsicInstrInfo.td +++ /dev/null @@ -1,110 +0,0 @@ -//===-- PTXIntrinsicInstrInfo.td - Defines PTX intrinsics --*- tablegen -*-===// -// -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. -// -//===----------------------------------------------------------------------===// -// -// This file defines all of the PTX-specific intrinsic instructions. -// -//===----------------------------------------------------------------------===// - -// PTX Special Purpose Register Accessor Intrinsics - -class PTX_READ_SPECIAL_REGISTER_R64<string regname, Intrinsic intop> - : InstPTX<(outs RegI64:$d), (ins), - !strconcat("mov.u64\t$d, %", regname), - [(set RegI64:$d, (intop))]>; - -class PTX_READ_SPECIAL_REGISTER_R32<string regname, Intrinsic intop> - : InstPTX<(outs RegI32:$d), (ins), - !strconcat("mov.u32\t$d, %", regname), - [(set RegI32:$d, (intop))]>; - -// TODO Add read vector-version of special registers - -//def PTX_READ_TID_R64 : PTX_READ_SPECIAL_REGISTER_R64<"tid", -// int_ptx_read_tid_r64>; -def PTX_READ_TID_X : PTX_READ_SPECIAL_REGISTER_R32<"tid.x", - int_ptx_read_tid_x>; -def PTX_READ_TID_Y : PTX_READ_SPECIAL_REGISTER_R32<"tid.y", - int_ptx_read_tid_y>; -def PTX_READ_TID_Z : PTX_READ_SPECIAL_REGISTER_R32<"tid.z", - int_ptx_read_tid_z>; -def PTX_READ_TID_W : PTX_READ_SPECIAL_REGISTER_R32<"tid.w", - int_ptx_read_tid_w>; - -//def PTX_READ_NTID_R64 : PTX_READ_SPECIAL_REGISTER_R64<"ntid", -// int_ptx_read_ntid_r64>; -def PTX_READ_NTID_X : PTX_READ_SPECIAL_REGISTER_R32<"ntid.x", - int_ptx_read_ntid_x>; -def PTX_READ_NTID_Y : PTX_READ_SPECIAL_REGISTER_R32<"ntid.y", - int_ptx_read_ntid_y>; -def PTX_READ_NTID_Z : PTX_READ_SPECIAL_REGISTER_R32<"ntid.z", - int_ptx_read_ntid_z>; -def PTX_READ_NTID_W : PTX_READ_SPECIAL_REGISTER_R32<"ntid.w", - int_ptx_read_ntid_w>; - -def PTX_READ_LANEID : PTX_READ_SPECIAL_REGISTER_R32<"laneid", - int_ptx_read_laneid>; -def PTX_READ_WARPID : PTX_READ_SPECIAL_REGISTER_R32<"warpid", - int_ptx_read_warpid>; -def PTX_READ_NWARPID : PTX_READ_SPECIAL_REGISTER_R32<"nwarpid", - int_ptx_read_nwarpid>; - -//def PTX_READ_CTAID_R64 : -//PTX_READ_SPECIAL_REGISTER_R64<"ctaid", int_ptx_read_ctaid_r64>; -def PTX_READ_CTAID_X : PTX_READ_SPECIAL_REGISTER_R32<"ctaid.x", - int_ptx_read_ctaid_x>; -def PTX_READ_CTAID_Y : PTX_READ_SPECIAL_REGISTER_R32<"ctaid.y", - int_ptx_read_ctaid_y>; -def PTX_READ_CTAID_Z : PTX_READ_SPECIAL_REGISTER_R32<"ctaid.z", - int_ptx_read_ctaid_z>; -def PTX_READ_CTAID_W : PTX_READ_SPECIAL_REGISTER_R32<"ctaid.w", - int_ptx_read_ctaid_w>; - -//def PTX_READ_NCTAID_R64 : -//PTX_READ_SPECIAL_REGISTER_R64<"nctaid", int_ptx_read_nctaid_r64>; -def PTX_READ_NCTAID_X : PTX_READ_SPECIAL_REGISTER_R32<"nctaid.x", - int_ptx_read_nctaid_x>; -def PTX_READ_NCTAID_Y : PTX_READ_SPECIAL_REGISTER_R32<"nctaid.y", - int_ptx_read_nctaid_y>; -def PTX_READ_NCTAID_Z : PTX_READ_SPECIAL_REGISTER_R32<"nctaid.z", - int_ptx_read_nctaid_z>; -def PTX_READ_NCTAID_W : PTX_READ_SPECIAL_REGISTER_R32<"nctaid.w", - int_ptx_read_nctaid_w>; - -def PTX_READ_SMID : PTX_READ_SPECIAL_REGISTER_R32<"smid", - int_ptx_read_smid>; -def PTX_READ_NSMID : PTX_READ_SPECIAL_REGISTER_R32<"nsmid", - int_ptx_read_nsmid>; -def PTX_READ_GRIDID : PTX_READ_SPECIAL_REGISTER_R32<"gridid", - int_ptx_read_gridid>; - -def PTX_READ_LANEMASK_EQ - : PTX_READ_SPECIAL_REGISTER_R32<"lanemask_eq", int_ptx_read_lanemask_eq>; -def PTX_READ_LANEMASK_LE - : PTX_READ_SPECIAL_REGISTER_R32<"lanemask_le", int_ptx_read_lanemask_le>; -def PTX_READ_LANEMASK_LT - : PTX_READ_SPECIAL_REGISTER_R32<"lanemask_lt", int_ptx_read_lanemask_lt>; -def PTX_READ_LANEMASK_GE - : PTX_READ_SPECIAL_REGISTER_R32<"lanemask_ge", int_ptx_read_lanemask_ge>; -def PTX_READ_LANEMASK_GT - : PTX_READ_SPECIAL_REGISTER_R32<"lanemask_gt", int_ptx_read_lanemask_gt>; - -def PTX_READ_CLOCK - : PTX_READ_SPECIAL_REGISTER_R32<"clock", int_ptx_read_clock>; -def PTX_READ_CLOCK64 - : PTX_READ_SPECIAL_REGISTER_R64<"clock64", int_ptx_read_clock64>; - -def PTX_READ_PM0 : PTX_READ_SPECIAL_REGISTER_R32<"pm0", int_ptx_read_pm0>; -def PTX_READ_PM1 : PTX_READ_SPECIAL_REGISTER_R32<"pm1", int_ptx_read_pm1>; -def PTX_READ_PM2 : PTX_READ_SPECIAL_REGISTER_R32<"pm2", int_ptx_read_pm2>; -def PTX_READ_PM3 : PTX_READ_SPECIAL_REGISTER_R32<"pm3", int_ptx_read_pm3>; - -// PTX Parallel Synchronization and Communication Intrinsics - -def PTX_BAR_SYNC : InstPTX<(outs), (ins i32imm:$i), "bar.sync\t$i", - [(int_ptx_bar_sync imm:$i)]>; diff --git a/lib/Target/PTX/PTXMCAsmStreamer.cpp b/lib/Target/PTX/PTXMCAsmStreamer.cpp deleted file mode 100644 index 3ed67a6..0000000 --- a/lib/Target/PTX/PTXMCAsmStreamer.cpp +++ /dev/null @@ -1,556 +0,0 @@ -//===-- PTXMCAsmStreamer.cpp - PTX Text Assembly Output -------------------===// -// -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. -// -//===----------------------------------------------------------------------===// - -#include "llvm/ADT/OwningPtr.h" -#include "llvm/ADT/SmallString.h" -#include "llvm/ADT/Twine.h" -#include "llvm/MC/MCAsmInfo.h" -#include "llvm/MC/MCCodeEmitter.h" -#include "llvm/MC/MCContext.h" -#include "llvm/MC/MCExpr.h" -#include "llvm/MC/MCInst.h" -#include "llvm/MC/MCInstPrinter.h" -#include "llvm/MC/MCStreamer.h" -#include "llvm/MC/MCSymbol.h" -#include "llvm/Support/ErrorHandling.h" -#include "llvm/Support/MathExtras.h" -#include "llvm/Support/Format.h" -#include "llvm/Support/FormattedStream.h" -#include "llvm/Support/PathV2.h" -#include "llvm/Support/raw_ostream.h" - -using namespace llvm; - -namespace { -class PTXMCAsmStreamer : public MCStreamer { - formatted_raw_ostream &OS; - const MCAsmInfo &MAI; - OwningPtr<MCInstPrinter> InstPrinter; - OwningPtr<MCCodeEmitter> Emitter; - - SmallString<128> CommentToEmit; - raw_svector_ostream CommentStream; - - unsigned IsVerboseAsm : 1; - unsigned ShowInst : 1; - -public: - PTXMCAsmStreamer(MCContext &Context, - formatted_raw_ostream &os, - bool isVerboseAsm, bool useLoc, - MCInstPrinter *printer, - MCCodeEmitter *emitter, - bool showInst) - : MCStreamer(Context), OS(os), MAI(Context.getAsmInfo()), - InstPrinter(printer), Emitter(emitter), CommentStream(CommentToEmit), - IsVerboseAsm(isVerboseAsm), - ShowInst(showInst) { - if (InstPrinter && IsVerboseAsm) - InstPrinter->setCommentStream(CommentStream); - } - - ~PTXMCAsmStreamer() {} - - inline void EmitEOL() { - // If we don't have any comments, just emit a \n. - if (!IsVerboseAsm) { - OS << '\n'; - return; - } - EmitCommentsAndEOL(); - } - void EmitCommentsAndEOL(); - - /// isVerboseAsm - Return true if this streamer supports verbose assembly at - /// all. - virtual bool isVerboseAsm() const { return IsVerboseAsm; } - - /// hasRawTextSupport - We support EmitRawText. - virtual bool hasRawTextSupport() const { return true; } - - /// AddComment - Add a comment that can be emitted to the generated .s - /// file if applicable as a QoI issue to make the output of the compiler - /// more readable. This only affects the MCAsmStreamer, and only when - /// verbose assembly output is enabled. - virtual void AddComment(const Twine &T); - - /// AddEncodingComment - Add a comment showing the encoding of an instruction. - virtual void AddEncodingComment(const MCInst &Inst); - - /// GetCommentOS - Return a raw_ostream that comments can be written to. - /// Unlike AddComment, you are required to terminate comments with \n if you - /// use this method. - virtual raw_ostream &GetCommentOS() { - if (!IsVerboseAsm) - return nulls(); // Discard comments unless in verbose asm mode. - return CommentStream; - } - - /// AddBlankLine - Emit a blank line to a .s file to pretty it up. - virtual void AddBlankLine() { - EmitEOL(); - } - - /// @name MCStreamer Interface - /// @{ - - virtual void ChangeSection(const MCSection *Section); - virtual void InitSections() { /* PTX does not use sections */ } - - virtual void EmitLabel(MCSymbol *Symbol); - - virtual void EmitAssemblerFlag(MCAssemblerFlag Flag); - - virtual void EmitThumbFunc(MCSymbol *Func); - - virtual void EmitAssignment(MCSymbol *Symbol, const MCExpr *Value); - - virtual void EmitWeakReference(MCSymbol *Alias, const MCSymbol *Symbol); - - virtual void EmitDwarfAdvanceLineAddr(int64_t LineDelta, - const MCSymbol *LastLabel, - const MCSymbol *Label, - unsigned PointerSize); - - virtual void EmitSymbolAttribute(MCSymbol *Symbol, MCSymbolAttr Attribute); - - virtual void EmitSymbolDesc(MCSymbol *Symbol, unsigned DescValue); - virtual void BeginCOFFSymbolDef(const MCSymbol *Symbol); - virtual void EmitCOFFSymbolStorageClass(int StorageClass); - virtual void EmitCOFFSymbolType(int Type); - virtual void EndCOFFSymbolDef(); - virtual void EmitELFSize(MCSymbol *Symbol, const MCExpr *Value); - virtual void EmitCommonSymbol(MCSymbol *Symbol, uint64_t Size, - unsigned ByteAlignment); - - /// EmitLocalCommonSymbol - Emit a local common (.lcomm) symbol. - /// - /// @param Symbol - The common symbol to emit. - /// @param Size - The size of the common symbol. - /// @param ByteAlignment - The alignment of the common symbol in bytes. - virtual void EmitLocalCommonSymbol(MCSymbol *Symbol, uint64_t Size, - unsigned ByteAlignment); - - virtual void EmitZerofill(const MCSection *Section, MCSymbol *Symbol = 0, - unsigned Size = 0, unsigned ByteAlignment = 0); - - virtual void EmitTBSSSymbol(const MCSection *Section, MCSymbol *Symbol, - uint64_t Size, unsigned ByteAlignment = 0); - - virtual void EmitBytes(StringRef Data, unsigned AddrSpace); - - virtual void EmitValueImpl(const MCExpr *Value, unsigned Size, - unsigned AddrSpace); - virtual void EmitULEB128Value(const MCExpr *Value); - virtual void EmitSLEB128Value(const MCExpr *Value); - virtual void EmitGPRel32Value(const MCExpr *Value); - - - virtual void EmitFill(uint64_t NumBytes, uint8_t FillValue, - unsigned AddrSpace); - - virtual void EmitValueToAlignment(unsigned ByteAlignment, int64_t Value = 0, - unsigned ValueSize = 1, - unsigned MaxBytesToEmit = 0); - - virtual void EmitCodeAlignment(unsigned ByteAlignment, - unsigned MaxBytesToEmit = 0); - - virtual bool EmitValueToOffset(const MCExpr *Offset, - unsigned char Value = 0); - - virtual void EmitFileDirective(StringRef Filename); - virtual bool EmitDwarfFileDirective(unsigned FileNo, StringRef Directory, - StringRef Filename); - - virtual void EmitInstruction(const MCInst &Inst); - - /// EmitRawText - If this file is backed by an assembly streamer, this dumps - /// the specified string in the output .s file. This capability is - /// indicated by the hasRawTextSupport() predicate. - virtual void EmitRawText(StringRef String); - - virtual void FinishImpl(); - - /// @} - -}; // class PTXMCAsmStreamer - -} - -/// TODO: Add appropriate implementation of Emit*() methods when needed - -void PTXMCAsmStreamer::AddComment(const Twine &T) { - if (!IsVerboseAsm) return; - - // Make sure that CommentStream is flushed. - CommentStream.flush(); - - T.toVector(CommentToEmit); - // Each comment goes on its own line. - CommentToEmit.push_back('\n'); - - // Tell the comment stream that the vector changed underneath it. - CommentStream.resync(); -} - -void PTXMCAsmStreamer::EmitCommentsAndEOL() { - if (CommentToEmit.empty() && CommentStream.GetNumBytesInBuffer() == 0) { - OS << '\n'; - return; - } - - CommentStream.flush(); - StringRef Comments = CommentToEmit.str(); - - assert(Comments.back() == '\n' && - "Comment array not newline terminated"); - do { - // Emit a line of comments. - OS.PadToColumn(MAI.getCommentColumn()); - size_t Position = Comments.find('\n'); - OS << MAI.getCommentString() << ' ' << Comments.substr(0, Position) << '\n'; - - Comments = Comments.substr(Position+1); - } while (!Comments.empty()); - - CommentToEmit.clear(); - // Tell the comment stream that the vector changed underneath it. - CommentStream.resync(); -} - -static inline int64_t truncateToSize(int64_t Value, unsigned Bytes) { - assert(Bytes && "Invalid size!"); - return Value & ((uint64_t) (int64_t) -1 >> (64 - Bytes * 8)); -} - -void PTXMCAsmStreamer::ChangeSection(const MCSection *Section) { - assert(Section && "Cannot switch to a null section!"); -} - -void PTXMCAsmStreamer::EmitLabel(MCSymbol *Symbol) { - assert(Symbol->isUndefined() && "Cannot define a symbol twice!"); - assert(!Symbol->isVariable() && "Cannot emit a variable symbol!"); - assert(getCurrentSection() && "Cannot emit before setting section!"); - - OS << *Symbol << MAI.getLabelSuffix(); - EmitEOL(); - Symbol->setSection(*getCurrentSection()); -} - -void PTXMCAsmStreamer::EmitAssemblerFlag(MCAssemblerFlag Flag) {} - -void PTXMCAsmStreamer::EmitThumbFunc(MCSymbol *Func) {} - -void PTXMCAsmStreamer::EmitAssignment(MCSymbol *Symbol, const MCExpr *Value) { - OS << *Symbol << " = " << *Value; - EmitEOL(); - - // FIXME: Lift context changes into super class. - Symbol->setVariableValue(Value); -} - -void PTXMCAsmStreamer::EmitWeakReference(MCSymbol *Alias, - const MCSymbol *Symbol) { - OS << ".weakref " << *Alias << ", " << *Symbol; - EmitEOL(); -} - -void PTXMCAsmStreamer::EmitDwarfAdvanceLineAddr(int64_t LineDelta, - const MCSymbol *LastLabel, - const MCSymbol *Label, - unsigned PointerSize) { - report_fatal_error("Unimplemented."); -} - -void PTXMCAsmStreamer::EmitSymbolAttribute(MCSymbol *Symbol, - MCSymbolAttr Attribute) {} - -void PTXMCAsmStreamer::EmitSymbolDesc(MCSymbol *Symbol, unsigned DescValue) {} - -void PTXMCAsmStreamer::BeginCOFFSymbolDef(const MCSymbol *Symbol) {} - -void PTXMCAsmStreamer::EmitCOFFSymbolStorageClass (int StorageClass) {} - -void PTXMCAsmStreamer::EmitCOFFSymbolType (int Type) {} - -void PTXMCAsmStreamer::EndCOFFSymbolDef() {} - -void PTXMCAsmStreamer::EmitELFSize(MCSymbol *Symbol, const MCExpr *Value) {} - -void PTXMCAsmStreamer::EmitCommonSymbol(MCSymbol *Symbol, uint64_t Size, - unsigned ByteAlignment) {} - -void PTXMCAsmStreamer::EmitLocalCommonSymbol(MCSymbol *Symbol, uint64_t Size, - unsigned ByteAlignment) {} - -void PTXMCAsmStreamer::EmitZerofill(const MCSection *Section, MCSymbol *Symbol, - unsigned Size, unsigned ByteAlignment) {} - -void PTXMCAsmStreamer::EmitTBSSSymbol(const MCSection *Section, - MCSymbol *Symbol, - uint64_t Size, unsigned ByteAlignment) {} - -static inline char toOctal(int X) { return (X&7)+'0'; } - -static void PrintQuotedString(StringRef Data, raw_ostream &OS) { - OS << '"'; - - for (unsigned i = 0, e = Data.size(); i != e; ++i) { - unsigned char C = Data[i]; - if (C == '"' || C == '\\') { - OS << '\\' << (char)C; - continue; - } - - if (isprint((unsigned char)C)) { - OS << (char)C; - continue; - } - - switch (C) { - case '\b': OS << "\\b"; break; - case '\f': OS << "\\f"; break; - case '\n': OS << "\\n"; break; - case '\r': OS << "\\r"; break; - case '\t': OS << "\\t"; break; - default: - OS << '\\'; - OS << toOctal(C >> 6); - OS << toOctal(C >> 3); - OS << toOctal(C >> 0); - break; - } - } - - OS << '"'; -} - -void PTXMCAsmStreamer::EmitBytes(StringRef Data, unsigned AddrSpace) { - assert(getCurrentSection() && "Cannot emit contents before setting section!"); - if (Data.empty()) return; - - if (Data.size() == 1) { - OS << MAI.getData8bitsDirective(AddrSpace); - OS << (unsigned)(unsigned char)Data[0]; - EmitEOL(); - return; - } - - // If the data ends with 0 and the target supports .asciz, use it, otherwise - // use .ascii - if (MAI.getAscizDirective() && Data.back() == 0) { - OS << MAI.getAscizDirective(); - Data = Data.substr(0, Data.size()-1); - } else { - OS << MAI.getAsciiDirective(); - } - - OS << ' '; - PrintQuotedString(Data, OS); - EmitEOL(); -} - -void PTXMCAsmStreamer::EmitValueImpl(const MCExpr *Value, unsigned Size, - unsigned AddrSpace) { - assert(getCurrentSection() && "Cannot emit contents before setting section!"); - const char *Directive = 0; - switch (Size) { - default: break; - case 1: Directive = MAI.getData8bitsDirective(AddrSpace); break; - case 2: Directive = MAI.getData16bitsDirective(AddrSpace); break; - case 4: Directive = MAI.getData32bitsDirective(AddrSpace); break; - case 8: - Directive = MAI.getData64bitsDirective(AddrSpace); - // If the target doesn't support 64-bit data, emit as two 32-bit halves. - if (Directive) break; - int64_t IntValue; - if (!Value->EvaluateAsAbsolute(IntValue)) - report_fatal_error("Don't know how to emit this value."); - if (getContext().getAsmInfo().isLittleEndian()) { - EmitIntValue((uint32_t)(IntValue >> 0 ), 4, AddrSpace); - EmitIntValue((uint32_t)(IntValue >> 32), 4, AddrSpace); - } else { - EmitIntValue((uint32_t)(IntValue >> 32), 4, AddrSpace); - EmitIntValue((uint32_t)(IntValue >> 0 ), 4, AddrSpace); - } - return; - } - - assert(Directive && "Invalid size for machine code value!"); - OS << Directive << *Value; - EmitEOL(); -} - -void PTXMCAsmStreamer::EmitULEB128Value(const MCExpr *Value) { - assert(MAI.hasLEB128() && "Cannot print a .uleb"); - OS << ".uleb128 " << *Value; - EmitEOL(); -} - -void PTXMCAsmStreamer::EmitSLEB128Value(const MCExpr *Value) { - assert(MAI.hasLEB128() && "Cannot print a .sleb"); - OS << ".sleb128 " << *Value; - EmitEOL(); -} - -void PTXMCAsmStreamer::EmitGPRel32Value(const MCExpr *Value) { - assert(MAI.getGPRel32Directive() != 0); - OS << MAI.getGPRel32Directive() << *Value; - EmitEOL(); -} - - -/// EmitFill - Emit NumBytes bytes worth of the value specified by -/// FillValue. This implements directives such as '.space'. -void PTXMCAsmStreamer::EmitFill(uint64_t NumBytes, uint8_t FillValue, - unsigned AddrSpace) { - if (NumBytes == 0) return; - - if (AddrSpace == 0) - if (const char *ZeroDirective = MAI.getZeroDirective()) { - OS << ZeroDirective << NumBytes; - if (FillValue != 0) - OS << ',' << (int)FillValue; - EmitEOL(); - return; - } - - // Emit a byte at a time. - MCStreamer::EmitFill(NumBytes, FillValue, AddrSpace); -} - -void PTXMCAsmStreamer::EmitValueToAlignment(unsigned ByteAlignment, - int64_t Value, - unsigned ValueSize, - unsigned MaxBytesToEmit) { - // Some assemblers don't support non-power of two alignments, so we always - // emit alignments as a power of two if possible. - if (isPowerOf2_32(ByteAlignment)) { - switch (ValueSize) { - default: llvm_unreachable("Invalid size for machine code value!"); - case 1: OS << MAI.getAlignDirective(); break; - // FIXME: use MAI for this! - case 2: OS << ".p2alignw "; break; - case 4: OS << ".p2alignl "; break; - case 8: llvm_unreachable("Unsupported alignment size!"); - } - - if (MAI.getAlignmentIsInBytes()) - OS << ByteAlignment; - else - OS << Log2_32(ByteAlignment); - - if (Value || MaxBytesToEmit) { - OS << ", 0x"; - OS.write_hex(truncateToSize(Value, ValueSize)); - - if (MaxBytesToEmit) - OS << ", " << MaxBytesToEmit; - } - EmitEOL(); - return; - } - - // Non-power of two alignment. This is not widely supported by assemblers. - // FIXME: Parameterize this based on MAI. - switch (ValueSize) { - default: llvm_unreachable("Invalid size for machine code value!"); - case 1: OS << ".balign"; break; - case 2: OS << ".balignw"; break; - case 4: OS << ".balignl"; break; - case 8: llvm_unreachable("Unsupported alignment size!"); - } - - OS << ' ' << ByteAlignment; - OS << ", " << truncateToSize(Value, ValueSize); - if (MaxBytesToEmit) - OS << ", " << MaxBytesToEmit; - EmitEOL(); -} - -void PTXMCAsmStreamer::EmitCodeAlignment(unsigned ByteAlignment, - unsigned MaxBytesToEmit) {} - -bool PTXMCAsmStreamer::EmitValueToOffset(const MCExpr *Offset, - unsigned char Value) {return false;} - - -void PTXMCAsmStreamer::EmitFileDirective(StringRef Filename) { - assert(MAI.hasSingleParameterDotFile()); - OS << "\t.file\t"; - PrintQuotedString(Filename, OS); - EmitEOL(); -} - -// FIXME: should we inherit from MCAsmStreamer? -bool PTXMCAsmStreamer::EmitDwarfFileDirective(unsigned FileNo, - StringRef Directory, - StringRef Filename) { - if (!Directory.empty()) { - if (sys::path::is_absolute(Filename)) - return EmitDwarfFileDirective(FileNo, "", Filename); - SmallString<128> FullPathName = Directory; - sys::path::append(FullPathName, Filename); - return EmitDwarfFileDirective(FileNo, "", FullPathName); - } - - OS << "\t.file\t" << FileNo << ' '; - PrintQuotedString(Filename, OS); - EmitEOL(); - return this->MCStreamer::EmitDwarfFileDirective(FileNo, Directory, Filename); -} - -void PTXMCAsmStreamer::AddEncodingComment(const MCInst &Inst) {} - -void PTXMCAsmStreamer::EmitInstruction(const MCInst &Inst) { - assert(getCurrentSection() && "Cannot emit contents before setting section!"); - - // Show the encoding in a comment if we have a code emitter. - if (Emitter) - AddEncodingComment(Inst); - - // Show the MCInst if enabled. - if (ShowInst) { - Inst.dump_pretty(GetCommentOS(), &MAI, InstPrinter.get(), "\n "); - GetCommentOS() << "\n"; - } - - // If we have an AsmPrinter, use that to print, otherwise print the MCInst. - if (InstPrinter) - InstPrinter->printInst(&Inst, OS, ""); - else - Inst.print(OS, &MAI); - EmitEOL(); -} - -/// EmitRawText - If this file is backed by an assembly streamer, this dumps -/// the specified string in the output .s file. This capability is -/// indicated by the hasRawTextSupport() predicate. -void PTXMCAsmStreamer::EmitRawText(StringRef String) { - if (!String.empty() && String.back() == '\n') - String = String.substr(0, String.size()-1); - OS << String; - EmitEOL(); -} - -void PTXMCAsmStreamer::FinishImpl() {} - -namespace llvm { - MCStreamer *createPTXAsmStreamer(MCContext &Context, - formatted_raw_ostream &OS, - bool isVerboseAsm, bool useLoc, bool useCFI, - bool useDwarfDirectory, - MCInstPrinter *IP, - MCCodeEmitter *CE, MCAsmBackend *MAB, - bool ShowInst) { - return new PTXMCAsmStreamer(Context, OS, isVerboseAsm, useLoc, - IP, CE, ShowInst); - } -} diff --git a/lib/Target/PTX/PTXMCInstLower.cpp b/lib/Target/PTX/PTXMCInstLower.cpp deleted file mode 100644 index 142e639..0000000 --- a/lib/Target/PTX/PTXMCInstLower.cpp +++ /dev/null @@ -1,32 +0,0 @@ -//===-- PTXMCInstLower.cpp - Convert PTX MachineInstr to an MCInst --------===// -// -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. -// -//===----------------------------------------------------------------------===// -// -// This file contains code to lower PTX MachineInstrs to their corresponding -// MCInst records. -// -//===----------------------------------------------------------------------===// - -#include "PTX.h" -#include "PTXAsmPrinter.h" -#include "llvm/Constants.h" -#include "llvm/CodeGen/MachineBasicBlock.h" -#include "llvm/MC/MCExpr.h" -#include "llvm/MC/MCInst.h" -#include "llvm/Target/Mangler.h" - -void llvm::LowerPTXMachineInstrToMCInst(const MachineInstr *MI, MCInst &OutMI, - PTXAsmPrinter &AP) { - OutMI.setOpcode(MI->getOpcode()); - for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) { - const MachineOperand &MO = MI->getOperand(i); - MCOperand MCOp; - OutMI.addOperand(AP.lowerOperand(MO)); - } -} - diff --git a/lib/Target/PTX/PTXMFInfoExtract.cpp b/lib/Target/PTX/PTXMFInfoExtract.cpp deleted file mode 100644 index f1676ca..0000000 --- a/lib/Target/PTX/PTXMFInfoExtract.cpp +++ /dev/null @@ -1,85 +0,0 @@ -//===-- PTXMFInfoExtract.cpp - Extract PTX machine function info ----------===// -// -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. -// -//===----------------------------------------------------------------------===// -// -// This file defines an information extractor for PTX machine functions. -// -//===----------------------------------------------------------------------===// - -#define DEBUG_TYPE "ptx-mf-info-extract" - -#include "PTX.h" -#include "PTXTargetMachine.h" -#include "PTXMachineFunctionInfo.h" -#include "llvm/CodeGen/MachineFunctionPass.h" -#include "llvm/CodeGen/MachineRegisterInfo.h" -#include "llvm/Support/Debug.h" -#include "llvm/Support/ErrorHandling.h" -#include "llvm/Support/raw_ostream.h" - -using namespace llvm; - -// NOTE: PTXMFInfoExtract must after register allocation! - -namespace { - /// PTXMFInfoExtract - PTX specific code to extract of PTX machine - /// function information for PTXAsmPrinter - /// - class PTXMFInfoExtract : public MachineFunctionPass { - private: - static char ID; - - public: - PTXMFInfoExtract(PTXTargetMachine &TM, CodeGenOpt::Level OptLevel) - : MachineFunctionPass(ID) {} - - virtual bool runOnMachineFunction(MachineFunction &MF); - - virtual const char *getPassName() const { - return "PTX Machine Function Info Extractor"; - } - }; // class PTXMFInfoExtract -} // end anonymous namespace - -using namespace llvm; - -char PTXMFInfoExtract::ID = 0; - -bool PTXMFInfoExtract::runOnMachineFunction(MachineFunction &MF) { - PTXMachineFunctionInfo *MFI = MF.getInfo<PTXMachineFunctionInfo>(); - MachineRegisterInfo &MRI = MF.getRegInfo(); - - // Generate list of all virtual registers used in this function - for (unsigned i = 0; i < MRI.getNumVirtRegs(); ++i) { - unsigned Reg = TargetRegisterInfo::index2VirtReg(i); - const TargetRegisterClass *TRC = MRI.getRegClass(Reg); - unsigned RegType; - if (TRC == &PTX::RegPredRegClass) - RegType = PTXRegisterType::Pred; - else if (TRC == &PTX::RegI16RegClass) - RegType = PTXRegisterType::B16; - else if (TRC == &PTX::RegI32RegClass) - RegType = PTXRegisterType::B32; - else if (TRC == &PTX::RegI64RegClass) - RegType = PTXRegisterType::B64; - else if (TRC == &PTX::RegF32RegClass) - RegType = PTXRegisterType::F32; - else if (TRC == &PTX::RegF64RegClass) - RegType = PTXRegisterType::F64; - else - llvm_unreachable("Unkown register class."); - MFI->addRegister(Reg, RegType, PTXRegisterSpace::Reg); - } - - return false; -} - -FunctionPass *llvm::createPTXMFInfoExtract(PTXTargetMachine &TM, - CodeGenOpt::Level OptLevel) { - return new PTXMFInfoExtract(TM, OptLevel); -} diff --git a/lib/Target/PTX/PTXMachineFunctionInfo.cpp b/lib/Target/PTX/PTXMachineFunctionInfo.cpp deleted file mode 100644 index 60acfc7..0000000 --- a/lib/Target/PTX/PTXMachineFunctionInfo.cpp +++ /dev/null @@ -1,14 +0,0 @@ -//===-- PTXMachineFuctionInfo.cpp - PTX machine function info -------------===// -// -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. -// -//===----------------------------------------------------------------------===// - -#include "PTXMachineFunctionInfo.h" - -using namespace llvm; - -void PTXMachineFunctionInfo::anchor() { } diff --git a/lib/Target/PTX/PTXMachineFunctionInfo.h b/lib/Target/PTX/PTXMachineFunctionInfo.h deleted file mode 100644 index bb7574c..0000000 --- a/lib/Target/PTX/PTXMachineFunctionInfo.h +++ /dev/null @@ -1,202 +0,0 @@ -//===-- PTXMachineFuctionInfo.h - PTX machine function info ------*- C++ -*-==// -// -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. -// -//===----------------------------------------------------------------------===// -// -// This file declares PTX-specific per-machine-function information. -// -//===----------------------------------------------------------------------===// - -#ifndef PTX_MACHINE_FUNCTION_INFO_H -#define PTX_MACHINE_FUNCTION_INFO_H - -#include "PTX.h" -#include "PTXParamManager.h" -#include "PTXRegisterInfo.h" -#include "llvm/ADT/DenseMap.h" -#include "llvm/ADT/DenseSet.h" -#include "llvm/ADT/StringExtras.h" -#include "llvm/CodeGen/MachineFunction.h" -#include "llvm/Support/Debug.h" -#include "llvm/Support/raw_ostream.h" - -namespace llvm { - -/// PTXMachineFunctionInfo - This class is derived from MachineFunction and -/// contains private PTX target-specific information for each MachineFunction. -/// -class PTXMachineFunctionInfo : public MachineFunctionInfo { - virtual void anchor(); - bool IsKernel; - DenseSet<unsigned> RegArgs; - DenseSet<unsigned> RegRets; - - typedef DenseMap<int, std::string> FrameMap; - - FrameMap FrameSymbols; - - struct RegisterInfo { - unsigned Reg; - unsigned Type; - unsigned Space; - unsigned Offset; - unsigned Encoded; - }; - - typedef DenseMap<unsigned, RegisterInfo> RegisterInfoMap; - - RegisterInfoMap RegInfo; - - PTXParamManager ParamManager; - -public: - typedef DenseSet<unsigned>::const_iterator reg_iterator; - - PTXMachineFunctionInfo(MachineFunction &MF) - : IsKernel(false) { - } - - /// getParamManager - Returns the PTXParamManager instance for this function. - PTXParamManager& getParamManager() { return ParamManager; } - const PTXParamManager& getParamManager() const { return ParamManager; } - - /// setKernel/isKernel - Gets/sets a flag that indicates if this function is - /// a PTX kernel function. - void setKernel(bool _IsKernel=true) { IsKernel = _IsKernel; } - bool isKernel() const { return IsKernel; } - - /// argreg_begin/argreg_end - Returns iterators to the set of registers - /// containing function arguments. - reg_iterator argreg_begin() const { return RegArgs.begin(); } - reg_iterator argreg_end() const { return RegArgs.end(); } - - /// retreg_begin/retreg_end - Returns iterators to the set of registers - /// containing the function return values. - reg_iterator retreg_begin() const { return RegRets.begin(); } - reg_iterator retreg_end() const { return RegRets.end(); } - - /// addRegister - Adds a virtual register to the set of all used registers - void addRegister(unsigned Reg, unsigned RegType, unsigned RegSpace) { - if (!RegInfo.count(Reg)) { - RegisterInfo Info; - Info.Reg = Reg; - Info.Type = RegType; - Info.Space = RegSpace; - - // Determine register offset - Info.Offset = 0; - for(RegisterInfoMap::const_iterator i = RegInfo.begin(), - e = RegInfo.end(); i != e; ++i) { - const RegisterInfo& RI = i->second; - if (RI.Space == RegSpace) - if (RI.Space != PTXRegisterSpace::Reg || RI.Type == Info.Type) - Info.Offset++; - } - - // Encode the register data into a single register number - Info.Encoded = (Info.Offset << 6) | (Info.Type << 3) | Info.Space; - - RegInfo[Reg] = Info; - - if (RegSpace == PTXRegisterSpace::Argument) - RegArgs.insert(Reg); - else if (RegSpace == PTXRegisterSpace::Return) - RegRets.insert(Reg); - } - } - - /// countRegisters - Returns the number of registers of the given type and - /// space. - unsigned countRegisters(unsigned RegType, unsigned RegSpace) const { - unsigned Count = 0; - for(RegisterInfoMap::const_iterator i = RegInfo.begin(), e = RegInfo.end(); - i != e; ++i) { - const RegisterInfo& RI = i->second; - if (RI.Type == RegType && RI.Space == RegSpace) - Count++; - } - return Count; - } - - /// getEncodedRegister - Returns the encoded value of the register. - unsigned getEncodedRegister(unsigned Reg) const { - return RegInfo.lookup(Reg).Encoded; - } - - /// addRetReg - Adds a register to the set of return-value registers. - void addRetReg(unsigned Reg) { - if (!RegRets.count(Reg)) { - RegRets.insert(Reg); - } - } - - /// addArgReg - Adds a register to the set of function argument registers. - void addArgReg(unsigned Reg) { - RegArgs.insert(Reg); - } - - /// getRegisterName - Returns the name of the specified virtual register. This - /// name is used during PTX emission. - std::string getRegisterName(unsigned Reg) const { - if (RegInfo.count(Reg)) { - const RegisterInfo& RI = RegInfo.lookup(Reg); - std::string Name; - raw_string_ostream NameStr(Name); - decodeRegisterName(NameStr, RI.Encoded); - NameStr.flush(); - return Name; - } - else if (Reg == PTX::NoRegister) - return "%noreg"; - else - llvm_unreachable("Register not in register name map"); - } - - /// getEncodedRegisterName - Returns the name of the encoded register. - std::string getEncodedRegisterName(unsigned EncodedReg) const { - std::string Name; - raw_string_ostream NameStr(Name); - decodeRegisterName(NameStr, EncodedReg); - NameStr.flush(); - return Name; - } - - /// getRegisterType - Returns the type of the specified virtual register. - unsigned getRegisterType(unsigned Reg) const { - if (RegInfo.count(Reg)) - return RegInfo.lookup(Reg).Type; - else - llvm_unreachable("Unknown register"); - } - - /// getOffsetForRegister - Returns the offset of the virtual register - unsigned getOffsetForRegister(unsigned Reg) const { - if (RegInfo.count(Reg)) - return RegInfo.lookup(Reg).Offset; - else - return 0; - } - - /// getFrameSymbol - Returns the symbol name for the given FrameIndex. - const char* getFrameSymbol(int FrameIndex) { - if (FrameSymbols.count(FrameIndex)) { - return FrameSymbols.lookup(FrameIndex).c_str(); - } else { - std::string Name = "__local"; - Name += utostr(FrameIndex); - // The whole point of caching this name is to ensure the pointer we pass - // to any getExternalSymbol() calls will remain valid for the lifetime of - // the back-end instance. This is to work around an issue in SelectionDAG - // where symbol names are expected to be life-long strings. - FrameSymbols[FrameIndex] = Name; - return FrameSymbols[FrameIndex].c_str(); - } - } -}; // class PTXMachineFunctionInfo -} // namespace llvm - -#endif // PTX_MACHINE_FUNCTION_INFO_H diff --git a/lib/Target/PTX/PTXParamManager.cpp b/lib/Target/PTX/PTXParamManager.cpp deleted file mode 100644 index cc1cc71..0000000 --- a/lib/Target/PTX/PTXParamManager.cpp +++ /dev/null @@ -1,73 +0,0 @@ -//===-- PTXParamManager.cpp - Manager for .param variables ----------------===// -// -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. -// -//===----------------------------------------------------------------------===// -// -// This file implements the PTXParamManager class. -// -//===----------------------------------------------------------------------===// - -#include "PTXParamManager.h" -#include "PTX.h" -#include "llvm/ADT/StringExtras.h" - -using namespace llvm; - -PTXParamManager::PTXParamManager() { -} - -unsigned PTXParamManager::addArgumentParam(unsigned Size) { - PTXParam Param; - Param.Type = PTX_PARAM_TYPE_ARGUMENT; - Param.Size = Size; - - std::string Name; - Name = "__param_"; - Name += utostr(ArgumentParams.size()+1); - Param.Name = Name; - - unsigned Index = AllParams.size(); - AllParams[Index] = Param; - ArgumentParams.push_back(Index); - - return Index; -} - -unsigned PTXParamManager::addReturnParam(unsigned Size) { - PTXParam Param; - Param.Type = PTX_PARAM_TYPE_RETURN; - Param.Size = Size; - - std::string Name; - Name = "__ret_"; - Name += utostr(ReturnParams.size()+1); - Param.Name = Name; - - unsigned Index = AllParams.size(); - AllParams[Index] = Param; - ReturnParams.push_back(Index); - - return Index; -} - -unsigned PTXParamManager::addLocalParam(unsigned Size) { - PTXParam Param; - Param.Type = PTX_PARAM_TYPE_LOCAL; - Param.Size = Size; - - std::string Name; - Name = "__localparam_"; - Name += utostr(LocalParams.size()+1); - Param.Name = Name; - - unsigned Index = AllParams.size(); - AllParams[Index] = Param; - LocalParams.push_back(Index); - - return Index; -} - diff --git a/lib/Target/PTX/PTXParamManager.h b/lib/Target/PTX/PTXParamManager.h deleted file mode 100644 index 92e7728..0000000 --- a/lib/Target/PTX/PTXParamManager.h +++ /dev/null @@ -1,87 +0,0 @@ -//===-- PTXParamManager.h - Manager for .param variables --------*- C++ -*-===// -// -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. -// -//===----------------------------------------------------------------------===// -// -// This file defines the PTXParamManager class, which manages all defined .param -// variables for a particular function. -// -//===----------------------------------------------------------------------===// - -#ifndef PTX_PARAM_MANAGER_H -#define PTX_PARAM_MANAGER_H - -#include "llvm/ADT/DenseMap.h" -#include "llvm/ADT/SmallVector.h" -#include <string> - -namespace llvm { - -/// PTXParamManager - This class manages all .param variables defined for a -/// particular function. -class PTXParamManager { -private: - - /// PTXParamType - Type of a .param variable - enum PTXParamType { - PTX_PARAM_TYPE_ARGUMENT, - PTX_PARAM_TYPE_RETURN, - PTX_PARAM_TYPE_LOCAL - }; - - /// PTXParam - Definition of a PTX .param variable - struct PTXParam { - PTXParamType Type; - unsigned Size; - std::string Name; - }; - - DenseMap<unsigned, PTXParam> AllParams; - SmallVector<unsigned, 4> ArgumentParams; - SmallVector<unsigned, 4> ReturnParams; - SmallVector<unsigned, 4> LocalParams; - -public: - - typedef SmallVector<unsigned, 4>::const_iterator param_iterator; - - PTXParamManager(); - - param_iterator arg_begin() const { return ArgumentParams.begin(); } - param_iterator arg_end() const { return ArgumentParams.end(); } - param_iterator ret_begin() const { return ReturnParams.begin(); } - param_iterator ret_end() const { return ReturnParams.end(); } - param_iterator local_begin() const { return LocalParams.begin(); } - param_iterator local_end() const { return LocalParams.end(); } - - /// addArgumentParam - Returns a new .param used as an argument. - unsigned addArgumentParam(unsigned Size); - - /// addReturnParam - Returns a new .param used as a return argument. - unsigned addReturnParam(unsigned Size); - - /// addLocalParam - Returns a new .param used as a local .param variable. - unsigned addLocalParam(unsigned Size); - - /// getParamName - Returns the name of the parameter as a string. - const std::string &getParamName(unsigned Param) const { - assert(AllParams.count(Param) == 1 && "Param has not been defined!"); - return AllParams.find(Param)->second.Name; - } - - /// getParamSize - Returns the size of the parameter in bits. - unsigned getParamSize(unsigned Param) const { - assert(AllParams.count(Param) == 1 && "Param has not been defined!"); - return AllParams.find(Param)->second.Size; - } - -}; - -} - -#endif - diff --git a/lib/Target/PTX/PTXRegAlloc.cpp b/lib/Target/PTX/PTXRegAlloc.cpp deleted file mode 100644 index 7fd5375..0000000 --- a/lib/Target/PTX/PTXRegAlloc.cpp +++ /dev/null @@ -1,53 +0,0 @@ -//===-- PTXRegAlloc.cpp - PTX Register Allocator --------------------------===// -// -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. -// -//===----------------------------------------------------------------------===// -// -// This file contains a register allocator for PTX code. -// -//===----------------------------------------------------------------------===// - -#define DEBUG_TYPE "ptx-reg-alloc" - -#include "PTX.h" -#include "llvm/CodeGen/MachineFunctionPass.h" -#include "llvm/CodeGen/RegAllocRegistry.h" - -using namespace llvm; - -namespace { - // Special register allocator for PTX. - class PTXRegAlloc : public MachineFunctionPass { - public: - static char ID; - PTXRegAlloc() : MachineFunctionPass(ID) {} - - virtual const char* getPassName() const { - return "PTX Register Allocator"; - } - - virtual void getAnalysisUsage(AnalysisUsage &AU) const { - AU.setPreservesCFG(); - MachineFunctionPass::getAnalysisUsage(AU); - } - - virtual bool runOnMachineFunction(MachineFunction &MF) { - // We do not actually do anything (at least not yet). - return false; - } - }; - - char PTXRegAlloc::ID = 0; - - static RegisterRegAlloc - ptxRegAlloc("ptx", "PTX register allocator", createPTXRegisterAllocator); -} - -FunctionPass *llvm::createPTXRegisterAllocator() { - return new PTXRegAlloc(); -} - diff --git a/lib/Target/PTX/PTXRegisterInfo.cpp b/lib/Target/PTX/PTXRegisterInfo.cpp deleted file mode 100644 index b6ffd38..0000000 --- a/lib/Target/PTX/PTXRegisterInfo.cpp +++ /dev/null @@ -1,38 +0,0 @@ -//===-- PTXRegisterInfo.cpp - PTX Register Information --------------------===// -// -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. -// -//===----------------------------------------------------------------------===// -// -// This file contains the PTX implementation of the TargetRegisterInfo class. -// -//===----------------------------------------------------------------------===// - -#include "PTXRegisterInfo.h" -#include "PTX.h" -#include "llvm/CodeGen/MachineFunction.h" -#include "llvm/CodeGen/MachineInstrBuilder.h" -#include "llvm/CodeGen/MachineRegisterInfo.h" -#include "llvm/Target/TargetInstrInfo.h" -#include "llvm/Support/Debug.h" -#include "llvm/Support/raw_ostream.h" - -#define GET_REGINFO_TARGET_DESC -#include "PTXGenRegisterInfo.inc" - -using namespace llvm; - -PTXRegisterInfo::PTXRegisterInfo(PTXTargetMachine &TM, - const TargetInstrInfo &tii) - // PTX does not have a return address register. - : PTXGenRegisterInfo(0), TII(tii) { -} - -void PTXRegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator /*II*/, - int /*SPAdj*/, - RegScavenger * /*RS*/) const { - llvm_unreachable("FrameIndex should have been previously eliminated!"); -} diff --git a/lib/Target/PTX/PTXRegisterInfo.h b/lib/Target/PTX/PTXRegisterInfo.h deleted file mode 100644 index 5614ce7..0000000 --- a/lib/Target/PTX/PTXRegisterInfo.h +++ /dev/null @@ -1,56 +0,0 @@ -//===-- PTXRegisterInfo.h - PTX Register Information Impl -------*- C++ -*-===// -// -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. -// -//===----------------------------------------------------------------------===// -// -// This file contains the PTX implementation of the MRegisterInfo class. -// -//===----------------------------------------------------------------------===// - -#ifndef PTX_REGISTER_INFO_H -#define PTX_REGISTER_INFO_H - -#include "llvm/Support/ErrorHandling.h" -#include "llvm/ADT/BitVector.h" - -#define GET_REGINFO_HEADER -#include "PTXGenRegisterInfo.inc" - -namespace llvm { -class PTXTargetMachine; -class MachineFunction; - -struct PTXRegisterInfo : public PTXGenRegisterInfo { -private: - const TargetInstrInfo &TII; - -public: - PTXRegisterInfo(PTXTargetMachine &TM, - const TargetInstrInfo &tii); - - virtual const uint16_t - *getCalleeSavedRegs(const MachineFunction *MF = 0) const { - static const uint16_t CalleeSavedRegs[] = { 0 }; - return CalleeSavedRegs; // save nothing - } - - virtual BitVector getReservedRegs(const MachineFunction &MF) const { - BitVector Reserved(getNumRegs()); - return Reserved; // reserve no regs - } - - virtual void eliminateFrameIndex(MachineBasicBlock::iterator II, - int SPAdj, - RegScavenger *RS = NULL) const; - - virtual unsigned getFrameRegister(const MachineFunction &MF) const { - llvm_unreachable("PTX does not have a frame register"); - } -}; // struct PTXRegisterInfo -} // namespace llvm - -#endif // PTX_REGISTER_INFO_H diff --git a/lib/Target/PTX/PTXRegisterInfo.td b/lib/Target/PTX/PTXRegisterInfo.td deleted file mode 100644 index e8b262e..0000000 --- a/lib/Target/PTX/PTXRegisterInfo.td +++ /dev/null @@ -1,36 +0,0 @@ -//===-- PTXRegisterInfo.td - PTX Register defs -------------*- tablegen -*-===// -// -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. -// -//===----------------------------------------------------------------------===// - -//===----------------------------------------------------------------------===// -// Declarations that describe the PTX register file -//===----------------------------------------------------------------------===// - -class PTXReg<string n> : Register<n> { - let Namespace = "PTX"; -} - -//===----------------------------------------------------------------------===// -// Registers -//===----------------------------------------------------------------------===// - -// The generated register info code throws warnings for empty register classes -// (e.g. zero-length arrays), so we use a dummy register here just to prevent -// these warnings. -def DUMMY_REG : PTXReg<"R0">; - -//===----------------------------------------------------------------------===// -// Register classes -//===----------------------------------------------------------------------===// -def RegPred : RegisterClass<"PTX", [i1], 8, (add DUMMY_REG)>; -def RegI16 : RegisterClass<"PTX", [i16], 16, (add DUMMY_REG)>; -def RegI32 : RegisterClass<"PTX", [i32], 32, (add DUMMY_REG)>; -def RegI64 : RegisterClass<"PTX", [i64], 64, (add DUMMY_REG)>; -def RegF32 : RegisterClass<"PTX", [f32], 32, (add DUMMY_REG)>; -def RegF64 : RegisterClass<"PTX", [f64], 64, (add DUMMY_REG)>; - diff --git a/lib/Target/PTX/PTXSelectionDAGInfo.cpp b/lib/Target/PTX/PTXSelectionDAGInfo.cpp deleted file mode 100644 index a116fab..0000000 --- a/lib/Target/PTX/PTXSelectionDAGInfo.cpp +++ /dev/null @@ -1,150 +0,0 @@ -//===-- PTXSelectionDAGInfo.cpp - PTX SelectionDAG Info -------------------===// -// -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. -// -//===----------------------------------------------------------------------===// -// -// This file implements the PTXSelectionDAGInfo class. -// -//===----------------------------------------------------------------------===// - -#define DEBUG_TYPE "ptx-selectiondag-info" -#include "PTXTargetMachine.h" -#include "llvm/DerivedTypes.h" -#include "llvm/CodeGen/SelectionDAG.h" -using namespace llvm; - -PTXSelectionDAGInfo::PTXSelectionDAGInfo(const TargetMachine &TM) - : TargetSelectionDAGInfo(TM), - Subtarget(&TM.getSubtarget<PTXSubtarget>()) { -} - -PTXSelectionDAGInfo::~PTXSelectionDAGInfo() { -} - -SDValue -PTXSelectionDAGInfo::EmitTargetCodeForMemcpy(SelectionDAG &DAG, DebugLoc dl, - SDValue Chain, - SDValue Dst, SDValue Src, - SDValue Size, unsigned Align, - bool isVolatile, bool AlwaysInline, - MachinePointerInfo DstPtrInfo, - MachinePointerInfo SrcPtrInfo) const { - // Do repeated 4-byte loads and stores. To be improved. - // This requires 4-byte alignment. - if ((Align & 3) != 0) - return SDValue(); - // This requires the copy size to be a constant, preferably - // within a subtarget-specific limit. - ConstantSDNode *ConstantSize = dyn_cast<ConstantSDNode>(Size); - if (!ConstantSize) - return SDValue(); - uint64_t SizeVal = ConstantSize->getZExtValue(); - // Always inline memcpys. In PTX, we do not have a C library that provides - // a memcpy function. - //if (!AlwaysInline) - // return SDValue(); - - unsigned BytesLeft = SizeVal & 3; - unsigned NumMemOps = SizeVal >> 2; - unsigned EmittedNumMemOps = 0; - EVT VT = MVT::i32; - unsigned VTSize = 4; - unsigned i = 0; - const unsigned MAX_LOADS_IN_LDM = 6; - SDValue TFOps[MAX_LOADS_IN_LDM]; - SDValue Loads[MAX_LOADS_IN_LDM]; - uint64_t SrcOff = 0, DstOff = 0; - EVT PointerType = Subtarget->is64Bit() ? MVT::i64 : MVT::i32; - - // Emit up to MAX_LOADS_IN_LDM loads, then a TokenFactor barrier, then the - // same number of stores. The loads and stores will get combined into - // ldm/stm later on. - while (EmittedNumMemOps < NumMemOps) { - for (i = 0; - i < MAX_LOADS_IN_LDM && EmittedNumMemOps + i < NumMemOps; ++i) { - Loads[i] = DAG.getLoad(VT, dl, Chain, - DAG.getNode(ISD::ADD, dl, PointerType, Src, - DAG.getConstant(SrcOff, PointerType)), - SrcPtrInfo.getWithOffset(SrcOff), isVolatile, - false, false, 0); - TFOps[i] = Loads[i].getValue(1); - SrcOff += VTSize; - } - Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, &TFOps[0], i); - - for (i = 0; - i < MAX_LOADS_IN_LDM && EmittedNumMemOps + i < NumMemOps; ++i) { - TFOps[i] = DAG.getStore(Chain, dl, Loads[i], - DAG.getNode(ISD::ADD, dl, PointerType, Dst, - DAG.getConstant(DstOff, PointerType)), - DstPtrInfo.getWithOffset(DstOff), - isVolatile, false, 0); - DstOff += VTSize; - } - Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, &TFOps[0], i); - - EmittedNumMemOps += i; - } - - if (BytesLeft == 0) - return Chain; - - // Issue loads / stores for the trailing (1 - 3) bytes. - unsigned BytesLeftSave = BytesLeft; - i = 0; - while (BytesLeft) { - if (BytesLeft >= 2) { - VT = MVT::i16; - VTSize = 2; - } else { - VT = MVT::i8; - VTSize = 1; - } - - Loads[i] = DAG.getLoad(VT, dl, Chain, - DAG.getNode(ISD::ADD, dl, PointerType, Src, - DAG.getConstant(SrcOff, PointerType)), - SrcPtrInfo.getWithOffset(SrcOff), false, false, - false, 0); - TFOps[i] = Loads[i].getValue(1); - ++i; - SrcOff += VTSize; - BytesLeft -= VTSize; - } - Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, &TFOps[0], i); - - i = 0; - BytesLeft = BytesLeftSave; - while (BytesLeft) { - if (BytesLeft >= 2) { - VT = MVT::i16; - VTSize = 2; - } else { - VT = MVT::i8; - VTSize = 1; - } - - TFOps[i] = DAG.getStore(Chain, dl, Loads[i], - DAG.getNode(ISD::ADD, dl, PointerType, Dst, - DAG.getConstant(DstOff, PointerType)), - DstPtrInfo.getWithOffset(DstOff), false, false, 0); - ++i; - DstOff += VTSize; - BytesLeft -= VTSize; - } - return DAG.getNode(ISD::TokenFactor, dl, MVT::Other, &TFOps[0], i); -} - -SDValue PTXSelectionDAGInfo:: -EmitTargetCodeForMemset(SelectionDAG &DAG, DebugLoc dl, - SDValue Chain, SDValue Dst, - SDValue Src, SDValue Size, - unsigned Align, bool isVolatile, - MachinePointerInfo DstPtrInfo) const { - llvm_unreachable("memset lowering not implemented for PTX yet"); -} - diff --git a/lib/Target/PTX/PTXSelectionDAGInfo.h b/lib/Target/PTX/PTXSelectionDAGInfo.h deleted file mode 100644 index e0c7167..0000000 --- a/lib/Target/PTX/PTXSelectionDAGInfo.h +++ /dev/null @@ -1,53 +0,0 @@ -//===-- PTXSelectionDAGInfo.h - PTX SelectionDAG Info -----------*- C++ -*-===// -// -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. -// -//===----------------------------------------------------------------------===// -// -// This file defines the PTX subclass for TargetSelectionDAGInfo. -// -//===----------------------------------------------------------------------===// - -#ifndef PTXSELECTIONDAGINFO_H -#define PTXSELECTIONDAGINFO_H - -#include "llvm/Target/TargetSelectionDAGInfo.h" - -namespace llvm { - -/// PTXSelectionDAGInfo - TargetSelectionDAGInfo sub-class for the PTX target. -/// At the moment, this is mostly just a copy of ARMSelectionDAGInfo. -class PTXSelectionDAGInfo : public TargetSelectionDAGInfo { - /// Subtarget - Keep a pointer to the PTXSubtarget around so that we can - /// make the right decision when generating code for different targets. - const PTXSubtarget *Subtarget; - -public: - explicit PTXSelectionDAGInfo(const TargetMachine &TM); - ~PTXSelectionDAGInfo(); - - virtual - SDValue EmitTargetCodeForMemcpy(SelectionDAG &DAG, DebugLoc dl, - SDValue Chain, - SDValue Dst, SDValue Src, - SDValue Size, unsigned Align, - bool isVolatile, bool AlwaysInline, - MachinePointerInfo DstPtrInfo, - MachinePointerInfo SrcPtrInfo) const; - - virtual - SDValue EmitTargetCodeForMemset(SelectionDAG &DAG, DebugLoc dl, - SDValue Chain, - SDValue Op1, SDValue Op2, - SDValue Op3, unsigned Align, - bool isVolatile, - MachinePointerInfo DstPtrInfo) const; -}; - -} - -#endif - diff --git a/lib/Target/PTX/PTXSubtarget.cpp b/lib/Target/PTX/PTXSubtarget.cpp deleted file mode 100644 index 454f64e..0000000 --- a/lib/Target/PTX/PTXSubtarget.cpp +++ /dev/null @@ -1,68 +0,0 @@ -//===-- PTXSubtarget.cpp - PTX Subtarget Information ----------------------===// -// -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. -// -//===----------------------------------------------------------------------===// -// -// This file implements the PTX specific subclass of TargetSubtargetInfo. -// -//===----------------------------------------------------------------------===// - -#include "PTXSubtarget.h" -#include "PTX.h" -#include "llvm/Support/ErrorHandling.h" -#include "llvm/Support/TargetRegistry.h" - -#define GET_SUBTARGETINFO_TARGET_DESC -#define GET_SUBTARGETINFO_CTOR -#include "PTXGenSubtargetInfo.inc" - -using namespace llvm; - -void PTXSubtarget::anchor() { } - -PTXSubtarget::PTXSubtarget(const std::string &TT, const std::string &CPU, - const std::string &FS, bool is64Bit) - : PTXGenSubtargetInfo(TT, CPU, FS), - PTXTarget(PTX_COMPUTE_1_0), - PTXVersion(PTX_VERSION_2_0), - SupportsDouble(false), - SupportsFMA(true), - Is64Bit(is64Bit) { - std::string TARGET = CPU; - if (TARGET.empty()) - TARGET = "generic"; - ParseSubtargetFeatures(TARGET, FS); -} - -std::string PTXSubtarget::getTargetString() const { - switch(PTXTarget) { - default: llvm_unreachable("Unknown PTX target"); - case PTX_SM_1_0: return "sm_10"; - case PTX_SM_1_1: return "sm_11"; - case PTX_SM_1_2: return "sm_12"; - case PTX_SM_1_3: return "sm_13"; - case PTX_SM_2_0: return "sm_20"; - case PTX_SM_2_1: return "sm_21"; - case PTX_SM_2_2: return "sm_22"; - case PTX_SM_2_3: return "sm_23"; - case PTX_COMPUTE_1_0: return "compute_10"; - case PTX_COMPUTE_1_1: return "compute_11"; - case PTX_COMPUTE_1_2: return "compute_12"; - case PTX_COMPUTE_1_3: return "compute_13"; - case PTX_COMPUTE_2_0: return "compute_20"; - } -} - -std::string PTXSubtarget::getPTXVersionString() const { - switch(PTXVersion) { - case PTX_VERSION_2_0: return "2.0"; - case PTX_VERSION_2_1: return "2.1"; - case PTX_VERSION_2_2: return "2.2"; - case PTX_VERSION_2_3: return "2.3"; - } - llvm_unreachable("Invalid PTX version"); -} diff --git a/lib/Target/PTX/PTXSubtarget.h b/lib/Target/PTX/PTXSubtarget.h deleted file mode 100644 index ce93fef..0000000 --- a/lib/Target/PTX/PTXSubtarget.h +++ /dev/null @@ -1,131 +0,0 @@ -//===-- PTXSubtarget.h - Define Subtarget for the PTX -----------*- C++ -*-===// -// -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. -// -//===----------------------------------------------------------------------===// -// -// This file declares the PTX specific subclass of TargetSubtargetInfo. -// -//===----------------------------------------------------------------------===// - -#ifndef PTX_SUBTARGET_H -#define PTX_SUBTARGET_H - -#include "llvm/Target/TargetSubtargetInfo.h" - -#define GET_SUBTARGETINFO_HEADER -#include "PTXGenSubtargetInfo.inc" - -namespace llvm { -class StringRef; - - class PTXSubtarget : public PTXGenSubtargetInfo { - virtual void anchor(); - public: - - /** - * Enumeration of Shader Models supported by the back-end. - */ - enum PTXTargetEnum { - PTX_COMPUTE_1_0, /*< Compute Compatibility 1.0 */ - PTX_COMPUTE_1_1, /*< Compute Compatibility 1.1 */ - PTX_COMPUTE_1_2, /*< Compute Compatibility 1.2 */ - PTX_COMPUTE_1_3, /*< Compute Compatibility 1.3 */ - PTX_COMPUTE_2_0, /*< Compute Compatibility 2.0 */ - PTX_LAST_COMPUTE, - - PTX_SM_1_0, /*< Shader Model 1.0 */ - PTX_SM_1_1, /*< Shader Model 1.1 */ - PTX_SM_1_2, /*< Shader Model 1.2 */ - PTX_SM_1_3, /*< Shader Model 1.3 */ - PTX_SM_2_0, /*< Shader Model 2.0 */ - PTX_SM_2_1, /*< Shader Model 2.1 */ - PTX_SM_2_2, /*< Shader Model 2.2 */ - PTX_SM_2_3, /*< Shader Model 2.3 */ - PTX_LAST_SM - }; - - /** - * Enumeration of PTX versions supported by the back-end. - * - * Currently, PTX 2.0 is the minimum supported version. - */ - enum PTXVersionEnum { - PTX_VERSION_2_0, /*< PTX Version 2.0 */ - PTX_VERSION_2_1, /*< PTX Version 2.1 */ - PTX_VERSION_2_2, /*< PTX Version 2.2 */ - PTX_VERSION_2_3 /*< PTX Version 2.3 */ - }; - - private: - - /// Shader Model supported on the target GPU. - PTXTargetEnum PTXTarget; - - /// PTX Language Version. - PTXVersionEnum PTXVersion; - - // The native .f64 type is supported on the hardware. - bool SupportsDouble; - - // Support the fused-multiply add (FMA) and multiply-add (MAD) - // instructions - bool SupportsFMA; - - // Use .u64 instead of .u32 for addresses. - bool Is64Bit; - - public: - - PTXSubtarget(const std::string &TT, const std::string &CPU, - const std::string &FS, bool is64Bit); - - // Target architecture accessors - std::string getTargetString() const; - - std::string getPTXVersionString() const; - - bool supportsDouble() const { return SupportsDouble; } - - bool is64Bit() const { return Is64Bit; } - - bool supportsFMA() const { return SupportsFMA; } - - bool supportsPTX21() const { return PTXVersion >= PTX_VERSION_2_1; } - - bool supportsPTX22() const { return PTXVersion >= PTX_VERSION_2_2; } - - bool supportsPTX23() const { return PTXVersion >= PTX_VERSION_2_3; } - - bool fdivNeedsRoundingMode() const { - return (PTXTarget >= PTX_SM_1_3 && PTXTarget < PTX_LAST_SM) || - (PTXTarget >= PTX_COMPUTE_1_3 && PTXTarget < PTX_LAST_COMPUTE); - } - - bool fmadNeedsRoundingMode() const { - return (PTXTarget >= PTX_SM_1_3 && PTXTarget < PTX_LAST_SM) || - (PTXTarget >= PTX_COMPUTE_1_3 && PTXTarget < PTX_LAST_COMPUTE); - } - - bool useParamSpaceForDeviceArgs() const { - return (PTXTarget >= PTX_SM_2_0 && PTXTarget < PTX_LAST_SM) || - (PTXTarget >= PTX_COMPUTE_2_0 && PTXTarget < PTX_LAST_COMPUTE); - } - - bool callsAreHandled() const { - return (PTXTarget >= PTX_SM_2_0 && PTXTarget < PTX_LAST_SM) || - (PTXTarget >= PTX_COMPUTE_2_0 && PTXTarget < PTX_LAST_COMPUTE); - } - - bool emitPtrAttribute() const { - return PTXVersion >= PTX_VERSION_2_2; - } - - void ParseSubtargetFeatures(StringRef CPU, StringRef FS); - }; // class PTXSubtarget -} // namespace llvm - -#endif // PTX_SUBTARGET_H diff --git a/lib/Target/PTX/PTXTargetMachine.cpp b/lib/Target/PTX/PTXTargetMachine.cpp deleted file mode 100644 index 97b8de1..0000000 --- a/lib/Target/PTX/PTXTargetMachine.cpp +++ /dev/null @@ -1,165 +0,0 @@ -//===-- PTXTargetMachine.cpp - Define TargetMachine for PTX ---------------===// -// -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. -// -//===----------------------------------------------------------------------===// -// -// Top-level implementation for the PTX target. -// -//===----------------------------------------------------------------------===// - -#include "PTXTargetMachine.h" -#include "PTX.h" -#include "llvm/PassManager.h" -#include "llvm/Analysis/Passes.h" -#include "llvm/Analysis/Verifier.h" -#include "llvm/Assembly/PrintModulePass.h" -#include "llvm/CodeGen/AsmPrinter.h" -#include "llvm/CodeGen/MachineFunctionAnalysis.h" -#include "llvm/CodeGen/MachineModuleInfo.h" -#include "llvm/CodeGen/Passes.h" -#include "llvm/MC/MCAsmInfo.h" -#include "llvm/MC/MCInstrInfo.h" -#include "llvm/MC/MCStreamer.h" -#include "llvm/MC/MCSubtargetInfo.h" -#include "llvm/Support/Debug.h" -#include "llvm/Support/TargetRegistry.h" -#include "llvm/Support/raw_ostream.h" -#include "llvm/Target/TargetData.h" -#include "llvm/Target/TargetInstrInfo.h" -#include "llvm/Target/TargetLowering.h" -#include "llvm/Target/TargetLoweringObjectFile.h" -#include "llvm/Target/TargetMachine.h" -#include "llvm/Target/TargetOptions.h" -#include "llvm/Target/TargetRegisterInfo.h" -#include "llvm/Target/TargetSubtargetInfo.h" -#include "llvm/Transforms/Scalar.h" - - -using namespace llvm; - -namespace llvm { - MCStreamer *createPTXAsmStreamer(MCContext &Ctx, formatted_raw_ostream &OS, - bool isVerboseAsm, bool useLoc, - bool useCFI, bool useDwarfDirectory, - MCInstPrinter *InstPrint, - MCCodeEmitter *CE, - MCAsmBackend *MAB, - bool ShowInst); -} - -extern "C" void LLVMInitializePTXTarget() { - - RegisterTargetMachine<PTX32TargetMachine> X(ThePTX32Target); - RegisterTargetMachine<PTX64TargetMachine> Y(ThePTX64Target); - - TargetRegistry::RegisterAsmStreamer(ThePTX32Target, createPTXAsmStreamer); - TargetRegistry::RegisterAsmStreamer(ThePTX64Target, createPTXAsmStreamer); -} - -namespace { - const char* DataLayout32 = - "e-p:32:32-i64:32:32-f64:32:32-v128:32:128-v64:32:64-n32:64"; - const char* DataLayout64 = - "e-p:64:64-i64:32:32-f64:32:32-v128:32:128-v64:32:64-n32:64"; -} - -// DataLayout and FrameLowering are filled with dummy data -PTXTargetMachine::PTXTargetMachine(const Target &T, - StringRef TT, StringRef CPU, StringRef FS, - const TargetOptions &Options, - Reloc::Model RM, CodeModel::Model CM, - CodeGenOpt::Level OL, - bool is64Bit) - : LLVMTargetMachine(T, TT, CPU, FS, Options, RM, CM, OL), - DataLayout(is64Bit ? DataLayout64 : DataLayout32), - Subtarget(TT, CPU, FS, is64Bit), - FrameLowering(Subtarget), - InstrInfo(*this), - TSInfo(*this), - TLInfo(*this) { -} - -void PTX32TargetMachine::anchor() { } - -PTX32TargetMachine::PTX32TargetMachine(const Target &T, StringRef TT, - StringRef CPU, StringRef FS, - const TargetOptions &Options, - Reloc::Model RM, CodeModel::Model CM, - CodeGenOpt::Level OL) - : PTXTargetMachine(T, TT, CPU, FS, Options, RM, CM, OL, false) { -} - -void PTX64TargetMachine::anchor() { } - -PTX64TargetMachine::PTX64TargetMachine(const Target &T, StringRef TT, - StringRef CPU, StringRef FS, - const TargetOptions &Options, - Reloc::Model RM, CodeModel::Model CM, - CodeGenOpt::Level OL) - : PTXTargetMachine(T, TT, CPU, FS, Options, RM, CM, OL, true) { -} - -namespace llvm { -/// PTX Code Generator Pass Configuration Options. -class PTXPassConfig : public TargetPassConfig { -public: - PTXPassConfig(PTXTargetMachine *TM, PassManagerBase &PM) - : TargetPassConfig(TM, PM) {} - - PTXTargetMachine &getPTXTargetMachine() const { - return getTM<PTXTargetMachine>(); - } - - bool addInstSelector(); - FunctionPass *createTargetRegisterAllocator(bool); - void addOptimizedRegAlloc(FunctionPass *RegAllocPass); - bool addPostRegAlloc(); - void addMachineLateOptimization(); - bool addPreEmitPass(); -}; -} // namespace - -TargetPassConfig *PTXTargetMachine::createPassConfig(PassManagerBase &PM) { - PTXPassConfig *PassConfig = new PTXPassConfig(this, PM); - PassConfig->disablePass(PrologEpilogCodeInserterID); - return PassConfig; -} - -bool PTXPassConfig::addInstSelector() { - PM->add(createPTXISelDag(getPTXTargetMachine(), getOptLevel())); - return false; -} - -FunctionPass *PTXPassConfig::createTargetRegisterAllocator(bool /*Optimized*/) { - return createPTXRegisterAllocator(); -} - -// Modify the optimized compilation path to bypass optimized register alloction. -void PTXPassConfig::addOptimizedRegAlloc(FunctionPass *RegAllocPass) { - addFastRegAlloc(RegAllocPass); -} - -bool PTXPassConfig::addPostRegAlloc() { - // PTXMFInfoExtract must after register allocation! - //PM->add(createPTXMFInfoExtract(getPTXTargetMachine())); - return false; -} - -/// Add passes that optimize machine instructions after register allocation. -void PTXPassConfig::addMachineLateOptimization() { - if (addPass(BranchFolderPassID) != &NoPassID) - printAndVerify("After BranchFolding"); - - if (addPass(TailDuplicateID) != &NoPassID) - printAndVerify("After TailDuplicate"); -} - -bool PTXPassConfig::addPreEmitPass() { - PM->add(createPTXMFInfoExtract(getPTXTargetMachine(), getOptLevel())); - PM->add(createPTXFPRoundingModePass(getPTXTargetMachine(), getOptLevel())); - return true; -} diff --git a/lib/Target/PTX/PTXTargetMachine.h b/lib/Target/PTX/PTXTargetMachine.h deleted file mode 100644 index 278d155..0000000 --- a/lib/Target/PTX/PTXTargetMachine.h +++ /dev/null @@ -1,104 +0,0 @@ -//===-- PTXTargetMachine.h - Define TargetMachine for PTX -------*- C++ -*-===// -// -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. -// -//===----------------------------------------------------------------------===// -// -// This file declares the PTX specific subclass of TargetMachine. -// -//===----------------------------------------------------------------------===// - -#ifndef PTX_TARGET_MACHINE_H -#define PTX_TARGET_MACHINE_H - -#include "PTXISelLowering.h" -#include "PTXInstrInfo.h" -#include "PTXFrameLowering.h" -#include "PTXSelectionDAGInfo.h" -#include "PTXSubtarget.h" -#include "llvm/Target/TargetData.h" -#include "llvm/Target/TargetFrameLowering.h" -#include "llvm/Target/TargetMachine.h" - -namespace llvm { -class PTXTargetMachine : public LLVMTargetMachine { - private: - const TargetData DataLayout; - PTXSubtarget Subtarget; // has to be initialized before FrameLowering - PTXFrameLowering FrameLowering; - PTXInstrInfo InstrInfo; - PTXSelectionDAGInfo TSInfo; - PTXTargetLowering TLInfo; - - public: - PTXTargetMachine(const Target &T, StringRef TT, - StringRef CPU, StringRef FS, const TargetOptions &Options, - Reloc::Model RM, CodeModel::Model CM, - CodeGenOpt::Level OL, - bool is64Bit); - - virtual const TargetData *getTargetData() const { return &DataLayout; } - - virtual const TargetFrameLowering *getFrameLowering() const { - return &FrameLowering; - } - - virtual const PTXInstrInfo *getInstrInfo() const { return &InstrInfo; } - virtual const TargetRegisterInfo *getRegisterInfo() const { - return &InstrInfo.getRegisterInfo(); } - - virtual const PTXTargetLowering *getTargetLowering() const { - return &TLInfo; } - - virtual const PTXSelectionDAGInfo* getSelectionDAGInfo() const { - return &TSInfo; - } - - virtual const PTXSubtarget *getSubtargetImpl() const { return &Subtarget; } - - // Emission of machine code through JITCodeEmitter is not supported. - virtual bool addPassesToEmitMachineCode(PassManagerBase &, - JITCodeEmitter &, - bool = true) { - return true; - } - - // Emission of machine code through MCJIT is not supported. - virtual bool addPassesToEmitMC(PassManagerBase &, - MCContext *&, - raw_ostream &, - bool = true) { - return true; - } - - // Pass Pipeline Configuration - virtual TargetPassConfig *createPassConfig(PassManagerBase &PM); -}; // class PTXTargetMachine - - -class PTX32TargetMachine : public PTXTargetMachine { - virtual void anchor(); -public: - - PTX32TargetMachine(const Target &T, StringRef TT, - StringRef CPU, StringRef FS, const TargetOptions &Options, - Reloc::Model RM, CodeModel::Model CM, - CodeGenOpt::Level OL); -}; // class PTX32TargetMachine - -class PTX64TargetMachine : public PTXTargetMachine { - virtual void anchor(); -public: - - PTX64TargetMachine(const Target &T, StringRef TT, - StringRef CPU, StringRef FS, const TargetOptions &Options, - Reloc::Model RM, CodeModel::Model CM, - CodeGenOpt::Level OL); -}; // class PTX32TargetMachine - -} // namespace llvm - -#endif // PTX_TARGET_MACHINE_H diff --git a/lib/Target/PTX/TargetInfo/CMakeLists.txt b/lib/Target/PTX/TargetInfo/CMakeLists.txt deleted file mode 100644 index d9a5da3..0000000 --- a/lib/Target/PTX/TargetInfo/CMakeLists.txt +++ /dev/null @@ -1,7 +0,0 @@ -include_directories( ${CMAKE_CURRENT_BINARY_DIR}/.. ${CMAKE_CURRENT_SOURCE_DIR}/.. ) - -add_llvm_library(LLVMPTXInfo - PTXTargetInfo.cpp - ) - -add_dependencies(LLVMPTXInfo PTXCommonTableGen) diff --git a/lib/Target/PTX/TargetInfo/LLVMBuild.txt b/lib/Target/PTX/TargetInfo/LLVMBuild.txt deleted file mode 100644 index 2cc30c4..0000000 --- a/lib/Target/PTX/TargetInfo/LLVMBuild.txt +++ /dev/null @@ -1,23 +0,0 @@ -;===- ./lib/Target/PTX/TargetInfo/LLVMBuild.txt ----------------*- Conf -*--===; -; -; The LLVM Compiler Infrastructure -; -; This file is distributed under the University of Illinois Open Source -; License. See LICENSE.TXT for details. -; -;===------------------------------------------------------------------------===; -; -; This is an LLVMBuild description file for the components in this subdirectory. -; -; For more information on the LLVMBuild system, please see: -; -; http://llvm.org/docs/LLVMBuild.html -; -;===------------------------------------------------------------------------===; - -[component_0] -type = Library -name = PTXInfo -parent = PTX -required_libraries = MC Support Target -add_to_library_groups = PTX diff --git a/lib/Target/PTX/TargetInfo/Makefile b/lib/Target/PTX/TargetInfo/Makefile deleted file mode 100644 index 8619785..0000000 --- a/lib/Target/PTX/TargetInfo/Makefile +++ /dev/null @@ -1,15 +0,0 @@ -##===- lib/Target/PTX/TargetInfo/Makefile ------------------*- Makefile -*-===## -# -# The LLVM Compiler Infrastructure -# -# This file is distributed under the University of Illinois Open Source -# License. See LICENSE.TXT for details. -# -##===----------------------------------------------------------------------===## -LEVEL = ../../../.. -LIBRARYNAME = LLVMPTXInfo - -# Hack: we need to include 'main' target directory to grab private headers -CPPFLAGS = -I$(PROJ_OBJ_DIR)/.. -I$(PROJ_SRC_DIR)/.. - -include $(LEVEL)/Makefile.common diff --git a/lib/Target/PTX/TargetInfo/PTXTargetInfo.cpp b/lib/Target/PTX/TargetInfo/PTXTargetInfo.cpp deleted file mode 100644 index 09a2735..0000000 --- a/lib/Target/PTX/TargetInfo/PTXTargetInfo.cpp +++ /dev/null @@ -1,25 +0,0 @@ -//===-- PTXTargetInfo.cpp - PTX Target Implementation ---------------------===// -// -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. -// -//===----------------------------------------------------------------------===// - -#include "PTX.h" -#include "llvm/Module.h" -#include "llvm/Support/TargetRegistry.h" - -using namespace llvm; - -Target llvm::ThePTX32Target; -Target llvm::ThePTX64Target; - -extern "C" void LLVMInitializePTXTargetInfo() { - // see llvm/ADT/Triple.h - RegisterTarget<Triple::ptx32> X32(ThePTX32Target, "ptx32", - "PTX (32-bit) [Experimental]"); - RegisterTarget<Triple::ptx64> X64(ThePTX64Target, "ptx64", - "PTX (64-bit) [Experimental]"); -} diff --git a/projects/sample/autoconf/configure.ac b/projects/sample/autoconf/configure.ac index e5a4b35..092bc68 100644 --- a/projects/sample/autoconf/configure.ac +++ b/projects/sample/autoconf/configure.ac @@ -309,7 +309,6 @@ AC_CACHE_CHECK([target architecture],[llvm_cv_target_arch], msp430-*) llvm_cv_target_arch="MSP430" ;; hexagon-*) llvm_cv_target_arch="Hexagon" ;; mblaze-*) llvm_cv_target_arch="MBlaze" ;; - ptx-*) llvm_cv_target_arch="PTX" ;; nvptx-*) llvm_cv_target_arch="NVPTX" ;; *) llvm_cv_target_arch="Unknown" ;; esac]) @@ -457,7 +456,6 @@ else MSP430) AC_SUBST(TARGET_HAS_JIT,0) ;; Hexagon) AC_SUBST(TARGET_HAS_JIT,0) ;; MBlaze) AC_SUBST(TARGET_HAS_JIT,0) ;; - PTX) AC_SUBST(TARGET_HAS_JIT,0) ;; NVPTX) AC_SUBST(TARGET_HAS_JIT,0) ;; *) AC_SUBST(TARGET_HAS_JIT,0) ;; esac @@ -569,13 +567,13 @@ TARGETS_TO_BUILD="" AC_ARG_ENABLE([targets],AS_HELP_STRING([--enable-targets], [Build specific host targets: all or target1,target2,... Valid targets are: host, x86, x86_64, sparc, powerpc, arm, mips, spu, hexagon, - xcore, msp430, ptx, nvptx, cbe, and cpp (default=all)]),, + xcore, msp430, nvptx, cbe, and cpp (default=all)]),, enableval=all) if test "$enableval" = host-only ; then enableval=host fi case "$enableval" in - all) TARGETS_TO_BUILD="X86 Sparc PowerPC ARM Mips CellSPU XCore MSP430 Hexagon CppBackend MBlaze PTX NVPTX" ;; + all) TARGETS_TO_BUILD="X86 Sparc PowerPC ARM Mips CellSPU XCore MSP430 Hexagon CppBackend MBlaze NVPTX" ;; *)for a_target in `echo $enableval|sed -e 's/,/ /g' ` ; do case "$a_target" in x86) TARGETS_TO_BUILD="X86 $TARGETS_TO_BUILD" ;; @@ -590,7 +588,6 @@ case "$enableval" in hexagon) TARGETS_TO_BUILD="Hexagon $TARGETS_TO_BUILD" ;; cpp) TARGETS_TO_BUILD="CppBackend $TARGETS_TO_BUILD" ;; mblaze) TARGETS_TO_BUILD="MBlaze $TARGETS_TO_BUILD" ;; - ptx) TARGETS_TO_BUILD="PTX $TARGETS_TO_BUILD" ;; nvptx) TARGETS_TO_BUILD="NVPTX $TARGETS_TO_BUILD" ;; host) case "$llvm_cv_target_arch" in x86) TARGETS_TO_BUILD="X86 $TARGETS_TO_BUILD" ;; @@ -604,7 +601,6 @@ case "$enableval" in XCore) TARGETS_TO_BUILD="XCore $TARGETS_TO_BUILD" ;; MSP430) TARGETS_TO_BUILD="MSP430 $TARGETS_TO_BUILD" ;; Hexagon) TARGETS_TO_BUILD="Hexagon $TARGETS_TO_BUILD" ;; - PTX) TARGETS_TO_BUILD="PTX $TARGETS_TO_BUILD" ;; NVPTX) TARGETS_TO_BUILD="NVPTX $TARGETS_TO_BUILD" ;; *) AC_MSG_ERROR([Can not set target to build]) ;; esac ;; diff --git a/projects/sample/configure b/projects/sample/configure index a54e153..d925888 100755 --- a/projects/sample/configure +++ b/projects/sample/configure @@ -1402,8 +1402,7 @@ Optional Features: --enable-targets Build specific host targets: all or target1,target2,... Valid targets are: host, x86, x86_64, sparc, powerpc, arm, mips, spu, hexagon, - xcore, msp430, ptx, nvptx, cbe, and cpp - (default=all) + xcore, msp430, nvptx, cbe, and cpp (default=all) --enable-bindings Build specific language bindings: all,auto,none,{binding-name} (default=auto) --enable-libffi Check for the presence of libffi (default is NO) @@ -3846,7 +3845,6 @@ else msp430-*) llvm_cv_target_arch="MSP430" ;; hexagon-*) llvm_cv_target_arch="Hexagon" ;; mblaze-*) llvm_cv_target_arch="MBlaze" ;; - ptx-*) llvm_cv_target_arch="PTX" ;; nvptx-*) llvm_cv_target_arch="NVPTX" ;; *) llvm_cv_target_arch="Unknown" ;; esac @@ -5070,8 +5068,6 @@ else ;; MBlaze) TARGET_HAS_JIT=0 ;; - PTX) TARGET_HAS_JIT=0 - ;; NVPTX) TARGET_HAS_JIT=0 ;; *) TARGET_HAS_JIT=0 @@ -5258,7 +5254,7 @@ if test "$enableval" = host-only ; then enableval=host fi case "$enableval" in - all) TARGETS_TO_BUILD="X86 Sparc PowerPC ARM Mips CellSPU XCore MSP430 Hexagon CppBackend MBlaze PTX NVPTX" ;; + all) TARGETS_TO_BUILD="X86 Sparc PowerPC ARM Mips CellSPU XCore MSP430 Hexagon CppBackend MBlaze NVPTX" ;; *)for a_target in `echo $enableval|sed -e 's/,/ /g' ` ; do case "$a_target" in x86) TARGETS_TO_BUILD="X86 $TARGETS_TO_BUILD" ;; @@ -5273,7 +5269,6 @@ case "$enableval" in hexagon) TARGETS_TO_BUILD="Hexagon $TARGETS_TO_BUILD" ;; cpp) TARGETS_TO_BUILD="CppBackend $TARGETS_TO_BUILD" ;; mblaze) TARGETS_TO_BUILD="MBlaze $TARGETS_TO_BUILD" ;; - ptx) TARGETS_TO_BUILD="PTX $TARGETS_TO_BUILD" ;; nvptx) TARGETS_TO_BUILD="NVPTX $TARGETS_TO_BUILD" ;; host) case "$llvm_cv_target_arch" in x86) TARGETS_TO_BUILD="X86 $TARGETS_TO_BUILD" ;; @@ -5287,7 +5282,6 @@ case "$enableval" in XCore) TARGETS_TO_BUILD="XCore $TARGETS_TO_BUILD" ;; MSP430) TARGETS_TO_BUILD="MSP430 $TARGETS_TO_BUILD" ;; Hexagon) TARGETS_TO_BUILD="Hexagon $TARGETS_TO_BUILD" ;; - PTX) TARGETS_TO_BUILD="PTX $TARGETS_TO_BUILD" ;; NVPTX) TARGETS_TO_BUILD="NVPTX $TARGETS_TO_BUILD" ;; *) { { echo "$as_me:$LINENO: error: Can not set target to build" >&5 echo "$as_me: error: Can not set target to build" >&2;} @@ -10313,7 +10307,7 @@ else lt_dlunknown=0; lt_dlno_uscore=1; lt_dlneed_uscore=2 lt_status=$lt_dlunknown cat > conftest.$ac_ext <<EOF -#line 10316 "configure" +#line 10310 "configure" #include "confdefs.h" #if HAVE_DLFCN_H diff --git a/test/CodeGen/PTX/20110926-sitofp.ll b/test/CodeGen/PTX/20110926-sitofp.ll deleted file mode 100644 index 38d35c5..0000000 --- a/test/CodeGen/PTX/20110926-sitofp.ll +++ /dev/null @@ -1,24 +0,0 @@ -; RUN: llc < %s -march=ptx32 | FileCheck %s - -@A = common global [1536 x [1536 x float]] zeroinitializer, align 4 -@B = common global [1536 x [1536 x float]] zeroinitializer, align 4 - -define internal ptx_device void @init_array(i32 %x, i32 %y) { - %arrayidx103 = getelementptr [1536 x [1536 x float]]* @A, i32 0, i32 %x, i32 %y - %arrayidx224 = getelementptr [1536 x [1536 x float]]* @B, i32 0, i32 %x, i32 %y - %mul5 = mul i32 %x, %y - %rem = srem i32 %mul5, 1024 - %add = add nsw i32 %rem, 1 -; CHECK: cvt.rn.f64.s32 %fd{{[0-9]+}}, %r{{[0-9]+}} - %conv = sitofp i32 %add to double - %div = fmul double %conv, 5.000000e-01 - %conv7 = fptrunc double %div to float - store float %conv7, float* %arrayidx103, align 4 - %rem14 = srem i32 %mul5, 1024 - %add15 = add nsw i32 %rem14, 1 - %conv16 = sitofp i32 %add15 to double - %div17 = fmul double %conv16, 5.000000e-01 - %conv18 = fptrunc double %div17 to float - store float %conv18, float* %arrayidx224, align 4 - ret void -} diff --git a/test/CodeGen/PTX/add.ll b/test/CodeGen/PTX/add.ll deleted file mode 100644 index 8b10d11..0000000 --- a/test/CodeGen/PTX/add.ll +++ /dev/null @@ -1,71 +0,0 @@ -; RUN: llc < %s -march=ptx32 | FileCheck %s - -define ptx_device i16 @t1_u16(i16 %x, i16 %y) { -; CHECK: add.u16 %ret{{[0-9]+}}, %rh{{[0-9]+}}, %rh{{[0-9]+}}; -; CHECK: ret; - %z = add i16 %x, %y - ret i16 %z -} - -define ptx_device i32 @t1_u32(i32 %x, i32 %y) { -; CHECK: add.u32 %ret{{[0-9]+}}, %r{{[0-9]+}}, %r{{[0-9]+}}; -; CHECK: ret; - %z = add i32 %x, %y - ret i32 %z -} - -define ptx_device i64 @t1_u64(i64 %x, i64 %y) { -; CHECK: add.u64 %ret{{[0-9]+}}, %rd{{[0-9]+}}, %rd{{[0-9]+}}; -; CHECK: ret; - %z = add i64 %x, %y - ret i64 %z -} - -define ptx_device float @t1_f32(float %x, float %y) { -; CHECK: add.rn.f32 %ret{{[0-9]+}}, %f{{[0-9]+}}, %f{{[0-9]+}} -; CHECK: ret; - %z = fadd float %x, %y - ret float %z -} - -define ptx_device double @t1_f64(double %x, double %y) { -; CHECK: add.rn.f64 %ret{{[0-9]+}}, %fd{{[0-9]+}}, %fd{{[0-9]+}} -; CHECK: ret; - %z = fadd double %x, %y - ret double %z -} - -define ptx_device i16 @t2_u16(i16 %x) { -; CHECK: add.u16 %ret{{[0-9]+}}, %rh{{[0-9]+}}, 1; -; CHECK: ret; - %z = add i16 %x, 1 - ret i16 %z -} - -define ptx_device i32 @t2_u32(i32 %x) { -; CHECK: add.u32 %ret{{[0-9]+}}, %r{{[0-9]+}}, 1; -; CHECK: ret; - %z = add i32 %x, 1 - ret i32 %z -} - -define ptx_device i64 @t2_u64(i64 %x) { -; CHECK: add.u64 %ret{{[0-9]+}}, %rd{{[0-9]+}}, 1; -; CHECK: ret; - %z = add i64 %x, 1 - ret i64 %z -} - -define ptx_device float @t2_f32(float %x) { -; CHECK: add.rn.f32 %ret{{[0-9]+}}, %f{{[0-9]+}}, 0D3FF0000000000000; -; CHECK: ret; - %z = fadd float %x, 1.0 - ret float %z -} - -define ptx_device double @t2_f64(double %x) { -; CHECK: add.rn.f64 %ret{{[0-9]+}}, %fd{{[0-9]+}}, 0D3FF0000000000000; -; CHECK: ret; - %z = fadd double %x, 1.0 - ret double %z -} diff --git a/test/CodeGen/PTX/aggregates.ll b/test/CodeGen/PTX/aggregates.ll deleted file mode 100644 index 3fc0c40..0000000 --- a/test/CodeGen/PTX/aggregates.ll +++ /dev/null @@ -1,24 +0,0 @@ -; RUN: llc < %s -march=ptx32 -mattr=sm20 | FileCheck %s -; XFAIL: * - -%complex = type { float, float } - -define ptx_device %complex @complex_add(%complex %a, %complex %b) { -entry: -; CHECK: ld.param.f32 r[[R0:[0-9]+]], [__param_1]; -; CHECK-NEXT: ld.param.f32 r[[R2:[0-9]+]], [__param_3]; -; CHECK-NEXT: ld.param.f32 r[[R1:[0-9]+]], [__param_2]; -; CHECK-NEXT: ld.param.f32 r[[R3:[0-9]+]], [__param_4]; -; CHECK-NEXT: add.rn.f32 r[[R0]], r[[R0]], r[[R2]]; -; CHECK-NEXT: add.rn.f32 r[[R1]], r[[R1]], r[[R3]]; -; CHECK-NEXT: ret; - %a.real = extractvalue %complex %a, 0 - %a.imag = extractvalue %complex %a, 1 - %b.real = extractvalue %complex %b, 0 - %b.imag = extractvalue %complex %b, 1 - %ret.real = fadd float %a.real, %b.real - %ret.imag = fadd float %a.imag, %b.imag - %ret.0 = insertvalue %complex undef, float %ret.real, 0 - %ret.1 = insertvalue %complex %ret.0, float %ret.imag, 1 - ret %complex %ret.1 -} diff --git a/test/CodeGen/PTX/bitwise.ll b/test/CodeGen/PTX/bitwise.ll deleted file mode 100644 index 1403a23..0000000 --- a/test/CodeGen/PTX/bitwise.ll +++ /dev/null @@ -1,24 +0,0 @@ -; RUN: llc < %s -march=ptx32 | FileCheck %s - -; preds - -define ptx_device i32 @t1_and_preds(i1 %x, i1 %y) { -; CHECK: and.pred %p{{[0-9]+}}, %p{{[0-9]+}}, %p{{[0-9]+}} - %c = and i1 %x, %y - %d = zext i1 %c to i32 - ret i32 %d -} - -define ptx_device i32 @t1_or_preds(i1 %x, i1 %y) { -; CHECK: or.pred %p{{[0-9]+}}, %p{{[0-9]+}}, %p{{[0-9]+}} - %a = or i1 %x, %y - %b = zext i1 %a to i32 - ret i32 %b -} - -define ptx_device i32 @t1_xor_preds(i1 %x, i1 %y) { -; CHECK: xor.pred %p{{[0-9]+}}, %p{{[0-9]+}}, %p{{[0-9]+}} - %a = xor i1 %x, %y - %b = zext i1 %a to i32 - ret i32 %b -} diff --git a/test/CodeGen/PTX/bra.ll b/test/CodeGen/PTX/bra.ll deleted file mode 100644 index 464c29c..0000000 --- a/test/CodeGen/PTX/bra.ll +++ /dev/null @@ -1,24 +0,0 @@ -; RUN: llc < %s -march=ptx32 | FileCheck %s - -define ptx_device void @test_bra_direct() { -; CHECK: bra $L__BB0_1; -entry: - br label %loop -loop: - br label %loop -} - -define ptx_device i32 @test_bra_cond_direct(i32 %x, i32 %y) { -entry: -; CHECK: setp.le.u32 %p0, %r[[R0:[0-9]+]], %r[[R1:[0-9]+]] - %p = icmp ugt i32 %x, %y -; CHECK-NEXT: @%p0 bra -; CHECK-NOT: bra - br i1 %p, label %clause.if, label %clause.else -clause.if: -; CHECK: mov.u32 %ret{{[0-9]+}}, %r[[R0]] - ret i32 %x -clause.else: -; CHECK: mov.u32 %ret{{[0-9]+}}, %r[[R1]] - ret i32 %y -} diff --git a/test/CodeGen/PTX/cvt.ll b/test/CodeGen/PTX/cvt.ll deleted file mode 100644 index f55070a..0000000 --- a/test/CodeGen/PTX/cvt.ll +++ /dev/null @@ -1,290 +0,0 @@ -; RUN: llc < %s -march=ptx32 | FileCheck %s - -; preds -; (note: we convert back to i32 to return) - -define ptx_device i32 @cvt_pred_i16(i16 %x, i1 %y) { -; CHECK: setp.gt.u16 %p[[P0:[0-9]+]], %rh{{[0-9]+}}, 0 -; CHECK: and.pred %p2, %p[[P0:[0-9]+]], %p{{[0-9]+}}; -; CHECK: selp.u32 %ret{{[0-9]+}}, 1, 0, %p[[P0:[0-9]+]]; -; CHECK: ret; - %a = trunc i16 %x to i1 - %b = and i1 %a, %y - %c = zext i1 %b to i32 - ret i32 %c -} - -define ptx_device i32 @cvt_pred_i32(i32 %x, i1 %y) { -; CHECK: setp.gt.u32 %p[[P0:[0-9]+]], %r{{[0-9]+}}, 0 -; CHECK: and.pred %p2, %p[[P0:[0-9]+]], %p{{[0-9]+}}; -; CHECK: selp.u32 %ret{{[0-9]+}}, 1, 0, %p[[P0:[0-9]+]]; -; CHECK: ret; - %a = trunc i32 %x to i1 - %b = and i1 %a, %y - %c = zext i1 %b to i32 - ret i32 %c -} - -define ptx_device i32 @cvt_pred_i64(i64 %x, i1 %y) { -; CHECK: setp.gt.u64 %p[[P0:[0-9]+]], %rd{{[0-9]+}}, 0 -; CHECK: and.pred %p2, %p[[P0:[0-9]+]], %p{{[0-9]+}}; -; CHECK: selp.u32 %ret{{[0-9]+}}, 1, 0, %p[[P0:[0-9]+]]; -; CHECK: ret; - %a = trunc i64 %x to i1 - %b = and i1 %a, %y - %c = zext i1 %b to i32 - ret i32 %c -} - -define ptx_device i32 @cvt_pred_f32(float %x, i1 %y) { -; CHECK: setp.gt.u32 %p[[P0:[0-9]+]], %r{{[0-9]+}}, 0 -; CHECK: and.pred %p2, %p[[P0:[0-9]+]], %p{{[0-9]+}}; -; CHECK: selp.u32 %ret{{[0-9]+}}, 1, 0, %p[[P0:[0-9]+]]; -; CHECK: ret; - %a = fptoui float %x to i1 - %b = and i1 %a, %y - %c = zext i1 %b to i32 - ret i32 %c -} - -define ptx_device i32 @cvt_pred_f64(double %x, i1 %y) { -; CHECK: setp.gt.u64 %p[[P0:[0-9]+]], %rd{{[0-9]+}}, 0 -; CHECK: and.pred %p2, %p[[P0:[0-9]+]], %p{{[0-9]+}}; -; CHECK: selp.u32 %ret{{[0-9]+}}, 1, 0, %p[[P0:[0-9]+]]; -; CHECK: ret; - %a = fptoui double %x to i1 - %b = and i1 %a, %y - %c = zext i1 %b to i32 - ret i32 %c -} - -; i16 - -define ptx_device i16 @cvt_i16_preds(i1 %x) { -; CHECK: selp.u16 %ret{{[0-9]+}}, 1, 0, %p{{[0-9]+}}; -; CHECK: ret; - %a = zext i1 %x to i16 - ret i16 %a -} - -define ptx_device i16 @cvt_i16_i32(i32 %x) { -; CHECK: cvt.u16.u32 %ret{{[0-9]+}}, %r{{[0-9]+}}; -; CHECK: ret; - %a = trunc i32 %x to i16 - ret i16 %a -} - -define ptx_device i16 @cvt_i16_i64(i64 %x) { -; CHECK: cvt.u16.u64 %ret{{[0-9]+}}, %rd{{[0-9]+}}; -; CHECK: ret; - %a = trunc i64 %x to i16 - ret i16 %a -} - -define ptx_device i16 @cvt_i16_f32(float %x) { -; CHECK: cvt.rzi.u16.f32 %ret{{[0-9]+}}, %f{{[0-9]+}}; -; CHECK: ret; - %a = fptoui float %x to i16 - ret i16 %a -} - -define ptx_device i16 @cvt_i16_f64(double %x) { -; CHECK: cvt.rzi.u16.f64 %ret{{[0-9]+}}, %fd{{[0-9]+}}; -; CHECK: ret; - %a = fptoui double %x to i16 - ret i16 %a -} - -; i32 - -define ptx_device i32 @cvt_i32_preds(i1 %x) { -; CHECK: selp.u32 %ret{{[0-9]+}}, 1, 0, %p{{[0-9]+}}; -; CHECK: ret; - %a = zext i1 %x to i32 - ret i32 %a -} - -define ptx_device i32 @cvt_i32_i16(i16 %x) { -; CHECK: cvt.u32.u16 %ret{{[0-9]+}}, %rh{{[0-9]+}}; -; CHECK: ret; - %a = zext i16 %x to i32 - ret i32 %a -} - -define ptx_device i32 @cvt_i32_i64(i64 %x) { -; CHECK: cvt.u32.u64 %ret{{[0-9]+}}, %rd{{[0-9]+}}; -; CHECK: ret; - %a = trunc i64 %x to i32 - ret i32 %a -} - -define ptx_device i32 @cvt_i32_f32(float %x) { -; CHECK: cvt.rzi.u32.f32 %ret{{[0-9]+}}, %f{{[0-9]+}}; -; CHECK: ret; - %a = fptoui float %x to i32 - ret i32 %a -} - -define ptx_device i32 @cvt_i32_f64(double %x) { -; CHECK: cvt.rzi.u32.f64 %ret{{[0-9]+}}, %fd{{[0-9]+}}; -; CHECK: ret; - %a = fptoui double %x to i32 - ret i32 %a -} - -; i64 - -define ptx_device i64 @cvt_i64_preds(i1 %x) { -; CHECK: selp.u64 %ret{{[0-9]+}}, 1, 0, %p{{[0-9]+}}; -; CHECK: ret; - %a = zext i1 %x to i64 - ret i64 %a -} - -define ptx_device i64 @cvt_i64_i16(i16 %x) { -; CHECK: cvt.u64.u16 %ret{{[0-9]+}}, %rh{{[0-9]+}}; -; CHECK: ret; - %a = zext i16 %x to i64 - ret i64 %a -} - -define ptx_device i64 @cvt_i64_i32(i32 %x) { -; CHECK: cvt.u64.u32 %ret{{[0-9]+}}, %r{{[0-9]+}}; -; CHECK: ret; - %a = zext i32 %x to i64 - ret i64 %a -} - -define ptx_device i64 @cvt_i64_f32(float %x) { -; CHECK: cvt.rzi.u64.f32 %ret{{[0-9]+}}, %f{{[0-9]+}}; -; CHECK: ret; - %a = fptoui float %x to i64 - ret i64 %a -} - -define ptx_device i64 @cvt_i64_f64(double %x) { -; CHECK: cvt.rzi.u64.f64 %ret{{[0-9]+}}, %fd{{[0-9]+}}; -; CHECK: ret; - %a = fptoui double %x to i64 - ret i64 %a -} - -; f32 - -define ptx_device float @cvt_f32_preds(i1 %x) { -; CHECK: mov.b32 %f0, 0; -; CHECK: mov.b32 %f1, 1065353216; -; CHECK: selp.f32 %ret{{[0-9]+}}, %f1, %f0, %p{{[0-9]+}}; -; CHECK: ret; - %a = uitofp i1 %x to float - ret float %a -} - -define ptx_device float @cvt_f32_i16(i16 %x) { -; CHECK: cvt.rn.f32.u16 %ret{{[0-9]+}}, %rh{{[0-9]+}}; -; CHECK: ret; - %a = uitofp i16 %x to float - ret float %a -} - -define ptx_device float @cvt_f32_i32(i32 %x) { -; CHECK: cvt.rn.f32.u32 %ret{{[0-9]+}}, %r{{[0-9]+}}; -; CHECK: ret; - %a = uitofp i32 %x to float - ret float %a -} - -define ptx_device float @cvt_f32_i64(i64 %x) { -; CHECK: cvt.rn.f32.u64 %ret{{[0-9]+}}, %rd{{[0-9]+}}; -; CHECK: ret; - %a = uitofp i64 %x to float - ret float %a -} - -define ptx_device float @cvt_f32_f64(double %x) { -; CHECK: cvt.rn.f32.f64 %ret{{[0-9]+}}, %fd{{[0-9]+}}; -; CHECK: ret; - %a = fptrunc double %x to float - ret float %a -} - -define ptx_device float @cvt_f32_s16(i16 %x) { -; CHECK: cvt.rn.f32.s16 %ret{{[0-9]+}}, %rh{{[0-9]+}} -; CHECK: ret - %a = sitofp i16 %x to float - ret float %a -} - -define ptx_device float @cvt_f32_s32(i32 %x) { -; CHECK: cvt.rn.f32.s32 %ret{{[0-9]+}}, %r{{[0-9]+}} -; CHECK: ret - %a = sitofp i32 %x to float - ret float %a -} - -define ptx_device float @cvt_f32_s64(i64 %x) { -; CHECK: cvt.rn.f32.s64 %ret{{[0-9]+}}, %rd{{[0-9]+}} -; CHECK: ret - %a = sitofp i64 %x to float - ret float %a -} - -; f64 - -define ptx_device double @cvt_f64_preds(i1 %x) { -; CHECK: mov.b64 %fd0, 0; -; CHECK: mov.b64 %fd1, 4575657221408423936; -; CHECK: selp.f64 %ret{{[0-9]+}}, %fd1, %fd0, %p{{[0-9]+}}; -; CHECK: ret; - %a = uitofp i1 %x to double - ret double %a -} - -define ptx_device double @cvt_f64_i16(i16 %x) { -; CHECK: cvt.rn.f64.u16 %ret{{[0-9]+}}, %rh{{[0-9]+}}; -; CHECK: ret; - %a = uitofp i16 %x to double - ret double %a -} - -define ptx_device double @cvt_f64_i32(i32 %x) { -; CHECK: cvt.rn.f64.u32 %ret{{[0-9]+}}, %r{{[0-9]+}}; -; CHECK: ret; - %a = uitofp i32 %x to double - ret double %a -} - -define ptx_device double @cvt_f64_i64(i64 %x) { -; CHECK: cvt.rn.f64.u64 %ret{{[0-9]+}}, %rd{{[0-9]+}}; -; CHECK: ret; - %a = uitofp i64 %x to double - ret double %a -} - -define ptx_device double @cvt_f64_f32(float %x) { -; CHECK: cvt.f64.f32 %ret{{[0-9]+}}, %f{{[0-9]+}}; -; CHECK: ret; - %a = fpext float %x to double - ret double %a -} - -define ptx_device double @cvt_f64_s16(i16 %x) { -; CHECK: cvt.rn.f64.s16 %ret{{[0-9]+}}, %rh{{[0-9]+}} -; CHECK: ret - %a = sitofp i16 %x to double - ret double %a -} - -define ptx_device double @cvt_f64_s32(i32 %x) { -; CHECK: cvt.rn.f64.s32 %ret{{[0-9]+}}, %r{{[0-9]+}} -; CHECK: ret - %a = sitofp i32 %x to double - ret double %a -} - -define ptx_device double @cvt_f64_s64(i64 %x) { -; CHECK: cvt.rn.f64.s64 %ret{{[0-9]+}}, %rd{{[0-9]+}} -; CHECK: ret - %a = sitofp i64 %x to double - ret double %a -} diff --git a/test/CodeGen/PTX/exit.ll b/test/CodeGen/PTX/exit.ll deleted file mode 100644 index 7816c80..0000000 --- a/test/CodeGen/PTX/exit.ll +++ /dev/null @@ -1,14 +0,0 @@ -; RUN: llc < %s -march=ptx32 | FileCheck %s - -define ptx_kernel void @t1() { -; CHECK: exit; -; CHECK-NOT: ret; - ret void -} - -define ptx_kernel void @t2(i32* %p, i32 %x) { - store i32 %x, i32* %p -; CHECK: exit; -; CHECK-NOT: ret; - ret void -} diff --git a/test/CodeGen/PTX/fdiv-sm10.ll b/test/CodeGen/PTX/fdiv-sm10.ll deleted file mode 100644 index e1013be..0000000 --- a/test/CodeGen/PTX/fdiv-sm10.ll +++ /dev/null @@ -1,15 +0,0 @@ -; RUN: llc < %s -march=ptx32 -mattr=+sm10 | FileCheck %s - -define ptx_device float @t1_f32(float %x, float %y) { -; CHECK: div.f32 %ret{{[0-9]+}}, %f{{[0-9]+}}, %f{{[0-9]+}}; -; CHECK: ret; - %a = fdiv float %x, %y - ret float %a -} - -define ptx_device double @t1_f64(double %x, double %y) { -; CHECK: div.f64 %ret{{[0-9]+}}, %fd{{[0-9]+}}, %fd{{[0-9]+}}; -; CHECK: ret; - %a = fdiv double %x, %y - ret double %a -} diff --git a/test/CodeGen/PTX/fdiv-sm13.ll b/test/CodeGen/PTX/fdiv-sm13.ll deleted file mode 100644 index 1afa2eb..0000000 --- a/test/CodeGen/PTX/fdiv-sm13.ll +++ /dev/null @@ -1,15 +0,0 @@ -; RUN: llc < %s -march=ptx32 -mattr=+sm13 | FileCheck %s - -define ptx_device float @t1_f32(float %x, float %y) { -; CHECK: div.rn.f32 %ret{{[0-9]+}}, %f{{[0-9]+}}, %f{{[0-9]+}}; -; CHECK: ret; - %a = fdiv float %x, %y - ret float %a -} - -define ptx_device double @t1_f64(double %x, double %y) { -; CHECK: div.rn.f64 %ret{{[0-9]+}}, %fd{{[0-9]+}}, %fd{{[0-9]+}}; -; CHECK: ret; - %a = fdiv double %x, %y - ret double %a -} diff --git a/test/CodeGen/PTX/fneg.ll b/test/CodeGen/PTX/fneg.ll deleted file mode 100644 index 2b76e63..0000000 --- a/test/CodeGen/PTX/fneg.ll +++ /dev/null @@ -1,15 +0,0 @@ -; RUN: llc < %s -march=ptx32 | FileCheck %s - -define ptx_device float @t1_f32(float %x) { -; CHECK: neg.f32 %ret{{[0-9]+}}, %f{{[0-9]+}}; -; CHECK: ret; - %y = fsub float -0.000000e+00, %x - ret float %y -} - -define ptx_device double @t1_f64(double %x) { -; CHECK: neg.f64 %ret{{[0-9]+}}, %fd{{[0-9]+}}; -; CHECK: ret; - %y = fsub double -0.000000e+00, %x - ret double %y -} diff --git a/test/CodeGen/PTX/intrinsic.ll b/test/CodeGen/PTX/intrinsic.ll deleted file mode 100644 index 9f37ead..0000000 --- a/test/CodeGen/PTX/intrinsic.ll +++ /dev/null @@ -1,281 +0,0 @@ -; RUN: llc < %s -march=ptx32 -mattr=+ptx20 | FileCheck %s - -define ptx_device i32 @test_tid_x() { -; CHECK: mov.u32 %ret0, %tid.x; -; CHECK: ret; - %x = call i32 @llvm.ptx.read.tid.x() - ret i32 %x -} - -define ptx_device i32 @test_tid_y() { -; CHECK: mov.u32 %ret0, %tid.y; -; CHECK: ret; - %x = call i32 @llvm.ptx.read.tid.y() - ret i32 %x -} - -define ptx_device i32 @test_tid_z() { -; CHECK: mov.u32 %ret0, %tid.z; -; CHECK: ret; - %x = call i32 @llvm.ptx.read.tid.z() - ret i32 %x -} - -define ptx_device i32 @test_tid_w() { -; CHECK: mov.u32 %ret0, %tid.w; -; CHECK: ret; - %x = call i32 @llvm.ptx.read.tid.w() - ret i32 %x -} - -define ptx_device i32 @test_ntid_x() { -; CHECK: mov.u32 %ret0, %ntid.x; -; CHECK: ret; - %x = call i32 @llvm.ptx.read.ntid.x() - ret i32 %x -} - -define ptx_device i32 @test_ntid_y() { -; CHECK: mov.u32 %ret0, %ntid.y; -; CHECK: ret; - %x = call i32 @llvm.ptx.read.ntid.y() - ret i32 %x -} - -define ptx_device i32 @test_ntid_z() { -; CHECK: mov.u32 %ret0, %ntid.z; -; CHECK: ret; - %x = call i32 @llvm.ptx.read.ntid.z() - ret i32 %x -} - -define ptx_device i32 @test_ntid_w() { -; CHECK: mov.u32 %ret0, %ntid.w; -; CHECK: ret; - %x = call i32 @llvm.ptx.read.ntid.w() - ret i32 %x -} - -define ptx_device i32 @test_laneid() { -; CHECK: mov.u32 %ret0, %laneid; -; CHECK: ret; - %x = call i32 @llvm.ptx.read.laneid() - ret i32 %x -} - -define ptx_device i32 @test_warpid() { -; CHECK: mov.u32 %ret0, %warpid; -; CHECK: ret; - %x = call i32 @llvm.ptx.read.warpid() - ret i32 %x -} - -define ptx_device i32 @test_nwarpid() { -; CHECK: mov.u32 %ret0, %nwarpid; -; CHECK: ret; - %x = call i32 @llvm.ptx.read.nwarpid() - ret i32 %x -} - -define ptx_device i32 @test_ctaid_x() { -; CHECK: mov.u32 %ret0, %ctaid.x; -; CHECK: ret; - %x = call i32 @llvm.ptx.read.ctaid.x() - ret i32 %x -} - -define ptx_device i32 @test_ctaid_y() { -; CHECK: mov.u32 %ret0, %ctaid.y; -; CHECK: ret; - %x = call i32 @llvm.ptx.read.ctaid.y() - ret i32 %x -} - -define ptx_device i32 @test_ctaid_z() { -; CHECK: mov.u32 %ret0, %ctaid.z; -; CHECK: ret; - %x = call i32 @llvm.ptx.read.ctaid.z() - ret i32 %x -} - -define ptx_device i32 @test_ctaid_w() { -; CHECK: mov.u32 %ret0, %ctaid.w; -; CHECK: ret; - %x = call i32 @llvm.ptx.read.ctaid.w() - ret i32 %x -} - -define ptx_device i32 @test_nctaid_x() { -; CHECK: mov.u32 %ret0, %nctaid.x; -; CHECK: ret; - %x = call i32 @llvm.ptx.read.nctaid.x() - ret i32 %x -} - -define ptx_device i32 @test_nctaid_y() { -; CHECK: mov.u32 %ret0, %nctaid.y; -; CHECK: ret; - %x = call i32 @llvm.ptx.read.nctaid.y() - ret i32 %x -} - -define ptx_device i32 @test_nctaid_z() { -; CHECK: mov.u32 %ret0, %nctaid.z; -; CHECK: ret; - %x = call i32 @llvm.ptx.read.nctaid.z() - ret i32 %x -} - -define ptx_device i32 @test_nctaid_w() { -; CHECK: mov.u32 %ret0, %nctaid.w; -; CHECK: ret; - %x = call i32 @llvm.ptx.read.nctaid.w() - ret i32 %x -} - -define ptx_device i32 @test_smid() { -; CHECK: mov.u32 %ret0, %smid; -; CHECK: ret; - %x = call i32 @llvm.ptx.read.smid() - ret i32 %x -} - -define ptx_device i32 @test_nsmid() { -; CHECK: mov.u32 %ret0, %nsmid; -; CHECK: ret; - %x = call i32 @llvm.ptx.read.nsmid() - ret i32 %x -} - -define ptx_device i32 @test_gridid() { -; CHECK: mov.u32 %ret0, %gridid; -; CHECK: ret; - %x = call i32 @llvm.ptx.read.gridid() - ret i32 %x -} - -define ptx_device i32 @test_lanemask_eq() { -; CHECK: mov.u32 %ret0, %lanemask_eq; -; CHECK: ret; - %x = call i32 @llvm.ptx.read.lanemask.eq() - ret i32 %x -} - -define ptx_device i32 @test_lanemask_le() { -; CHECK: mov.u32 %ret0, %lanemask_le; -; CHECK: ret; - %x = call i32 @llvm.ptx.read.lanemask.le() - ret i32 %x -} - -define ptx_device i32 @test_lanemask_lt() { -; CHECK: mov.u32 %ret0, %lanemask_lt; -; CHECK: ret; - %x = call i32 @llvm.ptx.read.lanemask.lt() - ret i32 %x -} - -define ptx_device i32 @test_lanemask_ge() { -; CHECK: mov.u32 %ret0, %lanemask_ge; -; CHECK: ret; - %x = call i32 @llvm.ptx.read.lanemask.ge() - ret i32 %x -} - -define ptx_device i32 @test_lanemask_gt() { -; CHECK: mov.u32 %ret0, %lanemask_gt; -; CHECK: ret; - %x = call i32 @llvm.ptx.read.lanemask.gt() - ret i32 %x -} - -define ptx_device i32 @test_clock() { -; CHECK: mov.u32 %ret0, %clock; -; CHECK: ret; - %x = call i32 @llvm.ptx.read.clock() - ret i32 %x -} - -define ptx_device i64 @test_clock64() { -; CHECK: mov.u64 %ret0, %clock64; -; CHECK: ret; - %x = call i64 @llvm.ptx.read.clock64() - ret i64 %x -} - -define ptx_device i32 @test_pm0() { -; CHECK: mov.u32 %ret0, %pm0; -; CHECK: ret; - %x = call i32 @llvm.ptx.read.pm0() - ret i32 %x -} - -define ptx_device i32 @test_pm1() { -; CHECK: mov.u32 %ret0, %pm1; -; CHECK: ret; - %x = call i32 @llvm.ptx.read.pm1() - ret i32 %x -} - -define ptx_device i32 @test_pm2() { -; CHECK: mov.u32 %ret0, %pm2; -; CHECK: ret; - %x = call i32 @llvm.ptx.read.pm2() - ret i32 %x -} - -define ptx_device i32 @test_pm3() { -; CHECK: mov.u32 %ret0, %pm3; -; CHECK: ret; - %x = call i32 @llvm.ptx.read.pm3() - ret i32 %x -} - -define ptx_device void @test_bar_sync() { -; CHECK: bar.sync 0 -; CHECK: ret; - call void @llvm.ptx.bar.sync(i32 0) - ret void -} - -declare i32 @llvm.ptx.read.tid.x() -declare i32 @llvm.ptx.read.tid.y() -declare i32 @llvm.ptx.read.tid.z() -declare i32 @llvm.ptx.read.tid.w() -declare i32 @llvm.ptx.read.ntid.x() -declare i32 @llvm.ptx.read.ntid.y() -declare i32 @llvm.ptx.read.ntid.z() -declare i32 @llvm.ptx.read.ntid.w() - -declare i32 @llvm.ptx.read.laneid() -declare i32 @llvm.ptx.read.warpid() -declare i32 @llvm.ptx.read.nwarpid() - -declare i32 @llvm.ptx.read.ctaid.x() -declare i32 @llvm.ptx.read.ctaid.y() -declare i32 @llvm.ptx.read.ctaid.z() -declare i32 @llvm.ptx.read.ctaid.w() -declare i32 @llvm.ptx.read.nctaid.x() -declare i32 @llvm.ptx.read.nctaid.y() -declare i32 @llvm.ptx.read.nctaid.z() -declare i32 @llvm.ptx.read.nctaid.w() - -declare i32 @llvm.ptx.read.smid() -declare i32 @llvm.ptx.read.nsmid() -declare i32 @llvm.ptx.read.gridid() - -declare i32 @llvm.ptx.read.lanemask.eq() -declare i32 @llvm.ptx.read.lanemask.le() -declare i32 @llvm.ptx.read.lanemask.lt() -declare i32 @llvm.ptx.read.lanemask.ge() -declare i32 @llvm.ptx.read.lanemask.gt() - -declare i32 @llvm.ptx.read.clock() -declare i64 @llvm.ptx.read.clock64() - -declare i32 @llvm.ptx.read.pm0() -declare i32 @llvm.ptx.read.pm1() -declare i32 @llvm.ptx.read.pm2() -declare i32 @llvm.ptx.read.pm3() - -declare void @llvm.ptx.bar.sync(i32 %i) diff --git a/test/CodeGen/PTX/ld.ll b/test/CodeGen/PTX/ld.ll deleted file mode 100644 index e55820d..0000000 --- a/test/CodeGen/PTX/ld.ll +++ /dev/null @@ -1,382 +0,0 @@ -; RUN: llc < %s -march=ptx32 | FileCheck %s - -;CHECK: .extern .global .b16 array_i16[10]; -@array_i16 = external global [10 x i16] - -;CHECK: .extern .const .b16 array_constant_i16[10]; -@array_constant_i16 = external addrspace(1) constant [10 x i16] - -;CHECK: .extern .shared .b16 array_shared_i16[10]; -@array_shared_i16 = external addrspace(4) global [10 x i16] - -;CHECK: .extern .global .b32 array_i32[10]; -@array_i32 = external global [10 x i32] - -;CHECK: .extern .const .b32 array_constant_i32[10]; -@array_constant_i32 = external addrspace(1) constant [10 x i32] - -;CHECK: .extern .shared .b32 array_shared_i32[10]; -@array_shared_i32 = external addrspace(4) global [10 x i32] - -;CHECK: .extern .global .b64 array_i64[10]; -@array_i64 = external global [10 x i64] - -;CHECK: .extern .const .b64 array_constant_i64[10]; -@array_constant_i64 = external addrspace(1) constant [10 x i64] - -;CHECK: .extern .shared .b64 array_shared_i64[10]; -@array_shared_i64 = external addrspace(4) global [10 x i64] - -;CHECK: .extern .global .b32 array_float[10]; -@array_float = external global [10 x float] - -;CHECK: .extern .const .b32 array_constant_float[10]; -@array_constant_float = external addrspace(1) constant [10 x float] - -;CHECK: .extern .shared .b32 array_shared_float[10]; -@array_shared_float = external addrspace(4) global [10 x float] - -;CHECK: .extern .global .b64 array_double[10]; -@array_double = external global [10 x double] - -;CHECK: .extern .const .b64 array_constant_double[10]; -@array_constant_double = external addrspace(1) constant [10 x double] - -;CHECK: .extern .shared .b64 array_shared_double[10]; -@array_shared_double = external addrspace(4) global [10 x double] - - -define ptx_device i16 @t1_u16(i16* %p) { -entry: -;CHECK: ld.global.u16 %ret{{[0-9]+}}, [%r{{[0-9]+}}]; -;CHECK: ret; - %x = load i16* %p - ret i16 %x -} - -define ptx_device i32 @t1_u32(i32* %p) { -entry: -;CHECK: ld.global.u32 %ret{{[0-9]+}}, [%r{{[0-9]+}}]; -;CHECK: ret; - %x = load i32* %p - ret i32 %x -} - -define ptx_device i64 @t1_u64(i64* %p) { -entry: -;CHECK: ld.global.u64 %ret{{[0-9]+}}, [%r{{[0-9]+}}]; -;CHECK: ret; - %x = load i64* %p - ret i64 %x -} - -define ptx_device float @t1_f32(float* %p) { -entry: -;CHECK: ld.global.f32 %ret{{[0-9]+}}, [%r{{[0-9]+}}]; -;CHECK: ret; - %x = load float* %p - ret float %x -} - -define ptx_device double @t1_f64(double* %p) { -entry: -;CHECK: ld.global.f64 %ret{{[0-9]+}}, [%r{{[0-9]+}}]; -;CHECK: ret; - %x = load double* %p - ret double %x -} - -define ptx_device i16 @t2_u16(i16* %p) { -entry: -;CHECK: ld.global.u16 %ret{{[0-9]+}}, [%r{{[0-9]+}}+2]; -;CHECK: ret; - %i = getelementptr i16* %p, i32 1 - %x = load i16* %i - ret i16 %x -} - -define ptx_device i32 @t2_u32(i32* %p) { -entry: -;CHECK: ld.global.u32 %ret{{[0-9]+}}, [%r{{[0-9]+}}+4]; -;CHECK: ret; - %i = getelementptr i32* %p, i32 1 - %x = load i32* %i - ret i32 %x -} - -define ptx_device i64 @t2_u64(i64* %p) { -entry: -;CHECK: ld.global.u64 %ret{{[0-9]+}}, [%r{{[0-9]+}}+8]; -;CHECK: ret; - %i = getelementptr i64* %p, i32 1 - %x = load i64* %i - ret i64 %x -} - -define ptx_device float @t2_f32(float* %p) { -entry: -;CHECK: ld.global.f32 %ret{{[0-9]+}}, [%r{{[0-9]+}}+4]; -;CHECK: ret; - %i = getelementptr float* %p, i32 1 - %x = load float* %i - ret float %x -} - -define ptx_device double @t2_f64(double* %p) { -entry: -;CHECK: ld.global.f64 %ret{{[0-9]+}}, [%r{{[0-9]+}}+8]; -;CHECK: ret; - %i = getelementptr double* %p, i32 1 - %x = load double* %i - ret double %x -} - -define ptx_device i16 @t3_u16(i16* %p, i32 %q) { -entry: -;CHECK: shl.b32 %r[[R0:[0-9]+]], %r{{[0-9]+}}, 1; -;CHECK: add.u32 %r{{[0-9]+}}, %r{{[0-9]+}}, %r[[R0]]; -;CHECK: ld.global.u16 %ret{{[0-9]+}}, [%r{{[0-9]+}}]; - %i = getelementptr i16* %p, i32 %q - %x = load i16* %i - ret i16 %x -} - -define ptx_device i32 @t3_u32(i32* %p, i32 %q) { -entry: -;CHECK: shl.b32 %r[[R0:[0-9]+]], %r{{[0-9]+}}, 2; -;CHECK: add.u32 %r{{[0-9]+}}, %r{{[0-9]+}}, %r[[R0]]; -;CHECK: ld.global.u32 %ret{{[0-9]+}}, [%r{{[0-9]+}}]; - %i = getelementptr i32* %p, i32 %q - %x = load i32* %i - ret i32 %x -} - -define ptx_device i64 @t3_u64(i64* %p, i32 %q) { -entry: -;CHECK: shl.b32 %r[[R0:[0-9]+]], %r{{[0-9]+}}, 3; -;CHECK: add.u32 %r{{[0-9]+}}, %r{{[0-9]+}}, %r[[R0]]; -;CHECK: ld.global.u64 %ret{{[0-9]+}}, [%r{{[0-9]+}}]; - %i = getelementptr i64* %p, i32 %q - %x = load i64* %i - ret i64 %x -} - -define ptx_device float @t3_f32(float* %p, i32 %q) { -entry: -;CHECK: shl.b32 %r[[R0:[0-9]+]], %r{{[0-9]+}}, 2; -;CHECK: add.u32 %r{{[0-9]+}}, %r{{[0-9]+}}, %r[[R0]]; -;CHECK: ld.global.f32 %ret{{[0-9]+}}, [%r{{[0-9]+}}]; - %i = getelementptr float* %p, i32 %q - %x = load float* %i - ret float %x -} - -define ptx_device double @t3_f64(double* %p, i32 %q) { -entry: -;CHECK: shl.b32 %r[[R0:[0-9]+]], %r{{[0-9]+}}, 3; -;CHECK: add.u32 %r{{[0-9]+}}, %r{{[0-9]+}}, %r[[R0]]; -;CHECK: ld.global.f64 %ret{{[0-9]+}}, [%r{{[0-9]+}}]; - %i = getelementptr double* %p, i32 %q - %x = load double* %i - ret double %x -} - -define ptx_device i16 @t4_global_u16() { -entry: -;CHECK: mov.u32 %r[[R0:[0-9]+]], array_i16; -;CHECK: ld.global.u16 %ret{{[0-9]+}}, [%r[[R0]]]; -;CHECK: ret; - %i = getelementptr [10 x i16]* @array_i16, i32 0, i32 0 - %x = load i16* %i - ret i16 %x -} - -define ptx_device i32 @t4_global_u32() { -entry: -;CHECK: mov.u32 %r[[R0:[0-9]+]], array_i32; -;CHECK: ld.global.u32 %ret{{[0-9]+}}, [%r[[R0]]]; -;CHECK: ret; - %i = getelementptr [10 x i32]* @array_i32, i32 0, i32 0 - %x = load i32* %i - ret i32 %x -} - -define ptx_device i64 @t4_global_u64() { -entry: -;CHECK: mov.u32 %r[[R0:[0-9]+]], array_i64; -;CHECK: ld.global.u64 %ret{{[0-9]+}}, [%r[[R0]]]; -;CHECK: ret; - %i = getelementptr [10 x i64]* @array_i64, i32 0, i32 0 - %x = load i64* %i - ret i64 %x -} - -define ptx_device float @t4_global_f32() { -entry: -;CHECK: mov.u32 %r[[R0:[0-9]+]], array_float; -;CHECK: ld.global.f32 %ret{{[0-9]+}}, [%r[[R0]]]; -;CHECK: ret; - %i = getelementptr [10 x float]* @array_float, i32 0, i32 0 - %x = load float* %i - ret float %x -} - -define ptx_device double @t4_global_f64() { -entry: -;CHECK: mov.u32 %r[[R0:[0-9]+]], array_double; -;CHECK: ld.global.f64 %ret{{[0-9]+}}, [%r[[R0]]]; -;CHECK: ret; - %i = getelementptr [10 x double]* @array_double, i32 0, i32 0 - %x = load double* %i - ret double %x -} - -define ptx_device i16 @t4_const_u16() { -entry: -;CHECK: mov.u32 %r[[R0:[0-9]+]], array_constant_i16; -;CHECK: ld.const.u16 %ret{{[0-9]+}}, [%r[[R0]]]; -;CHECK: ret; - %i = getelementptr [10 x i16] addrspace(1)* @array_constant_i16, i32 0, i32 0 - %x = load i16 addrspace(1)* %i - ret i16 %x -} - -define ptx_device i32 @t4_const_u32() { -entry: -;CHECK: mov.u32 %r[[R0:[0-9]+]], array_constant_i32; -;CHECK: ld.const.u32 %ret{{[0-9]+}}, [%r[[R0]]]; -;CHECK: ret; - %i = getelementptr [10 x i32] addrspace(1)* @array_constant_i32, i32 0, i32 0 - %x = load i32 addrspace(1)* %i - ret i32 %x -} - -define ptx_device i64 @t4_const_u64() { -entry: -;CHECK: mov.u32 %r[[R0:[0-9]+]], array_constant_i64; -;CHECK: ld.const.u64 %ret{{[0-9]+}}, [%r[[R0]]]; -;CHECK: ret; - %i = getelementptr [10 x i64] addrspace(1)* @array_constant_i64, i32 0, i32 0 - %x = load i64 addrspace(1)* %i - ret i64 %x -} - -define ptx_device float @t4_const_f32() { -entry: -;CHECK: mov.u32 %r[[R0:[0-9]+]], array_constant_float; -;CHECK: ld.const.f32 %ret{{[0-9]+}}, [%r[[R0]]]; -;CHECK: ret; - %i = getelementptr [10 x float] addrspace(1)* @array_constant_float, i32 0, i32 0 - %x = load float addrspace(1)* %i - ret float %x -} - -define ptx_device double @t4_const_f64() { -entry: -;CHECK: mov.u32 %r[[R0:[0-9]+]], array_constant_double; -;CHECK: ld.const.f64 %ret{{[0-9]+}}, [%r[[R0]]]; -;CHECK: ret; - %i = getelementptr [10 x double] addrspace(1)* @array_constant_double, i32 0, i32 0 - %x = load double addrspace(1)* %i - ret double %x -} - -define ptx_device i16 @t4_shared_u16() { -entry: -;CHECK: mov.u32 %r[[R0:[0-9]+]], array_shared_i16; -;CHECK: ld.shared.u16 %ret{{[0-9]+}}, [%r[[R0]]]; -;CHECK: ret; - %i = getelementptr [10 x i16] addrspace(4)* @array_shared_i16, i32 0, i32 0 - %x = load i16 addrspace(4)* %i - ret i16 %x -} - -define ptx_device i32 @t4_shared_u32() { -entry: -;CHECK: mov.u32 %r[[R0:[0-9]+]], array_shared_i32; -;CHECK: ld.shared.u32 %ret{{[0-9]+}}, [%r[[R0]]]; -;CHECK: ret; - %i = getelementptr [10 x i32] addrspace(4)* @array_shared_i32, i32 0, i32 0 - %x = load i32 addrspace(4)* %i - ret i32 %x -} - -define ptx_device i64 @t4_shared_u64() { -entry: -;CHECK: mov.u32 %r[[R0:[0-9]+]], array_shared_i64; -;CHECK: ld.shared.u64 %ret{{[0-9]+}}, [%r[[R0]]]; -;CHECK: ret; - %i = getelementptr [10 x i64] addrspace(4)* @array_shared_i64, i32 0, i32 0 - %x = load i64 addrspace(4)* %i - ret i64 %x -} - -define ptx_device float @t4_shared_f32() { -entry: -;CHECK: mov.u32 %r[[R0:[0-9]+]], array_shared_float; -;CHECK: ld.shared.f32 %ret{{[0-9]+}}, [%r[[R0]]]; -;CHECK: ret; - %i = getelementptr [10 x float] addrspace(4)* @array_shared_float, i32 0, i32 0 - %x = load float addrspace(4)* %i - ret float %x -} - -define ptx_device double @t4_shared_f64() { -entry: -;CHECK: mov.u32 %r[[R0:[0-9]+]], array_shared_double; -;CHECK: ld.shared.f64 %ret{{[0-9]+}}, [%r[[R0]]]; -;CHECK: ret; - %i = getelementptr [10 x double] addrspace(4)* @array_shared_double, i32 0, i32 0 - %x = load double addrspace(4)* %i - ret double %x -} - -define ptx_device i16 @t5_u16() { -entry: -;CHECK: mov.u32 %r[[R0:[0-9]+]], array_i16; -;CHECK: ld.global.u16 %ret{{[0-9]+}}, [%r[[R0]]+2]; -;CHECK: ret; - %i = getelementptr [10 x i16]* @array_i16, i32 0, i32 1 - %x = load i16* %i - ret i16 %x -} - -define ptx_device i32 @t5_u32() { -entry: -;CHECK: mov.u32 %r[[R0:[0-9]+]], array_i32; -;CHECK: ld.global.u32 %ret{{[0-9]+}}, [%r[[R0]]+4]; -;CHECK: ret; - %i = getelementptr [10 x i32]* @array_i32, i32 0, i32 1 - %x = load i32* %i - ret i32 %x -} - -define ptx_device i64 @t5_u64() { -entry: -;CHECK: mov.u32 %r[[R0:[0-9]+]], array_i64; -;CHECK: ld.global.u64 %ret{{[0-9]+}}, [%r[[R0]]+8]; -;CHECK: ret; - %i = getelementptr [10 x i64]* @array_i64, i32 0, i32 1 - %x = load i64* %i - ret i64 %x -} - -define ptx_device float @t5_f32() { -entry: -;CHECK: mov.u32 %r[[R0:[0-9]+]], array_float; -;CHECK: ld.global.f32 %ret{{[0-9]+}}, [%r[[R0]]+4]; -;CHECK: ret; - %i = getelementptr [10 x float]* @array_float, i32 0, i32 1 - %x = load float* %i - ret float %x -} - -define ptx_device double @t5_f64() { -entry: -;CHECK: mov.u32 %r[[R0:[0-9]+]], array_double; -;CHECK: ld.global.f64 %ret{{[0-9]+}}, [%r[[R0]]+8]; -;CHECK: ret; - %i = getelementptr [10 x double]* @array_double, i32 0, i32 1 - %x = load double* %i - ret double %x -} diff --git a/test/CodeGen/PTX/lit.local.cfg b/test/CodeGen/PTX/lit.local.cfg deleted file mode 100644 index e748f7f..0000000 --- a/test/CodeGen/PTX/lit.local.cfg +++ /dev/null @@ -1,6 +0,0 @@ -config.suffixes = ['.ll', '.c', '.cpp'] - -targets = set(config.root.targets_to_build.split()) -if not 'PTX' in targets: - config.unsupported = True - diff --git a/test/CodeGen/PTX/llvm-intrinsic.ll b/test/CodeGen/PTX/llvm-intrinsic.ll deleted file mode 100644 index e73ad25..0000000 --- a/test/CodeGen/PTX/llvm-intrinsic.ll +++ /dev/null @@ -1,56 +0,0 @@ -; RUN: llc < %s -march=ptx32 -mattr=+ptx20 | FileCheck %s - -define ptx_device float @test_sqrt_f32(float %x) { -entry: -; CHECK: sqrt.rn.f32 %ret{{[0-9]+}}, %f{{[0-9]+}}; -; CHECK: ret; - %y = call float @llvm.sqrt.f32(float %x) - ret float %y -} - -define ptx_device double @test_sqrt_f64(double %x) { -entry: -; CHECK: sqrt.rn.f64 %ret{{[0-9]+}}, %fd{{[0-9]+}}; -; CHECK: ret; - %y = call double @llvm.sqrt.f64(double %x) - ret double %y -} - -define ptx_device float @test_sin_f32(float %x) { -entry: -; CHECK: sin.approx.f32 %ret{{[0-9]+}}, %f{{[0-9]+}}; -; CHECK: ret; - %y = call float @llvm.sin.f32(float %x) - ret float %y -} - -define ptx_device double @test_sin_f64(double %x) { -entry: -; CHECK: sin.approx.f64 %ret{{[0-9]+}}, %fd{{[0-9]+}}; -; CHECK: ret; - %y = call double @llvm.sin.f64(double %x) - ret double %y -} - -define ptx_device float @test_cos_f32(float %x) { -entry: -; CHECK: cos.approx.f32 %ret{{[0-9]+}}, %f{{[0-9]+}}; -; CHECK: ret; - %y = call float @llvm.cos.f32(float %x) - ret float %y -} - -define ptx_device double @test_cos_f64(double %x) { -entry: -; CHECK: cos.approx.f64 %ret{{[0-9]+}}, %fd{{[0-9]+}}; -; CHECK: ret; - %y = call double @llvm.cos.f64(double %x) - ret double %y -} - -declare float @llvm.sqrt.f32(float) -declare double @llvm.sqrt.f64(double) -declare float @llvm.sin.f32(float) -declare double @llvm.sin.f64(double) -declare float @llvm.cos.f32(float) -declare double @llvm.cos.f64(double) diff --git a/test/CodeGen/PTX/mad-disabling.ll b/test/CodeGen/PTX/mad-disabling.ll deleted file mode 100644 index 603c3ba..0000000 --- a/test/CodeGen/PTX/mad-disabling.ll +++ /dev/null @@ -1,24 +0,0 @@ -; RUN: llc < %s -march=ptx32 -mattr=+ptx20,+sm20 | FileCheck %s -check-prefix=FMA -; RUN: llc < %s -march=ptx32 -mattr=+ptx20,+sm20,+no-fma | FileCheck %s -check-prefix=MUL -; RUN: llc < %s -march=ptx64 -mattr=+ptx20,+sm20 | FileCheck %s -check-prefix=FMA -; RUN: llc < %s -march=ptx64 -mattr=+ptx20,+sm20,+no-fma | FileCheck %s -check-prefix=MUL - -define ptx_device float @test_mul_add_f(float %x, float %y, float %z) { -entry: -; FMA: mad.rn.f32 -; MUL: mul.rn.f32 -; MUL: add.rn.f32 - %a = fmul float %x, %y - %b = fadd float %a, %z - ret float %b -} - -define ptx_device double @test_mul_add_d(double %x, double %y, double %z) { -entry: -; FMA: mad.rn.f64 -; MUL: mul.rn.f64 -; MUL: add.rn.f64 - %a = fmul double %x, %y - %b = fadd double %a, %z - ret double %b -} diff --git a/test/CodeGen/PTX/mad.ll b/test/CodeGen/PTX/mad.ll deleted file mode 100644 index cc28e3f..0000000 --- a/test/CodeGen/PTX/mad.ll +++ /dev/null @@ -1,17 +0,0 @@ -; RUN: llc < %s -march=ptx32 -mattr=+sm13 | FileCheck %s - -define ptx_device float @t1_f32(float %x, float %y, float %z) { -; CHECK: mad.rn.f32 %ret{{[0-9]+}}, %f{{[0-9]+}}, %f{{[0-9]+}}, %f{{[0-9]+}}; -; CHECK: ret; - %a = fmul float %x, %y - %b = fadd float %a, %z - ret float %b -} - -define ptx_device double @t1_f64(double %x, double %y, double %z) { -; CHECK: mad.rn.f64 %ret{{[0-9]+}}, %fd{{[0-9]+}}, %fd{{[0-9]+}}, %fd{{[0-9]+}}; -; CHECK: ret; - %a = fmul double %x, %y - %b = fadd double %a, %z - ret double %b -} diff --git a/test/CodeGen/PTX/mov.ll b/test/CodeGen/PTX/mov.ll deleted file mode 100644 index 9e501be..0000000 --- a/test/CodeGen/PTX/mov.ll +++ /dev/null @@ -1,62 +0,0 @@ -; RUN: llc < %s -march=ptx32 | FileCheck %s - -define ptx_device i16 @t1_u16() { -; CHECK: mov.u16 %ret{{[0-9]+}}, 0; -; CHECK: ret; - ret i16 0 -} - -define ptx_device i32 @t1_u32() { -; CHECK: mov.u32 %ret{{[0-9]+}}, 0; -; CHECK: ret; - ret i32 0 -} - -define ptx_device i64 @t1_u64() { -; CHECK: mov.u64 %ret{{[0-9]+}}, 0; -; CHECK: ret; - ret i64 0 -} - -define ptx_device float @t1_f32() { -; CHECK: mov.f32 %ret{{[0-9]+}}, 0D0000000000000000; -; CHECK: ret; - ret float 0.0 -} - -define ptx_device double @t1_f64() { -; CHECK: mov.f64 %ret{{[0-9]+}}, 0D0000000000000000; -; CHECK: ret; - ret double 0.0 -} - -define ptx_device i16 @t2_u16(i16 %x) { -; CHECK: mov.b16 %ret{{[0-9]+}}, %arg{{[0-9]+}}; -; CHECK: ret; - ret i16 %x -} - -define ptx_device i32 @t2_u32(i32 %x) { -; CHECK: mov.b32 %ret{{[0-9]+}}, %arg{{[0-9]+}}; -; CHECK: ret; - ret i32 %x -} - -define ptx_device i64 @t2_u64(i64 %x) { -; CHECK: mov.b64 %ret{{[0-9]+}}, %arg{{[0-9]+}}; -; CHECK: ret; - ret i64 %x -} - -define ptx_device float @t3_f32(float %x) { -; CHECK: mov.f32 %ret{{[0-9]+}}, %arg{{[0-9]+}}; -; CHECK: ret; - ret float %x -} - -define ptx_device double @t3_f64(double %x) { -; CHECK: mov.f64 %ret{{[0-9]+}}, %arg{{[0-9]+}}; -; CHECK: ret; - ret double %x -} - diff --git a/test/CodeGen/PTX/mul.ll b/test/CodeGen/PTX/mul.ll deleted file mode 100644 index 91949db..0000000 --- a/test/CodeGen/PTX/mul.ll +++ /dev/null @@ -1,39 +0,0 @@ -; RUN: llc < %s -march=ptx32 | FileCheck %s - -;define ptx_device i32 @t1(i32 %x, i32 %y) { -; %z = mul i32 %x, %y -; ret i32 %z -;} - -;define ptx_device i32 @t2(i32 %x) { -; %z = mul i32 %x, 1 -; ret i32 %z -;} - -define ptx_device float @t1_f32(float %x, float %y) { -; CHECK: mul.rn.f32 %ret{{[0-9]+}}, %f{{[0-9]+}}, %f{{[0-9]+}} -; CHECK: ret; - %z = fmul float %x, %y - ret float %z -} - -define ptx_device double @t1_f64(double %x, double %y) { -; CHECK: mul.rn.f64 %ret{{[0-9]+}}, %fd{{[0-9]+}}, %fd{{[0-9]+}} -; CHECK: ret; - %z = fmul double %x, %y - ret double %z -} - -define ptx_device float @t2_f32(float %x) { -; CHECK: mul.rn.f32 %ret{{[0-9]+}}, %f{{[0-9]+}}, 0D4014000000000000; -; CHECK: ret; - %z = fmul float %x, 5.0 - ret float %z -} - -define ptx_device double @t2_f64(double %x) { -; CHECK: mul.rn.f64 %ret{{[0-9]+}}, %fd{{[0-9]+}}, 0D4014000000000000; -; CHECK: ret; - %z = fmul double %x, 5.0 - ret double %z -} diff --git a/test/CodeGen/PTX/options.ll b/test/CodeGen/PTX/options.ll deleted file mode 100644 index 0fb6602..0000000 --- a/test/CodeGen/PTX/options.ll +++ /dev/null @@ -1,13 +0,0 @@ -; RUN: llc < %s -march=ptx32 -mattr=ptx20 | grep ".version 2.0" -; RUN: llc < %s -march=ptx32 -mattr=ptx21 | grep ".version 2.1" -; RUN: llc < %s -march=ptx32 -mattr=ptx22 | grep ".version 2.2" -; RUN: llc < %s -march=ptx32 -mattr=ptx23 | grep ".version 2.3" -; RUN: llc < %s -march=ptx32 -mattr=sm10 | grep ".target sm_10" -; RUN: llc < %s -march=ptx32 -mattr=sm13 | grep ".target sm_13" -; RUN: llc < %s -march=ptx32 -mattr=sm20 | grep ".target sm_20" -; RUN: llc < %s -march=ptx32 -mattr=ptx23 | grep ".address_size 32" -; RUN: llc < %s -march=ptx64 -mattr=ptx23 | grep ".address_size 64" - -define ptx_device void @t1() { - ret void -} diff --git a/test/CodeGen/PTX/parameter-order.ll b/test/CodeGen/PTX/parameter-order.ll deleted file mode 100644 index 377f173..0000000 --- a/test/CodeGen/PTX/parameter-order.ll +++ /dev/null @@ -1,8 +0,0 @@ -; RUN: llc < %s -march=ptx32 | FileCheck %s - -; CHECK: .func (.reg .b32 %ret{{[0-9]+}}) test_parameter_order (.reg .f32 %arg{{[0-9]+}}, .reg .b32 %arg{{[0-9]+}}, .reg .b32 %arg{{[0-9]+}}, .reg .f32 %arg{{[0-9]+}}) -define ptx_device i32 @test_parameter_order(float %a, i32 %b, i32 %c, float %d) { -; CHECK: sub.u32 %ret{{[0-9]+}}, %r{{[0-9]+}}, %r{{[0-9]+}} - %result = sub i32 %b, %c - ret i32 %result -} diff --git a/test/CodeGen/PTX/printf.ll b/test/CodeGen/PTX/printf.ll deleted file mode 100644 index f901b20..0000000 --- a/test/CodeGen/PTX/printf.ll +++ /dev/null @@ -1,25 +0,0 @@ -; RUN: llc < %s -march=ptx64 -mattr=+ptx20,+sm20 | FileCheck %s - -declare i32 @printf(i8*, ...) - -@str = private unnamed_addr constant [6 x i8] c"test\0A\00" - -define ptx_device void @t1_printf() { -; CHECK: mov.u64 %rd{{[0-9]+}}, $L__str; -; CHECK: call.uni (__localparam_{{[0-9]+}}), vprintf, (__localparam_{{[0-9]+}}, __localparam_{{[0-9]+}}); -; CHECK: ret; - %1 = call i32 (i8*, ...)* @printf(i8* getelementptr inbounds ([6 x i8]* @str, i64 0, i64 0)) - ret void -} - -@str2 = private unnamed_addr constant [11 x i8] c"test = %f\0A\00" - -define ptx_device void @t2_printf() { -; CHECK: .local .align 8 .b8 __local{{[0-9]+}}[{{[0-9]+}}]; -; CHECK: mov.u64 %rd{{[0-9]+}}, $L__str2; -; CHECK: cvta.local.u64 %rd{{[0-9]+}}, __local{{[0-9+]}}; -; CHECK: call.uni (__localparam_{{[0-9]+}}), vprintf, (__localparam_{{[0-9]+}}, __localparam_{{[0-9]+}}); -; CHECK: ret; - %1 = call i32 (i8*, ...)* @printf(i8* getelementptr inbounds ([11 x i8]* @str2, i64 0, i64 0), double 0x3FF3333340000000) - ret void -} diff --git a/test/CodeGen/PTX/ret.ll b/test/CodeGen/PTX/ret.ll deleted file mode 100644 index ba0523f..0000000 --- a/test/CodeGen/PTX/ret.ll +++ /dev/null @@ -1,7 +0,0 @@ -; RUN: llc < %s -march=ptx32 | FileCheck %s - -define ptx_device void @t1() { -; CHECK: ret; -; CHECK-NOT: exit; - ret void -} diff --git a/test/CodeGen/PTX/selp.ll b/test/CodeGen/PTX/selp.ll deleted file mode 100644 index aa7ce85..0000000 --- a/test/CodeGen/PTX/selp.ll +++ /dev/null @@ -1,25 +0,0 @@ -; RUN: llc < %s -march=ptx32 | FileCheck %s - -define ptx_device i32 @test_selp_i32(i1 %x, i32 %y, i32 %z) { -; CHECK: selp.u32 %ret{{[0-9]+}}, %r{{[0-9]+}}, %r{{[0-9]+}}, %p{{[0-9]+}}; - %a = select i1 %x, i32 %y, i32 %z - ret i32 %a -} - -define ptx_device i64 @test_selp_i64(i1 %x, i64 %y, i64 %z) { -; CHECK: selp.u64 %ret{{[0-9]+}}, %rd{{[0-9]+}}, %rd{{[0-9]+}}, %p{{[0-9]+}}; - %a = select i1 %x, i64 %y, i64 %z - ret i64 %a -} - -define ptx_device float @test_selp_f32(i1 %x, float %y, float %z) { -; CHECK: selp.f32 %ret{{[0-9]+}}, %f{{[0-9]+}}, %f{{[0-9]+}}, %p{{[0-9]+}}; - %a = select i1 %x, float %y, float %z - ret float %a -} - -define ptx_device double @test_selp_f64(i1 %x, double %y, double %z) { -; CHECK: selp.f64 %ret{{[0-9]+}}, %fd{{[0-9]+}}, %fd{{[0-9]+}}, %p{{[0-9]+}}; - %a = select i1 %x, double %y, double %z - ret double %a -} diff --git a/test/CodeGen/PTX/setp.ll b/test/CodeGen/PTX/setp.ll deleted file mode 100644 index 646abab..0000000 --- a/test/CodeGen/PTX/setp.ll +++ /dev/null @@ -1,206 +0,0 @@ -; RUN: llc < %s -march=ptx32 | FileCheck %s - -define ptx_device i32 @test_setp_eq_u32_rr(i32 %x, i32 %y) { -; CHECK: setp.eq.u32 %p[[P0:[0-9]+]], %r{{[0-9]+}}, %r{{[0-9]+}}; -; CHECK: selp.u32 %ret{{[0-9]+}}, 1, 0, %p[[P0]]; -; CHECK: ret; - %p = icmp eq i32 %x, %y - %z = zext i1 %p to i32 - ret i32 %z -} - -define ptx_device i32 @test_setp_ne_u32_rr(i32 %x, i32 %y) { -; CHECK: setp.ne.u32 %p[[P0:[0-9]+]], %r{{[0-9]+}}, %r{{[0-9]+}}; -; CHECK: selp.u32 %ret{{[0-9]+}}, 1, 0, %p[[P0]]; -; CHECK: ret; - %p = icmp ne i32 %x, %y - %z = zext i1 %p to i32 - ret i32 %z -} - -define ptx_device i32 @test_setp_lt_u32_rr(i32 %x, i32 %y) { -; CHECK: setp.lt.u32 %p[[P0:[0-9]+]], %r{{[0-9]+}}, %r{{[0-9]+}}; -; CHECK: selp.u32 %ret{{[0-9]+}}, 1, 0, %p[[P0]]; -; CHECK: ret; - %p = icmp ult i32 %x, %y - %z = zext i1 %p to i32 - ret i32 %z -} - -define ptx_device i32 @test_setp_le_u32_rr(i32 %x, i32 %y) { -; CHECK: setp.le.u32 %p[[P0:[0-9]+]], %r{{[0-9]+}}, %r{{[0-9]+}}; -; CHECK: selp.u32 %ret{{[0-9]+}}, 1, 0, %p[[P0]]; -; CHECK: ret; - %p = icmp ule i32 %x, %y - %z = zext i1 %p to i32 - ret i32 %z -} - -define ptx_device i32 @test_setp_gt_u32_rr(i32 %x, i32 %y) { -; CHECK: setp.gt.u32 %p[[P0:[0-9]+]], %r{{[0-9]+}}, %r{{[0-9]+}}; -; CHECK: selp.u32 %ret{{[0-9]+}}, 1, 0, %p[[P0]]; -; CHECK: ret; - %p = icmp ugt i32 %x, %y - %z = zext i1 %p to i32 - ret i32 %z -} - -define ptx_device i32 @test_setp_ge_u32_rr(i32 %x, i32 %y) { -; CHECK: setp.ge.u32 %p[[P0:[0-9]+]], %r{{[0-9]+}}, %r{{[0-9]+}}; -; CHECK: selp.u32 %ret{{[0-9]+}}, 1, 0, %p[[P0]]; -; CHECK: ret; - %p = icmp uge i32 %x, %y - %z = zext i1 %p to i32 - ret i32 %z -} - -define ptx_device i32 @test_setp_lt_s32_rr(i32 %x, i32 %y) { -; CHECK: setp.lt.s32 %p[[P0:[0-9]+]], %r{{[0-9]+}}, %r{{[0-9]+}}; -; CHECK: selp.u32 %ret{{[0-9]+}}, 1, 0, %p[[P0]]; -; CHECK: ret; - %p = icmp slt i32 %x, %y - %z = zext i1 %p to i32 - ret i32 %z -} - -define ptx_device i32 @test_setp_le_s32_rr(i32 %x, i32 %y) { -; CHECK: setp.le.s32 %p[[P0:[0-9]+]], %r{{[0-9]+}}, %r{{[0-9]+}}; -; CHECK: selp.u32 %ret{{[0-9]+}}, 1, 0, %p[[P0]]; -; CHECK: ret; - %p = icmp sle i32 %x, %y - %z = zext i1 %p to i32 - ret i32 %z -} - -define ptx_device i32 @test_setp_gt_s32_rr(i32 %x, i32 %y) { -; CHECK: setp.gt.s32 %p[[P0:[0-9]+]], %r{{[0-9]+}}, %r{{[0-9]+}}; -; CHECK: selp.u32 %ret{{[0-9]+}}, 1, 0, %p[[P0]]; -; CHECK: ret; - %p = icmp sgt i32 %x, %y - %z = zext i1 %p to i32 - ret i32 %z -} - -define ptx_device i32 @test_setp_ge_s32_rr(i32 %x, i32 %y) { -; CHECK: setp.ge.s32 %p[[P0:[0-9]+]], %r{{[0-9]+}}, %r{{[0-9]+}}; -; CHECK: selp.u32 %ret{{[0-9]+}}, 1, 0, %p[[P0]]; -; CHECK: ret; - %p = icmp sge i32 %x, %y - %z = zext i1 %p to i32 - ret i32 %z -} - -define ptx_device i32 @test_setp_eq_u32_ri(i32 %x) { -; CHECK: setp.eq.u32 %p[[P0:[0-9]+]], %r{{[0-9]+}}, 1; -; CHECK: selp.u32 %ret{{[0-9]+}}, 1, 0, %p[[P0]]; -; CHECK: ret; - %p = icmp eq i32 %x, 1 - %z = zext i1 %p to i32 - ret i32 %z -} - -define ptx_device i32 @test_setp_ne_u32_ri(i32 %x) { -; CHECK: setp.ne.u32 %p[[P0:[0-9]+]], %r{{[0-9]+}}, 1; -; CHECK: selp.u32 %ret{{[0-9]+}}, 1, 0, %p[[P0]]; -; CHECK: ret; - %p = icmp ne i32 %x, 1 - %z = zext i1 %p to i32 - ret i32 %z -} - -define ptx_device i32 @test_setp_lt_u32_ri(i32 %x) { -; CHECK: setp.eq.u32 %p[[P0:[0-9]+]], %r{{[0-9]+}}, 0; -; CHECK: selp.u32 %ret{{[0-9]+}}, 1, 0, %p[[P0]]; -; CHECK: ret; - %p = icmp ult i32 %x, 1 - %z = zext i1 %p to i32 - ret i32 %z -} - -define ptx_device i32 @test_setp_le_u32_ri(i32 %x) { -; CHECK: setp.lt.u32 %p[[P0:[0-9]+]], %r{{[0-9]+}}, 2; -; CHECK: selp.u32 %ret{{[0-9]+}}, 1, 0, %p[[P0]]; -; CHECK: ret; - %p = icmp ule i32 %x, 1 - %z = zext i1 %p to i32 - ret i32 %z -} - -define ptx_device i32 @test_setp_gt_u32_ri(i32 %x) { -; CHECK: setp.gt.u32 %p[[P0:[0-9]+]], %r{{[0-9]+}}, 1; -; CHECK: selp.u32 %ret{{[0-9]+}}, 1, 0, %p[[P0]]; -; CHECK: ret; - %p = icmp ugt i32 %x, 1 - %z = zext i1 %p to i32 - ret i32 %z -} - -define ptx_device i32 @test_setp_ge_u32_ri(i32 %x) { -; CHECK: setp.ne.u32 %p[[P0:[0-9]+]], %r{{[0-9]+}}, 0; -; CHECK: selp.u32 %ret{{[0-9]+}}, 1, 0, %p[[P0]]; -; CHECK: ret; - %p = icmp uge i32 %x, 1 - %z = zext i1 %p to i32 - ret i32 %z -} - -define ptx_device i32 @test_setp_lt_s32_ri(i32 %x) { -; CHECK: setp.lt.s32 %p[[P0:[0-9]+]], %r{{[0-9]+}}, 1; -; CHECK: selp.u32 %ret{{[0-9]+}}, 1, 0, %p[[P0]]; -; CHECK: ret; - %p = icmp slt i32 %x, 1 - %z = zext i1 %p to i32 - ret i32 %z -} - -define ptx_device i32 @test_setp_le_s32_ri(i32 %x) { -; CHECK: setp.lt.s32 %p[[P0:[0-9]+]], %r{{[0-9]+}}, 2; -; CHECK: selp.u32 %ret{{[0-9]+}}, 1, 0, %p[[P0]]; -; CHECK: ret; - %p = icmp sle i32 %x, 1 - %z = zext i1 %p to i32 - ret i32 %z -} - -define ptx_device i32 @test_setp_gt_s32_ri(i32 %x) { -; CHECK: setp.gt.s32 %p[[P0:[0-9]+]], %r{{[0-9]+}}, 1; -; CHECK: selp.u32 %ret{{[0-9]+}}, 1, 0, %p[[P0]]; -; CHECK: ret; - %p = icmp sgt i32 %x, 1 - %z = zext i1 %p to i32 - ret i32 %z -} - -define ptx_device i32 @test_setp_ge_s32_ri(i32 %x) { -; CHECK: setp.gt.s32 %p[[P0:[0-9]+]], %r{{[0-9]+}}, 0; -; CHECK: selp.u32 %ret{{[0-9]+}}, 1, 0, %p[[P0]]; -; CHECK: ret; - %p = icmp sge i32 %x, 1 - %z = zext i1 %p to i32 - ret i32 %z -} - -define ptx_device i32 @test_setp_4_op_format_1(i32 %x, i32 %y, i32 %u, i32 %v) { -; CHECK: setp.gt.u32 %p[[P0:[0-9]+]], %r{{[0-9]+}}, %r{{[0-9]+}}; -; CHECK: setp.eq.and.u32 %p1, %r{{[0-9]+}}, %r{{[0-9]+}}, %p[[P0]]; -; CHECK: selp.u32 %ret{{[0-9]+}}, 1, 0, %p1; -; CHECK: ret; - %c = icmp eq i32 %x, %y - %d = icmp ugt i32 %u, %v - %e = and i1 %c, %d - %z = zext i1 %e to i32 - ret i32 %z -} - -define ptx_device i32 @test_setp_4_op_format_2(i32 %x, i32 %y, i32 %w) { -; CHECK: setp.gt.u32 %p[[P0:[0-9]+]], %r{{[0-9]+}}, 0; -; CHECK: setp.eq.and.u32 %p1, %r{{[0-9]+}}, %r{{[0-9]+}}, !%p[[P0]]; -; CHECK: selp.u32 %ret{{[0-9]+}}, 1, 0, %p1; -; CHECK: ret; - %c = trunc i32 %w to i1 - %d = icmp eq i32 %x, %y - %e = xor i1 %c, 1 - %f = and i1 %d, %e - %z = zext i1 %f to i32 - ret i32 %z -} diff --git a/test/CodeGen/PTX/shl.ll b/test/CodeGen/PTX/shl.ll deleted file mode 100644 index d9fe2cd..0000000 --- a/test/CodeGen/PTX/shl.ll +++ /dev/null @@ -1,22 +0,0 @@ -; RUN: llc < %s -march=ptx32 | FileCheck %s - -define ptx_device i32 @t1(i32 %x, i32 %y) { -; CHECK: shl.b32 %ret{{[0-9]+}}, %r{{[0-9]+}}, %r{{[0-9]+}} - %z = shl i32 %x, %y -; CHECK: ret; - ret i32 %z -} - -define ptx_device i32 @t2(i32 %x) { -; CHECK: shl.b32 %ret{{[0-9]+}}, %r{{[0-9]+}}, 3 - %z = shl i32 %x, 3 -; CHECK: ret; - ret i32 %z -} - -define ptx_device i32 @t3(i32 %x) { -; CHECK: shl.b32 %ret{{[0-9]+}}, 3, %r{{[0-9]+}} - %z = shl i32 3, %x -; CHECK: ret; - ret i32 %z -} diff --git a/test/CodeGen/PTX/shr.ll b/test/CodeGen/PTX/shr.ll deleted file mode 100644 index eb4666f..0000000 --- a/test/CodeGen/PTX/shr.ll +++ /dev/null @@ -1,43 +0,0 @@ -; RUN: llc < %s -march=ptx32 | FileCheck %s - -define ptx_device i32 @t1(i32 %x, i32 %y) { -; CHECK: shr.u32 %ret{{[0-9]+}}, %r{{[0-9]+}}, %r{{[0-9]+}} - %z = lshr i32 %x, %y -; CHECK: ret; - ret i32 %z -} - -define ptx_device i32 @t2(i32 %x) { -; CHECK: shr.u32 %ret{{[0-9]+}}, %r{{[0-9]+}}, 3 - %z = lshr i32 %x, 3 -; CHECK: ret; - ret i32 %z -} - -define ptx_device i32 @t3(i32 %x) { -; CHECK: shr.u32 %ret{{[0-9]+}}, 3, %r{{[0-9]+}} - %z = lshr i32 3, %x -; CHECK: ret; - ret i32 %z -} - -define ptx_device i32 @t4(i32 %x, i32 %y) { -; CHECK: shr.s32 %ret{{[0-9]+}}, %r{{[0-9]+}}, %r{{[0-9]+}} - %z = ashr i32 %x, %y -; CHECK: ret; - ret i32 %z -} - -define ptx_device i32 @t5(i32 %x) { -; CHECK: shr.s32 %ret{{[0-9]+}}, %r{{[0-9]+}}, 3 - %z = ashr i32 %x, 3 -; CHECK: ret; - ret i32 %z -} - -define ptx_device i32 @t6(i32 %x) { -; CHECK: shr.s32 %ret{{[0-9]+}}, -3, %r{{[0-9]+}} - %z = ashr i32 -3, %x -; CHECK: ret; - ret i32 %z -} diff --git a/test/CodeGen/PTX/simple-call.ll b/test/CodeGen/PTX/simple-call.ll deleted file mode 100644 index 77ea29e..0000000 --- a/test/CodeGen/PTX/simple-call.ll +++ /dev/null @@ -1,27 +0,0 @@ -; RUN: llc < %s -march=ptx32 -mattr=sm20 | FileCheck %s - -define ptx_device void @test_add(float %x, float %y) { -; CHECK: ret; - %z = fadd float %x, %y - ret void -} - -define ptx_device float @test_call(float %x, float %y) { - %a = fadd float %x, %y -; CHECK: call.uni test_add, (__localparam_{{[0-9]+}}, __localparam_{{[0-9]+}}); - call void @test_add(float %a, float %y) - ret float %a -} - -define ptx_device float @test_compute(float %x, float %y) { -; CHECK: ret; - %z = fadd float %x, %y - ret float %z -} - -define ptx_device float @test_call_compute(float %x, float %y) { -; CHECK: call.uni (__localparam_{{[0-9]+}}), test_compute, (__localparam_{{[0-9]+}}, __localparam_{{[0-9]+}}) - %z = call float @test_compute(float %x, float %y) - ret float %z -} - diff --git a/test/CodeGen/PTX/st.ll b/test/CodeGen/PTX/st.ll deleted file mode 100644 index c794363..0000000 --- a/test/CodeGen/PTX/st.ll +++ /dev/null @@ -1,337 +0,0 @@ -; RUN: llc < %s -march=ptx32 | FileCheck %s - -;CHECK: .extern .global .b16 array_i16[10]; -@array_i16 = external global [10 x i16] - -;CHECK: .extern .const .b16 array_constant_i16[10]; -@array_constant_i16 = external addrspace(1) constant [10 x i16] - -;CHECK: .extern .shared .b16 array_shared_i16[10]; -@array_shared_i16 = external addrspace(4) global [10 x i16] - -;CHECK: .extern .global .b32 array_i32[10]; -@array_i32 = external global [10 x i32] - -;CHECK: .extern .const .b32 array_constant_i32[10]; -@array_constant_i32 = external addrspace(1) constant [10 x i32] - -;CHECK: .extern .shared .b32 array_shared_i32[10]; -@array_shared_i32 = external addrspace(4) global [10 x i32] - -;CHECK: .extern .global .b64 array_i64[10]; -@array_i64 = external global [10 x i64] - -;CHECK: .extern .const .b64 array_constant_i64[10]; -@array_constant_i64 = external addrspace(1) constant [10 x i64] - -;CHECK: .extern .shared .b64 array_shared_i64[10]; -@array_shared_i64 = external addrspace(4) global [10 x i64] - -;CHECK: .extern .global .b32 array_float[10]; -@array_float = external global [10 x float] - -;CHECK: .extern .const .b32 array_constant_float[10]; -@array_constant_float = external addrspace(1) constant [10 x float] - -;CHECK: .extern .shared .b32 array_shared_float[10]; -@array_shared_float = external addrspace(4) global [10 x float] - -;CHECK: .extern .global .b64 array_double[10]; -@array_double = external global [10 x double] - -;CHECK: .extern .const .b64 array_constant_double[10]; -@array_constant_double = external addrspace(1) constant [10 x double] - -;CHECK: .extern .shared .b64 array_shared_double[10]; -@array_shared_double = external addrspace(4) global [10 x double] - - -define ptx_device void @t1_u16(i16* %p, i16 %x) { -entry: -;CHECK: st.global.u16 [%r{{[0-9]+}}], %rh{{[0-9]+}}; -;CHECK: ret; - store i16 %x, i16* %p - ret void -} - -define ptx_device void @t1_u32(i32* %p, i32 %x) { -entry: -;CHECK: st.global.u32 [%r{{[0-9]+}}], %r{{[0-9]+}}; -;CHECK: ret; - store i32 %x, i32* %p - ret void -} - -define ptx_device void @t1_u64(i64* %p, i64 %x) { -entry: -;CHECK: st.global.u64 [%r{{[0-9]+}}], %rd{{[0-9]+}}; -;CHECK: ret; - store i64 %x, i64* %p - ret void -} - -define ptx_device void @t1_f32(float* %p, float %x) { -entry: -;CHECK: st.global.f32 [%r{{[0-9]+}}], %f{{[0-9]+}}; -;CHECK: ret; - store float %x, float* %p - ret void -} - -define ptx_device void @t1_f64(double* %p, double %x) { -entry: -;CHECK: st.global.f64 [%r{{[0-9]+}}], %fd{{[0-9]+}}; -;CHECK: ret; - store double %x, double* %p - ret void -} - -define ptx_device void @t2_u16(i16* %p, i16 %x) { -entry: -;CHECK: st.global.u16 [%r{{[0-9]+}}+2], %rh{{[0-9]+}}; -;CHECK: ret; - %i = getelementptr i16* %p, i32 1 - store i16 %x, i16* %i - ret void -} - -define ptx_device void @t2_u32(i32* %p, i32 %x) { -entry: -;CHECK: st.global.u32 [%r{{[0-9]+}}+4], %r{{[0-9]+}}; -;CHECK: ret; - %i = getelementptr i32* %p, i32 1 - store i32 %x, i32* %i - ret void -} - -define ptx_device void @t2_u64(i64* %p, i64 %x) { -entry: -;CHECK: st.global.u64 [%r{{[0-9]+}}+8], %rd{{[0-9]+}}; -;CHECK: ret; - %i = getelementptr i64* %p, i32 1 - store i64 %x, i64* %i - ret void -} - -define ptx_device void @t2_f32(float* %p, float %x) { -entry: -;CHECK: st.global.f32 [%r{{[0-9]+}}+4], %f{{[0-9]+}}; -;CHECK: ret; - %i = getelementptr float* %p, i32 1 - store float %x, float* %i - ret void -} - -define ptx_device void @t2_f64(double* %p, double %x) { -entry: -;CHECK: st.global.f64 [%r{{[0-9]+}}+8], %fd{{[0-9]+}}; -;CHECK: ret; - %i = getelementptr double* %p, i32 1 - store double %x, double* %i - ret void -} - -define ptx_device void @t3_u16(i16* %p, i32 %q, i16 %x) { -entry: -;CHECK: shl.b32 %r[[R0:[0-9]+]], %r{{[0-9]+}}, 1; -;CHECK: add.u32 %r{{[0-9]+}}, %r{{[0-9]+}}, %r[[R0]]; -;CHECK: st.global.u16 [%r{{[0-9]+}}], %rh{{[0-9]+}}; -;CHECK: ret; - %i = getelementptr i16* %p, i32 %q - store i16 %x, i16* %i - ret void -} - -define ptx_device void @t3_u32(i32* %p, i32 %q, i32 %x) { -entry: -;CHECK: shl.b32 %r[[R0:[0-9]+]], %r{{[0-9]+}}, 2; -;CHECK: add.u32 %r{{[0-9]+}}, %r{{[0-9]+}}, %r[[R0]]; -;CHECK: st.global.u32 [%r{{[0-9]+}}], %r{{[0-9]+}}; -;CHECK: ret; - %i = getelementptr i32* %p, i32 %q - store i32 %x, i32* %i - ret void -} - -define ptx_device void @t3_u64(i64* %p, i32 %q, i64 %x) { -entry: -;CHECK: shl.b32 %r[[R0:[0-9]+]], %r{{[0-9]+}}, 3; -;CHECK: add.u32 %r{{[0-9]+}}, %r{{[0-9]+}}, %r[[R0]]; -;CHECK: st.global.u64 [%r{{[0-9]+}}], %rd{{[0-9]+}}; -;CHECK: ret; - %i = getelementptr i64* %p, i32 %q - store i64 %x, i64* %i - ret void -} - -define ptx_device void @t3_f32(float* %p, i32 %q, float %x) { -entry: -;CHECK: shl.b32 %r[[R0:[0-9]+]], %r{{[0-9]+}}, 2; -;CHECK: add.u32 %r{{[0-9]+}}, %r{{[0-9]+}}, %r[[R0]]; -;CHECK: st.global.f32 [%r{{[0-9]+}}], %f{{[0-9]+}}; -;CHECK: ret; - %i = getelementptr float* %p, i32 %q - store float %x, float* %i - ret void -} - -define ptx_device void @t3_f64(double* %p, i32 %q, double %x) { -entry: -;CHECK: shl.b32 %r[[R0:[0-9]+]], %r{{[0-9]+}}, 3; -;CHECK: add.u32 %r{{[0-9]+}}, %r{{[0-9]+}}, %r[[R0]]; -;CHECK: st.global.f64 [%r{{[0-9]+}}], %fd{{[0-9]+}}; -;CHECK: ret; - %i = getelementptr double* %p, i32 %q - store double %x, double* %i - ret void -} - -define ptx_device void @t4_global_u16(i16 %x) { -entry: -;CHECK: mov.u32 %r[[R0:[0-9]+]], array_i16; -;CHECK: st.global.u16 [%r[[R0]]], %rh{{[0-9]+}}; -;CHECK: ret; - %i = getelementptr [10 x i16]* @array_i16, i16 0, i16 0 - store i16 %x, i16* %i - ret void -} - -define ptx_device void @t4_global_u32(i32 %x) { -entry: -;CHECK: mov.u32 %r[[R0:[0-9]+]], array_i32; -;CHECK: st.global.u32 [%r[[R0]]], %r{{[0-9]+}}; -;CHECK: ret; - %i = getelementptr [10 x i32]* @array_i32, i32 0, i32 0 - store i32 %x, i32* %i - ret void -} - -define ptx_device void @t4_global_u64(i64 %x) { -entry: -;CHECK: mov.u32 %r[[R0:[0-9]+]], array_i64; -;CHECK: st.global.u64 [%r[[R0]]], %rd{{[0-9]+}}; -;CHECK: ret; - %i = getelementptr [10 x i64]* @array_i64, i32 0, i32 0 - store i64 %x, i64* %i - ret void -} - -define ptx_device void @t4_global_f32(float %x) { -entry: -;CHECK: mov.u32 %r[[R0:[0-9]+]], array_float; -;CHECK: st.global.f32 [%r[[R0]]], %f{{[0-9]+}}; -;CHECK: ret; - %i = getelementptr [10 x float]* @array_float, i32 0, i32 0 - store float %x, float* %i - ret void -} - -define ptx_device void @t4_global_f64(double %x) { -entry: -;CHECK: mov.u32 %r[[R0:[0-9]+]], array_double; -;CHECK: st.global.f64 [%r[[R0]]], %fd{{[0-9]+}}; -;CHECK: ret; - %i = getelementptr [10 x double]* @array_double, i32 0, i32 0 - store double %x, double* %i - ret void -} - -define ptx_device void @t4_shared_u16(i16 %x) { -entry: -;CHECK: mov.u32 %r[[R0:[0-9]+]], array_shared_i16; -;CHECK: st.shared.u16 [%r[[R0]]], %rh{{[0-9]+}}; -;CHECK: ret; - %i = getelementptr [10 x i16] addrspace(4)* @array_shared_i16, i32 0, i32 0 - store i16 %x, i16 addrspace(4)* %i - ret void -} - -define ptx_device void @t4_shared_u32(i32 %x) { -entry: -;CHECK: mov.u32 %r[[R0:[0-9]+]], array_shared_i32; -;CHECK: st.shared.u32 [%r[[R0]]], %r{{[0-9]+}}; -;CHECK: ret; - %i = getelementptr [10 x i32] addrspace(4)* @array_shared_i32, i32 0, i32 0 - store i32 %x, i32 addrspace(4)* %i - ret void -} - -define ptx_device void @t4_shared_u64(i64 %x) { -entry: -;CHECK: mov.u32 %r[[R0:[0-9]+]], array_shared_i64; -;CHECK: st.shared.u64 [%r[[R0]]], %rd{{[0-9]+}}; -;CHECK: ret; - %i = getelementptr [10 x i64] addrspace(4)* @array_shared_i64, i32 0, i32 0 - store i64 %x, i64 addrspace(4)* %i - ret void -} - -define ptx_device void @t4_shared_f32(float %x) { -entry: -;CHECK: mov.u32 %r[[R0:[0-9]+]], array_shared_float; -;CHECK: st.shared.f32 [%r[[R0]]], %f{{[0-9]+}}; -;CHECK: ret; - %i = getelementptr [10 x float] addrspace(4)* @array_shared_float, i32 0, i32 0 - store float %x, float addrspace(4)* %i - ret void -} - -define ptx_device void @t4_shared_f64(double %x) { -entry: -;CHECK: mov.u32 %r[[R0:[0-9]+]], array_shared_double; -;CHECK: st.shared.f64 [%r[[R0]]], %fd{{[0-9]+}}; -;CHECK: ret; - %i = getelementptr [10 x double] addrspace(4)* @array_shared_double, i32 0, i32 0 - store double %x, double addrspace(4)* %i - ret void -} - -define ptx_device void @t5_u16(i16 %x) { -entry: -;CHECK: mov.u32 %r[[R0:[0-9]+]], array_i16; -;CHECK: st.global.u16 [%r[[R0]]+2], %rh{{[0-9]+}}; -;CHECK: ret; - %i = getelementptr [10 x i16]* @array_i16, i32 0, i32 1 - store i16 %x, i16* %i - ret void -} - -define ptx_device void @t5_u32(i32 %x) { -entry: -;CHECK: mov.u32 %r[[R0:[0-9]+]], array_i32; -;CHECK: st.global.u32 [%r[[R0]]+4], %r{{[0-9]+}}; -;CHECK: ret; - %i = getelementptr [10 x i32]* @array_i32, i32 0, i32 1 - store i32 %x, i32* %i - ret void -} - -define ptx_device void @t5_u64(i64 %x) { -entry: -;CHECK: mov.u32 %r[[R0:[0-9]+]], array_i64; -;CHECK: st.global.u64 [%r[[R0]]+8], %rd{{[0-9]+}}; -;CHECK: ret; - %i = getelementptr [10 x i64]* @array_i64, i32 0, i32 1 - store i64 %x, i64* %i - ret void -} - -define ptx_device void @t5_f32(float %x) { -entry: -;CHECK: mov.u32 %r[[R0:[0-9]+]], array_float; -;CHECK: st.global.f32 [%r[[R0]]+4], %f{{[0-9]+}}; -;CHECK: ret; - %i = getelementptr [10 x float]* @array_float, i32 0, i32 1 - store float %x, float* %i - ret void -} - -define ptx_device void @t5_f64(double %x) { -entry: -;CHECK: mov.u32 %r[[R0:[0-9]+]], array_double; -;CHECK: st.global.f64 [%r[[R0]]+8], %fd{{[0-9]+}}; -;CHECK: ret; - %i = getelementptr [10 x double]* @array_double, i32 0, i32 1 - store double %x, double* %i - ret void -} diff --git a/test/CodeGen/PTX/stack-object.ll b/test/CodeGen/PTX/stack-object.ll deleted file mode 100644 index 65f8ee2..0000000 --- a/test/CodeGen/PTX/stack-object.ll +++ /dev/null @@ -1,19 +0,0 @@ -; RUN: llc < %s -march=ptx32 -mattr=sm20 | FileCheck %s - -define ptx_device float @stack1(float %a) { - ; CHECK: .local .align 4 .b8 __local0[4]; - %a.2 = alloca float, align 4 - ; CHECK: st.local.f32 [__local0], %f0 - store float %a, float* %a.2 - %a.3 = load float* %a.2 - ret float %a.3 -} - -define ptx_device float @stack1_align8(float %a) { - ; CHECK: .local .align 8 .b8 __local0[4]; - %a.2 = alloca float, align 8 - ; CHECK: st.local.f32 [__local0], %f0 - store float %a, float* %a.2 - %a.3 = load float* %a.2 - ret float %a.3 -} diff --git a/test/CodeGen/PTX/sub.ll b/test/CodeGen/PTX/sub.ll deleted file mode 100644 index 7ac886a..0000000 --- a/test/CodeGen/PTX/sub.ll +++ /dev/null @@ -1,71 +0,0 @@ -; RUN: llc < %s -march=ptx32 | FileCheck %s - -define ptx_device i16 @t1_u16(i16 %x, i16 %y) { -; CHECK: sub.u16 %ret{{[0-9]+}}, %rh{{[0-9]+}}, %rh{{[0-9]+}}; -; CHECK: ret; - %z = sub i16 %x, %y - ret i16 %z -} - -define ptx_device i32 @t1_u32(i32 %x, i32 %y) { -; CHECK: sub.u32 %ret{{[0-9]+}}, %r{{[0-9]+}}, %r{{[0-9]+}}; -; CHECK: ret; - %z = sub i32 %x, %y - ret i32 %z -} - -define ptx_device i64 @t1_u64(i64 %x, i64 %y) { -; CHECK: sub.u64 %ret{{[0-9]+}}, %rd{{[0-9]+}}, %rd{{[0-9]+}}; -; CHECK: ret; - %z = sub i64 %x, %y - ret i64 %z -} - -define ptx_device float @t1_f32(float %x, float %y) { -; CHECK: sub.rn.f32 %ret{{[0-9]+}}, %f{{[0-9]+}}, %f{{[0-9]+}} -; CHECK: ret; - %z = fsub float %x, %y - ret float %z -} - -define ptx_device double @t1_f64(double %x, double %y) { -; CHECK: sub.rn.f64 %ret{{[0-9]+}}, %fd{{[0-9]+}}, %fd{{[0-9]+}} -; CHECK: ret; - %z = fsub double %x, %y - ret double %z -} - -define ptx_device i16 @t2_u16(i16 %x) { -; CHECK: add.u16 %ret{{[0-9]+}}, %rh{{[0-9]+}}, -1; -; CHECK: ret; - %z = sub i16 %x, 1 - ret i16 %z -} - -define ptx_device i32 @t2_u32(i32 %x) { -; CHECK: add.u32 %ret{{[0-9]+}}, %r{{[0-9]+}}, -1; -; CHECK: ret; - %z = sub i32 %x, 1 - ret i32 %z -} - -define ptx_device i64 @t2_u64(i64 %x) { -; CHECK: add.u64 %ret{{[0-9]+}}, %rd{{[0-9]+}}, -1; -; CHECK: ret; - %z = sub i64 %x, 1 - ret i64 %z -} - -define ptx_device float @t2_f32(float %x) { -; CHECK: add.rn.f32 %ret{{[0-9]+}}, %f{{[0-9]+}}, 0DBFF0000000000000; -; CHECK: ret; - %z = fsub float %x, 1.0 - ret float %z -} - -define ptx_device double @t2_f64(double %x) { -; CHECK: add.rn.f64 %ret{{[0-9]+}}, %fd{{[0-9]+}}, 0DBFF0000000000000; -; CHECK: ret; - %z = fsub double %x, 1.0 - ret double %z -} diff --git a/unittests/ADT/TripleTest.cpp b/unittests/ADT/TripleTest.cpp index 3b75584..967437c 100644 --- a/unittests/ADT/TripleTest.cpp +++ b/unittests/ADT/TripleTest.cpp @@ -353,9 +353,9 @@ TEST(TripleTest, BitWidthArchVariants) { EXPECT_EQ(Triple::ppc, T.get32BitArchVariant().getArch()); EXPECT_EQ(Triple::ppc64, T.get64BitArchVariant().getArch()); - T.setArch(Triple::ptx32); - EXPECT_EQ(Triple::ptx32, T.get32BitArchVariant().getArch()); - EXPECT_EQ(Triple::ptx64, T.get64BitArchVariant().getArch()); + T.setArch(Triple::nvptx); + EXPECT_EQ(Triple::nvptx, T.get32BitArchVariant().getArch()); + EXPECT_EQ(Triple::nvptx64, T.get64BitArchVariant().getArch()); T.setArch(Triple::sparc); EXPECT_EQ(Triple::sparc, T.get32BitArchVariant().getArch()); @@ -377,9 +377,9 @@ TEST(TripleTest, BitWidthArchVariants) { EXPECT_EQ(Triple::ppc, T.get32BitArchVariant().getArch()); EXPECT_EQ(Triple::ppc64, T.get64BitArchVariant().getArch()); - T.setArch(Triple::ptx64); - EXPECT_EQ(Triple::ptx32, T.get32BitArchVariant().getArch()); - EXPECT_EQ(Triple::ptx64, T.get64BitArchVariant().getArch()); + T.setArch(Triple::nvptx64); + EXPECT_EQ(Triple::nvptx, T.get32BitArchVariant().getArch()); + EXPECT_EQ(Triple::nvptx64, T.get64BitArchVariant().getArch()); T.setArch(Triple::sparcv9); EXPECT_EQ(Triple::sparc, T.get32BitArchVariant().getArch()); |