diff options
Diffstat (limited to 'test/CodeGen/NVPTX')
-rw-r--r-- | test/CodeGen/NVPTX/access-non-generic.ll | 8 | ||||
-rw-r--r-- | test/CodeGen/NVPTX/arg-lowering.ll | 13 | ||||
-rw-r--r-- | test/CodeGen/NVPTX/atomics.ll | 141 | ||||
-rw-r--r-- | test/CodeGen/NVPTX/bfe.ll | 32 | ||||
-rw-r--r-- | test/CodeGen/NVPTX/envreg.ll | 139 | ||||
-rw-r--r-- | test/CodeGen/NVPTX/gvar-init.ll | 5 | ||||
-rw-r--r-- | test/CodeGen/NVPTX/imad.ll | 9 | ||||
-rw-r--r-- | test/CodeGen/NVPTX/inline-asm.ll | 7 | ||||
-rw-r--r-- | test/CodeGen/NVPTX/isspacep.ll | 35 | ||||
-rw-r--r-- | test/CodeGen/NVPTX/ldu-i8.ll | 6 | ||||
-rw-r--r-- | test/CodeGen/NVPTX/ldu-ldg.ll | 40 | ||||
-rw-r--r-- | test/CodeGen/NVPTX/ldu-reg-plus-offset.ll | 6 | ||||
-rw-r--r-- | test/CodeGen/NVPTX/lit.local.cfg | 3 | ||||
-rw-r--r-- | test/CodeGen/NVPTX/managed.ll | 11 | ||||
-rw-r--r-- | test/CodeGen/NVPTX/mulwide.ll | 37 | ||||
-rw-r--r-- | test/CodeGen/NVPTX/nvvm-reflect.ll | 16 | ||||
-rw-r--r-- | test/CodeGen/NVPTX/rotate.ll | 58 | ||||
-rw-r--r-- | test/CodeGen/NVPTX/shift-parts.ll | 38 | ||||
-rw-r--r-- | test/CodeGen/NVPTX/weak-global.ll | 9 | ||||
-rw-r--r-- | test/CodeGen/NVPTX/weak-linkage.ll | 12 |
20 files changed, 613 insertions, 12 deletions
diff --git a/test/CodeGen/NVPTX/access-non-generic.ll b/test/CodeGen/NVPTX/access-non-generic.ll index 0622aa3..c225abf 100644 --- a/test/CodeGen/NVPTX/access-non-generic.ll +++ b/test/CodeGen/NVPTX/access-non-generic.ll @@ -74,13 +74,13 @@ define float @ld_st_shared_f32(i32 %i, float %v) { ret float %sum5 } -; Verifies nvptx-favor-non-generic keeps addrspacecasts between pointers of -; different element types. +; When hoisting an addrspacecast between different pointer types, replace the +; addrspacecast with a bitcast. define i32 @ld_int_from_float() { ; IR-LABEL: @ld_int_from_float -; IR: addrspacecast +; IR: load i32 addrspace(3)* bitcast (float addrspace(3)* @scalar to i32 addrspace(3)*) ; PTX-LABEL: ld_int_from_float( -; PTX: cvta.shared.u{{(32|64)}} +; PTX: ld.shared.u{{(32|64)}} %1 = load i32* addrspacecast(float addrspace(3)* @scalar to i32*), align 4 ret i32 %1 } diff --git a/test/CodeGen/NVPTX/arg-lowering.ll b/test/CodeGen/NVPTX/arg-lowering.ll new file mode 100644 index 0000000..f7b8a14 --- /dev/null +++ b/test/CodeGen/NVPTX/arg-lowering.ll @@ -0,0 +1,13 @@ +; RUN: llc < %s -march=nvptx -mcpu=sm_20 | FileCheck %s + +; CHECK: .visible .func (.param .align 16 .b8 func_retval0[16]) foo0( +; CHECK: .param .align 4 .b8 foo0_param_0[8] +define <4 x float> @foo0({float, float} %arg0) { + ret <4 x float> <float 1.0, float 1.0, float 1.0, float 1.0> +} + +; CHECK: .visible .func (.param .align 8 .b8 func_retval0[8]) foo1( +; CHECK: .param .align 8 .b8 foo1_param_0[16] +define <2 x float> @foo1({float, float, i64} %arg0) { + ret <2 x float> <float 1.0, float 1.0> +} diff --git a/test/CodeGen/NVPTX/atomics.ll b/test/CodeGen/NVPTX/atomics.ll new file mode 100644 index 0000000..10ab73d --- /dev/null +++ b/test/CodeGen/NVPTX/atomics.ll @@ -0,0 +1,141 @@ +; RUN: llc < %s -march=nvptx -mcpu=sm_20 | FileCheck %s + + +; CHECK: atom0 +define i32 @atom0(i32* %addr, i32 %val) { +; CHECK: atom.add.u32 + %ret = atomicrmw add i32* %addr, i32 %val seq_cst + ret i32 %ret +} + +; CHECK: atom1 +define i64 @atom1(i64* %addr, i64 %val) { +; CHECK: atom.add.u64 + %ret = atomicrmw add i64* %addr, i64 %val seq_cst + ret i64 %ret +} + +; CHECK: atom2 +define i32 @atom2(i32* %subr, i32 %val) { +; CHECK: neg.s32 +; CHECK: atom.add.u32 + %ret = atomicrmw sub i32* %subr, i32 %val seq_cst + ret i32 %ret +} + +; CHECK: atom3 +define i64 @atom3(i64* %subr, i64 %val) { +; CHECK: neg.s64 +; CHECK: atom.add.u64 + %ret = atomicrmw sub i64* %subr, i64 %val seq_cst + ret i64 %ret +} + +; CHECK: atom4 +define i32 @atom4(i32* %subr, i32 %val) { +; CHECK: atom.and.b32 + %ret = atomicrmw and i32* %subr, i32 %val seq_cst + ret i32 %ret +} + +; CHECK: atom5 +define i64 @atom5(i64* %subr, i64 %val) { +; CHECK: atom.and.b64 + %ret = atomicrmw and i64* %subr, i64 %val seq_cst + ret i64 %ret +} + +;; NAND not yet supported +;define i32 @atom6(i32* %subr, i32 %val) { +; %ret = atomicrmw nand i32* %subr, i32 %val seq_cst +; ret i32 %ret +;} + +;define i64 @atom7(i64* %subr, i64 %val) { +; %ret = atomicrmw nand i64* %subr, i64 %val seq_cst +; ret i64 %ret +;} + +; CHECK: atom8 +define i32 @atom8(i32* %subr, i32 %val) { +; CHECK: atom.or.b32 + %ret = atomicrmw or i32* %subr, i32 %val seq_cst + ret i32 %ret +} + +; CHECK: atom9 +define i64 @atom9(i64* %subr, i64 %val) { +; CHECK: atom.or.b64 + %ret = atomicrmw or i64* %subr, i64 %val seq_cst + ret i64 %ret +} + +; CHECK: atom10 +define i32 @atom10(i32* %subr, i32 %val) { +; CHECK: atom.xor.b32 + %ret = atomicrmw xor i32* %subr, i32 %val seq_cst + ret i32 %ret +} + +; CHECK: atom11 +define i64 @atom11(i64* %subr, i64 %val) { +; CHECK: atom.xor.b64 + %ret = atomicrmw xor i64* %subr, i64 %val seq_cst + ret i64 %ret +} + +; CHECK: atom12 +define i32 @atom12(i32* %subr, i32 %val) { +; CHECK: atom.max.s32 + %ret = atomicrmw max i32* %subr, i32 %val seq_cst + ret i32 %ret +} + +; CHECK: atom13 +define i64 @atom13(i64* %subr, i64 %val) { +; CHECK: atom.max.s64 + %ret = atomicrmw max i64* %subr, i64 %val seq_cst + ret i64 %ret +} + +; CHECK: atom14 +define i32 @atom14(i32* %subr, i32 %val) { +; CHECK: atom.min.s32 + %ret = atomicrmw min i32* %subr, i32 %val seq_cst + ret i32 %ret +} + +; CHECK: atom15 +define i64 @atom15(i64* %subr, i64 %val) { +; CHECK: atom.min.s64 + %ret = atomicrmw min i64* %subr, i64 %val seq_cst + ret i64 %ret +} + +; CHECK: atom16 +define i32 @atom16(i32* %subr, i32 %val) { +; CHECK: atom.max.u32 + %ret = atomicrmw umax i32* %subr, i32 %val seq_cst + ret i32 %ret +} + +; CHECK: atom17 +define i64 @atom17(i64* %subr, i64 %val) { +; CHECK: atom.max.u64 + %ret = atomicrmw umax i64* %subr, i64 %val seq_cst + ret i64 %ret +} + +; CHECK: atom18 +define i32 @atom18(i32* %subr, i32 %val) { +; CHECK: atom.min.u32 + %ret = atomicrmw umin i32* %subr, i32 %val seq_cst + ret i32 %ret +} + +; CHECK: atom19 +define i64 @atom19(i64* %subr, i64 %val) { +; CHECK: atom.min.u64 + %ret = atomicrmw umin i64* %subr, i64 %val seq_cst + ret i64 %ret +} diff --git a/test/CodeGen/NVPTX/bfe.ll b/test/CodeGen/NVPTX/bfe.ll new file mode 100644 index 0000000..2e816fe --- /dev/null +++ b/test/CodeGen/NVPTX/bfe.ll @@ -0,0 +1,32 @@ +; RUN: llc < %s -march=nvptx -mcpu=sm_20 | FileCheck %s + + +; CHECK: bfe0 +define i32 @bfe0(i32 %a) { +; CHECK: bfe.u32 %r{{[0-9]+}}, %r{{[0-9]+}}, 4, 4 +; CHECK-NOT: shr +; CHECK-NOT: and + %val0 = ashr i32 %a, 4 + %val1 = and i32 %val0, 15 + ret i32 %val1 +} + +; CHECK: bfe1 +define i32 @bfe1(i32 %a) { +; CHECK: bfe.u32 %r{{[0-9]+}}, %r{{[0-9]+}}, 3, 3 +; CHECK-NOT: shr +; CHECK-NOT: and + %val0 = ashr i32 %a, 3 + %val1 = and i32 %val0, 7 + ret i32 %val1 +} + +; CHECK: bfe2 +define i32 @bfe2(i32 %a) { +; CHECK: bfe.u32 %r{{[0-9]+}}, %r{{[0-9]+}}, 5, 3 +; CHECK-NOT: shr +; CHECK-NOT: and + %val0 = ashr i32 %a, 5 + %val1 = and i32 %val0, 7 + ret i32 %val1 +} diff --git a/test/CodeGen/NVPTX/envreg.ll b/test/CodeGen/NVPTX/envreg.ll new file mode 100644 index 0000000..a341b49 --- /dev/null +++ b/test/CodeGen/NVPTX/envreg.ll @@ -0,0 +1,139 @@ +; RUN: llc < %s -march=nvptx -mcpu=sm_20 | FileCheck %s + + +declare i32 @llvm.nvvm.read.ptx.sreg.envreg0() +declare i32 @llvm.nvvm.read.ptx.sreg.envreg1() +declare i32 @llvm.nvvm.read.ptx.sreg.envreg2() +declare i32 @llvm.nvvm.read.ptx.sreg.envreg3() +declare i32 @llvm.nvvm.read.ptx.sreg.envreg4() +declare i32 @llvm.nvvm.read.ptx.sreg.envreg5() +declare i32 @llvm.nvvm.read.ptx.sreg.envreg6() +declare i32 @llvm.nvvm.read.ptx.sreg.envreg7() +declare i32 @llvm.nvvm.read.ptx.sreg.envreg8() +declare i32 @llvm.nvvm.read.ptx.sreg.envreg9() +declare i32 @llvm.nvvm.read.ptx.sreg.envreg10() +declare i32 @llvm.nvvm.read.ptx.sreg.envreg11() +declare i32 @llvm.nvvm.read.ptx.sreg.envreg12() +declare i32 @llvm.nvvm.read.ptx.sreg.envreg13() +declare i32 @llvm.nvvm.read.ptx.sreg.envreg14() +declare i32 @llvm.nvvm.read.ptx.sreg.envreg15() +declare i32 @llvm.nvvm.read.ptx.sreg.envreg16() +declare i32 @llvm.nvvm.read.ptx.sreg.envreg17() +declare i32 @llvm.nvvm.read.ptx.sreg.envreg18() +declare i32 @llvm.nvvm.read.ptx.sreg.envreg19() +declare i32 @llvm.nvvm.read.ptx.sreg.envreg20() +declare i32 @llvm.nvvm.read.ptx.sreg.envreg21() +declare i32 @llvm.nvvm.read.ptx.sreg.envreg22() +declare i32 @llvm.nvvm.read.ptx.sreg.envreg23() +declare i32 @llvm.nvvm.read.ptx.sreg.envreg24() +declare i32 @llvm.nvvm.read.ptx.sreg.envreg25() +declare i32 @llvm.nvvm.read.ptx.sreg.envreg26() +declare i32 @llvm.nvvm.read.ptx.sreg.envreg27() +declare i32 @llvm.nvvm.read.ptx.sreg.envreg28() +declare i32 @llvm.nvvm.read.ptx.sreg.envreg29() +declare i32 @llvm.nvvm.read.ptx.sreg.envreg30() +declare i32 @llvm.nvvm.read.ptx.sreg.envreg31() + + +; CHECK: foo +define i32 @foo() { +; CHECK: mov.b32 %r{{[0-9]+}}, %envreg0 + %val0 = tail call i32 @llvm.nvvm.read.ptx.sreg.envreg0() +; CHECK: mov.b32 %r{{[0-9]+}}, %envreg1 + %val1 = tail call i32 @llvm.nvvm.read.ptx.sreg.envreg1() +; CHECK: mov.b32 %r{{[0-9]+}}, %envreg2 + %val2 = tail call i32 @llvm.nvvm.read.ptx.sreg.envreg2() +; CHECK: mov.b32 %r{{[0-9]+}}, %envreg3 + %val3 = tail call i32 @llvm.nvvm.read.ptx.sreg.envreg3() +; CHECK: mov.b32 %r{{[0-9]+}}, %envreg4 + %val4 = tail call i32 @llvm.nvvm.read.ptx.sreg.envreg4() +; CHECK: mov.b32 %r{{[0-9]+}}, %envreg5 + %val5 = tail call i32 @llvm.nvvm.read.ptx.sreg.envreg5() +; CHECK: mov.b32 %r{{[0-9]+}}, %envreg6 + %val6 = tail call i32 @llvm.nvvm.read.ptx.sreg.envreg6() +; CHECK: mov.b32 %r{{[0-9]+}}, %envreg7 + %val7 = tail call i32 @llvm.nvvm.read.ptx.sreg.envreg7() +; CHECK: mov.b32 %r{{[0-9]+}}, %envreg8 + %val8 = tail call i32 @llvm.nvvm.read.ptx.sreg.envreg8() +; CHECK: mov.b32 %r{{[0-9]+}}, %envreg9 + %val9 = tail call i32 @llvm.nvvm.read.ptx.sreg.envreg9() +; CHECK: mov.b32 %r{{[0-9]+}}, %envreg10 + %val10 = tail call i32 @llvm.nvvm.read.ptx.sreg.envreg10() +; CHECK: mov.b32 %r{{[0-9]+}}, %envreg11 + %val11 = tail call i32 @llvm.nvvm.read.ptx.sreg.envreg11() +; CHECK: mov.b32 %r{{[0-9]+}}, %envreg12 + %val12 = tail call i32 @llvm.nvvm.read.ptx.sreg.envreg12() +; CHECK: mov.b32 %r{{[0-9]+}}, %envreg13 + %val13 = tail call i32 @llvm.nvvm.read.ptx.sreg.envreg13() +; CHECK: mov.b32 %r{{[0-9]+}}, %envreg14 + %val14 = tail call i32 @llvm.nvvm.read.ptx.sreg.envreg14() +; CHECK: mov.b32 %r{{[0-9]+}}, %envreg15 + %val15 = tail call i32 @llvm.nvvm.read.ptx.sreg.envreg15() +; CHECK: mov.b32 %r{{[0-9]+}}, %envreg16 + %val16 = tail call i32 @llvm.nvvm.read.ptx.sreg.envreg16() +; CHECK: mov.b32 %r{{[0-9]+}}, %envreg17 + %val17 = tail call i32 @llvm.nvvm.read.ptx.sreg.envreg17() +; CHECK: mov.b32 %r{{[0-9]+}}, %envreg18 + %val18 = tail call i32 @llvm.nvvm.read.ptx.sreg.envreg18() +; CHECK: mov.b32 %r{{[0-9]+}}, %envreg19 + %val19 = tail call i32 @llvm.nvvm.read.ptx.sreg.envreg19() +; CHECK: mov.b32 %r{{[0-9]+}}, %envreg20 + %val20 = tail call i32 @llvm.nvvm.read.ptx.sreg.envreg20() +; CHECK: mov.b32 %r{{[0-9]+}}, %envreg21 + %val21 = tail call i32 @llvm.nvvm.read.ptx.sreg.envreg21() +; CHECK: mov.b32 %r{{[0-9]+}}, %envreg22 + %val22 = tail call i32 @llvm.nvvm.read.ptx.sreg.envreg22() +; CHECK: mov.b32 %r{{[0-9]+}}, %envreg23 + %val23 = tail call i32 @llvm.nvvm.read.ptx.sreg.envreg23() +; CHECK: mov.b32 %r{{[0-9]+}}, %envreg24 + %val24 = tail call i32 @llvm.nvvm.read.ptx.sreg.envreg24() +; CHECK: mov.b32 %r{{[0-9]+}}, %envreg25 + %val25 = tail call i32 @llvm.nvvm.read.ptx.sreg.envreg25() +; CHECK: mov.b32 %r{{[0-9]+}}, %envreg26 + %val26 = tail call i32 @llvm.nvvm.read.ptx.sreg.envreg26() +; CHECK: mov.b32 %r{{[0-9]+}}, %envreg27 + %val27 = tail call i32 @llvm.nvvm.read.ptx.sreg.envreg27() +; CHECK: mov.b32 %r{{[0-9]+}}, %envreg28 + %val28 = tail call i32 @llvm.nvvm.read.ptx.sreg.envreg28() +; CHECK: mov.b32 %r{{[0-9]+}}, %envreg29 + %val29 = tail call i32 @llvm.nvvm.read.ptx.sreg.envreg29() +; CHECK: mov.b32 %r{{[0-9]+}}, %envreg30 + %val30 = tail call i32 @llvm.nvvm.read.ptx.sreg.envreg30() +; CHECK: mov.b32 %r{{[0-9]+}}, %envreg31 + %val31 = tail call i32 @llvm.nvvm.read.ptx.sreg.envreg31() + + + %ret0 = add i32 %val0, %val1 + %ret1 = add i32 %ret0, %val2 + %ret2 = add i32 %ret1, %val3 + %ret3 = add i32 %ret2, %val4 + %ret4 = add i32 %ret3, %val5 + %ret5 = add i32 %ret4, %val6 + %ret6 = add i32 %ret5, %val7 + %ret7 = add i32 %ret6, %val8 + %ret8 = add i32 %ret7, %val9 + %ret9 = add i32 %ret8, %val10 + %ret10 = add i32 %ret9, %val11 + %ret11 = add i32 %ret10, %val12 + %ret12 = add i32 %ret11, %val13 + %ret13 = add i32 %ret12, %val14 + %ret14 = add i32 %ret13, %val15 + %ret15 = add i32 %ret14, %val16 + %ret16 = add i32 %ret15, %val17 + %ret17 = add i32 %ret16, %val18 + %ret18 = add i32 %ret17, %val19 + %ret19 = add i32 %ret18, %val20 + %ret20 = add i32 %ret19, %val21 + %ret21 = add i32 %ret20, %val22 + %ret22 = add i32 %ret21, %val23 + %ret23 = add i32 %ret22, %val24 + %ret24 = add i32 %ret23, %val25 + %ret25 = add i32 %ret24, %val26 + %ret26 = add i32 %ret25, %val27 + %ret27 = add i32 %ret26, %val28 + %ret28 = add i32 %ret27, %val29 + %ret29 = add i32 %ret28, %val30 + %ret30 = add i32 %ret29, %val31 + + ret i32 %ret30 +} diff --git a/test/CodeGen/NVPTX/gvar-init.ll b/test/CodeGen/NVPTX/gvar-init.ll new file mode 100644 index 0000000..8c95942 --- /dev/null +++ b/test/CodeGen/NVPTX/gvar-init.ll @@ -0,0 +1,5 @@ +; RUN: llc < %s -march=nvptx -mcpu=sm_20 | FileCheck %s + +; Error out if initializer is given for address spaces that do not support initializers +; XFAIL: * +@g0 = addrspace(3) global i32 42 diff --git a/test/CodeGen/NVPTX/imad.ll b/test/CodeGen/NVPTX/imad.ll new file mode 100644 index 0000000..67421c7 --- /dev/null +++ b/test/CodeGen/NVPTX/imad.ll @@ -0,0 +1,9 @@ +; RUN: llc < %s -march=nvptx -mcpu=sm_20 | FileCheck %s + +; CHECK: imad +define i32 @imad(i32 %a, i32 %b, i32 %c) { +; CHECK: mad.lo.s32 + %val0 = mul i32 %a, %b + %val1 = add i32 %val0, %c + ret i32 %val1 +} diff --git a/test/CodeGen/NVPTX/inline-asm.ll b/test/CodeGen/NVPTX/inline-asm.ll index d76eb42..6f0578d 100644 --- a/test/CodeGen/NVPTX/inline-asm.ll +++ b/test/CodeGen/NVPTX/inline-asm.ll @@ -7,3 +7,10 @@ entry: %0 = call float asm "ex2.approx.ftz.f32 $0, $1;", "=f,f"(float %x) ret float %0 } + +define i32 @foo(i1 signext %cond, i32 %a, i32 %b) #0 { +entry: +; CHECK: selp.b32 %r{{[0-9]+}}, %r{{[0-9]+}}, %r{{[0-9]+}}, %p{{[0-9]+}} + %0 = tail call i32 asm "selp.b32 $0, $1, $2, $3;", "=r,r,r,b"(i32 %a, i32 %b, i1 %cond) + ret i32 %0 +} diff --git a/test/CodeGen/NVPTX/isspacep.ll b/test/CodeGen/NVPTX/isspacep.ll new file mode 100644 index 0000000..47fa7a6 --- /dev/null +++ b/test/CodeGen/NVPTX/isspacep.ll @@ -0,0 +1,35 @@ +; RUN: llc < %s -march=nvptx -mcpu=sm_20 | FileCheck %s + +declare i1 @llvm.nvvm.isspacep.const(i8*) readnone noinline +declare i1 @llvm.nvvm.isspacep.global(i8*) readnone noinline +declare i1 @llvm.nvvm.isspacep.local(i8*) readnone noinline +declare i1 @llvm.nvvm.isspacep.shared(i8*) readnone noinline + +; CHECK: is_const +define i1 @is_const(i8* %addr) { +; CHECK: isspacep.const + %v = tail call i1 @llvm.nvvm.isspacep.const(i8* %addr) + ret i1 %v +} + +; CHECK: is_global +define i1 @is_global(i8* %addr) { +; CHECK: isspacep.global + %v = tail call i1 @llvm.nvvm.isspacep.global(i8* %addr) + ret i1 %v +} + +; CHECK: is_local +define i1 @is_local(i8* %addr) { +; CHECK: isspacep.local + %v = tail call i1 @llvm.nvvm.isspacep.local(i8* %addr) + ret i1 %v +} + +; CHECK: is_shared +define i1 @is_shared(i8* %addr) { +; CHECK: isspacep.shared + %v = tail call i1 @llvm.nvvm.isspacep.shared(i8* %addr) + ret i1 %v +} + diff --git a/test/CodeGen/NVPTX/ldu-i8.ll b/test/CodeGen/NVPTX/ldu-i8.ll index 81a82b2..9cc6675 100644 --- a/test/CodeGen/NVPTX/ldu-i8.ll +++ b/test/CodeGen/NVPTX/ldu-i8.ll @@ -2,13 +2,15 @@ target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v16:16:16-v32:32:32-v64:64:64-v128:128:128-n16:32:64" -declare i8 @llvm.nvvm.ldu.global.i.i8(i8*) +declare i8 @llvm.nvvm.ldu.global.i.i8.p0i8(i8*) define i8 @foo(i8* %a) { ; Ensure we properly truncate off the high-order 24 bits ; CHECK: ldu.global.u8 ; CHECK: cvt.u32.u16 ; CHECK: and.b32 %r{{[0-9]+}}, %r{{[0-9]+}}, 255 - %val = tail call i8 @llvm.nvvm.ldu.global.i.i8(i8* %a) + %val = tail call i8 @llvm.nvvm.ldu.global.i.i8.p0i8(i8* %a), !align !0 ret i8 %val } + +!0 = metadata !{i32 4} diff --git a/test/CodeGen/NVPTX/ldu-ldg.ll b/test/CodeGen/NVPTX/ldu-ldg.ll new file mode 100644 index 0000000..3b0619f --- /dev/null +++ b/test/CodeGen/NVPTX/ldu-ldg.ll @@ -0,0 +1,40 @@ +; RUN: llc < %s -march=nvptx -mcpu=sm_20 | FileCheck %s + + +declare i8 @llvm.nvvm.ldu.global.i.i8.p1i8(i8 addrspace(1)* %ptr) +declare i32 @llvm.nvvm.ldu.global.i.i32.p1i32(i32 addrspace(1)* %ptr) +declare i8 @llvm.nvvm.ldg.global.i.i8.p1i8(i8 addrspace(1)* %ptr) +declare i32 @llvm.nvvm.ldg.global.i.i32.p1i32(i32 addrspace(1)* %ptr) + + +; CHECK: func0 +define i8 @func0(i8 addrspace(1)* %ptr) { +; ldu.global.u8 + %val = tail call i8 @llvm.nvvm.ldu.global.i.i8.p1i8(i8 addrspace(1)* %ptr), !align !0 + ret i8 %val +} + +; CHECK: func1 +define i32 @func1(i32 addrspace(1)* %ptr) { +; ldu.global.u32 + %val = tail call i32 @llvm.nvvm.ldu.global.i.i32.p1i32(i32 addrspace(1)* %ptr), !align !0 + ret i32 %val +} + +; CHECK: func2 +define i8 @func2(i8 addrspace(1)* %ptr) { +; ld.global.nc.u8 + %val = tail call i8 @llvm.nvvm.ldg.global.i.i8.p1i8(i8 addrspace(1)* %ptr), !align !0 + ret i8 %val +} + +; CHECK: func3 +define i32 @func3(i32 addrspace(1)* %ptr) { +; ld.global.nc.u32 + %val = tail call i32 @llvm.nvvm.ldg.global.i.i32.p1i32(i32 addrspace(1)* %ptr), !align !0 + ret i32 %val +} + + + +!0 = metadata !{i32 4} diff --git a/test/CodeGen/NVPTX/ldu-reg-plus-offset.ll b/test/CodeGen/NVPTX/ldu-reg-plus-offset.ll index 26cadc4..55707ea 100644 --- a/test/CodeGen/NVPTX/ldu-reg-plus-offset.ll +++ b/test/CodeGen/NVPTX/ldu-reg-plus-offset.ll @@ -7,9 +7,9 @@ define void @reg_plus_offset(i32* %a) { ; CHECK: ldu.global.u32 %r{{[0-9]+}}, [%r{{[0-9]+}}+32]; ; CHECK: ldu.global.u32 %r{{[0-9]+}}, [%r{{[0-9]+}}+36]; %p2 = getelementptr i32* %a, i32 8 - %t1 = call i32 @llvm.nvvm.ldu.global.i.i32(i32* %p2), !align !1 + %t1 = call i32 @llvm.nvvm.ldu.global.i.i32.p0i32(i32* %p2), !align !1 %p3 = getelementptr i32* %a, i32 9 - %t2 = call i32 @llvm.nvvm.ldu.global.i.i32(i32* %p3), !align !1 + %t2 = call i32 @llvm.nvvm.ldu.global.i.i32.p0i32(i32* %p3), !align !1 %t3 = mul i32 %t1, %t2 store i32 %t3, i32* %a ret void @@ -17,5 +17,5 @@ define void @reg_plus_offset(i32* %a) { !1 = metadata !{ i32 4 } -declare i32 @llvm.nvvm.ldu.global.i.i32(i32*) +declare i32 @llvm.nvvm.ldu.global.i.i32.p0i32(i32*) declare i32 @llvm.nvvm.read.ptx.sreg.tid.x() diff --git a/test/CodeGen/NVPTX/lit.local.cfg b/test/CodeGen/NVPTX/lit.local.cfg index 85cf8c2..2cb98eb 100644 --- a/test/CodeGen/NVPTX/lit.local.cfg +++ b/test/CodeGen/NVPTX/lit.local.cfg @@ -1,3 +1,2 @@ -targets = set(config.root.targets_to_build.split()) -if not 'NVPTX' in targets: +if not 'NVPTX' in config.root.targets: config.unsupported = True diff --git a/test/CodeGen/NVPTX/managed.ll b/test/CodeGen/NVPTX/managed.ll new file mode 100644 index 0000000..4d7e781 --- /dev/null +++ b/test/CodeGen/NVPTX/managed.ll @@ -0,0 +1,11 @@ +; RUN: llc < %s -march=nvptx -mcpu=sm_20 | FileCheck %s + + +; CHECK: .visible .global .align 4 .u32 device_g; +@device_g = addrspace(1) global i32 zeroinitializer +; CHECK: .visible .global .attribute(.managed) .align 4 .u32 managed_g; +@managed_g = addrspace(1) global i32 zeroinitializer + + +!nvvm.annotations = !{!0} +!0 = metadata !{i32 addrspace(1)* @managed_g, metadata !"managed", i32 1} diff --git a/test/CodeGen/NVPTX/mulwide.ll b/test/CodeGen/NVPTX/mulwide.ll new file mode 100644 index 0000000..927946c --- /dev/null +++ b/test/CodeGen/NVPTX/mulwide.ll @@ -0,0 +1,37 @@ +; RUN: llc < %s -march=nvptx -mcpu=sm_20 | FileCheck %s + +; CHECK: mulwide16 +define i32 @mulwide16(i16 %a, i16 %b) { +; CHECK: mul.wide.s16 + %val0 = sext i16 %a to i32 + %val1 = sext i16 %b to i32 + %val2 = mul i32 %val0, %val1 + ret i32 %val2 +} + +; CHECK: mulwideu16 +define i32 @mulwideu16(i16 %a, i16 %b) { +; CHECK: mul.wide.u16 + %val0 = zext i16 %a to i32 + %val1 = zext i16 %b to i32 + %val2 = mul i32 %val0, %val1 + ret i32 %val2 +} + +; CHECK: mulwide32 +define i64 @mulwide32(i32 %a, i32 %b) { +; CHECK: mul.wide.s32 + %val0 = sext i32 %a to i64 + %val1 = sext i32 %b to i64 + %val2 = mul i64 %val0, %val1 + ret i64 %val2 +} + +; CHECK: mulwideu32 +define i64 @mulwideu32(i32 %a, i32 %b) { +; CHECK: mul.wide.u32 + %val0 = zext i32 %a to i64 + %val1 = zext i32 %b to i64 + %val2 = mul i64 %val0, %val1 + ret i64 %val2 +} diff --git a/test/CodeGen/NVPTX/nvvm-reflect.ll b/test/CodeGen/NVPTX/nvvm-reflect.ll index 0d02194..21e9c69 100644 --- a/test/CodeGen/NVPTX/nvvm-reflect.ll +++ b/test/CodeGen/NVPTX/nvvm-reflect.ll @@ -1,7 +1,7 @@ ; RUN: opt < %s -S -nvvm-reflect -nvvm-reflect-list USE_MUL=0 -O2 | FileCheck %s --check-prefix=USE_MUL_0 ; RUN: opt < %s -S -nvvm-reflect -nvvm-reflect-list USE_MUL=1 -O2 | FileCheck %s --check-prefix=USE_MUL_1 -@str = private addrspace(4) unnamed_addr constant [8 x i8] c"USE_MUL\00" +@str = private unnamed_addr addrspace(4) constant [8 x i8] c"USE_MUL\00" declare i32 @__nvvm_reflect(i8*) declare i8* @llvm.nvvm.ptr.constant.to.gen.p0i8.p4i8(i8 addrspace(4)*) @@ -32,3 +32,17 @@ exit: %ret = phi float [%ret1, %use_mul], [%ret2, %use_add] ret float %ret } + +declare i32 @llvm.nvvm.reflect.p0i8(i8*) + +; USE_MUL_0: define i32 @intrinsic +; USE_MUL_1: define i32 @intrinsic +define i32 @intrinsic() { +; USE_MUL_0-NOT: call i32 @llvm.nvvm.reflect +; USE_MUL_0: ret i32 0 +; USE_MUL_1-NOT: call i32 @llvm.nvvm.reflect +; USE_MUL_1: ret i32 1 + %ptr = tail call i8* @llvm.nvvm.ptr.constant.to.gen.p0i8.p4i8(i8 addrspace(4)* getelementptr inbounds ([8 x i8] addrspace(4)* @str, i32 0, i32 0)) + %reflect = tail call i32 @llvm.nvvm.reflect.p0i8(i8* %ptr) + ret i32 %reflect +} diff --git a/test/CodeGen/NVPTX/rotate.ll b/test/CodeGen/NVPTX/rotate.ll new file mode 100644 index 0000000..dfc8b4f --- /dev/null +++ b/test/CodeGen/NVPTX/rotate.ll @@ -0,0 +1,58 @@ +; RUN: llc < %s -march=nvptx -mcpu=sm_20 | FileCheck --check-prefix=SM20 %s +; RUN: llc < %s -march=nvptx -mcpu=sm_35 | FileCheck --check-prefix=SM35 %s + + +declare i32 @llvm.nvvm.rotate.b32(i32, i32) +declare i64 @llvm.nvvm.rotate.b64(i64, i32) +declare i64 @llvm.nvvm.rotate.right.b64(i64, i32) + +; SM20: rotate32 +; SM35: rotate32 +define i32 @rotate32(i32 %a, i32 %b) { +; SM20: shl.b32 +; SM20: sub.s32 +; SM20: shr.b32 +; SM20: add.u32 +; SM35: shf.l.wrap.b32 + %val = tail call i32 @llvm.nvvm.rotate.b32(i32 %a, i32 %b) + ret i32 %val +} + +; SM20: rotate64 +; SM35: rotate64 +define i64 @rotate64(i64 %a, i32 %b) { +; SM20: shl.b64 +; SM20: sub.u32 +; SM20: shr.b64 +; SM20: add.u64 +; SM35: shf.l.wrap.b32 +; SM35: shf.l.wrap.b32 + %val = tail call i64 @llvm.nvvm.rotate.b64(i64 %a, i32 %b) + ret i64 %val +} + +; SM20: rotateright64 +; SM35: rotateright64 +define i64 @rotateright64(i64 %a, i32 %b) { +; SM20: shr.b64 +; SM20: sub.u32 +; SM20: shl.b64 +; SM20: add.u64 +; SM35: shf.r.wrap.b32 +; SM35: shf.r.wrap.b32 + %val = tail call i64 @llvm.nvvm.rotate.right.b64(i64 %a, i32 %b) + ret i64 %val +} + +; SM20: rotl0 +; SM35: rotl0 +define i32 @rotl0(i32 %x) { +; SM20: shl.b32 +; SM20: shr.b32 +; SM20: add.u32 +; SM35: shf.l.wrap.b32 + %t0 = shl i32 %x, 8 + %t1 = lshr i32 %x, 24 + %t2 = or i32 %t0, %t1 + ret i32 %t2 +} diff --git a/test/CodeGen/NVPTX/shift-parts.ll b/test/CodeGen/NVPTX/shift-parts.ll new file mode 100644 index 0000000..748297c --- /dev/null +++ b/test/CodeGen/NVPTX/shift-parts.ll @@ -0,0 +1,38 @@ +; RUN: llc < %s -march=nvptx -mcpu=sm_20 | FileCheck %s + +; CHECK: shift_parts_left_128 +define void @shift_parts_left_128(i128* %val, i128* %amtptr) { +; CHECK: shl.b64 +; CHECK: mov.u32 +; CHECK: sub.s32 +; CHECK: shr.u64 +; CHECK: or.b64 +; CHECK: add.s32 +; CHECK: shl.b64 +; CHECK: setp.gt.s32 +; CHECK: selp.b64 +; CHECK: shl.b64 + %amt = load i128* %amtptr + %a = load i128* %val + %val0 = shl i128 %a, %amt + store i128 %val0, i128* %val + ret void +} + +; CHECK: shift_parts_right_128 +define void @shift_parts_right_128(i128* %val, i128* %amtptr) { +; CHECK: shr.u64 +; CHECK: sub.s32 +; CHECK: shl.b64 +; CHECK: or.b64 +; CHECK: add.s32 +; CHECK: shr.s64 +; CHECK: setp.gt.s32 +; CHECK: selp.b64 +; CHECK: shr.s64 + %amt = load i128* %amtptr + %a = load i128* %val + %val0 = ashr i128 %a, %amt + store i128 %val0, i128* %val + ret void +} diff --git a/test/CodeGen/NVPTX/weak-global.ll b/test/CodeGen/NVPTX/weak-global.ll new file mode 100644 index 0000000..2bef4c5 --- /dev/null +++ b/test/CodeGen/NVPTX/weak-global.ll @@ -0,0 +1,9 @@ +; RUN: llc < %s -march=nvptx -mcpu=sm_20 | FileCheck %s + +; CHECK: .weak .global .align 4 .u32 g +@g = common addrspace(1) global i32 zeroinitializer + +define i32 @func0() { + %val = load i32 addrspace(1)* @g + ret i32 %val +} diff --git a/test/CodeGen/NVPTX/weak-linkage.ll b/test/CodeGen/NVPTX/weak-linkage.ll new file mode 100644 index 0000000..7a13357 --- /dev/null +++ b/test/CodeGen/NVPTX/weak-linkage.ll @@ -0,0 +1,12 @@ +; RUN: llc < %s -march=nvptx -mcpu=sm_20 | FileCheck %s + + +; CHECK: .weak .func foo +define weak void @foo() { + ret void +} + +; CHECK: .visible .func bar +define void @bar() { + ret void +} |