diff options
author | Che-Liang Chiou <clchiou@gmail.com> | 2011-02-28 06:34:09 +0000 |
---|---|---|
committer | Che-Liang Chiou <clchiou@gmail.com> | 2011-02-28 06:34:09 +0000 |
commit | f71720231f6de9b2b7fe28edd179ae217a105329 (patch) | |
tree | 7ed3e644aac2d4be87b13f146f0467761e8e7600 /test/CodeGen/PTX | |
parent | d8d1584c13c554349c235177b2b89cb5117347b2 (diff) | |
download | external_llvm-f71720231f6de9b2b7fe28edd179ae217a105329.zip external_llvm-f71720231f6de9b2b7fe28edd179ae217a105329.tar.gz external_llvm-f71720231f6de9b2b7fe28edd179ae217a105329.tar.bz2 |
Add preliminary support for .f32 in the PTX backend.

- Add appropriate TableGen patterns for fadd, fsub, fmul.
- Add .f32 as the PTX type for the LLVM float type.
- Allow parameters, return values, and global variable declarations
  to accept the float type.
- Add appropriate test cases.

Patch by Justin Holewinski
git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@126636 91177308-0d34-0410-b5e6-96231b3b80d8
Diffstat (limited to 'test/CodeGen/PTX')
-rw-r--r-- | test/CodeGen/PTX/add.ll | 14 | ||||
-rw-r--r-- | test/CodeGen/PTX/ld_float.ll | 86 | ||||
-rw-r--r-- | test/CodeGen/PTX/mov.ll | 12 | ||||
-rw-r--r-- | test/CodeGen/PTX/mul.ll | 25 | ||||
-rw-r--r-- | test/CodeGen/PTX/st_float.ll | 78 | ||||
-rw-r--r-- | test/CodeGen/PTX/sub.ll | 14 |
6 files changed, 229 insertions, 0 deletions
diff --git a/test/CodeGen/PTX/add.ll b/test/CodeGen/PTX/add.ll index 1259d03..9e777ae 100644 --- a/test/CodeGen/PTX/add.ll +++ b/test/CodeGen/PTX/add.ll @@ -13,3 +13,17 @@ define ptx_device i32 @t2(i32 %x) { ; CHECK: ret; ret i32 %z } + +define ptx_device float @t3(float %x, float %y) { +; CHECK: add.f32 f0, f1, f2 +; CHECK-NEXT: ret; + %z = fadd float %x, %y + ret float %z +} + +define ptx_device float @t4(float %x) { +; CHECK: add.f32 f0, f1, 0F3F800000; +; CHECK-NEXT: ret; + %z = fadd float %x, 1.0 + ret float %z +} diff --git a/test/CodeGen/PTX/ld_float.ll b/test/CodeGen/PTX/ld_float.ll new file mode 100644 index 0000000..62d2c36 --- /dev/null +++ b/test/CodeGen/PTX/ld_float.ll @@ -0,0 +1,86 @@ +; RUN: llc < %s -march=ptx | FileCheck %s + +;CHECK: .extern .global .f32 array[]; +@array = external global [10 x float] + +;CHECK: .extern .const .f32 array_constant[]; +@array_constant = external addrspace(1) constant [10 x float] + +;CHECK: .extern .local .f32 array_local[]; +@array_local = external addrspace(2) global [10 x float] + +;CHECK: .extern .shared .f32 array_shared[]; +@array_shared = external addrspace(4) global [10 x float] + +define ptx_device float @t1(float* %p) { +entry: +;CHECK: ld.global.f32 f0, [r1]; +;CHECK-NEXT: ret; + %x = load float* %p + ret float %x +} + +define ptx_device float @t2(float* %p) { +entry: +;CHECK: ld.global.f32 f0, [r1+4]; +;CHECK-NEXT: ret; + %i = getelementptr float* %p, i32 1 + %x = load float* %i + ret float %x +} + +define ptx_device float @t3(float* %p, i32 %q) { +entry: +;CHECK: shl.b32 r0, r2, 2; +;CHECK-NEXT: add.s32 r0, r1, r0; +;CHECK-NEXT: ld.global.f32 f0, [r0]; +;CHECK-NEXT: ret; + %i = getelementptr float* %p, i32 %q + %x = load float* %i + ret float %x +} + +define ptx_device float @t4_global() { +entry: +;CHECK: ld.global.f32 f0, [array]; +;CHECK-NEXT: ret; + %i = getelementptr [10 x float]* @array, i32 0, i32 0 + %x = load float* %i + ret float %x +} + +define ptx_device float @t4_const() { +entry: +;CHECK: 
ld.const.f32 f0, [array_constant]; +;CHECK-NEXT: ret; + %i = getelementptr [10 x float] addrspace(1)* @array_constant, i32 0, i32 0 + %x = load float addrspace(1)* %i + ret float %x +} + +define ptx_device float @t4_local() { +entry: +;CHECK: ld.local.f32 f0, [array_local]; +;CHECK-NEXT: ret; + %i = getelementptr [10 x float] addrspace(2)* @array_local, i32 0, i32 0 + %x = load float addrspace(2)* %i + ret float %x +} + +define ptx_device float @t4_shared() { +entry: +;CHECK: ld.shared.f32 f0, [array_shared]; +;CHECK-NEXT: ret; + %i = getelementptr [10 x float] addrspace(4)* @array_shared, i32 0, i32 0 + %x = load float addrspace(4)* %i + ret float %x +} + +define ptx_device float @t5() { +entry: +;CHECK: ld.global.f32 f0, [array+4]; +;CHECK-NEXT: ret; + %i = getelementptr [10 x float]* @array, i32 0, i32 1 + %x = load float* %i + ret float %x +} diff --git a/test/CodeGen/PTX/mov.ll b/test/CodeGen/PTX/mov.ll index c365e9b..d201a78 100644 --- a/test/CodeGen/PTX/mov.ll +++ b/test/CodeGen/PTX/mov.ll @@ -11,3 +11,15 @@ define ptx_device i32 @t2(i32 %x) { ; CHECK: ret; ret i32 %x } + +define ptx_device float @t3() { +; CHECK: mov.f32 f0, 0F00000000; +; CHECK-NEXT: ret; + ret float 0.0 +} + +define ptx_device float @t4(float %x) { +; CHECK: mov.f32 f0, f1; +; CHECK-NEXT: ret; + ret float %x +} diff --git a/test/CodeGen/PTX/mul.ll b/test/CodeGen/PTX/mul.ll new file mode 100644 index 0000000..01871da --- /dev/null +++ b/test/CodeGen/PTX/mul.ll @@ -0,0 +1,25 @@ +; RUN: llc < %s -march=ptx | FileCheck %s + +;define ptx_device i32 @t1(i32 %x, i32 %y) { +; %z = mul i32 %x, %y +; ret i32 %z +;} + +;define ptx_device i32 @t2(i32 %x) { +; %z = mul i32 %x, 1 +; ret i32 %z +;} + +define ptx_device float @t3(float %x, float %y) { +; CHECK: mul.f32 f0, f1, f2 +; CHECK-NEXT: ret; + %z = fmul float %x, %y + ret float %z +} + +define ptx_device float @t4(float %x) { +; CHECK: mul.f32 f0, f1, 0F40A00000; +; CHECK-NEXT: ret; + %z = fmul float %x, 5.0 + ret float %z +} diff --git 
a/test/CodeGen/PTX/st_float.ll b/test/CodeGen/PTX/st_float.ll new file mode 100644 index 0000000..f0e0010 --- /dev/null +++ b/test/CodeGen/PTX/st_float.ll @@ -0,0 +1,78 @@ +; RUN: llc < %s -march=ptx | FileCheck %s + +;CHECK: .extern .global .f32 array[]; +@array = external global [10 x float] + +;CHECK: .extern .const .f32 array_constant[]; +@array_constant = external addrspace(1) constant [10 x float] + +;CHECK: .extern .local .f32 array_local[]; +@array_local = external addrspace(2) global [10 x float] + +;CHECK: .extern .shared .f32 array_shared[]; +@array_shared = external addrspace(4) global [10 x float] + +define ptx_device void @t1(float* %p, float %x) { +entry: +;CHECK: st.global.f32 [r1], f1; +;CHECK-NEXT: ret; + store float %x, float* %p + ret void +} + +define ptx_device void @t2(float* %p, float %x) { +entry: +;CHECK: st.global.f32 [r1+4], f1; +;CHECK-NEXT: ret; + %i = getelementptr float* %p, i32 1 + store float %x, float* %i + ret void +} + +define ptx_device void @t3(float* %p, i32 %q, float %x) { +;CHECK: .reg .s32 r0; +entry: +;CHECK: shl.b32 r0, r2, 2; +;CHECK-NEXT: add.s32 r0, r1, r0; +;CHECK-NEXT: st.global.f32 [r0], f1; +;CHECK-NEXT: ret; + %i = getelementptr float* %p, i32 %q + store float %x, float* %i + ret void +} + +define ptx_device void @t4_global(float %x) { +entry: +;CHECK: st.global.f32 [array], f1; +;CHECK-NEXT: ret; + %i = getelementptr [10 x float]* @array, i32 0, i32 0 + store float %x, float* %i + ret void +} + +define ptx_device void @t4_local(float %x) { +entry: +;CHECK: st.local.f32 [array_local], f1; +;CHECK-NEXT: ret; + %i = getelementptr [10 x float] addrspace(2)* @array_local, i32 0, i32 0 + store float %x, float addrspace(2)* %i + ret void +} + +define ptx_device void @t4_shared(float %x) { +entry: +;CHECK: st.shared.f32 [array_shared], f1; +;CHECK-NEXT: ret; + %i = getelementptr [10 x float] addrspace(4)* @array_shared, i32 0, i32 0 + store float %x, float addrspace(4)* %i + ret void +} + +define ptx_device void 
@t5(float %x) { +entry: +;CHECK: st.global.f32 [array+4], f1; +;CHECK-NEXT: ret; + %i = getelementptr [10 x float]* @array, i32 0, i32 1 + store float %x, float* %i + ret void +} diff --git a/test/CodeGen/PTX/sub.ll b/test/CodeGen/PTX/sub.ll index aab3fda..e11deca 100644 --- a/test/CodeGen/PTX/sub.ll +++ b/test/CodeGen/PTX/sub.ll @@ -13,3 +13,17 @@ define ptx_device i32 @t2(i32 %x) { ;CHECK: ret; ret i32 %z } + +define ptx_device float @t3(float %x, float %y) { +; CHECK: sub.f32 f0, f1, f2 +; CHECK-NEXT: ret; + %z = fsub float %x, %y + ret float %z +} + +define ptx_device float @t4(float %x) { +; CHECK: add.f32 f0, f1, 0FBF800000; +; CHECK-NEXT: ret; + %z = fsub float %x, 1.0 + ret float %z +} |