about summary refs log tree commit diff stats
path: root/test/CodeGen/PTX
diff options
context:
space:
mode:
author Che-Liang Chiou <clchiou@gmail.com> 2011-02-28 06:34:09 +0000
committer Che-Liang Chiou <clchiou@gmail.com> 2011-02-28 06:34:09 +0000
commit f71720231f6de9b2b7fe28edd179ae217a105329 (patch)
tree 7ed3e644aac2d4be87b13f146f0467761e8e7600 /test/CodeGen/PTX
parent d8d1584c13c554349c235177b2b89cb5117347b2 (diff)
download external_llvm-f71720231f6de9b2b7fe28edd179ae217a105329.zip
external_llvm-f71720231f6de9b2b7fe28edd179ae217a105329.tar.gz
external_llvm-f71720231f6de9b2b7fe28edd179ae217a105329.tar.bz2
Add preliminary support for .f32 in the PTX backend.
- Add appropriate TableGen patterns for fadd, fsub, fmul.
- Add .f32 as the PTX type for the LLVM float type.
- Allow parameters, return values, and global variable declarations to accept the float type.
- Add appropriate test cases.

Patch by Justin Holewinski.

git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@126636 91177308-0d34-0410-b5e6-96231b3b80d8
Diffstat (limited to 'test/CodeGen/PTX')
-rw-r--r-- test/CodeGen/PTX/add.ll 14
-rw-r--r-- test/CodeGen/PTX/ld_float.ll 86
-rw-r--r-- test/CodeGen/PTX/mov.ll 12
-rw-r--r-- test/CodeGen/PTX/mul.ll 25
-rw-r--r-- test/CodeGen/PTX/st_float.ll 78
-rw-r--r-- test/CodeGen/PTX/sub.ll 14
6 files changed, 229 insertions, 0 deletions
diff --git a/test/CodeGen/PTX/add.ll b/test/CodeGen/PTX/add.ll
index 1259d03..9e777ae 100644
--- a/test/CodeGen/PTX/add.ll
+++ b/test/CodeGen/PTX/add.ll
@@ -13,3 +13,17 @@ define ptx_device i32 @t2(i32 %x) {
; CHECK: ret;
ret i32 %z
}
+
+define ptx_device float @t3(float %x, float %y) {
+; CHECK: add.f32 f0, f1, f2;
+; CHECK-NEXT: ret;
+ %z = fadd float %x, %y ; register-register float add; the pattern above now includes the terminating ';' like the other patterns in this file
+ ret float %z
+}
+
+define ptx_device float @t4(float %x) {
+; CHECK: add.f32 f0, f1, 0F3F800000;
+; CHECK-NEXT: ret;
+ %sum = fadd float %x, 1.0 ; 1.0 has IEEE-754 single-precision bits 0x3F800000, expected as the immediate 0F3F800000
+ ret float %sum
+}
diff --git a/test/CodeGen/PTX/ld_float.ll b/test/CodeGen/PTX/ld_float.ll
new file mode 100644
index 0000000..62d2c36
--- /dev/null
+++ b/test/CodeGen/PTX/ld_float.ll
@@ -0,0 +1,86 @@
+; RUN: llc < %s -march=ptx | FileCheck %s
+
+;CHECK: .extern .global .f32 array[];
+@array = external global [10 x float] ; no addrspace qualifier; expected to be declared in .global
+
+;CHECK: .extern .const .f32 array_constant[];
+@array_constant = external addrspace(1) constant [10 x float] ; addrspace(1); expected to be declared in .const
+
+;CHECK: .extern .local .f32 array_local[];
+@array_local = external addrspace(2) global [10 x float] ; addrspace(2); expected to be declared in .local
+
+;CHECK: .extern .shared .f32 array_shared[];
+@array_shared = external addrspace(4) global [10 x float] ; addrspace(4); expected to be declared in .shared
+
+define ptx_device float @t1(float* %p) {
+entry:
+;CHECK: ld.global.f32 f0, [r1];
+;CHECK-NEXT: ret;
+ %val = load float* %p ; plain load through the pointer argument; expected as a .global load with no offset
+ ret float %val
+}
+
+define ptx_device float @t2(float* %p) {
+entry:
+;CHECK: ld.global.f32 f0, [r1+4];
+;CHECK-NEXT: ret;
+ %addr = getelementptr float* %p, i32 1 ; constant index 1 on a float pointer; expected to fold into the +4 address offset
+ %val = load float* %addr
+ ret float %val
+}
+
+define ptx_device float @t3(float* %p, i32 %q) {
+entry:
+;CHECK: shl.b32 r0, r2, 2;
+;CHECK-NEXT: add.s32 r0, r1, r0;
+;CHECK-NEXT: ld.global.f32 f0, [r0];
+;CHECK-NEXT: ret;
+ %addr = getelementptr float* %p, i32 %q ; variable index; expected as shift-left by 2 (index times 4) followed by an add to the base
+ %val = load float* %addr
+ ret float %val
+}
+
+define ptx_device float @t4_global() {
+entry:
+;CHECK: ld.global.f32 f0, [array];
+;CHECK-NEXT: ret;
+ %elt0 = getelementptr [10 x float]* @array, i32 0, i32 0 ; element 0 of @array; expected to be addressed by symbol name alone
+ %val = load float* %elt0
+ ret float %val
+}
+
+define ptx_device float @t4_const() {
+entry:
+;CHECK: ld.const.f32 f0, [array_constant];
+;CHECK-NEXT: ret;
+ %elt0 = getelementptr [10 x float] addrspace(1)* @array_constant, i32 0, i32 0 ; element 0 of the addrspace(1) array
+ %val = load float addrspace(1)* %elt0 ; addrspace(1) load; expected to use the .const state space
+ ret float %val
+}
+
+define ptx_device float @t4_local() {
+entry:
+;CHECK: ld.local.f32 f0, [array_local];
+;CHECK-NEXT: ret;
+ %elt0 = getelementptr [10 x float] addrspace(2)* @array_local, i32 0, i32 0 ; element 0 of the addrspace(2) array
+ %val = load float addrspace(2)* %elt0 ; addrspace(2) load; expected to use the .local state space
+ ret float %val
+}
+
+define ptx_device float @t4_shared() {
+entry:
+;CHECK: ld.shared.f32 f0, [array_shared];
+;CHECK-NEXT: ret;
+ %elt0 = getelementptr [10 x float] addrspace(4)* @array_shared, i32 0, i32 0 ; element 0 of the addrspace(4) array
+ %val = load float addrspace(4)* %elt0 ; addrspace(4) load; expected to use the .shared state space
+ ret float %val
+}
+
+define ptx_device float @t5() {
+entry:
+;CHECK: ld.global.f32 f0, [array+4];
+;CHECK-NEXT: ret;
+ %elt1 = getelementptr [10 x float]* @array, i32 0, i32 1 ; element 1 of @array; expected as symbol plus byte offset 4
+ %val = load float* %elt1
+ ret float %val
+}
diff --git a/test/CodeGen/PTX/mov.ll b/test/CodeGen/PTX/mov.ll
index c365e9b..d201a78 100644
--- a/test/CodeGen/PTX/mov.ll
+++ b/test/CodeGen/PTX/mov.ll
@@ -11,3 +11,15 @@ define ptx_device i32 @t2(i32 %x) {
; CHECK: ret;
ret i32 %x
}
+
+define ptx_device float @t3() {
+; CHECK: mov.f32 f0, 0F00000000;
+; CHECK-NEXT: ret;
+ ret float 0.0 ; constant return; +0.0 is all-zero bits, expected as the immediate 0F00000000 moved into f0
+}
+
+define ptx_device float @t4(float %x) {
+; CHECK: mov.f32 f0, f1;
+; CHECK-NEXT: ret;
+ ret float %x ; argument returned unchanged; expected as a single register-to-register float move
+}
diff --git a/test/CodeGen/PTX/mul.ll b/test/CodeGen/PTX/mul.ll
new file mode 100644
index 0000000..01871da
--- /dev/null
+++ b/test/CodeGen/PTX/mul.ll
@@ -0,0 +1,25 @@
+; RUN: llc < %s -march=ptx | FileCheck %s
+
+;define ptx_device i32 @t1(i32 %x, i32 %y) {
+; %z = mul i32 %x, %y
+; ret i32 %z
+;}
+
+;define ptx_device i32 @t2(i32 %x) {
+; %z = mul i32 %x, 1
+; ret i32 %z
+;}
+
+define ptx_device float @t3(float %x, float %y) {
+; CHECK: mul.f32 f0, f1, f2;
+; CHECK-NEXT: ret;
+ %z = fmul float %x, %y ; register-register float multiply; the pattern above now includes the terminating ';' like the immediate-form pattern in this file
+ ret float %z
+}
+
+define ptx_device float @t4(float %x) {
+; CHECK: mul.f32 f0, f1, 0F40A00000;
+; CHECK-NEXT: ret;
+ %prod = fmul float %x, 5.0 ; 5.0 has IEEE-754 single-precision bits 0x40A00000, expected as the immediate 0F40A00000
+ ret float %prod
+}
diff --git a/test/CodeGen/PTX/st_float.ll b/test/CodeGen/PTX/st_float.ll
new file mode 100644
index 0000000..f0e0010
--- /dev/null
+++ b/test/CodeGen/PTX/st_float.ll
@@ -0,0 +1,78 @@
+; RUN: llc < %s -march=ptx | FileCheck %s
+
+;CHECK: .extern .global .f32 array[];
+@array = external global [10 x float] ; no addrspace qualifier; expected to be declared in .global
+
+;CHECK: .extern .const .f32 array_constant[];
+@array_constant = external addrspace(1) constant [10 x float] ; addrspace(1) constant; only its declaration is verified, no function in this file stores to it
+
+;CHECK: .extern .local .f32 array_local[];
+@array_local = external addrspace(2) global [10 x float] ; addrspace(2); expected to be declared in .local
+
+;CHECK: .extern .shared .f32 array_shared[];
+@array_shared = external addrspace(4) global [10 x float] ; addrspace(4); expected to be declared in .shared
+
+define ptx_device void @t1(float* %p, float %x) {
+entry:
+;CHECK: st.global.f32 [r1], f1;
+;CHECK-NEXT: ret;
+ store float %x, float* %p ; plain store through the pointer argument; expected as a .global store with no offset
+ ret void
+}
+
+define ptx_device void @t2(float* %p, float %x) {
+entry:
+;CHECK: st.global.f32 [r1+4], f1;
+;CHECK-NEXT: ret;
+ %addr = getelementptr float* %p, i32 1 ; constant index 1 on a float pointer; expected to fold into the +4 address offset
+ store float %x, float* %addr
+ ret void
+}
+
+define ptx_device void @t3(float* %p, i32 %q, float %x) {
+;CHECK: .reg .s32 r0;
+entry:
+;CHECK: shl.b32 r0, r2, 2;
+;CHECK-NEXT: add.s32 r0, r1, r0;
+;CHECK-NEXT: st.global.f32 [r0], f1;
+;CHECK-NEXT: ret;
+ %addr = getelementptr float* %p, i32 %q ; variable index; expected as shift-left by 2 (index times 4) followed by an add to the base
+ store float %x, float* %addr
+ ret void
+}
+
+define ptx_device void @t4_global(float %x) {
+entry:
+;CHECK: st.global.f32 [array], f1;
+;CHECK-NEXT: ret;
+ %elt0 = getelementptr [10 x float]* @array, i32 0, i32 0 ; element 0 of @array; expected to be addressed by symbol name alone
+ store float %x, float* %elt0
+ ret void
+}
+
+define ptx_device void @t4_local(float %x) {
+entry:
+;CHECK: st.local.f32 [array_local], f1;
+;CHECK-NEXT: ret;
+ %elt0 = getelementptr [10 x float] addrspace(2)* @array_local, i32 0, i32 0 ; element 0 of the addrspace(2) array
+ store float %x, float addrspace(2)* %elt0 ; addrspace(2) store; expected to use the .local state space
+ ret void
+}
+
+define ptx_device void @t4_shared(float %x) {
+entry:
+;CHECK: st.shared.f32 [array_shared], f1;
+;CHECK-NEXT: ret;
+ %elt0 = getelementptr [10 x float] addrspace(4)* @array_shared, i32 0, i32 0 ; element 0 of the addrspace(4) array
+ store float %x, float addrspace(4)* %elt0 ; addrspace(4) store; expected to use the .shared state space
+ ret void
+}
+
+define ptx_device void @t5(float %x) {
+entry:
+;CHECK: st.global.f32 [array+4], f1;
+;CHECK-NEXT: ret;
+ %elt1 = getelementptr [10 x float]* @array, i32 0, i32 1 ; element 1 of @array; expected as symbol plus byte offset 4
+ store float %x, float* %elt1
+ ret void
+}
diff --git a/test/CodeGen/PTX/sub.ll b/test/CodeGen/PTX/sub.ll
index aab3fda..e11deca 100644
--- a/test/CodeGen/PTX/sub.ll
+++ b/test/CodeGen/PTX/sub.ll
@@ -13,3 +13,17 @@ define ptx_device i32 @t2(i32 %x) {
;CHECK: ret;
ret i32 %z
}
+
+define ptx_device float @t3(float %x, float %y) {
+; CHECK: sub.f32 f0, f1, f2;
+; CHECK-NEXT: ret;
+ %z = fsub float %x, %y ; register-register float subtract; the pattern above now includes the terminating ';' like the immediate-form pattern in this file
+ ret float %z
+}
+
+define ptx_device float @t4(float %x) {
+; CHECK: add.f32 f0, f1, 0FBF800000;
+; CHECK-NEXT: ret;
+ %diff = fsub float %x, 1.0 ; expected output is an add, not a sub: 0FBF800000 is the bit pattern of -1.0, i.e. x - 1.0 emitted as x + (-1.0)
+ ret float %diff
+}