diff options
author | Stephen Hines <srhines@google.com> | 2014-12-01 14:51:49 -0800 |
---|---|---|
committer | Stephen Hines <srhines@google.com> | 2014-12-02 16:08:10 -0800 |
commit | 37ed9c199ca639565f6ce88105f9e39e898d82d0 (patch) | |
tree | 8fb36d3910e3ee4c4e1b7422f4f017108efc52f5 /test/CodeGen/R600/flat-address-space.ll | |
parent | d2327b22152ced7bc46dc629fc908959e8a52d03 (diff) | |
download | external_llvm-37ed9c199ca639565f6ce88105f9e39e898d82d0.zip external_llvm-37ed9c199ca639565f6ce88105f9e39e898d82d0.tar.gz external_llvm-37ed9c199ca639565f6ce88105f9e39e898d82d0.tar.bz2 |
Update aosp/master LLVM for rebase to r222494.
Change-Id: Ic787f5e0124df789bd26f3f24680f45e678eef2d
Diffstat (limited to 'test/CodeGen/R600/flat-address-space.ll')
-rw-r--r-- | test/CodeGen/R600/flat-address-space.ll | 182 |
1 files changed, 182 insertions, 0 deletions
diff --git a/test/CodeGen/R600/flat-address-space.ll b/test/CodeGen/R600/flat-address-space.ll new file mode 100644 index 0000000..fc5af7c --- /dev/null +++ b/test/CodeGen/R600/flat-address-space.ll @@ -0,0 +1,182 @@ +; RUN: llc -O0 -march=r600 -mcpu=bonaire -mattr=-promote-alloca < %s | FileCheck -check-prefix=CHECK -check-prefix=CHECK-NO-PROMOTE %s +; RUN: llc -O0 -march=r600 -mcpu=bonaire -mattr=+promote-alloca < %s | FileCheck -check-prefix=CHECK -check-prefix=CHECK-PROMOTE %s + +; Run at -O0 so that any optimizations added later cannot specialize away +; the generic (flat) pointer accesses that these tests check for. + + +; CHECK-LABEL: {{^}}branch_use_flat_i32: +; CHECK: flat_store_dword {{v[0-9]+}}, {{v\[[0-9]+:[0-9]+\]}}, [M0, FLAT_SCRATCH] +; CHECK: s_endpgm +define void @branch_use_flat_i32(i32 addrspace(1)* noalias %out, i32 addrspace(1)* %gptr, i32 addrspace(3)* %lptr, i32 %x, i32 %c) #0 { +entry: + %cmp = icmp ne i32 %c, 0 + br i1 %cmp, label %local, label %global + +local: + %flat_local = addrspacecast i32 addrspace(3)* %lptr to i32 addrspace(4)* + br label %end + +global: + %flat_global = addrspacecast i32 addrspace(1)* %gptr to i32 addrspace(4)* + br label %end + +end: + %fptr = phi i32 addrspace(4)* [ %flat_local, %local ], [ %flat_global, %global ] + store i32 %x, i32 addrspace(4)* %fptr, align 4 +; %val = load i32 addrspace(4)* %fptr, align 4 +; store i32 %val, i32 addrspace(1)* %out, align 4 + ret void +} + + + +; These testcases might become useless when there are optimizations to +; remove generic pointers. 
+ +; CHECK-LABEL: {{^}}store_flat_i32: +; CHECK: v_mov_b32_e32 v[[DATA:[0-9]+]], {{s[0-9]+}} +; CHECK: v_mov_b32_e32 v[[LO_VREG:[0-9]+]], {{s[0-9]+}} +; CHECK: v_mov_b32_e32 v[[HI_VREG:[0-9]+]], {{s[0-9]+}} +; CHECK: flat_store_dword v[[DATA]], v{{\[}}[[LO_VREG]]:[[HI_VREG]]{{\]}} +define void @store_flat_i32(i32 addrspace(1)* %gptr, i32 %x) #0 { + %fptr = addrspacecast i32 addrspace(1)* %gptr to i32 addrspace(4)* + store i32 %x, i32 addrspace(4)* %fptr, align 4 + ret void +} + +; CHECK-LABEL: {{^}}store_flat_i64: +; CHECK: flat_store_dwordx2 +define void @store_flat_i64(i64 addrspace(1)* %gptr, i64 %x) #0 { + %fptr = addrspacecast i64 addrspace(1)* %gptr to i64 addrspace(4)* + store i64 %x, i64 addrspace(4)* %fptr, align 8 + ret void +} + +; CHECK-LABEL: {{^}}store_flat_v4i32: +; CHECK: flat_store_dwordx4 +define void @store_flat_v4i32(<4 x i32> addrspace(1)* %gptr, <4 x i32> %x) #0 { + %fptr = addrspacecast <4 x i32> addrspace(1)* %gptr to <4 x i32> addrspace(4)* + store <4 x i32> %x, <4 x i32> addrspace(4)* %fptr, align 16 + ret void +} + +; CHECK-LABEL: {{^}}store_flat_trunc_i16: +; CHECK: flat_store_short +define void @store_flat_trunc_i16(i16 addrspace(1)* %gptr, i32 %x) #0 { + %fptr = addrspacecast i16 addrspace(1)* %gptr to i16 addrspace(4)* + %y = trunc i32 %x to i16 + store i16 %y, i16 addrspace(4)* %fptr, align 2 + ret void +} + +; CHECK-LABEL: {{^}}store_flat_trunc_i8: +; CHECK: flat_store_byte +define void @store_flat_trunc_i8(i8 addrspace(1)* %gptr, i32 %x) #0 { + %fptr = addrspacecast i8 addrspace(1)* %gptr to i8 addrspace(4)* + %y = trunc i32 %x to i8 + store i8 %y, i8 addrspace(4)* %fptr, align 2 + ret void +} + + + +; CHECK-LABEL: {{^}}load_flat_i32: +; CHECK: flat_load_dword +define void @load_flat_i32(i32 addrspace(1)* noalias %out, i32 addrspace(1)* noalias %gptr) #0 { + %fptr = addrspacecast i32 addrspace(1)* %gptr to i32 addrspace(4)* + %fload = load i32 addrspace(4)* %fptr, align 4 + store i32 %fload, i32 addrspace(1)* %out, align 4 + ret void +} 
+ +; CHECK-LABEL: {{^}}load_flat_i64: +; CHECK: flat_load_dwordx2 +define void @load_flat_i64(i64 addrspace(1)* noalias %out, i64 addrspace(1)* noalias %gptr) #0 { + %fptr = addrspacecast i64 addrspace(1)* %gptr to i64 addrspace(4)* + %fload = load i64 addrspace(4)* %fptr, align 4 + store i64 %fload, i64 addrspace(1)* %out, align 8 + ret void +} + +; CHECK-LABEL: {{^}}load_flat_v4i32: +; CHECK: flat_load_dwordx4 +define void @load_flat_v4i32(<4 x i32> addrspace(1)* noalias %out, <4 x i32> addrspace(1)* noalias %gptr) #0 { + %fptr = addrspacecast <4 x i32> addrspace(1)* %gptr to <4 x i32> addrspace(4)* + %fload = load <4 x i32> addrspace(4)* %fptr, align 4 + store <4 x i32> %fload, <4 x i32> addrspace(1)* %out, align 8 + ret void +} + +; CHECK-LABEL: {{^}}sextload_flat_i8: +; CHECK: flat_load_sbyte +define void @sextload_flat_i8(i32 addrspace(1)* noalias %out, i8 addrspace(1)* noalias %gptr) #0 { + %fptr = addrspacecast i8 addrspace(1)* %gptr to i8 addrspace(4)* + %fload = load i8 addrspace(4)* %fptr, align 4 + %ext = sext i8 %fload to i32 + store i32 %ext, i32 addrspace(1)* %out, align 4 + ret void +} + +; CHECK-LABEL: {{^}}zextload_flat_i8: +; CHECK: flat_load_ubyte +define void @zextload_flat_i8(i32 addrspace(1)* noalias %out, i8 addrspace(1)* noalias %gptr) #0 { + %fptr = addrspacecast i8 addrspace(1)* %gptr to i8 addrspace(4)* + %fload = load i8 addrspace(4)* %fptr, align 4 + %ext = zext i8 %fload to i32 + store i32 %ext, i32 addrspace(1)* %out, align 4 + ret void +} + +; CHECK-LABEL: {{^}}sextload_flat_i16: +; CHECK: flat_load_sshort +define void @sextload_flat_i16(i32 addrspace(1)* noalias %out, i16 addrspace(1)* noalias %gptr) #0 { + %fptr = addrspacecast i16 addrspace(1)* %gptr to i16 addrspace(4)* + %fload = load i16 addrspace(4)* %fptr, align 4 + %ext = sext i16 %fload to i32 + store i32 %ext, i32 addrspace(1)* %out, align 4 + ret void +} + +; CHECK-LABEL: {{^}}zextload_flat_i16: +; CHECK: flat_load_ushort +define void @zextload_flat_i16(i32 addrspace(1)* noalias %out, i16 
addrspace(1)* noalias %gptr) #0 { + %fptr = addrspacecast i16 addrspace(1)* %gptr to i16 addrspace(4)* + %fload = load i16 addrspace(4)* %fptr, align 4 + %ext = zext i16 %fload to i32 + store i32 %ext, i32 addrspace(1)* %out, align 4 + ret void +} + + + +; TODO: This should not be zero when registers are used for small +; scratch allocations again. + +; Check for prologue initializing special SGPRs pointing to scratch. +; CHECK-LABEL: {{^}}store_flat_scratch: +; CHECK: s_movk_i32 flat_scratch_lo, 0 +; CHECK-NO-PROMOTE: s_movk_i32 flat_scratch_hi, 0x28{{$}} +; CHECK-PROMOTE: s_movk_i32 flat_scratch_hi, 0x0{{$}} +; CHECK: flat_store_dword +; CHECK: s_barrier +; CHECK: flat_load_dword +define void @store_flat_scratch(i32 addrspace(1)* noalias %out, i32) #0 { + %alloca = alloca i32, i32 9, align 4 + %x = call i32 @llvm.r600.read.tidig.x() #3 + %pptr = getelementptr i32* %alloca, i32 %x + %fptr = addrspacecast i32* %pptr to i32 addrspace(4)* + store i32 %x, i32 addrspace(4)* %fptr + ; Dummy call + call void @llvm.AMDGPU.barrier.local() #1 + %reload = load i32 addrspace(4)* %fptr, align 4 + store i32 %reload, i32 addrspace(1)* %out, align 4 + ret void +} + +declare void @llvm.AMDGPU.barrier.local() #1 +declare i32 @llvm.r600.read.tidig.x() #3 + +attributes #0 = { nounwind } +attributes #1 = { nounwind noduplicate } +attributes #3 = { nounwind readnone } |