diff options
author | Pirama Arumuga Nainar <pirama@google.com> | 2015-05-06 11:46:36 -0700 |
---|---|---|
committer | Pirama Arumuga Nainar <pirama@google.com> | 2015-05-18 10:52:30 -0700 |
commit | 2c3e0051c31c3f5b2328b447eadf1cf9c4427442 (patch) | |
tree | c0104029af14e9f47c2ef58ca60e6137691f3c9b /test/CodeGen/R600 | |
parent | e1bc145815f4334641be19f1c45ecf85d25b6e5a (diff) | |
download | external_llvm-2c3e0051c31c3f5b2328b447eadf1cf9c4427442.zip external_llvm-2c3e0051c31c3f5b2328b447eadf1cf9c4427442.tar.gz external_llvm-2c3e0051c31c3f5b2328b447eadf1cf9c4427442.tar.bz2 |
Update aosp/master LLVM for rebase to r235153
Change-Id: I9bf53792f9fc30570e81a8d80d296c681d005ea7
(cherry picked from commit 0c7f116bb6950ef819323d855415b2f2b0aad987)
Diffstat (limited to 'test/CodeGen/R600')
-rw-r--r-- | test/CodeGen/R600/ds-negative-offset-addressing-mode-loop.ll | 2 | ||||
-rw-r--r-- | test/CodeGen/R600/ds_read2.ll | 24 | ||||
-rw-r--r-- | test/CodeGen/R600/ds_read2st64.ll | 8 | ||||
-rw-r--r-- | test/CodeGen/R600/ds_write2.ll | 30 | ||||
-rw-r--r-- | test/CodeGen/R600/ds_write2st64.ll | 6 | ||||
-rw-r--r-- | test/CodeGen/R600/fmaxnum.ll | 91 | ||||
-rw-r--r-- | test/CodeGen/R600/fminnum.ll | 91 | ||||
-rw-r--r-- | test/CodeGen/R600/ftrunc.f64.ll | 2 | ||||
-rw-r--r-- | test/CodeGen/R600/local-memory-two-objects.ll | 4 | ||||
-rw-r--r-- | test/CodeGen/R600/operand-folding.ll | 2 | ||||
-rw-r--r-- | test/CodeGen/R600/si-annotate-cf.ll | 25 | ||||
-rw-r--r-- | test/CodeGen/R600/unaligned-load-store.ll | 4 |
12 files changed, 248 insertions, 41 deletions
diff --git a/test/CodeGen/R600/ds-negative-offset-addressing-mode-loop.ll b/test/CodeGen/R600/ds-negative-offset-addressing-mode-loop.ll index c381fc4..e7e13d6 100644 --- a/test/CodeGen/R600/ds-negative-offset-addressing-mode-loop.ll +++ b/test/CodeGen/R600/ds-negative-offset-addressing-mode-loop.ll @@ -18,7 +18,7 @@ declare void @llvm.AMDGPU.barrier.local() #1 ; SI-DAG: v_add_i32_e32 [[VADDR0x100:v[0-9]+]], 0x100, [[VADDR]] ; SI-DAG: ds_read_b32 v{{[0-9]+}}, [[VADDR0x100]] -; CI-DAG: ds_read2_b32 v{{\[[0-9]+:[0-9]+\]}}, [[VADDR]] offset0:0 offset1:1 +; CI-DAG: ds_read2_b32 v{{\[[0-9]+:[0-9]+\]}}, [[VADDR]] offset1:1 ; CI-DAG: ds_read2_b32 v{{\[[0-9]+:[0-9]+\]}}, [[VADDR]] offset0:32 offset1:33 ; CI-DAG: ds_read_b32 v{{[0-9]+}}, [[VADDR]] offset:256 ; CHECK: s_endpgm diff --git a/test/CodeGen/R600/ds_read2.ll b/test/CodeGen/R600/ds_read2.ll index 7110a90..5929898 100644 --- a/test/CodeGen/R600/ds_read2.ll +++ b/test/CodeGen/R600/ds_read2.ll @@ -7,7 +7,7 @@ @lds.f64 = addrspace(3) global [512 x double] undef, align 8 ; SI-LABEL: @simple_read2_f32 -; SI: ds_read2_b32 v{{\[}}[[LO_VREG:[0-9]+]]:[[HI_VREG:[0-9]+]]{{\]}}, v{{[0-9]+}} offset0:0 offset1:8 +; SI: ds_read2_b32 v{{\[}}[[LO_VREG:[0-9]+]]:[[HI_VREG:[0-9]+]]{{\]}}, v{{[0-9]+}} offset1:8 ; SI: s_waitcnt lgkmcnt(0) ; SI: v_add_f32_e32 [[RESULT:v[0-9]+]], v[[HI_VREG]], v[[LO_VREG]] ; SI: buffer_store_dword [[RESULT]] @@ -26,7 +26,7 @@ define void @simple_read2_f32(float addrspace(1)* %out) #0 { } ; SI-LABEL: @simple_read2_f32_max_offset -; SI: ds_read2_b32 v{{\[}}[[LO_VREG:[0-9]+]]:[[HI_VREG:[0-9]+]]{{\]}}, v{{[0-9]+}} offset0:0 offset1:255 +; SI: ds_read2_b32 v{{\[}}[[LO_VREG:[0-9]+]]:[[HI_VREG:[0-9]+]]{{\]}}, v{{[0-9]+}} offset1:255 ; SI: s_waitcnt lgkmcnt(0) ; SI: v_add_f32_e32 [[RESULT:v[0-9]+]], v[[HI_VREG]], v[[LO_VREG]] ; SI: buffer_store_dword [[RESULT]] @@ -63,7 +63,7 @@ define void @simple_read2_f32_too_far(float addrspace(1)* %out) #0 { } ; SI-LABEL: @simple_read2_f32_x2 -; SI: ds_read2_b32 v{{\[[0-9]+:[0-9]+\]}}, [[BASEADDR:v[0-9]+]] offset0:0 offset1:8 +; SI: ds_read2_b32 v{{\[[0-9]+:[0-9]+\]}}, [[BASEADDR:v[0-9]+]] offset1:8 ; SI: ds_read2_b32 v{{\[[0-9]+:[0-9]+\]}}, [[BASEADDR]] offset0:11 offset1:27 ; SI: s_endpgm define void @simple_read2_f32_x2(float addrspace(1)* %out) #0 { @@ -94,7 +94,7 @@ define void @simple_read2_f32_x2(float addrspace(1)* %out) #0 { ; Make sure there is an instruction between the two sets of reads. ; SI-LABEL: @simple_read2_f32_x2_barrier -; SI: ds_read2_b32 v{{\[[0-9]+:[0-9]+\]}}, [[BASEADDR:v[0-9]+]] offset0:0 offset1:8 +; SI: ds_read2_b32 v{{\[[0-9]+:[0-9]+\]}}, [[BASEADDR:v[0-9]+]] offset1:8 ; SI: s_barrier ; SI: ds_read2_b32 v{{\[[0-9]+:[0-9]+\]}}, [[BASEADDR]] offset0:11 offset1:27 ; SI: s_endpgm @@ -313,7 +313,7 @@ define void @misaligned_2_simple_read2_f32(float addrspace(1)* %out, float addrs ; SI-LABEL: @simple_read2_f64 ; SI: v_lshlrev_b32_e32 [[VPTR:v[0-9]+]], 3, {{v[0-9]+}} -; SI: ds_read2_b64 v{{\[}}[[LO_VREG:[0-9]+]]:[[HI_VREG:[0-9]+]]{{\]}}, [[VPTR]] offset0:0 offset1:8 +; SI: ds_read2_b64 v{{\[}}[[LO_VREG:[0-9]+]]:[[HI_VREG:[0-9]+]]{{\]}}, [[VPTR]] offset1:8 ; SI: v_add_f64 [[RESULT:v\[[0-9]+:[0-9]+\]]], v{{\[}}[[LO_VREG]]:{{[0-9]+\]}}, v{{\[[0-9]+}}:[[HI_VREG]]{{\]}} ; SI: buffer_store_dwordx2 [[RESULT]] ; SI: s_endpgm @@ -331,7 +331,7 @@ define void @simple_read2_f64(double addrspace(1)* %out) #0 { } ; SI-LABEL: @simple_read2_f64_max_offset -; SI: ds_read2_b64 {{v\[[0-9]+:[0-9]+\]}}, v{{[0-9]+}} offset0:0 offset1:255 +; SI: ds_read2_b64 {{v\[[0-9]+:[0-9]+\]}}, v{{[0-9]+}} offset1:255 ; SI: s_endpgm define void @simple_read2_f64_max_offset(double addrspace(1)* %out) #0 { %x.i = tail call i32 @llvm.r600.read.tidig.x() #1 @@ -366,7 +366,7 @@ define void @simple_read2_f64_too_far(double addrspace(1)* %out) #0 { ; Alignment only 4 ; SI-LABEL: @misaligned_read2_f64 -; SI: ds_read2_b32 v{{\[[0-9]+:[0-9]+\]}}, {{v[0-9]+}} offset0:0 offset1:1 +; SI: ds_read2_b32 v{{\[[0-9]+:[0-9]+\]}}, {{v[0-9]+}} offset1:1 ; SI: ds_read2_b32 v{{\[[0-9]+:[0-9]+\]}}, {{v[0-9]+}} offset0:14 offset1:15 ; SI: s_endpgm define void @misaligned_read2_f64(double addrspace(1)* %out, double addrspace(3)* %lds) #0 { @@ -386,7 +386,7 @@ define void @misaligned_read2_f64(double addrspace(1)* %out, double addrspace(3) ; SI-LABEL: @load_constant_adjacent_offsets ; SI: v_mov_b32_e32 [[ZERO:v[0-9]+]], 0{{$}} -; SI: ds_read2_b32 v{{\[[0-9]+:[0-9]+\]}}, [[ZERO]] offset0:0 offset1:1 +; SI: ds_read2_b32 v{{\[[0-9]+:[0-9]+\]}}, [[ZERO]] offset1:1 define void @load_constant_adjacent_offsets(i32 addrspace(1)* %out) { %val0 = load i32, i32 addrspace(3)* getelementptr inbounds ([4 x i32], [4 x i32] addrspace(3)* @foo, i32 0, i32 0), align 4 %val1 = load i32, i32 addrspace(3)* getelementptr inbounds ([4 x i32], [4 x i32] addrspace(3)* @foo, i32 0, i32 1), align 4 @@ -397,7 +397,7 @@ define void @load_constant_adjacent_offsets(i32 addrspace(1)* %out) { ; SI-LABEL: @load_constant_disjoint_offsets ; SI: v_mov_b32_e32 [[ZERO:v[0-9]+]], 0{{$}} -; SI: ds_read2_b32 v{{\[[0-9]+:[0-9]+\]}}, [[ZERO]] offset0:0 offset1:2 +; SI: ds_read2_b32 v{{\[[0-9]+:[0-9]+\]}}, [[ZERO]] offset1:2 define void @load_constant_disjoint_offsets(i32 addrspace(1)* %out) { %val0 = load i32, i32 addrspace(3)* getelementptr inbounds ([4 x i32], [4 x i32] addrspace(3)* @foo, i32 0, i32 0), align 4 %val1 = load i32, i32 addrspace(3)* getelementptr inbounds ([4 x i32], [4 x i32] addrspace(3)* @foo, i32 0, i32 2), align 4 @@ -410,7 +410,7 @@ define void @load_constant_disjoint_offsets(i32 addrspace(1)* %out) { ; SI-LABEL: @load_misaligned64_constant_offsets ; SI: v_mov_b32_e32 [[ZERO:v[0-9]+]], 0{{$}} -; SI: ds_read2_b32 v{{\[[0-9]+:[0-9]+\]}}, [[ZERO]] offset0:0 offset1:1 +; SI: ds_read2_b32 v{{\[[0-9]+:[0-9]+\]}}, [[ZERO]] offset1:1 ; SI: ds_read2_b32 v{{\[[0-9]+:[0-9]+\]}}, [[ZERO]] offset0:2 offset1:3 define void @load_misaligned64_constant_offsets(i64 addrspace(1)* %out) { %val0 = load i64, i64 addrspace(3)* getelementptr inbounds ([4 x i64], [4 x i64] addrspace(3)* @bar, i32 0, i32 0), align 4 @@ -425,8 +425,8 @@ define void @load_misaligned64_constant_offsets(i64 addrspace(1)* %out) { ; SI-LABEL: @load_misaligned64_constant_large_offsets ; SI-DAG: v_mov_b32_e32 [[BASE0:v[0-9]+]], 0x7ff8{{$}} ; SI-DAG: v_mov_b32_e32 [[BASE1:v[0-9]+]], 0x4000 -; SI-DAG: ds_read2_b32 v{{\[[0-9]+:[0-9]+\]}}, [[BASE0]] offset0:0 offset1:1 -; SI-DAG: ds_read2_b32 v{{\[[0-9]+:[0-9]+\]}}, [[BASE1]] offset0:0 offset1:1 +; SI-DAG: ds_read2_b32 v{{\[[0-9]+:[0-9]+\]}}, [[BASE0]] offset1:1 +; SI-DAG: ds_read2_b32 v{{\[[0-9]+:[0-9]+\]}}, [[BASE1]] offset1:1 ; SI: s_endpgm define void @load_misaligned64_constant_large_offsets(i64 addrspace(1)* %out) { %val0 = load i64, i64 addrspace(3)* getelementptr inbounds ([4096 x i64], [4096 x i64] addrspace(3)* @bar.large, i32 0, i32 2048), align 4 diff --git a/test/CodeGen/R600/ds_read2st64.ll b/test/CodeGen/R600/ds_read2st64.ll index 482debb..54b3b45 100644 --- a/test/CodeGen/R600/ds_read2st64.ll +++ b/test/CodeGen/R600/ds_read2st64.ll @@ -5,7 +5,7 @@ ; SI-LABEL: @simple_read2st64_f32_0_1 -; SI: ds_read2st64_b32 v{{\[}}[[LO_VREG:[0-9]+]]:[[HI_VREG:[0-9]+]]{{\]}}, v{{[0-9]+}} offset0:0 offset1:1 +; SI: ds_read2st64_b32 v{{\[}}[[LO_VREG:[0-9]+]]:[[HI_VREG:[0-9]+]]{{\]}}, v{{[0-9]+}} offset1:1 ; SI: s_waitcnt lgkmcnt(0) ; SI: v_add_f32_e32 [[RESULT:v[0-9]+]], v[[HI_VREG]], v[[LO_VREG]] ; SI: buffer_store_dword [[RESULT]] @@ -117,7 +117,7 @@ define void @odd_invalid_read2st64_f32_1(float addrspace(1)* %out) #0 { } ; SI-LABEL: @simple_read2st64_f64_0_1 -; SI: ds_read2st64_b64 v{{\[}}[[LO_VREG:[0-9]+]]:[[HI_VREG:[0-9]+]]{{\]}}, v{{[0-9]+}} offset0:0 offset1:1 +; SI: ds_read2st64_b64 v{{\[}}[[LO_VREG:[0-9]+]]:[[HI_VREG:[0-9]+]]{{\]}}, v{{[0-9]+}} offset1:1 ; SI: s_waitcnt lgkmcnt(0) ; SI: v_add_f64 [[RESULT:v\[[0-9]+:[0-9]+\]]], v{{\[}}[[LO_VREG]]:{{[0-9]+\]}}, v{{\[[0-9]+}}:[[HI_VREG]]{{\]}} ; SI: buffer_store_dwordx2 [[RESULT]] @@ -158,7 +158,7 @@ define void @simple_read2st64_f64_1_2(double addrspace(1)* %out, double addrspac ; Alignment only ; SI-LABEL: @misaligned_read2st64_f64 -; SI: ds_read2_b32 v{{\[[0-9]+:[0-9]+\]}}, {{v[0-9]+}} offset0:0 offset1:1 +; SI: ds_read2_b32 v{{\[[0-9]+:[0-9]+\]}}, {{v[0-9]+}} offset1:1 ; SI: ds_read2_b32 v{{\[[0-9]+:[0-9]+\]}}, {{v[0-9]+}} offset0:128 offset1:129 ; SI: s_endpgm define void @misaligned_read2st64_f64(double addrspace(1)* %out, double addrspace(3)* %lds) #0 { @@ -237,7 +237,7 @@ define void @invalid_read2st64_f64_odd_offset(double addrspace(1)* %out, double ; SI-LABEL: @byte_size_only_divisible_64_read2_f64 ; SI-NOT: ds_read2st_b64 -; SI: ds_read2_b64 v{{\[[0-9]+:[0-9]+\]}}, v{{[0-9]+}} offset0:0 offset1:8 +; SI: ds_read2_b64 v{{\[[0-9]+:[0-9]+\]}}, v{{[0-9]+}} offset1:8 ; SI: s_endpgm define void @byte_size_only_divisible_64_read2_f64(double addrspace(1)* %out, double addrspace(3)* %lds) #0 { %x.i = tail call i32 @llvm.r600.read.tidig.x() #1 diff --git a/test/CodeGen/R600/ds_write2.ll b/test/CodeGen/R600/ds_write2.ll index 1d94c57..b553d34 100644 --- a/test/CodeGen/R600/ds_write2.ll +++ b/test/CodeGen/R600/ds_write2.ll @@ -7,7 +7,7 @@ ; SI-LABEL: @simple_write2_one_val_f32 ; SI-DAG: buffer_load_dword [[VAL:v[0-9]+]] ; SI-DAG: v_lshlrev_b32_e32 [[VPTR:v[0-9]+]], 2, v{{[0-9]+}} -; SI: ds_write2_b32 [[VPTR]], [[VAL]], [[VAL]] offset0:0 offset1:8 +; SI: ds_write2_b32 [[VPTR]], [[VAL]], [[VAL]] offset1:8 ; SI: s_endpgm define void @simple_write2_one_val_f32(float addrspace(1)* %C, float addrspace(1)* %in) #0 { %x.i = tail call i32 @llvm.r600.read.tidig.x() #1 @@ -25,7 +25,7 @@ define void @simple_write2_one_val_f32(float addrspace(1)* %C, float addrspace(1 ; SI-DAG: buffer_load_dword [[VAL0:v[0-9]+]], {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, 0 addr64{{$}} ; SI-DAG: buffer_load_dword [[VAL1:v[0-9]+]], {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, 0 addr64 offset:4 ; SI-DAG: v_lshlrev_b32_e32 [[VPTR:v[0-9]+]], 2, v{{[0-9]+}} -; SI: ds_write2_b32 [[VPTR]], [[VAL0]], [[VAL1]] offset0:0 offset1:8 +; SI: ds_write2_b32 [[VPTR]], [[VAL0]], [[VAL1]] offset1:8 ; SI: s_endpgm define void @simple_write2_two_val_f32(float addrspace(1)* %C, float addrspace(1)* %in) #0 { %x.i = tail call i32 @llvm.r600.read.tidig.x() #1 @@ -84,7 +84,7 @@ define void @simple_write2_two_val_f32_volatile_1(float addrspace(1)* %C, float ; SI: buffer_load_dwordx2 v{{\[}}[[VAL0:[0-9]+]]:{{[0-9]+\]}} ; SI: buffer_load_dwordx2 v{{\[[0-9]+}}:[[VAL1:[0-9]+]]{{\]}} ; SI: v_lshlrev_b32_e32 [[VPTR:v[0-9]+]], 2, v{{[0-9]+}} -; SI: ds_write2_b32 [[VPTR]], v[[VAL0]], v[[VAL1]] offset0:0 offset1:8 +; SI: ds_write2_b32 [[VPTR]], v[[VAL0]], v[[VAL1]] offset1:8 ; SI: s_endpgm define void @simple_write2_two_val_subreg2_mixed_f32(float addrspace(1)* %C, <2 x float> addrspace(1)* %in) #0 { %x.i = tail call i32 @llvm.r600.read.tidig.x() #1 @@ -105,7 +105,7 @@ define void @simple_write2_two_val_subreg2_mixed_f32(float addrspace(1)* %C, <2 ; SI-LABEL: @simple_write2_two_val_subreg2_f32 ; SI-DAG: buffer_load_dwordx2 v{{\[}}[[VAL0:[0-9]+]]:[[VAL1:[0-9]+]]{{\]}} ; SI-DAG: v_lshlrev_b32_e32 [[VPTR:v[0-9]+]], 2, v{{[0-9]+}} -; SI: ds_write2_b32 [[VPTR]], v[[VAL0]], v[[VAL1]] offset0:0 offset1:8 +; SI: ds_write2_b32 [[VPTR]], v[[VAL0]], v[[VAL1]] offset1:8 ; SI: s_endpgm define void @simple_write2_two_val_subreg2_f32(float addrspace(1)* %C, <2 x float> addrspace(1)* %in) #0 { %x.i = tail call i32 @llvm.r600.read.tidig.x() #1 @@ -124,7 +124,7 @@ define void @simple_write2_two_val_subreg2_f32(float addrspace(1)* %C, <2 x floa ; SI-LABEL: @simple_write2_two_val_subreg4_f32 ; SI-DAG: buffer_load_dwordx4 v{{\[}}[[VAL0:[0-9]+]]:[[VAL1:[0-9]+]]{{\]}} ; SI-DAG: v_lshlrev_b32_e32 [[VPTR:v[0-9]+]], 2, v{{[0-9]+}} -; SI: ds_write2_b32 [[VPTR]], v[[VAL0]], v[[VAL1]] offset0:0 offset1:8 +; SI: ds_write2_b32 [[VPTR]], v[[VAL0]], v[[VAL1]] offset1:8 ; SI: s_endpgm define void @simple_write2_two_val_subreg4_f32(float addrspace(1)* %C, <4 x float> addrspace(1)* %in) #0 { %x.i = tail call i32 @llvm.r600.read.tidig.x() #1 @@ -144,7 +144,7 @@ define void @simple_write2_two_val_subreg4_f32(float addrspace(1)* %C, <4 x floa ; SI-DAG: buffer_load_dword [[VAL0:v[0-9]+]], {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, 0 addr64{{$}} ; SI-DAG: buffer_load_dword [[VAL1:v[0-9]+]], {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, 0 addr64 offset:4 ; SI-DAG: v_lshlrev_b32_e32 [[VPTR:v[0-9]+]], 2, v{{[0-9]+}} -; SI: ds_write2_b32 [[VPTR]], [[VAL0]], [[VAL1]] offset0:0 offset1:255 +; SI: ds_write2_b32 [[VPTR]], [[VAL0]], [[VAL1]] offset1:255 ; SI: s_endpgm define void @simple_write2_two_val_max_offset_f32(float addrspace(1)* %C, float addrspace(1)* %in) #0 { %x.i = tail call i32 @llvm.r600.read.tidig.x() #1 @@ -179,7 +179,7 @@ define void @simple_write2_two_val_too_far_f32(float addrspace(1)* %C, float add } ; SI-LABEL: @simple_write2_two_val_f32_x2 -; SI: ds_write2_b32 [[BASEADDR:v[0-9]+]], [[VAL0:v[0-9]+]], [[VAL1:v[0-9]+]] offset0:0 offset1:8 +; SI: ds_write2_b32 [[BASEADDR:v[0-9]+]], [[VAL0:v[0-9]+]], [[VAL1:v[0-9]+]] offset1:8 ; SI-NEXT: ds_write2_b32 [[BASEADDR]], [[VAL0]], [[VAL1]] offset0:11 offset1:27 ; SI: s_endpgm define void @simple_write2_two_val_f32_x2(float addrspace(1)* %C, float addrspace(1)* %in0, float addrspace(1)* %in1) #0 { @@ -268,7 +268,7 @@ define void @write2_ptr_subreg_arg_two_val_f32(float addrspace(1)* %C, float add ; SI-LABEL: @simple_write2_one_val_f64 ; SI: buffer_load_dwordx2 [[VAL:v\[[0-9]+:[0-9]+\]]], ; SI: v_lshlrev_b32_e32 [[VPTR:v[0-9]+]], 3, v{{[0-9]+}} -; SI: ds_write2_b64 [[VPTR]], [[VAL]], [[VAL]] offset0:0 offset1:8 +; SI: ds_write2_b64 [[VPTR]], [[VAL]], [[VAL]] offset1:8 ; SI: s_endpgm define void @simple_write2_one_val_f64(double addrspace(1)* %C, double addrspace(1)* %in) #0 { %x.i = tail call i32 @llvm.r600.read.tidig.x() #1 @@ -285,7 +285,7 @@ define void @simple_write2_one_val_f64(double addrspace(1)* %C, double addrspace ; SI-LABEL: @misaligned_simple_write2_one_val_f64 ; SI-DAG: buffer_load_dwordx2 v{{\[}}[[VAL0:[0-9]+]]:[[VAL1:[0-9]+]]{{\]}} ; SI-DAG: v_lshlrev_b32_e32 [[VPTR:v[0-9]+]], 3, v{{[0-9]+}} -; SI: ds_write2_b32 [[VPTR]], v[[VAL0]], v[[VAL1]] offset0:0 offset1:1 +; SI: ds_write2_b32 [[VPTR]], v[[VAL0]], v[[VAL1]] offset1:1 ; SI: ds_write2_b32 [[VPTR]], v[[VAL0]], v[[VAL1]] offset0:14 offset1:15 ; SI: s_endpgm define void @misaligned_simple_write2_one_val_f64(double addrspace(1)* %C, double addrspace(1)* %in, double addrspace(3)* %lds) #0 { @@ -304,7 +304,7 @@ define void @misaligned_simple_write2_one_val_f64(double addrspace(1)* %C, doubl ; SI-DAG: buffer_load_dwordx2 [[VAL0:v\[[0-9]+:[0-9]+\]]], {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, 0 addr64{{$}} ; SI-DAG: buffer_load_dwordx2 [[VAL1:v\[[0-9]+:[0-9]+\]]], {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, 0 addr64 offset:8 ; SI-DAG: v_lshlrev_b32_e32 [[VPTR:v[0-9]+]], 3, v{{[0-9]+}} -; SI: ds_write2_b64 [[VPTR]], [[VAL0]], [[VAL1]] offset0:0 offset1:8 +; SI: ds_write2_b64 [[VPTR]], [[VAL0]], [[VAL1]] offset1:8 ; SI: s_endpgm define void @simple_write2_two_val_f64(double addrspace(1)* %C, double addrspace(1)* %in) #0 { %x.i = tail call i32 @llvm.r600.read.tidig.x() #1 @@ -324,7 +324,7 @@ define void @simple_write2_two_val_f64(double addrspace(1)* %C, double addrspace ; SI-LABEL: @store_constant_adjacent_offsets ; SI: v_mov_b32_e32 [[ZERO:v[0-9]+]], 0{{$}} -; SI: ds_write2_b32 [[ZERO]], v{{[0-9]+}}, v{{[0-9]+}} offset0:0 offset1:1 +; SI: ds_write2_b32 [[ZERO]], v{{[0-9]+}}, v{{[0-9]+}} offset1:1 define void @store_constant_adjacent_offsets() { store i32 123, i32 addrspace(3)* getelementptr inbounds ([4 x i32], [4 x i32] addrspace(3)* @foo, i32 0, i32 0), align 4 store i32 123, i32 addrspace(3)* getelementptr inbounds ([4 x i32], [4 x i32] addrspace(3)* @foo, i32 0, i32 1), align 4 @@ -334,7 +334,7 @@ define void @store_constant_adjacent_offsets() { ; SI-LABEL: @store_constant_disjoint_offsets ; SI-DAG: v_mov_b32_e32 [[VAL:v[0-9]+]], 0x7b{{$}} ; SI-DAG: v_mov_b32_e32 [[ZERO:v[0-9]+]], 0{{$}} -; SI: ds_write2_b32 [[ZERO]], [[VAL]], [[VAL]] offset0:0 offset1:2 +; SI: ds_write2_b32 [[ZERO]], [[VAL]], [[VAL]] offset1:2 define void @store_constant_disjoint_offsets() { store i32 123, i32 addrspace(3)* getelementptr inbounds ([4 x i32], [4 x i32] addrspace(3)* @foo, i32 0, i32 0), align 4 store i32 123, i32 addrspace(3)* getelementptr inbounds ([4 x i32], [4 x i32] addrspace(3)* @foo, i32 0, i32 2), align 4 @@ -345,7 +345,7 @@ define void @store_constant_disjoint_offsets() { ; SI-LABEL: @store_misaligned64_constant_offsets ; SI: v_mov_b32_e32 [[ZERO:v[0-9]+]], 0{{$}} -; SI: ds_write2_b32 [[ZERO]], v{{[0-9]+}}, v{{[0-9]+}} offset0:0 offset1:1 +; SI: ds_write2_b32 [[ZERO]], v{{[0-9]+}}, v{{[0-9]+}} offset1:1 ; SI: ds_write2_b32 [[ZERO]], v{{[0-9]+}}, v{{[0-9]+}} offset0:2 offset1:3 define void @store_misaligned64_constant_offsets() { store i64 123, i64 addrspace(3)* getelementptr inbounds ([4 x i64], [4 x i64] addrspace(3)* @bar, i32 0, i32 0), align 4 @@ -358,8 +358,8 @@ define void @store_misaligned64_constant_offsets() { ; SI-LABEL: @store_misaligned64_constant_large_offsets ; SI-DAG: v_mov_b32_e32 [[BASE0:v[0-9]+]], 0x7ff8{{$}} ; SI-DAG: v_mov_b32_e32 [[BASE1:v[0-9]+]], 0x4000{{$}} -; SI-DAG: ds_write2_b32 [[BASE0]], v{{[0-9]+}}, v{{[0-9]+}} offset0:0 offset1:1 -; SI-DAG: ds_write2_b32 [[BASE1]], v{{[0-9]+}}, v{{[0-9]+}} offset0:0 offset1:1 +; SI-DAG: ds_write2_b32 [[BASE0]], v{{[0-9]+}}, v{{[0-9]+}} offset1:1 +; SI-DAG: ds_write2_b32 [[BASE1]], v{{[0-9]+}}, v{{[0-9]+}} offset1:1 ; SI: s_endpgm define void @store_misaligned64_constant_large_offsets() { store i64 123, i64 addrspace(3)* getelementptr inbounds ([4096 x i64], [4096 x i64] addrspace(3)* @bar.large, i32 0, i32 2048), align 4 diff --git a/test/CodeGen/R600/ds_write2st64.ll b/test/CodeGen/R600/ds_write2st64.ll index 2044df2..1d9d881 100644 --- a/test/CodeGen/R600/ds_write2st64.ll +++ b/test/CodeGen/R600/ds_write2st64.ll @@ -7,7 +7,7 @@ ; SI-LABEL: @simple_write2st64_one_val_f32_0_1 ; SI-DAG: buffer_load_dword [[VAL:v[0-9]+]] ; SI-DAG: v_lshlrev_b32_e32 [[VPTR:v[0-9]+]], 2, v{{[0-9]+}} -; SI: ds_write2st64_b32 [[VPTR]], [[VAL]], [[VAL]] offset0:0 offset1:1 +; SI: ds_write2st64_b32 [[VPTR]], [[VAL]], [[VAL]] offset1:1 ; SI: s_endpgm define void @simple_write2st64_one_val_f32_0_1(float addrspace(1)* %C, float addrspace(1)* %in) #0 { %x.i = tail call i32 @llvm.r600.read.tidig.x() #1 @@ -46,7 +46,7 @@ define void @simple_write2st64_two_val_f32_2_5(float addrspace(1)* %C, float add ; SI-DAG: buffer_load_dword [[VAL0:v[0-9]+]], {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, 0 addr64{{$}} ; SI-DAG: buffer_load_dword [[VAL1:v[0-9]+]], {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, 0 addr64 offset:4 ; SI-DAG: v_lshlrev_b32_e32 [[VPTR:v[0-9]+]], 2, v{{[0-9]+}} -; SI: ds_write2st64_b32 [[VPTR]], [[VAL0]], [[VAL1]] offset0:0 offset1:255 +; SI: ds_write2st64_b32 [[VPTR]], [[VAL0]], [[VAL1]] offset1:255 ; SI: s_endpgm define void @simple_write2st64_two_val_max_offset_f32(float addrspace(1)* %C, float addrspace(1)* %in, float addrspace(3)* %lds) #0 { %x.i = tail call i32 @llvm.r600.read.tidig.x() #1 @@ -85,7 +85,7 @@ define void @simple_write2st64_two_val_max_offset_f64(double addrspace(1)* %C, d ; SI-LABEL: @byte_size_only_divisible_64_write2st64_f64 ; SI-NOT: ds_write2st64_b64 -; SI: ds_write2_b64 {{v[0-9]+}}, {{v\[[0-9]+:[0-9]+\]}}, {{v\[[0-9]+:[0-9]+\]}} offset0:0 offset1:8 +; SI: ds_write2_b64 {{v[0-9]+}}, {{v\[[0-9]+:[0-9]+\]}}, {{v\[[0-9]+:[0-9]+\]}} offset1:8 ; SI: s_endpgm define void @byte_size_only_divisible_64_write2st64_f64(double addrspace(1)* %C, double addrspace(1)* %in, double addrspace(3)* %lds) #0 { %x.i = tail call i32 @llvm.r600.read.tidig.x() #1 diff --git a/test/CodeGen/R600/fmaxnum.ll b/test/CodeGen/R600/fmaxnum.ll index c105598..3029bd02 100644 --- a/test/CodeGen/R600/fmaxnum.ll +++ b/test/CodeGen/R600/fmaxnum.ll @@ -11,6 +11,9 @@ declare double @llvm.maxnum.f64(double, double) ; FUNC-LABEL: @test_fmax_f32 ; SI: v_max_f32_e32 + +; EG: MEM_RAT_CACHELESS STORE_RAW [[OUT:T[0-9]+\.[XYZW]]] +; EG: MAX_DX10 {{.*}}[[OUT]] define void @test_fmax_f32(float addrspace(1)* %out, float %a, float %b) nounwind { %val = call float @llvm.maxnum.f32(float %a, float %b) #0 store float %val, float addrspace(1)* %out, align 4 @@ -20,6 +23,10 @@ define void @test_fmax_f32(float addrspace(1)* %out, float %a, float %b) nounwin ; FUNC-LABEL: @test_fmax_v2f32 ; SI: v_max_f32_e32 ; SI: v_max_f32_e32 + +; EG: MEM_RAT_CACHELESS STORE_RAW [[OUT:T[0-9]+]] +; EG: MAX_DX10 {{.*}}[[OUT]] +; EG: MAX_DX10 {{.*}}[[OUT]] define void @test_fmax_v2f32(<2 x float> addrspace(1)* %out, <2 x float> %a, <2 x float> %b) nounwind { %val = call <2 x float> @llvm.maxnum.v2f32(<2 x float> %a, <2 x float> %b) #0 store <2 x float> %val, <2 x float> addrspace(1)* %out, align 8 @@ -31,6 +38,12 @@ define void @test_fmax_v2f32(<2 x float> addrspace(1)* %out, <2 x float> %a, <2 ; SI: v_max_f32_e32 ; SI: v_max_f32_e32 ; SI: v_max_f32_e32 + +; EG: MEM_RAT_CACHELESS STORE_RAW [[OUT:T[0-9]+]] +; EG: MAX_DX10 {{.*}}[[OUT]] +; EG: MAX_DX10 {{.*}}[[OUT]] +; EG: MAX_DX10 {{.*}}[[OUT]] +; EG: MAX_DX10 {{.*}}[[OUT]] define void @test_fmax_v4f32(<4 x float> addrspace(1)* %out, <4 x float> %a, <4 x float> %b) nounwind { %val = call <4 x float> @llvm.maxnum.v4f32(<4 x float> %a, <4 x float> %b) #0 store <4 x float> %val, <4 x float> addrspace(1)* %out, align 16 @@ -46,6 +59,17 @@ define void @test_fmax_v4f32(<4 x float> addrspace(1)* %out, <4 x float> %a, <4 ; SI: v_max_f32_e32 ; SI: v_max_f32_e32 ; SI: v_max_f32_e32 + +; EG: MEM_RAT_CACHELESS STORE_RAW [[OUT1:T[0-9]+]] +; EG: MEM_RAT_CACHELESS STORE_RAW [[OUT2:T[0-9]+]] +; EG-DAG: MAX_DX10 {{.*}}[[OUT1]].X +; EG-DAG: MAX_DX10 {{.*}}[[OUT1]].Y +; EG-DAG: MAX_DX10 {{.*}}[[OUT1]].Z +; EG-DAG: MAX_DX10 {{.*}}[[OUT1]].W +; EG-DAG: MAX_DX10 {{.*}}[[OUT2]].X +; EG-DAG: MAX_DX10 {{.*}}[[OUT2]].Y +; EG-DAG: MAX_DX10 {{.*}}[[OUT2]].Z +; EG-DAG: MAX_DX10 {{.*}}[[OUT2]].W define void @test_fmax_v8f32(<8 x float> addrspace(1)* %out, <8 x float> %a, <8 x float> %b) nounwind { %val = call <8 x float> @llvm.maxnum.v8f32(<8 x float> %a, <8 x float> %b) #0 store <8 x float> %val, <8 x float> addrspace(1)* %out, align 32 @@ -69,6 +93,27 @@ define void @test_fmax_v8f32(<8 x float> addrspace(1)* %out, <8 x float> %a, <8 ; SI: v_max_f32_e32 ; SI: v_max_f32_e32 ; SI: v_max_f32_e32 + +; EG: MEM_RAT_CACHELESS STORE_RAW [[OUT1:T[0-9]+]] +; EG: MEM_RAT_CACHELESS STORE_RAW [[OUT2:T[0-9]+]] +; EG: MEM_RAT_CACHELESS STORE_RAW [[OUT3:T[0-9]+]] +; EG: MEM_RAT_CACHELESS STORE_RAW [[OUT4:T[0-9]+]] +; EG-DAG: MAX_DX10 {{.*}}[[OUT1]].X +; EG-DAG: MAX_DX10 {{.*}}[[OUT1]].Y +; EG-DAG: MAX_DX10 {{.*}}[[OUT1]].Z +; EG-DAG: MAX_DX10 {{.*}}[[OUT1]].W +; EG-DAG: MAX_DX10 {{.*}}[[OUT2]].X +; EG-DAG: MAX_DX10 {{.*}}[[OUT2]].Y +; EG-DAG: MAX_DX10 {{.*}}[[OUT2]].Z +; EG-DAG: MAX_DX10 {{.*}}[[OUT2]].W +; EG-DAG: MAX_DX10 {{.*}}[[OUT3]].X +; EG-DAG: MAX_DX10 {{.*}}[[OUT3]].Y +; EG-DAG: MAX_DX10 {{.*}}[[OUT3]].Z +; EG-DAG: MAX_DX10 {{.*}}[[OUT3]].W +; EG-DAG: MAX_DX10 {{.*}}[[OUT4]].X +; EG-DAG: MAX_DX10 {{.*}}[[OUT4]].Y +; EG-DAG: MAX_DX10 {{.*}}[[OUT4]].Z +; EG-DAG: MAX_DX10 {{.*}}[[OUT4]].W define void @test_fmax_v16f32(<16 x float> addrspace(1)* %out, <16 x float> %a, <16 x float> %b) nounwind { %val = call <16 x float> @llvm.maxnum.v16f32(<16 x float> %a, <16 x float> %b) #0 store <16 x float> %val, <16 x float> addrspace(1)* %out, align 64 @@ -79,6 +124,10 @@ define void @test_fmax_v16f32(<16 x float> addrspace(1)* %out, <16 x float> %a, ; SI-NOT: v_max_f32_e32 ; SI: v_mov_b32_e32 [[REG:v[0-9]+]], 2.0 ; SI: buffer_store_dword [[REG]] + +; EG: MEM_RAT_CACHELESS STORE_RAW [[OUT:T[0-9]+\.[XYZW]]] +; EG-NOT: MAX_DX10 +; EG: MOV {{.*}}[[OUT]], literal.{{[xy]}} define void @constant_fold_fmax_f32(float addrspace(1)* %out) nounwind { %val = call float @llvm.maxnum.f32(float 1.0, float 2.0) #0 store float %val, float addrspace(1)* %out, align 4 @@ -89,6 +138,11 @@ define void @constant_fold_fmax_f32(float addrspace(1)* %out) nounwind { ; SI-NOT: v_max_f32_e32 ; SI: v_mov_b32_e32 [[REG:v[0-9]+]], 0x7fc00000 ; SI: buffer_store_dword [[REG]] + +; EG: MEM_RAT_CACHELESS STORE_RAW [[OUT:T[0-9]+\.[XYZW]]] +; EG-NOT: MAX_DX10 +; EG: MOV {{.*}}[[OUT]], literal.{{[xy]}} +; EG: 2143289344(nan) define void @constant_fold_fmax_f32_nan_nan(float addrspace(1)* %out) nounwind { %val = call float @llvm.maxnum.f32(float 0x7FF8000000000000, float 0x7FF8000000000000) #0 store float %val, float addrspace(1)* %out, align 4 @@ -99,6 +153,10 @@ define void @constant_fold_fmax_f32_nan_nan(float addrspace(1)* %out) nounwind { ; SI-NOT: v_max_f32_e32 ; SI: v_mov_b32_e32 [[REG:v[0-9]+]], 1.0 ; SI: buffer_store_dword [[REG]] + +; EG: MEM_RAT_CACHELESS STORE_RAW [[OUT:T[0-9]+\.[XYZW]]] +; EG-NOT: MAX_DX10 +; EG: MOV {{.*}}[[OUT]], literal.{{[xy]}} define void @constant_fold_fmax_f32_val_nan(float addrspace(1)* %out) nounwind { %val = call float @llvm.maxnum.f32(float 1.0, float 0x7FF8000000000000) #0 store float %val, float addrspace(1)* %out, align 4 @@ -109,6 +167,10 @@ define void @constant_fold_fmax_f32_val_nan(float addrspace(1)* %out) nounwind { ; SI-NOT: v_max_f32_e32 ; SI: v_mov_b32_e32 [[REG:v[0-9]+]], 1.0 ; SI: buffer_store_dword [[REG]] + +; EG: MEM_RAT_CACHELESS STORE_RAW [[OUT:T[0-9]+\.[XYZW]]] +; EG-NOT: MAX_DX10 +; EG: MOV {{.*}}[[OUT]], literal.{{[xy]}} define void @constant_fold_fmax_f32_nan_val(float addrspace(1)* %out) nounwind { %val = call float @llvm.maxnum.f32(float 0x7FF8000000000000, float 1.0) #0 store float %val, float addrspace(1)* %out, align 4 @@ -119,6 +181,10 @@ define void @constant_fold_fmax_f32_nan_val(float addrspace(1)* %out) nounwind { ; SI-NOT: v_max_f32_e32 ; SI: v_mov_b32_e32 [[REG:v[0-9]+]], 0 ; SI: buffer_store_dword [[REG]] + +; EG: MEM_RAT_CACHELESS STORE_RAW [[OUT:T[0-9]+\.[XYZW]]] +; EG-NOT: MAX_DX10 +; EG: MOV {{.*}}[[OUT]], literal.{{[xy]}} define void @constant_fold_fmax_f32_p0_p0(float addrspace(1)* %out) nounwind { %val = call float @llvm.maxnum.f32(float 0.0, float 0.0) #0 store float %val, float addrspace(1)* %out, align 4 @@ -129,6 +195,10 @@ define void @constant_fold_fmax_f32_p0_p0(float addrspace(1)* %out) nounwind { ; SI-NOT: v_max_f32_e32 ; SI: v_mov_b32_e32 [[REG:v[0-9]+]], 0 ; SI: buffer_store_dword [[REG]] + +; EG: MEM_RAT_CACHELESS STORE_RAW [[OUT:T[0-9]+\.[XYZW]]] +; EG-NOT: MAX_DX10 +; EG: MOV {{.*}}[[OUT]], literal.{{[xy]}} define void @constant_fold_fmax_f32_p0_n0(float addrspace(1)* %out) nounwind { %val = call float @llvm.maxnum.f32(float 0.0, float -0.0) #0 store float %val, float addrspace(1)* %out, align 4 @@ -139,6 +209,10 @@ define void @constant_fold_fmax_f32_p0_n0(float addrspace(1)* %out) nounwind { ; SI-NOT: v_max_f32_e32 ; SI: v_mov_b32_e32 [[REG:v[0-9]+]], 0x80000000 ; SI: buffer_store_dword [[REG]] + +; EG: MEM_RAT_CACHELESS STORE_RAW [[OUT:T[0-9]+\.[XYZW]]] +; EG-NOT: MAX_DX10 +; EG: MOV {{.*}}[[OUT]], literal.{{[xy]}} define void @constant_fold_fmax_f32_n0_p0(float addrspace(1)* %out) nounwind { %val = call float @llvm.maxnum.f32(float -0.0, float 0.0) #0 store float %val, float addrspace(1)* %out, align 4 @@ -149,6 +223,10 @@ define void @constant_fold_fmax_f32_n0_p0(float addrspace(1)* %out) nounwind { ; SI-NOT: v_max_f32_e32 ; SI: v_mov_b32_e32 [[REG:v[0-9]+]], 0x80000000 ; SI: buffer_store_dword [[REG]] + +; EG: MEM_RAT_CACHELESS STORE_RAW [[OUT:T[0-9]+\.[XYZW]]] +; EG-NOT: MAX_DX10 +; EG: MOV {{.*}}[[OUT]], literal.{{[xy]}} define void @constant_fold_fmax_f32_n0_n0(float addrspace(1)* %out) nounwind { %val = call float @llvm.maxnum.f32(float -0.0, float -0.0) #0 store float %val, float addrspace(1)* %out, align 4 @@ -157,6 +235,10 @@ define void @constant_fold_fmax_f32_n0_n0(float addrspace(1)* %out) nounwind { ; FUNC-LABEL: @fmax_var_immediate_f32 ; SI: v_max_f32_e64 {{v[0-9]+}}, 2.0, {{s[0-9]+}} + +; EG: MEM_RAT_CACHELESS STORE_RAW [[OUT:T[0-9]+\.[XYZW]]] +; EG-NOT: MAX_DX10 +; EG: MOV {{.*}}[[OUT]], literal.{{[xy]}} define void @fmax_var_immediate_f32(float addrspace(1)* %out, float %a) nounwind { %val = call float @llvm.maxnum.f32(float %a, float 2.0) #0 store float %val, float addrspace(1)* %out, align 4 @@ -165,6 +247,9 @@ define void @fmax_var_immediate_f32(float addrspace(1)* %out, float %a) nounwind ; FUNC-LABEL: @fmax_immediate_var_f32 ; SI: v_max_f32_e64 {{v[0-9]+}}, 2.0, {{s[0-9]+}} + +; EG: MEM_RAT_CACHELESS STORE_RAW [[OUT:T[0-9]+\.[XYZW]]] +; EG: MAX_DX10 {{.*}}[[OUT]], {{KC0\[[0-9]\].[XYZW]}}, literal.{{[xy]}} define void @fmax_immediate_var_f32(float addrspace(1)* %out, float %a) nounwind { %val = call float @llvm.maxnum.f32(float 2.0, float %a) #0 store float %val, float addrspace(1)* %out, align 4 @@ -174,6 +259,9 @@ define void @fmax_immediate_var_f32(float addrspace(1)* %out, float %a) nounwind ; FUNC-LABEL: @fmax_var_literal_f32 ; SI: v_mov_b32_e32 [[REG:v[0-9]+]], 0x42c60000 ; SI: v_max_f32_e32 {{v[0-9]+}}, {{s[0-9]+}}, [[REG]] + +; EG: MEM_RAT_CACHELESS STORE_RAW [[OUT:T[0-9]+\.[XYZW]]] +; EG: MAX_DX10 {{.*}}[[OUT]], {{KC0\[[0-9]\].[XYZW]}}, literal.{{[xy]}} define void @fmax_var_literal_f32(float addrspace(1)* %out, float %a) nounwind { %val = call float @llvm.maxnum.f32(float %a, float 99.0) #0 store float %val, float addrspace(1)* %out, align 4 @@ -183,6 +271,9 @@ define void @fmax_var_literal_f32(float addrspace(1)* %out, float %a) nounwind { ; FUNC-LABEL: @fmax_literal_var_f32 ; SI: v_mov_b32_e32 [[REG:v[0-9]+]], 0x42c60000 ; SI: v_max_f32_e32 {{v[0-9]+}}, {{s[0-9]+}}, [[REG]] + +; EG: MEM_RAT_CACHELESS STORE_RAW [[OUT:T[0-9]+\.[XYZW]]] +; EG: MAX_DX10 {{.*}}[[OUT]], {{KC0\[[0-9]\].[XYZW]}}, literal.{{[xy]}} define void @fmax_literal_var_f32(float addrspace(1)* %out, float %a) nounwind { %val = call float @llvm.maxnum.f32(float 99.0, float %a) #0 store float %val, float addrspace(1)* %out, align 4 diff --git a/test/CodeGen/R600/fminnum.ll b/test/CodeGen/R600/fminnum.ll index 6b93b83..4d7b525 100644 --- a/test/CodeGen/R600/fminnum.ll +++ b/test/CodeGen/R600/fminnum.ll @@ -1,5 +1,6 @@ ; RUN: llc -march=amdgcn -mcpu=SI < %s | FileCheck -check-prefix=SI -check-prefix=FUNC %s ; RUN: llc -march=amdgcn -mcpu=tonga < %s | FileCheck -check-prefix=SI -check-prefix=FUNC %s +; RUN: llc -march=r600 -mcpu=cypress < %s | FileCheck -check-prefix=EG -check-prefix=FUNC %s declare float @llvm.minnum.f32(float, float) #0 declare <2 x float> @llvm.minnum.v2f32(<2 x float>, <2 x float>) #0 @@ -9,6 +10,9 @@ declare <16 x float> @llvm.minnum.v16f32(<16 x float>, <16 x float>) #0 ; FUNC-LABEL: @test_fmin_f32 ; SI: v_min_f32_e32 + +; EG: MEM_RAT_CACHELESS STORE_RAW [[OUT:T[0-9]+\.[XYZW]]] +; EG: MIN_DX10 {{.*}}[[OUT]] define void @test_fmin_f32(float addrspace(1)* %out, float %a, float %b) nounwind { %val = call float @llvm.minnum.f32(float %a, float %b) #0 store float %val, float addrspace(1)* %out, align 4 @@ -18,6 +22,10 @@ define void @test_fmin_f32(float addrspace(1)* %out, float %a, float %b) nounwin ; FUNC-LABEL: @test_fmin_v2f32 ; SI: v_min_f32_e32 ; SI: v_min_f32_e32 + +; EG: MEM_RAT_CACHELESS STORE_RAW [[OUT:T[0-9]+]] +; EG: MIN_DX10 {{.*}}[[OUT]] +; EG: MIN_DX10 {{.*}}[[OUT]] define void @test_fmin_v2f32(<2 x float> addrspace(1)* %out, <2 x float> %a, <2 x float> %b) nounwind { %val = call <2 x float> @llvm.minnum.v2f32(<2 x float> %a, <2 x float> %b) #0 store <2 x float> %val, <2 x float> addrspace(1)* %out, align 8 @@ -29,6 +37,12 @@ define void @test_fmin_v2f32(<2 x float> addrspace(1)* %out, <2 x float> %a, <2 ; SI: v_min_f32_e32 ; SI: v_min_f32_e32 ; SI: v_min_f32_e32 + +; EG: MEM_RAT_CACHELESS STORE_RAW [[OUT:T[0-9]+]] +; EG: MIN_DX10 {{.*}}[[OUT]] +; EG: MIN_DX10 {{.*}}[[OUT]] +; EG: MIN_DX10 {{.*}}[[OUT]] +; EG: MIN_DX10 {{.*}}[[OUT]] define void @test_fmin_v4f32(<4 x float> addrspace(1)* %out, <4 x float> %a, <4 x float> %b) nounwind { %val = call <4 x float> @llvm.minnum.v4f32(<4 x float> %a, <4 x float> %b) #0 store <4 x float> %val, <4 x float> addrspace(1)* %out, align 16 @@ -44,6 +58,17 @@ define void @test_fmin_v4f32(<4 x float> addrspace(1)* %out, <4 x float> %a, <4 ; SI: v_min_f32_e32 ; SI: v_min_f32_e32 ; SI: v_min_f32_e32 + +; EG: MEM_RAT_CACHELESS STORE_RAW [[OUT1:T[0-9]+]] +; EG: MEM_RAT_CACHELESS STORE_RAW [[OUT2:T[0-9]+]] +; EG-DAG: MIN_DX10 {{.*}}[[OUT1]].X +; EG-DAG: MIN_DX10 {{.*}}[[OUT1]].Y +; EG-DAG: MIN_DX10 {{.*}}[[OUT1]].Z +; EG-DAG: MIN_DX10 {{.*}}[[OUT1]].W +; EG-DAG: MIN_DX10 {{.*}}[[OUT2]].X +; EG-DAG: MIN_DX10 {{.*}}[[OUT2]].Y +; EG-DAG: MIN_DX10 {{.*}}[[OUT2]].Z +; EG-DAG: MIN_DX10 {{.*}}[[OUT2]].W define void @test_fmin_v8f32(<8 x float> addrspace(1)* %out, <8 x float> %a, <8 x float> %b) nounwind { %val = call <8 x float> @llvm.minnum.v8f32(<8 x float> %a, <8 x float> %b) #0 store <8 x float> %val, <8 x float> addrspace(1)* %out, align 32 @@ -67,6 +92,27 @@ define void @test_fmin_v8f32(<8 x float> addrspace(1)* %out, <8 x float> %a, <8 ; SI: v_min_f32_e32 ; SI: v_min_f32_e32 ; SI: v_min_f32_e32 + +; EG: MEM_RAT_CACHELESS STORE_RAW [[OUT1:T[0-9]+]] +; EG: MEM_RAT_CACHELESS STORE_RAW [[OUT2:T[0-9]+]] +; EG: MEM_RAT_CACHELESS STORE_RAW [[OUT3:T[0-9]+]] +; EG: MEM_RAT_CACHELESS STORE_RAW [[OUT4:T[0-9]+]] +; EG-DAG: MIN_DX10 {{.*}}[[OUT1]].X +; EG-DAG: MIN_DX10 {{.*}}[[OUT1]].Y +; EG-DAG: MIN_DX10 {{.*}}[[OUT1]].Z +; EG-DAG: MIN_DX10 {{.*}}[[OUT1]].W +; EG-DAG: MIN_DX10 {{.*}}[[OUT2]].X +; EG-DAG: MIN_DX10 {{.*}}[[OUT2]].Y +; EG-DAG: MIN_DX10 {{.*}}[[OUT2]].Z +; EG-DAG: MIN_DX10 {{.*}}[[OUT2]].W +; EG-DAG: MIN_DX10 {{.*}}[[OUT3]].X +; EG-DAG: MIN_DX10 {{.*}}[[OUT3]].Y +; EG-DAG: MIN_DX10 {{.*}}[[OUT3]].Z +; EG-DAG: MIN_DX10 {{.*}}[[OUT3]].W +; EG-DAG: MIN_DX10 {{.*}}[[OUT4]].X +; EG-DAG: MIN_DX10 {{.*}}[[OUT4]].Y +; EG-DAG: MIN_DX10 {{.*}}[[OUT4]].Z +; EG-DAG: MIN_DX10 {{.*}}[[OUT4]].W define void @test_fmin_v16f32(<16 x float> addrspace(1)* %out, <16 x float> %a, <16 x float> %b) nounwind { %val = call <16 x float> @llvm.minnum.v16f32(<16 x float> %a, <16 x float> %b) #0 store <16 x float> %val, <16 x float> addrspace(1)* %out, align 64 @@ -77,6 +123,10 @@ define void @test_fmin_v16f32(<16 x float> addrspace(1)* %out, <16 x float> %a, ; SI-NOT: v_min_f32_e32 ; SI: v_mov_b32_e32 [[REG:v[0-9]+]], 1.0 ; SI: buffer_store_dword [[REG]] + +; EG: MEM_RAT_CACHELESS STORE_RAW [[OUT:T[0-9]+\.[XYZW]]] +; EG-NOT: MIN_DX10 +; EG: MOV {{.*}}[[OUT]], literal.{{[xy]}} define void @constant_fold_fmin_f32(float addrspace(1)* %out) nounwind { %val = call float @llvm.minnum.f32(float 1.0, float 2.0) #0 store float %val, float addrspace(1)* %out, align 4 @@ -87,6 +137,11 @@ define void @constant_fold_fmin_f32(float addrspace(1)* %out) nounwind { ; SI-NOT: v_min_f32_e32 ; SI: v_mov_b32_e32 [[REG:v[0-9]+]], 0x7fc00000 ; SI: buffer_store_dword [[REG]] + +; EG: MEM_RAT_CACHELESS STORE_RAW [[OUT:T[0-9]+\.[XYZW]]] +; EG-NOT: MIN_DX10 +; EG: MOV {{.*}}[[OUT]], literal.{{[xy]}} +; EG: 2143289344({{nan|1\.#QNAN0e\+00}}) define void @constant_fold_fmin_f32_nan_nan(float addrspace(1)* %out) nounwind { %val = call float @llvm.minnum.f32(float 0x7FF8000000000000, float 0x7FF8000000000000) #0 store float %val, float addrspace(1)* %out, align 4 @@ -97,6 +152,10 @@ define void @constant_fold_fmin_f32_nan_nan(float addrspace(1)* %out) nounwind { ; SI-NOT: v_min_f32_e32 ; SI: v_mov_b32_e32 [[REG:v[0-9]+]], 1.0 ; SI: buffer_store_dword [[REG]] + +; EG: MEM_RAT_CACHELESS STORE_RAW [[OUT:T[0-9]+\.[XYZW]]] +; EG-NOT: MIN_DX10 +; EG: MOV {{.*}}[[OUT]], literal.{{[xy]}} define void @constant_fold_fmin_f32_val_nan(float addrspace(1)* %out) nounwind { %val = call float @llvm.minnum.f32(float 1.0, float 0x7FF8000000000000) #0 store float %val, float addrspace(1)* %out, align 4 @@ -107,6 +166,10 @@ define void @constant_fold_fmin_f32_val_nan(float addrspace(1)* %out) nounwind { ; SI-NOT: v_min_f32_e32 ; SI: v_mov_b32_e32 [[REG:v[0-9]+]], 1.0 ; SI: buffer_store_dword [[REG]] + +; EG: MEM_RAT_CACHELESS STORE_RAW [[OUT:T[0-9]+\.[XYZW]]] +; EG-NOT: MIN_DX10 +; EG: MOV {{.*}}[[OUT]], literal.{{[xy]}} define void @constant_fold_fmin_f32_nan_val(float addrspace(1)* %out) nounwind { %val = call float @llvm.minnum.f32(float 0x7FF8000000000000, float 1.0) #0 store float %val, float addrspace(1)* %out, align 4 @@ -117,6 +180,10 @@ define void @constant_fold_fmin_f32_nan_val(float addrspace(1)* %out) nounwind { ; SI-NOT: v_min_f32_e32 ; SI: v_mov_b32_e32 [[REG:v[0-9]+]], 0 ; SI: buffer_store_dword [[REG]] + +; EG: MEM_RAT_CACHELESS STORE_RAW [[OUT:T[0-9]+\.[XYZW]]] +; EG-NOT: MIN_DX10 +; EG: MOV {{.*}}[[OUT]], literal.{{[xy]}} define void @constant_fold_fmin_f32_p0_p0(float addrspace(1)* %out) nounwind { %val = call float @llvm.minnum.f32(float 0.0, float 0.0) #0 store float %val, float addrspace(1)* %out, align 4 @@ -127,6 +194,10 @@ define void @constant_fold_fmin_f32_p0_p0(float addrspace(1)* %out) nounwind { ; SI-NOT: v_min_f32_e32 ; SI: v_mov_b32_e32 [[REG:v[0-9]+]], 0 ; SI: buffer_store_dword [[REG]] + +; EG: MEM_RAT_CACHELESS STORE_RAW [[OUT:T[0-9]+\.[XYZW]]] +; EG-NOT: MIN_DX10 +; EG: MOV {{.*}}[[OUT]], literal.{{[xy]}} define void @constant_fold_fmin_f32_p0_n0(float addrspace(1)* %out) nounwind { %val = call float @llvm.minnum.f32(float 0.0, float -0.0) #0 store float %val, float addrspace(1)* %out, align 4 @@ -137,6 +208,10 @@ define void @constant_fold_fmin_f32_p0_n0(float addrspace(1)* %out) nounwind { ; SI-NOT: v_min_f32_e32 ; SI: v_mov_b32_e32 [[REG:v[0-9]+]], 0x80000000 ; SI: buffer_store_dword [[REG]] + +; EG: MEM_RAT_CACHELESS STORE_RAW [[OUT:T[0-9]+\.[XYZW]]] +; EG-NOT: MIN_DX10 +; EG: MOV {{.*}}[[OUT]], literal.{{[xy]}} define void @constant_fold_fmin_f32_n0_p0(float addrspace(1)* %out) nounwind { %val = call float @llvm.minnum.f32(float -0.0, float 0.0) #0 store float %val, float addrspace(1)* %out, align 4 @@ -147,6 +222,10 @@ define void @constant_fold_fmin_f32_n0_p0(float addrspace(1)* %out) nounwind { ; SI-NOT: v_min_f32_e32 ; SI: v_mov_b32_e32 [[REG:v[0-9]+]], 0x80000000 ; SI: buffer_store_dword [[REG]] + +; EG: MEM_RAT_CACHELESS STORE_RAW [[OUT:T[0-9]+\.[XYZW]]] +; EG-NOT: MIN_DX10 +; EG: MOV {{.*}}[[OUT]], literal.{{[xy]}} define void @constant_fold_fmin_f32_n0_n0(float addrspace(1)* %out) nounwind { %val = call float @llvm.minnum.f32(float -0.0, float -0.0) #0 store float %val, float addrspace(1)* %out, align 4 @@ -155,6 +234,9 @@ define void @constant_fold_fmin_f32_n0_n0(float addrspace(1)* %out) nounwind { ; FUNC-LABEL: @fmin_var_immediate_f32 ; SI: v_min_f32_e64 {{v[0-9]+}}, 2.0, {{s[0-9]+}} + +; EG: MEM_RAT_CACHELESS STORE_RAW [[OUT:T[0-9]+\.[XYZW]]] +; EG: MIN_DX10 {{.*}}[[OUT]], {{KC0\[[0-9]\].[XYZW]}}, literal.{{[xy]}} define void @fmin_var_immediate_f32(float addrspace(1)* %out, float %a) nounwind { %val = call float @llvm.minnum.f32(float %a, float 2.0) #0 store float %val, float addrspace(1)* %out, align 4 @@ -163,6 +245,9 @@ define void @fmin_var_immediate_f32(float addrspace(1)* %out, float %a) nounwind ; FUNC-LABEL: @fmin_immediate_var_f32 ; SI: v_min_f32_e64 {{v[0-9]+}}, 2.0, {{s[0-9]+}} + +; EG: MEM_RAT_CACHELESS STORE_RAW [[OUT:T[0-9]+\.[XYZW]]] +; EG: MIN_DX10 {{.*}}[[OUT]], {{KC0\[[0-9]\].[XYZW]}}, literal.{{[xy]}} define void @fmin_immediate_var_f32(float addrspace(1)* %out, float %a) nounwind { %val = call float @llvm.minnum.f32(float 2.0, float %a) #0 store float %val, float addrspace(1)* %out, align 4 @@ -172,6 +257,9 @@ define void @fmin_immediate_var_f32(float addrspace(1)* %out, float %a) nounwind ; FUNC-LABEL: @fmin_var_literal_f32 ; SI: v_mov_b32_e32 [[REG:v[0-9]+]], 0x42c60000 ; SI: v_min_f32_e32 {{v[0-9]+}}, {{s[0-9]+}}, [[REG]] + +; EG: MEM_RAT_CACHELESS STORE_RAW [[OUT:T[0-9]+\.[XYZW]]] +; EG: MIN_DX10 {{.*}}[[OUT]], {{KC0\[[0-9]\].[XYZW]}}, literal.{{[xy]}} define void @fmin_var_literal_f32(float addrspace(1)* %out, float %a) nounwind { %val = call float @llvm.minnum.f32(float %a, float 99.0) #0 store float %val, float addrspace(1)* %out, align 4 @@ -181,6 +269,9 @@ define void @fmin_var_literal_f32(float addrspace(1)* %out, float %a) nounwind { ; FUNC-LABEL: @fmin_literal_var_f32 ; SI: v_mov_b32_e32 [[REG:v[0-9]+]], 0x42c60000 ; SI: v_min_f32_e32 {{v[0-9]+}}, {{s[0-9]+}}, [[REG]] + +; EG: MEM_RAT_CACHELESS STORE_RAW [[OUT:T[0-9]+\.[XYZW]]] +; EG: MIN_DX10 {{.*}}[[OUT]], {{KC0\[[0-9]\].[XYZW]}}, literal.{{[xy]}} define void @fmin_literal_var_f32(float addrspace(1)* %out, float %a) nounwind { %val = call float @llvm.minnum.f32(float 99.0, float %a) #0 store float %val, float addrspace(1)* %out, align 4 diff --git a/test/CodeGen/R600/ftrunc.f64.ll b/test/CodeGen/R600/ftrunc.f64.ll index 4ea84a7..6618d8b 100644 --- a/test/CodeGen/R600/ftrunc.f64.ll +++ b/test/CodeGen/R600/ftrunc.f64.ll @@ -27,9 +27,9 @@ define void @v_ftrunc_f64(double addrspace(1)* %out, double addrspace(1)* %in) { ; SI: s_and_b32 s{{[0-9]+}}, s{{[0-9]+}}, 0x80000000 ; SI: s_add_i32 s{{[0-9]+}}, [[SEXP]], 0xfffffc01 ; SI: s_lshr_b64 -; SI: cmp_gt_i32 ; SI: s_not_b64 ; SI: s_and_b64 +; SI: cmp_gt_i32 ; SI: cndmask_b32 ; SI: cndmask_b32 ; SI: cmp_lt_i32 diff --git a/test/CodeGen/R600/local-memory-two-objects.ll b/test/CodeGen/R600/local-memory-two-objects.ll index caa4b19..06a8b12 100644 --- a/test/CodeGen/R600/local-memory-two-objects.ll +++ b/test/CodeGen/R600/local-memory-two-objects.ll @@ -32,8 +32,8 @@ ; EG-NOT: LDS_READ_RET {{[*]*}} OQAP, T[[ADDRR]] ; SI: v_add_i32_e32 [[SIPTR:v[0-9]+]], 16, v{{[0-9]+}} ; SI: ds_read_b32 {{v[0-9]+}}, [[SIPTR]] -; CI: ds_read_b32 {{v[0-9]+}}, [[ADDRR:v[0-9]+]] -; CI: ds_read_b32 {{v[0-9]+}}, [[ADDRR]] offset:16 +; CI: ds_read_b32 {{v[0-9]+}}, [[ADDRR:v[0-9]+]] offset:16 +; CI: ds_read_b32 {{v[0-9]+}}, [[ADDRR]] define void @local_memory_two_objects(i32 addrspace(1)* %out) { entry: diff --git a/test/CodeGen/R600/operand-folding.ll b/test/CodeGen/R600/operand-folding.ll index 4bf748e..816755e 100644 --- a/test/CodeGen/R600/operand-folding.ll +++ b/test/CodeGen/R600/operand-folding.ll @@ -19,7 +19,7 @@ endif: } ; CHECK-LABEL: {{^}}fold_imm: -; CHECK v_or_i32_e32 v{{[0-9]+}}, 5 +; CHECK: v_or_b32_e32 v{{[0-9]+}}, 5 define void @fold_imm(i32 addrspace(1)* %out, i32 %cmp) { entry: %fold = add i32 3, 2 diff --git a/test/CodeGen/R600/si-annotate-cf.ll b/test/CodeGen/R600/si-annotate-cf.ll new file mode 100644 index 0000000..3f30988 --- /dev/null +++ b/test/CodeGen/R600/si-annotate-cf.ll @@ -0,0 +1,25 @@ +; RUN: llc < %s -march=amdgcn -mcpu=verde -verify-machineinstrs | FileCheck --check-prefix=SI --check-prefix=FUNC %s +; RUN: llc < %s -march=amdgcn -mcpu=tonga -verify-machineinstrs | FileCheck --check-prefix=SI --check-prefix=FUNC %s + +; FUNC-LABEL: {{^}}break_inserted_outside_of_loop: + +; SI: [[LOOP_LABEL:[A-Z0-9]+]]: +; Lowered break instructin: +; SI: s_or_b64 +; Lowered Loop instruction: +; SI: s_andn2_b64 +; s_cbranch_execnz [[LOOP_LABEL]] +; SI: s_endpgm +define void @break_inserted_outside_of_loop(i32 addrspace(1)* %out, i32 %a, i32 %b) { +main_body: + %0 = and i32 %a, %b + %1 = trunc i32 %0 to i1 + br label %ENDIF + +ENDLOOP: + store i32 0, i32 addrspace(1)* %out + ret void + +ENDIF: + br i1 %1, label %ENDLOOP, label %ENDIF +} diff --git a/test/CodeGen/R600/unaligned-load-store.ll b/test/CodeGen/R600/unaligned-load-store.ll index efb1de2..82d88eb 100644 --- a/test/CodeGen/R600/unaligned-load-store.ll +++ b/test/CodeGen/R600/unaligned-load-store.ll @@ -195,7 +195,7 @@ define void @load_lds_i64_align_4_with_offset(i64 addrspace(1)* nocapture %out, ; SI-LABEL: {{^}}load_lds_i64_align_4_with_split_offset: ; The tests for the case where the lo offset is 8-bits, but the hi offset is 9-bits -; SI: ds_read2_b32 v[{{[0-9]+}}:{{[0-9]+}}], v{{[0-9]}} offset0:0 offset1:1 +; SI: ds_read2_b32 v[{{[0-9]+}}:{{[0-9]+}}], v{{[0-9]}} offset1:1 ; SI: s_endpgm define void @load_lds_i64_align_4_with_split_offset(i64 addrspace(1)* nocapture %out, i64 addrspace(3)* %in) #0 { %ptr = bitcast i64 addrspace(3)* %in to i32 addrspace(3)* @@ -243,7 +243,7 @@ define void @store_lds_i64_align_4_with_offset(i64 addrspace(3)* %out) #0 { ; SI-LABEL: {{^}}store_lds_i64_align_4_with_split_offset: ; The tests for the case where the lo offset is 8-bits, but the hi offset is 9-bits -; SI: ds_write2_b32 v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}} offset0:0 offset1:1 +; SI: ds_write2_b32 v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}} offset1:1 ; SI: s_endpgm define void @store_lds_i64_align_4_with_split_offset(i64 addrspace(3)* %out) #0 { %ptr = bitcast i64 addrspace(3)* %out to i32 addrspace(3)* |