Diffstat (limited to 'test/CodeGen/R600')
321 files changed, 10798 insertions, 3360 deletions
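The recurring change in the hunks below is to the RUN lines and FileCheck prefixes: GCN runs move from -march=r600 to -march=amdgcn, a -mcpu=tonga (VI) run is added where the existing checks also hold for VI, and the redundant -CHECK suffix is dropped from prefixes (SI-CHECK becomes SI, R600-CHECK becomes R600, and so on). A minimal sketch of the updated test shape; the kernel and checks here are illustrative only and are not part of the patch:

; RUN: llc < %s -march=amdgcn -mcpu=SI -verify-machineinstrs | FileCheck %s --check-prefix=SI
; RUN: llc < %s -march=amdgcn -mcpu=tonga -verify-machineinstrs | FileCheck %s --check-prefix=SI

; Store a 32-bit kernel argument to global memory; both SI and VI (tonga)
; are expected to select a MUBUF store, so the two runs share one prefix.
; SI: {{^}}store_i32_arg:
; SI: buffer_store_dword
define void @store_i32_arg(i32 addrspace(1)* %out, i32 %in) {
entry:
  store i32 %in, i32 addrspace(1)* %out
  ret void
}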
diff --git a/test/CodeGen/R600/128bit-kernel-args.ll b/test/CodeGen/R600/128bit-kernel-args.ll index d9b0ff2..557d86a 100644 --- a/test/CodeGen/R600/128bit-kernel-args.ll +++ b/test/CodeGen/R600/128bit-kernel-args.ll @@ -1,26 +1,27 @@ -; RUN: llc < %s -march=r600 -mcpu=redwood | FileCheck %s --check-prefix=R600-CHECK -; RUN: llc < %s -march=r600 -mcpu=SI -verify-machineinstrs | FileCheck %s --check-prefix=SI-CHECK +; RUN: llc < %s -march=r600 -mcpu=redwood | FileCheck %s --check-prefix=R600 +; RUN: llc < %s -march=amdgcn -mcpu=SI -verify-machineinstrs | FileCheck %s --check-prefix=SI +; RUN: llc < %s -march=amdgcn -mcpu=tonga -verify-machineinstrs | FileCheck %s --check-prefix=SI -; R600-CHECK: {{^}}v4i32_kernel_arg: -; R600-CHECK-DAG: MOV {{[* ]*}}T[[GPR:[0-9]]].X, KC0[3].Y -; R600-CHECK-DAG: MOV {{[* ]*}}T[[GPR]].Y, KC0[3].Z -; R600-CHECK-DAG: MOV {{[* ]*}}T[[GPR]].Z, KC0[3].W -; R600-CHECK-DAG: MOV {{[* ]*}}T[[GPR]].W, KC0[4].X -; SI-CHECK: {{^}}v4i32_kernel_arg: -; SI-CHECK: buffer_store_dwordx4 +; R600: {{^}}v4i32_kernel_arg: +; R600-DAG: MOV {{[* ]*}}T[[GPR:[0-9]]].X, KC0[3].Y +; R600-DAG: MOV {{[* ]*}}T[[GPR]].Y, KC0[3].Z +; R600-DAG: MOV {{[* ]*}}T[[GPR]].Z, KC0[3].W +; R600-DAG: MOV {{[* ]*}}T[[GPR]].W, KC0[4].X +; SI: {{^}}v4i32_kernel_arg: +; SI: buffer_store_dwordx4 define void @v4i32_kernel_arg(<4 x i32> addrspace(1)* %out, <4 x i32> %in) { entry: store <4 x i32> %in, <4 x i32> addrspace(1)* %out ret void } -; R600-CHECK: {{^}}v4f32_kernel_arg: -; R600-CHECK-DAG: MOV {{[* ]*}}T[[GPR:[0-9]]].X, KC0[3].Y -; R600-CHECK-DAG: MOV {{[* ]*}}T[[GPR]].Y, KC0[3].Z -; R600-CHECK-DAG: MOV {{[* ]*}}T[[GPR]].Z, KC0[3].W -; R600-CHECK-DAG: MOV {{[* ]*}}T[[GPR]].W, KC0[4].X -; SI-CHECK: {{^}}v4f32_kernel_arg: -; SI-CHECK: buffer_store_dwordx4 +; R600: {{^}}v4f32_kernel_arg: +; R600-DAG: MOV {{[* ]*}}T[[GPR:[0-9]]].X, KC0[3].Y +; R600-DAG: MOV {{[* ]*}}T[[GPR]].Y, KC0[3].Z +; R600-DAG: MOV {{[* ]*}}T[[GPR]].Z, KC0[3].W +; R600-DAG: MOV {{[* ]*}}T[[GPR]].W, KC0[4].X +; SI: {{^}}v4f32_kernel_arg: +; SI: buffer_store_dwordx4 define void @v4f32_kernel_arg(<4 x float> addrspace(1)* %out, <4 x float> %in) { entry: store <4 x float> %in, <4 x float> addrspace(1)* %out diff --git a/test/CodeGen/R600/32-bit-local-address-space.ll b/test/CodeGen/R600/32-bit-local-address-space.ll index 4ff2762..6aca826 100644 --- a/test/CodeGen/R600/32-bit-local-address-space.ll +++ b/test/CodeGen/R600/32-bit-local-address-space.ll @@ -1,4 +1,5 @@ -; RUN: llc -march=r600 -mcpu=verde -verify-machineinstrs < %s | FileCheck -check-prefix=SI -check-prefix=FUNC %s +; RUN: llc -march=amdgcn -mcpu=verde -verify-machineinstrs < %s | FileCheck -check-prefix=SI -check-prefix=FUNC %s +; RUN: llc -march=amdgcn -mcpu=tonga -verify-machineinstrs < %s | FileCheck -check-prefix=SI -check-prefix=FUNC %s ; On Southern Islands GPUs the local address space(3) uses 32-bit pointers and ; the global address space(1) uses 64-bit pointers. 
These tests check to make sure @@ -130,7 +131,7 @@ define void @local_address_gep_const_offset_store(i32 addrspace(3)* %out, i32 %v ; FUNC-LABEL: {{^}}local_address_gep_large_const_offset_store: ; SI: s_add_i32 [[SPTR:s[0-9]]], s{{[0-9]+}}, 0x10004 ; SI: v_mov_b32_e32 [[VPTR:v[0-9]+]], [[SPTR]] -; SI: ds_write_b32 [[VPTR]], v{{[0-9]+}} [M0]{{$}} +; SI: ds_write_b32 [[VPTR]], v{{[0-9]+$}} define void @local_address_gep_large_const_offset_store(i32 addrspace(3)* %out, i32 %val) { %gep = getelementptr i32 addrspace(3)* %out, i32 16385 store i32 %val, i32 addrspace(3)* %gep, align 4 diff --git a/test/CodeGen/R600/64bit-kernel-args.ll b/test/CodeGen/R600/64bit-kernel-args.ll index cf4e055..2e08901 100644 --- a/test/CodeGen/R600/64bit-kernel-args.ll +++ b/test/CodeGen/R600/64bit-kernel-args.ll @@ -1,9 +1,12 @@ -; RUN: llc < %s -march=r600 -mcpu=tahiti -verify-machineinstrs | FileCheck %s --check-prefix=SI-CHECK +; RUN: llc < %s -march=amdgcn -mcpu=tahiti -verify-machineinstrs | FileCheck %s --check-prefix=GCN --check-prefix=SI +; RUN: llc < %s -march=amdgcn -mcpu=tonga -verify-machineinstrs | FileCheck %s --check-prefix=GCN --check-prefix=VI -; SI-CHECK: {{^}}f64_kernel_arg: -; SI-CHECK-DAG: s_load_dwordx2 s[{{[0-9]:[0-9]}}], s[0:1], 0x9 -; SI-CHECK-DAG: s_load_dwordx2 s[{{[0-9]:[0-9]}}], s[0:1], 0xb -; SI-CHECK: buffer_store_dwordx2 +; GCN: {{^}}f64_kernel_arg: +; SI-DAG: s_load_dwordx2 s[{{[0-9]:[0-9]}}], s[0:1], 0x9 +; SI-DAG: s_load_dwordx2 s[{{[0-9]:[0-9]}}], s[0:1], 0xb +; VI-DAG: s_load_dwordx2 s[{{[0-9]:[0-9]}}], s[0:1], 0x24 +; VI-DAG: s_load_dwordx2 s[{{[0-9]:[0-9]}}], s[0:1], 0x2c +; GCN: buffer_store_dwordx2 define void @f64_kernel_arg(double addrspace(1)* %out, double %in) { entry: store double %in, double addrspace(1)* %out diff --git a/test/CodeGen/R600/add-debug.ll b/test/CodeGen/R600/add-debug.ll index 166e0f6..a83c689 100644 --- a/test/CodeGen/R600/add-debug.ll +++ b/test/CodeGen/R600/add-debug.ll @@ -1,4 +1,5 @@ -; RUN: llc < %s -march=r600 -mcpu=tahiti -debug +; RUN: llc < %s -march=amdgcn -mcpu=tahiti -debug +; RUN: llc < %s -march=amdgcn -mcpu=tonga -debug ; REQUIRES: asserts ; Check that SelectionDAGDumper does not crash on int_SI_if. 
diff --git a/test/CodeGen/R600/add.ll b/test/CodeGen/R600/add.ll index 767a642..3a8b97c 100644 --- a/test/CodeGen/R600/add.ll +++ b/test/CodeGen/R600/add.ll @@ -1,12 +1,13 @@ -; RUN: llc < %s -march=r600 -mcpu=redwood | FileCheck --check-prefix=EG-CHECK --check-prefix=FUNC %s -; RUN: llc < %s -march=r600 -mcpu=verde -verify-machineinstrs | FileCheck --check-prefix=SI-CHECK --check-prefix=FUNC %s +; RUN: llc < %s -march=r600 -mcpu=redwood | FileCheck --check-prefix=EG --check-prefix=FUNC %s +; RUN: llc < %s -march=amdgcn -mcpu=verde -verify-machineinstrs | FileCheck --check-prefix=SI --check-prefix=FUNC %s +; RUN: llc < %s -march=amdgcn -mcpu=tonga -verify-machineinstrs | FileCheck --check-prefix=SI --check-prefix=FUNC %s ;FUNC-LABEL: {{^}}test1: -;EG-CHECK: ADD_INT {{[* ]*}}T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}} +;EG: ADD_INT {{[* ]*}}T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}} -;SI-CHECK: v_add_i32_e32 [[REG:v[0-9]+]], {{v[0-9]+, v[0-9]+}} -;SI-CHECK-NOT: [[REG]] -;SI-CHECK: buffer_store_dword [[REG]], +;SI: v_add_i32_e32 [[REG:v[0-9]+]], {{v[0-9]+, v[0-9]+}} +;SI-NOT: [[REG]] +;SI: buffer_store_dword [[REG]], define void @test1(i32 addrspace(1)* %out, i32 addrspace(1)* %in) { %b_ptr = getelementptr i32 addrspace(1)* %in, i32 1 %a = load i32 addrspace(1)* %in @@ -17,11 +18,11 @@ define void @test1(i32 addrspace(1)* %out, i32 addrspace(1)* %in) { } ;FUNC-LABEL: {{^}}test2: -;EG-CHECK: ADD_INT {{[* ]*}}T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}} -;EG-CHECK: ADD_INT {{[* ]*}}T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}} +;EG: ADD_INT {{[* ]*}}T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}} +;EG: ADD_INT {{[* ]*}}T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}} -;SI-CHECK: v_add_i32_e32 v{{[0-9]+, v[0-9]+, v[0-9]+}} -;SI-CHECK: v_add_i32_e32 v{{[0-9]+, v[0-9]+, v[0-9]+}} +;SI: v_add_i32_e32 v{{[0-9]+, v[0-9]+, v[0-9]+}} +;SI: v_add_i32_e32 v{{[0-9]+, v[0-9]+, v[0-9]+}} define void @test2(<2 x i32> addrspace(1)* %out, <2 x i32> addrspace(1)* %in) { %b_ptr = getelementptr <2 x i32> addrspace(1)* %in, i32 1 @@ -33,15 +34,15 @@ define void @test2(<2 x i32> addrspace(1)* %out, <2 x i32> addrspace(1)* %in) { } ;FUNC-LABEL: {{^}}test4: -;EG-CHECK: ADD_INT {{[* ]*}}T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}} -;EG-CHECK: ADD_INT {{[* ]*}}T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}} -;EG-CHECK: ADD_INT {{[* ]*}}T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}} -;EG-CHECK: ADD_INT {{[* ]*}}T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}} +;EG: ADD_INT {{[* ]*}}T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}} +;EG: ADD_INT {{[* ]*}}T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}} +;EG: ADD_INT {{[* ]*}}T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}} +;EG: ADD_INT {{[* ]*}}T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}} -;SI-CHECK: v_add_i32_e32 v{{[0-9]+, v[0-9]+, v[0-9]+}} -;SI-CHECK: v_add_i32_e32 v{{[0-9]+, v[0-9]+, v[0-9]+}} -;SI-CHECK: v_add_i32_e32 v{{[0-9]+, v[0-9]+, v[0-9]+}} -;SI-CHECK: v_add_i32_e32 v{{[0-9]+, v[0-9]+, v[0-9]+}} +;SI: v_add_i32_e32 v{{[0-9]+, v[0-9]+, v[0-9]+}} +;SI: v_add_i32_e32 v{{[0-9]+, v[0-9]+, v[0-9]+}} +;SI: v_add_i32_e32 v{{[0-9]+, v[0-9]+, v[0-9]+}} +;SI: v_add_i32_e32 v{{[0-9]+, v[0-9]+, v[0-9]+}} define void @test4(<4 x i32> addrspace(1)* %out, <4 x i32> addrspace(1)* %in) { %b_ptr = getelementptr <4 x i32> addrspace(1)* %in, i32 1 @@ -53,22 +54,22 @@ define void @test4(<4 x i32> addrspace(1)* %out, <4 x i32> addrspace(1)* %in) { } ; FUNC-LABEL: {{^}}test8: -; 
EG-CHECK: ADD_INT -; EG-CHECK: ADD_INT -; EG-CHECK: ADD_INT -; EG-CHECK: ADD_INT -; EG-CHECK: ADD_INT -; EG-CHECK: ADD_INT -; EG-CHECK: ADD_INT -; EG-CHECK: ADD_INT -; SI-CHECK: s_add_i32 -; SI-CHECK: s_add_i32 -; SI-CHECK: s_add_i32 -; SI-CHECK: s_add_i32 -; SI-CHECK: s_add_i32 -; SI-CHECK: s_add_i32 -; SI-CHECK: s_add_i32 -; SI-CHECK: s_add_i32 +; EG: ADD_INT +; EG: ADD_INT +; EG: ADD_INT +; EG: ADD_INT +; EG: ADD_INT +; EG: ADD_INT +; EG: ADD_INT +; EG: ADD_INT +; SI: s_add_i32 +; SI: s_add_i32 +; SI: s_add_i32 +; SI: s_add_i32 +; SI: s_add_i32 +; SI: s_add_i32 +; SI: s_add_i32 +; SI: s_add_i32 define void @test8(<8 x i32> addrspace(1)* %out, <8 x i32> %a, <8 x i32> %b) { entry: %0 = add <8 x i32> %a, %b @@ -77,38 +78,38 @@ entry: } ; FUNC-LABEL: {{^}}test16: -; EG-CHECK: ADD_INT -; EG-CHECK: ADD_INT -; EG-CHECK: ADD_INT -; EG-CHECK: ADD_INT -; EG-CHECK: ADD_INT -; EG-CHECK: ADD_INT -; EG-CHECK: ADD_INT -; EG-CHECK: ADD_INT -; EG-CHECK: ADD_INT -; EG-CHECK: ADD_INT -; EG-CHECK: ADD_INT -; EG-CHECK: ADD_INT -; EG-CHECK: ADD_INT -; EG-CHECK: ADD_INT -; EG-CHECK: ADD_INT -; EG-CHECK: ADD_INT -; SI-CHECK: s_add_i32 -; SI-CHECK: s_add_i32 -; SI-CHECK: s_add_i32 -; SI-CHECK: s_add_i32 -; SI-CHECK: s_add_i32 -; SI-CHECK: s_add_i32 -; SI-CHECK: s_add_i32 -; SI-CHECK: s_add_i32 -; SI-CHECK: s_add_i32 -; SI-CHECK: s_add_i32 -; SI-CHECK: s_add_i32 -; SI-CHECK: s_add_i32 -; SI-CHECK: s_add_i32 -; SI-CHECK: s_add_i32 -; SI-CHECK: s_add_i32 -; SI-CHECK: s_add_i32 +; EG: ADD_INT +; EG: ADD_INT +; EG: ADD_INT +; EG: ADD_INT +; EG: ADD_INT +; EG: ADD_INT +; EG: ADD_INT +; EG: ADD_INT +; EG: ADD_INT +; EG: ADD_INT +; EG: ADD_INT +; EG: ADD_INT +; EG: ADD_INT +; EG: ADD_INT +; EG: ADD_INT +; EG: ADD_INT +; SI: s_add_i32 +; SI: s_add_i32 +; SI: s_add_i32 +; SI: s_add_i32 +; SI: s_add_i32 +; SI: s_add_i32 +; SI: s_add_i32 +; SI: s_add_i32 +; SI: s_add_i32 +; SI: s_add_i32 +; SI: s_add_i32 +; SI: s_add_i32 +; SI: s_add_i32 +; SI: s_add_i32 +; SI: s_add_i32 +; SI: s_add_i32 define void @test16(<16 x i32> addrspace(1)* %out, <16 x i32> %a, <16 x i32> %b) { entry: %0 = add <16 x i32> %a, %b @@ -117,8 +118,8 @@ entry: } ; FUNC-LABEL: {{^}}add64: -; SI-CHECK: s_add_u32 -; SI-CHECK: s_addc_u32 +; SI: s_add_u32 +; SI: s_addc_u32 define void @add64(i64 addrspace(1)* %out, i64 %a, i64 %b) { entry: %0 = add i64 %a, %b @@ -132,7 +133,7 @@ entry: ; to a VGPR before doing the add. ; FUNC-LABEL: {{^}}add64_sgpr_vgpr: -; SI-CHECK-NOT: v_addc_u32_e32 s +; SI-NOT: v_addc_u32_e32 s define void @add64_sgpr_vgpr(i64 addrspace(1)* %out, i64 %a, i64 addrspace(1)* %in) { entry: %0 = load i64 addrspace(1)* %in @@ -143,8 +144,8 @@ entry: ; Test i64 add inside a branch. 
; FUNC-LABEL: {{^}}add64_in_branch: -; SI-CHECK: s_add_u32 -; SI-CHECK: s_addc_u32 +; SI: s_add_u32 +; SI: s_addc_u32 define void @add64_in_branch(i64 addrspace(1)* %out, i64 addrspace(1)* %in, i64 %a, i64 %b, i64 %c) { entry: %0 = icmp eq i64 %a, 0 diff --git a/test/CodeGen/R600/add_i64.ll b/test/CodeGen/R600/add_i64.ll index 47ecf6d..1769409 100644 --- a/test/CodeGen/R600/add_i64.ll +++ b/test/CodeGen/R600/add_i64.ll @@ -1,4 +1,4 @@ -; RUN: llc -march=r600 -mcpu=SI -verify-machineinstrs< %s | FileCheck -check-prefix=SI %s +; RUN: llc -march=amdgcn -mcpu=SI -verify-machineinstrs< %s | FileCheck -check-prefix=SI %s declare i32 @llvm.r600.read.tidig.x() readnone diff --git a/test/CodeGen/R600/address-space.ll b/test/CodeGen/R600/address-space.ll index d04afe6..74ea9f0 100644 --- a/test/CodeGen/R600/address-space.ll +++ b/test/CodeGen/R600/address-space.ll @@ -1,4 +1,5 @@ -; RUN: llc -march=r600 -mcpu=SI -verify-machineinstrs < %s | FileCheck %s +; RUN: llc -march=amdgcn -mcpu=SI -verify-machineinstrs < %s | FileCheck %s +; RUN: llc -march=amdgcn -mcpu=tonga -verify-machineinstrs < %s | FileCheck %s ; Test that codegenprepare understands address space sizes @@ -9,9 +10,10 @@ ; CHECK-LABEL: {{^}}do_as_ptr_calcs: ; CHECK: s_load_dword [[SREG1:s[0-9]+]], +; CHECK: v_mov_b32_e32 [[VREG2:v[0-9]+]], [[SREG1]] ; CHECK: v_mov_b32_e32 [[VREG1:v[0-9]+]], [[SREG1]] ; CHECK-DAG: ds_read_b32 v{{[0-9]+}}, [[VREG1]] offset:12 -; CHECK-DAG: ds_read_b32 v{{[0-9]+}}, v{{[0-9]+}} offset:20 +; CHECK-DAG: ds_read_b32 v{{[0-9]+}}, [[VREG2]] offset:20 define void @do_as_ptr_calcs(%struct.foo addrspace(3)* nocapture %ptr) nounwind { entry: %x = getelementptr inbounds %struct.foo addrspace(3)* %ptr, i32 0, i32 1, i32 0 diff --git a/test/CodeGen/R600/and.ll b/test/CodeGen/R600/and.ll index 9a76fce..bb7cba3 100644 --- a/test/CodeGen/R600/and.ll +++ b/test/CodeGen/R600/and.ll @@ -1,5 +1,6 @@ ; RUN: llc -march=r600 -mcpu=redwood < %s | FileCheck -check-prefix=EG -check-prefix=FUNC %s -; RUN: llc -march=r600 -mcpu=verde -verify-machineinstrs < %s | FileCheck -check-prefix=SI -check-prefix=FUNC %s +; RUN: llc -march=amdgcn -mcpu=verde -verify-machineinstrs < %s | FileCheck -check-prefix=SI -check-prefix=FUNC %s +; RUN: llc -march=amdgcn -mcpu=tonga -verify-machineinstrs < %s | FileCheck -check-prefix=SI -check-prefix=FUNC %s ; FUNC-LABEL: {{^}}test2: ; EG: AND_INT {{\*? 
*}}T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}} @@ -63,8 +64,8 @@ define void @v_and_i32(i32 addrspace(1)* %out, i32 addrspace(1)* %aptr, i32 addr ret void } -; FUNC-LABEL: {{^}}v_and_constant_i32: -; SI: v_and_b32 +; FUNC-LABEL: {{^}}v_and_constant_i32 +; SI: v_and_b32_e32 v{{[0-9]+}}, 0x12d687, v{{[0-9]+}} define void @v_and_constant_i32(i32 addrspace(1)* %out, i32 addrspace(1)* %aptr) { %a = load i32 addrspace(1)* %aptr, align 4 %and = and i32 %a, 1234567 @@ -72,7 +73,25 @@ define void @v_and_constant_i32(i32 addrspace(1)* %out, i32 addrspace(1)* %aptr) ret void } -; FUNC-LABEL: {{^}}s_and_i64: +; FUNC-LABEL: {{^}}v_and_inline_imm_64_i32 +; SI: v_and_b32_e32 v{{[0-9]+}}, 64, v{{[0-9]+}} +define void @v_and_inline_imm_64_i32(i32 addrspace(1)* %out, i32 addrspace(1)* %aptr) { + %a = load i32 addrspace(1)* %aptr, align 4 + %and = and i32 %a, 64 + store i32 %and, i32 addrspace(1)* %out, align 4 + ret void +} + +; FUNC-LABEL: {{^}}v_and_inline_imm_neg_16_i32 +; SI: v_and_b32_e32 v{{[0-9]+}}, -16, v{{[0-9]+}} +define void @v_and_inline_imm_neg_16_i32(i32 addrspace(1)* %out, i32 addrspace(1)* %aptr) { + %a = load i32 addrspace(1)* %aptr, align 4 + %and = and i32 %a, -16 + store i32 %and, i32 addrspace(1)* %out, align 4 + ret void +} + +; FUNC-LABEL: {{^}}s_and_i64 ; SI: s_and_b64 define void @s_and_i64(i64 addrspace(1)* %out, i64 %a, i64 %b) { %and = and i64 %a, %b @@ -89,8 +108,8 @@ define void @s_and_i1(i1 addrspace(1)* %out, i1 %a, i1 %b) { ret void } -; FUNC-LABEL: {{^}}s_and_constant_i64: -; SI: s_and_b64 +; FUNC-LABEL: {{^}}s_and_constant_i64 +; SI: s_and_b64 s{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}} define void @s_and_constant_i64(i64 addrspace(1)* %out, i64 %a) { %and = and i64 %a, 281474976710655 store i64 %and, i64 addrspace(1)* %out, align 8 @@ -149,10 +168,129 @@ define void @v_and_inline_imm_i64(i64 addrspace(1)* %out, i64 addrspace(1)* %apt ret void } -; FUNC-LABEL: {{^}}s_and_inline_imm_i64: +; FUNC-LABEL: {{^}}s_and_inline_imm_64_i64 ; SI: s_and_b64 s{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}}, 64 -define void @s_and_inline_imm_i64(i64 addrspace(1)* %out, i64 addrspace(1)* %aptr, i64 %a) { +define void @s_and_inline_imm_64_i64(i64 addrspace(1)* %out, i64 addrspace(1)* %aptr, i64 %a) { %and = and i64 %a, 64 store i64 %and, i64 addrspace(1)* %out, align 8 ret void } + +; FUNC-LABEL: {{^}}s_and_inline_imm_1_i64 +; SI: s_and_b64 s{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}}, 1 +define void @s_and_inline_imm_1_i64(i64 addrspace(1)* %out, i64 addrspace(1)* %aptr, i64 %a) { + %and = and i64 %a, 1 + store i64 %and, i64 addrspace(1)* %out, align 8 + ret void +} + +; FUNC-LABEL: {{^}}s_and_inline_imm_1.0_i64 +; SI: s_and_b64 s{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}}, 1.0 +define void @s_and_inline_imm_1.0_i64(i64 addrspace(1)* %out, i64 addrspace(1)* %aptr, i64 %a) { + %and = and i64 %a, 4607182418800017408 + store i64 %and, i64 addrspace(1)* %out, align 8 + ret void +} + +; FUNC-LABEL: {{^}}s_and_inline_imm_neg_1.0_i64 +; SI: s_and_b64 s{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}}, -1.0 +define void @s_and_inline_imm_neg_1.0_i64(i64 addrspace(1)* %out, i64 addrspace(1)* %aptr, i64 %a) { + %and = and i64 %a, 13830554455654793216 + store i64 %and, i64 addrspace(1)* %out, align 8 + ret void +} + +; FUNC-LABEL: {{^}}s_and_inline_imm_0.5_i64 +; SI: s_and_b64 s{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}}, 0.5 +define void @s_and_inline_imm_0.5_i64(i64 addrspace(1)* %out, i64 addrspace(1)* %aptr, i64 %a) { + %and = and i64 %a, 4602678819172646912 
+ store i64 %and, i64 addrspace(1)* %out, align 8 + ret void +} + +; FUNC-LABEL: {{^}}s_and_inline_imm_neg_0.5_i64 +; SI: s_and_b64 s{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}}, -0.5 +define void @s_and_inline_imm_neg_0.5_i64(i64 addrspace(1)* %out, i64 addrspace(1)* %aptr, i64 %a) { + %and = and i64 %a, 13826050856027422720 + store i64 %and, i64 addrspace(1)* %out, align 8 + ret void +} + +; FUNC-LABEL: {{^}}s_and_inline_imm_2.0_i64 +; SI: s_and_b64 s{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}}, 2.0 +define void @s_and_inline_imm_2.0_i64(i64 addrspace(1)* %out, i64 addrspace(1)* %aptr, i64 %a) { + %and = and i64 %a, 4611686018427387904 + store i64 %and, i64 addrspace(1)* %out, align 8 + ret void +} + +; FUNC-LABEL: {{^}}s_and_inline_imm_neg_2.0_i64 +; SI: s_and_b64 s{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}}, -2.0 +define void @s_and_inline_imm_neg_2.0_i64(i64 addrspace(1)* %out, i64 addrspace(1)* %aptr, i64 %a) { + %and = and i64 %a, 13835058055282163712 + store i64 %and, i64 addrspace(1)* %out, align 8 + ret void +} + +; FUNC-LABEL: {{^}}s_and_inline_imm_4.0_i64 +; SI: s_and_b64 s{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}}, 4.0 +define void @s_and_inline_imm_4.0_i64(i64 addrspace(1)* %out, i64 addrspace(1)* %aptr, i64 %a) { + %and = and i64 %a, 4616189618054758400 + store i64 %and, i64 addrspace(1)* %out, align 8 + ret void +} + +; FUNC-LABEL: {{^}}s_and_inline_imm_neg_4.0_i64 +; SI: s_and_b64 s{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}}, -4.0 +define void @s_and_inline_imm_neg_4.0_i64(i64 addrspace(1)* %out, i64 addrspace(1)* %aptr, i64 %a) { + %and = and i64 %a, 13839561654909534208 + store i64 %and, i64 addrspace(1)* %out, align 8 + ret void +} + + +; Test with the 64-bit integer bitpattern for a 32-bit float in the +; low 32-bits, which is not a valid 64-bit inline immmediate. 
+ +; FUNC-LABEL: {{^}}s_and_inline_imm_f32_4.0_i64 +; SI-DAG: s_mov_b32 s[[K_LO:[0-9]+]], 4.0 +; SI-DAG: s_mov_b32 s[[K_HI:[0-9]+]], 0{{$}} +; SI: s_and_b64 s{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}}, s{{\[}}[[K_LO]]:[[K_HI]]{{\]}} +define void @s_and_inline_imm_f32_4.0_i64(i64 addrspace(1)* %out, i64 addrspace(1)* %aptr, i64 %a) { + %and = and i64 %a, 1082130432 + store i64 %and, i64 addrspace(1)* %out, align 8 + ret void +} + +; FIXME: Copy of -1 register +; FUNC-LABEL: {{^}}s_and_inline_imm_f32_neg_4.0_i64 +; SI-DAG: s_mov_b32 s[[K_LO:[0-9]+]], -4.0 +; SI-DAG: s_mov_b32 s[[K_HI:[0-9]+]], -1{{$}} +; SI-DAG: s_mov_b32 s[[K_HI_COPY:[0-9]+]], s[[K_HI]] +; SI: s_and_b64 s{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}}, s{{\[}}[[K_LO]]:[[K_HI_COPY]]{{\]}} +define void @s_and_inline_imm_f32_neg_4.0_i64(i64 addrspace(1)* %out, i64 addrspace(1)* %aptr, i64 %a) { + %and = and i64 %a, -1065353216 + store i64 %and, i64 addrspace(1)* %out, align 8 + ret void +} + +; Shift into upper 32-bits +; FUNC-LABEL: {{^}}s_and_inline_high_imm_f32_4.0_i64 +; SI-DAG: s_mov_b32 s[[K_HI:[0-9]+]], 4.0 +; SI-DAG: s_mov_b32 s[[K_LO:[0-9]+]], 0{{$}} +; SI: s_and_b64 s{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}}, s{{\[}}[[K_LO]]:[[K_HI]]{{\]}} +define void @s_and_inline_high_imm_f32_4.0_i64(i64 addrspace(1)* %out, i64 addrspace(1)* %aptr, i64 %a) { + %and = and i64 %a, 4647714815446351872 + store i64 %and, i64 addrspace(1)* %out, align 8 + ret void +} + +; FUNC-LABEL: {{^}}s_and_inline_high_imm_f32_neg_4.0_i64 +; SI-DAG: s_mov_b32 s[[K_HI:[0-9]+]], -4.0 +; SI-DAG: s_mov_b32 s[[K_LO:[0-9]+]], 0{{$}} +; SI: s_and_b64 s{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}}, s{{\[}}[[K_LO]]:[[K_HI]]{{\]}} +define void @s_and_inline_high_imm_f32_neg_4.0_i64(i64 addrspace(1)* %out, i64 addrspace(1)* %aptr, i64 %a) { + %and = and i64 %a, 13871086852301127680 + store i64 %and, i64 addrspace(1)* %out, align 8 + ret void +} diff --git a/test/CodeGen/R600/anyext.ll b/test/CodeGen/R600/anyext.ll index 23fdcbb..48d8f31 100644 --- a/test/CodeGen/R600/anyext.ll +++ b/test/CodeGen/R600/anyext.ll @@ -1,4 +1,5 @@ -; RUN: llc < %s -march=r600 -mcpu=verde -verify-machineinstrs | FileCheck %s +; RUN: llc < %s -march=amdgcn -mcpu=verde -verify-machineinstrs | FileCheck %s +; RUN: llc < %s -march=amdgcn -mcpu=tonga -verify-machineinstrs | FileCheck %s ; CHECK-LABEL: {{^}}anyext_i1_i32: ; CHECK: v_cndmask_b32_e64 diff --git a/test/CodeGen/R600/array-ptr-calc-i32.ll b/test/CodeGen/R600/array-ptr-calc-i32.ll index 84d3540..33a8aee 100644 --- a/test/CodeGen/R600/array-ptr-calc-i32.ll +++ b/test/CodeGen/R600/array-ptr-calc-i32.ll @@ -1,5 +1,5 @@ -; RUN: llc -verify-machineinstrs -march=r600 -mcpu=SI -mattr=-promote-alloca < %s | FileCheck -check-prefix=SI-ALLOCA -check-prefix=SI %s -; RUN: llc -verify-machineinstrs -march=r600 -mcpu=SI -mattr=+promote-alloca < %s | FileCheck -check-prefix=SI-PROMOTE -check-prefix=SI %s +; RUN: llc -verify-machineinstrs -march=amdgcn -mcpu=SI -mattr=-promote-alloca < %s | FileCheck -check-prefix=SI-ALLOCA -check-prefix=SI %s +; RUN: llc -verify-machineinstrs -march=amdgcn -mcpu=SI -mattr=+promote-alloca < %s | FileCheck -check-prefix=SI-PROMOTE -check-prefix=SI %s declare i32 @llvm.SI.tid() nounwind readnone declare void @llvm.AMDGPU.barrier.local() nounwind noduplicate diff --git a/test/CodeGen/R600/array-ptr-calc-i64.ll b/test/CodeGen/R600/array-ptr-calc-i64.ll index 75f6394..32e657d 100644 --- a/test/CodeGen/R600/array-ptr-calc-i64.ll +++ b/test/CodeGen/R600/array-ptr-calc-i64.ll @@ -1,4 +1,4 @@ -; RUN: llc 
-march=r600 -mcpu=SI -verify-machineinstrs < %s | FileCheck -check-prefix=SI %s +; RUN: llc -march=amdgcn -mcpu=SI -verify-machineinstrs < %s | FileCheck -check-prefix=SI %s declare i32 @llvm.SI.tid() readnone diff --git a/test/CodeGen/R600/atomic_cmp_swap_local.ll b/test/CodeGen/R600/atomic_cmp_swap_local.ll index 223f4d3..6c76ad7 100644 --- a/test/CodeGen/R600/atomic_cmp_swap_local.ll +++ b/test/CodeGen/R600/atomic_cmp_swap_local.ll @@ -1,14 +1,17 @@ -; RUN: llc -march=r600 -mcpu=SI -verify-machineinstrs < %s | FileCheck -check-prefix=SI -check-prefix=FUNC %s -; RUN: llc -march=r600 -mcpu=bonaire -verify-machineinstrs < %s | FileCheck -strict-whitespace -check-prefix=CI -check-prefix=FUNC %s +; RUN: llc -march=amdgcn -mcpu=SI -verify-machineinstrs < %s | FileCheck -check-prefix=SI -check-prefix=SICI -check-prefix=GCN -check-prefix=FUNC %s +; RUN: llc -march=amdgcn -mcpu=bonaire -verify-machineinstrs < %s | FileCheck -check-prefix=SICI -check-prefix=CIVI -check-prefix=GCN -check-prefix=FUNC %s +; RUN: llc -march=amdgcn -mcpu=tonga -verify-machineinstrs < %s | FileCheck -check-prefix=VI -check-prefix=CIVI -check-prefix=GCN -check-prefix=FUNC %s ; FUNC-LABEL: {{^}}lds_atomic_cmpxchg_ret_i32_offset: -; SI: s_load_dword [[PTR:s[0-9]+]], s{{\[[0-9]+:[0-9]+\]}}, 0xb -; SI: s_load_dword [[SWAP:s[0-9]+]], s{{\[[0-9]+:[0-9]+\]}}, 0xc -; SI-DAG: v_mov_b32_e32 [[VCMP:v[0-9]+]], 7 -; SI-DAG: v_mov_b32_e32 [[VPTR:v[0-9]+]], [[PTR]] -; SI-DAG: v_mov_b32_e32 [[VSWAP:v[0-9]+]], [[SWAP]] -; SI: ds_cmpst_rtn_b32 [[RESULT:v[0-9]+]], [[VPTR]], [[VCMP]], [[VSWAP]] offset:16 [M0] -; SI: s_endpgm +; GCN: v_mov_b32_e32 [[VCMP:v[0-9]+]], 7 +; SICI: s_load_dword [[PTR:s[0-9]+]], s{{\[[0-9]+:[0-9]+\]}}, 0xb +; SICI: s_load_dword [[SWAP:s[0-9]+]], s{{\[[0-9]+:[0-9]+\]}}, 0xc +; VI: s_load_dword [[PTR:s[0-9]+]], s{{\[[0-9]+:[0-9]+\]}}, 0x2c +; VI: s_load_dword [[SWAP:s[0-9]+]], s{{\[[0-9]+:[0-9]+\]}}, 0x30 +; GCN-DAG: v_mov_b32_e32 [[VPTR:v[0-9]+]], [[PTR]] +; GCN-DAG: v_mov_b32_e32 [[VSWAP:v[0-9]+]], [[SWAP]] +; GCN: ds_cmpst_rtn_b32 [[RESULT:v[0-9]+]], [[VPTR]], [[VCMP]], [[VSWAP]] offset:16 +; GCN: s_endpgm define void @lds_atomic_cmpxchg_ret_i32_offset(i32 addrspace(1)* %out, i32 addrspace(3)* %ptr, i32 %swap) nounwind { %gep = getelementptr i32 addrspace(3)* %ptr, i32 4 %pair = cmpxchg i32 addrspace(3)* %gep, i32 7, i32 %swap seq_cst monotonic @@ -18,17 +21,18 @@ define void @lds_atomic_cmpxchg_ret_i32_offset(i32 addrspace(1)* %out, i32 addrs } ; FUNC-LABEL: {{^}}lds_atomic_cmpxchg_ret_i64_offset: -; SI: s_load_dword [[PTR:s[0-9]+]], s{{\[[0-9]+:[0-9]+\]}}, 0xb -; SI: s_load_dwordx2 s{{\[}}[[LOSWAP:[0-9]+]]:[[HISWAP:[0-9]+]]{{\]}}, s{{\[[0-9]+:[0-9]+\]}}, 0xd -; SI: s_mov_b64 s{{\[}}[[LOSCMP:[0-9]+]]:[[HISCMP:[0-9]+]]{{\]}}, 7 -; SI-DAG: v_mov_b32_e32 v[[LOVCMP:[0-9]+]], s[[LOSCMP]] -; SI-DAG: v_mov_b32_e32 v[[HIVCMP:[0-9]+]], s[[HISCMP]] -; SI-DAG: v_mov_b32_e32 [[VPTR:v[0-9]+]], [[PTR]] -; SI-DAG: v_mov_b32_e32 v[[LOSWAPV:[0-9]+]], s[[LOSWAP]] -; SI-DAG: v_mov_b32_e32 v[[HISWAPV:[0-9]+]], s[[HISWAP]] -; SI: ds_cmpst_rtn_b64 [[RESULT:v\[[0-9]+:[0-9]+\]]], [[VPTR]], v{{\[}}[[LOVCMP]]:[[HIVCMP]]{{\]}}, v{{\[}}[[LOSWAPV]]:[[HISWAPV]]{{\]}} offset:32 [M0] -; SI: buffer_store_dwordx2 [[RESULT]], -; SI: s_endpgm +; GCN-DAG: v_mov_b32_e32 v[[LOVCMP:[0-9]+]], 7 +; GCN-DAG: v_mov_b32_e32 v[[HIVCMP:[0-9]+]], 0 +; SICI: s_load_dword [[PTR:s[0-9]+]], s{{\[[0-9]+:[0-9]+\]}}, 0xb +; SICI: s_load_dwordx2 s{{\[}}[[LOSWAP:[0-9]+]]:[[HISWAP:[0-9]+]]{{\]}}, s{{\[[0-9]+:[0-9]+\]}}, 0xd +; VI: s_load_dword [[PTR:s[0-9]+]], 
s{{\[[0-9]+:[0-9]+\]}}, 0x2c +; VI: s_load_dwordx2 s{{\[}}[[LOSWAP:[0-9]+]]:[[HISWAP:[0-9]+]]{{\]}}, s{{\[[0-9]+:[0-9]+\]}}, 0x34 +; GCN-DAG: v_mov_b32_e32 [[VPTR:v[0-9]+]], [[PTR]] +; GCN-DAG: v_mov_b32_e32 v[[LOSWAPV:[0-9]+]], s[[LOSWAP]] +; GCN-DAG: v_mov_b32_e32 v[[HISWAPV:[0-9]+]], s[[HISWAP]] +; GCN: ds_cmpst_rtn_b64 [[RESULT:v\[[0-9]+:[0-9]+\]]], [[VPTR]], v{{\[}}[[LOVCMP]]:[[HIVCMP]]{{\]}}, v{{\[}}[[LOSWAPV]]:[[HISWAPV]]{{\]}} offset:32 +; GCN: buffer_store_dwordx2 [[RESULT]], +; GCN: s_endpgm define void @lds_atomic_cmpxchg_ret_i64_offset(i64 addrspace(1)* %out, i64 addrspace(3)* %ptr, i64 %swap) nounwind { %gep = getelementptr i64 addrspace(3)* %ptr, i32 4 %pair = cmpxchg i64 addrspace(3)* %gep, i64 7, i64 %swap seq_cst monotonic @@ -39,8 +43,8 @@ define void @lds_atomic_cmpxchg_ret_i64_offset(i64 addrspace(1)* %out, i64 addrs ; FUNC-LABEL: {{^}}lds_atomic_cmpxchg_ret_i32_bad_si_offset ; SI: ds_cmpst_rtn_b32 v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}} -; CI: ds_cmpst_rtn_b32 v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}} offset:16 [M0] -; SI: s_endpgm +; CIVI: ds_cmpst_rtn_b32 v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}} offset:16 +; GCN: s_endpgm define void @lds_atomic_cmpxchg_ret_i32_bad_si_offset(i32 addrspace(1)* %out, i32 addrspace(3)* %ptr, i32 %swap, i32 %a, i32 %b) nounwind { %sub = sub i32 %a, %b %add = add i32 %sub, 4 @@ -52,13 +56,15 @@ define void @lds_atomic_cmpxchg_ret_i32_bad_si_offset(i32 addrspace(1)* %out, i3 } ; FUNC-LABEL: {{^}}lds_atomic_cmpxchg_noret_i32_offset: -; SI: s_load_dword [[PTR:s[0-9]+]], s{{\[[0-9]+:[0-9]+\]}}, 0x9 -; SI: s_load_dword [[SWAP:s[0-9]+]], s{{\[[0-9]+:[0-9]+\]}}, 0xa -; SI-DAG: v_mov_b32_e32 [[VCMP:v[0-9]+]], 7 -; SI-DAG: v_mov_b32_e32 [[VPTR:v[0-9]+]], [[PTR]] -; SI-DAG: v_mov_b32_e32 [[VSWAP:v[0-9]+]], [[SWAP]] -; SI: ds_cmpst_b32 [[VPTR]], [[VCMP]], [[VSWAP]] offset:16 [M0] -; SI: s_endpgm +; SICI: s_load_dword [[PTR:s[0-9]+]], s{{\[[0-9]+:[0-9]+\]}}, 0x9 +; SICI: s_load_dword [[SWAP:s[0-9]+]], s{{\[[0-9]+:[0-9]+\]}}, 0xa +; VI: s_load_dword [[PTR:s[0-9]+]], s{{\[[0-9]+:[0-9]+\]}}, 0x24 +; VI: s_load_dword [[SWAP:s[0-9]+]], s{{\[[0-9]+:[0-9]+\]}}, 0x28 +; GCN-DAG: v_mov_b32_e32 [[VCMP:v[0-9]+]], 7 +; GCN-DAG: v_mov_b32_e32 [[VPTR:v[0-9]+]], [[PTR]] +; GCN-DAG: v_mov_b32_e32 [[VSWAP:v[0-9]+]], [[SWAP]] +; GCN: ds_cmpst_b32 [[VPTR]], [[VCMP]], [[VSWAP]] offset:16 +; GCN: s_endpgm define void @lds_atomic_cmpxchg_noret_i32_offset(i32 addrspace(3)* %ptr, i32 %swap) nounwind { %gep = getelementptr i32 addrspace(3)* %ptr, i32 4 %pair = cmpxchg i32 addrspace(3)* %gep, i32 7, i32 %swap seq_cst monotonic @@ -67,16 +73,17 @@ define void @lds_atomic_cmpxchg_noret_i32_offset(i32 addrspace(3)* %ptr, i32 %sw } ; FUNC-LABEL: {{^}}lds_atomic_cmpxchg_noret_i64_offset: -; SI: s_load_dword [[PTR:s[0-9]+]], s{{\[[0-9]+:[0-9]+\]}}, 0x9 -; SI: s_load_dwordx2 s{{\[}}[[LOSWAP:[0-9]+]]:[[HISWAP:[0-9]+]]{{\]}}, s{{\[[0-9]+:[0-9]+\]}}, 0xb -; SI: s_mov_b64 s{{\[}}[[LOSCMP:[0-9]+]]:[[HISCMP:[0-9]+]]{{\]}}, 7 -; SI-DAG: v_mov_b32_e32 v[[LOVCMP:[0-9]+]], s[[LOSCMP]] -; SI-DAG: v_mov_b32_e32 v[[HIVCMP:[0-9]+]], s[[HISCMP]] -; SI-DAG: v_mov_b32_e32 [[VPTR:v[0-9]+]], [[PTR]] -; SI-DAG: v_mov_b32_e32 v[[LOSWAPV:[0-9]+]], s[[LOSWAP]] -; SI-DAG: v_mov_b32_e32 v[[HISWAPV:[0-9]+]], s[[HISWAP]] -; SI: ds_cmpst_b64 [[VPTR]], v{{\[}}[[LOVCMP]]:[[HIVCMP]]{{\]}}, v{{\[}}[[LOSWAPV]]:[[HISWAPV]]{{\]}} offset:32 [M0] -; SI: s_endpgm +; SICI: s_load_dword [[PTR:s[0-9]+]], s{{\[[0-9]+:[0-9]+\]}}, 0x9 +; SICI: s_load_dwordx2 
s{{\[}}[[LOSWAP:[0-9]+]]:[[HISWAP:[0-9]+]]{{\]}}, s{{\[[0-9]+:[0-9]+\]}}, 0xb +; VI: s_load_dword [[PTR:s[0-9]+]], s{{\[[0-9]+:[0-9]+\]}}, 0x24 +; VI: s_load_dwordx2 s{{\[}}[[LOSWAP:[0-9]+]]:[[HISWAP:[0-9]+]]{{\]}}, s{{\[[0-9]+:[0-9]+\]}}, 0x2c +; GCN-DAG: v_mov_b32_e32 v[[LOVCMP:[0-9]+]], 7 +; GCN-DAG: v_mov_b32_e32 v[[HIVCMP:[0-9]+]], 0 +; GCN-DAG: v_mov_b32_e32 [[VPTR:v[0-9]+]], [[PTR]] +; GCN-DAG: v_mov_b32_e32 v[[LOSWAPV:[0-9]+]], s[[LOSWAP]] +; GCN-DAG: v_mov_b32_e32 v[[HISWAPV:[0-9]+]], s[[HISWAP]] +; GCN: ds_cmpst_b64 [[VPTR]], v{{\[}}[[LOVCMP]]:[[HIVCMP]]{{\]}}, v{{\[}}[[LOSWAPV]]:[[HISWAPV]]{{\]}} offset:32 +; GCN: s_endpgm define void @lds_atomic_cmpxchg_noret_i64_offset(i64 addrspace(3)* %ptr, i64 %swap) nounwind { %gep = getelementptr i64 addrspace(3)* %ptr, i32 4 %pair = cmpxchg i64 addrspace(3)* %gep, i64 7, i64 %swap seq_cst monotonic diff --git a/test/CodeGen/R600/atomic_load_add.ll b/test/CodeGen/R600/atomic_load_add.ll index f0eff21..5fe05f2 100644 --- a/test/CodeGen/R600/atomic_load_add.ll +++ b/test/CodeGen/R600/atomic_load_add.ll @@ -1,4 +1,5 @@ -; RUN: llc -march=r600 -mcpu=SI -verify-machineinstrs < %s | FileCheck %s -check-prefix=SI -check-prefix=FUNC +; RUN: llc -march=amdgcn -mcpu=SI -verify-machineinstrs < %s | FileCheck %s -check-prefix=SI -check-prefix=FUNC +; RUN: llc -march=amdgcn -mcpu=tonga -verify-machineinstrs < %s | FileCheck %s -check-prefix=SI -check-prefix=FUNC ; RUN: llc -march=r600 -mcpu=redwood < %s | FileCheck -check-prefix=R600 -check-prefix=FUNC %s ; FUNC-LABEL: {{^}}atomic_add_local: diff --git a/test/CodeGen/R600/atomic_load_sub.ll b/test/CodeGen/R600/atomic_load_sub.ll index 61ff296..4072283 100644 --- a/test/CodeGen/R600/atomic_load_sub.ll +++ b/test/CodeGen/R600/atomic_load_sub.ll @@ -1,5 +1,6 @@ ; RUN: llc -march=r600 -mcpu=redwood < %s | FileCheck -check-prefix=R600 -check-prefix=FUNC %s -; RUN: llc -march=r600 -mcpu=SI -verify-machineinstrs < %s | FileCheck -check-prefix=SI -check-prefix=FUNC %s +; RUN: llc -march=amdgcn -mcpu=SI -verify-machineinstrs < %s | FileCheck -check-prefix=SI -check-prefix=FUNC %s +; RUN: llc -march=amdgcn -mcpu=tonga -verify-machineinstrs < %s | FileCheck -check-prefix=SI -check-prefix=FUNC %s ; FUNC-LABEL: {{^}}atomic_sub_local: ; R600: LDS_SUB * diff --git a/test/CodeGen/R600/basic-branch.ll b/test/CodeGen/R600/basic-branch.ll index 073ab79..abdc4af 100644 --- a/test/CodeGen/R600/basic-branch.ll +++ b/test/CodeGen/R600/basic-branch.ll @@ -1,5 +1,6 @@ ; XFAIL: * -; RUN: llc -O0 -march=r600 -mcpu=SI -verify-machineinstrs < %s | FileCheck %s +; RUN: llc -O0 -march=amdgcn -mcpu=SI -verify-machineinstrs < %s | FileCheck %s +; RUN: llc -O0 -march=amdgcn -mcpu=tonga -verify-machineinstrs < %s | FileCheck %s ; CHECK-LABEL: {{^}}test_branch( define void @test_branch(i32 addrspace(1)* noalias %out, i32 addrspace(1)* noalias %in, i32 %val) nounwind { diff --git a/test/CodeGen/R600/basic-loop.ll b/test/CodeGen/R600/basic-loop.ll index 3cd609135..f0263ca 100644 --- a/test/CodeGen/R600/basic-loop.ll +++ b/test/CodeGen/R600/basic-loop.ll @@ -1,5 +1,5 @@ -; XFAIL: * -; RUN: llc -O0 -verify-machineinstrs -march=r600 -mcpu=SI < %s | FileCheck %s +; RUN: llc -O0 -verify-machineinstrs -march=amdgcn -mcpu=SI < %s | FileCheck %s +; RUN: llc -O0 -verify-machineinstrs -march=amdgcn -mcpu=tonga < %s | FileCheck %s ; CHECK-LABEL: {{^}}test_loop: define void @test_loop(i32 addrspace(1)* noalias %out, i32 addrspace(1)* noalias %in, i32 %val) nounwind { diff --git a/test/CodeGen/R600/bfi_int.ll b/test/CodeGen/R600/bfi_int.ll index 
2a0bb37..0334934 100644 --- a/test/CodeGen/R600/bfi_int.ll +++ b/test/CodeGen/R600/bfi_int.ll @@ -1,13 +1,14 @@ -; RUN: llc < %s -march=r600 -mcpu=redwood | FileCheck --check-prefix=R600-CHECK %s -; RUN: llc < %s -march=r600 -mcpu=SI -verify-machineinstrs | FileCheck --check-prefix=SI-CHECK %s +; RUN: llc < %s -march=r600 -mcpu=redwood | FileCheck --check-prefix=R600 %s +; RUN: llc < %s -march=amdgcn -mcpu=SI -verify-machineinstrs | FileCheck --check-prefix=SI %s +; RUN: llc < %s -march=amdgcn -mcpu=tonga -verify-machineinstrs | FileCheck --check-prefix=SI %s ; BFI_INT Definition pattern from ISA docs ; (y & x) | (z & ~x) ; -; R600-CHECK: {{^}}bfi_def: -; R600-CHECK: BFI_INT -; SI-CHECK: @bfi_def -; SI-CHECK: v_bfi_b32 +; R600: {{^}}bfi_def: +; R600: BFI_INT +; SI: @bfi_def +; SI: v_bfi_b32 define void @bfi_def(i32 addrspace(1)* %out, i32 %x, i32 %y, i32 %z) { entry: %0 = xor i32 %x, -1 @@ -20,10 +21,10 @@ entry: ; SHA-256 Ch function ; z ^ (x & (y ^ z)) -; R600-CHECK: {{^}}bfi_sha256_ch: -; R600-CHECK: BFI_INT -; SI-CHECK: @bfi_sha256_ch -; SI-CHECK: v_bfi_b32 +; R600: {{^}}bfi_sha256_ch: +; R600: BFI_INT +; SI: @bfi_sha256_ch +; SI: v_bfi_b32 define void @bfi_sha256_ch(i32 addrspace(1)* %out, i32 %x, i32 %y, i32 %z) { entry: %0 = xor i32 %y, %z @@ -35,11 +36,11 @@ entry: ; SHA-256 Ma function ; ((x & z) | (y & (x | z))) -; R600-CHECK: {{^}}bfi_sha256_ma: -; R600-CHECK: XOR_INT * [[DST:T[0-9]+\.[XYZW]]], KC0[2].Z, KC0[2].W -; R600-CHECK: BFI_INT * {{T[0-9]+\.[XYZW]}}, {{[[DST]]|PV\.[XYZW]}}, KC0[3].X, KC0[2].W -; SI-CHECK: v_xor_b32_e32 [[DST:v[0-9]+]], {{s[0-9]+, v[0-9]+}} -; SI-CHECK: v_bfi_b32 {{v[0-9]+}}, [[DST]], {{s[0-9]+, v[0-9]+}} +; R600: {{^}}bfi_sha256_ma: +; R600: XOR_INT * [[DST:T[0-9]+\.[XYZW]]], KC0[2].Z, KC0[2].W +; R600: BFI_INT * {{T[0-9]+\.[XYZW]}}, {{[[DST]]|PV\.[XYZW]}}, KC0[3].X, KC0[2].W +; SI: v_xor_b32_e32 [[DST:v[0-9]+]], {{s[0-9]+, v[0-9]+}} +; SI: v_bfi_b32 {{v[0-9]+}}, [[DST]], {{s[0-9]+, v[0-9]+}} define void @bfi_sha256_ma(i32 addrspace(1)* %out, i32 %x, i32 %y, i32 %z) { entry: diff --git a/test/CodeGen/R600/bitcast.ll b/test/CodeGen/R600/bitcast.ll index 725d5ba..1ba64af 100644 --- a/test/CodeGen/R600/bitcast.ll +++ b/test/CodeGen/R600/bitcast.ll @@ -1,4 +1,5 @@ -; RUN: llc -march=r600 -mcpu=SI -verify-machineinstrs < %s | FileCheck -check-prefix=SI -check-prefix=FUNC %s +; RUN: llc -march=amdgcn -mcpu=SI -verify-machineinstrs < %s | FileCheck -check-prefix=SI -check-prefix=FUNC %s +; RUN: llc -march=amdgcn -mcpu=tonga -verify-machineinstrs < %s | FileCheck -check-prefix=SI -check-prefix=FUNC %s ; This test just checks that the compiler doesn't crash. 
diff --git a/test/CodeGen/R600/bswap.ll b/test/CodeGen/R600/bswap.ll index 1c5a0c6..e93543d 100644 --- a/test/CodeGen/R600/bswap.ll +++ b/test/CodeGen/R600/bswap.ll @@ -1,4 +1,5 @@ -; RUN: llc -march=r600 -mcpu=SI -verify-machineinstrs < %s | FileCheck -check-prefix=SI -check-prefix=FUNC %s +; RUN: llc -march=amdgcn -mcpu=SI -verify-machineinstrs < %s | FileCheck -check-prefix=SI -check-prefix=FUNC %s +; RUN: llc -march=amdgcn -mcpu=tonga -verify-machineinstrs < %s | FileCheck -check-prefix=SI -check-prefix=FUNC %s declare i32 @llvm.bswap.i32(i32) nounwind readnone declare <2 x i32> @llvm.bswap.v2i32(<2 x i32>) nounwind readnone diff --git a/test/CodeGen/R600/build_vector.ll b/test/CodeGen/R600/build_vector.ll index 9137eee..65eacf5 100644 --- a/test/CodeGen/R600/build_vector.ll +++ b/test/CodeGen/R600/build_vector.ll @@ -1,32 +1,33 @@ -; RUN: llc < %s -march=r600 -mcpu=redwood | FileCheck %s --check-prefix=R600-CHECK -; RUN: llc < %s -march=r600 -mcpu=SI -verify-machineinstrs | FileCheck %s --check-prefix=SI-CHECK +; RUN: llc < %s -march=r600 -mcpu=redwood | FileCheck %s --check-prefix=R600 +; RUN: llc < %s -march=amdgcn -mcpu=SI -verify-machineinstrs | FileCheck %s --check-prefix=SI +; RUN: llc < %s -march=amdgcn -mcpu=tonga -verify-machineinstrs | FileCheck %s --check-prefix=SI -; R600-CHECK: {{^}}build_vector2: -; R600-CHECK: MOV -; R600-CHECK: MOV -; R600-CHECK-NOT: MOV -; SI-CHECK: {{^}}build_vector2: -; SI-CHECK-DAG: v_mov_b32_e32 v[[X:[0-9]]], 5 -; SI-CHECK-DAG: v_mov_b32_e32 v[[Y:[0-9]]], 6 -; SI-CHECK: buffer_store_dwordx2 v{{\[}}[[X]]:[[Y]]{{\]}} +; R600: {{^}}build_vector2: +; R600: MOV +; R600: MOV +; R600-NOT: MOV +; SI: {{^}}build_vector2: +; SI-DAG: v_mov_b32_e32 v[[X:[0-9]]], 5 +; SI-DAG: v_mov_b32_e32 v[[Y:[0-9]]], 6 +; SI: buffer_store_dwordx2 v{{\[}}[[X]]:[[Y]]{{\]}} define void @build_vector2 (<2 x i32> addrspace(1)* %out) { entry: store <2 x i32> <i32 5, i32 6>, <2 x i32> addrspace(1)* %out ret void } -; R600-CHECK: {{^}}build_vector4: -; R600-CHECK: MOV -; R600-CHECK: MOV -; R600-CHECK: MOV -; R600-CHECK: MOV -; R600-CHECK-NOT: MOV -; SI-CHECK: {{^}}build_vector4: -; SI-CHECK-DAG: v_mov_b32_e32 v[[X:[0-9]]], 5 -; SI-CHECK-DAG: v_mov_b32_e32 v[[Y:[0-9]]], 6 -; SI-CHECK-DAG: v_mov_b32_e32 v[[Z:[0-9]]], 7 -; SI-CHECK-DAG: v_mov_b32_e32 v[[W:[0-9]]], 8 -; SI-CHECK: buffer_store_dwordx4 v{{\[}}[[X]]:[[W]]{{\]}} +; R600: {{^}}build_vector4: +; R600: MOV +; R600: MOV +; R600: MOV +; R600: MOV +; R600-NOT: MOV +; SI: {{^}}build_vector4: +; SI-DAG: v_mov_b32_e32 v[[X:[0-9]]], 5 +; SI-DAG: v_mov_b32_e32 v[[Y:[0-9]]], 6 +; SI-DAG: v_mov_b32_e32 v[[Z:[0-9]]], 7 +; SI-DAG: v_mov_b32_e32 v[[W:[0-9]]], 8 +; SI: buffer_store_dwordx4 v{{\[}}[[X]]:[[W]]{{\]}} define void @build_vector4 (<4 x i32> addrspace(1)* %out) { entry: store <4 x i32> <i32 5, i32 6, i32 7, i32 8>, <4 x i32> addrspace(1)* %out diff --git a/test/CodeGen/R600/call.ll b/test/CodeGen/R600/call.ll index 1448f04..6de51f1 100644 --- a/test/CodeGen/R600/call.ll +++ b/test/CodeGen/R600/call.ll @@ -1,4 +1,5 @@ -; RUN: not llc -march=r600 -mcpu=SI -verify-machineinstrs< %s 2>&1 | FileCheck %s +; RUN: not llc -march=amdgcn -mcpu=SI -verify-machineinstrs< %s 2>&1 | FileCheck %s +; RUN: not llc -march=amdgcn -mcpu=tonga -verify-machineinstrs< %s 2>&1 | FileCheck %s ; RUN: not llc -march=r600 -mcpu=cypress < %s 2>&1 | FileCheck %s ; CHECK: error: unsupported call to function external_function in test_call_external diff --git a/test/CodeGen/R600/call_fs.ll b/test/CodeGen/R600/call_fs.ll index 7df2240..db2cb6e 100644 --- 
a/test/CodeGen/R600/call_fs.ll +++ b/test/CodeGen/R600/call_fs.ll @@ -1,13 +1,13 @@ -; RUN: llc < %s -march=r600 -mcpu=redwood -show-mc-encoding -o - | FileCheck --check-prefix=EG-CHECK %s -; RUN: llc < %s -march=r600 -mcpu=rv710 -show-mc-encoding -o - | FileCheck --check-prefix=R600-CHECK %s +; RUN: llc < %s -march=r600 -mcpu=redwood -show-mc-encoding -o - | FileCheck --check-prefix=EG %s +; RUN: llc < %s -march=r600 -mcpu=rv710 -show-mc-encoding -o - | FileCheck --check-prefix=R600 %s -; EG-CHECK: {{^}}call_fs: -; EG-CHECK: .long 257 -; EG-CHECK: CALL_FS ; encoding: [0x00,0x00,0x00,0x00,0x00,0x00,0xc0,0x84] -; R600-CHECK: {{^}}call_fs: -; R600-CHECK: .long 257 -; R600-CHECK:CALL_FS ; encoding: [0x00,0x00,0x00,0x00,0x00,0x00,0x80,0x89] +; EG: {{^}}call_fs: +; EG: .long 257 +; EG: CALL_FS ; encoding: [0x00,0x00,0x00,0x00,0x00,0x00,0xc0,0x84] +; R600: {{^}}call_fs: +; R600: .long 257 +; R600:CALL_FS ; encoding: [0x00,0x00,0x00,0x00,0x00,0x00,0x80,0x89] define void @call_fs() #0 { diff --git a/test/CodeGen/R600/cf_end.ll b/test/CodeGen/R600/cf_end.ll index 138004d..c74ee22 100644 --- a/test/CodeGen/R600/cf_end.ll +++ b/test/CodeGen/R600/cf_end.ll @@ -1,9 +1,9 @@ -; RUN: llc < %s -march=r600 -mcpu=redwood --show-mc-encoding | FileCheck --check-prefix=EG-CHECK %s -; RUN: llc < %s -march=r600 -mcpu=caicos --show-mc-encoding | FileCheck --check-prefix=EG-CHECK %s -; RUN: llc < %s -march=r600 -mcpu=cayman --show-mc-encoding | FileCheck --check-prefix=CM-CHECK %s +; RUN: llc < %s -march=r600 -mcpu=redwood --show-mc-encoding | FileCheck --check-prefix=EG %s +; RUN: llc < %s -march=r600 -mcpu=caicos --show-mc-encoding | FileCheck --check-prefix=EG %s +; RUN: llc < %s -march=r600 -mcpu=cayman --show-mc-encoding | FileCheck --check-prefix=CM %s -; EG-CHECK: CF_END ; encoding: [0x00,0x00,0x00,0x00,0x00,0x00,0x20,0x80] -; CM-CHECK: CF_END ; encoding: [0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x88] +; EG: CF_END ; encoding: [0x00,0x00,0x00,0x00,0x00,0x00,0x20,0x80] +; CM: CF_END ; encoding: [0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x88] define void @eop() { ret void } diff --git a/test/CodeGen/R600/codegen-prepare-addrmode-sext.ll b/test/CodeGen/R600/codegen-prepare-addrmode-sext.ll index b42b904..e16a397 100644 --- a/test/CodeGen/R600/codegen-prepare-addrmode-sext.ll +++ b/test/CodeGen/R600/codegen-prepare-addrmode-sext.ll @@ -1,5 +1,5 @@ ; RUN: opt -codegenprepare -S -o - %s | FileCheck --check-prefix=OPT %s -; RUN: llc < %s -march=r600 -mcpu=verde -verify-machineinstrs | FileCheck --check-prefix=SI-LLC %s +; RUN: llc < %s -march=amdgcn -mcpu=verde -verify-machineinstrs | FileCheck --check-prefix=SI-LLC %s target datalayout = "e-p:32:32-p1:64:64-p2:64:64-p3:32:32-p4:32:32-p5:64:64-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128-v192:256-v256:256-v512:512-v1024:1024-v2048:2048-n32:64" target triple = "r600--" diff --git a/test/CodeGen/R600/commute_modifiers.ll b/test/CodeGen/R600/commute_modifiers.ll index 30c8067..6fddb6d 100644 --- a/test/CodeGen/R600/commute_modifiers.ll +++ b/test/CodeGen/R600/commute_modifiers.ll @@ -1,4 +1,4 @@ -; RUN: llc -march=r600 -mcpu=SI -verify-machineinstrs < %s | FileCheck -check-prefix=SI -check-prefix=FUNC %s +; RUN: llc -march=amdgcn -mcpu=SI -verify-machineinstrs < %s | FileCheck -check-prefix=SI -check-prefix=FUNC %s declare i32 @llvm.r600.read.tidig.x() #1 declare float @llvm.fabs.f32(float) #1 @@ -65,7 +65,7 @@ define void @commute_add_lit_fabs_f32(float addrspace(1)* %out, float addrspace( ; FUNC-LABEL: @commute_add_fabs_f32 ; SI-DAG: buffer_load_dword [[X:v[0-9]+]], 
{{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, 0 addr64{{$}} -; SI-DAG: buffer_load_dword [[Y:v[0-9]+]], {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, 0 addr64 offset:0x4 +; SI-DAG: buffer_load_dword [[Y:v[0-9]+]], {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, 0 addr64 offset:4 ; SI: v_add_f32_e64 [[REG:v[0-9]+]], [[X]], |[[Y]]| ; SI-NEXT: buffer_store_dword [[REG]] define void @commute_add_fabs_f32(float addrspace(1)* %out, float addrspace(1)* %in) #0 { @@ -82,7 +82,7 @@ define void @commute_add_fabs_f32(float addrspace(1)* %out, float addrspace(1)* ; FUNC-LABEL: @commute_mul_fneg_f32 ; SI-DAG: buffer_load_dword [[X:v[0-9]+]], {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, 0 addr64{{$}} -; SI-DAG: buffer_load_dword [[Y:v[0-9]+]], {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, 0 addr64 offset:0x4 +; SI-DAG: buffer_load_dword [[Y:v[0-9]+]], {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, 0 addr64 offset:4 ; SI: v_mul_f32_e64 [[REG:v[0-9]+]], [[X]], -[[Y]] ; SI-NEXT: buffer_store_dword [[REG]] define void @commute_mul_fneg_f32(float addrspace(1)* %out, float addrspace(1)* %in) #0 { @@ -99,7 +99,7 @@ define void @commute_mul_fneg_f32(float addrspace(1)* %out, float addrspace(1)* ; FUNC-LABEL: @commute_mul_fabs_fneg_f32 ; SI-DAG: buffer_load_dword [[X:v[0-9]+]], {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, 0 addr64{{$}} -; SI-DAG: buffer_load_dword [[Y:v[0-9]+]], {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, 0 addr64 offset:0x4 +; SI-DAG: buffer_load_dword [[Y:v[0-9]+]], {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, 0 addr64 offset:4 ; SI: v_mul_f32_e64 [[REG:v[0-9]+]], [[X]], -|[[Y]]| ; SI-NEXT: buffer_store_dword [[REG]] define void @commute_mul_fabs_fneg_f32(float addrspace(1)* %out, float addrspace(1)* %in) #0 { @@ -118,7 +118,7 @@ define void @commute_mul_fabs_fneg_f32(float addrspace(1)* %out, float addrspace ; There's no reason to commute this. 
; FUNC-LABEL: @commute_mul_fabs_x_fabs_y_f32 ; SI-DAG: buffer_load_dword [[X:v[0-9]+]], {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, 0 addr64{{$}} -; SI-DAG: buffer_load_dword [[Y:v[0-9]+]], {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, 0 addr64 offset:0x4 +; SI-DAG: buffer_load_dword [[Y:v[0-9]+]], {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, 0 addr64 offset:4 ; SI: v_mul_f32_e64 [[REG:v[0-9]+]], |[[X]]|, |[[Y]]| ; SI-NEXT: buffer_store_dword [[REG]] define void @commute_mul_fabs_x_fabs_y_f32(float addrspace(1)* %out, float addrspace(1)* %in) #0 { @@ -136,7 +136,7 @@ define void @commute_mul_fabs_x_fabs_y_f32(float addrspace(1)* %out, float addrs ; FUNC-LABEL: @commute_mul_fabs_x_fneg_fabs_y_f32 ; SI-DAG: buffer_load_dword [[X:v[0-9]+]], {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, 0 addr64{{$}} -; SI-DAG: buffer_load_dword [[Y:v[0-9]+]], {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, 0 addr64 offset:0x4 +; SI-DAG: buffer_load_dword [[Y:v[0-9]+]], {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, 0 addr64 offset:4 ; SI: v_mul_f32_e64 [[REG:v[0-9]+]], |[[X]]|, -|[[Y]]| ; SI-NEXT: buffer_store_dword [[REG]] define void @commute_mul_fabs_x_fneg_fabs_y_f32(float addrspace(1)* %out, float addrspace(1)* %in) #0 { @@ -158,7 +158,7 @@ define void @commute_mul_fabs_x_fneg_fabs_y_f32(float addrspace(1)* %out, float ; SI-LABEL: {{^}}fma_a_2.0_neg_b_f32 ; SI-DAG: buffer_load_dword [[R1:v[0-9]+]], {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, 0 addr64{{$}} -; SI-DAG: buffer_load_dword [[R2:v[0-9]+]], {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, 0 addr64 offset:0x4 +; SI-DAG: buffer_load_dword [[R2:v[0-9]+]], {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, 0 addr64 offset:4 ; SI: v_fma_f32 [[RESULT:v[0-9]+]], 2.0, [[R1]], |[[R2]]| ; SI: buffer_store_dword [[RESULT]] define void @fma_a_2.0_neg_b_f32(float addrspace(1)* %out, float addrspace(1)* %in) { diff --git a/test/CodeGen/R600/concat_vectors.ll b/test/CodeGen/R600/concat_vectors.ll index 19992eb..6b3fae3 100644 --- a/test/CodeGen/R600/concat_vectors.ll +++ b/test/CodeGen/R600/concat_vectors.ll @@ -1,4 +1,5 @@ -; RUN: llc -march=r600 -mcpu=SI -verify-machineinstrs < %s | FileCheck -check-prefix=SI -check-prefix=FUNC %s +; RUN: llc -march=amdgcn -mcpu=SI -verify-machineinstrs < %s | FileCheck -check-prefix=SI -check-prefix=FUNC %s +; RUN: llc -march=amdgcn -mcpu=tonga -verify-machineinstrs < %s | FileCheck -check-prefix=SI -check-prefix=FUNC %s ; FUNC-LABEL: {{^}}test_concat_v1i32: ; 0x80f000 is the high 32 bits of the resource descriptor used by MUBUF diff --git a/test/CodeGen/R600/copy-illegal-type.ll b/test/CodeGen/R600/copy-illegal-type.ll index 66ea88e..56c43d2 100644 --- a/test/CodeGen/R600/copy-illegal-type.ll +++ b/test/CodeGen/R600/copy-illegal-type.ll @@ -1,4 +1,5 @@ -; RUN: llc -march=r600 -mcpu=tahiti < %s | FileCheck -check-prefix=SI -check-prefix=FUNC %s +; RUN: llc -march=amdgcn -mcpu=tahiti < %s | FileCheck -check-prefix=SI -check-prefix=FUNC %s +; RUN: llc -march=amdgcn -mcpu=tonga < %s | FileCheck -check-prefix=SI -check-prefix=FUNC %s ; FUNC-LABEL: {{^}}test_copy_v4i8: ; SI: buffer_load_dword [[REG:v[0-9]+]] diff --git a/test/CodeGen/R600/copy-to-reg.ll b/test/CodeGen/R600/copy-to-reg.ll index f90ee78..9c1de73 100644 --- a/test/CodeGen/R600/copy-to-reg.ll +++ b/test/CodeGen/R600/copy-to-reg.ll @@ -1,4 +1,5 @@ -; RUN: llc -march=r600 -mcpu=SI -mattr=-promote-alloca -verify-machineinstrs < %s +; RUN: llc -march=amdgcn -mcpu=SI -mattr=-promote-alloca -verify-machineinstrs < %s +; RUN: llc -march=amdgcn -mcpu=tonga 
-mattr=-promote-alloca -verify-machineinstrs < %s ; Test that CopyToReg instructions don't have non-register operands prior ; to being emitted. diff --git a/test/CodeGen/R600/ctlz_zero_undef.ll b/test/CodeGen/R600/ctlz_zero_undef.ll index f699127..1a4317b 100644 --- a/test/CodeGen/R600/ctlz_zero_undef.ll +++ b/test/CodeGen/R600/ctlz_zero_undef.ll @@ -1,4 +1,5 @@ -; RUN: llc -march=r600 -mcpu=SI -verify-machineinstrs < %s | FileCheck -check-prefix=SI -check-prefix=FUNC %s +; RUN: llc -march=amdgcn -mcpu=SI -verify-machineinstrs < %s | FileCheck -check-prefix=SI -check-prefix=FUNC %s +; RUN: llc -march=amdgcn -mcpu=tonga -verify-machineinstrs < %s | FileCheck -check-prefix=SI -check-prefix=FUNC %s ; RUN: llc -march=r600 -mcpu=cypress -verify-machineinstrs < %s | FileCheck -check-prefix=EG -check-prefix=FUNC %s declare i32 @llvm.ctlz.i32(i32, i1) nounwind readnone diff --git a/test/CodeGen/R600/ctpop.ll b/test/CodeGen/R600/ctpop.ll index 5cfdaef..6f7d92b 100644 --- a/test/CodeGen/R600/ctpop.ll +++ b/test/CodeGen/R600/ctpop.ll @@ -1,4 +1,5 @@ -; RUN: llc -march=r600 -mcpu=SI -verify-machineinstrs < %s | FileCheck -check-prefix=SI -check-prefix=FUNC %s +; RUN: llc -march=amdgcn -mcpu=SI -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=FUNC -check-prefix=SI %s +; RUN: llc -march=amdgcn -mcpu=tonga -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=FUNC -check-prefix=VI %s ; RUN: llc -march=r600 -mcpu=cypress -verify-machineinstrs < %s | FileCheck -check-prefix=EG -check-prefix=FUNC %s declare i32 @llvm.ctpop.i32(i32) nounwind readnone @@ -8,11 +9,11 @@ declare <8 x i32> @llvm.ctpop.v8i32(<8 x i32>) nounwind readnone declare <16 x i32> @llvm.ctpop.v16i32(<16 x i32>) nounwind readnone ; FUNC-LABEL: {{^}}s_ctpop_i32: -; SI: s_load_dword [[SVAL:s[0-9]+]], -; SI: s_bcnt1_i32_b32 [[SRESULT:s[0-9]+]], [[SVAL]] -; SI: v_mov_b32_e32 [[VRESULT:v[0-9]+]], [[SRESULT]] -; SI: buffer_store_dword [[VRESULT]], -; SI: s_endpgm +; GCN: s_load_dword [[SVAL:s[0-9]+]], +; GCN: s_bcnt1_i32_b32 [[SRESULT:s[0-9]+]], [[SVAL]] +; GCN: v_mov_b32_e32 [[VRESULT:v[0-9]+]], [[SRESULT]] +; GCN: buffer_store_dword [[VRESULT]], +; GCN: s_endpgm ; EG: BCNT_INT define void @s_ctpop_i32(i32 addrspace(1)* noalias %out, i32 %val) nounwind { @@ -23,11 +24,10 @@ define void @s_ctpop_i32(i32 addrspace(1)* noalias %out, i32 %val) nounwind { ; XXX - Why 0 in register? 
; FUNC-LABEL: {{^}}v_ctpop_i32: -; SI: buffer_load_dword [[VAL:v[0-9]+]], -; SI: v_mov_b32_e32 [[VZERO:v[0-9]+]], 0 -; SI: v_bcnt_u32_b32_e32 [[RESULT:v[0-9]+]], [[VAL]], [[VZERO]] -; SI: buffer_store_dword [[RESULT]], -; SI: s_endpgm +; GCN: buffer_load_dword [[VAL:v[0-9]+]], +; GCN: v_bcnt_u32_b32_e64 [[RESULT:v[0-9]+]], [[VAL]], 0 +; GCN: buffer_store_dword [[RESULT]], +; GCN: s_endpgm ; EG: BCNT_INT define void @v_ctpop_i32(i32 addrspace(1)* noalias %out, i32 addrspace(1)* noalias %in) nounwind { @@ -38,13 +38,13 @@ define void @v_ctpop_i32(i32 addrspace(1)* noalias %out, i32 addrspace(1)* noali } ; FUNC-LABEL: {{^}}v_ctpop_add_chain_i32: -; SI: buffer_load_dword [[VAL0:v[0-9]+]], -; SI: buffer_load_dword [[VAL1:v[0-9]+]], -; SI: v_mov_b32_e32 [[VZERO:v[0-9]+]], 0 -; SI: v_bcnt_u32_b32_e32 [[MIDRESULT:v[0-9]+]], [[VAL1]], [[VZERO]] -; SI-NEXT: v_bcnt_u32_b32_e32 [[RESULT:v[0-9]+]], [[VAL0]], [[MIDRESULT]] -; SI: buffer_store_dword [[RESULT]], -; SI: s_endpgm +; GCN: buffer_load_dword [[VAL1:v[0-9]+]], +; GCN: buffer_load_dword [[VAL0:v[0-9]+]], +; GCN: v_bcnt_u32_b32_e64 [[MIDRESULT:v[0-9]+]], [[VAL1]], 0 +; SI: v_bcnt_u32_b32_e32 [[RESULT:v[0-9]+]], [[VAL0]], [[MIDRESULT]] +; VI: v_bcnt_u32_b32_e64 [[RESULT:v[0-9]+]], [[VAL0]], [[MIDRESULT]] +; GCN: buffer_store_dword [[RESULT]], +; GCN: s_endpgm ; EG: BCNT_INT ; EG: BCNT_INT @@ -59,11 +59,11 @@ define void @v_ctpop_add_chain_i32(i32 addrspace(1)* noalias %out, i32 addrspace } ; FUNC-LABEL: {{^}}v_ctpop_add_sgpr_i32: -; SI: buffer_load_dword [[VAL0:v[0-9]+]], -; SI-NEXT: s_waitcnt -; SI-NEXT: v_bcnt_u32_b32_e64 [[RESULT:v[0-9]+]], [[VAL0]], s{{[0-9]+}} -; SI-NEXT: buffer_store_dword [[RESULT]], -; SI: s_endpgm +; GCN: buffer_load_dword [[VAL0:v[0-9]+]], +; GCN-NEXT: s_waitcnt +; GCN-NEXT: v_bcnt_u32_b32_e64 [[RESULT:v[0-9]+]], [[VAL0]], s{{[0-9]+}} +; GCN-NEXT: buffer_store_dword [[RESULT]], +; GCN: s_endpgm define void @v_ctpop_add_sgpr_i32(i32 addrspace(1)* noalias %out, i32 addrspace(1)* noalias %in0, i32 addrspace(1)* noalias %in1, i32 %sval) nounwind { %val0 = load i32 addrspace(1)* %in0, align 4 %ctpop0 = call i32 @llvm.ctpop.i32(i32 %val0) nounwind readnone @@ -73,9 +73,9 @@ define void @v_ctpop_add_sgpr_i32(i32 addrspace(1)* noalias %out, i32 addrspace( } ; FUNC-LABEL: {{^}}v_ctpop_v2i32: -; SI: v_bcnt_u32_b32_e32 -; SI: v_bcnt_u32_b32_e32 -; SI: s_endpgm +; GCN: v_bcnt_u32_b32_e64 +; GCN: v_bcnt_u32_b32_e64 +; GCN: s_endpgm ; EG: BCNT_INT ; EG: BCNT_INT @@ -87,11 +87,11 @@ define void @v_ctpop_v2i32(<2 x i32> addrspace(1)* noalias %out, <2 x i32> addrs } ; FUNC-LABEL: {{^}}v_ctpop_v4i32: -; SI: v_bcnt_u32_b32_e32 -; SI: v_bcnt_u32_b32_e32 -; SI: v_bcnt_u32_b32_e32 -; SI: v_bcnt_u32_b32_e32 -; SI: s_endpgm +; GCN: v_bcnt_u32_b32_e64 +; GCN: v_bcnt_u32_b32_e64 +; GCN: v_bcnt_u32_b32_e64 +; GCN: v_bcnt_u32_b32_e64 +; GCN: s_endpgm ; EG: BCNT_INT ; EG: BCNT_INT @@ -105,15 +105,15 @@ define void @v_ctpop_v4i32(<4 x i32> addrspace(1)* noalias %out, <4 x i32> addrs } ; FUNC-LABEL: {{^}}v_ctpop_v8i32: -; SI: v_bcnt_u32_b32_e32 -; SI: v_bcnt_u32_b32_e32 -; SI: v_bcnt_u32_b32_e32 -; SI: v_bcnt_u32_b32_e32 -; SI: v_bcnt_u32_b32_e32 -; SI: v_bcnt_u32_b32_e32 -; SI: v_bcnt_u32_b32_e32 -; SI: v_bcnt_u32_b32_e32 -; SI: s_endpgm +; GCN: v_bcnt_u32_b32_e64 +; GCN: v_bcnt_u32_b32_e64 +; GCN: v_bcnt_u32_b32_e64 +; GCN: v_bcnt_u32_b32_e64 +; GCN: v_bcnt_u32_b32_e64 +; GCN: v_bcnt_u32_b32_e64 +; GCN: v_bcnt_u32_b32_e64 +; GCN: v_bcnt_u32_b32_e64 +; GCN: s_endpgm ; EG: BCNT_INT ; EG: BCNT_INT @@ -131,23 +131,23 @@ define void @v_ctpop_v8i32(<8 x 
i32> addrspace(1)* noalias %out, <8 x i32> addrs } ; FUNC-LABEL: {{^}}v_ctpop_v16i32: -; SI: v_bcnt_u32_b32_e32 -; SI: v_bcnt_u32_b32_e32 -; SI: v_bcnt_u32_b32_e32 -; SI: v_bcnt_u32_b32_e32 -; SI: v_bcnt_u32_b32_e32 -; SI: v_bcnt_u32_b32_e32 -; SI: v_bcnt_u32_b32_e32 -; SI: v_bcnt_u32_b32_e32 -; SI: v_bcnt_u32_b32_e32 -; SI: v_bcnt_u32_b32_e32 -; SI: v_bcnt_u32_b32_e32 -; SI: v_bcnt_u32_b32_e32 -; SI: v_bcnt_u32_b32_e32 -; SI: v_bcnt_u32_b32_e32 -; SI: v_bcnt_u32_b32_e32 -; SI: v_bcnt_u32_b32_e32 -; SI: s_endpgm +; GCN: v_bcnt_u32_b32_e64 +; GCN: v_bcnt_u32_b32_e64 +; GCN: v_bcnt_u32_b32_e64 +; GCN: v_bcnt_u32_b32_e64 +; GCN: v_bcnt_u32_b32_e64 +; GCN: v_bcnt_u32_b32_e64 +; GCN: v_bcnt_u32_b32_e64 +; GCN: v_bcnt_u32_b32_e64 +; GCN: v_bcnt_u32_b32_e64 +; GCN: v_bcnt_u32_b32_e64 +; GCN: v_bcnt_u32_b32_e64 +; GCN: v_bcnt_u32_b32_e64 +; GCN: v_bcnt_u32_b32_e64 +; GCN: v_bcnt_u32_b32_e64 +; GCN: v_bcnt_u32_b32_e64 +; GCN: v_bcnt_u32_b32_e64 +; GCN: s_endpgm ; EG: BCNT_INT ; EG: BCNT_INT @@ -173,10 +173,10 @@ define void @v_ctpop_v16i32(<16 x i32> addrspace(1)* noalias %out, <16 x i32> ad } ; FUNC-LABEL: {{^}}v_ctpop_i32_add_inline_constant: -; SI: buffer_load_dword [[VAL:v[0-9]+]], -; SI: v_bcnt_u32_b32_e64 [[RESULT:v[0-9]+]], [[VAL]], 4 -; SI: buffer_store_dword [[RESULT]], -; SI: s_endpgm +; GCN: buffer_load_dword [[VAL:v[0-9]+]], +; GCN: v_bcnt_u32_b32_e64 [[RESULT:v[0-9]+]], [[VAL]], 4 +; GCN: buffer_store_dword [[RESULT]], +; GCN: s_endpgm ; EG: BCNT_INT define void @v_ctpop_i32_add_inline_constant(i32 addrspace(1)* noalias %out, i32 addrspace(1)* noalias %in) nounwind { @@ -188,10 +188,10 @@ define void @v_ctpop_i32_add_inline_constant(i32 addrspace(1)* noalias %out, i32 } ; FUNC-LABEL: {{^}}v_ctpop_i32_add_inline_constant_inv: -; SI: buffer_load_dword [[VAL:v[0-9]+]], -; SI: v_bcnt_u32_b32_e64 [[RESULT:v[0-9]+]], [[VAL]], 4 -; SI: buffer_store_dword [[RESULT]], -; SI: s_endpgm +; GCN: buffer_load_dword [[VAL:v[0-9]+]], +; GCN: v_bcnt_u32_b32_e64 [[RESULT:v[0-9]+]], [[VAL]], 4 +; GCN: buffer_store_dword [[RESULT]], +; GCN: s_endpgm ; EG: BCNT_INT define void @v_ctpop_i32_add_inline_constant_inv(i32 addrspace(1)* noalias %out, i32 addrspace(1)* noalias %in) nounwind { @@ -203,11 +203,12 @@ define void @v_ctpop_i32_add_inline_constant_inv(i32 addrspace(1)* noalias %out, } ; FUNC-LABEL: {{^}}v_ctpop_i32_add_literal: -; SI: buffer_load_dword [[VAL:v[0-9]+]], -; SI: v_mov_b32_e32 [[LIT:v[0-9]+]], 0x1869f +; GCN: buffer_load_dword [[VAL:v[0-9]+]], +; GCN: v_mov_b32_e32 [[LIT:v[0-9]+]], 0x1869f ; SI: v_bcnt_u32_b32_e32 [[RESULT:v[0-9]+]], [[VAL]], [[LIT]] -; SI: buffer_store_dword [[RESULT]], -; SI: s_endpgm +; VI: v_bcnt_u32_b32_e64 [[RESULT:v[0-9]+]], [[VAL]], [[LIT]] +; GCN: buffer_store_dword [[RESULT]], +; GCN: s_endpgm define void @v_ctpop_i32_add_literal(i32 addrspace(1)* noalias %out, i32 addrspace(1)* noalias %in) nounwind { %val = load i32 addrspace(1)* %in, align 4 %ctpop = call i32 @llvm.ctpop.i32(i32 %val) nounwind readnone @@ -217,11 +218,11 @@ define void @v_ctpop_i32_add_literal(i32 addrspace(1)* noalias %out, i32 addrspa } ; FUNC-LABEL: {{^}}v_ctpop_i32_add_var: -; SI-DAG: buffer_load_dword [[VAL:v[0-9]+]], -; SI-DAG: s_load_dword [[VAR:s[0-9]+]], -; SI: v_bcnt_u32_b32_e64 [[RESULT:v[0-9]+]], [[VAL]], [[VAR]] -; SI: buffer_store_dword [[RESULT]], -; SI: s_endpgm +; GCN-DAG: buffer_load_dword [[VAL:v[0-9]+]], +; GCN-DAG: s_load_dword [[VAR:s[0-9]+]], +; GCN: v_bcnt_u32_b32_e64 [[RESULT:v[0-9]+]], [[VAL]], [[VAR]] +; GCN: buffer_store_dword [[RESULT]], +; GCN: s_endpgm ; EG: 
BCNT_INT define void @v_ctpop_i32_add_var(i32 addrspace(1)* noalias %out, i32 addrspace(1)* noalias %in, i32 %const) nounwind { @@ -233,11 +234,11 @@ define void @v_ctpop_i32_add_var(i32 addrspace(1)* noalias %out, i32 addrspace(1 } ; FUNC-LABEL: {{^}}v_ctpop_i32_add_var_inv: -; SI-DAG: buffer_load_dword [[VAL:v[0-9]+]], -; SI-DAG: s_load_dword [[VAR:s[0-9]+]], -; SI: v_bcnt_u32_b32_e64 [[RESULT:v[0-9]+]], [[VAL]], [[VAR]] -; SI: buffer_store_dword [[RESULT]], -; SI: s_endpgm +; GCN-DAG: buffer_load_dword [[VAL:v[0-9]+]], +; GCN-DAG: s_load_dword [[VAR:s[0-9]+]], +; GCN: v_bcnt_u32_b32_e64 [[RESULT:v[0-9]+]], [[VAL]], [[VAR]] +; GCN: buffer_store_dword [[RESULT]], +; GCN: s_endpgm ; EG: BCNT_INT define void @v_ctpop_i32_add_var_inv(i32 addrspace(1)* noalias %out, i32 addrspace(1)* noalias %in, i32 %const) nounwind { @@ -249,11 +250,12 @@ define void @v_ctpop_i32_add_var_inv(i32 addrspace(1)* noalias %out, i32 addrspa } ; FUNC-LABEL: {{^}}v_ctpop_i32_add_vvar_inv: -; SI-DAG: buffer_load_dword [[VAL:v[0-9]+]], s[{{[0-9]+:[0-9]+}}], {{0$}} -; SI-DAG: buffer_load_dword [[VAR:v[0-9]+]], s[{{[0-9]+:[0-9]+}}], 0 offset:0x10 +; GCN-DAG: buffer_load_dword [[VAL:v[0-9]+]], s[{{[0-9]+:[0-9]+}}], {{0$}} +; GCN-DAG: buffer_load_dword [[VAR:v[0-9]+]], s[{{[0-9]+:[0-9]+}}], 0 offset:16 ; SI: v_bcnt_u32_b32_e32 [[RESULT:v[0-9]+]], [[VAL]], [[VAR]] -; SI: buffer_store_dword [[RESULT]], -; SI: s_endpgm +; VI: v_bcnt_u32_b32_e64 [[RESULT:v[0-9]+]], [[VAL]], [[VAR]] +; GCN: buffer_store_dword [[RESULT]], +; GCN: s_endpgm ; EG: BCNT_INT define void @v_ctpop_i32_add_vvar_inv(i32 addrspace(1)* noalias %out, i32 addrspace(1)* noalias %in, i32 addrspace(1)* noalias %constptr) nounwind { @@ -271,10 +273,11 @@ define void @v_ctpop_i32_add_vvar_inv(i32 addrspace(1)* noalias %out, i32 addrsp ; FUNC-LABEL: {{^}}ctpop_i32_in_br: ; SI: s_load_dword [[VAL:s[0-9]+]], s[{{[0-9]+:[0-9]+}}], 0xd -; SI: s_bcnt1_i32_b32 [[SRESULT:s[0-9]+]], [[VAL]] -; SI: v_mov_b32_e32 [[RESULT]], [[SRESULT]] -; SI: buffer_store_dword [[RESULT]], -; SI: s_endpgm +; VI: s_load_dword [[VAL:s[0-9]+]], s[{{[0-9]+:[0-9]+}}], 0x34 +; GCN: s_bcnt1_i32_b32 [[SRESULT:s[0-9]+]], [[VAL]] +; GCN: v_mov_b32_e32 [[RESULT]], [[SRESULT]] +; GCN: buffer_store_dword [[RESULT]], +; GCN: s_endpgm ; EG: BCNT_INT define void @ctpop_i32_in_br(i32 addrspace(1)* %out, i32 addrspace(1)* %in, i32 %ctpop_arg, i32 %cond) { entry: diff --git a/test/CodeGen/R600/ctpop64.ll b/test/CodeGen/R600/ctpop64.ll index 2efac8f..8bcd818 100644 --- a/test/CodeGen/R600/ctpop64.ll +++ b/test/CodeGen/R600/ctpop64.ll @@ -1,4 +1,5 @@ -; RUN: llc -march=r600 -mcpu=SI -verify-machineinstrs < %s | FileCheck -check-prefix=SI -check-prefix=FUNC %s +; RUN: llc -march=amdgcn -mcpu=SI -verify-machineinstrs < %s | FileCheck -check-prefix=SI -check-prefix=GCN -check-prefix=FUNC %s +; RUN: llc -march=amdgcn -mcpu=tonga -verify-machineinstrs < %s | FileCheck -check-prefix=VI -check-prefix=GCN -check-prefix=FUNC %s declare i64 @llvm.ctpop.i64(i64) nounwind readnone declare <2 x i64> @llvm.ctpop.v2i64(<2 x i64>) nounwind readnone @@ -8,10 +9,11 @@ declare <16 x i64> @llvm.ctpop.v16i64(<16 x i64>) nounwind readnone ; FUNC-LABEL: {{^}}s_ctpop_i64: ; SI: s_load_dwordx2 [[SVAL:s\[[0-9]+:[0-9]+\]]], s{{\[[0-9]+:[0-9]+\]}}, 0xb -; SI: s_bcnt1_i32_b64 [[SRESULT:s[0-9]+]], [[SVAL]] -; SI: v_mov_b32_e32 [[VRESULT:v[0-9]+]], [[SRESULT]] -; SI: buffer_store_dword [[VRESULT]], -; SI: s_endpgm +; VI: s_load_dwordx2 [[SVAL:s\[[0-9]+:[0-9]+\]]], s{{\[[0-9]+:[0-9]+\]}}, 0x2c +; GCN: s_bcnt1_i32_b64 [[SRESULT:s[0-9]+]], 
[[SVAL]] +; GCN: v_mov_b32_e32 [[VRESULT:v[0-9]+]], [[SRESULT]] +; GCN: buffer_store_dword [[VRESULT]], +; GCN: s_endpgm define void @s_ctpop_i64(i32 addrspace(1)* noalias %out, i64 %val) nounwind { %ctpop = call i64 @llvm.ctpop.i64(i64 %val) nounwind readnone %truncctpop = trunc i64 %ctpop to i32 @@ -20,12 +22,12 @@ define void @s_ctpop_i64(i32 addrspace(1)* noalias %out, i64 %val) nounwind { } ; FUNC-LABEL: {{^}}v_ctpop_i64: -; SI: buffer_load_dwordx2 v{{\[}}[[LOVAL:[0-9]+]]:[[HIVAL:[0-9]+]]{{\]}}, -; SI: v_mov_b32_e32 [[VZERO:v[0-9]+]], 0 -; SI: v_bcnt_u32_b32_e32 [[MIDRESULT:v[0-9]+]], v[[LOVAL]], [[VZERO]] +; GCN: buffer_load_dwordx2 v{{\[}}[[LOVAL:[0-9]+]]:[[HIVAL:[0-9]+]]{{\]}}, +; GCN: v_bcnt_u32_b32_e64 [[MIDRESULT:v[0-9]+]], v[[LOVAL]], 0 ; SI-NEXT: v_bcnt_u32_b32_e32 [[RESULT:v[0-9]+]], v[[HIVAL]], [[MIDRESULT]] -; SI: buffer_store_dword [[RESULT]], -; SI: s_endpgm +; VI-NEXT: v_bcnt_u32_b32_e64 [[RESULT:v[0-9]+]], v[[HIVAL]], [[MIDRESULT]] +; GCN: buffer_store_dword [[RESULT]], +; GCN: s_endpgm define void @v_ctpop_i64(i32 addrspace(1)* noalias %out, i64 addrspace(1)* noalias %in) nounwind { %val = load i64 addrspace(1)* %in, align 8 %ctpop = call i64 @llvm.ctpop.i64(i64 %val) nounwind readnone @@ -35,9 +37,9 @@ define void @v_ctpop_i64(i32 addrspace(1)* noalias %out, i64 addrspace(1)* noali } ; FUNC-LABEL: {{^}}s_ctpop_v2i64: -; SI: s_bcnt1_i32_b64 -; SI: s_bcnt1_i32_b64 -; SI: s_endpgm +; GCN: s_bcnt1_i32_b64 +; GCN: s_bcnt1_i32_b64 +; GCN: s_endpgm define void @s_ctpop_v2i64(<2 x i32> addrspace(1)* noalias %out, <2 x i64> %val) nounwind { %ctpop = call <2 x i64> @llvm.ctpop.v2i64(<2 x i64> %val) nounwind readnone %truncctpop = trunc <2 x i64> %ctpop to <2 x i32> @@ -46,11 +48,11 @@ define void @s_ctpop_v2i64(<2 x i32> addrspace(1)* noalias %out, <2 x i64> %val) } ; FUNC-LABEL: {{^}}s_ctpop_v4i64: -; SI: s_bcnt1_i32_b64 -; SI: s_bcnt1_i32_b64 -; SI: s_bcnt1_i32_b64 -; SI: s_bcnt1_i32_b64 -; SI: s_endpgm +; GCN: s_bcnt1_i32_b64 +; GCN: s_bcnt1_i32_b64 +; GCN: s_bcnt1_i32_b64 +; GCN: s_bcnt1_i32_b64 +; GCN: s_endpgm define void @s_ctpop_v4i64(<4 x i32> addrspace(1)* noalias %out, <4 x i64> %val) nounwind { %ctpop = call <4 x i64> @llvm.ctpop.v4i64(<4 x i64> %val) nounwind readnone %truncctpop = trunc <4 x i64> %ctpop to <4 x i32> @@ -59,11 +61,11 @@ define void @s_ctpop_v4i64(<4 x i32> addrspace(1)* noalias %out, <4 x i64> %val) } ; FUNC-LABEL: {{^}}v_ctpop_v2i64: -; SI: v_bcnt_u32_b32 -; SI: v_bcnt_u32_b32 -; SI: v_bcnt_u32_b32 -; SI: v_bcnt_u32_b32 -; SI: s_endpgm +; GCN: v_bcnt_u32_b32 +; GCN: v_bcnt_u32_b32 +; GCN: v_bcnt_u32_b32 +; GCN: v_bcnt_u32_b32 +; GCN: s_endpgm define void @v_ctpop_v2i64(<2 x i32> addrspace(1)* noalias %out, <2 x i64> addrspace(1)* noalias %in) nounwind { %val = load <2 x i64> addrspace(1)* %in, align 16 %ctpop = call <2 x i64> @llvm.ctpop.v2i64(<2 x i64> %val) nounwind readnone @@ -73,15 +75,15 @@ define void @v_ctpop_v2i64(<2 x i32> addrspace(1)* noalias %out, <2 x i64> addrs } ; FUNC-LABEL: {{^}}v_ctpop_v4i64: -; SI: v_bcnt_u32_b32 -; SI: v_bcnt_u32_b32 -; SI: v_bcnt_u32_b32 -; SI: v_bcnt_u32_b32 -; SI: v_bcnt_u32_b32 -; SI: v_bcnt_u32_b32 -; SI: v_bcnt_u32_b32 -; SI: v_bcnt_u32_b32 -; SI: s_endpgm +; GCN: v_bcnt_u32_b32 +; GCN: v_bcnt_u32_b32 +; GCN: v_bcnt_u32_b32 +; GCN: v_bcnt_u32_b32 +; GCN: v_bcnt_u32_b32 +; GCN: v_bcnt_u32_b32 +; GCN: v_bcnt_u32_b32 +; GCN: v_bcnt_u32_b32 +; GCN: s_endpgm define void @v_ctpop_v4i64(<4 x i32> addrspace(1)* noalias %out, <4 x i64> addrspace(1)* noalias %in) nounwind { %val = load <4 x i64> addrspace(1)* %in, 
align 32 %ctpop = call <4 x i64> @llvm.ctpop.v4i64(<4 x i64> %val) nounwind readnone @@ -95,11 +97,12 @@ define void @v_ctpop_v4i64(<4 x i32> addrspace(1)* noalias %out, <4 x i64> addrs ; FUNC-LABEL: {{^}}ctpop_i64_in_br: ; SI: s_load_dwordx2 s{{\[}}[[LOVAL:[0-9]+]]:[[HIVAL:[0-9]+]]{{\]}}, s[{{[0-9]+:[0-9]+}}], 0xd -; SI: s_bcnt1_i32_b64 [[RESULT:s[0-9]+]], {{s\[}}[[LOVAL]]:[[HIVAL]]{{\]}} -; SI: v_mov_b32_e32 v[[VLO:[0-9]+]], [[RESULT]] -; SI: v_mov_b32_e32 v[[VHI:[0-9]+]], s[[HIVAL]] -; SI: buffer_store_dwordx2 {{v\[}}[[VLO]]:[[VHI]]{{\]}} -; SI: s_endpgm +; VI: s_load_dwordx2 s{{\[}}[[LOVAL:[0-9]+]]:[[HIVAL:[0-9]+]]{{\]}}, s[{{[0-9]+:[0-9]+}}], 0x34 +; GCN: s_bcnt1_i32_b64 [[RESULT:s[0-9]+]], {{s\[}}[[LOVAL]]:[[HIVAL]]{{\]}} +; GCN: v_mov_b32_e32 v[[VLO:[0-9]+]], [[RESULT]] +; GCN: v_mov_b32_e32 v[[VHI:[0-9]+]], s[[HIVAL]] +; GCN: buffer_store_dwordx2 {{v\[}}[[VLO]]:[[VHI]]{{\]}} +; GCN: s_endpgm define void @ctpop_i64_in_br(i64 addrspace(1)* %out, i64 addrspace(1)* %in, i64 %ctpop_arg, i32 %cond) { entry: %tmp0 = icmp eq i32 %cond, 0 diff --git a/test/CodeGen/R600/cttz_zero_undef.ll b/test/CodeGen/R600/cttz_zero_undef.ll index c4b1463..d9d284c 100644 --- a/test/CodeGen/R600/cttz_zero_undef.ll +++ b/test/CodeGen/R600/cttz_zero_undef.ll @@ -1,4 +1,5 @@ -; RUN: llc -march=r600 -mcpu=SI -verify-machineinstrs < %s | FileCheck -check-prefix=SI -check-prefix=FUNC %s +; RUN: llc -march=amdgcn -mcpu=SI -verify-machineinstrs < %s | FileCheck -check-prefix=SI -check-prefix=FUNC %s +; RUN: llc -march=amdgcn -mcpu=tonga -verify-machineinstrs < %s | FileCheck -check-prefix=SI -check-prefix=FUNC %s ; RUN: llc -march=r600 -mcpu=cypress -verify-machineinstrs < %s | FileCheck -check-prefix=EG -check-prefix=FUNC %s declare i32 @llvm.cttz.i32(i32, i1) nounwind readnone diff --git a/test/CodeGen/R600/cvt_f32_ubyte.ll b/test/CodeGen/R600/cvt_f32_ubyte.ll index 0d1db19..4d4bf93 100644 --- a/test/CodeGen/R600/cvt_f32_ubyte.ll +++ b/test/CodeGen/R600/cvt_f32_ubyte.ll @@ -1,4 +1,5 @@ -; RUN: llc -march=r600 -mcpu=SI -verify-machineinstrs < %s | FileCheck -check-prefix=SI %s +; RUN: llc -march=amdgcn -mcpu=SI -verify-machineinstrs < %s | FileCheck -check-prefix=SI %s +; RUN: llc -march=amdgcn -mcpu=tonga -verify-machineinstrs < %s | FileCheck -check-prefix=SI %s ; SI-LABEL: {{^}}load_i8_to_f32: ; SI: buffer_load_ubyte [[LOADREG:v[0-9]+]], @@ -22,7 +23,7 @@ define void @load_i8_to_f32(float addrspace(1)* noalias %out, i8 addrspace(1)* n ; SI-DAG: v_cvt_f32_ubyte0_e32 v[[LORESULT:[0-9]+]], [[LOADREG]] ; SI: buffer_store_dwordx2 v{{\[}}[[LORESULT]]:[[HIRESULT]]{{\]}}, define void @load_v2i8_to_v2f32(<2 x float> addrspace(1)* noalias %out, <2 x i8> addrspace(1)* noalias %in) nounwind { - %load = load <2 x i8> addrspace(1)* %in, align 1 + %load = load <2 x i8> addrspace(1)* %in, align 2 %cvt = uitofp <2 x i8> %load to <2 x float> store <2 x float> %cvt, <2 x float> addrspace(1)* %out, align 16 ret void @@ -36,18 +37,14 @@ define void @load_v2i8_to_v2f32(<2 x float> addrspace(1)* noalias %out, <2 x i8> ; SI-DAG: v_cvt_f32_ubyte0_e32 ; SI: buffer_store_dwordx2 v{{\[}}[[LORESULT]]:[[HIRESULT]]{{\]}}, define void @load_v3i8_to_v3f32(<3 x float> addrspace(1)* noalias %out, <3 x i8> addrspace(1)* noalias %in) nounwind { - %load = load <3 x i8> addrspace(1)* %in, align 1 + %load = load <3 x i8> addrspace(1)* %in, align 4 %cvt = uitofp <3 x i8> %load to <3 x float> store <3 x float> %cvt, <3 x float> addrspace(1)* %out, align 16 ret void } ; SI-LABEL: {{^}}load_v4i8_to_v4f32: -; We can't use buffer_load_dword here, because 
the load is byte aligned, and -; buffer_load_dword requires dword alignment. -; SI: buffer_load_ushort -; SI: buffer_load_ushort -; SI: v_or_b32_e32 [[LOADREG:v[0-9]+]] +; SI: buffer_load_dword [[LOADREG:v[0-9]+]] ; SI-NOT: bfe ; SI-NOT: lshr ; SI-DAG: v_cvt_f32_ubyte3_e32 v[[HIRESULT:[0-9]+]], [[LOADREG]] @@ -56,6 +53,30 @@ define void @load_v3i8_to_v3f32(<3 x float> addrspace(1)* noalias %out, <3 x i8> ; SI-DAG: v_cvt_f32_ubyte0_e32 v[[LORESULT:[0-9]+]], [[LOADREG]] ; SI: buffer_store_dwordx4 v{{\[}}[[LORESULT]]:[[HIRESULT]]{{\]}}, define void @load_v4i8_to_v4f32(<4 x float> addrspace(1)* noalias %out, <4 x i8> addrspace(1)* noalias %in) nounwind { + %load = load <4 x i8> addrspace(1)* %in, align 4 + %cvt = uitofp <4 x i8> %load to <4 x float> + store <4 x float> %cvt, <4 x float> addrspace(1)* %out, align 16 + ret void +} + +; This should not be adding instructions to shift into the correct +; position in the word for the component. + +; SI-LABEL: {{^}}load_v4i8_to_v4f32_unaligned: +; SI: buffer_load_ubyte [[LOADREG3:v[0-9]+]] +; SI: buffer_load_ubyte [[LOADREG2:v[0-9]+]] +; SI: buffer_load_ubyte [[LOADREG1:v[0-9]+]] +; SI: buffer_load_ubyte [[LOADREG0:v[0-9]+]] +; SI-NOT: v_lshlrev_b32 +; SI-NOT: v_or_b32 + +; SI-DAG: v_cvt_f32_ubyte0_e32 v[[LORESULT:[0-9]+]], [[LOADREG0]] +; SI-DAG: v_cvt_f32_ubyte0_e32 v{{[0-9]+}}, [[LOADREG1]] +; SI-DAG: v_cvt_f32_ubyte0_e32 v{{[0-9]+}}, [[LOADREG2]] +; SI-DAG: v_cvt_f32_ubyte0_e32 v[[HIRESULT:[0-9]+]], [[LOADREG3]] + +; SI: buffer_store_dwordx4 v{{\[}}[[LORESULT]]:[[HIRESULT]]{{\]}}, +define void @load_v4i8_to_v4f32_unaligned(<4 x float> addrspace(1)* noalias %out, <4 x i8> addrspace(1)* noalias %in) nounwind { %load = load <4 x i8> addrspace(1)* %in, align 1 %cvt = uitofp <4 x i8> %load to <4 x float> store <4 x float> %cvt, <4 x float> addrspace(1)* %out, align 16 @@ -125,7 +146,7 @@ define void @load_v7i8_to_v7f32(<7 x float> addrspace(1)* noalias %out, <7 x i8> ; SI: buffer_store_dword ; SI: buffer_store_dword define void @load_v8i8_to_v8f32(<8 x float> addrspace(1)* noalias %out, <8 x i8> addrspace(1)* noalias %in) nounwind { - %load = load <8 x i8> addrspace(1)* %in, align 1 + %load = load <8 x i8> addrspace(1)* %in, align 8 %cvt = uitofp <8 x i8> %load to <8 x float> store <8 x float> %cvt, <8 x float> addrspace(1)* %out, align 16 ret void diff --git a/test/CodeGen/R600/cvt_flr_i32_f32.ll b/test/CodeGen/R600/cvt_flr_i32_f32.ll new file mode 100644 index 0000000..2dd3a9f --- /dev/null +++ b/test/CodeGen/R600/cvt_flr_i32_f32.ll @@ -0,0 +1,86 @@ +; RUN: llc -march=amdgcn -mcpu=SI -verify-machineinstrs < %s | FileCheck -check-prefix=SI-SAFE -check-prefix=SI -check-prefix=FUNC %s +; RUN: llc -march=amdgcn -mcpu=SI -enable-no-nans-fp-math -verify-machineinstrs < %s | FileCheck -check-prefix=SI-NONAN -check-prefix=SI -check-prefix=FUNC %s +; RUN: llc -march=amdgcn -mcpu=tonga -verify-machineinstrs < %s | FileCheck -check-prefix=SI -check-prefix=FUNC %s + +declare float @llvm.fabs.f32(float) #1 +declare float @llvm.floor.f32(float) #1 + +; FUNC-LABEL: {{^}}cvt_flr_i32_f32_0: +; SI-SAFE-NOT: v_cvt_flr_i32_f32 +; SI-NOT: add +; SI-NONAN: v_cvt_flr_i32_f32_e32 v{{[0-9]+}}, s{{[0-9]+}} +; SI: s_endpgm +define void @cvt_flr_i32_f32_0(i32 addrspace(1)* %out, float %x) #0 { + %floor = call float @llvm.floor.f32(float %x) #1 + %cvt = fptosi float %floor to i32 + store i32 %cvt, i32 addrspace(1)* %out + ret void +} + +; FUNC-LABEL: {{^}}cvt_flr_i32_f32_1: +; SI: v_add_f32_e64 [[TMP:v[0-9]+]], 1.0, s{{[0-9]+}} +; SI-SAFE-NOT: v_cvt_flr_i32_f32 +; SI-NONAN: 
v_cvt_flr_i32_f32_e32 v{{[0-9]+}}, [[TMP]] +; SI: s_endpgm +define void @cvt_flr_i32_f32_1(i32 addrspace(1)* %out, float %x) #0 { + %fadd = fadd float %x, 1.0 + %floor = call float @llvm.floor.f32(float %fadd) #1 + %cvt = fptosi float %floor to i32 + store i32 %cvt, i32 addrspace(1)* %out + ret void +} + +; FUNC-LABEL: {{^}}cvt_flr_i32_f32_fabs: +; SI-NOT: add +; SI-SAFE-NOT: v_cvt_flr_i32_f32 +; SI-NONAN: v_cvt_flr_i32_f32_e64 v{{[0-9]+}}, |s{{[0-9]+}}| +; SI: s_endpgm +define void @cvt_flr_i32_f32_fabs(i32 addrspace(1)* %out, float %x) #0 { + %x.fabs = call float @llvm.fabs.f32(float %x) #1 + %floor = call float @llvm.floor.f32(float %x.fabs) #1 + %cvt = fptosi float %floor to i32 + store i32 %cvt, i32 addrspace(1)* %out + ret void +} + +; FUNC-LABEL: {{^}}cvt_flr_i32_f32_fneg: +; SI-NOT: add +; SI-SAFE-NOT: v_cvt_flr_i32_f32 +; SI-NONAN: v_cvt_flr_i32_f32_e64 v{{[0-9]+}}, -s{{[0-9]+}} +; SI: s_endpgm +define void @cvt_flr_i32_f32_fneg(i32 addrspace(1)* %out, float %x) #0 { + %x.fneg = fsub float -0.000000e+00, %x + %floor = call float @llvm.floor.f32(float %x.fneg) #1 + %cvt = fptosi float %floor to i32 + store i32 %cvt, i32 addrspace(1)* %out + ret void +} + +; FUNC-LABEL: {{^}}cvt_flr_i32_f32_fabs_fneg: +; SI-NOT: add +; SI-SAFE-NOT: v_cvt_flr_i32_f32 +; SI-NONAN: v_cvt_flr_i32_f32_e64 v{{[0-9]+}}, -|s{{[0-9]+}}| +; SI: s_endpgm +define void @cvt_flr_i32_f32_fabs_fneg(i32 addrspace(1)* %out, float %x) #0 { + %x.fabs = call float @llvm.fabs.f32(float %x) #1 + %x.fabs.fneg = fsub float -0.000000e+00, %x.fabs + %floor = call float @llvm.floor.f32(float %x.fabs.fneg) #1 + %cvt = fptosi float %floor to i32 + store i32 %cvt, i32 addrspace(1)* %out + ret void +} + +; FUNC-LABEL: {{^}}no_cvt_flr_i32_f32_0: +; SI-NOT: v_cvt_flr_i32_f32 +; SI: v_floor_f32 +; SI: v_cvt_u32_f32_e32 +; SI: s_endpgm +define void @no_cvt_flr_i32_f32_0(i32 addrspace(1)* %out, float %x) #0 { + %floor = call float @llvm.floor.f32(float %x) #1 + %cvt = fptoui float %floor to i32 + store i32 %cvt, i32 addrspace(1)* %out + ret void +} + +attributes #0 = { nounwind } +attributes #1 = { nounwind readnone } diff --git a/test/CodeGen/R600/cvt_rpi_i32_f32.ll b/test/CodeGen/R600/cvt_rpi_i32_f32.ll new file mode 100644 index 0000000..864ac40 --- /dev/null +++ b/test/CodeGen/R600/cvt_rpi_i32_f32.ll @@ -0,0 +1,83 @@ +; RUN: llc -march=amdgcn -mcpu=SI -verify-machineinstrs < %s | FileCheck -check-prefix=SI-SAFE -check-prefix=SI -check-prefix=FUNC %s +; RUN: llc -march=amdgcn -mcpu=SI -enable-no-nans-fp-math -verify-machineinstrs < %s | FileCheck -check-prefix=SI-NONAN -check-prefix=SI -check-prefix=FUNC %s +; RUN: llc -march=amdgcn -mcpu=tonga < %s | FileCheck -check-prefix=SI-SAFE -check-prefix=SI -check-prefix=FUNC %s + +declare float @llvm.fabs.f32(float) #1 +declare float @llvm.floor.f32(float) #1 + +; FUNC-LABEL: {{^}}cvt_rpi_i32_f32: +; SI-SAFE-NOT: v_cvt_rpi_i32_f32 +; SI-NONAN: v_cvt_rpi_i32_f32_e32 v{{[0-9]+}}, s{{[0-9]+}} +; SI: s_endpgm +define void @cvt_rpi_i32_f32(i32 addrspace(1)* %out, float %x) #0 { + %fadd = fadd float %x, 0.5 + %floor = call float @llvm.floor.f32(float %fadd) #1 + %cvt = fptosi float %floor to i32 + store i32 %cvt, i32 addrspace(1)* %out + ret void +} + +; FUNC-LABEL: {{^}}cvt_rpi_i32_f32_fabs: +; SI-SAFE-NOT: v_cvt_rpi_i32_f32 +; SI-NONAN: v_cvt_rpi_i32_f32_e64 v{{[0-9]+}}, |s{{[0-9]+}}|{{$}} +; SI: s_endpgm +define void @cvt_rpi_i32_f32_fabs(i32 addrspace(1)* %out, float %x) #0 { + %x.fabs = call float @llvm.fabs.f32(float %x) #1 + %fadd = fadd float %x.fabs, 0.5 + %floor = call float 
@llvm.floor.f32(float %fadd) #1 + %cvt = fptosi float %floor to i32 + store i32 %cvt, i32 addrspace(1)* %out + ret void +} + +; FIXME: This doesn't work because it forms fsub 0.5, x +; FUNC-LABEL: {{^}}cvt_rpi_i32_f32_fneg: +; XSI-NONAN: v_cvt_rpi_i32_f32_e64 v{{[0-9]+}}, -s{{[0-9]+}} +; SI: v_sub_f32_e64 [[TMP:v[0-9]+]], 0.5, s{{[0-9]+}} +; SI-SAFE-NOT: v_cvt_flr_i32_f32 +; SI-NONAN: v_cvt_flr_i32_f32_e32 {{v[0-9]+}}, [[TMP]] +; SI: s_endpgm +define void @cvt_rpi_i32_f32_fneg(i32 addrspace(1)* %out, float %x) #0 { + %x.fneg = fsub float -0.000000e+00, %x + %fadd = fadd float %x.fneg, 0.5 + %floor = call float @llvm.floor.f32(float %fadd) #1 + %cvt = fptosi float %floor to i32 + store i32 %cvt, i32 addrspace(1)* %out + ret void +} + +; FIXME: This doesn't work for same reason as above +; FUNC-LABEL: {{^}}cvt_rpi_i32_f32_fabs_fneg: +; SI-SAFE-NOT: v_cvt_rpi_i32_f32 +; XSI-NONAN: v_cvt_rpi_i32_f32_e64 v{{[0-9]+}}, -|s{{[0-9]+}}| + +; SI: v_sub_f32_e64 [[TMP:v[0-9]+]], 0.5, |s{{[0-9]+}}| +; SI-SAFE-NOT: v_cvt_flr_i32_f32 +; SI-NONAN: v_cvt_flr_i32_f32_e32 {{v[0-9]+}}, [[TMP]] +; SI: s_endpgm +define void @cvt_rpi_i32_f32_fabs_fneg(i32 addrspace(1)* %out, float %x) #0 { + %x.fabs = call float @llvm.fabs.f32(float %x) #1 + %x.fabs.fneg = fsub float -0.000000e+00, %x.fabs + %fadd = fadd float %x.fabs.fneg, 0.5 + %floor = call float @llvm.floor.f32(float %fadd) #1 + %cvt = fptosi float %floor to i32 + store i32 %cvt, i32 addrspace(1)* %out + ret void +} + +; FUNC-LABEL: {{^}}no_cvt_rpi_i32_f32_0: +; SI-NOT: v_cvt_rpi_i32_f32 +; SI: v_add_f32 +; SI: v_floor_f32 +; SI: v_cvt_u32_f32 +; SI: s_endpgm +define void @no_cvt_rpi_i32_f32_0(i32 addrspace(1)* %out, float %x) #0 { + %fadd = fadd float %x, 0.5 + %floor = call float @llvm.floor.f32(float %fadd) #1 + %cvt = fptoui float %floor to i32 + store i32 %cvt, i32 addrspace(1)* %out + ret void +} + +attributes #0 = { nounwind } +attributes #1 = { nounwind readnone } diff --git a/test/CodeGen/R600/default-fp-mode.ll b/test/CodeGen/R600/default-fp-mode.ll index 935bf97..da8e914 100644 --- a/test/CodeGen/R600/default-fp-mode.ll +++ b/test/CodeGen/R600/default-fp-mode.ll @@ -1,10 +1,17 @@ -; RUN: llc -march=r600 -mcpu=SI -mattr=-fp32-denormals,+fp64-denormals < %s | FileCheck -check-prefix=FP64-DENORMAL -check-prefix=FUNC %s -; RUN: llc -march=r600 -mcpu=SI -mattr=+fp32-denormals,-fp64-denormals < %s | FileCheck -check-prefix=FP32-DENORMAL -check-prefix=FUNC %s -; RUN: llc -march=r600 -mcpu=SI -mattr=+fp32-denormals,+fp64-denormals < %s | FileCheck -check-prefix=BOTH-DENORMAL -check-prefix=FUNC %s -; RUN: llc -march=r600 -mcpu=SI -mattr=-fp32-denormals,-fp64-denormals < %s | FileCheck -check-prefix=NO-DENORMAL -check-prefix=FUNC %s -; RUN: llc -march=r600 -mcpu=SI < %s | FileCheck -check-prefix=DEFAULT -check-prefix=FUNC %s -; RUN: llc -march=r600 -mcpu=SI -mattr=-fp32-denormals < %s | FileCheck -check-prefix=DEFAULT -check-prefix=FUNC %s -; RUN: llc -march=r600 -mcpu=SI -mattr=+fp64-denormals < %s | FileCheck -check-prefix=DEFAULT -check-prefix=FUNC %s +; RUN: llc -march=amdgcn -mcpu=SI -mattr=-fp32-denormals,+fp64-denormals < %s | FileCheck -check-prefix=FP64-DENORMAL -check-prefix=FUNC %s +; RUN: llc -march=amdgcn -mcpu=SI -mattr=+fp32-denormals,-fp64-denormals < %s | FileCheck -check-prefix=FP32-DENORMAL -check-prefix=FUNC %s +; RUN: llc -march=amdgcn -mcpu=SI -mattr=+fp32-denormals,+fp64-denormals < %s | FileCheck -check-prefix=BOTH-DENORMAL -check-prefix=FUNC %s +; RUN: llc -march=amdgcn -mcpu=SI -mattr=-fp32-denormals,-fp64-denormals < %s | FileCheck 
-check-prefix=NO-DENORMAL -check-prefix=FUNC %s +; RUN: llc -march=amdgcn -mcpu=SI < %s | FileCheck -check-prefix=DEFAULT -check-prefix=FUNC %s +; RUN: llc -march=amdgcn -mcpu=SI -mattr=-fp32-denormals < %s | FileCheck -check-prefix=DEFAULT -check-prefix=FUNC %s +; RUN: llc -march=amdgcn -mcpu=SI -mattr=+fp64-denormals < %s | FileCheck -check-prefix=DEFAULT -check-prefix=FUNC %s +; RUN: llc -march=amdgcn -mcpu=tonga -mattr=-fp32-denormals,+fp64-denormals < %s | FileCheck -check-prefix=FP64-DENORMAL -check-prefix=FUNC %s +; RUN: llc -march=amdgcn -mcpu=tonga -mattr=+fp32-denormals,-fp64-denormals < %s | FileCheck -check-prefix=FP32-DENORMAL -check-prefix=FUNC %s +; RUN: llc -march=amdgcn -mcpu=tonga -mattr=+fp32-denormals,+fp64-denormals < %s | FileCheck -check-prefix=BOTH-DENORMAL -check-prefix=FUNC %s +; RUN: llc -march=amdgcn -mcpu=tonga -mattr=-fp32-denormals,-fp64-denormals < %s | FileCheck -check-prefix=NO-DENORMAL -check-prefix=FUNC %s +; RUN: llc -march=amdgcn -mcpu=tonga < %s | FileCheck -check-prefix=DEFAULT -check-prefix=FUNC %s +; RUN: llc -march=amdgcn -mcpu=tonga -mattr=-fp32-denormals < %s | FileCheck -check-prefix=DEFAULT -check-prefix=FUNC %s +; RUN: llc -march=amdgcn -mcpu=tonga -mattr=+fp64-denormals < %s | FileCheck -check-prefix=DEFAULT -check-prefix=FUNC %s ; FUNC-LABEL: {{^}}test_kernel: diff --git a/test/CodeGen/R600/ds-negative-offset-addressing-mode-loop.ll b/test/CodeGen/R600/ds-negative-offset-addressing-mode-loop.ll index f334062..41afd50 100644 --- a/test/CodeGen/R600/ds-negative-offset-addressing-mode-loop.ll +++ b/test/CodeGen/R600/ds-negative-offset-addressing-mode-loop.ll @@ -1,5 +1,5 @@ -; RUN: llc -march=r600 -mcpu=SI -verify-machineinstrs -mattr=+load-store-opt -enable-misched < %s | FileCheck -check-prefix=SI --check-prefix=CHECK %s -; RUN: llc -march=r600 -mcpu=bonaire -verify-machineinstrs -mattr=+load-store-opt -enable-misched < %s | FileCheck -check-prefix=CI --check-prefix=CHECK %s +; RUN: llc -march=amdgcn -mcpu=SI -verify-machineinstrs -mattr=+load-store-opt -enable-misched < %s | FileCheck -check-prefix=SI --check-prefix=CHECK %s +; RUN: llc -march=amdgcn -mcpu=bonaire -verify-machineinstrs -mattr=+load-store-opt -enable-misched < %s | FileCheck -check-prefix=CI --check-prefix=CHECK %s declare i32 @llvm.r600.read.tidig.x() #0 declare void @llvm.AMDGPU.barrier.local() #1 diff --git a/test/CodeGen/R600/ds_read2.ll b/test/CodeGen/R600/ds_read2.ll index 6e0c8be..c06b0b1 100644 --- a/test/CodeGen/R600/ds_read2.ll +++ b/test/CodeGen/R600/ds_read2.ll @@ -1,4 +1,4 @@ -; RUN: llc -march=r600 -mcpu=bonaire -verify-machineinstrs -mattr=+load-store-opt -enable-misched < %s | FileCheck -strict-whitespace -check-prefix=SI %s +; RUN: llc -march=amdgcn -mcpu=bonaire -verify-machineinstrs -mattr=+load-store-opt -enable-misched < %s | FileCheck -strict-whitespace -check-prefix=SI %s ; FIXME: We don't get cases where the address was an SGPR because we ; get a copy to the address register for each one. 
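For reference, a minimal test written in the convention these updates converge on might look like the sketch below. It is illustrative only and not taken from the patch: the file, function name, and exact check lines are placeholders. It shows the pattern applied throughout — RUN lines driven with -march=amdgcn for both SI and tonga, checks common to both targets moved to a GCN prefix, and target-specific details such as the 0xb vs 0x2c kernel-argument offsets kept under the SI/VI prefixes.

; RUN: llc -march=amdgcn -mcpu=SI -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=SI -check-prefix=FUNC %s
; RUN: llc -march=amdgcn -mcpu=tonga -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=VI -check-prefix=FUNC %s

; Illustrative kernel: stores a scalar kernel argument to global memory.
; Shared checks use the GCN prefix; SI/VI carry the target-specific
; kernel-argument load offsets (0xb on SI, 0x2c on VI).
; FUNC-LABEL: {{^}}store_arg_i32:
; SI: s_load_dword s{{[0-9]+}}, s[{{[0-9]+:[0-9]+}}], 0xb
; VI: s_load_dword s{{[0-9]+}}, s[{{[0-9]+:[0-9]+}}], 0x2c
; GCN: buffer_store_dword
; GCN: s_endpgm
define void @store_arg_i32(i32 addrspace(1)* %out, i32 %in) {
entry:
  store i32 %in, i32 addrspace(1)* %out
  ret void
}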
diff --git a/test/CodeGen/R600/ds_read2_offset_order.ll b/test/CodeGen/R600/ds_read2_offset_order.ll new file mode 100644 index 0000000..44306bc --- /dev/null +++ b/test/CodeGen/R600/ds_read2_offset_order.ll @@ -0,0 +1,45 @@ +; RUN: llc -march=amdgcn -mcpu=bonaire -verify-machineinstrs -mattr=+load-store-opt -enable-misched < %s | FileCheck -strict-whitespace -check-prefix=SI %s +; RUN: llc -march=amdgcn -mcpu=tonga -verify-machineinstrs -mattr=+load-store-opt -enable-misched < %s | FileCheck -strict-whitespace -check-prefix=SI %s + +; XFAIL: * + +@lds = addrspace(3) global [512 x float] undef, align 4 + +; SI-LABEL: {{^}}offset_order: + +; SI: ds_read_b32 v{{[0-9]+}}, v{{[0-9]+}} offset:56 +; SI: ds_read2st64_b32 v[{{[0-9]+}}:{{[0-9]+}}], v{{[0-9]+}} offset0:0 offset1:4 +; SI: ds_read2_b32 v[{{[0-9]+}}:{{[0-9]+}}], v{{[0-9]+}} offset0:2 offset1:3 +; SI: ds_read2_b32 v[{{[0-9]+}}:{{[0-9]+}}], v{{[0-9]+}} offset0:11 offset1:1 + +define void @offset_order(float addrspace(1)* %out) { +entry: + %ptr0 = getelementptr inbounds [512 x float] addrspace(3)* @lds, i32 0, i32 0 + %val0 = load float addrspace(3)* %ptr0 + + %ptr1 = getelementptr inbounds [512 x float] addrspace(3)* @lds, i32 0, i32 256 + %val1 = load float addrspace(3)* %ptr1 + %add1 = fadd float %val0, %val1 + + %ptr2 = getelementptr inbounds [512 x float] addrspace(3)* @lds, i32 0, i32 3 + %val2 = load float addrspace(3)* %ptr2 + %add2 = fadd float %add1, %val2 + + %ptr3 = getelementptr inbounds [512 x float] addrspace(3)* @lds, i32 0, i32 2 + %val3 = load float addrspace(3)* %ptr3 + %add3 = fadd float %add2, %val3 + + %ptr4 = getelementptr inbounds [512 x float] addrspace(3)* @lds, i32 0, i32 12 + %val4 = load float addrspace(3)* %ptr4 + %add4 = fadd float %add3, %val4 + + %ptr5 = getelementptr inbounds [512 x float] addrspace(3)* @lds, i32 0, i32 14 + %val5 = load float addrspace(3)* %ptr5 + %add5 = fadd float %add4, %val5 + + %ptr6 = getelementptr inbounds [512 x float] addrspace(3)* @lds, i32 0, i32 11 + %val6 = load float addrspace(3)* %ptr6 + %add6 = fadd float %add5, %val6 + store float %add6, float addrspace(1)* %out + ret void +} diff --git a/test/CodeGen/R600/ds_read2st64.ll b/test/CodeGen/R600/ds_read2st64.ll index 3e98e59..efd875e 100644 --- a/test/CodeGen/R600/ds_read2st64.ll +++ b/test/CodeGen/R600/ds_read2st64.ll @@ -1,4 +1,4 @@ -; RUN: llc -march=r600 -mcpu=bonaire -verify-machineinstrs -mattr=+load-store-opt -enable-misched < %s | FileCheck -check-prefix=SI %s +; RUN: llc -march=amdgcn -mcpu=bonaire -verify-machineinstrs -mattr=+load-store-opt -enable-misched < %s | FileCheck -check-prefix=SI %s @lds = addrspace(3) global [512 x float] undef, align 4 @lds.f64 = addrspace(3) global [512 x double] undef, align 8 @@ -65,8 +65,8 @@ define void @simple_read2st64_f32_max_offset(float addrspace(1)* %out, float add ; SI-LABEL: @simple_read2st64_f32_over_max_offset ; SI-NOT: ds_read2st64_b32 -; SI: ds_read_b32 {{v[0-9]+}}, {{v[0-9]+}} offset:256 ; SI: v_add_i32_e32 [[BIGADD:v[0-9]+]], 0x10000, {{v[0-9]+}} +; SI: ds_read_b32 {{v[0-9]+}}, {{v[0-9]+}} offset:256 ; SI: ds_read_b32 {{v[0-9]+}}, [[BIGADD]] ; SI: s_endpgm define void @simple_read2st64_f32_over_max_offset(float addrspace(1)* %out, float addrspace(3)* %lds) #0 { @@ -197,8 +197,8 @@ define void @simple_read2st64_f64_max_offset(double addrspace(1)* %out, double a ; SI-LABEL: @simple_read2st64_f64_over_max_offset ; SI-NOT: ds_read2st64_b64 -; SI: ds_read_b64 {{v\[[0-9]+:[0-9]+\]}}, {{v[0-9]+}} offset:512 ; SI: v_add_i32_e32 [[BIGADD:v[0-9]+]], 0x10000, {{v[0-9]+}} +; SI: 
ds_read_b64 {{v\[[0-9]+:[0-9]+\]}}, {{v[0-9]+}} offset:512 ; SI: ds_read_b64 {{v\[[0-9]+:[0-9]+\]}}, [[BIGADD]] ; SI: s_endpgm define void @simple_read2st64_f64_over_max_offset(double addrspace(1)* %out, double addrspace(3)* %lds) #0 { diff --git a/test/CodeGen/R600/ds_write2.ll b/test/CodeGen/R600/ds_write2.ll index 1807fb5..e2db81a 100644 --- a/test/CodeGen/R600/ds_write2.ll +++ b/test/CodeGen/R600/ds_write2.ll @@ -1,4 +1,4 @@ -; RUN: llc -march=r600 -mcpu=bonaire -verify-machineinstrs -mattr=+load-store-opt -enable-misched < %s | FileCheck -strict-whitespace -check-prefix=SI %s +; RUN: llc -march=amdgcn -mcpu=bonaire -verify-machineinstrs -mattr=+load-store-opt -enable-misched < %s | FileCheck -strict-whitespace -check-prefix=SI %s @lds = addrspace(3) global [512 x float] undef, align 4 @lds.f64 = addrspace(3) global [512 x double] undef, align 8 @@ -7,7 +7,7 @@ ; SI-LABEL: @simple_write2_one_val_f32 ; SI-DAG: buffer_load_dword [[VAL:v[0-9]+]] ; SI-DAG: v_lshlrev_b32_e32 [[VPTR:v[0-9]+]], 2, v{{[0-9]+}} -; SI: ds_write2_b32 [[VPTR]], [[VAL]], [[VAL]] offset0:0 offset1:8 [M0] +; SI: ds_write2_b32 [[VPTR]], [[VAL]], [[VAL]] offset0:0 offset1:8 ; SI: s_endpgm define void @simple_write2_one_val_f32(float addrspace(1)* %C, float addrspace(1)* %in) #0 { %x.i = tail call i32 @llvm.r600.read.tidig.x() #1 @@ -23,9 +23,9 @@ define void @simple_write2_one_val_f32(float addrspace(1)* %C, float addrspace(1 ; SI-LABEL: @simple_write2_two_val_f32 ; SI-DAG: buffer_load_dword [[VAL0:v[0-9]+]], {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, 0 addr64{{$}} -; SI-DAG: buffer_load_dword [[VAL1:v[0-9]+]], {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, 0 addr64 offset:0x4 +; SI-DAG: buffer_load_dword [[VAL1:v[0-9]+]], {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, 0 addr64 offset:4 ; SI-DAG: v_lshlrev_b32_e32 [[VPTR:v[0-9]+]], 2, v{{[0-9]+}} -; SI: ds_write2_b32 [[VPTR]], [[VAL0]], [[VAL1]] offset0:0 offset1:8 [M0] +; SI: ds_write2_b32 [[VPTR]], [[VAL0]], [[VAL1]] offset0:0 offset1:8 ; SI: s_endpgm define void @simple_write2_two_val_f32(float addrspace(1)* %C, float addrspace(1)* %in) #0 { %x.i = tail call i32 @llvm.r600.read.tidig.x() #1 @@ -84,7 +84,7 @@ define void @simple_write2_two_val_f32_volatile_1(float addrspace(1)* %C, float ; SI: buffer_load_dwordx2 v{{\[}}[[VAL0:[0-9]+]]:{{[0-9]+\]}} ; SI: buffer_load_dwordx2 v{{\[[0-9]+}}:[[VAL1:[0-9]+]]{{\]}} ; SI: v_lshlrev_b32_e32 [[VPTR:v[0-9]+]], 2, v{{[0-9]+}} -; SI: ds_write2_b32 [[VPTR]], v[[VAL0]], v[[VAL1]] offset0:0 offset1:8 [M0] +; SI: ds_write2_b32 [[VPTR]], v[[VAL0]], v[[VAL1]] offset0:0 offset1:8 ; SI: s_endpgm define void @simple_write2_two_val_subreg2_mixed_f32(float addrspace(1)* %C, <2 x float> addrspace(1)* %in) #0 { %x.i = tail call i32 @llvm.r600.read.tidig.x() #1 @@ -105,7 +105,7 @@ define void @simple_write2_two_val_subreg2_mixed_f32(float addrspace(1)* %C, <2 ; SI-LABEL: @simple_write2_two_val_subreg2_f32 ; SI-DAG: buffer_load_dwordx2 v{{\[}}[[VAL0:[0-9]+]]:[[VAL1:[0-9]+]]{{\]}} ; SI-DAG: v_lshlrev_b32_e32 [[VPTR:v[0-9]+]], 2, v{{[0-9]+}} -; SI: ds_write2_b32 [[VPTR]], v[[VAL0]], v[[VAL1]] offset0:0 offset1:8 [M0] +; SI: ds_write2_b32 [[VPTR]], v[[VAL0]], v[[VAL1]] offset0:0 offset1:8 ; SI: s_endpgm define void @simple_write2_two_val_subreg2_f32(float addrspace(1)* %C, <2 x float> addrspace(1)* %in) #0 { %x.i = tail call i32 @llvm.r600.read.tidig.x() #1 @@ -124,7 +124,7 @@ define void @simple_write2_two_val_subreg2_f32(float addrspace(1)* %C, <2 x floa ; SI-LABEL: @simple_write2_two_val_subreg4_f32 ; SI-DAG: buffer_load_dwordx4 
v{{\[}}[[VAL0:[0-9]+]]:[[VAL1:[0-9]+]]{{\]}} ; SI-DAG: v_lshlrev_b32_e32 [[VPTR:v[0-9]+]], 2, v{{[0-9]+}} -; SI: ds_write2_b32 [[VPTR]], v[[VAL0]], v[[VAL1]] offset0:0 offset1:8 [M0] +; SI: ds_write2_b32 [[VPTR]], v[[VAL0]], v[[VAL1]] offset0:0 offset1:8 ; SI: s_endpgm define void @simple_write2_two_val_subreg4_f32(float addrspace(1)* %C, <4 x float> addrspace(1)* %in) #0 { %x.i = tail call i32 @llvm.r600.read.tidig.x() #1 @@ -142,9 +142,9 @@ define void @simple_write2_two_val_subreg4_f32(float addrspace(1)* %C, <4 x floa ; SI-LABEL: @simple_write2_two_val_max_offset_f32 ; SI-DAG: buffer_load_dword [[VAL0:v[0-9]+]], {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, 0 addr64{{$}} -; SI-DAG: buffer_load_dword [[VAL1:v[0-9]+]], {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, 0 addr64 offset:0x4 +; SI-DAG: buffer_load_dword [[VAL1:v[0-9]+]], {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, 0 addr64 offset:4 ; SI-DAG: v_lshlrev_b32_e32 [[VPTR:v[0-9]+]], 2, v{{[0-9]+}} -; SI: ds_write2_b32 [[VPTR]], [[VAL0]], [[VAL1]] offset0:0 offset1:255 [M0] +; SI: ds_write2_b32 [[VPTR]], [[VAL0]], [[VAL1]] offset0:0 offset1:255 ; SI: s_endpgm define void @simple_write2_two_val_max_offset_f32(float addrspace(1)* %C, float addrspace(1)* %in) #0 { %x.i = tail call i32 @llvm.r600.read.tidig.x() #1 @@ -268,7 +268,7 @@ define void @write2_ptr_subreg_arg_two_val_f32(float addrspace(1)* %C, float add ; SI-LABEL: @simple_write2_one_val_f64 ; SI: buffer_load_dwordx2 [[VAL:v\[[0-9]+:[0-9]+\]]], ; SI: v_lshlrev_b32_e32 [[VPTR:v[0-9]+]], 3, v{{[0-9]+}} -; SI: ds_write2_b64 [[VPTR]], [[VAL]], [[VAL]] offset0:0 offset1:8 [M0] +; SI: ds_write2_b64 [[VPTR]], [[VAL]], [[VAL]] offset0:0 offset1:8 ; SI: s_endpgm define void @simple_write2_one_val_f64(double addrspace(1)* %C, double addrspace(1)* %in) #0 { %x.i = tail call i32 @llvm.r600.read.tidig.x() #1 @@ -285,8 +285,8 @@ define void @simple_write2_one_val_f64(double addrspace(1)* %C, double addrspace ; SI-LABEL: @misaligned_simple_write2_one_val_f64 ; SI-DAG: buffer_load_dwordx2 v{{\[}}[[VAL0:[0-9]+]]:[[VAL1:[0-9]+]]{{\]}} ; SI-DAG: v_lshlrev_b32_e32 [[VPTR:v[0-9]+]], 3, v{{[0-9]+}} -; SI: ds_write2_b32 [[VPTR]], v[[VAL0]], v[[VAL1]] offset0:0 offset1:1 [M0] -; SI: ds_write2_b32 [[VPTR]], v[[VAL0]], v[[VAL1]] offset0:14 offset1:15 [M0] +; SI: ds_write2_b32 [[VPTR]], v[[VAL0]], v[[VAL1]] offset0:0 offset1:1 +; SI: ds_write2_b32 [[VPTR]], v[[VAL0]], v[[VAL1]] offset0:14 offset1:15 ; SI: s_endpgm define void @misaligned_simple_write2_one_val_f64(double addrspace(1)* %C, double addrspace(1)* %in, double addrspace(3)* %lds) #0 { %x.i = tail call i32 @llvm.r600.read.tidig.x() #1 @@ -302,9 +302,9 @@ define void @misaligned_simple_write2_one_val_f64(double addrspace(1)* %C, doubl ; SI-LABEL: @simple_write2_two_val_f64 ; SI-DAG: buffer_load_dwordx2 [[VAL0:v\[[0-9]+:[0-9]+\]]], {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, 0 addr64{{$}} -; SI-DAG: buffer_load_dwordx2 [[VAL1:v\[[0-9]+:[0-9]+\]]], {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, 0 addr64 offset:0x8 +; SI-DAG: buffer_load_dwordx2 [[VAL1:v\[[0-9]+:[0-9]+\]]], {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, 0 addr64 offset:8 ; SI-DAG: v_lshlrev_b32_e32 [[VPTR:v[0-9]+]], 3, v{{[0-9]+}} -; SI: ds_write2_b64 [[VPTR]], [[VAL0]], [[VAL1]] offset0:0 offset1:8 [M0] +; SI: ds_write2_b64 [[VPTR]], [[VAL0]], [[VAL1]] offset0:0 offset1:8 ; SI: s_endpgm define void @simple_write2_two_val_f64(double addrspace(1)* %C, double addrspace(1)* %in) #0 { %x.i = tail call i32 @llvm.r600.read.tidig.x() #1 diff --git 
a/test/CodeGen/R600/ds_write2st64.ll b/test/CodeGen/R600/ds_write2st64.ll index 4cafb7c..0f1c662 100644 --- a/test/CodeGen/R600/ds_write2st64.ll +++ b/test/CodeGen/R600/ds_write2st64.ll @@ -1,4 +1,4 @@ -; RUN: llc -march=r600 -mcpu=bonaire -verify-machineinstrs -mattr=+load-store-opt -enable-misched < %s | FileCheck -check-prefix=SI %s +; RUN: llc -march=amdgcn -mcpu=bonaire -verify-machineinstrs -mattr=+load-store-opt -enable-misched < %s | FileCheck -check-prefix=SI %s @lds = addrspace(3) global [512 x float] undef, align 4 @@ -7,7 +7,7 @@ ; SI-LABEL: @simple_write2st64_one_val_f32_0_1 ; SI-DAG: buffer_load_dword [[VAL:v[0-9]+]] ; SI-DAG: v_lshlrev_b32_e32 [[VPTR:v[0-9]+]], 2, v{{[0-9]+}} -; SI: ds_write2st64_b32 [[VPTR]], [[VAL]], [[VAL]] offset0:0 offset1:1 [M0] +; SI: ds_write2st64_b32 [[VPTR]], [[VAL]], [[VAL]] offset0:0 offset1:1 ; SI: s_endpgm define void @simple_write2st64_one_val_f32_0_1(float addrspace(1)* %C, float addrspace(1)* %in) #0 { %x.i = tail call i32 @llvm.r600.read.tidig.x() #1 @@ -23,9 +23,9 @@ define void @simple_write2st64_one_val_f32_0_1(float addrspace(1)* %C, float add ; SI-LABEL: @simple_write2st64_two_val_f32_2_5 ; SI-DAG: buffer_load_dword [[VAL0:v[0-9]+]], {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, 0 addr64{{$}} -; SI-DAG: buffer_load_dword [[VAL1:v[0-9]+]], {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, 0 addr64 offset:0x4 +; SI-DAG: buffer_load_dword [[VAL1:v[0-9]+]], {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, 0 addr64 offset:4 ; SI-DAG: v_lshlrev_b32_e32 [[VPTR:v[0-9]+]], 2, v{{[0-9]+}} -; SI: ds_write2st64_b32 [[VPTR]], [[VAL0]], [[VAL1]] offset0:2 offset1:5 [M0] +; SI: ds_write2st64_b32 [[VPTR]], [[VAL0]], [[VAL1]] offset0:2 offset1:5 ; SI: s_endpgm define void @simple_write2st64_two_val_f32_2_5(float addrspace(1)* %C, float addrspace(1)* %in) #0 { %x.i = tail call i32 @llvm.r600.read.tidig.x() #1 @@ -44,9 +44,9 @@ define void @simple_write2st64_two_val_f32_2_5(float addrspace(1)* %C, float add ; SI-LABEL: @simple_write2st64_two_val_max_offset_f32 ; SI-DAG: buffer_load_dword [[VAL0:v[0-9]+]], {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, 0 addr64{{$}} -; SI-DAG: buffer_load_dword [[VAL1:v[0-9]+]], {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, 0 addr64 offset:0x4 +; SI-DAG: buffer_load_dword [[VAL1:v[0-9]+]], {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, 0 addr64 offset:4 ; SI-DAG: v_lshlrev_b32_e32 [[VPTR:v[0-9]+]], 2, v{{[0-9]+}} -; SI: ds_write2st64_b32 [[VPTR]], [[VAL0]], [[VAL1]] offset0:0 offset1:255 [M0] +; SI: ds_write2st64_b32 [[VPTR]], [[VAL0]], [[VAL1]] offset0:0 offset1:255 ; SI: s_endpgm define void @simple_write2st64_two_val_max_offset_f32(float addrspace(1)* %C, float addrspace(1)* %in, float addrspace(3)* %lds) #0 { %x.i = tail call i32 @llvm.r600.read.tidig.x() #1 @@ -64,9 +64,9 @@ define void @simple_write2st64_two_val_max_offset_f32(float addrspace(1)* %C, fl ; SI-LABEL: @simple_write2st64_two_val_max_offset_f64 ; SI-DAG: buffer_load_dwordx2 [[VAL0:v\[[0-9]+:[0-9]+\]]], {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, 0 addr64{{$}} -; SI-DAG: buffer_load_dwordx2 [[VAL1:v\[[0-9]+:[0-9]+\]]], {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, 0 addr64 offset:0x8 +; SI-DAG: buffer_load_dwordx2 [[VAL1:v\[[0-9]+:[0-9]+\]]], {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, 0 addr64 offset:8 ; SI-DAG: v_add_i32_e32 [[VPTR:v[0-9]+]], -; SI: ds_write2st64_b64 [[VPTR]], [[VAL0]], [[VAL1]] offset0:4 offset1:127 [M0] +; SI: ds_write2st64_b64 [[VPTR]], [[VAL0]], [[VAL1]] offset0:4 offset1:127 ; SI: s_endpgm define void 
@simple_write2st64_two_val_max_offset_f64(double addrspace(1)* %C, double addrspace(1)* %in, double addrspace(3)* %lds) #0 { %x.i = tail call i32 @llvm.r600.read.tidig.x() #1 diff --git a/test/CodeGen/R600/elf.ll b/test/CodeGen/R600/elf.ll index 6c521d0..aca3109 100644 --- a/test/CodeGen/R600/elf.ll +++ b/test/CodeGen/R600/elf.ll @@ -1,15 +1,21 @@ -; RUN: llc < %s -march=r600 -mcpu=SI -verify-machineinstrs -filetype=obj | llvm-readobj -s - | FileCheck --check-prefix=ELF-CHECK %s -; RUN: llc < %s -march=r600 -mcpu=SI -verify-machineinstrs -o - | FileCheck --check-prefix=CONFIG-CHECK %s +; RUN: llc < %s -march=amdgcn -mcpu=SI -verify-machineinstrs -filetype=obj | llvm-readobj -s -symbols - | FileCheck --check-prefix=ELF %s +; RUN: llc < %s -march=amdgcn -mcpu=SI -verify-machineinstrs -o - | FileCheck --check-prefix=CONFIG %s +; RUN: llc < %s -march=amdgcn -mcpu=tonga -verify-machineinstrs -filetype=obj | llvm-readobj -s -symbols - | FileCheck --check-prefix=ELF %s +; RUN: llc < %s -march=amdgcn -mcpu=tonga -verify-machineinstrs -o - | FileCheck --check-prefix=CONFIG %s -; ELF-CHECK: Format: ELF32 -; ELF-CHECK: Name: .AMDGPU.config -; ELF-CHECK: Type: SHT_PROGBITS +; ELF: Format: ELF32 +; ELF: Name: .AMDGPU.config +; ELF: Type: SHT_PROGBITS -; CONFIG-CHECK: .align 256 -; CONFIG-CHECK: test: -; CONFIG-CHECK: .section .AMDGPU.config -; CONFIG-CHECK-NEXT: .long 45096 -; CONFIG-CHECK-NEXT: .long 0 +; ELF: Symbol { +; ELF: Name: test +; ELF: Binding: Global + +; CONFIG: .align 256 +; CONFIG: test: +; CONFIG: .section .AMDGPU.config +; CONFIG-NEXT: .long 45096 +; CONFIG-NEXT: .long 0 define void @test(i32 %p) #0 { %i = add i32 %p, 2 %r = bitcast i32 %i to float diff --git a/test/CodeGen/R600/elf.r600.ll b/test/CodeGen/R600/elf.r600.ll index 4436c07..51cd085 100644 --- a/test/CodeGen/R600/elf.r600.ll +++ b/test/CodeGen/R600/elf.r600.ll @@ -1,14 +1,14 @@ -; RUN: llc < %s -march=r600 -mcpu=redwood -filetype=obj | llvm-readobj -s - | FileCheck --check-prefix=ELF-CHECK %s -; RUN: llc < %s -march=r600 -mcpu=redwood -o - | FileCheck --check-prefix=CONFIG-CHECK %s +; RUN: llc < %s -march=r600 -mcpu=redwood -filetype=obj | llvm-readobj -s - | FileCheck --check-prefix=ELF %s +; RUN: llc < %s -march=r600 -mcpu=redwood -o - | FileCheck --check-prefix=CONFIG %s -; ELF-CHECK: Format: ELF32 -; ELF-CHECK: Name: .AMDGPU.config +; ELF: Format: ELF32 +; ELF: Name: .AMDGPU.config -; CONFIG-CHECK: .section .AMDGPU.config -; CONFIG-CHECK-NEXT: .long 166100 -; CONFIG-CHECK-NEXT: .long 2 -; CONFIG-CHECK-NEXT: .long 165900 -; CONFIG-CHECK-NEXT: .long 0 +; CONFIG: .section .AMDGPU.config +; CONFIG-NEXT: .long 166100 +; CONFIG-NEXT: .long 2 +; CONFIG-NEXT: .long 165900 +; CONFIG-NEXT: .long 0 define void @test(float addrspace(1)* %out, i32 %p) { %i = add i32 %p, 2 %r = bitcast i32 %i to float diff --git a/test/CodeGen/R600/empty-function.ll b/test/CodeGen/R600/empty-function.ll index d4ff803..b5593eb 100644 --- a/test/CodeGen/R600/empty-function.ll +++ b/test/CodeGen/R600/empty-function.ll @@ -1,4 +1,5 @@ -; RUN: llc -march=r600 -mcpu=SI -verify-machineinstrs < %s | FileCheck -check-prefix=SI %s +; RUN: llc -march=amdgcn -mcpu=SI -verify-machineinstrs < %s | FileCheck -check-prefix=SI %s +; RUN: llc -march=amdgcn -mcpu=tonga -verify-machineinstrs < %s | FileCheck -check-prefix=SI %s ; Make sure we don't assert on empty functions diff --git a/test/CodeGen/R600/endcf-loop-header.ll b/test/CodeGen/R600/endcf-loop-header.ll new file mode 100644 index 0000000..e3c5b3c --- /dev/null +++ b/test/CodeGen/R600/endcf-loop-header.ll @@ 
-0,0 +1,34 @@ +; RUN: llc < %s -march=amdgcn -mcpu=SI -verify-machineinstrs | FileCheck %s + +; This tests that the llvm.SI.end.cf intrinsic is not inserted into the +; loop block. This intrinsic will be lowered to s_or_b64 by the code +; generator. + +; CHECK-LABEL: {{^}}test: + +; This is was lowered from the llvm.SI.end.cf intrinsic: +; CHECK: s_or_b64 exec, exec + +; CHECK: [[LOOP_LABEL:[0-9A-Za-z_]+]]: ; %loop{{$}} +; CHECK-NOT: s_or_b64 exec, exec +; CHECK: s_cbranch_execnz [[LOOP_LABEL]] +define void @test(i32 addrspace(1)* %out, i32 %cond) { +entry: + %tmp0 = icmp eq i32 %cond, 0 + br i1 %tmp0, label %if, label %loop + +if: + store i32 0, i32 addrspace(1)* %out + br label %loop + +loop: + %tmp1 = phi i32 [0, %entry], [0, %if], [%inc, %loop] + %inc = add i32 %tmp1, %cond + %tmp2 = icmp ugt i32 %inc, 10 + br i1 %tmp2, label %done, label %loop + +done: + %tmp3 = getelementptr i32 addrspace(1)* %out, i64 1 + store i32 %inc, i32 addrspace(1)* %tmp3 + ret void +} diff --git a/test/CodeGen/R600/extload-private.ll b/test/CodeGen/R600/extload-private.ll new file mode 100644 index 0000000..fec8682 --- /dev/null +++ b/test/CodeGen/R600/extload-private.ll @@ -0,0 +1,46 @@ +; RUN: llc < %s -march=amdgcn -mcpu=SI -mattr=-promote-alloca -verify-machineinstrs | FileCheck --check-prefix=SI --check-prefix=FUNC %s +; RUN: llc < %s -march=amdgcn -mcpu=tonga -mattr=-promote-alloca -verify-machineinstrs | FileCheck --check-prefix=SI --check-prefix=FUNC %s + +; FUNC-LABEL: {{^}}load_i8_sext_private: +; SI: buffer_load_sbyte v{{[0-9]+}}, v{{[0-9]+}}, s[{{[0-9]+:[0-9]+}}], s{{[0-9]+}} offen +define void @load_i8_sext_private(i32 addrspace(1)* %out) { +entry: + %tmp0 = alloca i8 + %tmp1 = load i8* %tmp0 + %tmp2 = sext i8 %tmp1 to i32 + store i32 %tmp2, i32 addrspace(1)* %out + ret void +} + +; FUNC-LABEL: {{^}}load_i8_zext_private: +; SI: buffer_load_ubyte v{{[0-9]+}}, v{{[0-9]+}}, s[{{[0-9]+:[0-9]+}}], s{{[0-9]+}} offen +define void @load_i8_zext_private(i32 addrspace(1)* %out) { +entry: + %tmp0 = alloca i8 + %tmp1 = load i8* %tmp0 + %tmp2 = zext i8 %tmp1 to i32 + store i32 %tmp2, i32 addrspace(1)* %out + ret void +} + +; FUNC-LABEL: {{^}}load_i16_sext_private: +; SI: buffer_load_sshort v{{[0-9]+}}, v{{[0-9]+}}, s[{{[0-9]+:[0-9]+}}], s{{[0-9]+}} offen +define void @load_i16_sext_private(i32 addrspace(1)* %out) { +entry: + %tmp0 = alloca i16 + %tmp1 = load i16* %tmp0 + %tmp2 = sext i16 %tmp1 to i32 + store i32 %tmp2, i32 addrspace(1)* %out + ret void +} + +; FUNC-LABEL: {{^}}load_i16_zext_private: +; SI: buffer_load_ushort v{{[0-9]+}}, v{{[0-9]+}}, s[{{[0-9]+:[0-9]+}}], s{{[0-9]+}} offen +define void @load_i16_zext_private(i32 addrspace(1)* %out) { +entry: + %tmp0 = alloca i16 + %tmp1 = load i16* %tmp0 + %tmp2 = zext i16 %tmp1 to i32 + store i32 %tmp2, i32 addrspace(1)* %out + ret void +} diff --git a/test/CodeGen/R600/extload.ll b/test/CodeGen/R600/extload.ll index 5bda8f8..77e5dc3 100644 --- a/test/CodeGen/R600/extload.ll +++ b/test/CodeGen/R600/extload.ll @@ -1,9 +1,11 @@ ; RUN: llc -march=r600 -mcpu=cypress < %s | FileCheck -check-prefix=EG -check-prefix=FUNC %s -; RUN: llc -march=r600 -mcpu=SI -verify-machineinstrs< %s | FileCheck -check-prefix=SI -check-prefix=FUNC %s +; RUN: llc -march=amdgcn -mcpu=SI -verify-machineinstrs< %s | FileCheck -check-prefix=SI -check-prefix=FUNC %s +; RUN: llc -march=amdgcn -mcpu=tonga -verify-machineinstrs< %s | FileCheck -check-prefix=SI -check-prefix=FUNC %s ; FUNC-LABEL: {{^}}anyext_load_i8: -; EG: AND_INT -; EG: 255 +; EG: MEM_RAT_CACHELESS STORE_RAW 
[[VAL:T[0-9]+.[XYZW]]], +; EG: VTX_READ_32 [[VAL]] + define void @anyext_load_i8(i8 addrspace(1)* nocapture noalias %out, i8 addrspace(1)* nocapture noalias %src) nounwind { %cast = bitcast i8 addrspace(1)* %src to i32 addrspace(1)* %load = load i32 addrspace(1)* %cast, align 1 @@ -14,10 +16,9 @@ define void @anyext_load_i8(i8 addrspace(1)* nocapture noalias %out, i8 addrspac } ; FUNC-LABEL: {{^}}anyext_load_i16: -; EG: AND_INT -; EG: AND_INT -; EG-DAG: 65535 -; EG-DAG: -65536 +; EG: MEM_RAT_CACHELESS STORE_RAW [[VAL:T[0-9]+.[XYZW]]], +; EG: VTX_READ_32 [[VAL]] + define void @anyext_load_i16(i16 addrspace(1)* nocapture noalias %out, i16 addrspace(1)* nocapture noalias %src) nounwind { %cast = bitcast i16 addrspace(1)* %src to i32 addrspace(1)* %load = load i32 addrspace(1)* %cast, align 1 @@ -28,8 +29,8 @@ define void @anyext_load_i16(i16 addrspace(1)* nocapture noalias %out, i16 addrs } ; FUNC-LABEL: {{^}}anyext_load_lds_i8: -; EG: AND_INT -; EG: 255 +; EG: LDS_READ_RET {{.*}}, [[VAL:T[0-9]+.[XYZW]]] +; EG: LDS_WRITE * [[VAL]] define void @anyext_load_lds_i8(i8 addrspace(3)* nocapture noalias %out, i8 addrspace(3)* nocapture noalias %src) nounwind { %cast = bitcast i8 addrspace(3)* %src to i32 addrspace(3)* %load = load i32 addrspace(3)* %cast, align 1 @@ -40,10 +41,8 @@ define void @anyext_load_lds_i8(i8 addrspace(3)* nocapture noalias %out, i8 addr } ; FUNC-LABEL: {{^}}anyext_load_lds_i16: -; EG: AND_INT -; EG: AND_INT -; EG-DAG: 65535 -; EG-DAG: -65536 +; EG: LDS_READ_RET {{.*}}, [[VAL:T[0-9]+.[XYZW]]] +; EG: LDS_WRITE * [[VAL]] define void @anyext_load_lds_i16(i16 addrspace(3)* nocapture noalias %out, i16 addrspace(3)* nocapture noalias %src) nounwind { %cast = bitcast i16 addrspace(3)* %src to i32 addrspace(3)* %load = load i32 addrspace(3)* %cast, align 1 @@ -52,72 +51,3 @@ define void @anyext_load_lds_i16(i16 addrspace(3)* nocapture noalias %out, i16 a store <2 x i16> %x, <2 x i16> addrspace(3)* %castOut, align 1 ret void } - -; FUNC-LABEL: {{^}}sextload_global_i8_to_i64: -; SI: buffer_load_sbyte [[LOAD:v[0-9]+]], -; SI: v_ashrrev_i32_e32 v{{[0-9]+}}, 31, [[LOAD]] -; SI: buffer_store_dwordx2 -define void @sextload_global_i8_to_i64(i64 addrspace(1)* %out, i8 addrspace(1)* %in) nounwind { - %a = load i8 addrspace(1)* %in, align 8 - %ext = sext i8 %a to i64 - store i64 %ext, i64 addrspace(1)* %out, align 8 - ret void -} - -; FUNC-LABEL: {{^}}sextload_global_i16_to_i64: -; SI: buffer_load_sshort [[LOAD:v[0-9]+]], -; SI: v_ashrrev_i32_e32 v{{[0-9]+}}, 31, [[LOAD]] -; SI: buffer_store_dwordx2 -define void @sextload_global_i16_to_i64(i64 addrspace(1)* %out, i16 addrspace(1)* %in) nounwind { - %a = load i16 addrspace(1)* %in, align 8 - %ext = sext i16 %a to i64 - store i64 %ext, i64 addrspace(1)* %out, align 8 - ret void -} - -; FUNC-LABEL: {{^}}sextload_global_i32_to_i64: -; SI: buffer_load_dword [[LOAD:v[0-9]+]], -; SI: v_ashrrev_i32_e32 v{{[0-9]+}}, 31, [[LOAD]] -; SI: buffer_store_dwordx2 -define void @sextload_global_i32_to_i64(i64 addrspace(1)* %out, i32 addrspace(1)* %in) nounwind { - %a = load i32 addrspace(1)* %in, align 8 - %ext = sext i32 %a to i64 - store i64 %ext, i64 addrspace(1)* %out, align 8 - ret void -} - -; FUNC-LABEL: {{^}}zextload_global_i8_to_i64: -; SI-DAG: s_mov_b32 [[ZERO:s[0-9]+]], 0{{$}} -; SI-DAG: buffer_load_ubyte [[LOAD:v[0-9]+]], -; SI: v_mov_b32_e32 {{v[0-9]+}}, [[ZERO]] -; SI: buffer_store_dwordx2 -define void @zextload_global_i8_to_i64(i64 addrspace(1)* %out, i8 addrspace(1)* %in) nounwind { - %a = load i8 addrspace(1)* %in, align 8 - %ext = zext i8 %a to 
i64 - store i64 %ext, i64 addrspace(1)* %out, align 8 - ret void -} - -; FUNC-LABEL: {{^}}zextload_global_i16_to_i64: -; SI-DAG: s_mov_b32 [[ZERO:s[0-9]+]], 0{{$}} -; SI-DAG: buffer_load_ushort [[LOAD:v[0-9]+]], -; SI: v_mov_b32_e32 {{v[0-9]+}}, [[ZERO]] -; SI: buffer_store_dwordx2 -define void @zextload_global_i16_to_i64(i64 addrspace(1)* %out, i16 addrspace(1)* %in) nounwind { - %a = load i16 addrspace(1)* %in, align 8 - %ext = zext i16 %a to i64 - store i64 %ext, i64 addrspace(1)* %out, align 8 - ret void -} - -; FUNC-LABEL: {{^}}zextload_global_i32_to_i64: -; SI-DAG: s_mov_b32 [[ZERO:s[0-9]+]], 0{{$}} -; SI-DAG: buffer_load_dword [[LOAD:v[0-9]+]], -; SI: v_mov_b32_e32 {{v[0-9]+}}, [[ZERO]] -; SI: buffer_store_dwordx2 -define void @zextload_global_i32_to_i64(i64 addrspace(1)* %out, i32 addrspace(1)* %in) nounwind { - %a = load i32 addrspace(1)* %in, align 8 - %ext = zext i32 %a to i64 - store i64 %ext, i64 addrspace(1)* %out, align 8 - ret void -} diff --git a/test/CodeGen/R600/extract_vector_elt_i16.ll b/test/CodeGen/R600/extract_vector_elt_i16.ll index efdc1c8..0774a9a 100644 --- a/test/CodeGen/R600/extract_vector_elt_i16.ll +++ b/test/CodeGen/R600/extract_vector_elt_i16.ll @@ -1,4 +1,5 @@ -; RUN: llc -march=r600 -mcpu=SI -verify-machineinstrs< %s | FileCheck -check-prefix=SI -check-prefix=FUNC %s +; RUN: llc -march=amdgcn -mcpu=SI -verify-machineinstrs< %s | FileCheck -check-prefix=SI -check-prefix=FUNC %s +; RUN: llc -march=amdgcn -mcpu=tonga -verify-machineinstrs< %s | FileCheck -check-prefix=SI -check-prefix=FUNC %s ; FUNC-LABEL: {{^}}extract_vector_elt_v2i16: ; SI: buffer_load_ushort diff --git a/test/CodeGen/R600/fabs.f64.ll b/test/CodeGen/R600/fabs.f64.ll index d2ba320..d87c082 100644 --- a/test/CodeGen/R600/fabs.f64.ll +++ b/test/CodeGen/R600/fabs.f64.ll @@ -1,4 +1,4 @@ -; RUN: llc -march=r600 -mcpu=SI -verify-machineinstrs < %s | FileCheck -check-prefix=SI -check-prefix=FUNC %s +; RUN: llc -march=amdgcn -mcpu=SI -verify-machineinstrs < %s | FileCheck -check-prefix=SI -check-prefix=FUNC %s declare i32 @llvm.r600.read.tidig.x() nounwind readnone diff --git a/test/CodeGen/R600/fabs.ll b/test/CodeGen/R600/fabs.ll index 06cc97f..419a73d 100644 --- a/test/CodeGen/R600/fabs.ll +++ b/test/CodeGen/R600/fabs.ll @@ -1,4 +1,5 @@ -; RUN: llc -march=r600 -mcpu=SI -verify-machineinstrs < %s | FileCheck -check-prefix=SI -check-prefix=FUNC %s +; RUN: llc -march=amdgcn -mcpu=SI -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=SI -check-prefix=FUNC %s +; RUN: llc -march=amdgcn -mcpu=tonga -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=VI -check-prefix=FUNC %s ; RUN: llc -march=r600 -mcpu=redwood < %s | FileCheck -check-prefix=R600 -check-prefix=FUNC %s @@ -10,7 +11,7 @@ ; R600-NOT: AND ; R600: |PV.{{[XYZW]}}| -; SI: v_and_b32 +; GCN: v_and_b32 define void @fabs_fn_free(float addrspace(1)* %out, i32 %in) { %bc= bitcast i32 %in to float @@ -23,7 +24,7 @@ define void @fabs_fn_free(float addrspace(1)* %out, i32 %in) { ; R600-NOT: AND ; R600: |PV.{{[XYZW]}}| -; SI: v_and_b32 +; GCN: v_and_b32 define void @fabs_free(float addrspace(1)* %out, i32 %in) { %bc= bitcast i32 %in to float @@ -35,7 +36,7 @@ define void @fabs_free(float addrspace(1)* %out, i32 %in) { ; FUNC-LABEL: {{^}}fabs_f32: ; R600: |{{(PV|T[0-9])\.[XYZW]}}| -; SI: v_and_b32 +; GCN: v_and_b32 define void @fabs_f32(float addrspace(1)* %out, float %in) { %fabs = call float @llvm.fabs.f32(float %in) store float %fabs, float addrspace(1)* %out @@ -46,8 +47,8 @@ define void @fabs_f32(float 
addrspace(1)* %out, float %in) { ; R600: |{{(PV|T[0-9])\.[XYZW]}}| ; R600: |{{(PV|T[0-9])\.[XYZW]}}| -; SI: v_and_b32 -; SI: v_and_b32 +; GCN: v_and_b32 +; GCN: v_and_b32 define void @fabs_v2f32(<2 x float> addrspace(1)* %out, <2 x float> %in) { %fabs = call <2 x float> @llvm.fabs.v2f32(<2 x float> %in) store <2 x float> %fabs, <2 x float> addrspace(1)* %out @@ -60,20 +61,21 @@ define void @fabs_v2f32(<2 x float> addrspace(1)* %out, <2 x float> %in) { ; R600: |{{(PV|T[0-9])\.[XYZW]}}| ; R600: |{{(PV|T[0-9])\.[XYZW]}}| -; SI: v_and_b32 -; SI: v_and_b32 -; SI: v_and_b32 -; SI: v_and_b32 +; GCN: v_and_b32 +; GCN: v_and_b32 +; GCN: v_and_b32 +; GCN: v_and_b32 define void @fabs_v4f32(<4 x float> addrspace(1)* %out, <4 x float> %in) { %fabs = call <4 x float> @llvm.fabs.v4f32(<4 x float> %in) store <4 x float> %fabs, <4 x float> addrspace(1)* %out ret void } -; SI-LABEL: {{^}}fabs_fn_fold: +; GCN-LABEL: {{^}}fabs_fn_fold: ; SI: s_load_dword [[ABS_VALUE:s[0-9]+]], s[{{[0-9]+:[0-9]+}}], 0xb -; SI-NOT: and -; SI: v_mul_f32_e64 v{{[0-9]+}}, |[[ABS_VALUE]]|, v{{[0-9]+}} +; VI: s_load_dword [[ABS_VALUE:s[0-9]+]], s[{{[0-9]+:[0-9]+}}], 0x2c +; GCN-NOT: and +; GCN: v_mul_f32_e64 v{{[0-9]+}}, |[[ABS_VALUE]]|, v{{[0-9]+}} define void @fabs_fn_fold(float addrspace(1)* %out, float %in0, float %in1) { %fabs = call float @fabs(float %in0) %fmul = fmul float %fabs, %in1 @@ -81,10 +83,11 @@ define void @fabs_fn_fold(float addrspace(1)* %out, float %in0, float %in1) { ret void } -; SI-LABEL: {{^}}fabs_fold: +; GCN-LABEL: {{^}}fabs_fold: ; SI: s_load_dword [[ABS_VALUE:s[0-9]+]], s[{{[0-9]+:[0-9]+}}], 0xb -; SI-NOT: and -; SI: v_mul_f32_e64 v{{[0-9]+}}, |[[ABS_VALUE]]|, v{{[0-9]+}} +; VI: s_load_dword [[ABS_VALUE:s[0-9]+]], s[{{[0-9]+:[0-9]+}}], 0x2c +; GCN-NOT: and +; GCN: v_mul_f32_e64 v{{[0-9]+}}, |[[ABS_VALUE]]|, v{{[0-9]+}} define void @fabs_fold(float addrspace(1)* %out, float %in0, float %in1) { %fabs = call float @llvm.fabs.f32(float %in0) %fmul = fmul float %fabs, %in1 diff --git a/test/CodeGen/R600/fadd.ll b/test/CodeGen/R600/fadd.ll index 774dd0b..365af9b 100644 --- a/test/CodeGen/R600/fadd.ll +++ b/test/CodeGen/R600/fadd.ll @@ -1,5 +1,6 @@ ; RUN: llc -march=r600 -mcpu=redwood < %s | FileCheck %s -check-prefix=R600 -check-prefix=FUNC -; RUN: llc -march=r600 -mcpu=SI -verify-machineinstrs < %s | FileCheck %s -check-prefix=SI -check-prefix=FUNC +; RUN: llc -march=amdgcn -mcpu=SI -verify-machineinstrs < %s | FileCheck %s -check-prefix=SI -check-prefix=FUNC +; RUN: llc -march=amdgcn -mcpu=tonga -verify-machineinstrs < %s | FileCheck %s -check-prefix=SI -check-prefix=FUNC ; FUNC-LABEL: {{^}}fadd_f32: ; R600: ADD {{\** *}}T{{[0-9]+\.[XYZW]}}, KC0[2].Z, KC0[2].W diff --git a/test/CodeGen/R600/fadd64.ll b/test/CodeGen/R600/fadd64.ll index 3ca8500..f1f6fef 100644 --- a/test/CodeGen/R600/fadd64.ll +++ b/test/CodeGen/R600/fadd64.ll @@ -1,4 +1,5 @@ -; RUN: llc < %s -march=r600 -mcpu=tahiti -verify-machineinstrs | FileCheck %s +; RUN: llc < %s -march=amdgcn -mcpu=tahiti -verify-machineinstrs | FileCheck %s +; RUN: llc < %s -march=amdgcn -mcpu=tonga -verify-machineinstrs | FileCheck %s ; CHECK: {{^}}fadd_f64: ; CHECK: v_add_f64 {{v[[0-9]+:[0-9]+]}}, {{v[[0-9]+:[0-9]+]}}, {{v[[0-9]+:[0-9]+]}} diff --git a/test/CodeGen/R600/fceil.ll b/test/CodeGen/R600/fceil.ll index 56dc796..f23e891 100644 --- a/test/CodeGen/R600/fceil.ll +++ b/test/CodeGen/R600/fceil.ll @@ -1,4 +1,5 @@ -; RUN: llc -march=r600 -mcpu=SI < %s | FileCheck -check-prefix=SI -check-prefix=FUNC %s +; RUN: llc -march=amdgcn -mcpu=SI -verify-machineinstrs < %s 
| FileCheck -check-prefix=SI -check-prefix=FUNC %s +; RUN: llc -march=amdgcn -mcpu=tonga -verify-machineinstrs < %s | FileCheck -check-prefix=SI -check-prefix=FUNC %s ; RUN: llc -march=r600 -mcpu=cypress < %s | FileCheck -check-prefix=EG -check-prefix=FUNC %s declare float @llvm.ceil.f32(float) nounwind readnone diff --git a/test/CodeGen/R600/fceil64.ll b/test/CodeGen/R600/fceil64.ll index 029f41d..e3244fa 100644 --- a/test/CodeGen/R600/fceil64.ll +++ b/test/CodeGen/R600/fceil64.ll @@ -1,5 +1,6 @@ -; RUN: llc -march=r600 -mcpu=bonaire < %s | FileCheck -check-prefix=CI -check-prefix=FUNC %s -; RUN: llc -march=r600 -mcpu=SI < %s | FileCheck -check-prefix=SI -check-prefix=FUNC %s +; RUN: llc -march=amdgcn -mcpu=SI -verify-machineinstrs < %s | FileCheck -check-prefix=SI -check-prefix=FUNC %s +; RUN: llc -march=amdgcn -mcpu=bonaire -verify-machineinstrs < %s | FileCheck -check-prefix=CI -check-prefix=FUNC %s +; RUN: llc -march=amdgcn -mcpu=tonga -verify-machineinstrs < %s | FileCheck -check-prefix=CI -check-prefix=FUNC %s declare double @llvm.ceil.f64(double) nounwind readnone declare <2 x double> @llvm.ceil.v2f64(<2 x double>) nounwind readnone @@ -11,23 +12,24 @@ declare <16 x double> @llvm.ceil.v16f64(<16 x double>) nounwind readnone ; FUNC-LABEL: {{^}}fceil_f64: ; CI: v_ceil_f64_e32 ; SI: s_bfe_u32 [[SEXP:s[0-9]+]], {{s[0-9]+}}, 0xb0014 +; SI: s_and_b32 s{{[0-9]+}}, s{{[0-9]+}}, 0x80000000 ; SI: s_add_i32 s{{[0-9]+}}, [[SEXP]], 0xfffffc01 ; SI: s_lshr_b64 ; SI: s_not_b64 ; SI: s_and_b64 -; SI-DAG: s_and_b32 s{{[0-9]+}}, s{{[0-9]+}}, 0x80000000 -; SI-DAG: cmp_lt_i32 +; SI: cmp_lt_i32 ; SI: cndmask_b32 ; SI: cndmask_b32 ; SI: cmp_gt_i32 ; SI: cndmask_b32 ; SI: cndmask_b32 -; SI: cmp_gt_f64 -; SI: cndmask_b32 -; SI: cmp_ne_i32 -; SI: cndmask_b32 -; SI: cndmask_b32 +; SI-DAG: v_cmp_gt_f64 +; SI-DAG: v_cmp_lg_f64 +; SI: s_and_b64 +; SI: v_cndmask_b32 +; SI: v_cndmask_b32 ; SI: v_add_f64 +; SI: s_endpgm define void @fceil_f64(double addrspace(1)* %out, double %x) { %y = call double @llvm.ceil.f64(double %x) nounwind readnone store double %y, double addrspace(1)* %out diff --git a/test/CodeGen/R600/fcmp64.ll b/test/CodeGen/R600/fcmp64.ll index dc24443..9dc8b50 100644 --- a/test/CodeGen/R600/fcmp64.ll +++ b/test/CodeGen/R600/fcmp64.ll @@ -1,7 +1,8 @@ -; RUN: llc < %s -march=r600 -mcpu=tahiti -verify-machineinstrs | FileCheck %s +; RUN: llc < %s -march=amdgcn -mcpu=tahiti -verify-machineinstrs | FileCheck %s +; RUN: llc < %s -march=amdgcn -mcpu=tonga -verify-machineinstrs | FileCheck %s ; CHECK-LABEL: {{^}}flt_f64: -; CHECK: v_cmp_lt_f64_e64 {{s[[0-9]+:[0-9]+], v[[0-9]+:[0-9]+], v[[0-9]+:[0-9]+]}} +; CHECK: v_cmp_nge_f64_e32 vcc, {{v[[0-9]+:[0-9]+], v[[0-9]+:[0-9]+]}} define void @flt_f64(i32 addrspace(1)* %out, double addrspace(1)* %in1, double addrspace(1)* %in2) { %r0 = load double addrspace(1)* %in1 @@ -13,7 +14,7 @@ define void @flt_f64(i32 addrspace(1)* %out, double addrspace(1)* %in1, } ; CHECK-LABEL: {{^}}fle_f64: -; CHECK: v_cmp_le_f64_e64 {{s[[0-9]+:[0-9]+], v[[0-9]+:[0-9]+], v[[0-9]+:[0-9]+]}} +; CHECK: v_cmp_ngt_f64_e32 vcc, {{v[[0-9]+:[0-9]+], v[[0-9]+:[0-9]+]}} define void @fle_f64(i32 addrspace(1)* %out, double addrspace(1)* %in1, double addrspace(1)* %in2) { %r0 = load double addrspace(1)* %in1 @@ -25,7 +26,7 @@ define void @fle_f64(i32 addrspace(1)* %out, double addrspace(1)* %in1, } ; CHECK-LABEL: {{^}}fgt_f64: -; CHECK: v_cmp_gt_f64_e64 {{s[[0-9]+:[0-9]+], v[[0-9]+:[0-9]+], v[[0-9]+:[0-9]+]}} +; CHECK: v_cmp_nle_f64_e32 vcc, {{v[[0-9]+:[0-9]+], v[[0-9]+:[0-9]+]}} define void 
@fgt_f64(i32 addrspace(1)* %out, double addrspace(1)* %in1, double addrspace(1)* %in2) { %r0 = load double addrspace(1)* %in1 @@ -37,7 +38,7 @@ define void @fgt_f64(i32 addrspace(1)* %out, double addrspace(1)* %in1, } ; CHECK-LABEL: {{^}}fge_f64: -; CHECK: v_cmp_ge_f64_e64 {{s[[0-9]+:[0-9]+], v[[0-9]+:[0-9]+], v[[0-9]+:[0-9]+]}} +; CHECK: v_cmp_nlt_f64_e32 vcc, {{v[[0-9]+:[0-9]+], v[[0-9]+:[0-9]+]}} define void @fge_f64(i32 addrspace(1)* %out, double addrspace(1)* %in1, double addrspace(1)* %in2) { %r0 = load double addrspace(1)* %in1 @@ -61,7 +62,7 @@ define void @fne_f64(double addrspace(1)* %out, double addrspace(1)* %in1, } ; CHECK-LABEL: {{^}}feq_f64: -; CHECK: v_cmp_eq_f64_e64 {{s[[0-9]+:[0-9]+], v[[0-9]+:[0-9]+], v[[0-9]+:[0-9]+]}} +; CHECK: v_cmp_nlg_f64_e32 vcc, {{v[[0-9]+:[0-9]+], v[[0-9]+:[0-9]+]}} define void @feq_f64(double addrspace(1)* %out, double addrspace(1)* %in1, double addrspace(1)* %in2) { %r0 = load double addrspace(1)* %in1 diff --git a/test/CodeGen/R600/fconst64.ll b/test/CodeGen/R600/fconst64.ll index 097c89f..28e0c90 100644 --- a/test/CodeGen/R600/fconst64.ll +++ b/test/CodeGen/R600/fconst64.ll @@ -1,4 +1,5 @@ -; RUN: llc < %s -march=r600 -mcpu=tahiti -verify-machineinstrs | FileCheck %s +; RUN: llc < %s -march=amdgcn -mcpu=tahiti -verify-machineinstrs | FileCheck %s +; RUN: llc < %s -march=amdgcn -mcpu=tonga -verify-machineinstrs | FileCheck %s ; CHECK: {{^}}fconst_f64: ; CHECK-DAG: s_mov_b32 {{s[0-9]+}}, 0x40140000 diff --git a/test/CodeGen/R600/fcopysign.f32.ll b/test/CodeGen/R600/fcopysign.f32.ll index 897830e..b719d5a 100644 --- a/test/CodeGen/R600/fcopysign.f32.ll +++ b/test/CodeGen/R600/fcopysign.f32.ll @@ -1,4 +1,5 @@ -; RUN: llc -march=r600 -mcpu=SI -verify-machineinstrs < %s | FileCheck -check-prefix=SI -check-prefix=FUNC %s +; RUN: llc -march=amdgcn -mcpu=SI -verify-machineinstrs < %s | FileCheck -check-prefix=SI -check-prefix=GCN -check-prefix=FUNC %s +; RUN: llc -march=amdgcn -mcpu=tonga -verify-machineinstrs < %s | FileCheck -check-prefix=VI -check-prefix=GCN -check-prefix=FUNC %s ; RUN: llc -march=r600 -mcpu=cypress -verify-machineinstrs < %s | FileCheck -check-prefix=EG -check-prefix=FUNC %s @@ -10,12 +11,14 @@ declare <4 x float> @llvm.copysign.v4f32(<4 x float>, <4 x float>) nounwind read ; FUNC-LABEL: {{^}}test_copysign_f32: ; SI: s_load_dword [[SMAG:s[0-9]+]], {{.*}} 0xb ; SI: s_load_dword [[SSIGN:s[0-9]+]], {{.*}} 0xc -; SI-DAG: v_mov_b32_e32 [[VSIGN:v[0-9]+]], [[SSIGN]] -; SI-DAG: v_mov_b32_e32 [[VMAG:v[0-9]+]], [[SMAG]] -; SI-DAG: s_mov_b32 [[SCONST:s[0-9]+]], 0x7fffffff -; SI: v_bfi_b32 [[RESULT:v[0-9]+]], [[SCONST]], [[VMAG]], [[VSIGN]] -; SI: buffer_store_dword [[RESULT]], -; SI: s_endpgm +; VI: s_load_dword [[SMAG:s[0-9]+]], {{.*}} 0x2c +; VI: s_load_dword [[SSIGN:s[0-9]+]], {{.*}} 0x30 +; GCN-DAG: v_mov_b32_e32 [[VSIGN:v[0-9]+]], [[SSIGN]] +; GCN-DAG: v_mov_b32_e32 [[VMAG:v[0-9]+]], [[SMAG]] +; GCN-DAG: s_mov_b32 [[SCONST:s[0-9]+]], 0x7fffffff +; GCN: v_bfi_b32 [[RESULT:v[0-9]+]], [[SCONST]], [[VMAG]], [[VSIGN]] +; GCN: buffer_store_dword [[RESULT]], +; GCN: s_endpgm ; EG: BFI_INT define void @test_copysign_f32(float addrspace(1)* %out, float %mag, float %sign) nounwind { @@ -25,7 +28,7 @@ define void @test_copysign_f32(float addrspace(1)* %out, float %mag, float %sign } ; FUNC-LABEL: {{^}}test_copysign_v2f32: -; SI: s_endpgm +; GCN: s_endpgm ; EG: BFI_INT ; EG: BFI_INT @@ -36,7 +39,7 @@ define void @test_copysign_v2f32(<2 x float> addrspace(1)* %out, <2 x float> %ma } ; FUNC-LABEL: {{^}}test_copysign_v4f32: -; SI: s_endpgm +; GCN: 
s_endpgm ; EG: BFI_INT ; EG: BFI_INT diff --git a/test/CodeGen/R600/fcopysign.f64.ll b/test/CodeGen/R600/fcopysign.f64.ll index 90f0ce3..3d8c559 100644 --- a/test/CodeGen/R600/fcopysign.f64.ll +++ b/test/CodeGen/R600/fcopysign.f64.ll @@ -1,4 +1,5 @@ -; RUN: llc -march=r600 -mcpu=SI -verify-machineinstrs < %s | FileCheck -check-prefix=SI -check-prefix=FUNC %s +; RUN: llc -march=amdgcn -mcpu=SI -verify-machineinstrs < %s | FileCheck -check-prefix=SI -check-prefix=GCN -check-prefix=FUNC %s +; RUN: llc -march=amdgcn -mcpu=tonga -verify-machineinstrs < %s | FileCheck -check-prefix=VI -check-prefix=GCN -check-prefix=FUNC %s declare double @llvm.copysign.f64(double, double) nounwind readnone declare <2 x double> @llvm.copysign.v2f64(<2 x double>, <2 x double>) nounwind readnone @@ -7,13 +8,15 @@ declare <4 x double> @llvm.copysign.v4f64(<4 x double>, <4 x double>) nounwind r ; FUNC-LABEL: {{^}}test_copysign_f64: ; SI-DAG: s_load_dwordx2 s{{\[}}[[SMAG_LO:[0-9]+]]:[[SMAG_HI:[0-9]+]]{{\]}}, s{{\[[0-9]+:[0-9]+\]}}, 0xb ; SI-DAG: s_load_dwordx2 s{{\[}}[[SSIGN_LO:[0-9]+]]:[[SSIGN_HI:[0-9]+]]{{\]}}, s{{\[[0-9]+:[0-9]+\]}}, 0xd -; SI-DAG: v_mov_b32_e32 v[[VSIGN_HI:[0-9]+]], s[[SSIGN_HI]] -; SI-DAG: v_mov_b32_e32 v[[VMAG_HI:[0-9]+]], s[[SMAG_HI]] -; SI-DAG: s_mov_b32 [[SCONST:s[0-9]+]], 0x7fffffff -; SI: v_bfi_b32 v[[VRESULT_HI:[0-9]+]], [[SCONST]], v[[VMAG_HI]], v[[VSIGN_HI]] -; SI: v_mov_b32_e32 v[[VMAG_LO:[0-9]+]], s[[SMAG_LO]] -; SI: buffer_store_dwordx2 v{{\[}}[[VMAG_LO]]:[[VRESULT_HI]]{{\]}} -; SI: s_endpgm +; VI-DAG: s_load_dwordx2 s{{\[}}[[SMAG_LO:[0-9]+]]:[[SMAG_HI:[0-9]+]]{{\]}}, s{{\[[0-9]+:[0-9]+\]}}, 0x2c +; VI-DAG: s_load_dwordx2 s{{\[}}[[SSIGN_LO:[0-9]+]]:[[SSIGN_HI:[0-9]+]]{{\]}}, s{{\[[0-9]+:[0-9]+\]}}, 0x34 +; GCN-DAG: v_mov_b32_e32 v[[VSIGN_HI:[0-9]+]], s[[SSIGN_HI]] +; GCN-DAG: v_mov_b32_e32 v[[VMAG_HI:[0-9]+]], s[[SMAG_HI]] +; GCN-DAG: s_mov_b32 [[SCONST:s[0-9]+]], 0x7fffffff +; GCN: v_bfi_b32 v[[VRESULT_HI:[0-9]+]], [[SCONST]], v[[VMAG_HI]], v[[VSIGN_HI]] +; GCN: v_mov_b32_e32 v[[VMAG_LO:[0-9]+]], s[[SMAG_LO]] +; GCN: buffer_store_dwordx2 v{{\[}}[[VMAG_LO]]:[[VRESULT_HI]]{{\]}} +; GCN: s_endpgm define void @test_copysign_f64(double addrspace(1)* %out, double %mag, double %sign) nounwind { %result = call double @llvm.copysign.f64(double %mag, double %sign) store double %result, double addrspace(1)* %out, align 8 @@ -21,7 +24,7 @@ define void @test_copysign_f64(double addrspace(1)* %out, double %mag, double %s } ; FUNC-LABEL: {{^}}test_copysign_v2f64: -; SI: s_endpgm +; GCN: s_endpgm define void @test_copysign_v2f64(<2 x double> addrspace(1)* %out, <2 x double> %mag, <2 x double> %sign) nounwind { %result = call <2 x double> @llvm.copysign.v2f64(<2 x double> %mag, <2 x double> %sign) store <2 x double> %result, <2 x double> addrspace(1)* %out, align 8 @@ -29,7 +32,7 @@ define void @test_copysign_v2f64(<2 x double> addrspace(1)* %out, <2 x double> % } ; FUNC-LABEL: {{^}}test_copysign_v4f64: -; SI: s_endpgm +; GCN: s_endpgm define void @test_copysign_v4f64(<4 x double> addrspace(1)* %out, <4 x double> %mag, <4 x double> %sign) nounwind { %result = call <4 x double> @llvm.copysign.v4f64(<4 x double> %mag, <4 x double> %sign) store <4 x double> %result, <4 x double> addrspace(1)* %out, align 8 diff --git a/test/CodeGen/R600/fdiv.f64.ll b/test/CodeGen/R600/fdiv.f64.ll new file mode 100644 index 0000000..6367f32 --- /dev/null +++ b/test/CodeGen/R600/fdiv.f64.ll @@ -0,0 +1,96 @@ +; RUN: llc -march=amdgcn -mcpu=hawaii -verify-machineinstrs < %s | FileCheck -check-prefix=CI -check-prefix=COMMON 
%s +; RUN: llc -march=amdgcn -mcpu=tahiti -verify-machineinstrs < %s | FileCheck -check-prefix=SI -check-prefix=COMMON %s +; RUN: llc -march=amdgcn -mcpu=tonga -verify-machineinstrs < %s | FileCheck -check-prefix=CI -check-prefix=COMMON %s + + +; COMMON-LABEL: {{^}}fdiv_f64: +; COMMON-DAG: buffer_load_dwordx2 [[NUM:v\[[0-9]+:[0-9]+\]]], {{s\[[0-9]+:[0-9]+\]}}, 0 +; COMMON-DAG: buffer_load_dwordx2 [[DEN:v\[[0-9]+:[0-9]+\]]], {{s\[[0-9]+:[0-9]+\]}}, 0 offset:8 +; CI-DAG: v_div_scale_f64 [[SCALE0:v\[[0-9]+:[0-9]+\]]], {{s\[[0-9]+:[0-9]+\]}}, [[DEN]], [[DEN]], [[NUM]] +; CI-DAG: v_div_scale_f64 [[SCALE1:v\[[0-9]+:[0-9]+\]]], vcc, [[NUM]], [[DEN]], [[NUM]] + +; Check for div_scale bug workaround on SI +; SI-DAG: v_div_scale_f64 [[SCALE0:v\[[0-9]+:[0-9]+\]]], {{s\[[0-9]+:[0-9]+\]}}, [[DEN]], [[DEN]], [[NUM]] +; SI-DAG: v_div_scale_f64 [[SCALE1:v\[[0-9]+:[0-9]+\]]], {{s\[[0-9]+:[0-9]+\]}}, [[NUM]], [[DEN]], [[NUM]] + +; COMMON-DAG: v_rcp_f64_e32 [[RCP_SCALE0:v\[[0-9]+:[0-9]+\]]], [[SCALE0]] + +; SI-DAG: v_cmp_eq_i32_e32 vcc, {{v[0-9]+}}, {{v[0-9]+}} +; SI-DAG: v_cmp_eq_i32_e64 [[CMP0:s\[[0-9]+:[0-9]+\]]], {{v[0-9]+}}, {{v[0-9]+}} +; SI-DAG: s_xor_b64 vcc, [[CMP0]], vcc + +; COMMON-DAG: v_fma_f64 [[FMA0:v\[[0-9]+:[0-9]+\]]], -[[SCALE0]], [[RCP_SCALE0]], 1.0 +; COMMON-DAG: v_fma_f64 [[FMA1:v\[[0-9]+:[0-9]+\]]], [[RCP_SCALE0]], [[FMA0]], [[RCP_SCALE0]] +; COMMON-DAG: v_fma_f64 [[FMA2:v\[[0-9]+:[0-9]+\]]], -[[SCALE0]], [[FMA1]], 1.0 +; COMMON-DAG: v_fma_f64 [[FMA3:v\[[0-9]+:[0-9]+\]]], [[FMA1]], [[FMA2]], [[FMA1]] +; COMMON-DAG: v_mul_f64 [[MUL:v\[[0-9]+:[0-9]+\]]], [[SCALE1]], [[FMA3]] +; COMMON-DAG: v_fma_f64 [[FMA4:v\[[0-9]+:[0-9]+\]]], -[[SCALE0]], [[MUL]], [[SCALE1]] +; COMMON: v_div_fmas_f64 [[FMAS:v\[[0-9]+:[0-9]+\]]], [[FMA4]], [[FMA3]], [[MUL]] +; COMMON: v_div_fixup_f64 [[RESULT:v\[[0-9]+:[0-9]+\]]], [[FMAS]], [[DEN]], [[NUM]] +; COMMON: buffer_store_dwordx2 [[RESULT]] +; COMMON: s_endpgm +define void @fdiv_f64(double addrspace(1)* %out, double addrspace(1)* %in) nounwind { + %gep.1 = getelementptr double addrspace(1)* %in, i32 1 + %num = load double addrspace(1)* %in + %den = load double addrspace(1)* %gep.1 + %result = fdiv double %num, %den + store double %result, double addrspace(1)* %out + ret void +} + +; COMMON-LABEL: {{^}}fdiv_f64_s_v: +define void @fdiv_f64_s_v(double addrspace(1)* %out, double addrspace(1)* %in, double %num) nounwind { + %den = load double addrspace(1)* %in + %result = fdiv double %num, %den + store double %result, double addrspace(1)* %out + ret void +} + +; COMMON-LABEL: {{^}}fdiv_f64_v_s: +define void @fdiv_f64_v_s(double addrspace(1)* %out, double addrspace(1)* %in, double %den) nounwind { + %num = load double addrspace(1)* %in + %result = fdiv double %num, %den + store double %result, double addrspace(1)* %out + ret void +} + +; COMMON-LABEL: {{^}}fdiv_f64_s_s: +define void @fdiv_f64_s_s(double addrspace(1)* %out, double %num, double %den) nounwind { + %result = fdiv double %num, %den + store double %result, double addrspace(1)* %out + ret void +} + +; COMMON-LABEL: {{^}}v_fdiv_v2f64: +define void @v_fdiv_v2f64(<2 x double> addrspace(1)* %out, <2 x double> addrspace(1)* %in) nounwind { + %gep.1 = getelementptr <2 x double> addrspace(1)* %in, i32 1 + %num = load <2 x double> addrspace(1)* %in + %den = load <2 x double> addrspace(1)* %gep.1 + %result = fdiv <2 x double> %num, %den + store <2 x double> %result, <2 x double> addrspace(1)* %out + ret void +} + +; COMMON-LABEL: {{^}}s_fdiv_v2f64: +define void @s_fdiv_v2f64(<2 x double> addrspace(1)* %out, <2 x 
double> %num, <2 x double> %den) { + %result = fdiv <2 x double> %num, %den + store <2 x double> %result, <2 x double> addrspace(1)* %out + ret void +} + +; COMMON-LABEL: {{^}}v_fdiv_v4f64: +define void @v_fdiv_v4f64(<4 x double> addrspace(1)* %out, <4 x double> addrspace(1)* %in) nounwind { + %gep.1 = getelementptr <4 x double> addrspace(1)* %in, i32 1 + %num = load <4 x double> addrspace(1)* %in + %den = load <4 x double> addrspace(1)* %gep.1 + %result = fdiv <4 x double> %num, %den + store <4 x double> %result, <4 x double> addrspace(1)* %out + ret void +} + +; COMMON-LABEL: {{^}}s_fdiv_v4f64: +define void @s_fdiv_v4f64(<4 x double> addrspace(1)* %out, <4 x double> %num, <4 x double> %den) { + %result = fdiv <4 x double> %num, %den + store <4 x double> %result, <4 x double> addrspace(1)* %out + ret void +} diff --git a/test/CodeGen/R600/fdiv.ll b/test/CodeGen/R600/fdiv.ll index 5321fdb..603287f 100644 --- a/test/CodeGen/R600/fdiv.ll +++ b/test/CodeGen/R600/fdiv.ll @@ -1,5 +1,6 @@ ; RUN: llc -march=r600 -mcpu=redwood < %s | FileCheck -check-prefix=R600 %s -; RUN: llc -march=r600 -mcpu=SI -verify-machineinstrs < %s | FileCheck -check-prefix=SI %s +; RUN: llc -march=amdgcn -mcpu=SI -verify-machineinstrs < %s | FileCheck -check-prefix=SI %s +; RUN: llc -march=amdgcn -mcpu=tonga -verify-machineinstrs < %s | FileCheck -check-prefix=SI %s ; These tests check that fdiv is expanded correctly and also test that the ; scheduler is scheduling the RECIP_IEEE and MUL_IEEE instructions in separate diff --git a/test/CodeGen/R600/fdiv64.ll b/test/CodeGen/R600/fdiv64.ll deleted file mode 100644 index d424898..0000000 --- a/test/CodeGen/R600/fdiv64.ll +++ /dev/null @@ -1,14 +0,0 @@ -; RUN: llc < %s -march=r600 -mcpu=tahiti -verify-machineinstrs | FileCheck %s - -; CHECK: {{^}}fdiv_f64: -; CHECK: v_rcp_f64_e32 {{v\[[0-9]+:[0-9]+\]}} -; CHECK: v_mul_f64 {{v\[[0-9]+:[0-9]+\]}}, {{v\[[0-9]+:[0-9]+\]}}, {{v\[[0-9]+:[0-9]+\]}} - -define void @fdiv_f64(double addrspace(1)* %out, double addrspace(1)* %in1, - double addrspace(1)* %in2) { - %r0 = load double addrspace(1)* %in1 - %r1 = load double addrspace(1)* %in2 - %r2 = fdiv double %r0, %r1 - store double %r2, double addrspace(1)* %out - ret void -} diff --git a/test/CodeGen/R600/ffloor.f64.ll b/test/CodeGen/R600/ffloor.f64.ll new file mode 100644 index 0000000..745ad3b --- /dev/null +++ b/test/CodeGen/R600/ffloor.f64.ll @@ -0,0 +1,106 @@ +; RUN: llc -march=amdgcn -mcpu=SI -verify-machineinstrs < %s | FileCheck -check-prefix=SI -check-prefix=FUNC %s +; RUN: llc -march=amdgcn -mcpu=bonaire -verify-machineinstrs < %s | FileCheck -check-prefix=CI -check-prefix=FUNC %s +; RUN: llc -march=amdgcn -mcpu=tonga -verify-machineinstrs < %s | FileCheck -check-prefix=CI -check-prefix=FUNC %s + +declare double @llvm.floor.f64(double) nounwind readnone +declare <2 x double> @llvm.floor.v2f64(<2 x double>) nounwind readnone +declare <3 x double> @llvm.floor.v3f64(<3 x double>) nounwind readnone +declare <4 x double> @llvm.floor.v4f64(<4 x double>) nounwind readnone +declare <8 x double> @llvm.floor.v8f64(<8 x double>) nounwind readnone +declare <16 x double> @llvm.floor.v16f64(<16 x double>) nounwind readnone + +; FUNC-LABEL: {{^}}ffloor_f64: +; CI: v_floor_f64_e32 + +; SI: s_bfe_u32 [[SEXP:s[0-9]+]], {{s[0-9]+}}, 0xb0014 +; SI: s_and_b32 s{{[0-9]+}}, s{{[0-9]+}}, 0x80000000 +; SI: s_add_i32 s{{[0-9]+}}, [[SEXP]], 0xfffffc01 +; SI: s_lshr_b64 +; SI: s_not_b64 +; SI: s_and_b64 +; SI: cmp_lt_i32 +; SI: cndmask_b32 +; SI: cndmask_b32 +; SI: cmp_gt_i32 +; SI: cndmask_b32 +; SI: 
cndmask_b32 +; SI-DAG: v_cmp_lt_f64 +; SI-DAG: v_cmp_lg_f64 +; SI-DAG: s_and_b64 +; SI-DAG: v_cndmask_b32 +; SI-DAG: v_cndmask_b32 +; SI: v_add_f64 +; SI: s_endpgm +define void @ffloor_f64(double addrspace(1)* %out, double %x) { + %y = call double @llvm.floor.f64(double %x) nounwind readnone + store double %y, double addrspace(1)* %out + ret void +} + +; FUNC-LABEL: {{^}}ffloor_v2f64: +; CI: v_floor_f64_e32 +; CI: v_floor_f64_e32 +define void @ffloor_v2f64(<2 x double> addrspace(1)* %out, <2 x double> %x) { + %y = call <2 x double> @llvm.floor.v2f64(<2 x double> %x) nounwind readnone + store <2 x double> %y, <2 x double> addrspace(1)* %out + ret void +} + +; FIXME-FUNC-LABEL: {{^}}ffloor_v3f64: +; FIXME-CI: v_floor_f64_e32 +; FIXME-CI: v_floor_f64_e32 +; FIXME-CI: v_floor_f64_e32 +; define void @ffloor_v3f64(<3 x double> addrspace(1)* %out, <3 x double> %x) { +; %y = call <3 x double> @llvm.floor.v3f64(<3 x double> %x) nounwind readnone +; store <3 x double> %y, <3 x double> addrspace(1)* %out +; ret void +; } + +; FUNC-LABEL: {{^}}ffloor_v4f64: +; CI: v_floor_f64_e32 +; CI: v_floor_f64_e32 +; CI: v_floor_f64_e32 +; CI: v_floor_f64_e32 +define void @ffloor_v4f64(<4 x double> addrspace(1)* %out, <4 x double> %x) { + %y = call <4 x double> @llvm.floor.v4f64(<4 x double> %x) nounwind readnone + store <4 x double> %y, <4 x double> addrspace(1)* %out + ret void +} + +; FUNC-LABEL: {{^}}ffloor_v8f64: +; CI: v_floor_f64_e32 +; CI: v_floor_f64_e32 +; CI: v_floor_f64_e32 +; CI: v_floor_f64_e32 +; CI: v_floor_f64_e32 +; CI: v_floor_f64_e32 +; CI: v_floor_f64_e32 +; CI: v_floor_f64_e32 +define void @ffloor_v8f64(<8 x double> addrspace(1)* %out, <8 x double> %x) { + %y = call <8 x double> @llvm.floor.v8f64(<8 x double> %x) nounwind readnone + store <8 x double> %y, <8 x double> addrspace(1)* %out + ret void +} + +; FUNC-LABEL: {{^}}ffloor_v16f64: +; CI: v_floor_f64_e32 +; CI: v_floor_f64_e32 +; CI: v_floor_f64_e32 +; CI: v_floor_f64_e32 +; CI: v_floor_f64_e32 +; CI: v_floor_f64_e32 +; CI: v_floor_f64_e32 +; CI: v_floor_f64_e32 +; CI: v_floor_f64_e32 +; CI: v_floor_f64_e32 +; CI: v_floor_f64_e32 +; CI: v_floor_f64_e32 +; CI: v_floor_f64_e32 +; CI: v_floor_f64_e32 +; CI: v_floor_f64_e32 +; CI: v_floor_f64_e32 +define void @ffloor_v16f64(<16 x double> addrspace(1)* %out, <16 x double> %x) { + %y = call <16 x double> @llvm.floor.v16f64(<16 x double> %x) nounwind readnone + store <16 x double> %y, <16 x double> addrspace(1)* %out + ret void +} diff --git a/test/CodeGen/R600/ffloor.ll b/test/CodeGen/R600/ffloor.ll index 166f705..61c46ac 100644 --- a/test/CodeGen/R600/ffloor.ll +++ b/test/CodeGen/R600/ffloor.ll @@ -1,104 +1,49 @@ -; RUN: llc -march=r600 -mcpu=bonaire < %s | FileCheck -check-prefix=CI -check-prefix=FUNC %s -; RUN: llc -march=r600 -mcpu=SI < %s | FileCheck -check-prefix=SI -check-prefix=FUNC %s - -declare double @llvm.floor.f64(double) nounwind readnone -declare <2 x double> @llvm.floor.v2f64(<2 x double>) nounwind readnone -declare <3 x double> @llvm.floor.v3f64(<3 x double>) nounwind readnone -declare <4 x double> @llvm.floor.v4f64(<4 x double>) nounwind readnone -declare <8 x double> @llvm.floor.v8f64(<8 x double>) nounwind readnone -declare <16 x double> @llvm.floor.v16f64(<16 x double>) nounwind readnone +; RUN: llc -march=amdgcn -mcpu=SI -verify-machineinstrs < %s | FileCheck -check-prefix=SI -check-prefix=FUNC %s +; RUN: llc -march=amdgcn -mcpu=tonga -verify-machineinstrs < %s | FileCheck -check-prefix=SI -check-prefix=FUNC %s +; RUN: llc -march=r600 -mcpu=redwood < %s | FileCheck 
-check-prefix=R600 -check-prefix=FUNC %s + +; FUNC-LABEL: {{^}}floor_f32: +; SI: v_floor_f32_e32 +; R600: FLOOR +define void @floor_f32(float addrspace(1)* %out, float %in) { + %tmp = call float @llvm.floor.f32(float %in) #0 + store float %tmp, float addrspace(1)* %out + ret void +} -; FUNC-LABEL: {{^}}ffloor_f64: -; CI: v_floor_f64_e32 +; FUNC-LABEL: {{^}}floor_v2f32: +; SI: v_floor_f32_e32 +; SI: v_floor_f32_e32 -; SI: s_bfe_u32 [[SEXP:s[0-9]+]], {{s[0-9]+}}, 0xb0014 -; SI: s_add_i32 s{{[0-9]+}}, [[SEXP]], 0xfffffc01 -; SI: s_lshr_b64 -; SI: s_not_b64 -; SI: s_and_b64 -; SI-DAG: s_and_b32 s{{[0-9]+}}, s{{[0-9]+}}, 0x80000000 -; SI-DAG: cmp_lt_i32 -; SI: cndmask_b32 -; SI: cndmask_b32 -; SI: cmp_gt_i32 -; SI: cndmask_b32 -; SI: cndmask_b32 -; SI: cmp_lt_f64 -; SI: cndmask_b32 -; SI: cmp_ne_i32 -; SI: cndmask_b32 -; SI: cndmask_b32 -; SI: v_add_f64 -define void @ffloor_f64(double addrspace(1)* %out, double %x) { - %y = call double @llvm.floor.f64(double %x) nounwind readnone - store double %y, double addrspace(1)* %out +define void @floor_v2f32(<2 x float> addrspace(1)* %out, <2 x float> %in) { + %tmp = call <2 x float> @llvm.floor.v2f32(<2 x float> %in) #0 + store <2 x float> %tmp, <2 x float> addrspace(1)* %out ret void } -; FUNC-LABEL: {{^}}ffloor_v2f64: -; CI: v_floor_f64_e32 -; CI: v_floor_f64_e32 -define void @ffloor_v2f64(<2 x double> addrspace(1)* %out, <2 x double> %x) { - %y = call <2 x double> @llvm.floor.v2f64(<2 x double> %x) nounwind readnone - store <2 x double> %y, <2 x double> addrspace(1)* %out +; FUNC-LABEL: {{^}}floor_v4f32: +; SI: v_floor_f32_e32 +; SI: v_floor_f32_e32 +; SI: v_floor_f32_e32 +; SI: v_floor_f32_e32 + +; R600: FLOOR +; R600: FLOOR +; R600: FLOOR +; R600: FLOOR +define void @floor_v4f32(<4 x float> addrspace(1)* %out, <4 x float> %in) { + %tmp = call <4 x float> @llvm.floor.v4f32(<4 x float> %in) #0 + store <4 x float> %tmp, <4 x float> addrspace(1)* %out ret void } -; FIXME-FUNC-LABEL: {{^}}ffloor_v3f64: -; FIXME-CI: v_floor_f64_e32 -; FIXME-CI: v_floor_f64_e32 -; FIXME-CI: v_floor_f64_e32 -; define void @ffloor_v3f64(<3 x double> addrspace(1)* %out, <3 x double> %x) { -; %y = call <3 x double> @llvm.floor.v3f64(<3 x double> %x) nounwind readnone -; store <3 x double> %y, <3 x double> addrspace(1)* %out -; ret void -; } +; Function Attrs: nounwind readonly +declare float @llvm.floor.f32(float) #0 -; FUNC-LABEL: {{^}}ffloor_v4f64: -; CI: v_floor_f64_e32 -; CI: v_floor_f64_e32 -; CI: v_floor_f64_e32 -; CI: v_floor_f64_e32 -define void @ffloor_v4f64(<4 x double> addrspace(1)* %out, <4 x double> %x) { - %y = call <4 x double> @llvm.floor.v4f64(<4 x double> %x) nounwind readnone - store <4 x double> %y, <4 x double> addrspace(1)* %out - ret void -} +; Function Attrs: nounwind readonly +declare <2 x float> @llvm.floor.v2f32(<2 x float>) #0 -; FUNC-LABEL: {{^}}ffloor_v8f64: -; CI: v_floor_f64_e32 -; CI: v_floor_f64_e32 -; CI: v_floor_f64_e32 -; CI: v_floor_f64_e32 -; CI: v_floor_f64_e32 -; CI: v_floor_f64_e32 -; CI: v_floor_f64_e32 -; CI: v_floor_f64_e32 -define void @ffloor_v8f64(<8 x double> addrspace(1)* %out, <8 x double> %x) { - %y = call <8 x double> @llvm.floor.v8f64(<8 x double> %x) nounwind readnone - store <8 x double> %y, <8 x double> addrspace(1)* %out - ret void -} +; Function Attrs: nounwind readonly +declare <4 x float> @llvm.floor.v4f32(<4 x float>) #0 -; FUNC-LABEL: {{^}}ffloor_v16f64: -; CI: v_floor_f64_e32 -; CI: v_floor_f64_e32 -; CI: v_floor_f64_e32 -; CI: v_floor_f64_e32 -; CI: v_floor_f64_e32 -; CI: v_floor_f64_e32 -; CI: v_floor_f64_e32 
-; CI: v_floor_f64_e32 -; CI: v_floor_f64_e32 -; CI: v_floor_f64_e32 -; CI: v_floor_f64_e32 -; CI: v_floor_f64_e32 -; CI: v_floor_f64_e32 -; CI: v_floor_f64_e32 -; CI: v_floor_f64_e32 -; CI: v_floor_f64_e32 -define void @ffloor_v16f64(<16 x double> addrspace(1)* %out, <16 x double> %x) { - %y = call <16 x double> @llvm.floor.v16f64(<16 x double> %x) nounwind readnone - store <16 x double> %y, <16 x double> addrspace(1)* %out - ret void -} +attributes #0 = { nounwind readnone } diff --git a/test/CodeGen/R600/flat-address-space.ll b/test/CodeGen/R600/flat-address-space.ll index fc5af7c..2e98bf5 100644 --- a/test/CodeGen/R600/flat-address-space.ll +++ b/test/CodeGen/R600/flat-address-space.ll @@ -1,5 +1,7 @@ -; RUN: llc -O0 -march=r600 -mcpu=bonaire -mattr=-promote-alloca < %s | FileCheck -check-prefix=CHECK -check-prefix=CHECK-NO-PROMOTE %s -; RUN: llc -O0 -march=r600 -mcpu=bonaire -mattr=+promote-alloca < %s | FileCheck -check-prefix=CHECK -check-prefix=CHECK-PROMOTE %s +; RUN: llc -O0 -march=amdgcn -mcpu=bonaire -mattr=-promote-alloca < %s | FileCheck -check-prefix=CHECK -check-prefix=CHECK-NO-PROMOTE %s +; RUN: llc -O0 -march=amdgcn -mcpu=bonaire -mattr=+promote-alloca < %s | FileCheck -check-prefix=CHECK -check-prefix=CHECK-PROMOTE %s +; RUN: llc -O0 -march=amdgcn -mcpu=tonga -mattr=-promote-alloca < %s | FileCheck -check-prefix=CHECK -check-prefix=CHECK-NO-PROMOTE %s +; RUN: llc -O0 -march=amdgcn -mcpu=tonga -mattr=+promote-alloca < %s | FileCheck -check-prefix=CHECK -check-prefix=CHECK-PROMOTE %s ; Disable optimizations in case there are optimizations added that ; specialize away generic pointer accesses. diff --git a/test/CodeGen/R600/floor.ll b/test/CodeGen/R600/floor.ll index 67e86c4..c6bfb85 100644 --- a/test/CodeGen/R600/floor.ll +++ b/test/CodeGen/R600/floor.ll @@ -1,7 +1,6 @@ -;RUN: llc < %s -march=r600 -mcpu=redwood | FileCheck %s - -;CHECK: FLOOR * T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}} +; RUN: llc -march=r600 -mcpu=redwood < %s | FileCheck %s +; CHECK: FLOOR * T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}} define void @test(<4 x float> inreg %reg0) #0 { %r0 = extractelement <4 x float> %reg0, i32 0 %r1 = call float @floor(float %r0) @@ -13,4 +12,4 @@ define void @test(<4 x float> inreg %reg0) #0 { declare float @floor(float) readonly declare void @llvm.R600.store.swizzle(<4 x float>, i32, i32) -attributes #0 = { "ShaderType"="0" }
\ No newline at end of file +attributes #0 = { "ShaderType"="0" } diff --git a/test/CodeGen/R600/fma-combine.ll b/test/CodeGen/R600/fma-combine.ll new file mode 100644 index 0000000..9aac90c --- /dev/null +++ b/test/CodeGen/R600/fma-combine.ll @@ -0,0 +1,368 @@ +; RUN: llc -march=amdgcn -mcpu=tahiti -verify-machineinstrs -fp-contract=fast < %s | FileCheck -check-prefix=SI-FASTFMAF -check-prefix=SI -check-prefix=FUNC %s +; RUN: llc -march=amdgcn -mcpu=verde -verify-machineinstrs -fp-contract=fast < %s | FileCheck -check-prefix=SI-SLOWFMAF -check-prefix=SI -check-prefix=FUNC %s + +declare i32 @llvm.r600.read.tidig.x() #0 +declare double @llvm.fabs.f64(double) #0 +declare double @llvm.fma.f64(double, double, double) #0 +declare float @llvm.fma.f32(float, float, float) #0 + +; (fadd (fmul x, y), z) -> (fma x, y, z) +; FUNC-LABEL: {{^}}combine_to_fma_f64_0: +; SI-DAG: buffer_load_dwordx2 [[A:v\[[0-9]+:[0-9]+\]]], v{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}}, 0 addr64{{$}} +; SI-DAG: buffer_load_dwordx2 [[B:v\[[0-9]+:[0-9]+\]]], v{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}}, 0 addr64 offset:8{{$}} +; SI-DAG: buffer_load_dwordx2 [[C:v\[[0-9]+:[0-9]+\]]], v{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}}, 0 addr64 offset:16{{$}} +; SI: v_fma_f64 [[RESULT:v\[[0-9]+:[0-9]+\]]], [[A]], [[B]], [[C]] +; SI: buffer_store_dwordx2 [[RESULT]] +define void @combine_to_fma_f64_0(double addrspace(1)* noalias %out, double addrspace(1)* noalias %in) #1 { + %tid = tail call i32 @llvm.r600.read.tidig.x() #0 + %gep.0 = getelementptr double addrspace(1)* %in, i32 %tid + %gep.1 = getelementptr double addrspace(1)* %gep.0, i32 1 + %gep.2 = getelementptr double addrspace(1)* %gep.0, i32 2 + %gep.out = getelementptr double addrspace(1)* %out, i32 %tid + + %a = load double addrspace(1)* %gep.0 + %b = load double addrspace(1)* %gep.1 + %c = load double addrspace(1)* %gep.2 + + %mul = fmul double %a, %b + %fma = fadd double %mul, %c + store double %fma, double addrspace(1)* %gep.out + ret void +} + +; (fadd (fmul x, y), z) -> (fma x, y, z) +; FUNC-LABEL: {{^}}combine_to_fma_f64_0_2use: +; SI-DAG: buffer_load_dwordx2 [[A:v\[[0-9]+:[0-9]+\]]], v{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}}, 0 addr64{{$}} +; SI-DAG: buffer_load_dwordx2 [[B:v\[[0-9]+:[0-9]+\]]], v{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}}, 0 addr64 offset:8{{$}} +; SI-DAG: buffer_load_dwordx2 [[C:v\[[0-9]+:[0-9]+\]]], v{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}}, 0 addr64 offset:16{{$}} +; SI-DAG: buffer_load_dwordx2 [[D:v\[[0-9]+:[0-9]+\]]], v{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}}, 0 addr64 offset:24{{$}} +; SI-DAG: v_fma_f64 [[RESULT0:v\[[0-9]+:[0-9]+\]]], [[A]], [[B]], [[C]] +; SI-DAG: v_fma_f64 [[RESULT1:v\[[0-9]+:[0-9]+\]]], [[A]], [[B]], [[D]] +; SI-DAG: buffer_store_dwordx2 [[RESULT0]], v{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}}, 0 addr64{{$}} +; SI-DAG: buffer_store_dwordx2 [[RESULT1]], v{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}}, 0 addr64 offset:8{{$}} +; SI: s_endpgm +define void @combine_to_fma_f64_0_2use(double addrspace(1)* noalias %out, double addrspace(1)* noalias %in) #1 { + %tid = tail call i32 @llvm.r600.read.tidig.x() #0 + %gep.0 = getelementptr double addrspace(1)* %in, i32 %tid + %gep.1 = getelementptr double addrspace(1)* %gep.0, i32 1 + %gep.2 = getelementptr double addrspace(1)* %gep.0, i32 2 + %gep.3 = getelementptr double addrspace(1)* %gep.0, i32 3 + %gep.out.0 = getelementptr double addrspace(1)* %out, i32 %tid + %gep.out.1 = getelementptr double addrspace(1)* %gep.out.0, i32 1 + + %a = load double addrspace(1)* 
%gep.0 + %b = load double addrspace(1)* %gep.1 + %c = load double addrspace(1)* %gep.2 + %d = load double addrspace(1)* %gep.3 + + %mul = fmul double %a, %b + %fma0 = fadd double %mul, %c + %fma1 = fadd double %mul, %d + store double %fma0, double addrspace(1)* %gep.out.0 + store double %fma1, double addrspace(1)* %gep.out.1 + ret void +} + +; (fadd x, (fmul y, z)) -> (fma y, z, x) +; FUNC-LABEL: {{^}}combine_to_fma_f64_1: +; SI-DAG: buffer_load_dwordx2 [[A:v\[[0-9]+:[0-9]+\]]], v{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}}, 0 addr64{{$}} +; SI-DAG: buffer_load_dwordx2 [[B:v\[[0-9]+:[0-9]+\]]], v{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}}, 0 addr64 offset:8{{$}} +; SI-DAG: buffer_load_dwordx2 [[C:v\[[0-9]+:[0-9]+\]]], v{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}}, 0 addr64 offset:16{{$}} +; SI: v_fma_f64 [[RESULT:v\[[0-9]+:[0-9]+\]]], [[A]], [[B]], [[C]] +; SI: buffer_store_dwordx2 [[RESULT]] +define void @combine_to_fma_f64_1(double addrspace(1)* noalias %out, double addrspace(1)* noalias %in) #1 { + %tid = tail call i32 @llvm.r600.read.tidig.x() #0 + %gep.0 = getelementptr double addrspace(1)* %in, i32 %tid + %gep.1 = getelementptr double addrspace(1)* %gep.0, i32 1 + %gep.2 = getelementptr double addrspace(1)* %gep.0, i32 2 + %gep.out = getelementptr double addrspace(1)* %out, i32 %tid + + %a = load double addrspace(1)* %gep.0 + %b = load double addrspace(1)* %gep.1 + %c = load double addrspace(1)* %gep.2 + + %mul = fmul double %a, %b + %fma = fadd double %c, %mul + store double %fma, double addrspace(1)* %gep.out + ret void +} + +; (fsub (fmul x, y), z) -> (fma x, y, (fneg z)) +; FUNC-LABEL: {{^}}combine_to_fma_fsub_0_f64: +; SI-DAG: buffer_load_dwordx2 [[A:v\[[0-9]+:[0-9]+\]]], v{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}}, 0 addr64{{$}} +; SI-DAG: buffer_load_dwordx2 [[B:v\[[0-9]+:[0-9]+\]]], v{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}}, 0 addr64 offset:8{{$}} +; SI-DAG: buffer_load_dwordx2 [[C:v\[[0-9]+:[0-9]+\]]], v{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}}, 0 addr64 offset:16{{$}} +; SI: v_fma_f64 [[RESULT:v\[[0-9]+:[0-9]+\]]], [[A]], [[B]], -[[C]] +; SI: buffer_store_dwordx2 [[RESULT]] +define void @combine_to_fma_fsub_0_f64(double addrspace(1)* noalias %out, double addrspace(1)* noalias %in) #1 { + %tid = tail call i32 @llvm.r600.read.tidig.x() #0 + %gep.0 = getelementptr double addrspace(1)* %in, i32 %tid + %gep.1 = getelementptr double addrspace(1)* %gep.0, i32 1 + %gep.2 = getelementptr double addrspace(1)* %gep.0, i32 2 + %gep.out = getelementptr double addrspace(1)* %out, i32 %tid + + %a = load double addrspace(1)* %gep.0 + %b = load double addrspace(1)* %gep.1 + %c = load double addrspace(1)* %gep.2 + + %mul = fmul double %a, %b + %fma = fsub double %mul, %c + store double %fma, double addrspace(1)* %gep.out + ret void +} + +; (fsub (fmul x, y), z) -> (fma x, y, (fneg z)) +; FUNC-LABEL: {{^}}combine_to_fma_fsub_f64_0_2use: +; SI-DAG: buffer_load_dwordx2 [[A:v\[[0-9]+:[0-9]+\]]], v{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}}, 0 addr64{{$}} +; SI-DAG: buffer_load_dwordx2 [[B:v\[[0-9]+:[0-9]+\]]], v{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}}, 0 addr64 offset:8{{$}} +; SI-DAG: buffer_load_dwordx2 [[C:v\[[0-9]+:[0-9]+\]]], v{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}}, 0 addr64 offset:16{{$}} +; SI-DAG: buffer_load_dwordx2 [[D:v\[[0-9]+:[0-9]+\]]], v{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}}, 0 addr64 offset:24{{$}} +; SI-DAG: v_fma_f64 [[RESULT0:v\[[0-9]+:[0-9]+\]]], [[A]], [[B]], -[[C]] +; SI-DAG: v_fma_f64 [[RESULT1:v\[[0-9]+:[0-9]+\]]], [[A]], [[B]], -[[D]] 
+; SI-DAG: buffer_store_dwordx2 [[RESULT0]], v{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}}, 0 addr64{{$}} +; SI-DAG: buffer_store_dwordx2 [[RESULT1]], v{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}}, 0 addr64 offset:8{{$}} +; SI: s_endpgm +define void @combine_to_fma_fsub_f64_0_2use(double addrspace(1)* noalias %out, double addrspace(1)* noalias %in) #1 { + %tid = tail call i32 @llvm.r600.read.tidig.x() #0 + %gep.0 = getelementptr double addrspace(1)* %in, i32 %tid + %gep.1 = getelementptr double addrspace(1)* %gep.0, i32 1 + %gep.2 = getelementptr double addrspace(1)* %gep.0, i32 2 + %gep.3 = getelementptr double addrspace(1)* %gep.0, i32 3 + %gep.out.0 = getelementptr double addrspace(1)* %out, i32 %tid + %gep.out.1 = getelementptr double addrspace(1)* %gep.out.0, i32 1 + + %a = load double addrspace(1)* %gep.0 + %b = load double addrspace(1)* %gep.1 + %c = load double addrspace(1)* %gep.2 + %d = load double addrspace(1)* %gep.3 + + %mul = fmul double %a, %b + %fma0 = fsub double %mul, %c + %fma1 = fsub double %mul, %d + store double %fma0, double addrspace(1)* %gep.out.0 + store double %fma1, double addrspace(1)* %gep.out.1 + ret void +} + +; (fsub x, (fmul y, z)) -> (fma (fneg y), z, x) +; FUNC-LABEL: {{^}}combine_to_fma_fsub_1_f64: +; SI-DAG: buffer_load_dwordx2 [[A:v\[[0-9]+:[0-9]+\]]], v{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}}, 0 addr64{{$}} +; SI-DAG: buffer_load_dwordx2 [[B:v\[[0-9]+:[0-9]+\]]], v{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}}, 0 addr64 offset:8{{$}} +; SI-DAG: buffer_load_dwordx2 [[C:v\[[0-9]+:[0-9]+\]]], v{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}}, 0 addr64 offset:16{{$}} +; SI: v_fma_f64 [[RESULT:v\[[0-9]+:[0-9]+\]]], -[[A]], [[B]], [[C]] +; SI: buffer_store_dwordx2 [[RESULT]] +define void @combine_to_fma_fsub_1_f64(double addrspace(1)* noalias %out, double addrspace(1)* noalias %in) #1 { + %tid = tail call i32 @llvm.r600.read.tidig.x() #0 + %gep.0 = getelementptr double addrspace(1)* %in, i32 %tid + %gep.1 = getelementptr double addrspace(1)* %gep.0, i32 1 + %gep.2 = getelementptr double addrspace(1)* %gep.0, i32 2 + %gep.out = getelementptr double addrspace(1)* %out, i32 %tid + + %a = load double addrspace(1)* %gep.0 + %b = load double addrspace(1)* %gep.1 + %c = load double addrspace(1)* %gep.2 + + %mul = fmul double %a, %b + %fma = fsub double %c, %mul + store double %fma, double addrspace(1)* %gep.out + ret void +} + +; (fsub x, (fmul y, z)) -> (fma (fneg y), z, x) +; FUNC-LABEL: {{^}}combine_to_fma_fsub_1_f64_2use: +; SI-DAG: buffer_load_dwordx2 [[A:v\[[0-9]+:[0-9]+\]]], v{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}}, 0 addr64{{$}} +; SI-DAG: buffer_load_dwordx2 [[B:v\[[0-9]+:[0-9]+\]]], v{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}}, 0 addr64 offset:8{{$}} +; SI-DAG: buffer_load_dwordx2 [[C:v\[[0-9]+:[0-9]+\]]], v{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}}, 0 addr64 offset:16{{$}} +; SI-DAG: buffer_load_dwordx2 [[D:v\[[0-9]+:[0-9]+\]]], v{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}}, 0 addr64 offset:24{{$}} +; SI-DAG: v_fma_f64 [[RESULT0:v\[[0-9]+:[0-9]+\]]], -[[A]], [[B]], [[C]] +; SI-DAG: v_fma_f64 [[RESULT1:v\[[0-9]+:[0-9]+\]]], -[[A]], [[B]], [[D]] +; SI-DAG: buffer_store_dwordx2 [[RESULT0]], v{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}}, 0 addr64{{$}} +; SI-DAG: buffer_store_dwordx2 [[RESULT1]], v{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}}, 0 addr64 offset:8{{$}} +; SI: s_endpgm +define void @combine_to_fma_fsub_1_f64_2use(double addrspace(1)* noalias %out, double addrspace(1)* noalias %in) #1 { + %tid = tail call i32 
@llvm.r600.read.tidig.x() #0 + %gep.0 = getelementptr double addrspace(1)* %in, i32 %tid + %gep.1 = getelementptr double addrspace(1)* %gep.0, i32 1 + %gep.2 = getelementptr double addrspace(1)* %gep.0, i32 2 + %gep.3 = getelementptr double addrspace(1)* %gep.0, i32 3 + %gep.out.0 = getelementptr double addrspace(1)* %out, i32 %tid + %gep.out.1 = getelementptr double addrspace(1)* %gep.out.0, i32 1 + + %a = load double addrspace(1)* %gep.0 + %b = load double addrspace(1)* %gep.1 + %c = load double addrspace(1)* %gep.2 + %d = load double addrspace(1)* %gep.3 + + %mul = fmul double %a, %b + %fma0 = fsub double %c, %mul + %fma1 = fsub double %d, %mul + store double %fma0, double addrspace(1)* %gep.out.0 + store double %fma1, double addrspace(1)* %gep.out.1 + ret void +} + +; (fsub (fneg (fmul x, y)), z) -> (fma (fneg x), y, (fneg z)) +; FUNC-LABEL: {{^}}combine_to_fma_fsub_2_f64: +; SI-DAG: buffer_load_dwordx2 [[A:v\[[0-9]+:[0-9]+\]]], v{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}}, 0 addr64{{$}} +; SI-DAG: buffer_load_dwordx2 [[B:v\[[0-9]+:[0-9]+\]]], v{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}}, 0 addr64 offset:8{{$}} +; SI-DAG: buffer_load_dwordx2 [[C:v\[[0-9]+:[0-9]+\]]], v{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}}, 0 addr64 offset:16{{$}} +; SI: v_fma_f64 [[RESULT:v\[[0-9]+:[0-9]+\]]], -[[A]], [[B]], -[[C]] +; SI: buffer_store_dwordx2 [[RESULT]] +define void @combine_to_fma_fsub_2_f64(double addrspace(1)* noalias %out, double addrspace(1)* noalias %in) #1 { + %tid = tail call i32 @llvm.r600.read.tidig.x() #0 + %gep.0 = getelementptr double addrspace(1)* %in, i32 %tid + %gep.1 = getelementptr double addrspace(1)* %gep.0, i32 1 + %gep.2 = getelementptr double addrspace(1)* %gep.0, i32 2 + %gep.out = getelementptr double addrspace(1)* %out, i32 %tid + + %a = load double addrspace(1)* %gep.0 + %b = load double addrspace(1)* %gep.1 + %c = load double addrspace(1)* %gep.2 + + %mul = fmul double %a, %b + %mul.neg = fsub double -0.0, %mul + %fma = fsub double %mul.neg, %c + + store double %fma, double addrspace(1)* %gep.out + ret void +} + +; (fsub (fneg (fmul x, y)), z) -> (fma (fneg x), y, (fneg z)) +; FUNC-LABEL: {{^}}combine_to_fma_fsub_2_f64_2uses_neg: +; SI-DAG: buffer_load_dwordx2 [[A:v\[[0-9]+:[0-9]+\]]], v{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}}, 0 addr64{{$}} +; SI-DAG: buffer_load_dwordx2 [[B:v\[[0-9]+:[0-9]+\]]], v{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}}, 0 addr64 offset:8{{$}} +; SI-DAG: buffer_load_dwordx2 [[C:v\[[0-9]+:[0-9]+\]]], v{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}}, 0 addr64 offset:16{{$}} +; SI-DAG: v_fma_f64 [[RESULT0:v\[[0-9]+:[0-9]+\]]], -[[A]], [[B]], -[[C]] +; SI-DAG: v_fma_f64 [[RESULT1:v\[[0-9]+:[0-9]+\]]], -[[A]], [[B]], -[[D]] +; SI-DAG: buffer_store_dwordx2 [[RESULT0]], v{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}}, 0 addr64{{$}} +; SI-DAG: buffer_store_dwordx2 [[RESULT1]], v{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}}, 0 addr64 offset:8{{$}} +; SI: s_endpgm +define void @combine_to_fma_fsub_2_f64_2uses_neg(double addrspace(1)* noalias %out, double addrspace(1)* noalias %in) #1 { + %tid = tail call i32 @llvm.r600.read.tidig.x() #0 + %gep.0 = getelementptr double addrspace(1)* %in, i32 %tid + %gep.1 = getelementptr double addrspace(1)* %gep.0, i32 1 + %gep.2 = getelementptr double addrspace(1)* %gep.0, i32 2 + %gep.3 = getelementptr double addrspace(1)* %gep.0, i32 3 + %gep.out.0 = getelementptr double addrspace(1)* %out, i32 %tid + %gep.out.1 = getelementptr double addrspace(1)* %gep.out.0, i32 1 + + %a = load double addrspace(1)* %gep.0 + 
%b = load double addrspace(1)* %gep.1 + %c = load double addrspace(1)* %gep.2 + %d = load double addrspace(1)* %gep.3 + + %mul = fmul double %a, %b + %mul.neg = fsub double -0.0, %mul + %fma0 = fsub double %mul.neg, %c + %fma1 = fsub double %mul.neg, %d + + store double %fma0, double addrspace(1)* %gep.out.0 + store double %fma1, double addrspace(1)* %gep.out.1 + ret void +} + +; (fsub (fneg (fmul x, y)), z) -> (fma (fneg x), y, (fneg z)) +; FUNC-LABEL: {{^}}combine_to_fma_fsub_2_f64_2uses_mul: +; SI-DAG: buffer_load_dwordx2 [[A:v\[[0-9]+:[0-9]+\]]], v{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}}, 0 addr64{{$}} +; SI-DAG: buffer_load_dwordx2 [[B:v\[[0-9]+:[0-9]+\]]], v{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}}, 0 addr64 offset:8{{$}} +; SI-DAG: buffer_load_dwordx2 [[C:v\[[0-9]+:[0-9]+\]]], v{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}}, 0 addr64 offset:16{{$}} +; SI-DAG: v_fma_f64 [[RESULT0:v\[[0-9]+:[0-9]+\]]], -[[A]], [[B]], -[[C]] +; SI-DAG: v_fma_f64 [[RESULT1:v\[[0-9]+:[0-9]+\]]], [[A]], [[B]], -[[D]] +; SI-DAG: buffer_store_dwordx2 [[RESULT0]], v{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}}, 0 addr64{{$}} +; SI-DAG: buffer_store_dwordx2 [[RESULT1]], v{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}}, 0 addr64 offset:8{{$}} +; SI: s_endpgm +define void @combine_to_fma_fsub_2_f64_2uses_mul(double addrspace(1)* noalias %out, double addrspace(1)* noalias %in) #1 { + %tid = tail call i32 @llvm.r600.read.tidig.x() #0 + %gep.0 = getelementptr double addrspace(1)* %in, i32 %tid + %gep.1 = getelementptr double addrspace(1)* %gep.0, i32 1 + %gep.2 = getelementptr double addrspace(1)* %gep.0, i32 2 + %gep.3 = getelementptr double addrspace(1)* %gep.0, i32 3 + %gep.out.0 = getelementptr double addrspace(1)* %out, i32 %tid + %gep.out.1 = getelementptr double addrspace(1)* %gep.out.0, i32 1 + + %a = load double addrspace(1)* %gep.0 + %b = load double addrspace(1)* %gep.1 + %c = load double addrspace(1)* %gep.2 + %d = load double addrspace(1)* %gep.3 + + %mul = fmul double %a, %b + %mul.neg = fsub double -0.0, %mul + %fma0 = fsub double %mul.neg, %c + %fma1 = fsub double %mul, %d + + store double %fma0, double addrspace(1)* %gep.out.0 + store double %fma1, double addrspace(1)* %gep.out.1 + ret void +} + +; fold (fsub (fma x, y, (fmul u, v)), z) -> (fma x, y (fma u, v, (fneg z))) + +; FUNC-LABEL: {{^}}aggressive_combine_to_fma_fsub_0_f64: +; SI-DAG: buffer_load_dwordx2 [[X:v\[[0-9]+:[0-9]+\]]], v{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}}, 0 addr64{{$}} +; SI-DAG: buffer_load_dwordx2 [[Y:v\[[0-9]+:[0-9]+\]]], v{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}}, 0 addr64 offset:8{{$}} +; SI-DAG: buffer_load_dwordx2 [[Z:v\[[0-9]+:[0-9]+\]]], v{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}}, 0 addr64 offset:16{{$}} +; SI-DAG: buffer_load_dwordx2 [[U:v\[[0-9]+:[0-9]+\]]], v{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}}, 0 addr64 offset:24{{$}} +; SI-DAG: buffer_load_dwordx2 [[V:v\[[0-9]+:[0-9]+\]]], v{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}}, 0 addr64 offset:32{{$}} +; SI: v_fma_f64 [[FMA0:v\[[0-9]+:[0-9]+\]]], [[U]], [[V]], -[[Z]] +; SI: v_fma_f64 [[RESULT:v\[[0-9]+:[0-9]+\]]], [[X]], [[Y]], [[FMA0]] +; SI: buffer_store_dwordx2 [[RESULT]] +define void @aggressive_combine_to_fma_fsub_0_f64(double addrspace(1)* noalias %out, double addrspace(1)* noalias %in) #1 { + %tid = tail call i32 @llvm.r600.read.tidig.x() #0 + %gep.0 = getelementptr double addrspace(1)* %in, i32 %tid + %gep.1 = getelementptr double addrspace(1)* %gep.0, i32 1 + %gep.2 = getelementptr double addrspace(1)* %gep.0, i32 2 + %gep.3 = 
getelementptr double addrspace(1)* %gep.0, i32 3 + %gep.4 = getelementptr double addrspace(1)* %gep.0, i32 4 + %gep.out = getelementptr double addrspace(1)* %out, i32 %tid + + %x = load double addrspace(1)* %gep.0 + %y = load double addrspace(1)* %gep.1 + %z = load double addrspace(1)* %gep.2 + %u = load double addrspace(1)* %gep.3 + %v = load double addrspace(1)* %gep.4 + + %tmp0 = fmul double %u, %v + %tmp1 = call double @llvm.fma.f64(double %x, double %y, double %tmp0) #0 + %tmp2 = fsub double %tmp1, %z + + store double %tmp2, double addrspace(1)* %gep.out + ret void +} + +; fold (fsub x, (fma y, z, (fmul u, v))) +; -> (fma (fneg y), z, (fma (fneg u), v, x)) + +; FUNC-LABEL: {{^}}aggressive_combine_to_fma_fsub_1_f64: +; SI-DAG: buffer_load_dwordx2 [[X:v\[[0-9]+:[0-9]+\]]], v{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}}, 0 addr64{{$}} +; SI-DAG: buffer_load_dwordx2 [[Y:v\[[0-9]+:[0-9]+\]]], v{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}}, 0 addr64 offset:8{{$}} +; SI-DAG: buffer_load_dwordx2 [[Z:v\[[0-9]+:[0-9]+\]]], v{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}}, 0 addr64 offset:16{{$}} +; SI-DAG: buffer_load_dwordx2 [[U:v\[[0-9]+:[0-9]+\]]], v{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}}, 0 addr64 offset:24{{$}} +; SI-DAG: buffer_load_dwordx2 [[V:v\[[0-9]+:[0-9]+\]]], v{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}}, 0 addr64 offset:32{{$}} +; SI: v_fma_f64 [[FMA0:v\[[0-9]+:[0-9]+\]]], -[[U]], [[V]], [[X]] +; SI: v_fma_f64 [[RESULT:v\[[0-9]+:[0-9]+\]]], -[[Y]], [[Z]], [[FMA0]] +; SI: buffer_store_dwordx2 [[RESULT]] +define void @aggressive_combine_to_fma_fsub_1_f64(double addrspace(1)* noalias %out, double addrspace(1)* noalias %in) #1 { + %tid = tail call i32 @llvm.r600.read.tidig.x() #0 + %gep.0 = getelementptr double addrspace(1)* %in, i32 %tid + %gep.1 = getelementptr double addrspace(1)* %gep.0, i32 1 + %gep.2 = getelementptr double addrspace(1)* %gep.0, i32 2 + %gep.3 = getelementptr double addrspace(1)* %gep.0, i32 3 + %gep.4 = getelementptr double addrspace(1)* %gep.0, i32 4 + %gep.out = getelementptr double addrspace(1)* %out, i32 %tid + + %x = load double addrspace(1)* %gep.0 + %y = load double addrspace(1)* %gep.1 + %z = load double addrspace(1)* %gep.2 + %u = load double addrspace(1)* %gep.3 + %v = load double addrspace(1)* %gep.4 + + %tmp0 = fmul double %u, %v + %tmp1 = call double @llvm.fma.f64(double %y, double %z, double %tmp0) #0 + %tmp2 = fsub double %x, %tmp1 + + store double %tmp2, double addrspace(1)* %gep.out + ret void +} + +attributes #0 = { nounwind readnone } +attributes #1 = { nounwind } diff --git a/test/CodeGen/R600/fma.f64.ll b/test/CodeGen/R600/fma.f64.ll index 4b0ab76..bca312b 100644 --- a/test/CodeGen/R600/fma.f64.ll +++ b/test/CodeGen/R600/fma.f64.ll @@ -1,4 +1,5 @@ -; RUN: llc -march=r600 -mcpu=SI -verify-machineinstrs < %s | FileCheck -check-prefix=SI -check-prefix=FUNC %s +; RUN: llc -march=amdgcn -mcpu=SI -verify-machineinstrs < %s | FileCheck -check-prefix=SI -check-prefix=FUNC %s +; RUN: llc -march=amdgcn -mcpu=tonga -verify-machineinstrs < %s | FileCheck -check-prefix=SI -check-prefix=FUNC %s declare double @llvm.fma.f64(double, double, double) nounwind readnone declare <2 x double> @llvm.fma.v2f64(<2 x double>, <2 x double>, <2 x double>) nounwind readnone diff --git a/test/CodeGen/R600/fma.ll b/test/CodeGen/R600/fma.ll index 637e799..f3861ff 100644 --- a/test/CodeGen/R600/fma.ll +++ b/test/CodeGen/R600/fma.ll @@ -1,4 +1,4 @@ -; RUN: llc -march=r600 -mcpu=SI -verify-machineinstrs < %s | FileCheck -check-prefix=SI -check-prefix=FUNC %s +; RUN: llc 
-march=amdgcn -mcpu=SI -verify-machineinstrs < %s | FileCheck -check-prefix=SI -check-prefix=FUNC %s ; RUN: llc -march=r600 -mcpu=cypress -verify-machineinstrs < %s | FileCheck -check-prefix=EG -check-prefix=FUNC %s declare float @llvm.fma.f32(float, float, float) nounwind readnone diff --git a/test/CodeGen/R600/fmax3.ll b/test/CodeGen/R600/fmax3.ll index cf371b3..629c032 100644 --- a/test/CodeGen/R600/fmax3.ll +++ b/test/CodeGen/R600/fmax3.ll @@ -1,11 +1,12 @@ -; RUN: llc -march=r600 -mcpu=SI -verify-machineinstrs < %s | FileCheck -check-prefix=SI %s +; RUN: llc -march=amdgcn -mcpu=SI -verify-machineinstrs < %s | FileCheck -check-prefix=SI %s +; RUN: llc -march=amdgcn -mcpu=tonga -verify-machineinstrs < %s | FileCheck -check-prefix=SI %s declare float @llvm.maxnum.f32(float, float) nounwind readnone ; SI-LABEL: {{^}}test_fmax3_olt_0: -; SI: buffer_load_dword [[REGA:v[0-9]+]] -; SI: buffer_load_dword [[REGB:v[0-9]+]] ; SI: buffer_load_dword [[REGC:v[0-9]+]] +; SI: buffer_load_dword [[REGB:v[0-9]+]] +; SI: buffer_load_dword [[REGA:v[0-9]+]] ; SI: v_max3_f32 [[RESULT:v[0-9]+]], [[REGC]], [[REGB]], [[REGA]] ; SI: buffer_store_dword [[RESULT]], ; SI: s_endpgm @@ -21,8 +22,8 @@ define void @test_fmax3_olt_0(float addrspace(1)* %out, float addrspace(1)* %apt ; Commute operand of second fmax ; SI-LABEL: {{^}}test_fmax3_olt_1: -; SI: buffer_load_dword [[REGA:v[0-9]+]] ; SI: buffer_load_dword [[REGB:v[0-9]+]] +; SI: buffer_load_dword [[REGA:v[0-9]+]] ; SI: buffer_load_dword [[REGC:v[0-9]+]] ; SI: v_max3_f32 [[RESULT:v[0-9]+]], [[REGC]], [[REGB]], [[REGA]] ; SI: buffer_store_dword [[RESULT]], diff --git a/test/CodeGen/R600/fmax_legacy.f64.ll b/test/CodeGen/R600/fmax_legacy.f64.ll new file mode 100644 index 0000000..762853d --- /dev/null +++ b/test/CodeGen/R600/fmax_legacy.f64.ll @@ -0,0 +1,67 @@ +; RUN: llc -march=amdgcn -mcpu=SI < %s | FileCheck -check-prefix=SI -check-prefix=FUNC %s +; Make sure we don't try to form FMAX_LEGACY nodes with f64 + +declare i32 @llvm.r600.read.tidig.x() #1 + +; FUNC-LABEL: @test_fmax_legacy_uge_f64 +define void @test_fmax_legacy_uge_f64(double addrspace(1)* %out, double addrspace(1)* %in) #0 { + %tid = call i32 @llvm.r600.read.tidig.x() #1 + %gep.0 = getelementptr double addrspace(1)* %in, i32 %tid + %gep.1 = getelementptr double addrspace(1)* %gep.0, i32 1 + + %a = load double addrspace(1)* %gep.0, align 8 + %b = load double addrspace(1)* %gep.1, align 8 + + %cmp = fcmp uge double %a, %b + %val = select i1 %cmp, double %a, double %b + store double %val, double addrspace(1)* %out, align 8 + ret void +} + +; FUNC-LABEL: @test_fmax_legacy_oge_f64 +define void @test_fmax_legacy_oge_f64(double addrspace(1)* %out, double addrspace(1)* %in) #0 { + %tid = call i32 @llvm.r600.read.tidig.x() #1 + %gep.0 = getelementptr double addrspace(1)* %in, i32 %tid + %gep.1 = getelementptr double addrspace(1)* %gep.0, i32 1 + + %a = load double addrspace(1)* %gep.0, align 8 + %b = load double addrspace(1)* %gep.1, align 8 + + %cmp = fcmp oge double %a, %b + %val = select i1 %cmp, double %a, double %b + store double %val, double addrspace(1)* %out, align 8 + ret void +} + +; FUNC-LABEL: @test_fmax_legacy_ugt_f64 +define void @test_fmax_legacy_ugt_f64(double addrspace(1)* %out, double addrspace(1)* %in) #0 { + %tid = call i32 @llvm.r600.read.tidig.x() #1 + %gep.0 = getelementptr double addrspace(1)* %in, i32 %tid + %gep.1 = getelementptr double addrspace(1)* %gep.0, i32 1 + + %a = load double addrspace(1)* %gep.0, align 8 + %b = load double addrspace(1)* %gep.1, align 8 + + %cmp = fcmp ugt 
double %a, %b + %val = select i1 %cmp, double %a, double %b + store double %val, double addrspace(1)* %out, align 8 + ret void +} + +; FUNC-LABEL: @test_fmax_legacy_ogt_f64 +define void @test_fmax_legacy_ogt_f64(double addrspace(1)* %out, double addrspace(1)* %in) #0 { + %tid = call i32 @llvm.r600.read.tidig.x() #1 + %gep.0 = getelementptr double addrspace(1)* %in, i32 %tid + %gep.1 = getelementptr double addrspace(1)* %gep.0, i32 1 + + %a = load double addrspace(1)* %gep.0, align 8 + %b = load double addrspace(1)* %gep.1, align 8 + + %cmp = fcmp ogt double %a, %b + %val = select i1 %cmp, double %a, double %b + store double %val, double addrspace(1)* %out, align 8 + ret void +} + +attributes #0 = { nounwind } +attributes #1 = { nounwind readnone } diff --git a/test/CodeGen/R600/fmax_legacy.ll b/test/CodeGen/R600/fmax_legacy.ll index e9d837b..46f0e98 100644 --- a/test/CodeGen/R600/fmax_legacy.ll +++ b/test/CodeGen/R600/fmax_legacy.ll @@ -1,12 +1,17 @@ -; RUN: llc -march=r600 -mcpu=SI < %s | FileCheck -check-prefix=SI -check-prefix=FUNC %s +; RUN: llc -march=amdgcn -mcpu=SI < %s | FileCheck -check-prefix=SI -check-prefix=SI-SAFE -check-prefix=FUNC %s +; RUN: llc -enable-no-nans-fp-math -enable-unsafe-fp-math -march=amdgcn -mcpu=SI < %s | FileCheck -check-prefix=SI-NONAN -check-prefix=SI -check-prefix=FUNC %s ; RUN: llc -march=r600 -mcpu=redwood < %s | FileCheck -check-prefix=EG -check-prefix=FUNC %s +; FIXME: Should replace unsafe-fp-math with no signed zeros. + declare i32 @llvm.r600.read.tidig.x() #1 ; FUNC-LABEL: @test_fmax_legacy_uge_f32 ; SI: buffer_load_dword [[A:v[0-9]+]], {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, 0 addr64{{$}} -; SI: buffer_load_dword [[B:v[0-9]+]], {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, 0 addr64 offset:0x4 -; SI: v_max_legacy_f32_e32 {{v[0-9]+}}, [[A]], [[B]] +; SI: buffer_load_dword [[B:v[0-9]+]], {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, 0 addr64 offset:4 +; SI-SAFE: v_max_legacy_f32_e32 {{v[0-9]+}}, [[B]], [[A]] +; SI-NONAN: v_max_f32_e32 {{v[0-9]+}}, [[B]], [[A]] + ; EG: MAX define void @test_fmax_legacy_uge_f32(float addrspace(1)* %out, float addrspace(1)* %in) #0 { %tid = call i32 @llvm.r600.read.tidig.x() #1 @@ -24,8 +29,9 @@ define void @test_fmax_legacy_uge_f32(float addrspace(1)* %out, float addrspace( ; FUNC-LABEL: @test_fmax_legacy_oge_f32 ; SI: buffer_load_dword [[A:v[0-9]+]], {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, 0 addr64{{$}} -; SI: buffer_load_dword [[B:v[0-9]+]], {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, 0 addr64 offset:0x4 -; SI: v_max_legacy_f32_e32 {{v[0-9]+}}, [[A]], [[B]] +; SI: buffer_load_dword [[B:v[0-9]+]], {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, 0 addr64 offset:4 +; SI-SAFE: v_max_legacy_f32_e32 {{v[0-9]+}}, [[A]], [[B]] +; SI-NONAN: v_max_f32_e32 {{v[0-9]+}}, [[B]], [[A]] ; EG: MAX define void @test_fmax_legacy_oge_f32(float addrspace(1)* %out, float addrspace(1)* %in) #0 { %tid = call i32 @llvm.r600.read.tidig.x() #1 @@ -43,8 +49,9 @@ define void @test_fmax_legacy_oge_f32(float addrspace(1)* %out, float addrspace( ; FUNC-LABEL: @test_fmax_legacy_ugt_f32 ; SI: buffer_load_dword [[A:v[0-9]+]], {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, 0 addr64{{$}} -; SI: buffer_load_dword [[B:v[0-9]+]], {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, 0 addr64 offset:0x4 -; SI: v_max_legacy_f32_e32 {{v[0-9]+}}, [[A]], [[B]] +; SI: buffer_load_dword [[B:v[0-9]+]], {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, 0 addr64 offset:4 +; SI-SAFE: v_max_legacy_f32_e32 {{v[0-9]+}}, [[B]], [[A]] +; SI-NONAN: 
v_max_f32_e32 {{v[0-9]+}}, [[B]], [[A]] ; EG: MAX define void @test_fmax_legacy_ugt_f32(float addrspace(1)* %out, float addrspace(1)* %in) #0 { %tid = call i32 @llvm.r600.read.tidig.x() #1 @@ -62,8 +69,9 @@ define void @test_fmax_legacy_ugt_f32(float addrspace(1)* %out, float addrspace( ; FUNC-LABEL: @test_fmax_legacy_ogt_f32 ; SI: buffer_load_dword [[A:v[0-9]+]], {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, 0 addr64{{$}} -; SI: buffer_load_dword [[B:v[0-9]+]], {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, 0 addr64 offset:0x4 -; SI: v_max_legacy_f32_e32 {{v[0-9]+}}, [[A]], [[B]] +; SI: buffer_load_dword [[B:v[0-9]+]], {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, 0 addr64 offset:4 +; SI-SAFE: v_max_legacy_f32_e32 {{v[0-9]+}}, [[A]], [[B]] +; SI-NONAN: v_max_f32_e32 {{v[0-9]+}}, [[B]], [[A]] ; EG: MAX define void @test_fmax_legacy_ogt_f32(float addrspace(1)* %out, float addrspace(1)* %in) #0 { %tid = call i32 @llvm.r600.read.tidig.x() #1 @@ -79,5 +87,30 @@ define void @test_fmax_legacy_ogt_f32(float addrspace(1)* %out, float addrspace( ret void } + +; FUNC-LABEL: @test_fmax_legacy_ogt_f32_multi_use +; SI: buffer_load_dword [[A:v[0-9]+]], {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, 0 addr64{{$}} +; SI: buffer_load_dword [[B:v[0-9]+]], {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, 0 addr64 offset:4 +; SI-NOT: v_max_ +; SI: v_cmp_gt_f32 +; SI-NEXT: v_cndmask_b32 +; SI-NOT: v_max_ + +; EG: MAX +define void @test_fmax_legacy_ogt_f32_multi_use(float addrspace(1)* %out0, i1 addrspace(1)* %out1, float addrspace(1)* %in) #0 { + %tid = call i32 @llvm.r600.read.tidig.x() #1 + %gep.0 = getelementptr float addrspace(1)* %in, i32 %tid + %gep.1 = getelementptr float addrspace(1)* %gep.0, i32 1 + + %a = load float addrspace(1)* %gep.0, align 4 + %b = load float addrspace(1)* %gep.1, align 4 + + %cmp = fcmp ogt float %a, %b + %val = select i1 %cmp, float %a, float %b + store float %val, float addrspace(1)* %out0, align 4 + store i1 %cmp, i1addrspace(1)* %out1 + ret void +} + attributes #0 = { nounwind } attributes #1 = { nounwind readnone } diff --git a/test/CodeGen/R600/fmaxnum.f64.ll b/test/CodeGen/R600/fmaxnum.f64.ll index 51cbf4d..de563ce 100644 --- a/test/CodeGen/R600/fmaxnum.f64.ll +++ b/test/CodeGen/R600/fmaxnum.f64.ll @@ -1,4 +1,5 @@ -; RUN: llc -march=r600 -mcpu=SI < %s | FileCheck -check-prefix=SI -check-prefix=FUNC %s +; RUN: llc -march=amdgcn -mcpu=SI < %s | FileCheck -check-prefix=SI -check-prefix=FUNC %s +; RUN: llc -march=amdgcn -mcpu=tonga < %s | FileCheck -check-prefix=SI -check-prefix=FUNC %s declare double @llvm.maxnum.f64(double, double) #0 declare <2 x double> @llvm.maxnum.v2f64(<2 x double>, <2 x double>) #0 diff --git a/test/CodeGen/R600/fmaxnum.ll b/test/CodeGen/R600/fmaxnum.ll index 01d30b0..c105598 100644 --- a/test/CodeGen/R600/fmaxnum.ll +++ b/test/CodeGen/R600/fmaxnum.ll @@ -1,4 +1,5 @@ -; RUN: llc -march=r600 -mcpu=SI < %s | FileCheck -check-prefix=SI -check-prefix=FUNC %s +; RUN: llc -march=amdgcn -mcpu=SI < %s | FileCheck -check-prefix=SI -check-prefix=FUNC %s +; RUN: llc -march=amdgcn -mcpu=tonga < %s | FileCheck -check-prefix=SI -check-prefix=FUNC %s declare float @llvm.maxnum.f32(float, float) #0 declare <2 x float> @llvm.maxnum.v2f32(<2 x float>, <2 x float>) #0 diff --git a/test/CodeGen/R600/fmin3.ll b/test/CodeGen/R600/fmin3.ll index 7420368..e3acb31 100644 --- a/test/CodeGen/R600/fmin3.ll +++ b/test/CodeGen/R600/fmin3.ll @@ -1,11 +1,13 @@ -; RUN: llc -march=r600 -mcpu=SI -verify-machineinstrs < %s | FileCheck -check-prefix=SI %s +; RUN: llc -march=amdgcn 
-mcpu=SI -verify-machineinstrs < %s | FileCheck -check-prefix=SI %s +; RUN: llc -march=amdgcn -mcpu=tonga -verify-machineinstrs < %s | FileCheck -check-prefix=SI %s +; RUN: llc -march=amdgcn -mcpu=tonga -verify-machineinstrs < %s | FileCheck -check-prefix=SI %s declare float @llvm.minnum.f32(float, float) nounwind readnone ; SI-LABEL: {{^}}test_fmin3_olt_0: -; SI: buffer_load_dword [[REGA:v[0-9]+]] -; SI: buffer_load_dword [[REGB:v[0-9]+]] ; SI: buffer_load_dword [[REGC:v[0-9]+]] +; SI: buffer_load_dword [[REGB:v[0-9]+]] +; SI: buffer_load_dword [[REGA:v[0-9]+]] ; SI: v_min3_f32 [[RESULT:v[0-9]+]], [[REGC]], [[REGB]], [[REGA]] ; SI: buffer_store_dword [[RESULT]], ; SI: s_endpgm @@ -21,8 +23,8 @@ define void @test_fmin3_olt_0(float addrspace(1)* %out, float addrspace(1)* %apt ; Commute operand of second fmin ; SI-LABEL: {{^}}test_fmin3_olt_1: -; SI: buffer_load_dword [[REGA:v[0-9]+]] ; SI: buffer_load_dword [[REGB:v[0-9]+]] +; SI: buffer_load_dword [[REGA:v[0-9]+]] ; SI: buffer_load_dword [[REGC:v[0-9]+]] ; SI: v_min3_f32 [[RESULT:v[0-9]+]], [[REGC]], [[REGB]], [[REGA]] ; SI: buffer_store_dword [[RESULT]], diff --git a/test/CodeGen/R600/fmin_legacy.f64.ll b/test/CodeGen/R600/fmin_legacy.f64.ll new file mode 100644 index 0000000..83043cd --- /dev/null +++ b/test/CodeGen/R600/fmin_legacy.f64.ll @@ -0,0 +1,77 @@ +; RUN: llc -march=amdgcn -mcpu=SI < %s | FileCheck -check-prefix=SI -check-prefix=FUNC %s + +declare i32 @llvm.r600.read.tidig.x() #1 + +; FUNC-LABEL: @test_fmin_legacy_f64 +define void @test_fmin_legacy_f64(<4 x double> addrspace(1)* %out, <4 x double> inreg %reg0) #0 { + %r0 = extractelement <4 x double> %reg0, i32 0 + %r1 = extractelement <4 x double> %reg0, i32 1 + %r2 = fcmp uge double %r0, %r1 + %r3 = select i1 %r2, double %r1, double %r0 + %vec = insertelement <4 x double> undef, double %r3, i32 0 + store <4 x double> %vec, <4 x double> addrspace(1)* %out, align 16 + ret void +} + +; FUNC-LABEL: @test_fmin_legacy_ule_f64 +define void @test_fmin_legacy_ule_f64(double addrspace(1)* %out, double addrspace(1)* %in) #0 { + %tid = call i32 @llvm.r600.read.tidig.x() #1 + %gep.0 = getelementptr double addrspace(1)* %in, i32 %tid + %gep.1 = getelementptr double addrspace(1)* %gep.0, i32 1 + + %a = load double addrspace(1)* %gep.0, align 8 + %b = load double addrspace(1)* %gep.1, align 8 + + %cmp = fcmp ule double %a, %b + %val = select i1 %cmp, double %a, double %b + store double %val, double addrspace(1)* %out, align 8 + ret void +} + +; FUNC-LABEL: @test_fmin_legacy_ole_f64 +define void @test_fmin_legacy_ole_f64(double addrspace(1)* %out, double addrspace(1)* %in) #0 { + %tid = call i32 @llvm.r600.read.tidig.x() #1 + %gep.0 = getelementptr double addrspace(1)* %in, i32 %tid + %gep.1 = getelementptr double addrspace(1)* %gep.0, i32 1 + + %a = load double addrspace(1)* %gep.0, align 8 + %b = load double addrspace(1)* %gep.1, align 8 + + %cmp = fcmp ole double %a, %b + %val = select i1 %cmp, double %a, double %b + store double %val, double addrspace(1)* %out, align 8 + ret void +} + +; FUNC-LABEL: @test_fmin_legacy_olt_f64 +define void @test_fmin_legacy_olt_f64(double addrspace(1)* %out, double addrspace(1)* %in) #0 { + %tid = call i32 @llvm.r600.read.tidig.x() #1 + %gep.0 = getelementptr double addrspace(1)* %in, i32 %tid + %gep.1 = getelementptr double addrspace(1)* %gep.0, i32 1 + + %a = load double addrspace(1)* %gep.0, align 8 + %b = load double addrspace(1)* %gep.1, align 8 + + %cmp = fcmp olt double %a, %b + %val = select i1 %cmp, double %a, double %b + store double %val, double 
addrspace(1)* %out, align 8 + ret void +} + +; FUNC-LABEL: @test_fmin_legacy_ult_f64 +define void @test_fmin_legacy_ult_f64(double addrspace(1)* %out, double addrspace(1)* %in) #0 { + %tid = call i32 @llvm.r600.read.tidig.x() #1 + %gep.0 = getelementptr double addrspace(1)* %in, i32 %tid + %gep.1 = getelementptr double addrspace(1)* %gep.0, i32 1 + + %a = load double addrspace(1)* %gep.0, align 8 + %b = load double addrspace(1)* %gep.1, align 8 + + %cmp = fcmp ult double %a, %b + %val = select i1 %cmp, double %a, double %b + store double %val, double addrspace(1)* %out, align 8 + ret void +} + +attributes #0 = { nounwind } +attributes #1 = { nounwind readnone } diff --git a/test/CodeGen/R600/fmin_legacy.ll b/test/CodeGen/R600/fmin_legacy.ll index 2fbdb6b..5014f6c 100644 --- a/test/CodeGen/R600/fmin_legacy.ll +++ b/test/CodeGen/R600/fmin_legacy.ll @@ -1,11 +1,15 @@ -; RUN: llc -march=r600 -mcpu=SI < %s | FileCheck -check-prefix=SI -check-prefix=FUNC %s +; RUN: llc -march=amdgcn -mcpu=SI < %s | FileCheck -check-prefix=SI-SAFE -check-prefix=SI -check-prefix=FUNC %s +; RUN: llc -enable-no-nans-fp-math -enable-unsafe-fp-math -march=amdgcn -mcpu=SI < %s | FileCheck -check-prefix=SI-NONAN -check-prefix=SI -check-prefix=FUNC %s ; RUN: llc -march=r600 -mcpu=redwood < %s | FileCheck -check-prefix=EG -check-prefix=FUNC %s +; FIXME: Should replace unsafe-fp-math with no signed zeros. + declare i32 @llvm.r600.read.tidig.x() #1 ; FUNC-LABEL: @test_fmin_legacy_f32 ; EG: MIN * -; SI: v_min_legacy_f32_e32 +; SI-SAFE: v_min_legacy_f32_e32 +; SI-NONAN: v_min_f32_e32 define void @test_fmin_legacy_f32(<4 x float> addrspace(1)* %out, <4 x float> inreg %reg0) #0 { %r0 = extractelement <4 x float> %reg0, i32 0 %r1 = extractelement <4 x float> %reg0, i32 1 @@ -18,8 +22,9 @@ define void @test_fmin_legacy_f32(<4 x float> addrspace(1)* %out, <4 x float> in ; FUNC-LABEL: @test_fmin_legacy_ule_f32 ; SI: buffer_load_dword [[A:v[0-9]+]], {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, 0 addr64{{$}} -; SI: buffer_load_dword [[B:v[0-9]+]], {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, 0 addr64 offset:0x4 -; SI: v_min_legacy_f32_e32 {{v[0-9]+}}, [[A]], [[B]] +; SI: buffer_load_dword [[B:v[0-9]+]], {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, 0 addr64 offset:4 +; SI-SAFE: v_min_legacy_f32_e32 {{v[0-9]+}}, [[B]], [[A]] +; SI-NONAN: v_min_f32_e32 {{v[0-9]+}}, [[B]], [[A]] define void @test_fmin_legacy_ule_f32(float addrspace(1)* %out, float addrspace(1)* %in) #0 { %tid = call i32 @llvm.r600.read.tidig.x() #1 %gep.0 = getelementptr float addrspace(1)* %in, i32 %tid @@ -36,8 +41,9 @@ define void @test_fmin_legacy_ule_f32(float addrspace(1)* %out, float addrspace( ; FUNC-LABEL: @test_fmin_legacy_ole_f32 ; SI: buffer_load_dword [[A:v[0-9]+]], {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, 0 addr64{{$}} -; SI: buffer_load_dword [[B:v[0-9]+]], {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, 0 addr64 offset:0x4 -; SI: v_min_legacy_f32_e32 {{v[0-9]+}}, [[A]], [[B]] +; SI: buffer_load_dword [[B:v[0-9]+]], {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, 0 addr64 offset:4 +; SI-SAFE: v_min_legacy_f32_e32 {{v[0-9]+}}, [[A]], [[B]] +; SI-NONAN: v_min_f32_e32 {{v[0-9]+}}, [[B]], [[A]] define void @test_fmin_legacy_ole_f32(float addrspace(1)* %out, float addrspace(1)* %in) #0 { %tid = call i32 @llvm.r600.read.tidig.x() #1 %gep.0 = getelementptr float addrspace(1)* %in, i32 %tid @@ -54,8 +60,9 @@ define void @test_fmin_legacy_ole_f32(float addrspace(1)* %out, float addrspace( ; FUNC-LABEL: @test_fmin_legacy_olt_f32 ; SI: 
buffer_load_dword [[A:v[0-9]+]], {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, 0 addr64{{$}} -; SI: buffer_load_dword [[B:v[0-9]+]], {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, 0 addr64 offset:0x4 -; SI: v_min_legacy_f32_e32 {{v[0-9]+}}, [[A]], [[B]] +; SI: buffer_load_dword [[B:v[0-9]+]], {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, 0 addr64 offset:4 +; SI-SAFE: v_min_legacy_f32_e32 {{v[0-9]+}}, [[A]], [[B]] +; SI-NONAN: v_min_f32_e32 {{v[0-9]+}}, [[B]], [[A]] define void @test_fmin_legacy_olt_f32(float addrspace(1)* %out, float addrspace(1)* %in) #0 { %tid = call i32 @llvm.r600.read.tidig.x() #1 %gep.0 = getelementptr float addrspace(1)* %in, i32 %tid @@ -72,8 +79,9 @@ define void @test_fmin_legacy_olt_f32(float addrspace(1)* %out, float addrspace( ; FUNC-LABEL: @test_fmin_legacy_ult_f32 ; SI: buffer_load_dword [[A:v[0-9]+]], {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, 0 addr64{{$}} -; SI: buffer_load_dword [[B:v[0-9]+]], {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, 0 addr64 offset:0x4 -; SI: v_min_legacy_f32_e32 {{v[0-9]+}}, [[A]], [[B]] +; SI: buffer_load_dword [[B:v[0-9]+]], {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, 0 addr64 offset:4 +; SI-SAFE: v_min_legacy_f32_e32 {{v[0-9]+}}, [[B]], [[A]] +; SI-NONAN: v_min_f32_e32 {{v[0-9]+}}, [[B]], [[A]] define void @test_fmin_legacy_ult_f32(float addrspace(1)* %out, float addrspace(1)* %in) #0 { %tid = call i32 @llvm.r600.read.tidig.x() #1 %gep.0 = getelementptr float addrspace(1)* %in, i32 %tid @@ -88,5 +96,28 @@ define void @test_fmin_legacy_ult_f32(float addrspace(1)* %out, float addrspace( ret void } +; FUNC-LABEL: @test_fmin_legacy_ole_f32_multi_use +; SI: buffer_load_dword [[A:v[0-9]+]], {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, 0 addr64{{$}} +; SI: buffer_load_dword [[B:v[0-9]+]], {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, 0 addr64 offset:4 +; SI-NOT: v_min +; SI: v_cmp_le_f32 +; SI-NEXT: v_cndmask_b32 +; SI-NOT: v_min +; SI: s_endpgm +define void @test_fmin_legacy_ole_f32_multi_use(float addrspace(1)* %out0, i1 addrspace(1)* %out1, float addrspace(1)* %in) #0 { + %tid = call i32 @llvm.r600.read.tidig.x() #1 + %gep.0 = getelementptr float addrspace(1)* %in, i32 %tid + %gep.1 = getelementptr float addrspace(1)* %gep.0, i32 1 + + %a = load float addrspace(1)* %gep.0, align 4 + %b = load float addrspace(1)* %gep.1, align 4 + + %cmp = fcmp ole float %a, %b + %val0 = select i1 %cmp, float %a, float %b + store float %val0, float addrspace(1)* %out0, align 4 + store i1 %cmp, i1 addrspace(1)* %out1 + ret void +} + attributes #0 = { nounwind } attributes #1 = { nounwind readnone } diff --git a/test/CodeGen/R600/fminnum.f64.ll b/test/CodeGen/R600/fminnum.f64.ll index 11b0c20..0f929d6 100644 --- a/test/CodeGen/R600/fminnum.f64.ll +++ b/test/CodeGen/R600/fminnum.f64.ll @@ -1,4 +1,5 @@ -; RUN: llc -march=r600 -mcpu=SI < %s | FileCheck -check-prefix=SI -check-prefix=FUNC %s +; RUN: llc -march=amdgcn -mcpu=SI < %s | FileCheck -check-prefix=SI -check-prefix=FUNC %s +; RUN: llc -march=amdgcn -mcpu=tonga < %s | FileCheck -check-prefix=SI -check-prefix=FUNC %s declare double @llvm.minnum.f64(double, double) #0 declare <2 x double> @llvm.minnum.v2f64(<2 x double>, <2 x double>) #0 diff --git a/test/CodeGen/R600/fminnum.ll b/test/CodeGen/R600/fminnum.ll index 65adab6..6b93b83 100644 --- a/test/CodeGen/R600/fminnum.ll +++ b/test/CodeGen/R600/fminnum.ll @@ -1,4 +1,5 @@ -; RUN: llc -march=r600 -mcpu=SI < %s | FileCheck -check-prefix=SI -check-prefix=FUNC %s +; RUN: llc -march=amdgcn -mcpu=SI < %s | FileCheck -check-prefix=SI 
-check-prefix=FUNC %s +; RUN: llc -march=amdgcn -mcpu=tonga < %s | FileCheck -check-prefix=SI -check-prefix=FUNC %s declare float @llvm.minnum.f32(float, float) #0 declare <2 x float> @llvm.minnum.v2f32(<2 x float>, <2 x float>) #0 diff --git a/test/CodeGen/R600/fmul.ll b/test/CodeGen/R600/fmul.ll index eabb271..6c09aa2 100644 --- a/test/CodeGen/R600/fmul.ll +++ b/test/CodeGen/R600/fmul.ll @@ -1,4 +1,5 @@ -; RUN: llc -march=r600 -mcpu=SI -verify-machineinstrs < %s | FileCheck -check-prefix=SI -check-prefix=FUNC %s +; RUN: llc -march=amdgcn -mcpu=SI -verify-machineinstrs < %s | FileCheck -check-prefix=SI -check-prefix=FUNC %s +; RUN: llc -march=amdgcn -mcpu=tonga -verify-machineinstrs < %s | FileCheck -check-prefix=SI -check-prefix=FUNC %s ; RUN: llc -march=r600 -mcpu=redwood < %s | FileCheck -check-prefix=R600 -check-prefix=FUNC %s diff --git a/test/CodeGen/R600/fmul64.ll b/test/CodeGen/R600/fmul64.ll index 0a5f707..9d7787c 100644 --- a/test/CodeGen/R600/fmul64.ll +++ b/test/CodeGen/R600/fmul64.ll @@ -1,4 +1,5 @@ -; RUN: llc -march=r600 -mcpu=tahiti -verify-machineinstrs < %s | FileCheck -check-prefix=FUNC -check-prefix=SI %s +; RUN: llc -march=amdgcn -mcpu=tahiti -verify-machineinstrs < %s | FileCheck -check-prefix=FUNC -check-prefix=SI %s +; RUN: llc -march=amdgcn -mcpu=tonga -verify-machineinstrs < %s | FileCheck -check-prefix=FUNC -check-prefix=SI %s ; FUNC-LABEL: {{^}}fmul_f64: ; SI: v_mul_f64 {{v\[[0-9]+:[0-9]+\], v\[[0-9]+:[0-9]+\], v\[[0-9]+:[0-9]+\]}} diff --git a/test/CodeGen/R600/fmuladd.ll b/test/CodeGen/R600/fmuladd.ll index 16003a5..2b70863 100644 --- a/test/CodeGen/R600/fmuladd.ll +++ b/test/CodeGen/R600/fmuladd.ll @@ -1,4 +1,4 @@ -; RUN: llc -march=r600 -mcpu=SI -verify-machineinstrs < %s | FileCheck %s +; RUN: llc -march=amdgcn -mcpu=SI -verify-machineinstrs < %s | FileCheck %s declare float @llvm.fmuladd.f32(float, float, float) declare double @llvm.fmuladd.f64(double, double, double) @@ -33,7 +33,7 @@ define void @fmuladd_f64(double addrspace(1)* %out, double addrspace(1)* %in1, ; CHECK-LABEL: {{^}}fmuladd_2.0_a_b_f32 ; CHECK-DAG: buffer_load_dword [[R1:v[0-9]+]], {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, 0 addr64{{$}} -; CHECK-DAG: buffer_load_dword [[R2:v[0-9]+]], {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, 0 addr64 offset:0x4 +; CHECK-DAG: buffer_load_dword [[R2:v[0-9]+]], {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, 0 addr64 offset:4 ; CHECK: v_mad_f32 [[RESULT:v[0-9]+]], 2.0, [[R1]], [[R2]] ; CHECK: buffer_store_dword [[RESULT]] define void @fmuladd_2.0_a_b_f32(float addrspace(1)* %out, float addrspace(1)* %in) { @@ -52,7 +52,7 @@ define void @fmuladd_2.0_a_b_f32(float addrspace(1)* %out, float addrspace(1)* % ; CHECK-LABEL: {{^}}fmuladd_a_2.0_b_f32 ; CHECK-DAG: buffer_load_dword [[R1:v[0-9]+]], {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, 0 addr64{{$}} -; CHECK-DAG: buffer_load_dword [[R2:v[0-9]+]], {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, 0 addr64 offset:0x4 +; CHECK-DAG: buffer_load_dword [[R2:v[0-9]+]], {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, 0 addr64 offset:4 ; CHECK: v_mad_f32 [[RESULT:v[0-9]+]], 2.0, [[R1]], [[R2]] ; CHECK: buffer_store_dword [[RESULT]] define void @fmuladd_a_2.0_b_f32(float addrspace(1)* %out, float addrspace(1)* %in) { @@ -71,7 +71,7 @@ define void @fmuladd_a_2.0_b_f32(float addrspace(1)* %out, float addrspace(1)* % ; CHECK-LABEL: {{^}}fadd_a_a_b_f32: ; CHECK-DAG: buffer_load_dword [[R1:v[0-9]+]], {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, 0 addr64{{$}} -; CHECK-DAG: buffer_load_dword [[R2:v[0-9]+]], 
{{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, 0 addr64 offset:0x4 +; CHECK-DAG: buffer_load_dword [[R2:v[0-9]+]], {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, 0 addr64 offset:4 ; CHECK: v_mad_f32 [[RESULT:v[0-9]+]], 2.0, [[R1]], [[R2]] ; CHECK: buffer_store_dword [[RESULT]] define void @fadd_a_a_b_f32(float addrspace(1)* %out, @@ -93,7 +93,7 @@ define void @fadd_a_a_b_f32(float addrspace(1)* %out, ; CHECK-LABEL: {{^}}fadd_b_a_a_f32: ; CHECK-DAG: buffer_load_dword [[R1:v[0-9]+]], {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, 0 addr64{{$}} -; CHECK-DAG: buffer_load_dword [[R2:v[0-9]+]], {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, 0 addr64 offset:0x4 +; CHECK-DAG: buffer_load_dword [[R2:v[0-9]+]], {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, 0 addr64 offset:4 ; CHECK: v_mad_f32 [[RESULT:v[0-9]+]], 2.0, [[R1]], [[R2]] ; CHECK: buffer_store_dword [[RESULT]] define void @fadd_b_a_a_f32(float addrspace(1)* %out, @@ -115,7 +115,7 @@ define void @fadd_b_a_a_f32(float addrspace(1)* %out, ; CHECK-LABEL: {{^}}fmuladd_neg_2.0_a_b_f32 ; CHECK-DAG: buffer_load_dword [[R1:v[0-9]+]], {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, 0 addr64{{$}} -; CHECK-DAG: buffer_load_dword [[R2:v[0-9]+]], {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, 0 addr64 offset:0x4 +; CHECK-DAG: buffer_load_dword [[R2:v[0-9]+]], {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, 0 addr64 offset:4 ; CHECK: v_mad_f32 [[RESULT:v[0-9]+]], -2.0, [[R1]], [[R2]] ; CHECK: buffer_store_dword [[RESULT]] define void @fmuladd_neg_2.0_a_b_f32(float addrspace(1)* %out, float addrspace(1)* %in) { @@ -135,7 +135,7 @@ define void @fmuladd_neg_2.0_a_b_f32(float addrspace(1)* %out, float addrspace(1 ; CHECK-LABEL: {{^}}fmuladd_neg_2.0_neg_a_b_f32 ; CHECK-DAG: buffer_load_dword [[R1:v[0-9]+]], {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, 0 addr64{{$}} -; CHECK-DAG: buffer_load_dword [[R2:v[0-9]+]], {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, 0 addr64 offset:0x4 +; CHECK-DAG: buffer_load_dword [[R2:v[0-9]+]], {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, 0 addr64 offset:4 ; CHECK: v_mad_f32 [[RESULT:v[0-9]+]], 2.0, [[R1]], [[R2]] ; CHECK: buffer_store_dword [[RESULT]] define void @fmuladd_neg_2.0_neg_a_b_f32(float addrspace(1)* %out, float addrspace(1)* %in) { @@ -157,7 +157,7 @@ define void @fmuladd_neg_2.0_neg_a_b_f32(float addrspace(1)* %out, float addrspa ; CHECK-LABEL: {{^}}fmuladd_2.0_neg_a_b_f32 ; CHECK-DAG: buffer_load_dword [[R1:v[0-9]+]], {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, 0 addr64{{$}} -; CHECK-DAG: buffer_load_dword [[R2:v[0-9]+]], {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, 0 addr64 offset:0x4 +; CHECK-DAG: buffer_load_dword [[R2:v[0-9]+]], {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, 0 addr64 offset:4 ; CHECK: v_mad_f32 [[RESULT:v[0-9]+]], -2.0, [[R1]], [[R2]] ; CHECK: buffer_store_dword [[RESULT]] define void @fmuladd_2.0_neg_a_b_f32(float addrspace(1)* %out, float addrspace(1)* %in) { @@ -179,7 +179,7 @@ define void @fmuladd_2.0_neg_a_b_f32(float addrspace(1)* %out, float addrspace(1 ; CHECK-LABEL: {{^}}fmuladd_2.0_a_neg_b_f32 ; CHECK-DAG: buffer_load_dword [[R1:v[0-9]+]], {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, 0 addr64{{$}} -; CHECK-DAG: buffer_load_dword [[R2:v[0-9]+]], {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, 0 addr64 offset:0x4 +; CHECK-DAG: buffer_load_dword [[R2:v[0-9]+]], {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, 0 addr64 offset:4 ; CHECK: v_mad_f32 [[RESULT:v[0-9]+]], 2.0, [[R1]], -[[R2]] ; CHECK: buffer_store_dword [[RESULT]] define void 
@fmuladd_2.0_a_neg_b_f32(float addrspace(1)* %out, float addrspace(1)* %in) { diff --git a/test/CodeGen/R600/fnearbyint.ll b/test/CodeGen/R600/fnearbyint.ll index 1c1d731..4fa9ada 100644 --- a/test/CodeGen/R600/fnearbyint.ll +++ b/test/CodeGen/R600/fnearbyint.ll @@ -1,5 +1,6 @@ -; RUN: llc -march=r600 -mcpu=SI -verify-machineinstrs < %s -; RUN: llc -march=r600 -mcpu=bonaire -verify-machineinstrs < %s +; RUN: llc -march=amdgcn -mcpu=SI -verify-machineinstrs < %s +; RUN: llc -march=amdgcn -mcpu=bonaire -verify-machineinstrs < %s +; RUN: llc -march=amdgcn -mcpu=tonga -verify-machineinstrs < %s ; This should have the exactly the same output as the test for rint, ; so no need to check anything. diff --git a/test/CodeGen/R600/fneg-fabs.f64.ll b/test/CodeGen/R600/fneg-fabs.f64.ll index 555f4cc..7e6ede6 100644 --- a/test/CodeGen/R600/fneg-fabs.f64.ll +++ b/test/CodeGen/R600/fneg-fabs.f64.ll @@ -1,12 +1,11 @@ -; RUN: llc -march=r600 -mcpu=SI -verify-machineinstrs < %s | FileCheck -check-prefix=SI -check-prefix=FUNC %s +; RUN: llc -march=amdgcn -mcpu=SI -verify-machineinstrs < %s | FileCheck -check-prefix=SI -check-prefix=FUNC %s +; RUN: llc -march=amdgcn -mcpu=tonga -verify-machineinstrs < %s | FileCheck -check-prefix=SI -check-prefix=FUNC %s ; FIXME: Check something here. Currently it seems fabs + fneg aren't ; into 2 modifiers, although theoretically that should work. ; FUNC-LABEL: {{^}}fneg_fabs_fadd_f64: -; SI: v_mov_b32_e32 [[IMMREG:v[0-9]+]], 0x7fffffff -; SI: v_and_b32_e32 v[[FABS:[0-9]+]], {{s[0-9]+}}, [[IMMREG]] -; SI: v_add_f64 {{v\[[0-9]+:[0-9]+\]}}, {{v\[[0-9]+:[0-9]+\]}}, -v{{\[[0-9]+}}:[[FABS]]{{\]}} +; SI: v_add_f64 {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, -|v{{\[[0-9]+:[0-9]+\]}}| define void @fneg_fabs_fadd_f64(double addrspace(1)* %out, double %x, double %y) { %fabs = call double @llvm.fabs.f64(double %x) %fsub = fsub double -0.000000e+00, %fabs @@ -56,8 +55,8 @@ define void @fneg_fabs_fn_free_f64(double addrspace(1)* %out, i64 %in) { } ; FUNC-LABEL: {{^}}fneg_fabs_f64: -; SI: s_load_dwordx2 ; SI: s_load_dwordx2 s{{\[}}[[LO_X:[0-9]+]]:[[HI_X:[0-9]+]]{{\]}} +; SI: s_load_dwordx2 ; SI: v_mov_b32_e32 [[IMMREG:v[0-9]+]], 0x80000000 ; SI-DAG: v_or_b32_e32 v[[HI_V:[0-9]+]], s[[HI_X]], [[IMMREG]] ; SI-DAG: v_mov_b32_e32 v[[LO_V:[0-9]+]], s[[LO_X]] diff --git a/test/CodeGen/R600/fneg-fabs.ll b/test/CodeGen/R600/fneg-fabs.ll index 3cc832f..4fde048 100644 --- a/test/CodeGen/R600/fneg-fabs.ll +++ b/test/CodeGen/R600/fneg-fabs.ll @@ -1,4 +1,5 @@ -; RUN: llc -march=r600 -mcpu=SI -verify-machineinstrs < %s | FileCheck -check-prefix=SI -check-prefix=FUNC %s +; RUN: llc -march=amdgcn -mcpu=SI -verify-machineinstrs < %s | FileCheck -check-prefix=SI -check-prefix=FUNC %s +; RUN: llc -march=amdgcn -mcpu=tonga -verify-machineinstrs < %s | FileCheck -check-prefix=SI -check-prefix=FUNC %s ; RUN: llc -march=r600 -mcpu=redwood < %s | FileCheck -check-prefix=R600 -check-prefix=FUNC %s ; FUNC-LABEL: {{^}}fneg_fabs_fadd_f32: diff --git a/test/CodeGen/R600/fneg.f64.ll b/test/CodeGen/R600/fneg.f64.ll index 7aa08a9..aa6df20 100644 --- a/test/CodeGen/R600/fneg.f64.ll +++ b/test/CodeGen/R600/fneg.f64.ll @@ -1,7 +1,8 @@ -; RUN: llc -march=r600 -mcpu=SI -verify-machineinstrs < %s | FileCheck -check-prefix=SI -check-prefix=FUNC %s +; RUN: llc -march=amdgcn -mcpu=SI -verify-machineinstrs < %s | FileCheck -check-prefix=SI -check-prefix=GCN -check-prefix=FUNC %s +; RUN: llc -march=amdgcn -mcpu=tonga -verify-machineinstrs < %s | FileCheck -check-prefix=VI -check-prefix=GCN -check-prefix=FUNC %s ; FUNC-LABEL: 
{{^}}fneg_f64: -; SI: v_xor_b32 +; GCN: v_xor_b32 define void @fneg_f64(double addrspace(1)* %out, double %in) { %fneg = fsub double -0.000000e+00, %in store double %fneg, double addrspace(1)* %out @@ -9,8 +10,8 @@ define void @fneg_f64(double addrspace(1)* %out, double %in) { } ; FUNC-LABEL: {{^}}fneg_v2f64: -; SI: v_xor_b32 -; SI: v_xor_b32 +; GCN: v_xor_b32 +; GCN: v_xor_b32 define void @fneg_v2f64(<2 x double> addrspace(1)* nocapture %out, <2 x double> %in) { %fneg = fsub <2 x double> <double -0.000000e+00, double -0.000000e+00>, %in store <2 x double> %fneg, <2 x double> addrspace(1)* %out @@ -23,10 +24,10 @@ define void @fneg_v2f64(<2 x double> addrspace(1)* nocapture %out, <2 x double> ; R600: -PV ; R600: -PV -; SI: v_xor_b32 -; SI: v_xor_b32 -; SI: v_xor_b32 -; SI: v_xor_b32 +; GCN: v_xor_b32 +; GCN: v_xor_b32 +; GCN: v_xor_b32 +; GCN: v_xor_b32 define void @fneg_v4f64(<4 x double> addrspace(1)* nocapture %out, <4 x double> %in) { %fneg = fsub <4 x double> <double -0.000000e+00, double -0.000000e+00, double -0.000000e+00, double -0.000000e+00>, %in store <4 x double> %fneg, <4 x double> addrspace(1)* %out @@ -38,8 +39,7 @@ define void @fneg_v4f64(<4 x double> addrspace(1)* nocapture %out, <4 x double> ; unless the target returns true for isNegFree() ; FUNC-LABEL: {{^}}fneg_free_f64: -; FIXME: Unnecessary copy to VGPRs -; SI: v_add_f64 {{v\[[0-9]+:[0-9]+\]}}, {{v\[[0-9]+:[0-9]+\]}}, -{{v\[[0-9]+:[0-9]+\]$}} +; GCN: v_add_f64 {{v\[[0-9]+:[0-9]+\]}}, 0, -{{s\[[0-9]+:[0-9]+\]$}} define void @fneg_free_f64(double addrspace(1)* %out, i64 %in) { %bc = bitcast i64 %in to double %fsub = fsub double 0.0, %bc @@ -47,10 +47,11 @@ define void @fneg_free_f64(double addrspace(1)* %out, i64 %in) { ret void } -; SI-LABEL: {{^}}fneg_fold_f64: +; GCN-LABEL: {{^}}fneg_fold_f64: ; SI: s_load_dwordx2 [[NEG_VALUE:s\[[0-9]+:[0-9]+\]]], {{s\[[0-9]+:[0-9]+\]}}, 0xb -; SI-NOT: xor -; SI: v_mul_f64 {{v\[[0-9]+:[0-9]+\]}}, -[[NEG_VALUE]], [[NEG_VALUE]] +; VI: s_load_dwordx2 [[NEG_VALUE:s\[[0-9]+:[0-9]+\]]], {{s\[[0-9]+:[0-9]+\]}}, 0x2c +; GCN-NOT: xor +; GCN: v_mul_f64 {{v\[[0-9]+:[0-9]+\]}}, -[[NEG_VALUE]], [[NEG_VALUE]] define void @fneg_fold_f64(double addrspace(1)* %out, double %in) { %fsub = fsub double -0.0, %in %fmul = fmul double %fsub, %in diff --git a/test/CodeGen/R600/fneg.ll b/test/CodeGen/R600/fneg.ll index c20cf24..a0fd539 100644 --- a/test/CodeGen/R600/fneg.ll +++ b/test/CodeGen/R600/fneg.ll @@ -1,10 +1,11 @@ -; RUN: llc -march=r600 -mcpu=SI -verify-machineinstrs < %s | FileCheck -check-prefix=SI -check-prefix=FUNC %s +; RUN: llc -march=amdgcn -mcpu=SI -verify-machineinstrs < %s | FileCheck -check-prefix=SI -check-prefix=GCN -check-prefix=FUNC %s +; RUN: llc -march=amdgcn -mcpu=tonga -verify-machineinstrs < %s | FileCheck -check-prefix=VI -check-prefix=GCN -check-prefix=FUNC %s ; RUN: llc -march=r600 -mcpu=redwood < %s | FileCheck -check-prefix=R600 -check-prefix=FUNC %s ; FUNC-LABEL: {{^}}fneg_f32: ; R600: -PV -; SI: v_xor_b32 +; GCN: v_xor_b32 define void @fneg_f32(float addrspace(1)* %out, float %in) { %fneg = fsub float -0.000000e+00, %in store float %fneg, float addrspace(1)* %out @@ -15,8 +16,8 @@ define void @fneg_f32(float addrspace(1)* %out, float %in) { ; R600: -PV ; R600: -PV -; SI: v_xor_b32 -; SI: v_xor_b32 +; GCN: v_xor_b32 +; GCN: v_xor_b32 define void @fneg_v2f32(<2 x float> addrspace(1)* nocapture %out, <2 x float> %in) { %fneg = fsub <2 x float> <float -0.000000e+00, float -0.000000e+00>, %in store <2 x float> %fneg, <2 x float> addrspace(1)* %out @@ -29,10 +30,10 @@ define 
void @fneg_v2f32(<2 x float> addrspace(1)* nocapture %out, <2 x float> %i ; R600: -PV ; R600: -PV -; SI: v_xor_b32 -; SI: v_xor_b32 -; SI: v_xor_b32 -; SI: v_xor_b32 +; GCN: v_xor_b32 +; GCN: v_xor_b32 +; GCN: v_xor_b32 +; GCN: v_xor_b32 define void @fneg_v4f32(<4 x float> addrspace(1)* nocapture %out, <4 x float> %in) { %fneg = fsub <4 x float> <float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00>, %in store <4 x float> %fneg, <4 x float> addrspace(1)* %out @@ -48,7 +49,7 @@ define void @fneg_v4f32(<4 x float> addrspace(1)* nocapture %out, <4 x float> %i ; R600: -KC0[2].Z ; XXX: We could use v_add_f32_e64 with the negate bit here instead. -; SI: v_sub_f32_e64 v{{[0-9]}}, 0.0, s{{[0-9]+$}} +; GCN: v_sub_f32_e64 v{{[0-9]}}, 0, s{{[0-9]+$}} define void @fneg_free_f32(float addrspace(1)* %out, i32 %in) { %bc = bitcast i32 %in to float %fsub = fsub float 0.0, %bc @@ -58,8 +59,9 @@ define void @fneg_free_f32(float addrspace(1)* %out, i32 %in) { ; FUNC-LABEL: {{^}}fneg_fold_f32: ; SI: s_load_dword [[NEG_VALUE:s[0-9]+]], s[{{[0-9]+:[0-9]+}}], 0xb -; SI-NOT: xor -; SI: v_mul_f32_e64 v{{[0-9]+}}, -[[NEG_VALUE]], [[NEG_VALUE]] +; VI: s_load_dword [[NEG_VALUE:s[0-9]+]], s[{{[0-9]+:[0-9]+}}], 0x2c +; GCN-NOT: xor +; GCN: v_mul_f32_e64 v{{[0-9]+}}, -[[NEG_VALUE]], [[NEG_VALUE]] define void @fneg_fold_f32(float addrspace(1)* %out, float %in) { %fsub = fsub float -0.0, %in %fmul = fmul float %fsub, %in diff --git a/test/CodeGen/R600/fp-classify.ll b/test/CodeGen/R600/fp-classify.ll new file mode 100644 index 0000000..4fac517 --- /dev/null +++ b/test/CodeGen/R600/fp-classify.ll @@ -0,0 +1,131 @@ +; RUN: llc -march=amdgcn -mcpu=SI -verify-machineinstrs < %s | FileCheck -check-prefix=SI %s +; RUN: llc -march=amdgcn -mcpu=tonga -verify-machineinstrs < %s | FileCheck -check-prefix=SI %s + +declare i1 @llvm.AMDGPU.class.f32(float, i32) #1 +declare i1 @llvm.AMDGPU.class.f64(double, i32) #1 +declare i32 @llvm.r600.read.tidig.x() #1 +declare float @llvm.fabs.f32(float) #1 +declare double @llvm.fabs.f64(double) #1 + +; SI-LABEL: {{^}}test_isinf_pattern: +; SI: v_mov_b32_e32 [[MASK:v[0-9]+]], 0x204{{$}} +; SI: v_cmp_class_f32_e32 vcc, s{{[0-9]+}}, [[MASK]] +; SI-NOT: v_cmp +; SI: s_endpgm +define void @test_isinf_pattern(i32 addrspace(1)* nocapture %out, float %x) #0 { + %fabs = tail call float @llvm.fabs.f32(float %x) #1 + %cmp = fcmp oeq float %fabs, 0x7FF0000000000000 + %ext = zext i1 %cmp to i32 + store i32 %ext, i32 addrspace(1)* %out, align 4 + ret void +} + +; SI-LABEL: {{^}}test_not_isinf_pattern_0: +; SI-NOT: v_cmp_class +; SI: s_endpgm +define void @test_not_isinf_pattern_0(i32 addrspace(1)* nocapture %out, float %x) #0 { + %fabs = tail call float @llvm.fabs.f32(float %x) #1 + %cmp = fcmp ueq float %fabs, 0x7FF0000000000000 + %ext = zext i1 %cmp to i32 + store i32 %ext, i32 addrspace(1)* %out, align 4 + ret void +} + +; SI-LABEL: {{^}}test_not_isinf_pattern_1: +; SI-NOT: v_cmp_class +; SI: s_endpgm +define void @test_not_isinf_pattern_1(i32 addrspace(1)* nocapture %out, float %x) #0 { + %fabs = tail call float @llvm.fabs.f32(float %x) #1 + %cmp = fcmp oeq float %fabs, 0xFFF0000000000000 + %ext = zext i1 %cmp to i32 + store i32 %ext, i32 addrspace(1)* %out, align 4 + ret void +} + +; SI-LABEL: {{^}}test_isfinite_pattern_0: +; SI-NOT: v_cmp +; SI: v_mov_b32_e32 [[MASK:v[0-9]+]], 0x1f8{{$}} +; SI: v_cmp_class_f32_e32 vcc, s{{[0-9]+}}, [[MASK]] +; SI-NOT: v_cmp +; SI: s_endpgm +define void @test_isfinite_pattern_0(i32 addrspace(1)* nocapture %out, float %x) #0 { + %ord = fcmp 
ord float %x, 0.000000e+00 + %x.fabs = tail call float @llvm.fabs.f32(float %x) #1 + %ninf = fcmp une float %x.fabs, 0x7FF0000000000000 + %and = and i1 %ord, %ninf + %ext = zext i1 %and to i32 + store i32 %ext, i32 addrspace(1)* %out, align 4 + ret void +} + +; Use negative infinity +; SI-LABEL: {{^}}test_isfinite_not_pattern_0: +; SI-NOT: v_cmp_class_f32 +; SI: s_endpgm +define void @test_isfinite_not_pattern_0(i32 addrspace(1)* nocapture %out, float %x) #0 { + %ord = fcmp ord float %x, 0.000000e+00 + %x.fabs = tail call float @llvm.fabs.f32(float %x) #1 + %ninf = fcmp une float %x.fabs, 0xFFF0000000000000 + %and = and i1 %ord, %ninf + %ext = zext i1 %and to i32 + store i32 %ext, i32 addrspace(1)* %out, align 4 + ret void +} + +; No fabs +; SI-LABEL: {{^}}test_isfinite_not_pattern_1: +; SI-NOT: v_cmp_class_f32 +; SI: s_endpgm +define void @test_isfinite_not_pattern_1(i32 addrspace(1)* nocapture %out, float %x) #0 { + %ord = fcmp ord float %x, 0.000000e+00 + %ninf = fcmp une float %x, 0x7FF0000000000000 + %and = and i1 %ord, %ninf + %ext = zext i1 %and to i32 + store i32 %ext, i32 addrspace(1)* %out, align 4 + ret void +} + +; fabs of different value +; SI-LABEL: {{^}}test_isfinite_not_pattern_2: +; SI-NOT: v_cmp_class_f32 +; SI: s_endpgm +define void @test_isfinite_not_pattern_2(i32 addrspace(1)* nocapture %out, float %x, float %y) #0 { + %ord = fcmp ord float %x, 0.000000e+00 + %x.fabs = tail call float @llvm.fabs.f32(float %y) #1 + %ninf = fcmp une float %x.fabs, 0x7FF0000000000000 + %and = and i1 %ord, %ninf + %ext = zext i1 %and to i32 + store i32 %ext, i32 addrspace(1)* %out, align 4 + ret void +} + +; Wrong ordered compare type +; SI-LABEL: {{^}}test_isfinite_not_pattern_3: +; SI-NOT: v_cmp_class_f32 +; SI: s_endpgm +define void @test_isfinite_not_pattern_3(i32 addrspace(1)* nocapture %out, float %x) #0 { + %ord = fcmp uno float %x, 0.000000e+00 + %x.fabs = tail call float @llvm.fabs.f32(float %x) #1 + %ninf = fcmp une float %x.fabs, 0x7FF0000000000000 + %and = and i1 %ord, %ninf + %ext = zext i1 %and to i32 + store i32 %ext, i32 addrspace(1)* %out, align 4 + ret void +} + +; Wrong unordered compare +; SI-LABEL: {{^}}test_isfinite_not_pattern_4: +; SI-NOT: v_cmp_class_f32 +; SI: s_endpgm +define void @test_isfinite_not_pattern_4(i32 addrspace(1)* nocapture %out, float %x) #0 { + %ord = fcmp ord float %x, 0.000000e+00 + %x.fabs = tail call float @llvm.fabs.f32(float %x) #1 + %ninf = fcmp one float %x.fabs, 0x7FF0000000000000 + %and = and i1 %ord, %ninf + %ext = zext i1 %and to i32 + store i32 %ext, i32 addrspace(1)* %out, align 4 + ret void +} + +attributes #0 = { nounwind } +attributes #1 = { nounwind readnone } diff --git a/test/CodeGen/R600/fp16_to_fp.ll b/test/CodeGen/R600/fp16_to_fp.ll index ec3e051..da78f61 100644 --- a/test/CodeGen/R600/fp16_to_fp.ll +++ b/test/CodeGen/R600/fp16_to_fp.ll @@ -1,4 +1,5 @@ -; RUN: llc -march=r600 -mcpu=SI -verify-machineinstrs < %s | FileCheck -check-prefix=SI %s +; RUN: llc -march=amdgcn -mcpu=SI -verify-machineinstrs < %s | FileCheck -check-prefix=SI %s +; RUN: llc -march=amdgcn -mcpu=tonga -verify-machineinstrs < %s | FileCheck -check-prefix=SI %s declare float @llvm.convert.from.fp16.f32(i16) nounwind readnone declare double @llvm.convert.from.fp16.f64(i16) nounwind readnone diff --git a/test/CodeGen/R600/fp32_to_fp16.ll b/test/CodeGen/R600/fp32_to_fp16.ll index e86ee62..c3c65ae 100644 --- a/test/CodeGen/R600/fp32_to_fp16.ll +++ b/test/CodeGen/R600/fp32_to_fp16.ll @@ -1,4 +1,5 @@ -; RUN: llc -march=r600 -mcpu=SI -verify-machineinstrs < %s | 
FileCheck -check-prefix=SI %s +; RUN: llc -march=amdgcn -mcpu=SI -verify-machineinstrs < %s | FileCheck -check-prefix=SI %s +; RUN: llc -march=amdgcn -mcpu=tonga -verify-machineinstrs < %s | FileCheck -check-prefix=SI %s declare i16 @llvm.convert.to.fp16.f32(float) nounwind readnone diff --git a/test/CodeGen/R600/fp_to_sint.f64.ll b/test/CodeGen/R600/fp_to_sint.f64.ll index 09edb40..e641847 100644 --- a/test/CodeGen/R600/fp_to_sint.f64.ll +++ b/test/CodeGen/R600/fp_to_sint.f64.ll @@ -1,5 +1,5 @@ -; RUN: llc -march=r600 -mcpu=SI -verify-machineinstrs < %s | FileCheck -check-prefix=SI -check-prefix=FUNC %s -; RUN: llc -march=r600 -mcpu=bonaire -verify-machineinstrs < %s | FileCheck -check-prefix=CI -check-prefix=FUNC %s +; RUN: llc -march=amdgcn -mcpu=SI -verify-machineinstrs < %s | FileCheck -check-prefix=SI -check-prefix=FUNC %s +; RUN: llc -march=amdgcn -mcpu=bonaire -verify-machineinstrs < %s | FileCheck -check-prefix=CI -check-prefix=FUNC %s declare i32 @llvm.r600.read.tidig.x() nounwind readnone diff --git a/test/CodeGen/R600/fp_to_sint.ll b/test/CodeGen/R600/fp_to_sint.ll index c583ec3..16549c3 100644 --- a/test/CodeGen/R600/fp_to_sint.ll +++ b/test/CodeGen/R600/fp_to_sint.ll @@ -1,16 +1,28 @@ ; RUN: llc -march=r600 -mcpu=redwood < %s | FileCheck %s --check-prefix=EG --check-prefix=FUNC -; RUN: llc -march=r600 -mcpu=SI -verify-machineinstrs < %s | FileCheck %s --check-prefix=SI --check-prefix=FUNC +; RUN: llc -march=amdgcn -mcpu=SI -verify-machineinstrs < %s | FileCheck %s --check-prefix=SI --check-prefix=FUNC +; RUN: llc -march=amdgcn -mcpu=tonga -verify-machineinstrs < %s | FileCheck %s --check-prefix=SI --check-prefix=FUNC + +declare float @llvm.fabs.f32(float) #0 ; FUNC-LABEL: {{^}}fp_to_sint_i32: ; EG: FLT_TO_INT {{\** *}}T{{[0-9]+\.[XYZW], PV\.[XYZW]}} ; SI: v_cvt_i32_f32_e32 ; SI: s_endpgm -define void @fp_to_sint_i32 (i32 addrspace(1)* %out, float %in) { +define void @fp_to_sint_i32(i32 addrspace(1)* %out, float %in) { %conv = fptosi float %in to i32 store i32 %conv, i32 addrspace(1)* %out ret void } +; FUNC-LABEL: {{^}}fp_to_sint_i32_fabs: +; SI: v_cvt_i32_f32_e64 v{{[0-9]+}}, |s{{[0-9]+}}|{{$}} +define void @fp_to_sint_i32_fabs(i32 addrspace(1)* %out, float %in) { + %in.fabs = call float @llvm.fabs.f32(float %in) #0 + %conv = fptosi float %in.fabs to i32 + store i32 %conv, i32 addrspace(1)* %out + ret void +} + ; FUNC-LABEL: {{^}}fp_to_sint_v2i32: ; EG: FLT_TO_INT {{\** *}}T{{[0-9]+\.[XYZW], PV\.[XYZW]}} ; EG: FLT_TO_INT {{\** *}}T{{[0-9]+\.[XYZW], PV\.[XYZW]}} @@ -214,3 +226,5 @@ define void @fp_to_sint_v4i64(<4 x i64> addrspace(1)* %out, <4 x float> %x) { store <4 x i64> %conv, <4 x i64> addrspace(1)* %out ret void } + +attributes #0 = { nounwind readnone } diff --git a/test/CodeGen/R600/fp_to_uint.f64.ll b/test/CodeGen/R600/fp_to_uint.f64.ll index 25859bb..1ffe2fa 100644 --- a/test/CodeGen/R600/fp_to_uint.f64.ll +++ b/test/CodeGen/R600/fp_to_uint.f64.ll @@ -1,5 +1,5 @@ -; RUN: llc -march=r600 -mcpu=SI -verify-machineinstrs < %s | FileCheck -check-prefix=SI %s -; RUN: llc -march=r600 -mcpu=bonaire -verify-machineinstrs < %s | FileCheck -check-prefix=CI -check-prefix=FUNC %s +; RUN: llc -march=amdgcn -mcpu=SI -verify-machineinstrs < %s | FileCheck -check-prefix=SI %s +; RUN: llc -march=amdgcn -mcpu=bonaire -verify-machineinstrs < %s | FileCheck -check-prefix=CI -check-prefix=FUNC %s declare i32 @llvm.r600.read.tidig.x() nounwind readnone diff --git a/test/CodeGen/R600/fp_to_uint.ll b/test/CodeGen/R600/fp_to_uint.ll index 91bf4b7..804d90f 100644 --- 
a/test/CodeGen/R600/fp_to_uint.ll +++ b/test/CodeGen/R600/fp_to_uint.ll @@ -1,29 +1,31 @@ -; RUN: llc -march=r600 -mcpu=redwood < %s | FileCheck %s --check-prefix=EG --check-prefix=FUNC -; RUN: llc -march=r600 -mcpu=SI -verify-machineinstrs < %s | FileCheck %s --check-prefix=SI --check-prefix=FUNC +; RUN: llc -march=r600 -mcpu=redwood < %s | FileCheck %s -check-prefix=EG -check-prefix=FUNC +; RUN: llc -march=amdgcn -mcpu=SI -verify-machineinstrs < %s | FileCheck %s -check-prefix=SI -check-prefix=FUNC +; RUN: llc -march=amdgcn -mcpu=tonga -verify-machineinstrs < %s | FileCheck %s -check-prefix=SI -check-prefix=FUNC -; FUNC-LABEL: {{^}}fp_to_uint_i32: +; FUNC-LABEL: {{^}}fp_to_uint_f32_to_i32: ; EG: FLT_TO_UINT {{\** *}}T{{[0-9]+\.[XYZW], PV\.[XYZW]}} + ; SI: v_cvt_u32_f32_e32 ; SI: s_endpgm -define void @fp_to_uint_i32 (i32 addrspace(1)* %out, float %in) { +define void @fp_to_uint_f32_to_i32 (i32 addrspace(1)* %out, float %in) { %conv = fptoui float %in to i32 store i32 %conv, i32 addrspace(1)* %out ret void } -; FUNC-LABEL: {{^}}fp_to_uint_v2i32: +; FUNC-LABEL: {{^}}fp_to_uint_v2f32_to_v2i32: ; EG: FLT_TO_UINT {{\** *}}T{{[0-9]+\.[XYZW], PV\.[XYZW]}} ; EG: FLT_TO_UINT {{\** *}}T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}} + ; SI: v_cvt_u32_f32_e32 ; SI: v_cvt_u32_f32_e32 - -define void @fp_to_uint_v2i32(<2 x i32> addrspace(1)* %out, <2 x float> %in) { +define void @fp_to_uint_v2f32_to_v2i32(<2 x i32> addrspace(1)* %out, <2 x float> %in) { %result = fptoui <2 x float> %in to <2 x i32> store <2 x i32> %result, <2 x i32> addrspace(1)* %out ret void } -; FUNC-LABEL: {{^}}fp_to_uint_v4i32: +; FUNC-LABEL: {{^}}fp_to_uint_v4f32_to_v4i32: ; EG: FLT_TO_UINT {{\** *}}T{{[0-9]+\.[XYZW], PV\.[XYZW]}} ; EG: FLT_TO_UINT {{\** *}}T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}} ; EG: FLT_TO_UINT {{\** *}}T{{[0-9]+\.[XYZW], PV\.[XYZW]}} @@ -33,14 +35,14 @@ define void @fp_to_uint_v2i32(<2 x i32> addrspace(1)* %out, <2 x float> %in) { ; SI: v_cvt_u32_f32_e32 ; SI: v_cvt_u32_f32_e32 -define void @fp_to_uint_v4i32(<4 x i32> addrspace(1)* %out, <4 x float> addrspace(1)* %in) { +define void @fp_to_uint_v4f32_to_v4i32(<4 x i32> addrspace(1)* %out, <4 x float> addrspace(1)* %in) { %value = load <4 x float> addrspace(1) * %in %result = fptoui <4 x float> %value to <4 x i32> store <4 x i32> %result, <4 x i32> addrspace(1)* %out ret void } -; FUNC: {{^}}fp_to_uint_i64: +; FUNC: {{^}}fp_to_uint_f32_to_i64: ; EG-DAG: AND_INT ; EG-DAG: LSHR ; EG-DAG: SUB_INT @@ -64,13 +66,13 @@ define void @fp_to_uint_v4i32(<4 x i32> addrspace(1)* %out, <4 x float> addrspac ; EG-DAG: CNDE_INT ; SI: s_endpgm -define void @fp_to_uint_i64(i64 addrspace(1)* %out, float %x) { +define void @fp_to_uint_f32_to_i64(i64 addrspace(1)* %out, float %x) { %conv = fptoui float %x to i64 store i64 %conv, i64 addrspace(1)* %out ret void } -; FUNC: {{^}}fp_to_uint_v2i64: +; FUNC: {{^}}fp_to_uint_v2f32_to_v2i64: ; EG-DAG: AND_INT ; EG-DAG: LSHR ; EG-DAG: SUB_INT @@ -115,13 +117,13 @@ define void @fp_to_uint_i64(i64 addrspace(1)* %out, float %x) { ; EG-DAG: CNDE_INT ; SI: s_endpgm -define void @fp_to_uint_v2i64(<2 x i64> addrspace(1)* %out, <2 x float> %x) { +define void @fp_to_uint_v2f32_to_v2i64(<2 x i64> addrspace(1)* %out, <2 x float> %x) { %conv = fptoui <2 x float> %x to <2 x i64> store <2 x i64> %conv, <2 x i64> addrspace(1)* %out ret void } -; FUNC: {{^}}fp_to_uint_v4i64: +; FUNC: {{^}}fp_to_uint_v4f32_to_v4i64: ; EG-DAG: AND_INT ; EG-DAG: LSHR ; EG-DAG: SUB_INT @@ -208,7 +210,7 @@ define void @fp_to_uint_v2i64(<2 x i64> addrspace(1)* %out, <2 x float> %x) { ; 
EG-DAG: CNDE_INT ; SI: s_endpgm -define void @fp_to_uint_v4i64(<4 x i64> addrspace(1)* %out, <4 x float> %x) { +define void @fp_to_uint_v4f32_to_v4i64(<4 x i64> addrspace(1)* %out, <4 x float> %x) { %conv = fptoui <4 x float> %x to <4 x i64> store <4 x i64> %conv, <4 x i64> addrspace(1)* %out ret void diff --git a/test/CodeGen/R600/fpext.ll b/test/CodeGen/R600/fpext.ll index 418395f..734a43b 100644 --- a/test/CodeGen/R600/fpext.ll +++ b/test/CodeGen/R600/fpext.ll @@ -1,9 +1,45 @@ -; RUN: llc < %s -march=r600 -mcpu=SI -verify-machineinstrs | FileCheck %s --check-prefix=CHECK +; RUN: llc -march=amdgcn -mcpu=SI -verify-machineinstrs < %s | FileCheck -check-prefix=SI -check-prefix=FUNC %s +; RUN: llc -march=amdgcn -mcpu=tonga -verify-machineinstrs < %s | FileCheck -check-prefix=SI -check-prefix=FUNC %s -; CHECK: {{^}}fpext: -; CHECK: v_cvt_f64_f32_e32 -define void @fpext(double addrspace(1)* %out, float %in) { +; FUNC-LABEL: {{^}}fpext_f32_to_f64: +; SI: v_cvt_f64_f32_e32 {{v\[[0-9]+:[0-9]+\]}}, s{{[0-9]+}} +define void @fpext_f32_to_f64(double addrspace(1)* %out, float %in) { %result = fpext float %in to double store double %result, double addrspace(1)* %out ret void } + +; FUNC-LABEL: {{^}}fpext_v2f32_to_v2f64: +; SI: v_cvt_f64_f32_e32 +; SI: v_cvt_f64_f32_e32 +define void @fpext_v2f32_to_v2f64(<2 x double> addrspace(1)* %out, <2 x float> %in) { + %result = fpext <2 x float> %in to <2 x double> + store <2 x double> %result, <2 x double> addrspace(1)* %out + ret void +} + +; FUNC-LABEL: {{^}}fpext_v4f32_to_v4f64: +; SI: v_cvt_f64_f32_e32 +; SI: v_cvt_f64_f32_e32 +; SI: v_cvt_f64_f32_e32 +; SI: v_cvt_f64_f32_e32 +define void @fpext_v4f32_to_v4f64(<4 x double> addrspace(1)* %out, <4 x float> %in) { + %result = fpext <4 x float> %in to <4 x double> + store <4 x double> %result, <4 x double> addrspace(1)* %out + ret void +} + +; FUNC-LABEL: {{^}}fpext_v8f32_to_v8f64: +; SI: v_cvt_f64_f32_e32 +; SI: v_cvt_f64_f32_e32 +; SI: v_cvt_f64_f32_e32 +; SI: v_cvt_f64_f32_e32 +; SI: v_cvt_f64_f32_e32 +; SI: v_cvt_f64_f32_e32 +; SI: v_cvt_f64_f32_e32 +; SI: v_cvt_f64_f32_e32 +define void @fpext_v8f32_to_v8f64(<8 x double> addrspace(1)* %out, <8 x float> %in) { + %result = fpext <8 x float> %in to <8 x double> + store <8 x double> %result, <8 x double> addrspace(1)* %out + ret void +} diff --git a/test/CodeGen/R600/fptrunc.ll b/test/CodeGen/R600/fptrunc.ll index 8ac8d3b..385e10e 100644 --- a/test/CodeGen/R600/fptrunc.ll +++ b/test/CodeGen/R600/fptrunc.ll @@ -1,9 +1,45 @@ -; RUN: llc < %s -march=r600 -mcpu=SI -verify-machineinstrs | FileCheck %s --check-prefix=CHECK +; RUN: llc -march=amdgcn -mcpu=SI -verify-machineinstrs < %s | FileCheck -check-prefix=SI -check-prefix=FUNC %s +; RUN: llc -march=amdgcn -mcpu=tonga -verify-machineinstrs < %s | FileCheck -check-prefix=SI -check-prefix=FUNC %s -; CHECK: {{^}}fptrunc: -; CHECK: v_cvt_f32_f64_e32 -define void @fptrunc(float addrspace(1)* %out, double %in) { +; FUNC-LABEL: {{^}}fptrunc_f64_to_f32: +; SI: v_cvt_f32_f64_e32 {{v[0-9]+}}, {{s\[[0-9]+:[0-9]+\]}} +define void @fptrunc_f64_to_f32(float addrspace(1)* %out, double %in) { %result = fptrunc double %in to float store float %result, float addrspace(1)* %out ret void } + +; FUNC-LABEL: {{^}}fptrunc_v2f64_to_v2f32: +; SI: v_cvt_f32_f64_e32 +; SI: v_cvt_f32_f64_e32 +define void @fptrunc_v2f64_to_v2f32(<2 x float> addrspace(1)* %out, <2 x double> %in) { + %result = fptrunc <2 x double> %in to <2 x float> + store <2 x float> %result, <2 x float> addrspace(1)* %out + ret void +} + +; FUNC-LABEL: 
{{^}}fptrunc_v4f64_to_v4f32: +; SI: v_cvt_f32_f64_e32 +; SI: v_cvt_f32_f64_e32 +; SI: v_cvt_f32_f64_e32 +; SI: v_cvt_f32_f64_e32 +define void @fptrunc_v4f64_to_v4f32(<4 x float> addrspace(1)* %out, <4 x double> %in) { + %result = fptrunc <4 x double> %in to <4 x float> + store <4 x float> %result, <4 x float> addrspace(1)* %out + ret void +} + +; FUNC-LABEL: {{^}}fptrunc_v8f64_to_v8f32: +; SI: v_cvt_f32_f64_e32 +; SI: v_cvt_f32_f64_e32 +; SI: v_cvt_f32_f64_e32 +; SI: v_cvt_f32_f64_e32 +; SI: v_cvt_f32_f64_e32 +; SI: v_cvt_f32_f64_e32 +; SI: v_cvt_f32_f64_e32 +; SI: v_cvt_f32_f64_e32 +define void @fptrunc_v8f64_to_v8f32(<8 x float> addrspace(1)* %out, <8 x double> %in) { + %result = fptrunc <8 x double> %in to <8 x float> + store <8 x float> %result, <8 x float> addrspace(1)* %out + ret void +} diff --git a/test/CodeGen/R600/frem.ll b/test/CodeGen/R600/frem.ll index c846a77..02a0070 100644 --- a/test/CodeGen/R600/frem.ll +++ b/test/CodeGen/R600/frem.ll @@ -1,16 +1,18 @@ -; RUN: llc -march=r600 -mcpu=SI -enable-misched < %s | FileCheck -check-prefix=SI -check-prefix=FUNC %s +; RUN: llc -march=amdgcn -mcpu=SI -enable-misched < %s | FileCheck -check-prefix=SI -check-prefix=GCN -check-prefix=FUNC %s +; RUN: llc -march=amdgcn -mcpu=bonaire -enable-misched < %s | FileCheck -check-prefix=CI -check-prefix=GCN -check-prefix=FUNC %s +; RUN: llc -march=amdgcn -mcpu=tonga -enable-misched < %s | FileCheck -check-prefix=CI -check-prefix=GCN -check-prefix=FUNC %s ; FUNC-LABEL: {{^}}frem_f32: -; SI-DAG: buffer_load_dword [[X:v[0-9]+]], {{.*$}} -; SI-DAG: buffer_load_dword [[Y:v[0-9]+]], {{.*}} offset:0x10 -; SI-DAG: v_cmp -; SI-DAG: v_mul_f32 -; SI: v_rcp_f32_e32 -; SI: v_mul_f32_e32 -; SI: v_mul_f32_e32 -; SI: v_trunc_f32_e32 -; SI: v_mad_f32 -; SI: s_endpgm +; GCN-DAG: buffer_load_dword [[X:v[0-9]+]], {{.*$}} +; GCN-DAG: buffer_load_dword [[Y:v[0-9]+]], {{.*}} offset:16 +; GCN-DAG: v_cmp +; GCN-DAG: v_mul_f32 +; GCN: v_rcp_f32_e32 +; GCN: v_mul_f32_e32 +; GCN: v_mul_f32_e32 +; GCN: v_trunc_f32_e32 +; GCN: v_mad_f32 +; GCN: s_endpgm define void @frem_f32(float addrspace(1)* %out, float addrspace(1)* %in1, float addrspace(1)* %in2) #0 { %gep2 = getelementptr float addrspace(1)* %in2, i32 4 @@ -22,14 +24,14 @@ define void @frem_f32(float addrspace(1)* %out, float addrspace(1)* %in1, } ; FUNC-LABEL: {{^}}unsafe_frem_f32: -; SI: buffer_load_dword [[Y:v[0-9]+]], {{.*}} offset:0x10 -; SI: buffer_load_dword [[X:v[0-9]+]], {{.*}} -; SI: v_rcp_f32_e32 [[INVY:v[0-9]+]], [[Y]] -; SI: v_mul_f32_e32 [[DIV:v[0-9]+]], [[INVY]], [[X]] -; SI: v_trunc_f32_e32 [[TRUNC:v[0-9]+]], [[DIV]] -; SI: v_mad_f32 [[RESULT:v[0-9]+]], -[[TRUNC]], [[Y]], [[X]] -; SI: buffer_store_dword [[RESULT]] -; SI: s_endpgm +; GCN: buffer_load_dword [[Y:v[0-9]+]], {{.*}} offset:16 +; GCN: buffer_load_dword [[X:v[0-9]+]], {{.*}} +; GCN: v_rcp_f32_e32 [[INVY:v[0-9]+]], [[Y]] +; GCN: v_mul_f32_e32 [[DIV:v[0-9]+]], [[INVY]], [[X]] +; GCN: v_trunc_f32_e32 [[TRUNC:v[0-9]+]], [[DIV]] +; GCN: v_mad_f32 [[RESULT:v[0-9]+]], -[[TRUNC]], [[Y]], [[X]] +; GCN: buffer_store_dword [[RESULT]] +; GCN: s_endpgm define void @unsafe_frem_f32(float addrspace(1)* %out, float addrspace(1)* %in1, float addrspace(1)* %in2) #1 { %gep2 = getelementptr float addrspace(1)* %in2, i32 4 @@ -40,11 +42,17 @@ define void @unsafe_frem_f32(float addrspace(1)* %out, float addrspace(1)* %in1, ret void } -; TODO: This should check something when f64 fdiv is implemented -; correctly - ; FUNC-LABEL: {{^}}frem_f64: -; SI: s_endpgm +; GCN: buffer_load_dwordx2 [[Y:v\[[0-9]+:[0-9]+\]]], 
{{.*}}, 0 +; GCN: buffer_load_dwordx2 [[X:v\[[0-9]+:[0-9]+\]]], {{.*}}, 0 +; GCN-DAG: v_div_fmas_f64 +; GCN-DAG: v_div_scale_f64 +; GCN-DAG: v_mul_f64 +; CI: v_trunc_f64_e32 +; CI: v_mul_f64 +; GCN: v_add_f64 +; GCN: buffer_store_dwordx2 +; GCN: s_endpgm define void @frem_f64(double addrspace(1)* %out, double addrspace(1)* %in1, double addrspace(1)* %in2) #0 { %r0 = load double addrspace(1)* %in1, align 8 @@ -55,11 +63,12 @@ define void @frem_f64(double addrspace(1)* %out, double addrspace(1)* %in1, } ; FUNC-LABEL: {{^}}unsafe_frem_f64: -; SI: v_rcp_f64_e32 -; SI: v_mul_f64 +; GCN: v_rcp_f64_e32 +; GCN: v_mul_f64 ; SI: v_bfe_u32 -; SI: v_fma_f64 -; SI: s_endpgm +; CI: v_trunc_f64_e32 +; GCN: v_fma_f64 +; GCN: s_endpgm define void @unsafe_frem_f64(double addrspace(1)* %out, double addrspace(1)* %in1, double addrspace(1)* %in2) #1 { %r0 = load double addrspace(1)* %in1, align 8 diff --git a/test/CodeGen/R600/fsqrt.ll b/test/CodeGen/R600/fsqrt.ll index 1f91faf..1fdf3e4 100644 --- a/test/CodeGen/R600/fsqrt.ll +++ b/test/CodeGen/R600/fsqrt.ll @@ -1,4 +1,9 @@ -; RUN: llc < %s -march=r600 -mcpu=tahiti -verify-machineinstrs | FileCheck %s +; RUN: llc -march=amdgcn -mcpu=tahiti -verify-machineinstrs < %s | FileCheck %s +; RUN: llc -march=amdgcn -mcpu=tahiti -verify-machineinstrs -enable-unsafe-fp-math < %s | FileCheck %s +; RUN: llc -march=amdgcn -mcpu=tonga -verify-machineinstrs < %s | FileCheck %s +; RUN: llc -march=amdgcn -mcpu=tonga -verify-machineinstrs -enable-unsafe-fp-math < %s | FileCheck %s + +; Run with unsafe-fp-math to make sure nothing tries to turn this into 1 / rsqrt(x) ; CHECK: {{^}}fsqrt_f32: ; CHECK: v_sqrt_f32_e32 {{v[0-9]+, v[0-9]+}} diff --git a/test/CodeGen/R600/fsub.ll b/test/CodeGen/R600/fsub.ll index 6e5ccf1..ef90fea 100644 --- a/test/CodeGen/R600/fsub.ll +++ b/test/CodeGen/R600/fsub.ll @@ -1,5 +1,6 @@ ; RUN: llc -march=r600 -mcpu=redwood < %s | FileCheck -check-prefix=R600 -check-prefix=FUNC %s -; RUN: llc -march=r600 -mcpu=SI -verify-machineinstrs < %s | FileCheck -check-prefix=SI -check-prefix=FUNC %s +; RUN: llc -march=amdgcn -mcpu=SI -verify-machineinstrs < %s | FileCheck -check-prefix=SI -check-prefix=FUNC %s +; RUN: llc -march=amdgcn -mcpu=tonga -verify-machineinstrs < %s | FileCheck -check-prefix=SI -check-prefix=FUNC %s ; FUNC-LABEL: {{^}}v_fsub_f32: diff --git a/test/CodeGen/R600/fsub64.ll b/test/CodeGen/R600/fsub64.ll index eca1b62..2d85cc5 100644 --- a/test/CodeGen/R600/fsub64.ll +++ b/test/CodeGen/R600/fsub64.ll @@ -1,12 +1,107 @@ -; RUN: llc -march=r600 -mcpu=tahiti -verify-machineinstrs < %s | FileCheck -check-prefix=SI %s +; RUN: llc -march=amdgcn -mcpu=tahiti -verify-machineinstrs < %s | FileCheck -check-prefix=SI %s +; RUN: llc -march=amdgcn -mcpu=tonga -verify-machineinstrs < %s | FileCheck -check-prefix=SI %s + +declare double @llvm.fabs.f64(double) #0 ; SI-LABEL: {{^}}fsub_f64: ; SI: v_add_f64 {{v\[[0-9]+:[0-9]+\], v\[[0-9]+:[0-9]+\], -v\[[0-9]+:[0-9]+\]}} define void @fsub_f64(double addrspace(1)* %out, double addrspace(1)* %in1, double addrspace(1)* %in2) { - %r0 = load double addrspace(1)* %in1 - %r1 = load double addrspace(1)* %in2 - %r2 = fsub double %r0, %r1 - store double %r2, double addrspace(1)* %out - ret void + %r0 = load double addrspace(1)* %in1 + %r1 = load double addrspace(1)* %in2 + %r2 = fsub double %r0, %r1 + store double %r2, double addrspace(1)* %out + ret void +} + +; SI-LABEL: {{^}}fsub_fabs_f64: +; SI: v_add_f64 {{v\[[0-9]+:[0-9]+\], v\[[0-9]+:[0-9]+\], -\|v\[[0-9]+:[0-9]+\]\|}} +define void @fsub_fabs_f64(double addrspace(1)* 
%out, double addrspace(1)* %in1, + double addrspace(1)* %in2) { + %r0 = load double addrspace(1)* %in1 + %r1 = load double addrspace(1)* %in2 + %r1.fabs = call double @llvm.fabs.f64(double %r1) #0 + %r2 = fsub double %r0, %r1.fabs + store double %r2, double addrspace(1)* %out + ret void +} + +; SI-LABEL: {{^}}fsub_fabs_inv_f64: +; SI: v_add_f64 {{v\[[0-9]+:[0-9]+\], |v\[[0-9]+:[0-9]+\]|, -v\[[0-9]+:[0-9]+\]}} +define void @fsub_fabs_inv_f64(double addrspace(1)* %out, double addrspace(1)* %in1, + double addrspace(1)* %in2) { + %r0 = load double addrspace(1)* %in1 + %r1 = load double addrspace(1)* %in2 + %r0.fabs = call double @llvm.fabs.f64(double %r0) #0 + %r2 = fsub double %r0.fabs, %r1 + store double %r2, double addrspace(1)* %out + ret void +} + +; SI-LABEL: {{^}}s_fsub_f64: +; SI: v_add_f64 {{v\[[0-9]+:[0-9]+\], s\[[0-9]+:[0-9]+\], -v\[[0-9]+:[0-9]+\]}} +define void @s_fsub_f64(double addrspace(1)* %out, double %a, double %b) { + %sub = fsub double %a, %b + store double %sub, double addrspace(1)* %out + ret void +} + +; SI-LABEL: {{^}}s_fsub_imm_f64: +; SI: v_add_f64 {{v\[[0-9]+:[0-9]+\], 4.0, -s\[[0-9]+:[0-9]+\]}} +define void @s_fsub_imm_f64(double addrspace(1)* %out, double %a, double %b) { + %sub = fsub double 4.0, %a + store double %sub, double addrspace(1)* %out + ret void +} + +; SI-LABEL: {{^}}s_fsub_imm_inv_f64: +; SI: v_add_f64 {{v\[[0-9]+:[0-9]+\], -4.0, s\[[0-9]+:[0-9]+\]}} +define void @s_fsub_imm_inv_f64(double addrspace(1)* %out, double %a, double %b) { + %sub = fsub double %a, 4.0 + store double %sub, double addrspace(1)* %out + ret void +} + +; SI-LABEL: {{^}}s_fsub_self_f64: +; SI: v_add_f64 {{v\[[0-9]+:[0-9]+\], s\[[0-9]+:[0-9]+\], -s\[[0-9]+:[0-9]+\]}} +define void @s_fsub_self_f64(double addrspace(1)* %out, double %a) { + %sub = fsub double %a, %a + store double %sub, double addrspace(1)* %out + ret void +} + +; SI-LABEL: {{^}}fsub_v2f64: +; SI: v_add_f64 {{v\[[0-9]+:[0-9]+\], s\[[0-9]+:[0-9]+\], -v\[[0-9]+:[0-9]+\]}} +; SI: v_add_f64 {{v\[[0-9]+:[0-9]+\], s\[[0-9]+:[0-9]+\], -v\[[0-9]+:[0-9]+\]}} +define void @fsub_v2f64(<2 x double> addrspace(1)* %out, <2 x double> %a, <2 x double> %b) { + %sub = fsub <2 x double> %a, %b + store <2 x double> %sub, <2 x double> addrspace(1)* %out + ret void +} + +; SI-LABEL: {{^}}fsub_v4f64: +; SI: v_add_f64 {{v\[[0-9]+:[0-9]+\], v\[[0-9]+:[0-9]+\], -v\[[0-9]+:[0-9]+\]}} +; SI: v_add_f64 {{v\[[0-9]+:[0-9]+\], v\[[0-9]+:[0-9]+\], -v\[[0-9]+:[0-9]+\]}} +; SI: v_add_f64 {{v\[[0-9]+:[0-9]+\], v\[[0-9]+:[0-9]+\], -v\[[0-9]+:[0-9]+\]}} +; SI: v_add_f64 {{v\[[0-9]+:[0-9]+\], v\[[0-9]+:[0-9]+\], -v\[[0-9]+:[0-9]+\]}} +define void @fsub_v4f64(<4 x double> addrspace(1)* %out, <4 x double> addrspace(1)* %in) { + %b_ptr = getelementptr <4 x double> addrspace(1)* %in, i32 1 + %a = load <4 x double> addrspace(1)* %in + %b = load <4 x double> addrspace(1)* %b_ptr + %result = fsub <4 x double> %a, %b + store <4 x double> %result, <4 x double> addrspace(1)* %out + ret void } + +; SI-LABEL: {{^}}s_fsub_v4f64: +; SI: v_add_f64 {{v\[[0-9]+:[0-9]+\], s\[[0-9]+:[0-9]+\], -v\[[0-9]+:[0-9]+\]}} +; SI: v_add_f64 {{v\[[0-9]+:[0-9]+\], s\[[0-9]+:[0-9]+\], -v\[[0-9]+:[0-9]+\]}} +; SI: v_add_f64 {{v\[[0-9]+:[0-9]+\], s\[[0-9]+:[0-9]+\], -v\[[0-9]+:[0-9]+\]}} +; SI: v_add_f64 {{v\[[0-9]+:[0-9]+\], s\[[0-9]+:[0-9]+\], -v\[[0-9]+:[0-9]+\]}} +define void @s_fsub_v4f64(<4 x double> addrspace(1)* %out, <4 x double> %a, <4 x double> %b) { + %result = fsub <4 x double> %a, %b + store <4 x double> %result, <4 x double> addrspace(1)* %out, align 16 + ret void +} + 
+attributes #0 = { nounwind readnone } diff --git a/test/CodeGen/R600/ftrunc.f64.ll b/test/CodeGen/R600/ftrunc.f64.ll index fba6154..21399a8 100644 --- a/test/CodeGen/R600/ftrunc.f64.ll +++ b/test/CodeGen/R600/ftrunc.f64.ll @@ -1,5 +1,6 @@ -; RUN: llc -march=r600 -mcpu=bonaire < %s | FileCheck -check-prefix=CI -check-prefix=FUNC %s -; RUN: llc -march=r600 -mcpu=SI < %s | FileCheck -check-prefix=SI -check-prefix=FUNC %s +; RUN: llc -march=amdgcn -mcpu=SI < %s | FileCheck -check-prefix=SI -check-prefix=FUNC %s +; RUN: llc -march=amdgcn -mcpu=bonaire < %s | FileCheck -check-prefix=CI -check-prefix=FUNC %s +; RUN: llc -march=amdgcn -mcpu=tonga < %s | FileCheck -check-prefix=CI -check-prefix=FUNC %s declare double @llvm.trunc.f64(double) nounwind readnone declare <2 x double> @llvm.trunc.v2f64(<2 x double>) nounwind readnone @@ -23,12 +24,12 @@ define void @v_ftrunc_f64(double addrspace(1)* %out, double addrspace(1)* %in) { ; CI: v_trunc_f64_e32 ; SI: s_bfe_u32 [[SEXP:s[0-9]+]], {{s[0-9]+}}, 0xb0014 +; SI: s_and_b32 s{{[0-9]+}}, s{{[0-9]+}}, 0x80000000 ; SI: s_add_i32 s{{[0-9]+}}, [[SEXP]], 0xfffffc01 ; SI: s_lshr_b64 +; SI: cmp_lt_i32 ; SI: s_not_b64 ; SI: s_and_b64 -; SI: s_and_b32 s{{[0-9]+}}, s{{[0-9]+}}, 0x80000000 -; SI: cmp_lt_i32 ; SI: cndmask_b32 ; SI: cndmask_b32 ; SI: cmp_gt_i32 diff --git a/test/CodeGen/R600/ftrunc.ll b/test/CodeGen/R600/ftrunc.ll index 0eb1d7d..edc0860 100644 --- a/test/CodeGen/R600/ftrunc.ll +++ b/test/CodeGen/R600/ftrunc.ll @@ -1,5 +1,6 @@ ; RUN: llc -march=r600 -mcpu=redwood < %s | FileCheck -check-prefix=EG --check-prefix=FUNC %s -; RUN: llc -march=r600 -mcpu=SI < %s | FileCheck -check-prefix=SI --check-prefix=FUNC %s +; RUN: llc -march=amdgcn -mcpu=SI < %s | FileCheck -check-prefix=SI --check-prefix=FUNC %s +; RUN: llc -march=amdgcn -mcpu=tonga < %s | FileCheck -check-prefix=SI --check-prefix=FUNC %s declare float @llvm.trunc.f32(float) nounwind readnone declare <2 x float> @llvm.trunc.v2f32(<2 x float>) nounwind readnone diff --git a/test/CodeGen/R600/gep-address-space.ll b/test/CodeGen/R600/gep-address-space.ll index 036daaf..5c6920d 100644 --- a/test/CodeGen/R600/gep-address-space.ll +++ b/test/CodeGen/R600/gep-address-space.ll @@ -1,5 +1,6 @@ -; RUN: llc -march=r600 -mcpu=SI -verify-machineinstrs< %s | FileCheck --check-prefix=SI --check-prefix=CHECK %s -; RUN: llc -march=r600 -mcpu=bonaire -verify-machineinstrs< %s | FileCheck --check-prefix=CI --check-prefix=CHECK %s +; RUN: llc -march=amdgcn -mcpu=SI -verify-machineinstrs< %s | FileCheck --check-prefix=SI --check-prefix=CHECK %s +; RUN: llc -march=amdgcn -mcpu=bonaire -verify-machineinstrs< %s | FileCheck --check-prefix=CI --check-prefix=CHECK %s +; RUN: llc -march=amdgcn -mcpu=tonga -verify-machineinstrs< %s | FileCheck --check-prefix=CI --check-prefix=CHECK %s define void @use_gep_address_space([1024 x i32] addrspace(3)* %array) nounwind { ; CHECK-LABEL: {{^}}use_gep_address_space: diff --git a/test/CodeGen/R600/global-directive.ll b/test/CodeGen/R600/global-directive.ll index d1244b8..3ba12c2 100644 --- a/test/CodeGen/R600/global-directive.ll +++ b/test/CodeGen/R600/global-directive.ll @@ -1,4 +1,5 @@ -; RUN: llc -march=r600 -mcpu=SI -verify-machineinstrs < %s | FileCheck -check-prefix=SI %s +; RUN: llc -march=amdgcn -mcpu=SI -verify-machineinstrs < %s | FileCheck -check-prefix=SI %s +; RUN: llc -march=amdgcn -mcpu=tonga -verify-machineinstrs < %s | FileCheck -check-prefix=SI %s ; Make sure the GlobalDirective isn't merged with the function name diff --git a/test/CodeGen/R600/global-extload-i1.ll 
b/test/CodeGen/R600/global-extload-i1.ll new file mode 100644 index 0000000..67d36ce --- /dev/null +++ b/test/CodeGen/R600/global-extload-i1.ll @@ -0,0 +1,302 @@ +; RUN: llc -march=amdgcn -mcpu=SI -verify-machineinstrs< %s | FileCheck -check-prefix=SI -check-prefix=FUNC %s +; RUN: llc -march=amdgcn -mcpu=tonga -verify-machineinstrs< %s | FileCheck -check-prefix=SI -check-prefix=FUNC %s +; XUN: llc -march=r600 -mcpu=cypress < %s | FileCheck -check-prefix=EG -check-prefix=FUNC %s +; FIXME: Evergreen broken + +; FUNC-LABEL: {{^}}zextload_global_i1_to_i32: +; SI: buffer_load_ubyte +; SI: buffer_store_dword +; SI: s_endpgm +define void @zextload_global_i1_to_i32(i32 addrspace(1)* %out, i1 addrspace(1)* %in) nounwind { + %a = load i1 addrspace(1)* %in + %ext = zext i1 %a to i32 + store i32 %ext, i32 addrspace(1)* %out + ret void +} + +; FUNC-LABEL: {{^}}sextload_global_i1_to_i32: +; SI: buffer_load_ubyte +; SI: v_bfe_i32 {{v[0-9]+}}, {{v[0-9]+}}, 0, 1{{$}} +; SI: buffer_store_dword +; SI: s_endpgm +define void @sextload_global_i1_to_i32(i32 addrspace(1)* %out, i1 addrspace(1)* %in) nounwind { + %a = load i1 addrspace(1)* %in + %ext = sext i1 %a to i32 + store i32 %ext, i32 addrspace(1)* %out + ret void +} + +; FUNC-LABEL: {{^}}zextload_global_v1i1_to_v1i32: +; SI: s_endpgm +define void @zextload_global_v1i1_to_v1i32(<1 x i32> addrspace(1)* %out, <1 x i1> addrspace(1)* nocapture %in) nounwind { + %load = load <1 x i1> addrspace(1)* %in + %ext = zext <1 x i1> %load to <1 x i32> + store <1 x i32> %ext, <1 x i32> addrspace(1)* %out + ret void +} + +; FUNC-LABEL: {{^}}sextload_global_v1i1_to_v1i32: +; SI: s_endpgm +define void @sextload_global_v1i1_to_v1i32(<1 x i32> addrspace(1)* %out, <1 x i1> addrspace(1)* nocapture %in) nounwind { + %load = load <1 x i1> addrspace(1)* %in + %ext = sext <1 x i1> %load to <1 x i32> + store <1 x i32> %ext, <1 x i32> addrspace(1)* %out + ret void +} + +; FUNC-LABEL: {{^}}zextload_global_v2i1_to_v2i32: +; SI: s_endpgm +define void @zextload_global_v2i1_to_v2i32(<2 x i32> addrspace(1)* %out, <2 x i1> addrspace(1)* nocapture %in) nounwind { + %load = load <2 x i1> addrspace(1)* %in + %ext = zext <2 x i1> %load to <2 x i32> + store <2 x i32> %ext, <2 x i32> addrspace(1)* %out + ret void +} + +; FUNC-LABEL: {{^}}sextload_global_v2i1_to_v2i32: +; SI: s_endpgm +define void @sextload_global_v2i1_to_v2i32(<2 x i32> addrspace(1)* %out, <2 x i1> addrspace(1)* nocapture %in) nounwind { + %load = load <2 x i1> addrspace(1)* %in + %ext = sext <2 x i1> %load to <2 x i32> + store <2 x i32> %ext, <2 x i32> addrspace(1)* %out + ret void +} + +; FUNC-LABEL: {{^}}zextload_global_v4i1_to_v4i32: +; SI: s_endpgm +define void @zextload_global_v4i1_to_v4i32(<4 x i32> addrspace(1)* %out, <4 x i1> addrspace(1)* nocapture %in) nounwind { + %load = load <4 x i1> addrspace(1)* %in + %ext = zext <4 x i1> %load to <4 x i32> + store <4 x i32> %ext, <4 x i32> addrspace(1)* %out + ret void +} + +; FUNC-LABEL: {{^}}sextload_global_v4i1_to_v4i32: +; SI: s_endpgm +define void @sextload_global_v4i1_to_v4i32(<4 x i32> addrspace(1)* %out, <4 x i1> addrspace(1)* nocapture %in) nounwind { + %load = load <4 x i1> addrspace(1)* %in + %ext = sext <4 x i1> %load to <4 x i32> + store <4 x i32> %ext, <4 x i32> addrspace(1)* %out + ret void +} + +; FUNC-LABEL: {{^}}zextload_global_v8i1_to_v8i32: +; SI: s_endpgm +define void @zextload_global_v8i1_to_v8i32(<8 x i32> addrspace(1)* %out, <8 x i1> addrspace(1)* nocapture %in) nounwind { + %load = load <8 x i1> addrspace(1)* %in + %ext = zext <8 x i1> %load to <8 x i32> + 
store <8 x i32> %ext, <8 x i32> addrspace(1)* %out + ret void +} + +; FUNC-LABEL: {{^}}sextload_global_v8i1_to_v8i32: +; SI: s_endpgm +define void @sextload_global_v8i1_to_v8i32(<8 x i32> addrspace(1)* %out, <8 x i1> addrspace(1)* nocapture %in) nounwind { + %load = load <8 x i1> addrspace(1)* %in + %ext = sext <8 x i1> %load to <8 x i32> + store <8 x i32> %ext, <8 x i32> addrspace(1)* %out + ret void +} + +; FUNC-LABEL: {{^}}zextload_global_v16i1_to_v16i32: +; SI: s_endpgm +define void @zextload_global_v16i1_to_v16i32(<16 x i32> addrspace(1)* %out, <16 x i1> addrspace(1)* nocapture %in) nounwind { + %load = load <16 x i1> addrspace(1)* %in + %ext = zext <16 x i1> %load to <16 x i32> + store <16 x i32> %ext, <16 x i32> addrspace(1)* %out + ret void +} + +; FUNC-LABEL: {{^}}sextload_global_v16i1_to_v16i32: +; SI: s_endpgm +define void @sextload_global_v16i1_to_v16i32(<16 x i32> addrspace(1)* %out, <16 x i1> addrspace(1)* nocapture %in) nounwind { + %load = load <16 x i1> addrspace(1)* %in + %ext = sext <16 x i1> %load to <16 x i32> + store <16 x i32> %ext, <16 x i32> addrspace(1)* %out + ret void +} + +; XFUNC-LABEL: {{^}}zextload_global_v32i1_to_v32i32: +; XSI: s_endpgm +; define void @zextload_global_v32i1_to_v32i32(<32 x i32> addrspace(1)* %out, <32 x i1> addrspace(1)* nocapture %in) nounwind { +; %load = load <32 x i1> addrspace(1)* %in +; %ext = zext <32 x i1> %load to <32 x i32> +; store <32 x i32> %ext, <32 x i32> addrspace(1)* %out +; ret void +; } + +; XFUNC-LABEL: {{^}}sextload_global_v32i1_to_v32i32: +; XSI: s_endpgm +; define void @sextload_global_v32i1_to_v32i32(<32 x i32> addrspace(1)* %out, <32 x i1> addrspace(1)* nocapture %in) nounwind { +; %load = load <32 x i1> addrspace(1)* %in +; %ext = sext <32 x i1> %load to <32 x i32> +; store <32 x i32> %ext, <32 x i32> addrspace(1)* %out +; ret void +; } + +; XFUNC-LABEL: {{^}}zextload_global_v64i1_to_v64i32: +; XSI: s_endpgm +; define void @zextload_global_v64i1_to_v64i32(<64 x i32> addrspace(1)* %out, <64 x i1> addrspace(1)* nocapture %in) nounwind { +; %load = load <64 x i1> addrspace(1)* %in +; %ext = zext <64 x i1> %load to <64 x i32> +; store <64 x i32> %ext, <64 x i32> addrspace(1)* %out +; ret void +; } + +; XFUNC-LABEL: {{^}}sextload_global_v64i1_to_v64i32: +; XSI: s_endpgm +; define void @sextload_global_v64i1_to_v64i32(<64 x i32> addrspace(1)* %out, <64 x i1> addrspace(1)* nocapture %in) nounwind { +; %load = load <64 x i1> addrspace(1)* %in +; %ext = sext <64 x i1> %load to <64 x i32> +; store <64 x i32> %ext, <64 x i32> addrspace(1)* %out +; ret void +; } + +; FUNC-LABEL: {{^}}zextload_global_i1_to_i64: +; SI: buffer_load_ubyte [[LOAD:v[0-9]+]], +; SI: v_mov_b32_e32 {{v[0-9]+}}, 0{{$}} +; SI: buffer_store_dwordx2 +define void @zextload_global_i1_to_i64(i64 addrspace(1)* %out, i1 addrspace(1)* %in) nounwind { + %a = load i1 addrspace(1)* %in + %ext = zext i1 %a to i64 + store i64 %ext, i64 addrspace(1)* %out + ret void +} + +; FUNC-LABEL: {{^}}sextload_global_i1_to_i64: +; SI: buffer_load_ubyte [[LOAD:v[0-9]+]], +; SI: v_bfe_i32 [[BFE:v[0-9]+]], {{v[0-9]+}}, 0, 1{{$}} +; SI: v_ashrrev_i32_e32 v{{[0-9]+}}, 31, [[BFE]] +; SI: buffer_store_dwordx2 +define void @sextload_global_i1_to_i64(i64 addrspace(1)* %out, i1 addrspace(1)* %in) nounwind { + %a = load i1 addrspace(1)* %in + %ext = sext i1 %a to i64 + store i64 %ext, i64 addrspace(1)* %out + ret void +} + +; FUNC-LABEL: {{^}}zextload_global_v1i1_to_v1i64: +; SI: s_endpgm +define void @zextload_global_v1i1_to_v1i64(<1 x i64> addrspace(1)* %out, <1 x i1> addrspace(1)* 
nocapture %in) nounwind { + %load = load <1 x i1> addrspace(1)* %in + %ext = zext <1 x i1> %load to <1 x i64> + store <1 x i64> %ext, <1 x i64> addrspace(1)* %out + ret void +} + +; FUNC-LABEL: {{^}}sextload_global_v1i1_to_v1i64: +; SI: s_endpgm +define void @sextload_global_v1i1_to_v1i64(<1 x i64> addrspace(1)* %out, <1 x i1> addrspace(1)* nocapture %in) nounwind { + %load = load <1 x i1> addrspace(1)* %in + %ext = sext <1 x i1> %load to <1 x i64> + store <1 x i64> %ext, <1 x i64> addrspace(1)* %out + ret void +} + +; FUNC-LABEL: {{^}}zextload_global_v2i1_to_v2i64: +; SI: s_endpgm +define void @zextload_global_v2i1_to_v2i64(<2 x i64> addrspace(1)* %out, <2 x i1> addrspace(1)* nocapture %in) nounwind { + %load = load <2 x i1> addrspace(1)* %in + %ext = zext <2 x i1> %load to <2 x i64> + store <2 x i64> %ext, <2 x i64> addrspace(1)* %out + ret void +} + +; FUNC-LABEL: {{^}}sextload_global_v2i1_to_v2i64: +; SI: s_endpgm +define void @sextload_global_v2i1_to_v2i64(<2 x i64> addrspace(1)* %out, <2 x i1> addrspace(1)* nocapture %in) nounwind { + %load = load <2 x i1> addrspace(1)* %in + %ext = sext <2 x i1> %load to <2 x i64> + store <2 x i64> %ext, <2 x i64> addrspace(1)* %out + ret void +} + +; FUNC-LABEL: {{^}}zextload_global_v4i1_to_v4i64: +; SI: s_endpgm +define void @zextload_global_v4i1_to_v4i64(<4 x i64> addrspace(1)* %out, <4 x i1> addrspace(1)* nocapture %in) nounwind { + %load = load <4 x i1> addrspace(1)* %in + %ext = zext <4 x i1> %load to <4 x i64> + store <4 x i64> %ext, <4 x i64> addrspace(1)* %out + ret void +} + +; FUNC-LABEL: {{^}}sextload_global_v4i1_to_v4i64: +; SI: s_endpgm +define void @sextload_global_v4i1_to_v4i64(<4 x i64> addrspace(1)* %out, <4 x i1> addrspace(1)* nocapture %in) nounwind { + %load = load <4 x i1> addrspace(1)* %in + %ext = sext <4 x i1> %load to <4 x i64> + store <4 x i64> %ext, <4 x i64> addrspace(1)* %out + ret void +} + +; FUNC-LABEL: {{^}}zextload_global_v8i1_to_v8i64: +; SI: s_endpgm +define void @zextload_global_v8i1_to_v8i64(<8 x i64> addrspace(1)* %out, <8 x i1> addrspace(1)* nocapture %in) nounwind { + %load = load <8 x i1> addrspace(1)* %in + %ext = zext <8 x i1> %load to <8 x i64> + store <8 x i64> %ext, <8 x i64> addrspace(1)* %out + ret void +} + +; FUNC-LABEL: {{^}}sextload_global_v8i1_to_v8i64: +; SI: s_endpgm +define void @sextload_global_v8i1_to_v8i64(<8 x i64> addrspace(1)* %out, <8 x i1> addrspace(1)* nocapture %in) nounwind { + %load = load <8 x i1> addrspace(1)* %in + %ext = sext <8 x i1> %load to <8 x i64> + store <8 x i64> %ext, <8 x i64> addrspace(1)* %out + ret void +} + +; FUNC-LABEL: {{^}}zextload_global_v16i1_to_v16i64: +; SI: s_endpgm +define void @zextload_global_v16i1_to_v16i64(<16 x i64> addrspace(1)* %out, <16 x i1> addrspace(1)* nocapture %in) nounwind { + %load = load <16 x i1> addrspace(1)* %in + %ext = zext <16 x i1> %load to <16 x i64> + store <16 x i64> %ext, <16 x i64> addrspace(1)* %out + ret void +} + +; FUNC-LABEL: {{^}}sextload_global_v16i1_to_v16i64: +; SI: s_endpgm +define void @sextload_global_v16i1_to_v16i64(<16 x i64> addrspace(1)* %out, <16 x i1> addrspace(1)* nocapture %in) nounwind { + %load = load <16 x i1> addrspace(1)* %in + %ext = sext <16 x i1> %load to <16 x i64> + store <16 x i64> %ext, <16 x i64> addrspace(1)* %out + ret void +} + +; XFUNC-LABEL: {{^}}zextload_global_v32i1_to_v32i64: +; XSI: s_endpgm +; define void @zextload_global_v32i1_to_v32i64(<32 x i64> addrspace(1)* %out, <32 x i1> addrspace(1)* nocapture %in) nounwind { +; %load = load <32 x i1> addrspace(1)* %in +; %ext = zext <32 x 
i1> %load to <32 x i64> +; store <32 x i64> %ext, <32 x i64> addrspace(1)* %out +; ret void +; } + +; XFUNC-LABEL: {{^}}sextload_global_v32i1_to_v32i64: +; XSI: s_endpgm +; define void @sextload_global_v32i1_to_v32i64(<32 x i64> addrspace(1)* %out, <32 x i1> addrspace(1)* nocapture %in) nounwind { +; %load = load <32 x i1> addrspace(1)* %in +; %ext = sext <32 x i1> %load to <32 x i64> +; store <32 x i64> %ext, <32 x i64> addrspace(1)* %out +; ret void +; } + +; XFUNC-LABEL: {{^}}zextload_global_v64i1_to_v64i64: +; XSI: s_endpgm +; define void @zextload_global_v64i1_to_v64i64(<64 x i64> addrspace(1)* %out, <64 x i1> addrspace(1)* nocapture %in) nounwind { +; %load = load <64 x i1> addrspace(1)* %in +; %ext = zext <64 x i1> %load to <64 x i64> +; store <64 x i64> %ext, <64 x i64> addrspace(1)* %out +; ret void +; } + +; XFUNC-LABEL: {{^}}sextload_global_v64i1_to_v64i64: +; XSI: s_endpgm +; define void @sextload_global_v64i1_to_v64i64(<64 x i64> addrspace(1)* %out, <64 x i1> addrspace(1)* nocapture %in) nounwind { +; %load = load <64 x i1> addrspace(1)* %in +; %ext = sext <64 x i1> %load to <64 x i64> +; store <64 x i64> %ext, <64 x i64> addrspace(1)* %out +; ret void +; } diff --git a/test/CodeGen/R600/global-extload-i16.ll b/test/CodeGen/R600/global-extload-i16.ll new file mode 100644 index 0000000..f3e3312 --- /dev/null +++ b/test/CodeGen/R600/global-extload-i16.ll @@ -0,0 +1,302 @@ +; RUN: llc -march=amdgcn -mcpu=SI -verify-machineinstrs< %s | FileCheck -check-prefix=SI -check-prefix=FUNC %s +; RUN: llc -march=amdgcn -mcpu=tonga -verify-machineinstrs< %s | FileCheck -check-prefix=SI -check-prefix=FUNC %s +; XUN: llc -march=r600 -mcpu=cypress < %s | FileCheck -check-prefix=EG -check-prefix=FUNC %s +; FIXME: cypress is broken because the bigger testcases spill and it's not implemented + +; FUNC-LABEL: {{^}}zextload_global_i16_to_i32: +; SI: buffer_load_ushort +; SI: buffer_store_dword +; SI: s_endpgm +define void @zextload_global_i16_to_i32(i32 addrspace(1)* %out, i16 addrspace(1)* %in) nounwind { + %a = load i16 addrspace(1)* %in + %ext = zext i16 %a to i32 + store i32 %ext, i32 addrspace(1)* %out + ret void +} + +; FUNC-LABEL: {{^}}sextload_global_i16_to_i32: +; SI: buffer_load_sshort +; SI: buffer_store_dword +; SI: s_endpgm +define void @sextload_global_i16_to_i32(i32 addrspace(1)* %out, i16 addrspace(1)* %in) nounwind { + %a = load i16 addrspace(1)* %in + %ext = sext i16 %a to i32 + store i32 %ext, i32 addrspace(1)* %out + ret void +} + +; FUNC-LABEL: {{^}}zextload_global_v1i16_to_v1i32: +; SI: buffer_load_ushort +; SI: s_endpgm +define void @zextload_global_v1i16_to_v1i32(<1 x i32> addrspace(1)* %out, <1 x i16> addrspace(1)* nocapture %in) nounwind { + %load = load <1 x i16> addrspace(1)* %in + %ext = zext <1 x i16> %load to <1 x i32> + store <1 x i32> %ext, <1 x i32> addrspace(1)* %out + ret void +} + +; FUNC-LABEL: {{^}}sextload_global_v1i16_to_v1i32: +; SI: buffer_load_sshort +; SI: s_endpgm +define void @sextload_global_v1i16_to_v1i32(<1 x i32> addrspace(1)* %out, <1 x i16> addrspace(1)* nocapture %in) nounwind { + %load = load <1 x i16> addrspace(1)* %in + %ext = sext <1 x i16> %load to <1 x i32> + store <1 x i32> %ext, <1 x i32> addrspace(1)* %out + ret void +} + +; FUNC-LABEL: {{^}}zextload_global_v2i16_to_v2i32: +; SI: s_endpgm +define void @zextload_global_v2i16_to_v2i32(<2 x i32> addrspace(1)* %out, <2 x i16> addrspace(1)* nocapture %in) nounwind { + %load = load <2 x i16> addrspace(1)* %in + %ext = zext <2 x i16> %load to <2 x i32> + store <2 x i32> %ext, <2 x i32> 
addrspace(1)* %out + ret void +} + +; FUNC-LABEL: {{^}}sextload_global_v2i16_to_v2i32: +; SI: s_endpgm +define void @sextload_global_v2i16_to_v2i32(<2 x i32> addrspace(1)* %out, <2 x i16> addrspace(1)* nocapture %in) nounwind { + %load = load <2 x i16> addrspace(1)* %in + %ext = sext <2 x i16> %load to <2 x i32> + store <2 x i32> %ext, <2 x i32> addrspace(1)* %out + ret void +} + +; FUNC-LABEL: {{^}}zextload_global_v4i16_to_v4i32: +; SI: s_endpgm +define void @zextload_global_v4i16_to_v4i32(<4 x i32> addrspace(1)* %out, <4 x i16> addrspace(1)* nocapture %in) nounwind { + %load = load <4 x i16> addrspace(1)* %in + %ext = zext <4 x i16> %load to <4 x i32> + store <4 x i32> %ext, <4 x i32> addrspace(1)* %out + ret void +} + +; FUNC-LABEL: {{^}}sextload_global_v4i16_to_v4i32: +; SI: s_endpgm +define void @sextload_global_v4i16_to_v4i32(<4 x i32> addrspace(1)* %out, <4 x i16> addrspace(1)* nocapture %in) nounwind { + %load = load <4 x i16> addrspace(1)* %in + %ext = sext <4 x i16> %load to <4 x i32> + store <4 x i32> %ext, <4 x i32> addrspace(1)* %out + ret void +} + +; FUNC-LABEL: {{^}}zextload_global_v8i16_to_v8i32: +; SI: s_endpgm +define void @zextload_global_v8i16_to_v8i32(<8 x i32> addrspace(1)* %out, <8 x i16> addrspace(1)* nocapture %in) nounwind { + %load = load <8 x i16> addrspace(1)* %in + %ext = zext <8 x i16> %load to <8 x i32> + store <8 x i32> %ext, <8 x i32> addrspace(1)* %out + ret void +} + +; FUNC-LABEL: {{^}}sextload_global_v8i16_to_v8i32: +; SI: s_endpgm +define void @sextload_global_v8i16_to_v8i32(<8 x i32> addrspace(1)* %out, <8 x i16> addrspace(1)* nocapture %in) nounwind { + %load = load <8 x i16> addrspace(1)* %in + %ext = sext <8 x i16> %load to <8 x i32> + store <8 x i32> %ext, <8 x i32> addrspace(1)* %out + ret void +} + +; FUNC-LABEL: {{^}}zextload_global_v16i16_to_v16i32: +; SI: s_endpgm +define void @zextload_global_v16i16_to_v16i32(<16 x i32> addrspace(1)* %out, <16 x i16> addrspace(1)* nocapture %in) nounwind { + %load = load <16 x i16> addrspace(1)* %in + %ext = zext <16 x i16> %load to <16 x i32> + store <16 x i32> %ext, <16 x i32> addrspace(1)* %out + ret void +} + +; FUNC-LABEL: {{^}}sextload_global_v16i16_to_v16i32: +; SI: s_endpgm +define void @sextload_global_v16i16_to_v16i32(<16 x i32> addrspace(1)* %out, <16 x i16> addrspace(1)* nocapture %in) nounwind { + %load = load <16 x i16> addrspace(1)* %in + %ext = sext <16 x i16> %load to <16 x i32> + store <16 x i32> %ext, <16 x i32> addrspace(1)* %out + ret void +} + +; FUNC-LABEL: {{^}}zextload_global_v32i16_to_v32i32: +; SI: s_endpgm +define void @zextload_global_v32i16_to_v32i32(<32 x i32> addrspace(1)* %out, <32 x i16> addrspace(1)* nocapture %in) nounwind { + %load = load <32 x i16> addrspace(1)* %in + %ext = zext <32 x i16> %load to <32 x i32> + store <32 x i32> %ext, <32 x i32> addrspace(1)* %out + ret void +} + +; FUNC-LABEL: {{^}}sextload_global_v32i16_to_v32i32: +; SI: s_endpgm +define void @sextload_global_v32i16_to_v32i32(<32 x i32> addrspace(1)* %out, <32 x i16> addrspace(1)* nocapture %in) nounwind { + %load = load <32 x i16> addrspace(1)* %in + %ext = sext <32 x i16> %load to <32 x i32> + store <32 x i32> %ext, <32 x i32> addrspace(1)* %out + ret void +} + +; FUNC-LABEL: {{^}}zextload_global_v64i16_to_v64i32: +; SI: s_endpgm +define void @zextload_global_v64i16_to_v64i32(<64 x i32> addrspace(1)* %out, <64 x i16> addrspace(1)* nocapture %in) nounwind { + %load = load <64 x i16> addrspace(1)* %in + %ext = zext <64 x i16> %load to <64 x i32> + store <64 x i32> %ext, <64 x i32> addrspace(1)* %out 
+ ret void +} + +; FUNC-LABEL: {{^}}sextload_global_v64i16_to_v64i32: +; SI: s_endpgm +define void @sextload_global_v64i16_to_v64i32(<64 x i32> addrspace(1)* %out, <64 x i16> addrspace(1)* nocapture %in) nounwind { + %load = load <64 x i16> addrspace(1)* %in + %ext = sext <64 x i16> %load to <64 x i32> + store <64 x i32> %ext, <64 x i32> addrspace(1)* %out + ret void +} + +; FUNC-LABEL: {{^}}zextload_global_i16_to_i64: +; SI: buffer_load_ushort v[[LO:[0-9]+]], +; SI: v_mov_b32_e32 v[[HI:[0-9]+]], 0{{$}} +; SI: buffer_store_dwordx2 v{{\[}}[[LO]]:[[HI]]] +define void @zextload_global_i16_to_i64(i64 addrspace(1)* %out, i16 addrspace(1)* %in) nounwind { + %a = load i16 addrspace(1)* %in + %ext = zext i16 %a to i64 + store i64 %ext, i64 addrspace(1)* %out + ret void +} + +; FUNC-LABEL: {{^}}sextload_global_i16_to_i64: +; SI: buffer_load_sshort [[LOAD:v[0-9]+]], +; SI: v_ashrrev_i32_e32 v{{[0-9]+}}, 31, [[LOAD]] +; SI: buffer_store_dwordx2 +define void @sextload_global_i16_to_i64(i64 addrspace(1)* %out, i16 addrspace(1)* %in) nounwind { + %a = load i16 addrspace(1)* %in + %ext = sext i16 %a to i64 + store i64 %ext, i64 addrspace(1)* %out + ret void +} + +; FUNC-LABEL: {{^}}zextload_global_v1i16_to_v1i64: +; SI: s_endpgm +define void @zextload_global_v1i16_to_v1i64(<1 x i64> addrspace(1)* %out, <1 x i16> addrspace(1)* nocapture %in) nounwind { + %load = load <1 x i16> addrspace(1)* %in + %ext = zext <1 x i16> %load to <1 x i64> + store <1 x i64> %ext, <1 x i64> addrspace(1)* %out + ret void +} + +; FUNC-LABEL: {{^}}sextload_global_v1i16_to_v1i64: +; SI: s_endpgm +define void @sextload_global_v1i16_to_v1i64(<1 x i64> addrspace(1)* %out, <1 x i16> addrspace(1)* nocapture %in) nounwind { + %load = load <1 x i16> addrspace(1)* %in + %ext = sext <1 x i16> %load to <1 x i64> + store <1 x i64> %ext, <1 x i64> addrspace(1)* %out + ret void +} + +; FUNC-LABEL: {{^}}zextload_global_v2i16_to_v2i64: +; SI: s_endpgm +define void @zextload_global_v2i16_to_v2i64(<2 x i64> addrspace(1)* %out, <2 x i16> addrspace(1)* nocapture %in) nounwind { + %load = load <2 x i16> addrspace(1)* %in + %ext = zext <2 x i16> %load to <2 x i64> + store <2 x i64> %ext, <2 x i64> addrspace(1)* %out + ret void +} + +; FUNC-LABEL: {{^}}sextload_global_v2i16_to_v2i64: +; SI: s_endpgm +define void @sextload_global_v2i16_to_v2i64(<2 x i64> addrspace(1)* %out, <2 x i16> addrspace(1)* nocapture %in) nounwind { + %load = load <2 x i16> addrspace(1)* %in + %ext = sext <2 x i16> %load to <2 x i64> + store <2 x i64> %ext, <2 x i64> addrspace(1)* %out + ret void +} + +; FUNC-LABEL: {{^}}zextload_global_v4i16_to_v4i64: +; SI: s_endpgm +define void @zextload_global_v4i16_to_v4i64(<4 x i64> addrspace(1)* %out, <4 x i16> addrspace(1)* nocapture %in) nounwind { + %load = load <4 x i16> addrspace(1)* %in + %ext = zext <4 x i16> %load to <4 x i64> + store <4 x i64> %ext, <4 x i64> addrspace(1)* %out + ret void +} + +; FUNC-LABEL: {{^}}sextload_global_v4i16_to_v4i64: +; SI: s_endpgm +define void @sextload_global_v4i16_to_v4i64(<4 x i64> addrspace(1)* %out, <4 x i16> addrspace(1)* nocapture %in) nounwind { + %load = load <4 x i16> addrspace(1)* %in + %ext = sext <4 x i16> %load to <4 x i64> + store <4 x i64> %ext, <4 x i64> addrspace(1)* %out + ret void +} + +; FUNC-LABEL: {{^}}zextload_global_v8i16_to_v8i64: +; SI: s_endpgm +define void @zextload_global_v8i16_to_v8i64(<8 x i64> addrspace(1)* %out, <8 x i16> addrspace(1)* nocapture %in) nounwind { + %load = load <8 x i16> addrspace(1)* %in + %ext = zext <8 x i16> %load to <8 x i64> + store <8 x i64> 
%ext, <8 x i64> addrspace(1)* %out + ret void +} + +; FUNC-LABEL: {{^}}sextload_global_v8i16_to_v8i64: +; SI: s_endpgm +define void @sextload_global_v8i16_to_v8i64(<8 x i64> addrspace(1)* %out, <8 x i16> addrspace(1)* nocapture %in) nounwind { + %load = load <8 x i16> addrspace(1)* %in + %ext = sext <8 x i16> %load to <8 x i64> + store <8 x i64> %ext, <8 x i64> addrspace(1)* %out + ret void +} + +; FUNC-LABEL: {{^}}zextload_global_v16i16_to_v16i64: +; SI: s_endpgm +define void @zextload_global_v16i16_to_v16i64(<16 x i64> addrspace(1)* %out, <16 x i16> addrspace(1)* nocapture %in) nounwind { + %load = load <16 x i16> addrspace(1)* %in + %ext = zext <16 x i16> %load to <16 x i64> + store <16 x i64> %ext, <16 x i64> addrspace(1)* %out + ret void +} + +; FUNC-LABEL: {{^}}sextload_global_v16i16_to_v16i64: +; SI: s_endpgm +define void @sextload_global_v16i16_to_v16i64(<16 x i64> addrspace(1)* %out, <16 x i16> addrspace(1)* nocapture %in) nounwind { + %load = load <16 x i16> addrspace(1)* %in + %ext = sext <16 x i16> %load to <16 x i64> + store <16 x i64> %ext, <16 x i64> addrspace(1)* %out + ret void +} + +; FUNC-LABEL: {{^}}zextload_global_v32i16_to_v32i64: +; SI: s_endpgm +define void @zextload_global_v32i16_to_v32i64(<32 x i64> addrspace(1)* %out, <32 x i16> addrspace(1)* nocapture %in) nounwind { + %load = load <32 x i16> addrspace(1)* %in + %ext = zext <32 x i16> %load to <32 x i64> + store <32 x i64> %ext, <32 x i64> addrspace(1)* %out + ret void +} + +; FUNC-LABEL: {{^}}sextload_global_v32i16_to_v32i64: +; SI: s_endpgm +define void @sextload_global_v32i16_to_v32i64(<32 x i64> addrspace(1)* %out, <32 x i16> addrspace(1)* nocapture %in) nounwind { + %load = load <32 x i16> addrspace(1)* %in + %ext = sext <32 x i16> %load to <32 x i64> + store <32 x i64> %ext, <32 x i64> addrspace(1)* %out + ret void +} + +; FUNC-LABEL: {{^}}zextload_global_v64i16_to_v64i64: +; SI: s_endpgm +define void @zextload_global_v64i16_to_v64i64(<64 x i64> addrspace(1)* %out, <64 x i16> addrspace(1)* nocapture %in) nounwind { + %load = load <64 x i16> addrspace(1)* %in + %ext = zext <64 x i16> %load to <64 x i64> + store <64 x i64> %ext, <64 x i64> addrspace(1)* %out + ret void +} + +; FUNC-LABEL: {{^}}sextload_global_v64i16_to_v64i64: +; SI: s_endpgm +define void @sextload_global_v64i16_to_v64i64(<64 x i64> addrspace(1)* %out, <64 x i16> addrspace(1)* nocapture %in) nounwind { + %load = load <64 x i16> addrspace(1)* %in + %ext = sext <64 x i16> %load to <64 x i64> + store <64 x i64> %ext, <64 x i64> addrspace(1)* %out + ret void +} diff --git a/test/CodeGen/R600/global-extload-i32.ll b/test/CodeGen/R600/global-extload-i32.ll new file mode 100644 index 0000000..b3d5438 --- /dev/null +++ b/test/CodeGen/R600/global-extload-i32.ll @@ -0,0 +1,457 @@ +; RUN: llc -march=amdgcn -mcpu=SI -verify-machineinstrs< %s | FileCheck -check-prefix=SI -check-prefix=FUNC %s +; RUN: llc -march=amdgcn -mcpu=tonga -verify-machineinstrs< %s | FileCheck -check-prefix=SI -check-prefix=FUNC %s +; RUN: llc -march=r600 -mcpu=cypress < %s | FileCheck -check-prefix=EG -check-prefix=FUNC %s + +; FUNC-LABEL: {{^}}zextload_global_i32_to_i64: +; SI: buffer_load_dword v[[LO:[0-9]+]], +; SI: v_mov_b32_e32 v[[HI:[0-9]+]], 0{{$}} +; SI: buffer_store_dwordx2 v{{\[}}[[LO]]:[[HI]]] +define void @zextload_global_i32_to_i64(i64 addrspace(1)* %out, i32 addrspace(1)* %in) nounwind { + %a = load i32 addrspace(1)* %in + %ext = zext i32 %a to i64 + store i64 %ext, i64 addrspace(1)* %out + ret void +} + +; FUNC-LABEL: {{^}}sextload_global_i32_to_i64: +; SI: 
buffer_load_dword [[LOAD:v[0-9]+]], +; SI: v_ashrrev_i32_e32 v{{[0-9]+}}, 31, [[LOAD]] +; SI: buffer_store_dwordx2 +define void @sextload_global_i32_to_i64(i64 addrspace(1)* %out, i32 addrspace(1)* %in) nounwind { + %a = load i32 addrspace(1)* %in + %ext = sext i32 %a to i64 + store i64 %ext, i64 addrspace(1)* %out + ret void +} + +; FUNC-LABEL: {{^}}zextload_global_v1i32_to_v1i64: +; SI: buffer_load_dword +; SI: buffer_store_dwordx2 +; SI: s_endpgm +define void @zextload_global_v1i32_to_v1i64(<1 x i64> addrspace(1)* %out, <1 x i32> addrspace(1)* nocapture %in) nounwind { + %load = load <1 x i32> addrspace(1)* %in + %ext = zext <1 x i32> %load to <1 x i64> + store <1 x i64> %ext, <1 x i64> addrspace(1)* %out + ret void +} + +; FUNC-LABEL: {{^}}sextload_global_v1i32_to_v1i64: +; SI: buffer_load_dword +; SI: v_ashrrev_i32 +; SI: buffer_store_dwordx2 +; SI: s_endpgm +define void @sextload_global_v1i32_to_v1i64(<1 x i64> addrspace(1)* %out, <1 x i32> addrspace(1)* nocapture %in) nounwind { + %load = load <1 x i32> addrspace(1)* %in + %ext = sext <1 x i32> %load to <1 x i64> + store <1 x i64> %ext, <1 x i64> addrspace(1)* %out + ret void +} + +; FUNC-LABEL: {{^}}zextload_global_v2i32_to_v2i64: +; SI: buffer_load_dwordx2 +; SI: buffer_store_dwordx2 +; SI: buffer_store_dwordx2 +; SI: s_endpgm +define void @zextload_global_v2i32_to_v2i64(<2 x i64> addrspace(1)* %out, <2 x i32> addrspace(1)* nocapture %in) nounwind { + %load = load <2 x i32> addrspace(1)* %in + %ext = zext <2 x i32> %load to <2 x i64> + store <2 x i64> %ext, <2 x i64> addrspace(1)* %out + ret void +} + +; FUNC-LABEL: {{^}}sextload_global_v2i32_to_v2i64: +; SI: buffer_load_dwordx2 +; SI-DAG: v_ashrrev_i32 +; SI-DAG: v_ashrrev_i32 +; SI-DAG: buffer_store_dwordx2 +; SI-DAG: buffer_store_dwordx2 +; SI: s_endpgm +define void @sextload_global_v2i32_to_v2i64(<2 x i64> addrspace(1)* %out, <2 x i32> addrspace(1)* nocapture %in) nounwind { + %load = load <2 x i32> addrspace(1)* %in + %ext = sext <2 x i32> %load to <2 x i64> + store <2 x i64> %ext, <2 x i64> addrspace(1)* %out + ret void +} + +; FUNC-LABEL: {{^}}zextload_global_v4i32_to_v4i64: +; SI: buffer_load_dwordx4 +; SI: buffer_store_dwordx2 +; SI: buffer_store_dwordx2 +; SI: buffer_store_dwordx2 +; SI: buffer_store_dwordx2 +; SI: s_endpgm +define void @zextload_global_v4i32_to_v4i64(<4 x i64> addrspace(1)* %out, <4 x i32> addrspace(1)* nocapture %in) nounwind { + %load = load <4 x i32> addrspace(1)* %in + %ext = zext <4 x i32> %load to <4 x i64> + store <4 x i64> %ext, <4 x i64> addrspace(1)* %out + ret void +} + +; FUNC-LABEL: {{^}}sextload_global_v4i32_to_v4i64: +; SI: buffer_load_dwordx4 +; SI-DAG: v_ashrrev_i32 +; SI-DAG: v_ashrrev_i32 +; SI-DAG: v_ashrrev_i32 +; SI-DAG: v_ashrrev_i32 +; SI-DAG: buffer_store_dwordx2 +; SI-DAG: buffer_store_dwordx2 +; SI-DAG: buffer_store_dwordx2 +; SI-DAG: buffer_store_dwordx2 +; SI: s_endpgm +define void @sextload_global_v4i32_to_v4i64(<4 x i64> addrspace(1)* %out, <4 x i32> addrspace(1)* nocapture %in) nounwind { + %load = load <4 x i32> addrspace(1)* %in + %ext = sext <4 x i32> %load to <4 x i64> + store <4 x i64> %ext, <4 x i64> addrspace(1)* %out + ret void +} + +; FUNC-LABEL: {{^}}zextload_global_v8i32_to_v8i64: +; SI: buffer_load_dword +; SI: buffer_load_dword +; SI: buffer_load_dword +; SI: buffer_load_dword +; SI: buffer_load_dword +; SI: buffer_load_dword +; SI: buffer_load_dword +; SI: buffer_load_dword +; SI-DAG: buffer_store_dwordx2 +; SI-DAG: buffer_store_dwordx2 +; SI-DAG: buffer_store_dwordx2 +; SI-DAG: buffer_store_dwordx2 +; 
SI-DAG: buffer_store_dwordx2 +; SI-DAG: buffer_store_dwordx2 +; SI-DAG: buffer_store_dwordx2 +; SI-DAG: buffer_store_dwordx2 +; SI: s_endpgm +define void @zextload_global_v8i32_to_v8i64(<8 x i64> addrspace(1)* %out, <8 x i32> addrspace(1)* nocapture %in) nounwind { + %load = load <8 x i32> addrspace(1)* %in + %ext = zext <8 x i32> %load to <8 x i64> + store <8 x i64> %ext, <8 x i64> addrspace(1)* %out + ret void +} + +; FUNC-LABEL: {{^}}sextload_global_v8i32_to_v8i64: +; SI: buffer_load_dword +; SI: buffer_load_dword +; SI: buffer_load_dword +; SI: buffer_load_dword +; SI: buffer_load_dword +; SI: buffer_load_dword +; SI: buffer_load_dword +; SI: buffer_load_dword + +; SI-DAG: v_ashrrev_i32 +; SI-DAG: v_ashrrev_i32 +; SI-DAG: v_ashrrev_i32 +; SI-DAG: v_ashrrev_i32 +; SI-DAG: v_ashrrev_i32 +; SI-DAG: v_ashrrev_i32 +; SI-DAG: v_ashrrev_i32 +; SI-DAG: v_ashrrev_i32 +; SI-DAG: buffer_store_dwordx2 +; SI-DAG: buffer_store_dwordx2 +; SI-DAG: buffer_store_dwordx2 +; SI-DAG: buffer_store_dwordx2 +; SI-DAG: buffer_store_dwordx2 +; SI-DAG: buffer_store_dwordx2 +; SI-DAG: buffer_store_dwordx2 +; SI-DAG: buffer_store_dwordx2 + +; SI: s_endpgm +define void @sextload_global_v8i32_to_v8i64(<8 x i64> addrspace(1)* %out, <8 x i32> addrspace(1)* nocapture %in) nounwind { + %load = load <8 x i32> addrspace(1)* %in + %ext = sext <8 x i32> %load to <8 x i64> + store <8 x i64> %ext, <8 x i64> addrspace(1)* %out + ret void +} + +; FUNC-LABEL: {{^}}sextload_global_v16i32_to_v16i64: +; SI: buffer_load_dword +; SI: buffer_load_dword +; SI: buffer_load_dword +; SI: buffer_load_dword +; SI: buffer_load_dword +; SI: buffer_load_dword +; SI: buffer_load_dword +; SI: buffer_load_dword +; SI: buffer_load_dword +; SI: buffer_load_dword +; SI: buffer_load_dword +; SI: buffer_load_dword +; SI: buffer_load_dword +; SI: buffer_load_dword +; SI: buffer_load_dword +; SI: buffer_load_dword + +; SI-DAG: v_ashrrev_i32 +; SI-DAG: v_ashrrev_i32 +; SI-DAG: v_ashrrev_i32 +; SI-DAG: v_ashrrev_i32 +; SI-DAG: buffer_store_dwordx2 +; SI-DAG: buffer_store_dwordx2 + +; SI-DAG: v_ashrrev_i32 +; SI-DAG: v_ashrrev_i32 +; SI-DAG: v_ashrrev_i32 +; SI-DAG: v_ashrrev_i32 +; SI-DAG: buffer_store_dwordx2 +; SI-DAG: buffer_store_dwordx2 + +; SI-DAG: v_ashrrev_i32 +; SI-DAG: v_ashrrev_i32 +; SI-DAG: v_ashrrev_i32 +; SI-DAG: v_ashrrev_i32 +; SI-DAG: buffer_store_dwordx2 +; SI-DAG: buffer_store_dwordx2 + +; SI-DAG: v_ashrrev_i32 +; SI-DAG: v_ashrrev_i32 +; SI-DAG: v_ashrrev_i32 +; SI-DAG: v_ashrrev_i32 +; SI-DAG: buffer_store_dwordx2 +; SI-DAG: buffer_store_dwordx2 +; SI: s_endpgm +define void @sextload_global_v16i32_to_v16i64(<16 x i64> addrspace(1)* %out, <16 x i32> addrspace(1)* nocapture %in) nounwind { + %load = load <16 x i32> addrspace(1)* %in + %ext = sext <16 x i32> %load to <16 x i64> + store <16 x i64> %ext, <16 x i64> addrspace(1)* %out + ret void +} + +; FUNC-LABEL: {{^}}zextload_global_v16i32_to_v16i64 +; SI: buffer_load_dword +; SI: buffer_load_dword +; SI: buffer_load_dword +; SI: buffer_load_dword +; SI: buffer_load_dword +; SI: buffer_load_dword +; SI: buffer_load_dword +; SI: buffer_load_dword +; SI: buffer_load_dword +; SI: buffer_load_dword +; SI: buffer_load_dword +; SI: buffer_load_dword +; SI: buffer_load_dword +; SI: buffer_load_dword +; SI: buffer_load_dword +; SI: buffer_load_dword + +; SI: buffer_store_dwordx2 +; SI: buffer_store_dwordx2 +; SI: buffer_store_dwordx2 +; SI: buffer_store_dwordx2 +; SI: buffer_store_dwordx2 +; SI: buffer_store_dwordx2 +; SI: buffer_store_dwordx2 +; SI: buffer_store_dwordx2 +; SI: 
buffer_store_dwordx2 +; SI: buffer_store_dwordx2 +; SI: buffer_store_dwordx2 +; SI: buffer_store_dwordx2 +; SI: buffer_store_dwordx2 +; SI: buffer_store_dwordx2 +; SI: buffer_store_dwordx2 +; SI: buffer_store_dwordx2 + +; SI: s_endpgm +define void @zextload_global_v16i32_to_v16i64(<16 x i64> addrspace(1)* %out, <16 x i32> addrspace(1)* nocapture %in) nounwind { + %load = load <16 x i32> addrspace(1)* %in + %ext = zext <16 x i32> %load to <16 x i64> + store <16 x i64> %ext, <16 x i64> addrspace(1)* %out + ret void +} + +; FUNC-LABEL: {{^}}sextload_global_v32i32_to_v32i64: +; SI: buffer_load_dword +; SI: buffer_load_dword +; SI: buffer_load_dword +; SI: buffer_load_dword +; SI: buffer_load_dword +; SI: buffer_load_dword +; SI: buffer_load_dword +; SI: buffer_load_dword + +; SI: buffer_load_dword +; SI: buffer_load_dword +; SI: buffer_load_dword +; SI: buffer_load_dword +; SI: buffer_load_dword +; SI: buffer_load_dword +; SI: buffer_load_dword +; SI: buffer_load_dword + +; SI: buffer_load_dword +; SI: buffer_load_dword +; SI: buffer_load_dword +; SI: buffer_load_dword +; SI: buffer_load_dword +; SI: buffer_load_dword +; SI: buffer_load_dword +; SI: buffer_load_dword + +; SI: buffer_load_dword +; SI: buffer_load_dword +; SI: buffer_load_dword +; SI: buffer_load_dword +; SI: buffer_load_dword +; SI: buffer_load_dword +; SI: buffer_load_dword +; SI: buffer_load_dword + +; SI-DAG: v_ashrrev_i32 +; SI-DAG: v_ashrrev_i32 +; SI-DAG: v_ashrrev_i32 +; SI-DAG: v_ashrrev_i32 +; SI-DAG: v_ashrrev_i32 +; SI-DAG: v_ashrrev_i32 +; SI-DAG: v_ashrrev_i32 +; SI-DAG: v_ashrrev_i32 +; SI-DAG: v_ashrrev_i32 +; SI-DAG: v_ashrrev_i32 +; SI-DAG: v_ashrrev_i32 +; SI-DAG: v_ashrrev_i32 +; SI-DAG: v_ashrrev_i32 +; SI-DAG: v_ashrrev_i32 +; SI-DAG: v_ashrrev_i32 +; SI-DAG: v_ashrrev_i32 +; SI-DAG: v_ashrrev_i32 +; SI-DAG: v_ashrrev_i32 +; SI-DAG: v_ashrrev_i32 +; SI-DAG: v_ashrrev_i32 +; SI-DAG: v_ashrrev_i32 +; SI-DAG: v_ashrrev_i32 +; SI-DAG: v_ashrrev_i32 +; SI-DAG: v_ashrrev_i32 +; SI-DAG: v_ashrrev_i32 +; SI-DAG: v_ashrrev_i32 +; SI-DAG: v_ashrrev_i32 +; SI-DAG: v_ashrrev_i32 +; SI-DAG: v_ashrrev_i32 +; SI-DAG: v_ashrrev_i32 +; SI-DAG: v_ashrrev_i32 +; SI-DAG: v_ashrrev_i32 + +; SI-DAG: buffer_store_dwordx2 +; SI-DAG: buffer_store_dwordx2 +; SI-DAG: buffer_store_dwordx2 +; SI-DAG: buffer_store_dwordx2 +; SI-DAG: buffer_store_dwordx2 +; SI-DAG: buffer_store_dwordx2 +; SI-DAG: buffer_store_dwordx2 +; SI-DAG: buffer_store_dwordx2 + +; SI-DAG: buffer_store_dwordx2 +; SI-DAG: buffer_store_dwordx2 +; SI-DAG: buffer_store_dwordx2 +; SI-DAG: buffer_store_dwordx2 +; SI-DAG: buffer_store_dwordx2 +; SI-DAG: buffer_store_dwordx2 +; SI-DAG: buffer_store_dwordx2 +; SI-DAG: buffer_store_dwordx2 + +; SI-DAG: buffer_store_dwordx2 +; SI-DAG: buffer_store_dwordx2 +; SI-DAG: buffer_store_dwordx2 +; SI-DAG: buffer_store_dwordx2 +; SI-DAG: buffer_store_dwordx2 +; SI-DAG: buffer_store_dwordx2 +; SI-DAG: buffer_store_dwordx2 +; SI-DAG: buffer_store_dwordx2 + +; SI-DAG: buffer_store_dwordx2 +; SI-DAG: buffer_store_dwordx2 +; SI-DAG: buffer_store_dwordx2 +; SI-DAG: buffer_store_dwordx2 +; SI-DAG: buffer_store_dwordx2 +; SI-DAG: buffer_store_dwordx2 +; SI-DAG: buffer_store_dwordx2 +; SI-DAG: buffer_store_dwordx2 + +; SI: s_endpgm +define void @sextload_global_v32i32_to_v32i64(<32 x i64> addrspace(1)* %out, <32 x i32> addrspace(1)* nocapture %in) nounwind { + %load = load <32 x i32> addrspace(1)* %in + %ext = sext <32 x i32> %load to <32 x i64> + store <32 x i64> %ext, <32 x i64> addrspace(1)* %out + ret void +} + +; FUNC-LABEL: 
{{^}}zextload_global_v32i32_to_v32i64: +; SI: buffer_load_dword +; SI: buffer_load_dword +; SI: buffer_load_dword +; SI: buffer_load_dword +; SI: buffer_load_dword +; SI: buffer_load_dword +; SI: buffer_load_dword +; SI: buffer_load_dword + +; SI: buffer_load_dword +; SI: buffer_load_dword +; SI: buffer_load_dword +; SI: buffer_load_dword +; SI: buffer_load_dword +; SI: buffer_load_dword +; SI: buffer_load_dword +; SI: buffer_load_dword + +; SI: buffer_load_dword +; SI: buffer_load_dword +; SI: buffer_load_dword +; SI: buffer_load_dword +; SI: buffer_load_dword +; SI: buffer_load_dword +; SI: buffer_load_dword +; SI: buffer_load_dword + +; SI: buffer_load_dword +; SI: buffer_load_dword +; SI: buffer_load_dword +; SI: buffer_load_dword +; SI: buffer_load_dword +; SI: buffer_load_dword +; SI: buffer_load_dword +; SI: buffer_load_dword + +; SI-DAG: buffer_store_dwordx2 +; SI-DAG: buffer_store_dwordx2 +; SI-DAG: buffer_store_dwordx2 +; SI-DAG: buffer_store_dwordx2 +; SI-DAG: buffer_store_dwordx2 +; SI-DAG: buffer_store_dwordx2 +; SI-DAG: buffer_store_dwordx2 +; SI-DAG: buffer_store_dwordx2 + +; SI-DAG: buffer_store_dwordx2 +; SI-DAG: buffer_store_dwordx2 +; SI-DAG: buffer_store_dwordx2 +; SI-DAG: buffer_store_dwordx2 +; SI-DAG: buffer_store_dwordx2 +; SI-DAG: buffer_store_dwordx2 +; SI-DAG: buffer_store_dwordx2 +; SI-DAG: buffer_store_dwordx2 + +; SI-DAG: buffer_store_dwordx2 +; SI-DAG: buffer_store_dwordx2 +; SI-DAG: buffer_store_dwordx2 +; SI-DAG: buffer_store_dwordx2 +; SI-DAG: buffer_store_dwordx2 +; SI-DAG: buffer_store_dwordx2 +; SI-DAG: buffer_store_dwordx2 +; SI-DAG: buffer_store_dwordx2 + +; SI-DAG: buffer_store_dwordx2 +; SI-DAG: buffer_store_dwordx2 +; SI-DAG: buffer_store_dwordx2 +; SI-DAG: buffer_store_dwordx2 +; SI-DAG: buffer_store_dwordx2 +; SI-DAG: buffer_store_dwordx2 +; SI-DAG: buffer_store_dwordx2 +; SI-DAG: buffer_store_dwordx2 + +; SI: s_endpgm +define void @zextload_global_v32i32_to_v32i64(<32 x i64> addrspace(1)* %out, <32 x i32> addrspace(1)* nocapture %in) nounwind { + %load = load <32 x i32> addrspace(1)* %in + %ext = zext <32 x i32> %load to <32 x i64> + store <32 x i64> %ext, <32 x i64> addrspace(1)* %out + ret void +} diff --git a/test/CodeGen/R600/global-extload-i8.ll b/test/CodeGen/R600/global-extload-i8.ll new file mode 100644 index 0000000..4c37f3f --- /dev/null +++ b/test/CodeGen/R600/global-extload-i8.ll @@ -0,0 +1,299 @@ +; RUN: llc -march=amdgcn -mcpu=SI -verify-machineinstrs< %s | FileCheck -check-prefix=SI -check-prefix=FUNC %s +; RUN: llc -march=amdgcn -mcpu=tonga -verify-machineinstrs< %s | FileCheck -check-prefix=SI -check-prefix=FUNC %s +; RUN: llc -march=r600 -mcpu=cypress < %s | FileCheck -check-prefix=EG -check-prefix=FUNC %s + +; FUNC-LABEL: {{^}}zextload_global_i8_to_i32: +; SI: buffer_load_ubyte +; SI: buffer_store_dword +; SI: s_endpgm +define void @zextload_global_i8_to_i32(i32 addrspace(1)* %out, i8 addrspace(1)* %in) nounwind { + %a = load i8 addrspace(1)* %in + %ext = zext i8 %a to i32 + store i32 %ext, i32 addrspace(1)* %out + ret void +} + +; FUNC-LABEL: {{^}}sextload_global_i8_to_i32: +; SI: buffer_load_sbyte +; SI: buffer_store_dword +; SI: s_endpgm +define void @sextload_global_i8_to_i32(i32 addrspace(1)* %out, i8 addrspace(1)* %in) nounwind { + %a = load i8 addrspace(1)* %in + %ext = sext i8 %a to i32 + store i32 %ext, i32 addrspace(1)* %out + ret void +} + +; FUNC-LABEL: {{^}}zextload_global_v1i8_to_v1i32: +; SI: s_endpgm +define void @zextload_global_v1i8_to_v1i32(<1 x i32> addrspace(1)* %out, <1 x i8> addrspace(1)* nocapture %in) 
nounwind { + %load = load <1 x i8> addrspace(1)* %in + %ext = zext <1 x i8> %load to <1 x i32> + store <1 x i32> %ext, <1 x i32> addrspace(1)* %out + ret void +} + +; FUNC-LABEL: {{^}}sextload_global_v1i8_to_v1i32: +; SI: s_endpgm +define void @sextload_global_v1i8_to_v1i32(<1 x i32> addrspace(1)* %out, <1 x i8> addrspace(1)* nocapture %in) nounwind { + %load = load <1 x i8> addrspace(1)* %in + %ext = sext <1 x i8> %load to <1 x i32> + store <1 x i32> %ext, <1 x i32> addrspace(1)* %out + ret void +} + +; FUNC-LABEL: {{^}}zextload_global_v2i8_to_v2i32: +; SI: s_endpgm +define void @zextload_global_v2i8_to_v2i32(<2 x i32> addrspace(1)* %out, <2 x i8> addrspace(1)* nocapture %in) nounwind { + %load = load <2 x i8> addrspace(1)* %in + %ext = zext <2 x i8> %load to <2 x i32> + store <2 x i32> %ext, <2 x i32> addrspace(1)* %out + ret void +} + +; FUNC-LABEL: {{^}}sextload_global_v2i8_to_v2i32: +; SI: s_endpgm +define void @sextload_global_v2i8_to_v2i32(<2 x i32> addrspace(1)* %out, <2 x i8> addrspace(1)* nocapture %in) nounwind { + %load = load <2 x i8> addrspace(1)* %in + %ext = sext <2 x i8> %load to <2 x i32> + store <2 x i32> %ext, <2 x i32> addrspace(1)* %out + ret void +} + +; FUNC-LABEL: {{^}}zextload_global_v4i8_to_v4i32: +; SI: s_endpgm +define void @zextload_global_v4i8_to_v4i32(<4 x i32> addrspace(1)* %out, <4 x i8> addrspace(1)* nocapture %in) nounwind { + %load = load <4 x i8> addrspace(1)* %in + %ext = zext <4 x i8> %load to <4 x i32> + store <4 x i32> %ext, <4 x i32> addrspace(1)* %out + ret void +} + +; FUNC-LABEL: {{^}}sextload_global_v4i8_to_v4i32: +; SI: s_endpgm +define void @sextload_global_v4i8_to_v4i32(<4 x i32> addrspace(1)* %out, <4 x i8> addrspace(1)* nocapture %in) nounwind { + %load = load <4 x i8> addrspace(1)* %in + %ext = sext <4 x i8> %load to <4 x i32> + store <4 x i32> %ext, <4 x i32> addrspace(1)* %out + ret void +} + +; FUNC-LABEL: {{^}}zextload_global_v8i8_to_v8i32: +; SI: s_endpgm +define void @zextload_global_v8i8_to_v8i32(<8 x i32> addrspace(1)* %out, <8 x i8> addrspace(1)* nocapture %in) nounwind { + %load = load <8 x i8> addrspace(1)* %in + %ext = zext <8 x i8> %load to <8 x i32> + store <8 x i32> %ext, <8 x i32> addrspace(1)* %out + ret void +} + +; FUNC-LABEL: {{^}}sextload_global_v8i8_to_v8i32: +; SI: s_endpgm +define void @sextload_global_v8i8_to_v8i32(<8 x i32> addrspace(1)* %out, <8 x i8> addrspace(1)* nocapture %in) nounwind { + %load = load <8 x i8> addrspace(1)* %in + %ext = sext <8 x i8> %load to <8 x i32> + store <8 x i32> %ext, <8 x i32> addrspace(1)* %out + ret void +} + +; FUNC-LABEL: {{^}}zextload_global_v16i8_to_v16i32: +; SI: s_endpgm +define void @zextload_global_v16i8_to_v16i32(<16 x i32> addrspace(1)* %out, <16 x i8> addrspace(1)* nocapture %in) nounwind { + %load = load <16 x i8> addrspace(1)* %in + %ext = zext <16 x i8> %load to <16 x i32> + store <16 x i32> %ext, <16 x i32> addrspace(1)* %out + ret void +} + +; FUNC-LABEL: {{^}}sextload_global_v16i8_to_v16i32: +; SI: s_endpgm +define void @sextload_global_v16i8_to_v16i32(<16 x i32> addrspace(1)* %out, <16 x i8> addrspace(1)* nocapture %in) nounwind { + %load = load <16 x i8> addrspace(1)* %in + %ext = sext <16 x i8> %load to <16 x i32> + store <16 x i32> %ext, <16 x i32> addrspace(1)* %out + ret void +} + +; XFUNC-LABEL: {{^}}zextload_global_v32i8_to_v32i32: +; XSI: s_endpgm +; define void @zextload_global_v32i8_to_v32i32(<32 x i32> addrspace(1)* %out, <32 x i8> addrspace(1)* nocapture %in) nounwind { +; %load = load <32 x i8> addrspace(1)* %in +; %ext = zext <32 x i8> %load to <32 
x i32> +; store <32 x i32> %ext, <32 x i32> addrspace(1)* %out +; ret void +; } + +; XFUNC-LABEL: {{^}}sextload_global_v32i8_to_v32i32: +; XSI: s_endpgm +; define void @sextload_global_v32i8_to_v32i32(<32 x i32> addrspace(1)* %out, <32 x i8> addrspace(1)* nocapture %in) nounwind { +; %load = load <32 x i8> addrspace(1)* %in +; %ext = sext <32 x i8> %load to <32 x i32> +; store <32 x i32> %ext, <32 x i32> addrspace(1)* %out +; ret void +; } + +; XFUNC-LABEL: {{^}}zextload_global_v64i8_to_v64i32: +; XSI: s_endpgm +; define void @zextload_global_v64i8_to_v64i32(<64 x i32> addrspace(1)* %out, <64 x i8> addrspace(1)* nocapture %in) nounwind { +; %load = load <64 x i8> addrspace(1)* %in +; %ext = zext <64 x i8> %load to <64 x i32> +; store <64 x i32> %ext, <64 x i32> addrspace(1)* %out +; ret void +; } + +; XFUNC-LABEL: {{^}}sextload_global_v64i8_to_v64i32: +; XSI: s_endpgm +; define void @sextload_global_v64i8_to_v64i32(<64 x i32> addrspace(1)* %out, <64 x i8> addrspace(1)* nocapture %in) nounwind { +; %load = load <64 x i8> addrspace(1)* %in +; %ext = sext <64 x i8> %load to <64 x i32> +; store <64 x i32> %ext, <64 x i32> addrspace(1)* %out +; ret void +; } + +; FUNC-LABEL: {{^}}zextload_global_i8_to_i64: +; SI: buffer_load_ubyte v[[LO:[0-9]+]], +; SI: v_mov_b32_e32 v[[HI:[0-9]+]], 0{{$}} +; SI: buffer_store_dwordx2 v{{\[}}[[LO]]:[[HI]]] +define void @zextload_global_i8_to_i64(i64 addrspace(1)* %out, i8 addrspace(1)* %in) nounwind { + %a = load i8 addrspace(1)* %in + %ext = zext i8 %a to i64 + store i64 %ext, i64 addrspace(1)* %out + ret void +} + +; FUNC-LABEL: {{^}}sextload_global_i8_to_i64: +; SI: buffer_load_sbyte [[LOAD:v[0-9]+]], +; SI: v_ashrrev_i32_e32 v{{[0-9]+}}, 31, [[LOAD]] +; SI: buffer_store_dwordx2 +define void @sextload_global_i8_to_i64(i64 addrspace(1)* %out, i8 addrspace(1)* %in) nounwind { + %a = load i8 addrspace(1)* %in + %ext = sext i8 %a to i64 + store i64 %ext, i64 addrspace(1)* %out + ret void +} + +; FUNC-LABEL: {{^}}zextload_global_v1i8_to_v1i64: +; SI: s_endpgm +define void @zextload_global_v1i8_to_v1i64(<1 x i64> addrspace(1)* %out, <1 x i8> addrspace(1)* nocapture %in) nounwind { + %load = load <1 x i8> addrspace(1)* %in + %ext = zext <1 x i8> %load to <1 x i64> + store <1 x i64> %ext, <1 x i64> addrspace(1)* %out + ret void +} + +; FUNC-LABEL: {{^}}sextload_global_v1i8_to_v1i64: +; SI: s_endpgm +define void @sextload_global_v1i8_to_v1i64(<1 x i64> addrspace(1)* %out, <1 x i8> addrspace(1)* nocapture %in) nounwind { + %load = load <1 x i8> addrspace(1)* %in + %ext = sext <1 x i8> %load to <1 x i64> + store <1 x i64> %ext, <1 x i64> addrspace(1)* %out + ret void +} + +; FUNC-LABEL: {{^}}zextload_global_v2i8_to_v2i64: +; SI: s_endpgm +define void @zextload_global_v2i8_to_v2i64(<2 x i64> addrspace(1)* %out, <2 x i8> addrspace(1)* nocapture %in) nounwind { + %load = load <2 x i8> addrspace(1)* %in + %ext = zext <2 x i8> %load to <2 x i64> + store <2 x i64> %ext, <2 x i64> addrspace(1)* %out + ret void +} + +; FUNC-LABEL: {{^}}sextload_global_v2i8_to_v2i64: +; SI: s_endpgm +define void @sextload_global_v2i8_to_v2i64(<2 x i64> addrspace(1)* %out, <2 x i8> addrspace(1)* nocapture %in) nounwind { + %load = load <2 x i8> addrspace(1)* %in + %ext = sext <2 x i8> %load to <2 x i64> + store <2 x i64> %ext, <2 x i64> addrspace(1)* %out + ret void +} + +; FUNC-LABEL: {{^}}zextload_global_v4i8_to_v4i64: +; SI: s_endpgm +define void @zextload_global_v4i8_to_v4i64(<4 x i64> addrspace(1)* %out, <4 x i8> addrspace(1)* nocapture %in) nounwind { + %load = load <4 x i8> addrspace(1)* 
%in + %ext = zext <4 x i8> %load to <4 x i64> + store <4 x i64> %ext, <4 x i64> addrspace(1)* %out + ret void +} + +; FUNC-LABEL: {{^}}sextload_global_v4i8_to_v4i64: +; SI: s_endpgm +define void @sextload_global_v4i8_to_v4i64(<4 x i64> addrspace(1)* %out, <4 x i8> addrspace(1)* nocapture %in) nounwind { + %load = load <4 x i8> addrspace(1)* %in + %ext = sext <4 x i8> %load to <4 x i64> + store <4 x i64> %ext, <4 x i64> addrspace(1)* %out + ret void +} + +; FUNC-LABEL: {{^}}zextload_global_v8i8_to_v8i64: +; SI: s_endpgm +define void @zextload_global_v8i8_to_v8i64(<8 x i64> addrspace(1)* %out, <8 x i8> addrspace(1)* nocapture %in) nounwind { + %load = load <8 x i8> addrspace(1)* %in + %ext = zext <8 x i8> %load to <8 x i64> + store <8 x i64> %ext, <8 x i64> addrspace(1)* %out + ret void +} + +; FUNC-LABEL: {{^}}sextload_global_v8i8_to_v8i64: +; SI: s_endpgm +define void @sextload_global_v8i8_to_v8i64(<8 x i64> addrspace(1)* %out, <8 x i8> addrspace(1)* nocapture %in) nounwind { + %load = load <8 x i8> addrspace(1)* %in + %ext = sext <8 x i8> %load to <8 x i64> + store <8 x i64> %ext, <8 x i64> addrspace(1)* %out + ret void +} + +; FUNC-LABEL: {{^}}zextload_global_v16i8_to_v16i64: +; SI: s_endpgm +define void @zextload_global_v16i8_to_v16i64(<16 x i64> addrspace(1)* %out, <16 x i8> addrspace(1)* nocapture %in) nounwind { + %load = load <16 x i8> addrspace(1)* %in + %ext = zext <16 x i8> %load to <16 x i64> + store <16 x i64> %ext, <16 x i64> addrspace(1)* %out + ret void +} + +; FUNC-LABEL: {{^}}sextload_global_v16i8_to_v16i64: +; SI: s_endpgm +define void @sextload_global_v16i8_to_v16i64(<16 x i64> addrspace(1)* %out, <16 x i8> addrspace(1)* nocapture %in) nounwind { + %load = load <16 x i8> addrspace(1)* %in + %ext = sext <16 x i8> %load to <16 x i64> + store <16 x i64> %ext, <16 x i64> addrspace(1)* %out + ret void +} + +; XFUNC-LABEL: {{^}}zextload_global_v32i8_to_v32i64: +; XSI: s_endpgm +; define void @zextload_global_v32i8_to_v32i64(<32 x i64> addrspace(1)* %out, <32 x i8> addrspace(1)* nocapture %in) nounwind { +; %load = load <32 x i8> addrspace(1)* %in +; %ext = zext <32 x i8> %load to <32 x i64> +; store <32 x i64> %ext, <32 x i64> addrspace(1)* %out +; ret void +; } + +; XFUNC-LABEL: {{^}}sextload_global_v32i8_to_v32i64: +; XSI: s_endpgm +; define void @sextload_global_v32i8_to_v32i64(<32 x i64> addrspace(1)* %out, <32 x i8> addrspace(1)* nocapture %in) nounwind { +; %load = load <32 x i8> addrspace(1)* %in +; %ext = sext <32 x i8> %load to <32 x i64> +; store <32 x i64> %ext, <32 x i64> addrspace(1)* %out +; ret void +; } + +; XFUNC-LABEL: {{^}}zextload_global_v64i8_to_v64i64: +; XSI: s_endpgm +; define void @zextload_global_v64i8_to_v64i64(<64 x i64> addrspace(1)* %out, <64 x i8> addrspace(1)* nocapture %in) nounwind { +; %load = load <64 x i8> addrspace(1)* %in +; %ext = zext <64 x i8> %load to <64 x i64> +; store <64 x i64> %ext, <64 x i64> addrspace(1)* %out +; ret void +; } + +; XFUNC-LABEL: {{^}}sextload_global_v64i8_to_v64i64: +; XSI: s_endpgm +; define void @sextload_global_v64i8_to_v64i64(<64 x i64> addrspace(1)* %out, <64 x i8> addrspace(1)* nocapture %in) nounwind { +; %load = load <64 x i8> addrspace(1)* %in +; %ext = sext <64 x i8> %load to <64 x i64> +; store <64 x i64> %ext, <64 x i64> addrspace(1)* %out +; ret void +; } diff --git a/test/CodeGen/R600/global-zero-initializer.ll b/test/CodeGen/R600/global-zero-initializer.ll index b69b061..6909c58 100644 --- a/test/CodeGen/R600/global-zero-initializer.ll +++ b/test/CodeGen/R600/global-zero-initializer.ll @@ -1,4 
+1,5 @@ -; RUN: not llc -march=r600 -mcpu=SI < %s 2>&1 | FileCheck %s +; RUN: not llc -march=amdgcn -mcpu=SI < %s 2>&1 | FileCheck %s +; RUN: not llc -march=amdgcn -mcpu=tonga < %s 2>&1 | FileCheck %s ; CHECK: error: unsupported initializer for address space in load_init_global_global diff --git a/test/CodeGen/R600/global_atomics.ll b/test/CodeGen/R600/global_atomics.ll index 533a964..5a07a02 100644 --- a/test/CodeGen/R600/global_atomics.ll +++ b/test/CodeGen/R600/global_atomics.ll @@ -1,7 +1,7 @@ -; RUN: llc < %s -march=r600 -mcpu=SI -verify-machineinstrs | FileCheck --check-prefix=SI --check-prefix=FUNC %s +; RUN: llc < %s -march=amdgcn -mcpu=SI -verify-machineinstrs | FileCheck --check-prefix=SI --check-prefix=FUNC %s ; FUNC-LABEL: {{^}}atomic_add_i32_offset: -; SI: buffer_atomic_add v{{[0-9]+}}, s[{{[0-9]+}}:{{[0-9]+}}], 0 offset:0x10{{$}} +; SI: buffer_atomic_add v{{[0-9]+}}, s[{{[0-9]+}}:{{[0-9]+}}], 0 offset:16{{$}} define void @atomic_add_i32_offset(i32 addrspace(1)* %out, i32 %in) { entry: %gep = getelementptr i32 addrspace(1)* %out, i32 4 @@ -10,7 +10,7 @@ entry: } ; FUNC-LABEL: {{^}}atomic_add_i32_ret_offset: -; SI: buffer_atomic_add [[RET:v[0-9]+]], s[{{[0-9]+}}:{{[0-9]+}}], 0 offset:0x10 glc {{$}} +; SI: buffer_atomic_add [[RET:v[0-9]+]], s[{{[0-9]+}}:{{[0-9]+}}], 0 offset:16 glc {{$}} ; SI: buffer_store_dword [[RET]] define void @atomic_add_i32_ret_offset(i32 addrspace(1)* %out, i32 addrspace(1)* %out2, i32 %in) { entry: @@ -21,7 +21,7 @@ entry: } ; FUNC-LABEL: {{^}}atomic_add_i32_addr64_offset: -; SI: buffer_atomic_add v{{[0-9]+}}, v[{{[0-9]+}}:{{[0-9]+}}], s[{{[0-9]+}}:{{[0-9]+}}], 0 addr64 offset:0x10{{$}} +; SI: buffer_atomic_add v{{[0-9]+}}, v[{{[0-9]+}}:{{[0-9]+}}], s[{{[0-9]+}}:{{[0-9]+}}], 0 addr64 offset:16{{$}} define void @atomic_add_i32_addr64_offset(i32 addrspace(1)* %out, i32 %in, i64 %index) { entry: %ptr = getelementptr i32 addrspace(1)* %out, i64 %index @@ -31,7 +31,7 @@ entry: } ; FUNC-LABEL: {{^}}atomic_add_i32_ret_addr64_offset: -; SI: buffer_atomic_add [[RET:v[0-9]+]], v[{{[0-9]+}}:{{[0-9]+}}], s[{{[0-9]+}}:{{[0-9]+}}], 0 addr64 offset:0x10 glc{{$}} +; SI: buffer_atomic_add [[RET:v[0-9]+]], v[{{[0-9]+}}:{{[0-9]+}}], s[{{[0-9]+}}:{{[0-9]+}}], 0 addr64 offset:16 glc{{$}} ; SI: buffer_store_dword [[RET]] define void @atomic_add_i32_ret_addr64_offset(i32 addrspace(1)* %out, i32 addrspace(1)* %out2, i32 %in, i64 %index) { entry: @@ -81,7 +81,7 @@ entry: } ; FUNC-LABEL: {{^}}atomic_and_i32_offset: -; SI: buffer_atomic_and v{{[0-9]+}}, s[{{[0-9]+}}:{{[0-9]+}}], 0 offset:0x10{{$}} +; SI: buffer_atomic_and v{{[0-9]+}}, s[{{[0-9]+}}:{{[0-9]+}}], 0 offset:16{{$}} define void @atomic_and_i32_offset(i32 addrspace(1)* %out, i32 %in) { entry: %gep = getelementptr i32 addrspace(1)* %out, i32 4 @@ -90,7 +90,7 @@ entry: } ; FUNC-LABEL: {{^}}atomic_and_i32_ret_offset: -; SI: buffer_atomic_and [[RET:v[0-9]+]], s[{{[0-9]+}}:{{[0-9]+}}], 0 offset:0x10 glc {{$}} +; SI: buffer_atomic_and [[RET:v[0-9]+]], s[{{[0-9]+}}:{{[0-9]+}}], 0 offset:16 glc {{$}} ; SI: buffer_store_dword [[RET]] define void @atomic_and_i32_ret_offset(i32 addrspace(1)* %out, i32 addrspace(1)* %out2, i32 %in) { entry: @@ -101,7 +101,7 @@ entry: } ; FUNC-LABEL: {{^}}atomic_and_i32_addr64_offset: -; SI: buffer_atomic_and v{{[0-9]+}}, v[{{[0-9]+}}:{{[0-9]+}}], s[{{[0-9]+}}:{{[0-9]+}}], 0 addr64 offset:0x10{{$}} +; SI: buffer_atomic_and v{{[0-9]+}}, v[{{[0-9]+}}:{{[0-9]+}}], s[{{[0-9]+}}:{{[0-9]+}}], 0 addr64 offset:16{{$}} define void @atomic_and_i32_addr64_offset(i32 addrspace(1)* %out, i32 %in, i64 %index) { 
entry: %ptr = getelementptr i32 addrspace(1)* %out, i64 %index @@ -111,7 +111,7 @@ entry: } ; FUNC-LABEL: {{^}}atomic_and_i32_ret_addr64_offset: -; SI: buffer_atomic_and [[RET:v[0-9]+]], v[{{[0-9]+}}:{{[0-9]+}}], s[{{[0-9]+}}:{{[0-9]+}}], 0 addr64 offset:0x10 glc{{$}} +; SI: buffer_atomic_and [[RET:v[0-9]+]], v[{{[0-9]+}}:{{[0-9]+}}], s[{{[0-9]+}}:{{[0-9]+}}], 0 addr64 offset:16 glc{{$}} ; SI: buffer_store_dword [[RET]] define void @atomic_and_i32_ret_addr64_offset(i32 addrspace(1)* %out, i32 addrspace(1)* %out2, i32 %in, i64 %index) { entry: @@ -161,7 +161,7 @@ entry: } ; FUNC-LABEL: {{^}}atomic_sub_i32_offset: -; SI: buffer_atomic_sub v{{[0-9]+}}, s[{{[0-9]+}}:{{[0-9]+}}], 0 offset:0x10{{$}} +; SI: buffer_atomic_sub v{{[0-9]+}}, s[{{[0-9]+}}:{{[0-9]+}}], 0 offset:16{{$}} define void @atomic_sub_i32_offset(i32 addrspace(1)* %out, i32 %in) { entry: %gep = getelementptr i32 addrspace(1)* %out, i32 4 @@ -170,7 +170,7 @@ entry: } ; FUNC-LABEL: {{^}}atomic_sub_i32_ret_offset: -; SI: buffer_atomic_sub [[RET:v[0-9]+]], s[{{[0-9]+}}:{{[0-9]+}}], 0 offset:0x10 glc {{$}} +; SI: buffer_atomic_sub [[RET:v[0-9]+]], s[{{[0-9]+}}:{{[0-9]+}}], 0 offset:16 glc {{$}} ; SI: buffer_store_dword [[RET]] define void @atomic_sub_i32_ret_offset(i32 addrspace(1)* %out, i32 addrspace(1)* %out2, i32 %in) { entry: @@ -181,7 +181,7 @@ entry: } ; FUNC-LABEL: {{^}}atomic_sub_i32_addr64_offset: -; SI: buffer_atomic_sub v{{[0-9]+}}, v[{{[0-9]+}}:{{[0-9]+}}], s[{{[0-9]+}}:{{[0-9]+}}], 0 addr64 offset:0x10{{$}} +; SI: buffer_atomic_sub v{{[0-9]+}}, v[{{[0-9]+}}:{{[0-9]+}}], s[{{[0-9]+}}:{{[0-9]+}}], 0 addr64 offset:16{{$}} define void @atomic_sub_i32_addr64_offset(i32 addrspace(1)* %out, i32 %in, i64 %index) { entry: %ptr = getelementptr i32 addrspace(1)* %out, i64 %index @@ -191,7 +191,7 @@ entry: } ; FUNC-LABEL: {{^}}atomic_sub_i32_ret_addr64_offset: -; SI: buffer_atomic_sub [[RET:v[0-9]+]], v[{{[0-9]+}}:{{[0-9]+}}], s[{{[0-9]+}}:{{[0-9]+}}], 0 addr64 offset:0x10 glc{{$}} +; SI: buffer_atomic_sub [[RET:v[0-9]+]], v[{{[0-9]+}}:{{[0-9]+}}], s[{{[0-9]+}}:{{[0-9]+}}], 0 addr64 offset:16 glc{{$}} ; SI: buffer_store_dword [[RET]] define void @atomic_sub_i32_ret_addr64_offset(i32 addrspace(1)* %out, i32 addrspace(1)* %out2, i32 %in, i64 %index) { entry: @@ -241,7 +241,7 @@ entry: } ; FUNC-LABEL: {{^}}atomic_max_i32_offset: -; SI: buffer_atomic_smax v{{[0-9]+}}, s[{{[0-9]+}}:{{[0-9]+}}], 0 offset:0x10{{$}} +; SI: buffer_atomic_smax v{{[0-9]+}}, s[{{[0-9]+}}:{{[0-9]+}}], 0 offset:16{{$}} define void @atomic_max_i32_offset(i32 addrspace(1)* %out, i32 %in) { entry: %gep = getelementptr i32 addrspace(1)* %out, i32 4 @@ -250,7 +250,7 @@ entry: } ; FUNC-LABEL: {{^}}atomic_max_i32_ret_offset: -; SI: buffer_atomic_smax [[RET:v[0-9]+]], s[{{[0-9]+}}:{{[0-9]+}}], 0 offset:0x10 glc {{$}} +; SI: buffer_atomic_smax [[RET:v[0-9]+]], s[{{[0-9]+}}:{{[0-9]+}}], 0 offset:16 glc {{$}} ; SI: buffer_store_dword [[RET]] define void @atomic_max_i32_ret_offset(i32 addrspace(1)* %out, i32 addrspace(1)* %out2, i32 %in) { entry: @@ -261,7 +261,7 @@ entry: } ; FUNC-LABEL: {{^}}atomic_max_i32_addr64_offset: -; SI: buffer_atomic_smax v{{[0-9]+}}, v[{{[0-9]+}}:{{[0-9]+}}], s[{{[0-9]+}}:{{[0-9]+}}], 0 addr64 offset:0x10{{$}} +; SI: buffer_atomic_smax v{{[0-9]+}}, v[{{[0-9]+}}:{{[0-9]+}}], s[{{[0-9]+}}:{{[0-9]+}}], 0 addr64 offset:16{{$}} define void @atomic_max_i32_addr64_offset(i32 addrspace(1)* %out, i32 %in, i64 %index) { entry: %ptr = getelementptr i32 addrspace(1)* %out, i64 %index @@ -271,7 +271,7 @@ entry: } ; FUNC-LABEL: 
{{^}}atomic_max_i32_ret_addr64_offset: -; SI: buffer_atomic_smax [[RET:v[0-9]+]], v[{{[0-9]+}}:{{[0-9]+}}], s[{{[0-9]+}}:{{[0-9]+}}], 0 addr64 offset:0x10 glc{{$}} +; SI: buffer_atomic_smax [[RET:v[0-9]+]], v[{{[0-9]+}}:{{[0-9]+}}], s[{{[0-9]+}}:{{[0-9]+}}], 0 addr64 offset:16 glc{{$}} ; SI: buffer_store_dword [[RET]] define void @atomic_max_i32_ret_addr64_offset(i32 addrspace(1)* %out, i32 addrspace(1)* %out2, i32 %in, i64 %index) { entry: @@ -321,7 +321,7 @@ entry: } ; FUNC-LABEL: {{^}}atomic_umax_i32_offset: -; SI: buffer_atomic_umax v{{[0-9]+}}, s[{{[0-9]+}}:{{[0-9]+}}], 0 offset:0x10{{$}} +; SI: buffer_atomic_umax v{{[0-9]+}}, s[{{[0-9]+}}:{{[0-9]+}}], 0 offset:16{{$}} define void @atomic_umax_i32_offset(i32 addrspace(1)* %out, i32 %in) { entry: %gep = getelementptr i32 addrspace(1)* %out, i32 4 @@ -330,7 +330,7 @@ entry: } ; FUNC-LABEL: {{^}}atomic_umax_i32_ret_offset: -; SI: buffer_atomic_umax [[RET:v[0-9]+]], s[{{[0-9]+}}:{{[0-9]+}}], 0 offset:0x10 glc {{$}} +; SI: buffer_atomic_umax [[RET:v[0-9]+]], s[{{[0-9]+}}:{{[0-9]+}}], 0 offset:16 glc {{$}} ; SI: buffer_store_dword [[RET]] define void @atomic_umax_i32_ret_offset(i32 addrspace(1)* %out, i32 addrspace(1)* %out2, i32 %in) { entry: @@ -341,7 +341,7 @@ entry: } ; FUNC-LABEL: {{^}}atomic_umax_i32_addr64_offset: -; SI: buffer_atomic_umax v{{[0-9]+}}, v[{{[0-9]+}}:{{[0-9]+}}], s[{{[0-9]+}}:{{[0-9]+}}], 0 addr64 offset:0x10{{$}} +; SI: buffer_atomic_umax v{{[0-9]+}}, v[{{[0-9]+}}:{{[0-9]+}}], s[{{[0-9]+}}:{{[0-9]+}}], 0 addr64 offset:16{{$}} define void @atomic_umax_i32_addr64_offset(i32 addrspace(1)* %out, i32 %in, i64 %index) { entry: %ptr = getelementptr i32 addrspace(1)* %out, i64 %index @@ -351,7 +351,7 @@ entry: } ; FUNC-LABEL: {{^}}atomic_umax_i32_ret_addr64_offset: -; SI: buffer_atomic_umax [[RET:v[0-9]+]], v[{{[0-9]+}}:{{[0-9]+}}], s[{{[0-9]+}}:{{[0-9]+}}], 0 addr64 offset:0x10 glc{{$}} +; SI: buffer_atomic_umax [[RET:v[0-9]+]], v[{{[0-9]+}}:{{[0-9]+}}], s[{{[0-9]+}}:{{[0-9]+}}], 0 addr64 offset:16 glc{{$}} ; SI: buffer_store_dword [[RET]] define void @atomic_umax_i32_ret_addr64_offset(i32 addrspace(1)* %out, i32 addrspace(1)* %out2, i32 %in, i64 %index) { entry: @@ -401,7 +401,7 @@ entry: } ; FUNC-LABEL: {{^}}atomic_min_i32_offset: -; SI: buffer_atomic_smin v{{[0-9]+}}, s[{{[0-9]+}}:{{[0-9]+}}], 0 offset:0x10{{$}} +; SI: buffer_atomic_smin v{{[0-9]+}}, s[{{[0-9]+}}:{{[0-9]+}}], 0 offset:16{{$}} define void @atomic_min_i32_offset(i32 addrspace(1)* %out, i32 %in) { entry: %gep = getelementptr i32 addrspace(1)* %out, i32 4 @@ -410,7 +410,7 @@ entry: } ; FUNC-LABEL: {{^}}atomic_min_i32_ret_offset: -; SI: buffer_atomic_smin [[RET:v[0-9]+]], s[{{[0-9]+}}:{{[0-9]+}}], 0 offset:0x10 glc {{$}} +; SI: buffer_atomic_smin [[RET:v[0-9]+]], s[{{[0-9]+}}:{{[0-9]+}}], 0 offset:16 glc {{$}} ; SI: buffer_store_dword [[RET]] define void @atomic_min_i32_ret_offset(i32 addrspace(1)* %out, i32 addrspace(1)* %out2, i32 %in) { entry: @@ -421,7 +421,7 @@ entry: } ; FUNC-LABEL: {{^}}atomic_min_i32_addr64_offset: -; SI: buffer_atomic_smin v{{[0-9]+}}, v[{{[0-9]+}}:{{[0-9]+}}], s[{{[0-9]+}}:{{[0-9]+}}], 0 addr64 offset:0x10{{$}} +; SI: buffer_atomic_smin v{{[0-9]+}}, v[{{[0-9]+}}:{{[0-9]+}}], s[{{[0-9]+}}:{{[0-9]+}}], 0 addr64 offset:16{{$}} define void @atomic_min_i32_addr64_offset(i32 addrspace(1)* %out, i32 %in, i64 %index) { entry: %ptr = getelementptr i32 addrspace(1)* %out, i64 %index @@ -431,7 +431,7 @@ entry: } ; FUNC-LABEL: {{^}}atomic_min_i32_ret_addr64_offset: -; SI: buffer_atomic_smin [[RET:v[0-9]+]], v[{{[0-9]+}}:{{[0-9]+}}], 
s[{{[0-9]+}}:{{[0-9]+}}], 0 addr64 offset:0x10 glc{{$}} +; SI: buffer_atomic_smin [[RET:v[0-9]+]], v[{{[0-9]+}}:{{[0-9]+}}], s[{{[0-9]+}}:{{[0-9]+}}], 0 addr64 offset:16 glc{{$}} ; SI: buffer_store_dword [[RET]] define void @atomic_min_i32_ret_addr64_offset(i32 addrspace(1)* %out, i32 addrspace(1)* %out2, i32 %in, i64 %index) { entry: @@ -481,7 +481,7 @@ entry: } ; FUNC-LABEL: {{^}}atomic_umin_i32_offset: -; SI: buffer_atomic_umin v{{[0-9]+}}, s[{{[0-9]+}}:{{[0-9]+}}], 0 offset:0x10{{$}} +; SI: buffer_atomic_umin v{{[0-9]+}}, s[{{[0-9]+}}:{{[0-9]+}}], 0 offset:16{{$}} define void @atomic_umin_i32_offset(i32 addrspace(1)* %out, i32 %in) { entry: %gep = getelementptr i32 addrspace(1)* %out, i32 4 @@ -490,7 +490,7 @@ entry: } ; FUNC-LABEL: {{^}}atomic_umin_i32_ret_offset: -; SI: buffer_atomic_umin [[RET:v[0-9]+]], s[{{[0-9]+}}:{{[0-9]+}}], 0 offset:0x10 glc {{$}} +; SI: buffer_atomic_umin [[RET:v[0-9]+]], s[{{[0-9]+}}:{{[0-9]+}}], 0 offset:16 glc {{$}} ; SI: buffer_store_dword [[RET]] define void @atomic_umin_i32_ret_offset(i32 addrspace(1)* %out, i32 addrspace(1)* %out2, i32 %in) { entry: @@ -501,7 +501,7 @@ entry: } ; FUNC-LABEL: {{^}}atomic_umin_i32_addr64_offset: -; SI: buffer_atomic_umin v{{[0-9]+}}, v[{{[0-9]+}}:{{[0-9]+}}], s[{{[0-9]+}}:{{[0-9]+}}], 0 addr64 offset:0x10{{$}} +; SI: buffer_atomic_umin v{{[0-9]+}}, v[{{[0-9]+}}:{{[0-9]+}}], s[{{[0-9]+}}:{{[0-9]+}}], 0 addr64 offset:16{{$}} define void @atomic_umin_i32_addr64_offset(i32 addrspace(1)* %out, i32 %in, i64 %index) { entry: %ptr = getelementptr i32 addrspace(1)* %out, i64 %index @@ -511,7 +511,7 @@ entry: } ; FUNC-LABEL: {{^}}atomic_umin_i32_ret_addr64_offset: -; SI: buffer_atomic_umin [[RET:v[0-9]+]], v[{{[0-9]+}}:{{[0-9]+}}], s[{{[0-9]+}}:{{[0-9]+}}], 0 addr64 offset:0x10 glc{{$}} +; SI: buffer_atomic_umin [[RET:v[0-9]+]], v[{{[0-9]+}}:{{[0-9]+}}], s[{{[0-9]+}}:{{[0-9]+}}], 0 addr64 offset:16 glc{{$}} ; SI: buffer_store_dword [[RET]] define void @atomic_umin_i32_ret_addr64_offset(i32 addrspace(1)* %out, i32 addrspace(1)* %out2, i32 %in, i64 %index) { entry: @@ -561,7 +561,7 @@ entry: } ; FUNC-LABEL: {{^}}atomic_or_i32_offset: -; SI: buffer_atomic_or v{{[0-9]+}}, s[{{[0-9]+}}:{{[0-9]+}}], 0 offset:0x10{{$}} +; SI: buffer_atomic_or v{{[0-9]+}}, s[{{[0-9]+}}:{{[0-9]+}}], 0 offset:16{{$}} define void @atomic_or_i32_offset(i32 addrspace(1)* %out, i32 %in) { entry: %gep = getelementptr i32 addrspace(1)* %out, i32 4 @@ -570,7 +570,7 @@ entry: } ; FUNC-LABEL: {{^}}atomic_or_i32_ret_offset: -; SI: buffer_atomic_or [[RET:v[0-9]+]], s[{{[0-9]+}}:{{[0-9]+}}], 0 offset:0x10 glc {{$}} +; SI: buffer_atomic_or [[RET:v[0-9]+]], s[{{[0-9]+}}:{{[0-9]+}}], 0 offset:16 glc {{$}} ; SI: buffer_store_dword [[RET]] define void @atomic_or_i32_ret_offset(i32 addrspace(1)* %out, i32 addrspace(1)* %out2, i32 %in) { entry: @@ -581,7 +581,7 @@ entry: } ; FUNC-LABEL: {{^}}atomic_or_i32_addr64_offset: -; SI: buffer_atomic_or v{{[0-9]+}}, v[{{[0-9]+}}:{{[0-9]+}}], s[{{[0-9]+}}:{{[0-9]+}}], 0 addr64 offset:0x10{{$}} +; SI: buffer_atomic_or v{{[0-9]+}}, v[{{[0-9]+}}:{{[0-9]+}}], s[{{[0-9]+}}:{{[0-9]+}}], 0 addr64 offset:16{{$}} define void @atomic_or_i32_addr64_offset(i32 addrspace(1)* %out, i32 %in, i64 %index) { entry: %ptr = getelementptr i32 addrspace(1)* %out, i64 %index @@ -591,7 +591,7 @@ entry: } ; FUNC-LABEL: {{^}}atomic_or_i32_ret_addr64_offset: -; SI: buffer_atomic_or [[RET:v[0-9]+]], v[{{[0-9]+}}:{{[0-9]+}}], s[{{[0-9]+}}:{{[0-9]+}}], 0 addr64 offset:0x10 glc{{$}} +; SI: buffer_atomic_or [[RET:v[0-9]+]], v[{{[0-9]+}}:{{[0-9]+}}], 
s[{{[0-9]+}}:{{[0-9]+}}], 0 addr64 offset:16 glc{{$}} ; SI: buffer_store_dword [[RET]] define void @atomic_or_i32_ret_addr64_offset(i32 addrspace(1)* %out, i32 addrspace(1)* %out2, i32 %in, i64 %index) { entry: @@ -641,7 +641,7 @@ entry: } ; FUNC-LABEL: {{^}}atomic_xchg_i32_offset: -; SI: buffer_atomic_swap v{{[0-9]+}}, s[{{[0-9]+}}:{{[0-9]+}}], 0 offset:0x10{{$}} +; SI: buffer_atomic_swap v{{[0-9]+}}, s[{{[0-9]+}}:{{[0-9]+}}], 0 offset:16{{$}} define void @atomic_xchg_i32_offset(i32 addrspace(1)* %out, i32 %in) { entry: %gep = getelementptr i32 addrspace(1)* %out, i32 4 @@ -650,7 +650,7 @@ entry: } ; FUNC-LABEL: {{^}}atomic_xchg_i32_ret_offset: -; SI: buffer_atomic_swap [[RET:v[0-9]+]], s[{{[0-9]+}}:{{[0-9]+}}], 0 offset:0x10 glc {{$}} +; SI: buffer_atomic_swap [[RET:v[0-9]+]], s[{{[0-9]+}}:{{[0-9]+}}], 0 offset:16 glc {{$}} ; SI: buffer_store_dword [[RET]] define void @atomic_xchg_i32_ret_offset(i32 addrspace(1)* %out, i32 addrspace(1)* %out2, i32 %in) { entry: @@ -661,7 +661,7 @@ entry: } ; FUNC-LABEL: {{^}}atomic_xchg_i32_addr64_offset: -; SI: buffer_atomic_swap v{{[0-9]+}}, v[{{[0-9]+}}:{{[0-9]+}}], s[{{[0-9]+}}:{{[0-9]+}}], 0 addr64 offset:0x10{{$}} +; SI: buffer_atomic_swap v{{[0-9]+}}, v[{{[0-9]+}}:{{[0-9]+}}], s[{{[0-9]+}}:{{[0-9]+}}], 0 addr64 offset:16{{$}} define void @atomic_xchg_i32_addr64_offset(i32 addrspace(1)* %out, i32 %in, i64 %index) { entry: %ptr = getelementptr i32 addrspace(1)* %out, i64 %index @@ -671,7 +671,7 @@ entry: } ; FUNC-LABEL: {{^}}atomic_xchg_i32_ret_addr64_offset: -; SI: buffer_atomic_swap [[RET:v[0-9]+]], v[{{[0-9]+}}:{{[0-9]+}}], s[{{[0-9]+}}:{{[0-9]+}}], 0 addr64 offset:0x10 glc{{$}} +; SI: buffer_atomic_swap [[RET:v[0-9]+]], v[{{[0-9]+}}:{{[0-9]+}}], s[{{[0-9]+}}:{{[0-9]+}}], 0 addr64 offset:16 glc{{$}} ; SI: buffer_store_dword [[RET]] define void @atomic_xchg_i32_ret_addr64_offset(i32 addrspace(1)* %out, i32 addrspace(1)* %out2, i32 %in, i64 %index) { entry: @@ -721,7 +721,7 @@ entry: } ; FUNC-LABEL: {{^}}atomic_xor_i32_offset: -; SI: buffer_atomic_xor v{{[0-9]+}}, s[{{[0-9]+}}:{{[0-9]+}}], 0 offset:0x10{{$}} +; SI: buffer_atomic_xor v{{[0-9]+}}, s[{{[0-9]+}}:{{[0-9]+}}], 0 offset:16{{$}} define void @atomic_xor_i32_offset(i32 addrspace(1)* %out, i32 %in) { entry: %gep = getelementptr i32 addrspace(1)* %out, i32 4 @@ -730,7 +730,7 @@ entry: } ; FUNC-LABEL: {{^}}atomic_xor_i32_ret_offset: -; SI: buffer_atomic_xor [[RET:v[0-9]+]], s[{{[0-9]+}}:{{[0-9]+}}], 0 offset:0x10 glc {{$}} +; SI: buffer_atomic_xor [[RET:v[0-9]+]], s[{{[0-9]+}}:{{[0-9]+}}], 0 offset:16 glc {{$}} ; SI: buffer_store_dword [[RET]] define void @atomic_xor_i32_ret_offset(i32 addrspace(1)* %out, i32 addrspace(1)* %out2, i32 %in) { entry: @@ -741,7 +741,7 @@ entry: } ; FUNC-LABEL: {{^}}atomic_xor_i32_addr64_offset: -; SI: buffer_atomic_xor v{{[0-9]+}}, v[{{[0-9]+}}:{{[0-9]+}}], s[{{[0-9]+}}:{{[0-9]+}}], 0 addr64 offset:0x10{{$}} +; SI: buffer_atomic_xor v{{[0-9]+}}, v[{{[0-9]+}}:{{[0-9]+}}], s[{{[0-9]+}}:{{[0-9]+}}], 0 addr64 offset:16{{$}} define void @atomic_xor_i32_addr64_offset(i32 addrspace(1)* %out, i32 %in, i64 %index) { entry: %ptr = getelementptr i32 addrspace(1)* %out, i64 %index @@ -751,7 +751,7 @@ entry: } ; FUNC-LABEL: {{^}}atomic_xor_i32_ret_addr64_offset: -; SI: buffer_atomic_xor [[RET:v[0-9]+]], v[{{[0-9]+}}:{{[0-9]+}}], s[{{[0-9]+}}:{{[0-9]+}}], 0 addr64 offset:0x10 glc{{$}} +; SI: buffer_atomic_xor [[RET:v[0-9]+]], v[{{[0-9]+}}:{{[0-9]+}}], s[{{[0-9]+}}:{{[0-9]+}}], 0 addr64 offset:16 glc{{$}} ; SI: buffer_store_dword [[RET]] define void 
@atomic_xor_i32_ret_addr64_offset(i32 addrspace(1)* %out, i32 addrspace(1)* %out2, i32 %in, i64 %index) { entry: diff --git a/test/CodeGen/R600/gv-const-addrspace-fail.ll b/test/CodeGen/R600/gv-const-addrspace-fail.ll index 905948f..af0df41 100644 --- a/test/CodeGen/R600/gv-const-addrspace-fail.ll +++ b/test/CodeGen/R600/gv-const-addrspace-fail.ll @@ -1,4 +1,4 @@ -; RUN: llc -march=r600 -mcpu=SI -verify-machineinstrs < %s | FileCheck -check-prefix=SI -check-prefix=FUNC %s +; RUN: llc -march=amdgcn -mcpu=SI -verify-machineinstrs < %s | FileCheck -check-prefix=SI -check-prefix=FUNC %s ; XUN: llc -march=r600 -mcpu=redwood < %s | FileCheck -check-prefix=EG -check-prefix=FUNC %s diff --git a/test/CodeGen/R600/gv-const-addrspace.ll b/test/CodeGen/R600/gv-const-addrspace.ll index 6aa20b8..45af71d 100644 --- a/test/CodeGen/R600/gv-const-addrspace.ll +++ b/test/CodeGen/R600/gv-const-addrspace.ll @@ -1,5 +1,6 @@ +; RUN: llc -march=amdgcn -mcpu=SI -verify-machineinstrs < %s | FileCheck -check-prefix=SI -check-prefix=GCN -check-prefix=FUNC %s +; RUN: llc -march=amdgcn -mcpu=tonga -verify-machineinstrs < %s | FileCheck -check-prefix=VI -check-prefix=GCN -check-prefix=FUNC %s ; RUN: llc -march=r600 -mcpu=redwood < %s | FileCheck -check-prefix=EG -check-prefix=FUNC %s -; RUN: llc -march=r600 -mcpu=SI -verify-machineinstrs < %s | FileCheck -check-prefix=SI -check-prefix=FUNC %s @b = internal addrspace(2) constant [1 x i16] [ i16 7 ], align 2 @@ -9,6 +10,7 @@ ; FUNC-LABEL: {{^}}float: ; FIXME: We should be using s_load_dword here. ; SI: buffer_load_dword +; VI: s_load_dword ; EG-DAG: MOV {{\** *}}T2.X ; EG-DAG: MOV {{\** *}}T3.X @@ -31,6 +33,7 @@ entry: ; FIXME: We should be using s_load_dword here. ; SI: buffer_load_dword +; VI: s_load_dword ; EG-DAG: MOV {{\** *}}T2.X ; EG-DAG: MOV {{\** *}}T3.X @@ -53,7 +56,7 @@ entry: @struct_foo_gv = internal unnamed_addr addrspace(2) constant [1 x %struct.foo] [ %struct.foo { float 16.0, [5 x i32] [i32 0, i32 1, i32 2, i32 3, i32 4] } ] ; FUNC-LABEL: {{^}}struct_foo_gv_load: -; SI: s_load_dword +; GCN: s_load_dword define void @struct_foo_gv_load(i32 addrspace(1)* %out, i32 %index) { %gep = getelementptr inbounds [1 x %struct.foo] addrspace(2)* @struct_foo_gv, i32 0, i32 0, i32 1, i32 %index @@ -70,6 +73,7 @@ define void @struct_foo_gv_load(i32 addrspace(1)* %out, i32 %index) { ; FUNC-LABEL: {{^}}array_v1_gv_load: ; FIXME: We should be using s_load_dword here. 
; SI: buffer_load_dword +; VI: s_load_dword define void @array_v1_gv_load(<1 x i32> addrspace(1)* %out, i32 %index) { %gep = getelementptr inbounds [4 x <1 x i32>] addrspace(2)* @array_v1_gv, i32 0, i32 %index %load = load <1 x i32> addrspace(2)* %gep, align 4 diff --git a/test/CodeGen/R600/half.ll b/test/CodeGen/R600/half.ll index 6ad9b2f..35a41c5 100644 --- a/test/CodeGen/R600/half.ll +++ b/test/CodeGen/R600/half.ll @@ -1,4 +1,5 @@ -; RUN: llc < %s -march=r600 -mcpu=SI | FileCheck %s +; RUN: llc < %s -march=amdgcn -mcpu=SI | FileCheck %s +; RUN: llc < %s -march=amdgcn -mcpu=tonga | FileCheck %s define void @test_load_store(half addrspace(1)* %in, half addrspace(1)* %out) { ; CHECK-LABEL: {{^}}test_load_store: diff --git a/test/CodeGen/R600/hsa.ll b/test/CodeGen/R600/hsa.ll new file mode 100644 index 0000000..ff75b90 --- /dev/null +++ b/test/CodeGen/R600/hsa.ll @@ -0,0 +1,14 @@ +; RUN: llc < %s -mtriple=amdgcn--amdhsa -mcpu=kaveri | FileCheck --check-prefix=HSA %s + +; HSA: {{^}}simple: +; HSA: .section .hsa.version +; HSA-NEXT: .ascii "HSA Code Unit:0.0:AMD:0.1:GFX8.1:0" +; Make sure we are setting the ATC bit: +; HSA: s_mov_b32 s[[HI:[0-9]]], 0x100f000 +; HSA: buffer_store_dword v{{[0-9]+}}, s[0:[[HI]]], 0 + +define void @simple(i32 addrspace(1)* %out) { +entry: + store i32 0, i32 addrspace(1)* %out + ret void +} diff --git a/test/CodeGen/R600/i1-copy-implicit-def.ll b/test/CodeGen/R600/i1-copy-implicit-def.ll index 7c5bc04..b11a211 100644 --- a/test/CodeGen/R600/i1-copy-implicit-def.ll +++ b/test/CodeGen/R600/i1-copy-implicit-def.ll @@ -1,4 +1,5 @@ -; RUN: llc -march=r600 -mcpu=SI -verify-machineinstrs < %s | FileCheck -check-prefix=SI %s +; RUN: llc -march=amdgcn -mcpu=SI -verify-machineinstrs < %s | FileCheck -check-prefix=SI %s +; RUN: llc -march=amdgcn -mcpu=tonga -verify-machineinstrs < %s | FileCheck -check-prefix=SI %s ; SILowerI1Copies was not handling IMPLICIT_DEF ; SI-LABEL: {{^}}br_implicit_def: diff --git a/test/CodeGen/R600/i1-copy-phi.ll b/test/CodeGen/R600/i1-copy-phi.ll index bfa8672..430466e 100644 --- a/test/CodeGen/R600/i1-copy-phi.ll +++ b/test/CodeGen/R600/i1-copy-phi.ll @@ -1,4 +1,5 @@ -; RUN: llc -march=r600 -mcpu=SI -verify-machineinstrs < %s | FileCheck -check-prefix=SI %s +; RUN: llc -march=amdgcn -mcpu=SI -verify-machineinstrs < %s | FileCheck -check-prefix=SI %s +; RUN: llc -march=amdgcn -mcpu=tonga -verify-machineinstrs < %s | FileCheck -check-prefix=SI %s ; SI-LABEL: {{^}}br_i1_phi: ; SI: v_mov_b32_e32 [[REG:v[0-9]+]], 0{{$}} diff --git a/test/CodeGen/R600/icmp64.ll b/test/CodeGen/R600/icmp64.ll index 870bf7f..0eaa33e 100644 --- a/test/CodeGen/R600/icmp64.ll +++ b/test/CodeGen/R600/icmp64.ll @@ -1,4 +1,5 @@ -; RUN: llc -march=r600 -mcpu=SI -verify-machineinstrs < %s | FileCheck -check-prefix=SI %s +; RUN: llc -march=amdgcn -mcpu=SI -verify-machineinstrs < %s | FileCheck -check-prefix=SI %s +; RUN: llc -march=amdgcn -mcpu=tonga -verify-machineinstrs < %s | FileCheck -check-prefix=SI %s ; SI-LABEL: {{^}}test_i64_eq: ; SI: v_cmp_eq_i64 diff --git a/test/CodeGen/R600/imm.ll b/test/CodeGen/R600/imm.ll index 1fcaf29..9b95fd6 100644 --- a/test/CodeGen/R600/imm.ll +++ b/test/CodeGen/R600/imm.ll @@ -1,4 +1,5 @@ -; RUN: llc -march=r600 -mcpu=verde -verify-machineinstrs < %s | FileCheck %s +; RUN: llc -march=amdgcn -mcpu=verde -verify-machineinstrs < %s | FileCheck -check-prefix=SI -check-prefix=CHECK %s +; RUN: llc -march=amdgcn -mcpu=tonga -verify-machineinstrs < %s | FileCheck -check-prefix=VI -check-prefix=CHECK %s ; Use a 64-bit value with lo bits that can be 
represented as an inline constant ; CHECK-LABEL: {{^}}i64_imm_inline_lo: @@ -22,73 +23,100 @@ entry: ret void } -; CHECK-LABEL: {{^}}store_inline_imm_0.0_f32 -; CHECK: v_mov_b32_e32 [[REG:v[0-9]+]], 0{{$}} +; CHECK-LABEL: {{^}}store_imm_neg_0.0_i64: +; CHECK-DAG: s_mov_b32 s[[HI_SREG:[0-9]+]], 0x80000000 +; CHECK-DAG: s_mov_b32 s[[LO_SREG:[0-9]+]], 0{{$}} +; CHECK-DAG: v_mov_b32_e32 v[[LO_VREG:[0-9]+]], s[[LO_SREG]] +; CHECK-DAG: v_mov_b32_e32 v[[HI_VREG:[0-9]+]], s[[HI_SREG]] +; CHECK: buffer_store_dwordx2 v{{\[}}[[LO_VREG]]:[[HI_VREG]]{{\]}} +define void @store_imm_neg_0.0_i64(i64 addrspace(1) *%out) { + store i64 -9223372036854775808, i64 addrspace(1) *%out + ret void +} + +; CHECK-LABEL: {{^}}store_inline_imm_neg_0.0_i32: +; CHECK: v_mov_b32_e32 [[REG:v[0-9]+]], 0x80000000 ; CHECK-NEXT: buffer_store_dword [[REG]] +define void @store_inline_imm_neg_0.0_i32(i32 addrspace(1)* %out) { + store i32 -2147483648, i32 addrspace(1)* %out + ret void +} + +; CHECK-LABEL: {{^}}store_inline_imm_0.0_f32: +; CHECK: v_mov_b32_e32 [[REG:v[0-9]+]], 0{{$}} +; CHECK: buffer_store_dword [[REG]] define void @store_inline_imm_0.0_f32(float addrspace(1)* %out) { store float 0.0, float addrspace(1)* %out ret void } -; CHECK-LABEL: {{^}}store_inline_imm_0.5_f32 +; CHECK-LABEL: {{^}}store_imm_neg_0.0_f32: +; CHECK: v_mov_b32_e32 [[REG:v[0-9]+]], 0x80000000 +; CHECK: buffer_store_dword [[REG]] +define void @store_imm_neg_0.0_f32(float addrspace(1)* %out) { + store float -0.0, float addrspace(1)* %out + ret void +} + +; CHECK-LABEL: {{^}}store_inline_imm_0.5_f32: ; CHECK: v_mov_b32_e32 [[REG:v[0-9]+]], 0.5{{$}} -; CHECK-NEXT: buffer_store_dword [[REG]] +; CHECK: buffer_store_dword [[REG]] define void @store_inline_imm_0.5_f32(float addrspace(1)* %out) { store float 0.5, float addrspace(1)* %out ret void } -; CHECK-LABEL: {{^}}store_inline_imm_m_0.5_f32 +; CHECK-LABEL: {{^}}store_inline_imm_m_0.5_f32: ; CHECK: v_mov_b32_e32 [[REG:v[0-9]+]], -0.5{{$}} -; CHECK-NEXT: buffer_store_dword [[REG]] +; CHECK: buffer_store_dword [[REG]] define void @store_inline_imm_m_0.5_f32(float addrspace(1)* %out) { store float -0.5, float addrspace(1)* %out ret void } -; CHECK-LABEL: {{^}}store_inline_imm_1.0_f32 +; CHECK-LABEL: {{^}}store_inline_imm_1.0_f32: ; CHECK: v_mov_b32_e32 [[REG:v[0-9]+]], 1.0{{$}} -; CHECK-NEXT: buffer_store_dword [[REG]] +; CHECK: buffer_store_dword [[REG]] define void @store_inline_imm_1.0_f32(float addrspace(1)* %out) { store float 1.0, float addrspace(1)* %out ret void } -; CHECK-LABEL: {{^}}store_inline_imm_m_1.0_f32 +; CHECK-LABEL: {{^}}store_inline_imm_m_1.0_f32: ; CHECK: v_mov_b32_e32 [[REG:v[0-9]+]], -1.0{{$}} -; CHECK-NEXT: buffer_store_dword [[REG]] +; CHECK: buffer_store_dword [[REG]] define void @store_inline_imm_m_1.0_f32(float addrspace(1)* %out) { store float -1.0, float addrspace(1)* %out ret void } -; CHECK-LABEL: {{^}}store_inline_imm_2.0_f32 +; CHECK-LABEL: {{^}}store_inline_imm_2.0_f32: ; CHECK: v_mov_b32_e32 [[REG:v[0-9]+]], 2.0{{$}} -; CHECK-NEXT: buffer_store_dword [[REG]] +; CHECK: buffer_store_dword [[REG]] define void @store_inline_imm_2.0_f32(float addrspace(1)* %out) { store float 2.0, float addrspace(1)* %out ret void } -; CHECK-LABEL: {{^}}store_inline_imm_m_2.0_f32 +; CHECK-LABEL: {{^}}store_inline_imm_m_2.0_f32: ; CHECK: v_mov_b32_e32 [[REG:v[0-9]+]], -2.0{{$}} -; CHECK-NEXT: buffer_store_dword [[REG]] +; CHECK: buffer_store_dword [[REG]] define void @store_inline_imm_m_2.0_f32(float addrspace(1)* %out) { store float -2.0, float addrspace(1)* %out ret void } -; CHECK-LABEL: 
{{^}}store_inline_imm_4.0_f32 +; CHECK-LABEL: {{^}}store_inline_imm_4.0_f32: ; CHECK: v_mov_b32_e32 [[REG:v[0-9]+]], 4.0{{$}} -; CHECK-NEXT: buffer_store_dword [[REG]] +; CHECK: buffer_store_dword [[REG]] define void @store_inline_imm_4.0_f32(float addrspace(1)* %out) { store float 4.0, float addrspace(1)* %out ret void } -; CHECK-LABEL: {{^}}store_inline_imm_m_4.0_f32 +; CHECK-LABEL: {{^}}store_inline_imm_m_4.0_f32: ; CHECK: v_mov_b32_e32 [[REG:v[0-9]+]], -4.0{{$}} -; CHECK-NEXT: buffer_store_dword [[REG]] +; CHECK: buffer_store_dword [[REG]] define void @store_inline_imm_m_4.0_f32(float addrspace(1)* %out) { store float -4.0, float addrspace(1)* %out ret void @@ -96,106 +124,106 @@ define void @store_inline_imm_m_4.0_f32(float addrspace(1)* %out) { ; CHECK-LABEL: {{^}}store_literal_imm_f32: ; CHECK: v_mov_b32_e32 [[REG:v[0-9]+]], 0x45800000 -; CHECK-NEXT: buffer_store_dword [[REG]] +; CHECK: buffer_store_dword [[REG]] define void @store_literal_imm_f32(float addrspace(1)* %out) { store float 4096.0, float addrspace(1)* %out ret void } -; CHECK-LABEL: {{^}}add_inline_imm_0.0_f32 +; CHECK-LABEL: {{^}}add_inline_imm_0.0_f32: ; CHECK: s_load_dword [[VAL:s[0-9]+]] -; CHECK: v_add_f32_e64 [[REG:v[0-9]+]], 0.0, [[VAL]]{{$}} -; CHECK-NEXT: buffer_store_dword [[REG]] +; CHECK: v_add_f32_e64 [[REG:v[0-9]+]], 0, [[VAL]]{{$}} +; CHECK: buffer_store_dword [[REG]] define void @add_inline_imm_0.0_f32(float addrspace(1)* %out, float %x) { %y = fadd float %x, 0.0 store float %y, float addrspace(1)* %out ret void } -; CHECK-LABEL: {{^}}add_inline_imm_0.5_f32 +; CHECK-LABEL: {{^}}add_inline_imm_0.5_f32: ; CHECK: s_load_dword [[VAL:s[0-9]+]] ; CHECK: v_add_f32_e64 [[REG:v[0-9]+]], 0.5, [[VAL]]{{$}} -; CHECK-NEXT: buffer_store_dword [[REG]] +; CHECK: buffer_store_dword [[REG]] define void @add_inline_imm_0.5_f32(float addrspace(1)* %out, float %x) { %y = fadd float %x, 0.5 store float %y, float addrspace(1)* %out ret void } -; CHECK-LABEL: {{^}}add_inline_imm_neg_0.5_f32 +; CHECK-LABEL: {{^}}add_inline_imm_neg_0.5_f32: ; CHECK: s_load_dword [[VAL:s[0-9]+]] ; CHECK: v_add_f32_e64 [[REG:v[0-9]+]], -0.5, [[VAL]]{{$}} -; CHECK-NEXT: buffer_store_dword [[REG]] +; CHECK: buffer_store_dword [[REG]] define void @add_inline_imm_neg_0.5_f32(float addrspace(1)* %out, float %x) { %y = fadd float %x, -0.5 store float %y, float addrspace(1)* %out ret void } -; CHECK-LABEL: {{^}}add_inline_imm_1.0_f32 +; CHECK-LABEL: {{^}}add_inline_imm_1.0_f32: ; CHECK: s_load_dword [[VAL:s[0-9]+]] ; CHECK: v_add_f32_e64 [[REG:v[0-9]+]], 1.0, [[VAL]]{{$}} -; CHECK-NEXT: buffer_store_dword [[REG]] +; CHECK: buffer_store_dword [[REG]] define void @add_inline_imm_1.0_f32(float addrspace(1)* %out, float %x) { %y = fadd float %x, 1.0 store float %y, float addrspace(1)* %out ret void } -; CHECK-LABEL: {{^}}add_inline_imm_neg_1.0_f32 +; CHECK-LABEL: {{^}}add_inline_imm_neg_1.0_f32: ; CHECK: s_load_dword [[VAL:s[0-9]+]] ; CHECK: v_add_f32_e64 [[REG:v[0-9]+]], -1.0, [[VAL]]{{$}} -; CHECK-NEXT: buffer_store_dword [[REG]] +; CHECK: buffer_store_dword [[REG]] define void @add_inline_imm_neg_1.0_f32(float addrspace(1)* %out, float %x) { %y = fadd float %x, -1.0 store float %y, float addrspace(1)* %out ret void } -; CHECK-LABEL: {{^}}add_inline_imm_2.0_f32 +; CHECK-LABEL: {{^}}add_inline_imm_2.0_f32: ; CHECK: s_load_dword [[VAL:s[0-9]+]] ; CHECK: v_add_f32_e64 [[REG:v[0-9]+]], 2.0, [[VAL]]{{$}} -; CHECK-NEXT: buffer_store_dword [[REG]] +; CHECK: buffer_store_dword [[REG]] define void @add_inline_imm_2.0_f32(float addrspace(1)* %out, float %x) { %y = 
fadd float %x, 2.0 store float %y, float addrspace(1)* %out ret void } -; CHECK-LABEL: {{^}}add_inline_imm_neg_2.0_f32 +; CHECK-LABEL: {{^}}add_inline_imm_neg_2.0_f32: ; CHECK: s_load_dword [[VAL:s[0-9]+]] ; CHECK: v_add_f32_e64 [[REG:v[0-9]+]], -2.0, [[VAL]]{{$}} -; CHECK-NEXT: buffer_store_dword [[REG]] +; CHECK: buffer_store_dword [[REG]] define void @add_inline_imm_neg_2.0_f32(float addrspace(1)* %out, float %x) { %y = fadd float %x, -2.0 store float %y, float addrspace(1)* %out ret void } -; CHECK-LABEL: {{^}}add_inline_imm_4.0_f32 +; CHECK-LABEL: {{^}}add_inline_imm_4.0_f32: ; CHECK: s_load_dword [[VAL:s[0-9]+]] ; CHECK: v_add_f32_e64 [[REG:v[0-9]+]], 4.0, [[VAL]]{{$}} -; CHECK-NEXT: buffer_store_dword [[REG]] +; CHECK: buffer_store_dword [[REG]] define void @add_inline_imm_4.0_f32(float addrspace(1)* %out, float %x) { %y = fadd float %x, 4.0 store float %y, float addrspace(1)* %out ret void } -; CHECK-LABEL: {{^}}add_inline_imm_neg_4.0_f32 +; CHECK-LABEL: {{^}}add_inline_imm_neg_4.0_f32: ; CHECK: s_load_dword [[VAL:s[0-9]+]] ; CHECK: v_add_f32_e64 [[REG:v[0-9]+]], -4.0, [[VAL]]{{$}} -; CHECK-NEXT: buffer_store_dword [[REG]] +; CHECK: buffer_store_dword [[REG]] define void @add_inline_imm_neg_4.0_f32(float addrspace(1)* %out, float %x) { %y = fadd float %x, -4.0 store float %y, float addrspace(1)* %out ret void } -; CHECK-LABEL: @commute_add_inline_imm_0.5_f32 +; CHECK-LABEL: {{^}}commute_add_inline_imm_0.5_f32: ; CHECK: buffer_load_dword [[VAL:v[0-9]+]] ; CHECK: v_add_f32_e32 [[REG:v[0-9]+]], 0.5, [[VAL]] -; CHECK-NEXT: buffer_store_dword [[REG]] +; CHECK: buffer_store_dword [[REG]] define void @commute_add_inline_imm_0.5_f32(float addrspace(1)* %out, float addrspace(1)* %in) { %x = load float addrspace(1)* %in %y = fadd float %x, 0.5 @@ -203,13 +231,387 @@ define void @commute_add_inline_imm_0.5_f32(float addrspace(1)* %out, float addr ret void } -; CHECK-LABEL: @commute_add_literal_f32 +; CHECK-LABEL: {{^}}commute_add_literal_f32: ; CHECK: buffer_load_dword [[VAL:v[0-9]+]] ; CHECK: v_add_f32_e32 [[REG:v[0-9]+]], 0x44800000, [[VAL]] -; CHECK-NEXT: buffer_store_dword [[REG]] +; CHECK: buffer_store_dword [[REG]] define void @commute_add_literal_f32(float addrspace(1)* %out, float addrspace(1)* %in) { %x = load float addrspace(1)* %in %y = fadd float %x, 1024.0 store float %y, float addrspace(1)* %out ret void } + +; CHECK-LABEL: {{^}}add_inline_imm_1_f32: +; CHECK: s_load_dword [[VAL:s[0-9]+]] +; CHECK: v_add_f32_e64 [[REG:v[0-9]+]], 1, [[VAL]]{{$}} +; CHECK: buffer_store_dword [[REG]] +define void @add_inline_imm_1_f32(float addrspace(1)* %out, float %x) { + %y = fadd float %x, 0x36a0000000000000 + store float %y, float addrspace(1)* %out + ret void +} + +; CHECK-LABEL: {{^}}add_inline_imm_2_f32: +; CHECK: s_load_dword [[VAL:s[0-9]+]] +; CHECK: v_add_f32_e64 [[REG:v[0-9]+]], 2, [[VAL]]{{$}} +; CHECK: buffer_store_dword [[REG]] +define void @add_inline_imm_2_f32(float addrspace(1)* %out, float %x) { + %y = fadd float %x, 0x36b0000000000000 + store float %y, float addrspace(1)* %out + ret void +} + +; CHECK-LABEL: {{^}}add_inline_imm_16_f32: +; CHECK: s_load_dword [[VAL:s[0-9]+]] +; CHECK: v_add_f32_e64 [[REG:v[0-9]+]], 16, [[VAL]] +; CHECK: buffer_store_dword [[REG]] +define void @add_inline_imm_16_f32(float addrspace(1)* %out, float %x) { + %y = fadd float %x, 0x36e0000000000000 + store float %y, float addrspace(1)* %out + ret void +} + +; CHECK-LABEL: {{^}}add_inline_imm_neg_1_f32: +; CHECK: s_load_dword [[VAL:s[0-9]+]] +; CHECK: v_add_f32_e64 [[REG:v[0-9]+]], -1, [[VAL]] +; CHECK: 
buffer_store_dword [[REG]] +define void @add_inline_imm_neg_1_f32(float addrspace(1)* %out, float %x) { + %y = fadd float %x, 0xffffffffe0000000 + store float %y, float addrspace(1)* %out + ret void +} + +; CHECK-LABEL: {{^}}add_inline_imm_neg_2_f32: +; CHECK: s_load_dword [[VAL:s[0-9]+]] +; CHECK: v_add_f32_e64 [[REG:v[0-9]+]], -2, [[VAL]] +; CHECK: buffer_store_dword [[REG]] +define void @add_inline_imm_neg_2_f32(float addrspace(1)* %out, float %x) { + %y = fadd float %x, 0xffffffffc0000000 + store float %y, float addrspace(1)* %out + ret void +} + +; CHECK-LABEL: {{^}}add_inline_imm_neg_16_f32: +; CHECK: s_load_dword [[VAL:s[0-9]+]] +; CHECK: v_add_f32_e64 [[REG:v[0-9]+]], -16, [[VAL]] +; CHECK: buffer_store_dword [[REG]] +define void @add_inline_imm_neg_16_f32(float addrspace(1)* %out, float %x) { + %y = fadd float %x, 0xfffffffe00000000 + store float %y, float addrspace(1)* %out + ret void +} + +; CHECK-LABEL: {{^}}add_inline_imm_63_f32: +; CHECK: s_load_dword [[VAL:s[0-9]+]] +; CHECK: v_add_f32_e64 [[REG:v[0-9]+]], 63, [[VAL]] +; CHECK: buffer_store_dword [[REG]] +define void @add_inline_imm_63_f32(float addrspace(1)* %out, float %x) { + %y = fadd float %x, 0x36ff800000000000 + store float %y, float addrspace(1)* %out + ret void +} + +; CHECK-LABEL: {{^}}add_inline_imm_64_f32: +; CHECK: s_load_dword [[VAL:s[0-9]+]] +; CHECK: v_add_f32_e64 [[REG:v[0-9]+]], 64, [[VAL]] +; CHECK: buffer_store_dword [[REG]] +define void @add_inline_imm_64_f32(float addrspace(1)* %out, float %x) { + %y = fadd float %x, 0x3700000000000000 + store float %y, float addrspace(1)* %out + ret void +} + + +; CHECK-LABEL: {{^}}add_inline_imm_0.0_f64: +; SI: s_load_dwordx2 [[VAL:s\[[0-9]+:[0-9]+\]]], {{s\[[0-9]+:[0-9]+\]}}, 0xb +; VI: s_load_dwordx2 [[VAL:s\[[0-9]+:[0-9]+\]]], {{s\[[0-9]+:[0-9]+\]}}, 0x2c +; CHECK: v_add_f64 [[REG:v\[[0-9]+:[0-9]+\]]], 0, [[VAL]] +; CHECK: buffer_store_dwordx2 [[REG]] +define void @add_inline_imm_0.0_f64(double addrspace(1)* %out, double %x) { + %y = fadd double %x, 0.0 + store double %y, double addrspace(1)* %out + ret void +} + +; CHECK-LABEL: {{^}}add_inline_imm_0.5_f64: +; SI: s_load_dwordx2 [[VAL:s\[[0-9]+:[0-9]+\]]], {{s\[[0-9]+:[0-9]+\]}}, 0xb +; VI: s_load_dwordx2 [[VAL:s\[[0-9]+:[0-9]+\]]], {{s\[[0-9]+:[0-9]+\]}}, 0x2c +; CHECK: v_add_f64 [[REG:v\[[0-9]+:[0-9]+\]]], 0.5, [[VAL]] +; CHECK: buffer_store_dwordx2 [[REG]] +define void @add_inline_imm_0.5_f64(double addrspace(1)* %out, double %x) { + %y = fadd double %x, 0.5 + store double %y, double addrspace(1)* %out + ret void +} + +; CHECK-LABEL: {{^}}add_inline_imm_neg_0.5_f64: +; SI: s_load_dwordx2 [[VAL:s\[[0-9]+:[0-9]+\]]], {{s\[[0-9]+:[0-9]+\]}}, 0xb +; VI: s_load_dwordx2 [[VAL:s\[[0-9]+:[0-9]+\]]], {{s\[[0-9]+:[0-9]+\]}}, 0x2c +; CHECK: v_add_f64 [[REG:v\[[0-9]+:[0-9]+\]]], -0.5, [[VAL]] +; CHECK: buffer_store_dwordx2 [[REG]] +define void @add_inline_imm_neg_0.5_f64(double addrspace(1)* %out, double %x) { + %y = fadd double %x, -0.5 + store double %y, double addrspace(1)* %out + ret void +} + +; CHECK-LABEL: {{^}}add_inline_imm_1.0_f64: +; SI: s_load_dwordx2 [[VAL:s\[[0-9]+:[0-9]+\]]], {{s\[[0-9]+:[0-9]+\]}}, 0xb +; VI: s_load_dwordx2 [[VAL:s\[[0-9]+:[0-9]+\]]], {{s\[[0-9]+:[0-9]+\]}}, 0x2c +; CHECK: v_add_f64 [[REG:v\[[0-9]+:[0-9]+\]]], 1.0, [[VAL]] +; CHECK: buffer_store_dwordx2 [[REG]] +define void @add_inline_imm_1.0_f64(double addrspace(1)* %out, double %x) { + %y = fadd double %x, 1.0 + store double %y, double addrspace(1)* %out + ret void +} + +; CHECK-LABEL: {{^}}add_inline_imm_neg_1.0_f64: +; SI: 
s_load_dwordx2 [[VAL:s\[[0-9]+:[0-9]+\]]], {{s\[[0-9]+:[0-9]+\]}}, 0xb +; VI: s_load_dwordx2 [[VAL:s\[[0-9]+:[0-9]+\]]], {{s\[[0-9]+:[0-9]+\]}}, 0x2c +; CHECK: v_add_f64 [[REG:v\[[0-9]+:[0-9]+\]]], -1.0, [[VAL]] +; CHECK: buffer_store_dwordx2 [[REG]] +define void @add_inline_imm_neg_1.0_f64(double addrspace(1)* %out, double %x) { + %y = fadd double %x, -1.0 + store double %y, double addrspace(1)* %out + ret void +} + +; CHECK-LABEL: {{^}}add_inline_imm_2.0_f64: +; SI: s_load_dwordx2 [[VAL:s\[[0-9]+:[0-9]+\]]], {{s\[[0-9]+:[0-9]+\]}}, 0xb +; VI: s_load_dwordx2 [[VAL:s\[[0-9]+:[0-9]+\]]], {{s\[[0-9]+:[0-9]+\]}}, 0x2c +; CHECK: v_add_f64 [[REG:v\[[0-9]+:[0-9]+\]]], 2.0, [[VAL]] +; CHECK: buffer_store_dwordx2 [[REG]] +define void @add_inline_imm_2.0_f64(double addrspace(1)* %out, double %x) { + %y = fadd double %x, 2.0 + store double %y, double addrspace(1)* %out + ret void +} + +; CHECK-LABEL: {{^}}add_inline_imm_neg_2.0_f64: +; SI: s_load_dwordx2 [[VAL:s\[[0-9]+:[0-9]+\]]], {{s\[[0-9]+:[0-9]+\]}}, 0xb +; VI: s_load_dwordx2 [[VAL:s\[[0-9]+:[0-9]+\]]], {{s\[[0-9]+:[0-9]+\]}}, 0x2c +; CHECK: v_add_f64 [[REG:v\[[0-9]+:[0-9]+\]]], -2.0, [[VAL]] +; CHECK: buffer_store_dwordx2 [[REG]] +define void @add_inline_imm_neg_2.0_f64(double addrspace(1)* %out, double %x) { + %y = fadd double %x, -2.0 + store double %y, double addrspace(1)* %out + ret void +} + +; CHECK-LABEL: {{^}}add_inline_imm_4.0_f64: +; SI: s_load_dwordx2 [[VAL:s\[[0-9]+:[0-9]+\]]], {{s\[[0-9]+:[0-9]+\]}}, 0xb +; VI: s_load_dwordx2 [[VAL:s\[[0-9]+:[0-9]+\]]], {{s\[[0-9]+:[0-9]+\]}}, 0x2c +; CHECK: v_add_f64 [[REG:v\[[0-9]+:[0-9]+\]]], 4.0, [[VAL]] +; CHECK: buffer_store_dwordx2 [[REG]] +define void @add_inline_imm_4.0_f64(double addrspace(1)* %out, double %x) { + %y = fadd double %x, 4.0 + store double %y, double addrspace(1)* %out + ret void +} + +; CHECK-LABEL: {{^}}add_inline_imm_neg_4.0_f64: +; SI: s_load_dwordx2 [[VAL:s\[[0-9]+:[0-9]+\]]], {{s\[[0-9]+:[0-9]+\]}}, 0xb +; VI: s_load_dwordx2 [[VAL:s\[[0-9]+:[0-9]+\]]], {{s\[[0-9]+:[0-9]+\]}}, 0x2c +; CHECK: v_add_f64 [[REG:v\[[0-9]+:[0-9]+\]]], -4.0, [[VAL]] +; CHECK: buffer_store_dwordx2 [[REG]] +define void @add_inline_imm_neg_4.0_f64(double addrspace(1)* %out, double %x) { + %y = fadd double %x, -4.0 + store double %y, double addrspace(1)* %out + ret void +} + + +; CHECK-LABEL: {{^}}add_inline_imm_1_f64: +; SI: s_load_dwordx2 [[VAL:s\[[0-9]+:[0-9]+\]]], {{s\[[0-9]+:[0-9]+\]}}, 0xb +; VI: s_load_dwordx2 [[VAL:s\[[0-9]+:[0-9]+\]]], {{s\[[0-9]+:[0-9]+\]}}, 0x2c +; CHECK: v_add_f64 [[REG:v\[[0-9]+:[0-9]+\]]], 1, [[VAL]] +; CHECK: buffer_store_dwordx2 [[REG]] +define void @add_inline_imm_1_f64(double addrspace(1)* %out, double %x) { + %y = fadd double %x, 0x0000000000000001 + store double %y, double addrspace(1)* %out + ret void +} + +; CHECK-LABEL: {{^}}add_inline_imm_2_f64: +; SI: s_load_dwordx2 [[VAL:s\[[0-9]+:[0-9]+\]]], {{s\[[0-9]+:[0-9]+\]}}, 0xb +; VI: s_load_dwordx2 [[VAL:s\[[0-9]+:[0-9]+\]]], {{s\[[0-9]+:[0-9]+\]}}, 0x2c +; CHECK: v_add_f64 [[REG:v\[[0-9]+:[0-9]+\]]], 2, [[VAL]] +; CHECK: buffer_store_dwordx2 [[REG]] +define void @add_inline_imm_2_f64(double addrspace(1)* %out, double %x) { + %y = fadd double %x, 0x0000000000000002 + store double %y, double addrspace(1)* %out + ret void +} + +; CHECK-LABEL: {{^}}add_inline_imm_16_f64: +; SI: s_load_dwordx2 [[VAL:s\[[0-9]+:[0-9]+\]]], {{s\[[0-9]+:[0-9]+\]}}, 0xb +; VI: s_load_dwordx2 [[VAL:s\[[0-9]+:[0-9]+\]]], {{s\[[0-9]+:[0-9]+\]}}, 0x2c +; CHECK: v_add_f64 [[REG:v\[[0-9]+:[0-9]+\]]], 16, [[VAL]] +; CHECK: buffer_store_dwordx2 
[[REG]] +define void @add_inline_imm_16_f64(double addrspace(1)* %out, double %x) { + %y = fadd double %x, 0x0000000000000010 + store double %y, double addrspace(1)* %out + ret void +} + +; CHECK-LABEL: {{^}}add_inline_imm_neg_1_f64: +; SI: s_load_dwordx2 [[VAL:s\[[0-9]+:[0-9]+\]]], {{s\[[0-9]+:[0-9]+\]}}, 0xb +; VI: s_load_dwordx2 [[VAL:s\[[0-9]+:[0-9]+\]]], {{s\[[0-9]+:[0-9]+\]}}, 0x2c +; CHECK: v_add_f64 [[REG:v\[[0-9]+:[0-9]+\]]], -1, [[VAL]] +; CHECK: buffer_store_dwordx2 [[REG]] +define void @add_inline_imm_neg_1_f64(double addrspace(1)* %out, double %x) { + %y = fadd double %x, 0xffffffffffffffff + store double %y, double addrspace(1)* %out + ret void +} + +; CHECK-LABEL: {{^}}add_inline_imm_neg_2_f64: +; SI: s_load_dwordx2 [[VAL:s\[[0-9]+:[0-9]+\]]], {{s\[[0-9]+:[0-9]+\]}}, 0xb +; VI: s_load_dwordx2 [[VAL:s\[[0-9]+:[0-9]+\]]], {{s\[[0-9]+:[0-9]+\]}}, 0x2c +; CHECK: v_add_f64 [[REG:v\[[0-9]+:[0-9]+\]]], -2, [[VAL]] +; CHECK: buffer_store_dwordx2 [[REG]] +define void @add_inline_imm_neg_2_f64(double addrspace(1)* %out, double %x) { + %y = fadd double %x, 0xfffffffffffffffe + store double %y, double addrspace(1)* %out + ret void +} + +; CHECK-LABEL: {{^}}add_inline_imm_neg_16_f64: +; SI: s_load_dwordx2 [[VAL:s\[[0-9]+:[0-9]+\]]], {{s\[[0-9]+:[0-9]+\]}}, 0xb +; VI: s_load_dwordx2 [[VAL:s\[[0-9]+:[0-9]+\]]], {{s\[[0-9]+:[0-9]+\]}}, 0x2c +; CHECK: v_add_f64 [[REG:v\[[0-9]+:[0-9]+\]]], -16, [[VAL]] +; CHECK: buffer_store_dwordx2 [[REG]] +define void @add_inline_imm_neg_16_f64(double addrspace(1)* %out, double %x) { + %y = fadd double %x, 0xfffffffffffffff0 + store double %y, double addrspace(1)* %out + ret void +} + +; CHECK-LABEL: {{^}}add_inline_imm_63_f64: +; SI: s_load_dwordx2 [[VAL:s\[[0-9]+:[0-9]+\]]], {{s\[[0-9]+:[0-9]+\]}}, 0xb +; VI: s_load_dwordx2 [[VAL:s\[[0-9]+:[0-9]+\]]], {{s\[[0-9]+:[0-9]+\]}}, 0x2c +; CHECK: v_add_f64 [[REG:v\[[0-9]+:[0-9]+\]]], 63, [[VAL]] +; CHECK: buffer_store_dwordx2 [[REG]] +define void @add_inline_imm_63_f64(double addrspace(1)* %out, double %x) { + %y = fadd double %x, 0x000000000000003F + store double %y, double addrspace(1)* %out + ret void +} + +; CHECK-LABEL: {{^}}add_inline_imm_64_f64: +; SI: s_load_dwordx2 [[VAL:s\[[0-9]+:[0-9]+\]]], {{s\[[0-9]+:[0-9]+\]}}, 0xb +; VI: s_load_dwordx2 [[VAL:s\[[0-9]+:[0-9]+\]]], {{s\[[0-9]+:[0-9]+\]}}, 0x2c +; CHECK: v_add_f64 [[REG:v\[[0-9]+:[0-9]+\]]], 64, [[VAL]] +; CHECK: buffer_store_dwordx2 [[REG]] +define void @add_inline_imm_64_f64(double addrspace(1)* %out, double %x) { + %y = fadd double %x, 0x0000000000000040 + store double %y, double addrspace(1)* %out + ret void +} + + +; CHECK-LABEL: {{^}}store_inline_imm_0.0_f64: +; CHECK: v_mov_b32_e32 v[[LO_VREG:[0-9]+]], 0 +; CHECK: v_mov_b32_e32 v[[HI_VREG:[0-9]+]], 0 +; CHECK: buffer_store_dwordx2 v{{\[}}[[LO_VREG]]:[[HI_VREG]]{{\]}} +define void @store_inline_imm_0.0_f64(double addrspace(1)* %out) { + store double 0.0, double addrspace(1)* %out + ret void +} + + +; CHECK-LABEL: {{^}}store_literal_imm_neg_0.0_f64: +; CHECK-DAG: s_mov_b32 s[[HI_SREG:[0-9]+]], 0x80000000 +; CHECK-DAG: s_mov_b32 s[[LO_SREG:[0-9]+]], 0{{$}} +; CHECK-DAG: v_mov_b32_e32 v[[LO_VREG:[0-9]+]], s[[LO_SREG]] +; CHECK-DAG: v_mov_b32_e32 v[[HI_VREG:[0-9]+]], s[[HI_SREG]] +; CHECK: buffer_store_dwordx2 v{{\[}}[[LO_VREG]]:[[HI_VREG]]{{\]}} +define void @store_literal_imm_neg_0.0_f64(double addrspace(1)* %out) { + store double -0.0, double addrspace(1)* %out + ret void +} + +; CHECK-LABEL: {{^}}store_inline_imm_0.5_f64: +; CHECK-DAG: v_mov_b32_e32 v[[LO_VREG:[0-9]+]], 0{{$}} +; CHECK-DAG: 
v_mov_b32_e32 v[[HI_VREG:[0-9]+]], 0x3fe00000 +; CHECK: buffer_store_dwordx2 v{{\[}}[[LO_VREG]]:[[HI_VREG]]{{\]}} +define void @store_inline_imm_0.5_f64(double addrspace(1)* %out) { + store double 0.5, double addrspace(1)* %out + ret void +} + +; CHECK-LABEL: {{^}}store_inline_imm_m_0.5_f64: +; CHECK-DAG: v_mov_b32_e32 v[[LO_VREG:[0-9]+]], 0{{$}} +; CHECK-DAG: v_mov_b32_e32 v[[HI_VREG:[0-9]+]], 0xbfe00000 +; CHECK: buffer_store_dwordx2 v{{\[}}[[LO_VREG]]:[[HI_VREG]]{{\]}} +define void @store_inline_imm_m_0.5_f64(double addrspace(1)* %out) { + store double -0.5, double addrspace(1)* %out + ret void +} + +; CHECK-LABEL: {{^}}store_inline_imm_1.0_f64: +; CHECK-DAG: v_mov_b32_e32 v[[LO_VREG:[0-9]+]], 0{{$}} +; CHECK-DAG: v_mov_b32_e32 v[[HI_VREG:[0-9]+]], 0x3ff00000 +; CHECK: buffer_store_dwordx2 v{{\[}}[[LO_VREG]]:[[HI_VREG]]{{\]}} +define void @store_inline_imm_1.0_f64(double addrspace(1)* %out) { + store double 1.0, double addrspace(1)* %out + ret void +} + +; CHECK-LABEL: {{^}}store_inline_imm_m_1.0_f64: +; CHECK-DAG: v_mov_b32_e32 v[[LO_VREG:[0-9]+]], 0{{$}} +; CHECK-DAG: v_mov_b32_e32 v[[HI_VREG:[0-9]+]], 0xbff00000 +; CHECK: buffer_store_dwordx2 v{{\[}}[[LO_VREG]]:[[HI_VREG]]{{\]}} +define void @store_inline_imm_m_1.0_f64(double addrspace(1)* %out) { + store double -1.0, double addrspace(1)* %out + ret void +} + +; CHECK-LABEL: {{^}}store_inline_imm_2.0_f64: +; CHECK-DAG: v_mov_b32_e32 v[[LO_VREG:[0-9]+]], 0{{$}} +; CHECK-DAG: v_mov_b32_e32 v[[HI_VREG:[0-9]+]], 2.0 +; CHECK: buffer_store_dwordx2 v{{\[}}[[LO_VREG]]:[[HI_VREG]]{{\]}} +define void @store_inline_imm_2.0_f64(double addrspace(1)* %out) { + store double 2.0, double addrspace(1)* %out + ret void +} + +; CHECK-LABEL: {{^}}store_inline_imm_m_2.0_f64: +; CHECK-DAG: v_mov_b32_e32 v[[LO_VREG:[0-9]+]], 0{{$}} +; CHECK-DAG: v_mov_b32_e32 v[[HI_VREG:[0-9]+]], -2.0 +; CHECK: buffer_store_dwordx2 v{{\[}}[[LO_VREG]]:[[HI_VREG]]{{\]}} +define void @store_inline_imm_m_2.0_f64(double addrspace(1)* %out) { + store double -2.0, double addrspace(1)* %out + ret void +} + +; CHECK-LABEL: {{^}}store_inline_imm_4.0_f64: +; CHECK-DAG: v_mov_b32_e32 v[[LO_VREG:[0-9]+]], 0{{$}} +; CHECK-DAG: v_mov_b32_e32 v[[HI_VREG:[0-9]+]], 0x40100000 +; CHECK: buffer_store_dwordx2 v{{\[}}[[LO_VREG]]:[[HI_VREG]]{{\]}} +define void @store_inline_imm_4.0_f64(double addrspace(1)* %out) { + store double 4.0, double addrspace(1)* %out + ret void +} + +; CHECK-LABEL: {{^}}store_inline_imm_m_4.0_f64: +; CHECK-DAG: v_mov_b32_e32 v[[LO_VREG:[0-9]+]], 0{{$}} +; CHECK-DAG: v_mov_b32_e32 v[[HI_VREG:[0-9]+]], 0xc0100000 +; CHECK: buffer_store_dwordx2 v{{\[}}[[LO_VREG]]:[[HI_VREG]]{{\]}} +define void @store_inline_imm_m_4.0_f64(double addrspace(1)* %out) { + store double -4.0, double addrspace(1)* %out + ret void +} + +; CHECK-LABEL: {{^}}store_literal_imm_f64: +; CHECK-DAG: s_mov_b32 s[[HI_SREG:[0-9]+]], 0x40b00000 +; CHECK-DAG: s_mov_b32 s[[LO_SREG:[0-9]+]], 0{{$}} +; CHECK-DAG: v_mov_b32_e32 v[[LO_VREG:[0-9]+]], s[[LO_SREG]] +; CHECK-DAG: v_mov_b32_e32 v[[HI_VREG:[0-9]+]], s[[HI_SREG]] +; CHECK: buffer_store_dwordx2 v{{\[}}[[LO_VREG]]:[[HI_VREG]]{{\]}} +define void @store_literal_imm_f64(double addrspace(1)* %out) { + store double 4096.0, double addrspace(1)* %out + ret void +} diff --git a/test/CodeGen/R600/indirect-addressing-si.ll b/test/CodeGen/R600/indirect-addressing-si.ll index 0ba1614..9cd2d84 100644 --- a/test/CodeGen/R600/indirect-addressing-si.ll +++ b/test/CodeGen/R600/indirect-addressing-si.ll @@ -1,4 +1,5 @@ -; RUN: llc < %s -march=r600 -mcpu=SI 
-verify-machineinstrs | FileCheck %s +; RUN: llc < %s -march=amdgcn -mcpu=SI -verify-machineinstrs | FileCheck %s +; RUN: llc < %s -march=amdgcn -mcpu=tonga -verify-machineinstrs | FileCheck %s ; Tests for indirect addressing on SI, which is implemented using dynamic ; indexing of vectors. diff --git a/test/CodeGen/R600/indirect-private-64.ll b/test/CodeGen/R600/indirect-private-64.ll index e0a6ce1..cb06d60 100644 --- a/test/CodeGen/R600/indirect-private-64.ll +++ b/test/CodeGen/R600/indirect-private-64.ll @@ -1,5 +1,7 @@ -; RUN: llc -march=r600 -mcpu=SI -mattr=-promote-alloca -verify-machineinstrs < %s | FileCheck -check-prefix=SI-ALLOCA -check-prefix=SI %s -; RUN: llc -march=r600 -mcpu=SI -mattr=+promote-alloca -verify-machineinstrs < %s | FileCheck -check-prefix=SI-PROMOTE -check-prefix=SI %s +; RUN: llc -march=amdgcn -mcpu=SI -mattr=-promote-alloca -verify-machineinstrs < %s | FileCheck -check-prefix=SI-ALLOCA -check-prefix=SI %s +; RUN: llc -march=amdgcn -mcpu=SI -mattr=+promote-alloca -verify-machineinstrs < %s | FileCheck -check-prefix=SI-PROMOTE -check-prefix=SI %s +; RUN: llc -march=amdgcn -mcpu=tonga -mattr=-promote-alloca -verify-machineinstrs < %s | FileCheck -check-prefix=SI-ALLOCA -check-prefix=SI %s +; RUN: llc -march=amdgcn -mcpu=tonga -mattr=+promote-alloca -verify-machineinstrs < %s | FileCheck -check-prefix=SI-PROMOTE -check-prefix=SI %s declare void @llvm.AMDGPU.barrier.local() noduplicate nounwind diff --git a/test/CodeGen/R600/infinite-loop.ll b/test/CodeGen/R600/infinite-loop.ll index 48edab0..7233aa5 100644 --- a/test/CodeGen/R600/infinite-loop.ll +++ b/test/CodeGen/R600/infinite-loop.ll @@ -1,4 +1,5 @@ -; RUN: llc -march=r600 -mcpu=SI -verify-machineinstrs < %s | FileCheck -check-prefix=SI %s +; RUN: llc -march=amdgcn -mcpu=SI -verify-machineinstrs < %s | FileCheck -check-prefix=SI %s +; RUN: llc -march=amdgcn -mcpu=tonga -verify-machineinstrs < %s | FileCheck -check-prefix=SI %s ; SI-LABEL: {{^}}infinite_loop: ; SI: v_mov_b32_e32 [[REG:v[0-9]+]], 0x3e7 diff --git a/test/CodeGen/R600/inline-asm.ll b/test/CodeGen/R600/inline-asm.ll new file mode 100644 index 0000000..efc2292 --- /dev/null +++ b/test/CodeGen/R600/inline-asm.ll @@ -0,0 +1,12 @@ +; RUN: llc < %s -march=amdgcn -mcpu=SI -verify-machineinstrs | FileCheck %s +; RUN: llc < %s -march=amdgcn -mcpu=tonga -verify-machineinstrs | FileCheck %s + +; CHECK: {{^}}inline_asm: +; CHECK: s_endpgm +; CHECK: s_endpgm +define void @inline_asm(i32 addrspace(1)* %out) { +entry: + store i32 5, i32 addrspace(1)* %out + call void asm sideeffect "s_endpgm", ""() + ret void +} diff --git a/test/CodeGen/R600/inline-calls.ll b/test/CodeGen/R600/inline-calls.ll index 3bceeca..33a4c83 100644 --- a/test/CodeGen/R600/inline-calls.ll +++ b/test/CodeGen/R600/inline-calls.ll @@ -1,4 +1,5 @@ -; RUN: llc -march=r600 -mcpu=SI -verify-machineinstrs < %s | FileCheck %s +; RUN: llc -march=amdgcn -mcpu=SI -verify-machineinstrs < %s | FileCheck %s +; RUN: llc -march=amdgcn -mcpu=tonga -verify-machineinstrs < %s | FileCheck %s ; RUN: llc -march=r600 -mcpu=redwood -verify-machineinstrs < %s | FileCheck %s ; CHECK-NOT: {{^}}func: diff --git a/test/CodeGen/R600/input-mods.ll b/test/CodeGen/R600/input-mods.ll index e3e9499..1c4d285 100644 --- a/test/CodeGen/R600/input-mods.ll +++ b/test/CodeGen/R600/input-mods.ll @@ -1,13 +1,13 @@ -;RUN: llc < %s -march=r600 -mcpu=redwood | FileCheck %s --check-prefix=EG-CHECK -;RUN: llc < %s -march=r600 -mcpu=cayman | FileCheck %s --check-prefix=CM-CHECK +;RUN: llc < %s -march=r600 -mcpu=redwood | FileCheck %s 
--check-prefix=EG +;RUN: llc < %s -march=r600 -mcpu=cayman | FileCheck %s --check-prefix=CM -;EG-CHECK-LABEL: {{^}}test: -;EG-CHECK: EXP_IEEE * -;CM-CHECK-LABEL: {{^}}test: -;CM-CHECK: EXP_IEEE T{{[0-9]+}}.X, -|T{{[0-9]+}}.X| -;CM-CHECK: EXP_IEEE T{{[0-9]+}}.Y (MASKED), -|T{{[0-9]+}}.X| -;CM-CHECK: EXP_IEEE T{{[0-9]+}}.Z (MASKED), -|T{{[0-9]+}}.X| -;CM-CHECK: EXP_IEEE * T{{[0-9]+}}.W (MASKED), -|T{{[0-9]+}}.X| +;EG-LABEL: {{^}}test: +;EG: EXP_IEEE * +;CM-LABEL: {{^}}test: +;CM: EXP_IEEE T{{[0-9]+}}.X, -|T{{[0-9]+}}.X| +;CM: EXP_IEEE T{{[0-9]+}}.Y (MASKED), -|T{{[0-9]+}}.X| +;CM: EXP_IEEE T{{[0-9]+}}.Z (MASKED), -|T{{[0-9]+}}.X| +;CM: EXP_IEEE * T{{[0-9]+}}.W (MASKED), -|T{{[0-9]+}}.X| define void @test(<4 x float> inreg %reg0) #0 { %r0 = extractelement <4 x float> %reg0, i32 0 diff --git a/test/CodeGen/R600/insert_subreg.ll b/test/CodeGen/R600/insert_subreg.ll index e311e19..4a5e886 100644 --- a/test/CodeGen/R600/insert_subreg.ll +++ b/test/CodeGen/R600/insert_subreg.ll @@ -1,4 +1,5 @@ -; RUN: llc -march=r600 -mcpu=SI -mattr=-promote-alloca -verify-machineinstrs < %s +; RUN: llc -march=amdgcn -mcpu=SI -mattr=-promote-alloca -verify-machineinstrs < %s +; RUN: llc -march=amdgcn -mcpu=tonga -mattr=-promote-alloca -verify-machineinstrs < %s ; Test that INSERT_SUBREG instructions don't have non-register operands after ; instruction selection. diff --git a/test/CodeGen/R600/insert_vector_elt.ll b/test/CodeGen/R600/insert_vector_elt.ll index 857c414..64afddc 100644 --- a/test/CodeGen/R600/insert_vector_elt.ll +++ b/test/CodeGen/R600/insert_vector_elt.ll @@ -1,4 +1,5 @@ -; RUN: llc -verify-machineinstrs -march=r600 -mcpu=SI < %s | FileCheck -check-prefix=SI %s +; RUN: llc -verify-machineinstrs -march=amdgcn -mcpu=SI < %s | FileCheck -check-prefix=SI %s +; RUN: llc -verify-machineinstrs -march=amdgcn -mcpu=tonga < %s | FileCheck -check-prefix=SI %s ; FIXME: Broken on evergreen ; FIXME: For some reason the 8 and 16 vectors are being stored as diff --git a/test/CodeGen/R600/kernel-args.ll b/test/CodeGen/R600/kernel-args.ll index 9a7da90..5db45ce 100644 --- a/test/CodeGen/R600/kernel-args.ll +++ b/test/CodeGen/R600/kernel-args.ll @@ -1,11 +1,11 @@ -; RUN: llc < %s -march=r600 -mcpu=redwood | FileCheck %s --check-prefix=EG-CHECK -; RUN: llc < %s -march=r600 -mcpu=cayman | FileCheck %s --check-prefix=EG-CHECK -; RUN: llc < %s -march=r600 -mcpu=SI -verify-machineinstrs | FileCheck %s --check-prefix=SI-CHECK +; RUN: llc < %s -march=amdgcn -mcpu=SI -verify-machineinstrs | FileCheck %s --check-prefix=SI --check-prefix=GCN --check-prefix=FUNC +; RUN: llc < %s -march=amdgcn -mcpu=tonga -verify-machineinstrs | FileCheck %s --check-prefix=VI --check-prefix=GCN --check-prefix=FUNC +; RUN: llc < %s -march=r600 -mcpu=redwood | FileCheck %s --check-prefix=EG --check-prefix=FUNC +; RUN: llc < %s -march=r600 -mcpu=cayman | FileCheck %s --check-prefix=EG --check-prefix=FUNC -; EG-CHECK-LABEL: {{^}}i8_arg: -; EG-CHECK: MOV {{[ *]*}}T{{[0-9]+\.[XYZW]}}, KC0[2].Z -; SI-CHECK-LABEL: {{^}}i8_arg: -; SI-CHECK: buffer_load_ubyte +; FUNC-LABEL: {{^}}i8_arg: +; EG: MOV {{[ *]*}}T{{[0-9]+\.[XYZW]}}, KC0[2].Z +; GCN: buffer_load_ubyte define void @i8_arg(i32 addrspace(1)* nocapture %out, i8 %in) nounwind { entry: @@ -14,10 +14,10 @@ entry: ret void } -; EG-CHECK-LABEL: {{^}}i8_zext_arg: -; EG-CHECK: MOV {{[ *]*}}T{{[0-9]+\.[XYZW]}}, KC0[2].Z -; SI-CHECK-LABEL: {{^}}i8_zext_arg: -; SI-CHECK: s_load_dword s{{[0-9]}}, s[0:1], 0xb +; FUNC-LABEL: {{^}}i8_zext_arg: +; EG: MOV {{[ *]*}}T{{[0-9]+\.[XYZW]}}, KC0[2].Z +; SI: s_load_dword 
s{{[0-9]}}, s[0:1], 0xb +; VI: s_load_dword s{{[0-9]}}, s[0:1], 0x2c define void @i8_zext_arg(i32 addrspace(1)* nocapture %out, i8 zeroext %in) nounwind { entry: @@ -26,10 +26,10 @@ entry: ret void } -; EG-CHECK-LABEL: {{^}}i8_sext_arg: -; EG-CHECK: MOV {{[ *]*}}T{{[0-9]+\.[XYZW]}}, KC0[2].Z -; SI-CHECK-LABEL: {{^}}i8_sext_arg: -; SI-CHECK: s_load_dword s{{[0-9]}}, s[0:1], 0xb +; FUNC-LABEL: {{^}}i8_sext_arg: +; EG: MOV {{[ *]*}}T{{[0-9]+\.[XYZW]}}, KC0[2].Z +; SI: s_load_dword s{{[0-9]}}, s[0:1], 0xb +; VI: s_load_dword s{{[0-9]}}, s[0:1], 0x2c define void @i8_sext_arg(i32 addrspace(1)* nocapture %out, i8 signext %in) nounwind { entry: @@ -38,10 +38,9 @@ entry: ret void } -; EG-CHECK-LABEL: {{^}}i16_arg: -; EG-CHECK: MOV {{[ *]*}}T{{[0-9]+\.[XYZW]}}, KC0[2].Z -; SI-CHECK-LABEL: {{^}}i16_arg: -; SI-CHECK: buffer_load_ushort +; FUNC-LABEL: {{^}}i16_arg: +; EG: MOV {{[ *]*}}T{{[0-9]+\.[XYZW]}}, KC0[2].Z +; GCN: buffer_load_ushort define void @i16_arg(i32 addrspace(1)* nocapture %out, i16 %in) nounwind { entry: @@ -50,10 +49,10 @@ entry: ret void } -; EG-CHECK-LABEL: {{^}}i16_zext_arg: -; EG-CHECK: MOV {{[ *]*}}T{{[0-9]+\.[XYZW]}}, KC0[2].Z -; SI-CHECK-LABEL: {{^}}i16_zext_arg: -; SI-CHECK: s_load_dword s{{[0-9]}}, s[0:1], 0xb +; FUNC-LABEL: {{^}}i16_zext_arg: +; EG: MOV {{[ *]*}}T{{[0-9]+\.[XYZW]}}, KC0[2].Z +; SI: s_load_dword s{{[0-9]}}, s[0:1], 0xb +; VI: s_load_dword s{{[0-9]}}, s[0:1], 0x2c define void @i16_zext_arg(i32 addrspace(1)* nocapture %out, i16 zeroext %in) nounwind { entry: @@ -62,10 +61,10 @@ entry: ret void } -; EG-CHECK-LABEL: {{^}}i16_sext_arg: -; EG-CHECK: MOV {{[ *]*}}T{{[0-9]+\.[XYZW]}}, KC0[2].Z -; SI-CHECK-LABEL: {{^}}i16_sext_arg: -; SI-CHECK: s_load_dword s{{[0-9]}}, s[0:1], 0xb +; FUNC-LABEL: {{^}}i16_sext_arg: +; EG: MOV {{[ *]*}}T{{[0-9]+\.[XYZW]}}, KC0[2].Z +; SI: s_load_dword s{{[0-9]}}, s[0:1], 0xb +; VI: s_load_dword s{{[0-9]}}, s[0:1], 0x2c define void @i16_sext_arg(i32 addrspace(1)* nocapture %out, i16 signext %in) nounwind { entry: @@ -74,380 +73,369 @@ entry: ret void } -; EG-CHECK-LABEL: {{^}}i32_arg: -; EG-CHECK: T{{[0-9]\.[XYZW]}}, KC0[2].Z -; SI-CHECK-LABEL: {{^}}i32_arg: -; s_load_dword s{{[0-9]}}, s[0:1], 0xb +; FUNC-LABEL: {{^}}i32_arg: +; EG: T{{[0-9]\.[XYZW]}}, KC0[2].Z +; SI: s_load_dword s{{[0-9]}}, s[0:1], 0xb +; VI: s_load_dword s{{[0-9]}}, s[0:1], 0x2c define void @i32_arg(i32 addrspace(1)* nocapture %out, i32 %in) nounwind { entry: store i32 %in, i32 addrspace(1)* %out, align 4 ret void } -; EG-CHECK-LABEL: {{^}}f32_arg: -; EG-CHECK: T{{[0-9]\.[XYZW]}}, KC0[2].Z -; SI-CHECK-LABEL: {{^}}f32_arg: -; s_load_dword s{{[0-9]}}, s[0:1], 0xb +; FUNC-LABEL: {{^}}f32_arg: +; EG: T{{[0-9]\.[XYZW]}}, KC0[2].Z +; SI: s_load_dword s{{[0-9]}}, s[0:1], 0xb +; VI: s_load_dword s{{[0-9]}}, s[0:1], 0x2c define void @f32_arg(float addrspace(1)* nocapture %out, float %in) nounwind { entry: store float %in, float addrspace(1)* %out, align 4 ret void } -; EG-CHECK-LABEL: {{^}}v2i8_arg: -; EG-CHECK: VTX_READ_8 -; EG-CHECK: VTX_READ_8 -; SI-CHECK-LABEL: {{^}}v2i8_arg: -; SI-CHECK: buffer_load_ubyte -; SI-CHECK: buffer_load_ubyte +; FUNC-LABEL: {{^}}v2i8_arg: +; EG: VTX_READ_8 +; EG: VTX_READ_8 +; GCN: buffer_load_ubyte +; GCN: buffer_load_ubyte define void @v2i8_arg(<2 x i8> addrspace(1)* %out, <2 x i8> %in) { entry: store <2 x i8> %in, <2 x i8> addrspace(1)* %out ret void } -; EG-CHECK-LABEL: {{^}}v2i16_arg: -; EG-CHECK: VTX_READ_16 -; EG-CHECK: VTX_READ_16 -; SI-CHECK-LABEL: {{^}}v2i16_arg: -; SI-CHECK-DAG: buffer_load_ushort -; SI-CHECK-DAG: buffer_load_ushort 
+; FUNC-LABEL: {{^}}v2i16_arg: +; EG: VTX_READ_16 +; EG: VTX_READ_16 +; GCN-DAG: buffer_load_ushort +; GCN-DAG: buffer_load_ushort define void @v2i16_arg(<2 x i16> addrspace(1)* %out, <2 x i16> %in) { entry: store <2 x i16> %in, <2 x i16> addrspace(1)* %out ret void } -; EG-CHECK-LABEL: {{^}}v2i32_arg: -; EG-CHECK-DAG: T{{[0-9]\.[XYZW]}}, KC0[3].X -; EG-CHECK-DAG: T{{[0-9]\.[XYZW]}}, KC0[2].W -; SI-CHECK-LABEL: {{^}}v2i32_arg: -; SI-CHECK: s_load_dwordx2 s{{\[[0-9]:[0-9]\]}}, s[0:1], 0xb +; FUNC-LABEL: {{^}}v2i32_arg: +; EG-DAG: T{{[0-9]\.[XYZW]}}, KC0[3].X +; EG-DAG: T{{[0-9]\.[XYZW]}}, KC0[2].W +; SI: s_load_dwordx2 s{{\[[0-9]:[0-9]\]}}, s[0:1], 0xb +; VI: s_load_dwordx2 s{{\[[0-9]:[0-9]\]}}, s[0:1], 0x2c define void @v2i32_arg(<2 x i32> addrspace(1)* nocapture %out, <2 x i32> %in) nounwind { entry: store <2 x i32> %in, <2 x i32> addrspace(1)* %out, align 4 ret void } -; EG-CHECK-LABEL: {{^}}v2f32_arg: -; EG-CHECK-DAG: T{{[0-9]\.[XYZW]}}, KC0[3].X -; EG-CHECK-DAG: T{{[0-9]\.[XYZW]}}, KC0[2].W -; SI-CHECK-LABEL: {{^}}v2f32_arg: -; SI-CHECK: s_load_dwordx2 s{{\[[0-9]:[0-9]\]}}, s[0:1], 0xb +; FUNC-LABEL: {{^}}v2f32_arg: +; EG-DAG: T{{[0-9]\.[XYZW]}}, KC0[3].X +; EG-DAG: T{{[0-9]\.[XYZW]}}, KC0[2].W +; SI: s_load_dwordx2 s{{\[[0-9]:[0-9]\]}}, s[0:1], 0xb +; VI: s_load_dwordx2 s{{\[[0-9]:[0-9]\]}}, s[0:1], 0x2c define void @v2f32_arg(<2 x float> addrspace(1)* nocapture %out, <2 x float> %in) nounwind { entry: store <2 x float> %in, <2 x float> addrspace(1)* %out, align 4 ret void } -; EG-CHECK-LABEL: {{^}}v3i8_arg: +; FUNC-LABEL: {{^}}v3i8_arg: ; VTX_READ_8 T{{[0-9]}}.X, T{{[0-9]}}.X, 40 ; VTX_READ_8 T{{[0-9]}}.X, T{{[0-9]}}.X, 41 ; VTX_READ_8 T{{[0-9]}}.X, T{{[0-9]}}.X, 42 -; SI-CHECK-LABEL: {{^}}v3i8_arg: define void @v3i8_arg(<3 x i8> addrspace(1)* nocapture %out, <3 x i8> %in) nounwind { entry: store <3 x i8> %in, <3 x i8> addrspace(1)* %out, align 4 ret void } -; EG-CHECK-LABEL: {{^}}v3i16_arg: +; FUNC-LABEL: {{^}}v3i16_arg: ; VTX_READ_16 T{{[0-9]}}.X, T{{[0-9]}}.X, 44 ; VTX_READ_16 T{{[0-9]}}.X, T{{[0-9]}}.X, 46 ; VTX_READ_16 T{{[0-9]}}.X, T{{[0-9]}}.X, 48 -; SI-CHECK-LABEL: {{^}}v3i16_arg: define void @v3i16_arg(<3 x i16> addrspace(1)* nocapture %out, <3 x i16> %in) nounwind { entry: store <3 x i16> %in, <3 x i16> addrspace(1)* %out, align 4 ret void } -; EG-CHECK-LABEL: {{^}}v3i32_arg: -; EG-CHECK-DAG: T{{[0-9]\.[XYZW]}}, KC0[3].Y -; EG-CHECK-DAG: T{{[0-9]\.[XYZW]}}, KC0[3].Z -; EG-CHECK-DAG: T{{[0-9]\.[XYZW]}}, KC0[3].W -; SI-CHECK-LABEL: {{^}}v3i32_arg: -; SI-CHECK: s_load_dwordx4 s{{\[[0-9]:[0-9]+\]}}, s[0:1], 0xd +; FUNC-LABEL: {{^}}v3i32_arg: +; EG-DAG: T{{[0-9]\.[XYZW]}}, KC0[3].Y +; EG-DAG: T{{[0-9]\.[XYZW]}}, KC0[3].Z +; EG-DAG: T{{[0-9]\.[XYZW]}}, KC0[3].W +; SI: s_load_dwordx4 s{{\[[0-9]:[0-9]+\]}}, s[0:1], 0xd +; VI: s_load_dwordx4 s{{\[[0-9]:[0-9]+\]}}, s[0:1], 0x34 define void @v3i32_arg(<3 x i32> addrspace(1)* nocapture %out, <3 x i32> %in) nounwind { entry: store <3 x i32> %in, <3 x i32> addrspace(1)* %out, align 4 ret void } -; EG-CHECK-LABEL: {{^}}v3f32_arg: -; EG-CHECK-DAG: T{{[0-9]\.[XYZW]}}, KC0[3].Y -; EG-CHECK-DAG: T{{[0-9]\.[XYZW]}}, KC0[3].Z -; EG-CHECK-DAG: T{{[0-9]\.[XYZW]}}, KC0[3].W -; SI-CHECK-LABEL: {{^}}v3f32_arg: -; SI-CHECK: s_load_dwordx4 s{{\[[0-9]:[0-9]+\]}}, s[0:1], 0xd +; FUNC-LABEL: {{^}}v3f32_arg: +; EG-DAG: T{{[0-9]\.[XYZW]}}, KC0[3].Y +; EG-DAG: T{{[0-9]\.[XYZW]}}, KC0[3].Z +; EG-DAG: T{{[0-9]\.[XYZW]}}, KC0[3].W +; SI: s_load_dwordx4 s{{\[[0-9]:[0-9]+\]}}, s[0:1], 0xd +; VI: s_load_dwordx4 s{{\[[0-9]:[0-9]+\]}}, s[0:1], 0x34 define void 
@v3f32_arg(<3 x float> addrspace(1)* nocapture %out, <3 x float> %in) nounwind { entry: store <3 x float> %in, <3 x float> addrspace(1)* %out, align 4 ret void } -; EG-CHECK-LABEL: {{^}}v4i8_arg: -; EG-CHECK: VTX_READ_8 -; EG-CHECK: VTX_READ_8 -; EG-CHECK: VTX_READ_8 -; EG-CHECK: VTX_READ_8 -; SI-CHECK-LABEL: {{^}}v4i8_arg: -; SI-CHECK: buffer_load_ubyte -; SI-CHECK: buffer_load_ubyte -; SI-CHECK: buffer_load_ubyte -; SI-CHECK: buffer_load_ubyte +; FUNC-LABEL: {{^}}v4i8_arg: +; EG: VTX_READ_8 +; EG: VTX_READ_8 +; EG: VTX_READ_8 +; EG: VTX_READ_8 +; GCN: buffer_load_ubyte +; GCN: buffer_load_ubyte +; GCN: buffer_load_ubyte +; GCN: buffer_load_ubyte define void @v4i8_arg(<4 x i8> addrspace(1)* %out, <4 x i8> %in) { entry: store <4 x i8> %in, <4 x i8> addrspace(1)* %out ret void } -; EG-CHECK-LABEL: {{^}}v4i16_arg: -; EG-CHECK: VTX_READ_16 -; EG-CHECK: VTX_READ_16 -; EG-CHECK: VTX_READ_16 -; EG-CHECK: VTX_READ_16 -; SI-CHECK-LABEL: {{^}}v4i16_arg: -; SI-CHECK: buffer_load_ushort -; SI-CHECK: buffer_load_ushort -; SI-CHECK: buffer_load_ushort -; SI-CHECK: buffer_load_ushort +; FUNC-LABEL: {{^}}v4i16_arg: +; EG: VTX_READ_16 +; EG: VTX_READ_16 +; EG: VTX_READ_16 +; EG: VTX_READ_16 +; GCN: buffer_load_ushort +; GCN: buffer_load_ushort +; GCN: buffer_load_ushort +; GCN: buffer_load_ushort define void @v4i16_arg(<4 x i16> addrspace(1)* %out, <4 x i16> %in) { entry: store <4 x i16> %in, <4 x i16> addrspace(1)* %out ret void } -; EG-CHECK-LABEL: {{^}}v4i32_arg: -; EG-CHECK-DAG: T{{[0-9]\.[XYZW]}}, KC0[3].Y -; EG-CHECK-DAG: T{{[0-9]\.[XYZW]}}, KC0[3].Z -; EG-CHECK-DAG: T{{[0-9]\.[XYZW]}}, KC0[3].W -; EG-CHECK-DAG: T{{[0-9]\.[XYZW]}}, KC0[4].X -; SI-CHECK-LABEL: {{^}}v4i32_arg: -; SI-CHECK: s_load_dwordx4 s{{\[[0-9]:[0-9]\]}}, s[0:1], 0xd +; FUNC-LABEL: {{^}}v4i32_arg: +; EG-DAG: T{{[0-9]\.[XYZW]}}, KC0[3].Y +; EG-DAG: T{{[0-9]\.[XYZW]}}, KC0[3].Z +; EG-DAG: T{{[0-9]\.[XYZW]}}, KC0[3].W +; EG-DAG: T{{[0-9]\.[XYZW]}}, KC0[4].X +; SI: s_load_dwordx4 s{{\[[0-9]:[0-9]\]}}, s[0:1], 0xd +; VI: s_load_dwordx4 s{{\[[0-9]:[0-9]\]}}, s[0:1], 0x34 define void @v4i32_arg(<4 x i32> addrspace(1)* nocapture %out, <4 x i32> %in) nounwind { entry: store <4 x i32> %in, <4 x i32> addrspace(1)* %out, align 4 ret void } -; EG-CHECK-LABEL: {{^}}v4f32_arg: -; EG-CHECK-DAG: T{{[0-9]\.[XYZW]}}, KC0[3].Y -; EG-CHECK-DAG: T{{[0-9]\.[XYZW]}}, KC0[3].Z -; EG-CHECK-DAG: T{{[0-9]\.[XYZW]}}, KC0[3].W -; EG-CHECK-DAG: T{{[0-9]\.[XYZW]}}, KC0[4].X -; SI-CHECK-LABEL: {{^}}v4f32_arg: -; SI-CHECK: s_load_dwordx4 s{{\[[0-9]:[0-9]\]}}, s[0:1], 0xd +; FUNC-LABEL: {{^}}v4f32_arg: +; EG-DAG: T{{[0-9]\.[XYZW]}}, KC0[3].Y +; EG-DAG: T{{[0-9]\.[XYZW]}}, KC0[3].Z +; EG-DAG: T{{[0-9]\.[XYZW]}}, KC0[3].W +; EG-DAG: T{{[0-9]\.[XYZW]}}, KC0[4].X +; SI: s_load_dwordx4 s{{\[[0-9]:[0-9]\]}}, s[0:1], 0xd +; VI: s_load_dwordx4 s{{\[[0-9]:[0-9]\]}}, s[0:1], 0x34 define void @v4f32_arg(<4 x float> addrspace(1)* nocapture %out, <4 x float> %in) nounwind { entry: store <4 x float> %in, <4 x float> addrspace(1)* %out, align 4 ret void } -; EG-CHECK-LABEL: {{^}}v8i8_arg: -; EG-CHECK: VTX_READ_8 -; EG-CHECK: VTX_READ_8 -; EG-CHECK: VTX_READ_8 -; EG-CHECK: VTX_READ_8 -; EG-CHECK: VTX_READ_8 -; EG-CHECK: VTX_READ_8 -; EG-CHECK: VTX_READ_8 -; EG-CHECK: VTX_READ_8 -; SI-CHECK-LABEL: {{^}}v8i8_arg: -; SI-CHECK: buffer_load_ubyte -; SI-CHECK: buffer_load_ubyte -; SI-CHECK: buffer_load_ubyte -; SI-CHECK: buffer_load_ubyte -; SI-CHECK: buffer_load_ubyte -; SI-CHECK: buffer_load_ubyte -; SI-CHECK: buffer_load_ubyte +; FUNC-LABEL: {{^}}v8i8_arg: +; EG: VTX_READ_8 +; EG: 
VTX_READ_8 +; EG: VTX_READ_8 +; EG: VTX_READ_8 +; EG: VTX_READ_8 +; EG: VTX_READ_8 +; EG: VTX_READ_8 +; EG: VTX_READ_8 +; GCN: buffer_load_ubyte +; GCN: buffer_load_ubyte +; GCN: buffer_load_ubyte +; GCN: buffer_load_ubyte +; GCN: buffer_load_ubyte +; GCN: buffer_load_ubyte +; GCN: buffer_load_ubyte define void @v8i8_arg(<8 x i8> addrspace(1)* %out, <8 x i8> %in) { entry: store <8 x i8> %in, <8 x i8> addrspace(1)* %out ret void } -; EG-CHECK-LABEL: {{^}}v8i16_arg: -; EG-CHECK: VTX_READ_16 -; EG-CHECK: VTX_READ_16 -; EG-CHECK: VTX_READ_16 -; EG-CHECK: VTX_READ_16 -; EG-CHECK: VTX_READ_16 -; EG-CHECK: VTX_READ_16 -; EG-CHECK: VTX_READ_16 -; EG-CHECK: VTX_READ_16 -; SI-CHECK-LABEL: {{^}}v8i16_arg: -; SI-CHECK: buffer_load_ushort -; SI-CHECK: buffer_load_ushort -; SI-CHECK: buffer_load_ushort -; SI-CHECK: buffer_load_ushort -; SI-CHECK: buffer_load_ushort -; SI-CHECK: buffer_load_ushort -; SI-CHECK: buffer_load_ushort -; SI-CHECK: buffer_load_ushort +; FUNC-LABEL: {{^}}v8i16_arg: +; EG: VTX_READ_16 +; EG: VTX_READ_16 +; EG: VTX_READ_16 +; EG: VTX_READ_16 +; EG: VTX_READ_16 +; EG: VTX_READ_16 +; EG: VTX_READ_16 +; EG: VTX_READ_16 +; GCN: buffer_load_ushort +; GCN: buffer_load_ushort +; GCN: buffer_load_ushort +; GCN: buffer_load_ushort +; GCN: buffer_load_ushort +; GCN: buffer_load_ushort +; GCN: buffer_load_ushort +; GCN: buffer_load_ushort define void @v8i16_arg(<8 x i16> addrspace(1)* %out, <8 x i16> %in) { entry: store <8 x i16> %in, <8 x i16> addrspace(1)* %out ret void } -; EG-CHECK-LABEL: {{^}}v8i32_arg: -; EG-CHECK-DAG: T{{[0-9]\.[XYZW]}}, KC0[4].Y -; EG-CHECK-DAG: T{{[0-9]\.[XYZW]}}, KC0[4].Z -; EG-CHECK-DAG: T{{[0-9]\.[XYZW]}}, KC0[4].W -; EG-CHECK-DAG: T{{[0-9]\.[XYZW]}}, KC0[5].X -; EG-CHECK-DAG: T{{[0-9]\.[XYZW]}}, KC0[5].Y -; EG-CHECK-DAG: T{{[0-9]\.[XYZW]}}, KC0[5].Z -; EG-CHECK-DAG: T{{[0-9]\.[XYZW]}}, KC0[5].W -; EG-CHECK-DAG: T{{[0-9]\.[XYZW]}}, KC0[6].X -; SI-CHECK-LABEL: {{^}}v8i32_arg: -; SI-CHECK: s_load_dwordx8 s{{\[[0-9]:[0-9]+\]}}, s[0:1], 0x11 +; FUNC-LABEL: {{^}}v8i32_arg: +; EG-DAG: T{{[0-9]\.[XYZW]}}, KC0[4].Y +; EG-DAG: T{{[0-9]\.[XYZW]}}, KC0[4].Z +; EG-DAG: T{{[0-9]\.[XYZW]}}, KC0[4].W +; EG-DAG: T{{[0-9]\.[XYZW]}}, KC0[5].X +; EG-DAG: T{{[0-9]\.[XYZW]}}, KC0[5].Y +; EG-DAG: T{{[0-9]\.[XYZW]}}, KC0[5].Z +; EG-DAG: T{{[0-9]\.[XYZW]}}, KC0[5].W +; EG-DAG: T{{[0-9]\.[XYZW]}}, KC0[6].X +; SI: s_load_dwordx8 s{{\[[0-9]:[0-9]+\]}}, s[0:1], 0x11 +; VI: s_load_dwordx8 s{{\[[0-9]:[0-9]+\]}}, s[0:1], 0x44 define void @v8i32_arg(<8 x i32> addrspace(1)* nocapture %out, <8 x i32> %in) nounwind { entry: store <8 x i32> %in, <8 x i32> addrspace(1)* %out, align 4 ret void } -; EG-CHECK-LABEL: {{^}}v8f32_arg: -; EG-CHECK-DAG: T{{[0-9]\.[XYZW]}}, KC0[4].Y -; EG-CHECK-DAG: T{{[0-9]\.[XYZW]}}, KC0[4].Z -; EG-CHECK-DAG: T{{[0-9]\.[XYZW]}}, KC0[4].W -; EG-CHECK-DAG: T{{[0-9]\.[XYZW]}}, KC0[5].X -; EG-CHECK-DAG: T{{[0-9]\.[XYZW]}}, KC0[5].Y -; EG-CHECK-DAG: T{{[0-9]\.[XYZW]}}, KC0[5].Z -; EG-CHECK-DAG: T{{[0-9]\.[XYZW]}}, KC0[5].W -; EG-CHECK-DAG: T{{[0-9]\.[XYZW]}}, KC0[6].X -; SI-CHECK-LABEL: {{^}}v8f32_arg: -; SI-CHECK: s_load_dwordx8 s{{\[[0-9]:[0-9]+\]}}, s[0:1], 0x11 +; FUNC-LABEL: {{^}}v8f32_arg: +; EG-DAG: T{{[0-9]\.[XYZW]}}, KC0[4].Y +; EG-DAG: T{{[0-9]\.[XYZW]}}, KC0[4].Z +; EG-DAG: T{{[0-9]\.[XYZW]}}, KC0[4].W +; EG-DAG: T{{[0-9]\.[XYZW]}}, KC0[5].X +; EG-DAG: T{{[0-9]\.[XYZW]}}, KC0[5].Y +; EG-DAG: T{{[0-9]\.[XYZW]}}, KC0[5].Z +; EG-DAG: T{{[0-9]\.[XYZW]}}, KC0[5].W +; EG-DAG: T{{[0-9]\.[XYZW]}}, KC0[6].X +; SI: s_load_dwordx8 s{{\[[0-9]:[0-9]+\]}}, s[0:1], 0x11 define void 
@v8f32_arg(<8 x float> addrspace(1)* nocapture %out, <8 x float> %in) nounwind { entry: store <8 x float> %in, <8 x float> addrspace(1)* %out, align 4 ret void } -; EG-CHECK-LABEL: {{^}}v16i8_arg: -; EG-CHECK: VTX_READ_8 -; EG-CHECK: VTX_READ_8 -; EG-CHECK: VTX_READ_8 -; EG-CHECK: VTX_READ_8 -; EG-CHECK: VTX_READ_8 -; EG-CHECK: VTX_READ_8 -; EG-CHECK: VTX_READ_8 -; EG-CHECK: VTX_READ_8 -; EG-CHECK: VTX_READ_8 -; EG-CHECK: VTX_READ_8 -; EG-CHECK: VTX_READ_8 -; EG-CHECK: VTX_READ_8 -; EG-CHECK: VTX_READ_8 -; EG-CHECK: VTX_READ_8 -; EG-CHECK: VTX_READ_8 -; EG-CHECK: VTX_READ_8 -; SI-CHECK-LABEL: {{^}}v16i8_arg: -; SI-CHECK: buffer_load_ubyte -; SI-CHECK: buffer_load_ubyte -; SI-CHECK: buffer_load_ubyte -; SI-CHECK: buffer_load_ubyte -; SI-CHECK: buffer_load_ubyte -; SI-CHECK: buffer_load_ubyte -; SI-CHECK: buffer_load_ubyte -; SI-CHECK: buffer_load_ubyte -; SI-CHECK: buffer_load_ubyte -; SI-CHECK: buffer_load_ubyte -; SI-CHECK: buffer_load_ubyte -; SI-CHECK: buffer_load_ubyte -; SI-CHECK: buffer_load_ubyte -; SI-CHECK: buffer_load_ubyte -; SI-CHECK: buffer_load_ubyte -; SI-CHECK: buffer_load_ubyte +; FUNC-LABEL: {{^}}v16i8_arg: +; EG: VTX_READ_8 +; EG: VTX_READ_8 +; EG: VTX_READ_8 +; EG: VTX_READ_8 +; EG: VTX_READ_8 +; EG: VTX_READ_8 +; EG: VTX_READ_8 +; EG: VTX_READ_8 +; EG: VTX_READ_8 +; EG: VTX_READ_8 +; EG: VTX_READ_8 +; EG: VTX_READ_8 +; EG: VTX_READ_8 +; EG: VTX_READ_8 +; EG: VTX_READ_8 +; EG: VTX_READ_8 +; GCN: buffer_load_ubyte +; GCN: buffer_load_ubyte +; GCN: buffer_load_ubyte +; GCN: buffer_load_ubyte +; GCN: buffer_load_ubyte +; GCN: buffer_load_ubyte +; GCN: buffer_load_ubyte +; GCN: buffer_load_ubyte +; GCN: buffer_load_ubyte +; GCN: buffer_load_ubyte +; GCN: buffer_load_ubyte +; GCN: buffer_load_ubyte +; GCN: buffer_load_ubyte +; GCN: buffer_load_ubyte +; GCN: buffer_load_ubyte +; GCN: buffer_load_ubyte define void @v16i8_arg(<16 x i8> addrspace(1)* %out, <16 x i8> %in) { entry: store <16 x i8> %in, <16 x i8> addrspace(1)* %out ret void } -; EG-CHECK-LABEL: {{^}}v16i16_arg: -; EG-CHECK: VTX_READ_16 -; EG-CHECK: VTX_READ_16 -; EG-CHECK: VTX_READ_16 -; EG-CHECK: VTX_READ_16 -; EG-CHECK: VTX_READ_16 -; EG-CHECK: VTX_READ_16 -; EG-CHECK: VTX_READ_16 -; EG-CHECK: VTX_READ_16 -; EG-CHECK: VTX_READ_16 -; EG-CHECK: VTX_READ_16 -; EG-CHECK: VTX_READ_16 -; EG-CHECK: VTX_READ_16 -; EG-CHECK: VTX_READ_16 -; EG-CHECK: VTX_READ_16 -; EG-CHECK: VTX_READ_16 -; EG-CHECK: VTX_READ_16 -; SI-CHECK-LABEL: {{^}}v16i16_arg: -; SI-CHECK: buffer_load_ushort -; SI-CHECK: buffer_load_ushort -; SI-CHECK: buffer_load_ushort -; SI-CHECK: buffer_load_ushort -; SI-CHECK: buffer_load_ushort -; SI-CHECK: buffer_load_ushort -; SI-CHECK: buffer_load_ushort -; SI-CHECK: buffer_load_ushort -; SI-CHECK: buffer_load_ushort -; SI-CHECK: buffer_load_ushort -; SI-CHECK: buffer_load_ushort -; SI-CHECK: buffer_load_ushort -; SI-CHECK: buffer_load_ushort -; SI-CHECK: buffer_load_ushort -; SI-CHECK: buffer_load_ushort -; SI-CHECK: buffer_load_ushort +; FUNC-LABEL: {{^}}v16i16_arg: +; EG: VTX_READ_16 +; EG: VTX_READ_16 +; EG: VTX_READ_16 +; EG: VTX_READ_16 +; EG: VTX_READ_16 +; EG: VTX_READ_16 +; EG: VTX_READ_16 +; EG: VTX_READ_16 +; EG: VTX_READ_16 +; EG: VTX_READ_16 +; EG: VTX_READ_16 +; EG: VTX_READ_16 +; EG: VTX_READ_16 +; EG: VTX_READ_16 +; EG: VTX_READ_16 +; EG: VTX_READ_16 +; GCN: buffer_load_ushort +; GCN: buffer_load_ushort +; GCN: buffer_load_ushort +; GCN: buffer_load_ushort +; GCN: buffer_load_ushort +; GCN: buffer_load_ushort +; GCN: buffer_load_ushort +; GCN: buffer_load_ushort +; GCN: buffer_load_ushort +; 
GCN: buffer_load_ushort +; GCN: buffer_load_ushort +; GCN: buffer_load_ushort +; GCN: buffer_load_ushort +; GCN: buffer_load_ushort +; GCN: buffer_load_ushort +; GCN: buffer_load_ushort define void @v16i16_arg(<16 x i16> addrspace(1)* %out, <16 x i16> %in) { entry: store <16 x i16> %in, <16 x i16> addrspace(1)* %out ret void } -; EG-CHECK-LABEL: {{^}}v16i32_arg: -; EG-CHECK-DAG: T{{[0-9]\.[XYZW]}}, KC0[6].Y -; EG-CHECK-DAG: T{{[0-9]\.[XYZW]}}, KC0[6].Z -; EG-CHECK-DAG: T{{[0-9]\.[XYZW]}}, KC0[6].W -; EG-CHECK-DAG: T{{[0-9]\.[XYZW]}}, KC0[7].X -; EG-CHECK-DAG: T{{[0-9]\.[XYZW]}}, KC0[7].Y -; EG-CHECK-DAG: T{{[0-9]\.[XYZW]}}, KC0[7].Z -; EG-CHECK-DAG: T{{[0-9]\.[XYZW]}}, KC0[7].W -; EG-CHECK-DAG: T{{[0-9]\.[XYZW]}}, KC0[8].X -; EG-CHECK-DAG: T{{[0-9]\.[XYZW]}}, KC0[8].Y -; EG-CHECK-DAG: T{{[0-9]\.[XYZW]}}, KC0[8].Z -; EG-CHECK-DAG: T{{[0-9]\.[XYZW]}}, KC0[8].W -; EG-CHECK-DAG: T{{[0-9]\.[XYZW]}}, KC0[9].X -; EG-CHECK-DAG: T{{[0-9]\.[XYZW]}}, KC0[9].Y -; EG-CHECK-DAG: T{{[0-9]\.[XYZW]}}, KC0[9].Z -; EG-CHECK-DAG: T{{[0-9]\.[XYZW]}}, KC0[9].W -; EG-CHECK-DAG: T{{[0-9]\.[XYZW]}}, KC0[10].X -; SI-CHECK-LABEL: {{^}}v16i32_arg: -; SI-CHECK: s_load_dwordx16 s{{\[[0-9]:[0-9]+\]}}, s[0:1], 0x19 +; FUNC-LABEL: {{^}}v16i32_arg: +; EG-DAG: T{{[0-9]\.[XYZW]}}, KC0[6].Y +; EG-DAG: T{{[0-9]\.[XYZW]}}, KC0[6].Z +; EG-DAG: T{{[0-9]\.[XYZW]}}, KC0[6].W +; EG-DAG: T{{[0-9]\.[XYZW]}}, KC0[7].X +; EG-DAG: T{{[0-9]\.[XYZW]}}, KC0[7].Y +; EG-DAG: T{{[0-9]\.[XYZW]}}, KC0[7].Z +; EG-DAG: T{{[0-9]\.[XYZW]}}, KC0[7].W +; EG-DAG: T{{[0-9]\.[XYZW]}}, KC0[8].X +; EG-DAG: T{{[0-9]\.[XYZW]}}, KC0[8].Y +; EG-DAG: T{{[0-9]\.[XYZW]}}, KC0[8].Z +; EG-DAG: T{{[0-9]\.[XYZW]}}, KC0[8].W +; EG-DAG: T{{[0-9]\.[XYZW]}}, KC0[9].X +; EG-DAG: T{{[0-9]\.[XYZW]}}, KC0[9].Y +; EG-DAG: T{{[0-9]\.[XYZW]}}, KC0[9].Z +; EG-DAG: T{{[0-9]\.[XYZW]}}, KC0[9].W +; EG-DAG: T{{[0-9]\.[XYZW]}}, KC0[10].X +; SI: s_load_dwordx16 s{{\[[0-9]:[0-9]+\]}}, s[0:1], 0x19 +; VI: s_load_dwordx16 s{{\[[0-9]:[0-9]+\]}}, s[0:1], 0x64 define void @v16i32_arg(<16 x i32> addrspace(1)* nocapture %out, <16 x i32> %in) nounwind { entry: store <16 x i32> %in, <16 x i32> addrspace(1)* %out, align 4 ret void } -; EG-CHECK-LABEL: {{^}}v16f32_arg: -; EG-CHECK-DAG: T{{[0-9]\.[XYZW]}}, KC0[6].Y -; EG-CHECK-DAG: T{{[0-9]\.[XYZW]}}, KC0[6].Z -; EG-CHECK-DAG: T{{[0-9]\.[XYZW]}}, KC0[6].W -; EG-CHECK-DAG: T{{[0-9]\.[XYZW]}}, KC0[7].X -; EG-CHECK-DAG: T{{[0-9]\.[XYZW]}}, KC0[7].Y -; EG-CHECK-DAG: T{{[0-9]\.[XYZW]}}, KC0[7].Z -; EG-CHECK-DAG: T{{[0-9]\.[XYZW]}}, KC0[7].W -; EG-CHECK-DAG: T{{[0-9]\.[XYZW]}}, KC0[8].X -; EG-CHECK-DAG: T{{[0-9]\.[XYZW]}}, KC0[8].Y -; EG-CHECK-DAG: T{{[0-9]\.[XYZW]}}, KC0[8].Z -; EG-CHECK-DAG: T{{[0-9]\.[XYZW]}}, KC0[8].W -; EG-CHECK-DAG: T{{[0-9]\.[XYZW]}}, KC0[9].X -; EG-CHECK-DAG: T{{[0-9]\.[XYZW]}}, KC0[9].Y -; EG-CHECK-DAG: T{{[0-9]\.[XYZW]}}, KC0[9].Z -; EG-CHECK-DAG: T{{[0-9]\.[XYZW]}}, KC0[9].W -; EG-CHECK-DAG: T{{[0-9]\.[XYZW]}}, KC0[10].X -; SI-CHECK-LABEL: {{^}}v16f32_arg: -; SI-CHECK: s_load_dwordx16 s{{\[[0-9]:[0-9]+\]}}, s[0:1], 0x19 +; FUNC-LABEL: {{^}}v16f32_arg: +; EG-DAG: T{{[0-9]\.[XYZW]}}, KC0[6].Y +; EG-DAG: T{{[0-9]\.[XYZW]}}, KC0[6].Z +; EG-DAG: T{{[0-9]\.[XYZW]}}, KC0[6].W +; EG-DAG: T{{[0-9]\.[XYZW]}}, KC0[7].X +; EG-DAG: T{{[0-9]\.[XYZW]}}, KC0[7].Y +; EG-DAG: T{{[0-9]\.[XYZW]}}, KC0[7].Z +; EG-DAG: T{{[0-9]\.[XYZW]}}, KC0[7].W +; EG-DAG: T{{[0-9]\.[XYZW]}}, KC0[8].X +; EG-DAG: T{{[0-9]\.[XYZW]}}, KC0[8].Y +; EG-DAG: T{{[0-9]\.[XYZW]}}, KC0[8].Z +; EG-DAG: T{{[0-9]\.[XYZW]}}, KC0[8].W +; EG-DAG: T{{[0-9]\.[XYZW]}}, KC0[9].X +; 
EG-DAG: T{{[0-9]\.[XYZW]}}, KC0[9].Y +; EG-DAG: T{{[0-9]\.[XYZW]}}, KC0[9].Z +; EG-DAG: T{{[0-9]\.[XYZW]}}, KC0[9].W +; EG-DAG: T{{[0-9]\.[XYZW]}}, KC0[10].X +; SI: s_load_dwordx16 s{{\[[0-9]:[0-9]+\]}}, s[0:1], 0x19 +; VI: s_load_dwordx16 s{{\[[0-9]:[0-9]+\]}}, s[0:1], 0x64 define void @v16f32_arg(<16 x float> addrspace(1)* nocapture %out, <16 x float> %in) nounwind { entry: store <16 x float> %in, <16 x float> addrspace(1)* %out, align 4 @@ -455,18 +443,18 @@ entry: } ; FUNC-LABEL: {{^}}kernel_arg_i64: -; SI: s_load_dwordx2 -; SI: s_load_dwordx2 -; SI: buffer_store_dwordx2 +; GCN: s_load_dwordx2 +; GCN: s_load_dwordx2 +; GCN: buffer_store_dwordx2 define void @kernel_arg_i64(i64 addrspace(1)* %out, i64 %a) nounwind { store i64 %a, i64 addrspace(1)* %out, align 8 ret void } ; XFUNC-LABEL: {{^}}kernel_arg_v1i64: -; XSI: s_load_dwordx2 -; XSI: s_load_dwordx2 -; XSI: buffer_store_dwordx2 +; XGCN: s_load_dwordx2 +; XGCN: s_load_dwordx2 +; XGCN: buffer_store_dwordx2 ; define void @kernel_arg_v1i64(<1 x i64> addrspace(1)* %out, <1 x i64> %a) nounwind { ; store <1 x i64> %a, <1 x i64> addrspace(1)* %out, align 8 ; ret void diff --git a/test/CodeGen/R600/large-alloca.ll b/test/CodeGen/R600/large-alloca.ll index d8be6d4..788816c 100644 --- a/test/CodeGen/R600/large-alloca.ll +++ b/test/CodeGen/R600/large-alloca.ll @@ -1,6 +1,7 @@ ; XFAIL: * ; REQUIRES: asserts -; RUN: llc -march=r600 -mcpu=SI < %s +; RUN: llc -march=amdgcn -mcpu=SI < %s +; RUN: llc -march=amdgcn -mcpu=tonga < %s define void @large_alloca(i32 addrspace(1)* %out, i32 %x, i32 %y) nounwind { %large = alloca [8192 x i32], align 4 diff --git a/test/CodeGen/R600/large-constant-initializer.ll b/test/CodeGen/R600/large-constant-initializer.ll index 5612dd3..c8671ef 100644 --- a/test/CodeGen/R600/large-constant-initializer.ll +++ b/test/CodeGen/R600/large-constant-initializer.ll @@ -1,4 +1,5 @@ -; RUN: llc -march=r600 -mcpu=SI < %s +; RUN: llc -march=amdgcn -mcpu=SI < %s +; RUN: llc -march=amdgcn -mcpu=tonga < %s ; CHECK: s_endpgm @gv = external unnamed_addr addrspace(2) constant [239 x i32], align 4 diff --git a/test/CodeGen/R600/lds-initializer.ll b/test/CodeGen/R600/lds-initializer.ll index 91d5d12..7344eff 100644 --- a/test/CodeGen/R600/lds-initializer.ll +++ b/test/CodeGen/R600/lds-initializer.ll @@ -1,4 +1,5 @@ -; RUN: not llc -march=r600 -mcpu=SI < %s 2>&1 | FileCheck %s +; RUN: not llc -march=amdgcn -mcpu=SI < %s 2>&1 | FileCheck %s +; RUN: not llc -march=amdgcn -mcpu=tonga < %s 2>&1 | FileCheck %s ; CHECK: error: unsupported initializer for address space in load_init_lds_global diff --git a/test/CodeGen/R600/lds-zero-initializer.ll b/test/CodeGen/R600/lds-zero-initializer.ll index 23912a9..1fb6f52 100644 --- a/test/CodeGen/R600/lds-zero-initializer.ll +++ b/test/CodeGen/R600/lds-zero-initializer.ll @@ -1,4 +1,5 @@ -; RUN: not llc -march=r600 -mcpu=SI < %s 2>&1 | FileCheck %s +; RUN: not llc -march=amdgcn -mcpu=SI < %s 2>&1 | FileCheck %s +; RUN: not llc -march=amdgcn -mcpu=tonga < %s 2>&1 | FileCheck %s ; CHECK: error: unsupported initializer for address space in load_zeroinit_lds_global diff --git a/test/CodeGen/R600/llvm.AMDGPU.abs.ll b/test/CodeGen/R600/llvm.AMDGPU.abs.ll index b4aede8..8bc2583 100644 --- a/test/CodeGen/R600/llvm.AMDGPU.abs.ll +++ b/test/CodeGen/R600/llvm.AMDGPU.abs.ll @@ -1,4 +1,5 @@ -; RUN: llc -march=r600 -mcpu=SI -verify-machineinstrs < %s | FileCheck -check-prefix=SI -check-prefix=FUNC %s +; RUN: llc -march=amdgcn -mcpu=SI -verify-machineinstrs < %s | FileCheck -check-prefix=SI -check-prefix=FUNC %s +; RUN: 
llc -march=amdgcn -mcpu=tonga -verify-machineinstrs < %s | FileCheck -check-prefix=SI -check-prefix=FUNC %s ; RUN: llc -march=r600 -mcpu=cypress -verify-machineinstrs < %s | FileCheck -check-prefix=EG -check-prefix=FUNC %s declare i32 @llvm.AMDGPU.abs(i32) nounwind readnone diff --git a/test/CodeGen/R600/llvm.AMDGPU.barrier.global.ll b/test/CodeGen/R600/llvm.AMDGPU.barrier.global.ll index 98f6695..a11d9ae 100644 --- a/test/CodeGen/R600/llvm.AMDGPU.barrier.global.ll +++ b/test/CodeGen/R600/llvm.AMDGPU.barrier.global.ll @@ -1,8 +1,10 @@ -; RUN: llc -march=r600 -mcpu=SI < %s | FileCheck -check-prefix=SI -check-prefix=FUNC %s +; RUN: llc -march=amdgcn -mcpu=SI < %s | FileCheck -check-prefix=SI -check-prefix=FUNC %s ; RUN: llc -march=r600 -mcpu=redwood < %s | FileCheck -check-prefix=EG -check-prefix=FUNC %s ; FUNC-LABEL: {{^}}test_barrier_global: ; EG: GROUP_BARRIER +; SI: buffer_store_dword +; SI: s_waitcnt ; SI: s_barrier define void @test_barrier_global(i32 addrspace(1)* %out) { diff --git a/test/CodeGen/R600/llvm.AMDGPU.barrier.local.ll b/test/CodeGen/R600/llvm.AMDGPU.barrier.local.ll index 92fe9f2..76c2453 100644 --- a/test/CodeGen/R600/llvm.AMDGPU.barrier.local.ll +++ b/test/CodeGen/R600/llvm.AMDGPU.barrier.local.ll @@ -1,8 +1,11 @@ -; RUN: llc -march=r600 -mcpu=SI < %s | FileCheck -check-prefix=SI -check-prefix=FUNC %s +; RUN: llc -march=amdgcn -mcpu=SI < %s | FileCheck -check-prefix=SI -check-prefix=FUNC %s ; RUN: llc -march=r600 -mcpu=redwood < %s | FileCheck -check-prefix=EG -check-prefix=FUNC %s ; FUNC-LABEL: {{^}}test_barrier_local: ; EG: GROUP_BARRIER + +; SI: buffer_store_dword +; SI: s_waitcnt ; SI: s_barrier define void @test_barrier_local(i32 addrspace(1)* %out) { diff --git a/test/CodeGen/R600/llvm.AMDGPU.bfe.i32.ll b/test/CodeGen/R600/llvm.AMDGPU.bfe.i32.ll index 0b60d0d..2ec2546 100644 --- a/test/CodeGen/R600/llvm.AMDGPU.bfe.i32.ll +++ b/test/CodeGen/R600/llvm.AMDGPU.bfe.i32.ll @@ -1,4 +1,5 @@ -; RUN: llc -march=r600 -mcpu=SI -verify-machineinstrs < %s | FileCheck -check-prefix=SI -check-prefix=FUNC %s +; RUN: llc -march=amdgcn -mcpu=SI -verify-machineinstrs < %s | FileCheck -check-prefix=SI -check-prefix=FUNC %s +; RUN: llc -march=amdgcn -mcpu=tonga -verify-machineinstrs < %s | FileCheck -check-prefix=SI -check-prefix=FUNC %s ; RUN: llc -march=r600 -mcpu=redwood -show-mc-encoding -verify-machineinstrs < %s | FileCheck -check-prefix=EG -check-prefix=FUNC %s declare i32 @llvm.AMDGPU.bfe.i32(i32, i32, i32) nounwind readnone diff --git a/test/CodeGen/R600/llvm.AMDGPU.bfe.u32.ll b/test/CodeGen/R600/llvm.AMDGPU.bfe.u32.ll index 0794ac4..6cd0108 100644 --- a/test/CodeGen/R600/llvm.AMDGPU.bfe.u32.ll +++ b/test/CodeGen/R600/llvm.AMDGPU.bfe.u32.ll @@ -1,4 +1,5 @@ -; RUN: llc -march=r600 -mcpu=SI -verify-machineinstrs < %s | FileCheck -check-prefix=SI -check-prefix=FUNC %s +; RUN: llc -march=amdgcn -mcpu=SI -verify-machineinstrs < %s | FileCheck -check-prefix=SI -check-prefix=FUNC %s +; RUN: llc -march=amdgcn -mcpu=tonga -verify-machineinstrs < %s | FileCheck -check-prefix=SI -check-prefix=FUNC %s ; RUN: llc -march=r600 -mcpu=redwood -verify-machineinstrs < %s | FileCheck -check-prefix=EG -check-prefix=FUNC %s declare i32 @llvm.AMDGPU.bfe.u32(i32, i32, i32) nounwind readnone diff --git a/test/CodeGen/R600/llvm.AMDGPU.bfi.ll b/test/CodeGen/R600/llvm.AMDGPU.bfi.ll index df61b0b..517a55a 100644 --- a/test/CodeGen/R600/llvm.AMDGPU.bfi.ll +++ b/test/CodeGen/R600/llvm.AMDGPU.bfi.ll @@ -1,4 +1,5 @@ -; RUN: llc -march=r600 -mcpu=SI -verify-machineinstrs < %s | FileCheck -check-prefix=SI 
-check-prefix=FUNC %s +; RUN: llc -march=amdgcn -mcpu=SI -verify-machineinstrs < %s | FileCheck -check-prefix=SI -check-prefix=FUNC %s +; RUN: llc -march=amdgcn -mcpu=tonga -verify-machineinstrs < %s | FileCheck -check-prefix=SI -check-prefix=FUNC %s ; RUN: llc -march=r600 -mcpu=redwood -verify-machineinstrs < %s | FileCheck -check-prefix=EG -check-prefix=FUNC %s declare i32 @llvm.AMDGPU.bfi(i32, i32, i32) nounwind readnone diff --git a/test/CodeGen/R600/llvm.AMDGPU.bfm.ll b/test/CodeGen/R600/llvm.AMDGPU.bfm.ll index 0ba4af5..2346f40 100644 --- a/test/CodeGen/R600/llvm.AMDGPU.bfm.ll +++ b/test/CodeGen/R600/llvm.AMDGPU.bfm.ll @@ -1,4 +1,5 @@ -; RUN: llc -march=r600 -mcpu=SI -verify-machineinstrs < %s | FileCheck -check-prefix=SI -check-prefix=FUNC %s +; RUN: llc -march=amdgcn -mcpu=SI -verify-machineinstrs < %s | FileCheck -check-prefix=SI -check-prefix=FUNC %s +; RUN: llc -march=amdgcn -mcpu=tonga -verify-machineinstrs < %s | FileCheck -check-prefix=SI -check-prefix=FUNC %s ; RUN: llc -march=r600 -mcpu=redwood -verify-machineinstrs < %s | FileCheck -check-prefix=EG -check-prefix=FUNC %s declare i32 @llvm.AMDGPU.bfm(i32, i32) nounwind readnone diff --git a/test/CodeGen/R600/llvm.AMDGPU.brev.ll b/test/CodeGen/R600/llvm.AMDGPU.brev.ll index 647df34..3973f53 100644 --- a/test/CodeGen/R600/llvm.AMDGPU.brev.ll +++ b/test/CodeGen/R600/llvm.AMDGPU.brev.ll @@ -1,4 +1,5 @@ -; RUN: llc -march=r600 -mcpu=SI -verify-machineinstrs < %s | FileCheck -check-prefix=SI -check-prefix=FUNC %s +; RUN: llc -march=amdgcn -mcpu=SI -verify-machineinstrs < %s | FileCheck -check-prefix=SI -check-prefix=FUNC %s +; RUN: llc -march=amdgcn -mcpu=tonga -verify-machineinstrs < %s | FileCheck -check-prefix=SI -check-prefix=FUNC %s declare i32 @llvm.AMDGPU.brev(i32) nounwind readnone diff --git a/test/CodeGen/R600/llvm.AMDGPU.clamp.ll b/test/CodeGen/R600/llvm.AMDGPU.clamp.ll index c6efdb9..11ec963 100644 --- a/test/CodeGen/R600/llvm.AMDGPU.clamp.ll +++ b/test/CodeGen/R600/llvm.AMDGPU.clamp.ll @@ -1,4 +1,5 @@ -; RUN: llc -march=r600 -mcpu=SI -verify-machineinstrs < %s | FileCheck -strict-whitespace -check-prefix=SI -check-prefix=FUNC %s +; RUN: llc -march=amdgcn -mcpu=SI -verify-machineinstrs < %s | FileCheck -strict-whitespace -check-prefix=SI -check-prefix=FUNC %s +; RUN: llc -march=amdgcn -mcpu=tonga -verify-machineinstrs < %s | FileCheck -strict-whitespace -check-prefix=SI -check-prefix=FUNC %s ; RUN: llc -march=r600 -mcpu=cypress -verify-machineinstrs < %s | FileCheck -check-prefix=EG -check-prefix=FUNC %s declare float @llvm.fabs.f32(float) nounwind readnone diff --git a/test/CodeGen/R600/llvm.AMDGPU.class.ll b/test/CodeGen/R600/llvm.AMDGPU.class.ll new file mode 100644 index 0000000..f111eb9 --- /dev/null +++ b/test/CodeGen/R600/llvm.AMDGPU.class.ll @@ -0,0 +1,497 @@ +; RUN: llc -march=amdgcn -mcpu=SI -verify-machineinstrs < %s | FileCheck -check-prefix=SI %s + +declare i1 @llvm.AMDGPU.class.f32(float, i32) #1 +declare i1 @llvm.AMDGPU.class.f64(double, i32) #1 +declare i32 @llvm.r600.read.tidig.x() #1 +declare float @llvm.fabs.f32(float) #1 +declare double @llvm.fabs.f64(double) #1 + +; SI-LABEL: {{^}}test_class_f32: +; SI-DAG: s_load_dword [[SA:s[0-9]+]], s{{\[[0-9]+:[0-9]+\]}}, 0xb +; SI-DAG: s_load_dword [[SB:s[0-9]+]], s{{\[[0-9]+:[0-9]+\]}}, 0xc +; SI: v_mov_b32_e32 [[VB:v[0-9]+]], [[SB]] +; SI: v_cmp_class_f32_e32 vcc, [[SA]], [[VB]] +; SI-NEXT: v_cndmask_b32_e64 [[RESULT:v[0-9]+]], 0, -1, vcc +; SI-NEXT: buffer_store_dword [[RESULT]] +; SI: s_endpgm +define void @test_class_f32(i32 addrspace(1)* %out, float %a, 
i32 %b) #0 { + %result = call i1 @llvm.AMDGPU.class.f32(float %a, i32 %b) #1 + %sext = sext i1 %result to i32 + store i32 %sext, i32 addrspace(1)* %out, align 4 + ret void +} + +; SI-LABEL: {{^}}test_class_fabs_f32: +; SI-DAG: s_load_dword [[SA:s[0-9]+]], s{{\[[0-9]+:[0-9]+\]}}, 0xb +; SI-DAG: s_load_dword [[SB:s[0-9]+]], s{{\[[0-9]+:[0-9]+\]}}, 0xc +; SI: v_mov_b32_e32 [[VB:v[0-9]+]], [[SB]] +; SI: v_cmp_class_f32_e64 [[CMP:s\[[0-9]+:[0-9]+\]]], |[[SA]]|, [[VB]] +; SI-NEXT: v_cndmask_b32_e64 [[RESULT:v[0-9]+]], 0, -1, [[CMP]] +; SI-NEXT: buffer_store_dword [[RESULT]] +; SI: s_endpgm +define void @test_class_fabs_f32(i32 addrspace(1)* %out, float %a, i32 %b) #0 { + %a.fabs = call float @llvm.fabs.f32(float %a) #1 + %result = call i1 @llvm.AMDGPU.class.f32(float %a.fabs, i32 %b) #1 + %sext = sext i1 %result to i32 + store i32 %sext, i32 addrspace(1)* %out, align 4 + ret void +} + +; SI-LABEL: {{^}}test_class_fneg_f32: +; SI-DAG: s_load_dword [[SA:s[0-9]+]], s{{\[[0-9]+:[0-9]+\]}}, 0xb +; SI-DAG: s_load_dword [[SB:s[0-9]+]], s{{\[[0-9]+:[0-9]+\]}}, 0xc +; SI: v_mov_b32_e32 [[VB:v[0-9]+]], [[SB]] +; SI: v_cmp_class_f32_e64 [[CMP:s\[[0-9]+:[0-9]+\]]], -[[SA]], [[VB]] +; SI-NEXT: v_cndmask_b32_e64 [[RESULT:v[0-9]+]], 0, -1, [[CMP]] +; SI-NEXT: buffer_store_dword [[RESULT]] +; SI: s_endpgm +define void @test_class_fneg_f32(i32 addrspace(1)* %out, float %a, i32 %b) #0 { + %a.fneg = fsub float -0.0, %a + %result = call i1 @llvm.AMDGPU.class.f32(float %a.fneg, i32 %b) #1 + %sext = sext i1 %result to i32 + store i32 %sext, i32 addrspace(1)* %out, align 4 + ret void +} + +; SI-LABEL: {{^}}test_class_fneg_fabs_f32: +; SI-DAG: s_load_dword [[SA:s[0-9]+]], s{{\[[0-9]+:[0-9]+\]}}, 0xb +; SI-DAG: s_load_dword [[SB:s[0-9]+]], s{{\[[0-9]+:[0-9]+\]}}, 0xc +; SI: v_mov_b32_e32 [[VB:v[0-9]+]], [[SB]] +; SI: v_cmp_class_f32_e64 [[CMP:s\[[0-9]+:[0-9]+\]]], -|[[SA]]|, [[VB]] +; SI-NEXT: v_cndmask_b32_e64 [[RESULT:v[0-9]+]], 0, -1, [[CMP]] +; SI-NEXT: buffer_store_dword [[RESULT]] +; SI: s_endpgm +define void @test_class_fneg_fabs_f32(i32 addrspace(1)* %out, float %a, i32 %b) #0 { + %a.fabs = call float @llvm.fabs.f32(float %a) #1 + %a.fneg.fabs = fsub float -0.0, %a.fabs + %result = call i1 @llvm.AMDGPU.class.f32(float %a.fneg.fabs, i32 %b) #1 + %sext = sext i1 %result to i32 + store i32 %sext, i32 addrspace(1)* %out, align 4 + ret void +} + +; SI-LABEL: {{^}}test_class_1_f32: +; SI: s_load_dword [[SA:s[0-9]+]], s{{\[[0-9]+:[0-9]+\]}}, 0xb +; SI: v_cmp_class_f32_e64 [[COND:s\[[0-9]+:[0-9]+\]]], [[SA]], 1{{$}} +; SI-NEXT: v_cndmask_b32_e64 [[RESULT:v[0-9]+]], 0, -1, [[COND]] +; SI-NEXT: buffer_store_dword [[RESULT]] +; SI: s_endpgm +define void @test_class_1_f32(i32 addrspace(1)* %out, float %a) #0 { + %result = call i1 @llvm.AMDGPU.class.f32(float %a, i32 1) #1 + %sext = sext i1 %result to i32 + store i32 %sext, i32 addrspace(1)* %out, align 4 + ret void +} + +; SI-LABEL: {{^}}test_class_64_f32: +; SI: s_load_dword [[SA:s[0-9]+]], s{{\[[0-9]+:[0-9]+\]}}, 0xb +; SI: v_cmp_class_f32_e64 [[COND:s\[[0-9]+:[0-9]+\]]], [[SA]], 64{{$}} +; SI-NEXT: v_cndmask_b32_e64 [[RESULT:v[0-9]+]], 0, -1, [[COND]] +; SI-NEXT: buffer_store_dword [[RESULT]] +; SI: s_endpgm +define void @test_class_64_f32(i32 addrspace(1)* %out, float %a) #0 { + %result = call i1 @llvm.AMDGPU.class.f32(float %a, i32 64) #1 + %sext = sext i1 %result to i32 + store i32 %sext, i32 addrspace(1)* %out, align 4 + ret void +} + +; Set all 10 bits of mask +; SI-LABEL: {{^}}test_class_full_mask_f32: +; SI: s_load_dword [[SA:s[0-9]+]], s{{\[[0-9]+:[0-9]+\]}}, 0xb 
+; SI: v_mov_b32_e32 [[MASK:v[0-9]+]], 0x3ff{{$}} +; SI: v_cmp_class_f32_e32 vcc, [[SA]], [[MASK]] +; SI-NEXT: v_cndmask_b32_e64 [[RESULT:v[0-9]+]], 0, -1, vcc +; SI-NEXT: buffer_store_dword [[RESULT]] +; SI: s_endpgm +define void @test_class_full_mask_f32(i32 addrspace(1)* %out, float %a) #0 { + %result = call i1 @llvm.AMDGPU.class.f32(float %a, i32 1023) #1 + %sext = sext i1 %result to i32 + store i32 %sext, i32 addrspace(1)* %out, align 4 + ret void +} + +; SI-LABEL: {{^}}test_class_9bit_mask_f32: +; SI: s_load_dword [[SA:s[0-9]+]], s{{\[[0-9]+:[0-9]+\]}}, 0xb +; SI: v_mov_b32_e32 [[MASK:v[0-9]+]], 0x1ff{{$}} +; SI: v_cmp_class_f32_e32 vcc, [[SA]], [[MASK]] +; SI-NEXT: v_cndmask_b32_e64 [[RESULT:v[0-9]+]], 0, -1, vcc +; SI-NEXT: buffer_store_dword [[RESULT]] +; SI: s_endpgm +define void @test_class_9bit_mask_f32(i32 addrspace(1)* %out, float %a) #0 { + %result = call i1 @llvm.AMDGPU.class.f32(float %a, i32 511) #1 + %sext = sext i1 %result to i32 + store i32 %sext, i32 addrspace(1)* %out, align 4 + ret void +} + +; SI-LABEL: {{^}}v_test_class_full_mask_f32: +; SI-DAG: buffer_load_dword [[VA:v[0-9]+]] +; SI-DAG: v_mov_b32_e32 [[MASK:v[0-9]+]], 0x1ff{{$}} +; SI: v_cmp_class_f32_e32 vcc, [[VA]], [[MASK]] +; SI-NEXT: v_cndmask_b32_e64 [[RESULT:v[0-9]+]], 0, -1, vcc +; SI: buffer_store_dword [[RESULT]] +; SI: s_endpgm +define void @v_test_class_full_mask_f32(i32 addrspace(1)* %out, float addrspace(1)* %in) #0 { + %tid = call i32 @llvm.r600.read.tidig.x() #1 + %gep.in = getelementptr float addrspace(1)* %in, i32 %tid + %gep.out = getelementptr i32 addrspace(1)* %out, i32 %tid + %a = load float addrspace(1)* %gep.in + + %result = call i1 @llvm.AMDGPU.class.f32(float %a, i32 511) #1 + %sext = sext i1 %result to i32 + store i32 %sext, i32 addrspace(1)* %gep.out, align 4 + ret void +} + +; SI-LABEL: {{^}}test_class_inline_imm_constant_dynamic_mask_f32: +; SI-DAG: buffer_load_dword [[VB:v[0-9]+]] +; SI: v_cmp_class_f32_e32 vcc, 1.0, [[VB]] +; SI-NEXT: v_cndmask_b32_e64 [[RESULT:v[0-9]+]], 0, -1, vcc +; SI: buffer_store_dword [[RESULT]] +; SI: s_endpgm +define void @test_class_inline_imm_constant_dynamic_mask_f32(i32 addrspace(1)* %out, i32 addrspace(1)* %in) #0 { + %tid = call i32 @llvm.r600.read.tidig.x() #1 + %gep.in = getelementptr i32 addrspace(1)* %in, i32 %tid + %gep.out = getelementptr i32 addrspace(1)* %out, i32 %tid + %b = load i32 addrspace(1)* %gep.in + + %result = call i1 @llvm.AMDGPU.class.f32(float 1.0, i32 %b) #1 + %sext = sext i1 %result to i32 + store i32 %sext, i32 addrspace(1)* %gep.out, align 4 + ret void +} + +; FIXME: Why isn't this using a literal constant operand? 
+; SI-LABEL: {{^}}test_class_lit_constant_dynamic_mask_f32: +; SI-DAG: buffer_load_dword [[VB:v[0-9]+]] +; SI-DAG: v_mov_b32_e32 [[VK:v[0-9]+]], 0x44800000 +; SI: v_cmp_class_f32_e32 vcc, [[VK]], [[VB]] +; SI-NEXT: v_cndmask_b32_e64 [[RESULT:v[0-9]+]], 0, -1, vcc +; SI: buffer_store_dword [[RESULT]] +; SI: s_endpgm +define void @test_class_lit_constant_dynamic_mask_f32(i32 addrspace(1)* %out, i32 addrspace(1)* %in) #0 { + %tid = call i32 @llvm.r600.read.tidig.x() #1 + %gep.in = getelementptr i32 addrspace(1)* %in, i32 %tid + %gep.out = getelementptr i32 addrspace(1)* %out, i32 %tid + %b = load i32 addrspace(1)* %gep.in + + %result = call i1 @llvm.AMDGPU.class.f32(float 1024.0, i32 %b) #1 + %sext = sext i1 %result to i32 + store i32 %sext, i32 addrspace(1)* %gep.out, align 4 + ret void +} + +; SI-LABEL: {{^}}test_class_f64: +; SI-DAG: s_load_dwordx2 [[SA:s\[[0-9]+:[0-9]+\]]], s{{\[[0-9]+:[0-9]+\]}}, 0xb +; SI-DAG: s_load_dword [[SB:s[0-9]+]], s{{\[[0-9]+:[0-9]+\]}}, 0xd +; SI-DAG: v_mov_b32_e32 [[VB:v[0-9]+]], [[SB]] +; SI: v_cmp_class_f64_e32 vcc, [[SA]], [[VB]] +; SI-NEXT: v_cndmask_b32_e64 [[RESULT:v[0-9]+]], 0, -1, vcc +; SI-NEXT: buffer_store_dword [[RESULT]] +; SI: s_endpgm +define void @test_class_f64(i32 addrspace(1)* %out, double %a, i32 %b) #0 { + %result = call i1 @llvm.AMDGPU.class.f64(double %a, i32 %b) #1 + %sext = sext i1 %result to i32 + store i32 %sext, i32 addrspace(1)* %out, align 4 + ret void +} + +; SI-LABEL: {{^}}test_class_fabs_f64: +; SI-DAG: s_load_dwordx2 [[SA:s\[[0-9]+:[0-9]+\]]], s{{\[[0-9]+:[0-9]+\]}}, 0xb +; SI-DAG: s_load_dword [[SB:s[0-9]+]], s{{\[[0-9]+:[0-9]+\]}}, 0xd +; SI-DAG: v_mov_b32_e32 [[VB:v[0-9]+]], [[SB]] +; SI: v_cmp_class_f64_e64 [[CMP:s\[[0-9]+:[0-9]+\]]], |[[SA]]|, [[VB]] +; SI-NEXT: v_cndmask_b32_e64 [[RESULT:v[0-9]+]], 0, -1, [[CMP]] +; SI-NEXT: buffer_store_dword [[RESULT]] +; SI: s_endpgm +define void @test_class_fabs_f64(i32 addrspace(1)* %out, double %a, i32 %b) #0 { + %a.fabs = call double @llvm.fabs.f64(double %a) #1 + %result = call i1 @llvm.AMDGPU.class.f64(double %a.fabs, i32 %b) #1 + %sext = sext i1 %result to i32 + store i32 %sext, i32 addrspace(1)* %out, align 4 + ret void +} + +; SI-LABEL: {{^}}test_class_fneg_f64: +; SI-DAG: s_load_dwordx2 [[SA:s\[[0-9]+:[0-9]+\]]], s{{\[[0-9]+:[0-9]+\]}}, 0xb +; SI-DAG: s_load_dword [[SB:s[0-9]+]], s{{\[[0-9]+:[0-9]+\]}}, 0xd +; SI-DAG: v_mov_b32_e32 [[VB:v[0-9]+]], [[SB]] +; SI: v_cmp_class_f64_e64 [[CMP:s\[[0-9]+:[0-9]+\]]], -[[SA]], [[VB]] +; SI-NEXT: v_cndmask_b32_e64 [[RESULT:v[0-9]+]], 0, -1, [[CMP]] +; SI-NEXT: buffer_store_dword [[RESULT]] +; SI: s_endpgm +define void @test_class_fneg_f64(i32 addrspace(1)* %out, double %a, i32 %b) #0 { + %a.fneg = fsub double -0.0, %a + %result = call i1 @llvm.AMDGPU.class.f64(double %a.fneg, i32 %b) #1 + %sext = sext i1 %result to i32 + store i32 %sext, i32 addrspace(1)* %out, align 4 + ret void +} + +; SI-LABEL: {{^}}test_class_fneg_fabs_f64: +; SI-DAG: s_load_dwordx2 [[SA:s\[[0-9]+:[0-9]+\]]], s{{\[[0-9]+:[0-9]+\]}}, 0xb +; SI-DAG: s_load_dword [[SB:s[0-9]+]], s{{\[[0-9]+:[0-9]+\]}}, 0xd +; SI-DAG: v_mov_b32_e32 [[VB:v[0-9]+]], [[SB]] +; SI: v_cmp_class_f64_e64 [[CMP:s\[[0-9]+:[0-9]+\]]], -|[[SA]]|, [[VB]] +; SI-NEXT: v_cndmask_b32_e64 [[RESULT:v[0-9]+]], 0, -1, [[CMP]] +; SI-NEXT: buffer_store_dword [[RESULT]] +; SI: s_endpgm +define void @test_class_fneg_fabs_f64(i32 addrspace(1)* %out, double %a, i32 %b) #0 { + %a.fabs = call double @llvm.fabs.f64(double %a) #1 + %a.fneg.fabs = fsub double -0.0, %a.fabs + %result = call i1 
@llvm.AMDGPU.class.f64(double %a.fneg.fabs, i32 %b) #1 + %sext = sext i1 %result to i32 + store i32 %sext, i32 addrspace(1)* %out, align 4 + ret void +} + +; SI-LABEL: {{^}}test_class_1_f64: +; SI: v_cmp_class_f64_e64 {{s\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, 1{{$}} +; SI: s_endpgm +define void @test_class_1_f64(i32 addrspace(1)* %out, double %a) #0 { + %result = call i1 @llvm.AMDGPU.class.f64(double %a, i32 1) #1 + %sext = sext i1 %result to i32 + store i32 %sext, i32 addrspace(1)* %out, align 4 + ret void +} + +; SI-LABEL: {{^}}test_class_64_f64: +; SI: v_cmp_class_f64_e64 {{s\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, 64{{$}} +; SI: s_endpgm +define void @test_class_64_f64(i32 addrspace(1)* %out, double %a) #0 { + %result = call i1 @llvm.AMDGPU.class.f64(double %a, i32 64) #1 + %sext = sext i1 %result to i32 + store i32 %sext, i32 addrspace(1)* %out, align 4 + ret void +} + +; Set all 9 bits of mask +; SI-LABEL: {{^}}test_class_full_mask_f64: +; SI: s_load_dwordx2 [[SA:s\[[0-9]+:[0-9]+\]]], s{{\[[0-9]+:[0-9]+\]}}, 0xb +; SI: v_mov_b32_e32 [[MASK:v[0-9]+]], 0x1ff{{$}} +; SI: v_cmp_class_f64_e32 vcc, [[SA]], [[MASK]] +; SI-NEXT: v_cndmask_b32_e64 [[RESULT:v[0-9]+]], 0, -1, vcc +; SI-NEXT: buffer_store_dword [[RESULT]] +; SI: s_endpgm +define void @test_class_full_mask_f64(i32 addrspace(1)* %out, double %a) #0 { + %result = call i1 @llvm.AMDGPU.class.f64(double %a, i32 511) #1 + %sext = sext i1 %result to i32 + store i32 %sext, i32 addrspace(1)* %out, align 4 + ret void +} + +; SI-LABEL: {{^}}v_test_class_full_mask_f64: +; SI-DAG: buffer_load_dwordx2 [[VA:v\[[0-9]+:[0-9]+\]]] +; SI-DAG: v_mov_b32_e32 [[MASK:v[0-9]+]], 0x1ff{{$}} +; SI: v_cmp_class_f64_e32 vcc, [[VA]], [[MASK]] +; SI-NEXT: v_cndmask_b32_e64 [[RESULT:v[0-9]+]], 0, -1, vcc +; SI: buffer_store_dword [[RESULT]] +; SI: s_endpgm +define void @v_test_class_full_mask_f64(i32 addrspace(1)* %out, double addrspace(1)* %in) #0 { + %tid = call i32 @llvm.r600.read.tidig.x() #1 + %gep.in = getelementptr double addrspace(1)* %in, i32 %tid + %gep.out = getelementptr i32 addrspace(1)* %out, i32 %tid + %a = load double addrspace(1)* %in + + %result = call i1 @llvm.AMDGPU.class.f64(double %a, i32 511) #1 + %sext = sext i1 %result to i32 + store i32 %sext, i32 addrspace(1)* %gep.out, align 4 + ret void +} + +; SI-LABEL: {{^}}test_class_inline_imm_constant_dynamic_mask_f64: +; XSI: v_cmp_class_f64_e32 vcc, 1.0, +; SI: v_cmp_class_f64_e32 vcc, +; SI: s_endpgm +define void @test_class_inline_imm_constant_dynamic_mask_f64(i32 addrspace(1)* %out, i32 addrspace(1)* %in) #0 { + %tid = call i32 @llvm.r600.read.tidig.x() #1 + %gep.in = getelementptr i32 addrspace(1)* %in, i32 %tid + %gep.out = getelementptr i32 addrspace(1)* %out, i32 %tid + %b = load i32 addrspace(1)* %gep.in + + %result = call i1 @llvm.AMDGPU.class.f64(double 1.0, i32 %b) #1 + %sext = sext i1 %result to i32 + store i32 %sext, i32 addrspace(1)* %gep.out, align 4 + ret void +} + +; SI-LABEL: {{^}}test_class_lit_constant_dynamic_mask_f64: +; SI: v_cmp_class_f64_e32 vcc, s{{\[[0-9]+:[0-9]+\]}}, v{{[0-9]+}} +; SI: s_endpgm +define void @test_class_lit_constant_dynamic_mask_f64(i32 addrspace(1)* %out, i32 addrspace(1)* %in) #0 { + %tid = call i32 @llvm.r600.read.tidig.x() #1 + %gep.in = getelementptr i32 addrspace(1)* %in, i32 %tid + %gep.out = getelementptr i32 addrspace(1)* %out, i32 %tid + %b = load i32 addrspace(1)* %gep.in + + %result = call i1 @llvm.AMDGPU.class.f64(double 1024.0, i32 %b) #1 + %sext = sext i1 %result to i32 + store i32 %sext, i32 addrspace(1)* %gep.out, align 4 
+ ret void +} + +; SI-LABEL: {{^}}test_fold_or_class_f32_0: +; SI-NOT: v_cmp_class +; SI: v_cmp_class_f32_e64 {{s\[[0-9]+:[0-9]+\]}}, v{{[0-9]+}}, 3{{$}} +; SI-NOT: v_cmp_class +; SI: s_endpgm +define void @test_fold_or_class_f32_0(i32 addrspace(1)* %out, float addrspace(1)* %in) #0 { + %tid = call i32 @llvm.r600.read.tidig.x() #1 + %gep.in = getelementptr float addrspace(1)* %in, i32 %tid + %gep.out = getelementptr i32 addrspace(1)* %out, i32 %tid + %a = load float addrspace(1)* %gep.in + + %class0 = call i1 @llvm.AMDGPU.class.f32(float %a, i32 1) #1 + %class1 = call i1 @llvm.AMDGPU.class.f32(float %a, i32 3) #1 + %or = or i1 %class0, %class1 + + %sext = sext i1 %or to i32 + store i32 %sext, i32 addrspace(1)* %out, align 4 + ret void +} + +; SI-LABEL: {{^}}test_fold_or3_class_f32_0: +; SI-NOT: v_cmp_class +; SI: v_cmp_class_f32_e64 s{{\[[0-9]+:[0-9]+\]}}, v{{[0-9]+}}, 7{{$}} +; SI-NOT: v_cmp_class +; SI: s_endpgm +define void @test_fold_or3_class_f32_0(i32 addrspace(1)* %out, float addrspace(1)* %in) #0 { + %tid = call i32 @llvm.r600.read.tidig.x() #1 + %gep.in = getelementptr float addrspace(1)* %in, i32 %tid + %gep.out = getelementptr i32 addrspace(1)* %out, i32 %tid + %a = load float addrspace(1)* %gep.in + + %class0 = call i1 @llvm.AMDGPU.class.f32(float %a, i32 1) #1 + %class1 = call i1 @llvm.AMDGPU.class.f32(float %a, i32 2) #1 + %class2 = call i1 @llvm.AMDGPU.class.f32(float %a, i32 4) #1 + %or.0 = or i1 %class0, %class1 + %or.1 = or i1 %or.0, %class2 + + %sext = sext i1 %or.1 to i32 + store i32 %sext, i32 addrspace(1)* %out, align 4 + ret void +} + +; SI-LABEL: {{^}}test_fold_or_all_tests_class_f32_0: +; SI-NOT: v_cmp_class +; SI: v_mov_b32_e32 [[MASK:v[0-9]+]], 0x3ff{{$}} +; SI: v_cmp_class_f32_e32 vcc, v{{[0-9]+}}, [[MASK]]{{$}} +; SI-NOT: v_cmp_class +; SI: s_endpgm +define void @test_fold_or_all_tests_class_f32_0(i32 addrspace(1)* %out, float addrspace(1)* %in) #0 { + %tid = call i32 @llvm.r600.read.tidig.x() #1 + %gep.in = getelementptr float addrspace(1)* %in, i32 %tid + %gep.out = getelementptr i32 addrspace(1)* %out, i32 %tid + %a = load float addrspace(1)* %gep.in + + %class0 = call i1 @llvm.AMDGPU.class.f32(float %a, i32 1) #1 + %class1 = call i1 @llvm.AMDGPU.class.f32(float %a, i32 2) #1 + %class2 = call i1 @llvm.AMDGPU.class.f32(float %a, i32 4) #1 + %class3 = call i1 @llvm.AMDGPU.class.f32(float %a, i32 8) #1 + %class4 = call i1 @llvm.AMDGPU.class.f32(float %a, i32 16) #1 + %class5 = call i1 @llvm.AMDGPU.class.f32(float %a, i32 32) #1 + %class6 = call i1 @llvm.AMDGPU.class.f32(float %a, i32 64) #1 + %class7 = call i1 @llvm.AMDGPU.class.f32(float %a, i32 128) #1 + %class8 = call i1 @llvm.AMDGPU.class.f32(float %a, i32 256) #1 + %class9 = call i1 @llvm.AMDGPU.class.f32(float %a, i32 512) #1 + %or.0 = or i1 %class0, %class1 + %or.1 = or i1 %or.0, %class2 + %or.2 = or i1 %or.1, %class3 + %or.3 = or i1 %or.2, %class4 + %or.4 = or i1 %or.3, %class5 + %or.5 = or i1 %or.4, %class6 + %or.6 = or i1 %or.5, %class7 + %or.7 = or i1 %or.6, %class8 + %or.8 = or i1 %or.7, %class9 + %sext = sext i1 %or.8 to i32 + store i32 %sext, i32 addrspace(1)* %out, align 4 + ret void +} + +; SI-LABEL: {{^}}test_fold_or_class_f32_1: +; SI-NOT: v_cmp_class +; SI: v_cmp_class_f32_e64 {{s\[[0-9]+:[0-9]+\]}}, v{{[0-9]+}}, 12{{$}} +; SI-NOT: v_cmp_class +; SI: s_endpgm +define void @test_fold_or_class_f32_1(i32 addrspace(1)* %out, float addrspace(1)* %in) #0 { + %tid = call i32 @llvm.r600.read.tidig.x() #1 + %gep.in = getelementptr float addrspace(1)* %in, i32 %tid + %gep.out = getelementptr i32 
addrspace(1)* %out, i32 %tid + %a = load float addrspace(1)* %gep.in + + %class0 = call i1 @llvm.AMDGPU.class.f32(float %a, i32 4) #1 + %class1 = call i1 @llvm.AMDGPU.class.f32(float %a, i32 8) #1 + %or = or i1 %class0, %class1 + + %sext = sext i1 %or to i32 + store i32 %sext, i32 addrspace(1)* %out, align 4 + ret void +} + +; SI-LABEL: {{^}}test_fold_or_class_f32_2: +; SI-NOT: v_cmp_class +; SI: v_cmp_class_f32_e64 {{s\[[0-9]+:[0-9]+\]}}, v{{[0-9]+}}, 7{{$}} +; SI-NOT: v_cmp_class +; SI: s_endpgm +define void @test_fold_or_class_f32_2(i32 addrspace(1)* %out, float addrspace(1)* %in) #0 { + %tid = call i32 @llvm.r600.read.tidig.x() #1 + %gep.in = getelementptr float addrspace(1)* %in, i32 %tid + %gep.out = getelementptr i32 addrspace(1)* %out, i32 %tid + %a = load float addrspace(1)* %gep.in + + %class0 = call i1 @llvm.AMDGPU.class.f32(float %a, i32 7) #1 + %class1 = call i1 @llvm.AMDGPU.class.f32(float %a, i32 7) #1 + %or = or i1 %class0, %class1 + + %sext = sext i1 %or to i32 + store i32 %sext, i32 addrspace(1)* %out, align 4 + ret void +} + +; SI-LABEL: {{^}}test_no_fold_or_class_f32_0: +; SI-DAG: v_cmp_class_f32_e64 {{s\[[0-9]+:[0-9]+\]}}, v{{[0-9]+}}, 4{{$}} +; SI-DAG: v_cmp_class_f32_e64 {{s\[[0-9]+:[0-9]+\]}}, s{{[0-9]+}}, 8{{$}} +; SI: s_or_b64 +; SI: s_endpgm +define void @test_no_fold_or_class_f32_0(i32 addrspace(1)* %out, float addrspace(1)* %in, float %b) #0 { + %tid = call i32 @llvm.r600.read.tidig.x() #1 + %gep.in = getelementptr float addrspace(1)* %in, i32 %tid + %gep.out = getelementptr i32 addrspace(1)* %out, i32 %tid + %a = load float addrspace(1)* %gep.in + + %class0 = call i1 @llvm.AMDGPU.class.f32(float %a, i32 4) #1 + %class1 = call i1 @llvm.AMDGPU.class.f32(float %b, i32 8) #1 + %or = or i1 %class0, %class1 + + %sext = sext i1 %or to i32 + store i32 %sext, i32 addrspace(1)* %out, align 4 + ret void +} + +; SI-LABEL: {{^}}test_class_0_f32: +; SI-NOT: v_cmp_class +; SI: v_mov_b32_e32 [[RESULT:v[0-9]+]], 0{{$}} +; SI: buffer_store_dword [[RESULT]] +; SI: s_endpgm +define void @test_class_0_f32(i32 addrspace(1)* %out, float %a) #0 { + %result = call i1 @llvm.AMDGPU.class.f32(float %a, i32 0) #1 + %sext = sext i1 %result to i32 + store i32 %sext, i32 addrspace(1)* %out, align 4 + ret void +} + +; SI-LABEL: {{^}}test_class_0_f64: +; SI-NOT: v_cmp_class +; SI: v_mov_b32_e32 [[RESULT:v[0-9]+]], 0{{$}} +; SI: buffer_store_dword [[RESULT]] +; SI: s_endpgm +define void @test_class_0_f64(i32 addrspace(1)* %out, double %a) #0 { + %result = call i1 @llvm.AMDGPU.class.f64(double %a, i32 0) #1 + %sext = sext i1 %result to i32 + store i32 %sext, i32 addrspace(1)* %out, align 4 + ret void +} + +attributes #0 = { nounwind } +attributes #1 = { nounwind readnone } diff --git a/test/CodeGen/R600/llvm.AMDGPU.cvt_f32_ubyte.ll b/test/CodeGen/R600/llvm.AMDGPU.cvt_f32_ubyte.ll index 7aacbb9..799817e 100644 --- a/test/CodeGen/R600/llvm.AMDGPU.cvt_f32_ubyte.ll +++ b/test/CodeGen/R600/llvm.AMDGPU.cvt_f32_ubyte.ll @@ -1,4 +1,5 @@ -; RUN: llc -march=r600 -mcpu=SI < %s | FileCheck -check-prefix=SI %s +; RUN: llc -march=amdgcn -mcpu=SI < %s | FileCheck -check-prefix=SI %s +; RUN: llc -march=amdgcn -mcpu=tonga < %s | FileCheck -check-prefix=SI %s declare float @llvm.AMDGPU.cvt.f32.ubyte0(i32) nounwind readnone declare float @llvm.AMDGPU.cvt.f32.ubyte1(i32) nounwind readnone diff --git a/test/CodeGen/R600/llvm.AMDGPU.div_fixup.ll b/test/CodeGen/R600/llvm.AMDGPU.div_fixup.ll index 009fd73..55ca9c7 100644 --- a/test/CodeGen/R600/llvm.AMDGPU.div_fixup.ll +++ b/test/CodeGen/R600/llvm.AMDGPU.div_fixup.ll 
@@ -1,25 +1,29 @@ -; RUN: llc -march=r600 -mcpu=SI -verify-machineinstrs < %s | FileCheck -check-prefix=SI %s +; RUN: llc -march=amdgcn -mcpu=SI -verify-machineinstrs < %s | FileCheck -check-prefix=SI -check-prefix=GCN %s +; RUN: llc -march=amdgcn -mcpu=tonga -verify-machineinstrs < %s | FileCheck -check-prefix=VI -check-prefix=GCN %s declare float @llvm.AMDGPU.div.fixup.f32(float, float, float) nounwind readnone declare double @llvm.AMDGPU.div.fixup.f64(double, double, double) nounwind readnone -; SI-LABEL: {{^}}test_div_fixup_f32: +; GCN-LABEL: {{^}}test_div_fixup_f32: ; SI-DAG: s_load_dword [[SA:s[0-9]+]], s{{\[[0-9]+:[0-9]+\]}}, 0xb ; SI-DAG: s_load_dword [[SC:s[0-9]+]], s{{\[[0-9]+:[0-9]+\]}}, 0xd ; SI-DAG: s_load_dword [[SB:s[0-9]+]], s{{\[[0-9]+:[0-9]+\]}}, 0xc -; SI-DAG: v_mov_b32_e32 [[VC:v[0-9]+]], [[SC]] -; SI-DAG: v_mov_b32_e32 [[VB:v[0-9]+]], [[SB]] -; SI: v_div_fixup_f32 [[RESULT:v[0-9]+]], [[SA]], [[VB]], [[VC]] -; SI: buffer_store_dword [[RESULT]], -; SI: s_endpgm +; VI-DAG: s_load_dword [[SA:s[0-9]+]], s{{\[[0-9]+:[0-9]+\]}}, 0x2c +; VI-DAG: s_load_dword [[SC:s[0-9]+]], s{{\[[0-9]+:[0-9]+\]}}, 0x34 +; VI-DAG: s_load_dword [[SB:s[0-9]+]], s{{\[[0-9]+:[0-9]+\]}}, 0x30 +; GCN-DAG: v_mov_b32_e32 [[VC:v[0-9]+]], [[SC]] +; GCN-DAG: v_mov_b32_e32 [[VB:v[0-9]+]], [[SB]] +; GCN: v_div_fixup_f32 [[RESULT:v[0-9]+]], [[SA]], [[VB]], [[VC]] +; GCN: buffer_store_dword [[RESULT]], +; GCN: s_endpgm define void @test_div_fixup_f32(float addrspace(1)* %out, float %a, float %b, float %c) nounwind { %result = call float @llvm.AMDGPU.div.fixup.f32(float %a, float %b, float %c) nounwind readnone store float %result, float addrspace(1)* %out, align 4 ret void } -; SI-LABEL: {{^}}test_div_fixup_f64: -; SI: v_div_fixup_f64 +; GCN-LABEL: {{^}}test_div_fixup_f64: +; GCN: v_div_fixup_f64 define void @test_div_fixup_f64(double addrspace(1)* %out, double %a, double %b, double %c) nounwind { %result = call double @llvm.AMDGPU.div.fixup.f64(double %a, double %b, double %c) nounwind readnone store double %result, double addrspace(1)* %out, align 8 diff --git a/test/CodeGen/R600/llvm.AMDGPU.div_fmas.ll b/test/CodeGen/R600/llvm.AMDGPU.div_fmas.ll index dcca9e9..239fd53 100644 --- a/test/CodeGen/R600/llvm.AMDGPU.div_fmas.ll +++ b/test/CodeGen/R600/llvm.AMDGPU.div_fmas.ll @@ -1,27 +1,179 @@ -; RUN: llc -march=r600 -mcpu=SI -verify-machineinstrs < %s | FileCheck -check-prefix=SI %s +; RUN: llc -march=amdgcn -mcpu=SI -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=SI %s +; XUN: llc -march=amdgcn -mcpu=tonga -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=VI %s +; FIXME: Enable for VI. 
+ +declare i32 @llvm.r600.read.tidig.x() nounwind readnone +declare void @llvm.AMDGPU.barrier.global() nounwind noduplicate declare float @llvm.AMDGPU.div.fmas.f32(float, float, float, i1) nounwind readnone declare double @llvm.AMDGPU.div.fmas.f64(double, double, double, i1) nounwind readnone -; SI-LABEL: {{^}}test_div_fmas_f32: +; GCN-LABEL: {{^}}test_div_fmas_f32: ; SI-DAG: s_load_dword [[SA:s[0-9]+]], s{{\[[0-9]+:[0-9]+\]}}, 0xb ; SI-DAG: s_load_dword [[SC:s[0-9]+]], s{{\[[0-9]+:[0-9]+\]}}, 0xd ; SI-DAG: s_load_dword [[SB:s[0-9]+]], s{{\[[0-9]+:[0-9]+\]}}, 0xc +; VI-DAG: s_load_dword [[SA:s[0-9]+]], s{{\[[0-9]+:[0-9]+\]}}, 0x2c +; VI-DAG: s_load_dword [[SC:s[0-9]+]], s{{\[[0-9]+:[0-9]+\]}}, 0x34 +; VI-DAG: s_load_dword [[SB:s[0-9]+]], s{{\[[0-9]+:[0-9]+\]}}, 0x30 +; GCN-DAG: v_mov_b32_e32 [[VC:v[0-9]+]], [[SC]] +; GCN-DAG: v_mov_b32_e32 [[VB:v[0-9]+]], [[SB]] +; GCN-DAG: v_mov_b32_e32 [[VA:v[0-9]+]], [[SA]] +; GCN: v_div_fmas_f32 [[RESULT:v[0-9]+]], [[VB]], [[VA]], [[VC]] +; GCN: buffer_store_dword [[RESULT]], +; GCN: s_endpgm +define void @test_div_fmas_f32(float addrspace(1)* %out, float %a, float %b, float %c, i1 %d) nounwind { + %result = call float @llvm.AMDGPU.div.fmas.f32(float %a, float %b, float %c, i1 %d) nounwind readnone + store float %result, float addrspace(1)* %out, align 4 + ret void +} + +; GCN-LABEL: {{^}}test_div_fmas_f32_inline_imm_0: +; SI-DAG: s_load_dword [[SC:s[0-9]+]], s{{\[[0-9]+:[0-9]+\]}}, 0xd +; SI-DAG: s_load_dword [[SB:s[0-9]+]], s{{\[[0-9]+:[0-9]+\]}}, 0xc ; SI-DAG: v_mov_b32_e32 [[VC:v[0-9]+]], [[SC]] ; SI-DAG: v_mov_b32_e32 [[VB:v[0-9]+]], [[SB]] -; SI: v_div_fmas_f32 [[RESULT:v[0-9]+]], [[SA]], [[VB]], [[VC]] +; SI: v_div_fmas_f32 [[RESULT:v[0-9]+]], 1.0, [[VB]], [[VC]] ; SI: buffer_store_dword [[RESULT]], ; SI: s_endpgm -define void @test_div_fmas_f32(float addrspace(1)* %out, float %a, float %b, float %c, i1 %d) nounwind { - %result = call float @llvm.AMDGPU.div.fmas.f32(float %a, float %b, float %c, i1 %d) nounwind readnone +define void @test_div_fmas_f32_inline_imm_0(float addrspace(1)* %out, float %a, float %b, float %c, i1 %d) nounwind { + %result = call float @llvm.AMDGPU.div.fmas.f32(float 1.0, float %b, float %c, i1 %d) nounwind readnone + store float %result, float addrspace(1)* %out, align 4 + ret void +} + +; GCN-LABEL: {{^}}test_div_fmas_f32_inline_imm_1: +; SI-DAG: s_load_dword [[SA:s[0-9]+]], s{{\[[0-9]+:[0-9]+\]}}, 0xb +; SI-DAG: s_load_dword [[SC:s[0-9]+]], s{{\[[0-9]+:[0-9]+\]}}, 0xd +; SI-DAG: v_mov_b32_e32 [[VC:v[0-9]+]], [[SC]] +; SI-DAG: v_mov_b32_e32 [[VA:v[0-9]+]], [[SA]] +; SI: v_div_fmas_f32 [[RESULT:v[0-9]+]], 1.0, [[VA]], [[VC]] +; SI: buffer_store_dword [[RESULT]], +; SI: s_endpgm +define void @test_div_fmas_f32_inline_imm_1(float addrspace(1)* %out, float %a, float %b, float %c, i1 %d) nounwind { + %result = call float @llvm.AMDGPU.div.fmas.f32(float %a, float 1.0, float %c, i1 %d) nounwind readnone store float %result, float addrspace(1)* %out, align 4 ret void } -; SI-LABEL: {{^}}test_div_fmas_f64: -; SI: v_div_fmas_f64 +; GCN-LABEL: {{^}}test_div_fmas_f32_inline_imm_2: +; SI-DAG: s_load_dword [[SA:s[0-9]+]], s{{\[[0-9]+:[0-9]+\]}}, 0xb +; SI-DAG: s_load_dword [[SB:s[0-9]+]], s{{\[[0-9]+:[0-9]+\]}}, 0xc +; SI-DAG: v_mov_b32_e32 [[VA:v[0-9]+]], [[SA]] +; SI-DAG: v_mov_b32_e32 [[VB:v[0-9]+]], [[SB]] +; SI: v_div_fmas_f32 [[RESULT:v[0-9]+]], [[VA]], [[VB]], 1.0 +; SI: buffer_store_dword [[RESULT]], +; SI: s_endpgm +define void @test_div_fmas_f32_inline_imm_2(float addrspace(1)* %out, float %a, float %b, float %c, i1 %d) 
nounwind { + %result = call float @llvm.AMDGPU.div.fmas.f32(float %a, float %b, float 1.0, i1 %d) nounwind readnone + store float %result, float addrspace(1)* %out, align 4 + ret void +} + +; GCN-LABEL: {{^}}test_div_fmas_f64: +; GCN: v_div_fmas_f64 define void @test_div_fmas_f64(double addrspace(1)* %out, double %a, double %b, double %c, i1 %d) nounwind { %result = call double @llvm.AMDGPU.div.fmas.f64(double %a, double %b, double %c, i1 %d) nounwind readnone store double %result, double addrspace(1)* %out, align 8 ret void } + +; GCN-LABEL: {{^}}test_div_fmas_f32_cond_to_vcc: +; SI: v_cmp_eq_i32_e64 vcc, s{{[0-9]+}}, 0 +; SI: v_div_fmas_f32 {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}} +define void @test_div_fmas_f32_cond_to_vcc(float addrspace(1)* %out, float %a, float %b, float %c, i32 %i) nounwind { + %cmp = icmp eq i32 %i, 0 + %result = call float @llvm.AMDGPU.div.fmas.f32(float %a, float %b, float %c, i1 %cmp) nounwind readnone + store float %result, float addrspace(1)* %out, align 4 + ret void +} + +; GCN-LABEL: {{^}}test_div_fmas_f32_imm_false_cond_to_vcc: +; SI: s_mov_b64 vcc, 0 +; SI: v_div_fmas_f32 {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}} +define void @test_div_fmas_f32_imm_false_cond_to_vcc(float addrspace(1)* %out, float %a, float %b, float %c) nounwind { + %result = call float @llvm.AMDGPU.div.fmas.f32(float %a, float %b, float %c, i1 false) nounwind readnone + store float %result, float addrspace(1)* %out, align 4 + ret void +} + +; GCN-LABEL: {{^}}test_div_fmas_f32_imm_true_cond_to_vcc: +; SI: s_mov_b64 vcc, -1 +; SI: v_div_fmas_f32 {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}} +define void @test_div_fmas_f32_imm_true_cond_to_vcc(float addrspace(1)* %out, float %a, float %b, float %c) nounwind { + %result = call float @llvm.AMDGPU.div.fmas.f32(float %a, float %b, float %c, i1 true) nounwind readnone + store float %result, float addrspace(1)* %out, align 4 + ret void +} + +; GCN-LABEL: {{^}}test_div_fmas_f32_logical_cond_to_vcc: +; SI-DAG: buffer_load_dword [[A:v[0-9]+]], {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, 0 addr64{{$}} +; SI-DAG: buffer_load_dword [[B:v[0-9]+]], {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, 0 addr64 offset:4{{$}} +; SI-DAG: buffer_load_dword [[C:v[0-9]+]], {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, 0 addr64 offset:8{{$}} + +; SI-DAG: v_cmp_eq_i32_e64 [[CMP0:s\[[0-9]+:[0-9]+\]]], v{{[0-9]+}}, 0 +; SI-DAG: v_cmp_ne_i32_e64 [[CMP1:s\[[0-9]+:[0-9]+\]]], s{{[0-9]+}}, 0 +; SI: s_and_b64 vcc, [[CMP0]], [[CMP1]] +; SI: v_div_fmas_f32 {{v[0-9]+}}, [[A]], [[B]], [[C]] +; SI: s_endpgm +define void @test_div_fmas_f32_logical_cond_to_vcc(float addrspace(1)* %out, float addrspace(1)* %in, i32 %d) nounwind { + %tid = call i32 @llvm.r600.read.tidig.x() nounwind readnone + %gep.a = getelementptr float addrspace(1)* %in, i32 %tid + %gep.b = getelementptr float addrspace(1)* %gep.a, i32 1 + %gep.c = getelementptr float addrspace(1)* %gep.a, i32 2 + %gep.out = getelementptr float addrspace(1)* %out, i32 2 + + %a = load float addrspace(1)* %gep.a + %b = load float addrspace(1)* %gep.b + %c = load float addrspace(1)* %gep.c + + %cmp0 = icmp eq i32 %tid, 0 + %cmp1 = icmp ne i32 %d, 0 + %and = and i1 %cmp0, %cmp1 + + %result = call float @llvm.AMDGPU.div.fmas.f32(float %a, float %b, float %c, i1 %and) nounwind readnone + store float %result, float addrspace(1)* %gep.out, align 4 + ret void +} + +; GCN-LABEL: {{^}}test_div_fmas_f32_i1_phi_vcc: +; SI: v_cmp_eq_i32_e64 [[CMPTID:s\[[0-9]+:[0-9]+\]]], v{{[0-9]+}}, 0 +; SI: s_and_saveexec_b64 
[[CMPTID]], [[CMPTID]] +; SI: s_xor_b64 [[CMPTID]], exec, [[CMPTID]] + +; SI: buffer_load_dword [[LOAD:v[0-9]+]] +; SI: v_cmp_ne_i32_e64 [[CMPLOAD:s\[[0-9]+:[0-9]+\]]], [[LOAD]], 0 +; SI: v_cndmask_b32_e64 {{v[0-9]+}}, 0, -1, [[CMPLOAD]] + + +; SI: BB9_2: +; SI: s_or_b64 exec, exec, [[CMPTID]] +; SI: v_cmp_ne_i32_e32 vcc, 0, v0 +; SI: v_div_fmas_f32 {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}} +; SI: buffer_store_dword +; SI: s_endpgm +define void @test_div_fmas_f32_i1_phi_vcc(float addrspace(1)* %out, float addrspace(1)* %in, i32 addrspace(1)* %dummy) nounwind { +entry: + %tid = call i32 @llvm.r600.read.tidig.x() nounwind readnone + %gep.out = getelementptr float addrspace(1)* %out, i32 2 + %gep.a = getelementptr float addrspace(1)* %in, i32 %tid + %gep.b = getelementptr float addrspace(1)* %gep.a, i32 1 + %gep.c = getelementptr float addrspace(1)* %gep.a, i32 2 + + %a = load float addrspace(1)* %gep.a + %b = load float addrspace(1)* %gep.b + %c = load float addrspace(1)* %gep.c + + %cmp0 = icmp eq i32 %tid, 0 + br i1 %cmp0, label %bb, label %exit + +bb: + %val = load i32 addrspace(1)* %dummy + %cmp1 = icmp ne i32 %val, 0 + br label %exit + +exit: + %cond = phi i1 [false, %entry], [%cmp1, %bb] + %result = call float @llvm.AMDGPU.div.fmas.f32(float %a, float %b, float %c, i1 %cond) nounwind readnone + store float %result, float addrspace(1)* %gep.out, align 4 + ret void +} diff --git a/test/CodeGen/R600/llvm.AMDGPU.div_scale.ll b/test/CodeGen/R600/llvm.AMDGPU.div_scale.ll index 641c8ca..5773da0 100644 --- a/test/CodeGen/R600/llvm.AMDGPU.div_scale.ll +++ b/test/CodeGen/R600/llvm.AMDGPU.div_scale.ll @@ -1,12 +1,13 @@ -; RUN: llc -march=r600 -mcpu=SI -verify-machineinstrs < %s | FileCheck -check-prefix=SI %s +; RUN: llc -march=amdgcn -mcpu=SI -verify-machineinstrs < %s | FileCheck -check-prefix=SI %s declare i32 @llvm.r600.read.tidig.x() nounwind readnone declare { float, i1 } @llvm.AMDGPU.div.scale.f32(float, float, i1) nounwind readnone declare { double, i1 } @llvm.AMDGPU.div.scale.f64(double, double, i1) nounwind readnone +declare float @llvm.fabs.f32(float) nounwind readnone ; SI-LABEL @test_div_scale_f32_1: ; SI-DAG: buffer_load_dword [[A:v[0-9]+]], {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, 0 addr64 -; SI-DAG: buffer_load_dword [[B:v[0-9]+]], {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, 0 addr64 offset:0x4 +; SI-DAG: buffer_load_dword [[B:v[0-9]+]], {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, 0 addr64 offset:4 ; SI: v_div_scale_f32 [[RESULT0:v[0-9]+]], [[RESULT1:s\[[0-9]+:[0-9]+\]]], [[B]], [[B]], [[A]] ; SI: buffer_store_dword [[RESULT0]] ; SI: s_endpgm @@ -26,7 +27,7 @@ define void @test_div_scale_f32_1(float addrspace(1)* %out, float addrspace(1)* ; SI-LABEL @test_div_scale_f32_2: ; SI-DAG: buffer_load_dword [[A:v[0-9]+]], {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, 0 addr64 -; SI-DAG: buffer_load_dword [[B:v[0-9]+]], {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, 0 addr64 offset:0x4 +; SI-DAG: buffer_load_dword [[B:v[0-9]+]], {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, 0 addr64 offset:4 ; SI: v_div_scale_f32 [[RESULT0:v[0-9]+]], [[RESULT1:s\[[0-9]+:[0-9]+\]]], [[A]], [[B]], [[A]] ; SI: buffer_store_dword [[RESULT0]] ; SI: s_endpgm @@ -46,7 +47,7 @@ define void @test_div_scale_f32_2(float addrspace(1)* %out, float addrspace(1)* ; SI-LABEL @test_div_scale_f64_1: ; SI-DAG: buffer_load_dwordx2 [[A:v\[[0-9]+:[0-9]+\]]], {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, 0 addr64 -; SI-DAG: buffer_load_dwordx2 [[B:v\[[0-9]+:[0-9]+\]]], {{v\[[0-9]+:[0-9]+\]}}, 
{{s\[[0-9]+:[0-9]+\]}}, 0 addr64 offset:0x8 +; SI-DAG: buffer_load_dwordx2 [[B:v\[[0-9]+:[0-9]+\]]], {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, 0 addr64 offset:8 ; SI: v_div_scale_f64 [[RESULT0:v\[[0-9]+:[0-9]+\]]], [[RESULT1:s\[[0-9]+:[0-9]+\]]], [[B]], [[B]], [[A]] ; SI: buffer_store_dwordx2 [[RESULT0]] ; SI: s_endpgm @@ -66,7 +67,7 @@ define void @test_div_scale_f64_1(double addrspace(1)* %out, double addrspace(1) ; SI-LABEL @test_div_scale_f64_1: ; SI-DAG: buffer_load_dwordx2 [[A:v\[[0-9]+:[0-9]+\]]], {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, 0 addr64 -; SI-DAG: buffer_load_dwordx2 [[B:v\[[0-9]+:[0-9]+\]]], {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, 0 addr64 offset:0x8 +; SI-DAG: buffer_load_dwordx2 [[B:v\[[0-9]+:[0-9]+\]]], {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, 0 addr64 offset:8 ; SI: v_div_scale_f64 [[RESULT0:v\[[0-9]+:[0-9]+\]]], [[RESULT1:s\[[0-9]+:[0-9]+\]]], [[A]], [[B]], [[A]] ; SI: buffer_store_dwordx2 [[RESULT0]] ; SI: s_endpgm @@ -285,3 +286,79 @@ define void @test_div_scale_f64_all_scalar_2(double addrspace(1)* %out, double % store double %result0, double addrspace(1)* %out, align 8 ret void } + +; SI-LABEL @test_div_scale_f32_inline_imm_num: +; SI-DAG: buffer_load_dword [[A:v[0-9]+]], {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, 0 addr64{{$}} +; SI: v_div_scale_f32 [[RESULT0:v[0-9]+]], [[RESULT1:s\[[0-9]+:[0-9]+\]]], [[A]], [[A]], 1.0 +; SI: buffer_store_dword [[RESULT0]] +; SI: s_endpgm +define void @test_div_scale_f32_inline_imm_num(float addrspace(1)* %out, float addrspace(1)* %in) nounwind { + %tid = call i32 @llvm.r600.read.tidig.x() nounwind readnone + %gep.0 = getelementptr float addrspace(1)* %in, i32 %tid + %a = load float addrspace(1)* %gep.0, align 4 + + %result = call { float, i1 } @llvm.AMDGPU.div.scale.f32(float 1.0, float %a, i1 false) nounwind readnone + %result0 = extractvalue { float, i1 } %result, 0 + store float %result0, float addrspace(1)* %out, align 4 + ret void +} + +; SI-LABEL @test_div_scale_f32_inline_imm_den: +; SI-DAG: buffer_load_dword [[A:v[0-9]+]], {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, 0 addr64{{$}} +; SI: v_div_scale_f32 [[RESULT0:v[0-9]+]], [[RESULT1:s\[[0-9]+:[0-9]+\]]], 2.0, 2.0, [[A]] +; SI: buffer_store_dword [[RESULT0]] +; SI: s_endpgm +define void @test_div_scale_f32_inline_imm_den(float addrspace(1)* %out, float addrspace(1)* %in) nounwind { + %tid = call i32 @llvm.r600.read.tidig.x() nounwind readnone + %gep.0 = getelementptr float addrspace(1)* %in, i32 %tid + %a = load float addrspace(1)* %gep.0, align 4 + + %result = call { float, i1 } @llvm.AMDGPU.div.scale.f32(float %a, float 2.0, i1 false) nounwind readnone + %result0 = extractvalue { float, i1 } %result, 0 + store float %result0, float addrspace(1)* %out, align 4 + ret void +} + +; SI-LABEL @test_div_scale_f32_fabs_num: +; SI-DAG: buffer_load_dword [[A:v[0-9]+]], {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, 0 addr64 +; SI-DAG: buffer_load_dword [[B:v[0-9]+]], {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, 0 addr64 offset:4 +; SI: v_div_scale_f32 [[RESULT0:v[0-9]+]], [[RESULT1:s\[[0-9]+:[0-9]+\]]], [[B]], [[B]], |[[A]]| +; SI: buffer_store_dword [[RESULT0]] +; SI: s_endpgm +define void @test_div_scale_f32_fabs_num(float addrspace(1)* %out, float addrspace(1)* %in) nounwind { + %tid = call i32 @llvm.r600.read.tidig.x() nounwind readnone + %gep.0 = getelementptr float addrspace(1)* %in, i32 %tid + %gep.1 = getelementptr float addrspace(1)* %gep.0, i32 1 + + %a = load float addrspace(1)* %gep.0, align 4 + %b = load float addrspace(1)* 
%gep.1, align 4 + + %a.fabs = call float @llvm.fabs.f32(float %a) nounwind readnone + + %result = call { float, i1 } @llvm.AMDGPU.div.scale.f32(float %a.fabs, float %b, i1 false) nounwind readnone + %result0 = extractvalue { float, i1 } %result, 0 + store float %result0, float addrspace(1)* %out, align 4 + ret void +} + +; SI-LABEL @test_div_scale_f32_fabs_den: +; SI-DAG: buffer_load_dword [[A:v[0-9]+]], {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, 0 addr64 +; SI-DAG: buffer_load_dword [[B:v[0-9]+]], {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, 0 addr64 offset:4 +; SI: v_div_scale_f32 [[RESULT0:v[0-9]+]], [[RESULT1:s\[[0-9]+:[0-9]+\]]], |[[B]]|, |[[B]]|, [[A]] +; SI: buffer_store_dword [[RESULT0]] +; SI: s_endpgm +define void @test_div_scale_f32_fabs_den(float addrspace(1)* %out, float addrspace(1)* %in) nounwind { + %tid = call i32 @llvm.r600.read.tidig.x() nounwind readnone + %gep.0 = getelementptr float addrspace(1)* %in, i32 %tid + %gep.1 = getelementptr float addrspace(1)* %gep.0, i32 1 + + %a = load float addrspace(1)* %gep.0, align 4 + %b = load float addrspace(1)* %gep.1, align 4 + + %b.fabs = call float @llvm.fabs.f32(float %b) nounwind readnone + + %result = call { float, i1 } @llvm.AMDGPU.div.scale.f32(float %a, float %b.fabs, i1 false) nounwind readnone + %result0 = extractvalue { float, i1 } %result, 0 + store float %result0, float addrspace(1)* %out, align 4 + ret void +} diff --git a/test/CodeGen/R600/llvm.AMDGPU.fract.ll b/test/CodeGen/R600/llvm.AMDGPU.fract.ll index 235068c..7d15300 100644 --- a/test/CodeGen/R600/llvm.AMDGPU.fract.ll +++ b/test/CodeGen/R600/llvm.AMDGPU.fract.ll @@ -1,4 +1,5 @@ -; RUN: llc -march=r600 -mcpu=SI -verify-machineinstrs < %s | FileCheck -check-prefix=SI -check-prefix=FUNC %s +; RUN: llc -march=amdgcn -mcpu=SI -verify-machineinstrs < %s | FileCheck -check-prefix=SI -check-prefix=FUNC %s +; RUN: llc -march=amdgcn -mcpu=tonga -verify-machineinstrs < %s | FileCheck -check-prefix=SI -check-prefix=FUNC %s ; RUN: llc -march=r600 -mcpu=cypress -verify-machineinstrs < %s | FileCheck -check-prefix=EG -check-prefix=FUNC %s declare float @llvm.AMDGPU.fract.f32(float) nounwind readnone diff --git a/test/CodeGen/R600/llvm.AMDGPU.imad24.ll b/test/CodeGen/R600/llvm.AMDGPU.imad24.ll index 8998840..42102e3 100644 --- a/test/CodeGen/R600/llvm.AMDGPU.imad24.ll +++ b/test/CodeGen/R600/llvm.AMDGPU.imad24.ll @@ -1,4 +1,5 @@ -; RUN: llc -march=r600 -mcpu=SI -verify-machineinstrs < %s | FileCheck -check-prefix=SI -check-prefix=FUNC %s +; RUN: llc -march=amdgcn -mcpu=SI -verify-machineinstrs < %s | FileCheck -check-prefix=SI -check-prefix=FUNC %s +; RUN: llc -march=amdgcn -mcpu=tonga -verify-machineinstrs < %s | FileCheck -check-prefix=SI -check-prefix=FUNC %s ; RUN: llc -march=r600 -mcpu=cayman -verify-machineinstrs < %s | FileCheck -check-prefix=CM -check-prefix=FUNC %s ; RUN: llc -march=r600 -mcpu=redwood -verify-machineinstrs < %s | FileCheck -check-prefix=R600 -check-prefix=FUNC %s ; XUN: llc -march=r600 -mcpu=r600 -verify-machineinstrs < %s | FileCheck -check-prefix=R600 -check-prefix=FUNC %s diff --git a/test/CodeGen/R600/llvm.AMDGPU.imax.ll b/test/CodeGen/R600/llvm.AMDGPU.imax.ll index dac21a4..ce7fca0 100644 --- a/test/CodeGen/R600/llvm.AMDGPU.imax.ll +++ b/test/CodeGen/R600/llvm.AMDGPU.imax.ll @@ -1,4 +1,5 @@ -; RUN: llc < %s -march=r600 -mcpu=verde -verify-machineinstrs | FileCheck -check-prefix=SI %s +; RUN: llc < %s -march=amdgcn -mcpu=verde -verify-machineinstrs | FileCheck -check-prefix=SI %s +; RUN: llc < %s -march=amdgcn -mcpu=tonga 
-verify-machineinstrs | FileCheck -check-prefix=SI %s ; SI-LABEL: {{^}}vector_imax: ; SI: v_max_i32_e32 @@ -29,4 +30,4 @@ declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float attributes #0 = { nounwind } attributes #1 = { nounwind readnone } -!0 = metadata !{metadata !"const", null, i32 1} +!0 = !{!"const", null, i32 1} diff --git a/test/CodeGen/R600/llvm.AMDGPU.imin.ll b/test/CodeGen/R600/llvm.AMDGPU.imin.ll index 462c497..15cd38b 100644 --- a/test/CodeGen/R600/llvm.AMDGPU.imin.ll +++ b/test/CodeGen/R600/llvm.AMDGPU.imin.ll @@ -1,4 +1,5 @@ -; RUN: llc < %s -march=r600 -mcpu=verde -verify-machineinstrs | FileCheck -check-prefix=SI %s +; RUN: llc < %s -march=amdgcn -mcpu=verde -verify-machineinstrs | FileCheck -check-prefix=SI %s +; RUN: llc < %s -march=amdgcn -mcpu=tonga -verify-machineinstrs | FileCheck -check-prefix=SI %s ; SI-LABEL: {{^}}vector_imin: ; SI: v_min_i32_e32 @@ -29,4 +30,4 @@ declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float attributes #0 = { nounwind } attributes #1 = { nounwind readnone } -!0 = metadata !{metadata !"const", null, i32 1} +!0 = !{!"const", null, i32 1} diff --git a/test/CodeGen/R600/llvm.AMDGPU.imul24.ll b/test/CodeGen/R600/llvm.AMDGPU.imul24.ll index db563dd..fdc1172 100644 --- a/test/CodeGen/R600/llvm.AMDGPU.imul24.ll +++ b/test/CodeGen/R600/llvm.AMDGPU.imul24.ll @@ -1,4 +1,5 @@ -; RUN: llc -march=r600 -mcpu=SI -verify-machineinstrs < %s | FileCheck -check-prefix=SI -check-prefix=FUNC %s +; RUN: llc -march=amdgcn -mcpu=SI -verify-machineinstrs < %s | FileCheck -check-prefix=SI -check-prefix=FUNC %s +; RUN: llc -march=amdgcn -mcpu=tonga -verify-machineinstrs < %s | FileCheck -check-prefix=SI -check-prefix=FUNC %s ; RUN: llc -march=r600 -mcpu=cayman -verify-machineinstrs < %s | FileCheck -check-prefix=CM -check-prefix=FUNC %s ; RUN: llc -march=r600 -mcpu=redwood -verify-machineinstrs < %s | FileCheck -check-prefix=R600 -check-prefix=FUNC %s diff --git a/test/CodeGen/R600/llvm.AMDGPU.kill.ll b/test/CodeGen/R600/llvm.AMDGPU.kill.ll index 988b43c..d1ff3b1 100644 --- a/test/CodeGen/R600/llvm.AMDGPU.kill.ll +++ b/test/CodeGen/R600/llvm.AMDGPU.kill.ll @@ -1,4 +1,5 @@ -; RUN: llc -march=r600 -mcpu=verde -verify-machineinstrs < %s | FileCheck -check-prefix=SI %s +; RUN: llc -march=amdgcn -mcpu=verde -verify-machineinstrs < %s | FileCheck -check-prefix=SI %s +; RUN: llc -march=amdgcn -mcpu=tonga -verify-machineinstrs < %s | FileCheck -check-prefix=SI %s ; SI-LABEL: {{^}}kill_gs_const: ; SI-NOT: v_cmpx_le_f32 @@ -19,4 +20,4 @@ declare void @llvm.AMDGPU.kill(float) attributes #0 = { "ShaderType"="2" } -!0 = metadata !{metadata !"const", null, i32 1} +!0 = !{!"const", null, i32 1} diff --git a/test/CodeGen/R600/llvm.AMDGPU.ldexp.ll b/test/CodeGen/R600/llvm.AMDGPU.ldexp.ll index 72719fe..a59c0ce 100644 --- a/test/CodeGen/R600/llvm.AMDGPU.ldexp.ll +++ b/test/CodeGen/R600/llvm.AMDGPU.ldexp.ll @@ -1,4 +1,5 @@ -; RUN: llc -march=r600 -mcpu=SI -verify-machineinstrs < %s | FileCheck -check-prefix=SI %s +; RUN: llc -march=amdgcn -mcpu=SI -verify-machineinstrs < %s | FileCheck -check-prefix=SI %s +; RUN: llc -march=amdgcn -mcpu=tonga -verify-machineinstrs < %s | FileCheck -check-prefix=SI %s declare float @llvm.AMDGPU.ldexp.f32(float, i32) nounwind readnone declare double @llvm.AMDGPU.ldexp.f64(double, i32) nounwind readnone diff --git a/test/CodeGen/R600/llvm.AMDGPU.legacy.rsq.ll b/test/CodeGen/R600/llvm.AMDGPU.legacy.rsq.ll index 6e3fa25..4cafd56 100644 --- a/test/CodeGen/R600/llvm.AMDGPU.legacy.rsq.ll +++ 
b/test/CodeGen/R600/llvm.AMDGPU.legacy.rsq.ll @@ -1,4 +1,4 @@ -; RUN: llc -march=r600 -mcpu=SI -verify-machineinstrs < %s | FileCheck -check-prefix=SI -check-prefix=FUNC %s +; RUN: llc -march=amdgcn -mcpu=SI -verify-machineinstrs < %s | FileCheck -check-prefix=SI -check-prefix=FUNC %s ; RUN: llc -march=r600 -mcpu=cypress -verify-machineinstrs < %s | FileCheck -check-prefix=EG -check-prefix=FUNC %s declare float @llvm.AMDGPU.legacy.rsq(float) nounwind readnone diff --git a/test/CodeGen/R600/llvm.AMDGPU.rcp.f64.ll b/test/CodeGen/R600/llvm.AMDGPU.rcp.f64.ll index c4b04c5..d2a655b 100644 --- a/test/CodeGen/R600/llvm.AMDGPU.rcp.f64.ll +++ b/test/CodeGen/R600/llvm.AMDGPU.rcp.f64.ll @@ -1,4 +1,5 @@ -; RUN: llc -march=r600 -mcpu=SI -verify-machineinstrs < %s | FileCheck -check-prefix=SI -check-prefix=FUNC %s +; RUN: llc -march=amdgcn -mcpu=SI -verify-machineinstrs < %s | FileCheck -check-prefix=SI -check-prefix=FUNC %s +; RUN: llc -march=amdgcn -mcpu=tonga -verify-machineinstrs < %s | FileCheck -check-prefix=SI -check-prefix=FUNC %s declare double @llvm.AMDGPU.rcp.f64(double) nounwind readnone declare double @llvm.sqrt.f64(double) nounwind readnone @@ -22,6 +23,8 @@ define void @rcp_pat_f64(double addrspace(1)* %out, double %src) nounwind { ; FUNC-LABEL: {{^}}rsq_rcp_pat_f64: ; SI-UNSAFE: v_rsq_f64_e32 ; SI-SAFE-NOT: v_rsq_f64_e32 +; SI-SAFE: v_sqrt_f64 +; SI-SAFE: v_rcp_f64 define void @rsq_rcp_pat_f64(double addrspace(1)* %out, double %src) nounwind { %sqrt = call double @llvm.sqrt.f64(double %src) nounwind readnone %rcp = call double @llvm.AMDGPU.rcp.f64(double %sqrt) nounwind readnone diff --git a/test/CodeGen/R600/llvm.AMDGPU.rcp.ll b/test/CodeGen/R600/llvm.AMDGPU.rcp.ll index 3ee3e6b..edd6e9a 100644 --- a/test/CodeGen/R600/llvm.AMDGPU.rcp.ll +++ b/test/CodeGen/R600/llvm.AMDGPU.rcp.ll @@ -1,6 +1,9 @@ -; RUN: llc -march=r600 -mcpu=SI -mattr=-fp32-denormals -enable-unsafe-fp-math -verify-machineinstrs < %s | FileCheck -check-prefix=SI-UNSAFE -check-prefix=SI -check-prefix=FUNC %s -; RUN: llc -march=r600 -mcpu=SI -mattr=-fp32-denormals -verify-machineinstrs < %s | FileCheck -check-prefix=SI-SAFE -check-prefix=SI -check-prefix=FUNC %s -; XUN: llc -march=r600 -mcpu=SI -mattr=+fp32-denormals -verify-machineinstrs < %s | FileCheck -check-prefix=SI-SAFE-SPDENORM -check-prefix=SI -check-prefix=FUNC %s +; RUN: llc -march=amdgcn -mcpu=SI -mattr=-fp32-denormals -enable-unsafe-fp-math -verify-machineinstrs < %s | FileCheck -check-prefix=SI-UNSAFE -check-prefix=SI -check-prefix=FUNC %s +; RUN: llc -march=amdgcn -mcpu=SI -mattr=-fp32-denormals -verify-machineinstrs < %s | FileCheck -check-prefix=SI-SAFE -check-prefix=SI -check-prefix=FUNC %s +; XUN: llc -march=amdgcn -mcpu=SI -mattr=+fp32-denormals -verify-machineinstrs < %s | FileCheck -check-prefix=SI-SAFE-SPDENORM -check-prefix=SI -check-prefix=FUNC %s +; RUN: llc -march=amdgcn -mcpu=tonga -mattr=-fp32-denormals -enable-unsafe-fp-math -verify-machineinstrs < %s | FileCheck -check-prefix=SI-UNSAFE -check-prefix=SI -check-prefix=FUNC %s +; RUN: llc -march=amdgcn -mcpu=tonga -mattr=-fp32-denormals -verify-machineinstrs < %s | FileCheck -check-prefix=SI-SAFE -check-prefix=SI -check-prefix=FUNC %s +; XUN: llc -march=amdgcn -mcpu=tonga -mattr=+fp32-denormals -verify-machineinstrs < %s | FileCheck -check-prefix=SI-SAFE-SPDENORM -check-prefix=SI -check-prefix=FUNC %s ; RUN: llc -march=r600 -mcpu=cypress -verify-machineinstrs < %s | FileCheck -check-prefix=EG-SAFE -check-prefix=FUNC %s ; RUN: llc -march=r600 -mcpu=cayman -verify-machineinstrs < %s | FileCheck 
-check-prefix=EG -check-prefix=FUNC %s diff --git a/test/CodeGen/R600/llvm.AMDGPU.rsq.clamped.f64.ll b/test/CodeGen/R600/llvm.AMDGPU.rsq.clamped.f64.ll index 18854be..67f1d22 100644 --- a/test/CodeGen/R600/llvm.AMDGPU.rsq.clamped.f64.ll +++ b/test/CodeGen/R600/llvm.AMDGPU.rsq.clamped.f64.ll @@ -1,9 +1,21 @@ -; RUN: llc -march=r600 -mcpu=SI -verify-machineinstrs < %s | FileCheck -check-prefix=SI -check-prefix=FUNC %s +; RUN: llc -march=amdgcn -mcpu=SI -verify-machineinstrs < %s | FileCheck -check-prefix=SI -check-prefix=FUNC %s +; RUN: llc -march=amdgcn -mcpu=tonga -verify-machineinstrs < %s | FileCheck -check-prefix=VI -check-prefix=FUNC %s declare double @llvm.AMDGPU.rsq.clamped.f64(double) nounwind readnone ; FUNC-LABEL: {{^}}rsq_clamped_f64: ; SI: v_rsq_clamp_f64_e32 + +; VI: v_rsq_f64_e32 [[RSQ:v\[[0-9]+:[0-9]+\]]], s[2:3] +; TODO: this constant should be folded: +; VI: s_mov_b32 s[[ALLBITS:[0-9+]]], -1 +; VI: s_mov_b32 s[[HIGH1:[0-9+]]], 0x7fefffff +; VI: s_mov_b32 s[[LOW1:[0-9+]]], s[[ALLBITS]] +; VI: v_min_f64 v[0:1], [[RSQ]], s{{\[}}[[LOW1]]:[[HIGH1]]] +; VI: s_mov_b32 s[[HIGH2:[0-9+]]], 0xffefffff +; VI: s_mov_b32 s[[LOW2:[0-9+]]], s[[ALLBITS]] +; VI: v_max_f64 v[0:1], v[0:1], s{{\[}}[[LOW2]]:[[HIGH2]]] + define void @rsq_clamped_f64(double addrspace(1)* %out, double %src) nounwind { %rsq_clamped = call double @llvm.AMDGPU.rsq.clamped.f64(double %src) nounwind readnone store double %rsq_clamped, double addrspace(1)* %out, align 8 diff --git a/test/CodeGen/R600/llvm.AMDGPU.rsq.clamped.ll b/test/CodeGen/R600/llvm.AMDGPU.rsq.clamped.ll index 6bf9f0c..eeff253 100644 --- a/test/CodeGen/R600/llvm.AMDGPU.rsq.clamped.ll +++ b/test/CodeGen/R600/llvm.AMDGPU.rsq.clamped.ll @@ -1,4 +1,5 @@ -; RUN: llc -march=r600 -mcpu=SI -verify-machineinstrs < %s | FileCheck -check-prefix=SI -check-prefix=FUNC %s +; RUN: llc -march=amdgcn -mcpu=SI -verify-machineinstrs < %s | FileCheck -check-prefix=SI -check-prefix=FUNC %s +; RUN: llc -march=amdgcn -mcpu=tonga -verify-machineinstrs < %s | FileCheck -check-prefix=VI -check-prefix=FUNC %s ; RUN: llc -march=r600 -mcpu=cypress -verify-machineinstrs < %s | FileCheck -check-prefix=EG -check-prefix=FUNC %s @@ -6,7 +7,15 @@ declare float @llvm.AMDGPU.rsq.clamped.f32(float) nounwind readnone ; FUNC-LABEL: {{^}}rsq_clamped_f32: ; SI: v_rsq_clamp_f32_e32 + +; VI: v_rsq_f32_e32 [[RSQ:v[0-9]+]], {{s[0-9]+}} +; VI: v_min_f32_e32 [[MIN:v[0-9]+]], 0x7f7fffff, [[RSQ]] +; TODO: this constant should be folded: +; VI: v_mov_b32_e32 [[MINFLT:v[0-9]+]], 0xff7fffff +; VI: v_max_f32_e32 {{v[0-9]+}}, [[MIN]], [[MINFLT]] + ; EG: RECIPSQRT_CLAMPED + define void @rsq_clamped_f32(float addrspace(1)* %out, float %src) nounwind { %rsq_clamped = call float @llvm.AMDGPU.rsq.clamped.f32(float %src) nounwind readnone store float %rsq_clamped, float addrspace(1)* %out, align 4 diff --git a/test/CodeGen/R600/llvm.AMDGPU.rsq.ll b/test/CodeGen/R600/llvm.AMDGPU.rsq.ll index d6299b8..36b72f1 100644 --- a/test/CodeGen/R600/llvm.AMDGPU.rsq.ll +++ b/test/CodeGen/R600/llvm.AMDGPU.rsq.ll @@ -1,4 +1,5 @@ -; RUN: llc -march=r600 -mcpu=SI -verify-machineinstrs < %s | FileCheck -check-prefix=SI -check-prefix=FUNC %s +; RUN: llc -march=amdgcn -mcpu=SI -verify-machineinstrs < %s | FileCheck -check-prefix=SI -check-prefix=FUNC %s +; RUN: llc -march=amdgcn -mcpu=tonga -verify-machineinstrs < %s | FileCheck -check-prefix=SI -check-prefix=FUNC %s ; RUN: llc -march=r600 -mcpu=cypress -verify-machineinstrs < %s | FileCheck -check-prefix=EG -check-prefix=FUNC %s declare float @llvm.AMDGPU.rsq.f32(float) nounwind 
readnone diff --git a/test/CodeGen/R600/llvm.AMDGPU.trig_preop.ll b/test/CodeGen/R600/llvm.AMDGPU.trig_preop.ll index 2e6bd5c..5829f73 100644 --- a/test/CodeGen/R600/llvm.AMDGPU.trig_preop.ll +++ b/test/CodeGen/R600/llvm.AMDGPU.trig_preop.ll @@ -1,4 +1,5 @@ -; RUN: llc -march=r600 -mcpu=SI -verify-machineinstrs < %s | FileCheck -check-prefix=SI %s +; RUN: llc -march=amdgcn -mcpu=SI -verify-machineinstrs < %s | FileCheck -check-prefix=SI %s +; RUN: llc -march=amdgcn -mcpu=tonga -verify-machineinstrs < %s | FileCheck -check-prefix=SI %s declare double @llvm.AMDGPU.trig.preop.f64(double, i32) nounwind readnone diff --git a/test/CodeGen/R600/llvm.AMDGPU.trunc.ll b/test/CodeGen/R600/llvm.AMDGPU.trunc.ll index fdd531d..74792e5 100644 --- a/test/CodeGen/R600/llvm.AMDGPU.trunc.ll +++ b/test/CodeGen/R600/llvm.AMDGPU.trunc.ll @@ -1,10 +1,11 @@ -; RUN: llc < %s -march=r600 -mcpu=redwood | FileCheck --check-prefix=R600-CHECK %s -; RUN: llc < %s -march=r600 -mcpu=verde -verify-machineinstrs | FileCheck --check-prefix=SI-CHECK %s +; RUN: llc < %s -march=r600 -mcpu=redwood | FileCheck --check-prefix=R600 %s +; RUN: llc < %s -march=amdgcn -mcpu=verde -verify-machineinstrs | FileCheck --check-prefix=SI %s +; RUN: llc < %s -march=amdgcn -mcpu=tonga -verify-machineinstrs | FileCheck --check-prefix=SI %s -; R600-CHECK: {{^}}amdgpu_trunc: -; R600-CHECK: TRUNC T{{[0-9]+\.[XYZW]}}, KC0[2].Z -; SI-CHECK: {{^}}amdgpu_trunc: -; SI-CHECK: v_trunc_f32 +; R600: {{^}}amdgpu_trunc: +; R600: TRUNC T{{[0-9]+\.[XYZW]}}, KC0[2].Z +; SI: {{^}}amdgpu_trunc: +; SI: v_trunc_f32 define void @amdgpu_trunc(float addrspace(1)* %out, float %x) { entry: diff --git a/test/CodeGen/R600/llvm.AMDGPU.umad24.ll b/test/CodeGen/R600/llvm.AMDGPU.umad24.ll index 59d6248..88613db 100644 --- a/test/CodeGen/R600/llvm.AMDGPU.umad24.ll +++ b/test/CodeGen/R600/llvm.AMDGPU.umad24.ll @@ -1,4 +1,4 @@ -; RUN: llc -march=r600 -mcpu=SI -verify-machineinstrs < %s | FileCheck -check-prefix=SI -check-prefix=FUNC %s +; RUN: llc -march=amdgcn -mcpu=SI -verify-machineinstrs < %s | FileCheck -check-prefix=SI -check-prefix=FUNC %s ; RUN: llc -march=r600 -mcpu=cayman -verify-machineinstrs < %s | FileCheck -check-prefix=EG -check-prefix=FUNC %s ; RUN: llc -march=r600 -mcpu=redwood -verify-machineinstrs < %s | FileCheck -check-prefix=EG -check-prefix=FUNC %s ; XUN: llc -march=r600 -mcpu=r600 -verify-machineinstrs < %s | FileCheck -check-prefix=R600 -check-prefix=FUNC %s @@ -20,7 +20,7 @@ define void @test_umad24(i32 addrspace(1)* %out, i32 %src0, i32 %src1, i32 %src2 ; FUNC-LABEL: {{^}}commute_umad24: ; SI-DAG: buffer_load_dword [[SRC0:v[0-9]+]], {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, 0 addr64{{$}} -; SI-DAG: buffer_load_dword [[SRC2:v[0-9]+]], {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, 0 addr64 offset:0x4 +; SI-DAG: buffer_load_dword [[SRC2:v[0-9]+]], {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, 0 addr64 offset:4 ; SI: v_mad_u32_u24 [[RESULT:v[0-9]+]], 4, [[SRC0]], [[SRC2]] ; SI: buffer_store_dword [[RESULT]] define void @commute_umad24(i32 addrspace(1)* %out, i32 addrspace(1)* %in) nounwind { diff --git a/test/CodeGen/R600/llvm.AMDGPU.umax.ll b/test/CodeGen/R600/llvm.AMDGPU.umax.ll index ee854ec..4320dfe 100644 --- a/test/CodeGen/R600/llvm.AMDGPU.umax.ll +++ b/test/CodeGen/R600/llvm.AMDGPU.umax.ll @@ -1,4 +1,5 @@ -; RUN: llc < %s -march=r600 -mcpu=verde -verify-machineinstrs | FileCheck -check-prefix=SI %s +; RUN: llc < %s -march=amdgcn -mcpu=verde -verify-machineinstrs | FileCheck -check-prefix=SI %s +; RUN: llc < %s -march=amdgcn -mcpu=tonga 
-verify-machineinstrs | FileCheck -check-prefix=SI %s ; SI-LABEL: {{^}}vector_umax: ; SI: v_max_u32_e32 @@ -44,4 +45,4 @@ declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float attributes #0 = { nounwind } attributes #1 = { nounwind readnone } -!0 = metadata !{metadata !"const", null, i32 1} +!0 = !{!"const", null, i32 1} diff --git a/test/CodeGen/R600/llvm.AMDGPU.umin.ll b/test/CodeGen/R600/llvm.AMDGPU.umin.ll index 2eaa372..e4cac33 100644 --- a/test/CodeGen/R600/llvm.AMDGPU.umin.ll +++ b/test/CodeGen/R600/llvm.AMDGPU.umin.ll @@ -1,4 +1,5 @@ -; RUN: llc < %s -march=r600 -mcpu=verde -verify-machineinstrs | FileCheck -check-prefix=SI %s +; RUN: llc < %s -march=amdgcn -mcpu=verde -verify-machineinstrs | FileCheck -check-prefix=SI %s +; RUN: llc < %s -march=amdgcn -mcpu=tonga -verify-machineinstrs | FileCheck -check-prefix=SI %s ; SI-LABEL: {{^}}vector_umin: ; SI: v_min_u32_e32 @@ -44,4 +45,4 @@ declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float attributes #0 = { nounwind } attributes #1 = { nounwind readnone } -!0 = metadata !{metadata !"const", null, i32 1} +!0 = !{!"const", null, i32 1} diff --git a/test/CodeGen/R600/llvm.AMDGPU.umul24.ll b/test/CodeGen/R600/llvm.AMDGPU.umul24.ll index 567ac31..76624a0 100644 --- a/test/CodeGen/R600/llvm.AMDGPU.umul24.ll +++ b/test/CodeGen/R600/llvm.AMDGPU.umul24.ll @@ -1,4 +1,5 @@ -; RUN: llc -march=r600 -mcpu=SI -verify-machineinstrs < %s | FileCheck -check-prefix=SI %s +; RUN: llc -march=amdgcn -mcpu=SI -verify-machineinstrs < %s | FileCheck -check-prefix=SI %s +; RUN: llc -march=amdgcn -mcpu=tonga -verify-machineinstrs < %s | FileCheck -check-prefix=SI %s ; RUN: llc -march=r600 -mcpu=cayman -verify-machineinstrs < %s | FileCheck -check-prefix=EG -check-prefix=FUNC %s ; RUN: llc -march=r600 -mcpu=redwood -verify-machineinstrs < %s | FileCheck -check-prefix=EG -check-prefix=FUNC %s ; XUN: llc -march=r600 -mcpu=r600 -verify-machineinstrs < %s | FileCheck -check-prefix=R600 -check-prefix=FUNC %s diff --git a/test/CodeGen/R600/llvm.SI.fs.interp.constant.ll b/test/CodeGen/R600/llvm.SI.fs.interp.constant.ll deleted file mode 100644 index d26bc32..0000000 --- a/test/CodeGen/R600/llvm.SI.fs.interp.constant.ll +++ /dev/null @@ -1,21 +0,0 @@ -;RUN: llc < %s -march=r600 -mcpu=verde -verify-machineinstrs | FileCheck %s - -;CHECK: s_mov_b32 -;CHECK-NEXT: v_interp_mov_f32 - -define void @main(<16 x i8> addrspace(2)* inreg, <16 x i8> addrspace(2)* inreg, <32 x i8> addrspace(2)* inreg, i32 inreg) "ShaderType"="0" { -main_body: - %4 = call float @llvm.SI.fs.constant(i32 0, i32 0, i32 %3) - %5 = call i32 @llvm.SI.packf16(float %4, float %4) - %6 = bitcast i32 %5 to float - call void @llvm.SI.export(i32 15, i32 1, i32 1, i32 0, i32 1, float %6, float %6, float %6, float %6) - ret void -} - -declare void @llvm.AMDGPU.shader.type(i32) - -declare float @llvm.SI.fs.constant(i32, i32, i32) readnone - -declare i32 @llvm.SI.packf16(float, float) readnone - -declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float) diff --git a/test/CodeGen/R600/llvm.SI.fs.interp.ll b/test/CodeGen/R600/llvm.SI.fs.interp.ll new file mode 100644 index 0000000..6b36140 --- /dev/null +++ b/test/CodeGen/R600/llvm.SI.fs.interp.ll @@ -0,0 +1,30 @@ +;RUN: llc < %s -march=amdgcn -mcpu=verde -verify-machineinstrs | FileCheck %s +;RUN: llc < %s -march=amdgcn -mcpu=tonga -verify-machineinstrs | FileCheck %s + +;CHECK-NOT: s_wqm +;CHECK: s_mov_b32 +;CHECK-NEXT: v_interp_mov_f32 +;CHECK: v_interp_p1_f32 +;CHECK: v_interp_p2_f32 + 
+define void @main(<16 x i8> addrspace(2)* inreg, <16 x i8> addrspace(2)* inreg, <32 x i8> addrspace(2)* inreg, i32 inreg, <2 x i32>) #0 { +main_body: + %5 = call float @llvm.SI.fs.constant(i32 0, i32 0, i32 %3) + %6 = call float @llvm.SI.fs.interp(i32 0, i32 0, i32 %3, <2 x i32> %4) + %7 = call float @llvm.SI.fs.interp(i32 1, i32 0, i32 %3, <2 x i32> %4) + call void @llvm.SI.export(i32 15, i32 1, i32 1, i32 0, i32 1, float %5, float %6, float %7, float %7) + ret void +} + +declare void @llvm.AMDGPU.shader.type(i32) + +; Function Attrs: nounwind readnone +declare float @llvm.SI.fs.constant(i32, i32, i32) #1 + +; Function Attrs: nounwind readnone +declare float @llvm.SI.fs.interp(i32, i32, i32, <2 x i32>) #1 + +declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float) + +attributes #0 = { "ShaderType"="0" } +attributes #1 = { nounwind readnone } diff --git a/test/CodeGen/R600/llvm.SI.gather4.ll b/test/CodeGen/R600/llvm.SI.gather4.ll index 91a2012..275cb58 100644 --- a/test/CodeGen/R600/llvm.SI.gather4.ll +++ b/test/CodeGen/R600/llvm.SI.gather4.ll @@ -1,4 +1,5 @@ -;RUN: llc < %s -march=r600 -mcpu=verde -verify-machineinstrs | FileCheck %s +;RUN: llc < %s -march=amdgcn -mcpu=verde -verify-machineinstrs | FileCheck %s +;RUN: llc < %s -march=amdgcn -mcpu=tonga -verify-machineinstrs | FileCheck %s ;CHECK-LABEL: {{^}}gather4_v2: ;CHECK: image_gather4 {{v\[[0-9]+:[0-9]+\]}}, 1, 0, 0, -1, 0, 0, 0, 0, {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}} diff --git a/test/CodeGen/R600/llvm.SI.getlod.ll b/test/CodeGen/R600/llvm.SI.getlod.ll index ec26fe5..06ee98e 100644 --- a/test/CodeGen/R600/llvm.SI.getlod.ll +++ b/test/CodeGen/R600/llvm.SI.getlod.ll @@ -1,4 +1,5 @@ -;RUN: llc < %s -march=r600 -mcpu=verde -verify-machineinstrs | FileCheck %s +;RUN: llc < %s -march=amdgcn -mcpu=verde -verify-machineinstrs | FileCheck %s +;RUN: llc < %s -march=amdgcn -mcpu=tonga -verify-machineinstrs | FileCheck %s ;CHECK-LABEL: {{^}}getlod: ;CHECK: image_get_lod {{v\[[0-9]+:[0-9]+\]}}, 3, 0, 0, -1, 0, 0, 0, 0, {{v[0-9]+}}, {{s\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}} diff --git a/test/CodeGen/R600/llvm.SI.image.ll b/test/CodeGen/R600/llvm.SI.image.ll index 4eec543..0fac8d7 100644 --- a/test/CodeGen/R600/llvm.SI.image.ll +++ b/test/CodeGen/R600/llvm.SI.image.ll @@ -1,4 +1,5 @@ -;RUN: llc < %s -march=r600 -mcpu=verde -verify-machineinstrs | FileCheck %s +;RUN: llc < %s -march=amdgcn -mcpu=verde -verify-machineinstrs | FileCheck %s +;RUN: llc < %s -march=amdgcn -mcpu=tonga -verify-machineinstrs | FileCheck %s ;CHECK-LABEL: {{^}}image_load: ;CHECK: image_load {{v\[[0-9]+:[0-9]+\]}}, 15, 0, 0, 0, 0, 0, 0, 0, {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}} diff --git a/test/CodeGen/R600/llvm.SI.image.sample.ll b/test/CodeGen/R600/llvm.SI.image.sample.ll index ebff391..4bc638a 100644 --- a/test/CodeGen/R600/llvm.SI.image.sample.ll +++ b/test/CodeGen/R600/llvm.SI.image.sample.ll @@ -1,6 +1,8 @@ -;RUN: llc < %s -march=r600 -mcpu=verde -verify-machineinstrs | FileCheck %s +;RUN: llc < %s -march=amdgcn -mcpu=verde -verify-machineinstrs | FileCheck %s +;RUN: llc < %s -march=amdgcn -mcpu=tonga -verify-machineinstrs | FileCheck %s ;CHECK-LABEL: {{^}}sample: +;CHECK: s_wqm ;CHECK: image_sample {{v\[[0-9]+:[0-9]+\]}}, 15, 0, 0, 0, 0, 0, 0, 0, {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}} define void @sample() #0 { main_body: @@ -14,6 +16,7 @@ main_body: } ;CHECK-LABEL: {{^}}sample_cl: +;CHECK: s_wqm ;CHECK: image_sample_cl {{v\[[0-9]+:[0-9]+\]}}, 15, 0, 0, 0, 0, 0, 
0, 0, {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}} define void @sample_cl() #0 { main_body: @@ -27,6 +30,7 @@ main_body: } ;CHECK-LABEL: {{^}}sample_d: +;CHECK-NOT: s_wqm ;CHECK: image_sample_d {{v\[[0-9]+:[0-9]+\]}}, 15, 0, 0, 0, 0, 0, 0, 0, {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}} define void @sample_d() #0 { main_body: @@ -40,6 +44,7 @@ main_body: } ;CHECK-LABEL: {{^}}sample_d_cl: +;CHECK-NOT: s_wqm ;CHECK: image_sample_d_cl {{v\[[0-9]+:[0-9]+\]}}, 15, 0, 0, 0, 0, 0, 0, 0, {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}} define void @sample_d_cl() #0 { main_body: @@ -53,6 +58,7 @@ main_body: } ;CHECK-LABEL: {{^}}sample_l: +;CHECK-NOT: s_wqm ;CHECK: image_sample_l {{v\[[0-9]+:[0-9]+\]}}, 15, 0, 0, 0, 0, 0, 0, 0, {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}} define void @sample_l() #0 { main_body: @@ -66,6 +72,7 @@ main_body: } ;CHECK-LABEL: {{^}}sample_b: +;CHECK: s_wqm ;CHECK: image_sample_b {{v\[[0-9]+:[0-9]+\]}}, 15, 0, 0, 0, 0, 0, 0, 0, {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}} define void @sample_b() #0 { main_body: @@ -79,6 +86,7 @@ main_body: } ;CHECK-LABEL: {{^}}sample_b_cl: +;CHECK: s_wqm ;CHECK: image_sample_b_cl {{v\[[0-9]+:[0-9]+\]}}, 15, 0, 0, 0, 0, 0, 0, 0, {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}} define void @sample_b_cl() #0 { main_body: @@ -92,6 +100,7 @@ main_body: } ;CHECK-LABEL: {{^}}sample_lz: +;CHECK-NOT: s_wqm ;CHECK: image_sample_lz {{v\[[0-9]+:[0-9]+\]}}, 15, 0, 0, 0, 0, 0, 0, 0, {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}} define void @sample_lz() #0 { main_body: @@ -105,6 +114,7 @@ main_body: } ;CHECK-LABEL: {{^}}sample_cd: +;CHECK-NOT: s_wqm ;CHECK: image_sample_cd {{v\[[0-9]+:[0-9]+\]}}, 15, 0, 0, 0, 0, 0, 0, 0, {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}} define void @sample_cd() #0 { main_body: @@ -118,6 +128,7 @@ main_body: } ;CHECK-LABEL: {{^}}sample_cd_cl: +;CHECK-NOT: s_wqm ;CHECK: image_sample_cd_cl {{v\[[0-9]+:[0-9]+\]}}, 15, 0, 0, 0, 0, 0, 0, 0, {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}} define void @sample_cd_cl() #0 { main_body: @@ -131,6 +142,7 @@ main_body: } ;CHECK-LABEL: {{^}}sample_c: +;CHECK: s_wqm ;CHECK: image_sample_c {{v\[[0-9]+:[0-9]+\]}}, 15, 0, 0, 0, 0, 0, 0, 0, {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}} define void @sample_c() #0 { main_body: @@ -144,6 +156,7 @@ main_body: } ;CHECK-LABEL: {{^}}sample_c_cl: +;CHECK: s_wqm ;CHECK: image_sample_c_cl {{v\[[0-9]+:[0-9]+\]}}, 15, 0, 0, 0, 0, 0, 0, 0, {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}} define void @sample_c_cl() #0 { main_body: @@ -157,6 +170,7 @@ main_body: } ;CHECK-LABEL: {{^}}sample_c_d: +;CHECK-NOT: s_wqm ;CHECK: image_sample_c_d {{v\[[0-9]+:[0-9]+\]}}, 15, 0, 0, 0, 0, 0, 0, 0, {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}} define void @sample_c_d() #0 { main_body: @@ -170,6 +184,7 @@ main_body: } ;CHECK-LABEL: {{^}}sample_c_d_cl: +;CHECK-NOT: s_wqm ;CHECK: image_sample_c_d_cl {{v\[[0-9]+:[0-9]+\]}}, 15, 0, 0, 0, 0, 0, 0, 0, {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}} define void @sample_c_d_cl() #0 { main_body: @@ -183,6 +198,7 @@ main_body: } ;CHECK-LABEL: {{^}}sample_c_l: +;CHECK-NOT: s_wqm ;CHECK: image_sample_c_l {{v\[[0-9]+:[0-9]+\]}}, 15, 0, 0, 0, 0, 0, 0, 0, {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}} 
define void @sample_c_l() #0 { main_body: @@ -196,6 +212,7 @@ main_body: } ;CHECK-LABEL: {{^}}sample_c_b: +;CHECK: s_wqm ;CHECK: image_sample_c_b {{v\[[0-9]+:[0-9]+\]}}, 15, 0, 0, 0, 0, 0, 0, 0, {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}} define void @sample_c_b() #0 { main_body: @@ -209,6 +226,7 @@ main_body: } ;CHECK-LABEL: {{^}}sample_c_b_cl: +;CHECK: s_wqm ;CHECK: image_sample_c_b_cl {{v\[[0-9]+:[0-9]+\]}}, 15, 0, 0, 0, 0, 0, 0, 0, {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}} define void @sample_c_b_cl() #0 { main_body: @@ -222,6 +240,7 @@ main_body: } ;CHECK-LABEL: {{^}}sample_c_lz: +;CHECK-NOT: s_wqm ;CHECK: image_sample_c_lz {{v\[[0-9]+:[0-9]+\]}}, 15, 0, 0, 0, 0, 0, 0, 0, {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}} define void @sample_c_lz() #0 { main_body: @@ -235,6 +254,7 @@ main_body: } ;CHECK-LABEL: {{^}}sample_c_cd: +;CHECK-NOT: s_wqm ;CHECK: image_sample_c_cd {{v\[[0-9]+:[0-9]+\]}}, 15, 0, 0, 0, 0, 0, 0, 0, {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}} define void @sample_c_cd() #0 { main_body: @@ -248,6 +268,7 @@ main_body: } ;CHECK-LABEL: {{^}}sample_c_cd_cl: +;CHECK-NOT: s_wqm ;CHECK: image_sample_c_cd_cl {{v\[[0-9]+:[0-9]+\]}}, 15, 0, 0, 0, 0, 0, 0, 0, {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}} define void @sample_c_cd_cl() #0 { main_body: diff --git a/test/CodeGen/R600/llvm.SI.image.sample.o.ll b/test/CodeGen/R600/llvm.SI.image.sample.o.ll index dbc1b2b..9d89354 100644 --- a/test/CodeGen/R600/llvm.SI.image.sample.o.ll +++ b/test/CodeGen/R600/llvm.SI.image.sample.o.ll @@ -1,6 +1,8 @@ -;RUN: llc < %s -march=r600 -mcpu=verde -verify-machineinstrs | FileCheck %s +;RUN: llc < %s -march=amdgcn -mcpu=verde -verify-machineinstrs | FileCheck %s +;RUN: llc < %s -march=amdgcn -mcpu=tonga -verify-machineinstrs | FileCheck %s ;CHECK-LABEL: {{^}}sample: +;CHECK: s_wqm ;CHECK: image_sample_o {{v\[[0-9]+:[0-9]+\]}}, 15, 0, 0, 0, 0, 0, 0, 0, {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}} define void @sample() #0 { main_body: @@ -14,6 +16,7 @@ main_body: } ;CHECK-LABEL: {{^}}sample_cl: +;CHECK: s_wqm ;CHECK: image_sample_cl_o {{v\[[0-9]+:[0-9]+\]}}, 15, 0, 0, 0, 0, 0, 0, 0, {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}} define void @sample_cl() #0 { main_body: @@ -27,6 +30,7 @@ main_body: } ;CHECK-LABEL: {{^}}sample_d: +;CHECK-NOT: s_wqm ;CHECK: image_sample_d_o {{v\[[0-9]+:[0-9]+\]}}, 15, 0, 0, 0, 0, 0, 0, 0, {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}} define void @sample_d() #0 { main_body: @@ -40,6 +44,7 @@ main_body: } ;CHECK-LABEL: {{^}}sample_d_cl: +;CHECK-NOT: s_wqm ;CHECK: image_sample_d_cl_o {{v\[[0-9]+:[0-9]+\]}}, 15, 0, 0, 0, 0, 0, 0, 0, {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}} define void @sample_d_cl() #0 { main_body: @@ -53,6 +58,7 @@ main_body: } ;CHECK-LABEL: {{^}}sample_l: +;CHECK-NOT: s_wqm ;CHECK: image_sample_l_o {{v\[[0-9]+:[0-9]+\]}}, 15, 0, 0, 0, 0, 0, 0, 0, {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}} define void @sample_l() #0 { main_body: @@ -66,6 +72,7 @@ main_body: } ;CHECK-LABEL: {{^}}sample_b: +;CHECK: s_wqm ;CHECK: image_sample_b_o {{v\[[0-9]+:[0-9]+\]}}, 15, 0, 0, 0, 0, 0, 0, 0, {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}} define void @sample_b() #0 { main_body: @@ -79,6 +86,7 @@ main_body: } ;CHECK-LABEL: {{^}}sample_b_cl: +;CHECK: s_wqm ;CHECK: image_sample_b_cl_o 
{{v\[[0-9]+:[0-9]+\]}}, 15, 0, 0, 0, 0, 0, 0, 0, {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}} define void @sample_b_cl() #0 { main_body: @@ -92,6 +100,7 @@ main_body: } ;CHECK-LABEL: {{^}}sample_lz: +;CHECK-NOT: s_wqm ;CHECK: image_sample_lz_o {{v\[[0-9]+:[0-9]+\]}}, 15, 0, 0, 0, 0, 0, 0, 0, {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}} define void @sample_lz() #0 { main_body: @@ -105,6 +114,7 @@ main_body: } ;CHECK-LABEL: {{^}}sample_cd: +;CHECK-NOT: s_wqm ;CHECK: image_sample_cd_o {{v\[[0-9]+:[0-9]+\]}}, 15, 0, 0, 0, 0, 0, 0, 0, {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}} define void @sample_cd() #0 { main_body: @@ -118,6 +128,7 @@ main_body: } ;CHECK-LABEL: {{^}}sample_cd_cl: +;CHECK-NOT: s_wqm ;CHECK: image_sample_cd_cl_o {{v\[[0-9]+:[0-9]+\]}}, 15, 0, 0, 0, 0, 0, 0, 0, {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}} define void @sample_cd_cl() #0 { main_body: @@ -131,6 +142,7 @@ main_body: } ;CHECK-LABEL: {{^}}sample_c: +;CHECK: s_wqm ;CHECK: image_sample_c_o {{v\[[0-9]+:[0-9]+\]}}, 15, 0, 0, 0, 0, 0, 0, 0, {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}} define void @sample_c() #0 { main_body: @@ -144,6 +156,7 @@ main_body: } ;CHECK-LABEL: {{^}}sample_c_cl: +;CHECK: s_wqm ;CHECK: image_sample_c_cl_o {{v\[[0-9]+:[0-9]+\]}}, 15, 0, 0, 0, 0, 0, 0, 0, {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}} define void @sample_c_cl() #0 { main_body: @@ -157,6 +170,7 @@ main_body: } ;CHECK-LABEL: {{^}}sample_c_d: +;CHECK-NOT: s_wqm ;CHECK: image_sample_c_d_o {{v\[[0-9]+:[0-9]+\]}}, 15, 0, 0, 0, 0, 0, 0, 0, {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}} define void @sample_c_d() #0 { main_body: @@ -170,6 +184,7 @@ main_body: } ;CHECK-LABEL: {{^}}sample_c_d_cl: +;CHECK-NOT: s_wqm ;CHECK: image_sample_c_d_cl_o {{v\[[0-9]+:[0-9]+\]}}, 15, 0, 0, 0, 0, 0, 0, 0, {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}} define void @sample_c_d_cl() #0 { main_body: @@ -183,6 +198,7 @@ main_body: } ;CHECK-LABEL: {{^}}sample_c_l: +;CHECK-NOT: s_wqm ;CHECK: image_sample_c_l_o {{v\[[0-9]+:[0-9]+\]}}, 15, 0, 0, 0, 0, 0, 0, 0, {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}} define void @sample_c_l() #0 { main_body: @@ -196,6 +212,7 @@ main_body: } ;CHECK-LABEL: {{^}}sample_c_b: +;CHECK: s_wqm ;CHECK: image_sample_c_b_o {{v\[[0-9]+:[0-9]+\]}}, 15, 0, 0, 0, 0, 0, 0, 0, {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}} define void @sample_c_b() #0 { main_body: @@ -209,6 +226,7 @@ main_body: } ;CHECK-LABEL: {{^}}sample_c_b_cl: +;CHECK: s_wqm ;CHECK: image_sample_c_b_cl_o {{v\[[0-9]+:[0-9]+\]}}, 15, 0, 0, 0, 0, 0, 0, 0, {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}} define void @sample_c_b_cl() #0 { main_body: @@ -222,6 +240,7 @@ main_body: } ;CHECK-LABEL: {{^}}sample_c_lz: +;CHECK-NOT: s_wqm ;CHECK: image_sample_c_lz_o {{v\[[0-9]+:[0-9]+\]}}, 15, 0, 0, 0, 0, 0, 0, 0, {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}} define void @sample_c_lz() #0 { main_body: @@ -235,6 +254,7 @@ main_body: } ;CHECK-LABEL: {{^}}sample_c_cd: +;CHECK-NOT: s_wqm ;CHECK: image_sample_c_cd_o {{v\[[0-9]+:[0-9]+\]}}, 15, 0, 0, 0, 0, 0, 0, 0, {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}} define void @sample_c_cd() #0 { main_body: @@ -248,6 +268,7 @@ main_body: } ;CHECK-LABEL: {{^}}sample_c_cd_cl: +;CHECK-NOT: s_wqm ;CHECK: image_sample_c_cd_cl_o 
{{v\[[0-9]+:[0-9]+\]}}, 15, 0, 0, 0, 0, 0, 0, 0, {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}} define void @sample_c_cd_cl() #0 { main_body: diff --git a/test/CodeGen/R600/llvm.SI.imageload.ll b/test/CodeGen/R600/llvm.SI.imageload.ll index 673d92d..35e4591 100644 --- a/test/CodeGen/R600/llvm.SI.imageload.ll +++ b/test/CodeGen/R600/llvm.SI.imageload.ll @@ -1,4 +1,5 @@ -;RUN: llc < %s -march=r600 -mcpu=verde -verify-machineinstrs | FileCheck %s +;RUN: llc < %s -march=amdgcn -mcpu=verde -verify-machineinstrs | FileCheck %s +;RUN: llc < %s -march=amdgcn -mcpu=tonga -verify-machineinstrs | FileCheck %s ;CHECK-DAG: image_load {{v\[[0-9]+:[0-9]+\]}}, 15, 0, 0, -1 ;CHECK-DAG: image_load_mip {{v\[[0-9]+:[0-9]+\]}}, 3, 0, 0, 0 @@ -126,6 +127,6 @@ declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float attributes #0 = { "ShaderType"="0" } attributes #1 = { nounwind readnone } -!0 = metadata !{metadata !"const", null} -!1 = metadata !{} -!2 = metadata !{metadata !0, metadata !0, i64 0, i32 1} +!0 = !{!"const", null} +!1 = !{} +!2 = !{!0, !0, i64 0, i32 1} diff --git a/test/CodeGen/R600/llvm.SI.load.dword.ll b/test/CodeGen/R600/llvm.SI.load.dword.ll index e5c6201..d2e6a8e 100644 --- a/test/CodeGen/R600/llvm.SI.load.dword.ll +++ b/test/CodeGen/R600/llvm.SI.load.dword.ll @@ -1,28 +1,41 @@ -;RUN: llc < %s -march=r600 -mcpu=verde -verify-machineinstrs | FileCheck %s +; RUN: llc -march=amdgcn -mcpu=verde -show-mc-encoding -verify-machineinstrs < %s | FileCheck %s +; RUN: llc -march=amdgcn -mcpu=tonga -show-mc-encoding -verify-machineinstrs < %s | FileCheck %s ; Example of a simple geometry shader loading vertex attributes from the ; ESGS ring buffer -; CHECK-LABEL: {{^}}main: -; CHECK: buffer_load_dword -; CHECK: buffer_load_dword -; CHECK: buffer_load_dword -; CHECK: buffer_load_dword +; FIXME: Out of bounds immediate offset crashes -define void @main([17 x <16 x i8>] addrspace(2)* byval, [32 x <16 x i8>] addrspace(2)* byval, [16 x <32 x i8>] addrspace(2)* byval, [2 x <16 x i8>] addrspace(2)* byval, [17 x <16 x i8>] addrspace(2)* inreg, [17 x <16 x i8>] addrspace(2)* inreg, i32, i32, i32, i32) #0 { +; CHECK-LABEL: {{^}}main: +; CHECK: buffer_load_dword {{v[0-9]+}}, {{s\[[0-9]+:[0-9]+\]}}, 0 glc slc +; CHECK: buffer_load_dword {{v[0-9]+}}, {{v[0-9]+}}, {{s\[[0-9]+:[0-9]+\]}}, 0 offen glc slc +; CHECK: buffer_load_dword {{v[0-9]+}}, {{v[0-9]+}}, {{s\[[0-9]+:[0-9]+\]}}, 0 idxen glc slc +; CHECK: buffer_load_dword {{v[0-9]+}}, {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, 0 idxen offen glc slc +; CHECK: s_movk_i32 [[K:s[0-9]+]], 0x4d2 ; encoding +; CHECK: buffer_load_dword {{v[0-9]+}}, {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, [[K]] idxen offen offset:65535 glc slc + +define void @main([17 x <16 x i8>] addrspace(2)* byval %arg, [32 x <16 x i8>] addrspace(2)* byval %arg1, [16 x <32 x i8>] addrspace(2)* byval %arg2, [2 x <16 x i8>] addrspace(2)* byval %arg3, [17 x <16 x i8>] addrspace(2)* inreg %arg4, [17 x <16 x i8>] addrspace(2)* inreg %arg5, i32 %arg6, i32 %arg7, i32 %arg8, i32 %arg9) #0 { main_body: - %10 = getelementptr [2 x <16 x i8>] addrspace(2)* %3, i64 0, i32 1 - %11 = load <16 x i8> addrspace(2)* %10, !tbaa !0 - %12 = shl i32 %6, 2 - %13 = call i32 @llvm.SI.buffer.load.dword.i32.i32(<16 x i8> %11, i32 0, i32 0, i32 0, i32 0, i32 0, i32 1, i32 1, i32 0) - %14 = bitcast i32 %13 to float - %15 = call i32 @llvm.SI.buffer.load.dword.i32.i32(<16 x i8> %11, i32 %12, i32 0, i32 0, i32 1, i32 0, i32 1, i32 1, i32 0) - %16 = bitcast i32 %15 to float - 
%17 = call i32 @llvm.SI.buffer.load.dword.i32.i32(<16 x i8> %11, i32 %12, i32 0, i32 0, i32 0, i32 1, i32 1, i32 1, i32 0) - %18 = bitcast i32 %17 to float - %19 = call i32 @llvm.SI.buffer.load.dword.i32.v2i32(<16 x i8> %11, <2 x i32> <i32 0, i32 0>, i32 0, i32 0, i32 1, i32 1, i32 1, i32 1, i32 0) - %20 = bitcast i32 %19 to float - call void @llvm.SI.export(i32 15, i32 0, i32 1, i32 12, i32 0, float %14, float %16, float %18, float %20) + %tmp = getelementptr [2 x <16 x i8>] addrspace(2)* %arg3, i64 0, i32 1 + %tmp10 = load <16 x i8> addrspace(2)* %tmp, !tbaa !0 + %tmp11 = shl i32 %arg6, 2 + %tmp12 = call i32 @llvm.SI.buffer.load.dword.i32.i32(<16 x i8> %tmp10, i32 0, i32 0, i32 0, i32 0, i32 0, i32 1, i32 1, i32 0) + %tmp13 = bitcast i32 %tmp12 to float + %tmp14 = call i32 @llvm.SI.buffer.load.dword.i32.i32(<16 x i8> %tmp10, i32 %tmp11, i32 0, i32 0, i32 1, i32 0, i32 1, i32 1, i32 0) + %tmp15 = bitcast i32 %tmp14 to float + %tmp16 = call i32 @llvm.SI.buffer.load.dword.i32.i32(<16 x i8> %tmp10, i32 %tmp11, i32 0, i32 0, i32 0, i32 1, i32 1, i32 1, i32 0) + %tmp17 = bitcast i32 %tmp16 to float + %tmp18 = call i32 @llvm.SI.buffer.load.dword.i32.v2i32(<16 x i8> %tmp10, <2 x i32> zeroinitializer, i32 0, i32 0, i32 1, i32 1, i32 1, i32 1, i32 0) + %tmp19 = bitcast i32 %tmp18 to float + + %tmp20 = call i32 @llvm.SI.buffer.load.dword.i32.v2i32(<16 x i8> %tmp10, <2 x i32> zeroinitializer, i32 0, i32 123, i32 1, i32 1, i32 1, i32 1, i32 0) + %tmp21 = bitcast i32 %tmp20 to float + + %tmp22 = call i32 @llvm.SI.buffer.load.dword.i32.v2i32(<16 x i8> %tmp10, <2 x i32> zeroinitializer, i32 1234, i32 65535, i32 1, i32 1, i32 1, i32 1, i32 0) + %tmp23 = bitcast i32 %tmp22 to float + + call void @llvm.SI.export(i32 15, i32 0, i32 1, i32 12, i32 0, float %tmp13, float %tmp15, float %tmp17, float %tmp19) + call void @llvm.SI.export(i32 15, i32 0, i32 1, i32 12, i32 0, float %tmp21, float %tmp23, float %tmp23, float %tmp23) ret void } @@ -37,4 +50,4 @@ declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float attributes #0 = { "ShaderType"="1" } attributes #1 = { nounwind readonly } -!0 = metadata !{metadata !"const", null, i32 1} +!0 = !{!"const", null, i32 1} diff --git a/test/CodeGen/R600/llvm.SI.resinfo.ll b/test/CodeGen/R600/llvm.SI.resinfo.ll index d8f3722..ac95fd0 100644 --- a/test/CodeGen/R600/llvm.SI.resinfo.ll +++ b/test/CodeGen/R600/llvm.SI.resinfo.ll @@ -1,4 +1,5 @@ -; RUN: llc -march=r600 -mcpu=verde -verify-machineinstrs < %s | FileCheck %s +; RUN: llc -march=amdgcn -mcpu=verde -verify-machineinstrs < %s | FileCheck %s +; RUN: llc -march=amdgcn -mcpu=tonga -verify-machineinstrs < %s | FileCheck %s ; CHECK-DAG: image_get_resinfo {{v\[[0-9]+:[0-9]+\]}}, 15, 0, 0, -1 ; CHECK-DAG: image_get_resinfo {{v\[[0-9]+:[0-9]+\]}}, 3, 0, 0, 0 diff --git a/test/CodeGen/R600/llvm.SI.sample-masked.ll b/test/CodeGen/R600/llvm.SI.sample-masked.ll index 9e86bec..ce9558c 100644 --- a/test/CodeGen/R600/llvm.SI.sample-masked.ll +++ b/test/CodeGen/R600/llvm.SI.sample-masked.ll @@ -1,4 +1,5 @@ -;RUN: llc < %s -march=r600 -mcpu=verde | FileCheck %s +;RUN: llc < %s -march=amdgcn -mcpu=verde | FileCheck %s +;RUN: llc < %s -march=amdgcn -mcpu=tonga | FileCheck %s ; CHECK-LABEL: {{^}}v1: ; CHECK: image_sample {{v\[[0-9]+:[0-9]+\]}}, 13 diff --git a/test/CodeGen/R600/llvm.SI.sample.ll b/test/CodeGen/R600/llvm.SI.sample.ll index a1d2c02..509c45f 100644 --- a/test/CodeGen/R600/llvm.SI.sample.ll +++ b/test/CodeGen/R600/llvm.SI.sample.ll @@ -1,4 +1,5 @@ -;RUN: llc < %s -march=r600 -mcpu=verde 
-verify-machineinstrs | FileCheck %s +;RUN: llc < %s -march=amdgcn -mcpu=verde -verify-machineinstrs | FileCheck %s +;RUN: llc < %s -march=amdgcn -mcpu=tonga -verify-machineinstrs | FileCheck %s ;CHECK-DAG: image_sample {{v\[[0-9]+:[0-9]+\]}}, 15 ;CHECK-DAG: image_sample {{v\[[0-9]+:[0-9]+\]}}, 3 diff --git a/test/CodeGen/R600/llvm.SI.sampled.ll b/test/CodeGen/R600/llvm.SI.sampled.ll index 91b71f3..f2badff 100644 --- a/test/CodeGen/R600/llvm.SI.sampled.ll +++ b/test/CodeGen/R600/llvm.SI.sampled.ll @@ -1,4 +1,5 @@ -;RUN: llc < %s -march=r600 -mcpu=verde -verify-machineinstrs | FileCheck %s +;RUN: llc < %s -march=amdgcn -mcpu=verde -verify-machineinstrs | FileCheck %s +;RUN: llc < %s -march=amdgcn -mcpu=tonga -verify-machineinstrs | FileCheck %s ;CHECK-DAG: image_sample_d {{v\[[0-9]+:[0-9]+\]}}, 15 ;CHECK-DAG: image_sample_d {{v\[[0-9]+:[0-9]+\]}}, 3 diff --git a/test/CodeGen/R600/llvm.SI.sendmsg-m0.ll b/test/CodeGen/R600/llvm.SI.sendmsg-m0.ll new file mode 100644 index 0000000..2198590 --- /dev/null +++ b/test/CodeGen/R600/llvm.SI.sendmsg-m0.ll @@ -0,0 +1,20 @@ +;RUN: llc < %s -march=amdgcn -mcpu=verde -verify-machineinstrs | FileCheck --check-prefix=SI --check-prefix=BOTH %s +;RUN: llc < %s -march=amdgcn -mcpu=tonga -verify-machineinstrs | FileCheck --check-prefix=VI --check-prefix=BOTH %s + +; BOTH-LABEL: {{^}}main: +; BOTH: s_mov_b32 m0, s0 +; VI-NEXT: s_nop 0 +; BOTH-NEXT: s_sendmsg Gs_done(nop) +; BOTH-NEXT: s_endpgm + +define void @main(i32 inreg %a) #0 { +main_body: + call void @llvm.SI.sendmsg(i32 3, i32 %a) + ret void +} + +; Function Attrs: nounwind +declare void @llvm.SI.sendmsg(i32, i32) #1 + +attributes #0 = { "ShaderType"="2" "unsafe-fp-math"="true" } +attributes #1 = { nounwind } diff --git a/test/CodeGen/R600/llvm.SI.sendmsg.ll b/test/CodeGen/R600/llvm.SI.sendmsg.ll index 042fc5b..ce38002 100644 --- a/test/CodeGen/R600/llvm.SI.sendmsg.ll +++ b/test/CodeGen/R600/llvm.SI.sendmsg.ll @@ -1,4 +1,5 @@ -;RUN: llc < %s -march=r600 -mcpu=verde -verify-machineinstrs | FileCheck %s +;RUN: llc < %s -march=amdgcn -mcpu=verde -verify-machineinstrs | FileCheck %s +;RUN: llc < %s -march=amdgcn -mcpu=tonga -verify-machineinstrs | FileCheck %s ; CHECK-LABEL: {{^}}main: ; CHECK: s_sendmsg Gs(emit stream 0) diff --git a/test/CodeGen/R600/llvm.SI.tbuffer.store.ll b/test/CodeGen/R600/llvm.SI.tbuffer.store.ll index 702daea..71f5154 100644 --- a/test/CodeGen/R600/llvm.SI.tbuffer.store.ll +++ b/test/CodeGen/R600/llvm.SI.tbuffer.store.ll @@ -1,4 +1,5 @@ -;RUN: llc < %s -march=r600 -mcpu=verde -verify-machineinstrs | FileCheck %s +;RUN: llc < %s -march=amdgcn -mcpu=verde -verify-machineinstrs | FileCheck %s +;RUN: llc < %s -march=amdgcn -mcpu=tonga -verify-machineinstrs | FileCheck %s ;CHECK-LABEL: {{^}}test1: ;CHECK: tbuffer_store_format_xyzw {{v\[[0-9]+:[0-9]+\]}}, 0x20, -1, 0, -1, 0, 14, 4, {{v[0-9]+}}, {{s\[[0-9]+:[0-9]+\]}}, -1, 0, 0 diff --git a/test/CodeGen/R600/llvm.SI.tid.ll b/test/CodeGen/R600/llvm.SI.tid.ll index ee96124..f6e6d70 100644 --- a/test/CodeGen/R600/llvm.SI.tid.ll +++ b/test/CodeGen/R600/llvm.SI.tid.ll @@ -1,7 +1,9 @@ -;RUN: llc < %s -march=r600 -mcpu=verde -verify-machineinstrs | FileCheck %s +;RUN: llc < %s -march=amdgcn -mcpu=verde -verify-machineinstrs | FileCheck --check-prefix=SI --check-prefix=GCN %s +;RUN: llc < %s -march=amdgcn -mcpu=tonga -verify-machineinstrs | FileCheck --check-prefix=VI --check-prefix=GCN %s -;CHECK: v_mbcnt_lo_u32_b32_e64 -;CHECK: v_mbcnt_hi_u32_b32_e32 +;GCN: v_mbcnt_lo_u32_b32_e64 +;SI: v_mbcnt_hi_u32_b32_e32 +;VI: v_mbcnt_hi_u32_b32_e64 define 
void @main(<16 x i8> addrspace(2)* inreg, <16 x i8> addrspace(2)* inreg, <32 x i8> addrspace(2)* inreg, i32 inreg) "ShaderType"="0" { main_body: diff --git a/test/CodeGen/R600/llvm.amdgpu.kilp.ll b/test/CodeGen/R600/llvm.amdgpu.kilp.ll index 08bee38..42df6db 100644 --- a/test/CodeGen/R600/llvm.amdgpu.kilp.ll +++ b/test/CodeGen/R600/llvm.amdgpu.kilp.ll @@ -1,4 +1,5 @@ -; RUN: llc -march=r600 -mcpu=verde -verify-machineinstrs < %s | FileCheck -check-prefix=SI %s +; RUN: llc -march=amdgcn -mcpu=verde -verify-machineinstrs < %s | FileCheck -check-prefix=SI %s +; RUN: llc -march=amdgcn -mcpu=tonga -verify-machineinstrs < %s | FileCheck -check-prefix=SI %s ; SI-LABEL: {{^}}kilp_gs_const: ; SI: s_mov_b64 exec, 0 @@ -17,4 +18,4 @@ declare void @llvm.AMDGPU.kilp(float) attributes #0 = { "ShaderType"="2" } -!0 = metadata !{metadata !"const", null, i32 1} +!0 = !{!"const", null, i32 1} diff --git a/test/CodeGen/R600/llvm.amdgpu.lrp.ll b/test/CodeGen/R600/llvm.amdgpu.lrp.ll index ee922fe..4e4c2ec 100644 --- a/test/CodeGen/R600/llvm.amdgpu.lrp.ll +++ b/test/CodeGen/R600/llvm.amdgpu.lrp.ll @@ -1,4 +1,5 @@ -; RUN: llc -march=r600 -mcpu=SI -verify-machineinstrs < %s | FileCheck -check-prefix=SI -check-prefix=FUNC %s +; RUN: llc -march=amdgcn -mcpu=SI -verify-machineinstrs < %s | FileCheck -check-prefix=SI -check-prefix=FUNC %s +; RUN: llc -march=amdgcn -mcpu=tonga -verify-machineinstrs < %s | FileCheck -check-prefix=SI -check-prefix=FUNC %s declare float @llvm.AMDGPU.lrp(float, float, float) nounwind readnone diff --git a/test/CodeGen/R600/llvm.cos.ll b/test/CodeGen/R600/llvm.cos.ll index 837340f..c65df8b 100644 --- a/test/CodeGen/R600/llvm.cos.ll +++ b/test/CodeGen/R600/llvm.cos.ll @@ -1,5 +1,6 @@ ;RUN: llc < %s -march=r600 -mcpu=redwood | FileCheck %s -check-prefix=EG -check-prefix=FUNC -;RUN: llc < %s -march=r600 -mcpu=SI | FileCheck %s -check-prefix=SI -check-prefix=FUNC +;RUN: llc < %s -march=amdgcn -mcpu=SI | FileCheck %s -check-prefix=SI -check-prefix=FUNC +;RUN: llc < %s -march=amdgcn -mcpu=tonga | FileCheck %s -check-prefix=SI -check-prefix=FUNC ;FUNC-LABEL: test ;EG: MULADD_IEEE * diff --git a/test/CodeGen/R600/llvm.exp2.ll b/test/CodeGen/R600/llvm.exp2.ll index 52dc67d..4269892 100644 --- a/test/CodeGen/R600/llvm.exp2.ll +++ b/test/CodeGen/R600/llvm.exp2.ll @@ -1,14 +1,15 @@ -;RUN: llc < %s -march=r600 -mcpu=redwood | FileCheck %s --check-prefix=EG-CHECK --check-prefix=FUNC -;RUN: llc < %s -march=r600 -mcpu=cayman | FileCheck %s --check-prefix=CM-CHECK --check-prefix=FUNC -;RUN: llc < %s -march=r600 -mcpu=SI | FileCheck %s --check-prefix=SI-CHECK --check-prefix=FUNC +;RUN: llc < %s -march=r600 -mcpu=redwood | FileCheck %s --check-prefix=EG --check-prefix=FUNC +;RUN: llc < %s -march=r600 -mcpu=cayman | FileCheck %s --check-prefix=CM --check-prefix=FUNC +;RUN: llc < %s -march=amdgcn -mcpu=SI | FileCheck %s --check-prefix=SI --check-prefix=FUNC +;RUN: llc < %s -march=amdgcn -mcpu=tonga | FileCheck %s --check-prefix=SI --check-prefix=FUNC ;FUNC-LABEL: {{^}}test: -;EG-CHECK: EXP_IEEE -;CM-CHECK-DAG: EXP_IEEE T{{[0-9]+\.[XYZW]}} (MASKED) -;CM-CHECK-DAG: EXP_IEEE T{{[0-9]+\.[XYZW]}} (MASKED) -;CM-CHECK-DAG: EXP_IEEE T{{[0-9]+\.[XYZW]}} (MASKED) -;CM-CHECK-DAG: EXP_IEEE T{{[0-9]+\.[XYZW]}} -;SI-CHECK: v_exp_f32 +;EG: EXP_IEEE +;CM-DAG: EXP_IEEE T{{[0-9]+\.[XYZW]}} (MASKED) +;CM-DAG: EXP_IEEE T{{[0-9]+\.[XYZW]}} (MASKED) +;CM-DAG: EXP_IEEE T{{[0-9]+\.[XYZW]}} (MASKED) +;CM-DAG: EXP_IEEE T{{[0-9]+\.[XYZW]}} +;SI: v_exp_f32 define void @test(float addrspace(1)* %out, float %in) { entry: @@ -18,20 +19,20 
@@ entry: } ;FUNC-LABEL: {{^}}testv2: -;EG-CHECK: EXP_IEEE -;EG-CHECK: EXP_IEEE +;EG: EXP_IEEE +;EG: EXP_IEEE ; FIXME: We should be able to merge these packets together on Cayman so we ; have a maximum of 4 instructions. -;CM-CHECK-DAG: EXP_IEEE T{{[0-9]+\.[XYZW]}} (MASKED) -;CM-CHECK-DAG: EXP_IEEE T{{[0-9]+\.[XYZW]}} (MASKED) -;CM-CHECK-DAG: EXP_IEEE T{{[0-9]+\.[XYZW]}} (MASKED) -;CM-CHECK-DAG: EXP_IEEE T{{[0-9]+\.[XYZW]}} (MASKED) -;CM-CHECK-DAG: EXP_IEEE T{{[0-9]+\.[XYZW]}} (MASKED) -;CM-CHECK-DAG: EXP_IEEE T{{[0-9]+\.[XYZW]}} (MASKED) -;CM-CHECK-DAG: EXP_IEEE T{{[0-9]+\.[XYZW]}} -;CM-CHECK-DAG: EXP_IEEE T{{[0-9]+\.[XYZW]}} -;SI-CHECK: v_exp_f32 -;SI-CHECK: v_exp_f32 +;CM-DAG: EXP_IEEE T{{[0-9]+\.[XYZW]}} (MASKED) +;CM-DAG: EXP_IEEE T{{[0-9]+\.[XYZW]}} (MASKED) +;CM-DAG: EXP_IEEE T{{[0-9]+\.[XYZW]}} (MASKED) +;CM-DAG: EXP_IEEE T{{[0-9]+\.[XYZW]}} (MASKED) +;CM-DAG: EXP_IEEE T{{[0-9]+\.[XYZW]}} (MASKED) +;CM-DAG: EXP_IEEE T{{[0-9]+\.[XYZW]}} (MASKED) +;CM-DAG: EXP_IEEE T{{[0-9]+\.[XYZW]}} +;CM-DAG: EXP_IEEE T{{[0-9]+\.[XYZW]}} +;SI: v_exp_f32 +;SI: v_exp_f32 define void @testv2(<2 x float> addrspace(1)* %out, <2 x float> %in) { entry: @@ -41,32 +42,32 @@ entry: } ;FUNC-LABEL: {{^}}testv4: -;EG-CHECK: EXP_IEEE -;EG-CHECK: EXP_IEEE -;EG-CHECK: EXP_IEEE -;EG-CHECK: EXP_IEEE +;EG: EXP_IEEE +;EG: EXP_IEEE +;EG: EXP_IEEE +;EG: EXP_IEEE ; FIXME: We should be able to merge these packets together on Cayman so we ; have a maximum of 4 instructions. -;CM-CHECK-DAG: EXP_IEEE T{{[0-9]+\.[XYZW]}} (MASKED) -;CM-CHECK-DAG: EXP_IEEE T{{[0-9]+\.[XYZW]}} (MASKED) -;CM-CHECK-DAG: EXP_IEEE T{{[0-9]+\.[XYZW]}} (MASKED) -;CM-CHECK-DAG: EXP_IEEE T{{[0-9]+\.[XYZW]}} (MASKED) -;CM-CHECK-DAG: EXP_IEEE T{{[0-9]+\.[XYZW]}} (MASKED) -;CM-CHECK-DAG: EXP_IEEE T{{[0-9]+\.[XYZW]}} (MASKED) -;CM-CHECK-DAG: EXP_IEEE T{{[0-9]+\.[XYZW]}} (MASKED) -;CM-CHECK-DAG: EXP_IEEE T{{[0-9]+\.[XYZW]}} (MASKED) -;CM-CHECK-DAG: EXP_IEEE T{{[0-9]+\.[XYZW]}} (MASKED) -;CM-CHECK-DAG: EXP_IEEE T{{[0-9]+\.[XYZW]}} (MASKED) -;CM-CHECK-DAG: EXP_IEEE T{{[0-9]+\.[XYZW]}} (MASKED) -;CM-CHECK-DAG: EXP_IEEE T{{[0-9]+\.[XYZW]}} (MASKED) -;CM-CHECK-DAG: EXP_IEEE T{{[0-9]+\.[XYZW]}} -;CM-CHECK-DAG: EXP_IEEE T{{[0-9]+\.[XYZW]}} -;CM-CHECK-DAG: EXP_IEEE T{{[0-9]+\.[XYZW]}} -;CM-CHECK-DAG: EXP_IEEE T{{[0-9]+\.[XYZW]}} -;SI-CHECK: v_exp_f32 -;SI-CHECK: v_exp_f32 -;SI-CHECK: v_exp_f32 -;SI-CHECK: v_exp_f32 +;CM-DAG: EXP_IEEE T{{[0-9]+\.[XYZW]}} (MASKED) +;CM-DAG: EXP_IEEE T{{[0-9]+\.[XYZW]}} (MASKED) +;CM-DAG: EXP_IEEE T{{[0-9]+\.[XYZW]}} (MASKED) +;CM-DAG: EXP_IEEE T{{[0-9]+\.[XYZW]}} (MASKED) +;CM-DAG: EXP_IEEE T{{[0-9]+\.[XYZW]}} (MASKED) +;CM-DAG: EXP_IEEE T{{[0-9]+\.[XYZW]}} (MASKED) +;CM-DAG: EXP_IEEE T{{[0-9]+\.[XYZW]}} (MASKED) +;CM-DAG: EXP_IEEE T{{[0-9]+\.[XYZW]}} (MASKED) +;CM-DAG: EXP_IEEE T{{[0-9]+\.[XYZW]}} (MASKED) +;CM-DAG: EXP_IEEE T{{[0-9]+\.[XYZW]}} (MASKED) +;CM-DAG: EXP_IEEE T{{[0-9]+\.[XYZW]}} (MASKED) +;CM-DAG: EXP_IEEE T{{[0-9]+\.[XYZW]}} (MASKED) +;CM-DAG: EXP_IEEE T{{[0-9]+\.[XYZW]}} +;CM-DAG: EXP_IEEE T{{[0-9]+\.[XYZW]}} +;CM-DAG: EXP_IEEE T{{[0-9]+\.[XYZW]}} +;CM-DAG: EXP_IEEE T{{[0-9]+\.[XYZW]}} +;SI: v_exp_f32 +;SI: v_exp_f32 +;SI: v_exp_f32 +;SI: v_exp_f32 define void @testv4(<4 x float> addrspace(1)* %out, <4 x float> %in) { entry: %0 = call <4 x float> @llvm.exp2.v4f32(<4 x float> %in) diff --git a/test/CodeGen/R600/llvm.floor.ll b/test/CodeGen/R600/llvm.floor.ll deleted file mode 100644 index 0c7a15b..0000000 --- a/test/CodeGen/R600/llvm.floor.ll +++ /dev/null @@ -1,54 +0,0 @@ -; RUN: llc < %s -march=r600 -mcpu=redwood | 
FileCheck %s --check-prefix=R600-CHECK -; RUN: llc < %s -march=r600 -mcpu=SI -verify-machineinstrs | FileCheck %s --check-prefix=SI-CHECK - -; R600-CHECK: {{^}}f32: -; R600-CHECK: FLOOR -; SI-CHECK: {{^}}f32: -; SI-CHECK: v_floor_f32_e32 -define void @f32(float addrspace(1)* %out, float %in) { -entry: - %0 = call float @llvm.floor.f32(float %in) - store float %0, float addrspace(1)* %out - ret void -} - -; R600-CHECK: {{^}}v2f32: -; R600-CHECK: FLOOR -; R600-CHECK: FLOOR -; SI-CHECK: {{^}}v2f32: -; SI-CHECK: v_floor_f32_e32 -; SI-CHECK: v_floor_f32_e32 -define void @v2f32(<2 x float> addrspace(1)* %out, <2 x float> %in) { -entry: - %0 = call <2 x float> @llvm.floor.v2f32(<2 x float> %in) - store <2 x float> %0, <2 x float> addrspace(1)* %out - ret void -} - -; R600-CHECK: {{^}}v4f32: -; R600-CHECK: FLOOR -; R600-CHECK: FLOOR -; R600-CHECK: FLOOR -; R600-CHECK: FLOOR -; SI-CHECK: {{^}}v4f32: -; SI-CHECK: v_floor_f32_e32 -; SI-CHECK: v_floor_f32_e32 -; SI-CHECK: v_floor_f32_e32 -; SI-CHECK: v_floor_f32_e32 -define void @v4f32(<4 x float> addrspace(1)* %out, <4 x float> %in) { -entry: - %0 = call <4 x float> @llvm.floor.v4f32(<4 x float> %in) - store <4 x float> %0, <4 x float> addrspace(1)* %out - ret void -} - -; Function Attrs: nounwind readonly -declare float @llvm.floor.f32(float) #0 - -; Function Attrs: nounwind readonly -declare <2 x float> @llvm.floor.v2f32(<2 x float>) #0 - -; Function Attrs: nounwind readonly -declare <4 x float> @llvm.floor.v4f32(<4 x float>) #0 - -attributes #0 = { nounwind readonly } diff --git a/test/CodeGen/R600/llvm.log2.ll b/test/CodeGen/R600/llvm.log2.ll index 0b54a46..c75e785 100644 --- a/test/CodeGen/R600/llvm.log2.ll +++ b/test/CodeGen/R600/llvm.log2.ll @@ -1,14 +1,15 @@ -;RUN: llc < %s -march=r600 -mcpu=redwood | FileCheck %s --check-prefix=EG-CHECK --check-prefix=FUNC -;RUN: llc < %s -march=r600 -mcpu=cayman | FileCheck %s --check-prefix=CM-CHECK --check-prefix=FUNC -;RUN: llc < %s -march=r600 -mcpu=SI | FileCheck %s --check-prefix=SI-CHECK --check-prefix=FUNC +;RUN: llc < %s -march=r600 -mcpu=redwood | FileCheck %s --check-prefix=EG --check-prefix=FUNC +;RUN: llc < %s -march=r600 -mcpu=cayman | FileCheck %s --check-prefix=CM --check-prefix=FUNC +;RUN: llc < %s -march=amdgcn -mcpu=SI | FileCheck %s --check-prefix=SI --check-prefix=FUNC +;RUN: llc < %s -march=amdgcn -mcpu=tonga | FileCheck %s --check-prefix=SI --check-prefix=FUNC ;FUNC-LABEL: {{^}}test: -;EG-CHECK: LOG_IEEE -;CM-CHECK-DAG: LOG_IEEE T{{[0-9]+\.[XYZW]}} (MASKED) -;CM-CHECK-DAG: LOG_IEEE T{{[0-9]+\.[XYZW]}} (MASKED) -;CM-CHECK-DAG: LOG_IEEE T{{[0-9]+\.[XYZW]}} (MASKED) -;CM-CHECK-DAG: LOG_IEEE T{{[0-9]+\.[XYZW]}} -;SI-CHECK: v_log_f32 +;EG: LOG_IEEE +;CM-DAG: LOG_IEEE T{{[0-9]+\.[XYZW]}} (MASKED) +;CM-DAG: LOG_IEEE T{{[0-9]+\.[XYZW]}} (MASKED) +;CM-DAG: LOG_IEEE T{{[0-9]+\.[XYZW]}} (MASKED) +;CM-DAG: LOG_IEEE T{{[0-9]+\.[XYZW]}} +;SI: v_log_f32 define void @test(float addrspace(1)* %out, float %in) { entry: @@ -18,20 +19,20 @@ entry: } ;FUNC-LABEL: {{^}}testv2: -;EG-CHECK: LOG_IEEE -;EG-CHECK: LOG_IEEE +;EG: LOG_IEEE +;EG: LOG_IEEE ; FIXME: We should be able to merge these packets together on Cayman so we ; have a maximum of 4 instructions. 
-;CM-CHECK-DAG: LOG_IEEE T{{[0-9]+\.[XYZW]}} (MASKED) -;CM-CHECK-DAG: LOG_IEEE T{{[0-9]+\.[XYZW]}} (MASKED) -;CM-CHECK-DAG: LOG_IEEE T{{[0-9]+\.[XYZW]}} (MASKED) -;CM-CHECK-DAG: LOG_IEEE T{{[0-9]+\.[XYZW]}} (MASKED) -;CM-CHECK-DAG: LOG_IEEE T{{[0-9]+\.[XYZW]}} (MASKED) -;CM-CHECK-DAG: LOG_IEEE T{{[0-9]+\.[XYZW]}} (MASKED) -;CM-CHECK-DAG: LOG_IEEE T{{[0-9]+\.[XYZW]}} -;CM-CHECK-DAG: LOG_IEEE T{{[0-9]+\.[XYZW]}} -;SI-CHECK: v_log_f32 -;SI-CHECK: v_log_f32 +;CM-DAG: LOG_IEEE T{{[0-9]+\.[XYZW]}} (MASKED) +;CM-DAG: LOG_IEEE T{{[0-9]+\.[XYZW]}} (MASKED) +;CM-DAG: LOG_IEEE T{{[0-9]+\.[XYZW]}} (MASKED) +;CM-DAG: LOG_IEEE T{{[0-9]+\.[XYZW]}} (MASKED) +;CM-DAG: LOG_IEEE T{{[0-9]+\.[XYZW]}} (MASKED) +;CM-DAG: LOG_IEEE T{{[0-9]+\.[XYZW]}} (MASKED) +;CM-DAG: LOG_IEEE T{{[0-9]+\.[XYZW]}} +;CM-DAG: LOG_IEEE T{{[0-9]+\.[XYZW]}} +;SI: v_log_f32 +;SI: v_log_f32 define void @testv2(<2 x float> addrspace(1)* %out, <2 x float> %in) { entry: @@ -41,32 +42,32 @@ entry: } ;FUNC-LABEL: {{^}}testv4: -;EG-CHECK: LOG_IEEE -;EG-CHECK: LOG_IEEE -;EG-CHECK: LOG_IEEE -;EG-CHECK: LOG_IEEE +;EG: LOG_IEEE +;EG: LOG_IEEE +;EG: LOG_IEEE +;EG: LOG_IEEE ; FIXME: We should be able to merge these packets together on Cayman so we ; have a maximum of 4 instructions. -;CM-CHECK-DAG: LOG_IEEE T{{[0-9]+\.[XYZW]}} (MASKED) -;CM-CHECK-DAG: LOG_IEEE T{{[0-9]+\.[XYZW]}} (MASKED) -;CM-CHECK-DAG: LOG_IEEE T{{[0-9]+\.[XYZW]}} (MASKED) -;CM-CHECK-DAG: LOG_IEEE T{{[0-9]+\.[XYZW]}} (MASKED) -;CM-CHECK-DAG: LOG_IEEE T{{[0-9]+\.[XYZW]}} (MASKED) -;CM-CHECK-DAG: LOG_IEEE T{{[0-9]+\.[XYZW]}} (MASKED) -;CM-CHECK-DAG: LOG_IEEE T{{[0-9]+\.[XYZW]}} (MASKED) -;CM-CHECK-DAG: LOG_IEEE T{{[0-9]+\.[XYZW]}} (MASKED) -;CM-CHECK-DAG: LOG_IEEE T{{[0-9]+\.[XYZW]}} (MASKED) -;CM-CHECK-DAG: LOG_IEEE T{{[0-9]+\.[XYZW]}} (MASKED) -;CM-CHECK-DAG: LOG_IEEE T{{[0-9]+\.[XYZW]}} (MASKED) -;CM-CHECK-DAG: LOG_IEEE T{{[0-9]+\.[XYZW]}} (MASKED) -;CM-CHECK-DAG: LOG_IEEE T{{[0-9]+\.[XYZW]}} -;CM-CHECK-DAG: LOG_IEEE T{{[0-9]+\.[XYZW]}} -;CM-CHECK-DAG: LOG_IEEE T{{[0-9]+\.[XYZW]}} -;CM-CHECK-DAG: LOG_IEEE T{{[0-9]+\.[XYZW]}} -;SI-CHECK: v_log_f32 -;SI-CHECK: v_log_f32 -;SI-CHECK: v_log_f32 -;SI-CHECK: v_log_f32 +;CM-DAG: LOG_IEEE T{{[0-9]+\.[XYZW]}} (MASKED) +;CM-DAG: LOG_IEEE T{{[0-9]+\.[XYZW]}} (MASKED) +;CM-DAG: LOG_IEEE T{{[0-9]+\.[XYZW]}} (MASKED) +;CM-DAG: LOG_IEEE T{{[0-9]+\.[XYZW]}} (MASKED) +;CM-DAG: LOG_IEEE T{{[0-9]+\.[XYZW]}} (MASKED) +;CM-DAG: LOG_IEEE T{{[0-9]+\.[XYZW]}} (MASKED) +;CM-DAG: LOG_IEEE T{{[0-9]+\.[XYZW]}} (MASKED) +;CM-DAG: LOG_IEEE T{{[0-9]+\.[XYZW]}} (MASKED) +;CM-DAG: LOG_IEEE T{{[0-9]+\.[XYZW]}} (MASKED) +;CM-DAG: LOG_IEEE T{{[0-9]+\.[XYZW]}} (MASKED) +;CM-DAG: LOG_IEEE T{{[0-9]+\.[XYZW]}} (MASKED) +;CM-DAG: LOG_IEEE T{{[0-9]+\.[XYZW]}} (MASKED) +;CM-DAG: LOG_IEEE T{{[0-9]+\.[XYZW]}} +;CM-DAG: LOG_IEEE T{{[0-9]+\.[XYZW]}} +;CM-DAG: LOG_IEEE T{{[0-9]+\.[XYZW]}} +;CM-DAG: LOG_IEEE T{{[0-9]+\.[XYZW]}} +;SI: v_log_f32 +;SI: v_log_f32 +;SI: v_log_f32 +;SI: v_log_f32 define void @testv4(<4 x float> addrspace(1)* %out, <4 x float> %in) { entry: %0 = call <4 x float> @llvm.log2.v4f32(<4 x float> %in) diff --git a/test/CodeGen/R600/llvm.memcpy.ll b/test/CodeGen/R600/llvm.memcpy.ll index 5f2710a..e491732 100644 --- a/test/CodeGen/R600/llvm.memcpy.ll +++ b/test/CodeGen/R600/llvm.memcpy.ll @@ -1,4 +1,5 @@ -; RUN: llc -march=r600 -mcpu=SI -verify-machineinstrs < %s | FileCheck -check-prefix=SI -check-prefix=FUNC %s +; RUN: llc -march=amdgcn -mcpu=SI -verify-machineinstrs < %s | FileCheck -check-prefix=SI -check-prefix=FUNC %s +; RUN: llc -march=amdgcn 
-mcpu=tonga -verify-machineinstrs < %s | FileCheck -check-prefix=SI -check-prefix=FUNC %s declare void @llvm.memcpy.p3i8.p3i8.i32(i8 addrspace(3)* nocapture, i8 addrspace(3)* nocapture, i32, i32, i1) nounwind declare void @llvm.memcpy.p1i8.p1i8.i64(i8 addrspace(1)* nocapture, i8 addrspace(1)* nocapture, i64, i32, i1) nounwind @@ -6,39 +7,23 @@ declare void @llvm.memcpy.p1i8.p1i8.i64(i8 addrspace(1)* nocapture, i8 addrspace ; FUNC-LABEL: {{^}}test_small_memcpy_i64_lds_to_lds_align1: ; SI: ds_read_u8 -; SI: ds_write_b8 ; SI: ds_read_u8 -; SI: ds_write_b8 ; SI: ds_read_u8 -; SI: ds_write_b8 ; SI: ds_read_u8 -; SI: ds_write_b8 ; SI: ds_read_u8 -; SI: ds_write_b8 - ; SI: ds_read_u8 -; SI: ds_write_b8 ; SI: ds_read_u8 -; SI: ds_write_b8 ; SI: ds_read_u8 -; SI: ds_write_b8 + ; SI: ds_read_u8 -; SI: ds_write_b8 ; SI: ds_read_u8 -; SI: ds_write_b8 - ; SI: ds_read_u8 -; SI: ds_write_b8 ; SI: ds_read_u8 -; SI: ds_write_b8 ; SI: ds_read_u8 -; SI: ds_write_b8 ; SI: ds_read_u8 -; SI: ds_write_b8 ; SI: ds_read_u8 ; SI: ds_read_u8 - ; SI: ds_read_u8 ; SI: ds_read_u8 ; SI: ds_read_u8 @@ -65,6 +50,14 @@ declare void @llvm.memcpy.p1i8.p1i8.i64(i8 addrspace(1)* nocapture, i8 addrspace ; SI: ds_write_b8 ; SI: ds_write_b8 ; SI: ds_write_b8 + +; SI: ds_write_b8 +; SI: ds_write_b8 +; SI: ds_write_b8 +; SI: ds_write_b8 +; SI: ds_write_b8 +; SI: ds_write_b8 +; SI: ds_write_b8 ; SI: ds_write_b8 ; SI: ds_write_b8 @@ -75,6 +68,14 @@ declare void @llvm.memcpy.p1i8.p1i8.i64(i8 addrspace(1)* nocapture, i8 addrspace ; SI: ds_write_b8 ; SI: ds_write_b8 ; SI: ds_write_b8 + +; SI: ds_write_b8 +; SI: ds_write_b8 +; SI: ds_write_b8 +; SI: ds_write_b8 +; SI: ds_write_b8 +; SI: ds_write_b8 +; SI: ds_write_b8 ; SI: ds_write_b8 ; SI: s_endpgm diff --git a/test/CodeGen/R600/llvm.rint.f64.ll b/test/CodeGen/R600/llvm.rint.f64.ll index 72b546e..c63fb17 100644 --- a/test/CodeGen/R600/llvm.rint.f64.ll +++ b/test/CodeGen/R600/llvm.rint.f64.ll @@ -1,5 +1,6 @@ -; RUN: llc -march=r600 -mcpu=bonaire -verify-machineinstrs < %s | FileCheck -check-prefix=CI -check-prefix=FUNC %s -; RUN: llc -march=r600 -mcpu=SI -verify-machineinstrs < %s | FileCheck -check-prefix=SI -check-prefix=FUNC %s +; RUN: llc -march=amdgcn -mcpu=tonga -verify-machineinstrs < %s | FileCheck -check-prefix=CI -check-prefix=FUNC %s +; RUN: llc -march=amdgcn -mcpu=bonaire -verify-machineinstrs < %s | FileCheck -check-prefix=CI -check-prefix=FUNC %s +; RUN: llc -march=amdgcn -mcpu=SI -verify-machineinstrs < %s | FileCheck -check-prefix=SI -check-prefix=FUNC %s ; FUNC-LABEL: {{^}}rint_f64: ; CI: v_rndne_f64_e32 diff --git a/test/CodeGen/R600/llvm.rint.ll b/test/CodeGen/R600/llvm.rint.ll index 2e05964..661db51 100644 --- a/test/CodeGen/R600/llvm.rint.ll +++ b/test/CodeGen/R600/llvm.rint.ll @@ -1,5 +1,6 @@ ; RUN: llc -march=r600 -mcpu=redwood < %s | FileCheck %s -check-prefix=R600 -check-prefix=FUNC -; RUN: llc -march=r600 -mcpu=SI -verify-machineinstrs < %s | FileCheck -check-prefix=SI -check-prefix=FUNC %s +; RUN: llc -march=amdgcn -mcpu=SI -verify-machineinstrs < %s | FileCheck -check-prefix=SI -check-prefix=FUNC %s +; RUN: llc -march=amdgcn -mcpu=tonga -verify-machineinstrs < %s | FileCheck -check-prefix=SI -check-prefix=FUNC %s ; FUNC-LABEL: {{^}}rint_f32: ; R600: RNDNE diff --git a/test/CodeGen/R600/llvm.round.f64.ll b/test/CodeGen/R600/llvm.round.f64.ll new file mode 100644 index 0000000..920dbb3 --- /dev/null +++ b/test/CodeGen/R600/llvm.round.f64.ll @@ -0,0 +1,74 @@ +; RUN: llc -march=amdgcn -mcpu=SI < %s | FileCheck -check-prefix=SI -check-prefix=FUNC %s + +; 
FUNC-LABEL: {{^}}round_f64: +; SI: s_endpgm +define void @round_f64(double addrspace(1)* %out, double %x) #0 { + %result = call double @llvm.round.f64(double %x) #1 + store double %result, double addrspace(1)* %out + ret void +} + +; This is a pretty large function, so just test a few of the +; instructions that are necessary. + +; FUNC-LABEL: {{^}}v_round_f64: +; SI: buffer_load_dwordx2 +; SI: v_bfe_u32 [[EXP:v[0-9]+]], v{{[0-9]+}}, 20, 11 + +; SI-DAG: v_not_b32_e32 +; SI-DAG: v_not_b32_e32 + +; SI-DAG: v_cmp_eq_i32 + +; SI-DAG: s_mov_b32 [[BFIMASK:s[0-9]+]], 0x7fffffff +; SI-DAG: v_cmp_lt_i32_e64 +; SI-DAG: v_bfi_b32 [[COPYSIGN:v[0-9]+]], [[BFIMASK]] + +; SI-DAG: v_cmp_gt_i32_e64 + + +; SI: buffer_store_dwordx2 +; SI: s_endpgm +define void @v_round_f64(double addrspace(1)* %out, double addrspace(1)* %in) #0 { + %tid = call i32 @llvm.r600.read.tidig.x() #1 + %gep = getelementptr double addrspace(1)* %in, i32 %tid + %out.gep = getelementptr double addrspace(1)* %out, i32 %tid + %x = load double addrspace(1)* %gep + %result = call double @llvm.round.f64(double %x) #1 + store double %result, double addrspace(1)* %out.gep + ret void +} + +; FUNC-LABEL: {{^}}round_v2f64: +; SI: s_endpgm +define void @round_v2f64(<2 x double> addrspace(1)* %out, <2 x double> %in) #0 { + %result = call <2 x double> @llvm.round.v2f64(<2 x double> %in) #1 + store <2 x double> %result, <2 x double> addrspace(1)* %out + ret void +} + +; FUNC-LABEL: {{^}}round_v4f64: +; SI: s_endpgm +define void @round_v4f64(<4 x double> addrspace(1)* %out, <4 x double> %in) #0 { + %result = call <4 x double> @llvm.round.v4f64(<4 x double> %in) #1 + store <4 x double> %result, <4 x double> addrspace(1)* %out + ret void +} + +; FUNC-LABEL: {{^}}round_v8f64: +; SI: s_endpgm +define void @round_v8f64(<8 x double> addrspace(1)* %out, <8 x double> %in) #0 { + %result = call <8 x double> @llvm.round.v8f64(<8 x double> %in) #1 + store <8 x double> %result, <8 x double> addrspace(1)* %out + ret void +} + +declare i32 @llvm.r600.read.tidig.x() #1 + +declare double @llvm.round.f64(double) #1 +declare <2 x double> @llvm.round.v2f64(<2 x double>) #1 +declare <4 x double> @llvm.round.v4f64(<4 x double>) #1 +declare <8 x double> @llvm.round.v8f64(<8 x double>) #1 + +attributes #0 = { nounwind } +attributes #1 = { nounwind readnone } diff --git a/test/CodeGen/R600/llvm.round.ll b/test/CodeGen/R600/llvm.round.ll index bedf4ba..8d1cfb6 100644 --- a/test/CodeGen/R600/llvm.round.ll +++ b/test/CodeGen/R600/llvm.round.ll @@ -1,17 +1,28 @@ -; RUN: llc -march=r600 -mcpu=redwood < %s | FileCheck %s --check-prefix=R600 --check-prefix=FUNC - -; FUNC-LABEL: {{^}}f32: -; R600: FRACT {{.*}}, [[ARG:KC[0-9]\[[0-9]+\]\.[XYZW]]] -; R600-DAG: ADD {{.*}}, -0.5 -; R600-DAG: CEIL {{.*}} [[ARG]] -; R600-DAG: FLOOR {{.*}} [[ARG]] -; R600-DAG: CNDGE -; R600-DAG: CNDGT -; R600: CNDGE {{[^,]+}}, [[ARG]] -define void @f32(float addrspace(1)* %out, float %in) { -entry: - %0 = call float @llvm.round.f32(float %in) - store float %0, float addrspace(1)* %out +; RUN: llc -march=amdgcn -mcpu=SI < %s | FileCheck -check-prefix=SI -check-prefix=FUNC %s +; RUN: llc -march=amdgcn -mcpu=tonga < %s | FileCheck -check-prefix=SI -check-prefix=FUNC %s +; RUN: llc -march=r600 -mcpu=redwood < %s | FileCheck -check-prefix=R600 -check-prefix=FUNC %s + +; FUNC-LABEL: {{^}}round_f32: +; SI-DAG: s_load_dword [[SX:s[0-9]+]] +; SI-DAG: s_mov_b32 [[K:s[0-9]+]], 0x7fffffff +; SI: v_trunc_f32_e32 [[TRUNC:v[0-9]+]], [[SX]] +; SI: v_sub_f32_e32 [[SUB:v[0-9]+]], [[SX]], [[TRUNC]] +; SI: v_mov_b32_e32 
[[VX:v[0-9]+]], [[SX]] +; SI: v_bfi_b32 [[COPYSIGN:v[0-9]+]], [[K]], 1.0, [[VX]] +; SI: v_cmp_ge_f32_e64 [[CMP:s\[[0-9]+:[0-9]+\]]], |[[SUB]]|, 0.5 +; SI: v_cndmask_b32_e64 [[SEL:v[0-9]+]], 0, [[VX]], [[CMP]] +; SI: v_add_f32_e32 [[RESULT:v[0-9]+]], [[SEL]], [[TRUNC]] +; SI: buffer_store_dword [[RESULT]] + +; R600: TRUNC {{.*}}, [[ARG:KC[0-9]\[[0-9]+\]\.[XYZW]]] +; R600-DAG: ADD {{.*}}, +; R600-DAG: BFI_INT +; R600-DAG: SETGE +; R600-DAG: CNDE +; R600-DAG: ADD +define void @round_f32(float addrspace(1)* %out, float %x) #0 { + %result = call float @llvm.round.f32(float %x) #1 + store float %result, float addrspace(1)* %out ret void } @@ -20,24 +31,37 @@ entry: ; a test for the scalar case, so the vector tests just check that the ; compiler doesn't crash. -; FUNC-LABEL: v2f32 +; FUNC-LABEL: {{^}}round_v2f32: +; SI: s_endpgm ; R600: CF_END -define void @v2f32(<2 x float> addrspace(1)* %out, <2 x float> %in) { -entry: - %0 = call <2 x float> @llvm.round.v2f32(<2 x float> %in) - store <2 x float> %0, <2 x float> addrspace(1)* %out +define void @round_v2f32(<2 x float> addrspace(1)* %out, <2 x float> %in) #0 { + %result = call <2 x float> @llvm.round.v2f32(<2 x float> %in) #1 + store <2 x float> %result, <2 x float> addrspace(1)* %out ret void } -; FUNC-LABEL: v4f32 +; FUNC-LABEL: {{^}}round_v4f32: +; SI: s_endpgm ; R600: CF_END -define void @v4f32(<4 x float> addrspace(1)* %out, <4 x float> %in) { -entry: - %0 = call <4 x float> @llvm.round.v4f32(<4 x float> %in) - store <4 x float> %0, <4 x float> addrspace(1)* %out +define void @round_v4f32(<4 x float> addrspace(1)* %out, <4 x float> %in) #0 { + %result = call <4 x float> @llvm.round.v4f32(<4 x float> %in) #1 + store <4 x float> %result, <4 x float> addrspace(1)* %out ret void } -declare float @llvm.round.f32(float) -declare <2 x float> @llvm.round.v2f32(<2 x float>) -declare <4 x float> @llvm.round.v4f32(<4 x float>) +; FUNC-LABEL: {{^}}round_v8f32: +; SI: s_endpgm +; R600: CF_END +define void @round_v8f32(<8 x float> addrspace(1)* %out, <8 x float> %in) #0 { + %result = call <8 x float> @llvm.round.v8f32(<8 x float> %in) #1 + store <8 x float> %result, <8 x float> addrspace(1)* %out + ret void +} + +declare float @llvm.round.f32(float) #1 +declare <2 x float> @llvm.round.v2f32(<2 x float>) #1 +declare <4 x float> @llvm.round.v4f32(<4 x float>) #1 +declare <8 x float> @llvm.round.v8f32(<8 x float>) #1 + +attributes #0 = { nounwind } +attributes #1 = { nounwind readnone } diff --git a/test/CodeGen/R600/llvm.sin.ll b/test/CodeGen/R600/llvm.sin.ll index 7e45710..3bb245c 100644 --- a/test/CodeGen/R600/llvm.sin.ll +++ b/test/CodeGen/R600/llvm.sin.ll @@ -1,6 +1,8 @@ ; RUN: llc -march=r600 -mcpu=redwood < %s | FileCheck -check-prefix=EG -check-prefix=FUNC %s -; RUN: llc -march=r600 -mcpu=SI < %s | FileCheck -check-prefix=SI -check-prefix=SI-SAFE -check-prefix=FUNC %s -; RUN: llc -march=r600 -mcpu=SI -enable-unsafe-fp-math < %s | FileCheck -check-prefix=SI -check-prefix=SI-UNSAFE -check-prefix=FUNC %s +; RUN: llc -march=amdgcn -mcpu=SI < %s | FileCheck -check-prefix=SI -check-prefix=SI-SAFE -check-prefix=FUNC %s +; RUN: llc -march=amdgcn -mcpu=SI -enable-unsafe-fp-math < %s | FileCheck -check-prefix=SI -check-prefix=SI-UNSAFE -check-prefix=FUNC %s +; RUN: llc -march=amdgcn -mcpu=tonga < %s | FileCheck -check-prefix=SI -check-prefix=SI-SAFE -check-prefix=FUNC %s +; RUN: llc -march=amdgcn -mcpu=tonga -enable-unsafe-fp-math < %s | FileCheck -check-prefix=SI -check-prefix=SI-UNSAFE -check-prefix=FUNC %s ; FUNC-LABEL: sin_f32 ; EG: MULADD_IEEE * diff 
--git a/test/CodeGen/R600/llvm.sqrt.ll b/test/CodeGen/R600/llvm.sqrt.ll index c039225..cc4717a 100644 --- a/test/CodeGen/R600/llvm.sqrt.ll +++ b/test/CodeGen/R600/llvm.sqrt.ll @@ -1,11 +1,12 @@ -; RUN: llc < %s -march=r600 --mcpu=redwood | FileCheck %s --check-prefix=R600-CHECK -; RUN: llc < %s -march=r600 --mcpu=SI -verify-machineinstrs| FileCheck %s --check-prefix=SI-CHECK +; RUN: llc < %s -march=r600 --mcpu=redwood | FileCheck %s --check-prefix=R600 +; RUN: llc < %s -march=amdgcn --mcpu=SI -verify-machineinstrs| FileCheck %s --check-prefix=SI +; RUN: llc < %s -march=amdgcn --mcpu=tonga -verify-machineinstrs| FileCheck %s --check-prefix=SI -; R600-CHECK-LABEL: {{^}}sqrt_f32: -; R600-CHECK: RECIPSQRT_CLAMPED * T{{[0-9]\.[XYZW]}}, KC0[2].Z -; R600-CHECK: MUL NON-IEEE T{{[0-9]\.[XYZW]}}, KC0[2].Z, PS -; SI-CHECK-LABEL: {{^}}sqrt_f32: -; SI-CHECK: v_sqrt_f32_e32 +; R600-LABEL: {{^}}sqrt_f32: +; R600: RECIPSQRT_CLAMPED * T{{[0-9]\.[XYZW]}}, KC0[2].Z +; R600: MUL NON-IEEE T{{[0-9]\.[XYZW]}}, KC0[2].Z, PS +; SI-LABEL: {{^}}sqrt_f32: +; SI: v_sqrt_f32_e32 define void @sqrt_f32(float addrspace(1)* %out, float %in) { entry: %0 = call float @llvm.sqrt.f32(float %in) @@ -13,14 +14,14 @@ entry: ret void } -; R600-CHECK-LABEL: {{^}}sqrt_v2f32: -; R600-CHECK-DAG: RECIPSQRT_CLAMPED * T{{[0-9]\.[XYZW]}}, KC0[2].W -; R600-CHECK-DAG: MUL NON-IEEE T{{[0-9]\.[XYZW]}}, KC0[2].W, PS -; R600-CHECK-DAG: RECIPSQRT_CLAMPED * T{{[0-9]\.[XYZW]}}, KC0[3].X -; R600-CHECK-DAG: MUL NON-IEEE T{{[0-9]\.[XYZW]}}, KC0[3].X, PS -; SI-CHECK-LABEL: {{^}}sqrt_v2f32: -; SI-CHECK: v_sqrt_f32_e32 -; SI-CHECK: v_sqrt_f32_e32 +; R600-LABEL: {{^}}sqrt_v2f32: +; R600-DAG: RECIPSQRT_CLAMPED * T{{[0-9]\.[XYZW]}}, KC0[2].W +; R600-DAG: MUL NON-IEEE T{{[0-9]\.[XYZW]}}, KC0[2].W, PS +; R600-DAG: RECIPSQRT_CLAMPED * T{{[0-9]\.[XYZW]}}, KC0[3].X +; R600-DAG: MUL NON-IEEE T{{[0-9]\.[XYZW]}}, KC0[3].X, PS +; SI-LABEL: {{^}}sqrt_v2f32: +; SI: v_sqrt_f32_e32 +; SI: v_sqrt_f32_e32 define void @sqrt_v2f32(<2 x float> addrspace(1)* %out, <2 x float> %in) { entry: %0 = call <2 x float> @llvm.sqrt.v2f32(<2 x float> %in) @@ -28,20 +29,20 @@ entry: ret void } -; R600-CHECK-LABEL: {{^}}sqrt_v4f32: -; R600-CHECK-DAG: RECIPSQRT_CLAMPED * T{{[0-9]\.[XYZW]}}, KC0[3].Y -; R600-CHECK-DAG: MUL NON-IEEE T{{[0-9]\.[XYZW]}}, KC0[3].Y, PS -; R600-CHECK-DAG: RECIPSQRT_CLAMPED * T{{[0-9]\.[XYZW]}}, KC0[3].Z -; R600-CHECK-DAG: MUL NON-IEEE T{{[0-9]\.[XYZW]}}, KC0[3].Z, PS -; R600-CHECK-DAG: RECIPSQRT_CLAMPED * T{{[0-9]\.[XYZW]}}, KC0[3].W -; R600-CHECK-DAG: MUL NON-IEEE T{{[0-9]\.[XYZW]}}, KC0[3].W, PS -; R600-CHECK-DAG: RECIPSQRT_CLAMPED * T{{[0-9]\.[XYZW]}}, KC0[4].X -; R600-CHECK-DAG: MUL NON-IEEE T{{[0-9]\.[XYZW]}}, KC0[4].X, PS -; SI-CHECK-LABEL: {{^}}sqrt_v4f32: -; SI-CHECK: v_sqrt_f32_e32 -; SI-CHECK: v_sqrt_f32_e32 -; SI-CHECK: v_sqrt_f32_e32 -; SI-CHECK: v_sqrt_f32_e32 +; R600-LABEL: {{^}}sqrt_v4f32: +; R600-DAG: RECIPSQRT_CLAMPED * T{{[0-9]\.[XYZW]}}, KC0[3].Y +; R600-DAG: MUL NON-IEEE T{{[0-9]\.[XYZW]}}, KC0[3].Y, PS +; R600-DAG: RECIPSQRT_CLAMPED * T{{[0-9]\.[XYZW]}}, KC0[3].Z +; R600-DAG: MUL NON-IEEE T{{[0-9]\.[XYZW]}}, KC0[3].Z, PS +; R600-DAG: RECIPSQRT_CLAMPED * T{{[0-9]\.[XYZW]}}, KC0[3].W +; R600-DAG: MUL NON-IEEE T{{[0-9]\.[XYZW]}}, KC0[3].W, PS +; R600-DAG: RECIPSQRT_CLAMPED * T{{[0-9]\.[XYZW]}}, KC0[4].X +; R600-DAG: MUL NON-IEEE T{{[0-9]\.[XYZW]}}, KC0[4].X, PS +; SI-LABEL: {{^}}sqrt_v4f32: +; SI: v_sqrt_f32_e32 +; SI: v_sqrt_f32_e32 +; SI: v_sqrt_f32_e32 +; SI: v_sqrt_f32_e32 define void @sqrt_v4f32(<4 x float> addrspace(1)* %out, <4 x float> 
%in) { entry: %0 = call <4 x float> @llvm.sqrt.v4f32(<4 x float> %in) diff --git a/test/CodeGen/R600/llvm.trunc.ll b/test/CodeGen/R600/llvm.trunc.ll deleted file mode 100644 index 5585477..0000000 --- a/test/CodeGen/R600/llvm.trunc.ll +++ /dev/null @@ -1,13 +0,0 @@ -; RUN: llc < %s -march=r600 -mcpu=redwood | FileCheck %s - -; CHECK-LABEL: {{^}}trunc_f32: -; CHECK: TRUNC - -define void @trunc_f32(float addrspace(1)* %out, float %in) { -entry: - %0 = call float @llvm.trunc.f32(float %in) - store float %0, float addrspace(1)* %out - ret void -} - -declare float @llvm.trunc.f32(float) diff --git a/test/CodeGen/R600/load-i1.ll b/test/CodeGen/R600/load-i1.ll index d85e16f..315c0a3 100644 --- a/test/CodeGen/R600/load-i1.ll +++ b/test/CodeGen/R600/load-i1.ll @@ -1,21 +1,58 @@ -; RUN: llc -march=r600 -mcpu=SI -verify-machineinstrs< %s | FileCheck -check-prefix=SI %s +; RUN: llc -march=amdgcn -mcpu=SI -verify-machineinstrs < %s | FileCheck -check-prefix=SI -check-prefix=FUNC %s +; RUN: llc -march=amdgcn -mcpu=tonga -verify-machineinstrs < %s | FileCheck -check-prefix=SI -check-prefix=FUNC %s +; RUN: llc -march=r600 -mcpu=cypress -verify-machineinstrs < %s | FileCheck -check-prefix=EG -check-prefix=FUNC %s - -; SI-LABEL: {{^}}global_copy_i1_to_i1: +; FUNC-LABEL: {{^}}global_copy_i1_to_i1: ; SI: buffer_load_ubyte ; SI: v_and_b32_e32 v{{[0-9]+}}, 1 ; SI: buffer_store_byte ; SI: s_endpgm + +; EG: VTX_READ_8 +; EG: AND_INT define void @global_copy_i1_to_i1(i1 addrspace(1)* %out, i1 addrspace(1)* %in) nounwind { %load = load i1 addrspace(1)* %in store i1 %load, i1 addrspace(1)* %out, align 1 ret void } -; SI-LABEL: {{^}}global_sextload_i1_to_i32: -; XSI: BUFFER_LOAD_BYTE +; FUNC-LABEL: {{^}}local_copy_i1_to_i1: +; SI: ds_read_u8 +; SI: v_and_b32_e32 v{{[0-9]+}}, 1 +; SI: ds_write_b8 +; SI: s_endpgm + +; EG: LDS_UBYTE_READ_RET +; EG: AND_INT +; EG: LDS_BYTE_WRITE +define void @local_copy_i1_to_i1(i1 addrspace(3)* %out, i1 addrspace(3)* %in) nounwind { + %load = load i1 addrspace(3)* %in + store i1 %load, i1 addrspace(3)* %out, align 1 + ret void +} + +; FUNC-LABEL: {{^}}constant_copy_i1_to_i1: +; SI: buffer_load_ubyte +; SI: v_and_b32_e32 v{{[0-9]+}}, 1 +; SI: buffer_store_byte +; SI: s_endpgm + +; EG: VTX_READ_8 +; EG: AND_INT +define void @constant_copy_i1_to_i1(i1 addrspace(1)* %out, i1 addrspace(2)* %in) nounwind { + %load = load i1 addrspace(2)* %in + store i1 %load, i1 addrspace(1)* %out, align 1 + ret void +} + +; FUNC-LABEL: {{^}}global_sextload_i1_to_i32: +; SI: buffer_load_ubyte +; SI: v_bfe_i32 ; SI: buffer_store_dword ; SI: s_endpgm + +; EG: VTX_READ_8 +; EG: BFE_INT define void @global_sextload_i1_to_i32(i32 addrspace(1)* %out, i1 addrspace(1)* %in) nounwind { %load = load i1 addrspace(1)* %in %ext = sext i1 %load to i32 @@ -23,10 +60,11 @@ define void @global_sextload_i1_to_i32(i32 addrspace(1)* %out, i1 addrspace(1)* ret void } -; SI-LABEL: {{^}}global_zextload_i1_to_i32: +; FUNC-LABEL: {{^}}global_zextload_i1_to_i32: ; SI: buffer_load_ubyte ; SI: buffer_store_dword ; SI: s_endpgm + define void @global_zextload_i1_to_i32(i32 addrspace(1)* %out, i1 addrspace(1)* %in) nounwind { %load = load i1 addrspace(1)* %in %ext = zext i1 %load to i32 @@ -34,8 +72,9 @@ define void @global_zextload_i1_to_i32(i32 addrspace(1)* %out, i1 addrspace(1)* ret void } -; SI-LABEL: {{^}}global_sextload_i1_to_i64: -; XSI: BUFFER_LOAD_BYTE +; FUNC-LABEL: {{^}}global_sextload_i1_to_i64: +; SI: buffer_load_ubyte +; SI: v_bfe_i32 ; SI: buffer_store_dwordx2 ; SI: s_endpgm define void @global_sextload_i1_to_i64(i64 
addrspace(1)* %out, i1 addrspace(1)* %in) nounwind { @@ -45,8 +84,9 @@ define void @global_sextload_i1_to_i64(i64 addrspace(1)* %out, i1 addrspace(1)* ret void } -; SI-LABEL: {{^}}global_zextload_i1_to_i64: +; FUNC-LABEL: {{^}}global_zextload_i1_to_i64: ; SI: buffer_load_ubyte +; SI: v_mov_b32_e32 {{v[0-9]+}}, 0 ; SI: buffer_store_dwordx2 ; SI: s_endpgm define void @global_zextload_i1_to_i64(i64 addrspace(1)* %out, i1 addrspace(1)* %in) nounwind { @@ -56,7 +96,7 @@ define void @global_zextload_i1_to_i64(i64 addrspace(1)* %out, i1 addrspace(1)* ret void } -; SI-LABEL: {{^}}i1_arg: +; FUNC-LABEL: {{^}}i1_arg: ; SI: buffer_load_ubyte ; SI: v_and_b32_e32 ; SI: buffer_store_byte @@ -66,7 +106,7 @@ define void @i1_arg(i1 addrspace(1)* %out, i1 %x) nounwind { ret void } -; SI-LABEL: {{^}}i1_arg_zext_i32: +; FUNC-LABEL: {{^}}i1_arg_zext_i32: ; SI: buffer_load_ubyte ; SI: buffer_store_dword ; SI: s_endpgm @@ -76,7 +116,7 @@ define void @i1_arg_zext_i32(i32 addrspace(1)* %out, i1 %x) nounwind { ret void } -; SI-LABEL: {{^}}i1_arg_zext_i64: +; FUNC-LABEL: {{^}}i1_arg_zext_i64: ; SI: buffer_load_ubyte ; SI: buffer_store_dwordx2 ; SI: s_endpgm @@ -86,8 +126,8 @@ define void @i1_arg_zext_i64(i64 addrspace(1)* %out, i1 %x) nounwind { ret void } -; SI-LABEL: {{^}}i1_arg_sext_i32: -; XSI: BUFFER_LOAD_BYTE +; FUNC-LABEL: {{^}}i1_arg_sext_i32: +; SI: buffer_load_ubyte ; SI: buffer_store_dword ; SI: s_endpgm define void @i1_arg_sext_i32(i32 addrspace(1)* %out, i1 %x) nounwind { @@ -96,8 +136,10 @@ define void @i1_arg_sext_i32(i32 addrspace(1)* %out, i1 %x) nounwind { ret void } -; SI-LABEL: {{^}}i1_arg_sext_i64: -; XSI: BUFFER_LOAD_BYTE +; FUNC-LABEL: {{^}}i1_arg_sext_i64: +; SI: buffer_load_ubyte +; SI: v_bfe_i32 +; SI: v_ashrrev_i32 ; SI: buffer_store_dwordx2 ; SI: s_endpgm define void @i1_arg_sext_i64(i64 addrspace(1)* %out, i1 %x) nounwind { diff --git a/test/CodeGen/R600/load.ll b/test/CodeGen/R600/load.ll index 62d3063..b71b7cb 100644 --- a/test/CodeGen/R600/load.ll +++ b/test/CodeGen/R600/load.ll @@ -1,6 +1,7 @@ -; RUN: llc < %s -march=r600 -mcpu=redwood | FileCheck --check-prefix=R600-CHECK --check-prefix=FUNC %s -; RUN: llc < %s -march=r600 -mcpu=cayman | FileCheck --check-prefix=R600-CHECK --check-prefix=FUNC %s -; RUN: llc < %s -march=r600 -mcpu=SI -verify-machineinstrs | FileCheck --check-prefix=SI-CHECK --check-prefix=FUNC %s +; RUN: llc < %s -march=r600 -mcpu=redwood | FileCheck --check-prefix=R600 --check-prefix=FUNC %s +; RUN: llc < %s -march=r600 -mcpu=cayman | FileCheck --check-prefix=R600 --check-prefix=FUNC %s +; RUN: llc < %s -march=amdgcn -mcpu=SI -verify-machineinstrs | FileCheck --check-prefix=SI --check-prefix=FUNC %s +; RUN: llc < %s -march=amdgcn -mcpu=tonga -verify-machineinstrs | FileCheck --check-prefix=SI --check-prefix=FUNC %s ;===------------------------------------------------------------------------===; ; GLOBAL ADDRESS SPACE @@ -8,9 +9,9 @@ ; Load an i8 value from the global address space. 
; FUNC-LABEL: {{^}}load_i8: -; R600-CHECK: VTX_READ_8 T{{[0-9]+\.X, T[0-9]+\.X}} +; R600: VTX_READ_8 T{{[0-9]+\.X, T[0-9]+\.X}} -; SI-CHECK: buffer_load_ubyte v{{[0-9]+}}, +; SI: buffer_load_ubyte v{{[0-9]+}}, define void @load_i8(i32 addrspace(1)* %out, i8 addrspace(1)* %in) { %1 = load i8 addrspace(1)* %in %2 = zext i8 %1 to i32 @@ -19,12 +20,12 @@ define void @load_i8(i32 addrspace(1)* %out, i8 addrspace(1)* %in) { } ; FUNC-LABEL: {{^}}load_i8_sext: -; R600-CHECK: VTX_READ_8 [[DST:T[0-9]\.[XYZW]]], [[DST]] -; R600-CHECK: LSHL {{[* ]*}}T{{[0-9]}}.[[LSHL_CHAN:[XYZW]]], [[DST]] -; R600-CHECK: 24 -; R600-CHECK: ASHR {{[* ]*}}T{{[0-9]\.[XYZW]}}, PV.[[LSHL_CHAN]] -; R600-CHECK: 24 -; SI-CHECK: buffer_load_sbyte +; R600: VTX_READ_8 [[DST:T[0-9]\.[XYZW]]], [[DST]] +; R600: LSHL {{[* ]*}}T{{[0-9]}}.[[LSHL_CHAN:[XYZW]]], [[DST]] +; R600: 24 +; R600: ASHR {{[* ]*}}T{{[0-9]\.[XYZW]}}, PV.[[LSHL_CHAN]] +; R600: 24 +; SI: buffer_load_sbyte define void @load_i8_sext(i32 addrspace(1)* %out, i8 addrspace(1)* %in) { entry: %0 = load i8 addrspace(1)* %in @@ -34,10 +35,10 @@ entry: } ; FUNC-LABEL: {{^}}load_v2i8: -; R600-CHECK: VTX_READ_8 -; R600-CHECK: VTX_READ_8 -; SI-CHECK: buffer_load_ubyte -; SI-CHECK: buffer_load_ubyte +; R600: VTX_READ_8 +; R600: VTX_READ_8 +; SI: buffer_load_ubyte +; SI: buffer_load_ubyte define void @load_v2i8(<2 x i32> addrspace(1)* %out, <2 x i8> addrspace(1)* %in) { entry: %0 = load <2 x i8> addrspace(1)* %in @@ -47,18 +48,18 @@ entry: } ; FUNC-LABEL: {{^}}load_v2i8_sext: -; R600-CHECK-DAG: VTX_READ_8 [[DST_X:T[0-9]\.[XYZW]]], [[DST_X]] -; R600-CHECK-DAG: VTX_READ_8 [[DST_Y:T[0-9]\.[XYZW]]], [[DST_Y]] -; R600-CHECK-DAG: LSHL {{[* ]*}}T{{[0-9]}}.[[LSHL_X_CHAN:[XYZW]]], [[DST_X]] -; R600-CHECK-DAG: 24 -; R600-CHECK-DAG: ASHR {{[* ]*}}T{{[0-9]\.[XYZW]}}, PV.[[LSHL_X_CHAN]] -; R600-CHECK-DAG: 24 -; R600-CHECK-DAG: LSHL {{[* ]*}}T{{[0-9]}}.[[LSHL_Y_CHAN:[XYZW]]], [[DST_Y]] -; R600-CHECK-DAG: 24 -; R600-CHECK-DAG: ASHR {{[* ]*}}T{{[0-9]\.[XYZW]}}, PV.[[LSHL_Y_CHAN]] -; R600-CHECK-DAG: 24 -; SI-CHECK: buffer_load_sbyte -; SI-CHECK: buffer_load_sbyte +; R600-DAG: VTX_READ_8 [[DST_X:T[0-9]\.[XYZW]]], [[DST_X]] +; R600-DAG: VTX_READ_8 [[DST_Y:T[0-9]\.[XYZW]]], [[DST_Y]] +; R600-DAG: LSHL {{[* ]*}}T{{[0-9]}}.[[LSHL_X_CHAN:[XYZW]]], [[DST_X]] +; R600-DAG: 24 +; R600-DAG: ASHR {{[* ]*}}T{{[0-9]\.[XYZW]}}, PV.[[LSHL_X_CHAN]] +; R600-DAG: 24 +; R600-DAG: LSHL {{[* ]*}}T{{[0-9]}}.[[LSHL_Y_CHAN:[XYZW]]], [[DST_Y]] +; R600-DAG: 24 +; R600-DAG: ASHR {{[* ]*}}T{{[0-9]\.[XYZW]}}, PV.[[LSHL_Y_CHAN]] +; R600-DAG: 24 +; SI: buffer_load_sbyte +; SI: buffer_load_sbyte define void @load_v2i8_sext(<2 x i32> addrspace(1)* %out, <2 x i8> addrspace(1)* %in) { entry: %0 = load <2 x i8> addrspace(1)* %in @@ -68,14 +69,14 @@ entry: } ; FUNC-LABEL: {{^}}load_v4i8: -; R600-CHECK: VTX_READ_8 -; R600-CHECK: VTX_READ_8 -; R600-CHECK: VTX_READ_8 -; R600-CHECK: VTX_READ_8 -; SI-CHECK: buffer_load_ubyte -; SI-CHECK: buffer_load_ubyte -; SI-CHECK: buffer_load_ubyte -; SI-CHECK: buffer_load_ubyte +; R600: VTX_READ_8 +; R600: VTX_READ_8 +; R600: VTX_READ_8 +; R600: VTX_READ_8 +; SI: buffer_load_ubyte +; SI: buffer_load_ubyte +; SI: buffer_load_ubyte +; SI: buffer_load_ubyte define void @load_v4i8(<4 x i32> addrspace(1)* %out, <4 x i8> addrspace(1)* %in) { entry: %0 = load <4 x i8> addrspace(1)* %in @@ -85,30 +86,30 @@ entry: } ; FUNC-LABEL: {{^}}load_v4i8_sext: -; R600-CHECK-DAG: VTX_READ_8 [[DST_X:T[0-9]\.[XYZW]]], [[DST_X]] -; R600-CHECK-DAG: VTX_READ_8 [[DST_Y:T[0-9]\.[XYZW]]], [[DST_Y]] -; R600-CHECK-DAG: VTX_READ_8 
[[DST_Z:T[0-9]\.[XYZW]]], [[DST_Z]] -; R600-CHECK-DAG: VTX_READ_8 [[DST_W:T[0-9]\.[XYZW]]], [[DST_W]] -; R600-CHECK-DAG: LSHL {{[* ]*}}T{{[0-9]}}.[[LSHL_X_CHAN:[XYZW]]], [[DST_X]] -; R600-CHECK-DAG: 24 -; R600-CHECK-DAG: ASHR {{[* ]*}}T{{[0-9]\.[XYZW]}}, PV.[[LSHL_X_CHAN]] -; R600-CHECK-DAG: 24 -; R600-CHECK-DAG: LSHL {{[* ]*}}T{{[0-9]}}.[[LSHL_Y_CHAN:[XYZW]]], [[DST_Y]] -; R600-CHECK-DAG: 24 -; R600-CHECK-DAG: ASHR {{[* ]*}}T{{[0-9]\.[XYZW]}}, PV.[[LSHL_Y_CHAN]] -; R600-CHECK-DAG: 24 -; R600-CHECK-DAG: LSHL {{[* ]*}}T{{[0-9]}}.[[LSHL_Z_CHAN:[XYZW]]], [[DST_Z]] -; R600-CHECK-DAG: 24 -; R600-CHECK-DAG: ASHR {{[* ]*}}T{{[0-9]\.[XYZW]}}, PV.[[LSHL_Z_CHAN]] -; R600-CHECK-DAG: 24 -; R600-CHECK-DAG: LSHL {{[* ]*}}T{{[0-9]}}.[[LSHL_W_CHAN:[XYZW]]], [[DST_W]] -; R600-CHECK-DAG: 24 -; R600-CHECK-DAG: ASHR {{[* ]*}}T{{[0-9]\.[XYZW]}}, PV.[[LSHL_W_CHAN]] -; R600-CHECK-DAG: 24 -; SI-CHECK: buffer_load_sbyte -; SI-CHECK: buffer_load_sbyte -; SI-CHECK: buffer_load_sbyte -; SI-CHECK: buffer_load_sbyte +; R600-DAG: VTX_READ_8 [[DST_X:T[0-9]\.[XYZW]]], [[DST_X]] +; R600-DAG: VTX_READ_8 [[DST_Y:T[0-9]\.[XYZW]]], [[DST_Y]] +; R600-DAG: VTX_READ_8 [[DST_Z:T[0-9]\.[XYZW]]], [[DST_Z]] +; R600-DAG: VTX_READ_8 [[DST_W:T[0-9]\.[XYZW]]], [[DST_W]] +; R600-DAG: LSHL {{[* ]*}}T{{[0-9]}}.[[LSHL_X_CHAN:[XYZW]]], [[DST_X]] +; R600-DAG: 24 +; R600-DAG: ASHR {{[* ]*}}T{{[0-9]\.[XYZW]}}, PV.[[LSHL_X_CHAN]] +; R600-DAG: 24 +; R600-DAG: LSHL {{[* ]*}}T{{[0-9]}}.[[LSHL_Y_CHAN:[XYZW]]], [[DST_Y]] +; R600-DAG: 24 +; R600-DAG: ASHR {{[* ]*}}T{{[0-9]\.[XYZW]}}, PV.[[LSHL_Y_CHAN]] +; R600-DAG: 24 +; R600-DAG: LSHL {{[* ]*}}T{{[0-9]}}.[[LSHL_Z_CHAN:[XYZW]]], [[DST_Z]] +; R600-DAG: 24 +; R600-DAG: ASHR {{[* ]*}}T{{[0-9]\.[XYZW]}}, PV.[[LSHL_Z_CHAN]] +; R600-DAG: 24 +; R600-DAG: LSHL {{[* ]*}}T{{[0-9]}}.[[LSHL_W_CHAN:[XYZW]]], [[DST_W]] +; R600-DAG: 24 +; R600-DAG: ASHR {{[* ]*}}T{{[0-9]\.[XYZW]}}, PV.[[LSHL_W_CHAN]] +; R600-DAG: 24 +; SI: buffer_load_sbyte +; SI: buffer_load_sbyte +; SI: buffer_load_sbyte +; SI: buffer_load_sbyte define void @load_v4i8_sext(<4 x i32> addrspace(1)* %out, <4 x i8> addrspace(1)* %in) { entry: %0 = load <4 x i8> addrspace(1)* %in @@ -119,8 +120,8 @@ entry: ; Load an i16 value from the global address space. 
; FUNC-LABEL: {{^}}load_i16: -; R600-CHECK: VTX_READ_16 T{{[0-9]+\.X, T[0-9]+\.X}} -; SI-CHECK: buffer_load_ushort +; R600: VTX_READ_16 T{{[0-9]+\.X, T[0-9]+\.X}} +; SI: buffer_load_ushort define void @load_i16(i32 addrspace(1)* %out, i16 addrspace(1)* %in) { entry: %0 = load i16 addrspace(1)* %in @@ -130,12 +131,12 @@ entry: } ; FUNC-LABEL: {{^}}load_i16_sext: -; R600-CHECK: VTX_READ_16 [[DST:T[0-9]\.[XYZW]]], [[DST]] -; R600-CHECK: LSHL {{[* ]*}}T{{[0-9]}}.[[LSHL_CHAN:[XYZW]]], [[DST]] -; R600-CHECK: 16 -; R600-CHECK: ASHR {{[* ]*}}T{{[0-9]\.[XYZW]}}, PV.[[LSHL_CHAN]] -; R600-CHECK: 16 -; SI-CHECK: buffer_load_sshort +; R600: VTX_READ_16 [[DST:T[0-9]\.[XYZW]]], [[DST]] +; R600: LSHL {{[* ]*}}T{{[0-9]}}.[[LSHL_CHAN:[XYZW]]], [[DST]] +; R600: 16 +; R600: ASHR {{[* ]*}}T{{[0-9]\.[XYZW]}}, PV.[[LSHL_CHAN]] +; R600: 16 +; SI: buffer_load_sshort define void @load_i16_sext(i32 addrspace(1)* %out, i16 addrspace(1)* %in) { entry: %0 = load i16 addrspace(1)* %in @@ -145,10 +146,10 @@ entry: } ; FUNC-LABEL: {{^}}load_v2i16: -; R600-CHECK: VTX_READ_16 -; R600-CHECK: VTX_READ_16 -; SI-CHECK: buffer_load_ushort -; SI-CHECK: buffer_load_ushort +; R600: VTX_READ_16 +; R600: VTX_READ_16 +; SI: buffer_load_ushort +; SI: buffer_load_ushort define void @load_v2i16(<2 x i32> addrspace(1)* %out, <2 x i16> addrspace(1)* %in) { entry: %0 = load <2 x i16> addrspace(1)* %in @@ -158,18 +159,18 @@ entry: } ; FUNC-LABEL: {{^}}load_v2i16_sext: -; R600-CHECK-DAG: VTX_READ_16 [[DST_X:T[0-9]\.[XYZW]]], [[DST_X]] -; R600-CHECK-DAG: VTX_READ_16 [[DST_Y:T[0-9]\.[XYZW]]], [[DST_Y]] -; R600-CHECK-DAG: LSHL {{[* ]*}}T{{[0-9]}}.[[LSHL_X_CHAN:[XYZW]]], [[DST_X]] -; R600-CHECK-DAG: 16 -; R600-CHECK-DAG: ASHR {{[* ]*}}T{{[0-9]\.[XYZW]}}, PV.[[LSHL_X_CHAN]] -; R600-CHECK-DAG: 16 -; R600-CHECK-DAG: LSHL {{[* ]*}}T{{[0-9]}}.[[LSHL_Y_CHAN:[XYZW]]], [[DST_Y]] -; R600-CHECK-DAG: 16 -; R600-CHECK-DAG: ASHR {{[* ]*}}T{{[0-9]\.[XYZW]}}, PV.[[LSHL_Y_CHAN]] -; R600-CHECK-DAG: 16 -; SI-CHECK: buffer_load_sshort -; SI-CHECK: buffer_load_sshort +; R600-DAG: VTX_READ_16 [[DST_X:T[0-9]\.[XYZW]]], [[DST_X]] +; R600-DAG: VTX_READ_16 [[DST_Y:T[0-9]\.[XYZW]]], [[DST_Y]] +; R600-DAG: LSHL {{[* ]*}}T{{[0-9]}}.[[LSHL_X_CHAN:[XYZW]]], [[DST_X]] +; R600-DAG: 16 +; R600-DAG: ASHR {{[* ]*}}T{{[0-9]\.[XYZW]}}, PV.[[LSHL_X_CHAN]] +; R600-DAG: 16 +; R600-DAG: LSHL {{[* ]*}}T{{[0-9]}}.[[LSHL_Y_CHAN:[XYZW]]], [[DST_Y]] +; R600-DAG: 16 +; R600-DAG: ASHR {{[* ]*}}T{{[0-9]\.[XYZW]}}, PV.[[LSHL_Y_CHAN]] +; R600-DAG: 16 +; SI: buffer_load_sshort +; SI: buffer_load_sshort define void @load_v2i16_sext(<2 x i32> addrspace(1)* %out, <2 x i16> addrspace(1)* %in) { entry: %0 = load <2 x i16> addrspace(1)* %in @@ -179,14 +180,14 @@ entry: } ; FUNC-LABEL: {{^}}load_v4i16: -; R600-CHECK: VTX_READ_16 -; R600-CHECK: VTX_READ_16 -; R600-CHECK: VTX_READ_16 -; R600-CHECK: VTX_READ_16 -; SI-CHECK: buffer_load_ushort -; SI-CHECK: buffer_load_ushort -; SI-CHECK: buffer_load_ushort -; SI-CHECK: buffer_load_ushort +; R600: VTX_READ_16 +; R600: VTX_READ_16 +; R600: VTX_READ_16 +; R600: VTX_READ_16 +; SI: buffer_load_ushort +; SI: buffer_load_ushort +; SI: buffer_load_ushort +; SI: buffer_load_ushort define void @load_v4i16(<4 x i32> addrspace(1)* %out, <4 x i16> addrspace(1)* %in) { entry: %0 = load <4 x i16> addrspace(1)* %in @@ -196,30 +197,30 @@ entry: } ; FUNC-LABEL: {{^}}load_v4i16_sext: -; R600-CHECK-DAG: VTX_READ_16 [[DST_X:T[0-9]\.[XYZW]]], [[DST_X]] -; R600-CHECK-DAG: VTX_READ_16 [[DST_Y:T[0-9]\.[XYZW]]], [[DST_Y]] -; R600-CHECK-DAG: VTX_READ_16 [[DST_Z:T[0-9]\.[XYZW]]], 
[[DST_Z]] -; R600-CHECK-DAG: VTX_READ_16 [[DST_W:T[0-9]\.[XYZW]]], [[DST_W]] -; R600-CHECK-DAG: LSHL {{[* ]*}}T{{[0-9]}}.[[LSHL_X_CHAN:[XYZW]]], [[DST_X]] -; R600-CHECK-DAG: 16 -; R600-CHECK-DAG: ASHR {{[* ]*}}T{{[0-9]\.[XYZW]}}, PV.[[LSHL_X_CHAN]] -; R600-CHECK-DAG: 16 -; R600-CHECK-DAG: LSHL {{[* ]*}}T{{[0-9]}}.[[LSHL_Y_CHAN:[XYZW]]], [[DST_Y]] -; R600-CHECK-DAG: 16 -; R600-CHECK-DAG: ASHR {{[* ]*}}T{{[0-9]\.[XYZW]}}, PV.[[LSHL_Y_CHAN]] -; R600-CHECK-DAG: 16 -; R600-CHECK-DAG: LSHL {{[* ]*}}T{{[0-9]}}.[[LSHL_Z_CHAN:[XYZW]]], [[DST_Z]] -; R600-CHECK-DAG: 16 -; R600-CHECK-DAG: ASHR {{[* ]*}}T{{[0-9]\.[XYZW]}}, PV.[[LSHL_Z_CHAN]] -; R600-CHECK-DAG: 16 -; R600-CHECK-DAG: LSHL {{[* ]*}}T{{[0-9]}}.[[LSHL_W_CHAN:[XYZW]]], [[DST_W]] -; R600-CHECK-DAG: 16 -; R600-CHECK-DAG: ASHR {{[* ]*}}T{{[0-9]\.[XYZW]}}, PV.[[LSHL_W_CHAN]] -; R600-CHECK-DAG: 16 -; SI-CHECK: buffer_load_sshort -; SI-CHECK: buffer_load_sshort -; SI-CHECK: buffer_load_sshort -; SI-CHECK: buffer_load_sshort +; R600-DAG: VTX_READ_16 [[DST_X:T[0-9]\.[XYZW]]], [[DST_X]] +; R600-DAG: VTX_READ_16 [[DST_Y:T[0-9]\.[XYZW]]], [[DST_Y]] +; R600-DAG: VTX_READ_16 [[DST_Z:T[0-9]\.[XYZW]]], [[DST_Z]] +; R600-DAG: VTX_READ_16 [[DST_W:T[0-9]\.[XYZW]]], [[DST_W]] +; R600-DAG: LSHL {{[* ]*}}T{{[0-9]}}.[[LSHL_X_CHAN:[XYZW]]], [[DST_X]] +; R600-DAG: 16 +; R600-DAG: ASHR {{[* ]*}}T{{[0-9]\.[XYZW]}}, PV.[[LSHL_X_CHAN]] +; R600-DAG: 16 +; R600-DAG: LSHL {{[* ]*}}T{{[0-9]}}.[[LSHL_Y_CHAN:[XYZW]]], [[DST_Y]] +; R600-DAG: 16 +; R600-DAG: ASHR {{[* ]*}}T{{[0-9]\.[XYZW]}}, PV.[[LSHL_Y_CHAN]] +; R600-DAG: 16 +; R600-DAG: LSHL {{[* ]*}}T{{[0-9]}}.[[LSHL_Z_CHAN:[XYZW]]], [[DST_Z]] +; R600-DAG: 16 +; R600-DAG: ASHR {{[* ]*}}T{{[0-9]\.[XYZW]}}, PV.[[LSHL_Z_CHAN]] +; R600-DAG: 16 +; R600-DAG: LSHL {{[* ]*}}T{{[0-9]}}.[[LSHL_W_CHAN:[XYZW]]], [[DST_W]] +; R600-DAG: 16 +; R600-DAG: ASHR {{[* ]*}}T{{[0-9]\.[XYZW]}}, PV.[[LSHL_W_CHAN]] +; R600-DAG: 16 +; SI: buffer_load_sshort +; SI: buffer_load_sshort +; SI: buffer_load_sshort +; SI: buffer_load_sshort define void @load_v4i16_sext(<4 x i32> addrspace(1)* %out, <4 x i16> addrspace(1)* %in) { entry: %0 = load <4 x i16> addrspace(1)* %in @@ -230,9 +231,9 @@ entry: ; load an i32 value from the global address space. ; FUNC-LABEL: {{^}}load_i32: -; R600-CHECK: VTX_READ_32 T{{[0-9]+}}.X, T{{[0-9]+}}.X, 0 +; R600: VTX_READ_32 T{{[0-9]+}}.X, T{{[0-9]+}}.X, 0 -; SI-CHECK: buffer_load_dword v{{[0-9]+}} +; SI: buffer_load_dword v{{[0-9]+}} define void @load_i32(i32 addrspace(1)* %out, i32 addrspace(1)* %in) { entry: %0 = load i32 addrspace(1)* %in @@ -242,9 +243,9 @@ entry: ; load a f32 value from the global address space. 
; FUNC-LABEL: {{^}}load_f32: -; R600-CHECK: VTX_READ_32 T{{[0-9]+}}.X, T{{[0-9]+}}.X, 0 +; R600: VTX_READ_32 T{{[0-9]+}}.X, T{{[0-9]+}}.X, 0 -; SI-CHECK: buffer_load_dword v{{[0-9]+}} +; SI: buffer_load_dword v{{[0-9]+}} define void @load_f32(float addrspace(1)* %out, float addrspace(1)* %in) { entry: %0 = load float addrspace(1)* %in @@ -254,9 +255,9 @@ entry: ; load a v2f32 value from the global address space ; FUNC-LABEL: {{^}}load_v2f32: -; R600-CHECK: MEM_RAT -; R600-CHECK: VTX_READ_64 -; SI-CHECK: buffer_load_dwordx2 +; R600: MEM_RAT +; R600: VTX_READ_64 +; SI: buffer_load_dwordx2 define void @load_v2f32(<2 x float> addrspace(1)* %out, <2 x float> addrspace(1)* %in) { entry: %0 = load <2 x float> addrspace(1)* %in @@ -265,8 +266,8 @@ entry: } ; FUNC-LABEL: {{^}}load_i64: -; R600-CHECK: VTX_READ_64 -; SI-CHECK: buffer_load_dwordx2 +; R600: VTX_READ_64 +; SI: buffer_load_dwordx2 define void @load_i64(i64 addrspace(1)* %out, i64 addrspace(1)* %in) { entry: %0 = load i64 addrspace(1)* %in @@ -275,11 +276,11 @@ entry: } ; FUNC-LABEL: {{^}}load_i64_sext: -; R600-CHECK: MEM_RAT -; R600-CHECK: MEM_RAT -; R600-CHECK: ASHR {{[* ]*}}T{{[0-9]\.[XYZW]}}, T{{[0-9]\.[XYZW]}}, literal.x -; R600-CHECK: 31 -; SI-CHECK: buffer_load_dword +; R600: MEM_RAT +; R600: MEM_RAT +; R600: ASHR {{[* ]*}}T{{[0-9]\.[XYZW]}}, T{{[0-9]\.[XYZW]}}, literal.x +; R600: 31 +; SI: buffer_load_dword define void @load_i64_sext(i64 addrspace(1)* %out, i32 addrspace(1)* %in) { entry: @@ -290,8 +291,8 @@ entry: } ; FUNC-LABEL: {{^}}load_i64_zext: -; R600-CHECK: MEM_RAT -; R600-CHECK: MEM_RAT +; R600: MEM_RAT +; R600: MEM_RAT define void @load_i64_zext(i64 addrspace(1)* %out, i32 addrspace(1)* %in) { entry: %0 = load i32 addrspace(1)* %in @@ -301,17 +302,17 @@ entry: } ; FUNC-LABEL: {{^}}load_v8i32: -; R600-CHECK: VTX_READ_128 -; R600-CHECK: VTX_READ_128 +; R600: VTX_READ_128 +; R600: VTX_READ_128 ; XXX: We should be using DWORDX4 instructions on SI. -; SI-CHECK: buffer_load_dword -; SI-CHECK: buffer_load_dword -; SI-CHECK: buffer_load_dword -; SI-CHECK: buffer_load_dword -; SI-CHECK: buffer_load_dword -; SI-CHECK: buffer_load_dword -; SI-CHECK: buffer_load_dword -; SI-CHECK: buffer_load_dword +; SI: buffer_load_dword +; SI: buffer_load_dword +; SI: buffer_load_dword +; SI: buffer_load_dword +; SI: buffer_load_dword +; SI: buffer_load_dword +; SI: buffer_load_dword +; SI: buffer_load_dword define void @load_v8i32(<8 x i32> addrspace(1)* %out, <8 x i32> addrspace(1)* %in) { entry: %0 = load <8 x i32> addrspace(1)* %in @@ -320,27 +321,27 @@ entry: } ; FUNC-LABEL: {{^}}load_v16i32: -; R600-CHECK: VTX_READ_128 -; R600-CHECK: VTX_READ_128 -; R600-CHECK: VTX_READ_128 -; R600-CHECK: VTX_READ_128 +; R600: VTX_READ_128 +; R600: VTX_READ_128 +; R600: VTX_READ_128 +; R600: VTX_READ_128 ; XXX: We should be using DWORDX4 instructions on SI. 
-; SI-CHECK: buffer_load_dword -; SI-CHECK: buffer_load_dword -; SI-CHECK: buffer_load_dword -; SI-CHECK: buffer_load_dword -; SI-CHECK: buffer_load_dword -; SI-CHECK: buffer_load_dword -; SI-CHECK: buffer_load_dword -; SI-CHECK: buffer_load_dword -; SI-CHECK: buffer_load_dword -; SI-CHECK: buffer_load_dword -; SI-CHECK: buffer_load_dword -; SI-CHECK: buffer_load_dword -; SI-CHECK: buffer_load_dword -; SI-CHECK: buffer_load_dword -; SI-CHECK: buffer_load_dword -; SI-CHECK: buffer_load_dword +; SI: buffer_load_dword +; SI: buffer_load_dword +; SI: buffer_load_dword +; SI: buffer_load_dword +; SI: buffer_load_dword +; SI: buffer_load_dword +; SI: buffer_load_dword +; SI: buffer_load_dword +; SI: buffer_load_dword +; SI: buffer_load_dword +; SI: buffer_load_dword +; SI: buffer_load_dword +; SI: buffer_load_dword +; SI: buffer_load_dword +; SI: buffer_load_dword +; SI: buffer_load_dword define void @load_v16i32(<16 x i32> addrspace(1)* %out, <16 x i32> addrspace(1)* %in) { entry: %0 = load <16 x i32> addrspace(1)* %in @@ -354,12 +355,12 @@ entry: ; Load a sign-extended i8 value ; FUNC-LABEL: {{^}}load_const_i8_sext: -; R600-CHECK: VTX_READ_8 [[DST:T[0-9]\.[XYZW]]], [[DST]] -; R600-CHECK: LSHL {{[* ]*}}T{{[0-9]}}.[[LSHL_CHAN:[XYZW]]], [[DST]] -; R600-CHECK: 24 -; R600-CHECK: ASHR {{[* ]*}}T{{[0-9]\.[XYZW]}}, PV.[[LSHL_CHAN]] -; R600-CHECK: 24 -; SI-CHECK: buffer_load_sbyte v{{[0-9]+}}, +; R600: VTX_READ_8 [[DST:T[0-9]\.[XYZW]]], [[DST]] +; R600: LSHL {{[* ]*}}T{{[0-9]}}.[[LSHL_CHAN:[XYZW]]], [[DST]] +; R600: 24 +; R600: ASHR {{[* ]*}}T{{[0-9]\.[XYZW]}}, PV.[[LSHL_CHAN]] +; R600: 24 +; SI: buffer_load_sbyte v{{[0-9]+}}, define void @load_const_i8_sext(i32 addrspace(1)* %out, i8 addrspace(2)* %in) { entry: %0 = load i8 addrspace(2)* %in @@ -370,8 +371,8 @@ entry: ; Load an aligned i8 value ; FUNC-LABEL: {{^}}load_const_i8_aligned: -; R600-CHECK: VTX_READ_8 T{{[0-9]+\.X, T[0-9]+\.X}} -; SI-CHECK: buffer_load_ubyte v{{[0-9]+}}, +; R600: VTX_READ_8 T{{[0-9]+\.X, T[0-9]+\.X}} +; SI: buffer_load_ubyte v{{[0-9]+}}, define void @load_const_i8_aligned(i32 addrspace(1)* %out, i8 addrspace(2)* %in) { entry: %0 = load i8 addrspace(2)* %in @@ -382,8 +383,8 @@ entry: ; Load an un-aligned i8 value ; FUNC-LABEL: {{^}}load_const_i8_unaligned: -; R600-CHECK: VTX_READ_8 T{{[0-9]+\.X, T[0-9]+\.X}} -; SI-CHECK: buffer_load_ubyte v{{[0-9]+}}, +; R600: VTX_READ_8 T{{[0-9]+\.X, T[0-9]+\.X}} +; SI: buffer_load_ubyte v{{[0-9]+}}, define void @load_const_i8_unaligned(i32 addrspace(1)* %out, i8 addrspace(2)* %in) { entry: %0 = getelementptr i8 addrspace(2)* %in, i32 1 @@ -395,12 +396,12 @@ entry: ; Load a sign-extended i16 value ; FUNC-LABEL: {{^}}load_const_i16_sext: -; R600-CHECK: VTX_READ_16 [[DST:T[0-9]\.[XYZW]]], [[DST]] -; R600-CHECK: LSHL {{[* ]*}}T{{[0-9]}}.[[LSHL_CHAN:[XYZW]]], [[DST]] -; R600-CHECK: 16 -; R600-CHECK: ASHR {{[* ]*}}T{{[0-9]\.[XYZW]}}, PV.[[LSHL_CHAN]] -; R600-CHECK: 16 -; SI-CHECK: buffer_load_sshort +; R600: VTX_READ_16 [[DST:T[0-9]\.[XYZW]]], [[DST]] +; R600: LSHL {{[* ]*}}T{{[0-9]}}.[[LSHL_CHAN:[XYZW]]], [[DST]] +; R600: 16 +; R600: ASHR {{[* ]*}}T{{[0-9]\.[XYZW]}}, PV.[[LSHL_CHAN]] +; R600: 16 +; SI: buffer_load_sshort define void @load_const_i16_sext(i32 addrspace(1)* %out, i16 addrspace(2)* %in) { entry: %0 = load i16 addrspace(2)* %in @@ -411,8 +412,8 @@ entry: ; Load an aligned i16 value ; FUNC-LABEL: {{^}}load_const_i16_aligned: -; R600-CHECK: VTX_READ_16 T{{[0-9]+\.X, T[0-9]+\.X}} -; SI-CHECK: buffer_load_ushort +; R600: VTX_READ_16 T{{[0-9]+\.X, T[0-9]+\.X}} +; SI: buffer_load_ushort 
define void @load_const_i16_aligned(i32 addrspace(1)* %out, i16 addrspace(2)* %in) { entry: %0 = load i16 addrspace(2)* %in @@ -423,8 +424,8 @@ entry: ; Load an un-aligned i16 value ; FUNC-LABEL: {{^}}load_const_i16_unaligned: -; R600-CHECK: VTX_READ_16 T{{[0-9]+\.X, T[0-9]+\.X}} -; SI-CHECK: buffer_load_ushort +; R600: VTX_READ_16 T{{[0-9]+\.X, T[0-9]+\.X}} +; SI: buffer_load_ushort define void @load_const_i16_unaligned(i32 addrspace(1)* %out, i16 addrspace(2)* %in) { entry: %0 = getelementptr i16 addrspace(2)* %in, i32 1 @@ -436,9 +437,9 @@ entry: ; Load an i32 value from the constant address space. ; FUNC-LABEL: {{^}}load_const_addrspace_i32: -; R600-CHECK: VTX_READ_32 T{{[0-9]+}}.X, T{{[0-9]+}}.X, 0 +; R600: VTX_READ_32 T{{[0-9]+}}.X, T{{[0-9]+}}.X, 0 -; SI-CHECK: s_load_dword s{{[0-9]+}} +; SI: s_load_dword s{{[0-9]+}} define void @load_const_addrspace_i32(i32 addrspace(1)* %out, i32 addrspace(2)* %in) { entry: %0 = load i32 addrspace(2)* %in @@ -448,9 +449,9 @@ entry: ; Load a f32 value from the constant address space. ; FUNC-LABEL: {{^}}load_const_addrspace_f32: -; R600-CHECK: VTX_READ_32 T{{[0-9]+}}.X, T{{[0-9]+}}.X, 0 +; R600: VTX_READ_32 T{{[0-9]+}}.X, T{{[0-9]+}}.X, 0 -; SI-CHECK: s_load_dword s{{[0-9]+}} +; SI: s_load_dword s{{[0-9]+}} define void @load_const_addrspace_f32(float addrspace(1)* %out, float addrspace(2)* %in) { %1 = load float addrspace(2)* %in store float %1, float addrspace(1)* %out @@ -463,10 +464,10 @@ define void @load_const_addrspace_f32(float addrspace(1)* %out, float addrspace( ; Load an i8 value from the local address space. ; FUNC-LABEL: {{^}}load_i8_local: -; R600-CHECK: LDS_UBYTE_READ_RET -; SI-CHECK-NOT: s_wqm_b64 -; SI-CHECK: s_mov_b32 m0 -; SI-CHECK: ds_read_u8 +; R600: LDS_UBYTE_READ_RET +; SI-NOT: s_wqm_b64 +; SI: s_mov_b32 m0 +; SI: ds_read_u8 define void @load_i8_local(i32 addrspace(1)* %out, i8 addrspace(3)* %in) { %1 = load i8 addrspace(3)* %in %2 = zext i8 %1 to i32 @@ -475,11 +476,11 @@ define void @load_i8_local(i32 addrspace(1)* %out, i8 addrspace(3)* %in) { } ; FUNC-LABEL: {{^}}load_i8_sext_local: -; R600-CHECK: LDS_UBYTE_READ_RET -; R600-CHECK: ASHR -; SI-CHECK-NOT: s_wqm_b64 -; SI-CHECK: s_mov_b32 m0 -; SI-CHECK: ds_read_i8 +; R600: LDS_UBYTE_READ_RET +; R600: ASHR +; SI-NOT: s_wqm_b64 +; SI: s_mov_b32 m0 +; SI: ds_read_i8 define void @load_i8_sext_local(i32 addrspace(1)* %out, i8 addrspace(3)* %in) { entry: %0 = load i8 addrspace(3)* %in @@ -489,12 +490,12 @@ entry: } ; FUNC-LABEL: {{^}}load_v2i8_local: -; R600-CHECK: LDS_UBYTE_READ_RET -; R600-CHECK: LDS_UBYTE_READ_RET -; SI-CHECK-NOT: s_wqm_b64 -; SI-CHECK: s_mov_b32 m0 -; SI-CHECK: ds_read_u8 -; SI-CHECK: ds_read_u8 +; R600: LDS_UBYTE_READ_RET +; R600: LDS_UBYTE_READ_RET +; SI-NOT: s_wqm_b64 +; SI: s_mov_b32 m0 +; SI: ds_read_u8 +; SI: ds_read_u8 define void @load_v2i8_local(<2 x i32> addrspace(1)* %out, <2 x i8> addrspace(3)* %in) { entry: %0 = load <2 x i8> addrspace(3)* %in @@ -504,14 +505,14 @@ entry: } ; FUNC-LABEL: {{^}}load_v2i8_sext_local: -; R600-CHECK-DAG: LDS_UBYTE_READ_RET -; R600-CHECK-DAG: LDS_UBYTE_READ_RET -; R600-CHECK-DAG: ASHR -; R600-CHECK-DAG: ASHR -; SI-CHECK-NOT: s_wqm_b64 -; SI-CHECK: s_mov_b32 m0 -; SI-CHECK: ds_read_i8 -; SI-CHECK: ds_read_i8 +; R600-DAG: LDS_UBYTE_READ_RET +; R600-DAG: LDS_UBYTE_READ_RET +; R600-DAG: ASHR +; R600-DAG: ASHR +; SI-NOT: s_wqm_b64 +; SI: s_mov_b32 m0 +; SI: ds_read_i8 +; SI: ds_read_i8 define void @load_v2i8_sext_local(<2 x i32> addrspace(1)* %out, <2 x i8> addrspace(3)* %in) { entry: %0 = load <2 x i8> addrspace(3)* %in @@ 
-521,16 +522,16 @@ entry: } ; FUNC-LABEL: {{^}}load_v4i8_local: -; R600-CHECK: LDS_UBYTE_READ_RET -; R600-CHECK: LDS_UBYTE_READ_RET -; R600-CHECK: LDS_UBYTE_READ_RET -; R600-CHECK: LDS_UBYTE_READ_RET -; SI-CHECK-NOT: s_wqm_b64 -; SI-CHECK: s_mov_b32 m0 -; SI-CHECK: ds_read_u8 -; SI-CHECK: ds_read_u8 -; SI-CHECK: ds_read_u8 -; SI-CHECK: ds_read_u8 +; R600: LDS_UBYTE_READ_RET +; R600: LDS_UBYTE_READ_RET +; R600: LDS_UBYTE_READ_RET +; R600: LDS_UBYTE_READ_RET +; SI-NOT: s_wqm_b64 +; SI: s_mov_b32 m0 +; SI: ds_read_u8 +; SI: ds_read_u8 +; SI: ds_read_u8 +; SI: ds_read_u8 define void @load_v4i8_local(<4 x i32> addrspace(1)* %out, <4 x i8> addrspace(3)* %in) { entry: %0 = load <4 x i8> addrspace(3)* %in @@ -540,20 +541,20 @@ entry: } ; FUNC-LABEL: {{^}}load_v4i8_sext_local: -; R600-CHECK-DAG: LDS_UBYTE_READ_RET -; R600-CHECK-DAG: LDS_UBYTE_READ_RET -; R600-CHECK-DAG: LDS_UBYTE_READ_RET -; R600-CHECK-DAG: LDS_UBYTE_READ_RET -; R600-CHECK-DAG: ASHR -; R600-CHECK-DAG: ASHR -; R600-CHECK-DAG: ASHR -; R600-CHECK-DAG: ASHR -; SI-CHECK-NOT: s_wqm_b64 -; SI-CHECK: s_mov_b32 m0 -; SI-CHECK: ds_read_i8 -; SI-CHECK: ds_read_i8 -; SI-CHECK: ds_read_i8 -; SI-CHECK: ds_read_i8 +; R600-DAG: LDS_UBYTE_READ_RET +; R600-DAG: LDS_UBYTE_READ_RET +; R600-DAG: LDS_UBYTE_READ_RET +; R600-DAG: LDS_UBYTE_READ_RET +; R600-DAG: ASHR +; R600-DAG: ASHR +; R600-DAG: ASHR +; R600-DAG: ASHR +; SI-NOT: s_wqm_b64 +; SI: s_mov_b32 m0 +; SI: ds_read_i8 +; SI: ds_read_i8 +; SI: ds_read_i8 +; SI: ds_read_i8 define void @load_v4i8_sext_local(<4 x i32> addrspace(1)* %out, <4 x i8> addrspace(3)* %in) { entry: %0 = load <4 x i8> addrspace(3)* %in @@ -564,10 +565,10 @@ entry: ; Load an i16 value from the local address space. ; FUNC-LABEL: {{^}}load_i16_local: -; R600-CHECK: LDS_USHORT_READ_RET -; SI-CHECK-NOT: s_wqm_b64 -; SI-CHECK: s_mov_b32 m0 -; SI-CHECK: ds_read_u16 +; R600: LDS_USHORT_READ_RET +; SI-NOT: s_wqm_b64 +; SI: s_mov_b32 m0 +; SI: ds_read_u16 define void @load_i16_local(i32 addrspace(1)* %out, i16 addrspace(3)* %in) { entry: %0 = load i16 addrspace(3)* %in @@ -577,11 +578,11 @@ entry: } ; FUNC-LABEL: {{^}}load_i16_sext_local: -; R600-CHECK: LDS_USHORT_READ_RET -; R600-CHECK: ASHR -; SI-CHECK-NOT: s_wqm_b64 -; SI-CHECK: s_mov_b32 m0 -; SI-CHECK: ds_read_i16 +; R600: LDS_USHORT_READ_RET +; R600: ASHR +; SI-NOT: s_wqm_b64 +; SI: s_mov_b32 m0 +; SI: ds_read_i16 define void @load_i16_sext_local(i32 addrspace(1)* %out, i16 addrspace(3)* %in) { entry: %0 = load i16 addrspace(3)* %in @@ -591,12 +592,12 @@ entry: } ; FUNC-LABEL: {{^}}load_v2i16_local: -; R600-CHECK: LDS_USHORT_READ_RET -; R600-CHECK: LDS_USHORT_READ_RET -; SI-CHECK-NOT: s_wqm_b64 -; SI-CHECK: s_mov_b32 m0 -; SI-CHECK: ds_read_u16 -; SI-CHECK: ds_read_u16 +; R600: LDS_USHORT_READ_RET +; R600: LDS_USHORT_READ_RET +; SI-NOT: s_wqm_b64 +; SI: s_mov_b32 m0 +; SI: ds_read_u16 +; SI: ds_read_u16 define void @load_v2i16_local(<2 x i32> addrspace(1)* %out, <2 x i16> addrspace(3)* %in) { entry: %0 = load <2 x i16> addrspace(3)* %in @@ -606,14 +607,14 @@ entry: } ; FUNC-LABEL: {{^}}load_v2i16_sext_local: -; R600-CHECK-DAG: LDS_USHORT_READ_RET -; R600-CHECK-DAG: LDS_USHORT_READ_RET -; R600-CHECK-DAG: ASHR -; R600-CHECK-DAG: ASHR -; SI-CHECK-NOT: s_wqm_b64 -; SI-CHECK: s_mov_b32 m0 -; SI-CHECK: ds_read_i16 -; SI-CHECK: ds_read_i16 +; R600-DAG: LDS_USHORT_READ_RET +; R600-DAG: LDS_USHORT_READ_RET +; R600-DAG: ASHR +; R600-DAG: ASHR +; SI-NOT: s_wqm_b64 +; SI: s_mov_b32 m0 +; SI: ds_read_i16 +; SI: ds_read_i16 define void @load_v2i16_sext_local(<2 x i32> addrspace(1)* %out, <2 x 
i16> addrspace(3)* %in) { entry: %0 = load <2 x i16> addrspace(3)* %in @@ -623,16 +624,16 @@ entry: } ; FUNC-LABEL: {{^}}load_v4i16_local: -; R600-CHECK: LDS_USHORT_READ_RET -; R600-CHECK: LDS_USHORT_READ_RET -; R600-CHECK: LDS_USHORT_READ_RET -; R600-CHECK: LDS_USHORT_READ_RET -; SI-CHECK-NOT: s_wqm_b64 -; SI-CHECK: s_mov_b32 m0 -; SI-CHECK: ds_read_u16 -; SI-CHECK: ds_read_u16 -; SI-CHECK: ds_read_u16 -; SI-CHECK: ds_read_u16 +; R600: LDS_USHORT_READ_RET +; R600: LDS_USHORT_READ_RET +; R600: LDS_USHORT_READ_RET +; R600: LDS_USHORT_READ_RET +; SI-NOT: s_wqm_b64 +; SI: s_mov_b32 m0 +; SI: ds_read_u16 +; SI: ds_read_u16 +; SI: ds_read_u16 +; SI: ds_read_u16 define void @load_v4i16_local(<4 x i32> addrspace(1)* %out, <4 x i16> addrspace(3)* %in) { entry: %0 = load <4 x i16> addrspace(3)* %in @@ -642,20 +643,20 @@ entry: } ; FUNC-LABEL: {{^}}load_v4i16_sext_local: -; R600-CHECK-DAG: LDS_USHORT_READ_RET -; R600-CHECK-DAG: LDS_USHORT_READ_RET -; R600-CHECK-DAG: LDS_USHORT_READ_RET -; R600-CHECK-DAG: LDS_USHORT_READ_RET -; R600-CHECK-DAG: ASHR -; R600-CHECK-DAG: ASHR -; R600-CHECK-DAG: ASHR -; R600-CHECK-DAG: ASHR -; SI-CHECK-NOT: s_wqm_b64 -; SI-CHECK: s_mov_b32 m0 -; SI-CHECK: ds_read_i16 -; SI-CHECK: ds_read_i16 -; SI-CHECK: ds_read_i16 -; SI-CHECK: ds_read_i16 +; R600-DAG: LDS_USHORT_READ_RET +; R600-DAG: LDS_USHORT_READ_RET +; R600-DAG: LDS_USHORT_READ_RET +; R600-DAG: LDS_USHORT_READ_RET +; R600-DAG: ASHR +; R600-DAG: ASHR +; R600-DAG: ASHR +; R600-DAG: ASHR +; SI-NOT: s_wqm_b64 +; SI: s_mov_b32 m0 +; SI: ds_read_i16 +; SI: ds_read_i16 +; SI: ds_read_i16 +; SI: ds_read_i16 define void @load_v4i16_sext_local(<4 x i32> addrspace(1)* %out, <4 x i16> addrspace(3)* %in) { entry: %0 = load <4 x i16> addrspace(3)* %in @@ -666,10 +667,10 @@ entry: ; load an i32 value from the local address space. ; FUNC-LABEL: {{^}}load_i32_local: -; R600-CHECK: LDS_READ_RET -; SI-CHECK-NOT: s_wqm_b64 -; SI-CHECK: s_mov_b32 m0 -; SI-CHECK: ds_read_b32 +; R600: LDS_READ_RET +; SI-NOT: s_wqm_b64 +; SI: s_mov_b32 m0 +; SI: ds_read_b32 define void @load_i32_local(i32 addrspace(1)* %out, i32 addrspace(3)* %in) { entry: %0 = load i32 addrspace(3)* %in @@ -679,9 +680,9 @@ entry: ; load a f32 value from the local address space. ; FUNC-LABEL: {{^}}load_f32_local: -; R600-CHECK: LDS_READ_RET -; SI-CHECK: s_mov_b32 m0 -; SI-CHECK: ds_read_b32 +; R600: LDS_READ_RET +; SI: s_mov_b32 m0 +; SI: ds_read_b32 define void @load_f32_local(float addrspace(1)* %out, float addrspace(3)* %in) { entry: %0 = load float addrspace(3)* %in @@ -691,10 +692,10 @@ entry: ; load a v2f32 value from the local address space ; FUNC-LABEL: {{^}}load_v2f32_local: -; R600-CHECK: LDS_READ_RET -; R600-CHECK: LDS_READ_RET -; SI-CHECK: s_mov_b32 m0 -; SI-CHECK: ds_read_b64 +; R600: LDS_READ_RET +; R600: LDS_READ_RET +; SI: s_mov_b32 m0 +; SI: ds_read_b64 define void @load_v2f32_local(<2 x float> addrspace(1)* %out, <2 x float> addrspace(3)* %in) { entry: %0 = load <2 x float> addrspace(3)* %in @@ -704,11 +705,11 @@ entry: ; Test loading a i32 and v2i32 value from the same base pointer. 
; FUNC-LABEL: {{^}}load_i32_v2i32_local: -; R600-CHECK: LDS_READ_RET -; R600-CHECK: LDS_READ_RET -; R600-CHECK: LDS_READ_RET -; SI-CHECK-DAG: ds_read_b32 -; SI-CHECK-DAG: ds_read2_b32 +; R600: LDS_READ_RET +; R600: LDS_READ_RET +; R600: LDS_READ_RET +; SI-DAG: ds_read_b32 +; SI-DAG: ds_read2_b32 define void @load_i32_v2i32_local(<2 x i32> addrspace(1)* %out, i32 addrspace(3)* %in) { %scalar = load i32 addrspace(3)* %in %tmp0 = bitcast i32 addrspace(3)* %in to <2 x i32> addrspace(3)* @@ -726,9 +727,9 @@ define void @load_i32_v2i32_local(<2 x i32> addrspace(1)* %out, i32 addrspace(3) ; On SI we need to make sure that the base offset is a register and not ; an immediate. ; FUNC-LABEL: {{^}}load_i32_local_const_ptr: -; SI-CHECK: v_mov_b32_e32 v[[ZERO:[0-9]+]], 0 -; SI-CHECK: ds_read_b32 v0, v[[ZERO]] offset:4 -; R600-CHECK: LDS_READ_RET +; SI: v_mov_b32_e32 v[[ZERO:[0-9]+]], 0 +; SI: ds_read_b32 v0, v[[ZERO]] offset:4 +; R600: LDS_READ_RET define void @load_i32_local_const_ptr(i32 addrspace(1)* %out, i32 addrspace(3)* %in) { entry: %tmp0 = getelementptr [512 x i32] addrspace(3)* @lds, i32 0, i32 1 diff --git a/test/CodeGen/R600/load.vec.ll b/test/CodeGen/R600/load.vec.ll index 0d6e213..346d8dc 100644 --- a/test/CodeGen/R600/load.vec.ll +++ b/test/CodeGen/R600/load.vec.ll @@ -1,11 +1,12 @@ -; RUN: llc < %s -march=r600 -mcpu=redwood | FileCheck --check-prefix=EG-CHECK %s -; RUN: llc < %s -march=r600 -mcpu=SI -verify-machineinstrs | FileCheck --check-prefix=SI-CHECK %s +; RUN: llc < %s -march=r600 -mcpu=redwood | FileCheck --check-prefix=EG %s +; RUN: llc < %s -march=amdgcn -mcpu=SI -verify-machineinstrs | FileCheck --check-prefix=SI %s +; RUN: llc < %s -march=amdgcn -mcpu=tonga -verify-machineinstrs | FileCheck --check-prefix=SI %s ; load a v2i32 value from the global address space. -; EG-CHECK: {{^}}load_v2i32: -; EG-CHECK: VTX_READ_64 T{{[0-9]+}}.XY, T{{[0-9]+}}.X, 0 -; SI-CHECK: {{^}}load_v2i32: -; SI-CHECK: buffer_load_dwordx2 v[{{[0-9]+:[0-9]+}}] +; EG: {{^}}load_v2i32: +; EG: VTX_READ_64 T{{[0-9]+}}.XY, T{{[0-9]+}}.X, 0 +; SI: {{^}}load_v2i32: +; SI: buffer_load_dwordx2 v[{{[0-9]+:[0-9]+}}] define void @load_v2i32(<2 x i32> addrspace(1)* %out, <2 x i32> addrspace(1)* %in) { %a = load <2 x i32> addrspace(1) * %in store <2 x i32> %a, <2 x i32> addrspace(1)* %out @@ -13,10 +14,10 @@ define void @load_v2i32(<2 x i32> addrspace(1)* %out, <2 x i32> addrspace(1)* %i } ; load a v4i32 value from the global address space. -; EG-CHECK: {{^}}load_v4i32: -; EG-CHECK: VTX_READ_128 T{{[0-9]+}}.XYZW, T{{[0-9]+}}.X, 0 -; SI-CHECK: {{^}}load_v4i32: -; SI-CHECK: buffer_load_dwordx4 v[{{[0-9]+:[0-9]+}}] +; EG: {{^}}load_v4i32: +; EG: VTX_READ_128 T{{[0-9]+}}.XYZW, T{{[0-9]+}}.X, 0 +; SI: {{^}}load_v4i32: +; SI: buffer_load_dwordx4 v[{{[0-9]+:[0-9]+}}] define void @load_v4i32(<4 x i32> addrspace(1)* %out, <4 x i32> addrspace(1)* %in) { %a = load <4 x i32> addrspace(1) * %in store <4 x i32> %a, <4 x i32> addrspace(1)* %out diff --git a/test/CodeGen/R600/load64.ll b/test/CodeGen/R600/load64.ll index a60c4eb..cb3d654 100644 --- a/test/CodeGen/R600/load64.ll +++ b/test/CodeGen/R600/load64.ll @@ -1,4 +1,5 @@ -; RUN: llc < %s -march=r600 -mcpu=tahiti -verify-machineinstrs | FileCheck %s +; RUN: llc < %s -march=amdgcn -mcpu=tahiti -verify-machineinstrs | FileCheck %s +; RUN: llc < %s -march=amdgcn -mcpu=tonga -verify-machineinstrs | FileCheck %s ; load a f64 value from the global address space. 
; CHECK-LABEL: {{^}}load_f64: diff --git a/test/CodeGen/R600/local-64.ll b/test/CodeGen/R600/local-64.ll index eb14b5f..4b45169 100644 --- a/test/CodeGen/R600/local-64.ll +++ b/test/CodeGen/R600/local-64.ll @@ -1,8 +1,9 @@ -; RUN: llc -march=r600 -mcpu=SI -verify-machineinstrs< %s | FileCheck --check-prefix=SI --check-prefix=BOTH %s -; RUN: llc -march=r600 -mcpu=bonaire -verify-machineinstrs< %s | FileCheck --check-prefix=CI --check-prefix=BOTH %s +; RUN: llc -march=amdgcn -mcpu=SI -verify-machineinstrs< %s | FileCheck --check-prefix=SI --check-prefix=BOTH %s +; RUN: llc -march=amdgcn -mcpu=bonaire -verify-machineinstrs< %s | FileCheck --check-prefix=CI --check-prefix=BOTH %s +; RUN: llc -march=amdgcn -mcpu=tonga -verify-machineinstrs< %s | FileCheck --check-prefix=CI --check-prefix=BOTH %s ; BOTH-LABEL: {{^}}local_i32_load -; BOTH: ds_read_b32 [[REG:v[0-9]+]], v{{[0-9]+}} offset:28 [M0] +; BOTH: ds_read_b32 [[REG:v[0-9]+]], v{{[0-9]+}} offset:28 ; BOTH: buffer_store_dword [[REG]], define void @local_i32_load(i32 addrspace(1)* %out, i32 addrspace(3)* %in) nounwind { %gep = getelementptr i32 addrspace(3)* %in, i32 7 @@ -12,7 +13,7 @@ define void @local_i32_load(i32 addrspace(1)* %out, i32 addrspace(3)* %in) nounw } ; BOTH-LABEL: {{^}}local_i32_load_0_offset -; BOTH: ds_read_b32 [[REG:v[0-9]+]], v{{[0-9]+}} [M0] +; BOTH: ds_read_b32 [[REG:v[0-9]+]], v{{[0-9]+}} ; BOTH: buffer_store_dword [[REG]], define void @local_i32_load_0_offset(i32 addrspace(1)* %out, i32 addrspace(3)* %in) nounwind { %val = load i32 addrspace(3)* %in, align 4 @@ -22,7 +23,7 @@ define void @local_i32_load_0_offset(i32 addrspace(1)* %out, i32 addrspace(3)* % ; BOTH-LABEL: {{^}}local_i8_load_i16_max_offset: ; BOTH-NOT: ADD -; BOTH: ds_read_u8 [[REG:v[0-9]+]], {{v[0-9]+}} offset:65535 [M0] +; BOTH: ds_read_u8 [[REG:v[0-9]+]], {{v[0-9]+}} offset:65535 ; BOTH: buffer_store_byte [[REG]], define void @local_i8_load_i16_max_offset(i8 addrspace(1)* %out, i8 addrspace(3)* %in) nounwind { %gep = getelementptr i8 addrspace(3)* %in, i32 65535 @@ -37,7 +38,7 @@ define void @local_i8_load_i16_max_offset(i8 addrspace(1)* %out, i8 addrspace(3) ; SI: s_or_b32 [[ADDR:s[0-9]+]], s{{[0-9]+}}, 0x10000 ; CI: s_add_i32 [[ADDR:s[0-9]+]], s{{[0-9]+}}, 0x10000 ; BOTH: v_mov_b32_e32 [[VREGADDR:v[0-9]+]], [[ADDR]] -; BOTH: ds_read_u8 [[REG:v[0-9]+]], [[VREGADDR]] [M0] +; BOTH: ds_read_u8 [[REG:v[0-9]+]], [[VREGADDR]] ; BOTH: buffer_store_byte [[REG]], define void @local_i8_load_over_i16_max_offset(i8 addrspace(1)* %out, i8 addrspace(3)* %in) nounwind { %gep = getelementptr i8 addrspace(3)* %in, i32 65536 @@ -48,7 +49,7 @@ define void @local_i8_load_over_i16_max_offset(i8 addrspace(1)* %out, i8 addrspa ; BOTH-LABEL: {{^}}local_i64_load: ; BOTH-NOT: ADD -; BOTH: ds_read_b64 [[REG:v[[0-9]+:[0-9]+]]], v{{[0-9]+}} offset:56 [M0] +; BOTH: ds_read_b64 [[REG:v[[0-9]+:[0-9]+]]], v{{[0-9]+}} offset:56 ; BOTH: buffer_store_dwordx2 [[REG]], define void @local_i64_load(i64 addrspace(1)* %out, i64 addrspace(3)* %in) nounwind { %gep = getelementptr i64 addrspace(3)* %in, i32 7 @@ -58,7 +59,7 @@ define void @local_i64_load(i64 addrspace(1)* %out, i64 addrspace(3)* %in) nounw } ; BOTH-LABEL: {{^}}local_i64_load_0_offset -; BOTH: ds_read_b64 [[REG:v\[[0-9]+:[0-9]+\]]], v{{[0-9]+}} [M0] +; BOTH: ds_read_b64 [[REG:v\[[0-9]+:[0-9]+\]]], v{{[0-9]+}} ; BOTH: buffer_store_dwordx2 [[REG]], define void @local_i64_load_0_offset(i64 addrspace(1)* %out, i64 addrspace(3)* %in) nounwind { %val = load i64 addrspace(3)* %in, align 8 @@ -68,7 +69,7 @@ define void 
@local_i64_load_0_offset(i64 addrspace(1)* %out, i64 addrspace(3)* % ; BOTH-LABEL: {{^}}local_f64_load: ; BOTH-NOT: ADD -; BOTH: ds_read_b64 [[REG:v[[0-9]+:[0-9]+]]], v{{[0-9]+}} offset:56 [M0] +; BOTH: ds_read_b64 [[REG:v[[0-9]+:[0-9]+]]], v{{[0-9]+}} offset:56 ; BOTH: buffer_store_dwordx2 [[REG]], define void @local_f64_load(double addrspace(1)* %out, double addrspace(3)* %in) nounwind { %gep = getelementptr double addrspace(3)* %in, i32 7 @@ -78,7 +79,7 @@ define void @local_f64_load(double addrspace(1)* %out, double addrspace(3)* %in) } ; BOTH-LABEL: {{^}}local_f64_load_0_offset -; BOTH: ds_read_b64 [[REG:v\[[0-9]+:[0-9]+\]]], v{{[0-9]+}} [M0] +; BOTH: ds_read_b64 [[REG:v\[[0-9]+:[0-9]+\]]], v{{[0-9]+}} ; BOTH: buffer_store_dwordx2 [[REG]], define void @local_f64_load_0_offset(double addrspace(1)* %out, double addrspace(3)* %in) nounwind { %val = load double addrspace(3)* %in, align 8 @@ -88,7 +89,7 @@ define void @local_f64_load_0_offset(double addrspace(1)* %out, double addrspace ; BOTH-LABEL: {{^}}local_i64_store: ; BOTH-NOT: ADD -; BOTH: ds_write_b64 v{{[0-9]+}}, {{v\[[0-9]+:[0-9]+\]}} offset:56 [M0] +; BOTH: ds_write_b64 v{{[0-9]+}}, {{v\[[0-9]+:[0-9]+\]}} offset:56 define void @local_i64_store(i64 addrspace(3)* %out) nounwind { %gep = getelementptr i64 addrspace(3)* %out, i32 7 store i64 5678, i64 addrspace(3)* %gep, align 8 @@ -97,7 +98,7 @@ define void @local_i64_store(i64 addrspace(3)* %out) nounwind { ; BOTH-LABEL: {{^}}local_i64_store_0_offset: ; BOTH-NOT: ADD -; BOTH: ds_write_b64 v{{[0-9]+}}, {{v\[[0-9]+:[0-9]+\]}} [M0] +; BOTH: ds_write_b64 v{{[0-9]+}}, {{v\[[0-9]+:[0-9]+\]}} define void @local_i64_store_0_offset(i64 addrspace(3)* %out) nounwind { store i64 1234, i64 addrspace(3)* %out, align 8 ret void @@ -105,7 +106,7 @@ define void @local_i64_store_0_offset(i64 addrspace(3)* %out) nounwind { ; BOTH-LABEL: {{^}}local_f64_store: ; BOTH-NOT: ADD -; BOTH: ds_write_b64 v{{[0-9]+}}, {{v\[[0-9]+:[0-9]+\]}} offset:56 [M0] +; BOTH: ds_write_b64 v{{[0-9]+}}, {{v\[[0-9]+:[0-9]+\]}} offset:56 define void @local_f64_store(double addrspace(3)* %out) nounwind { %gep = getelementptr double addrspace(3)* %out, i32 7 store double 16.0, double addrspace(3)* %gep, align 8 @@ -113,7 +114,7 @@ define void @local_f64_store(double addrspace(3)* %out) nounwind { } ; BOTH-LABEL: {{^}}local_f64_store_0_offset -; BOTH: ds_write_b64 v{{[0-9]+}}, {{v\[[0-9]+:[0-9]+\]}} [M0] +; BOTH: ds_write_b64 v{{[0-9]+}}, {{v\[[0-9]+:[0-9]+\]}} define void @local_f64_store_0_offset(double addrspace(3)* %out) nounwind { store double 20.0, double addrspace(3)* %out, align 8 ret void @@ -121,8 +122,8 @@ define void @local_f64_store_0_offset(double addrspace(3)* %out) nounwind { ; BOTH-LABEL: {{^}}local_v2i64_store: ; BOTH-NOT: ADD -; BOTH-DAG: ds_write_b64 v{{[0-9]+}}, {{v\[[0-9]+:[0-9]+\]}} offset:112 [M0] -; BOTH-DAG: ds_write_b64 v{{[0-9]+}}, {{v\[[0-9]+:[0-9]+\]}} offset:120 [M0] +; BOTH-DAG: ds_write_b64 v{{[0-9]+}}, {{v\[[0-9]+:[0-9]+\]}} offset:112 +; BOTH-DAG: ds_write_b64 v{{[0-9]+}}, {{v\[[0-9]+:[0-9]+\]}} offset:120 ; BOTH: s_endpgm define void @local_v2i64_store(<2 x i64> addrspace(3)* %out) nounwind { %gep = getelementptr <2 x i64> addrspace(3)* %out, i32 7 @@ -132,8 +133,8 @@ define void @local_v2i64_store(<2 x i64> addrspace(3)* %out) nounwind { ; BOTH-LABEL: {{^}}local_v2i64_store_0_offset: ; BOTH-NOT: ADD -; BOTH-DAG: ds_write_b64 v{{[0-9]+}}, {{v\[[0-9]+:[0-9]+\]}} [M0] -; BOTH-DAG: ds_write_b64 v{{[0-9]+}}, {{v\[[0-9]+:[0-9]+\]}} offset:8 [M0] +; BOTH-DAG: ds_write_b64 v{{[0-9]+}}, 
{{v\[[0-9]+:[0-9]+\]}} +; BOTH-DAG: ds_write_b64 v{{[0-9]+}}, {{v\[[0-9]+:[0-9]+\]}} offset:8 ; BOTH: s_endpgm define void @local_v2i64_store_0_offset(<2 x i64> addrspace(3)* %out) nounwind { store <2 x i64> <i64 1234, i64 1234>, <2 x i64> addrspace(3)* %out, align 16 @@ -142,10 +143,10 @@ define void @local_v2i64_store_0_offset(<2 x i64> addrspace(3)* %out) nounwind { ; BOTH-LABEL: {{^}}local_v4i64_store: ; BOTH-NOT: ADD -; BOTH-DAG: ds_write_b64 v{{[0-9]+}}, {{v\[[0-9]+:[0-9]+\]}} offset:224 [M0] -; BOTH-DAG: ds_write_b64 v{{[0-9]+}}, {{v\[[0-9]+:[0-9]+\]}} offset:232 [M0] -; BOTH-DAG: ds_write_b64 v{{[0-9]+}}, {{v\[[0-9]+:[0-9]+\]}} offset:240 [M0] -; BOTH-DAG: ds_write_b64 v{{[0-9]+}}, {{v\[[0-9]+:[0-9]+\]}} offset:248 [M0] +; BOTH-DAG: ds_write_b64 v{{[0-9]+}}, {{v\[[0-9]+:[0-9]+\]}} offset:224 +; BOTH-DAG: ds_write_b64 v{{[0-9]+}}, {{v\[[0-9]+:[0-9]+\]}} offset:232 +; BOTH-DAG: ds_write_b64 v{{[0-9]+}}, {{v\[[0-9]+:[0-9]+\]}} offset:240 +; BOTH-DAG: ds_write_b64 v{{[0-9]+}}, {{v\[[0-9]+:[0-9]+\]}} offset:248 ; BOTH: s_endpgm define void @local_v4i64_store(<4 x i64> addrspace(3)* %out) nounwind { %gep = getelementptr <4 x i64> addrspace(3)* %out, i32 7 @@ -155,10 +156,10 @@ define void @local_v4i64_store(<4 x i64> addrspace(3)* %out) nounwind { ; BOTH-LABEL: {{^}}local_v4i64_store_0_offset: ; BOTH-NOT: ADD -; BOTH-DAG: ds_write_b64 v{{[0-9]+}}, {{v\[[0-9]+:[0-9]+\]}} [M0] -; BOTH-DAG: ds_write_b64 v{{[0-9]+}}, {{v\[[0-9]+:[0-9]+\]}} offset:8 [M0] -; BOTH-DAG: ds_write_b64 v{{[0-9]+}}, {{v\[[0-9]+:[0-9]+\]}} offset:16 [M0] -; BOTH-DAG: ds_write_b64 v{{[0-9]+}}, {{v\[[0-9]+:[0-9]+\]}} offset:24 [M0] +; BOTH-DAG: ds_write_b64 v{{[0-9]+}}, {{v\[[0-9]+:[0-9]+\]}} +; BOTH-DAG: ds_write_b64 v{{[0-9]+}}, {{v\[[0-9]+:[0-9]+\]}} offset:8 +; BOTH-DAG: ds_write_b64 v{{[0-9]+}}, {{v\[[0-9]+:[0-9]+\]}} offset:16 +; BOTH-DAG: ds_write_b64 v{{[0-9]+}}, {{v\[[0-9]+:[0-9]+\]}} offset:24 ; BOTH: s_endpgm define void @local_v4i64_store_0_offset(<4 x i64> addrspace(3)* %out) nounwind { store <4 x i64> <i64 1234, i64 1234, i64 1234, i64 1234>, <4 x i64> addrspace(3)* %out, align 16 diff --git a/test/CodeGen/R600/local-atomics.ll b/test/CodeGen/R600/local-atomics.ll index 2ac811f..29921b6 100644 --- a/test/CodeGen/R600/local-atomics.ll +++ b/test/CodeGen/R600/local-atomics.ll @@ -1,15 +1,16 @@ -; RUN: llc -march=r600 -mcpu=SI -verify-machineinstrs < %s | FileCheck -check-prefix=SI -check-prefix=FUNC %s -; RUN: llc -march=r600 -mcpu=bonaire -verify-machineinstrs < %s | FileCheck -strict-whitespace -check-prefix=CI -check-prefix=FUNC %s +; RUN: llc -march=amdgcn -mcpu=SI -verify-machineinstrs < %s | FileCheck -check-prefix=SI -check-prefix=GCN -check-prefix=FUNC %s +; RUN: llc -march=amdgcn -mcpu=bonaire -verify-machineinstrs < %s | FileCheck -check-prefix=CIVI -check-prefix=GCN -check-prefix=FUNC %s +; RUN: llc -march=amdgcn -mcpu=tonga -verify-machineinstrs < %s | FileCheck -check-prefix=CIVI -check-prefix=GCN -check-prefix=FUNC %s ; RUN: llc -march=r600 -mcpu=redwood -verify-machineinstrs < %s | FileCheck -check-prefix=EG -check-prefix=FUNC %s ; FUNC-LABEL: {{^}}lds_atomic_xchg_ret_i32: ; EG: LDS_WRXCHG_RET * -; SI: s_load_dword [[SPTR:s[0-9]+]], -; SI: v_mov_b32_e32 [[DATA:v[0-9]+]], 4 -; SI: v_mov_b32_e32 [[VPTR:v[0-9]+]], [[SPTR]] -; SI: ds_wrxchg_rtn_b32 [[RESULT:v[0-9]+]], [[VPTR]], [[DATA]] [M0] -; SI: buffer_store_dword [[RESULT]], -; SI: s_endpgm +; GCN: v_mov_b32_e32 [[DATA:v[0-9]+]], 4 +; GCN: s_load_dword [[SPTR:s[0-9]+]], +; GCN: v_mov_b32_e32 [[VPTR:v[0-9]+]], [[SPTR]] +; GCN: 
ds_wrxchg_rtn_b32 [[RESULT:v[0-9]+]], [[VPTR]], [[DATA]] +; GCN: buffer_store_dword [[RESULT]], +; GCN: s_endpgm define void @lds_atomic_xchg_ret_i32(i32 addrspace(1)* %out, i32 addrspace(3)* %ptr) nounwind { %result = atomicrmw xchg i32 addrspace(3)* %ptr, i32 4 seq_cst store i32 %result, i32 addrspace(1)* %out, align 4 @@ -18,8 +19,8 @@ define void @lds_atomic_xchg_ret_i32(i32 addrspace(1)* %out, i32 addrspace(3)* % ; FUNC-LABEL: {{^}}lds_atomic_xchg_ret_i32_offset: ; EG: LDS_WRXCHG_RET * -; SI: ds_wrxchg_rtn_b32 v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}} offset:16 -; SI: s_endpgm +; GCN: ds_wrxchg_rtn_b32 v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}} offset:16 +; GCN: s_endpgm define void @lds_atomic_xchg_ret_i32_offset(i32 addrspace(1)* %out, i32 addrspace(3)* %ptr) nounwind { %gep = getelementptr i32 addrspace(3)* %ptr, i32 4 %result = atomicrmw xchg i32 addrspace(3)* %gep, i32 4 seq_cst @@ -30,12 +31,12 @@ define void @lds_atomic_xchg_ret_i32_offset(i32 addrspace(1)* %out, i32 addrspac ; XXX - Is it really necessary to load 4 into VGPR? ; FUNC-LABEL: {{^}}lds_atomic_add_ret_i32: ; EG: LDS_ADD_RET * -; SI: s_load_dword [[SPTR:s[0-9]+]], -; SI: v_mov_b32_e32 [[DATA:v[0-9]+]], 4 -; SI: v_mov_b32_e32 [[VPTR:v[0-9]+]], [[SPTR]] -; SI: ds_add_rtn_u32 [[RESULT:v[0-9]+]], [[VPTR]], [[DATA]] [M0] -; SI: buffer_store_dword [[RESULT]], -; SI: s_endpgm +; GCN: v_mov_b32_e32 [[DATA:v[0-9]+]], 4 +; GCN: s_load_dword [[SPTR:s[0-9]+]], +; GCN: v_mov_b32_e32 [[VPTR:v[0-9]+]], [[SPTR]] +; GCN: ds_add_rtn_u32 [[RESULT:v[0-9]+]], [[VPTR]], [[DATA]] +; GCN: buffer_store_dword [[RESULT]], +; GCN: s_endpgm define void @lds_atomic_add_ret_i32(i32 addrspace(1)* %out, i32 addrspace(3)* %ptr) nounwind { %result = atomicrmw add i32 addrspace(3)* %ptr, i32 4 seq_cst store i32 %result, i32 addrspace(1)* %out, align 4 @@ -44,8 +45,8 @@ define void @lds_atomic_add_ret_i32(i32 addrspace(1)* %out, i32 addrspace(3)* %p ; FUNC-LABEL: {{^}}lds_atomic_add_ret_i32_offset: ; EG: LDS_ADD_RET * -; SI: ds_add_rtn_u32 v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}} offset:16 -; SI: s_endpgm +; GCN: ds_add_rtn_u32 v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}} offset:16 +; GCN: s_endpgm define void @lds_atomic_add_ret_i32_offset(i32 addrspace(1)* %out, i32 addrspace(3)* %ptr) nounwind { %gep = getelementptr i32 addrspace(3)* %ptr, i32 4 %result = atomicrmw add i32 addrspace(3)* %gep, i32 4 seq_cst @@ -55,9 +56,9 @@ define void @lds_atomic_add_ret_i32_offset(i32 addrspace(1)* %out, i32 addrspace ; FUNC-LABEL: {{^}}lds_atomic_add_ret_i32_bad_si_offset: ; EG: LDS_ADD_RET * -; SI: ds_add_rtn_u32 v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}} [M0] -; CI: ds_add_rtn_u32 v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}} offset:16 -; SI: s_endpgm +; SI: ds_add_rtn_u32 v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}} +; CIVI: ds_add_rtn_u32 v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}} offset:16 +; GCN: s_endpgm define void @lds_atomic_add_ret_i32_bad_si_offset(i32 addrspace(1)* %out, i32 addrspace(3)* %ptr, i32 %a, i32 %b) nounwind { %sub = sub i32 %a, %b %add = add i32 %sub, 4 @@ -69,10 +70,9 @@ define void @lds_atomic_add_ret_i32_bad_si_offset(i32 addrspace(1)* %out, i32 ad ; FUNC-LABEL: {{^}}lds_atomic_inc_ret_i32: ; EG: LDS_ADD_RET * -; SI: s_mov_b32 [[SNEGONE:s[0-9]+]], -1 -; SI: v_mov_b32_e32 [[NEGONE:v[0-9]+]], [[SNEGONE]] -; SI: ds_inc_rtn_u32 v{{[0-9]+}}, v{{[0-9]+}}, [[NEGONE]] [M0] -; SI: s_endpgm +; GCN: v_mov_b32_e32 [[NEGONE:v[0-9]+]], -1 +; GCN: ds_inc_rtn_u32 v{{[0-9]+}}, v{{[0-9]+}}, [[NEGONE]] +; GCN: s_endpgm define void @lds_atomic_inc_ret_i32(i32 addrspace(1)* %out, i32 addrspace(3)* 
%ptr) nounwind { %result = atomicrmw add i32 addrspace(3)* %ptr, i32 1 seq_cst store i32 %result, i32 addrspace(1)* %out, align 4 @@ -81,10 +81,9 @@ define void @lds_atomic_inc_ret_i32(i32 addrspace(1)* %out, i32 addrspace(3)* %p ; FUNC-LABEL: {{^}}lds_atomic_inc_ret_i32_offset: ; EG: LDS_ADD_RET * -; SI: s_mov_b32 [[SNEGONE:s[0-9]+]], -1 -; SI: v_mov_b32_e32 [[NEGONE:v[0-9]+]], [[SNEGONE]] -; SI: ds_inc_rtn_u32 v{{[0-9]+}}, v{{[0-9]+}}, [[NEGONE]] offset:16 -; SI: s_endpgm +; GCN: v_mov_b32_e32 [[NEGONE:v[0-9]+]], -1 +; GCN: ds_inc_rtn_u32 v{{[0-9]+}}, v{{[0-9]+}}, [[NEGONE]] offset:16 +; GCN: s_endpgm define void @lds_atomic_inc_ret_i32_offset(i32 addrspace(1)* %out, i32 addrspace(3)* %ptr) nounwind { %gep = getelementptr i32 addrspace(3)* %ptr, i32 4 %result = atomicrmw add i32 addrspace(3)* %gep, i32 1 seq_cst @@ -94,9 +93,9 @@ define void @lds_atomic_inc_ret_i32_offset(i32 addrspace(1)* %out, i32 addrspace ; FUNC-LABEL: {{^}}lds_atomic_inc_ret_i32_bad_si_offset: ; EG: LDS_ADD_RET * -; SI: ds_inc_rtn_u32 v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}} [M0] -; CI: ds_inc_rtn_u32 v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}} offset:16 -; SI: s_endpgm +; SI: ds_inc_rtn_u32 v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}} +; CIVI: ds_inc_rtn_u32 v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}} offset:16 +; GCN: s_endpgm define void @lds_atomic_inc_ret_i32_bad_si_offset(i32 addrspace(1)* %out, i32 addrspace(3)* %ptr, i32 %a, i32 %b) nounwind { %sub = sub i32 %a, %b %add = add i32 %sub, 4 @@ -108,8 +107,8 @@ define void @lds_atomic_inc_ret_i32_bad_si_offset(i32 addrspace(1)* %out, i32 ad ; FUNC-LABEL: {{^}}lds_atomic_sub_ret_i32: ; EG: LDS_SUB_RET * -; SI: ds_sub_rtn_u32 -; SI: s_endpgm +; GCN: ds_sub_rtn_u32 +; GCN: s_endpgm define void @lds_atomic_sub_ret_i32(i32 addrspace(1)* %out, i32 addrspace(3)* %ptr) nounwind { %result = atomicrmw sub i32 addrspace(3)* %ptr, i32 4 seq_cst store i32 %result, i32 addrspace(1)* %out, align 4 @@ -118,8 +117,8 @@ define void @lds_atomic_sub_ret_i32(i32 addrspace(1)* %out, i32 addrspace(3)* %p ; FUNC-LABEL: {{^}}lds_atomic_sub_ret_i32_offset: ; EG: LDS_SUB_RET * -; SI: ds_sub_rtn_u32 v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}} offset:16 -; SI: s_endpgm +; GCN: ds_sub_rtn_u32 v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}} offset:16 +; GCN: s_endpgm define void @lds_atomic_sub_ret_i32_offset(i32 addrspace(1)* %out, i32 addrspace(3)* %ptr) nounwind { %gep = getelementptr i32 addrspace(3)* %ptr, i32 4 %result = atomicrmw sub i32 addrspace(3)* %gep, i32 4 seq_cst @@ -129,10 +128,9 @@ define void @lds_atomic_sub_ret_i32_offset(i32 addrspace(1)* %out, i32 addrspace ; FUNC-LABEL: {{^}}lds_atomic_dec_ret_i32: ; EG: LDS_SUB_RET * -; SI: s_mov_b32 [[SNEGONE:s[0-9]+]], -1 -; SI: v_mov_b32_e32 [[NEGONE:v[0-9]+]], [[SNEGONE]] -; SI: ds_dec_rtn_u32 v{{[0-9]+}}, v{{[0-9]+}}, [[NEGONE]] [M0] -; SI: s_endpgm +; GCN: v_mov_b32_e32 [[NEGONE:v[0-9]+]], -1 +; GCN: ds_dec_rtn_u32 v{{[0-9]+}}, v{{[0-9]+}}, [[NEGONE]] +; GCN: s_endpgm define void @lds_atomic_dec_ret_i32(i32 addrspace(1)* %out, i32 addrspace(3)* %ptr) nounwind { %result = atomicrmw sub i32 addrspace(3)* %ptr, i32 1 seq_cst store i32 %result, i32 addrspace(1)* %out, align 4 @@ -141,10 +139,9 @@ define void @lds_atomic_dec_ret_i32(i32 addrspace(1)* %out, i32 addrspace(3)* %p ; FUNC-LABEL: {{^}}lds_atomic_dec_ret_i32_offset: ; EG: LDS_SUB_RET * -; SI: s_mov_b32 [[SNEGONE:s[0-9]+]], -1 -; SI: v_mov_b32_e32 [[NEGONE:v[0-9]+]], [[SNEGONE]] -; SI: ds_dec_rtn_u32 v{{[0-9]+}}, v{{[0-9]+}}, [[NEGONE]] offset:16 -; SI: s_endpgm +; GCN: v_mov_b32_e32 [[NEGONE:v[0-9]+]], -1 +; GCN: 
ds_dec_rtn_u32 v{{[0-9]+}}, v{{[0-9]+}}, [[NEGONE]] offset:16 +; GCN: s_endpgm define void @lds_atomic_dec_ret_i32_offset(i32 addrspace(1)* %out, i32 addrspace(3)* %ptr) nounwind { %gep = getelementptr i32 addrspace(3)* %ptr, i32 4 %result = atomicrmw sub i32 addrspace(3)* %gep, i32 1 seq_cst @@ -154,8 +151,8 @@ define void @lds_atomic_dec_ret_i32_offset(i32 addrspace(1)* %out, i32 addrspace ; FUNC-LABEL: {{^}}lds_atomic_and_ret_i32: ; EG: LDS_AND_RET * -; SI: ds_and_rtn_b32 -; SI: s_endpgm +; GCN: ds_and_rtn_b32 +; GCN: s_endpgm define void @lds_atomic_and_ret_i32(i32 addrspace(1)* %out, i32 addrspace(3)* %ptr) nounwind { %result = atomicrmw and i32 addrspace(3)* %ptr, i32 4 seq_cst store i32 %result, i32 addrspace(1)* %out, align 4 @@ -164,8 +161,8 @@ define void @lds_atomic_and_ret_i32(i32 addrspace(1)* %out, i32 addrspace(3)* %p ; FUNC-LABEL: {{^}}lds_atomic_and_ret_i32_offset: ; EG: LDS_AND_RET * -; SI: ds_and_rtn_b32 v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}} offset:16 -; SI: s_endpgm +; GCN: ds_and_rtn_b32 v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}} offset:16 +; GCN: s_endpgm define void @lds_atomic_and_ret_i32_offset(i32 addrspace(1)* %out, i32 addrspace(3)* %ptr) nounwind { %gep = getelementptr i32 addrspace(3)* %ptr, i32 4 %result = atomicrmw and i32 addrspace(3)* %gep, i32 4 seq_cst @@ -175,8 +172,8 @@ define void @lds_atomic_and_ret_i32_offset(i32 addrspace(1)* %out, i32 addrspace ; FUNC-LABEL: {{^}}lds_atomic_or_ret_i32: ; EG: LDS_OR_RET * -; SI: ds_or_rtn_b32 -; SI: s_endpgm +; GCN: ds_or_rtn_b32 +; GCN: s_endpgm define void @lds_atomic_or_ret_i32(i32 addrspace(1)* %out, i32 addrspace(3)* %ptr) nounwind { %result = atomicrmw or i32 addrspace(3)* %ptr, i32 4 seq_cst store i32 %result, i32 addrspace(1)* %out, align 4 @@ -185,8 +182,8 @@ define void @lds_atomic_or_ret_i32(i32 addrspace(1)* %out, i32 addrspace(3)* %pt ; FUNC-LABEL: {{^}}lds_atomic_or_ret_i32_offset: ; EG: LDS_OR_RET * -; SI: ds_or_rtn_b32 v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}} offset:16 -; SI: s_endpgm +; GCN: ds_or_rtn_b32 v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}} offset:16 +; GCN: s_endpgm define void @lds_atomic_or_ret_i32_offset(i32 addrspace(1)* %out, i32 addrspace(3)* %ptr) nounwind { %gep = getelementptr i32 addrspace(3)* %ptr, i32 4 %result = atomicrmw or i32 addrspace(3)* %gep, i32 4 seq_cst @@ -196,8 +193,8 @@ define void @lds_atomic_or_ret_i32_offset(i32 addrspace(1)* %out, i32 addrspace( ; FUNC-LABEL: {{^}}lds_atomic_xor_ret_i32: ; EG: LDS_XOR_RET * -; SI: ds_xor_rtn_b32 -; SI: s_endpgm +; GCN: ds_xor_rtn_b32 +; GCN: s_endpgm define void @lds_atomic_xor_ret_i32(i32 addrspace(1)* %out, i32 addrspace(3)* %ptr) nounwind { %result = atomicrmw xor i32 addrspace(3)* %ptr, i32 4 seq_cst store i32 %result, i32 addrspace(1)* %out, align 4 @@ -206,8 +203,8 @@ define void @lds_atomic_xor_ret_i32(i32 addrspace(1)* %out, i32 addrspace(3)* %p ; FUNC-LABEL: {{^}}lds_atomic_xor_ret_i32_offset: ; EG: LDS_XOR_RET * -; SI: ds_xor_rtn_b32 v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}} offset:16 -; SI: s_endpgm +; GCN: ds_xor_rtn_b32 v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}} offset:16 +; GCN: s_endpgm define void @lds_atomic_xor_ret_i32_offset(i32 addrspace(1)* %out, i32 addrspace(3)* %ptr) nounwind { %gep = getelementptr i32 addrspace(3)* %ptr, i32 4 %result = atomicrmw xor i32 addrspace(3)* %gep, i32 4 seq_cst @@ -225,8 +222,8 @@ define void @lds_atomic_xor_ret_i32_offset(i32 addrspace(1)* %out, i32 addrspace ; FUNC-LABEL: {{^}}lds_atomic_min_ret_i32: ; EG: LDS_MIN_INT_RET * -; SI: ds_min_rtn_i32 -; SI: s_endpgm +; GCN: ds_min_rtn_i32 +; GCN: 
s_endpgm define void @lds_atomic_min_ret_i32(i32 addrspace(1)* %out, i32 addrspace(3)* %ptr) nounwind { %result = atomicrmw min i32 addrspace(3)* %ptr, i32 4 seq_cst store i32 %result, i32 addrspace(1)* %out, align 4 @@ -235,8 +232,8 @@ define void @lds_atomic_min_ret_i32(i32 addrspace(1)* %out, i32 addrspace(3)* %p ; FUNC-LABEL: {{^}}lds_atomic_min_ret_i32_offset: ; EG: LDS_MIN_INT_RET * -; SI: ds_min_rtn_i32 v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}} offset:16 -; SI: s_endpgm +; GCN: ds_min_rtn_i32 v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}} offset:16 +; GCN: s_endpgm define void @lds_atomic_min_ret_i32_offset(i32 addrspace(1)* %out, i32 addrspace(3)* %ptr) nounwind { %gep = getelementptr i32 addrspace(3)* %ptr, i32 4 %result = atomicrmw min i32 addrspace(3)* %gep, i32 4 seq_cst @@ -246,8 +243,8 @@ define void @lds_atomic_min_ret_i32_offset(i32 addrspace(1)* %out, i32 addrspace ; FUNC-LABEL: {{^}}lds_atomic_max_ret_i32: ; EG: LDS_MAX_INT_RET * -; SI: ds_max_rtn_i32 -; SI: s_endpgm +; GCN: ds_max_rtn_i32 +; GCN: s_endpgm define void @lds_atomic_max_ret_i32(i32 addrspace(1)* %out, i32 addrspace(3)* %ptr) nounwind { %result = atomicrmw max i32 addrspace(3)* %ptr, i32 4 seq_cst store i32 %result, i32 addrspace(1)* %out, align 4 @@ -256,8 +253,8 @@ define void @lds_atomic_max_ret_i32(i32 addrspace(1)* %out, i32 addrspace(3)* %p ; FUNC-LABEL: {{^}}lds_atomic_max_ret_i32_offset: ; EG: LDS_MAX_INT_RET * -; SI: ds_max_rtn_i32 v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}} offset:16 -; SI: s_endpgm +; GCN: ds_max_rtn_i32 v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}} offset:16 +; GCN: s_endpgm define void @lds_atomic_max_ret_i32_offset(i32 addrspace(1)* %out, i32 addrspace(3)* %ptr) nounwind { %gep = getelementptr i32 addrspace(3)* %ptr, i32 4 %result = atomicrmw max i32 addrspace(3)* %gep, i32 4 seq_cst @@ -267,8 +264,8 @@ define void @lds_atomic_max_ret_i32_offset(i32 addrspace(1)* %out, i32 addrspace ; FUNC-LABEL: {{^}}lds_atomic_umin_ret_i32: ; EG: LDS_MIN_UINT_RET * -; SI: ds_min_rtn_u32 -; SI: s_endpgm +; GCN: ds_min_rtn_u32 +; GCN: s_endpgm define void @lds_atomic_umin_ret_i32(i32 addrspace(1)* %out, i32 addrspace(3)* %ptr) nounwind { %result = atomicrmw umin i32 addrspace(3)* %ptr, i32 4 seq_cst store i32 %result, i32 addrspace(1)* %out, align 4 @@ -277,8 +274,8 @@ define void @lds_atomic_umin_ret_i32(i32 addrspace(1)* %out, i32 addrspace(3)* % ; FUNC-LABEL: {{^}}lds_atomic_umin_ret_i32_offset: ; EG: LDS_MIN_UINT_RET * -; SI: ds_min_rtn_u32 v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}} offset:16 -; SI: s_endpgm +; GCN: ds_min_rtn_u32 v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}} offset:16 +; GCN: s_endpgm define void @lds_atomic_umin_ret_i32_offset(i32 addrspace(1)* %out, i32 addrspace(3)* %ptr) nounwind { %gep = getelementptr i32 addrspace(3)* %ptr, i32 4 %result = atomicrmw umin i32 addrspace(3)* %gep, i32 4 seq_cst @@ -288,8 +285,8 @@ define void @lds_atomic_umin_ret_i32_offset(i32 addrspace(1)* %out, i32 addrspac ; FUNC-LABEL: {{^}}lds_atomic_umax_ret_i32: ; EG: LDS_MAX_UINT_RET * -; SI: ds_max_rtn_u32 -; SI: s_endpgm +; GCN: ds_max_rtn_u32 +; GCN: s_endpgm define void @lds_atomic_umax_ret_i32(i32 addrspace(1)* %out, i32 addrspace(3)* %ptr) nounwind { %result = atomicrmw umax i32 addrspace(3)* %ptr, i32 4 seq_cst store i32 %result, i32 addrspace(1)* %out, align 4 @@ -298,8 +295,8 @@ define void @lds_atomic_umax_ret_i32(i32 addrspace(1)* %out, i32 addrspace(3)* % ; FUNC-LABEL: {{^}}lds_atomic_umax_ret_i32_offset: ; EG: LDS_MAX_UINT_RET * -; SI: ds_max_rtn_u32 v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}} offset:16 -; SI: s_endpgm +; GCN: 
ds_max_rtn_u32 v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}} offset:16 +; GCN: s_endpgm define void @lds_atomic_umax_ret_i32_offset(i32 addrspace(1)* %out, i32 addrspace(3)* %ptr) nounwind { %gep = getelementptr i32 addrspace(3)* %ptr, i32 4 %result = atomicrmw umax i32 addrspace(3)* %gep, i32 4 seq_cst @@ -308,19 +305,19 @@ define void @lds_atomic_umax_ret_i32_offset(i32 addrspace(1)* %out, i32 addrspac } ; FUNC-LABEL: {{^}}lds_atomic_xchg_noret_i32: -; SI: s_load_dword [[SPTR:s[0-9]+]], -; SI: v_mov_b32_e32 [[DATA:v[0-9]+]], 4 -; SI: v_mov_b32_e32 [[VPTR:v[0-9]+]], [[SPTR]] -; SI: ds_wrxchg_rtn_b32 [[RESULT:v[0-9]+]], [[VPTR]], [[DATA]] [M0] -; SI: s_endpgm +; GCN: s_load_dword [[SPTR:s[0-9]+]], +; GCN: v_mov_b32_e32 [[DATA:v[0-9]+]], 4 +; GCN: v_mov_b32_e32 [[VPTR:v[0-9]+]], [[SPTR]] +; GCN: ds_wrxchg_rtn_b32 [[RESULT:v[0-9]+]], [[VPTR]], [[DATA]] +; GCN: s_endpgm define void @lds_atomic_xchg_noret_i32(i32 addrspace(3)* %ptr) nounwind { %result = atomicrmw xchg i32 addrspace(3)* %ptr, i32 4 seq_cst ret void } ; FUNC-LABEL: {{^}}lds_atomic_xchg_noret_i32_offset: -; SI: ds_wrxchg_rtn_b32 v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}} offset:16 -; SI: s_endpgm +; GCN: ds_wrxchg_rtn_b32 v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}} offset:16 +; GCN: s_endpgm define void @lds_atomic_xchg_noret_i32_offset(i32 addrspace(3)* %ptr) nounwind { %gep = getelementptr i32 addrspace(3)* %ptr, i32 4 %result = atomicrmw xchg i32 addrspace(3)* %gep, i32 4 seq_cst @@ -329,19 +326,19 @@ define void @lds_atomic_xchg_noret_i32_offset(i32 addrspace(3)* %ptr) nounwind { ; XXX - Is it really necessary to load 4 into VGPR? ; FUNC-LABEL: {{^}}lds_atomic_add_noret_i32: -; SI: s_load_dword [[SPTR:s[0-9]+]], -; SI: v_mov_b32_e32 [[DATA:v[0-9]+]], 4 -; SI: v_mov_b32_e32 [[VPTR:v[0-9]+]], [[SPTR]] -; SI: ds_add_u32 [[VPTR]], [[DATA]] [M0] -; SI: s_endpgm +; GCN: s_load_dword [[SPTR:s[0-9]+]], +; GCN: v_mov_b32_e32 [[DATA:v[0-9]+]], 4 +; GCN: v_mov_b32_e32 [[VPTR:v[0-9]+]], [[SPTR]] +; GCN: ds_add_u32 [[VPTR]], [[DATA]] +; GCN: s_endpgm define void @lds_atomic_add_noret_i32(i32 addrspace(3)* %ptr) nounwind { %result = atomicrmw add i32 addrspace(3)* %ptr, i32 4 seq_cst ret void } ; FUNC-LABEL: {{^}}lds_atomic_add_noret_i32_offset: -; SI: ds_add_u32 v{{[0-9]+}}, v{{[0-9]+}} offset:16 -; SI: s_endpgm +; GCN: ds_add_u32 v{{[0-9]+}}, v{{[0-9]+}} offset:16 +; GCN: s_endpgm define void @lds_atomic_add_noret_i32_offset(i32 addrspace(3)* %ptr) nounwind { %gep = getelementptr i32 addrspace(3)* %ptr, i32 4 %result = atomicrmw add i32 addrspace(3)* %gep, i32 4 seq_cst @@ -349,9 +346,9 @@ define void @lds_atomic_add_noret_i32_offset(i32 addrspace(3)* %ptr) nounwind { } ; FUNC-LABEL: {{^}}lds_atomic_add_noret_i32_bad_si_offset -; SI: ds_add_u32 v{{[0-9]+}}, v{{[0-9]+}} [M0] -; CI: ds_add_u32 v{{[0-9]+}}, v{{[0-9]+}} offset:16 [M0] -; SI: s_endpgm +; SI: ds_add_u32 v{{[0-9]+}}, v{{[0-9]+}} +; CIVI: ds_add_u32 v{{[0-9]+}}, v{{[0-9]+}} offset:16 +; GCN: s_endpgm define void @lds_atomic_add_noret_i32_bad_si_offset(i32 addrspace(3)* %ptr, i32 %a, i32 %b) nounwind { %sub = sub i32 %a, %b %add = add i32 %sub, 4 @@ -361,20 +358,18 @@ define void @lds_atomic_add_noret_i32_bad_si_offset(i32 addrspace(3)* %ptr, i32 } ; FUNC-LABEL: {{^}}lds_atomic_inc_noret_i32: -; SI: s_mov_b32 [[SNEGONE:s[0-9]+]], -1 -; SI: v_mov_b32_e32 [[NEGONE:v[0-9]+]], [[SNEGONE]] -; SI: ds_inc_u32 v{{[0-9]+}}, [[NEGONE]] [M0] -; SI: s_endpgm +; GCN: v_mov_b32_e32 [[NEGONE:v[0-9]+]], -1 +; GCN: ds_inc_u32 v{{[0-9]+}}, [[NEGONE]] +; GCN: s_endpgm define void @lds_atomic_inc_noret_i32(i32 
addrspace(3)* %ptr) nounwind { %result = atomicrmw add i32 addrspace(3)* %ptr, i32 1 seq_cst ret void } ; FUNC-LABEL: {{^}}lds_atomic_inc_noret_i32_offset: -; SI: s_mov_b32 [[SNEGONE:s[0-9]+]], -1 -; SI: v_mov_b32_e32 [[NEGONE:v[0-9]+]], [[SNEGONE]] -; SI: ds_inc_u32 v{{[0-9]+}}, [[NEGONE]] offset:16 -; SI: s_endpgm +; GCN: v_mov_b32_e32 [[NEGONE:v[0-9]+]], -1 +; GCN: ds_inc_u32 v{{[0-9]+}}, [[NEGONE]] offset:16 +; GCN: s_endpgm define void @lds_atomic_inc_noret_i32_offset(i32 addrspace(3)* %ptr) nounwind { %gep = getelementptr i32 addrspace(3)* %ptr, i32 4 %result = atomicrmw add i32 addrspace(3)* %gep, i32 1 seq_cst @@ -383,8 +378,8 @@ define void @lds_atomic_inc_noret_i32_offset(i32 addrspace(3)* %ptr) nounwind { ; FUNC-LABEL: {{^}}lds_atomic_inc_noret_i32_bad_si_offset: ; SI: ds_inc_u32 v{{[0-9]+}}, v{{[0-9]+}} -; CI: ds_inc_u32 v{{[0-9]+}}, v{{[0-9]+}} offset:16 -; SI: s_endpgm +; CIVI: ds_inc_u32 v{{[0-9]+}}, v{{[0-9]+}} offset:16 +; GCN: s_endpgm define void @lds_atomic_inc_noret_i32_bad_si_offset(i32 addrspace(3)* %ptr, i32 %a, i32 %b) nounwind { %sub = sub i32 %a, %b %add = add i32 %sub, 4 @@ -394,16 +389,16 @@ define void @lds_atomic_inc_noret_i32_bad_si_offset(i32 addrspace(3)* %ptr, i32 } ; FUNC-LABEL: {{^}}lds_atomic_sub_noret_i32: -; SI: ds_sub_u32 -; SI: s_endpgm +; GCN: ds_sub_u32 +; GCN: s_endpgm define void @lds_atomic_sub_noret_i32(i32 addrspace(3)* %ptr) nounwind { %result = atomicrmw sub i32 addrspace(3)* %ptr, i32 4 seq_cst ret void } ; FUNC-LABEL: {{^}}lds_atomic_sub_noret_i32_offset: -; SI: ds_sub_u32 v{{[0-9]+}}, v{{[0-9]+}} offset:16 -; SI: s_endpgm +; GCN: ds_sub_u32 v{{[0-9]+}}, v{{[0-9]+}} offset:16 +; GCN: s_endpgm define void @lds_atomic_sub_noret_i32_offset(i32 addrspace(3)* %ptr) nounwind { %gep = getelementptr i32 addrspace(3)* %ptr, i32 4 %result = atomicrmw sub i32 addrspace(3)* %gep, i32 4 seq_cst @@ -411,20 +406,18 @@ define void @lds_atomic_sub_noret_i32_offset(i32 addrspace(3)* %ptr) nounwind { } ; FUNC-LABEL: {{^}}lds_atomic_dec_noret_i32: -; SI: s_mov_b32 [[SNEGONE:s[0-9]+]], -1 -; SI: v_mov_b32_e32 [[NEGONE:v[0-9]+]], [[SNEGONE]] -; SI: ds_dec_u32 v{{[0-9]+}}, [[NEGONE]] -; SI: s_endpgm +; GCN: v_mov_b32_e32 [[NEGONE:v[0-9]+]], -1 +; GCN: ds_dec_u32 v{{[0-9]+}}, [[NEGONE]] +; GCN: s_endpgm define void @lds_atomic_dec_noret_i32(i32 addrspace(3)* %ptr) nounwind { %result = atomicrmw sub i32 addrspace(3)* %ptr, i32 1 seq_cst ret void } ; FUNC-LABEL: {{^}}lds_atomic_dec_noret_i32_offset: -; SI: s_mov_b32 [[SNEGONE:s[0-9]+]], -1 -; SI: v_mov_b32_e32 [[NEGONE:v[0-9]+]], [[SNEGONE]] -; SI: ds_dec_u32 v{{[0-9]+}}, [[NEGONE]] offset:16 -; SI: s_endpgm +; GCN: v_mov_b32_e32 [[NEGONE:v[0-9]+]], -1 +; GCN: ds_dec_u32 v{{[0-9]+}}, [[NEGONE]] offset:16 +; GCN: s_endpgm define void @lds_atomic_dec_noret_i32_offset(i32 addrspace(3)* %ptr) nounwind { %gep = getelementptr i32 addrspace(3)* %ptr, i32 4 %result = atomicrmw sub i32 addrspace(3)* %gep, i32 1 seq_cst @@ -432,16 +425,16 @@ define void @lds_atomic_dec_noret_i32_offset(i32 addrspace(3)* %ptr) nounwind { } ; FUNC-LABEL: {{^}}lds_atomic_and_noret_i32: -; SI: ds_and_b32 -; SI: s_endpgm +; GCN: ds_and_b32 +; GCN: s_endpgm define void @lds_atomic_and_noret_i32(i32 addrspace(3)* %ptr) nounwind { %result = atomicrmw and i32 addrspace(3)* %ptr, i32 4 seq_cst ret void } ; FUNC-LABEL: {{^}}lds_atomic_and_noret_i32_offset: -; SI: ds_and_b32 v{{[0-9]+}}, v{{[0-9]+}} offset:16 -; SI: s_endpgm +; GCN: ds_and_b32 v{{[0-9]+}}, v{{[0-9]+}} offset:16 +; GCN: s_endpgm define void @lds_atomic_and_noret_i32_offset(i32 
addrspace(3)* %ptr) nounwind { %gep = getelementptr i32 addrspace(3)* %ptr, i32 4 %result = atomicrmw and i32 addrspace(3)* %gep, i32 4 seq_cst @@ -449,16 +442,16 @@ define void @lds_atomic_and_noret_i32_offset(i32 addrspace(3)* %ptr) nounwind { } ; FUNC-LABEL: {{^}}lds_atomic_or_noret_i32: -; SI: ds_or_b32 -; SI: s_endpgm +; GCN: ds_or_b32 +; GCN: s_endpgm define void @lds_atomic_or_noret_i32(i32 addrspace(3)* %ptr) nounwind { %result = atomicrmw or i32 addrspace(3)* %ptr, i32 4 seq_cst ret void } ; FUNC-LABEL: {{^}}lds_atomic_or_noret_i32_offset: -; SI: ds_or_b32 v{{[0-9]+}}, v{{[0-9]+}} offset:16 -; SI: s_endpgm +; GCN: ds_or_b32 v{{[0-9]+}}, v{{[0-9]+}} offset:16 +; GCN: s_endpgm define void @lds_atomic_or_noret_i32_offset(i32 addrspace(3)* %ptr) nounwind { %gep = getelementptr i32 addrspace(3)* %ptr, i32 4 %result = atomicrmw or i32 addrspace(3)* %gep, i32 4 seq_cst @@ -466,16 +459,16 @@ define void @lds_atomic_or_noret_i32_offset(i32 addrspace(3)* %ptr) nounwind { } ; FUNC-LABEL: {{^}}lds_atomic_xor_noret_i32: -; SI: ds_xor_b32 -; SI: s_endpgm +; GCN: ds_xor_b32 +; GCN: s_endpgm define void @lds_atomic_xor_noret_i32(i32 addrspace(3)* %ptr) nounwind { %result = atomicrmw xor i32 addrspace(3)* %ptr, i32 4 seq_cst ret void } ; FUNC-LABEL: {{^}}lds_atomic_xor_noret_i32_offset: -; SI: ds_xor_b32 v{{[0-9]+}}, v{{[0-9]+}} offset:16 -; SI: s_endpgm +; GCN: ds_xor_b32 v{{[0-9]+}}, v{{[0-9]+}} offset:16 +; GCN: s_endpgm define void @lds_atomic_xor_noret_i32_offset(i32 addrspace(3)* %ptr) nounwind { %gep = getelementptr i32 addrspace(3)* %ptr, i32 4 %result = atomicrmw xor i32 addrspace(3)* %gep, i32 4 seq_cst @@ -490,16 +483,16 @@ define void @lds_atomic_xor_noret_i32_offset(i32 addrspace(3)* %ptr) nounwind { ; } ; FUNC-LABEL: {{^}}lds_atomic_min_noret_i32: -; SI: ds_min_i32 -; SI: s_endpgm +; GCN: ds_min_i32 +; GCN: s_endpgm define void @lds_atomic_min_noret_i32(i32 addrspace(3)* %ptr) nounwind { %result = atomicrmw min i32 addrspace(3)* %ptr, i32 4 seq_cst ret void } ; FUNC-LABEL: {{^}}lds_atomic_min_noret_i32_offset: -; SI: ds_min_i32 v{{[0-9]+}}, v{{[0-9]+}} offset:16 -; SI: s_endpgm +; GCN: ds_min_i32 v{{[0-9]+}}, v{{[0-9]+}} offset:16 +; GCN: s_endpgm define void @lds_atomic_min_noret_i32_offset(i32 addrspace(3)* %ptr) nounwind { %gep = getelementptr i32 addrspace(3)* %ptr, i32 4 %result = atomicrmw min i32 addrspace(3)* %gep, i32 4 seq_cst @@ -507,16 +500,16 @@ define void @lds_atomic_min_noret_i32_offset(i32 addrspace(3)* %ptr) nounwind { } ; FUNC-LABEL: {{^}}lds_atomic_max_noret_i32: -; SI: ds_max_i32 -; SI: s_endpgm +; GCN: ds_max_i32 +; GCN: s_endpgm define void @lds_atomic_max_noret_i32(i32 addrspace(3)* %ptr) nounwind { %result = atomicrmw max i32 addrspace(3)* %ptr, i32 4 seq_cst ret void } ; FUNC-LABEL: {{^}}lds_atomic_max_noret_i32_offset: -; SI: ds_max_i32 v{{[0-9]+}}, v{{[0-9]+}} offset:16 -; SI: s_endpgm +; GCN: ds_max_i32 v{{[0-9]+}}, v{{[0-9]+}} offset:16 +; GCN: s_endpgm define void @lds_atomic_max_noret_i32_offset(i32 addrspace(3)* %ptr) nounwind { %gep = getelementptr i32 addrspace(3)* %ptr, i32 4 %result = atomicrmw max i32 addrspace(3)* %gep, i32 4 seq_cst @@ -524,16 +517,16 @@ define void @lds_atomic_max_noret_i32_offset(i32 addrspace(3)* %ptr) nounwind { } ; FUNC-LABEL: {{^}}lds_atomic_umin_noret_i32: -; SI: ds_min_u32 -; SI: s_endpgm +; GCN: ds_min_u32 +; GCN: s_endpgm define void @lds_atomic_umin_noret_i32(i32 addrspace(3)* %ptr) nounwind { %result = atomicrmw umin i32 addrspace(3)* %ptr, i32 4 seq_cst ret void } ; FUNC-LABEL: 
{{^}}lds_atomic_umin_noret_i32_offset: -; SI: ds_min_u32 v{{[0-9]+}}, v{{[0-9]+}} offset:16 -; SI: s_endpgm +; GCN: ds_min_u32 v{{[0-9]+}}, v{{[0-9]+}} offset:16 +; GCN: s_endpgm define void @lds_atomic_umin_noret_i32_offset(i32 addrspace(3)* %ptr) nounwind { %gep = getelementptr i32 addrspace(3)* %ptr, i32 4 %result = atomicrmw umin i32 addrspace(3)* %gep, i32 4 seq_cst @@ -541,16 +534,16 @@ define void @lds_atomic_umin_noret_i32_offset(i32 addrspace(3)* %ptr) nounwind { } ; FUNC-LABEL: {{^}}lds_atomic_umax_noret_i32: -; SI: ds_max_u32 -; SI: s_endpgm +; GCN: ds_max_u32 +; GCN: s_endpgm define void @lds_atomic_umax_noret_i32(i32 addrspace(3)* %ptr) nounwind { %result = atomicrmw umax i32 addrspace(3)* %ptr, i32 4 seq_cst ret void } ; FUNC-LABEL: {{^}}lds_atomic_umax_noret_i32_offset: -; SI: ds_max_u32 v{{[0-9]+}}, v{{[0-9]+}} offset:16 -; SI: s_endpgm +; GCN: ds_max_u32 v{{[0-9]+}}, v{{[0-9]+}} offset:16 +; GCN: s_endpgm define void @lds_atomic_umax_noret_i32_offset(i32 addrspace(3)* %ptr) nounwind { %gep = getelementptr i32 addrspace(3)* %ptr, i32 4 %result = atomicrmw umax i32 addrspace(3)* %gep, i32 4 seq_cst diff --git a/test/CodeGen/R600/local-atomics64.ll b/test/CodeGen/R600/local-atomics64.ll index ce0cf59..50d039f 100644 --- a/test/CodeGen/R600/local-atomics64.ll +++ b/test/CodeGen/R600/local-atomics64.ll @@ -1,8 +1,9 @@ -; RUN: llc -march=r600 -mcpu=SI -verify-machineinstrs < %s | FileCheck -strict-whitespace -check-prefix=SI %s +; RUN: llc -march=amdgcn -mcpu=SI -verify-machineinstrs < %s | FileCheck -strict-whitespace -check-prefix=SI -check-prefix=GCN %s +; RUN: llc -march=amdgcn -mcpu=tonga -verify-machineinstrs < %s | FileCheck -strict-whitespace -check-prefix=VI -check-prefix=GCN %s ; FUNC-LABEL: {{^}}lds_atomic_xchg_ret_i64: -; SI: ds_wrxchg_rtn_b64 -; SI: s_endpgm +; GCN: ds_wrxchg_rtn_b64 +; GCN: s_endpgm define void @lds_atomic_xchg_ret_i64(i64 addrspace(1)* %out, i64 addrspace(3)* %ptr) nounwind { %result = atomicrmw xchg i64 addrspace(3)* %ptr, i64 4 seq_cst store i64 %result, i64 addrspace(1)* %out, align 8 @@ -10,8 +11,8 @@ define void @lds_atomic_xchg_ret_i64(i64 addrspace(1)* %out, i64 addrspace(3)* % } ; FUNC-LABEL: {{^}}lds_atomic_xchg_ret_i64_offset: -; SI: ds_wrxchg_rtn_b64 {{.*}} offset:32 -; SI: s_endpgm +; GCN: ds_wrxchg_rtn_b64 {{.*}} offset:32 +; GCN: s_endpgm define void @lds_atomic_xchg_ret_i64_offset(i64 addrspace(1)* %out, i64 addrspace(3)* %ptr) nounwind { %gep = getelementptr i64 addrspace(3)* %ptr, i32 4 %result = atomicrmw xchg i64 addrspace(3)* %gep, i64 4 seq_cst @@ -20,8 +21,8 @@ define void @lds_atomic_xchg_ret_i64_offset(i64 addrspace(1)* %out, i64 addrspac } ; FUNC-LABEL: {{^}}lds_atomic_add_ret_i64: -; SI: ds_add_rtn_u64 -; SI: s_endpgm +; GCN: ds_add_rtn_u64 +; GCN: s_endpgm define void @lds_atomic_add_ret_i64(i64 addrspace(1)* %out, i64 addrspace(3)* %ptr) nounwind { %result = atomicrmw add i64 addrspace(3)* %ptr, i64 4 seq_cst store i64 %result, i64 addrspace(1)* %out, align 8 @@ -29,14 +30,14 @@ define void @lds_atomic_add_ret_i64(i64 addrspace(1)* %out, i64 addrspace(3)* %p } ; FUNC-LABEL: {{^}}lds_atomic_add_ret_i64_offset: +; GCN: v_mov_b32_e32 v[[LOVDATA:[0-9]+]], 9 +; GCN: v_mov_b32_e32 v[[HIVDATA:[0-9]+]], 0 ; SI: s_load_dword [[PTR:s[0-9]+]], s{{\[[0-9]+:[0-9]+\]}}, 0xb -; SI: s_mov_b64 s{{\[}}[[LOSDATA:[0-9]+]]:[[HISDATA:[0-9]+]]{{\]}}, 9 -; SI-DAG: v_mov_b32_e32 v[[LOVDATA:[0-9]+]], s[[LOSDATA]] -; SI-DAG: v_mov_b32_e32 v[[HIVDATA:[0-9]+]], s[[HISDATA]] -; SI-DAG: v_mov_b32_e32 [[VPTR:v[0-9]+]], [[PTR]] -; SI: ds_add_rtn_u64 
[[RESULT:v\[[0-9]+:[0-9]+\]]], [[VPTR]], v{{\[}}[[LOVDATA]]:[[HIVDATA]]{{\]}} offset:32 [M0] -; SI: buffer_store_dwordx2 [[RESULT]], -; SI: s_endpgm +; VI: s_load_dword [[PTR:s[0-9]+]], s{{\[[0-9]+:[0-9]+\]}}, 0x2c +; GCN-DAG: v_mov_b32_e32 [[VPTR:v[0-9]+]], [[PTR]] +; GCN: ds_add_rtn_u64 [[RESULT:v\[[0-9]+:[0-9]+\]]], [[VPTR]], v{{\[}}[[LOVDATA]]:[[HIVDATA]]{{\]}} offset:32 +; GCN: buffer_store_dwordx2 [[RESULT]], +; GCN: s_endpgm define void @lds_atomic_add_ret_i64_offset(i64 addrspace(1)* %out, i64 addrspace(3)* %ptr) nounwind { %gep = getelementptr i64 addrspace(3)* %ptr, i64 4 %result = atomicrmw add i64 addrspace(3)* %gep, i64 9 seq_cst @@ -45,12 +46,11 @@ define void @lds_atomic_add_ret_i64_offset(i64 addrspace(1)* %out, i64 addrspace } ; FUNC-LABEL: {{^}}lds_atomic_inc_ret_i64: -; SI: s_mov_b64 s{{\[}}[[LOSDATA:[0-9]+]]:[[HISDATA:[0-9]+]]{{\]}}, -1 -; SI-DAG: v_mov_b32_e32 v[[LOVDATA:[0-9]+]], s[[LOSDATA]] -; SI-DAG: v_mov_b32_e32 v[[HIVDATA:[0-9]+]], s[[HISDATA]] -; SI: ds_inc_rtn_u64 [[RESULT:v\[[0-9]+:[0-9]+\]]], [[VPTR]], v{{\[}}[[LOVDATA]]:[[HIVDATA]]{{\]}} -; SI: buffer_store_dwordx2 [[RESULT]], -; SI: s_endpgm +; GCN: v_mov_b32_e32 v[[LOVDATA:[0-9]+]], -1 +; GCN: v_mov_b32_e32 v[[HIVDATA:[0-9]+]], -1 +; GCN: ds_inc_rtn_u64 [[RESULT:v\[[0-9]+:[0-9]+\]]], [[VPTR]], v{{\[}}[[LOVDATA]]:[[HIVDATA]]{{\]}} +; GCN: buffer_store_dwordx2 [[RESULT]], +; GCN: s_endpgm define void @lds_atomic_inc_ret_i64(i64 addrspace(1)* %out, i64 addrspace(3)* %ptr) nounwind { %result = atomicrmw add i64 addrspace(3)* %ptr, i64 1 seq_cst store i64 %result, i64 addrspace(1)* %out, align 8 @@ -58,8 +58,8 @@ define void @lds_atomic_inc_ret_i64(i64 addrspace(1)* %out, i64 addrspace(3)* %p } ; FUNC-LABEL: {{^}}lds_atomic_inc_ret_i64_offset: -; SI: ds_inc_rtn_u64 {{.*}} offset:32 -; SI: s_endpgm +; GCN: ds_inc_rtn_u64 {{.*}} offset:32 +; GCN: s_endpgm define void @lds_atomic_inc_ret_i64_offset(i64 addrspace(1)* %out, i64 addrspace(3)* %ptr) nounwind { %gep = getelementptr i64 addrspace(3)* %ptr, i32 4 %result = atomicrmw add i64 addrspace(3)* %gep, i64 1 seq_cst @@ -68,8 +68,8 @@ define void @lds_atomic_inc_ret_i64_offset(i64 addrspace(1)* %out, i64 addrspace } ; FUNC-LABEL: {{^}}lds_atomic_sub_ret_i64: -; SI: ds_sub_rtn_u64 -; SI: s_endpgm +; GCN: ds_sub_rtn_u64 +; GCN: s_endpgm define void @lds_atomic_sub_ret_i64(i64 addrspace(1)* %out, i64 addrspace(3)* %ptr) nounwind { %result = atomicrmw sub i64 addrspace(3)* %ptr, i64 4 seq_cst store i64 %result, i64 addrspace(1)* %out, align 8 @@ -77,8 +77,8 @@ define void @lds_atomic_sub_ret_i64(i64 addrspace(1)* %out, i64 addrspace(3)* %p } ; FUNC-LABEL: {{^}}lds_atomic_sub_ret_i64_offset: -; SI: ds_sub_rtn_u64 {{.*}} offset:32 -; SI: s_endpgm +; GCN: ds_sub_rtn_u64 {{.*}} offset:32 +; GCN: s_endpgm define void @lds_atomic_sub_ret_i64_offset(i64 addrspace(1)* %out, i64 addrspace(3)* %ptr) nounwind { %gep = getelementptr i64 addrspace(3)* %ptr, i32 4 %result = atomicrmw sub i64 addrspace(3)* %gep, i64 4 seq_cst @@ -87,12 +87,11 @@ define void @lds_atomic_sub_ret_i64_offset(i64 addrspace(1)* %out, i64 addrspace } ; FUNC-LABEL: {{^}}lds_atomic_dec_ret_i64: -; SI: s_mov_b64 s{{\[}}[[LOSDATA:[0-9]+]]:[[HISDATA:[0-9]+]]{{\]}}, -1 -; SI-DAG: v_mov_b32_e32 v[[LOVDATA:[0-9]+]], s[[LOSDATA]] -; SI-DAG: v_mov_b32_e32 v[[HIVDATA:[0-9]+]], s[[HISDATA]] -; SI: ds_dec_rtn_u64 [[RESULT:v\[[0-9]+:[0-9]+\]]], [[VPTR]], v{{\[}}[[LOVDATA]]:[[HIVDATA]]{{\]}} -; SI: buffer_store_dwordx2 [[RESULT]], -; SI: s_endpgm +; GCN: v_mov_b32_e32 v[[LOVDATA:[0-9]+]], -1 +; GCN: v_mov_b32_e32 
v[[HIVDATA:[0-9]+]], -1 +; GCN: ds_dec_rtn_u64 [[RESULT:v\[[0-9]+:[0-9]+\]]], [[VPTR]], v{{\[}}[[LOVDATA]]:[[HIVDATA]]{{\]}} +; GCN: buffer_store_dwordx2 [[RESULT]], +; GCN: s_endpgm define void @lds_atomic_dec_ret_i64(i64 addrspace(1)* %out, i64 addrspace(3)* %ptr) nounwind { %result = atomicrmw sub i64 addrspace(3)* %ptr, i64 1 seq_cst store i64 %result, i64 addrspace(1)* %out, align 8 @@ -100,8 +99,8 @@ define void @lds_atomic_dec_ret_i64(i64 addrspace(1)* %out, i64 addrspace(3)* %p } ; FUNC-LABEL: {{^}}lds_atomic_dec_ret_i64_offset: -; SI: ds_dec_rtn_u64 {{.*}} offset:32 -; SI: s_endpgm +; GCN: ds_dec_rtn_u64 {{.*}} offset:32 +; GCN: s_endpgm define void @lds_atomic_dec_ret_i64_offset(i64 addrspace(1)* %out, i64 addrspace(3)* %ptr) nounwind { %gep = getelementptr i64 addrspace(3)* %ptr, i32 4 %result = atomicrmw sub i64 addrspace(3)* %gep, i64 1 seq_cst @@ -110,8 +109,8 @@ define void @lds_atomic_dec_ret_i64_offset(i64 addrspace(1)* %out, i64 addrspace } ; FUNC-LABEL: {{^}}lds_atomic_and_ret_i64: -; SI: ds_and_rtn_b64 -; SI: s_endpgm +; GCN: ds_and_rtn_b64 +; GCN: s_endpgm define void @lds_atomic_and_ret_i64(i64 addrspace(1)* %out, i64 addrspace(3)* %ptr) nounwind { %result = atomicrmw and i64 addrspace(3)* %ptr, i64 4 seq_cst store i64 %result, i64 addrspace(1)* %out, align 8 @@ -119,8 +118,8 @@ define void @lds_atomic_and_ret_i64(i64 addrspace(1)* %out, i64 addrspace(3)* %p } ; FUNC-LABEL: {{^}}lds_atomic_and_ret_i64_offset: -; SI: ds_and_rtn_b64 {{.*}} offset:32 -; SI: s_endpgm +; GCN: ds_and_rtn_b64 {{.*}} offset:32 +; GCN: s_endpgm define void @lds_atomic_and_ret_i64_offset(i64 addrspace(1)* %out, i64 addrspace(3)* %ptr) nounwind { %gep = getelementptr i64 addrspace(3)* %ptr, i32 4 %result = atomicrmw and i64 addrspace(3)* %gep, i64 4 seq_cst @@ -129,8 +128,8 @@ define void @lds_atomic_and_ret_i64_offset(i64 addrspace(1)* %out, i64 addrspace } ; FUNC-LABEL: {{^}}lds_atomic_or_ret_i64: -; SI: ds_or_rtn_b64 -; SI: s_endpgm +; GCN: ds_or_rtn_b64 +; GCN: s_endpgm define void @lds_atomic_or_ret_i64(i64 addrspace(1)* %out, i64 addrspace(3)* %ptr) nounwind { %result = atomicrmw or i64 addrspace(3)* %ptr, i64 4 seq_cst store i64 %result, i64 addrspace(1)* %out, align 8 @@ -138,8 +137,8 @@ define void @lds_atomic_or_ret_i64(i64 addrspace(1)* %out, i64 addrspace(3)* %pt } ; FUNC-LABEL: {{^}}lds_atomic_or_ret_i64_offset: -; SI: ds_or_rtn_b64 {{.*}} offset:32 -; SI: s_endpgm +; GCN: ds_or_rtn_b64 {{.*}} offset:32 +; GCN: s_endpgm define void @lds_atomic_or_ret_i64_offset(i64 addrspace(1)* %out, i64 addrspace(3)* %ptr) nounwind { %gep = getelementptr i64 addrspace(3)* %ptr, i32 4 %result = atomicrmw or i64 addrspace(3)* %gep, i64 4 seq_cst @@ -148,8 +147,8 @@ define void @lds_atomic_or_ret_i64_offset(i64 addrspace(1)* %out, i64 addrspace( } ; FUNC-LABEL: {{^}}lds_atomic_xor_ret_i64: -; SI: ds_xor_rtn_b64 -; SI: s_endpgm +; GCN: ds_xor_rtn_b64 +; GCN: s_endpgm define void @lds_atomic_xor_ret_i64(i64 addrspace(1)* %out, i64 addrspace(3)* %ptr) nounwind { %result = atomicrmw xor i64 addrspace(3)* %ptr, i64 4 seq_cst store i64 %result, i64 addrspace(1)* %out, align 8 @@ -157,8 +156,8 @@ define void @lds_atomic_xor_ret_i64(i64 addrspace(1)* %out, i64 addrspace(3)* %p } ; FUNC-LABEL: {{^}}lds_atomic_xor_ret_i64_offset: -; SI: ds_xor_rtn_b64 {{.*}} offset:32 -; SI: s_endpgm +; GCN: ds_xor_rtn_b64 {{.*}} offset:32 +; GCN: s_endpgm define void @lds_atomic_xor_ret_i64_offset(i64 addrspace(1)* %out, i64 addrspace(3)* %ptr) nounwind { %gep = getelementptr i64 addrspace(3)* %ptr, i32 4 %result = atomicrmw 
xor i64 addrspace(3)* %gep, i64 4 seq_cst @@ -175,8 +174,8 @@ define void @lds_atomic_xor_ret_i64_offset(i64 addrspace(1)* %out, i64 addrspace ; } ; FUNC-LABEL: {{^}}lds_atomic_min_ret_i64: -; SI: ds_min_rtn_i64 -; SI: s_endpgm +; GCN: ds_min_rtn_i64 +; GCN: s_endpgm define void @lds_atomic_min_ret_i64(i64 addrspace(1)* %out, i64 addrspace(3)* %ptr) nounwind { %result = atomicrmw min i64 addrspace(3)* %ptr, i64 4 seq_cst store i64 %result, i64 addrspace(1)* %out, align 8 @@ -184,8 +183,8 @@ define void @lds_atomic_min_ret_i64(i64 addrspace(1)* %out, i64 addrspace(3)* %p } ; FUNC-LABEL: {{^}}lds_atomic_min_ret_i64_offset: -; SI: ds_min_rtn_i64 {{.*}} offset:32 -; SI: s_endpgm +; GCN: ds_min_rtn_i64 {{.*}} offset:32 +; GCN: s_endpgm define void @lds_atomic_min_ret_i64_offset(i64 addrspace(1)* %out, i64 addrspace(3)* %ptr) nounwind { %gep = getelementptr i64 addrspace(3)* %ptr, i32 4 %result = atomicrmw min i64 addrspace(3)* %gep, i64 4 seq_cst @@ -194,8 +193,8 @@ define void @lds_atomic_min_ret_i64_offset(i64 addrspace(1)* %out, i64 addrspace } ; FUNC-LABEL: {{^}}lds_atomic_max_ret_i64: -; SI: ds_max_rtn_i64 -; SI: s_endpgm +; GCN: ds_max_rtn_i64 +; GCN: s_endpgm define void @lds_atomic_max_ret_i64(i64 addrspace(1)* %out, i64 addrspace(3)* %ptr) nounwind { %result = atomicrmw max i64 addrspace(3)* %ptr, i64 4 seq_cst store i64 %result, i64 addrspace(1)* %out, align 8 @@ -203,8 +202,8 @@ define void @lds_atomic_max_ret_i64(i64 addrspace(1)* %out, i64 addrspace(3)* %p } ; FUNC-LABEL: {{^}}lds_atomic_max_ret_i64_offset: -; SI: ds_max_rtn_i64 {{.*}} offset:32 -; SI: s_endpgm +; GCN: ds_max_rtn_i64 {{.*}} offset:32 +; GCN: s_endpgm define void @lds_atomic_max_ret_i64_offset(i64 addrspace(1)* %out, i64 addrspace(3)* %ptr) nounwind { %gep = getelementptr i64 addrspace(3)* %ptr, i32 4 %result = atomicrmw max i64 addrspace(3)* %gep, i64 4 seq_cst @@ -213,8 +212,8 @@ define void @lds_atomic_max_ret_i64_offset(i64 addrspace(1)* %out, i64 addrspace } ; FUNC-LABEL: {{^}}lds_atomic_umin_ret_i64: -; SI: ds_min_rtn_u64 -; SI: s_endpgm +; GCN: ds_min_rtn_u64 +; GCN: s_endpgm define void @lds_atomic_umin_ret_i64(i64 addrspace(1)* %out, i64 addrspace(3)* %ptr) nounwind { %result = atomicrmw umin i64 addrspace(3)* %ptr, i64 4 seq_cst store i64 %result, i64 addrspace(1)* %out, align 8 @@ -222,8 +221,8 @@ define void @lds_atomic_umin_ret_i64(i64 addrspace(1)* %out, i64 addrspace(3)* % } ; FUNC-LABEL: {{^}}lds_atomic_umin_ret_i64_offset: -; SI: ds_min_rtn_u64 {{.*}} offset:32 -; SI: s_endpgm +; GCN: ds_min_rtn_u64 {{.*}} offset:32 +; GCN: s_endpgm define void @lds_atomic_umin_ret_i64_offset(i64 addrspace(1)* %out, i64 addrspace(3)* %ptr) nounwind { %gep = getelementptr i64 addrspace(3)* %ptr, i32 4 %result = atomicrmw umin i64 addrspace(3)* %gep, i64 4 seq_cst @@ -232,8 +231,8 @@ define void @lds_atomic_umin_ret_i64_offset(i64 addrspace(1)* %out, i64 addrspac } ; FUNC-LABEL: {{^}}lds_atomic_umax_ret_i64: -; SI: ds_max_rtn_u64 -; SI: s_endpgm +; GCN: ds_max_rtn_u64 +; GCN: s_endpgm define void @lds_atomic_umax_ret_i64(i64 addrspace(1)* %out, i64 addrspace(3)* %ptr) nounwind { %result = atomicrmw umax i64 addrspace(3)* %ptr, i64 4 seq_cst store i64 %result, i64 addrspace(1)* %out, align 8 @@ -241,8 +240,8 @@ define void @lds_atomic_umax_ret_i64(i64 addrspace(1)* %out, i64 addrspace(3)* % } ; FUNC-LABEL: {{^}}lds_atomic_umax_ret_i64_offset: -; SI: ds_max_rtn_u64 {{.*}} offset:32 -; SI: s_endpgm +; GCN: ds_max_rtn_u64 {{.*}} offset:32 +; GCN: s_endpgm define void @lds_atomic_umax_ret_i64_offset(i64 addrspace(1)* %out, 
i64 addrspace(3)* %ptr) nounwind { %gep = getelementptr i64 addrspace(3)* %ptr, i32 4 %result = atomicrmw umax i64 addrspace(3)* %gep, i64 4 seq_cst @@ -251,16 +250,16 @@ define void @lds_atomic_umax_ret_i64_offset(i64 addrspace(1)* %out, i64 addrspac } ; FUNC-LABEL: {{^}}lds_atomic_xchg_noret_i64: -; SI: ds_wrxchg_rtn_b64 -; SI: s_endpgm +; GCN: ds_wrxchg_rtn_b64 +; GCN: s_endpgm define void @lds_atomic_xchg_noret_i64(i64 addrspace(3)* %ptr) nounwind { %result = atomicrmw xchg i64 addrspace(3)* %ptr, i64 4 seq_cst ret void } ; FUNC-LABEL: {{^}}lds_atomic_xchg_noret_i64_offset: -; SI: ds_wrxchg_rtn_b64 {{.*}} offset:32 -; SI: s_endpgm +; GCN: ds_wrxchg_rtn_b64 {{.*}} offset:32 +; GCN: s_endpgm define void @lds_atomic_xchg_noret_i64_offset(i64 addrspace(3)* %ptr) nounwind { %gep = getelementptr i64 addrspace(3)* %ptr, i32 4 %result = atomicrmw xchg i64 addrspace(3)* %gep, i64 4 seq_cst @@ -268,8 +267,8 @@ define void @lds_atomic_xchg_noret_i64_offset(i64 addrspace(3)* %ptr) nounwind { } ; FUNC-LABEL: {{^}}lds_atomic_add_noret_i64: -; SI: ds_add_u64 -; SI: s_endpgm +; GCN: ds_add_u64 +; GCN: s_endpgm define void @lds_atomic_add_noret_i64(i64 addrspace(3)* %ptr) nounwind { %result = atomicrmw add i64 addrspace(3)* %ptr, i64 4 seq_cst ret void @@ -277,12 +276,12 @@ define void @lds_atomic_add_noret_i64(i64 addrspace(3)* %ptr) nounwind { ; FUNC-LABEL: {{^}}lds_atomic_add_noret_i64_offset: ; SI: s_load_dword [[PTR:s[0-9]+]], s{{\[[0-9]+:[0-9]+\]}}, 0x9 -; SI: s_mov_b64 s{{\[}}[[LOSDATA:[0-9]+]]:[[HISDATA:[0-9]+]]{{\]}}, 9 -; SI-DAG: v_mov_b32_e32 v[[LOVDATA:[0-9]+]], s[[LOSDATA]] -; SI-DAG: v_mov_b32_e32 v[[HIVDATA:[0-9]+]], s[[HISDATA]] -; SI-DAG: v_mov_b32_e32 [[VPTR:v[0-9]+]], [[PTR]] -; SI: ds_add_u64 [[VPTR]], v{{\[}}[[LOVDATA]]:[[HIVDATA]]{{\]}} offset:32 [M0] -; SI: s_endpgm +; VI: s_load_dword [[PTR:s[0-9]+]], s{{\[[0-9]+:[0-9]+\]}}, 0x24 +; GCN: v_mov_b32_e32 v[[LOVDATA:[0-9]+]], 9 +; GCN: v_mov_b32_e32 v[[HIVDATA:[0-9]+]], 0 +; GCN: v_mov_b32_e32 [[VPTR:v[0-9]+]], [[PTR]] +; GCN: ds_add_u64 [[VPTR]], v{{\[}}[[LOVDATA]]:[[HIVDATA]]{{\]}} offset:32 +; GCN: s_endpgm define void @lds_atomic_add_noret_i64_offset(i64 addrspace(3)* %ptr) nounwind { %gep = getelementptr i64 addrspace(3)* %ptr, i64 4 %result = atomicrmw add i64 addrspace(3)* %gep, i64 9 seq_cst @@ -290,19 +289,18 @@ define void @lds_atomic_add_noret_i64_offset(i64 addrspace(3)* %ptr) nounwind { } ; FUNC-LABEL: {{^}}lds_atomic_inc_noret_i64: -; SI: s_mov_b64 s{{\[}}[[LOSDATA:[0-9]+]]:[[HISDATA:[0-9]+]]{{\]}}, -1 -; SI-DAG: v_mov_b32_e32 v[[LOVDATA:[0-9]+]], s[[LOSDATA]] -; SI-DAG: v_mov_b32_e32 v[[HIVDATA:[0-9]+]], s[[HISDATA]] -; SI: ds_inc_u64 [[VPTR]], v{{\[}}[[LOVDATA]]:[[HIVDATA]]{{\]}} -; SI: s_endpgm +; GCN: v_mov_b32_e32 v[[LOVDATA:[0-9]+]], -1 +; GCN: v_mov_b32_e32 v[[HIVDATA:[0-9]+]], -1 +; GCN: ds_inc_u64 [[VPTR]], v{{\[}}[[LOVDATA]]:[[HIVDATA]]{{\]}} +; GCN: s_endpgm define void @lds_atomic_inc_noret_i64(i64 addrspace(3)* %ptr) nounwind { %result = atomicrmw add i64 addrspace(3)* %ptr, i64 1 seq_cst ret void } ; FUNC-LABEL: {{^}}lds_atomic_inc_noret_i64_offset: -; SI: ds_inc_u64 {{.*}} offset:32 -; SI: s_endpgm +; GCN: ds_inc_u64 {{.*}} offset:32 +; GCN: s_endpgm define void @lds_atomic_inc_noret_i64_offset(i64 addrspace(3)* %ptr) nounwind { %gep = getelementptr i64 addrspace(3)* %ptr, i32 4 %result = atomicrmw add i64 addrspace(3)* %gep, i64 1 seq_cst @@ -310,16 +308,16 @@ define void @lds_atomic_inc_noret_i64_offset(i64 addrspace(3)* %ptr) nounwind { } ; FUNC-LABEL: {{^}}lds_atomic_sub_noret_i64: -; SI: ds_sub_u64 
-; SI: s_endpgm +; GCN: ds_sub_u64 +; GCN: s_endpgm define void @lds_atomic_sub_noret_i64(i64 addrspace(3)* %ptr) nounwind { %result = atomicrmw sub i64 addrspace(3)* %ptr, i64 4 seq_cst ret void } ; FUNC-LABEL: {{^}}lds_atomic_sub_noret_i64_offset: -; SI: ds_sub_u64 {{.*}} offset:32 -; SI: s_endpgm +; GCN: ds_sub_u64 {{.*}} offset:32 +; GCN: s_endpgm define void @lds_atomic_sub_noret_i64_offset(i64 addrspace(3)* %ptr) nounwind { %gep = getelementptr i64 addrspace(3)* %ptr, i32 4 %result = atomicrmw sub i64 addrspace(3)* %gep, i64 4 seq_cst @@ -327,19 +325,18 @@ define void @lds_atomic_sub_noret_i64_offset(i64 addrspace(3)* %ptr) nounwind { } ; FUNC-LABEL: {{^}}lds_atomic_dec_noret_i64: -; SI: s_mov_b64 s{{\[}}[[LOSDATA:[0-9]+]]:[[HISDATA:[0-9]+]]{{\]}}, -1 -; SI-DAG: v_mov_b32_e32 v[[LOVDATA:[0-9]+]], s[[LOSDATA]] -; SI-DAG: v_mov_b32_e32 v[[HIVDATA:[0-9]+]], s[[HISDATA]] -; SI: ds_dec_u64 [[VPTR]], v{{\[}}[[LOVDATA]]:[[HIVDATA]]{{\]}} -; SI: s_endpgm +; GCN: v_mov_b32_e32 v[[LOVDATA:[0-9]+]], -1 +; GCN: v_mov_b32_e32 v[[HIVDATA:[0-9]+]], -1 +; GCN: ds_dec_u64 [[VPTR]], v{{\[}}[[LOVDATA]]:[[HIVDATA]]{{\]}} +; GCN: s_endpgm define void @lds_atomic_dec_noret_i64(i64 addrspace(3)* %ptr) nounwind { %result = atomicrmw sub i64 addrspace(3)* %ptr, i64 1 seq_cst ret void } ; FUNC-LABEL: {{^}}lds_atomic_dec_noret_i64_offset: -; SI: ds_dec_u64 {{.*}} offset:32 -; SI: s_endpgm +; GCN: ds_dec_u64 {{.*}} offset:32 +; GCN: s_endpgm define void @lds_atomic_dec_noret_i64_offset(i64 addrspace(3)* %ptr) nounwind { %gep = getelementptr i64 addrspace(3)* %ptr, i32 4 %result = atomicrmw sub i64 addrspace(3)* %gep, i64 1 seq_cst @@ -347,16 +344,16 @@ define void @lds_atomic_dec_noret_i64_offset(i64 addrspace(3)* %ptr) nounwind { } ; FUNC-LABEL: {{^}}lds_atomic_and_noret_i64: -; SI: ds_and_b64 -; SI: s_endpgm +; GCN: ds_and_b64 +; GCN: s_endpgm define void @lds_atomic_and_noret_i64(i64 addrspace(3)* %ptr) nounwind { %result = atomicrmw and i64 addrspace(3)* %ptr, i64 4 seq_cst ret void } ; FUNC-LABEL: {{^}}lds_atomic_and_noret_i64_offset: -; SI: ds_and_b64 {{.*}} offset:32 -; SI: s_endpgm +; GCN: ds_and_b64 {{.*}} offset:32 +; GCN: s_endpgm define void @lds_atomic_and_noret_i64_offset(i64 addrspace(3)* %ptr) nounwind { %gep = getelementptr i64 addrspace(3)* %ptr, i32 4 %result = atomicrmw and i64 addrspace(3)* %gep, i64 4 seq_cst @@ -364,16 +361,16 @@ define void @lds_atomic_and_noret_i64_offset(i64 addrspace(3)* %ptr) nounwind { } ; FUNC-LABEL: {{^}}lds_atomic_or_noret_i64: -; SI: ds_or_b64 -; SI: s_endpgm +; GCN: ds_or_b64 +; GCN: s_endpgm define void @lds_atomic_or_noret_i64(i64 addrspace(3)* %ptr) nounwind { %result = atomicrmw or i64 addrspace(3)* %ptr, i64 4 seq_cst ret void } ; FUNC-LABEL: {{^}}lds_atomic_or_noret_i64_offset: -; SI: ds_or_b64 {{.*}} offset:32 -; SI: s_endpgm +; GCN: ds_or_b64 {{.*}} offset:32 +; GCN: s_endpgm define void @lds_atomic_or_noret_i64_offset(i64 addrspace(3)* %ptr) nounwind { %gep = getelementptr i64 addrspace(3)* %ptr, i32 4 %result = atomicrmw or i64 addrspace(3)* %gep, i64 4 seq_cst @@ -381,16 +378,16 @@ define void @lds_atomic_or_noret_i64_offset(i64 addrspace(3)* %ptr) nounwind { } ; FUNC-LABEL: {{^}}lds_atomic_xor_noret_i64: -; SI: ds_xor_b64 -; SI: s_endpgm +; GCN: ds_xor_b64 +; GCN: s_endpgm define void @lds_atomic_xor_noret_i64(i64 addrspace(3)* %ptr) nounwind { %result = atomicrmw xor i64 addrspace(3)* %ptr, i64 4 seq_cst ret void } ; FUNC-LABEL: {{^}}lds_atomic_xor_noret_i64_offset: -; SI: ds_xor_b64 {{.*}} offset:32 -; SI: s_endpgm +; GCN: ds_xor_b64 {{.*}} 
offset:32 +; GCN: s_endpgm define void @lds_atomic_xor_noret_i64_offset(i64 addrspace(3)* %ptr) nounwind { %gep = getelementptr i64 addrspace(3)* %ptr, i32 4 %result = atomicrmw xor i64 addrspace(3)* %gep, i64 4 seq_cst @@ -405,16 +402,16 @@ define void @lds_atomic_xor_noret_i64_offset(i64 addrspace(3)* %ptr) nounwind { ; } ; FUNC-LABEL: {{^}}lds_atomic_min_noret_i64: -; SI: ds_min_i64 -; SI: s_endpgm +; GCN: ds_min_i64 +; GCN: s_endpgm define void @lds_atomic_min_noret_i64(i64 addrspace(3)* %ptr) nounwind { %result = atomicrmw min i64 addrspace(3)* %ptr, i64 4 seq_cst ret void } ; FUNC-LABEL: {{^}}lds_atomic_min_noret_i64_offset: -; SI: ds_min_i64 {{.*}} offset:32 -; SI: s_endpgm +; GCN: ds_min_i64 {{.*}} offset:32 +; GCN: s_endpgm define void @lds_atomic_min_noret_i64_offset(i64 addrspace(3)* %ptr) nounwind { %gep = getelementptr i64 addrspace(3)* %ptr, i32 4 %result = atomicrmw min i64 addrspace(3)* %gep, i64 4 seq_cst @@ -422,16 +419,16 @@ define void @lds_atomic_min_noret_i64_offset(i64 addrspace(3)* %ptr) nounwind { } ; FUNC-LABEL: {{^}}lds_atomic_max_noret_i64: -; SI: ds_max_i64 -; SI: s_endpgm +; GCN: ds_max_i64 +; GCN: s_endpgm define void @lds_atomic_max_noret_i64(i64 addrspace(3)* %ptr) nounwind { %result = atomicrmw max i64 addrspace(3)* %ptr, i64 4 seq_cst ret void } ; FUNC-LABEL: {{^}}lds_atomic_max_noret_i64_offset: -; SI: ds_max_i64 {{.*}} offset:32 -; SI: s_endpgm +; GCN: ds_max_i64 {{.*}} offset:32 +; GCN: s_endpgm define void @lds_atomic_max_noret_i64_offset(i64 addrspace(3)* %ptr) nounwind { %gep = getelementptr i64 addrspace(3)* %ptr, i32 4 %result = atomicrmw max i64 addrspace(3)* %gep, i64 4 seq_cst @@ -439,16 +436,16 @@ define void @lds_atomic_max_noret_i64_offset(i64 addrspace(3)* %ptr) nounwind { } ; FUNC-LABEL: {{^}}lds_atomic_umin_noret_i64: -; SI: ds_min_u64 -; SI: s_endpgm +; GCN: ds_min_u64 +; GCN: s_endpgm define void @lds_atomic_umin_noret_i64(i64 addrspace(3)* %ptr) nounwind { %result = atomicrmw umin i64 addrspace(3)* %ptr, i64 4 seq_cst ret void } ; FUNC-LABEL: {{^}}lds_atomic_umin_noret_i64_offset: -; SI: ds_min_u64 {{.*}} offset:32 -; SI: s_endpgm +; GCN: ds_min_u64 {{.*}} offset:32 +; GCN: s_endpgm define void @lds_atomic_umin_noret_i64_offset(i64 addrspace(3)* %ptr) nounwind { %gep = getelementptr i64 addrspace(3)* %ptr, i32 4 %result = atomicrmw umin i64 addrspace(3)* %gep, i64 4 seq_cst @@ -456,16 +453,16 @@ define void @lds_atomic_umin_noret_i64_offset(i64 addrspace(3)* %ptr) nounwind { } ; FUNC-LABEL: {{^}}lds_atomic_umax_noret_i64: -; SI: ds_max_u64 -; SI: s_endpgm +; GCN: ds_max_u64 +; GCN: s_endpgm define void @lds_atomic_umax_noret_i64(i64 addrspace(3)* %ptr) nounwind { %result = atomicrmw umax i64 addrspace(3)* %ptr, i64 4 seq_cst ret void } ; FUNC-LABEL: {{^}}lds_atomic_umax_noret_i64_offset: -; SI: ds_max_u64 {{.*}} offset:32 -; SI: s_endpgm +; GCN: ds_max_u64 {{.*}} offset:32 +; GCN: s_endpgm define void @lds_atomic_umax_noret_i64_offset(i64 addrspace(3)* %ptr) nounwind { %gep = getelementptr i64 addrspace(3)* %ptr, i32 4 %result = atomicrmw umax i64 addrspace(3)* %gep, i64 4 seq_cst diff --git a/test/CodeGen/R600/local-memory-two-objects.ll b/test/CodeGen/R600/local-memory-two-objects.ll index 88ef05d..3d90ab1 100644 --- a/test/CodeGen/R600/local-memory-two-objects.ll +++ b/test/CodeGen/R600/local-memory-two-objects.ll @@ -1,38 +1,38 @@ -; RUN: llc < %s -march=r600 -mcpu=redwood | FileCheck --check-prefix=EG-CHECK %s -; RUN: llc < %s -march=r600 -mcpu=verde -verify-machineinstrs | FileCheck --check-prefix=SI-CHECK --check-prefix=SI %s -; 
RUN: llc < %s -march=r600 -mcpu=bonaire -verify-machineinstrs | FileCheck --check-prefix=SI-CHECK --check-prefix=CI %s +; RUN: llc < %s -march=r600 -mcpu=redwood | FileCheck --check-prefix=EG %s +; RUN: llc < %s -march=amdgcn -mcpu=verde -verify-machineinstrs | FileCheck --check-prefix=GCN --check-prefix=SI %s +; RUN: llc < %s -march=amdgcn -mcpu=bonaire -verify-machineinstrs | FileCheck --check-prefix=GCN --check-prefix=CI %s @local_memory_two_objects.local_mem0 = internal unnamed_addr addrspace(3) global [4 x i32] undef, align 4 @local_memory_two_objects.local_mem1 = internal unnamed_addr addrspace(3) global [4 x i32] undef, align 4 -; EG-CHECK: {{^}}local_memory_two_objects: +; EG: {{^}}local_memory_two_objects: ; Check that the LDS size is emitted correctly -; EG-CHECK: .long 166120 -; EG-CHECK-NEXT: .long 8 -; SI-CHECK: .long 47180 -; SI-CHECK-NEXT: .long 32768 +; EG: .long 166120 +; EG-NEXT: .long 8 +; GCN: .long 47180 +; GCN-NEXT: .long 38792 ; We would like to check that the lds writes are using different ; addresses, but due to variations in the scheduler, we can't do ; this consistently on evergreen GPUs. -; EG-CHECK: LDS_WRITE -; EG-CHECK: LDS_WRITE -; SI-CHECK: ds_write_b32 {{v[0-9]*}}, v[[ADDRW:[0-9]*]] -; SI-CHECK-NOT: ds_write_b32 {{v[0-9]*}}, v[[ADDRW]] +; EG: LDS_WRITE +; EG: LDS_WRITE +; GCN: ds_write_b32 {{v[0-9]*}}, v[[ADDRW:[0-9]*]] +; GCN-NOT: ds_write_b32 {{v[0-9]*}}, v[[ADDRW]] ; GROUP_BARRIER must be the last instruction in a clause -; EG-CHECK: GROUP_BARRIER -; EG-CHECK-NEXT: ALU clause +; EG: GROUP_BARRIER +; EG-NEXT: ALU clause ; Make sure the lds reads are using different addresses, at different ; constant offsets. -; EG-CHECK: LDS_READ_RET {{[*]*}} OQAP, {{PV|T}}[[ADDRR:[0-9]*\.[XYZW]]] -; EG-CHECK-NOT: LDS_READ_RET {{[*]*}} OQAP, T[[ADDRR]] +; EG: LDS_READ_RET {{[*]*}} OQAP, {{PV|T}}[[ADDRR:[0-9]*\.[XYZW]]] +; EG-NOT: LDS_READ_RET {{[*]*}} OQAP, T[[ADDRR]] ; SI: v_add_i32_e32 [[SIPTR:v[0-9]+]], 16, v{{[0-9]+}} -; SI: ds_read_b32 {{v[0-9]+}}, [[SIPTR]] [M0] -; CI: ds_read_b32 {{v[0-9]+}}, [[ADDRR:v[0-9]+]] offset:16 [M0] -; CI: ds_read_b32 {{v[0-9]+}}, [[ADDRR]] [M0] +; SI: ds_read_b32 {{v[0-9]+}}, [[SIPTR]] +; CI: ds_read_b32 {{v[0-9]+}}, [[ADDRR:v[0-9]+]] +; CI: ds_read_b32 {{v[0-9]+}}, [[ADDRR]] offset:16 define void @local_memory_two_objects(i32 addrspace(1)* %out) { entry: diff --git a/test/CodeGen/R600/local-memory.ll b/test/CodeGen/R600/local-memory.ll index 9b13cb2..68e72c5 100644 --- a/test/CodeGen/R600/local-memory.ll +++ b/test/CodeGen/R600/local-memory.ll @@ -1,6 +1,6 @@ ; RUN: llc -march=r600 -mcpu=redwood < %s | FileCheck -check-prefix=EG -check-prefix=FUNC %s -; RUN: llc -march=r600 -mcpu=verde -verify-machineinstrs < %s | FileCheck -check-prefix=SI -check-prefix=FUNC %s -; RUN: llc -march=r600 -mcpu=bonaire -verify-machineinstrs < %s | FileCheck -check-prefix=CI -check-prefix=FUNC %s +; RUN: llc -march=amdgcn -mcpu=verde -verify-machineinstrs < %s | FileCheck -check-prefix=SI -check-prefix=FUNC %s +; RUN: llc -march=amdgcn -mcpu=bonaire -verify-machineinstrs < %s | FileCheck -check-prefix=CI -check-prefix=FUNC %s @local_memory.local_mem = internal unnamed_addr addrspace(3) global [128 x i32] undef, align 4 @@ -10,9 +10,9 @@ ; EG: .long 166120 ; EG-NEXT: .long 128 ; SI: .long 47180 -; SI-NEXT: .long 65536 +; SI-NEXT: .long 71560 ; CI: .long 47180 -; CI-NEXT: .long 32768 +; CI-NEXT: .long 38792 ; EG: LDS_WRITE ; SI-NOT: s_wqm_b64 diff --git a/test/CodeGen/R600/loop-address.ll b/test/CodeGen/R600/loop-address.ll index b46d8e9..03e0f01 100644 --- 
a/test/CodeGen/R600/loop-address.ll +++ b/test/CodeGen/R600/loop-address.ll @@ -31,7 +31,7 @@ attributes #0 = { nounwind "fp-contract-model"="standard" "relocation-model"="pi !opencl.kernels = !{!0, !1, !2, !3} -!0 = metadata !{void (i32 addrspace(1)*, i32)* @loop_ge} -!1 = metadata !{null} -!2 = metadata !{null} -!3 = metadata !{null} +!0 = !{void (i32 addrspace(1)*, i32)* @loop_ge} +!1 = !{null} +!2 = !{null} +!3 = !{null} diff --git a/test/CodeGen/R600/loop-idiom.ll b/test/CodeGen/R600/loop-idiom.ll index 0478bdb..a0b00ab 100644 --- a/test/CodeGen/R600/loop-idiom.ll +++ b/test/CodeGen/R600/loop-idiom.ll @@ -1,5 +1,6 @@ ; RUN: opt -basicaa -loop-idiom -S < %s -march=r600 -mcpu=redwood | FileCheck --check-prefix=R600 --check-prefix=FUNC %s -; RUN: opt -basicaa -loop-idiom -S < %s -march=r600 -mcpu=SI -verify-machineinstrs| FileCheck --check-prefix=SI --check-prefix=FUNC %s +; RUN: opt -basicaa -loop-idiom -S < %s -march=amdgcn -mcpu=SI -verify-machineinstrs| FileCheck --check-prefix=SI --check-prefix=FUNC %s +; RUN: opt -basicaa -loop-idiom -S < %s -march=amdgcn -mcpu=tonga -verify-machineinstrs| FileCheck --check-prefix=SI --check-prefix=FUNC %s target datalayout = "e-p:32:32-p1:64:64-p2:64:64-p3:32:32-p4:32:32-p5:64:64-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128-v192:256-v256:256-v512:512-v1024:1024-v2048:2048-n32:64" target triple = "r600--" diff --git a/test/CodeGen/R600/lshl.ll b/test/CodeGen/R600/lshl.ll index 9785866..9ac988d 100644 --- a/test/CodeGen/R600/lshl.ll +++ b/test/CodeGen/R600/lshl.ll @@ -1,4 +1,5 @@ -;RUN: llc < %s -march=r600 -mcpu=verde -verify-machineinstrs | FileCheck %s +;RUN: llc < %s -march=amdgcn -mcpu=verde -verify-machineinstrs | FileCheck %s +;RUN: llc < %s -march=amdgcn -mcpu=tonga -verify-machineinstrs | FileCheck %s ;CHECK: s_lshl_b32 s{{[0-9]}}, s{{[0-9]}}, 1 diff --git a/test/CodeGen/R600/lshr.ll b/test/CodeGen/R600/lshr.ll index acfc1fd..50e444a 100644 --- a/test/CodeGen/R600/lshr.ll +++ b/test/CodeGen/R600/lshr.ll @@ -1,4 +1,5 @@ -;RUN: llc < %s -march=r600 -mcpu=verde -verify-machineinstrs | FileCheck %s +;RUN: llc < %s -march=amdgcn -mcpu=verde -verify-machineinstrs | FileCheck %s +;RUN: llc < %s -march=amdgcn -mcpu=tonga -verify-machineinstrs | FileCheck %s ;CHECK: s_lshr_b32 s{{[0-9]}}, s{{[0-9]}}, 1 diff --git a/test/CodeGen/R600/m0-spill.ll b/test/CodeGen/R600/m0-spill.ll index a8b0e0d..4dade82 100644 --- a/test/CodeGen/R600/m0-spill.ll +++ b/test/CodeGen/R600/m0-spill.ll @@ -1,4 +1,5 @@ -; RUN: llc < %s -march=r600 -mcpu=SI -verify-machineinstrs | FileCheck %s +; RUN: llc < %s -march=amdgcn -mcpu=SI -verify-machineinstrs | FileCheck %s +; RUN: llc < %s -march=amdgcn -mcpu=tonga -verify-machineinstrs | FileCheck %s @lds = external addrspace(3) global [64 x float] diff --git a/test/CodeGen/R600/mad-combine.ll b/test/CodeGen/R600/mad-combine.ll new file mode 100644 index 0000000..8c4e09b --- /dev/null +++ b/test/CodeGen/R600/mad-combine.ll @@ -0,0 +1,567 @@ +; Make sure we still form mad even when unsafe math or fp-contract is allowed instead of fma. 
+ +; RUN: llc -march=amdgcn -mcpu=tahiti -verify-machineinstrs < %s | FileCheck -check-prefix=SI -check-prefix=SI-STD -check-prefix=FUNC %s +; RUN: llc -march=amdgcn -mcpu=tahiti -verify-machineinstrs -fp-contract=fast < %s | FileCheck -check-prefix=SI -check-prefix=SI-STD -check-prefix=FUNC %s +; RUN: llc -march=amdgcn -mcpu=tahiti -verify-machineinstrs -enable-unsafe-fp-math < %s | FileCheck -check-prefix=SI -check-prefix=SI-STD -check-prefix=FUNC %s + +; Make sure we don't form mad with denormals +; RUN: llc -march=amdgcn -mcpu=tahiti -mattr=+fp32-denormals -fp-contract=fast -verify-machineinstrs < %s | FileCheck -check-prefix=SI -check-prefix=SI-DENORM -check-prefix=FUNC %s +; RUN: llc -march=amdgcn -mcpu=verde -mattr=+fp32-denormals -fp-contract=fast -verify-machineinstrs < %s | FileCheck -check-prefix=SI -check-prefix=SI-DENORM-SLOWFMAF -check-prefix=FUNC %s + +declare i32 @llvm.r600.read.tidig.x() #0 +declare float @llvm.fabs.f32(float) #0 +declare float @llvm.fma.f32(float, float, float) #0 +declare float @llvm.fmuladd.f32(float, float, float) #0 + +; (fadd (fmul x, y), z) -> (fma x, y, z) +; FUNC-LABEL: {{^}}combine_to_mad_f32_0: +; SI-DAG: buffer_load_dword [[A:v[0-9]+]], v{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}}, 0 addr64{{$}} +; SI-DAG: buffer_load_dword [[B:v[0-9]+]], v{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}}, 0 addr64 offset:4{{$}} +; SI-DAG: buffer_load_dword [[C:v[0-9]+]], v{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}}, 0 addr64 offset:8{{$}} + +; SI-STD: v_mad_f32 [[RESULT:v[0-9]+]], [[A]], [[B]], [[C]] + +; SI-DENORM: v_fma_f32 [[RESULT:v[0-9]+]], [[A]], [[B]], [[C]] + +; SI-DENORM-SLOWFMAF-NOT: v_fma +; SI-DENORM-SLOWFMAF-NOT: v_mad + +; SI-DENORM-SLOWFMAF: v_mul_f32_e32 [[TMP:v[0-9]+]], [[B]], [[A]] +; SI-DENORM-SLOWFMAF: v_add_f32_e32 [[RESULT:v[0-9]+]], [[C]], [[TMP]] + +; SI: buffer_store_dword [[RESULT]] +define void @combine_to_mad_f32_0(float addrspace(1)* noalias %out, float addrspace(1)* noalias %in) #1 { + %tid = tail call i32 @llvm.r600.read.tidig.x() #0 + %gep.0 = getelementptr float addrspace(1)* %in, i32 %tid + %gep.1 = getelementptr float addrspace(1)* %gep.0, i32 1 + %gep.2 = getelementptr float addrspace(1)* %gep.0, i32 2 + %gep.out = getelementptr float addrspace(1)* %out, i32 %tid + + %a = load float addrspace(1)* %gep.0 + %b = load float addrspace(1)* %gep.1 + %c = load float addrspace(1)* %gep.2 + + %mul = fmul float %a, %b + %fma = fadd float %mul, %c + store float %fma, float addrspace(1)* %gep.out + ret void +} + +; (fadd (fmul x, y), z) -> (fma x, y, z) +; FUNC-LABEL: {{^}}combine_to_mad_f32_0_2use: +; SI-DAG: buffer_load_dword [[A:v[0-9]+]], v{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}}, 0 addr64{{$}} +; SI-DAG: buffer_load_dword [[B:v[0-9]+]], v{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}}, 0 addr64 offset:4{{$}} +; SI-DAG: buffer_load_dword [[C:v[0-9]+]], v{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}}, 0 addr64 offset:8{{$}} +; SI-DAG: buffer_load_dword [[D:v[0-9]+]], v{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}}, 0 addr64 offset:12{{$}} + +; SI-STD-DAG: v_mad_f32 [[RESULT0:v[0-9]+]], [[A]], [[B]], [[C]] +; SI-STD-DAG: v_mad_f32 [[RESULT1:v[0-9]+]], [[A]], [[B]], [[D]] + +; SI-DENORM-DAG: v_fma_f32 [[RESULT0:v[0-9]+]], [[A]], [[B]], [[C]] +; SI-DENORM-DAG: v_fma_f32 [[RESULT1:v[0-9]+]], [[A]], [[B]], [[D]] + +; SI-DENORM-SLOWFMAF: v_mul_f32_e32 [[TMP:v[0-9]+]], [[B]], [[A]] +; SI-DENORM-SLOWFMAF-DAG: v_add_f32_e32 [[RESULT0:v[0-9]+]], [[C]], [[TMP]] +; SI-DENORM-SLOWFMAF-DAG: v_add_f32_e32 [[RESULT1:v[0-9]+]], [[D]], [[TMP]] + +; 
SI-DAG: buffer_store_dword [[RESULT0]], v{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}}, 0 addr64{{$}} +; SI-DAG: buffer_store_dword [[RESULT1]], v{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}}, 0 addr64 offset:4{{$}} +; SI: s_endpgm +define void @combine_to_mad_f32_0_2use(float addrspace(1)* noalias %out, float addrspace(1)* noalias %in) #1 { + %tid = tail call i32 @llvm.r600.read.tidig.x() #0 + %gep.0 = getelementptr float addrspace(1)* %in, i32 %tid + %gep.1 = getelementptr float addrspace(1)* %gep.0, i32 1 + %gep.2 = getelementptr float addrspace(1)* %gep.0, i32 2 + %gep.3 = getelementptr float addrspace(1)* %gep.0, i32 3 + %gep.out.0 = getelementptr float addrspace(1)* %out, i32 %tid + %gep.out.1 = getelementptr float addrspace(1)* %gep.out.0, i32 1 + + %a = load float addrspace(1)* %gep.0 + %b = load float addrspace(1)* %gep.1 + %c = load float addrspace(1)* %gep.2 + %d = load float addrspace(1)* %gep.3 + + %mul = fmul float %a, %b + %fma0 = fadd float %mul, %c + %fma1 = fadd float %mul, %d + + store float %fma0, float addrspace(1)* %gep.out.0 + store float %fma1, float addrspace(1)* %gep.out.1 + ret void +} + +; (fadd x, (fmul y, z)) -> (fma y, z, x) +; FUNC-LABEL: {{^}}combine_to_mad_f32_1: +; SI-DAG: buffer_load_dword [[A:v[0-9]+]], v{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}}, 0 addr64{{$}} +; SI-DAG: buffer_load_dword [[B:v[0-9]+]], v{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}}, 0 addr64 offset:4{{$}} +; SI-DAG: buffer_load_dword [[C:v[0-9]+]], v{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}}, 0 addr64 offset:8{{$}} + +; SI-STD: v_mad_f32 [[RESULT:v[0-9]+]], [[A]], [[B]], [[C]] +; SI-DENORM: v_fma_f32 [[RESULT:v[0-9]+]], [[A]], [[B]], [[C]] + +; SI-DENORM-SLOWFMAF: v_mul_f32_e32 [[TMP:v[0-9]+]], [[B]], [[A]] +; SI-DENORM-SLOWFMAF: v_add_f32_e32 [[RESULT:v[0-9]+]], [[TMP]], [[C]] + +; SI: buffer_store_dword [[RESULT]] +define void @combine_to_mad_f32_1(float addrspace(1)* noalias %out, float addrspace(1)* noalias %in) #1 { + %tid = tail call i32 @llvm.r600.read.tidig.x() #0 + %gep.0 = getelementptr float addrspace(1)* %in, i32 %tid + %gep.1 = getelementptr float addrspace(1)* %gep.0, i32 1 + %gep.2 = getelementptr float addrspace(1)* %gep.0, i32 2 + %gep.out = getelementptr float addrspace(1)* %out, i32 %tid + + %a = load float addrspace(1)* %gep.0 + %b = load float addrspace(1)* %gep.1 + %c = load float addrspace(1)* %gep.2 + + %mul = fmul float %a, %b + %fma = fadd float %c, %mul + store float %fma, float addrspace(1)* %gep.out + ret void +} + +; (fsub (fmul x, y), z) -> (fma x, y, (fneg z)) +; FUNC-LABEL: {{^}}combine_to_mad_fsub_0_f32: +; SI-DAG: buffer_load_dword [[A:v[0-9]+]], v{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}}, 0 addr64{{$}} +; SI-DAG: buffer_load_dword [[B:v[0-9]+]], v{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}}, 0 addr64 offset:4{{$}} +; SI-DAG: buffer_load_dword [[C:v[0-9]+]], v{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}}, 0 addr64 offset:8{{$}} + +; SI-STD: v_mad_f32 [[RESULT:v[0-9]+]], [[A]], [[B]], -[[C]] +; SI-DENORM: v_fma_f32 [[RESULT:v[0-9]+]], [[A]], [[B]], -[[C]] + +; SI-DENORM-SLOWFMAF: v_mul_f32_e32 [[TMP:v[0-9]+]], [[B]], [[A]] +; SI-DENORM-SLOWFMAF: v_subrev_f32_e32 [[RESULT:v[0-9]+]], [[C]], [[TMP]] + +; SI: buffer_store_dword [[RESULT]] +define void @combine_to_mad_fsub_0_f32(float addrspace(1)* noalias %out, float addrspace(1)* noalias %in) #1 { + %tid = tail call i32 @llvm.r600.read.tidig.x() #0 + %gep.0 = getelementptr float addrspace(1)* %in, i32 %tid + %gep.1 = getelementptr float addrspace(1)* %gep.0, i32 1 + %gep.2 = getelementptr 
float addrspace(1)* %gep.0, i32 2 + %gep.out = getelementptr float addrspace(1)* %out, i32 %tid + + %a = load float addrspace(1)* %gep.0 + %b = load float addrspace(1)* %gep.1 + %c = load float addrspace(1)* %gep.2 + + %mul = fmul float %a, %b + %fma = fsub float %mul, %c + store float %fma, float addrspace(1)* %gep.out + ret void +} + +; (fsub (fmul x, y), z) -> (fma x, y, (fneg z)) +; FUNC-LABEL: {{^}}combine_to_mad_fsub_0_f32_2use: +; SI-DAG: buffer_load_dword [[A:v[0-9]+]], v{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}}, 0 addr64{{$}} +; SI-DAG: buffer_load_dword [[B:v[0-9]+]], v{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}}, 0 addr64 offset:4{{$}} +; SI-DAG: buffer_load_dword [[C:v[0-9]+]], v{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}}, 0 addr64 offset:8{{$}} +; SI-DAG: buffer_load_dword [[D:v[0-9]+]], v{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}}, 0 addr64 offset:12{{$}} + +; SI-STD-DAG: v_mad_f32 [[RESULT0:v[0-9]+]], [[A]], [[B]], -[[C]] +; SI-STD-DAG: v_mad_f32 [[RESULT1:v[0-9]+]], [[A]], [[B]], -[[D]] + +; SI-DENORM-DAG: v_fma_f32 [[RESULT0:v[0-9]+]], [[A]], [[B]], -[[C]] +; SI-DENORM-DAG: v_fma_f32 [[RESULT1:v[0-9]+]], [[A]], [[B]], -[[D]] + +; SI-DENORM-SLOWFMAF: v_mul_f32_e32 [[TMP:v[0-9]+]], [[B]], [[A]] +; SI-DENORM-SLOWFMAF-DAG: v_subrev_f32_e32 [[RESULT0:v[0-9]+]], [[C]], [[TMP]] +; SI-DENORM-SLOWFMAF-DAG: v_subrev_f32_e32 [[RESULT1:v[0-9]+]], [[D]], [[TMP]] + +; SI-DAG: buffer_store_dword [[RESULT0]], v{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}}, 0 addr64{{$}} +; SI-DAG: buffer_store_dword [[RESULT1]], v{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}}, 0 addr64 offset:4{{$}} +; SI: s_endpgm +define void @combine_to_mad_fsub_0_f32_2use(float addrspace(1)* noalias %out, float addrspace(1)* noalias %in) #1 { + %tid = tail call i32 @llvm.r600.read.tidig.x() #0 + %gep.0 = getelementptr float addrspace(1)* %in, i32 %tid + %gep.1 = getelementptr float addrspace(1)* %gep.0, i32 1 + %gep.2 = getelementptr float addrspace(1)* %gep.0, i32 2 + %gep.3 = getelementptr float addrspace(1)* %gep.0, i32 3 + %gep.out.0 = getelementptr float addrspace(1)* %out, i32 %tid + %gep.out.1 = getelementptr float addrspace(1)* %gep.out.0, i32 1 + + %a = load float addrspace(1)* %gep.0 + %b = load float addrspace(1)* %gep.1 + %c = load float addrspace(1)* %gep.2 + %d = load float addrspace(1)* %gep.3 + + %mul = fmul float %a, %b + %fma0 = fsub float %mul, %c + %fma1 = fsub float %mul, %d + store float %fma0, float addrspace(1)* %gep.out.0 + store float %fma1, float addrspace(1)* %gep.out.1 + ret void +} + +; (fsub x, (fmul y, z)) -> (fma (fneg y), z, x) +; FUNC-LABEL: {{^}}combine_to_mad_fsub_1_f32: +; SI-DAG: buffer_load_dword [[A:v[0-9]+]], v{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}}, 0 addr64{{$}} +; SI-DAG: buffer_load_dword [[B:v[0-9]+]], v{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}}, 0 addr64 offset:4{{$}} +; SI-DAG: buffer_load_dword [[C:v[0-9]+]], v{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}}, 0 addr64 offset:8{{$}} + +; SI-STD: v_mad_f32 [[RESULT:v[0-9]+]], -[[A]], [[B]], [[C]] +; SI-DENORM: v_fma_f32 [[RESULT:v[0-9]+]], -[[A]], [[B]], [[C]] + +; SI-DENORM-SLOWFMAF: v_mul_f32_e32 [[TMP:v[0-9]+]], [[B]], [[A]] +; SI-DENORM-SLOWFMAF: v_subrev_f32_e32 [[RESULT:v[0-9]+]], [[TMP]], [[C]] + +; SI: buffer_store_dword [[RESULT]] +define void @combine_to_mad_fsub_1_f32(float addrspace(1)* noalias %out, float addrspace(1)* noalias %in) #1 { + %tid = tail call i32 @llvm.r600.read.tidig.x() #0 + %gep.0 = getelementptr float addrspace(1)* %in, i32 %tid + %gep.1 = getelementptr float addrspace(1)* 
%gep.0, i32 1 + %gep.2 = getelementptr float addrspace(1)* %gep.0, i32 2 + %gep.out = getelementptr float addrspace(1)* %out, i32 %tid + + %a = load float addrspace(1)* %gep.0 + %b = load float addrspace(1)* %gep.1 + %c = load float addrspace(1)* %gep.2 + + %mul = fmul float %a, %b + %fma = fsub float %c, %mul + store float %fma, float addrspace(1)* %gep.out + ret void +} + +; (fsub x, (fmul y, z)) -> (fma (fneg y), z, x) +; FUNC-LABEL: {{^}}combine_to_mad_fsub_1_f32_2use: +; SI-DAG: buffer_load_dword [[A:v[0-9]+]], v{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}}, 0 addr64{{$}} +; SI-DAG: buffer_load_dword [[B:v[0-9]+]], v{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}}, 0 addr64 offset:4{{$}} +; SI-DAG: buffer_load_dword [[C:v[0-9]+]], v{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}}, 0 addr64 offset:8{{$}} + +; SI-STD-DAG: v_mad_f32 [[RESULT0:v[0-9]+]], -[[A]], [[B]], [[C]] +; SI-STD-DAG: v_mad_f32 [[RESULT1:v[0-9]+]], -[[A]], [[B]], [[D]] + +; SI-DENORM-DAG: v_fma_f32 [[RESULT0:v[0-9]+]], -[[A]], [[B]], [[C]] +; SI-DENORM-DAG: v_fma_f32 [[RESULT1:v[0-9]+]], -[[A]], [[B]], [[D]] + +; SI-DENORM-SLOWFMAF: v_mul_f32_e32 [[TMP:v[0-9]+]], [[B]], [[A]] +; SI-DENORM-SLOWFMAF-DAG: v_subrev_f32_e32 [[RESULT0:v[0-9]+]], [[TMP]], [[C]] +; SI-DENORM-SLOWFMAF-DAG: v_subrev_f32_e32 [[RESULT1:v[0-9]+]], [[TMP]], [[D]] + +; SI-DAG: buffer_store_dword [[RESULT0]], v{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}}, 0 addr64{{$}} +; SI-DAG: buffer_store_dword [[RESULT1]], v{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}}, 0 addr64 offset:4{{$}} +; SI: s_endpgm +define void @combine_to_mad_fsub_1_f32_2use(float addrspace(1)* noalias %out, float addrspace(1)* noalias %in) #1 { + %tid = tail call i32 @llvm.r600.read.tidig.x() #0 + %gep.0 = getelementptr float addrspace(1)* %in, i32 %tid + %gep.1 = getelementptr float addrspace(1)* %gep.0, i32 1 + %gep.2 = getelementptr float addrspace(1)* %gep.0, i32 2 + %gep.3 = getelementptr float addrspace(1)* %gep.0, i32 3 + %gep.out.0 = getelementptr float addrspace(1)* %out, i32 %tid + %gep.out.1 = getelementptr float addrspace(1)* %gep.out.0, i32 1 + + %a = load float addrspace(1)* %gep.0 + %b = load float addrspace(1)* %gep.1 + %c = load float addrspace(1)* %gep.2 + %d = load float addrspace(1)* %gep.3 + + %mul = fmul float %a, %b + %fma0 = fsub float %c, %mul + %fma1 = fsub float %d, %mul + store float %fma0, float addrspace(1)* %gep.out.0 + store float %fma1, float addrspace(1)* %gep.out.1 + ret void +} + +; (fsub (fneg (fmul x, y)), z) -> (fma (fneg x), y, (fneg z)) +; FUNC-LABEL: {{^}}combine_to_mad_fsub_2_f32: +; SI-DAG: buffer_load_dword [[A:v[0-9]+]], v{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}}, 0 addr64{{$}} +; SI-DAG: buffer_load_dword [[B:v[0-9]+]], v{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}}, 0 addr64 offset:4{{$}} +; SI-DAG: buffer_load_dword [[C:v[0-9]+]], v{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}}, 0 addr64 offset:8{{$}} + +; SI-STD: v_mad_f32 [[RESULT:v[0-9]+]], -[[A]], [[B]], -[[C]] + +; SI-DENORM: v_fma_f32 [[RESULT:v[0-9]+]], -[[A]], [[B]], -[[C]] + +; SI-DENORM-SLOWFMAF: v_mul_f32_e32 [[TMP:v[0-9]+]], [[B]], [[A]] +; SI-DENORM-SLOWFMAF: v_sub_f32_e64 [[RESULT:v[0-9]+]], -[[TMP]], [[C]] + +; SI: buffer_store_dword [[RESULT]] +define void @combine_to_mad_fsub_2_f32(float addrspace(1)* noalias %out, float addrspace(1)* noalias %in) #1 { + %tid = tail call i32 @llvm.r600.read.tidig.x() #0 + %gep.0 = getelementptr float addrspace(1)* %in, i32 %tid + %gep.1 = getelementptr float addrspace(1)* %gep.0, i32 1 + %gep.2 = getelementptr float addrspace(1)* 
%gep.0, i32 2 + %gep.out = getelementptr float addrspace(1)* %out, i32 %tid + + %a = load float addrspace(1)* %gep.0 + %b = load float addrspace(1)* %gep.1 + %c = load float addrspace(1)* %gep.2 + + %mul = fmul float %a, %b + %mul.neg = fsub float -0.0, %mul + %fma = fsub float %mul.neg, %c + + store float %fma, float addrspace(1)* %gep.out + ret void +} + +; (fsub (fneg (fmul x, y)), z) -> (fma (fneg x), y, (fneg z)) +; FUNC-LABEL: {{^}}combine_to_mad_fsub_2_f32_2uses_neg: +; SI-DAG: buffer_load_dword [[A:v[0-9]+]], v{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}}, 0 addr64{{$}} +; SI-DAG: buffer_load_dword [[B:v[0-9]+]], v{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}}, 0 addr64 offset:4{{$}} +; SI-DAG: buffer_load_dword [[C:v[0-9]+]], v{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}}, 0 addr64 offset:8{{$}} + +; SI-STD-DAG: v_mad_f32 [[RESULT0:v[0-9]+]], -[[A]], [[B]], -[[C]] +; SI-STD-DAG: v_mad_f32 [[RESULT1:v[0-9]+]], -[[A]], [[B]], -[[D]] + +; SI-DENORM-DAG: v_fma_f32 [[RESULT0:v[0-9]+]], -[[A]], [[B]], -[[C]] +; SI-DENORM-DAG: v_fma_f32 [[RESULT1:v[0-9]+]], -[[A]], [[B]], -[[D]] + +; SI-DENORM-SLOWFMAF: v_mul_f32_e32 [[TMP:v[0-9]+]], [[B]], [[A]] +; SI-DENORM-SLOWFMAF-DAG: v_sub_f32_e64 [[RESULT0:v[0-9]+]], -[[TMP]], [[C]] +; SI-DENORM-SLOWFMAF-DAG: v_sub_f32_e64 [[RESULT1:v[0-9]+]], -[[TMP]], [[D]] + +; SI-DAG: buffer_store_dword [[RESULT0]], v{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}}, 0 addr64{{$}} +; SI-DAG: buffer_store_dword [[RESULT1]], v{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}}, 0 addr64 offset:4{{$}} +; SI: s_endpgm +define void @combine_to_mad_fsub_2_f32_2uses_neg(float addrspace(1)* noalias %out, float addrspace(1)* noalias %in) #1 { + %tid = tail call i32 @llvm.r600.read.tidig.x() #0 + %gep.0 = getelementptr float addrspace(1)* %in, i32 %tid + %gep.1 = getelementptr float addrspace(1)* %gep.0, i32 1 + %gep.2 = getelementptr float addrspace(1)* %gep.0, i32 2 + %gep.3 = getelementptr float addrspace(1)* %gep.0, i32 3 + %gep.out.0 = getelementptr float addrspace(1)* %out, i32 %tid + %gep.out.1 = getelementptr float addrspace(1)* %gep.out.0, i32 1 + + %a = load float addrspace(1)* %gep.0 + %b = load float addrspace(1)* %gep.1 + %c = load float addrspace(1)* %gep.2 + %d = load float addrspace(1)* %gep.3 + + %mul = fmul float %a, %b + %mul.neg = fsub float -0.0, %mul + %fma0 = fsub float %mul.neg, %c + %fma1 = fsub float %mul.neg, %d + + store float %fma0, float addrspace(1)* %gep.out.0 + store float %fma1, float addrspace(1)* %gep.out.1 + ret void +} + +; (fsub (fneg (fmul x, y)), z) -> (fma (fneg x), y, (fneg z)) +; FUNC-LABEL: {{^}}combine_to_mad_fsub_2_f32_2uses_mul: +; SI-DAG: buffer_load_dword [[A:v[0-9]+]], v{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}}, 0 addr64{{$}} +; SI-DAG: buffer_load_dword [[B:v[0-9]+]], v{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}}, 0 addr64 offset:4{{$}} +; SI-DAG: buffer_load_dword [[C:v[0-9]+]], v{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}}, 0 addr64 offset:8{{$}} + +; SI-STD-DAG: v_mad_f32 [[RESULT0:v[0-9]+]], -[[A]], [[B]], -[[C]] +; SI-STD-DAG: v_mad_f32 [[RESULT1:v[0-9]+]], [[A]], [[B]], -[[D]] + +; SI-DENORM-DAG: v_fma_f32 [[RESULT0:v[0-9]+]], -[[A]], [[B]], -[[C]] +; SI-DENORM-DAG: v_fma_f32 [[RESULT1:v[0-9]+]], [[A]], [[B]], -[[D]] + +; SI-DENORM-SLOWFMAF: v_mul_f32_e32 [[TMP:v[0-9]+]], [[B]], [[A]] +; SI-DENORM-SLOWFMAF-DAG: v_sub_f32_e64 [[RESULT0:v[0-9]+]], -[[TMP]], [[C]] +; SI-DENORM-SLOWFMAF-DAG: v_subrev_f32_e32 [[RESULT1:v[0-9]+]], [[D]], [[TMP]] + +; SI-DAG: buffer_store_dword [[RESULT0]], v{{\[[0-9]+:[0-9]+\]}}, 
s{{\[[0-9]+:[0-9]+\]}}, 0 addr64{{$}} +; SI-DAG: buffer_store_dword [[RESULT1]], v{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}}, 0 addr64 offset:4{{$}} +; SI: s_endpgm +define void @combine_to_mad_fsub_2_f32_2uses_mul(float addrspace(1)* noalias %out, float addrspace(1)* noalias %in) #1 { + %tid = tail call i32 @llvm.r600.read.tidig.x() #0 + %gep.0 = getelementptr float addrspace(1)* %in, i32 %tid + %gep.1 = getelementptr float addrspace(1)* %gep.0, i32 1 + %gep.2 = getelementptr float addrspace(1)* %gep.0, i32 2 + %gep.3 = getelementptr float addrspace(1)* %gep.0, i32 3 + %gep.out.0 = getelementptr float addrspace(1)* %out, i32 %tid + %gep.out.1 = getelementptr float addrspace(1)* %gep.out.0, i32 1 + + %a = load float addrspace(1)* %gep.0 + %b = load float addrspace(1)* %gep.1 + %c = load float addrspace(1)* %gep.2 + %d = load float addrspace(1)* %gep.3 + + %mul = fmul float %a, %b + %mul.neg = fsub float -0.0, %mul + %fma0 = fsub float %mul.neg, %c + %fma1 = fsub float %mul, %d + + store float %fma0, float addrspace(1)* %gep.out.0 + store float %fma1, float addrspace(1)* %gep.out.1 + ret void +} + +; fold (fsub (fma x, y, (fmul u, v)), z) -> (fma x, y (fma u, v, (fneg z))) + +; FUNC-LABEL: {{^}}aggressive_combine_to_mad_fsub_0_f32: +; SI-DAG: buffer_load_dword [[A:v[0-9]+]], v{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}}, 0 addr64{{$}} +; SI-DAG: buffer_load_dword [[B:v[0-9]+]], v{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}}, 0 addr64 offset:4{{$}} +; SI-DAG: buffer_load_dword [[C:v[0-9]+]], v{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}}, 0 addr64 offset:8{{$}} +; SI-DAG: buffer_load_dword [[D:v[0-9]+]], v{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}}, 0 addr64 offset:12{{$}} +; SI-DAG: buffer_load_dword [[E:v[0-9]+]], v{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}}, 0 addr64 offset:16{{$}} + +; SI-STD: v_mul_f32_e32 [[TMP0:v[0-9]+]], [[E]], [[D]] +; SI-STD: v_fma_f32 [[TMP1:v[0-9]+]], [[A]], [[B]], [[TMP0]] +; SI-STD: v_subrev_f32_e32 [[RESULT:v[0-9]+]], [[C]], [[TMP1]] + +; SI-DENORM: v_fma_f32 [[TMP0:v[0-9]+]], [[D]], [[E]], -[[C]] +; SI-DENORM: v_fma_f32 [[RESULT:v[0-9]+]], [[A]], [[B]], [[TMP0]] + +; SI-DENORM-SLOWFMAF: v_mul_f32_e32 [[TMP0:v[0-9]+]], [[E]], [[D]] +; SI-DENORM-SLOWFMAF: v_fma_f32 [[TMP1:v[0-9]+]], [[A]], [[B]], [[TMP0]] +; SI-DENORM-SLOWFMAF: v_subrev_f32_e32 [[RESULT1:v[0-9]+]], [[C]], [[TMP1]] + +; SI: buffer_store_dword [[RESULT]], v{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}}, 0 addr64{{$}} +define void @aggressive_combine_to_mad_fsub_0_f32(float addrspace(1)* noalias %out, float addrspace(1)* noalias %in) #1 { + %tid = tail call i32 @llvm.r600.read.tidig.x() #0 + %gep.0 = getelementptr float addrspace(1)* %in, i32 %tid + %gep.1 = getelementptr float addrspace(1)* %gep.0, i32 1 + %gep.2 = getelementptr float addrspace(1)* %gep.0, i32 2 + %gep.3 = getelementptr float addrspace(1)* %gep.0, i32 3 + %gep.4 = getelementptr float addrspace(1)* %gep.0, i32 4 + %gep.out = getelementptr float addrspace(1)* %out, i32 %tid + + %x = load float addrspace(1)* %gep.0 + %y = load float addrspace(1)* %gep.1 + %z = load float addrspace(1)* %gep.2 + %u = load float addrspace(1)* %gep.3 + %v = load float addrspace(1)* %gep.4 + + %tmp0 = fmul float %u, %v + %tmp1 = call float @llvm.fma.f32(float %x, float %y, float %tmp0) #0 + %tmp2 = fsub float %tmp1, %z + + store float %tmp2, float addrspace(1)* %gep.out + ret void +} + +; fold (fsub x, (fma y, z, (fmul u, v))) +; -> (fma (fneg y), z, (fma (fneg u), v, x)) + +; FUNC-LABEL: {{^}}aggressive_combine_to_mad_fsub_1_f32: +; SI-DAG: 
buffer_load_dword [[A:v[0-9]+]], v{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}}, 0 addr64{{$}} +; SI-DAG: buffer_load_dword [[B:v[0-9]+]], v{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}}, 0 addr64 offset:4{{$}} +; SI-DAG: buffer_load_dword [[C:v[0-9]+]], v{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}}, 0 addr64 offset:8{{$}} +; SI-DAG: buffer_load_dword [[D:v[0-9]+]], v{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}}, 0 addr64 offset:12{{$}} +; SI-DAG: buffer_load_dword [[E:v[0-9]+]], v{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}}, 0 addr64 offset:16{{$}} + +; SI-STD: v_mul_f32_e32 [[TMP0:v[0-9]+]], [[E]], [[D]] +; SI-STD: v_fma_f32 [[TMP1:v[0-9]+]], [[B]], [[C]], [[TMP0]] +; SI-STD: v_subrev_f32_e32 [[RESULT:v[0-9]+]], [[TMP1]], [[A]] + +; SI-DENORM: v_fma_f32 [[TMP0:v[0-9]+]], -[[D]], [[E]], [[A]] +; SI-DENORM: v_fma_f32 [[RESULT:v[0-9]+]], -[[B]], [[C]], [[TMP0]] + +; SI-DENORM-SLOWFMAF: v_mul_f32_e32 [[TMP0:v[0-9]+]], [[E]], [[D]] +; SI-DENORM-SLOWFMAF: v_fma_f32 [[TMP1:v[0-9]+]], [[B]], [[C]], [[TMP0]] +; SI-DENORM-SLOWFMAF: v_subrev_f32_e32 [[RESULT:v[0-9]+]], [[TMP1]], [[A]] + +; SI: buffer_store_dword [[RESULT]], v{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}}, 0 addr64{{$}} +; SI: s_endpgm +define void @aggressive_combine_to_mad_fsub_1_f32(float addrspace(1)* noalias %out, float addrspace(1)* noalias %in) #1 { + %tid = tail call i32 @llvm.r600.read.tidig.x() #0 + %gep.0 = getelementptr float addrspace(1)* %in, i32 %tid + %gep.1 = getelementptr float addrspace(1)* %gep.0, i32 1 + %gep.2 = getelementptr float addrspace(1)* %gep.0, i32 2 + %gep.3 = getelementptr float addrspace(1)* %gep.0, i32 3 + %gep.4 = getelementptr float addrspace(1)* %gep.0, i32 4 + %gep.out = getelementptr float addrspace(1)* %out, i32 %tid + + %x = load float addrspace(1)* %gep.0 + %y = load float addrspace(1)* %gep.1 + %z = load float addrspace(1)* %gep.2 + %u = load float addrspace(1)* %gep.3 + %v = load float addrspace(1)* %gep.4 + + %tmp0 = fmul float %u, %v + %tmp1 = call float @llvm.fma.f32(float %y, float %z, float %tmp0) #0 + %tmp2 = fsub float %x, %tmp1 + + store float %tmp2, float addrspace(1)* %gep.out + ret void +} + +; fold (fsub (fma x, y, (fmul u, v)), z) -> (fma x, y (fma u, v, (fneg z))) + +; FUNC-LABEL: {{^}}aggressive_combine_to_mad_fsub_2_f32: +; SI-DAG: buffer_load_dword [[A:v[0-9]+]], v{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}}, 0 addr64{{$}} +; SI-DAG: buffer_load_dword [[B:v[0-9]+]], v{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}}, 0 addr64 offset:4{{$}} +; SI-DAG: buffer_load_dword [[C:v[0-9]+]], v{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}}, 0 addr64 offset:8{{$}} +; SI-DAG: buffer_load_dword [[D:v[0-9]+]], v{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}}, 0 addr64 offset:12{{$}} +; SI-DAG: buffer_load_dword [[E:v[0-9]+]], v{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}}, 0 addr64 offset:16{{$}} + +; SI-STD: v_mad_f32 [[TMP:v[0-9]+]], [[D]], [[E]], -[[C]] +; SI-STD: v_mad_f32 [[RESULT:v[0-9]+]], [[A]], [[B]], [[TMP]] + +; SI-DENORM: v_fma_f32 [[TMP:v[0-9]+]], [[D]], [[E]], -[[C]] +; SI-DENORM: v_fma_f32 [[RESULT:v[0-9]+]], [[A]], [[B]], [[TMP]] + +; SI-DENORM-SLOWFMAF: v_mul_f32_e32 [[TMP0:v[0-9]+]], [[E]], [[D]] +; SI-DENORM-SLOWFMAF: v_mul_f32_e32 [[TMP1:v[0-9]+]], [[B]], [[A]] +; SI-DENORM-SLOWFMAF: v_add_f32_e32 [[TMP2:v[0-9]+]], [[TMP0]], [[TMP1]] +; SI-DENORM-SLOWFMAF: v_subrev_f32_e32 [[RESULT:v[0-9]+]], [[C]], [[TMP2]] + +; SI: buffer_store_dword [[RESULT]], v{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}}, 0 addr64{{$}} +; SI: s_endpgm +define void 
@aggressive_combine_to_mad_fsub_2_f32(float addrspace(1)* noalias %out, float addrspace(1)* noalias %in) #1 { + %tid = tail call i32 @llvm.r600.read.tidig.x() #0 + %gep.0 = getelementptr float addrspace(1)* %in, i32 %tid + %gep.1 = getelementptr float addrspace(1)* %gep.0, i32 1 + %gep.2 = getelementptr float addrspace(1)* %gep.0, i32 2 + %gep.3 = getelementptr float addrspace(1)* %gep.0, i32 3 + %gep.4 = getelementptr float addrspace(1)* %gep.0, i32 4 + %gep.out = getelementptr float addrspace(1)* %out, i32 %tid + + %x = load float addrspace(1)* %gep.0 + %y = load float addrspace(1)* %gep.1 + %z = load float addrspace(1)* %gep.2 + %u = load float addrspace(1)* %gep.3 + %v = load float addrspace(1)* %gep.4 + + %tmp0 = fmul float %u, %v + %tmp1 = call float @llvm.fmuladd.f32(float %x, float %y, float %tmp0) #0 + %tmp2 = fsub float %tmp1, %z + + store float %tmp2, float addrspace(1)* %gep.out + ret void +} + +; fold (fsub x, (fmuladd y, z, (fmul u, v))) +; -> (fmuladd (fneg y), z, (fmuladd (fneg u), v, x)) + +; FUNC-LABEL: {{^}}aggressive_combine_to_mad_fsub_3_f32: +; SI-DAG: buffer_load_dword [[A:v[0-9]+]], v{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}}, 0 addr64{{$}} +; SI-DAG: buffer_load_dword [[B:v[0-9]+]], v{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}}, 0 addr64 offset:4{{$}} +; SI-DAG: buffer_load_dword [[C:v[0-9]+]], v{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}}, 0 addr64 offset:8{{$}} +; SI-DAG: buffer_load_dword [[D:v[0-9]+]], v{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}}, 0 addr64 offset:12{{$}} +; SI-DAG: buffer_load_dword [[E:v[0-9]+]], v{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}}, 0 addr64 offset:16{{$}} + +; SI-STD: v_mad_f32 [[TMP:v[0-9]+]], -[[D]], [[E]], [[A]] +; SI-STD: v_mad_f32 [[RESULT:v[0-9]+]], -[[B]], [[C]], [[TMP]] + +; SI-DENORM: v_fma_f32 [[TMP:v[0-9]+]], -[[D]], [[E]], [[A]] +; SI-DENORM: v_fma_f32 [[RESULT:v[0-9]+]], -[[B]], [[C]], [[TMP]] + +; SI-DENORM-SLOWFMAF: v_mul_f32_e32 [[TMP0:v[0-9]+]], [[E]], [[D]] +; SI-DENORM-SLOWFMAF: v_mul_f32_e32 [[TMP1:v[0-9]+]], [[C]], [[B]] +; SI-DENORM-SLOWFMAF: v_add_f32_e32 [[TMP2:v[0-9]+]], [[TMP0]], [[TMP1]] +; SI-DENORM-SLOWFMAF: v_subrev_f32_e32 [[RESULT:v[0-9]+]], [[TMP2]], [[A]] + +; SI: buffer_store_dword [[RESULT]], v{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}}, 0 addr64{{$}} +; SI: s_endpgm +define void @aggressive_combine_to_mad_fsub_3_f32(float addrspace(1)* noalias %out, float addrspace(1)* noalias %in) #1 { + %tid = tail call i32 @llvm.r600.read.tidig.x() #0 + %gep.0 = getelementptr float addrspace(1)* %in, i32 %tid + %gep.1 = getelementptr float addrspace(1)* %gep.0, i32 1 + %gep.2 = getelementptr float addrspace(1)* %gep.0, i32 2 + %gep.3 = getelementptr float addrspace(1)* %gep.0, i32 3 + %gep.4 = getelementptr float addrspace(1)* %gep.0, i32 4 + %gep.out = getelementptr float addrspace(1)* %out, i32 %tid + + %x = load float addrspace(1)* %gep.0 + %y = load float addrspace(1)* %gep.1 + %z = load float addrspace(1)* %gep.2 + %u = load float addrspace(1)* %gep.3 + %v = load float addrspace(1)* %gep.4 + + %tmp0 = fmul float %u, %v + %tmp1 = call float @llvm.fmuladd.f32(float %y, float %z, float %tmp0) #0 + %tmp2 = fsub float %x, %tmp1 + + store float %tmp2, float addrspace(1)* %gep.out + ret void +} + +attributes #0 = { nounwind readnone } +attributes #1 = { nounwind } diff --git a/test/CodeGen/R600/mad-sub.ll b/test/CodeGen/R600/mad-sub.ll index 240abd0..7b4020d 100644 --- a/test/CodeGen/R600/mad-sub.ll +++ b/test/CodeGen/R600/mad-sub.ll @@ -1,4 +1,4 @@ -; RUN: llc -march=r600 -mcpu=SI 
-verify-machineinstrs < %s | FileCheck -check-prefix=SI -check-prefix=FUNC %s +; RUN: llc -march=amdgcn -mcpu=SI -verify-machineinstrs < %s | FileCheck -check-prefix=SI -check-prefix=FUNC %s declare i32 @llvm.r600.read.tidig.x() #0 declare float @llvm.fabs.f32(float) #0 @@ -171,7 +171,7 @@ define void @mad_fabs_sub_f32(float addrspace(1)* noalias nocapture %out, float ; FUNC-LABEL: {{^}}fsub_c_fadd_a_a: ; SI-DAG: buffer_load_dword [[R1:v[0-9]+]], {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, 0 addr64{{$}} -; SI-DAG: buffer_load_dword [[R2:v[0-9]+]], {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, 0 addr64 offset:0x4 +; SI-DAG: buffer_load_dword [[R2:v[0-9]+]], {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, 0 addr64 offset:4 ; SI: v_mad_f32 [[RESULT:v[0-9]+]], -2.0, [[R1]], [[R2]] ; SI: buffer_store_dword [[RESULT]] define void @fsub_c_fadd_a_a(float addrspace(1)* %out, float addrspace(1)* %in) { @@ -192,7 +192,7 @@ define void @fsub_c_fadd_a_a(float addrspace(1)* %out, float addrspace(1)* %in) ; FUNC-LABEL: {{^}}fsub_fadd_a_a_c: ; SI-DAG: buffer_load_dword [[R1:v[0-9]+]], {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, 0 addr64{{$}} -; SI-DAG: buffer_load_dword [[R2:v[0-9]+]], {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, 0 addr64 offset:0x4 +; SI-DAG: buffer_load_dword [[R2:v[0-9]+]], {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, 0 addr64 offset:4 ; SI: v_mad_f32 [[RESULT:v[0-9]+]], 2.0, [[R1]], -[[R2]] ; SI: buffer_store_dword [[RESULT]] define void @fsub_fadd_a_a_c(float addrspace(1)* %out, float addrspace(1)* %in) { diff --git a/test/CodeGen/R600/mad_int24.ll b/test/CodeGen/R600/mad_int24.ll index c8dd377..86d75a6 100644 --- a/test/CodeGen/R600/mad_int24.ll +++ b/test/CodeGen/R600/mad_int24.ll @@ -1,6 +1,7 @@ ; RUN: llc < %s -march=r600 -mcpu=redwood | FileCheck %s --check-prefix=EG --check-prefix=FUNC ; RUN: llc < %s -march=r600 -mcpu=cayman | FileCheck %s --check-prefix=CM --check-prefix=FUNC -; RUN: llc < %s -march=r600 -mcpu=SI -verify-machineinstrs | FileCheck %s --check-prefix=SI --check-prefix=FUNC +; RUN: llc < %s -march=amdgcn -mcpu=SI -verify-machineinstrs | FileCheck %s --check-prefix=SI --check-prefix=FUNC +; RUN: llc < %s -march=amdgcn -mcpu=tonga -verify-machineinstrs | FileCheck %s --check-prefix=SI --check-prefix=FUNC declare i32 @llvm.AMDGPU.imul24(i32, i32) nounwind readnone diff --git a/test/CodeGen/R600/mad_uint24.ll b/test/CodeGen/R600/mad_uint24.ll index b7b32fe..95fe341 100644 --- a/test/CodeGen/R600/mad_uint24.ll +++ b/test/CodeGen/R600/mad_uint24.ll @@ -1,6 +1,7 @@ ; RUN: llc < %s -march=r600 -mcpu=redwood | FileCheck %s --check-prefix=EG --check-prefix=FUNC ; RUN: llc < %s -march=r600 -mcpu=cayman | FileCheck %s --check-prefix=EG --check-prefix=FUNC -; RUN: llc < %s -march=r600 -mcpu=SI -verify-machineinstrs | FileCheck %s --check-prefix=SI --check-prefix=FUNC +; RUN: llc < %s -march=amdgcn -mcpu=SI -verify-machineinstrs | FileCheck %s --check-prefix=SI --check-prefix=FUNC +; RUN: llc < %s -march=amdgcn -mcpu=tonga -verify-machineinstrs | FileCheck %s --check-prefix=SI --check-prefix=FUNC ; FUNC-LABEL: {{^}}u32_mad24: ; EG: MULADD_UINT24 diff --git a/test/CodeGen/R600/madak.ll b/test/CodeGen/R600/madak.ll new file mode 100644 index 0000000..505a49b --- /dev/null +++ b/test/CodeGen/R600/madak.ll @@ -0,0 +1,193 @@ +; RUN: llc -march=amdgcn -verify-machineinstrs < %s | FileCheck -check-prefix=SI -check-prefix=GCN %s +; XUN: llc -march=amdgcn -mcpu=tonga -verify-machineinstrs < %s | FileCheck -check-prefix=VI -check-prefix=GCN %s + +; FIXME: Enable VI + 
+declare i32 @llvm.r600.read.tidig.x() nounwind readnone +declare float @llvm.fabs.f32(float) nounwind readnone + +; GCN-LABEL: {{^}}madak_f32: +; GCN: buffer_load_dword [[VA:v[0-9]+]] +; GCN: buffer_load_dword [[VB:v[0-9]+]] +; GCN: v_madak_f32 {{v[0-9]+}}, [[VB]], [[VA]], 0x41200000 +define void @madak_f32(float addrspace(1)* noalias %out, float addrspace(1)* noalias %in.a, float addrspace(1)* noalias %in.b) nounwind { + %tid = tail call i32 @llvm.r600.read.tidig.x() nounwind readnone + %in.a.gep = getelementptr float addrspace(1)* %in.a, i32 %tid + %in.b.gep = getelementptr float addrspace(1)* %in.b, i32 %tid + %out.gep = getelementptr float addrspace(1)* %out, i32 %tid + + %a = load float addrspace(1)* %in.a.gep, align 4 + %b = load float addrspace(1)* %in.b.gep, align 4 + + %mul = fmul float %a, %b + %madak = fadd float %mul, 10.0 + store float %madak, float addrspace(1)* %out.gep, align 4 + ret void +} + +; Make sure this is only folded with one use. This is a code size +; optimization and if we fold the immediate multiple times, we'll undo +; it. + +; GCN-LABEL: {{^}}madak_2_use_f32: +; GCN-DAG: buffer_load_dword [[VA:v[0-9]+]], {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, 0 addr64{{$}} +; GCN-DAG: buffer_load_dword [[VB:v[0-9]+]], {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, 0 addr64 offset:4 +; GCN-DAG: buffer_load_dword [[VC:v[0-9]+]], {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, 0 addr64 offset:8 +; GCN-DAG: v_mov_b32_e32 [[VK:v[0-9]+]], 0x41200000 +; GCN-DAG: v_mad_f32 {{v[0-9]+}}, [[VA]], [[VB]], [[VK]] +; GCN-DAG: v_mad_f32 {{v[0-9]+}}, [[VA]], [[VC]], [[VK]] +; GCN: s_endpgm +define void @madak_2_use_f32(float addrspace(1)* noalias %out, float addrspace(1)* noalias %in) nounwind { + %tid = tail call i32 @llvm.r600.read.tidig.x() nounwind readnone + + %in.gep.0 = getelementptr float addrspace(1)* %in, i32 %tid + %in.gep.1 = getelementptr float addrspace(1)* %in.gep.0, i32 1 + %in.gep.2 = getelementptr float addrspace(1)* %in.gep.0, i32 2 + + %out.gep.0 = getelementptr float addrspace(1)* %out, i32 %tid + %out.gep.1 = getelementptr float addrspace(1)* %in.gep.0, i32 1 + + %a = load float addrspace(1)* %in.gep.0, align 4 + %b = load float addrspace(1)* %in.gep.1, align 4 + %c = load float addrspace(1)* %in.gep.2, align 4 + + %mul0 = fmul float %a, %b + %mul1 = fmul float %a, %c + %madak0 = fadd float %mul0, 10.0 + %madak1 = fadd float %mul1, 10.0 + + store float %madak0, float addrspace(1)* %out.gep.0, align 4 + store float %madak1, float addrspace(1)* %out.gep.1, align 4 + ret void +} + +; GCN-LABEL: {{^}}madak_m_inline_imm_f32: +; GCN: buffer_load_dword [[VA:v[0-9]+]] +; GCN: v_madak_f32 {{v[0-9]+}}, 4.0, [[VA]], 0x41200000 +define void @madak_m_inline_imm_f32(float addrspace(1)* noalias %out, float addrspace(1)* noalias %in.a) nounwind { + %tid = tail call i32 @llvm.r600.read.tidig.x() nounwind readnone + %in.a.gep = getelementptr float addrspace(1)* %in.a, i32 %tid + %out.gep = getelementptr float addrspace(1)* %out, i32 %tid + + %a = load float addrspace(1)* %in.a.gep, align 4 + + %mul = fmul float 4.0, %a + %madak = fadd float %mul, 10.0 + store float %madak, float addrspace(1)* %out.gep, align 4 + ret void +} + +; Make sure nothing weird happens with a value that is also allowed as +; an inline immediate. 
+ +; GCN-LABEL: {{^}}madak_inline_imm_f32: +; GCN: buffer_load_dword [[VA:v[0-9]+]] +; GCN: buffer_load_dword [[VB:v[0-9]+]] +; GCN: v_mad_f32 {{v[0-9]+}}, [[VA]], [[VB]], 4.0 +define void @madak_inline_imm_f32(float addrspace(1)* noalias %out, float addrspace(1)* noalias %in.a, float addrspace(1)* noalias %in.b) nounwind { + %tid = tail call i32 @llvm.r600.read.tidig.x() nounwind readnone + %in.a.gep = getelementptr float addrspace(1)* %in.a, i32 %tid + %in.b.gep = getelementptr float addrspace(1)* %in.b, i32 %tid + %out.gep = getelementptr float addrspace(1)* %out, i32 %tid + + %a = load float addrspace(1)* %in.a.gep, align 4 + %b = load float addrspace(1)* %in.b.gep, align 4 + + %mul = fmul float %a, %b + %madak = fadd float %mul, 4.0 + store float %madak, float addrspace(1)* %out.gep, align 4 + ret void +} + +; We can't use an SGPR when forming madak +; GCN-LABEL: {{^}}s_v_madak_f32: +; GCN: s_load_dword [[SB:s[0-9]+]] +; GCN-DAG: v_mov_b32_e32 [[VK:v[0-9]+]], 0x41200000 +; GCN-DAG: buffer_load_dword [[VA:v[0-9]+]] +; GCN-NOT: v_madak_f32 +; GCN: v_mad_f32 {{v[0-9]+}}, [[SB]], [[VA]], [[VK]] +define void @s_v_madak_f32(float addrspace(1)* noalias %out, float addrspace(1)* noalias %in.a, float %b) nounwind { + %tid = tail call i32 @llvm.r600.read.tidig.x() nounwind readnone + %in.a.gep = getelementptr float addrspace(1)* %in.a, i32 %tid + %out.gep = getelementptr float addrspace(1)* %out, i32 %tid + + %a = load float addrspace(1)* %in.a.gep, align 4 + + %mul = fmul float %a, %b + %madak = fadd float %mul, 10.0 + store float %madak, float addrspace(1)* %out.gep, align 4 + ret void +} + +; GCN-LABEL: @v_s_madak_f32 +; GCN-DAG: s_load_dword [[SB:s[0-9]+]] +; GCN-DAG: v_mov_b32_e32 [[VK:v[0-9]+]], 0x41200000 +; GCN-DAG: buffer_load_dword [[VA:v[0-9]+]] +; GCN-NOT: v_madak_f32 +; GCN: v_mad_f32 {{v[0-9]+}}, [[VA]], [[SB]], [[VK]] +define void @v_s_madak_f32(float addrspace(1)* noalias %out, float %a, float addrspace(1)* noalias %in.b) nounwind { + %tid = tail call i32 @llvm.r600.read.tidig.x() nounwind readnone + %in.b.gep = getelementptr float addrspace(1)* %in.b, i32 %tid + %out.gep = getelementptr float addrspace(1)* %out, i32 %tid + + %b = load float addrspace(1)* %in.b.gep, align 4 + + %mul = fmul float %a, %b + %madak = fadd float %mul, 10.0 + store float %madak, float addrspace(1)* %out.gep, align 4 + ret void +} + +; GCN-LABEL: {{^}}s_s_madak_f32: +; GCN-NOT: v_madak_f32 +; GCN: v_mad_f32 {{v[0-9]+}}, {{s[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}} +define void @s_s_madak_f32(float addrspace(1)* %out, float %a, float %b) nounwind { + %mul = fmul float %a, %b + %madak = fadd float %mul, 10.0 + store float %madak, float addrspace(1)* %out, align 4 + ret void +} + +; GCN-LABEL: {{^}}no_madak_src0_modifier_f32: +; GCN: buffer_load_dword [[VA:v[0-9]+]] +; GCN: buffer_load_dword [[VB:v[0-9]+]] +; GCN: v_mad_f32 {{v[0-9]+}}, |{{v[0-9]+}}|, {{v[0-9]+}}, {{[sv][0-9]+}} +; GCN: s_endpgm +define void @no_madak_src0_modifier_f32(float addrspace(1)* noalias %out, float addrspace(1)* noalias %in.a, float addrspace(1)* noalias %in.b) nounwind { + %tid = tail call i32 @llvm.r600.read.tidig.x() nounwind readnone + %in.a.gep = getelementptr float addrspace(1)* %in.a, i32 %tid + %in.b.gep = getelementptr float addrspace(1)* %in.b, i32 %tid + %out.gep = getelementptr float addrspace(1)* %out, i32 %tid + + %a = load float addrspace(1)* %in.a.gep, align 4 + %b = load float addrspace(1)* %in.b.gep, align 4 + + %a.fabs = call float @llvm.fabs.f32(float %a) nounwind readnone + + %mul = fmul float %a.fabs, %b + %madak 
= fadd float %mul, 10.0 + store float %madak, float addrspace(1)* %out.gep, align 4 + ret void +} + +; GCN-LABEL: {{^}}no_madak_src1_modifier_f32: +; GCN: buffer_load_dword [[VA:v[0-9]+]] +; GCN: buffer_load_dword [[VB:v[0-9]+]] +; GCN: v_mad_f32 {{v[0-9]+}}, {{v[0-9]+}}, |{{v[0-9]+}}|, {{[sv][0-9]+}} +; GCN: s_endpgm +define void @no_madak_src1_modifier_f32(float addrspace(1)* noalias %out, float addrspace(1)* noalias %in.a, float addrspace(1)* noalias %in.b) nounwind { + %tid = tail call i32 @llvm.r600.read.tidig.x() nounwind readnone + %in.a.gep = getelementptr float addrspace(1)* %in.a, i32 %tid + %in.b.gep = getelementptr float addrspace(1)* %in.b, i32 %tid + %out.gep = getelementptr float addrspace(1)* %out, i32 %tid + + %a = load float addrspace(1)* %in.a.gep, align 4 + %b = load float addrspace(1)* %in.b.gep, align 4 + + %b.fabs = call float @llvm.fabs.f32(float %b) nounwind readnone + + %mul = fmul float %a, %b.fabs + %madak = fadd float %mul, 10.0 + store float %madak, float addrspace(1)* %out.gep, align 4 + ret void +} diff --git a/test/CodeGen/R600/madmk.ll b/test/CodeGen/R600/madmk.ll new file mode 100644 index 0000000..249e48e --- /dev/null +++ b/test/CodeGen/R600/madmk.ll @@ -0,0 +1,181 @@ +; RUN: llc -march=amdgcn -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=SI %s +; XUN: llc -march=amdgcn -mcpu=tonga -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=VI %s + +declare i32 @llvm.r600.read.tidig.x() nounwind readnone +declare float @llvm.fabs.f32(float) nounwind readnone + +; GCN-LABEL: {{^}}madmk_f32: +; GCN-DAG: buffer_load_dword [[VA:v[0-9]+]], {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, 0 addr64{{$}} +; GCN-DAG: buffer_load_dword [[VB:v[0-9]+]], {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, 0 addr64 offset:4 +; GCN: v_madmk_f32 {{v[0-9]+}}, [[VA]], [[VB]], 0x41200000 +define void @madmk_f32(float addrspace(1)* noalias %out, float addrspace(1)* noalias %in) nounwind { + %tid = tail call i32 @llvm.r600.read.tidig.x() nounwind readnone + %gep.0 = getelementptr float addrspace(1)* %in, i32 %tid + %gep.1 = getelementptr float addrspace(1)* %gep.0, i32 1 + %out.gep = getelementptr float addrspace(1)* %out, i32 %tid + + %a = load float addrspace(1)* %gep.0, align 4 + %b = load float addrspace(1)* %gep.1, align 4 + + %mul = fmul float %a, 10.0 + %madmk = fadd float %mul, %b + store float %madmk, float addrspace(1)* %out.gep, align 4 + ret void +} + +; GCN-LABEL: {{^}}madmk_2_use_f32: +; GCN-DAG: buffer_load_dword [[VA:v[0-9]+]], {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, 0 addr64{{$}} +; GCN-DAG: buffer_load_dword [[VB:v[0-9]+]], {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, 0 addr64 offset:4 +; GCN-DAG: buffer_load_dword [[VC:v[0-9]+]], {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, 0 addr64 offset:8 +; GCN-DAG: v_mov_b32_e32 [[VK:v[0-9]+]], 0x41200000 +; GCN-DAG: v_mad_f32 {{v[0-9]+}}, [[VA]], [[VK]], [[VB]] +; GCN-DAG: v_mad_f32 {{v[0-9]+}}, [[VA]], [[VK]], [[VC]] +; GCN: s_endpgm +define void @madmk_2_use_f32(float addrspace(1)* noalias %out, float addrspace(1)* noalias %in) nounwind { + %tid = tail call i32 @llvm.r600.read.tidig.x() nounwind readnone + + %in.gep.0 = getelementptr float addrspace(1)* %in, i32 %tid + %in.gep.1 = getelementptr float addrspace(1)* %in.gep.0, i32 1 + %in.gep.2 = getelementptr float addrspace(1)* %in.gep.0, i32 2 + + %out.gep.0 = getelementptr float addrspace(1)* %out, i32 %tid + %out.gep.1 = getelementptr float addrspace(1)* %in.gep.0, i32 1 + + %a = load float addrspace(1)* %in.gep.0, 
align 4 + %b = load float addrspace(1)* %in.gep.1, align 4 + %c = load float addrspace(1)* %in.gep.2, align 4 + + %mul0 = fmul float %a, 10.0 + %mul1 = fmul float %a, 10.0 + %madmk0 = fadd float %mul0, %b + %madmk1 = fadd float %mul1, %c + + store float %madmk0, float addrspace(1)* %out.gep.0, align 4 + store float %madmk1, float addrspace(1)* %out.gep.1, align 4 + ret void +} + +; We don't get any benefit if the constant is an inline immediate. +; GCN-LABEL: {{^}}madmk_inline_imm_f32: +; GCN-DAG: buffer_load_dword [[VA:v[0-9]+]], {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, 0 addr64{{$}} +; GCN-DAG: buffer_load_dword [[VB:v[0-9]+]], {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, 0 addr64 offset:4 +; GCN: v_mad_f32 {{v[0-9]+}}, 4.0, [[VA]], [[VB]] +define void @madmk_inline_imm_f32(float addrspace(1)* noalias %out, float addrspace(1)* noalias %in) nounwind { + %tid = tail call i32 @llvm.r600.read.tidig.x() nounwind readnone + %gep.0 = getelementptr float addrspace(1)* %in, i32 %tid + %gep.1 = getelementptr float addrspace(1)* %gep.0, i32 1 + %out.gep = getelementptr float addrspace(1)* %out, i32 %tid + + %a = load float addrspace(1)* %gep.0, align 4 + %b = load float addrspace(1)* %gep.1, align 4 + + %mul = fmul float %a, 4.0 + %madmk = fadd float %mul, %b + store float %madmk, float addrspace(1)* %out.gep, align 4 + ret void +} + +; GCN-LABEL: {{^}}s_s_madmk_f32: +; GCN-NOT: v_madmk_f32 +; GCN: v_mad_f32 +; GCN: s_endpgm +define void @s_s_madmk_f32(float addrspace(1)* noalias %out, float %a, float %b) nounwind { + %tid = tail call i32 @llvm.r600.read.tidig.x() nounwind readnone + %out.gep = getelementptr float addrspace(1)* %out, i32 %tid + + %mul = fmul float %a, 10.0 + %madmk = fadd float %mul, %b + store float %madmk, float addrspace(1)* %out.gep, align 4 + ret void +} + +; GCN-LABEL: {{^}}v_s_madmk_f32: +; GCN-NOT: v_madmk_f32 +; GCN: v_mad_f32 +; GCN: s_endpgm +define void @v_s_madmk_f32(float addrspace(1)* noalias %out, float addrspace(1)* noalias %in, float %b) nounwind { + %tid = tail call i32 @llvm.r600.read.tidig.x() nounwind readnone + %gep.0 = getelementptr float addrspace(1)* %in, i32 %tid + %out.gep = getelementptr float addrspace(1)* %out, i32 %tid + %a = load float addrspace(1)* %gep.0, align 4 + + %mul = fmul float %a, 10.0 + %madmk = fadd float %mul, %b + store float %madmk, float addrspace(1)* %out.gep, align 4 + ret void +} + +; GCN-LABEL: {{^}}scalar_vector_madmk_f32: +; GCN-NOT: v_madmk_f32 +; GCN: v_mad_f32 +; GCN: s_endpgm +define void @scalar_vector_madmk_f32(float addrspace(1)* noalias %out, float addrspace(1)* noalias %in, float %a) nounwind { + %tid = tail call i32 @llvm.r600.read.tidig.x() nounwind readnone + %gep.0 = getelementptr float addrspace(1)* %in, i32 %tid + %out.gep = getelementptr float addrspace(1)* %out, i32 %tid + %b = load float addrspace(1)* %gep.0, align 4 + + %mul = fmul float %a, 10.0 + %madmk = fadd float %mul, %b + store float %madmk, float addrspace(1)* %out.gep, align 4 + ret void +} + +; GCN-LABEL: {{^}}no_madmk_src0_modifier_f32: +; GCN-DAG: buffer_load_dword [[VA:v[0-9]+]], {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, 0 addr64{{$}} +; GCN-DAG: buffer_load_dword [[VB:v[0-9]+]], {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, 0 addr64 offset:4 +; GCN: v_mad_f32 {{v[0-9]+}}, |{{v[0-9]+}}|, {{v[0-9]+}}, {{[sv][0-9]+}} +define void @no_madmk_src0_modifier_f32(float addrspace(1)* noalias %out, float addrspace(1)* noalias %in) nounwind { + %tid = tail call i32 @llvm.r600.read.tidig.x() nounwind readnone + %gep.0 = getelementptr float 
addrspace(1)* %in, i32 %tid + %gep.1 = getelementptr float addrspace(1)* %gep.0, i32 1 + %out.gep = getelementptr float addrspace(1)* %out, i32 %tid + + %a = load float addrspace(1)* %gep.0, align 4 + %b = load float addrspace(1)* %gep.1, align 4 + + %a.fabs = call float @llvm.fabs.f32(float %a) nounwind readnone + + %mul = fmul float %a.fabs, 10.0 + %madmk = fadd float %mul, %b + store float %madmk, float addrspace(1)* %out.gep, align 4 + ret void +} + +; GCN-LABEL: {{^}}no_madmk_src2_modifier_f32: +; GCN-DAG: buffer_load_dword [[VA:v[0-9]+]], {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, 0 addr64{{$}} +; GCN-DAG: buffer_load_dword [[VB:v[0-9]+]], {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, 0 addr64 offset:4 +; GCN: v_mad_f32 {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}}, |{{[sv][0-9]+}}| +define void @no_madmk_src2_modifier_f32(float addrspace(1)* noalias %out, float addrspace(1)* noalias %in) nounwind { + %tid = tail call i32 @llvm.r600.read.tidig.x() nounwind readnone + %gep.0 = getelementptr float addrspace(1)* %in, i32 %tid + %gep.1 = getelementptr float addrspace(1)* %gep.0, i32 1 + %out.gep = getelementptr float addrspace(1)* %out, i32 %tid + + %a = load float addrspace(1)* %gep.0, align 4 + %b = load float addrspace(1)* %gep.1, align 4 + + %b.fabs = call float @llvm.fabs.f32(float %b) nounwind readnone + + %mul = fmul float %a, 10.0 + %madmk = fadd float %mul, %b.fabs + store float %madmk, float addrspace(1)* %out.gep, align 4 + ret void +} + +; GCN-LABEL: {{^}}madmk_add_inline_imm_f32: +; GCN: buffer_load_dword [[A:v[0-9]+]] +; GCN: v_mov_b32_e32 [[VK:v[0-9]+]], 0x41200000 +; GCN: v_mad_f32 {{v[0-9]+}}, [[VK]], [[A]], 2.0 +define void @madmk_add_inline_imm_f32(float addrspace(1)* noalias %out, float addrspace(1)* noalias %in) nounwind { + %tid = tail call i32 @llvm.r600.read.tidig.x() nounwind readnone + %gep.0 = getelementptr float addrspace(1)* %in, i32 %tid + %out.gep = getelementptr float addrspace(1)* %out, i32 %tid + + %a = load float addrspace(1)* %gep.0, align 4 + + %mul = fmul float %a, 10.0 + %madmk = fadd float %mul, 2.0 + store float %madmk, float addrspace(1)* %out.gep, align 4 + ret void +} diff --git a/test/CodeGen/R600/max.ll b/test/CodeGen/R600/max.ll index d67ef47..20af993 100644 --- a/test/CodeGen/R600/max.ll +++ b/test/CodeGen/R600/max.ll @@ -1,4 +1,4 @@ -; RUN: llc -march=r600 -mcpu=SI < %s | FileCheck -check-prefix=SI -check-prefix=FUNC %s +; RUN: llc -march=amdgcn -mcpu=SI < %s | FileCheck -check-prefix=SI -check-prefix=FUNC %s declare i32 @llvm.r600.read.tidig.x() nounwind readnone diff --git a/test/CodeGen/R600/max3.ll b/test/CodeGen/R600/max3.ll index 74b08f6..f905e17 100644 --- a/test/CodeGen/R600/max3.ll +++ b/test/CodeGen/R600/max3.ll @@ -1,4 +1,4 @@ -; RUN: llc -march=r600 -mcpu=SI < %s | FileCheck -check-prefix=SI -check-prefix=FUNC %s +; RUN: llc -march=amdgcn -mcpu=SI < %s | FileCheck -check-prefix=SI -check-prefix=FUNC %s declare i32 @llvm.r600.read.tidig.x() nounwind readnone diff --git a/test/CodeGen/R600/min.ll b/test/CodeGen/R600/min.ll index 88c0dff..00ba5c6 100644 --- a/test/CodeGen/R600/min.ll +++ b/test/CodeGen/R600/min.ll @@ -1,4 +1,4 @@ -; RUN: llc -march=r600 -mcpu=SI < %s | FileCheck -check-prefix=SI -check-prefix=FUNC %s +; RUN: llc -march=amdgcn -mcpu=SI < %s | FileCheck -check-prefix=SI -check-prefix=FUNC %s declare i32 @llvm.r600.read.tidig.x() nounwind readnone @@ -97,3 +97,24 @@ define void @s_test_umin_ult_i32(i32 addrspace(1)* %out, i32 %a, i32 %b) nounwin store i32 %val, i32 addrspace(1)* %out, align 4 ret void } + +; 
FUNC-LABEL: @v_test_umin_ult_i32_multi_use +; SI-NOT: v_min +; SI: v_cmp_lt_u32 +; SI-NEXT: v_cndmask_b32 +; SI-NOT: v_min +; SI: s_endpgm +define void @v_test_umin_ult_i32_multi_use(i32 addrspace(1)* %out0, i1 addrspace(1)* %out1, i32 addrspace(1)* %aptr, i32 addrspace(1)* %bptr) nounwind { + %tid = call i32 @llvm.r600.read.tidig.x() nounwind readnone + %gep0 = getelementptr i32 addrspace(1)* %aptr, i32 %tid + %gep1 = getelementptr i32 addrspace(1)* %bptr, i32 %tid + %outgep0 = getelementptr i32 addrspace(1)* %out0, i32 %tid + %outgep1 = getelementptr i1 addrspace(1)* %out1, i32 %tid + %a = load i32 addrspace(1)* %gep0, align 4 + %b = load i32 addrspace(1)* %gep1, align 4 + %cmp = icmp ult i32 %a, %b + %val = select i1 %cmp, i32 %a, i32 %b + store i32 %val, i32 addrspace(1)* %outgep0, align 4 + store i1 %cmp, i1 addrspace(1)* %outgep1 + ret void +} diff --git a/test/CodeGen/R600/min3.ll b/test/CodeGen/R600/min3.ll index f852cff..6c11a65 100644 --- a/test/CodeGen/R600/min3.ll +++ b/test/CodeGen/R600/min3.ll @@ -1,4 +1,4 @@ -; RUN: llc -march=r600 -mcpu=SI < %s | FileCheck -check-prefix=SI -check-prefix=FUNC %s +; RUN: llc -march=amdgcn -mcpu=SI < %s | FileCheck -check-prefix=SI -check-prefix=FUNC %s declare i32 @llvm.r600.read.tidig.x() nounwind readnone diff --git a/test/CodeGen/R600/missing-store.ll b/test/CodeGen/R600/missing-store.ll index 5346046..8ddef35 100644 --- a/test/CodeGen/R600/missing-store.ll +++ b/test/CodeGen/R600/missing-store.ll @@ -1,4 +1,4 @@ -; RUN: llc -march=r600 -mcpu=SI -verify-machineinstrs < %s | FileCheck -check-prefix=FUNC -check-prefix=SI %s +; RUN: llc -march=amdgcn -mcpu=SI -verify-machineinstrs < %s | FileCheck -check-prefix=FUNC -check-prefix=SI %s @ptr_load = addrspace(3) global i32 addrspace(2)* undef, align 8 diff --git a/test/CodeGen/R600/mubuf.ll b/test/CodeGen/R600/mubuf.ll index c2efda4..988e5c1 100644 --- a/test/CodeGen/R600/mubuf.ll +++ b/test/CodeGen/R600/mubuf.ll @@ -1,4 +1,4 @@ -; RUN: llc -march=r600 -mcpu=SI -show-mc-encoding -verify-machineinstrs < %s | FileCheck %s +; RUN: llc -march=amdgcn -mcpu=SI -show-mc-encoding -verify-machineinstrs < %s | FileCheck %s declare i32 @llvm.r600.read.tidig.x() readnone @@ -8,7 +8,7 @@ declare i32 @llvm.r600.read.tidig.x() readnone ; MUBUF load with an immediate byte offset that fits into 12-bits ; CHECK-LABEL: {{^}}mubuf_load0: -; CHECK: buffer_load_dword v{{[0-9]}}, s[{{[0-9]:[0-9]}}], 0 offset:0x4 ; encoding: [0x04,0x00,0x30,0xe0 +; CHECK: buffer_load_dword v{{[0-9]}}, s[{{[0-9]:[0-9]}}], 0 offset:4 ; encoding: [0x04,0x00,0x30,0xe0 define void @mubuf_load0(i32 addrspace(1)* %out, i32 addrspace(1)* %in) { entry: %0 = getelementptr i32 addrspace(1)* %in, i64 1 @@ -19,7 +19,7 @@ entry: ; MUBUF load with the largest possible immediate offset ; CHECK-LABEL: {{^}}mubuf_load1: -; CHECK: buffer_load_ubyte v{{[0-9]}}, s[{{[0-9]:[0-9]}}], 0 offset:0xfff ; encoding: [0xff,0x0f,0x20,0xe0 +; CHECK: buffer_load_ubyte v{{[0-9]}}, s[{{[0-9]:[0-9]}}], 0 offset:4095 ; encoding: [0xff,0x0f,0x20,0xe0 define void @mubuf_load1(i8 addrspace(1)* %out, i8 addrspace(1)* %in) { entry: %0 = getelementptr i8 addrspace(1)* %in, i64 4095 @@ -30,7 +30,8 @@ entry: ; MUBUF load with an immediate byte offset that doesn't fit into 12-bits ; CHECK-LABEL: {{^}}mubuf_load2: -; CHECK: buffer_load_dword v{{[0-9]}}, v[{{[0-9]+:[0-9]+}}], s[{{[0-9]+:[0-9]+}}], 0 addr64 ; encoding: [0x00,0x80,0x30,0xe0 +; CHECK: s_movk_i32 [[SOFFSET:s[0-9]+]], 0x1000 +; CHECK: buffer_load_dword v{{[0-9]}}, s[{{[0-9]+:[0-9]+}}], [[SOFFSET]] ; encoding: 
[0x00,0x00,0x30,0xe0 define void @mubuf_load2(i32 addrspace(1)* %out, i32 addrspace(1)* %in) { entry: %0 = getelementptr i32 addrspace(1)* %in, i64 1024 @@ -42,7 +43,7 @@ entry: ; MUBUF load with a 12-bit immediate offset and a register offset ; CHECK-LABEL: {{^}}mubuf_load3: ; CHECK-NOT: ADD -; CHECK: buffer_load_dword v{{[0-9]}}, v[{{[0-9]+:[0-9]+}}], s[{{[0-9]+:[0-9]+}}], 0 addr64 offset:0x4 ; encoding: [0x04,0x80,0x30,0xe0 +; CHECK: buffer_load_dword v{{[0-9]}}, v[{{[0-9]+:[0-9]+}}], s[{{[0-9]+:[0-9]+}}], 0 addr64 offset:4 ; encoding: [0x04,0x80,0x30,0xe0 define void @mubuf_load3(i32 addrspace(1)* %out, i32 addrspace(1)* %in, i64 %offset) { entry: %0 = getelementptr i32 addrspace(1)* %in, i64 %offset @@ -52,13 +53,46 @@ entry: ret void } +; CHECK-LABEL: {{^}}soffset_max_imm: +; CHECK: buffer_load_dword v{{[0-9]+}}, v{{[0-9]+}}, s[{{[0-9]+}}:{{[0-9]+}}], 64 offen glc +define void @soffset_max_imm([6 x <16 x i8>] addrspace(2)* byval, [17 x <16 x i8>] addrspace(2)* byval, [16 x <4 x i32>] addrspace(2)* byval, [32 x <8 x i32>] addrspace(2)* byval, i32 inreg, i32 inreg, i32, i32, i32, i32, i32, i32, i32, i32) #1 { +main_body: + %tmp0 = getelementptr [6 x <16 x i8>] addrspace(2)* %0, i32 0, i32 0 + %tmp1 = load <16 x i8> addrspace(2)* %tmp0 + %tmp2 = shl i32 %6, 2 + %tmp3 = call i32 @llvm.SI.buffer.load.dword.i32.i32(<16 x i8> %tmp1, i32 %tmp2, i32 64, i32 0, i32 1, i32 0, i32 1, i32 0, i32 0) + %tmp4 = add i32 %6, 16 + %tmp5 = bitcast float 0.0 to i32 + call void @llvm.SI.tbuffer.store.i32(<16 x i8> %tmp1, i32 %tmp5, i32 1, i32 %tmp4, i32 %4, i32 0, i32 4, i32 4, i32 1, i32 0, i32 1, i32 1, i32 0) + ret void +} + +; Make sure immediates that aren't inline constants don't get folded into +; the soffset operand. +; FIXME: for this test we should be smart enough to shift the immediate into +; the offset field. 
+; CHECK-LABEL: {{^}}soffset_no_fold: +; CHECK: s_movk_i32 [[SOFFSET:s[0-9]+]], 0x41 +; CHECK: buffer_load_dword v{{[0-9+]}}, v{{[0-9+]}}, s[{{[0-9]+}}:{{[0-9]+}}], [[SOFFSET]] offen glc +define void @soffset_no_fold([6 x <16 x i8>] addrspace(2)* byval, [17 x <16 x i8>] addrspace(2)* byval, [16 x <4 x i32>] addrspace(2)* byval, [32 x <8 x i32>] addrspace(2)* byval, i32 inreg, i32 inreg, i32, i32, i32, i32, i32, i32, i32, i32) #1 { +main_body: + %tmp0 = getelementptr [6 x <16 x i8>] addrspace(2)* %0, i32 0, i32 0 + %tmp1 = load <16 x i8> addrspace(2)* %tmp0 + %tmp2 = shl i32 %6, 2 + %tmp3 = call i32 @llvm.SI.buffer.load.dword.i32.i32(<16 x i8> %tmp1, i32 %tmp2, i32 65, i32 0, i32 1, i32 0, i32 1, i32 0, i32 0) + %tmp4 = add i32 %6, 16 + %tmp5 = bitcast float 0.0 to i32 + call void @llvm.SI.tbuffer.store.i32(<16 x i8> %tmp1, i32 %tmp5, i32 1, i32 %tmp4, i32 %4, i32 0, i32 4, i32 4, i32 1, i32 0, i32 1, i32 1, i32 0) + ret void +} + ;;;==========================================================================;;; ;;; MUBUF STORE TESTS ;;;==========================================================================;;; ; MUBUF store with an immediate byte offset that fits into 12-bits ; CHECK-LABEL: {{^}}mubuf_store0: -; CHECK: buffer_store_dword v{{[0-9]}}, s[{{[0-9]:[0-9]}}], 0 offset:0x4 ; encoding: [0x04,0x00,0x70,0xe0 +; CHECK: buffer_store_dword v{{[0-9]}}, s[{{[0-9]:[0-9]}}], 0 offset:4 ; encoding: [0x04,0x00,0x70,0xe0 define void @mubuf_store0(i32 addrspace(1)* %out) { entry: %0 = getelementptr i32 addrspace(1)* %out, i64 1 @@ -68,7 +102,7 @@ entry: ; MUBUF store with the largest possible immediate offset ; CHECK-LABEL: {{^}}mubuf_store1: -; CHECK: buffer_store_byte v{{[0-9]}}, s[{{[0-9]:[0-9]}}], 0 offset:0xfff ; encoding: [0xff,0x0f,0x60,0xe0 +; CHECK: buffer_store_byte v{{[0-9]}}, s[{{[0-9]:[0-9]}}], 0 offset:4095 ; encoding: [0xff,0x0f,0x60,0xe0 define void @mubuf_store1(i8 addrspace(1)* %out) { entry: @@ -79,7 +113,8 @@ entry: ; MUBUF store with an immediate byte offset that doesn't fit into 12-bits ; CHECK-LABEL: {{^}}mubuf_store2: -; CHECK: buffer_store_dword v{{[0-9]}}, v[{{[0-9]+:[0-9]+}}], s[{{[0-9]:[0-9]}}], 0 addr64 ; encoding: [0x00,0x80,0x70,0xe0 +; CHECK: s_movk_i32 [[SOFFSET:s[0-9]+]], 0x1000 +; CHECK: buffer_store_dword v{{[0-9]}}, s[{{[0-9]:[0-9]}}], [[SOFFSET]] ; encoding: [0x00,0x00,0x70,0xe0 define void @mubuf_store2(i32 addrspace(1)* %out) { entry: %0 = getelementptr i32 addrspace(1)* %out, i64 1024 @@ -90,7 +125,7 @@ entry: ; MUBUF store with a 12-bit immediate offset and a register offset ; CHECK-LABEL: {{^}}mubuf_store3: ; CHECK-NOT: ADD -; CHECK: buffer_store_dword v{{[0-9]}}, v[{{[0-9]:[0-9]}}], s[{{[0-9]:[0-9]}}], 0 addr64 offset:0x4 ; encoding: [0x04,0x80,0x70,0xe0 +; CHECK: buffer_store_dword v{{[0-9]}}, v[{{[0-9]:[0-9]}}], s[{{[0-9]:[0-9]}}], 0 addr64 offset:4 ; encoding: [0x04,0x80,0x70,0xe0 define void @mubuf_store3(i32 addrspace(1)* %out, i64 %offset) { entry: %0 = getelementptr i32 addrspace(1)* %out, i64 %offset @@ -107,7 +142,7 @@ define void @store_sgpr_ptr(i32 addrspace(1)* %out) #0 { } ; CHECK-LABEL: {{^}}store_sgpr_ptr_offset: -; CHECK: buffer_store_dword v{{[0-9]+}}, s{{\[[0-9]+:[0-9]+\]}}, 0 offset:0x28 +; CHECK: buffer_store_dword v{{[0-9]+}}, s{{\[[0-9]+:[0-9]+\]}}, 0 offset:40 define void @store_sgpr_ptr_offset(i32 addrspace(1)* %out) #0 { %out.gep = getelementptr i32 addrspace(1)* %out, i32 10 store i32 99, i32 addrspace(1)* %out.gep, align 4 @@ -115,13 +150,23 @@ define void @store_sgpr_ptr_offset(i32 addrspace(1)* %out) #0 { } ; CHECK-LABEL: 
{{^}}store_sgpr_ptr_large_offset: -; CHECK: buffer_store_dword v{{[0-9]+}}, v{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}}, 0 addr64 +; CHECK: s_mov_b32 [[SOFFSET:s[0-9]+]], 0x20000 +; CHECK: buffer_store_dword v{{[0-9]+}}, s{{\[[0-9]+:[0-9]+\]}}, [[SOFFSET]] define void @store_sgpr_ptr_large_offset(i32 addrspace(1)* %out) #0 { %out.gep = getelementptr i32 addrspace(1)* %out, i32 32768 store i32 99, i32 addrspace(1)* %out.gep, align 4 ret void } +; CHECK-LABEL: {{^}}store_sgpr_ptr_large_offset_atomic: +; CHECK: s_mov_b32 [[SOFFSET:s[0-9]+]], 0x20000 +; CHECK: buffer_atomic_add v{{[0-9]+}}, s{{\[[0-9]+:[0-9]+\]}}, [[SOFFSET]] +define void @store_sgpr_ptr_large_offset_atomic(i32 addrspace(1)* %out) #0 { + %gep = getelementptr i32 addrspace(1)* %out, i32 32768 + %val = atomicrmw volatile add i32 addrspace(1)* %gep, i32 5 seq_cst + ret void +} + ; CHECK-LABEL: {{^}}store_vgpr_ptr: ; CHECK: buffer_store_dword v{{[0-9]+}}, v{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}}, 0 addr64 define void @store_vgpr_ptr(i32 addrspace(1)* %out) #0 { @@ -130,3 +175,9 @@ define void @store_vgpr_ptr(i32 addrspace(1)* %out) #0 { store i32 99, i32 addrspace(1)* %out.gep, align 4 ret void } + +declare i32 @llvm.SI.buffer.load.dword.i32.i32(<16 x i8>, i32, i32, i32, i32, i32, i32, i32, i32) #3 +declare void @llvm.SI.tbuffer.store.i32(<16 x i8>, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32) + +attributes #1 = { "ShaderType"="2" "unsafe-fp-math"="true" } +attributes #3 = { nounwind readonly } diff --git a/test/CodeGen/R600/mul.ll b/test/CodeGen/R600/mul.ll index be5d6a0..6f15e70 100644 --- a/test/CodeGen/R600/mul.ll +++ b/test/CodeGen/R600/mul.ll @@ -1,5 +1,6 @@ ; RUN: llc -march=r600 -mcpu=redwood < %s | FileCheck -check-prefix=EG %s -check-prefix=FUNC -; RUN: llc -march=r600 -mcpu=verde -verify-machineinstrs < %s | FileCheck -check-prefix=SI -check-prefix=FUNC %s +; RUN: llc -march=amdgcn -mcpu=verde -verify-machineinstrs < %s | FileCheck -check-prefix=SI -check-prefix=FUNC %s +; RUN: llc -march=amdgcn -mcpu=tonga -verify-machineinstrs < %s | FileCheck -check-prefix=SI -check-prefix=FUNC %s ; mul24 and mad24 are affected diff --git a/test/CodeGen/R600/mul_int24.ll b/test/CodeGen/R600/mul_int24.ll index be58f7e..7609dcc 100644 --- a/test/CodeGen/R600/mul_int24.ll +++ b/test/CodeGen/R600/mul_int24.ll @@ -1,6 +1,7 @@ ; RUN: llc < %s -march=r600 -mcpu=redwood | FileCheck %s --check-prefix=EG --check-prefix=FUNC ; RUN: llc < %s -march=r600 -mcpu=cayman | FileCheck %s --check-prefix=CM --check-prefix=FUNC -; RUN: llc < %s -march=r600 -mcpu=SI -verify-machineinstrs | FileCheck %s --check-prefix=SI --check-prefix=FUNC +; RUN: llc < %s -march=amdgcn -mcpu=SI -verify-machineinstrs | FileCheck %s --check-prefix=SI --check-prefix=FUNC +; RUN: llc < %s -march=amdgcn -mcpu=tonga -verify-machineinstrs | FileCheck %s --check-prefix=SI --check-prefix=FUNC ; FUNC-LABEL: {{^}}i32_mul24: ; Signed 24-bit multiply is not supported on pre-Cayman GPUs. 
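The mad_int24/mad_uint24 and mul_int24/mul_uint24 changes in this patch only touch RUN lines, so the test bodies themselves are not visible in the diff. For orientation, a minimal sketch of the IR shape the signed 24-bit multiply combine looks for — the function name and operand names are illustrative assumptions, written in the same style as the surrounding tests, and the unsigned variants use an analogous zero-extension pattern:

; Operands are sign-extended from 24 bits in place (shl 8 then ashr 8), which
; lets the backend select a 24-bit multiply such as MUL_INT24 on targets that
; have it (e.g. Cayman) or v_mul_i32_i24 on SI.
define void @i32_mul24_sketch(i32 addrspace(1)* %out, i32 %a, i32 %b) {
entry:
  %a.shl = shl i32 %a, 8
  %a.24 = ashr i32 %a.shl, 8      ; sign-extend the low 24 bits of %a
  %b.shl = shl i32 %b, 8
  %b.24 = ashr i32 %b.shl, 8      ; sign-extend the low 24 bits of %b
  %mul24 = mul i32 %a.24, %b.24   ; only 24 significant bits in each operand
  store i32 %mul24, i32 addrspace(1)* %out
  ret void
}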
diff --git a/test/CodeGen/R600/mul_uint24.ll b/test/CodeGen/R600/mul_uint24.ll index 8d1cda8..e640a7c 100644 --- a/test/CodeGen/R600/mul_uint24.ll +++ b/test/CodeGen/R600/mul_uint24.ll @@ -1,6 +1,7 @@ ; RUN: llc < %s -march=r600 -mcpu=redwood | FileCheck %s --check-prefix=EG --check-prefix=FUNC ; RUN: llc < %s -march=r600 -mcpu=cayman | FileCheck %s --check-prefix=EG --check-prefix=FUNC -; RUN: llc < %s -march=r600 -mcpu=SI -verify-machineinstrs | FileCheck %s --check-prefix=SI --check-prefix=FUNC +; RUN: llc < %s -march=amdgcn -mcpu=SI -verify-machineinstrs | FileCheck %s --check-prefix=SI --check-prefix=FUNC +; RUN: llc < %s -march=amdgcn -mcpu=tonga -verify-machineinstrs | FileCheck %s --check-prefix=SI --check-prefix=FUNC ; FUNC-LABEL: {{^}}u32_mul24: ; EG: MUL_UINT24 {{[* ]*}}T{{[0-9]\.[XYZW]}}, KC0[2].Z, KC0[2].W diff --git a/test/CodeGen/R600/mulhu.ll b/test/CodeGen/R600/mulhu.ll index 82a0783..29b0944 100644 --- a/test/CodeGen/R600/mulhu.ll +++ b/test/CodeGen/R600/mulhu.ll @@ -1,7 +1,8 @@ -;RUN: llc < %s -march=r600 -mcpu=verde -verify-machineinstrs | FileCheck %s +;RUN: llc < %s -march=amdgcn -mcpu=verde -verify-machineinstrs | FileCheck %s +;RUN: llc < %s -march=amdgcn -mcpu=tonga -verify-machineinstrs | FileCheck %s ;CHECK: v_mov_b32_e32 v{{[0-9]+}}, 0xaaaaaaab -;CHECK: v_mul_hi_u32 v0, {{[sv][0-9]+}}, {{v[0-9]+}} +;CHECK: v_mul_hi_u32 v0, {{v[0-9]+}}, {{s[0-9]+}} ;CHECK-NEXT: v_lshrrev_b32_e32 v0, 1, v0 define void @test(i32 %p) { diff --git a/test/CodeGen/R600/no-initializer-constant-addrspace.ll b/test/CodeGen/R600/no-initializer-constant-addrspace.ll index cd2dca3..532edf0 100644 --- a/test/CodeGen/R600/no-initializer-constant-addrspace.ll +++ b/test/CodeGen/R600/no-initializer-constant-addrspace.ll @@ -1,4 +1,5 @@ -; RUN: llc -march=r600 -mcpu=SI -o /dev/null %s +; RUN: llc -march=amdgcn -mcpu=SI -o /dev/null %s +; RUN: llc -march=amdgcn -mcpu=tonga -o /dev/null %s ; RUN: llc -march=r600 -mcpu=cypress -o /dev/null %s @extern_const_addrspace = external unnamed_addr addrspace(2) constant [5 x i32], align 4 diff --git a/test/CodeGen/R600/no-shrink-extloads.ll b/test/CodeGen/R600/no-shrink-extloads.ll new file mode 100644 index 0000000..3079492 --- /dev/null +++ b/test/CodeGen/R600/no-shrink-extloads.ll @@ -0,0 +1,191 @@ +; RUN: llc -march=amdgcn -mcpu=SI -verify-machineinstrs < %s | FileCheck -check-prefix=SI -check-prefix=FUNC %s + +declare i32 @llvm.r600.read.tidig.x() nounwind readnone + +; Make sure we don't turn the 32-bit argument load into a 16-bit +; load. There aren't extending scalar lods, so that would require +; using a buffer_load instruction. + +; FUNC-LABEL: {{^}}truncate_kernarg_i32_to_i16: +; SI: s_load_dword s +; SI: buffer_store_short v +define void @truncate_kernarg_i32_to_i16(i16 addrspace(1)* %out, i32 %arg) nounwind { + %trunc = trunc i32 %arg to i16 + store i16 %trunc, i16 addrspace(1)* %out + ret void +} + +; It should be OK (and probably performance neutral) to reduce this, +; but we don't know if the load is uniform yet. 
+ +; FUNC-LABEL: {{^}}truncate_buffer_load_i32_to_i16: +; SI: buffer_load_dword v +; SI: buffer_store_short v +define void @truncate_buffer_load_i32_to_i16(i16 addrspace(1)* %out, i32 addrspace(1)* %in) nounwind { + %tid = call i32 @llvm.r600.read.tidig.x() nounwind readnone + %gep.in = getelementptr i32 addrspace(1)* %in, i32 %tid + %gep.out = getelementptr i16 addrspace(1)* %out, i32 %tid + %load = load i32 addrspace(1)* %gep.in + %trunc = trunc i32 %load to i16 + store i16 %trunc, i16 addrspace(1)* %gep.out + ret void +} + +; FUNC-LABEL: {{^}}truncate_kernarg_i32_to_i8: +; SI: s_load_dword s +; SI: buffer_store_byte v +define void @truncate_kernarg_i32_to_i8(i8 addrspace(1)* %out, i32 %arg) nounwind { + %trunc = trunc i32 %arg to i8 + store i8 %trunc, i8 addrspace(1)* %out + ret void +} + +; FUNC-LABEL: {{^}}truncate_buffer_load_i32_to_i8: +; SI: buffer_load_dword v +; SI: buffer_store_byte v +define void @truncate_buffer_load_i32_to_i8(i8 addrspace(1)* %out, i32 addrspace(1)* %in) nounwind { + %tid = call i32 @llvm.r600.read.tidig.x() nounwind readnone + %gep.in = getelementptr i32 addrspace(1)* %in, i32 %tid + %gep.out = getelementptr i8 addrspace(1)* %out, i32 %tid + %load = load i32 addrspace(1)* %gep.in + %trunc = trunc i32 %load to i8 + store i8 %trunc, i8 addrspace(1)* %gep.out + ret void +} + +; FUNC-LABEL: {{^}}truncate_kernarg_i32_to_i1: +; SI: s_load_dword s +; SI: buffer_store_byte v +define void @truncate_kernarg_i32_to_i1(i1 addrspace(1)* %out, i32 %arg) nounwind { + %trunc = trunc i32 %arg to i1 + store i1 %trunc, i1 addrspace(1)* %out + ret void +} + +; FUNC-LABEL: {{^}}truncate_buffer_load_i32_to_i1: +; SI: buffer_load_dword v +; SI: buffer_store_byte v +define void @truncate_buffer_load_i32_to_i1(i1 addrspace(1)* %out, i32 addrspace(1)* %in) nounwind { + %tid = call i32 @llvm.r600.read.tidig.x() nounwind readnone + %gep.in = getelementptr i32 addrspace(1)* %in, i32 %tid + %gep.out = getelementptr i1 addrspace(1)* %out, i32 %tid + %load = load i32 addrspace(1)* %gep.in + %trunc = trunc i32 %load to i1 + store i1 %trunc, i1 addrspace(1)* %gep.out + ret void +} + +; FUNC-LABEL: {{^}}truncate_kernarg_i64_to_i32: +; SI: s_load_dword s +; SI: buffer_store_dword v +define void @truncate_kernarg_i64_to_i32(i32 addrspace(1)* %out, i64 %arg) nounwind { + %trunc = trunc i64 %arg to i32 + store i32 %trunc, i32 addrspace(1)* %out + ret void +} + +; FUNC-LABEL: {{^}}truncate_buffer_load_i64_to_i32: +; SI: buffer_load_dword v +; SI: buffer_store_dword v +define void @truncate_buffer_load_i64_to_i32(i32 addrspace(1)* %out, i64 addrspace(1)* %in) nounwind { + %tid = call i32 @llvm.r600.read.tidig.x() nounwind readnone + %gep.in = getelementptr i64 addrspace(1)* %in, i32 %tid + %gep.out = getelementptr i32 addrspace(1)* %out, i32 %tid + %load = load i64 addrspace(1)* %gep.in + %trunc = trunc i64 %load to i32 + store i32 %trunc, i32 addrspace(1)* %gep.out + ret void +} + +; FUNC-LABEL: {{^}}srl_kernarg_i64_to_i32: +; SI: s_load_dword s +; SI: buffer_store_dword v +define void @srl_kernarg_i64_to_i32(i32 addrspace(1)* %out, i64 %arg) nounwind { + %srl = lshr i64 %arg, 32 + %trunc = trunc i64 %srl to i32 + store i32 %trunc, i32 addrspace(1)* %out + ret void +} + +; FUNC-LABEL: {{^}}srl_buffer_load_i64_to_i32: +; SI: buffer_load_dword v +; SI: buffer_store_dword v +define void @srl_buffer_load_i64_to_i32(i32 addrspace(1)* %out, i64 addrspace(1)* %in) nounwind { + %tid = call i32 @llvm.r600.read.tidig.x() nounwind readnone + %gep.in = getelementptr i64 addrspace(1)* %in, i32 %tid + %gep.out 
= getelementptr i32 addrspace(1)* %out, i32 %tid + %load = load i64 addrspace(1)* %gep.in + %srl = lshr i64 %load, 32 + %trunc = trunc i64 %srl to i32 + store i32 %trunc, i32 addrspace(1)* %gep.out + ret void +} + +; Might as well reduce to 8-bit loads. +; FUNC-LABEL: {{^}}truncate_kernarg_i16_to_i8: +; SI: s_load_dword s +; SI: buffer_store_byte v +define void @truncate_kernarg_i16_to_i8(i8 addrspace(1)* %out, i16 %arg) nounwind { + %trunc = trunc i16 %arg to i8 + store i8 %trunc, i8 addrspace(1)* %out + ret void +} + +; FUNC-LABEL: {{^}}truncate_buffer_load_i16_to_i8: +; SI: buffer_load_ubyte v +; SI: buffer_store_byte v +define void @truncate_buffer_load_i16_to_i8(i8 addrspace(1)* %out, i16 addrspace(1)* %in) nounwind { + %tid = call i32 @llvm.r600.read.tidig.x() nounwind readnone + %gep.in = getelementptr i16 addrspace(1)* %in, i32 %tid + %gep.out = getelementptr i8 addrspace(1)* %out, i32 %tid + %load = load i16 addrspace(1)* %gep.in + %trunc = trunc i16 %load to i8 + store i8 %trunc, i8 addrspace(1)* %gep.out + ret void +} + +; FUNC-LABEL: {{^}}srl_kernarg_i64_to_i8: +; SI: s_load_dword s +; SI: buffer_store_byte v +define void @srl_kernarg_i64_to_i8(i8 addrspace(1)* %out, i64 %arg) nounwind { + %srl = lshr i64 %arg, 32 + %trunc = trunc i64 %srl to i8 + store i8 %trunc, i8 addrspace(1)* %out + ret void +} + +; FUNC-LABEL: {{^}}srl_buffer_load_i64_to_i8: +; SI: buffer_load_dword v +; SI: buffer_store_byte v +define void @srl_buffer_load_i64_to_i8(i8 addrspace(1)* %out, i64 addrspace(1)* %in) nounwind { + %tid = call i32 @llvm.r600.read.tidig.x() nounwind readnone + %gep.in = getelementptr i64 addrspace(1)* %in, i32 %tid + %gep.out = getelementptr i8 addrspace(1)* %out, i32 %tid + %load = load i64 addrspace(1)* %gep.in + %srl = lshr i64 %load, 32 + %trunc = trunc i64 %srl to i8 + store i8 %trunc, i8 addrspace(1)* %gep.out + ret void +} + +; FUNC-LABEL: {{^}}truncate_kernarg_i64_to_i8: +; SI: s_load_dword s +; SI: buffer_store_byte v +define void @truncate_kernarg_i64_to_i8(i8 addrspace(1)* %out, i64 %arg) nounwind { + %trunc = trunc i64 %arg to i8 + store i8 %trunc, i8 addrspace(1)* %out + ret void +} + +; FUNC-LABEL: {{^}}truncate_buffer_load_i64_to_i8: +; SI: buffer_load_dword v +; SI: buffer_store_byte v +define void @truncate_buffer_load_i64_to_i8(i8 addrspace(1)* %out, i64 addrspace(1)* %in) nounwind { + %tid = call i32 @llvm.r600.read.tidig.x() nounwind readnone + %gep.in = getelementptr i64 addrspace(1)* %in, i32 %tid + %gep.out = getelementptr i8 addrspace(1)* %out, i32 %tid + %load = load i64 addrspace(1)* %gep.in + %trunc = trunc i64 %load to i8 + store i8 %trunc, i8 addrspace(1)* %gep.out + ret void +} diff --git a/test/CodeGen/R600/operand-folding.ll b/test/CodeGen/R600/operand-folding.ll new file mode 100644 index 0000000..88a8145 --- /dev/null +++ b/test/CodeGen/R600/operand-folding.ll @@ -0,0 +1,113 @@ +; RUN: llc < %s -march=amdgcn -mcpu=SI -verify-machineinstrs | FileCheck %s + +; CHECK-LABEL: {{^}}fold_sgpr: +; CHECK: v_add_i32_e32 v{{[0-9]+}}, s +define void @fold_sgpr(i32 addrspace(1)* %out, i32 %fold) { +entry: + %tmp0 = icmp ne i32 %fold, 0 + br i1 %tmp0, label %if, label %endif + +if: + %id = call i32 @llvm.r600.read.tidig.x() + %offset = add i32 %fold, %id + %tmp1 = getelementptr i32 addrspace(1)* %out, i32 %offset + store i32 0, i32 addrspace(1)* %tmp1 + br label %endif + +endif: + ret void +} + +; CHECK-LABEL: {{^}}fold_imm: +; CHECK: v_or_b32_e32 v{{[0-9]+}}, 5 +define void @fold_imm(i32 addrspace(1)* %out, i32 %cmp) { +entry: + %fold = add i32 3, 2 + %tmp0 = 
icmp ne i32 %cmp, 0 + br i1 %tmp0, label %if, label %endif + +if: + %id = call i32 @llvm.r600.read.tidig.x() + %val = or i32 %id, %fold + store i32 %val, i32 addrspace(1)* %out + br label %endif + +endif: + ret void +} + +; CHECK-LABEL: {{^}}fold_64bit_constant_add: +; CHECK-NOT: s_mov_b64 +; FIXME: It would be better if we could use v_add here and drop the extra +; v_mov_b32 instructions. +; CHECK-DAG: s_add_u32 [[LO:s[0-9]+]], s{{[0-9]+}}, 1 +; CHECK-DAG: s_addc_u32 [[HI:s[0-9]+]], s{{[0-9]+}}, 0 +; CHECK-DAG: v_mov_b32_e32 v[[VLO:[0-9]+]], [[LO]] +; CHECK-DAG: v_mov_b32_e32 v[[VHI:[0-9]+]], [[HI]] +; CHECK: buffer_store_dwordx2 v{{\[}}[[VLO]]:[[VHI]]{{\]}}, + +define void @fold_64bit_constant_add(i64 addrspace(1)* %out, i32 %cmp, i64 %val) { +entry: + %tmp0 = add i64 %val, 1 + store i64 %tmp0, i64 addrspace(1)* %out + ret void +} + +; Inline constants should always be folded. + +; CHECK-LABEL: {{^}}vector_inline: +; CHECK: v_xor_b32_e32 v{{[0-9]+}}, 5, v{{[0-9]+}} +; CHECK: v_xor_b32_e32 v{{[0-9]+}}, 5, v{{[0-9]+}} +; CHECK: v_xor_b32_e32 v{{[0-9]+}}, 5, v{{[0-9]+}} +; CHECK: v_xor_b32_e32 v{{[0-9]+}}, 5, v{{[0-9]+}} + +define void @vector_inline(<4 x i32> addrspace(1)* %out) { +entry: + %tmp0 = call i32 @llvm.r600.read.tidig.x() + %tmp1 = add i32 %tmp0, 1 + %tmp2 = add i32 %tmp0, 2 + %tmp3 = add i32 %tmp0, 3 + %vec0 = insertelement <4 x i32> undef, i32 %tmp0, i32 0 + %vec1 = insertelement <4 x i32> %vec0, i32 %tmp1, i32 1 + %vec2 = insertelement <4 x i32> %vec1, i32 %tmp2, i32 2 + %vec3 = insertelement <4 x i32> %vec2, i32 %tmp3, i32 3 + %tmp4 = xor <4 x i32> <i32 5, i32 5, i32 5, i32 5>, %vec3 + store <4 x i32> %tmp4, <4 x i32> addrspace(1)* %out + ret void +} + +; Immediates with one use should be folded +; CHECK-LABEL: {{^}}imm_one_use: +; CHECK: v_xor_b32_e32 v{{[0-9]+}}, 0x64, v{{[0-9]+}} + +define void @imm_one_use(i32 addrspace(1)* %out) { +entry: + %tmp0 = call i32 @llvm.r600.read.tidig.x() + %tmp1 = xor i32 %tmp0, 100 + store i32 %tmp1, i32 addrspace(1)* %out + ret void +} +; CHECK-LABEL: {{^}}vector_imm: +; CHECK: s_movk_i32 [[IMM:s[0-9]+]], 0x64 +; CHECK: v_xor_b32_e32 v{{[0-9]}}, [[IMM]], v{{[0-9]}} +; CHECK: v_xor_b32_e32 v{{[0-9]}}, [[IMM]], v{{[0-9]}} +; CHECK: v_xor_b32_e32 v{{[0-9]}}, [[IMM]], v{{[0-9]}} +; CHECK: v_xor_b32_e32 v{{[0-9]}}, [[IMM]], v{{[0-9]}} + +define void @vector_imm(<4 x i32> addrspace(1)* %out) { +entry: + %tmp0 = call i32 @llvm.r600.read.tidig.x() + %tmp1 = add i32 %tmp0, 1 + %tmp2 = add i32 %tmp0, 2 + %tmp3 = add i32 %tmp0, 3 + %vec0 = insertelement <4 x i32> undef, i32 %tmp0, i32 0 + %vec1 = insertelement <4 x i32> %vec0, i32 %tmp1, i32 1 + %vec2 = insertelement <4 x i32> %vec1, i32 %tmp2, i32 2 + %vec3 = insertelement <4 x i32> %vec2, i32 %tmp3, i32 3 + %tmp4 = xor <4 x i32> <i32 100, i32 100, i32 100, i32 100>, %vec3 + store <4 x i32> %tmp4, <4 x i32> addrspace(1)* %out + ret void +} + +declare i32 @llvm.r600.read.tidig.x() #0 +attributes #0 = { readnone } diff --git a/test/CodeGen/R600/operand-spacing.ll b/test/CodeGen/R600/operand-spacing.ll index f0d228d..20420a8 100644 --- a/test/CodeGen/R600/operand-spacing.ll +++ b/test/CodeGen/R600/operand-spacing.ll @@ -1,13 +1,16 @@ -; RUN: llc -march=r600 -mcpu=SI -verify-machineinstrs < %s | FileCheck -strict-whitespace -check-prefix=SI %s +; RUN: llc -march=amdgcn -mcpu=SI -verify-machineinstrs < %s | FileCheck -strict-whitespace -check-prefix=SI -check-prefix=GCN %s +; RUN: llc -march=amdgcn -mcpu=tonga -verify-machineinstrs < %s | FileCheck -strict-whitespace -check-prefix=VI -check-prefix=GCN %s 
; Make sure there isn't an extra space between the instruction name and first operands. -; SI-LABEL: {{^}}add_f32: +; GCN-LABEL: {{^}}add_f32: ; SI-DAG: s_load_dword [[SREGA:s[0-9]+]], s{{\[[0-9]+:[0-9]+\]}}, 0xb ; SI-DAG: s_load_dword [[SREGB:s[0-9]+]], s{{\[[0-9]+:[0-9]+\]}}, 0xc -; SI: v_mov_b32_e32 [[VREGB:v[0-9]+]], [[SREGB]] -; SI: v_add_f32_e32 [[RESULT:v[0-9]+]], [[SREGA]], [[VREGB]] -; SI: buffer_store_dword [[RESULT]], +; VI-DAG: s_load_dword [[SREGA:s[0-9]+]], s{{\[[0-9]+:[0-9]+\]}}, 0x2c +; VI-DAG: s_load_dword [[SREGB:s[0-9]+]], s{{\[[0-9]+:[0-9]+\]}}, 0x30 +; GCN: v_mov_b32_e32 [[VREGB:v[0-9]+]], [[SREGB]] +; GCN: v_add_f32_e32 [[RESULT:v[0-9]+]], [[SREGA]], [[VREGB]] +; GCN: buffer_store_dword [[RESULT]], define void @add_f32(float addrspace(1)* %out, float %a, float %b) { %result = fadd float %a, %b store float %result, float addrspace(1)* %out diff --git a/test/CodeGen/R600/or.ll b/test/CodeGen/R600/or.ll index b7493d3..78879a8 100644 --- a/test/CodeGen/R600/or.ll +++ b/test/CodeGen/R600/or.ll @@ -1,14 +1,14 @@ -;RUN: llc < %s -march=r600 -mcpu=redwood | FileCheck --check-prefix=EG %s -;RUN: llc < %s -march=r600 -mcpu=verde -verify-machineinstrs | FileCheck --check-prefix=SI %s +; RUN: llc -march=amdgcn -mcpu=tonga -verify-machineinstrs < %s | FileCheck -check-prefix=SI -check-prefix=FUNC %s +; RUN: llc -march=amdgcn -mcpu=verde -verify-machineinstrs < %s | FileCheck -check-prefix=SI -check-prefix=FUNC %s +; RUN: llc -march=r600 -mcpu=redwood < %s | FileCheck -check-prefix=EG -check-prefix=FUNC %s -; EG-LABEL: {{^}}or_v2i32: + +; FUNC-LABEL: {{^}}or_v2i32: ; EG: OR_INT {{\*? *}}T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}} ; EG: OR_INT {{\*? *}}T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}} -; SI-LABEL: {{^}}or_v2i32: ; SI: v_or_b32_e32 v{{[0-9]+, v[0-9]+, v[0-9]+}} ; SI: v_or_b32_e32 v{{[0-9]+, v[0-9]+, v[0-9]+}} - define void @or_v2i32(<2 x i32> addrspace(1)* %out, <2 x i32> addrspace(1)* %in) { %b_ptr = getelementptr <2 x i32> addrspace(1)* %in, i32 1 %a = load <2 x i32> addrspace(1) * %in @@ -18,18 +18,16 @@ define void @or_v2i32(<2 x i32> addrspace(1)* %out, <2 x i32> addrspace(1)* %in) ret void } -; EG-LABEL: {{^}}or_v4i32: +; FUNC-LABEL: {{^}}or_v4i32: ; EG: OR_INT {{\*? *}}T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}} ; EG: OR_INT {{\*? *}}T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}} ; EG: OR_INT {{\*? *}}T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}} ; EG: OR_INT {{\*? 
*}}T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}} -; SI-LABEL: {{^}}or_v4i32: ; SI: v_or_b32_e32 v{{[0-9]+, v[0-9]+, v[0-9]+}} ; SI: v_or_b32_e32 v{{[0-9]+, v[0-9]+, v[0-9]+}} ; SI: v_or_b32_e32 v{{[0-9]+, v[0-9]+, v[0-9]+}} ; SI: v_or_b32_e32 v{{[0-9]+, v[0-9]+, v[0-9]+}} - define void @or_v4i32(<4 x i32> addrspace(1)* %out, <4 x i32> addrspace(1)* %in) { %b_ptr = getelementptr <4 x i32> addrspace(1)* %in, i32 1 %a = load <4 x i32> addrspace(1) * %in @@ -39,7 +37,7 @@ define void @or_v4i32(<4 x i32> addrspace(1)* %out, <4 x i32> addrspace(1)* %in) ret void } -; SI-LABEL: {{^}}scalar_or_i32: +; FUNC-LABEL: {{^}}scalar_or_i32: ; SI: s_or_b32 define void @scalar_or_i32(i32 addrspace(1)* %out, i32 %a, i32 %b) { %or = or i32 %a, %b @@ -47,7 +45,7 @@ define void @scalar_or_i32(i32 addrspace(1)* %out, i32 %a, i32 %b) { ret void } -; SI-LABEL: {{^}}vector_or_i32: +; FUNC-LABEL: {{^}}vector_or_i32: ; SI: v_or_b32_e32 v{{[0-9]}} define void @vector_or_i32(i32 addrspace(1)* %out, i32 addrspace(1)* %a, i32 %b) { %loada = load i32 addrspace(1)* %a @@ -56,7 +54,7 @@ define void @vector_or_i32(i32 addrspace(1)* %out, i32 addrspace(1)* %a, i32 %b) ret void } -; SI-LABEL: {{^}}scalar_or_literal_i32: +; FUNC-LABEL: {{^}}scalar_or_literal_i32: ; SI: s_or_b32 s{{[0-9]+}}, s{{[0-9]+}}, 0x1869f define void @scalar_or_literal_i32(i32 addrspace(1)* %out, i32 %a) { %or = or i32 %a, 99999 @@ -64,7 +62,7 @@ define void @scalar_or_literal_i32(i32 addrspace(1)* %out, i32 %a) { ret void } -; SI-LABEL: {{^}}vector_or_literal_i32: +; FUNC-LABEL: {{^}}vector_or_literal_i32: ; SI: v_or_b32_e32 v{{[0-9]+}}, 0xffff, v{{[0-9]+}} define void @vector_or_literal_i32(i32 addrspace(1)* %out, i32 addrspace(1)* %a, i32 addrspace(1)* %b) { %loada = load i32 addrspace(1)* %a, align 4 @@ -73,7 +71,7 @@ define void @vector_or_literal_i32(i32 addrspace(1)* %out, i32 addrspace(1)* %a, ret void } -; SI-LABEL: {{^}}vector_or_inline_immediate_i32: +; FUNC-LABEL: {{^}}vector_or_inline_immediate_i32: ; SI: v_or_b32_e32 v{{[0-9]+}}, 4, v{{[0-9]+}} define void @vector_or_inline_immediate_i32(i32 addrspace(1)* %out, i32 addrspace(1)* %a, i32 addrspace(1)* %b) { %loada = load i32 addrspace(1)* %a, align 4 @@ -82,10 +80,10 @@ define void @vector_or_inline_immediate_i32(i32 addrspace(1)* %out, i32 addrspac ret void } -; EG-LABEL: {{^}}scalar_or_i64: +; FUNC-LABEL: {{^}}scalar_or_i64: ; EG-DAG: OR_INT * T{{[0-9]\.[XYZW]}}, KC0[2].W, KC0[3].Y ; EG-DAG: OR_INT * T{{[0-9]\.[XYZW]}}, KC0[3].X, KC0[3].Z -; SI-LABEL: {{^}}scalar_or_i64: + ; SI: s_or_b64 define void @scalar_or_i64(i64 addrspace(1)* %out, i64 %a, i64 %b) { %or = or i64 %a, %b @@ -93,7 +91,7 @@ define void @scalar_or_i64(i64 addrspace(1)* %out, i64 %a, i64 %b) { ret void } -; SI-LABEL: {{^}}vector_or_i64: +; FUNC-LABEL: {{^}}vector_or_i64: ; SI: v_or_b32_e32 v{{[0-9]}} ; SI: v_or_b32_e32 v{{[0-9]}} define void @vector_or_i64(i64 addrspace(1)* %out, i64 addrspace(1)* %a, i64 addrspace(1)* %b) { @@ -104,7 +102,7 @@ define void @vector_or_i64(i64 addrspace(1)* %out, i64 addrspace(1)* %a, i64 add ret void } -; SI-LABEL: {{^}}scalar_vector_or_i64: +; FUNC-LABEL: {{^}}scalar_vector_or_i64: ; SI: v_or_b32_e32 v{{[0-9]}} ; SI: v_or_b32_e32 v{{[0-9]}} define void @scalar_vector_or_i64(i64 addrspace(1)* %out, i64 addrspace(1)* %a, i64 %b) { @@ -114,7 +112,7 @@ define void @scalar_vector_or_i64(i64 addrspace(1)* %out, i64 addrspace(1)* %a, ret void } -; SI-LABEL: {{^}}vector_or_i64_loadimm: +; FUNC-LABEL: {{^}}vector_or_i64_loadimm: ; SI-DAG: s_mov_b32 [[LO_S_IMM:s[0-9]+]], 0xdf77987f ; SI-DAG: 
s_movk_i32 [[HI_S_IMM:s[0-9]+]], 0x146f ; SI-DAG: buffer_load_dwordx2 v{{\[}}[[LO_VREG:[0-9]+]]:[[HI_VREG:[0-9]+]]{{\]}}, @@ -129,7 +127,7 @@ define void @vector_or_i64_loadimm(i64 addrspace(1)* %out, i64 addrspace(1)* %a, } ; FIXME: The or 0 should really be removed. -; SI-LABEL: {{^}}vector_or_i64_imm: +; FUNC-LABEL: {{^}}vector_or_i64_imm: ; SI: buffer_load_dwordx2 v{{\[}}[[LO_VREG:[0-9]+]]:[[HI_VREG:[0-9]+]]{{\]}}, ; SI: v_or_b32_e32 {{v[0-9]+}}, 8, v[[LO_VREG]] ; SI: v_or_b32_e32 {{v[0-9]+}}, 0, {{.*}} @@ -141,7 +139,7 @@ define void @vector_or_i64_imm(i64 addrspace(1)* %out, i64 addrspace(1)* %a, i64 ret void } -; SI-LABEL: {{^}}trunc_i64_or_to_i32: +; FUNC-LABEL: {{^}}trunc_i64_or_to_i32: ; SI: s_load_dword s[[SREG0:[0-9]+]] ; SI: s_load_dword s[[SREG1:[0-9]+]] ; SI: s_or_b32 s[[SRESULT:[0-9]+]], s[[SREG1]], s[[SREG0]] @@ -154,14 +152,13 @@ define void @trunc_i64_or_to_i32(i32 addrspace(1)* %out, i64 %a, i64 %b) { ret void } -; EG-CHECK: {{^}}or_i1: -; EG-CHECK: OR_INT {{\** *}}T{{[0-9]+\.[XYZW], PV\.[XYZW], PS}} +; FUNC-LABEL: {{^}}or_i1: +; EG: OR_INT {{\** *}}T{{[0-9]+\.[XYZW], PV\.[XYZW], PS}} -; SI-CHECK: {{^}}or_i1: -; SI-CHECK: s_or_b64 s[{{[0-9]+:[0-9]+}}], s[{{[0-9]+:[0-9]+}}], s[{{[0-9]+:[0-9]+}}] +; SI: s_or_b64 s[{{[0-9]+:[0-9]+}}], s[{{[0-9]+:[0-9]+}}], s[{{[0-9]+:[0-9]+}}] define void @or_i1(float addrspace(1)* %out, float addrspace(1)* %in0, float addrspace(1)* %in1) { - %a = load float addrspace(1) * %in0 - %b = load float addrspace(1) * %in1 + %a = load float addrspace(1)* %in0 + %b = load float addrspace(1)* %in1 %acmp = fcmp oge float %a, 0.000000e+00 %bcmp = fcmp oge float %b, 0.000000e+00 %or = or i1 %acmp, %bcmp @@ -169,3 +166,13 @@ define void @or_i1(float addrspace(1)* %out, float addrspace(1)* %in0, float add store float %result, float addrspace(1)* %out ret void } + +; FUNC-LABEL: {{^}}s_or_i1: +; SI: s_or_b64 s[{{[0-9]+:[0-9]+}}], vcc, s[{{[0-9]+:[0-9]+}}] +define void @s_or_i1(i1 addrspace(1)* %out, i32 %a, i32 %b, i32 %c, i32 %d) { + %cmp0 = icmp eq i32 %a, %b + %cmp1 = icmp eq i32 %c, %d + %or = or i1 %cmp0, %cmp1 + store i1 %or, i1 addrspace(1)* %out + ret void +} diff --git a/test/CodeGen/R600/private-memory-atomics.ll b/test/CodeGen/R600/private-memory-atomics.ll index def4f9d..3ceb0c0 100644 --- a/test/CodeGen/R600/private-memory-atomics.ll +++ b/test/CodeGen/R600/private-memory-atomics.ll @@ -1,4 +1,5 @@ -; RUN: llc -verify-machineinstrs -march=r600 -mcpu=SI < %s +; RUN: llc -verify-machineinstrs -march=amdgcn -mcpu=SI < %s +; RUN: llc -verify-machineinstrs -march=amdgcn -mcpu=tonga < %s ; This works because promote allocas pass replaces these with LDS atomics. 
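For context on the comment above, here is a minimal, hand-written sketch of the rewrite the promote-alloca pass performs in this situation (illustrative IR only, not taken from this patch; all names are made up). An atomic on a private alloca is turned into an atomic on an LDS object in addrspace(3), which the DS instructions can handle, so no scratch atomics are needed:

; Before promote-alloca: an atomic on a private alloca (sketch).
define void @atomic_on_alloca(i32 addrspace(1)* %out) {
entry:
  %array = alloca [2 x i32]
  %ptr = getelementptr [2 x i32]* %array, i32 0, i32 0
  store i32 0, i32* %ptr
  %old = atomicrmw add i32* %ptr, i32 1 seq_cst
  store i32 %old, i32 addrspace(1)* %out
  ret void
}

; After promote-alloca (conceptually): the alloca is replaced by an LDS array
; and the atomic operates on an addrspace(3) pointer, roughly:
;   @lds.array = internal addrspace(3) global [2 x i32] undef
;   %old = atomicrmw add i32 addrspace(3)* %ptr.lds, i32 1 seq_cst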
diff --git a/test/CodeGen/R600/private-memory-broken.ll b/test/CodeGen/R600/private-memory-broken.ll index 4086085..10590a9 100644 --- a/test/CodeGen/R600/private-memory-broken.ll +++ b/test/CodeGen/R600/private-memory-broken.ll @@ -1,4 +1,5 @@ -; RUN: not llc -verify-machineinstrs -march=r600 -mcpu=SI %s -o /dev/null 2>&1 | FileCheck %s +; RUN: not llc -verify-machineinstrs -march=amdgcn -mcpu=SI %s -o /dev/null 2>&1 | FileCheck %s +; RUN: not llc -verify-machineinstrs -march=amdgcn -mcpu=tonga %s -o /dev/null 2>&1 | FileCheck %s ; Make sure promote alloca pass doesn't crash diff --git a/test/CodeGen/R600/private-memory.ll b/test/CodeGen/R600/private-memory.ll index bfb4a6a..b03029c 100644 --- a/test/CodeGen/R600/private-memory.ll +++ b/test/CodeGen/R600/private-memory.ll @@ -1,6 +1,8 @@ ; RUN: llc -march=r600 -mcpu=redwood < %s | FileCheck %s -check-prefix=R600 -check-prefix=FUNC -; RUN: llc -show-mc-encoding -mattr=+promote-alloca -verify-machineinstrs -march=r600 -mcpu=SI < %s | FileCheck %s -check-prefix=SI-PROMOTE -check-prefix=SI -check-prefix=FUNC -; RUN: llc -show-mc-encoding -mattr=-promote-alloca -verify-machineinstrs -march=r600 -mcpu=SI < %s | FileCheck %s -check-prefix=SI-ALLOCA -check-prefix=SI -check-prefix=FUNC +; RUN: llc -show-mc-encoding -mattr=+promote-alloca -verify-machineinstrs -march=amdgcn -mcpu=SI < %s | FileCheck %s -check-prefix=SI-PROMOTE -check-prefix=SI -check-prefix=FUNC +; RUN: llc -show-mc-encoding -mattr=-promote-alloca -verify-machineinstrs -march=amdgcn -mcpu=SI < %s | FileCheck %s -check-prefix=SI-ALLOCA -check-prefix=SI -check-prefix=FUNC +; RUN: llc -show-mc-encoding -mattr=+promote-alloca -verify-machineinstrs -march=amdgcn -mcpu=tonga < %s | FileCheck %s -check-prefix=SI-PROMOTE -check-prefix=SI -check-prefix=FUNC +; RUN: llc -show-mc-encoding -mattr=-promote-alloca -verify-machineinstrs -march=amdgcn -mcpu=tonga < %s | FileCheck %s -check-prefix=SI-ALLOCA -check-prefix=SI -check-prefix=FUNC declare i32 @llvm.r600.read.tidig.x() nounwind readnone @@ -117,7 +119,7 @@ for.end: ; R600: MOVA_INT ; SI-PROMOTE-DAG: buffer_store_short v{{[0-9]+}}, v{{[0-9]+}}, s[{{[0-9]+:[0-9]+}}], s{{[0-9]+}} offen ; encoding: [0x00,0x10,0x68,0xe0 -; SI-PROMOTE-DAG: buffer_store_short v{{[0-9]+}}, v{{[0-9]+}}, s[{{[0-9]+:[0-9]+}}], s{{[0-9]+}} offen offset:0x2 ; encoding: [0x02,0x10,0x68,0xe0 +; SI-PROMOTE-DAG: buffer_store_short v{{[0-9]+}}, v{{[0-9]+}}, s[{{[0-9]+:[0-9]+}}], s{{[0-9]+}} offen offset:2 ; encoding: [0x02,0x10,0x68,0xe0 ; SI-PROMOTE: buffer_load_sshort v{{[0-9]+}}, v{{[0-9]+}}, s[{{[0-9]+:[0-9]+}}], s{{[0-9]+}} define void @short_array(i32 addrspace(1)* %out, i32 %index) { entry: @@ -138,7 +140,7 @@ entry: ; R600: MOVA_INT ; SI-DAG: buffer_store_byte v{{[0-9]+}}, v{{[0-9]+}}, s[{{[0-9]+:[0-9]+}}], s{{[0-9]+}} offen ; encoding: [0x00,0x10,0x60,0xe0 -; SI-DAG: buffer_store_byte v{{[0-9]+}}, v{{[0-9]+}}, s[{{[0-9]+:[0-9]+}}], s{{[0-9]+}} offen offset:0x1 ; encoding: [0x01,0x10,0x60,0xe0 +; SI-DAG: buffer_store_byte v{{[0-9]+}}, v{{[0-9]+}}, s[{{[0-9]+:[0-9]+}}], s{{[0-9]+}} offen offset:1 ; encoding: [0x01,0x10,0x60,0xe0 define void @char_array(i32 addrspace(1)* %out, i32 %index) { entry: %0 = alloca [2 x i8] @@ -296,7 +298,7 @@ entry: ; FUNC-LABEL: ptrtoint: ; SI-NOT: ds_write ; SI: buffer_store_dword v{{[0-9]+}}, v{{[0-9]+}}, s[{{[0-9]+:[0-9]+}}], s{{[0-9]+}} offen -; SI: buffer_load_dword v{{[0-9]+}}, v{{[0-9]+}}, s[{{[0-9]+:[0-9]+}}], s{{[0-9]+}} offen offset:0x5 +; SI: buffer_load_dword v{{[0-9]+}}, v{{[0-9]+}}, s[{{[0-9]+:[0-9]+}}], s{{[0-9]+}} offen 
offset:5 define void @ptrtoint(i32 addrspace(1)* %out, i32 %a, i32 %b) { %alloca = alloca [16 x i32] %tmp0 = getelementptr [16 x i32]* %alloca, i32 0, i32 %a diff --git a/test/CodeGen/R600/r600-encoding.ll b/test/CodeGen/R600/r600-encoding.ll index 112cdac..3a82ee3 100644 --- a/test/CodeGen/R600/r600-encoding.ll +++ b/test/CodeGen/R600/r600-encoding.ll @@ -1,14 +1,14 @@ -; RUN: llc < %s -march=r600 -show-mc-encoding -mcpu=redwood | FileCheck --check-prefix=EG-CHECK %s -; RUN: llc < %s -march=r600 -show-mc-encoding -mcpu=rs880 | FileCheck --check-prefix=R600-CHECK %s +; RUN: llc < %s -march=r600 -show-mc-encoding -mcpu=redwood | FileCheck --check-prefix=EG %s +; RUN: llc < %s -march=r600 -show-mc-encoding -mcpu=rs880 | FileCheck --check-prefix=R600 %s ; The earliest R600 GPUs have a slightly different encoding than the rest of ; the VLIW4/5 GPUs. -; EG-CHECK: {{^}}test: -; EG-CHECK: MUL_IEEE {{[ *TXYZWPVxyzw.,0-9]+}} ; encoding: [{{0x[0-9a-f]+,0x[0-9a-f]+,0x[0-9a-f]+,0x[0-9a-f]+,0x10,0x01,0x[0-9a-f]+,0x[0-9a-f]+}}] +; EG: {{^}}test: +; EG: MUL_IEEE {{[ *TXYZWPVxyzw.,0-9]+}} ; encoding: [{{0x[0-9a-f]+,0x[0-9a-f]+,0x[0-9a-f]+,0x[0-9a-f]+,0x10,0x01,0x[0-9a-f]+,0x[0-9a-f]+}}] -; R600-CHECK: {{^}}test: -; R600-CHECK: MUL_IEEE {{[ *TXYZWPVxyzw.,0-9]+}} ; encoding: [{{0x[0-9a-f]+,0x[0-9a-f]+,0x[0-9a-f]+,0x[0-9a-f]+,0x10,0x02,0x[0-9a-f]+,0x[0-9a-f]+}}] +; R600: {{^}}test: +; R600: MUL_IEEE {{[ *TXYZWPVxyzw.,0-9]+}} ; encoding: [{{0x[0-9a-f]+,0x[0-9a-f]+,0x[0-9a-f]+,0x[0-9a-f]+,0x10,0x02,0x[0-9a-f]+,0x[0-9a-f]+}}] define void @test(<4 x float> inreg %reg0) #0 { entry: diff --git a/test/CodeGen/R600/register-count-comments.ll b/test/CodeGen/R600/register-count-comments.ll index 61d1b5e..2b49f97 100644 --- a/test/CodeGen/R600/register-count-comments.ll +++ b/test/CodeGen/R600/register-count-comments.ll @@ -1,4 +1,4 @@ -; RUN: llc -march=r600 -mcpu=SI -verify-machineinstrs -asm-verbose < %s | FileCheck -check-prefix=SI %s +; RUN: llc -march=amdgcn -mcpu=SI -verify-machineinstrs -asm-verbose < %s | FileCheck -check-prefix=SI %s declare i32 @llvm.SI.tid() nounwind readnone diff --git a/test/CodeGen/R600/reorder-stores.ll b/test/CodeGen/R600/reorder-stores.ll index 30c0171..ea50d5e 100644 --- a/test/CodeGen/R600/reorder-stores.ll +++ b/test/CodeGen/R600/reorder-stores.ll @@ -1,4 +1,5 @@ -; RUN: llc -march=r600 -mcpu=SI < %s | FileCheck -check-prefix=SI %s +; RUN: llc -march=amdgcn -mcpu=SI < %s | FileCheck -check-prefix=SI %s +; RUN: llc -march=amdgcn -mcpu=tonga < %s | FileCheck -check-prefix=SI %s ; SI-LABEL: {{^}}no_reorder_v2f64_global_load_store: ; SI: buffer_load_dwordx2 diff --git a/test/CodeGen/R600/rotl.i64.ll b/test/CodeGen/R600/rotl.i64.ll index 84a35b6..6da17a4 100644 --- a/test/CodeGen/R600/rotl.i64.ll +++ b/test/CodeGen/R600/rotl.i64.ll @@ -1,11 +1,12 @@ -; RUN: llc -march=r600 -mcpu=SI -verify-machineinstrs < %s | FileCheck -check-prefix=SI -check-prefix=FUNC %s +; RUN: llc -march=amdgcn -mcpu=SI -verify-machineinstrs < %s | FileCheck -check-prefix=SI -check-prefix=BOTH %s +; RUN: llc -march=amdgcn -mcpu=tonga -verify-machineinstrs < %s | FileCheck -check-prefix=VI -check-prefix=BOTH %s -; FUNC-LABEL: {{^}}s_rotl_i64: -; SI-DAG: s_lshl_b64 -; SI-DAG: s_sub_i32 -; SI-DAG: s_lshr_b64 -; SI: s_or_b64 -; SI: s_endpgm +; BOTH-LABEL: {{^}}s_rotl_i64: +; BOTH-DAG: s_lshl_b64 +; BOTH-DAG: s_sub_i32 +; BOTH-DAG: s_lshr_b64 +; BOTH: s_or_b64 +; BOTH: s_endpgm define void @s_rotl_i64(i64 addrspace(1)* %in, i64 %x, i64 %y) { entry: %0 = shl i64 %x, %y @@ -16,13 +17,15 @@ entry: ret void } -; 
FUNC-LABEL: {{^}}v_rotl_i64: +; BOTH-LABEL: {{^}}v_rotl_i64: ; SI-DAG: v_lshl_b64 -; SI-DAG: v_sub_i32 +; VI-DAG: v_lshlrev_b64 +; BOTH-DAG: v_sub_i32 ; SI: v_lshr_b64 -; SI: v_or_b32 -; SI: v_or_b32 -; SI: s_endpgm +; VI: v_lshrrev_b64 +; BOTH: v_or_b32 +; BOTH: v_or_b32 +; BOTH: s_endpgm define void @v_rotl_i64(i64 addrspace(1)* %in, i64 addrspace(1)* %xptr, i64 addrspace(1)* %yptr) { entry: %x = load i64 addrspace(1)* %xptr, align 8 diff --git a/test/CodeGen/R600/rotl.ll b/test/CodeGen/R600/rotl.ll index 6c8e503..6c144cd 100644 --- a/test/CodeGen/R600/rotl.ll +++ b/test/CodeGen/R600/rotl.ll @@ -1,5 +1,6 @@ ; RUN: llc -march=r600 -mcpu=redwood < %s | FileCheck --check-prefix=R600 -check-prefix=FUNC %s -; RUN: llc -march=r600 -mcpu=SI -verify-machineinstrs < %s | FileCheck -check-prefix=SI -check-prefix=FUNC %s +; RUN: llc -march=amdgcn -mcpu=SI -verify-machineinstrs < %s | FileCheck -check-prefix=SI -check-prefix=FUNC %s +; RUN: llc -march=amdgcn -mcpu=tonga -verify-machineinstrs < %s | FileCheck -check-prefix=SI -check-prefix=FUNC %s ; FUNC-LABEL: {{^}}rotl_i32: ; R600: SUB_INT {{\** T[0-9]+\.[XYZW]}}, literal.x diff --git a/test/CodeGen/R600/rotr.i64.ll b/test/CodeGen/R600/rotr.i64.ll index 9e14570..f1d1d26 100644 --- a/test/CodeGen/R600/rotr.i64.ll +++ b/test/CodeGen/R600/rotr.i64.ll @@ -1,10 +1,11 @@ -; RUN: llc -march=r600 -mcpu=SI -verify-machineinstrs < %s | FileCheck -check-prefix=SI -check-prefix=FUNC %s +; RUN: llc -march=amdgcn -mcpu=SI -verify-machineinstrs < %s | FileCheck -check-prefix=SI -check-prefix=BOTH %s +; RUN: llc -march=amdgcn -mcpu=tonga -verify-machineinstrs < %s | FileCheck -check-prefix=VI -check-prefix=BOTH %s -; FUNC-LABEL: {{^}}s_rotr_i64: -; SI-DAG: s_sub_i32 -; SI-DAG: s_lshr_b64 -; SI-DAG: s_lshl_b64 -; SI: s_or_b64 +; BOTH-LABEL: {{^}}s_rotr_i64: +; BOTH-DAG: s_sub_i32 +; BOTH-DAG: s_lshr_b64 +; BOTH-DAG: s_lshl_b64 +; BOTH: s_or_b64 define void @s_rotr_i64(i64 addrspace(1)* %in, i64 %x, i64 %y) { entry: %tmp0 = sub i64 64, %y @@ -15,12 +16,14 @@ entry: ret void } -; FUNC-LABEL: {{^}}v_rotr_i64: -; SI-DAG: v_sub_i32 +; BOTH-LABEL: {{^}}v_rotr_i64: +; BOTH-DAG: v_sub_i32 ; SI-DAG: v_lshr_b64 ; SI-DAG: v_lshl_b64 -; SI: v_or_b32 -; SI: v_or_b32 +; VI-DAG: v_lshrrev_b64 +; VI-DAG: v_lshlrev_b64 +; BOTH: v_or_b32 +; BOTH: v_or_b32 define void @v_rotr_i64(i64 addrspace(1)* %in, i64 addrspace(1)* %xptr, i64 addrspace(1)* %yptr) { entry: %x = load i64 addrspace(1)* %xptr, align 8 @@ -33,7 +36,7 @@ entry: ret void } -; FUNC-LABEL: {{^}}s_rotr_v2i64: +; BOTH-LABEL: {{^}}s_rotr_v2i64: define void @s_rotr_v2i64(<2 x i64> addrspace(1)* %in, <2 x i64> %x, <2 x i64> %y) { entry: %tmp0 = sub <2 x i64> <i64 64, i64 64>, %y @@ -44,7 +47,7 @@ entry: ret void } -; FUNC-LABEL: {{^}}v_rotr_v2i64: +; BOTH-LABEL: {{^}}v_rotr_v2i64: define void @v_rotr_v2i64(<2 x i64> addrspace(1)* %in, <2 x i64> addrspace(1)* %xptr, <2 x i64> addrspace(1)* %yptr) { entry: %x = load <2 x i64> addrspace(1)* %xptr, align 8 diff --git a/test/CodeGen/R600/rotr.ll b/test/CodeGen/R600/rotr.ll index a1add11..044f9ff 100644 --- a/test/CodeGen/R600/rotr.ll +++ b/test/CodeGen/R600/rotr.ll @@ -1,5 +1,6 @@ ; RUN: llc -march=r600 -mcpu=redwood < %s | FileCheck --check-prefix=R600 -check-prefix=FUNC %s -; RUN: llc -march=r600 -mcpu=SI -verify-machineinstrs < %s | FileCheck -check-prefix=SI -check-prefix=FUNC %s +; RUN: llc -march=amdgcn -mcpu=SI -verify-machineinstrs < %s | FileCheck -check-prefix=SI -check-prefix=FUNC %s +; RUN: llc -march=amdgcn -mcpu=tonga -verify-machineinstrs < %s | FileCheck 
-check-prefix=SI -check-prefix=FUNC %s ; FUNC-LABEL: {{^}}rotr_i32: ; R600: BIT_ALIGN_INT diff --git a/test/CodeGen/R600/rsq.ll b/test/CodeGen/R600/rsq.ll index d792c9f..b8a23df 100644 --- a/test/CodeGen/R600/rsq.ll +++ b/test/CodeGen/R600/rsq.ll @@ -1,6 +1,7 @@ -; RUN: llc -march=r600 -mcpu=SI -mattr=-fp32-denormals -verify-machineinstrs -enable-unsafe-fp-math < %s | FileCheck -check-prefix=SI-UNSAFE -check-prefix=SI %s -; RUN: llc -march=r600 -mcpu=SI -mattr=-fp32-denormals -verify-machineinstrs < %s | FileCheck -check-prefix=SI-SAFE -check-prefix=SI %s +; RUN: llc -march=amdgcn -mcpu=SI -mattr=-fp32-denormals -verify-machineinstrs -enable-unsafe-fp-math < %s | FileCheck -check-prefix=SI-UNSAFE -check-prefix=SI %s +; RUN: llc -march=amdgcn -mcpu=SI -mattr=-fp32-denormals -verify-machineinstrs < %s | FileCheck -check-prefix=SI-SAFE -check-prefix=SI %s +declare i32 @llvm.r600.read.tidig.x() nounwind readnone declare float @llvm.sqrt.f32(float) nounwind readnone declare double @llvm.sqrt.f64(double) nounwind readnone @@ -36,3 +37,38 @@ define void @rsq_f32_sgpr(float addrspace(1)* noalias %out, float %val) nounwind store float %div, float addrspace(1)* %out, align 4 ret void } + +; Recognize that this is rsqrt(a) * rcp(b) * c, +; not 1 / ( 1 / sqrt(a)) * rcp(b) * c. + +; SI-LABEL: @rsqrt_fmul +; SI-DAG: buffer_load_dword [[A:v[0-9]+]], {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, 0 addr64{{$}} +; SI-DAG: buffer_load_dword [[B:v[0-9]+]], {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, 0 addr64 offset:4 +; SI-DAG: buffer_load_dword [[C:v[0-9]+]], {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, 0 addr64 offset:8 + +; SI-UNSAFE-DAG: v_rsq_f32_e32 [[RSQA:v[0-9]+]], [[A]] +; SI-UNSAFE-DAG: v_rcp_f32_e32 [[RCPB:v[0-9]+]], [[B]] +; SI-UNSAFE-DAG: v_mul_f32_e32 [[TMP:v[0-9]+]], [[RCPB]], [[RSQA]] +; SI-UNSAFE: v_mul_f32_e32 [[RESULT:v[0-9]+]], [[TMP]], [[C]] +; SI-UNSAFE: buffer_store_dword [[RESULT]] + +; SI-SAFE-NOT: v_rsq_f32 + +; SI: s_endpgm +define void @rsqrt_fmul(float addrspace(1)* %out, float addrspace(1)* %in) { + %tid = call i32 @llvm.r600.read.tidig.x() nounwind readnone + %out.gep = getelementptr float addrspace(1)* %out, i32 %tid + %gep.0 = getelementptr float addrspace(1)* %in, i32 %tid + %gep.1 = getelementptr float addrspace(1)* %gep.0, i32 1 + %gep.2 = getelementptr float addrspace(1)* %gep.0, i32 2 + + %a = load float addrspace(1)* %gep.0 + %b = load float addrspace(1)* %gep.1 + %c = load float addrspace(1)* %gep.2 + + %x = call float @llvm.sqrt.f32(float %a) + %y = fmul float %x, %b + %z = fdiv float %c, %y + store float %z, float addrspace(1)* %out.gep + ret void +} diff --git a/test/CodeGen/R600/s_movk_i32.ll b/test/CodeGen/R600/s_movk_i32.ll index 71f9a41..8be2d1d 100644 --- a/test/CodeGen/R600/s_movk_i32.ll +++ b/test/CodeGen/R600/s_movk_i32.ll @@ -1,4 +1,5 @@ -; RUN: llc -march=r600 -mcpu=SI -verify-machineinstrs < %s | FileCheck -check-prefix=SI %s +; RUN: llc -march=amdgcn -mcpu=SI -verify-machineinstrs < %s | FileCheck -check-prefix=SI %s +; RUN: llc -march=amdgcn -mcpu=tonga -verify-machineinstrs < %s | FileCheck -check-prefix=SI %s ; SI-LABEL: {{^}}s_movk_i32_k0: ; SI-DAG: s_mov_b32 [[LO_S_IMM:s[0-9]+]], 0xffff{{$}} diff --git a/test/CodeGen/R600/saddo.ll b/test/CodeGen/R600/saddo.ll index 654967c..8e625c1 100644 --- a/test/CodeGen/R600/saddo.ll +++ b/test/CodeGen/R600/saddo.ll @@ -1,4 +1,5 @@ -; RUN: llc -march=r600 -mcpu=SI -verify-machineinstrs< %s | FileCheck -check-prefix=SI -check-prefix=FUNC %s +; RUN: llc -march=amdgcn -mcpu=SI -verify-machineinstrs< %s | 
FileCheck -check-prefix=SI -check-prefix=FUNC %s +; RUN: llc -march=amdgcn -mcpu=tonga -verify-machineinstrs< %s | FileCheck -check-prefix=SI -check-prefix=FUNC %s ; RUN: llc -march=r600 -mcpu=cypress -verify-machineinstrs< %s declare { i32, i1 } @llvm.sadd.with.overflow.i32(i32, i32) nounwind readnone diff --git a/test/CodeGen/R600/salu-to-valu.ll b/test/CodeGen/R600/salu-to-valu.ll index 23af3e4..dfb181d 100644 --- a/test/CodeGen/R600/salu-to-valu.ll +++ b/test/CodeGen/R600/salu-to-valu.ll @@ -1,4 +1,4 @@ -; RUN: llc < %s -march=r600 -mcpu=SI -verify-machineinstrs | FileCheck %s +; RUN: llc < %s -march=amdgcn -mcpu=SI -verify-machineinstrs | FileCheck %s ; In this test both the pointer and the offset operands to the ; BUFFER_LOAD instructions end up being stored in vgprs. This diff --git a/test/CodeGen/R600/scalar_to_vector.ll b/test/CodeGen/R600/scalar_to_vector.ll index dc9ebe0..b82e552 100644 --- a/test/CodeGen/R600/scalar_to_vector.ll +++ b/test/CodeGen/R600/scalar_to_vector.ll @@ -1,4 +1,5 @@ -; RUN: llc -march=r600 -mcpu=SI -verify-machineinstrs < %s | FileCheck -check-prefix=SI -check-prefix=FUNC %s +; RUN: llc -march=amdgcn -mcpu=SI -verify-machineinstrs < %s | FileCheck -check-prefix=SI -check-prefix=FUNC %s +; RUN: llc -march=amdgcn -mcpu=tonga -verify-machineinstrs < %s | FileCheck -check-prefix=SI -check-prefix=FUNC %s ; FUNC-LABEL: {{^}}scalar_to_vector_v2i32: diff --git a/test/CodeGen/R600/schedule-global-loads.ll b/test/CodeGen/R600/schedule-global-loads.ll index 5422ca7..b6437d2 100644 --- a/test/CodeGen/R600/schedule-global-loads.ll +++ b/test/CodeGen/R600/schedule-global-loads.ll @@ -1,4 +1,4 @@ -; RUN: llc -march=r600 -mcpu=SI -verify-machineinstrs < %s | FileCheck -check-prefix=FUNC -check-prefix=SI %s +; RUN: llc -march=amdgcn -mcpu=SI -verify-machineinstrs < %s | FileCheck -check-prefix=FUNC -check-prefix=SI %s declare i32 @llvm.r600.read.tidig.x() #1 @@ -10,7 +10,7 @@ declare i32 @llvm.r600.read.tidig.x() #1 ; FUNC-LABEL: {{^}}cluster_global_arg_loads: ; SI-DAG: buffer_load_dword [[REG0:v[0-9]+]], s{{\[[0-9]+:[0-9]+\]}}, 0{{$}} -; SI-DAG: buffer_load_dword [[REG1:v[0-9]+]], s{{\[[0-9]+:[0-9]+\]}}, 0 offset:0x4 +; SI-DAG: buffer_load_dword [[REG1:v[0-9]+]], s{{\[[0-9]+:[0-9]+\]}}, 0 offset:4 ; SI: buffer_store_dword [[REG0]] ; SI: buffer_store_dword [[REG1]] define void @cluster_global_arg_loads(i32 addrspace(1)* %out0, i32 addrspace(1)* %out1, i32 addrspace(1)* %ptr) #0 { diff --git a/test/CodeGen/R600/schedule-kernel-arg-loads.ll b/test/CodeGen/R600/schedule-kernel-arg-loads.ll index e774157..f9641fa 100644 --- a/test/CodeGen/R600/schedule-kernel-arg-loads.ll +++ b/test/CodeGen/R600/schedule-kernel-arg-loads.ll @@ -1,10 +1,18 @@ -; RUN: llc -march=r600 -mcpu=SI -verify-machineinstrs < %s | FileCheck -check-prefix=FUNC -check-prefix=SI %s +; RUN: llc -march=amdgcn -mcpu=SI -verify-machineinstrs < %s | FileCheck -check-prefix=FUNC -check-prefix=SI %s +; RUN: llc -march=amdgcn -mcpu=tonga -verify-machineinstrs < %s | FileCheck -check-prefix=FUNC -check-prefix=VI %s ; FUNC-LABEL: {{^}}cluster_arg_loads: ; SI: s_load_dwordx2 s{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}}, 0x9 ; SI-NEXT: s_load_dwordx2 s{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}}, 0xb ; SI-NEXT: s_load_dword s{{[0-9]+}}, s{{\[[0-9]+:[0-9]+\]}}, 0xd ; SI-NEXT: s_load_dword s{{[0-9]+}}, s{{\[[0-9]+:[0-9]+\]}}, 0xe +; VI: s_load_dwordx2 s{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}}, 0x24 +; VI-NEXT: s_nop 0 +; VI-NEXT: s_load_dwordx2 s{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}}, 0x2c +; VI-NEXT: s_nop 
0 +; VI-NEXT: s_load_dword s{{[0-9]+}}, s{{\[[0-9]+:[0-9]+\]}}, 0x34 +; VI-NEXT: s_nop 0 +; VI-NEXT: s_load_dword s{{[0-9]+}}, s{{\[[0-9]+:[0-9]+\]}}, 0x38 define void @cluster_arg_loads(i32 addrspace(1)* %out0, i32 addrspace(1)* %out1, i32 %x, i32 %y) nounwind { store i32 %x, i32 addrspace(1)* %out0, align 4 store i32 %y, i32 addrspace(1)* %out1, align 4 diff --git a/test/CodeGen/R600/schedule-vs-if-nested-loop-failure.ll b/test/CodeGen/R600/schedule-vs-if-nested-loop-failure.ll index baac5b5..76b655d 100644 --- a/test/CodeGen/R600/schedule-vs-if-nested-loop-failure.ll +++ b/test/CodeGen/R600/schedule-vs-if-nested-loop-failure.ll @@ -1,6 +1,7 @@ ; XFAIL: * ; REQUIRES: asserts -; RUN: llc -O0 -march=r600 -mcpu=SI -verify-machineinstrs< %s | FileCheck %s -check-prefix=SI +; RUN: llc -O0 -march=amdgcn -mcpu=SI -verify-machineinstrs< %s | FileCheck %s -check-prefix=SI +; RUN: llc -O0 -march=amdgcn -mcpu=tonga -verify-machineinstrs< %s | FileCheck %s -check-prefix=SI declare void @llvm.AMDGPU.barrier.local() nounwind noduplicate diff --git a/test/CodeGen/R600/scratch-buffer.ll b/test/CodeGen/R600/scratch-buffer.ll new file mode 100644 index 0000000..8c5a990 --- /dev/null +++ b/test/CodeGen/R600/scratch-buffer.ll @@ -0,0 +1,87 @@ +; RUN: llc -verify-machineinstrs -march=amdgcn -mcpu=SI < %s | FileCheck %s +; RUN: llc -verify-machineinstrs -march=amdgcn -mcpu=tonga < %s | FileCheck %s + +; When a frame index offset is more than 12 bits, make sure we don't store +; it in mubuf's offset field. + +; Also, make sure we use the same register for storing the scratch buffer address +; for both stores. This register is allocated by the register scavenger, so we +; should be able to reuse the same register for each scratch buffer access. + +; CHECK-LABEL: {{^}}legal_offset_fi: +; CHECK: v_mov_b32_e32 [[OFFSET:v[0-9]+]], 0{{$}} +; CHECK: buffer_store_dword v{{[0-9]+}}, [[OFFSET]], s[{{[0-9]+}}:{{[0-9]+}}], s{{[0-9]+}} offen +; CHECK: v_mov_b32_e32 [[OFFSET]], 0x8000 +; CHECK: buffer_store_dword v{{[0-9]+}}, [[OFFSET]], s[{{[0-9]+}}:{{[0-9]+}}], s{{[0-9]+}} offen{{$}} + +define void @legal_offset_fi(i32 addrspace(1)* %out, i32 %cond, i32 %if_offset, i32 %else_offset) { +entry: + %scratch0 = alloca [8192 x i32] + %scratch1 = alloca [8192 x i32] + + %scratchptr0 = getelementptr [8192 x i32]* %scratch0, i32 0, i32 0 + store i32 1, i32* %scratchptr0 + + %scratchptr1 = getelementptr [8192 x i32]* %scratch1, i32 0, i32 0 + store i32 2, i32* %scratchptr1 + + %cmp = icmp eq i32 %cond, 0 + br i1 %cmp, label %if, label %else + +if: + %if_ptr = getelementptr [8192 x i32]* %scratch0, i32 0, i32 %if_offset + %if_value = load i32* %if_ptr + br label %done + +else: + %else_ptr = getelementptr [8192 x i32]* %scratch1, i32 0, i32 %else_offset + %else_value = load i32* %else_ptr + br label %done + +done: + %value = phi i32 [%if_value, %if], [%else_value, %else] + store i32 %value, i32 addrspace(1)* %out + ret void + + ret void + +} + +; CHECK-LABEL: {{^}}legal_offset_fi_offset: +; CHECK: buffer_store_dword v{{[0-9]+}}, v{{[0-9]+}}, s[{{[0-9]+}}:{{[0-9]+}}], s{{[0-9]+}} offen +; CHECK: v_add_i32_e32 [[OFFSET:v[0-9]+]], 0x8000 +; CHECK: buffer_store_dword v{{[0-9]+}}, [[OFFSET]], s[{{[0-9]+}}:{{[0-9]+}}], s{{[0-9]+}} offen{{$}} + +define void @legal_offset_fi_offset(i32 addrspace(1)* %out, i32 %cond, i32 addrspace(1)* %offsets, i32 %if_offset, i32 %else_offset) { +entry: + %scratch0 = alloca [8192 x i32] + %scratch1 = alloca [8192 x i32] + + %offset0 = load i32 addrspace(1)* %offsets + %scratchptr0 = getelementptr [8192 x 
i32]* %scratch0, i32 0, i32 %offset0 + store i32 %offset0, i32* %scratchptr0 + + %offsetptr1 = getelementptr i32 addrspace(1)* %offsets, i32 1 + %offset1 = load i32 addrspace(1)* %offsetptr1 + %scratchptr1 = getelementptr [8192 x i32]* %scratch1, i32 0, i32 %offset1 + store i32 %offset1, i32* %scratchptr1 + + %cmp = icmp eq i32 %cond, 0 + br i1 %cmp, label %if, label %else + +if: + %if_ptr = getelementptr [8192 x i32]* %scratch0, i32 0, i32 %if_offset + %if_value = load i32* %if_ptr + br label %done + +else: + %else_ptr = getelementptr [8192 x i32]* %scratch1, i32 0, i32 %else_offset + %else_value = load i32* %else_ptr + br label %done + +done: + %value = phi i32 [%if_value, %if], [%else_value, %else] + store i32 %value, i32 addrspace(1)* %out + ret void +} + diff --git a/test/CodeGen/R600/sdiv.ll b/test/CodeGen/R600/sdiv.ll index 16853e0..07bb417 100644 --- a/test/CodeGen/R600/sdiv.ll +++ b/test/CodeGen/R600/sdiv.ll @@ -1,4 +1,5 @@ -; RUN: llc -march=r600 -mcpu=SI < %s | FileCheck -check-prefix=SI -check-prefix=FUNC %s +; RUN: llc -march=amdgcn -mcpu=SI < %s | FileCheck -check-prefix=SI -check-prefix=FUNC %s +; RUN: llc -march=amdgcn -mcpu=tonga < %s | FileCheck -check-prefix=SI -check-prefix=FUNC %s ; RUN: llc -march=r600 -mcpu=redwood < %s | FileCheck -check-prefix=EG -check-prefix=FUNC %s ; The code generated by sdiv is long and complex and may frequently change. @@ -35,7 +36,7 @@ define void @sdiv_i32_4(i32 addrspace(1)* %out, i32 addrspace(1)* %in) { ; FUNC-LABEL: {{^}}slow_sdiv_i32_3435: ; SI: buffer_load_dword [[VAL:v[0-9]+]], ; SI: v_mov_b32_e32 [[MAGIC:v[0-9]+]], 0x98a1930b -; SI: v_mul_hi_i32 [[TMP:v[0-9]+]], [[VAL]], [[MAGIC]] +; SI: v_mul_hi_i32 [[TMP:v[0-9]+]], [[MAGIC]], [[VAL]] ; SI: v_add_i32 ; SI: v_lshrrev_b32 ; SI: v_ashrrev_i32 diff --git a/test/CodeGen/R600/sdivrem24.ll b/test/CodeGen/R600/sdivrem24.ll index 228cf76..e8c5c25 100644 --- a/test/CodeGen/R600/sdivrem24.ll +++ b/test/CodeGen/R600/sdivrem24.ll @@ -1,4 +1,5 @@ -; RUN: llc -march=r600 -mcpu=SI < %s | FileCheck -check-prefix=SI -check-prefix=FUNC %s +; RUN: llc -march=amdgcn -mcpu=SI < %s | FileCheck -check-prefix=SI -check-prefix=FUNC %s +; RUN: llc -march=amdgcn -mcpu=tonga < %s | FileCheck -check-prefix=SI -check-prefix=FUNC %s ; RUN: llc -march=r600 -mcpu=redwood < %s | FileCheck -check-prefix=EG -check-prefix=FUNC %s ; FUNC-LABEL: {{^}}sdiv24_i8: diff --git a/test/CodeGen/R600/sdivrem64.ll b/test/CodeGen/R600/sdivrem64.ll new file mode 100644 index 0000000..a9b2b7f --- /dev/null +++ b/test/CodeGen/R600/sdivrem64.ll @@ -0,0 +1,225 @@ +;RUN: llc -march=amdgcn -mcpu=SI -verify-machineinstrs < %s | FileCheck --check-prefix=SI --check-prefix=GCN --check-prefix=FUNC %s +;RUN: llc -march=amdgcn -mcpu=tonga -verify-machineinstrs < %s | FileCheck --check-prefix=VI --check-prefix=GCN --check-prefix=FUNC %s +;RUN: llc -march=r600 -mcpu=redwood < %s | FileCheck --check-prefix=EG --check-prefix=FUNC %s + +;FUNC-LABEL: {{^}}test_sdiv: +;EG: RECIP_UINT +;EG: LSHL {{.*}}, 1, +;EG: BFE_UINT +;EG: BFE_UINT +;EG: BFE_UINT +;EG: BFE_UINT +;EG: BFE_UINT +;EG: BFE_UINT +;EG: BFE_UINT +;EG: BFE_UINT +;EG: BFE_UINT +;EG: BFE_UINT +;EG: BFE_UINT +;EG: BFE_UINT +;EG: BFE_UINT +;EG: BFE_UINT +;EG: BFE_UINT +;EG: BFE_UINT +;EG: BFE_UINT +;EG: BFE_UINT +;EG: BFE_UINT +;EG: BFE_UINT +;EG: BFE_UINT +;EG: BFE_UINT +;EG: BFE_UINT +;EG: BFE_UINT +;EG: BFE_UINT +;EG: BFE_UINT +;EG: BFE_UINT +;EG: BFE_UINT +;EG: BFE_UINT +;EG: BFE_UINT + +;GCN: v_bfe_u32 +;GCN: v_bfe_u32 +;GCN: v_bfe_u32 +;GCN: v_bfe_u32 +;GCN: v_bfe_u32 +;GCN: v_bfe_u32 
+;GCN: v_bfe_u32 +;GCN: v_bfe_u32 +;GCN: v_bfe_u32 +;GCN: v_bfe_u32 +;GCN: v_bfe_u32 +;GCN: v_bfe_u32 +;GCN: v_bfe_u32 +;GCN: v_bfe_u32 +;GCN: v_bfe_u32 +;GCN: v_bfe_u32 +;GCN: v_bfe_u32 +;GCN: v_bfe_u32 +;GCN: v_bfe_u32 +;GCN: v_bfe_u32 +;GCN: v_bfe_u32 +;GCN: v_bfe_u32 +;GCN: v_bfe_u32 +;GCN: v_bfe_u32 +;GCN: v_bfe_u32 +;GCN: v_bfe_u32 +;GCN: v_bfe_u32 +;GCN: v_bfe_u32 +;GCN: v_bfe_u32 +;GCN: v_bfe_u32 +;GCN-NOT: v_mad_f32 +;SI-NOT: v_lshr_b64 +;VI-NOT: v_lshrrev_b64 +;GCN: s_endpgm +define void @test_sdiv(i64 addrspace(1)* %out, i64 %x, i64 %y) { + %result = sdiv i64 %x, %y + store i64 %result, i64 addrspace(1)* %out + ret void +} + +;FUNC-LABEL: {{^}}test_srem: +;EG: RECIP_UINT +;EG: BFE_UINT +;EG: BFE_UINT +;EG: BFE_UINT +;EG: BFE_UINT +;EG: BFE_UINT +;EG: BFE_UINT +;EG: BFE_UINT +;EG: BFE_UINT +;EG: BFE_UINT +;EG: BFE_UINT +;EG: BFE_UINT +;EG: BFE_UINT +;EG: BFE_UINT +;EG: BFE_UINT +;EG: BFE_UINT +;EG: BFE_UINT +;EG: BFE_UINT +;EG: BFE_UINT +;EG: BFE_UINT +;EG: BFE_UINT +;EG: BFE_UINT +;EG: BFE_UINT +;EG: BFE_UINT +;EG: BFE_UINT +;EG: BFE_UINT +;EG: BFE_UINT +;EG: BFE_UINT +;EG: BFE_UINT +;EG: BFE_UINT +;EG: BFE_UINT +;EG: AND_INT {{.*}}, 1, + +;GCN: s_bfe_u32 +;GCN: s_bfe_u32 +;GCN: s_bfe_u32 +;GCN: s_bfe_u32 +;GCN: s_bfe_u32 +;GCN: s_bfe_u32 +;GCN: s_bfe_u32 +;GCN: s_bfe_u32 +;GCN: s_bfe_u32 +;GCN: s_bfe_u32 +;GCN: s_bfe_u32 +;GCN: s_bfe_u32 +;GCN: s_bfe_u32 +;GCN: s_bfe_u32 +;GCN: s_bfe_u32 +;GCN: s_bfe_u32 +;GCN: s_bfe_u32 +;GCN: s_bfe_u32 +;GCN: s_bfe_u32 +;GCN: s_bfe_u32 +;GCN: s_bfe_u32 +;GCN: s_bfe_u32 +;GCN: s_bfe_u32 +;GCN: s_bfe_u32 +;GCN: s_bfe_u32 +;GCN: s_bfe_u32 +;GCN: s_bfe_u32 +;GCN: s_bfe_u32 +;GCN: s_bfe_u32 +;GCN: s_bfe_u32 +;GCN-NOT: v_mad_f32 +;SI-NOT: v_lshr_b64 +;VI-NOT: v_lshrrev_b64 +;GCN: s_endpgm +define void @test_srem(i64 addrspace(1)* %out, i64 %x, i64 %y) { + %result = urem i64 %x, %y + store i64 %result, i64 addrspace(1)* %out + ret void +} + +;FUNC-LABEL: {{^}}test_sdiv3264: +;EG: RECIP_UINT +;EG-NOT: BFE_UINT + +;GCN-NOT: s_bfe_u32 +;GCN-NOT: v_mad_f32 +;SI-NOT: v_lshr_b64 +;VI-NOT: v_lshrrev_b64 +;GCN: s_endpgm +define void @test_sdiv3264(i64 addrspace(1)* %out, i64 %x, i64 %y) { + %1 = ashr i64 %x, 33 + %2 = ashr i64 %y, 33 + %result = sdiv i64 %1, %2 + store i64 %result, i64 addrspace(1)* %out + ret void +} + +;FUNC-LABEL: {{^}}test_srem3264: +;EG: RECIP_UINT +;EG-NOT: BFE_UINT + +;GCN-NOT: s_bfe_u32 +;GCN-NOT: v_mad_f32 +;SI-NOT: v_lshr_b64 +;VI-NOT: v_lshrrev_b64 +;GCN: s_endpgm +define void @test_srem3264(i64 addrspace(1)* %out, i64 %x, i64 %y) { + %1 = ashr i64 %x, 33 + %2 = ashr i64 %y, 33 + %result = srem i64 %1, %2 + store i64 %result, i64 addrspace(1)* %out + ret void +} + +;FUNC-LABEL: {{^}}test_sdiv2464: +;EG: INT_TO_FLT +;EG: INT_TO_FLT +;EG: FLT_TO_INT +;EG-NOT: RECIP_UINT +;EG-NOT: BFE_UINT + +;GCN-NOT: s_bfe_u32 +;GCN: v_mad_f32 +;SI-NOT: v_lshr_b64 +;VI-NOT: v_lshrrev_b64 +;GCN: s_endpgm +define void @test_sdiv2464(i64 addrspace(1)* %out, i64 %x, i64 %y) { + %1 = ashr i64 %x, 40 + %2 = ashr i64 %y, 40 + %result = sdiv i64 %1, %2 + store i64 %result, i64 addrspace(1)* %out + ret void +} + +;FUNC-LABEL: {{^}}test_srem2464: +;EG: INT_TO_FLT +;EG: INT_TO_FLT +;EG: FLT_TO_INT +;EG-NOT: RECIP_UINT +;EG-NOT: BFE_UINT + +;GCN-NOT: s_bfe_u32 +;GCN: v_mad_f32 +;SI-NOT: v_lshr_b64 +;VI-NOT: v_lshrrev_b64 +;GCN: s_endpgm +define void @test_srem2464(i64 addrspace(1)* %out, i64 %x, i64 %y) { + %1 = ashr i64 %x, 40 + %2 = ashr i64 %y, 40 + %result = srem i64 %1, %2 + store i64 %result, i64 addrspace(1)* %out + ret void +} diff --git 
a/test/CodeGen/R600/select-i1.ll b/test/CodeGen/R600/select-i1.ll index 2e2d0e4..6735394 100644 --- a/test/CodeGen/R600/select-i1.ll +++ b/test/CodeGen/R600/select-i1.ll @@ -1,4 +1,5 @@ -; RUN: llc -march=r600 -mcpu=SI -verify-machineinstrs < %s | FileCheck -check-prefix=SI -check-prefix=FUNC %s +; RUN: llc -march=amdgcn -mcpu=SI -verify-machineinstrs < %s | FileCheck -check-prefix=SI -check-prefix=FUNC %s +; RUN: llc -march=amdgcn -mcpu=tonga -verify-machineinstrs < %s | FileCheck -check-prefix=SI -check-prefix=FUNC %s ; FIXME: This should go in existing select.ll test, except the current testcase there is broken on SI diff --git a/test/CodeGen/R600/select-vectors.ll b/test/CodeGen/R600/select-vectors.ll index 7d8df2e..59082c6 100644 --- a/test/CodeGen/R600/select-vectors.ll +++ b/test/CodeGen/R600/select-vectors.ll @@ -1,4 +1,5 @@ -; RUN: llc -verify-machineinstrs -march=r600 -mcpu=SI < %s | FileCheck -check-prefix=SI -check-prefix=FUNC %s +; RUN: llc -verify-machineinstrs -march=amdgcn -mcpu=SI < %s | FileCheck -check-prefix=SI -check-prefix=FUNC %s +; RUN: llc -verify-machineinstrs -march=amdgcn -mcpu=tonga < %s | FileCheck -check-prefix=SI -check-prefix=FUNC %s ; Test expansion of scalar selects on vectors. ; Evergreen not enabled since it seems to be having problems with doubles. diff --git a/test/CodeGen/R600/select64.ll b/test/CodeGen/R600/select64.ll index 8de34d5..0245dae 100644 --- a/test/CodeGen/R600/select64.ll +++ b/test/CodeGen/R600/select64.ll @@ -1,4 +1,5 @@ -; RUN: llc < %s -march=r600 -mcpu=SI -verify-machineinstrs | FileCheck %s +; RUN: llc < %s -march=amdgcn -mcpu=SI -verify-machineinstrs | FileCheck %s +; RUN: llc < %s -march=amdgcn -mcpu=tonga -verify-machineinstrs | FileCheck %s ; CHECK-LABEL: {{^}}select0: ; i64 select should be split into two i32 selects, and we shouldn't need @@ -48,3 +49,20 @@ define void @v_select_trunc_i64_2(i32 addrspace(1)* %out, i32 %cond, i64 addrspa store i32 %trunc, i32 addrspace(1)* %out, align 4 ret void } + +; CHECK-LABEL: {{^}}v_select_i64_split_imm: +; CHECK: s_mov_b32 [[SHI:s[0-9]+]], 63 +; CHECK: s_mov_b32 [[SLO:s[0-9]+]], 0 +; CHECK-DAG: v_mov_b32_e32 [[VHI:v[0-9]+]], [[SHI]] +; CHECK-DAG: v_mov_b32_e32 [[VLO:v[0-9]+]], [[SLO]] +; CHECK-DAG: v_cndmask_b32_e64 {{v[0-9]+}}, [[VLO]], {{v[0-9]+}} +; CHECK-DAG: v_cndmask_b32_e64 {{v[0-9]+}}, [[VHI]], {{v[0-9]+}} +; CHECK: s_endpgm +define void @v_select_i64_split_imm(i64 addrspace(1)* %out, i32 %cond, i64 addrspace(1)* %aptr, i64 addrspace(1)* %bptr) nounwind { + %cmp = icmp ugt i32 %cond, 5 + %a = load i64 addrspace(1)* %aptr, align 8 + %b = load i64 addrspace(1)* %bptr, align 8 + %sel = select i1 %cmp, i64 %a, i64 270582939648 ; 63 << 32 + store i64 %sel, i64 addrspace(1)* %out, align 8 + ret void +} diff --git a/test/CodeGen/R600/selectcc-opt.ll b/test/CodeGen/R600/selectcc-opt.ll index 82577bb..7780371 100644 --- a/test/CodeGen/R600/selectcc-opt.ll +++ b/test/CodeGen/R600/selectcc-opt.ll @@ -1,4 +1,5 @@ -; RUN: llc -march=r600 -mcpu=SI < %s | FileCheck -check-prefix=SI -check-prefix=FUNC %s +; RUN: llc -march=amdgcn -mcpu=SI < %s | FileCheck -check-prefix=SI -check-prefix=FUNC %s +; RUN: llc -march=amdgcn -mcpu=tonga < %s | FileCheck -check-prefix=SI -check-prefix=FUNC %s ; RUN: llc -march=r600 -mcpu=redwood < %s | FileCheck -check-prefix=EG -check-prefix=FUNC %s diff --git a/test/CodeGen/R600/selectcc.ll b/test/CodeGen/R600/selectcc.ll index 5a09b5c..f378e15 100644 --- a/test/CodeGen/R600/selectcc.ll +++ b/test/CodeGen/R600/selectcc.ll @@ -1,5 +1,6 @@ ; RUN: llc 
-verify-machineinstrs -march=r600 -mcpu=redwood < %s | FileCheck -check-prefix=EG -check-prefix=FUNC %s -; RUN: llc -verify-machineinstrs -march=r600 -mcpu=SI < %s | FileCheck -check-prefix=SI -check-prefix=FUNC %s +; RUN: llc -verify-machineinstrs -march=amdgcn -mcpu=SI < %s | FileCheck -check-prefix=SI -check-prefix=FUNC %s +; RUN: llc -verify-machineinstrs -march=amdgcn -mcpu=tonga < %s | FileCheck -check-prefix=SI -check-prefix=FUNC %s ; FUNC-LABEL: {{^}}selectcc_i64: ; EG: XOR_INT diff --git a/test/CodeGen/R600/setcc-opt.ll b/test/CodeGen/R600/setcc-opt.ll index af48df8..93860f5 100644 --- a/test/CodeGen/R600/setcc-opt.ll +++ b/test/CodeGen/R600/setcc-opt.ll @@ -1,15 +1,236 @@ -; RUN: llc -march=r600 -mcpu=SI -verify-machineinstrs < %s | FileCheck -check-prefix=SI -check-prefix=FUNC %s +; RUN: llc -march=amdgcn -mcpu=SI -verify-machineinstrs < %s | FileCheck -check-prefix=SI -check-prefix=GCN -check-prefix=FUNC %s +; RUN: llc -march=amdgcn -mcpu=tonga -verify-machineinstrs < %s | FileCheck -check-prefix=VI -check-prefix=GCN -check-prefix=FUNC %s +; RUN: llc -march=r600 -mcpu=cypress -verify-machineinstrs < %s | FileCheck -check-prefix=EG -check-prefix=FUNC %s -; SI-LABEL: {{^}}sext_bool_icmp_ne: -; SI: v_cmp_ne_i32 -; SI-NEXT: v_cndmask_b32 -; SI-NOT: v_cmp_ne_i32 -; SI-NOT: v_cndmask_b32 -; SI: s_endpgm -define void @sext_bool_icmp_ne(i1 addrspace(1)* %out, i32 %a, i32 %b) nounwind { +; FUNC-LABEL: {{^}}sext_bool_icmp_eq_0: +; GCN-NOT: v_cmp +; GCN: v_cmp_ne_i32_e32 vcc, +; GCN-NEXT: v_cndmask_b32_e64 [[RESULT:v[0-9]+]], 0, 1, vcc +; GCN-NEXT:buffer_store_byte [[RESULT]] +; GCN-NEXT: s_endpgm + +; EG: SETNE_INT * [[CMP:T[0-9]+]].[[CMPCHAN:[XYZW]]], KC0[2].Z, KC0[2].W +; EG: AND_INT T{{[0-9]+.[XYZW]}}, PS, 1 +define void @sext_bool_icmp_eq_0(i1 addrspace(1)* %out, i32 %a, i32 %b) nounwind { + %icmp0 = icmp eq i32 %a, %b + %ext = sext i1 %icmp0 to i32 + %icmp1 = icmp eq i32 %ext, 0 + store i1 %icmp1, i1 addrspace(1)* %out + ret void +} + +; FUNC-LABEL: {{^}}sext_bool_icmp_ne_0: +; GCN-NOT: v_cmp +; GCN: v_cmp_ne_i32_e32 vcc, +; GCN-NEXT: v_cndmask_b32_e64 [[RESULT:v[0-9]+]], 0, 1, vcc +; GCN-NEXT: buffer_store_byte [[RESULT]] +; GCN-NEXT: s_endpgm + +; EG: SETNE_INT * [[CMP:T[0-9]+]].[[CMPCHAN:[XYZW]]], KC0[2].Z, KC0[2].W +; EG: AND_INT T{{[0-9]+.[XYZW]}}, PS, 1 +define void @sext_bool_icmp_ne_0(i1 addrspace(1)* %out, i32 %a, i32 %b) nounwind { %icmp0 = icmp ne i32 %a, %b %ext = sext i1 %icmp0 to i32 %icmp1 = icmp ne i32 %ext, 0 store i1 %icmp1, i1 addrspace(1)* %out ret void } + +; This really folds away to false +; FUNC-LABEL: {{^}}sext_bool_icmp_eq_1: +; GCN: v_cmp_eq_i32_e32 vcc, +; GCN-NEXT: v_cndmask_b32_e64 [[TMP:v[0-9]+]], 0, -1, vcc +; GCN-NEXT: v_cmp_eq_i32_e64 {{s\[[0-9]+:[0-9]+\]}}, [[TMP]], 1{{$}} +; GCN-NEXT: v_cndmask_b32_e64 [[TMP:v[0-9]+]], 0, 1, +; GCN-NEXT: buffer_store_byte [[TMP]] +; GCN-NEXT: s_endpgm +define void @sext_bool_icmp_eq_1(i1 addrspace(1)* %out, i32 %a, i32 %b) nounwind { + %icmp0 = icmp eq i32 %a, %b + %ext = sext i1 %icmp0 to i32 + %icmp1 = icmp eq i32 %ext, 1 + store i1 %icmp1, i1 addrspace(1)* %out + ret void +} + +; This really folds away to true +; FUNC-LABEL: {{^}}sext_bool_icmp_ne_1: +; GCN: v_cmp_ne_i32_e32 vcc, +; GCN-NEXT: v_cndmask_b32_e64 [[TMP:v[0-9]+]], 0, -1, vcc +; GCN-NEXT: v_cmp_ne_i32_e64 {{s\[[0-9]+:[0-9]+\]}}, [[TMP]], 1{{$}} +; GCN-NEXT: v_cndmask_b32_e64 [[TMP:v[0-9]+]], 0, 1, +; GCN-NEXT: buffer_store_byte [[TMP]] +; GCN-NEXT: s_endpgm +define void @sext_bool_icmp_ne_1(i1 addrspace(1)* %out, i32 %a, i32 %b) nounwind { + %icmp0 
= icmp ne i32 %a, %b + %ext = sext i1 %icmp0 to i32 + %icmp1 = icmp ne i32 %ext, 1 + store i1 %icmp1, i1 addrspace(1)* %out + ret void +} + +; FUNC-LABEL: {{^}}zext_bool_icmp_eq_0: +; GCN-NOT: v_cmp +; GCN: v_cmp_ne_i32_e32 vcc, +; GCN-NEXT: v_cndmask_b32_e64 [[RESULT:v[0-9]+]], 0, 1, vcc +; GCN-NEXT: buffer_store_byte [[RESULT]] +; GCN-NEXT: s_endpgm +define void @zext_bool_icmp_eq_0(i1 addrspace(1)* %out, i32 %a, i32 %b) nounwind { + %icmp0 = icmp eq i32 %a, %b + %ext = zext i1 %icmp0 to i32 + %icmp1 = icmp eq i32 %ext, 0 + store i1 %icmp1, i1 addrspace(1)* %out + ret void +} + +; FUNC-LABEL: {{^}}zext_bool_icmp_ne_0: +; GCN-NOT: v_cmp +; GCN: v_cmp_ne_i32_e32 vcc, +; GCN-NEXT: v_cndmask_b32_e64 [[RESULT:v[0-9]+]], 0, 1, vcc +; GCN-NEXT: buffer_store_byte [[RESULT]] +; GCN-NEXT: s_endpgm +define void @zext_bool_icmp_ne_0(i1 addrspace(1)* %out, i32 %a, i32 %b) nounwind { + %icmp0 = icmp ne i32 %a, %b + %ext = zext i1 %icmp0 to i32 + %icmp1 = icmp ne i32 %ext, 0 + store i1 %icmp1, i1 addrspace(1)* %out + ret void +} + +; FUNC-LABEL: {{^}}zext_bool_icmp_eq_1: +; GCN-NOT: v_cmp +; GCN: v_cmp_eq_i32_e32 vcc, +; GCN-NEXT: v_cndmask_b32_e64 [[RESULT:v[0-9]+]], 0, 1, vcc +; GCN-NEXT: buffer_store_byte [[RESULT]] +; GCN-NEXT: s_endpgm +define void @zext_bool_icmp_eq_1(i1 addrspace(1)* %out, i32 %a, i32 %b) nounwind { + %icmp0 = icmp eq i32 %a, %b + %ext = zext i1 %icmp0 to i32 + %icmp1 = icmp eq i32 %ext, 1 + store i1 %icmp1, i1 addrspace(1)* %out + ret void +} + +; FUNC-LABEL: {{^}}zext_bool_icmp_ne_1: +; GCN-NOT: v_cmp +; GCN: v_cmp_eq_i32_e32 vcc, +; GCN-NEXT: v_cndmask_b32_e64 [[RESULT:v[0-9]+]], 0, 1, vcc +; GCN-NEXT: buffer_store_byte [[RESULT]] +define void @zext_bool_icmp_ne_1(i1 addrspace(1)* %out, i32 %a, i32 %b) nounwind { + %icmp0 = icmp ne i32 %a, %b + %ext = zext i1 %icmp0 to i32 + %icmp1 = icmp ne i32 %ext, 1 + store i1 %icmp1, i1 addrspace(1)* %out + ret void +} + +; FUNC-LABEL: {{^}}sext_bool_icmp_ne_k: +; SI-DAG: s_load_dword [[A:s[0-9]+]], s{{\[[0-9]+:[0-9]+\]}}, 0xb +; SI-DAG: s_load_dword [[B:s[0-9]+]], s{{\[[0-9]+:[0-9]+\]}}, 0xc +; VI-DAG: s_load_dword [[A:s[0-9]+]], s{{\[[0-9]+:[0-9]+\]}}, 0x2c +; VI-DAG: s_load_dword [[B:s[0-9]+]], s{{\[[0-9]+:[0-9]+\]}}, 0x30 +; GCN: v_mov_b32_e32 [[VB:v[0-9]+]], [[B]] +; GCN: v_cmp_ne_i32_e64 [[CMP:s\[[0-9]+:[0-9]+\]]], [[VB]], 2{{$}} +; GCN: v_cndmask_b32_e64 [[RESULT:v[0-9]+]], 0, 1, [[CMP]] +; GCN: buffer_store_byte +; GCN: s_endpgm +define void @sext_bool_icmp_ne_k(i1 addrspace(1)* %out, i32 %a, i32 %b) nounwind { + %icmp0 = icmp ne i32 %a, %b + %ext = sext i1 %icmp0 to i32 + %icmp1 = icmp ne i32 %ext, 2 + store i1 %icmp1, i1 addrspace(1)* %out + ret void +} + +; FUNC-LABEL: {{^}}cmp_zext_k_i8max: +; GCN: buffer_load_ubyte [[B:v[0-9]+]], s{{\[[0-9]+:[0-9]+\]}}, 0 offset:44 +; GCN: v_mov_b32_e32 [[K255:v[0-9]+]], 0xff{{$}} +; GCN: v_cmp_ne_i32_e32 vcc, [[B]], [[K255]] +; GCN-NEXT: v_cndmask_b32_e64 [[RESULT:v[0-9]+]], 0, 1, vcc +; GCN-NEXT: buffer_store_byte [[RESULT]] +; GCN: s_endpgm +define void @cmp_zext_k_i8max(i1 addrspace(1)* %out, i8 %b) nounwind { + %b.ext = zext i8 %b to i32 + %icmp0 = icmp ne i32 %b.ext, 255 + store i1 %icmp0, i1 addrspace(1)* %out + ret void +} + +; FUNC-LABEL: {{^}}cmp_sext_k_neg1: +; GCN: buffer_load_sbyte [[B:v[0-9]+]] +; GCN: v_cmp_ne_i32_e64 [[CMP:s\[[0-9]+:[0-9]+\]]], [[B]], -1{{$}} +; GCN-NEXT: v_cndmask_b32_e64 [[RESULT:v[0-9]+]], 0, 1, [[CMP]] +; GCN-NEXT: buffer_store_byte [[RESULT]] +; GCN: s_endpgm +define void @cmp_sext_k_neg1(i1 addrspace(1)* %out, i8 addrspace(1)* %b.ptr) nounwind { + %b = 
load i8 addrspace(1)* %b.ptr + %b.ext = sext i8 %b to i32 + %icmp0 = icmp ne i32 %b.ext, -1 + store i1 %icmp0, i1 addrspace(1)* %out + ret void +} + +; FUNC-LABEL: {{^}}cmp_sext_k_neg1_i8_sext_arg: +; GCN: s_load_dword [[B:s[0-9]+]] +; GCN: v_cmp_ne_i32_e64 [[CMP:s\[[0-9]+:[0-9]+\]]], [[B]], -1{{$}} +; GCN-NEXT: v_cndmask_b32_e64 [[RESULT:v[0-9]+]], 0, 1, [[CMP]] +; GCN-NEXT: buffer_store_byte [[RESULT]] +; GCN: s_endpgm +define void @cmp_sext_k_neg1_i8_sext_arg(i1 addrspace(1)* %out, i8 signext %b) nounwind { + %b.ext = sext i8 %b to i32 + %icmp0 = icmp ne i32 %b.ext, -1 + store i1 %icmp0, i1 addrspace(1)* %out + ret void +} + +; FIXME: This ends up doing a buffer_load_ubyte, and and compare to +; 255. Seems to be because of ordering problems when not allowing load widths to be reduced. +; Should do a buffer_load_sbyte and compare with -1 + +; FUNC-LABEL: {{^}}cmp_sext_k_neg1_i8_arg: +; GCN-DAG: buffer_load_ubyte [[B:v[0-9]+]] +; GCN-DAG: v_mov_b32_e32 [[K:v[0-9]+]], 0xff{{$}} +; GCN: v_cmp_ne_i32_e32 vcc, [[B]], [[K]]{{$}} +; GCN-NEXT: v_cndmask_b32_e64 [[RESULT:v[0-9]+]], 0, 1, vcc +; GCN-NEXT: buffer_store_byte [[RESULT]] +; GCN: s_endpgm +define void @cmp_sext_k_neg1_i8_arg(i1 addrspace(1)* %out, i8 %b) nounwind { + %b.ext = sext i8 %b to i32 + %icmp0 = icmp ne i32 %b.ext, -1 + store i1 %icmp0, i1 addrspace(1)* %out + ret void +} + +; FUNC-LABEL: {{^}}cmp_zext_k_neg1: +; GCN: v_mov_b32_e32 [[RESULT:v[0-9]+]], 1{{$}} +; GCN: buffer_store_byte [[RESULT]] +; GCN: s_endpgm +define void @cmp_zext_k_neg1(i1 addrspace(1)* %out, i8 %b) nounwind { + %b.ext = zext i8 %b to i32 + %icmp0 = icmp ne i32 %b.ext, -1 + store i1 %icmp0, i1 addrspace(1)* %out + ret void +} + +; FUNC-LABEL: {{^}}zext_bool_icmp_ne_k: +; GCN: v_mov_b32_e32 [[RESULT:v[0-9]+]], 1{{$}} +; GCN: buffer_store_byte [[RESULT]] +; GCN-NEXT: s_endpgm +define void @zext_bool_icmp_ne_k(i1 addrspace(1)* %out, i32 %a, i32 %b) nounwind { + %icmp0 = icmp ne i32 %a, %b + %ext = zext i1 %icmp0 to i32 + %icmp1 = icmp ne i32 %ext, 2 + store i1 %icmp1, i1 addrspace(1)* %out + ret void +} + +; FUNC-LABEL: {{^}}zext_bool_icmp_eq_k: +; GCN: v_mov_b32_e32 [[RESULT:v[0-9]+]], 0{{$}} +; GCN: buffer_store_byte [[RESULT]] +; GCN-NEXT: s_endpgm +define void @zext_bool_icmp_eq_k(i1 addrspace(1)* %out, i32 %a, i32 %b) nounwind { + %icmp0 = icmp ne i32 %a, %b + %ext = zext i1 %icmp0 to i32 + %icmp1 = icmp eq i32 %ext, 2 + store i1 %icmp1, i1 addrspace(1)* %out + ret void +} diff --git a/test/CodeGen/R600/setcc.ll b/test/CodeGen/R600/setcc.ll index 8dd2ce4..f9c7e4f 100644 --- a/test/CodeGen/R600/setcc.ll +++ b/test/CodeGen/R600/setcc.ll @@ -1,5 +1,7 @@ -;RUN: llc < %s -march=r600 -mcpu=redwood | FileCheck --check-prefix=R600 --check-prefix=FUNC %s -;RUN: llc < %s -march=r600 -mcpu=SI -verify-machineinstrs| FileCheck --check-prefix=SI --check-prefix=FUNC %s +; RUN: llc < %s -march=r600 -mcpu=redwood | FileCheck --check-prefix=R600 --check-prefix=FUNC %s +; RUN: llc < %s -march=amdgcn -mcpu=SI -verify-machineinstrs | FileCheck -check-prefix=SI -check-prefix=FUNC %s + +declare i32 @llvm.r600.read.tidig.x() nounwind readnone ; FUNC-LABEL: {{^}}setcc_v2i32: ; R600-DAG: SETE_INT * T{{[0-9]+\.[XYZW]}}, KC0[3].X, KC0[3].Z @@ -94,11 +96,9 @@ entry: ; R600-DAG: SETNE_DX10 ; R600-DAG: AND_INT ; R600-DAG: SETNE_INT -; SI: v_cmp_o_f32 -; SI: v_cmp_neq_f32 -; SI: v_cndmask_b32_e64 -; SI: v_cndmask_b32_e64 -; SI: v_and_b32_e32 + +; SI: v_cmp_lg_f32_e32 vcc +; SI-NEXT: v_cndmask_b32_e64 {{v[0-9]+}}, 0, -1, vcc define void @f32_one(i32 addrspace(1)* %out, float %a, float 
%b) { entry: %0 = fcmp one float %a, %b @@ -128,11 +128,9 @@ entry: ; R600-DAG: SETE_DX10 ; R600-DAG: OR_INT ; R600-DAG: SETNE_INT -; SI: v_cmp_u_f32 -; SI: v_cmp_eq_f32 -; SI: v_cndmask_b32_e64 -; SI: v_cndmask_b32_e64 -; SI: v_or_b32_e32 + +; SI: v_cmp_nlg_f32_e32 vcc +; SI-NEXT: v_cndmask_b32_e64 {{v[0-9]+}}, 0, -1, vcc define void @f32_ueq(i32 addrspace(1)* %out, float %a, float %b) { entry: %0 = fcmp ueq float %a, %b @@ -144,11 +142,8 @@ entry: ; FUNC-LABEL: {{^}}f32_ugt: ; R600: SETGE ; R600: SETE_DX10 -; SI: v_cmp_u_f32 -; SI: v_cmp_gt_f32 -; SI: v_cndmask_b32_e64 -; SI: v_cndmask_b32_e64 -; SI: v_or_b32_e32 +; SI: v_cmp_nle_f32_e32 vcc +; SI-NEXT: v_cndmask_b32_e64 {{v[0-9]+}}, 0, -1, vcc define void @f32_ugt(i32 addrspace(1)* %out, float %a, float %b) { entry: %0 = fcmp ugt float %a, %b @@ -160,11 +155,9 @@ entry: ; FUNC-LABEL: {{^}}f32_uge: ; R600: SETGT ; R600: SETE_DX10 -; SI: v_cmp_u_f32 -; SI: v_cmp_ge_f32 -; SI: v_cndmask_b32_e64 -; SI: v_cndmask_b32_e64 -; SI: v_or_b32_e32 + +; SI: v_cmp_nlt_f32_e32 vcc +; SI-NEXT: v_cndmask_b32_e64 {{v[0-9]+}}, 0, -1, vcc define void @f32_uge(i32 addrspace(1)* %out, float %a, float %b) { entry: %0 = fcmp uge float %a, %b @@ -176,11 +169,9 @@ entry: ; FUNC-LABEL: {{^}}f32_ult: ; R600: SETGE ; R600: SETE_DX10 -; SI: v_cmp_u_f32 -; SI: v_cmp_lt_f32 -; SI: v_cndmask_b32_e64 -; SI: v_cndmask_b32_e64 -; SI: v_or_b32_e32 + +; SI: v_cmp_nge_f32_e32 vcc +; SI-NEXT: v_cndmask_b32_e64 {{v[0-9]+}}, 0, -1, vcc define void @f32_ult(i32 addrspace(1)* %out, float %a, float %b) { entry: %0 = fcmp ult float %a, %b @@ -192,11 +183,9 @@ entry: ; FUNC-LABEL: {{^}}f32_ule: ; R600: SETGT ; R600: SETE_DX10 -; SI: v_cmp_u_f32 -; SI: v_cmp_le_f32 -; SI: v_cndmask_b32_e64 -; SI: v_cndmask_b32_e64 -; SI: v_or_b32_e32 + +; SI: v_cmp_ngt_f32_e32 vcc +; SI-NEXT: v_cndmask_b32_e64 {{v[0-9]+}}, 0, -1, vcc define void @f32_ule(i32 addrspace(1)* %out, float %a, float %b) { entry: %0 = fcmp ule float %a, %b @@ -343,3 +332,46 @@ entry: store i32 %1, i32 addrspace(1)* %out ret void } + +; FIXME: This does 4 compares +; FUNC-LABEL: {{^}}v3i32_eq: +; SI-DAG: v_cmp_eq_i32 +; SI-DAG: v_cndmask_b32_e64 {{v[0-9]+}}, 0, -1, +; SI-DAG: v_cmp_eq_i32 +; SI-DAG: v_cndmask_b32_e64 {{v[0-9]+}}, 0, -1, +; SI-DAG: v_cmp_eq_i32 +; SI-DAG: v_cndmask_b32_e64 {{v[0-9]+}}, 0, -1, +; SI: s_endpgm +define void @v3i32_eq(<3 x i32> addrspace(1)* %out, <3 x i32> addrspace(1)* %ptra, <3 x i32> addrspace(1)* %ptrb) { + %tid = call i32 @llvm.r600.read.tidig.x() nounwind readnone + %gep.a = getelementptr <3 x i32> addrspace(1)* %ptra, i32 %tid + %gep.b = getelementptr <3 x i32> addrspace(1)* %ptrb, i32 %tid + %gep.out = getelementptr <3 x i32> addrspace(1)* %out, i32 %tid + %a = load <3 x i32> addrspace(1)* %gep.a + %b = load <3 x i32> addrspace(1)* %gep.b + %cmp = icmp eq <3 x i32> %a, %b + %ext = sext <3 x i1> %cmp to <3 x i32> + store <3 x i32> %ext, <3 x i32> addrspace(1)* %gep.out + ret void +} + +; FUNC-LABEL: {{^}}v3i8_eq: +; SI-DAG: v_cmp_eq_i32 +; SI-DAG: v_cndmask_b32_e64 {{v[0-9]+}}, 0, -1, +; SI-DAG: v_cmp_eq_i32 +; SI-DAG: v_cndmask_b32_e64 {{v[0-9]+}}, 0, -1, +; SI-DAG: v_cmp_eq_i32 +; SI-DAG: v_cndmask_b32_e64 {{v[0-9]+}}, 0, -1, +; SI: s_endpgm +define void @v3i8_eq(<3 x i8> addrspace(1)* %out, <3 x i8> addrspace(1)* %ptra, <3 x i8> addrspace(1)* %ptrb) { + %tid = call i32 @llvm.r600.read.tidig.x() nounwind readnone + %gep.a = getelementptr <3 x i8> addrspace(1)* %ptra, i32 %tid + %gep.b = getelementptr <3 x i8> addrspace(1)* %ptrb, i32 %tid + %gep.out = getelementptr <3 x i8> 
addrspace(1)* %out, i32 %tid + %a = load <3 x i8> addrspace(1)* %gep.a + %b = load <3 x i8> addrspace(1)* %gep.b + %cmp = icmp eq <3 x i8> %a, %b + %ext = sext <3 x i1> %cmp to <3 x i8> + store <3 x i8> %ext, <3 x i8> addrspace(1)* %gep.out + ret void +} diff --git a/test/CodeGen/R600/setcc64.ll b/test/CodeGen/R600/setcc64.ll index 6e43172..231be7a 100644 --- a/test/CodeGen/R600/setcc64.ll +++ b/test/CodeGen/R600/setcc64.ll @@ -1,4 +1,5 @@ -;RUN: llc < %s -march=r600 -mcpu=SI -verify-machineinstrs| FileCheck --check-prefix=SI --check-prefix=FUNC %s +;RUN: llc < %s -march=amdgcn -mcpu=SI -verify-machineinstrs| FileCheck --check-prefix=SI --check-prefix=FUNC %s +;RUN: llc < %s -march=amdgcn -mcpu=tonga -verify-machineinstrs| FileCheck --check-prefix=SI --check-prefix=FUNC %s ; XXX: Merge this into setcc, once R600 supports 64-bit operations @@ -57,11 +58,8 @@ entry: } ; FUNC-LABEL: {{^}}f64_one: -; SI: v_cmp_o_f64 -; SI: v_cmp_neq_f64 -; SI: v_cndmask_b32_e64 -; SI: v_cndmask_b32_e64 -; SI: v_and_b32_e32 +; SI: v_cmp_lg_f64_e32 vcc +; SI-NEXT: v_cndmask_b32_e64 {{v[0-9]+}}, 0, -1, vcc define void @f64_one(i32 addrspace(1)* %out, double %a, double %b) { entry: %0 = fcmp one double %a, %b @@ -81,11 +79,8 @@ entry: } ; FUNC-LABEL: {{^}}f64_ueq: -; SI: v_cmp_u_f64 -; SI: v_cmp_eq_f64 -; SI: v_cndmask_b32_e64 -; SI: v_cndmask_b32_e64 -; SI: v_or_b32_e32 +; SI: v_cmp_nlg_f64_e32 vcc +; SI-NEXT: v_cndmask_b32_e64 {{v[0-9]+}}, 0, -1, vcc define void @f64_ueq(i32 addrspace(1)* %out, double %a, double %b) { entry: %0 = fcmp ueq double %a, %b @@ -95,11 +90,9 @@ entry: } ; FUNC-LABEL: {{^}}f64_ugt: -; SI: v_cmp_u_f64 -; SI: v_cmp_gt_f64 -; SI: v_cndmask_b32_e64 -; SI: v_cndmask_b32_e64 -; SI: v_or_b32_e32 + +; SI: v_cmp_nle_f64_e32 vcc +; SI-NEXT: v_cndmask_b32_e64 {{v[0-9]+}}, 0, -1, vcc define void @f64_ugt(i32 addrspace(1)* %out, double %a, double %b) { entry: %0 = fcmp ugt double %a, %b @@ -109,11 +102,8 @@ entry: } ; FUNC-LABEL: {{^}}f64_uge: -; SI: v_cmp_u_f64 -; SI: v_cmp_ge_f64 -; SI: v_cndmask_b32_e64 -; SI: v_cndmask_b32_e64 -; SI: v_or_b32_e32 +; SI: v_cmp_nlt_f64_e32 vcc +; SI-NEXT: v_cndmask_b32_e64 {{v[0-9]+}}, 0, -1, vcc define void @f64_uge(i32 addrspace(1)* %out, double %a, double %b) { entry: %0 = fcmp uge double %a, %b @@ -123,11 +113,8 @@ entry: } ; FUNC-LABEL: {{^}}f64_ult: -; SI: v_cmp_u_f64 -; SI: v_cmp_lt_f64 -; SI: v_cndmask_b32_e64 -; SI: v_cndmask_b32_e64 -; SI: v_or_b32_e32 +; SI: v_cmp_nge_f64_e32 vcc +; SI-NEXT: v_cndmask_b32_e64 {{v[0-9]+}}, 0, -1, vcc define void @f64_ult(i32 addrspace(1)* %out, double %a, double %b) { entry: %0 = fcmp ult double %a, %b @@ -137,11 +124,8 @@ entry: } ; FUNC-LABEL: {{^}}f64_ule: -; SI: v_cmp_u_f64 -; SI: v_cmp_le_f64 -; SI: v_cndmask_b32_e64 -; SI: v_cndmask_b32_e64 -; SI: v_or_b32_e32 +; SI: v_cmp_ngt_f64_e32 vcc +; SI-NEXT: v_cndmask_b32_e64 {{v[0-9]+}}, 0, -1, vcc define void @f64_ule(i32 addrspace(1)* %out, double %a, double %b) { entry: %0 = fcmp ule double %a, %b diff --git a/test/CodeGen/R600/seto.ll b/test/CodeGen/R600/seto.ll index 5fe6ff6..9b5d6b5 100644 --- a/test/CodeGen/R600/seto.ll +++ b/test/CodeGen/R600/seto.ll @@ -1,4 +1,5 @@ -; RUN: llc -march=r600 -mcpu=verde -verify-machineinstrs < %s | FileCheck %s +; RUN: llc -march=amdgcn -mcpu=verde -verify-machineinstrs < %s | FileCheck %s +; RUN: llc -march=amdgcn -mcpu=tonga -verify-machineinstrs < %s | FileCheck %s ; CHECK-LABEL: {{^}}main: ; CHECK: v_cmp_o_f32_e64 [[CMP:s\[[0-9]+:[0-9]+\]]], [[SREG:s[0-9]+]], [[SREG]] diff --git a/test/CodeGen/R600/setuo.ll 
b/test/CodeGen/R600/setuo.ll index a391177..76346c4 100644 --- a/test/CodeGen/R600/setuo.ll +++ b/test/CodeGen/R600/setuo.ll @@ -1,4 +1,5 @@ -; RUN: llc -march=r600 -mcpu=verde -verify-machineinstrs < %s | FileCheck %s +; RUN: llc -march=amdgcn -mcpu=verde -verify-machineinstrs < %s | FileCheck %s +; RUN: llc -march=amdgcn -mcpu=tonga -verify-machineinstrs < %s | FileCheck %s ; CHECK-LABEL: {{^}}main: ; CHECK: v_cmp_u_f32_e64 [[CMP:s\[[0-9]+:[0-9]+\]]], [[SREG:s[0-9]+]], [[SREG]] diff --git a/test/CodeGen/R600/sext-in-reg.ll b/test/CodeGen/R600/sext-in-reg.ll index d364e6b..3260179 100644 --- a/test/CodeGen/R600/sext-in-reg.ll +++ b/test/CodeGen/R600/sext-in-reg.ll @@ -1,4 +1,4 @@ -; RUN: llc -march=r600 -mcpu=SI -verify-machineinstrs < %s | FileCheck -check-prefix=SI -check-prefix=FUNC %s +; RUN: llc -march=amdgcn -mcpu=SI -verify-machineinstrs < %s | FileCheck -check-prefix=SI -check-prefix=FUNC %s ; RUN: llc -march=r600 -mcpu=cypress -verify-machineinstrs < %s | FileCheck -check-prefix=EG -check-prefix=FUNC %s declare i32 @llvm.AMDGPU.imax(i32, i32) nounwind readnone diff --git a/test/CodeGen/R600/sgpr-control-flow.ll b/test/CodeGen/R600/sgpr-control-flow.ll index d8b8dff..f0236ac 100644 --- a/test/CodeGen/R600/sgpr-control-flow.ll +++ b/test/CodeGen/R600/sgpr-control-flow.ll @@ -1,4 +1,4 @@ -; RUN: llc -march=r600 -mcpu=SI -verify-machineinstrs< %s | FileCheck -check-prefix=SI %s +; RUN: llc -march=amdgcn -mcpu=SI -verify-machineinstrs< %s | FileCheck -check-prefix=SI %s ; ; ; Most SALU instructions ignore control flow, so we need to make sure @@ -59,6 +59,47 @@ endif: ret void } +; FIXME: Should write to different SGPR pairs instead of copying to +; VALU for i1 phi. + +; SI-LABEL: {{^}}sgpr_if_else_valu_cmp_phi_br: +; SI: buffer_load_dword [[AVAL:v[0-9]+]] +; SI: v_cmp_lt_i32_e64 [[CMP_IF:s\[[0-9]+:[0-9]+\]]], [[AVAL]], 0 +; SI: v_cndmask_b32_e64 [[V_CMP:v[0-9]+]], 0, -1, [[CMP_IF]] + +; SI: BB2_1: +; SI: buffer_load_dword [[AVAL:v[0-9]+]] +; SI: v_cmp_eq_i32_e64 [[CMP_ELSE:s\[[0-9]+:[0-9]+\]]], [[AVAL]], 0 +; SI: v_cndmask_b32_e64 [[V_CMP]], 0, -1, [[CMP_ELSE]] + +; SI: v_cmp_ne_i32_e64 [[CMP_CMP:s\[[0-9]+:[0-9]+\]]], [[V_CMP]], 0 +; SI: v_cndmask_b32_e64 [[RESULT:v[0-9]+]], 0, -1, [[CMP_CMP]] +; SI: buffer_store_dword [[RESULT]] +define void @sgpr_if_else_valu_cmp_phi_br(i32 addrspace(1)* %out, i32 addrspace(1)* %a, i32 addrspace(1)* %b) { +entry: + %tid = call i32 @llvm.r600.read.tidig.x() #0 + %tmp1 = icmp eq i32 %tid, 0 + br i1 %tmp1, label %if, label %else + +if: + %gep.if = getelementptr i32 addrspace(1)* %a, i32 %tid + %a.val = load i32 addrspace(1)* %gep.if + %cmp.if = icmp eq i32 %a.val, 0 + br label %endif + +else: + %gep.else = getelementptr i32 addrspace(1)* %b, i32 %tid + %b.val = load i32 addrspace(1)* %gep.else + %cmp.else = icmp slt i32 %b.val, 0 + br label %endif + +endif: + %tmp4 = phi i1 [%cmp.if, %if], [%cmp.else, %else] + %ext = sext i1 %tmp4 to i32 + store i32 %ext, i32 addrspace(1)* %out + ret void +} + declare i32 @llvm.r600.read.tidig.x() #0 attributes #0 = { readnone } diff --git a/test/CodeGen/R600/sgpr-copy-duplicate-operand.ll b/test/CodeGen/R600/sgpr-copy-duplicate-operand.ll index aa97fbf..893f5a3 100644 --- a/test/CodeGen/R600/sgpr-copy-duplicate-operand.ll +++ b/test/CodeGen/R600/sgpr-copy-duplicate-operand.ll @@ -1,4 +1,5 @@ -; RUN: llc -march=r600 -mcpu=SI -verify-machineinstrs< %s | FileCheck -check-prefix=SI %s +; RUN: llc -march=amdgcn -mcpu=SI -verify-machineinstrs< %s | FileCheck -check-prefix=SI %s +; RUN: llc -march=amdgcn -mcpu=tonga 
-verify-machineinstrs< %s | FileCheck -check-prefix=SI %s ; Copy VGPR -> SGPR used twice as an instruction operand, which is then ; used in an REG_SEQUENCE that also needs to be handled. diff --git a/test/CodeGen/R600/sgpr-copy.ll b/test/CodeGen/R600/sgpr-copy.ll index 8daf753..57cbadd 100644 --- a/test/CodeGen/R600/sgpr-copy.ll +++ b/test/CodeGen/R600/sgpr-copy.ll @@ -1,9 +1,10 @@ -; RUN: llc < %s -march=r600 -mcpu=SI -verify-machineinstrs | FileCheck %s +; RUN: llc < %s -march=amdgcn -mcpu=SI -verify-machineinstrs | FileCheck %s +; RUN: llc < %s -march=amdgcn -mcpu=tonga -verify-machineinstrs | FileCheck %s ; This test checks that no VGPR to SGPR copies are created by the register ; allocator. ; CHECK-LABEL: {{^}}phi1: -; CHECK: s_buffer_load_dword [[DST:s[0-9]]], {{s\[[0-9]+:[0-9]+\]}}, 0 +; CHECK: s_buffer_load_dword [[DST:s[0-9]]], {{s\[[0-9]+:[0-9]+\]}}, 0x0 ; CHECK: v_mov_b32_e32 v{{[0-9]}}, [[DST]] define void @phi1(<16 x i8> addrspace(2)* inreg, <16 x i8> addrspace(2)* inreg, <32 x i8> addrspace(2)* inreg, i32 inreg, <2 x i32>, <2 x i32>, <2 x i32>, <3 x i32>, <2 x i32>, <2 x i32>, <2 x i32>, float, float, float, float, float, float, float, float, float) #0 { @@ -202,8 +203,8 @@ attributes #2 = { readonly } attributes #3 = { readnone } attributes #4 = { nounwind readonly } -!0 = metadata !{metadata !"const", null} -!1 = metadata !{metadata !0, metadata !0, i64 0, i32 1} +!0 = !{!"const", null} +!1 = !{!0, !0, i64 0, i32 1} ; Function Attrs: nounwind readnone declare float @llvm.SI.fs.interp(i32, i32, i32, <2 x i32>) #1 @@ -267,7 +268,7 @@ endif: ret void } -!2 = metadata !{metadata !"const", null, i32 1} +!2 = !{!"const", null, i32 1} ; CHECK-LABEL: {{^}}copy1: ; CHECK: buffer_load_dword diff --git a/test/CodeGen/R600/shl.ll b/test/CodeGen/R600/shl.ll index 71c9fc4..f89353b 100644 --- a/test/CodeGen/R600/shl.ll +++ b/test/CodeGen/R600/shl.ll @@ -1,13 +1,18 @@ -;RUN: llc < %s -march=r600 -mcpu=redwood | FileCheck --check-prefix=EG-CHECK %s -;RUN: llc < %s -march=r600 -mcpu=verde -verify-machineinstrs | FileCheck --check-prefix=SI-CHECK %s +;RUN: llc < %s -march=r600 -mcpu=redwood | FileCheck --check-prefix=EG %s +;RUN: llc < %s -march=amdgcn -mcpu=verde -verify-machineinstrs | FileCheck --check-prefix=SI %s +;RUN: llc < %s -march=amdgcn -mcpu=tonga -verify-machineinstrs | FileCheck --check-prefix=VI %s -;EG-CHECK: {{^}}shl_v2i32: -;EG-CHECK: LSHL {{\*? *}}T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}} -;EG-CHECK: LSHL {{\*? *}}T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}} +;EG: {{^}}shl_v2i32: +;EG: LSHL {{\*? *}}T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}} +;EG: LSHL {{\*? *}}T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}} -;SI-CHECK: {{^}}shl_v2i32: -;SI-CHECK: v_lshl_b32_e32 v{{[0-9]+, v[0-9]+, v[0-9]+}} -;SI-CHECK: v_lshl_b32_e32 v{{[0-9]+, v[0-9]+, v[0-9]+}} +;SI: {{^}}shl_v2i32: +;SI: v_lshl_b32_e32 v{{[0-9]+, v[0-9]+, v[0-9]+}} +;SI: v_lshl_b32_e32 v{{[0-9]+, v[0-9]+, v[0-9]+}} + +;VI: {{^}}shl_v2i32: +;VI: v_lshlrev_b32_e32 v{{[0-9]+, v[0-9]+, v[0-9]+}} +;VI: v_lshlrev_b32_e32 v{{[0-9]+, v[0-9]+, v[0-9]+}} define void @shl_v2i32(<2 x i32> addrspace(1)* %out, <2 x i32> addrspace(1)* %in) { %b_ptr = getelementptr <2 x i32> addrspace(1)* %in, i32 1 @@ -18,17 +23,23 @@ define void @shl_v2i32(<2 x i32> addrspace(1)* %out, <2 x i32> addrspace(1)* %in ret void } -;EG-CHECK: {{^}}shl_v4i32: -;EG-CHECK: LSHL {{\*? *}}T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}} -;EG-CHECK: LSHL {{\*? 
*}}T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}} -;EG-CHECK: LSHL {{\*? *}}T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}} -;EG-CHECK: LSHL {{\*? *}}T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}} +;EG: {{^}}shl_v4i32: +;EG: LSHL {{\*? *}}T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}} +;EG: LSHL {{\*? *}}T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}} +;EG: LSHL {{\*? *}}T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}} +;EG: LSHL {{\*? *}}T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}} + +;SI: {{^}}shl_v4i32: +;SI: v_lshl_b32_e32 v{{[0-9]+, v[0-9]+, v[0-9]+}} +;SI: v_lshl_b32_e32 v{{[0-9]+, v[0-9]+, v[0-9]+}} +;SI: v_lshl_b32_e32 v{{[0-9]+, v[0-9]+, v[0-9]+}} +;SI: v_lshl_b32_e32 v{{[0-9]+, v[0-9]+, v[0-9]+}} -;SI-CHECK: {{^}}shl_v4i32: -;SI-CHECK: v_lshl_b32_e32 v{{[0-9]+, v[0-9]+, v[0-9]+}} -;SI-CHECK: v_lshl_b32_e32 v{{[0-9]+, v[0-9]+, v[0-9]+}} -;SI-CHECK: v_lshl_b32_e32 v{{[0-9]+, v[0-9]+, v[0-9]+}} -;SI-CHECK: v_lshl_b32_e32 v{{[0-9]+, v[0-9]+, v[0-9]+}} +;VI: {{^}}shl_v4i32: +;VI: v_lshlrev_b32_e32 v{{[0-9]+, v[0-9]+, v[0-9]+}} +;VI: v_lshlrev_b32_e32 v{{[0-9]+, v[0-9]+, v[0-9]+}} +;VI: v_lshlrev_b32_e32 v{{[0-9]+, v[0-9]+, v[0-9]+}} +;VI: v_lshlrev_b32_e32 v{{[0-9]+, v[0-9]+, v[0-9]+}} define void @shl_v4i32(<4 x i32> addrspace(1)* %out, <4 x i32> addrspace(1)* %in) { %b_ptr = getelementptr <4 x i32> addrspace(1)* %in, i32 1 @@ -39,20 +50,23 @@ define void @shl_v4i32(<4 x i32> addrspace(1)* %out, <4 x i32> addrspace(1)* %in ret void } -;EG-CHECK: {{^}}shl_i64: -;EG-CHECK: SUB_INT {{\*? *}}[[COMPSH:T[0-9]+\.[XYZW]]], {{literal.[xy]}}, [[SHIFT:T[0-9]+\.[XYZW]]] -;EG-CHECK: LSHR {{\* *}}[[TEMP:T[0-9]+\.[XYZW]]], [[OPLO:T[0-9]+\.[XYZW]]], {{[[COMPSH]]|PV.[XYZW]}} -;EG-CHECK: LSHR {{\*? *}}[[OVERF:T[0-9]+\.[XYZW]]], {{[[TEMP]]|PV.[XYZW]}}, 1 +;EG: {{^}}shl_i64: +;EG: SUB_INT {{\*? *}}[[COMPSH:T[0-9]+\.[XYZW]]], {{literal.[xy]}}, [[SHIFT:T[0-9]+\.[XYZW]]] +;EG: LSHR {{\* *}}[[TEMP:T[0-9]+\.[XYZW]]], [[OPLO:T[0-9]+\.[XYZW]]], {{[[COMPSH]]|PV.[XYZW]}} +;EG: LSHR {{\*? *}}[[OVERF:T[0-9]+\.[XYZW]]], {{[[TEMP]]|PV.[XYZW]}}, 1 ;EG_CHECK-DAG: ADD_INT {{\*? *}}[[BIGSH:T[0-9]+\.[XYZW]]], [[SHIFT]], literal -;EG-CHECK-DAG: LSHL {{\*? *}}[[HISMTMP:T[0-9]+\.[XYZW]]], [[OPHI:T[0-9]+\.[XYZW]]], [[SHIFT]] -;EG-CHECK-DAG: OR_INT {{\*? *}}[[HISM:T[0-9]+\.[XYZW]]], {{[[HISMTMP]]|PV.[XYZW]}}, {{[[OVERF]]|PV.[XYZW]}} -;EG-CHECK-DAG: LSHL {{\*? *}}[[LOSM:T[0-9]+\.[XYZW]]], [[OPLO]], {{PS|[[SHIFT]]}} -;EG-CHECK-DAG: SETGT_UINT {{\*? *}}[[RESC:T[0-9]+\.[XYZW]]], [[SHIFT]], literal -;EG-CHECK-DAG: CNDE_INT {{\*? *}}[[RESLO:T[0-9]+\.[XYZW]]], {{T[0-9]+\.[XYZW]}} -;EG-CHECK-DAG: CNDE_INT {{\*? *}}[[RESHI:T[0-9]+\.[XYZW]]], {{T[0-9]+\.[XYZW], .*}}, 0.0 +;EG-DAG: LSHL {{\*? *}}[[HISMTMP:T[0-9]+\.[XYZW]]], [[OPHI:T[0-9]+\.[XYZW]]], [[SHIFT]] +;EG-DAG: OR_INT {{\*? *}}[[HISM:T[0-9]+\.[XYZW]]], {{[[HISMTMP]]|PV.[XYZW]}}, {{[[OVERF]]|PV.[XYZW]}} +;EG-DAG: LSHL {{\*? *}}[[LOSM:T[0-9]+\.[XYZW]]], [[OPLO]], {{PS|[[SHIFT]]}} +;EG-DAG: SETGT_UINT {{\*? *}}[[RESC:T[0-9]+\.[XYZW]]], [[SHIFT]], literal +;EG-DAG: CNDE_INT {{\*? *}}[[RESLO:T[0-9]+\.[XYZW]]], {{T[0-9]+\.[XYZW]}} +;EG-DAG: CNDE_INT {{\*? 
*}}[[RESHI:T[0-9]+\.[XYZW]]], {{T[0-9]+\.[XYZW], .*}}, 0.0 -;SI-CHECK: {{^}}shl_i64: -;SI-CHECK: v_lshl_b64 {{v\[[0-9]+:[0-9]+\], v\[[0-9]+:[0-9]+\], v[0-9]+}} +;SI: {{^}}shl_i64: +;SI: v_lshl_b64 {{v\[[0-9]+:[0-9]+\], v\[[0-9]+:[0-9]+\], v[0-9]+}} + +;VI: {{^}}shl_i64: +;VI: v_lshlrev_b64 {{v\[[0-9]+:[0-9]+\], v[0-9]+, v\[[0-9]+:[0-9]+\]}} define void @shl_i64(i64 addrspace(1)* %out, i64 addrspace(1)* %in) { %b_ptr = getelementptr i64 addrspace(1)* %in, i64 1 @@ -63,31 +77,35 @@ define void @shl_i64(i64 addrspace(1)* %out, i64 addrspace(1)* %in) { ret void } -;EG-CHECK: {{^}}shl_v2i64: -;EG-CHECK-DAG: SUB_INT {{\*? *}}[[COMPSHA:T[0-9]+\.[XYZW]]], {{literal.[xy]}}, [[SHA:T[0-9]+\.[XYZW]]] -;EG-CHECK-DAG: SUB_INT {{\*? *}}[[COMPSHB:T[0-9]+\.[XYZW]]], {{literal.[xy]}}, [[SHB:T[0-9]+\.[XYZW]]] -;EG-CHECK-DAG: LSHR {{\*? *}}[[COMPSHA]] -;EG-CHECK-DAG: LSHR {{\*? *}}[[COMPSHB]] -;EG-CHECK-DAG: LSHR {{.*}}, 1 -;EG-CHECK-DAG: LSHR {{.*}}, 1 -;EG-CHECK-DAG: ADD_INT {{\*? *}}[[BIGSHA:T[0-9]+\.[XYZW]]]{{.*}}, literal -;EG-CHECK-DAG: ADD_INT {{\*? *}}[[BIGSHB:T[0-9]+\.[XYZW]]]{{.*}}, literal -;EG-CHECK-DAG: LSHL {{.*}}, [[SHA]] -;EG-CHECK-DAG: LSHL {{.*}}, [[SHB]] -;EG-CHECK-DAG: LSHL {{.*}}, [[SHA]] -;EG-CHECK-DAG: LSHL {{.*}}, [[SHB]] -;EG-CHECK-DAG: LSHL -;EG-CHECK-DAG: LSHL -;EG-CHECK-DAG: SETGT_UINT {{\*? *T[0-9]\.[XYZW]}}, [[SHA]], literal -;EG-CHECK-DAG: SETGT_UINT {{\*? *T[0-9]\.[XYZW]}}, [[SHB]], literal -;EG-CHECK-DAG: CNDE_INT {{.*}}, 0.0 -;EG-CHECK-DAG: CNDE_INT {{.*}}, 0.0 -;EG-CHECK-DAG: CNDE_INT -;EG-CHECK-DAG: CNDE_INT - -;SI-CHECK: {{^}}shl_v2i64: -;SI-CHECK: v_lshl_b64 {{v\[[0-9]+:[0-9]+\], v\[[0-9]+:[0-9]+\], v[0-9]+}} -;SI-CHECK: v_lshl_b64 {{v\[[0-9]+:[0-9]+\], v\[[0-9]+:[0-9]+\], v[0-9]+}} +;EG: {{^}}shl_v2i64: +;EG-DAG: SUB_INT {{\*? *}}[[COMPSHA:T[0-9]+\.[XYZW]]], {{literal.[xy]}}, [[SHA:T[0-9]+\.[XYZW]]] +;EG-DAG: SUB_INT {{\*? *}}[[COMPSHB:T[0-9]+\.[XYZW]]], {{literal.[xy]}}, [[SHB:T[0-9]+\.[XYZW]]] +;EG-DAG: LSHR {{\*? *}}[[COMPSHA]] +;EG-DAG: LSHR {{\*? *}}[[COMPSHB]] +;EG-DAG: LSHR {{.*}}, 1 +;EG-DAG: LSHR {{.*}}, 1 +;EG-DAG: ADD_INT {{\*? *}}[[BIGSHA:T[0-9]+\.[XYZW]]]{{.*}}, literal +;EG-DAG: ADD_INT {{\*? *}}[[BIGSHB:T[0-9]+\.[XYZW]]]{{.*}}, literal +;EG-DAG: LSHL {{.*}}, [[SHA]] +;EG-DAG: LSHL {{.*}}, [[SHB]] +;EG-DAG: LSHL {{.*}}, [[SHA]] +;EG-DAG: LSHL {{.*}}, [[SHB]] +;EG-DAG: LSHL +;EG-DAG: LSHL +;EG-DAG: SETGT_UINT {{\*? *T[0-9]\.[XYZW]}}, [[SHA]], literal +;EG-DAG: SETGT_UINT {{\*? *T[0-9]\.[XYZW]}}, [[SHB]], literal +;EG-DAG: CNDE_INT {{.*}}, 0.0 +;EG-DAG: CNDE_INT {{.*}}, 0.0 +;EG-DAG: CNDE_INT +;EG-DAG: CNDE_INT + +;SI: {{^}}shl_v2i64: +;SI: v_lshl_b64 {{v\[[0-9]+:[0-9]+\], v\[[0-9]+:[0-9]+\], v[0-9]+}} +;SI: v_lshl_b64 {{v\[[0-9]+:[0-9]+\], v\[[0-9]+:[0-9]+\], v[0-9]+}} + +;VI: {{^}}shl_v2i64: +;VI: v_lshlrev_b64 {{v\[[0-9]+:[0-9]+\], v[0-9]+, v\[[0-9]+:[0-9]+\]}} +;VI: v_lshlrev_b64 {{v\[[0-9]+:[0-9]+\], v[0-9]+, v\[[0-9]+:[0-9]+\]}} define void @shl_v2i64(<2 x i64> addrspace(1)* %out, <2 x i64> addrspace(1)* %in) { %b_ptr = getelementptr <2 x i64> addrspace(1)* %in, i64 1 @@ -98,53 +116,59 @@ define void @shl_v2i64(<2 x i64> addrspace(1)* %out, <2 x i64> addrspace(1)* %in ret void } -;EG-CHECK: {{^}}shl_v4i64: -;EG-CHECK-DAG: SUB_INT {{\*? *}}[[COMPSHA:T[0-9]+\.[XYZW]]], {{literal.[xy]}}, [[SHA:T[0-9]+\.[XYZW]]] -;EG-CHECK-DAG: SUB_INT {{\*? *}}[[COMPSHB:T[0-9]+\.[XYZW]]], {{literal.[xy]}}, [[SHB:T[0-9]+\.[XYZW]]] -;EG-CHECK-DAG: SUB_INT {{\*? *}}[[COMPSHC:T[0-9]+\.[XYZW]]], {{literal.[xy]}}, [[SHC:T[0-9]+\.[XYZW]]] -;EG-CHECK-DAG: SUB_INT {{\*? 
*}}[[COMPSHD:T[0-9]+\.[XYZW]]], {{literal.[xy]}}, [[SHD:T[0-9]+\.[XYZW]]] -;EG-CHECK-DAG: LSHR {{\*? *}}[[COMPSHA]] -;EG-CHECK-DAG: LSHR {{\*? *}}[[COMPSHB]] -;EG-CHECK-DAG: LSHR {{\*? *}}[[COMPSHC]] -;EG-CHECK-DAG: LSHR {{\*? *}}[[COMPSHD]] -;EG-CHECK-DAG: LSHR {{.*}}, 1 -;EG-CHECK-DAG: LSHR {{.*}}, 1 -;EG-CHECK-DAG: LSHR {{.*}}, 1 -;EG-CHECK-DAG: LSHR {{.*}}, 1 -;EG-CHECK-DAG: ADD_INT {{\*? *}}[[BIGSHA:T[0-9]+\.[XYZW]]]{{.*}}, literal -;EG-CHECK-DAG: ADD_INT {{\*? *}}[[BIGSHB:T[0-9]+\.[XYZW]]]{{.*}}, literal -;EG-CHECK-DAG: ADD_INT {{\*? *}}[[BIGSHC:T[0-9]+\.[XYZW]]]{{.*}}, literal -;EG-CHECK-DAG: ADD_INT {{\*? *}}[[BIGSHD:T[0-9]+\.[XYZW]]]{{.*}}, literal -;EG-CHECK-DAG: LSHL {{.*}}, [[SHA]] -;EG-CHECK-DAG: LSHL {{.*}}, [[SHB]] -;EG-CHECK-DAG: LSHL {{.*}}, [[SHC]] -;EG-CHECK-DAG: LSHL {{.*}}, [[SHD]] -;EG-CHECK-DAG: LSHL {{.*}}, [[SHA]] -;EG-CHECK-DAG: LSHL {{.*}}, [[SHB]] -;EG-CHECK-DAG: LSHL {{.*}}, [[SHC]] -;EG-CHECK-DAG: LSHL {{.*}}, [[SHD]] -;EG-CHECK-DAG: LSHL -;EG-CHECK-DAG: LSHL -;EG-CHECK-DAG: LSHL -;EG-CHECK-DAG: LSHL -;EG-CHECK-DAG: SETGT_UINT {{\*? *T[0-9]\.[XYZW]}}, [[SHA]], literal -;EG-CHECK-DAG: SETGT_UINT {{\*? *T[0-9]\.[XYZW]}}, [[SHB]], literal -;EG-CHECK-DAG: SETGT_UINT {{\*? *T[0-9]\.[XYZW]}}, [[SHC]], literal -;EG-CHECK-DAG: SETGT_UINT {{\*? *T[0-9]\.[XYZW]}}, [[SHD]], literal -;EG-CHECK-DAG: CNDE_INT {{.*}}, 0.0 -;EG-CHECK-DAG: CNDE_INT {{.*}}, 0.0 -;EG-CHECK-DAG: CNDE_INT {{.*}}, 0.0 -;EG-CHECK-DAG: CNDE_INT {{.*}}, 0.0 -;EG-CHECK-DAG: CNDE_INT -;EG-CHECK-DAG: CNDE_INT -;EG-CHECK-DAG: CNDE_INT -;EG-CHECK-DAG: CNDE_INT - -;SI-CHECK: {{^}}shl_v4i64: -;SI-CHECK: v_lshl_b64 {{v\[[0-9]+:[0-9]+\], v\[[0-9]+:[0-9]+\], v[0-9]+}} -;SI-CHECK: v_lshl_b64 {{v\[[0-9]+:[0-9]+\], v\[[0-9]+:[0-9]+\], v[0-9]+}} -;SI-CHECK: v_lshl_b64 {{v\[[0-9]+:[0-9]+\], v\[[0-9]+:[0-9]+\], v[0-9]+}} -;SI-CHECK: v_lshl_b64 {{v\[[0-9]+:[0-9]+\], v\[[0-9]+:[0-9]+\], v[0-9]+}} +;EG: {{^}}shl_v4i64: +;EG-DAG: SUB_INT {{\*? *}}[[COMPSHA:T[0-9]+\.[XYZW]]], {{literal.[xy]}}, [[SHA:T[0-9]+\.[XYZW]]] +;EG-DAG: SUB_INT {{\*? *}}[[COMPSHB:T[0-9]+\.[XYZW]]], {{literal.[xy]}}, [[SHB:T[0-9]+\.[XYZW]]] +;EG-DAG: SUB_INT {{\*? *}}[[COMPSHC:T[0-9]+\.[XYZW]]], {{literal.[xy]}}, [[SHC:T[0-9]+\.[XYZW]]] +;EG-DAG: SUB_INT {{\*? *}}[[COMPSHD:T[0-9]+\.[XYZW]]], {{literal.[xy]}}, [[SHD:T[0-9]+\.[XYZW]]] +;EG-DAG: LSHR {{\*? *}}[[COMPSHA]] +;EG-DAG: LSHR {{\*? *}}[[COMPSHB]] +;EG-DAG: LSHR {{\*? *}}[[COMPSHC]] +;EG-DAG: LSHR {{\*? *}}[[COMPSHD]] +;EG-DAG: LSHR {{.*}}, 1 +;EG-DAG: LSHR {{.*}}, 1 +;EG-DAG: LSHR {{.*}}, 1 +;EG-DAG: LSHR {{.*}}, 1 +;EG-DAG: ADD_INT {{\*? *}}[[BIGSHA:T[0-9]+\.[XYZW]]]{{.*}}, literal +;EG-DAG: ADD_INT {{\*? *}}[[BIGSHB:T[0-9]+\.[XYZW]]]{{.*}}, literal +;EG-DAG: ADD_INT {{\*? *}}[[BIGSHC:T[0-9]+\.[XYZW]]]{{.*}}, literal +;EG-DAG: ADD_INT {{\*? *}}[[BIGSHD:T[0-9]+\.[XYZW]]]{{.*}}, literal +;EG-DAG: LSHL {{.*}}, [[SHA]] +;EG-DAG: LSHL {{.*}}, [[SHB]] +;EG-DAG: LSHL {{.*}}, [[SHC]] +;EG-DAG: LSHL {{.*}}, [[SHD]] +;EG-DAG: LSHL {{.*}}, [[SHA]] +;EG-DAG: LSHL {{.*}}, [[SHB]] +;EG-DAG: LSHL {{.*}}, [[SHC]] +;EG-DAG: LSHL {{.*}}, [[SHD]] +;EG-DAG: LSHL +;EG-DAG: LSHL +;EG-DAG: LSHL +;EG-DAG: LSHL +;EG-DAG: SETGT_UINT {{\*? *T[0-9]\.[XYZW]}}, [[SHA]], literal +;EG-DAG: SETGT_UINT {{\*? *T[0-9]\.[XYZW]}}, [[SHB]], literal +;EG-DAG: SETGT_UINT {{\*? *T[0-9]\.[XYZW]}}, [[SHC]], literal +;EG-DAG: SETGT_UINT {{\*? 
*T[0-9]\.[XYZW]}}, [[SHD]], literal +;EG-DAG: CNDE_INT {{.*}}, 0.0 +;EG-DAG: CNDE_INT {{.*}}, 0.0 +;EG-DAG: CNDE_INT {{.*}}, 0.0 +;EG-DAG: CNDE_INT {{.*}}, 0.0 +;EG-DAG: CNDE_INT +;EG-DAG: CNDE_INT +;EG-DAG: CNDE_INT +;EG-DAG: CNDE_INT + +;SI: {{^}}shl_v4i64: +;SI: v_lshl_b64 {{v\[[0-9]+:[0-9]+\], v\[[0-9]+:[0-9]+\], v[0-9]+}} +;SI: v_lshl_b64 {{v\[[0-9]+:[0-9]+\], v\[[0-9]+:[0-9]+\], v[0-9]+}} +;SI: v_lshl_b64 {{v\[[0-9]+:[0-9]+\], v\[[0-9]+:[0-9]+\], v[0-9]+}} +;SI: v_lshl_b64 {{v\[[0-9]+:[0-9]+\], v\[[0-9]+:[0-9]+\], v[0-9]+}} + +;VI: {{^}}shl_v4i64: +;VI: v_lshlrev_b64 {{v\[[0-9]+:[0-9]+\], v[0-9]+, v\[[0-9]+:[0-9]+\]}} +;VI: v_lshlrev_b64 {{v\[[0-9]+:[0-9]+\], v[0-9]+, v\[[0-9]+:[0-9]+\]}} +;VI: v_lshlrev_b64 {{v\[[0-9]+:[0-9]+\], v[0-9]+, v\[[0-9]+:[0-9]+\]}} +;VI: v_lshlrev_b64 {{v\[[0-9]+:[0-9]+\], v[0-9]+, v\[[0-9]+:[0-9]+\]}} define void @shl_v4i64(<4 x i64> addrspace(1)* %out, <4 x i64> addrspace(1)* %in) { %b_ptr = getelementptr <4 x i64> addrspace(1)* %in, i64 1 diff --git a/test/CodeGen/R600/shl_add_constant.ll b/test/CodeGen/R600/shl_add_constant.ll index 801f77d..6915495 100644 --- a/test/CodeGen/R600/shl_add_constant.ll +++ b/test/CodeGen/R600/shl_add_constant.ll @@ -1,4 +1,4 @@ -; RUN: llc -march=r600 -mcpu=SI -verify-machineinstrs < %s | FileCheck -check-prefix=SI %s +; RUN: llc -march=amdgcn -mcpu=SI -verify-machineinstrs < %s | FileCheck -check-prefix=SI %s declare i32 @llvm.r600.read.tidig.x() #1 diff --git a/test/CodeGen/R600/shl_add_ptr.ll b/test/CodeGen/R600/shl_add_ptr.ll index 047cf25..d423153 100644 --- a/test/CodeGen/R600/shl_add_ptr.ll +++ b/test/CodeGen/R600/shl_add_ptr.ll @@ -1,4 +1,5 @@ -; RUN: llc -march=r600 -mcpu=bonaire -verify-machineinstrs -mattr=+load-store-opt -enable-misched < %s | FileCheck -check-prefix=SI %s +; RUN: llc -march=amdgcn -mcpu=bonaire -verify-machineinstrs -mattr=+load-store-opt -enable-misched < %s | FileCheck -check-prefix=SI %s +; RUN: llc -march=amdgcn -mcpu=tonga -verify-machineinstrs -mattr=+load-store-opt -enable-misched < %s | FileCheck -check-prefix=SI %s ; Test that doing a shift of a pointer with a constant add will be ; folded into the constant offset addressing mode even if the add has @@ -16,7 +17,7 @@ declare i32 @llvm.r600.read.tidig.x() #1 ; SI-LABEL: {{^}}load_shl_base_lds_0: ; SI: v_lshlrev_b32_e32 [[PTR:v[0-9]+]], 2, {{v[0-9]+}} -; SI: ds_read_b32 {{v[0-9]+}}, [[PTR]] offset:8 [M0] +; SI: ds_read_b32 {{v[0-9]+}}, [[PTR]] offset:8 ; SI: s_endpgm define void @load_shl_base_lds_0(float addrspace(1)* %out, i32 addrspace(1)* %add_use) #0 { %tid.x = tail call i32 @llvm.r600.read.tidig.x() #1 @@ -33,7 +34,7 @@ define void @load_shl_base_lds_0(float addrspace(1)* %out, i32 addrspace(1)* %ad ; SI-LABEL: {{^}}load_shl_base_lds_1: ; SI: v_lshlrev_b32_e32 [[PTR:v[0-9]+]], 2, {{v[0-9]+}} -; SI: ds_read_b32 [[RESULT:v[0-9]+]], [[PTR]] offset:8 [M0] +; SI: ds_read_b32 [[RESULT:v[0-9]+]], [[PTR]] offset:8 ; SI: v_add_i32_e32 [[ADDUSE:v[0-9]+]], 8, v{{[0-9]+}} ; SI-DAG: buffer_store_dword [[RESULT]] ; SI-DAG: buffer_store_dword [[ADDUSE]] @@ -68,8 +69,9 @@ define void @load_shl_base_lds_max_offset(i8 addrspace(1)* %out, i8 addrspace(3) ; pointer can be used with an offset into the second one. 
; SI-LABEL: {{^}}load_shl_base_lds_2: -; SI: v_lshlrev_b32_e32 [[PTR:v[0-9]+]], 2, {{v[0-9]+}} -; SI-NEXT: ds_read2st64_b32 {{v\[[0-9]+:[0-9]+\]}}, [[PTR]] offset0:1 offset1:9 [M0] +; SI: s_mov_b32 m0, -1 +; SI-NEXT: v_lshlrev_b32_e32 [[PTR:v[0-9]+]], 2, {{v[0-9]+}} +; SI-NEXT: ds_read2st64_b32 {{v\[[0-9]+:[0-9]+\]}}, [[PTR]] offset0:1 offset1:9 ; SI: s_endpgm define void @load_shl_base_lds_2(float addrspace(1)* %out) #0 { %tid.x = tail call i32 @llvm.r600.read.tidig.x() #1 @@ -85,7 +87,7 @@ define void @load_shl_base_lds_2(float addrspace(1)* %out) #0 { ; SI-LABEL: {{^}}store_shl_base_lds_0: ; SI: v_lshlrev_b32_e32 [[PTR:v[0-9]+]], 2, {{v[0-9]+}} -; SI: ds_write_b32 [[PTR]], {{v[0-9]+}} offset:8 [M0] +; SI: ds_write_b32 [[PTR]], {{v[0-9]+}} offset:8 ; SI: s_endpgm define void @store_shl_base_lds_0(float addrspace(1)* %out, i32 addrspace(1)* %add_use) #0 { %tid.x = tail call i32 @llvm.r600.read.tidig.x() #1 diff --git a/test/CodeGen/R600/si-annotate-cf-assertion.ll b/test/CodeGen/R600/si-annotate-cf-assertion.ll index 6d60b0a..69d7193 100644 --- a/test/CodeGen/R600/si-annotate-cf-assertion.ll +++ b/test/CodeGen/R600/si-annotate-cf-assertion.ll @@ -1,6 +1,7 @@ ; REQUIRES: asserts ; XFAIL: * -; RUN: llc -march=r600 -mcpu=SI -verify-machineinstrs-asm-verbose=false < %s | FileCheck %s +; RUN: llc -march=amdgcn -mcpu=SI -verify-machineinstrs-asm-verbose=false < %s | FileCheck %s +; RUN: llc -march=amdgcn -mcpu=tonga -verify-machineinstrs-asm-verbose=false < %s | FileCheck %s define void @test(i32 addrspace(1)* %g, i8 addrspace(3)* %l, i32 %x) nounwind { diff --git a/test/CodeGen/R600/si-lod-bias.ll b/test/CodeGen/R600/si-lod-bias.ll index 60277d6..d6cbd0f 100644 --- a/test/CodeGen/R600/si-lod-bias.ll +++ b/test/CodeGen/R600/si-lod-bias.ll @@ -1,4 +1,5 @@ -;RUN: llc < %s -march=r600 -mcpu=verde -verify-machineinstrs | FileCheck %s +;RUN: llc < %s -march=amdgcn -mcpu=verde -verify-machineinstrs | FileCheck %s +;RUN: llc < %s -march=amdgcn -mcpu=tonga -verify-machineinstrs | FileCheck %s ; This shader has the potential to generated illegal VGPR to SGPR copies if ; the wrong register class is used for the REG_SEQUENCE instructions. @@ -47,5 +48,5 @@ declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float attributes #0 = { "ShaderType"="0" } attributes #1 = { nounwind readnone } -!0 = metadata !{metadata !"const", null} -!1 = metadata !{metadata !0, metadata !0, i64 0, i32 1} +!0 = !{!"const", null} +!1 = !{!0, !0, i64 0, i32 1} diff --git a/test/CodeGen/R600/si-sgpr-spill.ll b/test/CodeGen/R600/si-sgpr-spill.ll index 439d8e2..18fda20 100644 --- a/test/CodeGen/R600/si-sgpr-spill.ll +++ b/test/CodeGen/R600/si-sgpr-spill.ll @@ -1,9 +1,11 @@ -; RUN: llc -march=r600 -mcpu=SI < %s | FileCheck %s +; RUN: llc -march=amdgcn -mcpu=SI < %s | FileCheck %s +; RUN: llc -march=amdgcn -mcpu=tonga < %s | FileCheck %s ; These tests check that the compiler won't crash when it needs to spill ; SGPRs. ; CHECK-LABEL: {{^}}main: +; CHECK: s_wqm ; Writing to M0 from an SMRD instruction will hang the GPU. 
; CHECK-NOT: s_buffer_load_dword m0 ; CHECK: s_endpgm @@ -686,7 +688,7 @@ attributes #2 = { readnone } attributes #3 = { readonly } attributes #4 = { nounwind readonly } -!0 = metadata !{metadata !"const", null, i32 1} +!0 = !{!"const", null, i32 1} ; CHECK-LABEL: {{^}}main1: ; CHECK: s_endpgm diff --git a/test/CodeGen/R600/si-triv-disjoint-mem-access.ll b/test/CodeGen/R600/si-triv-disjoint-mem-access.ll index 2c146eb..a4475c0 100644 --- a/test/CodeGen/R600/si-triv-disjoint-mem-access.ll +++ b/test/CodeGen/R600/si-triv-disjoint-mem-access.ll @@ -1,4 +1,4 @@ -; RUN: llc -march=r600 -mcpu=bonaire -verify-machineinstrs -enable-misched -enable-aa-sched-mi < %s | FileCheck -check-prefix=FUNC -check-prefix=CI %s +; RUN: llc -march=amdgcn -mcpu=bonaire -verify-machineinstrs -enable-misched -enable-aa-sched-mi < %s | FileCheck -check-prefix=FUNC -check-prefix=CI %s declare void @llvm.SI.tbuffer.store.i32(<16 x i8>, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32) declare void @llvm.SI.tbuffer.store.v4i32(<16 x i8>, <4 x i32>, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32) @@ -51,8 +51,8 @@ define void @no_reorder_local_load_volatile_global_store_local_load(i32 addrspac ; FUNC-LABEL: @no_reorder_barrier_local_load_global_store_local_load ; CI: ds_read_b32 {{v[0-9]+}}, {{v[0-9]+}} offset:4 -; CI: buffer_store_dword ; CI: ds_read_b32 {{v[0-9]+}}, {{v[0-9]+}} offset:8 +; CI: buffer_store_dword define void @no_reorder_barrier_local_load_global_store_local_load(i32 addrspace(1)* %out, i32 addrspace(1)* %gptr) #0 { %ptr0 = load i32 addrspace(3)* addrspace(3)* @stored_lds_ptr, align 4 @@ -94,12 +94,10 @@ define void @no_reorder_constant_load_global_store_constant_load(i32 addrspace(1 ret void } -; XXX: Should be able to reorder this, but the laods count as ordered - ; FUNC-LABEL: @reorder_constant_load_local_store_constant_load ; CI: buffer_load_dword -; CI: ds_write_b32 ; CI: buffer_load_dword +; CI: ds_write_b32 ; CI: buffer_store_dword define void @reorder_constant_load_local_store_constant_load(i32 addrspace(1)* %out, i32 addrspace(3)* %lptr) #0 { %ptr0 = load i32 addrspace(2)* addrspace(3)* @stored_constant_ptr, align 8 @@ -183,11 +181,11 @@ define void @reorder_local_offsets(i32 addrspace(1)* nocapture %out, i32 addrspa } ; FUNC-LABEL: @reorder_global_offsets -; CI: buffer_store_dword {{v[0-9]+}}, {{s\[[0-9]+:[0-9]+\]}}, 0 offset:0xc -; CI: buffer_load_dword {{v[0-9]+}}, {{s\[[0-9]+:[0-9]+\]}}, 0 offset:0x190 -; CI: buffer_load_dword {{v[0-9]+}}, {{s\[[0-9]+:[0-9]+\]}}, 0 offset:0x194 -; CI: buffer_store_dword {{v[0-9]+}}, {{s\[[0-9]+:[0-9]+\]}}, 0 offset:0x190 -; CI: buffer_store_dword {{v[0-9]+}}, {{s\[[0-9]+:[0-9]+\]}}, 0 offset:0x194 +; CI: buffer_store_dword {{v[0-9]+}}, {{s\[[0-9]+:[0-9]+\]}}, 0 offset:12 +; CI: buffer_load_dword {{v[0-9]+}}, {{s\[[0-9]+:[0-9]+\]}}, 0 offset:400 +; CI: buffer_load_dword {{v[0-9]+}}, {{s\[[0-9]+:[0-9]+\]}}, 0 offset:404 +; CI: buffer_store_dword {{v[0-9]+}}, {{s\[[0-9]+:[0-9]+\]}}, 0 offset:400 +; CI: buffer_store_dword {{v[0-9]+}}, {{s\[[0-9]+:[0-9]+\]}}, 0 offset:404 ; CI: buffer_store_dword ; CI: s_endpgm define void @reorder_global_offsets(i32 addrspace(1)* nocapture %out, i32 addrspace(1)* noalias nocapture readnone %gptr, i32 addrspace(1)* noalias nocapture %ptr0) #0 { diff --git a/test/CodeGen/R600/si-vector-hang.ll b/test/CodeGen/R600/si-vector-hang.ll index 6f91c71..61812c6 100644 --- a/test/CodeGen/R600/si-vector-hang.ll +++ b/test/CodeGen/R600/si-vector-hang.ll @@ -1,4 +1,5 @@ -; RUN: llc < %s -march=r600 -mcpu=verde 
-verify-machineinstrs | FileCheck %s +; RUN: llc < %s -march=amdgcn -mcpu=verde -verify-machineinstrs | FileCheck %s +; RUN: llc < %s -march=amdgcn -mcpu=tonga -verify-machineinstrs | FileCheck %s ; CHECK: {{^}}test_8_min_char: ; CHECK: buffer_store_byte @@ -96,12 +97,12 @@ attributes #0 = { nounwind "less-precise-fpmad"="false" "no-frame-pointer-elim"= !opencl.kernels = !{!0, !1, !2, !3, !4, !5, !6, !7, !8} -!0 = metadata !{null} -!1 = metadata !{null} -!2 = metadata !{null} -!3 = metadata !{void (i8 addrspace(1)*, i8 addrspace(1)*, i8 addrspace(1)*)* @test_8_min_char} -!4 = metadata !{null} -!5 = metadata !{null} -!6 = metadata !{null} -!7 = metadata !{null} -!8 = metadata !{null} +!0 = !{null} +!1 = !{null} +!2 = !{null} +!3 = !{void (i8 addrspace(1)*, i8 addrspace(1)*, i8 addrspace(1)*)* @test_8_min_char} +!4 = !{null} +!5 = !{null} +!6 = !{null} +!7 = !{null} +!8 = !{null} diff --git a/test/CodeGen/R600/sign_extend.ll b/test/CodeGen/R600/sign_extend.ll index 94f4c46..f194759 100644 --- a/test/CodeGen/R600/sign_extend.ll +++ b/test/CodeGen/R600/sign_extend.ll @@ -1,4 +1,5 @@ -; RUN: llc -march=r600 -mcpu=SI -verify-machineinstrs < %s | FileCheck -check-prefix=SI %s +; RUN: llc -march=amdgcn -mcpu=SI -verify-machineinstrs < %s | FileCheck -check-prefix=SI %s +; RUN: llc -march=amdgcn -mcpu=tonga -verify-machineinstrs < %s | FileCheck -check-prefix=SI %s ; SI-LABEL: {{^}}s_sext_i1_to_i32: ; SI: v_cndmask_b32_e64 @@ -23,8 +24,9 @@ entry: } ; SI-LABEL: {{^}}s_sext_i1_to_i64: -; SI: v_cndmask_b32_e64 -; SI: v_cndmask_b32_e64 +; SI: v_cndmask_b32_e64 v[[LOREG:[0-9]+]], 0, -1, vcc +; SI: v_mov_b32_e32 v[[HIREG:[0-9]+]], v[[LOREG]] +; SI: buffer_store_dwordx2 v{{\[}}[[LOREG]]:[[HIREG]]{{\]}} ; SI: s_endpgm define void @s_sext_i1_to_i64(i64 addrspace(1)* %out, i32 %a, i32 %b) nounwind { %cmp = icmp eq i32 %a, %b diff --git a/test/CodeGen/R600/simplify-demanded-bits-build-pair.ll b/test/CodeGen/R600/simplify-demanded-bits-build-pair.ll index 8d9ee42..28a413c 100644 --- a/test/CodeGen/R600/simplify-demanded-bits-build-pair.ll +++ b/test/CodeGen/R600/simplify-demanded-bits-build-pair.ll @@ -1,5 +1,6 @@ ; XFAIL: * -; RUN: llc -verify-machineinstrs -march=r600 -mcpu=SI -mattr=-promote-alloca < %s | FileCheck -check-prefix=SI %s +; RUN: llc -verify-machineinstrs -march=amdgcn -mcpu=SI -mattr=-promote-alloca < %s | FileCheck -check-prefix=SI %s +; RUN: llc -verify-machineinstrs -march=amdgcn -mcpu=tonga -mattr=-promote-alloca < %s | FileCheck -check-prefix=SI %s ; 64-bit select was originally lowered with a build_pair, and this ; could be simplified to 1 cndmask instead of 2, but that broken when diff --git a/test/CodeGen/R600/sint_to_fp.f64.ll b/test/CodeGen/R600/sint_to_fp.f64.ll index 6e4f87c..893cfb3 100644 --- a/test/CodeGen/R600/sint_to_fp.f64.ll +++ b/test/CodeGen/R600/sint_to_fp.f64.ll @@ -1,4 +1,4 @@ -; RUN: llc -march=r600 -mcpu=SI -verify-machineinstrs < %s | FileCheck -check-prefix=SI %s +; RUN: llc -march=amdgcn -mcpu=SI -verify-machineinstrs < %s | FileCheck -check-prefix=SI %s declare i32 @llvm.r600.read.tidig.x() nounwind readnone @@ -10,12 +10,13 @@ define void @sint_to_fp_i32_to_f64(double addrspace(1)* %out, i32 %in) { ret void } +; FIXME: select on 0, 0 ; SI-LABEL: {{^}}sint_to_fp_i1_f64: ; SI: v_cmp_eq_i32_e64 [[CMP:s\[[0-9]+:[0-9]\]]], -; FIXME: We should the VGPR sources for V_CNDMASK are copied from SGPRs, -; we should be able to fold the SGPRs into the V_CNDMASK instructions. 
-; SI: v_cndmask_b32_e64 v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}}, [[CMP]] -; SI: v_cndmask_b32_e64 v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}}, [[CMP]] +; We can't fold the SGPRs into v_cndmask_b32_e64, because it already +; uses an SGPR for [[CMP]] +; SI: v_cndmask_b32_e64 v{{[0-9]+}}, 0, v{{[0-9]+}}, [[CMP]] +; SI: v_cndmask_b32_e64 v{{[0-9]+}}, 0, 0, [[CMP]] ; SI: buffer_store_dwordx2 ; SI: s_endpgm define void @sint_to_fp_i1_f64(double addrspace(1)* %out, i32 %in) { @@ -45,9 +46,9 @@ define void @s_sint_to_fp_i64_to_f64(double addrspace(1)* %out, i64 %in) { ; SI-LABEL: @v_sint_to_fp_i64_to_f64 ; SI: buffer_load_dwordx2 v{{\[}}[[LO:[0-9]+]]:[[HI:[0-9]+]]{{\]}} -; SI-DAG: v_cvt_f64_u32_e32 [[LO_CONV:v\[[0-9]+:[0-9]+\]]], v[[LO]] -; SI-DAG: v_cvt_f64_i32_e32 [[HI_CONV:v\[[0-9]+:[0-9]+\]]], v[[HI]] +; SI: v_cvt_f64_i32_e32 [[HI_CONV:v\[[0-9]+:[0-9]+\]]], v[[HI]] ; SI: v_ldexp_f64 [[LDEXP:v\[[0-9]+:[0-9]+\]]], [[HI_CONV]], 32 +; SI: v_cvt_f64_u32_e32 [[LO_CONV:v\[[0-9]+:[0-9]+\]]], v[[LO]] ; SI: v_add_f64 [[RESULT:v\[[0-9]+:[0-9]+\]]], [[LDEXP]], [[LO_CONV]] ; SI: buffer_store_dwordx2 [[RESULT]] define void @v_sint_to_fp_i64_to_f64(double addrspace(1)* %out, i64 addrspace(1)* %in) { diff --git a/test/CodeGen/R600/sint_to_fp.ll b/test/CodeGen/R600/sint_to_fp.ll index 7b6ce43..6a291cf 100644 --- a/test/CodeGen/R600/sint_to_fp.ll +++ b/test/CodeGen/R600/sint_to_fp.ll @@ -1,4 +1,5 @@ -; RUN: llc -march=r600 -mcpu=SI -verify-machineinstrs < %s | FileCheck -check-prefix=SI -check-prefix=FUNC %s +; RUN: llc -march=amdgcn -mcpu=SI -verify-machineinstrs < %s | FileCheck -check-prefix=SI -check-prefix=FUNC %s +; RUN: llc -march=amdgcn -mcpu=tonga -verify-machineinstrs < %s | FileCheck -check-prefix=SI -check-prefix=FUNC %s ; RUN: llc -march=r600 -mcpu=redwood < %s | FileCheck -check-prefix=R600 -check-prefix=FUNC %s diff --git a/test/CodeGen/R600/smrd.ll b/test/CodeGen/R600/smrd.ll index 1c7df16..bad1668 100644 --- a/test/CodeGen/R600/smrd.ll +++ b/test/CodeGen/R600/smrd.ll @@ -1,8 +1,10 @@ -; RUN: llc < %s -march=r600 -mcpu=SI -show-mc-encoding -verify-machineinstrs | FileCheck %s +; RUN: llc < %s -march=amdgcn -mcpu=SI -show-mc-encoding -verify-machineinstrs | FileCheck --check-prefix=SI --check-prefix=GCN %s +; RUN: llc < %s -march=amdgcn -mcpu=tonga -show-mc-encoding -verify-machineinstrs | FileCheck --check-prefix=VI --check-prefix=GCN %s ; SMRD load with an immediate offset. -; CHECK-LABEL: {{^}}smrd0: -; CHECK: s_load_dword s{{[0-9]}}, s[{{[0-9]:[0-9]}}], 0x1 ; encoding: [0x01 +; GCN-LABEL: {{^}}smrd0: +; SI: s_load_dword s{{[0-9]}}, s[{{[0-9]:[0-9]}}], 0x1 ; encoding: [0x01 +; VI: s_load_dword s{{[0-9]}}, s[{{[0-9]:[0-9]}}], 0x4 define void @smrd0(i32 addrspace(1)* %out, i32 addrspace(2)* %ptr) { entry: %0 = getelementptr i32 addrspace(2)* %ptr, i64 1 @@ -12,8 +14,9 @@ entry: } ; SMRD load with the largest possible immediate offset. -; CHECK-LABEL: {{^}}smrd1: -; CHECK: s_load_dword s{{[0-9]}}, s[{{[0-9]:[0-9]}}], 0xff ; encoding: [0xff +; GCN-LABEL: {{^}}smrd1: +; SI: s_load_dword s{{[0-9]}}, s[{{[0-9]:[0-9]}}], 0xff ; encoding: [0xff +; VI: s_load_dword s{{[0-9]}}, s[{{[0-9]:[0-9]}}], 0x3fc define void @smrd1(i32 addrspace(1)* %out, i32 addrspace(2)* %ptr) { entry: %0 = getelementptr i32 addrspace(2)* %ptr, i64 255 @@ -23,10 +26,11 @@ entry: } ; SMRD load with an offset greater than the largest possible immediate. 
-; CHECK-LABEL: {{^}}smrd2: -; CHECK: s_movk_i32 s[[OFFSET:[0-9]]], 0x400 -; CHECK: s_load_dword s{{[0-9]}}, s[{{[0-9]:[0-9]}}], s[[OFFSET]] ; encoding: [0x0[[OFFSET]] -; CHECK: s_endpgm +; GCN-LABEL: {{^}}smrd2: +; SI: s_movk_i32 s[[OFFSET:[0-9]]], 0x400 +; SI: s_load_dword s{{[0-9]}}, s[{{[0-9]:[0-9]}}], s[[OFFSET]] ; encoding: [0x0[[OFFSET]] +; VI: s_load_dword s{{[0-9]}}, s[{{[0-9]:[0-9]}}], 0x400 +; GCN: s_endpgm define void @smrd2(i32 addrspace(1)* %out, i32 addrspace(2)* %ptr) { entry: %0 = getelementptr i32 addrspace(2)* %ptr, i64 256 @@ -36,15 +40,18 @@ entry: } ; SMRD load with a 64-bit offset -; CHECK-LABEL: {{^}}smrd3: -; CHECK-DAG: s_mov_b32 s[[SHI:[0-9]+]], 4 -; CHECK-DAG: s_mov_b32 s[[SLO:[0-9]+]], 0 ; -; FIXME: We don't need to copy these values to VGPRs -; CHECK-DAG: v_mov_b32_e32 v[[VLO:[0-9]+]], s[[SLO]] -; CHECK-DAG: v_mov_b32_e32 v[[VHI:[0-9]+]], s[[SHI]] +; GCN-LABEL: {{^}}smrd3: +; FIXME: There are too many copies here because we don't fold immediates +; through REG_SEQUENCE +; SI: s_mov_b32 s[[SLO:[0-9]+]], 0 ; +; SI: s_mov_b32 s[[SHI:[0-9]+]], 4 +; SI: s_mov_b32 s[[SSLO:[0-9]+]], s[[SLO]] +; SI-DAG: v_mov_b32_e32 v[[VLO:[0-9]+]], s[[SSLO]] +; SI-DAG: v_mov_b32_e32 v[[VHI:[0-9]+]], s[[SHI]] ; FIXME: We should be able to use s_load_dword here -; CHECK: buffer_load_dword v{{[0-9]+}}, v{{\[}}[[VLO]]:[[VHI]]{{\]}}, s[{{[0-9]+:[0-9]+}}], 0 addr64 -; CHECK: s_endpgm +; SI: buffer_load_dword v{{[0-9]+}}, v{{\[}}[[VLO]]:[[VHI]]{{\]}}, s[{{[0-9]+:[0-9]+}}], 0 addr64 +; TODO: Add VI checks +; GCN: s_endpgm define void @smrd3(i32 addrspace(1)* %out, i32 addrspace(2)* %ptr) { entry: %0 = getelementptr i32 addrspace(2)* %ptr, i64 4294967296 ; 2 ^ 32 @@ -54,8 +61,9 @@ entry: } ; SMRD load using the load.const intrinsic with an immediate offset -; CHECK-LABEL: {{^}}smrd_load_const0: -; CHECK: s_buffer_load_dword s{{[0-9]}}, s[{{[0-9]:[0-9]}}], 0x4 ; encoding: [0x04 +; GCN-LABEL: {{^}}smrd_load_const0: +; SI: s_buffer_load_dword s{{[0-9]}}, s[{{[0-9]:[0-9]}}], 0x4 ; encoding: [0x04 +; VI: s_buffer_load_dword s{{[0-9]}}, s[{{[0-9]:[0-9]}}], 0x10 define void @smrd_load_const0(<16 x i8> addrspace(2)* inreg, <16 x i8> addrspace(2)* inreg, <32 x i8> addrspace(2)* inreg, i32 inreg, <2 x i32>, <2 x i32>, <2 x i32>, <3 x i32>, <2 x i32>, <2 x i32>, <2 x i32>, float, float, float, float, float, float, float, float, float) #0 { main_body: %20 = getelementptr <16 x i8> addrspace(2)* %0, i32 0 @@ -67,8 +75,9 @@ main_body: ; SMRD load using the load.const intrinsic with the largest possible immediate ; offset. -; CHECK-LABEL: {{^}}smrd_load_const1: -; CHECK: s_buffer_load_dword s{{[0-9]}}, s[{{[0-9]:[0-9]}}], 0xff ; encoding: [0xff +; GCN-LABEL: {{^}}smrd_load_const1: +; SI: s_buffer_load_dword s{{[0-9]}}, s[{{[0-9]:[0-9]}}], 0xff ; encoding: [0xff +; VI: s_buffer_load_dword s{{[0-9]}}, s[{{[0-9]:[0-9]}}], 0x3fc define void @smrd_load_const1(<16 x i8> addrspace(2)* inreg, <16 x i8> addrspace(2)* inreg, <32 x i8> addrspace(2)* inreg, i32 inreg, <2 x i32>, <2 x i32>, <2 x i32>, <3 x i32>, <2 x i32>, <2 x i32>, <2 x i32>, float, float, float, float, float, float, float, float, float) #0 { main_body: %20 = getelementptr <16 x i8> addrspace(2)* %0, i32 0 @@ -80,9 +89,10 @@ main_body: ; SMRD load using the load.const intrinsic with an offset greater than the ; largets possible immediate. ; immediate offset. 
-; CHECK-LABEL: {{^}}smrd_load_const2: -; CHECK: s_movk_i32 s[[OFFSET:[0-9]]], 0x400 -; CHECK: s_buffer_load_dword s{{[0-9]}}, s[{{[0-9]:[0-9]}}], s[[OFFSET]] ; encoding: [0x0[[OFFSET]] +; GCN-LABEL: {{^}}smrd_load_const2: +; SI: s_movk_i32 s[[OFFSET:[0-9]]], 0x400 +; SI: s_buffer_load_dword s{{[0-9]}}, s[{{[0-9]:[0-9]}}], s[[OFFSET]] ; encoding: [0x0[[OFFSET]] +; VI: s_buffer_load_dword s{{[0-9]}}, s[{{[0-9]:[0-9]}}], 0x400 define void @smrd_load_const2(<16 x i8> addrspace(2)* inreg, <16 x i8> addrspace(2)* inreg, <32 x i8> addrspace(2)* inreg, i32 inreg, <2 x i32>, <2 x i32>, <2 x i32>, <3 x i32>, <2 x i32>, <2 x i32>, <2 x i32>, float, float, float, float, float, float, float, float, float) #0 { main_body: %20 = getelementptr <16 x i8> addrspace(2)* %0, i32 0 diff --git a/test/CodeGen/R600/split-scalar-i64-add.ll b/test/CodeGen/R600/split-scalar-i64-add.ll index e3448dc..ec50fd9 100644 --- a/test/CodeGen/R600/split-scalar-i64-add.ll +++ b/test/CodeGen/R600/split-scalar-i64-add.ll @@ -1,4 +1,4 @@ -; RUN: llc -march=r600 -mcpu=SI -verify-machineinstrs < %s | FileCheck -check-prefix=SI -check-prefix=FUNC %s +; RUN: llc -march=amdgcn -mcpu=SI -verify-machineinstrs < %s | FileCheck -check-prefix=SI -check-prefix=FUNC %s declare i32 @llvm.r600.read.tidig.x() readnone diff --git a/test/CodeGen/R600/sra.ll b/test/CodeGen/R600/sra.ll index 8ba9daa..d6c6ccd 100644 --- a/test/CodeGen/R600/sra.ll +++ b/test/CodeGen/R600/sra.ll @@ -1,13 +1,18 @@ -;RUN: llc < %s -march=r600 -mcpu=redwood | FileCheck --check-prefix=EG-CHECK %s -;RUN: llc < %s -march=r600 -mcpu=verde -verify-machineinstrs | FileCheck --check-prefix=SI-CHECK %s +;RUN: llc < %s -march=r600 -mcpu=redwood | FileCheck --check-prefix=EG %s +;RUN: llc < %s -march=amdgcn -mcpu=verde -verify-machineinstrs | FileCheck --check-prefix=SI %s +;RUN: llc < %s -march=amdgcn -mcpu=tonga -verify-machineinstrs | FileCheck --check-prefix=VI %s -;EG-CHECK-LABEL: {{^}}ashr_v2i32: -;EG-CHECK: ASHR {{\*? *}}T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}} -;EG-CHECK: ASHR {{\*? *}}T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}} +;EG-LABEL: {{^}}ashr_v2i32: +;EG: ASHR {{\*? *}}T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}} +;EG: ASHR {{\*? *}}T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}} -;SI-CHECK-LABEL: {{^}}ashr_v2i32: -;SI-CHECK: v_ashr_i32_e32 v{{[0-9]+, v[0-9]+, v[0-9]+}} -;SI-CHECK: v_ashr_i32_e32 v{{[0-9]+, v[0-9]+, v[0-9]+}} +;SI-LABEL: {{^}}ashr_v2i32: +;SI: v_ashr_i32_e32 v{{[0-9]+, v[0-9]+, v[0-9]+}} +;SI: v_ashr_i32_e32 v{{[0-9]+, v[0-9]+, v[0-9]+}} + +;VI-LABEL: {{^}}ashr_v2i32: +;VI: v_ashrrev_i32_e32 v{{[0-9]+, v[0-9]+, v[0-9]+}} +;VI: v_ashrrev_i32_e32 v{{[0-9]+, v[0-9]+, v[0-9]+}} define void @ashr_v2i32(<2 x i32> addrspace(1)* %out, <2 x i32> addrspace(1)* %in) { %b_ptr = getelementptr <2 x i32> addrspace(1)* %in, i32 1 @@ -18,17 +23,23 @@ define void @ashr_v2i32(<2 x i32> addrspace(1)* %out, <2 x i32> addrspace(1)* %i ret void } -;EG-CHECK-LABEL: {{^}}ashr_v4i32: -;EG-CHECK: ASHR {{\*? *}}T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}} -;EG-CHECK: ASHR {{\*? *}}T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}} -;EG-CHECK: ASHR {{\*? *}}T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}} -;EG-CHECK: ASHR {{\*? *}}T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}} +;EG-LABEL: {{^}}ashr_v4i32: +;EG: ASHR {{\*? *}}T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}} +;EG: ASHR {{\*? *}}T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}} +;EG: ASHR {{\*? 
*}}T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}} +;EG: ASHR {{\*? *}}T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}} + +;SI-LABEL: {{^}}ashr_v4i32: +;SI: v_ashr_i32_e32 v{{[0-9]+, v[0-9]+, v[0-9]+}} +;SI: v_ashr_i32_e32 v{{[0-9]+, v[0-9]+, v[0-9]+}} +;SI: v_ashr_i32_e32 v{{[0-9]+, v[0-9]+, v[0-9]+}} +;SI: v_ashr_i32_e32 v{{[0-9]+, v[0-9]+, v[0-9]+}} -;SI-CHECK-LABEL: {{^}}ashr_v4i32: -;SI-CHECK: v_ashr_i32_e32 v{{[0-9]+, v[0-9]+, v[0-9]+}} -;SI-CHECK: v_ashr_i32_e32 v{{[0-9]+, v[0-9]+, v[0-9]+}} -;SI-CHECK: v_ashr_i32_e32 v{{[0-9]+, v[0-9]+, v[0-9]+}} -;SI-CHECK: v_ashr_i32_e32 v{{[0-9]+, v[0-9]+, v[0-9]+}} +;VI-LABEL: {{^}}ashr_v4i32: +;VI: v_ashrrev_i32_e32 v{{[0-9]+, v[0-9]+, v[0-9]+}} +;VI: v_ashrrev_i32_e32 v{{[0-9]+, v[0-9]+, v[0-9]+}} +;VI: v_ashrrev_i32_e32 v{{[0-9]+, v[0-9]+, v[0-9]+}} +;VI: v_ashrrev_i32_e32 v{{[0-9]+, v[0-9]+, v[0-9]+}} define void @ashr_v4i32(<4 x i32> addrspace(1)* %out, <4 x i32> addrspace(1)* %in) { %b_ptr = getelementptr <4 x i32> addrspace(1)* %in, i32 1 @@ -39,11 +50,15 @@ define void @ashr_v4i32(<4 x i32> addrspace(1)* %out, <4 x i32> addrspace(1)* %i ret void } -;EG-CHECK-LABEL: {{^}}ashr_i64: -;EG-CHECK: ASHR +;EG-LABEL: {{^}}ashr_i64: +;EG: ASHR + +;SI-LABEL: {{^}}ashr_i64: +;SI: s_ashr_i64 s[{{[0-9]}}:{{[0-9]}}], s[{{[0-9]}}:{{[0-9]}}], 8 + +;VI-LABEL: {{^}}ashr_i64: +;VI: s_ashr_i64 s[{{[0-9]}}:{{[0-9]}}], s[{{[0-9]}}:{{[0-9]}}], 8 -;SI-CHECK-LABEL: {{^}}ashr_i64: -;SI-CHECK: s_ashr_i64 s[{{[0-9]}}:{{[0-9]}}], s[{{[0-9]}}:{{[0-9]}}], 8 define void @ashr_i64(i64 addrspace(1)* %out, i32 %in) { entry: %0 = sext i32 %in to i64 @@ -52,22 +67,26 @@ entry: ret void } -;EG-CHECK-LABEL: {{^}}ashr_i64_2: -;EG-CHECK: SUB_INT {{\*? *}}[[COMPSH:T[0-9]+\.[XYZW]]], {{literal.[xy]}}, [[SHIFT:T[0-9]+\.[XYZW]]] -;EG-CHECK: LSHL {{\* *}}[[TEMP:T[0-9]+\.[XYZW]]], [[OPHI:T[0-9]+\.[XYZW]]], {{[[COMPSH]]|PV.[XYZW]}} -;EG-CHECK: LSHL {{\*? *}}[[OVERF:T[0-9]+\.[XYZW]]], {{[[TEMP]]|PV.[XYZW]}}, 1 +;EG-LABEL: {{^}}ashr_i64_2: +;EG: SUB_INT {{\*? *}}[[COMPSH:T[0-9]+\.[XYZW]]], {{literal.[xy]}}, [[SHIFT:T[0-9]+\.[XYZW]]] +;EG: LSHL {{\* *}}[[TEMP:T[0-9]+\.[XYZW]]], [[OPHI:T[0-9]+\.[XYZW]]], {{[[COMPSH]]|PV.[XYZW]}} +;EG: LSHL {{\*? *}}[[OVERF:T[0-9]+\.[XYZW]]], {{[[TEMP]]|PV.[XYZW]}}, 1 ;EG_CHECK-DAG: ADD_INT {{\*? *}}[[BIGSH:T[0-9]+\.[XYZW]]], [[SHIFT]], literal -;EG-CHECK-DAG: LSHR {{\*? *}}[[LOSMTMP:T[0-9]+\.[XYZW]]], [[OPLO:T[0-9]+\.[XYZW]]], [[SHIFT]] -;EG-CHECK-DAG: OR_INT {{\*? *}}[[LOSM:T[0-9]+\.[XYZW]]], {{[[LOSMTMP]]|PV.[XYZW]}}, {{[[OVERF]]|PV.[XYZW]}} -;EG-CHECK-DAG: ASHR {{\*? *}}[[HISM:T[0-9]+\.[XYZW]]], [[OPHI]], {{PS|[[SHIFT]]}} -;EG-CHECK-DAG: ASHR {{\*? *}}[[LOBIG:T[0-9]+\.[XYZW]]], [[OPHI]], literal -;EG-CHECK-DAG: ASHR {{\*? *}}[[HIBIG:T[0-9]+\.[XYZW]]], [[OPHI]], literal -;EG-CHECK-DAG: SETGT_UINT {{\*? *}}[[RESC:T[0-9]+\.[XYZW]]], [[SHIFT]], literal -;EG-CHECK-DAG: CNDE_INT {{\*? *}}[[RESLO:T[0-9]+\.[XYZW]]], {{T[0-9]+\.[XYZW]}} -;EG-CHECK-DAG: CNDE_INT {{\*? *}}[[RESHI:T[0-9]+\.[XYZW]]], {{T[0-9]+\.[XYZW]}} - -;SI-CHECK-LABEL: {{^}}ashr_i64_2: -;SI-CHECK: v_ashr_i64 {{v\[[0-9]+:[0-9]+\], v\[[0-9]+:[0-9]+\], v[0-9]+}} +;EG-DAG: LSHR {{\*? *}}[[LOSMTMP:T[0-9]+\.[XYZW]]], [[OPLO:T[0-9]+\.[XYZW]]], [[SHIFT]] +;EG-DAG: OR_INT {{\*? *}}[[LOSM:T[0-9]+\.[XYZW]]], {{[[LOSMTMP]]|PV.[XYZW]}}, {{[[OVERF]]|PV.[XYZW]}} +;EG-DAG: ASHR {{\*? *}}[[HISM:T[0-9]+\.[XYZW]]], [[OPHI]], {{PS|[[SHIFT]]}} +;EG-DAG: ASHR {{\*? *}}[[LOBIG:T[0-9]+\.[XYZW]]], [[OPHI]], literal +;EG-DAG: ASHR {{\*? 
*}}[[HIBIG:T[0-9]+\.[XYZW]]], [[OPHI]], literal +;EG-DAG: SETGT_UINT {{\*? *}}[[RESC:T[0-9]+\.[XYZW]]], [[SHIFT]], literal +;EG-DAG: CNDE_INT {{\*? *}}[[RESLO:T[0-9]+\.[XYZW]]], {{T[0-9]+\.[XYZW]}} +;EG-DAG: CNDE_INT {{\*? *}}[[RESHI:T[0-9]+\.[XYZW]]], {{T[0-9]+\.[XYZW]}} + +;SI-LABEL: {{^}}ashr_i64_2: +;SI: v_ashr_i64 {{v\[[0-9]+:[0-9]+\], v\[[0-9]+:[0-9]+\], v[0-9]+}} + +;VI-LABEL: {{^}}ashr_i64_2: +;VI: v_ashrrev_i64 {{v\[[0-9]+:[0-9]+\], v[0-9]+, v\[[0-9]+:[0-9]+\]}} + define void @ashr_i64_2(i64 addrspace(1)* %out, i64 addrspace(1)* %in) { entry: %b_ptr = getelementptr i64 addrspace(1)* %in, i64 1 @@ -78,35 +97,39 @@ entry: ret void } -;EG-CHECK-LABEL: {{^}}ashr_v2i64: -;EG-CHECK-DAG: SUB_INT {{\*? *}}[[COMPSHA:T[0-9]+\.[XYZW]]], {{literal.[xy]}}, [[SHA:T[0-9]+\.[XYZW]]] -;EG-CHECK-DAG: SUB_INT {{\*? *}}[[COMPSHB:T[0-9]+\.[XYZW]]], {{literal.[xy]}}, [[SHB:T[0-9]+\.[XYZW]]] -;EG-CHECK-DAG: LSHL {{\*? *}}[[COMPSHA]] -;EG-CHECK-DAG: LSHL {{\*? *}}[[COMPSHB]] -;EG-CHECK-DAG: LSHL {{.*}}, 1 -;EG-CHECK-DAG: LSHL {{.*}}, 1 -;EG-CHECK-DAG: ASHR {{.*}}, [[SHA]] -;EG-CHECK-DAG: ASHR {{.*}}, [[SHB]] -;EG-CHECK-DAG: LSHR {{.*}}, [[SHA]] -;EG-CHECK-DAG: LSHR {{.*}}, [[SHB]] -;EG-CHECK-DAG: OR_INT -;EG-CHECK-DAG: OR_INT -;EG-CHECK-DAG: ADD_INT {{\*? *}}[[BIGSHA:T[0-9]+\.[XYZW]]]{{.*}}, literal -;EG-CHECK-DAG: ADD_INT {{\*? *}}[[BIGSHB:T[0-9]+\.[XYZW]]]{{.*}}, literal -;EG-CHECK-DAG: ASHR -;EG-CHECK-DAG: ASHR -;EG-CHECK-DAG: ASHR {{.*}}, literal -;EG-CHECK-DAG: ASHR {{.*}}, literal -;EG-CHECK-DAG: SETGT_UINT {{\*? *T[0-9]\.[XYZW]}}, [[SHA]], literal -;EG-CHECK-DAG: SETGT_UINT {{\*? *T[0-9]\.[XYZW]}}, [[SHB]], literal -;EG-CHECK-DAG: CNDE_INT -;EG-CHECK-DAG: CNDE_INT -;EG-CHECK-DAG: CNDE_INT -;EG-CHECK-DAG: CNDE_INT - -;SI-CHECK-LABEL: {{^}}ashr_v2i64: -;SI-CHECK: v_ashr_i64 {{v\[[0-9]+:[0-9]+\], v\[[0-9]+:[0-9]+\], v[0-9]+}} -;SI-CHECK: v_ashr_i64 {{v\[[0-9]+:[0-9]+\], v\[[0-9]+:[0-9]+\], v[0-9]+}} +;EG-LABEL: {{^}}ashr_v2i64: +;EG-DAG: SUB_INT {{\*? *}}[[COMPSHA:T[0-9]+\.[XYZW]]], {{literal.[xy]}}, [[SHA:T[0-9]+\.[XYZW]]] +;EG-DAG: SUB_INT {{\*? *}}[[COMPSHB:T[0-9]+\.[XYZW]]], {{literal.[xy]}}, [[SHB:T[0-9]+\.[XYZW]]] +;EG-DAG: LSHL {{\*? *}}[[COMPSHA]] +;EG-DAG: LSHL {{\*? *}}[[COMPSHB]] +;EG-DAG: LSHL {{.*}}, 1 +;EG-DAG: LSHL {{.*}}, 1 +;EG-DAG: ASHR {{.*}}, [[SHA]] +;EG-DAG: ASHR {{.*}}, [[SHB]] +;EG-DAG: LSHR {{.*}}, [[SHA]] +;EG-DAG: LSHR {{.*}}, [[SHB]] +;EG-DAG: OR_INT +;EG-DAG: OR_INT +;EG-DAG: ADD_INT {{\*? *}}[[BIGSHA:T[0-9]+\.[XYZW]]]{{.*}}, literal +;EG-DAG: ADD_INT {{\*? *}}[[BIGSHB:T[0-9]+\.[XYZW]]]{{.*}}, literal +;EG-DAG: ASHR +;EG-DAG: ASHR +;EG-DAG: ASHR {{.*}}, literal +;EG-DAG: ASHR {{.*}}, literal +;EG-DAG: SETGT_UINT {{\*? *T[0-9]\.[XYZW]}}, [[SHA]], literal +;EG-DAG: SETGT_UINT {{\*? *T[0-9]\.[XYZW]}}, [[SHB]], literal +;EG-DAG: CNDE_INT +;EG-DAG: CNDE_INT +;EG-DAG: CNDE_INT +;EG-DAG: CNDE_INT + +;SI-LABEL: {{^}}ashr_v2i64: +;SI: v_ashr_i64 {{v\[[0-9]+:[0-9]+\], v\[[0-9]+:[0-9]+\], v[0-9]+}} +;SI: v_ashr_i64 {{v\[[0-9]+:[0-9]+\], v\[[0-9]+:[0-9]+\], v[0-9]+}} + +;VI-LABEL: {{^}}ashr_v2i64: +;VI: v_ashrrev_i64 {{v\[[0-9]+:[0-9]+\], v[0-9]+, v\[[0-9]+:[0-9]+\]}} +;VI: v_ashrrev_i64 {{v\[[0-9]+:[0-9]+\], v[0-9]+, v\[[0-9]+:[0-9]+\]}} define void @ashr_v2i64(<2 x i64> addrspace(1)* %out, <2 x i64> addrspace(1)* %in) { %b_ptr = getelementptr <2 x i64> addrspace(1)* %in, i64 1 @@ -117,61 +140,67 @@ define void @ashr_v2i64(<2 x i64> addrspace(1)* %out, <2 x i64> addrspace(1)* %i ret void } -;EG-CHECK-LABEL: {{^}}ashr_v4i64: -;EG-CHECK-DAG: SUB_INT {{\*? 
*}}[[COMPSHA:T[0-9]+\.[XYZW]]], {{literal.[xy]}}, [[SHA:T[0-9]+\.[XYZW]]] -;EG-CHECK-DAG: SUB_INT {{\*? *}}[[COMPSHB:T[0-9]+\.[XYZW]]], {{literal.[xy]}}, [[SHB:T[0-9]+\.[XYZW]]] -;EG-CHECK-DAG: SUB_INT {{\*? *}}[[COMPSHC:T[0-9]+\.[XYZW]]], {{literal.[xy]}}, [[SHC:T[0-9]+\.[XYZW]]] -;EG-CHECK-DAG: SUB_INT {{\*? *}}[[COMPSHD:T[0-9]+\.[XYZW]]], {{literal.[xy]}}, [[SHD:T[0-9]+\.[XYZW]]] -;EG-CHECK-DAG: LSHL {{\*? *}}[[COMPSHA]] -;EG-CHECK-DAG: LSHL {{\*? *}}[[COMPSHB]] -;EG-CHECK-DAG: LSHL {{\*? *}}[[COMPSHC]] -;EG-CHECK-DAG: LSHL {{\*? *}}[[COMPSHD]] -;EG-CHECK-DAG: LSHL {{.*}}, 1 -;EG-CHECK-DAG: LSHL {{.*}}, 1 -;EG-CHECK-DAG: LSHL {{.*}}, 1 -;EG-CHECK-DAG: LSHL {{.*}}, 1 -;EG-CHECK-DAG: ASHR {{.*}}, [[SHA]] -;EG-CHECK-DAG: ASHR {{.*}}, [[SHB]] -;EG-CHECK-DAG: ASHR {{.*}}, [[SHC]] -;EG-CHECK-DAG: ASHR {{.*}}, [[SHD]] -;EG-CHECK-DAG: LSHR {{.*}}, [[SHA]] -;EG-CHECK-DAG: LSHR {{.*}}, [[SHB]] -;EG-CHECK-DAG: LSHR {{.*}}, [[SHA]] -;EG-CHECK-DAG: LSHR {{.*}}, [[SHB]] -;EG-CHECK-DAG: OR_INT -;EG-CHECK-DAG: OR_INT -;EG-CHECK-DAG: OR_INT -;EG-CHECK-DAG: OR_INT -;EG-CHECK-DAG: ADD_INT {{\*? *}}[[BIGSHA:T[0-9]+\.[XYZW]]]{{.*}}, literal -;EG-CHECK-DAG: ADD_INT {{\*? *}}[[BIGSHB:T[0-9]+\.[XYZW]]]{{.*}}, literal -;EG-CHECK-DAG: ADD_INT {{\*? *}}[[BIGSHC:T[0-9]+\.[XYZW]]]{{.*}}, literal -;EG-CHECK-DAG: ADD_INT {{\*? *}}[[BIGSHD:T[0-9]+\.[XYZW]]]{{.*}}, literal -;EG-CHECK-DAG: ASHR -;EG-CHECK-DAG: ASHR -;EG-CHECK-DAG: ASHR -;EG-CHECK-DAG: ASHR -;EG-CHECK-DAG: ASHR {{.*}}, literal -;EG-CHECK-DAG: ASHR {{.*}}, literal -;EG-CHECK-DAG: ASHR {{.*}}, literal -;EG-CHECK-DAG: ASHR {{.*}}, literal -;EG-CHECK-DAG: SETGT_UINT {{\*? *T[0-9]\.[XYZW]}}, [[SHA]], literal -;EG-CHECK-DAG: SETGT_UINT {{\*? *T[0-9]\.[XYZW]}}, [[SHB]], literal -;EG-CHECK-DAG: SETGT_UINT {{\*? *T[0-9]\.[XYZW]}}, [[SHC]], literal -;EG-CHECK-DAG: SETGT_UINT {{\*? *T[0-9]\.[XYZW]}}, [[SHD]], literal -;EG-CHECK-DAG: CNDE_INT -;EG-CHECK-DAG: CNDE_INT -;EG-CHECK-DAG: CNDE_INT -;EG-CHECK-DAG: CNDE_INT -;EG-CHECK-DAG: CNDE_INT -;EG-CHECK-DAG: CNDE_INT -;EG-CHECK-DAG: CNDE_INT -;EG-CHECK-DAG: CNDE_INT - -;SI-CHECK-LABEL: {{^}}ashr_v4i64: -;SI-CHECK: v_ashr_i64 {{v\[[0-9]+:[0-9]+\], v\[[0-9]+:[0-9]+\], v[0-9]+}} -;SI-CHECK: v_ashr_i64 {{v\[[0-9]+:[0-9]+\], v\[[0-9]+:[0-9]+\], v[0-9]+}} -;SI-CHECK: v_ashr_i64 {{v\[[0-9]+:[0-9]+\], v\[[0-9]+:[0-9]+\], v[0-9]+}} -;SI-CHECK: v_ashr_i64 {{v\[[0-9]+:[0-9]+\], v\[[0-9]+:[0-9]+\], v[0-9]+}} +;EG-LABEL: {{^}}ashr_v4i64: +;EG-DAG: SUB_INT {{\*? *}}[[COMPSHA:T[0-9]+\.[XYZW]]], {{literal.[xy]}}, [[SHA:T[0-9]+\.[XYZW]]] +;EG-DAG: SUB_INT {{\*? *}}[[COMPSHB:T[0-9]+\.[XYZW]]], {{literal.[xy]}}, [[SHB:T[0-9]+\.[XYZW]]] +;EG-DAG: SUB_INT {{\*? *}}[[COMPSHC:T[0-9]+\.[XYZW]]], {{literal.[xy]}}, [[SHC:T[0-9]+\.[XYZW]]] +;EG-DAG: SUB_INT {{\*? *}}[[COMPSHD:T[0-9]+\.[XYZW]]], {{literal.[xy]}}, [[SHD:T[0-9]+\.[XYZW]]] +;EG-DAG: LSHL {{\*? *}}[[COMPSHA]] +;EG-DAG: LSHL {{\*? *}}[[COMPSHB]] +;EG-DAG: LSHL {{\*? *}}[[COMPSHC]] +;EG-DAG: LSHL {{\*? *}}[[COMPSHD]] +;EG-DAG: LSHL {{.*}}, 1 +;EG-DAG: LSHL {{.*}}, 1 +;EG-DAG: LSHL {{.*}}, 1 +;EG-DAG: LSHL {{.*}}, 1 +;EG-DAG: ASHR {{.*}}, [[SHA]] +;EG-DAG: ASHR {{.*}}, [[SHB]] +;EG-DAG: ASHR {{.*}}, [[SHC]] +;EG-DAG: ASHR {{.*}}, [[SHD]] +;EG-DAG: LSHR {{.*}}, [[SHA]] +;EG-DAG: LSHR {{.*}}, [[SHB]] +;EG-DAG: LSHR {{.*}}, [[SHA]] +;EG-DAG: LSHR {{.*}}, [[SHB]] +;EG-DAG: OR_INT +;EG-DAG: OR_INT +;EG-DAG: OR_INT +;EG-DAG: OR_INT +;EG-DAG: ADD_INT {{\*? *}}[[BIGSHA:T[0-9]+\.[XYZW]]]{{.*}}, literal +;EG-DAG: ADD_INT {{\*? *}}[[BIGSHB:T[0-9]+\.[XYZW]]]{{.*}}, literal +;EG-DAG: ADD_INT {{\*? 
*}}[[BIGSHC:T[0-9]+\.[XYZW]]]{{.*}}, literal +;EG-DAG: ADD_INT {{\*? *}}[[BIGSHD:T[0-9]+\.[XYZW]]]{{.*}}, literal +;EG-DAG: ASHR +;EG-DAG: ASHR +;EG-DAG: ASHR +;EG-DAG: ASHR +;EG-DAG: ASHR {{.*}}, literal +;EG-DAG: ASHR {{.*}}, literal +;EG-DAG: ASHR {{.*}}, literal +;EG-DAG: ASHR {{.*}}, literal +;EG-DAG: SETGT_UINT {{\*? *T[0-9]\.[XYZW]}}, [[SHA]], literal +;EG-DAG: SETGT_UINT {{\*? *T[0-9]\.[XYZW]}}, [[SHB]], literal +;EG-DAG: SETGT_UINT {{\*? *T[0-9]\.[XYZW]}}, [[SHC]], literal +;EG-DAG: SETGT_UINT {{\*? *T[0-9]\.[XYZW]}}, [[SHD]], literal +;EG-DAG: CNDE_INT +;EG-DAG: CNDE_INT +;EG-DAG: CNDE_INT +;EG-DAG: CNDE_INT +;EG-DAG: CNDE_INT +;EG-DAG: CNDE_INT +;EG-DAG: CNDE_INT +;EG-DAG: CNDE_INT + +;SI-LABEL: {{^}}ashr_v4i64: +;SI: v_ashr_i64 {{v\[[0-9]+:[0-9]+\], v\[[0-9]+:[0-9]+\], v[0-9]+}} +;SI: v_ashr_i64 {{v\[[0-9]+:[0-9]+\], v\[[0-9]+:[0-9]+\], v[0-9]+}} +;SI: v_ashr_i64 {{v\[[0-9]+:[0-9]+\], v\[[0-9]+:[0-9]+\], v[0-9]+}} +;SI: v_ashr_i64 {{v\[[0-9]+:[0-9]+\], v\[[0-9]+:[0-9]+\], v[0-9]+}} + +;VI-LABEL: {{^}}ashr_v4i64: +;VI: v_ashrrev_i64 {{v\[[0-9]+:[0-9]+\], v[0-9]+, v\[[0-9]+:[0-9]+\]}} +;VI: v_ashrrev_i64 {{v\[[0-9]+:[0-9]+\], v[0-9]+, v\[[0-9]+:[0-9]+\]}} +;VI: v_ashrrev_i64 {{v\[[0-9]+:[0-9]+\], v[0-9]+, v\[[0-9]+:[0-9]+\]}} +;VI: v_ashrrev_i64 {{v\[[0-9]+:[0-9]+\], v[0-9]+, v\[[0-9]+:[0-9]+\]}} define void @ashr_v4i64(<4 x i64> addrspace(1)* %out, <4 x i64> addrspace(1)* %in) { %b_ptr = getelementptr <4 x i64> addrspace(1)* %in, i64 1 diff --git a/test/CodeGen/R600/srem.ll b/test/CodeGen/R600/srem.ll index 65e3395..510db0e 100644 --- a/test/CodeGen/R600/srem.ll +++ b/test/CodeGen/R600/srem.ll @@ -1,4 +1,5 @@ -; RUN: llc -march=r600 -mcpu=SI < %s +; RUN: llc -march=amdgcn -mcpu=SI < %s | FileCheck -check-prefix=SI -check-prefix=FUNC %s +; RUN: llc -march=amdgcn -mcpu=tonga < %s | FileCheck -check-prefix=SI -check-prefix=FUNC %s ; RUN: llc -march=r600 -mcpu=redwood < %s define void @srem_i32(i32 addrspace(1)* %out, i32 addrspace(1)* %in) { @@ -17,6 +18,19 @@ define void @srem_i32_4(i32 addrspace(1)* %out, i32 addrspace(1)* %in) { ret void } +; FUNC-LABEL: {{^}}srem_i32_7: +; SI: v_mov_b32_e32 [[MAGIC:v[0-9]+]], 0x92492493 +; SI: v_mul_hi_i32 {{v[0-9]+}}, [[MAGIC]], +; SI: v_mul_lo_i32 +; SI: v_sub_i32 +; SI: s_endpgm +define void @srem_i32_7(i32 addrspace(1)* %out, i32 addrspace(1)* %in) { + %num = load i32 addrspace(1) * %in + %result = srem i32 %num, 7 + store i32 %result, i32 addrspace(1)* %out + ret void +} + define void @srem_v2i32(<2 x i32> addrspace(1)* %out, <2 x i32> addrspace(1)* %in) { %den_ptr = getelementptr <2 x i32> addrspace(1)* %in, i32 1 %num = load <2 x i32> addrspace(1) * %in @@ -48,3 +62,51 @@ define void @srem_v4i32_4(<4 x i32> addrspace(1)* %out, <4 x i32> addrspace(1)* store <4 x i32> %result, <4 x i32> addrspace(1)* %out ret void } + +define void @srem_i64(i64 addrspace(1)* %out, i64 addrspace(1)* %in) { + %den_ptr = getelementptr i64 addrspace(1)* %in, i64 1 + %num = load i64 addrspace(1) * %in + %den = load i64 addrspace(1) * %den_ptr + %result = srem i64 %num, %den + store i64 %result, i64 addrspace(1)* %out + ret void +} + +define void @srem_i64_4(i64 addrspace(1)* %out, i64 addrspace(1)* %in) { + %num = load i64 addrspace(1) * %in + %result = srem i64 %num, 4 + store i64 %result, i64 addrspace(1)* %out + ret void +} + +define void @srem_v2i64(<2 x i64> addrspace(1)* %out, <2 x i64> addrspace(1)* %in) { + %den_ptr = getelementptr <2 x i64> addrspace(1)* %in, i64 1 + %num = load <2 x i64> addrspace(1) * %in + %den = load <2 x i64> addrspace(1) * 
%den_ptr + %result = srem <2 x i64> %num, %den + store <2 x i64> %result, <2 x i64> addrspace(1)* %out + ret void +} + +define void @srem_v2i64_4(<2 x i64> addrspace(1)* %out, <2 x i64> addrspace(1)* %in) { + %num = load <2 x i64> addrspace(1) * %in + %result = srem <2 x i64> %num, <i64 4, i64 4> + store <2 x i64> %result, <2 x i64> addrspace(1)* %out + ret void +} + +define void @srem_v4i64(<4 x i64> addrspace(1)* %out, <4 x i64> addrspace(1)* %in) { + %den_ptr = getelementptr <4 x i64> addrspace(1)* %in, i64 1 + %num = load <4 x i64> addrspace(1) * %in + %den = load <4 x i64> addrspace(1) * %den_ptr + %result = srem <4 x i64> %num, %den + store <4 x i64> %result, <4 x i64> addrspace(1)* %out + ret void +} + +define void @srem_v4i64_4(<4 x i64> addrspace(1)* %out, <4 x i64> addrspace(1)* %in) { + %num = load <4 x i64> addrspace(1) * %in + %result = srem <4 x i64> %num, <i64 4, i64 4, i64 4, i64 4> + store <4 x i64> %result, <4 x i64> addrspace(1)* %out + ret void +} diff --git a/test/CodeGen/R600/srl.ll b/test/CodeGen/R600/srl.ll index 8c5daf6..1f9b620 100644 --- a/test/CodeGen/R600/srl.ll +++ b/test/CodeGen/R600/srl.ll @@ -1,166 +1,185 @@ -;RUN: llc < %s -march=r600 -mcpu=redwood | FileCheck --check-prefix=EG-CHECK %s -;RUN: llc < %s -march=r600 -mcpu=verde -verify-machineinstrs | FileCheck --check-prefix=SI-CHECK %s +; RUN: llc -march=amdgcn -mcpu=verde -verify-machineinstrs < %s | FileCheck -check-prefix=SI -check-prefix=FUNC %s +; RUN: llc -march=amdgcn -mcpu=tonga -verify-machineinstrs < %s | FileCheck -check-prefix=VI -check-prefix=FUNC %s +; RUN: llc -march=r600 -mcpu=redwood < %s | FileCheck -check-prefix=EG -check-prefix=FUNC %s + +; FUNC-LABEL: {{^}}lshr_i32: +; SI: v_lshrrev_b32_e32 v{{[0-9]+, v[0-9]+, v[0-9]+}} +; VI: v_lshrrev_b32_e32 v{{[0-9]+, v[0-9]+, v[0-9]+}} +; EG: LSHR {{\*? *}}T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}} +define void @lshr_i32(i32 addrspace(1)* %out, i32 addrspace(1)* %in) { + %b_ptr = getelementptr i32 addrspace(1)* %in, i32 1 + %a = load i32 addrspace(1)* %in + %b = load i32 addrspace(1)* %b_ptr + %result = lshr i32 %a, %b + store i32 %result, i32 addrspace(1)* %out + ret void +} -;EG-CHECK: {{^}}lshr_v2i32: -;EG-CHECK: LSHR {{\*? *}}T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}} -;EG-CHECK: LSHR {{\*? *}}T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}} +; FUNC-LABEL: {{^}}lshr_v2i32: +; SI: v_lshr_b32_e32 v{{[0-9]+, v[0-9]+, v[0-9]+}} +; SI: v_lshr_b32_e32 v{{[0-9]+, v[0-9]+, v[0-9]+}} -;SI-CHECK: {{^}}lshr_v2i32: -;SI-CHECK: v_lshr_b32_e32 v{{[0-9]+, v[0-9]+, v[0-9]+}} -;SI-CHECK: v_lshr_b32_e32 v{{[0-9]+, v[0-9]+, v[0-9]+}} +; VI: v_lshrrev_b32_e32 v{{[0-9]+, v[0-9]+, v[0-9]+}} +; VI: v_lshrrev_b32_e32 v{{[0-9]+, v[0-9]+, v[0-9]+}} +; EG: LSHR {{\*? *}}T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}} +; EG: LSHR {{\*? *}}T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}} define void @lshr_v2i32(<2 x i32> addrspace(1)* %out, <2 x i32> addrspace(1)* %in) { %b_ptr = getelementptr <2 x i32> addrspace(1)* %in, i32 1 - %a = load <2 x i32> addrspace(1) * %in - %b = load <2 x i32> addrspace(1) * %b_ptr + %a = load <2 x i32> addrspace(1)* %in + %b = load <2 x i32> addrspace(1)* %b_ptr %result = lshr <2 x i32> %a, %b store <2 x i32> %result, <2 x i32> addrspace(1)* %out ret void } - -;EG-CHECK: {{^}}lshr_v4i32: -;EG-CHECK: LSHR {{\*? *}}T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}} -;EG-CHECK: LSHR {{\*? *}}T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}} -;EG-CHECK: LSHR {{\*? 
*}}T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}} -;EG-CHECK: LSHR {{\*? *}}T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}} - -;SI-CHECK: {{^}}lshr_v4i32: -;SI-CHECK: v_lshr_b32_e32 v{{[0-9]+, v[0-9]+, v[0-9]+}} -;SI-CHECK: v_lshr_b32_e32 v{{[0-9]+, v[0-9]+, v[0-9]+}} -;SI-CHECK: v_lshr_b32_e32 v{{[0-9]+, v[0-9]+, v[0-9]+}} -;SI-CHECK: v_lshr_b32_e32 v{{[0-9]+, v[0-9]+, v[0-9]+}} - +; FUNC-LABEL: {{^}}lshr_v4i32: +; SI: v_lshr_b32_e32 v{{[0-9]+, v[0-9]+, v[0-9]+}} +; SI: v_lshr_b32_e32 v{{[0-9]+, v[0-9]+, v[0-9]+}} +; SI: v_lshr_b32_e32 v{{[0-9]+, v[0-9]+, v[0-9]+}} +; SI: v_lshr_b32_e32 v{{[0-9]+, v[0-9]+, v[0-9]+}} + +; VI: v_lshrrev_b32_e32 v{{[0-9]+, v[0-9]+, v[0-9]+}} +; VI: v_lshrrev_b32_e32 v{{[0-9]+, v[0-9]+, v[0-9]+}} +; VI: v_lshrrev_b32_e32 v{{[0-9]+, v[0-9]+, v[0-9]+}} +; VI: v_lshrrev_b32_e32 v{{[0-9]+, v[0-9]+, v[0-9]+}} + +; EG: LSHR {{\*? *}}T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}} +; EG: LSHR {{\*? *}}T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}} +; EG: LSHR {{\*? *}}T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}} +; EG: LSHR {{\*? *}}T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}} define void @lshr_v4i32(<4 x i32> addrspace(1)* %out, <4 x i32> addrspace(1)* %in) { %b_ptr = getelementptr <4 x i32> addrspace(1)* %in, i32 1 - %a = load <4 x i32> addrspace(1) * %in - %b = load <4 x i32> addrspace(1) * %b_ptr + %a = load <4 x i32> addrspace(1)* %in + %b = load <4 x i32> addrspace(1)* %b_ptr %result = lshr <4 x i32> %a, %b store <4 x i32> %result, <4 x i32> addrspace(1)* %out ret void } -;EG-CHECK: {{^}}lshr_i64: -;EG-CHECK: SUB_INT {{\*? *}}[[COMPSH:T[0-9]+\.[XYZW]]], {{literal.[xy]}}, [[SHIFT:T[0-9]+\.[XYZW]]] -;EG-CHECK: LSHL {{\* *}}[[TEMP:T[0-9]+\.[XYZW]]], [[OPHI:T[0-9]+\.[XYZW]]], {{[[COMPSH]]|PV.[XYZW]}} -;EG-CHECK: LSHL {{\*? *}}[[OVERF:T[0-9]+\.[XYZW]]], {{[[TEMP]]|PV.[XYZW]}}, 1 -;EG_CHECK-DAG: ADD_INT {{\*? *}}[[BIGSH:T[0-9]+\.[XYZW]]], [[SHIFT]], literal -;EG-CHECK-DAG: LSHR {{\*? *}}[[LOSMTMP:T[0-9]+\.[XYZW]]], [[OPLO:T[0-9]+\.[XYZW]]], [[SHIFT]] -;EG-CHECK-DAG: OR_INT {{\*? *}}[[LOSM:T[0-9]+\.[XYZW]]], {{[[LOSMTMP]]|PV.[XYZW]}}, {{[[OVERF]]|PV.[XYZW]}} -;EG-CHECK-DAG: LSHR {{\*? *}}[[HISM:T[0-9]+\.[XYZW]]], [[OPHI]], {{PS|[[SHIFT]]}} -;EG-CHECK-DAG: LSHR {{\*? *}}[[LOBIG:T[0-9]+\.[XYZW]]], [[OPHI]], {{PS|[[SHIFT]]}} -;EG-CHECK-DAG: SETGT_UINT {{\*? *}}[[RESC:T[0-9]+\.[XYZW]]], [[SHIFT]], literal -;EG-CHECK-DAG: CNDE_INT {{\*? *}}[[RESLO:T[0-9]+\.[XYZW]]], {{T[0-9]+\.[XYZW]}} -;EG-CHECK-DAG: CNDE_INT {{\*? *}}[[RESHI:T[0-9]+\.[XYZW]]], {{T[0-9]+\.[XYZW], .*}}, 0.0 - -;SI-CHECK: {{^}}lshr_i64: -;SI-CHECK: v_lshr_b64 {{v\[[0-9]+:[0-9]+\], v\[[0-9]+:[0-9]+\], v[0-9]+}} - +; FUNC-LABEL: {{^}}lshr_i64: +; SI: v_lshr_b64 {{v\[[0-9]+:[0-9]+\], v\[[0-9]+:[0-9]+\], v[0-9]+}} +; VI: v_lshrrev_b64 {{v\[[0-9]+:[0-9]+\], v[0-9]+, v\[[0-9]+:[0-9]+\]}} + +; EG: SUB_INT {{\*? *}}[[COMPSH:T[0-9]+\.[XYZW]]], {{literal.[xy]}}, [[SHIFT:T[0-9]+\.[XYZW]]] +; EG: LSHL {{\* *}}[[TEMP:T[0-9]+\.[XYZW]]], [[OPHI:T[0-9]+\.[XYZW]]], {{[[COMPSH]]|PV.[XYZW]}} +; EG: LSHL {{\*? *}}[[OVERF:T[0-9]+\.[XYZW]]], {{[[TEMP]]|PV.[XYZW]}}, 1 +; EG-DAG: ADD_INT {{\*? *}}[[BIGSH:T[0-9]+\.[XYZW]]], [[SHIFT]], literal +; EG-DAG: LSHR {{\*? *}}[[LOSMTMP:T[0-9]+\.[XYZW]]], [[OPLO:T[0-9]+\.[XYZW]]], [[SHIFT]] +; EG-DAG: OR_INT {{\*? *}}[[LOSM:T[0-9]+\.[XYZW]]], {{[[LOSMTMP]]|PV.[XYZW]}}, {{[[OVERF]]|PV.[XYZW]}} +; EG-DAG: LSHR {{\*? *}}[[HISM:T[0-9]+\.[XYZW]]], [[OPHI]], {{PS|[[SHIFT]]}} +; EG-DAG: LSHR {{\*? 
*}}[[LOBIG:T[0-9]+\.[XYZW]]], [[OPHI]], {{PS|[[SHIFT]]}} +; EG-DAG: SETGT_UINT {{\*? *}}[[RESC:T[0-9]+\.[XYZW]]], [[SHIFT]], literal +; EG-DAG: CNDE_INT {{\*? *}}[[RESLO:T[0-9]+\.[XYZW]]], {{T[0-9]+\.[XYZW]}} +; EG-DAG: CNDE_INT {{\*? *}}[[RESHI:T[0-9]+\.[XYZW]]], {{T[0-9]+\.[XYZW], .*}}, 0.0 define void @lshr_i64(i64 addrspace(1)* %out, i64 addrspace(1)* %in) { %b_ptr = getelementptr i64 addrspace(1)* %in, i64 1 - %a = load i64 addrspace(1) * %in - %b = load i64 addrspace(1) * %b_ptr + %a = load i64 addrspace(1)* %in + %b = load i64 addrspace(1)* %b_ptr %result = lshr i64 %a, %b store i64 %result, i64 addrspace(1)* %out ret void } -;EG-CHECK: {{^}}lshr_v2i64: -;EG-CHECK-DAG: SUB_INT {{\*? *}}[[COMPSHA:T[0-9]+\.[XYZW]]], {{literal.[xy]}}, [[SHA:T[0-9]+\.[XYZW]]] -;EG-CHECK-DAG: SUB_INT {{\*? *}}[[COMPSHB:T[0-9]+\.[XYZW]]], {{literal.[xy]}}, [[SHB:T[0-9]+\.[XYZW]]] -;EG-CHECK-DAG: LSHL {{\*? *}}[[COMPSHA]] -;EG-CHECK-DAG: LSHL {{\*? *}}[[COMPSHB]] -;EG-CHECK-DAG: LSHL {{.*}}, 1 -;EG-CHECK-DAG: LSHL {{.*}}, 1 -;EG-CHECK-DAG: LSHR {{.*}}, [[SHA]] -;EG-CHECK-DAG: LSHR {{.*}}, [[SHB]] -;EG-CHECK-DAG: LSHR {{.*}}, [[SHA]] -;EG-CHECK-DAG: LSHR {{.*}}, [[SHB]] -;EG-CHECK-DAG: OR_INT -;EG-CHECK-DAG: OR_INT -;EG-CHECK-DAG: ADD_INT {{\*? *}}[[BIGSHA:T[0-9]+\.[XYZW]]]{{.*}}, literal -;EG-CHECK-DAG: ADD_INT {{\*? *}}[[BIGSHB:T[0-9]+\.[XYZW]]]{{.*}}, literal -;EG-CHECK-DAG: LSHR -;EG-CHECK-DAG: LSHR -;EG-CHECK-DAG: SETGT_UINT {{\*? *T[0-9]\.[XYZW]}}, [[SHA]], literal -;EG-CHECK-DAG: SETGT_UINT {{\*? *T[0-9]\.[XYZW]}}, [[SHB]], literal -;EG-CHECK-DAG: CNDE_INT {{.*}}, 0.0 -;EG-CHECK-DAG: CNDE_INT {{.*}}, 0.0 -;EG-CHECK-DAG: CNDE_INT -;EG-CHECK-DAG: CNDE_INT - -;SI-CHECK: {{^}}lshr_v2i64: -;SI-CHECK: v_lshr_b64 {{v\[[0-9]+:[0-9]+\], v\[[0-9]+:[0-9]+\], v[0-9]+}} -;SI-CHECK: v_lshr_b64 {{v\[[0-9]+:[0-9]+\], v\[[0-9]+:[0-9]+\], v[0-9]+}} - +; FUNC-LABEL: {{^}}lshr_v2i64: +; SI: v_lshr_b64 {{v\[[0-9]+:[0-9]+\], v\[[0-9]+:[0-9]+\], v[0-9]+}} +; SI: v_lshr_b64 {{v\[[0-9]+:[0-9]+\], v\[[0-9]+:[0-9]+\], v[0-9]+}} + +; VI: v_lshrrev_b64 {{v\[[0-9]+:[0-9]+\], v[0-9]+, v\[[0-9]+:[0-9]+\]}} +; VI: v_lshrrev_b64 {{v\[[0-9]+:[0-9]+\], v[0-9]+, v\[[0-9]+:[0-9]+\]}} + +; EG-DAG: SUB_INT {{\*? *}}[[COMPSHA:T[0-9]+\.[XYZW]]], {{literal.[xy]}}, [[SHA:T[0-9]+\.[XYZW]]] +; EG-DAG: SUB_INT {{\*? *}}[[COMPSHB:T[0-9]+\.[XYZW]]], {{literal.[xy]}}, [[SHB:T[0-9]+\.[XYZW]]] +; EG-DAG: LSHL {{\*? *}}[[COMPSHA]] +; EG-DAG: LSHL {{\*? *}}[[COMPSHB]] +; EG-DAG: LSHL {{.*}}, 1 +; EG-DAG: LSHL {{.*}}, 1 +; EG-DAG: LSHR {{.*}}, [[SHA]] +; EG-DAG: LSHR {{.*}}, [[SHB]] +; EG-DAG: LSHR {{.*}}, [[SHA]] +; EG-DAG: LSHR {{.*}}, [[SHB]] +; EG-DAG: OR_INT +; EG-DAG: OR_INT +; EG-DAG: ADD_INT {{\*? *}}[[BIGSHA:T[0-9]+\.[XYZW]]]{{.*}}, literal +; EG-DAG: ADD_INT {{\*? *}}[[BIGSHB:T[0-9]+\.[XYZW]]]{{.*}}, literal +; EG-DAG: LSHR +; EG-DAG: LSHR +; EG-DAG: SETGT_UINT {{\*? *T[0-9]\.[XYZW]}}, [[SHA]], literal +; EG-DAG: SETGT_UINT {{\*? *T[0-9]\.[XYZW]}}, [[SHB]], literal +; EG-DAG: CNDE_INT {{.*}}, 0.0 +; EG-DAG: CNDE_INT {{.*}}, 0.0 +; EG-DAG: CNDE_INT +; EG-DAG: CNDE_INT define void @lshr_v2i64(<2 x i64> addrspace(1)* %out, <2 x i64> addrspace(1)* %in) { %b_ptr = getelementptr <2 x i64> addrspace(1)* %in, i64 1 - %a = load <2 x i64> addrspace(1) * %in - %b = load <2 x i64> addrspace(1) * %b_ptr + %a = load <2 x i64> addrspace(1)* %in + %b = load <2 x i64> addrspace(1)* %b_ptr %result = lshr <2 x i64> %a, %b store <2 x i64> %result, <2 x i64> addrspace(1)* %out ret void } - -;EG-CHECK: {{^}}lshr_v4i64: -;EG-CHECK-DAG: SUB_INT {{\*? 
*}}[[COMPSHA:T[0-9]+\.[XYZW]]], {{literal.[xy]}}, [[SHA:T[0-9]+\.[XYZW]]] -;EG-CHECK-DAG: SUB_INT {{\*? *}}[[COMPSHB:T[0-9]+\.[XYZW]]], {{literal.[xy]}}, [[SHB:T[0-9]+\.[XYZW]]] -;EG-CHECK-DAG: SUB_INT {{\*? *}}[[COMPSHC:T[0-9]+\.[XYZW]]], {{literal.[xy]}}, [[SHC:T[0-9]+\.[XYZW]]] -;EG-CHECK-DAG: SUB_INT {{\*? *}}[[COMPSHD:T[0-9]+\.[XYZW]]], {{literal.[xy]}}, [[SHD:T[0-9]+\.[XYZW]]] -;EG-CHECK-DAG: LSHL {{\*? *}}[[COMPSHA]] -;EG-CHECK-DAG: LSHL {{\*? *}}[[COMPSHB]] -;EG-CHECK-DAG: LSHL {{\*? *}}[[COMPSHC]] -;EG-CHECK-DAG: LSHL {{\*? *}}[[COMPSHD]] -;EG-CHECK-DAG: LSHL {{.*}}, 1 -;EG-CHECK-DAG: LSHL {{.*}}, 1 -;EG-CHECK-DAG: LSHL {{.*}}, 1 -;EG-CHECK-DAG: LSHL {{.*}}, 1 -;EG-CHECK-DAG: LSHR {{.*}}, [[SHA]] -;EG-CHECK-DAG: LSHR {{.*}}, [[SHB]] -;EG-CHECK-DAG: LSHR {{.*}}, [[SHC]] -;EG-CHECK-DAG: LSHR {{.*}}, [[SHD]] -;EG-CHECK-DAG: LSHR {{.*}}, [[SHA]] -;EG-CHECK-DAG: LSHR {{.*}}, [[SHB]] -;EG-CHECK-DAG: LSHR {{.*}}, [[SHC]] -;EG-CHECK-DAG: LSHR {{.*}}, [[SHD]] -;EG-CHECK-DAG: OR_INT -;EG-CHECK-DAG: OR_INT -;EG-CHECK-DAG: OR_INT -;EG-CHECK-DAG: OR_INT -;EG-CHECK-DAG: ADD_INT {{\*? *}}[[BIGSHA:T[0-9]+\.[XYZW]]]{{.*}}, literal -;EG-CHECK-DAG: ADD_INT {{\*? *}}[[BIGSHB:T[0-9]+\.[XYZW]]]{{.*}}, literal -;EG-CHECK-DAG: ADD_INT {{\*? *}}[[BIGSHC:T[0-9]+\.[XYZW]]]{{.*}}, literal -;EG-CHECK-DAG: ADD_INT {{\*? *}}[[BIGSHD:T[0-9]+\.[XYZW]]]{{.*}}, literal -;EG-CHECK-DAG: LSHR -;EG-CHECK-DAG: LSHR -;EG-CHECK-DAG: LSHR -;EG-CHECK-DAG: LSHR -;EG-CHECK-DAG: LSHR -;EG-CHECK-DAG: LSHR -;EG-CHECK-DAG: SETGT_UINT {{\*? *T[0-9]\.[XYZW]}}, [[SHA]], literal -;EG-CHECK-DAG: SETGT_UINT {{\*? *T[0-9]\.[XYZW]}}, [[SHB]], literal -;EG-CHECK-DAG: SETGT_UINT {{\*? *T[0-9]\.[XYZW]}}, [[SHC]], literal -;EG-CHECK-DAG: SETGT_UINT {{\*? *T[0-9]\.[XYZW]}}, [[SHD]], literal -;EG-CHECK-DAG: CNDE_INT {{.*}}, 0.0 -;EG-CHECK-DAG: CNDE_INT {{.*}}, 0.0 -;EG-CHECK-DAG: CNDE_INT {{.*}}, 0.0 -;EG-CHECK-DAG: CNDE_INT {{.*}}, 0.0 -;EG-CHECK-DAG: CNDE_INT -;EG-CHECK-DAG: CNDE_INT -;EG-CHECK-DAG: CNDE_INT -;EG-CHECK-DAG: CNDE_INT - -;SI-CHECK: {{^}}lshr_v4i64: -;SI-CHECK: v_lshr_b64 {{v\[[0-9]+:[0-9]+\], v\[[0-9]+:[0-9]+\], v[0-9]+}} -;SI-CHECK: v_lshr_b64 {{v\[[0-9]+:[0-9]+\], v\[[0-9]+:[0-9]+\], v[0-9]+}} -;SI-CHECK: v_lshr_b64 {{v\[[0-9]+:[0-9]+\], v\[[0-9]+:[0-9]+\], v[0-9]+}} -;SI-CHECK: v_lshr_b64 {{v\[[0-9]+:[0-9]+\], v\[[0-9]+:[0-9]+\], v[0-9]+}} - +; FUNC-LABEL: {{^}}lshr_v4i64: +; SI: v_lshr_b64 {{v\[[0-9]+:[0-9]+\], v\[[0-9]+:[0-9]+\], v[0-9]+}} +; SI: v_lshr_b64 {{v\[[0-9]+:[0-9]+\], v\[[0-9]+:[0-9]+\], v[0-9]+}} +; SI: v_lshr_b64 {{v\[[0-9]+:[0-9]+\], v\[[0-9]+:[0-9]+\], v[0-9]+}} +; SI: v_lshr_b64 {{v\[[0-9]+:[0-9]+\], v\[[0-9]+:[0-9]+\], v[0-9]+}} + +; VI: v_lshrrev_b64 {{v\[[0-9]+:[0-9]+\], v[0-9]+, v\[[0-9]+:[0-9]+\]}} +; VI: v_lshrrev_b64 {{v\[[0-9]+:[0-9]+\], v[0-9]+, v\[[0-9]+:[0-9]+\]}} +; VI: v_lshrrev_b64 {{v\[[0-9]+:[0-9]+\], v[0-9]+, v\[[0-9]+:[0-9]+\]}} +; VI: v_lshrrev_b64 {{v\[[0-9]+:[0-9]+\], v[0-9]+, v\[[0-9]+:[0-9]+\]}} + +; EG-DAG: SUB_INT {{\*? *}}[[COMPSHA:T[0-9]+\.[XYZW]]], {{literal.[xy]}}, [[SHA:T[0-9]+\.[XYZW]]] +; EG-DAG: SUB_INT {{\*? *}}[[COMPSHB:T[0-9]+\.[XYZW]]], {{literal.[xy]}}, [[SHB:T[0-9]+\.[XYZW]]] +; EG-DAG: SUB_INT {{\*? *}}[[COMPSHC:T[0-9]+\.[XYZW]]], {{literal.[xy]}}, [[SHC:T[0-9]+\.[XYZW]]] +; EG-DAG: SUB_INT {{\*? *}}[[COMPSHD:T[0-9]+\.[XYZW]]], {{literal.[xy]}}, [[SHD:T[0-9]+\.[XYZW]]] +; EG-DAG: LSHL {{\*? *}}[[COMPSHA]] +; EG-DAG: LSHL {{\*? *}}[[COMPSHB]] +; EG-DAG: LSHL {{\*? *}}[[COMPSHC]] +; EG-DAG: LSHL {{\*? 
*}}[[COMPSHD]] +; EG-DAG: LSHL {{.*}}, 1 +; EG-DAG: LSHL {{.*}}, 1 +; EG-DAG: LSHL {{.*}}, 1 +; EG-DAG: LSHL {{.*}}, 1 +; EG-DAG: LSHR {{.*}}, [[SHA]] +; EG-DAG: LSHR {{.*}}, [[SHB]] +; EG-DAG: LSHR {{.*}}, [[SHC]] +; EG-DAG: LSHR {{.*}}, [[SHD]] +; EG-DAG: LSHR {{.*}}, [[SHA]] +; EG-DAG: LSHR {{.*}}, [[SHB]] +; EG-DAG: LSHR {{.*}}, [[SHC]] +; EG-DAG: LSHR {{.*}}, [[SHD]] +; EG-DAG: OR_INT +; EG-DAG: OR_INT +; EG-DAG: OR_INT +; EG-DAG: OR_INT +; EG-DAG: ADD_INT {{\*? *}}[[BIGSHA:T[0-9]+\.[XYZW]]]{{.*}}, literal +; EG-DAG: ADD_INT {{\*? *}}[[BIGSHB:T[0-9]+\.[XYZW]]]{{.*}}, literal +; EG-DAG: ADD_INT {{\*? *}}[[BIGSHC:T[0-9]+\.[XYZW]]]{{.*}}, literal +; EG-DAG: ADD_INT {{\*? *}}[[BIGSHD:T[0-9]+\.[XYZW]]]{{.*}}, literal +; EG-DAG: LSHR +; EG-DAG: LSHR +; EG-DAG: LSHR +; EG-DAG: LSHR +; EG-DAG: LSHR +; EG-DAG: LSHR +; EG-DAG: SETGT_UINT {{\*? *T[0-9]\.[XYZW]}}, [[SHA]], literal +; EG-DAG: SETGT_UINT {{\*? *T[0-9]\.[XYZW]}}, [[SHB]], literal +; EG-DAG: SETGT_UINT {{\*? *T[0-9]\.[XYZW]}}, [[SHC]], literal +; EG-DAG: SETGT_UINT {{\*? *T[0-9]\.[XYZW]}}, [[SHD]], literal +; EG-DAG: CNDE_INT {{.*}}, 0.0 +; EG-DAG: CNDE_INT {{.*}}, 0.0 +; EG-DAG: CNDE_INT {{.*}}, 0.0 +; EG-DAG: CNDE_INT {{.*}}, 0.0 +; EG-DAG: CNDE_INT +; EG-DAG: CNDE_INT +; EG-DAG: CNDE_INT +; EG-DAG: CNDE_INT define void @lshr_v4i64(<4 x i64> addrspace(1)* %out, <4 x i64> addrspace(1)* %in) { %b_ptr = getelementptr <4 x i64> addrspace(1)* %in, i64 1 - %a = load <4 x i64> addrspace(1) * %in - %b = load <4 x i64> addrspace(1) * %b_ptr + %a = load <4 x i64> addrspace(1)* %in + %b = load <4 x i64> addrspace(1)* %b_ptr %result = lshr <4 x i64> %a, %b store <4 x i64> %result, <4 x i64> addrspace(1)* %out ret void diff --git a/test/CodeGen/R600/ssubo.ll b/test/CodeGen/R600/ssubo.ll index 8031c6f..09d3959 100644 --- a/test/CodeGen/R600/ssubo.ll +++ b/test/CodeGen/R600/ssubo.ll @@ -1,4 +1,5 @@ -; RUN: llc -march=r600 -mcpu=SI -verify-machineinstrs< %s | FileCheck -check-prefix=SI -check-prefix=FUNC %s +; RUN: llc -march=amdgcn -mcpu=SI -verify-machineinstrs< %s | FileCheck -check-prefix=SI -check-prefix=FUNC %s +; RUN: llc -march=amdgcn -mcpu=tonga -verify-machineinstrs< %s | FileCheck -check-prefix=SI -check-prefix=FUNC %s ; RUN: llc -march=r600 -mcpu=cypress -verify-machineinstrs< %s declare { i32, i1 } @llvm.ssub.with.overflow.i32(i32, i32) nounwind readnone diff --git a/test/CodeGen/R600/store-barrier.ll b/test/CodeGen/R600/store-barrier.ll index 350b006..ea65bb0 100644 --- a/test/CodeGen/R600/store-barrier.ll +++ b/test/CodeGen/R600/store-barrier.ll @@ -1,5 +1,5 @@ -; RUN: llc -march=r600 -mcpu=SI -verify-machineinstrs -mattr=+load-store-opt -enable-misched < %s | FileCheck --check-prefix=CHECK %s -; RUN: llc -march=r600 -mcpu=bonaire -verify-machineinstrs -mattr=+load-store-opt -enable-misched < %s | FileCheck --check-prefix=CHECK %s +; RUN: llc -march=amdgcn -mcpu=SI -verify-machineinstrs -mattr=+load-store-opt -enable-misched < %s | FileCheck --check-prefix=CHECK %s +; RUN: llc -march=amdgcn -mcpu=bonaire -verify-machineinstrs -mattr=+load-store-opt -enable-misched < %s | FileCheck --check-prefix=CHECK %s ; This test is for a bug in the machine scheduler where stores without ; an underlying object would be moved across the barrier. 
In this diff --git a/test/CodeGen/R600/store-v3i32.ll b/test/CodeGen/R600/store-v3i32.ll index 0f28f33..33617b5 100644 --- a/test/CodeGen/R600/store-v3i32.ll +++ b/test/CodeGen/R600/store-v3i32.ll @@ -1,5 +1,6 @@ ; XFAIL: * -; RUN: llc -verify-machineinstrs -march=r600 -mcpu=SI < %s | FileCheck -check-prefix=SI %s +; RUN: llc -verify-machineinstrs -march=amdgcn -mcpu=SI < %s | FileCheck -check-prefix=SI %s +; RUN: llc -verify-machineinstrs -march=amdgcn -mcpu=tonga < %s | FileCheck -check-prefix=SI %s ; 3 vectors have the same size and alignment as 4 vectors, so this ; should be done in a single store. diff --git a/test/CodeGen/R600/store-v3i64.ll b/test/CodeGen/R600/store-v3i64.ll index 247a561..e0c554a 100644 --- a/test/CodeGen/R600/store-v3i64.ll +++ b/test/CodeGen/R600/store-v3i64.ll @@ -1,5 +1,6 @@ ; XFAIL: * -; RUN: llc -march=r600 -mcpu=SI -verify-machineinstrs< %s | FileCheck -check-prefix=SI +; RUN: llc -march=amdgcn -mcpu=SI -verify-machineinstrs< %s | FileCheck -check-prefix=SI %s +; RUN: llc -march=amdgcn -mcpu=tonga -verify-machineinstrs< %s | FileCheck -check-prefix=SI %s ; SI-LABEL: {{^}}global_store_v3i64: ; SI: buffer_store_dwordx4 diff --git a/test/CodeGen/R600/store-vector-ptrs.ll b/test/CodeGen/R600/store-vector-ptrs.ll index aee639b..ba4d94f 100644 --- a/test/CodeGen/R600/store-vector-ptrs.ll +++ b/test/CodeGen/R600/store-vector-ptrs.ll @@ -1,4 +1,5 @@ -; RUN: llc -march=r600 -mcpu=SI -verify-machineinstrs< %s +; RUN: llc -march=amdgcn -mcpu=SI -verify-machineinstrs< %s +; RUN: llc -march=amdgcn -mcpu=tonga -verify-machineinstrs< %s ; This tests for a bug that caused a crash in ; AMDGPUDAGToDAGISel::SelectMUBUFScratch() which is used for selecting diff --git a/test/CodeGen/R600/store.ll b/test/CodeGen/R600/store.ll index 713ecd6..e4cb313 100644 --- a/test/CodeGen/R600/store.ll +++ b/test/CodeGen/R600/store.ll @@ -1,13 +1,14 @@ -; RUN: llc < %s -march=r600 -mcpu=redwood | FileCheck --check-prefix=EG-CHECK --check-prefix=FUNC %s -; RUN: llc < %s -march=r600 -mcpu=cayman | FileCheck --check-prefix=CM-CHECK --check-prefix=FUNC %s -; RUN: llc < %s -march=r600 -mcpu=verde -verify-machineinstrs | FileCheck --check-prefix=SI-CHECK --check-prefix=FUNC %s +; RUN: llc -march=amdgcn -mcpu=verde -verify-machineinstrs < %s | FileCheck -check-prefix=SI -check-prefix=FUNC %s +; RUN: llc -march=amdgcn -mcpu=tonga -verify-machineinstrs < %s | FileCheck -check-prefix=SI -check-prefix=FUNC %s +; RUN: llc -march=r600 -mcpu=redwood < %s | FileCheck -check-prefix=EG -check-prefix=FUNC %s +; RUN: llc -march=r600 -mcpu=cayman < %s | FileCheck -check-prefix=CM -check-prefix=FUNC %s ;===------------------------------------------------------------------------===; ; Global Address Space ;===------------------------------------------------------------------------===; ; FUNC-LABEL: {{^}}store_i1: -; EG-CHECK: MEM_RAT MSKOR -; SI-CHECK: buffer_store_byte +; EG: MEM_RAT MSKOR +; SI: buffer_store_byte define void @store_i1(i1 addrspace(1)* %out) { entry: store i1 true, i1 addrspace(1)* %out @@ -15,27 +16,29 @@ entry: } ; i8 store -; EG-CHECK-LABEL: {{^}}store_i8: -; EG-CHECK: MEM_RAT MSKOR T[[RW_GPR:[0-9]]].XW, T{{[0-9]}}.X -; EG-CHECK: VTX_READ_8 [[VAL:T[0-9]\.X]], [[VAL]] +; EG-LABEL: {{^}}store_i8: +; EG: MEM_RAT MSKOR T[[RW_GPR:[0-9]]].XW, T{{[0-9]}}.X + ; IG 0: Get the byte index and truncate the value -; EG-CHECK: AND_INT T{{[0-9]}}.[[BI_CHAN:[XYZW]]], KC0[2].Y, literal.x -; EG-CHECK-NEXT: AND_INT * T{{[0-9]}}.[[TRUNC_CHAN:[XYZW]]], [[VAL]], literal.y -; EG-CHECK-NEXT: 3(4.203895e-45), 
255(3.573311e-43) +; EG: AND_INT * T{{[0-9]}}.[[BI_CHAN:[XYZW]]], KC0[2].Y, literal.x +; EG: LSHL T{{[0-9]}}.[[SHIFT_CHAN:[XYZW]]], PV.[[BI_CHAN]], literal.x +; EG: AND_INT * T{{[0-9]}}.[[TRUNC_CHAN:[XYZW]]], KC0[2].Z, literal.y +; EG-NEXT: 3(4.203895e-45), 255(3.573311e-43) + + ; IG 1: Truncate the calculated the shift amount for the mask -; EG-CHECK: LSHL * T{{[0-9]}}.[[SHIFT_CHAN:[XYZW]]], PV.[[BI_CHAN]], literal.x -; EG-CHECK-NEXT: 3 + ; IG 2: Shift the value and the mask -; EG-CHECK: LSHL T[[RW_GPR]].X, T{{[0-9]}}.[[TRUNC_CHAN]], PV.[[SHIFT_CHAN]] -; EG-CHECK: LSHL * T[[RW_GPR]].W, literal.x, PV.[[SHIFT_CHAN]] -; EG-CHECK-NEXT: 255 +; EG: LSHL T[[RW_GPR]].X, PS, PV.[[SHIFT_CHAN]] +; EG: LSHL * T[[RW_GPR]].W, literal.x, PV.[[SHIFT_CHAN]] +; EG-NEXT: 255 ; IG 3: Initialize the Y and Z channels to zero ; XXX: An optimal scheduler should merge this into one of the prevous IGs. -; EG-CHECK: MOV T[[RW_GPR]].Y, 0.0 -; EG-CHECK: MOV * T[[RW_GPR]].Z, 0.0 +; EG: MOV T[[RW_GPR]].Y, 0.0 +; EG: MOV * T[[RW_GPR]].Z, 0.0 -; SI-CHECK-LABEL: {{^}}store_i8: -; SI-CHECK: buffer_store_byte +; SI-LABEL: {{^}}store_i8: +; SI: buffer_store_byte define void @store_i8(i8 addrspace(1)* %out, i8 %in) { entry: @@ -44,39 +47,44 @@ entry: } ; i16 store -; EG-CHECK-LABEL: {{^}}store_i16: -; EG-CHECK: MEM_RAT MSKOR T[[RW_GPR:[0-9]]].XW, T{{[0-9]}}.X -; EG-CHECK: VTX_READ_16 [[VAL:T[0-9]\.X]], [[VAL]] +; EG-LABEL: {{^}}store_i16: +; EG: MEM_RAT MSKOR T[[RW_GPR:[0-9]]].XW, T{{[0-9]}}.X + ; IG 0: Get the byte index and truncate the value -; EG-CHECK: AND_INT T{{[0-9]}}.[[BI_CHAN:[XYZW]]], KC0[2].Y, literal.x -; EG-CHECK: AND_INT * T{{[0-9]}}.[[TRUNC_CHAN:[XYZW]]], [[VAL]], literal.y -; EG-CHECK-NEXT: 3(4.203895e-45), 65535(9.183409e-41) + + +; EG: AND_INT * T{{[0-9]}}.[[BI_CHAN:[XYZW]]], KC0[2].Y, literal.x +; EG-NEXT: 3(4.203895e-45), + +; EG: LSHL T{{[0-9]}}.[[SHIFT_CHAN:[XYZW]]], PV.[[BI_CHAN]], literal.x +; EG: AND_INT * T{{[0-9]}}.[[TRUNC_CHAN:[XYZW]]], KC0[2].Z, literal.y + +; EG-NEXT: 3(4.203895e-45), 65535(9.183409e-41) ; IG 1: Truncate the calculated the shift amount for the mask -; EG-CHECK: LSHL * T{{[0-9]}}.[[SHIFT_CHAN:[XYZW]]], PV.[[BI_CHAN]], literal.x -; EG-CHECK: 3 + ; IG 2: Shift the value and the mask -; EG-CHECK: LSHL T[[RW_GPR]].X, T{{[0-9]}}.[[TRUNC_CHAN]], PV.[[SHIFT_CHAN]] -; EG-CHECK: LSHL * T[[RW_GPR]].W, literal.x, PV.[[SHIFT_CHAN]] -; EG-CHECK-NEXT: 65535 +; EG: LSHL T[[RW_GPR]].X, PS, PV.[[SHIFT_CHAN]] +; EG: LSHL * T[[RW_GPR]].W, literal.x, PV.[[SHIFT_CHAN]] +; EG-NEXT: 65535 ; IG 3: Initialize the Y and Z channels to zero ; XXX: An optimal scheduler should merge this into one of the prevous IGs. 
-; EG-CHECK: MOV T[[RW_GPR]].Y, 0.0 -; EG-CHECK: MOV * T[[RW_GPR]].Z, 0.0 +; EG: MOV T[[RW_GPR]].Y, 0.0 +; EG: MOV * T[[RW_GPR]].Z, 0.0 -; SI-CHECK-LABEL: {{^}}store_i16: -; SI-CHECK: buffer_store_short +; SI-LABEL: {{^}}store_i16: +; SI: buffer_store_short define void @store_i16(i16 addrspace(1)* %out, i16 %in) { entry: store i16 %in, i16 addrspace(1)* %out ret void } -; EG-CHECK-LABEL: {{^}}store_v2i8: -; EG-CHECK: MEM_RAT MSKOR -; EG-CHECK-NOT: MEM_RAT MSKOR -; SI-CHECK-LABEL: {{^}}store_v2i8: -; SI-CHECK: buffer_store_byte -; SI-CHECK: buffer_store_byte +; EG-LABEL: {{^}}store_v2i8: +; EG: MEM_RAT MSKOR +; EG-NOT: MEM_RAT MSKOR +; SI-LABEL: {{^}}store_v2i8: +; SI: buffer_store_byte +; SI: buffer_store_byte define void @store_v2i8(<2 x i8> addrspace(1)* %out, <2 x i32> %in) { entry: %0 = trunc <2 x i32> %in to <2 x i8> @@ -85,13 +93,13 @@ entry: } -; EG-CHECK-LABEL: {{^}}store_v2i16: -; EG-CHECK: MEM_RAT_CACHELESS STORE_RAW -; CM-CHECK-LABEL: {{^}}store_v2i16: -; CM-CHECK: MEM_RAT_CACHELESS STORE_DWORD -; SI-CHECK-LABEL: {{^}}store_v2i16: -; SI-CHECK: buffer_store_short -; SI-CHECK: buffer_store_short +; EG-LABEL: {{^}}store_v2i16: +; EG: MEM_RAT_CACHELESS STORE_RAW +; CM-LABEL: {{^}}store_v2i16: +; CM: MEM_RAT_CACHELESS STORE_DWORD +; SI-LABEL: {{^}}store_v2i16: +; SI: buffer_store_short +; SI: buffer_store_short define void @store_v2i16(<2 x i16> addrspace(1)* %out, <2 x i32> %in) { entry: %0 = trunc <2 x i32> %in to <2 x i16> @@ -99,15 +107,15 @@ entry: ret void } -; EG-CHECK-LABEL: {{^}}store_v4i8: -; EG-CHECK: MEM_RAT_CACHELESS STORE_RAW -; CM-CHECK-LABEL: {{^}}store_v4i8: -; CM-CHECK: MEM_RAT_CACHELESS STORE_DWORD -; SI-CHECK-LABEL: {{^}}store_v4i8: -; SI-CHECK: buffer_store_byte -; SI-CHECK: buffer_store_byte -; SI-CHECK: buffer_store_byte -; SI-CHECK: buffer_store_byte +; EG-LABEL: {{^}}store_v4i8: +; EG: MEM_RAT_CACHELESS STORE_RAW +; CM-LABEL: {{^}}store_v4i8: +; CM: MEM_RAT_CACHELESS STORE_DWORD +; SI-LABEL: {{^}}store_v4i8: +; SI: buffer_store_byte +; SI: buffer_store_byte +; SI: buffer_store_byte +; SI: buffer_store_byte define void @store_v4i8(<4 x i8> addrspace(1)* %out, <4 x i32> %in) { entry: %0 = trunc <4 x i32> %in to <4 x i8> @@ -116,30 +124,30 @@ entry: } ; floating-point store -; EG-CHECK-LABEL: {{^}}store_f32: -; EG-CHECK: MEM_RAT_CACHELESS STORE_RAW T{{[0-9]+\.X, T[0-9]+\.X}}, 1 -; CM-CHECK-LABEL: {{^}}store_f32: -; CM-CHECK: MEM_RAT_CACHELESS STORE_DWORD T{{[0-9]+\.X, T[0-9]+\.X}} -; SI-CHECK-LABEL: {{^}}store_f32: -; SI-CHECK: buffer_store_dword +; EG-LABEL: {{^}}store_f32: +; EG: MEM_RAT_CACHELESS STORE_RAW T{{[0-9]+\.X, T[0-9]+\.X}}, 1 +; CM-LABEL: {{^}}store_f32: +; CM: MEM_RAT_CACHELESS STORE_DWORD T{{[0-9]+\.X, T[0-9]+\.X}} +; SI-LABEL: {{^}}store_f32: +; SI: buffer_store_dword define void @store_f32(float addrspace(1)* %out, float %in) { store float %in, float addrspace(1)* %out ret void } -; EG-CHECK-LABEL: {{^}}store_v4i16: -; EG-CHECK: MEM_RAT MSKOR -; EG-CHECK: MEM_RAT MSKOR -; EG-CHECK: MEM_RAT MSKOR -; EG-CHECK: MEM_RAT MSKOR -; EG-CHECK-NOT: MEM_RAT MSKOR -; SI-CHECK-LABEL: {{^}}store_v4i16: -; SI-CHECK: buffer_store_short -; SI-CHECK: buffer_store_short -; SI-CHECK: buffer_store_short -; SI-CHECK: buffer_store_short -; SI-CHECK-NOT: buffer_store_byte +; EG-LABEL: {{^}}store_v4i16: +; EG: MEM_RAT MSKOR +; EG: MEM_RAT MSKOR +; EG: MEM_RAT MSKOR +; EG: MEM_RAT MSKOR +; EG-NOT: MEM_RAT MSKOR +; SI-LABEL: {{^}}store_v4i16: +; SI: buffer_store_short +; SI: buffer_store_short +; SI: buffer_store_short +; SI: buffer_store_short +; SI-NOT: 
buffer_store_byte define void @store_v4i16(<4 x i16> addrspace(1)* %out, <4 x i32> %in) { entry: %0 = trunc <4 x i32> %in to <4 x i16> @@ -148,12 +156,12 @@ entry: } ; vec2 floating-point stores -; EG-CHECK-LABEL: {{^}}store_v2f32: -; EG-CHECK: MEM_RAT_CACHELESS STORE_RAW -; CM-CHECK-LABEL: {{^}}store_v2f32: -; CM-CHECK: MEM_RAT_CACHELESS STORE_DWORD -; SI-CHECK-LABEL: {{^}}store_v2f32: -; SI-CHECK: buffer_store_dwordx2 +; EG-LABEL: {{^}}store_v2f32: +; EG: MEM_RAT_CACHELESS STORE_RAW +; CM-LABEL: {{^}}store_v2f32: +; CM: MEM_RAT_CACHELESS STORE_DWORD +; SI-LABEL: {{^}}store_v2f32: +; SI: buffer_store_dwordx2 define void @store_v2f32(<2 x float> addrspace(1)* %out, float %a, float %b) { entry: @@ -163,14 +171,14 @@ entry: ret void } -; EG-CHECK-LABEL: {{^}}store_v4i32: -; EG-CHECK: MEM_RAT_CACHELESS STORE_RAW -; EG-CHECK-NOT: MEM_RAT_CACHELESS STORE_RAW -; CM-CHECK-LABEL: {{^}}store_v4i32: -; CM-CHECK: MEM_RAT_CACHELESS STORE_DWORD -; CM-CHECK-NOT: MEM_RAT_CACHELESS STORE_DWORD -; SI-CHECK-LABEL: {{^}}store_v4i32: -; SI-CHECK: buffer_store_dwordx4 +; EG-LABEL: {{^}}store_v4i32: +; EG: MEM_RAT_CACHELESS STORE_RAW +; EG-NOT: MEM_RAT_CACHELESS STORE_RAW +; CM-LABEL: {{^}}store_v4i32: +; CM: MEM_RAT_CACHELESS STORE_DWORD +; CM-NOT: MEM_RAT_CACHELESS STORE_DWORD +; SI-LABEL: {{^}}store_v4i32: +; SI: buffer_store_dwordx4 define void @store_v4i32(<4 x i32> addrspace(1)* %out, <4 x i32> %in) { entry: store <4 x i32> %in, <4 x i32> addrspace(1)* %out @@ -178,8 +186,8 @@ entry: } ; FUNC-LABEL: {{^}}store_i64_i8: -; EG-CHECK: MEM_RAT MSKOR -; SI-CHECK: buffer_store_byte +; EG: MEM_RAT MSKOR +; SI: buffer_store_byte define void @store_i64_i8(i8 addrspace(1)* %out, i64 %in) { entry: %0 = trunc i64 %in to i8 @@ -188,8 +196,8 @@ entry: } ; FUNC-LABEL: {{^}}store_i64_i16: -; EG-CHECK: MEM_RAT MSKOR -; SI-CHECK: buffer_store_short +; EG: MEM_RAT MSKOR +; SI: buffer_store_short define void @store_i64_i16(i16 addrspace(1)* %out, i64 %in) { entry: %0 = trunc i64 %in to i16 @@ -202,89 +210,89 @@ entry: ;===------------------------------------------------------------------------===; ; FUNC-LABEL: {{^}}store_local_i1: -; EG-CHECK: LDS_BYTE_WRITE -; SI-CHECK: ds_write_b8 +; EG: LDS_BYTE_WRITE +; SI: ds_write_b8 define void @store_local_i1(i1 addrspace(3)* %out) { entry: store i1 true, i1 addrspace(3)* %out ret void } -; EG-CHECK-LABEL: {{^}}store_local_i8: -; EG-CHECK: LDS_BYTE_WRITE -; SI-CHECK-LABEL: {{^}}store_local_i8: -; SI-CHECK: ds_write_b8 +; EG-LABEL: {{^}}store_local_i8: +; EG: LDS_BYTE_WRITE +; SI-LABEL: {{^}}store_local_i8: +; SI: ds_write_b8 define void @store_local_i8(i8 addrspace(3)* %out, i8 %in) { store i8 %in, i8 addrspace(3)* %out ret void } -; EG-CHECK-LABEL: {{^}}store_local_i16: -; EG-CHECK: LDS_SHORT_WRITE -; SI-CHECK-LABEL: {{^}}store_local_i16: -; SI-CHECK: ds_write_b16 +; EG-LABEL: {{^}}store_local_i16: +; EG: LDS_SHORT_WRITE +; SI-LABEL: {{^}}store_local_i16: +; SI: ds_write_b16 define void @store_local_i16(i16 addrspace(3)* %out, i16 %in) { store i16 %in, i16 addrspace(3)* %out ret void } -; EG-CHECK-LABEL: {{^}}store_local_v2i16: -; EG-CHECK: LDS_WRITE -; CM-CHECK-LABEL: {{^}}store_local_v2i16: -; CM-CHECK: LDS_WRITE -; SI-CHECK-LABEL: {{^}}store_local_v2i16: -; SI-CHECK: ds_write_b16 -; SI-CHECK: ds_write_b16 +; EG-LABEL: {{^}}store_local_v2i16: +; EG: LDS_WRITE +; CM-LABEL: {{^}}store_local_v2i16: +; CM: LDS_WRITE +; SI-LABEL: {{^}}store_local_v2i16: +; SI: ds_write_b16 +; SI: ds_write_b16 define void @store_local_v2i16(<2 x i16> addrspace(3)* %out, <2 x i16> %in) { entry: store <2 
x i16> %in, <2 x i16> addrspace(3)* %out ret void } -; EG-CHECK-LABEL: {{^}}store_local_v4i8: -; EG-CHECK: LDS_WRITE -; CM-CHECK-LABEL: {{^}}store_local_v4i8: -; CM-CHECK: LDS_WRITE -; SI-CHECK-LABEL: {{^}}store_local_v4i8: -; SI-CHECK: ds_write_b8 -; SI-CHECK: ds_write_b8 -; SI-CHECK: ds_write_b8 -; SI-CHECK: ds_write_b8 +; EG-LABEL: {{^}}store_local_v4i8: +; EG: LDS_WRITE +; CM-LABEL: {{^}}store_local_v4i8: +; CM: LDS_WRITE +; SI-LABEL: {{^}}store_local_v4i8: +; SI: ds_write_b8 +; SI: ds_write_b8 +; SI: ds_write_b8 +; SI: ds_write_b8 define void @store_local_v4i8(<4 x i8> addrspace(3)* %out, <4 x i8> %in) { entry: store <4 x i8> %in, <4 x i8> addrspace(3)* %out ret void } -; EG-CHECK-LABEL: {{^}}store_local_v2i32: -; EG-CHECK: LDS_WRITE -; EG-CHECK: LDS_WRITE -; CM-CHECK-LABEL: {{^}}store_local_v2i32: -; CM-CHECK: LDS_WRITE -; CM-CHECK: LDS_WRITE -; SI-CHECK-LABEL: {{^}}store_local_v2i32: -; SI-CHECK: ds_write_b64 +; EG-LABEL: {{^}}store_local_v2i32: +; EG: LDS_WRITE +; EG: LDS_WRITE +; CM-LABEL: {{^}}store_local_v2i32: +; CM: LDS_WRITE +; CM: LDS_WRITE +; SI-LABEL: {{^}}store_local_v2i32: +; SI: ds_write_b64 define void @store_local_v2i32(<2 x i32> addrspace(3)* %out, <2 x i32> %in) { entry: store <2 x i32> %in, <2 x i32> addrspace(3)* %out ret void } -; EG-CHECK-LABEL: {{^}}store_local_v4i32: -; EG-CHECK: LDS_WRITE -; EG-CHECK: LDS_WRITE -; EG-CHECK: LDS_WRITE -; EG-CHECK: LDS_WRITE -; CM-CHECK-LABEL: {{^}}store_local_v4i32: -; CM-CHECK: LDS_WRITE -; CM-CHECK: LDS_WRITE -; CM-CHECK: LDS_WRITE -; CM-CHECK: LDS_WRITE -; SI-CHECK-LABEL: {{^}}store_local_v4i32: -; SI-CHECK: ds_write_b32 -; SI-CHECK: ds_write_b32 -; SI-CHECK: ds_write_b32 -; SI-CHECK: ds_write_b32 +; EG-LABEL: {{^}}store_local_v4i32: +; EG: LDS_WRITE +; EG: LDS_WRITE +; EG: LDS_WRITE +; EG: LDS_WRITE +; CM-LABEL: {{^}}store_local_v4i32: +; CM: LDS_WRITE +; CM: LDS_WRITE +; CM: LDS_WRITE +; CM: LDS_WRITE +; SI-LABEL: {{^}}store_local_v4i32: +; SI: ds_write_b32 +; SI: ds_write_b32 +; SI: ds_write_b32 +; SI: ds_write_b32 define void @store_local_v4i32(<4 x i32> addrspace(3)* %out, <4 x i32> %in) { entry: store <4 x i32> %in, <4 x i32> addrspace(3)* %out @@ -292,8 +300,8 @@ entry: } ; FUNC-LABEL: {{^}}store_local_i64_i8: -; EG-CHECK: LDS_BYTE_WRITE -; SI-CHECK: ds_write_b8 +; EG: LDS_BYTE_WRITE +; SI: ds_write_b8 define void @store_local_i64_i8(i8 addrspace(3)* %out, i64 %in) { entry: %0 = trunc i64 %in to i8 @@ -302,8 +310,8 @@ entry: } ; FUNC-LABEL: {{^}}store_local_i64_i16: -; EG-CHECK: LDS_SHORT_WRITE -; SI-CHECK: ds_write_b16 +; EG: LDS_SHORT_WRITE +; SI: ds_write_b16 define void @store_local_i64_i16(i16 addrspace(3)* %out, i64 %in) { entry: %0 = trunc i64 %in to i16 @@ -318,12 +326,12 @@ entry: ; Evergreen / Northern Islands don't support 64-bit stores yet, so there should ; be two 32-bit stores. 
-; EG-CHECK-LABEL: {{^}}vecload2: -; EG-CHECK: MEM_RAT_CACHELESS STORE_RAW -; CM-CHECK-LABEL: {{^}}vecload2: -; CM-CHECK: MEM_RAT_CACHELESS STORE_DWORD -; SI-CHECK-LABEL: {{^}}vecload2: -; SI-CHECK: buffer_store_dwordx2 +; EG-LABEL: {{^}}vecload2: +; EG: MEM_RAT_CACHELESS STORE_RAW +; CM-LABEL: {{^}}vecload2: +; CM: MEM_RAT_CACHELESS STORE_DWORD +; SI-LABEL: {{^}}vecload2: +; SI: buffer_store_dwordx2 define void @vecload2(i32 addrspace(1)* nocapture %out, i32 addrspace(2)* nocapture %mem) #0 { entry: %0 = load i32 addrspace(2)* %mem, align 4 @@ -341,14 +349,14 @@ attributes #0 = { nounwind "less-precise-fpmad"="false" "no-frame-pointer-elim"= ; FUNC-LABEL: {{^}}"i128-const-store": ; FIXME: We should be able to to this with one store instruction -; EG-CHECK: STORE_RAW -; EG-CHECK: STORE_RAW -; EG-CHECK: STORE_RAW -; EG-CHECK: STORE_RAW -; CM-CHECK: STORE_DWORD -; CM-CHECK: STORE_DWORD -; CM-CHECK: STORE_DWORD -; CM-CHECK: STORE_DWORD +; EG: STORE_RAW +; EG: STORE_RAW +; EG: STORE_RAW +; EG: STORE_RAW +; CM: STORE_DWORD +; CM: STORE_DWORD +; CM: STORE_DWORD +; CM: STORE_DWORD ; SI: buffer_store_dwordx2 ; SI: buffer_store_dwordx2 define void @i128-const-store(i32 addrspace(1)* %out) { diff --git a/test/CodeGen/R600/store.r600.ll b/test/CodeGen/R600/store.r600.ll index 3df30d4..2197260 100644 --- a/test/CodeGen/R600/store.r600.ll +++ b/test/CodeGen/R600/store.r600.ll @@ -1,10 +1,10 @@ -; RUN: llc < %s -march=r600 -mcpu=redwood | FileCheck --check-prefix=EG-CHECK %s +; RUN: llc < %s -march=r600 -mcpu=redwood | FileCheck --check-prefix=EG %s ; XXX: Merge this test into store.ll once it is supported on SI ; v4i32 store -; EG-CHECK: {{^}}store_v4i32: -; EG-CHECK: MEM_RAT_CACHELESS STORE_RAW T{{[0-9]+\.XYZW, T[0-9]+\.X}}, 1 +; EG: {{^}}store_v4i32: +; EG: MEM_RAT_CACHELESS STORE_RAW T{{[0-9]+\.XYZW, T[0-9]+\.X}}, 1 define void @store_v4i32(<4 x i32> addrspace(1)* %out, <4 x i32> addrspace(1)* %in) { %1 = load <4 x i32> addrspace(1) * %in @@ -13,8 +13,8 @@ define void @store_v4i32(<4 x i32> addrspace(1)* %out, <4 x i32> addrspace(1)* % } ; v4f32 store -; EG-CHECK: {{^}}store_v4f32: -; EG-CHECK: MEM_RAT_CACHELESS STORE_RAW T{{[0-9]+\.XYZW, T[0-9]+\.X}}, 1 +; EG: {{^}}store_v4f32: +; EG: MEM_RAT_CACHELESS STORE_RAW T{{[0-9]+\.XYZW, T[0-9]+\.X}}, 1 define void @store_v4f32(<4 x float> addrspace(1)* %out, <4 x float> addrspace(1)* %in) { %1 = load <4 x float> addrspace(1) * %in store <4 x float> %1, <4 x float> addrspace(1)* %out diff --git a/test/CodeGen/R600/sub.ll b/test/CodeGen/R600/sub.ll index 2bbc0cf..be48e18 100644 --- a/test/CodeGen/R600/sub.ll +++ b/test/CodeGen/R600/sub.ll @@ -1,16 +1,31 @@ -;RUN: llc -march=r600 -mcpu=redwood < %s | FileCheck -check-prefix=EG -check-prefix=FUNC %s -;RUN: llc -march=r600 -mcpu=verde -verify-machineinstrs < %s | FileCheck -check-prefix=SI -check-prefix=FUNC %s +; RUN: llc -march=amdgcn -mcpu=verde -verify-machineinstrs < %s | FileCheck -check-prefix=SI -check-prefix=FUNC %s +; RUN: llc -march=r600 -mcpu=redwood < %s | FileCheck -check-prefix=EG -check-prefix=FUNC %s + declare i32 @llvm.r600.read.tidig.x() readnone -;FUNC-LABEL: {{^}}test2: -;EG: SUB_INT {{\** *}}T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}} -;EG: SUB_INT {{\** *}}T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}} +; FUNC-LABEL: {{^}}test_sub_i32: +; EG: SUB_INT {{\** *}}T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}} + +; SI: v_subrev_i32_e32 v{{[0-9]+, v[0-9]+, v[0-9]+}} +define void @test_sub_i32(i32 addrspace(1)* %out, i32 addrspace(1)* %in) { + %b_ptr = getelementptr i32 
addrspace(1)* %in, i32 1 + %a = load i32 addrspace(1)* %in + %b = load i32 addrspace(1)* %b_ptr + %result = sub i32 %a, %b + store i32 %result, i32 addrspace(1)* %out + ret void +} -;SI: v_sub_i32_e32 v{{[0-9]+, v[0-9]+, v[0-9]+}} -;SI: v_sub_i32_e32 v{{[0-9]+, v[0-9]+, v[0-9]+}} -define void @test2(<2 x i32> addrspace(1)* %out, <2 x i32> addrspace(1)* %in) { +; FUNC-LABEL: {{^}}test_sub_v2i32: +; EG: SUB_INT {{\** *}}T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}} +; EG: SUB_INT {{\** *}}T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}} + +; SI: v_sub_i32_e32 v{{[0-9]+, v[0-9]+, v[0-9]+}} +; SI: v_sub_i32_e32 v{{[0-9]+, v[0-9]+, v[0-9]+}} + +define void @test_sub_v2i32(<2 x i32> addrspace(1)* %out, <2 x i32> addrspace(1)* %in) { %b_ptr = getelementptr <2 x i32> addrspace(1)* %in, i32 1 %a = load <2 x i32> addrspace(1) * %in %b = load <2 x i32> addrspace(1) * %b_ptr @@ -19,18 +34,18 @@ define void @test2(<2 x i32> addrspace(1)* %out, <2 x i32> addrspace(1)* %in) { ret void } -;FUNC-LABEL: {{^}}test4: -;EG: SUB_INT {{\** *}}T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}} -;EG: SUB_INT {{\** *}}T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}} -;EG: SUB_INT {{\** *}}T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}} -;EG: SUB_INT {{\** *}}T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}} +; FUNC-LABEL: {{^}}test_sub_v4i32: +; EG: SUB_INT {{\** *}}T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}} +; EG: SUB_INT {{\** *}}T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}} +; EG: SUB_INT {{\** *}}T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}} +; EG: SUB_INT {{\** *}}T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}} -;SI: v_sub_i32_e32 v{{[0-9]+, v[0-9]+, v[0-9]+}} -;SI: v_sub_i32_e32 v{{[0-9]+, v[0-9]+, v[0-9]+}} -;SI: v_sub_i32_e32 v{{[0-9]+, v[0-9]+, v[0-9]+}} -;SI: v_sub_i32_e32 v{{[0-9]+, v[0-9]+, v[0-9]+}} +; SI: v_sub_i32_e32 v{{[0-9]+, v[0-9]+, v[0-9]+}} +; SI: v_sub_i32_e32 v{{[0-9]+, v[0-9]+, v[0-9]+}} +; SI: v_sub_i32_e32 v{{[0-9]+, v[0-9]+, v[0-9]+}} +; SI: v_sub_i32_e32 v{{[0-9]+, v[0-9]+, v[0-9]+}} -define void @test4(<4 x i32> addrspace(1)* %out, <4 x i32> addrspace(1)* %in) { +define void @test_sub_v4i32(<4 x i32> addrspace(1)* %out, <4 x i32> addrspace(1)* %in) { %b_ptr = getelementptr <4 x i32> addrspace(1)* %in, i32 1 %a = load <4 x i32> addrspace(1) * %in %b = load <4 x i32> addrspace(1) * %b_ptr @@ -73,3 +88,39 @@ define void @v_sub_i64(i64 addrspace(1)* noalias %out, i64 addrspace(1)* noalias store i64 %result, i64 addrspace(1)* %out, align 8 ret void } + +; FUNC-LABEL: {{^}}v_test_sub_v2i64: +; SI: v_sub_i32_e32 +; SI: v_subb_u32_e32 +; SI: v_sub_i32_e32 +; SI: v_subb_u32_e32 +define void @v_test_sub_v2i64(<2 x i64> addrspace(1)* %out, <2 x i64> addrspace(1)* noalias %inA, <2 x i64> addrspace(1)* noalias %inB) { + %tid = call i32 @llvm.r600.read.tidig.x() readnone + %a_ptr = getelementptr <2 x i64> addrspace(1)* %inA, i32 %tid + %b_ptr = getelementptr <2 x i64> addrspace(1)* %inB, i32 %tid + %a = load <2 x i64> addrspace(1)* %a_ptr + %b = load <2 x i64> addrspace(1)* %b_ptr + %result = sub <2 x i64> %a, %b + store <2 x i64> %result, <2 x i64> addrspace(1)* %out + ret void +} + +; FUNC-LABEL: {{^}}v_test_sub_v4i64: +; SI: v_sub_i32_e32 +; SI: v_subb_u32_e32 +; SI: v_sub_i32_e32 +; SI: v_subb_u32_e32 +; SI: v_sub_i32_e32 +; SI: v_subb_u32_e32 +; SI: v_sub_i32_e32 +; SI: v_subb_u32_e32 +define void @v_test_sub_v4i64(<4 x i64> addrspace(1)* %out, <4 x i64> addrspace(1)* noalias %inA, <4 x i64> addrspace(1)* noalias %inB) { + %tid = 
call i32 @llvm.r600.read.tidig.x() readnone + %a_ptr = getelementptr <4 x i64> addrspace(1)* %inA, i32 %tid + %b_ptr = getelementptr <4 x i64> addrspace(1)* %inB, i32 %tid + %a = load <4 x i64> addrspace(1)* %a_ptr + %b = load <4 x i64> addrspace(1)* %b_ptr + %result = sub <4 x i64> %a, %b + store <4 x i64> %result, <4 x i64> addrspace(1)* %out + ret void +} diff --git a/test/CodeGen/R600/subreg-coalescer-crash.ll b/test/CodeGen/R600/subreg-coalescer-crash.ll new file mode 100644 index 0000000..c4dae47 --- /dev/null +++ b/test/CodeGen/R600/subreg-coalescer-crash.ll @@ -0,0 +1,109 @@ +; RUN: llc -march=amdgcn -mcpu=SI -verify-machineinstrs -o - %s +; RUN: llc -march=amdgcn -mcpu=tonga -verify-machineinstrs -o - %s + +; SI-LABEL:{{^}}row_filter_C1_D0: +; SI: s_endpgm +; Function Attrs: nounwind +define void @row_filter_C1_D0() { +entry: + br i1 undef, label %for.inc.1, label %do.body.preheader + +do.body.preheader: ; preds = %entry + %0 = insertelement <4 x i32> zeroinitializer, i32 undef, i32 1 + br i1 undef, label %do.body56.1, label %do.body90 + +do.body90: ; preds = %do.body56.2, %do.body56.1, %do.body.preheader + %1 = phi <4 x i32> [ %6, %do.body56.2 ], [ %5, %do.body56.1 ], [ %0, %do.body.preheader ] + %2 = insertelement <4 x i32> %1, i32 undef, i32 2 + %3 = insertelement <4 x i32> %2, i32 undef, i32 3 + br i1 undef, label %do.body124.1, label %do.body.1562.preheader + +do.body.1562.preheader: ; preds = %do.body124.1, %do.body90 + %storemerge = phi <4 x i32> [ %3, %do.body90 ], [ %7, %do.body124.1 ] + %4 = insertelement <4 x i32> undef, i32 undef, i32 1 + br label %for.inc.1 + +do.body56.1: ; preds = %do.body.preheader + %5 = insertelement <4 x i32> %0, i32 undef, i32 1 + %or.cond472.1 = or i1 undef, undef + br i1 %or.cond472.1, label %do.body56.2, label %do.body90 + +do.body56.2: ; preds = %do.body56.1 + %6 = insertelement <4 x i32> %5, i32 undef, i32 1 + br label %do.body90 + +do.body124.1: ; preds = %do.body90 + %7 = insertelement <4 x i32> %3, i32 undef, i32 3 + br label %do.body.1562.preheader + +for.inc.1: ; preds = %do.body.1562.preheader, %entry + %storemerge591 = phi <4 x i32> [ zeroinitializer, %entry ], [ %storemerge, %do.body.1562.preheader ] + %add.i495 = add <4 x i32> %storemerge591, undef + unreachable +} + +; SI-LABEL: {{^}}foo: +; SI: s_endpgm +define void @foo() #0 { +bb: + br i1 undef, label %bb2, label %bb1 + +bb1: ; preds = %bb + br i1 undef, label %bb4, label %bb6 + +bb2: ; preds = %bb4, %bb + %tmp = phi float [ %tmp5, %bb4 ], [ 0.000000e+00, %bb ] + br i1 undef, label %bb9, label %bb13 + +bb4: ; preds = %bb7, %bb6, %bb1 + %tmp5 = phi float [ undef, %bb1 ], [ undef, %bb6 ], [ %tmp8, %bb7 ] + br label %bb2 + +bb6: ; preds = %bb1 + br i1 undef, label %bb7, label %bb4 + +bb7: ; preds = %bb6 + %tmp8 = fmul float undef, undef + br label %bb4 + +bb9: ; preds = %bb2 + %tmp10 = call <4 x float> @llvm.SI.sample.v2i32(<2 x i32> undef, <32 x i8> undef, <16 x i8> undef, i32 2) + %tmp11 = extractelement <4 x float> %tmp10, i32 1 + %tmp12 = extractelement <4 x float> %tmp10, i32 3 + br label %bb14 + +bb13: ; preds = %bb2 + br i1 undef, label %bb23, label %bb24 + +bb14: ; preds = %bb27, %bb24, %bb9 + %tmp15 = phi float [ %tmp12, %bb9 ], [ undef, %bb27 ], [ 0.000000e+00, %bb24 ] + %tmp16 = phi float [ %tmp11, %bb9 ], [ undef, %bb27 ], [ %tmp25, %bb24 ] + %tmp17 = fmul float 10.5, %tmp16 + %tmp18 = fmul float 11.5, %tmp15 + call void @llvm.SI.export(i32 15, i32 1, i32 1, i32 0, i32 1, float %tmp18, float %tmp17, float %tmp17, float %tmp17) + ret void + +bb23: ; preds = %bb13 + br 
i1 undef, label %bb24, label %bb26 + +bb24: ; preds = %bb26, %bb23, %bb13 + %tmp25 = phi float [ %tmp, %bb13 ], [ %tmp, %bb26 ], [ 0.000000e+00, %bb23 ] + br i1 undef, label %bb27, label %bb14 + +bb26: ; preds = %bb23 + br label %bb24 + +bb27: ; preds = %bb24 + br label %bb14 +} + +; Function Attrs: nounwind readnone +declare <4 x float> @llvm.SI.sample.v2i32(<2 x i32>, <32 x i8>, <16 x i8>, i32) #1 + +; Function Attrs: nounwind readnone +declare i32 @llvm.SI.packf16(float, float) #1 + +declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float) + +attributes #0 = { "ShaderType"="0" "enable-no-nans-fp-math"="true" "unsafe-fp-math"="true" } +attributes #1 = { nounwind readnone } diff --git a/test/CodeGen/R600/swizzle-export.ll b/test/CodeGen/R600/swizzle-export.ll index 3e6f7a7..5eaca76 100644 --- a/test/CodeGen/R600/swizzle-export.ll +++ b/test/CodeGen/R600/swizzle-export.ll @@ -1,10 +1,10 @@ -; RUN: llc < %s -march=r600 -mcpu=redwood | FileCheck --check-prefix=EG-CHECK %s +; RUN: llc < %s -march=r600 -mcpu=redwood | FileCheck --check-prefix=EG %s -;EG-CHECK: {{^}}main: -;EG-CHECK: EXPORT T{{[0-9]+}}.XYXX -;EG-CHECK: EXPORT T{{[0-9]+}}.ZXXX -;EG-CHECK: EXPORT T{{[0-9]+}}.XXWX -;EG-CHECK: EXPORT T{{[0-9]+}}.XXXW +;EG: {{^}}main: +;EG: EXPORT T{{[0-9]+}}.XYXX +;EG: EXPORT T{{[0-9]+}}.ZXXX +;EG: EXPORT T{{[0-9]+}}.XXWX +;EG: EXPORT T{{[0-9]+}}.XXXW define void @main(<4 x float> inreg %reg0, <4 x float> inreg %reg1) #0 { main_body: @@ -92,9 +92,9 @@ main_body: ret void } -; EG-CHECK: {{^}}main2: -; EG-CHECK: T{{[0-9]+}}.XY__ -; EG-CHECK: T{{[0-9]+}}.ZXY0 +; EG: {{^}}main2: +; EG: T{{[0-9]+}}.XY__ +; EG: T{{[0-9]+}}.ZXY0 define void @main2(<4 x float> inreg %reg0, <4 x float> inreg %reg1) #0 { main_body: diff --git a/test/CodeGen/R600/trunc-cmp-constant.ll b/test/CodeGen/R600/trunc-cmp-constant.ll new file mode 100644 index 0000000..a097ab0 --- /dev/null +++ b/test/CodeGen/R600/trunc-cmp-constant.ll @@ -0,0 +1,170 @@ +; RUN: llc -march=amdgcn -mcpu=SI -verify-machineinstrs < %s | FileCheck -check-prefix=SI -check-prefix=FUNC %s +; RUN: llc -march=amdgcn -mcpu=tonga -verify-machineinstrs < %s | FileCheck -check-prefix=SI -check-prefix=FUNC %s + +; FUNC-LABEL {{^}}sextload_i1_to_i32_trunc_cmp_eq_0: +; SI: buffer_load_ubyte [[LOAD:v[0-9]+]] +; SI: v_and_b32_e32 [[TMP:v[0-9]+]], 1, [[LOAD]] +; SI: v_cmp_eq_i32_e64 [[CMP:s\[[0-9]+:[0-9]+\]]], [[TMP]], 1{{$}} +; SI: s_xor_b64 s{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}}, -1{{$}} +; SI: v_cndmask_b32_e64 +; SI: buffer_store_byte +define void @sextload_i1_to_i32_trunc_cmp_eq_0(i1 addrspace(1)* %out, i1 addrspace(1)* %in) nounwind { + %load = load i1 addrspace(1)* %in + %ext = sext i1 %load to i32 + %cmp = icmp eq i32 %ext, 0 + store i1 %cmp, i1 addrspace(1)* %out + ret void +} + +; FIXME: The negate should be inverting the compare. 
+; FUNC-LABEL: {{^}}zextload_i1_to_i32_trunc_cmp_eq_0: +; SI: buffer_load_ubyte [[LOAD:v[0-9]+]] +; SI: v_and_b32_e32 [[TMP:v[0-9]+]], 1, [[LOAD]] +; SI: v_cmp_eq_i32_e64 [[CMP0:s\[[0-9]+:[0-9]+\]]], [[TMP]], 1{{$}} +; SI-NEXT: s_xor_b64 [[NEG:s\[[0-9]+:[0-9]+\]]], [[CMP0]], -1 +; SI-NEXT: v_cndmask_b32_e64 [[RESULT:v[0-9]+]], 0, 1, [[NEG]] +; SI-NEXT: buffer_store_byte [[RESULT]] +define void @zextload_i1_to_i32_trunc_cmp_eq_0(i1 addrspace(1)* %out, i1 addrspace(1)* %in) nounwind { + %load = load i1 addrspace(1)* %in + %ext = zext i1 %load to i32 + %cmp = icmp eq i32 %ext, 0 + store i1 %cmp, i1 addrspace(1)* %out + ret void +} + +; FUNC-LABEL: {{^}}sextload_i1_to_i32_trunc_cmp_eq_1: +; SI: v_mov_b32_e32 [[RESULT:v[0-9]+]], 0{{$}} +; SI: buffer_store_byte [[RESULT]] +define void @sextload_i1_to_i32_trunc_cmp_eq_1(i1 addrspace(1)* %out, i1 addrspace(1)* %in) nounwind { + %load = load i1 addrspace(1)* %in + %ext = sext i1 %load to i32 + %cmp = icmp eq i32 %ext, 1 + store i1 %cmp, i1 addrspace(1)* %out + ret void +} + +; FUNC-LABEL: {{^}}zextload_i1_to_i32_trunc_cmp_eq_1: +; SI: buffer_load_ubyte [[LOAD:v[0-9]+]] +; SI: v_and_b32_e32 [[RESULT:v[0-9]+]], 1, [[LOAD]] +; SI-NEXT: buffer_store_byte [[RESULT]] +define void @zextload_i1_to_i32_trunc_cmp_eq_1(i1 addrspace(1)* %out, i1 addrspace(1)* %in) nounwind { + %load = load i1 addrspace(1)* %in + %ext = zext i1 %load to i32 + %cmp = icmp eq i32 %ext, 1 + store i1 %cmp, i1 addrspace(1)* %out + ret void +} + +; FUNC-LABEL: {{^}}sextload_i1_to_i32_trunc_cmp_eq_neg1: +; SI: buffer_load_ubyte [[LOAD:v[0-9]+]] +; SI: v_and_b32_e32 [[RESULT:v[0-9]+]], 1, [[LOAD]] +; SI-NEXT: buffer_store_byte [[RESULT]] +define void @sextload_i1_to_i32_trunc_cmp_eq_neg1(i1 addrspace(1)* %out, i1 addrspace(1)* %in) nounwind { + %load = load i1 addrspace(1)* %in + %ext = sext i1 %load to i32 + %cmp = icmp eq i32 %ext, -1 + store i1 %cmp, i1 addrspace(1)* %out + ret void +} + +; FUNC-LABEL: {{^}}zextload_i1_to_i32_trunc_cmp_eq_neg1: +; SI: v_mov_b32_e32 [[RESULT:v[0-9]+]], 0{{$}} +; SI: buffer_store_byte [[RESULT]] +define void @zextload_i1_to_i32_trunc_cmp_eq_neg1(i1 addrspace(1)* %out, i1 addrspace(1)* %in) nounwind { + %load = load i1 addrspace(1)* %in + %ext = zext i1 %load to i32 + %cmp = icmp eq i32 %ext, -1 + store i1 %cmp, i1 addrspace(1)* %out + ret void +} + + +; FUNC-LABEL {{^}}sextload_i1_to_i32_trunc_cmp_ne_0: +; SI: buffer_load_ubyte [[LOAD:v[0-9]+]] +; SI: v_and_b32_e32 [[TMP:v[0-9]+]], 1, [[LOAD]] +; SI-NEXT: buffer_store_byte [[RESULT]] +define void @sextload_i1_to_i32_trunc_cmp_ne_0(i1 addrspace(1)* %out, i1 addrspace(1)* %in) nounwind { + %load = load i1 addrspace(1)* %in + %ext = sext i1 %load to i32 + %cmp = icmp ne i32 %ext, 0 + store i1 %cmp, i1 addrspace(1)* %out + ret void +} + +; FUNC-LABEL: {{^}}zextload_i1_to_i32_trunc_cmp_ne_0: +; SI: buffer_load_ubyte [[LOAD:v[0-9]+]] +; SI: v_and_b32_e32 [[TMP:v[0-9]+]], 1, [[LOAD]] +; SI-NEXT: buffer_store_byte [[RESULT]] +define void @zextload_i1_to_i32_trunc_cmp_ne_0(i1 addrspace(1)* %out, i1 addrspace(1)* %in) nounwind { + %load = load i1 addrspace(1)* %in + %ext = zext i1 %load to i32 + %cmp = icmp ne i32 %ext, 0 + store i1 %cmp, i1 addrspace(1)* %out + ret void +} + +; FUNC-LABEL: {{^}}sextload_i1_to_i32_trunc_cmp_ne_1: +; SI: v_mov_b32_e32 [[RESULT:v[0-9]+]], 1{{$}} +; SI: buffer_store_byte [[RESULT]] +define void @sextload_i1_to_i32_trunc_cmp_ne_1(i1 addrspace(1)* %out, i1 addrspace(1)* %in) nounwind { + %load = load i1 addrspace(1)* %in + %ext = sext i1 %load to i32 + %cmp = icmp ne i32 %ext, 1 + 
store i1 %cmp, i1 addrspace(1)* %out + ret void +} + +; FUNC-LABEL: {{^}}zextload_i1_to_i32_trunc_cmp_ne_1: +; SI: buffer_load_ubyte [[LOAD:v[0-9]+]] +; SI: v_and_b32_e32 [[TMP:v[0-9]+]], 1, [[LOAD]] +; SI: v_cmp_eq_i32_e64 [[CMP0:s\[[0-9]+:[0-9]+\]]], [[TMP]], 1{{$}} +; SI-NEXT: s_xor_b64 [[NEG:s\[[0-9]+:[0-9]+\]]], [[CMP0]], -1 +; SI-NEXT: v_cndmask_b32_e64 [[RESULT:v[0-9]+]], 0, 1, [[NEG]] +; SI-NEXT: buffer_store_byte [[RESULT]] +define void @zextload_i1_to_i32_trunc_cmp_ne_1(i1 addrspace(1)* %out, i1 addrspace(1)* %in) nounwind { + %load = load i1 addrspace(1)* %in + %ext = zext i1 %load to i32 + %cmp = icmp ne i32 %ext, 1 + store i1 %cmp, i1 addrspace(1)* %out + ret void +} + +; FIXME: This should be one compare. +; FUNC-LABEL: {{^}}sextload_i1_to_i32_trunc_cmp_ne_neg1: +; XSI: buffer_load_ubyte [[LOAD:v[0-9]+]] +; XSI: v_and_b32_e32 [[TMP:v[0-9]+]], 1, [[LOAD]] +; XSI: v_cmp_eq_i32_e64 [[CMP0:s\[[0-9]+:[0-9]+\]]], [[TMP]], 0{{$}} +; XSI-NEXT: v_cndmask_b32_e64 [[RESULT:v[0-9]+]], 0, 1, [[CMP0]] +; XSI-NEXT: buffer_store_byte [[RESULT]] +define void @sextload_i1_to_i32_trunc_cmp_ne_neg1(i1 addrspace(1)* %out, i1 addrspace(1)* %in) nounwind { + %load = load i1 addrspace(1)* %in + %ext = sext i1 %load to i32 + %cmp = icmp ne i32 %ext, -1 + store i1 %cmp, i1 addrspace(1)* %out + ret void +} + +; FUNC-LABEL: {{^}}zextload_i1_to_i32_trunc_cmp_ne_neg1: +; SI: v_mov_b32_e32 [[RESULT:v[0-9]+]], 1{{$}} +; SI: buffer_store_byte [[RESULT]] +define void @zextload_i1_to_i32_trunc_cmp_ne_neg1(i1 addrspace(1)* %out, i1 addrspace(1)* %in) nounwind { + %load = load i1 addrspace(1)* %in + %ext = zext i1 %load to i32 + %cmp = icmp ne i32 %ext, -1 + store i1 %cmp, i1 addrspace(1)* %out + ret void +} + +; FUNC-LABEL: {{^}}masked_load_i1_to_i32_trunc_cmp_ne_neg1: +; SI: buffer_load_sbyte [[LOAD:v[0-9]+]] +; SI: v_cmp_ne_i32_e64 {{s\[[0-9]+:[0-9]+\]}}, [[LOAD]], -1{{$}} +; SI-NEXT: v_cndmask_b32_e64 +; SI-NEXT: buffer_store_byte +define void @masked_load_i1_to_i32_trunc_cmp_ne_neg1(i1 addrspace(1)* %out, i8 addrspace(1)* %in) nounwind { + %load = load i8 addrspace(1)* %in + %masked = and i8 %load, 255 + %ext = sext i8 %masked to i32 + %cmp = icmp ne i32 %ext, -1 + store i1 %cmp, i1 addrspace(1)* %out + ret void +} diff --git a/test/CodeGen/R600/trunc-store-i1.ll b/test/CodeGen/R600/trunc-store-i1.ll index 3c1b19f..b71a838 100644 --- a/test/CodeGen/R600/trunc-store-i1.ll +++ b/test/CodeGen/R600/trunc-store-i1.ll @@ -1,4 +1,5 @@ -; RUN: llc -march=r600 -mcpu=SI -verify-machineinstrs< %s | FileCheck -check-prefix=SI %s +; RUN: llc -march=amdgcn -mcpu=SI -verify-machineinstrs< %s | FileCheck -check-prefix=SI %s +; RUN: llc -march=amdgcn -mcpu=tonga -verify-machineinstrs< %s | FileCheck -check-prefix=SI %s ; SI-LABEL: {{^}}global_truncstore_i32_to_i1: diff --git a/test/CodeGen/R600/trunc.ll b/test/CodeGen/R600/trunc.ll index 7519d10..fa44264 100644 --- a/test/CodeGen/R600/trunc.ll +++ b/test/CodeGen/R600/trunc.ll @@ -1,6 +1,8 @@ -; RUN: llc -march=r600 -mcpu=SI -verify-machineinstrs< %s | FileCheck -check-prefix=SI %s +; RUN: llc -march=amdgcn -mcpu=SI -verify-machineinstrs< %s | FileCheck -check-prefix=SI %s ; RUN: llc -march=r600 -mcpu=cypress < %s | FileCheck -check-prefix=EG %s +declare i32 @llvm.r600.read.tidig.x() nounwind readnone + define void @trunc_i64_to_i32_store(i32 addrspace(1)* %out, i64 %in) { ; SI-LABEL: {{^}}trunc_i64_to_i32_store: ; SI: s_load_dword [[SLOAD:s[0-9]+]], s[0:1], 0xb @@ -34,6 +36,8 @@ define void @trunc_load_shl_i64(i32 addrspace(1)* %out, i64 %a) { ; SI: s_lshl_b64 
s{{\[}}[[LO_SHL:[0-9]+]]:{{[0-9]+\]}}, s{{\[}}[[LO_SREG]]:{{[0-9]+\]}}, 2 ; SI: s_add_u32 s[[LO_SREG2:[0-9]+]], s[[LO_SHL]], ; SI: s_addc_u32 +; SI: v_mov_b32_e32 +; SI: v_mov_b32_e32 ; SI: v_mov_b32_e32 v[[LO_VREG:[0-9]+]], s[[LO_SREG2]] ; SI: buffer_store_dword v[[LO_VREG]], define void @trunc_shl_i64(i64 addrspace(1)* %out2, i32 addrspace(1)* %out, i64 %a) { @@ -65,3 +69,32 @@ define void @sgpr_trunc_i32_to_i1(i32 addrspace(1)* %out, i32 %a) { store i32 %result, i32 addrspace(1)* %out, align 4 ret void } + +; SI-LABEL: {{^}}s_trunc_i64_to_i1: +; SI: s_load_dwordx2 s{{\[}}[[SLO:[0-9]+]]:{{[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, 0xb +; SI: v_and_b32_e64 [[MASKED:v[0-9]+]], 1, s[[SLO]] +; SI: v_cmp_eq_i32_e64 [[CMP:s\[[0-9]+:[0-9]+\]]], [[MASKED]], 1 +; SI: v_cndmask_b32_e64 {{v[0-9]+}}, -12, 63, [[CMP]] +define void @s_trunc_i64_to_i1(i32 addrspace(1)* %out, i64 %x) { + %trunc = trunc i64 %x to i1 + %sel = select i1 %trunc, i32 63, i32 -12 + store i32 %sel, i32 addrspace(1)* %out + ret void +} + +; SI-LABEL: {{^}}v_trunc_i64_to_i1: +; SI: buffer_load_dwordx2 v{{\[}}[[VLO:[0-9]+]]:{{[0-9]+\]}} +; SI: v_and_b32_e32 [[MASKED:v[0-9]+]], 1, v[[VLO]] +; SI: v_cmp_eq_i32_e64 [[CMP:s\[[0-9]+:[0-9]+\]]], [[MASKED]], 1 +; SI: v_cndmask_b32_e64 {{v[0-9]+}}, -12, 63, [[CMP]] +define void @v_trunc_i64_to_i1(i32 addrspace(1)* %out, i64 addrspace(1)* %in) { + %tid = call i32 @llvm.r600.read.tidig.x() nounwind readnone + %gep = getelementptr i64 addrspace(1)* %in, i32 %tid + %out.gep = getelementptr i32 addrspace(1)* %out, i32 %tid + %x = load i64 addrspace(1)* %gep + + %trunc = trunc i64 %x to i1 + %sel = select i1 %trunc, i32 63, i32 -12 + store i32 %sel, i32 addrspace(1)* %out.gep + ret void +} diff --git a/test/CodeGen/R600/tti-unroll-prefs.ll b/test/CodeGen/R600/tti-unroll-prefs.ll new file mode 100644 index 0000000..0009c42 --- /dev/null +++ b/test/CodeGen/R600/tti-unroll-prefs.ll @@ -0,0 +1,58 @@ +; RUN: opt -loop-unroll -S -mtriple=amdgcn-- -mcpu=SI %s | FileCheck %s + +; This IR comes from this OpenCL C code: +; +; if (b + 4 > a) { +; for (int i = 0; i < 4; i++, b++) { +; if (b + 1 <= a) +; *(dst + c + b) = 0; +; else +; break; +; } +; } +; +; This test is meant to check that this loop isn't unrolled into more than +; four iterations. The loop unrolling preferences we currently use cause this +; loop to not be unrolled at all, but that may change in the future. 
+ +; CHECK-LABEL: @test +; CHECK: store i8 0, i8 addrspace(1)* +; CHECK-NOT: store i8 0, i8 addrspace(1)* +; CHECK: ret void +define void @test(i8 addrspace(1)* nocapture %dst, i32 %a, i32 %b, i32 %c) { +entry: + %add = add nsw i32 %b, 4 + %cmp = icmp sgt i32 %add, %a + br i1 %cmp, label %for.cond.preheader, label %if.end7 + +for.cond.preheader: ; preds = %entry + %cmp313 = icmp slt i32 %b, %a + br i1 %cmp313, label %if.then4.lr.ph, label %if.end7.loopexit + +if.then4.lr.ph: ; preds = %for.cond.preheader + %0 = sext i32 %c to i64 + br label %if.then4 + +if.then4: ; preds = %if.then4.lr.ph, %if.then4 + %i.015 = phi i32 [ 0, %if.then4.lr.ph ], [ %inc, %if.then4 ] + %b.addr.014 = phi i32 [ %b, %if.then4.lr.ph ], [ %add2, %if.then4 ] + %add2 = add nsw i32 %b.addr.014, 1 + %1 = sext i32 %b.addr.014 to i64 + %add.ptr.sum = add nsw i64 %1, %0 + %add.ptr5 = getelementptr inbounds i8 addrspace(1)* %dst, i64 %add.ptr.sum + store i8 0, i8 addrspace(1)* %add.ptr5, align 1 + %inc = add nsw i32 %i.015, 1 + %cmp1 = icmp slt i32 %inc, 4 + %cmp3 = icmp slt i32 %add2, %a + %or.cond = and i1 %cmp3, %cmp1 + br i1 %or.cond, label %if.then4, label %for.cond.if.end7.loopexit_crit_edge + +for.cond.if.end7.loopexit_crit_edge: ; preds = %if.then4 + br label %if.end7.loopexit + +if.end7.loopexit: ; preds = %for.cond.if.end7.loopexit_crit_edge, %for.cond.preheader + br label %if.end7 + +if.end7: ; preds = %if.end7.loopexit, %entry + ret void +} diff --git a/test/CodeGen/R600/uaddo.ll b/test/CodeGen/R600/uaddo.ll index eb242c1..57d7835 100644 --- a/test/CodeGen/R600/uaddo.ll +++ b/test/CodeGen/R600/uaddo.ll @@ -1,4 +1,5 @@ -; RUN: llc -march=r600 -mcpu=SI -verify-machineinstrs< %s | FileCheck -check-prefix=SI -check-prefix=FUNC %s +; RUN: llc -march=amdgcn -mcpu=SI -verify-machineinstrs< %s | FileCheck -check-prefix=SI -check-prefix=FUNC %s +; RUN: llc -march=amdgcn -mcpu=tonga -verify-machineinstrs< %s | FileCheck -check-prefix=SI -check-prefix=FUNC %s ; RUN: llc -march=r600 -mcpu=cypress -verify-machineinstrs< %s declare { i32, i1 } @llvm.uadd.with.overflow.i32(i32, i32) nounwind readnone diff --git a/test/CodeGen/R600/udiv.ll b/test/CodeGen/R600/udiv.ll index 59e91f8..0c2c65b 100644 --- a/test/CodeGen/R600/udiv.ll +++ b/test/CodeGen/R600/udiv.ll @@ -1,9 +1,10 @@ -;RUN: llc < %s -march=r600 -mcpu=redwood | FileCheck --check-prefix=EG-CHECK %s -;RUN: llc < %s -march=r600 -mcpu=verde -verify-machineinstrs | FileCheck --check-prefix=SI-CHECK %s +;RUN: llc < %s -march=r600 -mcpu=redwood | FileCheck --check-prefix=EG %s +;RUN: llc < %s -march=amdgcn -mcpu=verde -verify-machineinstrs | FileCheck --check-prefix=SI %s +;RUN: llc < %s -march=amdgcn -mcpu=tonga -verify-machineinstrs | FileCheck --check-prefix=SI %s -;EG-CHECK-LABEL: {{^}}test: -;EG-CHECK-NOT: SETGE_INT -;EG-CHECK: CF_END +;EG-LABEL: {{^}}test: +;EG-NOT: SETGE_INT +;EG: CF_END define void @test(i32 addrspace(1)* %out, i32 addrspace(1)* %in) { %b_ptr = getelementptr i32 addrspace(1)* %in, i32 1 @@ -18,10 +19,10 @@ define void @test(i32 addrspace(1)* %out, i32 addrspace(1)* %in) { ;The goal of this test is to make sure the ISel doesn't fail when it gets ;a v4i32 udiv -;EG-CHECK-LABEL: {{^}}test2: -;EG-CHECK: CF_END -;SI-CHECK-LABEL: {{^}}test2: -;SI-CHECK: s_endpgm +;EG-LABEL: {{^}}test2: +;EG: CF_END +;SI-LABEL: {{^}}test2: +;SI: s_endpgm define void @test2(<2 x i32> addrspace(1)* %out, <2 x i32> addrspace(1)* %in) { %b_ptr = getelementptr <2 x i32> addrspace(1)* %in, i32 1 @@ -32,10 +33,10 @@ define void @test2(<2 x i32> addrspace(1)* %out, <2 x i32> 
addrspace(1)* %in) { ret void } -;EG-CHECK-LABEL: {{^}}test4: -;EG-CHECK: CF_END -;SI-CHECK-LABEL: {{^}}test4: -;SI-CHECK: s_endpgm +;EG-LABEL: {{^}}test4: +;EG: CF_END +;SI-LABEL: {{^}}test4: +;SI: s_endpgm define void @test4(<4 x i32> addrspace(1)* %out, <4 x i32> addrspace(1)* %in) { %b_ptr = getelementptr <4 x i32> addrspace(1)* %in, i32 1 diff --git a/test/CodeGen/R600/udivrem.ll b/test/CodeGen/R600/udivrem.ll index f20705b..b3837f2 100644 --- a/test/CodeGen/R600/udivrem.ll +++ b/test/CodeGen/R600/udivrem.ll @@ -1,4 +1,5 @@ -; RUN: llc -march=r600 -mcpu=SI -verify-machineinstrs < %s | FileCheck --check-prefix=SI --check-prefix=FUNC %s +; RUN: llc -march=amdgcn -mcpu=SI -verify-machineinstrs < %s | FileCheck --check-prefix=SI --check-prefix=FUNC %s +; RUN: llc -march=amdgcn -mcpu=tonga -verify-machineinstrs < %s | FileCheck --check-prefix=SI --check-prefix=FUNC %s ; RUN: llc -march=r600 -mcpu=redwood < %s | FileCheck --check-prefix=EG --check-prefix=FUNC %s ; FUNC-LABEL: {{^}}test_udivrem: @@ -32,8 +33,8 @@ ; SI-DAG: v_sub_i32_e32 [[NEG_RCP_LO:v[0-9]+]], 0, [[RCP_LO]] ; SI: v_cndmask_b32_e64 ; SI: v_mul_hi_u32 [[E:v[0-9]+]], {{v[0-9]+}}, [[RCP]] -; SI-DAG: v_add_i32_e32 [[RCP_A_E:v[0-9]+]], [[RCP]], [[E]] -; SI-DAG: v_sub_i32_e32 [[RCP_S_E:v[0-9]+]], [[RCP]], [[E]] +; SI-DAG: v_add_i32_e32 [[RCP_A_E:v[0-9]+]], [[E]], [[RCP]] +; SI-DAG: v_subrev_i32_e32 [[RCP_S_E:v[0-9]+]], [[E]], [[RCP]] ; SI: v_cndmask_b32_e64 ; SI: v_mul_hi_u32 [[Quotient:v[0-9]+]] ; SI: v_mul_lo_i32 [[Num_S_Remainder:v[0-9]+]] @@ -112,12 +113,12 @@ define void @test_udivrem(i32 addrspace(1)* %out, i32 %x, i32 %y) { ; SI-DAG: v_sub_i32_e32 [[FIRST_NEG_RCP_LO:v[0-9]+]], 0, [[FIRST_RCP_LO]] ; SI-DAG: v_cndmask_b32_e64 ; SI-DAG: v_mul_hi_u32 [[FIRST_E:v[0-9]+]], {{v[0-9]+}}, [[FIRST_RCP]] -; SI-DAG: v_add_i32_e32 [[FIRST_RCP_A_E:v[0-9]+]], [[FIRST_RCP]], [[FIRST_E]] -; SI-DAG: v_sub_i32_e32 [[FIRST_RCP_S_E:v[0-9]+]], [[FIRST_RCP]], [[FIRST_E]] +; SI-DAG: v_add_i32_e32 [[FIRST_RCP_A_E:v[0-9]+]], [[FIRST_E]], [[FIRST_RCP]] +; SI-DAG: v_subrev_i32_e32 [[FIRST_RCP_S_E:v[0-9]+]], [[FIRST_E]], [[FIRST_RCP]] ; SI-DAG: v_cndmask_b32_e64 ; SI-DAG: v_mul_hi_u32 [[FIRST_Quotient:v[0-9]+]] ; SI-DAG: v_mul_lo_i32 [[FIRST_Num_S_Remainder:v[0-9]+]] -; SI-DAG: v_sub_i32_e32 [[FIRST_Remainder:v[0-9]+]], {{[vs][0-9]+}}, [[FIRST_Num_S_Remainder]] +; SI-DAG: v_subrev_i32_e32 [[FIRST_Remainder:v[0-9]+]], [[FIRST_Num_S_Remainder]], v{{[0-9]+}} ; SI-DAG: v_cndmask_b32_e64 ; SI-DAG: v_cndmask_b32_e64 ; SI-DAG: v_and_b32_e32 [[FIRST_Tmp1:v[0-9]+]] @@ -135,12 +136,12 @@ define void @test_udivrem(i32 addrspace(1)* %out, i32 %x, i32 %y) { ; SI-DAG: v_sub_i32_e32 [[SECOND_NEG_RCP_LO:v[0-9]+]], 0, [[SECOND_RCP_LO]] ; SI-DAG: v_cndmask_b32_e64 ; SI-DAG: v_mul_hi_u32 [[SECOND_E:v[0-9]+]], {{v[0-9]+}}, [[SECOND_RCP]] -; SI-DAG: v_add_i32_e32 [[SECOND_RCP_A_E:v[0-9]+]], [[SECOND_RCP]], [[SECOND_E]] -; SI-DAG: v_sub_i32_e32 [[SECOND_RCP_S_E:v[0-9]+]], [[SECOND_RCP]], [[SECOND_E]] +; SI-DAG: v_add_i32_e32 [[SECOND_RCP_A_E:v[0-9]+]], [[SECOND_E]], [[SECOND_RCP]] +; SI-DAG: v_subrev_i32_e32 [[SECOND_RCP_S_E:v[0-9]+]], [[SECOND_E]], [[SECOND_RCP]] ; SI-DAG: v_cndmask_b32_e64 ; SI-DAG: v_mul_hi_u32 [[SECOND_Quotient:v[0-9]+]] ; SI-DAG: v_mul_lo_i32 [[SECOND_Num_S_Remainder:v[0-9]+]] -; SI-DAG: v_sub_i32_e32 [[SECOND_Remainder:v[0-9]+]], {{[vs][0-9]+}}, [[SECOND_Num_S_Remainder]] +; SI-DAG: v_subrev_i32_e32 [[SECOND_Remainder:v[0-9]+]], [[SECOND_Num_S_Remainder]], v{{[0-9]+}} ; SI-DAG: v_cndmask_b32_e64 ; SI-DAG: v_cndmask_b32_e64 ; SI-DAG: v_and_b32_e32 
[[SECOND_Tmp1:v[0-9]+]] @@ -262,12 +263,12 @@ define void @test_udivrem_v2(<2 x i32> addrspace(1)* %out, <2 x i32> %x, <2 x i3 ; SI-DAG: v_sub_i32_e32 [[FIRST_NEG_RCP_LO:v[0-9]+]], 0, [[FIRST_RCP_LO]] ; SI-DAG: v_cndmask_b32_e64 ; SI-DAG: v_mul_hi_u32 [[FIRST_E:v[0-9]+]], {{v[0-9]+}}, [[FIRST_RCP]] -; SI-DAG: v_add_i32_e32 [[FIRST_RCP_A_E:v[0-9]+]], [[FIRST_RCP]], [[FIRST_E]] -; SI-DAG: v_sub_i32_e32 [[FIRST_RCP_S_E:v[0-9]+]], [[FIRST_RCP]], [[FIRST_E]] +; SI-DAG: v_add_i32_e32 [[FIRST_RCP_A_E:v[0-9]+]], [[FIRST_E]], [[FIRST_RCP]] +; SI-DAG: v_subrev_i32_e32 [[FIRST_RCP_S_E:v[0-9]+]], [[FIRST_E]], [[FIRST_RCP]] ; SI-DAG: v_cndmask_b32_e64 ; SI-DAG: v_mul_hi_u32 [[FIRST_Quotient:v[0-9]+]] ; SI-DAG: v_mul_lo_i32 [[FIRST_Num_S_Remainder:v[0-9]+]] -; SI-DAG: v_sub_i32_e32 [[FIRST_Remainder:v[0-9]+]], {{[vs][0-9]+}}, [[FIRST_Num_S_Remainder]] +; SI-DAG: v_subrev_i32_e32 [[FIRST_Remainder:v[l0-9]+]], [[FIRST_Num_S_Remainder]], v{{[0-9]+}} ; SI-DAG: v_cndmask_b32_e64 ; SI-DAG: v_cndmask_b32_e64 ; SI-DAG: v_and_b32_e32 [[FIRST_Tmp1:v[0-9]+]] @@ -285,12 +286,12 @@ define void @test_udivrem_v2(<2 x i32> addrspace(1)* %out, <2 x i32> %x, <2 x i3 ; SI-DAG: v_sub_i32_e32 [[SECOND_NEG_RCP_LO:v[0-9]+]], 0, [[SECOND_RCP_LO]] ; SI-DAG: v_cndmask_b32_e64 ; SI-DAG: v_mul_hi_u32 [[SECOND_E:v[0-9]+]], {{v[0-9]+}}, [[SECOND_RCP]] -; SI-DAG: v_add_i32_e32 [[SECOND_RCP_A_E:v[0-9]+]], [[SECOND_RCP]], [[SECOND_E]] -; SI-DAG: v_sub_i32_e32 [[SECOND_RCP_S_E:v[0-9]+]], [[SECOND_RCP]], [[SECOND_E]] +; SI-DAG: v_add_i32_e32 [[SECOND_RCP_A_E:v[0-9]+]], [[SECOND_E]], [[SECOND_RCP]] +; SI-DAG: v_subrev_i32_e32 [[SECOND_RCP_S_E:v[0-9]+]], [[SECOND_E]], [[SECOND_RCP]] ; SI-DAG: v_cndmask_b32_e64 ; SI-DAG: v_mul_hi_u32 [[SECOND_Quotient:v[0-9]+]] ; SI-DAG: v_mul_lo_i32 [[SECOND_Num_S_Remainder:v[0-9]+]] -; SI-DAG: v_sub_i32_e32 [[SECOND_Remainder:v[0-9]+]], {{[vs][0-9]+}}, [[SECOND_Num_S_Remainder]] +; SI-DAG: v_subrev_i32_e32 [[SECOND_Remainder:v[0-9]+]], [[SECOND_Num_S_Remainder]], v{{[0-9]+}} ; SI-DAG: v_cndmask_b32_e64 ; SI-DAG: v_cndmask_b32_e64 ; SI-DAG: v_and_b32_e32 [[SECOND_Tmp1:v[0-9]+]] @@ -308,12 +309,12 @@ define void @test_udivrem_v2(<2 x i32> addrspace(1)* %out, <2 x i32> %x, <2 x i3 ; SI-DAG: v_sub_i32_e32 [[THIRD_NEG_RCP_LO:v[0-9]+]], 0, [[THIRD_RCP_LO]] ; SI-DAG: v_cndmask_b32_e64 ; SI-DAG: v_mul_hi_u32 [[THIRD_E:v[0-9]+]], {{v[0-9]+}}, [[THIRD_RCP]] -; SI-DAG: v_add_i32_e32 [[THIRD_RCP_A_E:v[0-9]+]], [[THIRD_RCP]], [[THIRD_E]] -; SI-DAG: v_sub_i32_e32 [[THIRD_RCP_S_E:v[0-9]+]], [[THIRD_RCP]], [[THIRD_E]] +; SI-DAG: v_add_i32_e32 [[THIRD_RCP_A_E:v[0-9]+]], [[THIRD_E]], [[THIRD_RCP]] +; SI-DAG: v_subrev_i32_e32 [[THIRD_RCP_S_E:v[0-9]+]], [[THIRD_E]], [[THIRD_RCP]] ; SI-DAG: v_cndmask_b32_e64 ; SI-DAG: v_mul_hi_u32 [[THIRD_Quotient:v[0-9]+]] ; SI-DAG: v_mul_lo_i32 [[THIRD_Num_S_Remainder:v[0-9]+]] -; SI-DAG: v_sub_i32_e32 [[THIRD_Remainder:v[0-9]+]], {{[vs][0-9]+}}, [[THIRD_Num_S_Remainder]] +; SI-DAG: v_subrev_i32_e32 [[THIRD_Remainder:v[0-9]+]], [[THIRD_Num_S_Remainder]], {{v[0-9]+}} ; SI-DAG: v_cndmask_b32_e64 ; SI-DAG: v_cndmask_b32_e64 ; SI-DAG: v_and_b32_e32 [[THIRD_Tmp1:v[0-9]+]] @@ -331,22 +332,8 @@ define void @test_udivrem_v2(<2 x i32> addrspace(1)* %out, <2 x i32> %x, <2 x i3 ; SI-DAG: v_sub_i32_e32 [[FOURTH_NEG_RCP_LO:v[0-9]+]], 0, [[FOURTH_RCP_LO]] ; SI-DAG: v_cndmask_b32_e64 ; SI-DAG: v_mul_hi_u32 [[FOURTH_E:v[0-9]+]], {{v[0-9]+}}, [[FOURTH_RCP]] -; SI-DAG: v_add_i32_e32 [[FOURTH_RCP_A_E:v[0-9]+]], [[FOURTH_RCP]], [[FOURTH_E]] -; SI-DAG: v_sub_i32_e32 [[FOURTH_RCP_S_E:v[0-9]+]], [[FOURTH_RCP]], 
[[FOURTH_E]] -; SI-DAG: v_cndmask_b32_e64 -; SI-DAG: v_mul_hi_u32 [[FOURTH_Quotient:v[0-9]+]] -; SI-DAG: v_mul_lo_i32 [[FOURTH_Num_S_Remainder:v[0-9]+]] -; SI-DAG: v_sub_i32_e32 [[FOURTH_Remainder:v[0-9]+]], {{[vs][0-9]+}}, [[FOURTH_Num_S_Remainder]] -; SI-DAG: v_cndmask_b32_e64 -; SI-DAG: v_cndmask_b32_e64 -; SI-DAG: v_and_b32_e32 [[FOURTH_Tmp1:v[0-9]+]] -; SI-DAG: v_add_i32_e32 [[FOURTH_Quotient_A_One:v[0-9]+]], {{.*}}, [[FOURTH_Quotient]] -; SI-DAG: v_subrev_i32_e32 [[FOURTH_Quotient_S_One:v[0-9]+]], -; SI-DAG: v_cndmask_b32_e64 -; SI-DAG: v_cndmask_b32_e64 -; SI-DAG: v_add_i32_e32 [[FOURTH_Remainder_A_Den:v[0-9]+]], -; SI-DAG: v_subrev_i32_e32 [[FOURTH_Remainder_S_Den:v[0-9]+]], -; SI-DAG: v_cndmask_b32_e64 +; SI-DAG: v_add_i32_e32 [[FOURTH_RCP_A_E:v[0-9]+]], [[FOURTH_E]], [[FOURTH_RCP]] +; SI-DAG: v_subrev_i32_e32 [[FOURTH_RCP_S_E:v[0-9]+]], [[FOURTH_E]], [[FOURTH_RCP]] ; SI-DAG: v_cndmask_b32_e64 ; SI: s_endpgm define void @test_udivrem_v4(<4 x i32> addrspace(1)* %out, <4 x i32> %x, <4 x i32> %y) { diff --git a/test/CodeGen/R600/udivrem24.ll b/test/CodeGen/R600/udivrem24.ll index defb3c0..4b98ac6 100644 --- a/test/CodeGen/R600/udivrem24.ll +++ b/test/CodeGen/R600/udivrem24.ll @@ -1,4 +1,5 @@ -; RUN: llc -march=r600 -mcpu=SI < %s | FileCheck -check-prefix=SI -check-prefix=FUNC %s +; RUN: llc -march=amdgcn -mcpu=SI < %s | FileCheck -check-prefix=SI -check-prefix=FUNC %s +; RUN: llc -march=amdgcn -mcpu=tonga < %s | FileCheck -check-prefix=SI -check-prefix=FUNC %s ; RUN: llc -march=r600 -mcpu=redwood < %s | FileCheck -check-prefix=EG -check-prefix=FUNC %s ; FUNC-LABEL: {{^}}udiv24_i8: diff --git a/test/CodeGen/R600/udivrem64.ll b/test/CodeGen/R600/udivrem64.ll index 8864c83..9f3069b 100644 --- a/test/CodeGen/R600/udivrem64.ll +++ b/test/CodeGen/R600/udivrem64.ll @@ -1,5 +1,6 @@ -;XUN: llc < %s -march=r600 -mcpu=SI -verify-machineinstrs| FileCheck --check-prefix=SI --check-prefix=FUNC %s -;RUN: llc < %s -march=r600 -mcpu=redwood | FileCheck --check-prefix=EG --check-prefix=FUNC %s +;RUN: llc -march=amdgcn -mcpu=SI -verify-machineinstrs < %s | FileCheck --check-prefix=SI --check-prefix=GCN --check-prefix=FUNC %s +;RUN: llc -march=amdgcn -mcpu=tonga -verify-machineinstrs < %s | FileCheck --check-prefix=VI --check-prefix=GCN --check-prefix=FUNC %s +;RUN: llc -march=r600 -mcpu=redwood < %s | FileCheck --check-prefix=EG --check-prefix=FUNC %s ;FUNC-LABEL: {{^}}test_udiv: ;EG: RECIP_UINT @@ -34,7 +35,41 @@ ;EG: BFE_UINT ;EG: BFE_UINT ;EG: BFE_UINT -;SI: s_endpgm + +;GCN: s_bfe_u32 +;GCN: s_bfe_u32 +;GCN: s_bfe_u32 +;GCN: s_bfe_u32 +;GCN: s_bfe_u32 +;GCN: s_bfe_u32 +;GCN: s_bfe_u32 +;GCN: s_bfe_u32 +;GCN: s_bfe_u32 +;GCN: s_bfe_u32 +;GCN: s_bfe_u32 +;GCN: s_bfe_u32 +;GCN: s_bfe_u32 +;GCN: s_bfe_u32 +;GCN: s_bfe_u32 +;GCN: s_bfe_u32 +;GCN: s_bfe_u32 +;GCN: s_bfe_u32 +;GCN: s_bfe_u32 +;GCN: s_bfe_u32 +;GCN: s_bfe_u32 +;GCN: s_bfe_u32 +;GCN: s_bfe_u32 +;GCN: s_bfe_u32 +;GCN: s_bfe_u32 +;GCN: s_bfe_u32 +;GCN: s_bfe_u32 +;GCN: s_bfe_u32 +;GCN: s_bfe_u32 +;GCN: s_bfe_u32 +;GCN-NOT: v_mad_f32 +;SI-NOT: v_lshr_b64 +;VI-NOT: v_lshrrev_b64 +;GCN: s_endpgm define void @test_udiv(i64 addrspace(1)* %out, i64 %x, i64 %y) { %result = udiv i64 %x, %y store i64 %result, i64 addrspace(1)* %out @@ -74,9 +109,115 @@ define void @test_udiv(i64 addrspace(1)* %out, i64 %x, i64 %y) { ;EG: BFE_UINT ;EG: BFE_UINT ;EG: AND_INT {{.*}}, 1, -;SI: s_endpgm + +;GCN: s_bfe_u32 +;GCN: s_bfe_u32 +;GCN: s_bfe_u32 +;GCN: s_bfe_u32 +;GCN: s_bfe_u32 +;GCN: s_bfe_u32 +;GCN: s_bfe_u32 +;GCN: s_bfe_u32 +;GCN: s_bfe_u32 +;GCN: s_bfe_u32 
+;GCN: s_bfe_u32 +;GCN: s_bfe_u32 +;GCN: s_bfe_u32 +;GCN: s_bfe_u32 +;GCN: s_bfe_u32 +;GCN: s_bfe_u32 +;GCN: s_bfe_u32 +;GCN: s_bfe_u32 +;GCN: s_bfe_u32 +;GCN: s_bfe_u32 +;GCN: s_bfe_u32 +;GCN: s_bfe_u32 +;GCN: s_bfe_u32 +;GCN: s_bfe_u32 +;GCN: s_bfe_u32 +;GCN: s_bfe_u32 +;GCN: s_bfe_u32 +;GCN: s_bfe_u32 +;GCN: s_bfe_u32 +;GCN: s_bfe_u32 +;GCN-NOT: v_mad_f32 +;SI-NOT: v_lshr_b64 +;VI-NOT: v_lshrrev_b64 +;GCN: s_endpgm define void @test_urem(i64 addrspace(1)* %out, i64 %x, i64 %y) { %result = urem i64 %x, %y store i64 %result, i64 addrspace(1)* %out ret void } + +;FUNC-LABEL: {{^}}test_udiv3264: +;EG: RECIP_UINT +;EG-NOT: BFE_UINT + +;GCN-NOT: s_bfe_u32 +;GCN-NOT: v_mad_f32 +;SI-NOT: v_lshr_b64 +;VI-NOT: v_lshrrev_b64 +;GCN: s_endpgm +define void @test_udiv3264(i64 addrspace(1)* %out, i64 %x, i64 %y) { + %1 = lshr i64 %x, 33 + %2 = lshr i64 %y, 33 + %result = udiv i64 %1, %2 + store i64 %result, i64 addrspace(1)* %out + ret void +} + +;FUNC-LABEL: {{^}}test_urem3264: +;EG: RECIP_UINT +;EG-NOT: BFE_UINT + +;GCN-NOT: s_bfe_u32 +;GCN-NOT: v_mad_f32 +;SI-NOT: v_lshr_b64 +;VI-NOT: v_lshrrev_b64 +;GCN: s_endpgm +define void @test_urem3264(i64 addrspace(1)* %out, i64 %x, i64 %y) { + %1 = lshr i64 %x, 33 + %2 = lshr i64 %y, 33 + %result = urem i64 %1, %2 + store i64 %result, i64 addrspace(1)* %out + ret void +} + +;FUNC-LABEL: {{^}}test_udiv2464: +;EG: UINT_TO_FLT +;EG: UINT_TO_FLT +;EG: FLT_TO_UINT +;EG-NOT: RECIP_UINT +;EG-NOT: BFE_UINT + +;SI-NOT: v_lshr_b64 +;VI-NOT: v_lshrrev_b64 +;GCN: v_mad_f32 +;GCN: s_endpgm +define void @test_udiv2464(i64 addrspace(1)* %out, i64 %x, i64 %y) { + %1 = lshr i64 %x, 40 + %2 = lshr i64 %y, 40 + %result = udiv i64 %1, %2 + store i64 %result, i64 addrspace(1)* %out + ret void +} + +;FUNC-LABEL: {{^}}test_urem2464: +;EG: UINT_TO_FLT +;EG: UINT_TO_FLT +;EG: FLT_TO_UINT +;EG-NOT: RECIP_UINT +;EG-NOT: BFE_UINT + +;SI-NOT: v_lshr_b64 +;VI-NOT: v_lshrrev_b64 +;GCN: v_mad_f32 +;GCN: s_endpgm +define void @test_urem2464(i64 addrspace(1)* %out, i64 %x, i64 %y) { + %1 = lshr i64 %x, 40 + %2 = lshr i64 %y, 40 + %result = urem i64 %1, %2 + store i64 %result, i64 addrspace(1)* %out + ret void +} diff --git a/test/CodeGen/R600/uint_to_fp.f64.ll b/test/CodeGen/R600/uint_to_fp.f64.ll index bddf700..f715243 100644 --- a/test/CodeGen/R600/uint_to_fp.f64.ll +++ b/test/CodeGen/R600/uint_to_fp.f64.ll @@ -1,47 +1,12 @@ -; RUN: llc -march=r600 -mcpu=SI -verify-machineinstrs < %s | FileCheck -check-prefix=SI %s +; RUN: llc -march=amdgcn -mcpu=SI -verify-machineinstrs < %s | FileCheck -check-prefix=SI %s declare i32 @llvm.r600.read.tidig.x() nounwind readnone -; SI-LABEL: {{^}}uint_to_fp_f64_i32 -; SI: v_cvt_f64_u32_e32 -; SI: s_endpgm -define void @uint_to_fp_f64_i32(double addrspace(1)* %out, i32 %in) { - %cast = uitofp i32 %in to double - store double %cast, double addrspace(1)* %out, align 8 - ret void -} - -; SI-LABEL: {{^}}uint_to_fp_i1_f64: -; SI: v_cmp_eq_i32_e64 [[CMP:s\[[0-9]+:[0-9]\]]], -; FIXME: We should the VGPR sources for V_CNDMASK are copied from SGPRs, -; we should be able to fold the SGPRs into the V_CNDMASK instructions. 
-; SI: v_cndmask_b32_e64 v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}}, [[CMP]] -; SI: v_cndmask_b32_e64 v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}}, [[CMP]] -; SI: buffer_store_dwordx2 -; SI: s_endpgm -define void @uint_to_fp_i1_f64(double addrspace(1)* %out, i32 %in) { - %cmp = icmp eq i32 %in, 0 - %fp = uitofp i1 %cmp to double - store double %fp, double addrspace(1)* %out, align 4 - ret void -} - -; SI-LABEL: {{^}}uint_to_fp_i1_f64_load: -; SI: v_cndmask_b32_e64 [[IRESULT:v[0-9]]], 0, 1 -; SI-NEXT: v_cvt_f64_u32_e32 [[RESULT:v\[[0-9]+:[0-9]\]]], [[IRESULT]] -; SI: buffer_store_dwordx2 [[RESULT]] -; SI: s_endpgm -define void @uint_to_fp_i1_f64_load(double addrspace(1)* %out, i1 %in) { - %fp = uitofp i1 %in to double - store double %fp, double addrspace(1)* %out, align 8 - ret void -} - ; SI-LABEL: {{^}}v_uint_to_fp_i64_to_f64 ; SI: buffer_load_dwordx2 v{{\[}}[[LO:[0-9]+]]:[[HI:[0-9]+]]{{\]}} -; SI-DAG: v_cvt_f64_u32_e32 [[LO_CONV:v\[[0-9]+:[0-9]+\]]], v[[LO]] -; SI-DAG: v_cvt_f64_u32_e32 [[HI_CONV:v\[[0-9]+:[0-9]+\]]], v[[HI]] +; SI: v_cvt_f64_u32_e32 [[HI_CONV:v\[[0-9]+:[0-9]+\]]], v[[HI]] ; SI: v_ldexp_f64 [[LDEXP:v\[[0-9]+:[0-9]+\]]], [[HI_CONV]], 32 +; SI: v_cvt_f64_u32_e32 [[LO_CONV:v\[[0-9]+:[0-9]+\]]], v[[LO]] ; SI: v_add_f64 [[RESULT:v\[[0-9]+:[0-9]+\]]], [[LDEXP]], [[LO_CONV]] ; SI: buffer_store_dwordx2 [[RESULT]] define void @v_uint_to_fp_i64_to_f64(double addrspace(1)* %out, i64 addrspace(1)* %in) { @@ -53,23 +18,81 @@ define void @v_uint_to_fp_i64_to_f64(double addrspace(1)* %out, i64 addrspace(1) ret void } -; SI-LABEL: {{^}}s_uint_to_fp_f64_i64 -define void @s_uint_to_fp_f64_i64(double addrspace(1)* %out, i64 %in) { +; SI-LABEL: {{^}}s_uint_to_fp_i64_to_f64 +define void @s_uint_to_fp_i64_to_f64(double addrspace(1)* %out, i64 %in) { %cast = uitofp i64 %in to double store double %cast, double addrspace(1)* %out, align 8 ret void } -; SI-LABEL: {{^}}s_uint_to_fp_v2f64_v2i64 -define void @s_uint_to_fp_v2f64_v2i64(<2 x double> addrspace(1)* %out, <2 x i64> %in) { +; SI-LABEL: {{^}}s_uint_to_fp_v2i64_to_v2f64 +define void @s_uint_to_fp_v2i64_to_v2f64(<2 x double> addrspace(1)* %out, <2 x i64> %in) { %cast = uitofp <2 x i64> %in to <2 x double> store <2 x double> %cast, <2 x double> addrspace(1)* %out, align 16 ret void } -; SI-LABEL: {{^}}s_uint_to_fp_v4f64_v4i64 -define void @s_uint_to_fp_v4f64_v4i64(<4 x double> addrspace(1)* %out, <4 x i64> %in) { +; SI-LABEL: {{^}}s_uint_to_fp_v4i64_to_v4f64 +define void @s_uint_to_fp_v4i64_to_v4f64(<4 x double> addrspace(1)* %out, <4 x i64> %in) { %cast = uitofp <4 x i64> %in to <4 x double> store <4 x double> %cast, <4 x double> addrspace(1)* %out, align 16 ret void } + +; SI-LABEL: {{^}}s_uint_to_fp_i32_to_f64 +; SI: v_cvt_f64_u32_e32 +; SI: s_endpgm +define void @s_uint_to_fp_i32_to_f64(double addrspace(1)* %out, i32 %in) { + %cast = uitofp i32 %in to double + store double %cast, double addrspace(1)* %out, align 8 + ret void +} + +; SI-LABEL: {{^}}s_uint_to_fp_v2i32_to_v2f64 +; SI: v_cvt_f64_u32_e32 +; SI: v_cvt_f64_u32_e32 +; SI: s_endpgm +define void @s_uint_to_fp_v2i32_to_v2f64(<2 x double> addrspace(1)* %out, <2 x i32> %in) { + %cast = uitofp <2 x i32> %in to <2 x double> + store <2 x double> %cast, <2 x double> addrspace(1)* %out, align 16 + ret void +} + +; SI-LABEL: {{^}}s_uint_to_fp_v4i32_to_v4f64 +; SI: v_cvt_f64_u32_e32 +; SI: v_cvt_f64_u32_e32 +; SI: v_cvt_f64_u32_e32 +; SI: v_cvt_f64_u32_e32 +; SI: s_endpgm +define void @s_uint_to_fp_v4i32_to_v4f64(<4 x double> addrspace(1)* %out, <4 x i32> %in) { + %cast = uitofp <4 x i32> %in to <4 x 
double> + store <4 x double> %cast, <4 x double> addrspace(1)* %out, align 16 + ret void +} + +; FIXME: select on 0, 0 +; SI-LABEL: {{^}}uint_to_fp_i1_to_f64: +; SI: v_cmp_eq_i32_e64 [[CMP:s\[[0-9]+:[0-9]\]]], +; We can't fold the SGPRs into v_cndmask_b32_e64, because it already +; uses an SGPR for [[CMP]] +; SI: v_cndmask_b32_e64 v{{[0-9]+}}, 0, v{{[0-9]+}}, [[CMP]] +; SI: v_cndmask_b32_e64 v{{[0-9]+}}, 0, 0, [[CMP]] +; SI: buffer_store_dwordx2 +; SI: s_endpgm +define void @uint_to_fp_i1_to_f64(double addrspace(1)* %out, i32 %in) { + %cmp = icmp eq i32 %in, 0 + %fp = uitofp i1 %cmp to double + store double %fp, double addrspace(1)* %out, align 4 + ret void +} + +; SI-LABEL: {{^}}uint_to_fp_i1_to_f64_load: +; SI: v_cndmask_b32_e64 [[IRESULT:v[0-9]]], 0, 1 +; SI-NEXT: v_cvt_f64_u32_e32 [[RESULT:v\[[0-9]+:[0-9]\]]], [[IRESULT]] +; SI: buffer_store_dwordx2 [[RESULT]] +; SI: s_endpgm +define void @uint_to_fp_i1_to_f64_load(double addrspace(1)* %out, i1 %in) { + %fp = uitofp i1 %in to double + store double %fp, double addrspace(1)* %out, align 8 + ret void +} diff --git a/test/CodeGen/R600/uint_to_fp.ll b/test/CodeGen/R600/uint_to_fp.ll index f58f10b..1c8a175 100644 --- a/test/CodeGen/R600/uint_to_fp.ll +++ b/test/CodeGen/R600/uint_to_fp.ll @@ -1,20 +1,32 @@ -; RUN: llc -march=r600 -mcpu=SI -verify-machineinstrs < %s | FileCheck -check-prefix=SI -check-prefix=FUNC %s +; RUN: llc -march=amdgcn -mcpu=SI -verify-machineinstrs < %s | FileCheck -check-prefix=SI -check-prefix=FUNC %s +; RUN: llc -march=amdgcn -mcpu=tonga -verify-machineinstrs < %s | FileCheck -check-prefix=SI -check-prefix=FUNC %s ; RUN: llc -march=r600 -mcpu=redwood < %s | FileCheck -check-prefix=R600 -check-prefix=FUNC %s -; FUNC-LABEL: {{^}}uint_to_fp_v2i32: +; FUNC-LABEL: {{^}}uint_to_fp_i32_to_f32: +; R600-DAG: UINT_TO_FLT * T{{[0-9]+\.[XYZW]}}, KC0[2].Z + +; SI: v_cvt_f32_u32_e32 +; SI: s_endpgm +define void @uint_to_fp_i32_to_f32(float addrspace(1)* %out, i32 %in) { + %result = uitofp i32 %in to float + store float %result, float addrspace(1)* %out + ret void +} + +; FUNC-LABEL: {{^}}uint_to_fp_v2i32_to_v2f32: ; R600-DAG: UINT_TO_FLT * T{{[0-9]+\.[XYZW]}}, KC0[2].W ; R600-DAG: UINT_TO_FLT * T{{[0-9]+\.[XYZW]}}, KC0[3].X ; SI: v_cvt_f32_u32_e32 ; SI: v_cvt_f32_u32_e32 ; SI: s_endpgm -define void @uint_to_fp_v2i32(<2 x float> addrspace(1)* %out, <2 x i32> %in) { +define void @uint_to_fp_v2i32_to_v2f32(<2 x float> addrspace(1)* %out, <2 x i32> %in) { %result = uitofp <2 x i32> %in to <2 x float> store <2 x float> %result, <2 x float> addrspace(1)* %out ret void } -; FUNC-LABEL: {{^}}uint_to_fp_v4i32: +; FUNC-LABEL: {{^}}uint_to_fp_v4i32_to_v4f32: ; R600: UINT_TO_FLT * T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}} ; R600: UINT_TO_FLT * T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}} ; R600: UINT_TO_FLT * T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}} @@ -25,45 +37,45 @@ define void @uint_to_fp_v2i32(<2 x float> addrspace(1)* %out, <2 x i32> %in) { ; SI: v_cvt_f32_u32_e32 ; SI: v_cvt_f32_u32_e32 ; SI: s_endpgm -define void @uint_to_fp_v4i32(<4 x float> addrspace(1)* %out, <4 x i32> addrspace(1)* %in) { +define void @uint_to_fp_v4i32_to_v4f32(<4 x float> addrspace(1)* %out, <4 x i32> addrspace(1)* %in) { %value = load <4 x i32> addrspace(1) * %in %result = uitofp <4 x i32> %value to <4 x float> store <4 x float> %result, <4 x float> addrspace(1)* %out ret void } -; FUNC-LABEL: {{^}}uint_to_fp_i64_f32: +; FUNC-LABEL: {{^}}uint_to_fp_i64_to_f32: ; R600: UINT_TO_FLT ; R600: UINT_TO_FLT ; R600: MULADD_IEEE ; SI: v_cvt_f32_u32_e32 ; SI: v_cvt_f32_u32_e32 -; SI: 
v_mad_f32 +; SI: v_madmk_f32 {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}}, 0x4f800000 ; SI: s_endpgm -define void @uint_to_fp_i64_f32(float addrspace(1)* %out, i64 %in) { +define void @uint_to_fp_i64_to_f32(float addrspace(1)* %out, i64 %in) { entry: %0 = uitofp i64 %in to float store float %0, float addrspace(1)* %out ret void } -; FUNC-LABEL: {{^}}uint_to_fp_i1_f32: +; FUNC-LABEL: {{^}}uint_to_fp_i1_to_f32: ; SI: v_cmp_eq_i32_e64 [[CMP:s\[[0-9]+:[0-9]\]]], ; SI-NEXT: v_cndmask_b32_e64 [[RESULT:v[0-9]+]], 0, 1.0, [[CMP]] ; SI: buffer_store_dword [[RESULT]], ; SI: s_endpgm -define void @uint_to_fp_i1_f32(float addrspace(1)* %out, i32 %in) { +define void @uint_to_fp_i1_to_f32(float addrspace(1)* %out, i32 %in) { %cmp = icmp eq i32 %in, 0 %fp = uitofp i1 %cmp to float store float %fp, float addrspace(1)* %out, align 4 ret void } -; FUNC-LABEL: {{^}}uint_to_fp_i1_f32_load: +; FUNC-LABEL: {{^}}uint_to_fp_i1_to_f32_load: ; SI: v_cndmask_b32_e64 [[RESULT:v[0-9]+]], 0, 1.0 ; SI: buffer_store_dword [[RESULT]], ; SI: s_endpgm -define void @uint_to_fp_i1_f32_load(float addrspace(1)* %out, i1 %in) { +define void @uint_to_fp_i1_to_f32_load(float addrspace(1)* %out, i1 %in) { %fp = uitofp i1 %in to float store float %fp, float addrspace(1)* %out, align 4 ret void diff --git a/test/CodeGen/R600/unaligned-load-store.ll b/test/CodeGen/R600/unaligned-load-store.ll index f8737e6..665dc37 100644 --- a/test/CodeGen/R600/unaligned-load-store.ll +++ b/test/CodeGen/R600/unaligned-load-store.ll @@ -1,37 +1,179 @@ -; RUN: llc -march=r600 -mcpu=SI -verify-machineinstrs< %s | FileCheck -check-prefix=SI %s +; RUN: llc -march=amdgcn -mcpu=SI -verify-machineinstrs< %s | FileCheck -check-prefix=SI %s +; RUN: llc -march=amdgcn -mcpu=tonga -verify-machineinstrs< %s | FileCheck -check-prefix=SI %s -; FIXME: This is probably wrong. This probably needs to expand to 8-bit reads and writes. 
-; SI-LABEL: {{^}}unaligned_load_store_i32: -; SI: ds_read_u16 -; SI: ds_read_u16 -; SI: ds_write_b32 +; SI-LABEL: {{^}}unaligned_load_store_i16_local: +; SI: ds_read_u8 +; SI: ds_read_u8 +; SI: ds_write_b8 +; SI: ds_write_b8 ; SI: s_endpgm -define void @unaligned_load_store_i32(i32 addrspace(3)* %p, i32 addrspace(3)* %r) nounwind { +define void @unaligned_load_store_i16_local(i16 addrspace(3)* %p, i16 addrspace(3)* %r) nounwind { + %v = load i16 addrspace(3)* %p, align 1 + store i16 %v, i16 addrspace(3)* %r, align 1 + ret void +} + +; SI-LABEL: {{^}}unaligned_load_store_i16_global: +; SI: buffer_load_ubyte +; SI: buffer_load_ubyte +; SI: buffer_store_byte +; SI: buffer_store_byte +; SI: s_endpgm +define void @unaligned_load_store_i16_global(i16 addrspace(1)* %p, i16 addrspace(1)* %r) nounwind { + %v = load i16 addrspace(1)* %p, align 1 + store i16 %v, i16 addrspace(1)* %r, align 1 + ret void +} + +; SI-LABEL: {{^}}unaligned_load_store_i32_local: +; SI: ds_read_u8 +; SI: ds_read_u8 +; SI: ds_read_u8 +; SI: ds_read_u8 +; SI: ds_write_b8 +; SI: ds_write_b8 +; SI: ds_write_b8 +; SI: ds_write_b8 +; SI: s_endpgm +define void @unaligned_load_store_i32_local(i32 addrspace(3)* %p, i32 addrspace(3)* %r) nounwind { %v = load i32 addrspace(3)* %p, align 1 store i32 %v, i32 addrspace(3)* %r, align 1 ret void } -; SI-LABEL: {{^}}unaligned_load_store_v4i32: -; SI: ds_read_u16 -; SI: ds_read_u16 -; SI: ds_read_u16 -; SI: ds_read_u16 -; SI: ds_read_u16 -; SI: ds_read_u16 -; SI: ds_read_u16 -; SI: ds_read_u16 -; SI: ds_write_b32 -; SI: ds_write_b32 -; SI: ds_write_b32 -; SI: ds_write_b32 +; SI-LABEL: {{^}}unaligned_load_store_i32_global: +; SI: buffer_load_ubyte +; SI: buffer_load_ubyte +; SI: buffer_load_ubyte +; SI: buffer_load_ubyte +; SI: buffer_store_byte +; SI: buffer_store_byte +; SI: buffer_store_byte +; SI: buffer_store_byte +define void @unaligned_load_store_i32_global(i32 addrspace(1)* %p, i32 addrspace(1)* %r) nounwind { + %v = load i32 addrspace(1)* %p, align 1 + store i32 %v, i32 addrspace(1)* %r, align 1 + ret void +} + +; SI-LABEL: {{^}}unaligned_load_store_i64_local: +; SI: ds_read_u8 +; SI: ds_read_u8 +; SI: ds_read_u8 +; SI: ds_read_u8 +; SI: ds_read_u8 +; SI: ds_read_u8 +; SI: ds_read_u8 +; SI: ds_read_u8 +; SI: ds_write_b8 +; SI: ds_write_b8 +; SI: ds_write_b8 +; SI: ds_write_b8 +; SI: ds_write_b8 +; SI: ds_write_b8 +; SI: ds_write_b8 +; SI: ds_write_b8 ; SI: s_endpgm -define void @unaligned_load_store_v4i32(<4 x i32> addrspace(3)* %p, <4 x i32> addrspace(3)* %r) nounwind { +define void @unaligned_load_store_i64_local(i64 addrspace(3)* %p, i64 addrspace(3)* %r) { + %v = load i64 addrspace(3)* %p, align 1 + store i64 %v, i64 addrspace(3)* %r, align 1 + ret void +} + +; SI-LABEL: {{^}}unaligned_load_store_i64_global: +; SI: buffer_load_ubyte +; SI: buffer_load_ubyte +; SI: buffer_load_ubyte +; SI: buffer_load_ubyte +; SI: buffer_load_ubyte +; SI: buffer_load_ubyte +; SI: buffer_load_ubyte +; SI: buffer_load_ubyte +; SI: buffer_store_byte +; SI: buffer_store_byte +; SI: buffer_store_byte +; SI: buffer_store_byte +; SI: buffer_store_byte +; SI: buffer_store_byte +; SI: buffer_store_byte +; SI: buffer_store_byte +define void @unaligned_load_store_i64_global(i64 addrspace(1)* %p, i64 addrspace(1)* %r) { + %v = load i64 addrspace(1)* %p, align 1 + store i64 %v, i64 addrspace(1)* %r, align 1 + ret void +} + +; SI-LABEL: {{^}}unaligned_load_store_v4i32_local: +; SI: ds_read_u8 +; SI: ds_read_u8 +; SI: ds_read_u8 +; SI: ds_read_u8 + +; SI: ds_read_u8 +; SI: ds_read_u8 +; SI: ds_read_u8 +; SI: 
ds_read_u8 + +; SI: ds_read_u8 +; SI: ds_read_u8 +; SI: ds_read_u8 +; SI: ds_read_u8 + +; SI: ds_read_u8 +; SI: ds_read_u8 +; SI: ds_read_u8 +; SI: ds_read_u8 + +; SI: ds_write_b8 +; SI: ds_write_b8 +; SI: ds_write_b8 +; SI: ds_write_b8 + +; SI: ds_write_b8 +; SI: ds_write_b8 +; SI: ds_write_b8 +; SI: ds_write_b8 + +; SI: ds_write_b8 +; SI: ds_write_b8 +; SI: ds_write_b8 +; SI: ds_write_b8 + +; SI: ds_write_b8 +; SI: ds_write_b8 +; SI: ds_write_b8 +; SI: ds_write_b8 +; SI: s_endpgm +define void @unaligned_load_store_v4i32_local(<4 x i32> addrspace(3)* %p, <4 x i32> addrspace(3)* %r) nounwind { %v = load <4 x i32> addrspace(3)* %p, align 1 store <4 x i32> %v, <4 x i32> addrspace(3)* %r, align 1 ret void } +; FIXME: We mark v4i32 as custom, so misaligned loads are never expanded. +; FIXME-SI-LABEL: {{^}}unaligned_load_store_v4i32_global +; FIXME-SI: buffer_load_ubyte +; FIXME-SI: buffer_load_ubyte +; FIXME-SI: buffer_load_ubyte +; FIXME-SI: buffer_load_ubyte +; FIXME-SI: buffer_load_ubyte +; FIXME-SI: buffer_load_ubyte +; FIXME-SI: buffer_load_ubyte +; FIXME-SI: buffer_load_ubyte +; FIXME-SI: buffer_load_ubyte +; FIXME-SI: buffer_load_ubyte +; FIXME-SI: buffer_load_ubyte +; FIXME-SI: buffer_load_ubyte +; FIXME-SI: buffer_load_ubyte +; FIXME-SI: buffer_load_ubyte +; FIXME-SI: buffer_load_ubyte +; FIXME-SI: buffer_load_ubyte +define void @unaligned_load_store_v4i32_global(<4 x i32> addrspace(1)* %p, <4 x i32> addrspace(1)* %r) nounwind { + %v = load <4 x i32> addrspace(1)* %p, align 1 + store <4 x i32> %v, <4 x i32> addrspace(1)* %r, align 1 + ret void +} + ; SI-LABEL: {{^}}load_lds_i64_align_4: ; SI: ds_read2_b32 ; SI: s_endpgm @@ -64,12 +206,23 @@ define void @load_lds_i64_align_4_with_split_offset(i64 addrspace(1)* nocapture ret void } -; FIXME: Need to fix this case. -; define void @load_lds_i64_align_1(i64 addrspace(1)* nocapture %out, i64 addrspace(3)* %in) #0 { -; %val = load i64 addrspace(3)* %in, align 1 -; store i64 %val, i64 addrspace(1)* %out, align 8 -; ret void -; } +; SI-LABEL: {{^}}load_lds_i64_align_1: +; SI: ds_read_u8 +; SI: ds_read_u8 +; SI: ds_read_u8 +; SI: ds_read_u8 +; SI: ds_read_u8 +; SI: ds_read_u8 +; SI: ds_read_u8 +; SI: ds_read_u8 +; SI: buffer_store_dwordx2 +; SI: s_endpgm + +define void @load_lds_i64_align_1(i64 addrspace(1)* nocapture %out, i64 addrspace(3)* %in) #0 { + %val = load i64 addrspace(3)* %in, align 1 + store i64 %val, i64 addrspace(1)* %out, align 8 + ret void +} ; SI-LABEL: {{^}}store_lds_i64_align_4: ; SI: ds_write2_b32 diff --git a/test/CodeGen/R600/unhandled-loop-condition-assertion.ll b/test/CodeGen/R600/unhandled-loop-condition-assertion.ll index ff01a1e..c615f0b 100644 --- a/test/CodeGen/R600/unhandled-loop-condition-assertion.ll +++ b/test/CodeGen/R600/unhandled-loop-condition-assertion.ll @@ -1,6 +1,7 @@ ; REQUIRES: asserts ; XFAIL: * -; RUN: llc -O0 -verify-machineinstrs -asm-verbose=0 -march=r600 -mcpu=SI < %s | FileCheck -check-prefix=SI -check-prefix=COMMON %s +; RUN: llc -O0 -verify-machineinstrs -asm-verbose=0 -march=amdgcn -mcpu=SI < %s | FileCheck -check-prefix=SI -check-prefix=COMMON %s +; RUN: llc -O0 -verify-machineinstrs -asm-verbose=0 -march=amdgcn -mcpu=tonga < %s | FileCheck -check-prefix=SI -check-prefix=COMMON %s ; RUN: llc -O0 -verify-machineinstrs -asm-verbose=0 -march=r600 -mcpu=redwood < %s | FileCheck -check-prefix=EG -check-prefix=COMMON %s ; SI hits an assertion at -O0, evergreen hits a not implemented unreachable. 
diff --git a/test/CodeGen/R600/urecip.ll b/test/CodeGen/R600/urecip.ll index 4d953b5..daacc77 100644 --- a/test/CodeGen/R600/urecip.ll +++ b/test/CodeGen/R600/urecip.ll @@ -1,4 +1,5 @@ -;RUN: llc < %s -march=r600 -mcpu=verde -verify-machineinstrs | FileCheck %s +;RUN: llc < %s -march=amdgcn -mcpu=verde -verify-machineinstrs | FileCheck %s +;RUN: llc < %s -march=amdgcn -mcpu=tonga -verify-machineinstrs | FileCheck %s ;CHECK: v_rcp_iflag_f32_e32 diff --git a/test/CodeGen/R600/urem.ll b/test/CodeGen/R600/urem.ll index 914f5d0..aa2a3eb 100644 --- a/test/CodeGen/R600/urem.ll +++ b/test/CodeGen/R600/urem.ll @@ -1,34 +1,94 @@ -;RUN: llc < %s -march=r600 -mcpu=redwood | FileCheck --check-prefix=EG-CHECK %s -;RUN: llc < %s -march=r600 -mcpu=verde -verify-machineinstrs | FileCheck --check-prefix=SI-CHECK %s +; RUN: llc -march=amdgcn -mcpu=verde -verify-machineinstrs < %s | FileCheck -check-prefix=SI -check-prefix=FUNC %s +; RUN: llc -march=amdgcn -mcpu=tonga -verify-machineinstrs < %s | FileCheck -check-prefix=SI -check-prefix=FUNC %s +; RUN: llc -march=r600 -mcpu=redwood < %s | FileCheck -check-prefix=EG -check-prefix=FUNC %s -;The code generated by urem is long and complex and may frequently change. -;The goal of this test is to make sure the ISel doesn't fail when it gets -;a v2i32/v4i32 urem +; The code generated by urem is long and complex and may frequently +; change. The goal of this test is to make sure the ISel doesn't fail +; when it gets a v2i32/v4i32 urem -;EG-CHECK: {{^}}test2: -;EG-CHECK: CF_END -;SI-CHECK: {{^}}test2: -;SI-CHECK: s_endpgm +; FUNC-LABEL: {{^}}test_urem_i32: +; SI: s_endpgm +; EG: CF_END +define void @test_urem_i32(i32 addrspace(1)* %out, i32 addrspace(1)* %in) { + %b_ptr = getelementptr i32 addrspace(1)* %in, i32 1 + %a = load i32 addrspace(1)* %in + %b = load i32 addrspace(1)* %b_ptr + %result = urem i32 %a, %b + store i32 %result, i32 addrspace(1)* %out + ret void +} + +; FUNC-LABEL: {{^}}test_urem_i32_7: +; SI: v_mov_b32_e32 [[MAGIC:v[0-9]+]], 0x24924925 +; SI: v_mul_hi_u32 {{v[0-9]+}}, [[MAGIC]] +; SI: v_subrev_i32 +; SI: v_mul_lo_i32 +; SI: v_sub_i32 +; SI: buffer_store_dword +; SI: s_endpgm +define void @test_urem_i32_7(i32 addrspace(1)* %out, i32 addrspace(1)* %in) { + %num = load i32 addrspace(1) * %in + %result = urem i32 %num, 7 + store i32 %result, i32 addrspace(1)* %out + ret void +} -define void @test2(<2 x i32> addrspace(1)* %out, <2 x i32> addrspace(1)* %in) { +; FUNC-LABEL: {{^}}test_urem_v2i32: +; SI: s_endpgm +; EG: CF_END +define void @test_urem_v2i32(<2 x i32> addrspace(1)* %out, <2 x i32> addrspace(1)* %in) { %b_ptr = getelementptr <2 x i32> addrspace(1)* %in, i32 1 - %a = load <2 x i32> addrspace(1) * %in - %b = load <2 x i32> addrspace(1) * %b_ptr + %a = load <2 x i32> addrspace(1)* %in + %b = load <2 x i32> addrspace(1)* %b_ptr %result = urem <2 x i32> %a, %b store <2 x i32> %result, <2 x i32> addrspace(1)* %out ret void } -;EG-CHECK: {{^}}test4: -;EG-CHECK: CF_END -;SI-CHECK: {{^}}test4: -;SI-CHECK: s_endpgm - -define void @test4(<4 x i32> addrspace(1)* %out, <4 x i32> addrspace(1)* %in) { +; FUNC-LABEL: {{^}}test_urem_v4i32: +; SI: s_endpgm +; EG: CF_END +define void @test_urem_v4i32(<4 x i32> addrspace(1)* %out, <4 x i32> addrspace(1)* %in) { %b_ptr = getelementptr <4 x i32> addrspace(1)* %in, i32 1 - %a = load <4 x i32> addrspace(1) * %in - %b = load <4 x i32> addrspace(1) * %b_ptr + %a = load <4 x i32> addrspace(1)* %in + %b = load <4 x i32> addrspace(1)* %b_ptr %result = urem <4 x i32> %a, %b store <4 x i32> %result, <4 x i32> 
addrspace(1)* %out ret void } + +; FUNC-LABEL: {{^}}test_urem_i64: +; SI: s_endpgm +; EG: CF_END +define void @test_urem_i64(i64 addrspace(1)* %out, i64 addrspace(1)* %in) { + %b_ptr = getelementptr i64 addrspace(1)* %in, i64 1 + %a = load i64 addrspace(1)* %in + %b = load i64 addrspace(1)* %b_ptr + %result = urem i64 %a, %b + store i64 %result, i64 addrspace(1)* %out + ret void +} + +; FUNC-LABEL: {{^}}test_urem_v2i64: +; SI: s_endpgm +; EG: CF_END +define void @test_urem_v2i64(<2 x i64> addrspace(1)* %out, <2 x i64> addrspace(1)* %in) { + %b_ptr = getelementptr <2 x i64> addrspace(1)* %in, i64 1 + %a = load <2 x i64> addrspace(1)* %in + %b = load <2 x i64> addrspace(1)* %b_ptr + %result = urem <2 x i64> %a, %b + store <2 x i64> %result, <2 x i64> addrspace(1)* %out + ret void +} + +; FUNC-LABEL: {{^}}test_urem_v4i64: +; SI: s_endpgm +; EG: CF_END +define void @test_urem_v4i64(<4 x i64> addrspace(1)* %out, <4 x i64> addrspace(1)* %in) { + %b_ptr = getelementptr <4 x i64> addrspace(1)* %in, i64 1 + %a = load <4 x i64> addrspace(1)* %in + %b = load <4 x i64> addrspace(1)* %b_ptr + %result = urem <4 x i64> %a, %b + store <4 x i64> %result, <4 x i64> addrspace(1)* %out + ret void +} diff --git a/test/CodeGen/R600/use-sgpr-multiple-times.ll b/test/CodeGen/R600/use-sgpr-multiple-times.ll index aa94a0e..f26f300 100644 --- a/test/CodeGen/R600/use-sgpr-multiple-times.ll +++ b/test/CodeGen/R600/use-sgpr-multiple-times.ll @@ -1,80 +1,87 @@ -; RUN: llc -march=r600 -mcpu=SI -verify-machineinstrs < %s | FileCheck -check-prefix=SI %s +; RUN: llc -march=amdgcn -mcpu=SI -verify-machineinstrs < %s | FileCheck -check-prefix=SI -check-prefix=GCN %s +; RUN: llc -march=amdgcn -mcpu=tonga -verify-machineinstrs < %s | FileCheck -check-prefix=VI -check-prefix=GCN %s declare float @llvm.fma.f32(float, float, float) #1 declare float @llvm.fmuladd.f32(float, float, float) #1 declare i32 @llvm.AMDGPU.imad24(i32, i32, i32) #1 -; SI-LABEL: {{^}}test_sgpr_use_twice_binop: -; SI: s_load_dword [[SGPR:s[0-9]+]], -; SI: v_add_f32_e64 [[RESULT:v[0-9]+]], [[SGPR]], [[SGPR]] -; SI: buffer_store_dword [[RESULT]] +; GCN-LABEL: {{^}}test_sgpr_use_twice_binop: +; GCN: s_load_dword [[SGPR:s[0-9]+]], +; GCN: v_add_f32_e64 [[RESULT:v[0-9]+]], [[SGPR]], [[SGPR]] +; GCN: buffer_store_dword [[RESULT]] define void @test_sgpr_use_twice_binop(float addrspace(1)* %out, float %a) #0 { %dbl = fadd float %a, %a store float %dbl, float addrspace(1)* %out, align 4 ret void } -; SI-LABEL: {{^}}test_sgpr_use_three_ternary_op: -; SI: s_load_dword [[SGPR:s[0-9]+]], -; SI: v_fma_f32 [[RESULT:v[0-9]+]], [[SGPR]], [[SGPR]], [[SGPR]] -; SI: buffer_store_dword [[RESULT]] +; GCN-LABEL: {{^}}test_sgpr_use_three_ternary_op: +; GCN: s_load_dword [[SGPR:s[0-9]+]], +; GCN: v_fma_f32 [[RESULT:v[0-9]+]], [[SGPR]], [[SGPR]], [[SGPR]] +; GCN: buffer_store_dword [[RESULT]] define void @test_sgpr_use_three_ternary_op(float addrspace(1)* %out, float %a) #0 { %fma = call float @llvm.fma.f32(float %a, float %a, float %a) #1 store float %fma, float addrspace(1)* %out, align 4 ret void } -; SI-LABEL: {{^}}test_sgpr_use_twice_ternary_op_a_a_b: +; GCN-LABEL: {{^}}test_sgpr_use_twice_ternary_op_a_a_b: ; SI: s_load_dword [[SGPR0:s[0-9]+]], s{{\[[0-9]+:[0-9]+\]}}, 0xb ; SI: s_load_dword [[SGPR1:s[0-9]+]], s{{\[[0-9]+:[0-9]+\]}}, 0xc -; SI: v_mov_b32_e32 [[VGPR1:v[0-9]+]], [[SGPR1]] -; SI: v_fma_f32 [[RESULT:v[0-9]+]], [[SGPR0]], [[SGPR0]], [[VGPR1]] -; SI: buffer_store_dword [[RESULT]] +; VI: s_load_dword [[SGPR0:s[0-9]+]], s{{\[[0-9]+:[0-9]+\]}}, 0x2c +; VI: s_load_dword 
[[SGPR1:s[0-9]+]], s{{\[[0-9]+:[0-9]+\]}}, 0x30 +; GCN: v_mov_b32_e32 [[VGPR1:v[0-9]+]], [[SGPR1]] +; GCN: v_fma_f32 [[RESULT:v[0-9]+]], [[SGPR0]], [[SGPR0]], [[VGPR1]] +; GCN: buffer_store_dword [[RESULT]] define void @test_sgpr_use_twice_ternary_op_a_a_b(float addrspace(1)* %out, float %a, float %b) #0 { %fma = call float @llvm.fma.f32(float %a, float %a, float %b) #1 store float %fma, float addrspace(1)* %out, align 4 ret void } -; SI-LABEL: {{^}}test_sgpr_use_twice_ternary_op_a_b_a: +; GCN-LABEL: {{^}}test_sgpr_use_twice_ternary_op_a_b_a: ; SI: s_load_dword [[SGPR0:s[0-9]+]], s{{\[[0-9]+:[0-9]+\]}}, 0xb ; SI: s_load_dword [[SGPR1:s[0-9]+]], s{{\[[0-9]+:[0-9]+\]}}, 0xc -; SI: v_mov_b32_e32 [[VGPR1:v[0-9]+]], [[SGPR1]] -; SI: v_fma_f32 [[RESULT:v[0-9]+]], [[SGPR0]], [[VGPR1]], [[SGPR0]] -; SI: buffer_store_dword [[RESULT]] +; VI: s_load_dword [[SGPR0:s[0-9]+]], s{{\[[0-9]+:[0-9]+\]}}, 0x2c +; VI: s_load_dword [[SGPR1:s[0-9]+]], s{{\[[0-9]+:[0-9]+\]}}, 0x30 +; GCN: v_mov_b32_e32 [[VGPR1:v[0-9]+]], [[SGPR1]] +; GCN: v_fma_f32 [[RESULT:v[0-9]+]], [[VGPR1]], [[SGPR0]], [[SGPR0]] +; GCN: buffer_store_dword [[RESULT]] define void @test_sgpr_use_twice_ternary_op_a_b_a(float addrspace(1)* %out, float %a, float %b) #0 { %fma = call float @llvm.fma.f32(float %a, float %b, float %a) #1 store float %fma, float addrspace(1)* %out, align 4 ret void } -; SI-LABEL: {{^}}test_sgpr_use_twice_ternary_op_b_a_a: +; GCN-LABEL: {{^}}test_sgpr_use_twice_ternary_op_b_a_a: ; SI: s_load_dword [[SGPR0:s[0-9]+]], s{{\[[0-9]+:[0-9]+\]}}, 0xb ; SI: s_load_dword [[SGPR1:s[0-9]+]], s{{\[[0-9]+:[0-9]+\]}}, 0xc -; SI: v_mov_b32_e32 [[VGPR1:v[0-9]+]], [[SGPR1]] -; SI: v_fma_f32 [[RESULT:v[0-9]+]], [[VGPR1]], [[SGPR0]], [[SGPR0]] -; SI: buffer_store_dword [[RESULT]] +; VI: s_load_dword [[SGPR0:s[0-9]+]], s{{\[[0-9]+:[0-9]+\]}}, 0x2c +; VI: s_load_dword [[SGPR1:s[0-9]+]], s{{\[[0-9]+:[0-9]+\]}}, 0x30 +; GCN: v_mov_b32_e32 [[VGPR1:v[0-9]+]], [[SGPR1]] +; GCN: v_fma_f32 [[RESULT:v[0-9]+]], [[SGPR0]], [[VGPR1]], [[SGPR0]] +; GCN: buffer_store_dword [[RESULT]] define void @test_sgpr_use_twice_ternary_op_b_a_a(float addrspace(1)* %out, float %a, float %b) #0 { %fma = call float @llvm.fma.f32(float %b, float %a, float %a) #1 store float %fma, float addrspace(1)* %out, align 4 ret void } -; SI-LABEL: {{^}}test_sgpr_use_twice_ternary_op_a_a_imm: -; SI: s_load_dword [[SGPR:s[0-9]+]] -; SI: v_fma_f32 [[RESULT:v[0-9]+]], [[SGPR]], [[SGPR]], 2.0 -; SI: buffer_store_dword [[RESULT]] +; GCN-LABEL: {{^}}test_sgpr_use_twice_ternary_op_a_a_imm: +; GCN: s_load_dword [[SGPR:s[0-9]+]] +; GCN: v_fma_f32 [[RESULT:v[0-9]+]], [[SGPR]], [[SGPR]], 2.0 +; GCN: buffer_store_dword [[RESULT]] define void @test_sgpr_use_twice_ternary_op_a_a_imm(float addrspace(1)* %out, float %a) #0 { %fma = call float @llvm.fma.f32(float %a, float %a, float 2.0) #1 store float %fma, float addrspace(1)* %out, align 4 ret void } -; SI-LABEL: {{^}}test_sgpr_use_twice_ternary_op_a_imm_a: -; SI: s_load_dword [[SGPR:s[0-9]+]] -; SI: v_fma_f32 [[RESULT:v[0-9]+]], 2.0, [[SGPR]], [[SGPR]] -; SI: buffer_store_dword [[RESULT]] +; GCN-LABEL: {{^}}test_sgpr_use_twice_ternary_op_a_imm_a: +; GCN: s_load_dword [[SGPR:s[0-9]+]] +; GCN: v_fma_f32 [[RESULT:v[0-9]+]], 2.0, [[SGPR]], [[SGPR]] +; GCN: buffer_store_dword [[RESULT]] define void @test_sgpr_use_twice_ternary_op_a_imm_a(float addrspace(1)* %out, float %a) #0 { %fma = call float @llvm.fma.f32(float %a, float 2.0, float %a) #1 store float %fma, float addrspace(1)* %out, align 4 @@ -82,10 +89,10 @@ define void 
@test_sgpr_use_twice_ternary_op_a_imm_a(float addrspace(1)* %out, fl } ; Don't use fma since fma c, x, y is canonicalized to fma x, c, y -; SI-LABEL: {{^}}test_sgpr_use_twice_ternary_op_imm_a_a: -; SI: s_load_dword [[SGPR:s[0-9]+]] -; SI: v_mad_i32_i24 [[RESULT:v[0-9]+]], 2, [[SGPR]], [[SGPR]] -; SI: buffer_store_dword [[RESULT]] +; GCN-LABEL: {{^}}test_sgpr_use_twice_ternary_op_imm_a_a: +; GCN: s_load_dword [[SGPR:s[0-9]+]] +; GCN: v_mad_i32_i24 [[RESULT:v[0-9]+]], 2, [[SGPR]], [[SGPR]] +; GCN: buffer_store_dword [[RESULT]] define void @test_sgpr_use_twice_ternary_op_imm_a_a(i32 addrspace(1)* %out, i32 %a) #0 { %fma = call i32 @llvm.AMDGPU.imad24(i32 2, i32 %a, i32 %a) #1 store i32 %fma, i32 addrspace(1)* %out, align 4 diff --git a/test/CodeGen/R600/usubo.ll b/test/CodeGen/R600/usubo.ll index abc5bd2..be1e666 100644 --- a/test/CodeGen/R600/usubo.ll +++ b/test/CodeGen/R600/usubo.ll @@ -1,4 +1,5 @@ -; RUN: llc -march=r600 -mcpu=SI -verify-machineinstrs< %s | FileCheck -check-prefix=SI -check-prefix=FUNC %s +; RUN: llc -march=amdgcn -mcpu=SI -verify-machineinstrs< %s | FileCheck -check-prefix=SI -check-prefix=FUNC %s +; RUN: llc -march=amdgcn -mcpu=tonga -verify-machineinstrs< %s | FileCheck -check-prefix=SI -check-prefix=FUNC %s ; RUN: llc -march=r600 -mcpu=cypress -verify-machineinstrs< %s declare { i32, i1 } @llvm.usub.with.overflow.i32(i32, i32) nounwind readnone @@ -27,7 +28,7 @@ define void @s_usubo_i32(i32 addrspace(1)* %out, i1 addrspace(1)* %carryout, i32 } ; FUNC-LABEL: {{^}}v_usubo_i32: -; SI: v_sub_i32_e32 +; SI: v_subrev_i32_e32 define void @v_usubo_i32(i32 addrspace(1)* %out, i1 addrspace(1)* %carryout, i32 addrspace(1)* %aptr, i32 addrspace(1)* %bptr) nounwind { %a = load i32 addrspace(1)* %aptr, align 4 %b = load i32 addrspace(1)* %bptr, align 4 diff --git a/test/CodeGen/R600/v_cndmask.ll b/test/CodeGen/R600/v_cndmask.ll index a24dcc7..85936ec 100644 --- a/test/CodeGen/R600/v_cndmask.ll +++ b/test/CodeGen/R600/v_cndmask.ll @@ -1,4 +1,5 @@ -; RUN: llc -march=r600 -mcpu=SI -verify-machineinstrs < %s | FileCheck -check-prefix=SI %s +; RUN: llc -march=amdgcn -mcpu=SI -verify-machineinstrs < %s | FileCheck -check-prefix=SI %s +; RUN: llc -march=amdgcn -mcpu=tonga -verify-machineinstrs < %s | FileCheck -check-prefix=SI %s declare i32 @llvm.r600.read.tidig.x() #1 diff --git a/test/CodeGen/R600/valu-i1.ll b/test/CodeGen/R600/valu-i1.ll index 2c209fc..5a3c2ec 100644 --- a/test/CodeGen/R600/valu-i1.ll +++ b/test/CodeGen/R600/valu-i1.ll @@ -1,10 +1,13 @@ -; RUN: llc < %s -march=r600 -mcpu=SI | FileCheck --check-prefix=SI %s +; RUN: llc -march=amdgcn -mcpu=SI -verify-machineinstrs -enable-misched -asm-verbose < %s | FileCheck -check-prefix=SI %s +declare i32 @llvm.r600.read.tidig.x() nounwind readnone + +; SI-LABEL: @test_if ; Make sure the i1 values created by the cfg structurizer pass are ; moved using VALU instructions ; SI-NOT: s_mov_b64 s[{{[0-9]:[0-9]}}], -1 ; SI: v_mov_b32_e32 v{{[0-9]}}, -1 -define void @test_if(i32 %a, i32 %b, i32 addrspace(1)* %src, i32 addrspace(1)* %dst) { +define void @test_if(i32 %a, i32 %b, i32 addrspace(1)* %src, i32 addrspace(1)* %dst) #1 { entry: switch i32 %a, label %default [ i32 0, label %case0 @@ -37,3 +40,149 @@ else: end: ret void } + +; SI-LABEL: @simple_test_v_if +; SI: v_cmp_ne_i32_e64 [[BR_SREG:s\[[0-9]+:[0-9]+\]]], v{{[0-9]+}}, 0 +; SI: s_and_saveexec_b64 [[BR_SREG]], [[BR_SREG]] +; SI: s_xor_b64 [[BR_SREG]], exec, [[BR_SREG]] + +; SI: ; BB#1 +; SI: buffer_store_dword +; SI: s_endpgm + +; SI: BB1_2: +; SI: s_or_b64 exec, exec, [[BR_SREG]] +; 
SI: s_endpgm +define void @simple_test_v_if(i32 addrspace(1)* %dst, i32 addrspace(1)* %src) #1 { + %tid = call i32 @llvm.r600.read.tidig.x() nounwind readnone + %is.0 = icmp ne i32 %tid, 0 + br i1 %is.0, label %store, label %exit + +store: + %gep = getelementptr i32 addrspace(1)* %dst, i32 %tid + store i32 999, i32 addrspace(1)* %gep + ret void + +exit: + ret void +} + +; SI-LABEL: @simple_test_v_loop +; SI: v_cmp_ne_i32_e64 [[BR_SREG:s\[[0-9]+:[0-9]+\]]], v{{[0-9]+}}, 0 +; SI: s_and_saveexec_b64 [[BR_SREG]], [[BR_SREG]] +; SI: s_xor_b64 [[BR_SREG]], exec, [[BR_SREG]] +; SI: s_cbranch_execz BB2_2 + +; SI: ; BB#1: +; SI: s_mov_b64 {{s\[[0-9]+:[0-9]+\]}}, 0{{$}} + +; SI: BB2_3: +; SI: buffer_load_dword +; SI: buffer_store_dword +; SI: v_cmp_eq_i32_e32 vcc, +; SI: s_or_b64 [[OR_SREG:s\[[0-9]+:[0-9]+\]]] +; SI: s_andn2_b64 exec, exec, [[OR_SREG]] +; SI: s_cbranch_execnz BB2_3 + +define void @simple_test_v_loop(i32 addrspace(1)* %dst, i32 addrspace(1)* %src) #1 { +entry: + %tid = call i32 @llvm.r600.read.tidig.x() nounwind readnone + %is.0 = icmp ne i32 %tid, 0 + %limit = add i32 %tid, 64 + br i1 %is.0, label %loop, label %exit + +loop: + %i = phi i32 [%tid, %entry], [%i.inc, %loop] + %gep.src = getelementptr i32 addrspace(1)* %src, i32 %i + %gep.dst = getelementptr i32 addrspace(1)* %dst, i32 %i + %load = load i32 addrspace(1)* %src + store i32 %load, i32 addrspace(1)* %gep.dst + %i.inc = add nsw i32 %i, 1 + %cmp = icmp eq i32 %limit, %i.inc + br i1 %cmp, label %exit, label %loop + +exit: + ret void +} + +; SI-LABEL: @multi_vcond_loop + +; Load loop limit from buffer +; Branch to exit if uniformly not taken +; SI: ; BB#0: +; SI: buffer_load_dword [[VBOUND:v[0-9]+]] +; SI: v_cmp_gt_i32_e64 [[OUTER_CMP_SREG:s\[[0-9]+:[0-9]+\]]] +; SI: s_and_saveexec_b64 [[OUTER_CMP_SREG]], [[OUTER_CMP_SREG]] +; SI: s_xor_b64 [[OUTER_CMP_SREG]], exec, [[OUTER_CMP_SREG]] +; SI: s_cbranch_execz BB3_2 + +; Initialize inner condition to false +; SI: ; BB#1: +; SI: s_mov_b64 [[ZERO:s\[[0-9]+:[0-9]+\]]], 0{{$}} +; SI: s_mov_b64 [[COND_STATE:s\[[0-9]+:[0-9]+\]]], [[ZERO]] + +; Clear exec bits for workitems that load -1s +; SI: BB3_3: +; SI: buffer_load_dword [[B:v[0-9]+]] +; SI: buffer_load_dword [[A:v[0-9]+]] +; SI-DAG: v_cmp_ne_i32_e64 [[NEG1_CHECK_0:s\[[0-9]+:[0-9]+\]]], [[A]], -1 +; SI-DAG: v_cmp_ne_i32_e64 [[NEG1_CHECK_1:s\[[0-9]+:[0-9]+\]]], [[B]], -1 +; SI: s_and_b64 [[ORNEG1:s\[[0-9]+:[0-9]+\]]], [[NEG1_CHECK_1]], [[NEG1_CHECK_0]] +; SI: s_and_saveexec_b64 [[ORNEG1]], [[ORNEG1]] +; SI: s_xor_b64 [[ORNEG1]], exec, [[ORNEG1]] +; SI: s_cbranch_execz BB3_5 + +; SI: BB#4: +; SI: buffer_store_dword +; SI: v_cmp_ge_i64_e32 vcc +; SI: s_or_b64 [[COND_STATE]], vcc, [[COND_STATE]] + +; SI: BB3_5: +; SI: s_or_b64 exec, exec, [[ORNEG1]] +; SI: s_or_b64 [[COND_STATE]], [[ORNEG1]], [[COND_STATE]] +; SI: s_andn2_b64 exec, exec, [[COND_STATE]] +; SI: s_cbranch_execnz BB3_3 + +; SI: BB#6 +; SI: s_or_b64 exec, exec, [[COND_STATE]] + +; SI: BB3_2: +; SI-NOT: [[COND_STATE]] +; SI: s_endpgm + +define void @multi_vcond_loop(i32 addrspace(1)* noalias nocapture %arg, i32 addrspace(1)* noalias nocapture readonly %arg1, i32 addrspace(1)* noalias nocapture readonly %arg2, i32 addrspace(1)* noalias nocapture readonly %arg3) #1 { +bb: + %tmp = tail call i32 @llvm.r600.read.tidig.x() #0 + %tmp4 = sext i32 %tmp to i64 + %tmp5 = getelementptr inbounds i32 addrspace(1)* %arg3, i64 %tmp4 + %tmp6 = load i32 addrspace(1)* %tmp5, align 4 + %tmp7 = icmp sgt i32 %tmp6, 0 + %tmp8 = sext i32 %tmp6 to i64 + br i1 %tmp7, label %bb10, label %bb26 + +bb10: 
; preds = %bb, %bb20 + %tmp11 = phi i64 [ %tmp23, %bb20 ], [ 0, %bb ] + %tmp12 = add nsw i64 %tmp11, %tmp4 + %tmp13 = getelementptr inbounds i32 addrspace(1)* %arg1, i64 %tmp12 + %tmp14 = load i32 addrspace(1)* %tmp13, align 4 + %tmp15 = getelementptr inbounds i32 addrspace(1)* %arg2, i64 %tmp12 + %tmp16 = load i32 addrspace(1)* %tmp15, align 4 + %tmp17 = icmp ne i32 %tmp14, -1 + %tmp18 = icmp ne i32 %tmp16, -1 + %tmp19 = and i1 %tmp17, %tmp18 + br i1 %tmp19, label %bb20, label %bb26 + +bb20: ; preds = %bb10 + %tmp21 = add nsw i32 %tmp16, %tmp14 + %tmp22 = getelementptr inbounds i32 addrspace(1)* %arg, i64 %tmp12 + store i32 %tmp21, i32 addrspace(1)* %tmp22, align 4 + %tmp23 = add nuw nsw i64 %tmp11, 1 + %tmp24 = icmp slt i64 %tmp23, %tmp8 + br i1 %tmp24, label %bb10, label %bb26 + +bb26: ; preds = %bb10, %bb20, %bb + ret void +} + +attributes #0 = { nounwind readnone } +attributes #1 = { nounwind } diff --git a/test/CodeGen/R600/vector-alloca.ll b/test/CodeGen/R600/vector-alloca.ll index 0b457a8..228868a 100644 --- a/test/CodeGen/R600/vector-alloca.ll +++ b/test/CodeGen/R600/vector-alloca.ll @@ -1,6 +1,8 @@ ; RUN: llc -march=r600 -mcpu=redwood < %s | FileCheck --check-prefix=EG -check-prefix=FUNC %s -; RUN: llc -march=r600 -mcpu=verde -mattr=-promote-alloca -verify-machineinstrs < %s | FileCheck -check-prefix=SI-ALLOCA -check-prefix=SI -check-prefix=FUNC %s -; RUN: llc -march=r600 -mcpu=verde -mattr=+promote-alloca -verify-machineinstrs < %s | FileCheck -check-prefix=SI-PROMOTE -check-prefix=SI -check-prefix=FUNC %s +; RUN: llc -march=amdgcn -mcpu=verde -mattr=-promote-alloca -verify-machineinstrs < %s | FileCheck -check-prefix=SI-ALLOCA -check-prefix=SI -check-prefix=FUNC %s +; RUN: llc -march=amdgcn -mcpu=verde -mattr=+promote-alloca -verify-machineinstrs < %s | FileCheck -check-prefix=SI-PROMOTE -check-prefix=SI -check-prefix=FUNC %s +; RUN: llc -march=amdgcn -mcpu=tonga -mattr=-promote-alloca -verify-machineinstrs < %s | FileCheck -check-prefix=SI-ALLOCA -check-prefix=SI -check-prefix=FUNC %s +; RUN: llc -march=amdgcn -mcpu=tonga -mattr=+promote-alloca -verify-machineinstrs < %s | FileCheck -check-prefix=SI-PROMOTE -check-prefix=SI -check-prefix=FUNC %s ; FUNC-LABEL: {{^}}vector_read: ; EG: MOV diff --git a/test/CodeGen/R600/vertex-fetch-encoding.ll b/test/CodeGen/R600/vertex-fetch-encoding.ll index e24744e..e4d117f 100644 --- a/test/CodeGen/R600/vertex-fetch-encoding.ll +++ b/test/CodeGen/R600/vertex-fetch-encoding.ll @@ -1,10 +1,10 @@ -; RUN: llc < %s -march=r600 -show-mc-encoding -mcpu=barts | FileCheck --check-prefix=NI-CHECK %s -; RUN: llc < %s -march=r600 -show-mc-encoding -mcpu=cayman | FileCheck --check-prefix=CM-CHECK %s +; RUN: llc < %s -march=r600 -show-mc-encoding -mcpu=barts | FileCheck --check-prefix=NI %s +; RUN: llc < %s -march=r600 -show-mc-encoding -mcpu=cayman | FileCheck --check-prefix=CM %s -; NI-CHECK: {{^}}vtx_fetch32: -; NI-CHECK: VTX_READ_32 T[[GPR:[0-9]]].X, T[[GPR]].X, 0 ; encoding: [0x40,0x01,0x0[[GPR]],0x10,0x0[[GPR]],0xf0,0x5f,0x13,0x00,0x00,0x08,0x00 -; CM-CHECK: {{^}}vtx_fetch32: -; CM-CHECK: VTX_READ_32 T[[GPR:[0-9]]].X, T[[GPR]].X, 0 ; encoding: [0x40,0x01,0x0[[GPR]],0x00,0x0[[GPR]],0xf0,0x5f,0x13,0x00,0x00,0x00,0x00 +; NI: {{^}}vtx_fetch32: +; NI: VTX_READ_32 T[[GPR:[0-9]]].X, T[[GPR]].X, 0 ; encoding: [0x40,0x01,0x0[[GPR]],0x10,0x0[[GPR]],0xf0,0x5f,0x13,0x00,0x00,0x08,0x00 +; CM: {{^}}vtx_fetch32: +; CM: VTX_READ_32 T[[GPR:[0-9]]].X, T[[GPR]].X, 0 ; encoding: [0x40,0x01,0x0[[GPR]],0x00,0x0[[GPR]],0xf0,0x5f,0x13,0x00,0x00,0x00,0x00 define void 
@vtx_fetch32(i32 addrspace(1)* %out, i32 addrspace(1)* %in) { entry: @@ -13,8 +13,8 @@ entry: ret void } -; NI-CHECK: {{^}}vtx_fetch128: -; NI-CHECK: VTX_READ_128 T[[DST:[0-9]]].XYZW, T[[SRC:[0-9]]].X, 0 ; encoding: [0x40,0x01,0x0[[SRC]],0x40,0x0[[DST]],0x10,0x8d,0x18,0x00,0x00,0x08,0x00 +; NI: {{^}}vtx_fetch128: +; NI: VTX_READ_128 T[[DST:[0-9]]].XYZW, T[[SRC:[0-9]]].X, 0 ; encoding: [0x40,0x01,0x0[[SRC]],0x40,0x0[[DST]],0x10,0x8d,0x18,0x00,0x00,0x08,0x00 ; XXX: Add a case for Cayman when v4i32 stores are supported. define void @vtx_fetch128(<4 x i32> addrspace(1)* %out, <4 x i32> addrspace(1)* %in) { diff --git a/test/CodeGen/R600/vop-shrink.ll b/test/CodeGen/R600/vop-shrink.ll index e7f0288..d5a46e3 100644 --- a/test/CodeGen/R600/vop-shrink.ll +++ b/test/CodeGen/R600/vop-shrink.ll @@ -1,4 +1,5 @@ -; RUN: llc -march=r600 -mcpu=SI -verify-machineinstrs< %s | FileCheck -check-prefix=SI -check-prefix=FUNC %s +; RUN: llc -march=amdgcn -mcpu=SI -verify-machineinstrs< %s | FileCheck -check-prefix=SI -check-prefix=FUNC %s +; RUN: llc -march=amdgcn -mcpu=tonga -verify-machineinstrs< %s | FileCheck -check-prefix=SI -check-prefix=FUNC %s ; Test that we correctly commute a sub instruction ; FUNC-LABEL: {{^}}sub_rev: diff --git a/test/CodeGen/R600/vselect.ll b/test/CodeGen/R600/vselect.ll index e84b8f7..a6152f7 100644 --- a/test/CodeGen/R600/vselect.ll +++ b/test/CodeGen/R600/vselect.ll @@ -1,13 +1,14 @@ -;RUN: llc < %s -march=r600 -mcpu=redwood | FileCheck --check-prefix=EG-CHECK %s -;RUN: llc < %s -march=r600 -mcpu=SI -verify-machineinstrs | FileCheck --check-prefix=SI-CHECK %s +;RUN: llc < %s -march=r600 -mcpu=redwood | FileCheck --check-prefix=EG %s +;RUN: llc < %s -march=amdgcn -mcpu=SI -verify-machineinstrs | FileCheck --check-prefix=SI %s +;RUN: llc < %s -march=amdgcn -mcpu=tonga -verify-machineinstrs | FileCheck --check-prefix=SI %s -;EG-CHECK: {{^}}test_select_v2i32: -;EG-CHECK: CNDE_INT {{\** *}}T{{[0-9]+\.[XYZW], PV\.[XYZW], T[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}} -;EG-CHECK: CNDE_INT {{\** *}}T{{[0-9]+\.[XYZW], PV\.[XYZW], T[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}} +;EG: {{^}}test_select_v2i32: +;EG: CNDE_INT {{\** *}}T{{[0-9]+\.[XYZW], PV\.[XYZW], T[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}} +;EG: CNDE_INT {{\** *}}T{{[0-9]+\.[XYZW], PV\.[XYZW], T[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}} -;SI-CHECK: {{^}}test_select_v2i32: -;SI-CHECK: v_cndmask_b32_e64 -;SI-CHECK: v_cndmask_b32_e64 +;SI: {{^}}test_select_v2i32: +;SI: v_cndmask_b32_e64 +;SI: v_cndmask_b32_e64 define void @test_select_v2i32(<2 x i32> addrspace(1)* %out, <2 x i32> addrspace(1)* %in0, <2 x i32> addrspace(1)* %in1) { entry: @@ -19,13 +20,13 @@ entry: ret void } -;EG-CHECK: {{^}}test_select_v2f32: -;EG-CHECK: CNDE_INT {{\** *}}T{{[0-9]+\.[XYZW], PV\.[XYZW], T[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}} -;EG-CHECK: CNDE_INT {{\** *}}T{{[0-9]+\.[XYZW], PV\.[XYZW], T[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}} +;EG: {{^}}test_select_v2f32: +;EG: CNDE_INT {{\** *}}T{{[0-9]+\.[XYZW], PV\.[XYZW], T[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}} +;EG: CNDE_INT {{\** *}}T{{[0-9]+\.[XYZW], PV\.[XYZW], T[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}} -;SI-CHECK: {{^}}test_select_v2f32: -;SI-CHECK: v_cndmask_b32_e64 -;SI-CHECK: v_cndmask_b32_e64 +;SI: {{^}}test_select_v2f32: +;SI: v_cndmask_b32_e64 +;SI: v_cndmask_b32_e64 define void @test_select_v2f32(<2 x float> addrspace(1)* %out, <2 x float> addrspace(1)* %in0, <2 x float> addrspace(1)* %in1) { entry: @@ -37,17 +38,17 @@ entry: ret void } -;EG-CHECK: {{^}}test_select_v4i32: -;EG-CHECK: CNDE_INT {{\** *}}T{{[0-9]+\.[XYZW], PV\.[XYZW], T[0-9]+\.[XYZW], 
T[0-9]+\.[XYZW]}} -;EG-CHECK: CNDE_INT {{\** *}}T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW], T[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}} -;EG-CHECK: CNDE_INT {{\** *}}T{{[0-9]+\.[XYZW], PV\.[XYZW], T[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}} -;EG-CHECK: CNDE_INT {{\** *}}T{{[0-9]+\.[XYZW], PV\.[XYZW], T[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}} +;EG: {{^}}test_select_v4i32: +;EG: CNDE_INT {{\** *}}T{{[0-9]+\.[XYZW], PV\.[XYZW], T[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}} +;EG: CNDE_INT {{\** *}}T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW], T[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}} +;EG: CNDE_INT {{\** *}}T{{[0-9]+\.[XYZW], PV\.[XYZW], T[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}} +;EG: CNDE_INT {{\** *}}T{{[0-9]+\.[XYZW], PV\.[XYZW], T[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}} -;SI-CHECK: {{^}}test_select_v4i32: -;SI-CHECK: v_cndmask_b32_e64 -;SI-CHECK: v_cndmask_b32_e64 -;SI-CHECK: v_cndmask_b32_e64 -;SI-CHECK: v_cndmask_b32_e64 +;SI: {{^}}test_select_v4i32: +;SI: v_cndmask_b32_e64 +;SI: v_cndmask_b32_e64 +;SI: v_cndmask_b32_e64 +;SI: v_cndmask_b32_e64 define void @test_select_v4i32(<4 x i32> addrspace(1)* %out, <4 x i32> addrspace(1)* %in0, <4 x i32> addrspace(1)* %in1) { entry: @@ -59,11 +60,11 @@ entry: ret void } -;EG-CHECK: {{^}}test_select_v4f32: -;EG-CHECK: CNDE_INT {{\** *}}T{{[0-9]+\.[XYZW], PV\.[XYZW], T[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}} -;EG-CHECK: CNDE_INT {{\** *}}T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW], T[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}} -;EG-CHECK: CNDE_INT {{\** *}}T{{[0-9]+\.[XYZW], PV\.[XYZW], T[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}} -;EG-CHECK: CNDE_INT {{\** *}}T{{[0-9]+\.[XYZW], PV\.[XYZW], T[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}} +;EG: {{^}}test_select_v4f32: +;EG: CNDE_INT {{\** *}}T{{[0-9]+\.[XYZW], PV\.[XYZW], T[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}} +;EG: CNDE_INT {{\** *}}T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW], T[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}} +;EG: CNDE_INT {{\** *}}T{{[0-9]+\.[XYZW], PV\.[XYZW], T[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}} +;EG: CNDE_INT {{\** *}}T{{[0-9]+\.[XYZW], PV\.[XYZW], T[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}} define void @test_select_v4f32(<4 x float> addrspace(1)* %out, <4 x float> addrspace(1)* %in0, <4 x float> addrspace(1)* %in1) { entry: diff --git a/test/CodeGen/R600/wait.ll b/test/CodeGen/R600/wait.ll index 735eabd..43561aa 100644 --- a/test/CodeGen/R600/wait.ll +++ b/test/CodeGen/R600/wait.ll @@ -1,11 +1,11 @@ -; RUN: llc -march=r600 -mcpu=SI -verify-machineinstrs < %s | FileCheck -strict-whitespace %s +; RUN: llc -march=amdgcn -mcpu=SI -verify-machineinstrs < %s | FileCheck -strict-whitespace %s +; RUN: llc -march=amdgcn -mcpu=tonga -verify-machineinstrs < %s | FileCheck -strict-whitespace %s ; CHECK-LABEL: {{^}}main: ; CHECK: s_load_dwordx4 ; CHECK: s_load_dwordx4 -; CHECK: s_waitcnt lgkmcnt(0){{$}} -; CHECK: s_waitcnt vmcnt(0){{$}} -; CHECK: s_waitcnt expcnt(0) lgkmcnt(0){{$}} +; CHECK: s_waitcnt vmcnt(0) lgkmcnt(0){{$}} +; CHECK: s_endpgm define void @main(<16 x i8> addrspace(2)* inreg %arg, <16 x i8> addrspace(2)* inreg %arg1, <32 x i8> addrspace(2)* inreg %arg2, <16 x i8> addrspace(2)* inreg %arg3, <16 x i8> addrspace(2)* inreg %arg4, i32 inreg %arg5, i32 %arg6, i32 %arg7, i32 %arg8, i32 %arg9, float addrspace(2)* inreg %constptr) #0 { main_body: %tmp = getelementptr <16 x i8> addrspace(2)* %arg3, i32 0 @@ -41,5 +41,5 @@ attributes #0 = { "ShaderType"="1" } attributes #1 = { noduplicate nounwind } attributes #2 = { nounwind readnone } -!0 = metadata !{metadata !1, metadata !1, i64 0, i32 1} -!1 = metadata !{metadata !"const", null} +!0 = !{!1, !1, i64 0, i32 1} +!1 = !{!"const", null} diff --git a/test/CodeGen/R600/work-item-intrinsics.ll 
b/test/CodeGen/R600/work-item-intrinsics.ll index 47f65f5..4328e96 100644 --- a/test/CodeGen/R600/work-item-intrinsics.ll +++ b/test/CodeGen/R600/work-item-intrinsics.ll @@ -1,14 +1,15 @@ +; RUN: llc -march=amdgcn -mcpu=SI -verify-machineinstrs < %s | FileCheck -check-prefix=SI -check-prefix=GCN -check-prefix=FUNC %s +; RUN: llc -march=amdgcn -mcpu=tonga -verify-machineinstrs < %s | FileCheck -check-prefix=VI -check-prefix=GCN -check-prefix=FUNC %s ; RUN: llc -march=r600 -mcpu=redwood < %s | FileCheck -check-prefix=EG -check-prefix=FUNC %s -; RUN: llc -march=r600 -mcpu=SI -verify-machineinstrs < %s | FileCheck -check-prefix=SI -check-prefix=FUNC %s ; FUNC-LABEL: {{^}}ngroups_x: ; EG: MEM_RAT_CACHELESS STORE_RAW [[VAL:T[0-9]+\.X]] ; EG: MOV [[VAL]], KC0[0].X -; SI: s_load_dword [[VAL:s[0-9]+]], s[0:1], 0 -; SI: v_mov_b32_e32 [[VVAL:v[0-9]+]], [[VAL]] -; SI: buffer_store_dword [[VVAL]] +; GCN: s_load_dword [[VAL:s[0-9]+]], s[0:1], 0 +; GCN: v_mov_b32_e32 [[VVAL:v[0-9]+]], [[VAL]] +; GCN: buffer_store_dword [[VVAL]] define void @ngroups_x (i32 addrspace(1)* %out) { entry: %0 = call i32 @llvm.r600.read.ngroups.x() #0 @@ -21,8 +22,9 @@ entry: ; EG: MOV [[VAL]], KC0[0].Y ; SI: s_load_dword [[VAL:s[0-9]+]], s[0:1], 0x1 -; SI: v_mov_b32_e32 [[VVAL:v[0-9]+]], [[VAL]] -; SI: buffer_store_dword [[VVAL]] +; VI: s_load_dword [[VAL:s[0-9]+]], s[0:1], 0x4 +; GCN: v_mov_b32_e32 [[VVAL:v[0-9]+]], [[VAL]] +; GCN: buffer_store_dword [[VVAL]] define void @ngroups_y (i32 addrspace(1)* %out) { entry: %0 = call i32 @llvm.r600.read.ngroups.y() #0 @@ -35,8 +37,9 @@ entry: ; EG: MOV [[VAL]], KC0[0].Z ; SI: s_load_dword [[VAL:s[0-9]+]], s[0:1], 0x2 -; SI: v_mov_b32_e32 [[VVAL:v[0-9]+]], [[VAL]] -; SI: buffer_store_dword [[VVAL]] +; VI: s_load_dword [[VAL:s[0-9]+]], s[0:1], 0x8 +; GCN: v_mov_b32_e32 [[VVAL:v[0-9]+]], [[VAL]] +; GCN: buffer_store_dword [[VVAL]] define void @ngroups_z (i32 addrspace(1)* %out) { entry: %0 = call i32 @llvm.r600.read.ngroups.z() #0 @@ -49,8 +52,9 @@ entry: ; EG: MOV [[VAL]], KC0[0].W ; SI: s_load_dword [[VAL:s[0-9]+]], s[0:1], 0x3 -; SI: v_mov_b32_e32 [[VVAL:v[0-9]+]], [[VAL]] -; SI: buffer_store_dword [[VVAL]] +; VI: s_load_dword [[VAL:s[0-9]+]], s[0:1], 0xc +; GCN: v_mov_b32_e32 [[VVAL:v[0-9]+]], [[VAL]] +; GCN: buffer_store_dword [[VVAL]] define void @global_size_x (i32 addrspace(1)* %out) { entry: %0 = call i32 @llvm.r600.read.global.size.x() #0 @@ -63,8 +67,9 @@ entry: ; EG: MOV [[VAL]], KC0[1].X ; SI: s_load_dword [[VAL:s[0-9]+]], s[0:1], 0x4 -; SI: v_mov_b32_e32 [[VVAL:v[0-9]+]], [[VAL]] -; SI: buffer_store_dword [[VVAL]] +; VI: s_load_dword [[VAL:s[0-9]+]], s[0:1], 0x10 +; GCN: v_mov_b32_e32 [[VVAL:v[0-9]+]], [[VAL]] +; GCN: buffer_store_dword [[VVAL]] define void @global_size_y (i32 addrspace(1)* %out) { entry: %0 = call i32 @llvm.r600.read.global.size.y() #0 @@ -77,8 +82,9 @@ entry: ; EG: MOV [[VAL]], KC0[1].Y ; SI: s_load_dword [[VAL:s[0-9]+]], s[0:1], 0x5 -; SI: v_mov_b32_e32 [[VVAL:v[0-9]+]], [[VAL]] -; SI: buffer_store_dword [[VVAL]] +; VI: s_load_dword [[VAL:s[0-9]+]], s[0:1], 0x14 +; GCN: v_mov_b32_e32 [[VVAL:v[0-9]+]], [[VAL]] +; GCN: buffer_store_dword [[VVAL]] define void @global_size_z (i32 addrspace(1)* %out) { entry: %0 = call i32 @llvm.r600.read.global.size.z() #0 @@ -91,8 +97,9 @@ entry: ; EG: MOV [[VAL]], KC0[1].Z ; SI: s_load_dword [[VAL:s[0-9]+]], s[0:1], 0x6 -; SI: v_mov_b32_e32 [[VVAL:v[0-9]+]], [[VAL]] -; SI: buffer_store_dword [[VVAL]] +; VI: s_load_dword [[VAL:s[0-9]+]], s[0:1], 0x18 +; GCN: v_mov_b32_e32 [[VVAL:v[0-9]+]], [[VAL]] +; GCN: buffer_store_dword 
[[VVAL]] define void @local_size_x (i32 addrspace(1)* %out) { entry: %0 = call i32 @llvm.r600.read.local.size.x() #0 @@ -105,8 +112,9 @@ entry: ; EG: MOV [[VAL]], KC0[1].W ; SI: s_load_dword [[VAL:s[0-9]+]], s[0:1], 0x7 -; SI: v_mov_b32_e32 [[VVAL:v[0-9]+]], [[VAL]] -; SI: buffer_store_dword [[VVAL]] +; VI: s_load_dword [[VAL:s[0-9]+]], s[0:1], 0x1c +; GCN: v_mov_b32_e32 [[VVAL:v[0-9]+]], [[VAL]] +; GCN: buffer_store_dword [[VVAL]] define void @local_size_y (i32 addrspace(1)* %out) { entry: %0 = call i32 @llvm.r600.read.local.size.y() #0 @@ -119,8 +127,9 @@ entry: ; EG: MOV [[VAL]], KC0[2].X ; SI: s_load_dword [[VAL:s[0-9]+]], s[0:1], 0x8 -; SI: v_mov_b32_e32 [[VVAL:v[0-9]+]], [[VAL]] -; SI: buffer_store_dword [[VVAL]] +; VI: s_load_dword [[VAL:s[0-9]+]], s[0:1], 0x20 +; GCN: v_mov_b32_e32 [[VVAL:v[0-9]+]], [[VAL]] +; GCN: buffer_store_dword [[VVAL]] define void @local_size_z (i32 addrspace(1)* %out) { entry: %0 = call i32 @llvm.r600.read.local.size.z() #0 @@ -133,8 +142,9 @@ entry: ; EG: MOV [[VAL]], KC0[2].Z ; SI: s_load_dword [[VAL:s[0-9]+]], s[0:1], 0xb -; SI: v_mov_b32_e32 [[VVAL:v[0-9]+]], [[VAL]] -; SI: buffer_store_dword [[VVAL]] +; VI: s_load_dword [[VAL:s[0-9]+]], s[0:1], 0x2c +; GCN: v_mov_b32_e32 [[VVAL:v[0-9]+]], [[VAL]] +; GCN: buffer_store_dword [[VVAL]] define void @get_work_dim (i32 addrspace(1)* %out) { entry: %0 = call i32 @llvm.AMDGPU.read.workdim() #0 @@ -147,8 +157,8 @@ entry: ; kernel arguments, but this may change in the future. ; FUNC-LABEL: {{^}}tgid_x: -; SI: v_mov_b32_e32 [[VVAL:v[0-9]+]], s4 -; SI: buffer_store_dword [[VVAL]] +; GCN: v_mov_b32_e32 [[VVAL:v[0-9]+]], s4 +; GCN: buffer_store_dword [[VVAL]] define void @tgid_x (i32 addrspace(1)* %out) { entry: %0 = call i32 @llvm.r600.read.tgid.x() #0 @@ -157,8 +167,8 @@ entry: } ; FUNC-LABEL: {{^}}tgid_y: -; SI: v_mov_b32_e32 [[VVAL:v[0-9]+]], s5 -; SI: buffer_store_dword [[VVAL]] +; GCN: v_mov_b32_e32 [[VVAL:v[0-9]+]], s5 +; GCN: buffer_store_dword [[VVAL]] define void @tgid_y (i32 addrspace(1)* %out) { entry: %0 = call i32 @llvm.r600.read.tgid.y() #0 @@ -167,8 +177,8 @@ entry: } ; FUNC-LABEL: {{^}}tgid_z: -; SI: v_mov_b32_e32 [[VVAL:v[0-9]+]], s6 -; SI: buffer_store_dword [[VVAL]] +; GCN: v_mov_b32_e32 [[VVAL:v[0-9]+]], s6 +; GCN: buffer_store_dword [[VVAL]] define void @tgid_z (i32 addrspace(1)* %out) { entry: %0 = call i32 @llvm.r600.read.tgid.z() #0 @@ -177,7 +187,7 @@ entry: } ; FUNC-LABEL: {{^}}tidig_x: -; SI: buffer_store_dword v0 +; GCN: buffer_store_dword v0 define void @tidig_x (i32 addrspace(1)* %out) { entry: %0 = call i32 @llvm.r600.read.tidig.x() #0 @@ -186,7 +196,7 @@ entry: } ; FUNC-LABEL: {{^}}tidig_y: -; SI: buffer_store_dword v1 +; GCN: buffer_store_dword v1 define void @tidig_y (i32 addrspace(1)* %out) { entry: %0 = call i32 @llvm.r600.read.tidig.y() #0 @@ -195,7 +205,7 @@ entry: } ; FUNC-LABEL: {{^}}tidig_z: -; SI: buffer_store_dword v2 +; GCN: buffer_store_dword v2 define void @tidig_z (i32 addrspace(1)* %out) { entry: %0 = call i32 @llvm.r600.read.tidig.z() #0 diff --git a/test/CodeGen/R600/wrong-transalu-pos-fix.ll b/test/CodeGen/R600/wrong-transalu-pos-fix.ll index d652d2d..4e77c07 100644 --- a/test/CodeGen/R600/wrong-transalu-pos-fix.ll +++ b/test/CodeGen/R600/wrong-transalu-pos-fix.ll @@ -81,6 +81,6 @@ attributes #1 = { nounwind readnone } !opencl.kernels = !{!0, !1, !2} -!0 = metadata !{null} -!1 = metadata !{null} -!2 = metadata !{void (i32 addrspace(1)*)* @fill3d} +!0 = !{null} +!1 = !{null} +!2 = !{void (i32 addrspace(1)*)* @fill3d} diff --git a/test/CodeGen/R600/xor.ll 
b/test/CodeGen/R600/xor.ll index fa54e38..1526e28 100644 --- a/test/CodeGen/R600/xor.ll +++ b/test/CodeGen/R600/xor.ll @@ -1,14 +1,14 @@ -;RUN: llc < %s -march=r600 -mcpu=redwood | FileCheck --check-prefix=EG-CHECK %s -;RUN: llc < %s -march=r600 -mcpu=verde -verify-machineinstrs | FileCheck --check-prefix=SI-CHECK %s +; RUN: llc -march=amdgcn -mcpu=verde -verify-machineinstrs < %s | FileCheck -check-prefix=SI -check-prefix=FUNC %s +; RUN: llc -march=amdgcn -mcpu=tonga -verify-machineinstrs < %s | FileCheck -check-prefix=SI -check-prefix=FUNC %s +; RUN: llc -march=r600 -mcpu=redwood < %s | FileCheck -check-prefix=EG -check-prefix=FUNC %s -;EG-CHECK: {{^}}xor_v2i32: -;EG-CHECK: XOR_INT {{\** *}}T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}} -;EG-CHECK: XOR_INT {{\** *}}T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}} -;SI-CHECK: {{^}}xor_v2i32: -;SI-CHECK: v_xor_b32_e32 v{{[0-9]+, v[0-9]+, v[0-9]+}} -;SI-CHECK: v_xor_b32_e32 v{{[0-9]+, v[0-9]+, v[0-9]+}} +; FUNC-LABEL: {{^}}xor_v2i32: +; EG: XOR_INT {{\** *}}T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}} +; EG: XOR_INT {{\** *}}T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}} +; SI: v_xor_b32_e32 v{{[0-9]+, v[0-9]+, v[0-9]+}} +; SI: v_xor_b32_e32 v{{[0-9]+, v[0-9]+, v[0-9]+}} define void @xor_v2i32(<2 x i32> addrspace(1)* %out, <2 x i32> addrspace(1)* %in0, <2 x i32> addrspace(1)* %in1) { %a = load <2 x i32> addrspace(1) * %in0 @@ -18,17 +18,16 @@ define void @xor_v2i32(<2 x i32> addrspace(1)* %out, <2 x i32> addrspace(1)* %in ret void } -;EG-CHECK: {{^}}xor_v4i32: -;EG-CHECK: XOR_INT {{\** *}}T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}} -;EG-CHECK: XOR_INT {{\** *}}T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}} -;EG-CHECK: XOR_INT {{\** *}}T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}} -;EG-CHECK: XOR_INT {{\** *}}T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}} +; FUNC-LABEL: {{^}}xor_v4i32: +; EG: XOR_INT {{\** *}}T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}} +; EG: XOR_INT {{\** *}}T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}} +; EG: XOR_INT {{\** *}}T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}} +; EG: XOR_INT {{\** *}}T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}} -;SI-CHECK: {{^}}xor_v4i32: -;SI-CHECK: v_xor_b32_e32 {{v[0-9]+, v[0-9]+, v[0-9]+}} -;SI-CHECK: v_xor_b32_e32 {{v[0-9]+, v[0-9]+, v[0-9]+}} -;SI-CHECK: v_xor_b32_e32 {{v[0-9]+, v[0-9]+, v[0-9]+}} -;SI-CHECK: v_xor_b32_e32 {{v[0-9]+, v[0-9]+, v[0-9]+}} +; SI: v_xor_b32_e32 {{v[0-9]+, v[0-9]+, v[0-9]+}} +; SI: v_xor_b32_e32 {{v[0-9]+, v[0-9]+, v[0-9]+}} +; SI: v_xor_b32_e32 {{v[0-9]+, v[0-9]+, v[0-9]+}} +; SI: v_xor_b32_e32 {{v[0-9]+, v[0-9]+, v[0-9]+}} define void @xor_v4i32(<4 x i32> addrspace(1)* %out, <4 x i32> addrspace(1)* %in0, <4 x i32> addrspace(1)* %in1) { %a = load <4 x i32> addrspace(1) * %in0 @@ -38,25 +37,42 @@ define void @xor_v4i32(<4 x i32> addrspace(1)* %out, <4 x i32> addrspace(1)* %in ret void } -;EG-CHECK: {{^}}xor_i1: -;EG-CHECK: XOR_INT {{\** *}}T{{[0-9]+\.[XYZW], PV\.[XYZW], PS}} - -;SI-CHECK: {{^}}xor_i1: -;SI-CHECK: v_xor_b32_e32 v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}} +; FUNC-LABEL: {{^}}xor_i1: +; EG: XOR_INT {{\** *}}T{{[0-9]+\.[XYZW], PV\.[XYZW], PS}} +; SI-DAG: v_cmp_ge_f32_e64 [[CMP0:s\[[0-9]+:[0-9]+\]]], {{v[0-9]+}}, 0 +; SI-DAG: v_cmp_ge_f32_e64 [[CMP1:s\[[0-9]+:[0-9]+\]]], {{v[0-9]+}}, 1.0 +; SI: s_xor_b64 [[XOR:s\[[0-9]+:[0-9]+\]]], [[CMP0]], [[CMP1]] +; SI: v_cndmask_b32_e64 [[RESULT:v[0-9]+]], {{v[0-9]+}}, {{v[0-9]+}}, [[XOR]] +; SI: buffer_store_dword 
[[RESULT]] +; SI: s_endpgm define void @xor_i1(float addrspace(1)* %out, float addrspace(1)* %in0, float addrspace(1)* %in1) { %a = load float addrspace(1) * %in0 %b = load float addrspace(1) * %in1 %acmp = fcmp oge float %a, 0.000000e+00 - %bcmp = fcmp oge float %b, 0.000000e+00 + %bcmp = fcmp oge float %b, 1.000000e+00 %xor = xor i1 %acmp, %bcmp %result = select i1 %xor, float %a, float %b store float %result, float addrspace(1)* %out ret void } -; SI-CHECK-LABEL: {{^}}vector_xor_i32: -; SI-CHECK: v_xor_b32_e32 +; FUNC-LABEL: {{^}}v_xor_i1: +; SI: buffer_load_ubyte [[B:v[0-9]+]] +; SI: buffer_load_ubyte [[A:v[0-9]+]] +; SI: v_xor_b32_e32 [[XOR:v[0-9]+]], [[A]], [[B]] +; SI: v_and_b32_e32 [[RESULT:v[0-9]+]], 1, [[XOR]] +; SI: buffer_store_byte [[RESULT]] +define void @v_xor_i1(i1 addrspace(1)* %out, i1 addrspace(1)* %in0, i1 addrspace(1)* %in1) { + %a = load i1 addrspace(1)* %in0 + %b = load i1 addrspace(1)* %in1 + %xor = xor i1 %a, %b + store i1 %xor, i1 addrspace(1)* %out + ret void +} + +; FUNC-LABEL: {{^}}vector_xor_i32: +; SI: v_xor_b32_e32 define void @vector_xor_i32(i32 addrspace(1)* %out, i32 addrspace(1)* %in0, i32 addrspace(1)* %in1) { %a = load i32 addrspace(1)* %in0 %b = load i32 addrspace(1)* %in1 @@ -65,24 +81,24 @@ define void @vector_xor_i32(i32 addrspace(1)* %out, i32 addrspace(1)* %in0, i32 ret void } -; SI-CHECK-LABEL: {{^}}scalar_xor_i32: -; SI-CHECK: s_xor_b32 +; FUNC-LABEL: {{^}}scalar_xor_i32: +; SI: s_xor_b32 define void @scalar_xor_i32(i32 addrspace(1)* %out, i32 %a, i32 %b) { %result = xor i32 %a, %b store i32 %result, i32 addrspace(1)* %out ret void } -; SI-CHECK-LABEL: {{^}}scalar_not_i32: -; SI-CHECK: s_not_b32 +; FUNC-LABEL: {{^}}scalar_not_i32: +; SI: s_not_b32 define void @scalar_not_i32(i32 addrspace(1)* %out, i32 %a) { %result = xor i32 %a, -1 store i32 %result, i32 addrspace(1)* %out ret void } -; SI-CHECK-LABEL: {{^}}vector_not_i32: -; SI-CHECK: v_not_b32 +; FUNC-LABEL: {{^}}vector_not_i32: +; SI: v_not_b32 define void @vector_not_i32(i32 addrspace(1)* %out, i32 addrspace(1)* %in0, i32 addrspace(1)* %in1) { %a = load i32 addrspace(1)* %in0 %b = load i32 addrspace(1)* %in1 @@ -91,10 +107,10 @@ define void @vector_not_i32(i32 addrspace(1)* %out, i32 addrspace(1)* %in0, i32 ret void } -; SI-CHECK-LABEL: {{^}}vector_xor_i64: -; SI-CHECK: v_xor_b32_e32 -; SI-CHECK: v_xor_b32_e32 -; SI-CHECK: s_endpgm +; FUNC-LABEL: {{^}}vector_xor_i64: +; SI: v_xor_b32_e32 +; SI: v_xor_b32_e32 +; SI: s_endpgm define void @vector_xor_i64(i64 addrspace(1)* %out, i64 addrspace(1)* %in0, i64 addrspace(1)* %in1) { %a = load i64 addrspace(1)* %in0 %b = load i64 addrspace(1)* %in1 @@ -103,26 +119,26 @@ define void @vector_xor_i64(i64 addrspace(1)* %out, i64 addrspace(1)* %in0, i64 ret void } -; SI-CHECK-LABEL: {{^}}scalar_xor_i64: -; SI-CHECK: s_xor_b64 -; SI-CHECK: s_endpgm +; FUNC-LABEL: {{^}}scalar_xor_i64: +; SI: s_xor_b64 +; SI: s_endpgm define void @scalar_xor_i64(i64 addrspace(1)* %out, i64 %a, i64 %b) { %result = xor i64 %a, %b store i64 %result, i64 addrspace(1)* %out ret void } -; SI-CHECK-LABEL: {{^}}scalar_not_i64: -; SI-CHECK: s_not_b64 +; FUNC-LABEL: {{^}}scalar_not_i64: +; SI: s_not_b64 define void @scalar_not_i64(i64 addrspace(1)* %out, i64 %a) { %result = xor i64 %a, -1 store i64 %result, i64 addrspace(1)* %out ret void } -; SI-CHECK-LABEL: {{^}}vector_not_i64: -; SI-CHECK: v_not_b32 -; SI-CHECK: v_not_b32 +; FUNC-LABEL: {{^}}vector_not_i64: +; SI: v_not_b32 +; SI: v_not_b32 define void @vector_not_i64(i64 addrspace(1)* %out, i64 addrspace(1)* %in0, i64 
addrspace(1)* %in1) { %a = load i64 addrspace(1)* %in0 %b = load i64 addrspace(1)* %in1 @@ -135,8 +151,8 @@ define void @vector_not_i64(i64 addrspace(1)* %out, i64 addrspace(1)* %in0, i64 ; Note that in the future the backend may be smart enough to ; use an SALU instruction for this. -; SI-CHECK-LABEL: {{^}}xor_cf: -; SI-CHECK: s_xor_b64 +; FUNC-LABEL: {{^}}xor_cf: +; SI: s_xor_b64 define void @xor_cf(i64 addrspace(1)* %out, i64 addrspace(1)* %in, i64 %a, i64 %b) { entry: %0 = icmp eq i64 %a, 0 diff --git a/test/CodeGen/R600/zero_extend.ll b/test/CodeGen/R600/zero_extend.ll index 0fe1f15..033055d 100644 --- a/test/CodeGen/R600/zero_extend.ll +++ b/test/CodeGen/R600/zero_extend.ll @@ -1,14 +1,15 @@ -; RUN: llc < %s -march=r600 -mcpu=redwood | FileCheck %s --check-prefix=R600-CHECK -; RUN: llc < %s -march=r600 -mcpu=SI -verify-machineinstrs | FileCheck %s --check-prefix=SI-CHECK +; RUN: llc < %s -march=r600 -mcpu=redwood | FileCheck %s --check-prefix=R600 +; RUN: llc < %s -march=amdgcn -mcpu=SI -verify-machineinstrs | FileCheck %s --check-prefix=SI +; RUN: llc < %s -march=amdgcn -mcpu=tonga -verify-machineinstrs | FileCheck %s --check-prefix=SI -; R600-CHECK: {{^}}test: -; R600-CHECK: MEM_RAT_CACHELESS STORE_RAW -; R600-CHECK: MEM_RAT_CACHELESS STORE_RAW +; R600: {{^}}test: +; R600: MEM_RAT_CACHELESS STORE_RAW +; R600: MEM_RAT_CACHELESS STORE_RAW -; SI-CHECK: {{^}}test: -; SI-CHECK: s_mov_b32 [[ZERO:s[0-9]]], 0{{$}} -; SI-CHECK: v_mov_b32_e32 v[[V_ZERO:[0-9]]], [[ZERO]] -; SI-CHECK: buffer_store_dwordx2 v[0:[[V_ZERO]]{{\]}} +; SI: {{^}}test: +; SI: s_mov_b32 [[ZERO:s[0-9]]], 0{{$}} +; SI: v_mov_b32_e32 v[[V_ZERO:[0-9]]], [[ZERO]] +; SI: buffer_store_dwordx2 v[0:[[V_ZERO]]{{\]}} define void @test(i64 addrspace(1)* %out, i32 %a, i32 %b, i32 %c) { entry: %0 = mul i32 %a, %b @@ -18,8 +19,8 @@ entry: ret void } -; SI-CHECK-LABEL: {{^}}testi1toi32: -; SI-CHECK: v_cndmask_b32 +; SI-LABEL: {{^}}testi1toi32: +; SI: v_cndmask_b32 define void @testi1toi32(i32 addrspace(1)* %out, i32 %a, i32 %b) { entry: %0 = icmp eq i32 %a, %b @@ -28,10 +29,10 @@ entry: ret void } -; SI-CHECK-LABEL: {{^}}zext_i1_to_i64: -; SI-CHECK: v_cmp_eq_i32 -; SI-CHECK: v_cndmask_b32 -; SI-CHECK: s_mov_b32 s{{[0-9]+}}, 0 +; SI-LABEL: {{^}}zext_i1_to_i64: +; SI: s_mov_b32 s{{[0-9]+}}, 0 +; SI: v_cmp_eq_i32 +; SI: v_cndmask_b32 define void @zext_i1_to_i64(i64 addrspace(1)* %out, i32 %a, i32 %b) nounwind { %cmp = icmp eq i32 %a, %b %ext = zext i1 %cmp to i64 |