diff options
Diffstat (limited to 'test/CodeGen/R600')
99 files changed, 5427 insertions, 442 deletions
diff --git a/test/CodeGen/R600/add_i64.ll b/test/CodeGen/R600/add_i64.ll index c9eaeda..f733d90 100644 --- a/test/CodeGen/R600/add_i64.ll +++ b/test/CodeGen/R600/add_i64.ll @@ -70,9 +70,9 @@ define void @test_v2i64_vreg(<2 x i64> addrspace(1)* noalias %out, <2 x i64> add } ; SI-LABEL: @trunc_i64_add_to_i32 -; SI: S_LOAD_DWORD [[SREG0:s[0-9]+]], -; SI: S_LOAD_DWORD [[SREG1:s[0-9]+]], -; SI: S_ADD_I32 [[SRESULT:s[0-9]+]], [[SREG1]], [[SREG0]] +; SI: S_LOAD_DWORDX2 s{{\[}}[[SREG0:[0-9]+]] +; SI: S_LOAD_DWORDX2 s{{\[}}[[SREG1:[0-9]+]] +; SI: S_ADD_I32 [[SRESULT:s[0-9]+]], s[[SREG1]], s[[SREG0]] ; SI-NOT: ADDC ; SI: V_MOV_B32_e32 [[VRESULT:v[0-9]+]], [[SRESULT]] ; SI: BUFFER_STORE_DWORD [[VRESULT]], diff --git a/test/CodeGen/R600/and.ll b/test/CodeGen/R600/and.ll index ee9bc83..cf11481 100644 --- a/test/CodeGen/R600/and.ll +++ b/test/CodeGen/R600/and.ll @@ -1,13 +1,12 @@ -;RUN: llc < %s -march=r600 -mcpu=redwood | FileCheck --check-prefix=EG-CHECK %s -;RUN: llc < %s -march=r600 -mcpu=verde -verify-machineinstrs | FileCheck --check-prefix=SI-CHECK %s +; RUN: llc -march=r600 -mcpu=redwood < %s | FileCheck -check-prefix=EG -check-prefix=FUNC %s +; RUN: llc -march=r600 -mcpu=verde -verify-machineinstrs < %s | FileCheck -check-prefix=SI -check-prefix=FUNC %s -;EG-CHECK: @test2 -;EG-CHECK: AND_INT {{\*? *}}T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}} -;EG-CHECK: AND_INT {{\*? *}}T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}} +; FUNC-LABEL: @test2 +; EG: AND_INT {{\*? *}}T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}} +; EG: AND_INT {{\*? *}}T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}} -;SI-CHECK: @test2 -;SI-CHECK: V_AND_B32_e32 v{{[0-9]+, v[0-9]+, v[0-9]+}} -;SI-CHECK: V_AND_B32_e32 v{{[0-9]+, v[0-9]+, v[0-9]+}} +; SI: V_AND_B32_e32 v{{[0-9]+, v[0-9]+, v[0-9]+}} +; SI: V_AND_B32_e32 v{{[0-9]+, v[0-9]+, v[0-9]+}} define void @test2(<2 x i32> addrspace(1)* %out, <2 x i32> addrspace(1)* %in) { %b_ptr = getelementptr <2 x i32> addrspace(1)* %in, i32 1 @@ -18,17 +17,16 @@ define void @test2(<2 x i32> addrspace(1)* %out, <2 x i32> addrspace(1)* %in) { ret void } -;EG-CHECK: @test4 -;EG-CHECK: AND_INT {{\** *}}T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}} -;EG-CHECK: AND_INT {{\** *}}T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}} -;EG-CHECK: AND_INT {{\** *}}T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}} -;EG-CHECK: AND_INT {{\** *}}T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}} +; FUNC-LABEL: @test4 +; EG: AND_INT {{\** *}}T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}} +; EG: AND_INT {{\** *}}T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}} +; EG: AND_INT {{\** *}}T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}} +; EG: AND_INT {{\** *}}T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}} -;SI-CHECK: @test4 -;SI-CHECK: V_AND_B32_e32 v{{[0-9]+, v[0-9]+, v[0-9]+}} -;SI-CHECK: V_AND_B32_e32 v{{[0-9]+, v[0-9]+, v[0-9]+}} -;SI-CHECK: V_AND_B32_e32 v{{[0-9]+, v[0-9]+, v[0-9]+}} -;SI-CHECK: V_AND_B32_e32 v{{[0-9]+, v[0-9]+, v[0-9]+}} +; SI: V_AND_B32_e32 v{{[0-9]+, v[0-9]+, v[0-9]+}} +; SI: V_AND_B32_e32 v{{[0-9]+, v[0-9]+, v[0-9]+}} +; SI: V_AND_B32_e32 v{{[0-9]+, v[0-9]+, v[0-9]+}} +; SI: V_AND_B32_e32 v{{[0-9]+, v[0-9]+, v[0-9]+}} define void @test4(<4 x i32> addrspace(1)* %out, <4 x i32> addrspace(1)* %in) { %b_ptr = getelementptr <4 x i32> addrspace(1)* %in, i32 1 @@ -38,3 +36,75 @@ define void @test4(<4 x i32> addrspace(1)* %out, <4 x i32> addrspace(1)* %in) { store <4 x i32> %result, <4 x i32> addrspace(1)* %out ret void } + +; FUNC-LABEL: @s_and_i32 +; SI: S_AND_B32 +define void @s_and_i32(i32 addrspace(1)* %out, i32 %a, i32 %b) { + %and = and i32 %a, %b + store i32 %and, i32 addrspace(1)* %out, align 4 + ret void +} + +; FUNC-LABEL: @s_and_constant_i32 +; SI: S_AND_B32 s{{[0-9]+}}, s{{[0-9]+}}, 0x12d687 +define void @s_and_constant_i32(i32 addrspace(1)* %out, i32 %a) { + %and = and i32 %a, 1234567 + store i32 %and, i32 addrspace(1)* %out, align 4 + ret void +} + +; FUNC-LABEL: @v_and_i32 +; SI: V_AND_B32 +define void @v_and_i32(i32 addrspace(1)* %out, i32 addrspace(1)* %aptr, i32 addrspace(1)* %bptr) { + %a = load i32 addrspace(1)* %aptr, align 4 + %b = load i32 addrspace(1)* %bptr, align 4 + %and = and i32 %a, %b + store i32 %and, i32 addrspace(1)* %out, align 4 + ret void +} + +; FUNC-LABEL: @v_and_constant_i32 +; SI: V_AND_B32 +define void @v_and_constant_i32(i32 addrspace(1)* %out, i32 addrspace(1)* %aptr) { + %a = load i32 addrspace(1)* %aptr, align 4 + %and = and i32 %a, 1234567 + store i32 %and, i32 addrspace(1)* %out, align 4 + ret void +} + +; FUNC-LABEL: @s_and_i64 +; SI: S_AND_B64 +define void @s_and_i64(i64 addrspace(1)* %out, i64 %a, i64 %b) { + %and = and i64 %a, %b + store i64 %and, i64 addrspace(1)* %out, align 8 + ret void +} + +; FUNC-LABEL: @s_and_constant_i64 +; SI: S_AND_B64 +define void @s_and_constant_i64(i64 addrspace(1)* %out, i64 %a) { + %and = and i64 %a, 281474976710655 + store i64 %and, i64 addrspace(1)* %out, align 8 + ret void +} + +; FUNC-LABEL: @v_and_i64 +; SI: V_AND_B32 +; SI: V_AND_B32 +define void @v_and_i64(i64 addrspace(1)* %out, i64 addrspace(1)* %aptr, i64 addrspace(1)* %bptr) { + %a = load i64 addrspace(1)* %aptr, align 8 + %b = load i64 addrspace(1)* %bptr, align 8 + %and = and i64 %a, %b + store i64 %and, i64 addrspace(1)* %out, align 8 + ret void +} + +; FUNC-LABEL: @v_and_constant_i64 +; SI: V_AND_B32 +; SI: V_AND_B32 +define void @v_and_constant_i64(i64 addrspace(1)* %out, i64 addrspace(1)* %aptr) { + %a = load i64 addrspace(1)* %aptr, align 8 + %and = and i64 %a, 1234567 + store i64 %and, i64 addrspace(1)* %out, align 8 + ret void +} diff --git a/test/CodeGen/R600/array-ptr-calc-i32.ll b/test/CodeGen/R600/array-ptr-calc-i32.ll index c2362da..3230353 100644 --- a/test/CodeGen/R600/array-ptr-calc-i32.ll +++ b/test/CodeGen/R600/array-ptr-calc-i32.ll @@ -10,7 +10,12 @@ declare void @llvm.AMDGPU.barrier.local() nounwind noduplicate ; SI-LABEL: @test_private_array_ptr_calc: ; SI: V_ADD_I32_e32 [[PTRREG:v[0-9]+]] -; SI: V_MOVRELD_B32_e32 {{v[0-9]+}}, [[PTRREG]] +; +; FIXME: The AMDGPUPromoteAlloca pass should be able to convert this +; alloca to a vector. It currently fails because it does not know how +; to interpret: +; getelementptr [4 x i32]* %alloca, i32 1, i32 %b +; SI: DS_WRITE_B32 {{v[0-9]+}}, [[PTRREG]] define void @test_private_array_ptr_calc(i32 addrspace(1)* noalias %out, i32 addrspace(1)* noalias %inA, i32 addrspace(1)* noalias %inB) { %alloca = alloca [4 x i32], i32 4, align 16 %tid = call i32 @llvm.SI.tid() readnone diff --git a/test/CodeGen/R600/atomic_cmp_swap_local.ll b/test/CodeGen/R600/atomic_cmp_swap_local.ll new file mode 100644 index 0000000..eb9539e --- /dev/null +++ b/test/CodeGen/R600/atomic_cmp_swap_local.ll @@ -0,0 +1,37 @@ +; RUN: llc -march=r600 -mcpu=SI -verify-machineinstrs < %s | FileCheck -check-prefix=SI -check-prefix=FUNC %s + +; FUNC-LABEL: @lds_atomic_cmpxchg_ret_i32_offset: +; SI: S_LOAD_DWORD [[SWAP:s[0-9]+]], s{{\[[0-9]+:[0-9]+\]}}, 0xc +; SI: S_LOAD_DWORD [[PTR:s[0-9]+]], s{{\[[0-9]+:[0-9]+\]}}, 0xb +; SI-DAG: V_MOV_B32_e32 [[VCMP:v[0-9]+]], 7 +; SI-DAG: V_MOV_B32_e32 [[VPTR:v[0-9]+]], [[PTR]] +; SI-DAG: V_MOV_B32_e32 [[VSWAP:v[0-9]+]], [[SWAP]] +; SI: DS_CMPST_RTN_B32 [[RESULT:v[0-9]+]], [[VPTR]], [[VCMP]], [[VSWAP]], 0x10, [M0] +; SI: S_ENDPGM +define void @lds_atomic_cmpxchg_ret_i32_offset(i32 addrspace(1)* %out, i32 addrspace(3)* %ptr, i32 %swap) nounwind { + %gep = getelementptr i32 addrspace(3)* %ptr, i32 4 + %pair = cmpxchg i32 addrspace(3)* %gep, i32 7, i32 %swap seq_cst monotonic + %result = extractvalue { i32, i1 } %pair, 0 + store i32 %result, i32 addrspace(1)* %out, align 4 + ret void +} + +; FUNC-LABEL: @lds_atomic_cmpxchg_ret_i64_offset: +; SI: S_LOAD_DWORDX2 s{{\[}}[[LOSWAP:[0-9]+]]:[[HISWAP:[0-9]+]]{{\]}}, s{{\[[0-9]+:[0-9]+\]}}, 0xd +; SI: S_LOAD_DWORD [[PTR:s[0-9]+]], s{{\[[0-9]+:[0-9]+\]}}, 0xb +; SI: S_MOV_B64 s{{\[}}[[LOSCMP:[0-9]+]]:[[HISCMP:[0-9]+]]{{\]}}, 7 +; SI-DAG: V_MOV_B32_e32 v[[LOVCMP:[0-9]+]], s[[LOSCMP]] +; SI-DAG: V_MOV_B32_e32 v[[HIVCMP:[0-9]+]], s[[HISCMP]] +; SI-DAG: V_MOV_B32_e32 [[VPTR:v[0-9]+]], [[PTR]] +; SI-DAG: V_MOV_B32_e32 v[[LOSWAPV:[0-9]+]], s[[LOSWAP]] +; SI-DAG: V_MOV_B32_e32 v[[HISWAPV:[0-9]+]], s[[HISWAP]] +; SI: DS_CMPST_RTN_B64 [[RESULT:v\[[0-9]+:[0-9]+\]]], [[VPTR]], v{{\[}}[[LOVCMP]]:[[HIVCMP]]{{\]}}, v{{\[}}[[LOSWAPV]]:[[HISWAPV]]{{\]}}, 0x20, [M0] +; SI: BUFFER_STORE_DWORDX2 [[RESULT]], +; SI: S_ENDPGM +define void @lds_atomic_cmpxchg_ret_i64_offset(i64 addrspace(1)* %out, i64 addrspace(3)* %ptr, i64 %swap) nounwind { + %gep = getelementptr i64 addrspace(3)* %ptr, i32 4 + %pair = cmpxchg i64 addrspace(3)* %gep, i64 7, i64 %swap seq_cst monotonic + %result = extractvalue { i64, i1 } %pair, 0 + store i64 %result, i64 addrspace(1)* %out, align 8 + ret void +} diff --git a/test/CodeGen/R600/atomic_load_add.ll b/test/CodeGen/R600/atomic_load_add.ll index cb0242c..c26f9cd 100644 --- a/test/CodeGen/R600/atomic_load_add.ll +++ b/test/CodeGen/R600/atomic_load_add.ll @@ -1,23 +1,38 @@ -; RUN: llc < %s -march=r600 -mcpu=redwood | FileCheck %s --check-prefix=R600-CHECK -; RUN: llc < %s -march=r600 -mcpu=SI -verify-machineinstrs | FileCheck %s --check-prefix=SI-CHECK +; RUN: llc -march=r600 -mcpu=SI -verify-machineinstrs < %s | FileCheck %s -check-prefix=SI -check-prefix=FUNC +; RUN: llc -march=r600 -mcpu=redwood < %s | FileCheck -check-prefix=R600 -check-prefix=FUNC %s -; R600-CHECK-LABEL: @atomic_add_local -; R600-CHECK: LDS_ADD * -; SI-CHECK-LABEL: @atomic_add_local -; SI-CHECK: DS_ADD_U32_RTN +; FUNC-LABEL: @atomic_add_local +; R600: LDS_ADD * +; SI: DS_ADD_RTN_U32 define void @atomic_add_local(i32 addrspace(3)* %local) { -entry: - %0 = atomicrmw volatile add i32 addrspace(3)* %local, i32 5 seq_cst + %unused = atomicrmw volatile add i32 addrspace(3)* %local, i32 5 seq_cst ret void } -; R600-CHECK-LABEL: @atomic_add_ret_local -; R600-CHECK: LDS_ADD_RET * -; SI-CHECK-LABEL: @atomic_add_ret_local -; SI-CHECK: DS_ADD_U32_RTN +; FUNC-LABEL: @atomic_add_local_const_offset +; R600: LDS_ADD * +; SI: DS_ADD_RTN_U32 v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}}, 0x10 +define void @atomic_add_local_const_offset(i32 addrspace(3)* %local) { + %gep = getelementptr i32 addrspace(3)* %local, i32 4 + %val = atomicrmw volatile add i32 addrspace(3)* %gep, i32 5 seq_cst + ret void +} + +; FUNC-LABEL: @atomic_add_ret_local +; R600: LDS_ADD_RET * +; SI: DS_ADD_RTN_U32 define void @atomic_add_ret_local(i32 addrspace(1)* %out, i32 addrspace(3)* %local) { -entry: - %0 = atomicrmw volatile add i32 addrspace(3)* %local, i32 5 seq_cst - store i32 %0, i32 addrspace(1)* %out + %val = atomicrmw volatile add i32 addrspace(3)* %local, i32 5 seq_cst + store i32 %val, i32 addrspace(1)* %out + ret void +} + +; FUNC-LABEL: @atomic_add_ret_local_const_offset +; R600: LDS_ADD_RET * +; SI: DS_ADD_RTN_U32 v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}}, 0x14 +define void @atomic_add_ret_local_const_offset(i32 addrspace(1)* %out, i32 addrspace(3)* %local) { + %gep = getelementptr i32 addrspace(3)* %local, i32 5 + %val = atomicrmw volatile add i32 addrspace(3)* %gep, i32 5 seq_cst + store i32 %val, i32 addrspace(1)* %out ret void } diff --git a/test/CodeGen/R600/atomic_load_sub.ll b/test/CodeGen/R600/atomic_load_sub.ll index 7c26e52..3569d91 100644 --- a/test/CodeGen/R600/atomic_load_sub.ll +++ b/test/CodeGen/R600/atomic_load_sub.ll @@ -1,23 +1,38 @@ -; RUN: llc < %s -march=r600 -mcpu=redwood | FileCheck %s --check-prefix=R600-CHECK -; RUN: llc < %s -march=r600 -mcpu=SI -verify-machineinstrs | FileCheck %s --check-prefix=SI-CHECK +; RUN: llc -march=r600 -mcpu=redwood < %s | FileCheck -check-prefix=R600 -check-prefix=FUNC %s +; RUN: llc -march=r600 -mcpu=SI -verify-machineinstrs < %s | FileCheck -check-prefix=SI -check-prefix=FUNC %s -; R600-CHECK-LABEL: @atomic_sub_local -; R600-CHECK: LDS_SUB * -; SI-CHECK-LABEL: @atomic_sub_local -; SI-CHECK: DS_SUB_U32_RTN +; FUNC-LABEL: @atomic_sub_local +; R600: LDS_SUB * +; SI: DS_SUB_RTN_U32 define void @atomic_sub_local(i32 addrspace(3)* %local) { -entry: - %0 = atomicrmw volatile sub i32 addrspace(3)* %local, i32 5 seq_cst + %unused = atomicrmw volatile sub i32 addrspace(3)* %local, i32 5 seq_cst ret void } -; R600-CHECK-LABEL: @atomic_sub_ret_local -; R600-CHECK: LDS_SUB_RET * -; SI-CHECK-LABEL: @atomic_sub_ret_local -; SI-CHECK: DS_SUB_U32_RTN +; FUNC-LABEL: @atomic_sub_local_const_offset +; R600: LDS_SUB * +; SI: DS_SUB_RTN_U32 v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}}, 0x10 +define void @atomic_sub_local_const_offset(i32 addrspace(3)* %local) { + %gep = getelementptr i32 addrspace(3)* %local, i32 4 + %val = atomicrmw volatile sub i32 addrspace(3)* %gep, i32 5 seq_cst + ret void +} + +; FUNC-LABEL: @atomic_sub_ret_local +; R600: LDS_SUB_RET * +; SI: DS_SUB_RTN_U32 define void @atomic_sub_ret_local(i32 addrspace(1)* %out, i32 addrspace(3)* %local) { -entry: - %0 = atomicrmw volatile sub i32 addrspace(3)* %local, i32 5 seq_cst - store i32 %0, i32 addrspace(1)* %out + %val = atomicrmw volatile sub i32 addrspace(3)* %local, i32 5 seq_cst + store i32 %val, i32 addrspace(1)* %out + ret void +} + +; FUNC-LABEL: @atomic_sub_ret_local_const_offset +; R600: LDS_SUB_RET * +; SI: DS_SUB_RTN_U32 v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}}, 0x14 +define void @atomic_sub_ret_local_const_offset(i32 addrspace(1)* %out, i32 addrspace(3)* %local) { + %gep = getelementptr i32 addrspace(3)* %local, i32 5 + %val = atomicrmw volatile sub i32 addrspace(3)* %gep, i32 5 seq_cst + store i32 %val, i32 addrspace(1)* %out ret void } diff --git a/test/CodeGen/R600/big_alu.ll b/test/CodeGen/R600/big_alu.ll index 6b68376..511e8ef 100644 --- a/test/CodeGen/R600/big_alu.ll +++ b/test/CodeGen/R600/big_alu.ll @@ -101,7 +101,7 @@ IF137: ; preds = %main_body %88 = insertelement <4 x float> %87, float %32, i32 2 %89 = insertelement <4 x float> %88, float 0.000000e+00, i32 3 %90 = call float @llvm.AMDGPU.dp4(<4 x float> %85, <4 x float> %89) - %91 = call float @llvm.AMDGPU.rsq(float %90) + %91 = call float @llvm.AMDGPU.rsq.f32(float %90) %92 = fmul float %30, %91 %93 = fmul float %31, %91 %94 = fmul float %32, %91 @@ -344,7 +344,7 @@ ENDIF136: ; preds = %main_body, %ENDIF15 %325 = insertelement <4 x float> %324, float %318, i32 2 %326 = insertelement <4 x float> %325, float 0.000000e+00, i32 3 %327 = call float @llvm.AMDGPU.dp4(<4 x float> %322, <4 x float> %326) - %328 = call float @llvm.AMDGPU.rsq(float %327) + %328 = call float @llvm.AMDGPU.rsq.f32(float %327) %329 = fmul float %314, %328 %330 = fmul float %316, %328 %331 = fmul float %318, %328 @@ -377,7 +377,7 @@ ENDIF136: ; preds = %main_body, %ENDIF15 %358 = insertelement <4 x float> %357, float %45, i32 2 %359 = insertelement <4 x float> %358, float 0.000000e+00, i32 3 %360 = call float @llvm.AMDGPU.dp4(<4 x float> %355, <4 x float> %359) - %361 = call float @llvm.AMDGPU.rsq(float %360) + %361 = call float @llvm.AMDGPU.rsq.f32(float %360) %362 = fmul float %45, %361 %363 = call float @fabs(float %362) %364 = fmul float %176, 0x3FECCCCCC0000000 @@ -403,7 +403,7 @@ ENDIF136: ; preds = %main_body, %ENDIF15 %384 = insertelement <4 x float> %383, float %45, i32 2 %385 = insertelement <4 x float> %384, float 0.000000e+00, i32 3 %386 = call float @llvm.AMDGPU.dp4(<4 x float> %381, <4 x float> %385) - %387 = call float @llvm.AMDGPU.rsq(float %386) + %387 = call float @llvm.AMDGPU.rsq.f32(float %386) %388 = fmul float %45, %387 %389 = call float @fabs(float %388) %390 = fmul float %176, 0x3FF51EB860000000 @@ -1041,7 +1041,7 @@ IF179: ; preds = %ENDIF175 %896 = insertelement <4 x float> %895, float %45, i32 2 %897 = insertelement <4 x float> %896, float 0.000000e+00, i32 3 %898 = call float @llvm.AMDGPU.dp4(<4 x float> %893, <4 x float> %897) - %899 = call float @llvm.AMDGPU.rsq(float %898) + %899 = call float @llvm.AMDGPU.rsq.f32(float %898) %900 = fmul float %45, %899 %901 = call float @fabs(float %900) %902 = fmul float %176, 0x3FECCCCCC0000000 @@ -1150,7 +1150,7 @@ ENDIF178: ; preds = %ENDIF175, %IF179 declare float @llvm.AMDGPU.dp4(<4 x float>, <4 x float>) #1 ; Function Attrs: readnone -declare float @llvm.AMDGPU.rsq(float) #1 +declare float @llvm.AMDGPU.rsq.f32(float) #1 ; Function Attrs: readnone declare <4 x float> @llvm.AMDGPU.tex(<4 x float>, i32, i32, i32) #1 diff --git a/test/CodeGen/R600/bitcast.ll b/test/CodeGen/R600/bitcast.ll index 5bfc008..0be79e6 100644 --- a/test/CodeGen/R600/bitcast.ll +++ b/test/CodeGen/R600/bitcast.ll @@ -1,9 +1,11 @@ -; RUN: llc < %s -march=r600 -mcpu=SI -verify-machineinstrs | FileCheck %s +; RUN: llc -march=r600 -mcpu=SI -verify-machineinstrs < %s | FileCheck -check-prefix=SI -check-prefix=FUNC %s ; This test just checks that the compiler doesn't crash. -; CHECK-LABEL: @v32i8_to_v8i32 -; CHECK: S_ENDPGM +declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float) + +; FUNC-LABEL: @v32i8_to_v8i32 +; SI: S_ENDPGM define void @v32i8_to_v8i32(<32 x i8> addrspace(2)* inreg) #0 { entry: %1 = load <32 x i8> addrspace(2)* %0 @@ -15,12 +17,8 @@ entry: ret void } -declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float) - -attributes #0 = { "ShaderType"="0" } - -; CHECK-LABEL: @i8ptr_v16i8ptr -; CHECK: S_ENDPGM +; FUNC-LABEL: @i8ptr_v16i8ptr +; SI: S_ENDPGM define void @i8ptr_v16i8ptr(<16 x i8> addrspace(1)* %out, i8 addrspace(1)* %in) { entry: %0 = bitcast i8 addrspace(1)* %in to <16 x i8> addrspace(1)* @@ -28,3 +26,53 @@ entry: store <16 x i8> %1, <16 x i8> addrspace(1)* %out ret void } + +define void @f32_to_v2i16(<2 x i16> addrspace(1)* %out, float addrspace(1)* %in) nounwind { + %load = load float addrspace(1)* %in, align 4 + %bc = bitcast float %load to <2 x i16> + store <2 x i16> %bc, <2 x i16> addrspace(1)* %out, align 4 + ret void +} + +define void @v2i16_to_f32(float addrspace(1)* %out, <2 x i16> addrspace(1)* %in) nounwind { + %load = load <2 x i16> addrspace(1)* %in, align 4 + %bc = bitcast <2 x i16> %load to float + store float %bc, float addrspace(1)* %out, align 4 + ret void +} + +define void @v4i8_to_i32(i32 addrspace(1)* %out, <4 x i8> addrspace(1)* %in) nounwind { + %load = load <4 x i8> addrspace(1)* %in, align 4 + %bc = bitcast <4 x i8> %load to i32 + store i32 %bc, i32 addrspace(1)* %out, align 4 + ret void +} + +define void @i32_to_v4i8(<4 x i8> addrspace(1)* %out, i32 addrspace(1)* %in) nounwind { + %load = load i32 addrspace(1)* %in, align 4 + %bc = bitcast i32 %load to <4 x i8> + store <4 x i8> %bc, <4 x i8> addrspace(1)* %out, align 4 + ret void +} + +; FUNC-LABEL: @bitcast_v2i32_to_f64 +; SI: S_ENDPGM +define void @bitcast_v2i32_to_f64(double addrspace(1)* %out, <2 x i32> addrspace(1)* %in) { + %val = load <2 x i32> addrspace(1)* %in, align 8 + %add = add <2 x i32> %val, <i32 4, i32 9> + %bc = bitcast <2 x i32> %add to double + store double %bc, double addrspace(1)* %out, align 8 + ret void +} + +; FUNC-LABEL: @bitcast_f64_to_v2i32 +; SI: S_ENDPGM +define void @bitcast_f64_to_v2i32(<2 x i32> addrspace(1)* %out, double addrspace(1)* %in) { + %val = load double addrspace(1)* %in, align 8 + %add = fadd double %val, 4.0 + %bc = bitcast double %add to <2 x i32> + store <2 x i32> %bc, <2 x i32> addrspace(1)* %out, align 8 + ret void +} + +attributes #0 = { "ShaderType"="0" } diff --git a/test/CodeGen/R600/bswap.ll b/test/CodeGen/R600/bswap.ll new file mode 100644 index 0000000..6aebe85 --- /dev/null +++ b/test/CodeGen/R600/bswap.ll @@ -0,0 +1,50 @@ +; RUN: llc -march=r600 -mcpu=SI < %s + +declare i32 @llvm.bswap.i32(i32) nounwind readnone +declare <2 x i32> @llvm.bswap.v2i32(<2 x i32>) nounwind readnone +declare <4 x i32> @llvm.bswap.v4i32(<4 x i32>) nounwind readnone +declare i64 @llvm.bswap.i64(i64) nounwind readnone +declare <2 x i64> @llvm.bswap.v2i64(<2 x i64>) nounwind readnone +declare <4 x i64> @llvm.bswap.v4i64(<4 x i64>) nounwind readnone + +define void @test_bswap_i32(i32 addrspace(1)* %out, i32 addrspace(1)* %in) nounwind { + %val = load i32 addrspace(1)* %in, align 4 + %bswap = call i32 @llvm.bswap.i32(i32 %val) nounwind readnone + store i32 %bswap, i32 addrspace(1)* %out, align 4 + ret void +} + +define void @test_bswap_v2i32(<2 x i32> addrspace(1)* %out, <2 x i32> addrspace(1)* %in) nounwind { + %val = load <2 x i32> addrspace(1)* %in, align 8 + %bswap = call <2 x i32> @llvm.bswap.v2i32(<2 x i32> %val) nounwind readnone + store <2 x i32> %bswap, <2 x i32> addrspace(1)* %out, align 8 + ret void +} + +define void @test_bswap_v4i32(<4 x i32> addrspace(1)* %out, <4 x i32> addrspace(1)* %in) nounwind { + %val = load <4 x i32> addrspace(1)* %in, align 16 + %bswap = call <4 x i32> @llvm.bswap.v4i32(<4 x i32> %val) nounwind readnone + store <4 x i32> %bswap, <4 x i32> addrspace(1)* %out, align 16 + ret void +} + +define void @test_bswap_i64(i64 addrspace(1)* %out, i64 addrspace(1)* %in) nounwind { + %val = load i64 addrspace(1)* %in, align 8 + %bswap = call i64 @llvm.bswap.i64(i64 %val) nounwind readnone + store i64 %bswap, i64 addrspace(1)* %out, align 8 + ret void +} + +define void @test_bswap_v2i64(<2 x i64> addrspace(1)* %out, <2 x i64> addrspace(1)* %in) nounwind { + %val = load <2 x i64> addrspace(1)* %in, align 16 + %bswap = call <2 x i64> @llvm.bswap.v2i64(<2 x i64> %val) nounwind readnone + store <2 x i64> %bswap, <2 x i64> addrspace(1)* %out, align 16 + ret void +} + +define void @test_bswap_v4i64(<4 x i64> addrspace(1)* %out, <4 x i64> addrspace(1)* %in) nounwind { + %val = load <4 x i64> addrspace(1)* %in, align 32 + %bswap = call <4 x i64> @llvm.bswap.v4i64(<4 x i64> %val) nounwind readnone + store <4 x i64> %bswap, <4 x i64> addrspace(1)* %out, align 32 + ret void +} diff --git a/test/CodeGen/R600/ctlz_zero_undef.ll b/test/CodeGen/R600/ctlz_zero_undef.ll new file mode 100644 index 0000000..15b5188 --- /dev/null +++ b/test/CodeGen/R600/ctlz_zero_undef.ll @@ -0,0 +1,57 @@ +; RUN: llc -march=r600 -mcpu=SI -verify-machineinstrs < %s | FileCheck -check-prefix=SI -check-prefix=FUNC %s + +declare i32 @llvm.ctlz.i32(i32, i1) nounwind readnone +declare <2 x i32> @llvm.ctlz.v2i32(<2 x i32>, i1) nounwind readnone +declare <4 x i32> @llvm.ctlz.v4i32(<4 x i32>, i1) nounwind readnone + +; FUNC-LABEL: @s_ctlz_zero_undef_i32: +; SI: S_LOAD_DWORD [[VAL:s[0-9]+]], +; SI: S_FLBIT_I32_B32 [[SRESULT:s[0-9]+]], [[VAL]] +; SI: V_MOV_B32_e32 [[VRESULT:v[0-9]+]], [[SRESULT]] +; SI: BUFFER_STORE_DWORD [[VRESULT]], +; SI: S_ENDPGM +define void @s_ctlz_zero_undef_i32(i32 addrspace(1)* noalias %out, i32 %val) nounwind { + %ctlz = call i32 @llvm.ctlz.i32(i32 %val, i1 true) nounwind readnone + store i32 %ctlz, i32 addrspace(1)* %out, align 4 + ret void +} + +; FUNC-LABEL: @v_ctlz_zero_undef_i32: +; SI: BUFFER_LOAD_DWORD [[VAL:v[0-9]+]], +; SI: V_FFBH_U32_e32 [[RESULT:v[0-9]+]], [[VAL]] +; SI: BUFFER_STORE_DWORD [[RESULT]], +; SI: S_ENDPGM +define void @v_ctlz_zero_undef_i32(i32 addrspace(1)* noalias %out, i32 addrspace(1)* noalias %valptr) nounwind { + %val = load i32 addrspace(1)* %valptr, align 4 + %ctlz = call i32 @llvm.ctlz.i32(i32 %val, i1 true) nounwind readnone + store i32 %ctlz, i32 addrspace(1)* %out, align 4 + ret void +} + +; FUNC-LABEL: @v_ctlz_zero_undef_v2i32: +; SI: BUFFER_LOAD_DWORDX2 +; SI: V_FFBH_U32_e32 +; SI: V_FFBH_U32_e32 +; SI: BUFFER_STORE_DWORDX2 +; SI: S_ENDPGM +define void @v_ctlz_zero_undef_v2i32(<2 x i32> addrspace(1)* noalias %out, <2 x i32> addrspace(1)* noalias %valptr) nounwind { + %val = load <2 x i32> addrspace(1)* %valptr, align 8 + %ctlz = call <2 x i32> @llvm.ctlz.v2i32(<2 x i32> %val, i1 true) nounwind readnone + store <2 x i32> %ctlz, <2 x i32> addrspace(1)* %out, align 8 + ret void +} + +; FUNC-LABEL: @v_ctlz_zero_undef_v4i32: +; SI: BUFFER_LOAD_DWORDX4 +; SI: V_FFBH_U32_e32 +; SI: V_FFBH_U32_e32 +; SI: V_FFBH_U32_e32 +; SI: V_FFBH_U32_e32 +; SI: BUFFER_STORE_DWORDX4 +; SI: S_ENDPGM +define void @v_ctlz_zero_undef_v4i32(<4 x i32> addrspace(1)* noalias %out, <4 x i32> addrspace(1)* noalias %valptr) nounwind { + %val = load <4 x i32> addrspace(1)* %valptr, align 16 + %ctlz = call <4 x i32> @llvm.ctlz.v4i32(<4 x i32> %val, i1 true) nounwind readnone + store <4 x i32> %ctlz, <4 x i32> addrspace(1)* %out, align 16 + ret void +} diff --git a/test/CodeGen/R600/ctpop.ll b/test/CodeGen/R600/ctpop.ll new file mode 100644 index 0000000..15be8e1 --- /dev/null +++ b/test/CodeGen/R600/ctpop.ll @@ -0,0 +1,284 @@ +; RUN: llc -march=r600 -mcpu=SI -verify-machineinstrs < %s | FileCheck -check-prefix=SI -check-prefix=FUNC %s +; RUN: llc -march=r600 -mcpu=cypress -verify-machineinstrs < %s | FileCheck -check-prefix=EG -check-prefix=FUNC %s + +declare i32 @llvm.ctpop.i32(i32) nounwind readnone +declare <2 x i32> @llvm.ctpop.v2i32(<2 x i32>) nounwind readnone +declare <4 x i32> @llvm.ctpop.v4i32(<4 x i32>) nounwind readnone +declare <8 x i32> @llvm.ctpop.v8i32(<8 x i32>) nounwind readnone +declare <16 x i32> @llvm.ctpop.v16i32(<16 x i32>) nounwind readnone + +; FUNC-LABEL: @s_ctpop_i32: +; SI: S_LOAD_DWORD [[SVAL:s[0-9]+]], +; SI: S_BCNT1_I32_B32 [[SRESULT:s[0-9]+]], [[SVAL]] +; SI: V_MOV_B32_e32 [[VRESULT:v[0-9]+]], [[SRESULT]] +; SI: BUFFER_STORE_DWORD [[VRESULT]], +; SI: S_ENDPGM + +; EG: BCNT_INT +define void @s_ctpop_i32(i32 addrspace(1)* noalias %out, i32 %val) nounwind { + %ctpop = call i32 @llvm.ctpop.i32(i32 %val) nounwind readnone + store i32 %ctpop, i32 addrspace(1)* %out, align 4 + ret void +} + +; XXX - Why 0 in register? +; FUNC-LABEL: @v_ctpop_i32: +; SI: BUFFER_LOAD_DWORD [[VAL:v[0-9]+]], +; SI: V_MOV_B32_e32 [[VZERO:v[0-9]+]], 0 +; SI: V_BCNT_U32_B32_e32 [[RESULT:v[0-9]+]], [[VAL]], [[VZERO]] +; SI: BUFFER_STORE_DWORD [[RESULT]], +; SI: S_ENDPGM + +; EG: BCNT_INT +define void @v_ctpop_i32(i32 addrspace(1)* noalias %out, i32 addrspace(1)* noalias %in) nounwind { + %val = load i32 addrspace(1)* %in, align 4 + %ctpop = call i32 @llvm.ctpop.i32(i32 %val) nounwind readnone + store i32 %ctpop, i32 addrspace(1)* %out, align 4 + ret void +} + +; FUNC-LABEL: @v_ctpop_add_chain_i32 +; SI: BUFFER_LOAD_DWORD [[VAL0:v[0-9]+]], +; SI: BUFFER_LOAD_DWORD [[VAL1:v[0-9]+]], +; SI: V_MOV_B32_e32 [[VZERO:v[0-9]+]], 0 +; SI: V_BCNT_U32_B32_e32 [[MIDRESULT:v[0-9]+]], [[VAL1]], [[VZERO]] +; SI-NOT: ADD +; SI: V_BCNT_U32_B32_e64 [[RESULT:v[0-9]+]], [[VAL0]], [[MIDRESULT]] +; SI: BUFFER_STORE_DWORD [[RESULT]], +; SI: S_ENDPGM + +; EG: BCNT_INT +; EG: BCNT_INT +define void @v_ctpop_add_chain_i32(i32 addrspace(1)* noalias %out, i32 addrspace(1)* noalias %in0, i32 addrspace(1)* noalias %in1) nounwind { + %val0 = load i32 addrspace(1)* %in0, align 4 + %val1 = load i32 addrspace(1)* %in1, align 4 + %ctpop0 = call i32 @llvm.ctpop.i32(i32 %val0) nounwind readnone + %ctpop1 = call i32 @llvm.ctpop.i32(i32 %val1) nounwind readnone + %add = add i32 %ctpop0, %ctpop1 + store i32 %add, i32 addrspace(1)* %out, align 4 + ret void +} + +; FUNC-LABEL: @v_ctpop_v2i32: +; SI: V_BCNT_U32_B32_e32 +; SI: V_BCNT_U32_B32_e32 +; SI: S_ENDPGM + +; EG: BCNT_INT +; EG: BCNT_INT +define void @v_ctpop_v2i32(<2 x i32> addrspace(1)* noalias %out, <2 x i32> addrspace(1)* noalias %in) nounwind { + %val = load <2 x i32> addrspace(1)* %in, align 8 + %ctpop = call <2 x i32> @llvm.ctpop.v2i32(<2 x i32> %val) nounwind readnone + store <2 x i32> %ctpop, <2 x i32> addrspace(1)* %out, align 8 + ret void +} + +; FUNC-LABEL: @v_ctpop_v4i32: +; SI: V_BCNT_U32_B32_e32 +; SI: V_BCNT_U32_B32_e32 +; SI: V_BCNT_U32_B32_e32 +; SI: V_BCNT_U32_B32_e32 +; SI: S_ENDPGM + +; EG: BCNT_INT +; EG: BCNT_INT +; EG: BCNT_INT +; EG: BCNT_INT +define void @v_ctpop_v4i32(<4 x i32> addrspace(1)* noalias %out, <4 x i32> addrspace(1)* noalias %in) nounwind { + %val = load <4 x i32> addrspace(1)* %in, align 16 + %ctpop = call <4 x i32> @llvm.ctpop.v4i32(<4 x i32> %val) nounwind readnone + store <4 x i32> %ctpop, <4 x i32> addrspace(1)* %out, align 16 + ret void +} + +; FUNC-LABEL: @v_ctpop_v8i32: +; SI: V_BCNT_U32_B32_e32 +; SI: V_BCNT_U32_B32_e32 +; SI: V_BCNT_U32_B32_e32 +; SI: V_BCNT_U32_B32_e32 +; SI: V_BCNT_U32_B32_e32 +; SI: V_BCNT_U32_B32_e32 +; SI: V_BCNT_U32_B32_e32 +; SI: V_BCNT_U32_B32_e32 +; SI: S_ENDPGM + +; EG: BCNT_INT +; EG: BCNT_INT +; EG: BCNT_INT +; EG: BCNT_INT +; EG: BCNT_INT +; EG: BCNT_INT +; EG: BCNT_INT +; EG: BCNT_INT +define void @v_ctpop_v8i32(<8 x i32> addrspace(1)* noalias %out, <8 x i32> addrspace(1)* noalias %in) nounwind { + %val = load <8 x i32> addrspace(1)* %in, align 32 + %ctpop = call <8 x i32> @llvm.ctpop.v8i32(<8 x i32> %val) nounwind readnone + store <8 x i32> %ctpop, <8 x i32> addrspace(1)* %out, align 32 + ret void +} + +; FUNC-LABEL: @v_ctpop_v16i32: +; SI: V_BCNT_U32_B32_e32 +; SI: V_BCNT_U32_B32_e32 +; SI: V_BCNT_U32_B32_e32 +; SI: V_BCNT_U32_B32_e32 +; SI: V_BCNT_U32_B32_e32 +; SI: V_BCNT_U32_B32_e32 +; SI: V_BCNT_U32_B32_e32 +; SI: V_BCNT_U32_B32_e32 +; SI: V_BCNT_U32_B32_e32 +; SI: V_BCNT_U32_B32_e32 +; SI: V_BCNT_U32_B32_e32 +; SI: V_BCNT_U32_B32_e32 +; SI: V_BCNT_U32_B32_e32 +; SI: V_BCNT_U32_B32_e32 +; SI: V_BCNT_U32_B32_e32 +; SI: V_BCNT_U32_B32_e32 +; SI: S_ENDPGM + +; EG: BCNT_INT +; EG: BCNT_INT +; EG: BCNT_INT +; EG: BCNT_INT +; EG: BCNT_INT +; EG: BCNT_INT +; EG: BCNT_INT +; EG: BCNT_INT +; EG: BCNT_INT +; EG: BCNT_INT +; EG: BCNT_INT +; EG: BCNT_INT +; EG: BCNT_INT +; EG: BCNT_INT +; EG: BCNT_INT +; EG: BCNT_INT +define void @v_ctpop_v16i32(<16 x i32> addrspace(1)* noalias %out, <16 x i32> addrspace(1)* noalias %in) nounwind { + %val = load <16 x i32> addrspace(1)* %in, align 32 + %ctpop = call <16 x i32> @llvm.ctpop.v16i32(<16 x i32> %val) nounwind readnone + store <16 x i32> %ctpop, <16 x i32> addrspace(1)* %out, align 32 + ret void +} + +; FUNC-LABEL: @v_ctpop_i32_add_inline_constant: +; SI: BUFFER_LOAD_DWORD [[VAL:v[0-9]+]], +; SI: V_BCNT_U32_B32_e64 [[RESULT:v[0-9]+]], [[VAL]], 4 +; SI: BUFFER_STORE_DWORD [[RESULT]], +; SI: S_ENDPGM + +; EG: BCNT_INT +define void @v_ctpop_i32_add_inline_constant(i32 addrspace(1)* noalias %out, i32 addrspace(1)* noalias %in) nounwind { + %val = load i32 addrspace(1)* %in, align 4 + %ctpop = call i32 @llvm.ctpop.i32(i32 %val) nounwind readnone + %add = add i32 %ctpop, 4 + store i32 %add, i32 addrspace(1)* %out, align 4 + ret void +} + +; FUNC-LABEL: @v_ctpop_i32_add_inline_constant_inv: +; SI: BUFFER_LOAD_DWORD [[VAL:v[0-9]+]], +; SI: V_BCNT_U32_B32_e64 [[RESULT:v[0-9]+]], [[VAL]], 4 +; SI: BUFFER_STORE_DWORD [[RESULT]], +; SI: S_ENDPGM + +; EG: BCNT_INT +define void @v_ctpop_i32_add_inline_constant_inv(i32 addrspace(1)* noalias %out, i32 addrspace(1)* noalias %in) nounwind { + %val = load i32 addrspace(1)* %in, align 4 + %ctpop = call i32 @llvm.ctpop.i32(i32 %val) nounwind readnone + %add = add i32 4, %ctpop + store i32 %add, i32 addrspace(1)* %out, align 4 + ret void +} + +; FUNC-LABEL: @v_ctpop_i32_add_literal: +; SI: BUFFER_LOAD_DWORD [[VAL:v[0-9]+]], +; SI: V_MOV_B32_e32 [[LIT:v[0-9]+]], 0x1869f +; SI: V_BCNT_U32_B32_e32 [[RESULT:v[0-9]+]], [[VAL]], [[LIT]] +; SI: BUFFER_STORE_DWORD [[RESULT]], +; SI: S_ENDPGM +define void @v_ctpop_i32_add_literal(i32 addrspace(1)* noalias %out, i32 addrspace(1)* noalias %in) nounwind { + %val = load i32 addrspace(1)* %in, align 4 + %ctpop = call i32 @llvm.ctpop.i32(i32 %val) nounwind readnone + %add = add i32 %ctpop, 99999 + store i32 %add, i32 addrspace(1)* %out, align 4 + ret void +} + +; FUNC-LABEL: @v_ctpop_i32_add_var: +; SI-DAG: BUFFER_LOAD_DWORD [[VAL:v[0-9]+]], +; SI-DAG: S_LOAD_DWORD [[VAR:s[0-9]+]], +; SI: V_BCNT_U32_B32_e64 [[RESULT:v[0-9]+]], [[VAL]], [[VAR]] +; SI: BUFFER_STORE_DWORD [[RESULT]], +; SI: S_ENDPGM + +; EG: BCNT_INT +define void @v_ctpop_i32_add_var(i32 addrspace(1)* noalias %out, i32 addrspace(1)* noalias %in, i32 %const) nounwind { + %val = load i32 addrspace(1)* %in, align 4 + %ctpop = call i32 @llvm.ctpop.i32(i32 %val) nounwind readnone + %add = add i32 %ctpop, %const + store i32 %add, i32 addrspace(1)* %out, align 4 + ret void +} + +; FUNC-LABEL: @v_ctpop_i32_add_var_inv: +; SI-DAG: BUFFER_LOAD_DWORD [[VAL:v[0-9]+]], +; SI-DAG: S_LOAD_DWORD [[VAR:s[0-9]+]], +; SI: V_BCNT_U32_B32_e64 [[RESULT:v[0-9]+]], [[VAL]], [[VAR]] +; SI: BUFFER_STORE_DWORD [[RESULT]], +; SI: S_ENDPGM + +; EG: BCNT_INT +define void @v_ctpop_i32_add_var_inv(i32 addrspace(1)* noalias %out, i32 addrspace(1)* noalias %in, i32 %const) nounwind { + %val = load i32 addrspace(1)* %in, align 4 + %ctpop = call i32 @llvm.ctpop.i32(i32 %val) nounwind readnone + %add = add i32 %const, %ctpop + store i32 %add, i32 addrspace(1)* %out, align 4 + ret void +} + +; FUNC-LABEL: @v_ctpop_i32_add_vvar_inv +; SI-DAG: BUFFER_LOAD_DWORD [[VAL:v[0-9]+]], {{.*}} + 0x0 +; SI-DAG: BUFFER_LOAD_DWORD [[VAR:v[0-9]+]], {{.*}} + 0x10 +; SI: V_BCNT_U32_B32_e32 [[RESULT:v[0-9]+]], [[VAL]], [[VAR]] +; SI: BUFFER_STORE_DWORD [[RESULT]], +; SI: S_ENDPGM + +; EG: BCNT_INT +define void @v_ctpop_i32_add_vvar_inv(i32 addrspace(1)* noalias %out, i32 addrspace(1)* noalias %in, i32 addrspace(1)* noalias %constptr) nounwind { + %val = load i32 addrspace(1)* %in, align 4 + %ctpop = call i32 @llvm.ctpop.i32(i32 %val) nounwind readnone + %gep = getelementptr i32 addrspace(1)* %constptr, i32 4 + %const = load i32 addrspace(1)* %gep, align 4 + %add = add i32 %const, %ctpop + store i32 %add, i32 addrspace(1)* %out, align 4 + ret void +} + +; FIXME: We currently disallow SALU instructions in all branches, +; but there are some cases when the should be allowed. + +; FUNC-LABEL: @ctpop_i32_in_br +; SI: BUFFER_LOAD_DWORD [[VAL:v[0-9]+]], +; SI: V_BCNT_U32_B32_e64 [[RESULT:v[0-9]+]], [[VAL]], 0 +; SI: BUFFER_STORE_DWORD [[RESULT]], +; SI: S_ENDPGM +; EG: BCNT_INT +define void @ctpop_i32_in_br(i32 addrspace(1)* %out, i32 addrspace(1)* %in, i32 %cond) { +entry: + %0 = icmp eq i32 %cond, 0 + br i1 %0, label %if, label %else + +if: + %1 = load i32 addrspace(1)* %in + %2 = call i32 @llvm.ctpop.i32(i32 %1) + br label %endif + +else: + %3 = getelementptr i32 addrspace(1)* %in, i32 1 + %4 = load i32 addrspace(1)* %3 + br label %endif + +endif: + %5 = phi i32 [%2, %if], [%4, %else] + store i32 %5, i32 addrspace(1)* %out + ret void +} diff --git a/test/CodeGen/R600/ctpop64.ll b/test/CodeGen/R600/ctpop64.ll new file mode 100644 index 0000000..b36ecc6 --- /dev/null +++ b/test/CodeGen/R600/ctpop64.ll @@ -0,0 +1,122 @@ +; RUN: llc -march=r600 -mcpu=SI -verify-machineinstrs < %s | FileCheck -check-prefix=SI -check-prefix=FUNC %s + +declare i64 @llvm.ctpop.i64(i64) nounwind readnone +declare <2 x i64> @llvm.ctpop.v2i64(<2 x i64>) nounwind readnone +declare <4 x i64> @llvm.ctpop.v4i64(<4 x i64>) nounwind readnone +declare <8 x i64> @llvm.ctpop.v8i64(<8 x i64>) nounwind readnone +declare <16 x i64> @llvm.ctpop.v16i64(<16 x i64>) nounwind readnone + +; FUNC-LABEL: @s_ctpop_i64: +; SI: S_LOAD_DWORDX2 [[SVAL:s\[[0-9]+:[0-9]+\]]], +; SI: S_BCNT1_I32_B64 [[SRESULT:s[0-9]+]], [[SVAL]] +; SI: V_MOV_B32_e32 [[VRESULT:v[0-9]+]], [[SRESULT]] +; SI: BUFFER_STORE_DWORD [[VRESULT]], +; SI: S_ENDPGM +define void @s_ctpop_i64(i32 addrspace(1)* noalias %out, i64 %val) nounwind { + %ctpop = call i64 @llvm.ctpop.i64(i64 %val) nounwind readnone + %truncctpop = trunc i64 %ctpop to i32 + store i32 %truncctpop, i32 addrspace(1)* %out, align 4 + ret void +} + +; FUNC-LABEL: @v_ctpop_i64: +; SI: BUFFER_LOAD_DWORDX2 v{{\[}}[[LOVAL:[0-9]+]]:[[HIVAL:[0-9]+]]{{\]}}, +; SI: V_MOV_B32_e32 [[VZERO:v[0-9]+]], 0 +; SI: V_BCNT_U32_B32_e32 [[MIDRESULT:v[0-9]+]], v[[LOVAL]], [[VZERO]] +; SI-NEXT: V_BCNT_U32_B32_e32 [[RESULT:v[0-9]+]], v[[HIVAL]], [[MIDRESULT]] +; SI: BUFFER_STORE_DWORD [[RESULT]], +; SI: S_ENDPGM +define void @v_ctpop_i64(i32 addrspace(1)* noalias %out, i64 addrspace(1)* noalias %in) nounwind { + %val = load i64 addrspace(1)* %in, align 8 + %ctpop = call i64 @llvm.ctpop.i64(i64 %val) nounwind readnone + %truncctpop = trunc i64 %ctpop to i32 + store i32 %truncctpop, i32 addrspace(1)* %out, align 4 + ret void +} + +; FUNC-LABEL: @s_ctpop_v2i64: +; SI: S_BCNT1_I32_B64 +; SI: S_BCNT1_I32_B64 +; SI: S_ENDPGM +define void @s_ctpop_v2i64(<2 x i32> addrspace(1)* noalias %out, <2 x i64> %val) nounwind { + %ctpop = call <2 x i64> @llvm.ctpop.v2i64(<2 x i64> %val) nounwind readnone + %truncctpop = trunc <2 x i64> %ctpop to <2 x i32> + store <2 x i32> %truncctpop, <2 x i32> addrspace(1)* %out, align 8 + ret void +} + +; FUNC-LABEL: @s_ctpop_v4i64: +; SI: S_BCNT1_I32_B64 +; SI: S_BCNT1_I32_B64 +; SI: S_BCNT1_I32_B64 +; SI: S_BCNT1_I32_B64 +; SI: S_ENDPGM +define void @s_ctpop_v4i64(<4 x i32> addrspace(1)* noalias %out, <4 x i64> %val) nounwind { + %ctpop = call <4 x i64> @llvm.ctpop.v4i64(<4 x i64> %val) nounwind readnone + %truncctpop = trunc <4 x i64> %ctpop to <4 x i32> + store <4 x i32> %truncctpop, <4 x i32> addrspace(1)* %out, align 16 + ret void +} + +; FUNC-LABEL: @v_ctpop_v2i64: +; SI: V_BCNT_U32_B32 +; SI: V_BCNT_U32_B32 +; SI: V_BCNT_U32_B32 +; SI: V_BCNT_U32_B32 +; SI: S_ENDPGM +define void @v_ctpop_v2i64(<2 x i32> addrspace(1)* noalias %out, <2 x i64> addrspace(1)* noalias %in) nounwind { + %val = load <2 x i64> addrspace(1)* %in, align 16 + %ctpop = call <2 x i64> @llvm.ctpop.v2i64(<2 x i64> %val) nounwind readnone + %truncctpop = trunc <2 x i64> %ctpop to <2 x i32> + store <2 x i32> %truncctpop, <2 x i32> addrspace(1)* %out, align 8 + ret void +} + +; FUNC-LABEL: @v_ctpop_v4i64: +; SI: V_BCNT_U32_B32 +; SI: V_BCNT_U32_B32 +; SI: V_BCNT_U32_B32 +; SI: V_BCNT_U32_B32 +; SI: V_BCNT_U32_B32 +; SI: V_BCNT_U32_B32 +; SI: V_BCNT_U32_B32 +; SI: V_BCNT_U32_B32 +; SI: S_ENDPGM +define void @v_ctpop_v4i64(<4 x i32> addrspace(1)* noalias %out, <4 x i64> addrspace(1)* noalias %in) nounwind { + %val = load <4 x i64> addrspace(1)* %in, align 32 + %ctpop = call <4 x i64> @llvm.ctpop.v4i64(<4 x i64> %val) nounwind readnone + %truncctpop = trunc <4 x i64> %ctpop to <4 x i32> + store <4 x i32> %truncctpop, <4 x i32> addrspace(1)* %out, align 16 + ret void +} + +; FIXME: We currently disallow SALU instructions in all branches, +; but there are some cases when the should be allowed. + +; FUNC-LABEL: @ctpop_i64_in_br +; SI: V_BCNT_U32_B32_e64 [[BCNT_LO:v[0-9]+]], v{{[0-9]+}}, 0 +; SI: V_BCNT_U32_B32_e32 v[[BCNT:[0-9]+]], v{{[0-9]+}}, [[BCNT_LO]] +; SI: V_MOV_B32_e32 v[[ZERO:[0-9]+]], 0 +; SI: BUFFER_STORE_DWORDX2 v[ +; SI: [[BCNT]]:[[ZERO]]] +; SI: S_ENDPGM +define void @ctpop_i64_in_br(i64 addrspace(1)* %out, i64 addrspace(1)* %in, i32 %cond) { +entry: + %0 = icmp eq i32 %cond, 0 + br i1 %0, label %if, label %else + +if: + %1 = load i64 addrspace(1)* %in + %2 = call i64 @llvm.ctpop.i64(i64 %1) + br label %endif + +else: + %3 = getelementptr i64 addrspace(1)* %in, i32 1 + %4 = load i64 addrspace(1)* %3 + br label %endif + +endif: + %5 = phi i64 [%2, %if], [%4, %else] + store i64 %5, i64 addrspace(1)* %out + ret void +} diff --git a/test/CodeGen/R600/cttz_zero_undef.ll b/test/CodeGen/R600/cttz_zero_undef.ll new file mode 100644 index 0000000..cf44f8e --- /dev/null +++ b/test/CodeGen/R600/cttz_zero_undef.ll @@ -0,0 +1,57 @@ +; RUN: llc -march=r600 -mcpu=SI -verify-machineinstrs < %s | FileCheck -check-prefix=SI -check-prefix=FUNC %s + +declare i32 @llvm.cttz.i32(i32, i1) nounwind readnone +declare <2 x i32> @llvm.cttz.v2i32(<2 x i32>, i1) nounwind readnone +declare <4 x i32> @llvm.cttz.v4i32(<4 x i32>, i1) nounwind readnone + +; FUNC-LABEL: @s_cttz_zero_undef_i32: +; SI: S_LOAD_DWORD [[VAL:s[0-9]+]], +; SI: S_FF1_I32_B32 [[SRESULT:s[0-9]+]], [[VAL]] +; SI: V_MOV_B32_e32 [[VRESULT:v[0-9]+]], [[SRESULT]] +; SI: BUFFER_STORE_DWORD [[VRESULT]], +; SI: S_ENDPGM +define void @s_cttz_zero_undef_i32(i32 addrspace(1)* noalias %out, i32 %val) nounwind { + %cttz = call i32 @llvm.cttz.i32(i32 %val, i1 true) nounwind readnone + store i32 %cttz, i32 addrspace(1)* %out, align 4 + ret void +} + +; FUNC-LABEL: @v_cttz_zero_undef_i32: +; SI: BUFFER_LOAD_DWORD [[VAL:v[0-9]+]], +; SI: V_FFBL_B32_e32 [[RESULT:v[0-9]+]], [[VAL]] +; SI: BUFFER_STORE_DWORD [[RESULT]], +; SI: S_ENDPGM +define void @v_cttz_zero_undef_i32(i32 addrspace(1)* noalias %out, i32 addrspace(1)* noalias %valptr) nounwind { + %val = load i32 addrspace(1)* %valptr, align 4 + %cttz = call i32 @llvm.cttz.i32(i32 %val, i1 true) nounwind readnone + store i32 %cttz, i32 addrspace(1)* %out, align 4 + ret void +} + +; FUNC-LABEL: @v_cttz_zero_undef_v2i32: +; SI: BUFFER_LOAD_DWORDX2 +; SI: V_FFBL_B32_e32 +; SI: V_FFBL_B32_e32 +; SI: BUFFER_STORE_DWORDX2 +; SI: S_ENDPGM +define void @v_cttz_zero_undef_v2i32(<2 x i32> addrspace(1)* noalias %out, <2 x i32> addrspace(1)* noalias %valptr) nounwind { + %val = load <2 x i32> addrspace(1)* %valptr, align 8 + %cttz = call <2 x i32> @llvm.cttz.v2i32(<2 x i32> %val, i1 true) nounwind readnone + store <2 x i32> %cttz, <2 x i32> addrspace(1)* %out, align 8 + ret void +} + +; FUNC-LABEL: @v_cttz_zero_undef_v4i32: +; SI: BUFFER_LOAD_DWORDX4 +; SI: V_FFBL_B32_e32 +; SI: V_FFBL_B32_e32 +; SI: V_FFBL_B32_e32 +; SI: V_FFBL_B32_e32 +; SI: BUFFER_STORE_DWORDX4 +; SI: S_ENDPGM +define void @v_cttz_zero_undef_v4i32(<4 x i32> addrspace(1)* noalias %out, <4 x i32> addrspace(1)* noalias %valptr) nounwind { + %val = load <4 x i32> addrspace(1)* %valptr, align 16 + %cttz = call <4 x i32> @llvm.cttz.v4i32(<4 x i32> %val, i1 true) nounwind readnone + store <4 x i32> %cttz, <4 x i32> addrspace(1)* %out, align 16 + ret void +} diff --git a/test/CodeGen/R600/cvt_f32_ubyte.ll b/test/CodeGen/R600/cvt_f32_ubyte.ll new file mode 100644 index 0000000..fe97a44 --- /dev/null +++ b/test/CodeGen/R600/cvt_f32_ubyte.ll @@ -0,0 +1,171 @@ +; RUN: llc -march=r600 -mcpu=SI -verify-machineinstrs < %s | FileCheck -check-prefix=SI %s + +; SI-LABEL: @load_i8_to_f32: +; SI: BUFFER_LOAD_UBYTE [[LOADREG:v[0-9]+]], +; SI-NOT: BFE +; SI-NOT: LSHR +; SI: V_CVT_F32_UBYTE0_e32 [[CONV:v[0-9]+]], [[LOADREG]] +; SI: BUFFER_STORE_DWORD [[CONV]], +define void @load_i8_to_f32(float addrspace(1)* noalias %out, i8 addrspace(1)* noalias %in) nounwind { + %load = load i8 addrspace(1)* %in, align 1 + %cvt = uitofp i8 %load to float + store float %cvt, float addrspace(1)* %out, align 4 + ret void +} + +; SI-LABEL: @load_v2i8_to_v2f32: +; SI: BUFFER_LOAD_USHORT [[LOADREG:v[0-9]+]], +; SI-NOT: BFE +; SI-NOT: LSHR +; SI-NOT: AND +; SI-DAG: V_CVT_F32_UBYTE1_e32 v[[HIRESULT:[0-9]+]], [[LOADREG]] +; SI-DAG: V_CVT_F32_UBYTE0_e32 v[[LORESULT:[0-9]+]], [[LOADREG]] +; SI: BUFFER_STORE_DWORDX2 v{{\[}}[[LORESULT]]:[[HIRESULT]]{{\]}}, +define void @load_v2i8_to_v2f32(<2 x float> addrspace(1)* noalias %out, <2 x i8> addrspace(1)* noalias %in) nounwind { + %load = load <2 x i8> addrspace(1)* %in, align 1 + %cvt = uitofp <2 x i8> %load to <2 x float> + store <2 x float> %cvt, <2 x float> addrspace(1)* %out, align 16 + ret void +} + +; SI-LABEL: @load_v3i8_to_v3f32: +; SI-NOT: BFE +; SI-NOT: V_CVT_F32_UBYTE3_e32 +; SI-DAG: V_CVT_F32_UBYTE2_e32 +; SI-DAG: V_CVT_F32_UBYTE1_e32 +; SI-DAG: V_CVT_F32_UBYTE0_e32 +; SI: BUFFER_STORE_DWORDX2 v{{\[}}[[LORESULT]]:[[HIRESULT]]{{\]}}, +define void @load_v3i8_to_v3f32(<3 x float> addrspace(1)* noalias %out, <3 x i8> addrspace(1)* noalias %in) nounwind { + %load = load <3 x i8> addrspace(1)* %in, align 1 + %cvt = uitofp <3 x i8> %load to <3 x float> + store <3 x float> %cvt, <3 x float> addrspace(1)* %out, align 16 + ret void +} + +; SI-LABEL: @load_v4i8_to_v4f32: +; SI: BUFFER_LOAD_DWORD [[LOADREG:v[0-9]+]], +; SI-NOT: BFE +; SI-NOT: LSHR +; SI-DAG: V_CVT_F32_UBYTE3_e32 v[[HIRESULT:[0-9]+]], [[LOADREG]] +; SI-DAG: V_CVT_F32_UBYTE2_e32 v{{[0-9]+}}, [[LOADREG]] +; SI-DAG: V_CVT_F32_UBYTE1_e32 v{{[0-9]+}}, [[LOADREG]] +; SI-DAG: V_CVT_F32_UBYTE0_e32 v[[LORESULT:[0-9]+]], [[LOADREG]] +; SI: BUFFER_STORE_DWORDX4 v{{\[}}[[LORESULT]]:[[HIRESULT]]{{\]}}, +define void @load_v4i8_to_v4f32(<4 x float> addrspace(1)* noalias %out, <4 x i8> addrspace(1)* noalias %in) nounwind { + %load = load <4 x i8> addrspace(1)* %in, align 1 + %cvt = uitofp <4 x i8> %load to <4 x float> + store <4 x float> %cvt, <4 x float> addrspace(1)* %out, align 16 + ret void +} + +; XXX - This should really still be able to use the V_CVT_F32_UBYTE0 +; for each component, but computeKnownBits doesn't handle vectors very +; well. + +; SI-LABEL: @load_v4i8_to_v4f32_2_uses: +; SI: BUFFER_LOAD_UBYTE +; SI: V_CVT_F32_UBYTE0_e32 +; SI: BUFFER_LOAD_UBYTE +; SI: V_CVT_F32_UBYTE0_e32 +; SI: BUFFER_LOAD_UBYTE +; SI: V_CVT_F32_UBYTE0_e32 +; SI: BUFFER_LOAD_UBYTE +; SI: V_CVT_F32_UBYTE0_e32 + +; XXX - replace with this when v4i8 loads aren't scalarized anymore. +; XSI: BUFFER_LOAD_DWORD +; XSI: V_CVT_F32_U32_e32 +; XSI: V_CVT_F32_U32_e32 +; XSI: V_CVT_F32_U32_e32 +; XSI: V_CVT_F32_U32_e32 +; SI: S_ENDPGM +define void @load_v4i8_to_v4f32_2_uses(<4 x float> addrspace(1)* noalias %out, <4 x i8> addrspace(1)* noalias %out2, <4 x i8> addrspace(1)* noalias %in) nounwind { + %load = load <4 x i8> addrspace(1)* %in, align 4 + %cvt = uitofp <4 x i8> %load to <4 x float> + store <4 x float> %cvt, <4 x float> addrspace(1)* %out, align 16 + %add = add <4 x i8> %load, <i8 9, i8 9, i8 9, i8 9> ; Second use of %load + store <4 x i8> %add, <4 x i8> addrspace(1)* %out2, align 4 + ret void +} + +; Make sure this doesn't crash. +; SI-LABEL: @load_v7i8_to_v7f32: +; SI: S_ENDPGM +define void @load_v7i8_to_v7f32(<7 x float> addrspace(1)* noalias %out, <7 x i8> addrspace(1)* noalias %in) nounwind { + %load = load <7 x i8> addrspace(1)* %in, align 1 + %cvt = uitofp <7 x i8> %load to <7 x float> + store <7 x float> %cvt, <7 x float> addrspace(1)* %out, align 16 + ret void +} + +; SI-LABEL: @load_v8i8_to_v8f32: +; SI: BUFFER_LOAD_DWORDX2 v{{\[}}[[LOLOAD:[0-9]+]]:[[HILOAD:[0-9]+]]{{\]}}, +; SI-NOT: BFE +; SI-NOT: LSHR +; SI-DAG: V_CVT_F32_UBYTE3_e32 v{{[0-9]+}}, v[[LOLOAD]] +; SI-DAG: V_CVT_F32_UBYTE2_e32 v{{[0-9]+}}, v[[LOLOAD]] +; SI-DAG: V_CVT_F32_UBYTE1_e32 v{{[0-9]+}}, v[[LOLOAD]] +; SI-DAG: V_CVT_F32_UBYTE0_e32 v{{[0-9]+}}, v[[LOLOAD]] +; SI-DAG: V_CVT_F32_UBYTE3_e32 v{{[0-9]+}}, v[[HILOAD]] +; SI-DAG: V_CVT_F32_UBYTE2_e32 v{{[0-9]+}}, v[[HILOAD]] +; SI-DAG: V_CVT_F32_UBYTE1_e32 v{{[0-9]+}}, v[[HILOAD]] +; SI-DAG: V_CVT_F32_UBYTE0_e32 v{{[0-9]+}}, v[[HILOAD]] +; SI-NOT: BFE +; SI-NOT: LSHR +; SI: BUFFER_STORE_DWORD +; SI: BUFFER_STORE_DWORD +; SI: BUFFER_STORE_DWORD +; SI: BUFFER_STORE_DWORD +; SI: BUFFER_STORE_DWORD +; SI: BUFFER_STORE_DWORD +; SI: BUFFER_STORE_DWORD +; SI: BUFFER_STORE_DWORD +define void @load_v8i8_to_v8f32(<8 x float> addrspace(1)* noalias %out, <8 x i8> addrspace(1)* noalias %in) nounwind { + %load = load <8 x i8> addrspace(1)* %in, align 1 + %cvt = uitofp <8 x i8> %load to <8 x float> + store <8 x float> %cvt, <8 x float> addrspace(1)* %out, align 16 + ret void +} + +; SI-LABEL: @i8_zext_inreg_i32_to_f32: +; SI: BUFFER_LOAD_DWORD [[LOADREG:v[0-9]+]], +; SI: V_ADD_I32_e32 [[ADD:v[0-9]+]], 2, [[LOADREG]] +; SI-NEXT: V_CVT_F32_UBYTE0_e32 [[CONV:v[0-9]+]], [[ADD]] +; SI: BUFFER_STORE_DWORD [[CONV]], +define void @i8_zext_inreg_i32_to_f32(float addrspace(1)* noalias %out, i32 addrspace(1)* noalias %in) nounwind { + %load = load i32 addrspace(1)* %in, align 4 + %add = add i32 %load, 2 + %inreg = and i32 %add, 255 + %cvt = uitofp i32 %inreg to float + store float %cvt, float addrspace(1)* %out, align 4 + ret void +} + +; SI-LABEL: @i8_zext_inreg_hi1_to_f32: +define void @i8_zext_inreg_hi1_to_f32(float addrspace(1)* noalias %out, i32 addrspace(1)* noalias %in) nounwind { + %load = load i32 addrspace(1)* %in, align 4 + %inreg = and i32 %load, 65280 + %shr = lshr i32 %inreg, 8 + %cvt = uitofp i32 %shr to float + store float %cvt, float addrspace(1)* %out, align 4 + ret void +} + + +; We don't get these ones because of the zext, but instcombine removes +; them so it shouldn't really matter. +define void @i8_zext_i32_to_f32(float addrspace(1)* noalias %out, i8 addrspace(1)* noalias %in) nounwind { + %load = load i8 addrspace(1)* %in, align 1 + %ext = zext i8 %load to i32 + %cvt = uitofp i32 %ext to float + store float %cvt, float addrspace(1)* %out, align 4 + ret void +} + +define void @v4i8_zext_v4i32_to_v4f32(<4 x float> addrspace(1)* noalias %out, <4 x i8> addrspace(1)* noalias %in) nounwind { + %load = load <4 x i8> addrspace(1)* %in, align 1 + %ext = zext <4 x i8> %load to <4 x i32> + %cvt = uitofp <4 x i32> %ext to <4 x float> + store <4 x float> %cvt, <4 x float> addrspace(1)* %out, align 16 + ret void +} diff --git a/test/CodeGen/R600/default-fp-mode.ll b/test/CodeGen/R600/default-fp-mode.ll new file mode 100644 index 0000000..214b2c2 --- /dev/null +++ b/test/CodeGen/R600/default-fp-mode.ll @@ -0,0 +1,10 @@ +; RUN: llc -march=r600 -mcpu=SI < %s | FileCheck -check-prefix=SI %s + +; SI-LABEL: @test_kernel +; SI: FloatMode: 240 +; SI: IeeeMode: 0 +define void @test_kernel(float addrspace(1)* %out0, double addrspace(1)* %out1) nounwind { + store float 0.0, float addrspace(1)* %out0 + store double 0.0, double addrspace(1)* %out1 + ret void +} diff --git a/test/CodeGen/R600/fceil.ll b/test/CodeGen/R600/fceil.ll index b8b945f..458363a 100644 --- a/test/CodeGen/R600/fceil.ll +++ b/test/CodeGen/R600/fceil.ll @@ -1,84 +1,131 @@ -; RUN: llc -march=r600 -mcpu=bonaire < %s | FileCheck -check-prefix=CI %s +; RUN: llc -march=r600 -mcpu=SI < %s | FileCheck -check-prefix=SI -check-prefix=FUNC %s +; RUN: llc -march=r600 -mcpu=cypress < %s | FileCheck -check-prefix=EG -check-prefix=FUNC %s -declare double @llvm.ceil.f64(double) nounwind readnone -declare <2 x double> @llvm.ceil.v2f64(<2 x double>) nounwind readnone -declare <3 x double> @llvm.ceil.v3f64(<3 x double>) nounwind readnone -declare <4 x double> @llvm.ceil.v4f64(<4 x double>) nounwind readnone -declare <8 x double> @llvm.ceil.v8f64(<8 x double>) nounwind readnone -declare <16 x double> @llvm.ceil.v16f64(<16 x double>) nounwind readnone +declare float @llvm.ceil.f32(float) nounwind readnone +declare <2 x float> @llvm.ceil.v2f32(<2 x float>) nounwind readnone +declare <3 x float> @llvm.ceil.v3f32(<3 x float>) nounwind readnone +declare <4 x float> @llvm.ceil.v4f32(<4 x float>) nounwind readnone +declare <8 x float> @llvm.ceil.v8f32(<8 x float>) nounwind readnone +declare <16 x float> @llvm.ceil.v16f32(<16 x float>) nounwind readnone -; CI-LABEL: @fceil_f64: -; CI: V_CEIL_F64_e32 -define void @fceil_f64(double addrspace(1)* %out, double %x) { - %y = call double @llvm.ceil.f64(double %x) nounwind readnone - store double %y, double addrspace(1)* %out +; FUNC-LABEL: @fceil_f32: +; SI: V_CEIL_F32_e32 +; EG: MEM_RAT_CACHELESS STORE_RAW [[RESULT:T[0-9]+\.[XYZW]]] +; EG: CEIL {{\*? *}}[[RESULT]] +define void @fceil_f32(float addrspace(1)* %out, float %x) { + %y = call float @llvm.ceil.f32(float %x) nounwind readnone + store float %y, float addrspace(1)* %out ret void } -; CI-LABEL: @fceil_v2f64: -; CI: V_CEIL_F64_e32 -; CI: V_CEIL_F64_e32 -define void @fceil_v2f64(<2 x double> addrspace(1)* %out, <2 x double> %x) { - %y = call <2 x double> @llvm.ceil.v2f64(<2 x double> %x) nounwind readnone - store <2 x double> %y, <2 x double> addrspace(1)* %out +; FUNC-LABEL: @fceil_v2f32: +; SI: V_CEIL_F32_e32 +; SI: V_CEIL_F32_e32 +; EG: MEM_RAT_CACHELESS STORE_RAW [[RESULT:T[0-9]+]]{{\.[XYZW]}} +; EG: CEIL {{\*? *}}[[RESULT]] +; EG: CEIL {{\*? *}}[[RESULT]] +define void @fceil_v2f32(<2 x float> addrspace(1)* %out, <2 x float> %x) { + %y = call <2 x float> @llvm.ceil.v2f32(<2 x float> %x) nounwind readnone + store <2 x float> %y, <2 x float> addrspace(1)* %out ret void } -; FIXME-CI-LABEL: @fceil_v3f64: -; FIXME-CI: V_CEIL_F64_e32 -; FIXME-CI: V_CEIL_F64_e32 -; FIXME-CI: V_CEIL_F64_e32 -; define void @fceil_v3f64(<3 x double> addrspace(1)* %out, <3 x double> %x) { -; %y = call <3 x double> @llvm.ceil.v3f64(<3 x double> %x) nounwind readnone -; store <3 x double> %y, <3 x double> addrspace(1)* %out -; ret void -; } +; FUNC-LABEL: @fceil_v3f32: +; FIXME-SI: V_CEIL_F32_e32 +; FIXME-SI: V_CEIL_F32_e32 +; FIXME-SI: V_CEIL_F32_e32 +; FIXME-EG: v3 is treated as v2 and v1, hence 2 stores +; EG: MEM_RAT_CACHELESS STORE_RAW [[RESULT1:T[0-9]+]]{{\.[XYZW]}} +; EG: MEM_RAT_CACHELESS STORE_RAW [[RESULT2:T[0-9]+]]{{\.[XYZW]}} +; EG-DAG: CEIL {{\*? *}}[[RESULT1]] +; EG-DAG: CEIL {{\*? *}}[[RESULT2]] +; EG-DAG: CEIL {{\*? *}}[[RESULT2]] +define void @fceil_v3f32(<3 x float> addrspace(1)* %out, <3 x float> %x) { + %y = call <3 x float> @llvm.ceil.v3f32(<3 x float> %x) nounwind readnone + store <3 x float> %y, <3 x float> addrspace(1)* %out + ret void +} -; CI-LABEL: @fceil_v4f64: -; CI: V_CEIL_F64_e32 -; CI: V_CEIL_F64_e32 -; CI: V_CEIL_F64_e32 -; CI: V_CEIL_F64_e32 -define void @fceil_v4f64(<4 x double> addrspace(1)* %out, <4 x double> %x) { - %y = call <4 x double> @llvm.ceil.v4f64(<4 x double> %x) nounwind readnone - store <4 x double> %y, <4 x double> addrspace(1)* %out +; FUNC-LABEL: @fceil_v4f32: +; SI: V_CEIL_F32_e32 +; SI: V_CEIL_F32_e32 +; SI: V_CEIL_F32_e32 +; SI: V_CEIL_F32_e32 +; EG: MEM_RAT_CACHELESS STORE_RAW [[RESULT:T[0-9]+]]{{\.[XYZW]}} +; EG: CEIL {{\*? *}}[[RESULT]] +; EG: CEIL {{\*? *}}[[RESULT]] +; EG: CEIL {{\*? *}}[[RESULT]] +; EG: CEIL {{\*? *}}[[RESULT]] +define void @fceil_v4f32(<4 x float> addrspace(1)* %out, <4 x float> %x) { + %y = call <4 x float> @llvm.ceil.v4f32(<4 x float> %x) nounwind readnone + store <4 x float> %y, <4 x float> addrspace(1)* %out ret void } -; CI-LABEL: @fceil_v8f64: -; CI: V_CEIL_F64_e32 -; CI: V_CEIL_F64_e32 -; CI: V_CEIL_F64_e32 -; CI: V_CEIL_F64_e32 -; CI: V_CEIL_F64_e32 -; CI: V_CEIL_F64_e32 -; CI: V_CEIL_F64_e32 -; CI: V_CEIL_F64_e32 -define void @fceil_v8f64(<8 x double> addrspace(1)* %out, <8 x double> %x) { - %y = call <8 x double> @llvm.ceil.v8f64(<8 x double> %x) nounwind readnone - store <8 x double> %y, <8 x double> addrspace(1)* %out +; FUNC-LABEL: @fceil_v8f32: +; SI: V_CEIL_F32_e32 +; SI: V_CEIL_F32_e32 +; SI: V_CEIL_F32_e32 +; SI: V_CEIL_F32_e32 +; SI: V_CEIL_F32_e32 +; SI: V_CEIL_F32_e32 +; SI: V_CEIL_F32_e32 +; SI: V_CEIL_F32_e32 +; EG: MEM_RAT_CACHELESS STORE_RAW [[RESULT1:T[0-9]+]]{{\.[XYZW]}} +; EG: MEM_RAT_CACHELESS STORE_RAW [[RESULT2:T[0-9]+]]{{\.[XYZW]}} +; EG-DAG: CEIL {{\*? *}}[[RESULT1]] +; EG-DAG: CEIL {{\*? *}}[[RESULT1]] +; EG-DAG: CEIL {{\*? *}}[[RESULT1]] +; EG-DAG: CEIL {{\*? *}}[[RESULT1]] +; EG-DAG: CEIL {{\*? *}}[[RESULT2]] +; EG-DAG: CEIL {{\*? *}}[[RESULT2]] +; EG-DAG: CEIL {{\*? *}}[[RESULT2]] +; EG-DAG: CEIL {{\*? *}}[[RESULT2]] +define void @fceil_v8f32(<8 x float> addrspace(1)* %out, <8 x float> %x) { + %y = call <8 x float> @llvm.ceil.v8f32(<8 x float> %x) nounwind readnone + store <8 x float> %y, <8 x float> addrspace(1)* %out ret void } -; CI-LABEL: @fceil_v16f64: -; CI: V_CEIL_F64_e32 -; CI: V_CEIL_F64_e32 -; CI: V_CEIL_F64_e32 -; CI: V_CEIL_F64_e32 -; CI: V_CEIL_F64_e32 -; CI: V_CEIL_F64_e32 -; CI: V_CEIL_F64_e32 -; CI: V_CEIL_F64_e32 -; CI: V_CEIL_F64_e32 -; CI: V_CEIL_F64_e32 -; CI: V_CEIL_F64_e32 -; CI: V_CEIL_F64_e32 -; CI: V_CEIL_F64_e32 -; CI: V_CEIL_F64_e32 -; CI: V_CEIL_F64_e32 -; CI: V_CEIL_F64_e32 -define void @fceil_v16f64(<16 x double> addrspace(1)* %out, <16 x double> %x) { - %y = call <16 x double> @llvm.ceil.v16f64(<16 x double> %x) nounwind readnone - store <16 x double> %y, <16 x double> addrspace(1)* %out +; FUNC-LABEL: @fceil_v16f32: +; SI: V_CEIL_F32_e32 +; SI: V_CEIL_F32_e32 +; SI: V_CEIL_F32_e32 +; SI: V_CEIL_F32_e32 +; SI: V_CEIL_F32_e32 +; SI: V_CEIL_F32_e32 +; SI: V_CEIL_F32_e32 +; SI: V_CEIL_F32_e32 +; SI: V_CEIL_F32_e32 +; SI: V_CEIL_F32_e32 +; SI: V_CEIL_F32_e32 +; SI: V_CEIL_F32_e32 +; SI: V_CEIL_F32_e32 +; SI: V_CEIL_F32_e32 +; SI: V_CEIL_F32_e32 +; SI: V_CEIL_F32_e32 +; EG: MEM_RAT_CACHELESS STORE_RAW [[RESULT1:T[0-9]+]]{{\.[XYZW]}} +; EG: MEM_RAT_CACHELESS STORE_RAW [[RESULT2:T[0-9]+]]{{\.[XYZW]}} +; EG: MEM_RAT_CACHELESS STORE_RAW [[RESULT3:T[0-9]+]]{{\.[XYZW]}} +; EG: MEM_RAT_CACHELESS STORE_RAW [[RESULT4:T[0-9]+]]{{\.[XYZW]}} +; EG-DAG: CEIL {{\*? *}}[[RESULT1]] +; EG-DAG: CEIL {{\*? *}}[[RESULT1]] +; EG-DAG: CEIL {{\*? *}}[[RESULT1]] +; EG-DAG: CEIL {{\*? *}}[[RESULT1]] +; EG-DAG: CEIL {{\*? *}}[[RESULT2]] +; EG-DAG: CEIL {{\*? *}}[[RESULT2]] +; EG-DAG: CEIL {{\*? *}}[[RESULT2]] +; EG-DAG: CEIL {{\*? *}}[[RESULT2]] +; EG-DAG: CEIL {{\*? *}}[[RESULT3]] +; EG-DAG: CEIL {{\*? *}}[[RESULT3]] +; EG-DAG: CEIL {{\*? *}}[[RESULT3]] +; EG-DAG: CEIL {{\*? *}}[[RESULT3]] +; EG-DAG: CEIL {{\*? *}}[[RESULT4]] +; EG-DAG: CEIL {{\*? *}}[[RESULT4]] +; EG-DAG: CEIL {{\*? *}}[[RESULT4]] +; EG-DAG: CEIL {{\*? *}}[[RESULT4]] +define void @fceil_v16f32(<16 x float> addrspace(1)* %out, <16 x float> %x) { + %y = call <16 x float> @llvm.ceil.v16f32(<16 x float> %x) nounwind readnone + store <16 x float> %y, <16 x float> addrspace(1)* %out ret void } diff --git a/test/CodeGen/R600/fceil64.ll b/test/CodeGen/R600/fceil64.ll new file mode 100644 index 0000000..b42aefa --- /dev/null +++ b/test/CodeGen/R600/fceil64.ll @@ -0,0 +1,103 @@ +; RUN: llc -march=r600 -mcpu=bonaire < %s | FileCheck -check-prefix=CI -check-prefix=FUNC %s +; RUN: llc -march=r600 -mcpu=SI < %s | FileCheck -check-prefix=SI -check-prefix=FUNC %s + +declare double @llvm.ceil.f64(double) nounwind readnone +declare <2 x double> @llvm.ceil.v2f64(<2 x double>) nounwind readnone +declare <3 x double> @llvm.ceil.v3f64(<3 x double>) nounwind readnone +declare <4 x double> @llvm.ceil.v4f64(<4 x double>) nounwind readnone +declare <8 x double> @llvm.ceil.v8f64(<8 x double>) nounwind readnone +declare <16 x double> @llvm.ceil.v16f64(<16 x double>) nounwind readnone + +; FUNC-LABEL: @fceil_f64: +; CI: V_CEIL_F64_e32 +; SI: S_BFE_I32 [[SEXP:s[0-9]+]], {{s[0-9]+}}, 0xb0014 +; SI: S_ADD_I32 s{{[0-9]+}}, [[SEXP]], 0xfffffc01 +; SI: S_LSHR_B64 +; SI: S_NOT_B64 +; SI: S_AND_B64 +; SI: S_AND_B32 s{{[0-9]+}}, s{{[0-9]+}}, 0x80000000 +; SI: CMP_LT_I32 +; SI: CNDMASK_B32 +; SI: CNDMASK_B32 +; SI: CMP_GT_I32 +; SI: CNDMASK_B32 +; SI: CNDMASK_B32 +; SI: CMP_GT_F64 +; SI: CNDMASK_B32 +; SI: CMP_NE_I32 +; SI: CNDMASK_B32 +; SI: CNDMASK_B32 +; SI: V_ADD_F64 +define void @fceil_f64(double addrspace(1)* %out, double %x) { + %y = call double @llvm.ceil.f64(double %x) nounwind readnone + store double %y, double addrspace(1)* %out + ret void +} + +; FUNC-LABEL: @fceil_v2f64: +; CI: V_CEIL_F64_e32 +; CI: V_CEIL_F64_e32 +define void @fceil_v2f64(<2 x double> addrspace(1)* %out, <2 x double> %x) { + %y = call <2 x double> @llvm.ceil.v2f64(<2 x double> %x) nounwind readnone + store <2 x double> %y, <2 x double> addrspace(1)* %out + ret void +} + +; FIXME-FUNC-LABEL: @fceil_v3f64: +; FIXME-CI: V_CEIL_F64_e32 +; FIXME-CI: V_CEIL_F64_e32 +; FIXME-CI: V_CEIL_F64_e32 +; define void @fceil_v3f64(<3 x double> addrspace(1)* %out, <3 x double> %x) { +; %y = call <3 x double> @llvm.ceil.v3f64(<3 x double> %x) nounwind readnone +; store <3 x double> %y, <3 x double> addrspace(1)* %out +; ret void +; } + +; FUNC-LABEL: @fceil_v4f64: +; CI: V_CEIL_F64_e32 +; CI: V_CEIL_F64_e32 +; CI: V_CEIL_F64_e32 +; CI: V_CEIL_F64_e32 +define void @fceil_v4f64(<4 x double> addrspace(1)* %out, <4 x double> %x) { + %y = call <4 x double> @llvm.ceil.v4f64(<4 x double> %x) nounwind readnone + store <4 x double> %y, <4 x double> addrspace(1)* %out + ret void +} + +; FUNC-LABEL: @fceil_v8f64: +; CI: V_CEIL_F64_e32 +; CI: V_CEIL_F64_e32 +; CI: V_CEIL_F64_e32 +; CI: V_CEIL_F64_e32 +; CI: V_CEIL_F64_e32 +; CI: V_CEIL_F64_e32 +; CI: V_CEIL_F64_e32 +; CI: V_CEIL_F64_e32 +define void @fceil_v8f64(<8 x double> addrspace(1)* %out, <8 x double> %x) { + %y = call <8 x double> @llvm.ceil.v8f64(<8 x double> %x) nounwind readnone + store <8 x double> %y, <8 x double> addrspace(1)* %out + ret void +} + +; FUNC-LABEL: @fceil_v16f64: +; CI: V_CEIL_F64_e32 +; CI: V_CEIL_F64_e32 +; CI: V_CEIL_F64_e32 +; CI: V_CEIL_F64_e32 +; CI: V_CEIL_F64_e32 +; CI: V_CEIL_F64_e32 +; CI: V_CEIL_F64_e32 +; CI: V_CEIL_F64_e32 +; CI: V_CEIL_F64_e32 +; CI: V_CEIL_F64_e32 +; CI: V_CEIL_F64_e32 +; CI: V_CEIL_F64_e32 +; CI: V_CEIL_F64_e32 +; CI: V_CEIL_F64_e32 +; CI: V_CEIL_F64_e32 +; CI: V_CEIL_F64_e32 +define void @fceil_v16f64(<16 x double> addrspace(1)* %out, <16 x double> %x) { + %y = call <16 x double> @llvm.ceil.v16f64(<16 x double> %x) nounwind readnone + store <16 x double> %y, <16 x double> addrspace(1)* %out + ret void +} diff --git a/test/CodeGen/R600/fcopysign.f32.ll b/test/CodeGen/R600/fcopysign.f32.ll new file mode 100644 index 0000000..7b4425b --- /dev/null +++ b/test/CodeGen/R600/fcopysign.f32.ll @@ -0,0 +1,50 @@ +; RUN: llc -march=r600 -mcpu=SI -verify-machineinstrs < %s | FileCheck -check-prefix=SI -check-prefix=FUNC %s +; RUN: llc -march=r600 -mcpu=cypress -verify-machineinstrs < %s | FileCheck -check-prefix=EG -check-prefix=FUNC %s + + +declare float @llvm.copysign.f32(float, float) nounwind readnone +declare <2 x float> @llvm.copysign.v2f32(<2 x float>, <2 x float>) nounwind readnone +declare <4 x float> @llvm.copysign.v4f32(<4 x float>, <4 x float>) nounwind readnone + +; Try to identify arg based on higher address. +; FUNC-LABEL: @test_copysign_f32: +; SI: S_LOAD_DWORD [[SSIGN:s[0-9]+]], {{.*}} 0xc +; SI: V_MOV_B32_e32 [[VSIGN:v[0-9]+]], [[SSIGN]] +; SI-DAG: S_LOAD_DWORD [[SMAG:s[0-9]+]], {{.*}} 0xb +; SI-DAG: V_MOV_B32_e32 [[VMAG:v[0-9]+]], [[SMAG]] +; SI-DAG: S_MOV_B32 [[SCONST:s[0-9]+]], 0x7fffffff +; SI: V_BFI_B32 [[RESULT:v[0-9]+]], [[SCONST]], [[VMAG]], [[VSIGN]] +; SI: BUFFER_STORE_DWORD [[RESULT]], +; SI: S_ENDPGM + +; EG: BFI_INT +define void @test_copysign_f32(float addrspace(1)* %out, float %mag, float %sign) nounwind { + %result = call float @llvm.copysign.f32(float %mag, float %sign) + store float %result, float addrspace(1)* %out, align 4 + ret void +} + +; FUNC-LABEL: @test_copysign_v2f32: +; SI: S_ENDPGM + +; EG: BFI_INT +; EG: BFI_INT +define void @test_copysign_v2f32(<2 x float> addrspace(1)* %out, <2 x float> %mag, <2 x float> %sign) nounwind { + %result = call <2 x float> @llvm.copysign.v2f32(<2 x float> %mag, <2 x float> %sign) + store <2 x float> %result, <2 x float> addrspace(1)* %out, align 8 + ret void +} + +; FUNC-LABEL: @test_copysign_v4f32: +; SI: S_ENDPGM + +; EG: BFI_INT +; EG: BFI_INT +; EG: BFI_INT +; EG: BFI_INT +define void @test_copysign_v4f32(<4 x float> addrspace(1)* %out, <4 x float> %mag, <4 x float> %sign) nounwind { + %result = call <4 x float> @llvm.copysign.v4f32(<4 x float> %mag, <4 x float> %sign) + store <4 x float> %result, <4 x float> addrspace(1)* %out, align 16 + ret void +} + diff --git a/test/CodeGen/R600/fcopysign.f64.ll b/test/CodeGen/R600/fcopysign.f64.ll new file mode 100644 index 0000000..ea7a6db --- /dev/null +++ b/test/CodeGen/R600/fcopysign.f64.ll @@ -0,0 +1,37 @@ +; RUN: llc -march=r600 -mcpu=SI -verify-machineinstrs < %s | FileCheck -check-prefix=SI -check-prefix=FUNC %s + +declare double @llvm.copysign.f64(double, double) nounwind readnone +declare <2 x double> @llvm.copysign.v2f64(<2 x double>, <2 x double>) nounwind readnone +declare <4 x double> @llvm.copysign.v4f64(<4 x double>, <4 x double>) nounwind readnone + +; FUNC-LABEL: @test_copysign_f64: +; SI-DAG: S_LOAD_DWORDX2 s{{\[}}[[SSIGN_LO:[0-9]+]]:[[SSIGN_HI:[0-9]+]]{{\]}}, s{{\[[0-9]+:[0-9]+\]}}, 0xd +; SI: V_MOV_B32_e32 v[[VSIGN_HI:[0-9]+]], s[[SSIGN_HI]] +; SI-DAG: S_LOAD_DWORDX2 s{{\[}}[[SMAG_LO:[0-9]+]]:[[SMAG_HI:[0-9]+]]{{\]}}, s{{\[[0-9]+:[0-9]+\]}}, 0xb +; SI-DAG: V_MOV_B32_e32 v[[VMAG_HI:[0-9]+]], s[[SMAG_HI]] +; SI-DAG: S_MOV_B32 [[SCONST:s[0-9]+]], 0x7fffffff +; SI: V_BFI_B32 v[[VRESULT_HI:[0-9]+]], [[SCONST]], v[[VMAG_HI]], v[[VSIGN_HI]] +; SI: V_MOV_B32_e32 v[[VMAG_LO:[0-9]+]], s[[SMAG_LO]] +; SI: BUFFER_STORE_DWORDX2 v{{\[}}[[VMAG_LO]]:[[VRESULT_HI]]{{\]}} +; SI: S_ENDPGM +define void @test_copysign_f64(double addrspace(1)* %out, double %mag, double %sign) nounwind { + %result = call double @llvm.copysign.f64(double %mag, double %sign) + store double %result, double addrspace(1)* %out, align 8 + ret void +} + +; FUNC-LABEL: @test_copysign_v2f64: +; SI: S_ENDPGM +define void @test_copysign_v2f64(<2 x double> addrspace(1)* %out, <2 x double> %mag, <2 x double> %sign) nounwind { + %result = call <2 x double> @llvm.copysign.v2f64(<2 x double> %mag, <2 x double> %sign) + store <2 x double> %result, <2 x double> addrspace(1)* %out, align 8 + ret void +} + +; FUNC-LABEL: @test_copysign_v4f64: +; SI: S_ENDPGM +define void @test_copysign_v4f64(<4 x double> addrspace(1)* %out, <4 x double> %mag, <4 x double> %sign) nounwind { + %result = call <4 x double> @llvm.copysign.v4f64(<4 x double> %mag, <4 x double> %sign) + store <4 x double> %result, <4 x double> addrspace(1)* %out, align 8 + ret void +} diff --git a/test/CodeGen/R600/ffloor.ll b/test/CodeGen/R600/ffloor.ll index 51d2b89..31c6116 100644 --- a/test/CodeGen/R600/ffloor.ll +++ b/test/CodeGen/R600/ffloor.ll @@ -1,4 +1,5 @@ -; RUN: llc -march=r600 -mcpu=bonaire < %s | FileCheck -check-prefix=CI %s +; RUN: llc -march=r600 -mcpu=bonaire < %s | FileCheck -check-prefix=CI -check-prefix=FUNC %s +; RUN: llc -march=r600 -mcpu=SI < %s | FileCheck -check-prefix=SI -check-prefix=FUNC %s declare double @llvm.floor.f64(double) nounwind readnone declare <2 x double> @llvm.floor.v2f64(<2 x double>) nounwind readnone @@ -7,15 +8,34 @@ declare <4 x double> @llvm.floor.v4f64(<4 x double>) nounwind readnone declare <8 x double> @llvm.floor.v8f64(<8 x double>) nounwind readnone declare <16 x double> @llvm.floor.v16f64(<16 x double>) nounwind readnone -; CI-LABEL: @ffloor_f64: +; FUNC-LABEL: @ffloor_f64: ; CI: V_FLOOR_F64_e32 + +; SI: S_BFE_I32 [[SEXP:s[0-9]+]], {{s[0-9]+}}, 0xb0014 +; SI: S_ADD_I32 s{{[0-9]+}}, [[SEXP]], 0xfffffc01 +; SI: S_LSHR_B64 +; SI: S_NOT_B64 +; SI: S_AND_B64 +; SI: S_AND_B32 s{{[0-9]+}}, s{{[0-9]+}}, 0x80000000 +; SI: CMP_LT_I32 +; SI: CNDMASK_B32 +; SI: CNDMASK_B32 +; SI: CMP_GT_I32 +; SI: CNDMASK_B32 +; SI: CNDMASK_B32 +; SI: CMP_LT_F64 +; SI: CNDMASK_B32 +; SI: CMP_NE_I32 +; SI: CNDMASK_B32 +; SI: CNDMASK_B32 +; SI: V_ADD_F64 define void @ffloor_f64(double addrspace(1)* %out, double %x) { %y = call double @llvm.floor.f64(double %x) nounwind readnone store double %y, double addrspace(1)* %out ret void } -; CI-LABEL: @ffloor_v2f64: +; FUNC-LABEL: @ffloor_v2f64: ; CI: V_FLOOR_F64_e32 ; CI: V_FLOOR_F64_e32 define void @ffloor_v2f64(<2 x double> addrspace(1)* %out, <2 x double> %x) { @@ -24,7 +44,7 @@ define void @ffloor_v2f64(<2 x double> addrspace(1)* %out, <2 x double> %x) { ret void } -; FIXME-CI-LABEL: @ffloor_v3f64: +; FIXME-FUNC-LABEL: @ffloor_v3f64: ; FIXME-CI: V_FLOOR_F64_e32 ; FIXME-CI: V_FLOOR_F64_e32 ; FIXME-CI: V_FLOOR_F64_e32 @@ -34,7 +54,7 @@ define void @ffloor_v2f64(<2 x double> addrspace(1)* %out, <2 x double> %x) { ; ret void ; } -; CI-LABEL: @ffloor_v4f64: +; FUNC-LABEL: @ffloor_v4f64: ; CI: V_FLOOR_F64_e32 ; CI: V_FLOOR_F64_e32 ; CI: V_FLOOR_F64_e32 @@ -45,7 +65,7 @@ define void @ffloor_v4f64(<4 x double> addrspace(1)* %out, <4 x double> %x) { ret void } -; CI-LABEL: @ffloor_v8f64: +; FUNC-LABEL: @ffloor_v8f64: ; CI: V_FLOOR_F64_e32 ; CI: V_FLOOR_F64_e32 ; CI: V_FLOOR_F64_e32 @@ -60,7 +80,7 @@ define void @ffloor_v8f64(<8 x double> addrspace(1)* %out, <8 x double> %x) { ret void } -; CI-LABEL: @ffloor_v16f64: +; FUNC-LABEL: @ffloor_v16f64: ; CI: V_FLOOR_F64_e32 ; CI: V_FLOOR_F64_e32 ; CI: V_FLOOR_F64_e32 diff --git a/test/CodeGen/R600/fma.ll b/test/CodeGen/R600/fma.ll index 51e9d29..d72ffec 100644 --- a/test/CodeGen/R600/fma.ll +++ b/test/CodeGen/R600/fma.ll @@ -1,8 +1,15 @@ -; RUN: llc < %s -march=r600 -mcpu=SI -verify-machineinstrs | FileCheck %s +; RUN: llc -march=r600 -mcpu=SI -verify-machineinstrs < %s | FileCheck -check-prefix=SI -check-prefix=FUNC %s -; CHECK: @fma_f32 -; CHECK: V_FMA_F32 {{v[0-9]+, v[0-9]+, v[0-9]+, v[0-9]+}} +declare float @llvm.fma.f32(float, float, float) nounwind readnone +declare <2 x float> @llvm.fma.v2f32(<2 x float>, <2 x float>, <2 x float>) nounwind readnone +declare <4 x float> @llvm.fma.v4f32(<4 x float>, <4 x float>, <4 x float>) nounwind readnone +declare double @llvm.fma.f64(double, double, double) nounwind readnone +declare <2 x double> @llvm.fma.v2f64(<2 x double>, <2 x double>, <2 x double>) nounwind readnone +declare <4 x double> @llvm.fma.v4f64(<4 x double>, <4 x double>, <4 x double>) nounwind readnone + +; FUNC-LABEL: @fma_f32 +; SI: V_FMA_F32 {{v[0-9]+, v[0-9]+, v[0-9]+, v[0-9]+}} define void @fma_f32(float addrspace(1)* %out, float addrspace(1)* %in1, float addrspace(1)* %in2, float addrspace(1)* %in3) { %r0 = load float addrspace(1)* %in1 @@ -13,11 +20,36 @@ define void @fma_f32(float addrspace(1)* %out, float addrspace(1)* %in1, ret void } -declare float @llvm.fma.f32(float, float, float) +; FUNC-LABEL: @fma_v2f32 +; SI: V_FMA_F32 +; SI: V_FMA_F32 +define void @fma_v2f32(<2 x float> addrspace(1)* %out, <2 x float> addrspace(1)* %in1, + <2 x float> addrspace(1)* %in2, <2 x float> addrspace(1)* %in3) { + %r0 = load <2 x float> addrspace(1)* %in1 + %r1 = load <2 x float> addrspace(1)* %in2 + %r2 = load <2 x float> addrspace(1)* %in3 + %r3 = tail call <2 x float> @llvm.fma.v2f32(<2 x float> %r0, <2 x float> %r1, <2 x float> %r2) + store <2 x float> %r3, <2 x float> addrspace(1)* %out + ret void +} -; CHECK: @fma_f64 -; CHECK: V_FMA_F64 {{v\[[0-9]+:[0-9]+\], v\[[0-9]+:[0-9]+\], v\[[0-9]+:[0-9]+\], v\[[0-9]+:[0-9]+\]}} +; FUNC-LABEL: @fma_v4f32 +; SI: V_FMA_F32 +; SI: V_FMA_F32 +; SI: V_FMA_F32 +; SI: V_FMA_F32 +define void @fma_v4f32(<4 x float> addrspace(1)* %out, <4 x float> addrspace(1)* %in1, + <4 x float> addrspace(1)* %in2, <4 x float> addrspace(1)* %in3) { + %r0 = load <4 x float> addrspace(1)* %in1 + %r1 = load <4 x float> addrspace(1)* %in2 + %r2 = load <4 x float> addrspace(1)* %in3 + %r3 = tail call <4 x float> @llvm.fma.v4f32(<4 x float> %r0, <4 x float> %r1, <4 x float> %r2) + store <4 x float> %r3, <4 x float> addrspace(1)* %out + ret void +} +; FUNC-LABEL: @fma_f64 +; SI: V_FMA_F64 {{v\[[0-9]+:[0-9]+\], v\[[0-9]+:[0-9]+\], v\[[0-9]+:[0-9]+\], v\[[0-9]+:[0-9]+\]}} define void @fma_f64(double addrspace(1)* %out, double addrspace(1)* %in1, double addrspace(1)* %in2, double addrspace(1)* %in3) { %r0 = load double addrspace(1)* %in1 @@ -28,4 +60,30 @@ define void @fma_f64(double addrspace(1)* %out, double addrspace(1)* %in1, ret void } -declare double @llvm.fma.f64(double, double, double) +; FUNC-LABEL: @fma_v2f64 +; SI: V_FMA_F64 +; SI: V_FMA_F64 +define void @fma_v2f64(<2 x double> addrspace(1)* %out, <2 x double> addrspace(1)* %in1, + <2 x double> addrspace(1)* %in2, <2 x double> addrspace(1)* %in3) { + %r0 = load <2 x double> addrspace(1)* %in1 + %r1 = load <2 x double> addrspace(1)* %in2 + %r2 = load <2 x double> addrspace(1)* %in3 + %r3 = tail call <2 x double> @llvm.fma.v2f64(<2 x double> %r0, <2 x double> %r1, <2 x double> %r2) + store <2 x double> %r3, <2 x double> addrspace(1)* %out + ret void +} + +; FUNC-LABEL: @fma_v4f64 +; SI: V_FMA_F64 +; SI: V_FMA_F64 +; SI: V_FMA_F64 +; SI: V_FMA_F64 +define void @fma_v4f64(<4 x double> addrspace(1)* %out, <4 x double> addrspace(1)* %in1, + <4 x double> addrspace(1)* %in2, <4 x double> addrspace(1)* %in3) { + %r0 = load <4 x double> addrspace(1)* %in1 + %r1 = load <4 x double> addrspace(1)* %in2 + %r2 = load <4 x double> addrspace(1)* %in3 + %r3 = tail call <4 x double> @llvm.fma.v4f64(<4 x double> %r0, <4 x double> %r1, <4 x double> %r2) + store <4 x double> %r3, <4 x double> addrspace(1)* %out + ret void +} diff --git a/test/CodeGen/R600/fnearbyint.ll b/test/CodeGen/R600/fnearbyint.ll new file mode 100644 index 0000000..1c1d731 --- /dev/null +++ b/test/CodeGen/R600/fnearbyint.ll @@ -0,0 +1,57 @@ +; RUN: llc -march=r600 -mcpu=SI -verify-machineinstrs < %s +; RUN: llc -march=r600 -mcpu=bonaire -verify-machineinstrs < %s + +; This should have the exactly the same output as the test for rint, +; so no need to check anything. + +declare float @llvm.nearbyint.f32(float) #0 +declare <2 x float> @llvm.nearbyint.v2f32(<2 x float>) #0 +declare <4 x float> @llvm.nearbyint.v4f32(<4 x float>) #0 +declare double @llvm.nearbyint.f64(double) #0 +declare <2 x double> @llvm.nearbyint.v2f64(<2 x double>) #0 +declare <4 x double> @llvm.nearbyint.v4f64(<4 x double>) #0 + + +define void @fnearbyint_f32(float addrspace(1)* %out, float %in) #1 { +entry: + %0 = call float @llvm.nearbyint.f32(float %in) + store float %0, float addrspace(1)* %out + ret void +} + +define void @fnearbyint_v2f32(<2 x float> addrspace(1)* %out, <2 x float> %in) #1 { +entry: + %0 = call <2 x float> @llvm.nearbyint.v2f32(<2 x float> %in) + store <2 x float> %0, <2 x float> addrspace(1)* %out + ret void +} + +define void @fnearbyint_v4f32(<4 x float> addrspace(1)* %out, <4 x float> %in) #1 { +entry: + %0 = call <4 x float> @llvm.nearbyint.v4f32(<4 x float> %in) + store <4 x float> %0, <4 x float> addrspace(1)* %out + ret void +} + +define void @nearbyint_f64(double addrspace(1)* %out, double %in) { +entry: + %0 = call double @llvm.nearbyint.f64(double %in) + store double %0, double addrspace(1)* %out + ret void +} +define void @nearbyint_v2f64(<2 x double> addrspace(1)* %out, <2 x double> %in) { +entry: + %0 = call <2 x double> @llvm.nearbyint.v2f64(<2 x double> %in) + store <2 x double> %0, <2 x double> addrspace(1)* %out + ret void +} + +define void @nearbyint_v4f64(<4 x double> addrspace(1)* %out, <4 x double> %in) { +entry: + %0 = call <4 x double> @llvm.nearbyint.v4f64(<4 x double> %in) + store <4 x double> %0, <4 x double> addrspace(1)* %out + ret void +} + +attributes #0 = { nounwind readonly } +attributes #1 = { nounwind } diff --git a/test/CodeGen/R600/fp16_to_fp32.ll b/test/CodeGen/R600/fp16_to_fp32.ll new file mode 100644 index 0000000..fa2e379 --- /dev/null +++ b/test/CodeGen/R600/fp16_to_fp32.ll @@ -0,0 +1,14 @@ +; RUN: llc -march=r600 -mcpu=SI -verify-machineinstrs < %s | FileCheck -check-prefix=SI %s + +declare i16 @llvm.convert.to.fp16(float) nounwind readnone + +; SI-LABEL: @test_convert_fp16_to_fp32: +; SI: BUFFER_LOAD_DWORD [[VAL:v[0-9]+]] +; SI: V_CVT_F16_F32_e32 [[RESULT:v[0-9]+]], [[VAL]] +; SI: BUFFER_STORE_SHORT [[RESULT]] +define void @test_convert_fp16_to_fp32(i16 addrspace(1)* noalias %out, float addrspace(1)* noalias %in) nounwind { + %val = load float addrspace(1)* %in, align 4 + %cvt = call i16 @llvm.convert.to.fp16(float %val) nounwind readnone + store i16 %cvt, i16 addrspace(1)* %out, align 2 + ret void +} diff --git a/test/CodeGen/R600/fp32_to_fp16.ll b/test/CodeGen/R600/fp32_to_fp16.ll new file mode 100644 index 0000000..9997cd3 --- /dev/null +++ b/test/CodeGen/R600/fp32_to_fp16.ll @@ -0,0 +1,14 @@ +; RUN: llc -march=r600 -mcpu=SI -verify-machineinstrs < %s | FileCheck -check-prefix=SI %s + +declare float @llvm.convert.from.fp16(i16) nounwind readnone + +; SI-LABEL: @test_convert_fp16_to_fp32: +; SI: BUFFER_LOAD_USHORT [[VAL:v[0-9]+]] +; SI: V_CVT_F32_F16_e32 [[RESULT:v[0-9]+]], [[VAL]] +; SI: BUFFER_STORE_DWORD [[RESULT]] +define void @test_convert_fp16_to_fp32(float addrspace(1)* noalias %out, i16 addrspace(1)* noalias %in) nounwind { + %val = load i16 addrspace(1)* %in, align 2 + %cvt = call float @llvm.convert.from.fp16(i16 %val) nounwind readnone + store float %cvt, float addrspace(1)* %out, align 4 + ret void +} diff --git a/test/CodeGen/R600/fp_to_sint_i64.ll b/test/CodeGen/R600/fp_to_sint_i64.ll new file mode 100644 index 0000000..ec3e198 --- /dev/null +++ b/test/CodeGen/R600/fp_to_sint_i64.ll @@ -0,0 +1,12 @@ +; FIXME: Merge into fp_to_sint.ll when EG/NI supports 64-bit types +; RUN: llc < %s -march=r600 -mcpu=SI -verify-machineinstrs | FileCheck --check-prefix=SI %s + +; SI-LABEL: @fp_to_sint_i64 +; Check that the compiler doesn't crash with a "cannot select" error +; SI: S_ENDPGM +define void @fp_to_sint_i64 (i64 addrspace(1)* %out, float %in) { +entry: + %0 = fptosi float %in to i64 + store i64 %0, i64 addrspace(1)* %out + ret void +} diff --git a/test/CodeGen/R600/fsub64.ll b/test/CodeGen/R600/fsub64.ll index 1445a20..f5e5708 100644 --- a/test/CodeGen/R600/fsub64.ll +++ b/test/CodeGen/R600/fsub64.ll @@ -1,8 +1,7 @@ -; RUN: llc < %s -march=r600 -mcpu=tahiti -verify-machineinstrs | FileCheck %s - -; CHECK: @fsub_f64 -; CHECK: V_ADD_F64 {{v\[[0-9]+:[0-9]+\], v\[[0-9]+:[0-9]+\], v\[[0-9]+:[0-9]+\]}}, 0, 0, 0, 0, 2 +; RUN: llc -march=r600 -mcpu=tahiti -verify-machineinstrs < %s | FileCheck -check-prefix=SI %s +; SI-LABEL: @fsub_f64: +; SI: V_ADD_F64 {{v\[[0-9]+:[0-9]+\], v\[[0-9]+:[0-9]+\], -v\[[0-9]+:[0-9]+\]}} define void @fsub_f64(double addrspace(1)* %out, double addrspace(1)* %in1, double addrspace(1)* %in2) { %r0 = load double addrspace(1)* %in1 diff --git a/test/CodeGen/R600/ftrunc.ll b/test/CodeGen/R600/ftrunc.ll index 6b235ff..0d7d467 100644 --- a/test/CodeGen/R600/ftrunc.ll +++ b/test/CodeGen/R600/ftrunc.ll @@ -1,84 +1,119 @@ -; RUN: llc -march=r600 -mcpu=bonaire < %s | FileCheck -check-prefix=CI %s +; RUN: llc -march=r600 -mcpu=redwood < %s | FileCheck -check-prefix=EG --check-prefix=FUNC %s +; RUN: llc -march=r600 -mcpu=SI < %s | FileCheck -check-prefix=SI --check-prefix=FUNC %s -declare double @llvm.trunc.f64(double) nounwind readnone -declare <2 x double> @llvm.trunc.v2f64(<2 x double>) nounwind readnone -declare <3 x double> @llvm.trunc.v3f64(<3 x double>) nounwind readnone -declare <4 x double> @llvm.trunc.v4f64(<4 x double>) nounwind readnone -declare <8 x double> @llvm.trunc.v8f64(<8 x double>) nounwind readnone -declare <16 x double> @llvm.trunc.v16f64(<16 x double>) nounwind readnone +declare float @llvm.trunc.f32(float) nounwind readnone +declare <2 x float> @llvm.trunc.v2f32(<2 x float>) nounwind readnone +declare <3 x float> @llvm.trunc.v3f32(<3 x float>) nounwind readnone +declare <4 x float> @llvm.trunc.v4f32(<4 x float>) nounwind readnone +declare <8 x float> @llvm.trunc.v8f32(<8 x float>) nounwind readnone +declare <16 x float> @llvm.trunc.v16f32(<16 x float>) nounwind readnone -; CI-LABEL: @ftrunc_f64: -; CI: V_TRUNC_F64_e32 -define void @ftrunc_f64(double addrspace(1)* %out, double %x) { - %y = call double @llvm.trunc.f64(double %x) nounwind readnone - store double %y, double addrspace(1)* %out +; FUNC-LABEL: @ftrunc_f32: +; EG: TRUNC +; SI: V_TRUNC_F32_e32 +define void @ftrunc_f32(float addrspace(1)* %out, float %x) { + %y = call float @llvm.trunc.f32(float %x) nounwind readnone + store float %y, float addrspace(1)* %out ret void } -; CI-LABEL: @ftrunc_v2f64: -; CI: V_TRUNC_F64_e32 -; CI: V_TRUNC_F64_e32 -define void @ftrunc_v2f64(<2 x double> addrspace(1)* %out, <2 x double> %x) { - %y = call <2 x double> @llvm.trunc.v2f64(<2 x double> %x) nounwind readnone - store <2 x double> %y, <2 x double> addrspace(1)* %out +; FUNC-LABEL: @ftrunc_v2f32: +; EG: TRUNC +; EG: TRUNC +; SI: V_TRUNC_F32_e32 +; SI: V_TRUNC_F32_e32 +define void @ftrunc_v2f32(<2 x float> addrspace(1)* %out, <2 x float> %x) { + %y = call <2 x float> @llvm.trunc.v2f32(<2 x float> %x) nounwind readnone + store <2 x float> %y, <2 x float> addrspace(1)* %out ret void } -; FIXME-CI-LABEL: @ftrunc_v3f64: -; FIXME-CI: V_TRUNC_F64_e32 -; FIXME-CI: V_TRUNC_F64_e32 -; FIXME-CI: V_TRUNC_F64_e32 -; define void @ftrunc_v3f64(<3 x double> addrspace(1)* %out, <3 x double> %x) { -; %y = call <3 x double> @llvm.trunc.v3f64(<3 x double> %x) nounwind readnone -; store <3 x double> %y, <3 x double> addrspace(1)* %out +; FIXME-FUNC-LABEL: @ftrunc_v3f32: +; FIXME-EG: TRUNC +; FIXME-EG: TRUNC +; FIXME-EG: TRUNC +; FIXME-SI: V_TRUNC_F32_e32 +; FIXME-SI: V_TRUNC_F32_e32 +; FIXME-SI: V_TRUNC_F32_e32 +; define void @ftrunc_v3f32(<3 x float> addrspace(1)* %out, <3 x float> %x) { +; %y = call <3 x float> @llvm.trunc.v3f32(<3 x float> %x) nounwind readnone +; store <3 x float> %y, <3 x float> addrspace(1)* %out ; ret void ; } -; CI-LABEL: @ftrunc_v4f64: -; CI: V_TRUNC_F64_e32 -; CI: V_TRUNC_F64_e32 -; CI: V_TRUNC_F64_e32 -; CI: V_TRUNC_F64_e32 -define void @ftrunc_v4f64(<4 x double> addrspace(1)* %out, <4 x double> %x) { - %y = call <4 x double> @llvm.trunc.v4f64(<4 x double> %x) nounwind readnone - store <4 x double> %y, <4 x double> addrspace(1)* %out +; FUNC-LABEL: @ftrunc_v4f32: +; EG: TRUNC +; EG: TRUNC +; EG: TRUNC +; EG: TRUNC +; SI: V_TRUNC_F32_e32 +; SI: V_TRUNC_F32_e32 +; SI: V_TRUNC_F32_e32 +; SI: V_TRUNC_F32_e32 +define void @ftrunc_v4f32(<4 x float> addrspace(1)* %out, <4 x float> %x) { + %y = call <4 x float> @llvm.trunc.v4f32(<4 x float> %x) nounwind readnone + store <4 x float> %y, <4 x float> addrspace(1)* %out ret void } -; CI-LABEL: @ftrunc_v8f64: -; CI: V_TRUNC_F64_e32 -; CI: V_TRUNC_F64_e32 -; CI: V_TRUNC_F64_e32 -; CI: V_TRUNC_F64_e32 -; CI: V_TRUNC_F64_e32 -; CI: V_TRUNC_F64_e32 -; CI: V_TRUNC_F64_e32 -; CI: V_TRUNC_F64_e32 -define void @ftrunc_v8f64(<8 x double> addrspace(1)* %out, <8 x double> %x) { - %y = call <8 x double> @llvm.trunc.v8f64(<8 x double> %x) nounwind readnone - store <8 x double> %y, <8 x double> addrspace(1)* %out +; FUNC-LABEL: @ftrunc_v8f32: +; EG: TRUNC +; EG: TRUNC +; EG: TRUNC +; EG: TRUNC +; EG: TRUNC +; EG: TRUNC +; EG: TRUNC +; EG: TRUNC +; SI: V_TRUNC_F32_e32 +; SI: V_TRUNC_F32_e32 +; SI: V_TRUNC_F32_e32 +; SI: V_TRUNC_F32_e32 +; SI: V_TRUNC_F32_e32 +; SI: V_TRUNC_F32_e32 +; SI: V_TRUNC_F32_e32 +; SI: V_TRUNC_F32_e32 +define void @ftrunc_v8f32(<8 x float> addrspace(1)* %out, <8 x float> %x) { + %y = call <8 x float> @llvm.trunc.v8f32(<8 x float> %x) nounwind readnone + store <8 x float> %y, <8 x float> addrspace(1)* %out ret void } -; CI-LABEL: @ftrunc_v16f64: -; CI: V_TRUNC_F64_e32 -; CI: V_TRUNC_F64_e32 -; CI: V_TRUNC_F64_e32 -; CI: V_TRUNC_F64_e32 -; CI: V_TRUNC_F64_e32 -; CI: V_TRUNC_F64_e32 -; CI: V_TRUNC_F64_e32 -; CI: V_TRUNC_F64_e32 -; CI: V_TRUNC_F64_e32 -; CI: V_TRUNC_F64_e32 -; CI: V_TRUNC_F64_e32 -; CI: V_TRUNC_F64_e32 -; CI: V_TRUNC_F64_e32 -; CI: V_TRUNC_F64_e32 -; CI: V_TRUNC_F64_e32 -; CI: V_TRUNC_F64_e32 -define void @ftrunc_v16f64(<16 x double> addrspace(1)* %out, <16 x double> %x) { - %y = call <16 x double> @llvm.trunc.v16f64(<16 x double> %x) nounwind readnone - store <16 x double> %y, <16 x double> addrspace(1)* %out +; FUNC-LABEL: @ftrunc_v16f32: +; EG: TRUNC +; EG: TRUNC +; EG: TRUNC +; EG: TRUNC +; EG: TRUNC +; EG: TRUNC +; EG: TRUNC +; EG: TRUNC +; EG: TRUNC +; EG: TRUNC +; EG: TRUNC +; EG: TRUNC +; EG: TRUNC +; EG: TRUNC +; EG: TRUNC +; EG: TRUNC +; SI: V_TRUNC_F32_e32 +; SI: V_TRUNC_F32_e32 +; SI: V_TRUNC_F32_e32 +; SI: V_TRUNC_F32_e32 +; SI: V_TRUNC_F32_e32 +; SI: V_TRUNC_F32_e32 +; SI: V_TRUNC_F32_e32 +; SI: V_TRUNC_F32_e32 +; SI: V_TRUNC_F32_e32 +; SI: V_TRUNC_F32_e32 +; SI: V_TRUNC_F32_e32 +; SI: V_TRUNC_F32_e32 +; SI: V_TRUNC_F32_e32 +; SI: V_TRUNC_F32_e32 +; SI: V_TRUNC_F32_e32 +; SI: V_TRUNC_F32_e32 +define void @ftrunc_v16f32(<16 x float> addrspace(1)* %out, <16 x float> %x) { + %y = call <16 x float> @llvm.trunc.v16f32(<16 x float> %x) nounwind readnone + store <16 x float> %y, <16 x float> addrspace(1)* %out ret void } diff --git a/test/CodeGen/R600/gv-const-addrspace.ll b/test/CodeGen/R600/gv-const-addrspace.ll index 0176061..db64a6f 100644 --- a/test/CodeGen/R600/gv-const-addrspace.ll +++ b/test/CodeGen/R600/gv-const-addrspace.ll @@ -6,7 +6,7 @@ ; XXX: Test on SI once 64-bit adds are supportes. -@float_gv = internal addrspace(2) unnamed_addr constant [5 x float] [float 0.0, float 1.0, float 2.0, float 3.0, float 4.0], align 4 +@float_gv = internal unnamed_addr addrspace(2) constant [5 x float] [float 0.0, float 1.0, float 2.0, float 3.0, float 4.0], align 4 ; FUNC-LABEL: @float @@ -25,7 +25,7 @@ entry: ret void } -@i32_gv = internal addrspace(2) unnamed_addr constant [5 x i32] [i32 0, i32 1, i32 2, i32 3, i32 4], align 4 +@i32_gv = internal unnamed_addr addrspace(2) constant [5 x i32] [i32 0, i32 1, i32 2, i32 3, i32 4], align 4 ; FUNC-LABEL: @i32 @@ -47,7 +47,7 @@ entry: %struct.foo = type { float, [5 x i32] } -@struct_foo_gv = internal addrspace(2) unnamed_addr constant [1 x %struct.foo] [ %struct.foo { float 16.0, [5 x i32] [i32 0, i32 1, i32 2, i32 3, i32 4] } ] +@struct_foo_gv = internal unnamed_addr addrspace(2) constant [1 x %struct.foo] [ %struct.foo { float 16.0, [5 x i32] [i32 0, i32 1, i32 2, i32 3, i32 4] } ] ; FUNC-LABEL: @struct_foo_gv_load diff --git a/test/CodeGen/R600/indirect-private-64.ll b/test/CodeGen/R600/indirect-private-64.ll index 4d1f734..b127b7e 100644 --- a/test/CodeGen/R600/indirect-private-64.ll +++ b/test/CodeGen/R600/indirect-private-64.ll @@ -3,10 +3,8 @@ declare void @llvm.AMDGPU.barrier.local() noduplicate nounwind ; SI-LABEL: @private_access_f64_alloca: -; SI: V_MOVRELD_B32_e32 -; SI: V_MOVRELD_B32_e32 -; SI: V_MOVRELS_B32_e32 -; SI: V_MOVRELS_B32_e32 +; SI: DS_WRITE_B64 +; SI: DS_READ_B64 define void @private_access_f64_alloca(double addrspace(1)* noalias %out, double addrspace(1)* noalias %in, i32 %b) nounwind { %val = load double addrspace(1)* %in, align 8 %array = alloca double, i32 16, align 8 @@ -19,14 +17,10 @@ define void @private_access_f64_alloca(double addrspace(1)* noalias %out, double } ; SI-LABEL: @private_access_v2f64_alloca: -; SI: V_MOVRELD_B32_e32 -; SI: V_MOVRELD_B32_e32 -; SI: V_MOVRELD_B32_e32 -; SI: V_MOVRELD_B32_e32 -; SI: V_MOVRELS_B32_e32 -; SI: V_MOVRELS_B32_e32 -; SI: V_MOVRELS_B32_e32 -; SI: V_MOVRELS_B32_e32 +; SI: DS_WRITE_B64 +; SI: DS_WRITE_B64 +; SI: DS_READ_B64 +; SI: DS_READ_B64 define void @private_access_v2f64_alloca(<2 x double> addrspace(1)* noalias %out, <2 x double> addrspace(1)* noalias %in, i32 %b) nounwind { %val = load <2 x double> addrspace(1)* %in, align 16 %array = alloca <2 x double>, i32 16, align 16 @@ -39,10 +33,8 @@ define void @private_access_v2f64_alloca(<2 x double> addrspace(1)* noalias %out } ; SI-LABEL: @private_access_i64_alloca: -; SI: V_MOVRELD_B32_e32 -; SI: V_MOVRELD_B32_e32 -; SI: V_MOVRELS_B32_e32 -; SI: V_MOVRELS_B32_e32 +; SI: DS_WRITE_B64 +; SI: DS_READ_B64 define void @private_access_i64_alloca(i64 addrspace(1)* noalias %out, i64 addrspace(1)* noalias %in, i32 %b) nounwind { %val = load i64 addrspace(1)* %in, align 8 %array = alloca i64, i32 16, align 8 @@ -55,14 +47,10 @@ define void @private_access_i64_alloca(i64 addrspace(1)* noalias %out, i64 addrs } ; SI-LABEL: @private_access_v2i64_alloca: -; SI: V_MOVRELD_B32_e32 -; SI: V_MOVRELD_B32_e32 -; SI: V_MOVRELD_B32_e32 -; SI: V_MOVRELD_B32_e32 -; SI: V_MOVRELS_B32_e32 -; SI: V_MOVRELS_B32_e32 -; SI: V_MOVRELS_B32_e32 -; SI: V_MOVRELS_B32_e32 +; SI: DS_WRITE_B64 +; SI: DS_WRITE_B64 +; SI: DS_READ_B64 +; SI: DS_READ_B64 define void @private_access_v2i64_alloca(<2 x i64> addrspace(1)* noalias %out, <2 x i64> addrspace(1)* noalias %in, i32 %b) nounwind { %val = load <2 x i64> addrspace(1)* %in, align 16 %array = alloca <2 x i64>, i32 16, align 16 diff --git a/test/CodeGen/R600/input-mods.ll b/test/CodeGen/R600/input-mods.ll new file mode 100644 index 0000000..13bfbab --- /dev/null +++ b/test/CodeGen/R600/input-mods.ll @@ -0,0 +1,26 @@ +;RUN: llc < %s -march=r600 -mcpu=redwood | FileCheck %s --check-prefix=EG-CHECK +;RUN: llc < %s -march=r600 -mcpu=cayman | FileCheck %s --check-prefix=CM-CHECK + +;EG-CHECK-LABEL: @test +;EG-CHECK: EXP_IEEE * +;CM-CHECK-LABEL: @test +;CM-CHECK: EXP_IEEE T{{[0-9]+}}.X, -|T{{[0-9]+}}.X| +;CM-CHECK: EXP_IEEE T{{[0-9]+}}.Y (MASKED), -|T{{[0-9]+}}.X| +;CM-CHECK: EXP_IEEE T{{[0-9]+}}.Z (MASKED), -|T{{[0-9]+}}.X| +;CM-CHECK: EXP_IEEE * T{{[0-9]+}}.W (MASKED), -|T{{[0-9]+}}.X| + +define void @test(<4 x float> inreg %reg0) #0 { + %r0 = extractelement <4 x float> %reg0, i32 0 + %r1 = call float @llvm.fabs.f32(float %r0) + %r2 = fsub float -0.000000e+00, %r1 + %r3 = call float @llvm.exp2.f32(float %r2) + %vec = insertelement <4 x float> undef, float %r3, i32 0 + call void @llvm.R600.store.swizzle(<4 x float> %vec, i32 0, i32 0) + ret void +} + +declare float @llvm.exp2.f32(float) readnone +declare float @llvm.fabs.f32(float) readnone +declare void @llvm.R600.store.swizzle(<4 x float>, i32, i32) + +attributes #0 = { "ShaderType"="0" } diff --git a/test/CodeGen/R600/large-alloca.ll b/test/CodeGen/R600/large-alloca.ll new file mode 100644 index 0000000..d8be6d4 --- /dev/null +++ b/test/CodeGen/R600/large-alloca.ll @@ -0,0 +1,14 @@ +; XFAIL: * +; REQUIRES: asserts +; RUN: llc -march=r600 -mcpu=SI < %s + +define void @large_alloca(i32 addrspace(1)* %out, i32 %x, i32 %y) nounwind { + %large = alloca [8192 x i32], align 4 + %gep = getelementptr [8192 x i32]* %large, i32 0, i32 8191 + store i32 %x, i32* %gep + %gep1 = getelementptr [8192 x i32]* %large, i32 0, i32 %y + %0 = load i32* %gep1 + store i32 %0, i32 addrspace(1)* %out + ret void +} + diff --git a/test/CodeGen/R600/large-constant-initializer.ll b/test/CodeGen/R600/large-constant-initializer.ll new file mode 100644 index 0000000..552cd05 --- /dev/null +++ b/test/CodeGen/R600/large-constant-initializer.ll @@ -0,0 +1,19 @@ +; XFAIL: * +; REQUIRES: asserts +; RUN: llc -march=r600 -mcpu=SI < %s + +@gv = external unnamed_addr addrspace(2) constant [239 x i32], align 4 + +define void @opencv_cvtfloat_crash(i32 addrspace(1)* %out, i32 %x) nounwind { + %val = load i32 addrspace(2)* getelementptr ([239 x i32] addrspace(2)* @gv, i64 0, i64 239), align 4 + %mul12 = mul nsw i32 %val, 7 + br i1 undef, label %exit, label %bb + +bb: + %cmp = icmp slt i32 %x, 0 + br label %exit + +exit: + ret void +} + diff --git a/test/CodeGen/R600/lds-output-queue.ll b/test/CodeGen/R600/lds-output-queue.ll index af0db0d..d5dc061 100644 --- a/test/CodeGen/R600/lds-output-queue.ll +++ b/test/CodeGen/R600/lds-output-queue.ll @@ -8,7 +8,7 @@ ; CHECK-NOT: ALU clause ; CHECK: MOV * T{{[0-9]\.[XYZW]}}, OQAP -@local_mem = internal addrspace(3) unnamed_addr global [2 x i32] [i32 1, i32 2], align 4 +@local_mem = internal unnamed_addr addrspace(3) global [2 x i32] [i32 1, i32 2], align 4 define void @lds_input_queue(i32 addrspace(1)* %out, i32 addrspace(1)* %in, i32 %index) { entry: diff --git a/test/CodeGen/R600/lds-size.ll b/test/CodeGen/R600/lds-size.ll index 2185180..9182e25 100644 --- a/test/CodeGen/R600/lds-size.ll +++ b/test/CodeGen/R600/lds-size.ll @@ -6,7 +6,7 @@ ; CHECK-LABEL: @test ; CHECK: .long 166120 ; CHECK-NEXT: .long 1 -@lds = internal addrspace(3) unnamed_addr global i32 zeroinitializer, align 4 +@lds = internal unnamed_addr addrspace(3) global i32 zeroinitializer, align 4 define void @test(i32 addrspace(1)* %out, i32 %cond) { entry: diff --git a/test/CodeGen/R600/lit.local.cfg b/test/CodeGen/R600/lit.local.cfg index 2d8930a..ad9ce25 100644 --- a/test/CodeGen/R600/lit.local.cfg +++ b/test/CodeGen/R600/lit.local.cfg @@ -1,3 +1,2 @@ -targets = set(config.root.targets_to_build.split()) -if not 'R600' in targets: +if not 'R600' in config.root.targets: config.unsupported = True diff --git a/test/CodeGen/R600/llvm.AMDGPU.abs.ll b/test/CodeGen/R600/llvm.AMDGPU.abs.ll new file mode 100644 index 0000000..a0a47b7 --- /dev/null +++ b/test/CodeGen/R600/llvm.AMDGPU.abs.ll @@ -0,0 +1,48 @@ +; RUN: llc -march=r600 -mcpu=SI -verify-machineinstrs < %s | FileCheck -check-prefix=SI -check-prefix=FUNC %s +; RUN: llc -march=r600 -mcpu=cypress -verify-machineinstrs < %s | FileCheck -check-prefix=EG -check-prefix=FUNC %s + +declare i32 @llvm.AMDGPU.abs(i32) nounwind readnone + +; Legacy name +declare i32 @llvm.AMDIL.abs.i32(i32) nounwind readnone + +; FUNC-LABEL: @s_abs_i32 +; SI: S_SUB_I32 +; SI: S_MAX_I32 +; SI: S_ENDPGM + +; EG: SUB_INT +; EG: MAX_INT +define void @s_abs_i32(i32 addrspace(1)* %out, i32 %src) nounwind { + %abs = call i32 @llvm.AMDGPU.abs(i32 %src) nounwind readnone + store i32 %abs, i32 addrspace(1)* %out, align 4 + ret void +} + +; FUNC-LABEL: @v_abs_i32 +; SI: V_SUB_I32_e32 +; SI: V_MAX_I32_e32 +; SI: S_ENDPGM + +; EG: SUB_INT +; EG: MAX_INT +define void @v_abs_i32(i32 addrspace(1)* %out, i32 addrspace(1)* %src) nounwind { + %val = load i32 addrspace(1)* %src, align 4 + %abs = call i32 @llvm.AMDGPU.abs(i32 %val) nounwind readnone + store i32 %abs, i32 addrspace(1)* %out, align 4 + ret void +} + +; FUNC-LABEL: @abs_i32_legacy_amdil +; SI: V_SUB_I32_e32 +; SI: V_MAX_I32_e32 +; SI: S_ENDPGM + +; EG: SUB_INT +; EG: MAX_INT +define void @abs_i32_legacy_amdil(i32 addrspace(1)* %out, i32 addrspace(1)* %src) nounwind { + %val = load i32 addrspace(1)* %src, align 4 + %abs = call i32 @llvm.AMDIL.abs.i32(i32 %val) nounwind readnone + store i32 %abs, i32 addrspace(1)* %out, align 4 + ret void +} diff --git a/test/CodeGen/R600/llvm.AMDGPU.brev.ll b/test/CodeGen/R600/llvm.AMDGPU.brev.ll new file mode 100644 index 0000000..68a5ad0 --- /dev/null +++ b/test/CodeGen/R600/llvm.AMDGPU.brev.ll @@ -0,0 +1,27 @@ +; RUN: llc -march=r600 -mcpu=SI -verify-machineinstrs < %s | FileCheck -check-prefix=SI -check-prefix=FUNC %s + +declare i32 @llvm.AMDGPU.brev(i32) nounwind readnone + +; FUNC-LABEL: @s_brev_i32: +; SI: S_LOAD_DWORD [[VAL:s[0-9]+]], +; SI: S_BREV_B32 [[SRESULT:s[0-9]+]], [[VAL]] +; SI: V_MOV_B32_e32 [[VRESULT:v[0-9]+]], [[SRESULT]] +; SI: BUFFER_STORE_DWORD [[VRESULT]], +; SI: S_ENDPGM +define void @s_brev_i32(i32 addrspace(1)* noalias %out, i32 %val) nounwind { + %ctlz = call i32 @llvm.AMDGPU.brev(i32 %val) nounwind readnone + store i32 %ctlz, i32 addrspace(1)* %out, align 4 + ret void +} + +; FUNC-LABEL: @v_brev_i32: +; SI: BUFFER_LOAD_DWORD [[VAL:v[0-9]+]], +; SI: V_BFREV_B32_e32 [[RESULT:v[0-9]+]], [[VAL]] +; SI: BUFFER_STORE_DWORD [[RESULT]], +; SI: S_ENDPGM +define void @v_brev_i32(i32 addrspace(1)* noalias %out, i32 addrspace(1)* noalias %valptr) nounwind { + %val = load i32 addrspace(1)* %valptr, align 4 + %ctlz = call i32 @llvm.AMDGPU.brev(i32 %val) nounwind readnone + store i32 %ctlz, i32 addrspace(1)* %out, align 4 + ret void +} diff --git a/test/CodeGen/R600/llvm.AMDGPU.clamp.ll b/test/CodeGen/R600/llvm.AMDGPU.clamp.ll new file mode 100644 index 0000000..d608953 --- /dev/null +++ b/test/CodeGen/R600/llvm.AMDGPU.clamp.ll @@ -0,0 +1,28 @@ +; RUN: llc -march=r600 -mcpu=SI -verify-machineinstrs < %s | FileCheck -check-prefix=SI -check-prefix=FUNC %s +; RUN: llc -march=r600 -mcpu=cypress -verify-machineinstrs < %s | FileCheck -check-prefix=EG -check-prefix=FUNC %s + +declare float @llvm.AMDGPU.clamp.f32(float, float, float) nounwind readnone +declare float @llvm.AMDIL.clamp.f32(float, float, float) nounwind readnone + +; FUNC-LABEL: @clamp_0_1_f32 +; SI: S_LOAD_DWORD [[ARG:s[0-9]+]], +; SI: V_ADD_F32_e64 [[RESULT:v[0-9]+]], [[ARG]], 0, 1, 0 +; SI: BUFFER_STORE_DWORD [[RESULT]] +; SI: S_ENDPGM + +; EG: MOV_SAT +define void @clamp_0_1_f32(float addrspace(1)* %out, float %src) nounwind { + %clamp = call float @llvm.AMDGPU.clamp.f32(float %src, float 0.0, float 1.0) nounwind readnone + store float %clamp, float addrspace(1)* %out, align 4 + ret void +} + +; FUNC-LABEL: @clamp_0_1_amdil_legacy_f32 +; SI: S_LOAD_DWORD [[ARG:s[0-9]+]], +; SI: V_ADD_F32_e64 [[RESULT:v[0-9]+]], [[ARG]], 0, 1, 0 +; SI: BUFFER_STORE_DWORD [[RESULT]] +define void @clamp_0_1_amdil_legacy_f32(float addrspace(1)* %out, float %src) nounwind { + %clamp = call float @llvm.AMDIL.clamp.f32(float %src, float 0.0, float 1.0) nounwind readnone + store float %clamp, float addrspace(1)* %out, align 4 + ret void +} diff --git a/test/CodeGen/R600/llvm.AMDGPU.cvt_f32_ubyte.ll b/test/CodeGen/R600/llvm.AMDGPU.cvt_f32_ubyte.ll new file mode 100644 index 0000000..6facb47 --- /dev/null +++ b/test/CodeGen/R600/llvm.AMDGPU.cvt_f32_ubyte.ll @@ -0,0 +1,42 @@ +; RUN: llc -march=r600 -mcpu=SI < %s | FileCheck -check-prefix=SI %s + +declare float @llvm.AMDGPU.cvt.f32.ubyte0(i32) nounwind readnone +declare float @llvm.AMDGPU.cvt.f32.ubyte1(i32) nounwind readnone +declare float @llvm.AMDGPU.cvt.f32.ubyte2(i32) nounwind readnone +declare float @llvm.AMDGPU.cvt.f32.ubyte3(i32) nounwind readnone + +; SI-LABEL: @test_unpack_byte0_to_float: +; SI: V_CVT_F32_UBYTE0 +define void @test_unpack_byte0_to_float(float addrspace(1)* %out, i32 addrspace(1)* %in) nounwind { + %val = load i32 addrspace(1)* %in, align 4 + %cvt = call float @llvm.AMDGPU.cvt.f32.ubyte0(i32 %val) nounwind readnone + store float %cvt, float addrspace(1)* %out, align 4 + ret void +} + +; SI-LABEL: @test_unpack_byte1_to_float: +; SI: V_CVT_F32_UBYTE1 +define void @test_unpack_byte1_to_float(float addrspace(1)* %out, i32 addrspace(1)* %in) nounwind { + %val = load i32 addrspace(1)* %in, align 4 + %cvt = call float @llvm.AMDGPU.cvt.f32.ubyte1(i32 %val) nounwind readnone + store float %cvt, float addrspace(1)* %out, align 4 + ret void +} + +; SI-LABEL: @test_unpack_byte2_to_float: +; SI: V_CVT_F32_UBYTE2 +define void @test_unpack_byte2_to_float(float addrspace(1)* %out, i32 addrspace(1)* %in) nounwind { + %val = load i32 addrspace(1)* %in, align 4 + %cvt = call float @llvm.AMDGPU.cvt.f32.ubyte2(i32 %val) nounwind readnone + store float %cvt, float addrspace(1)* %out, align 4 + ret void +} + +; SI-LABEL: @test_unpack_byte3_to_float: +; SI: V_CVT_F32_UBYTE3 +define void @test_unpack_byte3_to_float(float addrspace(1)* %out, i32 addrspace(1)* %in) nounwind { + %val = load i32 addrspace(1)* %in, align 4 + %cvt = call float @llvm.AMDGPU.cvt.f32.ubyte3(i32 %val) nounwind readnone + store float %cvt, float addrspace(1)* %out, align 4 + ret void +} diff --git a/test/CodeGen/R600/llvm.AMDGPU.div_fixup.ll b/test/CodeGen/R600/llvm.AMDGPU.div_fixup.ll new file mode 100644 index 0000000..c8c7357 --- /dev/null +++ b/test/CodeGen/R600/llvm.AMDGPU.div_fixup.ll @@ -0,0 +1,27 @@ +; RUN: llc -march=r600 -mcpu=SI -verify-machineinstrs < %s | FileCheck -check-prefix=SI %s + +declare float @llvm.AMDGPU.div.fixup.f32(float, float, float) nounwind readnone +declare double @llvm.AMDGPU.div.fixup.f64(double, double, double) nounwind readnone + +; SI-LABEL: @test_div_fixup_f32: +; SI-DAG: S_LOAD_DWORD [[SA:s[0-9]+]], s{{\[[0-9]+:[0-9]+\]}}, 0xb +; SI-DAG: S_LOAD_DWORD [[SC:s[0-9]+]], s{{\[[0-9]+:[0-9]+\]}}, 0xd +; SI-DAG: V_MOV_B32_e32 [[VC:v[0-9]+]], [[SC]] +; SI-DAG: S_LOAD_DWORD [[SB:s[0-9]+]], s{{\[[0-9]+:[0-9]+\]}}, 0xc +; SI: V_MOV_B32_e32 [[VB:v[0-9]+]], [[SB]] +; SI: V_DIV_FIXUP_F32 [[RESULT:v[0-9]+]], [[SA]], [[VB]], [[VC]] +; SI: BUFFER_STORE_DWORD [[RESULT]], +; SI: S_ENDPGM +define void @test_div_fixup_f32(float addrspace(1)* %out, float %a, float %b, float %c) nounwind { + %result = call float @llvm.AMDGPU.div.fixup.f32(float %a, float %b, float %c) nounwind readnone + store float %result, float addrspace(1)* %out, align 4 + ret void +} + +; SI-LABEL: @test_div_fixup_f64: +; SI: V_DIV_FIXUP_F64 +define void @test_div_fixup_f64(double addrspace(1)* %out, double %a, double %b, double %c) nounwind { + %result = call double @llvm.AMDGPU.div.fixup.f64(double %a, double %b, double %c) nounwind readnone + store double %result, double addrspace(1)* %out, align 8 + ret void +} diff --git a/test/CodeGen/R600/llvm.AMDGPU.div_fmas.ll b/test/CodeGen/R600/llvm.AMDGPU.div_fmas.ll new file mode 100644 index 0000000..4f1e827 --- /dev/null +++ b/test/CodeGen/R600/llvm.AMDGPU.div_fmas.ll @@ -0,0 +1,27 @@ +; RUN: llc -march=r600 -mcpu=SI -verify-machineinstrs < %s | FileCheck -check-prefix=SI %s + +declare float @llvm.AMDGPU.div.fmas.f32(float, float, float) nounwind readnone +declare double @llvm.AMDGPU.div.fmas.f64(double, double, double) nounwind readnone + +; SI-LABEL: @test_div_fmas_f32: +; SI-DAG: S_LOAD_DWORD [[SA:s[0-9]+]], s{{\[[0-9]+:[0-9]+\]}}, 0xb +; SI-DAG: S_LOAD_DWORD [[SC:s[0-9]+]], s{{\[[0-9]+:[0-9]+\]}}, 0xd +; SI-DAG: V_MOV_B32_e32 [[VC:v[0-9]+]], [[SC]] +; SI-DAG: S_LOAD_DWORD [[SB:s[0-9]+]], s{{\[[0-9]+:[0-9]+\]}}, 0xc +; SI: V_MOV_B32_e32 [[VB:v[0-9]+]], [[SB]] +; SI: V_DIV_FMAS_F32 [[RESULT:v[0-9]+]], [[SA]], [[VB]], [[VC]] +; SI: BUFFER_STORE_DWORD [[RESULT]], +; SI: S_ENDPGM +define void @test_div_fmas_f32(float addrspace(1)* %out, float %a, float %b, float %c) nounwind { + %result = call float @llvm.AMDGPU.div.fmas.f32(float %a, float %b, float %c) nounwind readnone + store float %result, float addrspace(1)* %out, align 4 + ret void +} + +; SI-LABEL: @test_div_fmas_f64: +; SI: V_DIV_FMAS_F64 +define void @test_div_fmas_f64(double addrspace(1)* %out, double %a, double %b, double %c) nounwind { + %result = call double @llvm.AMDGPU.div.fmas.f64(double %a, double %b, double %c) nounwind readnone + store double %result, double addrspace(1)* %out, align 8 + ret void +} diff --git a/test/CodeGen/R600/llvm.AMDGPU.div_scale.ll b/test/CodeGen/R600/llvm.AMDGPU.div_scale.ll new file mode 100644 index 0000000..527c8da --- /dev/null +++ b/test/CodeGen/R600/llvm.AMDGPU.div_scale.ll @@ -0,0 +1,48 @@ +; RUN: llc -march=r600 -mcpu=SI -verify-machineinstrs < %s | FileCheck -check-prefix=SI %s + +declare { float, i1 } @llvm.AMDGPU.div.scale.f32(float, float, i1) nounwind readnone +declare { double, i1 } @llvm.AMDGPU.div.scale.f64(double, double, i1) nounwind readnone + +; SI-LABEL @test_div_scale_f32_1: +; SI: V_DIV_SCALE_F32 +define void @test_div_scale_f32_1(float addrspace(1)* %out, float addrspace(1)* %aptr, float addrspace(1)* %bptr) nounwind { + %a = load float addrspace(1)* %aptr, align 4 + %b = load float addrspace(1)* %bptr, align 4 + %result = call { float, i1 } @llvm.AMDGPU.div.scale.f32(float %a, float %b, i1 false) nounwind readnone + %result0 = extractvalue { float, i1 } %result, 0 + store float %result0, float addrspace(1)* %out, align 4 + ret void +} + +; SI-LABEL @test_div_scale_f32_2: +; SI: V_DIV_SCALE_F32 +define void @test_div_scale_f32_2(float addrspace(1)* %out, float addrspace(1)* %aptr, float addrspace(1)* %bptr) nounwind { + %a = load float addrspace(1)* %aptr, align 4 + %b = load float addrspace(1)* %bptr, align 4 + %result = call { float, i1 } @llvm.AMDGPU.div.scale.f32(float %a, float %b, i1 true) nounwind readnone + %result0 = extractvalue { float, i1 } %result, 0 + store float %result0, float addrspace(1)* %out, align 4 + ret void +} + +; SI-LABEL @test_div_scale_f64_1: +; SI: V_DIV_SCALE_F64 +define void @test_div_scale_f64_1(double addrspace(1)* %out, double addrspace(1)* %aptr, double addrspace(1)* %bptr, double addrspace(1)* %cptr) nounwind { + %a = load double addrspace(1)* %aptr, align 8 + %b = load double addrspace(1)* %bptr, align 8 + %result = call { double, i1 } @llvm.AMDGPU.div.scale.f64(double %a, double %b, i1 false) nounwind readnone + %result0 = extractvalue { double, i1 } %result, 0 + store double %result0, double addrspace(1)* %out, align 8 + ret void +} + +; SI-LABEL @test_div_scale_f64_1: +; SI: V_DIV_SCALE_F64 +define void @test_div_scale_f64_2(double addrspace(1)* %out, double addrspace(1)* %aptr, double addrspace(1)* %bptr, double addrspace(1)* %cptr) nounwind { + %a = load double addrspace(1)* %aptr, align 8 + %b = load double addrspace(1)* %bptr, align 8 + %result = call { double, i1 } @llvm.AMDGPU.div.scale.f64(double %a, double %b, i1 true) nounwind readnone + %result0 = extractvalue { double, i1 } %result, 0 + store double %result0, double addrspace(1)* %out, align 8 + ret void +} diff --git a/test/CodeGen/R600/llvm.AMDGPU.fract.ll b/test/CodeGen/R600/llvm.AMDGPU.fract.ll new file mode 100644 index 0000000..72ec1c5 --- /dev/null +++ b/test/CodeGen/R600/llvm.AMDGPU.fract.ll @@ -0,0 +1,27 @@ +; RUN: llc -march=r600 -mcpu=SI -verify-machineinstrs < %s | FileCheck -check-prefix=SI -check-prefix=FUNC %s +; RUN: llc -march=r600 -mcpu=cypress -verify-machineinstrs < %s | FileCheck -check-prefix=EG -check-prefix=FUNC %s + +declare float @llvm.AMDGPU.fract.f32(float) nounwind readnone + +; Legacy name +declare float @llvm.AMDIL.fraction.f32(float) nounwind readnone + +; FUNC-LABEL: @fract_f32 +; SI: V_FRACT_F32 +; EG: FRACT +define void @fract_f32(float addrspace(1)* %out, float addrspace(1)* %src) nounwind { + %val = load float addrspace(1)* %src, align 4 + %fract = call float @llvm.AMDGPU.fract.f32(float %val) nounwind readnone + store float %fract, float addrspace(1)* %out, align 4 + ret void +} + +; FUNC-LABEL: @fract_f32_legacy_amdil +; SI: V_FRACT_F32 +; EG: FRACT +define void @fract_f32_legacy_amdil(float addrspace(1)* %out, float addrspace(1)* %src) nounwind { + %val = load float addrspace(1)* %src, align 4 + %fract = call float @llvm.AMDIL.fraction.f32(float %val) nounwind readnone + store float %fract, float addrspace(1)* %out, align 4 + ret void +} diff --git a/test/CodeGen/R600/llvm.AMDGPU.legacy.rsq.ll b/test/CodeGen/R600/llvm.AMDGPU.legacy.rsq.ll new file mode 100644 index 0000000..51964ee --- /dev/null +++ b/test/CodeGen/R600/llvm.AMDGPU.legacy.rsq.ll @@ -0,0 +1,13 @@ +; RUN: llc -march=r600 -mcpu=SI -verify-machineinstrs < %s | FileCheck -check-prefix=SI -check-prefix=FUNC %s +; RUN: llc -march=r600 -mcpu=cypress -verify-machineinstrs < %s | FileCheck -check-prefix=EG -check-prefix=FUNC %s + +declare float @llvm.AMDGPU.legacy.rsq(float) nounwind readnone + +; FUNC-LABEL: @rsq_legacy_f32 +; SI: V_RSQ_LEGACY_F32_e32 +; EG: RECIPSQRT_IEEE +define void @rsq_legacy_f32(float addrspace(1)* %out, float %src) nounwind { + %rsq = call float @llvm.AMDGPU.legacy.rsq(float %src) nounwind readnone + store float %rsq, float addrspace(1)* %out, align 4 + ret void +} diff --git a/test/CodeGen/R600/llvm.AMDGPU.rcp.ll b/test/CodeGen/R600/llvm.AMDGPU.rcp.ll new file mode 100644 index 0000000..ca5260d --- /dev/null +++ b/test/CodeGen/R600/llvm.AMDGPU.rcp.ll @@ -0,0 +1,58 @@ +; RUN: llc -march=r600 -mcpu=SI -verify-machineinstrs < %s | FileCheck -check-prefix=SI -check-prefix=FUNC %s + +declare float @llvm.AMDGPU.rcp.f32(float) nounwind readnone +declare double @llvm.AMDGPU.rcp.f64(double) nounwind readnone + + +declare float @llvm.sqrt.f32(float) nounwind readnone +declare double @llvm.sqrt.f64(double) nounwind readnone + +; FUNC-LABEL: @rcp_f32 +; SI: V_RCP_F32_e32 +define void @rcp_f32(float addrspace(1)* %out, float %src) nounwind { + %rcp = call float @llvm.AMDGPU.rcp.f32(float %src) nounwind readnone + store float %rcp, float addrspace(1)* %out, align 4 + ret void +} + +; FUNC-LABEL: @rcp_f64 +; SI: V_RCP_F64_e32 +define void @rcp_f64(double addrspace(1)* %out, double %src) nounwind { + %rcp = call double @llvm.AMDGPU.rcp.f64(double %src) nounwind readnone + store double %rcp, double addrspace(1)* %out, align 8 + ret void +} + +; FUNC-LABEL: @rcp_pat_f32 +; SI: V_RCP_F32_e32 +define void @rcp_pat_f32(float addrspace(1)* %out, float %src) nounwind { + %rcp = fdiv float 1.0, %src + store float %rcp, float addrspace(1)* %out, align 4 + ret void +} + +; FUNC-LABEL: @rcp_pat_f64 +; SI: V_RCP_F64_e32 +define void @rcp_pat_f64(double addrspace(1)* %out, double %src) nounwind { + %rcp = fdiv double 1.0, %src + store double %rcp, double addrspace(1)* %out, align 8 + ret void +} + +; FUNC-LABEL: @rsq_rcp_pat_f32 +; SI: V_RSQ_F32_e32 +define void @rsq_rcp_pat_f32(float addrspace(1)* %out, float %src) nounwind { + %sqrt = call float @llvm.sqrt.f32(float %src) nounwind readnone + %rcp = call float @llvm.AMDGPU.rcp.f32(float %sqrt) nounwind readnone + store float %rcp, float addrspace(1)* %out, align 4 + ret void +} + +; FUNC-LABEL: @rsq_rcp_pat_f64 +; SI: V_RSQ_F64_e32 +define void @rsq_rcp_pat_f64(double addrspace(1)* %out, double %src) nounwind { + %sqrt = call double @llvm.sqrt.f64(double %src) nounwind readnone + %rcp = call double @llvm.AMDGPU.rcp.f64(double %sqrt) nounwind readnone + store double %rcp, double addrspace(1)* %out, align 8 + ret void +} diff --git a/test/CodeGen/R600/llvm.AMDGPU.rsq.clamped.f64.ll b/test/CodeGen/R600/llvm.AMDGPU.rsq.clamped.f64.ll new file mode 100644 index 0000000..100d6ff --- /dev/null +++ b/test/CodeGen/R600/llvm.AMDGPU.rsq.clamped.f64.ll @@ -0,0 +1,11 @@ +; RUN: llc -march=r600 -mcpu=SI -verify-machineinstrs < %s | FileCheck -check-prefix=SI -check-prefix=FUNC %s + +declare double @llvm.AMDGPU.rsq.clamped.f64(double) nounwind readnone + +; FUNC-LABEL: @rsq_clamped_f64 +; SI: V_RSQ_CLAMP_F64_e32 +define void @rsq_clamped_f64(double addrspace(1)* %out, double %src) nounwind { + %rsq_clamped = call double @llvm.AMDGPU.rsq.clamped.f64(double %src) nounwind readnone + store double %rsq_clamped, double addrspace(1)* %out, align 8 + ret void +} diff --git a/test/CodeGen/R600/llvm.AMDGPU.rsq.clamped.ll b/test/CodeGen/R600/llvm.AMDGPU.rsq.clamped.ll new file mode 100644 index 0000000..683df73 --- /dev/null +++ b/test/CodeGen/R600/llvm.AMDGPU.rsq.clamped.ll @@ -0,0 +1,14 @@ +; RUN: llc -march=r600 -mcpu=SI -verify-machineinstrs < %s | FileCheck -check-prefix=SI -check-prefix=FUNC %s +; RUN: llc -march=r600 -mcpu=cypress -verify-machineinstrs < %s | FileCheck -check-prefix=EG -check-prefix=FUNC %s + + +declare float @llvm.AMDGPU.rsq.clamped.f32(float) nounwind readnone + +; FUNC-LABEL: @rsq_clamped_f32 +; SI: V_RSQ_CLAMP_F32_e32 +; EG: RECIPSQRT_CLAMPED +define void @rsq_clamped_f32(float addrspace(1)* %out, float %src) nounwind { + %rsq_clamped = call float @llvm.AMDGPU.rsq.clamped.f32(float %src) nounwind readnone + store float %rsq_clamped, float addrspace(1)* %out, align 4 + ret void +} diff --git a/test/CodeGen/R600/llvm.AMDGPU.rsq.ll b/test/CodeGen/R600/llvm.AMDGPU.rsq.ll new file mode 100644 index 0000000..27cf6b2 --- /dev/null +++ b/test/CodeGen/R600/llvm.AMDGPU.rsq.ll @@ -0,0 +1,13 @@ +; RUN: llc -march=r600 -mcpu=SI -verify-machineinstrs < %s | FileCheck -check-prefix=SI -check-prefix=FUNC %s +; RUN: llc -march=r600 -mcpu=cypress -verify-machineinstrs < %s | FileCheck -check-prefix=EG -check-prefix=FUNC %s + +declare float @llvm.AMDGPU.rsq.f32(float) nounwind readnone + +; FUNC-LABEL: @rsq_f32 +; SI: V_RSQ_F32_e32 +; EG: RECIPSQRT_IEEE +define void @rsq_f32(float addrspace(1)* %out, float %src) nounwind { + %rsq = call float @llvm.AMDGPU.rsq.f32(float %src) nounwind readnone + store float %rsq, float addrspace(1)* %out, align 4 + ret void +} diff --git a/test/CodeGen/R600/llvm.AMDGPU.trig_preop.ll b/test/CodeGen/R600/llvm.AMDGPU.trig_preop.ll new file mode 100644 index 0000000..1c736d4 --- /dev/null +++ b/test/CodeGen/R600/llvm.AMDGPU.trig_preop.ll @@ -0,0 +1,29 @@ +; RUN: llc -march=r600 -mcpu=SI -verify-machineinstrs < %s | FileCheck -check-prefix=SI %s + +declare double @llvm.AMDGPU.trig.preop.f64(double, i32) nounwind readnone + +; SI-LABEL: @test_trig_preop_f64: +; SI-DAG: BUFFER_LOAD_DWORD [[SEG:v[0-9]+]] +; SI-DAG: BUFFER_LOAD_DWORDX2 [[SRC:v\[[0-9]+:[0-9]+\]]], +; SI: V_TRIG_PREOP_F64 [[RESULT:v\[[0-9]+:[0-9]+\]]], [[SRC]], [[SEG]] +; SI: BUFFER_STORE_DWORDX2 [[RESULT]], +; SI: S_ENDPGM +define void @test_trig_preop_f64(double addrspace(1)* %out, double addrspace(1)* %aptr, i32 addrspace(1)* %bptr) nounwind { + %a = load double addrspace(1)* %aptr, align 8 + %b = load i32 addrspace(1)* %bptr, align 4 + %result = call double @llvm.AMDGPU.trig.preop.f64(double %a, i32 %b) nounwind readnone + store double %result, double addrspace(1)* %out, align 8 + ret void +} + +; SI-LABEL: @test_trig_preop_f64_imm_segment: +; SI: BUFFER_LOAD_DWORDX2 [[SRC:v\[[0-9]+:[0-9]+\]]], +; SI: V_TRIG_PREOP_F64 [[RESULT:v\[[0-9]+:[0-9]+\]]], [[SRC]], 7 +; SI: BUFFER_STORE_DWORDX2 [[RESULT]], +; SI: S_ENDPGM +define void @test_trig_preop_f64_imm_segment(double addrspace(1)* %out, double addrspace(1)* %aptr) nounwind { + %a = load double addrspace(1)* %aptr, align 8 + %result = call double @llvm.AMDGPU.trig.preop.f64(double %a, i32 7) nounwind readnone + store double %result, double addrspace(1)* %out, align 8 + ret void +} diff --git a/test/CodeGen/R600/llvm.SI.gather4.ll b/test/CodeGen/R600/llvm.SI.gather4.ll new file mode 100644 index 0000000..8402faa --- /dev/null +++ b/test/CodeGen/R600/llvm.SI.gather4.ll @@ -0,0 +1,508 @@ +;RUN: llc < %s -march=r600 -mcpu=verde -verify-machineinstrs | FileCheck %s + +;CHECK-LABEL: @gather4_v2 +;CHECK: IMAGE_GATHER4 {{v\[[0-9]+:[0-9]+\]}}, 1, 0, 0, -1, 0, 0, 0, 0, {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}} +define void @gather4_v2() #0 { +main_body: + %r = call <4 x float> @llvm.SI.gather4.v2i32(<2 x i32> undef, <32 x i8> undef, <16 x i8> undef, i32 1, i32 0, i32 0, i32 1, i32 0, i32 0, i32 0, i32 0) + %r0 = extractelement <4 x float> %r, i32 0 + %r1 = extractelement <4 x float> %r, i32 1 + %r2 = extractelement <4 x float> %r, i32 2 + %r3 = extractelement <4 x float> %r, i32 3 + call void @llvm.SI.export(i32 15, i32 1, i32 1, i32 0, i32 1, float %r0, float %r1, float %r2, float %r3) + ret void +} + +;CHECK-LABEL: @gather4 +;CHECK: IMAGE_GATHER4 {{v\[[0-9]+:[0-9]+\]}}, 1, 0, 0, -1, 0, 0, 0, 0, {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}} +define void @gather4() #0 { +main_body: + %r = call <4 x float> @llvm.SI.gather4.v4i32(<4 x i32> undef, <32 x i8> undef, <16 x i8> undef, i32 1, i32 0, i32 0, i32 1, i32 0, i32 0, i32 0, i32 0) + %r0 = extractelement <4 x float> %r, i32 0 + %r1 = extractelement <4 x float> %r, i32 1 + %r2 = extractelement <4 x float> %r, i32 2 + %r3 = extractelement <4 x float> %r, i32 3 + call void @llvm.SI.export(i32 15, i32 1, i32 1, i32 0, i32 1, float %r0, float %r1, float %r2, float %r3) + ret void +} + +;CHECK-LABEL: @gather4_cl +;CHECK: IMAGE_GATHER4_CL {{v\[[0-9]+:[0-9]+\]}}, 1, 0, 0, -1, 0, 0, 0, 0, {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}} +define void @gather4_cl() #0 { +main_body: + %r = call <4 x float> @llvm.SI.gather4.cl.v4i32(<4 x i32> undef, <32 x i8> undef, <16 x i8> undef, i32 1, i32 0, i32 0, i32 1, i32 0, i32 0, i32 0, i32 0) + %r0 = extractelement <4 x float> %r, i32 0 + %r1 = extractelement <4 x float> %r, i32 1 + %r2 = extractelement <4 x float> %r, i32 2 + %r3 = extractelement <4 x float> %r, i32 3 + call void @llvm.SI.export(i32 15, i32 1, i32 1, i32 0, i32 1, float %r0, float %r1, float %r2, float %r3) + ret void +} + +;CHECK-LABEL: @gather4_l +;CHECK: IMAGE_GATHER4_L {{v\[[0-9]+:[0-9]+\]}}, 1, 0, 0, -1, 0, 0, 0, 0, {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}} +define void @gather4_l() #0 { +main_body: + %r = call <4 x float> @llvm.SI.gather4.l.v4i32(<4 x i32> undef, <32 x i8> undef, <16 x i8> undef, i32 1, i32 0, i32 0, i32 1, i32 0, i32 0, i32 0, i32 0) + %r0 = extractelement <4 x float> %r, i32 0 + %r1 = extractelement <4 x float> %r, i32 1 + %r2 = extractelement <4 x float> %r, i32 2 + %r3 = extractelement <4 x float> %r, i32 3 + call void @llvm.SI.export(i32 15, i32 1, i32 1, i32 0, i32 1, float %r0, float %r1, float %r2, float %r3) + ret void +} + +;CHECK-LABEL: @gather4_b +;CHECK: IMAGE_GATHER4_B {{v\[[0-9]+:[0-9]+\]}}, 1, 0, 0, -1, 0, 0, 0, 0, {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}} +define void @gather4_b() #0 { +main_body: + %r = call <4 x float> @llvm.SI.gather4.b.v4i32(<4 x i32> undef, <32 x i8> undef, <16 x i8> undef, i32 1, i32 0, i32 0, i32 1, i32 0, i32 0, i32 0, i32 0) + %r0 = extractelement <4 x float> %r, i32 0 + %r1 = extractelement <4 x float> %r, i32 1 + %r2 = extractelement <4 x float> %r, i32 2 + %r3 = extractelement <4 x float> %r, i32 3 + call void @llvm.SI.export(i32 15, i32 1, i32 1, i32 0, i32 1, float %r0, float %r1, float %r2, float %r3) + ret void +} + +;CHECK-LABEL: @gather4_b_cl +;CHECK: IMAGE_GATHER4_B_CL {{v\[[0-9]+:[0-9]+\]}}, 1, 0, 0, -1, 0, 0, 0, 0, {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}} +define void @gather4_b_cl() #0 { +main_body: + %r = call <4 x float> @llvm.SI.gather4.b.cl.v4i32(<4 x i32> undef, <32 x i8> undef, <16 x i8> undef, i32 1, i32 0, i32 0, i32 1, i32 0, i32 0, i32 0, i32 0) + %r0 = extractelement <4 x float> %r, i32 0 + %r1 = extractelement <4 x float> %r, i32 1 + %r2 = extractelement <4 x float> %r, i32 2 + %r3 = extractelement <4 x float> %r, i32 3 + call void @llvm.SI.export(i32 15, i32 1, i32 1, i32 0, i32 1, float %r0, float %r1, float %r2, float %r3) + ret void +} + +;CHECK-LABEL: @gather4_b_cl_v8 +;CHECK: IMAGE_GATHER4_B_CL {{v\[[0-9]+:[0-9]+\]}}, 1, 0, 0, -1, 0, 0, 0, 0, {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}} +define void @gather4_b_cl_v8() #0 { +main_body: + %r = call <4 x float> @llvm.SI.gather4.b.cl.v8i32(<8 x i32> undef, <32 x i8> undef, <16 x i8> undef, i32 1, i32 0, i32 0, i32 1, i32 0, i32 0, i32 0, i32 0) + %r0 = extractelement <4 x float> %r, i32 0 + %r1 = extractelement <4 x float> %r, i32 1 + %r2 = extractelement <4 x float> %r, i32 2 + %r3 = extractelement <4 x float> %r, i32 3 + call void @llvm.SI.export(i32 15, i32 1, i32 1, i32 0, i32 1, float %r0, float %r1, float %r2, float %r3) + ret void +} + +;CHECK-LABEL: @gather4_lz_v2 +;CHECK: IMAGE_GATHER4_LZ {{v\[[0-9]+:[0-9]+\]}}, 1, 0, 0, -1, 0, 0, 0, 0, {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}} +define void @gather4_lz_v2() #0 { +main_body: + %r = call <4 x float> @llvm.SI.gather4.lz.v2i32(<2 x i32> undef, <32 x i8> undef, <16 x i8> undef, i32 1, i32 0, i32 0, i32 1, i32 0, i32 0, i32 0, i32 0) + %r0 = extractelement <4 x float> %r, i32 0 + %r1 = extractelement <4 x float> %r, i32 1 + %r2 = extractelement <4 x float> %r, i32 2 + %r3 = extractelement <4 x float> %r, i32 3 + call void @llvm.SI.export(i32 15, i32 1, i32 1, i32 0, i32 1, float %r0, float %r1, float %r2, float %r3) + ret void +} + +;CHECK-LABEL: @gather4_lz +;CHECK: IMAGE_GATHER4_LZ {{v\[[0-9]+:[0-9]+\]}}, 1, 0, 0, -1, 0, 0, 0, 0, {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}} +define void @gather4_lz() #0 { +main_body: + %r = call <4 x float> @llvm.SI.gather4.lz.v4i32(<4 x i32> undef, <32 x i8> undef, <16 x i8> undef, i32 1, i32 0, i32 0, i32 1, i32 0, i32 0, i32 0, i32 0) + %r0 = extractelement <4 x float> %r, i32 0 + %r1 = extractelement <4 x float> %r, i32 1 + %r2 = extractelement <4 x float> %r, i32 2 + %r3 = extractelement <4 x float> %r, i32 3 + call void @llvm.SI.export(i32 15, i32 1, i32 1, i32 0, i32 1, float %r0, float %r1, float %r2, float %r3) + ret void +} + + + +;CHECK-LABEL: @gather4_o +;CHECK: IMAGE_GATHER4_O {{v\[[0-9]+:[0-9]+\]}}, 1, 0, 0, -1, 0, 0, 0, 0, {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}} +define void @gather4_o() #0 { +main_body: + %r = call <4 x float> @llvm.SI.gather4.o.v4i32(<4 x i32> undef, <32 x i8> undef, <16 x i8> undef, i32 1, i32 0, i32 0, i32 1, i32 0, i32 0, i32 0, i32 0) + %r0 = extractelement <4 x float> %r, i32 0 + %r1 = extractelement <4 x float> %r, i32 1 + %r2 = extractelement <4 x float> %r, i32 2 + %r3 = extractelement <4 x float> %r, i32 3 + call void @llvm.SI.export(i32 15, i32 1, i32 1, i32 0, i32 1, float %r0, float %r1, float %r2, float %r3) + ret void +} + +;CHECK-LABEL: @gather4_cl_o +;CHECK: IMAGE_GATHER4_CL_O {{v\[[0-9]+:[0-9]+\]}}, 1, 0, 0, -1, 0, 0, 0, 0, {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}} +define void @gather4_cl_o() #0 { +main_body: + %r = call <4 x float> @llvm.SI.gather4.cl.o.v4i32(<4 x i32> undef, <32 x i8> undef, <16 x i8> undef, i32 1, i32 0, i32 0, i32 1, i32 0, i32 0, i32 0, i32 0) + %r0 = extractelement <4 x float> %r, i32 0 + %r1 = extractelement <4 x float> %r, i32 1 + %r2 = extractelement <4 x float> %r, i32 2 + %r3 = extractelement <4 x float> %r, i32 3 + call void @llvm.SI.export(i32 15, i32 1, i32 1, i32 0, i32 1, float %r0, float %r1, float %r2, float %r3) + ret void +} + +;CHECK-LABEL: @gather4_cl_o_v8 +;CHECK: IMAGE_GATHER4_CL_O {{v\[[0-9]+:[0-9]+\]}}, 1, 0, 0, -1, 0, 0, 0, 0, {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}} +define void @gather4_cl_o_v8() #0 { +main_body: + %r = call <4 x float> @llvm.SI.gather4.cl.o.v8i32(<8 x i32> undef, <32 x i8> undef, <16 x i8> undef, i32 1, i32 0, i32 0, i32 1, i32 0, i32 0, i32 0, i32 0) + %r0 = extractelement <4 x float> %r, i32 0 + %r1 = extractelement <4 x float> %r, i32 1 + %r2 = extractelement <4 x float> %r, i32 2 + %r3 = extractelement <4 x float> %r, i32 3 + call void @llvm.SI.export(i32 15, i32 1, i32 1, i32 0, i32 1, float %r0, float %r1, float %r2, float %r3) + ret void +} + +;CHECK-LABEL: @gather4_l_o +;CHECK: IMAGE_GATHER4_L_O {{v\[[0-9]+:[0-9]+\]}}, 1, 0, 0, -1, 0, 0, 0, 0, {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}} +define void @gather4_l_o() #0 { +main_body: + %r = call <4 x float> @llvm.SI.gather4.l.o.v4i32(<4 x i32> undef, <32 x i8> undef, <16 x i8> undef, i32 1, i32 0, i32 0, i32 1, i32 0, i32 0, i32 0, i32 0) + %r0 = extractelement <4 x float> %r, i32 0 + %r1 = extractelement <4 x float> %r, i32 1 + %r2 = extractelement <4 x float> %r, i32 2 + %r3 = extractelement <4 x float> %r, i32 3 + call void @llvm.SI.export(i32 15, i32 1, i32 1, i32 0, i32 1, float %r0, float %r1, float %r2, float %r3) + ret void +} + +;CHECK-LABEL: @gather4_l_o_v8 +;CHECK: IMAGE_GATHER4_L_O {{v\[[0-9]+:[0-9]+\]}}, 1, 0, 0, -1, 0, 0, 0, 0, {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}} +define void @gather4_l_o_v8() #0 { +main_body: + %r = call <4 x float> @llvm.SI.gather4.l.o.v8i32(<8 x i32> undef, <32 x i8> undef, <16 x i8> undef, i32 1, i32 0, i32 0, i32 1, i32 0, i32 0, i32 0, i32 0) + %r0 = extractelement <4 x float> %r, i32 0 + %r1 = extractelement <4 x float> %r, i32 1 + %r2 = extractelement <4 x float> %r, i32 2 + %r3 = extractelement <4 x float> %r, i32 3 + call void @llvm.SI.export(i32 15, i32 1, i32 1, i32 0, i32 1, float %r0, float %r1, float %r2, float %r3) + ret void +} + +;CHECK-LABEL: @gather4_b_o +;CHECK: IMAGE_GATHER4_B_O {{v\[[0-9]+:[0-9]+\]}}, 1, 0, 0, -1, 0, 0, 0, 0, {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}} +define void @gather4_b_o() #0 { +main_body: + %r = call <4 x float> @llvm.SI.gather4.b.o.v4i32(<4 x i32> undef, <32 x i8> undef, <16 x i8> undef, i32 1, i32 0, i32 0, i32 1, i32 0, i32 0, i32 0, i32 0) + %r0 = extractelement <4 x float> %r, i32 0 + %r1 = extractelement <4 x float> %r, i32 1 + %r2 = extractelement <4 x float> %r, i32 2 + %r3 = extractelement <4 x float> %r, i32 3 + call void @llvm.SI.export(i32 15, i32 1, i32 1, i32 0, i32 1, float %r0, float %r1, float %r2, float %r3) + ret void +} + +;CHECK-LABEL: @gather4_b_o_v8 +;CHECK: IMAGE_GATHER4_B_O {{v\[[0-9]+:[0-9]+\]}}, 1, 0, 0, -1, 0, 0, 0, 0, {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}} +define void @gather4_b_o_v8() #0 { +main_body: + %r = call <4 x float> @llvm.SI.gather4.b.o.v8i32(<8 x i32> undef, <32 x i8> undef, <16 x i8> undef, i32 1, i32 0, i32 0, i32 1, i32 0, i32 0, i32 0, i32 0) + %r0 = extractelement <4 x float> %r, i32 0 + %r1 = extractelement <4 x float> %r, i32 1 + %r2 = extractelement <4 x float> %r, i32 2 + %r3 = extractelement <4 x float> %r, i32 3 + call void @llvm.SI.export(i32 15, i32 1, i32 1, i32 0, i32 1, float %r0, float %r1, float %r2, float %r3) + ret void +} + +;CHECK-LABEL: @gather4_b_cl_o +;CHECK: IMAGE_GATHER4_B_CL_O {{v\[[0-9]+:[0-9]+\]}}, 1, 0, 0, -1, 0, 0, 0, 0, {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}} +define void @gather4_b_cl_o() #0 { +main_body: + %r = call <4 x float> @llvm.SI.gather4.b.cl.o.v8i32(<8 x i32> undef, <32 x i8> undef, <16 x i8> undef, i32 1, i32 0, i32 0, i32 1, i32 0, i32 0, i32 0, i32 0) + %r0 = extractelement <4 x float> %r, i32 0 + %r1 = extractelement <4 x float> %r, i32 1 + %r2 = extractelement <4 x float> %r, i32 2 + %r3 = extractelement <4 x float> %r, i32 3 + call void @llvm.SI.export(i32 15, i32 1, i32 1, i32 0, i32 1, float %r0, float %r1, float %r2, float %r3) + ret void +} + +;CHECK-LABEL: @gather4_lz_o +;CHECK: IMAGE_GATHER4_LZ_O {{v\[[0-9]+:[0-9]+\]}}, 1, 0, 0, -1, 0, 0, 0, 0, {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}} +define void @gather4_lz_o() #0 { +main_body: + %r = call <4 x float> @llvm.SI.gather4.lz.o.v4i32(<4 x i32> undef, <32 x i8> undef, <16 x i8> undef, i32 1, i32 0, i32 0, i32 1, i32 0, i32 0, i32 0, i32 0) + %r0 = extractelement <4 x float> %r, i32 0 + %r1 = extractelement <4 x float> %r, i32 1 + %r2 = extractelement <4 x float> %r, i32 2 + %r3 = extractelement <4 x float> %r, i32 3 + call void @llvm.SI.export(i32 15, i32 1, i32 1, i32 0, i32 1, float %r0, float %r1, float %r2, float %r3) + ret void +} + + + +;CHECK-LABEL: @gather4_c +;CHECK: IMAGE_GATHER4_C {{v\[[0-9]+:[0-9]+\]}}, 1, 0, 0, -1, 0, 0, 0, 0, {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}} +define void @gather4_c() #0 { +main_body: + %r = call <4 x float> @llvm.SI.gather4.c.v4i32(<4 x i32> undef, <32 x i8> undef, <16 x i8> undef, i32 1, i32 0, i32 0, i32 1, i32 0, i32 0, i32 0, i32 0) + %r0 = extractelement <4 x float> %r, i32 0 + %r1 = extractelement <4 x float> %r, i32 1 + %r2 = extractelement <4 x float> %r, i32 2 + %r3 = extractelement <4 x float> %r, i32 3 + call void @llvm.SI.export(i32 15, i32 1, i32 1, i32 0, i32 1, float %r0, float %r1, float %r2, float %r3) + ret void +} + +;CHECK-LABEL: @gather4_c_cl +;CHECK: IMAGE_GATHER4_C_CL {{v\[[0-9]+:[0-9]+\]}}, 1, 0, 0, -1, 0, 0, 0, 0, {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}} +define void @gather4_c_cl() #0 { +main_body: + %r = call <4 x float> @llvm.SI.gather4.c.cl.v4i32(<4 x i32> undef, <32 x i8> undef, <16 x i8> undef, i32 1, i32 0, i32 0, i32 1, i32 0, i32 0, i32 0, i32 0) + %r0 = extractelement <4 x float> %r, i32 0 + %r1 = extractelement <4 x float> %r, i32 1 + %r2 = extractelement <4 x float> %r, i32 2 + %r3 = extractelement <4 x float> %r, i32 3 + call void @llvm.SI.export(i32 15, i32 1, i32 1, i32 0, i32 1, float %r0, float %r1, float %r2, float %r3) + ret void +} + +;CHECK-LABEL: @gather4_c_cl_v8 +;CHECK: IMAGE_GATHER4_C_CL {{v\[[0-9]+:[0-9]+\]}}, 1, 0, 0, -1, 0, 0, 0, 0, {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}} +define void @gather4_c_cl_v8() #0 { +main_body: + %r = call <4 x float> @llvm.SI.gather4.c.cl.v8i32(<8 x i32> undef, <32 x i8> undef, <16 x i8> undef, i32 1, i32 0, i32 0, i32 1, i32 0, i32 0, i32 0, i32 0) + %r0 = extractelement <4 x float> %r, i32 0 + %r1 = extractelement <4 x float> %r, i32 1 + %r2 = extractelement <4 x float> %r, i32 2 + %r3 = extractelement <4 x float> %r, i32 3 + call void @llvm.SI.export(i32 15, i32 1, i32 1, i32 0, i32 1, float %r0, float %r1, float %r2, float %r3) + ret void +} + +;CHECK-LABEL: @gather4_c_l +;CHECK: IMAGE_GATHER4_C_L {{v\[[0-9]+:[0-9]+\]}}, 1, 0, 0, -1, 0, 0, 0, 0, {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}} +define void @gather4_c_l() #0 { +main_body: + %r = call <4 x float> @llvm.SI.gather4.c.l.v4i32(<4 x i32> undef, <32 x i8> undef, <16 x i8> undef, i32 1, i32 0, i32 0, i32 1, i32 0, i32 0, i32 0, i32 0) + %r0 = extractelement <4 x float> %r, i32 0 + %r1 = extractelement <4 x float> %r, i32 1 + %r2 = extractelement <4 x float> %r, i32 2 + %r3 = extractelement <4 x float> %r, i32 3 + call void @llvm.SI.export(i32 15, i32 1, i32 1, i32 0, i32 1, float %r0, float %r1, float %r2, float %r3) + ret void +} + +;CHECK-LABEL: @gather4_c_l_v8 +;CHECK: IMAGE_GATHER4_C_L {{v\[[0-9]+:[0-9]+\]}}, 1, 0, 0, -1, 0, 0, 0, 0, {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}} +define void @gather4_c_l_v8() #0 { +main_body: + %r = call <4 x float> @llvm.SI.gather4.c.l.v8i32(<8 x i32> undef, <32 x i8> undef, <16 x i8> undef, i32 1, i32 0, i32 0, i32 1, i32 0, i32 0, i32 0, i32 0) + %r0 = extractelement <4 x float> %r, i32 0 + %r1 = extractelement <4 x float> %r, i32 1 + %r2 = extractelement <4 x float> %r, i32 2 + %r3 = extractelement <4 x float> %r, i32 3 + call void @llvm.SI.export(i32 15, i32 1, i32 1, i32 0, i32 1, float %r0, float %r1, float %r2, float %r3) + ret void +} + +;CHECK-LABEL: @gather4_c_b +;CHECK: IMAGE_GATHER4_C_B {{v\[[0-9]+:[0-9]+\]}}, 1, 0, 0, -1, 0, 0, 0, 0, {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}} +define void @gather4_c_b() #0 { +main_body: + %r = call <4 x float> @llvm.SI.gather4.c.b.v4i32(<4 x i32> undef, <32 x i8> undef, <16 x i8> undef, i32 1, i32 0, i32 0, i32 1, i32 0, i32 0, i32 0, i32 0) + %r0 = extractelement <4 x float> %r, i32 0 + %r1 = extractelement <4 x float> %r, i32 1 + %r2 = extractelement <4 x float> %r, i32 2 + %r3 = extractelement <4 x float> %r, i32 3 + call void @llvm.SI.export(i32 15, i32 1, i32 1, i32 0, i32 1, float %r0, float %r1, float %r2, float %r3) + ret void +} + +;CHECK-LABEL: @gather4_c_b_v8 +;CHECK: IMAGE_GATHER4_C_B {{v\[[0-9]+:[0-9]+\]}}, 1, 0, 0, -1, 0, 0, 0, 0, {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}} +define void @gather4_c_b_v8() #0 { +main_body: + %r = call <4 x float> @llvm.SI.gather4.c.b.v8i32(<8 x i32> undef, <32 x i8> undef, <16 x i8> undef, i32 1, i32 0, i32 0, i32 1, i32 0, i32 0, i32 0, i32 0) + %r0 = extractelement <4 x float> %r, i32 0 + %r1 = extractelement <4 x float> %r, i32 1 + %r2 = extractelement <4 x float> %r, i32 2 + %r3 = extractelement <4 x float> %r, i32 3 + call void @llvm.SI.export(i32 15, i32 1, i32 1, i32 0, i32 1, float %r0, float %r1, float %r2, float %r3) + ret void +} + +;CHECK-LABEL: @gather4_c_b_cl +;CHECK: IMAGE_GATHER4_C_B_CL {{v\[[0-9]+:[0-9]+\]}}, 1, 0, 0, -1, 0, 0, 0, 0, {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}} +define void @gather4_c_b_cl() #0 { +main_body: + %r = call <4 x float> @llvm.SI.gather4.c.b.cl.v8i32(<8 x i32> undef, <32 x i8> undef, <16 x i8> undef, i32 1, i32 0, i32 0, i32 1, i32 0, i32 0, i32 0, i32 0) + %r0 = extractelement <4 x float> %r, i32 0 + %r1 = extractelement <4 x float> %r, i32 1 + %r2 = extractelement <4 x float> %r, i32 2 + %r3 = extractelement <4 x float> %r, i32 3 + call void @llvm.SI.export(i32 15, i32 1, i32 1, i32 0, i32 1, float %r0, float %r1, float %r2, float %r3) + ret void +} + +;CHECK-LABEL: @gather4_c_lz +;CHECK: IMAGE_GATHER4_C_LZ {{v\[[0-9]+:[0-9]+\]}}, 1, 0, 0, -1, 0, 0, 0, 0, {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}} +define void @gather4_c_lz() #0 { +main_body: + %r = call <4 x float> @llvm.SI.gather4.c.lz.v4i32(<4 x i32> undef, <32 x i8> undef, <16 x i8> undef, i32 1, i32 0, i32 0, i32 1, i32 0, i32 0, i32 0, i32 0) + %r0 = extractelement <4 x float> %r, i32 0 + %r1 = extractelement <4 x float> %r, i32 1 + %r2 = extractelement <4 x float> %r, i32 2 + %r3 = extractelement <4 x float> %r, i32 3 + call void @llvm.SI.export(i32 15, i32 1, i32 1, i32 0, i32 1, float %r0, float %r1, float %r2, float %r3) + ret void +} + + + +;CHECK-LABEL: @gather4_c_o +;CHECK: IMAGE_GATHER4_C_O {{v\[[0-9]+:[0-9]+\]}}, 1, 0, 0, -1, 0, 0, 0, 0, {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}} +define void @gather4_c_o() #0 { +main_body: + %r = call <4 x float> @llvm.SI.gather4.c.o.v4i32(<4 x i32> undef, <32 x i8> undef, <16 x i8> undef, i32 1, i32 0, i32 0, i32 1, i32 0, i32 0, i32 0, i32 0) + %r0 = extractelement <4 x float> %r, i32 0 + %r1 = extractelement <4 x float> %r, i32 1 + %r2 = extractelement <4 x float> %r, i32 2 + %r3 = extractelement <4 x float> %r, i32 3 + call void @llvm.SI.export(i32 15, i32 1, i32 1, i32 0, i32 1, float %r0, float %r1, float %r2, float %r3) + ret void +} + +;CHECK-LABEL: @gather4_c_o_v8 +;CHECK: IMAGE_GATHER4_C_O {{v\[[0-9]+:[0-9]+\]}}, 1, 0, 0, -1, 0, 0, 0, 0, {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}} +define void @gather4_c_o_v8() #0 { +main_body: + %r = call <4 x float> @llvm.SI.gather4.c.o.v8i32(<8 x i32> undef, <32 x i8> undef, <16 x i8> undef, i32 1, i32 0, i32 0, i32 1, i32 0, i32 0, i32 0, i32 0) + %r0 = extractelement <4 x float> %r, i32 0 + %r1 = extractelement <4 x float> %r, i32 1 + %r2 = extractelement <4 x float> %r, i32 2 + %r3 = extractelement <4 x float> %r, i32 3 + call void @llvm.SI.export(i32 15, i32 1, i32 1, i32 0, i32 1, float %r0, float %r1, float %r2, float %r3) + ret void +} + +;CHECK-LABEL: @gather4_c_cl_o +;CHECK: IMAGE_GATHER4_C_CL_O {{v\[[0-9]+:[0-9]+\]}}, 1, 0, 0, -1, 0, 0, 0, 0, {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}} +define void @gather4_c_cl_o() #0 { +main_body: + %r = call <4 x float> @llvm.SI.gather4.c.cl.o.v8i32(<8 x i32> undef, <32 x i8> undef, <16 x i8> undef, i32 1, i32 0, i32 0, i32 1, i32 0, i32 0, i32 0, i32 0) + %r0 = extractelement <4 x float> %r, i32 0 + %r1 = extractelement <4 x float> %r, i32 1 + %r2 = extractelement <4 x float> %r, i32 2 + %r3 = extractelement <4 x float> %r, i32 3 + call void @llvm.SI.export(i32 15, i32 1, i32 1, i32 0, i32 1, float %r0, float %r1, float %r2, float %r3) + ret void +} + +;CHECK-LABEL: @gather4_c_l_o +;CHECK: IMAGE_GATHER4_C_L_O {{v\[[0-9]+:[0-9]+\]}}, 1, 0, 0, -1, 0, 0, 0, 0, {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}} +define void @gather4_c_l_o() #0 { +main_body: + %r = call <4 x float> @llvm.SI.gather4.c.l.o.v8i32(<8 x i32> undef, <32 x i8> undef, <16 x i8> undef, i32 1, i32 0, i32 0, i32 1, i32 0, i32 0, i32 0, i32 0) + %r0 = extractelement <4 x float> %r, i32 0 + %r1 = extractelement <4 x float> %r, i32 1 + %r2 = extractelement <4 x float> %r, i32 2 + %r3 = extractelement <4 x float> %r, i32 3 + call void @llvm.SI.export(i32 15, i32 1, i32 1, i32 0, i32 1, float %r0, float %r1, float %r2, float %r3) + ret void +} + +;CHECK-LABEL: @gather4_c_b_o +;CHECK: IMAGE_GATHER4_C_B_O {{v\[[0-9]+:[0-9]+\]}}, 1, 0, 0, -1, 0, 0, 0, 0, {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}} +define void @gather4_c_b_o() #0 { +main_body: + %r = call <4 x float> @llvm.SI.gather4.c.b.o.v8i32(<8 x i32> undef, <32 x i8> undef, <16 x i8> undef, i32 1, i32 0, i32 0, i32 1, i32 0, i32 0, i32 0, i32 0) + %r0 = extractelement <4 x float> %r, i32 0 + %r1 = extractelement <4 x float> %r, i32 1 + %r2 = extractelement <4 x float> %r, i32 2 + %r3 = extractelement <4 x float> %r, i32 3 + call void @llvm.SI.export(i32 15, i32 1, i32 1, i32 0, i32 1, float %r0, float %r1, float %r2, float %r3) + ret void +} + +;CHECK-LABEL: @gather4_c_b_cl_o +;CHECK: IMAGE_GATHER4_C_B_CL_O {{v\[[0-9]+:[0-9]+\]}}, 1, 0, 0, -1, 0, 0, 0, 0, {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}} +define void @gather4_c_b_cl_o() #0 { +main_body: + %r = call <4 x float> @llvm.SI.gather4.c.b.cl.o.v8i32(<8 x i32> undef, <32 x i8> undef, <16 x i8> undef, i32 1, i32 0, i32 0, i32 1, i32 0, i32 0, i32 0, i32 0) + %r0 = extractelement <4 x float> %r, i32 0 + %r1 = extractelement <4 x float> %r, i32 1 + %r2 = extractelement <4 x float> %r, i32 2 + %r3 = extractelement <4 x float> %r, i32 3 + call void @llvm.SI.export(i32 15, i32 1, i32 1, i32 0, i32 1, float %r0, float %r1, float %r2, float %r3) + ret void +} + +;CHECK-LABEL: @gather4_c_lz_o +;CHECK: IMAGE_GATHER4_C_LZ_O {{v\[[0-9]+:[0-9]+\]}}, 1, 0, 0, -1, 0, 0, 0, 0, {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}} +define void @gather4_c_lz_o() #0 { +main_body: + %r = call <4 x float> @llvm.SI.gather4.c.lz.o.v4i32(<4 x i32> undef, <32 x i8> undef, <16 x i8> undef, i32 1, i32 0, i32 0, i32 1, i32 0, i32 0, i32 0, i32 0) + %r0 = extractelement <4 x float> %r, i32 0 + %r1 = extractelement <4 x float> %r, i32 1 + %r2 = extractelement <4 x float> %r, i32 2 + %r3 = extractelement <4 x float> %r, i32 3 + call void @llvm.SI.export(i32 15, i32 1, i32 1, i32 0, i32 1, float %r0, float %r1, float %r2, float %r3) + ret void +} + +;CHECK-LABEL: @gather4_c_lz_o_v8 +;CHECK: IMAGE_GATHER4_C_LZ_O {{v\[[0-9]+:[0-9]+\]}}, 1, 0, 0, -1, 0, 0, 0, 0, {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}} +define void @gather4_c_lz_o_v8() #0 { +main_body: + %r = call <4 x float> @llvm.SI.gather4.c.lz.o.v8i32(<8 x i32> undef, <32 x i8> undef, <16 x i8> undef, i32 1, i32 0, i32 0, i32 1, i32 0, i32 0, i32 0, i32 0) + %r0 = extractelement <4 x float> %r, i32 0 + %r1 = extractelement <4 x float> %r, i32 1 + %r2 = extractelement <4 x float> %r, i32 2 + %r3 = extractelement <4 x float> %r, i32 3 + call void @llvm.SI.export(i32 15, i32 1, i32 1, i32 0, i32 1, float %r0, float %r1, float %r2, float %r3) + ret void +} + + + +declare <4 x float> @llvm.SI.gather4.v2i32(<2 x i32>, <32 x i8>, <16 x i8>, i32, i32, i32, i32, i32, i32, i32, i32) #1 +declare <4 x float> @llvm.SI.gather4.v4i32(<4 x i32>, <32 x i8>, <16 x i8>, i32, i32, i32, i32, i32, i32, i32, i32) #1 +declare <4 x float> @llvm.SI.gather4.cl.v4i32(<4 x i32>, <32 x i8>, <16 x i8>, i32, i32, i32, i32, i32, i32, i32, i32) #1 +declare <4 x float> @llvm.SI.gather4.l.v4i32(<4 x i32>, <32 x i8>, <16 x i8>, i32, i32, i32, i32, i32, i32, i32, i32) #1 +declare <4 x float> @llvm.SI.gather4.b.v4i32(<4 x i32>, <32 x i8>, <16 x i8>, i32, i32, i32, i32, i32, i32, i32, i32) #1 +declare <4 x float> @llvm.SI.gather4.b.cl.v4i32(<4 x i32>, <32 x i8>, <16 x i8>, i32, i32, i32, i32, i32, i32, i32, i32) #1 +declare <4 x float> @llvm.SI.gather4.b.cl.v8i32(<8 x i32>, <32 x i8>, <16 x i8>, i32, i32, i32, i32, i32, i32, i32, i32) #1 +declare <4 x float> @llvm.SI.gather4.lz.v2i32(<2 x i32>, <32 x i8>, <16 x i8>, i32, i32, i32, i32, i32, i32, i32, i32) #1 +declare <4 x float> @llvm.SI.gather4.lz.v4i32(<4 x i32>, <32 x i8>, <16 x i8>, i32, i32, i32, i32, i32, i32, i32, i32) #1 + +declare <4 x float> @llvm.SI.gather4.o.v4i32(<4 x i32>, <32 x i8>, <16 x i8>, i32, i32, i32, i32, i32, i32, i32, i32) #1 +declare <4 x float> @llvm.SI.gather4.cl.o.v4i32(<4 x i32>, <32 x i8>, <16 x i8>, i32, i32, i32, i32, i32, i32, i32, i32) #1 +declare <4 x float> @llvm.SI.gather4.cl.o.v8i32(<8 x i32>, <32 x i8>, <16 x i8>, i32, i32, i32, i32, i32, i32, i32, i32) #1 +declare <4 x float> @llvm.SI.gather4.l.o.v4i32(<4 x i32>, <32 x i8>, <16 x i8>, i32, i32, i32, i32, i32, i32, i32, i32) #1 +declare <4 x float> @llvm.SI.gather4.l.o.v8i32(<8 x i32>, <32 x i8>, <16 x i8>, i32, i32, i32, i32, i32, i32, i32, i32) #1 +declare <4 x float> @llvm.SI.gather4.b.o.v4i32(<4 x i32>, <32 x i8>, <16 x i8>, i32, i32, i32, i32, i32, i32, i32, i32) #1 +declare <4 x float> @llvm.SI.gather4.b.o.v8i32(<8 x i32>, <32 x i8>, <16 x i8>, i32, i32, i32, i32, i32, i32, i32, i32) #1 +declare <4 x float> @llvm.SI.gather4.b.cl.o.v8i32(<8 x i32>, <32 x i8>, <16 x i8>, i32, i32, i32, i32, i32, i32, i32, i32) #1 +declare <4 x float> @llvm.SI.gather4.lz.o.v4i32(<4 x i32>, <32 x i8>, <16 x i8>, i32, i32, i32, i32, i32, i32, i32, i32) #1 + +declare <4 x float> @llvm.SI.gather4.c.v4i32(<4 x i32>, <32 x i8>, <16 x i8>, i32, i32, i32, i32, i32, i32, i32, i32) #1 +declare <4 x float> @llvm.SI.gather4.c.cl.v4i32(<4 x i32>, <32 x i8>, <16 x i8>, i32, i32, i32, i32, i32, i32, i32, i32) #1 +declare <4 x float> @llvm.SI.gather4.c.cl.v8i32(<8 x i32>, <32 x i8>, <16 x i8>, i32, i32, i32, i32, i32, i32, i32, i32) #1 +declare <4 x float> @llvm.SI.gather4.c.l.v4i32(<4 x i32>, <32 x i8>, <16 x i8>, i32, i32, i32, i32, i32, i32, i32, i32) #1 +declare <4 x float> @llvm.SI.gather4.c.l.v8i32(<8 x i32>, <32 x i8>, <16 x i8>, i32, i32, i32, i32, i32, i32, i32, i32) #1 +declare <4 x float> @llvm.SI.gather4.c.b.v4i32(<4 x i32>, <32 x i8>, <16 x i8>, i32, i32, i32, i32, i32, i32, i32, i32) #1 +declare <4 x float> @llvm.SI.gather4.c.b.v8i32(<8 x i32>, <32 x i8>, <16 x i8>, i32, i32, i32, i32, i32, i32, i32, i32) #1 +declare <4 x float> @llvm.SI.gather4.c.b.cl.v8i32(<8 x i32>, <32 x i8>, <16 x i8>, i32, i32, i32, i32, i32, i32, i32, i32) #1 +declare <4 x float> @llvm.SI.gather4.c.lz.v4i32(<4 x i32>, <32 x i8>, <16 x i8>, i32, i32, i32, i32, i32, i32, i32, i32) #1 + +declare <4 x float> @llvm.SI.gather4.c.o.v4i32(<4 x i32>, <32 x i8>, <16 x i8>, i32, i32, i32, i32, i32, i32, i32, i32) #1 +declare <4 x float> @llvm.SI.gather4.c.o.v8i32(<8 x i32>, <32 x i8>, <16 x i8>, i32, i32, i32, i32, i32, i32, i32, i32) #1 +declare <4 x float> @llvm.SI.gather4.c.cl.o.v8i32(<8 x i32>, <32 x i8>, <16 x i8>, i32, i32, i32, i32, i32, i32, i32, i32) #1 +declare <4 x float> @llvm.SI.gather4.c.l.o.v8i32(<8 x i32>, <32 x i8>, <16 x i8>, i32, i32, i32, i32, i32, i32, i32, i32) #1 +declare <4 x float> @llvm.SI.gather4.c.b.o.v8i32(<8 x i32>, <32 x i8>, <16 x i8>, i32, i32, i32, i32, i32, i32, i32, i32) #1 +declare <4 x float> @llvm.SI.gather4.c.b.cl.o.v8i32(<8 x i32>, <32 x i8>, <16 x i8>, i32, i32, i32, i32, i32, i32, i32, i32) #1 +declare <4 x float> @llvm.SI.gather4.c.lz.o.v4i32(<4 x i32>, <32 x i8>, <16 x i8>, i32, i32, i32, i32, i32, i32, i32, i32) #1 +declare <4 x float> @llvm.SI.gather4.c.lz.o.v8i32(<8 x i32>, <32 x i8>, <16 x i8>, i32, i32, i32, i32, i32, i32, i32, i32) #1 + +declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float) + +attributes #0 = { "ShaderType"="0" } +attributes #1 = { nounwind readnone } diff --git a/test/CodeGen/R600/llvm.SI.getlod.ll b/test/CodeGen/R600/llvm.SI.getlod.ll new file mode 100644 index 0000000..a7a17ec --- /dev/null +++ b/test/CodeGen/R600/llvm.SI.getlod.ll @@ -0,0 +1,44 @@ +;RUN: llc < %s -march=r600 -mcpu=verde -verify-machineinstrs | FileCheck %s + +;CHECK-LABEL: @getlod +;CHECK: IMAGE_GET_LOD {{v\[[0-9]+:[0-9]+\]}}, 3, 0, 0, -1, 0, 0, 0, 0, {{v[0-9]+}}, {{s\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}} +define void @getlod() #0 { +main_body: + %r = call <4 x float> @llvm.SI.getlod.i32(i32 undef, <32 x i8> undef, <16 x i8> undef, i32 15, i32 0, i32 0, i32 1, i32 0, i32 0, i32 0, i32 0) + %r0 = extractelement <4 x float> %r, i32 0 + %r1 = extractelement <4 x float> %r, i32 1 + call void @llvm.SI.export(i32 15, i32 1, i32 1, i32 0, i32 1, float %r0, float %r1, float %r0, float %r1) + ret void +} + +;CHECK-LABEL: @getlod_v2 +;CHECK: IMAGE_GET_LOD {{v\[[0-9]+:[0-9]+\]}}, 3, 0, 0, -1, 0, 0, 0, 0, {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}} +define void @getlod_v2() #0 { +main_body: + %r = call <4 x float> @llvm.SI.getlod.v2i32(<2 x i32> undef, <32 x i8> undef, <16 x i8> undef, i32 15, i32 0, i32 0, i32 1, i32 0, i32 0, i32 0, i32 0) + %r0 = extractelement <4 x float> %r, i32 0 + %r1 = extractelement <4 x float> %r, i32 1 + call void @llvm.SI.export(i32 15, i32 1, i32 1, i32 0, i32 1, float %r0, float %r1, float %r0, float %r1) + ret void +} + +;CHECK-LABEL: @getlod_v4 +;CHECK: IMAGE_GET_LOD {{v\[[0-9]+:[0-9]+\]}}, 3, 0, 0, -1, 0, 0, 0, 0, {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}} +define void @getlod_v4() #0 { +main_body: + %r = call <4 x float> @llvm.SI.getlod.v4i32(<4 x i32> undef, <32 x i8> undef, <16 x i8> undef, i32 15, i32 0, i32 0, i32 1, i32 0, i32 0, i32 0, i32 0) + %r0 = extractelement <4 x float> %r, i32 0 + %r1 = extractelement <4 x float> %r, i32 1 + call void @llvm.SI.export(i32 15, i32 1, i32 1, i32 0, i32 1, float %r0, float %r1, float %r0, float %r1) + ret void +} + + +declare <4 x float> @llvm.SI.getlod.i32(i32, <32 x i8>, <16 x i8>, i32, i32, i32, i32, i32, i32, i32, i32) #1 +declare <4 x float> @llvm.SI.getlod.v2i32(<2 x i32>, <32 x i8>, <16 x i8>, i32, i32, i32, i32, i32, i32, i32, i32) #1 +declare <4 x float> @llvm.SI.getlod.v4i32(<4 x i32>, <32 x i8>, <16 x i8>, i32, i32, i32, i32, i32, i32, i32, i32) #1 + +declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float) + +attributes #0 = { "ShaderType"="0" } +attributes #1 = { nounwind readnone } diff --git a/test/CodeGen/R600/llvm.exp2.ll b/test/CodeGen/R600/llvm.exp2.ll index 13bfbab..119d5ef 100644 --- a/test/CodeGen/R600/llvm.exp2.ll +++ b/test/CodeGen/R600/llvm.exp2.ll @@ -1,26 +1,79 @@ -;RUN: llc < %s -march=r600 -mcpu=redwood | FileCheck %s --check-prefix=EG-CHECK -;RUN: llc < %s -march=r600 -mcpu=cayman | FileCheck %s --check-prefix=CM-CHECK +;RUN: llc < %s -march=r600 -mcpu=redwood | FileCheck %s --check-prefix=EG-CHECK --check-prefix=FUNC +;RUN: llc < %s -march=r600 -mcpu=cayman | FileCheck %s --check-prefix=CM-CHECK --check-prefix=FUNC +;RUN: llc < %s -march=r600 -mcpu=SI | FileCheck %s --check-prefix=SI-CHECK --check-prefix=FUNC -;EG-CHECK-LABEL: @test -;EG-CHECK: EXP_IEEE * -;CM-CHECK-LABEL: @test -;CM-CHECK: EXP_IEEE T{{[0-9]+}}.X, -|T{{[0-9]+}}.X| -;CM-CHECK: EXP_IEEE T{{[0-9]+}}.Y (MASKED), -|T{{[0-9]+}}.X| -;CM-CHECK: EXP_IEEE T{{[0-9]+}}.Z (MASKED), -|T{{[0-9]+}}.X| -;CM-CHECK: EXP_IEEE * T{{[0-9]+}}.W (MASKED), -|T{{[0-9]+}}.X| +;FUNC-LABEL: @test +;EG-CHECK: EXP_IEEE +;CM-CHECK-DAG: EXP_IEEE T{{[0-9]+\.[XYZW]}} (MASKED) +;CM-CHECK-DAG: EXP_IEEE T{{[0-9]+\.[XYZW]}} (MASKED) +;CM-CHECK-DAG: EXP_IEEE T{{[0-9]+\.[XYZW]}} (MASKED) +;CM-CHECK-DAG: EXP_IEEE T{{[0-9]+\.[XYZW]}} +;SI-CHECK: V_EXP_F32 -define void @test(<4 x float> inreg %reg0) #0 { - %r0 = extractelement <4 x float> %reg0, i32 0 - %r1 = call float @llvm.fabs.f32(float %r0) - %r2 = fsub float -0.000000e+00, %r1 - %r3 = call float @llvm.exp2.f32(float %r2) - %vec = insertelement <4 x float> undef, float %r3, i32 0 - call void @llvm.R600.store.swizzle(<4 x float> %vec, i32 0, i32 0) +define void @test(float addrspace(1)* %out, float %in) { +entry: + %0 = call float @llvm.exp2.f32(float %in) + store float %0, float addrspace(1)* %out ret void } -declare float @llvm.exp2.f32(float) readnone -declare float @llvm.fabs.f32(float) readnone -declare void @llvm.R600.store.swizzle(<4 x float>, i32, i32) +;FUNC-LABEL: @testv2 +;EG-CHECK: EXP_IEEE +;EG-CHECK: EXP_IEEE +; FIXME: We should be able to merge these packets together on Cayman so we +; have a maximum of 4 instructions. +;CM-CHECK-DAG: EXP_IEEE T{{[0-9]+\.[XYZW]}} (MASKED) +;CM-CHECK-DAG: EXP_IEEE T{{[0-9]+\.[XYZW]}} (MASKED) +;CM-CHECK-DAG: EXP_IEEE T{{[0-9]+\.[XYZW]}} (MASKED) +;CM-CHECK-DAG: EXP_IEEE T{{[0-9]+\.[XYZW]}} (MASKED) +;CM-CHECK-DAG: EXP_IEEE T{{[0-9]+\.[XYZW]}} (MASKED) +;CM-CHECK-DAG: EXP_IEEE T{{[0-9]+\.[XYZW]}} (MASKED) +;CM-CHECK-DAG: EXP_IEEE T{{[0-9]+\.[XYZW]}} +;CM-CHECK-DAG: EXP_IEEE T{{[0-9]+\.[XYZW]}} +;SI-CHECK: V_EXP_F32 +;SI-CHECK: V_EXP_F32 + +define void @testv2(<2 x float> addrspace(1)* %out, <2 x float> %in) { +entry: + %0 = call <2 x float> @llvm.exp2.v2f32(<2 x float> %in) + store <2 x float> %0, <2 x float> addrspace(1)* %out + ret void +} -attributes #0 = { "ShaderType"="0" } +;FUNC-LABEL: @testv4 +;EG-CHECK: EXP_IEEE +;EG-CHECK: EXP_IEEE +;EG-CHECK: EXP_IEEE +;EG-CHECK: EXP_IEEE +; FIXME: We should be able to merge these packets together on Cayman so we +; have a maximum of 4 instructions. +;CM-CHECK-DAG: EXP_IEEE T{{[0-9]+\.[XYZW]}} (MASKED) +;CM-CHECK-DAG: EXP_IEEE T{{[0-9]+\.[XYZW]}} (MASKED) +;CM-CHECK-DAG: EXP_IEEE T{{[0-9]+\.[XYZW]}} (MASKED) +;CM-CHECK-DAG: EXP_IEEE T{{[0-9]+\.[XYZW]}} (MASKED) +;CM-CHECK-DAG: EXP_IEEE T{{[0-9]+\.[XYZW]}} (MASKED) +;CM-CHECK-DAG: EXP_IEEE T{{[0-9]+\.[XYZW]}} (MASKED) +;CM-CHECK-DAG: EXP_IEEE T{{[0-9]+\.[XYZW]}} (MASKED) +;CM-CHECK-DAG: EXP_IEEE T{{[0-9]+\.[XYZW]}} (MASKED) +;CM-CHECK-DAG: EXP_IEEE T{{[0-9]+\.[XYZW]}} (MASKED) +;CM-CHECK-DAG: EXP_IEEE T{{[0-9]+\.[XYZW]}} (MASKED) +;CM-CHECK-DAG: EXP_IEEE T{{[0-9]+\.[XYZW]}} (MASKED) +;CM-CHECK-DAG: EXP_IEEE T{{[0-9]+\.[XYZW]}} (MASKED) +;CM-CHECK-DAG: EXP_IEEE T{{[0-9]+\.[XYZW]}} +;CM-CHECK-DAG: EXP_IEEE T{{[0-9]+\.[XYZW]}} +;CM-CHECK-DAG: EXP_IEEE T{{[0-9]+\.[XYZW]}} +;CM-CHECK-DAG: EXP_IEEE T{{[0-9]+\.[XYZW]}} +;SI-CHECK: V_EXP_F32 +;SI-CHECK: V_EXP_F32 +;SI-CHECK: V_EXP_F32 +;SI-CHECK: V_EXP_F32 +define void @testv4(<4 x float> addrspace(1)* %out, <4 x float> %in) { +entry: + %0 = call <4 x float> @llvm.exp2.v4f32(<4 x float> %in) + store <4 x float> %0, <4 x float> addrspace(1)* %out + ret void +} + +declare float @llvm.exp2.f32(float) readnone +declare <2 x float> @llvm.exp2.v2f32(<2 x float>) readnone +declare <4 x float> @llvm.exp2.v4f32(<4 x float>) readnone diff --git a/test/CodeGen/R600/llvm.log2.ll b/test/CodeGen/R600/llvm.log2.ll new file mode 100644 index 0000000..4cba2d4 --- /dev/null +++ b/test/CodeGen/R600/llvm.log2.ll @@ -0,0 +1,79 @@ +;RUN: llc < %s -march=r600 -mcpu=redwood | FileCheck %s --check-prefix=EG-CHECK --check-prefix=FUNC +;RUN: llc < %s -march=r600 -mcpu=cayman | FileCheck %s --check-prefix=CM-CHECK --check-prefix=FUNC +;RUN: llc < %s -march=r600 -mcpu=SI | FileCheck %s --check-prefix=SI-CHECK --check-prefix=FUNC + +;FUNC-LABEL: @test +;EG-CHECK: LOG_IEEE +;CM-CHECK-DAG: LOG_IEEE T{{[0-9]+\.[XYZW]}} (MASKED) +;CM-CHECK-DAG: LOG_IEEE T{{[0-9]+\.[XYZW]}} (MASKED) +;CM-CHECK-DAG: LOG_IEEE T{{[0-9]+\.[XYZW]}} (MASKED) +;CM-CHECK-DAG: LOG_IEEE T{{[0-9]+\.[XYZW]}} +;SI-CHECK: V_LOG_F32 + +define void @test(float addrspace(1)* %out, float %in) { +entry: + %0 = call float @llvm.log2.f32(float %in) + store float %0, float addrspace(1)* %out + ret void +} + +;FUNC-LABEL: @testv2 +;EG-CHECK: LOG_IEEE +;EG-CHECK: LOG_IEEE +; FIXME: We should be able to merge these packets together on Cayman so we +; have a maximum of 4 instructions. +;CM-CHECK-DAG: LOG_IEEE T{{[0-9]+\.[XYZW]}} (MASKED) +;CM-CHECK-DAG: LOG_IEEE T{{[0-9]+\.[XYZW]}} (MASKED) +;CM-CHECK-DAG: LOG_IEEE T{{[0-9]+\.[XYZW]}} (MASKED) +;CM-CHECK-DAG: LOG_IEEE T{{[0-9]+\.[XYZW]}} (MASKED) +;CM-CHECK-DAG: LOG_IEEE T{{[0-9]+\.[XYZW]}} (MASKED) +;CM-CHECK-DAG: LOG_IEEE T{{[0-9]+\.[XYZW]}} (MASKED) +;CM-CHECK-DAG: LOG_IEEE T{{[0-9]+\.[XYZW]}} +;CM-CHECK-DAG: LOG_IEEE T{{[0-9]+\.[XYZW]}} +;SI-CHECK: V_LOG_F32 +;SI-CHECK: V_LOG_F32 + +define void @testv2(<2 x float> addrspace(1)* %out, <2 x float> %in) { +entry: + %0 = call <2 x float> @llvm.log2.v2f32(<2 x float> %in) + store <2 x float> %0, <2 x float> addrspace(1)* %out + ret void +} + +;FUNC-LABEL: @testv4 +;EG-CHECK: LOG_IEEE +;EG-CHECK: LOG_IEEE +;EG-CHECK: LOG_IEEE +;EG-CHECK: LOG_IEEE +; FIXME: We should be able to merge these packets together on Cayman so we +; have a maximum of 4 instructions. +;CM-CHECK-DAG: LOG_IEEE T{{[0-9]+\.[XYZW]}} (MASKED) +;CM-CHECK-DAG: LOG_IEEE T{{[0-9]+\.[XYZW]}} (MASKED) +;CM-CHECK-DAG: LOG_IEEE T{{[0-9]+\.[XYZW]}} (MASKED) +;CM-CHECK-DAG: LOG_IEEE T{{[0-9]+\.[XYZW]}} (MASKED) +;CM-CHECK-DAG: LOG_IEEE T{{[0-9]+\.[XYZW]}} (MASKED) +;CM-CHECK-DAG: LOG_IEEE T{{[0-9]+\.[XYZW]}} (MASKED) +;CM-CHECK-DAG: LOG_IEEE T{{[0-9]+\.[XYZW]}} (MASKED) +;CM-CHECK-DAG: LOG_IEEE T{{[0-9]+\.[XYZW]}} (MASKED) +;CM-CHECK-DAG: LOG_IEEE T{{[0-9]+\.[XYZW]}} (MASKED) +;CM-CHECK-DAG: LOG_IEEE T{{[0-9]+\.[XYZW]}} (MASKED) +;CM-CHECK-DAG: LOG_IEEE T{{[0-9]+\.[XYZW]}} (MASKED) +;CM-CHECK-DAG: LOG_IEEE T{{[0-9]+\.[XYZW]}} (MASKED) +;CM-CHECK-DAG: LOG_IEEE T{{[0-9]+\.[XYZW]}} +;CM-CHECK-DAG: LOG_IEEE T{{[0-9]+\.[XYZW]}} +;CM-CHECK-DAG: LOG_IEEE T{{[0-9]+\.[XYZW]}} +;CM-CHECK-DAG: LOG_IEEE T{{[0-9]+\.[XYZW]}} +;SI-CHECK: V_LOG_F32 +;SI-CHECK: V_LOG_F32 +;SI-CHECK: V_LOG_F32 +;SI-CHECK: V_LOG_F32 +define void @testv4(<4 x float> addrspace(1)* %out, <4 x float> %in) { +entry: + %0 = call <4 x float> @llvm.log2.v4f32(<4 x float> %in) + store <4 x float> %0, <4 x float> addrspace(1)* %out + ret void +} + +declare float @llvm.log2.f32(float) readnone +declare <2 x float> @llvm.log2.v2f32(<2 x float>) readnone +declare <4 x float> @llvm.log2.v4f32(<4 x float>) readnone diff --git a/test/CodeGen/R600/llvm.rint.f64.ll b/test/CodeGen/R600/llvm.rint.f64.ll index a7a909a..3e2884b 100644 --- a/test/CodeGen/R600/llvm.rint.f64.ll +++ b/test/CodeGen/R600/llvm.rint.f64.ll @@ -1,30 +1,38 @@ ; RUN: llc -march=r600 -mcpu=bonaire -verify-machineinstrs < %s | FileCheck -check-prefix=CI -check-prefix=FUNC %s +; RUN: llc -march=r600 -mcpu=SI -verify-machineinstrs < %s | FileCheck -check-prefix=SI -check-prefix=FUNC %s -; FUNC-LABEL: @f64 +; FUNC-LABEL: @rint_f64 ; CI: V_RNDNE_F64_e32 -define void @f64(double addrspace(1)* %out, double %in) { + +; SI-DAG: V_ADD_F64 +; SI-DAG: V_ADD_F64 +; SI-DAG V_CMP_GT_F64_e64 +; SI: V_CNDMASK_B32 +; SI: V_CNDMASK_B32 +; SI: S_ENDPGM +define void @rint_f64(double addrspace(1)* %out, double %in) { entry: %0 = call double @llvm.rint.f64(double %in) store double %0, double addrspace(1)* %out ret void } -; FUNC-LABEL: @v2f64 +; FUNC-LABEL: @rint_v2f64 ; CI: V_RNDNE_F64_e32 ; CI: V_RNDNE_F64_e32 -define void @v2f64(<2 x double> addrspace(1)* %out, <2 x double> %in) { +define void @rint_v2f64(<2 x double> addrspace(1)* %out, <2 x double> %in) { entry: %0 = call <2 x double> @llvm.rint.v2f64(<2 x double> %in) store <2 x double> %0, <2 x double> addrspace(1)* %out ret void } -; FUNC-LABEL: @v4f64 +; FUNC-LABEL: @rint_v4f64 ; CI: V_RNDNE_F64_e32 ; CI: V_RNDNE_F64_e32 ; CI: V_RNDNE_F64_e32 ; CI: V_RNDNE_F64_e32 -define void @v4f64(<4 x double> addrspace(1)* %out, <4 x double> %in) { +define void @rint_v4f64(<4 x double> addrspace(1)* %out, <4 x double> %in) { entry: %0 = call <4 x double> @llvm.rint.v4f64(<4 x double> %in) store <4 x double> %0, <4 x double> addrspace(1)* %out diff --git a/test/CodeGen/R600/llvm.rint.ll b/test/CodeGen/R600/llvm.rint.ll index db8352f..209bb43 100644 --- a/test/CodeGen/R600/llvm.rint.ll +++ b/test/CodeGen/R600/llvm.rint.ll @@ -1,31 +1,31 @@ ; RUN: llc -march=r600 -mcpu=redwood < %s | FileCheck %s -check-prefix=R600 -check-prefix=FUNC ; RUN: llc -march=r600 -mcpu=SI -verify-machineinstrs < %s | FileCheck -check-prefix=SI -check-prefix=FUNC %s -; FUNC-LABEL: @f32 +; FUNC-LABEL: @rint_f32 ; R600: RNDNE ; SI: V_RNDNE_F32_e32 -define void @f32(float addrspace(1)* %out, float %in) { +define void @rint_f32(float addrspace(1)* %out, float %in) { entry: - %0 = call float @llvm.rint.f32(float %in) + %0 = call float @llvm.rint.f32(float %in) #0 store float %0, float addrspace(1)* %out ret void } -; FUNC-LABEL: @v2f32 +; FUNC-LABEL: @rint_v2f32 ; R600: RNDNE ; R600: RNDNE ; SI: V_RNDNE_F32_e32 ; SI: V_RNDNE_F32_e32 -define void @v2f32(<2 x float> addrspace(1)* %out, <2 x float> %in) { +define void @rint_v2f32(<2 x float> addrspace(1)* %out, <2 x float> %in) { entry: - %0 = call <2 x float> @llvm.rint.v2f32(<2 x float> %in) + %0 = call <2 x float> @llvm.rint.v2f32(<2 x float> %in) #0 store <2 x float> %0, <2 x float> addrspace(1)* %out ret void } -; FUNC-LABEL: @v4f32 +; FUNC-LABEL: @rint_v4f32 ; R600: RNDNE ; R600: RNDNE ; R600: RNDNE @@ -35,15 +35,27 @@ entry: ; SI: V_RNDNE_F32_e32 ; SI: V_RNDNE_F32_e32 ; SI: V_RNDNE_F32_e32 -define void @v4f32(<4 x float> addrspace(1)* %out, <4 x float> %in) { +define void @rint_v4f32(<4 x float> addrspace(1)* %out, <4 x float> %in) { entry: - %0 = call <4 x float> @llvm.rint.v4f32(<4 x float> %in) + %0 = call <4 x float> @llvm.rint.v4f32(<4 x float> %in) #0 store <4 x float> %0, <4 x float> addrspace(1)* %out ret void } +; FUNC-LABEL: @legacy_amdil_round_nearest_f32 +; R600: RNDNE + +; SI: V_RNDNE_F32_e32 +define void @legacy_amdil_round_nearest_f32(float addrspace(1)* %out, float %in) { +entry: + %0 = call float @llvm.AMDIL.round.nearest.f32(float %in) #0 + store float %0, float addrspace(1)* %out + ret void +} + +declare float @llvm.AMDIL.round.nearest.f32(float) #0 declare float @llvm.rint.f32(float) #0 declare <2 x float> @llvm.rint.v2f32(<2 x float>) #0 declare <4 x float> @llvm.rint.v4f32(<4 x float>) #0 -attributes #0 = { nounwind readonly } +attributes #0 = { nounwind readnone } diff --git a/test/CodeGen/R600/load.ll b/test/CodeGen/R600/load.ll index 1486c4d..a57df5c 100644 --- a/test/CodeGen/R600/load.ll +++ b/test/CodeGen/R600/load.ll @@ -696,8 +696,7 @@ entry: ; R600-CHECK: LDS_READ_RET ; R600-CHECK: LDS_READ_RET ; SI-CHECK: S_MOV_B32 m0 -; SI-CHECK: DS_READ_B32 -; SI-CHECK: DS_READ_B32 +; SI-CHECK: DS_READ_B64 define void @load_v2f32_local(<2 x float> addrspace(1)* %out, <2 x float> addrspace(3)* %in) { entry: %0 = load <2 x float> addrspace(3)* %in diff --git a/test/CodeGen/R600/local-atomics.ll b/test/CodeGen/R600/local-atomics.ll new file mode 100644 index 0000000..5a44951 --- /dev/null +++ b/test/CodeGen/R600/local-atomics.ll @@ -0,0 +1,254 @@ +; RUN: llc -march=r600 -mcpu=SI -verify-machineinstrs < %s | FileCheck -check-prefix=SI %s + +; FUNC-LABEL: @lds_atomic_xchg_ret_i32: +; SI: S_LOAD_DWORD [[SPTR:s[0-9]+]], +; SI: V_MOV_B32_e32 [[DATA:v[0-9]+]], 4 +; SI: V_MOV_B32_e32 [[VPTR:v[0-9]+]], [[SPTR]] +; SI: DS_WRXCHG_RTN_B32 [[RESULT:v[0-9]+]], [[VPTR]], [[DATA]], 0x0, [M0] +; SI: BUFFER_STORE_DWORD [[RESULT]], +; SI: S_ENDPGM +define void @lds_atomic_xchg_ret_i32(i32 addrspace(1)* %out, i32 addrspace(3)* %ptr) nounwind { + %result = atomicrmw xchg i32 addrspace(3)* %ptr, i32 4 seq_cst + store i32 %result, i32 addrspace(1)* %out, align 4 + ret void +} + +; FUNC-LABEL: @lds_atomic_xchg_ret_i32_offset: +; SI: DS_WRXCHG_RTN_B32 v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}}, 0x10 +; SI: S_ENDPGM +define void @lds_atomic_xchg_ret_i32_offset(i32 addrspace(1)* %out, i32 addrspace(3)* %ptr) nounwind { + %gep = getelementptr i32 addrspace(3)* %ptr, i32 4 + %result = atomicrmw xchg i32 addrspace(3)* %gep, i32 4 seq_cst + store i32 %result, i32 addrspace(1)* %out, align 4 + ret void +} + +; XXX - Is it really necessary to load 4 into VGPR? +; FUNC-LABEL: @lds_atomic_add_ret_i32: +; SI: S_LOAD_DWORD [[SPTR:s[0-9]+]], +; SI: V_MOV_B32_e32 [[DATA:v[0-9]+]], 4 +; SI: V_MOV_B32_e32 [[VPTR:v[0-9]+]], [[SPTR]] +; SI: DS_ADD_RTN_U32 [[RESULT:v[0-9]+]], [[VPTR]], [[DATA]], 0x0, [M0] +; SI: BUFFER_STORE_DWORD [[RESULT]], +; SI: S_ENDPGM +define void @lds_atomic_add_ret_i32(i32 addrspace(1)* %out, i32 addrspace(3)* %ptr) nounwind { + %result = atomicrmw add i32 addrspace(3)* %ptr, i32 4 seq_cst + store i32 %result, i32 addrspace(1)* %out, align 4 + ret void +} + +; FUNC-LABEL: @lds_atomic_add_ret_i32_offset: +; SI: DS_ADD_RTN_U32 v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}}, 0x10 +; SI: S_ENDPGM +define void @lds_atomic_add_ret_i32_offset(i32 addrspace(1)* %out, i32 addrspace(3)* %ptr) nounwind { + %gep = getelementptr i32 addrspace(3)* %ptr, i32 4 + %result = atomicrmw add i32 addrspace(3)* %gep, i32 4 seq_cst + store i32 %result, i32 addrspace(1)* %out, align 4 + ret void +} + +; FUNC-LABEL: @lds_atomic_inc_ret_i32: +; SI: S_MOV_B32 [[SNEGONE:s[0-9]+]], -1 +; SI: V_MOV_B32_e32 [[NEGONE:v[0-9]+]], [[SNEGONE]] +; SI: DS_INC_RTN_U32 v{{[0-9]+}}, v{{[0-9]+}}, [[NEGONE]], 0x0 +; SI: S_ENDPGM +define void @lds_atomic_inc_ret_i32(i32 addrspace(1)* %out, i32 addrspace(3)* %ptr) nounwind { + %result = atomicrmw add i32 addrspace(3)* %ptr, i32 1 seq_cst + store i32 %result, i32 addrspace(1)* %out, align 4 + ret void +} + +; FUNC-LABEL: @lds_atomic_inc_ret_i32_offset: +; SI: S_MOV_B32 [[SNEGONE:s[0-9]+]], -1 +; SI: V_MOV_B32_e32 [[NEGONE:v[0-9]+]], [[SNEGONE]] +; SI: DS_INC_RTN_U32 v{{[0-9]+}}, v{{[0-9]+}}, [[NEGONE]], 0x10 +; SI: S_ENDPGM +define void @lds_atomic_inc_ret_i32_offset(i32 addrspace(1)* %out, i32 addrspace(3)* %ptr) nounwind { + %gep = getelementptr i32 addrspace(3)* %ptr, i32 4 + %result = atomicrmw add i32 addrspace(3)* %gep, i32 1 seq_cst + store i32 %result, i32 addrspace(1)* %out, align 4 + ret void +} + +; FUNC-LABEL: @lds_atomic_sub_ret_i32: +; SI: DS_SUB_RTN_U32 +; SI: S_ENDPGM +define void @lds_atomic_sub_ret_i32(i32 addrspace(1)* %out, i32 addrspace(3)* %ptr) nounwind { + %result = atomicrmw sub i32 addrspace(3)* %ptr, i32 4 seq_cst + store i32 %result, i32 addrspace(1)* %out, align 4 + ret void +} + +; FUNC-LABEL: @lds_atomic_sub_ret_i32_offset: +; SI: DS_SUB_RTN_U32 v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}}, 0x10 +; SI: S_ENDPGM +define void @lds_atomic_sub_ret_i32_offset(i32 addrspace(1)* %out, i32 addrspace(3)* %ptr) nounwind { + %gep = getelementptr i32 addrspace(3)* %ptr, i32 4 + %result = atomicrmw sub i32 addrspace(3)* %gep, i32 4 seq_cst + store i32 %result, i32 addrspace(1)* %out, align 4 + ret void +} + +; FUNC-LABEL: @lds_atomic_dec_ret_i32: +; SI: S_MOV_B32 [[SNEGONE:s[0-9]+]], -1 +; SI: V_MOV_B32_e32 [[NEGONE:v[0-9]+]], [[SNEGONE]] +; SI: DS_DEC_RTN_U32 v{{[0-9]+}}, v{{[0-9]+}}, [[NEGONE]], 0x0 +; SI: S_ENDPGM +define void @lds_atomic_dec_ret_i32(i32 addrspace(1)* %out, i32 addrspace(3)* %ptr) nounwind { + %result = atomicrmw sub i32 addrspace(3)* %ptr, i32 1 seq_cst + store i32 %result, i32 addrspace(1)* %out, align 4 + ret void +} + +; FUNC-LABEL: @lds_atomic_dec_ret_i32_offset: +; SI: S_MOV_B32 [[SNEGONE:s[0-9]+]], -1 +; SI: V_MOV_B32_e32 [[NEGONE:v[0-9]+]], [[SNEGONE]] +; SI: DS_DEC_RTN_U32 v{{[0-9]+}}, v{{[0-9]+}}, [[NEGONE]], 0x10 +; SI: S_ENDPGM +define void @lds_atomic_dec_ret_i32_offset(i32 addrspace(1)* %out, i32 addrspace(3)* %ptr) nounwind { + %gep = getelementptr i32 addrspace(3)* %ptr, i32 4 + %result = atomicrmw sub i32 addrspace(3)* %gep, i32 1 seq_cst + store i32 %result, i32 addrspace(1)* %out, align 4 + ret void +} + +; FUNC-LABEL: @lds_atomic_and_ret_i32: +; SI: DS_AND_RTN_B32 +; SI: S_ENDPGM +define void @lds_atomic_and_ret_i32(i32 addrspace(1)* %out, i32 addrspace(3)* %ptr) nounwind { + %result = atomicrmw and i32 addrspace(3)* %ptr, i32 4 seq_cst + store i32 %result, i32 addrspace(1)* %out, align 4 + ret void +} + +; FUNC-LABEL: @lds_atomic_and_ret_i32_offset: +; SI: DS_AND_RTN_B32 v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}}, 0x10 +; SI: S_ENDPGM +define void @lds_atomic_and_ret_i32_offset(i32 addrspace(1)* %out, i32 addrspace(3)* %ptr) nounwind { + %gep = getelementptr i32 addrspace(3)* %ptr, i32 4 + %result = atomicrmw and i32 addrspace(3)* %gep, i32 4 seq_cst + store i32 %result, i32 addrspace(1)* %out, align 4 + ret void +} + +; FUNC-LABEL: @lds_atomic_or_ret_i32: +; SI: DS_OR_RTN_B32 +; SI: S_ENDPGM +define void @lds_atomic_or_ret_i32(i32 addrspace(1)* %out, i32 addrspace(3)* %ptr) nounwind { + %result = atomicrmw or i32 addrspace(3)* %ptr, i32 4 seq_cst + store i32 %result, i32 addrspace(1)* %out, align 4 + ret void +} + +; FUNC-LABEL: @lds_atomic_or_ret_i32_offset: +; SI: DS_OR_RTN_B32 v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}}, 0x10 +; SI: S_ENDPGM +define void @lds_atomic_or_ret_i32_offset(i32 addrspace(1)* %out, i32 addrspace(3)* %ptr) nounwind { + %gep = getelementptr i32 addrspace(3)* %ptr, i32 4 + %result = atomicrmw or i32 addrspace(3)* %gep, i32 4 seq_cst + store i32 %result, i32 addrspace(1)* %out, align 4 + ret void +} + +; FUNC-LABEL: @lds_atomic_xor_ret_i32: +; SI: DS_XOR_RTN_B32 +; SI: S_ENDPGM +define void @lds_atomic_xor_ret_i32(i32 addrspace(1)* %out, i32 addrspace(3)* %ptr) nounwind { + %result = atomicrmw xor i32 addrspace(3)* %ptr, i32 4 seq_cst + store i32 %result, i32 addrspace(1)* %out, align 4 + ret void +} + +; FUNC-LABEL: @lds_atomic_xor_ret_i32_offset: +; SI: DS_XOR_RTN_B32 v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}}, 0x10 +; SI: S_ENDPGM +define void @lds_atomic_xor_ret_i32_offset(i32 addrspace(1)* %out, i32 addrspace(3)* %ptr) nounwind { + %gep = getelementptr i32 addrspace(3)* %ptr, i32 4 + %result = atomicrmw xor i32 addrspace(3)* %gep, i32 4 seq_cst + store i32 %result, i32 addrspace(1)* %out, align 4 + ret void +} + +; FIXME: There is no atomic nand instr +; XFUNC-LABEL: @lds_atomic_nand_ret_i32:uction, so we somehow need to expand this. +; define void @lds_atomic_nand_ret_i32(i32 addrspace(1)* %out, i32 addrspace(3)* %ptr) nounwind { +; %result = atomicrmw nand i32 addrspace(3)* %ptr, i32 4 seq_cst +; store i32 %result, i32 addrspace(1)* %out, align 4 +; ret void +; } + +; FUNC-LABEL: @lds_atomic_min_ret_i32: +; SI: DS_MIN_RTN_I32 +; SI: S_ENDPGM +define void @lds_atomic_min_ret_i32(i32 addrspace(1)* %out, i32 addrspace(3)* %ptr) nounwind { + %result = atomicrmw min i32 addrspace(3)* %ptr, i32 4 seq_cst + store i32 %result, i32 addrspace(1)* %out, align 4 + ret void +} + +; FUNC-LABEL: @lds_atomic_min_ret_i32_offset: +; SI: DS_MIN_RTN_I32 v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}}, 0x10 +; SI: S_ENDPGM +define void @lds_atomic_min_ret_i32_offset(i32 addrspace(1)* %out, i32 addrspace(3)* %ptr) nounwind { + %gep = getelementptr i32 addrspace(3)* %ptr, i32 4 + %result = atomicrmw min i32 addrspace(3)* %gep, i32 4 seq_cst + store i32 %result, i32 addrspace(1)* %out, align 4 + ret void +} + +; FUNC-LABEL: @lds_atomic_max_ret_i32: +; SI: DS_MAX_RTN_I32 +; SI: S_ENDPGM +define void @lds_atomic_max_ret_i32(i32 addrspace(1)* %out, i32 addrspace(3)* %ptr) nounwind { + %result = atomicrmw max i32 addrspace(3)* %ptr, i32 4 seq_cst + store i32 %result, i32 addrspace(1)* %out, align 4 + ret void +} + +; FUNC-LABEL: @lds_atomic_max_ret_i32_offset: +; SI: DS_MAX_RTN_I32 v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}}, 0x10 +; SI: S_ENDPGM +define void @lds_atomic_max_ret_i32_offset(i32 addrspace(1)* %out, i32 addrspace(3)* %ptr) nounwind { + %gep = getelementptr i32 addrspace(3)* %ptr, i32 4 + %result = atomicrmw max i32 addrspace(3)* %gep, i32 4 seq_cst + store i32 %result, i32 addrspace(1)* %out, align 4 + ret void +} + +; FUNC-LABEL: @lds_atomic_umin_ret_i32: +; SI: DS_MIN_RTN_U32 +; SI: S_ENDPGM +define void @lds_atomic_umin_ret_i32(i32 addrspace(1)* %out, i32 addrspace(3)* %ptr) nounwind { + %result = atomicrmw umin i32 addrspace(3)* %ptr, i32 4 seq_cst + store i32 %result, i32 addrspace(1)* %out, align 4 + ret void +} + +; FUNC-LABEL: @lds_atomic_umin_ret_i32_offset: +; SI: DS_MIN_RTN_U32 v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}}, 0x10 +; SI: S_ENDPGM +define void @lds_atomic_umin_ret_i32_offset(i32 addrspace(1)* %out, i32 addrspace(3)* %ptr) nounwind { + %gep = getelementptr i32 addrspace(3)* %ptr, i32 4 + %result = atomicrmw umin i32 addrspace(3)* %gep, i32 4 seq_cst + store i32 %result, i32 addrspace(1)* %out, align 4 + ret void +} + +; FUNC-LABEL: @lds_atomic_umax_ret_i32: +; SI: DS_MAX_RTN_U32 +; SI: S_ENDPGM +define void @lds_atomic_umax_ret_i32(i32 addrspace(1)* %out, i32 addrspace(3)* %ptr) nounwind { + %result = atomicrmw umax i32 addrspace(3)* %ptr, i32 4 seq_cst + store i32 %result, i32 addrspace(1)* %out, align 4 + ret void +} + +; FUNC-LABEL: @lds_atomic_umax_ret_i32_offset: +; SI: DS_MAX_RTN_U32 v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}}, 0x10 +; SI: S_ENDPGM +define void @lds_atomic_umax_ret_i32_offset(i32 addrspace(1)* %out, i32 addrspace(3)* %ptr) nounwind { + %gep = getelementptr i32 addrspace(3)* %ptr, i32 4 + %result = atomicrmw umax i32 addrspace(3)* %gep, i32 4 seq_cst + store i32 %result, i32 addrspace(1)* %out, align 4 + ret void +} diff --git a/test/CodeGen/R600/local-atomics64.ll b/test/CodeGen/R600/local-atomics64.ll new file mode 100644 index 0000000..849b033 --- /dev/null +++ b/test/CodeGen/R600/local-atomics64.ll @@ -0,0 +1,251 @@ +; RUN: llc -march=r600 -mcpu=SI -verify-machineinstrs < %s | FileCheck -check-prefix=SI %s + +; FUNC-LABEL: @lds_atomic_xchg_ret_i64: +; SI: DS_WRXCHG_RTN_B64 +; SI: S_ENDPGM +define void @lds_atomic_xchg_ret_i64(i64 addrspace(1)* %out, i64 addrspace(3)* %ptr) nounwind { + %result = atomicrmw xchg i64 addrspace(3)* %ptr, i64 4 seq_cst + store i64 %result, i64 addrspace(1)* %out, align 8 + ret void +} + +; FUNC-LABEL: @lds_atomic_xchg_ret_i64_offset: +; SI: DS_WRXCHG_RTN_B64 {{.*}} 0x20 +; SI: S_ENDPGM +define void @lds_atomic_xchg_ret_i64_offset(i64 addrspace(1)* %out, i64 addrspace(3)* %ptr) nounwind { + %gep = getelementptr i64 addrspace(3)* %ptr, i32 4 + %result = atomicrmw xchg i64 addrspace(3)* %gep, i64 4 seq_cst + store i64 %result, i64 addrspace(1)* %out, align 8 + ret void +} + +; FUNC-LABEL: @lds_atomic_add_ret_i64: +; SI: DS_ADD_RTN_U64 +; SI: S_ENDPGM +define void @lds_atomic_add_ret_i64(i64 addrspace(1)* %out, i64 addrspace(3)* %ptr) nounwind { + %result = atomicrmw add i64 addrspace(3)* %ptr, i64 4 seq_cst + store i64 %result, i64 addrspace(1)* %out, align 8 + ret void +} + +; FUNC-LABEL: @lds_atomic_add_ret_i64_offset: +; SI: S_LOAD_DWORD [[PTR:s[0-9]+]], s{{\[[0-9]+:[0-9]+\]}}, 0xb +; SI: S_MOV_B64 s{{\[}}[[LOSDATA:[0-9]+]]:[[HISDATA:[0-9]+]]{{\]}}, 9 +; SI-DAG: V_MOV_B32_e32 v[[LOVDATA:[0-9]+]], s[[LOSDATA]] +; SI-DAG: V_MOV_B32_e32 v[[HIVDATA:[0-9]+]], s[[HISDATA]] +; SI-DAG: V_MOV_B32_e32 [[VPTR:v[0-9]+]], [[PTR]] +; SI: DS_ADD_RTN_U64 [[RESULT:v\[[0-9]+:[0-9]+\]]], [[VPTR]], v{{\[}}[[LOVDATA]]:[[HIVDATA]]{{\]}}, 0x20, [M0] +; SI: BUFFER_STORE_DWORDX2 [[RESULT]], +; SI: S_ENDPGM +define void @lds_atomic_add_ret_i64_offset(i64 addrspace(1)* %out, i64 addrspace(3)* %ptr) nounwind { + %gep = getelementptr i64 addrspace(3)* %ptr, i64 4 + %result = atomicrmw add i64 addrspace(3)* %gep, i64 9 seq_cst + store i64 %result, i64 addrspace(1)* %out, align 8 + ret void +} + +; FUNC-LABEL: @lds_atomic_inc_ret_i64: +; SI: S_MOV_B64 s{{\[}}[[LOSDATA:[0-9]+]]:[[HISDATA:[0-9]+]]{{\]}}, -1 +; SI-DAG: V_MOV_B32_e32 v[[LOVDATA:[0-9]+]], s[[LOSDATA]] +; SI-DAG: V_MOV_B32_e32 v[[HIVDATA:[0-9]+]], s[[HISDATA]] +; SI: DS_INC_RTN_U64 [[RESULT:v\[[0-9]+:[0-9]+\]]], [[VPTR]], v{{\[}}[[LOVDATA]]:[[HIVDATA]]{{\]}}, +; SI: BUFFER_STORE_DWORDX2 [[RESULT]], +; SI: S_ENDPGM +define void @lds_atomic_inc_ret_i64(i64 addrspace(1)* %out, i64 addrspace(3)* %ptr) nounwind { + %result = atomicrmw add i64 addrspace(3)* %ptr, i64 1 seq_cst + store i64 %result, i64 addrspace(1)* %out, align 8 + ret void +} + +; FUNC-LABEL: @lds_atomic_inc_ret_i64_offset: +; SI: DS_INC_RTN_U64 {{.*}} 0x20 +; SI: S_ENDPGM +define void @lds_atomic_inc_ret_i64_offset(i64 addrspace(1)* %out, i64 addrspace(3)* %ptr) nounwind { + %gep = getelementptr i64 addrspace(3)* %ptr, i32 4 + %result = atomicrmw add i64 addrspace(3)* %gep, i64 1 seq_cst + store i64 %result, i64 addrspace(1)* %out, align 8 + ret void +} + +; FUNC-LABEL: @lds_atomic_sub_ret_i64: +; SI: DS_SUB_RTN_U64 +; SI: S_ENDPGM +define void @lds_atomic_sub_ret_i64(i64 addrspace(1)* %out, i64 addrspace(3)* %ptr) nounwind { + %result = atomicrmw sub i64 addrspace(3)* %ptr, i64 4 seq_cst + store i64 %result, i64 addrspace(1)* %out, align 8 + ret void +} + +; FUNC-LABEL: @lds_atomic_sub_ret_i64_offset: +; SI: DS_SUB_RTN_U64 {{.*}} 0x20 +; SI: S_ENDPGM +define void @lds_atomic_sub_ret_i64_offset(i64 addrspace(1)* %out, i64 addrspace(3)* %ptr) nounwind { + %gep = getelementptr i64 addrspace(3)* %ptr, i32 4 + %result = atomicrmw sub i64 addrspace(3)* %gep, i64 4 seq_cst + store i64 %result, i64 addrspace(1)* %out, align 8 + ret void +} + +; FUNC-LABEL: @lds_atomic_dec_ret_i64: +; SI: S_MOV_B64 s{{\[}}[[LOSDATA:[0-9]+]]:[[HISDATA:[0-9]+]]{{\]}}, -1 +; SI-DAG: V_MOV_B32_e32 v[[LOVDATA:[0-9]+]], s[[LOSDATA]] +; SI-DAG: V_MOV_B32_e32 v[[HIVDATA:[0-9]+]], s[[HISDATA]] +; SI: DS_DEC_RTN_U64 [[RESULT:v\[[0-9]+:[0-9]+\]]], [[VPTR]], v{{\[}}[[LOVDATA]]:[[HIVDATA]]{{\]}}, +; SI: BUFFER_STORE_DWORDX2 [[RESULT]], +; SI: S_ENDPGM +define void @lds_atomic_dec_ret_i64(i64 addrspace(1)* %out, i64 addrspace(3)* %ptr) nounwind { + %result = atomicrmw sub i64 addrspace(3)* %ptr, i64 1 seq_cst + store i64 %result, i64 addrspace(1)* %out, align 8 + ret void +} + +; FUNC-LABEL: @lds_atomic_dec_ret_i64_offset: +; SI: DS_DEC_RTN_U64 {{.*}} 0x20 +; SI: S_ENDPGM +define void @lds_atomic_dec_ret_i64_offset(i64 addrspace(1)* %out, i64 addrspace(3)* %ptr) nounwind { + %gep = getelementptr i64 addrspace(3)* %ptr, i32 4 + %result = atomicrmw sub i64 addrspace(3)* %gep, i64 1 seq_cst + store i64 %result, i64 addrspace(1)* %out, align 8 + ret void +} + +; FUNC-LABEL: @lds_atomic_and_ret_i64: +; SI: DS_AND_RTN_B64 +; SI: S_ENDPGM +define void @lds_atomic_and_ret_i64(i64 addrspace(1)* %out, i64 addrspace(3)* %ptr) nounwind { + %result = atomicrmw and i64 addrspace(3)* %ptr, i64 4 seq_cst + store i64 %result, i64 addrspace(1)* %out, align 8 + ret void +} + +; FUNC-LABEL: @lds_atomic_and_ret_i64_offset: +; SI: DS_AND_RTN_B64 {{.*}} 0x20 +; SI: S_ENDPGM +define void @lds_atomic_and_ret_i64_offset(i64 addrspace(1)* %out, i64 addrspace(3)* %ptr) nounwind { + %gep = getelementptr i64 addrspace(3)* %ptr, i32 4 + %result = atomicrmw and i64 addrspace(3)* %gep, i64 4 seq_cst + store i64 %result, i64 addrspace(1)* %out, align 8 + ret void +} + +; FUNC-LABEL: @lds_atomic_or_ret_i64: +; SI: DS_OR_RTN_B64 +; SI: S_ENDPGM +define void @lds_atomic_or_ret_i64(i64 addrspace(1)* %out, i64 addrspace(3)* %ptr) nounwind { + %result = atomicrmw or i64 addrspace(3)* %ptr, i64 4 seq_cst + store i64 %result, i64 addrspace(1)* %out, align 8 + ret void +} + +; FUNC-LABEL: @lds_atomic_or_ret_i64_offset: +; SI: DS_OR_RTN_B64 {{.*}} 0x20 +; SI: S_ENDPGM +define void @lds_atomic_or_ret_i64_offset(i64 addrspace(1)* %out, i64 addrspace(3)* %ptr) nounwind { + %gep = getelementptr i64 addrspace(3)* %ptr, i32 4 + %result = atomicrmw or i64 addrspace(3)* %gep, i64 4 seq_cst + store i64 %result, i64 addrspace(1)* %out, align 8 + ret void +} + +; FUNC-LABEL: @lds_atomic_xor_ret_i64: +; SI: DS_XOR_RTN_B64 +; SI: S_ENDPGM +define void @lds_atomic_xor_ret_i64(i64 addrspace(1)* %out, i64 addrspace(3)* %ptr) nounwind { + %result = atomicrmw xor i64 addrspace(3)* %ptr, i64 4 seq_cst + store i64 %result, i64 addrspace(1)* %out, align 8 + ret void +} + +; FUNC-LABEL: @lds_atomic_xor_ret_i64_offset: +; SI: DS_XOR_RTN_B64 {{.*}} 0x20 +; SI: S_ENDPGM +define void @lds_atomic_xor_ret_i64_offset(i64 addrspace(1)* %out, i64 addrspace(3)* %ptr) nounwind { + %gep = getelementptr i64 addrspace(3)* %ptr, i32 4 + %result = atomicrmw xor i64 addrspace(3)* %gep, i64 4 seq_cst + store i64 %result, i64 addrspace(1)* %out, align 8 + ret void +} + +; FIXME: There is no atomic nand instr +; XFUNC-LABEL: @lds_atomic_nand_ret_i64:uction, so we somehow need to expand this. +; define void @lds_atomic_nand_ret_i64(i64 addrspace(1)* %out, i64 addrspace(3)* %ptr) nounwind { +; %result = atomicrmw nand i64 addrspace(3)* %ptr, i32 4 seq_cst +; store i64 %result, i64 addrspace(1)* %out, align 8 +; ret void +; } + +; FUNC-LABEL: @lds_atomic_min_ret_i64: +; SI: DS_MIN_RTN_I64 +; SI: S_ENDPGM +define void @lds_atomic_min_ret_i64(i64 addrspace(1)* %out, i64 addrspace(3)* %ptr) nounwind { + %result = atomicrmw min i64 addrspace(3)* %ptr, i64 4 seq_cst + store i64 %result, i64 addrspace(1)* %out, align 8 + ret void +} + +; FUNC-LABEL: @lds_atomic_min_ret_i64_offset: +; SI: DS_MIN_RTN_I64 {{.*}} 0x20 +; SI: S_ENDPGM +define void @lds_atomic_min_ret_i64_offset(i64 addrspace(1)* %out, i64 addrspace(3)* %ptr) nounwind { + %gep = getelementptr i64 addrspace(3)* %ptr, i32 4 + %result = atomicrmw min i64 addrspace(3)* %gep, i64 4 seq_cst + store i64 %result, i64 addrspace(1)* %out, align 8 + ret void +} + +; FUNC-LABEL: @lds_atomic_max_ret_i64: +; SI: DS_MAX_RTN_I64 +; SI: S_ENDPGM +define void @lds_atomic_max_ret_i64(i64 addrspace(1)* %out, i64 addrspace(3)* %ptr) nounwind { + %result = atomicrmw max i64 addrspace(3)* %ptr, i64 4 seq_cst + store i64 %result, i64 addrspace(1)* %out, align 8 + ret void +} + +; FUNC-LABEL: @lds_atomic_max_ret_i64_offset: +; SI: DS_MAX_RTN_I64 {{.*}} 0x20 +; SI: S_ENDPGM +define void @lds_atomic_max_ret_i64_offset(i64 addrspace(1)* %out, i64 addrspace(3)* %ptr) nounwind { + %gep = getelementptr i64 addrspace(3)* %ptr, i32 4 + %result = atomicrmw max i64 addrspace(3)* %gep, i64 4 seq_cst + store i64 %result, i64 addrspace(1)* %out, align 8 + ret void +} + +; FUNC-LABEL: @lds_atomic_umin_ret_i64: +; SI: DS_MIN_RTN_U64 +; SI: S_ENDPGM +define void @lds_atomic_umin_ret_i64(i64 addrspace(1)* %out, i64 addrspace(3)* %ptr) nounwind { + %result = atomicrmw umin i64 addrspace(3)* %ptr, i64 4 seq_cst + store i64 %result, i64 addrspace(1)* %out, align 8 + ret void +} + +; FUNC-LABEL: @lds_atomic_umin_ret_i64_offset: +; SI: DS_MIN_RTN_U64 {{.*}} 0x20 +; SI: S_ENDPGM +define void @lds_atomic_umin_ret_i64_offset(i64 addrspace(1)* %out, i64 addrspace(3)* %ptr) nounwind { + %gep = getelementptr i64 addrspace(3)* %ptr, i32 4 + %result = atomicrmw umin i64 addrspace(3)* %gep, i64 4 seq_cst + store i64 %result, i64 addrspace(1)* %out, align 8 + ret void +} + +; FUNC-LABEL: @lds_atomic_umax_ret_i64: +; SI: DS_MAX_RTN_U64 +; SI: S_ENDPGM +define void @lds_atomic_umax_ret_i64(i64 addrspace(1)* %out, i64 addrspace(3)* %ptr) nounwind { + %result = atomicrmw umax i64 addrspace(3)* %ptr, i64 4 seq_cst + store i64 %result, i64 addrspace(1)* %out, align 8 + ret void +} + +; FUNC-LABEL: @lds_atomic_umax_ret_i64_offset: +; SI: DS_MAX_RTN_U64 {{.*}} 0x20 +; SI: S_ENDPGM +define void @lds_atomic_umax_ret_i64_offset(i64 addrspace(1)* %out, i64 addrspace(3)* %ptr) nounwind { + %gep = getelementptr i64 addrspace(3)* %ptr, i32 4 + %result = atomicrmw umax i64 addrspace(3)* %gep, i64 4 seq_cst + store i64 %result, i64 addrspace(1)* %out, align 8 + ret void +} diff --git a/test/CodeGen/R600/local-memory-two-objects.ll b/test/CodeGen/R600/local-memory-two-objects.ll index 1e42285..e29e4cc 100644 --- a/test/CodeGen/R600/local-memory-two-objects.ll +++ b/test/CodeGen/R600/local-memory-two-objects.ll @@ -1,8 +1,8 @@ ; RUN: llc < %s -march=r600 -mcpu=redwood | FileCheck --check-prefix=EG-CHECK %s ; RUN: llc < %s -march=r600 -mcpu=verde -verify-machineinstrs | FileCheck --check-prefix=SI-CHECK %s -@local_memory_two_objects.local_mem0 = internal addrspace(3) unnamed_addr global [4 x i32] zeroinitializer, align 4 -@local_memory_two_objects.local_mem1 = internal addrspace(3) unnamed_addr global [4 x i32] zeroinitializer, align 4 +@local_memory_two_objects.local_mem0 = internal unnamed_addr addrspace(3) global [4 x i32] zeroinitializer, align 4 +@local_memory_two_objects.local_mem1 = internal unnamed_addr addrspace(3) global [4 x i32] zeroinitializer, align 4 ; EG-CHECK: @local_memory_two_objects diff --git a/test/CodeGen/R600/local-memory.ll b/test/CodeGen/R600/local-memory.ll index 6ebe41d..51af484 100644 --- a/test/CodeGen/R600/local-memory.ll +++ b/test/CodeGen/R600/local-memory.ll @@ -2,7 +2,7 @@ ; RUN: llc < %s -march=r600 -mcpu=verde -verify-machineinstrs | FileCheck --check-prefix=SI-CHECK %s ; RUN: llc < %s -march=r600 -mcpu=bonaire -verify-machineinstrs | FileCheck --check-prefix=CI-CHECK %s -@local_memory.local_mem = internal addrspace(3) unnamed_addr global [128 x i32] zeroinitializer, align 4 +@local_memory.local_mem = internal unnamed_addr addrspace(3) global [128 x i32] zeroinitializer, align 4 ; EG-CHECK-LABEL: @local_memory ; SI-CHECK-LABEL: @local_memory diff --git a/test/CodeGen/R600/mul.ll b/test/CodeGen/R600/mul.ll index 6ed754c..d231e92 100644 --- a/test/CodeGen/R600/mul.ll +++ b/test/CodeGen/R600/mul.ll @@ -1,14 +1,14 @@ -; RUN: llc < %s -march=r600 -mcpu=redwood | FileCheck --check-prefix=EG %s --check-prefix=FUNC -; RUN: llc < %s -march=r600 -mcpu=verde -verify-machineinstrs | FileCheck --check-prefix=SI --check-prefix=FUNC %s +; RUN: llc -march=r600 -mcpu=redwood < %s | FileCheck -check-prefix=EG %s -check-prefix=FUNC +; RUN: llc -march=r600 -mcpu=verde -verify-machineinstrs < %s | FileCheck -check-prefix=SI -check-prefix=FUNC %s ; mul24 and mad24 are affected -;FUNC-LABEL: @test2 -;EG: MULLO_INT {{\*? *}}T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}} -;EG: MULLO_INT {{\*? *}}T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}} +; FUNC-LABEL: @test2 +; EG: MULLO_INT {{\*? *}}T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}} +; EG: MULLO_INT {{\*? *}}T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}} -;SI: V_MUL_LO_I32 v{{[0-9]+, v[0-9]+, v[0-9]+}} -;SI: V_MUL_LO_I32 v{{[0-9]+, v[0-9]+, v[0-9]+}} +; SI: V_MUL_LO_I32 v{{[0-9]+, v[0-9]+, v[0-9]+}} +; SI: V_MUL_LO_I32 v{{[0-9]+, v[0-9]+, v[0-9]+}} define void @test2(<2 x i32> addrspace(1)* %out, <2 x i32> addrspace(1)* %in) { %b_ptr = getelementptr <2 x i32> addrspace(1)* %in, i32 1 @@ -19,16 +19,16 @@ define void @test2(<2 x i32> addrspace(1)* %out, <2 x i32> addrspace(1)* %in) { ret void } -;FUNC-LABEL: @test4 -;EG: MULLO_INT {{\*? *}}T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}} -;EG: MULLO_INT {{\*? *}}T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}} -;EG: MULLO_INT {{\*? *}}T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}} -;EG: MULLO_INT {{\*? *}}T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}} +; FUNC-LABEL: @test4 +; EG: MULLO_INT {{\*? *}}T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}} +; EG: MULLO_INT {{\*? *}}T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}} +; EG: MULLO_INT {{\*? *}}T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}} +; EG: MULLO_INT {{\*? *}}T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}} -;SI: V_MUL_LO_I32 v{{[0-9]+, v[0-9]+, v[0-9]+}} -;SI: V_MUL_LO_I32 v{{[0-9]+, v[0-9]+, v[0-9]+}} -;SI: V_MUL_LO_I32 v{{[0-9]+, v[0-9]+, v[0-9]+}} -;SI: V_MUL_LO_I32 v{{[0-9]+, v[0-9]+, v[0-9]+}} +; SI: V_MUL_LO_I32 v{{[0-9]+, v[0-9]+, v[0-9]+}} +; SI: V_MUL_LO_I32 v{{[0-9]+, v[0-9]+, v[0-9]+}} +; SI: V_MUL_LO_I32 v{{[0-9]+, v[0-9]+, v[0-9]+}} +; SI: V_MUL_LO_I32 v{{[0-9]+, v[0-9]+, v[0-9]+}} define void @test4(<4 x i32> addrspace(1)* %out, <4 x i32> addrspace(1)* %in) { %b_ptr = getelementptr <4 x i32> addrspace(1)* %in, i32 1 @@ -39,11 +39,11 @@ define void @test4(<4 x i32> addrspace(1)* %out, <4 x i32> addrspace(1)* %in) { ret void } -; SI-CHECK-LABEL: @trunc_i64_mul_to_i32 -; SI-CHECK: S_LOAD_DWORD -; SI-CHECK: S_LOAD_DWORD -; SI-CHECK: V_MUL_LO_I32 -; SI-CHECK: BUFFER_STORE_DWORD +; FUNC-LABEL: @trunc_i64_mul_to_i32 +; SI: S_LOAD_DWORD +; SI: S_LOAD_DWORD +; SI: V_MUL_LO_I32 +; SI: BUFFER_STORE_DWORD define void @trunc_i64_mul_to_i32(i32 addrspace(1)* %out, i64 %a, i64 %b) { %mul = mul i64 %b, %a %trunc = trunc i64 %mul to i32 diff --git a/test/CodeGen/R600/no-initializer-constant-addrspace.ll b/test/CodeGen/R600/no-initializer-constant-addrspace.ll new file mode 100644 index 0000000..ab82e7e --- /dev/null +++ b/test/CodeGen/R600/no-initializer-constant-addrspace.ll @@ -0,0 +1,20 @@ +; RUN: llc -march=r600 -mcpu=SI -o /dev/null %s +; RUN: llc -march=r600 -mcpu=cypress -o /dev/null %s + +@extern_const_addrspace = external unnamed_addr addrspace(2) constant [5 x i32], align 4 + +; FUNC-LABEL: @load_extern_const_init +define void @load_extern_const_init(i32 addrspace(1)* %out) nounwind { + %val = load i32 addrspace(2)* getelementptr ([5 x i32] addrspace(2)* @extern_const_addrspace, i64 0, i64 3), align 4 + store i32 %val, i32 addrspace(1)* %out, align 4 + ret void +} + +@undef_const_addrspace = unnamed_addr addrspace(2) constant [5 x i32] undef, align 4 + +; FUNC-LABEL: @load_undef_const_init +define void @load_undef_const_init(i32 addrspace(1)* %out) nounwind { + %val = load i32 addrspace(2)* getelementptr ([5 x i32] addrspace(2)* @undef_const_addrspace, i64 0, i64 3), align 4 + store i32 %val, i32 addrspace(1)* %out, align 4 + ret void +} diff --git a/test/CodeGen/R600/or.ll b/test/CodeGen/R600/or.ll index 9878366..91a70b7 100644 --- a/test/CodeGen/R600/or.ll +++ b/test/CodeGen/R600/or.ll @@ -116,9 +116,9 @@ define void @vector_or_i64_imm(i64 addrspace(1)* %out, i64 addrspace(1)* %a, i64 } ; SI-LABEL: @trunc_i64_or_to_i32 -; SI: S_LOAD_DWORD [[SREG0:s[0-9]+]], -; SI: S_LOAD_DWORD [[SREG1:s[0-9]+]], -; SI: S_OR_B32 [[SRESULT:s[0-9]+]], [[SREG1]], [[SREG0]] +; SI: S_LOAD_DWORDX2 s{{\[}}[[SREG0:[0-9]+]] +; SI: S_LOAD_DWORDX2 s{{\[}}[[SREG1:[0-9]+]] +; SI: S_OR_B32 [[SRESULT:s[0-9]+]], s[[SREG1]], s[[SREG0]] ; SI: V_MOV_B32_e32 [[VRESULT:v[0-9]+]], [[SRESULT]] ; SI: BUFFER_STORE_DWORD [[VRESULT]], define void @trunc_i64_or_to_i32(i32 addrspace(1)* %out, i64 %a, i64 %b) { diff --git a/test/CodeGen/R600/parallelandifcollapse.ll b/test/CodeGen/R600/parallelandifcollapse.ll index 4afaf68..8a269e0 100644 --- a/test/CodeGen/R600/parallelandifcollapse.ll +++ b/test/CodeGen/R600/parallelandifcollapse.ll @@ -7,6 +7,12 @@ ; CHECK: AND_INT ; CHECK-NEXT: AND_INT ; CHECK-NEXT: OR_INT + +; FIXME: For some reason having the allocas here allowed the flatten cfg pass +; to do its transfomation, however now that we are using local memory for +; allocas, the transformation isn't happening. +; XFAIL: * + define void @_Z9chk1D_512v() #0 { entry: %a0 = alloca i32, align 4 diff --git a/test/CodeGen/R600/parallelorifcollapse.ll b/test/CodeGen/R600/parallelorifcollapse.ll index b0db7cd..feca688 100644 --- a/test/CodeGen/R600/parallelorifcollapse.ll +++ b/test/CodeGen/R600/parallelorifcollapse.ll @@ -3,6 +3,11 @@ ; ; CFG flattening should use parallel-or to generate branch conditions and ; then merge if-regions with the same bodies. + +; FIXME: For some reason having the allocas here allowed the flatten cfg pass +; to do its transfomation, however now that we are using local memory for +; allocas, the transformation isn't happening. +; XFAIL: * ; ; CHECK: OR_INT ; CHECK-NEXT: OR_INT diff --git a/test/CodeGen/R600/private-memory-atomics.ll b/test/CodeGen/R600/private-memory-atomics.ll new file mode 100644 index 0000000..def4f9d --- /dev/null +++ b/test/CodeGen/R600/private-memory-atomics.ll @@ -0,0 +1,31 @@ +; RUN: llc -verify-machineinstrs -march=r600 -mcpu=SI < %s + +; This works because promote allocas pass replaces these with LDS atomics. + +; Private atomics have no real use, but at least shouldn't crash on it. +define void @atomicrmw_private(i32 addrspace(1)* %out, i32 %in) nounwind { +entry: + %tmp = alloca [2 x i32] + %tmp1 = getelementptr [2 x i32]* %tmp, i32 0, i32 0 + %tmp2 = getelementptr [2 x i32]* %tmp, i32 0, i32 1 + store i32 0, i32* %tmp1 + store i32 1, i32* %tmp2 + %tmp3 = getelementptr [2 x i32]* %tmp, i32 0, i32 %in + %tmp4 = atomicrmw add i32* %tmp3, i32 7 acq_rel + store i32 %tmp4, i32 addrspace(1)* %out + ret void +} + +define void @cmpxchg_private(i32 addrspace(1)* %out, i32 %in) nounwind { +entry: + %tmp = alloca [2 x i32] + %tmp1 = getelementptr [2 x i32]* %tmp, i32 0, i32 0 + %tmp2 = getelementptr [2 x i32]* %tmp, i32 0, i32 1 + store i32 0, i32* %tmp1 + store i32 1, i32* %tmp2 + %tmp3 = getelementptr [2 x i32]* %tmp, i32 0, i32 %in + %tmp4 = cmpxchg i32* %tmp3, i32 0, i32 1 acq_rel monotonic + %val = extractvalue { i32, i1 } %tmp4, 0 + store i32 %val, i32 addrspace(1)* %out + ret void +} diff --git a/test/CodeGen/R600/private-memory-broken.ll b/test/CodeGen/R600/private-memory-broken.ll new file mode 100644 index 0000000..4086085 --- /dev/null +++ b/test/CodeGen/R600/private-memory-broken.ll @@ -0,0 +1,20 @@ +; RUN: not llc -verify-machineinstrs -march=r600 -mcpu=SI %s -o /dev/null 2>&1 | FileCheck %s + +; Make sure promote alloca pass doesn't crash + +; CHECK: unsupported call + +declare i32 @foo(i32*) nounwind + +define void @call_private(i32 addrspace(1)* %out, i32 %in) nounwind { +entry: + %tmp = alloca [2 x i32] + %tmp1 = getelementptr [2 x i32]* %tmp, i32 0, i32 0 + %tmp2 = getelementptr [2 x i32]* %tmp, i32 0, i32 1 + store i32 0, i32* %tmp1 + store i32 1, i32* %tmp2 + %tmp3 = getelementptr [2 x i32]* %tmp, i32 0, i32 %in + %val = call i32 @foo(i32* %tmp3) nounwind + store i32 %val, i32 addrspace(1)* %out + ret void +} diff --git a/test/CodeGen/R600/private-memory.ll b/test/CodeGen/R600/private-memory.ll index d3453f2..89122be 100644 --- a/test/CodeGen/R600/private-memory.ll +++ b/test/CodeGen/R600/private-memory.ll @@ -1,24 +1,19 @@ ; RUN: llc -march=r600 -mcpu=redwood < %s | FileCheck %s --check-prefix=R600-CHECK --check-prefix=FUNC ; RUN: llc -verify-machineinstrs -march=r600 -mcpu=SI < %s | FileCheck %s --check-prefix=SI-CHECK --check-prefix=FUNC -; This test checks that uses and defs of the AR register happen in the same -; instruction clause. +declare i32 @llvm.r600.read.tidig.x() nounwind readnone ; FUNC-LABEL: @mova_same_clause -; R600-CHECK: MOVA_INT -; R600-CHECK-NOT: ALU clause -; R600-CHECK: 0 + AR.x -; R600-CHECK: MOVA_INT -; R600-CHECK-NOT: ALU clause -; R600-CHECK: 0 + AR.x - -; SI-CHECK: V_READFIRSTLANE_B32 vcc_lo -; SI-CHECK: V_MOVRELD -; SI-CHECK: S_CBRANCH -; SI-CHECK: V_READFIRSTLANE_B32 vcc_lo -; SI-CHECK: V_MOVRELD -; SI-CHECK: S_CBRANCH +; R600-CHECK: LDS_WRITE +; R600-CHECK: LDS_WRITE +; R600-CHECK: LDS_READ +; R600-CHECK: LDS_READ + +; SI-CHECK: DS_WRITE_B32 +; SI-CHECK: DS_WRITE_B32 +; SI-CHECK: DS_READ_B32 +; SI-CHECK: DS_READ_B32 define void @mova_same_clause(i32 addrspace(1)* nocapture %out, i32 addrspace(1)* nocapture %in) { entry: %stack = alloca [5 x i32], align 4 @@ -114,12 +109,8 @@ for.end: ; FUNC-LABEL: @short_array -; R600-CHECK: MOV {{\** *}}T{{[0-9]\.[XYZW]}}, literal -; R600-CHECK: 65536 -; R600-CHECK: * ; R600-CHECK: MOVA_INT -; SI-CHECK: V_MOV_B32_e32 v{{[0-9]}}, 0x10000 ; SI-CHECK: V_MOVRELS_B32_e32 define void @short_array(i32 addrspace(1)* %out, i32 %index) { entry: @@ -137,10 +128,7 @@ entry: ; FUNC-LABEL: @char_array -; R600-CHECK: OR_INT {{\** *}}T{{[0-9]\.[XYZW]}}, {{[PVT0-9]+\.[XYZW]}}, literal -; R600-CHECK: 256 -; R600-CHECK: * -; R600-CHECK-NEXT: MOVA_INT +; R600-CHECK: MOVA_INT ; SI-CHECK: V_OR_B32_e32 v{{[0-9]}}, 0x100 ; SI-CHECK: V_MOVRELS_B32_e32 @@ -185,7 +173,9 @@ entry: ; Test that two stack objects are not stored in the same register ; The second stack object should be in T3.X ; FUNC-LABEL: @no_overlap -; R600-CHECK: MOV {{\** *}}T3.X +; R600_CHECK: MOV +; R600_CHECK: [[CHAN:[XYZW]]]+ +; R600-CHECK-NOT: [[CHAN]]+ ; SI-CHECK: V_MOV_B32_e32 v3 define void @no_overlap(i32 addrspace(1)* %out, i32 %in) { entry: @@ -211,6 +201,85 @@ entry: ret void } +define void @char_array_array(i32 addrspace(1)* %out, i32 %index) { +entry: + %alloca = alloca [2 x [2 x i8]] + %gep0 = getelementptr [2 x [2 x i8]]* %alloca, i32 0, i32 0, i32 0 + %gep1 = getelementptr [2 x [2 x i8]]* %alloca, i32 0, i32 0, i32 1 + store i8 0, i8* %gep0 + store i8 1, i8* %gep1 + %gep2 = getelementptr [2 x [2 x i8]]* %alloca, i32 0, i32 0, i32 %index + %load = load i8* %gep2 + %sext = sext i8 %load to i32 + store i32 %sext, i32 addrspace(1)* %out + ret void +} +define void @i32_array_array(i32 addrspace(1)* %out, i32 %index) { +entry: + %alloca = alloca [2 x [2 x i32]] + %gep0 = getelementptr [2 x [2 x i32]]* %alloca, i32 0, i32 0, i32 0 + %gep1 = getelementptr [2 x [2 x i32]]* %alloca, i32 0, i32 0, i32 1 + store i32 0, i32* %gep0 + store i32 1, i32* %gep1 + %gep2 = getelementptr [2 x [2 x i32]]* %alloca, i32 0, i32 0, i32 %index + %load = load i32* %gep2 + store i32 %load, i32 addrspace(1)* %out + ret void +} + +define void @i64_array_array(i64 addrspace(1)* %out, i32 %index) { +entry: + %alloca = alloca [2 x [2 x i64]] + %gep0 = getelementptr [2 x [2 x i64]]* %alloca, i32 0, i32 0, i32 0 + %gep1 = getelementptr [2 x [2 x i64]]* %alloca, i32 0, i32 0, i32 1 + store i64 0, i64* %gep0 + store i64 1, i64* %gep1 + %gep2 = getelementptr [2 x [2 x i64]]* %alloca, i32 0, i32 0, i32 %index + %load = load i64* %gep2 + store i64 %load, i64 addrspace(1)* %out + ret void +} + +%struct.pair32 = type { i32, i32 } + +define void @struct_array_array(i32 addrspace(1)* %out, i32 %index) { +entry: + %alloca = alloca [2 x [2 x %struct.pair32]] + %gep0 = getelementptr [2 x [2 x %struct.pair32]]* %alloca, i32 0, i32 0, i32 0, i32 1 + %gep1 = getelementptr [2 x [2 x %struct.pair32]]* %alloca, i32 0, i32 0, i32 1, i32 1 + store i32 0, i32* %gep0 + store i32 1, i32* %gep1 + %gep2 = getelementptr [2 x [2 x %struct.pair32]]* %alloca, i32 0, i32 0, i32 %index, i32 0 + %load = load i32* %gep2 + store i32 %load, i32 addrspace(1)* %out + ret void +} + +define void @struct_pair32_array(i32 addrspace(1)* %out, i32 %index) { +entry: + %alloca = alloca [2 x %struct.pair32] + %gep0 = getelementptr [2 x %struct.pair32]* %alloca, i32 0, i32 0, i32 1 + %gep1 = getelementptr [2 x %struct.pair32]* %alloca, i32 0, i32 1, i32 0 + store i32 0, i32* %gep0 + store i32 1, i32* %gep1 + %gep2 = getelementptr [2 x %struct.pair32]* %alloca, i32 0, i32 %index, i32 0 + %load = load i32* %gep2 + store i32 %load, i32 addrspace(1)* %out + ret void +} + +define void @select_private(i32 addrspace(1)* %out, i32 %in) nounwind { +entry: + %tmp = alloca [2 x i32] + %tmp1 = getelementptr [2 x i32]* %tmp, i32 0, i32 0 + %tmp2 = getelementptr [2 x i32]* %tmp, i32 0, i32 1 + store i32 0, i32* %tmp1 + store i32 1, i32* %tmp2 + %cmp = icmp eq i32 %in, 0 + %sel = select i1 %cmp, i32* %tmp1, i32* %tmp2 + %load = load i32* %sel + store i32 %load, i32 addrspace(1)* %out + ret void +} -declare i32 @llvm.r600.read.tidig.x() nounwind readnone diff --git a/test/CodeGen/R600/pv.ll b/test/CodeGen/R600/pv.ll index f322bc7..55eb56d 100644 --- a/test/CodeGen/R600/pv.ll +++ b/test/CodeGen/R600/pv.ll @@ -103,7 +103,7 @@ main_body: %95 = insertelement <4 x float> %94, float 0.000000e+00, i32 3 %96 = call float @llvm.AMDGPU.dp4(<4 x float> %91, <4 x float> %95) %97 = call float @fabs(float %96) - %98 = call float @llvm.AMDGPU.rsq(float %97) + %98 = call float @llvm.AMDGPU.rsq.f32(float %97) %99 = fmul float %4, %98 %100 = fmul float %5, %98 %101 = fmul float %6, %98 @@ -225,7 +225,7 @@ declare float @llvm.AMDGPU.dp4(<4 x float>, <4 x float>) #1 declare float @fabs(float) #2 ; Function Attrs: readnone -declare float @llvm.AMDGPU.rsq(float) #1 +declare float @llvm.AMDGPU.rsq.f32(float) #1 ; Function Attrs: readnone declare float @llvm.AMDIL.clamp.(float, float, float) #1 diff --git a/test/CodeGen/R600/reorder-stores.ll b/test/CodeGen/R600/reorder-stores.ll new file mode 100644 index 0000000..be2fcc6 --- /dev/null +++ b/test/CodeGen/R600/reorder-stores.ll @@ -0,0 +1,104 @@ +; RUN: llc -march=r600 -mcpu=SI < %s | FileCheck -check-prefix=SI %s + +; SI-LABEL: @no_reorder_v2f64_global_load_store +; SI: BUFFER_LOAD_DWORDX2 +; SI: BUFFER_LOAD_DWORDX2 +; SI: BUFFER_LOAD_DWORDX2 +; SI: BUFFER_LOAD_DWORDX2 +; SI: BUFFER_STORE_DWORDX2 +; SI: BUFFER_STORE_DWORDX2 +; SI: BUFFER_STORE_DWORDX2 +; SI: BUFFER_STORE_DWORDX2 +; SI: S_ENDPGM +define void @no_reorder_v2f64_global_load_store(<2 x double> addrspace(1)* nocapture %x, <2 x double> addrspace(1)* nocapture %y) nounwind { + %tmp1 = load <2 x double> addrspace(1)* %x, align 16 + %tmp4 = load <2 x double> addrspace(1)* %y, align 16 + store <2 x double> %tmp4, <2 x double> addrspace(1)* %x, align 16 + store <2 x double> %tmp1, <2 x double> addrspace(1)* %y, align 16 + ret void +} + +; SI-LABEL: @no_reorder_scalarized_v2f64_local_load_store +; SI: DS_READ_B64 +; SI: DS_READ_B64 +; SI: DS_WRITE_B64 +; SI: DS_WRITE_B64 +; SI: S_ENDPGM +define void @no_reorder_scalarized_v2f64_local_load_store(<2 x double> addrspace(3)* nocapture %x, <2 x double> addrspace(3)* nocapture %y) nounwind { + %tmp1 = load <2 x double> addrspace(3)* %x, align 16 + %tmp4 = load <2 x double> addrspace(3)* %y, align 16 + store <2 x double> %tmp4, <2 x double> addrspace(3)* %x, align 16 + store <2 x double> %tmp1, <2 x double> addrspace(3)* %y, align 16 + ret void +} + +; SI-LABEL: @no_reorder_split_v8i32_global_load_store +; SI: BUFFER_LOAD_DWORD +; SI: BUFFER_LOAD_DWORD +; SI: BUFFER_LOAD_DWORD +; SI: BUFFER_LOAD_DWORD + +; SI: BUFFER_LOAD_DWORD +; SI: BUFFER_LOAD_DWORD +; SI: BUFFER_LOAD_DWORD +; SI: BUFFER_LOAD_DWORD + +; SI: BUFFER_LOAD_DWORD +; SI: BUFFER_LOAD_DWORD +; SI: BUFFER_LOAD_DWORD +; SI: BUFFER_LOAD_DWORD + +; SI: BUFFER_LOAD_DWORD +; SI: BUFFER_LOAD_DWORD +; SI: BUFFER_LOAD_DWORD +; SI: BUFFER_LOAD_DWORD + + +; SI: BUFFER_STORE_DWORD +; SI: BUFFER_STORE_DWORD +; SI: BUFFER_STORE_DWORD +; SI: BUFFER_STORE_DWORD + +; SI: BUFFER_STORE_DWORD +; SI: BUFFER_STORE_DWORD +; SI: BUFFER_STORE_DWORD +; SI: BUFFER_STORE_DWORD + +; SI: BUFFER_STORE_DWORD +; SI: BUFFER_STORE_DWORD +; SI: BUFFER_STORE_DWORD +; SI: BUFFER_STORE_DWORD + +; SI: BUFFER_STORE_DWORD +; SI: BUFFER_STORE_DWORD +; SI: BUFFER_STORE_DWORD +; SI: BUFFER_STORE_DWORD +; SI: S_ENDPGM +define void @no_reorder_split_v8i32_global_load_store(<8 x i32> addrspace(1)* nocapture %x, <8 x i32> addrspace(1)* nocapture %y) nounwind { + %tmp1 = load <8 x i32> addrspace(1)* %x, align 32 + %tmp4 = load <8 x i32> addrspace(1)* %y, align 32 + store <8 x i32> %tmp4, <8 x i32> addrspace(1)* %x, align 32 + store <8 x i32> %tmp1, <8 x i32> addrspace(1)* %y, align 32 + ret void +} + +; SI-LABEL: @no_reorder_extload_64 +; SI: DS_READ_B64 +; SI: DS_READ_B64 +; SI: DS_WRITE_B64 +; SI-NOT: DS_READ +; SI: DS_WRITE_B64 +; SI: S_ENDPGM +define void @no_reorder_extload_64(<2 x i32> addrspace(3)* nocapture %x, <2 x i32> addrspace(3)* nocapture %y) nounwind { + %tmp1 = load <2 x i32> addrspace(3)* %x, align 8 + %tmp4 = load <2 x i32> addrspace(3)* %y, align 8 + %tmp1ext = zext <2 x i32> %tmp1 to <2 x i64> + %tmp4ext = zext <2 x i32> %tmp4 to <2 x i64> + %tmp7 = add <2 x i64> %tmp1ext, <i64 1, i64 1> + %tmp9 = add <2 x i64> %tmp4ext, <i64 1, i64 1> + %trunctmp9 = trunc <2 x i64> %tmp9 to <2 x i32> + %trunctmp7 = trunc <2 x i64> %tmp7 to <2 x i32> + store <2 x i32> %trunctmp9, <2 x i32> addrspace(3)* %x, align 8 + store <2 x i32> %trunctmp7, <2 x i32> addrspace(3)* %y, align 8 + ret void +} diff --git a/test/CodeGen/R600/rotl.i64.ll b/test/CodeGen/R600/rotl.i64.ll new file mode 100644 index 0000000..bda0b66 --- /dev/null +++ b/test/CodeGen/R600/rotl.i64.ll @@ -0,0 +1,34 @@ +; RUN: llc -march=r600 -mcpu=SI -verify-machineinstrs < %s | FileCheck -check-prefix=SI -check-prefix=FUNC %s + +; FUNC-LABEL: @s_rotl_i64: +; SI: S_LSHL_B64 +; SI: S_SUB_I32 +; SI: S_LSHR_B64 +; SI: S_OR_B64 +define void @s_rotl_i64(i64 addrspace(1)* %in, i64 %x, i64 %y) { +entry: + %0 = shl i64 %x, %y + %1 = sub i64 64, %y + %2 = lshr i64 %x, %1 + %3 = or i64 %0, %2 + store i64 %3, i64 addrspace(1)* %in + ret void +} + +; FUNC-LABEL: @v_rotl_i64: +; SI: V_LSHL_B64 +; SI: V_SUB_I32 +; SI: V_LSHR_B64 +; SI: V_OR_B32 +; SI: V_OR_B32 +define void @v_rotl_i64(i64 addrspace(1)* %in, i64 addrspace(1)* %xptr, i64 addrspace(1)* %yptr) { +entry: + %x = load i64 addrspace(1)* %xptr, align 8 + %y = load i64 addrspace(1)* %yptr, align 8 + %tmp0 = shl i64 %x, %y + %tmp1 = sub i64 64, %y + %tmp2 = lshr i64 %x, %tmp1 + %tmp3 = or i64 %tmp0, %tmp2 + store i64 %tmp3, i64 addrspace(1)* %in, align 8 + ret void +} diff --git a/test/CodeGen/R600/rotl.ll b/test/CodeGen/R600/rotl.ll new file mode 100644 index 0000000..83f657f --- /dev/null +++ b/test/CodeGen/R600/rotl.ll @@ -0,0 +1,54 @@ +; RUN: llc -march=r600 -mcpu=redwood < %s | FileCheck --check-prefix=R600 -check-prefix=FUNC %s +; RUN: llc -march=r600 -mcpu=SI -verify-machineinstrs < %s | FileCheck -check-prefix=SI -check-prefix=FUNC %s + +; FUNC-LABEL: @rotl_i32: +; R600: SUB_INT {{\** T[0-9]+\.[XYZW]}}, literal.x +; R600-NEXT: 32 +; R600: BIT_ALIGN_INT {{T[0-9]+\.[XYZW]}}, KC0[2].Z, KC0[2].Z, PV.{{[XYZW]}} + +; SI: S_SUB_I32 [[SDST:s[0-9]+]], 32, {{[s][0-9]+}} +; SI: V_MOV_B32_e32 [[VDST:v[0-9]+]], [[SDST]] +; SI: V_ALIGNBIT_B32 {{v[0-9]+, [s][0-9]+, v[0-9]+}}, [[VDST]] +define void @rotl_i32(i32 addrspace(1)* %in, i32 %x, i32 %y) { +entry: + %0 = shl i32 %x, %y + %1 = sub i32 32, %y + %2 = lshr i32 %x, %1 + %3 = or i32 %0, %2 + store i32 %3, i32 addrspace(1)* %in + ret void +} + +; FUNC-LABEL: @rotl_v2i32 +; SI: S_SUB_I32 +; SI: V_ALIGNBIT_B32 +; SI: S_SUB_I32 +; SI: V_ALIGNBIT_B32 +define void @rotl_v2i32(<2 x i32> addrspace(1)* %in, <2 x i32> %x, <2 x i32> %y) { +entry: + %0 = shl <2 x i32> %x, %y + %1 = sub <2 x i32> <i32 32, i32 32>, %y + %2 = lshr <2 x i32> %x, %1 + %3 = or <2 x i32> %0, %2 + store <2 x i32> %3, <2 x i32> addrspace(1)* %in + ret void +} + +; FUNC-LABEL: @rotl_v4i32 +; SI: S_SUB_I32 +; SI: V_ALIGNBIT_B32 +; SI: S_SUB_I32 +; SI: V_ALIGNBIT_B32 +; SI: S_SUB_I32 +; SI: V_ALIGNBIT_B32 +; SI: S_SUB_I32 +; SI: V_ALIGNBIT_B32 +define void @rotl_v4i32(<4 x i32> addrspace(1)* %in, <4 x i32> %x, <4 x i32> %y) { +entry: + %0 = shl <4 x i32> %x, %y + %1 = sub <4 x i32> <i32 32, i32 32, i32 32, i32 32>, %y + %2 = lshr <4 x i32> %x, %1 + %3 = or <4 x i32> %0, %2 + store <4 x i32> %3, <4 x i32> addrspace(1)* %in + ret void +} diff --git a/test/CodeGen/R600/rotr.i64.ll b/test/CodeGen/R600/rotr.i64.ll new file mode 100644 index 0000000..c264751 --- /dev/null +++ b/test/CodeGen/R600/rotr.i64.ll @@ -0,0 +1,58 @@ +; RUN: llc -march=r600 -mcpu=SI -verify-machineinstrs < %s | FileCheck -check-prefix=SI -check-prefix=FUNC %s + +; FUNC-LABEL: @s_rotr_i64 +; SI: S_LSHR_B64 +; SI: S_SUB_I32 +; SI: S_LSHL_B64 +; SI: S_OR_B64 +define void @s_rotr_i64(i64 addrspace(1)* %in, i64 %x, i64 %y) { +entry: + %tmp0 = sub i64 64, %y + %tmp1 = shl i64 %x, %tmp0 + %tmp2 = lshr i64 %x, %y + %tmp3 = or i64 %tmp1, %tmp2 + store i64 %tmp3, i64 addrspace(1)* %in + ret void +} + +; FUNC-LABEL: @v_rotr_i64 +; SI: V_LSHR_B64 +; SI: V_SUB_I32 +; SI: V_LSHL_B64 +; SI: V_OR_B32 +; SI: V_OR_B32 +define void @v_rotr_i64(i64 addrspace(1)* %in, i64 addrspace(1)* %xptr, i64 addrspace(1)* %yptr) { +entry: + %x = load i64 addrspace(1)* %xptr, align 8 + %y = load i64 addrspace(1)* %yptr, align 8 + %tmp0 = sub i64 64, %y + %tmp1 = shl i64 %x, %tmp0 + %tmp2 = lshr i64 %x, %y + %tmp3 = or i64 %tmp1, %tmp2 + store i64 %tmp3, i64 addrspace(1)* %in + ret void +} + +; FUNC-LABEL: @s_rotr_v2i64 +define void @s_rotr_v2i64(<2 x i64> addrspace(1)* %in, <2 x i64> %x, <2 x i64> %y) { +entry: + %tmp0 = sub <2 x i64> <i64 64, i64 64>, %y + %tmp1 = shl <2 x i64> %x, %tmp0 + %tmp2 = lshr <2 x i64> %x, %y + %tmp3 = or <2 x i64> %tmp1, %tmp2 + store <2 x i64> %tmp3, <2 x i64> addrspace(1)* %in + ret void +} + +; FUNC-LABEL: @v_rotr_v2i64 +define void @v_rotr_v2i64(<2 x i64> addrspace(1)* %in, <2 x i64> addrspace(1)* %xptr, <2 x i64> addrspace(1)* %yptr) { +entry: + %x = load <2 x i64> addrspace(1)* %xptr, align 8 + %y = load <2 x i64> addrspace(1)* %yptr, align 8 + %tmp0 = sub <2 x i64> <i64 64, i64 64>, %y + %tmp1 = shl <2 x i64> %x, %tmp0 + %tmp2 = lshr <2 x i64> %x, %y + %tmp3 = or <2 x i64> %tmp1, %tmp2 + store <2 x i64> %tmp3, <2 x i64> addrspace(1)* %in + ret void +} diff --git a/test/CodeGen/R600/rotr.ll b/test/CodeGen/R600/rotr.ll index edf7aee..a5a4da4 100644 --- a/test/CodeGen/R600/rotr.ll +++ b/test/CodeGen/R600/rotr.ll @@ -1,37 +1,52 @@ -; RUN: llc < %s -march=r600 -mcpu=redwood | FileCheck --check-prefix=R600-CHECK %s -; RUN: llc < %s -march=r600 -mcpu=SI -verify-machineinstrs | FileCheck --check-prefix=SI-CHECK %s +; RUN: llc -march=r600 -mcpu=redwood < %s | FileCheck --check-prefix=R600 -check-prefix=FUNC %s +; RUN: llc -march=r600 -mcpu=SI -verify-machineinstrs < %s | FileCheck -check-prefix=SI -check-prefix=FUNC %s -; R600-CHECK-LABEL: @rotr: -; R600-CHECK: BIT_ALIGN_INT +; FUNC-LABEL: @rotr_i32: +; R600: BIT_ALIGN_INT -; SI-CHECK-LABEL: @rotr: -; SI-CHECK: V_ALIGNBIT_B32 -define void @rotr(i32 addrspace(1)* %in, i32 %x, i32 %y) { +; SI: V_ALIGNBIT_B32 +define void @rotr_i32(i32 addrspace(1)* %in, i32 %x, i32 %y) { entry: - %0 = sub i32 32, %y - %1 = shl i32 %x, %0 - %2 = lshr i32 %x, %y - %3 = or i32 %1, %2 - store i32 %3, i32 addrspace(1)* %in + %tmp0 = sub i32 32, %y + %tmp1 = shl i32 %x, %tmp0 + %tmp2 = lshr i32 %x, %y + %tmp3 = or i32 %tmp1, %tmp2 + store i32 %tmp3, i32 addrspace(1)* %in ret void } -; R600-CHECK-LABEL: @rotl: -; R600-CHECK: SUB_INT {{\** T[0-9]+\.[XYZW]}}, literal.x -; R600-CHECK-NEXT: 32 -; R600-CHECK: BIT_ALIGN_INT {{T[0-9]+\.[XYZW]}}, KC0[2].Z, KC0[2].Z, PV.{{[XYZW]}} +; FUNC-LABEL: @rotr_v2i32: +; R600: BIT_ALIGN_INT +; R600: BIT_ALIGN_INT +; SI: V_ALIGNBIT_B32 +; SI: V_ALIGNBIT_B32 +define void @rotr_v2i32(<2 x i32> addrspace(1)* %in, <2 x i32> %x, <2 x i32> %y) { +entry: + %tmp0 = sub <2 x i32> <i32 32, i32 32>, %y + %tmp1 = shl <2 x i32> %x, %tmp0 + %tmp2 = lshr <2 x i32> %x, %y + %tmp3 = or <2 x i32> %tmp1, %tmp2 + store <2 x i32> %tmp3, <2 x i32> addrspace(1)* %in + ret void +} + +; FUNC-LABEL: @rotr_v4i32: +; R600: BIT_ALIGN_INT +; R600: BIT_ALIGN_INT +; R600: BIT_ALIGN_INT +; R600: BIT_ALIGN_INT -; SI-CHECK-LABEL: @rotl: -; SI-CHECK: S_SUB_I32 [[SDST:s[0-9]+]], 32, {{[s][0-9]+}} -; SI-CHECK: V_MOV_B32_e32 [[VDST:v[0-9]+]], [[SDST]] -; SI-CHECK: V_ALIGNBIT_B32 {{v[0-9]+, [s][0-9]+, v[0-9]+}}, [[VDST]] -define void @rotl(i32 addrspace(1)* %in, i32 %x, i32 %y) { +; SI: V_ALIGNBIT_B32 +; SI: V_ALIGNBIT_B32 +; SI: V_ALIGNBIT_B32 +; SI: V_ALIGNBIT_B32 +define void @rotr_v4i32(<4 x i32> addrspace(1)* %in, <4 x i32> %x, <4 x i32> %y) { entry: - %0 = shl i32 %x, %y - %1 = sub i32 32, %y - %2 = lshr i32 %x, %1 - %3 = or i32 %0, %2 - store i32 %3, i32 addrspace(1)* %in + %tmp0 = sub <4 x i32> <i32 32, i32 32, i32 32, i32 32>, %y + %tmp1 = shl <4 x i32> %x, %tmp0 + %tmp2 = lshr <4 x i32> %x, %y + %tmp3 = or <4 x i32> %tmp1, %tmp2 + store <4 x i32> %tmp3, <4 x i32> addrspace(1)* %in ret void } diff --git a/test/CodeGen/R600/rsq.ll b/test/CodeGen/R600/rsq.ll new file mode 100644 index 0000000..87c0570 --- /dev/null +++ b/test/CodeGen/R600/rsq.ll @@ -0,0 +1,26 @@ +; RUN: llc -march=r600 -mcpu=SI -verify-machineinstrs < %s | FileCheck -check-prefix=SI %s + +declare float @llvm.sqrt.f32(float) nounwind readnone +declare double @llvm.sqrt.f64(double) nounwind readnone + +; SI-LABEL: @rsq_f32 +; SI: V_RSQ_F32_e32 +; SI: S_ENDPGM +define void @rsq_f32(float addrspace(1)* noalias %out, float addrspace(1)* noalias %in) nounwind { + %val = load float addrspace(1)* %in, align 4 + %sqrt = call float @llvm.sqrt.f32(float %val) nounwind readnone + %div = fdiv float 1.0, %sqrt + store float %div, float addrspace(1)* %out, align 4 + ret void +} + +; SI-LABEL: @rsq_f64 +; SI: V_RSQ_F64_e32 +; SI: S_ENDPGM +define void @rsq_f64(double addrspace(1)* noalias %out, double addrspace(1)* noalias %in) nounwind { + %val = load double addrspace(1)* %in, align 4 + %sqrt = call double @llvm.sqrt.f64(double %val) nounwind readnone + %div = fdiv double 1.0, %sqrt + store double %div, double addrspace(1)* %out, align 4 + ret void +} diff --git a/test/CodeGen/R600/saddo.ll b/test/CodeGen/R600/saddo.ll new file mode 100644 index 0000000..c80480e --- /dev/null +++ b/test/CodeGen/R600/saddo.ll @@ -0,0 +1,62 @@ +; RUN: llc -march=r600 -mcpu=SI -verify-machineinstrs< %s | FileCheck -check-prefix=SI -check-prefix=FUNC %s +; RUN: llc -march=r600 -mcpu=cypress -verify-machineinstrs< %s + +declare { i32, i1 } @llvm.sadd.with.overflow.i32(i32, i32) nounwind readnone +declare { i64, i1 } @llvm.sadd.with.overflow.i64(i64, i64) nounwind readnone + +; FUNC-LABEL: @saddo_i64_zext +define void @saddo_i64_zext(i64 addrspace(1)* %out, i64 %a, i64 %b) nounwind { + %sadd = call { i64, i1 } @llvm.sadd.with.overflow.i64(i64 %a, i64 %b) nounwind + %val = extractvalue { i64, i1 } %sadd, 0 + %carry = extractvalue { i64, i1 } %sadd, 1 + %ext = zext i1 %carry to i64 + %add2 = add i64 %val, %ext + store i64 %add2, i64 addrspace(1)* %out, align 8 + ret void +} + +; FUNC-LABEL: @s_saddo_i32 +define void @s_saddo_i32(i32 addrspace(1)* %out, i1 addrspace(1)* %carryout, i32 %a, i32 %b) nounwind { + %sadd = call { i32, i1 } @llvm.sadd.with.overflow.i32(i32 %a, i32 %b) nounwind + %val = extractvalue { i32, i1 } %sadd, 0 + %carry = extractvalue { i32, i1 } %sadd, 1 + store i32 %val, i32 addrspace(1)* %out, align 4 + store i1 %carry, i1 addrspace(1)* %carryout + ret void +} + +; FUNC-LABEL: @v_saddo_i32 +define void @v_saddo_i32(i32 addrspace(1)* %out, i1 addrspace(1)* %carryout, i32 addrspace(1)* %aptr, i32 addrspace(1)* %bptr) nounwind { + %a = load i32 addrspace(1)* %aptr, align 4 + %b = load i32 addrspace(1)* %bptr, align 4 + %sadd = call { i32, i1 } @llvm.sadd.with.overflow.i32(i32 %a, i32 %b) nounwind + %val = extractvalue { i32, i1 } %sadd, 0 + %carry = extractvalue { i32, i1 } %sadd, 1 + store i32 %val, i32 addrspace(1)* %out, align 4 + store i1 %carry, i1 addrspace(1)* %carryout + ret void +} + +; FUNC-LABEL: @s_saddo_i64 +define void @s_saddo_i64(i64 addrspace(1)* %out, i1 addrspace(1)* %carryout, i64 %a, i64 %b) nounwind { + %sadd = call { i64, i1 } @llvm.sadd.with.overflow.i64(i64 %a, i64 %b) nounwind + %val = extractvalue { i64, i1 } %sadd, 0 + %carry = extractvalue { i64, i1 } %sadd, 1 + store i64 %val, i64 addrspace(1)* %out, align 8 + store i1 %carry, i1 addrspace(1)* %carryout + ret void +} + +; FUNC-LABEL: @v_saddo_i64 +; SI: V_ADD_I32 +; SI: V_ADDC_U32 +define void @v_saddo_i64(i64 addrspace(1)* %out, i1 addrspace(1)* %carryout, i64 addrspace(1)* %aptr, i64 addrspace(1)* %bptr) nounwind { + %a = load i64 addrspace(1)* %aptr, align 4 + %b = load i64 addrspace(1)* %bptr, align 4 + %sadd = call { i64, i1 } @llvm.sadd.with.overflow.i64(i64 %a, i64 %b) nounwind + %val = extractvalue { i64, i1 } %sadd, 0 + %carry = extractvalue { i64, i1 } %sadd, 1 + store i64 %val, i64 addrspace(1)* %out, align 8 + store i1 %carry, i1 addrspace(1)* %carryout + ret void +} diff --git a/test/CodeGen/R600/scalar_to_vector.ll b/test/CodeGen/R600/scalar_to_vector.ll new file mode 100644 index 0000000..bcccb06 --- /dev/null +++ b/test/CodeGen/R600/scalar_to_vector.ll @@ -0,0 +1,80 @@ +; RUN: llc -march=r600 -mcpu=SI -verify-machineinstrs < %s | FileCheck -check-prefix=SI -check-prefix=FUNC %s + + +; FUNC-LABEL: @scalar_to_vector_v2i32 +; SI: BUFFER_LOAD_DWORD [[VAL:v[0-9]+]], +; SI: V_LSHRREV_B32_e32 [[RESULT:v[0-9]+]], 16, [[VAL]] +; SI: BUFFER_STORE_SHORT [[RESULT]] +; SI: BUFFER_STORE_SHORT [[RESULT]] +; SI: BUFFER_STORE_SHORT [[RESULT]] +; SI: BUFFER_STORE_SHORT [[RESULT]] +; SI: S_ENDPGM +define void @scalar_to_vector_v2i32(<4 x i16> addrspace(1)* %out, i32 addrspace(1)* %in) nounwind { + %tmp1 = load i32 addrspace(1)* %in, align 4 + %bc = bitcast i32 %tmp1 to <2 x i16> + %tmp2 = shufflevector <2 x i16> %bc, <2 x i16> undef, <4 x i32> <i32 1, i32 1, i32 1, i32 1> + store <4 x i16> %tmp2, <4 x i16> addrspace(1)* %out, align 8 + ret void +} + +; FUNC-LABEL: @scalar_to_vector_v2f32 +; SI: BUFFER_LOAD_DWORD [[VAL:v[0-9]+]], +; SI: V_LSHRREV_B32_e32 [[RESULT:v[0-9]+]], 16, [[VAL]] +; SI: BUFFER_STORE_SHORT [[RESULT]] +; SI: BUFFER_STORE_SHORT [[RESULT]] +; SI: BUFFER_STORE_SHORT [[RESULT]] +; SI: BUFFER_STORE_SHORT [[RESULT]] +; SI: S_ENDPGM +define void @scalar_to_vector_v2f32(<4 x i16> addrspace(1)* %out, float addrspace(1)* %in) nounwind { + %tmp1 = load float addrspace(1)* %in, align 4 + %bc = bitcast float %tmp1 to <2 x i16> + %tmp2 = shufflevector <2 x i16> %bc, <2 x i16> undef, <4 x i32> <i32 1, i32 1, i32 1, i32 1> + store <4 x i16> %tmp2, <4 x i16> addrspace(1)* %out, align 8 + ret void +} + +; Getting a SCALAR_TO_VECTOR seems to be tricky. These cases managed +; to produce one, but for some reason never made it to selection. + + +; define void @scalar_to_vector_test2(<8 x i8> addrspace(1)* %out, i32 addrspace(1)* %in) nounwind { +; %tmp1 = load i32 addrspace(1)* %in, align 4 +; %bc = bitcast i32 %tmp1 to <4 x i8> + +; %tmp2 = shufflevector <4 x i8> %bc, <4 x i8> undef, <8 x i32> <i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1> +; store <8 x i8> %tmp2, <8 x i8> addrspace(1)* %out, align 4 +; ret void +; } + +; define void @scalar_to_vector_test3(<4 x i32> addrspace(1)* %out) nounwind { +; %newvec0 = insertelement <2 x i64> undef, i64 12345, i32 0 +; %newvec1 = insertelement <2 x i64> %newvec0, i64 undef, i32 1 +; %bc = bitcast <2 x i64> %newvec1 to <4 x i32> +; %add = add <4 x i32> %bc, <i32 1, i32 2, i32 3, i32 4> +; store <4 x i32> %add, <4 x i32> addrspace(1)* %out, align 16 +; ret void +; } + +; define void @scalar_to_vector_test4(<8 x i16> addrspace(1)* %out) nounwind { +; %newvec0 = insertelement <4 x i32> undef, i32 12345, i32 0 +; %bc = bitcast <4 x i32> %newvec0 to <8 x i16> +; %add = add <8 x i16> %bc, <i16 1, i16 2, i16 3, i16 4, i16 1, i16 2, i16 3, i16 4> +; store <8 x i16> %add, <8 x i16> addrspace(1)* %out, align 16 +; ret void +; } + +; define void @scalar_to_vector_test5(<4 x i16> addrspace(1)* %out) nounwind { +; %newvec0 = insertelement <2 x i32> undef, i32 12345, i32 0 +; %bc = bitcast <2 x i32> %newvec0 to <4 x i16> +; %add = add <4 x i16> %bc, <i16 1, i16 2, i16 3, i16 4> +; store <4 x i16> %add, <4 x i16> addrspace(1)* %out, align 16 +; ret void +; } + +; define void @scalar_to_vector_test6(<4 x i16> addrspace(1)* %out) nounwind { +; %newvec0 = insertelement <2 x i32> undef, i32 12345, i32 0 +; %bc = bitcast <2 x i32> %newvec0 to <4 x i16> +; %add = add <4 x i16> %bc, <i16 1, i16 2, i16 3, i16 4> +; store <4 x i16> %add, <4 x i16> addrspace(1)* %out, align 16 +; ret void +; } diff --git a/test/CodeGen/R600/sdiv.ll b/test/CodeGen/R600/sdiv.ll index 3dd10c8..e922d5c 100644 --- a/test/CodeGen/R600/sdiv.ll +++ b/test/CodeGen/R600/sdiv.ll @@ -1,4 +1,5 @@ -; RUN: llc < %s -march=r600 -mcpu=redwood | FileCheck %s +; RUN: llc -march=r600 -mcpu=SI < %s | FileCheck -check-prefix=SI -check-prefix=FUNC %s +; RUN: llc -march=r600 -mcpu=redwood < %s | FileCheck -check-prefix=EG -check-prefix=FUNC %s ; The code generated by sdiv is long and complex and may frequently change. ; The goal of this test is to make sure the ISel doesn't fail. @@ -9,9 +10,9 @@ ; This was fixed by adding an additional pattern in R600Instructions.td to ; match this pattern with a CNDGE_INT. -; CHECK: CF_END - -define void @test(i32 addrspace(1)* %out, i32 addrspace(1)* %in) { +; FUNC-LABEL: @sdiv_i32 +; EG: CF_END +define void @sdiv_i32(i32 addrspace(1)* %out, i32 addrspace(1)* %in) { %den_ptr = getelementptr i32 addrspace(1)* %in, i32 1 %num = load i32 addrspace(1) * %in %den = load i32 addrspace(1) * %den_ptr @@ -19,3 +20,84 @@ define void @test(i32 addrspace(1)* %out, i32 addrspace(1)* %in) { store i32 %result, i32 addrspace(1)* %out ret void } + +; FUNC-LABEL: @sdiv_i32_4 +define void @sdiv_i32_4(i32 addrspace(1)* %out, i32 addrspace(1)* %in) { + %num = load i32 addrspace(1) * %in + %result = sdiv i32 %num, 4 + store i32 %result, i32 addrspace(1)* %out + ret void +} + +; Multiply by a weird constant to make sure setIntDivIsCheap is +; working. + +; FUNC-LABEL: @slow_sdiv_i32_3435 +; SI: BUFFER_LOAD_DWORD [[VAL:v[0-9]+]], +; SI: V_MOV_B32_e32 [[MAGIC:v[0-9]+]], 0x98a1930b +; SI: V_MUL_HI_I32 [[TMP:v[0-9]+]], [[VAL]], [[MAGIC]] +; SI: V_ADD_I32 +; SI: V_LSHRREV_B32 +; SI: V_ASHRREV_I32 +; SI: V_ADD_I32 +; SI: BUFFER_STORE_DWORD +; SI: S_ENDPGM +define void @slow_sdiv_i32_3435(i32 addrspace(1)* %out, i32 addrspace(1)* %in) { + %num = load i32 addrspace(1) * %in + %result = sdiv i32 %num, 3435 + store i32 %result, i32 addrspace(1)* %out + ret void +} + +define void @sdiv_v2i32(<2 x i32> addrspace(1)* %out, <2 x i32> addrspace(1)* %in) { + %den_ptr = getelementptr <2 x i32> addrspace(1)* %in, i32 1 + %num = load <2 x i32> addrspace(1) * %in + %den = load <2 x i32> addrspace(1) * %den_ptr + %result = sdiv <2 x i32> %num, %den + store <2 x i32> %result, <2 x i32> addrspace(1)* %out + ret void +} + +define void @sdiv_v2i32_4(<2 x i32> addrspace(1)* %out, <2 x i32> addrspace(1)* %in) { + %num = load <2 x i32> addrspace(1) * %in + %result = sdiv <2 x i32> %num, <i32 4, i32 4> + store <2 x i32> %result, <2 x i32> addrspace(1)* %out + ret void +} + +define void @sdiv_v4i32(<4 x i32> addrspace(1)* %out, <4 x i32> addrspace(1)* %in) { + %den_ptr = getelementptr <4 x i32> addrspace(1)* %in, i32 1 + %num = load <4 x i32> addrspace(1) * %in + %den = load <4 x i32> addrspace(1) * %den_ptr + %result = sdiv <4 x i32> %num, %den + store <4 x i32> %result, <4 x i32> addrspace(1)* %out + ret void +} + +define void @sdiv_v4i32_4(<4 x i32> addrspace(1)* %out, <4 x i32> addrspace(1)* %in) { + %num = load <4 x i32> addrspace(1) * %in + %result = sdiv <4 x i32> %num, <i32 4, i32 4, i32 4, i32 4> + store <4 x i32> %result, <4 x i32> addrspace(1)* %out + ret void +} + +; Tests for 64-bit divide bypass. +; define void @test_get_quotient(i64 addrspace(1)* %out, i64 %a, i64 %b) nounwind { +; %result = sdiv i64 %a, %b +; store i64 %result, i64 addrspace(1)* %out, align 8 +; ret void +; } + +; define void @test_get_remainder(i64 addrspace(1)* %out, i64 %a, i64 %b) nounwind { +; %result = srem i64 %a, %b +; store i64 %result, i64 addrspace(1)* %out, align 8 +; ret void +; } + +; define void @test_get_quotient_and_remainder(i64 addrspace(1)* %out, i64 %a, i64 %b) nounwind { +; %resultdiv = sdiv i64 %a, %b +; %resultrem = srem i64 %a, %b +; %result = add i64 %resultdiv, %resultrem +; store i64 %result, i64 addrspace(1)* %out, align 8 +; ret void +; } diff --git a/test/CodeGen/R600/setcc-equivalent.ll b/test/CodeGen/R600/setcc-equivalent.ll index 4c50aa3..f796748 100644 --- a/test/CodeGen/R600/setcc-equivalent.ll +++ b/test/CodeGen/R600/setcc-equivalent.ll @@ -1,4 +1,5 @@ ; RUN: llc -march=r600 -mcpu=cypress < %s | FileCheck -check-prefix=EG %s +; XFAIL: * ; EG-LABEL: @and_setcc_setcc_i32 ; EG: AND_INT diff --git a/test/CodeGen/R600/sgpr-copy.ll b/test/CodeGen/R600/sgpr-copy.ll index c581d86..c7d5bf9 100644 --- a/test/CodeGen/R600/sgpr-copy.ll +++ b/test/CodeGen/R600/sgpr-copy.ll @@ -70,7 +70,7 @@ main_body: %55 = fadd float %54, %53 %56 = fmul float %45, %45 %57 = fadd float %55, %56 - %58 = call float @llvm.AMDGPU.rsq(float %57) + %58 = call float @llvm.AMDGPU.rsq.f32(float %57) %59 = fmul float %43, %58 %60 = fmul float %44, %58 %61 = fmul float %45, %58 @@ -212,7 +212,7 @@ declare float @llvm.SI.fs.interp(i32, i32, i32, <2 x i32>) #1 declare <4 x float> @llvm.SI.sample.v2i32(<2 x i32>, <32 x i8>, <16 x i8>, i32) #1 ; Function Attrs: readnone -declare float @llvm.AMDGPU.rsq(float) #3 +declare float @llvm.AMDGPU.rsq.f32(float) #3 ; Function Attrs: readnone declare float @llvm.AMDIL.exp.(float) #3 diff --git a/test/CodeGen/R600/shl.ll b/test/CodeGen/R600/shl.ll index 4a6aab4..43fab2a 100644 --- a/test/CodeGen/R600/shl.ll +++ b/test/CodeGen/R600/shl.ll @@ -39,5 +39,118 @@ define void @shl_v4i32(<4 x i32> addrspace(1)* %out, <4 x i32> addrspace(1)* %in ret void } -; XXX: Add SI test for i64 shl once i64 stores and i64 function arguments are -; supported. +;EG-CHECK: @shl_i64 +;EG-CHECK: SUB_INT {{\*? *}}[[COMPSH:T[0-9]+\.[XYZW]]], {{literal.[xy]}}, [[SHIFT:T[0-9]+\.[XYZW]]] +;EG-CHECK: LSHR {{\* *}}[[TEMP:T[0-9]+\.[XYZW]]], [[OPLO:T[0-9]+\.[XYZW]]], {{[[COMPSH]]|PV.[XYZW]}} +;EG-CHECK: LSHR {{\*? *}}[[OVERF:T[0-9]+\.[XYZW]]], {{[[TEMP]]|PV.[XYZW]}}, 1 +;EG_CHECK-DAG: ADD_INT {{\*? *}}[[BIGSH:T[0-9]+\.[XYZW]]], [[SHIFT]], literal +;EG-CHECK-DAG: LSHL {{\*? *}}[[HISMTMP:T[0-9]+\.[XYZW]]], [[OPHI:T[0-9]+\.[XYZW]]], [[SHIFT]] +;EG-CHECK-DAG: OR_INT {{\*? *}}[[HISM:T[0-9]+\.[XYZW]]], {{[[HISMTMP]]|PV.[XYZW]}}, {{[[OVERF]]|PV.[XYZW]}} +;EG-CHECK-DAG: LSHL {{\*? *}}[[LOSM:T[0-9]+\.[XYZW]]], [[OPLO]], {{PS|[[SHIFT]]}} +;EG-CHECK-DAG: SETGT_UINT {{\*? *}}[[RESC:T[0-9]+\.[XYZW]]], [[SHIFT]], literal +;EG-CHECK-DAG: CNDE_INT {{\*? *}}[[RESLO:T[0-9]+\.[XYZW]]], {{T[0-9]+\.[XYZW]}} +;EG-CHECK-DAG: CNDE_INT {{\*? *}}[[RESHI:T[0-9]+\.[XYZW]]], {{T[0-9]+\.[XYZW], .*}}, 0.0 + +;SI-CHECK: @shl_i64 +;SI-CHECK: V_LSHL_B64 {{v\[[0-9]+:[0-9]+\], v\[[0-9]+:[0-9]+\], v[0-9]+}} + +define void @shl_i64(i64 addrspace(1)* %out, i64 addrspace(1)* %in) { + %b_ptr = getelementptr i64 addrspace(1)* %in, i64 1 + %a = load i64 addrspace(1) * %in + %b = load i64 addrspace(1) * %b_ptr + %result = shl i64 %a, %b + store i64 %result, i64 addrspace(1)* %out + ret void +} + +;EG-CHECK: @shl_v2i64 +;EG-CHECK-DAG: SUB_INT {{\*? *}}[[COMPSHA:T[0-9]+\.[XYZW]]], {{literal.[xy]}}, [[SHA:T[0-9]+\.[XYZW]]] +;EG-CHECK-DAG: SUB_INT {{\*? *}}[[COMPSHB:T[0-9]+\.[XYZW]]], {{literal.[xy]}}, [[SHB:T[0-9]+\.[XYZW]]] +;EG-CHECK-DAG: LSHR {{\*? *}}[[COMPSHA]] +;EG-CHECK-DAG: LSHR {{\*? *}}[[COMPSHB]] +;EG-CHECK-DAG: LSHR {{.*}}, 1 +;EG-CHECK-DAG: LSHR {{.*}}, 1 +;EG-CHECK-DAG: ADD_INT {{\*? *}}[[BIGSHA:T[0-9]+\.[XYZW]]]{{.*}}, literal +;EG-CHECK-DAG: ADD_INT {{\*? *}}[[BIGSHB:T[0-9]+\.[XYZW]]]{{.*}}, literal +;EG-CHECK-DAG: LSHL {{.*}}, [[SHA]] +;EG-CHECK-DAG: LSHL {{.*}}, [[SHB]] +;EG-CHECK-DAG: LSHL {{.*}}, [[SHA]] +;EG-CHECK-DAG: LSHL {{.*}}, [[SHB]] +;EG-CHECK-DAG: LSHL +;EG-CHECK-DAG: LSHL +;EG-CHECK-DAG: SETGT_UINT {{\*? *T[0-9]\.[XYZW]}}, [[SHA]], literal +;EG-CHECK-DAG: SETGT_UINT {{\*? *T[0-9]\.[XYZW]}}, [[SHB]], literal +;EG-CHECK-DAG: CNDE_INT {{.*}}, 0.0 +;EG-CHECK-DAG: CNDE_INT {{.*}}, 0.0 +;EG-CHECK-DAG: CNDE_INT +;EG-CHECK-DAG: CNDE_INT + +;SI-CHECK: @shl_v2i64 +;SI-CHECK: V_LSHL_B64 {{v\[[0-9]+:[0-9]+\], v\[[0-9]+:[0-9]+\], v[0-9]+}} +;SI-CHECK: V_LSHL_B64 {{v\[[0-9]+:[0-9]+\], v\[[0-9]+:[0-9]+\], v[0-9]+}} + +define void @shl_v2i64(<2 x i64> addrspace(1)* %out, <2 x i64> addrspace(1)* %in) { + %b_ptr = getelementptr <2 x i64> addrspace(1)* %in, i64 1 + %a = load <2 x i64> addrspace(1) * %in + %b = load <2 x i64> addrspace(1) * %b_ptr + %result = shl <2 x i64> %a, %b + store <2 x i64> %result, <2 x i64> addrspace(1)* %out + ret void +} + +;EG-CHECK: @shl_v4i64 +;EG-CHECK-DAG: SUB_INT {{\*? *}}[[COMPSHA:T[0-9]+\.[XYZW]]], {{literal.[xy]}}, [[SHA:T[0-9]+\.[XYZW]]] +;EG-CHECK-DAG: SUB_INT {{\*? *}}[[COMPSHB:T[0-9]+\.[XYZW]]], {{literal.[xy]}}, [[SHB:T[0-9]+\.[XYZW]]] +;EG-CHECK-DAG: SUB_INT {{\*? *}}[[COMPSHC:T[0-9]+\.[XYZW]]], {{literal.[xy]}}, [[SHC:T[0-9]+\.[XYZW]]] +;EG-CHECK-DAG: SUB_INT {{\*? *}}[[COMPSHD:T[0-9]+\.[XYZW]]], {{literal.[xy]}}, [[SHD:T[0-9]+\.[XYZW]]] +;EG-CHECK-DAG: LSHR {{\*? *}}[[COMPSHA]] +;EG-CHECK-DAG: LSHR {{\*? *}}[[COMPSHB]] +;EG-CHECK-DAG: LSHR {{\*? *}}[[COMPSHC]] +;EG-CHECK-DAG: LSHR {{\*? *}}[[COMPSHD]] +;EG-CHECK-DAG: LSHR {{.*}}, 1 +;EG-CHECK-DAG: LSHR {{.*}}, 1 +;EG-CHECK-DAG: LSHR {{.*}}, 1 +;EG-CHECK-DAG: LSHR {{.*}}, 1 +;EG-CHECK-DAG: ADD_INT {{\*? *}}[[BIGSHA:T[0-9]+\.[XYZW]]]{{.*}}, literal +;EG-CHECK-DAG: ADD_INT {{\*? *}}[[BIGSHB:T[0-9]+\.[XYZW]]]{{.*}}, literal +;EG-CHECK-DAG: ADD_INT {{\*? *}}[[BIGSHC:T[0-9]+\.[XYZW]]]{{.*}}, literal +;EG-CHECK-DAG: ADD_INT {{\*? *}}[[BIGSHD:T[0-9]+\.[XYZW]]]{{.*}}, literal +;EG-CHECK-DAG: LSHL {{.*}}, [[SHA]] +;EG-CHECK-DAG: LSHL {{.*}}, [[SHB]] +;EG-CHECK-DAG: LSHL {{.*}}, [[SHC]] +;EG-CHECK-DAG: LSHL {{.*}}, [[SHD]] +;EG-CHECK-DAG: LSHL {{.*}}, [[SHA]] +;EG-CHECK-DAG: LSHL {{.*}}, [[SHB]] +;EG-CHECK-DAG: LSHL {{.*}}, [[SHC]] +;EG-CHECK-DAG: LSHL {{.*}}, [[SHD]] +;EG-CHECK-DAG: LSHL +;EG-CHECK-DAG: LSHL +;EG-CHECK-DAG: LSHL +;EG-CHECK-DAG: LSHL +;EG-CHECK-DAG: SETGT_UINT {{\*? *T[0-9]\.[XYZW]}}, [[SHA]], literal +;EG-CHECK-DAG: SETGT_UINT {{\*? *T[0-9]\.[XYZW]}}, [[SHB]], literal +;EG-CHECK-DAG: SETGT_UINT {{\*? *T[0-9]\.[XYZW]}}, [[SHC]], literal +;EG-CHECK-DAG: SETGT_UINT {{\*? *T[0-9]\.[XYZW]}}, [[SHD]], literal +;EG-CHECK-DAG: CNDE_INT {{.*}}, 0.0 +;EG-CHECK-DAG: CNDE_INT {{.*}}, 0.0 +;EG-CHECK-DAG: CNDE_INT {{.*}}, 0.0 +;EG-CHECK-DAG: CNDE_INT {{.*}}, 0.0 +;EG-CHECK-DAG: CNDE_INT +;EG-CHECK-DAG: CNDE_INT +;EG-CHECK-DAG: CNDE_INT +;EG-CHECK-DAG: CNDE_INT + +;SI-CHECK: @shl_v4i64 +;SI-CHECK: V_LSHL_B64 {{v\[[0-9]+:[0-9]+\], v\[[0-9]+:[0-9]+\], v[0-9]+}} +;SI-CHECK: V_LSHL_B64 {{v\[[0-9]+:[0-9]+\], v\[[0-9]+:[0-9]+\], v[0-9]+}} +;SI-CHECK: V_LSHL_B64 {{v\[[0-9]+:[0-9]+\], v\[[0-9]+:[0-9]+\], v[0-9]+}} +;SI-CHECK: V_LSHL_B64 {{v\[[0-9]+:[0-9]+\], v\[[0-9]+:[0-9]+\], v[0-9]+}} + +define void @shl_v4i64(<4 x i64> addrspace(1)* %out, <4 x i64> addrspace(1)* %in) { + %b_ptr = getelementptr <4 x i64> addrspace(1)* %in, i64 1 + %a = load <4 x i64> addrspace(1) * %in + %b = load <4 x i64> addrspace(1) * %b_ptr + %result = shl <4 x i64> %a, %b + store <4 x i64> %result, <4 x i64> addrspace(1)* %out + ret void +} diff --git a/test/CodeGen/R600/si-sgpr-spill.ll b/test/CodeGen/R600/si-sgpr-spill.ll index b34a757..53a0965 100644 --- a/test/CodeGen/R600/si-sgpr-spill.ll +++ b/test/CodeGen/R600/si-sgpr-spill.ll @@ -203,7 +203,7 @@ main_body: %198 = fadd float %197, %196 %199 = fmul float %97, %97 %200 = fadd float %198, %199 - %201 = call float @llvm.AMDGPU.rsq(float %200) + %201 = call float @llvm.AMDGPU.rsq.f32(float %200) %202 = fmul float %95, %201 %203 = fmul float %96, %201 %204 = fmul float %202, %29 @@ -384,7 +384,7 @@ IF67: ; preds = %LOOP65 %355 = fadd float %354, %353 %356 = fmul float %352, %352 %357 = fadd float %355, %356 - %358 = call float @llvm.AMDGPU.rsq(float %357) + %358 = call float @llvm.AMDGPU.rsq.f32(float %357) %359 = fmul float %350, %358 %360 = fmul float %351, %358 %361 = fmul float %352, %358 @@ -512,7 +512,7 @@ IF67: ; preds = %LOOP65 %483 = fadd float %482, %481 %484 = fmul float %109, %109 %485 = fadd float %483, %484 - %486 = call float @llvm.AMDGPU.rsq(float %485) + %486 = call float @llvm.AMDGPU.rsq.f32(float %485) %487 = fmul float %107, %486 %488 = fmul float %108, %486 %489 = fmul float %109, %486 @@ -541,7 +541,7 @@ IF67: ; preds = %LOOP65 %512 = fadd float %511, %510 %513 = fmul float %97, %97 %514 = fadd float %512, %513 - %515 = call float @llvm.AMDGPU.rsq(float %514) + %515 = call float @llvm.AMDGPU.rsq.f32(float %514) %516 = fmul float %95, %515 %517 = fmul float %96, %515 %518 = fmul float %97, %515 @@ -658,7 +658,7 @@ declare i32 @llvm.SI.tid() #2 declare float @ceil(float) #3 ; Function Attrs: readnone -declare float @llvm.AMDGPU.rsq(float) #2 +declare float @llvm.AMDGPU.rsq.f32(float) #2 ; Function Attrs: nounwind readnone declare <4 x float> @llvm.SI.sampled.v8i32(<8 x i32>, <32 x i8>, <16 x i8>, i32) #1 @@ -887,7 +887,7 @@ main_body: %212 = fadd float %211, %210 %213 = fmul float %209, %209 %214 = fadd float %212, %213 - %215 = call float @llvm.AMDGPU.rsq(float %214) + %215 = call float @llvm.AMDGPU.rsq.f32(float %214) %216 = fmul float %205, %215 %217 = fmul float %207, %215 %218 = fmul float %209, %215 @@ -1123,7 +1123,7 @@ IF189: ; preds = %LOOP %434 = fsub float -0.000000e+00, %433 %435 = fadd float 0x3FF00068E0000000, %434 %436 = call float @llvm.AMDIL.clamp.(float %435, float 0.000000e+00, float 1.000000e+00) - %437 = call float @llvm.AMDGPU.rsq(float %436) + %437 = call float @llvm.AMDGPU.rsq.f32(float %436) %438 = fmul float %437, %436 %439 = fsub float -0.000000e+00, %436 %440 = call float @llvm.AMDGPU.cndlt(float %439, float %438, float 0.000000e+00) @@ -1147,7 +1147,7 @@ IF189: ; preds = %LOOP %458 = fadd float %457, %456 %459 = fmul float %455, %455 %460 = fadd float %458, %459 - %461 = call float @llvm.AMDGPU.rsq(float %460) + %461 = call float @llvm.AMDGPU.rsq.f32(float %460) %462 = fmul float %451, %461 %463 = fmul float %453, %461 %464 = fmul float %455, %461 @@ -1257,7 +1257,7 @@ ENDIF197: ; preds = %IF189, %IF198 %559 = fadd float %558, %557 %560 = fmul float %556, %556 %561 = fadd float %559, %560 - %562 = call float @llvm.AMDGPU.rsq(float %561) + %562 = call float @llvm.AMDGPU.rsq.f32(float %561) %563 = fmul float %562, %561 %564 = fsub float -0.000000e+00, %561 %565 = call float @llvm.AMDGPU.cndlt(float %564, float %563, float 0.000000e+00) diff --git a/test/CodeGen/R600/sign_extend.ll b/test/CodeGen/R600/sign_extend.ll index 1212cee..e3bee50 100644 --- a/test/CodeGen/R600/sign_extend.ll +++ b/test/CodeGen/R600/sign_extend.ll @@ -1,12 +1,61 @@ +; RUN: llc -march=r600 -mcpu=SI -verify-machineinstrs < %s | FileCheck -check-prefix=SI %s -; RUN: llc < %s -march=r600 -mcpu=SI -verify-machineinstrs | FileCheck %s +; SI-LABEL: @s_sext_i1_to_i32: +; SI: V_CNDMASK_B32_e64 +; SI: S_ENDPGM +define void @s_sext_i1_to_i32(i32 addrspace(1)* %out, i32 %a, i32 %b) nounwind { + %cmp = icmp eq i32 %a, %b + %sext = sext i1 %cmp to i32 + store i32 %sext, i32 addrspace(1)* %out, align 4 + ret void +} -; CHECK: V_ASHR -define void @test(i64 addrspace(1)* %out, i32 %a, i32 %b, i32 %c) { +; SI-LABEL: @test: +; SI: V_ASHR +; SI: S_ENDPG +define void @test(i64 addrspace(1)* %out, i32 %a, i32 %b, i32 %c) nounwind { entry: - %0 = mul i32 %a, %b - %1 = add i32 %0, %c - %2 = sext i32 %1 to i64 - store i64 %2, i64 addrspace(1)* %out + %mul = mul i32 %a, %b + %add = add i32 %mul, %c + %sext = sext i32 %add to i64 + store i64 %sext, i64 addrspace(1)* %out, align 8 + ret void +} + +; SI-LABEL: @s_sext_i1_to_i64: +; SI: V_CNDMASK_B32_e64 +; SI: V_CNDMASK_B32_e64 +; SI: S_ENDPGM +define void @s_sext_i1_to_i64(i64 addrspace(1)* %out, i32 %a, i32 %b) nounwind { + %cmp = icmp eq i32 %a, %b + %sext = sext i1 %cmp to i64 + store i64 %sext, i64 addrspace(1)* %out, align 8 + ret void +} + +; SI-LABEL: @s_sext_i32_to_i64: +; SI: S_ASHR_I32 +; SI: S_ENDPGM +define void @s_sext_i32_to_i64(i64 addrspace(1)* %out, i32 %a) nounwind { + %sext = sext i32 %a to i64 + store i64 %sext, i64 addrspace(1)* %out, align 8 + ret void +} + +; SI-LABEL: @v_sext_i32_to_i64: +; SI: V_ASHR +; SI: S_ENDPGM +define void @v_sext_i32_to_i64(i64 addrspace(1)* %out, i32 addrspace(1)* %in) nounwind { + %val = load i32 addrspace(1)* %in, align 4 + %sext = sext i32 %val to i64 + store i64 %sext, i64 addrspace(1)* %out, align 8 + ret void +} + +; SI-LABEL: @s_sext_i16_to_i64: +; SI: S_ENDPGM +define void @s_sext_i16_to_i64(i64 addrspace(1)* %out, i16 %a) nounwind { + %sext = sext i16 %a to i64 + store i64 %sext, i64 addrspace(1)* %out, align 8 ret void } diff --git a/test/CodeGen/R600/simplify-demanded-bits-build-pair.ll b/test/CodeGen/R600/simplify-demanded-bits-build-pair.ll index d9f60ea..dee4326 100644 --- a/test/CodeGen/R600/simplify-demanded-bits-build-pair.ll +++ b/test/CodeGen/R600/simplify-demanded-bits-build-pair.ll @@ -1,5 +1,7 @@ ; RUN: llc -verify-machineinstrs -march=r600 -mcpu=SI < %s | FileCheck -check-prefix=SI %s +; XFAIL: * + ; 64-bit select was originally lowered with a build_pair, and this ; could be simplified to 1 cndmask instead of 2, but that broken when ; it started being implemented with a v2i32 build_vector and @@ -12,9 +14,10 @@ define void @trunc_select_i64(i32 addrspace(1)* %out, i64 %a, i64 %b, i32 %c) { ret void } +; FIXME: Fix truncating store for local memory ; SI-LABEL: @trunc_load_alloca_i64: -; SI: V_MOVRELS_B32 -; SI-NOT: V_MOVRELS_B32 +; SI: DS_READ_B32 +; SI-NOT: DS_READ_B64 ; SI: S_ENDPGM define void @trunc_load_alloca_i64(i64 addrspace(1)* %out, i32 %a, i32 %b) { %idx = add i32 %a, %b diff --git a/test/CodeGen/R600/sint_to_fp.ll b/test/CodeGen/R600/sint_to_fp.ll index 9241799..b27dfda 100644 --- a/test/CodeGen/R600/sint_to_fp.ll +++ b/test/CodeGen/R600/sint_to_fp.ll @@ -29,3 +29,25 @@ define void @sint_to_fp_v4i32(<4 x float> addrspace(1)* %out, <4 x i32> addrspac store <4 x float> %result, <4 x float> addrspace(1)* %out ret void } + +; FUNC-LABEL: @sint_to_fp_i1_f32: +; SI: V_CMP_EQ_I32_e64 [[CMP:s\[[0-9]+:[0-9]\]]], +; SI-NEXT: V_CNDMASK_B32_e64 [[RESULT:v[0-9]+]], 0, -1.000000e+00, [[CMP]] +; SI: BUFFER_STORE_DWORD [[RESULT]], +; SI: S_ENDPGM +define void @sint_to_fp_i1_f32(float addrspace(1)* %out, i32 %in) { + %cmp = icmp eq i32 %in, 0 + %fp = uitofp i1 %cmp to float + store float %fp, float addrspace(1)* %out, align 4 + ret void +} + +; FUNC-LABEL: @sint_to_fp_i1_f32_load: +; SI: V_CNDMASK_B32_e64 [[RESULT:v[0-9]+]], 0, -1.000000e+00 +; SI: BUFFER_STORE_DWORD [[RESULT]], +; SI: S_ENDPGM +define void @sint_to_fp_i1_f32_load(float addrspace(1)* %out, i1 %in) { + %fp = sitofp i1 %in to float + store float %fp, float addrspace(1)* %out, align 4 + ret void +} diff --git a/test/CodeGen/R600/sint_to_fp64.ll b/test/CodeGen/R600/sint_to_fp64.ll index 5abc9d1..12b8cf5 100644 --- a/test/CodeGen/R600/sint_to_fp64.ll +++ b/test/CodeGen/R600/sint_to_fp64.ll @@ -1,9 +1,35 @@ -; RUN: llc < %s -march=r600 -mcpu=SI -verify-machineinstrs | FileCheck %s --check-prefix=CHECK +; RUN: llc -march=r600 -mcpu=SI -verify-machineinstrs < %s | FileCheck -check-prefix=SI %s -; CHECK: @sint_to_fp64 -; CHECK: V_CVT_F64_I32_e32 +; SI: @sint_to_fp64 +; SI: V_CVT_F64_I32_e32 define void @sint_to_fp64(double addrspace(1)* %out, i32 %in) { %result = sitofp i32 %in to double store double %result, double addrspace(1)* %out ret void } + +; SI-LABEL: @sint_to_fp_i1_f64: +; SI: V_CMP_EQ_I32_e64 [[CMP:s\[[0-9]+:[0-9]\]]], +; FIXME: We should the VGPR sources for V_CNDMASK are copied from SGPRs, +; we should be able to fold the SGPRs into the V_CNDMASK instructions. +; SI: V_CNDMASK_B32_e64 v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}}, [[CMP]] +; SI: V_CNDMASK_B32_e64 v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}}, [[CMP]] +; SI: BUFFER_STORE_DWORDX2 +; SI: S_ENDPGM +define void @sint_to_fp_i1_f64(double addrspace(1)* %out, i32 %in) { + %cmp = icmp eq i32 %in, 0 + %fp = sitofp i1 %cmp to double + store double %fp, double addrspace(1)* %out, align 4 + ret void +} + +; SI-LABEL: @sint_to_fp_i1_f64_load: +; SI: V_CNDMASK_B32_e64 [[IRESULT:v[0-9]]], 0, -1 +; SI-NEXT: V_CVT_F64_I32_e32 [[RESULT:v\[[0-9]+:[0-9]\]]], [[IRESULT]] +; SI: BUFFER_STORE_DWORDX2 [[RESULT]] +; SI: S_ENDPGM +define void @sint_to_fp_i1_f64_load(double addrspace(1)* %out, i1 %in) { + %fp = sitofp i1 %in to double + store double %fp, double addrspace(1)* %out, align 8 + ret void +} diff --git a/test/CodeGen/R600/sra.ll b/test/CodeGen/R600/sra.ll index fe9df10..9eb3dc5 100644 --- a/test/CodeGen/R600/sra.ll +++ b/test/CodeGen/R600/sra.ll @@ -52,3 +52,133 @@ entry: ret void } +;EG-CHECK-LABEL: @ashr_i64_2 +;EG-CHECK: SUB_INT {{\*? *}}[[COMPSH:T[0-9]+\.[XYZW]]], {{literal.[xy]}}, [[SHIFT:T[0-9]+\.[XYZW]]] +;EG-CHECK: LSHL {{\* *}}[[TEMP:T[0-9]+\.[XYZW]]], [[OPHI:T[0-9]+\.[XYZW]]], {{[[COMPSH]]|PV.[XYZW]}} +;EG-CHECK: LSHL {{\*? *}}[[OVERF:T[0-9]+\.[XYZW]]], {{[[TEMP]]|PV.[XYZW]}}, 1 +;EG_CHECK-DAG: ADD_INT {{\*? *}}[[BIGSH:T[0-9]+\.[XYZW]]], [[SHIFT]], literal +;EG-CHECK-DAG: LSHR {{\*? *}}[[LOSMTMP:T[0-9]+\.[XYZW]]], [[OPLO:T[0-9]+\.[XYZW]]], [[SHIFT]] +;EG-CHECK-DAG: OR_INT {{\*? *}}[[LOSM:T[0-9]+\.[XYZW]]], {{[[LOSMTMP]]|PV.[XYZW]}}, {{[[OVERF]]|PV.[XYZW]}} +;EG-CHECK-DAG: ASHR {{\*? *}}[[HISM:T[0-9]+\.[XYZW]]], [[OPHI]], {{PS|[[SHIFT]]}} +;EG-CHECK-DAG: ASHR {{\*? *}}[[LOBIG:T[0-9]+\.[XYZW]]], [[OPHI]], literal +;EG-CHECK-DAG: ASHR {{\*? *}}[[HIBIG:T[0-9]+\.[XYZW]]], [[OPHI]], literal +;EG-CHECK-DAG: SETGT_UINT {{\*? *}}[[RESC:T[0-9]+\.[XYZW]]], [[SHIFT]], literal +;EG-CHECK-DAG: CNDE_INT {{\*? *}}[[RESLO:T[0-9]+\.[XYZW]]], {{T[0-9]+\.[XYZW]}} +;EG-CHECK-DAG: CNDE_INT {{\*? *}}[[RESHI:T[0-9]+\.[XYZW]]], {{T[0-9]+\.[XYZW]}} + +;SI-CHECK-LABEL: @ashr_i64_2 +;SI-CHECK: V_ASHR_I64 {{v\[[0-9]+:[0-9]+\], v\[[0-9]+:[0-9]+\], v[0-9]+}} +define void @ashr_i64_2(i64 addrspace(1)* %out, i64 addrspace(1)* %in) { +entry: + %b_ptr = getelementptr i64 addrspace(1)* %in, i64 1 + %a = load i64 addrspace(1) * %in + %b = load i64 addrspace(1) * %b_ptr + %result = ashr i64 %a, %b + store i64 %result, i64 addrspace(1)* %out + ret void +} + +;EG-CHECK-LABEL: @ashr_v2i64 +;EG-CHECK-DAG: SUB_INT {{\*? *}}[[COMPSHA:T[0-9]+\.[XYZW]]], {{literal.[xy]}}, [[SHA:T[0-9]+\.[XYZW]]] +;EG-CHECK-DAG: SUB_INT {{\*? *}}[[COMPSHB:T[0-9]+\.[XYZW]]], {{literal.[xy]}}, [[SHB:T[0-9]+\.[XYZW]]] +;EG-CHECK-DAG: LSHL {{\*? *}}[[COMPSHA]] +;EG-CHECK-DAG: LSHL {{\*? *}}[[COMPSHB]] +;EG-CHECK-DAG: LSHL {{.*}}, 1 +;EG-CHECK-DAG: LSHL {{.*}}, 1 +;EG-CHECK-DAG: ASHR {{.*}}, [[SHA]] +;EG-CHECK-DAG: ASHR {{.*}}, [[SHB]] +;EG-CHECK-DAG: LSHR {{.*}}, [[SHA]] +;EG-CHECK-DAG: LSHR {{.*}}, [[SHB]] +;EG-CHECK-DAG: OR_INT +;EG-CHECK-DAG: OR_INT +;EG-CHECK-DAG: ADD_INT {{\*? *}}[[BIGSHA:T[0-9]+\.[XYZW]]]{{.*}}, literal +;EG-CHECK-DAG: ADD_INT {{\*? *}}[[BIGSHB:T[0-9]+\.[XYZW]]]{{.*}}, literal +;EG-CHECK-DAG: ASHR +;EG-CHECK-DAG: ASHR +;EG-CHECK-DAG: ASHR {{.*}}, literal +;EG-CHECK-DAG: ASHR {{.*}}, literal +;EG-CHECK-DAG: SETGT_UINT {{\*? *T[0-9]\.[XYZW]}}, [[SHA]], literal +;EG-CHECK-DAG: SETGT_UINT {{\*? *T[0-9]\.[XYZW]}}, [[SHB]], literal +;EG-CHECK-DAG: CNDE_INT +;EG-CHECK-DAG: CNDE_INT +;EG-CHECK-DAG: CNDE_INT +;EG-CHECK-DAG: CNDE_INT + +;SI-CHECK-LABEL: @ashr_v2i64 +;SI-CHECK: V_ASHR_I64 {{v\[[0-9]+:[0-9]+\], v\[[0-9]+:[0-9]+\], v[0-9]+}} +;SI-CHECK: V_ASHR_I64 {{v\[[0-9]+:[0-9]+\], v\[[0-9]+:[0-9]+\], v[0-9]+}} + +define void @ashr_v2i64(<2 x i64> addrspace(1)* %out, <2 x i64> addrspace(1)* %in) { + %b_ptr = getelementptr <2 x i64> addrspace(1)* %in, i64 1 + %a = load <2 x i64> addrspace(1) * %in + %b = load <2 x i64> addrspace(1) * %b_ptr + %result = ashr <2 x i64> %a, %b + store <2 x i64> %result, <2 x i64> addrspace(1)* %out + ret void +} + +;EG-CHECK-LABEL: @ashr_v4i64 +;EG-CHECK-DAG: SUB_INT {{\*? *}}[[COMPSHA:T[0-9]+\.[XYZW]]], {{literal.[xy]}}, [[SHA:T[0-9]+\.[XYZW]]] +;EG-CHECK-DAG: SUB_INT {{\*? *}}[[COMPSHB:T[0-9]+\.[XYZW]]], {{literal.[xy]}}, [[SHB:T[0-9]+\.[XYZW]]] +;EG-CHECK-DAG: SUB_INT {{\*? *}}[[COMPSHC:T[0-9]+\.[XYZW]]], {{literal.[xy]}}, [[SHC:T[0-9]+\.[XYZW]]] +;EG-CHECK-DAG: SUB_INT {{\*? *}}[[COMPSHD:T[0-9]+\.[XYZW]]], {{literal.[xy]}}, [[SHD:T[0-9]+\.[XYZW]]] +;EG-CHECK-DAG: LSHL {{\*? *}}[[COMPSHA]] +;EG-CHECK-DAG: LSHL {{\*? *}}[[COMPSHB]] +;EG-CHECK-DAG: LSHL {{\*? *}}[[COMPSHC]] +;EG-CHECK-DAG: LSHL {{\*? *}}[[COMPSHD]] +;EG-CHECK-DAG: LSHL {{.*}}, 1 +;EG-CHECK-DAG: LSHL {{.*}}, 1 +;EG-CHECK-DAG: LSHL {{.*}}, 1 +;EG-CHECK-DAG: LSHL {{.*}}, 1 +;EG-CHECK-DAG: ASHR {{.*}}, [[SHA]] +;EG-CHECK-DAG: ASHR {{.*}}, [[SHB]] +;EG-CHECK-DAG: ASHR {{.*}}, [[SHC]] +;EG-CHECK-DAG: ASHR {{.*}}, [[SHD]] +;EG-CHECK-DAG: LSHR {{.*}}, [[SHA]] +;EG-CHECK-DAG: LSHR {{.*}}, [[SHB]] +;EG-CHECK-DAG: LSHR {{.*}}, [[SHA]] +;EG-CHECK-DAG: LSHR {{.*}}, [[SHB]] +;EG-CHECK-DAG: OR_INT +;EG-CHECK-DAG: OR_INT +;EG-CHECK-DAG: OR_INT +;EG-CHECK-DAG: OR_INT +;EG-CHECK-DAG: ADD_INT {{\*? *}}[[BIGSHA:T[0-9]+\.[XYZW]]]{{.*}}, literal +;EG-CHECK-DAG: ADD_INT {{\*? *}}[[BIGSHB:T[0-9]+\.[XYZW]]]{{.*}}, literal +;EG-CHECK-DAG: ADD_INT {{\*? *}}[[BIGSHC:T[0-9]+\.[XYZW]]]{{.*}}, literal +;EG-CHECK-DAG: ADD_INT {{\*? *}}[[BIGSHD:T[0-9]+\.[XYZW]]]{{.*}}, literal +;EG-CHECK-DAG: ASHR +;EG-CHECK-DAG: ASHR +;EG-CHECK-DAG: ASHR +;EG-CHECK-DAG: ASHR +;EG-CHECK-DAG: ASHR {{.*}}, literal +;EG-CHECK-DAG: ASHR {{.*}}, literal +;EG-CHECK-DAG: ASHR {{.*}}, literal +;EG-CHECK-DAG: ASHR {{.*}}, literal +;EG-CHECK-DAG: SETGT_UINT {{\*? *T[0-9]\.[XYZW]}}, [[SHA]], literal +;EG-CHECK-DAG: SETGT_UINT {{\*? *T[0-9]\.[XYZW]}}, [[SHB]], literal +;EG-CHECK-DAG: SETGT_UINT {{\*? *T[0-9]\.[XYZW]}}, [[SHC]], literal +;EG-CHECK-DAG: SETGT_UINT {{\*? *T[0-9]\.[XYZW]}}, [[SHD]], literal +;EG-CHECK-DAG: CNDE_INT +;EG-CHECK-DAG: CNDE_INT +;EG-CHECK-DAG: CNDE_INT +;EG-CHECK-DAG: CNDE_INT +;EG-CHECK-DAG: CNDE_INT +;EG-CHECK-DAG: CNDE_INT +;EG-CHECK-DAG: CNDE_INT +;EG-CHECK-DAG: CNDE_INT + +;SI-CHECK-LABEL: @ashr_v4i64 +;SI-CHECK: V_ASHR_I64 {{v\[[0-9]+:[0-9]+\], v\[[0-9]+:[0-9]+\], v[0-9]+}} +;SI-CHECK: V_ASHR_I64 {{v\[[0-9]+:[0-9]+\], v\[[0-9]+:[0-9]+\], v[0-9]+}} +;SI-CHECK: V_ASHR_I64 {{v\[[0-9]+:[0-9]+\], v\[[0-9]+:[0-9]+\], v[0-9]+}} +;SI-CHECK: V_ASHR_I64 {{v\[[0-9]+:[0-9]+\], v\[[0-9]+:[0-9]+\], v[0-9]+}} + +define void @ashr_v4i64(<4 x i64> addrspace(1)* %out, <4 x i64> addrspace(1)* %in) { + %b_ptr = getelementptr <4 x i64> addrspace(1)* %in, i64 1 + %a = load <4 x i64> addrspace(1) * %in + %b = load <4 x i64> addrspace(1) * %b_ptr + %result = ashr <4 x i64> %a, %b + store <4 x i64> %result, <4 x i64> addrspace(1)* %out + ret void +} + diff --git a/test/CodeGen/R600/srem.ll b/test/CodeGen/R600/srem.ll new file mode 100644 index 0000000..65e3395 --- /dev/null +++ b/test/CodeGen/R600/srem.ll @@ -0,0 +1,50 @@ +; RUN: llc -march=r600 -mcpu=SI < %s +; RUN: llc -march=r600 -mcpu=redwood < %s + +define void @srem_i32(i32 addrspace(1)* %out, i32 addrspace(1)* %in) { + %den_ptr = getelementptr i32 addrspace(1)* %in, i32 1 + %num = load i32 addrspace(1) * %in + %den = load i32 addrspace(1) * %den_ptr + %result = srem i32 %num, %den + store i32 %result, i32 addrspace(1)* %out + ret void +} + +define void @srem_i32_4(i32 addrspace(1)* %out, i32 addrspace(1)* %in) { + %num = load i32 addrspace(1) * %in + %result = srem i32 %num, 4 + store i32 %result, i32 addrspace(1)* %out + ret void +} + +define void @srem_v2i32(<2 x i32> addrspace(1)* %out, <2 x i32> addrspace(1)* %in) { + %den_ptr = getelementptr <2 x i32> addrspace(1)* %in, i32 1 + %num = load <2 x i32> addrspace(1) * %in + %den = load <2 x i32> addrspace(1) * %den_ptr + %result = srem <2 x i32> %num, %den + store <2 x i32> %result, <2 x i32> addrspace(1)* %out + ret void +} + +define void @srem_v2i32_4(<2 x i32> addrspace(1)* %out, <2 x i32> addrspace(1)* %in) { + %num = load <2 x i32> addrspace(1) * %in + %result = srem <2 x i32> %num, <i32 4, i32 4> + store <2 x i32> %result, <2 x i32> addrspace(1)* %out + ret void +} + +define void @srem_v4i32(<4 x i32> addrspace(1)* %out, <4 x i32> addrspace(1)* %in) { + %den_ptr = getelementptr <4 x i32> addrspace(1)* %in, i32 1 + %num = load <4 x i32> addrspace(1) * %in + %den = load <4 x i32> addrspace(1) * %den_ptr + %result = srem <4 x i32> %num, %den + store <4 x i32> %result, <4 x i32> addrspace(1)* %out + ret void +} + +define void @srem_v4i32_4(<4 x i32> addrspace(1)* %out, <4 x i32> addrspace(1)* %in) { + %num = load <4 x i32> addrspace(1) * %in + %result = srem <4 x i32> %num, <i32 4, i32 4, i32 4, i32 4> + store <4 x i32> %result, <4 x i32> addrspace(1)* %out + ret void +} diff --git a/test/CodeGen/R600/srl.ll b/test/CodeGen/R600/srl.ll index 7637355..44ad73f 100644 --- a/test/CodeGen/R600/srl.ll +++ b/test/CodeGen/R600/srl.ll @@ -39,3 +39,129 @@ define void @lshr_v4i32(<4 x i32> addrspace(1)* %out, <4 x i32> addrspace(1)* %i store <4 x i32> %result, <4 x i32> addrspace(1)* %out ret void } + +;EG-CHECK: @lshr_i64 +;EG-CHECK: SUB_INT {{\*? *}}[[COMPSH:T[0-9]+\.[XYZW]]], {{literal.[xy]}}, [[SHIFT:T[0-9]+\.[XYZW]]] +;EG-CHECK: LSHL {{\* *}}[[TEMP:T[0-9]+\.[XYZW]]], [[OPHI:T[0-9]+\.[XYZW]]], {{[[COMPSH]]|PV.[XYZW]}} +;EG-CHECK: LSHL {{\*? *}}[[OVERF:T[0-9]+\.[XYZW]]], {{[[TEMP]]|PV.[XYZW]}}, 1 +;EG_CHECK-DAG: ADD_INT {{\*? *}}[[BIGSH:T[0-9]+\.[XYZW]]], [[SHIFT]], literal +;EG-CHECK-DAG: LSHR {{\*? *}}[[LOSMTMP:T[0-9]+\.[XYZW]]], [[OPLO:T[0-9]+\.[XYZW]]], [[SHIFT]] +;EG-CHECK-DAG: OR_INT {{\*? *}}[[LOSM:T[0-9]+\.[XYZW]]], {{[[LOSMTMP]]|PV.[XYZW]}}, {{[[OVERF]]|PV.[XYZW]}} +;EG-CHECK-DAG: LSHR {{\*? *}}[[HISM:T[0-9]+\.[XYZW]]], [[OPHI]], {{PS|[[SHIFT]]}} +;EG-CHECK-DAG: LSHR {{\*? *}}[[LOBIG:T[0-9]+\.[XYZW]]], [[OPHI]], {{PS|[[SHIFT]]}} +;EG-CHECK-DAG: SETGT_UINT {{\*? *}}[[RESC:T[0-9]+\.[XYZW]]], [[SHIFT]], literal +;EG-CHECK-DAG: CNDE_INT {{\*? *}}[[RESLO:T[0-9]+\.[XYZW]]], {{T[0-9]+\.[XYZW]}} +;EG-CHECK-DAG: CNDE_INT {{\*? *}}[[RESHI:T[0-9]+\.[XYZW]]], {{T[0-9]+\.[XYZW], .*}}, 0.0 + +;SI-CHECK: @lshr_i64 +;SI-CHECK: V_LSHR_B64 {{v\[[0-9]+:[0-9]+\], v\[[0-9]+:[0-9]+\], v[0-9]+}} + +define void @lshr_i64(i64 addrspace(1)* %out, i64 addrspace(1)* %in) { + %b_ptr = getelementptr i64 addrspace(1)* %in, i64 1 + %a = load i64 addrspace(1) * %in + %b = load i64 addrspace(1) * %b_ptr + %result = lshr i64 %a, %b + store i64 %result, i64 addrspace(1)* %out + ret void +} + +;EG-CHECK: @lshr_v2i64 +;EG-CHECK-DAG: SUB_INT {{\*? *}}[[COMPSHA:T[0-9]+\.[XYZW]]], {{literal.[xy]}}, [[SHA:T[0-9]+\.[XYZW]]] +;EG-CHECK-DAG: SUB_INT {{\*? *}}[[COMPSHB:T[0-9]+\.[XYZW]]], {{literal.[xy]}}, [[SHB:T[0-9]+\.[XYZW]]] +;EG-CHECK-DAG: LSHL {{\*? *}}[[COMPSHA]] +;EG-CHECK-DAG: LSHL {{\*? *}}[[COMPSHB]] +;EG-CHECK-DAG: LSHL {{.*}}, 1 +;EG-CHECK-DAG: LSHL {{.*}}, 1 +;EG-CHECK-DAG: LSHR {{.*}}, [[SHA]] +;EG-CHECK-DAG: LSHR {{.*}}, [[SHB]] +;EG-CHECK-DAG: LSHR {{.*}}, [[SHA]] +;EG-CHECK-DAG: LSHR {{.*}}, [[SHB]] +;EG-CHECK-DAG: OR_INT +;EG-CHECK-DAG: OR_INT +;EG-CHECK-DAG: ADD_INT {{\*? *}}[[BIGSHA:T[0-9]+\.[XYZW]]]{{.*}}, literal +;EG-CHECK-DAG: ADD_INT {{\*? *}}[[BIGSHB:T[0-9]+\.[XYZW]]]{{.*}}, literal +;EG-CHECK-DAG: LSHR +;EG-CHECK-DAG: LSHR +;EG-CHECK-DAG: SETGT_UINT {{\*? *T[0-9]\.[XYZW]}}, [[SHA]], literal +;EG-CHECK-DAG: SETGT_UINT {{\*? *T[0-9]\.[XYZW]}}, [[SHB]], literal +;EG-CHECK-DAG: CNDE_INT {{.*}}, 0.0 +;EG-CHECK-DAG: CNDE_INT {{.*}}, 0.0 +;EG-CHECK-DAG: CNDE_INT +;EG-CHECK-DAG: CNDE_INT + +;SI-CHECK: @lshr_v2i64 +;SI-CHECK: V_LSHR_B64 {{v\[[0-9]+:[0-9]+\], v\[[0-9]+:[0-9]+\], v[0-9]+}} +;SI-CHECK: V_LSHR_B64 {{v\[[0-9]+:[0-9]+\], v\[[0-9]+:[0-9]+\], v[0-9]+}} + +define void @lshr_v2i64(<2 x i64> addrspace(1)* %out, <2 x i64> addrspace(1)* %in) { + %b_ptr = getelementptr <2 x i64> addrspace(1)* %in, i64 1 + %a = load <2 x i64> addrspace(1) * %in + %b = load <2 x i64> addrspace(1) * %b_ptr + %result = lshr <2 x i64> %a, %b + store <2 x i64> %result, <2 x i64> addrspace(1)* %out + ret void +} + + +;EG-CHECK: @lshr_v4i64 +;EG-CHECK-DAG: SUB_INT {{\*? *}}[[COMPSHA:T[0-9]+\.[XYZW]]], {{literal.[xy]}}, [[SHA:T[0-9]+\.[XYZW]]] +;EG-CHECK-DAG: SUB_INT {{\*? *}}[[COMPSHB:T[0-9]+\.[XYZW]]], {{literal.[xy]}}, [[SHB:T[0-9]+\.[XYZW]]] +;EG-CHECK-DAG: SUB_INT {{\*? *}}[[COMPSHC:T[0-9]+\.[XYZW]]], {{literal.[xy]}}, [[SHC:T[0-9]+\.[XYZW]]] +;EG-CHECK-DAG: SUB_INT {{\*? *}}[[COMPSHD:T[0-9]+\.[XYZW]]], {{literal.[xy]}}, [[SHD:T[0-9]+\.[XYZW]]] +;EG-CHECK-DAG: LSHL {{\*? *}}[[COMPSHA]] +;EG-CHECK-DAG: LSHL {{\*? *}}[[COMPSHB]] +;EG-CHECK-DAG: LSHL {{\*? *}}[[COMPSHC]] +;EG-CHECK-DAG: LSHL {{\*? *}}[[COMPSHD]] +;EG-CHECK-DAG: LSHL {{.*}}, 1 +;EG-CHECK-DAG: LSHL {{.*}}, 1 +;EG-CHECK-DAG: LSHL {{.*}}, 1 +;EG-CHECK-DAG: LSHL {{.*}}, 1 +;EG-CHECK-DAG: LSHR {{.*}}, [[SHA]] +;EG-CHECK-DAG: LSHR {{.*}}, [[SHB]] +;EG-CHECK-DAG: LSHR {{.*}}, [[SHC]] +;EG-CHECK-DAG: LSHR {{.*}}, [[SHD]] +;EG-CHECK-DAG: LSHR {{.*}}, [[SHA]] +;EG-CHECK-DAG: LSHR {{.*}}, [[SHB]] +;EG-CHECK-DAG: LSHR {{.*}}, [[SHC]] +;EG-CHECK-DAG: LSHR {{.*}}, [[SHD]] +;EG-CHECK-DAG: OR_INT +;EG-CHECK-DAG: OR_INT +;EG-CHECK-DAG: OR_INT +;EG-CHECK-DAG: OR_INT +;EG-CHECK-DAG: ADD_INT {{\*? *}}[[BIGSHA:T[0-9]+\.[XYZW]]]{{.*}}, literal +;EG-CHECK-DAG: ADD_INT {{\*? *}}[[BIGSHB:T[0-9]+\.[XYZW]]]{{.*}}, literal +;EG-CHECK-DAG: ADD_INT {{\*? *}}[[BIGSHC:T[0-9]+\.[XYZW]]]{{.*}}, literal +;EG-CHECK-DAG: ADD_INT {{\*? *}}[[BIGSHD:T[0-9]+\.[XYZW]]]{{.*}}, literal +;EG-CHECK-DAG: LSHR +;EG-CHECK-DAG: LSHR +;EG-CHECK-DAG: LSHR +;EG-CHECK-DAG: LSHR +;EG-CHECK-DAG: LSHR +;EG-CHECK-DAG: LSHR +;EG-CHECK-DAG: SETGT_UINT {{\*? *T[0-9]\.[XYZW]}}, [[SHA]], literal +;EG-CHECK-DAG: SETGT_UINT {{\*? *T[0-9]\.[XYZW]}}, [[SHB]], literal +;EG-CHECK-DAG: SETGT_UINT {{\*? *T[0-9]\.[XYZW]}}, [[SHC]], literal +;EG-CHECK-DAG: SETGT_UINT {{\*? *T[0-9]\.[XYZW]}}, [[SHD]], literal +;EG-CHECK-DAG: CNDE_INT {{.*}}, 0.0 +;EG-CHECK-DAG: CNDE_INT {{.*}}, 0.0 +;EG-CHECK-DAG: CNDE_INT {{.*}}, 0.0 +;EG-CHECK-DAG: CNDE_INT {{.*}}, 0.0 +;EG-CHECK-DAG: CNDE_INT +;EG-CHECK-DAG: CNDE_INT +;EG-CHECK-DAG: CNDE_INT +;EG-CHECK-DAG: CNDE_INT + +;SI-CHECK: @lshr_v4i64 +;SI-CHECK: V_LSHR_B64 {{v\[[0-9]+:[0-9]+\], v\[[0-9]+:[0-9]+\], v[0-9]+}} +;SI-CHECK: V_LSHR_B64 {{v\[[0-9]+:[0-9]+\], v\[[0-9]+:[0-9]+\], v[0-9]+}} +;SI-CHECK: V_LSHR_B64 {{v\[[0-9]+:[0-9]+\], v\[[0-9]+:[0-9]+\], v[0-9]+}} +;SI-CHECK: V_LSHR_B64 {{v\[[0-9]+:[0-9]+\], v\[[0-9]+:[0-9]+\], v[0-9]+}} + +define void @lshr_v4i64(<4 x i64> addrspace(1)* %out, <4 x i64> addrspace(1)* %in) { + %b_ptr = getelementptr <4 x i64> addrspace(1)* %in, i64 1 + %a = load <4 x i64> addrspace(1) * %in + %b = load <4 x i64> addrspace(1) * %b_ptr + %result = lshr <4 x i64> %a, %b + store <4 x i64> %result, <4 x i64> addrspace(1)* %out + ret void +} diff --git a/test/CodeGen/R600/ssubo.ll b/test/CodeGen/R600/ssubo.ll new file mode 100644 index 0000000..b330276 --- /dev/null +++ b/test/CodeGen/R600/ssubo.ll @@ -0,0 +1,64 @@ +; RUN: llc -march=r600 -mcpu=SI -verify-machineinstrs< %s | FileCheck -check-prefix=SI -check-prefix=FUNC %s +; RUN: llc -march=r600 -mcpu=cypress -verify-machineinstrs< %s + +declare { i32, i1 } @llvm.ssub.with.overflow.i32(i32, i32) nounwind readnone +declare { i64, i1 } @llvm.ssub.with.overflow.i64(i64, i64) nounwind readnone + +; FUNC-LABEL: @ssubo_i64_zext +define void @ssubo_i64_zext(i64 addrspace(1)* %out, i64 %a, i64 %b) nounwind { + %ssub = call { i64, i1 } @llvm.ssub.with.overflow.i64(i64 %a, i64 %b) nounwind + %val = extractvalue { i64, i1 } %ssub, 0 + %carry = extractvalue { i64, i1 } %ssub, 1 + %ext = zext i1 %carry to i64 + %add2 = add i64 %val, %ext + store i64 %add2, i64 addrspace(1)* %out, align 8 + ret void +} + +; FUNC-LABEL: @s_ssubo_i32 +define void @s_ssubo_i32(i32 addrspace(1)* %out, i1 addrspace(1)* %carryout, i32 %a, i32 %b) nounwind { + %ssub = call { i32, i1 } @llvm.ssub.with.overflow.i32(i32 %a, i32 %b) nounwind + %val = extractvalue { i32, i1 } %ssub, 0 + %carry = extractvalue { i32, i1 } %ssub, 1 + store i32 %val, i32 addrspace(1)* %out, align 4 + store i1 %carry, i1 addrspace(1)* %carryout + ret void +} + +; FUNC-LABEL: @v_ssubo_i32 +define void @v_ssubo_i32(i32 addrspace(1)* %out, i1 addrspace(1)* %carryout, i32 addrspace(1)* %aptr, i32 addrspace(1)* %bptr) nounwind { + %a = load i32 addrspace(1)* %aptr, align 4 + %b = load i32 addrspace(1)* %bptr, align 4 + %ssub = call { i32, i1 } @llvm.ssub.with.overflow.i32(i32 %a, i32 %b) nounwind + %val = extractvalue { i32, i1 } %ssub, 0 + %carry = extractvalue { i32, i1 } %ssub, 1 + store i32 %val, i32 addrspace(1)* %out, align 4 + store i1 %carry, i1 addrspace(1)* %carryout + ret void +} + +; FUNC-LABEL: @s_ssubo_i64 +; SI: S_SUB_I32 +; SI: S_SUBB_U32 +define void @s_ssubo_i64(i64 addrspace(1)* %out, i1 addrspace(1)* %carryout, i64 %a, i64 %b) nounwind { + %ssub = call { i64, i1 } @llvm.ssub.with.overflow.i64(i64 %a, i64 %b) nounwind + %val = extractvalue { i64, i1 } %ssub, 0 + %carry = extractvalue { i64, i1 } %ssub, 1 + store i64 %val, i64 addrspace(1)* %out, align 8 + store i1 %carry, i1 addrspace(1)* %carryout + ret void +} + +; FUNC-LABEL: @v_ssubo_i64 +; SI: V_SUB_I32_e32 +; SI: V_SUBB_U32_e32 +define void @v_ssubo_i64(i64 addrspace(1)* %out, i1 addrspace(1)* %carryout, i64 addrspace(1)* %aptr, i64 addrspace(1)* %bptr) nounwind { + %a = load i64 addrspace(1)* %aptr, align 4 + %b = load i64 addrspace(1)* %bptr, align 4 + %ssub = call { i64, i1 } @llvm.ssub.with.overflow.i64(i64 %a, i64 %b) nounwind + %val = extractvalue { i64, i1 } %ssub, 0 + %carry = extractvalue { i64, i1 } %ssub, 1 + store i64 %val, i64 addrspace(1)* %out, align 8 + store i1 %carry, i1 addrspace(1)* %carryout + ret void +} diff --git a/test/CodeGen/R600/store.ll b/test/CodeGen/R600/store.ll index c0c8ccc..dd27533 100644 --- a/test/CodeGen/R600/store.ll +++ b/test/CodeGen/R600/store.ll @@ -263,8 +263,7 @@ entry: ; CM-CHECK: LDS_WRITE ; CM-CHECK: LDS_WRITE ; SI-CHECK-LABEL: @store_local_v2i32 -; SI-CHECK: DS_WRITE_B32 -; SI-CHECK: DS_WRITE_B32 +; SI-CHECK: DS_WRITE_B64 define void @store_local_v2i32(<2 x i32> addrspace(3)* %out, <2 x i32> %in) { entry: store <2 x i32> %in, <2 x i32> addrspace(3)* %out diff --git a/test/CodeGen/R600/sub.ll b/test/CodeGen/R600/sub.ll index e321ed6..8e64148 100644 --- a/test/CodeGen/R600/sub.ll +++ b/test/CodeGen/R600/sub.ll @@ -1,5 +1,7 @@ -;RUN: llc < %s -march=r600 -mcpu=redwood | FileCheck --check-prefix=EG --check-prefix=FUNC %s -;RUN: llc < %s -march=r600 -mcpu=verde -verify-machineinstrs | FileCheck --check-prefix=SI --check-prefix=FUNC %s +;RUN: llc -march=r600 -mcpu=redwood < %s | FileCheck -check-prefix=EG -check-prefix=FUNC %s +;RUN: llc -march=r600 -mcpu=verde -verify-machineinstrs < %s | FileCheck -check-prefix=SI -check-prefix=FUNC %s + +declare i32 @llvm.r600.read.tidig.x() readnone ;FUNC-LABEL: @test2 ;EG: SUB_INT {{\** *}}T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}} @@ -37,23 +39,37 @@ define void @test4(<4 x i32> addrspace(1)* %out, <4 x i32> addrspace(1)* %in) { ret void } -;FUNC_LABEL: @test5 +; FUNC-LABEL: @s_sub_i64: +; SI: S_SUB_I32 +; SI: S_SUBB_U32 -;EG-DAG: SETGE_UINT -;EG-DAG: CNDE_INT -;EG-DAG: SUB_INT -;EG-DAG: SUB_INT -;EG-DAG: SUB_INT +; EG-DAG: SETGE_UINT +; EG-DAG: CNDE_INT +; EG-DAG: SUB_INT +; EG-DAG: SUB_INT +; EG-DAG: SUB_INT +define void @s_sub_i64(i64 addrspace(1)* noalias %out, i64 %a, i64 %b) nounwind { + %result = sub i64 %a, %b + store i64 %result, i64 addrspace(1)* %out, align 8 + ret void +} -;SI: S_XOR_B64 -;SI-DAG: S_ADD_I32 -;SI-DAG: S_ADDC_U32 -;SI-DAG: S_ADD_I32 -;SI-DAG: S_ADDC_U32 +; FUNC-LABEL: @v_sub_i64: +; SI: V_SUB_I32_e32 +; SI: V_SUBB_U32_e32 -define void @test5(i64 addrspace(1)* %out, i64 %a, i64 %b) { -entry: - %0 = sub i64 %a, %b - store i64 %0, i64 addrspace(1)* %out +; EG-DAG: SETGE_UINT +; EG-DAG: CNDE_INT +; EG-DAG: SUB_INT +; EG-DAG: SUB_INT +; EG-DAG: SUB_INT +define void @v_sub_i64(i64 addrspace(1)* noalias %out, i64 addrspace(1)* noalias %inA, i64 addrspace(1)* noalias %inB) nounwind { + %tid = call i32 @llvm.r600.read.tidig.x() readnone + %a_ptr = getelementptr i64 addrspace(1)* %inA, i32 %tid + %b_ptr = getelementptr i64 addrspace(1)* %inB, i32 %tid + %a = load i64 addrspace(1)* %a_ptr + %b = load i64 addrspace(1)* %b_ptr + %result = sub i64 %a, %b + store i64 %result, i64 addrspace(1)* %out, align 8 ret void } diff --git a/test/CodeGen/R600/uaddo.ll b/test/CodeGen/R600/uaddo.ll index 3b69687..a80e502 100644 --- a/test/CodeGen/R600/uaddo.ll +++ b/test/CodeGen/R600/uaddo.ll @@ -1,8 +1,10 @@ -; RUN: llc -march=r600 -mcpu=SI -verify-machineinstrs< %s | FileCheck -check-prefix=SI %s +; RUN: llc -march=r600 -mcpu=SI -verify-machineinstrs< %s | FileCheck -check-prefix=SI -check-prefix=FUNC %s +; RUN: llc -march=r600 -mcpu=cypress -verify-machineinstrs< %s +declare { i32, i1 } @llvm.uadd.with.overflow.i32(i32, i32) nounwind readnone declare { i64, i1 } @llvm.uadd.with.overflow.i64(i64, i64) nounwind readnone -; SI-LABEL: @uaddo_i64_zext +; FUNC-LABEL: @uaddo_i64_zext ; SI: ADD ; SI: ADDC ; SI: ADDC @@ -15,3 +17,53 @@ define void @uaddo_i64_zext(i64 addrspace(1)* %out, i64 %a, i64 %b) nounwind { store i64 %add2, i64 addrspace(1)* %out, align 8 ret void } + +; FUNC-LABEL: @s_uaddo_i32 +; SI: S_ADD_I32 +define void @s_uaddo_i32(i32 addrspace(1)* %out, i1 addrspace(1)* %carryout, i32 %a, i32 %b) nounwind { + %uadd = call { i32, i1 } @llvm.uadd.with.overflow.i32(i32 %a, i32 %b) nounwind + %val = extractvalue { i32, i1 } %uadd, 0 + %carry = extractvalue { i32, i1 } %uadd, 1 + store i32 %val, i32 addrspace(1)* %out, align 4 + store i1 %carry, i1 addrspace(1)* %carryout + ret void +} + +; FUNC-LABEL: @v_uaddo_i32 +; SI: V_ADD_I32 +define void @v_uaddo_i32(i32 addrspace(1)* %out, i1 addrspace(1)* %carryout, i32 addrspace(1)* %aptr, i32 addrspace(1)* %bptr) nounwind { + %a = load i32 addrspace(1)* %aptr, align 4 + %b = load i32 addrspace(1)* %bptr, align 4 + %uadd = call { i32, i1 } @llvm.uadd.with.overflow.i32(i32 %a, i32 %b) nounwind + %val = extractvalue { i32, i1 } %uadd, 0 + %carry = extractvalue { i32, i1 } %uadd, 1 + store i32 %val, i32 addrspace(1)* %out, align 4 + store i1 %carry, i1 addrspace(1)* %carryout + ret void +} + +; FUNC-LABEL: @s_uaddo_i64 +; SI: S_ADD_I32 +; SI: S_ADDC_U32 +define void @s_uaddo_i64(i64 addrspace(1)* %out, i1 addrspace(1)* %carryout, i64 %a, i64 %b) nounwind { + %uadd = call { i64, i1 } @llvm.uadd.with.overflow.i64(i64 %a, i64 %b) nounwind + %val = extractvalue { i64, i1 } %uadd, 0 + %carry = extractvalue { i64, i1 } %uadd, 1 + store i64 %val, i64 addrspace(1)* %out, align 8 + store i1 %carry, i1 addrspace(1)* %carryout + ret void +} + +; FUNC-LABEL: @v_uaddo_i64 +; SI: V_ADD_I32 +; SI: V_ADDC_U32 +define void @v_uaddo_i64(i64 addrspace(1)* %out, i1 addrspace(1)* %carryout, i64 addrspace(1)* %aptr, i64 addrspace(1)* %bptr) nounwind { + %a = load i64 addrspace(1)* %aptr, align 4 + %b = load i64 addrspace(1)* %bptr, align 4 + %uadd = call { i64, i1 } @llvm.uadd.with.overflow.i64(i64 %a, i64 %b) nounwind + %val = extractvalue { i64, i1 } %uadd, 0 + %carry = extractvalue { i64, i1 } %uadd, 1 + store i64 %val, i64 addrspace(1)* %out, align 8 + store i1 %carry, i1 addrspace(1)* %carryout + ret void +} diff --git a/test/CodeGen/R600/udivrem.ll b/test/CodeGen/R600/udivrem.ll new file mode 100644 index 0000000..5f5753a --- /dev/null +++ b/test/CodeGen/R600/udivrem.ll @@ -0,0 +1,358 @@ +; RUN: llc -march=r600 -mcpu=SI -verify-machineinstrs < %s | FileCheck --check-prefix=SI --check-prefix=FUNC %s +; RUN: llc -march=r600 -mcpu=redwood < %s | FileCheck --check-prefix=EG --check-prefix=FUNC %s + +; FUNC-LABEL: @test_udivrem +; EG: RECIP_UINT +; EG-DAG: MULHI +; EG-DAG: MULLO_INT +; EG-DAG: SUB_INT +; EG: CNDE_INT +; EG: MULHI +; EG-DAG: ADD_INT +; EG-DAG: SUB_INT +; EG: CNDE_INT +; EG: MULHI +; EG: MULLO_INT +; EG: SUB_INT +; EG-DAG: SETGE_UINT +; EG-DAG: SETGE_UINT +; EG: AND_INT +; EG-DAG: ADD_INT +; EG-DAG: SUB_INT +; EG-DAG: CNDE_INT +; EG-DAG: CNDE_INT +; EG-DAG: ADD_INT +; EG-DAG: SUB_INT +; EG-DAG: CNDE_INT +; EG-DAG: CNDE_INT + +; SI: V_RCP_IFLAG_F32_e32 [[RCP:v[0-9]+]] +; SI-DAG: V_MUL_HI_U32 [[RCP_HI:v[0-9]+]], [[RCP]] +; SI-DAG: V_MUL_LO_I32 [[RCP_LO:v[0-9]+]], [[RCP]] +; SI-DAG: V_SUB_I32_e32 [[NEG_RCP_LO:v[0-9]+]], 0, [[RCP_LO]] +; SI: V_CNDMASK_B32_e64 +; SI: V_MUL_HI_U32 [[E:v[0-9]+]], {{v[0-9]+}}, [[RCP]] +; SI-DAG: V_ADD_I32_e32 [[RCP_A_E:v[0-9]+]], [[E]], [[RCP]] +; SI-DAG: V_SUBREV_I32_e32 [[RCP_S_E:v[0-9]+]], [[E]], [[RCP]] +; SI: V_CNDMASK_B32_e64 +; SI: V_MUL_HI_U32 [[Quotient:v[0-9]+]] +; SI: V_MUL_LO_I32 [[Num_S_Remainder:v[0-9]+]] +; SI-DAG: V_SUB_I32_e32 [[Remainder:v[0-9]+]], {{[vs][0-9]+}}, [[Num_S_Remainder]] +; SI-DAG: V_CNDMASK_B32_e64 +; SI-DAG: V_CNDMASK_B32_e64 +; SI: V_AND_B32_e32 [[Tmp1:v[0-9]+]] +; SI-DAG: V_ADD_I32_e32 [[Quotient_A_One:v[0-9]+]], 1, [[Quotient]] +; SI-DAG: V_SUBREV_I32_e32 [[Quotient_S_One:v[0-9]+]], +; SI-DAG: V_CNDMASK_B32_e64 +; SI-DAG: V_CNDMASK_B32_e64 +; SI-DAG: V_ADD_I32_e32 [[Remainder_A_Den:v[0-9]+]], +; SI-DAG: V_SUBREV_I32_e32 [[Remainder_S_Den:v[0-9]+]], +; SI-DAG: V_CNDMASK_B32_e64 +; SI-DAG: V_CNDMASK_B32_e64 +; SI: S_ENDPGM +define void @test_udivrem(i32 addrspace(1)* %out, i32 %x, i32 %y) { + %result0 = udiv i32 %x, %y + store i32 %result0, i32 addrspace(1)* %out + %result1 = urem i32 %x, %y + store i32 %result1, i32 addrspace(1)* %out + ret void +} + +; FUNC-LABEL: @test_udivrem_v2 +; EG-DAG: RECIP_UINT +; EG-DAG: MULHI +; EG-DAG: MULLO_INT +; EG-DAG: SUB_INT +; EG-DAG: CNDE_INT +; EG-DAG: MULHI +; EG-DAG: ADD_INT +; EG-DAG: SUB_INT +; EG-DAG: CNDE_INT +; EG-DAG: MULHI +; EG-DAG: MULLO_INT +; EG-DAG: SUB_INT +; EG-DAG: SETGE_UINT +; EG-DAG: SETGE_UINT +; EG-DAG: AND_INT +; EG-DAG: ADD_INT +; EG-DAG: SUB_INT +; EG-DAG: CNDE_INT +; EG-DAG: CNDE_INT +; EG-DAG: ADD_INT +; EG-DAG: SUB_INT +; EG-DAG: CNDE_INT +; EG-DAG: CNDE_INT +; EG-DAG: RECIP_UINT +; EG-DAG: MULHI +; EG-DAG: MULLO_INT +; EG-DAG: SUB_INT +; EG-DAG: CNDE_INT +; EG-DAG: MULHI +; EG-DAG: ADD_INT +; EG-DAG: SUB_INT +; EG-DAG: CNDE_INT +; EG-DAG: MULHI +; EG-DAG: MULLO_INT +; EG-DAG: SUB_INT +; EG-DAG: SETGE_UINT +; EG-DAG: SETGE_UINT +; EG-DAG: AND_INT +; EG-DAG: ADD_INT +; EG-DAG: SUB_INT +; EG-DAG: CNDE_INT +; EG-DAG: CNDE_INT +; EG-DAG: ADD_INT +; EG-DAG: SUB_INT +; EG-DAG: CNDE_INT +; EG-DAG: CNDE_INT + +; SI-DAG: V_RCP_IFLAG_F32_e32 [[FIRST_RCP:v[0-9]+]] +; SI-DAG: V_MUL_HI_U32 [[FIRST_RCP_HI:v[0-9]+]], [[FIRST_RCP]] +; SI-DAG: V_MUL_LO_I32 [[FIRST_RCP_LO:v[0-9]+]], [[FIRST_RCP]] +; SI-DAG: V_SUB_I32_e32 [[FIRST_NEG_RCP_LO:v[0-9]+]], 0, [[FIRST_RCP_LO]] +; SI-DAG: V_CNDMASK_B32_e64 +; SI-DAG: V_MUL_HI_U32 [[FIRST_E:v[0-9]+]], {{v[0-9]+}}, [[FIRST_RCP]] +; SI-DAG: V_ADD_I32_e32 [[FIRST_RCP_A_E:v[0-9]+]], [[FIRST_E]], [[FIRST_RCP]] +; SI-DAG: V_SUBREV_I32_e32 [[FIRST_RCP_S_E:v[0-9]+]], [[FIRST_E]], [[FIRST_RCP]] +; SI-DAG: V_CNDMASK_B32_e64 +; SI-DAG: V_MUL_HI_U32 [[FIRST_Quotient:v[0-9]+]] +; SI-DAG: V_MUL_LO_I32 [[FIRST_Num_S_Remainder:v[0-9]+]] +; SI-DAG: V_SUB_I32_e32 [[FIRST_Remainder:v[0-9]+]], {{[vs][0-9]+}}, [[FIRST_Num_S_Remainder]] +; SI-DAG: V_CNDMASK_B32_e64 +; SI-DAG: V_CNDMASK_B32_e64 +; SI-DAG: V_AND_B32_e32 [[FIRST_Tmp1:v[0-9]+]] +; SI-DAG: V_ADD_I32_e32 [[FIRST_Quotient_A_One:v[0-9]+]], {{.*}}, [[FIRST_Quotient]] +; SI-DAG: V_SUBREV_I32_e32 [[FIRST_Quotient_S_One:v[0-9]+]], +; SI-DAG: V_CNDMASK_B32_e64 +; SI-DAG: V_CNDMASK_B32_e64 +; SI-DAG: V_ADD_I32_e32 [[FIRST_Remainder_A_Den:v[0-9]+]], +; SI-DAG: V_SUBREV_I32_e32 [[FIRST_Remainder_S_Den:v[0-9]+]], +; SI-DAG: V_CNDMASK_B32_e64 +; SI-DAG: V_CNDMASK_B32_e64 +; SI-DAG: V_RCP_IFLAG_F32_e32 [[SECOND_RCP:v[0-9]+]] +; SI-DAG: V_MUL_HI_U32 [[SECOND_RCP_HI:v[0-9]+]], [[SECOND_RCP]] +; SI-DAG: V_MUL_LO_I32 [[SECOND_RCP_LO:v[0-9]+]], [[SECOND_RCP]] +; SI-DAG: V_SUB_I32_e32 [[SECOND_NEG_RCP_LO:v[0-9]+]], 0, [[SECOND_RCP_LO]] +; SI-DAG: V_CNDMASK_B32_e64 +; SI-DAG: V_MUL_HI_U32 [[SECOND_E:v[0-9]+]], {{v[0-9]+}}, [[SECOND_RCP]] +; SI-DAG: V_ADD_I32_e32 [[SECOND_RCP_A_E:v[0-9]+]], [[SECOND_E]], [[SECOND_RCP]] +; SI-DAG: V_SUBREV_I32_e32 [[SECOND_RCP_S_E:v[0-9]+]], [[SECOND_E]], [[SECOND_RCP]] +; SI-DAG: V_CNDMASK_B32_e64 +; SI-DAG: V_MUL_HI_U32 [[SECOND_Quotient:v[0-9]+]] +; SI-DAG: V_MUL_LO_I32 [[SECOND_Num_S_Remainder:v[0-9]+]] +; SI-DAG: V_SUB_I32_e32 [[SECOND_Remainder:v[0-9]+]], {{[vs][0-9]+}}, [[SECOND_Num_S_Remainder]] +; SI-DAG: V_CNDMASK_B32_e64 +; SI-DAG: V_CNDMASK_B32_e64 +; SI-DAG: V_AND_B32_e32 [[SECOND_Tmp1:v[0-9]+]] +; SI-DAG: V_ADD_I32_e32 [[SECOND_Quotient_A_One:v[0-9]+]], {{.*}}, [[SECOND_Quotient]] +; SI-DAG: V_SUBREV_I32_e32 [[SECOND_Quotient_S_One:v[0-9]+]], +; SI-DAG: V_CNDMASK_B32_e64 +; SI-DAG: V_CNDMASK_B32_e64 +; SI-DAG: V_ADD_I32_e32 [[SECOND_Remainder_A_Den:v[0-9]+]], +; SI-DAG: V_SUBREV_I32_e32 [[SECOND_Remainder_S_Den:v[0-9]+]], +; SI-DAG: V_CNDMASK_B32_e64 +; SI-DAG: V_CNDMASK_B32_e64 +; SI: S_ENDPGM +define void @test_udivrem_v2(<2 x i32> addrspace(1)* %out, <2 x i32> %x, <2 x i32> %y) { + %result0 = udiv <2 x i32> %x, %y + store <2 x i32> %result0, <2 x i32> addrspace(1)* %out + %result1 = urem <2 x i32> %x, %y + store <2 x i32> %result1, <2 x i32> addrspace(1)* %out + ret void +} + + +; FUNC-LABEL: @test_udivrem_v4 +; EG-DAG: RECIP_UINT +; EG-DAG: MULHI +; EG-DAG: MULLO_INT +; EG-DAG: SUB_INT +; EG-DAG: CNDE_INT +; EG-DAG: MULHI +; EG-DAG: ADD_INT +; EG-DAG: SUB_INT +; EG-DAG: CNDE_INT +; EG-DAG: MULHI +; EG-DAG: MULLO_INT +; EG-DAG: SUB_INT +; EG-DAG: SETGE_UINT +; EG-DAG: SETGE_UINT +; EG-DAG: AND_INT +; EG-DAG: ADD_INT +; EG-DAG: SUB_INT +; EG-DAG: CNDE_INT +; EG-DAG: CNDE_INT +; EG-DAG: ADD_INT +; EG-DAG: SUB_INT +; EG-DAG: CNDE_INT +; EG-DAG: CNDE_INT +; EG-DAG: RECIP_UINT +; EG-DAG: MULHI +; EG-DAG: MULLO_INT +; EG-DAG: SUB_INT +; EG-DAG: CNDE_INT +; EG-DAG: MULHI +; EG-DAG: ADD_INT +; EG-DAG: SUB_INT +; EG-DAG: CNDE_INT +; EG-DAG: MULHI +; EG-DAG: MULLO_INT +; EG-DAG: SUB_INT +; EG-DAG: SETGE_UINT +; EG-DAG: SETGE_UINT +; EG-DAG: AND_INT +; EG-DAG: ADD_INT +; EG-DAG: SUB_INT +; EG-DAG: CNDE_INT +; EG-DAG: CNDE_INT +; EG-DAG: ADD_INT +; EG-DAG: SUB_INT +; EG-DAG: CNDE_INT +; EG-DAG: CNDE_INT +; EG-DAG: RECIP_UINT +; EG-DAG: MULHI +; EG-DAG: MULLO_INT +; EG-DAG: SUB_INT +; EG-DAG: CNDE_INT +; EG-DAG: MULHI +; EG-DAG: ADD_INT +; EG-DAG: SUB_INT +; EG-DAG: CNDE_INT +; EG-DAG: MULHI +; EG-DAG: MULLO_INT +; EG-DAG: SUB_INT +; EG-DAG: SETGE_UINT +; EG-DAG: SETGE_UINT +; EG-DAG: AND_INT +; EG-DAG: ADD_INT +; EG-DAG: SUB_INT +; EG-DAG: CNDE_INT +; EG-DAG: CNDE_INT +; EG-DAG: ADD_INT +; EG-DAG: SUB_INT +; EG-DAG: CNDE_INT +; EG-DAG: CNDE_INT +; EG-DAG: RECIP_UINT +; EG-DAG: MULHI +; EG-DAG: MULLO_INT +; EG-DAG: SUB_INT +; EG-DAG: CNDE_INT +; EG-DAG: MULHI +; EG-DAG: ADD_INT +; EG-DAG: SUB_INT +; EG-DAG: CNDE_INT +; EG-DAG: MULHI +; EG-DAG: MULLO_INT +; EG-DAG: SUB_INT +; EG-DAG: SETGE_UINT +; EG-DAG: SETGE_UINT +; EG-DAG: AND_INT +; EG-DAG: ADD_INT +; EG-DAG: SUB_INT +; EG-DAG: CNDE_INT +; EG-DAG: CNDE_INT +; EG-DAG: ADD_INT +; EG-DAG: SUB_INT +; EG-DAG: CNDE_INT +; EG-DAG: CNDE_INT + +; SI-DAG: V_RCP_IFLAG_F32_e32 [[FIRST_RCP:v[0-9]+]] +; SI-DAG: V_MUL_HI_U32 [[FIRST_RCP_HI:v[0-9]+]], [[FIRST_RCP]] +; SI-DAG: V_MUL_LO_I32 [[FIRST_RCP_LO:v[0-9]+]], [[FIRST_RCP]] +; SI-DAG: V_SUB_I32_e32 [[FIRST_NEG_RCP_LO:v[0-9]+]], 0, [[FIRST_RCP_LO]] +; SI-DAG: V_CNDMASK_B32_e64 +; SI-DAG: V_MUL_HI_U32 [[FIRST_E:v[0-9]+]], {{v[0-9]+}}, [[FIRST_RCP]] +; SI-DAG: V_ADD_I32_e32 [[FIRST_RCP_A_E:v[0-9]+]], [[FIRST_E]], [[FIRST_RCP]] +; SI-DAG: V_SUBREV_I32_e32 [[FIRST_RCP_S_E:v[0-9]+]], [[FIRST_E]], [[FIRST_RCP]] +; SI-DAG: V_CNDMASK_B32_e64 +; SI-DAG: V_MUL_HI_U32 [[FIRST_Quotient:v[0-9]+]] +; SI-DAG: V_MUL_LO_I32 [[FIRST_Num_S_Remainder:v[0-9]+]] +; SI-DAG: V_SUB_I32_e32 [[FIRST_Remainder:v[0-9]+]], {{[vs][0-9]+}}, [[FIRST_Num_S_Remainder]] +; SI-DAG: V_CNDMASK_B32_e64 +; SI-DAG: V_CNDMASK_B32_e64 +; SI-DAG: V_AND_B32_e32 [[FIRST_Tmp1:v[0-9]+]] +; SI-DAG: V_ADD_I32_e32 [[FIRST_Quotient_A_One:v[0-9]+]], {{.*}}, [[FIRST_Quotient]] +; SI-DAG: V_SUBREV_I32_e32 [[FIRST_Quotient_S_One:v[0-9]+]], +; SI-DAG: V_CNDMASK_B32_e64 +; SI-DAG: V_CNDMASK_B32_e64 +; SI-DAG: V_ADD_I32_e32 [[FIRST_Remainder_A_Den:v[0-9]+]], +; SI-DAG: V_SUBREV_I32_e32 [[FIRST_Remainder_S_Den:v[0-9]+]], +; SI-DAG: V_CNDMASK_B32_e64 +; SI-DAG: V_CNDMASK_B32_e64 +; SI-DAG: V_RCP_IFLAG_F32_e32 [[SECOND_RCP:v[0-9]+]] +; SI-DAG: V_MUL_HI_U32 [[SECOND_RCP_HI:v[0-9]+]], [[SECOND_RCP]] +; SI-DAG: V_MUL_LO_I32 [[SECOND_RCP_LO:v[0-9]+]], [[SECOND_RCP]] +; SI-DAG: V_SUB_I32_e32 [[SECOND_NEG_RCP_LO:v[0-9]+]], 0, [[SECOND_RCP_LO]] +; SI-DAG: V_CNDMASK_B32_e64 +; SI-DAG: V_MUL_HI_U32 [[SECOND_E:v[0-9]+]], {{v[0-9]+}}, [[SECOND_RCP]] +; SI-DAG: V_ADD_I32_e32 [[SECOND_RCP_A_E:v[0-9]+]], [[SECOND_E]], [[SECOND_RCP]] +; SI-DAG: V_SUBREV_I32_e32 [[SECOND_RCP_S_E:v[0-9]+]], [[SECOND_E]], [[SECOND_RCP]] +; SI-DAG: V_CNDMASK_B32_e64 +; SI-DAG: V_MUL_HI_U32 [[SECOND_Quotient:v[0-9]+]] +; SI-DAG: V_MUL_LO_I32 [[SECOND_Num_S_Remainder:v[0-9]+]] +; SI-DAG: V_SUB_I32_e32 [[SECOND_Remainder:v[0-9]+]], {{[vs][0-9]+}}, [[SECOND_Num_S_Remainder]] +; SI-DAG: V_CNDMASK_B32_e64 +; SI-DAG: V_CNDMASK_B32_e64 +; SI-DAG: V_AND_B32_e32 [[SECOND_Tmp1:v[0-9]+]] +; SI-DAG: V_ADD_I32_e32 [[SECOND_Quotient_A_One:v[0-9]+]], {{.*}}, [[SECOND_Quotient]] +; SI-DAG: V_SUBREV_I32_e32 [[SECOND_Quotient_S_One:v[0-9]+]], +; SI-DAG: V_CNDMASK_B32_e64 +; SI-DAG: V_CNDMASK_B32_e64 +; SI-DAG: V_ADD_I32_e32 [[SECOND_Remainder_A_Den:v[0-9]+]], +; SI-DAG: V_SUBREV_I32_e32 [[SECOND_Remainder_S_Den:v[0-9]+]], +; SI-DAG: V_CNDMASK_B32_e64 +; SI-DAG: V_CNDMASK_B32_e64 +; SI-DAG: V_RCP_IFLAG_F32_e32 [[THIRD_RCP:v[0-9]+]] +; SI-DAG: V_MUL_HI_U32 [[THIRD_RCP_HI:v[0-9]+]], [[THIRD_RCP]] +; SI-DAG: V_MUL_LO_I32 [[THIRD_RCP_LO:v[0-9]+]], [[THIRD_RCP]] +; SI-DAG: V_SUB_I32_e32 [[THIRD_NEG_RCP_LO:v[0-9]+]], 0, [[THIRD_RCP_LO]] +; SI-DAG: V_CNDMASK_B32_e64 +; SI-DAG: V_MUL_HI_U32 [[THIRD_E:v[0-9]+]], {{v[0-9]+}}, [[THIRD_RCP]] +; SI-DAG: V_ADD_I32_e32 [[THIRD_RCP_A_E:v[0-9]+]], [[THIRD_E]], [[THIRD_RCP]] +; SI-DAG: V_SUBREV_I32_e32 [[THIRD_RCP_S_E:v[0-9]+]], [[THIRD_E]], [[THIRD_RCP]] +; SI-DAG: V_CNDMASK_B32_e64 +; SI-DAG: V_MUL_HI_U32 [[THIRD_Quotient:v[0-9]+]] +; SI-DAG: V_MUL_LO_I32 [[THIRD_Num_S_Remainder:v[0-9]+]] +; SI-DAG: V_SUB_I32_e32 [[THIRD_Remainder:v[0-9]+]], {{[vs][0-9]+}}, [[THIRD_Num_S_Remainder]] +; SI-DAG: V_CNDMASK_B32_e64 +; SI-DAG: V_CNDMASK_B32_e64 +; SI-DAG: V_AND_B32_e32 [[THIRD_Tmp1:v[0-9]+]] +; SI-DAG: V_ADD_I32_e32 [[THIRD_Quotient_A_One:v[0-9]+]], {{.*}}, [[THIRD_Quotient]] +; SI-DAG: V_SUBREV_I32_e32 [[THIRD_Quotient_S_One:v[0-9]+]], +; SI-DAG: V_CNDMASK_B32_e64 +; SI-DAG: V_CNDMASK_B32_e64 +; SI-DAG: V_ADD_I32_e32 [[THIRD_Remainder_A_Den:v[0-9]+]], +; SI-DAG: V_SUBREV_I32_e32 [[THIRD_Remainder_S_Den:v[0-9]+]], +; SI-DAG: V_CNDMASK_B32_e64 +; SI-DAG: V_CNDMASK_B32_e64 +; SI-DAG: V_RCP_IFLAG_F32_e32 [[FOURTH_RCP:v[0-9]+]] +; SI-DAG: V_MUL_HI_U32 [[FOURTH_RCP_HI:v[0-9]+]], [[FOURTH_RCP]] +; SI-DAG: V_MUL_LO_I32 [[FOURTH_RCP_LO:v[0-9]+]], [[FOURTH_RCP]] +; SI-DAG: V_SUB_I32_e32 [[FOURTH_NEG_RCP_LO:v[0-9]+]], 0, [[FOURTH_RCP_LO]] +; SI-DAG: V_CNDMASK_B32_e64 +; SI-DAG: V_MUL_HI_U32 [[FOURTH_E:v[0-9]+]], {{v[0-9]+}}, [[FOURTH_RCP]] +; SI-DAG: V_ADD_I32_e32 [[FOURTH_RCP_A_E:v[0-9]+]], [[FOURTH_E]], [[FOURTH_RCP]] +; SI-DAG: V_SUBREV_I32_e32 [[FOURTH_RCP_S_E:v[0-9]+]], [[FOURTH_E]], [[FOURTH_RCP]] +; SI-DAG: V_CNDMASK_B32_e64 +; SI-DAG: V_MUL_HI_U32 [[FOURTH_Quotient:v[0-9]+]] +; SI-DAG: V_MUL_LO_I32 [[FOURTH_Num_S_Remainder:v[0-9]+]] +; SI-DAG: V_SUB_I32_e32 [[FOURTH_Remainder:v[0-9]+]], {{[vs][0-9]+}}, [[FOURTH_Num_S_Remainder]] +; SI-DAG: V_CNDMASK_B32_e64 +; SI-DAG: V_CNDMASK_B32_e64 +; SI-DAG: V_AND_B32_e32 [[FOURTH_Tmp1:v[0-9]+]] +; SI-DAG: V_ADD_I32_e32 [[FOURTH_Quotient_A_One:v[0-9]+]], {{.*}}, [[FOURTH_Quotient]] +; SI-DAG: V_SUBREV_I32_e32 [[FOURTH_Quotient_S_One:v[0-9]+]], +; SI-DAG: V_CNDMASK_B32_e64 +; SI-DAG: V_CNDMASK_B32_e64 +; SI-DAG: V_ADD_I32_e32 [[FOURTH_Remainder_A_Den:v[0-9]+]], +; SI-DAG: V_SUBREV_I32_e32 [[FOURTH_Remainder_S_Den:v[0-9]+]], +; SI-DAG: V_CNDMASK_B32_e64 +; SI-DAG: V_CNDMASK_B32_e64 +; SI: S_ENDPGM +define void @test_udivrem_v4(<4 x i32> addrspace(1)* %out, <4 x i32> %x, <4 x i32> %y) { + %result0 = udiv <4 x i32> %x, %y + store <4 x i32> %result0, <4 x i32> addrspace(1)* %out + %result1 = urem <4 x i32> %x, %y + store <4 x i32> %result1, <4 x i32> addrspace(1)* %out + ret void +} diff --git a/test/CodeGen/R600/uint_to_fp.f64.ll b/test/CodeGen/R600/uint_to_fp.f64.ll index 75150c2..9a41796 100644 --- a/test/CodeGen/R600/uint_to_fp.f64.ll +++ b/test/CodeGen/R600/uint_to_fp.f64.ll @@ -2,8 +2,35 @@ ; SI-LABEL: @uint_to_fp_f64_i32 ; SI: V_CVT_F64_U32_e32 +; SI: S_ENDPGM define void @uint_to_fp_f64_i32(double addrspace(1)* %out, i32 %in) { %cast = uitofp i32 %in to double store double %cast, double addrspace(1)* %out, align 8 ret void } + +; SI-LABEL: @uint_to_fp_i1_f64: +; SI: V_CMP_EQ_I32_e64 [[CMP:s\[[0-9]+:[0-9]\]]], +; FIXME: We should the VGPR sources for V_CNDMASK are copied from SGPRs, +; we should be able to fold the SGPRs into the V_CNDMASK instructions. +; SI: V_CNDMASK_B32_e64 v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}}, [[CMP]] +; SI: V_CNDMASK_B32_e64 v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}}, [[CMP]] +; SI: BUFFER_STORE_DWORDX2 +; SI: S_ENDPGM +define void @uint_to_fp_i1_f64(double addrspace(1)* %out, i32 %in) { + %cmp = icmp eq i32 %in, 0 + %fp = uitofp i1 %cmp to double + store double %fp, double addrspace(1)* %out, align 4 + ret void +} + +; SI-LABEL: @uint_to_fp_i1_f64_load: +; SI: V_CNDMASK_B32_e64 [[IRESULT:v[0-9]]], 0, 1 +; SI-NEXT: V_CVT_F64_U32_e32 [[RESULT:v\[[0-9]+:[0-9]\]]], [[IRESULT]] +; SI: BUFFER_STORE_DWORDX2 [[RESULT]] +; SI: S_ENDPGM +define void @uint_to_fp_i1_f64_load(double addrspace(1)* %out, i1 %in) { + %fp = uitofp i1 %in to double + store double %fp, double addrspace(1)* %out, align 8 + ret void +} diff --git a/test/CodeGen/R600/uint_to_fp.ll b/test/CodeGen/R600/uint_to_fp.ll index a5ac355..8f5d42d 100644 --- a/test/CodeGen/R600/uint_to_fp.ll +++ b/test/CodeGen/R600/uint_to_fp.ll @@ -1,28 +1,30 @@ -; RUN: llc < %s -march=r600 -mcpu=redwood | FileCheck %s --check-prefix=R600-CHECK -; RUN: llc < %s -march=r600 -mcpu=SI -verify-machineinstrs | FileCheck %s --check-prefix=SI-CHECK +; RUN: llc -march=r600 -mcpu=SI -verify-machineinstrs < %s | FileCheck -check-prefix=SI -check-prefix=FUNC %s +; RUN: llc -march=r600 -mcpu=redwood < %s | FileCheck -check-prefix=R600 -check-prefix=FUNC %s -; R600-CHECK-LABEL: @uint_to_fp_v2i32 -; R600-CHECK-DAG: UINT_TO_FLT * T{{[0-9]+\.[XYZW]}}, KC0[2].W -; R600-CHECK-DAG: UINT_TO_FLT * T{{[0-9]+\.[XYZW]}}, KC0[3].X -; SI-CHECK-LABEL: @uint_to_fp_v2i32 -; SI-CHECK: V_CVT_F32_U32_e32 -; SI-CHECK: V_CVT_F32_U32_e32 +; FUNC-LABEL: @uint_to_fp_v2i32 +; R600-DAG: UINT_TO_FLT * T{{[0-9]+\.[XYZW]}}, KC0[2].W +; R600-DAG: UINT_TO_FLT * T{{[0-9]+\.[XYZW]}}, KC0[3].X + +; SI: V_CVT_F32_U32_e32 +; SI: V_CVT_F32_U32_e32 +; SI: S_ENDPGM define void @uint_to_fp_v2i32(<2 x float> addrspace(1)* %out, <2 x i32> %in) { %result = uitofp <2 x i32> %in to <2 x float> store <2 x float> %result, <2 x float> addrspace(1)* %out ret void } -; R600-CHECK-LABEL: @uint_to_fp_v4i32 -; R600-CHECK: UINT_TO_FLT * T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}} -; R600-CHECK: UINT_TO_FLT * T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}} -; R600-CHECK: UINT_TO_FLT * T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}} -; R600-CHECK: UINT_TO_FLT * T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}} -; SI-CHECK-LABEL: @uint_to_fp_v4i32 -; SI-CHECK: V_CVT_F32_U32_e32 -; SI-CHECK: V_CVT_F32_U32_e32 -; SI-CHECK: V_CVT_F32_U32_e32 -; SI-CHECK: V_CVT_F32_U32_e32 +; FUNC-LABEL: @uint_to_fp_v4i32 +; R600: UINT_TO_FLT * T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}} +; R600: UINT_TO_FLT * T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}} +; R600: UINT_TO_FLT * T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}} +; R600: UINT_TO_FLT * T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}} + +; SI: V_CVT_F32_U32_e32 +; SI: V_CVT_F32_U32_e32 +; SI: V_CVT_F32_U32_e32 +; SI: V_CVT_F32_U32_e32 +; SI: S_ENDPGM define void @uint_to_fp_v4i32(<4 x float> addrspace(1)* %out, <4 x i32> addrspace(1)* %in) { %value = load <4 x i32> addrspace(1) * %in %result = uitofp <4 x i32> %value to <4 x float> @@ -30,17 +32,39 @@ define void @uint_to_fp_v4i32(<4 x float> addrspace(1)* %out, <4 x i32> addrspac ret void } -; R600-CHECK-LABEL: @uint_to_fp_i64_f32 -; R600-CHECK: UINT_TO_FLT -; R600-CHECK: UINT_TO_FLT -; R600-CHECK: MULADD_IEEE -; SI-CHECK-LABEL: @uint_to_fp_i64_f32 -; SI-CHECK: V_CVT_F32_U32_e32 -; SI-CHECK: V_CVT_F32_U32_e32 -; SI-CHECK: V_MAD_F32 +; FUNC-LABEL: @uint_to_fp_i64_f32 +; R600: UINT_TO_FLT +; R600: UINT_TO_FLT +; R600: MULADD_IEEE +; SI: V_CVT_F32_U32_e32 +; SI: V_CVT_F32_U32_e32 +; SI: V_MAD_F32 +; SI: S_ENDPGM define void @uint_to_fp_i64_f32(float addrspace(1)* %out, i64 %in) { entry: %0 = uitofp i64 %in to float store float %0, float addrspace(1)* %out ret void } + +; FUNC-LABEL: @uint_to_fp_i1_f32: +; SI: V_CMP_EQ_I32_e64 [[CMP:s\[[0-9]+:[0-9]\]]], +; SI-NEXT: V_CNDMASK_B32_e64 [[RESULT:v[0-9]+]], 0, 1.000000e+00, [[CMP]] +; SI: BUFFER_STORE_DWORD [[RESULT]], +; SI: S_ENDPGM +define void @uint_to_fp_i1_f32(float addrspace(1)* %out, i32 %in) { + %cmp = icmp eq i32 %in, 0 + %fp = uitofp i1 %cmp to float + store float %fp, float addrspace(1)* %out, align 4 + ret void +} + +; FUNC-LABEL: @uint_to_fp_i1_f32_load: +; SI: V_CNDMASK_B32_e64 [[RESULT:v[0-9]+]], 0, 1.000000e+00 +; SI: BUFFER_STORE_DWORD [[RESULT]], +; SI: S_ENDPGM +define void @uint_to_fp_i1_f32_load(float addrspace(1)* %out, i1 %in) { + %fp = uitofp i1 %in to float + store float %fp, float addrspace(1)* %out, align 4 + ret void +} diff --git a/test/CodeGen/R600/usubo.ll b/test/CodeGen/R600/usubo.ll new file mode 100644 index 0000000..d57a2c7 --- /dev/null +++ b/test/CodeGen/R600/usubo.ll @@ -0,0 +1,66 @@ +; RUN: llc -march=r600 -mcpu=SI -verify-machineinstrs< %s | FileCheck -check-prefix=SI -check-prefix=FUNC %s +; RUN: llc -march=r600 -mcpu=cypress -verify-machineinstrs< %s + +declare { i32, i1 } @llvm.usub.with.overflow.i32(i32, i32) nounwind readnone +declare { i64, i1 } @llvm.usub.with.overflow.i64(i64, i64) nounwind readnone + +; FUNC-LABEL: @usubo_i64_zext +define void @usubo_i64_zext(i64 addrspace(1)* %out, i64 %a, i64 %b) nounwind { + %usub = call { i64, i1 } @llvm.usub.with.overflow.i64(i64 %a, i64 %b) nounwind + %val = extractvalue { i64, i1 } %usub, 0 + %carry = extractvalue { i64, i1 } %usub, 1 + %ext = zext i1 %carry to i64 + %add2 = add i64 %val, %ext + store i64 %add2, i64 addrspace(1)* %out, align 8 + ret void +} + +; FUNC-LABEL: @s_usubo_i32 +; SI: S_SUB_I32 +define void @s_usubo_i32(i32 addrspace(1)* %out, i1 addrspace(1)* %carryout, i32 %a, i32 %b) nounwind { + %usub = call { i32, i1 } @llvm.usub.with.overflow.i32(i32 %a, i32 %b) nounwind + %val = extractvalue { i32, i1 } %usub, 0 + %carry = extractvalue { i32, i1 } %usub, 1 + store i32 %val, i32 addrspace(1)* %out, align 4 + store i1 %carry, i1 addrspace(1)* %carryout + ret void +} + +; FUNC-LABEL: @v_usubo_i32 +; SI: V_SUBREV_I32_e32 +define void @v_usubo_i32(i32 addrspace(1)* %out, i1 addrspace(1)* %carryout, i32 addrspace(1)* %aptr, i32 addrspace(1)* %bptr) nounwind { + %a = load i32 addrspace(1)* %aptr, align 4 + %b = load i32 addrspace(1)* %bptr, align 4 + %usub = call { i32, i1 } @llvm.usub.with.overflow.i32(i32 %a, i32 %b) nounwind + %val = extractvalue { i32, i1 } %usub, 0 + %carry = extractvalue { i32, i1 } %usub, 1 + store i32 %val, i32 addrspace(1)* %out, align 4 + store i1 %carry, i1 addrspace(1)* %carryout + ret void +} + +; FUNC-LABEL: @s_usubo_i64 +; SI: S_SUB_I32 +; SI: S_SUBB_U32 +define void @s_usubo_i64(i64 addrspace(1)* %out, i1 addrspace(1)* %carryout, i64 %a, i64 %b) nounwind { + %usub = call { i64, i1 } @llvm.usub.with.overflow.i64(i64 %a, i64 %b) nounwind + %val = extractvalue { i64, i1 } %usub, 0 + %carry = extractvalue { i64, i1 } %usub, 1 + store i64 %val, i64 addrspace(1)* %out, align 8 + store i1 %carry, i1 addrspace(1)* %carryout + ret void +} + +; FUNC-LABEL: @v_usubo_i64 +; SI: V_SUB_I32 +; SI: V_SUBB_U32 +define void @v_usubo_i64(i64 addrspace(1)* %out, i1 addrspace(1)* %carryout, i64 addrspace(1)* %aptr, i64 addrspace(1)* %bptr) nounwind { + %a = load i64 addrspace(1)* %aptr, align 4 + %b = load i64 addrspace(1)* %bptr, align 4 + %usub = call { i64, i1 } @llvm.usub.with.overflow.i64(i64 %a, i64 %b) nounwind + %val = extractvalue { i64, i1 } %usub, 0 + %carry = extractvalue { i64, i1 } %usub, 1 + store i64 %val, i64 addrspace(1)* %out, align 8 + store i1 %carry, i1 addrspace(1)* %carryout + ret void +} diff --git a/test/CodeGen/R600/vector-alloca.ll b/test/CodeGen/R600/vector-alloca.ll new file mode 100644 index 0000000..6543f6d --- /dev/null +++ b/test/CodeGen/R600/vector-alloca.ll @@ -0,0 +1,74 @@ +; RUN: llc < %s -march=r600 -mcpu=redwood | FileCheck --check-prefix=EG --check-prefix=FUNC %s +; RUN: llc < %s -march=r600 -mcpu=verde -verify-machineinstrs | FileCheck --check-prefix=SI --check-prefix=FUNC %s + +; FUNC-LABEL: @vector_read +; EG: MOV +; EG: MOV +; EG: MOV +; EG: MOV +; EG: MOVA_INT +define void @vector_read(i32 addrspace(1)* %out, i32 %index) { +entry: + %0 = alloca [4 x i32] + %x = getelementptr [4 x i32]* %0, i32 0, i32 0 + %y = getelementptr [4 x i32]* %0, i32 0, i32 1 + %z = getelementptr [4 x i32]* %0, i32 0, i32 2 + %w = getelementptr [4 x i32]* %0, i32 0, i32 3 + store i32 0, i32* %x + store i32 1, i32* %y + store i32 2, i32* %z + store i32 3, i32* %w + %1 = getelementptr [4 x i32]* %0, i32 0, i32 %index + %2 = load i32* %1 + store i32 %2, i32 addrspace(1)* %out + ret void +} + +; FUNC-LABEL: @vector_write +; EG: MOV +; EG: MOV +; EG: MOV +; EG: MOV +; EG: MOVA_INT +; EG: MOVA_INT +define void @vector_write(i32 addrspace(1)* %out, i32 %w_index, i32 %r_index) { +entry: + %0 = alloca [4 x i32] + %x = getelementptr [4 x i32]* %0, i32 0, i32 0 + %y = getelementptr [4 x i32]* %0, i32 0, i32 1 + %z = getelementptr [4 x i32]* %0, i32 0, i32 2 + %w = getelementptr [4 x i32]* %0, i32 0, i32 3 + store i32 0, i32* %x + store i32 0, i32* %y + store i32 0, i32* %z + store i32 0, i32* %w + %1 = getelementptr [4 x i32]* %0, i32 0, i32 %w_index + store i32 1, i32* %1 + %2 = getelementptr [4 x i32]* %0, i32 0, i32 %r_index + %3 = load i32* %2 + store i32 %3, i32 addrspace(1)* %out + ret void +} + +; This test should be optimize to: +; store i32 0, i32 addrspace(1)* %out +; FUNC-LABEL: @bitcast_gep +; CHECK: STORE_RAW +define void @bitcast_gep(i32 addrspace(1)* %out, i32 %w_index, i32 %r_index) { +entry: + %0 = alloca [4 x i32] + %x = getelementptr [4 x i32]* %0, i32 0, i32 0 + %y = getelementptr [4 x i32]* %0, i32 0, i32 1 + %z = getelementptr [4 x i32]* %0, i32 0, i32 2 + %w = getelementptr [4 x i32]* %0, i32 0, i32 3 + store i32 0, i32* %x + store i32 0, i32* %y + store i32 0, i32* %z + store i32 0, i32* %w + %1 = getelementptr [4 x i32]* %0, i32 0, i32 1 + %2 = bitcast i32* %1 to [4 x i32]* + %3 = getelementptr [4 x i32]* %2, i32 0, i32 0 + %4 = load i32* %3 + store i32 %4, i32 addrspace(1)* %out + ret void +} diff --git a/test/CodeGen/R600/xor.ll b/test/CodeGen/R600/xor.ll index 5a5c86d..ab618cf 100644 --- a/test/CodeGen/R600/xor.ll +++ b/test/CodeGen/R600/xor.ll @@ -90,3 +90,69 @@ define void @vector_not_i32(i32 addrspace(1)* %out, i32 addrspace(1)* %in0, i32 store i32 %result, i32 addrspace(1)* %out ret void } + +; SI-CHECK-LABEL: @vector_xor_i64 +; SI-CHECK: V_XOR_B32_e32 +; SI-CHECK: V_XOR_B32_e32 +; SI-CHECK: S_ENDPGM +define void @vector_xor_i64(i64 addrspace(1)* %out, i64 addrspace(1)* %in0, i64 addrspace(1)* %in1) { + %a = load i64 addrspace(1)* %in0 + %b = load i64 addrspace(1)* %in1 + %result = xor i64 %a, %b + store i64 %result, i64 addrspace(1)* %out + ret void +} + +; SI-CHECK-LABEL: @scalar_xor_i64 +; SI-CHECK: S_XOR_B64 +; SI-CHECK: S_ENDPGM +define void @scalar_xor_i64(i64 addrspace(1)* %out, i64 %a, i64 %b) { + %result = xor i64 %a, %b + store i64 %result, i64 addrspace(1)* %out + ret void +} + +; SI-CHECK-LABEL: @scalar_not_i64 +; SI-CHECK: S_NOT_B64 +define void @scalar_not_i64(i64 addrspace(1)* %out, i64 %a) { + %result = xor i64 %a, -1 + store i64 %result, i64 addrspace(1)* %out + ret void +} + +; SI-CHECK-LABEL: @vector_not_i64 +; SI-CHECK: V_NOT_B32 +; SI-CHECK: V_NOT_B32 +define void @vector_not_i64(i64 addrspace(1)* %out, i64 addrspace(1)* %in0, i64 addrspace(1)* %in1) { + %a = load i64 addrspace(1)* %in0 + %b = load i64 addrspace(1)* %in1 + %result = xor i64 %a, -1 + store i64 %result, i64 addrspace(1)* %out + ret void +} + +; Test that we have a pattern to match xor inside a branch. +; Note that in the future the backend may be smart enough to +; use an SALU instruction for this. + +; SI-CHECK-LABEL: @xor_cf +; SI-CHECK: V_XOR +; SI-CHECK: V_XOR +define void @xor_cf(i64 addrspace(1)* %out, i64 addrspace(1)* %in, i64 %a, i64 %b) { +entry: + %0 = icmp eq i64 %a, 0 + br i1 %0, label %if, label %else + +if: + %1 = xor i64 %a, %b + br label %endif + +else: + %2 = load i64 addrspace(1)* %in + br label %endif + +endif: + %3 = phi i64 [%1, %if], [%2, %else] + store i64 %3, i64 addrspace(1)* %out + ret void +} |