diff options
author | Stephen Hines <srhines@google.com> | 2014-05-29 02:49:00 -0700 |
---|---|---|
committer | Stephen Hines <srhines@google.com> | 2014-05-29 02:49:00 -0700 |
commit | dce4a407a24b04eebc6a376f8e62b41aaa7b071f (patch) | |
tree | dcebc53f2b182f145a2e659393bf9a0472cedf23 /test/CodeGen/R600 | |
parent | 220b921aed042f9e520c26cffd8282a94c66c3d5 (diff) | |
download | external_llvm-dce4a407a24b04eebc6a376f8e62b41aaa7b071f.zip external_llvm-dce4a407a24b04eebc6a376f8e62b41aaa7b071f.tar.gz external_llvm-dce4a407a24b04eebc6a376f8e62b41aaa7b071f.tar.bz2 |
Update LLVM for 3.5 rebase (r209712).
Change-Id: I149556c940fb7dc92d075273c87ff584f400941f
Diffstat (limited to 'test/CodeGen/R600')
77 files changed, 2488 insertions, 360 deletions
diff --git a/test/CodeGen/R600/32-bit-local-address-space.ll b/test/CodeGen/R600/32-bit-local-address-space.ll index fffaefe..7dec426 100644 --- a/test/CodeGen/R600/32-bit-local-address-space.ll +++ b/test/CodeGen/R600/32-bit-local-address-space.ll @@ -33,7 +33,7 @@ entry: ; CHECK-LABEL: @local_address_gep_const_offset ; CHECK: V_MOV_B32_e32 [[VPTR:v[0-9]+]], s{{[0-9]+}} -; CHECK: DS_READ_B32 v{{[0-9]+}}, [[VPTR]], 4, +; CHECK: DS_READ_B32 v{{[0-9]+}}, [[VPTR]], 0x4, define void @local_address_gep_const_offset(i32 addrspace(1)* %out, i32 addrspace(3)* %in) { entry: %0 = getelementptr i32 addrspace(3)* %in, i32 1 @@ -44,7 +44,7 @@ entry: ; Offset too large, can't fold into 16-bit immediate offset. ; CHECK-LABEL: @local_address_gep_large_const_offset -; CHECK: S_ADD_I32 [[SPTR:s[0-9]]], s{{[0-9]+}}, 65540 +; CHECK: S_ADD_I32 [[SPTR:s[0-9]]], s{{[0-9]+}}, 0x10004 ; CHECK: V_MOV_B32_e32 [[VPTR:v[0-9]+]], [[SPTR]] ; CHECK: DS_READ_B32 [[VPTR]] define void @local_address_gep_large_const_offset(i32 addrspace(1)* %out, i32 addrspace(3)* %in) { @@ -119,7 +119,7 @@ define void @local_address_gep_store(i32 addrspace(3)* %out, i32, i32 %val, i32 ; CHECK-LABEL: @local_address_gep_const_offset_store ; CHECK: V_MOV_B32_e32 [[VPTR:v[0-9]+]], s{{[0-9]+}} ; CHECK: V_MOV_B32_e32 [[VAL:v[0-9]+]], s{{[0-9]+}} -; CHECK: DS_WRITE_B32 [[VPTR]], [[VAL]], 4 +; CHECK: DS_WRITE_B32 [[VPTR]], [[VAL]], 0x4 define void @local_address_gep_const_offset_store(i32 addrspace(3)* %out, i32 %val) { %gep = getelementptr i32 addrspace(3)* %out, i32 1 store i32 %val, i32 addrspace(3)* %gep, align 4 @@ -128,7 +128,7 @@ define void @local_address_gep_const_offset_store(i32 addrspace(3)* %out, i32 %v ; Offset too large, can't fold into 16-bit immediate offset. 
; CHECK-LABEL: @local_address_gep_large_const_offset_store -; CHECK: S_ADD_I32 [[SPTR:s[0-9]]], s{{[0-9]+}}, 65540 +; CHECK: S_ADD_I32 [[SPTR:s[0-9]]], s{{[0-9]+}}, 0x10004 ; CHECK: V_MOV_B32_e32 [[VPTR:v[0-9]+]], [[SPTR]] ; CHECK: DS_WRITE_B32 [[VPTR]], v{{[0-9]+}}, 0 define void @local_address_gep_large_const_offset_store(i32 addrspace(3)* %out, i32 %val) { diff --git a/test/CodeGen/R600/64bit-kernel-args.ll b/test/CodeGen/R600/64bit-kernel-args.ll index 0d6bfb1..2d82c1e 100644 --- a/test/CodeGen/R600/64bit-kernel-args.ll +++ b/test/CodeGen/R600/64bit-kernel-args.ll @@ -1,8 +1,8 @@ ; RUN: llc < %s -march=r600 -mcpu=tahiti -verify-machineinstrs | FileCheck %s --check-prefix=SI-CHECK ; SI-CHECK: @f64_kernel_arg -; SI-CHECK-DAG: S_LOAD_DWORDX2 s[{{[0-9]:[0-9]}}], s[0:1], 9 -; SI-CHECK-DAG: S_LOAD_DWORDX2 s[{{[0-9]:[0-9]}}], s[0:1], 11 +; SI-CHECK-DAG: S_LOAD_DWORDX2 s[{{[0-9]:[0-9]}}], s[0:1], 0x9 +; SI-CHECK-DAG: S_LOAD_DWORDX2 s[{{[0-9]:[0-9]}}], s[0:1], 0xb ; SI-CHECK: BUFFER_STORE_DWORDX2 define void @f64_kernel_arg(double addrspace(1)* %out, double %in) { entry: diff --git a/test/CodeGen/R600/add.ll b/test/CodeGen/R600/add.ll index e9db52a..711a2bc 100644 --- a/test/CodeGen/R600/add.ll +++ b/test/CodeGen/R600/add.ll @@ -140,3 +140,28 @@ entry: store i64 %1, i64 addrspace(1)* %out ret void } + +; Test i64 add inside a branch. We don't allow SALU instructions inside of +; branches. +; FIXME: We are being conservative here. We could allow this in some cases. 
+; FUNC-LABEL: @add64_in_branch +; SI-CHECK-NOT: S_ADD_I32 +; SI-CHECK-NOT: S_ADDC_U32 +define void @add64_in_branch(i64 addrspace(1)* %out, i64 addrspace(1)* %in, i64 %a, i64 %b, i64 %c) { +entry: + %0 = icmp eq i64 %a, 0 + br i1 %0, label %if, label %else + +if: + %1 = load i64 addrspace(1)* %in + br label %endif + +else: + %2 = add i64 %a, %b + br label %endif + +endif: + %3 = phi i64 [%1, %if], [%2, %else] + store i64 %3, i64 addrspace(1)* %out + ret void +} diff --git a/test/CodeGen/R600/add_i64.ll b/test/CodeGen/R600/add_i64.ll index 7081b07..c9eaeda 100644 --- a/test/CodeGen/R600/add_i64.ll +++ b/test/CodeGen/R600/add_i64.ll @@ -1,4 +1,4 @@ -; RUN: llc -march=r600 -mcpu=SI < %s | FileCheck -check-prefix=SI %s +; RUN: llc -march=r600 -mcpu=SI -verify-machineinstrs< %s | FileCheck -check-prefix=SI %s declare i32 @llvm.r600.read.tidig.x() readnone diff --git a/test/CodeGen/R600/address-space.ll b/test/CodeGen/R600/address-space.ll index 15d2ed2..f75a8ac 100644 --- a/test/CodeGen/R600/address-space.ll +++ b/test/CodeGen/R600/address-space.ll @@ -1,4 +1,4 @@ -; RUN: llc -march=r600 -mcpu=SI < %s | FileCheck %s +; RUN: llc -march=r600 -mcpu=SI -verify-machineinstrs< %s | FileCheck %s ; Test that codegenprepare understands address space sizes @@ -10,8 +10,8 @@ ; CHECK-LABEL: @do_as_ptr_calcs: ; CHECK: S_LOAD_DWORD [[SREG1:s[0-9]+]], ; CHECK: V_MOV_B32_e32 [[VREG1:v[0-9]+]], [[SREG1]] -; CHECK: DS_READ_B32 v{{[0-9]+}}, [[VREG1]], 20 -; CHECK: DS_READ_B32 v{{[0-9]+}}, v{{[0-9]+}}, 12 +; CHECK: DS_READ_B32 v{{[0-9]+}}, [[VREG1]], 0x14 +; CHECK: DS_READ_B32 v{{[0-9]+}}, v{{[0-9]+}}, 0xc define void @do_as_ptr_calcs(%struct.foo addrspace(3)* nocapture %ptr) nounwind { entry: %x = getelementptr inbounds %struct.foo addrspace(3)* %ptr, i32 0, i32 1, i32 0 diff --git a/test/CodeGen/R600/array-ptr-calc-i32.ll b/test/CodeGen/R600/array-ptr-calc-i32.ll index cb2a1c8..c2362da 100644 --- a/test/CodeGen/R600/array-ptr-calc-i32.ll +++ b/test/CodeGen/R600/array-ptr-calc-i32.ll @@ 
-1,4 +1,4 @@ -; RUN: llc -march=r600 -mcpu=SI < %s | FileCheck -check-prefix=SI %s +; RUN: llc -verify-machineinstrs -march=r600 -mcpu=SI < %s | FileCheck -check-prefix=SI %s declare i32 @llvm.SI.tid() nounwind readnone declare void @llvm.AMDGPU.barrier.local() nounwind noduplicate diff --git a/test/CodeGen/R600/array-ptr-calc-i64.ll b/test/CodeGen/R600/array-ptr-calc-i64.ll index 652bbfe..e254c5f 100644 --- a/test/CodeGen/R600/array-ptr-calc-i64.ll +++ b/test/CodeGen/R600/array-ptr-calc-i64.ll @@ -1,5 +1,5 @@ ; XFAIL: * -; RUN: llc < %s -march=r600 -mcpu=SI | FileCheck --check-prefix=SI %s +; RUN: llc < %s -march=r600 -mcpu=SI -verify-machineinstrs| FileCheck --check-prefix=SI %s declare i32 @llvm.SI.tid() readnone diff --git a/test/CodeGen/R600/call.ll b/test/CodeGen/R600/call.ll new file mode 100644 index 0000000..d803474 --- /dev/null +++ b/test/CodeGen/R600/call.ll @@ -0,0 +1,33 @@ +; RUN: not llc -march=r600 -mcpu=SI -verify-machineinstrs< %s 2>&1 | FileCheck %s +; RUN: not llc -march=r600 -mcpu=cypress < %s 2>&1 | FileCheck %s + +; CHECK: error: unsupported call to function defined_function in test_call + + +declare i32 @external_function(i32) nounwind + +define i32 @defined_function(i32 %x) nounwind noinline { + %y = add i32 %x, 8 + ret i32 %y +} + +define void @test_call(i32 addrspace(1)* %out, i32 addrspace(1)* %in) { + %b_ptr = getelementptr i32 addrspace(1)* %in, i32 1 + %a = load i32 addrspace(1)* %in + %b = load i32 addrspace(1)* %b_ptr + %c = call i32 @defined_function(i32 %b) nounwind + %result = add i32 %a, %c + store i32 %result, i32 addrspace(1)* %out + ret void +} + +define void @test_call_external(i32 addrspace(1)* %out, i32 addrspace(1)* %in) { + %b_ptr = getelementptr i32 addrspace(1)* %in, i32 1 + %a = load i32 addrspace(1)* %in + %b = load i32 addrspace(1)* %b_ptr + %c = call i32 @external_function(i32 %b) nounwind + %result = add i32 %a, %c + store i32 %result, i32 addrspace(1)* %out + ret void +} + diff --git 
a/test/CodeGen/R600/extload.ll b/test/CodeGen/R600/extload.ll index 2e70d47..dc056e0 100644 --- a/test/CodeGen/R600/extload.ll +++ b/test/CodeGen/R600/extload.ll @@ -1,5 +1,5 @@ ; RUN: llc -march=r600 -mcpu=cypress < %s | FileCheck -check-prefix=EG -check-prefix=FUNC %s -; RUN: llc -march=r600 -mcpu=SI < %s | FileCheck -check-prefix=SI -check-prefix=FUNC %s +; RUN: llc -march=r600 -mcpu=SI -verify-machineinstrs< %s | FileCheck -check-prefix=SI -check-prefix=FUNC %s ; FUNC-LABEL: @anyext_load_i8: ; EG: AND_INT @@ -87,8 +87,9 @@ define void @sextload_global_i32_to_i64(i64 addrspace(1)* %out, i32 addrspace(1) } ; FUNC-LABEL: @zextload_global_i8_to_i64 +; SI: S_MOV_B32 [[ZERO:s[0-9]+]], 0 ; SI: BUFFER_LOAD_UBYTE [[LOAD:v[0-9]+]], -; SI: V_MOV_B32_e32 {{v[0-9]+}}, 0 +; SI: V_MOV_B32_e32 {{v[0-9]+}}, [[ZERO]] ; SI: BUFFER_STORE_DWORDX2 define void @zextload_global_i8_to_i64(i64 addrspace(1)* %out, i8 addrspace(1)* %in) nounwind { %a = load i8 addrspace(1)* %in, align 8 @@ -98,8 +99,9 @@ define void @zextload_global_i8_to_i64(i64 addrspace(1)* %out, i8 addrspace(1)* } ; FUNC-LABEL: @zextload_global_i16_to_i64 +; SI: S_MOV_B32 [[ZERO:s[0-9]+]], 0 ; SI: BUFFER_LOAD_USHORT [[LOAD:v[0-9]+]], -; SI: V_MOV_B32_e32 {{v[0-9]+}}, 0 +; SI: V_MOV_B32_e32 {{v[0-9]+}}, [[ZERO]] ; SI: BUFFER_STORE_DWORDX2 define void @zextload_global_i16_to_i64(i64 addrspace(1)* %out, i16 addrspace(1)* %in) nounwind { %a = load i16 addrspace(1)* %in, align 8 @@ -109,8 +111,9 @@ define void @zextload_global_i16_to_i64(i64 addrspace(1)* %out, i16 addrspace(1) } ; FUNC-LABEL: @zextload_global_i32_to_i64 +; SI: S_MOV_B32 [[ZERO:s[0-9]+]], 0 ; SI: BUFFER_LOAD_DWORD [[LOAD:v[0-9]+]], -; SI: V_MOV_B32_e32 {{v[0-9]+}}, 0 +; SI: V_MOV_B32_e32 {{v[0-9]+}}, [[ZERO]] ; SI: BUFFER_STORE_DWORDX2 define void @zextload_global_i32_to_i64(i64 addrspace(1)* %out, i32 addrspace(1)* %in) nounwind { %a = load i32 addrspace(1)* %in, align 8 diff --git a/test/CodeGen/R600/extract_vector_elt_i16.ll 
b/test/CodeGen/R600/extract_vector_elt_i16.ll new file mode 100644 index 0000000..5cd1b04 --- /dev/null +++ b/test/CodeGen/R600/extract_vector_elt_i16.ll @@ -0,0 +1,29 @@ +; RUN: llc -march=r600 -mcpu=SI -verify-machineinstrs< %s | FileCheck -check-prefix=SI -check-prefix=FUNC %s + +; FUNC-LABEL: @extract_vector_elt_v2i16 +; SI: BUFFER_LOAD_USHORT +; SI: BUFFER_STORE_SHORT +; SI: BUFFER_LOAD_USHORT +; SI: BUFFER_STORE_SHORT +define void @extract_vector_elt_v2i16(i16 addrspace(1)* %out, <2 x i16> %foo) nounwind { + %p0 = extractelement <2 x i16> %foo, i32 0 + %p1 = extractelement <2 x i16> %foo, i32 1 + %out1 = getelementptr i16 addrspace(1)* %out, i32 1 + store i16 %p1, i16 addrspace(1)* %out, align 2 + store i16 %p0, i16 addrspace(1)* %out1, align 2 + ret void +} + +; FUNC-LABEL: @extract_vector_elt_v4i16 +; SI: BUFFER_LOAD_USHORT +; SI: BUFFER_STORE_SHORT +; SI: BUFFER_LOAD_USHORT +; SI: BUFFER_STORE_SHORT +define void @extract_vector_elt_v4i16(i16 addrspace(1)* %out, <4 x i16> %foo) nounwind { + %p0 = extractelement <4 x i16> %foo, i32 0 + %p1 = extractelement <4 x i16> %foo, i32 2 + %out1 = getelementptr i16 addrspace(1)* %out, i32 1 + store i16 %p1, i16 addrspace(1)* %out, align 2 + store i16 %p0, i16 addrspace(1)* %out1, align 2 + ret void +} diff --git a/test/CodeGen/R600/fabs.ll b/test/CodeGen/R600/fabs.ll index 2cd3a4f..b87ce22 100644 --- a/test/CodeGen/R600/fabs.ll +++ b/test/CodeGen/R600/fabs.ll @@ -49,6 +49,17 @@ entry: ret void } +; SI-CHECK-LABEL: @fabs_fold +; SI-CHECK-NOT: V_AND_B32_e32 +; SI-CHECK: V_MUL_F32_e64 v{{[0-9]+}}, s{{[0-9]+}}, |v{{[0-9]+}}| +define void @fabs_fold(float addrspace(1)* %out, float %in0, float %in1) { +entry: + %0 = call float @fabs(float %in0) + %1 = fmul float %0, %in1 + store float %1, float addrspace(1)* %out + ret void +} + declare float @fabs(float ) readnone declare <2 x float> @llvm.fabs.v2f32(<2 x float> ) readnone declare <4 x float> @llvm.fabs.v4f32(<4 x float> ) readnone diff --git 
a/test/CodeGen/R600/fconst64.ll b/test/CodeGen/R600/fconst64.ll index 5c5ee7e..9c3a7e3 100644 --- a/test/CodeGen/R600/fconst64.ll +++ b/test/CodeGen/R600/fconst64.ll @@ -1,8 +1,8 @@ ; RUN: llc < %s -march=r600 -mcpu=tahiti -verify-machineinstrs | FileCheck %s ; CHECK: @fconst_f64 -; CHECK: V_MOV_B32_e32 {{v[0-9]+}}, 0.000000e+00 -; CHECK-NEXT: V_MOV_B32_e32 {{v[0-9]+}}, 2.312500e+00 +; CHECK-DAG: S_MOV_B32 {{s[0-9]+}}, 0x40140000 +; CHECK-DAG: S_MOV_B32 {{s[0-9]+}}, 0 define void @fconst_f64(double addrspace(1)* %out, double addrspace(1)* %in) { %r1 = load double addrspace(1)* %in diff --git a/test/CodeGen/R600/fneg.ll b/test/CodeGen/R600/fneg.ll index f4e6be6..4cddc73 100644 --- a/test/CodeGen/R600/fneg.ll +++ b/test/CodeGen/R600/fneg.ll @@ -51,7 +51,7 @@ entry: ; R600-CHECK: -KC0[2].Z ; SI-CHECK-LABEL: @fneg_free ; XXX: We could use V_ADD_F32_e64 with the negate bit here instead. -; SI-CHECK: V_SUB_F32_e64 v{{[0-9]}}, 0.000000e+00, s{{[0-9]}}, 0, 0, 0, 0 +; SI-CHECK: V_SUB_F32_e64 v{{[0-9]}}, 0.000000e+00, s{{[0-9]}}, 0, 0 define void @fneg_free(float addrspace(1)* %out, i32 %in) { entry: %0 = bitcast i32 %in to float @@ -59,3 +59,14 @@ entry: store float %1, float addrspace(1)* %out ret void } + +; SI-CHECK-LABEL: @fneg_fold +; SI-CHECK-NOT: V_XOR_B32 +; SI-CHECK: V_MUL_F32_e64 v{{[0-9]+}}, s{{[0-9]+}}, -v{{[0-9]+}} +define void @fneg_fold(float addrspace(1)* %out, float %in) { +entry: + %0 = fsub float -0.0, %in + %1 = fmul float %0, %in + store float %1, float addrspace(1)* %out + ret void +} diff --git a/test/CodeGen/R600/fp_to_uint.f64.ll b/test/CodeGen/R600/fp_to_uint.f64.ll new file mode 100644 index 0000000..bf607ce --- /dev/null +++ b/test/CodeGen/R600/fp_to_uint.f64.ll @@ -0,0 +1,9 @@ +; RUN: llc -march=r600 -mcpu=SI -verify-machineinstrs < %s | FileCheck -check-prefix=SI %s + +; SI-LABEL: @fp_to_uint_i32_f64 +; SI: V_CVT_U32_F64_e32 +define void @fp_to_uint_i32_f64(i32 addrspace(1)* %out, double %in) { + %cast = fptoui double %in to i32 + store i32 
%cast, i32 addrspace(1)* %out, align 4 + ret void +} diff --git a/test/CodeGen/R600/gep-address-space.ll b/test/CodeGen/R600/gep-address-space.ll index ee914fa..ab2c0bf 100644 --- a/test/CodeGen/R600/gep-address-space.ll +++ b/test/CodeGen/R600/gep-address-space.ll @@ -1,9 +1,9 @@ -; RUN: llc -march=r600 -mcpu=SI < %s | FileCheck %s +; RUN: llc -march=r600 -mcpu=SI -verify-machineinstrs< %s | FileCheck %s define void @use_gep_address_space([1024 x i32] addrspace(3)* %array) nounwind { ; CHECK-LABEL: @use_gep_address_space: ; CHECK: V_MOV_B32_e32 [[PTR:v[0-9]+]], s{{[0-9]+}} -; CHECK: DS_WRITE_B32 [[PTR]], v{{[0-9]+}}, 64 +; CHECK: DS_WRITE_B32 [[PTR]], v{{[0-9]+}}, 0x40 %p = getelementptr [1024 x i32] addrspace(3)* %array, i16 0, i16 16 store i32 99, i32 addrspace(3)* %p ret void diff --git a/test/CodeGen/R600/gv-const-addrspace-fail.ll b/test/CodeGen/R600/gv-const-addrspace-fail.ll new file mode 100644 index 0000000..ebd7811 --- /dev/null +++ b/test/CodeGen/R600/gv-const-addrspace-fail.ll @@ -0,0 +1,58 @@ +; XFAIL: * +; RUN: llc -march=r600 -mcpu=redwood < %s | FileCheck -check-prefix=EG -check-prefix=FUNC %s +; RUN: llc -march=r600 -mcpu=SI -verify-machineinstrs < %s | FileCheck -check-prefix=SI -check-prefix=FUNC %s + + +@a = internal addrspace(2) constant [1 x i8] [ i8 7 ], align 1 + +; FUNC-LABEL: @test_i8 +; EG: CF_END +; SI: BUFFER_STORE_BYTE +; SI: S_ENDPGM +define void @test_i8( i32 %s, i8 addrspace(1)* %out) #3 { + %arrayidx = getelementptr inbounds [1 x i8] addrspace(2)* @a, i32 0, i32 %s + %1 = load i8 addrspace(2)* %arrayidx, align 1 + store i8 %1, i8 addrspace(1)* %out + ret void +} + +@b = internal addrspace(2) constant [1 x i16] [ i16 7 ], align 2 + +; FUNC-LABEL: @test_i16 +; EG: CF_END +; SI: BUFFER_STORE_SHORT +; SI: S_ENDPGM +define void @test_i16( i32 %s, i16 addrspace(1)* %out) #3 { + %arrayidx = getelementptr inbounds [1 x i16] addrspace(2)* @b, i32 0, i32 %s + %1 = load i16 addrspace(2)* %arrayidx, align 2 + store i16 %1, i16 addrspace(1)* 
%out + ret void +} + +%struct.bar = type { float, [5 x i8] } + +; The illegal i8s aren't handled +@struct_bar_gv = internal addrspace(2) unnamed_addr constant [1 x %struct.bar] [ %struct.bar { float 16.0, [5 x i8] [i8 0, i8 1, i8 2, i8 3, i8 4] } ] + +; FUNC-LABEL: @struct_bar_gv_load +define void @struct_bar_gv_load(i8 addrspace(1)* %out, i32 %index) { + %gep = getelementptr inbounds [1 x %struct.bar] addrspace(2)* @struct_bar_gv, i32 0, i32 0, i32 1, i32 %index + %load = load i8 addrspace(2)* %gep, align 1 + store i8 %load, i8 addrspace(1)* %out, align 1 + ret void +} + + +; The private load isn't scalarzied. +@array_vector_gv = internal addrspace(2) constant [4 x <4 x i32>] [ <4 x i32> <i32 1, i32 2, i32 3, i32 4>, + <4 x i32> <i32 5, i32 6, i32 7, i32 8>, + <4 x i32> <i32 9, i32 10, i32 11, i32 12>, + <4 x i32> <i32 13, i32 14, i32 15, i32 16> ] + +; FUNC-LABEL: @array_vector_gv_load +define void @array_vector_gv_load(<4 x i32> addrspace(1)* %out, i32 %index) { + %gep = getelementptr inbounds [4 x <4 x i32>] addrspace(2)* @array_vector_gv, i32 0, i32 %index + %load = load <4 x i32> addrspace(2)* %gep, align 16 + store <4 x i32> %load, <4 x i32> addrspace(1)* %out, align 16 + ret void +} diff --git a/test/CodeGen/R600/gv-const-addrspace.ll b/test/CodeGen/R600/gv-const-addrspace.ll index cda7ab1..0176061 100644 --- a/test/CodeGen/R600/gv-const-addrspace.ll +++ b/test/CodeGen/R600/gv-const-addrspace.ll @@ -1,4 +1,8 @@ -; RUN: llc < %s -march=r600 -mcpu=redwood | FileCheck %s --check-prefix=R600 --check-prefix=FUNC +; RUN: llc -march=r600 -mcpu=redwood < %s | FileCheck -check-prefix=EG -check-prefix=FUNC %s +; RUN: llc -march=r600 -mcpu=SI -verify-machineinstrs < %s | FileCheck -check-prefix=SI -check-prefix=FUNC %s + + +@b = internal addrspace(2) constant [1 x i16] [ i16 7 ], align 2 ; XXX: Test on SI once 64-bit adds are supportes. 
@@ -6,12 +10,12 @@ ; FUNC-LABEL: @float -; R600-DAG: MOV {{\** *}}T2.X -; R600-DAG: MOV {{\** *}}T3.X -; R600-DAG: MOV {{\** *}}T4.X -; R600-DAG: MOV {{\** *}}T5.X -; R600-DAG: MOV {{\** *}}T6.X -; R600: MOVA_INT +; EG-DAG: MOV {{\** *}}T2.X +; EG-DAG: MOV {{\** *}}T3.X +; EG-DAG: MOV {{\** *}}T4.X +; EG-DAG: MOV {{\** *}}T5.X +; EG-DAG: MOV {{\** *}}T6.X +; EG: MOVA_INT define void @float(float addrspace(1)* %out, i32 %index) { entry: @@ -25,12 +29,12 @@ entry: ; FUNC-LABEL: @i32 -; R600-DAG: MOV {{\** *}}T2.X -; R600-DAG: MOV {{\** *}}T3.X -; R600-DAG: MOV {{\** *}}T4.X -; R600-DAG: MOV {{\** *}}T5.X -; R600-DAG: MOV {{\** *}}T6.X -; R600: MOVA_INT +; EG-DAG: MOV {{\** *}}T2.X +; EG-DAG: MOV {{\** *}}T3.X +; EG-DAG: MOV {{\** *}}T4.X +; EG-DAG: MOV {{\** *}}T5.X +; EG-DAG: MOV {{\** *}}T6.X +; EG: MOVA_INT define void @i32(i32 addrspace(1)* %out, i32 %index) { entry: @@ -39,3 +43,30 @@ entry: store i32 %1, i32 addrspace(1)* %out ret void } + + +%struct.foo = type { float, [5 x i32] } + +@struct_foo_gv = internal addrspace(2) unnamed_addr constant [1 x %struct.foo] [ %struct.foo { float 16.0, [5 x i32] [i32 0, i32 1, i32 2, i32 3, i32 4] } ] + +; FUNC-LABEL: @struct_foo_gv_load + +define void @struct_foo_gv_load(i32 addrspace(1)* %out, i32 %index) { + %gep = getelementptr inbounds [1 x %struct.foo] addrspace(2)* @struct_foo_gv, i32 0, i32 0, i32 1, i32 %index + %load = load i32 addrspace(2)* %gep, align 4 + store i32 %load, i32 addrspace(1)* %out, align 4 + ret void +} + +@array_v1_gv = internal addrspace(2) constant [4 x <1 x i32>] [ <1 x i32> <i32 1>, + <1 x i32> <i32 2>, + <1 x i32> <i32 3>, + <1 x i32> <i32 4> ] + +; FUNC-LABEL: @array_v1_gv_load +define void @array_v1_gv_load(<1 x i32> addrspace(1)* %out, i32 %index) { + %gep = getelementptr inbounds [4 x <1 x i32>] addrspace(2)* @array_v1_gv, i32 0, i32 %index + %load = load <1 x i32> addrspace(2)* %gep, align 4 + store <1 x i32> %load, <1 x i32> addrspace(1)* %out, align 4 + ret void +} diff --git 
a/test/CodeGen/R600/infinite-loop.ll b/test/CodeGen/R600/infinite-loop.ll index a60bc37..68ffaae 100644 --- a/test/CodeGen/R600/infinite-loop.ll +++ b/test/CodeGen/R600/infinite-loop.ll @@ -1,7 +1,7 @@ ; RUN: llc -march=r600 -mcpu=SI -verify-machineinstrs < %s | FileCheck -check-prefix=SI %s ; SI-LABEL: @infinite_loop: -; SI: V_MOV_B32_e32 [[REG:v[0-9]+]], 999 +; SI: V_MOV_B32_e32 [[REG:v[0-9]+]], 0x3e7 ; SI: BB0_1: ; SI: BUFFER_STORE_DWORD [[REG]] ; SI: S_WAITCNT vmcnt(0) expcnt(0) diff --git a/test/CodeGen/R600/insert_vector_elt.ll b/test/CodeGen/R600/insert_vector_elt.ll index 530d1cc..43b4efc 100644 --- a/test/CodeGen/R600/insert_vector_elt.ll +++ b/test/CodeGen/R600/insert_vector_elt.ll @@ -1,4 +1,4 @@ -; RUN: llc -march=r600 -mcpu=SI < %s | FileCheck -check-prefix=SI %s +; RUN: llc -verify-machineinstrs -march=r600 -mcpu=SI < %s | FileCheck -check-prefix=SI %s ; FIXME: Broken on evergreen ; FIXME: For some reason the 8 and 16 vectors are being stored as @@ -173,3 +173,29 @@ define void @dynamic_insertelement_v16i8(<16 x i8> addrspace(1)* %out, <16 x i8> store <16 x i8> %vecins, <16 x i8> addrspace(1)* %out, align 16 ret void } + +; This test requires handling INSERT_SUBREG in SIFixSGPRCopies. Check that +; the compiler doesn't crash. 
+; SI-LABEL: @insert_split_bb +define void @insert_split_bb(<2 x i32> addrspace(1)* %out, i32 addrspace(1)* %in, i32 %a, i32 %b) { +entry: + %0 = insertelement <2 x i32> undef, i32 %a, i32 0 + %1 = icmp eq i32 %a, 0 + br i1 %1, label %if, label %else + +if: + %2 = load i32 addrspace(1)* %in + %3 = insertelement <2 x i32> %0, i32 %2, i32 1 + br label %endif + +else: + %4 = getelementptr i32 addrspace(1)* %in, i32 1 + %5 = load i32 addrspace(1)* %4 + %6 = insertelement <2 x i32> %0, i32 %5, i32 1 + br label %endif + +endif: + %7 = phi <2 x i32> [%3, %if], [%6, %else] + store <2 x i32> %7, <2 x i32> addrspace(1)* %out + ret void +} diff --git a/test/CodeGen/R600/insert_vector_elt_f64.ll b/test/CodeGen/R600/insert_vector_elt_f64.ll index e334be1..595bc59 100644 --- a/test/CodeGen/R600/insert_vector_elt_f64.ll +++ b/test/CodeGen/R600/insert_vector_elt_f64.ll @@ -1,6 +1,6 @@ ; REQUIRES: asserts ; XFAIL: * -; RUN: llc -march=r600 -mcpu=SI < %s | FileCheck -check-prefix=SI %s +; RUN: llc -march=r600 -mcpu=SI -verify-machineinstrs< %s | FileCheck -check-prefix=SI %s ; SI-LABEL: @dynamic_insertelement_v2f64: diff --git a/test/CodeGen/R600/kernel-args.ll b/test/CodeGen/R600/kernel-args.ll index 247e316..6fc6979 100644 --- a/test/CodeGen/R600/kernel-args.ll +++ b/test/CodeGen/R600/kernel-args.ll @@ -17,7 +17,7 @@ entry: ; EG-CHECK-LABEL: @i8_zext_arg ; EG-CHECK: MOV {{[ *]*}}T{{[0-9]+\.[XYZW]}}, KC0[2].Z ; SI-CHECK-LABEL: @i8_zext_arg -; SI-CHECK: S_LOAD_DWORD s{{[0-9]}}, s[0:1], 11 +; SI-CHECK: S_LOAD_DWORD s{{[0-9]}}, s[0:1], 0xb define void @i8_zext_arg(i32 addrspace(1)* nocapture %out, i8 zeroext %in) nounwind { entry: @@ -29,7 +29,7 @@ entry: ; EG-CHECK-LABEL: @i8_sext_arg ; EG-CHECK: MOV {{[ *]*}}T{{[0-9]+\.[XYZW]}}, KC0[2].Z ; SI-CHECK-LABEL: @i8_sext_arg -; SI-CHECK: S_LOAD_DWORD s{{[0-9]}}, s[0:1], 11 +; SI-CHECK: S_LOAD_DWORD s{{[0-9]}}, s[0:1], 0xb define void @i8_sext_arg(i32 addrspace(1)* nocapture %out, i8 signext %in) nounwind { entry: @@ -53,7 +53,7 @@ entry: ; 
EG-CHECK-LABEL: @i16_zext_arg ; EG-CHECK: MOV {{[ *]*}}T{{[0-9]+\.[XYZW]}}, KC0[2].Z ; SI-CHECK-LABEL: @i16_zext_arg -; SI-CHECK: S_LOAD_DWORD s{{[0-9]}}, s[0:1], 11 +; SI-CHECK: S_LOAD_DWORD s{{[0-9]}}, s[0:1], 0xb define void @i16_zext_arg(i32 addrspace(1)* nocapture %out, i16 zeroext %in) nounwind { entry: @@ -65,7 +65,7 @@ entry: ; EG-CHECK-LABEL: @i16_sext_arg ; EG-CHECK: MOV {{[ *]*}}T{{[0-9]+\.[XYZW]}}, KC0[2].Z ; SI-CHECK-LABEL: @i16_sext_arg -; SI-CHECK: S_LOAD_DWORD s{{[0-9]}}, s[0:1], 11 +; SI-CHECK: S_LOAD_DWORD s{{[0-9]}}, s[0:1], 0xb define void @i16_sext_arg(i32 addrspace(1)* nocapture %out, i16 signext %in) nounwind { entry: @@ -77,7 +77,7 @@ entry: ; EG-CHECK-LABEL: @i32_arg ; EG-CHECK: T{{[0-9]\.[XYZW]}}, KC0[2].Z ; SI-CHECK-LABEL: @i32_arg -; S_LOAD_DWORD s{{[0-9]}}, s[0:1], 11 +; S_LOAD_DWORD s{{[0-9]}}, s[0:1], 0xb define void @i32_arg(i32 addrspace(1)* nocapture %out, i32 %in) nounwind { entry: store i32 %in, i32 addrspace(1)* %out, align 4 @@ -87,7 +87,7 @@ entry: ; EG-CHECK-LABEL: @f32_arg ; EG-CHECK: T{{[0-9]\.[XYZW]}}, KC0[2].Z ; SI-CHECK-LABEL: @f32_arg -; S_LOAD_DWORD s{{[0-9]}}, s[0:1], 11 +; S_LOAD_DWORD s{{[0-9]}}, s[0:1], 0xb define void @f32_arg(float addrspace(1)* nocapture %out, float %in) nounwind { entry: store float %in, float addrspace(1)* %out, align 4 @@ -122,7 +122,7 @@ entry: ; EG-CHECK-DAG: T{{[0-9]\.[XYZW]}}, KC0[3].X ; EG-CHECK-DAG: T{{[0-9]\.[XYZW]}}, KC0[2].W ; SI-CHECK-LABEL: @v2i32_arg -; SI-CHECK: S_LOAD_DWORDX2 s{{\[[0-9]:[0-9]\]}}, s[0:1], 11 +; SI-CHECK: S_LOAD_DWORDX2 s{{\[[0-9]:[0-9]\]}}, s[0:1], 0xb define void @v2i32_arg(<2 x i32> addrspace(1)* nocapture %out, <2 x i32> %in) nounwind { entry: store <2 x i32> %in, <2 x i32> addrspace(1)* %out, align 4 @@ -133,7 +133,7 @@ entry: ; EG-CHECK-DAG: T{{[0-9]\.[XYZW]}}, KC0[3].X ; EG-CHECK-DAG: T{{[0-9]\.[XYZW]}}, KC0[2].W ; SI-CHECK-LABEL: @v2f32_arg -; SI-CHECK: S_LOAD_DWORDX2 s{{\[[0-9]:[0-9]\]}}, s[0:1], 11 +; SI-CHECK: S_LOAD_DWORDX2 s{{\[[0-9]:[0-9]\]}}, 
s[0:1], 0xb define void @v2f32_arg(<2 x float> addrspace(1)* nocapture %out, <2 x float> %in) nounwind { entry: store <2 x float> %in, <2 x float> addrspace(1)* %out, align 4 @@ -166,7 +166,7 @@ entry: ; EG-CHECK-DAG: T{{[0-9]\.[XYZW]}}, KC0[3].Z ; EG-CHECK-DAG: T{{[0-9]\.[XYZW]}}, KC0[3].W ; SI-CHECK-LABEL: @v3i32_arg -; SI-CHECK: S_LOAD_DWORDX4 s{{\[[0-9]:[0-9]+\]}}, s[0:1], 13 +; SI-CHECK: S_LOAD_DWORDX4 s{{\[[0-9]:[0-9]+\]}}, s[0:1], 0xd define void @v3i32_arg(<3 x i32> addrspace(1)* nocapture %out, <3 x i32> %in) nounwind { entry: store <3 x i32> %in, <3 x i32> addrspace(1)* %out, align 4 @@ -178,7 +178,7 @@ entry: ; EG-CHECK-DAG: T{{[0-9]\.[XYZW]}}, KC0[3].Z ; EG-CHECK-DAG: T{{[0-9]\.[XYZW]}}, KC0[3].W ; SI-CHECK-LABEL: @v3f32_arg -; SI-CHECK: S_LOAD_DWORDX4 s{{\[[0-9]:[0-9]+\]}}, s[0:1], 13 +; SI-CHECK: S_LOAD_DWORDX4 s{{\[[0-9]:[0-9]+\]}}, s[0:1], 0xd define void @v3f32_arg(<3 x float> addrspace(1)* nocapture %out, <3 x float> %in) nounwind { entry: store <3 x float> %in, <3 x float> addrspace(1)* %out, align 4 @@ -223,7 +223,7 @@ entry: ; EG-CHECK-DAG: T{{[0-9]\.[XYZW]}}, KC0[3].W ; EG-CHECK-DAG: T{{[0-9]\.[XYZW]}}, KC0[4].X ; SI-CHECK-LABEL: @v4i32_arg -; SI-CHECK: S_LOAD_DWORDX4 s{{\[[0-9]:[0-9]\]}}, s[0:1], 13 +; SI-CHECK: S_LOAD_DWORDX4 s{{\[[0-9]:[0-9]\]}}, s[0:1], 0xd define void @v4i32_arg(<4 x i32> addrspace(1)* nocapture %out, <4 x i32> %in) nounwind { entry: store <4 x i32> %in, <4 x i32> addrspace(1)* %out, align 4 @@ -236,7 +236,7 @@ entry: ; EG-CHECK-DAG: T{{[0-9]\.[XYZW]}}, KC0[3].W ; EG-CHECK-DAG: T{{[0-9]\.[XYZW]}}, KC0[4].X ; SI-CHECK-LABEL: @v4f32_arg -; SI-CHECK: S_LOAD_DWORDX4 s{{\[[0-9]:[0-9]\]}}, s[0:1], 13 +; SI-CHECK: S_LOAD_DWORDX4 s{{\[[0-9]:[0-9]\]}}, s[0:1], 0xd define void @v4f32_arg(<4 x float> addrspace(1)* nocapture %out, <4 x float> %in) nounwind { entry: store <4 x float> %in, <4 x float> addrspace(1)* %out, align 4 @@ -300,7 +300,7 @@ entry: ; EG-CHECK-DAG: T{{[0-9]\.[XYZW]}}, KC0[5].W ; EG-CHECK-DAG: T{{[0-9]\.[XYZW]}}, 
KC0[6].X ; SI-CHECK-LABEL: @v8i32_arg -; SI-CHECK: S_LOAD_DWORDX8 s{{\[[0-9]:[0-9]+\]}}, s[0:1], 17 +; SI-CHECK: S_LOAD_DWORDX8 s{{\[[0-9]:[0-9]+\]}}, s[0:1], 0x11 define void @v8i32_arg(<8 x i32> addrspace(1)* nocapture %out, <8 x i32> %in) nounwind { entry: store <8 x i32> %in, <8 x i32> addrspace(1)* %out, align 4 @@ -317,7 +317,7 @@ entry: ; EG-CHECK-DAG: T{{[0-9]\.[XYZW]}}, KC0[5].W ; EG-CHECK-DAG: T{{[0-9]\.[XYZW]}}, KC0[6].X ; SI-CHECK-LABEL: @v8f32_arg -; SI-CHECK: S_LOAD_DWORDX8 s{{\[[0-9]:[0-9]+\]}}, s[0:1], 17 +; SI-CHECK: S_LOAD_DWORDX8 s{{\[[0-9]:[0-9]+\]}}, s[0:1], 0x11 define void @v8f32_arg(<8 x float> addrspace(1)* nocapture %out, <8 x float> %in) nounwind { entry: store <8 x float> %in, <8 x float> addrspace(1)* %out, align 4 @@ -422,7 +422,7 @@ entry: ; EG-CHECK-DAG: T{{[0-9]\.[XYZW]}}, KC0[9].W ; EG-CHECK-DAG: T{{[0-9]\.[XYZW]}}, KC0[10].X ; SI-CHECK-LABEL: @v16i32_arg -; SI-CHECK: S_LOAD_DWORDX16 s{{\[[0-9]:[0-9]+\]}}, s[0:1], 25 +; SI-CHECK: S_LOAD_DWORDX16 s{{\[[0-9]:[0-9]+\]}}, s[0:1], 0x19 define void @v16i32_arg(<16 x i32> addrspace(1)* nocapture %out, <16 x i32> %in) nounwind { entry: store <16 x i32> %in, <16 x i32> addrspace(1)* %out, align 4 @@ -447,7 +447,7 @@ entry: ; EG-CHECK-DAG: T{{[0-9]\.[XYZW]}}, KC0[9].W ; EG-CHECK-DAG: T{{[0-9]\.[XYZW]}}, KC0[10].X ; SI-CHECK-LABEL: @v16f32_arg -; SI-CHECK: S_LOAD_DWORDX16 s{{\[[0-9]:[0-9]+\]}}, s[0:1], 25 +; SI-CHECK: S_LOAD_DWORDX16 s{{\[[0-9]:[0-9]+\]}}, s[0:1], 0x19 define void @v16f32_arg(<16 x float> addrspace(1)* nocapture %out, <16 x float> %in) nounwind { entry: store <16 x float> %in, <16 x float> addrspace(1)* %out, align 4 diff --git a/test/CodeGen/R600/llvm.AMDGPU.bfe.i32.ll b/test/CodeGen/R600/llvm.AMDGPU.bfe.i32.ll index c3f000a..eb50942 100644 --- a/test/CodeGen/R600/llvm.AMDGPU.bfe.i32.ll +++ b/test/CodeGen/R600/llvm.AMDGPU.bfe.i32.ll @@ -1,11 +1,12 @@ ; RUN: llc -march=r600 -mcpu=SI -verify-machineinstrs < %s | FileCheck -check-prefix=SI -check-prefix=FUNC %s -; RUN: llc 
-march=r600 -mcpu=redwood -verify-machineinstrs < %s | FileCheck -check-prefix=EG -check-prefix=FUNC %s +; RUN: llc -march=r600 -mcpu=redwood -show-mc-encoding -verify-machineinstrs < %s | FileCheck -check-prefix=EG -check-prefix=FUNC %s declare i32 @llvm.AMDGPU.bfe.i32(i32, i32, i32) nounwind readnone ; FUNC-LABEL: @bfe_i32_arg_arg_arg ; SI: V_BFE_I32 ; EG: BFE_INT +; EG: encoding: [{{[x0-9a-f]+,[x0-9a-f]+,[x0-9a-f]+,[x0-9a-f]+,[x0-9a-f]+}},0xac define void @bfe_i32_arg_arg_arg(i32 addrspace(1)* %out, i32 %src0, i32 %src1, i32 %src2) nounwind { %bfe_i32 = call i32 @llvm.AMDGPU.bfe.i32(i32 %src0, i32 %src1, i32 %src1) nounwind readnone store i32 %bfe_i32, i32 addrspace(1)* %out, align 4 @@ -38,3 +39,388 @@ define void @bfe_i32_imm_arg_arg(i32 addrspace(1)* %out, i32 %src1, i32 %src2) n store i32 %bfe_i32, i32 addrspace(1)* %out, align 4 ret void } + +; FUNC-LABEL: @v_bfe_print_arg +; SI: V_BFE_I32 v{{[0-9]+}}, v{{[0-9]+}}, 2, 8 +define void @v_bfe_print_arg(i32 addrspace(1)* %out, i32 addrspace(1)* %src0) nounwind { + %load = load i32 addrspace(1)* %src0, align 4 + %bfe_i32 = call i32 @llvm.AMDGPU.bfe.i32(i32 %load, i32 2, i32 8) nounwind readnone + store i32 %bfe_i32, i32 addrspace(1)* %out, align 4 + ret void +} + +; FUNC-LABEL: @bfe_i32_arg_0_width_reg_offset +; SI-NOT: BFE +; SI: S_ENDPGM +; EG-NOT: BFE +define void @bfe_i32_arg_0_width_reg_offset(i32 addrspace(1)* %out, i32 %src0, i32 %src1) nounwind { + %bfe_u32 = call i32 @llvm.AMDGPU.bfe.i32(i32 %src0, i32 %src1, i32 0) nounwind readnone + store i32 %bfe_u32, i32 addrspace(1)* %out, align 4 + ret void +} + +; FUNC-LABEL: @bfe_i32_arg_0_width_imm_offset +; SI-NOT: BFE +; SI: S_ENDPGM +; EG-NOT: BFE +define void @bfe_i32_arg_0_width_imm_offset(i32 addrspace(1)* %out, i32 %src0, i32 %src1) nounwind { + %bfe_u32 = call i32 @llvm.AMDGPU.bfe.i32(i32 %src0, i32 8, i32 0) nounwind readnone + store i32 %bfe_u32, i32 addrspace(1)* %out, align 4 + ret void +} + +; FUNC-LABEL: @bfe_i32_test_6 +; SI: V_LSHLREV_B32_e32 
v{{[0-9]+}}, 31, v{{[0-9]+}} +; SI: V_ASHRREV_I32_e32 v{{[0-9]+}}, 1, v{{[0-9]+}} +; SI: S_ENDPGM +define void @bfe_i32_test_6(i32 addrspace(1)* %out, i32 addrspace(1)* %in) nounwind { + %x = load i32 addrspace(1)* %in, align 4 + %shl = shl i32 %x, 31 + %bfe = call i32 @llvm.AMDGPU.bfe.i32(i32 %shl, i32 1, i32 31) + store i32 %bfe, i32 addrspace(1)* %out, align 4 + ret void +} + +; FUNC-LABEL: @bfe_i32_test_7 +; SI-NOT: SHL +; SI-NOT: BFE +; SI: V_MOV_B32_e32 [[VREG:v[0-9]+]], 0 +; SI: BUFFER_STORE_DWORD [[VREG]], +; SI: S_ENDPGM +define void @bfe_i32_test_7(i32 addrspace(1)* %out, i32 addrspace(1)* %in) nounwind { + %x = load i32 addrspace(1)* %in, align 4 + %shl = shl i32 %x, 31 + %bfe = call i32 @llvm.AMDGPU.bfe.i32(i32 %shl, i32 0, i32 31) + store i32 %bfe, i32 addrspace(1)* %out, align 4 + ret void +} + +; FIXME: The shifts should be 1 BFE +; FUNC-LABEL: @bfe_i32_test_8 +; SI: BUFFER_LOAD_DWORD +; SI: V_BFE_I32 v{{[0-9]+}}, v{{[0-9]+}}, 0, 1 +; SI: S_ENDPGM +define void @bfe_i32_test_8(i32 addrspace(1)* %out, i32 addrspace(1)* %in) nounwind { + %x = load i32 addrspace(1)* %in, align 4 + %shl = shl i32 %x, 31 + %bfe = call i32 @llvm.AMDGPU.bfe.i32(i32 %shl, i32 31, i32 1) + store i32 %bfe, i32 addrspace(1)* %out, align 4 + ret void +} + +; FUNC-LABEL: @bfe_i32_test_9 +; SI-NOT: BFE +; SI: V_ASHRREV_I32_e32 v{{[0-9]+}}, 31, v{{[0-9]+}} +; SI-NOT: BFE +; SI: S_ENDPGM +define void @bfe_i32_test_9(i32 addrspace(1)* %out, i32 addrspace(1)* %in) nounwind { + %x = load i32 addrspace(1)* %in, align 4 + %bfe = call i32 @llvm.AMDGPU.bfe.i32(i32 %x, i32 31, i32 1) + store i32 %bfe, i32 addrspace(1)* %out, align 4 + ret void +} + +; FUNC-LABEL: @bfe_i32_test_10 +; SI-NOT: BFE +; SI: V_ASHRREV_I32_e32 v{{[0-9]+}}, 1, v{{[0-9]+}} +; SI-NOT: BFE +; SI: S_ENDPGM +define void @bfe_i32_test_10(i32 addrspace(1)* %out, i32 addrspace(1)* %in) nounwind { + %x = load i32 addrspace(1)* %in, align 4 + %bfe = call i32 @llvm.AMDGPU.bfe.i32(i32 %x, i32 1, i32 31) + store i32 %bfe, i32 
addrspace(1)* %out, align 4 + ret void +} + +; FUNC-LABEL: @bfe_i32_test_11 +; SI-NOT: BFE +; SI: V_ASHRREV_I32_e32 v{{[0-9]+}}, 8, v{{[0-9]+}} +; SI-NOT: BFE +; SI: S_ENDPGM +define void @bfe_i32_test_11(i32 addrspace(1)* %out, i32 addrspace(1)* %in) nounwind { + %x = load i32 addrspace(1)* %in, align 4 + %bfe = call i32 @llvm.AMDGPU.bfe.i32(i32 %x, i32 8, i32 24) + store i32 %bfe, i32 addrspace(1)* %out, align 4 + ret void +} + +; FUNC-LABEL: @bfe_i32_test_12 +; SI-NOT: BFE +; SI: V_ASHRREV_I32_e32 v{{[0-9]+}}, 24, v{{[0-9]+}} +; SI-NOT: BFE +; SI: S_ENDPGM +define void @bfe_i32_test_12(i32 addrspace(1)* %out, i32 addrspace(1)* %in) nounwind { + %x = load i32 addrspace(1)* %in, align 4 + %bfe = call i32 @llvm.AMDGPU.bfe.i32(i32 %x, i32 24, i32 8) + store i32 %bfe, i32 addrspace(1)* %out, align 4 + ret void +} + +; FUNC-LABEL: @bfe_i32_test_13 +; SI: V_ASHRREV_I32_e32 {{v[0-9]+}}, 31, {{v[0-9]+}} +; SI-NOT: BFE +; SI: S_ENDPGM +define void @bfe_i32_test_13(i32 addrspace(1)* %out, i32 addrspace(1)* %in) nounwind { + %x = load i32 addrspace(1)* %in, align 4 + %shl = ashr i32 %x, 31 + %bfe = call i32 @llvm.AMDGPU.bfe.i32(i32 %shl, i32 31, i32 1) + store i32 %bfe, i32 addrspace(1)* %out, align 4 ret void +} + +; FUNC-LABEL: @bfe_i32_test_14 +; SI-NOT: LSHR +; SI-NOT: BFE +; SI: S_ENDPGM +define void @bfe_i32_test_14(i32 addrspace(1)* %out, i32 addrspace(1)* %in) nounwind { + %x = load i32 addrspace(1)* %in, align 4 + %shl = lshr i32 %x, 31 + %bfe = call i32 @llvm.AMDGPU.bfe.i32(i32 %shl, i32 31, i32 1) + store i32 %bfe, i32 addrspace(1)* %out, align 4 ret void +} + +; FUNC-LABEL: @bfe_i32_constant_fold_test_0 +; SI-NOT: BFE +; SI: V_MOV_B32_e32 [[VREG:v[0-9]+]], 0 +; SI: BUFFER_STORE_DWORD [[VREG]], +; SI: S_ENDPGM +; EG-NOT: BFE +define void @bfe_i32_constant_fold_test_0(i32 addrspace(1)* %out) nounwind { + %bfe_i32 = call i32 @llvm.AMDGPU.bfe.i32(i32 0, i32 0, i32 0) nounwind readnone + store i32 %bfe_i32, i32 addrspace(1)* %out, align 4 + ret void +} + +; 
FUNC-LABEL: @bfe_i32_constant_fold_test_1 +; SI-NOT: BFE +; SI: V_MOV_B32_e32 [[VREG:v[0-9]+]], 0 +; SI: BUFFER_STORE_DWORD [[VREG]], +; SI: S_ENDPGM +; EG-NOT: BFE +define void @bfe_i32_constant_fold_test_1(i32 addrspace(1)* %out) nounwind { + %bfe_i32 = call i32 @llvm.AMDGPU.bfe.i32(i32 12334, i32 0, i32 0) nounwind readnone + store i32 %bfe_i32, i32 addrspace(1)* %out, align 4 + ret void +} + +; FUNC-LABEL: @bfe_i32_constant_fold_test_2 +; SI-NOT: BFE +; SI: V_MOV_B32_e32 [[VREG:v[0-9]+]], 0 +; SI: BUFFER_STORE_DWORD [[VREG]], +; SI: S_ENDPGM +; EG-NOT: BFE +define void @bfe_i32_constant_fold_test_2(i32 addrspace(1)* %out) nounwind { + %bfe_i32 = call i32 @llvm.AMDGPU.bfe.i32(i32 0, i32 0, i32 1) nounwind readnone + store i32 %bfe_i32, i32 addrspace(1)* %out, align 4 + ret void +} + +; FUNC-LABEL: @bfe_i32_constant_fold_test_3 +; SI-NOT: BFE +; SI: V_MOV_B32_e32 [[VREG:v[0-9]+]], -1 +; SI: BUFFER_STORE_DWORD [[VREG]], +; SI: S_ENDPGM +; EG-NOT: BFE +define void @bfe_i32_constant_fold_test_3(i32 addrspace(1)* %out) nounwind { + %bfe_i32 = call i32 @llvm.AMDGPU.bfe.i32(i32 1, i32 0, i32 1) nounwind readnone + store i32 %bfe_i32, i32 addrspace(1)* %out, align 4 + ret void +} + +; FUNC-LABEL: @bfe_i32_constant_fold_test_4 +; SI-NOT: BFE +; SI: V_MOV_B32_e32 [[VREG:v[0-9]+]], -1 +; SI: BUFFER_STORE_DWORD [[VREG]], +; SI: S_ENDPGM +; EG-NOT: BFE +define void @bfe_i32_constant_fold_test_4(i32 addrspace(1)* %out) nounwind { + %bfe_i32 = call i32 @llvm.AMDGPU.bfe.i32(i32 4294967295, i32 0, i32 1) nounwind readnone + store i32 %bfe_i32, i32 addrspace(1)* %out, align 4 + ret void +} + +; FUNC-LABEL: @bfe_i32_constant_fold_test_5 +; SI-NOT: BFE +; SI: V_MOV_B32_e32 [[VREG:v[0-9]+]], -1 +; SI: BUFFER_STORE_DWORD [[VREG]], +; SI: S_ENDPGM +; EG-NOT: BFE +define void @bfe_i32_constant_fold_test_5(i32 addrspace(1)* %out) nounwind { + %bfe_i32 = call i32 @llvm.AMDGPU.bfe.i32(i32 128, i32 7, i32 1) nounwind readnone + store i32 %bfe_i32, i32 addrspace(1)* %out, align 4 + ret void 
+} + +; FUNC-LABEL: @bfe_i32_constant_fold_test_6 +; SI-NOT: BFE +; SI: V_MOV_B32_e32 [[VREG:v[0-9]+]], 0xffffff80 +; SI: BUFFER_STORE_DWORD [[VREG]], +; SI: S_ENDPGM +; EG-NOT: BFE +define void @bfe_i32_constant_fold_test_6(i32 addrspace(1)* %out) nounwind { + %bfe_i32 = call i32 @llvm.AMDGPU.bfe.i32(i32 128, i32 0, i32 8) nounwind readnone + store i32 %bfe_i32, i32 addrspace(1)* %out, align 4 + ret void +} + +; FUNC-LABEL: @bfe_i32_constant_fold_test_7 +; SI-NOT: BFE +; SI: V_MOV_B32_e32 [[VREG:v[0-9]+]], 0x7f +; SI: BUFFER_STORE_DWORD [[VREG]], +; SI: S_ENDPGM +; EG-NOT: BFE +define void @bfe_i32_constant_fold_test_7(i32 addrspace(1)* %out) nounwind { + %bfe_i32 = call i32 @llvm.AMDGPU.bfe.i32(i32 127, i32 0, i32 8) nounwind readnone + store i32 %bfe_i32, i32 addrspace(1)* %out, align 4 + ret void +} + +; FUNC-LABEL: @bfe_i32_constant_fold_test_8 +; SI-NOT: BFE +; SI: V_MOV_B32_e32 [[VREG:v[0-9]+]], 1 +; SI: BUFFER_STORE_DWORD [[VREG]], +; SI: S_ENDPGM +; EG-NOT: BFE +define void @bfe_i32_constant_fold_test_8(i32 addrspace(1)* %out) nounwind { + %bfe_i32 = call i32 @llvm.AMDGPU.bfe.i32(i32 127, i32 6, i32 8) nounwind readnone + store i32 %bfe_i32, i32 addrspace(1)* %out, align 4 + ret void +} + +; FUNC-LABEL: @bfe_i32_constant_fold_test_9 +; SI-NOT: BFE +; SI: V_MOV_B32_e32 [[VREG:v[0-9]+]], 1 +; SI: BUFFER_STORE_DWORD [[VREG]], +; SI: S_ENDPGM +; EG-NOT: BFE +define void @bfe_i32_constant_fold_test_9(i32 addrspace(1)* %out) nounwind { + %bfe_i32 = call i32 @llvm.AMDGPU.bfe.i32(i32 65536, i32 16, i32 8) nounwind readnone + store i32 %bfe_i32, i32 addrspace(1)* %out, align 4 + ret void +} + +; FUNC-LABEL: @bfe_i32_constant_fold_test_10 +; SI-NOT: BFE +; SI: V_MOV_B32_e32 [[VREG:v[0-9]+]], 0 +; SI: BUFFER_STORE_DWORD [[VREG]], +; SI: S_ENDPGM +; EG-NOT: BFE +define void @bfe_i32_constant_fold_test_10(i32 addrspace(1)* %out) nounwind { + %bfe_i32 = call i32 @llvm.AMDGPU.bfe.i32(i32 65535, i32 16, i32 16) nounwind readnone + store i32 %bfe_i32, i32 addrspace(1)* 
%out, align 4 + ret void +} + +; FUNC-LABEL: @bfe_i32_constant_fold_test_11 +; SI-NOT: BFE +; SI: V_MOV_B32_e32 [[VREG:v[0-9]+]], -6 +; SI: BUFFER_STORE_DWORD [[VREG]], +; SI: S_ENDPGM +; EG-NOT: BFE +define void @bfe_i32_constant_fold_test_11(i32 addrspace(1)* %out) nounwind { + %bfe_i32 = call i32 @llvm.AMDGPU.bfe.i32(i32 160, i32 4, i32 4) nounwind readnone + store i32 %bfe_i32, i32 addrspace(1)* %out, align 4 + ret void +} + +; FUNC-LABEL: @bfe_i32_constant_fold_test_12 +; SI-NOT: BFE +; SI: V_MOV_B32_e32 [[VREG:v[0-9]+]], 0 +; SI: BUFFER_STORE_DWORD [[VREG]], +; SI: S_ENDPGM +; EG-NOT: BFE +define void @bfe_i32_constant_fold_test_12(i32 addrspace(1)* %out) nounwind { + %bfe_i32 = call i32 @llvm.AMDGPU.bfe.i32(i32 160, i32 31, i32 1) nounwind readnone + store i32 %bfe_i32, i32 addrspace(1)* %out, align 4 + ret void +} + +; FUNC-LABEL: @bfe_i32_constant_fold_test_13 +; SI-NOT: BFE +; SI: V_MOV_B32_e32 [[VREG:v[0-9]+]], 1 +; SI: BUFFER_STORE_DWORD [[VREG]], +; SI: S_ENDPGM +; EG-NOT: BFE +define void @bfe_i32_constant_fold_test_13(i32 addrspace(1)* %out) nounwind { + %bfe_i32 = call i32 @llvm.AMDGPU.bfe.i32(i32 131070, i32 16, i32 16) nounwind readnone + store i32 %bfe_i32, i32 addrspace(1)* %out, align 4 + ret void +} + +; FUNC-LABEL: @bfe_i32_constant_fold_test_14 +; SI-NOT: BFE +; SI: V_MOV_B32_e32 [[VREG:v[0-9]+]], 40 +; SI: BUFFER_STORE_DWORD [[VREG]], +; SI: S_ENDPGM +; EG-NOT: BFE +define void @bfe_i32_constant_fold_test_14(i32 addrspace(1)* %out) nounwind { + %bfe_i32 = call i32 @llvm.AMDGPU.bfe.i32(i32 160, i32 2, i32 30) nounwind readnone + store i32 %bfe_i32, i32 addrspace(1)* %out, align 4 + ret void +} + +; FUNC-LABEL: @bfe_i32_constant_fold_test_15 +; SI-NOT: BFE +; SI: V_MOV_B32_e32 [[VREG:v[0-9]+]], 10 +; SI: BUFFER_STORE_DWORD [[VREG]], +; SI: S_ENDPGM +; EG-NOT: BFE +define void @bfe_i32_constant_fold_test_15(i32 addrspace(1)* %out) nounwind { + %bfe_i32 = call i32 @llvm.AMDGPU.bfe.i32(i32 160, i32 4, i32 28) nounwind readnone + store i32 
%bfe_i32, i32 addrspace(1)* %out, align 4 + ret void +} + +; FUNC-LABEL: @bfe_i32_constant_fold_test_16 +; SI-NOT: BFE +; SI: V_MOV_B32_e32 [[VREG:v[0-9]+]], -1 +; SI: BUFFER_STORE_DWORD [[VREG]], +; SI: S_ENDPGM +; EG-NOT: BFE +define void @bfe_i32_constant_fold_test_16(i32 addrspace(1)* %out) nounwind { + %bfe_i32 = call i32 @llvm.AMDGPU.bfe.i32(i32 4294967295, i32 1, i32 7) nounwind readnone + store i32 %bfe_i32, i32 addrspace(1)* %out, align 4 + ret void +} + +; FUNC-LABEL: @bfe_i32_constant_fold_test_17 +; SI-NOT: BFE +; SI: V_MOV_B32_e32 [[VREG:v[0-9]+]], 0x7f +; SI: BUFFER_STORE_DWORD [[VREG]], +; SI: S_ENDPGM +; EG-NOT: BFE +define void @bfe_i32_constant_fold_test_17(i32 addrspace(1)* %out) nounwind { + %bfe_i32 = call i32 @llvm.AMDGPU.bfe.i32(i32 255, i32 1, i32 31) nounwind readnone + store i32 %bfe_i32, i32 addrspace(1)* %out, align 4 + ret void +} + +; FUNC-LABEL: @bfe_i32_constant_fold_test_18 +; SI-NOT: BFE +; SI: V_MOV_B32_e32 [[VREG:v[0-9]+]], 0 +; SI: BUFFER_STORE_DWORD [[VREG]], +; SI: S_ENDPGM +; EG-NOT: BFE +define void @bfe_i32_constant_fold_test_18(i32 addrspace(1)* %out) nounwind { + %bfe_i32 = call i32 @llvm.AMDGPU.bfe.i32(i32 255, i32 31, i32 1) nounwind readnone + store i32 %bfe_i32, i32 addrspace(1)* %out, align 4 + ret void +} + +; XXX - This should really be a single BFE, but the sext_inreg of the +; extended type i24 is never custom lowered. 
+; FUNC-LABEL: @bfe_sext_in_reg_i24 +; SI: BUFFER_LOAD_DWORD [[LOAD:v[0-9]+]], +; SI: V_LSHLREV_B32_e32 {{v[0-9]+}}, 8, {{v[0-9]+}} +; SI: V_ASHRREV_I32_e32 {{v[0-9]+}}, 8, {{v[0-9]+}} +; XSI: V_BFE_I32 [[BFE:v[0-9]+]], [[LOAD]], 0, 8 +; XSI-NOT: SHL +; XSI-NOT: SHR +; XSI: BUFFER_STORE_DWORD [[BFE]], +define void @bfe_sext_in_reg_i24(i32 addrspace(1)* %out, i32 addrspace(1)* %in) nounwind { + %x = load i32 addrspace(1)* %in, align 4 + %bfe = call i32 @llvm.AMDGPU.bfe.i32(i32 %x, i32 0, i32 24) + %shl = shl i32 %bfe, 8 + %ashr = ashr i32 %shl, 8 + store i32 %ashr, i32 addrspace(1)* %out, align 4 + ret void +} diff --git a/test/CodeGen/R600/llvm.AMDGPU.bfe.u32.ll b/test/CodeGen/R600/llvm.AMDGPU.bfe.u32.ll index 0d47863..1a62253 100644 --- a/test/CodeGen/R600/llvm.AMDGPU.bfe.u32.ll +++ b/test/CodeGen/R600/llvm.AMDGPU.bfe.u32.ll @@ -38,3 +38,517 @@ define void @bfe_u32_imm_arg_arg(i32 addrspace(1)* %out, i32 %src1, i32 %src2) n store i32 %bfe_u32, i32 addrspace(1)* %out, align 4 ret void } + +; FUNC-LABEL: @bfe_u32_arg_0_width_reg_offset +; SI-NOT: BFE +; SI: S_ENDPGM +; EG-NOT: BFE +define void @bfe_u32_arg_0_width_reg_offset(i32 addrspace(1)* %out, i32 %src0, i32 %src1) nounwind { + %bfe_u32 = call i32 @llvm.AMDGPU.bfe.u32(i32 %src0, i32 %src1, i32 0) nounwind readnone + store i32 %bfe_u32, i32 addrspace(1)* %out, align 4 + ret void +} + +; FUNC-LABEL: @bfe_u32_arg_0_width_imm_offset +; SI-NOT: BFE +; SI: S_ENDPGM +; EG-NOT: BFE +define void @bfe_u32_arg_0_width_imm_offset(i32 addrspace(1)* %out, i32 %src0, i32 %src1) nounwind { + %bfe_u32 = call i32 @llvm.AMDGPU.bfe.u32(i32 %src0, i32 8, i32 0) nounwind readnone + store i32 %bfe_u32, i32 addrspace(1)* %out, align 4 + ret void +} + +; FUNC-LABEL: @bfe_u32_zextload_i8 +; SI: BUFFER_LOAD_UBYTE +; SI-NOT: BFE +; SI: S_ENDPGM +define void @bfe_u32_zextload_i8(i32 addrspace(1)* %out, i8 addrspace(1)* %in) nounwind { + %load = load i8 addrspace(1)* %in + %ext = zext i8 %load to i32 + %bfe = call i32 
@llvm.AMDGPU.bfe.u32(i32 %ext, i32 0, i32 8) + store i32 %bfe, i32 addrspace(1)* %out, align 4 + ret void +} + +; FUNC-LABEL: @bfe_u32_zext_in_reg_i8 +; SI: BUFFER_LOAD_DWORD +; SI: V_ADD_I32 +; SI-NEXT: V_AND_B32_e32 +; SI-NOT: BFE +; SI: S_ENDPGM +define void @bfe_u32_zext_in_reg_i8(i32 addrspace(1)* %out, i32 addrspace(1)* %in) nounwind { + %load = load i32 addrspace(1)* %in, align 4 + %add = add i32 %load, 1 + %ext = and i32 %add, 255 + %bfe = call i32 @llvm.AMDGPU.bfe.u32(i32 %ext, i32 0, i32 8) + store i32 %bfe, i32 addrspace(1)* %out, align 4 + ret void +} + +; FUNC-LABEL: @bfe_u32_zext_in_reg_i16 +; SI: BUFFER_LOAD_DWORD +; SI: V_ADD_I32 +; SI-NEXT: V_AND_B32_e32 +; SI-NOT: BFE +; SI: S_ENDPGM +define void @bfe_u32_zext_in_reg_i16(i32 addrspace(1)* %out, i32 addrspace(1)* %in) nounwind { + %load = load i32 addrspace(1)* %in, align 4 + %add = add i32 %load, 1 + %ext = and i32 %add, 65535 + %bfe = call i32 @llvm.AMDGPU.bfe.u32(i32 %ext, i32 0, i32 16) + store i32 %bfe, i32 addrspace(1)* %out, align 4 + ret void +} + +; FUNC-LABEL: @bfe_u32_zext_in_reg_i8_offset_1 +; SI: BUFFER_LOAD_DWORD +; SI: V_ADD_I32 +; SI: BFE +; SI: S_ENDPGM +define void @bfe_u32_zext_in_reg_i8_offset_1(i32 addrspace(1)* %out, i32 addrspace(1)* %in) nounwind { + %load = load i32 addrspace(1)* %in, align 4 + %add = add i32 %load, 1 + %ext = and i32 %add, 255 + %bfe = call i32 @llvm.AMDGPU.bfe.u32(i32 %ext, i32 1, i32 8) + store i32 %bfe, i32 addrspace(1)* %out, align 4 + ret void +} + +; FUNC-LABEL: @bfe_u32_zext_in_reg_i8_offset_3 +; SI: BUFFER_LOAD_DWORD +; SI: V_ADD_I32 +; SI-NEXT: V_AND_B32_e32 {{v[0-9]+}}, 0xf8 +; SI-NEXT: BFE +; SI: S_ENDPGM +define void @bfe_u32_zext_in_reg_i8_offset_3(i32 addrspace(1)* %out, i32 addrspace(1)* %in) nounwind { + %load = load i32 addrspace(1)* %in, align 4 + %add = add i32 %load, 1 + %ext = and i32 %add, 255 + %bfe = call i32 @llvm.AMDGPU.bfe.u32(i32 %ext, i32 3, i32 8) + store i32 %bfe, i32 addrspace(1)* %out, align 4 + ret void +} + +; FUNC-LABEL: 
@bfe_u32_zext_in_reg_i8_offset_7 +; SI: BUFFER_LOAD_DWORD +; SI: V_ADD_I32 +; SI-NEXT: V_AND_B32_e32 {{v[0-9]+}}, 0x80 +; SI-NEXT: BFE +; SI: S_ENDPGM +define void @bfe_u32_zext_in_reg_i8_offset_7(i32 addrspace(1)* %out, i32 addrspace(1)* %in) nounwind { + %load = load i32 addrspace(1)* %in, align 4 + %add = add i32 %load, 1 + %ext = and i32 %add, 255 + %bfe = call i32 @llvm.AMDGPU.bfe.u32(i32 %ext, i32 7, i32 8) + store i32 %bfe, i32 addrspace(1)* %out, align 4 + ret void +} + +; FUNC-LABEL: @bfe_u32_zext_in_reg_i16_offset_8 +; SI: BUFFER_LOAD_DWORD +; SI: V_ADD_I32 +; SI-NEXT: BFE +; SI: S_ENDPGM +define void @bfe_u32_zext_in_reg_i16_offset_8(i32 addrspace(1)* %out, i32 addrspace(1)* %in) nounwind { + %load = load i32 addrspace(1)* %in, align 4 + %add = add i32 %load, 1 + %ext = and i32 %add, 65535 + %bfe = call i32 @llvm.AMDGPU.bfe.u32(i32 %ext, i32 8, i32 8) + store i32 %bfe, i32 addrspace(1)* %out, align 4 + ret void +} + +; FUNC-LABEL: @bfe_u32_test_1 +; SI: BUFFER_LOAD_DWORD +; SI: V_AND_B32_e32 {{v[0-9]+}}, 1, {{v[0-9]+}} +; SI: S_ENDPGM +; EG: AND_INT T{{[0-9]\.[XYZW]}}, T{{[0-9]\.[XYZW]}}, 1, +define void @bfe_u32_test_1(i32 addrspace(1)* %out, i32 addrspace(1)* %in) nounwind { + %x = load i32 addrspace(1)* %in, align 4 + %bfe = call i32 @llvm.AMDGPU.bfe.u32(i32 %x, i32 0, i32 1) + store i32 %bfe, i32 addrspace(1)* %out, align 4 + ret void +} + +define void @bfe_u32_test_2(i32 addrspace(1)* %out, i32 addrspace(1)* %in) nounwind { + %x = load i32 addrspace(1)* %in, align 4 + %shl = shl i32 %x, 31 + %bfe = call i32 @llvm.AMDGPU.bfe.u32(i32 %shl, i32 0, i32 8) + store i32 %bfe, i32 addrspace(1)* %out, align 4 + ret void +} + +define void @bfe_u32_test_3(i32 addrspace(1)* %out, i32 addrspace(1)* %in) nounwind { + %x = load i32 addrspace(1)* %in, align 4 + %shl = shl i32 %x, 31 + %bfe = call i32 @llvm.AMDGPU.bfe.u32(i32 %shl, i32 0, i32 1) + store i32 %bfe, i32 addrspace(1)* %out, align 4 + ret void +} + +; FUNC-LABEL: @bfe_u32_test_4 +; SI-NOT: LSHL +; 
SI-NOT: SHR +; SI-NOT: BFE +; SI: V_MOV_B32_e32 [[VREG:v[0-9]+]], 0 +; SI: BUFFER_STORE_DWORD [[VREG]], +; SI: S_ENDPGM +define void @bfe_u32_test_4(i32 addrspace(1)* %out, i32 addrspace(1)* %in) nounwind { + %x = load i32 addrspace(1)* %in, align 4 + %shl = shl i32 %x, 31 + %shr = lshr i32 %shl, 31 + %bfe = call i32 @llvm.AMDGPU.bfe.u32(i32 %shr, i32 31, i32 1) + store i32 %bfe, i32 addrspace(1)* %out, align 4 + ret void +} + +; FUNC-LABEL: @bfe_u32_test_5 +; SI: BUFFER_LOAD_DWORD +; SI-NOT: LSHL +; SI-NOT: SHR +; SI: V_BFE_I32 {{v[0-9]+}}, {{v[0-9]+}}, 0, 1 +; SI: S_ENDPGM +define void @bfe_u32_test_5(i32 addrspace(1)* %out, i32 addrspace(1)* %in) nounwind { + %x = load i32 addrspace(1)* %in, align 4 + %shl = shl i32 %x, 31 + %shr = ashr i32 %shl, 31 + %bfe = call i32 @llvm.AMDGPU.bfe.u32(i32 %shr, i32 0, i32 1) + store i32 %bfe, i32 addrspace(1)* %out, align 4 + ret void +} + +; FUNC-LABEL: @bfe_u32_test_6 +; SI: V_LSHLREV_B32_e32 v{{[0-9]+}}, 31, v{{[0-9]+}} +; SI: V_LSHRREV_B32_e32 v{{[0-9]+}}, 1, v{{[0-9]+}} +; SI: S_ENDPGM +define void @bfe_u32_test_6(i32 addrspace(1)* %out, i32 addrspace(1)* %in) nounwind { + %x = load i32 addrspace(1)* %in, align 4 + %shl = shl i32 %x, 31 + %bfe = call i32 @llvm.AMDGPU.bfe.u32(i32 %shl, i32 1, i32 31) + store i32 %bfe, i32 addrspace(1)* %out, align 4 + ret void +} + +; FUNC-LABEL: @bfe_u32_test_7 +; SI: V_LSHLREV_B32_e32 v{{[0-9]+}}, 31, v{{[0-9]+}} +; SI-NOT: BFE +; SI: S_ENDPGM +define void @bfe_u32_test_7(i32 addrspace(1)* %out, i32 addrspace(1)* %in) nounwind { + %x = load i32 addrspace(1)* %in, align 4 + %shl = shl i32 %x, 31 + %bfe = call i32 @llvm.AMDGPU.bfe.u32(i32 %shl, i32 0, i32 31) + store i32 %bfe, i32 addrspace(1)* %out, align 4 + ret void +} + +; FUNC-LABEL: @bfe_u32_test_8 +; SI-NOT: BFE +; SI: V_AND_B32_e32 {{v[0-9]+}}, 1, {{v[0-9]+}} +; SI-NOT: BFE +; SI: S_ENDPGM +define void @bfe_u32_test_8(i32 addrspace(1)* %out, i32 addrspace(1)* %in) nounwind { + %x = load i32 addrspace(1)* %in, align 4 + %shl = shl 
i32 %x, 31 + %bfe = call i32 @llvm.AMDGPU.bfe.u32(i32 %shl, i32 31, i32 1) + store i32 %bfe, i32 addrspace(1)* %out, align 4 + ret void +} + +; FUNC-LABEL: @bfe_u32_test_9 +; SI-NOT: BFE +; SI: V_LSHRREV_B32_e32 v{{[0-9]+}}, 31, v{{[0-9]+}} +; SI-NOT: BFE +; SI: S_ENDPGM +define void @bfe_u32_test_9(i32 addrspace(1)* %out, i32 addrspace(1)* %in) nounwind { + %x = load i32 addrspace(1)* %in, align 4 + %bfe = call i32 @llvm.AMDGPU.bfe.u32(i32 %x, i32 31, i32 1) + store i32 %bfe, i32 addrspace(1)* %out, align 4 + ret void +} + +; FUNC-LABEL: @bfe_u32_test_10 +; SI-NOT: BFE +; SI: V_LSHRREV_B32_e32 v{{[0-9]+}}, 1, v{{[0-9]+}} +; SI-NOT: BFE +; SI: S_ENDPGM +define void @bfe_u32_test_10(i32 addrspace(1)* %out, i32 addrspace(1)* %in) nounwind { + %x = load i32 addrspace(1)* %in, align 4 + %bfe = call i32 @llvm.AMDGPU.bfe.u32(i32 %x, i32 1, i32 31) + store i32 %bfe, i32 addrspace(1)* %out, align 4 + ret void +} + +; FUNC-LABEL: @bfe_u32_test_11 +; SI-NOT: BFE +; SI: V_LSHRREV_B32_e32 v{{[0-9]+}}, 8, v{{[0-9]+}} +; SI-NOT: BFE +; SI: S_ENDPGM +define void @bfe_u32_test_11(i32 addrspace(1)* %out, i32 addrspace(1)* %in) nounwind { + %x = load i32 addrspace(1)* %in, align 4 + %bfe = call i32 @llvm.AMDGPU.bfe.u32(i32 %x, i32 8, i32 24) + store i32 %bfe, i32 addrspace(1)* %out, align 4 + ret void +} + +; FUNC-LABEL: @bfe_u32_test_12 +; SI-NOT: BFE +; SI: V_LSHRREV_B32_e32 v{{[0-9]+}}, 24, v{{[0-9]+}} +; SI-NOT: BFE +; SI: S_ENDPGM +define void @bfe_u32_test_12(i32 addrspace(1)* %out, i32 addrspace(1)* %in) nounwind { + %x = load i32 addrspace(1)* %in, align 4 + %bfe = call i32 @llvm.AMDGPU.bfe.u32(i32 %x, i32 24, i32 8) + store i32 %bfe, i32 addrspace(1)* %out, align 4 + ret void +} + +; FUNC-LABEL: @bfe_u32_test_13 +; V_ASHRREV_U32_e32 {{v[0-9]+}}, 31, {{v[0-9]+}} +; SI-NOT: BFE +; SI: S_ENDPGM +define void @bfe_u32_test_13(i32 addrspace(1)* %out, i32 addrspace(1)* %in) nounwind { + %x = load i32 addrspace(1)* %in, align 4 + %shl = ashr i32 %x, 31 + %bfe = call i32 
@llvm.AMDGPU.bfe.u32(i32 %shl, i32 31, i32 1) + store i32 %bfe, i32 addrspace(1)* %out, align 4 ret void +} + +; FUNC-LABEL: @bfe_u32_test_14 +; SI-NOT: LSHR +; SI-NOT: BFE +; SI: S_ENDPGM +define void @bfe_u32_test_14(i32 addrspace(1)* %out, i32 addrspace(1)* %in) nounwind { + %x = load i32 addrspace(1)* %in, align 4 + %shl = lshr i32 %x, 31 + %bfe = call i32 @llvm.AMDGPU.bfe.u32(i32 %shl, i32 31, i32 1) + store i32 %bfe, i32 addrspace(1)* %out, align 4 ret void +} + +; FUNC-LABEL: @bfe_u32_constant_fold_test_0 +; SI-NOT: BFE +; SI: V_MOV_B32_e32 [[VREG:v[0-9]+]], 0 +; SI: BUFFER_STORE_DWORD [[VREG]], +; SI: S_ENDPGM +; EG-NOT: BFE +define void @bfe_u32_constant_fold_test_0(i32 addrspace(1)* %out) nounwind { + %bfe_u32 = call i32 @llvm.AMDGPU.bfe.u32(i32 0, i32 0, i32 0) nounwind readnone + store i32 %bfe_u32, i32 addrspace(1)* %out, align 4 + ret void +} + +; FUNC-LABEL: @bfe_u32_constant_fold_test_1 +; SI-NOT: BFE +; SI: V_MOV_B32_e32 [[VREG:v[0-9]+]], 0 +; SI: BUFFER_STORE_DWORD [[VREG]], +; SI: S_ENDPGM +; EG-NOT: BFE +define void @bfe_u32_constant_fold_test_1(i32 addrspace(1)* %out) nounwind { + %bfe_u32 = call i32 @llvm.AMDGPU.bfe.u32(i32 12334, i32 0, i32 0) nounwind readnone + store i32 %bfe_u32, i32 addrspace(1)* %out, align 4 + ret void +} + +; FUNC-LABEL: @bfe_u32_constant_fold_test_2 +; SI-NOT: BFE +; SI: V_MOV_B32_e32 [[VREG:v[0-9]+]], 0 +; SI: BUFFER_STORE_DWORD [[VREG]], +; SI: S_ENDPGM +; EG-NOT: BFE +define void @bfe_u32_constant_fold_test_2(i32 addrspace(1)* %out) nounwind { + %bfe_u32 = call i32 @llvm.AMDGPU.bfe.u32(i32 0, i32 0, i32 1) nounwind readnone + store i32 %bfe_u32, i32 addrspace(1)* %out, align 4 + ret void +} + +; FUNC-LABEL: @bfe_u32_constant_fold_test_3 +; SI-NOT: BFE +; SI: V_MOV_B32_e32 [[VREG:v[0-9]+]], 1 +; SI: BUFFER_STORE_DWORD [[VREG]], +; SI: S_ENDPGM +; EG-NOT: BFE +define void @bfe_u32_constant_fold_test_3(i32 addrspace(1)* %out) nounwind { + %bfe_u32 = call i32 @llvm.AMDGPU.bfe.u32(i32 1, i32 0, i32 1) nounwind readnone 
+ store i32 %bfe_u32, i32 addrspace(1)* %out, align 4 + ret void +} + +; FUNC-LABEL: @bfe_u32_constant_fold_test_4 +; SI-NOT: BFE +; SI: V_MOV_B32_e32 [[VREG:v[0-9]+]], -1 +; SI: BUFFER_STORE_DWORD [[VREG]], +; SI: S_ENDPGM +; EG-NOT: BFE +define void @bfe_u32_constant_fold_test_4(i32 addrspace(1)* %out) nounwind { + %bfe_u32 = call i32 @llvm.AMDGPU.bfe.u32(i32 4294967295, i32 0, i32 1) nounwind readnone + store i32 %bfe_u32, i32 addrspace(1)* %out, align 4 + ret void +} + +; FUNC-LABEL: @bfe_u32_constant_fold_test_5 +; SI-NOT: BFE +; SI: V_MOV_B32_e32 [[VREG:v[0-9]+]], 1 +; SI: BUFFER_STORE_DWORD [[VREG]], +; SI: S_ENDPGM +; EG-NOT: BFE +define void @bfe_u32_constant_fold_test_5(i32 addrspace(1)* %out) nounwind { + %bfe_u32 = call i32 @llvm.AMDGPU.bfe.u32(i32 128, i32 7, i32 1) nounwind readnone + store i32 %bfe_u32, i32 addrspace(1)* %out, align 4 + ret void +} + +; FUNC-LABEL: @bfe_u32_constant_fold_test_6 +; SI-NOT: BFE +; SI: V_MOV_B32_e32 [[VREG:v[0-9]+]], 0x80 +; SI: BUFFER_STORE_DWORD [[VREG]], +; SI: S_ENDPGM +; EG-NOT: BFE +define void @bfe_u32_constant_fold_test_6(i32 addrspace(1)* %out) nounwind { + %bfe_u32 = call i32 @llvm.AMDGPU.bfe.u32(i32 128, i32 0, i32 8) nounwind readnone + store i32 %bfe_u32, i32 addrspace(1)* %out, align 4 + ret void +} + +; FUNC-LABEL: @bfe_u32_constant_fold_test_7 +; SI-NOT: BFE +; SI: V_MOV_B32_e32 [[VREG:v[0-9]+]], 0x7f +; SI: BUFFER_STORE_DWORD [[VREG]], +; SI: S_ENDPGM +; EG-NOT: BFE +define void @bfe_u32_constant_fold_test_7(i32 addrspace(1)* %out) nounwind { + %bfe_u32 = call i32 @llvm.AMDGPU.bfe.u32(i32 127, i32 0, i32 8) nounwind readnone + store i32 %bfe_u32, i32 addrspace(1)* %out, align 4 + ret void +} + +; FUNC-LABEL: @bfe_u32_constant_fold_test_8 +; SI-NOT: BFE +; SI: V_MOV_B32_e32 [[VREG:v[0-9]+]], 1 +; SI: BUFFER_STORE_DWORD [[VREG]], +; SI: S_ENDPGM +; EG-NOT: BFE +define void @bfe_u32_constant_fold_test_8(i32 addrspace(1)* %out) nounwind { + %bfe_u32 = call i32 @llvm.AMDGPU.bfe.u32(i32 127, i32 6, i32 8) 
nounwind readnone + store i32 %bfe_u32, i32 addrspace(1)* %out, align 4 + ret void +} + +; FUNC-LABEL: @bfe_u32_constant_fold_test_9 +; SI-NOT: BFE +; SI: V_MOV_B32_e32 [[VREG:v[0-9]+]], 1 +; SI: BUFFER_STORE_DWORD [[VREG]], +; SI: S_ENDPGM +; EG-NOT: BFE +define void @bfe_u32_constant_fold_test_9(i32 addrspace(1)* %out) nounwind { + %bfe_u32 = call i32 @llvm.AMDGPU.bfe.u32(i32 65536, i32 16, i32 8) nounwind readnone + store i32 %bfe_u32, i32 addrspace(1)* %out, align 4 + ret void +} + +; FUNC-LABEL: @bfe_u32_constant_fold_test_10 +; SI-NOT: BFE +; SI: V_MOV_B32_e32 [[VREG:v[0-9]+]], 0 +; SI: BUFFER_STORE_DWORD [[VREG]], +; SI: S_ENDPGM +; EG-NOT: BFE +define void @bfe_u32_constant_fold_test_10(i32 addrspace(1)* %out) nounwind { + %bfe_u32 = call i32 @llvm.AMDGPU.bfe.u32(i32 65535, i32 16, i32 16) nounwind readnone + store i32 %bfe_u32, i32 addrspace(1)* %out, align 4 + ret void +} + +; FUNC-LABEL: @bfe_u32_constant_fold_test_11 +; SI-NOT: BFE +; SI: V_MOV_B32_e32 [[VREG:v[0-9]+]], 10 +; SI: BUFFER_STORE_DWORD [[VREG]], +; SI: S_ENDPGM +; EG-NOT: BFE +define void @bfe_u32_constant_fold_test_11(i32 addrspace(1)* %out) nounwind { + %bfe_u32 = call i32 @llvm.AMDGPU.bfe.u32(i32 160, i32 4, i32 4) nounwind readnone + store i32 %bfe_u32, i32 addrspace(1)* %out, align 4 + ret void +} + +; FUNC-LABEL: @bfe_u32_constant_fold_test_12 +; SI-NOT: BFE +; SI: V_MOV_B32_e32 [[VREG:v[0-9]+]], 0 +; SI: BUFFER_STORE_DWORD [[VREG]], +; SI: S_ENDPGM +; EG-NOT: BFE +define void @bfe_u32_constant_fold_test_12(i32 addrspace(1)* %out) nounwind { + %bfe_u32 = call i32 @llvm.AMDGPU.bfe.u32(i32 160, i32 31, i32 1) nounwind readnone + store i32 %bfe_u32, i32 addrspace(1)* %out, align 4 + ret void +} + +; FUNC-LABEL: @bfe_u32_constant_fold_test_13 +; SI-NOT: BFE +; SI: V_MOV_B32_e32 [[VREG:v[0-9]+]], 1 +; SI: BUFFER_STORE_DWORD [[VREG]], +; SI: S_ENDPGM +; EG-NOT: BFE +define void @bfe_u32_constant_fold_test_13(i32 addrspace(1)* %out) nounwind { + %bfe_u32 = call i32 
@llvm.AMDGPU.bfe.u32(i32 131070, i32 16, i32 16) nounwind readnone + store i32 %bfe_u32, i32 addrspace(1)* %out, align 4 + ret void +} + +; FUNC-LABEL: @bfe_u32_constant_fold_test_14 +; SI-NOT: BFE +; SI: V_MOV_B32_e32 [[VREG:v[0-9]+]], 40 +; SI: BUFFER_STORE_DWORD [[VREG]], +; SI: S_ENDPGM +; EG-NOT: BFE +define void @bfe_u32_constant_fold_test_14(i32 addrspace(1)* %out) nounwind { + %bfe_u32 = call i32 @llvm.AMDGPU.bfe.u32(i32 160, i32 2, i32 30) nounwind readnone + store i32 %bfe_u32, i32 addrspace(1)* %out, align 4 + ret void +} + +; FUNC-LABEL: @bfe_u32_constant_fold_test_15 +; SI-NOT: BFE +; SI: V_MOV_B32_e32 [[VREG:v[0-9]+]], 10 +; SI: BUFFER_STORE_DWORD [[VREG]], +; SI: S_ENDPGM +; EG-NOT: BFE +define void @bfe_u32_constant_fold_test_15(i32 addrspace(1)* %out) nounwind { + %bfe_u32 = call i32 @llvm.AMDGPU.bfe.u32(i32 160, i32 4, i32 28) nounwind readnone + store i32 %bfe_u32, i32 addrspace(1)* %out, align 4 + ret void +} + +; FUNC-LABEL: @bfe_u32_constant_fold_test_16 +; SI-NOT: BFE +; SI: V_MOV_B32_e32 [[VREG:v[0-9]+]], 0x7f +; SI: BUFFER_STORE_DWORD [[VREG]], +; SI: S_ENDPGM +; EG-NOT: BFE +define void @bfe_u32_constant_fold_test_16(i32 addrspace(1)* %out) nounwind { + %bfe_u32 = call i32 @llvm.AMDGPU.bfe.u32(i32 4294967295, i32 1, i32 7) nounwind readnone + store i32 %bfe_u32, i32 addrspace(1)* %out, align 4 + ret void +} + +; FUNC-LABEL: @bfe_u32_constant_fold_test_17 +; SI-NOT: BFE +; SI: V_MOV_B32_e32 [[VREG:v[0-9]+]], 0x7f +; SI: BUFFER_STORE_DWORD [[VREG]], +; SI: S_ENDPGM +; EG-NOT: BFE +define void @bfe_u32_constant_fold_test_17(i32 addrspace(1)* %out) nounwind { + %bfe_u32 = call i32 @llvm.AMDGPU.bfe.u32(i32 255, i32 1, i32 31) nounwind readnone + store i32 %bfe_u32, i32 addrspace(1)* %out, align 4 + ret void +} + +; FUNC-LABEL: @bfe_u32_constant_fold_test_18 +; SI-NOT: BFE +; SI: V_MOV_B32_e32 [[VREG:v[0-9]+]], 0 +; SI: BUFFER_STORE_DWORD [[VREG]], +; SI: S_ENDPGM +; EG-NOT: BFE +define void @bfe_u32_constant_fold_test_18(i32 addrspace(1)* %out) 
nounwind { + %bfe_u32 = call i32 @llvm.AMDGPU.bfe.u32(i32 255, i32 31, i32 1) nounwind readnone + store i32 %bfe_u32, i32 addrspace(1)* %out, align 4 + ret void +} diff --git a/test/CodeGen/R600/llvm.AMDGPU.imad24.ll b/test/CodeGen/R600/llvm.AMDGPU.imad24.ll new file mode 100644 index 0000000..95795ea --- /dev/null +++ b/test/CodeGen/R600/llvm.AMDGPU.imad24.ll @@ -0,0 +1,21 @@ +; RUN: llc -march=r600 -mcpu=SI -verify-machineinstrs < %s | FileCheck -check-prefix=SI -check-prefix=FUNC %s +; RUN: llc -march=r600 -mcpu=cayman -verify-machineinstrs < %s | FileCheck -check-prefix=CM -check-prefix=FUNC %s +; RUN: llc -march=r600 -mcpu=redwood -verify-machineinstrs < %s | FileCheck -check-prefix=R600 -check-prefix=FUNC %s +; XUN: llc -march=r600 -mcpu=r600 -verify-machineinstrs < %s | FileCheck -check-prefix=R600 -check-prefix=FUNC %s +; XUN: llc -march=r600 -mcpu=r770 -verify-machineinstrs < %s | FileCheck -check-prefix=R600 -check-prefix=FUNC %s + +; FIXME: Store of i32 seems to be broken pre-EG somehow? 
+ +declare i32 @llvm.AMDGPU.imad24(i32, i32, i32) nounwind readnone + +; FUNC-LABEL: @test_imad24 +; SI: V_MAD_I32_I24 +; CM: MULADD_INT24 +; R600: MULLO_INT +; R600: ADD_INT +define void @test_imad24(i32 addrspace(1)* %out, i32 %src0, i32 %src1, i32 %src2) nounwind { + %mad = call i32 @llvm.AMDGPU.imad24(i32 %src0, i32 %src1, i32 %src2) nounwind readnone + store i32 %mad, i32 addrspace(1)* %out, align 4 + ret void +} + diff --git a/test/CodeGen/R600/llvm.AMDGPU.imul24.ll b/test/CodeGen/R600/llvm.AMDGPU.imul24.ll new file mode 100644 index 0000000..8ee3520 --- /dev/null +++ b/test/CodeGen/R600/llvm.AMDGPU.imul24.ll @@ -0,0 +1,15 @@ +; RUN: llc -march=r600 -mcpu=SI -verify-machineinstrs < %s | FileCheck -check-prefix=SI -check-prefix=FUNC %s +; RUN: llc -march=r600 -mcpu=cayman -verify-machineinstrs < %s | FileCheck -check-prefix=CM -check-prefix=FUNC %s +; RUN: llc -march=r600 -mcpu=redwood -verify-machineinstrs < %s | FileCheck -check-prefix=R600 -check-prefix=FUNC %s + +declare i32 @llvm.AMDGPU.imul24(i32, i32) nounwind readnone + +; FUNC-LABEL: @test_imul24 +; SI: V_MUL_I32_I24 +; CM: MUL_INT24 +; R600: MULLO_INT +define void @test_imul24(i32 addrspace(1)* %out, i32 %src0, i32 %src1) nounwind { + %mul = call i32 @llvm.AMDGPU.imul24(i32 %src0, i32 %src1) nounwind readnone + store i32 %mul, i32 addrspace(1)* %out, align 4 + ret void +} diff --git a/test/CodeGen/R600/llvm.AMDGPU.umad24.ll b/test/CodeGen/R600/llvm.AMDGPU.umad24.ll new file mode 100644 index 0000000..afdfb18 --- /dev/null +++ b/test/CodeGen/R600/llvm.AMDGPU.umad24.ll @@ -0,0 +1,19 @@ +; RUN: llc -march=r600 -mcpu=SI -verify-machineinstrs < %s | FileCheck -check-prefix=SI -check-prefix=FUNC %s +; RUN: llc -march=r600 -mcpu=cayman -verify-machineinstrs < %s | FileCheck -check-prefix=EG -check-prefix=FUNC %s +; RUN: llc -march=r600 -mcpu=redwood -verify-machineinstrs < %s | FileCheck -check-prefix=EG -check-prefix=FUNC %s +; XUN: llc -march=r600 -mcpu=r600 -verify-machineinstrs < %s | FileCheck 
-check-prefix=R600 -check-prefix=FUNC %s +; XUN: llc -march=r600 -mcpu=rv770 -verify-machineinstrs < %s | FileCheck -check-prefix=R600 -check-prefix=FUNC %s + +declare i32 @llvm.AMDGPU.umad24(i32, i32, i32) nounwind readnone + +; FUNC-LABEL: @test_umad24 +; SI: V_MAD_U32_U24 +; EG: MULADD_UINT24 +; R600: MULLO_UINT +; R600: ADD_INT +define void @test_umad24(i32 addrspace(1)* %out, i32 %src0, i32 %src1, i32 %src2) nounwind { + %mad = call i32 @llvm.AMDGPU.umad24(i32 %src0, i32 %src1, i32 %src2) nounwind readnone + store i32 %mad, i32 addrspace(1)* %out, align 4 + ret void +} + diff --git a/test/CodeGen/R600/llvm.AMDGPU.umul24.ll b/test/CodeGen/R600/llvm.AMDGPU.umul24.ll new file mode 100644 index 0000000..72a3602 --- /dev/null +++ b/test/CodeGen/R600/llvm.AMDGPU.umul24.ll @@ -0,0 +1,17 @@ +; RUN: llc -march=r600 -mcpu=SI -verify-machineinstrs < %s | FileCheck -check-prefix=SI %s +; RUN: llc -march=r600 -mcpu=cayman -verify-machineinstrs < %s | FileCheck -check-prefix=EG -check-prefix=FUNC %s +; RUN: llc -march=r600 -mcpu=redwood -verify-machineinstrs < %s | FileCheck -check-prefix=EG -check-prefix=FUNC %s +; XUN: llc -march=r600 -mcpu=r600 -verify-machineinstrs < %s | FileCheck -check-prefix=R600 -check-prefix=FUNC %s +; XUN: llc -march=r600 -mcpu=r770 -verify-machineinstrs < %s | FileCheck -check-prefix=R600 -check-prefix=FUNC %s + +declare i32 @llvm.AMDGPU.umul24(i32, i32) nounwind readnone + +; FUNC-LABEL: @test_umul24 +; SI: V_MUL_U32_U24 +; R600: MUL_UINT24 +; R600: MULLO_UINT +define void @test_umul24(i32 addrspace(1)* %out, i32 %src0, i32 %src1) nounwind { + %mul = call i32 @llvm.AMDGPU.umul24(i32 %src0, i32 %src1) nounwind readnone + store i32 %mul, i32 addrspace(1)* %out, align 4 + ret void +} diff --git a/test/CodeGen/R600/llvm.SI.tbuffer.store.ll b/test/CodeGen/R600/llvm.SI.tbuffer.store.ll index 569efb6..740581a 100644 --- a/test/CodeGen/R600/llvm.SI.tbuffer.store.ll +++ b/test/CodeGen/R600/llvm.SI.tbuffer.store.ll @@ -1,7 +1,7 @@ ;RUN: llc < %s 
-march=r600 -mcpu=verde -verify-machineinstrs | FileCheck %s ;CHECK-LABEL: @test1 -;CHECK: TBUFFER_STORE_FORMAT_XYZW {{v\[[0-9]+:[0-9]+\]}}, 32, -1, 0, -1, 0, 14, 4, {{v[0-9]+}}, {{s\[[0-9]+:[0-9]+\]}}, -1, 0, 0 +;CHECK: TBUFFER_STORE_FORMAT_XYZW {{v\[[0-9]+:[0-9]+\]}}, 0x20, -1, 0, -1, 0, 14, 4, {{v[0-9]+}}, {{s\[[0-9]+:[0-9]+\]}}, -1, 0, 0 define void @test1(i32 %a1, i32 %vaddr) #0 { %vdata = insertelement <4 x i32> undef, i32 %a1, i32 0 call void @llvm.SI.tbuffer.store.v4i32(<16 x i8> undef, <4 x i32> %vdata, @@ -11,7 +11,7 @@ define void @test1(i32 %a1, i32 %vaddr) #0 { } ;CHECK-LABEL: @test2 -;CHECK: TBUFFER_STORE_FORMAT_XYZ {{v\[[0-9]+:[0-9]+\]}}, 24, -1, 0, -1, 0, 13, 4, {{v[0-9]+}}, {{s\[[0-9]+:[0-9]+\]}}, -1, 0, 0 +;CHECK: TBUFFER_STORE_FORMAT_XYZ {{v\[[0-9]+:[0-9]+\]}}, 0x18, -1, 0, -1, 0, 13, 4, {{v[0-9]+}}, {{s\[[0-9]+:[0-9]+\]}}, -1, 0, 0 define void @test2(i32 %a1, i32 %vaddr) #0 { %vdata = insertelement <4 x i32> undef, i32 %a1, i32 0 call void @llvm.SI.tbuffer.store.v4i32(<16 x i8> undef, <4 x i32> %vdata, @@ -21,7 +21,7 @@ define void @test2(i32 %a1, i32 %vaddr) #0 { } ;CHECK-LABEL: @test3 -;CHECK: TBUFFER_STORE_FORMAT_XY {{v\[[0-9]+:[0-9]+\]}}, 16, -1, 0, -1, 0, 11, 4, {{v[0-9]+}}, {{s\[[0-9]+:[0-9]+\]}}, -1, 0, 0 +;CHECK: TBUFFER_STORE_FORMAT_XY {{v\[[0-9]+:[0-9]+\]}}, 0x10, -1, 0, -1, 0, 11, 4, {{v[0-9]+}}, {{s\[[0-9]+:[0-9]+\]}}, -1, 0, 0 define void @test3(i32 %a1, i32 %vaddr) #0 { %vdata = insertelement <2 x i32> undef, i32 %a1, i32 0 call void @llvm.SI.tbuffer.store.v2i32(<16 x i8> undef, <2 x i32> %vdata, @@ -31,7 +31,7 @@ define void @test3(i32 %a1, i32 %vaddr) #0 { } ;CHECK-LABEL: @test4 -;CHECK: TBUFFER_STORE_FORMAT_X {{v[0-9]+}}, 8, -1, 0, -1, 0, 4, 4, {{v[0-9]+}}, {{s\[[0-9]+:[0-9]+\]}}, -1, 0, 0 +;CHECK: TBUFFER_STORE_FORMAT_X {{v[0-9]+}}, 0x8, -1, 0, -1, 0, 4, 4, {{v[0-9]+}}, {{s\[[0-9]+:[0-9]+\]}}, -1, 0, 0 define void @test4(i32 %vdata, i32 %vaddr) #0 { call void @llvm.SI.tbuffer.store.i32(<16 x i8> undef, i32 %vdata, i32 1, i32 
%vaddr, i32 0, i32 8, i32 4, i32 4, i32 1, i32 0, i32 1, diff --git a/test/CodeGen/R600/llvm.cos.ll b/test/CodeGen/R600/llvm.cos.ll index aaf2305..9e7a4de 100644 --- a/test/CodeGen/R600/llvm.cos.ll +++ b/test/CodeGen/R600/llvm.cos.ll @@ -1,19 +1,40 @@ -;RUN: llc < %s -march=r600 -mcpu=redwood | FileCheck %s +;RUN: llc < %s -march=r600 -mcpu=redwood | FileCheck %s -check-prefix=EG -check-prefix=FUNC +;RUN: llc < %s -march=r600 -mcpu=SI | FileCheck %s -check-prefix=SI -check-prefix=FUNC -;CHECK: MULADD_IEEE * -;CHECK: FRACT * -;CHECK: ADD * -;CHECK: COS * T{{[0-9]+\.[XYZW], PV\.[XYZW]}} +;FUNC-LABEL: test +;EG: MULADD_IEEE * +;EG: FRACT * +;EG: ADD * +;EG: COS * T{{[0-9]+\.[XYZW], PV\.[XYZW]}} +;EG-NOT: COS +;SI: V_COS_F32 +;SI-NOT: V_COS_F32 -define void @test(<4 x float> inreg %reg0) #0 { - %r0 = extractelement <4 x float> %reg0, i32 0 - %r1 = call float @llvm.cos.f32(float %r0) - %vec = insertelement <4 x float> undef, float %r1, i32 0 - call void @llvm.R600.store.swizzle(<4 x float> %vec, i32 0, i32 0) +define void @test(float addrspace(1)* %out, float %x) #1 { + %cos = call float @llvm.cos.f32(float %x) + store float %cos, float addrspace(1)* %out + ret void +} + +;FUNC-LABEL: testv +;EG: COS * T{{[0-9]+\.[XYZW], PV\.[XYZW]}} +;EG: COS * T{{[0-9]+\.[XYZW], PV\.[XYZW]}} +;EG: COS * T{{[0-9]+\.[XYZW], PV\.[XYZW]}} +;EG: COS * T{{[0-9]+\.[XYZW], PV\.[XYZW]}} +;EG-NOT: COS +;SI: V_COS_F32 +;SI: V_COS_F32 +;SI: V_COS_F32 +;SI: V_COS_F32 +;SI-NOT: V_COS_F32 + +define void @testv(<4 x float> addrspace(1)* %out, <4 x float> inreg %vx) #1 { + %cos = call <4 x float> @llvm.cos.v4f32(<4 x float> %vx) + store <4 x float> %cos, <4 x float> addrspace(1)* %out ret void } declare float @llvm.cos.f32(float) readnone -declare void @llvm.R600.store.swizzle(<4 x float>, i32, i32) +declare <4 x float> @llvm.cos.v4f32(<4 x float>) readnone attributes #0 = { "ShaderType"="0" } diff --git a/test/CodeGen/R600/llvm.rint.f64.ll b/test/CodeGen/R600/llvm.rint.f64.ll new file mode 100644 
index 0000000..a7a909a --- /dev/null +++ b/test/CodeGen/R600/llvm.rint.f64.ll @@ -0,0 +1,37 @@ +; RUN: llc -march=r600 -mcpu=bonaire -verify-machineinstrs < %s | FileCheck -check-prefix=CI -check-prefix=FUNC %s + +; FUNC-LABEL: @f64 +; CI: V_RNDNE_F64_e32 +define void @f64(double addrspace(1)* %out, double %in) { +entry: + %0 = call double @llvm.rint.f64(double %in) + store double %0, double addrspace(1)* %out + ret void +} + +; FUNC-LABEL: @v2f64 +; CI: V_RNDNE_F64_e32 +; CI: V_RNDNE_F64_e32 +define void @v2f64(<2 x double> addrspace(1)* %out, <2 x double> %in) { +entry: + %0 = call <2 x double> @llvm.rint.v2f64(<2 x double> %in) + store <2 x double> %0, <2 x double> addrspace(1)* %out + ret void +} + +; FUNC-LABEL: @v4f64 +; CI: V_RNDNE_F64_e32 +; CI: V_RNDNE_F64_e32 +; CI: V_RNDNE_F64_e32 +; CI: V_RNDNE_F64_e32 +define void @v4f64(<4 x double> addrspace(1)* %out, <4 x double> %in) { +entry: + %0 = call <4 x double> @llvm.rint.v4f64(<4 x double> %in) + store <4 x double> %0, <4 x double> addrspace(1)* %out + ret void +} + + +declare double @llvm.rint.f64(double) #0 +declare <2 x double> @llvm.rint.v2f64(<2 x double>) #0 +declare <4 x double> @llvm.rint.v4f64(<4 x double>) #0 diff --git a/test/CodeGen/R600/llvm.rint.ll b/test/CodeGen/R600/llvm.rint.ll index c174b33..db8352f 100644 --- a/test/CodeGen/R600/llvm.rint.ll +++ b/test/CodeGen/R600/llvm.rint.ll @@ -1,10 +1,10 @@ -; RUN: llc < %s -march=r600 -mcpu=redwood | FileCheck %s --check-prefix=R600-CHECK -; RUN: llc < %s -march=r600 -mcpu=SI -verify-machineinstrs | FileCheck %s --check-prefix=SI-CHECK +; RUN: llc -march=r600 -mcpu=redwood < %s | FileCheck %s -check-prefix=R600 -check-prefix=FUNC +; RUN: llc -march=r600 -mcpu=SI -verify-machineinstrs < %s | FileCheck -check-prefix=SI -check-prefix=FUNC %s -; R600-CHECK: @f32 -; R600-CHECK: RNDNE -; SI-CHECK: @f32 -; SI-CHECK: V_RNDNE_F32_e32 +; FUNC-LABEL: @f32 +; R600: RNDNE + +; SI: V_RNDNE_F32_e32 define void @f32(float addrspace(1)* %out, float %in) { entry: %0 
= call float @llvm.rint.f32(float %in) @@ -12,12 +12,12 @@ entry: ret void } -; R600-CHECK: @v2f32 -; R600-CHECK: RNDNE -; R600-CHECK: RNDNE -; SI-CHECK: @v2f32 -; SI-CHECK: V_RNDNE_F32_e32 -; SI-CHECK: V_RNDNE_F32_e32 +; FUNC-LABEL: @v2f32 +; R600: RNDNE +; R600: RNDNE + +; SI: V_RNDNE_F32_e32 +; SI: V_RNDNE_F32_e32 define void @v2f32(<2 x float> addrspace(1)* %out, <2 x float> %in) { entry: %0 = call <2 x float> @llvm.rint.v2f32(<2 x float> %in) @@ -25,16 +25,16 @@ entry: ret void } -; R600-CHECK: @v4f32 -; R600-CHECK: RNDNE -; R600-CHECK: RNDNE -; R600-CHECK: RNDNE -; R600-CHECK: RNDNE -; SI-CHECK: @v4f32 -; SI-CHECK: V_RNDNE_F32_e32 -; SI-CHECK: V_RNDNE_F32_e32 -; SI-CHECK: V_RNDNE_F32_e32 -; SI-CHECK: V_RNDNE_F32_e32 +; FUNC-LABEL: @v4f32 +; R600: RNDNE +; R600: RNDNE +; R600: RNDNE +; R600: RNDNE + +; SI: V_RNDNE_F32_e32 +; SI: V_RNDNE_F32_e32 +; SI: V_RNDNE_F32_e32 +; SI: V_RNDNE_F32_e32 define void @v4f32(<4 x float> addrspace(1)* %out, <4 x float> %in) { entry: %0 = call <4 x float> @llvm.rint.v4f32(<4 x float> %in) @@ -42,13 +42,8 @@ entry: ret void } -; Function Attrs: nounwind readonly declare float @llvm.rint.f32(float) #0 - -; Function Attrs: nounwind readonly declare <2 x float> @llvm.rint.v2f32(<2 x float>) #0 - -; Function Attrs: nounwind readonly declare <4 x float> @llvm.rint.v4f32(<4 x float>) #0 attributes #0 = { nounwind readonly } diff --git a/test/CodeGen/R600/llvm.sin.ll b/test/CodeGen/R600/llvm.sin.ll index 9eb9983..41c363c 100644 --- a/test/CodeGen/R600/llvm.sin.ll +++ b/test/CodeGen/R600/llvm.sin.ll @@ -1,19 +1,41 @@ -;RUN: llc < %s -march=r600 -mcpu=redwood | FileCheck %s +;RUN: llc < %s -march=r600 -mcpu=redwood | FileCheck %s -check-prefix=EG -check-prefix=FUNC +;RUN: llc < %s -march=r600 -mcpu=SI | FileCheck %s -check-prefix=SI -check-prefix=FUNC -;CHECK: MULADD_IEEE * -;CHECK: FRACT * -;CHECK: ADD * -;CHECK: SIN * T{{[0-9]+\.[XYZW], PV\.[XYZW]}} +;FUNC-LABEL: test +;EG: MULADD_IEEE * +;EG: FRACT * +;EG: ADD * +;EG: SIN * 
T{{[0-9]+\.[XYZW], PV\.[XYZW]}} +;EG-NOT: SIN +;SI: V_MUL_F32 +;SI: V_SIN_F32 +;SI-NOT: V_SIN_F32 -define void @test(<4 x float> inreg %reg0) #0 { - %r0 = extractelement <4 x float> %reg0, i32 0 - %r1 = call float @llvm.sin.f32( float %r0) - %vec = insertelement <4 x float> undef, float %r1, i32 0 - call void @llvm.R600.store.swizzle(<4 x float> %vec, i32 0, i32 0) +define void @test(float addrspace(1)* %out, float %x) #1 { + %sin = call float @llvm.sin.f32(float %x) + store float %sin, float addrspace(1)* %out + ret void +} + +;FUNC-LABEL: testv +;EG: SIN * T{{[0-9]+\.[XYZW], PV\.[XYZW]}} +;EG: SIN * T{{[0-9]+\.[XYZW], PV\.[XYZW]}} +;EG: SIN * T{{[0-9]+\.[XYZW], PV\.[XYZW]}} +;EG: SIN * T{{[0-9]+\.[XYZW], PV\.[XYZW]}} +;EG-NOT: SIN +;SI: V_SIN_F32 +;SI: V_SIN_F32 +;SI: V_SIN_F32 +;SI: V_SIN_F32 +;SI-NOT: V_SIN_F32 + +define void @testv(<4 x float> addrspace(1)* %out, <4 x float> %vx) #1 { + %sin = call <4 x float> @llvm.sin.v4f32( <4 x float> %vx) + store <4 x float> %sin, <4 x float> addrspace(1)* %out ret void } declare float @llvm.sin.f32(float) readnone -declare void @llvm.R600.store.swizzle(<4 x float>, i32, i32) +declare <4 x float> @llvm.sin.v4f32(<4 x float>) readnone attributes #0 = { "ShaderType"="0" } diff --git a/test/CodeGen/R600/llvm.sqrt.ll b/test/CodeGen/R600/llvm.sqrt.ll index 0d0d186..4eee37f 100644 --- a/test/CodeGen/R600/llvm.sqrt.ll +++ b/test/CodeGen/R600/llvm.sqrt.ll @@ -1,5 +1,5 @@ ; RUN: llc < %s -march=r600 --mcpu=redwood | FileCheck %s --check-prefix=R600-CHECK -; RUN: llc < %s -march=r600 --mcpu=SI | FileCheck %s --check-prefix=SI-CHECK +; RUN: llc < %s -march=r600 --mcpu=SI -verify-machineinstrs| FileCheck %s --check-prefix=SI-CHECK ; R600-CHECK-LABEL: @sqrt_f32 ; R600-CHECK: RECIPSQRT_CLAMPED * T{{[0-9]\.[XYZW]}}, KC0[2].Z diff --git a/test/CodeGen/R600/load-i1.ll b/test/CodeGen/R600/load-i1.ll new file mode 100644 index 0000000..9ba81b8 --- /dev/null +++ b/test/CodeGen/R600/load-i1.ll @@ -0,0 +1,107 @@ +; RUN: llc -march=r600 
-mcpu=SI -verify-machineinstrs< %s | FileCheck -check-prefix=SI %s + + +; SI-LABEL: @global_copy_i1_to_i1 +; SI: BUFFER_LOAD_UBYTE +; SI: V_AND_B32_e32 v{{[0-9]+}}, 1 +; SI: BUFFER_STORE_BYTE +; SI: S_ENDPGM +define void @global_copy_i1_to_i1(i1 addrspace(1)* %out, i1 addrspace(1)* %in) nounwind { + %load = load i1 addrspace(1)* %in + store i1 %load, i1 addrspace(1)* %out, align 1 + ret void +} + +; SI-LABEL: @global_sextload_i1_to_i32 +; XSI: BUFFER_LOAD_BYTE +; SI: BUFFER_STORE_DWORD +; SI: S_ENDPGM +define void @global_sextload_i1_to_i32(i32 addrspace(1)* %out, i1 addrspace(1)* %in) nounwind { + %load = load i1 addrspace(1)* %in + %ext = sext i1 %load to i32 + store i32 %ext, i32 addrspace(1)* %out, align 4 + ret void +} + +; SI-LABEL: @global_zextload_i1_to_i32 +; SI: BUFFER_LOAD_UBYTE +; SI: BUFFER_STORE_DWORD +; SI: S_ENDPGM +define void @global_zextload_i1_to_i32(i32 addrspace(1)* %out, i1 addrspace(1)* %in) nounwind { + %load = load i1 addrspace(1)* %in + %ext = zext i1 %load to i32 + store i32 %ext, i32 addrspace(1)* %out, align 4 + ret void +} + +; SI-LABEL: @global_sextload_i1_to_i64 +; XSI: BUFFER_LOAD_BYTE +; SI: BUFFER_STORE_DWORDX2 +; SI: S_ENDPGM +define void @global_sextload_i1_to_i64(i64 addrspace(1)* %out, i1 addrspace(1)* %in) nounwind { + %load = load i1 addrspace(1)* %in + %ext = sext i1 %load to i64 + store i64 %ext, i64 addrspace(1)* %out, align 4 + ret void +} + +; SI-LABEL: @global_zextload_i1_to_i64 +; SI: BUFFER_LOAD_UBYTE +; SI: BUFFER_STORE_DWORDX2 +; SI: S_ENDPGM +define void @global_zextload_i1_to_i64(i64 addrspace(1)* %out, i1 addrspace(1)* %in) nounwind { + %load = load i1 addrspace(1)* %in + %ext = zext i1 %load to i64 + store i64 %ext, i64 addrspace(1)* %out, align 4 + ret void +} + +; SI-LABEL: @i1_arg +; SI: BUFFER_LOAD_UBYTE +; SI: V_AND_B32_e32 +; SI: BUFFER_STORE_BYTE +; SI: S_ENDPGM +define void @i1_arg(i1 addrspace(1)* %out, i1 %x) nounwind { + store i1 %x, i1 addrspace(1)* %out, align 1 + ret void +} + +; SI-LABEL: 
@i1_arg_zext_i32 +; SI: BUFFER_LOAD_UBYTE +; SI: BUFFER_STORE_DWORD +; SI: S_ENDPGM +define void @i1_arg_zext_i32(i32 addrspace(1)* %out, i1 %x) nounwind { + %ext = zext i1 %x to i32 + store i32 %ext, i32 addrspace(1)* %out, align 4 + ret void +} + +; SI-LABEL: @i1_arg_zext_i64 +; SI: BUFFER_LOAD_UBYTE +; SI: BUFFER_STORE_DWORDX2 +; SI: S_ENDPGM +define void @i1_arg_zext_i64(i64 addrspace(1)* %out, i1 %x) nounwind { + %ext = zext i1 %x to i64 + store i64 %ext, i64 addrspace(1)* %out, align 8 + ret void +} + +; SI-LABEL: @i1_arg_sext_i32 +; XSI: BUFFER_LOAD_BYTE +; SI: BUFFER_STORE_DWORD +; SI: S_ENDPGM +define void @i1_arg_sext_i32(i32 addrspace(1)* %out, i1 %x) nounwind { + %ext = sext i1 %x to i32 + store i32 %ext, i32addrspace(1)* %out, align 4 + ret void +} + +; SI-LABEL: @i1_arg_sext_i64 +; XSI: BUFFER_LOAD_BYTE +; SI: BUFFER_STORE_DWORDX2 +; SI: S_ENDPGM +define void @i1_arg_sext_i64(i64 addrspace(1)* %out, i1 %x) nounwind { + %ext = sext i1 %x to i64 + store i64 %ext, i64 addrspace(1)* %out, align 8 + ret void +} diff --git a/test/CodeGen/R600/local-64.ll b/test/CodeGen/R600/local-64.ll index 87f18ae..c52b41b 100644 --- a/test/CodeGen/R600/local-64.ll +++ b/test/CodeGen/R600/local-64.ll @@ -1,7 +1,7 @@ -; RUN: llc -march=r600 -mcpu=SI < %s | FileCheck -check-prefix=SI %s +; RUN: llc -march=r600 -mcpu=SI -verify-machineinstrs< %s | FileCheck -check-prefix=SI %s ; SI-LABEL: @local_i32_load -; SI: DS_READ_B32 [[REG:v[0-9]+]], v{{[0-9]+}}, 28, [M0] +; SI: DS_READ_B32 [[REG:v[0-9]+]], v{{[0-9]+}}, 0x1c, [M0] ; SI: BUFFER_STORE_DWORD [[REG]], define void @local_i32_load(i32 addrspace(1)* %out, i32 addrspace(3)* %in) nounwind { %gep = getelementptr i32 addrspace(3)* %in, i32 7 @@ -11,7 +11,7 @@ define void @local_i32_load(i32 addrspace(1)* %out, i32 addrspace(3)* %in) nounw } ; SI-LABEL: @local_i32_load_0_offset -; SI: DS_READ_B32 [[REG:v[0-9]+]], v{{[0-9]+}}, 0, [M0] +; SI: DS_READ_B32 [[REG:v[0-9]+]], v{{[0-9]+}}, 0x0, [M0] ; SI: BUFFER_STORE_DWORD [[REG]], 
define void @local_i32_load_0_offset(i32 addrspace(1)* %out, i32 addrspace(3)* %in) nounwind { %val = load i32 addrspace(3)* %in, align 4 @@ -21,7 +21,7 @@ define void @local_i32_load_0_offset(i32 addrspace(1)* %out, i32 addrspace(3)* % ; SI-LABEL: @local_i8_load_i16_max_offset ; SI-NOT: ADD -; SI: DS_READ_U8 [[REG:v[0-9]+]], {{v[0-9]+}}, -1, [M0] +; SI: DS_READ_U8 [[REG:v[0-9]+]], {{v[0-9]+}}, 0xffff, [M0] ; SI: BUFFER_STORE_BYTE [[REG]], define void @local_i8_load_i16_max_offset(i8 addrspace(1)* %out, i8 addrspace(3)* %in) nounwind { %gep = getelementptr i8 addrspace(3)* %in, i32 65535 @@ -31,9 +31,9 @@ define void @local_i8_load_i16_max_offset(i8 addrspace(1)* %out, i8 addrspace(3) } ; SI-LABEL: @local_i8_load_over_i16_max_offset -; SI: S_ADD_I32 [[ADDR:s[0-9]+]], s{{[0-9]+}}, 65536 +; SI: S_ADD_I32 [[ADDR:s[0-9]+]], s{{[0-9]+}}, 0x10000 ; SI: V_MOV_B32_e32 [[VREGADDR:v[0-9]+]], [[ADDR]] -; SI: DS_READ_U8 [[REG:v[0-9]+]], [[VREGADDR]], 0, [M0] +; SI: DS_READ_U8 [[REG:v[0-9]+]], [[VREGADDR]], 0x0, [M0] ; SI: BUFFER_STORE_BYTE [[REG]], define void @local_i8_load_over_i16_max_offset(i8 addrspace(1)* %out, i8 addrspace(3)* %in) nounwind { %gep = getelementptr i8 addrspace(3)* %in, i32 65536 @@ -44,7 +44,7 @@ define void @local_i8_load_over_i16_max_offset(i8 addrspace(1)* %out, i8 addrspa ; SI-LABEL: @local_i64_load ; SI-NOT: ADD -; SI: DS_READ_B64 [[REG:v[[0-9]+:[0-9]+]]], v{{[0-9]+}}, 56, [M0] +; SI: DS_READ_B64 [[REG:v[[0-9]+:[0-9]+]]], v{{[0-9]+}}, 0x38, [M0] ; SI: BUFFER_STORE_DWORDX2 [[REG]], define void @local_i64_load(i64 addrspace(1)* %out, i64 addrspace(3)* %in) nounwind { %gep = getelementptr i64 addrspace(3)* %in, i32 7 @@ -54,7 +54,7 @@ define void @local_i64_load(i64 addrspace(1)* %out, i64 addrspace(3)* %in) nounw } ; SI-LABEL: @local_i64_load_0_offset -; SI: DS_READ_B64 [[REG:v\[[0-9]+:[0-9]+\]]], v{{[0-9]+}}, 0, [M0] +; SI: DS_READ_B64 [[REG:v\[[0-9]+:[0-9]+\]]], v{{[0-9]+}}, 0x0, [M0] ; SI: BUFFER_STORE_DWORDX2 [[REG]], define void 
@local_i64_load_0_offset(i64 addrspace(1)* %out, i64 addrspace(3)* %in) nounwind { %val = load i64 addrspace(3)* %in, align 8 @@ -64,7 +64,7 @@ define void @local_i64_load_0_offset(i64 addrspace(1)* %out, i64 addrspace(3)* % ; SI-LABEL: @local_f64_load ; SI-NOT: ADD -; SI: DS_READ_B64 [[REG:v[[0-9]+:[0-9]+]]], v{{[0-9]+}}, 56, [M0] +; SI: DS_READ_B64 [[REG:v[[0-9]+:[0-9]+]]], v{{[0-9]+}}, 0x38, [M0] ; SI: BUFFER_STORE_DWORDX2 [[REG]], define void @local_f64_load(double addrspace(1)* %out, double addrspace(3)* %in) nounwind { %gep = getelementptr double addrspace(3)* %in, i32 7 @@ -74,7 +74,7 @@ define void @local_f64_load(double addrspace(1)* %out, double addrspace(3)* %in) } ; SI-LABEL: @local_f64_load_0_offset -; SI: DS_READ_B64 [[REG:v\[[0-9]+:[0-9]+\]]], v{{[0-9]+}}, 0, [M0] +; SI: DS_READ_B64 [[REG:v\[[0-9]+:[0-9]+\]]], v{{[0-9]+}}, 0x0, [M0] ; SI: BUFFER_STORE_DWORDX2 [[REG]], define void @local_f64_load_0_offset(double addrspace(1)* %out, double addrspace(3)* %in) nounwind { %val = load double addrspace(3)* %in, align 8 @@ -84,7 +84,7 @@ define void @local_f64_load_0_offset(double addrspace(1)* %out, double addrspace ; SI-LABEL: @local_i64_store ; SI-NOT: ADD -; SI: DS_WRITE_B64 v{{[0-9]+}}, {{v\[[0-9]+:[0-9]+\]}}, 56 [M0] +; SI: DS_WRITE_B64 v{{[0-9]+}}, {{v\[[0-9]+:[0-9]+\]}}, 0x38 [M0] define void @local_i64_store(i64 addrspace(3)* %out) nounwind { %gep = getelementptr i64 addrspace(3)* %out, i32 7 store i64 5678, i64 addrspace(3)* %gep, align 8 @@ -93,7 +93,7 @@ define void @local_i64_store(i64 addrspace(3)* %out) nounwind { ; SI-LABEL: @local_i64_store_0_offset ; SI-NOT: ADD -; SI: DS_WRITE_B64 v{{[0-9]+}}, {{v\[[0-9]+:[0-9]+\]}}, 0 [M0] +; SI: DS_WRITE_B64 v{{[0-9]+}}, {{v\[[0-9]+:[0-9]+\]}}, 0x0 [M0] define void @local_i64_store_0_offset(i64 addrspace(3)* %out) nounwind { store i64 1234, i64 addrspace(3)* %out, align 8 ret void @@ -101,7 +101,7 @@ define void @local_i64_store_0_offset(i64 addrspace(3)* %out) nounwind { ; SI-LABEL: @local_f64_store ; 
SI-NOT: ADD -; SI: DS_WRITE_B64 v{{[0-9]+}}, {{v\[[0-9]+:[0-9]+\]}}, 56 [M0] +; SI: DS_WRITE_B64 v{{[0-9]+}}, {{v\[[0-9]+:[0-9]+\]}}, 0x38 [M0] define void @local_f64_store(double addrspace(3)* %out) nounwind { %gep = getelementptr double addrspace(3)* %out, i32 7 store double 16.0, double addrspace(3)* %gep, align 8 @@ -109,7 +109,7 @@ define void @local_f64_store(double addrspace(3)* %out) nounwind { } ; SI-LABEL: @local_f64_store_0_offset -; SI: DS_WRITE_B64 v{{[0-9]+}}, {{v\[[0-9]+:[0-9]+\]}}, 0 [M0] +; SI: DS_WRITE_B64 v{{[0-9]+}}, {{v\[[0-9]+:[0-9]+\]}}, 0x0 [M0] define void @local_f64_store_0_offset(double addrspace(3)* %out) nounwind { store double 20.0, double addrspace(3)* %out, align 8 ret void @@ -117,8 +117,8 @@ define void @local_f64_store_0_offset(double addrspace(3)* %out) nounwind { ; SI-LABEL: @local_v2i64_store ; SI-NOT: ADD -; SI-DAG: DS_WRITE_B64 v{{[0-9]+}}, {{v\[[0-9]+:[0-9]+\]}}, 120 [M0] -; SI-DAG: DS_WRITE_B64 v{{[0-9]+}}, {{v\[[0-9]+:[0-9]+\]}}, 112 [M0] +; SI-DAG: DS_WRITE_B64 v{{[0-9]+}}, {{v\[[0-9]+:[0-9]+\]}}, 0x78 [M0] +; SI-DAG: DS_WRITE_B64 v{{[0-9]+}}, {{v\[[0-9]+:[0-9]+\]}}, 0x70 [M0] define void @local_v2i64_store(<2 x i64> addrspace(3)* %out) nounwind { %gep = getelementptr <2 x i64> addrspace(3)* %out, i32 7 store <2 x i64> <i64 5678, i64 5678>, <2 x i64> addrspace(3)* %gep, align 16 @@ -127,8 +127,8 @@ define void @local_v2i64_store(<2 x i64> addrspace(3)* %out) nounwind { ; SI-LABEL: @local_v2i64_store_0_offset ; SI-NOT: ADD -; SI-DAG: DS_WRITE_B64 v{{[0-9]+}}, {{v\[[0-9]+:[0-9]+\]}}, 8 [M0] -; SI-DAG: DS_WRITE_B64 v{{[0-9]+}}, {{v\[[0-9]+:[0-9]+\]}}, 0 [M0] +; SI-DAG: DS_WRITE_B64 v{{[0-9]+}}, {{v\[[0-9]+:[0-9]+\]}}, 0x8 [M0] +; SI-DAG: DS_WRITE_B64 v{{[0-9]+}}, {{v\[[0-9]+:[0-9]+\]}}, 0x0 [M0] define void @local_v2i64_store_0_offset(<2 x i64> addrspace(3)* %out) nounwind { store <2 x i64> <i64 1234, i64 1234>, <2 x i64> addrspace(3)* %out, align 16 ret void @@ -136,10 +136,10 @@ define void @local_v2i64_store_0_offset(<2 x 
i64> addrspace(3)* %out) nounwind { ; SI-LABEL: @local_v4i64_store ; SI-NOT: ADD -; SI-DAG: DS_WRITE_B64 v{{[0-9]+}}, {{v\[[0-9]+:[0-9]+\]}}, 248 [M0] -; SI-DAG: DS_WRITE_B64 v{{[0-9]+}}, {{v\[[0-9]+:[0-9]+\]}}, 240 [M0] -; SI-DAG: DS_WRITE_B64 v{{[0-9]+}}, {{v\[[0-9]+:[0-9]+\]}}, 232 [M0] -; SI-DAG: DS_WRITE_B64 v{{[0-9]+}}, {{v\[[0-9]+:[0-9]+\]}}, 224 [M0] +; SI-DAG: DS_WRITE_B64 v{{[0-9]+}}, {{v\[[0-9]+:[0-9]+\]}}, 0xf8 [M0] +; SI-DAG: DS_WRITE_B64 v{{[0-9]+}}, {{v\[[0-9]+:[0-9]+\]}}, 0xf0 [M0] +; SI-DAG: DS_WRITE_B64 v{{[0-9]+}}, {{v\[[0-9]+:[0-9]+\]}}, 0xe8 [M0] +; SI-DAG: DS_WRITE_B64 v{{[0-9]+}}, {{v\[[0-9]+:[0-9]+\]}}, 0xe0 [M0] define void @local_v4i64_store(<4 x i64> addrspace(3)* %out) nounwind { %gep = getelementptr <4 x i64> addrspace(3)* %out, i32 7 store <4 x i64> <i64 5678, i64 5678, i64 5678, i64 5678>, <4 x i64> addrspace(3)* %gep, align 16 @@ -148,10 +148,10 @@ define void @local_v4i64_store(<4 x i64> addrspace(3)* %out) nounwind { ; SI-LABEL: @local_v4i64_store_0_offset ; SI-NOT: ADD -; SI-DAG: DS_WRITE_B64 v{{[0-9]+}}, {{v\[[0-9]+:[0-9]+\]}}, 24 [M0] -; SI-DAG: DS_WRITE_B64 v{{[0-9]+}}, {{v\[[0-9]+:[0-9]+\]}}, 16 [M0] -; SI-DAG: DS_WRITE_B64 v{{[0-9]+}}, {{v\[[0-9]+:[0-9]+\]}}, 8 [M0] -; SI-DAG: DS_WRITE_B64 v{{[0-9]+}}, {{v\[[0-9]+:[0-9]+\]}}, 0 [M0] +; SI-DAG: DS_WRITE_B64 v{{[0-9]+}}, {{v\[[0-9]+:[0-9]+\]}}, 0x18 [M0] +; SI-DAG: DS_WRITE_B64 v{{[0-9]+}}, {{v\[[0-9]+:[0-9]+\]}}, 0x10 [M0] +; SI-DAG: DS_WRITE_B64 v{{[0-9]+}}, {{v\[[0-9]+:[0-9]+\]}}, 0x8 [M0] +; SI-DAG: DS_WRITE_B64 v{{[0-9]+}}, {{v\[[0-9]+:[0-9]+\]}}, 0x0 [M0] define void @local_v4i64_store_0_offset(<4 x i64> addrspace(3)* %out) nounwind { store <4 x i64> <i64 1234, i64 1234, i64 1234, i64 1234>, <4 x i64> addrspace(3)* %out, align 16 ret void diff --git a/test/CodeGen/R600/local-memory-two-objects.ll b/test/CodeGen/R600/local-memory-two-objects.ll index 616000d..1e42285 100644 --- a/test/CodeGen/R600/local-memory-two-objects.ll +++ 
b/test/CodeGen/R600/local-memory-two-objects.ll @@ -28,8 +28,8 @@ ; constant offsets. ; EG-CHECK: LDS_READ_RET {{[*]*}} OQAP, {{PV|T}}[[ADDRR:[0-9]*\.[XYZW]]] ; EG-CHECK-NOT: LDS_READ_RET {{[*]*}} OQAP, T[[ADDRR]] -; SI-CHECK: DS_READ_B32 {{v[0-9]+}}, [[ADDRR:v[0-9]+]], 16 -; SI-CHECK: DS_READ_B32 {{v[0-9]+}}, [[ADDRR]], 0, +; SI-CHECK: DS_READ_B32 {{v[0-9]+}}, [[ADDRR:v[0-9]+]], 0x10 +; SI-CHECK: DS_READ_B32 {{v[0-9]+}}, [[ADDRR]], 0x0, define void @local_memory_two_objects(i32 addrspace(1)* %out) { entry: diff --git a/test/CodeGen/R600/loop-idiom.ll b/test/CodeGen/R600/loop-idiom.ll index 8a9cba2..128f661 100644 --- a/test/CodeGen/R600/loop-idiom.ll +++ b/test/CodeGen/R600/loop-idiom.ll @@ -1,5 +1,5 @@ ; RUN: opt -basicaa -loop-idiom -S < %s -march=r600 -mcpu=redwood | FileCheck --check-prefix=R600 --check-prefix=FUNC %s -; RUN: opt -basicaa -loop-idiom -S < %s -march=r600 -mcpu=SI | FileCheck --check-prefix=SI --check-prefix=FUNC %s +; RUN: opt -basicaa -loop-idiom -S < %s -march=r600 -mcpu=SI -verify-machineinstrs| FileCheck --check-prefix=SI --check-prefix=FUNC %s target datalayout = "e-p:32:32-p1:64:64-p2:64:64-p3:32:32-p4:32:32-p5:64:64-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128-v192:256-v256:256-v512:512-v1024:1024-v2048:2048-n32:64" target triple = "r600--" diff --git a/test/CodeGen/R600/mad_int24.ll b/test/CodeGen/R600/mad_int24.ll index df063ec..abb5290 100644 --- a/test/CodeGen/R600/mad_int24.ll +++ b/test/CodeGen/R600/mad_int24.ll @@ -1,12 +1,15 @@ -; RUN: llc < %s -march=r600 -mcpu=redwood | FileCheck %s --check-prefix=EG-CHECK -; RUN: llc < %s -march=r600 -mcpu=cayman | FileCheck %s --check-prefix=CM-CHECK -; RUN: llc < %s -march=r600 -mcpu=SI -verify-machineinstrs | FileCheck %s --check-prefix=SI-CHECK +; RUN: llc < %s -march=r600 -mcpu=redwood | FileCheck %s --check-prefix=EG --check-prefix=FUNC +; RUN: llc < %s -march=r600 -mcpu=cayman | FileCheck %s --check-prefix=CM --check-prefix=FUNC +; RUN: llc < %s -march=r600 -mcpu=SI -verify-machineinstrs 
| FileCheck %s --check-prefix=SI --check-prefix=FUNC -; EG-CHECK: @i32_mad24 +; FUNC-LABEL: @i32_mad24 ; Signed 24-bit multiply is not supported on pre-Cayman GPUs. -; EG-CHECK: MULLO_INT -; CM-CHECK: MULADD_INT24 {{[ *]*}}T{{[0-9].[XYZW]}}, KC0[2].Z, KC0[2].W, KC0[3].X -; SI-CHECK: V_MAD_I32_I24 +; EG: MULLO_INT +; Make sure we aren't masking the inputs. +; CM-NOT: AND +; CM: MULADD_INT24 +; SI-NOT: AND +; SI: V_MAD_I32_I24 define void @i32_mad24(i32 addrspace(1)* %out, i32 %a, i32 %b, i32 %c) { entry: %0 = shl i32 %a, 8 diff --git a/test/CodeGen/R600/mad_uint24.ll b/test/CodeGen/R600/mad_uint24.ll index 3dcadc9..0f0893b 100644 --- a/test/CodeGen/R600/mad_uint24.ll +++ b/test/CodeGen/R600/mad_uint24.ll @@ -1,11 +1,10 @@ -; RUN: llc < %s -march=r600 -mcpu=redwood | FileCheck %s --check-prefix=EG-CHECK -; RUN: llc < %s -march=r600 -mcpu=cayman | FileCheck %s --check-prefix=EG-CHECK -; RUN: llc < %s -march=r600 -mcpu=SI -verify-machineinstrs | FileCheck %s --check-prefix=SI-CHECK +; RUN: llc < %s -march=r600 -mcpu=redwood | FileCheck %s --check-prefix=EG --check-prefix=FUNC +; RUN: llc < %s -march=r600 -mcpu=cayman | FileCheck %s --check-prefix=EG --check-prefix=FUNC +; RUN: llc < %s -march=r600 -mcpu=SI -verify-machineinstrs | FileCheck %s --check-prefix=SI --check-prefix=FUNC -; EG-CHECK-LABEL: @u32_mad24 -; EG-CHECK: MULADD_UINT24 {{[* ]*}}T{{[0-9]\.[XYZW]}}, KC0[2].Z, KC0[2].W, KC0[3].X -; SI-CHECK-LABEL: @u32_mad24 -; SI-CHECK: V_MAD_U32_U24 +; FUNC-LABEL: @u32_mad24 +; EG: MULADD_UINT24 +; SI: V_MAD_U32_U24 define void @u32_mad24(i32 addrspace(1)* %out, i32 %a, i32 %b, i32 %c) { entry: @@ -19,18 +18,14 @@ entry: ret void } -; EG-CHECK-LABEL: @i16_mad24 -; EG-CHECK-DAG: VTX_READ_16 [[A:T[0-9]\.X]], T{{[0-9]}}.X, 40 -; EG-CHECK-DAG: VTX_READ_16 [[B:T[0-9]\.X]], T{{[0-9]}}.X, 44 -; EG-CHECK-DAG: VTX_READ_16 [[C:T[0-9]\.X]], T{{[0-9]}}.X, 48 +; FUNC-LABEL: @i16_mad24 ; The order of A and B does not matter. 
-; EG-CHECK: MULADD_UINT24 {{[* ]*}}T{{[0-9]}}.[[MAD_CHAN:[XYZW]]], [[A]], [[B]], [[C]] +; EG: MULADD_UINT24 {{[* ]*}}T{{[0-9]}}.[[MAD_CHAN:[XYZW]]] ; The result must be sign-extended -; EG-CHECK: BFE_INT {{[* ]*}}T{{[0-9]\.[XYZW]}}, PV.[[MAD_CHAN]], 0.0, literal.x -; EG-CHECK: 16 -; SI-CHECK-LABEL: @i16_mad24 -; SI-CHECK: V_MAD_U32_U24 [[MAD:v[0-9]]], {{[sv][0-9], [sv][0-9]}} -; SI-CHECK: V_BFE_I32 v{{[0-9]}}, [[MAD]], 0, 16 +; EG: BFE_INT {{[* ]*}}T{{[0-9]\.[XYZW]}}, PV.[[MAD_CHAN]], 0.0, literal.x +; EG: 16 +; SI: V_MAD_U32_U24 [[MAD:v[0-9]]], {{[sv][0-9], [sv][0-9]}} +; SI: V_BFE_I32 v{{[0-9]}}, [[MAD]], 0, 16 define void @i16_mad24(i32 addrspace(1)* %out, i16 %a, i16 %b, i16 %c) { entry: @@ -41,18 +36,13 @@ entry: ret void } -; EG-CHECK-LABEL: @i8_mad24 -; EG-CHECK-DAG: VTX_READ_8 [[A:T[0-9]\.X]], T{{[0-9]}}.X, 40 -; EG-CHECK-DAG: VTX_READ_8 [[B:T[0-9]\.X]], T{{[0-9]}}.X, 44 -; EG-CHECK-DAG: VTX_READ_8 [[C:T[0-9]\.X]], T{{[0-9]}}.X, 48 -; The order of A and B does not matter. -; EG-CHECK: MULADD_UINT24 {{[* ]*}}T{{[0-9]}}.[[MAD_CHAN:[XYZW]]], [[A]], [[B]], [[C]] +; FUNC-LABEL: @i8_mad24 +; EG: MULADD_UINT24 {{[* ]*}}T{{[0-9]}}.[[MAD_CHAN:[XYZW]]] ; The result must be sign-extended -; EG-CHECK: BFE_INT {{[* ]*}}T{{[0-9]\.[XYZW]}}, PV.[[MAD_CHAN]], 0.0, literal.x -; EG-CHECK: 8 -; SI-CHECK-LABEL: @i8_mad24 -; SI-CHECK: V_MAD_U32_U24 [[MUL:v[0-9]]], {{[sv][0-9], [sv][0-9]}} -; SI-CHECK: V_BFE_I32 v{{[0-9]}}, [[MUL]], 0, 8 +; EG: BFE_INT {{[* ]*}}T{{[0-9]\.[XYZW]}}, PV.[[MAD_CHAN]], 0.0, literal.x +; EG: 8 +; SI: V_MAD_U32_U24 [[MUL:v[0-9]]], {{[sv][0-9], [sv][0-9]}} +; SI: V_BFE_I32 v{{[0-9]}}, [[MUL]], 0, 8 define void @i8_mad24(i32 addrspace(1)* %out, i8 %a, i8 %b, i8 %c) { entry: @@ -62,3 +52,24 @@ entry: store i32 %2, i32 addrspace(1)* %out ret void } + +; This tests for a bug where the mad_u24 pattern matcher would call +; SimplifyDemandedBits on the first operand of the mul instruction +; assuming that the pattern would be matched to a 24-bit mad. 
This +; led to some instructions being incorrectly erased when the entire +; 24-bit mad pattern wasn't being matched. + +; Check that the select instruction is not deleted. +; FUNC-LABEL: @i24_i32_i32_mad +; EG: CNDE_INT +; SI: V_CNDMASK +define void @i24_i32_i32_mad(i32 addrspace(1)* %out, i32 %a, i32 %b, i32 %c, i32 %d) { +entry: + %0 = ashr i32 %a, 8 + %1 = icmp ne i32 %c, 0 + %2 = select i1 %1, i32 %0, i32 34 + %3 = mul i32 %2, %c + %4 = add i32 %3, %d + store i32 %4, i32 addrspace(1)* %out + ret void +} diff --git a/test/CodeGen/R600/mubuf.ll b/test/CodeGen/R600/mubuf.ll index 2d5ddeb..f465d3d 100644 --- a/test/CodeGen/R600/mubuf.ll +++ b/test/CodeGen/R600/mubuf.ll @@ -6,7 +6,7 @@ ; MUBUF load with an immediate byte offset that fits into 12-bits ; CHECK-LABEL: @mubuf_load0 -; CHECK: BUFFER_LOAD_DWORD v{{[0-9]}}, s[{{[0-9]:[0-9]}}] + v[{{[0-9]:[0-9]}}] + 4 ; encoding: [0x04,0x80 +; CHECK: BUFFER_LOAD_DWORD v{{[0-9]}}, s[{{[0-9]:[0-9]}}] + v[{{[0-9]:[0-9]}}] + 0x4 ; encoding: [0x04,0x80 define void @mubuf_load0(i32 addrspace(1)* %out, i32 addrspace(1)* %in) { entry: %0 = getelementptr i32 addrspace(1)* %in, i64 1 @@ -17,7 +17,7 @@ entry: ; MUBUF load with the largest possible immediate offset ; CHECK-LABEL: @mubuf_load1 -; CHECK: BUFFER_LOAD_UBYTE v{{[0-9]}}, s[{{[0-9]:[0-9]}}] + v[{{[0-9]:[0-9]}}] + 4095 ; encoding: [0xff,0x8f +; CHECK: BUFFER_LOAD_UBYTE v{{[0-9]}}, s[{{[0-9]:[0-9]}}] + v[{{[0-9]:[0-9]}}] + 0xfff ; encoding: [0xff,0x8f define void @mubuf_load1(i8 addrspace(1)* %out, i8 addrspace(1)* %in) { entry: %0 = getelementptr i8 addrspace(1)* %in, i64 4095 @@ -28,7 +28,7 @@ entry: ; MUBUF load with an immediate byte offset that doesn't fit into 12-bits ; CHECK-LABEL: @mubuf_load2 -; CHECK: BUFFER_LOAD_DWORD v{{[0-9]}}, s[{{[0-9]:[0-9]}}] + v[{{[0-9]:[0-9]}}] + 0 ; encoding: [0x00,0x80 +; CHECK: BUFFER_LOAD_DWORD v{{[0-9]}}, s[{{[0-9]:[0-9]}}] + v[{{[0-9]:[0-9]}}] + 0x0 ; encoding: [0x00,0x80 define void @mubuf_load2(i32 addrspace(1)* %out, i32 
addrspace(1)* %in) { entry: %0 = getelementptr i32 addrspace(1)* %in, i64 1024 @@ -40,7 +40,7 @@ entry: ; MUBUF load with a 12-bit immediate offset and a register offset ; CHECK-LABEL: @mubuf_load3 ; CHECK-NOT: ADD -; CHECK: BUFFER_LOAD_DWORD v{{[0-9]}}, s[{{[0-9]:[0-9]}}] + v[{{[0-9]:[0-9]}}] + 4 ; encoding: [0x04,0x80 +; CHECK: BUFFER_LOAD_DWORD v{{[0-9]}}, s[{{[0-9]:[0-9]}}] + v[{{[0-9]:[0-9]}}] + 0x4 ; encoding: [0x04,0x80 define void @mubuf_load3(i32 addrspace(1)* %out, i32 addrspace(1)* %in, i64 %offset) { entry: %0 = getelementptr i32 addrspace(1)* %in, i64 %offset @@ -56,7 +56,7 @@ entry: ; MUBUF store with an immediate byte offset that fits into 12-bits ; CHECK-LABEL: @mubuf_store0 -; CHECK: BUFFER_STORE_DWORD v{{[0-9]}}, s[{{[0-9]:[0-9]}}] + v[{{[0-9]:[0-9]}}] + 4 ; encoding: [0x04,0x80 +; CHECK: BUFFER_STORE_DWORD v{{[0-9]}}, s[{{[0-9]:[0-9]}}] + v[{{[0-9]:[0-9]}}] + 0x4 ; encoding: [0x04,0x80 define void @mubuf_store0(i32 addrspace(1)* %out) { entry: %0 = getelementptr i32 addrspace(1)* %out, i64 1 @@ -66,7 +66,7 @@ entry: ; MUBUF store with the largest possible immediate offset ; CHECK-LABEL: @mubuf_store1 -; CHECK: BUFFER_STORE_BYTE v{{[0-9]}}, s[{{[0-9]:[0-9]}}] + v[{{[0-9]:[0-9]}}] + 4095 ; encoding: [0xff,0x8f +; CHECK: BUFFER_STORE_BYTE v{{[0-9]}}, s[{{[0-9]:[0-9]}}] + v[{{[0-9]:[0-9]}}] + 0xfff ; encoding: [0xff,0x8f define void @mubuf_store1(i8 addrspace(1)* %out) { entry: @@ -77,7 +77,7 @@ entry: ; MUBUF store with an immediate byte offset that doesn't fit into 12-bits ; CHECK-LABEL: @mubuf_store2 -; CHECK: BUFFER_STORE_DWORD v{{[0-9]}}, s[{{[0-9]:[0-9]}}] + v[{{[0-9]:[0-9]}}] + 0 ; encoding: [0x00,0x80 +; CHECK: BUFFER_STORE_DWORD v{{[0-9]}}, s[{{[0-9]:[0-9]}}] + v[{{[0-9]:[0-9]}}] + 0x0 ; encoding: [0x00,0x80 define void @mubuf_store2(i32 addrspace(1)* %out) { entry: %0 = getelementptr i32 addrspace(1)* %out, i64 1024 @@ -88,7 +88,7 @@ entry: ; MUBUF store with a 12-bit immediate offset and a register offset ; CHECK-LABEL: @mubuf_store3 ; 
CHECK-NOT: ADD -; CHECK: BUFFER_STORE_DWORD v{{[0-9]}}, s[{{[0-9]:[0-9]}}] + v[{{[0-9]:[0-9]}}] + 4 ; encoding: [0x04,0x80 +; CHECK: BUFFER_STORE_DWORD v{{[0-9]}}, s[{{[0-9]:[0-9]}}] + v[{{[0-9]:[0-9]}}] + 0x4 ; encoding: [0x04,0x80 define void @mubuf_store3(i32 addrspace(1)* %out, i64 %offset) { entry: %0 = getelementptr i32 addrspace(1)* %out, i64 %offset diff --git a/test/CodeGen/R600/mul.ll b/test/CodeGen/R600/mul.ll index e176148..6ed754c 100644 --- a/test/CodeGen/R600/mul.ll +++ b/test/CodeGen/R600/mul.ll @@ -1,15 +1,14 @@ -; RUN: llc < %s -march=r600 -mcpu=redwood | FileCheck --check-prefix=EG-CHECK %s -; RUN: llc < %s -march=r600 -mcpu=verde -verify-machineinstrs | FileCheck --check-prefix=SI-CHECK %s +; RUN: llc < %s -march=r600 -mcpu=redwood | FileCheck --check-prefix=EG %s --check-prefix=FUNC +; RUN: llc < %s -march=r600 -mcpu=verde -verify-machineinstrs | FileCheck --check-prefix=SI --check-prefix=FUNC %s ; mul24 and mad24 are affected -;EG-CHECK: @test2 -;EG-CHECK: MULLO_INT {{\*? *}}T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}} -;EG-CHECK: MULLO_INT {{\*? *}}T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}} +;FUNC-LABEL: @test2 +;EG: MULLO_INT {{\*? *}}T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}} +;EG: MULLO_INT {{\*? *}}T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}} -;SI-CHECK: @test2 -;SI-CHECK: V_MUL_LO_I32 v{{[0-9]+, v[0-9]+, v[0-9]+}} -;SI-CHECK: V_MUL_LO_I32 v{{[0-9]+, v[0-9]+, v[0-9]+}} +;SI: V_MUL_LO_I32 v{{[0-9]+, v[0-9]+, v[0-9]+}} +;SI: V_MUL_LO_I32 v{{[0-9]+, v[0-9]+, v[0-9]+}} define void @test2(<2 x i32> addrspace(1)* %out, <2 x i32> addrspace(1)* %in) { %b_ptr = getelementptr <2 x i32> addrspace(1)* %in, i32 1 @@ -20,17 +19,16 @@ define void @test2(<2 x i32> addrspace(1)* %out, <2 x i32> addrspace(1)* %in) { ret void } -;EG-CHECK: @test4 -;EG-CHECK: MULLO_INT {{\*? *}}T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}} -;EG-CHECK: MULLO_INT {{\*? 
*}}T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}} -;EG-CHECK: MULLO_INT {{\*? *}}T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}} -;EG-CHECK: MULLO_INT {{\*? *}}T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}} +;FUNC-LABEL: @test4 +;EG: MULLO_INT {{\*? *}}T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}} +;EG: MULLO_INT {{\*? *}}T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}} +;EG: MULLO_INT {{\*? *}}T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}} +;EG: MULLO_INT {{\*? *}}T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}} -;SI-CHECK: @test4 -;SI-CHECK: V_MUL_LO_I32 v{{[0-9]+, v[0-9]+, v[0-9]+}} -;SI-CHECK: V_MUL_LO_I32 v{{[0-9]+, v[0-9]+, v[0-9]+}} -;SI-CHECK: V_MUL_LO_I32 v{{[0-9]+, v[0-9]+, v[0-9]+}} -;SI-CHECK: V_MUL_LO_I32 v{{[0-9]+, v[0-9]+, v[0-9]+}} +;SI: V_MUL_LO_I32 v{{[0-9]+, v[0-9]+, v[0-9]+}} +;SI: V_MUL_LO_I32 v{{[0-9]+, v[0-9]+, v[0-9]+}} +;SI: V_MUL_LO_I32 v{{[0-9]+, v[0-9]+, v[0-9]+}} +;SI: V_MUL_LO_I32 v{{[0-9]+, v[0-9]+, v[0-9]+}} define void @test4(<4 x i32> addrspace(1)* %out, <4 x i32> addrspace(1)* %in) { %b_ptr = getelementptr <4 x i32> addrspace(1)* %in, i32 1 @@ -52,3 +50,32 @@ define void @trunc_i64_mul_to_i32(i32 addrspace(1)* %out, i64 %a, i64 %b) { store i32 %trunc, i32 addrspace(1)* %out, align 8 ret void } + +; This 64-bit multiply should just use MUL_HI and MUL_LO, since the top +; 32-bits of both arguments are sign bits. +; FUNC-LABEL: @mul64_sext_c +; EG-DAG: MULLO_INT +; EG-DAG: MULHI_INT +; SI-DAG: V_MUL_LO_I32 +; SI-DAG: V_MUL_HI_I32 +define void @mul64_sext_c(i64 addrspace(1)* %out, i32 %in) { +entry: + %0 = sext i32 %in to i64 + %1 = mul i64 %0, 80 + store i64 %1, i64 addrspace(1)* %out + ret void +} + +; A standard 64-bit multiply. The expansion should be around 6 instructions. +; It would be difficult to match the expansion correctly without writing +; a really complicated list of FileCheck expressions. 
I don't want +; to confuse people who may 'break' this test with a correct optimization, +; so this test just uses FUNC-LABEL to make sure the compiler does not +; crash with a 'failed to select' error. +; FUNC-LABEL: @mul64 +define void @mul64(i64 addrspace(1)* %out, i64 %a, i64 %b) { +entry: + %0 = mul i64 %a, %b + store i64 %0, i64 addrspace(1)* %out + ret void +} diff --git a/test/CodeGen/R600/mul_int24.ll b/test/CodeGen/R600/mul_int24.ll index 66a1a9e..046911b 100644 --- a/test/CodeGen/R600/mul_int24.ll +++ b/test/CodeGen/R600/mul_int24.ll @@ -1,12 +1,15 @@ -; RUN: llc < %s -march=r600 -mcpu=redwood | FileCheck %s --check-prefix=EG-CHECK -; RUN: llc < %s -march=r600 -mcpu=cayman | FileCheck %s --check-prefix=CM-CHECK -; RUN: llc < %s -march=r600 -mcpu=SI -verify-machineinstrs | FileCheck %s --check-prefix=SI-CHECK +; RUN: llc < %s -march=r600 -mcpu=redwood | FileCheck %s --check-prefix=EG --check-prefix=FUNC +; RUN: llc < %s -march=r600 -mcpu=cayman | FileCheck %s --check-prefix=CM --check-prefix=FUNC +; RUN: llc < %s -march=r600 -mcpu=SI -verify-machineinstrs | FileCheck %s --check-prefix=SI --check-prefix=FUNC -; EG-CHECK: @i32_mul24 +; FUNC-LABEL: @i32_mul24 ; Signed 24-bit multiply is not supported on pre-Cayman GPUs. 
-; EG-CHECK: MULLO_INT -; CM-CHECK: MUL_INT24 {{[ *]*}}T{{[0-9].[XYZW]}}, KC0[2].Z, KC0[2].W -; SI-CHECK: V_MUL_I32_I24 +; EG: MULLO_INT +; Make sure we are not masking the inputs +; CM-NOT: AND +; CM: MUL_INT24 +; SI-NOT: AND +; SI: V_MUL_I32_I24 define void @i32_mul24(i32 addrspace(1)* %out, i32 %a, i32 %b) { entry: %0 = shl i32 %a, 8 diff --git a/test/CodeGen/R600/mul_uint24.ll b/test/CodeGen/R600/mul_uint24.ll index a413961..419f275 100644 --- a/test/CodeGen/R600/mul_uint24.ll +++ b/test/CodeGen/R600/mul_uint24.ll @@ -1,11 +1,10 @@ -; RUN: llc < %s -march=r600 -mcpu=redwood | FileCheck %s --check-prefix=EG-CHECK -; RUN: llc < %s -march=r600 -mcpu=cayman | FileCheck %s --check-prefix=EG-CHECK -; RUN: llc < %s -march=r600 -mcpu=SI -verify-machineinstrs | FileCheck %s --check-prefix=SI-CHECK +; RUN: llc < %s -march=r600 -mcpu=redwood | FileCheck %s --check-prefix=EG --check-prefix=FUNC +; RUN: llc < %s -march=r600 -mcpu=cayman | FileCheck %s --check-prefix=EG --check-prefix=FUNC +; RUN: llc < %s -march=r600 -mcpu=SI -verify-machineinstrs | FileCheck %s --check-prefix=SI --check-prefix=FUNC -; EG-CHECK-LABEL: @u32_mul24 -; EG-CHECK: MUL_UINT24 {{[* ]*}}T{{[0-9]\.[XYZW]}}, KC0[2].Z, KC0[2].W -; SI-CHECK-LABEL: @u32_mul24 -; SI-CHECK: V_MUL_U32_U24 +; FUNC-LABEL: @u32_mul24 +; EG: MUL_UINT24 {{[* ]*}}T{{[0-9]\.[XYZW]}}, KC0[2].Z, KC0[2].W +; SI: V_MUL_U32_U24 define void @u32_mul24(i32 addrspace(1)* %out, i32 %a, i32 %b) { entry: @@ -18,17 +17,13 @@ entry: ret void } -; EG-CHECK-LABEL: @i16_mul24 -; EG-CHECK-DAG: VTX_READ_16 [[A:T[0-9]\.X]], T{{[0-9]}}.X, 40 -; EG-CHECK-DAG: VTX_READ_16 [[B:T[0-9]\.X]], T{{[0-9]}}.X, 44 -; The order of A and B does not matter. 
-; EG-CHECK: MUL_UINT24 {{[* ]*}}T{{[0-9]}}.[[MUL_CHAN:[XYZW]]], [[A]], [[B]] +; FUNC-LABEL: @i16_mul24 +; EG: MUL_UINT24 {{[* ]*}}T{{[0-9]}}.[[MUL_CHAN:[XYZW]]] ; The result must be sign-extended -; EG-CHECK: BFE_INT {{[* ]*}}T{{[0-9]}}.{{[XYZW]}}, PV.[[MUL_CHAN]], 0.0, literal.x -; EG-CHECK: 16 -; SI-CHECK-LABEL: @i16_mul24 -; SI-CHECK: V_MUL_U32_U24_e{{(32|64)}} [[MUL:v[0-9]]], {{[sv][0-9], [sv][0-9]}} -; SI-CHECK: V_BFE_I32 v{{[0-9]}}, [[MUL]], 0, 16, +; EG: BFE_INT {{[* ]*}}T{{[0-9]}}.{{[XYZW]}}, PV.[[MUL_CHAN]], 0.0, literal.x +; EG: 16 +; SI: V_MUL_U32_U24_e{{(32|64)}} [[MUL:v[0-9]]], {{[sv][0-9], [sv][0-9]}} +; SI: V_BFE_I32 v{{[0-9]}}, [[MUL]], 0, 16, define void @i16_mul24(i32 addrspace(1)* %out, i16 %a, i16 %b) { entry: %0 = mul i16 %a, %b @@ -37,16 +32,12 @@ entry: ret void } -; EG-CHECK-LABEL: @i8_mul24 -; EG-CHECK-DAG: VTX_READ_8 [[A:T[0-9]\.X]], T{{[0-9]}}.X, 40 -; EG-CHECK-DAG: VTX_READ_8 [[B:T[0-9]\.X]], T{{[0-9]}}.X, 44 -; The order of A and B does not matter. -; EG-CHECK: MUL_UINT24 {{[* ]*}}T{{[0-9]}}.[[MUL_CHAN:[XYZW]]], [[A]], [[B]] +; FUNC-LABEL: @i8_mul24 +; EG: MUL_UINT24 {{[* ]*}}T{{[0-9]}}.[[MUL_CHAN:[XYZW]]] ; The result must be sign-extended -; EG-CHECK: BFE_INT {{[* ]*}}T{{[0-9]}}.{{[XYZW]}}, PV.[[MUL_CHAN]], 0.0, literal.x -; SI-CHECK-LABEL: @i8_mul24 -; SI-CHECK: V_MUL_U32_U24_e{{(32|64)}} [[MUL:v[0-9]]], {{[sv][0-9], [sv][0-9]}} -; SI-CHECK: V_BFE_I32 v{{[0-9]}}, [[MUL]], 0, 8, +; EG: BFE_INT {{[* ]*}}T{{[0-9]}}.{{[XYZW]}}, PV.[[MUL_CHAN]], 0.0, literal.x +; SI: V_MUL_U32_U24_e{{(32|64)}} [[MUL:v[0-9]]], {{[sv][0-9], [sv][0-9]}} +; SI: V_BFE_I32 v{{[0-9]}}, [[MUL]], 0, 8, define void @i8_mul24(i32 addrspace(1)* %out, i8 %a, i8 %b) { entry: @@ -55,3 +46,21 @@ entry: store i32 %1, i32 addrspace(1)* %out ret void } + +; Multiply with 24-bit inputs and 64-bit output +; FUNC-LABEL: @mul24_i64 +; EG: MUL_UINT24 +; EG: MULHI +; SI: V_MUL_U32_U24 +; FIXME: SI support 24-bit mulhi +; SI: V_MUL_HI_U32 +define void @mul24_i64(i64 addrspace(1)* 
%out, i64 %a, i64 %b) { +entry: + %0 = shl i64 %a, 40 + %a_24 = lshr i64 %0, 40 + %1 = shl i64 %b, 40 + %b_24 = lshr i64 %1, 40 + %2 = mul i64 %a_24, %b_24 + store i64 %2, i64 addrspace(1)* %out + ret void +} diff --git a/test/CodeGen/R600/mulhu.ll b/test/CodeGen/R600/mulhu.ll index d5fc014..8640127 100644 --- a/test/CodeGen/R600/mulhu.ll +++ b/test/CodeGen/R600/mulhu.ll @@ -1,6 +1,6 @@ ;RUN: llc < %s -march=r600 -mcpu=verde -verify-machineinstrs | FileCheck %s -;CHECK: V_MOV_B32_e32 v{{[0-9]+}}, -1431655765 +;CHECK: V_MOV_B32_e32 v{{[0-9]+}}, 0xaaaaaaab ;CHECK: V_MUL_HI_U32 v0, {{[sv][0-9]+}}, {{v[0-9]+}} ;CHECK-NEXT: V_LSHRREV_B32_e32 v0, 1, v0 diff --git a/test/CodeGen/R600/or.ll b/test/CodeGen/R600/or.ll index 2cc991e..9878366 100644 --- a/test/CodeGen/R600/or.ll +++ b/test/CodeGen/R600/or.ll @@ -89,8 +89,8 @@ define void @scalar_vector_or_i64(i64 addrspace(1)* %out, i64 addrspace(1)* %a, } ; SI-LABEL: @vector_or_i64_loadimm -; SI-DAG: S_MOV_B32 [[LO_S_IMM:s[0-9]+]], -545810305 -; SI-DAG: S_MOV_B32 [[HI_S_IMM:s[0-9]+]], 5231 +; SI-DAG: S_MOV_B32 [[LO_S_IMM:s[0-9]+]], 0xdf77987f +; SI-DAG: S_MOV_B32 [[HI_S_IMM:s[0-9]+]], 0x146f ; SI-DAG: BUFFER_LOAD_DWORDX2 v{{\[}}[[LO_VREG:[0-9]+]]:[[HI_VREG:[0-9]+]]{{\]}}, ; SI-DAG: V_OR_B32_e32 {{v[0-9]+}}, [[LO_S_IMM]], v[[LO_VREG]] ; SI-DAG: V_OR_B32_e32 {{v[0-9]+}}, [[HI_S_IMM]], v[[HI_VREG]] diff --git a/test/CodeGen/R600/private-memory.ll b/test/CodeGen/R600/private-memory.ll index 4920320..d3453f2 100644 --- a/test/CodeGen/R600/private-memory.ll +++ b/test/CodeGen/R600/private-memory.ll @@ -1,5 +1,5 @@ -; RUN: llc < %s -march=r600 -mcpu=redwood | FileCheck %s --check-prefix=R600-CHECK --check-prefix=FUNC -; RUN: llc < %s -march=r600 -mcpu=SI | FileCheck %s --check-prefix=SI-CHECK --check-prefix=FUNC +; RUN: llc -march=r600 -mcpu=redwood < %s | FileCheck %s --check-prefix=R600-CHECK --check-prefix=FUNC +; RUN: llc -verify-machineinstrs -march=r600 -mcpu=SI < %s | FileCheck %s --check-prefix=SI-CHECK --check-prefix=FUNC ; 
This test checks that uses and defs of the AR register happen in the same ; instruction clause. @@ -119,7 +119,7 @@ for.end: ; R600-CHECK: * ; R600-CHECK: MOVA_INT -; SI-CHECK: V_MOV_B32_e32 v{{[0-9]}}, 65536 +; SI-CHECK: V_MOV_B32_e32 v{{[0-9]}}, 0x10000 ; SI-CHECK: V_MOVRELS_B32_e32 define void @short_array(i32 addrspace(1)* %out, i32 %index) { entry: @@ -142,7 +142,7 @@ entry: ; R600-CHECK: * ; R600-CHECK-NEXT: MOVA_INT -; SI-CHECK: V_OR_B32_e32 v{{[0-9]}}, 256 +; SI-CHECK: V_OR_B32_e32 v{{[0-9]}}, 0x100 ; SI-CHECK: V_MOVRELS_B32_e32 define void @char_array(i32 addrspace(1)* %out, i32 %index) { entry: diff --git a/test/CodeGen/R600/pv.ll b/test/CodeGen/R600/pv.ll index 5a930b2..f322bc7 100644 --- a/test/CodeGen/R600/pv.ll +++ b/test/CodeGen/R600/pv.ll @@ -1,7 +1,7 @@ ; RUN: llc < %s -march=r600 | FileCheck %s ;CHECK: DOT4 * T{{[0-9]\.W}} (MASKED) -;CHECK: MAX T{{[0-9].[XYZW]}}, 0.0, PV.X +;CHECK: MAX T{{[0-9].[XYZW]}}, PV.X, 0.0 define void @main(<4 x float> inreg %reg0, <4 x float> inreg %reg1, <4 x float> inreg %reg2, <4 x float> inreg %reg3, <4 x float> inreg %reg4, <4 x float> inreg %reg5, <4 x float> inreg %reg6, <4 x float> inreg %reg7) #0 { main_body: diff --git a/test/CodeGen/R600/register-count-comments.ll b/test/CodeGen/R600/register-count-comments.ll index a64b280..329077c 100644 --- a/test/CodeGen/R600/register-count-comments.ll +++ b/test/CodeGen/R600/register-count-comments.ll @@ -1,4 +1,4 @@ -; RUN: llc -march=r600 -mcpu=SI < %s | FileCheck -check-prefix=SI %s +; RUN: llc -march=r600 -mcpu=SI -verify-machineinstrs< %s | FileCheck -check-prefix=SI %s declare i32 @llvm.SI.tid() nounwind readnone diff --git a/test/CodeGen/R600/salu-to-valu.ll b/test/CodeGen/R600/salu-to-valu.ll index e461bf9..e7719b6 100644 --- a/test/CodeGen/R600/salu-to-valu.ll +++ b/test/CodeGen/R600/salu-to-valu.ll @@ -46,3 +46,45 @@ declare i32 @llvm.r600.read.tidig.x() #1 declare i32 @llvm.r600.read.tidig.y() #1 attributes #1 = { nounwind readnone } + +; Test moving an SMRD 
instruction to the VALU + +; CHECK-LABEL: @smrd_valu +; CHECK: BUFFER_LOAD_DWORD [[OUT:v[0-9]+]] +; CHECK: BUFFER_STORE_DWORD [[OUT]] + +define void @smrd_valu(i32 addrspace(2)* addrspace(1)* %in, i32 %a, i32 addrspace(1)* %out) { +entry: + %0 = icmp ne i32 %a, 0 + br i1 %0, label %if, label %else + +if: + %1 = load i32 addrspace(2)* addrspace(1)* %in + br label %endif + +else: + %2 = getelementptr i32 addrspace(2)* addrspace(1)* %in + %3 = load i32 addrspace(2)* addrspace(1)* %2 + br label %endif + +endif: + %4 = phi i32 addrspace(2)* [%1, %if], [%3, %else] + %5 = getelementptr i32 addrspace(2)* %4, i32 3000 + %6 = load i32 addrspace(2)* %5 + store i32 %6, i32 addrspace(1)* %out + ret void +} + +; Test moving an SMRD with an immediate offset to the VALU + +; CHECK-LABEL: @smrd_valu2 +; CHECK: BUFFER_LOAD_DWORD +define void @smrd_valu2(i32 addrspace(1)* %out, [8 x i32] addrspace(2)* %in) { +entry: + %0 = call i32 @llvm.r600.read.tidig.x() nounwind readnone + %1 = add i32 %0, 4 + %2 = getelementptr [8 x i32] addrspace(2)* %in, i32 %0, i32 4 + %3 = load i32 addrspace(2)* %2 + store i32 %3, i32 addrspace(1)* %out + ret void +} diff --git a/test/CodeGen/R600/schedule-vs-if-nested-loop-failure.ll b/test/CodeGen/R600/schedule-vs-if-nested-loop-failure.ll index 2a286d1..3d2142d 100644 --- a/test/CodeGen/R600/schedule-vs-if-nested-loop-failure.ll +++ b/test/CodeGen/R600/schedule-vs-if-nested-loop-failure.ll @@ -1,6 +1,6 @@ ; XFAIL: * ; REQUIRES: asserts -; RUN: llc -O0 -march=r600 -mcpu=SI < %s | FileCheck %s -check-prefix=SI +; RUN: llc -O0 -march=r600 -mcpu=SI -verify-machineinstrs< %s | FileCheck %s -check-prefix=SI declare void @llvm.AMDGPU.barrier.local() nounwind noduplicate diff --git a/test/CodeGen/R600/selectcc.ll b/test/CodeGen/R600/selectcc.ll new file mode 100644 index 0000000..a8f57cf --- /dev/null +++ b/test/CodeGen/R600/selectcc.ll @@ -0,0 +1,19 @@ +; RUN: llc -verify-machineinstrs -march=r600 -mcpu=redwood < %s | FileCheck -check-prefix=EG 
-check-prefix=FUNC %s +; RUN: llc -verify-machineinstrs -march=r600 -mcpu=SI < %s | FileCheck -check-prefix=SI -check-prefix=FUNC %s + +; FUNC-LABEL: @selectcc_i64 +; EG: XOR_INT +; EG: XOR_INT +; EG: OR_INT +; EG: CNDE_INT +; EG: CNDE_INT +; SI: V_CMP_EQ_I64 +; SI: V_CNDMASK +; SI: V_CNDMASK +define void @selectcc_i64(i64 addrspace(1) * %out, i64 %lhs, i64 %rhs, i64 %true, i64 %false) { +entry: + %0 = icmp eq i64 %lhs, %rhs + %1 = select i1 %0, i64 %true, i64 %false + store i64 %1, i64 addrspace(1)* %out + ret void +} diff --git a/test/CodeGen/R600/setcc.ll b/test/CodeGen/R600/setcc.ll index 8d34c4a..5bd95b7 100644 --- a/test/CodeGen/R600/setcc.ll +++ b/test/CodeGen/R600/setcc.ll @@ -1,5 +1,5 @@ ;RUN: llc < %s -march=r600 -mcpu=redwood | FileCheck --check-prefix=R600 --check-prefix=FUNC %s -;RUN: llc < %s -march=r600 -mcpu=SI | FileCheck --check-prefix=SI --check-prefix=FUNC %s +;RUN: llc < %s -march=r600 -mcpu=SI -verify-machineinstrs| FileCheck --check-prefix=SI --check-prefix=FUNC %s ; FUNC-LABEL: @setcc_v2i32 ; R600-DAG: SETE_INT * T{{[0-9]+\.[XYZW]}}, KC0[3].X, KC0[3].Z @@ -96,7 +96,9 @@ entry: ; R600-DAG: SETNE_INT ; SI: V_CMP_O_F32 ; SI: V_CMP_NEQ_F32 -; SI: S_AND_B64 +; SI: V_CNDMASK_B32_e64 +; SI: V_CNDMASK_B32_e64 +; SI: V_AND_B32_e32 define void @f32_one(i32 addrspace(1)* %out, float %a, float %b) { entry: %0 = fcmp one float %a, %b @@ -128,7 +130,9 @@ entry: ; R600-DAG: SETNE_INT ; SI: V_CMP_U_F32 ; SI: V_CMP_EQ_F32 -; SI: S_OR_B64 +; SI: V_CNDMASK_B32_e64 +; SI: V_CNDMASK_B32_e64 +; SI: V_OR_B32_e32 define void @f32_ueq(i32 addrspace(1)* %out, float %a, float %b) { entry: %0 = fcmp ueq float %a, %b @@ -142,7 +146,9 @@ entry: ; R600: SETE_DX10 ; SI: V_CMP_U_F32 ; SI: V_CMP_GT_F32 -; SI: S_OR_B64 +; SI: V_CNDMASK_B32_e64 +; SI: V_CNDMASK_B32_e64 +; SI: V_OR_B32_e32 define void @f32_ugt(i32 addrspace(1)* %out, float %a, float %b) { entry: %0 = fcmp ugt float %a, %b @@ -156,7 +162,9 @@ entry: ; R600: SETE_DX10 ; SI: V_CMP_U_F32 ; SI: V_CMP_GE_F32 -; SI: 
S_OR_B64 +; SI: V_CNDMASK_B32_e64 +; SI: V_CNDMASK_B32_e64 +; SI: V_OR_B32_e32 define void @f32_uge(i32 addrspace(1)* %out, float %a, float %b) { entry: %0 = fcmp uge float %a, %b @@ -170,7 +178,9 @@ entry: ; R600: SETE_DX10 ; SI: V_CMP_U_F32 ; SI: V_CMP_LT_F32 -; SI: S_OR_B64 +; SI: V_CNDMASK_B32_e64 +; SI: V_CNDMASK_B32_e64 +; SI: V_OR_B32_e32 define void @f32_ult(i32 addrspace(1)* %out, float %a, float %b) { entry: %0 = fcmp ult float %a, %b @@ -184,7 +194,9 @@ entry: ; R600: SETE_DX10 ; SI: V_CMP_U_F32 ; SI: V_CMP_LE_F32 -; SI: S_OR_B64 +; SI: V_CNDMASK_B32_e64 +; SI: V_CNDMASK_B32_e64 +; SI: V_OR_B32_e32 define void @f32_ule(i32 addrspace(1)* %out, float %a, float %b) { entry: %0 = fcmp ule float %a, %b diff --git a/test/CodeGen/R600/setcc64.ll b/test/CodeGen/R600/setcc64.ll index 9202fc0..54a33b3 100644 --- a/test/CodeGen/R600/setcc64.ll +++ b/test/CodeGen/R600/setcc64.ll @@ -1,4 +1,4 @@ -;RUN: llc < %s -march=r600 -mcpu=SI | FileCheck --check-prefix=SI --check-prefix=FUNC %s +;RUN: llc < %s -march=r600 -mcpu=SI -verify-machineinstrs| FileCheck --check-prefix=SI --check-prefix=FUNC %s ; XXX: Merge this into setcc, once R600 supports 64-bit operations @@ -59,7 +59,9 @@ entry: ; FUNC-LABEL: @f64_one ; SI: V_CMP_O_F64 ; SI: V_CMP_NEQ_F64 -; SI: S_AND_B64 +; SI: V_CNDMASK_B32_e64 +; SI: V_CNDMASK_B32_e64 +; SI: V_AND_B32_e32 define void @f64_one(i32 addrspace(1)* %out, double %a, double %b) { entry: %0 = fcmp one double %a, %b @@ -81,7 +83,9 @@ entry: ; FUNC-LABEL: @f64_ueq ; SI: V_CMP_U_F64 ; SI: V_CMP_EQ_F64 -; SI: S_OR_B64 +; SI: V_CNDMASK_B32_e64 +; SI: V_CNDMASK_B32_e64 +; SI: V_OR_B32_e32 define void @f64_ueq(i32 addrspace(1)* %out, double %a, double %b) { entry: %0 = fcmp ueq double %a, %b @@ -93,7 +97,9 @@ entry: ; FUNC-LABEL: @f64_ugt ; SI: V_CMP_U_F64 ; SI: V_CMP_GT_F64 -; SI: S_OR_B64 +; SI: V_CNDMASK_B32_e64 +; SI: V_CNDMASK_B32_e64 +; SI: V_OR_B32_e32 define void @f64_ugt(i32 addrspace(1)* %out, double %a, double %b) { entry: %0 = fcmp ugt double %a, 
%b @@ -105,7 +111,9 @@ entry: ; FUNC-LABEL: @f64_uge ; SI: V_CMP_U_F64 ; SI: V_CMP_GE_F64 -; SI: S_OR_B64 +; SI: V_CNDMASK_B32_e64 +; SI: V_CNDMASK_B32_e64 +; SI: V_OR_B32_e32 define void @f64_uge(i32 addrspace(1)* %out, double %a, double %b) { entry: %0 = fcmp uge double %a, %b @@ -117,7 +125,9 @@ entry: ; FUNC-LABEL: @f64_ult ; SI: V_CMP_U_F64 ; SI: V_CMP_LT_F64 -; SI: S_OR_B64 +; SI: V_CNDMASK_B32_e64 +; SI: V_CNDMASK_B32_e64 +; SI: V_OR_B32_e32 define void @f64_ult(i32 addrspace(1)* %out, double %a, double %b) { entry: %0 = fcmp ult double %a, %b @@ -129,7 +139,9 @@ entry: ; FUNC-LABEL: @f64_ule ; SI: V_CMP_U_F64 ; SI: V_CMP_LE_F64 -; SI: S_OR_B64 +; SI: V_CNDMASK_B32_e64 +; SI: V_CNDMASK_B32_e64 +; SI: V_OR_B32_e32 define void @f64_ule(i32 addrspace(1)* %out, double %a, double %b) { entry: %0 = fcmp ule double %a, %b diff --git a/test/CodeGen/R600/seto.ll b/test/CodeGen/R600/seto.ll index 8633a4b..e90e788 100644 --- a/test/CodeGen/R600/seto.ll +++ b/test/CodeGen/R600/seto.ll @@ -1,6 +1,7 @@ ;RUN: llc < %s -march=r600 -mcpu=verde -verify-machineinstrs | FileCheck %s -;CHECK: V_CMP_O_F32_e64 s[0:1], {{[sv][0-9]+, [sv][0-9]+}}, 0, 0, 0, 0 +;CHECK-LABEL: @main +;CHECK: V_CMP_O_F32_e64 s[0:1], {{[sv][0-9]+, [sv][0-9]+}}, 0, 0 define void @main(float %p) { main_body: diff --git a/test/CodeGen/R600/setuo.ll b/test/CodeGen/R600/setuo.ll index c77a37e..3b1db8b 100644 --- a/test/CodeGen/R600/setuo.ll +++ b/test/CodeGen/R600/setuo.ll @@ -1,6 +1,7 @@ ;RUN: llc < %s -march=r600 -mcpu=verde -verify-machineinstrs | FileCheck %s -;CHECK: V_CMP_U_F32_e64 s[0:1], {{[sv][0-9]+, [sv][0-9]+}}, 0, 0, 0, 0 +;CHECK-LABEL: @main +;CHECK: V_CMP_U_F32_e64 s[0:1], {{[sv][0-9]+, [sv][0-9]+}}, 0, 0 define void @main(float %p) { main_body: diff --git a/test/CodeGen/R600/sext-in-reg.ll b/test/CodeGen/R600/sext-in-reg.ll index eef3f07..1b02e4b 100644 --- a/test/CodeGen/R600/sext-in-reg.ll +++ b/test/CodeGen/R600/sext-in-reg.ll @@ -1,15 +1,18 @@ -; RUN: llc < %s -march=r600 -mcpu=SI | 
FileCheck -check-prefix=SI -check-prefix=FUNC %s -; RUN: llc < %s -march=r600 -mcpu=cypress | FileCheck -check-prefix=EG -check-prefix=FUNC %s +; RUN: llc -march=r600 -mcpu=SI -verify-machineinstrs < %s | FileCheck -check-prefix=SI -check-prefix=FUNC %s +; RUN: llc -march=r600 -mcpu=cypress -verify-machineinstrs < %s | FileCheck -check-prefix=EG -check-prefix=FUNC %s declare i32 @llvm.AMDGPU.imax(i32, i32) nounwind readnone ; FUNC-LABEL: @sext_in_reg_i1_i32 ; SI: S_LOAD_DWORD [[ARG:s[0-9]+]], -; SI: V_BFE_I32 [[EXTRACT:v[0-9]+]], [[ARG]], 0, 1 +; SI: S_BFE_I32 [[SEXTRACT:s[0-9]+]], [[ARG]], 0x10000 +; SI: V_MOV_B32_e32 [[EXTRACT:v[0-9]+]], [[SEXTRACT]] ; SI: BUFFER_STORE_DWORD [[EXTRACT]], -; EG: BFE_INT +; EG: MEM_{{.*}} STORE_{{.*}} [[RES:T[0-9]+\.[XYZW]]], [[ADDR:T[0-9]+.[XYZW]]] +; EG: BFE_INT [[RES]], {{.*}}, 0.0, 1 +; EG-NEXT: LSHR * [[ADDR]] define void @sext_in_reg_i1_i32(i32 addrspace(1)* %out, i32 %in) { %shl = shl i32 %in, 31 %sext = ashr i32 %shl, 31 @@ -19,10 +22,14 @@ define void @sext_in_reg_i1_i32(i32 addrspace(1)* %out, i32 %in) { ; FUNC-LABEL: @sext_in_reg_i8_to_i32 ; SI: S_ADD_I32 [[VAL:s[0-9]+]], -; SI: V_BFE_I32 [[EXTRACT:v[0-9]+]], [[VAL]], 0, 8 -; SI: BUFFER_STORE_DWORD [[EXTRACT]], - -; EG: BFE_INT +; SI: S_SEXT_I32_I8 [[EXTRACT:s[0-9]+]], [[VAL]] +; SI: V_MOV_B32_e32 [[VEXTRACT:v[0-9]+]], [[EXTRACT]] +; SI: BUFFER_STORE_DWORD [[VEXTRACT]], + +; EG: MEM_{{.*}} STORE_{{.*}} [[RES:T[0-9]+\.[XYZW]]], [[ADDR:T[0-9]+.[XYZW]]] +; EG: ADD_INT +; EG-NEXT: BFE_INT [[RES]], {{.*}}, 0.0, literal +; EG-NEXT: LSHR * [[ADDR]] define void @sext_in_reg_i8_to_i32(i32 addrspace(1)* %out, i32 %a, i32 %b) nounwind { %c = add i32 %a, %b ; add to prevent folding into extload %shl = shl i32 %c, 24 @@ -33,10 +40,14 @@ define void @sext_in_reg_i8_to_i32(i32 addrspace(1)* %out, i32 %a, i32 %b) nounw ; FUNC-LABEL: @sext_in_reg_i16_to_i32 ; SI: S_ADD_I32 [[VAL:s[0-9]+]], -; SI: V_BFE_I32 [[EXTRACT:v[0-9]+]], [[VAL]], 0, 16 -; SI: BUFFER_STORE_DWORD [[EXTRACT]], - -; 
EG: BFE_INT +; SI: S_SEXT_I32_I16 [[EXTRACT:s[0-9]+]], [[VAL]] +; SI: V_MOV_B32_e32 [[VEXTRACT:v[0-9]+]], [[EXTRACT]] +; SI: BUFFER_STORE_DWORD [[VEXTRACT]], + +; EG: MEM_{{.*}} STORE_{{.*}} [[RES:T[0-9]+\.[XYZW]]], [[ADDR:T[0-9]+.[XYZW]]] +; EG: ADD_INT +; EG-NEXT: BFE_INT [[RES]], {{.*}}, 0.0, literal +; EG-NEXT: LSHR * [[ADDR]] define void @sext_in_reg_i16_to_i32(i32 addrspace(1)* %out, i32 %a, i32 %b) nounwind { %c = add i32 %a, %b ; add to prevent folding into extload %shl = shl i32 %c, 16 @@ -47,10 +58,14 @@ define void @sext_in_reg_i16_to_i32(i32 addrspace(1)* %out, i32 %a, i32 %b) noun ; FUNC-LABEL: @sext_in_reg_i8_to_v1i32 ; SI: S_ADD_I32 [[VAL:s[0-9]+]], -; SI: V_BFE_I32 [[EXTRACT:v[0-9]+]], [[VAL]], 0, 8 -; SI: BUFFER_STORE_DWORD [[EXTRACT]], - -; EG: BFE_INT +; SI: S_SEXT_I32_I8 [[EXTRACT:s[0-9]+]], [[VAL]] +; SI: V_MOV_B32_e32 [[VEXTRACT:v[0-9]+]], [[EXTRACT]] +; SI: BUFFER_STORE_DWORD [[VEXTRACT]], + +; EG: MEM_{{.*}} STORE_{{.*}} [[RES:T[0-9]+\.[XYZW]]], [[ADDR:T[0-9]+.[XYZW]]] +; EG: ADD_INT +; EG-NEXT: BFE_INT [[RES]], {{.*}}, 0.0, literal +; EG-NEXT: LSHR * [[ADDR]] define void @sext_in_reg_i8_to_v1i32(<1 x i32> addrspace(1)* %out, <1 x i32> %a, <1 x i32> %b) nounwind { %c = add <1 x i32> %a, %b ; add to prevent folding into extload %shl = shl <1 x i32> %c, <i32 24> @@ -59,13 +74,35 @@ define void @sext_in_reg_i8_to_v1i32(<1 x i32> addrspace(1)* %out, <1 x i32> %a, ret void } +; FUNC-LABEL: @sext_in_reg_i1_to_i64 +; SI: S_ADD_I32 [[VAL:s[0-9]+]], +; SI: S_BFE_I32 s{{[0-9]+}}, s{{[0-9]+}}, 0x10000 +; SI: S_MOV_B32 {{s[0-9]+}}, -1 +; SI: BUFFER_STORE_DWORDX2 +define void @sext_in_reg_i1_to_i64(i64 addrspace(1)* %out, i64 %a, i64 %b) nounwind { + %c = add i64 %a, %b + %shl = shl i64 %c, 63 + %ashr = ashr i64 %shl, 63 + store i64 %ashr, i64 addrspace(1)* %out, align 8 + ret void +} + ; FUNC-LABEL: @sext_in_reg_i8_to_i64 -; SI: V_BFE_I32 {{v[0-9]+}}, {{s[0-9]+}}, 0, 8 -; SI: V_ASHRREV_I32_e32 {{v[0-9]+}}, 31, -; SI: BUFFER_STORE_DWORD +; SI: S_ADD_I32 
[[VAL:s[0-9]+]], +; SI: S_SEXT_I32_I8 [[EXTRACT:s[0-9]+]], [[VAL]] +; SI: S_MOV_B32 {{s[0-9]+}}, -1 +; SI: BUFFER_STORE_DWORDX2 -; EG: BFE_INT -; EG: ASHR +; EG: MEM_{{.*}} STORE_{{.*}} [[RES_LO:T[0-9]+\.[XYZW]]], [[ADDR_LO:T[0-9]+.[XYZW]]] +; EG: MEM_{{.*}} STORE_{{.*}} [[RES_HI:T[0-9]+\.[XYZW]]], [[ADDR_HI:T[0-9]+.[XYZW]]] +; EG: ADD_INT +; EG-NEXT: BFE_INT {{\*?}} [[RES_LO]], {{.*}}, 0.0, literal +; EG: ASHR [[RES_HI]] +; EG-NOT: BFE_INT +; EG: LSHR +; EG: LSHR +;; TODO Check address computation, using | with variables in {{}} does not work, +;; also the _LO/_HI order might be different define void @sext_in_reg_i8_to_i64(i64 addrspace(1)* %out, i64 %a, i64 %b) nounwind { %c = add i64 %a, %b %shl = shl i64 %c, 56 @@ -75,12 +112,21 @@ define void @sext_in_reg_i8_to_i64(i64 addrspace(1)* %out, i64 %a, i64 %b) nounw } ; FUNC-LABEL: @sext_in_reg_i16_to_i64 -; SI: V_BFE_I32 {{v[0-9]+}}, {{s[0-9]+}}, 0, 16 -; SI: V_ASHRREV_I32_e32 {{v[0-9]+}}, 31, -; SI: BUFFER_STORE_DWORD +; SI: S_ADD_I32 [[VAL:s[0-9]+]], +; SI: S_SEXT_I32_I16 [[EXTRACT:s[0-9]+]], [[VAL]] +; SI: S_MOV_B32 {{s[0-9]+}}, -1 +; SI: BUFFER_STORE_DWORDX2 -; EG: BFE_INT -; EG: ASHR +; EG: MEM_{{.*}} STORE_{{.*}} [[RES_LO:T[0-9]+\.[XYZW]]], [[ADDR_LO:T[0-9]+.[XYZW]]] +; EG: MEM_{{.*}} STORE_{{.*}} [[RES_HI:T[0-9]+\.[XYZW]]], [[ADDR_HI:T[0-9]+.[XYZW]]] +; EG: ADD_INT +; EG-NEXT: BFE_INT {{\*?}} [[RES_LO]], {{.*}}, 0.0, literal +; EG: ASHR [[RES_HI]] +; EG-NOT: BFE_INT +; EG: LSHR +; EG: LSHR +;; TODO Check address computation, using | with variables in {{}} does not work, +;; also the _LO/_HI order might be different define void @sext_in_reg_i16_to_i64(i64 addrspace(1)* %out, i64 %a, i64 %b) nounwind { %c = add i64 %a, %b %shl = shl i64 %c, 48 @@ -95,6 +141,17 @@ define void @sext_in_reg_i16_to_i64(i64 addrspace(1)* %out, i64 %a, i64 %b) noun ; SI: S_ADD_I32 [[ADD:s[0-9]+]], ; SI: S_ASHR_I32 s{{[0-9]+}}, [[ADD]], 31 ; SI: BUFFER_STORE_DWORDX2 + +; EG: MEM_{{.*}} STORE_{{.*}} [[RES_LO:T[0-9]+\.[XYZW]]], 
[[ADDR_LO:T[0-9]+.[XYZW]]] +; EG: MEM_{{.*}} STORE_{{.*}} [[RES_HI:T[0-9]+\.[XYZW]]], [[ADDR_HI:T[0-9]+.[XYZW]]] +; EG-NOT: BFE_INT +; EG: ADD_INT {{\*?}} [[RES_LO]] +; EG: ASHR [[RES_HI]] +; EG: ADD_INT +; EG: LSHR +; EG: LSHR +;; TODO Check address computation, using | with variables in {{}} does not work, +;; also the _LO/_HI order might be different define void @sext_in_reg_i32_to_i64(i64 addrspace(1)* %out, i64 %a, i64 %b) nounwind { %c = add i64 %a, %b %shl = shl i64 %c, 32 @@ -105,8 +162,8 @@ define void @sext_in_reg_i32_to_i64(i64 addrspace(1)* %out, i64 %a, i64 %b) noun ; This is broken on Evergreen for some reason related to the <1 x i64> kernel arguments. ; XFUNC-LABEL: @sext_in_reg_i8_to_v1i64 -; XSI: V_BFE_I32 {{v[0-9]+}}, {{s[0-9]+}}, 0, 8 -; XSI: V_ASHRREV_I32_e32 {{v[0-9]+}}, 31, +; XSI: S_BFE_I32 [[EXTRACT:s[0-9]+]], {{s[0-9]+}}, 524288 +; XSI: S_ASHR_I32 {{v[0-9]+}}, [[EXTRACT]], 31 ; XSI: BUFFER_STORE_DWORD ; XEG: BFE_INT ; XEG: ASHR @@ -122,7 +179,13 @@ define void @sext_in_reg_i32_to_i64(i64 addrspace(1)* %out, i64 %a, i64 %b) noun ; SI-NOT: BFE ; SI: S_LSHL_B32 [[REG:s[0-9]+]], {{s[0-9]+}}, 6 ; SI: S_ASHR_I32 {{s[0-9]+}}, [[REG]], 7 + +; EG: MEM_{{.*}} STORE_{{.*}} [[RES:T[0-9]+\.[XYZW]]], [[ADDR:T[0-9]+.[XYZW]]] ; EG-NOT: BFE +; EG: ADD_INT +; EG: LSHL +; EG: ASHR [[RES]] +; EG: LSHR {{\*?}} [[ADDR]] define void @sext_in_reg_i1_in_i32_other_amount(i32 addrspace(1)* %out, i32 %a, i32 %b) nounwind { %c = add i32 %a, %b %x = shl i32 %c, 6 @@ -136,7 +199,15 @@ define void @sext_in_reg_i1_in_i32_other_amount(i32 addrspace(1)* %out, i32 %a, ; SI: S_ASHR_I32 {{s[0-9]+}}, [[REG0]], 7 ; SI: S_LSHL_B32 [[REG1:s[0-9]+]], {{s[0-9]}}, 6 ; SI: S_ASHR_I32 {{s[0-9]+}}, [[REG1]], 7 + +; EG: MEM_{{.*}} STORE_{{.*}} [[RES:T[0-9]+]]{{\.[XYZW][XYZW]}}, [[ADDR:T[0-9]+.[XYZW]]] ; EG-NOT: BFE +; EG: ADD_INT +; EG: LSHL +; EG: ASHR [[RES]] +; EG: LSHL +; EG: ASHR [[RES]] +; EG: LSHR {{\*?}} [[ADDR]] define void @sext_in_reg_v2i1_in_v2i32_other_amount(<2 x i32> 
addrspace(1)* %out, <2 x i32> %a, <2 x i32> %b) nounwind { %c = add <2 x i32> %a, %b %x = shl <2 x i32> %c, <i32 6, i32 6> @@ -147,11 +218,14 @@ define void @sext_in_reg_v2i1_in_v2i32_other_amount(<2 x i32> addrspace(1)* %out ; FUNC-LABEL: @sext_in_reg_v2i1_to_v2i32 -; SI: V_BFE_I32 {{v[0-9]+}}, {{s[0-9]+}}, 0, 1 -; SI: V_BFE_I32 {{v[0-9]+}}, {{s[0-9]+}}, 0, 1 +; SI: S_BFE_I32 {{s[0-9]+}}, {{s[0-9]+}}, 0x10000 +; SI: S_BFE_I32 {{s[0-9]+}}, {{s[0-9]+}}, 0x10000 ; SI: BUFFER_STORE_DWORDX2 -; EG: BFE -; EG: BFE + +; EG: MEM_{{.*}} STORE_{{.*}} [[RES:T[0-9]+]]{{\.[XYZW][XYZW]}}, [[ADDR:T[0-9]+.[XYZW]]] +; EG: BFE_INT [[RES]] +; EG: BFE_INT [[RES]] +; EG: LSHR {{\*?}} [[ADDR]] define void @sext_in_reg_v2i1_to_v2i32(<2 x i32> addrspace(1)* %out, <2 x i32> %a, <2 x i32> %b) nounwind { %c = add <2 x i32> %a, %b ; add to prevent folding into extload %shl = shl <2 x i32> %c, <i32 31, i32 31> @@ -161,16 +235,18 @@ define void @sext_in_reg_v2i1_to_v2i32(<2 x i32> addrspace(1)* %out, <2 x i32> % } ; FUNC-LABEL: @sext_in_reg_v4i1_to_v4i32 -; SI: V_BFE_I32 {{v[0-9]+}}, {{s[0-9]+}}, 0, 1 -; SI: V_BFE_I32 {{v[0-9]+}}, {{s[0-9]+}}, 0, 1 -; SI: V_BFE_I32 {{v[0-9]+}}, {{s[0-9]+}}, 0, 1 -; SI: V_BFE_I32 {{v[0-9]+}}, {{s[0-9]+}}, 0, 1 +; SI: S_BFE_I32 {{s[0-9]+}}, {{s[0-9]+}}, 0x10000 +; SI: S_BFE_I32 {{s[0-9]+}}, {{s[0-9]+}}, 0x10000 +; SI: S_BFE_I32 {{s[0-9]+}}, {{s[0-9]+}}, 0x10000 +; SI: S_BFE_I32 {{s[0-9]+}}, {{s[0-9]+}}, 0x10000 ; SI: BUFFER_STORE_DWORDX4 -; EG: BFE -; EG: BFE -; EG: BFE -; EG: BFE +; EG: MEM_{{.*}} STORE_{{.*}} [[RES:T[0-9]+]]{{\.[XYZW][XYZW][XYZW][XYZW]}}, [[ADDR:T[0-9]+.[XYZW]]] +; EG: BFE_INT [[RES]] +; EG: BFE_INT [[RES]] +; EG: BFE_INT [[RES]] +; EG: BFE_INT [[RES]] +; EG: LSHR {{\*?}} [[ADDR]] define void @sext_in_reg_v4i1_to_v4i32(<4 x i32> addrspace(1)* %out, <4 x i32> %a, <4 x i32> %b) nounwind { %c = add <4 x i32> %a, %b ; add to prevent folding into extload %shl = shl <4 x i32> %c, <i32 31, i32 31, i32 31, i32 31> @@ -180,12 +256,14 @@ define void 
@sext_in_reg_v4i1_to_v4i32(<4 x i32> addrspace(1)* %out, <4 x i32> % } ; FUNC-LABEL: @sext_in_reg_v2i8_to_v2i32 -; SI: V_BFE_I32 {{v[0-9]+}}, {{s[0-9]+}}, 0, 8 -; SI: V_BFE_I32 {{v[0-9]+}}, {{s[0-9]+}}, 0, 8 +; SI: S_SEXT_I32_I8 {{s[0-9]+}}, {{s[0-9]+}} +; SI: S_SEXT_I32_I8 {{s[0-9]+}}, {{s[0-9]+}} ; SI: BUFFER_STORE_DWORDX2 -; EG: BFE -; EG: BFE +; EG: MEM_{{.*}} STORE_{{.*}} [[RES:T[0-9]+]]{{\.[XYZW][XYZW]}}, [[ADDR:T[0-9]+.[XYZW]]] +; EG: BFE_INT [[RES]] +; EG: BFE_INT [[RES]] +; EG: LSHR {{\*?}} [[ADDR]] define void @sext_in_reg_v2i8_to_v2i32(<2 x i32> addrspace(1)* %out, <2 x i32> %a, <2 x i32> %b) nounwind { %c = add <2 x i32> %a, %b ; add to prevent folding into extload %shl = shl <2 x i32> %c, <i32 24, i32 24> @@ -195,16 +273,18 @@ define void @sext_in_reg_v2i8_to_v2i32(<2 x i32> addrspace(1)* %out, <2 x i32> % } ; FUNC-LABEL: @sext_in_reg_v4i8_to_v4i32 -; SI: V_BFE_I32 {{v[0-9]+}}, {{s[0-9]+}}, 0, 8 -; SI: V_BFE_I32 {{v[0-9]+}}, {{s[0-9]+}}, 0, 8 -; SI: V_BFE_I32 {{v[0-9]+}}, {{s[0-9]+}}, 0, 8 -; SI: V_BFE_I32 {{v[0-9]+}}, {{s[0-9]+}}, 0, 8 +; SI: S_SEXT_I32_I8 {{s[0-9]+}}, {{s[0-9]+}} +; SI: S_SEXT_I32_I8 {{s[0-9]+}}, {{s[0-9]+}} +; SI: S_SEXT_I32_I8 {{s[0-9]+}}, {{s[0-9]+}} +; SI: S_SEXT_I32_I8 {{s[0-9]+}}, {{s[0-9]+}} ; SI: BUFFER_STORE_DWORDX4 -; EG: BFE -; EG: BFE -; EG: BFE -; EG: BFE +; EG: MEM_{{.*}} STORE_{{.*}} [[RES:T[0-9]+]]{{\.[XYZW][XYZW][XYZW][XYZW]}}, [[ADDR:T[0-9]+.[XYZW]]] +; EG: BFE_INT [[RES]] +; EG: BFE_INT [[RES]] +; EG: BFE_INT [[RES]] +; EG: BFE_INT [[RES]] +; EG: LSHR {{\*?}} [[ADDR]] define void @sext_in_reg_v4i8_to_v4i32(<4 x i32> addrspace(1)* %out, <4 x i32> %a, <4 x i32> %b) nounwind { %c = add <4 x i32> %a, %b ; add to prevent folding into extload %shl = shl <4 x i32> %c, <i32 24, i32 24, i32 24, i32 24> @@ -214,16 +294,18 @@ define void @sext_in_reg_v4i8_to_v4i32(<4 x i32> addrspace(1)* %out, <4 x i32> % } ; FUNC-LABEL: @sext_in_reg_v2i16_to_v2i32 -; SI: V_BFE_I32 {{v[0-9]+}}, {{s[0-9]+}}, 0, 8 -; SI: V_BFE_I32 {{v[0-9]+}}, 
{{s[0-9]+}}, 0, 8 +; SI: S_SEXT_I32_I16 {{s[0-9]+}}, {{s[0-9]+}} +; SI: S_SEXT_I32_I16 {{s[0-9]+}}, {{s[0-9]+}} ; SI: BUFFER_STORE_DWORDX2 -; EG: BFE -; EG: BFE +; EG: MEM_{{.*}} STORE_{{.*}} [[RES:T[0-9]+]]{{\.[XYZW][XYZW]}}, [[ADDR:T[0-9]+.[XYZW]]] +; EG: BFE_INT [[RES]] +; EG: BFE_INT [[RES]] +; EG: LSHR {{\*?}} [[ADDR]] define void @sext_in_reg_v2i16_to_v2i32(<2 x i32> addrspace(1)* %out, <2 x i32> %a, <2 x i32> %b) nounwind { %c = add <2 x i32> %a, %b ; add to prevent folding into extload - %shl = shl <2 x i32> %c, <i32 24, i32 24> - %ashr = ashr <2 x i32> %shl, <i32 24, i32 24> + %shl = shl <2 x i32> %c, <i32 16, i32 16> + %ashr = ashr <2 x i32> %shl, <i32 16, i32 16> store <2 x i32> %ashr, <2 x i32> addrspace(1)* %out, align 8 ret void } @@ -252,8 +334,36 @@ define void @testcase_3(i8 addrspace(1)* %out, i8 %a) nounwind { ret void } +; FUNC-LABEL: @vgpr_sext_in_reg_v4i8_to_v4i32 +; SI: V_BFE_I32 [[EXTRACT:v[0-9]+]], {{v[0-9]+}}, 0, 8 +; SI: V_BFE_I32 [[EXTRACT:v[0-9]+]], {{v[0-9]+}}, 0, 8 +; SI: V_BFE_I32 [[EXTRACT:v[0-9]+]], {{v[0-9]+}}, 0, 8 +; SI: V_BFE_I32 [[EXTRACT:v[0-9]+]], {{v[0-9]+}}, 0, 8 +define void @vgpr_sext_in_reg_v4i8_to_v4i32(<4 x i32> addrspace(1)* %out, <4 x i32> addrspace(1)* %a, <4 x i32> addrspace(1)* %b) nounwind { + %loada = load <4 x i32> addrspace(1)* %a, align 16 + %loadb = load <4 x i32> addrspace(1)* %b, align 16 + %c = add <4 x i32> %loada, %loadb ; add to prevent folding into extload + %shl = shl <4 x i32> %c, <i32 24, i32 24, i32 24, i32 24> + %ashr = ashr <4 x i32> %shl, <i32 24, i32 24, i32 24, i32 24> + store <4 x i32> %ashr, <4 x i32> addrspace(1)* %out, align 8 + ret void +} + +; FUNC-LABEL: @vgpr_sext_in_reg_v4i16_to_v4i32 +; SI: V_BFE_I32 [[EXTRACT:v[0-9]+]], {{v[0-9]+}}, 0, 16 +; SI: V_BFE_I32 [[EXTRACT:v[0-9]+]], {{v[0-9]+}}, 0, 16 +define void @vgpr_sext_in_reg_v4i16_to_v4i32(<4 x i32> addrspace(1)* %out, <4 x i32> addrspace(1)* %a, <4 x i32> addrspace(1)* %b) nounwind { + %loada = load <4 x i32> addrspace(1)* %a, 
align 16 + %loadb = load <4 x i32> addrspace(1)* %b, align 16 + %c = add <4 x i32> %loada, %loadb ; add to prevent folding into extload + %shl = shl <4 x i32> %c, <i32 16, i32 16, i32 16, i32 16> + %ashr = ashr <4 x i32> %shl, <i32 16, i32 16, i32 16, i32 16> + store <4 x i32> %ashr, <4 x i32> addrspace(1)* %out, align 8 + ret void +} + ; FIXME: The BFE should really be eliminated. I think it should happen -; when computeMaskedBitsForTargetNode is implemented for imax. +; when computeKnownBitsForTargetNode is implemented for imax. ; FUNC-LABEL: @sext_in_reg_to_illegal_type ; SI: BUFFER_LOAD_SBYTE @@ -269,3 +379,146 @@ define void @sext_in_reg_to_illegal_type(i16 addrspace(1)* nocapture %out, i8 ad store i16 %tmp6, i16 addrspace(1)* %out, align 2 ret void } + +declare i32 @llvm.AMDGPU.bfe.i32(i32, i32, i32) nounwind readnone + +; FUNC-LABEL: @bfe_0_width +; SI-NOT: BFE +; SI: S_ENDPGM +define void @bfe_0_width(i32 addrspace(1)* %out, i32 addrspace(1)* %ptr) nounwind { + %load = load i32 addrspace(1)* %ptr, align 4 + %bfe = call i32 @llvm.AMDGPU.bfe.i32(i32 %load, i32 8, i32 0) nounwind readnone + store i32 %bfe, i32 addrspace(1)* %out, align 4 + ret void +} + +; FUNC-LABEL: @bfe_8_bfe_8 +; SI: V_BFE_I32 +; SI-NOT: BFE +; SI: S_ENDPGM +define void @bfe_8_bfe_8(i32 addrspace(1)* %out, i32 addrspace(1)* %ptr) nounwind { + %load = load i32 addrspace(1)* %ptr, align 4 + %bfe0 = call i32 @llvm.AMDGPU.bfe.i32(i32 %load, i32 0, i32 8) nounwind readnone + %bfe1 = call i32 @llvm.AMDGPU.bfe.i32(i32 %bfe0, i32 0, i32 8) nounwind readnone + store i32 %bfe1, i32 addrspace(1)* %out, align 4 + ret void +} + +; FUNC-LABEL: @bfe_8_bfe_16 +; SI: V_BFE_I32 v{{[0-9]+}}, v{{[0-9]+}}, 0, 8 +; SI: S_ENDPGM +define void @bfe_8_bfe_16(i32 addrspace(1)* %out, i32 addrspace(1)* %ptr) nounwind { + %load = load i32 addrspace(1)* %ptr, align 4 + %bfe0 = call i32 @llvm.AMDGPU.bfe.i32(i32 %load, i32 0, i32 8) nounwind readnone + %bfe1 = call i32 @llvm.AMDGPU.bfe.i32(i32 %bfe0, i32 0, i32 16) 
nounwind readnone + store i32 %bfe1, i32 addrspace(1)* %out, align 4 + ret void +} + +; This really should be folded into 1 +; FUNC-LABEL: @bfe_16_bfe_8 +; SI: V_BFE_I32 v{{[0-9]+}}, v{{[0-9]+}}, 0, 8 +; SI-NOT: BFE +; SI: S_ENDPGM +define void @bfe_16_bfe_8(i32 addrspace(1)* %out, i32 addrspace(1)* %ptr) nounwind { + %load = load i32 addrspace(1)* %ptr, align 4 + %bfe0 = call i32 @llvm.AMDGPU.bfe.i32(i32 %load, i32 0, i32 16) nounwind readnone + %bfe1 = call i32 @llvm.AMDGPU.bfe.i32(i32 %bfe0, i32 0, i32 8) nounwind readnone + store i32 %bfe1, i32 addrspace(1)* %out, align 4 + ret void +} + +; Make sure there isn't a redundant BFE +; FUNC-LABEL: @sext_in_reg_i8_to_i32_bfe +; SI: S_SEXT_I32_I8 s{{[0-9]+}}, s{{[0-9]+}} +; SI-NOT: BFE +; SI: S_ENDPGM +define void @sext_in_reg_i8_to_i32_bfe(i32 addrspace(1)* %out, i32 %a, i32 %b) nounwind { + %c = add i32 %a, %b ; add to prevent folding into extload + %bfe = call i32 @llvm.AMDGPU.bfe.i32(i32 %c, i32 0, i32 8) nounwind readnone + %shl = shl i32 %bfe, 24 + %ashr = ashr i32 %shl, 24 + store i32 %ashr, i32 addrspace(1)* %out, align 4 + ret void +} + +; FUNC-LABEL: @sext_in_reg_i8_to_i32_bfe_wrong +define void @sext_in_reg_i8_to_i32_bfe_wrong(i32 addrspace(1)* %out, i32 %a, i32 %b) nounwind { + %c = add i32 %a, %b ; add to prevent folding into extload + %bfe = call i32 @llvm.AMDGPU.bfe.i32(i32 %c, i32 8, i32 0) nounwind readnone + %shl = shl i32 %bfe, 24 + %ashr = ashr i32 %shl, 24 + store i32 %ashr, i32 addrspace(1)* %out, align 4 + ret void +} + +; FUNC-LABEL: @sextload_i8_to_i32_bfe +; SI: BUFFER_LOAD_SBYTE +; SI-NOT: BFE +; SI: S_ENDPGM +define void @sextload_i8_to_i32_bfe(i32 addrspace(1)* %out, i8 addrspace(1)* %ptr) nounwind { + %load = load i8 addrspace(1)* %ptr, align 1 + %sext = sext i8 %load to i32 + %bfe = call i32 @llvm.AMDGPU.bfe.i32(i32 %sext, i32 0, i32 8) nounwind readnone + %shl = shl i32 %bfe, 24 + %ashr = ashr i32 %shl, 24 + store i32 %ashr, i32 addrspace(1)* %out, align 4 + ret void +} + +; FUNC-LABEL: 
@sextload_i8_to_i32_bfe_0: +; SI-NOT: BFE +; SI: S_ENDPGM +define void @sextload_i8_to_i32_bfe_0(i32 addrspace(1)* %out, i8 addrspace(1)* %ptr) nounwind { + %load = load i8 addrspace(1)* %ptr, align 1 + %sext = sext i8 %load to i32 + %bfe = call i32 @llvm.AMDGPU.bfe.i32(i32 %sext, i32 8, i32 0) nounwind readnone + %shl = shl i32 %bfe, 24 + %ashr = ashr i32 %shl, 24 + store i32 %ashr, i32 addrspace(1)* %out, align 4 + ret void +} + +; FUNC-LABEL: @sext_in_reg_i1_bfe_offset_0: +; SI-NOT: SHR +; SI-NOT: SHL +; SI: V_BFE_I32 v{{[0-9]+}}, v{{[0-9]+}}, 0, 1 +; SI: S_ENDPGM +define void @sext_in_reg_i1_bfe_offset_0(i32 addrspace(1)* %out, i32 addrspace(1)* %in) nounwind { + %x = load i32 addrspace(1)* %in, align 4 + %shl = shl i32 %x, 31 + %shr = ashr i32 %shl, 31 + %bfe = call i32 @llvm.AMDGPU.bfe.i32(i32 %shr, i32 0, i32 1) + store i32 %bfe, i32 addrspace(1)* %out, align 4 + ret void +} + +; FUNC-LABEL: @sext_in_reg_i1_bfe_offset_1 +; SI: BUFFER_LOAD_DWORD +; SI-NOT: SHL +; SI-NOT: SHR +; SI: V_BFE_I32 v{{[0-9]+}}, v{{[0-9]+}}, 1, 1 +; SI: S_ENDPGM +define void @sext_in_reg_i1_bfe_offset_1(i32 addrspace(1)* %out, i32 addrspace(1)* %in) nounwind { + %x = load i32 addrspace(1)* %in, align 4 + %shl = shl i32 %x, 30 + %shr = ashr i32 %shl, 30 + %bfe = call i32 @llvm.AMDGPU.bfe.i32(i32 %shr, i32 1, i32 1) + store i32 %bfe, i32 addrspace(1)* %out, align 4 + ret void +} + +; FUNC-LABEL: @sext_in_reg_i2_bfe_offset_1: +; SI: BUFFER_LOAD_DWORD +; SI: V_LSHLREV_B32_e32 v{{[0-9]+}}, 30, v{{[0-9]+}} +; SI: V_ASHRREV_I32_e32 v{{[0-9]+}}, 30, v{{[0-9]+}} +; SI: V_BFE_I32 v{{[0-9]+}}, v{{[0-9]+}}, 1, 2 +; SI: S_ENDPGM +define void @sext_in_reg_i2_bfe_offset_1(i32 addrspace(1)* %out, i32 addrspace(1)* %in) nounwind { + %x = load i32 addrspace(1)* %in, align 4 + %shl = shl i32 %x, 30 + %shr = ashr i32 %shl, 30 + %bfe = call i32 @llvm.AMDGPU.bfe.i32(i32 %shr, i32 1, i32 2) + store i32 %bfe, i32 addrspace(1)* %out, align 4 + ret void +} diff --git a/test/CodeGen/R600/sgpr-control-flow.ll 
b/test/CodeGen/R600/sgpr-control-flow.ll new file mode 100644 index 0000000..06ad24d --- /dev/null +++ b/test/CodeGen/R600/sgpr-control-flow.ll @@ -0,0 +1,27 @@ +; RUN: llc -march=r600 -mcpu=SI -verify-machineinstrs< %s | FileCheck -check-prefix=SI %s +; +; +; Most SALU instructions ignore control flow, so we need to make sure +; they don't overwrite values from other blocks. + +; SI-NOT: S_ADD + +define void @sgpr_if_else(i32 addrspace(1)* %out, i32 %a, i32 %b, i32 %c, i32 %d, i32 %e) { +entry: + %0 = icmp eq i32 %a, 0 + br i1 %0, label %if, label %else + +if: + %1 = add i32 %b, %c + br label %endif + +else: + %2 = add i32 %d, %e + br label %endif + +endif: + %3 = phi i32 [%1, %if], [%2, %else] + %4 = add i32 %3, %a + store i32 %4, i32 addrspace(1)* %out + ret void +} diff --git a/test/CodeGen/R600/sgpr-copy-duplicate-operand.ll b/test/CodeGen/R600/sgpr-copy-duplicate-operand.ll index d74161b..9d8a623 100644 --- a/test/CodeGen/R600/sgpr-copy-duplicate-operand.ll +++ b/test/CodeGen/R600/sgpr-copy-duplicate-operand.ll @@ -1,4 +1,4 @@ -; RUN: llc -march=r600 -mcpu=SI < %s | FileCheck -check-prefix=SI %s +; RUN: llc -march=r600 -mcpu=SI -verify-machineinstrs< %s | FileCheck -check-prefix=SI %s ; Copy VGPR -> SGPR used twice as an instruction operand, which is then ; used in an REG_SEQUENCE that also needs to be handled. diff --git a/test/CodeGen/R600/sgpr-copy.ll b/test/CodeGen/R600/sgpr-copy.ll index 5472c1b..c581d86 100644 --- a/test/CodeGen/R600/sgpr-copy.ll +++ b/test/CodeGen/R600/sgpr-copy.ll @@ -1,4 +1,4 @@ -; RUN: llc < %s -march=r600 -mcpu=SI | FileCheck %s +; RUN: llc < %s -march=r600 -mcpu=SI -verify-machineinstrs | FileCheck %s ; This test checks that no VGPR to SGPR copies are created by the register ; allocator. 
diff --git a/test/CodeGen/R600/si-annotate-cf-assertion.ll b/test/CodeGen/R600/si-annotate-cf-assertion.ll index cd3ba2b..daa4667 100644 --- a/test/CodeGen/R600/si-annotate-cf-assertion.ll +++ b/test/CodeGen/R600/si-annotate-cf-assertion.ll @@ -1,6 +1,6 @@ ; REQUIRES: asserts ; XFAIL: * -; RUN: llc -march=r600 -mcpu=SI -asm-verbose=false < %s | FileCheck %s +; RUN: llc -march=r600 -mcpu=SI -verify-machineinstrs-asm-verbose=false < %s | FileCheck %s define void @test(i32 addrspace(1)* %g, i8 addrspace(3)* %l, i32 %x) nounwind { diff --git a/test/CodeGen/R600/simplify-demanded-bits-build-pair.ll b/test/CodeGen/R600/simplify-demanded-bits-build-pair.ll new file mode 100644 index 0000000..d9f60ea --- /dev/null +++ b/test/CodeGen/R600/simplify-demanded-bits-build-pair.ll @@ -0,0 +1,36 @@ +; RUN: llc -verify-machineinstrs -march=r600 -mcpu=SI < %s | FileCheck -check-prefix=SI %s + +; 64-bit select was originally lowered with a build_pair, and this +; could be simplified to 1 cndmask instead of 2, but that broke when +; it started being implemented with a v2i32 build_vector and +; bitcasting. 
+define void @trunc_select_i64(i32 addrspace(1)* %out, i64 %a, i64 %b, i32 %c) { + %cmp = icmp eq i32 %c, 0 + %select = select i1 %cmp, i64 %a, i64 %b + %trunc = trunc i64 %select to i32 + store i32 %trunc, i32 addrspace(1)* %out, align 4 + ret void +} + +; SI-LABEL: @trunc_load_alloca_i64: +; SI: V_MOVRELS_B32 +; SI-NOT: V_MOVRELS_B32 +; SI: S_ENDPGM +define void @trunc_load_alloca_i64(i64 addrspace(1)* %out, i32 %a, i32 %b) { + %idx = add i32 %a, %b + %alloca = alloca i64, i32 4 + %gep0 = getelementptr i64* %alloca, i64 0 + %gep1 = getelementptr i64* %alloca, i64 1 + %gep2 = getelementptr i64* %alloca, i64 2 + %gep3 = getelementptr i64* %alloca, i64 3 + store i64 24, i64* %gep0, align 8 + store i64 9334, i64* %gep1, align 8 + store i64 3935, i64* %gep2, align 8 + store i64 9342, i64* %gep3, align 8 + %gep = getelementptr i64* %alloca, i32 %idx + %load = load i64* %gep, align 8 + %mask = and i64 %load, 4294967296 + %add = add i64 %mask, -1 + store i64 %add, i64 addrspace(1)* %out, align 4 + ret void +} diff --git a/test/CodeGen/R600/smrd.ll b/test/CodeGen/R600/smrd.ll index 43231df..dec6185 100644 --- a/test/CodeGen/R600/smrd.ll +++ b/test/CodeGen/R600/smrd.ll @@ -2,7 +2,7 @@ ; SMRD load with an immediate offset. ; CHECK-LABEL: @smrd0 -; CHECK: S_LOAD_DWORD s{{[0-9]}}, s[{{[0-9]:[0-9]}}], 1 ; encoding: [0x01 +; CHECK: S_LOAD_DWORD s{{[0-9]}}, s[{{[0-9]:[0-9]}}], 0x1 ; encoding: [0x01 define void @smrd0(i32 addrspace(1)* %out, i32 addrspace(2)* %ptr) { entry: %0 = getelementptr i32 addrspace(2)* %ptr, i64 1 @@ -13,7 +13,7 @@ entry: ; SMRD load with the largest possible immediate offset. 
; CHECK-LABEL: @smrd1 -; CHECK: S_LOAD_DWORD s{{[0-9]}}, s[{{[0-9]:[0-9]}}], 255 ; encoding: [0xff +; CHECK: S_LOAD_DWORD s{{[0-9]}}, s[{{[0-9]:[0-9]}}], 0xff ; encoding: [0xff define void @smrd1(i32 addrspace(1)* %out, i32 addrspace(2)* %ptr) { entry: %0 = getelementptr i32 addrspace(2)* %ptr, i64 255 @@ -24,7 +24,7 @@ entry: ; SMRD load with an offset greater than the largest possible immediate. ; CHECK-LABEL: @smrd2 -; CHECK: S_MOV_B32 s[[OFFSET:[0-9]]], 1024 +; CHECK: S_MOV_B32 s[[OFFSET:[0-9]]], 0x400 ; CHECK: S_LOAD_DWORD s{{[0-9]}}, s[{{[0-9]:[0-9]}}], s[[OFFSET]] ; encoding: [0x0[[OFFSET]] define void @smrd2(i32 addrspace(1)* %out, i32 addrspace(2)* %ptr) { entry: @@ -34,9 +34,27 @@ entry: ret void } +; SMRD load with a 64-bit offset +; CHECK-LABEL: @smrd3 +; CHECK-DAG: S_MOV_B32 s[[SHI:[0-9]+]], 4 +; CHECK-DAG: S_MOV_B32 s[[SLO:[0-9]+]], 0 +; FIXME: We don't need to copy these values to VGPRs +; CHECK-DAG: V_MOV_B32_e32 v[[VHI:[0-9]+]], s[[SHI]] +; CHECK-DAG: V_MOV_B32_e32 v[[VLO:[0-9]+]], s[[SLO]] +; FIXME: We should be able to use S_LOAD_DWORD here +; BUFFER_LOAD_DWORD v{{[0-9]+}}, s[{{[0-9]+:[0-9]+}}] + v[[[VLO]]:[[VHI]]] + 0x0 + +define void @smrd3(i32 addrspace(1)* %out, i32 addrspace(2)* %ptr) { +entry: + %0 = getelementptr i32 addrspace(2)* %ptr, i64 4294967296 ; 2 ^ 32 + %1 = load i32 addrspace(2)* %0 + store i32 %1, i32 addrspace(1)* %out + ret void +} + ; SMRD load using the load.const intrinsic with an immediate offset ; CHECK-LABEL: @smrd_load_const0 -; CHECK: S_BUFFER_LOAD_DWORD s{{[0-9]}}, s[{{[0-9]:[0-9]}}], 4 ; encoding: [0x04 +; CHECK: S_BUFFER_LOAD_DWORD s{{[0-9]}}, s[{{[0-9]:[0-9]}}], 0x4 ; encoding: [0x04 define void @smrd_load_const0(<16 x i8> addrspace(2)* inreg, <16 x i8> addrspace(2)* inreg, <32 x i8> addrspace(2)* inreg, i32 inreg, <2 x i32>, <2 x i32>, <2 x i32>, <3 x i32>, <2 x i32>, <2 x i32>, <2 x i32>, float, float, float, float, float, float, float, float, float) #0 { main_body: %20 = getelementptr <16 x i8> addrspace(2)* %0, 
i32 0 @@ -49,7 +67,7 @@ main_body: ; SMRD load using the load.const intrinsic with an offset greater largest possible ; immediate offset. ; CHECK-LABEL: @smrd_load_const1 -; CHECK: S_BUFFER_LOAD_DWORD s{{[0-9]}}, s[{{[0-9]:[0-9]}}], 255 ; encoding: [0xff +; CHECK: S_BUFFER_LOAD_DWORD s{{[0-9]}}, s[{{[0-9]:[0-9]}}], 0xff ; encoding: [0xff define void @smrd_load_const1(<16 x i8> addrspace(2)* inreg, <16 x i8> addrspace(2)* inreg, <32 x i8> addrspace(2)* inreg, i32 inreg, <2 x i32>, <2 x i32>, <2 x i32>, <3 x i32>, <2 x i32>, <2 x i32>, <2 x i32>, float, float, float, float, float, float, float, float, float) #0 { main_body: %20 = getelementptr <16 x i8> addrspace(2)* %0, i32 0 diff --git a/test/CodeGen/R600/store-v3i64.ll b/test/CodeGen/R600/store-v3i64.ll index 58229f6..58d28b5 100644 --- a/test/CodeGen/R600/store-v3i64.ll +++ b/test/CodeGen/R600/store-v3i64.ll @@ -1,5 +1,5 @@ ; XFAIL: * -; RUN: llc -march=r600 -mcpu=SI < %s | FileCheck -check-prefix=SI +; RUN: llc -march=r600 -mcpu=SI -verify-machineinstrs< %s | FileCheck -check-prefix=SI ; SI-LABEL: @global_store_v3i64: ; SI: BUFFER_STORE_DWORDX4 diff --git a/test/CodeGen/R600/store-vector-ptrs.ll b/test/CodeGen/R600/store-vector-ptrs.ll index 3af7d91..41c5edc 100644 --- a/test/CodeGen/R600/store-vector-ptrs.ll +++ b/test/CodeGen/R600/store-vector-ptrs.ll @@ -1,6 +1,6 @@ ; REQUIRES: asserts ; XFAIL: * -; RUN: llc -march=r600 -mcpu=SI < %s +; RUN: llc -march=r600 -mcpu=SI -verify-machineinstrs< %s define void @store_vector_ptrs(<4 x i32*>* %out, <4 x [1024 x i32]*> %array) nounwind { %p = getelementptr <4 x [1024 x i32]*> %array, <4 x i16> zeroinitializer, <4 x i16> <i16 16, i16 16, i16 16, i16 16> diff --git a/test/CodeGen/R600/store.ll b/test/CodeGen/R600/store.ll index a3c5331..c0c8ccc 100644 --- a/test/CodeGen/R600/store.ll +++ b/test/CodeGen/R600/store.ll @@ -177,6 +177,26 @@ entry: ret void } +; FUNC-LABEL: @store_i64_i8 +; EG-CHECK: MEM_RAT MSKOR +; SI-CHECK: BUFFER_STORE_BYTE +define void @store_i64_i8(i8 
addrspace(1)* %out, i64 %in) { +entry: + %0 = trunc i64 %in to i8 + store i8 %0, i8 addrspace(1)* %out + ret void +} + +; FUNC-LABEL: @store_i64_i16 +; EG-CHECK: MEM_RAT MSKOR +; SI-CHECK: BUFFER_STORE_SHORT +define void @store_i64_i16(i16 addrspace(1)* %out, i64 %in) { +entry: + %0 = trunc i64 %in to i16 + store i16 %0, i16 addrspace(1)* %out + ret void +} + ;===------------------------------------------------------------------------===; ; Local Address Space ;===------------------------------------------------------------------------===; @@ -272,6 +292,26 @@ entry: ret void } +; FUNC-LABEL: @store_local_i64_i8 +; EG-CHECK: LDS_BYTE_WRITE +; SI-CHECK: DS_WRITE_B8 +define void @store_local_i64_i8(i8 addrspace(3)* %out, i64 %in) { +entry: + %0 = trunc i64 %in to i8 + store i8 %0, i8 addrspace(3)* %out + ret void +} + +; FUNC-LABEL: @store_local_i64_i16 +; EG-CHECK: LDS_SHORT_WRITE +; SI-CHECK: DS_WRITE_B16 +define void @store_local_i64_i16(i16 addrspace(3)* %out, i64 %in) { +entry: + %0 = trunc i64 %in to i16 + store i16 %0, i16 addrspace(3)* %out + ret void +} + ; The stores in this function are combined by the optimizer to create a ; 64-bit store with 32-bit alignment. This is legal for SI and the legalizer ; should not try to split the 64-bit store back into 2 32-bit stores. 
@@ -297,3 +337,29 @@ entry: } attributes #0 = { nounwind "less-precise-fpmad"="false" "no-frame-pointer-elim"="false" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "unsafe-fp-math"="false" "use-soft-float"="false" } + +; When i128 was a legal type this program generated cannot select errors: + +; FUNC-LABEL: @i128-const-store +; FIXME: We should be able to do this with one store instruction +; EG-CHECK: STORE_RAW +; EG-CHECK: STORE_RAW +; EG-CHECK: STORE_RAW +; EG-CHECK: STORE_RAW +; CM-CHECK: STORE_DWORD +; CM-CHECK: STORE_DWORD +; CM-CHECK: STORE_DWORD +; CM-CHECK: STORE_DWORD +; SI: BUFFER_STORE_DWORDX2 +; SI: BUFFER_STORE_DWORDX2 +define void @i128-const-store(i32 addrspace(1)* %out) { +entry: + store i32 1, i32 addrspace(1)* %out, align 4 + %arrayidx2 = getelementptr inbounds i32 addrspace(1)* %out, i64 1 + store i32 1, i32 addrspace(1)* %arrayidx2, align 4 + %arrayidx4 = getelementptr inbounds i32 addrspace(1)* %out, i64 2 + store i32 2, i32 addrspace(1)* %arrayidx4, align 4 + %arrayidx6 = getelementptr inbounds i32 addrspace(1)* %out, i64 3 + store i32 2, i32 addrspace(1)* %arrayidx6, align 4 + ret void +} diff --git a/test/CodeGen/R600/sub.ll b/test/CodeGen/R600/sub.ll index 5fdd2b8..e321ed6 100644 --- a/test/CodeGen/R600/sub.ll +++ b/test/CodeGen/R600/sub.ll @@ -1,13 +1,12 @@ -;RUN: llc < %s -march=r600 -mcpu=redwood | FileCheck --check-prefix=EG-CHECK %s -;RUN: llc < %s -march=r600 -mcpu=verde -verify-machineinstrs | FileCheck --check-prefix=SI-CHECK %s +;RUN: llc < %s -march=r600 -mcpu=redwood | FileCheck --check-prefix=EG --check-prefix=FUNC %s +;RUN: llc < %s -march=r600 -mcpu=verde -verify-machineinstrs | FileCheck --check-prefix=SI --check-prefix=FUNC %s -;EG-CHECK: @test2 -;EG-CHECK: SUB_INT {{\** *}}T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}} -;EG-CHECK: SUB_INT {{\** *}}T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}} +;FUNC-LABEL: @test2 +;EG: SUB_INT {{\** *}}T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}} +;EG: 
SUB_INT {{\** *}}T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}} -;SI-CHECK: @test2 -;SI-CHECK: V_SUB_I32_e32 v{{[0-9]+, v[0-9]+, v[0-9]+}} -;SI-CHECK: V_SUB_I32_e32 v{{[0-9]+, v[0-9]+, v[0-9]+}} +;SI: V_SUB_I32_e32 v{{[0-9]+, v[0-9]+, v[0-9]+}} +;SI: V_SUB_I32_e32 v{{[0-9]+, v[0-9]+, v[0-9]+}} define void @test2(<2 x i32> addrspace(1)* %out, <2 x i32> addrspace(1)* %in) { %b_ptr = getelementptr <2 x i32> addrspace(1)* %in, i32 1 @@ -18,17 +17,16 @@ define void @test2(<2 x i32> addrspace(1)* %out, <2 x i32> addrspace(1)* %in) { ret void } -;EG-CHECK: @test4 -;EG-CHECK: SUB_INT {{\** *}}T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}} -;EG-CHECK: SUB_INT {{\** *}}T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}} -;EG-CHECK: SUB_INT {{\** *}}T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}} -;EG-CHECK: SUB_INT {{\** *}}T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}} +;FUNC-LABEL: @test4 +;EG: SUB_INT {{\** *}}T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}} +;EG: SUB_INT {{\** *}}T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}} +;EG: SUB_INT {{\** *}}T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}} +;EG: SUB_INT {{\** *}}T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}} -;SI-CHECK: @test4 -;SI-CHECK: V_SUB_I32_e32 v{{[0-9]+, v[0-9]+, v[0-9]+}} -;SI-CHECK: V_SUB_I32_e32 v{{[0-9]+, v[0-9]+, v[0-9]+}} -;SI-CHECK: V_SUB_I32_e32 v{{[0-9]+, v[0-9]+, v[0-9]+}} -;SI-CHECK: V_SUB_I32_e32 v{{[0-9]+, v[0-9]+, v[0-9]+}} +;SI: V_SUB_I32_e32 v{{[0-9]+, v[0-9]+, v[0-9]+}} +;SI: V_SUB_I32_e32 v{{[0-9]+, v[0-9]+, v[0-9]+}} +;SI: V_SUB_I32_e32 v{{[0-9]+, v[0-9]+, v[0-9]+}} +;SI: V_SUB_I32_e32 v{{[0-9]+, v[0-9]+, v[0-9]+}} define void @test4(<4 x i32> addrspace(1)* %out, <4 x i32> addrspace(1)* %in) { %b_ptr = getelementptr <4 x i32> addrspace(1)* %in, i32 1 @@ -38,3 +36,24 @@ define void @test4(<4 x i32> addrspace(1)* %out, <4 x i32> addrspace(1)* %in) { store <4 x i32> %result, <4 x i32> addrspace(1)* %out ret void } + +;FUNC_LABEL: @test5 
+ +;EG-DAG: SETGE_UINT +;EG-DAG: CNDE_INT +;EG-DAG: SUB_INT +;EG-DAG: SUB_INT +;EG-DAG: SUB_INT + +;SI: S_XOR_B64 +;SI-DAG: S_ADD_I32 +;SI-DAG: S_ADDC_U32 +;SI-DAG: S_ADD_I32 +;SI-DAG: S_ADDC_U32 + +define void @test5(i64 addrspace(1)* %out, i64 %a, i64 %b) { +entry: + %0 = sub i64 %a, %b + store i64 %0, i64 addrspace(1)* %out + ret void +} diff --git a/test/CodeGen/R600/trunc-store-i1.ll b/test/CodeGen/R600/trunc-store-i1.ll index a888943..a3975c8 100644 --- a/test/CodeGen/R600/trunc-store-i1.ll +++ b/test/CodeGen/R600/trunc-store-i1.ll @@ -1,4 +1,4 @@ -; RUN: llc -march=r600 -mcpu=SI < %s | FileCheck -check-prefix=SI %s +; RUN: llc -march=r600 -mcpu=SI -verify-machineinstrs< %s | FileCheck -check-prefix=SI %s ; SI-LABEL: @global_truncstore_i32_to_i1 diff --git a/test/CodeGen/R600/trunc.ll b/test/CodeGen/R600/trunc.ll index 8a759dc..31cdfcd 100644 --- a/test/CodeGen/R600/trunc.ll +++ b/test/CodeGen/R600/trunc.ll @@ -3,7 +3,7 @@ define void @trunc_i64_to_i32_store(i32 addrspace(1)* %out, i64 %in) { ; SI-LABEL: @trunc_i64_to_i32_store -; SI: S_LOAD_DWORD s0, s[0:1], 11 +; SI: S_LOAD_DWORD s0, s[0:1], 0xb ; SI: V_MOV_B32_e32 v0, s0 ; SI: BUFFER_STORE_DWORD v0 @@ -31,8 +31,9 @@ define void @trunc_load_shl_i64(i32 addrspace(1)* %out, i64 %a) { ; SI-LABEL: @trunc_shl_i64: ; SI: S_LOAD_DWORDX2 s{{\[}}[[LO_SREG:[0-9]+]]:{{[0-9]+\]}}, -; SI: V_ADD_I32_e32 v[[LO_ADD:[0-9]+]], s[[LO_SREG]], -; SI: V_LSHL_B64 v{{\[}}[[LO_VREG:[0-9]+]]:{{[0-9]+\]}}, v{{\[}}[[LO_ADD]]:{{[0-9]+\]}}, 2 +; SI: S_ADD_I32 s[[LO_ADD:[0-9]+]], s[[LO_SREG]], +; SI: S_LSHL_B64 s{{\[}}[[LO_SREG2:[0-9]+]]:{{[0-9]+\]}}, s{{\[}}[[LO_ADD]]:{{[0-9]+\]}}, 2 +; SI: V_MOV_B32_e32 v[[LO_VREG:[0-9]+]], s[[LO_SREG2]] ; SI: BUFFER_STORE_DWORD v[[LO_VREG]], define void @trunc_shl_i64(i64 addrspace(1)* %out2, i32 addrspace(1)* %out, i64 %a) { %aa = add i64 %a, 234 ; Prevent shrinking store. 
diff --git a/test/CodeGen/R600/uaddo.ll b/test/CodeGen/R600/uaddo.ll new file mode 100644 index 0000000..3b69687 --- /dev/null +++ b/test/CodeGen/R600/uaddo.ll @@ -0,0 +1,17 @@ +; RUN: llc -march=r600 -mcpu=SI -verify-machineinstrs< %s | FileCheck -check-prefix=SI %s + +declare { i64, i1 } @llvm.uadd.with.overflow.i64(i64, i64) nounwind readnone + +; SI-LABEL: @uaddo_i64_zext +; SI: ADD +; SI: ADDC +; SI: ADDC +define void @uaddo_i64_zext(i64 addrspace(1)* %out, i64 %a, i64 %b) nounwind { + %uadd = call { i64, i1 } @llvm.uadd.with.overflow.i64(i64 %a, i64 %b) nounwind + %val = extractvalue { i64, i1 } %uadd, 0 + %carry = extractvalue { i64, i1 } %uadd, 1 + %ext = zext i1 %carry to i64 + %add2 = add i64 %val, %ext + store i64 %add2, i64 addrspace(1)* %out, align 8 + ret void +} diff --git a/test/CodeGen/R600/udivrem64.ll b/test/CodeGen/R600/udivrem64.ll new file mode 100644 index 0000000..a71315a --- /dev/null +++ b/test/CodeGen/R600/udivrem64.ll @@ -0,0 +1,82 @@ +;XUN: llc < %s -march=r600 -mcpu=SI -verify-machineinstrs| FileCheck --check-prefix=SI --check-prefix=FUNC %s +;RUN: llc < %s -march=r600 -mcpu=redwood | FileCheck --check-prefix=EG --check-prefix=FUNC %s + +;FUNC-LABEL: @test_udiv +;EG: RECIP_UINT +;EG: LSHL {{.*}}, 1, +;EG: BFE_UINT +;EG: BFE_UINT +;EG: BFE_UINT +;EG: BFE_UINT +;EG: BFE_UINT +;EG: BFE_UINT +;EG: BFE_UINT +;EG: BFE_UINT +;EG: BFE_UINT +;EG: BFE_UINT +;EG: BFE_UINT +;EG: BFE_UINT +;EG: BFE_UINT +;EG: BFE_UINT +;EG: BFE_UINT +;EG: BFE_UINT +;EG: BFE_UINT +;EG: BFE_UINT +;EG: BFE_UINT +;EG: BFE_UINT +;EG: BFE_UINT +;EG: BFE_UINT +;EG: BFE_UINT +;EG: BFE_UINT +;EG: BFE_UINT +;EG: BFE_UINT +;EG: BFE_UINT +;EG: BFE_UINT +;EG: BFE_UINT +;EG: BFE_UINT +;SI: S_ENDPGM +define void @test_udiv(i64 addrspace(1)* %out, i64 %x, i64 %y) { + %result = udiv i64 %x, %y + store i64 %result, i64 addrspace(1)* %out + ret void +} + +;FUNC-LABEL: @test_urem +;EG: RECIP_UINT +;EG: BFE_UINT +;EG: BFE_UINT +;EG: BFE_UINT +;EG: BFE_UINT +;EG: BFE_UINT +;EG: BFE_UINT 
+;EG: BFE_UINT +;EG: BFE_UINT +;EG: BFE_UINT +;EG: BFE_UINT +;EG: BFE_UINT +;EG: BFE_UINT +;EG: BFE_UINT +;EG: BFE_UINT +;EG: BFE_UINT +;EG: BFE_UINT +;EG: BFE_UINT +;EG: BFE_UINT +;EG: BFE_UINT +;EG: BFE_UINT +;EG: BFE_UINT +;EG: BFE_UINT +;EG: BFE_UINT +;EG: BFE_UINT +;EG: BFE_UINT +;EG: BFE_UINT +;EG: BFE_UINT +;EG: BFE_UINT +;EG: BFE_UINT +;EG: BFE_UINT +;EG: AND_INT {{.*}}, 1, +;SI: S_ENDPGM +define void @test_urem(i64 addrspace(1)* %out, i64 %x, i64 %y) { + %result = urem i64 %x, %y + store i64 %result, i64 addrspace(1)* %out + ret void +} diff --git a/test/CodeGen/R600/uint_to_fp.f64.ll b/test/CodeGen/R600/uint_to_fp.f64.ll new file mode 100644 index 0000000..75150c2 --- /dev/null +++ b/test/CodeGen/R600/uint_to_fp.f64.ll @@ -0,0 +1,9 @@ +; RUN: llc -march=r600 -mcpu=SI -verify-machineinstrs < %s | FileCheck -check-prefix=SI %s + +; SI-LABEL: @uint_to_fp_f64_i32 +; SI: V_CVT_F64_U32_e32 +define void @uint_to_fp_f64_i32(double addrspace(1)* %out, i32 %in) { + %cast = uitofp i32 %in to double + store double %cast, double addrspace(1)* %out, align 8 + ret void +} diff --git a/test/CodeGen/R600/unaligned-load-store.ll b/test/CodeGen/R600/unaligned-load-store.ll index 2824ff8..4df69d1 100644 --- a/test/CodeGen/R600/unaligned-load-store.ll +++ b/test/CodeGen/R600/unaligned-load-store.ll @@ -1,4 +1,4 @@ -; RUN: llc -march=r600 -mcpu=SI < %s | FileCheck -check-prefix=SI %s +; RUN: llc -march=r600 -mcpu=SI -verify-machineinstrs< %s | FileCheck -check-prefix=SI %s ; SI-LABEL: @unaligned_load_store_i32: ; DS_READ_U32 {{v[0-9]+}}, 0, [[REG]] diff --git a/test/CodeGen/R600/v_cndmask.ll b/test/CodeGen/R600/v_cndmask.ll index f8e9655..84087ee 100644 --- a/test/CodeGen/R600/v_cndmask.ll +++ b/test/CodeGen/R600/v_cndmask.ll @@ -3,7 +3,8 @@ ; SI: @v_cnd_nan ; SI: V_CNDMASK_B32_e64 v{{[0-9]}}, ; SI-DAG: v{{[0-9]}} -; SI-DAG: {{nan|#QNAN}} +; All nan values are converted to 0xffffffff +; SI-DAG: -1 define void @v_cnd_nan(float addrspace(1)* %out, i32 %c, float %f) { entry: %0 = 
icmp ne i32 %c, 0 diff --git a/test/CodeGen/R600/valu-i1.ll b/test/CodeGen/R600/valu-i1.ll new file mode 100644 index 0000000..5d5e3ff --- /dev/null +++ b/test/CodeGen/R600/valu-i1.ll @@ -0,0 +1,39 @@ +; RUN: llc < %s -march=r600 -mcpu=SI | FileCheck --check-prefix=SI %s + +; Make sure the i1 values created by the cfg structurizer pass are +; moved using VALU instructions +; SI-NOT: S_MOV_B64 s[{{[0-9]:[0-9]}}], -1 +; SI: V_MOV_B32_e32 v{{[0-9]}}, -1 +define void @test_if(i32 %a, i32 %b, i32 addrspace(1)* %src, i32 addrspace(1)* %dst) { +entry: + switch i32 %a, label %default [ + i32 0, label %case0 + i32 1, label %case1 + ] + +case0: + %arrayidx1 = getelementptr i32 addrspace(1)* %dst, i32 %b + store i32 0, i32 addrspace(1)* %arrayidx1, align 4 + br label %end + +case1: + %arrayidx5 = getelementptr i32 addrspace(1)* %dst, i32 %b + store i32 1, i32 addrspace(1)* %arrayidx5, align 4 + br label %end + +default: + %cmp8 = icmp eq i32 %a, 2 + %arrayidx10 = getelementptr i32 addrspace(1)* %dst, i32 %b + br i1 %cmp8, label %if, label %else + +if: + store i32 2, i32 addrspace(1)* %arrayidx10, align 4 + br label %end + +else: + store i32 3, i32 addrspace(1)* %arrayidx10, align 4 + br label %end + +end: + ret void +} diff --git a/test/CodeGen/R600/work-item-intrinsics.ll b/test/CodeGen/R600/work-item-intrinsics.ll index 9618d7f..90079b0 100644 --- a/test/CodeGen/R600/work-item-intrinsics.ll +++ b/test/CodeGen/R600/work-item-intrinsics.ll @@ -19,7 +19,7 @@ entry: ; R600-CHECK: MEM_RAT_CACHELESS STORE_RAW [[VAL:T[0-9]+\.X]] ; R600-CHECK: MOV [[VAL]], KC0[0].Y ; SI-CHECK: @ngroups_y -; SI-CHECK: S_LOAD_DWORD [[VAL:s[0-9]+]], s[0:1], 1 +; SI-CHECK: S_LOAD_DWORD [[VAL:s[0-9]+]], s[0:1], 0x1 ; SI-CHECK: V_MOV_B32_e32 [[VVAL:v[0-9]+]], [[VAL]] ; SI-CHECK: BUFFER_STORE_DWORD [[VVAL]] define void @ngroups_y (i32 addrspace(1)* %out) { @@ -33,7 +33,7 @@ entry: ; R600-CHECK: MEM_RAT_CACHELESS STORE_RAW [[VAL:T[0-9]+\.X]] ; R600-CHECK: MOV [[VAL]], KC0[0].Z ; SI-CHECK: @ngroups_z -; 
SI-CHECK: S_LOAD_DWORD [[VAL:s[0-9]+]], s[0:1], 2 +; SI-CHECK: S_LOAD_DWORD [[VAL:s[0-9]+]], s[0:1], 0x2 ; SI-CHECK: V_MOV_B32_e32 [[VVAL:v[0-9]+]], [[VAL]] ; SI-CHECK: BUFFER_STORE_DWORD [[VVAL]] define void @ngroups_z (i32 addrspace(1)* %out) { @@ -47,7 +47,7 @@ entry: ; R600-CHECK: MEM_RAT_CACHELESS STORE_RAW [[VAL:T[0-9]+\.X]] ; R600-CHECK: MOV [[VAL]], KC0[0].W ; SI-CHECK: @global_size_x -; SI-CHECK: S_LOAD_DWORD [[VAL:s[0-9]+]], s[0:1], 3 +; SI-CHECK: S_LOAD_DWORD [[VAL:s[0-9]+]], s[0:1], 0x3 ; SI-CHECK: V_MOV_B32_e32 [[VVAL:v[0-9]+]], [[VAL]] ; SI-CHECK: BUFFER_STORE_DWORD [[VVAL]] define void @global_size_x (i32 addrspace(1)* %out) { @@ -61,7 +61,7 @@ entry: ; R600-CHECK: MEM_RAT_CACHELESS STORE_RAW [[VAL:T[0-9]+\.X]] ; R600-CHECK: MOV [[VAL]], KC0[1].X ; SI-CHECK: @global_size_y -; SI-CHECK: S_LOAD_DWORD [[VAL:s[0-9]+]], s[0:1], 4 +; SI-CHECK: S_LOAD_DWORD [[VAL:s[0-9]+]], s[0:1], 0x4 ; SI-CHECK: V_MOV_B32_e32 [[VVAL:v[0-9]+]], [[VAL]] ; SI-CHECK: BUFFER_STORE_DWORD [[VVAL]] define void @global_size_y (i32 addrspace(1)* %out) { @@ -75,7 +75,7 @@ entry: ; R600-CHECK: MEM_RAT_CACHELESS STORE_RAW [[VAL:T[0-9]+\.X]] ; R600-CHECK: MOV [[VAL]], KC0[1].Y ; SI-CHECK: @global_size_z -; SI-CHECK: S_LOAD_DWORD [[VAL:s[0-9]+]], s[0:1], 5 +; SI-CHECK: S_LOAD_DWORD [[VAL:s[0-9]+]], s[0:1], 0x5 ; SI-CHECK: V_MOV_B32_e32 [[VVAL:v[0-9]+]], [[VAL]] ; SI-CHECK: BUFFER_STORE_DWORD [[VVAL]] define void @global_size_z (i32 addrspace(1)* %out) { @@ -89,7 +89,7 @@ entry: ; R600-CHECK: MEM_RAT_CACHELESS STORE_RAW [[VAL:T[0-9]+\.X]] ; R600-CHECK: MOV [[VAL]], KC0[1].Z ; SI-CHECK: @local_size_x -; SI-CHECK: S_LOAD_DWORD [[VAL:s[0-9]+]], s[0:1], 6 +; SI-CHECK: S_LOAD_DWORD [[VAL:s[0-9]+]], s[0:1], 0x6 ; SI-CHECK: V_MOV_B32_e32 [[VVAL:v[0-9]+]], [[VAL]] ; SI-CHECK: BUFFER_STORE_DWORD [[VVAL]] define void @local_size_x (i32 addrspace(1)* %out) { @@ -103,7 +103,7 @@ entry: ; R600-CHECK: MEM_RAT_CACHELESS STORE_RAW [[VAL:T[0-9]+\.X]] ; R600-CHECK: MOV [[VAL]], KC0[1].W ; SI-CHECK: 
@local_size_y -; SI-CHECK: S_LOAD_DWORD [[VAL:s[0-9]+]], s[0:1], 7 +; SI-CHECK: S_LOAD_DWORD [[VAL:s[0-9]+]], s[0:1], 0x7 ; SI-CHECK: V_MOV_B32_e32 [[VVAL:v[0-9]+]], [[VAL]] ; SI-CHECK: BUFFER_STORE_DWORD [[VVAL]] define void @local_size_y (i32 addrspace(1)* %out) { @@ -117,7 +117,7 @@ entry: ; R600-CHECK: MEM_RAT_CACHELESS STORE_RAW [[VAL:T[0-9]+\.X]] ; R600-CHECK: MOV [[VAL]], KC0[2].X ; SI-CHECK: @local_size_z -; SI-CHECK: S_LOAD_DWORD [[VAL:s[0-9]+]], s[0:1], 8 +; SI-CHECK: S_LOAD_DWORD [[VAL:s[0-9]+]], s[0:1], 0x8 ; SI-CHECK: V_MOV_B32_e32 [[VVAL:v[0-9]+]], [[VAL]] ; SI-CHECK: BUFFER_STORE_DWORD [[VVAL]] define void @local_size_z (i32 addrspace(1)* %out) { diff --git a/test/CodeGen/R600/xor.ll b/test/CodeGen/R600/xor.ll index 49ed12d..5a5c86d 100644 --- a/test/CodeGen/R600/xor.ll +++ b/test/CodeGen/R600/xor.ll @@ -72,3 +72,21 @@ define void @scalar_xor_i32(i32 addrspace(1)* %out, i32 %a, i32 %b) { store i32 %result, i32 addrspace(1)* %out ret void } + +; SI-CHECK-LABEL: @scalar_not_i32 +; SI-CHECK: S_NOT_B32 +define void @scalar_not_i32(i32 addrspace(1)* %out, i32 %a) { + %result = xor i32 %a, -1 + store i32 %result, i32 addrspace(1)* %out + ret void +} + +; SI-CHECK-LABEL: @vector_not_i32 +; SI-CHECK: V_NOT_B32 +define void @vector_not_i32(i32 addrspace(1)* %out, i32 addrspace(1)* %in0, i32 addrspace(1)* %in1) { + %a = load i32 addrspace(1)* %in0 + %b = load i32 addrspace(1)* %in1 + %result = xor i32 %a, -1 + store i32 %result, i32 addrspace(1)* %out + ret void +} diff --git a/test/CodeGen/R600/zero_extend.ll b/test/CodeGen/R600/zero_extend.ll index a114bfc..8585d4a 100644 --- a/test/CodeGen/R600/zero_extend.ll +++ b/test/CodeGen/R600/zero_extend.ll @@ -6,8 +6,9 @@ ; R600-CHECK: MEM_RAT_CACHELESS STORE_RAW ; SI-CHECK: @test -; SI-CHECK: V_MOV_B32_e32 v[[ZERO:[0-9]]], 0 -; SI-CHECK: BUFFER_STORE_DWORDX2 v[0:[[ZERO]]{{\]}} +; SI-CHECK: S_MOV_B32 [[ZERO:s[0-9]]], 0 +; SI-CHECK: V_MOV_B32_e32 v[[V_ZERO:[0-9]]], [[ZERO]] +; SI-CHECK: BUFFER_STORE_DWORDX2 
v[0:[[V_ZERO]]{{\]}} define void @test(i64 addrspace(1)* %out, i32 %a, i32 %b, i32 %c) { entry: %0 = mul i32 %a, %b @@ -26,3 +27,14 @@ entry: store i32 %1, i32 addrspace(1)* %out ret void } + +; SI-CHECK-LABEL: @zext_i1_to_i64 +; SI-CHECK: V_CMP_EQ_I32 +; SI-CHECK: V_CNDMASK_B32 +; SI-CHECK: S_MOV_B32 s{{[0-9]+}}, 0 +define void @zext_i1_to_i64(i64 addrspace(1)* %out, i32 %a, i32 %b) nounwind { + %cmp = icmp eq i32 %a, %b + %ext = zext i1 %cmp to i64 + store i64 %ext, i64 addrspace(1)* %out, align 8 + ret void +} |