aboutsummaryrefslogtreecommitdiffstats
path: root/test/CodeGen/R600
diff options
context:
space:
mode:
authorStephen Hines <srhines@google.com>2014-05-29 02:49:00 -0700
committerStephen Hines <srhines@google.com>2014-05-29 02:49:00 -0700
commitdce4a407a24b04eebc6a376f8e62b41aaa7b071f (patch)
treedcebc53f2b182f145a2e659393bf9a0472cedf23 /test/CodeGen/R600
parent220b921aed042f9e520c26cffd8282a94c66c3d5 (diff)
downloadexternal_llvm-dce4a407a24b04eebc6a376f8e62b41aaa7b071f.zip
external_llvm-dce4a407a24b04eebc6a376f8e62b41aaa7b071f.tar.gz
external_llvm-dce4a407a24b04eebc6a376f8e62b41aaa7b071f.tar.bz2
Update LLVM for 3.5 rebase (r209712).
Change-Id: I149556c940fb7dc92d075273c87ff584f400941f
Diffstat (limited to 'test/CodeGen/R600')
-rw-r--r--test/CodeGen/R600/32-bit-local-address-space.ll8
-rw-r--r--test/CodeGen/R600/64bit-kernel-args.ll4
-rw-r--r--test/CodeGen/R600/add.ll25
-rw-r--r--test/CodeGen/R600/add_i64.ll2
-rw-r--r--test/CodeGen/R600/address-space.ll6
-rw-r--r--test/CodeGen/R600/array-ptr-calc-i32.ll2
-rw-r--r--test/CodeGen/R600/array-ptr-calc-i64.ll2
-rw-r--r--test/CodeGen/R600/call.ll33
-rw-r--r--test/CodeGen/R600/extload.ll11
-rw-r--r--test/CodeGen/R600/extract_vector_elt_i16.ll29
-rw-r--r--test/CodeGen/R600/fabs.ll11
-rw-r--r--test/CodeGen/R600/fconst64.ll4
-rw-r--r--test/CodeGen/R600/fneg.ll13
-rw-r--r--test/CodeGen/R600/fp_to_uint.f64.ll9
-rw-r--r--test/CodeGen/R600/gep-address-space.ll4
-rw-r--r--test/CodeGen/R600/gv-const-addrspace-fail.ll58
-rw-r--r--test/CodeGen/R600/gv-const-addrspace.ll57
-rw-r--r--test/CodeGen/R600/infinite-loop.ll2
-rw-r--r--test/CodeGen/R600/insert_vector_elt.ll28
-rw-r--r--test/CodeGen/R600/insert_vector_elt_f64.ll2
-rw-r--r--test/CodeGen/R600/kernel-args.ll32
-rw-r--r--test/CodeGen/R600/llvm.AMDGPU.bfe.i32.ll388
-rw-r--r--test/CodeGen/R600/llvm.AMDGPU.bfe.u32.ll514
-rw-r--r--test/CodeGen/R600/llvm.AMDGPU.imad24.ll21
-rw-r--r--test/CodeGen/R600/llvm.AMDGPU.imul24.ll15
-rw-r--r--test/CodeGen/R600/llvm.AMDGPU.umad24.ll19
-rw-r--r--test/CodeGen/R600/llvm.AMDGPU.umul24.ll17
-rw-r--r--test/CodeGen/R600/llvm.SI.tbuffer.store.ll8
-rw-r--r--test/CodeGen/R600/llvm.cos.ll43
-rw-r--r--test/CodeGen/R600/llvm.rint.f64.ll37
-rw-r--r--test/CodeGen/R600/llvm.rint.ll49
-rw-r--r--test/CodeGen/R600/llvm.sin.ll44
-rw-r--r--test/CodeGen/R600/llvm.sqrt.ll2
-rw-r--r--test/CodeGen/R600/load-i1.ll107
-rw-r--r--test/CodeGen/R600/local-64.ll52
-rw-r--r--test/CodeGen/R600/local-memory-two-objects.ll4
-rw-r--r--test/CodeGen/R600/loop-idiom.ll2
-rw-r--r--test/CodeGen/R600/mad_int24.ll17
-rw-r--r--test/CodeGen/R600/mad_uint24.ll67
-rw-r--r--test/CodeGen/R600/mubuf.ll16
-rw-r--r--test/CodeGen/R600/mul.ll63
-rw-r--r--test/CodeGen/R600/mul_int24.ll17
-rw-r--r--test/CodeGen/R600/mul_uint24.ll61
-rw-r--r--test/CodeGen/R600/mulhu.ll2
-rw-r--r--test/CodeGen/R600/or.ll4
-rw-r--r--test/CodeGen/R600/private-memory.ll8
-rw-r--r--test/CodeGen/R600/pv.ll2
-rw-r--r--test/CodeGen/R600/register-count-comments.ll2
-rw-r--r--test/CodeGen/R600/salu-to-valu.ll42
-rw-r--r--test/CodeGen/R600/schedule-vs-if-nested-loop-failure.ll2
-rw-r--r--test/CodeGen/R600/selectcc.ll19
-rw-r--r--test/CodeGen/R600/setcc.ll26
-rw-r--r--test/CodeGen/R600/setcc64.ll26
-rw-r--r--test/CodeGen/R600/seto.ll3
-rw-r--r--test/CodeGen/R600/setuo.ll3
-rw-r--r--test/CodeGen/R600/sext-in-reg.ll371
-rw-r--r--test/CodeGen/R600/sgpr-control-flow.ll27
-rw-r--r--test/CodeGen/R600/sgpr-copy-duplicate-operand.ll2
-rw-r--r--test/CodeGen/R600/sgpr-copy.ll2
-rw-r--r--test/CodeGen/R600/si-annotate-cf-assertion.ll2
-rw-r--r--test/CodeGen/R600/simplify-demanded-bits-build-pair.ll36
-rw-r--r--test/CodeGen/R600/smrd.ll28
-rw-r--r--test/CodeGen/R600/store-v3i64.ll2
-rw-r--r--test/CodeGen/R600/store-vector-ptrs.ll2
-rw-r--r--test/CodeGen/R600/store.ll66
-rw-r--r--test/CodeGen/R600/sub.ll55
-rw-r--r--test/CodeGen/R600/trunc-store-i1.ll2
-rw-r--r--test/CodeGen/R600/trunc.ll7
-rw-r--r--test/CodeGen/R600/uaddo.ll17
-rw-r--r--test/CodeGen/R600/udivrem64.ll82
-rw-r--r--test/CodeGen/R600/uint_to_fp.f64.ll9
-rw-r--r--test/CodeGen/R600/unaligned-load-store.ll2
-rw-r--r--test/CodeGen/R600/v_cndmask.ll3
-rw-r--r--test/CodeGen/R600/valu-i1.ll39
-rw-r--r--test/CodeGen/R600/work-item-intrinsics.ll16
-rw-r--r--test/CodeGen/R600/xor.ll18
-rw-r--r--test/CodeGen/R600/zero_extend.ll16
77 files changed, 2488 insertions, 360 deletions
diff --git a/test/CodeGen/R600/32-bit-local-address-space.ll b/test/CodeGen/R600/32-bit-local-address-space.ll
index fffaefe..7dec426 100644
--- a/test/CodeGen/R600/32-bit-local-address-space.ll
+++ b/test/CodeGen/R600/32-bit-local-address-space.ll
@@ -33,7 +33,7 @@ entry:
; CHECK-LABEL: @local_address_gep_const_offset
; CHECK: V_MOV_B32_e32 [[VPTR:v[0-9]+]], s{{[0-9]+}}
-; CHECK: DS_READ_B32 v{{[0-9]+}}, [[VPTR]], 4,
+; CHECK: DS_READ_B32 v{{[0-9]+}}, [[VPTR]], 0x4,
define void @local_address_gep_const_offset(i32 addrspace(1)* %out, i32 addrspace(3)* %in) {
entry:
%0 = getelementptr i32 addrspace(3)* %in, i32 1
@@ -44,7 +44,7 @@ entry:
; Offset too large, can't fold into 16-bit immediate offset.
; CHECK-LABEL: @local_address_gep_large_const_offset
-; CHECK: S_ADD_I32 [[SPTR:s[0-9]]], s{{[0-9]+}}, 65540
+; CHECK: S_ADD_I32 [[SPTR:s[0-9]]], s{{[0-9]+}}, 0x10004
; CHECK: V_MOV_B32_e32 [[VPTR:v[0-9]+]], [[SPTR]]
; CHECK: DS_READ_B32 [[VPTR]]
define void @local_address_gep_large_const_offset(i32 addrspace(1)* %out, i32 addrspace(3)* %in) {
@@ -119,7 +119,7 @@ define void @local_address_gep_store(i32 addrspace(3)* %out, i32, i32 %val, i32
; CHECK-LABEL: @local_address_gep_const_offset_store
; CHECK: V_MOV_B32_e32 [[VPTR:v[0-9]+]], s{{[0-9]+}}
; CHECK: V_MOV_B32_e32 [[VAL:v[0-9]+]], s{{[0-9]+}}
-; CHECK: DS_WRITE_B32 [[VPTR]], [[VAL]], 4
+; CHECK: DS_WRITE_B32 [[VPTR]], [[VAL]], 0x4
define void @local_address_gep_const_offset_store(i32 addrspace(3)* %out, i32 %val) {
%gep = getelementptr i32 addrspace(3)* %out, i32 1
store i32 %val, i32 addrspace(3)* %gep, align 4
@@ -128,7 +128,7 @@ define void @local_address_gep_const_offset_store(i32 addrspace(3)* %out, i32 %v
; Offset too large, can't fold into 16-bit immediate offset.
; CHECK-LABEL: @local_address_gep_large_const_offset_store
-; CHECK: S_ADD_I32 [[SPTR:s[0-9]]], s{{[0-9]+}}, 65540
+; CHECK: S_ADD_I32 [[SPTR:s[0-9]]], s{{[0-9]+}}, 0x10004
; CHECK: V_MOV_B32_e32 [[VPTR:v[0-9]+]], [[SPTR]]
; CHECK: DS_WRITE_B32 [[VPTR]], v{{[0-9]+}}, 0
define void @local_address_gep_large_const_offset_store(i32 addrspace(3)* %out, i32 %val) {
diff --git a/test/CodeGen/R600/64bit-kernel-args.ll b/test/CodeGen/R600/64bit-kernel-args.ll
index 0d6bfb1..2d82c1e 100644
--- a/test/CodeGen/R600/64bit-kernel-args.ll
+++ b/test/CodeGen/R600/64bit-kernel-args.ll
@@ -1,8 +1,8 @@
; RUN: llc < %s -march=r600 -mcpu=tahiti -verify-machineinstrs | FileCheck %s --check-prefix=SI-CHECK
; SI-CHECK: @f64_kernel_arg
-; SI-CHECK-DAG: S_LOAD_DWORDX2 s[{{[0-9]:[0-9]}}], s[0:1], 9
-; SI-CHECK-DAG: S_LOAD_DWORDX2 s[{{[0-9]:[0-9]}}], s[0:1], 11
+; SI-CHECK-DAG: S_LOAD_DWORDX2 s[{{[0-9]:[0-9]}}], s[0:1], 0x9
+; SI-CHECK-DAG: S_LOAD_DWORDX2 s[{{[0-9]:[0-9]}}], s[0:1], 0xb
; SI-CHECK: BUFFER_STORE_DWORDX2
define void @f64_kernel_arg(double addrspace(1)* %out, double %in) {
entry:
diff --git a/test/CodeGen/R600/add.ll b/test/CodeGen/R600/add.ll
index e9db52a..711a2bc 100644
--- a/test/CodeGen/R600/add.ll
+++ b/test/CodeGen/R600/add.ll
@@ -140,3 +140,28 @@ entry:
store i64 %1, i64 addrspace(1)* %out
ret void
}
+
+; Test i64 add inside a branch. We don't allow SALU instructions inside of
+; branches.
+; FIXME: We are being conservative here. We could allow this in some cases.
+; FUNC-LABEL: @add64_in_branch
+; SI-CHECK-NOT: S_ADD_I32
+; SI-CHECK-NOT: S_ADDC_U32
+define void @add64_in_branch(i64 addrspace(1)* %out, i64 addrspace(1)* %in, i64 %a, i64 %b, i64 %c) {
+entry:
+ %0 = icmp eq i64 %a, 0
+ br i1 %0, label %if, label %else
+
+if:
+ %1 = load i64 addrspace(1)* %in
+ br label %endif
+
+else:
+ %2 = add i64 %a, %b
+ br label %endif
+
+endif:
+ %3 = phi i64 [%1, %if], [%2, %else]
+ store i64 %3, i64 addrspace(1)* %out
+ ret void
+}
diff --git a/test/CodeGen/R600/add_i64.ll b/test/CodeGen/R600/add_i64.ll
index 7081b07..c9eaeda 100644
--- a/test/CodeGen/R600/add_i64.ll
+++ b/test/CodeGen/R600/add_i64.ll
@@ -1,4 +1,4 @@
-; RUN: llc -march=r600 -mcpu=SI < %s | FileCheck -check-prefix=SI %s
+; RUN: llc -march=r600 -mcpu=SI -verify-machineinstrs< %s | FileCheck -check-prefix=SI %s
declare i32 @llvm.r600.read.tidig.x() readnone
diff --git a/test/CodeGen/R600/address-space.ll b/test/CodeGen/R600/address-space.ll
index 15d2ed2..f75a8ac 100644
--- a/test/CodeGen/R600/address-space.ll
+++ b/test/CodeGen/R600/address-space.ll
@@ -1,4 +1,4 @@
-; RUN: llc -march=r600 -mcpu=SI < %s | FileCheck %s
+; RUN: llc -march=r600 -mcpu=SI -verify-machineinstrs< %s | FileCheck %s
; Test that codegenprepare understands address space sizes
@@ -10,8 +10,8 @@
; CHECK-LABEL: @do_as_ptr_calcs:
; CHECK: S_LOAD_DWORD [[SREG1:s[0-9]+]],
; CHECK: V_MOV_B32_e32 [[VREG1:v[0-9]+]], [[SREG1]]
-; CHECK: DS_READ_B32 v{{[0-9]+}}, [[VREG1]], 20
-; CHECK: DS_READ_B32 v{{[0-9]+}}, v{{[0-9]+}}, 12
+; CHECK: DS_READ_B32 v{{[0-9]+}}, [[VREG1]], 0x14
+; CHECK: DS_READ_B32 v{{[0-9]+}}, v{{[0-9]+}}, 0xc
define void @do_as_ptr_calcs(%struct.foo addrspace(3)* nocapture %ptr) nounwind {
entry:
%x = getelementptr inbounds %struct.foo addrspace(3)* %ptr, i32 0, i32 1, i32 0
diff --git a/test/CodeGen/R600/array-ptr-calc-i32.ll b/test/CodeGen/R600/array-ptr-calc-i32.ll
index cb2a1c8..c2362da 100644
--- a/test/CodeGen/R600/array-ptr-calc-i32.ll
+++ b/test/CodeGen/R600/array-ptr-calc-i32.ll
@@ -1,4 +1,4 @@
-; RUN: llc -march=r600 -mcpu=SI < %s | FileCheck -check-prefix=SI %s
+; RUN: llc -verify-machineinstrs -march=r600 -mcpu=SI < %s | FileCheck -check-prefix=SI %s
declare i32 @llvm.SI.tid() nounwind readnone
declare void @llvm.AMDGPU.barrier.local() nounwind noduplicate
diff --git a/test/CodeGen/R600/array-ptr-calc-i64.ll b/test/CodeGen/R600/array-ptr-calc-i64.ll
index 652bbfe..e254c5f 100644
--- a/test/CodeGen/R600/array-ptr-calc-i64.ll
+++ b/test/CodeGen/R600/array-ptr-calc-i64.ll
@@ -1,5 +1,5 @@
; XFAIL: *
-; RUN: llc < %s -march=r600 -mcpu=SI | FileCheck --check-prefix=SI %s
+; RUN: llc < %s -march=r600 -mcpu=SI -verify-machineinstrs| FileCheck --check-prefix=SI %s
declare i32 @llvm.SI.tid() readnone
diff --git a/test/CodeGen/R600/call.ll b/test/CodeGen/R600/call.ll
new file mode 100644
index 0000000..d803474
--- /dev/null
+++ b/test/CodeGen/R600/call.ll
@@ -0,0 +1,33 @@
+; RUN: not llc -march=r600 -mcpu=SI -verify-machineinstrs< %s 2>&1 | FileCheck %s
+; RUN: not llc -march=r600 -mcpu=cypress < %s 2>&1 | FileCheck %s
+
+; CHECK: error: unsupported call to function defined_function in test_call
+
+
+declare i32 @external_function(i32) nounwind
+
+define i32 @defined_function(i32 %x) nounwind noinline {
+ %y = add i32 %x, 8
+ ret i32 %y
+}
+
+define void @test_call(i32 addrspace(1)* %out, i32 addrspace(1)* %in) {
+ %b_ptr = getelementptr i32 addrspace(1)* %in, i32 1
+ %a = load i32 addrspace(1)* %in
+ %b = load i32 addrspace(1)* %b_ptr
+ %c = call i32 @defined_function(i32 %b) nounwind
+ %result = add i32 %a, %c
+ store i32 %result, i32 addrspace(1)* %out
+ ret void
+}
+
+define void @test_call_external(i32 addrspace(1)* %out, i32 addrspace(1)* %in) {
+ %b_ptr = getelementptr i32 addrspace(1)* %in, i32 1
+ %a = load i32 addrspace(1)* %in
+ %b = load i32 addrspace(1)* %b_ptr
+ %c = call i32 @external_function(i32 %b) nounwind
+ %result = add i32 %a, %c
+ store i32 %result, i32 addrspace(1)* %out
+ ret void
+}
+
diff --git a/test/CodeGen/R600/extload.ll b/test/CodeGen/R600/extload.ll
index 2e70d47..dc056e0 100644
--- a/test/CodeGen/R600/extload.ll
+++ b/test/CodeGen/R600/extload.ll
@@ -1,5 +1,5 @@
; RUN: llc -march=r600 -mcpu=cypress < %s | FileCheck -check-prefix=EG -check-prefix=FUNC %s
-; RUN: llc -march=r600 -mcpu=SI < %s | FileCheck -check-prefix=SI -check-prefix=FUNC %s
+; RUN: llc -march=r600 -mcpu=SI -verify-machineinstrs< %s | FileCheck -check-prefix=SI -check-prefix=FUNC %s
; FUNC-LABEL: @anyext_load_i8:
; EG: AND_INT
@@ -87,8 +87,9 @@ define void @sextload_global_i32_to_i64(i64 addrspace(1)* %out, i32 addrspace(1)
}
; FUNC-LABEL: @zextload_global_i8_to_i64
+; SI: S_MOV_B32 [[ZERO:s[0-9]+]], 0
; SI: BUFFER_LOAD_UBYTE [[LOAD:v[0-9]+]],
-; SI: V_MOV_B32_e32 {{v[0-9]+}}, 0
+; SI: V_MOV_B32_e32 {{v[0-9]+}}, [[ZERO]]
; SI: BUFFER_STORE_DWORDX2
define void @zextload_global_i8_to_i64(i64 addrspace(1)* %out, i8 addrspace(1)* %in) nounwind {
%a = load i8 addrspace(1)* %in, align 8
@@ -98,8 +99,9 @@ define void @zextload_global_i8_to_i64(i64 addrspace(1)* %out, i8 addrspace(1)*
}
; FUNC-LABEL: @zextload_global_i16_to_i64
+; SI: S_MOV_B32 [[ZERO:s[0-9]+]], 0
; SI: BUFFER_LOAD_USHORT [[LOAD:v[0-9]+]],
-; SI: V_MOV_B32_e32 {{v[0-9]+}}, 0
+; SI: V_MOV_B32_e32 {{v[0-9]+}}, [[ZERO]]
; SI: BUFFER_STORE_DWORDX2
define void @zextload_global_i16_to_i64(i64 addrspace(1)* %out, i16 addrspace(1)* %in) nounwind {
%a = load i16 addrspace(1)* %in, align 8
@@ -109,8 +111,9 @@ define void @zextload_global_i16_to_i64(i64 addrspace(1)* %out, i16 addrspace(1)
}
; FUNC-LABEL: @zextload_global_i32_to_i64
+; SI: S_MOV_B32 [[ZERO:s[0-9]+]], 0
; SI: BUFFER_LOAD_DWORD [[LOAD:v[0-9]+]],
-; SI: V_MOV_B32_e32 {{v[0-9]+}}, 0
+; SI: V_MOV_B32_e32 {{v[0-9]+}}, [[ZERO]]
; SI: BUFFER_STORE_DWORDX2
define void @zextload_global_i32_to_i64(i64 addrspace(1)* %out, i32 addrspace(1)* %in) nounwind {
%a = load i32 addrspace(1)* %in, align 8
diff --git a/test/CodeGen/R600/extract_vector_elt_i16.ll b/test/CodeGen/R600/extract_vector_elt_i16.ll
new file mode 100644
index 0000000..5cd1b04
--- /dev/null
+++ b/test/CodeGen/R600/extract_vector_elt_i16.ll
@@ -0,0 +1,29 @@
+; RUN: llc -march=r600 -mcpu=SI -verify-machineinstrs< %s | FileCheck -check-prefix=SI -check-prefix=FUNC %s
+
+; FUNC-LABEL: @extract_vector_elt_v2i16
+; SI: BUFFER_LOAD_USHORT
+; SI: BUFFER_STORE_SHORT
+; SI: BUFFER_LOAD_USHORT
+; SI: BUFFER_STORE_SHORT
+define void @extract_vector_elt_v2i16(i16 addrspace(1)* %out, <2 x i16> %foo) nounwind {
+ %p0 = extractelement <2 x i16> %foo, i32 0
+ %p1 = extractelement <2 x i16> %foo, i32 1
+ %out1 = getelementptr i16 addrspace(1)* %out, i32 1
+ store i16 %p1, i16 addrspace(1)* %out, align 2
+ store i16 %p0, i16 addrspace(1)* %out1, align 2
+ ret void
+}
+
+; FUNC-LABEL: @extract_vector_elt_v4i16
+; SI: BUFFER_LOAD_USHORT
+; SI: BUFFER_STORE_SHORT
+; SI: BUFFER_LOAD_USHORT
+; SI: BUFFER_STORE_SHORT
+define void @extract_vector_elt_v4i16(i16 addrspace(1)* %out, <4 x i16> %foo) nounwind {
+ %p0 = extractelement <4 x i16> %foo, i32 0
+ %p1 = extractelement <4 x i16> %foo, i32 2
+ %out1 = getelementptr i16 addrspace(1)* %out, i32 1
+ store i16 %p1, i16 addrspace(1)* %out, align 2
+ store i16 %p0, i16 addrspace(1)* %out1, align 2
+ ret void
+}
diff --git a/test/CodeGen/R600/fabs.ll b/test/CodeGen/R600/fabs.ll
index 2cd3a4f..b87ce22 100644
--- a/test/CodeGen/R600/fabs.ll
+++ b/test/CodeGen/R600/fabs.ll
@@ -49,6 +49,17 @@ entry:
ret void
}
+; SI-CHECK-LABEL: @fabs_fold
+; SI-CHECK-NOT: V_AND_B32_e32
+; SI-CHECK: V_MUL_F32_e64 v{{[0-9]+}}, s{{[0-9]+}}, |v{{[0-9]+}}|
+define void @fabs_fold(float addrspace(1)* %out, float %in0, float %in1) {
+entry:
+ %0 = call float @fabs(float %in0)
+ %1 = fmul float %0, %in1
+ store float %1, float addrspace(1)* %out
+ ret void
+}
+
declare float @fabs(float ) readnone
declare <2 x float> @llvm.fabs.v2f32(<2 x float> ) readnone
declare <4 x float> @llvm.fabs.v4f32(<4 x float> ) readnone
diff --git a/test/CodeGen/R600/fconst64.ll b/test/CodeGen/R600/fconst64.ll
index 5c5ee7e..9c3a7e3 100644
--- a/test/CodeGen/R600/fconst64.ll
+++ b/test/CodeGen/R600/fconst64.ll
@@ -1,8 +1,8 @@
; RUN: llc < %s -march=r600 -mcpu=tahiti -verify-machineinstrs | FileCheck %s
; CHECK: @fconst_f64
-; CHECK: V_MOV_B32_e32 {{v[0-9]+}}, 0.000000e+00
-; CHECK-NEXT: V_MOV_B32_e32 {{v[0-9]+}}, 2.312500e+00
+; CHECK-DAG: S_MOV_B32 {{s[0-9]+}}, 0x40140000
+; CHECK-DAG: S_MOV_B32 {{s[0-9]+}}, 0
define void @fconst_f64(double addrspace(1)* %out, double addrspace(1)* %in) {
%r1 = load double addrspace(1)* %in
diff --git a/test/CodeGen/R600/fneg.ll b/test/CodeGen/R600/fneg.ll
index f4e6be6..4cddc73 100644
--- a/test/CodeGen/R600/fneg.ll
+++ b/test/CodeGen/R600/fneg.ll
@@ -51,7 +51,7 @@ entry:
; R600-CHECK: -KC0[2].Z
; SI-CHECK-LABEL: @fneg_free
; XXX: We could use V_ADD_F32_e64 with the negate bit here instead.
-; SI-CHECK: V_SUB_F32_e64 v{{[0-9]}}, 0.000000e+00, s{{[0-9]}}, 0, 0, 0, 0
+; SI-CHECK: V_SUB_F32_e64 v{{[0-9]}}, 0.000000e+00, s{{[0-9]}}, 0, 0
define void @fneg_free(float addrspace(1)* %out, i32 %in) {
entry:
%0 = bitcast i32 %in to float
@@ -59,3 +59,14 @@ entry:
store float %1, float addrspace(1)* %out
ret void
}
+
+; SI-CHECK-LABEL: @fneg_fold
+; SI-CHECK-NOT: V_XOR_B32
+; SI-CHECK: V_MUL_F32_e64 v{{[0-9]+}}, s{{[0-9]+}}, -v{{[0-9]+}}
+define void @fneg_fold(float addrspace(1)* %out, float %in) {
+entry:
+ %0 = fsub float -0.0, %in
+ %1 = fmul float %0, %in
+ store float %1, float addrspace(1)* %out
+ ret void
+}
diff --git a/test/CodeGen/R600/fp_to_uint.f64.ll b/test/CodeGen/R600/fp_to_uint.f64.ll
new file mode 100644
index 0000000..bf607ce
--- /dev/null
+++ b/test/CodeGen/R600/fp_to_uint.f64.ll
@@ -0,0 +1,9 @@
+; RUN: llc -march=r600 -mcpu=SI -verify-machineinstrs < %s | FileCheck -check-prefix=SI %s
+
+; SI-LABEL: @fp_to_uint_i32_f64
+; SI: V_CVT_U32_F64_e32
+define void @fp_to_uint_i32_f64(i32 addrspace(1)* %out, double %in) {
+ %cast = fptoui double %in to i32
+ store i32 %cast, i32 addrspace(1)* %out, align 4
+ ret void
+}
diff --git a/test/CodeGen/R600/gep-address-space.ll b/test/CodeGen/R600/gep-address-space.ll
index ee914fa..ab2c0bf 100644
--- a/test/CodeGen/R600/gep-address-space.ll
+++ b/test/CodeGen/R600/gep-address-space.ll
@@ -1,9 +1,9 @@
-; RUN: llc -march=r600 -mcpu=SI < %s | FileCheck %s
+; RUN: llc -march=r600 -mcpu=SI -verify-machineinstrs< %s | FileCheck %s
define void @use_gep_address_space([1024 x i32] addrspace(3)* %array) nounwind {
; CHECK-LABEL: @use_gep_address_space:
; CHECK: V_MOV_B32_e32 [[PTR:v[0-9]+]], s{{[0-9]+}}
-; CHECK: DS_WRITE_B32 [[PTR]], v{{[0-9]+}}, 64
+; CHECK: DS_WRITE_B32 [[PTR]], v{{[0-9]+}}, 0x40
%p = getelementptr [1024 x i32] addrspace(3)* %array, i16 0, i16 16
store i32 99, i32 addrspace(3)* %p
ret void
diff --git a/test/CodeGen/R600/gv-const-addrspace-fail.ll b/test/CodeGen/R600/gv-const-addrspace-fail.ll
new file mode 100644
index 0000000..ebd7811
--- /dev/null
+++ b/test/CodeGen/R600/gv-const-addrspace-fail.ll
@@ -0,0 +1,58 @@
+; XFAIL: *
+; RUN: llc -march=r600 -mcpu=redwood < %s | FileCheck -check-prefix=EG -check-prefix=FUNC %s
+; RUN: llc -march=r600 -mcpu=SI -verify-machineinstrs < %s | FileCheck -check-prefix=SI -check-prefix=FUNC %s
+
+
+@a = internal addrspace(2) constant [1 x i8] [ i8 7 ], align 1
+
+; FUNC-LABEL: @test_i8
+; EG: CF_END
+; SI: BUFFER_STORE_BYTE
+; SI: S_ENDPGM
+define void @test_i8( i32 %s, i8 addrspace(1)* %out) #3 {
+ %arrayidx = getelementptr inbounds [1 x i8] addrspace(2)* @a, i32 0, i32 %s
+ %1 = load i8 addrspace(2)* %arrayidx, align 1
+ store i8 %1, i8 addrspace(1)* %out
+ ret void
+}
+
+@b = internal addrspace(2) constant [1 x i16] [ i16 7 ], align 2
+
+; FUNC-LABEL: @test_i16
+; EG: CF_END
+; SI: BUFFER_STORE_SHORT
+; SI: S_ENDPGM
+define void @test_i16( i32 %s, i16 addrspace(1)* %out) #3 {
+ %arrayidx = getelementptr inbounds [1 x i16] addrspace(2)* @b, i32 0, i32 %s
+ %1 = load i16 addrspace(2)* %arrayidx, align 2
+ store i16 %1, i16 addrspace(1)* %out
+ ret void
+}
+
+%struct.bar = type { float, [5 x i8] }
+
+; The illegal i8s aren't handled
+@struct_bar_gv = internal addrspace(2) unnamed_addr constant [1 x %struct.bar] [ %struct.bar { float 16.0, [5 x i8] [i8 0, i8 1, i8 2, i8 3, i8 4] } ]
+
+; FUNC-LABEL: @struct_bar_gv_load
+define void @struct_bar_gv_load(i8 addrspace(1)* %out, i32 %index) {
+ %gep = getelementptr inbounds [1 x %struct.bar] addrspace(2)* @struct_bar_gv, i32 0, i32 0, i32 1, i32 %index
+ %load = load i8 addrspace(2)* %gep, align 1
+ store i8 %load, i8 addrspace(1)* %out, align 1
+ ret void
+}
+
+
+; The private load isn't scalarzied.
+@array_vector_gv = internal addrspace(2) constant [4 x <4 x i32>] [ <4 x i32> <i32 1, i32 2, i32 3, i32 4>,
+ <4 x i32> <i32 5, i32 6, i32 7, i32 8>,
+ <4 x i32> <i32 9, i32 10, i32 11, i32 12>,
+ <4 x i32> <i32 13, i32 14, i32 15, i32 16> ]
+
+; FUNC-LABEL: @array_vector_gv_load
+define void @array_vector_gv_load(<4 x i32> addrspace(1)* %out, i32 %index) {
+ %gep = getelementptr inbounds [4 x <4 x i32>] addrspace(2)* @array_vector_gv, i32 0, i32 %index
+ %load = load <4 x i32> addrspace(2)* %gep, align 16
+ store <4 x i32> %load, <4 x i32> addrspace(1)* %out, align 16
+ ret void
+}
diff --git a/test/CodeGen/R600/gv-const-addrspace.ll b/test/CodeGen/R600/gv-const-addrspace.ll
index cda7ab1..0176061 100644
--- a/test/CodeGen/R600/gv-const-addrspace.ll
+++ b/test/CodeGen/R600/gv-const-addrspace.ll
@@ -1,4 +1,8 @@
-; RUN: llc < %s -march=r600 -mcpu=redwood | FileCheck %s --check-prefix=R600 --check-prefix=FUNC
+; RUN: llc -march=r600 -mcpu=redwood < %s | FileCheck -check-prefix=EG -check-prefix=FUNC %s
+; RUN: llc -march=r600 -mcpu=SI -verify-machineinstrs < %s | FileCheck -check-prefix=SI -check-prefix=FUNC %s
+
+
+@b = internal addrspace(2) constant [1 x i16] [ i16 7 ], align 2
; XXX: Test on SI once 64-bit adds are supportes.
@@ -6,12 +10,12 @@
; FUNC-LABEL: @float
-; R600-DAG: MOV {{\** *}}T2.X
-; R600-DAG: MOV {{\** *}}T3.X
-; R600-DAG: MOV {{\** *}}T4.X
-; R600-DAG: MOV {{\** *}}T5.X
-; R600-DAG: MOV {{\** *}}T6.X
-; R600: MOVA_INT
+; EG-DAG: MOV {{\** *}}T2.X
+; EG-DAG: MOV {{\** *}}T3.X
+; EG-DAG: MOV {{\** *}}T4.X
+; EG-DAG: MOV {{\** *}}T5.X
+; EG-DAG: MOV {{\** *}}T6.X
+; EG: MOVA_INT
define void @float(float addrspace(1)* %out, i32 %index) {
entry:
@@ -25,12 +29,12 @@ entry:
; FUNC-LABEL: @i32
-; R600-DAG: MOV {{\** *}}T2.X
-; R600-DAG: MOV {{\** *}}T3.X
-; R600-DAG: MOV {{\** *}}T4.X
-; R600-DAG: MOV {{\** *}}T5.X
-; R600-DAG: MOV {{\** *}}T6.X
-; R600: MOVA_INT
+; EG-DAG: MOV {{\** *}}T2.X
+; EG-DAG: MOV {{\** *}}T3.X
+; EG-DAG: MOV {{\** *}}T4.X
+; EG-DAG: MOV {{\** *}}T5.X
+; EG-DAG: MOV {{\** *}}T6.X
+; EG: MOVA_INT
define void @i32(i32 addrspace(1)* %out, i32 %index) {
entry:
@@ -39,3 +43,30 @@ entry:
store i32 %1, i32 addrspace(1)* %out
ret void
}
+
+
+%struct.foo = type { float, [5 x i32] }
+
+@struct_foo_gv = internal addrspace(2) unnamed_addr constant [1 x %struct.foo] [ %struct.foo { float 16.0, [5 x i32] [i32 0, i32 1, i32 2, i32 3, i32 4] } ]
+
+; FUNC-LABEL: @struct_foo_gv_load
+
+define void @struct_foo_gv_load(i32 addrspace(1)* %out, i32 %index) {
+ %gep = getelementptr inbounds [1 x %struct.foo] addrspace(2)* @struct_foo_gv, i32 0, i32 0, i32 1, i32 %index
+ %load = load i32 addrspace(2)* %gep, align 4
+ store i32 %load, i32 addrspace(1)* %out, align 4
+ ret void
+}
+
+@array_v1_gv = internal addrspace(2) constant [4 x <1 x i32>] [ <1 x i32> <i32 1>,
+ <1 x i32> <i32 2>,
+ <1 x i32> <i32 3>,
+ <1 x i32> <i32 4> ]
+
+; FUNC-LABEL: @array_v1_gv_load
+define void @array_v1_gv_load(<1 x i32> addrspace(1)* %out, i32 %index) {
+ %gep = getelementptr inbounds [4 x <1 x i32>] addrspace(2)* @array_v1_gv, i32 0, i32 %index
+ %load = load <1 x i32> addrspace(2)* %gep, align 4
+ store <1 x i32> %load, <1 x i32> addrspace(1)* %out, align 4
+ ret void
+}
diff --git a/test/CodeGen/R600/infinite-loop.ll b/test/CodeGen/R600/infinite-loop.ll
index a60bc37..68ffaae 100644
--- a/test/CodeGen/R600/infinite-loop.ll
+++ b/test/CodeGen/R600/infinite-loop.ll
@@ -1,7 +1,7 @@
; RUN: llc -march=r600 -mcpu=SI -verify-machineinstrs < %s | FileCheck -check-prefix=SI %s
; SI-LABEL: @infinite_loop:
-; SI: V_MOV_B32_e32 [[REG:v[0-9]+]], 999
+; SI: V_MOV_B32_e32 [[REG:v[0-9]+]], 0x3e7
; SI: BB0_1:
; SI: BUFFER_STORE_DWORD [[REG]]
; SI: S_WAITCNT vmcnt(0) expcnt(0)
diff --git a/test/CodeGen/R600/insert_vector_elt.ll b/test/CodeGen/R600/insert_vector_elt.ll
index 530d1cc..43b4efc 100644
--- a/test/CodeGen/R600/insert_vector_elt.ll
+++ b/test/CodeGen/R600/insert_vector_elt.ll
@@ -1,4 +1,4 @@
-; RUN: llc -march=r600 -mcpu=SI < %s | FileCheck -check-prefix=SI %s
+; RUN: llc -verify-machineinstrs -march=r600 -mcpu=SI < %s | FileCheck -check-prefix=SI %s
; FIXME: Broken on evergreen
; FIXME: For some reason the 8 and 16 vectors are being stored as
@@ -173,3 +173,29 @@ define void @dynamic_insertelement_v16i8(<16 x i8> addrspace(1)* %out, <16 x i8>
store <16 x i8> %vecins, <16 x i8> addrspace(1)* %out, align 16
ret void
}
+
+; This test requires handling INSERT_SUBREG in SIFixSGPRCopies. Check that
+; the compiler doesn't crash.
+; SI-LABEL: @insert_split_bb
+define void @insert_split_bb(<2 x i32> addrspace(1)* %out, i32 addrspace(1)* %in, i32 %a, i32 %b) {
+entry:
+ %0 = insertelement <2 x i32> undef, i32 %a, i32 0
+ %1 = icmp eq i32 %a, 0
+ br i1 %1, label %if, label %else
+
+if:
+ %2 = load i32 addrspace(1)* %in
+ %3 = insertelement <2 x i32> %0, i32 %2, i32 1
+ br label %endif
+
+else:
+ %4 = getelementptr i32 addrspace(1)* %in, i32 1
+ %5 = load i32 addrspace(1)* %4
+ %6 = insertelement <2 x i32> %0, i32 %5, i32 1
+ br label %endif
+
+endif:
+ %7 = phi <2 x i32> [%3, %if], [%6, %else]
+ store <2 x i32> %7, <2 x i32> addrspace(1)* %out
+ ret void
+}
diff --git a/test/CodeGen/R600/insert_vector_elt_f64.ll b/test/CodeGen/R600/insert_vector_elt_f64.ll
index e334be1..595bc59 100644
--- a/test/CodeGen/R600/insert_vector_elt_f64.ll
+++ b/test/CodeGen/R600/insert_vector_elt_f64.ll
@@ -1,6 +1,6 @@
; REQUIRES: asserts
; XFAIL: *
-; RUN: llc -march=r600 -mcpu=SI < %s | FileCheck -check-prefix=SI %s
+; RUN: llc -march=r600 -mcpu=SI -verify-machineinstrs< %s | FileCheck -check-prefix=SI %s
; SI-LABEL: @dynamic_insertelement_v2f64:
diff --git a/test/CodeGen/R600/kernel-args.ll b/test/CodeGen/R600/kernel-args.ll
index 247e316..6fc6979 100644
--- a/test/CodeGen/R600/kernel-args.ll
+++ b/test/CodeGen/R600/kernel-args.ll
@@ -17,7 +17,7 @@ entry:
; EG-CHECK-LABEL: @i8_zext_arg
; EG-CHECK: MOV {{[ *]*}}T{{[0-9]+\.[XYZW]}}, KC0[2].Z
; SI-CHECK-LABEL: @i8_zext_arg
-; SI-CHECK: S_LOAD_DWORD s{{[0-9]}}, s[0:1], 11
+; SI-CHECK: S_LOAD_DWORD s{{[0-9]}}, s[0:1], 0xb
define void @i8_zext_arg(i32 addrspace(1)* nocapture %out, i8 zeroext %in) nounwind {
entry:
@@ -29,7 +29,7 @@ entry:
; EG-CHECK-LABEL: @i8_sext_arg
; EG-CHECK: MOV {{[ *]*}}T{{[0-9]+\.[XYZW]}}, KC0[2].Z
; SI-CHECK-LABEL: @i8_sext_arg
-; SI-CHECK: S_LOAD_DWORD s{{[0-9]}}, s[0:1], 11
+; SI-CHECK: S_LOAD_DWORD s{{[0-9]}}, s[0:1], 0xb
define void @i8_sext_arg(i32 addrspace(1)* nocapture %out, i8 signext %in) nounwind {
entry:
@@ -53,7 +53,7 @@ entry:
; EG-CHECK-LABEL: @i16_zext_arg
; EG-CHECK: MOV {{[ *]*}}T{{[0-9]+\.[XYZW]}}, KC0[2].Z
; SI-CHECK-LABEL: @i16_zext_arg
-; SI-CHECK: S_LOAD_DWORD s{{[0-9]}}, s[0:1], 11
+; SI-CHECK: S_LOAD_DWORD s{{[0-9]}}, s[0:1], 0xb
define void @i16_zext_arg(i32 addrspace(1)* nocapture %out, i16 zeroext %in) nounwind {
entry:
@@ -65,7 +65,7 @@ entry:
; EG-CHECK-LABEL: @i16_sext_arg
; EG-CHECK: MOV {{[ *]*}}T{{[0-9]+\.[XYZW]}}, KC0[2].Z
; SI-CHECK-LABEL: @i16_sext_arg
-; SI-CHECK: S_LOAD_DWORD s{{[0-9]}}, s[0:1], 11
+; SI-CHECK: S_LOAD_DWORD s{{[0-9]}}, s[0:1], 0xb
define void @i16_sext_arg(i32 addrspace(1)* nocapture %out, i16 signext %in) nounwind {
entry:
@@ -77,7 +77,7 @@ entry:
; EG-CHECK-LABEL: @i32_arg
; EG-CHECK: T{{[0-9]\.[XYZW]}}, KC0[2].Z
; SI-CHECK-LABEL: @i32_arg
-; S_LOAD_DWORD s{{[0-9]}}, s[0:1], 11
+; S_LOAD_DWORD s{{[0-9]}}, s[0:1], 0xb
define void @i32_arg(i32 addrspace(1)* nocapture %out, i32 %in) nounwind {
entry:
store i32 %in, i32 addrspace(1)* %out, align 4
@@ -87,7 +87,7 @@ entry:
; EG-CHECK-LABEL: @f32_arg
; EG-CHECK: T{{[0-9]\.[XYZW]}}, KC0[2].Z
; SI-CHECK-LABEL: @f32_arg
-; S_LOAD_DWORD s{{[0-9]}}, s[0:1], 11
+; S_LOAD_DWORD s{{[0-9]}}, s[0:1], 0xb
define void @f32_arg(float addrspace(1)* nocapture %out, float %in) nounwind {
entry:
store float %in, float addrspace(1)* %out, align 4
@@ -122,7 +122,7 @@ entry:
; EG-CHECK-DAG: T{{[0-9]\.[XYZW]}}, KC0[3].X
; EG-CHECK-DAG: T{{[0-9]\.[XYZW]}}, KC0[2].W
; SI-CHECK-LABEL: @v2i32_arg
-; SI-CHECK: S_LOAD_DWORDX2 s{{\[[0-9]:[0-9]\]}}, s[0:1], 11
+; SI-CHECK: S_LOAD_DWORDX2 s{{\[[0-9]:[0-9]\]}}, s[0:1], 0xb
define void @v2i32_arg(<2 x i32> addrspace(1)* nocapture %out, <2 x i32> %in) nounwind {
entry:
store <2 x i32> %in, <2 x i32> addrspace(1)* %out, align 4
@@ -133,7 +133,7 @@ entry:
; EG-CHECK-DAG: T{{[0-9]\.[XYZW]}}, KC0[3].X
; EG-CHECK-DAG: T{{[0-9]\.[XYZW]}}, KC0[2].W
; SI-CHECK-LABEL: @v2f32_arg
-; SI-CHECK: S_LOAD_DWORDX2 s{{\[[0-9]:[0-9]\]}}, s[0:1], 11
+; SI-CHECK: S_LOAD_DWORDX2 s{{\[[0-9]:[0-9]\]}}, s[0:1], 0xb
define void @v2f32_arg(<2 x float> addrspace(1)* nocapture %out, <2 x float> %in) nounwind {
entry:
store <2 x float> %in, <2 x float> addrspace(1)* %out, align 4
@@ -166,7 +166,7 @@ entry:
; EG-CHECK-DAG: T{{[0-9]\.[XYZW]}}, KC0[3].Z
; EG-CHECK-DAG: T{{[0-9]\.[XYZW]}}, KC0[3].W
; SI-CHECK-LABEL: @v3i32_arg
-; SI-CHECK: S_LOAD_DWORDX4 s{{\[[0-9]:[0-9]+\]}}, s[0:1], 13
+; SI-CHECK: S_LOAD_DWORDX4 s{{\[[0-9]:[0-9]+\]}}, s[0:1], 0xd
define void @v3i32_arg(<3 x i32> addrspace(1)* nocapture %out, <3 x i32> %in) nounwind {
entry:
store <3 x i32> %in, <3 x i32> addrspace(1)* %out, align 4
@@ -178,7 +178,7 @@ entry:
; EG-CHECK-DAG: T{{[0-9]\.[XYZW]}}, KC0[3].Z
; EG-CHECK-DAG: T{{[0-9]\.[XYZW]}}, KC0[3].W
; SI-CHECK-LABEL: @v3f32_arg
-; SI-CHECK: S_LOAD_DWORDX4 s{{\[[0-9]:[0-9]+\]}}, s[0:1], 13
+; SI-CHECK: S_LOAD_DWORDX4 s{{\[[0-9]:[0-9]+\]}}, s[0:1], 0xd
define void @v3f32_arg(<3 x float> addrspace(1)* nocapture %out, <3 x float> %in) nounwind {
entry:
store <3 x float> %in, <3 x float> addrspace(1)* %out, align 4
@@ -223,7 +223,7 @@ entry:
; EG-CHECK-DAG: T{{[0-9]\.[XYZW]}}, KC0[3].W
; EG-CHECK-DAG: T{{[0-9]\.[XYZW]}}, KC0[4].X
; SI-CHECK-LABEL: @v4i32_arg
-; SI-CHECK: S_LOAD_DWORDX4 s{{\[[0-9]:[0-9]\]}}, s[0:1], 13
+; SI-CHECK: S_LOAD_DWORDX4 s{{\[[0-9]:[0-9]\]}}, s[0:1], 0xd
define void @v4i32_arg(<4 x i32> addrspace(1)* nocapture %out, <4 x i32> %in) nounwind {
entry:
store <4 x i32> %in, <4 x i32> addrspace(1)* %out, align 4
@@ -236,7 +236,7 @@ entry:
; EG-CHECK-DAG: T{{[0-9]\.[XYZW]}}, KC0[3].W
; EG-CHECK-DAG: T{{[0-9]\.[XYZW]}}, KC0[4].X
; SI-CHECK-LABEL: @v4f32_arg
-; SI-CHECK: S_LOAD_DWORDX4 s{{\[[0-9]:[0-9]\]}}, s[0:1], 13
+; SI-CHECK: S_LOAD_DWORDX4 s{{\[[0-9]:[0-9]\]}}, s[0:1], 0xd
define void @v4f32_arg(<4 x float> addrspace(1)* nocapture %out, <4 x float> %in) nounwind {
entry:
store <4 x float> %in, <4 x float> addrspace(1)* %out, align 4
@@ -300,7 +300,7 @@ entry:
; EG-CHECK-DAG: T{{[0-9]\.[XYZW]}}, KC0[5].W
; EG-CHECK-DAG: T{{[0-9]\.[XYZW]}}, KC0[6].X
; SI-CHECK-LABEL: @v8i32_arg
-; SI-CHECK: S_LOAD_DWORDX8 s{{\[[0-9]:[0-9]+\]}}, s[0:1], 17
+; SI-CHECK: S_LOAD_DWORDX8 s{{\[[0-9]:[0-9]+\]}}, s[0:1], 0x11
define void @v8i32_arg(<8 x i32> addrspace(1)* nocapture %out, <8 x i32> %in) nounwind {
entry:
store <8 x i32> %in, <8 x i32> addrspace(1)* %out, align 4
@@ -317,7 +317,7 @@ entry:
; EG-CHECK-DAG: T{{[0-9]\.[XYZW]}}, KC0[5].W
; EG-CHECK-DAG: T{{[0-9]\.[XYZW]}}, KC0[6].X
; SI-CHECK-LABEL: @v8f32_arg
-; SI-CHECK: S_LOAD_DWORDX8 s{{\[[0-9]:[0-9]+\]}}, s[0:1], 17
+; SI-CHECK: S_LOAD_DWORDX8 s{{\[[0-9]:[0-9]+\]}}, s[0:1], 0x11
define void @v8f32_arg(<8 x float> addrspace(1)* nocapture %out, <8 x float> %in) nounwind {
entry:
store <8 x float> %in, <8 x float> addrspace(1)* %out, align 4
@@ -422,7 +422,7 @@ entry:
; EG-CHECK-DAG: T{{[0-9]\.[XYZW]}}, KC0[9].W
; EG-CHECK-DAG: T{{[0-9]\.[XYZW]}}, KC0[10].X
; SI-CHECK-LABEL: @v16i32_arg
-; SI-CHECK: S_LOAD_DWORDX16 s{{\[[0-9]:[0-9]+\]}}, s[0:1], 25
+; SI-CHECK: S_LOAD_DWORDX16 s{{\[[0-9]:[0-9]+\]}}, s[0:1], 0x19
define void @v16i32_arg(<16 x i32> addrspace(1)* nocapture %out, <16 x i32> %in) nounwind {
entry:
store <16 x i32> %in, <16 x i32> addrspace(1)* %out, align 4
@@ -447,7 +447,7 @@ entry:
; EG-CHECK-DAG: T{{[0-9]\.[XYZW]}}, KC0[9].W
; EG-CHECK-DAG: T{{[0-9]\.[XYZW]}}, KC0[10].X
; SI-CHECK-LABEL: @v16f32_arg
-; SI-CHECK: S_LOAD_DWORDX16 s{{\[[0-9]:[0-9]+\]}}, s[0:1], 25
+; SI-CHECK: S_LOAD_DWORDX16 s{{\[[0-9]:[0-9]+\]}}, s[0:1], 0x19
define void @v16f32_arg(<16 x float> addrspace(1)* nocapture %out, <16 x float> %in) nounwind {
entry:
store <16 x float> %in, <16 x float> addrspace(1)* %out, align 4
diff --git a/test/CodeGen/R600/llvm.AMDGPU.bfe.i32.ll b/test/CodeGen/R600/llvm.AMDGPU.bfe.i32.ll
index c3f000a..eb50942 100644
--- a/test/CodeGen/R600/llvm.AMDGPU.bfe.i32.ll
+++ b/test/CodeGen/R600/llvm.AMDGPU.bfe.i32.ll
@@ -1,11 +1,12 @@
; RUN: llc -march=r600 -mcpu=SI -verify-machineinstrs < %s | FileCheck -check-prefix=SI -check-prefix=FUNC %s
-; RUN: llc -march=r600 -mcpu=redwood -verify-machineinstrs < %s | FileCheck -check-prefix=EG -check-prefix=FUNC %s
+; RUN: llc -march=r600 -mcpu=redwood -show-mc-encoding -verify-machineinstrs < %s | FileCheck -check-prefix=EG -check-prefix=FUNC %s
declare i32 @llvm.AMDGPU.bfe.i32(i32, i32, i32) nounwind readnone
; FUNC-LABEL: @bfe_i32_arg_arg_arg
; SI: V_BFE_I32
; EG: BFE_INT
+; EG: encoding: [{{[x0-9a-f]+,[x0-9a-f]+,[x0-9a-f]+,[x0-9a-f]+,[x0-9a-f]+}},0xac
define void @bfe_i32_arg_arg_arg(i32 addrspace(1)* %out, i32 %src0, i32 %src1, i32 %src2) nounwind {
%bfe_i32 = call i32 @llvm.AMDGPU.bfe.i32(i32 %src0, i32 %src1, i32 %src1) nounwind readnone
store i32 %bfe_i32, i32 addrspace(1)* %out, align 4
@@ -38,3 +39,388 @@ define void @bfe_i32_imm_arg_arg(i32 addrspace(1)* %out, i32 %src1, i32 %src2) n
store i32 %bfe_i32, i32 addrspace(1)* %out, align 4
ret void
}
+
+; FUNC-LABEL: @v_bfe_print_arg
+; SI: V_BFE_I32 v{{[0-9]+}}, v{{[0-9]+}}, 2, 8
+define void @v_bfe_print_arg(i32 addrspace(1)* %out, i32 addrspace(1)* %src0) nounwind {
+ %load = load i32 addrspace(1)* %src0, align 4
+ %bfe_i32 = call i32 @llvm.AMDGPU.bfe.i32(i32 %load, i32 2, i32 8) nounwind readnone
+ store i32 %bfe_i32, i32 addrspace(1)* %out, align 4
+ ret void
+}
+
+; FUNC-LABEL: @bfe_i32_arg_0_width_reg_offset
+; SI-NOT: BFE
+; SI: S_ENDPGM
+; EG-NOT: BFE
+define void @bfe_i32_arg_0_width_reg_offset(i32 addrspace(1)* %out, i32 %src0, i32 %src1) nounwind {
+ %bfe_u32 = call i32 @llvm.AMDGPU.bfe.i32(i32 %src0, i32 %src1, i32 0) nounwind readnone
+ store i32 %bfe_u32, i32 addrspace(1)* %out, align 4
+ ret void
+}
+
+; FUNC-LABEL: @bfe_i32_arg_0_width_imm_offset
+; SI-NOT: BFE
+; SI: S_ENDPGM
+; EG-NOT: BFE
+define void @bfe_i32_arg_0_width_imm_offset(i32 addrspace(1)* %out, i32 %src0, i32 %src1) nounwind {
+ %bfe_u32 = call i32 @llvm.AMDGPU.bfe.i32(i32 %src0, i32 8, i32 0) nounwind readnone
+ store i32 %bfe_u32, i32 addrspace(1)* %out, align 4
+ ret void
+}
+
+; FUNC-LABEL: @bfe_i32_test_6
+; SI: V_LSHLREV_B32_e32 v{{[0-9]+}}, 31, v{{[0-9]+}}
+; SI: V_ASHRREV_I32_e32 v{{[0-9]+}}, 1, v{{[0-9]+}}
+; SI: S_ENDPGM
+define void @bfe_i32_test_6(i32 addrspace(1)* %out, i32 addrspace(1)* %in) nounwind {
+ %x = load i32 addrspace(1)* %in, align 4
+ %shl = shl i32 %x, 31
+ %bfe = call i32 @llvm.AMDGPU.bfe.i32(i32 %shl, i32 1, i32 31)
+ store i32 %bfe, i32 addrspace(1)* %out, align 4
+ ret void
+}
+
+; FUNC-LABEL: @bfe_i32_test_7
+; SI-NOT: SHL
+; SI-NOT: BFE
+; SI: V_MOV_B32_e32 [[VREG:v[0-9]+]], 0
+; SI: BUFFER_STORE_DWORD [[VREG]],
+; SI: S_ENDPGM
+define void @bfe_i32_test_7(i32 addrspace(1)* %out, i32 addrspace(1)* %in) nounwind {
+ %x = load i32 addrspace(1)* %in, align 4
+ %shl = shl i32 %x, 31
+ %bfe = call i32 @llvm.AMDGPU.bfe.i32(i32 %shl, i32 0, i32 31)
+ store i32 %bfe, i32 addrspace(1)* %out, align 4
+ ret void
+}
+
+; FIXME: The shifts should be 1 BFE
+; FUNC-LABEL: @bfe_i32_test_8
+; SI: BUFFER_LOAD_DWORD
+; SI: V_BFE_I32 v{{[0-9]+}}, v{{[0-9]+}}, 0, 1
+; SI: S_ENDPGM
+define void @bfe_i32_test_8(i32 addrspace(1)* %out, i32 addrspace(1)* %in) nounwind {
+ %x = load i32 addrspace(1)* %in, align 4
+ %shl = shl i32 %x, 31
+ %bfe = call i32 @llvm.AMDGPU.bfe.i32(i32 %shl, i32 31, i32 1)
+ store i32 %bfe, i32 addrspace(1)* %out, align 4
+ ret void
+}
+
+; FUNC-LABEL: @bfe_i32_test_9
+; SI-NOT: BFE
+; SI: V_ASHRREV_I32_e32 v{{[0-9]+}}, 31, v{{[0-9]+}}
+; SI-NOT: BFE
+; SI: S_ENDPGM
+define void @bfe_i32_test_9(i32 addrspace(1)* %out, i32 addrspace(1)* %in) nounwind {
+ %x = load i32 addrspace(1)* %in, align 4
+ %bfe = call i32 @llvm.AMDGPU.bfe.i32(i32 %x, i32 31, i32 1)
+ store i32 %bfe, i32 addrspace(1)* %out, align 4
+ ret void
+}
+
+; FUNC-LABEL: @bfe_i32_test_10
+; SI-NOT: BFE
+; SI: V_ASHRREV_I32_e32 v{{[0-9]+}}, 1, v{{[0-9]+}}
+; SI-NOT: BFE
+; SI: S_ENDPGM
+define void @bfe_i32_test_10(i32 addrspace(1)* %out, i32 addrspace(1)* %in) nounwind {
+ %x = load i32 addrspace(1)* %in, align 4
+ %bfe = call i32 @llvm.AMDGPU.bfe.i32(i32 %x, i32 1, i32 31)
+ store i32 %bfe, i32 addrspace(1)* %out, align 4
+ ret void
+}
+
+; FUNC-LABEL: @bfe_i32_test_11
+; SI-NOT: BFE
+; SI: V_ASHRREV_I32_e32 v{{[0-9]+}}, 8, v{{[0-9]+}}
+; SI-NOT: BFE
+; SI: S_ENDPGM
+define void @bfe_i32_test_11(i32 addrspace(1)* %out, i32 addrspace(1)* %in) nounwind {
+ %x = load i32 addrspace(1)* %in, align 4
+ %bfe = call i32 @llvm.AMDGPU.bfe.i32(i32 %x, i32 8, i32 24)
+ store i32 %bfe, i32 addrspace(1)* %out, align 4
+ ret void
+}
+
+; FUNC-LABEL: @bfe_i32_test_12
+; SI-NOT: BFE
+; SI: V_ASHRREV_I32_e32 v{{[0-9]+}}, 24, v{{[0-9]+}}
+; SI-NOT: BFE
+; SI: S_ENDPGM
+define void @bfe_i32_test_12(i32 addrspace(1)* %out, i32 addrspace(1)* %in) nounwind {
+ %x = load i32 addrspace(1)* %in, align 4
+ %bfe = call i32 @llvm.AMDGPU.bfe.i32(i32 %x, i32 24, i32 8)
+ store i32 %bfe, i32 addrspace(1)* %out, align 4
+ ret void
+}
+
+; FUNC-LABEL: @bfe_i32_test_13
+; SI: V_ASHRREV_I32_e32 {{v[0-9]+}}, 31, {{v[0-9]+}}
+; SI-NOT: BFE
+; SI: S_ENDPGM
+define void @bfe_i32_test_13(i32 addrspace(1)* %out, i32 addrspace(1)* %in) nounwind {
+ %x = load i32 addrspace(1)* %in, align 4
+ %shl = ashr i32 %x, 31
+ %bfe = call i32 @llvm.AMDGPU.bfe.i32(i32 %shl, i32 31, i32 1)
+ store i32 %bfe, i32 addrspace(1)* %out, align 4 ret void
+}
+
+; FUNC-LABEL: @bfe_i32_test_14
+; SI-NOT: LSHR
+; SI-NOT: BFE
+; SI: S_ENDPGM
+define void @bfe_i32_test_14(i32 addrspace(1)* %out, i32 addrspace(1)* %in) nounwind {
+ %x = load i32 addrspace(1)* %in, align 4
+ %shl = lshr i32 %x, 31
+ %bfe = call i32 @llvm.AMDGPU.bfe.i32(i32 %shl, i32 31, i32 1)
+ store i32 %bfe, i32 addrspace(1)* %out, align 4 ret void
+}
+
+; FUNC-LABEL: @bfe_i32_constant_fold_test_0
+; SI-NOT: BFE
+; SI: V_MOV_B32_e32 [[VREG:v[0-9]+]], 0
+; SI: BUFFER_STORE_DWORD [[VREG]],
+; SI: S_ENDPGM
+; EG-NOT: BFE
+define void @bfe_i32_constant_fold_test_0(i32 addrspace(1)* %out) nounwind {
+ %bfe_i32 = call i32 @llvm.AMDGPU.bfe.i32(i32 0, i32 0, i32 0) nounwind readnone
+ store i32 %bfe_i32, i32 addrspace(1)* %out, align 4
+ ret void
+}
+
+; FUNC-LABEL: @bfe_i32_constant_fold_test_1
+; SI-NOT: BFE
+; SI: V_MOV_B32_e32 [[VREG:v[0-9]+]], 0
+; SI: BUFFER_STORE_DWORD [[VREG]],
+; SI: S_ENDPGM
+; EG-NOT: BFE
+define void @bfe_i32_constant_fold_test_1(i32 addrspace(1)* %out) nounwind {
+ %bfe_i32 = call i32 @llvm.AMDGPU.bfe.i32(i32 12334, i32 0, i32 0) nounwind readnone
+ store i32 %bfe_i32, i32 addrspace(1)* %out, align 4
+ ret void
+}
+
+; FUNC-LABEL: @bfe_i32_constant_fold_test_2
+; SI-NOT: BFE
+; SI: V_MOV_B32_e32 [[VREG:v[0-9]+]], 0
+; SI: BUFFER_STORE_DWORD [[VREG]],
+; SI: S_ENDPGM
+; EG-NOT: BFE
+define void @bfe_i32_constant_fold_test_2(i32 addrspace(1)* %out) nounwind {
+ %bfe_i32 = call i32 @llvm.AMDGPU.bfe.i32(i32 0, i32 0, i32 1) nounwind readnone
+ store i32 %bfe_i32, i32 addrspace(1)* %out, align 4
+ ret void
+}
+
+; FUNC-LABEL: @bfe_i32_constant_fold_test_3
+; SI-NOT: BFE
+; SI: V_MOV_B32_e32 [[VREG:v[0-9]+]], -1
+; SI: BUFFER_STORE_DWORD [[VREG]],
+; SI: S_ENDPGM
+; EG-NOT: BFE
+define void @bfe_i32_constant_fold_test_3(i32 addrspace(1)* %out) nounwind {
+ %bfe_i32 = call i32 @llvm.AMDGPU.bfe.i32(i32 1, i32 0, i32 1) nounwind readnone
+ store i32 %bfe_i32, i32 addrspace(1)* %out, align 4
+ ret void
+}
+
+; FUNC-LABEL: @bfe_i32_constant_fold_test_4
+; SI-NOT: BFE
+; SI: V_MOV_B32_e32 [[VREG:v[0-9]+]], -1
+; SI: BUFFER_STORE_DWORD [[VREG]],
+; SI: S_ENDPGM
+; EG-NOT: BFE
+define void @bfe_i32_constant_fold_test_4(i32 addrspace(1)* %out) nounwind {
+ %bfe_i32 = call i32 @llvm.AMDGPU.bfe.i32(i32 4294967295, i32 0, i32 1) nounwind readnone
+ store i32 %bfe_i32, i32 addrspace(1)* %out, align 4
+ ret void
+}
+
+; FUNC-LABEL: @bfe_i32_constant_fold_test_5
+; SI-NOT: BFE
+; SI: V_MOV_B32_e32 [[VREG:v[0-9]+]], -1
+; SI: BUFFER_STORE_DWORD [[VREG]],
+; SI: S_ENDPGM
+; EG-NOT: BFE
+define void @bfe_i32_constant_fold_test_5(i32 addrspace(1)* %out) nounwind {
+ %bfe_i32 = call i32 @llvm.AMDGPU.bfe.i32(i32 128, i32 7, i32 1) nounwind readnone
+ store i32 %bfe_i32, i32 addrspace(1)* %out, align 4
+ ret void
+}
+
+; FUNC-LABEL: @bfe_i32_constant_fold_test_6
+; SI-NOT: BFE
+; SI: V_MOV_B32_e32 [[VREG:v[0-9]+]], 0xffffff80
+; SI: BUFFER_STORE_DWORD [[VREG]],
+; SI: S_ENDPGM
+; EG-NOT: BFE
+define void @bfe_i32_constant_fold_test_6(i32 addrspace(1)* %out) nounwind {
+ %bfe_i32 = call i32 @llvm.AMDGPU.bfe.i32(i32 128, i32 0, i32 8) nounwind readnone
+ store i32 %bfe_i32, i32 addrspace(1)* %out, align 4
+ ret void
+}
+
+; FUNC-LABEL: @bfe_i32_constant_fold_test_7
+; SI-NOT: BFE
+; SI: V_MOV_B32_e32 [[VREG:v[0-9]+]], 0x7f
+; SI: BUFFER_STORE_DWORD [[VREG]],
+; SI: S_ENDPGM
+; EG-NOT: BFE
+define void @bfe_i32_constant_fold_test_7(i32 addrspace(1)* %out) nounwind {
+ %bfe_i32 = call i32 @llvm.AMDGPU.bfe.i32(i32 127, i32 0, i32 8) nounwind readnone
+ store i32 %bfe_i32, i32 addrspace(1)* %out, align 4
+ ret void
+}
+
+; FUNC-LABEL: @bfe_i32_constant_fold_test_8
+; SI-NOT: BFE
+; SI: V_MOV_B32_e32 [[VREG:v[0-9]+]], 1
+; SI: BUFFER_STORE_DWORD [[VREG]],
+; SI: S_ENDPGM
+; EG-NOT: BFE
+define void @bfe_i32_constant_fold_test_8(i32 addrspace(1)* %out) nounwind {
+ %bfe_i32 = call i32 @llvm.AMDGPU.bfe.i32(i32 127, i32 6, i32 8) nounwind readnone
+ store i32 %bfe_i32, i32 addrspace(1)* %out, align 4
+ ret void
+}
+
+; FUNC-LABEL: @bfe_i32_constant_fold_test_9
+; SI-NOT: BFE
+; SI: V_MOV_B32_e32 [[VREG:v[0-9]+]], 1
+; SI: BUFFER_STORE_DWORD [[VREG]],
+; SI: S_ENDPGM
+; EG-NOT: BFE
+define void @bfe_i32_constant_fold_test_9(i32 addrspace(1)* %out) nounwind {
+ %bfe_i32 = call i32 @llvm.AMDGPU.bfe.i32(i32 65536, i32 16, i32 8) nounwind readnone
+ store i32 %bfe_i32, i32 addrspace(1)* %out, align 4
+ ret void
+}
+
+; FUNC-LABEL: @bfe_i32_constant_fold_test_10
+; SI-NOT: BFE
+; SI: V_MOV_B32_e32 [[VREG:v[0-9]+]], 0
+; SI: BUFFER_STORE_DWORD [[VREG]],
+; SI: S_ENDPGM
+; EG-NOT: BFE
+define void @bfe_i32_constant_fold_test_10(i32 addrspace(1)* %out) nounwind {
+ %bfe_i32 = call i32 @llvm.AMDGPU.bfe.i32(i32 65535, i32 16, i32 16) nounwind readnone
+ store i32 %bfe_i32, i32 addrspace(1)* %out, align 4
+ ret void
+}
+
+; FUNC-LABEL: @bfe_i32_constant_fold_test_11
+; SI-NOT: BFE
+; SI: V_MOV_B32_e32 [[VREG:v[0-9]+]], -6
+; SI: BUFFER_STORE_DWORD [[VREG]],
+; SI: S_ENDPGM
+; EG-NOT: BFE
+define void @bfe_i32_constant_fold_test_11(i32 addrspace(1)* %out) nounwind {
+ %bfe_i32 = call i32 @llvm.AMDGPU.bfe.i32(i32 160, i32 4, i32 4) nounwind readnone
+ store i32 %bfe_i32, i32 addrspace(1)* %out, align 4
+ ret void
+}
+
+; FUNC-LABEL: @bfe_i32_constant_fold_test_12
+; SI-NOT: BFE
+; SI: V_MOV_B32_e32 [[VREG:v[0-9]+]], 0
+; SI: BUFFER_STORE_DWORD [[VREG]],
+; SI: S_ENDPGM
+; EG-NOT: BFE
+define void @bfe_i32_constant_fold_test_12(i32 addrspace(1)* %out) nounwind {
+ %bfe_i32 = call i32 @llvm.AMDGPU.bfe.i32(i32 160, i32 31, i32 1) nounwind readnone
+ store i32 %bfe_i32, i32 addrspace(1)* %out, align 4
+ ret void
+}
+
+; FUNC-LABEL: @bfe_i32_constant_fold_test_13
+; SI-NOT: BFE
+; SI: V_MOV_B32_e32 [[VREG:v[0-9]+]], 1
+; SI: BUFFER_STORE_DWORD [[VREG]],
+; SI: S_ENDPGM
+; EG-NOT: BFE
+define void @bfe_i32_constant_fold_test_13(i32 addrspace(1)* %out) nounwind {
+ %bfe_i32 = call i32 @llvm.AMDGPU.bfe.i32(i32 131070, i32 16, i32 16) nounwind readnone
+ store i32 %bfe_i32, i32 addrspace(1)* %out, align 4
+ ret void
+}
+
+; FUNC-LABEL: @bfe_i32_constant_fold_test_14
+; SI-NOT: BFE
+; SI: V_MOV_B32_e32 [[VREG:v[0-9]+]], 40
+; SI: BUFFER_STORE_DWORD [[VREG]],
+; SI: S_ENDPGM
+; EG-NOT: BFE
+define void @bfe_i32_constant_fold_test_14(i32 addrspace(1)* %out) nounwind {
+ %bfe_i32 = call i32 @llvm.AMDGPU.bfe.i32(i32 160, i32 2, i32 30) nounwind readnone
+ store i32 %bfe_i32, i32 addrspace(1)* %out, align 4
+ ret void
+}
+
+; FUNC-LABEL: @bfe_i32_constant_fold_test_15
+; SI-NOT: BFE
+; SI: V_MOV_B32_e32 [[VREG:v[0-9]+]], 10
+; SI: BUFFER_STORE_DWORD [[VREG]],
+; SI: S_ENDPGM
+; EG-NOT: BFE
+define void @bfe_i32_constant_fold_test_15(i32 addrspace(1)* %out) nounwind {
+ %bfe_i32 = call i32 @llvm.AMDGPU.bfe.i32(i32 160, i32 4, i32 28) nounwind readnone
+ store i32 %bfe_i32, i32 addrspace(1)* %out, align 4
+ ret void
+}
+
+; FUNC-LABEL: @bfe_i32_constant_fold_test_16
+; SI-NOT: BFE
+; SI: V_MOV_B32_e32 [[VREG:v[0-9]+]], -1
+; SI: BUFFER_STORE_DWORD [[VREG]],
+; SI: S_ENDPGM
+; EG-NOT: BFE
+define void @bfe_i32_constant_fold_test_16(i32 addrspace(1)* %out) nounwind {
+ %bfe_i32 = call i32 @llvm.AMDGPU.bfe.i32(i32 4294967295, i32 1, i32 7) nounwind readnone
+ store i32 %bfe_i32, i32 addrspace(1)* %out, align 4
+ ret void
+}
+
+; FUNC-LABEL: @bfe_i32_constant_fold_test_17
+; SI-NOT: BFE
+; SI: V_MOV_B32_e32 [[VREG:v[0-9]+]], 0x7f
+; SI: BUFFER_STORE_DWORD [[VREG]],
+; SI: S_ENDPGM
+; EG-NOT: BFE
+define void @bfe_i32_constant_fold_test_17(i32 addrspace(1)* %out) nounwind {
+ %bfe_i32 = call i32 @llvm.AMDGPU.bfe.i32(i32 255, i32 1, i32 31) nounwind readnone
+ store i32 %bfe_i32, i32 addrspace(1)* %out, align 4
+ ret void
+}
+
+; FUNC-LABEL: @bfe_i32_constant_fold_test_18
+; SI-NOT: BFE
+; SI: V_MOV_B32_e32 [[VREG:v[0-9]+]], 0
+; SI: BUFFER_STORE_DWORD [[VREG]],
+; SI: S_ENDPGM
+; EG-NOT: BFE
+define void @bfe_i32_constant_fold_test_18(i32 addrspace(1)* %out) nounwind {
+ %bfe_i32 = call i32 @llvm.AMDGPU.bfe.i32(i32 255, i32 31, i32 1) nounwind readnone
+ store i32 %bfe_i32, i32 addrspace(1)* %out, align 4
+ ret void
+}
+
+; XXX - This should really be a single BFE, but the sext_inreg of the
+; extended type i24 is never custom lowered.
+; FUNC-LABEL: @bfe_sext_in_reg_i24
+; SI: BUFFER_LOAD_DWORD [[LOAD:v[0-9]+]],
+; SI: V_LSHLREV_B32_e32 {{v[0-9]+}}, 8, {{v[0-9]+}}
+; SI: V_ASHRREV_I32_e32 {{v[0-9]+}}, 8, {{v[0-9]+}}
+; XSI: V_BFE_I32 [[BFE:v[0-9]+]], [[LOAD]], 0, 8
+; XSI-NOT: SHL
+; XSI-NOT: SHR
+; XSI: BUFFER_STORE_DWORD [[BFE]],
+define void @bfe_sext_in_reg_i24(i32 addrspace(1)* %out, i32 addrspace(1)* %in) nounwind {
+ %x = load i32 addrspace(1)* %in, align 4
+ %bfe = call i32 @llvm.AMDGPU.bfe.i32(i32 %x, i32 0, i32 24)
+ %shl = shl i32 %bfe, 8
+ %ashr = ashr i32 %shl, 8
+ store i32 %ashr, i32 addrspace(1)* %out, align 4
+ ret void
+}
diff --git a/test/CodeGen/R600/llvm.AMDGPU.bfe.u32.ll b/test/CodeGen/R600/llvm.AMDGPU.bfe.u32.ll
index 0d47863..1a62253 100644
--- a/test/CodeGen/R600/llvm.AMDGPU.bfe.u32.ll
+++ b/test/CodeGen/R600/llvm.AMDGPU.bfe.u32.ll
@@ -38,3 +38,517 @@ define void @bfe_u32_imm_arg_arg(i32 addrspace(1)* %out, i32 %src1, i32 %src2) n
store i32 %bfe_u32, i32 addrspace(1)* %out, align 4
ret void
}
+
+; FUNC-LABEL: @bfe_u32_arg_0_width_reg_offset
+; SI-NOT: BFE
+; SI: S_ENDPGM
+; EG-NOT: BFE
+define void @bfe_u32_arg_0_width_reg_offset(i32 addrspace(1)* %out, i32 %src0, i32 %src1) nounwind {
+ %bfe_u32 = call i32 @llvm.AMDGPU.bfe.u32(i32 %src0, i32 %src1, i32 0) nounwind readnone
+ store i32 %bfe_u32, i32 addrspace(1)* %out, align 4
+ ret void
+}
+
+; FUNC-LABEL: @bfe_u32_arg_0_width_imm_offset
+; SI-NOT: BFE
+; SI: S_ENDPGM
+; EG-NOT: BFE
+define void @bfe_u32_arg_0_width_imm_offset(i32 addrspace(1)* %out, i32 %src0, i32 %src1) nounwind {
+ %bfe_u32 = call i32 @llvm.AMDGPU.bfe.u32(i32 %src0, i32 8, i32 0) nounwind readnone
+ store i32 %bfe_u32, i32 addrspace(1)* %out, align 4
+ ret void
+}
+
+; FUNC-LABEL: @bfe_u32_zextload_i8
+; SI: BUFFER_LOAD_UBYTE
+; SI-NOT: BFE
+; SI: S_ENDPGM
+define void @bfe_u32_zextload_i8(i32 addrspace(1)* %out, i8 addrspace(1)* %in) nounwind {
+ %load = load i8 addrspace(1)* %in
+ %ext = zext i8 %load to i32
+ %bfe = call i32 @llvm.AMDGPU.bfe.u32(i32 %ext, i32 0, i32 8)
+ store i32 %bfe, i32 addrspace(1)* %out, align 4
+ ret void
+}
+
+; FUNC-LABEL: @bfe_u32_zext_in_reg_i8
+; SI: BUFFER_LOAD_DWORD
+; SI: V_ADD_I32
+; SI-NEXT: V_AND_B32_e32
+; SI-NOT: BFE
+; SI: S_ENDPGM
+define void @bfe_u32_zext_in_reg_i8(i32 addrspace(1)* %out, i32 addrspace(1)* %in) nounwind {
+ %load = load i32 addrspace(1)* %in, align 4
+ %add = add i32 %load, 1
+ %ext = and i32 %add, 255
+ %bfe = call i32 @llvm.AMDGPU.bfe.u32(i32 %ext, i32 0, i32 8)
+ store i32 %bfe, i32 addrspace(1)* %out, align 4
+ ret void
+}
+
+; FUNC-LABEL: @bfe_u32_zext_in_reg_i16
+; SI: BUFFER_LOAD_DWORD
+; SI: V_ADD_I32
+; SI-NEXT: V_AND_B32_e32
+; SI-NOT: BFE
+; SI: S_ENDPGM
+define void @bfe_u32_zext_in_reg_i16(i32 addrspace(1)* %out, i32 addrspace(1)* %in) nounwind {
+ %load = load i32 addrspace(1)* %in, align 4
+ %add = add i32 %load, 1
+ %ext = and i32 %add, 65535
+ %bfe = call i32 @llvm.AMDGPU.bfe.u32(i32 %ext, i32 0, i32 16)
+ store i32 %bfe, i32 addrspace(1)* %out, align 4
+ ret void
+}
+
+; FUNC-LABEL: @bfe_u32_zext_in_reg_i8_offset_1
+; SI: BUFFER_LOAD_DWORD
+; SI: V_ADD_I32
+; SI: BFE
+; SI: S_ENDPGM
+define void @bfe_u32_zext_in_reg_i8_offset_1(i32 addrspace(1)* %out, i32 addrspace(1)* %in) nounwind {
+ %load = load i32 addrspace(1)* %in, align 4
+ %add = add i32 %load, 1
+ %ext = and i32 %add, 255
+ %bfe = call i32 @llvm.AMDGPU.bfe.u32(i32 %ext, i32 1, i32 8)
+ store i32 %bfe, i32 addrspace(1)* %out, align 4
+ ret void
+}
+
+; FUNC-LABEL: @bfe_u32_zext_in_reg_i8_offset_3
+; SI: BUFFER_LOAD_DWORD
+; SI: V_ADD_I32
+; SI-NEXT: V_AND_B32_e32 {{v[0-9]+}}, 0xf8
+; SI-NEXT: BFE
+; SI: S_ENDPGM
+define void @bfe_u32_zext_in_reg_i8_offset_3(i32 addrspace(1)* %out, i32 addrspace(1)* %in) nounwind {
+ %load = load i32 addrspace(1)* %in, align 4
+ %add = add i32 %load, 1
+ %ext = and i32 %add, 255
+ %bfe = call i32 @llvm.AMDGPU.bfe.u32(i32 %ext, i32 3, i32 8)
+ store i32 %bfe, i32 addrspace(1)* %out, align 4
+ ret void
+}
+
+; FUNC-LABEL: @bfe_u32_zext_in_reg_i8_offset_7
+; SI: BUFFER_LOAD_DWORD
+; SI: V_ADD_I32
+; SI-NEXT: V_AND_B32_e32 {{v[0-9]+}}, 0x80
+; SI-NEXT: BFE
+; SI: S_ENDPGM
+define void @bfe_u32_zext_in_reg_i8_offset_7(i32 addrspace(1)* %out, i32 addrspace(1)* %in) nounwind {
+ %load = load i32 addrspace(1)* %in, align 4
+ %add = add i32 %load, 1
+ %ext = and i32 %add, 255
+ %bfe = call i32 @llvm.AMDGPU.bfe.u32(i32 %ext, i32 7, i32 8)
+ store i32 %bfe, i32 addrspace(1)* %out, align 4
+ ret void
+}
+
+; FUNC-LABEL: @bfe_u32_zext_in_reg_i16_offset_8
+; SI: BUFFER_LOAD_DWORD
+; SI: V_ADD_I32
+; SI-NEXT: BFE
+; SI: S_ENDPGM
+define void @bfe_u32_zext_in_reg_i16_offset_8(i32 addrspace(1)* %out, i32 addrspace(1)* %in) nounwind {
+ %load = load i32 addrspace(1)* %in, align 4
+ %add = add i32 %load, 1
+ %ext = and i32 %add, 65535
+ %bfe = call i32 @llvm.AMDGPU.bfe.u32(i32 %ext, i32 8, i32 8)
+ store i32 %bfe, i32 addrspace(1)* %out, align 4
+ ret void
+}
+
+; FUNC-LABEL: @bfe_u32_test_1
+; SI: BUFFER_LOAD_DWORD
+; SI: V_AND_B32_e32 {{v[0-9]+}}, 1, {{v[0-9]+}}
+; SI: S_ENDPGM
+; EG: AND_INT T{{[0-9]\.[XYZW]}}, T{{[0-9]\.[XYZW]}}, 1,
+define void @bfe_u32_test_1(i32 addrspace(1)* %out, i32 addrspace(1)* %in) nounwind {
+ %x = load i32 addrspace(1)* %in, align 4
+ %bfe = call i32 @llvm.AMDGPU.bfe.u32(i32 %x, i32 0, i32 1)
+ store i32 %bfe, i32 addrspace(1)* %out, align 4
+ ret void
+}
+
+define void @bfe_u32_test_2(i32 addrspace(1)* %out, i32 addrspace(1)* %in) nounwind {
+ %x = load i32 addrspace(1)* %in, align 4
+ %shl = shl i32 %x, 31
+ %bfe = call i32 @llvm.AMDGPU.bfe.u32(i32 %shl, i32 0, i32 8)
+ store i32 %bfe, i32 addrspace(1)* %out, align 4
+ ret void
+}
+
+define void @bfe_u32_test_3(i32 addrspace(1)* %out, i32 addrspace(1)* %in) nounwind {
+ %x = load i32 addrspace(1)* %in, align 4
+ %shl = shl i32 %x, 31
+ %bfe = call i32 @llvm.AMDGPU.bfe.u32(i32 %shl, i32 0, i32 1)
+ store i32 %bfe, i32 addrspace(1)* %out, align 4
+ ret void
+}
+
+; FUNC-LABEL: @bfe_u32_test_4
+; SI-NOT: LSHL
+; SI-NOT: SHR
+; SI-NOT: BFE
+; SI: V_MOV_B32_e32 [[VREG:v[0-9]+]], 0
+; SI: BUFFER_STORE_DWORD [[VREG]],
+; SI: S_ENDPGM
+define void @bfe_u32_test_4(i32 addrspace(1)* %out, i32 addrspace(1)* %in) nounwind {
+ %x = load i32 addrspace(1)* %in, align 4
+ %shl = shl i32 %x, 31
+ %shr = lshr i32 %shl, 31
+ %bfe = call i32 @llvm.AMDGPU.bfe.u32(i32 %shr, i32 31, i32 1)
+ store i32 %bfe, i32 addrspace(1)* %out, align 4
+ ret void
+}
+
+; FUNC-LABEL: @bfe_u32_test_5
+; SI: BUFFER_LOAD_DWORD
+; SI-NOT: LSHL
+; SI-NOT: SHR
+; SI: V_BFE_I32 {{v[0-9]+}}, {{v[0-9]+}}, 0, 1
+; SI: S_ENDPGM
+define void @bfe_u32_test_5(i32 addrspace(1)* %out, i32 addrspace(1)* %in) nounwind {
+ %x = load i32 addrspace(1)* %in, align 4
+ %shl = shl i32 %x, 31
+ %shr = ashr i32 %shl, 31
+ %bfe = call i32 @llvm.AMDGPU.bfe.u32(i32 %shr, i32 0, i32 1)
+ store i32 %bfe, i32 addrspace(1)* %out, align 4
+ ret void
+}
+
+; FUNC-LABEL: @bfe_u32_test_6
+; SI: V_LSHLREV_B32_e32 v{{[0-9]+}}, 31, v{{[0-9]+}}
+; SI: V_LSHRREV_B32_e32 v{{[0-9]+}}, 1, v{{[0-9]+}}
+; SI: S_ENDPGM
+define void @bfe_u32_test_6(i32 addrspace(1)* %out, i32 addrspace(1)* %in) nounwind {
+ %x = load i32 addrspace(1)* %in, align 4
+ %shl = shl i32 %x, 31
+ %bfe = call i32 @llvm.AMDGPU.bfe.u32(i32 %shl, i32 1, i32 31)
+ store i32 %bfe, i32 addrspace(1)* %out, align 4
+ ret void
+}
+
+; FUNC-LABEL: @bfe_u32_test_7
+; SI: V_LSHLREV_B32_e32 v{{[0-9]+}}, 31, v{{[0-9]+}}
+; SI-NOT: BFE
+; SI: S_ENDPGM
+define void @bfe_u32_test_7(i32 addrspace(1)* %out, i32 addrspace(1)* %in) nounwind {
+ %x = load i32 addrspace(1)* %in, align 4
+ %shl = shl i32 %x, 31
+ %bfe = call i32 @llvm.AMDGPU.bfe.u32(i32 %shl, i32 0, i32 31)
+ store i32 %bfe, i32 addrspace(1)* %out, align 4
+ ret void
+}
+
+; FUNC-LABEL: @bfe_u32_test_8
+; SI-NOT: BFE
+; SI: V_AND_B32_e32 {{v[0-9]+}}, 1, {{v[0-9]+}}
+; SI-NOT: BFE
+; SI: S_ENDPGM
+define void @bfe_u32_test_8(i32 addrspace(1)* %out, i32 addrspace(1)* %in) nounwind {
+ %x = load i32 addrspace(1)* %in, align 4
+ %shl = shl i32 %x, 31
+ %bfe = call i32 @llvm.AMDGPU.bfe.u32(i32 %shl, i32 31, i32 1)
+ store i32 %bfe, i32 addrspace(1)* %out, align 4
+ ret void
+}
+
+; FUNC-LABEL: @bfe_u32_test_9
+; SI-NOT: BFE
+; SI: V_LSHRREV_B32_e32 v{{[0-9]+}}, 31, v{{[0-9]+}}
+; SI-NOT: BFE
+; SI: S_ENDPGM
+define void @bfe_u32_test_9(i32 addrspace(1)* %out, i32 addrspace(1)* %in) nounwind {
+ %x = load i32 addrspace(1)* %in, align 4
+ %bfe = call i32 @llvm.AMDGPU.bfe.u32(i32 %x, i32 31, i32 1)
+ store i32 %bfe, i32 addrspace(1)* %out, align 4
+ ret void
+}
+
+; FUNC-LABEL: @bfe_u32_test_10
+; SI-NOT: BFE
+; SI: V_LSHRREV_B32_e32 v{{[0-9]+}}, 1, v{{[0-9]+}}
+; SI-NOT: BFE
+; SI: S_ENDPGM
+define void @bfe_u32_test_10(i32 addrspace(1)* %out, i32 addrspace(1)* %in) nounwind {
+ %x = load i32 addrspace(1)* %in, align 4
+ %bfe = call i32 @llvm.AMDGPU.bfe.u32(i32 %x, i32 1, i32 31)
+ store i32 %bfe, i32 addrspace(1)* %out, align 4
+ ret void
+}
+
+; FUNC-LABEL: @bfe_u32_test_11
+; SI-NOT: BFE
+; SI: V_LSHRREV_B32_e32 v{{[0-9]+}}, 8, v{{[0-9]+}}
+; SI-NOT: BFE
+; SI: S_ENDPGM
+define void @bfe_u32_test_11(i32 addrspace(1)* %out, i32 addrspace(1)* %in) nounwind {
+ %x = load i32 addrspace(1)* %in, align 4
+ %bfe = call i32 @llvm.AMDGPU.bfe.u32(i32 %x, i32 8, i32 24)
+ store i32 %bfe, i32 addrspace(1)* %out, align 4
+ ret void
+}
+
+; FUNC-LABEL: @bfe_u32_test_12
+; SI-NOT: BFE
+; SI: V_LSHRREV_B32_e32 v{{[0-9]+}}, 24, v{{[0-9]+}}
+; SI-NOT: BFE
+; SI: S_ENDPGM
+define void @bfe_u32_test_12(i32 addrspace(1)* %out, i32 addrspace(1)* %in) nounwind {
+ %x = load i32 addrspace(1)* %in, align 4
+ %bfe = call i32 @llvm.AMDGPU.bfe.u32(i32 %x, i32 24, i32 8)
+ store i32 %bfe, i32 addrspace(1)* %out, align 4
+ ret void
+}
+
+; FUNC-LABEL: @bfe_u32_test_13
+; V_ASHRREV_U32_e32 {{v[0-9]+}}, 31, {{v[0-9]+}}
+; SI-NOT: BFE
+; SI: S_ENDPGM
+define void @bfe_u32_test_13(i32 addrspace(1)* %out, i32 addrspace(1)* %in) nounwind {
+ %x = load i32 addrspace(1)* %in, align 4
+ %shl = ashr i32 %x, 31
+ %bfe = call i32 @llvm.AMDGPU.bfe.u32(i32 %shl, i32 31, i32 1)
+ store i32 %bfe, i32 addrspace(1)* %out, align 4 ret void
+}
+
+; FUNC-LABEL: @bfe_u32_test_14
+; SI-NOT: LSHR
+; SI-NOT: BFE
+; SI: S_ENDPGM
+define void @bfe_u32_test_14(i32 addrspace(1)* %out, i32 addrspace(1)* %in) nounwind {
+ %x = load i32 addrspace(1)* %in, align 4
+ %shl = lshr i32 %x, 31
+ %bfe = call i32 @llvm.AMDGPU.bfe.u32(i32 %shl, i32 31, i32 1)
+ store i32 %bfe, i32 addrspace(1)* %out, align 4 ret void
+}
+
+; FUNC-LABEL: @bfe_u32_constant_fold_test_0
+; SI-NOT: BFE
+; SI: V_MOV_B32_e32 [[VREG:v[0-9]+]], 0
+; SI: BUFFER_STORE_DWORD [[VREG]],
+; SI: S_ENDPGM
+; EG-NOT: BFE
+define void @bfe_u32_constant_fold_test_0(i32 addrspace(1)* %out) nounwind {
+ %bfe_u32 = call i32 @llvm.AMDGPU.bfe.u32(i32 0, i32 0, i32 0) nounwind readnone
+ store i32 %bfe_u32, i32 addrspace(1)* %out, align 4
+ ret void
+}
+
+; FUNC-LABEL: @bfe_u32_constant_fold_test_1
+; SI-NOT: BFE
+; SI: V_MOV_B32_e32 [[VREG:v[0-9]+]], 0
+; SI: BUFFER_STORE_DWORD [[VREG]],
+; SI: S_ENDPGM
+; EG-NOT: BFE
+define void @bfe_u32_constant_fold_test_1(i32 addrspace(1)* %out) nounwind {
+ %bfe_u32 = call i32 @llvm.AMDGPU.bfe.u32(i32 12334, i32 0, i32 0) nounwind readnone
+ store i32 %bfe_u32, i32 addrspace(1)* %out, align 4
+ ret void
+}
+
+; FUNC-LABEL: @bfe_u32_constant_fold_test_2
+; SI-NOT: BFE
+; SI: V_MOV_B32_e32 [[VREG:v[0-9]+]], 0
+; SI: BUFFER_STORE_DWORD [[VREG]],
+; SI: S_ENDPGM
+; EG-NOT: BFE
+define void @bfe_u32_constant_fold_test_2(i32 addrspace(1)* %out) nounwind {
+ %bfe_u32 = call i32 @llvm.AMDGPU.bfe.u32(i32 0, i32 0, i32 1) nounwind readnone
+ store i32 %bfe_u32, i32 addrspace(1)* %out, align 4
+ ret void
+}
+
+; FUNC-LABEL: @bfe_u32_constant_fold_test_3
+; SI-NOT: BFE
+; SI: V_MOV_B32_e32 [[VREG:v[0-9]+]], 1
+; SI: BUFFER_STORE_DWORD [[VREG]],
+; SI: S_ENDPGM
+; EG-NOT: BFE
+define void @bfe_u32_constant_fold_test_3(i32 addrspace(1)* %out) nounwind {
+ %bfe_u32 = call i32 @llvm.AMDGPU.bfe.u32(i32 1, i32 0, i32 1) nounwind readnone
+ store i32 %bfe_u32, i32 addrspace(1)* %out, align 4
+ ret void
+}
+
+; FUNC-LABEL: @bfe_u32_constant_fold_test_4
+; SI-NOT: BFE
+; SI: V_MOV_B32_e32 [[VREG:v[0-9]+]], -1
+; SI: BUFFER_STORE_DWORD [[VREG]],
+; SI: S_ENDPGM
+; EG-NOT: BFE
+define void @bfe_u32_constant_fold_test_4(i32 addrspace(1)* %out) nounwind {
+ %bfe_u32 = call i32 @llvm.AMDGPU.bfe.u32(i32 4294967295, i32 0, i32 1) nounwind readnone
+ store i32 %bfe_u32, i32 addrspace(1)* %out, align 4
+ ret void
+}
+
+; FUNC-LABEL: @bfe_u32_constant_fold_test_5
+; SI-NOT: BFE
+; SI: V_MOV_B32_e32 [[VREG:v[0-9]+]], 1
+; SI: BUFFER_STORE_DWORD [[VREG]],
+; SI: S_ENDPGM
+; EG-NOT: BFE
+define void @bfe_u32_constant_fold_test_5(i32 addrspace(1)* %out) nounwind {
+ %bfe_u32 = call i32 @llvm.AMDGPU.bfe.u32(i32 128, i32 7, i32 1) nounwind readnone
+ store i32 %bfe_u32, i32 addrspace(1)* %out, align 4
+ ret void
+}
+
+; FUNC-LABEL: @bfe_u32_constant_fold_test_6
+; SI-NOT: BFE
+; SI: V_MOV_B32_e32 [[VREG:v[0-9]+]], 0x80
+; SI: BUFFER_STORE_DWORD [[VREG]],
+; SI: S_ENDPGM
+; EG-NOT: BFE
+define void @bfe_u32_constant_fold_test_6(i32 addrspace(1)* %out) nounwind {
+ %bfe_u32 = call i32 @llvm.AMDGPU.bfe.u32(i32 128, i32 0, i32 8) nounwind readnone
+ store i32 %bfe_u32, i32 addrspace(1)* %out, align 4
+ ret void
+}
+
+; FUNC-LABEL: @bfe_u32_constant_fold_test_7
+; SI-NOT: BFE
+; SI: V_MOV_B32_e32 [[VREG:v[0-9]+]], 0x7f
+; SI: BUFFER_STORE_DWORD [[VREG]],
+; SI: S_ENDPGM
+; EG-NOT: BFE
+define void @bfe_u32_constant_fold_test_7(i32 addrspace(1)* %out) nounwind {
+ %bfe_u32 = call i32 @llvm.AMDGPU.bfe.u32(i32 127, i32 0, i32 8) nounwind readnone
+ store i32 %bfe_u32, i32 addrspace(1)* %out, align 4
+ ret void
+}
+
+; FUNC-LABEL: @bfe_u32_constant_fold_test_8
+; SI-NOT: BFE
+; SI: V_MOV_B32_e32 [[VREG:v[0-9]+]], 1
+; SI: BUFFER_STORE_DWORD [[VREG]],
+; SI: S_ENDPGM
+; EG-NOT: BFE
+define void @bfe_u32_constant_fold_test_8(i32 addrspace(1)* %out) nounwind {
+ %bfe_u32 = call i32 @llvm.AMDGPU.bfe.u32(i32 127, i32 6, i32 8) nounwind readnone
+ store i32 %bfe_u32, i32 addrspace(1)* %out, align 4
+ ret void
+}
+
+; FUNC-LABEL: @bfe_u32_constant_fold_test_9
+; SI-NOT: BFE
+; SI: V_MOV_B32_e32 [[VREG:v[0-9]+]], 1
+; SI: BUFFER_STORE_DWORD [[VREG]],
+; SI: S_ENDPGM
+; EG-NOT: BFEfppppppppppppp
+define void @bfe_u32_constant_fold_test_9(i32 addrspace(1)* %out) nounwind {
+ %bfe_u32 = call i32 @llvm.AMDGPU.bfe.u32(i32 65536, i32 16, i32 8) nounwind readnone
+ store i32 %bfe_u32, i32 addrspace(1)* %out, align 4
+ ret void
+}
+
+; FUNC-LABEL: @bfe_u32_constant_fold_test_10
+; SI-NOT: BFE
+; SI: V_MOV_B32_e32 [[VREG:v[0-9]+]], 0
+; SI: BUFFER_STORE_DWORD [[VREG]],
+; SI: S_ENDPGM
+; EG-NOT: BFE
+define void @bfe_u32_constant_fold_test_10(i32 addrspace(1)* %out) nounwind {
+ %bfe_u32 = call i32 @llvm.AMDGPU.bfe.u32(i32 65535, i32 16, i32 16) nounwind readnone
+ store i32 %bfe_u32, i32 addrspace(1)* %out, align 4
+ ret void
+}
+
+; FUNC-LABEL: @bfe_u32_constant_fold_test_11
+; SI-NOT: BFE
+; SI: V_MOV_B32_e32 [[VREG:v[0-9]+]], 10
+; SI: BUFFER_STORE_DWORD [[VREG]],
+; SI: S_ENDPGM
+; EG-NOT: BFE
+define void @bfe_u32_constant_fold_test_11(i32 addrspace(1)* %out) nounwind {
+ %bfe_u32 = call i32 @llvm.AMDGPU.bfe.u32(i32 160, i32 4, i32 4) nounwind readnone
+ store i32 %bfe_u32, i32 addrspace(1)* %out, align 4
+ ret void
+}
+
+; FUNC-LABEL: @bfe_u32_constant_fold_test_12
+; SI-NOT: BFE
+; SI: V_MOV_B32_e32 [[VREG:v[0-9]+]], 0
+; SI: BUFFER_STORE_DWORD [[VREG]],
+; SI: S_ENDPGM
+; EG-NOT: BFE
+define void @bfe_u32_constant_fold_test_12(i32 addrspace(1)* %out) nounwind {
+ %bfe_u32 = call i32 @llvm.AMDGPU.bfe.u32(i32 160, i32 31, i32 1) nounwind readnone
+ store i32 %bfe_u32, i32 addrspace(1)* %out, align 4
+ ret void
+}
+
+; FUNC-LABEL: @bfe_u32_constant_fold_test_13
+; SI-NOT: BFE
+; SI: V_MOV_B32_e32 [[VREG:v[0-9]+]], 1
+; SI: BUFFER_STORE_DWORD [[VREG]],
+; SI: S_ENDPGM
+; EG-NOT: BFE
+define void @bfe_u32_constant_fold_test_13(i32 addrspace(1)* %out) nounwind {
+ %bfe_u32 = call i32 @llvm.AMDGPU.bfe.u32(i32 131070, i32 16, i32 16) nounwind readnone
+ store i32 %bfe_u32, i32 addrspace(1)* %out, align 4
+ ret void
+}
+
+; FUNC-LABEL: @bfe_u32_constant_fold_test_14
+; SI-NOT: BFE
+; SI: V_MOV_B32_e32 [[VREG:v[0-9]+]], 40
+; SI: BUFFER_STORE_DWORD [[VREG]],
+; SI: S_ENDPGM
+; EG-NOT: BFE
+define void @bfe_u32_constant_fold_test_14(i32 addrspace(1)* %out) nounwind {
+ %bfe_u32 = call i32 @llvm.AMDGPU.bfe.u32(i32 160, i32 2, i32 30) nounwind readnone
+ store i32 %bfe_u32, i32 addrspace(1)* %out, align 4
+ ret void
+}
+
+; FUNC-LABEL: @bfe_u32_constant_fold_test_15
+; SI-NOT: BFE
+; SI: V_MOV_B32_e32 [[VREG:v[0-9]+]], 10
+; SI: BUFFER_STORE_DWORD [[VREG]],
+; SI: S_ENDPGM
+; EG-NOT: BFE
+define void @bfe_u32_constant_fold_test_15(i32 addrspace(1)* %out) nounwind {
+ %bfe_u32 = call i32 @llvm.AMDGPU.bfe.u32(i32 160, i32 4, i32 28) nounwind readnone
+ store i32 %bfe_u32, i32 addrspace(1)* %out, align 4
+ ret void
+}
+
+; FUNC-LABEL: @bfe_u32_constant_fold_test_16
+; SI-NOT: BFE
+; SI: V_MOV_B32_e32 [[VREG:v[0-9]+]], 0x7f
+; SI: BUFFER_STORE_DWORD [[VREG]],
+; SI: S_ENDPGM
+; EG-NOT: BFE
+define void @bfe_u32_constant_fold_test_16(i32 addrspace(1)* %out) nounwind {
+ %bfe_u32 = call i32 @llvm.AMDGPU.bfe.u32(i32 4294967295, i32 1, i32 7) nounwind readnone
+ store i32 %bfe_u32, i32 addrspace(1)* %out, align 4
+ ret void
+}
+
+; FUNC-LABEL: @bfe_u32_constant_fold_test_17
+; SI-NOT: BFE
+; SI: V_MOV_B32_e32 [[VREG:v[0-9]+]], 0x7f
+; SI: BUFFER_STORE_DWORD [[VREG]],
+; SI: S_ENDPGM
+; EG-NOT: BFE
+define void @bfe_u32_constant_fold_test_17(i32 addrspace(1)* %out) nounwind {
+ %bfe_u32 = call i32 @llvm.AMDGPU.bfe.u32(i32 255, i32 1, i32 31) nounwind readnone
+ store i32 %bfe_u32, i32 addrspace(1)* %out, align 4
+ ret void
+}
+
+; FUNC-LABEL: @bfe_u32_constant_fold_test_18
+; SI-NOT: BFE
+; SI: V_MOV_B32_e32 [[VREG:v[0-9]+]], 0
+; SI: BUFFER_STORE_DWORD [[VREG]],
+; SI: S_ENDPGM
+; EG-NOT: BFE
+define void @bfe_u32_constant_fold_test_18(i32 addrspace(1)* %out) nounwind {
+ %bfe_u32 = call i32 @llvm.AMDGPU.bfe.u32(i32 255, i32 31, i32 1) nounwind readnone
+ store i32 %bfe_u32, i32 addrspace(1)* %out, align 4
+ ret void
+}
diff --git a/test/CodeGen/R600/llvm.AMDGPU.imad24.ll b/test/CodeGen/R600/llvm.AMDGPU.imad24.ll
new file mode 100644
index 0000000..95795ea
--- /dev/null
+++ b/test/CodeGen/R600/llvm.AMDGPU.imad24.ll
@@ -0,0 +1,21 @@
+; RUN: llc -march=r600 -mcpu=SI -verify-machineinstrs < %s | FileCheck -check-prefix=SI -check-prefix=FUNC %s
+; RUN: llc -march=r600 -mcpu=cayman -verify-machineinstrs < %s | FileCheck -check-prefix=CM -check-prefix=FUNC %s
+; RUN: llc -march=r600 -mcpu=redwood -verify-machineinstrs < %s | FileCheck -check-prefix=R600 -check-prefix=FUNC %s
+; XUN: llc -march=r600 -mcpu=r600 -verify-machineinstrs < %s | FileCheck -check-prefix=R600 -check-prefix=FUNC %s
+; XUN: llc -march=r600 -mcpu=r770 -verify-machineinstrs < %s | FileCheck -check-prefix=R600 -check-prefix=FUNC %s
+
+; FIXME: Store of i32 seems to be broken pre-EG somehow?
+
+declare i32 @llvm.AMDGPU.imad24(i32, i32, i32) nounwind readnone
+
+; FUNC-LABEL: @test_imad24
+; SI: V_MAD_I32_I24
+; CM: MULADD_INT24
+; R600: MULLO_INT
+; R600: ADD_INT
+define void @test_imad24(i32 addrspace(1)* %out, i32 %src0, i32 %src1, i32 %src2) nounwind {
+ %mad = call i32 @llvm.AMDGPU.imad24(i32 %src0, i32 %src1, i32 %src2) nounwind readnone
+ store i32 %mad, i32 addrspace(1)* %out, align 4
+ ret void
+}
+
diff --git a/test/CodeGen/R600/llvm.AMDGPU.imul24.ll b/test/CodeGen/R600/llvm.AMDGPU.imul24.ll
new file mode 100644
index 0000000..8ee3520
--- /dev/null
+++ b/test/CodeGen/R600/llvm.AMDGPU.imul24.ll
@@ -0,0 +1,15 @@
+; RUN: llc -march=r600 -mcpu=SI -verify-machineinstrs < %s | FileCheck -check-prefix=SI -check-prefix=FUNC %s
+; RUN: llc -march=r600 -mcpu=cayman -verify-machineinstrs < %s | FileCheck -check-prefix=CM -check-prefix=FUNC %s
+; RUN: llc -march=r600 -mcpu=redwood -verify-machineinstrs < %s | FileCheck -check-prefix=R600 -check-prefix=FUNC %s
+
+declare i32 @llvm.AMDGPU.imul24(i32, i32) nounwind readnone
+
+; FUNC-LABEL: @test_imul24
+; SI: V_MUL_I32_I24
+; CM: MUL_INT24
+; R600: MULLO_INT
+define void @test_imul24(i32 addrspace(1)* %out, i32 %src0, i32 %src1) nounwind {
+ %mul = call i32 @llvm.AMDGPU.imul24(i32 %src0, i32 %src1) nounwind readnone
+ store i32 %mul, i32 addrspace(1)* %out, align 4
+ ret void
+}
diff --git a/test/CodeGen/R600/llvm.AMDGPU.umad24.ll b/test/CodeGen/R600/llvm.AMDGPU.umad24.ll
new file mode 100644
index 0000000..afdfb18
--- /dev/null
+++ b/test/CodeGen/R600/llvm.AMDGPU.umad24.ll
@@ -0,0 +1,19 @@
+; RUN: llc -march=r600 -mcpu=SI -verify-machineinstrs < %s | FileCheck -check-prefix=SI -check-prefix=FUNC %s
+; RUN: llc -march=r600 -mcpu=cayman -verify-machineinstrs < %s | FileCheck -check-prefix=EG -check-prefix=FUNC %s
+; RUN: llc -march=r600 -mcpu=redwood -verify-machineinstrs < %s | FileCheck -check-prefix=EG -check-prefix=FUNC %s
+; XUN: llc -march=r600 -mcpu=r600 -verify-machineinstrs < %s | FileCheck -check-prefix=R600 -check-prefix=FUNC %s
+; XUN: llc -march=r600 -mcpu=rv770 -verify-machineinstrs < %s | FileCheck -check-prefix=R600 -check-prefix=FUNC %s
+
+declare i32 @llvm.AMDGPU.umad24(i32, i32, i32) nounwind readnone
+
+; FUNC-LABEL: @test_umad24
+; SI: V_MAD_U32_U24
+; EG: MULADD_UINT24
+; R600: MULLO_UINT
+; R600: ADD_INT
+define void @test_umad24(i32 addrspace(1)* %out, i32 %src0, i32 %src1, i32 %src2) nounwind {
+ %mad = call i32 @llvm.AMDGPU.umad24(i32 %src0, i32 %src1, i32 %src2) nounwind readnone
+ store i32 %mad, i32 addrspace(1)* %out, align 4
+ ret void
+}
+
diff --git a/test/CodeGen/R600/llvm.AMDGPU.umul24.ll b/test/CodeGen/R600/llvm.AMDGPU.umul24.ll
new file mode 100644
index 0000000..72a3602
--- /dev/null
+++ b/test/CodeGen/R600/llvm.AMDGPU.umul24.ll
@@ -0,0 +1,17 @@
+; RUN: llc -march=r600 -mcpu=SI -verify-machineinstrs < %s | FileCheck -check-prefix=SI %s
+; RUN: llc -march=r600 -mcpu=cayman -verify-machineinstrs < %s | FileCheck -check-prefix=EG -check-prefix=FUNC %s
+; RUN: llc -march=r600 -mcpu=redwood -verify-machineinstrs < %s | FileCheck -check-prefix=EG -check-prefix=FUNC %s
+; XUN: llc -march=r600 -mcpu=r600 -verify-machineinstrs < %s | FileCheck -check-prefix=R600 -check-prefix=FUNC %s
+; XUN: llc -march=r600 -mcpu=r770 -verify-machineinstrs < %s | FileCheck -check-prefix=R600 -check-prefix=FUNC %s
+
+declare i32 @llvm.AMDGPU.umul24(i32, i32) nounwind readnone
+
+; FUNC-LABEL: @test_umul24
+; SI: V_MUL_U32_U24
+; R600: MUL_UINT24
+; R600: MULLO_UINT
+define void @test_umul24(i32 addrspace(1)* %out, i32 %src0, i32 %src1) nounwind {
+ %mul = call i32 @llvm.AMDGPU.umul24(i32 %src0, i32 %src1) nounwind readnone
+ store i32 %mul, i32 addrspace(1)* %out, align 4
+ ret void
+}
diff --git a/test/CodeGen/R600/llvm.SI.tbuffer.store.ll b/test/CodeGen/R600/llvm.SI.tbuffer.store.ll
index 569efb6..740581a 100644
--- a/test/CodeGen/R600/llvm.SI.tbuffer.store.ll
+++ b/test/CodeGen/R600/llvm.SI.tbuffer.store.ll
@@ -1,7 +1,7 @@
;RUN: llc < %s -march=r600 -mcpu=verde -verify-machineinstrs | FileCheck %s
;CHECK-LABEL: @test1
-;CHECK: TBUFFER_STORE_FORMAT_XYZW {{v\[[0-9]+:[0-9]+\]}}, 32, -1, 0, -1, 0, 14, 4, {{v[0-9]+}}, {{s\[[0-9]+:[0-9]+\]}}, -1, 0, 0
+;CHECK: TBUFFER_STORE_FORMAT_XYZW {{v\[[0-9]+:[0-9]+\]}}, 0x20, -1, 0, -1, 0, 14, 4, {{v[0-9]+}}, {{s\[[0-9]+:[0-9]+\]}}, -1, 0, 0
define void @test1(i32 %a1, i32 %vaddr) #0 {
%vdata = insertelement <4 x i32> undef, i32 %a1, i32 0
call void @llvm.SI.tbuffer.store.v4i32(<16 x i8> undef, <4 x i32> %vdata,
@@ -11,7 +11,7 @@ define void @test1(i32 %a1, i32 %vaddr) #0 {
}
;CHECK-LABEL: @test2
-;CHECK: TBUFFER_STORE_FORMAT_XYZ {{v\[[0-9]+:[0-9]+\]}}, 24, -1, 0, -1, 0, 13, 4, {{v[0-9]+}}, {{s\[[0-9]+:[0-9]+\]}}, -1, 0, 0
+;CHECK: TBUFFER_STORE_FORMAT_XYZ {{v\[[0-9]+:[0-9]+\]}}, 0x18, -1, 0, -1, 0, 13, 4, {{v[0-9]+}}, {{s\[[0-9]+:[0-9]+\]}}, -1, 0, 0
define void @test2(i32 %a1, i32 %vaddr) #0 {
%vdata = insertelement <4 x i32> undef, i32 %a1, i32 0
call void @llvm.SI.tbuffer.store.v4i32(<16 x i8> undef, <4 x i32> %vdata,
@@ -21,7 +21,7 @@ define void @test2(i32 %a1, i32 %vaddr) #0 {
}
;CHECK-LABEL: @test3
-;CHECK: TBUFFER_STORE_FORMAT_XY {{v\[[0-9]+:[0-9]+\]}}, 16, -1, 0, -1, 0, 11, 4, {{v[0-9]+}}, {{s\[[0-9]+:[0-9]+\]}}, -1, 0, 0
+;CHECK: TBUFFER_STORE_FORMAT_XY {{v\[[0-9]+:[0-9]+\]}}, 0x10, -1, 0, -1, 0, 11, 4, {{v[0-9]+}}, {{s\[[0-9]+:[0-9]+\]}}, -1, 0, 0
define void @test3(i32 %a1, i32 %vaddr) #0 {
%vdata = insertelement <2 x i32> undef, i32 %a1, i32 0
call void @llvm.SI.tbuffer.store.v2i32(<16 x i8> undef, <2 x i32> %vdata,
@@ -31,7 +31,7 @@ define void @test3(i32 %a1, i32 %vaddr) #0 {
}
;CHECK-LABEL: @test4
-;CHECK: TBUFFER_STORE_FORMAT_X {{v[0-9]+}}, 8, -1, 0, -1, 0, 4, 4, {{v[0-9]+}}, {{s\[[0-9]+:[0-9]+\]}}, -1, 0, 0
+;CHECK: TBUFFER_STORE_FORMAT_X {{v[0-9]+}}, 0x8, -1, 0, -1, 0, 4, 4, {{v[0-9]+}}, {{s\[[0-9]+:[0-9]+\]}}, -1, 0, 0
define void @test4(i32 %vdata, i32 %vaddr) #0 {
call void @llvm.SI.tbuffer.store.i32(<16 x i8> undef, i32 %vdata,
i32 1, i32 %vaddr, i32 0, i32 8, i32 4, i32 4, i32 1, i32 0, i32 1,
diff --git a/test/CodeGen/R600/llvm.cos.ll b/test/CodeGen/R600/llvm.cos.ll
index aaf2305..9e7a4de 100644
--- a/test/CodeGen/R600/llvm.cos.ll
+++ b/test/CodeGen/R600/llvm.cos.ll
@@ -1,19 +1,40 @@
-;RUN: llc < %s -march=r600 -mcpu=redwood | FileCheck %s
+;RUN: llc < %s -march=r600 -mcpu=redwood | FileCheck %s -check-prefix=EG -check-prefix=FUNC
+;RUN: llc < %s -march=r600 -mcpu=SI | FileCheck %s -check-prefix=SI -check-prefix=FUNC
-;CHECK: MULADD_IEEE *
-;CHECK: FRACT *
-;CHECK: ADD *
-;CHECK: COS * T{{[0-9]+\.[XYZW], PV\.[XYZW]}}
+;FUNC-LABEL: test
+;EG: MULADD_IEEE *
+;EG: FRACT *
+;EG: ADD *
+;EG: COS * T{{[0-9]+\.[XYZW], PV\.[XYZW]}}
+;EG-NOT: COS
+;SI: V_COS_F32
+;SI-NOT: V_COS_F32
-define void @test(<4 x float> inreg %reg0) #0 {
- %r0 = extractelement <4 x float> %reg0, i32 0
- %r1 = call float @llvm.cos.f32(float %r0)
- %vec = insertelement <4 x float> undef, float %r1, i32 0
- call void @llvm.R600.store.swizzle(<4 x float> %vec, i32 0, i32 0)
+define void @test(float addrspace(1)* %out, float %x) #1 {
+ %cos = call float @llvm.cos.f32(float %x)
+ store float %cos, float addrspace(1)* %out
+ ret void
+}
+
+;FUNC-LABEL: testv
+;EG: COS * T{{[0-9]+\.[XYZW], PV\.[XYZW]}}
+;EG: COS * T{{[0-9]+\.[XYZW], PV\.[XYZW]}}
+;EG: COS * T{{[0-9]+\.[XYZW], PV\.[XYZW]}}
+;EG: COS * T{{[0-9]+\.[XYZW], PV\.[XYZW]}}
+;EG-NOT: COS
+;SI: V_COS_F32
+;SI: V_COS_F32
+;SI: V_COS_F32
+;SI: V_COS_F32
+;SI-NOT: V_COS_F32
+
+define void @testv(<4 x float> addrspace(1)* %out, <4 x float> inreg %vx) #1 {
+ %cos = call <4 x float> @llvm.cos.v4f32(<4 x float> %vx)
+ store <4 x float> %cos, <4 x float> addrspace(1)* %out
ret void
}
declare float @llvm.cos.f32(float) readnone
-declare void @llvm.R600.store.swizzle(<4 x float>, i32, i32)
+declare <4 x float> @llvm.cos.v4f32(<4 x float>) readnone
attributes #0 = { "ShaderType"="0" }
diff --git a/test/CodeGen/R600/llvm.rint.f64.ll b/test/CodeGen/R600/llvm.rint.f64.ll
new file mode 100644
index 0000000..a7a909a
--- /dev/null
+++ b/test/CodeGen/R600/llvm.rint.f64.ll
@@ -0,0 +1,37 @@
+; RUN: llc -march=r600 -mcpu=bonaire -verify-machineinstrs < %s | FileCheck -check-prefix=CI -check-prefix=FUNC %s
+
+; FUNC-LABEL: @f64
+; CI: V_RNDNE_F64_e32
+define void @f64(double addrspace(1)* %out, double %in) {
+entry:
+ %0 = call double @llvm.rint.f64(double %in)
+ store double %0, double addrspace(1)* %out
+ ret void
+}
+
+; FUNC-LABEL: @v2f64
+; CI: V_RNDNE_F64_e32
+; CI: V_RNDNE_F64_e32
+define void @v2f64(<2 x double> addrspace(1)* %out, <2 x double> %in) {
+entry:
+ %0 = call <2 x double> @llvm.rint.v2f64(<2 x double> %in)
+ store <2 x double> %0, <2 x double> addrspace(1)* %out
+ ret void
+}
+
+; FUNC-LABEL: @v4f64
+; CI: V_RNDNE_F64_e32
+; CI: V_RNDNE_F64_e32
+; CI: V_RNDNE_F64_e32
+; CI: V_RNDNE_F64_e32
+define void @v4f64(<4 x double> addrspace(1)* %out, <4 x double> %in) {
+entry:
+ %0 = call <4 x double> @llvm.rint.v4f64(<4 x double> %in)
+ store <4 x double> %0, <4 x double> addrspace(1)* %out
+ ret void
+}
+
+
+declare double @llvm.rint.f64(double) #0
+declare <2 x double> @llvm.rint.v2f64(<2 x double>) #0
+declare <4 x double> @llvm.rint.v4f64(<4 x double>) #0
diff --git a/test/CodeGen/R600/llvm.rint.ll b/test/CodeGen/R600/llvm.rint.ll
index c174b33..db8352f 100644
--- a/test/CodeGen/R600/llvm.rint.ll
+++ b/test/CodeGen/R600/llvm.rint.ll
@@ -1,10 +1,10 @@
-; RUN: llc < %s -march=r600 -mcpu=redwood | FileCheck %s --check-prefix=R600-CHECK
-; RUN: llc < %s -march=r600 -mcpu=SI -verify-machineinstrs | FileCheck %s --check-prefix=SI-CHECK
+; RUN: llc -march=r600 -mcpu=redwood < %s | FileCheck %s -check-prefix=R600 -check-prefix=FUNC
+; RUN: llc -march=r600 -mcpu=SI -verify-machineinstrs < %s | FileCheck -check-prefix=SI -check-prefix=FUNC %s
-; R600-CHECK: @f32
-; R600-CHECK: RNDNE
-; SI-CHECK: @f32
-; SI-CHECK: V_RNDNE_F32_e32
+; FUNC-LABEL: @f32
+; R600: RNDNE
+
+; SI: V_RNDNE_F32_e32
define void @f32(float addrspace(1)* %out, float %in) {
entry:
%0 = call float @llvm.rint.f32(float %in)
@@ -12,12 +12,12 @@ entry:
ret void
}
-; R600-CHECK: @v2f32
-; R600-CHECK: RNDNE
-; R600-CHECK: RNDNE
-; SI-CHECK: @v2f32
-; SI-CHECK: V_RNDNE_F32_e32
-; SI-CHECK: V_RNDNE_F32_e32
+; FUNC-LABEL: @v2f32
+; R600: RNDNE
+; R600: RNDNE
+
+; SI: V_RNDNE_F32_e32
+; SI: V_RNDNE_F32_e32
define void @v2f32(<2 x float> addrspace(1)* %out, <2 x float> %in) {
entry:
%0 = call <2 x float> @llvm.rint.v2f32(<2 x float> %in)
@@ -25,16 +25,16 @@ entry:
ret void
}
-; R600-CHECK: @v4f32
-; R600-CHECK: RNDNE
-; R600-CHECK: RNDNE
-; R600-CHECK: RNDNE
-; R600-CHECK: RNDNE
-; SI-CHECK: @v4f32
-; SI-CHECK: V_RNDNE_F32_e32
-; SI-CHECK: V_RNDNE_F32_e32
-; SI-CHECK: V_RNDNE_F32_e32
-; SI-CHECK: V_RNDNE_F32_e32
+; FUNC-LABEL: @v4f32
+; R600: RNDNE
+; R600: RNDNE
+; R600: RNDNE
+; R600: RNDNE
+
+; SI: V_RNDNE_F32_e32
+; SI: V_RNDNE_F32_e32
+; SI: V_RNDNE_F32_e32
+; SI: V_RNDNE_F32_e32
define void @v4f32(<4 x float> addrspace(1)* %out, <4 x float> %in) {
entry:
%0 = call <4 x float> @llvm.rint.v4f32(<4 x float> %in)
@@ -42,13 +42,8 @@ entry:
ret void
}
-; Function Attrs: nounwind readonly
declare float @llvm.rint.f32(float) #0
-
-; Function Attrs: nounwind readonly
declare <2 x float> @llvm.rint.v2f32(<2 x float>) #0
-
-; Function Attrs: nounwind readonly
declare <4 x float> @llvm.rint.v4f32(<4 x float>) #0
attributes #0 = { nounwind readonly }
diff --git a/test/CodeGen/R600/llvm.sin.ll b/test/CodeGen/R600/llvm.sin.ll
index 9eb9983..41c363c 100644
--- a/test/CodeGen/R600/llvm.sin.ll
+++ b/test/CodeGen/R600/llvm.sin.ll
@@ -1,19 +1,41 @@
-;RUN: llc < %s -march=r600 -mcpu=redwood | FileCheck %s
+;RUN: llc < %s -march=r600 -mcpu=redwood | FileCheck %s -check-prefix=EG -check-prefix=FUNC
+;RUN: llc < %s -march=r600 -mcpu=SI | FileCheck %s -check-prefix=SI -check-prefix=FUNC
-;CHECK: MULADD_IEEE *
-;CHECK: FRACT *
-;CHECK: ADD *
-;CHECK: SIN * T{{[0-9]+\.[XYZW], PV\.[XYZW]}}
+;FUNC-LABEL: test
+;EG: MULADD_IEEE *
+;EG: FRACT *
+;EG: ADD *
+;EG: SIN * T{{[0-9]+\.[XYZW], PV\.[XYZW]}}
+;EG-NOT: SIN
+;SI: V_MUL_F32
+;SI: V_SIN_F32
+;SI-NOT: V_SIN_F32
-define void @test(<4 x float> inreg %reg0) #0 {
- %r0 = extractelement <4 x float> %reg0, i32 0
- %r1 = call float @llvm.sin.f32( float %r0)
- %vec = insertelement <4 x float> undef, float %r1, i32 0
- call void @llvm.R600.store.swizzle(<4 x float> %vec, i32 0, i32 0)
+define void @test(float addrspace(1)* %out, float %x) #1 {
+ %sin = call float @llvm.sin.f32(float %x)
+ store float %sin, float addrspace(1)* %out
+ ret void
+}
+
+;FUNC-LABEL: testv
+;EG: SIN * T{{[0-9]+\.[XYZW], PV\.[XYZW]}}
+;EG: SIN * T{{[0-9]+\.[XYZW], PV\.[XYZW]}}
+;EG: SIN * T{{[0-9]+\.[XYZW], PV\.[XYZW]}}
+;EG: SIN * T{{[0-9]+\.[XYZW], PV\.[XYZW]}}
+;EG-NOT: SIN
+;SI: V_SIN_F32
+;SI: V_SIN_F32
+;SI: V_SIN_F32
+;SI: V_SIN_F32
+;SI-NOT: V_SIN_F32
+
+define void @testv(<4 x float> addrspace(1)* %out, <4 x float> %vx) #1 {
+ %sin = call <4 x float> @llvm.sin.v4f32( <4 x float> %vx)
+ store <4 x float> %sin, <4 x float> addrspace(1)* %out
ret void
}
declare float @llvm.sin.f32(float) readnone
-declare void @llvm.R600.store.swizzle(<4 x float>, i32, i32)
+declare <4 x float> @llvm.sin.v4f32(<4 x float>) readnone
attributes #0 = { "ShaderType"="0" }
diff --git a/test/CodeGen/R600/llvm.sqrt.ll b/test/CodeGen/R600/llvm.sqrt.ll
index 0d0d186..4eee37f 100644
--- a/test/CodeGen/R600/llvm.sqrt.ll
+++ b/test/CodeGen/R600/llvm.sqrt.ll
@@ -1,5 +1,5 @@
; RUN: llc < %s -march=r600 --mcpu=redwood | FileCheck %s --check-prefix=R600-CHECK
-; RUN: llc < %s -march=r600 --mcpu=SI | FileCheck %s --check-prefix=SI-CHECK
+; RUN: llc < %s -march=r600 --mcpu=SI -verify-machineinstrs| FileCheck %s --check-prefix=SI-CHECK
; R600-CHECK-LABEL: @sqrt_f32
; R600-CHECK: RECIPSQRT_CLAMPED * T{{[0-9]\.[XYZW]}}, KC0[2].Z
diff --git a/test/CodeGen/R600/load-i1.ll b/test/CodeGen/R600/load-i1.ll
new file mode 100644
index 0000000..9ba81b8
--- /dev/null
+++ b/test/CodeGen/R600/load-i1.ll
@@ -0,0 +1,107 @@
+; RUN: llc -march=r600 -mcpu=SI -verify-machineinstrs< %s | FileCheck -check-prefix=SI %s
+
+
+; SI-LABEL: @global_copy_i1_to_i1
+; SI: BUFFER_LOAD_UBYTE
+; SI: V_AND_B32_e32 v{{[0-9]+}}, 1
+; SI: BUFFER_STORE_BYTE
+; SI: S_ENDPGM
+define void @global_copy_i1_to_i1(i1 addrspace(1)* %out, i1 addrspace(1)* %in) nounwind {
+ %load = load i1 addrspace(1)* %in
+ store i1 %load, i1 addrspace(1)* %out, align 1
+ ret void
+}
+
+; SI-LABEL: @global_sextload_i1_to_i32
+; XSI: BUFFER_LOAD_BYTE
+; SI: BUFFER_STORE_DWORD
+; SI: S_ENDPGM
+define void @global_sextload_i1_to_i32(i32 addrspace(1)* %out, i1 addrspace(1)* %in) nounwind {
+ %load = load i1 addrspace(1)* %in
+ %ext = sext i1 %load to i32
+ store i32 %ext, i32 addrspace(1)* %out, align 4
+ ret void
+}
+
+; SI-LABEL: @global_zextload_i1_to_i32
+; SI: BUFFER_LOAD_UBYTE
+; SI: BUFFER_STORE_DWORD
+; SI: S_ENDPGM
+define void @global_zextload_i1_to_i32(i32 addrspace(1)* %out, i1 addrspace(1)* %in) nounwind {
+ %load = load i1 addrspace(1)* %in
+ %ext = zext i1 %load to i32
+ store i32 %ext, i32 addrspace(1)* %out, align 4
+ ret void
+}
+
+; SI-LABEL: @global_sextload_i1_to_i64
+; XSI: BUFFER_LOAD_BYTE
+; SI: BUFFER_STORE_DWORDX2
+; SI: S_ENDPGM
+define void @global_sextload_i1_to_i64(i64 addrspace(1)* %out, i1 addrspace(1)* %in) nounwind {
+ %load = load i1 addrspace(1)* %in
+ %ext = sext i1 %load to i64
+ store i64 %ext, i64 addrspace(1)* %out, align 4
+ ret void
+}
+
+; SI-LABEL: @global_zextload_i1_to_i64
+; SI: BUFFER_LOAD_UBYTE
+; SI: BUFFER_STORE_DWORDX2
+; SI: S_ENDPGM
+define void @global_zextload_i1_to_i64(i64 addrspace(1)* %out, i1 addrspace(1)* %in) nounwind {
+ %load = load i1 addrspace(1)* %in
+ %ext = zext i1 %load to i64
+ store i64 %ext, i64 addrspace(1)* %out, align 4
+ ret void
+}
+
+; SI-LABEL: @i1_arg
+; SI: BUFFER_LOAD_UBYTE
+; SI: V_AND_B32_e32
+; SI: BUFFER_STORE_BYTE
+; SI: S_ENDPGM
+define void @i1_arg(i1 addrspace(1)* %out, i1 %x) nounwind {
+ store i1 %x, i1 addrspace(1)* %out, align 1
+ ret void
+}
+
+; SI-LABEL: @i1_arg_zext_i32
+; SI: BUFFER_LOAD_UBYTE
+; SI: BUFFER_STORE_DWORD
+; SI: S_ENDPGM
+define void @i1_arg_zext_i32(i32 addrspace(1)* %out, i1 %x) nounwind {
+ %ext = zext i1 %x to i32
+ store i32 %ext, i32 addrspace(1)* %out, align 4
+ ret void
+}
+
+; SI-LABEL: @i1_arg_zext_i64
+; SI: BUFFER_LOAD_UBYTE
+; SI: BUFFER_STORE_DWORDX2
+; SI: S_ENDPGM
+define void @i1_arg_zext_i64(i64 addrspace(1)* %out, i1 %x) nounwind {
+ %ext = zext i1 %x to i64
+ store i64 %ext, i64 addrspace(1)* %out, align 8
+ ret void
+}
+
+; SI-LABEL: @i1_arg_sext_i32
+; XSI: BUFFER_LOAD_BYTE
+; SI: BUFFER_STORE_DWORD
+; SI: S_ENDPGM
+define void @i1_arg_sext_i32(i32 addrspace(1)* %out, i1 %x) nounwind {
+ %ext = sext i1 %x to i32
+ store i32 %ext, i32addrspace(1)* %out, align 4
+ ret void
+}
+
+; SI-LABEL: @i1_arg_sext_i64
+; XSI: BUFFER_LOAD_BYTE
+; SI: BUFFER_STORE_DWORDX2
+; SI: S_ENDPGM
+define void @i1_arg_sext_i64(i64 addrspace(1)* %out, i1 %x) nounwind {
+ %ext = sext i1 %x to i64
+ store i64 %ext, i64 addrspace(1)* %out, align 8
+ ret void
+}
diff --git a/test/CodeGen/R600/local-64.ll b/test/CodeGen/R600/local-64.ll
index 87f18ae..c52b41b 100644
--- a/test/CodeGen/R600/local-64.ll
+++ b/test/CodeGen/R600/local-64.ll
@@ -1,7 +1,7 @@
-; RUN: llc -march=r600 -mcpu=SI < %s | FileCheck -check-prefix=SI %s
+; RUN: llc -march=r600 -mcpu=SI -verify-machineinstrs< %s | FileCheck -check-prefix=SI %s
; SI-LABEL: @local_i32_load
-; SI: DS_READ_B32 [[REG:v[0-9]+]], v{{[0-9]+}}, 28, [M0]
+; SI: DS_READ_B32 [[REG:v[0-9]+]], v{{[0-9]+}}, 0x1c, [M0]
; SI: BUFFER_STORE_DWORD [[REG]],
define void @local_i32_load(i32 addrspace(1)* %out, i32 addrspace(3)* %in) nounwind {
%gep = getelementptr i32 addrspace(3)* %in, i32 7
@@ -11,7 +11,7 @@ define void @local_i32_load(i32 addrspace(1)* %out, i32 addrspace(3)* %in) nounw
}
; SI-LABEL: @local_i32_load_0_offset
-; SI: DS_READ_B32 [[REG:v[0-9]+]], v{{[0-9]+}}, 0, [M0]
+; SI: DS_READ_B32 [[REG:v[0-9]+]], v{{[0-9]+}}, 0x0, [M0]
; SI: BUFFER_STORE_DWORD [[REG]],
define void @local_i32_load_0_offset(i32 addrspace(1)* %out, i32 addrspace(3)* %in) nounwind {
%val = load i32 addrspace(3)* %in, align 4
@@ -21,7 +21,7 @@ define void @local_i32_load_0_offset(i32 addrspace(1)* %out, i32 addrspace(3)* %
; SI-LABEL: @local_i8_load_i16_max_offset
; SI-NOT: ADD
-; SI: DS_READ_U8 [[REG:v[0-9]+]], {{v[0-9]+}}, -1, [M0]
+; SI: DS_READ_U8 [[REG:v[0-9]+]], {{v[0-9]+}}, 0xffff, [M0]
; SI: BUFFER_STORE_BYTE [[REG]],
define void @local_i8_load_i16_max_offset(i8 addrspace(1)* %out, i8 addrspace(3)* %in) nounwind {
%gep = getelementptr i8 addrspace(3)* %in, i32 65535
@@ -31,9 +31,9 @@ define void @local_i8_load_i16_max_offset(i8 addrspace(1)* %out, i8 addrspace(3)
}
; SI-LABEL: @local_i8_load_over_i16_max_offset
-; SI: S_ADD_I32 [[ADDR:s[0-9]+]], s{{[0-9]+}}, 65536
+; SI: S_ADD_I32 [[ADDR:s[0-9]+]], s{{[0-9]+}}, 0x10000
; SI: V_MOV_B32_e32 [[VREGADDR:v[0-9]+]], [[ADDR]]
-; SI: DS_READ_U8 [[REG:v[0-9]+]], [[VREGADDR]], 0, [M0]
+; SI: DS_READ_U8 [[REG:v[0-9]+]], [[VREGADDR]], 0x0, [M0]
; SI: BUFFER_STORE_BYTE [[REG]],
define void @local_i8_load_over_i16_max_offset(i8 addrspace(1)* %out, i8 addrspace(3)* %in) nounwind {
%gep = getelementptr i8 addrspace(3)* %in, i32 65536
@@ -44,7 +44,7 @@ define void @local_i8_load_over_i16_max_offset(i8 addrspace(1)* %out, i8 addrspa
; SI-LABEL: @local_i64_load
; SI-NOT: ADD
-; SI: DS_READ_B64 [[REG:v[[0-9]+:[0-9]+]]], v{{[0-9]+}}, 56, [M0]
+; SI: DS_READ_B64 [[REG:v[[0-9]+:[0-9]+]]], v{{[0-9]+}}, 0x38, [M0]
; SI: BUFFER_STORE_DWORDX2 [[REG]],
define void @local_i64_load(i64 addrspace(1)* %out, i64 addrspace(3)* %in) nounwind {
%gep = getelementptr i64 addrspace(3)* %in, i32 7
@@ -54,7 +54,7 @@ define void @local_i64_load(i64 addrspace(1)* %out, i64 addrspace(3)* %in) nounw
}
; SI-LABEL: @local_i64_load_0_offset
-; SI: DS_READ_B64 [[REG:v\[[0-9]+:[0-9]+\]]], v{{[0-9]+}}, 0, [M0]
+; SI: DS_READ_B64 [[REG:v\[[0-9]+:[0-9]+\]]], v{{[0-9]+}}, 0x0, [M0]
; SI: BUFFER_STORE_DWORDX2 [[REG]],
define void @local_i64_load_0_offset(i64 addrspace(1)* %out, i64 addrspace(3)* %in) nounwind {
%val = load i64 addrspace(3)* %in, align 8
@@ -64,7 +64,7 @@ define void @local_i64_load_0_offset(i64 addrspace(1)* %out, i64 addrspace(3)* %
; SI-LABEL: @local_f64_load
; SI-NOT: ADD
-; SI: DS_READ_B64 [[REG:v[[0-9]+:[0-9]+]]], v{{[0-9]+}}, 56, [M0]
+; SI: DS_READ_B64 [[REG:v[[0-9]+:[0-9]+]]], v{{[0-9]+}}, 0x38, [M0]
; SI: BUFFER_STORE_DWORDX2 [[REG]],
define void @local_f64_load(double addrspace(1)* %out, double addrspace(3)* %in) nounwind {
%gep = getelementptr double addrspace(3)* %in, i32 7
@@ -74,7 +74,7 @@ define void @local_f64_load(double addrspace(1)* %out, double addrspace(3)* %in)
}
; SI-LABEL: @local_f64_load_0_offset
-; SI: DS_READ_B64 [[REG:v\[[0-9]+:[0-9]+\]]], v{{[0-9]+}}, 0, [M0]
+; SI: DS_READ_B64 [[REG:v\[[0-9]+:[0-9]+\]]], v{{[0-9]+}}, 0x0, [M0]
; SI: BUFFER_STORE_DWORDX2 [[REG]],
define void @local_f64_load_0_offset(double addrspace(1)* %out, double addrspace(3)* %in) nounwind {
%val = load double addrspace(3)* %in, align 8
@@ -84,7 +84,7 @@ define void @local_f64_load_0_offset(double addrspace(1)* %out, double addrspace
; SI-LABEL: @local_i64_store
; SI-NOT: ADD
-; SI: DS_WRITE_B64 v{{[0-9]+}}, {{v\[[0-9]+:[0-9]+\]}}, 56 [M0]
+; SI: DS_WRITE_B64 v{{[0-9]+}}, {{v\[[0-9]+:[0-9]+\]}}, 0x38 [M0]
define void @local_i64_store(i64 addrspace(3)* %out) nounwind {
%gep = getelementptr i64 addrspace(3)* %out, i32 7
store i64 5678, i64 addrspace(3)* %gep, align 8
@@ -93,7 +93,7 @@ define void @local_i64_store(i64 addrspace(3)* %out) nounwind {
; SI-LABEL: @local_i64_store_0_offset
; SI-NOT: ADD
-; SI: DS_WRITE_B64 v{{[0-9]+}}, {{v\[[0-9]+:[0-9]+\]}}, 0 [M0]
+; SI: DS_WRITE_B64 v{{[0-9]+}}, {{v\[[0-9]+:[0-9]+\]}}, 0x0 [M0]
define void @local_i64_store_0_offset(i64 addrspace(3)* %out) nounwind {
store i64 1234, i64 addrspace(3)* %out, align 8
ret void
@@ -101,7 +101,7 @@ define void @local_i64_store_0_offset(i64 addrspace(3)* %out) nounwind {
; SI-LABEL: @local_f64_store
; SI-NOT: ADD
-; SI: DS_WRITE_B64 v{{[0-9]+}}, {{v\[[0-9]+:[0-9]+\]}}, 56 [M0]
+; SI: DS_WRITE_B64 v{{[0-9]+}}, {{v\[[0-9]+:[0-9]+\]}}, 0x38 [M0]
define void @local_f64_store(double addrspace(3)* %out) nounwind {
%gep = getelementptr double addrspace(3)* %out, i32 7
store double 16.0, double addrspace(3)* %gep, align 8
@@ -109,7 +109,7 @@ define void @local_f64_store(double addrspace(3)* %out) nounwind {
}
; SI-LABEL: @local_f64_store_0_offset
-; SI: DS_WRITE_B64 v{{[0-9]+}}, {{v\[[0-9]+:[0-9]+\]}}, 0 [M0]
+; SI: DS_WRITE_B64 v{{[0-9]+}}, {{v\[[0-9]+:[0-9]+\]}}, 0x0 [M0]
define void @local_f64_store_0_offset(double addrspace(3)* %out) nounwind {
store double 20.0, double addrspace(3)* %out, align 8
ret void
@@ -117,8 +117,8 @@ define void @local_f64_store_0_offset(double addrspace(3)* %out) nounwind {
; SI-LABEL: @local_v2i64_store
; SI-NOT: ADD
-; SI-DAG: DS_WRITE_B64 v{{[0-9]+}}, {{v\[[0-9]+:[0-9]+\]}}, 120 [M0]
-; SI-DAG: DS_WRITE_B64 v{{[0-9]+}}, {{v\[[0-9]+:[0-9]+\]}}, 112 [M0]
+; SI-DAG: DS_WRITE_B64 v{{[0-9]+}}, {{v\[[0-9]+:[0-9]+\]}}, 0x78 [M0]
+; SI-DAG: DS_WRITE_B64 v{{[0-9]+}}, {{v\[[0-9]+:[0-9]+\]}}, 0x70 [M0]
define void @local_v2i64_store(<2 x i64> addrspace(3)* %out) nounwind {
%gep = getelementptr <2 x i64> addrspace(3)* %out, i32 7
store <2 x i64> <i64 5678, i64 5678>, <2 x i64> addrspace(3)* %gep, align 16
@@ -127,8 +127,8 @@ define void @local_v2i64_store(<2 x i64> addrspace(3)* %out) nounwind {
; SI-LABEL: @local_v2i64_store_0_offset
; SI-NOT: ADD
-; SI-DAG: DS_WRITE_B64 v{{[0-9]+}}, {{v\[[0-9]+:[0-9]+\]}}, 8 [M0]
-; SI-DAG: DS_WRITE_B64 v{{[0-9]+}}, {{v\[[0-9]+:[0-9]+\]}}, 0 [M0]
+; SI-DAG: DS_WRITE_B64 v{{[0-9]+}}, {{v\[[0-9]+:[0-9]+\]}}, 0x8 [M0]
+; SI-DAG: DS_WRITE_B64 v{{[0-9]+}}, {{v\[[0-9]+:[0-9]+\]}}, 0x0 [M0]
define void @local_v2i64_store_0_offset(<2 x i64> addrspace(3)* %out) nounwind {
store <2 x i64> <i64 1234, i64 1234>, <2 x i64> addrspace(3)* %out, align 16
ret void
@@ -136,10 +136,10 @@ define void @local_v2i64_store_0_offset(<2 x i64> addrspace(3)* %out) nounwind {
; SI-LABEL: @local_v4i64_store
; SI-NOT: ADD
-; SI-DAG: DS_WRITE_B64 v{{[0-9]+}}, {{v\[[0-9]+:[0-9]+\]}}, 248 [M0]
-; SI-DAG: DS_WRITE_B64 v{{[0-9]+}}, {{v\[[0-9]+:[0-9]+\]}}, 240 [M0]
-; SI-DAG: DS_WRITE_B64 v{{[0-9]+}}, {{v\[[0-9]+:[0-9]+\]}}, 232 [M0]
-; SI-DAG: DS_WRITE_B64 v{{[0-9]+}}, {{v\[[0-9]+:[0-9]+\]}}, 224 [M0]
+; SI-DAG: DS_WRITE_B64 v{{[0-9]+}}, {{v\[[0-9]+:[0-9]+\]}}, 0xf8 [M0]
+; SI-DAG: DS_WRITE_B64 v{{[0-9]+}}, {{v\[[0-9]+:[0-9]+\]}}, 0xf0 [M0]
+; SI-DAG: DS_WRITE_B64 v{{[0-9]+}}, {{v\[[0-9]+:[0-9]+\]}}, 0xe8 [M0]
+; SI-DAG: DS_WRITE_B64 v{{[0-9]+}}, {{v\[[0-9]+:[0-9]+\]}}, 0xe0 [M0]
define void @local_v4i64_store(<4 x i64> addrspace(3)* %out) nounwind {
%gep = getelementptr <4 x i64> addrspace(3)* %out, i32 7
store <4 x i64> <i64 5678, i64 5678, i64 5678, i64 5678>, <4 x i64> addrspace(3)* %gep, align 16
@@ -148,10 +148,10 @@ define void @local_v4i64_store(<4 x i64> addrspace(3)* %out) nounwind {
; SI-LABEL: @local_v4i64_store_0_offset
; SI-NOT: ADD
-; SI-DAG: DS_WRITE_B64 v{{[0-9]+}}, {{v\[[0-9]+:[0-9]+\]}}, 24 [M0]
-; SI-DAG: DS_WRITE_B64 v{{[0-9]+}}, {{v\[[0-9]+:[0-9]+\]}}, 16 [M0]
-; SI-DAG: DS_WRITE_B64 v{{[0-9]+}}, {{v\[[0-9]+:[0-9]+\]}}, 8 [M0]
-; SI-DAG: DS_WRITE_B64 v{{[0-9]+}}, {{v\[[0-9]+:[0-9]+\]}}, 0 [M0]
+; SI-DAG: DS_WRITE_B64 v{{[0-9]+}}, {{v\[[0-9]+:[0-9]+\]}}, 0x18 [M0]
+; SI-DAG: DS_WRITE_B64 v{{[0-9]+}}, {{v\[[0-9]+:[0-9]+\]}}, 0x10 [M0]
+; SI-DAG: DS_WRITE_B64 v{{[0-9]+}}, {{v\[[0-9]+:[0-9]+\]}}, 0x8 [M0]
+; SI-DAG: DS_WRITE_B64 v{{[0-9]+}}, {{v\[[0-9]+:[0-9]+\]}}, 0x0 [M0]
define void @local_v4i64_store_0_offset(<4 x i64> addrspace(3)* %out) nounwind {
store <4 x i64> <i64 1234, i64 1234, i64 1234, i64 1234>, <4 x i64> addrspace(3)* %out, align 16
ret void
diff --git a/test/CodeGen/R600/local-memory-two-objects.ll b/test/CodeGen/R600/local-memory-two-objects.ll
index 616000d..1e42285 100644
--- a/test/CodeGen/R600/local-memory-two-objects.ll
+++ b/test/CodeGen/R600/local-memory-two-objects.ll
@@ -28,8 +28,8 @@
; constant offsets.
; EG-CHECK: LDS_READ_RET {{[*]*}} OQAP, {{PV|T}}[[ADDRR:[0-9]*\.[XYZW]]]
; EG-CHECK-NOT: LDS_READ_RET {{[*]*}} OQAP, T[[ADDRR]]
-; SI-CHECK: DS_READ_B32 {{v[0-9]+}}, [[ADDRR:v[0-9]+]], 16
-; SI-CHECK: DS_READ_B32 {{v[0-9]+}}, [[ADDRR]], 0,
+; SI-CHECK: DS_READ_B32 {{v[0-9]+}}, [[ADDRR:v[0-9]+]], 0x10
+; SI-CHECK: DS_READ_B32 {{v[0-9]+}}, [[ADDRR]], 0x0,
define void @local_memory_two_objects(i32 addrspace(1)* %out) {
entry:
diff --git a/test/CodeGen/R600/loop-idiom.ll b/test/CodeGen/R600/loop-idiom.ll
index 8a9cba2..128f661 100644
--- a/test/CodeGen/R600/loop-idiom.ll
+++ b/test/CodeGen/R600/loop-idiom.ll
@@ -1,5 +1,5 @@
; RUN: opt -basicaa -loop-idiom -S < %s -march=r600 -mcpu=redwood | FileCheck --check-prefix=R600 --check-prefix=FUNC %s
-; RUN: opt -basicaa -loop-idiom -S < %s -march=r600 -mcpu=SI | FileCheck --check-prefix=SI --check-prefix=FUNC %s
+; RUN: opt -basicaa -loop-idiom -S < %s -march=r600 -mcpu=SI -verify-machineinstrs| FileCheck --check-prefix=SI --check-prefix=FUNC %s
target datalayout = "e-p:32:32-p1:64:64-p2:64:64-p3:32:32-p4:32:32-p5:64:64-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128-v192:256-v256:256-v512:512-v1024:1024-v2048:2048-n32:64"
target triple = "r600--"
diff --git a/test/CodeGen/R600/mad_int24.ll b/test/CodeGen/R600/mad_int24.ll
index df063ec..abb5290 100644
--- a/test/CodeGen/R600/mad_int24.ll
+++ b/test/CodeGen/R600/mad_int24.ll
@@ -1,12 +1,15 @@
-; RUN: llc < %s -march=r600 -mcpu=redwood | FileCheck %s --check-prefix=EG-CHECK
-; RUN: llc < %s -march=r600 -mcpu=cayman | FileCheck %s --check-prefix=CM-CHECK
-; RUN: llc < %s -march=r600 -mcpu=SI -verify-machineinstrs | FileCheck %s --check-prefix=SI-CHECK
+; RUN: llc < %s -march=r600 -mcpu=redwood | FileCheck %s --check-prefix=EG --check-prefix=FUNC
+; RUN: llc < %s -march=r600 -mcpu=cayman | FileCheck %s --check-prefix=CM --check-prefix=FUNC
+; RUN: llc < %s -march=r600 -mcpu=SI -verify-machineinstrs | FileCheck %s --check-prefix=SI --check-prefix=FUNC
-; EG-CHECK: @i32_mad24
+; FUNC-LABEL: @i32_mad24
; Signed 24-bit multiply is not supported on pre-Cayman GPUs.
-; EG-CHECK: MULLO_INT
-; CM-CHECK: MULADD_INT24 {{[ *]*}}T{{[0-9].[XYZW]}}, KC0[2].Z, KC0[2].W, KC0[3].X
-; SI-CHECK: V_MAD_I32_I24
+; EG: MULLO_INT
+; Make sure we aren't masking the inputs.
+; CM-NOT: AND
+; CM: MULADD_INT24
+; SI-NOT: AND
+; SI: V_MAD_I32_I24
define void @i32_mad24(i32 addrspace(1)* %out, i32 %a, i32 %b, i32 %c) {
entry:
%0 = shl i32 %a, 8
diff --git a/test/CodeGen/R600/mad_uint24.ll b/test/CodeGen/R600/mad_uint24.ll
index 3dcadc9..0f0893b 100644
--- a/test/CodeGen/R600/mad_uint24.ll
+++ b/test/CodeGen/R600/mad_uint24.ll
@@ -1,11 +1,10 @@
-; RUN: llc < %s -march=r600 -mcpu=redwood | FileCheck %s --check-prefix=EG-CHECK
-; RUN: llc < %s -march=r600 -mcpu=cayman | FileCheck %s --check-prefix=EG-CHECK
-; RUN: llc < %s -march=r600 -mcpu=SI -verify-machineinstrs | FileCheck %s --check-prefix=SI-CHECK
+; RUN: llc < %s -march=r600 -mcpu=redwood | FileCheck %s --check-prefix=EG --check-prefix=FUNC
+; RUN: llc < %s -march=r600 -mcpu=cayman | FileCheck %s --check-prefix=EG --check-prefix=FUNC
+; RUN: llc < %s -march=r600 -mcpu=SI -verify-machineinstrs | FileCheck %s --check-prefix=SI --check-prefix=FUNC
-; EG-CHECK-LABEL: @u32_mad24
-; EG-CHECK: MULADD_UINT24 {{[* ]*}}T{{[0-9]\.[XYZW]}}, KC0[2].Z, KC0[2].W, KC0[3].X
-; SI-CHECK-LABEL: @u32_mad24
-; SI-CHECK: V_MAD_U32_U24
+; FUNC-LABEL: @u32_mad24
+; EG: MULADD_UINT24
+; SI: V_MAD_U32_U24
define void @u32_mad24(i32 addrspace(1)* %out, i32 %a, i32 %b, i32 %c) {
entry:
@@ -19,18 +18,14 @@ entry:
ret void
}
-; EG-CHECK-LABEL: @i16_mad24
-; EG-CHECK-DAG: VTX_READ_16 [[A:T[0-9]\.X]], T{{[0-9]}}.X, 40
-; EG-CHECK-DAG: VTX_READ_16 [[B:T[0-9]\.X]], T{{[0-9]}}.X, 44
-; EG-CHECK-DAG: VTX_READ_16 [[C:T[0-9]\.X]], T{{[0-9]}}.X, 48
+; FUNC-LABEL: @i16_mad24
; The order of A and B does not matter.
-; EG-CHECK: MULADD_UINT24 {{[* ]*}}T{{[0-9]}}.[[MAD_CHAN:[XYZW]]], [[A]], [[B]], [[C]]
+; EG: MULADD_UINT24 {{[* ]*}}T{{[0-9]}}.[[MAD_CHAN:[XYZW]]]
; The result must be sign-extended
-; EG-CHECK: BFE_INT {{[* ]*}}T{{[0-9]\.[XYZW]}}, PV.[[MAD_CHAN]], 0.0, literal.x
-; EG-CHECK: 16
-; SI-CHECK-LABEL: @i16_mad24
-; SI-CHECK: V_MAD_U32_U24 [[MAD:v[0-9]]], {{[sv][0-9], [sv][0-9]}}
-; SI-CHECK: V_BFE_I32 v{{[0-9]}}, [[MAD]], 0, 16
+; EG: BFE_INT {{[* ]*}}T{{[0-9]\.[XYZW]}}, PV.[[MAD_CHAN]], 0.0, literal.x
+; EG: 16
+; SI: V_MAD_U32_U24 [[MAD:v[0-9]]], {{[sv][0-9], [sv][0-9]}}
+; SI: V_BFE_I32 v{{[0-9]}}, [[MAD]], 0, 16
define void @i16_mad24(i32 addrspace(1)* %out, i16 %a, i16 %b, i16 %c) {
entry:
@@ -41,18 +36,13 @@ entry:
ret void
}
-; EG-CHECK-LABEL: @i8_mad24
-; EG-CHECK-DAG: VTX_READ_8 [[A:T[0-9]\.X]], T{{[0-9]}}.X, 40
-; EG-CHECK-DAG: VTX_READ_8 [[B:T[0-9]\.X]], T{{[0-9]}}.X, 44
-; EG-CHECK-DAG: VTX_READ_8 [[C:T[0-9]\.X]], T{{[0-9]}}.X, 48
-; The order of A and B does not matter.
-; EG-CHECK: MULADD_UINT24 {{[* ]*}}T{{[0-9]}}.[[MAD_CHAN:[XYZW]]], [[A]], [[B]], [[C]]
+; FUNC-LABEL: @i8_mad24
+; EG: MULADD_UINT24 {{[* ]*}}T{{[0-9]}}.[[MAD_CHAN:[XYZW]]]
; The result must be sign-extended
-; EG-CHECK: BFE_INT {{[* ]*}}T{{[0-9]\.[XYZW]}}, PV.[[MAD_CHAN]], 0.0, literal.x
-; EG-CHECK: 8
-; SI-CHECK-LABEL: @i8_mad24
-; SI-CHECK: V_MAD_U32_U24 [[MUL:v[0-9]]], {{[sv][0-9], [sv][0-9]}}
-; SI-CHECK: V_BFE_I32 v{{[0-9]}}, [[MUL]], 0, 8
+; EG: BFE_INT {{[* ]*}}T{{[0-9]\.[XYZW]}}, PV.[[MAD_CHAN]], 0.0, literal.x
+; EG: 8
+; SI: V_MAD_U32_U24 [[MUL:v[0-9]]], {{[sv][0-9], [sv][0-9]}}
+; SI: V_BFE_I32 v{{[0-9]}}, [[MUL]], 0, 8
define void @i8_mad24(i32 addrspace(1)* %out, i8 %a, i8 %b, i8 %c) {
entry:
@@ -62,3 +52,24 @@ entry:
store i32 %2, i32 addrspace(1)* %out
ret void
}
+
+; This tests for a bug where the mad_u24 pattern matcher would call
+; SimplifyDemandedBits on the first operand of the mul instruction
+; assuming that the pattern would be matched to a 24-bit mad. This
+; led to some instructions being incorrectly erased when the entire
+; 24-bit mad pattern wasn't being matched.
+
+; Check that the select instruction is not deleted.
+; FUNC-LABEL: @i24_i32_i32_mad
+; EG: CNDE_INT
+; SI: V_CNDMASK
+define void @i24_i32_i32_mad(i32 addrspace(1)* %out, i32 %a, i32 %b, i32 %c, i32 %d) {
+entry:
+ %0 = ashr i32 %a, 8
+ %1 = icmp ne i32 %c, 0
+ %2 = select i1 %1, i32 %0, i32 34
+ %3 = mul i32 %2, %c
+ %4 = add i32 %3, %d
+ store i32 %4, i32 addrspace(1)* %out
+ ret void
+}
diff --git a/test/CodeGen/R600/mubuf.ll b/test/CodeGen/R600/mubuf.ll
index 2d5ddeb..f465d3d 100644
--- a/test/CodeGen/R600/mubuf.ll
+++ b/test/CodeGen/R600/mubuf.ll
@@ -6,7 +6,7 @@
; MUBUF load with an immediate byte offset that fits into 12-bits
; CHECK-LABEL: @mubuf_load0
-; CHECK: BUFFER_LOAD_DWORD v{{[0-9]}}, s[{{[0-9]:[0-9]}}] + v[{{[0-9]:[0-9]}}] + 4 ; encoding: [0x04,0x80
+; CHECK: BUFFER_LOAD_DWORD v{{[0-9]}}, s[{{[0-9]:[0-9]}}] + v[{{[0-9]:[0-9]}}] + 0x4 ; encoding: [0x04,0x80
define void @mubuf_load0(i32 addrspace(1)* %out, i32 addrspace(1)* %in) {
entry:
%0 = getelementptr i32 addrspace(1)* %in, i64 1
@@ -17,7 +17,7 @@ entry:
; MUBUF load with the largest possible immediate offset
; CHECK-LABEL: @mubuf_load1
-; CHECK: BUFFER_LOAD_UBYTE v{{[0-9]}}, s[{{[0-9]:[0-9]}}] + v[{{[0-9]:[0-9]}}] + 4095 ; encoding: [0xff,0x8f
+; CHECK: BUFFER_LOAD_UBYTE v{{[0-9]}}, s[{{[0-9]:[0-9]}}] + v[{{[0-9]:[0-9]}}] + 0xfff ; encoding: [0xff,0x8f
define void @mubuf_load1(i8 addrspace(1)* %out, i8 addrspace(1)* %in) {
entry:
%0 = getelementptr i8 addrspace(1)* %in, i64 4095
@@ -28,7 +28,7 @@ entry:
; MUBUF load with an immediate byte offset that doesn't fit into 12-bits
; CHECK-LABEL: @mubuf_load2
-; CHECK: BUFFER_LOAD_DWORD v{{[0-9]}}, s[{{[0-9]:[0-9]}}] + v[{{[0-9]:[0-9]}}] + 0 ; encoding: [0x00,0x80
+; CHECK: BUFFER_LOAD_DWORD v{{[0-9]}}, s[{{[0-9]:[0-9]}}] + v[{{[0-9]:[0-9]}}] + 0x0 ; encoding: [0x00,0x80
define void @mubuf_load2(i32 addrspace(1)* %out, i32 addrspace(1)* %in) {
entry:
%0 = getelementptr i32 addrspace(1)* %in, i64 1024
@@ -40,7 +40,7 @@ entry:
; MUBUF load with a 12-bit immediate offset and a register offset
; CHECK-LABEL: @mubuf_load3
; CHECK-NOT: ADD
-; CHECK: BUFFER_LOAD_DWORD v{{[0-9]}}, s[{{[0-9]:[0-9]}}] + v[{{[0-9]:[0-9]}}] + 4 ; encoding: [0x04,0x80
+; CHECK: BUFFER_LOAD_DWORD v{{[0-9]}}, s[{{[0-9]:[0-9]}}] + v[{{[0-9]:[0-9]}}] + 0x4 ; encoding: [0x04,0x80
define void @mubuf_load3(i32 addrspace(1)* %out, i32 addrspace(1)* %in, i64 %offset) {
entry:
%0 = getelementptr i32 addrspace(1)* %in, i64 %offset
@@ -56,7 +56,7 @@ entry:
; MUBUF store with an immediate byte offset that fits into 12-bits
; CHECK-LABEL: @mubuf_store0
-; CHECK: BUFFER_STORE_DWORD v{{[0-9]}}, s[{{[0-9]:[0-9]}}] + v[{{[0-9]:[0-9]}}] + 4 ; encoding: [0x04,0x80
+; CHECK: BUFFER_STORE_DWORD v{{[0-9]}}, s[{{[0-9]:[0-9]}}] + v[{{[0-9]:[0-9]}}] + 0x4 ; encoding: [0x04,0x80
define void @mubuf_store0(i32 addrspace(1)* %out) {
entry:
%0 = getelementptr i32 addrspace(1)* %out, i64 1
@@ -66,7 +66,7 @@ entry:
; MUBUF store with the largest possible immediate offset
; CHECK-LABEL: @mubuf_store1
-; CHECK: BUFFER_STORE_BYTE v{{[0-9]}}, s[{{[0-9]:[0-9]}}] + v[{{[0-9]:[0-9]}}] + 4095 ; encoding: [0xff,0x8f
+; CHECK: BUFFER_STORE_BYTE v{{[0-9]}}, s[{{[0-9]:[0-9]}}] + v[{{[0-9]:[0-9]}}] + 0xfff ; encoding: [0xff,0x8f
define void @mubuf_store1(i8 addrspace(1)* %out) {
entry:
@@ -77,7 +77,7 @@ entry:
; MUBUF store with an immediate byte offset that doesn't fit into 12-bits
; CHECK-LABEL: @mubuf_store2
-; CHECK: BUFFER_STORE_DWORD v{{[0-9]}}, s[{{[0-9]:[0-9]}}] + v[{{[0-9]:[0-9]}}] + 0 ; encoding: [0x00,0x80
+; CHECK: BUFFER_STORE_DWORD v{{[0-9]}}, s[{{[0-9]:[0-9]}}] + v[{{[0-9]:[0-9]}}] + 0x0 ; encoding: [0x00,0x80
define void @mubuf_store2(i32 addrspace(1)* %out) {
entry:
%0 = getelementptr i32 addrspace(1)* %out, i64 1024
@@ -88,7 +88,7 @@ entry:
; MUBUF store with a 12-bit immediate offset and a register offset
; CHECK-LABEL: @mubuf_store3
; CHECK-NOT: ADD
-; CHECK: BUFFER_STORE_DWORD v{{[0-9]}}, s[{{[0-9]:[0-9]}}] + v[{{[0-9]:[0-9]}}] + 4 ; encoding: [0x04,0x80
+; CHECK: BUFFER_STORE_DWORD v{{[0-9]}}, s[{{[0-9]:[0-9]}}] + v[{{[0-9]:[0-9]}}] + 0x4 ; encoding: [0x04,0x80
define void @mubuf_store3(i32 addrspace(1)* %out, i64 %offset) {
entry:
%0 = getelementptr i32 addrspace(1)* %out, i64 %offset
diff --git a/test/CodeGen/R600/mul.ll b/test/CodeGen/R600/mul.ll
index e176148..6ed754c 100644
--- a/test/CodeGen/R600/mul.ll
+++ b/test/CodeGen/R600/mul.ll
@@ -1,15 +1,14 @@
-; RUN: llc < %s -march=r600 -mcpu=redwood | FileCheck --check-prefix=EG-CHECK %s
-; RUN: llc < %s -march=r600 -mcpu=verde -verify-machineinstrs | FileCheck --check-prefix=SI-CHECK %s
+; RUN: llc < %s -march=r600 -mcpu=redwood | FileCheck --check-prefix=EG %s --check-prefix=FUNC
+; RUN: llc < %s -march=r600 -mcpu=verde -verify-machineinstrs | FileCheck --check-prefix=SI --check-prefix=FUNC %s
; mul24 and mad24 are affected
-;EG-CHECK: @test2
-;EG-CHECK: MULLO_INT {{\*? *}}T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}
-;EG-CHECK: MULLO_INT {{\*? *}}T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}
+;FUNC-LABEL: @test2
+;EG: MULLO_INT {{\*? *}}T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}
+;EG: MULLO_INT {{\*? *}}T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}
-;SI-CHECK: @test2
-;SI-CHECK: V_MUL_LO_I32 v{{[0-9]+, v[0-9]+, v[0-9]+}}
-;SI-CHECK: V_MUL_LO_I32 v{{[0-9]+, v[0-9]+, v[0-9]+}}
+;SI: V_MUL_LO_I32 v{{[0-9]+, v[0-9]+, v[0-9]+}}
+;SI: V_MUL_LO_I32 v{{[0-9]+, v[0-9]+, v[0-9]+}}
define void @test2(<2 x i32> addrspace(1)* %out, <2 x i32> addrspace(1)* %in) {
%b_ptr = getelementptr <2 x i32> addrspace(1)* %in, i32 1
@@ -20,17 +19,16 @@ define void @test2(<2 x i32> addrspace(1)* %out, <2 x i32> addrspace(1)* %in) {
ret void
}
-;EG-CHECK: @test4
-;EG-CHECK: MULLO_INT {{\*? *}}T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}
-;EG-CHECK: MULLO_INT {{\*? *}}T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}
-;EG-CHECK: MULLO_INT {{\*? *}}T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}
-;EG-CHECK: MULLO_INT {{\*? *}}T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}
+;FUNC-LABEL: @test4
+;EG: MULLO_INT {{\*? *}}T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}
+;EG: MULLO_INT {{\*? *}}T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}
+;EG: MULLO_INT {{\*? *}}T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}
+;EG: MULLO_INT {{\*? *}}T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}
-;SI-CHECK: @test4
-;SI-CHECK: V_MUL_LO_I32 v{{[0-9]+, v[0-9]+, v[0-9]+}}
-;SI-CHECK: V_MUL_LO_I32 v{{[0-9]+, v[0-9]+, v[0-9]+}}
-;SI-CHECK: V_MUL_LO_I32 v{{[0-9]+, v[0-9]+, v[0-9]+}}
-;SI-CHECK: V_MUL_LO_I32 v{{[0-9]+, v[0-9]+, v[0-9]+}}
+;SI: V_MUL_LO_I32 v{{[0-9]+, v[0-9]+, v[0-9]+}}
+;SI: V_MUL_LO_I32 v{{[0-9]+, v[0-9]+, v[0-9]+}}
+;SI: V_MUL_LO_I32 v{{[0-9]+, v[0-9]+, v[0-9]+}}
+;SI: V_MUL_LO_I32 v{{[0-9]+, v[0-9]+, v[0-9]+}}
define void @test4(<4 x i32> addrspace(1)* %out, <4 x i32> addrspace(1)* %in) {
%b_ptr = getelementptr <4 x i32> addrspace(1)* %in, i32 1
@@ -52,3 +50,32 @@ define void @trunc_i64_mul_to_i32(i32 addrspace(1)* %out, i64 %a, i64 %b) {
store i32 %trunc, i32 addrspace(1)* %out, align 8
ret void
}
+
+; This 64-bit multiply should just use MUL_HI and MUL_LO, since the top
+; 32-bits of both arguments are sign bits.
+; FUNC-LABEL: @mul64_sext_c
+; EG-DAG: MULLO_INT
+; EG-DAG: MULHI_INT
+; SI-DAG: V_MUL_LO_I32
+; SI-DAG: V_MUL_HI_I32
+define void @mul64_sext_c(i64 addrspace(1)* %out, i32 %in) {
+entry:
+ %0 = sext i32 %in to i64
+ %1 = mul i64 %0, 80
+ store i64 %1, i64 addrspace(1)* %out
+ ret void
+}
+
+; A standard 64-bit multiply. The expansion should be around 6 instructions.
+; It would be difficult to match the expansion correctly without writing
+; a really complicated list of FileCheck expressions. I don't want
+; to confuse people who may 'break' this test with a correct optimization,
+; so this test just uses FUNC-LABEL to make sure the compiler does not
+; crash with a 'failed to select' error.
+; FUNC-LABEL: @mul64
+define void @mul64(i64 addrspace(1)* %out, i64 %a, i64 %b) {
+entry:
+ %0 = mul i64 %a, %b
+ store i64 %0, i64 addrspace(1)* %out
+ ret void
+}
diff --git a/test/CodeGen/R600/mul_int24.ll b/test/CodeGen/R600/mul_int24.ll
index 66a1a9e..046911b 100644
--- a/test/CodeGen/R600/mul_int24.ll
+++ b/test/CodeGen/R600/mul_int24.ll
@@ -1,12 +1,15 @@
-; RUN: llc < %s -march=r600 -mcpu=redwood | FileCheck %s --check-prefix=EG-CHECK
-; RUN: llc < %s -march=r600 -mcpu=cayman | FileCheck %s --check-prefix=CM-CHECK
-; RUN: llc < %s -march=r600 -mcpu=SI -verify-machineinstrs | FileCheck %s --check-prefix=SI-CHECK
+; RUN: llc < %s -march=r600 -mcpu=redwood | FileCheck %s --check-prefix=EG --check-prefix=FUNC
+; RUN: llc < %s -march=r600 -mcpu=cayman | FileCheck %s --check-prefix=CM --check-prefix=FUNC
+; RUN: llc < %s -march=r600 -mcpu=SI -verify-machineinstrs | FileCheck %s --check-prefix=SI --check-prefix=FUNC
-; EG-CHECK: @i32_mul24
+; FUNC-LABEL: @i32_mul24
; Signed 24-bit multiply is not supported on pre-Cayman GPUs.
-; EG-CHECK: MULLO_INT
-; CM-CHECK: MUL_INT24 {{[ *]*}}T{{[0-9].[XYZW]}}, KC0[2].Z, KC0[2].W
-; SI-CHECK: V_MUL_I32_I24
+; EG: MULLO_INT
+; Make sure we are not masking the inputs
+; CM-NOT: AND
+; CM: MUL_INT24
+; SI-NOT: AND
+; SI: V_MUL_I32_I24
define void @i32_mul24(i32 addrspace(1)* %out, i32 %a, i32 %b) {
entry:
%0 = shl i32 %a, 8
diff --git a/test/CodeGen/R600/mul_uint24.ll b/test/CodeGen/R600/mul_uint24.ll
index a413961..419f275 100644
--- a/test/CodeGen/R600/mul_uint24.ll
+++ b/test/CodeGen/R600/mul_uint24.ll
@@ -1,11 +1,10 @@
-; RUN: llc < %s -march=r600 -mcpu=redwood | FileCheck %s --check-prefix=EG-CHECK
-; RUN: llc < %s -march=r600 -mcpu=cayman | FileCheck %s --check-prefix=EG-CHECK
-; RUN: llc < %s -march=r600 -mcpu=SI -verify-machineinstrs | FileCheck %s --check-prefix=SI-CHECK
+; RUN: llc < %s -march=r600 -mcpu=redwood | FileCheck %s --check-prefix=EG --check-prefix=FUNC
+; RUN: llc < %s -march=r600 -mcpu=cayman | FileCheck %s --check-prefix=EG --check-prefix=FUNC
+; RUN: llc < %s -march=r600 -mcpu=SI -verify-machineinstrs | FileCheck %s --check-prefix=SI --check-prefix=FUNC
-; EG-CHECK-LABEL: @u32_mul24
-; EG-CHECK: MUL_UINT24 {{[* ]*}}T{{[0-9]\.[XYZW]}}, KC0[2].Z, KC0[2].W
-; SI-CHECK-LABEL: @u32_mul24
-; SI-CHECK: V_MUL_U32_U24
+; FUNC-LABEL: @u32_mul24
+; EG: MUL_UINT24 {{[* ]*}}T{{[0-9]\.[XYZW]}}, KC0[2].Z, KC0[2].W
+; SI: V_MUL_U32_U24
define void @u32_mul24(i32 addrspace(1)* %out, i32 %a, i32 %b) {
entry:
@@ -18,17 +17,13 @@ entry:
ret void
}
-; EG-CHECK-LABEL: @i16_mul24
-; EG-CHECK-DAG: VTX_READ_16 [[A:T[0-9]\.X]], T{{[0-9]}}.X, 40
-; EG-CHECK-DAG: VTX_READ_16 [[B:T[0-9]\.X]], T{{[0-9]}}.X, 44
-; The order of A and B does not matter.
-; EG-CHECK: MUL_UINT24 {{[* ]*}}T{{[0-9]}}.[[MUL_CHAN:[XYZW]]], [[A]], [[B]]
+; FUNC-LABEL: @i16_mul24
+; EG: MUL_UINT24 {{[* ]*}}T{{[0-9]}}.[[MUL_CHAN:[XYZW]]]
; The result must be sign-extended
-; EG-CHECK: BFE_INT {{[* ]*}}T{{[0-9]}}.{{[XYZW]}}, PV.[[MUL_CHAN]], 0.0, literal.x
-; EG-CHECK: 16
-; SI-CHECK-LABEL: @i16_mul24
-; SI-CHECK: V_MUL_U32_U24_e{{(32|64)}} [[MUL:v[0-9]]], {{[sv][0-9], [sv][0-9]}}
-; SI-CHECK: V_BFE_I32 v{{[0-9]}}, [[MUL]], 0, 16,
+; EG: BFE_INT {{[* ]*}}T{{[0-9]}}.{{[XYZW]}}, PV.[[MUL_CHAN]], 0.0, literal.x
+; EG: 16
+; SI: V_MUL_U32_U24_e{{(32|64)}} [[MUL:v[0-9]]], {{[sv][0-9], [sv][0-9]}}
+; SI: V_BFE_I32 v{{[0-9]}}, [[MUL]], 0, 16,
define void @i16_mul24(i32 addrspace(1)* %out, i16 %a, i16 %b) {
entry:
%0 = mul i16 %a, %b
@@ -37,16 +32,12 @@ entry:
ret void
}
-; EG-CHECK-LABEL: @i8_mul24
-; EG-CHECK-DAG: VTX_READ_8 [[A:T[0-9]\.X]], T{{[0-9]}}.X, 40
-; EG-CHECK-DAG: VTX_READ_8 [[B:T[0-9]\.X]], T{{[0-9]}}.X, 44
-; The order of A and B does not matter.
-; EG-CHECK: MUL_UINT24 {{[* ]*}}T{{[0-9]}}.[[MUL_CHAN:[XYZW]]], [[A]], [[B]]
+; FUNC-LABEL: @i8_mul24
+; EG: MUL_UINT24 {{[* ]*}}T{{[0-9]}}.[[MUL_CHAN:[XYZW]]]
; The result must be sign-extended
-; EG-CHECK: BFE_INT {{[* ]*}}T{{[0-9]}}.{{[XYZW]}}, PV.[[MUL_CHAN]], 0.0, literal.x
-; SI-CHECK-LABEL: @i8_mul24
-; SI-CHECK: V_MUL_U32_U24_e{{(32|64)}} [[MUL:v[0-9]]], {{[sv][0-9], [sv][0-9]}}
-; SI-CHECK: V_BFE_I32 v{{[0-9]}}, [[MUL]], 0, 8,
+; EG: BFE_INT {{[* ]*}}T{{[0-9]}}.{{[XYZW]}}, PV.[[MUL_CHAN]], 0.0, literal.x
+; SI: V_MUL_U32_U24_e{{(32|64)}} [[MUL:v[0-9]]], {{[sv][0-9], [sv][0-9]}}
+; SI: V_BFE_I32 v{{[0-9]}}, [[MUL]], 0, 8,
define void @i8_mul24(i32 addrspace(1)* %out, i8 %a, i8 %b) {
entry:
@@ -55,3 +46,21 @@ entry:
store i32 %1, i32 addrspace(1)* %out
ret void
}
+
+; Multiply with 24-bit inputs and 64-bit output
+; FUNC_LABEL: @mul24_i64
+; EG; MUL_UINT24
+; EG: MULHI
+; SI: V_MUL_U32_U24
+; FIXME: SI support 24-bit mulhi
+; SI: V_MUL_HI_U32
+define void @mul24_i64(i64 addrspace(1)* %out, i64 %a, i64 %b) {
+entry:
+ %0 = shl i64 %a, 40
+ %a_24 = lshr i64 %0, 40
+ %1 = shl i64 %b, 40
+ %b_24 = lshr i64 %1, 40
+ %2 = mul i64 %a_24, %b_24
+ store i64 %2, i64 addrspace(1)* %out
+ ret void
+}
diff --git a/test/CodeGen/R600/mulhu.ll b/test/CodeGen/R600/mulhu.ll
index d5fc014..8640127 100644
--- a/test/CodeGen/R600/mulhu.ll
+++ b/test/CodeGen/R600/mulhu.ll
@@ -1,6 +1,6 @@
;RUN: llc < %s -march=r600 -mcpu=verde -verify-machineinstrs | FileCheck %s
-;CHECK: V_MOV_B32_e32 v{{[0-9]+}}, -1431655765
+;CHECK: V_MOV_B32_e32 v{{[0-9]+}}, 0xaaaaaaab
;CHECK: V_MUL_HI_U32 v0, {{[sv][0-9]+}}, {{v[0-9]+}}
;CHECK-NEXT: V_LSHRREV_B32_e32 v0, 1, v0
diff --git a/test/CodeGen/R600/or.ll b/test/CodeGen/R600/or.ll
index 2cc991e..9878366 100644
--- a/test/CodeGen/R600/or.ll
+++ b/test/CodeGen/R600/or.ll
@@ -89,8 +89,8 @@ define void @scalar_vector_or_i64(i64 addrspace(1)* %out, i64 addrspace(1)* %a,
}
; SI-LABEL: @vector_or_i64_loadimm
-; SI-DAG: S_MOV_B32 [[LO_S_IMM:s[0-9]+]], -545810305
-; SI-DAG: S_MOV_B32 [[HI_S_IMM:s[0-9]+]], 5231
+; SI-DAG: S_MOV_B32 [[LO_S_IMM:s[0-9]+]], 0xdf77987f
+; SI-DAG: S_MOV_B32 [[HI_S_IMM:s[0-9]+]], 0x146f
; SI-DAG: BUFFER_LOAD_DWORDX2 v{{\[}}[[LO_VREG:[0-9]+]]:[[HI_VREG:[0-9]+]]{{\]}},
; SI-DAG: V_OR_B32_e32 {{v[0-9]+}}, [[LO_S_IMM]], v[[LO_VREG]]
; SI-DAG: V_OR_B32_e32 {{v[0-9]+}}, [[HI_S_IMM]], v[[HI_VREG]]
diff --git a/test/CodeGen/R600/private-memory.ll b/test/CodeGen/R600/private-memory.ll
index 4920320..d3453f2 100644
--- a/test/CodeGen/R600/private-memory.ll
+++ b/test/CodeGen/R600/private-memory.ll
@@ -1,5 +1,5 @@
-; RUN: llc < %s -march=r600 -mcpu=redwood | FileCheck %s --check-prefix=R600-CHECK --check-prefix=FUNC
-; RUN: llc < %s -march=r600 -mcpu=SI | FileCheck %s --check-prefix=SI-CHECK --check-prefix=FUNC
+; RUN: llc -march=r600 -mcpu=redwood < %s | FileCheck %s --check-prefix=R600-CHECK --check-prefix=FUNC
+; RUN: llc -verify-machineinstrs -march=r600 -mcpu=SI < %s | FileCheck %s --check-prefix=SI-CHECK --check-prefix=FUNC
; This test checks that uses and defs of the AR register happen in the same
; instruction clause.
@@ -119,7 +119,7 @@ for.end:
; R600-CHECK: *
; R600-CHECK: MOVA_INT
-; SI-CHECK: V_MOV_B32_e32 v{{[0-9]}}, 65536
+; SI-CHECK: V_MOV_B32_e32 v{{[0-9]}}, 0x10000
; SI-CHECK: V_MOVRELS_B32_e32
define void @short_array(i32 addrspace(1)* %out, i32 %index) {
entry:
@@ -142,7 +142,7 @@ entry:
; R600-CHECK: *
; R600-CHECK-NEXT: MOVA_INT
-; SI-CHECK: V_OR_B32_e32 v{{[0-9]}}, 256
+; SI-CHECK: V_OR_B32_e32 v{{[0-9]}}, 0x100
; SI-CHECK: V_MOVRELS_B32_e32
define void @char_array(i32 addrspace(1)* %out, i32 %index) {
entry:
diff --git a/test/CodeGen/R600/pv.ll b/test/CodeGen/R600/pv.ll
index 5a930b2..f322bc7 100644
--- a/test/CodeGen/R600/pv.ll
+++ b/test/CodeGen/R600/pv.ll
@@ -1,7 +1,7 @@
; RUN: llc < %s -march=r600 | FileCheck %s
;CHECK: DOT4 * T{{[0-9]\.W}} (MASKED)
-;CHECK: MAX T{{[0-9].[XYZW]}}, 0.0, PV.X
+;CHECK: MAX T{{[0-9].[XYZW]}}, PV.X, 0.0
define void @main(<4 x float> inreg %reg0, <4 x float> inreg %reg1, <4 x float> inreg %reg2, <4 x float> inreg %reg3, <4 x float> inreg %reg4, <4 x float> inreg %reg5, <4 x float> inreg %reg6, <4 x float> inreg %reg7) #0 {
main_body:
diff --git a/test/CodeGen/R600/register-count-comments.ll b/test/CodeGen/R600/register-count-comments.ll
index a64b280..329077c 100644
--- a/test/CodeGen/R600/register-count-comments.ll
+++ b/test/CodeGen/R600/register-count-comments.ll
@@ -1,4 +1,4 @@
-; RUN: llc -march=r600 -mcpu=SI < %s | FileCheck -check-prefix=SI %s
+; RUN: llc -march=r600 -mcpu=SI -verify-machineinstrs< %s | FileCheck -check-prefix=SI %s
declare i32 @llvm.SI.tid() nounwind readnone
diff --git a/test/CodeGen/R600/salu-to-valu.ll b/test/CodeGen/R600/salu-to-valu.ll
index e461bf9..e7719b6 100644
--- a/test/CodeGen/R600/salu-to-valu.ll
+++ b/test/CodeGen/R600/salu-to-valu.ll
@@ -46,3 +46,45 @@ declare i32 @llvm.r600.read.tidig.x() #1
declare i32 @llvm.r600.read.tidig.y() #1
attributes #1 = { nounwind readnone }
+
+; Test moving an SMRD instruction to the VALU
+
+; CHECK-LABEL: @smrd_valu
+; CHECK: BUFFER_LOAD_DWORD [[OUT:v[0-9]+]]
+; CHECK: BUFFER_STORE_DWORD [[OUT]]
+
+define void @smrd_valu(i32 addrspace(2)* addrspace(1)* %in, i32 %a, i32 addrspace(1)* %out) {
+entry:
+ %0 = icmp ne i32 %a, 0
+ br i1 %0, label %if, label %else
+
+if:
+ %1 = load i32 addrspace(2)* addrspace(1)* %in
+ br label %endif
+
+else:
+ %2 = getelementptr i32 addrspace(2)* addrspace(1)* %in
+ %3 = load i32 addrspace(2)* addrspace(1)* %2
+ br label %endif
+
+endif:
+ %4 = phi i32 addrspace(2)* [%1, %if], [%3, %else]
+ %5 = getelementptr i32 addrspace(2)* %4, i32 3000
+ %6 = load i32 addrspace(2)* %5
+ store i32 %6, i32 addrspace(1)* %out
+ ret void
+}
+
+; Test moving ann SMRD with an immediate offset to the VALU
+
+; CHECK-LABEL: @smrd_valu2
+; CHECK: BUFFER_LOAD_DWORD
+define void @smrd_valu2(i32 addrspace(1)* %out, [8 x i32] addrspace(2)* %in) {
+entry:
+ %0 = call i32 @llvm.r600.read.tidig.x() nounwind readnone
+ %1 = add i32 %0, 4
+ %2 = getelementptr [8 x i32] addrspace(2)* %in, i32 %0, i32 4
+ %3 = load i32 addrspace(2)* %2
+ store i32 %3, i32 addrspace(1)* %out
+ ret void
+}
diff --git a/test/CodeGen/R600/schedule-vs-if-nested-loop-failure.ll b/test/CodeGen/R600/schedule-vs-if-nested-loop-failure.ll
index 2a286d1..3d2142d 100644
--- a/test/CodeGen/R600/schedule-vs-if-nested-loop-failure.ll
+++ b/test/CodeGen/R600/schedule-vs-if-nested-loop-failure.ll
@@ -1,6 +1,6 @@
; XFAIL: *
; REQUIRES: asserts
-; RUN: llc -O0 -march=r600 -mcpu=SI < %s | FileCheck %s -check-prefix=SI
+; RUN: llc -O0 -march=r600 -mcpu=SI -verify-machineinstrs< %s | FileCheck %s -check-prefix=SI
declare void @llvm.AMDGPU.barrier.local() nounwind noduplicate
diff --git a/test/CodeGen/R600/selectcc.ll b/test/CodeGen/R600/selectcc.ll
new file mode 100644
index 0000000..a8f57cf
--- /dev/null
+++ b/test/CodeGen/R600/selectcc.ll
@@ -0,0 +1,19 @@
+; RUN: llc -verify-machineinstrs -march=r600 -mcpu=redwood < %s | FileCheck -check-prefix=EG -check-prefix=FUNC %s
+; RUN: llc -verify-machineinstrs -march=r600 -mcpu=SI < %s | FileCheck -check-prefix=SI -check-prefix=FUNC %s
+
+; FUNC-LABEL: @selectcc_i64
+; EG: XOR_INT
+; EG: XOR_INT
+; EG: OR_INT
+; EG: CNDE_INT
+; EG: CNDE_INT
+; SI: V_CMP_EQ_I64
+; SI: V_CNDMASK
+; SI: V_CNDMASK
+define void @selectcc_i64(i64 addrspace(1) * %out, i64 %lhs, i64 %rhs, i64 %true, i64 %false) {
+entry:
+ %0 = icmp eq i64 %lhs, %rhs
+ %1 = select i1 %0, i64 %true, i64 %false
+ store i64 %1, i64 addrspace(1)* %out
+ ret void
+}
diff --git a/test/CodeGen/R600/setcc.ll b/test/CodeGen/R600/setcc.ll
index 8d34c4a..5bd95b7 100644
--- a/test/CodeGen/R600/setcc.ll
+++ b/test/CodeGen/R600/setcc.ll
@@ -1,5 +1,5 @@
;RUN: llc < %s -march=r600 -mcpu=redwood | FileCheck --check-prefix=R600 --check-prefix=FUNC %s
-;RUN: llc < %s -march=r600 -mcpu=SI | FileCheck --check-prefix=SI --check-prefix=FUNC %s
+;RUN: llc < %s -march=r600 -mcpu=SI -verify-machineinstrs| FileCheck --check-prefix=SI --check-prefix=FUNC %s
; FUNC-LABEL: @setcc_v2i32
; R600-DAG: SETE_INT * T{{[0-9]+\.[XYZW]}}, KC0[3].X, KC0[3].Z
@@ -96,7 +96,9 @@ entry:
; R600-DAG: SETNE_INT
; SI: V_CMP_O_F32
; SI: V_CMP_NEQ_F32
-; SI: S_AND_B64
+; SI: V_CNDMASK_B32_e64
+; SI: V_CNDMASK_B32_e64
+; SI: V_AND_B32_e32
define void @f32_one(i32 addrspace(1)* %out, float %a, float %b) {
entry:
%0 = fcmp one float %a, %b
@@ -128,7 +130,9 @@ entry:
; R600-DAG: SETNE_INT
; SI: V_CMP_U_F32
; SI: V_CMP_EQ_F32
-; SI: S_OR_B64
+; SI: V_CNDMASK_B32_e64
+; SI: V_CNDMASK_B32_e64
+; SI: V_OR_B32_e32
define void @f32_ueq(i32 addrspace(1)* %out, float %a, float %b) {
entry:
%0 = fcmp ueq float %a, %b
@@ -142,7 +146,9 @@ entry:
; R600: SETE_DX10
; SI: V_CMP_U_F32
; SI: V_CMP_GT_F32
-; SI: S_OR_B64
+; SI: V_CNDMASK_B32_e64
+; SI: V_CNDMASK_B32_e64
+; SI: V_OR_B32_e32
define void @f32_ugt(i32 addrspace(1)* %out, float %a, float %b) {
entry:
%0 = fcmp ugt float %a, %b
@@ -156,7 +162,9 @@ entry:
; R600: SETE_DX10
; SI: V_CMP_U_F32
; SI: V_CMP_GE_F32
-; SI: S_OR_B64
+; SI: V_CNDMASK_B32_e64
+; SI: V_CNDMASK_B32_e64
+; SI: V_OR_B32_e32
define void @f32_uge(i32 addrspace(1)* %out, float %a, float %b) {
entry:
%0 = fcmp uge float %a, %b
@@ -170,7 +178,9 @@ entry:
; R600: SETE_DX10
; SI: V_CMP_U_F32
; SI: V_CMP_LT_F32
-; SI: S_OR_B64
+; SI: V_CNDMASK_B32_e64
+; SI: V_CNDMASK_B32_e64
+; SI: V_OR_B32_e32
define void @f32_ult(i32 addrspace(1)* %out, float %a, float %b) {
entry:
%0 = fcmp ult float %a, %b
@@ -184,7 +194,9 @@ entry:
; R600: SETE_DX10
; SI: V_CMP_U_F32
; SI: V_CMP_LE_F32
-; SI: S_OR_B64
+; SI: V_CNDMASK_B32_e64
+; SI: V_CNDMASK_B32_e64
+; SI: V_OR_B32_e32
define void @f32_ule(i32 addrspace(1)* %out, float %a, float %b) {
entry:
%0 = fcmp ule float %a, %b
diff --git a/test/CodeGen/R600/setcc64.ll b/test/CodeGen/R600/setcc64.ll
index 9202fc0..54a33b3 100644
--- a/test/CodeGen/R600/setcc64.ll
+++ b/test/CodeGen/R600/setcc64.ll
@@ -1,4 +1,4 @@
-;RUN: llc < %s -march=r600 -mcpu=SI | FileCheck --check-prefix=SI --check-prefix=FUNC %s
+;RUN: llc < %s -march=r600 -mcpu=SI -verify-machineinstrs| FileCheck --check-prefix=SI --check-prefix=FUNC %s
; XXX: Merge this into setcc, once R600 supports 64-bit operations
@@ -59,7 +59,9 @@ entry:
; FUNC-LABEL: @f64_one
; SI: V_CMP_O_F64
; SI: V_CMP_NEQ_F64
-; SI: S_AND_B64
+; SI: V_CNDMASK_B32_e64
+; SI: V_CNDMASK_B32_e64
+; SI: V_AND_B32_e32
define void @f64_one(i32 addrspace(1)* %out, double %a, double %b) {
entry:
%0 = fcmp one double %a, %b
@@ -81,7 +83,9 @@ entry:
; FUNC-LABEL: @f64_ueq
; SI: V_CMP_U_F64
; SI: V_CMP_EQ_F64
-; SI: S_OR_B64
+; SI: V_CNDMASK_B32_e64
+; SI: V_CNDMASK_B32_e64
+; SI: V_OR_B32_e32
define void @f64_ueq(i32 addrspace(1)* %out, double %a, double %b) {
entry:
%0 = fcmp ueq double %a, %b
@@ -93,7 +97,9 @@ entry:
; FUNC-LABEL: @f64_ugt
; SI: V_CMP_U_F64
; SI: V_CMP_GT_F64
-; SI: S_OR_B64
+; SI: V_CNDMASK_B32_e64
+; SI: V_CNDMASK_B32_e64
+; SI: V_OR_B32_e32
define void @f64_ugt(i32 addrspace(1)* %out, double %a, double %b) {
entry:
%0 = fcmp ugt double %a, %b
@@ -105,7 +111,9 @@ entry:
; FUNC-LABEL: @f64_uge
; SI: V_CMP_U_F64
; SI: V_CMP_GE_F64
-; SI: S_OR_B64
+; SI: V_CNDMASK_B32_e64
+; SI: V_CNDMASK_B32_e64
+; SI: V_OR_B32_e32
define void @f64_uge(i32 addrspace(1)* %out, double %a, double %b) {
entry:
%0 = fcmp uge double %a, %b
@@ -117,7 +125,9 @@ entry:
; FUNC-LABEL: @f64_ult
; SI: V_CMP_U_F64
; SI: V_CMP_LT_F64
-; SI: S_OR_B64
+; SI: V_CNDMASK_B32_e64
+; SI: V_CNDMASK_B32_e64
+; SI: V_OR_B32_e32
define void @f64_ult(i32 addrspace(1)* %out, double %a, double %b) {
entry:
%0 = fcmp ult double %a, %b
@@ -129,7 +139,9 @@ entry:
; FUNC-LABEL: @f64_ule
; SI: V_CMP_U_F64
; SI: V_CMP_LE_F64
-; SI: S_OR_B64
+; SI: V_CNDMASK_B32_e64
+; SI: V_CNDMASK_B32_e64
+; SI: V_OR_B32_e32
define void @f64_ule(i32 addrspace(1)* %out, double %a, double %b) {
entry:
%0 = fcmp ule double %a, %b
diff --git a/test/CodeGen/R600/seto.ll b/test/CodeGen/R600/seto.ll
index 8633a4b..e90e788 100644
--- a/test/CodeGen/R600/seto.ll
+++ b/test/CodeGen/R600/seto.ll
@@ -1,6 +1,7 @@
;RUN: llc < %s -march=r600 -mcpu=verde -verify-machineinstrs | FileCheck %s
-;CHECK: V_CMP_O_F32_e64 s[0:1], {{[sv][0-9]+, [sv][0-9]+}}, 0, 0, 0, 0
+;CHECK-LABEL: @main
+;CHECK: V_CMP_O_F32_e64 s[0:1], {{[sv][0-9]+, [sv][0-9]+}}, 0, 0
define void @main(float %p) {
main_body:
diff --git a/test/CodeGen/R600/setuo.ll b/test/CodeGen/R600/setuo.ll
index c77a37e..3b1db8b 100644
--- a/test/CodeGen/R600/setuo.ll
+++ b/test/CodeGen/R600/setuo.ll
@@ -1,6 +1,7 @@
;RUN: llc < %s -march=r600 -mcpu=verde -verify-machineinstrs | FileCheck %s
-;CHECK: V_CMP_U_F32_e64 s[0:1], {{[sv][0-9]+, [sv][0-9]+}}, 0, 0, 0, 0
+;CHECK-LABEL: @main
+;CHECK: V_CMP_U_F32_e64 s[0:1], {{[sv][0-9]+, [sv][0-9]+}}, 0, 0
define void @main(float %p) {
main_body:
diff --git a/test/CodeGen/R600/sext-in-reg.ll b/test/CodeGen/R600/sext-in-reg.ll
index eef3f07..1b02e4b 100644
--- a/test/CodeGen/R600/sext-in-reg.ll
+++ b/test/CodeGen/R600/sext-in-reg.ll
@@ -1,15 +1,18 @@
-; RUN: llc < %s -march=r600 -mcpu=SI | FileCheck -check-prefix=SI -check-prefix=FUNC %s
-; RUN: llc < %s -march=r600 -mcpu=cypress | FileCheck -check-prefix=EG -check-prefix=FUNC %s
+; RUN: llc -march=r600 -mcpu=SI -verify-machineinstrs < %s | FileCheck -check-prefix=SI -check-prefix=FUNC %s
+; RUN: llc -march=r600 -mcpu=cypress -verify-machineinstrs < %s | FileCheck -check-prefix=EG -check-prefix=FUNC %s
declare i32 @llvm.AMDGPU.imax(i32, i32) nounwind readnone
; FUNC-LABEL: @sext_in_reg_i1_i32
; SI: S_LOAD_DWORD [[ARG:s[0-9]+]],
-; SI: V_BFE_I32 [[EXTRACT:v[0-9]+]], [[ARG]], 0, 1
+; SI: S_BFE_I32 [[SEXTRACT:s[0-9]+]], [[ARG]], 0x10000
+; SI: V_MOV_B32_e32 [[EXTRACT:v[0-9]+]], [[SEXTRACT]]
; SI: BUFFER_STORE_DWORD [[EXTRACT]],
-; EG: BFE_INT
+; EG: MEM_{{.*}} STORE_{{.*}} [[RES:T[0-9]+\.[XYZW]]], [[ADDR:T[0-9]+.[XYZW]]]
+; EG: BFE_INT [[RES]], {{.*}}, 0.0, 1
+; EG-NEXT: LSHR * [[ADDR]]
define void @sext_in_reg_i1_i32(i32 addrspace(1)* %out, i32 %in) {
%shl = shl i32 %in, 31
%sext = ashr i32 %shl, 31
@@ -19,10 +22,14 @@ define void @sext_in_reg_i1_i32(i32 addrspace(1)* %out, i32 %in) {
; FUNC-LABEL: @sext_in_reg_i8_to_i32
; SI: S_ADD_I32 [[VAL:s[0-9]+]],
-; SI: V_BFE_I32 [[EXTRACT:v[0-9]+]], [[VAL]], 0, 8
-; SI: BUFFER_STORE_DWORD [[EXTRACT]],
-
-; EG: BFE_INT
+; SI: S_SEXT_I32_I8 [[EXTRACT:s[0-9]+]], [[VAL]]
+; SI: V_MOV_B32_e32 [[VEXTRACT:v[0-9]+]], [[EXTRACT]]
+; SI: BUFFER_STORE_DWORD [[VEXTRACT]],
+
+; EG: MEM_{{.*}} STORE_{{.*}} [[RES:T[0-9]+\.[XYZW]]], [[ADDR:T[0-9]+.[XYZW]]]
+; EG: ADD_INT
+; EG-NEXT: BFE_INT [[RES]], {{.*}}, 0.0, literal
+; EG-NEXT: LSHR * [[ADDR]]
define void @sext_in_reg_i8_to_i32(i32 addrspace(1)* %out, i32 %a, i32 %b) nounwind {
%c = add i32 %a, %b ; add to prevent folding into extload
%shl = shl i32 %c, 24
@@ -33,10 +40,14 @@ define void @sext_in_reg_i8_to_i32(i32 addrspace(1)* %out, i32 %a, i32 %b) nounw
; FUNC-LABEL: @sext_in_reg_i16_to_i32
; SI: S_ADD_I32 [[VAL:s[0-9]+]],
-; SI: V_BFE_I32 [[EXTRACT:v[0-9]+]], [[VAL]], 0, 16
-; SI: BUFFER_STORE_DWORD [[EXTRACT]],
-
-; EG: BFE_INT
+; SI: S_SEXT_I32_I16 [[EXTRACT:s[0-9]+]], [[VAL]]
+; SI: V_MOV_B32_e32 [[VEXTRACT:v[0-9]+]], [[EXTRACT]]
+; SI: BUFFER_STORE_DWORD [[VEXTRACT]],
+
+; EG: MEM_{{.*}} STORE_{{.*}} [[RES:T[0-9]+\.[XYZW]]], [[ADDR:T[0-9]+.[XYZW]]]
+; EG: ADD_INT
+; EG-NEXT: BFE_INT [[RES]], {{.*}}, 0.0, literal
+; EG-NEXT: LSHR * [[ADDR]]
define void @sext_in_reg_i16_to_i32(i32 addrspace(1)* %out, i32 %a, i32 %b) nounwind {
%c = add i32 %a, %b ; add to prevent folding into extload
%shl = shl i32 %c, 16
@@ -47,10 +58,14 @@ define void @sext_in_reg_i16_to_i32(i32 addrspace(1)* %out, i32 %a, i32 %b) noun
; FUNC-LABEL: @sext_in_reg_i8_to_v1i32
; SI: S_ADD_I32 [[VAL:s[0-9]+]],
-; SI: V_BFE_I32 [[EXTRACT:v[0-9]+]], [[VAL]], 0, 8
-; SI: BUFFER_STORE_DWORD [[EXTRACT]],
-
-; EG: BFE_INT
+; SI: S_SEXT_I32_I8 [[EXTRACT:s[0-9]+]], [[VAL]]
+; SI: V_MOV_B32_e32 [[VEXTRACT:v[0-9]+]], [[EXTRACT]]
+; SI: BUFFER_STORE_DWORD [[VEXTRACT]],
+
+; EG: MEM_{{.*}} STORE_{{.*}} [[RES:T[0-9]+\.[XYZW]]], [[ADDR:T[0-9]+.[XYZW]]]
+; EG: ADD_INT
+; EG-NEXT: BFE_INT [[RES]], {{.*}}, 0.0, literal
+; EG-NEXT: LSHR * [[ADDR]]
define void @sext_in_reg_i8_to_v1i32(<1 x i32> addrspace(1)* %out, <1 x i32> %a, <1 x i32> %b) nounwind {
%c = add <1 x i32> %a, %b ; add to prevent folding into extload
%shl = shl <1 x i32> %c, <i32 24>
@@ -59,13 +74,35 @@ define void @sext_in_reg_i8_to_v1i32(<1 x i32> addrspace(1)* %out, <1 x i32> %a,
ret void
}
+; FUNC-LABEL: @sext_in_reg_i1_to_i64
+; SI: S_ADD_I32 [[VAL:s[0-9]+]],
+; SI: S_BFE_I32 s{{[0-9]+}}, s{{[0-9]+}}, 0x10000
+; SI: S_MOV_B32 {{s[0-9]+}}, -1
+; SI: BUFFER_STORE_DWORDX2
+define void @sext_in_reg_i1_to_i64(i64 addrspace(1)* %out, i64 %a, i64 %b) nounwind {
+ %c = add i64 %a, %b
+ %shl = shl i64 %c, 63
+ %ashr = ashr i64 %shl, 63
+ store i64 %ashr, i64 addrspace(1)* %out, align 8
+ ret void
+}
+
; FUNC-LABEL: @sext_in_reg_i8_to_i64
-; SI: V_BFE_I32 {{v[0-9]+}}, {{s[0-9]+}}, 0, 8
-; SI: V_ASHRREV_I32_e32 {{v[0-9]+}}, 31,
-; SI: BUFFER_STORE_DWORD
+; SI: S_ADD_I32 [[VAL:s[0-9]+]],
+; SI: S_SEXT_I32_I8 [[EXTRACT:s[0-9]+]], [[VAL]]
+; SI: S_MOV_B32 {{s[0-9]+}}, -1
+; SI: BUFFER_STORE_DWORDX2
-; EG: BFE_INT
-; EG: ASHR
+; EG: MEM_{{.*}} STORE_{{.*}} [[RES_LO:T[0-9]+\.[XYZW]]], [[ADDR_LO:T[0-9]+.[XYZW]]]
+; EG: MEM_{{.*}} STORE_{{.*}} [[RES_HI:T[0-9]+\.[XYZW]]], [[ADDR_HI:T[0-9]+.[XYZW]]]
+; EG: ADD_INT
+; EG-NEXT: BFE_INT {{\*?}} [[RES_LO]], {{.*}}, 0.0, literal
+; EG: ASHR [[RES_HI]]
+; EG-NOT: BFE_INT
+; EG: LSHR
+; EG: LSHR
+;; TODO Check address computation, using | with variables in {{}} does not work,
+;; also the _LO/_HI order might be different
define void @sext_in_reg_i8_to_i64(i64 addrspace(1)* %out, i64 %a, i64 %b) nounwind {
%c = add i64 %a, %b
%shl = shl i64 %c, 56
@@ -75,12 +112,21 @@ define void @sext_in_reg_i8_to_i64(i64 addrspace(1)* %out, i64 %a, i64 %b) nounw
}
; FUNC-LABEL: @sext_in_reg_i16_to_i64
-; SI: V_BFE_I32 {{v[0-9]+}}, {{s[0-9]+}}, 0, 16
-; SI: V_ASHRREV_I32_e32 {{v[0-9]+}}, 31,
-; SI: BUFFER_STORE_DWORD
+; SI: S_ADD_I32 [[VAL:s[0-9]+]],
+; SI: S_SEXT_I32_I16 [[EXTRACT:s[0-9]+]], [[VAL]]
+; SI: S_MOV_B32 {{s[0-9]+}}, -1
+; SI: BUFFER_STORE_DWORDX2
-; EG: BFE_INT
-; EG: ASHR
+; EG: MEM_{{.*}} STORE_{{.*}} [[RES_LO:T[0-9]+\.[XYZW]]], [[ADDR_LO:T[0-9]+.[XYZW]]]
+; EG: MEM_{{.*}} STORE_{{.*}} [[RES_HI:T[0-9]+\.[XYZW]]], [[ADDR_HI:T[0-9]+.[XYZW]]]
+; EG: ADD_INT
+; EG-NEXT: BFE_INT {{\*?}} [[RES_LO]], {{.*}}, 0.0, literal
+; EG: ASHR [[RES_HI]]
+; EG-NOT: BFE_INT
+; EG: LSHR
+; EG: LSHR
+;; TODO Check address computation, using | with variables in {{}} does not work,
+;; also the _LO/_HI order might be different
define void @sext_in_reg_i16_to_i64(i64 addrspace(1)* %out, i64 %a, i64 %b) nounwind {
%c = add i64 %a, %b
%shl = shl i64 %c, 48
@@ -95,6 +141,17 @@ define void @sext_in_reg_i16_to_i64(i64 addrspace(1)* %out, i64 %a, i64 %b) noun
; SI: S_ADD_I32 [[ADD:s[0-9]+]],
; SI: S_ASHR_I32 s{{[0-9]+}}, [[ADD]], 31
; SI: BUFFER_STORE_DWORDX2
+
+; EG: MEM_{{.*}} STORE_{{.*}} [[RES_LO:T[0-9]+\.[XYZW]]], [[ADDR_LO:T[0-9]+.[XYZW]]]
+; EG: MEM_{{.*}} STORE_{{.*}} [[RES_HI:T[0-9]+\.[XYZW]]], [[ADDR_HI:T[0-9]+.[XYZW]]]
+; EG-NOT: BFE_INT
+; EG: ADD_INT {{\*?}} [[RES_LO]]
+; EG: ASHR [[RES_HI]]
+; EG: ADD_INT
+; EG: LSHR
+; EG: LSHR
+;; TODO Check address computation, using | with variables in {{}} does not work,
+;; also the _LO/_HI order might be different
define void @sext_in_reg_i32_to_i64(i64 addrspace(1)* %out, i64 %a, i64 %b) nounwind {
%c = add i64 %a, %b
%shl = shl i64 %c, 32
@@ -105,8 +162,8 @@ define void @sext_in_reg_i32_to_i64(i64 addrspace(1)* %out, i64 %a, i64 %b) noun
; This is broken on Evergreen for some reason related to the <1 x i64> kernel arguments.
; XFUNC-LABEL: @sext_in_reg_i8_to_v1i64
-; XSI: V_BFE_I32 {{v[0-9]+}}, {{s[0-9]+}}, 0, 8
-; XSI: V_ASHRREV_I32_e32 {{v[0-9]+}}, 31,
+; XSI: S_BFE_I32 [[EXTRACT:s[0-9]+]], {{s[0-9]+}}, 524288
+; XSI: S_ASHR_I32 {{v[0-9]+}}, [[EXTRACT]], 31
; XSI: BUFFER_STORE_DWORD
; XEG: BFE_INT
; XEG: ASHR
@@ -122,7 +179,13 @@ define void @sext_in_reg_i32_to_i64(i64 addrspace(1)* %out, i64 %a, i64 %b) noun
; SI-NOT: BFE
; SI: S_LSHL_B32 [[REG:s[0-9]+]], {{s[0-9]+}}, 6
; SI: S_ASHR_I32 {{s[0-9]+}}, [[REG]], 7
+
+; EG: MEM_{{.*}} STORE_{{.*}} [[RES:T[0-9]+\.[XYZW]]], [[ADDR:T[0-9]+.[XYZW]]]
; EG-NOT: BFE
+; EG: ADD_INT
+; EG: LSHL
+; EG: ASHR [[RES]]
+; EG: LSHR {{\*?}} [[ADDR]]
define void @sext_in_reg_i1_in_i32_other_amount(i32 addrspace(1)* %out, i32 %a, i32 %b) nounwind {
%c = add i32 %a, %b
%x = shl i32 %c, 6
@@ -136,7 +199,15 @@ define void @sext_in_reg_i1_in_i32_other_amount(i32 addrspace(1)* %out, i32 %a,
; SI: S_ASHR_I32 {{s[0-9]+}}, [[REG0]], 7
; SI: S_LSHL_B32 [[REG1:s[0-9]+]], {{s[0-9]}}, 6
; SI: S_ASHR_I32 {{s[0-9]+}}, [[REG1]], 7
+
+; EG: MEM_{{.*}} STORE_{{.*}} [[RES:T[0-9]+]]{{\.[XYZW][XYZW]}}, [[ADDR:T[0-9]+.[XYZW]]]
; EG-NOT: BFE
+; EG: ADD_INT
+; EG: LSHL
+; EG: ASHR [[RES]]
+; EG: LSHL
+; EG: ASHR [[RES]]
+; EG: LSHR {{\*?}} [[ADDR]]
define void @sext_in_reg_v2i1_in_v2i32_other_amount(<2 x i32> addrspace(1)* %out, <2 x i32> %a, <2 x i32> %b) nounwind {
%c = add <2 x i32> %a, %b
%x = shl <2 x i32> %c, <i32 6, i32 6>
@@ -147,11 +218,14 @@ define void @sext_in_reg_v2i1_in_v2i32_other_amount(<2 x i32> addrspace(1)* %out
; FUNC-LABEL: @sext_in_reg_v2i1_to_v2i32
-; SI: V_BFE_I32 {{v[0-9]+}}, {{s[0-9]+}}, 0, 1
-; SI: V_BFE_I32 {{v[0-9]+}}, {{s[0-9]+}}, 0, 1
+; SI: S_BFE_I32 {{s[0-9]+}}, {{s[0-9]+}}, 0x10000
+; SI: S_BFE_I32 {{s[0-9]+}}, {{s[0-9]+}}, 0x10000
; SI: BUFFER_STORE_DWORDX2
-; EG: BFE
-; EG: BFE
+
+; EG: MEM_{{.*}} STORE_{{.*}} [[RES:T[0-9]+]]{{\.[XYZW][XYZW]}}, [[ADDR:T[0-9]+.[XYZW]]]
+; EG: BFE_INT [[RES]]
+; EG: BFE_INT [[RES]]
+; EG: LSHR {{\*?}} [[ADDR]]
define void @sext_in_reg_v2i1_to_v2i32(<2 x i32> addrspace(1)* %out, <2 x i32> %a, <2 x i32> %b) nounwind {
%c = add <2 x i32> %a, %b ; add to prevent folding into extload
%shl = shl <2 x i32> %c, <i32 31, i32 31>
@@ -161,16 +235,18 @@ define void @sext_in_reg_v2i1_to_v2i32(<2 x i32> addrspace(1)* %out, <2 x i32> %
}
; FUNC-LABEL: @sext_in_reg_v4i1_to_v4i32
-; SI: V_BFE_I32 {{v[0-9]+}}, {{s[0-9]+}}, 0, 1
-; SI: V_BFE_I32 {{v[0-9]+}}, {{s[0-9]+}}, 0, 1
-; SI: V_BFE_I32 {{v[0-9]+}}, {{s[0-9]+}}, 0, 1
-; SI: V_BFE_I32 {{v[0-9]+}}, {{s[0-9]+}}, 0, 1
+; SI: S_BFE_I32 {{s[0-9]+}}, {{s[0-9]+}}, 0x10000
+; SI: S_BFE_I32 {{s[0-9]+}}, {{s[0-9]+}}, 0x10000
+; SI: S_BFE_I32 {{s[0-9]+}}, {{s[0-9]+}}, 0x10000
+; SI: S_BFE_I32 {{s[0-9]+}}, {{s[0-9]+}}, 0x10000
; SI: BUFFER_STORE_DWORDX4
-; EG: BFE
-; EG: BFE
-; EG: BFE
-; EG: BFE
+; EG: MEM_{{.*}} STORE_{{.*}} [[RES:T[0-9]+]]{{\.[XYZW][XYZW][XYZW][XYZW]}}, [[ADDR:T[0-9]+.[XYZW]]]
+; EG: BFE_INT [[RES]]
+; EG: BFE_INT [[RES]]
+; EG: BFE_INT [[RES]]
+; EG: BFE_INT [[RES]]
+; EG: LSHR {{\*?}} [[ADDR]]
define void @sext_in_reg_v4i1_to_v4i32(<4 x i32> addrspace(1)* %out, <4 x i32> %a, <4 x i32> %b) nounwind {
%c = add <4 x i32> %a, %b ; add to prevent folding into extload
%shl = shl <4 x i32> %c, <i32 31, i32 31, i32 31, i32 31>
@@ -180,12 +256,14 @@ define void @sext_in_reg_v4i1_to_v4i32(<4 x i32> addrspace(1)* %out, <4 x i32> %
}
; FUNC-LABEL: @sext_in_reg_v2i8_to_v2i32
-; SI: V_BFE_I32 {{v[0-9]+}}, {{s[0-9]+}}, 0, 8
-; SI: V_BFE_I32 {{v[0-9]+}}, {{s[0-9]+}}, 0, 8
+; SI: S_SEXT_I32_I8 {{s[0-9]+}}, {{s[0-9]+}}
+; SI: S_SEXT_I32_I8 {{s[0-9]+}}, {{s[0-9]+}}
; SI: BUFFER_STORE_DWORDX2
-; EG: BFE
-; EG: BFE
+; EG: MEM_{{.*}} STORE_{{.*}} [[RES:T[0-9]+]]{{\.[XYZW][XYZW]}}, [[ADDR:T[0-9]+.[XYZW]]]
+; EG: BFE_INT [[RES]]
+; EG: BFE_INT [[RES]]
+; EG: LSHR {{\*?}} [[ADDR]]
define void @sext_in_reg_v2i8_to_v2i32(<2 x i32> addrspace(1)* %out, <2 x i32> %a, <2 x i32> %b) nounwind {
%c = add <2 x i32> %a, %b ; add to prevent folding into extload
%shl = shl <2 x i32> %c, <i32 24, i32 24>
@@ -195,16 +273,18 @@ define void @sext_in_reg_v2i8_to_v2i32(<2 x i32> addrspace(1)* %out, <2 x i32> %
}
; FUNC-LABEL: @sext_in_reg_v4i8_to_v4i32
-; SI: V_BFE_I32 {{v[0-9]+}}, {{s[0-9]+}}, 0, 8
-; SI: V_BFE_I32 {{v[0-9]+}}, {{s[0-9]+}}, 0, 8
-; SI: V_BFE_I32 {{v[0-9]+}}, {{s[0-9]+}}, 0, 8
-; SI: V_BFE_I32 {{v[0-9]+}}, {{s[0-9]+}}, 0, 8
+; SI: S_SEXT_I32_I8 {{s[0-9]+}}, {{s[0-9]+}}
+; SI: S_SEXT_I32_I8 {{s[0-9]+}}, {{s[0-9]+}}
+; SI: S_SEXT_I32_I8 {{s[0-9]+}}, {{s[0-9]+}}
+; SI: S_SEXT_I32_I8 {{s[0-9]+}}, {{s[0-9]+}}
; SI: BUFFER_STORE_DWORDX4
-; EG: BFE
-; EG: BFE
-; EG: BFE
-; EG: BFE
+; EG: MEM_{{.*}} STORE_{{.*}} [[RES:T[0-9]+]]{{\.[XYZW][XYZW][XYZW][XYZW]}}, [[ADDR:T[0-9]+.[XYZW]]]
+; EG: BFE_INT [[RES]]
+; EG: BFE_INT [[RES]]
+; EG: BFE_INT [[RES]]
+; EG: BFE_INT [[RES]]
+; EG: LSHR {{\*?}} [[ADDR]]
define void @sext_in_reg_v4i8_to_v4i32(<4 x i32> addrspace(1)* %out, <4 x i32> %a, <4 x i32> %b) nounwind {
%c = add <4 x i32> %a, %b ; add to prevent folding into extload
%shl = shl <4 x i32> %c, <i32 24, i32 24, i32 24, i32 24>
@@ -214,16 +294,18 @@ define void @sext_in_reg_v4i8_to_v4i32(<4 x i32> addrspace(1)* %out, <4 x i32> %
}
; FUNC-LABEL: @sext_in_reg_v2i16_to_v2i32
-; SI: V_BFE_I32 {{v[0-9]+}}, {{s[0-9]+}}, 0, 8
-; SI: V_BFE_I32 {{v[0-9]+}}, {{s[0-9]+}}, 0, 8
+; SI: S_SEXT_I32_I16 {{s[0-9]+}}, {{s[0-9]+}}
+; SI: S_SEXT_I32_I16 {{s[0-9]+}}, {{s[0-9]+}}
; SI: BUFFER_STORE_DWORDX2
-; EG: BFE
-; EG: BFE
+; EG: MEM_{{.*}} STORE_{{.*}} [[RES:T[0-9]+]]{{\.[XYZW][XYZW]}}, [[ADDR:T[0-9]+.[XYZW]]]
+; EG: BFE_INT [[RES]]
+; EG: BFE_INT [[RES]]
+; EG: LSHR {{\*?}} [[ADDR]]
define void @sext_in_reg_v2i16_to_v2i32(<2 x i32> addrspace(1)* %out, <2 x i32> %a, <2 x i32> %b) nounwind {
%c = add <2 x i32> %a, %b ; add to prevent folding into extload
- %shl = shl <2 x i32> %c, <i32 24, i32 24>
- %ashr = ashr <2 x i32> %shl, <i32 24, i32 24>
+ %shl = shl <2 x i32> %c, <i32 16, i32 16>
+ %ashr = ashr <2 x i32> %shl, <i32 16, i32 16>
store <2 x i32> %ashr, <2 x i32> addrspace(1)* %out, align 8
ret void
}
@@ -252,8 +334,36 @@ define void @testcase_3(i8 addrspace(1)* %out, i8 %a) nounwind {
ret void
}
+; FUNC-LABEL: @vgpr_sext_in_reg_v4i8_to_v4i32
+; SI: V_BFE_I32 [[EXTRACT:v[0-9]+]], {{v[0-9]+}}, 0, 8
+; SI: V_BFE_I32 [[EXTRACT:v[0-9]+]], {{v[0-9]+}}, 0, 8
+; SI: V_BFE_I32 [[EXTRACT:v[0-9]+]], {{v[0-9]+}}, 0, 8
+; SI: V_BFE_I32 [[EXTRACT:v[0-9]+]], {{v[0-9]+}}, 0, 8
+define void @vgpr_sext_in_reg_v4i8_to_v4i32(<4 x i32> addrspace(1)* %out, <4 x i32> addrspace(1)* %a, <4 x i32> addrspace(1)* %b) nounwind {
+ %loada = load <4 x i32> addrspace(1)* %a, align 16
+ %loadb = load <4 x i32> addrspace(1)* %b, align 16
+ %c = add <4 x i32> %loada, %loadb ; add to prevent folding into extload
+ %shl = shl <4 x i32> %c, <i32 24, i32 24, i32 24, i32 24>
+ %ashr = ashr <4 x i32> %shl, <i32 24, i32 24, i32 24, i32 24>
+ store <4 x i32> %ashr, <4 x i32> addrspace(1)* %out, align 8
+ ret void
+}
+
+; FUNC-LABEL: @vgpr_sext_in_reg_v4i16_to_v4i32
+; SI: V_BFE_I32 [[EXTRACT:v[0-9]+]], {{v[0-9]+}}, 0, 16
+; SI: V_BFE_I32 [[EXTRACT:v[0-9]+]], {{v[0-9]+}}, 0, 16
+define void @vgpr_sext_in_reg_v4i16_to_v4i32(<4 x i32> addrspace(1)* %out, <4 x i32> addrspace(1)* %a, <4 x i32> addrspace(1)* %b) nounwind {
+ %loada = load <4 x i32> addrspace(1)* %a, align 16
+ %loadb = load <4 x i32> addrspace(1)* %b, align 16
+ %c = add <4 x i32> %loada, %loadb ; add to prevent folding into extload
+ %shl = shl <4 x i32> %c, <i32 16, i32 16, i32 16, i32 16>
+ %ashr = ashr <4 x i32> %shl, <i32 16, i32 16, i32 16, i32 16>
+ store <4 x i32> %ashr, <4 x i32> addrspace(1)* %out, align 8
+ ret void
+}
+
; FIXME: The BFE should really be eliminated. I think it should happen
-; when computeMaskedBitsForTargetNode is implemented for imax.
+; when computeKnownBitsForTargetNode is implemented for imax.
; FUNC-LABEL: @sext_in_reg_to_illegal_type
; SI: BUFFER_LOAD_SBYTE
@@ -269,3 +379,146 @@ define void @sext_in_reg_to_illegal_type(i16 addrspace(1)* nocapture %out, i8 ad
store i16 %tmp6, i16 addrspace(1)* %out, align 2
ret void
}
+
+declare i32 @llvm.AMDGPU.bfe.i32(i32, i32, i32) nounwind readnone
+
+; FUNC-LABEL: @bfe_0_width
+; SI-NOT: BFE
+; SI: S_ENDPGM
+define void @bfe_0_width(i32 addrspace(1)* %out, i32 addrspace(1)* %ptr) nounwind {
+ %load = load i32 addrspace(1)* %ptr, align 4
+ %bfe = call i32 @llvm.AMDGPU.bfe.i32(i32 %load, i32 8, i32 0) nounwind readnone
+ store i32 %bfe, i32 addrspace(1)* %out, align 4
+ ret void
+}
+
+; FUNC-LABEL: @bfe_8_bfe_8
+; SI: V_BFE_I32
+; SI-NOT: BFE
+; SI: S_ENDPGM
+define void @bfe_8_bfe_8(i32 addrspace(1)* %out, i32 addrspace(1)* %ptr) nounwind {
+ %load = load i32 addrspace(1)* %ptr, align 4
+ %bfe0 = call i32 @llvm.AMDGPU.bfe.i32(i32 %load, i32 0, i32 8) nounwind readnone
+ %bfe1 = call i32 @llvm.AMDGPU.bfe.i32(i32 %bfe0, i32 0, i32 8) nounwind readnone
+ store i32 %bfe1, i32 addrspace(1)* %out, align 4
+ ret void
+}
+
+; FUNC-LABEL: @bfe_8_bfe_16
+; SI: V_BFE_I32 v{{[0-9]+}}, v{{[0-9]+}}, 0, 8
+; SI: S_ENDPGM
+define void @bfe_8_bfe_16(i32 addrspace(1)* %out, i32 addrspace(1)* %ptr) nounwind {
+ %load = load i32 addrspace(1)* %ptr, align 4
+ %bfe0 = call i32 @llvm.AMDGPU.bfe.i32(i32 %load, i32 0, i32 8) nounwind readnone
+ %bfe1 = call i32 @llvm.AMDGPU.bfe.i32(i32 %bfe0, i32 0, i32 16) nounwind readnone
+ store i32 %bfe1, i32 addrspace(1)* %out, align 4
+ ret void
+}
+
+; This really should be folded into 1
+; FUNC-LABEL: @bfe_16_bfe_8
+; SI: V_BFE_I32 v{{[0-9]+}}, v{{[0-9]+}}, 0, 8
+; SI-NOT: BFE
+; SI: S_ENDPGM
+define void @bfe_16_bfe_8(i32 addrspace(1)* %out, i32 addrspace(1)* %ptr) nounwind {
+ %load = load i32 addrspace(1)* %ptr, align 4
+ %bfe0 = call i32 @llvm.AMDGPU.bfe.i32(i32 %load, i32 0, i32 16) nounwind readnone
+ %bfe1 = call i32 @llvm.AMDGPU.bfe.i32(i32 %bfe0, i32 0, i32 8) nounwind readnone
+ store i32 %bfe1, i32 addrspace(1)* %out, align 4
+ ret void
+}
+
+; Make sure there isn't a redundant BFE
+; FUNC-LABEL: @sext_in_reg_i8_to_i32_bfe
+; SI: S_SEXT_I32_I8 s{{[0-9]+}}, s{{[0-9]+}}
+; SI-NOT: BFE
+; SI: S_ENDPGM
+define void @sext_in_reg_i8_to_i32_bfe(i32 addrspace(1)* %out, i32 %a, i32 %b) nounwind {
+ %c = add i32 %a, %b ; add to prevent folding into extload
+ %bfe = call i32 @llvm.AMDGPU.bfe.i32(i32 %c, i32 0, i32 8) nounwind readnone
+ %shl = shl i32 %bfe, 24
+ %ashr = ashr i32 %shl, 24
+ store i32 %ashr, i32 addrspace(1)* %out, align 4
+ ret void
+}
+
+; FUNC-LABEL: @sext_in_reg_i8_to_i32_bfe_wrong
+define void @sext_in_reg_i8_to_i32_bfe_wrong(i32 addrspace(1)* %out, i32 %a, i32 %b) nounwind {
+ %c = add i32 %a, %b ; add to prevent folding into extload
+ %bfe = call i32 @llvm.AMDGPU.bfe.i32(i32 %c, i32 8, i32 0) nounwind readnone
+ %shl = shl i32 %bfe, 24
+ %ashr = ashr i32 %shl, 24
+ store i32 %ashr, i32 addrspace(1)* %out, align 4
+ ret void
+}
+
+; FUNC-LABEL: @sextload_i8_to_i32_bfe
+; SI: BUFFER_LOAD_SBYTE
+; SI-NOT: BFE
+; SI: S_ENDPGM
+define void @sextload_i8_to_i32_bfe(i32 addrspace(1)* %out, i8 addrspace(1)* %ptr) nounwind {
+ %load = load i8 addrspace(1)* %ptr, align 1
+ %sext = sext i8 %load to i32
+ %bfe = call i32 @llvm.AMDGPU.bfe.i32(i32 %sext, i32 0, i32 8) nounwind readnone
+ %shl = shl i32 %bfe, 24
+ %ashr = ashr i32 %shl, 24
+ store i32 %ashr, i32 addrspace(1)* %out, align 4
+ ret void
+}
+
+; FUNC-LABEL: @sextload_i8_to_i32_bfe_0:
+; SI-NOT: BFE
+; SI: S_ENDPGM
+define void @sextload_i8_to_i32_bfe_0(i32 addrspace(1)* %out, i8 addrspace(1)* %ptr) nounwind {
+ %load = load i8 addrspace(1)* %ptr, align 1
+ %sext = sext i8 %load to i32
+ %bfe = call i32 @llvm.AMDGPU.bfe.i32(i32 %sext, i32 8, i32 0) nounwind readnone
+ %shl = shl i32 %bfe, 24
+ %ashr = ashr i32 %shl, 24
+ store i32 %ashr, i32 addrspace(1)* %out, align 4
+ ret void
+}
+
+; FUNC-LABEL: @sext_in_reg_i1_bfe_offset_0:
+; SI-NOT: SHR
+; SI-NOT: SHL
+; SI: V_BFE_I32 v{{[0-9]+}}, v{{[0-9]+}}, 0, 1
+; SI: S_ENDPGM
+define void @sext_in_reg_i1_bfe_offset_0(i32 addrspace(1)* %out, i32 addrspace(1)* %in) nounwind {
+ %x = load i32 addrspace(1)* %in, align 4
+ %shl = shl i32 %x, 31
+ %shr = ashr i32 %shl, 31
+ %bfe = call i32 @llvm.AMDGPU.bfe.i32(i32 %shr, i32 0, i32 1)
+ store i32 %bfe, i32 addrspace(1)* %out, align 4
+ ret void
+}
+
+; FUNC-LABEL: @sext_in_reg_i1_bfe_offset_1
+; SI: BUFFER_LOAD_DWORD
+; SI-NOT: SHL
+; SI-NOT: SHR
+; SI: V_BFE_I32 v{{[0-9]+}}, v{{[0-9]+}}, 1, 1
+; SI: S_ENDPGM
+define void @sext_in_reg_i1_bfe_offset_1(i32 addrspace(1)* %out, i32 addrspace(1)* %in) nounwind {
+ %x = load i32 addrspace(1)* %in, align 4
+ %shl = shl i32 %x, 30
+ %shr = ashr i32 %shl, 30
+ %bfe = call i32 @llvm.AMDGPU.bfe.i32(i32 %shr, i32 1, i32 1)
+ store i32 %bfe, i32 addrspace(1)* %out, align 4
+ ret void
+}
+
+; FUNC-LABEL: @sext_in_reg_i2_bfe_offset_1:
+; SI: BUFFER_LOAD_DWORD
+; SI: V_LSHLREV_B32_e32 v{{[0-9]+}}, 30, v{{[0-9]+}}
+; SI: V_ASHRREV_I32_e32 v{{[0-9]+}}, 30, v{{[0-9]+}}
+; SI: V_BFE_I32 v{{[0-9]+}}, v{{[0-9]+}}, 1, 2
+; SI: S_ENDPGM
+define void @sext_in_reg_i2_bfe_offset_1(i32 addrspace(1)* %out, i32 addrspace(1)* %in) nounwind {
+ %x = load i32 addrspace(1)* %in, align 4
+ %shl = shl i32 %x, 30
+ %shr = ashr i32 %shl, 30
+ %bfe = call i32 @llvm.AMDGPU.bfe.i32(i32 %shr, i32 1, i32 2)
+ store i32 %bfe, i32 addrspace(1)* %out, align 4
+ ret void
+}
diff --git a/test/CodeGen/R600/sgpr-control-flow.ll b/test/CodeGen/R600/sgpr-control-flow.ll
new file mode 100644
index 0000000..06ad24d
--- /dev/null
+++ b/test/CodeGen/R600/sgpr-control-flow.ll
@@ -0,0 +1,27 @@
+; RUN: llc -march=r600 -mcpu=SI -verify-machineinstrs< %s | FileCheck -check-prefix=SI %s
+;
+;
+; Most SALU instructions ignore control flow, so we need to make sure
+; they don't overwrite values from other blocks.
+
+; SI-NOT: S_ADD
+
+define void @sgpr_if_else(i32 addrspace(1)* %out, i32 %a, i32 %b, i32 %c, i32 %d, i32 %e) {
+entry:
+ %0 = icmp eq i32 %a, 0
+ br i1 %0, label %if, label %else
+
+if:
+ %1 = add i32 %b, %c
+ br label %endif
+
+else:
+ %2 = add i32 %d, %e
+ br label %endif
+
+endif:
+ %3 = phi i32 [%1, %if], [%2, %else]
+ %4 = add i32 %3, %a
+ store i32 %4, i32 addrspace(1)* %out
+ ret void
+}
diff --git a/test/CodeGen/R600/sgpr-copy-duplicate-operand.ll b/test/CodeGen/R600/sgpr-copy-duplicate-operand.ll
index d74161b..9d8a623 100644
--- a/test/CodeGen/R600/sgpr-copy-duplicate-operand.ll
+++ b/test/CodeGen/R600/sgpr-copy-duplicate-operand.ll
@@ -1,4 +1,4 @@
-; RUN: llc -march=r600 -mcpu=SI < %s | FileCheck -check-prefix=SI %s
+; RUN: llc -march=r600 -mcpu=SI -verify-machineinstrs< %s | FileCheck -check-prefix=SI %s
; Copy VGPR -> SGPR used twice as an instruction operand, which is then
; used in an REG_SEQUENCE that also needs to be handled.
diff --git a/test/CodeGen/R600/sgpr-copy.ll b/test/CodeGen/R600/sgpr-copy.ll
index 5472c1b..c581d86 100644
--- a/test/CodeGen/R600/sgpr-copy.ll
+++ b/test/CodeGen/R600/sgpr-copy.ll
@@ -1,4 +1,4 @@
-; RUN: llc < %s -march=r600 -mcpu=SI | FileCheck %s
+; RUN: llc < %s -march=r600 -mcpu=SI -verify-machineinstrs | FileCheck %s
; This test checks that no VGPR to SGPR copies are created by the register
; allocator.
diff --git a/test/CodeGen/R600/si-annotate-cf-assertion.ll b/test/CodeGen/R600/si-annotate-cf-assertion.ll
index cd3ba2b..daa4667 100644
--- a/test/CodeGen/R600/si-annotate-cf-assertion.ll
+++ b/test/CodeGen/R600/si-annotate-cf-assertion.ll
@@ -1,6 +1,6 @@
; REQUIRES: asserts
; XFAIL: *
-; RUN: llc -march=r600 -mcpu=SI -asm-verbose=false < %s | FileCheck %s
+; RUN: llc -march=r600 -mcpu=SI -verify-machineinstrs-asm-verbose=false < %s | FileCheck %s
define void @test(i32 addrspace(1)* %g, i8 addrspace(3)* %l, i32 %x) nounwind {
diff --git a/test/CodeGen/R600/simplify-demanded-bits-build-pair.ll b/test/CodeGen/R600/simplify-demanded-bits-build-pair.ll
new file mode 100644
index 0000000..d9f60ea
--- /dev/null
+++ b/test/CodeGen/R600/simplify-demanded-bits-build-pair.ll
@@ -0,0 +1,36 @@
+; RUN: llc -verify-machineinstrs -march=r600 -mcpu=SI < %s | FileCheck -check-prefix=SI %s
+
+; 64-bit select was originally lowered with a build_pair, and this
+; could be simplified to 1 cndmask instead of 2, but that broken when
+; it started being implemented with a v2i32 build_vector and
+; bitcasting.
+define void @trunc_select_i64(i32 addrspace(1)* %out, i64 %a, i64 %b, i32 %c) {
+ %cmp = icmp eq i32 %c, 0
+ %select = select i1 %cmp, i64 %a, i64 %b
+ %trunc = trunc i64 %select to i32
+ store i32 %trunc, i32 addrspace(1)* %out, align 4
+ ret void
+}
+
+; SI-LABEL: @trunc_load_alloca_i64:
+; SI: V_MOVRELS_B32
+; SI-NOT: V_MOVRELS_B32
+; SI: S_ENDPGM
+define void @trunc_load_alloca_i64(i64 addrspace(1)* %out, i32 %a, i32 %b) {
+ %idx = add i32 %a, %b
+ %alloca = alloca i64, i32 4
+ %gep0 = getelementptr i64* %alloca, i64 0
+ %gep1 = getelementptr i64* %alloca, i64 1
+ %gep2 = getelementptr i64* %alloca, i64 2
+ %gep3 = getelementptr i64* %alloca, i64 3
+ store i64 24, i64* %gep0, align 8
+ store i64 9334, i64* %gep1, align 8
+ store i64 3935, i64* %gep2, align 8
+ store i64 9342, i64* %gep3, align 8
+ %gep = getelementptr i64* %alloca, i32 %idx
+ %load = load i64* %gep, align 8
+ %mask = and i64 %load, 4294967296
+ %add = add i64 %mask, -1
+ store i64 %add, i64 addrspace(1)* %out, align 4
+ ret void
+}
diff --git a/test/CodeGen/R600/smrd.ll b/test/CodeGen/R600/smrd.ll
index 43231df..dec6185 100644
--- a/test/CodeGen/R600/smrd.ll
+++ b/test/CodeGen/R600/smrd.ll
@@ -2,7 +2,7 @@
; SMRD load with an immediate offset.
; CHECK-LABEL: @smrd0
-; CHECK: S_LOAD_DWORD s{{[0-9]}}, s[{{[0-9]:[0-9]}}], 1 ; encoding: [0x01
+; CHECK: S_LOAD_DWORD s{{[0-9]}}, s[{{[0-9]:[0-9]}}], 0x1 ; encoding: [0x01
define void @smrd0(i32 addrspace(1)* %out, i32 addrspace(2)* %ptr) {
entry:
%0 = getelementptr i32 addrspace(2)* %ptr, i64 1
@@ -13,7 +13,7 @@ entry:
; SMRD load with the largest possible immediate offset.
; CHECK-LABEL: @smrd1
-; CHECK: S_LOAD_DWORD s{{[0-9]}}, s[{{[0-9]:[0-9]}}], 255 ; encoding: [0xff
+; CHECK: S_LOAD_DWORD s{{[0-9]}}, s[{{[0-9]:[0-9]}}], 0xff ; encoding: [0xff
define void @smrd1(i32 addrspace(1)* %out, i32 addrspace(2)* %ptr) {
entry:
%0 = getelementptr i32 addrspace(2)* %ptr, i64 255
@@ -24,7 +24,7 @@ entry:
; SMRD load with an offset greater than the largest possible immediate.
; CHECK-LABEL: @smrd2
-; CHECK: S_MOV_B32 s[[OFFSET:[0-9]]], 1024
+; CHECK: S_MOV_B32 s[[OFFSET:[0-9]]], 0x400
; CHECK: S_LOAD_DWORD s{{[0-9]}}, s[{{[0-9]:[0-9]}}], s[[OFFSET]] ; encoding: [0x0[[OFFSET]]
define void @smrd2(i32 addrspace(1)* %out, i32 addrspace(2)* %ptr) {
entry:
@@ -34,9 +34,27 @@ entry:
ret void
}
+; SMRD load with a 64-bit offset
+; CHECK-LABEL: @smrd3
+; CHECK-DAG: S_MOV_B32 s[[SHI:[0-9]+]], 4
+; CHECK-DAG: S_MOV_B32 s[[SLO:[0-9]+]], 0
+; FIXME: We don't need to copy these values to VGPRs
+; CHECK-DAG: V_MOV_B32_e32 v[[VHI:[0-9]+]], s[[SHI]]
+; CHECK-DAG: V_MOV_B32_e32 v[[VLO:[0-9]+]], s[[SLO]]
+; FIXME: We should be able to use S_LOAD_DWORD here
+; BUFFER_LOAD_DWORD v{{[0-9]+}}, s[{{[0-9]+:[0-9]+}}] + v[[[VLO]]:[[VHI]]] + 0x0
+
+define void @smrd3(i32 addrspace(1)* %out, i32 addrspace(2)* %ptr) {
+entry:
+ %0 = getelementptr i32 addrspace(2)* %ptr, i64 4294967296 ; 2 ^ 32
+ %1 = load i32 addrspace(2)* %0
+ store i32 %1, i32 addrspace(1)* %out
+ ret void
+}
+
; SMRD load using the load.const intrinsic with an immediate offset
; CHECK-LABEL: @smrd_load_const0
-; CHECK: S_BUFFER_LOAD_DWORD s{{[0-9]}}, s[{{[0-9]:[0-9]}}], 4 ; encoding: [0x04
+; CHECK: S_BUFFER_LOAD_DWORD s{{[0-9]}}, s[{{[0-9]:[0-9]}}], 0x4 ; encoding: [0x04
define void @smrd_load_const0(<16 x i8> addrspace(2)* inreg, <16 x i8> addrspace(2)* inreg, <32 x i8> addrspace(2)* inreg, i32 inreg, <2 x i32>, <2 x i32>, <2 x i32>, <3 x i32>, <2 x i32>, <2 x i32>, <2 x i32>, float, float, float, float, float, float, float, float, float) #0 {
main_body:
%20 = getelementptr <16 x i8> addrspace(2)* %0, i32 0
@@ -49,7 +67,7 @@ main_body:
; SMRD load using the load.const intrinsic with an offset greater largest possible
; immediate offset.
; CHECK-LABEL: @smrd_load_const1
-; CHECK: S_BUFFER_LOAD_DWORD s{{[0-9]}}, s[{{[0-9]:[0-9]}}], 255 ; encoding: [0xff
+; CHECK: S_BUFFER_LOAD_DWORD s{{[0-9]}}, s[{{[0-9]:[0-9]}}], 0xff ; encoding: [0xff
define void @smrd_load_const1(<16 x i8> addrspace(2)* inreg, <16 x i8> addrspace(2)* inreg, <32 x i8> addrspace(2)* inreg, i32 inreg, <2 x i32>, <2 x i32>, <2 x i32>, <3 x i32>, <2 x i32>, <2 x i32>, <2 x i32>, float, float, float, float, float, float, float, float, float) #0 {
main_body:
%20 = getelementptr <16 x i8> addrspace(2)* %0, i32 0
diff --git a/test/CodeGen/R600/store-v3i64.ll b/test/CodeGen/R600/store-v3i64.ll
index 58229f6..58d28b5 100644
--- a/test/CodeGen/R600/store-v3i64.ll
+++ b/test/CodeGen/R600/store-v3i64.ll
@@ -1,5 +1,5 @@
; XFAIL: *
-; RUN: llc -march=r600 -mcpu=SI < %s | FileCheck -check-prefix=SI
+; RUN: llc -march=r600 -mcpu=SI -verify-machineinstrs< %s | FileCheck -check-prefix=SI
; SI-LABEL: @global_store_v3i64:
; SI: BUFFER_STORE_DWORDX4
diff --git a/test/CodeGen/R600/store-vector-ptrs.ll b/test/CodeGen/R600/store-vector-ptrs.ll
index 3af7d91..41c5edc 100644
--- a/test/CodeGen/R600/store-vector-ptrs.ll
+++ b/test/CodeGen/R600/store-vector-ptrs.ll
@@ -1,6 +1,6 @@
; REQUIRES: asserts
; XFAIL: *
-; RUN: llc -march=r600 -mcpu=SI < %s
+; RUN: llc -march=r600 -mcpu=SI -verify-machineinstrs< %s
define void @store_vector_ptrs(<4 x i32*>* %out, <4 x [1024 x i32]*> %array) nounwind {
%p = getelementptr <4 x [1024 x i32]*> %array, <4 x i16> zeroinitializer, <4 x i16> <i16 16, i16 16, i16 16, i16 16>
diff --git a/test/CodeGen/R600/store.ll b/test/CodeGen/R600/store.ll
index a3c5331..c0c8ccc 100644
--- a/test/CodeGen/R600/store.ll
+++ b/test/CodeGen/R600/store.ll
@@ -177,6 +177,26 @@ entry:
ret void
}
+; FUNC-LABEL: @store_i64_i8
+; EG-CHECK: MEM_RAT MSKOR
+; SI-CHECK: BUFFER_STORE_BYTE
+define void @store_i64_i8(i8 addrspace(1)* %out, i64 %in) {
+entry:
+ %0 = trunc i64 %in to i8
+ store i8 %0, i8 addrspace(1)* %out
+ ret void
+}
+
+; FUNC-LABEL: @store_i64_i16
+; EG-CHECK: MEM_RAT MSKOR
+; SI-CHECK: BUFFER_STORE_SHORT
+define void @store_i64_i16(i16 addrspace(1)* %out, i64 %in) {
+entry:
+ %0 = trunc i64 %in to i16
+ store i16 %0, i16 addrspace(1)* %out
+ ret void
+}
+
;===------------------------------------------------------------------------===;
; Local Address Space
;===------------------------------------------------------------------------===;
@@ -272,6 +292,26 @@ entry:
ret void
}
+; FUNC-LABEL: @store_local_i64_i8
+; EG-CHECK: LDS_BYTE_WRITE
+; SI-CHECK: DS_WRITE_B8
+define void @store_local_i64_i8(i8 addrspace(3)* %out, i64 %in) {
+entry:
+ %0 = trunc i64 %in to i8
+ store i8 %0, i8 addrspace(3)* %out
+ ret void
+}
+
+; FUNC-LABEL: @store_local_i64_i16
+; EG-CHECK: LDS_SHORT_WRITE
+; SI-CHECK: DS_WRITE_B16
+define void @store_local_i64_i16(i16 addrspace(3)* %out, i64 %in) {
+entry:
+ %0 = trunc i64 %in to i16
+ store i16 %0, i16 addrspace(3)* %out
+ ret void
+}
+
; The stores in this function are combined by the optimizer to create a
; 64-bit store with 32-bit alignment. This is legal for SI and the legalizer
; should not try to split the 64-bit store back into 2 32-bit stores.
@@ -297,3 +337,29 @@ entry:
}
attributes #0 = { nounwind "less-precise-fpmad"="false" "no-frame-pointer-elim"="false" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "unsafe-fp-math"="false" "use-soft-float"="false" }
+
+; When i128 was a legal type this program generated cannot select errors:
+
+; FUNC-LABEL: @i128-const-store
+; FIXME: We should be able to to this with one store instruction
+; EG-CHECK: STORE_RAW
+; EG-CHECK: STORE_RAW
+; EG-CHECK: STORE_RAW
+; EG-CHECK: STORE_RAW
+; CM-CHECK: STORE_DWORD
+; CM-CHECK: STORE_DWORD
+; CM-CHECK: STORE_DWORD
+; CM-CHECK: STORE_DWORD
+; SI: BUFFER_STORE_DWORDX2
+; SI: BUFFER_STORE_DWORDX2
+define void @i128-const-store(i32 addrspace(1)* %out) {
+entry:
+ store i32 1, i32 addrspace(1)* %out, align 4
+ %arrayidx2 = getelementptr inbounds i32 addrspace(1)* %out, i64 1
+ store i32 1, i32 addrspace(1)* %arrayidx2, align 4
+ %arrayidx4 = getelementptr inbounds i32 addrspace(1)* %out, i64 2
+ store i32 2, i32 addrspace(1)* %arrayidx4, align 4
+ %arrayidx6 = getelementptr inbounds i32 addrspace(1)* %out, i64 3
+ store i32 2, i32 addrspace(1)* %arrayidx6, align 4
+ ret void
+}
diff --git a/test/CodeGen/R600/sub.ll b/test/CodeGen/R600/sub.ll
index 5fdd2b8..e321ed6 100644
--- a/test/CodeGen/R600/sub.ll
+++ b/test/CodeGen/R600/sub.ll
@@ -1,13 +1,12 @@
-;RUN: llc < %s -march=r600 -mcpu=redwood | FileCheck --check-prefix=EG-CHECK %s
-;RUN: llc < %s -march=r600 -mcpu=verde -verify-machineinstrs | FileCheck --check-prefix=SI-CHECK %s
+;RUN: llc < %s -march=r600 -mcpu=redwood | FileCheck --check-prefix=EG --check-prefix=FUNC %s
+;RUN: llc < %s -march=r600 -mcpu=verde -verify-machineinstrs | FileCheck --check-prefix=SI --check-prefix=FUNC %s
-;EG-CHECK: @test2
-;EG-CHECK: SUB_INT {{\** *}}T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}
-;EG-CHECK: SUB_INT {{\** *}}T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}
+;FUNC-LABEL: @test2
+;EG: SUB_INT {{\** *}}T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}
+;EG: SUB_INT {{\** *}}T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}
-;SI-CHECK: @test2
-;SI-CHECK: V_SUB_I32_e32 v{{[0-9]+, v[0-9]+, v[0-9]+}}
-;SI-CHECK: V_SUB_I32_e32 v{{[0-9]+, v[0-9]+, v[0-9]+}}
+;SI: V_SUB_I32_e32 v{{[0-9]+, v[0-9]+, v[0-9]+}}
+;SI: V_SUB_I32_e32 v{{[0-9]+, v[0-9]+, v[0-9]+}}
define void @test2(<2 x i32> addrspace(1)* %out, <2 x i32> addrspace(1)* %in) {
%b_ptr = getelementptr <2 x i32> addrspace(1)* %in, i32 1
@@ -18,17 +17,16 @@ define void @test2(<2 x i32> addrspace(1)* %out, <2 x i32> addrspace(1)* %in) {
ret void
}
-;EG-CHECK: @test4
-;EG-CHECK: SUB_INT {{\** *}}T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}
-;EG-CHECK: SUB_INT {{\** *}}T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}
-;EG-CHECK: SUB_INT {{\** *}}T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}
-;EG-CHECK: SUB_INT {{\** *}}T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}
+;FUNC-LABEL: @test4
+;EG: SUB_INT {{\** *}}T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}
+;EG: SUB_INT {{\** *}}T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}
+;EG: SUB_INT {{\** *}}T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}
+;EG: SUB_INT {{\** *}}T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}
-;SI-CHECK: @test4
-;SI-CHECK: V_SUB_I32_e32 v{{[0-9]+, v[0-9]+, v[0-9]+}}
-;SI-CHECK: V_SUB_I32_e32 v{{[0-9]+, v[0-9]+, v[0-9]+}}
-;SI-CHECK: V_SUB_I32_e32 v{{[0-9]+, v[0-9]+, v[0-9]+}}
-;SI-CHECK: V_SUB_I32_e32 v{{[0-9]+, v[0-9]+, v[0-9]+}}
+;SI: V_SUB_I32_e32 v{{[0-9]+, v[0-9]+, v[0-9]+}}
+;SI: V_SUB_I32_e32 v{{[0-9]+, v[0-9]+, v[0-9]+}}
+;SI: V_SUB_I32_e32 v{{[0-9]+, v[0-9]+, v[0-9]+}}
+;SI: V_SUB_I32_e32 v{{[0-9]+, v[0-9]+, v[0-9]+}}
define void @test4(<4 x i32> addrspace(1)* %out, <4 x i32> addrspace(1)* %in) {
%b_ptr = getelementptr <4 x i32> addrspace(1)* %in, i32 1
@@ -38,3 +36,24 @@ define void @test4(<4 x i32> addrspace(1)* %out, <4 x i32> addrspace(1)* %in) {
store <4 x i32> %result, <4 x i32> addrspace(1)* %out
ret void
}
+
+;FUNC_LABEL: @test5
+
+;EG-DAG: SETGE_UINT
+;EG-DAG: CNDE_INT
+;EG-DAG: SUB_INT
+;EG-DAG: SUB_INT
+;EG-DAG: SUB_INT
+
+;SI: S_XOR_B64
+;SI-DAG: S_ADD_I32
+;SI-DAG: S_ADDC_U32
+;SI-DAG: S_ADD_I32
+;SI-DAG: S_ADDC_U32
+
+define void @test5(i64 addrspace(1)* %out, i64 %a, i64 %b) {
+entry:
+ %0 = sub i64 %a, %b
+ store i64 %0, i64 addrspace(1)* %out
+ ret void
+}
diff --git a/test/CodeGen/R600/trunc-store-i1.ll b/test/CodeGen/R600/trunc-store-i1.ll
index a888943..a3975c8 100644
--- a/test/CodeGen/R600/trunc-store-i1.ll
+++ b/test/CodeGen/R600/trunc-store-i1.ll
@@ -1,4 +1,4 @@
-; RUN: llc -march=r600 -mcpu=SI < %s | FileCheck -check-prefix=SI %s
+; RUN: llc -march=r600 -mcpu=SI -verify-machineinstrs< %s | FileCheck -check-prefix=SI %s
; SI-LABEL: @global_truncstore_i32_to_i1
diff --git a/test/CodeGen/R600/trunc.ll b/test/CodeGen/R600/trunc.ll
index 8a759dc..31cdfcd 100644
--- a/test/CodeGen/R600/trunc.ll
+++ b/test/CodeGen/R600/trunc.ll
@@ -3,7 +3,7 @@
define void @trunc_i64_to_i32_store(i32 addrspace(1)* %out, i64 %in) {
; SI-LABEL: @trunc_i64_to_i32_store
-; SI: S_LOAD_DWORD s0, s[0:1], 11
+; SI: S_LOAD_DWORD s0, s[0:1], 0xb
; SI: V_MOV_B32_e32 v0, s0
; SI: BUFFER_STORE_DWORD v0
@@ -31,8 +31,9 @@ define void @trunc_load_shl_i64(i32 addrspace(1)* %out, i64 %a) {
; SI-LABEL: @trunc_shl_i64:
; SI: S_LOAD_DWORDX2 s{{\[}}[[LO_SREG:[0-9]+]]:{{[0-9]+\]}},
-; SI: V_ADD_I32_e32 v[[LO_ADD:[0-9]+]], s[[LO_SREG]],
-; SI: V_LSHL_B64 v{{\[}}[[LO_VREG:[0-9]+]]:{{[0-9]+\]}}, v{{\[}}[[LO_ADD]]:{{[0-9]+\]}}, 2
+; SI: S_ADD_I32 s[[LO_ADD:[0-9]+]], s[[LO_SREG]],
+; SI: S_LSHL_B64 s{{\[}}[[LO_SREG2:[0-9]+]]:{{[0-9]+\]}}, s{{\[}}[[LO_ADD]]:{{[0-9]+\]}}, 2
+; SI: V_MOV_B32_e32 v[[LO_VREG:[0-9]+]], s[[LO_SREG2]]
; SI: BUFFER_STORE_DWORD v[[LO_VREG]],
define void @trunc_shl_i64(i64 addrspace(1)* %out2, i32 addrspace(1)* %out, i64 %a) {
%aa = add i64 %a, 234 ; Prevent shrinking store.
diff --git a/test/CodeGen/R600/uaddo.ll b/test/CodeGen/R600/uaddo.ll
new file mode 100644
index 0000000..3b69687
--- /dev/null
+++ b/test/CodeGen/R600/uaddo.ll
@@ -0,0 +1,17 @@
+; RUN: llc -march=r600 -mcpu=SI -verify-machineinstrs< %s | FileCheck -check-prefix=SI %s
+
+declare { i64, i1 } @llvm.uadd.with.overflow.i64(i64, i64) nounwind readnone
+
+; SI-LABEL: @uaddo_i64_zext
+; SI: ADD
+; SI: ADDC
+; SI: ADDC
+define void @uaddo_i64_zext(i64 addrspace(1)* %out, i64 %a, i64 %b) nounwind {
+ %uadd = call { i64, i1 } @llvm.uadd.with.overflow.i64(i64 %a, i64 %b) nounwind
+ %val = extractvalue { i64, i1 } %uadd, 0
+ %carry = extractvalue { i64, i1 } %uadd, 1
+ %ext = zext i1 %carry to i64
+ %add2 = add i64 %val, %ext
+ store i64 %add2, i64 addrspace(1)* %out, align 8
+ ret void
+}
diff --git a/test/CodeGen/R600/udivrem64.ll b/test/CodeGen/R600/udivrem64.ll
new file mode 100644
index 0000000..a71315a
--- /dev/null
+++ b/test/CodeGen/R600/udivrem64.ll
@@ -0,0 +1,82 @@
+;XUN: llc < %s -march=r600 -mcpu=SI -verify-machineinstrs| FileCheck --check-prefix=SI --check-prefix=FUNC %s
+;RUN: llc < %s -march=r600 -mcpu=redwood | FileCheck --check-prefix=EG --check-prefix=FUNC %s
+
+;FUNC-LABEL: @test_udiv
+;EG: RECIP_UINT
+;EG: LSHL {{.*}}, 1,
+;EG: BFE_UINT
+;EG: BFE_UINT
+;EG: BFE_UINT
+;EG: BFE_UINT
+;EG: BFE_UINT
+;EG: BFE_UINT
+;EG: BFE_UINT
+;EG: BFE_UINT
+;EG: BFE_UINT
+;EG: BFE_UINT
+;EG: BFE_UINT
+;EG: BFE_UINT
+;EG: BFE_UINT
+;EG: BFE_UINT
+;EG: BFE_UINT
+;EG: BFE_UINT
+;EG: BFE_UINT
+;EG: BFE_UINT
+;EG: BFE_UINT
+;EG: BFE_UINT
+;EG: BFE_UINT
+;EG: BFE_UINT
+;EG: BFE_UINT
+;EG: BFE_UINT
+;EG: BFE_UINT
+;EG: BFE_UINT
+;EG: BFE_UINT
+;EG: BFE_UINT
+;EG: BFE_UINT
+;EG: BFE_UINT
+;SI: S_ENDPGM
+define void @test_udiv(i64 addrspace(1)* %out, i64 %x, i64 %y) {
+ %result = udiv i64 %x, %y
+ store i64 %result, i64 addrspace(1)* %out
+ ret void
+}
+
+;FUNC-LABEL: @test_urem
+;EG: RECIP_UINT
+;EG: BFE_UINT
+;EG: BFE_UINT
+;EG: BFE_UINT
+;EG: BFE_UINT
+;EG: BFE_UINT
+;EG: BFE_UINT
+;EG: BFE_UINT
+;EG: BFE_UINT
+;EG: BFE_UINT
+;EG: BFE_UINT
+;EG: BFE_UINT
+;EG: BFE_UINT
+;EG: BFE_UINT
+;EG: BFE_UINT
+;EG: BFE_UINT
+;EG: BFE_UINT
+;EG: BFE_UINT
+;EG: BFE_UINT
+;EG: BFE_UINT
+;EG: BFE_UINT
+;EG: BFE_UINT
+;EG: BFE_UINT
+;EG: BFE_UINT
+;EG: BFE_UINT
+;EG: BFE_UINT
+;EG: BFE_UINT
+;EG: BFE_UINT
+;EG: BFE_UINT
+;EG: BFE_UINT
+;EG: BFE_UINT
+;EG: AND_INT {{.*}}, 1,
+;SI: S_ENDPGM
+define void @test_urem(i64 addrspace(1)* %out, i64 %x, i64 %y) {
+ %result = urem i64 %x, %y
+ store i64 %result, i64 addrspace(1)* %out
+ ret void
+}
diff --git a/test/CodeGen/R600/uint_to_fp.f64.ll b/test/CodeGen/R600/uint_to_fp.f64.ll
new file mode 100644
index 0000000..75150c2
--- /dev/null
+++ b/test/CodeGen/R600/uint_to_fp.f64.ll
@@ -0,0 +1,9 @@
+; RUN: llc -march=r600 -mcpu=SI -verify-machineinstrs < %s | FileCheck -check-prefix=SI %s
+
+; SI-LABEL: @uint_to_fp_f64_i32
+; SI: V_CVT_F64_U32_e32
+define void @uint_to_fp_f64_i32(double addrspace(1)* %out, i32 %in) {
+ %cast = uitofp i32 %in to double
+ store double %cast, double addrspace(1)* %out, align 8
+ ret void
+}
diff --git a/test/CodeGen/R600/unaligned-load-store.ll b/test/CodeGen/R600/unaligned-load-store.ll
index 2824ff8..4df69d1 100644
--- a/test/CodeGen/R600/unaligned-load-store.ll
+++ b/test/CodeGen/R600/unaligned-load-store.ll
@@ -1,4 +1,4 @@
-; RUN: llc -march=r600 -mcpu=SI < %s | FileCheck -check-prefix=SI %s
+; RUN: llc -march=r600 -mcpu=SI -verify-machineinstrs< %s | FileCheck -check-prefix=SI %s
; SI-LABEL: @unaligned_load_store_i32:
; DS_READ_U32 {{v[0-9]+}}, 0, [[REG]]
diff --git a/test/CodeGen/R600/v_cndmask.ll b/test/CodeGen/R600/v_cndmask.ll
index f8e9655..84087ee 100644
--- a/test/CodeGen/R600/v_cndmask.ll
+++ b/test/CodeGen/R600/v_cndmask.ll
@@ -3,7 +3,8 @@
; SI: @v_cnd_nan
; SI: V_CNDMASK_B32_e64 v{{[0-9]}},
; SI-DAG: v{{[0-9]}}
-; SI-DAG: {{nan|#QNAN}}
+; All nan values are converted to 0xffffffff
+; SI-DAG: -1
define void @v_cnd_nan(float addrspace(1)* %out, i32 %c, float %f) {
entry:
%0 = icmp ne i32 %c, 0
diff --git a/test/CodeGen/R600/valu-i1.ll b/test/CodeGen/R600/valu-i1.ll
new file mode 100644
index 0000000..5d5e3ff
--- /dev/null
+++ b/test/CodeGen/R600/valu-i1.ll
@@ -0,0 +1,39 @@
+; RUN: llc < %s -march=r600 -mcpu=SI | FileCheck --check-prefix=SI %s
+
+; Make sure the i1 values created by the cfg structurizer pass are
+; moved using VALU instructions
+; SI-NOT: S_MOV_B64 s[{{[0-9]:[0-9]}}], -1
+; SI: V_MOV_B32_e32 v{{[0-9]}}, -1
+define void @test_if(i32 %a, i32 %b, i32 addrspace(1)* %src, i32 addrspace(1)* %dst) {
+entry:
+ switch i32 %a, label %default [
+ i32 0, label %case0
+ i32 1, label %case1
+ ]
+
+case0:
+ %arrayidx1 = getelementptr i32 addrspace(1)* %dst, i32 %b
+ store i32 0, i32 addrspace(1)* %arrayidx1, align 4
+ br label %end
+
+case1:
+ %arrayidx5 = getelementptr i32 addrspace(1)* %dst, i32 %b
+ store i32 1, i32 addrspace(1)* %arrayidx5, align 4
+ br label %end
+
+default:
+ %cmp8 = icmp eq i32 %a, 2
+ %arrayidx10 = getelementptr i32 addrspace(1)* %dst, i32 %b
+ br i1 %cmp8, label %if, label %else
+
+if:
+ store i32 2, i32 addrspace(1)* %arrayidx10, align 4
+ br label %end
+
+else:
+ store i32 3, i32 addrspace(1)* %arrayidx10, align 4
+ br label %end
+
+end:
+ ret void
+}
diff --git a/test/CodeGen/R600/work-item-intrinsics.ll b/test/CodeGen/R600/work-item-intrinsics.ll
index 9618d7f..90079b0 100644
--- a/test/CodeGen/R600/work-item-intrinsics.ll
+++ b/test/CodeGen/R600/work-item-intrinsics.ll
@@ -19,7 +19,7 @@ entry:
; R600-CHECK: MEM_RAT_CACHELESS STORE_RAW [[VAL:T[0-9]+\.X]]
; R600-CHECK: MOV [[VAL]], KC0[0].Y
; SI-CHECK: @ngroups_y
-; SI-CHECK: S_LOAD_DWORD [[VAL:s[0-9]+]], s[0:1], 1
+; SI-CHECK: S_LOAD_DWORD [[VAL:s[0-9]+]], s[0:1], 0x1
; SI-CHECK: V_MOV_B32_e32 [[VVAL:v[0-9]+]], [[VAL]]
; SI-CHECK: BUFFER_STORE_DWORD [[VVAL]]
define void @ngroups_y (i32 addrspace(1)* %out) {
@@ -33,7 +33,7 @@ entry:
; R600-CHECK: MEM_RAT_CACHELESS STORE_RAW [[VAL:T[0-9]+\.X]]
; R600-CHECK: MOV [[VAL]], KC0[0].Z
; SI-CHECK: @ngroups_z
-; SI-CHECK: S_LOAD_DWORD [[VAL:s[0-9]+]], s[0:1], 2
+; SI-CHECK: S_LOAD_DWORD [[VAL:s[0-9]+]], s[0:1], 0x2
; SI-CHECK: V_MOV_B32_e32 [[VVAL:v[0-9]+]], [[VAL]]
; SI-CHECK: BUFFER_STORE_DWORD [[VVAL]]
define void @ngroups_z (i32 addrspace(1)* %out) {
@@ -47,7 +47,7 @@ entry:
; R600-CHECK: MEM_RAT_CACHELESS STORE_RAW [[VAL:T[0-9]+\.X]]
; R600-CHECK: MOV [[VAL]], KC0[0].W
; SI-CHECK: @global_size_x
-; SI-CHECK: S_LOAD_DWORD [[VAL:s[0-9]+]], s[0:1], 3
+; SI-CHECK: S_LOAD_DWORD [[VAL:s[0-9]+]], s[0:1], 0x3
; SI-CHECK: V_MOV_B32_e32 [[VVAL:v[0-9]+]], [[VAL]]
; SI-CHECK: BUFFER_STORE_DWORD [[VVAL]]
define void @global_size_x (i32 addrspace(1)* %out) {
@@ -61,7 +61,7 @@ entry:
; R600-CHECK: MEM_RAT_CACHELESS STORE_RAW [[VAL:T[0-9]+\.X]]
; R600-CHECK: MOV [[VAL]], KC0[1].X
; SI-CHECK: @global_size_y
-; SI-CHECK: S_LOAD_DWORD [[VAL:s[0-9]+]], s[0:1], 4
+; SI-CHECK: S_LOAD_DWORD [[VAL:s[0-9]+]], s[0:1], 0x4
; SI-CHECK: V_MOV_B32_e32 [[VVAL:v[0-9]+]], [[VAL]]
; SI-CHECK: BUFFER_STORE_DWORD [[VVAL]]
define void @global_size_y (i32 addrspace(1)* %out) {
@@ -75,7 +75,7 @@ entry:
; R600-CHECK: MEM_RAT_CACHELESS STORE_RAW [[VAL:T[0-9]+\.X]]
; R600-CHECK: MOV [[VAL]], KC0[1].Y
; SI-CHECK: @global_size_z
-; SI-CHECK: S_LOAD_DWORD [[VAL:s[0-9]+]], s[0:1], 5
+; SI-CHECK: S_LOAD_DWORD [[VAL:s[0-9]+]], s[0:1], 0x5
; SI-CHECK: V_MOV_B32_e32 [[VVAL:v[0-9]+]], [[VAL]]
; SI-CHECK: BUFFER_STORE_DWORD [[VVAL]]
define void @global_size_z (i32 addrspace(1)* %out) {
@@ -89,7 +89,7 @@ entry:
; R600-CHECK: MEM_RAT_CACHELESS STORE_RAW [[VAL:T[0-9]+\.X]]
; R600-CHECK: MOV [[VAL]], KC0[1].Z
; SI-CHECK: @local_size_x
-; SI-CHECK: S_LOAD_DWORD [[VAL:s[0-9]+]], s[0:1], 6
+; SI-CHECK: S_LOAD_DWORD [[VAL:s[0-9]+]], s[0:1], 0x6
; SI-CHECK: V_MOV_B32_e32 [[VVAL:v[0-9]+]], [[VAL]]
; SI-CHECK: BUFFER_STORE_DWORD [[VVAL]]
define void @local_size_x (i32 addrspace(1)* %out) {
@@ -103,7 +103,7 @@ entry:
; R600-CHECK: MEM_RAT_CACHELESS STORE_RAW [[VAL:T[0-9]+\.X]]
; R600-CHECK: MOV [[VAL]], KC0[1].W
; SI-CHECK: @local_size_y
-; SI-CHECK: S_LOAD_DWORD [[VAL:s[0-9]+]], s[0:1], 7
+; SI-CHECK: S_LOAD_DWORD [[VAL:s[0-9]+]], s[0:1], 0x7
; SI-CHECK: V_MOV_B32_e32 [[VVAL:v[0-9]+]], [[VAL]]
; SI-CHECK: BUFFER_STORE_DWORD [[VVAL]]
define void @local_size_y (i32 addrspace(1)* %out) {
@@ -117,7 +117,7 @@ entry:
; R600-CHECK: MEM_RAT_CACHELESS STORE_RAW [[VAL:T[0-9]+\.X]]
; R600-CHECK: MOV [[VAL]], KC0[2].X
; SI-CHECK: @local_size_z
-; SI-CHECK: S_LOAD_DWORD [[VAL:s[0-9]+]], s[0:1], 8
+; SI-CHECK: S_LOAD_DWORD [[VAL:s[0-9]+]], s[0:1], 0x8
; SI-CHECK: V_MOV_B32_e32 [[VVAL:v[0-9]+]], [[VAL]]
; SI-CHECK: BUFFER_STORE_DWORD [[VVAL]]
define void @local_size_z (i32 addrspace(1)* %out) {
diff --git a/test/CodeGen/R600/xor.ll b/test/CodeGen/R600/xor.ll
index 49ed12d..5a5c86d 100644
--- a/test/CodeGen/R600/xor.ll
+++ b/test/CodeGen/R600/xor.ll
@@ -72,3 +72,21 @@ define void @scalar_xor_i32(i32 addrspace(1)* %out, i32 %a, i32 %b) {
store i32 %result, i32 addrspace(1)* %out
ret void
}
+
+; SI-CHECK-LABEL: @scalar_not_i32
+; SI-CHECK: S_NOT_B32
+define void @scalar_not_i32(i32 addrspace(1)* %out, i32 %a) {
+ %result = xor i32 %a, -1
+ store i32 %result, i32 addrspace(1)* %out
+ ret void
+}
+
+; SI-CHECK-LABEL: @vector_not_i32
+; SI-CHECK: V_NOT_B32
+define void @vector_not_i32(i32 addrspace(1)* %out, i32 addrspace(1)* %in0, i32 addrspace(1)* %in1) {
+ %a = load i32 addrspace(1)* %in0
+ %b = load i32 addrspace(1)* %in1
+ %result = xor i32 %a, -1
+ store i32 %result, i32 addrspace(1)* %out
+ ret void
+}
diff --git a/test/CodeGen/R600/zero_extend.ll b/test/CodeGen/R600/zero_extend.ll
index a114bfc..8585d4a 100644
--- a/test/CodeGen/R600/zero_extend.ll
+++ b/test/CodeGen/R600/zero_extend.ll
@@ -6,8 +6,9 @@
; R600-CHECK: MEM_RAT_CACHELESS STORE_RAW
; SI-CHECK: @test
-; SI-CHECK: V_MOV_B32_e32 v[[ZERO:[0-9]]], 0
-; SI-CHECK: BUFFER_STORE_DWORDX2 v[0:[[ZERO]]{{\]}}
+; SI-CHECK: S_MOV_B32 [[ZERO:s[0-9]]], 0
+; SI-CHECK: V_MOV_B32_e32 v[[V_ZERO:[0-9]]], [[ZERO]]
+; SI-CHECK: BUFFER_STORE_DWORDX2 v[0:[[V_ZERO]]{{\]}}
define void @test(i64 addrspace(1)* %out, i32 %a, i32 %b, i32 %c) {
entry:
%0 = mul i32 %a, %b
@@ -26,3 +27,14 @@ entry:
store i32 %1, i32 addrspace(1)* %out
ret void
}
+
+; SI-CHECK-LABEL: @zext_i1_to_i64
+; SI-CHECK: V_CMP_EQ_I32
+; SI-CHECK: V_CNDMASK_B32
+; SI-CHECK: S_MOV_B32 s{{[0-9]+}}, 0
+define void @zext_i1_to_i64(i64 addrspace(1)* %out, i32 %a, i32 %b) nounwind {
+ %cmp = icmp eq i32 %a, %b
+ %ext = zext i1 %cmp to i64
+ store i64 %ext, i64 addrspace(1)* %out, align 8
+ ret void
+}