aboutsummaryrefslogtreecommitdiffstats
path: root/test/CodeGen/R600/cvt_f32_ubyte.ll
diff options
context:
space:
mode:
Diffstat (limited to 'test/CodeGen/R600/cvt_f32_ubyte.ll')
-rw-r--r--test/CodeGen/R600/cvt_f32_ubyte.ll39
1 files changed, 30 insertions, 9 deletions
diff --git a/test/CodeGen/R600/cvt_f32_ubyte.ll b/test/CodeGen/R600/cvt_f32_ubyte.ll
index 0d1db19..4d4bf93 100644
--- a/test/CodeGen/R600/cvt_f32_ubyte.ll
+++ b/test/CodeGen/R600/cvt_f32_ubyte.ll
@@ -1,4 +1,5 @@
-; RUN: llc -march=r600 -mcpu=SI -verify-machineinstrs < %s | FileCheck -check-prefix=SI %s
+; RUN: llc -march=amdgcn -mcpu=SI -verify-machineinstrs < %s | FileCheck -check-prefix=SI %s
+; RUN: llc -march=amdgcn -mcpu=tonga -verify-machineinstrs < %s | FileCheck -check-prefix=SI %s
; SI-LABEL: {{^}}load_i8_to_f32:
; SI: buffer_load_ubyte [[LOADREG:v[0-9]+]],
@@ -22,7 +23,7 @@ define void @load_i8_to_f32(float addrspace(1)* noalias %out, i8 addrspace(1)* n
; SI-DAG: v_cvt_f32_ubyte0_e32 v[[LORESULT:[0-9]+]], [[LOADREG]]
; SI: buffer_store_dwordx2 v{{\[}}[[LORESULT]]:[[HIRESULT]]{{\]}},
define void @load_v2i8_to_v2f32(<2 x float> addrspace(1)* noalias %out, <2 x i8> addrspace(1)* noalias %in) nounwind {
- %load = load <2 x i8> addrspace(1)* %in, align 1
+ %load = load <2 x i8> addrspace(1)* %in, align 2
%cvt = uitofp <2 x i8> %load to <2 x float>
store <2 x float> %cvt, <2 x float> addrspace(1)* %out, align 16
ret void
@@ -36,18 +37,14 @@ define void @load_v2i8_to_v2f32(<2 x float> addrspace(1)* noalias %out, <2 x i8>
; SI-DAG: v_cvt_f32_ubyte0_e32
; SI: buffer_store_dwordx2 v{{\[}}[[LORESULT]]:[[HIRESULT]]{{\]}},
define void @load_v3i8_to_v3f32(<3 x float> addrspace(1)* noalias %out, <3 x i8> addrspace(1)* noalias %in) nounwind {
- %load = load <3 x i8> addrspace(1)* %in, align 1
+ %load = load <3 x i8> addrspace(1)* %in, align 4
%cvt = uitofp <3 x i8> %load to <3 x float>
store <3 x float> %cvt, <3 x float> addrspace(1)* %out, align 16
ret void
}
; SI-LABEL: {{^}}load_v4i8_to_v4f32:
-; We can't use buffer_load_dword here, because the load is byte aligned, and
-; buffer_load_dword requires dword alignment.
-; SI: buffer_load_ushort
-; SI: buffer_load_ushort
-; SI: v_or_b32_e32 [[LOADREG:v[0-9]+]]
+; SI: buffer_load_dword [[LOADREG:v[0-9]+]]
; SI-NOT: bfe
; SI-NOT: lshr
; SI-DAG: v_cvt_f32_ubyte3_e32 v[[HIRESULT:[0-9]+]], [[LOADREG]]
@@ -56,6 +53,30 @@ define void @load_v3i8_to_v3f32(<3 x float> addrspace(1)* noalias %out, <3 x i8>
; SI-DAG: v_cvt_f32_ubyte0_e32 v[[LORESULT:[0-9]+]], [[LOADREG]]
; SI: buffer_store_dwordx4 v{{\[}}[[LORESULT]]:[[HIRESULT]]{{\]}},
define void @load_v4i8_to_v4f32(<4 x float> addrspace(1)* noalias %out, <4 x i8> addrspace(1)* noalias %in) nounwind {
+ %load = load <4 x i8> addrspace(1)* %in, align 4
+ %cvt = uitofp <4 x i8> %load to <4 x float>
+ store <4 x float> %cvt, <4 x float> addrspace(1)* %out, align 16
+ ret void
+}
+
+; This should not be adding instructions to shift into the correct
+; position in the word for the component.
+
+; SI-LABEL: {{^}}load_v4i8_to_v4f32_unaligned:
+; SI: buffer_load_ubyte [[LOADREG3:v[0-9]+]]
+; SI: buffer_load_ubyte [[LOADREG2:v[0-9]+]]
+; SI: buffer_load_ubyte [[LOADREG1:v[0-9]+]]
+; SI: buffer_load_ubyte [[LOADREG0:v[0-9]+]]
+; SI-NOT: v_lshlrev_b32
+; SI-NOT: v_or_b32
+
+; SI-DAG: v_cvt_f32_ubyte0_e32 v[[LORESULT:[0-9]+]], [[LOADREG0]]
+; SI-DAG: v_cvt_f32_ubyte0_e32 v{{[0-9]+}}, [[LOADREG1]]
+; SI-DAG: v_cvt_f32_ubyte0_e32 v{{[0-9]+}}, [[LOADREG2]]
+; SI-DAG: v_cvt_f32_ubyte0_e32 v[[HIRESULT:[0-9]+]], [[LOADREG3]]
+
+; SI: buffer_store_dwordx4 v{{\[}}[[LORESULT]]:[[HIRESULT]]{{\]}},
+define void @load_v4i8_to_v4f32_unaligned(<4 x float> addrspace(1)* noalias %out, <4 x i8> addrspace(1)* noalias %in) nounwind {
%load = load <4 x i8> addrspace(1)* %in, align 1
%cvt = uitofp <4 x i8> %load to <4 x float>
store <4 x float> %cvt, <4 x float> addrspace(1)* %out, align 16
@@ -125,7 +146,7 @@ define void @load_v7i8_to_v7f32(<7 x float> addrspace(1)* noalias %out, <7 x i8>
; SI: buffer_store_dword
; SI: buffer_store_dword
define void @load_v8i8_to_v8f32(<8 x float> addrspace(1)* noalias %out, <8 x i8> addrspace(1)* noalias %in) nounwind {
- %load = load <8 x i8> addrspace(1)* %in, align 1
+ %load = load <8 x i8> addrspace(1)* %in, align 8
%cvt = uitofp <8 x i8> %load to <8 x float>
store <8 x float> %cvt, <8 x float> addrspace(1)* %out, align 16
ret void