author     Stephen Hines <srhines@google.com>  2014-05-29 02:49:00 -0700
committer  Stephen Hines <srhines@google.com>  2014-05-29 02:49:00 -0700
commit     dce4a407a24b04eebc6a376f8e62b41aaa7b071f (patch)
tree       dcebc53f2b182f145a2e659393bf9a0472cedf23 /test/CodeGen/R600/sext-in-reg.ll
parent     220b921aed042f9e520c26cffd8282a94c66c3d5 (diff)
Update LLVM for 3.5 rebase (r209712).
Change-Id: I149556c940fb7dc92d075273c87ff584f400941f
Diffstat (limited to 'test/CodeGen/R600/sext-in-reg.ll')
-rw-r--r--  test/CodeGen/R600/sext-in-reg.ll | 371
1 file changed, 312 insertions(+), 59 deletions(-)
diff --git a/test/CodeGen/R600/sext-in-reg.ll b/test/CodeGen/R600/sext-in-reg.ll
index eef3f07..1b02e4b 100644
--- a/test/CodeGen/R600/sext-in-reg.ll
+++ b/test/CodeGen/R600/sext-in-reg.ll
@@ -1,15 +1,18 @@
-; RUN: llc < %s -march=r600 -mcpu=SI | FileCheck -check-prefix=SI -check-prefix=FUNC %s
-; RUN: llc < %s -march=r600 -mcpu=cypress | FileCheck -check-prefix=EG -check-prefix=FUNC %s
+; RUN: llc -march=r600 -mcpu=SI -verify-machineinstrs < %s | FileCheck -check-prefix=SI -check-prefix=FUNC %s
+; RUN: llc -march=r600 -mcpu=cypress -verify-machineinstrs < %s | FileCheck -check-prefix=EG -check-prefix=FUNC %s
 
 declare i32 @llvm.AMDGPU.imax(i32, i32) nounwind readnone
 
 ; FUNC-LABEL: @sext_in_reg_i1_i32
 ; SI: S_LOAD_DWORD [[ARG:s[0-9]+]],
-; SI: V_BFE_I32 [[EXTRACT:v[0-9]+]], [[ARG]], 0, 1
+; SI: S_BFE_I32 [[SEXTRACT:s[0-9]+]], [[ARG]], 0x10000
+; SI: V_MOV_B32_e32 [[EXTRACT:v[0-9]+]], [[SEXTRACT]]
 ; SI: BUFFER_STORE_DWORD [[EXTRACT]],
 
-; EG: BFE_INT
+; EG: MEM_{{.*}} STORE_{{.*}} [[RES:T[0-9]+\.[XYZW]]], [[ADDR:T[0-9]+.[XYZW]]]
+; EG: BFE_INT [[RES]], {{.*}}, 0.0, 1
+; EG-NEXT: LSHR * [[ADDR]]
 define void @sext_in_reg_i1_i32(i32 addrspace(1)* %out, i32 %in) {
   %shl = shl i32 %in, 31
   %sext = ashr i32 %shl, 31
@@ -19,10 +22,14 @@ define void @sext_in_reg_i1_i32(i32 addrspace(1)* %out, i32 %in) {
 ; FUNC-LABEL: @sext_in_reg_i8_to_i32
 ; SI: S_ADD_I32 [[VAL:s[0-9]+]],
-; SI: V_BFE_I32 [[EXTRACT:v[0-9]+]], [[VAL]], 0, 8
-; SI: BUFFER_STORE_DWORD [[EXTRACT]],
-
-; EG: BFE_INT
+; SI: S_SEXT_I32_I8 [[EXTRACT:s[0-9]+]], [[VAL]]
+; SI: V_MOV_B32_e32 [[VEXTRACT:v[0-9]+]], [[EXTRACT]]
+; SI: BUFFER_STORE_DWORD [[VEXTRACT]],
+
+; EG: MEM_{{.*}} STORE_{{.*}} [[RES:T[0-9]+\.[XYZW]]], [[ADDR:T[0-9]+.[XYZW]]]
+; EG: ADD_INT
+; EG-NEXT: BFE_INT [[RES]], {{.*}}, 0.0, literal
+; EG-NEXT: LSHR * [[ADDR]]
 define void @sext_in_reg_i8_to_i32(i32 addrspace(1)* %out, i32 %a, i32 %b) nounwind {
   %c = add i32 %a, %b ; add to prevent folding into extload
   %shl = shl i32 %c, 24
@@ -33,10 +40,14 @@ define void @sext_in_reg_i8_to_i32(i32 addrspace(1)* %out, i32 %a, i32 %b) nounw
 ; FUNC-LABEL: @sext_in_reg_i16_to_i32
 ; SI: S_ADD_I32 [[VAL:s[0-9]+]],
-; SI: V_BFE_I32 [[EXTRACT:v[0-9]+]], [[VAL]], 0, 16
-; SI: BUFFER_STORE_DWORD [[EXTRACT]],
-
-; EG: BFE_INT
+; SI: S_SEXT_I32_I16 [[EXTRACT:s[0-9]+]], [[VAL]]
+; SI: V_MOV_B32_e32 [[VEXTRACT:v[0-9]+]], [[EXTRACT]]
+; SI: BUFFER_STORE_DWORD [[VEXTRACT]],
+
+; EG: MEM_{{.*}} STORE_{{.*}} [[RES:T[0-9]+\.[XYZW]]], [[ADDR:T[0-9]+.[XYZW]]]
+; EG: ADD_INT
+; EG-NEXT: BFE_INT [[RES]], {{.*}}, 0.0, literal
+; EG-NEXT: LSHR * [[ADDR]]
 define void @sext_in_reg_i16_to_i32(i32 addrspace(1)* %out, i32 %a, i32 %b) nounwind {
   %c = add i32 %a, %b ; add to prevent folding into extload
   %shl = shl i32 %c, 16
@@ -47,10 +58,14 @@ define void @sext_in_reg_i16_to_i32(i32 addrspace(1)* %out, i32 %a, i32 %b) noun
 ; FUNC-LABEL: @sext_in_reg_i8_to_v1i32
 ; SI: S_ADD_I32 [[VAL:s[0-9]+]],
-; SI: V_BFE_I32 [[EXTRACT:v[0-9]+]], [[VAL]], 0, 8
-; SI: BUFFER_STORE_DWORD [[EXTRACT]],
-
-; EG: BFE_INT
+; SI: S_SEXT_I32_I8 [[EXTRACT:s[0-9]+]], [[VAL]]
+; SI: V_MOV_B32_e32 [[VEXTRACT:v[0-9]+]], [[EXTRACT]]
+; SI: BUFFER_STORE_DWORD [[VEXTRACT]],
+
+; EG: MEM_{{.*}} STORE_{{.*}} [[RES:T[0-9]+\.[XYZW]]], [[ADDR:T[0-9]+.[XYZW]]]
+; EG: ADD_INT
+; EG-NEXT: BFE_INT [[RES]], {{.*}}, 0.0, literal
+; EG-NEXT: LSHR * [[ADDR]]
 define void @sext_in_reg_i8_to_v1i32(<1 x i32> addrspace(1)* %out, <1 x i32> %a, <1 x i32> %b) nounwind {
   %c = add <1 x i32> %a, %b ; add to prevent folding into extload
   %shl = shl <1 x i32> %c, <i32 24>
@@ -59,13 +74,35 @@ define void @sext_in_reg_i8_to_v1i32(<1 x i32> addrspace(1)* %out, <1 x i32> %a,
   ret void
 }
 
+; FUNC-LABEL: @sext_in_reg_i1_to_i64
+; SI: S_ADD_I32 [[VAL:s[0-9]+]],
+; SI: S_BFE_I32 s{{[0-9]+}}, s{{[0-9]+}}, 0x10000
+; SI: S_MOV_B32 {{s[0-9]+}}, -1
+; SI: BUFFER_STORE_DWORDX2
+define void @sext_in_reg_i1_to_i64(i64 addrspace(1)* %out, i64 %a, i64 %b) nounwind {
+  %c = add i64 %a, %b
+  %shl = shl i64 %c, 63
+  %ashr = ashr i64 %shl, 63
+  store i64 %ashr, i64 addrspace(1)* %out, align 8
+  ret void
+}
+
 ; FUNC-LABEL: @sext_in_reg_i8_to_i64
-; SI: V_BFE_I32 {{v[0-9]+}}, {{s[0-9]+}}, 0, 8
-; SI: V_ASHRREV_I32_e32 {{v[0-9]+}}, 31,
-; SI: BUFFER_STORE_DWORD
+; SI: S_ADD_I32 [[VAL:s[0-9]+]],
+; SI: S_SEXT_I32_I8 [[EXTRACT:s[0-9]+]], [[VAL]]
+; SI: S_MOV_B32 {{s[0-9]+}}, -1
+; SI: BUFFER_STORE_DWORDX2
 
-; EG: BFE_INT
-; EG: ASHR
+; EG: MEM_{{.*}} STORE_{{.*}} [[RES_LO:T[0-9]+\.[XYZW]]], [[ADDR_LO:T[0-9]+.[XYZW]]]
+; EG: MEM_{{.*}} STORE_{{.*}} [[RES_HI:T[0-9]+\.[XYZW]]], [[ADDR_HI:T[0-9]+.[XYZW]]]
+; EG: ADD_INT
+; EG-NEXT: BFE_INT {{\*?}} [[RES_LO]], {{.*}}, 0.0, literal
+; EG: ASHR [[RES_HI]]
+; EG-NOT: BFE_INT
+; EG: LSHR
+; EG: LSHR
+;; TODO Check address computation, using | with variables in {{}} does not work,
+;; also the _LO/_HI order might be different
 define void @sext_in_reg_i8_to_i64(i64 addrspace(1)* %out, i64 %a, i64 %b) nounwind {
   %c = add i64 %a, %b
   %shl = shl i64 %c, 56
@@ -75,12 +112,21 @@ define void @sext_in_reg_i8_to_i64(i64 addrspace(1)* %out, i64 %a, i64 %b) nounw
 }
 
 ; FUNC-LABEL: @sext_in_reg_i16_to_i64
-; SI: V_BFE_I32 {{v[0-9]+}}, {{s[0-9]+}}, 0, 16
-; SI: V_ASHRREV_I32_e32 {{v[0-9]+}}, 31,
-; SI: BUFFER_STORE_DWORD
+; SI: S_ADD_I32 [[VAL:s[0-9]+]],
+; SI: S_SEXT_I32_I16 [[EXTRACT:s[0-9]+]], [[VAL]]
+; SI: S_MOV_B32 {{s[0-9]+}}, -1
+; SI: BUFFER_STORE_DWORDX2
 
-; EG: BFE_INT
-; EG: ASHR
+; EG: MEM_{{.*}} STORE_{{.*}} [[RES_LO:T[0-9]+\.[XYZW]]], [[ADDR_LO:T[0-9]+.[XYZW]]]
+; EG: MEM_{{.*}} STORE_{{.*}} [[RES_HI:T[0-9]+\.[XYZW]]], [[ADDR_HI:T[0-9]+.[XYZW]]]
+; EG: ADD_INT
+; EG-NEXT: BFE_INT {{\*?}} [[RES_LO]], {{.*}}, 0.0, literal
+; EG: ASHR [[RES_HI]]
+; EG-NOT: BFE_INT
+; EG: LSHR
+; EG: LSHR
+;; TODO Check address computation, using | with variables in {{}} does not work,
+;; also the _LO/_HI order might be different
 define void @sext_in_reg_i16_to_i64(i64 addrspace(1)* %out, i64 %a, i64 %b) nounwind {
   %c = add i64 %a, %b
   %shl = shl i64 %c, 48
@@ -95,6 +141,17 @@ define void @sext_in_reg_i16_to_i64(i64 addrspace(1)* %out, i64 %a, i64 %b) noun
 ; SI: S_ADD_I32 [[ADD:s[0-9]+]],
 ; SI: S_ASHR_I32 s{{[0-9]+}}, [[ADD]], 31
 ; SI: BUFFER_STORE_DWORDX2
+
+; EG: MEM_{{.*}} STORE_{{.*}} [[RES_LO:T[0-9]+\.[XYZW]]], [[ADDR_LO:T[0-9]+.[XYZW]]]
+; EG: MEM_{{.*}} STORE_{{.*}} [[RES_HI:T[0-9]+\.[XYZW]]], [[ADDR_HI:T[0-9]+.[XYZW]]]
+; EG-NOT: BFE_INT
+; EG: ADD_INT {{\*?}} [[RES_LO]]
+; EG: ASHR [[RES_HI]]
+; EG: ADD_INT
+; EG: LSHR
+; EG: LSHR
+;; TODO Check address computation, using | with variables in {{}} does not work,
+;; also the _LO/_HI order might be different
 define void @sext_in_reg_i32_to_i64(i64 addrspace(1)* %out, i64 %a, i64 %b) nounwind {
   %c = add i64 %a, %b
   %shl = shl i64 %c, 32
@@ -105,8 +162,8 @@ define void @sext_in_reg_i32_to_i64(i64 addrspace(1)* %out, i64 %a, i64 %b) noun
 
 ; This is broken on Evergreen for some reason related to the <1 x i64> kernel arguments.
 ; XFUNC-LABEL: @sext_in_reg_i8_to_v1i64
-; XSI: V_BFE_I32 {{v[0-9]+}}, {{s[0-9]+}}, 0, 8
-; XSI: V_ASHRREV_I32_e32 {{v[0-9]+}}, 31,
+; XSI: S_BFE_I32 [[EXTRACT:s[0-9]+]], {{s[0-9]+}}, 524288
+; XSI: S_ASHR_I32 {{v[0-9]+}}, [[EXTRACT]], 31
 ; XSI: BUFFER_STORE_DWORD
 ; XEG: BFE_INT
 ; XEG: ASHR
@@ -122,7 +179,13 @@ define void @sext_in_reg_i32_to_i64(i64 addrspace(1)* %out, i64 %a, i64 %b) noun
 ; SI-NOT: BFE
 ; SI: S_LSHL_B32 [[REG:s[0-9]+]], {{s[0-9]+}}, 6
 ; SI: S_ASHR_I32 {{s[0-9]+}}, [[REG]], 7
+
+; EG: MEM_{{.*}} STORE_{{.*}} [[RES:T[0-9]+\.[XYZW]]], [[ADDR:T[0-9]+.[XYZW]]]
 ; EG-NOT: BFE
+; EG: ADD_INT
+; EG: LSHL
+; EG: ASHR [[RES]]
+; EG: LSHR {{\*?}} [[ADDR]]
 define void @sext_in_reg_i1_in_i32_other_amount(i32 addrspace(1)* %out, i32 %a, i32 %b) nounwind {
   %c = add i32 %a, %b
   %x = shl i32 %c, 6
@@ -136,7 +199,15 @@ define void @sext_in_reg_i1_in_i32_other_amount(i32 addrspace(1)* %out, i32 %a,
 ; SI: S_ASHR_I32 {{s[0-9]+}}, [[REG0]], 7
 ; SI: S_LSHL_B32 [[REG1:s[0-9]+]], {{s[0-9]}}, 6
 ; SI: S_ASHR_I32 {{s[0-9]+}}, [[REG1]], 7
+
+; EG: MEM_{{.*}} STORE_{{.*}} [[RES:T[0-9]+]]{{\.[XYZW][XYZW]}}, [[ADDR:T[0-9]+.[XYZW]]]
 ; EG-NOT: BFE
+; EG: ADD_INT
+; EG: LSHL
+; EG: ASHR [[RES]]
+; EG: LSHL
+; EG: ASHR [[RES]]
+; EG: LSHR {{\*?}} [[ADDR]]
 define void @sext_in_reg_v2i1_in_v2i32_other_amount(<2 x i32> addrspace(1)* %out, <2 x i32> %a, <2 x i32> %b) nounwind {
   %c = add <2 x i32> %a, %b
   %x = shl <2 x i32> %c, <i32 6, i32 6>
@@ -147,11 +218,14 @@ define void @sext_in_reg_v2i1_in_v2i32_other_amount(<2 x i32> addrspace(1)* %out
 
 ; FUNC-LABEL: @sext_in_reg_v2i1_to_v2i32
-; SI: V_BFE_I32 {{v[0-9]+}}, {{s[0-9]+}}, 0, 1
-; SI: V_BFE_I32 {{v[0-9]+}}, {{s[0-9]+}}, 0, 1
+; SI: S_BFE_I32 {{s[0-9]+}}, {{s[0-9]+}}, 0x10000
+; SI: S_BFE_I32 {{s[0-9]+}}, {{s[0-9]+}}, 0x10000
 ; SI: BUFFER_STORE_DWORDX2
-; EG: BFE
-; EG: BFE
+
+; EG: MEM_{{.*}} STORE_{{.*}} [[RES:T[0-9]+]]{{\.[XYZW][XYZW]}}, [[ADDR:T[0-9]+.[XYZW]]]
+; EG: BFE_INT [[RES]]
+; EG: BFE_INT [[RES]]
+; EG: LSHR {{\*?}} [[ADDR]]
 define void @sext_in_reg_v2i1_to_v2i32(<2 x i32> addrspace(1)* %out, <2 x i32> %a, <2 x i32> %b) nounwind {
   %c = add <2 x i32> %a, %b ; add to prevent folding into extload
   %shl = shl <2 x i32> %c, <i32 31, i32 31>
@@ -161,16 +235,18 @@ define void @sext_in_reg_v2i1_to_v2i32(<2 x i32> addrspace(1)* %out, <2 x i32> %
 }
 
 ; FUNC-LABEL: @sext_in_reg_v4i1_to_v4i32
-; SI: V_BFE_I32 {{v[0-9]+}}, {{s[0-9]+}}, 0, 1
-; SI: V_BFE_I32 {{v[0-9]+}}, {{s[0-9]+}}, 0, 1
-; SI: V_BFE_I32 {{v[0-9]+}}, {{s[0-9]+}}, 0, 1
-; SI: V_BFE_I32 {{v[0-9]+}}, {{s[0-9]+}}, 0, 1
+; SI: S_BFE_I32 {{s[0-9]+}}, {{s[0-9]+}}, 0x10000
+; SI: S_BFE_I32 {{s[0-9]+}}, {{s[0-9]+}}, 0x10000
+; SI: S_BFE_I32 {{s[0-9]+}}, {{s[0-9]+}}, 0x10000
+; SI: S_BFE_I32 {{s[0-9]+}}, {{s[0-9]+}}, 0x10000
 ; SI: BUFFER_STORE_DWORDX4
 
-; EG: BFE
-; EG: BFE
-; EG: BFE
-; EG: BFE
+; EG: MEM_{{.*}} STORE_{{.*}} [[RES:T[0-9]+]]{{\.[XYZW][XYZW][XYZW][XYZW]}}, [[ADDR:T[0-9]+.[XYZW]]]
+; EG: BFE_INT [[RES]]
+; EG: BFE_INT [[RES]]
+; EG: BFE_INT [[RES]]
+; EG: BFE_INT [[RES]]
+; EG: LSHR {{\*?}} [[ADDR]]
 define void @sext_in_reg_v4i1_to_v4i32(<4 x i32> addrspace(1)* %out, <4 x i32> %a, <4 x i32> %b) nounwind {
   %c = add <4 x i32> %a, %b ; add to prevent folding into extload
   %shl = shl <4 x i32> %c, <i32 31, i32 31, i32 31, i32 31>
@@ -180,12 +256,14 @@ define void @sext_in_reg_v4i1_to_v4i32(<4 x i32> addrspace(1)* %out, <4 x i32> %
 }
 
 ; FUNC-LABEL: @sext_in_reg_v2i8_to_v2i32
-; SI: V_BFE_I32 {{v[0-9]+}}, {{s[0-9]+}}, 0, 8
-; SI: V_BFE_I32 {{v[0-9]+}}, {{s[0-9]+}}, 0, 8
+; SI: S_SEXT_I32_I8 {{s[0-9]+}}, {{s[0-9]+}}
+; SI: S_SEXT_I32_I8 {{s[0-9]+}}, {{s[0-9]+}}
 ; SI: BUFFER_STORE_DWORDX2
-; EG: BFE
-; EG: BFE
+; EG: MEM_{{.*}} STORE_{{.*}} [[RES:T[0-9]+]]{{\.[XYZW][XYZW]}}, [[ADDR:T[0-9]+.[XYZW]]]
+; EG: BFE_INT [[RES]]
+; EG: BFE_INT [[RES]]
+; EG: LSHR {{\*?}} [[ADDR]]
 define void @sext_in_reg_v2i8_to_v2i32(<2 x i32> addrspace(1)* %out, <2 x i32> %a, <2 x i32> %b) nounwind {
   %c = add <2 x i32> %a, %b ; add to prevent folding into extload
   %shl = shl <2 x i32> %c, <i32 24, i32 24>
@@ -195,16 +273,18 @@ define void @sext_in_reg_v2i8_to_v2i32(<2 x i32> addrspace(1)* %out, <2 x i32> %
 }
 
 ; FUNC-LABEL: @sext_in_reg_v4i8_to_v4i32
-; SI: V_BFE_I32 {{v[0-9]+}}, {{s[0-9]+}}, 0, 8
-; SI: V_BFE_I32 {{v[0-9]+}}, {{s[0-9]+}}, 0, 8
-; SI: V_BFE_I32 {{v[0-9]+}}, {{s[0-9]+}}, 0, 8
-; SI: V_BFE_I32 {{v[0-9]+}}, {{s[0-9]+}}, 0, 8
+; SI: S_SEXT_I32_I8 {{s[0-9]+}}, {{s[0-9]+}}
+; SI: S_SEXT_I32_I8 {{s[0-9]+}}, {{s[0-9]+}}
+; SI: S_SEXT_I32_I8 {{s[0-9]+}}, {{s[0-9]+}}
+; SI: S_SEXT_I32_I8 {{s[0-9]+}}, {{s[0-9]+}}
 ; SI: BUFFER_STORE_DWORDX4
 
-; EG: BFE
-; EG: BFE
-; EG: BFE
-; EG: BFE
+; EG: MEM_{{.*}} STORE_{{.*}} [[RES:T[0-9]+]]{{\.[XYZW][XYZW][XYZW][XYZW]}}, [[ADDR:T[0-9]+.[XYZW]]]
+; EG: BFE_INT [[RES]]
+; EG: BFE_INT [[RES]]
+; EG: BFE_INT [[RES]]
+; EG: BFE_INT [[RES]]
+; EG: LSHR {{\*?}} [[ADDR]]
 define void @sext_in_reg_v4i8_to_v4i32(<4 x i32> addrspace(1)* %out, <4 x i32> %a, <4 x i32> %b) nounwind {
   %c = add <4 x i32> %a, %b ; add to prevent folding into extload
   %shl = shl <4 x i32> %c, <i32 24, i32 24, i32 24, i32 24>
@@ -214,16 +294,18 @@ define void @sext_in_reg_v4i8_to_v4i32(<4 x i32> addrspace(1)* %out, <4 x i32> %
 }
 
 ; FUNC-LABEL: @sext_in_reg_v2i16_to_v2i32
-; SI: V_BFE_I32 {{v[0-9]+}}, {{s[0-9]+}}, 0, 8
-; SI: V_BFE_I32 {{v[0-9]+}}, {{s[0-9]+}}, 0, 8
+; SI: S_SEXT_I32_I16 {{s[0-9]+}}, {{s[0-9]+}}
+; SI: S_SEXT_I32_I16 {{s[0-9]+}}, {{s[0-9]+}}
 ; SI: BUFFER_STORE_DWORDX2
 
-; EG: BFE
-; EG: BFE
+; EG: MEM_{{.*}} STORE_{{.*}} [[RES:T[0-9]+]]{{\.[XYZW][XYZW]}}, [[ADDR:T[0-9]+.[XYZW]]]
+; EG: BFE_INT [[RES]]
+; EG: BFE_INT [[RES]]
+; EG: LSHR {{\*?}} [[ADDR]]
 define void @sext_in_reg_v2i16_to_v2i32(<2 x i32> addrspace(1)* %out, <2 x i32> %a, <2 x i32> %b) nounwind {
   %c = add <2 x i32> %a, %b ; add to prevent folding into extload
-  %shl = shl <2 x i32> %c, <i32 24, i32 24>
-  %ashr = ashr <2 x i32> %shl, <i32 24, i32 24>
+  %shl = shl <2 x i32> %c, <i32 16, i32 16>
+  %ashr = ashr <2 x i32> %shl, <i32 16, i32 16>
   store <2 x i32> %ashr, <2 x i32> addrspace(1)* %out, align 8
   ret void
 }
@@ -252,8 +334,36 @@ define void @testcase_3(i8 addrspace(1)* %out, i8 %a) nounwind {
   ret void
 }
 
+; FUNC-LABEL: @vgpr_sext_in_reg_v4i8_to_v4i32
+; SI: V_BFE_I32 [[EXTRACT:v[0-9]+]], {{v[0-9]+}}, 0, 8
+; SI: V_BFE_I32 [[EXTRACT:v[0-9]+]], {{v[0-9]+}}, 0, 8
+; SI: V_BFE_I32 [[EXTRACT:v[0-9]+]], {{v[0-9]+}}, 0, 8
+; SI: V_BFE_I32 [[EXTRACT:v[0-9]+]], {{v[0-9]+}}, 0, 8
+define void @vgpr_sext_in_reg_v4i8_to_v4i32(<4 x i32> addrspace(1)* %out, <4 x i32> addrspace(1)* %a, <4 x i32> addrspace(1)* %b) nounwind {
+  %loada = load <4 x i32> addrspace(1)* %a, align 16
+  %loadb = load <4 x i32> addrspace(1)* %b, align 16
+  %c = add <4 x i32> %loada, %loadb ; add to prevent folding into extload
+  %shl = shl <4 x i32> %c, <i32 24, i32 24, i32 24, i32 24>
+  %ashr = ashr <4 x i32> %shl, <i32 24, i32 24, i32 24, i32 24>
+  store <4 x i32> %ashr, <4 x i32> addrspace(1)* %out, align 8
+  ret void
+}
+
+; FUNC-LABEL: @vgpr_sext_in_reg_v4i16_to_v4i32
+; SI: V_BFE_I32 [[EXTRACT:v[0-9]+]], {{v[0-9]+}}, 0, 16
+; SI: V_BFE_I32 [[EXTRACT:v[0-9]+]], {{v[0-9]+}}, 0, 16
+define void @vgpr_sext_in_reg_v4i16_to_v4i32(<4 x i32> addrspace(1)* %out, <4 x i32> addrspace(1)* %a, <4 x i32> addrspace(1)* %b) nounwind {
+  %loada = load <4 x i32> addrspace(1)* %a, align 16
+  %loadb = load <4 x i32> addrspace(1)* %b, align 16
+  %c = add <4 x i32> %loada, %loadb ; add to prevent folding into extload
+  %shl = shl <4 x i32> %c, <i32 16, i32 16, i32 16, i32 16>
+  %ashr = ashr <4 x i32> %shl, <i32 16, i32 16, i32 16, i32 16>
+  store <4 x i32> %ashr, <4 x i32> addrspace(1)* %out, align 8
+  ret void
+}
+
 ; FIXME: The BFE should really be eliminated. I think it should happen
-; when computeMaskedBitsForTargetNode is implemented for imax.
+; when computeKnownBitsForTargetNode is implemented for imax.
 
 ; FUNC-LABEL: @sext_in_reg_to_illegal_type
 ; SI: BUFFER_LOAD_SBYTE
@@ -269,3 +379,146 @@ define void @sext_in_reg_to_illegal_type(i16 addrspace(1)* nocapture %out, i8 ad
   store i16 %tmp6, i16 addrspace(1)* %out, align 2
   ret void
 }
+
+declare i32 @llvm.AMDGPU.bfe.i32(i32, i32, i32) nounwind readnone
+
+; FUNC-LABEL: @bfe_0_width
+; SI-NOT: BFE
+; SI: S_ENDPGM
+define void @bfe_0_width(i32 addrspace(1)* %out, i32 addrspace(1)* %ptr) nounwind {
+  %load = load i32 addrspace(1)* %ptr, align 4
+  %bfe = call i32 @llvm.AMDGPU.bfe.i32(i32 %load, i32 8, i32 0) nounwind readnone
+  store i32 %bfe, i32 addrspace(1)* %out, align 4
+  ret void
+}
+
+; FUNC-LABEL: @bfe_8_bfe_8
+; SI: V_BFE_I32
+; SI-NOT: BFE
+; SI: S_ENDPGM
+define void @bfe_8_bfe_8(i32 addrspace(1)* %out, i32 addrspace(1)* %ptr) nounwind {
+  %load = load i32 addrspace(1)* %ptr, align 4
+  %bfe0 = call i32 @llvm.AMDGPU.bfe.i32(i32 %load, i32 0, i32 8) nounwind readnone
+  %bfe1 = call i32 @llvm.AMDGPU.bfe.i32(i32 %bfe0, i32 0, i32 8) nounwind readnone
+  store i32 %bfe1, i32 addrspace(1)* %out, align 4
+  ret void
+}
+
+; FUNC-LABEL: @bfe_8_bfe_16
+; SI: V_BFE_I32 v{{[0-9]+}}, v{{[0-9]+}}, 0, 8
+; SI: S_ENDPGM
+define void @bfe_8_bfe_16(i32 addrspace(1)* %out, i32 addrspace(1)* %ptr) nounwind {
+  %load = load i32 addrspace(1)* %ptr, align 4
+  %bfe0 = call i32 @llvm.AMDGPU.bfe.i32(i32 %load, i32 0, i32 8) nounwind readnone
+  %bfe1 = call i32 @llvm.AMDGPU.bfe.i32(i32 %bfe0, i32 0, i32 16) nounwind readnone
+  store i32 %bfe1, i32 addrspace(1)* %out, align 4
+  ret void
+}
+
+; This really should be folded into 1
+; FUNC-LABEL: @bfe_16_bfe_8
+; SI: V_BFE_I32 v{{[0-9]+}}, v{{[0-9]+}}, 0, 8
+; SI-NOT: BFE
+; SI: S_ENDPGM
+define void @bfe_16_bfe_8(i32 addrspace(1)* %out, i32 addrspace(1)* %ptr) nounwind {
+  %load = load i32 addrspace(1)* %ptr, align 4
+  %bfe0 = call i32 @llvm.AMDGPU.bfe.i32(i32 %load, i32 0, i32 16) nounwind readnone
+  %bfe1 = call i32 @llvm.AMDGPU.bfe.i32(i32 %bfe0, i32 0, i32 8) nounwind readnone
+  store i32 %bfe1, i32 addrspace(1)* %out, align 4
+  ret void
+}
+
+; Make sure there isn't a redundant BFE
+; FUNC-LABEL: @sext_in_reg_i8_to_i32_bfe
+; SI: S_SEXT_I32_I8 s{{[0-9]+}}, s{{[0-9]+}}
+; SI-NOT: BFE
+; SI: S_ENDPGM
+define void @sext_in_reg_i8_to_i32_bfe(i32 addrspace(1)* %out, i32 %a, i32 %b) nounwind {
+  %c = add i32 %a, %b ; add to prevent folding into extload
+  %bfe = call i32 @llvm.AMDGPU.bfe.i32(i32 %c, i32 0, i32 8) nounwind readnone
+  %shl = shl i32 %bfe, 24
+  %ashr = ashr i32 %shl, 24
+  store i32 %ashr, i32 addrspace(1)* %out, align 4
+  ret void
+}
+
+; FUNC-LABEL: @sext_in_reg_i8_to_i32_bfe_wrong
+define void @sext_in_reg_i8_to_i32_bfe_wrong(i32 addrspace(1)* %out, i32 %a, i32 %b) nounwind {
+  %c = add i32 %a, %b ; add to prevent folding into extload
+  %bfe = call i32 @llvm.AMDGPU.bfe.i32(i32 %c, i32 8, i32 0) nounwind readnone
+  %shl = shl i32 %bfe, 24
+  %ashr = ashr i32 %shl, 24
+  store i32 %ashr, i32 addrspace(1)* %out, align 4
+  ret void
+}
+
+; FUNC-LABEL: @sextload_i8_to_i32_bfe
+; SI: BUFFER_LOAD_SBYTE
+; SI-NOT: BFE
+; SI: S_ENDPGM
+define void @sextload_i8_to_i32_bfe(i32 addrspace(1)* %out, i8 addrspace(1)* %ptr) nounwind {
+  %load = load i8 addrspace(1)* %ptr, align 1
+  %sext = sext i8 %load to i32
+  %bfe = call i32 @llvm.AMDGPU.bfe.i32(i32 %sext, i32 0, i32 8) nounwind readnone
+  %shl = shl i32 %bfe, 24
+  %ashr = ashr i32 %shl, 24
+  store i32 %ashr, i32 addrspace(1)* %out, align 4
+  ret void
+}
+
+; FUNC-LABEL: @sextload_i8_to_i32_bfe_0:
+; SI-NOT: BFE
+; SI: S_ENDPGM
+define void @sextload_i8_to_i32_bfe_0(i32 addrspace(1)* %out, i8 addrspace(1)* %ptr) nounwind {
+  %load = load i8 addrspace(1)* %ptr, align 1
+  %sext = sext i8 %load to i32
+  %bfe = call i32 @llvm.AMDGPU.bfe.i32(i32 %sext, i32 8, i32 0) nounwind readnone
+  %shl = shl i32 %bfe, 24
+  %ashr = ashr i32 %shl, 24
+  store i32 %ashr, i32 addrspace(1)* %out, align 4
+  ret void
+}
+
+; FUNC-LABEL: @sext_in_reg_i1_bfe_offset_0:
+; SI-NOT: SHR
+; SI-NOT: SHL
+; SI: V_BFE_I32 v{{[0-9]+}}, v{{[0-9]+}}, 0, 1
+; SI: S_ENDPGM
+define void @sext_in_reg_i1_bfe_offset_0(i32 addrspace(1)* %out, i32 addrspace(1)* %in) nounwind {
+  %x = load i32 addrspace(1)* %in, align 4
+  %shl = shl i32 %x, 31
+  %shr = ashr i32 %shl, 31
+  %bfe = call i32 @llvm.AMDGPU.bfe.i32(i32 %shr, i32 0, i32 1)
+  store i32 %bfe, i32 addrspace(1)* %out, align 4
+  ret void
+}
+
+; FUNC-LABEL: @sext_in_reg_i1_bfe_offset_1
+; SI: BUFFER_LOAD_DWORD
+; SI-NOT: SHL
+; SI-NOT: SHR
+; SI: V_BFE_I32 v{{[0-9]+}}, v{{[0-9]+}}, 1, 1
+; SI: S_ENDPGM
+define void @sext_in_reg_i1_bfe_offset_1(i32 addrspace(1)* %out, i32 addrspace(1)* %in) nounwind {
+  %x = load i32 addrspace(1)* %in, align 4
+  %shl = shl i32 %x, 30
+  %shr = ashr i32 %shl, 30
+  %bfe = call i32 @llvm.AMDGPU.bfe.i32(i32 %shr, i32 1, i32 1)
+  store i32 %bfe, i32 addrspace(1)* %out, align 4
+  ret void
+}
+
+; FUNC-LABEL: @sext_in_reg_i2_bfe_offset_1:
+; SI: BUFFER_LOAD_DWORD
+; SI: V_LSHLREV_B32_e32 v{{[0-9]+}}, 30, v{{[0-9]+}}
+; SI: V_ASHRREV_I32_e32 v{{[0-9]+}}, 30, v{{[0-9]+}}
+; SI: V_BFE_I32 v{{[0-9]+}}, v{{[0-9]+}}, 1, 2
+; SI: S_ENDPGM
+define void @sext_in_reg_i2_bfe_offset_1(i32 addrspace(1)* %out, i32 addrspace(1)* %in) nounwind {
+  %x = load i32 addrspace(1)* %in, align 4
+  %shl = shl i32 %x, 30
+  %shr = ashr i32 %shl, 30
+  %bfe = call i32 @llvm.AMDGPU.bfe.i32(i32 %shr, i32 1, i32 2)
+  store i32 %bfe, i32 addrspace(1)* %out, align 4
+  ret void
+}
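
Note for readers skimming the diff: every test above exercises the "sign extension in register" idiom, a shl/ashr pair over the low N bits that LLVM folds into a SIGN_EXTEND_INREG node. The updated CHECK lines assert that the SI backend now keeps this on the scalar unit for uniform (SGPR) inputs, selecting S_SEXT_I32_I8 and S_SEXT_I32_I16 for byte and halfword widths and S_BFE_I32 otherwise, while values loaded into VGPRs still use V_BFE_I32. In the S_BFE_I32 immediates, the field width sits in the upper bits and the offset in the lower bits, so 0x10000 encodes width 1 at offset 0 and 524288 (0x80000) encodes width 8 at offset 0. The sketch below is illustrative only, not part of the commit, and the function name is made up for the example.

; A minimal sketch of the sext-in-reg idiom on the low 8 bits of an i32.
; With a kernel-argument (SGPR) input, the updated lowering is expected to
; select S_SEXT_I32_I8; the same pattern fed from a load (VGPR input) would
; keep using V_BFE_I32 ..., 0, 8, as the @vgpr_* tests above check.
define void @sext_in_reg_example(i32 addrspace(1)* %out, i32 %x) nounwind {
  %shl = shl i32 %x, 24        ; move bit 7 of %x into the sign bit
  %ashr = ashr i32 %shl, 24    ; shift back; bits 8..31 now replicate bit 7
  store i32 %ashr, i32 addrspace(1)* %out, align 4
  ret void
}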