diff options
author | Daniel Sanders <daniel.sanders@imgtec.com> | 2013-10-30 15:20:38 +0000 |
---|---|---|
committer | Daniel Sanders <daniel.sanders@imgtec.com> | 2013-10-30 15:20:38 +0000 |
commit | c385709d8397ca1535481c04564b67d07c66c619 (patch) | |
tree | 64403f5d389d7395fc1768eeb99c98c3df4c65d1 /test/CodeGen/Mips/msa | |
parent | f853a034a1fdccd194da04ca1e2e1aa8bcbd16b4 (diff) | |
download | external_llvm-c385709d8397ca1535481c04564b67d07c66c619.zip external_llvm-c385709d8397ca1535481c04564b67d07c66c619.tar.gz external_llvm-c385709d8397ca1535481c04564b67d07c66c619.tar.bz2 |
[mips][msa] Added support for matching bmnz, bmnzi, bmz, and bmzi from normal IR (i.e. not intrinsics)
Also corrected the definition of the intrinsics for these instructions (the
result register is also the first operand), and added intrinsics for bsel and
bseli to clang (they already existed in the backend).
These four operations are mostly equivalent to bsel and bseli (the difference
is which operand is tied to the result). As a result some of the tests changed
as described below.
bitwise.ll:
- bsel.v test adapted so that the mask is unknown at compile-time. This stops
it emitting bmnzi.b instead of the intended bsel.v.
- The bseli.b test now tests the right thing, namely the case when one of the
values is a uimm8, rather than when the condition is a uimm8 (which is
covered by bmnzi.b).
compare.ll:
- bsel.v tests now (correctly) emit bmnz.v instead of bsel.v because this
is the same operation (see MSA.txt).
i8.ll:
- CHECK-DAG-ized test.
- bmzi.b test now (correctly) emits the equivalent bmnzi.b with swapped operands
because this is the same operation (see MSA.txt).
- bseli.b still emits bseli.b though because the immediate makes it
distinguishable from bmnzi.b.
vec.ll:
- CHECK-DAG-ized test.
- bmz.v tests now (correctly) emit bmnz.v with swapped operands (see
MSA.txt).
- bsel.v tests now (correctly) emit bmnz.v with swapped operands (see
MSA.txt).
git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@193693 91177308-0d34-0410-b5e6-96231b3b80d8
Diffstat (limited to 'test/CodeGen/Mips/msa')
-rw-r--r-- | test/CodeGen/Mips/msa/bitwise.ll | 51 | ||||
-rw-r--r-- | test/CodeGen/Mips/msa/compare.ll | 10 | ||||
-rw-r--r-- | test/CodeGen/Mips/msa/i8.ll | 64 | ||||
-rw-r--r-- | test/CodeGen/Mips/msa/vec.ll | 336 |
4 files changed, 299 insertions, 162 deletions
diff --git a/test/CodeGen/Mips/msa/bitwise.ll b/test/CodeGen/Mips/msa/bitwise.ll index d0b13f6..1ec373b 100644 --- a/test/CodeGen/Mips/msa/bitwise.ll +++ b/test/CodeGen/Mips/msa/bitwise.ll @@ -972,29 +972,56 @@ define void @ctlz_v2i64(<2 x i64>* %c, <2 x i64>* %a) nounwind { ; CHECK: .size ctlz_v2i64 } -define void @bsel_v16i8(<16 x i8>* %c, <16 x i8>* %a, <16 x i8>* %b) nounwind { +define void @bsel_v16i8(<16 x i8>* %c, <16 x i8>* %a, <16 x i8>* %b, <16 x i8>* %m) nounwind { ; CHECK: bsel_v16i8: %1 = load <16 x i8>* %a ; CHECK-DAG: ld.b [[R1:\$w[0-9]+]], 0($5) %2 = load <16 x i8>* %b ; CHECK-DAG: ld.b [[R2:\$w[0-9]+]], 0($6) - %3 = and <16 x i8> %1, <i8 6, i8 6, i8 6, i8 6, i8 6, i8 6, i8 6, i8 6, - i8 6, i8 6, i8 6, i8 6, i8 6, i8 6, i8 6, i8 6> - %4 = and <16 x i8> %2, <i8 249, i8 249, i8 249, i8 249, - i8 249, i8 249, i8 249, i8 249, - i8 249, i8 249, i8 249, i8 249, - i8 249, i8 249, i8 249, i8 249> - %5 = or <16 x i8> %3, %4 - ; CHECK-DAG: ldi.b [[R3:\$w[0-9]+]], 6 - ; CHECK-DAG: bsel.v [[R3]], [[R2]], [[R1]] - store <16 x i8> %5, <16 x i8>* %c - ; CHECK-DAG: st.b [[R3]], 0($4) + %3 = load <16 x i8>* %m + ; CHECK-DAG: ld.b [[R3:\$w[0-9]+]], 0($7) + %4 = xor <16 x i8> %3, <i8 -1, i8 -1, i8 -1, i8 -1, + i8 -1, i8 -1, i8 -1, i8 -1, + i8 -1, i8 -1, i8 -1, i8 -1, + i8 -1, i8 -1, i8 -1, i8 -1> + %5 = and <16 x i8> %1, %3 + %6 = and <16 x i8> %2, %4 + %7 = or <16 x i8> %5, %6 + ; bmnz is the same operation + ; CHECK-DAG: bmnz.v [[R1]], [[R2]], [[R3]] + store <16 x i8> %7, <16 x i8>* %c + ; CHECK-DAG: st.b [[R1]], 0($4) ret void ; CHECK: .size bsel_v16i8 } +define void @bsel_v16i8_i(<16 x i8>* %c, <16 x i8>* %a, <16 x i8>* %m) nounwind { + ; CHECK: bsel_v16i8_i: + + %1 = load <16 x i8>* %a + ; CHECK-DAG: ld.b [[R1:\$w[0-9]+]], 0($5) + %2 = load <16 x i8>* %m + ; CHECK-DAG: ld.b [[R3:\$w[0-9]+]], 0($6) + %3 = xor <16 x i8> %2, <i8 -1, i8 -1, i8 -1, i8 -1, + i8 -1, i8 -1, i8 -1, i8 -1, + i8 -1, i8 -1, i8 -1, i8 -1, + i8 -1, i8 -1, i8 -1, i8 -1> + %4 = and <16 x i8> %1, 
%3 + %5 = and <16 x i8> <i8 6, i8 6, i8 6, i8 6, + i8 6, i8 6, i8 6, i8 6, + i8 6, i8 6, i8 6, i8 6, + i8 6, i8 6, i8 6, i8 6>, %2 + %6 = or <16 x i8> %4, %5 + ; CHECK-DAG: bseli.b [[R3]], [[R1]], 6 + store <16 x i8> %6, <16 x i8>* %c + ; CHECK-DAG: st.b [[R3]], 0($4) + + ret void + ; CHECK: .size bsel_v16i8_i +} + define void @bsel_v8i16(<8 x i16>* %c, <8 x i16>* %a, <8 x i16>* %b) nounwind { ; CHECK: bsel_v8i16: diff --git a/test/CodeGen/Mips/msa/compare.ll b/test/CodeGen/Mips/msa/compare.ll index e45e849..a7c704e 100644 --- a/test/CodeGen/Mips/msa/compare.ll +++ b/test/CodeGen/Mips/msa/compare.ll @@ -653,9 +653,10 @@ define void @bsel_s_v16i8(<16 x i8>* %d, <16 x i8>* %a, <16 x i8>* %b, %4 = icmp sgt <16 x i8> %1, %2 ; CHECK-DAG: clt_s.b [[R4:\$w[0-9]+]], [[R2]], [[R1]] %5 = select <16 x i1> %4, <16 x i8> %1, <16 x i8> %3 - ; CHECK-DAG: bsel.v [[R4]], [[R1]], [[R3]] + ; bmnz.v is the same operation + ; CHECK-DAG: bmnz.v [[R3]], [[R1]], [[R4]] store <16 x i8> %5, <16 x i8>* %d - ; CHECK-DAG: st.b [[R4]], 0($4) + ; CHECK-DAG: st.b [[R3]], 0($4) ret void ; CHECK: .size bsel_s_v16i8 @@ -737,9 +738,10 @@ define void @bsel_u_v16i8(<16 x i8>* %d, <16 x i8>* %a, <16 x i8>* %b, %4 = icmp ugt <16 x i8> %1, %2 ; CHECK-DAG: clt_u.b [[R4:\$w[0-9]+]], [[R2]], [[R1]] %5 = select <16 x i1> %4, <16 x i8> %1, <16 x i8> %3 - ; CHECK-DAG: bsel.v [[R4]], [[R1]], [[R3]] + ; bmnz.v is the same operation + ; CHECK-DAG: bmnz.v [[R3]], [[R1]], [[R4]] store <16 x i8> %5, <16 x i8>* %d - ; CHECK-DAG: st.b [[R4]], 0($4) + ; CHECK-DAG: st.b [[R3]], 0($4) ret void ; CHECK: .size bsel_u_v16i8 diff --git a/test/CodeGen/Mips/msa/i8.ll b/test/CodeGen/Mips/msa/i8.ll index 1406588..f3e8dfc 100644 --- a/test/CodeGen/Mips/msa/i8.ll +++ b/test/CodeGen/Mips/msa/i8.ll @@ -20,64 +20,80 @@ declare <16 x i8> @llvm.mips.andi.b(<16 x i8>, i32) nounwind ; CHECK: andi.b ; CHECK: st.b ; CHECK: .size llvm_mips_andi_b_test -; -@llvm_mips_bmnzi_b_ARG1 = global <16 x i8> <i8 0, i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, 
i8 7, i8 8, i8 9, i8 10, i8 11, i8 12, i8 13, i8 14, i8 15>, align 16 + +@llvm_mips_bmnzi_b_ARG1 = global <16 x i8> <i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0>, align 16 +@llvm_mips_bmnzi_b_ARG2 = global <16 x i8> <i8 0, i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7, i8 8, i8 9, i8 10, i8 11, i8 12, i8 13, i8 14, i8 15>, align 16 @llvm_mips_bmnzi_b_RES = global <16 x i8> <i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0>, align 16 define void @llvm_mips_bmnzi_b_test() nounwind { entry: %0 = load <16 x i8>* @llvm_mips_bmnzi_b_ARG1 - %1 = tail call <16 x i8> @llvm.mips.bmnzi.b(<16 x i8> %0, i32 25) - store <16 x i8> %1, <16 x i8>* @llvm_mips_bmnzi_b_RES + %1 = load <16 x i8>* @llvm_mips_bmnzi_b_ARG2 + %2 = tail call <16 x i8> @llvm.mips.bmnzi.b(<16 x i8> %0, <16 x i8> %1, i32 25) + store <16 x i8> %2, <16 x i8>* @llvm_mips_bmnzi_b_RES ret void } -declare <16 x i8> @llvm.mips.bmnzi.b(<16 x i8>, i32) nounwind +declare <16 x i8> @llvm.mips.bmnzi.b(<16 x i8>, <16 x i8>, i32) nounwind ; CHECK: llvm_mips_bmnzi_b_test: -; CHECK: ld.b -; CHECK: bmnzi.b -; CHECK: st.b +; CHECK-DAG: lw [[R1:\$[0-9]+]], %got(llvm_mips_bmnzi_b_ARG1)( +; CHECK-DAG: lw [[R2:\$[0-9]+]], %got(llvm_mips_bmnzi_b_ARG2)( +; CHECK-DAG: ld.b [[R3:\$w[0-9]+]], 0([[R1]]) +; CHECK-DAG: ld.b [[R4:\$w[0-9]+]], 0([[R2]]) +; CHECK-DAG: bmnzi.b [[R3]], [[R4]], 25 +; CHECK-DAG: st.b [[R3]], 0( ; CHECK: .size llvm_mips_bmnzi_b_test -; -@llvm_mips_bmzi_b_ARG1 = global <16 x i8> <i8 0, i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7, i8 8, i8 9, i8 10, i8 11, i8 12, i8 13, i8 14, i8 15>, align 16 + +@llvm_mips_bmzi_b_ARG1 = global <16 x i8> <i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0>, align 16 +@llvm_mips_bmzi_b_ARG2 = global <16 x i8> <i8 0, i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7, i8 8, i8 9, i8 10, i8 11, i8 12, i8 13, i8 14, i8 15>, align 16 @llvm_mips_bmzi_b_RES = global 
<16 x i8> <i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0>, align 16 define void @llvm_mips_bmzi_b_test() nounwind { entry: %0 = load <16 x i8>* @llvm_mips_bmzi_b_ARG1 - %1 = tail call <16 x i8> @llvm.mips.bmzi.b(<16 x i8> %0, i32 25) - store <16 x i8> %1, <16 x i8>* @llvm_mips_bmzi_b_RES + %1 = load <16 x i8>* @llvm_mips_bmzi_b_ARG2 + %2 = tail call <16 x i8> @llvm.mips.bmzi.b(<16 x i8> %0, <16 x i8> %1, i32 25) + store <16 x i8> %2, <16 x i8>* @llvm_mips_bmzi_b_RES ret void } -declare <16 x i8> @llvm.mips.bmzi.b(<16 x i8>, i32) nounwind +declare <16 x i8> @llvm.mips.bmzi.b(<16 x i8>, <16 x i8>, i32) nounwind ; CHECK: llvm_mips_bmzi_b_test: -; CHECK: ld.b -; CHECK: bmzi.b -; CHECK: st.b +; CHECK-DAG: lw [[R1:\$[0-9]+]], %got(llvm_mips_bmzi_b_ARG1)( +; CHECK-DAG: lw [[R2:\$[0-9]+]], %got(llvm_mips_bmzi_b_ARG2)( +; CHECK-DAG: ld.b [[R3:\$w[0-9]+]], 0([[R1]]) +; CHECK-DAG: ld.b [[R4:\$w[0-9]+]], 0([[R2]]) +; bmnzi.b is the same as bmzi.b with ws and wd_in swapped +; CHECK-DAG: bmnzi.b [[R4]], [[R3]], 25 +; CHECK-DAG: st.b [[R4]], 0( ; CHECK: .size llvm_mips_bmzi_b_test -; -@llvm_mips_bseli_b_ARG1 = global <16 x i8> <i8 0, i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7, i8 8, i8 9, i8 10, i8 11, i8 12, i8 13, i8 14, i8 15>, align 16 + +@llvm_mips_bseli_b_ARG1 = global <16 x i8> <i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0>, align 16 +@llvm_mips_bseli_b_ARG2 = global <16 x i8> <i8 0, i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7, i8 8, i8 9, i8 10, i8 11, i8 12, i8 13, i8 14, i8 15>, align 16 @llvm_mips_bseli_b_RES = global <16 x i8> <i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0>, align 16 define void @llvm_mips_bseli_b_test() nounwind { entry: %0 = load <16 x i8>* @llvm_mips_bseli_b_ARG1 - %1 = tail call <16 x i8> @llvm.mips.bseli.b(<16 x i8> %0, <16 x i8> %0, i32 25) - store <16 x i8> %1, <16 x i8>* @llvm_mips_bseli_b_RES + %1 = 
load <16 x i8>* @llvm_mips_bseli_b_ARG2 + %2 = tail call <16 x i8> @llvm.mips.bseli.b(<16 x i8> %0, <16 x i8> %1, i32 25) + store <16 x i8> %2, <16 x i8>* @llvm_mips_bseli_b_RES ret void } declare <16 x i8> @llvm.mips.bseli.b(<16 x i8>, <16 x i8>, i32) nounwind ; CHECK: llvm_mips_bseli_b_test: -; CHECK: ld.b -; CHECK: bseli.b -; CHECK: st.b +; CHECK-DAG: lw [[R1:\$[0-9]+]], %got(llvm_mips_bseli_b_ARG1)( +; CHECK-DAG: lw [[R2:\$[0-9]+]], %got(llvm_mips_bseli_b_ARG2)( +; CHECK-DAG: ld.b [[R3:\$w[0-9]+]], 0([[R1]]) +; CHECK-DAG: ld.b [[R4:\$w[0-9]+]], 0([[R2]]) +; CHECK-DAG: bseli.b [[R3]], [[R4]], 25 +; CHECK-DAG: st.b [[R3]], 0( ; CHECK: .size llvm_mips_bseli_b_test -; + @llvm_mips_nori_b_ARG1 = global <16 x i8> <i8 0, i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7, i8 8, i8 9, i8 10, i8 11, i8 12, i8 13, i8 14, i8 15>, align 16 @llvm_mips_nori_b_RES = global <16 x i8> <i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0>, align 16 diff --git a/test/CodeGen/Mips/msa/vec.ll b/test/CodeGen/Mips/msa/vec.ll index c26144e..5bddf5a 100644 --- a/test/CodeGen/Mips/msa/vec.ll +++ b/test/CodeGen/Mips/msa/vec.ll @@ -163,280 +163,372 @@ entry: ; @llvm_mips_bmnz_v_b_ARG1 = global <16 x i8> <i8 0, i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7, i8 8, i8 9, i8 10, i8 11, i8 12, i8 13, i8 14, i8 15>, align 16 @llvm_mips_bmnz_v_b_ARG2 = global <16 x i8> <i8 16, i8 17, i8 18, i8 19, i8 20, i8 21, i8 22, i8 23, i8 24, i8 25, i8 26, i8 27, i8 28, i8 29, i8 30, i8 31>, align 16 +@llvm_mips_bmnz_v_b_ARG3 = global <16 x i8> <i8 0, i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7, i8 8, i8 9, i8 10, i8 11, i8 12, i8 13, i8 14, i8 15>, align 16 @llvm_mips_bmnz_v_b_RES = global <16 x i8> <i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0>, align 16 define void @llvm_mips_bmnz_v_b_test() nounwind { entry: %0 = load <16 x i8>* @llvm_mips_bmnz_v_b_ARG1 %1 = load <16 x i8>* @llvm_mips_bmnz_v_b_ARG2 - %2 = bitcast <16 x i8> %0 to 
<16 x i8> - %3 = bitcast <16 x i8> %1 to <16 x i8> - %4 = tail call <16 x i8> @llvm.mips.bmnz.v(<16 x i8> %2, <16 x i8> %3) - %5 = bitcast <16 x i8> %4 to <16 x i8> - store <16 x i8> %5, <16 x i8>* @llvm_mips_bmnz_v_b_RES + %2 = load <16 x i8>* @llvm_mips_bmnz_v_b_ARG3 + %3 = bitcast <16 x i8> %0 to <16 x i8> + %4 = bitcast <16 x i8> %1 to <16 x i8> + %5 = bitcast <16 x i8> %2 to <16 x i8> + %6 = tail call <16 x i8> @llvm.mips.bmnz.v(<16 x i8> %3, <16 x i8> %4, <16 x i8> %5) + %7 = bitcast <16 x i8> %6 to <16 x i8> + store <16 x i8> %7, <16 x i8>* @llvm_mips_bmnz_v_b_RES ret void } ; ANYENDIAN: llvm_mips_bmnz_v_b_test: -; ANYENDIAN: ld.b -; ANYENDIAN: ld.b -; ANYENDIAN: bmnz.v -; ANYENDIAN: st.b +; ANYENDIAN-DAG: lw [[R1:\$[0-9]+]], %got(llvm_mips_bmnz_v_b_ARG1)( +; ANYENDIAN-DAG: lw [[R2:\$[0-9]+]], %got(llvm_mips_bmnz_v_b_ARG2)( +; ANYENDIAN-DAG: lw [[R3:\$[0-9]+]], %got(llvm_mips_bmnz_v_b_ARG3)( +; ANYENDIAN-DAG: ld.b [[R4:\$w[0-9]+]], 0([[R1]]) +; ANYENDIAN-DAG: ld.b [[R5:\$w[0-9]+]], 0([[R2]]) +; ANYENDIAN-DAG: ld.b [[R6:\$w[0-9]+]], 0([[R3]]) +; ANYENDIAN-DAG: bmnz.v [[R4]], [[R5]], [[R6]] +; ANYENDIAN-DAG: st.b [[R4]], 0( ; ANYENDIAN: .size llvm_mips_bmnz_v_b_test -; + @llvm_mips_bmnz_v_h_ARG1 = global <8 x i16> <i16 0, i16 1, i16 2, i16 3, i16 4, i16 5, i16 6, i16 7>, align 16 @llvm_mips_bmnz_v_h_ARG2 = global <8 x i16> <i16 8, i16 9, i16 10, i16 11, i16 12, i16 13, i16 14, i16 15>, align 16 +@llvm_mips_bmnz_v_h_ARG3 = global <8 x i16> <i16 0, i16 1, i16 2, i16 3, i16 4, i16 5, i16 6, i16 7>, align 16 @llvm_mips_bmnz_v_h_RES = global <8 x i16> <i16 0, i16 0, i16 0, i16 0, i16 0, i16 0, i16 0, i16 0>, align 16 define void @llvm_mips_bmnz_v_h_test() nounwind { entry: %0 = load <8 x i16>* @llvm_mips_bmnz_v_h_ARG1 %1 = load <8 x i16>* @llvm_mips_bmnz_v_h_ARG2 - %2 = bitcast <8 x i16> %0 to <16 x i8> - %3 = bitcast <8 x i16> %1 to <16 x i8> - %4 = tail call <16 x i8> @llvm.mips.bmnz.v(<16 x i8> %2, <16 x i8> %3) - %5 = bitcast <16 x i8> %4 to <8 x i16> - store 
<8 x i16> %5, <8 x i16>* @llvm_mips_bmnz_v_h_RES + %2 = load <8 x i16>* @llvm_mips_bmnz_v_h_ARG3 + %3 = bitcast <8 x i16> %0 to <16 x i8> + %4 = bitcast <8 x i16> %1 to <16 x i8> + %5 = bitcast <8 x i16> %2 to <16 x i8> + %6 = tail call <16 x i8> @llvm.mips.bmnz.v(<16 x i8> %3, <16 x i8> %4, <16 x i8> %5) + %7 = bitcast <16 x i8> %6 to <8 x i16> + store <8 x i16> %7, <8 x i16>* @llvm_mips_bmnz_v_h_RES ret void } ; ANYENDIAN: llvm_mips_bmnz_v_h_test: -; ANYENDIAN: ld.b -; ANYENDIAN: ld.b -; ANYENDIAN: bmnz.v -; ANYENDIAN: st.b +; ANYENDIAN-DAG: lw [[R1:\$[0-9]+]], %got(llvm_mips_bmnz_v_h_ARG1)( +; ANYENDIAN-DAG: lw [[R2:\$[0-9]+]], %got(llvm_mips_bmnz_v_h_ARG2)( +; ANYENDIAN-DAG: lw [[R3:\$[0-9]+]], %got(llvm_mips_bmnz_v_h_ARG3)( +; ANYENDIAN-DAG: ld.b [[R4:\$w[0-9]+]], 0([[R1]]) +; ANYENDIAN-DAG: ld.b [[R5:\$w[0-9]+]], 0([[R2]]) +; ANYENDIAN-DAG: ld.b [[R6:\$w[0-9]+]], 0([[R3]]) +; ANYENDIAN-DAG: bmnz.v [[R4]], [[R5]], [[R6]] +; ANYENDIAN-DAG: st.b [[R4]], 0( ; ANYENDIAN: .size llvm_mips_bmnz_v_h_test -; + @llvm_mips_bmnz_v_w_ARG1 = global <4 x i32> <i32 0, i32 1, i32 2, i32 3>, align 16 @llvm_mips_bmnz_v_w_ARG2 = global <4 x i32> <i32 4, i32 5, i32 6, i32 7>, align 16 +@llvm_mips_bmnz_v_w_ARG3 = global <4 x i32> <i32 0, i32 1, i32 2, i32 3>, align 16 @llvm_mips_bmnz_v_w_RES = global <4 x i32> <i32 0, i32 0, i32 0, i32 0>, align 16 define void @llvm_mips_bmnz_v_w_test() nounwind { entry: %0 = load <4 x i32>* @llvm_mips_bmnz_v_w_ARG1 %1 = load <4 x i32>* @llvm_mips_bmnz_v_w_ARG2 - %2 = bitcast <4 x i32> %0 to <16 x i8> - %3 = bitcast <4 x i32> %1 to <16 x i8> - %4 = tail call <16 x i8> @llvm.mips.bmnz.v(<16 x i8> %2, <16 x i8> %3) - %5 = bitcast <16 x i8> %4 to <4 x i32> - store <4 x i32> %5, <4 x i32>* @llvm_mips_bmnz_v_w_RES + %2 = load <4 x i32>* @llvm_mips_bmnz_v_w_ARG3 + %3 = bitcast <4 x i32> %0 to <16 x i8> + %4 = bitcast <4 x i32> %1 to <16 x i8> + %5 = bitcast <4 x i32> %2 to <16 x i8> + %6 = tail call <16 x i8> @llvm.mips.bmnz.v(<16 x i8> %3, <16 x i8> %4, 
<16 x i8> %5) + %7 = bitcast <16 x i8> %6 to <4 x i32> + store <4 x i32> %7, <4 x i32>* @llvm_mips_bmnz_v_w_RES ret void } ; ANYENDIAN: llvm_mips_bmnz_v_w_test: -; ANYENDIAN: ld.b -; ANYENDIAN: ld.b -; ANYENDIAN: bmnz.v -; ANYENDIAN: st.b +; ANYENDIAN-DAG: lw [[R1:\$[0-9]+]], %got(llvm_mips_bmnz_v_w_ARG1)( +; ANYENDIAN-DAG: lw [[R2:\$[0-9]+]], %got(llvm_mips_bmnz_v_w_ARG2)( +; ANYENDIAN-DAG: lw [[R3:\$[0-9]+]], %got(llvm_mips_bmnz_v_w_ARG3)( +; ANYENDIAN-DAG: ld.b [[R4:\$w[0-9]+]], 0([[R1]]) +; ANYENDIAN-DAG: ld.b [[R5:\$w[0-9]+]], 0([[R2]]) +; ANYENDIAN-DAG: ld.b [[R6:\$w[0-9]+]], 0([[R3]]) +; ANYENDIAN-DAG: bmnz.v [[R4]], [[R5]], [[R6]] +; ANYENDIAN-DAG: st.b [[R4]], 0( ; ANYENDIAN: .size llvm_mips_bmnz_v_w_test -; + @llvm_mips_bmnz_v_d_ARG1 = global <2 x i64> <i64 0, i64 1>, align 16 @llvm_mips_bmnz_v_d_ARG2 = global <2 x i64> <i64 2, i64 3>, align 16 +@llvm_mips_bmnz_v_d_ARG3 = global <2 x i64> <i64 0, i64 1>, align 16 @llvm_mips_bmnz_v_d_RES = global <2 x i64> <i64 0, i64 0>, align 16 define void @llvm_mips_bmnz_v_d_test() nounwind { entry: %0 = load <2 x i64>* @llvm_mips_bmnz_v_d_ARG1 %1 = load <2 x i64>* @llvm_mips_bmnz_v_d_ARG2 - %2 = bitcast <2 x i64> %0 to <16 x i8> - %3 = bitcast <2 x i64> %1 to <16 x i8> - %4 = tail call <16 x i8> @llvm.mips.bmnz.v(<16 x i8> %2, <16 x i8> %3) - %5 = bitcast <16 x i8> %4 to <2 x i64> - store <2 x i64> %5, <2 x i64>* @llvm_mips_bmnz_v_d_RES + %2 = load <2 x i64>* @llvm_mips_bmnz_v_d_ARG3 + %3 = bitcast <2 x i64> %0 to <16 x i8> + %4 = bitcast <2 x i64> %1 to <16 x i8> + %5 = bitcast <2 x i64> %2 to <16 x i8> + %6 = tail call <16 x i8> @llvm.mips.bmnz.v(<16 x i8> %3, <16 x i8> %4, <16 x i8> %5) + %7 = bitcast <16 x i8> %6 to <2 x i64> + store <2 x i64> %7, <2 x i64>* @llvm_mips_bmnz_v_d_RES ret void } ; ANYENDIAN: llvm_mips_bmnz_v_d_test: -; ANYENDIAN: ld.b -; ANYENDIAN: ld.b -; ANYENDIAN: bmnz.v -; ANYENDIAN: st.b +; ANYENDIAN-DAG: lw [[R1:\$[0-9]+]], %got(llvm_mips_bmnz_v_d_ARG1)( +; ANYENDIAN-DAG: lw [[R2:\$[0-9]+]], 
%got(llvm_mips_bmnz_v_d_ARG2)( +; ANYENDIAN-DAG: lw [[R3:\$[0-9]+]], %got(llvm_mips_bmnz_v_d_ARG3)( +; ANYENDIAN-DAG: ld.b [[R4:\$w[0-9]+]], 0([[R1]]) +; ANYENDIAN-DAG: ld.b [[R5:\$w[0-9]+]], 0([[R2]]) +; ANYENDIAN-DAG: ld.b [[R6:\$w[0-9]+]], 0([[R3]]) +; ANYENDIAN-DAG: bmnz.v [[R4]], [[R5]], [[R6]] +; ANYENDIAN-DAG: st.b [[R4]], 0( ; ANYENDIAN: .size llvm_mips_bmnz_v_d_test -; + @llvm_mips_bmz_v_b_ARG1 = global <16 x i8> <i8 0, i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7, i8 8, i8 9, i8 10, i8 11, i8 12, i8 13, i8 14, i8 15>, align 16 @llvm_mips_bmz_v_b_ARG2 = global <16 x i8> <i8 16, i8 17, i8 18, i8 19, i8 20, i8 21, i8 22, i8 23, i8 24, i8 25, i8 26, i8 27, i8 28, i8 29, i8 30, i8 31>, align 16 +@llvm_mips_bmz_v_b_ARG3 = global <16 x i8> <i8 0, i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7, i8 8, i8 9, i8 10, i8 11, i8 12, i8 13, i8 14, i8 15>, align 16 @llvm_mips_bmz_v_b_RES = global <16 x i8> <i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0>, align 16 define void @llvm_mips_bmz_v_b_test() nounwind { entry: %0 = load <16 x i8>* @llvm_mips_bmz_v_b_ARG1 %1 = load <16 x i8>* @llvm_mips_bmz_v_b_ARG2 - %2 = bitcast <16 x i8> %0 to <16 x i8> - %3 = bitcast <16 x i8> %1 to <16 x i8> - %4 = tail call <16 x i8> @llvm.mips.bmz.v(<16 x i8> %2, <16 x i8> %3) - %5 = bitcast <16 x i8> %4 to <16 x i8> - store <16 x i8> %5, <16 x i8>* @llvm_mips_bmz_v_b_RES + %2 = load <16 x i8>* @llvm_mips_bmz_v_b_ARG3 + %3 = bitcast <16 x i8> %0 to <16 x i8> + %4 = bitcast <16 x i8> %1 to <16 x i8> + %5 = bitcast <16 x i8> %2 to <16 x i8> + %6 = tail call <16 x i8> @llvm.mips.bmz.v(<16 x i8> %3, <16 x i8> %4, <16 x i8> %5) + %7 = bitcast <16 x i8> %6 to <16 x i8> + store <16 x i8> %7, <16 x i8>* @llvm_mips_bmz_v_b_RES ret void } ; ANYENDIAN: llvm_mips_bmz_v_b_test: -; ANYENDIAN: ld.b -; ANYENDIAN: ld.b -; ANYENDIAN: bmz.v -; ANYENDIAN: st.b +; ANYENDIAN-DAG: lw [[R1:\$[0-9]+]], %got(llvm_mips_bmz_v_b_ARG1)( +; ANYENDIAN-DAG: lw [[R2:\$[0-9]+]], 
%got(llvm_mips_bmz_v_b_ARG2)( +; ANYENDIAN-DAG: lw [[R3:\$[0-9]+]], %got(llvm_mips_bmz_v_b_ARG3)( +; ANYENDIAN-DAG: ld.b [[R4:\$w[0-9]+]], 0([[R1]]) +; ANYENDIAN-DAG: ld.b [[R5:\$w[0-9]+]], 0([[R2]]) +; ANYENDIAN-DAG: ld.b [[R6:\$w[0-9]+]], 0([[R3]]) +; bmnz.v is the same as bmz.v with ws and wd_in swapped +; ANYENDIAN-DAG: bmnz.v [[R5]], [[R4]], [[R6]] +; ANYENDIAN-DAG: st.b [[R5]], 0( ; ANYENDIAN: .size llvm_mips_bmz_v_b_test -; + @llvm_mips_bmz_v_h_ARG1 = global <8 x i16> <i16 0, i16 1, i16 2, i16 3, i16 4, i16 5, i16 6, i16 7>, align 16 @llvm_mips_bmz_v_h_ARG2 = global <8 x i16> <i16 8, i16 9, i16 10, i16 11, i16 12, i16 13, i16 14, i16 15>, align 16 +@llvm_mips_bmz_v_h_ARG3 = global <8 x i16> <i16 0, i16 1, i16 2, i16 3, i16 4, i16 5, i16 6, i16 7>, align 16 @llvm_mips_bmz_v_h_RES = global <8 x i16> <i16 0, i16 0, i16 0, i16 0, i16 0, i16 0, i16 0, i16 0>, align 16 define void @llvm_mips_bmz_v_h_test() nounwind { entry: %0 = load <8 x i16>* @llvm_mips_bmz_v_h_ARG1 %1 = load <8 x i16>* @llvm_mips_bmz_v_h_ARG2 - %2 = bitcast <8 x i16> %0 to <16 x i8> - %3 = bitcast <8 x i16> %1 to <16 x i8> - %4 = tail call <16 x i8> @llvm.mips.bmz.v(<16 x i8> %2, <16 x i8> %3) - %5 = bitcast <16 x i8> %4 to <8 x i16> - store <8 x i16> %5, <8 x i16>* @llvm_mips_bmz_v_h_RES + %2 = load <8 x i16>* @llvm_mips_bmz_v_h_ARG3 + %3 = bitcast <8 x i16> %0 to <16 x i8> + %4 = bitcast <8 x i16> %1 to <16 x i8> + %5 = bitcast <8 x i16> %2 to <16 x i8> + %6 = tail call <16 x i8> @llvm.mips.bmz.v(<16 x i8> %3, <16 x i8> %4, <16 x i8> %5) + %7 = bitcast <16 x i8> %6 to <8 x i16> + store <8 x i16> %7, <8 x i16>* @llvm_mips_bmz_v_h_RES ret void } ; ANYENDIAN: llvm_mips_bmz_v_h_test: -; ANYENDIAN: ld.b -; ANYENDIAN: ld.b -; ANYENDIAN: bmz.v -; ANYENDIAN: st.b +; ANYENDIAN-DAG: lw [[R1:\$[0-9]+]], %got(llvm_mips_bmz_v_h_ARG1)( +; ANYENDIAN-DAG: lw [[R2:\$[0-9]+]], %got(llvm_mips_bmz_v_h_ARG2)( +; ANYENDIAN-DAG: lw [[R3:\$[0-9]+]], %got(llvm_mips_bmz_v_h_ARG3)( +; ANYENDIAN-DAG: ld.b 
[[R4:\$w[0-9]+]], 0([[R1]]) +; ANYENDIAN-DAG: ld.b [[R5:\$w[0-9]+]], 0([[R2]]) +; ANYENDIAN-DAG: ld.b [[R6:\$w[0-9]+]], 0([[R3]]) +; bmnz.v is the same as bmz.v with ws and wd_in swapped +; ANYENDIAN-DAG: bmnz.v [[R5]], [[R4]], [[R6]] +; ANYENDIAN-DAG: st.b [[R5]], 0( ; ANYENDIAN: .size llvm_mips_bmz_v_h_test -; + @llvm_mips_bmz_v_w_ARG1 = global <4 x i32> <i32 0, i32 1, i32 2, i32 3>, align 16 @llvm_mips_bmz_v_w_ARG2 = global <4 x i32> <i32 4, i32 5, i32 6, i32 7>, align 16 +@llvm_mips_bmz_v_w_ARG3 = global <4 x i32> <i32 0, i32 1, i32 2, i32 3>, align 16 @llvm_mips_bmz_v_w_RES = global <4 x i32> <i32 0, i32 0, i32 0, i32 0>, align 16 define void @llvm_mips_bmz_v_w_test() nounwind { entry: %0 = load <4 x i32>* @llvm_mips_bmz_v_w_ARG1 %1 = load <4 x i32>* @llvm_mips_bmz_v_w_ARG2 - %2 = bitcast <4 x i32> %0 to <16 x i8> - %3 = bitcast <4 x i32> %1 to <16 x i8> - %4 = tail call <16 x i8> @llvm.mips.bmz.v(<16 x i8> %2, <16 x i8> %3) - %5 = bitcast <16 x i8> %4 to <4 x i32> - store <4 x i32> %5, <4 x i32>* @llvm_mips_bmz_v_w_RES + %2 = load <4 x i32>* @llvm_mips_bmz_v_w_ARG3 + %3 = bitcast <4 x i32> %0 to <16 x i8> + %4 = bitcast <4 x i32> %1 to <16 x i8> + %5 = bitcast <4 x i32> %2 to <16 x i8> + %6 = tail call <16 x i8> @llvm.mips.bmz.v(<16 x i8> %3, <16 x i8> %4, <16 x i8> %5) + %7 = bitcast <16 x i8> %6 to <4 x i32> + store <4 x i32> %7, <4 x i32>* @llvm_mips_bmz_v_w_RES ret void } ; ANYENDIAN: llvm_mips_bmz_v_w_test: -; ANYENDIAN: ld.b -; ANYENDIAN: ld.b -; ANYENDIAN: bmz.v -; ANYENDIAN: st.b +; ANYENDIAN-DAG: lw [[R1:\$[0-9]+]], %got(llvm_mips_bmz_v_w_ARG1)( +; ANYENDIAN-DAG: lw [[R2:\$[0-9]+]], %got(llvm_mips_bmz_v_w_ARG2)( +; ANYENDIAN-DAG: lw [[R3:\$[0-9]+]], %got(llvm_mips_bmz_v_w_ARG3)( +; ANYENDIAN-DAG: ld.b [[R4:\$w[0-9]+]], 0([[R1]]) +; ANYENDIAN-DAG: ld.b [[R5:\$w[0-9]+]], 0([[R2]]) +; ANYENDIAN-DAG: ld.b [[R6:\$w[0-9]+]], 0([[R3]]) +; bmnz.v is the same as bmz.v with ws and wd_in swapped +; ANYENDIAN-DAG: bmnz.v [[R5]], [[R4]], [[R6]] +; ANYENDIAN-DAG: 
st.b [[R5]], 0( ; ANYENDIAN: .size llvm_mips_bmz_v_w_test -; + @llvm_mips_bmz_v_d_ARG1 = global <2 x i64> <i64 0, i64 1>, align 16 @llvm_mips_bmz_v_d_ARG2 = global <2 x i64> <i64 2, i64 3>, align 16 +@llvm_mips_bmz_v_d_ARG3 = global <2 x i64> <i64 0, i64 1>, align 16 @llvm_mips_bmz_v_d_RES = global <2 x i64> <i64 0, i64 0>, align 16 define void @llvm_mips_bmz_v_d_test() nounwind { entry: %0 = load <2 x i64>* @llvm_mips_bmz_v_d_ARG1 %1 = load <2 x i64>* @llvm_mips_bmz_v_d_ARG2 - %2 = bitcast <2 x i64> %0 to <16 x i8> - %3 = bitcast <2 x i64> %1 to <16 x i8> - %4 = tail call <16 x i8> @llvm.mips.bmz.v(<16 x i8> %2, <16 x i8> %3) - %5 = bitcast <16 x i8> %4 to <2 x i64> - store <2 x i64> %5, <2 x i64>* @llvm_mips_bmz_v_d_RES + %2 = load <2 x i64>* @llvm_mips_bmz_v_d_ARG3 + %3 = bitcast <2 x i64> %0 to <16 x i8> + %4 = bitcast <2 x i64> %1 to <16 x i8> + %5 = bitcast <2 x i64> %2 to <16 x i8> + %6 = tail call <16 x i8> @llvm.mips.bmz.v(<16 x i8> %3, <16 x i8> %4, <16 x i8> %5) + %7 = bitcast <16 x i8> %6 to <2 x i64> + store <2 x i64> %7, <2 x i64>* @llvm_mips_bmz_v_d_RES ret void } ; ANYENDIAN: llvm_mips_bmz_v_d_test: -; ANYENDIAN: ld.b -; ANYENDIAN: ld.b -; ANYENDIAN: bmz.v -; ANYENDIAN: st.b +; ANYENDIAN-DAG: lw [[R1:\$[0-9]+]], %got(llvm_mips_bmz_v_d_ARG1)( +; ANYENDIAN-DAG: lw [[R2:\$[0-9]+]], %got(llvm_mips_bmz_v_d_ARG2)( +; ANYENDIAN-DAG: lw [[R3:\$[0-9]+]], %got(llvm_mips_bmz_v_d_ARG3)( +; ANYENDIAN-DAG: ld.b [[R4:\$w[0-9]+]], 0([[R1]]) +; ANYENDIAN-DAG: ld.b [[R5:\$w[0-9]+]], 0([[R2]]) +; ANYENDIAN-DAG: ld.b [[R6:\$w[0-9]+]], 0([[R3]]) +; bmnz.v is the same as bmz.v with ws and wd_in swapped +; ANYENDIAN-DAG: bmnz.v [[R5]], [[R4]], [[R6]] +; ANYENDIAN-DAG: st.b [[R5]], 0( ; ANYENDIAN: .size llvm_mips_bmz_v_d_test -; + @llvm_mips_bsel_v_b_ARG1 = global <16 x i8> <i8 0, i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7, i8 8, i8 9, i8 10, i8 11, i8 12, i8 13, i8 14, i8 15>, align 16 @llvm_mips_bsel_v_b_ARG2 = global <16 x i8> <i8 16, i8 17, i8 18, i8 19, i8 20, i8 21, i8 
22, i8 23, i8 24, i8 25, i8 26, i8 27, i8 28, i8 29, i8 30, i8 31>, align 16 +@llvm_mips_bsel_v_b_ARG3 = global <16 x i8> <i8 0, i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7, i8 8, i8 9, i8 10, i8 11, i8 12, i8 13, i8 14, i8 15>, align 16 @llvm_mips_bsel_v_b_RES = global <16 x i8> <i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0>, align 16 define void @llvm_mips_bsel_v_b_test() nounwind { entry: %0 = load <16 x i8>* @llvm_mips_bsel_v_b_ARG1 %1 = load <16 x i8>* @llvm_mips_bsel_v_b_ARG2 - %2 = bitcast <16 x i8> %0 to <16 x i8> - %3 = bitcast <16 x i8> %1 to <16 x i8> - %4 = tail call <16 x i8> @llvm.mips.bsel.v(<16 x i8> %2, <16 x i8> %2, <16 x i8> %3) - %5 = bitcast <16 x i8> %4 to <16 x i8> - store <16 x i8> %5, <16 x i8>* @llvm_mips_bsel_v_b_RES + %2 = load <16 x i8>* @llvm_mips_bsel_v_b_ARG3 + %3 = bitcast <16 x i8> %0 to <16 x i8> + %4 = bitcast <16 x i8> %1 to <16 x i8> + %5 = bitcast <16 x i8> %2 to <16 x i8> + %6 = tail call <16 x i8> @llvm.mips.bsel.v(<16 x i8> %3, <16 x i8> %4, <16 x i8> %5) + %7 = bitcast <16 x i8> %6 to <16 x i8> + store <16 x i8> %7, <16 x i8>* @llvm_mips_bsel_v_b_RES ret void } ; ANYENDIAN: llvm_mips_bsel_v_b_test: -; ANYENDIAN: ld.b -; ANYENDIAN: ld.b -; ANYENDIAN: bsel.v -; ANYENDIAN: st.b +; ANYENDIAN-DAG: lw [[R1:\$[0-9]+]], %got(llvm_mips_bsel_v_b_ARG1)( +; ANYENDIAN-DAG: lw [[R2:\$[0-9]+]], %got(llvm_mips_bsel_v_b_ARG2)( +; ANYENDIAN-DAG: lw [[R3:\$[0-9]+]], %got(llvm_mips_bsel_v_b_ARG3)( +; ANYENDIAN-DAG: ld.b [[R4:\$w[0-9]+]], 0([[R1]]) +; ANYENDIAN-DAG: ld.b [[R5:\$w[0-9]+]], 0([[R2]]) +; ANYENDIAN-DAG: ld.b [[R6:\$w[0-9]+]], 0([[R3]]) +; bmnz.v is the same as bsel.v with wt and wd_in swapped +; ANYENDIAN-DAG: bmnz.v [[R6]], [[R5]], [[R4]] +; ANYENDIAN-DAG: st.b [[R6]], 0( ; ANYENDIAN: .size llvm_mips_bsel_v_b_test -; + @llvm_mips_bsel_v_h_ARG1 = global <8 x i16> <i16 0, i16 1, i16 2, i16 3, i16 4, i16 5, i16 6, i16 7>, align 16 @llvm_mips_bsel_v_h_ARG2 = global <8 x i16> <i16 8, i16 
9, i16 10, i16 11, i16 12, i16 13, i16 14, i16 15>, align 16 +@llvm_mips_bsel_v_h_ARG3 = global <8 x i16> <i16 0, i16 1, i16 2, i16 3, i16 4, i16 5, i16 6, i16 7>, align 16 @llvm_mips_bsel_v_h_RES = global <8 x i16> <i16 0, i16 0, i16 0, i16 0, i16 0, i16 0, i16 0, i16 0>, align 16 define void @llvm_mips_bsel_v_h_test() nounwind { entry: %0 = load <8 x i16>* @llvm_mips_bsel_v_h_ARG1 %1 = load <8 x i16>* @llvm_mips_bsel_v_h_ARG2 - %2 = bitcast <8 x i16> %0 to <16 x i8> - %3 = bitcast <8 x i16> %1 to <16 x i8> - %4 = tail call <16 x i8> @llvm.mips.bsel.v(<16 x i8> %2, <16 x i8> %2, <16 x i8> %3) - %5 = bitcast <16 x i8> %4 to <8 x i16> - store <8 x i16> %5, <8 x i16>* @llvm_mips_bsel_v_h_RES + %2 = load <8 x i16>* @llvm_mips_bsel_v_h_ARG3 + %3 = bitcast <8 x i16> %0 to <16 x i8> + %4 = bitcast <8 x i16> %1 to <16 x i8> + %5 = bitcast <8 x i16> %2 to <16 x i8> + %6 = tail call <16 x i8> @llvm.mips.bsel.v(<16 x i8> %3, <16 x i8> %4, <16 x i8> %5) + %7 = bitcast <16 x i8> %6 to <8 x i16> + store <8 x i16> %7, <8 x i16>* @llvm_mips_bsel_v_h_RES ret void } ; ANYENDIAN: llvm_mips_bsel_v_h_test: -; ANYENDIAN: ld.b -; ANYENDIAN: ld.b -; ANYENDIAN: bsel.v -; ANYENDIAN: st.b +; ANYENDIAN-DAG: lw [[R1:\$[0-9]+]], %got(llvm_mips_bsel_v_h_ARG1)( +; ANYENDIAN-DAG: lw [[R2:\$[0-9]+]], %got(llvm_mips_bsel_v_h_ARG2)( +; ANYENDIAN-DAG: lw [[R3:\$[0-9]+]], %got(llvm_mips_bsel_v_h_ARG3)( +; ANYENDIAN-DAG: ld.b [[R4:\$w[0-9]+]], 0([[R1]]) +; ANYENDIAN-DAG: ld.b [[R5:\$w[0-9]+]], 0([[R2]]) +; ANYENDIAN-DAG: ld.b [[R6:\$w[0-9]+]], 0([[R3]]) +; bmnz.v is the same as bsel.v with wt and wd_in swapped +; ANYENDIAN-DAG: bmnz.v [[R6]], [[R5]], [[R4]] +; ANYENDIAN-DAG: st.b [[R6]], 0( ; ANYENDIAN: .size llvm_mips_bsel_v_h_test -; + @llvm_mips_bsel_v_w_ARG1 = global <4 x i32> <i32 0, i32 1, i32 2, i32 3>, align 16 @llvm_mips_bsel_v_w_ARG2 = global <4 x i32> <i32 4, i32 5, i32 6, i32 7>, align 16 +@llvm_mips_bsel_v_w_ARG3 = global <4 x i32> <i32 0, i32 1, i32 2, i32 3>, align 16 
@llvm_mips_bsel_v_w_RES = global <4 x i32> <i32 0, i32 0, i32 0, i32 0>, align 16 define void @llvm_mips_bsel_v_w_test() nounwind { entry: %0 = load <4 x i32>* @llvm_mips_bsel_v_w_ARG1 %1 = load <4 x i32>* @llvm_mips_bsel_v_w_ARG2 - %2 = bitcast <4 x i32> %0 to <16 x i8> - %3 = bitcast <4 x i32> %1 to <16 x i8> - %4 = tail call <16 x i8> @llvm.mips.bsel.v(<16 x i8> %2, <16 x i8> %2, <16 x i8> %3) - %5 = bitcast <16 x i8> %4 to <4 x i32> - store <4 x i32> %5, <4 x i32>* @llvm_mips_bsel_v_w_RES + %2 = load <4 x i32>* @llvm_mips_bsel_v_w_ARG3 + %3 = bitcast <4 x i32> %0 to <16 x i8> + %4 = bitcast <4 x i32> %1 to <16 x i8> + %5 = bitcast <4 x i32> %2 to <16 x i8> + %6 = tail call <16 x i8> @llvm.mips.bsel.v(<16 x i8> %3, <16 x i8> %4, <16 x i8> %5) + %7 = bitcast <16 x i8> %6 to <4 x i32> + store <4 x i32> %7, <4 x i32>* @llvm_mips_bsel_v_w_RES ret void } ; ANYENDIAN: llvm_mips_bsel_v_w_test: -; ANYENDIAN: ld.b -; ANYENDIAN: ld.b -; ANYENDIAN: bsel.v -; ANYENDIAN: st.b +; ANYENDIAN-DAG: lw [[R1:\$[0-9]+]], %got(llvm_mips_bsel_v_w_ARG1)( +; ANYENDIAN-DAG: lw [[R2:\$[0-9]+]], %got(llvm_mips_bsel_v_w_ARG2)( +; ANYENDIAN-DAG: lw [[R3:\$[0-9]+]], %got(llvm_mips_bsel_v_w_ARG3)( +; ANYENDIAN-DAG: ld.b [[R4:\$w[0-9]+]], 0([[R1]]) +; ANYENDIAN-DAG: ld.b [[R5:\$w[0-9]+]], 0([[R2]]) +; ANYENDIAN-DAG: ld.b [[R6:\$w[0-9]+]], 0([[R3]]) +; bmnz.v is the same as bsel.v with wt and wd_in swapped +; ANYENDIAN-DAG: bmnz.v [[R6]], [[R5]], [[R4]] +; ANYENDIAN-DAG: st.b [[R6]], 0( ; ANYENDIAN: .size llvm_mips_bsel_v_w_test -; + @llvm_mips_bsel_v_d_ARG1 = global <2 x i64> <i64 0, i64 1>, align 16 @llvm_mips_bsel_v_d_ARG2 = global <2 x i64> <i64 2, i64 3>, align 16 +@llvm_mips_bsel_v_d_ARG3 = global <2 x i64> <i64 0, i64 1>, align 16 @llvm_mips_bsel_v_d_RES = global <2 x i64> <i64 0, i64 0>, align 16 define void @llvm_mips_bsel_v_d_test() nounwind { entry: %0 = load <2 x i64>* @llvm_mips_bsel_v_d_ARG1 %1 = load <2 x i64>* @llvm_mips_bsel_v_d_ARG2 - %2 = bitcast <2 x i64> %0 to <16 x i8> - %3 
= bitcast <2 x i64> %1 to <16 x i8> - %4 = tail call <16 x i8> @llvm.mips.bsel.v(<16 x i8> %2, <16 x i8> %2, <16 x i8> %3) - %5 = bitcast <16 x i8> %4 to <2 x i64> - store <2 x i64> %5, <2 x i64>* @llvm_mips_bsel_v_d_RES + %2 = load <2 x i64>* @llvm_mips_bsel_v_d_ARG3 + %3 = bitcast <2 x i64> %0 to <16 x i8> + %4 = bitcast <2 x i64> %1 to <16 x i8> + %5 = bitcast <2 x i64> %2 to <16 x i8> + %6 = tail call <16 x i8> @llvm.mips.bsel.v(<16 x i8> %3, <16 x i8> %4, <16 x i8> %5) + %7 = bitcast <16 x i8> %6 to <2 x i64> + store <2 x i64> %7, <2 x i64>* @llvm_mips_bsel_v_d_RES ret void } ; ANYENDIAN: llvm_mips_bsel_v_d_test: -; ANYENDIAN: ld.b -; ANYENDIAN: ld.b -; ANYENDIAN: bsel.v -; ANYENDIAN: st.b +; ANYENDIAN-DAG: lw [[R1:\$[0-9]+]], %got(llvm_mips_bsel_v_d_ARG1)( +; ANYENDIAN-DAG: lw [[R2:\$[0-9]+]], %got(llvm_mips_bsel_v_d_ARG2)( +; ANYENDIAN-DAG: lw [[R3:\$[0-9]+]], %got(llvm_mips_bsel_v_d_ARG3)( +; ANYENDIAN-DAG: ld.b [[R4:\$w[0-9]+]], 0([[R1]]) +; ANYENDIAN-DAG: ld.b [[R5:\$w[0-9]+]], 0([[R2]]) +; ANYENDIAN-DAG: ld.b [[R6:\$w[0-9]+]], 0([[R3]]) +; bmnz.v is the same as bsel.v with wt and wd_in swapped +; ANYENDIAN-DAG: bmnz.v [[R6]], [[R5]], [[R4]] +; ANYENDIAN-DAG: st.b [[R6]], 0( ; ANYENDIAN: .size llvm_mips_bsel_v_d_test -; + @llvm_mips_nor_v_b_ARG1 = global <16 x i8> <i8 0, i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7, i8 8, i8 9, i8 10, i8 11, i8 12, i8 13, i8 14, i8 15>, align 16 @llvm_mips_nor_v_b_ARG2 = global <16 x i8> <i8 16, i8 17, i8 18, i8 19, i8 20, i8 21, i8 22, i8 23, i8 24, i8 25, i8 26, i8 27, i8 28, i8 29, i8 30, i8 31>, align 16 @llvm_mips_nor_v_b_RES = global <16 x i8> <i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0>, align 16 @@ -846,8 +938,8 @@ entry: ; CHECK: .size xor_v_d_test ; declare <16 x i8> @llvm.mips.and.v(<16 x i8>, <16 x i8>) nounwind -declare <16 x i8> @llvm.mips.bmnz.v(<16 x i8>, <16 x i8>) nounwind -declare <16 x i8> @llvm.mips.bmz.v(<16 x i8>, <16 x i8>) nounwind +declare <16 x i8> 
@llvm.mips.bmnz.v(<16 x i8>, <16 x i8>, <16 x i8>) nounwind +declare <16 x i8> @llvm.mips.bmz.v(<16 x i8>, <16 x i8>, <16 x i8>) nounwind declare <16 x i8> @llvm.mips.bsel.v(<16 x i8>, <16 x i8>, <16 x i8>) nounwind declare <16 x i8> @llvm.mips.nor.v(<16 x i8>, <16 x i8>) nounwind declare <16 x i8> @llvm.mips.or.v(<16 x i8>, <16 x i8>) nounwind |