diff options
author | Jack Carter <jack.carter@imgtec.com> | 2013-08-15 12:24:57 +0000 |
---|---|---|
committer | Jack Carter <jack.carter@imgtec.com> | 2013-08-15 12:24:57 +0000 |
commit | e2a9376b1bd2204ea6f56a35b762e28e0ef4e35a (patch) | |
tree | b6c7bd299df4286fc2e0d42078c1a54fd38e9327 /test | |
parent | d36e1efa4b674b6b224995657e04a1c6145f70db (diff) | |
download | external_llvm-e2a9376b1bd2204ea6f56a35b762e28e0ef4e35a.zip external_llvm-e2a9376b1bd2204ea6f56a35b762e28e0ef4e35a.tar.gz external_llvm-e2a9376b1bd2204ea6f56a35b762e28e0ef4e35a.tar.bz2 |
[Mips][msa] Added the simple builtins (add_a to dpsub[su], ilvev to ldi)
Includes:
add_a, adds_[asu], addv, addvi, andi.b, asub_[su].[bhwd], aver?_[su]_[bhwd],
bclr, bclri, bins[lr], bins[lr]i, bmnzi, bmzi, bneg, bnegi, bseli, bset, bseti,
c(eq|ne), c(eq|ne)i, cl[et]_[su], cl[et]i_[su], copy_[su].[bhw], div_[su],
dotp_[su], dpadd_[su], dpsub_[su], ilvev, ilvl, ilvod, ilvr, insv, insve,
ldi
Patch by Daniel Sanders
git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@188457 91177308-0d34-0410-b5e6-96231b3b80d8
Diffstat (limited to 'test')
-rw-r--r-- | test/CodeGen/Mips/msa/2r_vector_scalar.ll | 59 | ||||
-rw-r--r-- | test/CodeGen/Mips/msa/3r-a.ll | 970 | ||||
-rw-r--r-- | test/CodeGen/Mips/msa/3r-b.ll | 442 | ||||
-rw-r--r-- | test/CodeGen/Mips/msa/3r-c.ll | 442 | ||||
-rw-r--r-- | test/CodeGen/Mips/msa/3r-d.ll | 354 | ||||
-rw-r--r-- | test/CodeGen/Mips/msa/3r-i.ll | 354 | ||||
-rw-r--r-- | test/CodeGen/Mips/msa/3r_4r_widen.ll | 302 | ||||
-rw-r--r-- | test/CodeGen/Mips/msa/elm_copy.ll | 116 | ||||
-rw-r--r-- | test/CodeGen/Mips/msa/elm_insv.ll | 68 | ||||
-rw-r--r-- | test/CodeGen/Mips/msa/i5-a.ll | 78 | ||||
-rw-r--r-- | test/CodeGen/Mips/msa/i5-b.ll | 382 | ||||
-rw-r--r-- | test/CodeGen/Mips/msa/i5-c.ll | 382 | ||||
-rw-r--r-- | test/CodeGen/Mips/msa/i8.ll | 78 |
13 files changed, 4027 insertions, 0 deletions
diff --git a/test/CodeGen/Mips/msa/2r_vector_scalar.ll b/test/CodeGen/Mips/msa/2r_vector_scalar.ll new file mode 100644 index 0000000..9d0765a --- /dev/null +++ b/test/CodeGen/Mips/msa/2r_vector_scalar.ll @@ -0,0 +1,59 @@ +; RUN: llc -march=mips -mattr=+msa < %s | FileCheck %s + +@llvm_mips_fill_b_ARG1 = global i32 23, align 16 +@llvm_mips_fill_b_RES = global <16 x i8> <i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0>, align 16 + +define void @llvm_mips_fill_b_test() nounwind { +entry: + %0 = load i32* @llvm_mips_fill_b_ARG1 + %1 = tail call <16 x i8> @llvm.mips.fill.b(i32 %0) + store <16 x i8> %1, <16 x i8>* @llvm_mips_fill_b_RES + ret void +} + +declare <16 x i8> @llvm.mips.fill.b(i32) nounwind + +; CHECK: llvm_mips_fill_b_test: +; CHECK: lw +; CHECK: fill.b +; CHECK: st.b +; CHECK: .size llvm_mips_fill_b_test +; +@llvm_mips_fill_h_ARG1 = global i32 23, align 16 +@llvm_mips_fill_h_RES = global <8 x i16> <i16 0, i16 0, i16 0, i16 0, i16 0, i16 0, i16 0, i16 0>, align 16 + +define void @llvm_mips_fill_h_test() nounwind { +entry: + %0 = load i32* @llvm_mips_fill_h_ARG1 + %1 = tail call <8 x i16> @llvm.mips.fill.h(i32 %0) + store <8 x i16> %1, <8 x i16>* @llvm_mips_fill_h_RES + ret void +} + +declare <8 x i16> @llvm.mips.fill.h(i32) nounwind + +; CHECK: llvm_mips_fill_h_test: +; CHECK: lw +; CHECK: fill.h +; CHECK: st.h +; CHECK: .size llvm_mips_fill_h_test +; +@llvm_mips_fill_w_ARG1 = global i32 23, align 16 +@llvm_mips_fill_w_RES = global <4 x i32> <i32 0, i32 0, i32 0, i32 0>, align 16 + +define void @llvm_mips_fill_w_test() nounwind { +entry: + %0 = load i32* @llvm_mips_fill_w_ARG1 + %1 = tail call <4 x i32> @llvm.mips.fill.w(i32 %0) + store <4 x i32> %1, <4 x i32>* @llvm_mips_fill_w_RES + ret void +} + +declare <4 x i32> @llvm.mips.fill.w(i32) nounwind + +; CHECK: llvm_mips_fill_w_test: +; CHECK: lw +; CHECK: fill.w +; CHECK: st.w +; CHECK: .size llvm_mips_fill_w_test +; diff --git a/test/CodeGen/Mips/msa/3r-a.ll b/test/CodeGen/Mips/msa/3r-a.ll new file mode 100644 index 0000000..3cdeae4 --- /dev/null +++ b/test/CodeGen/Mips/msa/3r-a.ll @@ -0,0 +1,970 @@ +; RUN: llc -march=mips -mattr=+msa < %s | FileCheck %s + +@llvm_mips_add_a_b_ARG1 = global <16 x i8> <i8 0, i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7, i8 8, i8 9, i8 10, i8 11, i8 12, i8 13, i8 14, i8 15>, align 16 +@llvm_mips_add_a_b_ARG2 = global <16 x i8> <i8 16, i8 17, i8 18, i8 19, i8 20, i8 21, i8 22, i8 23, i8 24, i8 25, i8 26, i8 27, i8 28, i8 29, i8 30, i8 31>, align 16 +@llvm_mips_add_a_b_RES = global <16 x i8> <i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0>, align 16 + +define void @llvm_mips_add_a_b_test() nounwind { +entry: + %0 = load <16 x i8>* @llvm_mips_add_a_b_ARG1 + %1 = load <16 x i8>* @llvm_mips_add_a_b_ARG2 + %2 = tail call <16 x i8> @llvm.mips.add.a.b(<16 x i8> %0, <16 x i8> %1) + store <16 x i8> %2, <16 x i8>* @llvm_mips_add_a_b_RES + ret void +} + +declare <16 x i8> @llvm.mips.add.a.b(<16 x i8>, <16 x i8>) nounwind + +; CHECK: llvm_mips_add_a_b_test: +; CHECK: ld.b +; CHECK: ld.b +; CHECK: add_a.b +; CHECK: st.b +; CHECK: .size llvm_mips_add_a_b_test +; +@llvm_mips_add_a_h_ARG1 = global <8 x i16> <i16 0, i16 1, i16 2, i16 3, i16 4, i16 5, i16 6, i16 7>, align 16 +@llvm_mips_add_a_h_ARG2 = global <8 x i16> <i16 8, i16 9, i16 10, i16 11, i16 12, i16 13, i16 14, i16 15>, align 16 +@llvm_mips_add_a_h_RES = global <8 x i16> <i16 0, i16 0, i16 0, i16 0, i16 0, i16 0, i16 0, i16 0>, align 16 + +define void 
@llvm_mips_add_a_h_test() nounwind { +entry: + %0 = load <8 x i16>* @llvm_mips_add_a_h_ARG1 + %1 = load <8 x i16>* @llvm_mips_add_a_h_ARG2 + %2 = tail call <8 x i16> @llvm.mips.add.a.h(<8 x i16> %0, <8 x i16> %1) + store <8 x i16> %2, <8 x i16>* @llvm_mips_add_a_h_RES + ret void +} + +declare <8 x i16> @llvm.mips.add.a.h(<8 x i16>, <8 x i16>) nounwind + +; CHECK: llvm_mips_add_a_h_test: +; CHECK: ld.h +; CHECK: ld.h +; CHECK: add_a.h +; CHECK: st.h +; CHECK: .size llvm_mips_add_a_h_test +; +@llvm_mips_add_a_w_ARG1 = global <4 x i32> <i32 0, i32 1, i32 2, i32 3>, align 16 +@llvm_mips_add_a_w_ARG2 = global <4 x i32> <i32 4, i32 5, i32 6, i32 7>, align 16 +@llvm_mips_add_a_w_RES = global <4 x i32> <i32 0, i32 0, i32 0, i32 0>, align 16 + +define void @llvm_mips_add_a_w_test() nounwind { +entry: + %0 = load <4 x i32>* @llvm_mips_add_a_w_ARG1 + %1 = load <4 x i32>* @llvm_mips_add_a_w_ARG2 + %2 = tail call <4 x i32> @llvm.mips.add.a.w(<4 x i32> %0, <4 x i32> %1) + store <4 x i32> %2, <4 x i32>* @llvm_mips_add_a_w_RES + ret void +} + +declare <4 x i32> @llvm.mips.add.a.w(<4 x i32>, <4 x i32>) nounwind + +; CHECK: llvm_mips_add_a_w_test: +; CHECK: ld.w +; CHECK: ld.w +; CHECK: add_a.w +; CHECK: st.w +; CHECK: .size llvm_mips_add_a_w_test +; +@llvm_mips_add_a_d_ARG1 = global <2 x i64> <i64 0, i64 1>, align 16 +@llvm_mips_add_a_d_ARG2 = global <2 x i64> <i64 2, i64 3>, align 16 +@llvm_mips_add_a_d_RES = global <2 x i64> <i64 0, i64 0>, align 16 + +define void @llvm_mips_add_a_d_test() nounwind { +entry: + %0 = load <2 x i64>* @llvm_mips_add_a_d_ARG1 + %1 = load <2 x i64>* @llvm_mips_add_a_d_ARG2 + %2 = tail call <2 x i64> @llvm.mips.add.a.d(<2 x i64> %0, <2 x i64> %1) + store <2 x i64> %2, <2 x i64>* @llvm_mips_add_a_d_RES + ret void +} + +declare <2 x i64> @llvm.mips.add.a.d(<2 x i64>, <2 x i64>) nounwind + +; CHECK: llvm_mips_add_a_d_test: +; CHECK: ld.d +; CHECK: ld.d +; CHECK: add_a.d +; CHECK: st.d +; CHECK: .size llvm_mips_add_a_d_test +; +@llvm_mips_adds_a_b_ARG1 = global <16 x i8> <i8 0, i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7, i8 8, i8 9, i8 10, i8 11, i8 12, i8 13, i8 14, i8 15>, align 16 +@llvm_mips_adds_a_b_ARG2 = global <16 x i8> <i8 16, i8 17, i8 18, i8 19, i8 20, i8 21, i8 22, i8 23, i8 24, i8 25, i8 26, i8 27, i8 28, i8 29, i8 30, i8 31>, align 16 +@llvm_mips_adds_a_b_RES = global <16 x i8> <i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0>, align 16 + +define void @llvm_mips_adds_a_b_test() nounwind { +entry: + %0 = load <16 x i8>* @llvm_mips_adds_a_b_ARG1 + %1 = load <16 x i8>* @llvm_mips_adds_a_b_ARG2 + %2 = tail call <16 x i8> @llvm.mips.adds.a.b(<16 x i8> %0, <16 x i8> %1) + store <16 x i8> %2, <16 x i8>* @llvm_mips_adds_a_b_RES + ret void +} + +declare <16 x i8> @llvm.mips.adds.a.b(<16 x i8>, <16 x i8>) nounwind + +; CHECK: llvm_mips_adds_a_b_test: +; CHECK: ld.b +; CHECK: ld.b +; CHECK: adds_a.b +; CHECK: st.b +; CHECK: .size llvm_mips_adds_a_b_test +; +@llvm_mips_adds_a_h_ARG1 = global <8 x i16> <i16 0, i16 1, i16 2, i16 3, i16 4, i16 5, i16 6, i16 7>, align 16 +@llvm_mips_adds_a_h_ARG2 = global <8 x i16> <i16 8, i16 9, i16 10, i16 11, i16 12, i16 13, i16 14, i16 15>, align 16 +@llvm_mips_adds_a_h_RES = global <8 x i16> <i16 0, i16 0, i16 0, i16 0, i16 0, i16 0, i16 0, i16 0>, align 16 + +define void @llvm_mips_adds_a_h_test() nounwind { +entry: + %0 = load <8 x i16>* @llvm_mips_adds_a_h_ARG1 + %1 = load <8 x i16>* @llvm_mips_adds_a_h_ARG2 + %2 = tail call <8 x i16> @llvm.mips.adds.a.h(<8 x i16> %0, <8 x i16> %1) + store <8 x 
i16> %2, <8 x i16>* @llvm_mips_adds_a_h_RES + ret void +} + +declare <8 x i16> @llvm.mips.adds.a.h(<8 x i16>, <8 x i16>) nounwind + +; CHECK: llvm_mips_adds_a_h_test: +; CHECK: ld.h +; CHECK: ld.h +; CHECK: adds_a.h +; CHECK: st.h +; CHECK: .size llvm_mips_adds_a_h_test +; +@llvm_mips_adds_a_w_ARG1 = global <4 x i32> <i32 0, i32 1, i32 2, i32 3>, align 16 +@llvm_mips_adds_a_w_ARG2 = global <4 x i32> <i32 4, i32 5, i32 6, i32 7>, align 16 +@llvm_mips_adds_a_w_RES = global <4 x i32> <i32 0, i32 0, i32 0, i32 0>, align 16 + +define void @llvm_mips_adds_a_w_test() nounwind { +entry: + %0 = load <4 x i32>* @llvm_mips_adds_a_w_ARG1 + %1 = load <4 x i32>* @llvm_mips_adds_a_w_ARG2 + %2 = tail call <4 x i32> @llvm.mips.adds.a.w(<4 x i32> %0, <4 x i32> %1) + store <4 x i32> %2, <4 x i32>* @llvm_mips_adds_a_w_RES + ret void +} + +declare <4 x i32> @llvm.mips.adds.a.w(<4 x i32>, <4 x i32>) nounwind + +; CHECK: llvm_mips_adds_a_w_test: +; CHECK: ld.w +; CHECK: ld.w +; CHECK: adds_a.w +; CHECK: st.w +; CHECK: .size llvm_mips_adds_a_w_test +; +@llvm_mips_adds_a_d_ARG1 = global <2 x i64> <i64 0, i64 1>, align 16 +@llvm_mips_adds_a_d_ARG2 = global <2 x i64> <i64 2, i64 3>, align 16 +@llvm_mips_adds_a_d_RES = global <2 x i64> <i64 0, i64 0>, align 16 + +define void @llvm_mips_adds_a_d_test() nounwind { +entry: + %0 = load <2 x i64>* @llvm_mips_adds_a_d_ARG1 + %1 = load <2 x i64>* @llvm_mips_adds_a_d_ARG2 + %2 = tail call <2 x i64> @llvm.mips.adds.a.d(<2 x i64> %0, <2 x i64> %1) + store <2 x i64> %2, <2 x i64>* @llvm_mips_adds_a_d_RES + ret void +} + +declare <2 x i64> @llvm.mips.adds.a.d(<2 x i64>, <2 x i64>) nounwind + +; CHECK: llvm_mips_adds_a_d_test: +; CHECK: ld.d +; CHECK: ld.d +; CHECK: adds_a.d +; CHECK: st.d +; CHECK: .size llvm_mips_adds_a_d_test +; +@llvm_mips_adds_s_b_ARG1 = global <16 x i8> <i8 0, i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7, i8 8, i8 9, i8 10, i8 11, i8 12, i8 13, i8 14, i8 15>, align 16 +@llvm_mips_adds_s_b_ARG2 = global <16 x i8> <i8 16, i8 17, i8 18, i8 19, i8 20, i8 21, i8 22, i8 23, i8 24, i8 25, i8 26, i8 27, i8 28, i8 29, i8 30, i8 31>, align 16 +@llvm_mips_adds_s_b_RES = global <16 x i8> <i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0>, align 16 + +define void @llvm_mips_adds_s_b_test() nounwind { +entry: + %0 = load <16 x i8>* @llvm_mips_adds_s_b_ARG1 + %1 = load <16 x i8>* @llvm_mips_adds_s_b_ARG2 + %2 = tail call <16 x i8> @llvm.mips.adds.s.b(<16 x i8> %0, <16 x i8> %1) + store <16 x i8> %2, <16 x i8>* @llvm_mips_adds_s_b_RES + ret void +} + +declare <16 x i8> @llvm.mips.adds.s.b(<16 x i8>, <16 x i8>) nounwind + +; CHECK: llvm_mips_adds_s_b_test: +; CHECK: ld.b +; CHECK: ld.b +; CHECK: adds_s.b +; CHECK: st.b +; CHECK: .size llvm_mips_adds_s_b_test +; +@llvm_mips_adds_s_h_ARG1 = global <8 x i16> <i16 0, i16 1, i16 2, i16 3, i16 4, i16 5, i16 6, i16 7>, align 16 +@llvm_mips_adds_s_h_ARG2 = global <8 x i16> <i16 8, i16 9, i16 10, i16 11, i16 12, i16 13, i16 14, i16 15>, align 16 +@llvm_mips_adds_s_h_RES = global <8 x i16> <i16 0, i16 0, i16 0, i16 0, i16 0, i16 0, i16 0, i16 0>, align 16 + +define void @llvm_mips_adds_s_h_test() nounwind { +entry: + %0 = load <8 x i16>* @llvm_mips_adds_s_h_ARG1 + %1 = load <8 x i16>* @llvm_mips_adds_s_h_ARG2 + %2 = tail call <8 x i16> @llvm.mips.adds.s.h(<8 x i16> %0, <8 x i16> %1) + store <8 x i16> %2, <8 x i16>* @llvm_mips_adds_s_h_RES + ret void +} + +declare <8 x i16> @llvm.mips.adds.s.h(<8 x i16>, <8 x i16>) nounwind + +; CHECK: llvm_mips_adds_s_h_test: +; CHECK: ld.h +; CHECK: ld.h +; 
CHECK: adds_s.h +; CHECK: st.h +; CHECK: .size llvm_mips_adds_s_h_test +; +@llvm_mips_adds_s_w_ARG1 = global <4 x i32> <i32 0, i32 1, i32 2, i32 3>, align 16 +@llvm_mips_adds_s_w_ARG2 = global <4 x i32> <i32 4, i32 5, i32 6, i32 7>, align 16 +@llvm_mips_adds_s_w_RES = global <4 x i32> <i32 0, i32 0, i32 0, i32 0>, align 16 + +define void @llvm_mips_adds_s_w_test() nounwind { +entry: + %0 = load <4 x i32>* @llvm_mips_adds_s_w_ARG1 + %1 = load <4 x i32>* @llvm_mips_adds_s_w_ARG2 + %2 = tail call <4 x i32> @llvm.mips.adds.s.w(<4 x i32> %0, <4 x i32> %1) + store <4 x i32> %2, <4 x i32>* @llvm_mips_adds_s_w_RES + ret void +} + +declare <4 x i32> @llvm.mips.adds.s.w(<4 x i32>, <4 x i32>) nounwind + +; CHECK: llvm_mips_adds_s_w_test: +; CHECK: ld.w +; CHECK: ld.w +; CHECK: adds_s.w +; CHECK: st.w +; CHECK: .size llvm_mips_adds_s_w_test +; +@llvm_mips_adds_s_d_ARG1 = global <2 x i64> <i64 0, i64 1>, align 16 +@llvm_mips_adds_s_d_ARG2 = global <2 x i64> <i64 2, i64 3>, align 16 +@llvm_mips_adds_s_d_RES = global <2 x i64> <i64 0, i64 0>, align 16 + +define void @llvm_mips_adds_s_d_test() nounwind { +entry: + %0 = load <2 x i64>* @llvm_mips_adds_s_d_ARG1 + %1 = load <2 x i64>* @llvm_mips_adds_s_d_ARG2 + %2 = tail call <2 x i64> @llvm.mips.adds.s.d(<2 x i64> %0, <2 x i64> %1) + store <2 x i64> %2, <2 x i64>* @llvm_mips_adds_s_d_RES + ret void +} + +declare <2 x i64> @llvm.mips.adds.s.d(<2 x i64>, <2 x i64>) nounwind + +; CHECK: llvm_mips_adds_s_d_test: +; CHECK: ld.d +; CHECK: ld.d +; CHECK: adds_s.d +; CHECK: st.d +; CHECK: .size llvm_mips_adds_s_d_test +; +@llvm_mips_adds_u_b_ARG1 = global <16 x i8> <i8 0, i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7, i8 8, i8 9, i8 10, i8 11, i8 12, i8 13, i8 14, i8 15>, align 16 +@llvm_mips_adds_u_b_ARG2 = global <16 x i8> <i8 16, i8 17, i8 18, i8 19, i8 20, i8 21, i8 22, i8 23, i8 24, i8 25, i8 26, i8 27, i8 28, i8 29, i8 30, i8 31>, align 16 +@llvm_mips_adds_u_b_RES = global <16 x i8> <i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0>, align 16 + +define void @llvm_mips_adds_u_b_test() nounwind { +entry: + %0 = load <16 x i8>* @llvm_mips_adds_u_b_ARG1 + %1 = load <16 x i8>* @llvm_mips_adds_u_b_ARG2 + %2 = tail call <16 x i8> @llvm.mips.adds.u.b(<16 x i8> %0, <16 x i8> %1) + store <16 x i8> %2, <16 x i8>* @llvm_mips_adds_u_b_RES + ret void +} + +declare <16 x i8> @llvm.mips.adds.u.b(<16 x i8>, <16 x i8>) nounwind + +; CHECK: llvm_mips_adds_u_b_test: +; CHECK: ld.b +; CHECK: ld.b +; CHECK: adds_u.b +; CHECK: st.b +; CHECK: .size llvm_mips_adds_u_b_test +; +@llvm_mips_adds_u_h_ARG1 = global <8 x i16> <i16 0, i16 1, i16 2, i16 3, i16 4, i16 5, i16 6, i16 7>, align 16 +@llvm_mips_adds_u_h_ARG2 = global <8 x i16> <i16 8, i16 9, i16 10, i16 11, i16 12, i16 13, i16 14, i16 15>, align 16 +@llvm_mips_adds_u_h_RES = global <8 x i16> <i16 0, i16 0, i16 0, i16 0, i16 0, i16 0, i16 0, i16 0>, align 16 + +define void @llvm_mips_adds_u_h_test() nounwind { +entry: + %0 = load <8 x i16>* @llvm_mips_adds_u_h_ARG1 + %1 = load <8 x i16>* @llvm_mips_adds_u_h_ARG2 + %2 = tail call <8 x i16> @llvm.mips.adds.u.h(<8 x i16> %0, <8 x i16> %1) + store <8 x i16> %2, <8 x i16>* @llvm_mips_adds_u_h_RES + ret void +} + +declare <8 x i16> @llvm.mips.adds.u.h(<8 x i16>, <8 x i16>) nounwind + +; CHECK: llvm_mips_adds_u_h_test: +; CHECK: ld.h +; CHECK: ld.h +; CHECK: adds_u.h +; CHECK: st.h +; CHECK: .size llvm_mips_adds_u_h_test +; +@llvm_mips_adds_u_w_ARG1 = global <4 x i32> <i32 0, i32 1, i32 2, i32 3>, align 16 +@llvm_mips_adds_u_w_ARG2 = global <4 x 
i32> <i32 4, i32 5, i32 6, i32 7>, align 16 +@llvm_mips_adds_u_w_RES = global <4 x i32> <i32 0, i32 0, i32 0, i32 0>, align 16 + +define void @llvm_mips_adds_u_w_test() nounwind { +entry: + %0 = load <4 x i32>* @llvm_mips_adds_u_w_ARG1 + %1 = load <4 x i32>* @llvm_mips_adds_u_w_ARG2 + %2 = tail call <4 x i32> @llvm.mips.adds.u.w(<4 x i32> %0, <4 x i32> %1) + store <4 x i32> %2, <4 x i32>* @llvm_mips_adds_u_w_RES + ret void +} + +declare <4 x i32> @llvm.mips.adds.u.w(<4 x i32>, <4 x i32>) nounwind + +; CHECK: llvm_mips_adds_u_w_test: +; CHECK: ld.w +; CHECK: ld.w +; CHECK: adds_u.w +; CHECK: st.w +; CHECK: .size llvm_mips_adds_u_w_test +; +@llvm_mips_adds_u_d_ARG1 = global <2 x i64> <i64 0, i64 1>, align 16 +@llvm_mips_adds_u_d_ARG2 = global <2 x i64> <i64 2, i64 3>, align 16 +@llvm_mips_adds_u_d_RES = global <2 x i64> <i64 0, i64 0>, align 16 + +define void @llvm_mips_adds_u_d_test() nounwind { +entry: + %0 = load <2 x i64>* @llvm_mips_adds_u_d_ARG1 + %1 = load <2 x i64>* @llvm_mips_adds_u_d_ARG2 + %2 = tail call <2 x i64> @llvm.mips.adds.u.d(<2 x i64> %0, <2 x i64> %1) + store <2 x i64> %2, <2 x i64>* @llvm_mips_adds_u_d_RES + ret void +} + +declare <2 x i64> @llvm.mips.adds.u.d(<2 x i64>, <2 x i64>) nounwind + +; CHECK: llvm_mips_adds_u_d_test: +; CHECK: ld.d +; CHECK: ld.d +; CHECK: adds_u.d +; CHECK: st.d +; CHECK: .size llvm_mips_adds_u_d_test +; +@llvm_mips_addv_b_ARG1 = global <16 x i8> <i8 0, i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7, i8 8, i8 9, i8 10, i8 11, i8 12, i8 13, i8 14, i8 15>, align 16 +@llvm_mips_addv_b_ARG2 = global <16 x i8> <i8 16, i8 17, i8 18, i8 19, i8 20, i8 21, i8 22, i8 23, i8 24, i8 25, i8 26, i8 27, i8 28, i8 29, i8 30, i8 31>, align 16 +@llvm_mips_addv_b_RES = global <16 x i8> <i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0>, align 16 + +define void @llvm_mips_addv_b_test() nounwind { +entry: + %0 = load <16 x i8>* @llvm_mips_addv_b_ARG1 + %1 = load <16 x i8>* @llvm_mips_addv_b_ARG2 + %2 = tail call <16 x i8> @llvm.mips.addv.b(<16 x i8> %0, <16 x i8> %1) + store <16 x i8> %2, <16 x i8>* @llvm_mips_addv_b_RES + ret void +} + +declare <16 x i8> @llvm.mips.addv.b(<16 x i8>, <16 x i8>) nounwind + +; CHECK: llvm_mips_addv_b_test: +; CHECK: ld.b +; CHECK: ld.b +; CHECK: addv.b +; CHECK: st.b +; CHECK: .size llvm_mips_addv_b_test +; +@llvm_mips_addv_h_ARG1 = global <8 x i16> <i16 0, i16 1, i16 2, i16 3, i16 4, i16 5, i16 6, i16 7>, align 16 +@llvm_mips_addv_h_ARG2 = global <8 x i16> <i16 8, i16 9, i16 10, i16 11, i16 12, i16 13, i16 14, i16 15>, align 16 +@llvm_mips_addv_h_RES = global <8 x i16> <i16 0, i16 0, i16 0, i16 0, i16 0, i16 0, i16 0, i16 0>, align 16 + +define void @llvm_mips_addv_h_test() nounwind { +entry: + %0 = load <8 x i16>* @llvm_mips_addv_h_ARG1 + %1 = load <8 x i16>* @llvm_mips_addv_h_ARG2 + %2 = tail call <8 x i16> @llvm.mips.addv.h(<8 x i16> %0, <8 x i16> %1) + store <8 x i16> %2, <8 x i16>* @llvm_mips_addv_h_RES + ret void +} + +declare <8 x i16> @llvm.mips.addv.h(<8 x i16>, <8 x i16>) nounwind + +; CHECK: llvm_mips_addv_h_test: +; CHECK: ld.h +; CHECK: ld.h +; CHECK: addv.h +; CHECK: st.h +; CHECK: .size llvm_mips_addv_h_test +; +@llvm_mips_addv_w_ARG1 = global <4 x i32> <i32 0, i32 1, i32 2, i32 3>, align 16 +@llvm_mips_addv_w_ARG2 = global <4 x i32> <i32 4, i32 5, i32 6, i32 7>, align 16 +@llvm_mips_addv_w_RES = global <4 x i32> <i32 0, i32 0, i32 0, i32 0>, align 16 + +define void @llvm_mips_addv_w_test() nounwind { +entry: + %0 = load <4 x i32>* @llvm_mips_addv_w_ARG1 + %1 = load <4 x i32>* 
@llvm_mips_addv_w_ARG2 + %2 = tail call <4 x i32> @llvm.mips.addv.w(<4 x i32> %0, <4 x i32> %1) + store <4 x i32> %2, <4 x i32>* @llvm_mips_addv_w_RES + ret void +} + +declare <4 x i32> @llvm.mips.addv.w(<4 x i32>, <4 x i32>) nounwind + +; CHECK: llvm_mips_addv_w_test: +; CHECK: ld.w +; CHECK: ld.w +; CHECK: addv.w +; CHECK: st.w +; CHECK: .size llvm_mips_addv_w_test +; +@llvm_mips_addv_d_ARG1 = global <2 x i64> <i64 0, i64 1>, align 16 +@llvm_mips_addv_d_ARG2 = global <2 x i64> <i64 2, i64 3>, align 16 +@llvm_mips_addv_d_RES = global <2 x i64> <i64 0, i64 0>, align 16 + +define void @llvm_mips_addv_d_test() nounwind { +entry: + %0 = load <2 x i64>* @llvm_mips_addv_d_ARG1 + %1 = load <2 x i64>* @llvm_mips_addv_d_ARG2 + %2 = tail call <2 x i64> @llvm.mips.addv.d(<2 x i64> %0, <2 x i64> %1) + store <2 x i64> %2, <2 x i64>* @llvm_mips_addv_d_RES + ret void +} + +declare <2 x i64> @llvm.mips.addv.d(<2 x i64>, <2 x i64>) nounwind + +; CHECK: llvm_mips_addv_d_test: +; CHECK: ld.d +; CHECK: ld.d +; CHECK: addv.d +; CHECK: st.d +; CHECK: .size llvm_mips_addv_d_test +; +@llvm_mips_asub_s_b_ARG1 = global <16 x i8> <i8 0, i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7, i8 8, i8 9, i8 10, i8 11, i8 12, i8 13, i8 14, i8 15>, align 16 +@llvm_mips_asub_s_b_ARG2 = global <16 x i8> <i8 16, i8 17, i8 18, i8 19, i8 20, i8 21, i8 22, i8 23, i8 24, i8 25, i8 26, i8 27, i8 28, i8 29, i8 30, i8 31>, align 16 +@llvm_mips_asub_s_b_RES = global <16 x i8> <i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0>, align 16 + +define void @llvm_mips_asub_s_b_test() nounwind { +entry: + %0 = load <16 x i8>* @llvm_mips_asub_s_b_ARG1 + %1 = load <16 x i8>* @llvm_mips_asub_s_b_ARG2 + %2 = tail call <16 x i8> @llvm.mips.asub.s.b(<16 x i8> %0, <16 x i8> %1) + store <16 x i8> %2, <16 x i8>* @llvm_mips_asub_s_b_RES + ret void +} + +declare <16 x i8> @llvm.mips.asub.s.b(<16 x i8>, <16 x i8>) nounwind + +; CHECK: llvm_mips_asub_s_b_test: +; CHECK: ld.b +; CHECK: ld.b +; CHECK: asub_s.b +; CHECK: st.b +; CHECK: .size llvm_mips_asub_s_b_test +; +@llvm_mips_asub_s_h_ARG1 = global <8 x i16> <i16 0, i16 1, i16 2, i16 3, i16 4, i16 5, i16 6, i16 7>, align 16 +@llvm_mips_asub_s_h_ARG2 = global <8 x i16> <i16 8, i16 9, i16 10, i16 11, i16 12, i16 13, i16 14, i16 15>, align 16 +@llvm_mips_asub_s_h_RES = global <8 x i16> <i16 0, i16 0, i16 0, i16 0, i16 0, i16 0, i16 0, i16 0>, align 16 + +define void @llvm_mips_asub_s_h_test() nounwind { +entry: + %0 = load <8 x i16>* @llvm_mips_asub_s_h_ARG1 + %1 = load <8 x i16>* @llvm_mips_asub_s_h_ARG2 + %2 = tail call <8 x i16> @llvm.mips.asub.s.h(<8 x i16> %0, <8 x i16> %1) + store <8 x i16> %2, <8 x i16>* @llvm_mips_asub_s_h_RES + ret void +} + +declare <8 x i16> @llvm.mips.asub.s.h(<8 x i16>, <8 x i16>) nounwind + +; CHECK: llvm_mips_asub_s_h_test: +; CHECK: ld.h +; CHECK: ld.h +; CHECK: asub_s.h +; CHECK: st.h +; CHECK: .size llvm_mips_asub_s_h_test +; +@llvm_mips_asub_s_w_ARG1 = global <4 x i32> <i32 0, i32 1, i32 2, i32 3>, align 16 +@llvm_mips_asub_s_w_ARG2 = global <4 x i32> <i32 4, i32 5, i32 6, i32 7>, align 16 +@llvm_mips_asub_s_w_RES = global <4 x i32> <i32 0, i32 0, i32 0, i32 0>, align 16 + +define void @llvm_mips_asub_s_w_test() nounwind { +entry: + %0 = load <4 x i32>* @llvm_mips_asub_s_w_ARG1 + %1 = load <4 x i32>* @llvm_mips_asub_s_w_ARG2 + %2 = tail call <4 x i32> @llvm.mips.asub.s.w(<4 x i32> %0, <4 x i32> %1) + store <4 x i32> %2, <4 x i32>* @llvm_mips_asub_s_w_RES + ret void +} + +declare <4 x i32> @llvm.mips.asub.s.w(<4 x i32>, <4 x i32>) 
nounwind + +; CHECK: llvm_mips_asub_s_w_test: +; CHECK: ld.w +; CHECK: ld.w +; CHECK: asub_s.w +; CHECK: st.w +; CHECK: .size llvm_mips_asub_s_w_test +; +@llvm_mips_asub_s_d_ARG1 = global <2 x i64> <i64 0, i64 1>, align 16 +@llvm_mips_asub_s_d_ARG2 = global <2 x i64> <i64 2, i64 3>, align 16 +@llvm_mips_asub_s_d_RES = global <2 x i64> <i64 0, i64 0>, align 16 + +define void @llvm_mips_asub_s_d_test() nounwind { +entry: + %0 = load <2 x i64>* @llvm_mips_asub_s_d_ARG1 + %1 = load <2 x i64>* @llvm_mips_asub_s_d_ARG2 + %2 = tail call <2 x i64> @llvm.mips.asub.s.d(<2 x i64> %0, <2 x i64> %1) + store <2 x i64> %2, <2 x i64>* @llvm_mips_asub_s_d_RES + ret void +} + +declare <2 x i64> @llvm.mips.asub.s.d(<2 x i64>, <2 x i64>) nounwind + +; CHECK: llvm_mips_asub_s_d_test: +; CHECK: ld.d +; CHECK: ld.d +; CHECK: asub_s.d +; CHECK: st.d +; CHECK: .size llvm_mips_asub_s_d_test +; +@llvm_mips_asub_u_b_ARG1 = global <16 x i8> <i8 0, i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7, i8 8, i8 9, i8 10, i8 11, i8 12, i8 13, i8 14, i8 15>, align 16 +@llvm_mips_asub_u_b_ARG2 = global <16 x i8> <i8 16, i8 17, i8 18, i8 19, i8 20, i8 21, i8 22, i8 23, i8 24, i8 25, i8 26, i8 27, i8 28, i8 29, i8 30, i8 31>, align 16 +@llvm_mips_asub_u_b_RES = global <16 x i8> <i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0>, align 16 + +define void @llvm_mips_asub_u_b_test() nounwind { +entry: + %0 = load <16 x i8>* @llvm_mips_asub_u_b_ARG1 + %1 = load <16 x i8>* @llvm_mips_asub_u_b_ARG2 + %2 = tail call <16 x i8> @llvm.mips.asub.u.b(<16 x i8> %0, <16 x i8> %1) + store <16 x i8> %2, <16 x i8>* @llvm_mips_asub_u_b_RES + ret void +} + +declare <16 x i8> @llvm.mips.asub.u.b(<16 x i8>, <16 x i8>) nounwind + +; CHECK: llvm_mips_asub_u_b_test: +; CHECK: ld.b +; CHECK: ld.b +; CHECK: asub_u.b +; CHECK: st.b +; CHECK: .size llvm_mips_asub_u_b_test +; +@llvm_mips_asub_u_h_ARG1 = global <8 x i16> <i16 0, i16 1, i16 2, i16 3, i16 4, i16 5, i16 6, i16 7>, align 16 +@llvm_mips_asub_u_h_ARG2 = global <8 x i16> <i16 8, i16 9, i16 10, i16 11, i16 12, i16 13, i16 14, i16 15>, align 16 +@llvm_mips_asub_u_h_RES = global <8 x i16> <i16 0, i16 0, i16 0, i16 0, i16 0, i16 0, i16 0, i16 0>, align 16 + +define void @llvm_mips_asub_u_h_test() nounwind { +entry: + %0 = load <8 x i16>* @llvm_mips_asub_u_h_ARG1 + %1 = load <8 x i16>* @llvm_mips_asub_u_h_ARG2 + %2 = tail call <8 x i16> @llvm.mips.asub.u.h(<8 x i16> %0, <8 x i16> %1) + store <8 x i16> %2, <8 x i16>* @llvm_mips_asub_u_h_RES + ret void +} + +declare <8 x i16> @llvm.mips.asub.u.h(<8 x i16>, <8 x i16>) nounwind + +; CHECK: llvm_mips_asub_u_h_test: +; CHECK: ld.h +; CHECK: ld.h +; CHECK: asub_u.h +; CHECK: st.h +; CHECK: .size llvm_mips_asub_u_h_test +; +@llvm_mips_asub_u_w_ARG1 = global <4 x i32> <i32 0, i32 1, i32 2, i32 3>, align 16 +@llvm_mips_asub_u_w_ARG2 = global <4 x i32> <i32 4, i32 5, i32 6, i32 7>, align 16 +@llvm_mips_asub_u_w_RES = global <4 x i32> <i32 0, i32 0, i32 0, i32 0>, align 16 + +define void @llvm_mips_asub_u_w_test() nounwind { +entry: + %0 = load <4 x i32>* @llvm_mips_asub_u_w_ARG1 + %1 = load <4 x i32>* @llvm_mips_asub_u_w_ARG2 + %2 = tail call <4 x i32> @llvm.mips.asub.u.w(<4 x i32> %0, <4 x i32> %1) + store <4 x i32> %2, <4 x i32>* @llvm_mips_asub_u_w_RES + ret void +} + +declare <4 x i32> @llvm.mips.asub.u.w(<4 x i32>, <4 x i32>) nounwind + +; CHECK: llvm_mips_asub_u_w_test: +; CHECK: ld.w +; CHECK: ld.w +; CHECK: asub_u.w +; CHECK: st.w +; CHECK: .size llvm_mips_asub_u_w_test +; +@llvm_mips_asub_u_d_ARG1 = global <2 x i64> 
<i64 0, i64 1>, align 16 +@llvm_mips_asub_u_d_ARG2 = global <2 x i64> <i64 2, i64 3>, align 16 +@llvm_mips_asub_u_d_RES = global <2 x i64> <i64 0, i64 0>, align 16 + +define void @llvm_mips_asub_u_d_test() nounwind { +entry: + %0 = load <2 x i64>* @llvm_mips_asub_u_d_ARG1 + %1 = load <2 x i64>* @llvm_mips_asub_u_d_ARG2 + %2 = tail call <2 x i64> @llvm.mips.asub.u.d(<2 x i64> %0, <2 x i64> %1) + store <2 x i64> %2, <2 x i64>* @llvm_mips_asub_u_d_RES + ret void +} + +declare <2 x i64> @llvm.mips.asub.u.d(<2 x i64>, <2 x i64>) nounwind + +; CHECK: llvm_mips_asub_u_d_test: +; CHECK: ld.d +; CHECK: ld.d +; CHECK: asub_u.d +; CHECK: st.d +; CHECK: .size llvm_mips_asub_u_d_test +; +@llvm_mips_ave_s_b_ARG1 = global <16 x i8> <i8 0, i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7, i8 8, i8 9, i8 10, i8 11, i8 12, i8 13, i8 14, i8 15>, align 16 +@llvm_mips_ave_s_b_ARG2 = global <16 x i8> <i8 16, i8 17, i8 18, i8 19, i8 20, i8 21, i8 22, i8 23, i8 24, i8 25, i8 26, i8 27, i8 28, i8 29, i8 30, i8 31>, align 16 +@llvm_mips_ave_s_b_RES = global <16 x i8> <i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0>, align 16 + +define void @llvm_mips_ave_s_b_test() nounwind { +entry: + %0 = load <16 x i8>* @llvm_mips_ave_s_b_ARG1 + %1 = load <16 x i8>* @llvm_mips_ave_s_b_ARG2 + %2 = tail call <16 x i8> @llvm.mips.ave.s.b(<16 x i8> %0, <16 x i8> %1) + store <16 x i8> %2, <16 x i8>* @llvm_mips_ave_s_b_RES + ret void +} + +declare <16 x i8> @llvm.mips.ave.s.b(<16 x i8>, <16 x i8>) nounwind + +; CHECK: llvm_mips_ave_s_b_test: +; CHECK: ld.b +; CHECK: ld.b +; CHECK: ave_s.b +; CHECK: st.b +; CHECK: .size llvm_mips_ave_s_b_test +; +@llvm_mips_ave_s_h_ARG1 = global <8 x i16> <i16 0, i16 1, i16 2, i16 3, i16 4, i16 5, i16 6, i16 7>, align 16 +@llvm_mips_ave_s_h_ARG2 = global <8 x i16> <i16 8, i16 9, i16 10, i16 11, i16 12, i16 13, i16 14, i16 15>, align 16 +@llvm_mips_ave_s_h_RES = global <8 x i16> <i16 0, i16 0, i16 0, i16 0, i16 0, i16 0, i16 0, i16 0>, align 16 + +define void @llvm_mips_ave_s_h_test() nounwind { +entry: + %0 = load <8 x i16>* @llvm_mips_ave_s_h_ARG1 + %1 = load <8 x i16>* @llvm_mips_ave_s_h_ARG2 + %2 = tail call <8 x i16> @llvm.mips.ave.s.h(<8 x i16> %0, <8 x i16> %1) + store <8 x i16> %2, <8 x i16>* @llvm_mips_ave_s_h_RES + ret void +} + +declare <8 x i16> @llvm.mips.ave.s.h(<8 x i16>, <8 x i16>) nounwind + +; CHECK: llvm_mips_ave_s_h_test: +; CHECK: ld.h +; CHECK: ld.h +; CHECK: ave_s.h +; CHECK: st.h +; CHECK: .size llvm_mips_ave_s_h_test +; +@llvm_mips_ave_s_w_ARG1 = global <4 x i32> <i32 0, i32 1, i32 2, i32 3>, align 16 +@llvm_mips_ave_s_w_ARG2 = global <4 x i32> <i32 4, i32 5, i32 6, i32 7>, align 16 +@llvm_mips_ave_s_w_RES = global <4 x i32> <i32 0, i32 0, i32 0, i32 0>, align 16 + +define void @llvm_mips_ave_s_w_test() nounwind { +entry: + %0 = load <4 x i32>* @llvm_mips_ave_s_w_ARG1 + %1 = load <4 x i32>* @llvm_mips_ave_s_w_ARG2 + %2 = tail call <4 x i32> @llvm.mips.ave.s.w(<4 x i32> %0, <4 x i32> %1) + store <4 x i32> %2, <4 x i32>* @llvm_mips_ave_s_w_RES + ret void +} + +declare <4 x i32> @llvm.mips.ave.s.w(<4 x i32>, <4 x i32>) nounwind + +; CHECK: llvm_mips_ave_s_w_test: +; CHECK: ld.w +; CHECK: ld.w +; CHECK: ave_s.w +; CHECK: st.w +; CHECK: .size llvm_mips_ave_s_w_test +; +@llvm_mips_ave_s_d_ARG1 = global <2 x i64> <i64 0, i64 1>, align 16 +@llvm_mips_ave_s_d_ARG2 = global <2 x i64> <i64 2, i64 3>, align 16 +@llvm_mips_ave_s_d_RES = global <2 x i64> <i64 0, i64 0>, align 16 + +define void @llvm_mips_ave_s_d_test() nounwind { +entry: + %0 = load <2 
x i64>* @llvm_mips_ave_s_d_ARG1 + %1 = load <2 x i64>* @llvm_mips_ave_s_d_ARG2 + %2 = tail call <2 x i64> @llvm.mips.ave.s.d(<2 x i64> %0, <2 x i64> %1) + store <2 x i64> %2, <2 x i64>* @llvm_mips_ave_s_d_RES + ret void +} + +declare <2 x i64> @llvm.mips.ave.s.d(<2 x i64>, <2 x i64>) nounwind + +; CHECK: llvm_mips_ave_s_d_test: +; CHECK: ld.d +; CHECK: ld.d +; CHECK: ave_s.d +; CHECK: st.d +; CHECK: .size llvm_mips_ave_s_d_test +; +@llvm_mips_ave_u_b_ARG1 = global <16 x i8> <i8 0, i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7, i8 8, i8 9, i8 10, i8 11, i8 12, i8 13, i8 14, i8 15>, align 16 +@llvm_mips_ave_u_b_ARG2 = global <16 x i8> <i8 16, i8 17, i8 18, i8 19, i8 20, i8 21, i8 22, i8 23, i8 24, i8 25, i8 26, i8 27, i8 28, i8 29, i8 30, i8 31>, align 16 +@llvm_mips_ave_u_b_RES = global <16 x i8> <i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0>, align 16 + +define void @llvm_mips_ave_u_b_test() nounwind { +entry: + %0 = load <16 x i8>* @llvm_mips_ave_u_b_ARG1 + %1 = load <16 x i8>* @llvm_mips_ave_u_b_ARG2 + %2 = tail call <16 x i8> @llvm.mips.ave.u.b(<16 x i8> %0, <16 x i8> %1) + store <16 x i8> %2, <16 x i8>* @llvm_mips_ave_u_b_RES + ret void +} + +declare <16 x i8> @llvm.mips.ave.u.b(<16 x i8>, <16 x i8>) nounwind + +; CHECK: llvm_mips_ave_u_b_test: +; CHECK: ld.b +; CHECK: ld.b +; CHECK: ave_u.b +; CHECK: st.b +; CHECK: .size llvm_mips_ave_u_b_test +; +@llvm_mips_ave_u_h_ARG1 = global <8 x i16> <i16 0, i16 1, i16 2, i16 3, i16 4, i16 5, i16 6, i16 7>, align 16 +@llvm_mips_ave_u_h_ARG2 = global <8 x i16> <i16 8, i16 9, i16 10, i16 11, i16 12, i16 13, i16 14, i16 15>, align 16 +@llvm_mips_ave_u_h_RES = global <8 x i16> <i16 0, i16 0, i16 0, i16 0, i16 0, i16 0, i16 0, i16 0>, align 16 + +define void @llvm_mips_ave_u_h_test() nounwind { +entry: + %0 = load <8 x i16>* @llvm_mips_ave_u_h_ARG1 + %1 = load <8 x i16>* @llvm_mips_ave_u_h_ARG2 + %2 = tail call <8 x i16> @llvm.mips.ave.u.h(<8 x i16> %0, <8 x i16> %1) + store <8 x i16> %2, <8 x i16>* @llvm_mips_ave_u_h_RES + ret void +} + +declare <8 x i16> @llvm.mips.ave.u.h(<8 x i16>, <8 x i16>) nounwind + +; CHECK: llvm_mips_ave_u_h_test: +; CHECK: ld.h +; CHECK: ld.h +; CHECK: ave_u.h +; CHECK: st.h +; CHECK: .size llvm_mips_ave_u_h_test +; +@llvm_mips_ave_u_w_ARG1 = global <4 x i32> <i32 0, i32 1, i32 2, i32 3>, align 16 +@llvm_mips_ave_u_w_ARG2 = global <4 x i32> <i32 4, i32 5, i32 6, i32 7>, align 16 +@llvm_mips_ave_u_w_RES = global <4 x i32> <i32 0, i32 0, i32 0, i32 0>, align 16 + +define void @llvm_mips_ave_u_w_test() nounwind { +entry: + %0 = load <4 x i32>* @llvm_mips_ave_u_w_ARG1 + %1 = load <4 x i32>* @llvm_mips_ave_u_w_ARG2 + %2 = tail call <4 x i32> @llvm.mips.ave.u.w(<4 x i32> %0, <4 x i32> %1) + store <4 x i32> %2, <4 x i32>* @llvm_mips_ave_u_w_RES + ret void +} + +declare <4 x i32> @llvm.mips.ave.u.w(<4 x i32>, <4 x i32>) nounwind + +; CHECK: llvm_mips_ave_u_w_test: +; CHECK: ld.w +; CHECK: ld.w +; CHECK: ave_u.w +; CHECK: st.w +; CHECK: .size llvm_mips_ave_u_w_test +; +@llvm_mips_ave_u_d_ARG1 = global <2 x i64> <i64 0, i64 1>, align 16 +@llvm_mips_ave_u_d_ARG2 = global <2 x i64> <i64 2, i64 3>, align 16 +@llvm_mips_ave_u_d_RES = global <2 x i64> <i64 0, i64 0>, align 16 + +define void @llvm_mips_ave_u_d_test() nounwind { +entry: + %0 = load <2 x i64>* @llvm_mips_ave_u_d_ARG1 + %1 = load <2 x i64>* @llvm_mips_ave_u_d_ARG2 + %2 = tail call <2 x i64> @llvm.mips.ave.u.d(<2 x i64> %0, <2 x i64> %1) + store <2 x i64> %2, <2 x i64>* @llvm_mips_ave_u_d_RES + ret void +} + +declare <2 x i64> 
@llvm.mips.ave.u.d(<2 x i64>, <2 x i64>) nounwind + +; CHECK: llvm_mips_ave_u_d_test: +; CHECK: ld.d +; CHECK: ld.d +; CHECK: ave_u.d +; CHECK: st.d +; CHECK: .size llvm_mips_ave_u_d_test +; +@llvm_mips_aver_s_b_ARG1 = global <16 x i8> <i8 0, i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7, i8 8, i8 9, i8 10, i8 11, i8 12, i8 13, i8 14, i8 15>, align 16 +@llvm_mips_aver_s_b_ARG2 = global <16 x i8> <i8 16, i8 17, i8 18, i8 19, i8 20, i8 21, i8 22, i8 23, i8 24, i8 25, i8 26, i8 27, i8 28, i8 29, i8 30, i8 31>, align 16 +@llvm_mips_aver_s_b_RES = global <16 x i8> <i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0>, align 16 + +define void @llvm_mips_aver_s_b_test() nounwind { +entry: + %0 = load <16 x i8>* @llvm_mips_aver_s_b_ARG1 + %1 = load <16 x i8>* @llvm_mips_aver_s_b_ARG2 + %2 = tail call <16 x i8> @llvm.mips.aver.s.b(<16 x i8> %0, <16 x i8> %1) + store <16 x i8> %2, <16 x i8>* @llvm_mips_aver_s_b_RES + ret void +} + +declare <16 x i8> @llvm.mips.aver.s.b(<16 x i8>, <16 x i8>) nounwind + +; CHECK: llvm_mips_aver_s_b_test: +; CHECK: ld.b +; CHECK: ld.b +; CHECK: aver_s.b +; CHECK: st.b +; CHECK: .size llvm_mips_aver_s_b_test +; +@llvm_mips_aver_s_h_ARG1 = global <8 x i16> <i16 0, i16 1, i16 2, i16 3, i16 4, i16 5, i16 6, i16 7>, align 16 +@llvm_mips_aver_s_h_ARG2 = global <8 x i16> <i16 8, i16 9, i16 10, i16 11, i16 12, i16 13, i16 14, i16 15>, align 16 +@llvm_mips_aver_s_h_RES = global <8 x i16> <i16 0, i16 0, i16 0, i16 0, i16 0, i16 0, i16 0, i16 0>, align 16 + +define void @llvm_mips_aver_s_h_test() nounwind { +entry: + %0 = load <8 x i16>* @llvm_mips_aver_s_h_ARG1 + %1 = load <8 x i16>* @llvm_mips_aver_s_h_ARG2 + %2 = tail call <8 x i16> @llvm.mips.aver.s.h(<8 x i16> %0, <8 x i16> %1) + store <8 x i16> %2, <8 x i16>* @llvm_mips_aver_s_h_RES + ret void +} + +declare <8 x i16> @llvm.mips.aver.s.h(<8 x i16>, <8 x i16>) nounwind + +; CHECK: llvm_mips_aver_s_h_test: +; CHECK: ld.h +; CHECK: ld.h +; CHECK: aver_s.h +; CHECK: st.h +; CHECK: .size llvm_mips_aver_s_h_test +; +@llvm_mips_aver_s_w_ARG1 = global <4 x i32> <i32 0, i32 1, i32 2, i32 3>, align 16 +@llvm_mips_aver_s_w_ARG2 = global <4 x i32> <i32 4, i32 5, i32 6, i32 7>, align 16 +@llvm_mips_aver_s_w_RES = global <4 x i32> <i32 0, i32 0, i32 0, i32 0>, align 16 + +define void @llvm_mips_aver_s_w_test() nounwind { +entry: + %0 = load <4 x i32>* @llvm_mips_aver_s_w_ARG1 + %1 = load <4 x i32>* @llvm_mips_aver_s_w_ARG2 + %2 = tail call <4 x i32> @llvm.mips.aver.s.w(<4 x i32> %0, <4 x i32> %1) + store <4 x i32> %2, <4 x i32>* @llvm_mips_aver_s_w_RES + ret void +} + +declare <4 x i32> @llvm.mips.aver.s.w(<4 x i32>, <4 x i32>) nounwind + +; CHECK: llvm_mips_aver_s_w_test: +; CHECK: ld.w +; CHECK: ld.w +; CHECK: aver_s.w +; CHECK: st.w +; CHECK: .size llvm_mips_aver_s_w_test +; +@llvm_mips_aver_s_d_ARG1 = global <2 x i64> <i64 0, i64 1>, align 16 +@llvm_mips_aver_s_d_ARG2 = global <2 x i64> <i64 2, i64 3>, align 16 +@llvm_mips_aver_s_d_RES = global <2 x i64> <i64 0, i64 0>, align 16 + +define void @llvm_mips_aver_s_d_test() nounwind { +entry: + %0 = load <2 x i64>* @llvm_mips_aver_s_d_ARG1 + %1 = load <2 x i64>* @llvm_mips_aver_s_d_ARG2 + %2 = tail call <2 x i64> @llvm.mips.aver.s.d(<2 x i64> %0, <2 x i64> %1) + store <2 x i64> %2, <2 x i64>* @llvm_mips_aver_s_d_RES + ret void +} + +declare <2 x i64> @llvm.mips.aver.s.d(<2 x i64>, <2 x i64>) nounwind + +; CHECK: llvm_mips_aver_s_d_test: +; CHECK: ld.d +; CHECK: ld.d +; CHECK: aver_s.d +; CHECK: st.d +; CHECK: .size llvm_mips_aver_s_d_test +; 
+@llvm_mips_aver_u_b_ARG1 = global <16 x i8> <i8 0, i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7, i8 8, i8 9, i8 10, i8 11, i8 12, i8 13, i8 14, i8 15>, align 16 +@llvm_mips_aver_u_b_ARG2 = global <16 x i8> <i8 16, i8 17, i8 18, i8 19, i8 20, i8 21, i8 22, i8 23, i8 24, i8 25, i8 26, i8 27, i8 28, i8 29, i8 30, i8 31>, align 16 +@llvm_mips_aver_u_b_RES = global <16 x i8> <i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0>, align 16 + +define void @llvm_mips_aver_u_b_test() nounwind { +entry: + %0 = load <16 x i8>* @llvm_mips_aver_u_b_ARG1 + %1 = load <16 x i8>* @llvm_mips_aver_u_b_ARG2 + %2 = tail call <16 x i8> @llvm.mips.aver.u.b(<16 x i8> %0, <16 x i8> %1) + store <16 x i8> %2, <16 x i8>* @llvm_mips_aver_u_b_RES + ret void +} + +declare <16 x i8> @llvm.mips.aver.u.b(<16 x i8>, <16 x i8>) nounwind + +; CHECK: llvm_mips_aver_u_b_test: +; CHECK: ld.b +; CHECK: ld.b +; CHECK: aver_u.b +; CHECK: st.b +; CHECK: .size llvm_mips_aver_u_b_test +; +@llvm_mips_aver_u_h_ARG1 = global <8 x i16> <i16 0, i16 1, i16 2, i16 3, i16 4, i16 5, i16 6, i16 7>, align 16 +@llvm_mips_aver_u_h_ARG2 = global <8 x i16> <i16 8, i16 9, i16 10, i16 11, i16 12, i16 13, i16 14, i16 15>, align 16 +@llvm_mips_aver_u_h_RES = global <8 x i16> <i16 0, i16 0, i16 0, i16 0, i16 0, i16 0, i16 0, i16 0>, align 16 + +define void @llvm_mips_aver_u_h_test() nounwind { +entry: + %0 = load <8 x i16>* @llvm_mips_aver_u_h_ARG1 + %1 = load <8 x i16>* @llvm_mips_aver_u_h_ARG2 + %2 = tail call <8 x i16> @llvm.mips.aver.u.h(<8 x i16> %0, <8 x i16> %1) + store <8 x i16> %2, <8 x i16>* @llvm_mips_aver_u_h_RES + ret void +} + +declare <8 x i16> @llvm.mips.aver.u.h(<8 x i16>, <8 x i16>) nounwind + +; CHECK: llvm_mips_aver_u_h_test: +; CHECK: ld.h +; CHECK: ld.h +; CHECK: aver_u.h +; CHECK: st.h +; CHECK: .size llvm_mips_aver_u_h_test +; +@llvm_mips_aver_u_w_ARG1 = global <4 x i32> <i32 0, i32 1, i32 2, i32 3>, align 16 +@llvm_mips_aver_u_w_ARG2 = global <4 x i32> <i32 4, i32 5, i32 6, i32 7>, align 16 +@llvm_mips_aver_u_w_RES = global <4 x i32> <i32 0, i32 0, i32 0, i32 0>, align 16 + +define void @llvm_mips_aver_u_w_test() nounwind { +entry: + %0 = load <4 x i32>* @llvm_mips_aver_u_w_ARG1 + %1 = load <4 x i32>* @llvm_mips_aver_u_w_ARG2 + %2 = tail call <4 x i32> @llvm.mips.aver.u.w(<4 x i32> %0, <4 x i32> %1) + store <4 x i32> %2, <4 x i32>* @llvm_mips_aver_u_w_RES + ret void +} + +declare <4 x i32> @llvm.mips.aver.u.w(<4 x i32>, <4 x i32>) nounwind + +; CHECK: llvm_mips_aver_u_w_test: +; CHECK: ld.w +; CHECK: ld.w +; CHECK: aver_u.w +; CHECK: st.w +; CHECK: .size llvm_mips_aver_u_w_test +; +@llvm_mips_aver_u_d_ARG1 = global <2 x i64> <i64 0, i64 1>, align 16 +@llvm_mips_aver_u_d_ARG2 = global <2 x i64> <i64 2, i64 3>, align 16 +@llvm_mips_aver_u_d_RES = global <2 x i64> <i64 0, i64 0>, align 16 + +define void @llvm_mips_aver_u_d_test() nounwind { +entry: + %0 = load <2 x i64>* @llvm_mips_aver_u_d_ARG1 + %1 = load <2 x i64>* @llvm_mips_aver_u_d_ARG2 + %2 = tail call <2 x i64> @llvm.mips.aver.u.d(<2 x i64> %0, <2 x i64> %1) + store <2 x i64> %2, <2 x i64>* @llvm_mips_aver_u_d_RES + ret void +} + +declare <2 x i64> @llvm.mips.aver.u.d(<2 x i64>, <2 x i64>) nounwind + +; CHECK: llvm_mips_aver_u_d_test: +; CHECK: ld.d +; CHECK: ld.d +; CHECK: aver_u.d +; CHECK: st.d +; CHECK: .size llvm_mips_aver_u_d_test +; diff --git a/test/CodeGen/Mips/msa/3r-b.ll b/test/CodeGen/Mips/msa/3r-b.ll new file mode 100644 index 0000000..afcc391 --- /dev/null +++ b/test/CodeGen/Mips/msa/3r-b.ll @@ -0,0 +1,442 @@ +; RUN: llc 
-march=mips -mattr=+msa < %s | FileCheck %s + +@llvm_mips_bclr_b_ARG1 = global <16 x i8> <i8 0, i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7, i8 8, i8 9, i8 10, i8 11, i8 12, i8 13, i8 14, i8 15>, align 16 +@llvm_mips_bclr_b_ARG2 = global <16 x i8> <i8 16, i8 17, i8 18, i8 19, i8 20, i8 21, i8 22, i8 23, i8 24, i8 25, i8 26, i8 27, i8 28, i8 29, i8 30, i8 31>, align 16 +@llvm_mips_bclr_b_RES = global <16 x i8> <i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0>, align 16 + +define void @llvm_mips_bclr_b_test() nounwind { +entry: + %0 = load <16 x i8>* @llvm_mips_bclr_b_ARG1 + %1 = load <16 x i8>* @llvm_mips_bclr_b_ARG2 + %2 = tail call <16 x i8> @llvm.mips.bclr.b(<16 x i8> %0, <16 x i8> %1) + store <16 x i8> %2, <16 x i8>* @llvm_mips_bclr_b_RES + ret void +} + +declare <16 x i8> @llvm.mips.bclr.b(<16 x i8>, <16 x i8>) nounwind + +; CHECK: llvm_mips_bclr_b_test: +; CHECK: ld.b +; CHECK: ld.b +; CHECK: bclr.b +; CHECK: st.b +; CHECK: .size llvm_mips_bclr_b_test +; +@llvm_mips_bclr_h_ARG1 = global <8 x i16> <i16 0, i16 1, i16 2, i16 3, i16 4, i16 5, i16 6, i16 7>, align 16 +@llvm_mips_bclr_h_ARG2 = global <8 x i16> <i16 8, i16 9, i16 10, i16 11, i16 12, i16 13, i16 14, i16 15>, align 16 +@llvm_mips_bclr_h_RES = global <8 x i16> <i16 0, i16 0, i16 0, i16 0, i16 0, i16 0, i16 0, i16 0>, align 16 + +define void @llvm_mips_bclr_h_test() nounwind { +entry: + %0 = load <8 x i16>* @llvm_mips_bclr_h_ARG1 + %1 = load <8 x i16>* @llvm_mips_bclr_h_ARG2 + %2 = tail call <8 x i16> @llvm.mips.bclr.h(<8 x i16> %0, <8 x i16> %1) + store <8 x i16> %2, <8 x i16>* @llvm_mips_bclr_h_RES + ret void +} + +declare <8 x i16> @llvm.mips.bclr.h(<8 x i16>, <8 x i16>) nounwind + +; CHECK: llvm_mips_bclr_h_test: +; CHECK: ld.h +; CHECK: ld.h +; CHECK: bclr.h +; CHECK: st.h +; CHECK: .size llvm_mips_bclr_h_test +; +@llvm_mips_bclr_w_ARG1 = global <4 x i32> <i32 0, i32 1, i32 2, i32 3>, align 16 +@llvm_mips_bclr_w_ARG2 = global <4 x i32> <i32 4, i32 5, i32 6, i32 7>, align 16 +@llvm_mips_bclr_w_RES = global <4 x i32> <i32 0, i32 0, i32 0, i32 0>, align 16 + +define void @llvm_mips_bclr_w_test() nounwind { +entry: + %0 = load <4 x i32>* @llvm_mips_bclr_w_ARG1 + %1 = load <4 x i32>* @llvm_mips_bclr_w_ARG2 + %2 = tail call <4 x i32> @llvm.mips.bclr.w(<4 x i32> %0, <4 x i32> %1) + store <4 x i32> %2, <4 x i32>* @llvm_mips_bclr_w_RES + ret void +} + +declare <4 x i32> @llvm.mips.bclr.w(<4 x i32>, <4 x i32>) nounwind + +; CHECK: llvm_mips_bclr_w_test: +; CHECK: ld.w +; CHECK: ld.w +; CHECK: bclr.w +; CHECK: st.w +; CHECK: .size llvm_mips_bclr_w_test +; +@llvm_mips_bclr_d_ARG1 = global <2 x i64> <i64 0, i64 1>, align 16 +@llvm_mips_bclr_d_ARG2 = global <2 x i64> <i64 2, i64 3>, align 16 +@llvm_mips_bclr_d_RES = global <2 x i64> <i64 0, i64 0>, align 16 + +define void @llvm_mips_bclr_d_test() nounwind { +entry: + %0 = load <2 x i64>* @llvm_mips_bclr_d_ARG1 + %1 = load <2 x i64>* @llvm_mips_bclr_d_ARG2 + %2 = tail call <2 x i64> @llvm.mips.bclr.d(<2 x i64> %0, <2 x i64> %1) + store <2 x i64> %2, <2 x i64>* @llvm_mips_bclr_d_RES + ret void +} + +declare <2 x i64> @llvm.mips.bclr.d(<2 x i64>, <2 x i64>) nounwind + +; CHECK: llvm_mips_bclr_d_test: +; CHECK: ld.d +; CHECK: ld.d +; CHECK: bclr.d +; CHECK: st.d +; CHECK: .size llvm_mips_bclr_d_test +; +@llvm_mips_binsl_b_ARG1 = global <16 x i8> <i8 0, i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7, i8 8, i8 9, i8 10, i8 11, i8 12, i8 13, i8 14, i8 15>, align 16 +@llvm_mips_binsl_b_ARG2 = global <16 x i8> <i8 16, i8 17, i8 18, i8 19, i8 20, i8 21, i8 22, 
i8 23, i8 24, i8 25, i8 26, i8 27, i8 28, i8 29, i8 30, i8 31>, align 16 +@llvm_mips_binsl_b_RES = global <16 x i8> <i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0>, align 16 + +define void @llvm_mips_binsl_b_test() nounwind { +entry: + %0 = load <16 x i8>* @llvm_mips_binsl_b_ARG1 + %1 = load <16 x i8>* @llvm_mips_binsl_b_ARG2 + %2 = tail call <16 x i8> @llvm.mips.binsl.b(<16 x i8> %0, <16 x i8> %1) + store <16 x i8> %2, <16 x i8>* @llvm_mips_binsl_b_RES + ret void +} + +declare <16 x i8> @llvm.mips.binsl.b(<16 x i8>, <16 x i8>) nounwind + +; CHECK: llvm_mips_binsl_b_test: +; CHECK: ld.b +; CHECK: ld.b +; CHECK: binsl.b +; CHECK: st.b +; CHECK: .size llvm_mips_binsl_b_test +; +@llvm_mips_binsl_h_ARG1 = global <8 x i16> <i16 0, i16 1, i16 2, i16 3, i16 4, i16 5, i16 6, i16 7>, align 16 +@llvm_mips_binsl_h_ARG2 = global <8 x i16> <i16 8, i16 9, i16 10, i16 11, i16 12, i16 13, i16 14, i16 15>, align 16 +@llvm_mips_binsl_h_RES = global <8 x i16> <i16 0, i16 0, i16 0, i16 0, i16 0, i16 0, i16 0, i16 0>, align 16 + +define void @llvm_mips_binsl_h_test() nounwind { +entry: + %0 = load <8 x i16>* @llvm_mips_binsl_h_ARG1 + %1 = load <8 x i16>* @llvm_mips_binsl_h_ARG2 + %2 = tail call <8 x i16> @llvm.mips.binsl.h(<8 x i16> %0, <8 x i16> %1) + store <8 x i16> %2, <8 x i16>* @llvm_mips_binsl_h_RES + ret void +} + +declare <8 x i16> @llvm.mips.binsl.h(<8 x i16>, <8 x i16>) nounwind + +; CHECK: llvm_mips_binsl_h_test: +; CHECK: ld.h +; CHECK: ld.h +; CHECK: binsl.h +; CHECK: st.h +; CHECK: .size llvm_mips_binsl_h_test +; +@llvm_mips_binsl_w_ARG1 = global <4 x i32> <i32 0, i32 1, i32 2, i32 3>, align 16 +@llvm_mips_binsl_w_ARG2 = global <4 x i32> <i32 4, i32 5, i32 6, i32 7>, align 16 +@llvm_mips_binsl_w_RES = global <4 x i32> <i32 0, i32 0, i32 0, i32 0>, align 16 + +define void @llvm_mips_binsl_w_test() nounwind { +entry: + %0 = load <4 x i32>* @llvm_mips_binsl_w_ARG1 + %1 = load <4 x i32>* @llvm_mips_binsl_w_ARG2 + %2 = tail call <4 x i32> @llvm.mips.binsl.w(<4 x i32> %0, <4 x i32> %1) + store <4 x i32> %2, <4 x i32>* @llvm_mips_binsl_w_RES + ret void +} + +declare <4 x i32> @llvm.mips.binsl.w(<4 x i32>, <4 x i32>) nounwind + +; CHECK: llvm_mips_binsl_w_test: +; CHECK: ld.w +; CHECK: ld.w +; CHECK: binsl.w +; CHECK: st.w +; CHECK: .size llvm_mips_binsl_w_test +; +@llvm_mips_binsl_d_ARG1 = global <2 x i64> <i64 0, i64 1>, align 16 +@llvm_mips_binsl_d_ARG2 = global <2 x i64> <i64 2, i64 3>, align 16 +@llvm_mips_binsl_d_RES = global <2 x i64> <i64 0, i64 0>, align 16 + +define void @llvm_mips_binsl_d_test() nounwind { +entry: + %0 = load <2 x i64>* @llvm_mips_binsl_d_ARG1 + %1 = load <2 x i64>* @llvm_mips_binsl_d_ARG2 + %2 = tail call <2 x i64> @llvm.mips.binsl.d(<2 x i64> %0, <2 x i64> %1) + store <2 x i64> %2, <2 x i64>* @llvm_mips_binsl_d_RES + ret void +} + +declare <2 x i64> @llvm.mips.binsl.d(<2 x i64>, <2 x i64>) nounwind + +; CHECK: llvm_mips_binsl_d_test: +; CHECK: ld.d +; CHECK: ld.d +; CHECK: binsl.d +; CHECK: st.d +; CHECK: .size llvm_mips_binsl_d_test +; +@llvm_mips_binsr_b_ARG1 = global <16 x i8> <i8 0, i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7, i8 8, i8 9, i8 10, i8 11, i8 12, i8 13, i8 14, i8 15>, align 16 +@llvm_mips_binsr_b_ARG2 = global <16 x i8> <i8 16, i8 17, i8 18, i8 19, i8 20, i8 21, i8 22, i8 23, i8 24, i8 25, i8 26, i8 27, i8 28, i8 29, i8 30, i8 31>, align 16 +@llvm_mips_binsr_b_RES = global <16 x i8> <i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0>, align 16 + +define void 
@llvm_mips_binsr_b_test() nounwind { +entry: + %0 = load <16 x i8>* @llvm_mips_binsr_b_ARG1 + %1 = load <16 x i8>* @llvm_mips_binsr_b_ARG2 + %2 = tail call <16 x i8> @llvm.mips.binsr.b(<16 x i8> %0, <16 x i8> %1) + store <16 x i8> %2, <16 x i8>* @llvm_mips_binsr_b_RES + ret void +} + +declare <16 x i8> @llvm.mips.binsr.b(<16 x i8>, <16 x i8>) nounwind + +; CHECK: llvm_mips_binsr_b_test: +; CHECK: ld.b +; CHECK: ld.b +; CHECK: binsr.b +; CHECK: st.b +; CHECK: .size llvm_mips_binsr_b_test +; +@llvm_mips_binsr_h_ARG1 = global <8 x i16> <i16 0, i16 1, i16 2, i16 3, i16 4, i16 5, i16 6, i16 7>, align 16 +@llvm_mips_binsr_h_ARG2 = global <8 x i16> <i16 8, i16 9, i16 10, i16 11, i16 12, i16 13, i16 14, i16 15>, align 16 +@llvm_mips_binsr_h_RES = global <8 x i16> <i16 0, i16 0, i16 0, i16 0, i16 0, i16 0, i16 0, i16 0>, align 16 + +define void @llvm_mips_binsr_h_test() nounwind { +entry: + %0 = load <8 x i16>* @llvm_mips_binsr_h_ARG1 + %1 = load <8 x i16>* @llvm_mips_binsr_h_ARG2 + %2 = tail call <8 x i16> @llvm.mips.binsr.h(<8 x i16> %0, <8 x i16> %1) + store <8 x i16> %2, <8 x i16>* @llvm_mips_binsr_h_RES + ret void +} + +declare <8 x i16> @llvm.mips.binsr.h(<8 x i16>, <8 x i16>) nounwind + +; CHECK: llvm_mips_binsr_h_test: +; CHECK: ld.h +; CHECK: ld.h +; CHECK: binsr.h +; CHECK: st.h +; CHECK: .size llvm_mips_binsr_h_test +; +@llvm_mips_binsr_w_ARG1 = global <4 x i32> <i32 0, i32 1, i32 2, i32 3>, align 16 +@llvm_mips_binsr_w_ARG2 = global <4 x i32> <i32 4, i32 5, i32 6, i32 7>, align 16 +@llvm_mips_binsr_w_RES = global <4 x i32> <i32 0, i32 0, i32 0, i32 0>, align 16 + +define void @llvm_mips_binsr_w_test() nounwind { +entry: + %0 = load <4 x i32>* @llvm_mips_binsr_w_ARG1 + %1 = load <4 x i32>* @llvm_mips_binsr_w_ARG2 + %2 = tail call <4 x i32> @llvm.mips.binsr.w(<4 x i32> %0, <4 x i32> %1) + store <4 x i32> %2, <4 x i32>* @llvm_mips_binsr_w_RES + ret void +} + +declare <4 x i32> @llvm.mips.binsr.w(<4 x i32>, <4 x i32>) nounwind + +; CHECK: llvm_mips_binsr_w_test: +; CHECK: ld.w +; CHECK: ld.w +; CHECK: binsr.w +; CHECK: st.w +; CHECK: .size llvm_mips_binsr_w_test +; +@llvm_mips_binsr_d_ARG1 = global <2 x i64> <i64 0, i64 1>, align 16 +@llvm_mips_binsr_d_ARG2 = global <2 x i64> <i64 2, i64 3>, align 16 +@llvm_mips_binsr_d_RES = global <2 x i64> <i64 0, i64 0>, align 16 + +define void @llvm_mips_binsr_d_test() nounwind { +entry: + %0 = load <2 x i64>* @llvm_mips_binsr_d_ARG1 + %1 = load <2 x i64>* @llvm_mips_binsr_d_ARG2 + %2 = tail call <2 x i64> @llvm.mips.binsr.d(<2 x i64> %0, <2 x i64> %1) + store <2 x i64> %2, <2 x i64>* @llvm_mips_binsr_d_RES + ret void +} + +declare <2 x i64> @llvm.mips.binsr.d(<2 x i64>, <2 x i64>) nounwind + +; CHECK: llvm_mips_binsr_d_test: +; CHECK: ld.d +; CHECK: ld.d +; CHECK: binsr.d +; CHECK: st.d +; CHECK: .size llvm_mips_binsr_d_test +; +@llvm_mips_bneg_b_ARG1 = global <16 x i8> <i8 0, i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7, i8 8, i8 9, i8 10, i8 11, i8 12, i8 13, i8 14, i8 15>, align 16 +@llvm_mips_bneg_b_ARG2 = global <16 x i8> <i8 16, i8 17, i8 18, i8 19, i8 20, i8 21, i8 22, i8 23, i8 24, i8 25, i8 26, i8 27, i8 28, i8 29, i8 30, i8 31>, align 16 +@llvm_mips_bneg_b_RES = global <16 x i8> <i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0>, align 16 + +define void @llvm_mips_bneg_b_test() nounwind { +entry: + %0 = load <16 x i8>* @llvm_mips_bneg_b_ARG1 + %1 = load <16 x i8>* @llvm_mips_bneg_b_ARG2 + %2 = tail call <16 x i8> @llvm.mips.bneg.b(<16 x i8> %0, <16 x i8> %1) + store <16 x i8> %2, <16 x i8>* 
@llvm_mips_bneg_b_RES + ret void +} + +declare <16 x i8> @llvm.mips.bneg.b(<16 x i8>, <16 x i8>) nounwind + +; CHECK: llvm_mips_bneg_b_test: +; CHECK: ld.b +; CHECK: ld.b +; CHECK: bneg.b +; CHECK: st.b +; CHECK: .size llvm_mips_bneg_b_test +; +@llvm_mips_bneg_h_ARG1 = global <8 x i16> <i16 0, i16 1, i16 2, i16 3, i16 4, i16 5, i16 6, i16 7>, align 16 +@llvm_mips_bneg_h_ARG2 = global <8 x i16> <i16 8, i16 9, i16 10, i16 11, i16 12, i16 13, i16 14, i16 15>, align 16 +@llvm_mips_bneg_h_RES = global <8 x i16> <i16 0, i16 0, i16 0, i16 0, i16 0, i16 0, i16 0, i16 0>, align 16 + +define void @llvm_mips_bneg_h_test() nounwind { +entry: + %0 = load <8 x i16>* @llvm_mips_bneg_h_ARG1 + %1 = load <8 x i16>* @llvm_mips_bneg_h_ARG2 + %2 = tail call <8 x i16> @llvm.mips.bneg.h(<8 x i16> %0, <8 x i16> %1) + store <8 x i16> %2, <8 x i16>* @llvm_mips_bneg_h_RES + ret void +} + +declare <8 x i16> @llvm.mips.bneg.h(<8 x i16>, <8 x i16>) nounwind + +; CHECK: llvm_mips_bneg_h_test: +; CHECK: ld.h +; CHECK: ld.h +; CHECK: bneg.h +; CHECK: st.h +; CHECK: .size llvm_mips_bneg_h_test +; +@llvm_mips_bneg_w_ARG1 = global <4 x i32> <i32 0, i32 1, i32 2, i32 3>, align 16 +@llvm_mips_bneg_w_ARG2 = global <4 x i32> <i32 4, i32 5, i32 6, i32 7>, align 16 +@llvm_mips_bneg_w_RES = global <4 x i32> <i32 0, i32 0, i32 0, i32 0>, align 16 + +define void @llvm_mips_bneg_w_test() nounwind { +entry: + %0 = load <4 x i32>* @llvm_mips_bneg_w_ARG1 + %1 = load <4 x i32>* @llvm_mips_bneg_w_ARG2 + %2 = tail call <4 x i32> @llvm.mips.bneg.w(<4 x i32> %0, <4 x i32> %1) + store <4 x i32> %2, <4 x i32>* @llvm_mips_bneg_w_RES + ret void +} + +declare <4 x i32> @llvm.mips.bneg.w(<4 x i32>, <4 x i32>) nounwind + +; CHECK: llvm_mips_bneg_w_test: +; CHECK: ld.w +; CHECK: ld.w +; CHECK: bneg.w +; CHECK: st.w +; CHECK: .size llvm_mips_bneg_w_test +; +@llvm_mips_bneg_d_ARG1 = global <2 x i64> <i64 0, i64 1>, align 16 +@llvm_mips_bneg_d_ARG2 = global <2 x i64> <i64 2, i64 3>, align 16 +@llvm_mips_bneg_d_RES = global <2 x i64> <i64 0, i64 0>, align 16 + +define void @llvm_mips_bneg_d_test() nounwind { +entry: + %0 = load <2 x i64>* @llvm_mips_bneg_d_ARG1 + %1 = load <2 x i64>* @llvm_mips_bneg_d_ARG2 + %2 = tail call <2 x i64> @llvm.mips.bneg.d(<2 x i64> %0, <2 x i64> %1) + store <2 x i64> %2, <2 x i64>* @llvm_mips_bneg_d_RES + ret void +} + +declare <2 x i64> @llvm.mips.bneg.d(<2 x i64>, <2 x i64>) nounwind + +; CHECK: llvm_mips_bneg_d_test: +; CHECK: ld.d +; CHECK: ld.d +; CHECK: bneg.d +; CHECK: st.d +; CHECK: .size llvm_mips_bneg_d_test +; +@llvm_mips_bset_b_ARG1 = global <16 x i8> <i8 0, i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7, i8 8, i8 9, i8 10, i8 11, i8 12, i8 13, i8 14, i8 15>, align 16 +@llvm_mips_bset_b_ARG2 = global <16 x i8> <i8 16, i8 17, i8 18, i8 19, i8 20, i8 21, i8 22, i8 23, i8 24, i8 25, i8 26, i8 27, i8 28, i8 29, i8 30, i8 31>, align 16 +@llvm_mips_bset_b_RES = global <16 x i8> <i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0>, align 16 + +define void @llvm_mips_bset_b_test() nounwind { +entry: + %0 = load <16 x i8>* @llvm_mips_bset_b_ARG1 + %1 = load <16 x i8>* @llvm_mips_bset_b_ARG2 + %2 = tail call <16 x i8> @llvm.mips.bset.b(<16 x i8> %0, <16 x i8> %1) + store <16 x i8> %2, <16 x i8>* @llvm_mips_bset_b_RES + ret void +} + +declare <16 x i8> @llvm.mips.bset.b(<16 x i8>, <16 x i8>) nounwind + +; CHECK: llvm_mips_bset_b_test: +; CHECK: ld.b +; CHECK: ld.b +; CHECK: bset.b +; CHECK: st.b +; CHECK: .size llvm_mips_bset_b_test +; +@llvm_mips_bset_h_ARG1 = global <8 x i16> <i16 0, 
i16 1, i16 2, i16 3, i16 4, i16 5, i16 6, i16 7>, align 16 +@llvm_mips_bset_h_ARG2 = global <8 x i16> <i16 8, i16 9, i16 10, i16 11, i16 12, i16 13, i16 14, i16 15>, align 16 +@llvm_mips_bset_h_RES = global <8 x i16> <i16 0, i16 0, i16 0, i16 0, i16 0, i16 0, i16 0, i16 0>, align 16 + +define void @llvm_mips_bset_h_test() nounwind { +entry: + %0 = load <8 x i16>* @llvm_mips_bset_h_ARG1 + %1 = load <8 x i16>* @llvm_mips_bset_h_ARG2 + %2 = tail call <8 x i16> @llvm.mips.bset.h(<8 x i16> %0, <8 x i16> %1) + store <8 x i16> %2, <8 x i16>* @llvm_mips_bset_h_RES + ret void +} + +declare <8 x i16> @llvm.mips.bset.h(<8 x i16>, <8 x i16>) nounwind + +; CHECK: llvm_mips_bset_h_test: +; CHECK: ld.h +; CHECK: ld.h +; CHECK: bset.h +; CHECK: st.h +; CHECK: .size llvm_mips_bset_h_test +; +@llvm_mips_bset_w_ARG1 = global <4 x i32> <i32 0, i32 1, i32 2, i32 3>, align 16 +@llvm_mips_bset_w_ARG2 = global <4 x i32> <i32 4, i32 5, i32 6, i32 7>, align 16 +@llvm_mips_bset_w_RES = global <4 x i32> <i32 0, i32 0, i32 0, i32 0>, align 16 + +define void @llvm_mips_bset_w_test() nounwind { +entry: + %0 = load <4 x i32>* @llvm_mips_bset_w_ARG1 + %1 = load <4 x i32>* @llvm_mips_bset_w_ARG2 + %2 = tail call <4 x i32> @llvm.mips.bset.w(<4 x i32> %0, <4 x i32> %1) + store <4 x i32> %2, <4 x i32>* @llvm_mips_bset_w_RES + ret void +} + +declare <4 x i32> @llvm.mips.bset.w(<4 x i32>, <4 x i32>) nounwind + +; CHECK: llvm_mips_bset_w_test: +; CHECK: ld.w +; CHECK: ld.w +; CHECK: bset.w +; CHECK: st.w +; CHECK: .size llvm_mips_bset_w_test +; +@llvm_mips_bset_d_ARG1 = global <2 x i64> <i64 0, i64 1>, align 16 +@llvm_mips_bset_d_ARG2 = global <2 x i64> <i64 2, i64 3>, align 16 +@llvm_mips_bset_d_RES = global <2 x i64> <i64 0, i64 0>, align 16 + +define void @llvm_mips_bset_d_test() nounwind { +entry: + %0 = load <2 x i64>* @llvm_mips_bset_d_ARG1 + %1 = load <2 x i64>* @llvm_mips_bset_d_ARG2 + %2 = tail call <2 x i64> @llvm.mips.bset.d(<2 x i64> %0, <2 x i64> %1) + store <2 x i64> %2, <2 x i64>* @llvm_mips_bset_d_RES + ret void +} + +declare <2 x i64> @llvm.mips.bset.d(<2 x i64>, <2 x i64>) nounwind + +; CHECK: llvm_mips_bset_d_test: +; CHECK: ld.d +; CHECK: ld.d +; CHECK: bset.d +; CHECK: st.d +; CHECK: .size llvm_mips_bset_d_test +; diff --git a/test/CodeGen/Mips/msa/3r-c.ll b/test/CodeGen/Mips/msa/3r-c.ll new file mode 100644 index 0000000..5663694 --- /dev/null +++ b/test/CodeGen/Mips/msa/3r-c.ll @@ -0,0 +1,442 @@ +; RUN: llc -march=mips -mattr=+msa < %s | FileCheck %s + +@llvm_mips_ceq_b_ARG1 = global <16 x i8> <i8 0, i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7, i8 8, i8 9, i8 10, i8 11, i8 12, i8 13, i8 14, i8 15>, align 16 +@llvm_mips_ceq_b_ARG2 = global <16 x i8> <i8 16, i8 17, i8 18, i8 19, i8 20, i8 21, i8 22, i8 23, i8 24, i8 25, i8 26, i8 27, i8 28, i8 29, i8 30, i8 31>, align 16 +@llvm_mips_ceq_b_RES = global <16 x i8> <i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0>, align 16 + +define void @llvm_mips_ceq_b_test() nounwind { +entry: + %0 = load <16 x i8>* @llvm_mips_ceq_b_ARG1 + %1 = load <16 x i8>* @llvm_mips_ceq_b_ARG2 + %2 = tail call <16 x i8> @llvm.mips.ceq.b(<16 x i8> %0, <16 x i8> %1) + store <16 x i8> %2, <16 x i8>* @llvm_mips_ceq_b_RES + ret void +} + +declare <16 x i8> @llvm.mips.ceq.b(<16 x i8>, <16 x i8>) nounwind + +; CHECK: llvm_mips_ceq_b_test: +; CHECK: ld.b +; CHECK: ld.b +; CHECK: ceq.b +; CHECK: st.b +; CHECK: .size llvm_mips_ceq_b_test +; +@llvm_mips_ceq_h_ARG1 = global <8 x i16> <i16 0, i16 1, i16 2, i16 3, i16 4, i16 5, i16 6, i16 7>, align 16 
+@llvm_mips_ceq_h_ARG2 = global <8 x i16> <i16 8, i16 9, i16 10, i16 11, i16 12, i16 13, i16 14, i16 15>, align 16
+@llvm_mips_ceq_h_RES = global <8 x i16> <i16 0, i16 0, i16 0, i16 0, i16 0, i16 0, i16 0, i16 0>, align 16
+
+define void @llvm_mips_ceq_h_test() nounwind {
+entry:
+ %0 = load <8 x i16>* @llvm_mips_ceq_h_ARG1
+ %1 = load <8 x i16>* @llvm_mips_ceq_h_ARG2
+ %2 = tail call <8 x i16> @llvm.mips.ceq.h(<8 x i16> %0, <8 x i16> %1)
+ store <8 x i16> %2, <8 x i16>* @llvm_mips_ceq_h_RES
+ ret void
+}
+
+declare <8 x i16> @llvm.mips.ceq.h(<8 x i16>, <8 x i16>) nounwind
+
+; CHECK: llvm_mips_ceq_h_test:
+; CHECK: ld.h
+; CHECK: ld.h
+; CHECK: ceq.h
+; CHECK: st.h
+; CHECK: .size llvm_mips_ceq_h_test
+;
+@llvm_mips_ceq_w_ARG1 = global <4 x i32> <i32 0, i32 1, i32 2, i32 3>, align 16
+@llvm_mips_ceq_w_ARG2 = global <4 x i32> <i32 4, i32 5, i32 6, i32 7>, align 16
+@llvm_mips_ceq_w_RES = global <4 x i32> <i32 0, i32 0, i32 0, i32 0>, align 16
+
+define void @llvm_mips_ceq_w_test() nounwind {
+entry:
+ %0 = load <4 x i32>* @llvm_mips_ceq_w_ARG1
+ %1 = load <4 x i32>* @llvm_mips_ceq_w_ARG2
+ %2 = tail call <4 x i32> @llvm.mips.ceq.w(<4 x i32> %0, <4 x i32> %1)
+ store <4 x i32> %2, <4 x i32>* @llvm_mips_ceq_w_RES
+ ret void
+}
+
+declare <4 x i32> @llvm.mips.ceq.w(<4 x i32>, <4 x i32>) nounwind
+
+; CHECK: llvm_mips_ceq_w_test:
+; CHECK: ld.w
+; CHECK: ld.w
+; CHECK: ceq.w
+; CHECK: st.w
+; CHECK: .size llvm_mips_ceq_w_test
+;
+@llvm_mips_ceq_d_ARG1 = global <2 x i64> <i64 0, i64 1>, align 16
+@llvm_mips_ceq_d_ARG2 = global <2 x i64> <i64 2, i64 3>, align 16
+@llvm_mips_ceq_d_RES = global <2 x i64> <i64 0, i64 0>, align 16
+
+define void @llvm_mips_ceq_d_test() nounwind {
+entry:
+ %0 = load <2 x i64>* @llvm_mips_ceq_d_ARG1
+ %1 = load <2 x i64>* @llvm_mips_ceq_d_ARG2
+ %2 = tail call <2 x i64> @llvm.mips.ceq.d(<2 x i64> %0, <2 x i64> %1)
+ store <2 x i64> %2, <2 x i64>* @llvm_mips_ceq_d_RES
+ ret void
+}
+
+declare <2 x i64> @llvm.mips.ceq.d(<2 x i64>, <2 x i64>) nounwind
+
+; CHECK: llvm_mips_ceq_d_test:
+; CHECK: ld.d
+; CHECK: ld.d
+; CHECK: ceq.d
+; CHECK: st.d
+; CHECK: .size llvm_mips_ceq_d_test
+;
+@llvm_mips_cle_s_b_ARG1 = global <16 x i8> <i8 0, i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7, i8 8, i8 9, i8 10, i8 11, i8 12, i8 13, i8 14, i8 15>, align 16
+@llvm_mips_cle_s_b_ARG2 = global <16 x i8> <i8 16, i8 17, i8 18, i8 19, i8 20, i8 21, i8 22, i8 23, i8 24, i8 25, i8 26, i8 27, i8 28, i8 29, i8 30, i8 31>, align 16
+@llvm_mips_cle_s_b_RES = global <16 x i8> <i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0>, align 16
+
+define void @llvm_mips_cle_s_b_test() nounwind {
+entry:
+ %0 = load <16 x i8>* @llvm_mips_cle_s_b_ARG1
+ %1 = load <16 x i8>* @llvm_mips_cle_s_b_ARG2
+ %2 = tail call <16 x i8> @llvm.mips.cle.s.b(<16 x i8> %0, <16 x i8> %1)
+ store <16 x i8> %2, <16 x i8>* @llvm_mips_cle_s_b_RES
+ ret void
+}
+
+declare <16 x i8> @llvm.mips.cle.s.b(<16 x i8>, <16 x i8>) nounwind
+
+; CHECK: llvm_mips_cle_s_b_test:
+; CHECK: ld.b
+; CHECK: ld.b
+; CHECK: cle_s.b
+; CHECK: st.b
+; CHECK: .size llvm_mips_cle_s_b_test
+;
+@llvm_mips_cle_s_h_ARG1 = global <8 x i16> <i16 0, i16 1, i16 2, i16 3, i16 4, i16 5, i16 6, i16 7>, align 16
+@llvm_mips_cle_s_h_ARG2 = global <8 x i16> <i16 8, i16 9, i16 10, i16 11, i16 12, i16 13, i16 14, i16 15>, align 16
+@llvm_mips_cle_s_h_RES = global <8 x i16> <i16 0, i16 0, i16 0, i16 0, i16 0, i16 0, i16 0, i16 0>, align 16
+
+define void @llvm_mips_cle_s_h_test() nounwind {
+entry:
+ %0 = load <8 x i16>* @llvm_mips_cle_s_h_ARG1
+ %1 = load <8 x i16>* @llvm_mips_cle_s_h_ARG2
+ %2 = tail call <8 x i16> @llvm.mips.cle.s.h(<8 x i16> %0, <8 x i16> %1)
+ store <8 x i16> %2, <8 x i16>* @llvm_mips_cle_s_h_RES
+ ret void
+}
+
+declare <8 x i16> @llvm.mips.cle.s.h(<8 x i16>, <8 x i16>) nounwind
+
+; CHECK: llvm_mips_cle_s_h_test:
+; CHECK: ld.h
+; CHECK: ld.h
+; CHECK: cle_s.h
+; CHECK: st.h
+; CHECK: .size llvm_mips_cle_s_h_test
+;
+@llvm_mips_cle_s_w_ARG1 = global <4 x i32> <i32 0, i32 1, i32 2, i32 3>, align 16
+@llvm_mips_cle_s_w_ARG2 = global <4 x i32> <i32 4, i32 5, i32 6, i32 7>, align 16
+@llvm_mips_cle_s_w_RES = global <4 x i32> <i32 0, i32 0, i32 0, i32 0>, align 16
+
+define void @llvm_mips_cle_s_w_test() nounwind {
+entry:
+ %0 = load <4 x i32>* @llvm_mips_cle_s_w_ARG1
+ %1 = load <4 x i32>* @llvm_mips_cle_s_w_ARG2
+ %2 = tail call <4 x i32> @llvm.mips.cle.s.w(<4 x i32> %0, <4 x i32> %1)
+ store <4 x i32> %2, <4 x i32>* @llvm_mips_cle_s_w_RES
+ ret void
+}
+
+declare <4 x i32> @llvm.mips.cle.s.w(<4 x i32>, <4 x i32>) nounwind
+
+; CHECK: llvm_mips_cle_s_w_test:
+; CHECK: ld.w
+; CHECK: ld.w
+; CHECK: cle_s.w
+; CHECK: st.w
+; CHECK: .size llvm_mips_cle_s_w_test
+;
+@llvm_mips_cle_s_d_ARG1 = global <2 x i64> <i64 0, i64 1>, align 16
+@llvm_mips_cle_s_d_ARG2 = global <2 x i64> <i64 2, i64 3>, align 16
+@llvm_mips_cle_s_d_RES = global <2 x i64> <i64 0, i64 0>, align 16
+
+define void @llvm_mips_cle_s_d_test() nounwind {
+entry:
+ %0 = load <2 x i64>* @llvm_mips_cle_s_d_ARG1
+ %1 = load <2 x i64>* @llvm_mips_cle_s_d_ARG2
+ %2 = tail call <2 x i64> @llvm.mips.cle.s.d(<2 x i64> %0, <2 x i64> %1)
+ store <2 x i64> %2, <2 x i64>* @llvm_mips_cle_s_d_RES
+ ret void
+}
+
+declare <2 x i64> @llvm.mips.cle.s.d(<2 x i64>, <2 x i64>) nounwind
+
+; CHECK: llvm_mips_cle_s_d_test:
+; CHECK: ld.d
+; CHECK: ld.d
+; CHECK: cle_s.d
+; CHECK: st.d
+; CHECK: .size llvm_mips_cle_s_d_test
+;
+@llvm_mips_cle_u_b_ARG1 = global <16 x i8> <i8 0, i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7, i8 8, i8 9, i8 10, i8 11, i8 12, i8 13, i8 14, i8 15>, align 16
+@llvm_mips_cle_u_b_ARG2 = global <16 x i8> <i8 16, i8 17, i8 18, i8 19, i8 20, i8 21, i8 22, i8 23, i8 24, i8 25, i8 26, i8 27, i8 28, i8 29, i8 30, i8 31>, align 16
+@llvm_mips_cle_u_b_RES = global <16 x i8> <i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0>, align 16
+
+define void @llvm_mips_cle_u_b_test() nounwind {
+entry:
+ %0 = load <16 x i8>* @llvm_mips_cle_u_b_ARG1
+ %1 = load <16 x i8>* @llvm_mips_cle_u_b_ARG2
+ %2 = tail call <16 x i8> @llvm.mips.cle.u.b(<16 x i8> %0, <16 x i8> %1)
+ store <16 x i8> %2, <16 x i8>* @llvm_mips_cle_u_b_RES
+ ret void
+}
+
+declare <16 x i8> @llvm.mips.cle.u.b(<16 x i8>, <16 x i8>) nounwind
+
+; CHECK: llvm_mips_cle_u_b_test:
+; CHECK: ld.b
+; CHECK: ld.b
+; CHECK: cle_u.b
+; CHECK: st.b
+; CHECK: .size llvm_mips_cle_u_b_test
+;
+@llvm_mips_cle_u_h_ARG1 = global <8 x i16> <i16 0, i16 1, i16 2, i16 3, i16 4, i16 5, i16 6, i16 7>, align 16
+@llvm_mips_cle_u_h_ARG2 = global <8 x i16> <i16 8, i16 9, i16 10, i16 11, i16 12, i16 13, i16 14, i16 15>, align 16
+@llvm_mips_cle_u_h_RES = global <8 x i16> <i16 0, i16 0, i16 0, i16 0, i16 0, i16 0, i16 0, i16 0>, align 16
+
+define void @llvm_mips_cle_u_h_test() nounwind {
+entry:
+ %0 = load <8 x i16>* @llvm_mips_cle_u_h_ARG1
+ %1 = load <8 x i16>* @llvm_mips_cle_u_h_ARG2
+ %2 = tail call <8 x i16> @llvm.mips.cle.u.h(<8 x i16> %0, <8 x i16> %1)
+ store <8 x i16> %2, <8 x i16>* @llvm_mips_cle_u_h_RES
+ ret void
+}
+
+declare <8 x i16> @llvm.mips.cle.u.h(<8 x i16>, <8 x i16>) nounwind
+
+; CHECK: llvm_mips_cle_u_h_test:
+; CHECK: ld.h
+; CHECK: ld.h
+; CHECK: cle_u.h
+; CHECK: st.h
+; CHECK: .size llvm_mips_cle_u_h_test
+;
+@llvm_mips_cle_u_w_ARG1 = global <4 x i32> <i32 0, i32 1, i32 2, i32 3>, align 16
+@llvm_mips_cle_u_w_ARG2 = global <4 x i32> <i32 4, i32 5, i32 6, i32 7>, align 16
+@llvm_mips_cle_u_w_RES = global <4 x i32> <i32 0, i32 0, i32 0, i32 0>, align 16
+
+define void @llvm_mips_cle_u_w_test() nounwind {
+entry:
+ %0 = load <4 x i32>* @llvm_mips_cle_u_w_ARG1
+ %1 = load <4 x i32>* @llvm_mips_cle_u_w_ARG2
+ %2 = tail call <4 x i32> @llvm.mips.cle.u.w(<4 x i32> %0, <4 x i32> %1)
+ store <4 x i32> %2, <4 x i32>* @llvm_mips_cle_u_w_RES
+ ret void
+}
+
+declare <4 x i32> @llvm.mips.cle.u.w(<4 x i32>, <4 x i32>) nounwind
+
+; CHECK: llvm_mips_cle_u_w_test:
+; CHECK: ld.w
+; CHECK: ld.w
+; CHECK: cle_u.w
+; CHECK: st.w
+; CHECK: .size llvm_mips_cle_u_w_test
+;
+@llvm_mips_cle_u_d_ARG1 = global <2 x i64> <i64 0, i64 1>, align 16
+@llvm_mips_cle_u_d_ARG2 = global <2 x i64> <i64 2, i64 3>, align 16
+@llvm_mips_cle_u_d_RES = global <2 x i64> <i64 0, i64 0>, align 16
+
+define void @llvm_mips_cle_u_d_test() nounwind {
+entry:
+ %0 = load <2 x i64>* @llvm_mips_cle_u_d_ARG1
+ %1 = load <2 x i64>* @llvm_mips_cle_u_d_ARG2
+ %2 = tail call <2 x i64> @llvm.mips.cle.u.d(<2 x i64> %0, <2 x i64> %1)
+ store <2 x i64> %2, <2 x i64>* @llvm_mips_cle_u_d_RES
+ ret void
+}
+
+declare <2 x i64> @llvm.mips.cle.u.d(<2 x i64>, <2 x i64>) nounwind
+
+; CHECK: llvm_mips_cle_u_d_test:
+; CHECK: ld.d
+; CHECK: ld.d
+; CHECK: cle_u.d
+; CHECK: st.d
+; CHECK: .size llvm_mips_cle_u_d_test
+;
+@llvm_mips_clt_s_b_ARG1 = global <16 x i8> <i8 0, i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7, i8 8, i8 9, i8 10, i8 11, i8 12, i8 13, i8 14, i8 15>, align 16
+@llvm_mips_clt_s_b_ARG2 = global <16 x i8> <i8 16, i8 17, i8 18, i8 19, i8 20, i8 21, i8 22, i8 23, i8 24, i8 25, i8 26, i8 27, i8 28, i8 29, i8 30, i8 31>, align 16
+@llvm_mips_clt_s_b_RES = global <16 x i8> <i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0>, align 16
+
+define void @llvm_mips_clt_s_b_test() nounwind {
+entry:
+ %0 = load <16 x i8>* @llvm_mips_clt_s_b_ARG1
+ %1 = load <16 x i8>* @llvm_mips_clt_s_b_ARG2
+ %2 = tail call <16 x i8> @llvm.mips.clt.s.b(<16 x i8> %0, <16 x i8> %1)
+ store <16 x i8> %2, <16 x i8>* @llvm_mips_clt_s_b_RES
+ ret void
+}
+
+declare <16 x i8> @llvm.mips.clt.s.b(<16 x i8>, <16 x i8>) nounwind
+
+; CHECK: llvm_mips_clt_s_b_test:
+; CHECK: ld.b
+; CHECK: ld.b
+; CHECK: clt_s.b
+; CHECK: st.b
+; CHECK: .size llvm_mips_clt_s_b_test
+;
+@llvm_mips_clt_s_h_ARG1 = global <8 x i16> <i16 0, i16 1, i16 2, i16 3, i16 4, i16 5, i16 6, i16 7>, align 16
+@llvm_mips_clt_s_h_ARG2 = global <8 x i16> <i16 8, i16 9, i16 10, i16 11, i16 12, i16 13, i16 14, i16 15>, align 16
+@llvm_mips_clt_s_h_RES = global <8 x i16> <i16 0, i16 0, i16 0, i16 0, i16 0, i16 0, i16 0, i16 0>, align 16
+
+define void @llvm_mips_clt_s_h_test() nounwind {
+entry:
+ %0 = load <8 x i16>* @llvm_mips_clt_s_h_ARG1
+ %1 = load <8 x i16>* @llvm_mips_clt_s_h_ARG2
+ %2 = tail call <8 x i16> @llvm.mips.clt.s.h(<8 x i16> %0, <8 x i16> %1)
+ store <8 x i16> %2, <8 x i16>* @llvm_mips_clt_s_h_RES
+ ret void
+}
+
+declare <8 x i16> @llvm.mips.clt.s.h(<8 x i16>, <8 x i16>) nounwind
+
+; CHECK: llvm_mips_clt_s_h_test:
+; CHECK: ld.h
+; CHECK: ld.h
+; CHECK: clt_s.h
+; CHECK: st.h
+; CHECK: .size llvm_mips_clt_s_h_test
+;
+@llvm_mips_clt_s_w_ARG1 = global <4 x i32> <i32 0, i32 1, i32 2, i32 3>, align 16
+@llvm_mips_clt_s_w_ARG2 = global <4 x i32> <i32 4, i32 5, i32 6, i32 7>, align 16
+@llvm_mips_clt_s_w_RES = global <4 x i32> <i32 0, i32 0, i32 0, i32 0>, align 16
+
+define void @llvm_mips_clt_s_w_test() nounwind {
+entry:
+ %0 = load <4 x i32>* @llvm_mips_clt_s_w_ARG1
+ %1 = load <4 x i32>* @llvm_mips_clt_s_w_ARG2
+ %2 = tail call <4 x i32> @llvm.mips.clt.s.w(<4 x i32> %0, <4 x i32> %1)
+ store <4 x i32> %2, <4 x i32>* @llvm_mips_clt_s_w_RES
+ ret void
+}
+
+declare <4 x i32> @llvm.mips.clt.s.w(<4 x i32>, <4 x i32>) nounwind
+
+; CHECK: llvm_mips_clt_s_w_test:
+; CHECK: ld.w
+; CHECK: ld.w
+; CHECK: clt_s.w
+; CHECK: st.w
+; CHECK: .size llvm_mips_clt_s_w_test
+;
+@llvm_mips_clt_s_d_ARG1 = global <2 x i64> <i64 0, i64 1>, align 16
+@llvm_mips_clt_s_d_ARG2 = global <2 x i64> <i64 2, i64 3>, align 16
+@llvm_mips_clt_s_d_RES = global <2 x i64> <i64 0, i64 0>, align 16
+
+define void @llvm_mips_clt_s_d_test() nounwind {
+entry:
+ %0 = load <2 x i64>* @llvm_mips_clt_s_d_ARG1
+ %1 = load <2 x i64>* @llvm_mips_clt_s_d_ARG2
+ %2 = tail call <2 x i64> @llvm.mips.clt.s.d(<2 x i64> %0, <2 x i64> %1)
+ store <2 x i64> %2, <2 x i64>* @llvm_mips_clt_s_d_RES
+ ret void
+}
+
+declare <2 x i64> @llvm.mips.clt.s.d(<2 x i64>, <2 x i64>) nounwind
+
+; CHECK: llvm_mips_clt_s_d_test:
+; CHECK: ld.d
+; CHECK: ld.d
+; CHECK: clt_s.d
+; CHECK: st.d
+; CHECK: .size llvm_mips_clt_s_d_test
+;
+@llvm_mips_clt_u_b_ARG1 = global <16 x i8> <i8 0, i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7, i8 8, i8 9, i8 10, i8 11, i8 12, i8 13, i8 14, i8 15>, align 16
+@llvm_mips_clt_u_b_ARG2 = global <16 x i8> <i8 16, i8 17, i8 18, i8 19, i8 20, i8 21, i8 22, i8 23, i8 24, i8 25, i8 26, i8 27, i8 28, i8 29, i8 30, i8 31>, align 16
+@llvm_mips_clt_u_b_RES = global <16 x i8> <i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0>, align 16
+
+define void @llvm_mips_clt_u_b_test() nounwind {
+entry:
+ %0 = load <16 x i8>* @llvm_mips_clt_u_b_ARG1
+ %1 = load <16 x i8>* @llvm_mips_clt_u_b_ARG2
+ %2 = tail call <16 x i8> @llvm.mips.clt.u.b(<16 x i8> %0, <16 x i8> %1)
+ store <16 x i8> %2, <16 x i8>* @llvm_mips_clt_u_b_RES
+ ret void
+}
+
+declare <16 x i8> @llvm.mips.clt.u.b(<16 x i8>, <16 x i8>) nounwind
+
+; CHECK: llvm_mips_clt_u_b_test:
+; CHECK: ld.b
+; CHECK: ld.b
+; CHECK: clt_u.b
+; CHECK: st.b
+; CHECK: .size llvm_mips_clt_u_b_test
+;
+@llvm_mips_clt_u_h_ARG1 = global <8 x i16> <i16 0, i16 1, i16 2, i16 3, i16 4, i16 5, i16 6, i16 7>, align 16
+@llvm_mips_clt_u_h_ARG2 = global <8 x i16> <i16 8, i16 9, i16 10, i16 11, i16 12, i16 13, i16 14, i16 15>, align 16
+@llvm_mips_clt_u_h_RES = global <8 x i16> <i16 0, i16 0, i16 0, i16 0, i16 0, i16 0, i16 0, i16 0>, align 16
+
+define void @llvm_mips_clt_u_h_test() nounwind {
+entry:
+ %0 = load <8 x i16>* @llvm_mips_clt_u_h_ARG1
+ %1 = load <8 x i16>* @llvm_mips_clt_u_h_ARG2
+ %2 = tail call <8 x i16> @llvm.mips.clt.u.h(<8 x i16> %0, <8 x i16> %1)
+ store <8 x i16> %2, <8 x i16>* @llvm_mips_clt_u_h_RES
+ ret void
+}
+
+declare <8 x i16> @llvm.mips.clt.u.h(<8 x i16>, <8 x i16>) nounwind
+
+; CHECK: llvm_mips_clt_u_h_test:
+; CHECK: ld.h
+; CHECK: ld.h
+; CHECK: clt_u.h
+; CHECK: st.h
+; CHECK: .size llvm_mips_clt_u_h_test
+;
+@llvm_mips_clt_u_w_ARG1 = global <4 x i32> <i32 0, i32 1, i32 2, i32 3>, align 16
+@llvm_mips_clt_u_w_ARG2 = global <4 x i32> <i32 4, i32 5, i32 6, i32 7>, align 16
+@llvm_mips_clt_u_w_RES = global <4 x i32> <i32 0, i32 0, i32 0, i32 0>, align 16
+
+define void @llvm_mips_clt_u_w_test() nounwind {
+entry:
+ %0 = load <4 x i32>* @llvm_mips_clt_u_w_ARG1
+ %1 = load <4 x i32>* @llvm_mips_clt_u_w_ARG2
+ %2 = tail call <4 x i32> @llvm.mips.clt.u.w(<4 x i32> %0, <4 x i32> %1)
+ store <4 x i32> %2, <4 x i32>* @llvm_mips_clt_u_w_RES
+ ret void
+}
+
+declare <4 x i32> @llvm.mips.clt.u.w(<4 x i32>, <4 x i32>) nounwind
+
+; CHECK: llvm_mips_clt_u_w_test:
+; CHECK: ld.w
+; CHECK: ld.w
+; CHECK: clt_u.w
+; CHECK: st.w
+; CHECK: .size llvm_mips_clt_u_w_test
+;
+@llvm_mips_clt_u_d_ARG1 = global <2 x i64> <i64 0, i64 1>, align 16
+@llvm_mips_clt_u_d_ARG2 = global <2 x i64> <i64 2, i64 3>, align 16
+@llvm_mips_clt_u_d_RES = global <2 x i64> <i64 0, i64 0>, align 16
+
+define void @llvm_mips_clt_u_d_test() nounwind {
+entry:
+ %0 = load <2 x i64>* @llvm_mips_clt_u_d_ARG1
+ %1 = load <2 x i64>* @llvm_mips_clt_u_d_ARG2
+ %2 = tail call <2 x i64> @llvm.mips.clt.u.d(<2 x i64> %0, <2 x i64> %1)
+ store <2 x i64> %2, <2 x i64>* @llvm_mips_clt_u_d_RES
+ ret void
+}
+
+declare <2 x i64> @llvm.mips.clt.u.d(<2 x i64>, <2 x i64>) nounwind
+
+; CHECK: llvm_mips_clt_u_d_test:
+; CHECK: ld.d
+; CHECK: ld.d
+; CHECK: clt_u.d
+; CHECK: st.d
+; CHECK: .size llvm_mips_clt_u_d_test
+;
diff --git a/test/CodeGen/Mips/msa/3r-d.ll b/test/CodeGen/Mips/msa/3r-d.ll
new file mode 100644
index 0000000..3b20743
--- /dev/null
+++ b/test/CodeGen/Mips/msa/3r-d.ll
@@ -0,0 +1,354 @@
+; RUN: llc -march=mips -mattr=+msa < %s | FileCheck %s
+
+@llvm_mips_div_s_b_ARG1 = global <16 x i8> <i8 0, i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7, i8 8, i8 9, i8 10, i8 11, i8 12, i8 13, i8 14, i8 15>, align 16
+@llvm_mips_div_s_b_ARG2 = global <16 x i8> <i8 16, i8 17, i8 18, i8 19, i8 20, i8 21, i8 22, i8 23, i8 24, i8 25, i8 26, i8 27, i8 28, i8 29, i8 30, i8 31>, align 16
+@llvm_mips_div_s_b_RES = global <16 x i8> <i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0>, align 16
+
+define void @llvm_mips_div_s_b_test() nounwind {
+entry:
+ %0 = load <16 x i8>* @llvm_mips_div_s_b_ARG1
+ %1 = load <16 x i8>* @llvm_mips_div_s_b_ARG2
+ %2 = tail call <16 x i8> @llvm.mips.div.s.b(<16 x i8> %0, <16 x i8> %1)
+ store <16 x i8> %2, <16 x i8>* @llvm_mips_div_s_b_RES
+ ret void
+}
+
+declare <16 x i8> @llvm.mips.div.s.b(<16 x i8>, <16 x i8>) nounwind
+
+; CHECK: llvm_mips_div_s_b_test:
+; CHECK: ld.b
+; CHECK: ld.b
+; CHECK: div_s.b
+; CHECK: st.b
+; CHECK: .size llvm_mips_div_s_b_test
+;
+@llvm_mips_div_s_h_ARG1 = global <8 x i16> <i16 0, i16 1, i16 2, i16 3, i16 4, i16 5, i16 6, i16 7>, align 16
+@llvm_mips_div_s_h_ARG2 = global <8 x i16> <i16 8, i16 9, i16 10, i16 11, i16 12, i16 13, i16 14, i16 15>, align 16
+@llvm_mips_div_s_h_RES = global <8 x i16> <i16 0, i16 0, i16 0, i16 0, i16 0, i16 0, i16 0, i16 0>, align 16
+
+define void @llvm_mips_div_s_h_test() nounwind {
+entry:
+ %0 = load <8 x i16>* @llvm_mips_div_s_h_ARG1
+ %1 = load <8 x i16>* @llvm_mips_div_s_h_ARG2
+ %2 = tail call <8 x i16> @llvm.mips.div.s.h(<8 x i16> %0, <8 x i16> %1)
+ store <8 x i16> %2, <8 x i16>* @llvm_mips_div_s_h_RES
+ ret void
+}
+
+declare <8 x i16> @llvm.mips.div.s.h(<8 x i16>, <8 x i16>) nounwind
+
+; CHECK: llvm_mips_div_s_h_test:
+; CHECK: ld.h
+; CHECK: ld.h
+; CHECK: div_s.h
+; CHECK: st.h
+; CHECK: .size llvm_mips_div_s_h_test
+;
+@llvm_mips_div_s_w_ARG1 = global <4 x i32> <i32 0, i32 1, i32 2, i32 3>, align 16
+@llvm_mips_div_s_w_ARG2 = global <4 x i32> <i32 4, i32 5, i32 6, i32 7>, align 16
+@llvm_mips_div_s_w_RES = global <4 x i32> <i32 0, i32 0, i32 0, i32 0>, align 16
+
+define void @llvm_mips_div_s_w_test() nounwind {
+entry:
+ %0 = load <4 x i32>* @llvm_mips_div_s_w_ARG1
+ %1 = load <4 x i32>* @llvm_mips_div_s_w_ARG2
+ %2 = tail call <4 x i32> @llvm.mips.div.s.w(<4 x i32> %0, <4 x i32> %1)
+ store <4 x i32> %2, <4 x i32>* @llvm_mips_div_s_w_RES
+ ret void
+}
+
+declare <4 x i32> @llvm.mips.div.s.w(<4 x i32>, <4 x i32>) nounwind
+
+; CHECK: llvm_mips_div_s_w_test:
+; CHECK: ld.w
+; CHECK: ld.w
+; CHECK: div_s.w
+; CHECK: st.w
+; CHECK: .size llvm_mips_div_s_w_test
+;
+@llvm_mips_div_s_d_ARG1 = global <2 x i64> <i64 0, i64 1>, align 16
+@llvm_mips_div_s_d_ARG2 = global <2 x i64> <i64 2, i64 3>, align 16
+@llvm_mips_div_s_d_RES = global <2 x i64> <i64 0, i64 0>, align 16
+
+define void @llvm_mips_div_s_d_test() nounwind {
+entry:
+ %0 = load <2 x i64>* @llvm_mips_div_s_d_ARG1
+ %1 = load <2 x i64>* @llvm_mips_div_s_d_ARG2
+ %2 = tail call <2 x i64> @llvm.mips.div.s.d(<2 x i64> %0, <2 x i64> %1)
+ store <2 x i64> %2, <2 x i64>* @llvm_mips_div_s_d_RES
+ ret void
+}
+
+declare <2 x i64> @llvm.mips.div.s.d(<2 x i64>, <2 x i64>) nounwind
+
+; CHECK: llvm_mips_div_s_d_test:
+; CHECK: ld.d
+; CHECK: ld.d
+; CHECK: div_s.d
+; CHECK: st.d
+; CHECK: .size llvm_mips_div_s_d_test
+;
+@llvm_mips_div_u_b_ARG1 = global <16 x i8> <i8 0, i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7, i8 8, i8 9, i8 10, i8 11, i8 12, i8 13, i8 14, i8 15>, align 16
+@llvm_mips_div_u_b_ARG2 = global <16 x i8> <i8 16, i8 17, i8 18, i8 19, i8 20, i8 21, i8 22, i8 23, i8 24, i8 25, i8 26, i8 27, i8 28, i8 29, i8 30, i8 31>, align 16
+@llvm_mips_div_u_b_RES = global <16 x i8> <i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0>, align 16
+
+define void @llvm_mips_div_u_b_test() nounwind {
+entry:
+ %0 = load <16 x i8>* @llvm_mips_div_u_b_ARG1
+ %1 = load <16 x i8>* @llvm_mips_div_u_b_ARG2
+ %2 = tail call <16 x i8> @llvm.mips.div.u.b(<16 x i8> %0, <16 x i8> %1)
+ store <16 x i8> %2, <16 x i8>* @llvm_mips_div_u_b_RES
+ ret void
+}
+
+declare <16 x i8> @llvm.mips.div.u.b(<16 x i8>, <16 x i8>) nounwind
+
+; CHECK: llvm_mips_div_u_b_test:
+; CHECK: ld.b
+; CHECK: ld.b
+; CHECK: div_u.b
+; CHECK: st.b
+; CHECK: .size llvm_mips_div_u_b_test
+;
+@llvm_mips_div_u_h_ARG1 = global <8 x i16> <i16 0, i16 1, i16 2, i16 3, i16 4, i16 5, i16 6, i16 7>, align 16
+@llvm_mips_div_u_h_ARG2 = global <8 x i16> <i16 8, i16 9, i16 10, i16 11, i16 12, i16 13, i16 14, i16 15>, align 16
+@llvm_mips_div_u_h_RES = global <8 x i16> <i16 0, i16 0, i16 0, i16 0, i16 0, i16 0, i16 0, i16 0>, align 16
+
+define void @llvm_mips_div_u_h_test() nounwind {
+entry:
+ %0 = load <8 x i16>* @llvm_mips_div_u_h_ARG1
+ %1 = load <8 x i16>* @llvm_mips_div_u_h_ARG2
+ %2 = tail call <8 x i16> @llvm.mips.div.u.h(<8 x i16> %0, <8 x i16> %1)
+ store <8 x i16> %2, <8 x i16>* @llvm_mips_div_u_h_RES
+ ret void
+}
+
+declare <8 x i16> @llvm.mips.div.u.h(<8 x i16>, <8 x i16>) nounwind
+
+; CHECK: llvm_mips_div_u_h_test:
+; CHECK: ld.h
+; CHECK: ld.h
+; CHECK: div_u.h
+; CHECK: st.h
+; CHECK: .size llvm_mips_div_u_h_test
+;
+@llvm_mips_div_u_w_ARG1 = global <4 x i32> <i32 0, i32 1, i32 2, i32 3>, align 16
+@llvm_mips_div_u_w_ARG2 = global <4 x i32> <i32 4, i32 5, i32 6, i32 7>, align 16
+@llvm_mips_div_u_w_RES = global <4 x i32> <i32 0, i32 0, i32 0, i32 0>, align 16
+
+define void @llvm_mips_div_u_w_test() nounwind {
+entry:
+ %0 = load <4 x i32>* @llvm_mips_div_u_w_ARG1
+ %1 = load <4 x i32>* @llvm_mips_div_u_w_ARG2
+ %2 = tail call <4 x i32> @llvm.mips.div.u.w(<4 x i32> %0, <4 x i32> %1)
+ store <4 x i32> %2, <4 x i32>* @llvm_mips_div_u_w_RES
+ ret void
+}
+
+declare <4 x i32> @llvm.mips.div.u.w(<4 x i32>, <4 x i32>) nounwind
+
+; CHECK: llvm_mips_div_u_w_test:
+; CHECK: ld.w
+; CHECK: ld.w
+; CHECK: div_u.w
+; CHECK: st.w
+; CHECK: .size llvm_mips_div_u_w_test
+;
+@llvm_mips_div_u_d_ARG1 = global <2 x i64> <i64 0, i64 1>, align 16
+@llvm_mips_div_u_d_ARG2 = global <2 x i64> <i64 2, i64 3>, align 16
+@llvm_mips_div_u_d_RES = global <2 x i64> <i64 0, i64 0>, align 16
+
+define void @llvm_mips_div_u_d_test() nounwind {
+entry:
+ %0 = load <2 x i64>* @llvm_mips_div_u_d_ARG1
+ %1 = load <2 x i64>* @llvm_mips_div_u_d_ARG2
+ %2 = tail call <2 x i64> @llvm.mips.div.u.d(<2 x i64> %0, <2 x i64> %1)
+ store <2 x i64> %2, <2 x i64>* @llvm_mips_div_u_d_RES
+ ret void
+}
+
+declare <2 x i64> @llvm.mips.div.u.d(<2 x i64>, <2 x i64>) nounwind
+
+; CHECK: llvm_mips_div_u_d_test:
+; CHECK: ld.d
+; CHECK: ld.d
+; CHECK: div_u.d
+; CHECK: st.d
+; CHECK: .size llvm_mips_div_u_d_test
+;
+@llvm_mips_dotp_s_b_ARG1 = global <16 x i8> <i8 0, i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7, i8 8, i8 9, i8 10, i8 11, i8 12, i8 13, i8 14, i8 15>, align 16
+@llvm_mips_dotp_s_b_ARG2 = global <16 x i8> <i8 16, i8 17, i8 18, i8 19, i8 20, i8 21, i8 22, i8 23, i8 24, i8 25, i8 26, i8 27, i8 28, i8 29, i8 30, i8 31>, align 16
+@llvm_mips_dotp_s_b_RES = global <16 x i8> <i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0>, align 16
+
+define void @llvm_mips_dotp_s_b_test() nounwind {
+entry:
+ %0 = load <16 x i8>* @llvm_mips_dotp_s_b_ARG1
+ %1 = load <16 x i8>* @llvm_mips_dotp_s_b_ARG2
+ %2 = tail call <16 x i8> @llvm.mips.dotp.s.b(<16 x i8> %0, <16 x i8> %1)
+ store <16 x i8> %2, <16 x i8>* @llvm_mips_dotp_s_b_RES
+ ret void
+}
+
+declare <16 x i8> @llvm.mips.dotp.s.b(<16 x i8>, <16 x i8>) nounwind
+
+; CHECK: llvm_mips_dotp_s_b_test:
+; CHECK: ld.b
+; CHECK: ld.b
+; CHECK: dotp_s.b
+; CHECK: st.b
+; CHECK: .size llvm_mips_dotp_s_b_test
+;
+@llvm_mips_dotp_s_h_ARG1 = global <8 x i16> <i16 0, i16 1, i16 2, i16 3, i16 4, i16 5, i16 6, i16 7>, align 16
+@llvm_mips_dotp_s_h_ARG2 = global <8 x i16> <i16 8, i16 9, i16 10, i16 11, i16 12, i16 13, i16 14, i16 15>, align 16
+@llvm_mips_dotp_s_h_RES = global <8 x i16> <i16 0, i16 0, i16 0, i16 0, i16 0, i16 0, i16 0, i16 0>, align 16
+
+define void @llvm_mips_dotp_s_h_test() nounwind {
+entry:
+ %0 = load <8 x i16>* @llvm_mips_dotp_s_h_ARG1
+ %1 = load <8 x i16>* @llvm_mips_dotp_s_h_ARG2
+ %2 = tail call <8 x i16> @llvm.mips.dotp.s.h(<8 x i16> %0, <8 x i16> %1)
+ store <8 x i16> %2, <8 x i16>* @llvm_mips_dotp_s_h_RES
+ ret void
+}
+
+declare <8 x i16> @llvm.mips.dotp.s.h(<8 x i16>, <8 x i16>) nounwind
+
+; CHECK: llvm_mips_dotp_s_h_test:
+; CHECK: ld.h
+; CHECK: ld.h
+; CHECK: dotp_s.h
+; CHECK: st.h
+; CHECK: .size llvm_mips_dotp_s_h_test
+;
+@llvm_mips_dotp_s_w_ARG1 = global <4 x i32> <i32 0, i32 1, i32 2, i32 3>, align 16
+@llvm_mips_dotp_s_w_ARG2 = global <4 x i32> <i32 4, i32 5, i32 6, i32 7>, align 16
+@llvm_mips_dotp_s_w_RES = global <4 x i32> <i32 0, i32 0, i32 0, i32 0>, align 16
+
+define void @llvm_mips_dotp_s_w_test() nounwind {
+entry:
+ %0 = load <4 x i32>* @llvm_mips_dotp_s_w_ARG1
+ %1 = load <4 x i32>* @llvm_mips_dotp_s_w_ARG2
+ %2 = tail call <4 x i32> @llvm.mips.dotp.s.w(<4 x i32> %0, <4 x i32> %1)
+ store <4 x i32> %2, <4 x i32>* @llvm_mips_dotp_s_w_RES
+ ret void
+}
+
+declare <4 x i32> @llvm.mips.dotp.s.w(<4 x i32>, <4 x i32>) nounwind
+
+; CHECK: llvm_mips_dotp_s_w_test:
+; CHECK: ld.w
+; CHECK: ld.w
+; CHECK: dotp_s.w
+; CHECK: st.w
+; CHECK: .size llvm_mips_dotp_s_w_test
+;
+@llvm_mips_dotp_s_d_ARG1 = global <2 x i64> <i64 0, i64 1>, align 16
+@llvm_mips_dotp_s_d_ARG2 = global <2 x i64> <i64 2, i64 3>, align 16
+@llvm_mips_dotp_s_d_RES = global <2 x i64> <i64 0, i64 0>, align 16
+
+define void @llvm_mips_dotp_s_d_test() nounwind {
+entry:
+ %0 = load <2 x i64>* @llvm_mips_dotp_s_d_ARG1
+ %1 = load <2 x i64>* @llvm_mips_dotp_s_d_ARG2
+ %2 = tail call <2 x i64> @llvm.mips.dotp.s.d(<2 x i64> %0, <2 x i64> %1)
+ store <2 x i64> %2, <2 x i64>* @llvm_mips_dotp_s_d_RES
+ ret void
+}
+
+declare <2 x i64> @llvm.mips.dotp.s.d(<2 x i64>, <2 x i64>) nounwind
+
+; CHECK: llvm_mips_dotp_s_d_test:
+; CHECK: ld.d
+; CHECK: ld.d
+; CHECK: dotp_s.d
+; CHECK: st.d
+; CHECK: .size llvm_mips_dotp_s_d_test
+;
+@llvm_mips_dotp_u_b_ARG1 = global <16 x i8> <i8 0, i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7, i8 8, i8 9, i8 10, i8 11, i8 12, i8 13, i8 14, i8 15>, align 16
+@llvm_mips_dotp_u_b_ARG2 = global <16 x i8> <i8 16, i8 17, i8 18, i8 19, i8 20, i8 21, i8 22, i8 23, i8 24, i8 25, i8 26, i8 27, i8 28, i8 29, i8 30, i8 31>, align 16
+@llvm_mips_dotp_u_b_RES = global <16 x i8> <i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0>, align 16
+
+define void @llvm_mips_dotp_u_b_test() nounwind {
+entry:
+ %0 = load <16 x i8>* @llvm_mips_dotp_u_b_ARG1
+ %1 = load <16 x i8>* @llvm_mips_dotp_u_b_ARG2
+ %2 = tail call <16 x i8> @llvm.mips.dotp.u.b(<16 x i8> %0, <16 x i8> %1)
+ store <16 x i8> %2, <16 x i8>* @llvm_mips_dotp_u_b_RES
+ ret void
+}
+
+declare <16 x i8> @llvm.mips.dotp.u.b(<16 x i8>, <16 x i8>) nounwind
+
+; CHECK: llvm_mips_dotp_u_b_test:
+; CHECK: ld.b
+; CHECK: ld.b
+; CHECK: dotp_u.b
+; CHECK: st.b
+; CHECK: .size llvm_mips_dotp_u_b_test
+;
+@llvm_mips_dotp_u_h_ARG1 = global <8 x i16> <i16 0, i16 1, i16 2, i16 3, i16 4, i16 5, i16 6, i16 7>, align 16
+@llvm_mips_dotp_u_h_ARG2 = global <8 x i16> <i16 8, i16 9, i16 10, i16 11, i16 12, i16 13, i16 14, i16 15>, align 16
+@llvm_mips_dotp_u_h_RES = global <8 x i16> <i16 0, i16 0, i16 0, i16 0, i16 0, i16 0, i16 0, i16 0>, align 16
+
+define void @llvm_mips_dotp_u_h_test() nounwind {
+entry:
+ %0 = load <8 x i16>* @llvm_mips_dotp_u_h_ARG1
+ %1 = load <8 x i16>* @llvm_mips_dotp_u_h_ARG2
+ %2 = tail call <8 x i16> @llvm.mips.dotp.u.h(<8 x i16> %0, <8 x i16> %1)
+ store <8 x i16> %2, <8 x i16>* @llvm_mips_dotp_u_h_RES
+ ret void
+}
+
+declare <8 x i16> @llvm.mips.dotp.u.h(<8 x i16>, <8 x i16>) nounwind
+
+; CHECK: llvm_mips_dotp_u_h_test:
+; CHECK: ld.h
+; CHECK: ld.h
+; CHECK: dotp_u.h
+; CHECK: st.h
+; CHECK: .size llvm_mips_dotp_u_h_test
+;
+@llvm_mips_dotp_u_w_ARG1 = global <4 x i32> <i32 0, i32 1, i32 2, i32 3>, align 16
+@llvm_mips_dotp_u_w_ARG2 = global <4 x i32> <i32 4, i32 5, i32 6, i32 7>, align 16
+@llvm_mips_dotp_u_w_RES = global <4 x i32> <i32 0, i32 0, i32 0, i32 0>, align 16
+
+define void @llvm_mips_dotp_u_w_test() nounwind {
+entry:
+ %0 = load <4 x i32>* @llvm_mips_dotp_u_w_ARG1
+ %1 = load <4 x i32>* @llvm_mips_dotp_u_w_ARG2
+ %2 = tail call <4 x i32> @llvm.mips.dotp.u.w(<4 x i32> %0, <4 x i32> %1)
+ store <4 x i32> %2, <4 x i32>* @llvm_mips_dotp_u_w_RES
+ ret void
+}
+
+declare <4 x i32> @llvm.mips.dotp.u.w(<4 x i32>, <4 x i32>) nounwind
+
+; CHECK: llvm_mips_dotp_u_w_test:
+; CHECK: ld.w
+; CHECK: ld.w
+; CHECK: dotp_u.w
+; CHECK: st.w
+; CHECK: .size llvm_mips_dotp_u_w_test
+;
+@llvm_mips_dotp_u_d_ARG1 = global <2 x i64> <i64 0, i64 1>, align 16
+@llvm_mips_dotp_u_d_ARG2 = global <2 x i64> <i64 2, i64 3>, align 16
+@llvm_mips_dotp_u_d_RES = global <2 x i64> <i64 0, i64 0>, align 16
+
+define void @llvm_mips_dotp_u_d_test() nounwind {
+entry:
+ %0 = load <2 x i64>* @llvm_mips_dotp_u_d_ARG1
+ %1 = load <2 x i64>* @llvm_mips_dotp_u_d_ARG2
+ %2 = tail call <2 x i64> @llvm.mips.dotp.u.d(<2 x i64> %0, <2 x i64> %1)
+ store <2 x i64> %2, <2 x i64>* @llvm_mips_dotp_u_d_RES
+ ret void
+}
+
+declare <2 x i64> @llvm.mips.dotp.u.d(<2 x i64>, <2 x i64>) nounwind
+
+; CHECK: llvm_mips_dotp_u_d_test:
+; CHECK: ld.d
+; CHECK: ld.d
+; CHECK: dotp_u.d
+; CHECK: st.d
+; CHECK: .size llvm_mips_dotp_u_d_test
+;
diff --git a/test/CodeGen/Mips/msa/3r-i.ll b/test/CodeGen/Mips/msa/3r-i.ll
new file mode 100644
index 0000000..b5c6a30
--- /dev/null
+++ b/test/CodeGen/Mips/msa/3r-i.ll
@@ -0,0 +1,354 @@
+; RUN: llc -march=mips -mattr=+msa < %s | FileCheck %s
+
+@llvm_mips_ilvev_b_ARG1 = global <16 x i8> <i8 0, i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7, i8 8, i8 9, i8 10, i8 11, i8 12, i8 13, i8 14, i8 15>, align 16
+@llvm_mips_ilvev_b_ARG2 = global <16 x i8> <i8 16, i8 17, i8 18, i8 19, i8 20, i8 21, i8 22, i8 23, i8 24, i8 25, i8 26, i8 27, i8 28, i8 29, i8 30, i8 31>, align 16
+@llvm_mips_ilvev_b_RES = global <16 x i8> <i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0>, align 16
+
+define void @llvm_mips_ilvev_b_test() nounwind {
+entry:
+ %0 = load <16 x i8>* @llvm_mips_ilvev_b_ARG1
+ %1 = load <16 x i8>* @llvm_mips_ilvev_b_ARG2
+ %2 = tail call <16 x i8> @llvm.mips.ilvev.b(<16 x i8> %0, <16 x i8> %1)
+ store <16 x i8> %2, <16 x i8>* @llvm_mips_ilvev_b_RES
+ ret void
+}
+
+declare <16 x i8> @llvm.mips.ilvev.b(<16 x i8>, <16 x i8>) nounwind
+
+; CHECK: llvm_mips_ilvev_b_test:
+; CHECK: ld.b
+; CHECK: ld.b
+; CHECK: ilvev.b
+; CHECK: st.b
+; CHECK: .size llvm_mips_ilvev_b_test
+;
+@llvm_mips_ilvev_h_ARG1 = global <8 x i16> <i16 0, i16 1, i16 2, i16 3, i16 4, i16 5, i16 6, i16 7>, align 16
+@llvm_mips_ilvev_h_ARG2 = global <8 x i16> <i16 8, i16 9, i16 10, i16 11, i16 12, i16 13, i16 14, i16 15>, align 16
+@llvm_mips_ilvev_h_RES = global <8 x i16> <i16 0, i16 0, i16 0, i16 0, i16 0, i16 0, i16 0, i16 0>, align 16
+
+define void @llvm_mips_ilvev_h_test() nounwind {
+entry:
+ %0 = load <8 x i16>* @llvm_mips_ilvev_h_ARG1
+ %1 = load <8 x i16>* @llvm_mips_ilvev_h_ARG2
+ %2 = tail call <8 x i16> @llvm.mips.ilvev.h(<8 x i16> %0, <8 x i16> %1)
+ store <8 x i16> %2, <8 x i16>* @llvm_mips_ilvev_h_RES
+ ret void
+}
+
+declare <8 x i16> @llvm.mips.ilvev.h(<8 x i16>, <8 x i16>) nounwind
+
+; CHECK: llvm_mips_ilvev_h_test:
+; CHECK: ld.h
+; CHECK: ld.h
+; CHECK: ilvev.h
+; CHECK: st.h
+; CHECK: .size llvm_mips_ilvev_h_test
+;
+@llvm_mips_ilvev_w_ARG1 = global <4 x i32> <i32 0, i32 1, i32 2, i32 3>, align 16
+@llvm_mips_ilvev_w_ARG2 = global <4 x i32> <i32 4, i32 5, i32 6, i32 7>, align 16
+@llvm_mips_ilvev_w_RES = global <4 x i32> <i32 0, i32 0, i32 0, i32 0>, align 16
+
+define void @llvm_mips_ilvev_w_test() nounwind {
+entry:
+ %0 = load <4 x i32>* @llvm_mips_ilvev_w_ARG1
+ %1 = load <4 x i32>* @llvm_mips_ilvev_w_ARG2
+ %2 = tail call <4 x i32> @llvm.mips.ilvev.w(<4 x i32> %0, <4 x i32> %1)
+ store <4 x i32> %2, <4 x i32>* @llvm_mips_ilvev_w_RES
+ ret void
+}
+
+declare <4 x i32> @llvm.mips.ilvev.w(<4 x i32>, <4 x i32>) nounwind
+
+; CHECK: llvm_mips_ilvev_w_test:
+; CHECK: ld.w
+; CHECK: ld.w
+; CHECK: ilvev.w
+; CHECK: st.w
+; CHECK: .size llvm_mips_ilvev_w_test
+;
+@llvm_mips_ilvev_d_ARG1 = global <2 x i64> <i64 0, i64 1>, align 16
+@llvm_mips_ilvev_d_ARG2 = global <2 x i64> <i64 2, i64 3>, align 16
+@llvm_mips_ilvev_d_RES = global <2 x i64> <i64 0, i64 0>, align 16
+
+define void @llvm_mips_ilvev_d_test() nounwind {
+entry:
+ %0 = load <2 x i64>* @llvm_mips_ilvev_d_ARG1
+ %1 = load <2 x i64>* @llvm_mips_ilvev_d_ARG2
+ %2 = tail call <2 x i64> @llvm.mips.ilvev.d(<2 x i64> %0, <2 x i64> %1)
+ store <2 x i64> %2, <2 x i64>* @llvm_mips_ilvev_d_RES
+ ret void
+}
+
+declare <2 x i64> @llvm.mips.ilvev.d(<2 x i64>, <2 x i64>) nounwind
+
+; CHECK: llvm_mips_ilvev_d_test:
+; CHECK: ld.d
+; CHECK: ld.d
+; CHECK: ilvev.d
+; CHECK: st.d
+; CHECK: .size llvm_mips_ilvev_d_test
+;
+@llvm_mips_ilvl_b_ARG1 = global <16 x i8> <i8 0, i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7, i8 8, i8 9, i8 10, i8 11, i8 12, i8 13, i8 14, i8 15>, align 16
+@llvm_mips_ilvl_b_ARG2 = global <16 x i8> <i8 16, i8 17, i8 18, i8 19, i8 20, i8 21, i8 22, i8 23, i8 24, i8 25, i8 26, i8 27, i8 28, i8 29, i8 30, i8 31>, align 16
+@llvm_mips_ilvl_b_RES = global <16 x i8> <i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0>, align 16
+
+define void @llvm_mips_ilvl_b_test() nounwind {
+entry:
+ %0 = load <16 x i8>* @llvm_mips_ilvl_b_ARG1
+ %1 = load <16 x i8>* @llvm_mips_ilvl_b_ARG2
+ %2 = tail call <16 x i8> @llvm.mips.ilvl.b(<16 x i8> %0, <16 x i8> %1)
+ store <16 x i8> %2, <16 x i8>* @llvm_mips_ilvl_b_RES
+ ret void
+}
+
+declare <16 x i8> @llvm.mips.ilvl.b(<16 x i8>, <16 x i8>) nounwind
+
+; CHECK: llvm_mips_ilvl_b_test:
+; CHECK: ld.b
+; CHECK: ld.b
+; CHECK: ilvl.b
+; CHECK: st.b
+; CHECK: .size llvm_mips_ilvl_b_test
+;
+@llvm_mips_ilvl_h_ARG1 = global <8 x i16> <i16 0, i16 1, i16 2, i16 3, i16 4, i16 5, i16 6, i16 7>, align 16
+@llvm_mips_ilvl_h_ARG2 = global <8 x i16> <i16 8, i16 9, i16 10, i16 11, i16 12, i16 13, i16 14, i16 15>, align 16
+@llvm_mips_ilvl_h_RES = global <8 x i16> <i16 0, i16 0, i16 0, i16 0, i16 0, i16 0, i16 0, i16 0>, align 16
+
+define void @llvm_mips_ilvl_h_test() nounwind {
+entry:
+ %0 = load <8 x i16>* @llvm_mips_ilvl_h_ARG1
+ %1 = load <8 x i16>* @llvm_mips_ilvl_h_ARG2
+ %2 = tail call <8 x i16> @llvm.mips.ilvl.h(<8 x i16> %0, <8 x i16> %1)
+ store <8 x i16> %2, <8 x i16>* @llvm_mips_ilvl_h_RES
+ ret void
+}
+
+declare <8 x i16> @llvm.mips.ilvl.h(<8 x i16>, <8 x i16>) nounwind
+
+; CHECK: llvm_mips_ilvl_h_test:
+; CHECK: ld.h
+; CHECK: ld.h
+; CHECK: ilvl.h
+; CHECK: st.h
+; CHECK: .size llvm_mips_ilvl_h_test
+;
+@llvm_mips_ilvl_w_ARG1 = global <4 x i32> <i32 0, i32 1, i32 2, i32 3>, align 16
+@llvm_mips_ilvl_w_ARG2 = global <4 x i32> <i32 4, i32 5, i32 6, i32 7>, align 16
+@llvm_mips_ilvl_w_RES = global <4 x i32> <i32 0, i32 0, i32 0, i32 0>, align 16
+
+define void @llvm_mips_ilvl_w_test() nounwind {
+entry:
+ %0 = load <4 x i32>* @llvm_mips_ilvl_w_ARG1
+ %1 = load <4 x i32>* @llvm_mips_ilvl_w_ARG2
+ %2 = tail call <4 x i32> @llvm.mips.ilvl.w(<4 x i32> %0, <4 x i32> %1)
+ store <4 x i32> %2, <4 x i32>* @llvm_mips_ilvl_w_RES
+ ret void
+}
+
+declare <4 x i32> @llvm.mips.ilvl.w(<4 x i32>, <4 x i32>) nounwind
+
+; CHECK: llvm_mips_ilvl_w_test:
+; CHECK: ld.w
+; CHECK: ld.w
+; CHECK: ilvl.w
+; CHECK: st.w
+; CHECK: .size llvm_mips_ilvl_w_test
+;
+@llvm_mips_ilvl_d_ARG1 = global <2 x i64> <i64 0, i64 1>, align 16
+@llvm_mips_ilvl_d_ARG2 = global <2 x i64> <i64 2, i64 3>, align 16
+@llvm_mips_ilvl_d_RES = global <2 x i64> <i64 0, i64 0>, align 16
+
+define void @llvm_mips_ilvl_d_test() nounwind {
+entry:
+ %0 = load <2 x i64>* @llvm_mips_ilvl_d_ARG1
+ %1 = load <2 x i64>* @llvm_mips_ilvl_d_ARG2
+ %2 = tail call <2 x i64> @llvm.mips.ilvl.d(<2 x i64> %0, <2 x i64> %1)
+ store <2 x i64> %2, <2 x i64>* @llvm_mips_ilvl_d_RES
+ ret void
+}
+
+declare <2 x i64> @llvm.mips.ilvl.d(<2 x i64>, <2 x i64>) nounwind
+
+; CHECK: llvm_mips_ilvl_d_test:
+; CHECK: ld.d
+; CHECK: ld.d
+; CHECK: ilvl.d
+; CHECK: st.d
+; CHECK: .size llvm_mips_ilvl_d_test
+;
+@llvm_mips_ilvod_b_ARG1 = global <16 x i8> <i8 0, i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7, i8 8, i8 9, i8 10, i8 11, i8 12, i8 13, i8 14, i8 15>, align 16
+@llvm_mips_ilvod_b_ARG2 = global <16 x i8> <i8 16, i8 17, i8 18, i8 19, i8 20, i8 21, i8 22, i8 23, i8 24, i8 25, i8 26, i8 27, i8 28, i8 29, i8 30, i8 31>, align 16
+@llvm_mips_ilvod_b_RES = global <16 x i8> <i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0>, align 16
+
+define void @llvm_mips_ilvod_b_test() nounwind {
+entry:
+ %0 = load <16 x i8>* @llvm_mips_ilvod_b_ARG1
+ %1 = load <16 x i8>* @llvm_mips_ilvod_b_ARG2
+ %2 = tail call <16 x i8> @llvm.mips.ilvod.b(<16 x i8> %0, <16 x i8> %1)
+ store <16 x i8> %2, <16 x i8>* @llvm_mips_ilvod_b_RES
+ ret void
+}
+
+declare <16 x i8> @llvm.mips.ilvod.b(<16 x i8>, <16 x i8>) nounwind
+
+; CHECK: llvm_mips_ilvod_b_test:
+; CHECK: ld.b
+; CHECK: ld.b
+; CHECK: ilvod.b
+; CHECK: st.b
+; CHECK: .size llvm_mips_ilvod_b_test
+;
+@llvm_mips_ilvod_h_ARG1 = global <8 x i16> <i16 0, i16 1, i16 2, i16 3, i16 4, i16 5, i16 6, i16 7>, align 16
+@llvm_mips_ilvod_h_ARG2 = global <8 x i16> <i16 8, i16 9, i16 10, i16 11, i16 12, i16 13, i16 14, i16 15>, align 16
+@llvm_mips_ilvod_h_RES = global <8 x i16> <i16 0, i16 0, i16 0, i16 0, i16 0, i16 0, i16 0, i16 0>, align 16
+
+define void @llvm_mips_ilvod_h_test() nounwind {
+entry:
+ %0 = load <8 x i16>* @llvm_mips_ilvod_h_ARG1
+ %1 = load <8 x i16>* @llvm_mips_ilvod_h_ARG2
+ %2 = tail call <8 x i16> @llvm.mips.ilvod.h(<8 x i16> %0, <8 x i16> %1)
+ store <8 x i16> %2, <8 x i16>* @llvm_mips_ilvod_h_RES
+ ret void
+}
+
+declare <8 x i16> @llvm.mips.ilvod.h(<8 x i16>, <8 x i16>) nounwind
+
+; CHECK: llvm_mips_ilvod_h_test:
+; CHECK: ld.h
+; CHECK: ld.h
+; CHECK: ilvod.h
+; CHECK: st.h
+; CHECK: .size llvm_mips_ilvod_h_test
+;
+@llvm_mips_ilvod_w_ARG1 = global <4 x i32> <i32 0, i32 1, i32 2, i32 3>, align 16
+@llvm_mips_ilvod_w_ARG2 = global <4 x i32> <i32 4, i32 5, i32 6, i32 7>, align 16
+@llvm_mips_ilvod_w_RES = global <4 x i32> <i32 0, i32 0, i32 0, i32 0>, align 16
+
+define void @llvm_mips_ilvod_w_test() nounwind {
+entry:
+ %0 = load <4 x i32>* @llvm_mips_ilvod_w_ARG1
+ %1 = load <4 x i32>* @llvm_mips_ilvod_w_ARG2
+ %2 = tail call <4 x i32> @llvm.mips.ilvod.w(<4 x i32> %0, <4 x i32> %1)
+ store <4 x i32> %2, <4 x i32>* @llvm_mips_ilvod_w_RES
+ ret void
+}
+
+declare <4 x i32> @llvm.mips.ilvod.w(<4 x i32>, <4 x i32>) nounwind
+
+; CHECK: llvm_mips_ilvod_w_test:
+; CHECK: ld.w
+; CHECK: ld.w
+; CHECK: ilvod.w
+; CHECK: st.w
+; CHECK: .size llvm_mips_ilvod_w_test
+;
+@llvm_mips_ilvod_d_ARG1 = global <2 x i64> <i64 0, i64 1>, align 16
+@llvm_mips_ilvod_d_ARG2 = global <2 x i64> <i64 2, i64 3>, align 16
+@llvm_mips_ilvod_d_RES = global <2 x i64> <i64 0, i64 0>, align 16
+
+define void @llvm_mips_ilvod_d_test() nounwind {
+entry:
+ %0 = load <2 x i64>* @llvm_mips_ilvod_d_ARG1
+ %1 = load <2 x i64>* @llvm_mips_ilvod_d_ARG2
+ %2 = tail call <2 x i64> @llvm.mips.ilvod.d(<2 x i64> %0, <2 x i64> %1)
+ store <2 x i64> %2, <2 x i64>* @llvm_mips_ilvod_d_RES
+ ret void
+}
+
+declare <2 x i64> @llvm.mips.ilvod.d(<2 x i64>, <2 x i64>) nounwind
+
+; CHECK: llvm_mips_ilvod_d_test:
+; CHECK: ld.d
+; CHECK: ld.d
+; CHECK: ilvod.d
+; CHECK: st.d
+; CHECK: .size llvm_mips_ilvod_d_test
+;
+@llvm_mips_ilvr_b_ARG1 = global <16 x i8> <i8 0, i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7, i8 8, i8 9, i8 10, i8 11, i8 12, i8 13, i8 14, i8 15>, align 16
+@llvm_mips_ilvr_b_ARG2 = global <16 x i8> <i8 16, i8 17, i8 18, i8 19, i8 20, i8 21, i8 22, i8 23, i8 24, i8 25, i8 26, i8 27, i8 28, i8 29, i8 30, i8 31>, align 16
+@llvm_mips_ilvr_b_RES = global <16 x i8> <i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0>, align 16
+
+define void @llvm_mips_ilvr_b_test() nounwind {
+entry:
+ %0 = load <16 x i8>* @llvm_mips_ilvr_b_ARG1
+ %1 = load <16 x i8>* @llvm_mips_ilvr_b_ARG2
+ %2 = tail call <16 x i8> @llvm.mips.ilvr.b(<16 x i8> %0, <16 x i8> %1)
+ store <16 x i8> %2, <16 x i8>* @llvm_mips_ilvr_b_RES
+ ret void
+}
+
+declare <16 x i8> @llvm.mips.ilvr.b(<16 x i8>, <16 x i8>) nounwind
+
+; CHECK: llvm_mips_ilvr_b_test:
+; CHECK: ld.b
+; CHECK: ld.b
+; CHECK: ilvr.b
+; CHECK: st.b
+; CHECK: .size llvm_mips_ilvr_b_test
+;
+@llvm_mips_ilvr_h_ARG1 = global <8 x i16> <i16 0, i16 1, i16 2, i16 3, i16 4, i16 5, i16 6, i16 7>, align 16
+@llvm_mips_ilvr_h_ARG2 = global <8 x i16> <i16 8, i16 9, i16 10, i16 11, i16 12, i16 13, i16 14, i16 15>, align 16
+@llvm_mips_ilvr_h_RES = global <8 x i16> <i16 0, i16 0, i16 0, i16 0, i16 0, i16 0, i16 0, i16 0>, align 16
+
+define void @llvm_mips_ilvr_h_test() nounwind {
+entry:
+ %0 = load <8 x i16>* @llvm_mips_ilvr_h_ARG1
+ %1 = load <8 x i16>* @llvm_mips_ilvr_h_ARG2
+ %2 = tail call <8 x i16> @llvm.mips.ilvr.h(<8 x i16> %0, <8 x i16> %1)
+ store <8 x i16> %2, <8 x i16>* @llvm_mips_ilvr_h_RES
+ ret void
+}
+
+declare <8 x i16> @llvm.mips.ilvr.h(<8 x i16>, <8 x i16>) nounwind
+
+; CHECK: llvm_mips_ilvr_h_test:
+; CHECK: ld.h
+; CHECK: ld.h
+; CHECK: ilvr.h
+; CHECK: st.h
+; CHECK: .size llvm_mips_ilvr_h_test
+;
+@llvm_mips_ilvr_w_ARG1 = global <4 x i32> <i32 0, i32 1, i32 2, i32 3>, align 16
+@llvm_mips_ilvr_w_ARG2 = global <4 x i32> <i32 4, i32 5, i32 6, i32 7>, align 16
+@llvm_mips_ilvr_w_RES = global <4 x i32> <i32 0, i32 0, i32 0, i32 0>, align 16
+
+define void @llvm_mips_ilvr_w_test() nounwind {
+entry:
+ %0 = load <4 x i32>* @llvm_mips_ilvr_w_ARG1
+ %1 = load <4 x i32>* @llvm_mips_ilvr_w_ARG2
+ %2 = tail call <4 x i32> @llvm.mips.ilvr.w(<4 x i32> %0, <4 x i32> %1)
+ store <4 x i32> %2, <4 x i32>* @llvm_mips_ilvr_w_RES
+ ret void
+}
+
+declare <4 x i32> @llvm.mips.ilvr.w(<4 x i32>, <4 x i32>) nounwind
+
+; CHECK: llvm_mips_ilvr_w_test:
+; CHECK: ld.w
+; CHECK: ld.w
+; CHECK: ilvr.w
+; CHECK: st.w
+; CHECK: .size llvm_mips_ilvr_w_test
+;
+@llvm_mips_ilvr_d_ARG1 = global <2 x i64> <i64 0, i64 1>, align 16
+@llvm_mips_ilvr_d_ARG2 = global <2 x i64> <i64 2, i64 3>, align 16
+@llvm_mips_ilvr_d_RES = global <2 x i64> <i64 0, i64 0>, align 16
+
+define void @llvm_mips_ilvr_d_test() nounwind {
+entry:
+ %0 = load <2 x i64>* @llvm_mips_ilvr_d_ARG1
+ %1 = load <2 x i64>* @llvm_mips_ilvr_d_ARG2
+ %2 = tail call <2 x i64> @llvm.mips.ilvr.d(<2 x i64> %0, <2 x i64> %1)
+ store <2 x i64> %2, <2 x i64>* @llvm_mips_ilvr_d_RES
+ ret void
+}
+
+declare <2 x i64> @llvm.mips.ilvr.d(<2 x i64>, <2 x i64>) nounwind
+
+; CHECK: llvm_mips_ilvr_d_test:
+; CHECK: ld.d
+; CHECK: ld.d
+; CHECK: ilvr.d
+; CHECK: st.d
+; CHECK: .size llvm_mips_ilvr_d_test
+;
diff --git a/test/CodeGen/Mips/msa/3r_4r_widen.ll b/test/CodeGen/Mips/msa/3r_4r_widen.ll
new file mode 100644
index 0000000..891d11a
--- /dev/null
+++ b/test/CodeGen/Mips/msa/3r_4r_widen.ll
@@ -0,0 +1,302 @@
+; RUN: llc -march=mips -mattr=+msa < %s | FileCheck %s
+
+@llvm_mips_dpadd_s_h_ARG1 = global <8 x i16> <i16 0, i16 1, i16 2, i16 3, i16 4, i16 5, i16 6, i16 7>, align 16
+@llvm_mips_dpadd_s_h_ARG2 = global <16 x i8> <i8 8, i8 9, i8 10, i8 11, i8 12, i8 13, i8 14, i8 15, i8 16, i8 17, i8 18, i8 19, i8 20, i8 21, i8 22, i8 23>, align 16
+@llvm_mips_dpadd_s_h_ARG3 = global <16 x i8> <i8 24, i8 25, i8 26, i8 27, i8 28, i8 29, i8 30, i8 31, i8 32, i8 33, i8 34, i8 35, i8 36, i8 37, i8 38, i8 39>, align 16
+@llvm_mips_dpadd_s_h_RES = global <8 x i16> <i16 0, i16 0, i16 0, i16 0, i16 0, i16 0, i16 0, i16 0>, align 16
+
+define void @llvm_mips_dpadd_s_h_test() nounwind {
+entry:
+ %0 = load <8 x i16>* @llvm_mips_dpadd_s_h_ARG1
+ %1 = load <16 x i8>* @llvm_mips_dpadd_s_h_ARG2
+ %2 = load <16 x i8>* @llvm_mips_dpadd_s_h_ARG3
+ %3 = tail call <8 x i16> @llvm.mips.dpadd.s.h(<8 x i16> %0, <16 x i8> %1, <16 x i8> %2)
+ store <8 x i16> %3, <8 x i16>* @llvm_mips_dpadd_s_h_RES
+ ret void
+}
+
+declare <8 x i16> @llvm.mips.dpadd.s.h(<8 x i16>, <16 x i8>, <16 x i8>) nounwind
+
+; CHECK: llvm_mips_dpadd_s_h_test:
+; CHECK: ld.b
+; CHECK: ld.b
+; CHECK: ld.h
+; CHECK: dpadd_s.h
+; CHECK: st.h
+; CHECK: .size llvm_mips_dpadd_s_h_test
+;
+@llvm_mips_dpadd_s_w_ARG1 = global <4 x i32> <i32 0, i32 1, i32 2, i32 3>, align 16
+@llvm_mips_dpadd_s_w_ARG2 = global <8 x i16> <i16 4, i16 5, i16 6, i16 7, i16 8, i16 9, i16 10, i16 11>, align 16
+@llvm_mips_dpadd_s_w_ARG3 = global <8 x i16> <i16 12, i16 13, i16 14, i16 15, i16 16, i16 17, i16 18, i16 19>, align 16
+@llvm_mips_dpadd_s_w_RES = global <4 x i32> <i32 0, i32 0, i32 0, i32 0>, align 16
+
+define void @llvm_mips_dpadd_s_w_test() nounwind {
+entry:
+ %0 = load <4 x i32>* @llvm_mips_dpadd_s_w_ARG1
+ %1 = load <8 x i16>* @llvm_mips_dpadd_s_w_ARG2
+ %2 = load <8 x i16>* @llvm_mips_dpadd_s_w_ARG3
+ %3 = tail call <4 x i32> @llvm.mips.dpadd.s.w(<4 x i32> %0, <8 x i16> %1, <8 x i16> %2)
+ store <4 x i32> %3, <4 x i32>* @llvm_mips_dpadd_s_w_RES
+ ret void
+}
+
+declare <4 x i32> @llvm.mips.dpadd.s.w(<4 x i32>, <8 x i16>, <8 x i16>) nounwind
+
+; CHECK: llvm_mips_dpadd_s_w_test:
+; CHECK: ld.h
+; CHECK: ld.h
+; CHECK: ld.w
+; CHECK: dpadd_s.w
+; CHECK: st.w
+; CHECK: .size llvm_mips_dpadd_s_w_test
+;
+@llvm_mips_dpadd_s_d_ARG1 = global <2 x i64> <i64 0, i64 1>, align 16
+@llvm_mips_dpadd_s_d_ARG2 = global <4 x i32> <i32 2, i32 3, i32 4, i32 5>, align 16
+@llvm_mips_dpadd_s_d_ARG3 = global <4 x i32> <i32 6, i32 7, i32 8, i32 9>, align 16
+@llvm_mips_dpadd_s_d_RES = global <2 x i64> <i64 0, i64 0>, align 16
+
+define void @llvm_mips_dpadd_s_d_test() nounwind {
+entry:
+ %0 = load <2 x i64>* @llvm_mips_dpadd_s_d_ARG1
+ %1 = load <4 x i32>* @llvm_mips_dpadd_s_d_ARG2
+ %2 = load <4 x i32>* @llvm_mips_dpadd_s_d_ARG3
+ %3 = tail call <2 x i64> @llvm.mips.dpadd.s.d(<2 x i64> %0, <4 x i32> %1, <4 x i32> %2)
+ store <2 x i64> %3, <2 x i64>* @llvm_mips_dpadd_s_d_RES
+ ret void
+}
+
+declare <2 x i64> @llvm.mips.dpadd.s.d(<2 x i64>, <4 x i32>, <4 x i32>) nounwind
+
+; CHECK: llvm_mips_dpadd_s_d_test:
+; CHECK: ld.w
+; CHECK: ld.w
+; CHECK: ld.d
+; CHECK: dpadd_s.d
+; CHECK: st.d
+; CHECK: .size llvm_mips_dpadd_s_d_test
+;
+@llvm_mips_dpadd_u_h_ARG1 = global <8 x i16> <i16 0, i16 1, i16 2, i16 3, i16 4, i16 5, i16 6, i16 7>, align 16
+@llvm_mips_dpadd_u_h_ARG2 = global <16 x i8> <i8 8, i8 9, i8 10, i8 11, i8 12, i8 13, i8 14, i8 15, i8 16, i8 17, i8 18, i8 19, i8 20, i8 21, i8 22, i8 23>, align 16
+@llvm_mips_dpadd_u_h_ARG3 = global <16 x i8> <i8 24, i8 25, i8 26, i8 27, i8 28, i8 29, i8 30, i8 31, i8 32, i8 33, i8 34, i8 35, i8 36, i8 37, i8 38, i8 39>, align 16
+@llvm_mips_dpadd_u_h_RES = global <8 x i16> <i16 0, i16 0, i16 0, i16 0, i16 0, i16 0, i16 0, i16 0>, align 16
+
+define void @llvm_mips_dpadd_u_h_test() nounwind {
+entry:
+ %0 = load <8 x i16>* @llvm_mips_dpadd_u_h_ARG1
+ %1 = load <16 x i8>* @llvm_mips_dpadd_u_h_ARG2
+ %2 = load <16 x i8>* @llvm_mips_dpadd_u_h_ARG3
+ %3 = tail call <8 x i16> @llvm.mips.dpadd.u.h(<8 x i16> %0, <16 x i8> %1, <16 x i8> %2)
+ store <8 x i16> %3, <8 x i16>* @llvm_mips_dpadd_u_h_RES
+ ret void
+}
+
+declare <8 x i16> @llvm.mips.dpadd.u.h(<8 x i16>, <16 x i8>, <16 x i8>) nounwind
+
+; CHECK: llvm_mips_dpadd_u_h_test:
+; CHECK: ld.b
+; CHECK: ld.b
+; CHECK: ld.h
+; CHECK: dpadd_u.h
+; CHECK: st.h
+; CHECK: .size llvm_mips_dpadd_u_h_test
+;
+@llvm_mips_dpadd_u_w_ARG1 = global <4 x i32> <i32 0, i32 1, i32 2, i32 3>, align 16
+@llvm_mips_dpadd_u_w_ARG2 = global <8 x i16> <i16 4, i16 5, i16 6, i16 7, i16 8, i16 9, i16 10, i16 11>, align 16
+@llvm_mips_dpadd_u_w_ARG3 = global <8 x i16> <i16 12, i16 13, i16 14, i16 15, i16 16, i16 17, i16 18, i16 19>, align 16
+@llvm_mips_dpadd_u_w_RES = global <4 x i32> <i32 0, i32 0, i32 0, i32 0>, align 16
+
+define void @llvm_mips_dpadd_u_w_test() nounwind {
+entry:
+ %0 = load <4 x i32>* @llvm_mips_dpadd_u_w_ARG1
+ %1 = load <8 x i16>* @llvm_mips_dpadd_u_w_ARG2
+ %2 = load <8 x i16>* @llvm_mips_dpadd_u_w_ARG3
+ %3 = tail call <4 x i32> @llvm.mips.dpadd.u.w(<4 x i32> %0, <8 x i16> %1, <8 x i16> %2)
+ store <4 x i32> %3, <4 x i32>* @llvm_mips_dpadd_u_w_RES
+ ret void
+}
+
+declare <4 x i32> @llvm.mips.dpadd.u.w(<4 x i32>, <8 x i16>, <8 x i16>) nounwind
+
+; CHECK: llvm_mips_dpadd_u_w_test:
+; CHECK: ld.h
+; CHECK: ld.h
+; CHECK: ld.w
+; CHECK: dpadd_u.w
+; CHECK: st.w
+; CHECK: .size llvm_mips_dpadd_u_w_test
+;
+@llvm_mips_dpadd_u_d_ARG1 = global <2 x i64> <i64 0, i64 1>, align 16
+@llvm_mips_dpadd_u_d_ARG2 = global <4 x i32> <i32 2, i32 3, i32 4, i32 5>, align 16
+@llvm_mips_dpadd_u_d_ARG3 = global <4 x i32> <i32 6, i32 7, i32 8, i32 9>, align 16
+@llvm_mips_dpadd_u_d_RES = global <2 x i64> <i64 0, i64 0>, align 16
+
+define void @llvm_mips_dpadd_u_d_test() nounwind {
+entry:
+ %0 = load <2 x i64>* @llvm_mips_dpadd_u_d_ARG1
+ %1 = load <4 x i32>* @llvm_mips_dpadd_u_d_ARG2
+ %2 = load <4 x i32>* @llvm_mips_dpadd_u_d_ARG3
+ %3 = tail call <2 x i64> @llvm.mips.dpadd.u.d(<2 x i64> %0, <4 x i32> %1, <4 x i32> %2)
+ store <2 x i64> %3, <2 x i64>* @llvm_mips_dpadd_u_d_RES
+ ret void
+}
+
+declare <2 x i64> @llvm.mips.dpadd.u.d(<2 x i64>, <4 x i32>, <4 x i32>) nounwind
+
+; CHECK: llvm_mips_dpadd_u_d_test:
+; CHECK: ld.w
+; CHECK: ld.w
+; CHECK: ld.d
+; CHECK: dpadd_u.d
+; CHECK: st.d
+; CHECK: .size llvm_mips_dpadd_u_d_test
+;
+@llvm_mips_dpsub_s_h_ARG1 = global <8 x i16> <i16 0, i16 1, i16 2, i16 3, i16 4, i16 5, i16 6, i16 7>, align 16
+@llvm_mips_dpsub_s_h_ARG2 = global <16 x i8> <i8 8, i8 9, i8 10, i8 11, i8 12, i8 13, i8 14, i8 15, i8 16, i8 17, i8 18, i8 19, i8 20, i8 21, i8 22, i8 23>, align 16
+@llvm_mips_dpsub_s_h_ARG3 = global <16 x i8> <i8 24, i8 25, i8 26, i8 27, i8 28, i8 29, i8 30, i8 31, i8 32, i8 33, i8 34, i8 35, i8 36, i8 37, i8 38, i8 39>, align 16
+@llvm_mips_dpsub_s_h_RES = global <8 x i16> <i16 0, i16 0, i16 0, i16 0, i16 0, i16 0, i16 0, i16 0>, align 16
+
+define void @llvm_mips_dpsub_s_h_test() nounwind {
+entry:
+ %0 = load <8 x i16>* @llvm_mips_dpsub_s_h_ARG1
+ %1 = load <16 x i8>* @llvm_mips_dpsub_s_h_ARG2
+ %2 = load <16 x i8>* @llvm_mips_dpsub_s_h_ARG3
+ %3 = tail call <8 x i16> @llvm.mips.dpsub.s.h(<8 x i16> %0, <16 x i8> %1, <16 x i8> %2)
+ store <8 x i16> %3, <8 x i16>* @llvm_mips_dpsub_s_h_RES
+ ret void
+}
+
+declare <8 x i16> @llvm.mips.dpsub.s.h(<8 x i16>, <16 x i8>, <16 x i8>) nounwind
+
+; CHECK: llvm_mips_dpsub_s_h_test:
+; CHECK: ld.b
+; CHECK: ld.b
+; CHECK: ld.h
+; CHECK: dpsub_s.h
+; CHECK: st.h
+; CHECK: .size llvm_mips_dpsub_s_h_test
+;
+@llvm_mips_dpsub_s_w_ARG1 = global <4 x i32> <i32 0, i32 1, i32 2, i32 3>, align 16
+@llvm_mips_dpsub_s_w_ARG2 = global <8 x i16> <i16 4, i16 5, i16 6, i16 7, i16 8, i16 9, i16 10, i16 11>, align 16
+@llvm_mips_dpsub_s_w_ARG3 = global <8 x i16> <i16 12, i16 13, i16 14, i16 15, i16 16, i16 17, i16 18, i16 19>, align 16
+@llvm_mips_dpsub_s_w_RES = global <4 x i32> <i32 0, i32 0, i32 0, i32 0>, align 16
+
+define void @llvm_mips_dpsub_s_w_test() nounwind {
+entry:
+ %0 = load <4 x i32>* @llvm_mips_dpsub_s_w_ARG1
+ %1 = load <8 x i16>* @llvm_mips_dpsub_s_w_ARG2
+ %2 = load <8 x i16>* @llvm_mips_dpsub_s_w_ARG3
+ %3 = tail call <4 x i32> @llvm.mips.dpsub.s.w(<4 x i32> %0, <8 x i16> %1, <8 x i16> %2)
+ store <4 x i32> %3, <4 x i32>* @llvm_mips_dpsub_s_w_RES
+ ret void
+}
+
+declare <4 x i32> @llvm.mips.dpsub.s.w(<4 x i32>, <8 x i16>, <8 x i16>) nounwind
+
+; CHECK: llvm_mips_dpsub_s_w_test:
+; CHECK: ld.h
+; CHECK: ld.h
+; CHECK: ld.w
+; CHECK: dpsub_s.w
+; CHECK: st.w
+; CHECK: .size llvm_mips_dpsub_s_w_test
+;
+@llvm_mips_dpsub_s_d_ARG1 = global <2 x i64> <i64 0, i64 1>, align 16
+@llvm_mips_dpsub_s_d_ARG2 = global <4 x i32> <i32 2, i32 3, i32 4, i32 5>, align 16
+@llvm_mips_dpsub_s_d_ARG3 = global <4 x i32> <i32 6, i32 7, i32 8, i32 9>, align 16
+@llvm_mips_dpsub_s_d_RES = global <2 x i64> <i64 0, i64 0>, align 16
+
+define void @llvm_mips_dpsub_s_d_test() nounwind {
+entry:
+ %0 = load <2 x i64>* @llvm_mips_dpsub_s_d_ARG1
+ %1 = load <4 x i32>* @llvm_mips_dpsub_s_d_ARG2
+ %2 = load <4 x i32>* @llvm_mips_dpsub_s_d_ARG3
+ %3 = tail call <2 x i64> @llvm.mips.dpsub.s.d(<2 x i64> %0, <4 x i32> %1, <4 x i32> %2)
+ store <2 x i64> %3, <2 x i64>* @llvm_mips_dpsub_s_d_RES
+ ret void
+}
+
+declare <2 x i64> @llvm.mips.dpsub.s.d(<2 x i64>, <4 x i32>, <4 x i32>) nounwind
+
+; CHECK: llvm_mips_dpsub_s_d_test:
+; CHECK: ld.w
+; CHECK: ld.w
+; CHECK: ld.d
+; CHECK: dpsub_s.d
+; CHECK: st.d
+; CHECK: .size llvm_mips_dpsub_s_d_test
+;
+@llvm_mips_dpsub_u_h_ARG1 = global <8 x i16> <i16 0, i16 1, i16 2, i16 3, i16 4, i16 5, i16 6, i16 7>, align 16
+@llvm_mips_dpsub_u_h_ARG2 = global <16 x i8> <i8 8, i8 9, i8 10, i8 11, i8 12, i8 13, i8 14, i8 15, i8 16, i8 17, i8 18, i8 19, i8 20, i8 21, i8 22, i8 23>, align 16
+@llvm_mips_dpsub_u_h_ARG3 = global <16 x i8> <i8 24, i8 25, i8 26, i8 27, i8 28, i8 29, i8 30, i8 31, i8 32, i8 33, i8 34, i8 35, i8 36, i8 37, i8 38, i8 39>, align 16
+@llvm_mips_dpsub_u_h_RES = global <8 x i16> <i16 0, i16 0, i16 0, i16 0, i16 0, i16 0, i16 0, i16 0>, align 16
+
+define void @llvm_mips_dpsub_u_h_test() nounwind {
+entry:
+ %0 = load <8 x i16>* @llvm_mips_dpsub_u_h_ARG1
+ %1 = load <16 x i8>* @llvm_mips_dpsub_u_h_ARG2
+ %2 = load <16 x i8>* @llvm_mips_dpsub_u_h_ARG3
+ %3 = tail call <8 x i16> @llvm.mips.dpsub.u.h(<8 x i16> %0, <16 x i8> %1, <16 x i8> %2)
+ store <8 x i16> %3, <8 x i16>* @llvm_mips_dpsub_u_h_RES
+ ret void
+}
+
+declare <8 x i16> @llvm.mips.dpsub.u.h(<8 x i16>, <16 x i8>, <16 x i8>) nounwind
+
+; CHECK: llvm_mips_dpsub_u_h_test:
+; CHECK: ld.b
+; CHECK: ld.b
+; CHECK: ld.h
+; CHECK: dpsub_u.h
+; CHECK: st.h
+; CHECK: .size llvm_mips_dpsub_u_h_test
+;
+@llvm_mips_dpsub_u_w_ARG1 = global <4 x i32> <i32 0, i32 1, i32 2, i32 3>, align 16
+@llvm_mips_dpsub_u_w_ARG2 = global <8 x i16> <i16 4, i16 5, i16 6, i16 7, i16 8, i16 9, i16 10, i16 11>, align 16
+@llvm_mips_dpsub_u_w_ARG3 = global <8 x i16> <i16 12, i16 13, i16 14, i16 15, i16 16, i16 17, i16 18, i16 19>, align 16
+@llvm_mips_dpsub_u_w_RES = global <4 x i32> <i32 0, i32 0, i32 0, i32 0>, align 16
+
+define void @llvm_mips_dpsub_u_w_test() nounwind {
+entry:
+ %0 = load <4 x i32>* @llvm_mips_dpsub_u_w_ARG1
+ %1 = load <8 x i16>* @llvm_mips_dpsub_u_w_ARG2
+ %2 = load <8 x i16>* @llvm_mips_dpsub_u_w_ARG3
+ %3 = tail call <4 x i32> @llvm.mips.dpsub.u.w(<4 x i32> %0, <8 x i16> %1, <8 x i16> %2)
+ store <4 x i32> %3, <4 x i32>* @llvm_mips_dpsub_u_w_RES
+ ret void
+}
+
+declare <4 x i32> @llvm.mips.dpsub.u.w(<4 x i32>, <8 x i16>, <8 x i16>) nounwind
+
+; CHECK: llvm_mips_dpsub_u_w_test:
+; CHECK: ld.h
+; CHECK: ld.h
+; CHECK: ld.w
+; CHECK: dpsub_u.w
+; CHECK: st.w
+; CHECK: .size llvm_mips_dpsub_u_w_test
+;
+@llvm_mips_dpsub_u_d_ARG1 = global <2 x i64> <i64 0, i64 1>, align 16
+@llvm_mips_dpsub_u_d_ARG2 = global <4 x i32> <i32 2, i32 3, i32 4, i32 5>, align 16
+@llvm_mips_dpsub_u_d_ARG3 = global <4 x i32> <i32 6, i32 7, i32 8, i32 9>, align 16
+@llvm_mips_dpsub_u_d_RES = global <2 x i64> <i64 0, i64 0>, align 16
+
+define void @llvm_mips_dpsub_u_d_test() nounwind {
+entry:
+ %0 = load <2 x i64>* @llvm_mips_dpsub_u_d_ARG1
+ %1 = load <4 x i32>* @llvm_mips_dpsub_u_d_ARG2
+ %2 = load <4 x i32>* @llvm_mips_dpsub_u_d_ARG3
+ %3 = tail call <2 x i64> @llvm.mips.dpsub.u.d(<2 x i64> %0, <4 x i32> %1, <4 x i32> %2)
+ store <2 x i64> %3, <2 x i64>* @llvm_mips_dpsub_u_d_RES
+ ret void
+}
+
+declare <2 x i64> @llvm.mips.dpsub.u.d(<2 x i64>, <4 x i32>, <4 x i32>) nounwind
+
+; CHECK: llvm_mips_dpsub_u_d_test:
+; CHECK: ld.w
+; CHECK: ld.w
+; CHECK: ld.d
+; CHECK: dpsub_u.d
+; CHECK: st.d
+; CHECK: .size llvm_mips_dpsub_u_d_test
+;
diff --git a/test/CodeGen/Mips/msa/elm_copy.ll b/test/CodeGen/Mips/msa/elm_copy.ll
new file mode 100644
index 0000000..f31bb50
--- /dev/null
+++ b/test/CodeGen/Mips/msa/elm_copy.ll
@@ -0,0 +1,116 @@
+; RUN: llc -march=mips -mattr=+msa < %s | FileCheck %s
+
+@llvm_mips_copy_s_b_ARG1 = global <16 x i8> <i8 0, i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7, i8 8, i8 9, i8 10, i8 11, i8 12, i8 13, i8 14, i8 15>, align 16
+@llvm_mips_copy_s_b_RES = global i32 0, align 16
+
+define void @llvm_mips_copy_s_b_test() nounwind {
+entry:
+ %0 = load <16 x i8>* @llvm_mips_copy_s_b_ARG1
+ %1 = tail call i32 @llvm.mips.copy.s.b(<16 x i8> %0, i32 1)
+ store i32 %1, i32* @llvm_mips_copy_s_b_RES
+ ret void
+}
+
+declare i32 @llvm.mips.copy.s.b(<16 x i8>, i32) nounwind
+
+; CHECK: llvm_mips_copy_s_b_test:
+; CHECK: ld.b
+; CHECK: copy_s.b
+; CHECK: sw
+; CHECK: .size llvm_mips_copy_s_b_test
+;
+@llvm_mips_copy_s_h_ARG1 = global <8 x i16> <i16 0, i16 1, i16 2, i16 3, i16 4, i16 5, i16 6, i16 7>, align 16
+@llvm_mips_copy_s_h_RES = global i32 0, align 16
+
+define void @llvm_mips_copy_s_h_test() nounwind {
+entry:
+ %0 = load <8 x i16>* @llvm_mips_copy_s_h_ARG1
+ %1 = tail call i32 @llvm.mips.copy.s.h(<8 x i16> %0, i32 1)
+ store i32 %1, i32* @llvm_mips_copy_s_h_RES
+ ret void
+}
+
+declare i32 @llvm.mips.copy.s.h(<8 x i16>, i32) nounwind
+
+; CHECK: llvm_mips_copy_s_h_test:
+; CHECK: ld.h
+; CHECK: copy_s.h
+; CHECK: sw
+; CHECK: .size llvm_mips_copy_s_h_test
+;
+@llvm_mips_copy_s_w_ARG1 = global <4 x i32> <i32 0, i32 1, i32 2, i32 3>, align 16
+@llvm_mips_copy_s_w_RES = global i32 0, align 16
+
+define void @llvm_mips_copy_s_w_test() nounwind {
+entry:
+ %0 = load <4 x i32>* @llvm_mips_copy_s_w_ARG1
+ %1 = tail call i32 @llvm.mips.copy.s.w(<4 x i32> %0, i32 1)
+ store i32 %1, i32* @llvm_mips_copy_s_w_RES
+ ret void
+}
+
+declare i32 @llvm.mips.copy.s.w(<4 x i32>, i32) nounwind
+
+; CHECK: llvm_mips_copy_s_w_test:
+; CHECK: ld.w
+; CHECK: copy_s.w
+; CHECK: sw
+; CHECK: .size llvm_mips_copy_s_w_test
+;
+@llvm_mips_copy_u_b_ARG1 = global <16 x i8> <i8 0, i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7, i8 8, i8 9, i8 10, i8 11, i8 12, i8 13, i8 14, i8 15>, align 16
+@llvm_mips_copy_u_b_RES = global i32 0, align 16
+
+define void @llvm_mips_copy_u_b_test() nounwind {
+entry:
+ %0 = load <16 x i8>* @llvm_mips_copy_u_b_ARG1
+ %1 = tail call i32 @llvm.mips.copy.u.b(<16 x i8> %0, i32 1)
+ store i32 %1, i32* @llvm_mips_copy_u_b_RES
+ ret void
+}
+
+declare i32 @llvm.mips.copy.u.b(<16 x i8>, i32) nounwind
+
+; CHECK: llvm_mips_copy_u_b_test:
+; CHECK: ld.b
+; CHECK: copy_u.b
+; CHECK: sw
+; CHECK: .size llvm_mips_copy_u_b_test
+;
+@llvm_mips_copy_u_h_ARG1 = global <8 x i16> <i16 0, i16 1, i16 2, i16 3, i16 4, i16 5, i16 6, i16 7>, align 16
+@llvm_mips_copy_u_h_RES = global i32 0, align 16
+
+define void @llvm_mips_copy_u_h_test() nounwind {
+entry:
+ %0 = load <8 x i16>* @llvm_mips_copy_u_h_ARG1
+ %1 = tail call i32 @llvm.mips.copy.u.h(<8 x i16> %0, i32 1)
+ store i32 %1, i32* @llvm_mips_copy_u_h_RES
+ ret void
+}
+
+declare i32 @llvm.mips.copy.u.h(<8 x i16>, i32) nounwind
+
+; CHECK: llvm_mips_copy_u_h_test:
+; CHECK: ld.h
+; CHECK: copy_u.h
+; CHECK: sw
+; CHECK: .size llvm_mips_copy_u_h_test
+;
+@llvm_mips_copy_u_w_ARG1 = global <4 x i32> <i32 0, i32 1, i32 2, i32 3>, align 16
+@llvm_mips_copy_u_w_RES = global i32 0, align 16
+
+define void @llvm_mips_copy_u_w_test() nounwind {
+entry:
+ %0 = load <4 x i32>* @llvm_mips_copy_u_w_ARG1
+ %1 = tail call i32 @llvm.mips.copy.u.w(<4 x i32> %0, i32 1)
+ store i32 %1, i32* @llvm_mips_copy_u_w_RES
+ ret void
+}
+
+declare i32 @llvm.mips.copy.u.w(<4 x i32>, i32) nounwind
+
+; CHECK: llvm_mips_copy_u_w_test:
+; CHECK: ld.w
+; CHECK: copy_u.w
+; CHECK: sw
+; CHECK: .size llvm_mips_copy_u_w_test
+;
diff --git a/test/CodeGen/Mips/msa/elm_insv.ll b/test/CodeGen/Mips/msa/elm_insv.ll
new file mode 100644
index 0000000..c5378eb
--- /dev/null
+++ b/test/CodeGen/Mips/msa/elm_insv.ll
@@ -0,0 +1,68 @@
+; RUN: llc -march=mips -mattr=+msa < %s | FileCheck %s
+
+@llvm_mips_insert_b_ARG1 = global <16 x i8> <i8 0, i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7, i8 8, i8 9, i8 10, i8 11, i8 12, i8 13, i8 14, i8 15>, align 16
+@llvm_mips_insert_b_ARG3 = global i32 27, align 16
+@llvm_mips_insert_b_RES = global <16 x i8> <i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0>, align 16
+
+define void @llvm_mips_insert_b_test() nounwind {
+entry:
+ %0 = load <16 x i8>* @llvm_mips_insert_b_ARG1
+ %1 = load i32* @llvm_mips_insert_b_ARG3
+ %2 = tail call <16 x i8> @llvm.mips.insert.b(<16 x i8> %0, i32 1, i32 %1)
+ store <16 x i8> %2, <16 x i8>* @llvm_mips_insert_b_RES
+ ret void
+}
+
+declare <16 x i8> @llvm.mips.insert.b(<16 x i8>, i32, i32) nounwind
+
+; CHECK: llvm_mips_insert_b_test:
+; CHECK: lw
+; CHECK: ld.b
+; CHECK: insert.b
+; CHECK: st.b
+; CHECK: .size llvm_mips_insert_b_test
+;
+@llvm_mips_insert_h_ARG1 = global <8 x i16> <i16 0, i16 1, i16 2, i16 3, i16 4, i16 5, i16 6, i16 7>, align 16
+@llvm_mips_insert_h_ARG3 = global i32 27, align 16
+@llvm_mips_insert_h_RES = global <8 x i16> <i16 0, i16 0, i16 0, i16 0, i16 0, i16 0, i16 0, i16 0>, align 16
+
+define void @llvm_mips_insert_h_test() nounwind {
+entry:
+ %0 = load <8 x i16>* @llvm_mips_insert_h_ARG1
+ %1 = load i32* @llvm_mips_insert_h_ARG3 + %2 = tail call <8 x i16> @llvm.mips.insert.h(<8 x i16> %0, i32 1, i32 %1) + store <8 x i16> %2, <8 x i16>* @llvm_mips_insert_h_RES + ret void +} + +declare <8 x i16> @llvm.mips.insert.h(<8 x i16>, i32, i32) nounwind + +; CHECK: llvm_mips_insert_h_test: +; CHECK: lw +; CHECK: ld.h +; CHECK: insert.h +; CHECK: st.h +; CHECK: .size llvm_mips_insert_h_test +; +@llvm_mips_insert_w_ARG1 = global <4 x i32> <i32 0, i32 1, i32 2, i32 3>, align 16 +@llvm_mips_insert_w_ARG3 = global i32 27, align 16 +@llvm_mips_insert_w_RES = global <4 x i32> <i32 0, i32 0, i32 0, i32 0>, align 16 + +define void @llvm_mips_insert_w_test() nounwind { +entry: + %0 = load <4 x i32>* @llvm_mips_insert_w_ARG1 + %1 = load i32* @llvm_mips_insert_w_ARG3 + %2 = tail call <4 x i32> @llvm.mips.insert.w(<4 x i32> %0, i32 1, i32 %1) + store <4 x i32> %2, <4 x i32>* @llvm_mips_insert_w_RES + ret void +} + +declare <4 x i32> @llvm.mips.insert.w(<4 x i32>, i32, i32) nounwind + +; CHECK: llvm_mips_insert_w_test: +; CHECK: lw +; CHECK: ld.w +; CHECK: insert.w +; CHECK: st.w +; CHECK: .size llvm_mips_insert_w_test +; diff --git a/test/CodeGen/Mips/msa/i5-a.ll b/test/CodeGen/Mips/msa/i5-a.ll new file mode 100644 index 0000000..fe93534 --- /dev/null +++ b/test/CodeGen/Mips/msa/i5-a.ll @@ -0,0 +1,78 @@ +; RUN: llc -march=mips -mattr=+msa < %s | FileCheck %s + +@llvm_mips_addvi_b_ARG1 = global <16 x i8> <i8 0, i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7, i8 8, i8 9, i8 10, i8 11, i8 12, i8 13, i8 14, i8 15>, align 16 +@llvm_mips_addvi_b_RES = global <16 x i8> <i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0>, align 16 + +define void @llvm_mips_addvi_b_test() nounwind { +entry: + %0 = load <16 x i8>* @llvm_mips_addvi_b_ARG1 + %1 = tail call <16 x i8> @llvm.mips.addvi.b(<16 x i8> %0, i32 14) + store <16 x i8> %1, <16 x i8>* @llvm_mips_addvi_b_RES + ret void +} + +declare <16 x i8> @llvm.mips.addvi.b(<16 x i8>, i32) nounwind + +; CHECK: llvm_mips_addvi_b_test: +; CHECK: ld.b +; CHECK: addvi.b +; CHECK: st.b +; CHECK: .size llvm_mips_addvi_b_test +; +@llvm_mips_addvi_h_ARG1 = global <8 x i16> <i16 0, i16 1, i16 2, i16 3, i16 4, i16 5, i16 6, i16 7>, align 16 +@llvm_mips_addvi_h_RES = global <8 x i16> <i16 0, i16 0, i16 0, i16 0, i16 0, i16 0, i16 0, i16 0>, align 16 + +define void @llvm_mips_addvi_h_test() nounwind { +entry: + %0 = load <8 x i16>* @llvm_mips_addvi_h_ARG1 + %1 = tail call <8 x i16> @llvm.mips.addvi.h(<8 x i16> %0, i32 14) + store <8 x i16> %1, <8 x i16>* @llvm_mips_addvi_h_RES + ret void +} + +declare <8 x i16> @llvm.mips.addvi.h(<8 x i16>, i32) nounwind + +; CHECK: llvm_mips_addvi_h_test: +; CHECK: ld.h +; CHECK: addvi.h +; CHECK: st.h +; CHECK: .size llvm_mips_addvi_h_test +; +@llvm_mips_addvi_w_ARG1 = global <4 x i32> <i32 0, i32 1, i32 2, i32 3>, align 16 +@llvm_mips_addvi_w_RES = global <4 x i32> <i32 0, i32 0, i32 0, i32 0>, align 16 + +define void @llvm_mips_addvi_w_test() nounwind { +entry: + %0 = load <4 x i32>* @llvm_mips_addvi_w_ARG1 + %1 = tail call <4 x i32> @llvm.mips.addvi.w(<4 x i32> %0, i32 14) + store <4 x i32> %1, <4 x i32>* @llvm_mips_addvi_w_RES + ret void +} + +declare <4 x i32> @llvm.mips.addvi.w(<4 x i32>, i32) nounwind + +; CHECK: llvm_mips_addvi_w_test: +; CHECK: ld.w +; CHECK: addvi.w +; CHECK: st.w +; CHECK: .size llvm_mips_addvi_w_test +; +@llvm_mips_addvi_d_ARG1 = global <2 x i64> <i64 0, i64 1>, align 16 +@llvm_mips_addvi_d_RES = global <2 x i64> <i64 0, i64 0>, align 16 + +define void @llvm_mips_addvi_d_test() 
nounwind { +entry: + %0 = load <2 x i64>* @llvm_mips_addvi_d_ARG1 + %1 = tail call <2 x i64> @llvm.mips.addvi.d(<2 x i64> %0, i32 14) + store <2 x i64> %1, <2 x i64>* @llvm_mips_addvi_d_RES + ret void +} + +declare <2 x i64> @llvm.mips.addvi.d(<2 x i64>, i32) nounwind + +; CHECK: llvm_mips_addvi_d_test: +; CHECK: ld.d +; CHECK: addvi.d +; CHECK: st.d +; CHECK: .size llvm_mips_addvi_d_test +; diff --git a/test/CodeGen/Mips/msa/i5-b.ll b/test/CodeGen/Mips/msa/i5-b.ll new file mode 100644 index 0000000..87d4527 --- /dev/null +++ b/test/CodeGen/Mips/msa/i5-b.ll @@ -0,0 +1,382 @@ +; RUN: llc -march=mips -mattr=+msa < %s | FileCheck %s + +@llvm_mips_bclri_b_ARG1 = global <16 x i8> <i8 0, i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7, i8 8, i8 9, i8 10, i8 11, i8 12, i8 13, i8 14, i8 15>, align 16 +@llvm_mips_bclri_b_RES = global <16 x i8> <i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0>, align 16 + +define void @llvm_mips_bclri_b_test() nounwind { +entry: + %0 = load <16 x i8>* @llvm_mips_bclri_b_ARG1 + %1 = tail call <16 x i8> @llvm.mips.bclri.b(<16 x i8> %0, i32 7) + store <16 x i8> %1, <16 x i8>* @llvm_mips_bclri_b_RES + ret void +} + +declare <16 x i8> @llvm.mips.bclri.b(<16 x i8>, i32) nounwind + +; CHECK: llvm_mips_bclri_b_test: +; CHECK: ld.b +; CHECK: bclri.b +; CHECK: st.b +; CHECK: .size llvm_mips_bclri_b_test +; +@llvm_mips_bclri_h_ARG1 = global <8 x i16> <i16 0, i16 1, i16 2, i16 3, i16 4, i16 5, i16 6, i16 7>, align 16 +@llvm_mips_bclri_h_RES = global <8 x i16> <i16 0, i16 0, i16 0, i16 0, i16 0, i16 0, i16 0, i16 0>, align 16 + +define void @llvm_mips_bclri_h_test() nounwind { +entry: + %0 = load <8 x i16>* @llvm_mips_bclri_h_ARG1 + %1 = tail call <8 x i16> @llvm.mips.bclri.h(<8 x i16> %0, i32 7) + store <8 x i16> %1, <8 x i16>* @llvm_mips_bclri_h_RES + ret void +} + +declare <8 x i16> @llvm.mips.bclri.h(<8 x i16>, i32) nounwind + +; CHECK: llvm_mips_bclri_h_test: +; CHECK: ld.h +; CHECK: bclri.h +; CHECK: st.h +; CHECK: .size llvm_mips_bclri_h_test +; +@llvm_mips_bclri_w_ARG1 = global <4 x i32> <i32 0, i32 1, i32 2, i32 3>, align 16 +@llvm_mips_bclri_w_RES = global <4 x i32> <i32 0, i32 0, i32 0, i32 0>, align 16 + +define void @llvm_mips_bclri_w_test() nounwind { +entry: + %0 = load <4 x i32>* @llvm_mips_bclri_w_ARG1 + %1 = tail call <4 x i32> @llvm.mips.bclri.w(<4 x i32> %0, i32 7) + store <4 x i32> %1, <4 x i32>* @llvm_mips_bclri_w_RES + ret void +} + +declare <4 x i32> @llvm.mips.bclri.w(<4 x i32>, i32) nounwind + +; CHECK: llvm_mips_bclri_w_test: +; CHECK: ld.w +; CHECK: bclri.w +; CHECK: st.w +; CHECK: .size llvm_mips_bclri_w_test +; +@llvm_mips_bclri_d_ARG1 = global <2 x i64> <i64 0, i64 1>, align 16 +@llvm_mips_bclri_d_RES = global <2 x i64> <i64 0, i64 0>, align 16 + +define void @llvm_mips_bclri_d_test() nounwind { +entry: + %0 = load <2 x i64>* @llvm_mips_bclri_d_ARG1 + %1 = tail call <2 x i64> @llvm.mips.bclri.d(<2 x i64> %0, i32 7) + store <2 x i64> %1, <2 x i64>* @llvm_mips_bclri_d_RES + ret void +} + +declare <2 x i64> @llvm.mips.bclri.d(<2 x i64>, i32) nounwind + +; CHECK: llvm_mips_bclri_d_test: +; CHECK: ld.d +; CHECK: bclri.d +; CHECK: st.d +; CHECK: .size llvm_mips_bclri_d_test +; +@llvm_mips_binsli_b_ARG1 = global <16 x i8> <i8 0, i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7, i8 8, i8 9, i8 10, i8 11, i8 12, i8 13, i8 14, i8 15>, align 16 +@llvm_mips_binsli_b_RES = global <16 x i8> <i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0>, align 16 + +define void 
@llvm_mips_binsli_b_test() nounwind { +entry: + %0 = load <16 x i8>* @llvm_mips_binsli_b_ARG1 + %1 = tail call <16 x i8> @llvm.mips.binsli.b(<16 x i8> %0, i32 7) + store <16 x i8> %1, <16 x i8>* @llvm_mips_binsli_b_RES + ret void +} + +declare <16 x i8> @llvm.mips.binsli.b(<16 x i8>, i32) nounwind + +; CHECK: llvm_mips_binsli_b_test: +; CHECK: ld.b +; CHECK: binsli.b +; CHECK: st.b +; CHECK: .size llvm_mips_binsli_b_test +; +@llvm_mips_binsli_h_ARG1 = global <8 x i16> <i16 0, i16 1, i16 2, i16 3, i16 4, i16 5, i16 6, i16 7>, align 16 +@llvm_mips_binsli_h_RES = global <8 x i16> <i16 0, i16 0, i16 0, i16 0, i16 0, i16 0, i16 0, i16 0>, align 16 + +define void @llvm_mips_binsli_h_test() nounwind { +entry: + %0 = load <8 x i16>* @llvm_mips_binsli_h_ARG1 + %1 = tail call <8 x i16> @llvm.mips.binsli.h(<8 x i16> %0, i32 7) + store <8 x i16> %1, <8 x i16>* @llvm_mips_binsli_h_RES + ret void +} + +declare <8 x i16> @llvm.mips.binsli.h(<8 x i16>, i32) nounwind + +; CHECK: llvm_mips_binsli_h_test: +; CHECK: ld.h +; CHECK: binsli.h +; CHECK: st.h +; CHECK: .size llvm_mips_binsli_h_test +; +@llvm_mips_binsli_w_ARG1 = global <4 x i32> <i32 0, i32 1, i32 2, i32 3>, align 16 +@llvm_mips_binsli_w_RES = global <4 x i32> <i32 0, i32 0, i32 0, i32 0>, align 16 + +define void @llvm_mips_binsli_w_test() nounwind { +entry: + %0 = load <4 x i32>* @llvm_mips_binsli_w_ARG1 + %1 = tail call <4 x i32> @llvm.mips.binsli.w(<4 x i32> %0, i32 7) + store <4 x i32> %1, <4 x i32>* @llvm_mips_binsli_w_RES + ret void +} + +declare <4 x i32> @llvm.mips.binsli.w(<4 x i32>, i32) nounwind + +; CHECK: llvm_mips_binsli_w_test: +; CHECK: ld.w +; CHECK: binsli.w +; CHECK: st.w +; CHECK: .size llvm_mips_binsli_w_test +; +@llvm_mips_binsli_d_ARG1 = global <2 x i64> <i64 0, i64 1>, align 16 +@llvm_mips_binsli_d_RES = global <2 x i64> <i64 0, i64 0>, align 16 + +define void @llvm_mips_binsli_d_test() nounwind { +entry: + %0 = load <2 x i64>* @llvm_mips_binsli_d_ARG1 + %1 = tail call <2 x i64> @llvm.mips.binsli.d(<2 x i64> %0, i32 7) + store <2 x i64> %1, <2 x i64>* @llvm_mips_binsli_d_RES + ret void +} + +declare <2 x i64> @llvm.mips.binsli.d(<2 x i64>, i32) nounwind + +; CHECK: llvm_mips_binsli_d_test: +; CHECK: ld.d +; CHECK: binsli.d +; CHECK: st.d +; CHECK: .size llvm_mips_binsli_d_test +; +@llvm_mips_binsri_b_ARG1 = global <16 x i8> <i8 0, i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7, i8 8, i8 9, i8 10, i8 11, i8 12, i8 13, i8 14, i8 15>, align 16 +@llvm_mips_binsri_b_RES = global <16 x i8> <i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0>, align 16 + +define void @llvm_mips_binsri_b_test() nounwind { +entry: + %0 = load <16 x i8>* @llvm_mips_binsri_b_ARG1 + %1 = tail call <16 x i8> @llvm.mips.binsri.b(<16 x i8> %0, i32 7) + store <16 x i8> %1, <16 x i8>* @llvm_mips_binsri_b_RES + ret void +} + +declare <16 x i8> @llvm.mips.binsri.b(<16 x i8>, i32) nounwind + +; CHECK: llvm_mips_binsri_b_test: +; CHECK: ld.b +; CHECK: binsri.b +; CHECK: st.b +; CHECK: .size llvm_mips_binsri_b_test +; +@llvm_mips_binsri_h_ARG1 = global <8 x i16> <i16 0, i16 1, i16 2, i16 3, i16 4, i16 5, i16 6, i16 7>, align 16 +@llvm_mips_binsri_h_RES = global <8 x i16> <i16 0, i16 0, i16 0, i16 0, i16 0, i16 0, i16 0, i16 0>, align 16 + +define void @llvm_mips_binsri_h_test() nounwind { +entry: + %0 = load <8 x i16>* @llvm_mips_binsri_h_ARG1 + %1 = tail call <8 x i16> @llvm.mips.binsri.h(<8 x i16> %0, i32 7) + store <8 x i16> %1, <8 x i16>* @llvm_mips_binsri_h_RES + ret void +} + +declare <8 x i16> @llvm.mips.binsri.h(<8 x 
i16>, i32) nounwind + +; CHECK: llvm_mips_binsri_h_test: +; CHECK: ld.h +; CHECK: binsri.h +; CHECK: st.h +; CHECK: .size llvm_mips_binsri_h_test +; +@llvm_mips_binsri_w_ARG1 = global <4 x i32> <i32 0, i32 1, i32 2, i32 3>, align 16 +@llvm_mips_binsri_w_RES = global <4 x i32> <i32 0, i32 0, i32 0, i32 0>, align 16 + +define void @llvm_mips_binsri_w_test() nounwind { +entry: + %0 = load <4 x i32>* @llvm_mips_binsri_w_ARG1 + %1 = tail call <4 x i32> @llvm.mips.binsri.w(<4 x i32> %0, i32 7) + store <4 x i32> %1, <4 x i32>* @llvm_mips_binsri_w_RES + ret void +} + +declare <4 x i32> @llvm.mips.binsri.w(<4 x i32>, i32) nounwind + +; CHECK: llvm_mips_binsri_w_test: +; CHECK: ld.w +; CHECK: binsri.w +; CHECK: st.w +; CHECK: .size llvm_mips_binsri_w_test +; +@llvm_mips_binsri_d_ARG1 = global <2 x i64> <i64 0, i64 1>, align 16 +@llvm_mips_binsri_d_RES = global <2 x i64> <i64 0, i64 0>, align 16 + +define void @llvm_mips_binsri_d_test() nounwind { +entry: + %0 = load <2 x i64>* @llvm_mips_binsri_d_ARG1 + %1 = tail call <2 x i64> @llvm.mips.binsri.d(<2 x i64> %0, i32 7) + store <2 x i64> %1, <2 x i64>* @llvm_mips_binsri_d_RES + ret void +} + +declare <2 x i64> @llvm.mips.binsri.d(<2 x i64>, i32) nounwind + +; CHECK: llvm_mips_binsri_d_test: +; CHECK: ld.d +; CHECK: binsri.d +; CHECK: st.d +; CHECK: .size llvm_mips_binsri_d_test +; +@llvm_mips_bnegi_b_ARG1 = global <16 x i8> <i8 0, i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7, i8 8, i8 9, i8 10, i8 11, i8 12, i8 13, i8 14, i8 15>, align 16 +@llvm_mips_bnegi_b_RES = global <16 x i8> <i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0>, align 16 + +define void @llvm_mips_bnegi_b_test() nounwind { +entry: + %0 = load <16 x i8>* @llvm_mips_bnegi_b_ARG1 + %1 = tail call <16 x i8> @llvm.mips.bnegi.b(<16 x i8> %0, i32 7) + store <16 x i8> %1, <16 x i8>* @llvm_mips_bnegi_b_RES + ret void +} + +declare <16 x i8> @llvm.mips.bnegi.b(<16 x i8>, i32) nounwind + +; CHECK: llvm_mips_bnegi_b_test: +; CHECK: ld.b +; CHECK: bnegi.b +; CHECK: st.b +; CHECK: .size llvm_mips_bnegi_b_test +; +@llvm_mips_bnegi_h_ARG1 = global <8 x i16> <i16 0, i16 1, i16 2, i16 3, i16 4, i16 5, i16 6, i16 7>, align 16 +@llvm_mips_bnegi_h_RES = global <8 x i16> <i16 0, i16 0, i16 0, i16 0, i16 0, i16 0, i16 0, i16 0>, align 16 + +define void @llvm_mips_bnegi_h_test() nounwind { +entry: + %0 = load <8 x i16>* @llvm_mips_bnegi_h_ARG1 + %1 = tail call <8 x i16> @llvm.mips.bnegi.h(<8 x i16> %0, i32 7) + store <8 x i16> %1, <8 x i16>* @llvm_mips_bnegi_h_RES + ret void +} + +declare <8 x i16> @llvm.mips.bnegi.h(<8 x i16>, i32) nounwind + +; CHECK: llvm_mips_bnegi_h_test: +; CHECK: ld.h +; CHECK: bnegi.h +; CHECK: st.h +; CHECK: .size llvm_mips_bnegi_h_test +; +@llvm_mips_bnegi_w_ARG1 = global <4 x i32> <i32 0, i32 1, i32 2, i32 3>, align 16 +@llvm_mips_bnegi_w_RES = global <4 x i32> <i32 0, i32 0, i32 0, i32 0>, align 16 + +define void @llvm_mips_bnegi_w_test() nounwind { +entry: + %0 = load <4 x i32>* @llvm_mips_bnegi_w_ARG1 + %1 = tail call <4 x i32> @llvm.mips.bnegi.w(<4 x i32> %0, i32 7) + store <4 x i32> %1, <4 x i32>* @llvm_mips_bnegi_w_RES + ret void +} + +declare <4 x i32> @llvm.mips.bnegi.w(<4 x i32>, i32) nounwind + +; CHECK: llvm_mips_bnegi_w_test: +; CHECK: ld.w +; CHECK: bnegi.w +; CHECK: st.w +; CHECK: .size llvm_mips_bnegi_w_test +; +@llvm_mips_bnegi_d_ARG1 = global <2 x i64> <i64 0, i64 1>, align 16 +@llvm_mips_bnegi_d_RES = global <2 x i64> <i64 0, i64 0>, align 16 + +define void @llvm_mips_bnegi_d_test() nounwind { +entry: + %0 = load <2 x 
i64>* @llvm_mips_bnegi_d_ARG1 + %1 = tail call <2 x i64> @llvm.mips.bnegi.d(<2 x i64> %0, i32 7) + store <2 x i64> %1, <2 x i64>* @llvm_mips_bnegi_d_RES + ret void +} + +declare <2 x i64> @llvm.mips.bnegi.d(<2 x i64>, i32) nounwind + +; CHECK: llvm_mips_bnegi_d_test: +; CHECK: ld.d +; CHECK: bnegi.d +; CHECK: st.d +; CHECK: .size llvm_mips_bnegi_d_test +; +@llvm_mips_bseti_b_ARG1 = global <16 x i8> <i8 0, i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7, i8 8, i8 9, i8 10, i8 11, i8 12, i8 13, i8 14, i8 15>, align 16 +@llvm_mips_bseti_b_RES = global <16 x i8> <i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0>, align 16 + +define void @llvm_mips_bseti_b_test() nounwind { +entry: + %0 = load <16 x i8>* @llvm_mips_bseti_b_ARG1 + %1 = tail call <16 x i8> @llvm.mips.bseti.b(<16 x i8> %0, i32 7) + store <16 x i8> %1, <16 x i8>* @llvm_mips_bseti_b_RES + ret void +} + +declare <16 x i8> @llvm.mips.bseti.b(<16 x i8>, i32) nounwind + +; CHECK: llvm_mips_bseti_b_test: +; CHECK: ld.b +; CHECK: bseti.b +; CHECK: st.b +; CHECK: .size llvm_mips_bseti_b_test +; +@llvm_mips_bseti_h_ARG1 = global <8 x i16> <i16 0, i16 1, i16 2, i16 3, i16 4, i16 5, i16 6, i16 7>, align 16 +@llvm_mips_bseti_h_RES = global <8 x i16> <i16 0, i16 0, i16 0, i16 0, i16 0, i16 0, i16 0, i16 0>, align 16 + +define void @llvm_mips_bseti_h_test() nounwind { +entry: + %0 = load <8 x i16>* @llvm_mips_bseti_h_ARG1 + %1 = tail call <8 x i16> @llvm.mips.bseti.h(<8 x i16> %0, i32 7) + store <8 x i16> %1, <8 x i16>* @llvm_mips_bseti_h_RES + ret void +} + +declare <8 x i16> @llvm.mips.bseti.h(<8 x i16>, i32) nounwind + +; CHECK: llvm_mips_bseti_h_test: +; CHECK: ld.h +; CHECK: bseti.h +; CHECK: st.h +; CHECK: .size llvm_mips_bseti_h_test +; +@llvm_mips_bseti_w_ARG1 = global <4 x i32> <i32 0, i32 1, i32 2, i32 3>, align 16 +@llvm_mips_bseti_w_RES = global <4 x i32> <i32 0, i32 0, i32 0, i32 0>, align 16 + +define void @llvm_mips_bseti_w_test() nounwind { +entry: + %0 = load <4 x i32>* @llvm_mips_bseti_w_ARG1 + %1 = tail call <4 x i32> @llvm.mips.bseti.w(<4 x i32> %0, i32 7) + store <4 x i32> %1, <4 x i32>* @llvm_mips_bseti_w_RES + ret void +} + +declare <4 x i32> @llvm.mips.bseti.w(<4 x i32>, i32) nounwind + +; CHECK: llvm_mips_bseti_w_test: +; CHECK: ld.w +; CHECK: bseti.w +; CHECK: st.w +; CHECK: .size llvm_mips_bseti_w_test +; +@llvm_mips_bseti_d_ARG1 = global <2 x i64> <i64 0, i64 1>, align 16 +@llvm_mips_bseti_d_RES = global <2 x i64> <i64 0, i64 0>, align 16 + +define void @llvm_mips_bseti_d_test() nounwind { +entry: + %0 = load <2 x i64>* @llvm_mips_bseti_d_ARG1 + %1 = tail call <2 x i64> @llvm.mips.bseti.d(<2 x i64> %0, i32 7) + store <2 x i64> %1, <2 x i64>* @llvm_mips_bseti_d_RES + ret void +} + +declare <2 x i64> @llvm.mips.bseti.d(<2 x i64>, i32) nounwind + +; CHECK: llvm_mips_bseti_d_test: +; CHECK: ld.d +; CHECK: bseti.d +; CHECK: st.d +; CHECK: .size llvm_mips_bseti_d_test +; diff --git a/test/CodeGen/Mips/msa/i5-c.ll b/test/CodeGen/Mips/msa/i5-c.ll new file mode 100644 index 0000000..5e9eed8 --- /dev/null +++ b/test/CodeGen/Mips/msa/i5-c.ll @@ -0,0 +1,382 @@ +; RUN: llc -march=mips -mattr=+msa < %s | FileCheck %s + +@llvm_mips_ceqi_b_ARG1 = global <16 x i8> <i8 0, i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7, i8 8, i8 9, i8 10, i8 11, i8 12, i8 13, i8 14, i8 15>, align 16 +@llvm_mips_ceqi_b_RES = global <16 x i8> <i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0>, align 16 + +define void @llvm_mips_ceqi_b_test() nounwind { +entry: + %0 = load <16 x 
i8>* @llvm_mips_ceqi_b_ARG1 + %1 = tail call <16 x i8> @llvm.mips.ceqi.b(<16 x i8> %0, i32 14) + store <16 x i8> %1, <16 x i8>* @llvm_mips_ceqi_b_RES + ret void +} + +declare <16 x i8> @llvm.mips.ceqi.b(<16 x i8>, i32) nounwind + +; CHECK: llvm_mips_ceqi_b_test: +; CHECK: ld.b +; CHECK: ceqi.b +; CHECK: st.b +; CHECK: .size llvm_mips_ceqi_b_test +; +@llvm_mips_ceqi_h_ARG1 = global <8 x i16> <i16 0, i16 1, i16 2, i16 3, i16 4, i16 5, i16 6, i16 7>, align 16 +@llvm_mips_ceqi_h_RES = global <8 x i16> <i16 0, i16 0, i16 0, i16 0, i16 0, i16 0, i16 0, i16 0>, align 16 + +define void @llvm_mips_ceqi_h_test() nounwind { +entry: + %0 = load <8 x i16>* @llvm_mips_ceqi_h_ARG1 + %1 = tail call <8 x i16> @llvm.mips.ceqi.h(<8 x i16> %0, i32 14) + store <8 x i16> %1, <8 x i16>* @llvm_mips_ceqi_h_RES + ret void +} + +declare <8 x i16> @llvm.mips.ceqi.h(<8 x i16>, i32) nounwind + +; CHECK: llvm_mips_ceqi_h_test: +; CHECK: ld.h +; CHECK: ceqi.h +; CHECK: st.h +; CHECK: .size llvm_mips_ceqi_h_test +; +@llvm_mips_ceqi_w_ARG1 = global <4 x i32> <i32 0, i32 1, i32 2, i32 3>, align 16 +@llvm_mips_ceqi_w_RES = global <4 x i32> <i32 0, i32 0, i32 0, i32 0>, align 16 + +define void @llvm_mips_ceqi_w_test() nounwind { +entry: + %0 = load <4 x i32>* @llvm_mips_ceqi_w_ARG1 + %1 = tail call <4 x i32> @llvm.mips.ceqi.w(<4 x i32> %0, i32 14) + store <4 x i32> %1, <4 x i32>* @llvm_mips_ceqi_w_RES + ret void +} + +declare <4 x i32> @llvm.mips.ceqi.w(<4 x i32>, i32) nounwind + +; CHECK: llvm_mips_ceqi_w_test: +; CHECK: ld.w +; CHECK: ceqi.w +; CHECK: st.w +; CHECK: .size llvm_mips_ceqi_w_test +; +@llvm_mips_ceqi_d_ARG1 = global <2 x i64> <i64 0, i64 1>, align 16 +@llvm_mips_ceqi_d_RES = global <2 x i64> <i64 0, i64 0>, align 16 + +define void @llvm_mips_ceqi_d_test() nounwind { +entry: + %0 = load <2 x i64>* @llvm_mips_ceqi_d_ARG1 + %1 = tail call <2 x i64> @llvm.mips.ceqi.d(<2 x i64> %0, i32 14) + store <2 x i64> %1, <2 x i64>* @llvm_mips_ceqi_d_RES + ret void +} + +declare <2 x i64> @llvm.mips.ceqi.d(<2 x i64>, i32) nounwind + +; CHECK: llvm_mips_ceqi_d_test: +; CHECK: ld.d +; CHECK: ceqi.d +; CHECK: st.d +; CHECK: .size llvm_mips_ceqi_d_test +; +@llvm_mips_clei_s_b_ARG1 = global <16 x i8> <i8 0, i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7, i8 8, i8 9, i8 10, i8 11, i8 12, i8 13, i8 14, i8 15>, align 16 +@llvm_mips_clei_s_b_RES = global <16 x i8> <i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0>, align 16 + +define void @llvm_mips_clei_s_b_test() nounwind { +entry: + %0 = load <16 x i8>* @llvm_mips_clei_s_b_ARG1 + %1 = tail call <16 x i8> @llvm.mips.clei.s.b(<16 x i8> %0, i32 14) + store <16 x i8> %1, <16 x i8>* @llvm_mips_clei_s_b_RES + ret void +} + +declare <16 x i8> @llvm.mips.clei.s.b(<16 x i8>, i32) nounwind + +; CHECK: llvm_mips_clei_s_b_test: +; CHECK: ld.b +; CHECK: clei_s.b +; CHECK: st.b +; CHECK: .size llvm_mips_clei_s_b_test +; +@llvm_mips_clei_s_h_ARG1 = global <8 x i16> <i16 0, i16 1, i16 2, i16 3, i16 4, i16 5, i16 6, i16 7>, align 16 +@llvm_mips_clei_s_h_RES = global <8 x i16> <i16 0, i16 0, i16 0, i16 0, i16 0, i16 0, i16 0, i16 0>, align 16 + +define void @llvm_mips_clei_s_h_test() nounwind { +entry: + %0 = load <8 x i16>* @llvm_mips_clei_s_h_ARG1 + %1 = tail call <8 x i16> @llvm.mips.clei.s.h(<8 x i16> %0, i32 14) + store <8 x i16> %1, <8 x i16>* @llvm_mips_clei_s_h_RES + ret void +} + +declare <8 x i16> @llvm.mips.clei.s.h(<8 x i16>, i32) nounwind + +; CHECK: llvm_mips_clei_s_h_test: +; CHECK: ld.h +; CHECK: clei_s.h +; CHECK: st.h +; CHECK: .size 
llvm_mips_clei_s_h_test +; +@llvm_mips_clei_s_w_ARG1 = global <4 x i32> <i32 0, i32 1, i32 2, i32 3>, align 16 +@llvm_mips_clei_s_w_RES = global <4 x i32> <i32 0, i32 0, i32 0, i32 0>, align 16 + +define void @llvm_mips_clei_s_w_test() nounwind { +entry: + %0 = load <4 x i32>* @llvm_mips_clei_s_w_ARG1 + %1 = tail call <4 x i32> @llvm.mips.clei.s.w(<4 x i32> %0, i32 14) + store <4 x i32> %1, <4 x i32>* @llvm_mips_clei_s_w_RES + ret void +} + +declare <4 x i32> @llvm.mips.clei.s.w(<4 x i32>, i32) nounwind + +; CHECK: llvm_mips_clei_s_w_test: +; CHECK: ld.w +; CHECK: clei_s.w +; CHECK: st.w +; CHECK: .size llvm_mips_clei_s_w_test +; +@llvm_mips_clei_s_d_ARG1 = global <2 x i64> <i64 0, i64 1>, align 16 +@llvm_mips_clei_s_d_RES = global <2 x i64> <i64 0, i64 0>, align 16 + +define void @llvm_mips_clei_s_d_test() nounwind { +entry: + %0 = load <2 x i64>* @llvm_mips_clei_s_d_ARG1 + %1 = tail call <2 x i64> @llvm.mips.clei.s.d(<2 x i64> %0, i32 14) + store <2 x i64> %1, <2 x i64>* @llvm_mips_clei_s_d_RES + ret void +} + +declare <2 x i64> @llvm.mips.clei.s.d(<2 x i64>, i32) nounwind + +; CHECK: llvm_mips_clei_s_d_test: +; CHECK: ld.d +; CHECK: clei_s.d +; CHECK: st.d +; CHECK: .size llvm_mips_clei_s_d_test +; +@llvm_mips_clei_u_b_ARG1 = global <16 x i8> <i8 0, i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7, i8 8, i8 9, i8 10, i8 11, i8 12, i8 13, i8 14, i8 15>, align 16 +@llvm_mips_clei_u_b_RES = global <16 x i8> <i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0>, align 16 + +define void @llvm_mips_clei_u_b_test() nounwind { +entry: + %0 = load <16 x i8>* @llvm_mips_clei_u_b_ARG1 + %1 = tail call <16 x i8> @llvm.mips.clei.u.b(<16 x i8> %0, i32 14) + store <16 x i8> %1, <16 x i8>* @llvm_mips_clei_u_b_RES + ret void +} + +declare <16 x i8> @llvm.mips.clei.u.b(<16 x i8>, i32) nounwind + +; CHECK: llvm_mips_clei_u_b_test: +; CHECK: ld.b +; CHECK: clei_u.b +; CHECK: st.b +; CHECK: .size llvm_mips_clei_u_b_test +; +@llvm_mips_clei_u_h_ARG1 = global <8 x i16> <i16 0, i16 1, i16 2, i16 3, i16 4, i16 5, i16 6, i16 7>, align 16 +@llvm_mips_clei_u_h_RES = global <8 x i16> <i16 0, i16 0, i16 0, i16 0, i16 0, i16 0, i16 0, i16 0>, align 16 + +define void @llvm_mips_clei_u_h_test() nounwind { +entry: + %0 = load <8 x i16>* @llvm_mips_clei_u_h_ARG1 + %1 = tail call <8 x i16> @llvm.mips.clei.u.h(<8 x i16> %0, i32 14) + store <8 x i16> %1, <8 x i16>* @llvm_mips_clei_u_h_RES + ret void +} + +declare <8 x i16> @llvm.mips.clei.u.h(<8 x i16>, i32) nounwind + +; CHECK: llvm_mips_clei_u_h_test: +; CHECK: ld.h +; CHECK: clei_u.h +; CHECK: st.h +; CHECK: .size llvm_mips_clei_u_h_test +; +@llvm_mips_clei_u_w_ARG1 = global <4 x i32> <i32 0, i32 1, i32 2, i32 3>, align 16 +@llvm_mips_clei_u_w_RES = global <4 x i32> <i32 0, i32 0, i32 0, i32 0>, align 16 + +define void @llvm_mips_clei_u_w_test() nounwind { +entry: + %0 = load <4 x i32>* @llvm_mips_clei_u_w_ARG1 + %1 = tail call <4 x i32> @llvm.mips.clei.u.w(<4 x i32> %0, i32 14) + store <4 x i32> %1, <4 x i32>* @llvm_mips_clei_u_w_RES + ret void +} + +declare <4 x i32> @llvm.mips.clei.u.w(<4 x i32>, i32) nounwind + +; CHECK: llvm_mips_clei_u_w_test: +; CHECK: ld.w +; CHECK: clei_u.w +; CHECK: st.w +; CHECK: .size llvm_mips_clei_u_w_test +; +@llvm_mips_clei_u_d_ARG1 = global <2 x i64> <i64 0, i64 1>, align 16 +@llvm_mips_clei_u_d_RES = global <2 x i64> <i64 0, i64 0>, align 16 + +define void @llvm_mips_clei_u_d_test() nounwind { +entry: + %0 = load <2 x i64>* @llvm_mips_clei_u_d_ARG1 + %1 = tail call <2 x i64> @llvm.mips.clei.u.d(<2 x 
i64> %0, i32 14) + store <2 x i64> %1, <2 x i64>* @llvm_mips_clei_u_d_RES + ret void +} + +declare <2 x i64> @llvm.mips.clei.u.d(<2 x i64>, i32) nounwind + +; CHECK: llvm_mips_clei_u_d_test: +; CHECK: ld.d +; CHECK: clei_u.d +; CHECK: st.d +; CHECK: .size llvm_mips_clei_u_d_test +; +@llvm_mips_clti_s_b_ARG1 = global <16 x i8> <i8 0, i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7, i8 8, i8 9, i8 10, i8 11, i8 12, i8 13, i8 14, i8 15>, align 16 +@llvm_mips_clti_s_b_RES = global <16 x i8> <i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0>, align 16 + +define void @llvm_mips_clti_s_b_test() nounwind { +entry: + %0 = load <16 x i8>* @llvm_mips_clti_s_b_ARG1 + %1 = tail call <16 x i8> @llvm.mips.clti.s.b(<16 x i8> %0, i32 14) + store <16 x i8> %1, <16 x i8>* @llvm_mips_clti_s_b_RES + ret void +} + +declare <16 x i8> @llvm.mips.clti.s.b(<16 x i8>, i32) nounwind + +; CHECK: llvm_mips_clti_s_b_test: +; CHECK: ld.b +; CHECK: clti_s.b +; CHECK: st.b +; CHECK: .size llvm_mips_clti_s_b_test +; +@llvm_mips_clti_s_h_ARG1 = global <8 x i16> <i16 0, i16 1, i16 2, i16 3, i16 4, i16 5, i16 6, i16 7>, align 16 +@llvm_mips_clti_s_h_RES = global <8 x i16> <i16 0, i16 0, i16 0, i16 0, i16 0, i16 0, i16 0, i16 0>, align 16 + +define void @llvm_mips_clti_s_h_test() nounwind { +entry: + %0 = load <8 x i16>* @llvm_mips_clti_s_h_ARG1 + %1 = tail call <8 x i16> @llvm.mips.clti.s.h(<8 x i16> %0, i32 14) + store <8 x i16> %1, <8 x i16>* @llvm_mips_clti_s_h_RES + ret void +} + +declare <8 x i16> @llvm.mips.clti.s.h(<8 x i16>, i32) nounwind + +; CHECK: llvm_mips_clti_s_h_test: +; CHECK: ld.h +; CHECK: clti_s.h +; CHECK: st.h +; CHECK: .size llvm_mips_clti_s_h_test +; +@llvm_mips_clti_s_w_ARG1 = global <4 x i32> <i32 0, i32 1, i32 2, i32 3>, align 16 +@llvm_mips_clti_s_w_RES = global <4 x i32> <i32 0, i32 0, i32 0, i32 0>, align 16 + +define void @llvm_mips_clti_s_w_test() nounwind { +entry: + %0 = load <4 x i32>* @llvm_mips_clti_s_w_ARG1 + %1 = tail call <4 x i32> @llvm.mips.clti.s.w(<4 x i32> %0, i32 14) + store <4 x i32> %1, <4 x i32>* @llvm_mips_clti_s_w_RES + ret void +} + +declare <4 x i32> @llvm.mips.clti.s.w(<4 x i32>, i32) nounwind + +; CHECK: llvm_mips_clti_s_w_test: +; CHECK: ld.w +; CHECK: clti_s.w +; CHECK: st.w +; CHECK: .size llvm_mips_clti_s_w_test +; +@llvm_mips_clti_s_d_ARG1 = global <2 x i64> <i64 0, i64 1>, align 16 +@llvm_mips_clti_s_d_RES = global <2 x i64> <i64 0, i64 0>, align 16 + +define void @llvm_mips_clti_s_d_test() nounwind { +entry: + %0 = load <2 x i64>* @llvm_mips_clti_s_d_ARG1 + %1 = tail call <2 x i64> @llvm.mips.clti.s.d(<2 x i64> %0, i32 14) + store <2 x i64> %1, <2 x i64>* @llvm_mips_clti_s_d_RES + ret void +} + +declare <2 x i64> @llvm.mips.clti.s.d(<2 x i64>, i32) nounwind + +; CHECK: llvm_mips_clti_s_d_test: +; CHECK: ld.d +; CHECK: clti_s.d +; CHECK: st.d +; CHECK: .size llvm_mips_clti_s_d_test +; +@llvm_mips_clti_u_b_ARG1 = global <16 x i8> <i8 0, i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7, i8 8, i8 9, i8 10, i8 11, i8 12, i8 13, i8 14, i8 15>, align 16 +@llvm_mips_clti_u_b_RES = global <16 x i8> <i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0>, align 16 + +define void @llvm_mips_clti_u_b_test() nounwind { +entry: + %0 = load <16 x i8>* @llvm_mips_clti_u_b_ARG1 + %1 = tail call <16 x i8> @llvm.mips.clti.u.b(<16 x i8> %0, i32 14) + store <16 x i8> %1, <16 x i8>* @llvm_mips_clti_u_b_RES + ret void +} + +declare <16 x i8> @llvm.mips.clti.u.b(<16 x i8>, i32) nounwind + +; CHECK: 
llvm_mips_clti_u_b_test: +; CHECK: ld.b +; CHECK: clti_u.b +; CHECK: st.b +; CHECK: .size llvm_mips_clti_u_b_test +; +@llvm_mips_clti_u_h_ARG1 = global <8 x i16> <i16 0, i16 1, i16 2, i16 3, i16 4, i16 5, i16 6, i16 7>, align 16 +@llvm_mips_clti_u_h_RES = global <8 x i16> <i16 0, i16 0, i16 0, i16 0, i16 0, i16 0, i16 0, i16 0>, align 16 + +define void @llvm_mips_clti_u_h_test() nounwind { +entry: + %0 = load <8 x i16>* @llvm_mips_clti_u_h_ARG1 + %1 = tail call <8 x i16> @llvm.mips.clti.u.h(<8 x i16> %0, i32 14) + store <8 x i16> %1, <8 x i16>* @llvm_mips_clti_u_h_RES + ret void +} + +declare <8 x i16> @llvm.mips.clti.u.h(<8 x i16>, i32) nounwind + +; CHECK: llvm_mips_clti_u_h_test: +; CHECK: ld.h +; CHECK: clti_u.h +; CHECK: st.h +; CHECK: .size llvm_mips_clti_u_h_test +; +@llvm_mips_clti_u_w_ARG1 = global <4 x i32> <i32 0, i32 1, i32 2, i32 3>, align 16 +@llvm_mips_clti_u_w_RES = global <4 x i32> <i32 0, i32 0, i32 0, i32 0>, align 16 + +define void @llvm_mips_clti_u_w_test() nounwind { +entry: + %0 = load <4 x i32>* @llvm_mips_clti_u_w_ARG1 + %1 = tail call <4 x i32> @llvm.mips.clti.u.w(<4 x i32> %0, i32 14) + store <4 x i32> %1, <4 x i32>* @llvm_mips_clti_u_w_RES + ret void +} + +declare <4 x i32> @llvm.mips.clti.u.w(<4 x i32>, i32) nounwind + +; CHECK: llvm_mips_clti_u_w_test: +; CHECK: ld.w +; CHECK: clti_u.w +; CHECK: st.w +; CHECK: .size llvm_mips_clti_u_w_test +; +@llvm_mips_clti_u_d_ARG1 = global <2 x i64> <i64 0, i64 1>, align 16 +@llvm_mips_clti_u_d_RES = global <2 x i64> <i64 0, i64 0>, align 16 + +define void @llvm_mips_clti_u_d_test() nounwind { +entry: + %0 = load <2 x i64>* @llvm_mips_clti_u_d_ARG1 + %1 = tail call <2 x i64> @llvm.mips.clti.u.d(<2 x i64> %0, i32 14) + store <2 x i64> %1, <2 x i64>* @llvm_mips_clti_u_d_RES + ret void +} + +declare <2 x i64> @llvm.mips.clti.u.d(<2 x i64>, i32) nounwind + +; CHECK: llvm_mips_clti_u_d_test: +; CHECK: ld.d +; CHECK: clti_u.d +; CHECK: st.d +; CHECK: .size llvm_mips_clti_u_d_test +; diff --git a/test/CodeGen/Mips/msa/i8.ll b/test/CodeGen/Mips/msa/i8.ll new file mode 100644 index 0000000..4dc30e3 --- /dev/null +++ b/test/CodeGen/Mips/msa/i8.ll @@ -0,0 +1,78 @@ +; RUN: llc -march=mips -mattr=+msa < %s | FileCheck %s + +@llvm_mips_andi_b_ARG1 = global <16 x i8> <i8 0, i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7, i8 8, i8 9, i8 10, i8 11, i8 12, i8 13, i8 14, i8 15>, align 16 +@llvm_mips_andi_b_RES = global <16 x i8> <i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0>, align 16 + +define void @llvm_mips_andi_b_test() nounwind { +entry: + %0 = load <16 x i8>* @llvm_mips_andi_b_ARG1 + %1 = tail call <16 x i8> @llvm.mips.andi.b(<16 x i8> %0, i32 25) + store <16 x i8> %1, <16 x i8>* @llvm_mips_andi_b_RES + ret void +} + +declare <16 x i8> @llvm.mips.andi.b(<16 x i8>, i32) nounwind + +; CHECK: llvm_mips_andi_b_test: +; CHECK: ld.b +; CHECK: andi.b +; CHECK: st.b +; CHECK: .size llvm_mips_andi_b_test +; +@llvm_mips_bmnzi_b_ARG1 = global <16 x i8> <i8 0, i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7, i8 8, i8 9, i8 10, i8 11, i8 12, i8 13, i8 14, i8 15>, align 16 +@llvm_mips_bmnzi_b_RES = global <16 x i8> <i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0>, align 16 + +define void @llvm_mips_bmnzi_b_test() nounwind { +entry: + %0 = load <16 x i8>* @llvm_mips_bmnzi_b_ARG1 + %1 = tail call <16 x i8> @llvm.mips.bmnzi.b(<16 x i8> %0, i32 25) + store <16 x i8> %1, <16 x i8>* @llvm_mips_bmnzi_b_RES + ret void +} + +declare <16 x i8> @llvm.mips.bmnzi.b(<16 x i8>, 
i32) nounwind + +; CHECK: llvm_mips_bmnzi_b_test: +; CHECK: ld.b +; CHECK: bmnzi.b +; CHECK: st.b +; CHECK: .size llvm_mips_bmnzi_b_test +; +@llvm_mips_bmzi_b_ARG1 = global <16 x i8> <i8 0, i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7, i8 8, i8 9, i8 10, i8 11, i8 12, i8 13, i8 14, i8 15>, align 16 +@llvm_mips_bmzi_b_RES = global <16 x i8> <i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0>, align 16 + +define void @llvm_mips_bmzi_b_test() nounwind { +entry: + %0 = load <16 x i8>* @llvm_mips_bmzi_b_ARG1 + %1 = tail call <16 x i8> @llvm.mips.bmzi.b(<16 x i8> %0, i32 25) + store <16 x i8> %1, <16 x i8>* @llvm_mips_bmzi_b_RES + ret void +} + +declare <16 x i8> @llvm.mips.bmzi.b(<16 x i8>, i32) nounwind + +; CHECK: llvm_mips_bmzi_b_test: +; CHECK: ld.b +; CHECK: bmzi.b +; CHECK: st.b +; CHECK: .size llvm_mips_bmzi_b_test +; +@llvm_mips_bseli_b_ARG1 = global <16 x i8> <i8 0, i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7, i8 8, i8 9, i8 10, i8 11, i8 12, i8 13, i8 14, i8 15>, align 16 +@llvm_mips_bseli_b_RES = global <16 x i8> <i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0>, align 16 + +define void @llvm_mips_bseli_b_test() nounwind { +entry: + %0 = load <16 x i8>* @llvm_mips_bseli_b_ARG1 + %1 = tail call <16 x i8> @llvm.mips.bseli.b(<16 x i8> %0, i32 25) + store <16 x i8> %1, <16 x i8>* @llvm_mips_bseli_b_RES + ret void +} + +declare <16 x i8> @llvm.mips.bseli.b(<16 x i8>, i32) nounwind + +; CHECK: llvm_mips_bseli_b_test: +; CHECK: ld.b +; CHECK: bseli.b +; CHECK: st.b +; CHECK: .size llvm_mips_bseli_b_test +;
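Every test added by this patch follows the same shape: load the operand(s) from aligned globals, apply exactly one intrinsic, store the result back, and check that llc emits the matching load/op/store sequence around the instruction under test. A minimal standalone sketch of that pattern, reusing the patch's own @llvm.mips.addvi.w intrinsic and RUN line (the @pattern_* global and function names are illustrative, not taken from the patch):

; RUN: llc -march=mips -mattr=+msa < %s | FileCheck %s

@pattern_ARG1 = global <4 x i32> <i32 0, i32 1, i32 2, i32 3>, align 16
@pattern_RES = global <4 x i32> <i32 0, i32 0, i32 0, i32 0>, align 16

define void @pattern_test() nounwind {
entry:
  ; load the vector operand from its 16-byte-aligned global
  %0 = load <4 x i32>* @pattern_ARG1
  ; apply the intrinsic with a 5-bit immediate operand
  %1 = tail call <4 x i32> @llvm.mips.addvi.w(<4 x i32> %0, i32 14)
  ; store the result so the ld.w/addvi.w/st.w sequence is observable
  store <4 x i32> %1, <4 x i32>* @pattern_RES
  ret void
}

declare <4 x i32> @llvm.mips.addvi.w(<4 x i32>, i32) nounwind

; CHECK: pattern_test:
; CHECK: ld.w
; CHECK: addvi.w
; CHECK: st.w
; CHECK: .size pattern_test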