From 7e0df9aa2966d0462e34511524a4958e226b74ee Mon Sep 17 00:00:00 2001
From: Daniel Sanders
Date: Tue, 24 Sep 2013 14:02:15 +0000
Subject: [mips][msa] Added support for matching vshf from normal IR (i.e. not
 intrinsics)

git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@191301 91177308-0d34-0410-b5e6-96231b3b80d8
---
 test/CodeGen/Mips/msa/3r-v.ll    |  35 +++--
 test/CodeGen/Mips/msa/shuffle.ll | 313 +++++++++++++++++++++++++++++++++++++++
 2 files changed, 336 insertions(+), 12 deletions(-)
 create mode 100644 test/CodeGen/Mips/msa/shuffle.ll
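
Illustrative note, placed below the "---" fold where "git am" discards it:
with this change, an ordinary two-operand shufflevector whose mask is not a
simple splat can be selected straight to vshf.w, with no need for the
@llvm.mips.vshf.* intrinsics. The function below is a hypothetical sketch in
the same style as the new tests; the name and mask are invented for
illustration and are not taken from the patch:

  define <4 x i32> @vshf_example(<4 x i32> %a, <4 x i32> %b) nounwind {
    ; Mask indices 0-3 select from %a and 4-7 from %b, so this picks
    ; a[1], b[2], b[0], a[3]; an arbitrary mix like this maps onto vshf.w.
    %1 = shufflevector <4 x i32> %a, <4 x i32> %b,
                       <4 x i32> <i32 1, i32 6, i32 4, i32 3>
    ret <4 x i32> %1
  }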
diff --git a/test/CodeGen/Mips/msa/3r-v.ll b/test/CodeGen/Mips/msa/3r-v.ll
index 055491d..544ae9f 100644
--- a/test/CodeGen/Mips/msa/3r-v.ll
+++ b/test/CodeGen/Mips/msa/3r-v.ll
@@ -5,84 +5,95 @@
 @llvm_mips_vshf_b_ARG1 = global <16 x i8> , align 16
 @llvm_mips_vshf_b_ARG2 = global <16 x i8> , align 16
+@llvm_mips_vshf_b_ARG3 = global <16 x i8> , align 16
 @llvm_mips_vshf_b_RES = global <16 x i8> , align 16
 
 define void @llvm_mips_vshf_b_test() nounwind {
 entry:
   %0 = load <16 x i8>* @llvm_mips_vshf_b_ARG1
   %1 = load <16 x i8>* @llvm_mips_vshf_b_ARG2
-  %2 = tail call <16 x i8> @llvm.mips.vshf.b(<16 x i8> %0, <16 x i8> %1)
-  store <16 x i8> %2, <16 x i8>* @llvm_mips_vshf_b_RES
+  %2 = load <16 x i8>* @llvm_mips_vshf_b_ARG3
+  %3 = tail call <16 x i8> @llvm.mips.vshf.b(<16 x i8> %0, <16 x i8> %1, <16 x i8> %2)
+  store <16 x i8> %3, <16 x i8>* @llvm_mips_vshf_b_RES
   ret void
 }
 
-declare <16 x i8> @llvm.mips.vshf.b(<16 x i8>, <16 x i8>) nounwind
+declare <16 x i8> @llvm.mips.vshf.b(<16 x i8>, <16 x i8>, <16 x i8>) nounwind
 
 ; CHECK: llvm_mips_vshf_b_test:
 ; CHECK: ld.b
 ; CHECK: ld.b
+; CHECK: ld.b
 ; CHECK: vshf.b
 ; CHECK: st.b
 ; CHECK: .size llvm_mips_vshf_b_test
 ;
 @llvm_mips_vshf_h_ARG1 = global <8 x i16> , align 16
 @llvm_mips_vshf_h_ARG2 = global <8 x i16> , align 16
+@llvm_mips_vshf_h_ARG3 = global <8 x i16> , align 16
 @llvm_mips_vshf_h_RES = global <8 x i16> , align 16
 
 define void @llvm_mips_vshf_h_test() nounwind {
 entry:
   %0 = load <8 x i16>* @llvm_mips_vshf_h_ARG1
   %1 = load <8 x i16>* @llvm_mips_vshf_h_ARG2
-  %2 = tail call <8 x i16> @llvm.mips.vshf.h(<8 x i16> %0, <8 x i16> %1)
-  store <8 x i16> %2, <8 x i16>* @llvm_mips_vshf_h_RES
+  %2 = load <8 x i16>* @llvm_mips_vshf_h_ARG3
+  %3 = tail call <8 x i16> @llvm.mips.vshf.h(<8 x i16> %0, <8 x i16> %1, <8 x i16> %2)
+  store <8 x i16> %3, <8 x i16>* @llvm_mips_vshf_h_RES
   ret void
 }
 
-declare <8 x i16> @llvm.mips.vshf.h(<8 x i16>, <8 x i16>) nounwind
+declare <8 x i16> @llvm.mips.vshf.h(<8 x i16>, <8 x i16>, <8 x i16>) nounwind
 
 ; CHECK: llvm_mips_vshf_h_test:
 ; CHECK: ld.h
 ; CHECK: ld.h
+; CHECK: ld.h
 ; CHECK: vshf.h
 ; CHECK: st.h
 ; CHECK: .size llvm_mips_vshf_h_test
 ;
 @llvm_mips_vshf_w_ARG1 = global <4 x i32> , align 16
 @llvm_mips_vshf_w_ARG2 = global <4 x i32> , align 16
+@llvm_mips_vshf_w_ARG3 = global <4 x i32> , align 16
 @llvm_mips_vshf_w_RES = global <4 x i32> , align 16
 
 define void @llvm_mips_vshf_w_test() nounwind {
 entry:
   %0 = load <4 x i32>* @llvm_mips_vshf_w_ARG1
   %1 = load <4 x i32>* @llvm_mips_vshf_w_ARG2
-  %2 = tail call <4 x i32> @llvm.mips.vshf.w(<4 x i32> %0, <4 x i32> %1)
-  store <4 x i32> %2, <4 x i32>* @llvm_mips_vshf_w_RES
+  %2 = load <4 x i32>* @llvm_mips_vshf_w_ARG3
+  %3 = tail call <4 x i32> @llvm.mips.vshf.w(<4 x i32> %0, <4 x i32> %1, <4 x i32> %2)
+  store <4 x i32> %3, <4 x i32>* @llvm_mips_vshf_w_RES
   ret void
 }
 
-declare <4 x i32> @llvm.mips.vshf.w(<4 x i32>, <4 x i32>) nounwind
+declare <4 x i32> @llvm.mips.vshf.w(<4 x i32>, <4 x i32>, <4 x i32>) nounwind
 
 ; CHECK: llvm_mips_vshf_w_test:
 ; CHECK: ld.w
 ; CHECK: ld.w
+; CHECK: ld.w
 ; CHECK: vshf.w
 ; CHECK: st.w
 ; CHECK: .size llvm_mips_vshf_w_test
 ;
 @llvm_mips_vshf_d_ARG1 = global <2 x i64> , align 16
 @llvm_mips_vshf_d_ARG2 = global <2 x i64> , align 16
+@llvm_mips_vshf_d_ARG3 = global <2 x i64> , align 16
 @llvm_mips_vshf_d_RES = global <2 x i64> , align 16
 
 define void @llvm_mips_vshf_d_test() nounwind {
 entry:
   %0 = load <2 x i64>* @llvm_mips_vshf_d_ARG1
   %1 = load <2 x i64>* @llvm_mips_vshf_d_ARG2
-  %2 = tail call <2 x i64> @llvm.mips.vshf.d(<2 x i64> %0, <2 x i64> %1)
-  store <2 x i64> %2, <2 x i64>* @llvm_mips_vshf_d_RES
+  %2 = load <2 x i64>* @llvm_mips_vshf_d_ARG3
+  %3 = tail call <2 x i64> @llvm.mips.vshf.d(<2 x i64> %0, <2 x i64> %1, <2 x i64> %2)
+  store <2 x i64> %3, <2 x i64>* @llvm_mips_vshf_d_RES
   ret void
 }
 
-declare <2 x i64> @llvm.mips.vshf.d(<2 x i64>, <2 x i64>) nounwind
+declare <2 x i64> @llvm.mips.vshf.d(<2 x i64>, <2 x i64>, <2 x i64>) nounwind
 
 ; CHECK: llvm_mips_vshf_d_test:
 ; CHECK: ld.d
diff --git a/test/CodeGen/Mips/msa/shuffle.ll b/test/CodeGen/Mips/msa/shuffle.ll
new file mode 100644
index 0000000..35a5cf8
--- /dev/null
+++ b/test/CodeGen/Mips/msa/shuffle.ll
@@ -0,0 +1,313 @@
+; RUN: llc -march=mips -mattr=+msa < %s | FileCheck %s
+
+define void @vshf_v16i8_0(<16 x i8>* %c, <16 x i8>* %a, <16 x i8>* %b) nounwind {
+  ; CHECK: vshf_v16i8_0:
+
+  %1 = load <16 x i8>* %a
+  ; CHECK-DAG: ld.b [[R1:\$w[0-9]+]], 0($5)
+  %2 = shufflevector <16 x i8> %1, <16 x i8> undef, <16 x i32>
+  ; CHECK-DAG: ld.b [[R3:\$w[0-9]+]], %lo
+  ; CHECK-DAG: vshf.b [[R3]], [[R1]], [[R1]]
+  store <16 x i8> %2, <16 x i8>* %c
+  ; CHECK-DAG: st.b [[R3]], 0($4)
+
+  ret void
+  ; CHECK: .size vshf_v16i8_0
+}
+
+define void @vshf_v16i8_1(<16 x i8>* %c, <16 x i8>* %a, <16 x i8>* %b) nounwind {
+  ; CHECK: vshf_v16i8_1:
+
+  %1 = load <16 x i8>* %a
+  ; CHECK-DAG: ld.b [[R1:\$w[0-9]+]], 0($5)
+  %2 = shufflevector <16 x i8> %1, <16 x i8> undef, <16 x i32>
+  ; CHECK-DAG: ldi.b [[R3:\$w[0-9]+]], 1
+  ; CHECK-DAG: vshf.b [[R3]], [[R1]], [[R1]]
+  store <16 x i8> %2, <16 x i8>* %c
+  ; CHECK-DAG: st.b [[R3]], 0($4)
+
+  ret void
+  ; CHECK: .size vshf_v16i8_1
+}
+
+define void @vshf_v16i8_2(<16 x i8>* %c, <16 x i8>* %a, <16 x i8>* %b) nounwind {
+  ; CHECK: vshf_v16i8_2:
+
+  %1 = load <16 x i8>* %a
+  %2 = load <16 x i8>* %b
+  ; CHECK-DAG: ld.b [[R2:\$w[0-9]+]], 0($6)
+  %3 = shufflevector <16 x i8> %1, <16 x i8> %2, <16 x i32>
+  ; CHECK-DAG: ld.b [[R3:\$w[0-9]+]], %lo
+  ; CHECK-DAG: vshf.b [[R3]], [[R2]], [[R2]]
+  store <16 x i8> %3, <16 x i8>* %c
+  ; CHECK-DAG: st.b [[R3]], 0($4)
+
+  ret void
+  ; CHECK: .size vshf_v16i8_2
+}
+
+define void @vshf_v16i8_3(<16 x i8>* %c, <16 x i8>* %a, <16 x i8>* %b) nounwind {
+  ; CHECK: vshf_v16i8_3:
+
+  %1 = load <16 x i8>* %a
+  ; CHECK-DAG: ld.b [[R1:\$w[0-9]+]], 0($5)
+  %2 = load <16 x i8>* %b
+  ; CHECK-DAG: ld.b [[R2:\$w[0-9]+]], 0($6)
+  %3 = shufflevector <16 x i8> %1, <16 x i8> %2, <16 x i32>
+  ; CHECK-DAG: ld.b [[R3:\$w[0-9]+]], %lo
+  ; CHECK-DAG: vshf.b [[R3]], [[R1]], [[R2]]
+  store <16 x i8> %3, <16 x i8>* %c
+  ; CHECK-DAG: st.b [[R3]], 0($4)
+
+  ret void
+  ; CHECK: .size vshf_v16i8_3
+}
+
+define void @vshf_v16i8_4(<16 x i8>* %c, <16 x i8>* %a, <16 x i8>* %b) nounwind {
+  ; CHECK: vshf_v16i8_4:
+
+  %1 = load <16 x i8>* %a
+  ; CHECK-DAG: ld.b [[R1:\$w[0-9]+]], 0($5)
+  %2 = shufflevector <16 x i8> %1, <16 x i8> %1, <16 x i32>
+  ; CHECK-DAG: ldi.b [[R3:\$w[0-9]+]], 1
+  ; CHECK-DAG: vshf.b [[R3]], [[R1]], [[R1]]
+  store <16 x i8> %2, <16 x i8>* %c
+  ; CHECK-DAG: st.b [[R3]], 0($4)
+
+  ret void
+  ; CHECK: .size vshf_v16i8_4
+}
+
+define void @vshf_v8i16_0(<8 x i16>* %c, <8 x i16>* %a, <8 x i16>* %b) nounwind {
+  ; CHECK: vshf_v8i16_0:
+
+  %1 = load <8 x i16>* %a
+  ; CHECK-DAG: ld.h [[R1:\$w[0-9]+]], 0($5)
+  %2 = shufflevector <8 x i16> %1, <8 x i16> undef, <8 x i32>
+  ; CHECK-DAG: ld.h [[R3:\$w[0-9]+]], %lo
+  ; CHECK-DAG: vshf.h [[R3]], [[R1]], [[R1]]
+  store <8 x i16> %2, <8 x i16>* %c
+  ; CHECK-DAG: st.h [[R3]], 0($4)
+
+  ret void
+  ; CHECK: .size vshf_v8i16_0
+}
+
+define void @vshf_v8i16_1(<8 x i16>* %c, <8 x i16>* %a, <8 x i16>* %b) nounwind {
+  ; CHECK: vshf_v8i16_1:
+
+  %1 = load <8 x i16>* %a
+  ; CHECK-DAG: ld.h [[R1:\$w[0-9]+]], 0($5)
+  %2 = shufflevector <8 x i16> %1, <8 x i16> undef, <8 x i32>
+  ; CHECK-DAG: ldi.h [[R3:\$w[0-9]+]], 1
+  ; CHECK-DAG: vshf.h [[R3]], [[R1]], [[R1]]
+  store <8 x i16> %2, <8 x i16>* %c
+  ; CHECK-DAG: st.h [[R3]], 0($4)
+
+  ret void
+  ; CHECK: .size vshf_v8i16_1
+}
+
+define void @vshf_v8i16_2(<8 x i16>* %c, <8 x i16>* %a, <8 x i16>* %b) nounwind {
+  ; CHECK: vshf_v8i16_2:
+
+  %1 = load <8 x i16>* %a
+  %2 = load <8 x i16>* %b
+  ; CHECK-DAG: ld.h [[R2:\$w[0-9]+]], 0($6)
+  %3 = shufflevector <8 x i16> %1, <8 x i16> %2, <8 x i32>
+  ; CHECK-DAG: ld.h [[R3:\$w[0-9]+]], %lo
+  ; CHECK-DAG: vshf.h [[R3]], [[R2]], [[R2]]
+  store <8 x i16> %3, <8 x i16>* %c
+  ; CHECK-DAG: st.h [[R3]], 0($4)
+
+  ret void
+  ; CHECK: .size vshf_v8i16_2
+}
+
+define void @vshf_v8i16_3(<8 x i16>* %c, <8 x i16>* %a, <8 x i16>* %b) nounwind {
+  ; CHECK: vshf_v8i16_3:
+
+  %1 = load <8 x i16>* %a
+  ; CHECK-DAG: ld.h [[R1:\$w[0-9]+]], 0($5)
+  %2 = load <8 x i16>* %b
+  ; CHECK-DAG: ld.h [[R2:\$w[0-9]+]], 0($6)
+  %3 = shufflevector <8 x i16> %1, <8 x i16> %2, <8 x i32>
+  ; CHECK-DAG: ld.h [[R3:\$w[0-9]+]], %lo
+  ; CHECK-DAG: vshf.h [[R3]], [[R1]], [[R2]]
+  store <8 x i16> %3, <8 x i16>* %c
+  ; CHECK-DAG: st.h [[R3]], 0($4)
+
+  ret void
+  ; CHECK: .size vshf_v8i16_3
+}
+
+define void @vshf_v8i16_4(<8 x i16>* %c, <8 x i16>* %a, <8 x i16>* %b) nounwind {
+  ; CHECK: vshf_v8i16_4:
+
+  %1 = load <8 x i16>* %a
+  ; CHECK-DAG: ld.h [[R1:\$w[0-9]+]], 0($5)
+  %2 = shufflevector <8 x i16> %1, <8 x i16> %1, <8 x i32>
+  ; CHECK-DAG: ldi.h [[R3:\$w[0-9]+]], 1
+  ; CHECK-DAG: vshf.h [[R3]], [[R1]], [[R1]]
+  store <8 x i16> %2, <8 x i16>* %c
+  ; CHECK-DAG: st.h [[R3]], 0($4)
+
+  ret void
+  ; CHECK: .size vshf_v8i16_4
+}
+
+define void @vshf_v4i32_0(<4 x i32>* %c, <4 x i32>* %a, <4 x i32>* %b) nounwind {
+  ; CHECK: vshf_v4i32_0:
+
+  %1 = load <4 x i32>* %a
+  ; CHECK-DAG: ld.w [[R1:\$w[0-9]+]], 0($5)
+  %2 = shufflevector <4 x i32> %1, <4 x i32> undef, <4 x i32>
+  ; CHECK-DAG: ld.w [[R3:\$w[0-9]+]], %lo
+  ; CHECK-DAG: vshf.w [[R3:\$w[0-9]+]], [[R1]], [[R1]]
+  store <4 x i32> %2, <4 x i32>* %c
+  ; CHECK-DAG: st.w [[R3]], 0($4)
+
+  ret void
+  ; CHECK: .size vshf_v4i32_0
+}
+
+define void @vshf_v4i32_1(<4 x i32>* %c, <4 x i32>* %a, <4 x i32>* %b) nounwind {
+  ; CHECK: vshf_v4i32_1:
+
+  %1 = load <4 x i32>* %a
+  ; CHECK-DAG: ld.w [[R1:\$w[0-9]+]], 0($5)
+  %2 = shufflevector <4 x i32> %1, <4 x i32> undef, <4 x i32>
+  ; CHECK-DAG: ldi.w [[R3:\$w[0-9]+]], 1
+  ; CHECK-DAG: vshf.w [[R3:\$w[0-9]+]], [[R1]], [[R1]]
+  store <4 x i32> %2, <4 x i32>* %c
+  ; CHECK-DAG: st.w [[R3]], 0($4)
+
+  ret void
+  ; CHECK: .size vshf_v4i32_1
+}
+
+define void @vshf_v4i32_2(<4 x i32>* %c, <4 x i32>* %a, <4 x i32>* %b) nounwind {
+  ; CHECK: vshf_v4i32_2:
+
+  %1 = load <4 x i32>* %a
+  %2 = load <4 x i32>* %b
+  ; CHECK-DAG: ld.w [[R2:\$w[0-9]+]], 0($6)
+  %3 = shufflevector <4 x i32> %1, <4 x i32> %2, <4 x i32>
+  ; CHECK-DAG: ld.w [[R3:\$w[0-9]+]], %lo
+  ; CHECK-DAG: vshf.w [[R3:\$w[0-9]+]], [[R2]], [[R2]]
+  store <4 x i32> %3, <4 x i32>* %c
+  ; CHECK-DAG: st.w [[R3]], 0($4)
+
+  ret void
+  ; CHECK: .size vshf_v4i32_2
+}
+
+define void @vshf_v4i32_3(<4 x i32>* %c, <4 x i32>* %a, <4 x i32>* %b) nounwind {
+  ; CHECK: vshf_v4i32_3:
+
+  %1 = load <4 x i32>* %a
+  ; CHECK-DAG: ld.w [[R1:\$w[0-9]+]], 0($5)
+  %2 = load <4 x i32>* %b
+  ; CHECK-DAG: ld.w [[R2:\$w[0-9]+]], 0($6)
+  %3 = shufflevector <4 x i32> %1, <4 x i32> %2, <4 x i32>
+  ; CHECK-DAG: ld.w [[R3:\$w[0-9]+]], %lo
+  ; CHECK-DAG: vshf.w [[R3]], [[R1]], [[R2]]
+  store <4 x i32> %3, <4 x i32>* %c
+  ; CHECK-DAG: st.w [[R3]], 0($4)
+
+  ret void
+  ; CHECK: .size vshf_v4i32_3
+}
+
+define void @vshf_v4i32_4(<4 x i32>* %c, <4 x i32>* %a, <4 x i32>* %b) nounwind {
+  ; CHECK: vshf_v4i32_4:
+
+  %1 = load <4 x i32>* %a
+  ; CHECK-DAG: ld.w [[R1:\$w[0-9]+]], 0($5)
+  %2 = shufflevector <4 x i32> %1, <4 x i32> %1, <4 x i32>
+  ; CHECK-DAG: ldi.w [[R3:\$w[0-9]+]], 1
+  ; CHECK-DAG: vshf.w [[R3:\$w[0-9]+]], [[R1]], [[R1]]
+  store <4 x i32> %2, <4 x i32>* %c
+  ; CHECK-DAG: st.w [[R3]], 0($4)
+
+  ret void
+  ; CHECK: .size vshf_v4i32_4
+}
+
+define void @vshf_v2i64_0(<2 x i64>* %c, <2 x i64>* %a, <2 x i64>* %b) nounwind {
+  ; CHECK: vshf_v2i64_0:
+
+  %1 = load <2 x i64>* %a
+  ; CHECK-DAG: ld.d [[R1:\$w[0-9]+]], 0($5)
+  %2 = shufflevector <2 x i64> %1, <2 x i64> undef, <2 x i32>
+  ; CHECK-DAG: ld.d [[R3:\$w[0-9]+]], %lo
+  ; CHECK-DAG: vshf.d [[R3]], [[R1]], [[R1]]
+  store <2 x i64> %2, <2 x i64>* %c
+  ; CHECK-DAG: st.d [[R3]], 0($4)
+
+  ret void
+  ; CHECK: .size vshf_v2i64_0
+}
+
+define void @vshf_v2i64_1(<2 x i64>* %c, <2 x i64>* %a, <2 x i64>* %b) nounwind {
+  ; CHECK: vshf_v2i64_1:
+
+  %1 = load <2 x i64>* %a
+  ; CHECK-DAG: ld.d [[R1:\$w[0-9]+]], 0($5)
+  %2 = shufflevector <2 x i64> %1, <2 x i64> undef, <2 x i32>
+  ; CHECK-DAG: ldi.d [[R3:\$w[0-9]+]], 1
+  ; CHECK-DAG: vshf.d [[R3]], [[R1]], [[R1]]
+  store <2 x i64> %2, <2 x i64>* %c
+  ; CHECK-DAG: st.d [[R3]], 0($4)
+
+  ret void
+  ; CHECK: .size vshf_v2i64_1
+}
+
+define void @vshf_v2i64_2(<2 x i64>* %c, <2 x i64>* %a, <2 x i64>* %b) nounwind {
+  ; CHECK: vshf_v2i64_2:
+
+  %1 = load <2 x i64>* %a
+  %2 = load <2 x i64>* %b
+  ; CHECK-DAG: ld.d [[R2:\$w[0-9]+]], 0($6)
+  %3 = shufflevector <2 x i64> %1, <2 x i64> %2, <2 x i32>
+  ; CHECK-DAG: ld.d [[R3:\$w[0-9]+]], %lo
+  ; CHECK-DAG: vshf.d [[R3]], [[R2]], [[R2]]
+  store <2 x i64> %3, <2 x i64>* %c
+  ; CHECK-DAG: st.d [[R3]], 0($4)
+
+  ret void
+  ; CHECK: .size vshf_v2i64_2
+}
+
+define void @vshf_v2i64_3(<2 x i64>* %c, <2 x i64>* %a, <2 x i64>* %b) nounwind {
+  ; CHECK: vshf_v2i64_3:
+
+  %1 = load <2 x i64>* %a
+  ; CHECK-DAG: ld.d [[R1:\$w[0-9]+]], 0($5)
+  %2 = load <2 x i64>* %b
+  ; CHECK-DAG: ld.d [[R2:\$w[0-9]+]], 0($6)
+  %3 = shufflevector <2 x i64> %1, <2 x i64> %2, <2 x i32>
+  ; CHECK-DAG: ld.d [[R3:\$w[0-9]+]], %lo
+  ; CHECK-DAG: vshf.d [[R3]], [[R1]], [[R2]]
+  store <2 x i64> %3, <2 x i64>* %c
+  ; CHECK-DAG: st.d [[R3]], 0($4)
+
+  ret void
+  ; CHECK: .size vshf_v2i64_3
+}
+
+define void @vshf_v2i64_4(<2 x i64>* %c, <2 x i64>* %a, <2 x i64>* %b) nounwind {
+  ; CHECK: vshf_v2i64_4:
+
+  %1 = load <2 x i64>* %a
+  ; CHECK-DAG: ld.d [[R1:\$w[0-9]+]], 0($5)
+  %2 = shufflevector <2 x i64> %1, <2 x i64> %1, <2 x i32>
+  ; CHECK-DAG: ldi.d [[R3:\$w[0-9]+]], 1
+  ; CHECK-DAG: vshf.d [[R3]], [[R1]], [[R1]]
+  store <2 x i64> %2, <2 x i64>* %c
+  ; CHECK-DAG: st.d [[R3]], 0($4)
+
+  ret void
+  ; CHECK: .size vshf_v2i64_4
+}