From 8458f371b84ee0cd22c4a433059d53ea6e3ec4f4 Mon Sep 17 00:00:00 2001
From: Jiangning Liu
Date: Wed, 6 Nov 2013 03:35:27 +0000
Subject: Implement AArch64 Neon instruction set Perm.

git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@194123 91177308-0d34-0410-b5e6-96231b3b80d8
---
Reviewer notes (this area between "---" and the first "diff --git" is
ignored when the patch is applied):

  * Every test below is a single shufflevector of %a and %b whose constant
    mask selects the lanes of the permute named in the test. With n lanes
    per input, mask indices 0..n-1 pick from %a and n..2n-1 pick from %b:
      uzp1: 0, 2, 4, ...          (even lanes of the concatenation a:b)
      uzp2: 1, 3, 5, ...          (odd lanes of the concatenation a:b)
      zip1: 0, n, 1, n+1, ...     (interleave the low halves)
      zip2: n/2, n+n/2, ...       (interleave the high halves)
      trn1: 0, n, 2, n+2, ...     (even lanes of a paired with even of b)
      trn2: 1, n+1, 3, n+3, ...   (odd lanes of a paired with odd of b)
  * For two-lane vectors all three "1" forms use mask <0, 2> and all three
    "2" forms use mask <1, 3>; the CHECK lines expect a single INS there
    instead of a permute instruction.
  * The <i32 ...> mask operands and the line structure were missing from
    the copy under review and have been restored from the rules above;
    verify against upstream r194123 before applying. The author's e-mail
    address on the From: line was also lost and is left as found.

 test/CodeGen/AArch64/neon-perm.ll                  | 1676 ++++++++++++++++++++
 test/MC/AArch64/neon-diagnostics.s                 |  416 +++++
 test/MC/AArch64/neon-perm.s                        |  103 ++
 test/MC/Disassembler/AArch64/neon-instructions.txt |  107 ++
 4 files changed, 2302 insertions(+)
 create mode 100644 test/CodeGen/AArch64/neon-perm.ll
 create mode 100644 test/MC/AArch64/neon-perm.s

(limited to 'test')

diff --git a/test/CodeGen/AArch64/neon-perm.ll b/test/CodeGen/AArch64/neon-perm.ll
new file mode 100644
index 0000000..4db4771
--- /dev/null
+++ b/test/CodeGen/AArch64/neon-perm.ll
@@ -0,0 +1,1676 @@
+; RUN: llc < %s -verify-machineinstrs -mtriple=aarch64-none-linux-gnu -mattr=+neon | FileCheck %s
+
+%struct.int8x8x2_t = type { [2 x <8 x i8>] }
+%struct.int16x4x2_t = type { [2 x <4 x i16>] }
+%struct.int32x2x2_t = type { [2 x <2 x i32>] }
+%struct.uint8x8x2_t = type { [2 x <8 x i8>] }
+%struct.uint16x4x2_t = type { [2 x <4 x i16>] }
+%struct.uint32x2x2_t = type { [2 x <2 x i32>] }
+%struct.float32x2x2_t = type { [2 x <2 x float>] }
+%struct.poly8x8x2_t = type { [2 x <8 x i8>] }
+%struct.poly16x4x2_t = type { [2 x <4 x i16>] }
+%struct.int8x16x2_t = type { [2 x <16 x i8>] }
+%struct.int16x8x2_t = type { [2 x <8 x i16>] }
+%struct.int32x4x2_t = type { [2 x <4 x i32>] }
+%struct.uint8x16x2_t = type { [2 x <16 x i8>] }
+%struct.uint16x8x2_t = type { [2 x <8 x i16>] }
+%struct.uint32x4x2_t = type { [2 x <4 x i32>] }
+%struct.float32x4x2_t = type { [2 x <4 x float>] }
+%struct.poly8x16x2_t = type { [2 x <16 x i8>] }
+%struct.poly16x8x2_t = type { [2 x <8 x i16>] }
+
+define <8 x i8> @test_vuzp1_s8(<8 x i8> %a, <8 x i8> %b) {
+; CHECK: test_vuzp1_s8:
+; CHECK: uzp1 {{v[0-9]+}}.8b, {{v[0-9]+}}.8b, {{v[0-9]+}}.8b
+entry:
+  %shuffle.i = shufflevector <8 x i8> %a, <8 x i8> %b, <8 x i32> <i32 0, i32 2, i32 4, i32 6, i32 8, i32 10, i32 12, i32 14>
+  ret <8 x i8> %shuffle.i
+}
+
+define <16 x i8> @test_vuzp1q_s8(<16 x i8> %a, <16 x i8> %b) {
+; CHECK: test_vuzp1q_s8:
+; CHECK: uzp1 {{v[0-9]+}}.16b, {{v[0-9]+}}.16b, {{v[0-9]+}}.16b
+entry:
+  %shuffle.i = shufflevector <16 x i8> %a, <16 x i8> %b, <16 x i32> <i32 0, i32 2, i32 4, i32 6, i32 8, i32 10, i32 12, i32 14, i32 16, i32 18, i32 20, i32 22, i32 24, i32 26, i32 28, i32 30>
+  ret <16 x i8> %shuffle.i
+}
+
+define <4 x i16> @test_vuzp1_s16(<4 x i16> %a, <4 x i16> %b) {
+; CHECK: test_vuzp1_s16:
+; CHECK: uzp1 {{v[0-9]+}}.4h, {{v[0-9]+}}.4h, {{v[0-9]+}}.4h
+entry:
+  %shuffle.i = shufflevector <4 x i16> %a, <4 x i16> %b, <4 x i32> <i32 0, i32 2, i32 4, i32 6>
+  ret <4 x i16> %shuffle.i
+}
+
+define <8 x i16> @test_vuzp1q_s16(<8 x i16> %a, <8 x i16> %b) {
+; CHECK: test_vuzp1q_s16:
+; CHECK: uzp1 {{v[0-9]+}}.8h, {{v[0-9]+}}.8h, {{v[0-9]+}}.8h
+entry:
+  %shuffle.i = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> <i32 0, i32 2, i32 4, i32 6, i32 8, i32 10, i32 12, i32 14>
+  ret <8 x i16> %shuffle.i
+}
+
+define <2 x i32> @test_vuzp1_s32(<2 x i32> %a, <2 x i32> %b) {
+; CHECK: test_vuzp1_s32:
+; CHECK: ins {{v[0-9]+}}.s[1], {{v[0-9]+}}.s[0]
+entry:
+  %shuffle.i = shufflevector <2 x i32> %a, <2 x i32> %b, <2 x i32> <i32 0, i32 2>
+  ret <2 x i32> %shuffle.i
+}
+
+define <4 x i32> @test_vuzp1q_s32(<4 x i32> %a, <4 x i32> %b) {
+; CHECK: test_vuzp1q_s32:
+; CHECK: uzp1 {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.4s
+entry:
+  %shuffle.i = shufflevector <4 x i32> %a, <4 x i32> %b, <4 x i32> <i32 0, i32 2, i32 4, i32 6>
+  ret <4 x i32> %shuffle.i
+}
+
+define <2 x i64> @test_vuzp1q_s64(<2 x i64> %a, <2 x i64> %b) {
+; CHECK: test_vuzp1q_s64:
+; CHECK: ins {{v[0-9]+}}.d[1], {{v[0-9]+}}.d[0]
+entry:
+  %shuffle.i = shufflevector <2 x i64> %a, <2 x i64> %b, <2 x i32> <i32 0, i32 2>
+  ret <2 x i64> %shuffle.i
+}
+
+define <8 x i8> @test_vuzp1_u8(<8 x i8> %a, <8 x i8> %b) {
+; CHECK: test_vuzp1_u8:
+; CHECK: uzp1 {{v[0-9]+}}.8b, {{v[0-9]+}}.8b, {{v[0-9]+}}.8b
+entry:
+  %shuffle.i = shufflevector <8 x i8> %a, <8 x i8> %b, <8 x i32> <i32 0, i32 2, i32 4, i32 6, i32 8, i32 10, i32 12, i32 14>
+  ret <8 x i8> %shuffle.i
+}
+
+define <16 x i8> @test_vuzp1q_u8(<16 x i8> %a, <16 x i8> %b) {
+; CHECK: test_vuzp1q_u8:
+; CHECK: uzp1 {{v[0-9]+}}.16b, {{v[0-9]+}}.16b, {{v[0-9]+}}.16b
+entry:
+  %shuffle.i = shufflevector <16 x i8> %a, <16 x i8> %b, <16 x i32> <i32 0, i32 2, i32 4, i32 6, i32 8, i32 10, i32 12, i32 14, i32 16, i32 18, i32 20, i32 22, i32 24, i32 26, i32 28, i32 30>
+  ret <16 x i8> %shuffle.i
+}
+
+define <4 x i16> @test_vuzp1_u16(<4 x i16> %a, <4 x i16> %b) {
+; CHECK: test_vuzp1_u16:
+; CHECK: uzp1 {{v[0-9]+}}.4h, {{v[0-9]+}}.4h, {{v[0-9]+}}.4h
+entry:
+  %shuffle.i = shufflevector <4 x i16> %a, <4 x i16> %b, <4 x i32> <i32 0, i32 2, i32 4, i32 6>
+  ret <4 x i16> %shuffle.i
+}
+
+define <8 x i16> @test_vuzp1q_u16(<8 x i16> %a, <8 x i16> %b) {
+; CHECK: test_vuzp1q_u16:
+; CHECK: uzp1 {{v[0-9]+}}.8h, {{v[0-9]+}}.8h, {{v[0-9]+}}.8h
+entry:
+  %shuffle.i = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> <i32 0, i32 2, i32 4, i32 6, i32 8, i32 10, i32 12, i32 14>
+  ret <8 x i16> %shuffle.i
+}
+
+define <2 x i32> @test_vuzp1_u32(<2 x i32> %a, <2 x i32> %b) {
+; CHECK: test_vuzp1_u32:
+; CHECK: ins {{v[0-9]+}}.s[1], {{v[0-9]+}}.s[0]
+entry:
+  %shuffle.i = shufflevector <2 x i32> %a, <2 x i32> %b, <2 x i32> <i32 0, i32 2>
+  ret <2 x i32> %shuffle.i
+}
+
+define <4 x i32> @test_vuzp1q_u32(<4 x i32> %a, <4 x i32> %b) {
+; CHECK: test_vuzp1q_u32:
+; CHECK: uzp1 {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.4s
+entry:
+  %shuffle.i = shufflevector <4 x i32> %a, <4 x i32> %b, <4 x i32> <i32 0, i32 2, i32 4, i32 6>
+  ret <4 x i32> %shuffle.i
+}
+
+define <2 x i64> @test_vuzp1q_u64(<2 x i64> %a, <2 x i64> %b) {
+; CHECK: test_vuzp1q_u64:
+; CHECK: ins {{v[0-9]+}}.d[1], {{v[0-9]+}}.d[0]
+entry:
+  %shuffle.i = shufflevector <2 x i64> %a, <2 x i64> %b, <2 x i32> <i32 0, i32 2>
+  ret <2 x i64> %shuffle.i
+}
+
+define <2 x float> @test_vuzp1_f32(<2 x float> %a, <2 x float> %b) {
+; CHECK: test_vuzp1_f32:
+; CHECK: ins {{v[0-9]+}}.s[1], {{v[0-9]+}}.s[0]
+entry:
+  %shuffle.i = shufflevector <2 x float> %a, <2 x float> %b, <2 x i32> <i32 0, i32 2>
+  ret <2 x float> %shuffle.i
+}
+
+define <4 x float> @test_vuzp1q_f32(<4 x float> %a, <4 x float> %b) {
+; CHECK: test_vuzp1q_f32:
+; CHECK: uzp1 {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.4s
+entry:
+  %shuffle.i = shufflevector <4 x float> %a, <4 x float> %b, <4 x i32> <i32 0, i32 2, i32 4, i32 6>
+  ret <4 x float> %shuffle.i
+}
+
+define <2 x double> @test_vuzp1q_f64(<2 x double> %a, <2 x double> %b) {
+; CHECK: test_vuzp1q_f64:
+; CHECK: ins {{v[0-9]+}}.d[1], {{v[0-9]+}}.d[0]
+entry:
+  %shuffle.i = shufflevector <2 x double> %a, <2 x double> %b, <2 x i32> <i32 0, i32 2>
+  ret <2 x double> %shuffle.i
+}
+
+define <8 x i8> @test_vuzp1_p8(<8 x i8> %a, <8 x i8> %b) {
+; CHECK: test_vuzp1_p8:
+; CHECK: uzp1 {{v[0-9]+}}.8b, {{v[0-9]+}}.8b, {{v[0-9]+}}.8b
+entry:
+  %shuffle.i = shufflevector <8 x i8> %a, <8 x i8> %b, <8 x i32> <i32 0, i32 2, i32 4, i32 6, i32 8, i32 10, i32 12, i32 14>
+  ret <8 x i8> %shuffle.i
+}
+
+define <16 x i8> @test_vuzp1q_p8(<16 x i8> %a, <16 x i8> %b) {
+; CHECK: test_vuzp1q_p8:
+; CHECK: uzp1 {{v[0-9]+}}.16b, {{v[0-9]+}}.16b, {{v[0-9]+}}.16b
+entry:
+  %shuffle.i = shufflevector <16 x i8> %a, <16 x i8> %b, <16 x i32> <i32 0, i32 2, i32 4, i32 6, i32 8, i32 10, i32 12, i32 14, i32 16, i32 18, i32 20, i32 22, i32 24, i32 26, i32 28, i32 30>
+  ret <16 x i8> %shuffle.i
+}
+
+define <4 x i16> @test_vuzp1_p16(<4 x i16> %a, <4 x i16> %b) {
+; CHECK: test_vuzp1_p16:
+; CHECK: uzp1 {{v[0-9]+}}.4h, {{v[0-9]+}}.4h, {{v[0-9]+}}.4h
+entry:
+  %shuffle.i = shufflevector <4 x i16> %a, <4 x i16> %b, <4 x i32> <i32 0, i32 2, i32 4, i32 6>
+  ret <4 x i16> %shuffle.i
+}
+
+define <8 x i16> @test_vuzp1q_p16(<8 x i16> %a, <8 x i16> %b) {
+; CHECK: test_vuzp1q_p16:
+; CHECK: uzp1 {{v[0-9]+}}.8h, {{v[0-9]+}}.8h, {{v[0-9]+}}.8h
+entry:
+  %shuffle.i = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> <i32 0, i32 2, i32 4, i32 6, i32 8, i32 10, i32 12, i32 14>
+  ret <8 x i16> %shuffle.i
+}
+
+define <8 x i8> @test_vuzp2_s8(<8 x i8> %a, <8 x i8> %b) {
+; CHECK: test_vuzp2_s8:
+; CHECK: uzp2 {{v[0-9]+}}.8b, {{v[0-9]+}}.8b, {{v[0-9]+}}.8b
+entry:
+  %shuffle.i = shufflevector <8 x i8> %a, <8 x i8> %b, <8 x i32> <i32 1, i32 3, i32 5, i32 7, i32 9, i32 11, i32 13, i32 15>
+  ret <8 x i8> %shuffle.i
+}
+
+define <16 x i8> @test_vuzp2q_s8(<16 x i8> %a, <16 x i8> %b) {
+; CHECK: test_vuzp2q_s8:
+; CHECK: uzp2 {{v[0-9]+}}.16b, {{v[0-9]+}}.16b, {{v[0-9]+}}.16b
+entry:
+  %shuffle.i = shufflevector <16 x i8> %a, <16 x i8> %b, <16 x i32> <i32 1, i32 3, i32 5, i32 7, i32 9, i32 11, i32 13, i32 15, i32 17, i32 19, i32 21, i32 23, i32 25, i32 27, i32 29, i32 31>
+  ret <16 x i8> %shuffle.i
+}
+
+define <4 x i16> @test_vuzp2_s16(<4 x i16> %a, <4 x i16> %b) {
+; CHECK: test_vuzp2_s16:
+; CHECK: uzp2 {{v[0-9]+}}.4h, {{v[0-9]+}}.4h, {{v[0-9]+}}.4h
+entry:
+  %shuffle.i = shufflevector <4 x i16> %a, <4 x i16> %b, <4 x i32> <i32 1, i32 3, i32 5, i32 7>
+  ret <4 x i16> %shuffle.i
+}
+
+define <8 x i16> @test_vuzp2q_s16(<8 x i16> %a, <8 x i16> %b) {
+; CHECK: test_vuzp2q_s16:
+; CHECK: uzp2 {{v[0-9]+}}.8h, {{v[0-9]+}}.8h, {{v[0-9]+}}.8h
+entry:
+  %shuffle.i = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> <i32 1, i32 3, i32 5, i32 7, i32 9, i32 11, i32 13, i32 15>
+  ret <8 x i16> %shuffle.i
+}
+
+define <2 x i32> @test_vuzp2_s32(<2 x i32> %a, <2 x i32> %b) {
+; CHECK: test_vuzp2_s32:
+; CHECK: ins {{v[0-9]+}}.s[0], {{v[0-9]+}}.s[1]
+entry:
+  %shuffle.i = shufflevector <2 x i32> %a, <2 x i32> %b, <2 x i32> <i32 1, i32 3>
+  ret <2 x i32> %shuffle.i
+}
+
+define <4 x i32> @test_vuzp2q_s32(<4 x i32> %a, <4 x i32> %b) {
+; CHECK: test_vuzp2q_s32:
+; CHECK: uzp2 {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.4s
+entry:
+  %shuffle.i = shufflevector <4 x i32> %a, <4 x i32> %b, <4 x i32> <i32 1, i32 3, i32 5, i32 7>
+  ret <4 x i32> %shuffle.i
+}
+
+define <2 x i64> @test_vuzp2q_s64(<2 x i64> %a, <2 x i64> %b) {
+; CHECK: test_vuzp2q_s64:
+; CHECK: ins {{v[0-9]+}}.d[0], {{v[0-9]+}}.d[1]
+entry:
+  %shuffle.i = shufflevector <2 x i64> %a, <2 x i64> %b, <2 x i32> <i32 1, i32 3>
+  ret <2 x i64> %shuffle.i
+}
+
+define <8 x i8> @test_vuzp2_u8(<8 x i8> %a, <8 x i8> %b) {
+; CHECK: test_vuzp2_u8:
+; CHECK: uzp2 {{v[0-9]+}}.8b, {{v[0-9]+}}.8b, {{v[0-9]+}}.8b
+entry:
+  %shuffle.i = shufflevector <8 x i8> %a, <8 x i8> %b, <8 x i32> <i32 1, i32 3, i32 5, i32 7, i32 9, i32 11, i32 13, i32 15>
+  ret <8 x i8> %shuffle.i
+}
+
+define <16 x i8> @test_vuzp2q_u8(<16 x i8> %a, <16 x i8> %b) {
+; CHECK: test_vuzp2q_u8:
+; CHECK: uzp2 {{v[0-9]+}}.16b, {{v[0-9]+}}.16b, {{v[0-9]+}}.16b
+entry:
+  %shuffle.i = shufflevector <16 x i8> %a, <16 x i8> %b, <16 x i32> <i32 1, i32 3, i32 5, i32 7, i32 9, i32 11, i32 13, i32 15, i32 17, i32 19, i32 21, i32 23, i32 25, i32 27, i32 29, i32 31>
+  ret <16 x i8> %shuffle.i
+}
+
+define <4 x i16> @test_vuzp2_u16(<4 x i16> %a, <4 x i16> %b) {
+; CHECK: test_vuzp2_u16:
+; CHECK: uzp2 {{v[0-9]+}}.4h, {{v[0-9]+}}.4h, {{v[0-9]+}}.4h
+entry:
+  %shuffle.i = shufflevector <4 x i16> %a, <4 x i16> %b, <4 x i32> <i32 1, i32 3, i32 5, i32 7>
+  ret <4 x i16> %shuffle.i
+}
+
+define <8 x i16> @test_vuzp2q_u16(<8 x i16> %a, <8 x i16> %b) {
+; CHECK: test_vuzp2q_u16:
+; CHECK: uzp2 {{v[0-9]+}}.8h, {{v[0-9]+}}.8h, {{v[0-9]+}}.8h
+entry:
+  %shuffle.i = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> <i32 1, i32 3, i32 5, i32 7, i32 9, i32 11, i32 13, i32 15>
+  ret <8 x i16> %shuffle.i
+}
+
+define <2 x i32> @test_vuzp2_u32(<2 x i32> %a, <2 x i32> %b) {
+; CHECK: test_vuzp2_u32:
+; CHECK: ins {{v[0-9]+}}.s[0], {{v[0-9]+}}.s[1]
+entry:
+  %shuffle.i = shufflevector <2 x i32> %a, <2 x i32> %b, <2 x i32> <i32 1, i32 3>
+  ret <2 x i32> %shuffle.i
+}
+
+define <4 x i32> @test_vuzp2q_u32(<4 x i32> %a, <4 x i32> %b) {
+; CHECK: test_vuzp2q_u32:
+; CHECK: uzp2 {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.4s
+entry:
+  %shuffle.i = shufflevector <4 x i32> %a, <4 x i32> %b, <4 x i32> <i32 1, i32 3, i32 5, i32 7>
+  ret <4 x i32> %shuffle.i
+}
+
+define <2 x i64> @test_vuzp2q_u64(<2 x i64> %a, <2 x i64> %b) {
+; CHECK: test_vuzp2q_u64:
+; CHECK: ins {{v[0-9]+}}.d[0], {{v[0-9]+}}.d[1]
+entry:
+  %shuffle.i = shufflevector <2 x i64> %a, <2 x i64> %b, <2 x i32> <i32 1, i32 3>
+  ret <2 x i64> %shuffle.i
+}
+
+define <2 x float> @test_vuzp2_f32(<2 x float> %a, <2 x float> %b) {
+; CHECK: test_vuzp2_f32:
+; CHECK: ins {{v[0-9]+}}.s[0], {{v[0-9]+}}.s[1]
+entry:
+  %shuffle.i = shufflevector <2 x float> %a, <2 x float> %b, <2 x i32> <i32 1, i32 3>
+  ret <2 x float> %shuffle.i
+}
+
+define <4 x float> @test_vuzp2q_f32(<4 x float> %a, <4 x float> %b) {
+; CHECK: test_vuzp2q_f32:
+; CHECK: uzp2 {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.4s
+entry:
+  %shuffle.i = shufflevector <4 x float> %a, <4 x float> %b, <4 x i32> <i32 1, i32 3, i32 5, i32 7>
+  ret <4 x float> %shuffle.i
+}
+
+define <2 x double> @test_vuzp2q_f64(<2 x double> %a, <2 x double> %b) {
+; CHECK: test_vuzp2q_f64:
+; CHECK: ins {{v[0-9]+}}.d[0], {{v[0-9]+}}.d[1]
+entry:
+  %shuffle.i = shufflevector <2 x double> %a, <2 x double> %b, <2 x i32> <i32 1, i32 3>
+  ret <2 x double> %shuffle.i
+}
+
+define <8 x i8> @test_vuzp2_p8(<8 x i8> %a, <8 x i8> %b) {
+; CHECK: test_vuzp2_p8:
+; CHECK: uzp2 {{v[0-9]+}}.8b, {{v[0-9]+}}.8b, {{v[0-9]+}}.8b
+entry:
+  %shuffle.i = shufflevector <8 x i8> %a, <8 x i8> %b, <8 x i32> <i32 1, i32 3, i32 5, i32 7, i32 9, i32 11, i32 13, i32 15>
+  ret <8 x i8> %shuffle.i
+}
+
+define <16 x i8> @test_vuzp2q_p8(<16 x i8> %a, <16 x i8> %b) {
+; CHECK: test_vuzp2q_p8:
+; CHECK: uzp2 {{v[0-9]+}}.16b, {{v[0-9]+}}.16b, {{v[0-9]+}}.16b
+entry:
+  %shuffle.i = shufflevector <16 x i8> %a, <16 x i8> %b, <16 x i32> <i32 1, i32 3, i32 5, i32 7, i32 9, i32 11, i32 13, i32 15, i32 17, i32 19, i32 21, i32 23, i32 25, i32 27, i32 29, i32 31>
+  ret <16 x i8> %shuffle.i
+}
+
+define <4 x i16> @test_vuzp2_p16(<4 x i16> %a, <4 x i16> %b) {
+; CHECK: test_vuzp2_p16:
+; CHECK: uzp2 {{v[0-9]+}}.4h, {{v[0-9]+}}.4h, {{v[0-9]+}}.4h
+entry:
+  %shuffle.i = shufflevector <4 x i16> %a, <4 x i16> %b, <4 x i32> <i32 1, i32 3, i32 5, i32 7>
+  ret <4 x i16> %shuffle.i
+}
+
+define <8 x i16> @test_vuzp2q_p16(<8 x i16> %a, <8 x i16> %b) {
+; CHECK: test_vuzp2q_p16:
+; CHECK: uzp2 {{v[0-9]+}}.8h, {{v[0-9]+}}.8h, {{v[0-9]+}}.8h
+entry:
+  %shuffle.i = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> <i32 1, i32 3, i32 5, i32 7, i32 9, i32 11, i32 13, i32 15>
+  ret <8 x i16> %shuffle.i
+}
+
+define <8 x i8> @test_vzip1_s8(<8 x i8> %a, <8 x i8> %b) {
+; CHECK: test_vzip1_s8:
+; CHECK: zip1 {{v[0-9]+}}.8b, {{v[0-9]+}}.8b, {{v[0-9]+}}.8b
+entry:
+  %shuffle.i = shufflevector <8 x i8> %a, <8 x i8> %b, <8 x i32> <i32 0, i32 8, i32 1, i32 9, i32 2, i32 10, i32 3, i32 11>
+  ret <8 x i8> %shuffle.i
+}
+
+define <16 x i8> @test_vzip1q_s8(<16 x i8> %a, <16 x i8> %b) {
+; CHECK: test_vzip1q_s8:
+; CHECK: zip1 {{v[0-9]+}}.16b, {{v[0-9]+}}.16b, {{v[0-9]+}}.16b
+entry:
+  %shuffle.i = shufflevector <16 x i8> %a, <16 x i8> %b, <16 x i32> <i32 0, i32 16, i32 1, i32 17, i32 2, i32 18, i32 3, i32 19, i32 4, i32 20, i32 5, i32 21, i32 6, i32 22, i32 7, i32 23>
+  ret <16 x i8> %shuffle.i
+}
+
+define <4 x i16> @test_vzip1_s16(<4 x i16> %a, <4 x i16> %b) {
+; CHECK: test_vzip1_s16:
+; CHECK: zip1 {{v[0-9]+}}.4h, {{v[0-9]+}}.4h, {{v[0-9]+}}.4h
+entry:
+  %shuffle.i = shufflevector <4 x i16> %a, <4 x i16> %b, <4 x i32> <i32 0, i32 4, i32 1, i32 5>
+  ret <4 x i16> %shuffle.i
+}
+
+define <8 x i16> @test_vzip1q_s16(<8 x i16> %a, <8 x i16> %b) {
+; CHECK: test_vzip1q_s16:
+; CHECK: zip1 {{v[0-9]+}}.8h, {{v[0-9]+}}.8h, {{v[0-9]+}}.8h
+entry:
+  %shuffle.i = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> <i32 0, i32 8, i32 1, i32 9, i32 2, i32 10, i32 3, i32 11>
+  ret <8 x i16> %shuffle.i
+}
+
+define <2 x i32> @test_vzip1_s32(<2 x i32> %a, <2 x i32> %b) {
+; CHECK: test_vzip1_s32:
+; CHECK: ins {{v[0-9]+}}.s[1], {{v[0-9]+}}.s[0]
+entry:
+  %shuffle.i = shufflevector <2 x i32> %a, <2 x i32> %b, <2 x i32> <i32 0, i32 2>
+  ret <2 x i32> %shuffle.i
+}
+
+define <4 x i32> @test_vzip1q_s32(<4 x i32> %a, <4 x i32> %b) {
+; CHECK: test_vzip1q_s32:
+; CHECK: zip1 {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.4s
+entry:
+  %shuffle.i = shufflevector <4 x i32> %a, <4 x i32> %b, <4 x i32> <i32 0, i32 4, i32 1, i32 5>
+  ret <4 x i32> %shuffle.i
+}
+
+define <2 x i64> @test_vzip1q_s64(<2 x i64> %a, <2 x i64> %b) {
+; CHECK: test_vzip1q_s64:
+; CHECK: ins {{v[0-9]+}}.d[1], {{v[0-9]+}}.d[0]
+entry:
+  %shuffle.i = shufflevector <2 x i64> %a, <2 x i64> %b, <2 x i32> <i32 0, i32 2>
+  ret <2 x i64> %shuffle.i
+}
+
+define <8 x i8> @test_vzip1_u8(<8 x i8> %a, <8 x i8> %b) {
+; CHECK: test_vzip1_u8:
+; CHECK: zip1 {{v[0-9]+}}.8b, {{v[0-9]+}}.8b, {{v[0-9]+}}.8b
+entry:
+  %shuffle.i = shufflevector <8 x i8> %a, <8 x i8> %b, <8 x i32> <i32 0, i32 8, i32 1, i32 9, i32 2, i32 10, i32 3, i32 11>
+  ret <8 x i8> %shuffle.i
+}
+
+define <16 x i8> @test_vzip1q_u8(<16 x i8> %a, <16 x i8> %b) {
+; CHECK: test_vzip1q_u8:
+; CHECK: zip1 {{v[0-9]+}}.16b, {{v[0-9]+}}.16b, {{v[0-9]+}}.16b
+entry:
+  %shuffle.i = shufflevector <16 x i8> %a, <16 x i8> %b, <16 x i32> <i32 0, i32 16, i32 1, i32 17, i32 2, i32 18, i32 3, i32 19, i32 4, i32 20, i32 5, i32 21, i32 6, i32 22, i32 7, i32 23>
+  ret <16 x i8> %shuffle.i
+}
+
+define <4 x i16> @test_vzip1_u16(<4 x i16> %a, <4 x i16> %b) {
+; CHECK: test_vzip1_u16:
+; CHECK: zip1 {{v[0-9]+}}.4h, {{v[0-9]+}}.4h, {{v[0-9]+}}.4h
+entry:
+  %shuffle.i = shufflevector <4 x i16> %a, <4 x i16> %b, <4 x i32> <i32 0, i32 4, i32 1, i32 5>
+  ret <4 x i16> %shuffle.i
+}
+
+define <8 x i16> @test_vzip1q_u16(<8 x i16> %a, <8 x i16> %b) {
+; CHECK: test_vzip1q_u16:
+; CHECK: zip1 {{v[0-9]+}}.8h, {{v[0-9]+}}.8h, {{v[0-9]+}}.8h
+entry:
+  %shuffle.i = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> <i32 0, i32 8, i32 1, i32 9, i32 2, i32 10, i32 3, i32 11>
+  ret <8 x i16> %shuffle.i
+}
+
+define <2 x i32> @test_vzip1_u32(<2 x i32> %a, <2 x i32> %b) {
+; CHECK: test_vzip1_u32:
+; CHECK: ins {{v[0-9]+}}.s[1], {{v[0-9]+}}.s[0]
+entry:
+  %shuffle.i = shufflevector <2 x i32> %a, <2 x i32> %b, <2 x i32> <i32 0, i32 2>
+  ret <2 x i32> %shuffle.i
+}
+
+define <4 x i32> @test_vzip1q_u32(<4 x i32> %a, <4 x i32> %b) {
+; CHECK: test_vzip1q_u32:
+; CHECK: zip1 {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.4s
+entry:
+  %shuffle.i = shufflevector <4 x i32> %a, <4 x i32> %b, <4 x i32> <i32 0, i32 4, i32 1, i32 5>
+  ret <4 x i32> %shuffle.i
+}
+
+define <2 x i64> @test_vzip1q_u64(<2 x i64> %a, <2 x i64> %b) {
+; CHECK: test_vzip1q_u64:
+; CHECK: ins {{v[0-9]+}}.d[1], {{v[0-9]+}}.d[0]
+entry:
+  %shuffle.i = shufflevector <2 x i64> %a, <2 x i64> %b, <2 x i32> <i32 0, i32 2>
+  ret <2 x i64> %shuffle.i
+}
+
+define <2 x float> @test_vzip1_f32(<2 x float> %a, <2 x float> %b) {
+; CHECK: test_vzip1_f32:
+; CHECK: ins {{v[0-9]+}}.s[1], {{v[0-9]+}}.s[0]
+entry:
+  %shuffle.i = shufflevector <2 x float> %a, <2 x float> %b, <2 x i32> <i32 0, i32 2>
+  ret <2 x float> %shuffle.i
+}
+
+define <4 x float> @test_vzip1q_f32(<4 x float> %a, <4 x float> %b) {
+; CHECK: test_vzip1q_f32:
+; CHECK: zip1 {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.4s
+entry:
+  %shuffle.i = shufflevector <4 x float> %a, <4 x float> %b, <4 x i32> <i32 0, i32 4, i32 1, i32 5>
+  ret <4 x float> %shuffle.i
+}
+
+define <2 x double> @test_vzip1q_f64(<2 x double> %a, <2 x double> %b) {
+; CHECK: test_vzip1q_f64:
+; CHECK: ins {{v[0-9]+}}.d[1], {{v[0-9]+}}.d[0]
+entry:
+  %shuffle.i = shufflevector <2 x double> %a, <2 x double> %b, <2 x i32> <i32 0, i32 2>
+  ret <2 x double> %shuffle.i
+}
+
+define <8 x i8> @test_vzip1_p8(<8 x i8> %a, <8 x i8> %b) {
+; CHECK: test_vzip1_p8:
+; CHECK: zip1 {{v[0-9]+}}.8b, {{v[0-9]+}}.8b, {{v[0-9]+}}.8b
+entry:
+  %shuffle.i = shufflevector <8 x i8> %a, <8 x i8> %b, <8 x i32> <i32 0, i32 8, i32 1, i32 9, i32 2, i32 10, i32 3, i32 11>
+  ret <8 x i8> %shuffle.i
+}
+
+define <16 x i8> @test_vzip1q_p8(<16 x i8> %a, <16 x i8> %b) {
+; CHECK: test_vzip1q_p8:
+; CHECK: zip1 {{v[0-9]+}}.16b, {{v[0-9]+}}.16b, {{v[0-9]+}}.16b
+entry:
+  %shuffle.i = shufflevector <16 x i8> %a, <16 x i8> %b, <16 x i32> <i32 0, i32 16, i32 1, i32 17, i32 2, i32 18, i32 3, i32 19, i32 4, i32 20, i32 5, i32 21, i32 6, i32 22, i32 7, i32 23>
+  ret <16 x i8> %shuffle.i
+}
+
+define <4 x i16> @test_vzip1_p16(<4 x i16> %a, <4 x i16> %b) {
+; CHECK: test_vzip1_p16:
+; CHECK: zip1 {{v[0-9]+}}.4h, {{v[0-9]+}}.4h, {{v[0-9]+}}.4h
+entry:
+  %shuffle.i = shufflevector <4 x i16> %a, <4 x i16> %b, <4 x i32> <i32 0, i32 4, i32 1, i32 5>
+  ret <4 x i16> %shuffle.i
+}
+
+define <8 x i16> @test_vzip1q_p16(<8 x i16> %a, <8 x i16> %b) {
+; CHECK: test_vzip1q_p16:
+; CHECK: zip1 {{v[0-9]+}}.8h, {{v[0-9]+}}.8h, {{v[0-9]+}}.8h
+entry:
+  %shuffle.i = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> <i32 0, i32 8, i32 1, i32 9, i32 2, i32 10, i32 3, i32 11>
+  ret <8 x i16> %shuffle.i
+}
+
+define <8 x i8> @test_vzip2_s8(<8 x i8> %a, <8 x i8> %b) {
+; CHECK: test_vzip2_s8:
+; CHECK: zip2 {{v[0-9]+}}.8b, {{v[0-9]+}}.8b, {{v[0-9]+}}.8b
+entry:
+  %shuffle.i = shufflevector <8 x i8> %a, <8 x i8> %b, <8 x i32> <i32 4, i32 12, i32 5, i32 13, i32 6, i32 14, i32 7, i32 15>
+  ret <8 x i8> %shuffle.i
+}
+
+define <16 x i8> @test_vzip2q_s8(<16 x i8> %a, <16 x i8> %b) {
+; CHECK: test_vzip2q_s8:
+; CHECK: zip2 {{v[0-9]+}}.16b, {{v[0-9]+}}.16b, {{v[0-9]+}}.16b
+entry:
+  %shuffle.i = shufflevector <16 x i8> %a, <16 x i8> %b, <16 x i32> <i32 8, i32 24, i32 9, i32 25, i32 10, i32 26, i32 11, i32 27, i32 12, i32 28, i32 13, i32 29, i32 14, i32 30, i32 15, i32 31>
+  ret <16 x i8> %shuffle.i
+}
+
+define <4 x i16> @test_vzip2_s16(<4 x i16> %a, <4 x i16> %b) {
+; CHECK: test_vzip2_s16:
+; CHECK: zip2 {{v[0-9]+}}.4h, {{v[0-9]+}}.4h, {{v[0-9]+}}.4h
+entry:
+  %shuffle.i = shufflevector <4 x i16> %a, <4 x i16> %b, <4 x i32> <i32 2, i32 6, i32 3, i32 7>
+  ret <4 x i16> %shuffle.i
+}
+
+define <8 x i16> @test_vzip2q_s16(<8 x i16> %a, <8 x i16> %b) {
+; CHECK: test_vzip2q_s16:
+; CHECK: zip2 {{v[0-9]+}}.8h, {{v[0-9]+}}.8h, {{v[0-9]+}}.8h
+entry:
+  %shuffle.i = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> <i32 4, i32 12, i32 5, i32 13, i32 6, i32 14, i32 7, i32 15>
+  ret <8 x i16> %shuffle.i
+}
+
+define <2 x i32> @test_vzip2_s32(<2 x i32> %a, <2 x i32> %b) {
+; CHECK: test_vzip2_s32:
+; CHECK: ins {{v[0-9]+}}.s[0], {{v[0-9]+}}.s[1]
+entry:
+  %shuffle.i = shufflevector <2 x i32> %a, <2 x i32> %b, <2 x i32> <i32 1, i32 3>
+  ret <2 x i32> %shuffle.i
+}
+
+define <4 x i32> @test_vzip2q_s32(<4 x i32> %a, <4 x i32> %b) {
+; CHECK: test_vzip2q_s32:
+; CHECK: zip2 {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.4s
+entry:
+  %shuffle.i = shufflevector <4 x i32> %a, <4 x i32> %b, <4 x i32> <i32 2, i32 6, i32 3, i32 7>
+  ret <4 x i32> %shuffle.i
+}
+
+define <2 x i64> @test_vzip2q_s64(<2 x i64> %a, <2 x i64> %b) {
+; CHECK: test_vzip2q_s64:
+; CHECK: ins {{v[0-9]+}}.d[0], {{v[0-9]+}}.d[1]
+entry:
+  %shuffle.i = shufflevector <2 x i64> %a, <2 x i64> %b, <2 x i32> <i32 1, i32 3>
+  ret <2 x i64> %shuffle.i
+}
+
+define <8 x i8> @test_vzip2_u8(<8 x i8> %a, <8 x i8> %b) {
+; CHECK: test_vzip2_u8:
+; CHECK: zip2 {{v[0-9]+}}.8b, {{v[0-9]+}}.8b, {{v[0-9]+}}.8b
+entry:
+  %shuffle.i = shufflevector <8 x i8> %a, <8 x i8> %b, <8 x i32> <i32 4, i32 12, i32 5, i32 13, i32 6, i32 14, i32 7, i32 15>
+  ret <8 x i8> %shuffle.i
+}
+
+define <16 x i8> @test_vzip2q_u8(<16 x i8> %a, <16 x i8> %b) {
+; CHECK: test_vzip2q_u8:
+; CHECK: zip2 {{v[0-9]+}}.16b, {{v[0-9]+}}.16b, {{v[0-9]+}}.16b
+entry:
+  %shuffle.i = shufflevector <16 x i8> %a, <16 x i8> %b, <16 x i32> <i32 8, i32 24, i32 9, i32 25, i32 10, i32 26, i32 11, i32 27, i32 12, i32 28, i32 13, i32 29, i32 14, i32 30, i32 15, i32 31>
+  ret <16 x i8> %shuffle.i
+}
+
+define <4 x i16> @test_vzip2_u16(<4 x i16> %a, <4 x i16> %b) {
+; CHECK: test_vzip2_u16:
+; CHECK: zip2 {{v[0-9]+}}.4h, {{v[0-9]+}}.4h, {{v[0-9]+}}.4h
+entry:
+  %shuffle.i = shufflevector <4 x i16> %a, <4 x i16> %b, <4 x i32> <i32 2, i32 6, i32 3, i32 7>
+  ret <4 x i16> %shuffle.i
+}
+
+define <8 x i16> @test_vzip2q_u16(<8 x i16> %a, <8 x i16> %b) {
+; CHECK: test_vzip2q_u16:
+; CHECK: zip2 {{v[0-9]+}}.8h, {{v[0-9]+}}.8h, {{v[0-9]+}}.8h
+entry:
+  %shuffle.i = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> <i32 4, i32 12, i32 5, i32 13, i32 6, i32 14, i32 7, i32 15>
+  ret <8 x i16> %shuffle.i
+}
+
+define <2 x i32> @test_vzip2_u32(<2 x i32> %a, <2 x i32> %b) {
+; CHECK: test_vzip2_u32:
+; CHECK: ins {{v[0-9]+}}.s[0], {{v[0-9]+}}.s[1]
+entry:
+  %shuffle.i = shufflevector <2 x i32> %a, <2 x i32> %b, <2 x i32> <i32 1, i32 3>
+  ret <2 x i32> %shuffle.i
+}
+
+define <4 x i32> @test_vzip2q_u32(<4 x i32> %a, <4 x i32> %b) {
+; CHECK: test_vzip2q_u32:
+; CHECK: zip2 {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.4s
+entry:
+  %shuffle.i = shufflevector <4 x i32> %a, <4 x i32> %b, <4 x i32> <i32 2, i32 6, i32 3, i32 7>
+  ret <4 x i32> %shuffle.i
+}
+
+define <2 x i64> @test_vzip2q_u64(<2 x i64> %a, <2 x i64> %b) {
+; CHECK: test_vzip2q_u64:
+; CHECK: ins {{v[0-9]+}}.d[0], {{v[0-9]+}}.d[1]
+entry:
+  %shuffle.i = shufflevector <2 x i64> %a, <2 x i64> %b, <2 x i32> <i32 1, i32 3>
+  ret <2 x i64> %shuffle.i
+}
+
+define <2 x float> @test_vzip2_f32(<2 x float> %a, <2 x float> %b) {
+; CHECK: test_vzip2_f32:
+; CHECK: ins {{v[0-9]+}}.s[0], {{v[0-9]+}}.s[1]
+entry:
+  %shuffle.i = shufflevector <2 x float> %a, <2 x float> %b, <2 x i32> <i32 1, i32 3>
+  ret <2 x float> %shuffle.i
+}
+
+define <4 x float> @test_vzip2q_f32(<4 x float> %a, <4 x float> %b) {
+; CHECK: test_vzip2q_f32:
+; CHECK: zip2 {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.4s
+entry:
+  %shuffle.i = shufflevector <4 x float> %a, <4 x float> %b, <4 x i32> <i32 2, i32 6, i32 3, i32 7>
+  ret <4 x float> %shuffle.i
+}
+
+define <2 x double> @test_vzip2q_f64(<2 x double> %a, <2 x double> %b) {
+; CHECK: test_vzip2q_f64:
+; CHECK: ins {{v[0-9]+}}.d[0], {{v[0-9]+}}.d[1]
+entry:
+  %shuffle.i = shufflevector <2 x double> %a, <2 x double> %b, <2 x i32> <i32 1, i32 3>
+  ret <2 x double> %shuffle.i
+}
+
+define <8 x i8> @test_vzip2_p8(<8 x i8> %a, <8 x i8> %b) {
+; CHECK: test_vzip2_p8:
+; CHECK: zip2 {{v[0-9]+}}.8b, {{v[0-9]+}}.8b, {{v[0-9]+}}.8b
+entry:
+  %shuffle.i = shufflevector <8 x i8> %a, <8 x i8> %b, <8 x i32> <i32 4, i32 12, i32 5, i32 13, i32 6, i32 14, i32 7, i32 15>
+  ret <8 x i8> %shuffle.i
+}
+
+define <16 x i8> @test_vzip2q_p8(<16 x i8> %a, <16 x i8> %b) {
+; CHECK: test_vzip2q_p8:
+; CHECK: zip2 {{v[0-9]+}}.16b, {{v[0-9]+}}.16b, {{v[0-9]+}}.16b
+entry:
+  %shuffle.i = shufflevector <16 x i8> %a, <16 x i8> %b, <16 x i32> <i32 8, i32 24, i32 9, i32 25, i32 10, i32 26, i32 11, i32 27, i32 12, i32 28, i32 13, i32 29, i32 14, i32 30, i32 15, i32 31>
+  ret <16 x i8> %shuffle.i
+}
+
+define <4 x i16> @test_vzip2_p16(<4 x i16> %a, <4 x i16> %b) {
+; CHECK: test_vzip2_p16:
+; CHECK: zip2 {{v[0-9]+}}.4h, {{v[0-9]+}}.4h, {{v[0-9]+}}.4h
+entry:
+  %shuffle.i = shufflevector <4 x i16> %a, <4 x i16> %b, <4 x i32> <i32 2, i32 6, i32 3, i32 7>
+  ret <4 x i16> %shuffle.i
+}
+
+define <8 x i16> @test_vzip2q_p16(<8 x i16> %a, <8 x i16> %b) {
+; CHECK: test_vzip2q_p16:
+; CHECK: zip2 {{v[0-9]+}}.8h, {{v[0-9]+}}.8h, {{v[0-9]+}}.8h
+entry:
+  %shuffle.i = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> <i32 4, i32 12, i32 5, i32 13, i32 6, i32 14, i32 7, i32 15>
+  ret <8 x i16> %shuffle.i
+}
+
+define <8 x i8> @test_vtrn1_s8(<8 x i8> %a, <8 x i8> %b) {
+; CHECK: test_vtrn1_s8:
+; CHECK: trn1 {{v[0-9]+}}.8b, {{v[0-9]+}}.8b, {{v[0-9]+}}.8b
+entry:
+  %shuffle.i = shufflevector <8 x i8> %a, <8 x i8> %b, <8 x i32> <i32 0, i32 8, i32 2, i32 10, i32 4, i32 12, i32 6, i32 14>
+  ret <8 x i8> %shuffle.i
+}
+
+define <16 x i8> @test_vtrn1q_s8(<16 x i8> %a, <16 x i8> %b) {
+; CHECK: test_vtrn1q_s8:
+; CHECK: trn1 {{v[0-9]+}}.16b, {{v[0-9]+}}.16b, {{v[0-9]+}}.16b
+entry:
+  %shuffle.i = shufflevector <16 x i8> %a, <16 x i8> %b, <16 x i32> <i32 0, i32 16, i32 2, i32 18, i32 4, i32 20, i32 6, i32 22, i32 8, i32 24, i32 10, i32 26, i32 12, i32 28, i32 14, i32 30>
+  ret <16 x i8> %shuffle.i
+}
+
+define <4 x i16> @test_vtrn1_s16(<4 x i16> %a, <4 x i16> %b) {
+; CHECK: test_vtrn1_s16:
+; CHECK: trn1 {{v[0-9]+}}.4h, {{v[0-9]+}}.4h, {{v[0-9]+}}.4h
+entry:
+  %shuffle.i = shufflevector <4 x i16> %a, <4 x i16> %b, <4 x i32> <i32 0, i32 4, i32 2, i32 6>
+  ret <4 x i16> %shuffle.i
+}
+
+define <8 x i16> @test_vtrn1q_s16(<8 x i16> %a, <8 x i16> %b) {
+; CHECK: test_vtrn1q_s16:
+; CHECK: trn1 {{v[0-9]+}}.8h, {{v[0-9]+}}.8h, {{v[0-9]+}}.8h
+entry:
+  %shuffle.i = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> <i32 0, i32 8, i32 2, i32 10, i32 4, i32 12, i32 6, i32 14>
+  ret <8 x i16> %shuffle.i
+}
+
+define <2 x i32> @test_vtrn1_s32(<2 x i32> %a, <2 x i32> %b) {
+; CHECK: test_vtrn1_s32:
+; CHECK: ins {{v[0-9]+}}.s[1], {{v[0-9]+}}.s[0]
+entry:
+  %shuffle.i = shufflevector <2 x i32> %a, <2 x i32> %b, <2 x i32> <i32 0, i32 2>
+  ret <2 x i32> %shuffle.i
+}
+
+define <4 x i32> @test_vtrn1q_s32(<4 x i32> %a, <4 x i32> %b) {
+; CHECK: test_vtrn1q_s32:
+; CHECK: trn1 {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.4s
+entry:
+  %shuffle.i = shufflevector <4 x i32> %a, <4 x i32> %b, <4 x i32> <i32 0, i32 4, i32 2, i32 6>
+  ret <4 x i32> %shuffle.i
+}
+
+define <2 x i64> @test_vtrn1q_s64(<2 x i64> %a, <2 x i64> %b) {
+; CHECK: test_vtrn1q_s64:
+; CHECK: ins {{v[0-9]+}}.d[1], {{v[0-9]+}}.d[0]
+entry:
+  %shuffle.i = shufflevector <2 x i64> %a, <2 x i64> %b, <2 x i32> <i32 0, i32 2>
+  ret <2 x i64> %shuffle.i
+}
+
+define <8 x i8> @test_vtrn1_u8(<8 x i8> %a, <8 x i8> %b) {
+; CHECK: test_vtrn1_u8:
+; CHECK: trn1 {{v[0-9]+}}.8b, {{v[0-9]+}}.8b, {{v[0-9]+}}.8b
+entry:
+  %shuffle.i = shufflevector <8 x i8> %a, <8 x i8> %b, <8 x i32> <i32 0, i32 8, i32 2, i32 10, i32 4, i32 12, i32 6, i32 14>
+  ret <8 x i8> %shuffle.i
+}
+
+define <16 x i8> @test_vtrn1q_u8(<16 x i8> %a, <16 x i8> %b) {
+; CHECK: test_vtrn1q_u8:
+; CHECK: trn1 {{v[0-9]+}}.16b, {{v[0-9]+}}.16b, {{v[0-9]+}}.16b
+entry:
+  %shuffle.i = shufflevector <16 x i8> %a, <16 x i8> %b, <16 x i32> <i32 0, i32 16, i32 2, i32 18, i32 4, i32 20, i32 6, i32 22, i32 8, i32 24, i32 10, i32 26, i32 12, i32 28, i32 14, i32 30>
+  ret <16 x i8> %shuffle.i
+}
+
+define <4 x i16> @test_vtrn1_u16(<4 x i16> %a, <4 x i16> %b) {
+; CHECK: test_vtrn1_u16:
+; CHECK: trn1 {{v[0-9]+}}.4h, {{v[0-9]+}}.4h, {{v[0-9]+}}.4h
+entry:
+  %shuffle.i = shufflevector <4 x i16> %a, <4 x i16> %b, <4 x i32> <i32 0, i32 4, i32 2, i32 6>
+  ret <4 x i16> %shuffle.i
+}
+
+define <8 x i16> @test_vtrn1q_u16(<8 x i16> %a, <8 x i16> %b) {
+; CHECK: test_vtrn1q_u16:
+; CHECK: trn1 {{v[0-9]+}}.8h, {{v[0-9]+}}.8h, {{v[0-9]+}}.8h
+entry:
+  %shuffle.i = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> <i32 0, i32 8, i32 2, i32 10, i32 4, i32 12, i32 6, i32 14>
+  ret <8 x i16> %shuffle.i
+}
+
+define <2 x i32> @test_vtrn1_u32(<2 x i32> %a, <2 x i32> %b) {
+; CHECK: test_vtrn1_u32:
+; CHECK: ins {{v[0-9]+}}.s[1], {{v[0-9]+}}.s[0]
+entry:
+  %shuffle.i = shufflevector <2 x i32> %a, <2 x i32> %b, <2 x i32> <i32 0, i32 2>
+  ret <2 x i32> %shuffle.i
+}
+
+define <4 x i32> @test_vtrn1q_u32(<4 x i32> %a, <4 x i32> %b) {
+; CHECK: test_vtrn1q_u32:
+; CHECK: trn1 {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.4s
+entry:
+  %shuffle.i = shufflevector <4 x i32> %a, <4 x i32> %b, <4 x i32> <i32 0, i32 4, i32 2, i32 6>
+  ret <4 x i32> %shuffle.i
+}
+
+define <2 x i64> @test_vtrn1q_u64(<2 x i64> %a, <2 x i64> %b) {
+; CHECK: test_vtrn1q_u64:
+; CHECK: ins {{v[0-9]+}}.d[1], {{v[0-9]+}}.d[0]
+entry:
+  %shuffle.i = shufflevector <2 x i64> %a, <2 x i64> %b, <2 x i32> <i32 0, i32 2>
+  ret <2 x i64> %shuffle.i
+}
+
+define <2 x float> @test_vtrn1_f32(<2 x float> %a, <2 x float> %b) {
+; CHECK: test_vtrn1_f32:
+; CHECK: ins {{v[0-9]+}}.s[1], {{v[0-9]+}}.s[0]
+entry:
+  %shuffle.i = shufflevector <2 x float> %a, <2 x float> %b, <2 x i32> <i32 0, i32 2>
+  ret <2 x float> %shuffle.i
+}
+
+define <4 x float> @test_vtrn1q_f32(<4 x float> %a, <4 x float> %b) {
+; CHECK: test_vtrn1q_f32:
+; CHECK: trn1 {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.4s
+entry:
+  %shuffle.i = shufflevector <4 x float> %a, <4 x float> %b, <4 x i32> <i32 0, i32 4, i32 2, i32 6>
+  ret <4 x float> %shuffle.i
+}
+
+define <2 x double> @test_vtrn1q_f64(<2 x double> %a, <2 x double> %b) {
+; CHECK: test_vtrn1q_f64:
+; CHECK: ins {{v[0-9]+}}.d[1], {{v[0-9]+}}.d[0]
+entry:
+  %shuffle.i = shufflevector <2 x double> %a, <2 x double> %b, <2 x i32> <i32 0, i32 2>
+  ret <2 x double> %shuffle.i
+}
+
+define <8 x i8> @test_vtrn1_p8(<8 x i8> %a, <8 x i8> %b) {
+; CHECK: test_vtrn1_p8:
+; CHECK: trn1 {{v[0-9]+}}.8b, {{v[0-9]+}}.8b, {{v[0-9]+}}.8b
+entry:
+  %shuffle.i = shufflevector <8 x i8> %a, <8 x i8> %b, <8 x i32> <i32 0, i32 8, i32 2, i32 10, i32 4, i32 12, i32 6, i32 14>
+  ret <8 x i8> %shuffle.i
+}
+
+define <16 x i8> @test_vtrn1q_p8(<16 x i8> %a, <16 x i8> %b) {
+; CHECK: test_vtrn1q_p8:
+; CHECK: trn1 {{v[0-9]+}}.16b, {{v[0-9]+}}.16b, {{v[0-9]+}}.16b
+entry:
+  %shuffle.i = shufflevector <16 x i8> %a, <16 x i8> %b, <16 x i32> <i32 0, i32 16, i32 2, i32 18, i32 4, i32 20, i32 6, i32 22, i32 8, i32 24, i32 10, i32 26, i32 12, i32 28, i32 14, i32 30>
+  ret <16 x i8> %shuffle.i
+}
+
+define <4 x i16> @test_vtrn1_p16(<4 x i16> %a, <4 x i16> %b) {
+; CHECK: test_vtrn1_p16:
+; CHECK: trn1 {{v[0-9]+}}.4h, {{v[0-9]+}}.4h, {{v[0-9]+}}.4h
+entry:
+  %shuffle.i = shufflevector <4 x i16> %a, <4 x i16> %b, <4 x i32> <i32 0, i32 4, i32 2, i32 6>
+  ret <4 x i16> %shuffle.i
+}
+
+define <8 x i16> @test_vtrn1q_p16(<8 x i16> %a, <8 x i16> %b) {
+; CHECK: test_vtrn1q_p16:
+; CHECK: trn1 {{v[0-9]+}}.8h, {{v[0-9]+}}.8h, {{v[0-9]+}}.8h
+entry:
+  %shuffle.i = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> <i32 0, i32 8, i32 2, i32 10, i32 4, i32 12, i32 6, i32 14>
+  ret <8 x i16> %shuffle.i
+}
+
+define <8 x i8> @test_vtrn2_s8(<8 x i8> %a, <8 x i8> %b) {
+; CHECK: test_vtrn2_s8:
+; CHECK: trn2 {{v[0-9]+}}.8b, {{v[0-9]+}}.8b, {{v[0-9]+}}.8b
+entry:
+  %shuffle.i = shufflevector <8 x i8> %a, <8 x i8> %b, <8 x i32> <i32 1, i32 9, i32 3, i32 11, i32 5, i32 13, i32 7, i32 15>
+  ret <8 x i8> %shuffle.i
+}
+
+define <16 x i8> @test_vtrn2q_s8(<16 x i8> %a, <16 x i8> %b) {
+; CHECK: test_vtrn2q_s8:
+; CHECK: trn2 {{v[0-9]+}}.16b, {{v[0-9]+}}.16b, {{v[0-9]+}}.16b
+entry:
+  %shuffle.i = shufflevector <16 x i8> %a, <16 x i8> %b, <16 x i32> <i32 1, i32 17, i32 3, i32 19, i32 5, i32 21, i32 7, i32 23, i32 9, i32 25, i32 11, i32 27, i32 13, i32 29, i32 15, i32 31>
+  ret <16 x i8> %shuffle.i
+}
+
+define <4 x i16> @test_vtrn2_s16(<4 x i16> %a, <4 x i16> %b) {
+; CHECK: test_vtrn2_s16:
+; CHECK: trn2 {{v[0-9]+}}.4h, {{v[0-9]+}}.4h, {{v[0-9]+}}.4h
+entry:
+  %shuffle.i = shufflevector <4 x i16> %a, <4 x i16> %b, <4 x i32> <i32 1, i32 5, i32 3, i32 7>
+  ret <4 x i16> %shuffle.i
+}
+
+define <8 x i16> @test_vtrn2q_s16(<8 x i16> %a, <8 x i16> %b) {
+; CHECK: test_vtrn2q_s16:
+; CHECK: trn2 {{v[0-9]+}}.8h, {{v[0-9]+}}.8h, {{v[0-9]+}}.8h
+entry:
+  %shuffle.i = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> <i32 1, i32 9, i32 3, i32 11, i32 5, i32 13, i32 7, i32 15>
+  ret <8 x i16> %shuffle.i
+}
+
+define <2 x i32> @test_vtrn2_s32(<2 x i32> %a, <2 x i32> %b) {
+; CHECK: test_vtrn2_s32:
+; CHECK: ins {{v[0-9]+}}.s[0], {{v[0-9]+}}.s[1]
+entry:
+  %shuffle.i = shufflevector <2 x i32> %a, <2 x i32> %b, <2 x i32> <i32 1, i32 3>
+  ret <2 x i32> %shuffle.i
+}
+
+define <4 x i32> @test_vtrn2q_s32(<4 x i32> %a, <4 x i32> %b) {
+; CHECK: test_vtrn2q_s32:
+; CHECK: trn2 {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.4s
+entry:
+  %shuffle.i = shufflevector <4 x i32> %a, <4 x i32> %b, <4 x i32> <i32 1, i32 5, i32 3, i32 7>
+  ret <4 x i32> %shuffle.i
+}
+
+define <2 x i64> @test_vtrn2q_s64(<2 x i64> %a, <2 x i64> %b) {
+; CHECK: test_vtrn2q_s64:
+; CHECK: ins {{v[0-9]+}}.d[0], {{v[0-9]+}}.d[1]
+entry:
+  %shuffle.i = shufflevector <2 x i64> %a, <2 x i64> %b, <2 x i32> <i32 1, i32 3>
+  ret <2 x i64> %shuffle.i
+}
+
+define <8 x i8> @test_vtrn2_u8(<8 x i8> %a, <8 x i8> %b) {
+; CHECK: test_vtrn2_u8:
+; CHECK: trn2 {{v[0-9]+}}.8b, {{v[0-9]+}}.8b, {{v[0-9]+}}.8b
+entry:
+  %shuffle.i = shufflevector <8 x i8> %a, <8 x i8> %b, <8 x i32> <i32 1, i32 9, i32 3, i32 11, i32 5, i32 13, i32 7, i32 15>
+  ret <8 x i8> %shuffle.i
+}
+
+define <16 x i8> @test_vtrn2q_u8(<16 x i8> %a, <16 x i8> %b) {
+; CHECK: test_vtrn2q_u8:
+; CHECK: trn2 {{v[0-9]+}}.16b, {{v[0-9]+}}.16b, {{v[0-9]+}}.16b
+entry:
+  %shuffle.i = shufflevector <16 x i8> %a, <16 x i8> %b, <16 x i32> <i32 1, i32 17, i32 3, i32 19, i32 5, i32 21, i32 7, i32 23, i32 9, i32 25, i32 11, i32 27, i32 13, i32 29, i32 15, i32 31>
+  ret <16 x i8> %shuffle.i
+}
+
+define <4 x i16> @test_vtrn2_u16(<4 x i16> %a, <4 x i16> %b) {
+; CHECK: test_vtrn2_u16:
+; CHECK: trn2 {{v[0-9]+}}.4h, {{v[0-9]+}}.4h, {{v[0-9]+}}.4h
+entry:
+  %shuffle.i = shufflevector <4 x i16> %a, <4 x i16> %b, <4 x i32> <i32 1, i32 5, i32 3, i32 7>
+  ret <4 x i16> %shuffle.i
+}
+
+define <8 x i16> @test_vtrn2q_u16(<8 x i16> %a, <8 x i16> %b) {
+; CHECK: test_vtrn2q_u16:
+; CHECK: trn2 {{v[0-9]+}}.8h, {{v[0-9]+}}.8h, {{v[0-9]+}}.8h
+entry:
+  %shuffle.i = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> <i32 1, i32 9, i32 3, i32 11, i32 5, i32 13, i32 7, i32 15>
+  ret <8 x i16> %shuffle.i
+}
+
+define <2 x i32> @test_vtrn2_u32(<2 x i32> %a, <2 x i32> %b) {
+; CHECK: test_vtrn2_u32:
+; CHECK: ins {{v[0-9]+}}.s[0], {{v[0-9]+}}.s[1]
+entry:
+  %shuffle.i = shufflevector <2 x i32> %a, <2 x i32> %b, <2 x i32> <i32 1, i32 3>
+  ret <2 x i32> %shuffle.i
+}
+
+define <4 x i32> @test_vtrn2q_u32(<4 x i32> %a, <4 x i32> %b) {
+; CHECK: test_vtrn2q_u32:
+; CHECK: trn2 {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.4s
+entry:
+  %shuffle.i = shufflevector <4 x i32> %a, <4 x i32> %b, <4 x i32> <i32 1, i32 5, i32 3, i32 7>
+  ret <4 x i32> %shuffle.i
+}
+
+define <2 x i64> @test_vtrn2q_u64(<2 x i64> %a, <2 x i64> %b) {
+; CHECK: test_vtrn2q_u64:
+; CHECK: ins {{v[0-9]+}}.d[0], {{v[0-9]+}}.d[1]
+entry:
+  %shuffle.i = shufflevector <2 x i64> %a, <2 x i64> %b, <2 x i32> <i32 1, i32 3>
+  ret <2 x i64> %shuffle.i
+}
+
+define <2 x float> @test_vtrn2_f32(<2 x float> %a, <2 x float> %b) {
+; CHECK: test_vtrn2_f32:
+; CHECK: ins {{v[0-9]+}}.s[0], {{v[0-9]+}}.s[1]
+entry:
+  %shuffle.i = shufflevector <2 x float> %a, <2 x float> %b, <2 x i32> <i32 1, i32 3>
+  ret <2 x float> %shuffle.i
+}
+
+define <4 x float> @test_vtrn2q_f32(<4 x float> %a, <4 x float> %b) {
+; CHECK: test_vtrn2q_f32:
+; CHECK: trn2 {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.4s
+entry:
+  %shuffle.i = shufflevector <4 x float> %a, <4 x float> %b, <4 x i32> <i32 1, i32 5, i32 3, i32 7>
+  ret <4 x float> %shuffle.i
+}
+
+define <2 x double> @test_vtrn2q_f64(<2 x double> %a, <2 x double> %b) {
+; CHECK: test_vtrn2q_f64:
+; CHECK: ins {{v[0-9]+}}.d[0], {{v[0-9]+}}.d[1]
+entry:
+  %shuffle.i = shufflevector <2 x double> %a, <2 x double> %b, <2 x i32> <i32 1, i32 3>
+  ret <2 x double> %shuffle.i
+}
+
+define <8 x i8> @test_vtrn2_p8(<8 x i8> %a, <8 x i8> %b) {
+; CHECK: test_vtrn2_p8:
+; CHECK: trn2 {{v[0-9]+}}.8b, {{v[0-9]+}}.8b, {{v[0-9]+}}.8b
+entry:
+  %shuffle.i = shufflevector <8 x i8> %a, <8 x i8> %b, <8 x i32> <i32 1, i32 9, i32 3, i32 11, i32 5, i32 13, i32 7, i32 15>
+  ret <8 x i8> %shuffle.i
+}
+
+define <16 x i8> @test_vtrn2q_p8(<16 x i8> %a, <16 x i8> %b) {
+; CHECK: test_vtrn2q_p8:
+; CHECK: trn2 {{v[0-9]+}}.16b, {{v[0-9]+}}.16b, {{v[0-9]+}}.16b
+entry:
+  %shuffle.i = shufflevector <16 x i8> %a, <16 x i8> %b, <16 x i32> <i32 1, i32 17, i32 3, i32 19, i32 5, i32 21, i32 7, i32 23, i32 9, i32 25, i32 11, i32 27, i32 13, i32 29, i32 15, i32 31>
+  ret <16 x i8> %shuffle.i
+}
+
+define <4 x i16> @test_vtrn2_p16(<4 x i16> %a, <4 x i16> %b) {
+; CHECK: test_vtrn2_p16:
+; CHECK: trn2 {{v[0-9]+}}.4h, {{v[0-9]+}}.4h, {{v[0-9]+}}.4h
+entry: + %shuffle.i = shufflevector <4 x i16> %a, <4 x i16> %b, <4 x i32> + ret <4 x i16> %shuffle.i +} + +define <8 x i16> @test_vtrn2q_p16(<8 x i16> %a, <8 x i16> %b) { +; CHECK: test_vtrn2q_p16: +; CHECK: trn2 {{v[0-9]+}}.8h, {{v[0-9]+}}.8h, {{v[0-9]+}}.8h +entry: + %shuffle.i = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> + ret <8 x i16> %shuffle.i +} + +define %struct.int8x8x2_t @test_vuzp_s8(<8 x i8> %a, <8 x i8> %b) { +; CHECK: test_vuzp_s8: +; CHECK: uzp1 {{v[0-9]+}}.8b, {{v[0-9]+}}.8b, {{v[0-9]+}}.8b +; CHECK: uzp2 {{v[0-9]+}}.8b, {{v[0-9]+}}.8b, {{v[0-9]+}}.8b +entry: + %vuzp.i = shufflevector <8 x i8> %a, <8 x i8> %b, <8 x i32> + %vuzp1.i = shufflevector <8 x i8> %a, <8 x i8> %b, <8 x i32> + %.fca.0.0.insert = insertvalue %struct.int8x8x2_t undef, <8 x i8> %vuzp.i, 0, 0 + %.fca.0.1.insert = insertvalue %struct.int8x8x2_t %.fca.0.0.insert, <8 x i8> %vuzp1.i, 0, 1 + ret %struct.int8x8x2_t %.fca.0.1.insert +} + +define %struct.int16x4x2_t @test_vuzp_s16(<4 x i16> %a, <4 x i16> %b) { +; CHECK: test_vuzp_s16: +; CHECK: uzp1 {{v[0-9]+}}.4h, {{v[0-9]+}}.4h, {{v[0-9]+}}.4h +; CHECK: uzp2 {{v[0-9]+}}.4h, {{v[0-9]+}}.4h, {{v[0-9]+}}.4h +entry: + %vuzp.i = shufflevector <4 x i16> %a, <4 x i16> %b, <4 x i32> + %vuzp1.i = shufflevector <4 x i16> %a, <4 x i16> %b, <4 x i32> + %.fca.0.0.insert = insertvalue %struct.int16x4x2_t undef, <4 x i16> %vuzp.i, 0, 0 + %.fca.0.1.insert = insertvalue %struct.int16x4x2_t %.fca.0.0.insert, <4 x i16> %vuzp1.i, 0, 1 + ret %struct.int16x4x2_t %.fca.0.1.insert +} + +define %struct.int32x2x2_t @test_vuzp_s32(<2 x i32> %a, <2 x i32> %b) { +; CHECK: test_vuzp_s32: +; CHECK: ins {{v[0-9]+}}.s[1], {{v[0-9]+}}.s[0] +; CHECK: ins {{v[0-9]+}}.s[0], {{v[0-9]+}}.s[1] +entry: + %vuzp.i = shufflevector <2 x i32> %a, <2 x i32> %b, <2 x i32> + %vuzp1.i = shufflevector <2 x i32> %a, <2 x i32> %b, <2 x i32> + %.fca.0.0.insert = insertvalue %struct.int32x2x2_t undef, <2 x i32> %vuzp.i, 0, 0 + %.fca.0.1.insert = insertvalue %struct.int32x2x2_t 
%.fca.0.0.insert, <2 x i32> %vuzp1.i, 0, 1 + ret %struct.int32x2x2_t %.fca.0.1.insert +} + +define %struct.uint8x8x2_t @test_vuzp_u8(<8 x i8> %a, <8 x i8> %b) { +; CHECK: test_vuzp_u8: +; CHECK: uzp1 {{v[0-9]+}}.8b, {{v[0-9]+}}.8b, {{v[0-9]+}}.8b +; CHECK: uzp2 {{v[0-9]+}}.8b, {{v[0-9]+}}.8b, {{v[0-9]+}}.8b +entry: + %vuzp.i = shufflevector <8 x i8> %a, <8 x i8> %b, <8 x i32> + %vuzp1.i = shufflevector <8 x i8> %a, <8 x i8> %b, <8 x i32> + %.fca.0.0.insert = insertvalue %struct.uint8x8x2_t undef, <8 x i8> %vuzp.i, 0, 0 + %.fca.0.1.insert = insertvalue %struct.uint8x8x2_t %.fca.0.0.insert, <8 x i8> %vuzp1.i, 0, 1 + ret %struct.uint8x8x2_t %.fca.0.1.insert +} + +define %struct.uint16x4x2_t @test_vuzp_u16(<4 x i16> %a, <4 x i16> %b) { +; CHECK: test_vuzp_u16: +; CHECK: uzp1 {{v[0-9]+}}.4h, {{v[0-9]+}}.4h, {{v[0-9]+}}.4h +; CHECK: uzp2 {{v[0-9]+}}.4h, {{v[0-9]+}}.4h, {{v[0-9]+}}.4h +entry: + %vuzp.i = shufflevector <4 x i16> %a, <4 x i16> %b, <4 x i32> + %vuzp1.i = shufflevector <4 x i16> %a, <4 x i16> %b, <4 x i32> + %.fca.0.0.insert = insertvalue %struct.uint16x4x2_t undef, <4 x i16> %vuzp.i, 0, 0 + %.fca.0.1.insert = insertvalue %struct.uint16x4x2_t %.fca.0.0.insert, <4 x i16> %vuzp1.i, 0, 1 + ret %struct.uint16x4x2_t %.fca.0.1.insert +} + +define %struct.uint32x2x2_t @test_vuzp_u32(<2 x i32> %a, <2 x i32> %b) { +; CHECK: test_vuzp_u32: +; CHECK: ins {{v[0-9]+}}.s[1], {{v[0-9]+}}.s[0] +; CHECK: ins {{v[0-9]+}}.s[0], {{v[0-9]+}}.s[1] +entry: + %vuzp.i = shufflevector <2 x i32> %a, <2 x i32> %b, <2 x i32> + %vuzp1.i = shufflevector <2 x i32> %a, <2 x i32> %b, <2 x i32> + %.fca.0.0.insert = insertvalue %struct.uint32x2x2_t undef, <2 x i32> %vuzp.i, 0, 0 + %.fca.0.1.insert = insertvalue %struct.uint32x2x2_t %.fca.0.0.insert, <2 x i32> %vuzp1.i, 0, 1 + ret %struct.uint32x2x2_t %.fca.0.1.insert +} + +define %struct.float32x2x2_t @test_vuzp_f32(<2 x float> %a, <2 x float> %b) { +; CHECK: test_vuzp_f32: +; CHECK: ins {{v[0-9]+}}.s[1], {{v[0-9]+}}.s[0] +; CHECK: ins 
{{v[0-9]+}}.s[0], {{v[0-9]+}}.s[1] +entry: + %vuzp.i = shufflevector <2 x float> %a, <2 x float> %b, <2 x i32> + %vuzp1.i = shufflevector <2 x float> %a, <2 x float> %b, <2 x i32> + %.fca.0.0.insert = insertvalue %struct.float32x2x2_t undef, <2 x float> %vuzp.i, 0, 0 + %.fca.0.1.insert = insertvalue %struct.float32x2x2_t %.fca.0.0.insert, <2 x float> %vuzp1.i, 0, 1 + ret %struct.float32x2x2_t %.fca.0.1.insert +} + +define %struct.poly8x8x2_t @test_vuzp_p8(<8 x i8> %a, <8 x i8> %b) { +; CHECK: test_vuzp_p8: +; CHECK: uzp1 {{v[0-9]+}}.8b, {{v[0-9]+}}.8b, {{v[0-9]+}}.8b +; CHECK: uzp2 {{v[0-9]+}}.8b, {{v[0-9]+}}.8b, {{v[0-9]+}}.8b +entry: + %vuzp.i = shufflevector <8 x i8> %a, <8 x i8> %b, <8 x i32> + %vuzp1.i = shufflevector <8 x i8> %a, <8 x i8> %b, <8 x i32> + %.fca.0.0.insert = insertvalue %struct.poly8x8x2_t undef, <8 x i8> %vuzp.i, 0, 0 + %.fca.0.1.insert = insertvalue %struct.poly8x8x2_t %.fca.0.0.insert, <8 x i8> %vuzp1.i, 0, 1 + ret %struct.poly8x8x2_t %.fca.0.1.insert +} + +define %struct.poly16x4x2_t @test_vuzp_p16(<4 x i16> %a, <4 x i16> %b) { +; CHECK: test_vuzp_p16: +; CHECK: uzp1 {{v[0-9]+}}.4h, {{v[0-9]+}}.4h, {{v[0-9]+}}.4h +; CHECK: uzp2 {{v[0-9]+}}.4h, {{v[0-9]+}}.4h, {{v[0-9]+}}.4h +entry: + %vuzp.i = shufflevector <4 x i16> %a, <4 x i16> %b, <4 x i32> + %vuzp1.i = shufflevector <4 x i16> %a, <4 x i16> %b, <4 x i32> + %.fca.0.0.insert = insertvalue %struct.poly16x4x2_t undef, <4 x i16> %vuzp.i, 0, 0 + %.fca.0.1.insert = insertvalue %struct.poly16x4x2_t %.fca.0.0.insert, <4 x i16> %vuzp1.i, 0, 1 + ret %struct.poly16x4x2_t %.fca.0.1.insert +} + +define %struct.int8x16x2_t @test_vuzpq_s8(<16 x i8> %a, <16 x i8> %b) { +; CHECK: test_vuzpq_s8: +; CHECK: uzp1 {{v[0-9]+}}.16b, {{v[0-9]+}}.16b, {{v[0-9]+}}.16b +; CHECK: uzp2 {{v[0-9]+}}.16b, {{v[0-9]+}}.16b, {{v[0-9]+}}.16b +entry: + %vuzp.i = shufflevector <16 x i8> %a, <16 x i8> %b, <16 x i32> + %vuzp1.i = shufflevector <16 x i8> %a, <16 x i8> %b, <16 x i32> + %.fca.0.0.insert = insertvalue 
%struct.int8x16x2_t undef, <16 x i8> %vuzp.i, 0, 0 + %.fca.0.1.insert = insertvalue %struct.int8x16x2_t %.fca.0.0.insert, <16 x i8> %vuzp1.i, 0, 1 + ret %struct.int8x16x2_t %.fca.0.1.insert +} + +define %struct.int16x8x2_t @test_vuzpq_s16(<8 x i16> %a, <8 x i16> %b) { +; CHECK: test_vuzpq_s16: +; CHECK: uzp1 {{v[0-9]+}}.8h, {{v[0-9]+}}.8h, {{v[0-9]+}}.8h +; CHECK: uzp2 {{v[0-9]+}}.8h, {{v[0-9]+}}.8h, {{v[0-9]+}}.8h +entry: + %vuzp.i = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> + %vuzp1.i = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> + %.fca.0.0.insert = insertvalue %struct.int16x8x2_t undef, <8 x i16> %vuzp.i, 0, 0 + %.fca.0.1.insert = insertvalue %struct.int16x8x2_t %.fca.0.0.insert, <8 x i16> %vuzp1.i, 0, 1 + ret %struct.int16x8x2_t %.fca.0.1.insert +} + +define %struct.int32x4x2_t @test_vuzpq_s32(<4 x i32> %a, <4 x i32> %b) { +; CHECK: test_vuzpq_s32: +; CHECK: uzp1 {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.4s +; CHECK: uzp2 {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.4s +entry: + %vuzp.i = shufflevector <4 x i32> %a, <4 x i32> %b, <4 x i32> + %vuzp1.i = shufflevector <4 x i32> %a, <4 x i32> %b, <4 x i32> + %.fca.0.0.insert = insertvalue %struct.int32x4x2_t undef, <4 x i32> %vuzp.i, 0, 0 + %.fca.0.1.insert = insertvalue %struct.int32x4x2_t %.fca.0.0.insert, <4 x i32> %vuzp1.i, 0, 1 + ret %struct.int32x4x2_t %.fca.0.1.insert +} + +define %struct.uint8x16x2_t @test_vuzpq_u8(<16 x i8> %a, <16 x i8> %b) { +; CHECK: test_vuzpq_u8: +; CHECK: uzp1 {{v[0-9]+}}.16b, {{v[0-9]+}}.16b, {{v[0-9]+}}.16b +; CHECK: uzp2 {{v[0-9]+}}.16b, {{v[0-9]+}}.16b, {{v[0-9]+}}.16b +entry: + %vuzp.i = shufflevector <16 x i8> %a, <16 x i8> %b, <16 x i32> + %vuzp1.i = shufflevector <16 x i8> %a, <16 x i8> %b, <16 x i32> + %.fca.0.0.insert = insertvalue %struct.uint8x16x2_t undef, <16 x i8> %vuzp.i, 0, 0 + %.fca.0.1.insert = insertvalue %struct.uint8x16x2_t %.fca.0.0.insert, <16 x i8> %vuzp1.i, 0, 1 + ret %struct.uint8x16x2_t %.fca.0.1.insert +} + +define 
%struct.uint16x8x2_t @test_vuzpq_u16(<8 x i16> %a, <8 x i16> %b) { +; CHECK: test_vuzpq_u16: +; CHECK: uzp1 {{v[0-9]+}}.8h, {{v[0-9]+}}.8h, {{v[0-9]+}}.8h +; CHECK: uzp2 {{v[0-9]+}}.8h, {{v[0-9]+}}.8h, {{v[0-9]+}}.8h +entry: + %vuzp.i = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> + %vuzp1.i = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> + %.fca.0.0.insert = insertvalue %struct.uint16x8x2_t undef, <8 x i16> %vuzp.i, 0, 0 + %.fca.0.1.insert = insertvalue %struct.uint16x8x2_t %.fca.0.0.insert, <8 x i16> %vuzp1.i, 0, 1 + ret %struct.uint16x8x2_t %.fca.0.1.insert +} + +define %struct.uint32x4x2_t @test_vuzpq_u32(<4 x i32> %a, <4 x i32> %b) { +; CHECK: test_vuzpq_u32: +; CHECK: uzp1 {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.4s +; CHECK: uzp2 {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.4s +entry: + %vuzp.i = shufflevector <4 x i32> %a, <4 x i32> %b, <4 x i32> + %vuzp1.i = shufflevector <4 x i32> %a, <4 x i32> %b, <4 x i32> + %.fca.0.0.insert = insertvalue %struct.uint32x4x2_t undef, <4 x i32> %vuzp.i, 0, 0 + %.fca.0.1.insert = insertvalue %struct.uint32x4x2_t %.fca.0.0.insert, <4 x i32> %vuzp1.i, 0, 1 + ret %struct.uint32x4x2_t %.fca.0.1.insert +} + +define %struct.float32x4x2_t @test_vuzpq_f32(<4 x float> %a, <4 x float> %b) { +; CHECK: test_vuzpq_f32: +; CHECK: uzp1 {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.4s +; CHECK: uzp2 {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.4s +entry: + %vuzp.i = shufflevector <4 x float> %a, <4 x float> %b, <4 x i32> + %vuzp1.i = shufflevector <4 x float> %a, <4 x float> %b, <4 x i32> + %.fca.0.0.insert = insertvalue %struct.float32x4x2_t undef, <4 x float> %vuzp.i, 0, 0 + %.fca.0.1.insert = insertvalue %struct.float32x4x2_t %.fca.0.0.insert, <4 x float> %vuzp1.i, 0, 1 + ret %struct.float32x4x2_t %.fca.0.1.insert +} + +define %struct.poly8x16x2_t @test_vuzpq_p8(<16 x i8> %a, <16 x i8> %b) { +; CHECK: test_vuzpq_p8: +; CHECK: uzp1 {{v[0-9]+}}.16b, {{v[0-9]+}}.16b, {{v[0-9]+}}.16b +; CHECK: uzp2 {{v[0-9]+}}.16b, 
{{v[0-9]+}}.16b, {{v[0-9]+}}.16b +entry: + %vuzp.i = shufflevector <16 x i8> %a, <16 x i8> %b, <16 x i32> + %vuzp1.i = shufflevector <16 x i8> %a, <16 x i8> %b, <16 x i32> + %.fca.0.0.insert = insertvalue %struct.poly8x16x2_t undef, <16 x i8> %vuzp.i, 0, 0 + %.fca.0.1.insert = insertvalue %struct.poly8x16x2_t %.fca.0.0.insert, <16 x i8> %vuzp1.i, 0, 1 + ret %struct.poly8x16x2_t %.fca.0.1.insert +} + +define %struct.poly16x8x2_t @test_vuzpq_p16(<8 x i16> %a, <8 x i16> %b) { +; CHECK: test_vuzpq_p16: +; CHECK: uzp1 {{v[0-9]+}}.8h, {{v[0-9]+}}.8h, {{v[0-9]+}}.8h +; CHECK: uzp2 {{v[0-9]+}}.8h, {{v[0-9]+}}.8h, {{v[0-9]+}}.8h +entry: + %vuzp.i = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> + %vuzp1.i = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> + %.fca.0.0.insert = insertvalue %struct.poly16x8x2_t undef, <8 x i16> %vuzp.i, 0, 0 + %.fca.0.1.insert = insertvalue %struct.poly16x8x2_t %.fca.0.0.insert, <8 x i16> %vuzp1.i, 0, 1 + ret %struct.poly16x8x2_t %.fca.0.1.insert +} + +define %struct.int8x8x2_t @test_vzip_s8(<8 x i8> %a, <8 x i8> %b) { +; CHECK: test_vzip_s8: +; CHECK: zip1 {{v[0-9]+}}.8b, {{v[0-9]+}}.8b, {{v[0-9]+}}.8b +; CHECK: zip2 {{v[0-9]+}}.8b, {{v[0-9]+}}.8b, {{v[0-9]+}}.8b +entry: + %vzip.i = shufflevector <8 x i8> %a, <8 x i8> %b, <8 x i32> + %vzip1.i = shufflevector <8 x i8> %a, <8 x i8> %b, <8 x i32> + %.fca.0.0.insert = insertvalue %struct.int8x8x2_t undef, <8 x i8> %vzip.i, 0, 0 + %.fca.0.1.insert = insertvalue %struct.int8x8x2_t %.fca.0.0.insert, <8 x i8> %vzip1.i, 0, 1 + ret %struct.int8x8x2_t %.fca.0.1.insert +} + +define %struct.int16x4x2_t @test_vzip_s16(<4 x i16> %a, <4 x i16> %b) { +; CHECK: test_vzip_s16: +; CHECK: zip1 {{v[0-9]+}}.4h, {{v[0-9]+}}.4h, {{v[0-9]+}}.4h +; CHECK: zip2 {{v[0-9]+}}.4h, {{v[0-9]+}}.4h, {{v[0-9]+}}.4h +entry: + %vzip.i = shufflevector <4 x i16> %a, <4 x i16> %b, <4 x i32> + %vzip1.i = shufflevector <4 x i16> %a, <4 x i16> %b, <4 x i32> + %.fca.0.0.insert = insertvalue %struct.int16x4x2_t undef, <4 x i16> 
%vzip.i, 0, 0 + %.fca.0.1.insert = insertvalue %struct.int16x4x2_t %.fca.0.0.insert, <4 x i16> %vzip1.i, 0, 1 + ret %struct.int16x4x2_t %.fca.0.1.insert +} + +define %struct.int32x2x2_t @test_vzip_s32(<2 x i32> %a, <2 x i32> %b) { +; CHECK: test_vzip_s32: +; CHECK: ins {{v[0-9]+}}.s[1], {{v[0-9]+}}.s[0] +; CHECK: ins {{v[0-9]+}}.s[0], {{v[0-9]+}}.s[1] +entry: + %vzip.i = shufflevector <2 x i32> %a, <2 x i32> %b, <2 x i32> + %vzip1.i = shufflevector <2 x i32> %a, <2 x i32> %b, <2 x i32> + %.fca.0.0.insert = insertvalue %struct.int32x2x2_t undef, <2 x i32> %vzip.i, 0, 0 + %.fca.0.1.insert = insertvalue %struct.int32x2x2_t %.fca.0.0.insert, <2 x i32> %vzip1.i, 0, 1 + ret %struct.int32x2x2_t %.fca.0.1.insert +} + +define %struct.uint8x8x2_t @test_vzip_u8(<8 x i8> %a, <8 x i8> %b) { +; CHECK: test_vzip_u8: +; CHECK: zip1 {{v[0-9]+}}.8b, {{v[0-9]+}}.8b, {{v[0-9]+}}.8b +; CHECK: zip2 {{v[0-9]+}}.8b, {{v[0-9]+}}.8b, {{v[0-9]+}}.8b +entry: + %vzip.i = shufflevector <8 x i8> %a, <8 x i8> %b, <8 x i32> + %vzip1.i = shufflevector <8 x i8> %a, <8 x i8> %b, <8 x i32> + %.fca.0.0.insert = insertvalue %struct.uint8x8x2_t undef, <8 x i8> %vzip.i, 0, 0 + %.fca.0.1.insert = insertvalue %struct.uint8x8x2_t %.fca.0.0.insert, <8 x i8> %vzip1.i, 0, 1 + ret %struct.uint8x8x2_t %.fca.0.1.insert +} + +define %struct.uint16x4x2_t @test_vzip_u16(<4 x i16> %a, <4 x i16> %b) { +; CHECK: test_vzip_u16: +; CHECK: zip1 {{v[0-9]+}}.4h, {{v[0-9]+}}.4h, {{v[0-9]+}}.4h +; CHECK: zip2 {{v[0-9]+}}.4h, {{v[0-9]+}}.4h, {{v[0-9]+}}.4h +entry: + %vzip.i = shufflevector <4 x i16> %a, <4 x i16> %b, <4 x i32> + %vzip1.i = shufflevector <4 x i16> %a, <4 x i16> %b, <4 x i32> + %.fca.0.0.insert = insertvalue %struct.uint16x4x2_t undef, <4 x i16> %vzip.i, 0, 0 + %.fca.0.1.insert = insertvalue %struct.uint16x4x2_t %.fca.0.0.insert, <4 x i16> %vzip1.i, 0, 1 + ret %struct.uint16x4x2_t %.fca.0.1.insert +} + +define %struct.uint32x2x2_t @test_vzip_u32(<2 x i32> %a, <2 x i32> %b) { +; CHECK: test_vzip_u32: +; CHECK: ins 
{{v[0-9]+}}.s[1], {{v[0-9]+}}.s[0] +; CHECK: ins {{v[0-9]+}}.s[0], {{v[0-9]+}}.s[1] +entry: + %vzip.i = shufflevector <2 x i32> %a, <2 x i32> %b, <2 x i32> + %vzip1.i = shufflevector <2 x i32> %a, <2 x i32> %b, <2 x i32> + %.fca.0.0.insert = insertvalue %struct.uint32x2x2_t undef, <2 x i32> %vzip.i, 0, 0 + %.fca.0.1.insert = insertvalue %struct.uint32x2x2_t %.fca.0.0.insert, <2 x i32> %vzip1.i, 0, 1 + ret %struct.uint32x2x2_t %.fca.0.1.insert +} + +define %struct.float32x2x2_t @test_vzip_f32(<2 x float> %a, <2 x float> %b) { +; CHECK: test_vzip_f32: +; CHECK: ins {{v[0-9]+}}.s[1], {{v[0-9]+}}.s[0] +; CHECK: ins {{v[0-9]+}}.s[0], {{v[0-9]+}}.s[1] +entry: + %vzip.i = shufflevector <2 x float> %a, <2 x float> %b, <2 x i32> + %vzip1.i = shufflevector <2 x float> %a, <2 x float> %b, <2 x i32> + %.fca.0.0.insert = insertvalue %struct.float32x2x2_t undef, <2 x float> %vzip.i, 0, 0 + %.fca.0.1.insert = insertvalue %struct.float32x2x2_t %.fca.0.0.insert, <2 x float> %vzip1.i, 0, 1 + ret %struct.float32x2x2_t %.fca.0.1.insert +} + +define %struct.poly8x8x2_t @test_vzip_p8(<8 x i8> %a, <8 x i8> %b) { +; CHECK: test_vzip_p8: +; CHECK: zip1 {{v[0-9]+}}.8b, {{v[0-9]+}}.8b, {{v[0-9]+}}.8b +; CHECK: zip2 {{v[0-9]+}}.8b, {{v[0-9]+}}.8b, {{v[0-9]+}}.8b +entry: + %vzip.i = shufflevector <8 x i8> %a, <8 x i8> %b, <8 x i32> + %vzip1.i = shufflevector <8 x i8> %a, <8 x i8> %b, <8 x i32> + %.fca.0.0.insert = insertvalue %struct.poly8x8x2_t undef, <8 x i8> %vzip.i, 0, 0 + %.fca.0.1.insert = insertvalue %struct.poly8x8x2_t %.fca.0.0.insert, <8 x i8> %vzip1.i, 0, 1 + ret %struct.poly8x8x2_t %.fca.0.1.insert +} + +define %struct.poly16x4x2_t @test_vzip_p16(<4 x i16> %a, <4 x i16> %b) { +; CHECK: test_vzip_p16: +; CHECK: zip1 {{v[0-9]+}}.4h, {{v[0-9]+}}.4h, {{v[0-9]+}}.4h +; CHECK: zip2 {{v[0-9]+}}.4h, {{v[0-9]+}}.4h, {{v[0-9]+}}.4h +entry: + %vzip.i = shufflevector <4 x i16> %a, <4 x i16> %b, <4 x i32> + %vzip1.i = shufflevector <4 x i16> %a, <4 x i16> %b, <4 x i32> + %.fca.0.0.insert = 
insertvalue %struct.poly16x4x2_t undef, <4 x i16> %vzip.i, 0, 0 + %.fca.0.1.insert = insertvalue %struct.poly16x4x2_t %.fca.0.0.insert, <4 x i16> %vzip1.i, 0, 1 + ret %struct.poly16x4x2_t %.fca.0.1.insert +} + +define %struct.int8x16x2_t @test_vzipq_s8(<16 x i8> %a, <16 x i8> %b) { +; CHECK: test_vzipq_s8: +; CHECK: zip1 {{v[0-9]+}}.16b, {{v[0-9]+}}.16b, {{v[0-9]+}}.16b +; CHECK: zip2 {{v[0-9]+}}.16b, {{v[0-9]+}}.16b, {{v[0-9]+}}.16b +entry: + %vzip.i = shufflevector <16 x i8> %a, <16 x i8> %b, <16 x i32> + %vzip1.i = shufflevector <16 x i8> %a, <16 x i8> %b, <16 x i32> + %.fca.0.0.insert = insertvalue %struct.int8x16x2_t undef, <16 x i8> %vzip.i, 0, 0 + %.fca.0.1.insert = insertvalue %struct.int8x16x2_t %.fca.0.0.insert, <16 x i8> %vzip1.i, 0, 1 + ret %struct.int8x16x2_t %.fca.0.1.insert +} + +define %struct.int16x8x2_t @test_vzipq_s16(<8 x i16> %a, <8 x i16> %b) { +; CHECK: test_vzipq_s16: +; CHECK: zip1 {{v[0-9]+}}.8h, {{v[0-9]+}}.8h, {{v[0-9]+}}.8h +; CHECK: zip2 {{v[0-9]+}}.8h, {{v[0-9]+}}.8h, {{v[0-9]+}}.8h +entry: + %vzip.i = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> + %vzip1.i = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> + %.fca.0.0.insert = insertvalue %struct.int16x8x2_t undef, <8 x i16> %vzip.i, 0, 0 + %.fca.0.1.insert = insertvalue %struct.int16x8x2_t %.fca.0.0.insert, <8 x i16> %vzip1.i, 0, 1 + ret %struct.int16x8x2_t %.fca.0.1.insert +} + +define %struct.int32x4x2_t @test_vzipq_s32(<4 x i32> %a, <4 x i32> %b) { +; CHECK: test_vzipq_s32: +; CHECK: zip1 {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.4s +; CHECK: zip2 {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.4s +entry: + %vzip.i = shufflevector <4 x i32> %a, <4 x i32> %b, <4 x i32> + %vzip1.i = shufflevector <4 x i32> %a, <4 x i32> %b, <4 x i32> + %.fca.0.0.insert = insertvalue %struct.int32x4x2_t undef, <4 x i32> %vzip.i, 0, 0 + %.fca.0.1.insert = insertvalue %struct.int32x4x2_t %.fca.0.0.insert, <4 x i32> %vzip1.i, 0, 1 + ret %struct.int32x4x2_t %.fca.0.1.insert +} + +define 
%struct.uint8x16x2_t @test_vzipq_u8(<16 x i8> %a, <16 x i8> %b) { +; CHECK: test_vzipq_u8: +; CHECK: zip1 {{v[0-9]+}}.16b, {{v[0-9]+}}.16b, {{v[0-9]+}}.16b +; CHECK: zip2 {{v[0-9]+}}.16b, {{v[0-9]+}}.16b, {{v[0-9]+}}.16b +entry: + %vzip.i = shufflevector <16 x i8> %a, <16 x i8> %b, <16 x i32> + %vzip1.i = shufflevector <16 x i8> %a, <16 x i8> %b, <16 x i32> + %.fca.0.0.insert = insertvalue %struct.uint8x16x2_t undef, <16 x i8> %vzip.i, 0, 0 + %.fca.0.1.insert = insertvalue %struct.uint8x16x2_t %.fca.0.0.insert, <16 x i8> %vzip1.i, 0, 1 + ret %struct.uint8x16x2_t %.fca.0.1.insert +} + +define %struct.uint16x8x2_t @test_vzipq_u16(<8 x i16> %a, <8 x i16> %b) { +; CHECK: test_vzipq_u16: +; CHECK: zip1 {{v[0-9]+}}.8h, {{v[0-9]+}}.8h, {{v[0-9]+}}.8h +; CHECK: zip2 {{v[0-9]+}}.8h, {{v[0-9]+}}.8h, {{v[0-9]+}}.8h +entry: + %vzip.i = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> + %vzip1.i = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> + %.fca.0.0.insert = insertvalue %struct.uint16x8x2_t undef, <8 x i16> %vzip.i, 0, 0 + %.fca.0.1.insert = insertvalue %struct.uint16x8x2_t %.fca.0.0.insert, <8 x i16> %vzip1.i, 0, 1 + ret %struct.uint16x8x2_t %.fca.0.1.insert +} + +define %struct.uint32x4x2_t @test_vzipq_u32(<4 x i32> %a, <4 x i32> %b) { +; CHECK: test_vzipq_u32: +; CHECK: zip1 {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.4s +; CHECK: zip2 {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.4s +entry: + %vzip.i = shufflevector <4 x i32> %a, <4 x i32> %b, <4 x i32> + %vzip1.i = shufflevector <4 x i32> %a, <4 x i32> %b, <4 x i32> + %.fca.0.0.insert = insertvalue %struct.uint32x4x2_t undef, <4 x i32> %vzip.i, 0, 0 + %.fca.0.1.insert = insertvalue %struct.uint32x4x2_t %.fca.0.0.insert, <4 x i32> %vzip1.i, 0, 1 + ret %struct.uint32x4x2_t %.fca.0.1.insert +} + +define %struct.float32x4x2_t @test_vzipq_f32(<4 x float> %a, <4 x float> %b) { +; CHECK: test_vzipq_f32: +; CHECK: zip1 {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.4s +; CHECK: zip2 {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, 
{{v[0-9]+}}.4s +entry: + %vzip.i = shufflevector <4 x float> %a, <4 x float> %b, <4 x i32> + %vzip1.i = shufflevector <4 x float> %a, <4 x float> %b, <4 x i32> + %.fca.0.0.insert = insertvalue %struct.float32x4x2_t undef, <4 x float> %vzip.i, 0, 0 + %.fca.0.1.insert = insertvalue %struct.float32x4x2_t %.fca.0.0.insert, <4 x float> %vzip1.i, 0, 1 + ret %struct.float32x4x2_t %.fca.0.1.insert +} + +define %struct.poly8x16x2_t @test_vzipq_p8(<16 x i8> %a, <16 x i8> %b) { +; CHECK: test_vzipq_p8: +; CHECK: zip1 {{v[0-9]+}}.16b, {{v[0-9]+}}.16b, {{v[0-9]+}}.16b +; CHECK: zip2 {{v[0-9]+}}.16b, {{v[0-9]+}}.16b, {{v[0-9]+}}.16b +entry: + %vzip.i = shufflevector <16 x i8> %a, <16 x i8> %b, <16 x i32> + %vzip1.i = shufflevector <16 x i8> %a, <16 x i8> %b, <16 x i32> + %.fca.0.0.insert = insertvalue %struct.poly8x16x2_t undef, <16 x i8> %vzip.i, 0, 0 + %.fca.0.1.insert = insertvalue %struct.poly8x16x2_t %.fca.0.0.insert, <16 x i8> %vzip1.i, 0, 1 + ret %struct.poly8x16x2_t %.fca.0.1.insert +} + +define %struct.poly16x8x2_t @test_vzipq_p16(<8 x i16> %a, <8 x i16> %b) { +; CHECK: test_vzipq_p16: +; CHECK: zip1 {{v[0-9]+}}.8h, {{v[0-9]+}}.8h, {{v[0-9]+}}.8h +; CHECK: zip2 {{v[0-9]+}}.8h, {{v[0-9]+}}.8h, {{v[0-9]+}}.8h +entry: + %vzip.i = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> + %vzip1.i = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> + %.fca.0.0.insert = insertvalue %struct.poly16x8x2_t undef, <8 x i16> %vzip.i, 0, 0 + %.fca.0.1.insert = insertvalue %struct.poly16x8x2_t %.fca.0.0.insert, <8 x i16> %vzip1.i, 0, 1 + ret %struct.poly16x8x2_t %.fca.0.1.insert +} + +define %struct.int8x8x2_t @test_vtrn_s8(<8 x i8> %a, <8 x i8> %b) { +; CHECK: test_vtrn_s8: +; CHECK: trn1 {{v[0-9]+}}.8b, {{v[0-9]+}}.8b, {{v[0-9]+}}.8b +; CHECK: trn2 {{v[0-9]+}}.8b, {{v[0-9]+}}.8b, {{v[0-9]+}}.8b +entry: + %vtrn.i = shufflevector <8 x i8> %a, <8 x i8> %b, <8 x i32> + %vtrn1.i = shufflevector <8 x i8> %a, <8 x i8> %b, <8 x i32> + %.fca.0.0.insert = insertvalue %struct.int8x8x2_t undef, 
<8 x i8> %vtrn.i, 0, 0 + %.fca.0.1.insert = insertvalue %struct.int8x8x2_t %.fca.0.0.insert, <8 x i8> %vtrn1.i, 0, 1 + ret %struct.int8x8x2_t %.fca.0.1.insert +} + +define %struct.int16x4x2_t @test_vtrn_s16(<4 x i16> %a, <4 x i16> %b) { +; CHECK: test_vtrn_s16: +; CHECK: trn1 {{v[0-9]+}}.4h, {{v[0-9]+}}.4h, {{v[0-9]+}}.4h +; CHECK: trn2 {{v[0-9]+}}.4h, {{v[0-9]+}}.4h, {{v[0-9]+}}.4h +entry: + %vtrn.i = shufflevector <4 x i16> %a, <4 x i16> %b, <4 x i32> + %vtrn1.i = shufflevector <4 x i16> %a, <4 x i16> %b, <4 x i32> + %.fca.0.0.insert = insertvalue %struct.int16x4x2_t undef, <4 x i16> %vtrn.i, 0, 0 + %.fca.0.1.insert = insertvalue %struct.int16x4x2_t %.fca.0.0.insert, <4 x i16> %vtrn1.i, 0, 1 + ret %struct.int16x4x2_t %.fca.0.1.insert +} + +define %struct.int32x2x2_t @test_vtrn_s32(<2 x i32> %a, <2 x i32> %b) { +; CHECK: test_vtrn_s32: +; CHECK: ins {{v[0-9]+}}.s[1], {{v[0-9]+}}.s[0] +; CHECK: ins {{v[0-9]+}}.s[0], {{v[0-9]+}}.s[1] +entry: + %vtrn.i = shufflevector <2 x i32> %a, <2 x i32> %b, <2 x i32> + %vtrn1.i = shufflevector <2 x i32> %a, <2 x i32> %b, <2 x i32> + %.fca.0.0.insert = insertvalue %struct.int32x2x2_t undef, <2 x i32> %vtrn.i, 0, 0 + %.fca.0.1.insert = insertvalue %struct.int32x2x2_t %.fca.0.0.insert, <2 x i32> %vtrn1.i, 0, 1 + ret %struct.int32x2x2_t %.fca.0.1.insert +} + +define %struct.uint8x8x2_t @test_vtrn_u8(<8 x i8> %a, <8 x i8> %b) { +; CHECK: test_vtrn_u8: +; CHECK: trn1 {{v[0-9]+}}.8b, {{v[0-9]+}}.8b, {{v[0-9]+}}.8b +; CHECK: trn2 {{v[0-9]+}}.8b, {{v[0-9]+}}.8b, {{v[0-9]+}}.8b +entry: + %vtrn.i = shufflevector <8 x i8> %a, <8 x i8> %b, <8 x i32> + %vtrn1.i = shufflevector <8 x i8> %a, <8 x i8> %b, <8 x i32> + %.fca.0.0.insert = insertvalue %struct.uint8x8x2_t undef, <8 x i8> %vtrn.i, 0, 0 + %.fca.0.1.insert = insertvalue %struct.uint8x8x2_t %.fca.0.0.insert, <8 x i8> %vtrn1.i, 0, 1 + ret %struct.uint8x8x2_t %.fca.0.1.insert +} + +define %struct.uint16x4x2_t @test_vtrn_u16(<4 x i16> %a, <4 x i16> %b) { +; CHECK: test_vtrn_u16: +; CHECK: 
trn1 {{v[0-9]+}}.4h, {{v[0-9]+}}.4h, {{v[0-9]+}}.4h +; CHECK: trn2 {{v[0-9]+}}.4h, {{v[0-9]+}}.4h, {{v[0-9]+}}.4h +entry: + %vtrn.i = shufflevector <4 x i16> %a, <4 x i16> %b, <4 x i32> + %vtrn1.i = shufflevector <4 x i16> %a, <4 x i16> %b, <4 x i32> + %.fca.0.0.insert = insertvalue %struct.uint16x4x2_t undef, <4 x i16> %vtrn.i, 0, 0 + %.fca.0.1.insert = insertvalue %struct.uint16x4x2_t %.fca.0.0.insert, <4 x i16> %vtrn1.i, 0, 1 + ret %struct.uint16x4x2_t %.fca.0.1.insert +} + +define %struct.uint32x2x2_t @test_vtrn_u32(<2 x i32> %a, <2 x i32> %b) { +; CHECK: test_vtrn_u32: +; CHECK: ins {{v[0-9]+}}.s[1], {{v[0-9]+}}.s[0] +; CHECK: ins {{v[0-9]+}}.s[0], {{v[0-9]+}}.s[1] +entry: + %vtrn.i = shufflevector <2 x i32> %a, <2 x i32> %b, <2 x i32> + %vtrn1.i = shufflevector <2 x i32> %a, <2 x i32> %b, <2 x i32> + %.fca.0.0.insert = insertvalue %struct.uint32x2x2_t undef, <2 x i32> %vtrn.i, 0, 0 + %.fca.0.1.insert = insertvalue %struct.uint32x2x2_t %.fca.0.0.insert, <2 x i32> %vtrn1.i, 0, 1 + ret %struct.uint32x2x2_t %.fca.0.1.insert +} + +define %struct.float32x2x2_t @test_vtrn_f32(<2 x float> %a, <2 x float> %b) { +; CHECK: test_vtrn_f32: +; CHECK: ins {{v[0-9]+}}.s[1], {{v[0-9]+}}.s[0] +; CHECK: ins {{v[0-9]+}}.s[0], {{v[0-9]+}}.s[1] +entry: + %vtrn.i = shufflevector <2 x float> %a, <2 x float> %b, <2 x i32> + %vtrn1.i = shufflevector <2 x float> %a, <2 x float> %b, <2 x i32> + %.fca.0.0.insert = insertvalue %struct.float32x2x2_t undef, <2 x float> %vtrn.i, 0, 0 + %.fca.0.1.insert = insertvalue %struct.float32x2x2_t %.fca.0.0.insert, <2 x float> %vtrn1.i, 0, 1 + ret %struct.float32x2x2_t %.fca.0.1.insert +} + +define %struct.poly8x8x2_t @test_vtrn_p8(<8 x i8> %a, <8 x i8> %b) { +; CHECK: test_vtrn_p8: +; CHECK: trn1 {{v[0-9]+}}.8b, {{v[0-9]+}}.8b, {{v[0-9]+}}.8b +; CHECK: trn2 {{v[0-9]+}}.8b, {{v[0-9]+}}.8b, {{v[0-9]+}}.8b +entry: + %vtrn.i = shufflevector <8 x i8> %a, <8 x i8> %b, <8 x i32> + %vtrn1.i = shufflevector <8 x i8> %a, <8 x i8> %b, <8 x i32> + 
%.fca.0.0.insert = insertvalue %struct.poly8x8x2_t undef, <8 x i8> %vtrn.i, 0, 0 + %.fca.0.1.insert = insertvalue %struct.poly8x8x2_t %.fca.0.0.insert, <8 x i8> %vtrn1.i, 0, 1 + ret %struct.poly8x8x2_t %.fca.0.1.insert +} + +define %struct.poly16x4x2_t @test_vtrn_p16(<4 x i16> %a, <4 x i16> %b) { +; CHECK: test_vtrn_p16: +; CHECK: trn1 {{v[0-9]+}}.4h, {{v[0-9]+}}.4h, {{v[0-9]+}}.4h +; CHECK: trn2 {{v[0-9]+}}.4h, {{v[0-9]+}}.4h, {{v[0-9]+}}.4h +entry: + %vtrn.i = shufflevector <4 x i16> %a, <4 x i16> %b, <4 x i32> + %vtrn1.i = shufflevector <4 x i16> %a, <4 x i16> %b, <4 x i32> + %.fca.0.0.insert = insertvalue %struct.poly16x4x2_t undef, <4 x i16> %vtrn.i, 0, 0 + %.fca.0.1.insert = insertvalue %struct.poly16x4x2_t %.fca.0.0.insert, <4 x i16> %vtrn1.i, 0, 1 + ret %struct.poly16x4x2_t %.fca.0.1.insert +} + +define %struct.int8x16x2_t @test_vtrnq_s8(<16 x i8> %a, <16 x i8> %b) { +; CHECK: test_vtrnq_s8: +; CHECK: trn1 {{v[0-9]+}}.16b, {{v[0-9]+}}.16b, {{v[0-9]+}}.16b +; CHECK: trn2 {{v[0-9]+}}.16b, {{v[0-9]+}}.16b, {{v[0-9]+}}.16b +entry: + %vtrn.i = shufflevector <16 x i8> %a, <16 x i8> %b, <16 x i32> + %vtrn1.i = shufflevector <16 x i8> %a, <16 x i8> %b, <16 x i32> + %.fca.0.0.insert = insertvalue %struct.int8x16x2_t undef, <16 x i8> %vtrn.i, 0, 0 + %.fca.0.1.insert = insertvalue %struct.int8x16x2_t %.fca.0.0.insert, <16 x i8> %vtrn1.i, 0, 1 + ret %struct.int8x16x2_t %.fca.0.1.insert +} + +define %struct.int16x8x2_t @test_vtrnq_s16(<8 x i16> %a, <8 x i16> %b) { +; CHECK: test_vtrnq_s16: +; CHECK: trn1 {{v[0-9]+}}.8h, {{v[0-9]+}}.8h, {{v[0-9]+}}.8h +; CHECK: trn2 {{v[0-9]+}}.8h, {{v[0-9]+}}.8h, {{v[0-9]+}}.8h +entry: + %vtrn.i = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> + %vtrn1.i = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> + %.fca.0.0.insert = insertvalue %struct.int16x8x2_t undef, <8 x i16> %vtrn.i, 0, 0 + %.fca.0.1.insert = insertvalue %struct.int16x8x2_t %.fca.0.0.insert, <8 x i16> %vtrn1.i, 0, 1 + ret %struct.int16x8x2_t %.fca.0.1.insert +} + 
+; Even-lane interleave of %a/%b must select TRN1, odd-lane interleave TRN2 (.4s). +define %struct.int32x4x2_t @test_vtrnq_s32(<4 x i32> %a, <4 x i32> %b) { +; CHECK-LABEL: test_vtrnq_s32: +; CHECK: trn1 {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.4s +; CHECK: trn2 {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.4s +entry: + %vtrn.i = shufflevector <4 x i32> %a, <4 x i32> %b, <4 x i32> <i32 0, i32 4, i32 2, i32 6> + %vtrn1.i = shufflevector <4 x i32> %a, <4 x i32> %b, <4 x i32> <i32 1, i32 5, i32 3, i32 7> + %.fca.0.0.insert = insertvalue %struct.int32x4x2_t undef, <4 x i32> %vtrn.i, 0, 0 + %.fca.0.1.insert = insertvalue %struct.int32x4x2_t %.fca.0.0.insert, <4 x i32> %vtrn1.i, 0, 1 + ret %struct.int32x4x2_t %.fca.0.1.insert +} + +; Even-lane interleave of %a/%b must select TRN1, odd-lane interleave TRN2 (.16b). +define %struct.uint8x16x2_t @test_vtrnq_u8(<16 x i8> %a, <16 x i8> %b) { +; CHECK-LABEL: test_vtrnq_u8: +; CHECK: trn1 {{v[0-9]+}}.16b, {{v[0-9]+}}.16b, {{v[0-9]+}}.16b +; CHECK: trn2 {{v[0-9]+}}.16b, {{v[0-9]+}}.16b, {{v[0-9]+}}.16b +entry: + %vtrn.i = shufflevector <16 x i8> %a, <16 x i8> %b, <16 x i32> <i32 0, i32 16, i32 2, i32 18, i32 4, i32 20, i32 6, i32 22, i32 8, i32 24, i32 10, i32 26, i32 12, i32 28, i32 14, i32 30> + %vtrn1.i = shufflevector <16 x i8> %a, <16 x i8> %b, <16 x i32> <i32 1, i32 17, i32 3, i32 19, i32 5, i32 21, i32 7, i32 23, i32 9, i32 25, i32 11, i32 27, i32 13, i32 29, i32 15, i32 31> + %.fca.0.0.insert = insertvalue %struct.uint8x16x2_t undef, <16 x i8> %vtrn.i, 0, 0 + %.fca.0.1.insert = insertvalue %struct.uint8x16x2_t %.fca.0.0.insert, <16 x i8> %vtrn1.i, 0, 1 + ret %struct.uint8x16x2_t %.fca.0.1.insert +} + +; Even-lane interleave of %a/%b must select TRN1, odd-lane interleave TRN2 (.8h). +define %struct.uint16x8x2_t @test_vtrnq_u16(<8 x i16> %a, <8 x i16> %b) { +; CHECK-LABEL: test_vtrnq_u16: +; CHECK: trn1 {{v[0-9]+}}.8h, {{v[0-9]+}}.8h, {{v[0-9]+}}.8h +; CHECK: trn2 {{v[0-9]+}}.8h, {{v[0-9]+}}.8h, {{v[0-9]+}}.8h +entry: + %vtrn.i = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> <i32 0, i32 8, i32 2, i32 10, i32 4, i32 12, i32 6, i32 14> + %vtrn1.i = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> <i32 1, i32 9, i32 3, i32 11, i32 5, i32 13, i32 7, i32 15> + %.fca.0.0.insert = insertvalue %struct.uint16x8x2_t undef, <8 x i16> %vtrn.i, 0, 0 + %.fca.0.1.insert = insertvalue %struct.uint16x8x2_t %.fca.0.0.insert, <8 x i16> %vtrn1.i, 0, 1 + ret %struct.uint16x8x2_t %.fca.0.1.insert +} + +; Even-lane interleave of %a/%b must select TRN1, odd-lane interleave TRN2 (.4s). +define %struct.uint32x4x2_t @test_vtrnq_u32(<4 x i32> %a, <4 x i32> %b) { +; CHECK-LABEL: test_vtrnq_u32: +; CHECK: trn1 {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.4s +; CHECK: trn2 {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, 
{{v[0-9]+}}.4s +entry: + %vtrn.i = shufflevector <4 x i32> %a, <4 x i32> %b, <4 x i32> <i32 0, i32 4, i32 2, i32 6> + %vtrn1.i = shufflevector <4 x i32> %a, <4 x i32> %b, <4 x i32> <i32 1, i32 5, i32 3, i32 7> + %.fca.0.0.insert = insertvalue %struct.uint32x4x2_t undef, <4 x i32> %vtrn.i, 0, 0 + %.fca.0.1.insert = insertvalue %struct.uint32x4x2_t %.fca.0.0.insert, <4 x i32> %vtrn1.i, 0, 1 + ret %struct.uint32x4x2_t %.fca.0.1.insert +} + +; Even-lane interleave of %a/%b must select TRN1, odd-lane interleave TRN2 (.4s, float lanes). +define %struct.float32x4x2_t @test_vtrnq_f32(<4 x float> %a, <4 x float> %b) { +; CHECK-LABEL: test_vtrnq_f32: +; CHECK: trn1 {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.4s +; CHECK: trn2 {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.4s +entry: + %vtrn.i = shufflevector <4 x float> %a, <4 x float> %b, <4 x i32> <i32 0, i32 4, i32 2, i32 6> + %vtrn1.i = shufflevector <4 x float> %a, <4 x float> %b, <4 x i32> <i32 1, i32 5, i32 3, i32 7> + %.fca.0.0.insert = insertvalue %struct.float32x4x2_t undef, <4 x float> %vtrn.i, 0, 0 + %.fca.0.1.insert = insertvalue %struct.float32x4x2_t %.fca.0.0.insert, <4 x float> %vtrn1.i, 0, 1 + ret %struct.float32x4x2_t %.fca.0.1.insert +} + +; Even-lane interleave of %a/%b must select TRN1, odd-lane interleave TRN2 (.16b). +define %struct.poly8x16x2_t @test_vtrnq_p8(<16 x i8> %a, <16 x i8> %b) { +; CHECK-LABEL: test_vtrnq_p8: +; CHECK: trn1 {{v[0-9]+}}.16b, {{v[0-9]+}}.16b, {{v[0-9]+}}.16b +; CHECK: trn2 {{v[0-9]+}}.16b, {{v[0-9]+}}.16b, {{v[0-9]+}}.16b +entry: + %vtrn.i = shufflevector <16 x i8> %a, <16 x i8> %b, <16 x i32> <i32 0, i32 16, i32 2, i32 18, i32 4, i32 20, i32 6, i32 22, i32 8, i32 24, i32 10, i32 26, i32 12, i32 28, i32 14, i32 30> + %vtrn1.i = shufflevector <16 x i8> %a, <16 x i8> %b, <16 x i32> <i32 1, i32 17, i32 3, i32 19, i32 5, i32 21, i32 7, i32 23, i32 9, i32 25, i32 11, i32 27, i32 13, i32 29, i32 15, i32 31> + %.fca.0.0.insert = insertvalue %struct.poly8x16x2_t undef, <16 x i8> %vtrn.i, 0, 0 + %.fca.0.1.insert = insertvalue %struct.poly8x16x2_t %.fca.0.0.insert, <16 x i8> %vtrn1.i, 0, 1 + ret %struct.poly8x16x2_t %.fca.0.1.insert +} + +; Even-lane interleave of %a/%b must select TRN1, odd-lane interleave TRN2 (.8h). +define %struct.poly16x8x2_t @test_vtrnq_p16(<8 x i16> %a, <8 x i16> %b) { +; CHECK-LABEL: test_vtrnq_p16: +; CHECK: trn1 {{v[0-9]+}}.8h, {{v[0-9]+}}.8h, {{v[0-9]+}}.8h +; CHECK: trn2 {{v[0-9]+}}.8h, {{v[0-9]+}}.8h, {{v[0-9]+}}.8h +entry: + %vtrn.i = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> <i32 0, i32 8, i32 2, i32 10, i32 4, i32 12, i32 6, i32 14> + %vtrn1.i = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> <i32 1, i32 9, i32 3, i32 11, i32 5, i32 13, i32 7, i32 15> + %.fca.0.0.insert = insertvalue 
%struct.poly16x8x2_t undef, <8 x i16> %vtrn.i, 0, 0 + %.fca.0.1.insert = insertvalue %struct.poly16x8x2_t %.fca.0.0.insert, <8 x i16> %vtrn1.i, 0, 1 + ret %struct.poly16x8x2_t %.fca.0.1.insert +} diff --git a/test/MC/AArch64/neon-diagnostics.s b/test/MC/AArch64/neon-diagnostics.s index b549480..d9afcb1 100644 --- a/test/MC/AArch64/neon-diagnostics.s +++ b/test/MC/AArch64/neon-diagnostics.s @@ -5235,3 +5235,216 @@ // CHECK-ERROR: ext v0.2d, v1.2d, v2.2d, #0x0 // CHECK-ERROR: ^ + +//---------------------------------------------------------------------- +// Permutation with 3 vectors +// +// Negative tests: every operand list below either mixes arrangements or +// uses one that uzp/zip/trn are expected to reject. +// NOTE(review): these diagnostics were never matched before (the prefix +// lacked its ':'); confirm the expected text against llvm-mc. +//---------------------------------------------------------------------- + + uzp1 v0.16b, v1.8b, v2.8b + uzp1 v0.8b, v1.4b, v2.4b + uzp1 v0.8h, v1.4h, v2.4h + uzp1 v0.4h, v1.2h, v2.2h + uzp1 v0.4s, v1.2s, v2.2s + uzp1 v0.2s, v1.1s, v2.1s + uzp1 v0.2d, v1.1d, v2.1d + uzp1 v0.1d, v1.1d, v2.1d + +// CHECK-ERROR: error: invalid operand for instruction +// CHECK-ERROR: uzp1 v0.16b, v1.8b, v2.8b +// CHECK-ERROR: ^ +// CHECK-ERROR: error: invalid operand for instruction +// CHECK-ERROR: uzp1 v0.8b, v1.4b, v2.4b +// CHECK-ERROR: ^ +// CHECK-ERROR: error: invalid operand for instruction +// CHECK-ERROR: uzp1 v0.8h, v1.4h, v2.4h +// CHECK-ERROR: ^ +// CHECK-ERROR: error: invalid operand for instruction +// CHECK-ERROR: uzp1 v0.4h, v1.2h, v2.2h +// CHECK-ERROR: ^ +// CHECK-ERROR: error: invalid operand for instruction +// CHECK-ERROR: uzp1 v0.4s, v1.2s, v2.2s +// CHECK-ERROR: ^ +// CHECK-ERROR: error: invalid operand for instruction +// CHECK-ERROR: uzp1 v0.2s, v1.1s, v2.1s +// CHECK-ERROR: ^ +// CHECK-ERROR: error: invalid operand for instruction +// CHECK-ERROR: uzp1 v0.2d, v1.1d, v2.1d +// CHECK-ERROR: ^ +// CHECK-ERROR: error: invalid operand for instruction +// CHECK-ERROR: uzp1 v0.1d, v1.1d, v2.1d +// CHECK-ERROR: ^ + + uzp2 v0.16b, v1.8b, v2.8b + uzp2 v0.8b, v1.4b, v2.4b + uzp2 v0.8h, v1.4h, v2.4h + uzp2 v0.4h, v1.2h, v2.2h + uzp2 
v0.4s, v1.2s, v2.2s + uzp2 v0.2s, v1.1s, v2.1s + uzp2 v0.2d, v1.1d, v2.1d + uzp2 v0.1d, v1.1d, v2.1d + +// CHECK-ERROR: error: invalid operand for instruction +// CHECK-ERROR: uzp2 v0.16b, v1.8b, v2.8b +// CHECK-ERROR: ^ +// CHECK-ERROR: error: invalid operand for instruction +// CHECK-ERROR: uzp2 v0.8b, v1.4b, v2.4b +// CHECK-ERROR: ^ +// CHECK-ERROR: error: invalid operand for instruction +// CHECK-ERROR: uzp2 v0.8h, v1.4h, v2.4h +// CHECK-ERROR: ^ +// CHECK-ERROR: error: invalid operand for instruction +// CHECK-ERROR: uzp2 v0.4h, v1.2h, v2.2h +// CHECK-ERROR: ^ +// CHECK-ERROR: error: invalid operand for instruction +// CHECK-ERROR: uzp2 v0.4s, v1.2s, v2.2s +// CHECK-ERROR: ^ +// CHECK-ERROR: error: invalid operand for instruction +// CHECK-ERROR: uzp2 v0.2s, v1.1s, v2.1s +// CHECK-ERROR: ^ +// CHECK-ERROR: error: invalid operand for instruction +// CHECK-ERROR: uzp2 v0.2d, v1.1d, v2.1d +// CHECK-ERROR: ^ +// CHECK-ERROR: error: invalid operand for instruction +// CHECK-ERROR: uzp2 v0.1d, v1.1d, v2.1d +// CHECK-ERROR: ^ + + zip1 v0.16b, v1.8b, v2.8b + zip1 v0.8b, v1.4b, v2.4b + zip1 v0.8h, v1.4h, v2.4h + zip1 v0.4h, v1.2h, v2.2h + zip1 v0.4s, v1.2s, v2.2s + zip1 v0.2s, v1.1s, v2.1s + zip1 v0.2d, v1.1d, v2.1d + zip1 v0.1d, v1.1d, v2.1d + +// CHECK-ERROR: error: invalid operand for instruction +// CHECK-ERROR: zip1 v0.16b, v1.8b, v2.8b +// CHECK-ERROR: ^ +// CHECK-ERROR: error: invalid operand for instruction +// CHECK-ERROR: zip1 v0.8b, v1.4b, v2.4b +// CHECK-ERROR: ^ +// CHECK-ERROR: error: invalid operand for instruction +// CHECK-ERROR: zip1 v0.8h, v1.4h, v2.4h +// CHECK-ERROR: ^ +// CHECK-ERROR: error: invalid operand for instruction +// CHECK-ERROR: zip1 v0.4h, v1.2h, v2.2h +// CHECK-ERROR: ^ +// CHECK-ERROR: error: invalid operand for instruction +// CHECK-ERROR: zip1 v0.4s, v1.2s, v2.2s +// CHECK-ERROR: ^ +// CHECK-ERROR: error: invalid operand for 
instruction +// CHECK-ERROR: zip1 v0.2s, v1.1s, v2.1s +// CHECK-ERROR: ^ +// CHECK-ERROR: error: invalid operand for instruction +// CHECK-ERROR: zip1 v0.2d, v1.1d, v2.1d +// CHECK-ERROR: ^ +// CHECK-ERROR: error: invalid operand for instruction +// CHECK-ERROR: zip1 v0.1d, v1.1d, v2.1d +// CHECK-ERROR: ^ + + zip2 v0.16b, v1.8b, v2.8b + zip2 v0.8b, v1.4b, v2.4b + zip2 v0.8h, v1.4h, v2.4h + zip2 v0.4h, v1.2h, v2.2h + zip2 v0.4s, v1.2s, v2.2s + zip2 v0.2s, v1.1s, v2.1s + zip2 v0.2d, v1.1d, v2.1d + zip2 v0.1d, v1.1d, v2.1d + +// CHECK-ERROR: error: invalid operand for instruction +// CHECK-ERROR: zip2 v0.16b, v1.8b, v2.8b +// CHECK-ERROR: ^ +// CHECK-ERROR: error: invalid operand for instruction +// CHECK-ERROR: zip2 v0.8b, v1.4b, v2.4b +// CHECK-ERROR: ^ +// CHECK-ERROR: error: invalid operand for instruction +// CHECK-ERROR: zip2 v0.8h, v1.4h, v2.4h +// CHECK-ERROR: ^ +// CHECK-ERROR: error: invalid operand for instruction +// CHECK-ERROR: zip2 v0.4h, v1.2h, v2.2h +// CHECK-ERROR: ^ +// CHECK-ERROR: error: invalid operand for instruction +// CHECK-ERROR: zip2 v0.4s, v1.2s, v2.2s +// CHECK-ERROR: ^ +// CHECK-ERROR: error: invalid operand for instruction +// CHECK-ERROR: zip2 v0.2s, v1.1s, v2.1s +// CHECK-ERROR: ^ +// CHECK-ERROR: error: invalid operand for instruction +// CHECK-ERROR: zip2 v0.2d, v1.1d, v2.1d +// CHECK-ERROR: ^ +// CHECK-ERROR: error: invalid operand for instruction +// CHECK-ERROR: zip2 v0.1d, v1.1d, v2.1d +// CHECK-ERROR: ^ + + trn1 v0.16b, v1.8b, v2.8b + trn1 v0.8b, v1.4b, v2.4b + trn1 v0.8h, v1.4h, v2.4h + trn1 v0.4h, v1.2h, v2.2h + trn1 v0.4s, v1.2s, v2.2s + trn1 v0.2s, v1.1s, v2.1s + trn1 v0.2d, v1.1d, v2.1d + trn1 v0.1d, v1.1d, v2.1d + +// CHECK-ERROR: error: invalid operand for instruction +// CHECK-ERROR: trn1 v0.16b, v1.8b, v2.8b +// CHECK-ERROR: ^ +// CHECK-ERROR: error: invalid operand for instruction +// CHECK-ERROR: trn1 v0.8b, v1.4b, v2.4b +// 
CHECK-ERROR: ^ +// CHECK-ERROR: error: invalid operand for instruction +// CHECK-ERROR: trn1 v0.8h, v1.4h, v2.4h +// CHECK-ERROR: ^ +// CHECK-ERROR: error: invalid operand for instruction +// CHECK-ERROR: trn1 v0.4h, v1.2h, v2.2h +// CHECK-ERROR: ^ +// CHECK-ERROR: error: invalid operand for instruction +// CHECK-ERROR: trn1 v0.4s, v1.2s, v2.2s +// CHECK-ERROR: ^ +// CHECK-ERROR: error: invalid operand for instruction +// CHECK-ERROR: trn1 v0.2s, v1.1s, v2.1s +// CHECK-ERROR: ^ +// CHECK-ERROR: error: invalid operand for instruction +// CHECK-ERROR: trn1 v0.2d, v1.1d, v2.1d +// CHECK-ERROR: ^ +// CHECK-ERROR: error: invalid operand for instruction +// CHECK-ERROR: trn1 v0.1d, v1.1d, v2.1d +// CHECK-ERROR: ^ + + trn2 v0.16b, v1.8b, v2.8b + trn2 v0.8b, v1.4b, v2.4b + trn2 v0.8h, v1.4h, v2.4h + trn2 v0.4h, v1.2h, v2.2h + trn2 v0.4s, v1.2s, v2.2s + trn2 v0.2s, v1.1s, v2.1s + trn2 v0.2d, v1.1d, v2.1d + trn2 v0.1d, v1.1d, v2.1d + +// CHECK-ERROR: error: invalid operand for instruction +// CHECK-ERROR: trn2 v0.16b, v1.8b, v2.8b +// CHECK-ERROR: ^ +// CHECK-ERROR: error: invalid operand for instruction +// CHECK-ERROR: trn2 v0.8b, v1.4b, v2.4b +// CHECK-ERROR: ^ +// CHECK-ERROR: error: invalid operand for instruction +// CHECK-ERROR: trn2 v0.8h, v1.4h, v2.4h +// CHECK-ERROR: ^ +// CHECK-ERROR: error: invalid operand for instruction +// CHECK-ERROR: trn2 v0.4h, v1.2h, v2.2h +// CHECK-ERROR: ^ +// CHECK-ERROR: error: invalid operand for instruction +// CHECK-ERROR: trn2 v0.4s, v1.2s, v2.2s +// CHECK-ERROR: ^ +// CHECK-ERROR: error: invalid operand for instruction +// CHECK-ERROR: trn2 v0.2s, v1.1s, v2.1s +// CHECK-ERROR: ^ +// CHECK-ERROR: error: invalid operand for instruction +// CHECK-ERROR: trn2 v0.2d, v1.1d, v2.1d +// CHECK-ERROR: ^ +// CHECK-ERROR: error: invalid operand for instruction +// CHECK-ERROR: trn2 v0.1d, v1.1d, v2.1d +// CHECK-ERROR: ^ diff --git a/test/MC/AArch64/neon-perm.s b/test/MC/AArch64/neon-perm.s new file mode 100644 index 0000000..20a4acde --- /dev/null +++ b/test/MC/AArch64/neon-perm.s @@ -0,0 +1,103 @@ +// RUN: llvm-mc -triple=aarch64 -mattr=+neon -show-encoding < %s | FileCheck %s + +// Check that the assembler can handle the documented syntax for AArch64 + +//------------------------------------------------------------------------------ +// Instructions for permute 
+//------------------------------------------------------------------------------ + +// uzp1 (unzip, primary result): one case per arrangement, .8b through .2d + uzp1 v0.8b, v1.8b, v2.8b + uzp1 v0.16b, v1.16b, v2.16b + uzp1 v0.4h, v1.4h, v2.4h + uzp1 v0.8h, v1.8h, v2.8h + uzp1 v0.2s, v1.2s, v2.2s + uzp1 v0.4s, v1.4s, v2.4s + uzp1 v0.2d, v1.2d, v2.2d + +// CHECK: uzp1 v0.8b, v1.8b, v2.8b // encoding: [0x20,0x18,0x02,0x0e] +// CHECK: uzp1 v0.16b, v1.16b, v2.16b // encoding: [0x20,0x18,0x02,0x4e] +// CHECK: uzp1 v0.4h, v1.4h, v2.4h // encoding: [0x20,0x18,0x42,0x0e] +// CHECK: uzp1 v0.8h, v1.8h, v2.8h // encoding: [0x20,0x18,0x42,0x4e] +// CHECK: uzp1 v0.2s, v1.2s, v2.2s // encoding: [0x20,0x18,0x82,0x0e] +// CHECK: uzp1 v0.4s, v1.4s, v2.4s // encoding: [0x20,0x18,0x82,0x4e] +// CHECK: uzp1 v0.2d, v1.2d, v2.2d // encoding: [0x20,0x18,0xc2,0x4e] + +// trn1 (transpose, primary result): one case per arrangement, .8b through .2d + trn1 v0.8b, v1.8b, v2.8b + trn1 v0.16b, v1.16b, v2.16b + trn1 v0.4h, v1.4h, v2.4h + trn1 v0.8h, v1.8h, v2.8h + trn1 v0.2s, v1.2s, v2.2s + trn1 v0.4s, v1.4s, v2.4s + trn1 v0.2d, v1.2d, v2.2d + +// CHECK: trn1 v0.8b, v1.8b, v2.8b // encoding: [0x20,0x28,0x02,0x0e] +// CHECK: trn1 v0.16b, v1.16b, v2.16b // encoding: [0x20,0x28,0x02,0x4e] +// CHECK: trn1 v0.4h, v1.4h, v2.4h // encoding: [0x20,0x28,0x42,0x0e] +// CHECK: trn1 v0.8h, v1.8h, v2.8h // encoding: [0x20,0x28,0x42,0x4e] +// CHECK: trn1 v0.2s, v1.2s, v2.2s // encoding: [0x20,0x28,0x82,0x0e] +// CHECK: trn1 v0.4s, v1.4s, v2.4s // encoding: [0x20,0x28,0x82,0x4e] +// CHECK: trn1 v0.2d, v1.2d, v2.2d // encoding: [0x20,0x28,0xc2,0x4e] + +// zip1 (zip, primary result): one case per arrangement, .8b through .2d + zip1 v0.8b, v1.8b, v2.8b + zip1 v0.16b, v1.16b, v2.16b + zip1 v0.4h, v1.4h, v2.4h + zip1 v0.8h, v1.8h, v2.8h + zip1 v0.2s, v1.2s, v2.2s + zip1 v0.4s, v1.4s, v2.4s + zip1 v0.2d, v1.2d, v2.2d + +// CHECK: zip1 v0.8b, v1.8b, v2.8b // encoding: [0x20,0x38,0x02,0x0e] +// CHECK: zip1 v0.16b, v1.16b, v2.16b // encoding: [0x20,0x38,0x02,0x4e] +// CHECK: zip1 v0.4h, v1.4h, v2.4h // encoding: [0x20,0x38,0x42,0x0e] +// CHECK: zip1 v0.8h, v1.8h, v2.8h // encoding: [0x20,0x38,0x42,0x4e] +// CHECK: zip1 v0.2s, v1.2s, v2.2s // 
encoding: [0x20,0x38,0x82,0x0e] +// CHECK: zip1 v0.4s, v1.4s, v2.4s // encoding: [0x20,0x38,0x82,0x4e] +// CHECK: zip1 v0.2d, v1.2d, v2.2d // encoding: [0x20,0x38,0xc2,0x4e] + +// uzp2 (unzip, secondary result): one case per arrangement, .8b through .2d + uzp2 v0.8b, v1.8b, v2.8b + uzp2 v0.16b, v1.16b, v2.16b + uzp2 v0.4h, v1.4h, v2.4h + uzp2 v0.8h, v1.8h, v2.8h + uzp2 v0.2s, v1.2s, v2.2s + uzp2 v0.4s, v1.4s, v2.4s + uzp2 v0.2d, v1.2d, v2.2d + +// CHECK: uzp2 v0.8b, v1.8b, v2.8b // encoding: [0x20,0x58,0x02,0x0e] +// CHECK: uzp2 v0.16b, v1.16b, v2.16b // encoding: [0x20,0x58,0x02,0x4e] +// CHECK: uzp2 v0.4h, v1.4h, v2.4h // encoding: [0x20,0x58,0x42,0x0e] +// CHECK: uzp2 v0.8h, v1.8h, v2.8h // encoding: [0x20,0x58,0x42,0x4e] +// CHECK: uzp2 v0.2s, v1.2s, v2.2s // encoding: [0x20,0x58,0x82,0x0e] +// CHECK: uzp2 v0.4s, v1.4s, v2.4s // encoding: [0x20,0x58,0x82,0x4e] +// CHECK: uzp2 v0.2d, v1.2d, v2.2d // encoding: [0x20,0x58,0xc2,0x4e] + +// trn2 (transpose, secondary result): one case per arrangement, .8b through .2d + trn2 v0.8b, v1.8b, v2.8b + trn2 v0.16b, v1.16b, v2.16b + trn2 v0.4h, v1.4h, v2.4h + trn2 v0.8h, v1.8h, v2.8h + trn2 v0.2s, v1.2s, v2.2s + trn2 v0.4s, v1.4s, v2.4s + trn2 v0.2d, v1.2d, v2.2d + +// CHECK: trn2 v0.8b, v1.8b, v2.8b // encoding: [0x20,0x68,0x02,0x0e] +// CHECK: trn2 v0.16b, v1.16b, v2.16b // encoding: [0x20,0x68,0x02,0x4e] +// CHECK: trn2 v0.4h, v1.4h, v2.4h // encoding: [0x20,0x68,0x42,0x0e] +// CHECK: trn2 v0.8h, v1.8h, v2.8h // encoding: [0x20,0x68,0x42,0x4e] +// CHECK: trn2 v0.2s, v1.2s, v2.2s // encoding: [0x20,0x68,0x82,0x0e] +// CHECK: trn2 v0.4s, v1.4s, v2.4s // encoding: [0x20,0x68,0x82,0x4e] +// CHECK: trn2 v0.2d, v1.2d, v2.2d // encoding: [0x20,0x68,0xc2,0x4e] + +// zip2 (zip, secondary result): one case per arrangement, .8b through .2d + zip2 v0.8b, v1.8b, v2.8b + zip2 v0.16b, v1.16b, v2.16b + zip2 v0.4h, v1.4h, v2.4h + zip2 v0.8h, v1.8h, v2.8h + zip2 v0.2s, v1.2s, v2.2s + zip2 v0.4s, v1.4s, v2.4s + zip2 v0.2d, v1.2d, v2.2d + +// CHECK: zip2 v0.8b, v1.8b, v2.8b // encoding: [0x20,0x78,0x02,0x0e] +// CHECK: zip2 v0.16b, v1.16b, v2.16b // encoding: [0x20,0x78,0x02,0x4e] +// CHECK: zip2 v0.4h, v1.4h, v2.4h // encoding: [0x20,0x78,0x42,0x0e] +// CHECK: zip2 
v0.8h, v1.8h, v2.8h // encoding: [0x20,0x78,0x42,0x4e] +// CHECK: zip2 v0.2s, v1.2s, v2.2s // encoding: [0x20,0x78,0x82,0x0e] +// CHECK: zip2 v0.4s, v1.4s, v2.4s // encoding: [0x20,0x78,0x82,0x4e] +// CHECK: zip2 v0.2d, v1.2d, v2.2d // encoding: [0x20,0x78,0xc2,0x4e] diff --git a/test/MC/Disassembler/AArch64/neon-instructions.txt b/test/MC/Disassembler/AArch64/neon-instructions.txt index 225bb16..e4aad48 100644 --- a/test/MC/Disassembler/AArch64/neon-instructions.txt +++ b/test/MC/Disassembler/AArch64/neon-instructions.txt @@ -2051,3 +2051,110 @@ G# RUN: llvm-mc -triple aarch64-none-linux-gnu -mattr=+neon -disassemble < %s | # CHECK: ext v0.8b, v1.8b, v2.8b, #0x3 # CHECK: ext v0.16b, v1.16b, v2.16b, #0x3 +#---------------------------------------------------------------------- +# uzp1: unzip, primary result; all three operands share one arrangement +#---------------------------------------------------------------------- +# CHECK: uzp1 v1.8b, v1.8b, v2.8b +# CHECK: uzp1 v2.16b, v1.16b, v2.16b +# CHECK: uzp1 v3.4h, v1.4h, v2.4h +# CHECK: uzp1 v4.8h, v1.8h, v2.8h +# CHECK: uzp1 v5.2s, v1.2s, v2.2s +# CHECK: uzp1 v6.4s, v1.4s, v2.4s +# CHECK: uzp1 v7.2d, v1.2d, v2.2d +0x21,0x18,0x02,0x0e +0x22,0x18,0x02,0x4e +0x23,0x18,0x42,0x0e +0x24,0x18,0x42,0x4e +0x25,0x18,0x82,0x0e +0x26,0x18,0x82,0x4e +0x27,0x18,0xc2,0x4e + +#---------------------------------------------------------------------- +# trn1: transpose, primary result; all three operands share one arrangement +#---------------------------------------------------------------------- +# CHECK: trn1 v8.8b, v1.8b, v2.8b +# CHECK: trn1 v9.16b, v1.16b, v2.16b +# CHECK: trn1 v10.4h, v1.4h, v2.4h +# CHECK: trn1 v27.8h, v7.8h, v2.8h +# CHECK: trn1 v12.2s, v7.2s, v2.2s +# CHECK: trn1 v29.4s, v6.4s, v2.4s +# CHECK: trn1 v14.2d, v6.2d, v2.2d +0x28,0x28,0x02,0x0e +0x29,0x28,0x02,0x4e +0x2a,0x28,0x42,0x0e +0xfb,0x28,0x42,0x4e +0xec,0x28,0x82,0x0e +0xdd,0x28,0x82,0x4e +0xce,0x28,0xc2,0x4e + +#---------------------------------------------------------------------- +# zip1: zip, primary result; 
all three operands share one arrangement +#---------------------------------------------------------------------- +# CHECK: zip1 v31.8b, v5.8b, v2.8b +# CHECK: zip1 v0.16b, v5.16b, v2.16b +# CHECK: zip1 v17.4h, v4.4h, v2.4h +# CHECK: zip1 v2.8h, v4.8h, v2.8h +# CHECK: zip1 v19.2s, v3.2s, v2.2s +# CHECK: zip1 v4.4s, v3.4s, v2.4s +# CHECK: zip1 v21.2d, v2.2d, v2.2d +0xbf,0x38,0x02,0x0e +0xa0,0x38,0x02,0x4e +0x91,0x38,0x42,0x0e +0x82,0x38,0x42,0x4e +0x73,0x38,0x82,0x0e +0x64,0x38,0x82,0x4e +0x55,0x38,0xc2,0x4e + +#---------------------------------------------------------------------- +# uzp2: unzip, secondary result; all three operands share one arrangement +#---------------------------------------------------------------------- +# CHECK: uzp2 v6.8b, v2.8b, v2.8b +# CHECK: uzp2 v23.16b, v1.16b, v2.16b +# CHECK: uzp2 v8.4h, v1.4h, v2.4h +# CHECK: uzp2 v25.8h, v0.8h, v2.8h +# CHECK: uzp2 v10.2s, v0.2s, v2.2s +# CHECK: uzp2 v27.4s, v7.4s, v2.4s +# CHECK: uzp2 v12.2d, v7.2d, v2.2d +0x46,0x58,0x02,0x0e +0x37,0x58,0x02,0x4e +0x28,0x58,0x42,0x0e +0x19,0x58,0x42,0x4e +0x0a,0x58,0x82,0x0e +0xfb,0x58,0x82,0x4e +0xec,0x58,0xc2,0x4e + +#---------------------------------------------------------------------- +# trn2: transpose, secondary result; all three operands share one arrangement +#---------------------------------------------------------------------- +# CHECK: trn2 v29.8b, v6.8b, v2.8b +# CHECK: trn2 v14.16b, v6.16b, v2.16b +# CHECK: trn2 v31.4h, v5.4h, v2.4h +# CHECK: trn2 v0.8h, v5.8h, v2.8h +# CHECK: trn2 v17.2s, v4.2s, v2.2s +# CHECK: trn2 v2.4s, v4.4s, v2.4s +# CHECK: trn2 v19.2d, v3.2d, v2.2d +0xdd,0x68,0x02,0x0e +0xce,0x68,0x02,0x4e +0xbf,0x68,0x42,0x0e +0xa0,0x68,0x42,0x4e +0x91,0x68,0x82,0x0e +0x82,0x68,0x82,0x4e +0x73,0x68,0xc2,0x4e + +#---------------------------------------------------------------------- +# zip2: zip, secondary result; all three operands share one arrangement +#---------------------------------------------------------------------- +# CHECK: zip2 v4.8b, v3.8b, v2.8b +# CHECK: zip2 v21.16b, v2.16b, v2.16b +# CHECK: 
zip2 v6.4h, v2.4h, v2.4h +# CHECK: zip2 v23.8h, v1.8h, v2.8h +# CHECK: zip2 v8.2s, v1.2s, v2.2s +# CHECK: zip2 v25.4s, v0.4s, v2.4s +# CHECK: zip2 v10.2d, v0.2d, v2.2d +0x64,0x78,0x02,0x0e +0x55,0x78,0x02,0x4e +0x46,0x78,0x42,0x0e +0x37,0x78,0x42,0x4e +0x28,0x78,0x82,0x0e +0x19,0x78,0x82,0x4e +0x0a,0x78,0xc2,0x4e -- cgit v1.1