aboutsummaryrefslogtreecommitdiffstats
path: root/test
diff options
context:
space:
mode:
authorJiangning Liu <jiangning.liu@arm.com>2013-09-09 02:20:27 +0000
committerJiangning Liu <jiangning.liu@arm.com>2013-09-09 02:20:27 +0000
commit959cd8f49bb85c8dfe971eb5a8a648ff41ca8ebd (patch)
tree746a5fdf146ee7373e073b6c7b163147e7a693d4 /test
parent56736c18c14e89a386dae969e33a31ce685a0a1c (diff)
downloadexternal_llvm-959cd8f49bb85c8dfe971eb5a8a648ff41ca8ebd.zip
external_llvm-959cd8f49bb85c8dfe971eb5a8a648ff41ca8ebd.tar.gz
external_llvm-959cd8f49bb85c8dfe971eb5a8a648ff41ca8ebd.tar.bz2
Implement aarch64 neon instruction set AdvSIMD (3V Diff), covering the following 26 instructions,
SADDL, UADDL, SADDW, UADDW, SSUBL, USUBL, SSUBW, USUBW, ADDHN, RADDHN, SABAL, UABAL, SUBHN, RSUBHN, SABDL, UABDL, SMLAL, UMLAL, SMLSL, UMLSL, SQDMLAL, SQDMLSL, SMULL, UMULL, SQDMULL, PMULL git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@190288 91177308-0d34-0410-b5e6-96231b3b80d8
Diffstat (limited to 'test')
-rw-r--r--test/CodeGen/AArch64/neon-3vdiff.ll1806
-rw-r--r--test/MC/AArch64/neon-3vdiff.s411
-rw-r--r--test/MC/AArch64/neon-diagnostics.s739
-rw-r--r--test/MC/Disassembler/AArch64/neon-instructions.txt360
4 files changed, 3316 insertions, 0 deletions
diff --git a/test/CodeGen/AArch64/neon-3vdiff.ll b/test/CodeGen/AArch64/neon-3vdiff.ll
new file mode 100644
index 0000000..171e2b2
--- /dev/null
+++ b/test/CodeGen/AArch64/neon-3vdiff.ll
@@ -0,0 +1,1806 @@
+; RUN: llc < %s -verify-machineinstrs -mtriple=aarch64-none-linux-gnu -mattr=+neon | FileCheck %s
+
+declare <8 x i16> @llvm.arm.neon.vmullp.v8i16(<8 x i8>, <8 x i8>)
+
+declare <2 x i64> @llvm.arm.neon.vqdmull.v2i64(<2 x i32>, <2 x i32>)
+
+declare <2 x i64> @llvm.arm.neon.vqsubs.v2i64(<2 x i64>, <2 x i64>)
+
+declare <4 x i32> @llvm.arm.neon.vqdmull.v4i32(<4 x i16>, <4 x i16>)
+
+declare <4 x i32> @llvm.arm.neon.vqsubs.v4i32(<4 x i32>, <4 x i32>)
+
+declare <2 x i64> @llvm.arm.neon.vqadds.v2i64(<2 x i64>, <2 x i64>)
+
+declare <4 x i32> @llvm.arm.neon.vqadds.v4i32(<4 x i32>, <4 x i32>)
+
+declare <2 x i64> @llvm.arm.neon.vmullu.v2i64(<2 x i32>, <2 x i32>)
+
+declare <4 x i32> @llvm.arm.neon.vmullu.v4i32(<4 x i16>, <4 x i16>)
+
+declare <8 x i16> @llvm.arm.neon.vmullu.v8i16(<8 x i8>, <8 x i8>)
+
+declare <2 x i64> @llvm.arm.neon.vmulls.v2i64(<2 x i32>, <2 x i32>)
+
+declare <4 x i32> @llvm.arm.neon.vmulls.v4i32(<4 x i16>, <4 x i16>)
+
+declare <8 x i16> @llvm.arm.neon.vmulls.v8i16(<8 x i8>, <8 x i8>)
+
+declare <2 x i32> @llvm.arm.neon.vabdu.v2i32(<2 x i32>, <2 x i32>)
+
+declare <4 x i16> @llvm.arm.neon.vabdu.v4i16(<4 x i16>, <4 x i16>)
+
+declare <8 x i8> @llvm.arm.neon.vabdu.v8i8(<8 x i8>, <8 x i8>)
+
+declare <2 x i32> @llvm.arm.neon.vabds.v2i32(<2 x i32>, <2 x i32>)
+
+declare <4 x i16> @llvm.arm.neon.vabds.v4i16(<4 x i16>, <4 x i16>)
+
+declare <8 x i8> @llvm.arm.neon.vabds.v8i8(<8 x i8>, <8 x i8>)
+
+declare <2 x i32> @llvm.arm.neon.vrsubhn.v2i32(<2 x i64>, <2 x i64>)
+
+declare <4 x i16> @llvm.arm.neon.vrsubhn.v4i16(<4 x i32>, <4 x i32>)
+
+declare <8 x i8> @llvm.arm.neon.vrsubhn.v8i8(<8 x i16>, <8 x i16>)
+
+declare <2 x i32> @llvm.arm.neon.vraddhn.v2i32(<2 x i64>, <2 x i64>)
+
+declare <4 x i16> @llvm.arm.neon.vraddhn.v4i16(<4 x i32>, <4 x i32>)
+
+declare <8 x i8> @llvm.arm.neon.vraddhn.v8i8(<8 x i16>, <8 x i16>)
+
+define <8 x i16> @test_vaddl_s8(<8 x i8> %a, <8 x i8> %b) {
+; CHECK: test_vaddl_s8:
+; CHECK: saddl {{v[0-9]+}}.8h, {{v[0-9]+}}.8b, {{v[0-9]+}}.8b
+entry:
+ %vmovl.i.i = sext <8 x i8> %a to <8 x i16>
+ %vmovl.i2.i = sext <8 x i8> %b to <8 x i16>
+ %add.i = add <8 x i16> %vmovl.i.i, %vmovl.i2.i
+ ret <8 x i16> %add.i
+}
+
+define <4 x i32> @test_vaddl_s16(<4 x i16> %a, <4 x i16> %b) {
+; CHECK: test_vaddl_s16:
+; CHECK: saddl {{v[0-9]+}}.4s, {{v[0-9]+}}.4h, {{v[0-9]+}}.4h
+entry:
+ %vmovl.i.i = sext <4 x i16> %a to <4 x i32>
+ %vmovl.i2.i = sext <4 x i16> %b to <4 x i32>
+ %add.i = add <4 x i32> %vmovl.i.i, %vmovl.i2.i
+ ret <4 x i32> %add.i
+}
+
+define <2 x i64> @test_vaddl_s32(<2 x i32> %a, <2 x i32> %b) {
+; CHECK: test_vaddl_s32:
+; CHECK: saddl {{v[0-9]+}}.2d, {{v[0-9]+}}.2s, {{v[0-9]+}}.2s
+entry:
+ %vmovl.i.i = sext <2 x i32> %a to <2 x i64>
+ %vmovl.i2.i = sext <2 x i32> %b to <2 x i64>
+ %add.i = add <2 x i64> %vmovl.i.i, %vmovl.i2.i
+ ret <2 x i64> %add.i
+}
+
+define <8 x i16> @test_vaddl_u8(<8 x i8> %a, <8 x i8> %b) {
+; CHECK: test_vaddl_u8:
+; CHECK: uaddl {{v[0-9]+}}.8h, {{v[0-9]+}}.8b, {{v[0-9]+}}.8b
+entry:
+ %vmovl.i.i = zext <8 x i8> %a to <8 x i16>
+ %vmovl.i2.i = zext <8 x i8> %b to <8 x i16>
+ %add.i = add <8 x i16> %vmovl.i.i, %vmovl.i2.i
+ ret <8 x i16> %add.i
+}
+
+define <4 x i32> @test_vaddl_u16(<4 x i16> %a, <4 x i16> %b) {
+; CHECK: test_vaddl_u16:
+; CHECK: uaddl {{v[0-9]+}}.4s, {{v[0-9]+}}.4h, {{v[0-9]+}}.4h
+entry:
+ %vmovl.i.i = zext <4 x i16> %a to <4 x i32>
+ %vmovl.i2.i = zext <4 x i16> %b to <4 x i32>
+ %add.i = add <4 x i32> %vmovl.i.i, %vmovl.i2.i
+ ret <4 x i32> %add.i
+}
+
+define <2 x i64> @test_vaddl_u32(<2 x i32> %a, <2 x i32> %b) {
+; CHECK: test_vaddl_u32:
+; CHECK: uaddl {{v[0-9]+}}.2d, {{v[0-9]+}}.2s, {{v[0-9]+}}.2s
+entry:
+ %vmovl.i.i = zext <2 x i32> %a to <2 x i64>
+ %vmovl.i2.i = zext <2 x i32> %b to <2 x i64>
+ %add.i = add <2 x i64> %vmovl.i.i, %vmovl.i2.i
+ ret <2 x i64> %add.i
+}
+
+define <8 x i16> @test_vaddl_high_s8(<16 x i8> %a, <16 x i8> %b) {
+; CHECK: test_vaddl_high_s8:
+; CHECK: saddl2 {{v[0-9]+}}.8h, {{v[0-9]+}}.16b, {{v[0-9]+}}.16b
+entry:
+ %shuffle.i.i.i = shufflevector <16 x i8> %a, <16 x i8> undef, <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
+ %0 = sext <8 x i8> %shuffle.i.i.i to <8 x i16>
+ %shuffle.i.i2.i = shufflevector <16 x i8> %b, <16 x i8> undef, <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
+ %1 = sext <8 x i8> %shuffle.i.i2.i to <8 x i16>
+ %add.i = add <8 x i16> %0, %1
+ ret <8 x i16> %add.i
+}
+
+define <4 x i32> @test_vaddl_high_s16(<8 x i16> %a, <8 x i16> %b) {
+; CHECK: test_vaddl_high_s16:
+; CHECK: saddl2 {{v[0-9]+}}.4s, {{v[0-9]+}}.8h, {{v[0-9]+}}.8h
+entry:
+ %shuffle.i.i.i = shufflevector <8 x i16> %a, <8 x i16> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
+ %0 = sext <4 x i16> %shuffle.i.i.i to <4 x i32>
+ %shuffle.i.i2.i = shufflevector <8 x i16> %b, <8 x i16> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
+ %1 = sext <4 x i16> %shuffle.i.i2.i to <4 x i32>
+ %add.i = add <4 x i32> %0, %1
+ ret <4 x i32> %add.i
+}
+
+define <2 x i64> @test_vaddl_high_s32(<4 x i32> %a, <4 x i32> %b) {
+; CHECK: test_vaddl_high_s32:
+; CHECK: saddl2 {{v[0-9]+}}.2d, {{v[0-9]+}}.4s, {{v[0-9]+}}.4s
+entry:
+ %shuffle.i.i.i = shufflevector <4 x i32> %a, <4 x i32> undef, <2 x i32> <i32 2, i32 3>
+ %0 = sext <2 x i32> %shuffle.i.i.i to <2 x i64>
+ %shuffle.i.i2.i = shufflevector <4 x i32> %b, <4 x i32> undef, <2 x i32> <i32 2, i32 3>
+ %1 = sext <2 x i32> %shuffle.i.i2.i to <2 x i64>
+ %add.i = add <2 x i64> %0, %1
+ ret <2 x i64> %add.i
+}
+
+define <8 x i16> @test_vaddl_high_u8(<16 x i8> %a, <16 x i8> %b) {
+; CHECK: test_vaddl_high_u8:
+; CHECK: uaddl2 {{v[0-9]+}}.8h, {{v[0-9]+}}.16b, {{v[0-9]+}}.16b
+entry:
+ %shuffle.i.i.i = shufflevector <16 x i8> %a, <16 x i8> undef, <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
+ %0 = zext <8 x i8> %shuffle.i.i.i to <8 x i16>
+ %shuffle.i.i2.i = shufflevector <16 x i8> %b, <16 x i8> undef, <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
+ %1 = zext <8 x i8> %shuffle.i.i2.i to <8 x i16>
+ %add.i = add <8 x i16> %0, %1
+ ret <8 x i16> %add.i
+}
+
+define <4 x i32> @test_vaddl_high_u16(<8 x i16> %a, <8 x i16> %b) {
+; CHECK: test_vaddl_high_u16:
+; CHECK: uaddl2 {{v[0-9]+}}.4s, {{v[0-9]+}}.8h, {{v[0-9]+}}.8h
+entry:
+ %shuffle.i.i.i = shufflevector <8 x i16> %a, <8 x i16> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
+ %0 = zext <4 x i16> %shuffle.i.i.i to <4 x i32>
+ %shuffle.i.i2.i = shufflevector <8 x i16> %b, <8 x i16> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
+ %1 = zext <4 x i16> %shuffle.i.i2.i to <4 x i32>
+ %add.i = add <4 x i32> %0, %1
+ ret <4 x i32> %add.i
+}
+
+define <2 x i64> @test_vaddl_high_u32(<4 x i32> %a, <4 x i32> %b) {
+; CHECK: test_vaddl_high_u32:
+; CHECK: uaddl2 {{v[0-9]+}}.2d, {{v[0-9]+}}.4s, {{v[0-9]+}}.4s
+entry:
+ %shuffle.i.i.i = shufflevector <4 x i32> %a, <4 x i32> undef, <2 x i32> <i32 2, i32 3>
+ %0 = zext <2 x i32> %shuffle.i.i.i to <2 x i64>
+ %shuffle.i.i2.i = shufflevector <4 x i32> %b, <4 x i32> undef, <2 x i32> <i32 2, i32 3>
+ %1 = zext <2 x i32> %shuffle.i.i2.i to <2 x i64>
+ %add.i = add <2 x i64> %0, %1
+ ret <2 x i64> %add.i
+}
+
+define <8 x i16> @test_vaddw_s8(<8 x i16> %a, <8 x i8> %b) {
+; CHECK: test_vaddw_s8:
+; CHECK: saddw {{v[0-9]+}}.8h, {{v[0-9]+}}.8h, {{v[0-9]+}}.8b
+entry:
+ %vmovl.i.i = sext <8 x i8> %b to <8 x i16>
+ %add.i = add <8 x i16> %vmovl.i.i, %a
+ ret <8 x i16> %add.i
+}
+
+define <4 x i32> @test_vaddw_s16(<4 x i32> %a, <4 x i16> %b) {
+; CHECK: test_vaddw_s16:
+; CHECK: saddw {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.4h
+entry:
+ %vmovl.i.i = sext <4 x i16> %b to <4 x i32>
+ %add.i = add <4 x i32> %vmovl.i.i, %a
+ ret <4 x i32> %add.i
+}
+
+define <2 x i64> @test_vaddw_s32(<2 x i64> %a, <2 x i32> %b) {
+; CHECK: test_vaddw_s32:
+; CHECK: saddw {{v[0-9]+}}.2d, {{v[0-9]+}}.2d, {{v[0-9]+}}.2s
+entry:
+ %vmovl.i.i = sext <2 x i32> %b to <2 x i64>
+ %add.i = add <2 x i64> %vmovl.i.i, %a
+ ret <2 x i64> %add.i
+}
+
+define <8 x i16> @test_vaddw_u8(<8 x i16> %a, <8 x i8> %b) {
+; CHECK: test_vaddw_u8:
+; CHECK: uaddw {{v[0-9]+}}.8h, {{v[0-9]+}}.8h, {{v[0-9]+}}.8b
+entry:
+ %vmovl.i.i = zext <8 x i8> %b to <8 x i16>
+ %add.i = add <8 x i16> %vmovl.i.i, %a
+ ret <8 x i16> %add.i
+}
+
+define <4 x i32> @test_vaddw_u16(<4 x i32> %a, <4 x i16> %b) {
+; CHECK: test_vaddw_u16:
+; CHECK: uaddw {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.4h
+entry:
+ %vmovl.i.i = zext <4 x i16> %b to <4 x i32>
+ %add.i = add <4 x i32> %vmovl.i.i, %a
+ ret <4 x i32> %add.i
+}
+
+define <2 x i64> @test_vaddw_u32(<2 x i64> %a, <2 x i32> %b) {
+; CHECK: test_vaddw_u32:
+; CHECK: uaddw {{v[0-9]+}}.2d, {{v[0-9]+}}.2d, {{v[0-9]+}}.2s
+entry:
+ %vmovl.i.i = zext <2 x i32> %b to <2 x i64>
+ %add.i = add <2 x i64> %vmovl.i.i, %a
+ ret <2 x i64> %add.i
+}
+
+define <8 x i16> @test_vaddw_high_s8(<8 x i16> %a, <16 x i8> %b) {
+; CHECK: test_vaddw_high_s8:
+; CHECK: saddw2 {{v[0-9]+}}.8h, {{v[0-9]+}}.8h, {{v[0-9]+}}.16b
+entry:
+ %shuffle.i.i.i = shufflevector <16 x i8> %b, <16 x i8> undef, <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
+ %0 = sext <8 x i8> %shuffle.i.i.i to <8 x i16>
+ %add.i = add <8 x i16> %0, %a
+ ret <8 x i16> %add.i
+}
+
+define <4 x i32> @test_vaddw_high_s16(<4 x i32> %a, <8 x i16> %b) {
+; CHECK: test_vaddw_high_s16:
+; CHECK: saddw2 {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.8h
+entry:
+ %shuffle.i.i.i = shufflevector <8 x i16> %b, <8 x i16> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
+ %0 = sext <4 x i16> %shuffle.i.i.i to <4 x i32>
+ %add.i = add <4 x i32> %0, %a
+ ret <4 x i32> %add.i
+}
+
+define <2 x i64> @test_vaddw_high_s32(<2 x i64> %a, <4 x i32> %b) {
+; CHECK: test_vaddw_high_s32:
+; CHECK: saddw2 {{v[0-9]+}}.2d, {{v[0-9]+}}.2d, {{v[0-9]+}}.4s
+entry:
+ %shuffle.i.i.i = shufflevector <4 x i32> %b, <4 x i32> undef, <2 x i32> <i32 2, i32 3>
+ %0 = sext <2 x i32> %shuffle.i.i.i to <2 x i64>
+ %add.i = add <2 x i64> %0, %a
+ ret <2 x i64> %add.i
+}
+
+define <8 x i16> @test_vaddw_high_u8(<8 x i16> %a, <16 x i8> %b) {
+; CHECK: test_vaddw_high_u8:
+; CHECK: uaddw2 {{v[0-9]+}}.8h, {{v[0-9]+}}.8h, {{v[0-9]+}}.16b
+entry:
+ %shuffle.i.i.i = shufflevector <16 x i8> %b, <16 x i8> undef, <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
+ %0 = zext <8 x i8> %shuffle.i.i.i to <8 x i16>
+ %add.i = add <8 x i16> %0, %a
+ ret <8 x i16> %add.i
+}
+
+define <4 x i32> @test_vaddw_high_u16(<4 x i32> %a, <8 x i16> %b) {
+; CHECK: test_vaddw_high_u16:
+; CHECK: uaddw2 {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.8h
+entry:
+ %shuffle.i.i.i = shufflevector <8 x i16> %b, <8 x i16> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
+ %0 = zext <4 x i16> %shuffle.i.i.i to <4 x i32>
+ %add.i = add <4 x i32> %0, %a
+ ret <4 x i32> %add.i
+}
+
+define <2 x i64> @test_vaddw_high_u32(<2 x i64> %a, <4 x i32> %b) {
+; CHECK: test_vaddw_high_u32:
+; CHECK: uaddw2 {{v[0-9]+}}.2d, {{v[0-9]+}}.2d, {{v[0-9]+}}.4s
+entry:
+ %shuffle.i.i.i = shufflevector <4 x i32> %b, <4 x i32> undef, <2 x i32> <i32 2, i32 3>
+ %0 = zext <2 x i32> %shuffle.i.i.i to <2 x i64>
+ %add.i = add <2 x i64> %0, %a
+ ret <2 x i64> %add.i
+}
+
+define <8 x i16> @test_vsubl_s8(<8 x i8> %a, <8 x i8> %b) {
+; CHECK: test_vsubl_s8:
+; CHECK: ssubl {{v[0-9]+}}.8h, {{v[0-9]+}}.8b, {{v[0-9]+}}.8b
+entry:
+ %vmovl.i.i = sext <8 x i8> %a to <8 x i16>
+ %vmovl.i2.i = sext <8 x i8> %b to <8 x i16>
+ %sub.i = sub <8 x i16> %vmovl.i.i, %vmovl.i2.i
+ ret <8 x i16> %sub.i
+}
+
+define <4 x i32> @test_vsubl_s16(<4 x i16> %a, <4 x i16> %b) {
+; CHECK: test_vsubl_s16:
+; CHECK: ssubl {{v[0-9]+}}.4s, {{v[0-9]+}}.4h, {{v[0-9]+}}.4h
+entry:
+ %vmovl.i.i = sext <4 x i16> %a to <4 x i32>
+ %vmovl.i2.i = sext <4 x i16> %b to <4 x i32>
+ %sub.i = sub <4 x i32> %vmovl.i.i, %vmovl.i2.i
+ ret <4 x i32> %sub.i
+}
+
+define <2 x i64> @test_vsubl_s32(<2 x i32> %a, <2 x i32> %b) {
+; CHECK: test_vsubl_s32:
+; CHECK: ssubl {{v[0-9]+}}.2d, {{v[0-9]+}}.2s, {{v[0-9]+}}.2s
+entry:
+ %vmovl.i.i = sext <2 x i32> %a to <2 x i64>
+ %vmovl.i2.i = sext <2 x i32> %b to <2 x i64>
+ %sub.i = sub <2 x i64> %vmovl.i.i, %vmovl.i2.i
+ ret <2 x i64> %sub.i
+}
+
+define <8 x i16> @test_vsubl_u8(<8 x i8> %a, <8 x i8> %b) {
+; CHECK: test_vsubl_u8:
+; CHECK: usubl {{v[0-9]+}}.8h, {{v[0-9]+}}.8b, {{v[0-9]+}}.8b
+entry:
+ %vmovl.i.i = zext <8 x i8> %a to <8 x i16>
+ %vmovl.i2.i = zext <8 x i8> %b to <8 x i16>
+ %sub.i = sub <8 x i16> %vmovl.i.i, %vmovl.i2.i
+ ret <8 x i16> %sub.i
+}
+
+define <4 x i32> @test_vsubl_u16(<4 x i16> %a, <4 x i16> %b) {
+; CHECK: test_vsubl_u16:
+; CHECK: usubl {{v[0-9]+}}.4s, {{v[0-9]+}}.4h, {{v[0-9]+}}.4h
+entry:
+ %vmovl.i.i = zext <4 x i16> %a to <4 x i32>
+ %vmovl.i2.i = zext <4 x i16> %b to <4 x i32>
+ %sub.i = sub <4 x i32> %vmovl.i.i, %vmovl.i2.i
+ ret <4 x i32> %sub.i
+}
+
+define <2 x i64> @test_vsubl_u32(<2 x i32> %a, <2 x i32> %b) {
+; CHECK: test_vsubl_u32:
+; CHECK: usubl {{v[0-9]+}}.2d, {{v[0-9]+}}.2s, {{v[0-9]+}}.2s
+entry:
+ %vmovl.i.i = zext <2 x i32> %a to <2 x i64>
+ %vmovl.i2.i = zext <2 x i32> %b to <2 x i64>
+ %sub.i = sub <2 x i64> %vmovl.i.i, %vmovl.i2.i
+ ret <2 x i64> %sub.i
+}
+
+define <8 x i16> @test_vsubl_high_s8(<16 x i8> %a, <16 x i8> %b) {
+; CHECK: test_vsubl_high_s8:
+; CHECK: ssubl2 {{v[0-9]+}}.8h, {{v[0-9]+}}.16b, {{v[0-9]+}}.16b
+entry:
+ %shuffle.i.i.i = shufflevector <16 x i8> %a, <16 x i8> undef, <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
+ %0 = sext <8 x i8> %shuffle.i.i.i to <8 x i16>
+ %shuffle.i.i2.i = shufflevector <16 x i8> %b, <16 x i8> undef, <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
+ %1 = sext <8 x i8> %shuffle.i.i2.i to <8 x i16>
+ %sub.i = sub <8 x i16> %0, %1
+ ret <8 x i16> %sub.i
+}
+
+define <4 x i32> @test_vsubl_high_s16(<8 x i16> %a, <8 x i16> %b) {
+; CHECK: test_vsubl_high_s16:
+; CHECK: ssubl2 {{v[0-9]+}}.4s, {{v[0-9]+}}.8h, {{v[0-9]+}}.8h
+entry:
+ %shuffle.i.i.i = shufflevector <8 x i16> %a, <8 x i16> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
+ %0 = sext <4 x i16> %shuffle.i.i.i to <4 x i32>
+ %shuffle.i.i2.i = shufflevector <8 x i16> %b, <8 x i16> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
+ %1 = sext <4 x i16> %shuffle.i.i2.i to <4 x i32>
+ %sub.i = sub <4 x i32> %0, %1
+ ret <4 x i32> %sub.i
+}
+
+define <2 x i64> @test_vsubl_high_s32(<4 x i32> %a, <4 x i32> %b) {
+; CHECK: test_vsubl_high_s32:
+; CHECK: ssubl2 {{v[0-9]+}}.2d, {{v[0-9]+}}.4s, {{v[0-9]+}}.4s
+entry:
+ %shuffle.i.i.i = shufflevector <4 x i32> %a, <4 x i32> undef, <2 x i32> <i32 2, i32 3>
+ %0 = sext <2 x i32> %shuffle.i.i.i to <2 x i64>
+ %shuffle.i.i2.i = shufflevector <4 x i32> %b, <4 x i32> undef, <2 x i32> <i32 2, i32 3>
+ %1 = sext <2 x i32> %shuffle.i.i2.i to <2 x i64>
+ %sub.i = sub <2 x i64> %0, %1
+ ret <2 x i64> %sub.i
+}
+
+define <8 x i16> @test_vsubl_high_u8(<16 x i8> %a, <16 x i8> %b) {
+; CHECK: test_vsubl_high_u8:
+; CHECK: usubl2 {{v[0-9]+}}.8h, {{v[0-9]+}}.16b, {{v[0-9]+}}.16b
+entry:
+ %shuffle.i.i.i = shufflevector <16 x i8> %a, <16 x i8> undef, <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
+ %0 = zext <8 x i8> %shuffle.i.i.i to <8 x i16>
+ %shuffle.i.i2.i = shufflevector <16 x i8> %b, <16 x i8> undef, <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
+ %1 = zext <8 x i8> %shuffle.i.i2.i to <8 x i16>
+ %sub.i = sub <8 x i16> %0, %1
+ ret <8 x i16> %sub.i
+}
+
+define <4 x i32> @test_vsubl_high_u16(<8 x i16> %a, <8 x i16> %b) {
+; CHECK: test_vsubl_high_u16:
+; CHECK: usubl2 {{v[0-9]+}}.4s, {{v[0-9]+}}.8h, {{v[0-9]+}}.8h
+entry:
+ %shuffle.i.i.i = shufflevector <8 x i16> %a, <8 x i16> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
+ %0 = zext <4 x i16> %shuffle.i.i.i to <4 x i32>
+ %shuffle.i.i2.i = shufflevector <8 x i16> %b, <8 x i16> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
+ %1 = zext <4 x i16> %shuffle.i.i2.i to <4 x i32>
+ %sub.i = sub <4 x i32> %0, %1
+ ret <4 x i32> %sub.i
+}
+
+define <2 x i64> @test_vsubl_high_u32(<4 x i32> %a, <4 x i32> %b) {
+; CHECK: test_vsubl_high_u32:
+; CHECK: usubl2 {{v[0-9]+}}.2d, {{v[0-9]+}}.4s, {{v[0-9]+}}.4s
+entry:
+ %shuffle.i.i.i = shufflevector <4 x i32> %a, <4 x i32> undef, <2 x i32> <i32 2, i32 3>
+ %0 = zext <2 x i32> %shuffle.i.i.i to <2 x i64>
+ %shuffle.i.i2.i = shufflevector <4 x i32> %b, <4 x i32> undef, <2 x i32> <i32 2, i32 3>
+ %1 = zext <2 x i32> %shuffle.i.i2.i to <2 x i64>
+ %sub.i = sub <2 x i64> %0, %1
+ ret <2 x i64> %sub.i
+}
+
+define <8 x i16> @test_vsubw_s8(<8 x i16> %a, <8 x i8> %b) {
+; CHECK: test_vsubw_s8:
+; CHECK: ssubw {{v[0-9]+}}.8h, {{v[0-9]+}}.8h, {{v[0-9]+}}.8b
+entry:
+ %vmovl.i.i = sext <8 x i8> %b to <8 x i16>
+ %sub.i = sub <8 x i16> %a, %vmovl.i.i
+ ret <8 x i16> %sub.i
+}
+
+define <4 x i32> @test_vsubw_s16(<4 x i32> %a, <4 x i16> %b) {
+; CHECK: test_vsubw_s16:
+; CHECK: ssubw {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.4h
+entry:
+ %vmovl.i.i = sext <4 x i16> %b to <4 x i32>
+ %sub.i = sub <4 x i32> %a, %vmovl.i.i
+ ret <4 x i32> %sub.i
+}
+
+define <2 x i64> @test_vsubw_s32(<2 x i64> %a, <2 x i32> %b) {
+; CHECK: test_vsubw_s32:
+; CHECK: ssubw {{v[0-9]+}}.2d, {{v[0-9]+}}.2d, {{v[0-9]+}}.2s
+entry:
+ %vmovl.i.i = sext <2 x i32> %b to <2 x i64>
+ %sub.i = sub <2 x i64> %a, %vmovl.i.i
+ ret <2 x i64> %sub.i
+}
+
+define <8 x i16> @test_vsubw_u8(<8 x i16> %a, <8 x i8> %b) {
+; CHECK: test_vsubw_u8:
+; CHECK: usubw {{v[0-9]+}}.8h, {{v[0-9]+}}.8h, {{v[0-9]+}}.8b
+entry:
+ %vmovl.i.i = zext <8 x i8> %b to <8 x i16>
+ %sub.i = sub <8 x i16> %a, %vmovl.i.i
+ ret <8 x i16> %sub.i
+}
+
+define <4 x i32> @test_vsubw_u16(<4 x i32> %a, <4 x i16> %b) {
+; CHECK: test_vsubw_u16:
+; CHECK: usubw {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.4h
+entry:
+ %vmovl.i.i = zext <4 x i16> %b to <4 x i32>
+ %sub.i = sub <4 x i32> %a, %vmovl.i.i
+ ret <4 x i32> %sub.i
+}
+
+define <2 x i64> @test_vsubw_u32(<2 x i64> %a, <2 x i32> %b) {
+; CHECK: test_vsubw_u32:
+; CHECK: usubw {{v[0-9]+}}.2d, {{v[0-9]+}}.2d, {{v[0-9]+}}.2s
+entry:
+ %vmovl.i.i = zext <2 x i32> %b to <2 x i64>
+ %sub.i = sub <2 x i64> %a, %vmovl.i.i
+ ret <2 x i64> %sub.i
+}
+
+define <8 x i16> @test_vsubw_high_s8(<8 x i16> %a, <16 x i8> %b) {
+; CHECK: test_vsubw_high_s8:
+; CHECK: ssubw2 {{v[0-9]+}}.8h, {{v[0-9]+}}.8h, {{v[0-9]+}}.16b
+entry:
+ %shuffle.i.i.i = shufflevector <16 x i8> %b, <16 x i8> undef, <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
+ %0 = sext <8 x i8> %shuffle.i.i.i to <8 x i16>
+ %sub.i = sub <8 x i16> %a, %0
+ ret <8 x i16> %sub.i
+}
+
+define <4 x i32> @test_vsubw_high_s16(<4 x i32> %a, <8 x i16> %b) {
+; CHECK: test_vsubw_high_s16:
+; CHECK: ssubw2 {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.8h
+entry:
+ %shuffle.i.i.i = shufflevector <8 x i16> %b, <8 x i16> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
+ %0 = sext <4 x i16> %shuffle.i.i.i to <4 x i32>
+ %sub.i = sub <4 x i32> %a, %0
+ ret <4 x i32> %sub.i
+}
+
+define <2 x i64> @test_vsubw_high_s32(<2 x i64> %a, <4 x i32> %b) {
+; CHECK: test_vsubw_high_s32:
+; CHECK: ssubw2 {{v[0-9]+}}.2d, {{v[0-9]+}}.2d, {{v[0-9]+}}.4s
+entry:
+ %shuffle.i.i.i = shufflevector <4 x i32> %b, <4 x i32> undef, <2 x i32> <i32 2, i32 3>
+ %0 = sext <2 x i32> %shuffle.i.i.i to <2 x i64>
+ %sub.i = sub <2 x i64> %a, %0
+ ret <2 x i64> %sub.i
+}
+
+define <8 x i16> @test_vsubw_high_u8(<8 x i16> %a, <16 x i8> %b) {
+; CHECK: test_vsubw_high_u8:
+; CHECK: usubw2 {{v[0-9]+}}.8h, {{v[0-9]+}}.8h, {{v[0-9]+}}.16b
+entry:
+ %shuffle.i.i.i = shufflevector <16 x i8> %b, <16 x i8> undef, <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
+ %0 = zext <8 x i8> %shuffle.i.i.i to <8 x i16>
+ %sub.i = sub <8 x i16> %a, %0
+ ret <8 x i16> %sub.i
+}
+
+define <4 x i32> @test_vsubw_high_u16(<4 x i32> %a, <8 x i16> %b) {
+; CHECK: test_vsubw_high_u16:
+; CHECK: usubw2 {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.8h
+entry:
+ %shuffle.i.i.i = shufflevector <8 x i16> %b, <8 x i16> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
+ %0 = zext <4 x i16> %shuffle.i.i.i to <4 x i32>
+ %sub.i = sub <4 x i32> %a, %0
+ ret <4 x i32> %sub.i
+}
+
+define <2 x i64> @test_vsubw_high_u32(<2 x i64> %a, <4 x i32> %b) {
+; CHECK: test_vsubw_high_u32:
+; CHECK: usubw2 {{v[0-9]+}}.2d, {{v[0-9]+}}.2d, {{v[0-9]+}}.4s
+entry:
+ %shuffle.i.i.i = shufflevector <4 x i32> %b, <4 x i32> undef, <2 x i32> <i32 2, i32 3>
+ %0 = zext <2 x i32> %shuffle.i.i.i to <2 x i64>
+ %sub.i = sub <2 x i64> %a, %0
+ ret <2 x i64> %sub.i
+}
+
+define <8 x i8> @test_vaddhn_s16(<8 x i16> %a, <8 x i16> %b) {
+; CHECK: test_vaddhn_s16:
+; CHECK: addhn {{v[0-9]+}}.8b, {{v[0-9]+}}.8h, {{v[0-9]+}}.8h
+entry:
+ %vaddhn.i = add <8 x i16> %a, %b
+ %vaddhn1.i = lshr <8 x i16> %vaddhn.i, <i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8>
+ %vaddhn2.i = trunc <8 x i16> %vaddhn1.i to <8 x i8>
+ ret <8 x i8> %vaddhn2.i
+}
+
+define <4 x i16> @test_vaddhn_s32(<4 x i32> %a, <4 x i32> %b) {
+; CHECK: test_vaddhn_s32:
+; CHECK: addhn {{v[0-9]+}}.4h, {{v[0-9]+}}.4s, {{v[0-9]+}}.4s
+entry:
+ %vaddhn.i = add <4 x i32> %a, %b
+ %vaddhn1.i = lshr <4 x i32> %vaddhn.i, <i32 16, i32 16, i32 16, i32 16>
+ %vaddhn2.i = trunc <4 x i32> %vaddhn1.i to <4 x i16>
+ ret <4 x i16> %vaddhn2.i
+}
+
+define <2 x i32> @test_vaddhn_s64(<2 x i64> %a, <2 x i64> %b) {
+; CHECK: test_vaddhn_s64:
+; CHECK: addhn {{v[0-9]+}}.2s, {{v[0-9]+}}.2d, {{v[0-9]+}}.2d
+entry:
+ %vaddhn.i = add <2 x i64> %a, %b
+ %vaddhn1.i = lshr <2 x i64> %vaddhn.i, <i64 32, i64 32>
+ %vaddhn2.i = trunc <2 x i64> %vaddhn1.i to <2 x i32>
+ ret <2 x i32> %vaddhn2.i
+}
+
+define <8 x i8> @test_vaddhn_u16(<8 x i16> %a, <8 x i16> %b) {
+; CHECK: test_vaddhn_u16:
+; CHECK: addhn {{v[0-9]+}}.8b, {{v[0-9]+}}.8h, {{v[0-9]+}}.8h
+entry:
+ %vaddhn.i = add <8 x i16> %a, %b
+ %vaddhn1.i = lshr <8 x i16> %vaddhn.i, <i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8>
+ %vaddhn2.i = trunc <8 x i16> %vaddhn1.i to <8 x i8>
+ ret <8 x i8> %vaddhn2.i
+}
+
+define <4 x i16> @test_vaddhn_u32(<4 x i32> %a, <4 x i32> %b) {
+; CHECK: test_vaddhn_u32:
+; CHECK: addhn {{v[0-9]+}}.4h, {{v[0-9]+}}.4s, {{v[0-9]+}}.4s
+entry:
+ %vaddhn.i = add <4 x i32> %a, %b
+ %vaddhn1.i = lshr <4 x i32> %vaddhn.i, <i32 16, i32 16, i32 16, i32 16>
+ %vaddhn2.i = trunc <4 x i32> %vaddhn1.i to <4 x i16>
+ ret <4 x i16> %vaddhn2.i
+}
+
+define <2 x i32> @test_vaddhn_u64(<2 x i64> %a, <2 x i64> %b) {
+; CHECK: test_vaddhn_u64:
+; CHECK: addhn {{v[0-9]+}}.2s, {{v[0-9]+}}.2d, {{v[0-9]+}}.2d
+entry:
+ %vaddhn.i = add <2 x i64> %a, %b
+ %vaddhn1.i = lshr <2 x i64> %vaddhn.i, <i64 32, i64 32>
+ %vaddhn2.i = trunc <2 x i64> %vaddhn1.i to <2 x i32>
+ ret <2 x i32> %vaddhn2.i
+}
+
+define <16 x i8> @test_vaddhn_high_s16(<8 x i8> %r, <8 x i16> %a, <8 x i16> %b) {
+; CHECK: test_vaddhn_high_s16:
+; CHECK: addhn2 {{v[0-9]+}}.16b, {{v[0-9]+}}.8h, {{v[0-9]+}}.8h
+entry:
+ %vaddhn.i.i = add <8 x i16> %a, %b
+ %vaddhn1.i.i = lshr <8 x i16> %vaddhn.i.i, <i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8>
+ %vaddhn2.i.i = trunc <8 x i16> %vaddhn1.i.i to <8 x i8>
+ %0 = bitcast <8 x i8> %r to <1 x i64>
+ %1 = bitcast <8 x i8> %vaddhn2.i.i to <1 x i64>
+ %shuffle.i.i = shufflevector <1 x i64> %0, <1 x i64> %1, <2 x i32> <i32 0, i32 1>
+ %2 = bitcast <2 x i64> %shuffle.i.i to <16 x i8>
+ ret <16 x i8> %2
+}
+
+define <8 x i16> @test_vaddhn_high_s32(<4 x i16> %r, <4 x i32> %a, <4 x i32> %b) {
+; CHECK: test_vaddhn_high_s32:
+; CHECK: addhn2 {{v[0-9]+}}.8h, {{v[0-9]+}}.4s, {{v[0-9]+}}.4s
+entry:
+ %vaddhn.i.i = add <4 x i32> %a, %b
+ %vaddhn1.i.i = lshr <4 x i32> %vaddhn.i.i, <i32 16, i32 16, i32 16, i32 16>
+ %vaddhn2.i.i = trunc <4 x i32> %vaddhn1.i.i to <4 x i16>
+ %0 = bitcast <4 x i16> %r to <1 x i64>
+ %1 = bitcast <4 x i16> %vaddhn2.i.i to <1 x i64>
+ %shuffle.i.i = shufflevector <1 x i64> %0, <1 x i64> %1, <2 x i32> <i32 0, i32 1>
+ %2 = bitcast <2 x i64> %shuffle.i.i to <8 x i16>
+ ret <8 x i16> %2
+}
+
+define <4 x i32> @test_vaddhn_high_s64(<2 x i32> %r, <2 x i64> %a, <2 x i64> %b) {
+; CHECK: test_vaddhn_high_s64:
+; CHECK: addhn2 {{v[0-9]+}}.4s, {{v[0-9]+}}.2d, {{v[0-9]+}}.2d
+entry:
+ %vaddhn.i.i = add <2 x i64> %a, %b
+ %vaddhn1.i.i = lshr <2 x i64> %vaddhn.i.i, <i64 32, i64 32>
+ %vaddhn2.i.i = trunc <2 x i64> %vaddhn1.i.i to <2 x i32>
+ %0 = bitcast <2 x i32> %r to <1 x i64>
+ %1 = bitcast <2 x i32> %vaddhn2.i.i to <1 x i64>
+ %shuffle.i.i = shufflevector <1 x i64> %0, <1 x i64> %1, <2 x i32> <i32 0, i32 1>
+ %2 = bitcast <2 x i64> %shuffle.i.i to <4 x i32>
+ ret <4 x i32> %2
+}
+
+define <16 x i8> @test_vaddhn_high_u16(<8 x i8> %r, <8 x i16> %a, <8 x i16> %b) {
+; CHECK: test_vaddhn_high_u16:
+; CHECK: addhn2 {{v[0-9]+}}.16b, {{v[0-9]+}}.8h, {{v[0-9]+}}.8h
+entry:
+ %vaddhn.i.i = add <8 x i16> %a, %b
+ %vaddhn1.i.i = lshr <8 x i16> %vaddhn.i.i, <i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8>
+ %vaddhn2.i.i = trunc <8 x i16> %vaddhn1.i.i to <8 x i8>
+ %0 = bitcast <8 x i8> %r to <1 x i64>
+ %1 = bitcast <8 x i8> %vaddhn2.i.i to <1 x i64>
+ %shuffle.i.i = shufflevector <1 x i64> %0, <1 x i64> %1, <2 x i32> <i32 0, i32 1>
+ %2 = bitcast <2 x i64> %shuffle.i.i to <16 x i8>
+ ret <16 x i8> %2
+}
+
+define <8 x i16> @test_vaddhn_high_u32(<4 x i16> %r, <4 x i32> %a, <4 x i32> %b) {
+; CHECK: test_vaddhn_high_u32:
+; CHECK: addhn2 {{v[0-9]+}}.8h, {{v[0-9]+}}.4s, {{v[0-9]+}}.4s
+entry:
+ %vaddhn.i.i = add <4 x i32> %a, %b
+ %vaddhn1.i.i = lshr <4 x i32> %vaddhn.i.i, <i32 16, i32 16, i32 16, i32 16>
+ %vaddhn2.i.i = trunc <4 x i32> %vaddhn1.i.i to <4 x i16>
+ %0 = bitcast <4 x i16> %r to <1 x i64>
+ %1 = bitcast <4 x i16> %vaddhn2.i.i to <1 x i64>
+ %shuffle.i.i = shufflevector <1 x i64> %0, <1 x i64> %1, <2 x i32> <i32 0, i32 1>
+ %2 = bitcast <2 x i64> %shuffle.i.i to <8 x i16>
+ ret <8 x i16> %2
+}
+
+define <4 x i32> @test_vaddhn_high_u64(<2 x i32> %r, <2 x i64> %a, <2 x i64> %b) {
+; CHECK: test_vaddhn_high_u64:
+; CHECK: addhn2 {{v[0-9]+}}.4s, {{v[0-9]+}}.2d, {{v[0-9]+}}.2d
+entry:
+ %vaddhn.i.i = add <2 x i64> %a, %b
+ %vaddhn1.i.i = lshr <2 x i64> %vaddhn.i.i, <i64 32, i64 32>
+ %vaddhn2.i.i = trunc <2 x i64> %vaddhn1.i.i to <2 x i32>
+ %0 = bitcast <2 x i32> %r to <1 x i64>
+ %1 = bitcast <2 x i32> %vaddhn2.i.i to <1 x i64>
+ %shuffle.i.i = shufflevector <1 x i64> %0, <1 x i64> %1, <2 x i32> <i32 0, i32 1>
+ %2 = bitcast <2 x i64> %shuffle.i.i to <4 x i32>
+ ret <4 x i32> %2
+}
+
+define <8 x i8> @test_vraddhn_s16(<8 x i16> %a, <8 x i16> %b) {
+; CHECK: test_vraddhn_s16:
+; CHECK: raddhn {{v[0-9]+}}.8b, {{v[0-9]+}}.8h, {{v[0-9]+}}.8h
+entry:
+ %vraddhn2.i = tail call <8 x i8> @llvm.arm.neon.vraddhn.v8i8(<8 x i16> %a, <8 x i16> %b)
+ ret <8 x i8> %vraddhn2.i
+}
+
+define <4 x i16> @test_vraddhn_s32(<4 x i32> %a, <4 x i32> %b) {
+; CHECK: test_vraddhn_s32:
+; CHECK: raddhn {{v[0-9]+}}.4h, {{v[0-9]+}}.4s, {{v[0-9]+}}.4s
+entry:
+ %vraddhn2.i = tail call <4 x i16> @llvm.arm.neon.vraddhn.v4i16(<4 x i32> %a, <4 x i32> %b)
+ ret <4 x i16> %vraddhn2.i
+}
+
+define <2 x i32> @test_vraddhn_s64(<2 x i64> %a, <2 x i64> %b) {
+; CHECK: test_vraddhn_s64:
+; CHECK: raddhn {{v[0-9]+}}.2s, {{v[0-9]+}}.2d, {{v[0-9]+}}.2d
+entry:
+ %vraddhn2.i = tail call <2 x i32> @llvm.arm.neon.vraddhn.v2i32(<2 x i64> %a, <2 x i64> %b)
+ ret <2 x i32> %vraddhn2.i
+}
+
+define <8 x i8> @test_vraddhn_u16(<8 x i16> %a, <8 x i16> %b) {
+; CHECK: test_vraddhn_u16:
+; CHECK: raddhn {{v[0-9]+}}.8b, {{v[0-9]+}}.8h, {{v[0-9]+}}.8h
+entry:
+ %vraddhn2.i = tail call <8 x i8> @llvm.arm.neon.vraddhn.v8i8(<8 x i16> %a, <8 x i16> %b)
+ ret <8 x i8> %vraddhn2.i
+}
+
+define <4 x i16> @test_vraddhn_u32(<4 x i32> %a, <4 x i32> %b) {
+; CHECK: test_vraddhn_u32:
+; CHECK: raddhn {{v[0-9]+}}.4h, {{v[0-9]+}}.4s, {{v[0-9]+}}.4s
+entry:
+ %vraddhn2.i = tail call <4 x i16> @llvm.arm.neon.vraddhn.v4i16(<4 x i32> %a, <4 x i32> %b)
+ ret <4 x i16> %vraddhn2.i
+}
+
+define <2 x i32> @test_vraddhn_u64(<2 x i64> %a, <2 x i64> %b) {
+; CHECK: test_vraddhn_u64:
+; CHECK: raddhn {{v[0-9]+}}.2s, {{v[0-9]+}}.2d, {{v[0-9]+}}.2d
+entry:
+ %vraddhn2.i = tail call <2 x i32> @llvm.arm.neon.vraddhn.v2i32(<2 x i64> %a, <2 x i64> %b)
+ ret <2 x i32> %vraddhn2.i
+}
+
+define <16 x i8> @test_vraddhn_high_s16(<8 x i8> %r, <8 x i16> %a, <8 x i16> %b) {
+; CHECK: test_vraddhn_high_s16:
+; CHECK: raddhn2 {{v[0-9]+}}.16b, {{v[0-9]+}}.8h, {{v[0-9]+}}.8h
+entry:
+ %vraddhn2.i.i = tail call <8 x i8> @llvm.arm.neon.vraddhn.v8i8(<8 x i16> %a, <8 x i16> %b)
+ %0 = bitcast <8 x i8> %r to <1 x i64>
+ %1 = bitcast <8 x i8> %vraddhn2.i.i to <1 x i64>
+ %shuffle.i.i = shufflevector <1 x i64> %0, <1 x i64> %1, <2 x i32> <i32 0, i32 1>
+ %2 = bitcast <2 x i64> %shuffle.i.i to <16 x i8>
+ ret <16 x i8> %2
+}
+
+define <8 x i16> @test_vraddhn_high_s32(<4 x i16> %r, <4 x i32> %a, <4 x i32> %b) {
+; CHECK: test_vraddhn_high_s32:
+; CHECK: raddhn2 {{v[0-9]+}}.8h, {{v[0-9]+}}.4s, {{v[0-9]+}}.4s
+entry:
+ %vraddhn2.i.i = tail call <4 x i16> @llvm.arm.neon.vraddhn.v4i16(<4 x i32> %a, <4 x i32> %b)
+ %0 = bitcast <4 x i16> %r to <1 x i64>
+ %1 = bitcast <4 x i16> %vraddhn2.i.i to <1 x i64>
+ %shuffle.i.i = shufflevector <1 x i64> %0, <1 x i64> %1, <2 x i32> <i32 0, i32 1>
+ %2 = bitcast <2 x i64> %shuffle.i.i to <8 x i16>
+ ret <8 x i16> %2
+}
+
+define <4 x i32> @test_vraddhn_high_s64(<2 x i32> %r, <2 x i64> %a, <2 x i64> %b) {
+; CHECK: test_vraddhn_high_s64:
+; CHECK: raddhn2 {{v[0-9]+}}.4s, {{v[0-9]+}}.2d, {{v[0-9]+}}.2d
+entry:
+ %vraddhn2.i.i = tail call <2 x i32> @llvm.arm.neon.vraddhn.v2i32(<2 x i64> %a, <2 x i64> %b)
+ %0 = bitcast <2 x i32> %r to <1 x i64>
+ %1 = bitcast <2 x i32> %vraddhn2.i.i to <1 x i64>
+ %shuffle.i.i = shufflevector <1 x i64> %0, <1 x i64> %1, <2 x i32> <i32 0, i32 1>
+ %2 = bitcast <2 x i64> %shuffle.i.i to <4 x i32>
+ ret <4 x i32> %2
+}
+
+define <16 x i8> @test_vraddhn_high_u16(<8 x i8> %r, <8 x i16> %a, <8 x i16> %b) {
+; CHECK: test_vraddhn_high_u16:
+; CHECK: raddhn2 {{v[0-9]+}}.16b, {{v[0-9]+}}.8h, {{v[0-9]+}}.8h
+entry:
+ %vraddhn2.i.i = tail call <8 x i8> @llvm.arm.neon.vraddhn.v8i8(<8 x i16> %a, <8 x i16> %b)
+ %0 = bitcast <8 x i8> %r to <1 x i64>
+ %1 = bitcast <8 x i8> %vraddhn2.i.i to <1 x i64>
+ %shuffle.i.i = shufflevector <1 x i64> %0, <1 x i64> %1, <2 x i32> <i32 0, i32 1>
+ %2 = bitcast <2 x i64> %shuffle.i.i to <16 x i8>
+ ret <16 x i8> %2
+}
+
+define <8 x i16> @test_vraddhn_high_u32(<4 x i16> %r, <4 x i32> %a, <4 x i32> %b) {
+; CHECK: test_vraddhn_high_u32:
+; CHECK: raddhn2 {{v[0-9]+}}.8h, {{v[0-9]+}}.4s, {{v[0-9]+}}.4s
+entry:
+ %vraddhn2.i.i = tail call <4 x i16> @llvm.arm.neon.vraddhn.v4i16(<4 x i32> %a, <4 x i32> %b)
+ %0 = bitcast <4 x i16> %r to <1 x i64>
+ %1 = bitcast <4 x i16> %vraddhn2.i.i to <1 x i64>
+ %shuffle.i.i = shufflevector <1 x i64> %0, <1 x i64> %1, <2 x i32> <i32 0, i32 1>
+ %2 = bitcast <2 x i64> %shuffle.i.i to <8 x i16>
+ ret <8 x i16> %2
+}
+
+define <4 x i32> @test_vraddhn_high_u64(<2 x i32> %r, <2 x i64> %a, <2 x i64> %b) {
+; CHECK: test_vraddhn_high_u64:
+; CHECK: raddhn2 {{v[0-9]+}}.4s, {{v[0-9]+}}.2d, {{v[0-9]+}}.2d
+entry:
+ %vraddhn2.i.i = tail call <2 x i32> @llvm.arm.neon.vraddhn.v2i32(<2 x i64> %a, <2 x i64> %b)
+ %0 = bitcast <2 x i32> %r to <1 x i64>
+ %1 = bitcast <2 x i32> %vraddhn2.i.i to <1 x i64>
+ %shuffle.i.i = shufflevector <1 x i64> %0, <1 x i64> %1, <2 x i32> <i32 0, i32 1>
+ %2 = bitcast <2 x i64> %shuffle.i.i to <4 x i32>
+ ret <4 x i32> %2
+}
+
+define <8 x i8> @test_vsubhn_s16(<8 x i16> %a, <8 x i16> %b) {
+; CHECK: test_vsubhn_s16:
+; CHECK: subhn {{v[0-9]+}}.8b, {{v[0-9]+}}.8h, {{v[0-9]+}}.8h
+entry:
+ %vsubhn.i = sub <8 x i16> %a, %b
+ %vsubhn1.i = lshr <8 x i16> %vsubhn.i, <i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8>
+ %vsubhn2.i = trunc <8 x i16> %vsubhn1.i to <8 x i8>
+ ret <8 x i8> %vsubhn2.i
+}
+
+define <4 x i16> @test_vsubhn_s32(<4 x i32> %a, <4 x i32> %b) {
+; CHECK: test_vsubhn_s32:
+; CHECK: subhn {{v[0-9]+}}.4h, {{v[0-9]+}}.4s, {{v[0-9]+}}.4s
+entry:
+ %vsubhn.i = sub <4 x i32> %a, %b
+ %vsubhn1.i = lshr <4 x i32> %vsubhn.i, <i32 16, i32 16, i32 16, i32 16>
+ %vsubhn2.i = trunc <4 x i32> %vsubhn1.i to <4 x i16>
+ ret <4 x i16> %vsubhn2.i
+}
+
+define <2 x i32> @test_vsubhn_s64(<2 x i64> %a, <2 x i64> %b) {
+; CHECK: test_vsubhn_s64:
+; CHECK: subhn {{v[0-9]+}}.2s, {{v[0-9]+}}.2d, {{v[0-9]+}}.2d
+entry:
+ %vsubhn.i = sub <2 x i64> %a, %b
+ %vsubhn1.i = lshr <2 x i64> %vsubhn.i, <i64 32, i64 32>
+ %vsubhn2.i = trunc <2 x i64> %vsubhn1.i to <2 x i32>
+ ret <2 x i32> %vsubhn2.i
+}
+
+define <8 x i8> @test_vsubhn_u16(<8 x i16> %a, <8 x i16> %b) {
+; CHECK: test_vsubhn_u16:
+; CHECK: subhn {{v[0-9]+}}.8b, {{v[0-9]+}}.8h, {{v[0-9]+}}.8h
+entry:
+ %vsubhn.i = sub <8 x i16> %a, %b
+ %vsubhn1.i = lshr <8 x i16> %vsubhn.i, <i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8>
+ %vsubhn2.i = trunc <8 x i16> %vsubhn1.i to <8 x i8>
+ ret <8 x i8> %vsubhn2.i
+}
+
+define <4 x i16> @test_vsubhn_u32(<4 x i32> %a, <4 x i32> %b) {
+; CHECK: test_vsubhn_u32:
+; CHECK: subhn {{v[0-9]+}}.4h, {{v[0-9]+}}.4s, {{v[0-9]+}}.4s
+entry:
+ %vsubhn.i = sub <4 x i32> %a, %b
+ %vsubhn1.i = lshr <4 x i32> %vsubhn.i, <i32 16, i32 16, i32 16, i32 16>
+ %vsubhn2.i = trunc <4 x i32> %vsubhn1.i to <4 x i16>
+ ret <4 x i16> %vsubhn2.i
+}
+
+define <2 x i32> @test_vsubhn_u64(<2 x i64> %a, <2 x i64> %b) {
+; CHECK: test_vsubhn_u64:
+; CHECK: subhn {{v[0-9]+}}.2s, {{v[0-9]+}}.2d, {{v[0-9]+}}.2d
+entry:
+ %vsubhn.i = sub <2 x i64> %a, %b
+ %vsubhn1.i = lshr <2 x i64> %vsubhn.i, <i64 32, i64 32>
+ %vsubhn2.i = trunc <2 x i64> %vsubhn1.i to <2 x i32>
+ ret <2 x i32> %vsubhn2.i
+}
+
+define <16 x i8> @test_vsubhn_high_s16(<8 x i8> %r, <8 x i16> %a, <8 x i16> %b) {
+; CHECK: test_vsubhn_high_s16:
+; CHECK: subhn2 {{v[0-9]+}}.16b, {{v[0-9]+}}.8h, {{v[0-9]+}}.8h
+entry:
+ %vsubhn.i.i = sub <8 x i16> %a, %b
+ %vsubhn1.i.i = lshr <8 x i16> %vsubhn.i.i, <i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8>
+ %vsubhn2.i.i = trunc <8 x i16> %vsubhn1.i.i to <8 x i8>
+ %0 = bitcast <8 x i8> %r to <1 x i64>
+ %1 = bitcast <8 x i8> %vsubhn2.i.i to <1 x i64>
+ %shuffle.i.i = shufflevector <1 x i64> %0, <1 x i64> %1, <2 x i32> <i32 0, i32 1>
+ %2 = bitcast <2 x i64> %shuffle.i.i to <16 x i8>
+ ret <16 x i8> %2
+}
+
+define <8 x i16> @test_vsubhn_high_s32(<4 x i16> %r, <4 x i32> %a, <4 x i32> %b) {
+; CHECK: test_vsubhn_high_s32:
+; CHECK: subhn2 {{v[0-9]+}}.8h, {{v[0-9]+}}.4s, {{v[0-9]+}}.4s
+entry:
+ %vsubhn.i.i = sub <4 x i32> %a, %b
+ %vsubhn1.i.i = lshr <4 x i32> %vsubhn.i.i, <i32 16, i32 16, i32 16, i32 16>
+ %vsubhn2.i.i = trunc <4 x i32> %vsubhn1.i.i to <4 x i16>
+ %0 = bitcast <4 x i16> %r to <1 x i64>
+ %1 = bitcast <4 x i16> %vsubhn2.i.i to <1 x i64>
+ %shuffle.i.i = shufflevector <1 x i64> %0, <1 x i64> %1, <2 x i32> <i32 0, i32 1>
+ %2 = bitcast <2 x i64> %shuffle.i.i to <8 x i16>
+ ret <8 x i16> %2
+}
+
+define <4 x i32> @test_vsubhn_high_s64(<2 x i32> %r, <2 x i64> %a, <2 x i64> %b) {
+; CHECK: test_vsubhn_high_s64:
+; CHECK: subhn2 {{v[0-9]+}}.4s, {{v[0-9]+}}.2d, {{v[0-9]+}}.2d
+entry:
+ %vsubhn.i.i = sub <2 x i64> %a, %b
+ %vsubhn1.i.i = lshr <2 x i64> %vsubhn.i.i, <i64 32, i64 32>
+ %vsubhn2.i.i = trunc <2 x i64> %vsubhn1.i.i to <2 x i32>
+ %0 = bitcast <2 x i32> %r to <1 x i64>
+ %1 = bitcast <2 x i32> %vsubhn2.i.i to <1 x i64>
+ %shuffle.i.i = shufflevector <1 x i64> %0, <1 x i64> %1, <2 x i32> <i32 0, i32 1>
+ %2 = bitcast <2 x i64> %shuffle.i.i to <4 x i32>
+ ret <4 x i32> %2
+}
+
+define <16 x i8> @test_vsubhn_high_u16(<8 x i8> %r, <8 x i16> %a, <8 x i16> %b) {
+; CHECK: test_vsubhn_high_u16:
+; CHECK: subhn2 {{v[0-9]+}}.16b, {{v[0-9]+}}.8h, {{v[0-9]+}}.8h
+entry:
+ %vsubhn.i.i = sub <8 x i16> %a, %b
+ %vsubhn1.i.i = lshr <8 x i16> %vsubhn.i.i, <i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8>
+ %vsubhn2.i.i = trunc <8 x i16> %vsubhn1.i.i to <8 x i8>
+ %0 = bitcast <8 x i8> %r to <1 x i64>
+ %1 = bitcast <8 x i8> %vsubhn2.i.i to <1 x i64>
+ %shuffle.i.i = shufflevector <1 x i64> %0, <1 x i64> %1, <2 x i32> <i32 0, i32 1>
+ %2 = bitcast <2 x i64> %shuffle.i.i to <16 x i8>
+ ret <16 x i8> %2
+}
+
+define <8 x i16> @test_vsubhn_high_u32(<4 x i16> %r, <4 x i32> %a, <4 x i32> %b) {
+; CHECK: test_vsubhn_high_u32:
+; CHECK: subhn2 {{v[0-9]+}}.8h, {{v[0-9]+}}.4s, {{v[0-9]+}}.4s
+entry:
+ %vsubhn.i.i = sub <4 x i32> %a, %b
+ %vsubhn1.i.i = lshr <4 x i32> %vsubhn.i.i, <i32 16, i32 16, i32 16, i32 16>
+ %vsubhn2.i.i = trunc <4 x i32> %vsubhn1.i.i to <4 x i16>
+ %0 = bitcast <4 x i16> %r to <1 x i64>
+ %1 = bitcast <4 x i16> %vsubhn2.i.i to <1 x i64>
+ %shuffle.i.i = shufflevector <1 x i64> %0, <1 x i64> %1, <2 x i32> <i32 0, i32 1>
+ %2 = bitcast <2 x i64> %shuffle.i.i to <8 x i16>
+ ret <8 x i16> %2
+}
+
+define <4 x i32> @test_vsubhn_high_u64(<2 x i32> %r, <2 x i64> %a, <2 x i64> %b) {
+; CHECK: test_vsubhn_high_u64:
+; CHECK: subhn2 {{v[0-9]+}}.4s, {{v[0-9]+}}.2d, {{v[0-9]+}}.2d
+entry:
+ %vsubhn.i.i = sub <2 x i64> %a, %b
+ %vsubhn1.i.i = lshr <2 x i64> %vsubhn.i.i, <i64 32, i64 32>
+ %vsubhn2.i.i = trunc <2 x i64> %vsubhn1.i.i to <2 x i32>
+ %0 = bitcast <2 x i32> %r to <1 x i64>
+ %1 = bitcast <2 x i32> %vsubhn2.i.i to <1 x i64>
+ %shuffle.i.i = shufflevector <1 x i64> %0, <1 x i64> %1, <2 x i32> <i32 0, i32 1>
+ %2 = bitcast <2 x i64> %shuffle.i.i to <4 x i32>
+ ret <4 x i32> %2
+}
+
+define <8 x i8> @test_vrsubhn_s16(<8 x i16> %a, <8 x i16> %b) {
+; CHECK: test_vrsubhn_s16:
+; CHECK: rsubhn {{v[0-9]+}}.8b, {{v[0-9]+}}.8h, {{v[0-9]+}}.8h
+entry:
+ %vrsubhn2.i = tail call <8 x i8> @llvm.arm.neon.vrsubhn.v8i8(<8 x i16> %a, <8 x i16> %b)
+ ret <8 x i8> %vrsubhn2.i
+}
+
+define <4 x i16> @test_vrsubhn_s32(<4 x i32> %a, <4 x i32> %b) {
+; CHECK: test_vrsubhn_s32:
+; CHECK: rsubhn {{v[0-9]+}}.4h, {{v[0-9]+}}.4s, {{v[0-9]+}}.4s
+entry:
+ %vrsubhn2.i = tail call <4 x i16> @llvm.arm.neon.vrsubhn.v4i16(<4 x i32> %a, <4 x i32> %b)
+ ret <4 x i16> %vrsubhn2.i
+}
+
+define <2 x i32> @test_vrsubhn_s64(<2 x i64> %a, <2 x i64> %b) {
+; CHECK: test_vrsubhn_s64:
+; CHECK: rsubhn {{v[0-9]+}}.2s, {{v[0-9]+}}.2d, {{v[0-9]+}}.2d
+entry:
+ %vrsubhn2.i = tail call <2 x i32> @llvm.arm.neon.vrsubhn.v2i32(<2 x i64> %a, <2 x i64> %b)
+ ret <2 x i32> %vrsubhn2.i
+}
+
+define <8 x i8> @test_vrsubhn_u16(<8 x i16> %a, <8 x i16> %b) {
+; CHECK: test_vrsubhn_u16:
+; CHECK: rsubhn {{v[0-9]+}}.8b, {{v[0-9]+}}.8h, {{v[0-9]+}}.8h
+entry:
+ %vrsubhn2.i = tail call <8 x i8> @llvm.arm.neon.vrsubhn.v8i8(<8 x i16> %a, <8 x i16> %b)
+ ret <8 x i8> %vrsubhn2.i
+}
+
+define <4 x i16> @test_vrsubhn_u32(<4 x i32> %a, <4 x i32> %b) {
+; CHECK: test_vrsubhn_u32:
+; CHECK: rsubhn {{v[0-9]+}}.4h, {{v[0-9]+}}.4s, {{v[0-9]+}}.4s
+entry:
+ %vrsubhn2.i = tail call <4 x i16> @llvm.arm.neon.vrsubhn.v4i16(<4 x i32> %a, <4 x i32> %b)
+ ret <4 x i16> %vrsubhn2.i
+}
+
+define <2 x i32> @test_vrsubhn_u64(<2 x i64> %a, <2 x i64> %b) {
+; CHECK: test_vrsubhn_u64:
+; CHECK: rsubhn {{v[0-9]+}}.2s, {{v[0-9]+}}.2d, {{v[0-9]+}}.2d
+entry:
+ %vrsubhn2.i = tail call <2 x i32> @llvm.arm.neon.vrsubhn.v2i32(<2 x i64> %a, <2 x i64> %b)
+ ret <2 x i32> %vrsubhn2.i
+}
+
+define <16 x i8> @test_vrsubhn_high_s16(<8 x i8> %r, <8 x i16> %a, <8 x i16> %b) {
+; CHECK: test_vrsubhn_high_s16:
+; CHECK: rsubhn2 {{v[0-9]+}}.16b, {{v[0-9]+}}.8h, {{v[0-9]+}}.8h
+entry:
+ %vrsubhn2.i.i = tail call <8 x i8> @llvm.arm.neon.vrsubhn.v8i8(<8 x i16> %a, <8 x i16> %b)
+ %0 = bitcast <8 x i8> %r to <1 x i64>
+ %1 = bitcast <8 x i8> %vrsubhn2.i.i to <1 x i64>
+ %shuffle.i.i = shufflevector <1 x i64> %0, <1 x i64> %1, <2 x i32> <i32 0, i32 1>
+ %2 = bitcast <2 x i64> %shuffle.i.i to <16 x i8>
+ ret <16 x i8> %2
+}
+
+define <8 x i16> @test_vrsubhn_high_s32(<4 x i16> %r, <4 x i32> %a, <4 x i32> %b) {
+; CHECK: test_vrsubhn_high_s32:
+; CHECK: rsubhn2 {{v[0-9]+}}.8h, {{v[0-9]+}}.4s, {{v[0-9]+}}.4s
+entry:
+ %vrsubhn2.i.i = tail call <4 x i16> @llvm.arm.neon.vrsubhn.v4i16(<4 x i32> %a, <4 x i32> %b)
+ %0 = bitcast <4 x i16> %r to <1 x i64>
+ %1 = bitcast <4 x i16> %vrsubhn2.i.i to <1 x i64>
+ %shuffle.i.i = shufflevector <1 x i64> %0, <1 x i64> %1, <2 x i32> <i32 0, i32 1>
+ %2 = bitcast <2 x i64> %shuffle.i.i to <8 x i16>
+ ret <8 x i16> %2
+}
+
+define <4 x i32> @test_vrsubhn_high_s64(<2 x i32> %r, <2 x i64> %a, <2 x i64> %b) {
+; CHECK: test_vrsubhn_high_s64:
+; CHECK: rsubhn2 {{v[0-9]+}}.4s, {{v[0-9]+}}.2d, {{v[0-9]+}}.2d
+entry:
+ %vrsubhn2.i.i = tail call <2 x i32> @llvm.arm.neon.vrsubhn.v2i32(<2 x i64> %a, <2 x i64> %b)
+ %0 = bitcast <2 x i32> %r to <1 x i64>
+ %1 = bitcast <2 x i32> %vrsubhn2.i.i to <1 x i64>
+ %shuffle.i.i = shufflevector <1 x i64> %0, <1 x i64> %1, <2 x i32> <i32 0, i32 1>
+ %2 = bitcast <2 x i64> %shuffle.i.i to <4 x i32>
+ ret <4 x i32> %2
+}
+
+define <16 x i8> @test_vrsubhn_high_u16(<8 x i8> %r, <8 x i16> %a, <8 x i16> %b) {
+; CHECK: test_vrsubhn_high_u16:
+; CHECK: rsubhn2 {{v[0-9]+}}.16b, {{v[0-9]+}}.8h, {{v[0-9]+}}.8h
+entry:
+ %vrsubhn2.i.i = tail call <8 x i8> @llvm.arm.neon.vrsubhn.v8i8(<8 x i16> %a, <8 x i16> %b)
+ %0 = bitcast <8 x i8> %r to <1 x i64>
+ %1 = bitcast <8 x i8> %vrsubhn2.i.i to <1 x i64>
+ %shuffle.i.i = shufflevector <1 x i64> %0, <1 x i64> %1, <2 x i32> <i32 0, i32 1>
+ %2 = bitcast <2 x i64> %shuffle.i.i to <16 x i8>
+ ret <16 x i8> %2
+}
+
+define <8 x i16> @test_vrsubhn_high_u32(<4 x i16> %r, <4 x i32> %a, <4 x i32> %b) {
+; CHECK: test_vrsubhn_high_u32:
+; CHECK: rsubhn2 {{v[0-9]+}}.8h, {{v[0-9]+}}.4s, {{v[0-9]+}}.4s
+entry:
+ %vrsubhn2.i.i = tail call <4 x i16> @llvm.arm.neon.vrsubhn.v4i16(<4 x i32> %a, <4 x i32> %b)
+ %0 = bitcast <4 x i16> %r to <1 x i64>
+ %1 = bitcast <4 x i16> %vrsubhn2.i.i to <1 x i64>
+ %shuffle.i.i = shufflevector <1 x i64> %0, <1 x i64> %1, <2 x i32> <i32 0, i32 1>
+ %2 = bitcast <2 x i64> %shuffle.i.i to <8 x i16>
+ ret <8 x i16> %2
+}
+
+define <4 x i32> @test_vrsubhn_high_u64(<2 x i32> %r, <2 x i64> %a, <2 x i64> %b) {
+; CHECK: test_vrsubhn_high_u64:
+; CHECK: rsubhn2 {{v[0-9]+}}.4s, {{v[0-9]+}}.2d, {{v[0-9]+}}.2d
+entry:
+ %vrsubhn2.i.i = tail call <2 x i32> @llvm.arm.neon.vrsubhn.v2i32(<2 x i64> %a, <2 x i64> %b)
+ %0 = bitcast <2 x i32> %r to <1 x i64>
+ %1 = bitcast <2 x i32> %vrsubhn2.i.i to <1 x i64>
+ %shuffle.i.i = shufflevector <1 x i64> %0, <1 x i64> %1, <2 x i32> <i32 0, i32 1>
+ %2 = bitcast <2 x i64> %shuffle.i.i to <4 x i32>
+ ret <4 x i32> %2
+}
+
+define <8 x i16> @test_vabdl_s8(<8 x i8> %a, <8 x i8> %b) {
+; CHECK: test_vabdl_s8:
+; CHECK: sabdl {{v[0-9]+}}.8h, {{v[0-9]+}}.8b, {{v[0-9]+}}.8b
+entry:
+ %vabd.i.i = tail call <8 x i8> @llvm.arm.neon.vabds.v8i8(<8 x i8> %a, <8 x i8> %b)
+ %vmovl.i.i = zext <8 x i8> %vabd.i.i to <8 x i16>
+ ret <8 x i16> %vmovl.i.i
+}
+
+define <4 x i32> @test_vabdl_s16(<4 x i16> %a, <4 x i16> %b) {
+; CHECK: test_vabdl_s16:
+; CHECK: sabdl {{v[0-9]+}}.4s, {{v[0-9]+}}.4h, {{v[0-9]+}}.4h
+entry:
+ %vabd2.i.i = tail call <4 x i16> @llvm.arm.neon.vabds.v4i16(<4 x i16> %a, <4 x i16> %b)
+ %vmovl.i.i = zext <4 x i16> %vabd2.i.i to <4 x i32>
+ ret <4 x i32> %vmovl.i.i
+}
+
+define <2 x i64> @test_vabdl_s32(<2 x i32> %a, <2 x i32> %b) {
+; CHECK: test_vabdl_s32:
+; CHECK: sabdl {{v[0-9]+}}.2d, {{v[0-9]+}}.2s, {{v[0-9]+}}.2s
+entry:
+ %vabd2.i.i = tail call <2 x i32> @llvm.arm.neon.vabds.v2i32(<2 x i32> %a, <2 x i32> %b)
+ %vmovl.i.i = zext <2 x i32> %vabd2.i.i to <2 x i64>
+ ret <2 x i64> %vmovl.i.i
+}
+
+define <8 x i16> @test_vabdl_u8(<8 x i8> %a, <8 x i8> %b) {
+; CHECK: test_vabdl_u8:
+; CHECK: uabdl {{v[0-9]+}}.8h, {{v[0-9]+}}.8b, {{v[0-9]+}}.8b
+entry:
+ %vabd.i.i = tail call <8 x i8> @llvm.arm.neon.vabdu.v8i8(<8 x i8> %a, <8 x i8> %b)
+ %vmovl.i.i = zext <8 x i8> %vabd.i.i to <8 x i16>
+ ret <8 x i16> %vmovl.i.i
+}
+
+define <4 x i32> @test_vabdl_u16(<4 x i16> %a, <4 x i16> %b) {
+; CHECK: test_vabdl_u16:
+; CHECK: uabdl {{v[0-9]+}}.4s, {{v[0-9]+}}.4h, {{v[0-9]+}}.4h
+entry:
+ %vabd2.i.i = tail call <4 x i16> @llvm.arm.neon.vabdu.v4i16(<4 x i16> %a, <4 x i16> %b)
+ %vmovl.i.i = zext <4 x i16> %vabd2.i.i to <4 x i32>
+ ret <4 x i32> %vmovl.i.i
+}
+
+define <2 x i64> @test_vabdl_u32(<2 x i32> %a, <2 x i32> %b) {
+; CHECK: test_vabdl_u32:
+; CHECK: uabdl {{v[0-9]+}}.2d, {{v[0-9]+}}.2s, {{v[0-9]+}}.2s
+entry:
+ %vabd2.i.i = tail call <2 x i32> @llvm.arm.neon.vabdu.v2i32(<2 x i32> %a, <2 x i32> %b)
+ %vmovl.i.i = zext <2 x i32> %vabd2.i.i to <2 x i64>
+ ret <2 x i64> %vmovl.i.i
+}
+
+define <8 x i16> @test_vabal_s8(<8 x i16> %a, <8 x i8> %b, <8 x i8> %c) {
+; CHECK: test_vabal_s8:
+; CHECK: sabal {{v[0-9]+}}.8h, {{v[0-9]+}}.8b, {{v[0-9]+}}.8b
+entry:
+ %vabd.i.i.i = tail call <8 x i8> @llvm.arm.neon.vabds.v8i8(<8 x i8> %b, <8 x i8> %c)
+ %vmovl.i.i.i = zext <8 x i8> %vabd.i.i.i to <8 x i16>
+ %add.i = add <8 x i16> %vmovl.i.i.i, %a
+ ret <8 x i16> %add.i
+}
+
+define <4 x i32> @test_vabal_s16(<4 x i32> %a, <4 x i16> %b, <4 x i16> %c) {
+; CHECK: test_vabal_s16:
+; CHECK: sabal {{v[0-9]+}}.4s, {{v[0-9]+}}.4h, {{v[0-9]+}}.4h
+entry:
+ %vabd2.i.i.i = tail call <4 x i16> @llvm.arm.neon.vabds.v4i16(<4 x i16> %b, <4 x i16> %c)
+ %vmovl.i.i.i = zext <4 x i16> %vabd2.i.i.i to <4 x i32>
+ %add.i = add <4 x i32> %vmovl.i.i.i, %a
+ ret <4 x i32> %add.i
+}
+
+define <2 x i64> @test_vabal_s32(<2 x i64> %a, <2 x i32> %b, <2 x i32> %c) {
+; CHECK: test_vabal_s32:
+; CHECK: sabal {{v[0-9]+}}.2d, {{v[0-9]+}}.2s, {{v[0-9]+}}.2s
+entry:
+ %vabd2.i.i.i = tail call <2 x i32> @llvm.arm.neon.vabds.v2i32(<2 x i32> %b, <2 x i32> %c)
+ %vmovl.i.i.i = zext <2 x i32> %vabd2.i.i.i to <2 x i64>
+ %add.i = add <2 x i64> %vmovl.i.i.i, %a
+ ret <2 x i64> %add.i
+}
+
+define <8 x i16> @test_vabal_u8(<8 x i16> %a, <8 x i8> %b, <8 x i8> %c) {
+; CHECK: test_vabal_u8:
+; CHECK: uabal {{v[0-9]+}}.8h, {{v[0-9]+}}.8b, {{v[0-9]+}}.8b
+entry:
+ %vabd.i.i.i = tail call <8 x i8> @llvm.arm.neon.vabdu.v8i8(<8 x i8> %b, <8 x i8> %c)
+ %vmovl.i.i.i = zext <8 x i8> %vabd.i.i.i to <8 x i16>
+ %add.i = add <8 x i16> %vmovl.i.i.i, %a
+ ret <8 x i16> %add.i
+}
+
+define <4 x i32> @test_vabal_u16(<4 x i32> %a, <4 x i16> %b, <4 x i16> %c) {
+; CHECK: test_vabal_u16:
+; CHECK: uabal {{v[0-9]+}}.4s, {{v[0-9]+}}.4h, {{v[0-9]+}}.4h
+entry:
+ %vabd2.i.i.i = tail call <4 x i16> @llvm.arm.neon.vabdu.v4i16(<4 x i16> %b, <4 x i16> %c)
+ %vmovl.i.i.i = zext <4 x i16> %vabd2.i.i.i to <4 x i32>
+ %add.i = add <4 x i32> %vmovl.i.i.i, %a
+ ret <4 x i32> %add.i
+}
+
+define <2 x i64> @test_vabal_u32(<2 x i64> %a, <2 x i32> %b, <2 x i32> %c) {
+; CHECK: test_vabal_u32:
+; CHECK: uabal {{v[0-9]+}}.2d, {{v[0-9]+}}.2s, {{v[0-9]+}}.2s
+entry:
+ %vabd2.i.i.i = tail call <2 x i32> @llvm.arm.neon.vabdu.v2i32(<2 x i32> %b, <2 x i32> %c)
+ %vmovl.i.i.i = zext <2 x i32> %vabd2.i.i.i to <2 x i64>
+ %add.i = add <2 x i64> %vmovl.i.i.i, %a
+ ret <2 x i64> %add.i
+}
+
+define <8 x i16> @test_vabdl_high_s8(<16 x i8> %a, <16 x i8> %b) {
+; CHECK: test_vabdl_high_s8:
+; CHECK: sabdl2 {{v[0-9]+}}.8h, {{v[0-9]+}}.16b, {{v[0-9]+}}.16b
+entry:
+ %shuffle.i.i = shufflevector <16 x i8> %a, <16 x i8> undef, <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
+ %shuffle.i3.i = shufflevector <16 x i8> %b, <16 x i8> undef, <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
+ %vabd.i.i.i = tail call <8 x i8> @llvm.arm.neon.vabds.v8i8(<8 x i8> %shuffle.i.i, <8 x i8> %shuffle.i3.i)
+ %vmovl.i.i.i = zext <8 x i8> %vabd.i.i.i to <8 x i16>
+ ret <8 x i16> %vmovl.i.i.i
+}
+
+define <4 x i32> @test_vabdl_high_s16(<8 x i16> %a, <8 x i16> %b) {
+; CHECK: test_vabdl_high_s16:
+; CHECK: sabdl2 {{v[0-9]+}}.4s, {{v[0-9]+}}.8h, {{v[0-9]+}}.8h
+entry:
+ %shuffle.i.i = shufflevector <8 x i16> %a, <8 x i16> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
+ %shuffle.i3.i = shufflevector <8 x i16> %b, <8 x i16> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
+ %vabd2.i.i.i = tail call <4 x i16> @llvm.arm.neon.vabds.v4i16(<4 x i16> %shuffle.i.i, <4 x i16> %shuffle.i3.i)
+ %vmovl.i.i.i = zext <4 x i16> %vabd2.i.i.i to <4 x i32>
+ ret <4 x i32> %vmovl.i.i.i
+}
+
+define <2 x i64> @test_vabdl_high_s32(<4 x i32> %a, <4 x i32> %b) {
+; CHECK: test_vabdl_high_s32:
+; CHECK: sabdl2 {{v[0-9]+}}.2d, {{v[0-9]+}}.4s, {{v[0-9]+}}.4s
+entry:
+ %shuffle.i.i = shufflevector <4 x i32> %a, <4 x i32> undef, <2 x i32> <i32 2, i32 3>
+ %shuffle.i3.i = shufflevector <4 x i32> %b, <4 x i32> undef, <2 x i32> <i32 2, i32 3>
+ %vabd2.i.i.i = tail call <2 x i32> @llvm.arm.neon.vabds.v2i32(<2 x i32> %shuffle.i.i, <2 x i32> %shuffle.i3.i)
+ %vmovl.i.i.i = zext <2 x i32> %vabd2.i.i.i to <2 x i64>
+ ret <2 x i64> %vmovl.i.i.i
+}
+
+define <8 x i16> @test_vabdl_high_u8(<16 x i8> %a, <16 x i8> %b) {
+; CHECK: test_vabdl_high_u8:
+; CHECK: uabdl2 {{v[0-9]+}}.8h, {{v[0-9]+}}.16b, {{v[0-9]+}}.16b
+entry:
+ %shuffle.i.i = shufflevector <16 x i8> %a, <16 x i8> undef, <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
+ %shuffle.i3.i = shufflevector <16 x i8> %b, <16 x i8> undef, <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
+ %vabd.i.i.i = tail call <8 x i8> @llvm.arm.neon.vabdu.v8i8(<8 x i8> %shuffle.i.i, <8 x i8> %shuffle.i3.i)
+ %vmovl.i.i.i = zext <8 x i8> %vabd.i.i.i to <8 x i16>
+ ret <8 x i16> %vmovl.i.i.i
+}
+
+define <4 x i32> @test_vabdl_high_u16(<8 x i16> %a, <8 x i16> %b) {
+; CHECK: test_vabdl_high_u16:
+; CHECK: uabdl2 {{v[0-9]+}}.4s, {{v[0-9]+}}.8h, {{v[0-9]+}}.8h
+entry:
+ %shuffle.i.i = shufflevector <8 x i16> %a, <8 x i16> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
+ %shuffle.i3.i = shufflevector <8 x i16> %b, <8 x i16> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
+ %vabd2.i.i.i = tail call <4 x i16> @llvm.arm.neon.vabdu.v4i16(<4 x i16> %shuffle.i.i, <4 x i16> %shuffle.i3.i)
+ %vmovl.i.i.i = zext <4 x i16> %vabd2.i.i.i to <4 x i32>
+ ret <4 x i32> %vmovl.i.i.i
+}
+
+define <2 x i64> @test_vabdl_high_u32(<4 x i32> %a, <4 x i32> %b) {
+; CHECK: test_vabdl_high_u32:
+; CHECK: uabdl2 {{v[0-9]+}}.2d, {{v[0-9]+}}.4s, {{v[0-9]+}}.4s
+entry:
+ %shuffle.i.i = shufflevector <4 x i32> %a, <4 x i32> undef, <2 x i32> <i32 2, i32 3>
+ %shuffle.i3.i = shufflevector <4 x i32> %b, <4 x i32> undef, <2 x i32> <i32 2, i32 3>
+ %vabd2.i.i.i = tail call <2 x i32> @llvm.arm.neon.vabdu.v2i32(<2 x i32> %shuffle.i.i, <2 x i32> %shuffle.i3.i)
+ %vmovl.i.i.i = zext <2 x i32> %vabd2.i.i.i to <2 x i64>
+ ret <2 x i64> %vmovl.i.i.i
+}
+
+define <8 x i16> @test_vabal_high_s8(<8 x i16> %a, <16 x i8> %b, <16 x i8> %c) {
+; CHECK: test_vabal_high_s8:
+; CHECK: sabal2 {{v[0-9]+}}.8h, {{v[0-9]+}}.16b, {{v[0-9]+}}.16b
+entry:
+ %shuffle.i.i = shufflevector <16 x i8> %b, <16 x i8> undef, <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
+ %shuffle.i3.i = shufflevector <16 x i8> %c, <16 x i8> undef, <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
+ %vabd.i.i.i.i = tail call <8 x i8> @llvm.arm.neon.vabds.v8i8(<8 x i8> %shuffle.i.i, <8 x i8> %shuffle.i3.i)
+ %vmovl.i.i.i.i = zext <8 x i8> %vabd.i.i.i.i to <8 x i16>
+ %add.i.i = add <8 x i16> %vmovl.i.i.i.i, %a
+ ret <8 x i16> %add.i.i
+}
+
+define <4 x i32> @test_vabal_high_s16(<4 x i32> %a, <8 x i16> %b, <8 x i16> %c) {
+; CHECK: test_vabal_high_s16:
+; CHECK: sabal2 {{v[0-9]+}}.4s, {{v[0-9]+}}.8h, {{v[0-9]+}}.8h
+entry:
+ %shuffle.i.i = shufflevector <8 x i16> %b, <8 x i16> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
+ %shuffle.i3.i = shufflevector <8 x i16> %c, <8 x i16> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
+ %vabd2.i.i.i.i = tail call <4 x i16> @llvm.arm.neon.vabds.v4i16(<4 x i16> %shuffle.i.i, <4 x i16> %shuffle.i3.i)
+ %vmovl.i.i.i.i = zext <4 x i16> %vabd2.i.i.i.i to <4 x i32>
+ %add.i.i = add <4 x i32> %vmovl.i.i.i.i, %a
+ ret <4 x i32> %add.i.i
+}
+
+define <2 x i64> @test_vabal_high_s32(<2 x i64> %a, <4 x i32> %b, <4 x i32> %c) {
+; CHECK: test_vabal_high_s32:
+; CHECK: sabal2 {{v[0-9]+}}.2d, {{v[0-9]+}}.4s, {{v[0-9]+}}.4s
+entry:
+ %shuffle.i.i = shufflevector <4 x i32> %b, <4 x i32> undef, <2 x i32> <i32 2, i32 3>
+ %shuffle.i3.i = shufflevector <4 x i32> %c, <4 x i32> undef, <2 x i32> <i32 2, i32 3>
+ %vabd2.i.i.i.i = tail call <2 x i32> @llvm.arm.neon.vabds.v2i32(<2 x i32> %shuffle.i.i, <2 x i32> %shuffle.i3.i)
+ %vmovl.i.i.i.i = zext <2 x i32> %vabd2.i.i.i.i to <2 x i64>
+ %add.i.i = add <2 x i64> %vmovl.i.i.i.i, %a
+ ret <2 x i64> %add.i.i
+}
+
+define <8 x i16> @test_vabal_high_u8(<8 x i16> %a, <16 x i8> %b, <16 x i8> %c) {
+; CHECK: test_vabal_high_u8:
+; CHECK: uabal2 {{v[0-9]+}}.8h, {{v[0-9]+}}.16b, {{v[0-9]+}}.16b
+entry:
+ %shuffle.i.i = shufflevector <16 x i8> %b, <16 x i8> undef, <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
+ %shuffle.i3.i = shufflevector <16 x i8> %c, <16 x i8> undef, <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
+ %vabd.i.i.i.i = tail call <8 x i8> @llvm.arm.neon.vabdu.v8i8(<8 x i8> %shuffle.i.i, <8 x i8> %shuffle.i3.i)
+ %vmovl.i.i.i.i = zext <8 x i8> %vabd.i.i.i.i to <8 x i16>
+ %add.i.i = add <8 x i16> %vmovl.i.i.i.i, %a
+ ret <8 x i16> %add.i.i
+}
+
+define <4 x i32> @test_vabal_high_u16(<4 x i32> %a, <8 x i16> %b, <8 x i16> %c) {
+; CHECK: test_vabal_high_u16:
+; CHECK: uabal2 {{v[0-9]+}}.4s, {{v[0-9]+}}.8h, {{v[0-9]+}}.8h
+entry:
+ %shuffle.i.i = shufflevector <8 x i16> %b, <8 x i16> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
+ %shuffle.i3.i = shufflevector <8 x i16> %c, <8 x i16> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
+ %vabd2.i.i.i.i = tail call <4 x i16> @llvm.arm.neon.vabdu.v4i16(<4 x i16> %shuffle.i.i, <4 x i16> %shuffle.i3.i)
+ %vmovl.i.i.i.i = zext <4 x i16> %vabd2.i.i.i.i to <4 x i32>
+ %add.i.i = add <4 x i32> %vmovl.i.i.i.i, %a
+ ret <4 x i32> %add.i.i
+}
+
+define <2 x i64> @test_vabal_high_u32(<2 x i64> %a, <4 x i32> %b, <4 x i32> %c) {
+; CHECK: test_vabal_high_u32:
+; CHECK: uabal2 {{v[0-9]+}}.2d, {{v[0-9]+}}.4s, {{v[0-9]+}}.4s
+entry:
+ %shuffle.i.i = shufflevector <4 x i32> %b, <4 x i32> undef, <2 x i32> <i32 2, i32 3>
+ %shuffle.i3.i = shufflevector <4 x i32> %c, <4 x i32> undef, <2 x i32> <i32 2, i32 3>
+ %vabd2.i.i.i.i = tail call <2 x i32> @llvm.arm.neon.vabdu.v2i32(<2 x i32> %shuffle.i.i, <2 x i32> %shuffle.i3.i)
+ %vmovl.i.i.i.i = zext <2 x i32> %vabd2.i.i.i.i to <2 x i64>
+ %add.i.i = add <2 x i64> %vmovl.i.i.i.i, %a
+ ret <2 x i64> %add.i.i
+}
+
+define <8 x i16> @test_vmull_s8(<8 x i8> %a, <8 x i8> %b) {
+; CHECK: test_vmull_s8:
+; CHECK: smull {{v[0-9]+}}.8h, {{v[0-9]+}}.8b, {{v[0-9]+}}.8b
+entry:
+ %vmull.i = tail call <8 x i16> @llvm.arm.neon.vmulls.v8i16(<8 x i8> %a, <8 x i8> %b)
+ ret <8 x i16> %vmull.i
+}
+
+define <4 x i32> @test_vmull_s16(<4 x i16> %a, <4 x i16> %b) {
+; CHECK: test_vmull_s16:
+; CHECK: smull {{v[0-9]+}}.4s, {{v[0-9]+}}.4h, {{v[0-9]+}}.4h
+entry:
+ %vmull2.i = tail call <4 x i32> @llvm.arm.neon.vmulls.v4i32(<4 x i16> %a, <4 x i16> %b)
+ ret <4 x i32> %vmull2.i
+}
+
+define <2 x i64> @test_vmull_s32(<2 x i32> %a, <2 x i32> %b) {
+; CHECK: test_vmull_s32:
+; CHECK: smull {{v[0-9]+}}.2d, {{v[0-9]+}}.2s, {{v[0-9]+}}.2s
+entry:
+ %vmull2.i = tail call <2 x i64> @llvm.arm.neon.vmulls.v2i64(<2 x i32> %a, <2 x i32> %b)
+ ret <2 x i64> %vmull2.i
+}
+
+define <8 x i16> @test_vmull_u8(<8 x i8> %a, <8 x i8> %b) {
+; CHECK: test_vmull_u8:
+; CHECK: umull {{v[0-9]+}}.8h, {{v[0-9]+}}.8b, {{v[0-9]+}}.8b
+entry:
+ %vmull.i = tail call <8 x i16> @llvm.arm.neon.vmullu.v8i16(<8 x i8> %a, <8 x i8> %b)
+ ret <8 x i16> %vmull.i
+}
+
+define <4 x i32> @test_vmull_u16(<4 x i16> %a, <4 x i16> %b) {
+; CHECK: test_vmull_u16:
+; CHECK: umull {{v[0-9]+}}.4s, {{v[0-9]+}}.4h, {{v[0-9]+}}.4h
+entry:
+ %vmull2.i = tail call <4 x i32> @llvm.arm.neon.vmullu.v4i32(<4 x i16> %a, <4 x i16> %b)
+ ret <4 x i32> %vmull2.i
+}
+
+define <2 x i64> @test_vmull_u32(<2 x i32> %a, <2 x i32> %b) {
+; CHECK: test_vmull_u32:
+; CHECK: umull {{v[0-9]+}}.2d, {{v[0-9]+}}.2s, {{v[0-9]+}}.2s
+entry:
+ %vmull2.i = tail call <2 x i64> @llvm.arm.neon.vmullu.v2i64(<2 x i32> %a, <2 x i32> %b)
+ ret <2 x i64> %vmull2.i
+}
+
+define <8 x i16> @test_vmull_high_s8(<16 x i8> %a, <16 x i8> %b) {
+; CHECK: test_vmull_high_s8:
+; CHECK: smull2 {{v[0-9]+}}.8h, {{v[0-9]+}}.16b, {{v[0-9]+}}.16b
+entry:
+ %shuffle.i.i = shufflevector <16 x i8> %a, <16 x i8> undef, <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
+ %shuffle.i3.i = shufflevector <16 x i8> %b, <16 x i8> undef, <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
+ %vmull.i.i = tail call <8 x i16> @llvm.arm.neon.vmulls.v8i16(<8 x i8> %shuffle.i.i, <8 x i8> %shuffle.i3.i)
+ ret <8 x i16> %vmull.i.i
+}
+
+define <4 x i32> @test_vmull_high_s16(<8 x i16> %a, <8 x i16> %b) {
+; CHECK: test_vmull_high_s16:
+; CHECK: smull2 {{v[0-9]+}}.4s, {{v[0-9]+}}.8h, {{v[0-9]+}}.8h
+entry:
+ %shuffle.i.i = shufflevector <8 x i16> %a, <8 x i16> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
+ %shuffle.i3.i = shufflevector <8 x i16> %b, <8 x i16> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
+ %vmull2.i.i = tail call <4 x i32> @llvm.arm.neon.vmulls.v4i32(<4 x i16> %shuffle.i.i, <4 x i16> %shuffle.i3.i)
+ ret <4 x i32> %vmull2.i.i
+}
+
+define <2 x i64> @test_vmull_high_s32(<4 x i32> %a, <4 x i32> %b) {
+; CHECK: test_vmull_high_s32:
+; CHECK: smull2 {{v[0-9]+}}.2d, {{v[0-9]+}}.4s, {{v[0-9]+}}.4s
+entry:
+ %shuffle.i.i = shufflevector <4 x i32> %a, <4 x i32> undef, <2 x i32> <i32 2, i32 3>
+ %shuffle.i3.i = shufflevector <4 x i32> %b, <4 x i32> undef, <2 x i32> <i32 2, i32 3>
+ %vmull2.i.i = tail call <2 x i64> @llvm.arm.neon.vmulls.v2i64(<2 x i32> %shuffle.i.i, <2 x i32> %shuffle.i3.i)
+ ret <2 x i64> %vmull2.i.i
+}
+
+define <8 x i16> @test_vmull_high_u8(<16 x i8> %a, <16 x i8> %b) {
+; CHECK: test_vmull_high_u8:
+; CHECK: umull2 {{v[0-9]+}}.8h, {{v[0-9]+}}.16b, {{v[0-9]+}}.16b
+entry:
+ %shuffle.i.i = shufflevector <16 x i8> %a, <16 x i8> undef, <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
+ %shuffle.i3.i = shufflevector <16 x i8> %b, <16 x i8> undef, <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
+ %vmull.i.i = tail call <8 x i16> @llvm.arm.neon.vmullu.v8i16(<8 x i8> %shuffle.i.i, <8 x i8> %shuffle.i3.i)
+ ret <8 x i16> %vmull.i.i
+}
+
+define <4 x i32> @test_vmull_high_u16(<8 x i16> %a, <8 x i16> %b) {
+; CHECK: test_vmull_high_u16:
+; CHECK: umull2 {{v[0-9]+}}.4s, {{v[0-9]+}}.8h, {{v[0-9]+}}.8h
+entry:
+ %shuffle.i.i = shufflevector <8 x i16> %a, <8 x i16> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
+ %shuffle.i3.i = shufflevector <8 x i16> %b, <8 x i16> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
+ %vmull2.i.i = tail call <4 x i32> @llvm.arm.neon.vmullu.v4i32(<4 x i16> %shuffle.i.i, <4 x i16> %shuffle.i3.i)
+ ret <4 x i32> %vmull2.i.i
+}
+
+define <2 x i64> @test_vmull_high_u32(<4 x i32> %a, <4 x i32> %b) {
+; CHECK: test_vmull_high_u32:
+; CHECK: umull2 {{v[0-9]+}}.2d, {{v[0-9]+}}.4s, {{v[0-9]+}}.4s
+entry:
+ %shuffle.i.i = shufflevector <4 x i32> %a, <4 x i32> undef, <2 x i32> <i32 2, i32 3>
+ %shuffle.i3.i = shufflevector <4 x i32> %b, <4 x i32> undef, <2 x i32> <i32 2, i32 3>
+ %vmull2.i.i = tail call <2 x i64> @llvm.arm.neon.vmullu.v2i64(<2 x i32> %shuffle.i.i, <2 x i32> %shuffle.i3.i)
+ ret <2 x i64> %vmull2.i.i
+}
+
+define <8 x i16> @test_vmlal_s8(<8 x i16> %a, <8 x i8> %b, <8 x i8> %c) {
+; CHECK: test_vmlal_s8:
+; CHECK: smlal {{v[0-9]+}}.8h, {{v[0-9]+}}.8b, {{v[0-9]+}}.8b
+entry:
+ %vmull.i.i = tail call <8 x i16> @llvm.arm.neon.vmulls.v8i16(<8 x i8> %b, <8 x i8> %c)
+ %add.i = add <8 x i16> %vmull.i.i, %a
+ ret <8 x i16> %add.i
+}
+
+define <4 x i32> @test_vmlal_s16(<4 x i32> %a, <4 x i16> %b, <4 x i16> %c) {
+; CHECK: test_vmlal_s16:
+; CHECK: smlal {{v[0-9]+}}.4s, {{v[0-9]+}}.4h, {{v[0-9]+}}.4h
+entry:
+ %vmull2.i.i = tail call <4 x i32> @llvm.arm.neon.vmulls.v4i32(<4 x i16> %b, <4 x i16> %c)
+ %add.i = add <4 x i32> %vmull2.i.i, %a
+ ret <4 x i32> %add.i
+}
+
+define <2 x i64> @test_vmlal_s32(<2 x i64> %a, <2 x i32> %b, <2 x i32> %c) {
+; CHECK: test_vmlal_s32:
+; CHECK: smlal {{v[0-9]+}}.2d, {{v[0-9]+}}.2s, {{v[0-9]+}}.2s
+entry:
+ %vmull2.i.i = tail call <2 x i64> @llvm.arm.neon.vmulls.v2i64(<2 x i32> %b, <2 x i32> %c)
+ %add.i = add <2 x i64> %vmull2.i.i, %a
+ ret <2 x i64> %add.i
+}
+
+define <8 x i16> @test_vmlal_u8(<8 x i16> %a, <8 x i8> %b, <8 x i8> %c) {
+; CHECK: test_vmlal_u8:
+; CHECK: umlal {{v[0-9]+}}.8h, {{v[0-9]+}}.8b, {{v[0-9]+}}.8b
+entry:
+ %vmull.i.i = tail call <8 x i16> @llvm.arm.neon.vmullu.v8i16(<8 x i8> %b, <8 x i8> %c)
+ %add.i = add <8 x i16> %vmull.i.i, %a
+ ret <8 x i16> %add.i
+}
+
+define <4 x i32> @test_vmlal_u16(<4 x i32> %a, <4 x i16> %b, <4 x i16> %c) {
+; CHECK: test_vmlal_u16:
+; CHECK: umlal {{v[0-9]+}}.4s, {{v[0-9]+}}.4h, {{v[0-9]+}}.4h
+entry:
+ %vmull2.i.i = tail call <4 x i32> @llvm.arm.neon.vmullu.v4i32(<4 x i16> %b, <4 x i16> %c)
+ %add.i = add <4 x i32> %vmull2.i.i, %a
+ ret <4 x i32> %add.i
+}
+
+define <2 x i64> @test_vmlal_u32(<2 x i64> %a, <2 x i32> %b, <2 x i32> %c) {
+; CHECK: test_vmlal_u32:
+; CHECK: umlal {{v[0-9]+}}.2d, {{v[0-9]+}}.2s, {{v[0-9]+}}.2s
+entry:
+ %vmull2.i.i = tail call <2 x i64> @llvm.arm.neon.vmullu.v2i64(<2 x i32> %b, <2 x i32> %c)
+ %add.i = add <2 x i64> %vmull2.i.i, %a
+ ret <2 x i64> %add.i
+}
+
+define <8 x i16> @test_vmlal_high_s8(<8 x i16> %a, <16 x i8> %b, <16 x i8> %c) {
+; CHECK: test_vmlal_high_s8:
+; CHECK: smlal2 {{v[0-9]+}}.8h, {{v[0-9]+}}.16b, {{v[0-9]+}}.16b
+entry:
+ %shuffle.i.i = shufflevector <16 x i8> %b, <16 x i8> undef, <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
+ %shuffle.i3.i = shufflevector <16 x i8> %c, <16 x i8> undef, <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
+ %vmull.i.i.i = tail call <8 x i16> @llvm.arm.neon.vmulls.v8i16(<8 x i8> %shuffle.i.i, <8 x i8> %shuffle.i3.i)
+ %add.i.i = add <8 x i16> %vmull.i.i.i, %a
+ ret <8 x i16> %add.i.i
+}
+
+define <4 x i32> @test_vmlal_high_s16(<4 x i32> %a, <8 x i16> %b, <8 x i16> %c) {
+; CHECK: test_vmlal_high_s16:
+; CHECK: smlal2 {{v[0-9]+}}.4s, {{v[0-9]+}}.8h, {{v[0-9]+}}.8h
+entry:
+ %shuffle.i.i = shufflevector <8 x i16> %b, <8 x i16> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
+ %shuffle.i3.i = shufflevector <8 x i16> %c, <8 x i16> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
+ %vmull2.i.i.i = tail call <4 x i32> @llvm.arm.neon.vmulls.v4i32(<4 x i16> %shuffle.i.i, <4 x i16> %shuffle.i3.i)
+ %add.i.i = add <4 x i32> %vmull2.i.i.i, %a
+ ret <4 x i32> %add.i.i
+}
+
+define <2 x i64> @test_vmlal_high_s32(<2 x i64> %a, <4 x i32> %b, <4 x i32> %c) {
+; CHECK: test_vmlal_high_s32:
+; CHECK: smlal2 {{v[0-9]+}}.2d, {{v[0-9]+}}.4s, {{v[0-9]+}}.4s
+entry:
+ %shuffle.i.i = shufflevector <4 x i32> %b, <4 x i32> undef, <2 x i32> <i32 2, i32 3>
+ %shuffle.i3.i = shufflevector <4 x i32> %c, <4 x i32> undef, <2 x i32> <i32 2, i32 3>
+ %vmull2.i.i.i = tail call <2 x i64> @llvm.arm.neon.vmulls.v2i64(<2 x i32> %shuffle.i.i, <2 x i32> %shuffle.i3.i)
+ %add.i.i = add <2 x i64> %vmull2.i.i.i, %a
+ ret <2 x i64> %add.i.i
+}
+
+define <8 x i16> @test_vmlal_high_u8(<8 x i16> %a, <16 x i8> %b, <16 x i8> %c) {
+; CHECK: test_vmlal_high_u8:
+; CHECK: umlal2 {{v[0-9]+}}.8h, {{v[0-9]+}}.16b, {{v[0-9]+}}.16b
+entry:
+ %shuffle.i.i = shufflevector <16 x i8> %b, <16 x i8> undef, <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
+ %shuffle.i3.i = shufflevector <16 x i8> %c, <16 x i8> undef, <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
+ %vmull.i.i.i = tail call <8 x i16> @llvm.arm.neon.vmullu.v8i16(<8 x i8> %shuffle.i.i, <8 x i8> %shuffle.i3.i)
+ %add.i.i = add <8 x i16> %vmull.i.i.i, %a
+ ret <8 x i16> %add.i.i
+}
+
+define <4 x i32> @test_vmlal_high_u16(<4 x i32> %a, <8 x i16> %b, <8 x i16> %c) {
+; CHECK: test_vmlal_high_u16:
+; CHECK: umlal2 {{v[0-9]+}}.4s, {{v[0-9]+}}.8h, {{v[0-9]+}}.8h
+entry:
+ %shuffle.i.i = shufflevector <8 x i16> %b, <8 x i16> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
+ %shuffle.i3.i = shufflevector <8 x i16> %c, <8 x i16> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
+ %vmull2.i.i.i = tail call <4 x i32> @llvm.arm.neon.vmullu.v4i32(<4 x i16> %shuffle.i.i, <4 x i16> %shuffle.i3.i)
+ %add.i.i = add <4 x i32> %vmull2.i.i.i, %a
+ ret <4 x i32> %add.i.i
+}
+
+define <2 x i64> @test_vmlal_high_u32(<2 x i64> %a, <4 x i32> %b, <4 x i32> %c) {
+; CHECK: test_vmlal_high_u32:
+; CHECK: umlal2 {{v[0-9]+}}.2d, {{v[0-9]+}}.4s, {{v[0-9]+}}.4s
+entry:
+ %shuffle.i.i = shufflevector <4 x i32> %b, <4 x i32> undef, <2 x i32> <i32 2, i32 3>
+ %shuffle.i3.i = shufflevector <4 x i32> %c, <4 x i32> undef, <2 x i32> <i32 2, i32 3>
+ %vmull2.i.i.i = tail call <2 x i64> @llvm.arm.neon.vmullu.v2i64(<2 x i32> %shuffle.i.i, <2 x i32> %shuffle.i3.i)
+ %add.i.i = add <2 x i64> %vmull2.i.i.i, %a
+ ret <2 x i64> %add.i.i
+}
+
+define <8 x i16> @test_vmlsl_s8(<8 x i16> %a, <8 x i8> %b, <8 x i8> %c) {
+; CHECK: test_vmlsl_s8:
+; CHECK: smlsl {{v[0-9]+}}.8h, {{v[0-9]+}}.8b, {{v[0-9]+}}.8b
+entry:
+ %vmull.i.i = tail call <8 x i16> @llvm.arm.neon.vmulls.v8i16(<8 x i8> %b, <8 x i8> %c)
+ %sub.i = sub <8 x i16> %a, %vmull.i.i
+ ret <8 x i16> %sub.i
+}
+
+define <4 x i32> @test_vmlsl_s16(<4 x i32> %a, <4 x i16> %b, <4 x i16> %c) {
+; CHECK: test_vmlsl_s16:
+; CHECK: smlsl {{v[0-9]+}}.4s, {{v[0-9]+}}.4h, {{v[0-9]+}}.4h
+entry:
+ %vmull2.i.i = tail call <4 x i32> @llvm.arm.neon.vmulls.v4i32(<4 x i16> %b, <4 x i16> %c)
+ %sub.i = sub <4 x i32> %a, %vmull2.i.i
+ ret <4 x i32> %sub.i
+}
+
+define <2 x i64> @test_vmlsl_s32(<2 x i64> %a, <2 x i32> %b, <2 x i32> %c) {
+; CHECK: test_vmlsl_s32:
+; CHECK: smlsl {{v[0-9]+}}.2d, {{v[0-9]+}}.2s, {{v[0-9]+}}.2s
+entry:
+ %vmull2.i.i = tail call <2 x i64> @llvm.arm.neon.vmulls.v2i64(<2 x i32> %b, <2 x i32> %c)
+ %sub.i = sub <2 x i64> %a, %vmull2.i.i
+ ret <2 x i64> %sub.i
+}
+
+define <8 x i16> @test_vmlsl_u8(<8 x i16> %a, <8 x i8> %b, <8 x i8> %c) {
+; CHECK: test_vmlsl_u8:
+; CHECK: umlsl {{v[0-9]+}}.8h, {{v[0-9]+}}.8b, {{v[0-9]+}}.8b
+entry:
+ %vmull.i.i = tail call <8 x i16> @llvm.arm.neon.vmullu.v8i16(<8 x i8> %b, <8 x i8> %c)
+ %sub.i = sub <8 x i16> %a, %vmull.i.i
+ ret <8 x i16> %sub.i
+}
+
+define <4 x i32> @test_vmlsl_u16(<4 x i32> %a, <4 x i16> %b, <4 x i16> %c) {
+; CHECK: test_vmlsl_u16:
+; CHECK: umlsl {{v[0-9]+}}.4s, {{v[0-9]+}}.4h, {{v[0-9]+}}.4h
+entry:
+ %vmull2.i.i = tail call <4 x i32> @llvm.arm.neon.vmullu.v4i32(<4 x i16> %b, <4 x i16> %c)
+ %sub.i = sub <4 x i32> %a, %vmull2.i.i
+ ret <4 x i32> %sub.i
+}
+
+define <2 x i64> @test_vmlsl_u32(<2 x i64> %a, <2 x i32> %b, <2 x i32> %c) {
+; CHECK: test_vmlsl_u32:
+; CHECK: umlsl {{v[0-9]+}}.2d, {{v[0-9]+}}.2s, {{v[0-9]+}}.2s
+entry:
+ %vmull2.i.i = tail call <2 x i64> @llvm.arm.neon.vmullu.v2i64(<2 x i32> %b, <2 x i32> %c)
+ %sub.i = sub <2 x i64> %a, %vmull2.i.i
+ ret <2 x i64> %sub.i
+}
+
+define <8 x i16> @test_vmlsl_high_s8(<8 x i16> %a, <16 x i8> %b, <16 x i8> %c) {
+; CHECK: test_vmlsl_high_s8:
+; CHECK: smlsl2 {{v[0-9]+}}.8h, {{v[0-9]+}}.16b, {{v[0-9]+}}.16b
+entry:
+ %shuffle.i.i = shufflevector <16 x i8> %b, <16 x i8> undef, <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
+ %shuffle.i3.i = shufflevector <16 x i8> %c, <16 x i8> undef, <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
+ %vmull.i.i.i = tail call <8 x i16> @llvm.arm.neon.vmulls.v8i16(<8 x i8> %shuffle.i.i, <8 x i8> %shuffle.i3.i)
+ %sub.i.i = sub <8 x i16> %a, %vmull.i.i.i
+ ret <8 x i16> %sub.i.i
+}
+
+define <4 x i32> @test_vmlsl_high_s16(<4 x i32> %a, <8 x i16> %b, <8 x i16> %c) {
+; CHECK: test_vmlsl_high_s16:
+; CHECK: smlsl2 {{v[0-9]+}}.4s, {{v[0-9]+}}.8h, {{v[0-9]+}}.8h
+entry:
+ %shuffle.i.i = shufflevector <8 x i16> %b, <8 x i16> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
+ %shuffle.i3.i = shufflevector <8 x i16> %c, <8 x i16> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
+ %vmull2.i.i.i = tail call <4 x i32> @llvm.arm.neon.vmulls.v4i32(<4 x i16> %shuffle.i.i, <4 x i16> %shuffle.i3.i)
+ %sub.i.i = sub <4 x i32> %a, %vmull2.i.i.i
+ ret <4 x i32> %sub.i.i
+}
+
+define <2 x i64> @test_vmlsl_high_s32(<2 x i64> %a, <4 x i32> %b, <4 x i32> %c) {
+; CHECK: test_vmlsl_high_s32:
+; CHECK: smlsl2 {{v[0-9]+}}.2d, {{v[0-9]+}}.4s, {{v[0-9]+}}.4s
+entry:
+ %shuffle.i.i = shufflevector <4 x i32> %b, <4 x i32> undef, <2 x i32> <i32 2, i32 3>
+ %shuffle.i3.i = shufflevector <4 x i32> %c, <4 x i32> undef, <2 x i32> <i32 2, i32 3>
+ %vmull2.i.i.i = tail call <2 x i64> @llvm.arm.neon.vmulls.v2i64(<2 x i32> %shuffle.i.i, <2 x i32> %shuffle.i3.i)
+ %sub.i.i = sub <2 x i64> %a, %vmull2.i.i.i
+ ret <2 x i64> %sub.i.i
+}
+
+define <8 x i16> @test_vmlsl_high_u8(<8 x i16> %a, <16 x i8> %b, <16 x i8> %c) {
+; CHECK: test_vmlsl_high_u8:
+; CHECK: umlsl2 {{v[0-9]+}}.8h, {{v[0-9]+}}.16b, {{v[0-9]+}}.16b
+entry:
+ %shuffle.i.i = shufflevector <16 x i8> %b, <16 x i8> undef, <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
+ %shuffle.i3.i = shufflevector <16 x i8> %c, <16 x i8> undef, <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
+ %vmull.i.i.i = tail call <8 x i16> @llvm.arm.neon.vmullu.v8i16(<8 x i8> %shuffle.i.i, <8 x i8> %shuffle.i3.i)
+ %sub.i.i = sub <8 x i16> %a, %vmull.i.i.i
+ ret <8 x i16> %sub.i.i
+}
+
+define <4 x i32> @test_vmlsl_high_u16(<4 x i32> %a, <8 x i16> %b, <8 x i16> %c) {
+; CHECK: test_vmlsl_high_u16:
+; CHECK: umlsl2 {{v[0-9]+}}.4s, {{v[0-9]+}}.8h, {{v[0-9]+}}.8h
+entry:
+ %shuffle.i.i = shufflevector <8 x i16> %b, <8 x i16> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
+ %shuffle.i3.i = shufflevector <8 x i16> %c, <8 x i16> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
+ %vmull2.i.i.i = tail call <4 x i32> @llvm.arm.neon.vmullu.v4i32(<4 x i16> %shuffle.i.i, <4 x i16> %shuffle.i3.i)
+ %sub.i.i = sub <4 x i32> %a, %vmull2.i.i.i
+ ret <4 x i32> %sub.i.i
+}
+
+define <2 x i64> @test_vmlsl_high_u32(<2 x i64> %a, <4 x i32> %b, <4 x i32> %c) {
+; CHECK: test_vmlsl_high_u32:
+; CHECK: umlsl2 {{v[0-9]+}}.2d, {{v[0-9]+}}.4s, {{v[0-9]+}}.4s
+entry:
+ %shuffle.i.i = shufflevector <4 x i32> %b, <4 x i32> undef, <2 x i32> <i32 2, i32 3>
+ %shuffle.i3.i = shufflevector <4 x i32> %c, <4 x i32> undef, <2 x i32> <i32 2, i32 3>
+ %vmull2.i.i.i = tail call <2 x i64> @llvm.arm.neon.vmullu.v2i64(<2 x i32> %shuffle.i.i, <2 x i32> %shuffle.i3.i)
+ %sub.i.i = sub <2 x i64> %a, %vmull2.i.i.i
+ ret <2 x i64> %sub.i.i
+}
+
+define <4 x i32> @test_vqdmull_s16(<4 x i16> %a, <4 x i16> %b) {
+; CHECK: test_vqdmull_s16:
+; CHECK: sqdmull {{v[0-9]+}}.4s, {{v[0-9]+}}.4h, {{v[0-9]+}}.4h
+entry:
+ %vqdmull2.i = tail call <4 x i32> @llvm.arm.neon.vqdmull.v4i32(<4 x i16> %a, <4 x i16> %b)
+ ret <4 x i32> %vqdmull2.i
+}
+
+define <2 x i64> @test_vqdmull_s32(<2 x i32> %a, <2 x i32> %b) {
+; CHECK: test_vqdmull_s32:
+; CHECK: sqdmull {{v[0-9]+}}.2d, {{v[0-9]+}}.2s, {{v[0-9]+}}.2s
+entry:
+ %vqdmull2.i = tail call <2 x i64> @llvm.arm.neon.vqdmull.v2i64(<2 x i32> %a, <2 x i32> %b)
+ ret <2 x i64> %vqdmull2.i
+}
+
+define <4 x i32> @test_vqdmlal_s16(<4 x i32> %a, <4 x i16> %b, <4 x i16> %c) {
+; CHECK: test_vqdmlal_s16:
+; CHECK: sqdmlal {{v[0-9]+}}.4s, {{v[0-9]+}}.4h, {{v[0-9]+}}.4h
+entry:
+ %vqdmlal2.i = tail call <4 x i32> @llvm.arm.neon.vqdmull.v4i32(<4 x i16> %b, <4 x i16> %c)
+ %vqdmlal4.i = tail call <4 x i32> @llvm.arm.neon.vqadds.v4i32(<4 x i32> %a, <4 x i32> %vqdmlal2.i)
+ ret <4 x i32> %vqdmlal4.i
+}
+
+define <2 x i64> @test_vqdmlal_s32(<2 x i64> %a, <2 x i32> %b, <2 x i32> %c) {
+; CHECK: test_vqdmlal_s32:
+; CHECK: sqdmlal {{v[0-9]+}}.2d, {{v[0-9]+}}.2s, {{v[0-9]+}}.2s
+entry:
+ %vqdmlal2.i = tail call <2 x i64> @llvm.arm.neon.vqdmull.v2i64(<2 x i32> %b, <2 x i32> %c)
+ %vqdmlal4.i = tail call <2 x i64> @llvm.arm.neon.vqadds.v2i64(<2 x i64> %a, <2 x i64> %vqdmlal2.i)
+ ret <2 x i64> %vqdmlal4.i
+}
+
+define <4 x i32> @test_vqdmlsl_s16(<4 x i32> %a, <4 x i16> %b, <4 x i16> %c) {
+; CHECK: test_vqdmlsl_s16:
+; CHECK: sqdmlsl {{v[0-9]+}}.4s, {{v[0-9]+}}.4h, {{v[0-9]+}}.4h
+entry:
+ %vqdmlsl2.i = tail call <4 x i32> @llvm.arm.neon.vqdmull.v4i32(<4 x i16> %b, <4 x i16> %c)
+ %vqdmlsl4.i = tail call <4 x i32> @llvm.arm.neon.vqsubs.v4i32(<4 x i32> %a, <4 x i32> %vqdmlsl2.i)
+ ret <4 x i32> %vqdmlsl4.i
+}
+
+define <2 x i64> @test_vqdmlsl_s32(<2 x i64> %a, <2 x i32> %b, <2 x i32> %c) {
+; CHECK: test_vqdmlsl_s32:
+; CHECK: sqdmlsl {{v[0-9]+}}.2d, {{v[0-9]+}}.2s, {{v[0-9]+}}.2s
+entry:
+ %vqdmlsl2.i = tail call <2 x i64> @llvm.arm.neon.vqdmull.v2i64(<2 x i32> %b, <2 x i32> %c)
+ %vqdmlsl4.i = tail call <2 x i64> @llvm.arm.neon.vqsubs.v2i64(<2 x i64> %a, <2 x i64> %vqdmlsl2.i)
+ ret <2 x i64> %vqdmlsl4.i
+}
+
+define <4 x i32> @test_vqdmull_high_s16(<8 x i16> %a, <8 x i16> %b) {
+; CHECK: test_vqdmull_high_s16:
+; CHECK: sqdmull2 {{v[0-9]+}}.4s, {{v[0-9]+}}.8h, {{v[0-9]+}}.8h
+entry:
+ %shuffle.i.i = shufflevector <8 x i16> %a, <8 x i16> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
+ %shuffle.i3.i = shufflevector <8 x i16> %b, <8 x i16> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
+ %vqdmull2.i.i = tail call <4 x i32> @llvm.arm.neon.vqdmull.v4i32(<4 x i16> %shuffle.i.i, <4 x i16> %shuffle.i3.i)
+ ret <4 x i32> %vqdmull2.i.i
+}
+
+define <2 x i64> @test_vqdmull_high_s32(<4 x i32> %a, <4 x i32> %b) {
+; CHECK: test_vqdmull_high_s32:
+; CHECK: sqdmull2 {{v[0-9]+}}.2d, {{v[0-9]+}}.4s, {{v[0-9]+}}.4s
+entry:
+ %shuffle.i.i = shufflevector <4 x i32> %a, <4 x i32> undef, <2 x i32> <i32 2, i32 3>
+ %shuffle.i3.i = shufflevector <4 x i32> %b, <4 x i32> undef, <2 x i32> <i32 2, i32 3>
+ %vqdmull2.i.i = tail call <2 x i64> @llvm.arm.neon.vqdmull.v2i64(<2 x i32> %shuffle.i.i, <2 x i32> %shuffle.i3.i)
+ ret <2 x i64> %vqdmull2.i.i
+}
+
+define <4 x i32> @test_vqdmlal_high_s16(<4 x i32> %a, <8 x i16> %b, <8 x i16> %c) {
+; CHECK: test_vqdmlal_high_s16:
+; CHECK: sqdmlal2 {{v[0-9]+}}.4s, {{v[0-9]+}}.8h, {{v[0-9]+}}.8h
+entry:
+ %shuffle.i.i = shufflevector <8 x i16> %b, <8 x i16> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
+ %shuffle.i3.i = shufflevector <8 x i16> %c, <8 x i16> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
+ %vqdmlal2.i.i = tail call <4 x i32> @llvm.arm.neon.vqdmull.v4i32(<4 x i16> %shuffle.i.i, <4 x i16> %shuffle.i3.i)
+ %vqdmlal4.i.i = tail call <4 x i32> @llvm.arm.neon.vqadds.v4i32(<4 x i32> %a, <4 x i32> %vqdmlal2.i.i)
+ ret <4 x i32> %vqdmlal4.i.i
+}
+
+define <2 x i64> @test_vqdmlal_high_s32(<2 x i64> %a, <4 x i32> %b, <4 x i32> %c) {
+; CHECK: test_vqdmlal_high_s32:
+; CHECK: sqdmlal2 {{v[0-9]+}}.2d, {{v[0-9]+}}.4s, {{v[0-9]+}}.4s
+entry:
+ %shuffle.i.i = shufflevector <4 x i32> %b, <4 x i32> undef, <2 x i32> <i32 2, i32 3>
+ %shuffle.i3.i = shufflevector <4 x i32> %c, <4 x i32> undef, <2 x i32> <i32 2, i32 3>
+ %vqdmlal2.i.i = tail call <2 x i64> @llvm.arm.neon.vqdmull.v2i64(<2 x i32> %shuffle.i.i, <2 x i32> %shuffle.i3.i)
+ %vqdmlal4.i.i = tail call <2 x i64> @llvm.arm.neon.vqadds.v2i64(<2 x i64> %a, <2 x i64> %vqdmlal2.i.i)
+ ret <2 x i64> %vqdmlal4.i.i
+}
+
+define <4 x i32> @test_vqdmlsl_high_s16(<4 x i32> %a, <8 x i16> %b, <8 x i16> %c) {
+; CHECK: test_vqdmlsl_high_s16:
+; CHECK: sqdmlsl2 {{v[0-9]+}}.4s, {{v[0-9]+}}.8h, {{v[0-9]+}}.8h
+entry:
+ %shuffle.i.i = shufflevector <8 x i16> %b, <8 x i16> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
+ %shuffle.i3.i = shufflevector <8 x i16> %c, <8 x i16> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
+ %vqdmlsl2.i.i = tail call <4 x i32> @llvm.arm.neon.vqdmull.v4i32(<4 x i16> %shuffle.i.i, <4 x i16> %shuffle.i3.i)
+ %vqdmlsl4.i.i = tail call <4 x i32> @llvm.arm.neon.vqsubs.v4i32(<4 x i32> %a, <4 x i32> %vqdmlsl2.i.i)
+ ret <4 x i32> %vqdmlsl4.i.i
+}
+
+define <2 x i64> @test_vqdmlsl_high_s32(<2 x i64> %a, <4 x i32> %b, <4 x i32> %c) {
+; CHECK: test_vqdmlsl_high_s32:
+; CHECK: sqdmlsl2 {{v[0-9]+}}.2d, {{v[0-9]+}}.4s, {{v[0-9]+}}.4s
+entry:
+ %shuffle.i.i = shufflevector <4 x i32> %b, <4 x i32> undef, <2 x i32> <i32 2, i32 3>
+ %shuffle.i3.i = shufflevector <4 x i32> %c, <4 x i32> undef, <2 x i32> <i32 2, i32 3>
+ %vqdmlsl2.i.i = tail call <2 x i64> @llvm.arm.neon.vqdmull.v2i64(<2 x i32> %shuffle.i.i, <2 x i32> %shuffle.i3.i)
+ %vqdmlsl4.i.i = tail call <2 x i64> @llvm.arm.neon.vqsubs.v2i64(<2 x i64> %a, <2 x i64> %vqdmlsl2.i.i)
+ ret <2 x i64> %vqdmlsl4.i.i
+}
+
+define <8 x i16> @test_vmull_p8(<8 x i8> %a, <8 x i8> %b) {
+; CHECK: test_vmull_p8:
+; CHECK: pmull {{v[0-9]+}}.8h, {{v[0-9]+}}.8b, {{v[0-9]+}}.8b
+entry:
+ %vmull.i = tail call <8 x i16> @llvm.arm.neon.vmullp.v8i16(<8 x i8> %a, <8 x i8> %b)
+ ret <8 x i16> %vmull.i
+}
+
+define <8 x i16> @test_vmull_high_p8(<16 x i8> %a, <16 x i8> %b) {
+; CHECK: test_vmull_high_p8:
+; CHECK: pmull2 {{v[0-9]+}}.8h, {{v[0-9]+}}.16b, {{v[0-9]+}}.16b
+entry:
+ %shuffle.i.i = shufflevector <16 x i8> %a, <16 x i8> undef, <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
+ %shuffle.i3.i = shufflevector <16 x i8> %b, <16 x i8> undef, <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
+ %vmull.i.i = tail call <8 x i16> @llvm.arm.neon.vmullp.v8i16(<8 x i8> %shuffle.i.i, <8 x i8> %shuffle.i3.i)
+ ret <8 x i16> %vmull.i.i
+}
+
diff --git a/test/MC/AArch64/neon-3vdiff.s b/test/MC/AArch64/neon-3vdiff.s
new file mode 100644
index 0000000..337b94c
--- /dev/null
+++ b/test/MC/AArch64/neon-3vdiff.s
@@ -0,0 +1,411 @@
+// RUN: llvm-mc -triple=aarch64 -mattr=+neon -show-encoding < %s | FileCheck %s
+
+// Check that the assembler can handle the documented syntax for AArch64
+
+//------------------------------------------------------------------------------
+// Instructions with 3 different vector data types
+//------------------------------------------------------------------------------
+
+//------------------------------------------------------------------------------
+// Long
+//------------------------------------------------------------------------------
+
+//------------------------------------------------------------------------------
+// Long - Variant 1
+//------------------------------------------------------------------------------
+
+ saddl v0.8h, v1.8b, v2.8b
+ saddl v0.4s, v1.4h, v2.4h
+ saddl v0.2d, v1.2s, v2.2s
+
+// CHECK: saddl v0.8h, v1.8b, v2.8b // encoding: [0x20,0x00,0x22,0x0e]
+// CHECK: saddl v0.4s, v1.4h, v2.4h // encoding: [0x20,0x00,0x62,0x0e]
+// CHECK: saddl v0.2d, v1.2s, v2.2s // encoding: [0x20,0x00,0xa2,0x0e]
+
+ saddl2 v0.4s, v1.8h, v2.8h
+ saddl2 v0.8h, v1.16b, v2.16b
+ saddl2 v0.2d, v1.4s, v2.4s
+
+// CHECK: saddl2 v0.4s, v1.8h, v2.8h // encoding: [0x20,0x00,0x62,0x4e]
+// CHECK: saddl2 v0.8h, v1.16b, v2.16b // encoding: [0x20,0x00,0x22,0x4e]
+// CHECK: saddl2 v0.2d, v1.4s, v2.4s // encoding: [0x20,0x00,0xa2,0x4e]
+
+ uaddl v0.8h, v1.8b, v2.8b
+ uaddl v0.4s, v1.4h, v2.4h
+ uaddl v0.2d, v1.2s, v2.2s
+
+// CHECK: uaddl v0.8h, v1.8b, v2.8b // encoding: [0x20,0x00,0x22,0x2e]
+// CHECK: uaddl v0.4s, v1.4h, v2.4h // encoding: [0x20,0x00,0x62,0x2e]
+// CHECK: uaddl v0.2d, v1.2s, v2.2s // encoding: [0x20,0x00,0xa2,0x2e]
+
+ uaddl2 v0.8h, v1.16b, v2.16b
+ uaddl2 v0.4s, v1.8h, v2.8h
+ uaddl2 v0.2d, v1.4s, v2.4s
+
+// CHECK: uaddl2 v0.8h, v1.16b, v2.16b // encoding: [0x20,0x00,0x22,0x6e]
+// CHECK: uaddl2 v0.4s, v1.8h, v2.8h // encoding: [0x20,0x00,0x62,0x6e]
+// CHECK: uaddl2 v0.2d, v1.4s, v2.4s // encoding: [0x20,0x00,0xa2,0x6e]
+
+ ssubl v0.8h, v1.8b, v2.8b
+ ssubl v0.4s, v1.4h, v2.4h
+ ssubl v0.2d, v1.2s, v2.2s
+
+// CHECK: ssubl v0.8h, v1.8b, v2.8b // encoding: [0x20,0x20,0x22,0x0e]
+// CHECK: ssubl v0.4s, v1.4h, v2.4h // encoding: [0x20,0x20,0x62,0x0e]
+// CHECK: ssubl v0.2d, v1.2s, v2.2s // encoding: [0x20,0x20,0xa2,0x0e]
+
+ ssubl2 v0.8h, v1.16b, v2.16b
+ ssubl2 v0.4s, v1.8h, v2.8h
+ ssubl2 v0.2d, v1.4s, v2.4s
+
+// CHECK: ssubl2 v0.8h, v1.16b, v2.16b // encoding: [0x20,0x20,0x22,0x4e]
+// CHECK: ssubl2 v0.4s, v1.8h, v2.8h // encoding: [0x20,0x20,0x62,0x4e]
+// CHECK: ssubl2 v0.2d, v1.4s, v2.4s // encoding: [0x20,0x20,0xa2,0x4e]
+
+ usubl v0.8h, v1.8b, v2.8b
+ usubl v0.4s, v1.4h, v2.4h
+ usubl v0.2d, v1.2s, v2.2s
+
+// CHECK: usubl v0.8h, v1.8b, v2.8b // encoding: [0x20,0x20,0x22,0x2e]
+// CHECK: usubl v0.4s, v1.4h, v2.4h // encoding: [0x20,0x20,0x62,0x2e]
+// CHECK: usubl v0.2d, v1.2s, v2.2s // encoding: [0x20,0x20,0xa2,0x2e]
+
+ usubl2 v0.8h, v1.16b, v2.16b
+ usubl2 v0.4s, v1.8h, v2.8h
+ usubl2 v0.2d, v1.4s, v2.4s
+
+// CHECK: usubl2 v0.8h, v1.16b, v2.16b // encoding: [0x20,0x20,0x22,0x6e]
+// CHECK: usubl2 v0.4s, v1.8h, v2.8h // encoding: [0x20,0x20,0x62,0x6e]
+// CHECK: usubl2 v0.2d, v1.4s, v2.4s // encoding: [0x20,0x20,0xa2,0x6e]
+
+ sabal v0.8h, v1.8b, v2.8b
+ sabal v0.4s, v1.4h, v2.4h
+ sabal v0.2d, v1.2s, v2.2s
+
+// CHECK: sabal v0.8h, v1.8b, v2.8b // encoding: [0x20,0x50,0x22,0x0e]
+// CHECK: sabal v0.4s, v1.4h, v2.4h // encoding: [0x20,0x50,0x62,0x0e]
+// CHECK: sabal v0.2d, v1.2s, v2.2s // encoding: [0x20,0x50,0xa2,0x0e]
+
+ sabal2 v0.8h, v1.16b, v2.16b
+ sabal2 v0.4s, v1.8h, v2.8h
+ sabal2 v0.2d, v1.4s, v2.4s
+
+// CHECK: sabal2 v0.8h, v1.16b, v2.16b // encoding: [0x20,0x50,0x22,0x4e]
+// CHECK: sabal2 v0.4s, v1.8h, v2.8h // encoding: [0x20,0x50,0x62,0x4e]
+// CHECK: sabal2 v0.2d, v1.4s, v2.4s // encoding: [0x20,0x50,0xa2,0x4e]
+
+ uabal v0.8h, v1.8b, v2.8b
+ uabal v0.4s, v1.4h, v2.4h
+ uabal v0.2d, v1.2s, v2.2s
+
+// CHECK: uabal v0.8h, v1.8b, v2.8b // encoding: [0x20,0x50,0x22,0x2e]
+// CHECK: uabal v0.4s, v1.4h, v2.4h // encoding: [0x20,0x50,0x62,0x2e]
+// CHECK: uabal v0.2d, v1.2s, v2.2s // encoding: [0x20,0x50,0xa2,0x2e]
+
+ uabal2 v0.8h, v1.16b, v2.16b
+ uabal2 v0.4s, v1.8h, v2.8h
+ uabal2 v0.2d, v1.4s, v2.4s
+
+// CHECK: uabal2 v0.8h, v1.16b, v2.16b // encoding: [0x20,0x50,0x22,0x6e]
+// CHECK: uabal2 v0.4s, v1.8h, v2.8h // encoding: [0x20,0x50,0x62,0x6e]
+// CHECK: uabal2 v0.2d, v1.4s, v2.4s // encoding: [0x20,0x50,0xa2,0x6e]
+
+ sabdl v0.8h, v1.8b, v2.8b
+ sabdl v0.4s, v1.4h, v2.4h
+ sabdl v0.2d, v1.2s, v2.2s
+
+// CHECK: sabdl v0.8h, v1.8b, v2.8b // encoding: [0x20,0x70,0x22,0x0e]
+// CHECK: sabdl v0.4s, v1.4h, v2.4h // encoding: [0x20,0x70,0x62,0x0e]
+// CHECK: sabdl v0.2d, v1.2s, v2.2s // encoding: [0x20,0x70,0xa2,0x0e]
+
+ sabdl2 v0.8h, v1.16b, v2.16b
+ sabdl2 v0.4s, v1.8h, v2.8h
+ sabdl2 v0.2d, v1.4s, v2.4s
+
+// CHECK: sabdl2 v0.8h, v1.16b, v2.16b // encoding: [0x20,0x70,0x22,0x4e]
+// CHECK: sabdl2 v0.4s, v1.8h, v2.8h // encoding: [0x20,0x70,0x62,0x4e]
+// CHECK: sabdl2 v0.2d, v1.4s, v2.4s // encoding: [0x20,0x70,0xa2,0x4e]
+
+ uabdl v0.8h, v1.8b, v2.8b
+ uabdl v0.4s, v1.4h, v2.4h
+ uabdl v0.2d, v1.2s, v2.2s
+
+// CHECK: uabdl v0.8h, v1.8b, v2.8b // encoding: [0x20,0x70,0x22,0x2e]
+// CHECK: uabdl v0.4s, v1.4h, v2.4h // encoding: [0x20,0x70,0x62,0x2e]
+// CHECK: uabdl v0.2d, v1.2s, v2.2s // encoding: [0x20,0x70,0xa2,0x2e]
+
+ uabdl2 v0.8h, v1.16b, v2.16b
+ uabdl2 v0.4s, v1.8h, v2.8h
+ uabdl2 v0.2d, v1.4s, v2.4s
+
+// CHECK: uabdl2 v0.8h, v1.16b, v2.16b // encoding: [0x20,0x70,0x22,0x6e]
+// CHECK: uabdl2 v0.4s, v1.8h, v2.8h // encoding: [0x20,0x70,0x62,0x6e]
+// CHECK: uabdl2 v0.2d, v1.4s, v2.4s // encoding: [0x20,0x70,0xa2,0x6e]
+
+ smlal v0.8h, v1.8b, v2.8b
+ smlal v0.4s, v1.4h, v2.4h
+ smlal v0.2d, v1.2s, v2.2s
+
+// CHECK: smlal v0.8h, v1.8b, v2.8b // encoding: [0x20,0x80,0x22,0x0e]
+// CHECK: smlal v0.4s, v1.4h, v2.4h // encoding: [0x20,0x80,0x62,0x0e]
+// CHECK: smlal v0.2d, v1.2s, v2.2s // encoding: [0x20,0x80,0xa2,0x0e]
+
+ smlal2 v0.8h, v1.16b, v2.16b
+ smlal2 v0.4s, v1.8h, v2.8h
+ smlal2 v0.2d, v1.4s, v2.4s
+
+// CHECK: smlal2 v0.8h, v1.16b, v2.16b // encoding: [0x20,0x80,0x22,0x4e]
+// CHECK: smlal2 v0.4s, v1.8h, v2.8h // encoding: [0x20,0x80,0x62,0x4e]
+// CHECK: smlal2 v0.2d, v1.4s, v2.4s // encoding: [0x20,0x80,0xa2,0x4e]
+
+ umlal v0.8h, v1.8b, v2.8b
+ umlal v0.4s, v1.4h, v2.4h
+ umlal v0.2d, v1.2s, v2.2s
+
+// CHECK: umlal v0.8h, v1.8b, v2.8b // encoding: [0x20,0x80,0x22,0x2e]
+// CHECK: umlal v0.4s, v1.4h, v2.4h // encoding: [0x20,0x80,0x62,0x2e]
+// CHECK: umlal v0.2d, v1.2s, v2.2s // encoding: [0x20,0x80,0xa2,0x2e]
+
+ umlal2 v0.8h, v1.16b, v2.16b
+ umlal2 v0.4s, v1.8h, v2.8h
+ umlal2 v0.2d, v1.4s, v2.4s
+
+// CHECK: umlal2 v0.8h, v1.16b, v2.16b // encoding: [0x20,0x80,0x22,0x6e]
+// CHECK: umlal2 v0.4s, v1.8h, v2.8h // encoding: [0x20,0x80,0x62,0x6e]
+// CHECK: umlal2 v0.2d, v1.4s, v2.4s // encoding: [0x20,0x80,0xa2,0x6e]
+
+ smlsl v0.8h, v1.8b, v2.8b
+ smlsl v0.4s, v1.4h, v2.4h
+ smlsl v0.2d, v1.2s, v2.2s
+
+// CHECK: smlsl v0.8h, v1.8b, v2.8b // encoding: [0x20,0xa0,0x22,0x0e]
+// CHECK: smlsl v0.4s, v1.4h, v2.4h // encoding: [0x20,0xa0,0x62,0x0e]
+// CHECK: smlsl v0.2d, v1.2s, v2.2s // encoding: [0x20,0xa0,0xa2,0x0e]
+
+ smlsl2 v0.8h, v1.16b, v2.16b
+ smlsl2 v0.4s, v1.8h, v2.8h
+ smlsl2 v0.2d, v1.4s, v2.4s
+
+// CHECK: smlsl2 v0.8h, v1.16b, v2.16b // encoding: [0x20,0xa0,0x22,0x4e]
+// CHECK: smlsl2 v0.4s, v1.8h, v2.8h // encoding: [0x20,0xa0,0x62,0x4e]
+// CHECK: smlsl2 v0.2d, v1.4s, v2.4s // encoding: [0x20,0xa0,0xa2,0x4e]
+
+ umlsl v0.8h, v1.8b, v2.8b
+ umlsl v0.4s, v1.4h, v2.4h
+ umlsl v0.2d, v1.2s, v2.2s
+
+// CHECK: umlsl v0.8h, v1.8b, v2.8b // encoding: [0x20,0xa0,0x22,0x2e]
+// CHECK: umlsl v0.4s, v1.4h, v2.4h // encoding: [0x20,0xa0,0x62,0x2e]
+// CHECK: umlsl v0.2d, v1.2s, v2.2s // encoding: [0x20,0xa0,0xa2,0x2e]
+
+ umlsl2 v0.8h, v1.16b, v2.16b
+ umlsl2 v0.4s, v1.8h, v2.8h
+ umlsl2 v0.2d, v1.4s, v2.4s
+
+// CHECK: umlsl2 v0.8h, v1.16b, v2.16b // encoding: [0x20,0xa0,0x22,0x6e]
+// CHECK: umlsl2 v0.4s, v1.8h, v2.8h // encoding: [0x20,0xa0,0x62,0x6e]
+// CHECK: umlsl2 v0.2d, v1.4s, v2.4s // encoding: [0x20,0xa0,0xa2,0x6e]
+
+ smull v0.8h, v1.8b, v2.8b
+ smull v0.4s, v1.4h, v2.4h
+ smull v0.2d, v1.2s, v2.2s
+
+// CHECK: smull v0.8h, v1.8b, v2.8b // encoding: [0x20,0xc0,0x22,0x0e]
+// CHECK: smull v0.4s, v1.4h, v2.4h // encoding: [0x20,0xc0,0x62,0x0e]
+// CHECK: smull v0.2d, v1.2s, v2.2s // encoding: [0x20,0xc0,0xa2,0x0e]
+
+ smull2 v0.8h, v1.16b, v2.16b
+ smull2 v0.4s, v1.8h, v2.8h
+ smull2 v0.2d, v1.4s, v2.4s
+
+// CHECK: smull2 v0.8h, v1.16b, v2.16b // encoding: [0x20,0xc0,0x22,0x4e]
+// CHECK: smull2 v0.4s, v1.8h, v2.8h // encoding: [0x20,0xc0,0x62,0x4e]
+// CHECK: smull2 v0.2d, v1.4s, v2.4s // encoding: [0x20,0xc0,0xa2,0x4e]
+
+ umull v0.8h, v1.8b, v2.8b
+ umull v0.4s, v1.4h, v2.4h
+ umull v0.2d, v1.2s, v2.2s
+
+// CHECK: umull v0.8h, v1.8b, v2.8b // encoding: [0x20,0xc0,0x22,0x2e]
+// CHECK: umull v0.4s, v1.4h, v2.4h // encoding: [0x20,0xc0,0x62,0x2e]
+// CHECK: umull v0.2d, v1.2s, v2.2s // encoding: [0x20,0xc0,0xa2,0x2e]
+
+ umull2 v0.8h, v1.16b, v2.16b
+ umull2 v0.4s, v1.8h, v2.8h
+ umull2 v0.2d, v1.4s, v2.4s
+
+// CHECK: umull2 v0.8h, v1.16b, v2.16b // encoding: [0x20,0xc0,0x22,0x6e]
+// CHECK: umull2 v0.4s, v1.8h, v2.8h // encoding: [0x20,0xc0,0x62,0x6e]
+// CHECK: umull2 v0.2d, v1.4s, v2.4s // encoding: [0x20,0xc0,0xa2,0x6e]
+
+//------------------------------------------------------------------------------
+// Long - Variant 2
+//------------------------------------------------------------------------------
+
+ sqdmlal v0.4s, v1.4h, v2.4h
+ sqdmlal v0.2d, v1.2s, v2.2s
+
+// CHECK: sqdmlal v0.4s, v1.4h, v2.4h // encoding: [0x20,0x90,0x62,0x0e]
+// CHECK: sqdmlal v0.2d, v1.2s, v2.2s // encoding: [0x20,0x90,0xa2,0x0e]
+
+ sqdmlal2 v0.4s, v1.8h, v2.8h
+ sqdmlal2 v0.2d, v1.4s, v2.4s
+
+// CHECK: sqdmlal2 v0.4s, v1.8h, v2.8h // encoding: [0x20,0x90,0x62,0x4e]
+// CHECK: sqdmlal2 v0.2d, v1.4s, v2.4s // encoding: [0x20,0x90,0xa2,0x4e]
+
+ sqdmlsl v0.4s, v1.4h, v2.4h
+ sqdmlsl v0.2d, v1.2s, v2.2s
+
+// CHECK: sqdmlsl v0.4s, v1.4h, v2.4h // encoding: [0x20,0xb0,0x62,0x0e]
+// CHECK: sqdmlsl v0.2d, v1.2s, v2.2s // encoding: [0x20,0xb0,0xa2,0x0e]
+
+ sqdmlsl2 v0.4s, v1.8h, v2.8h
+ sqdmlsl2 v0.2d, v1.4s, v2.4s
+
+// CHECK: sqdmlsl2 v0.4s, v1.8h, v2.8h // encoding: [0x20,0xb0,0x62,0x4e]
+// CHECK: sqdmlsl2 v0.2d, v1.4s, v2.4s // encoding: [0x20,0xb0,0xa2,0x4e]
+
+ sqdmull v0.4s, v1.4h, v2.4h
+ sqdmull v0.2d, v1.2s, v2.2s
+
+// CHECK: sqdmull v0.4s, v1.4h, v2.4h // encoding: [0x20,0xd0,0x62,0x0e]
+// CHECK: sqdmull v0.2d, v1.2s, v2.2s // encoding: [0x20,0xd0,0xa2,0x0e]
+
+ sqdmull2 v0.4s, v1.8h, v2.8h
+ sqdmull2 v0.2d, v1.4s, v2.4s
+
+// CHECK: sqdmull2 v0.4s, v1.8h, v2.8h // encoding: [0x20,0xd0,0x62,0x4e]
+// CHECK: sqdmull2 v0.2d, v1.4s, v2.4s // encoding: [0x20,0xd0,0xa2,0x4e]
+
+//------------------------------------------------------------------------------
+// Long - Variant 3
+//------------------------------------------------------------------------------
+
+ pmull v0.8h, v1.8b, v2.8b
+
+// CHECK: pmull v0.8h, v1.8b, v2.8b // encoding: [0x20,0xe0,0x22,0x0e]
+
+ pmull2 v0.8h, v1.16b, v2.16b
+
+// CHECK: pmull2 v0.8h, v1.16b, v2.16b // encoding: [0x20,0xe0,0x22,0x4e]
+
+//------------------------------------------------------------------------------
+// Widen
+//------------------------------------------------------------------------------
+
+ saddw v0.8h, v1.8h, v2.8b
+ saddw v0.4s, v1.4s, v2.4h
+ saddw v0.2d, v1.2d, v2.2s
+
+// CHECK: saddw v0.8h, v1.8h, v2.8b // encoding: [0x20,0x10,0x22,0x0e]
+// CHECK: saddw v0.4s, v1.4s, v2.4h // encoding: [0x20,0x10,0x62,0x0e]
+// CHECK: saddw v0.2d, v1.2d, v2.2s // encoding: [0x20,0x10,0xa2,0x0e]
+
+ saddw2 v0.8h, v1.8h, v2.16b
+ saddw2 v0.4s, v1.4s, v2.8h
+ saddw2 v0.2d, v1.2d, v2.4s
+
+// CHECK: saddw2 v0.8h, v1.8h, v2.16b // encoding: [0x20,0x10,0x22,0x4e]
+// CHECK: saddw2 v0.4s, v1.4s, v2.8h // encoding: [0x20,0x10,0x62,0x4e]
+// CHECK: saddw2 v0.2d, v1.2d, v2.4s // encoding: [0x20,0x10,0xa2,0x4e]
+
+ uaddw v0.8h, v1.8h, v2.8b
+ uaddw v0.4s, v1.4s, v2.4h
+ uaddw v0.2d, v1.2d, v2.2s
+
+// CHECK: uaddw v0.8h, v1.8h, v2.8b // encoding: [0x20,0x10,0x22,0x2e]
+// CHECK: uaddw v0.4s, v1.4s, v2.4h // encoding: [0x20,0x10,0x62,0x2e]
+// CHECK: uaddw v0.2d, v1.2d, v2.2s // encoding: [0x20,0x10,0xa2,0x2e]
+
+ uaddw2 v0.8h, v1.8h, v2.16b
+ uaddw2 v0.4s, v1.4s, v2.8h
+ uaddw2 v0.2d, v1.2d, v2.4s
+
+// CHECK: uaddw2 v0.8h, v1.8h, v2.16b // encoding: [0x20,0x10,0x22,0x6e]
+// CHECK: uaddw2 v0.4s, v1.4s, v2.8h // encoding: [0x20,0x10,0x62,0x6e]
+// CHECK: uaddw2 v0.2d, v1.2d, v2.4s // encoding: [0x20,0x10,0xa2,0x6e]
+
+ ssubw v0.8h, v1.8h, v2.8b
+ ssubw v0.4s, v1.4s, v2.4h
+ ssubw v0.2d, v1.2d, v2.2s
+
+// CHECK: ssubw v0.8h, v1.8h, v2.8b // encoding: [0x20,0x30,0x22,0x0e]
+// CHECK: ssubw v0.4s, v1.4s, v2.4h // encoding: [0x20,0x30,0x62,0x0e]
+// CHECK: ssubw v0.2d, v1.2d, v2.2s // encoding: [0x20,0x30,0xa2,0x0e]
+
+ ssubw2 v0.8h, v1.8h, v2.16b
+ ssubw2 v0.4s, v1.4s, v2.8h
+ ssubw2 v0.2d, v1.2d, v2.4s
+
+// CHECK: ssubw2 v0.8h, v1.8h, v2.16b // encoding: [0x20,0x30,0x22,0x4e]
+// CHECK: ssubw2 v0.4s, v1.4s, v2.8h // encoding: [0x20,0x30,0x62,0x4e]
+// CHECK: ssubw2 v0.2d, v1.2d, v2.4s // encoding: [0x20,0x30,0xa2,0x4e]
+
+ usubw v0.8h, v1.8h, v2.8b
+ usubw v0.4s, v1.4s, v2.4h
+ usubw v0.2d, v1.2d, v2.2s
+
+// CHECK: usubw v0.8h, v1.8h, v2.8b // encoding: [0x20,0x30,0x22,0x2e]
+// CHECK: usubw v0.4s, v1.4s, v2.4h // encoding: [0x20,0x30,0x62,0x2e]
+// CHECK: usubw v0.2d, v1.2d, v2.2s // encoding: [0x20,0x30,0xa2,0x2e]
+
+ usubw2 v0.8h, v1.8h, v2.16b
+ usubw2 v0.4s, v1.4s, v2.8h
+ usubw2 v0.2d, v1.2d, v2.4s
+
+// CHECK: usubw2 v0.8h, v1.8h, v2.16b // encoding: [0x20,0x30,0x22,0x6e]
+// CHECK: usubw2 v0.4s, v1.4s, v2.8h // encoding: [0x20,0x30,0x62,0x6e]
+// CHECK: usubw2 v0.2d, v1.2d, v2.4s // encoding: [0x20,0x30,0xa2,0x6e]
+
+//------------------------------------------------------------------------------
+// Narrow
+//------------------------------------------------------------------------------
+
+ addhn v0.8b, v1.8h, v2.8h
+ addhn v0.4h, v1.4s, v2.4s
+ addhn v0.2s, v1.2d, v2.2d
+
+// CHECK: addhn v0.8b, v1.8h, v2.8h // encoding: [0x20,0x40,0x22,0x0e]
+// CHECK: addhn v0.4h, v1.4s, v2.4s // encoding: [0x20,0x40,0x62,0x0e]
+// CHECK: addhn v0.2s, v1.2d, v2.2d // encoding: [0x20,0x40,0xa2,0x0e]
+
+ addhn2 v0.16b, v1.8h, v2.8h
+ addhn2 v0.8h, v1.4s, v2.4s
+ addhn2 v0.4s, v1.2d, v2.2d
+
+// CHECK: addhn2 v0.16b, v1.8h, v2.8h // encoding: [0x20,0x40,0x22,0x4e]
+// CHECK: addhn2 v0.8h, v1.4s, v2.4s // encoding: [0x20,0x40,0x62,0x4e]
+// CHECK: addhn2 v0.4s, v1.2d, v2.2d // encoding: [0x20,0x40,0xa2,0x4e]
+
+ raddhn v0.8b, v1.8h, v2.8h
+ raddhn v0.4h, v1.4s, v2.4s
+ raddhn v0.2s, v1.2d, v2.2d
+
+// CHECK: raddhn v0.8b, v1.8h, v2.8h // encoding: [0x20,0x40,0x22,0x2e]
+// CHECK: raddhn v0.4h, v1.4s, v2.4s // encoding: [0x20,0x40,0x62,0x2e]
+// CHECK: raddhn v0.2s, v1.2d, v2.2d // encoding: [0x20,0x40,0xa2,0x2e]
+
+ raddhn2 v0.16b, v1.8h, v2.8h
+ raddhn2 v0.8h, v1.4s, v2.4s
+ raddhn2 v0.4s, v1.2d, v2.2d
+
+// CHECK: raddhn2 v0.16b, v1.8h, v2.8h // encoding: [0x20,0x40,0x22,0x6e]
+// CHECK: raddhn2 v0.8h, v1.4s, v2.4s // encoding: [0x20,0x40,0x62,0x6e]
+// CHECK: raddhn2 v0.4s, v1.2d, v2.2d // encoding: [0x20,0x40,0xa2,0x6e]
+
+ rsubhn v0.8b, v1.8h, v2.8h
+ rsubhn v0.4h, v1.4s, v2.4s
+ rsubhn v0.2s, v1.2d, v2.2d
+
+// CHECK: rsubhn v0.8b, v1.8h, v2.8h // encoding: [0x20,0x60,0x22,0x2e]
+// CHECK: rsubhn v0.4h, v1.4s, v2.4s // encoding: [0x20,0x60,0x62,0x2e]
+// CHECK: rsubhn v0.2s, v1.2d, v2.2d // encoding: [0x20,0x60,0xa2,0x2e]
+
+ rsubhn2 v0.16b, v1.8h, v2.8h
+ rsubhn2 v0.8h, v1.4s, v2.4s
+ rsubhn2 v0.4s, v1.2d, v2.2d
+
+// CHECK: rsubhn2 v0.16b, v1.8h, v2.8h // encoding: [0x20,0x60,0x22,0x6e]
+// CHECK: rsubhn2 v0.8h, v1.4s, v2.4s // encoding: [0x20,0x60,0x62,0x6e]
+// CHECK: rsubhn2 v0.4s, v1.2d, v2.2d // encoding: [0x20,0x60,0xa2,0x6e]
diff --git a/test/MC/AArch64/neon-diagnostics.s b/test/MC/AArch64/neon-diagnostics.s
index 52305f1..c85db70 100644
--- a/test/MC/AArch64/neon-diagnostics.s
+++ b/test/MC/AArch64/neon-diagnostics.s
@@ -2008,3 +2008,742 @@
// CHECK-ERROR: fcvtzu v0.2d, v1.2d, #65
// CHECK-ERROR: ^
+//----------------------------------------------------------------------
+// Vector operation on 3 operands with different types
+//----------------------------------------------------------------------
+
+ // Mismatched and invalid vector types
+ saddl v0.8h, v1.8h, v2.8b
+ saddl v0.4s, v1.4s, v2.4h
+ saddl v0.2d, v1.2d, v2.2s
+
+// CHECK-ERROR: error: invalid operand for instruction
+// CHECK-ERROR: saddl v0.8h, v1.8h, v2.8b
+// CHECK-ERROR: ^
+// CHECK-ERROR: error: invalid operand for instruction
+// CHECK-ERROR: saddl v0.4s, v1.4s, v2.4h
+// CHECK-ERROR: ^
+// CHECK-ERROR: error: invalid operand for instruction
+// CHECK-ERROR: saddl v0.2d, v1.2d, v2.2s
+// CHECK-ERROR: ^
+
+ saddl2 v0.4s, v1.8s, v2.8h
+ saddl2 v0.8h, v1.16h, v2.16b
+ saddl2 v0.2d, v1.4d, v2.4s
+
+// CHECK-ERROR: error: invalid operand for instruction
+// CHECK-ERROR: saddl2 v0.4s, v1.8s, v2.8h
+// CHECK-ERROR: ^
+// CHECK-ERROR: error: invalid operand for instruction
+// CHECK-ERROR: saddl2 v0.8h, v1.16h, v2.16b
+// CHECK-ERROR: ^
+// CHECK-ERROR: error: invalid operand for instruction
+// CHECK-ERROR: saddl2 v0.2d, v1.4d, v2.4s
+// CHECK-ERROR: ^
+
+ uaddl v0.8h, v1.8h, v2.8b
+ uaddl v0.4s, v1.4s, v2.4h
+ uaddl v0.2d, v1.2d, v2.2s
+
+// CHECK-ERROR: error: invalid operand for instruction
+// CHECK-ERROR: uaddl v0.8h, v1.8h, v2.8b
+// CHECK-ERROR: ^
+// CHECK-ERROR: error: invalid operand for instruction
+// CHECK-ERROR: uaddl v0.4s, v1.4s, v2.4h
+// CHECK-ERROR: ^
+// CHECK-ERROR: error: invalid operand for instruction
+// CHECK-ERROR: uaddl v0.2d, v1.2d, v2.2s
+// CHECK-ERROR: ^
+
+ uaddl2 v0.8h, v1.16h, v2.16b
+ uaddl2 v0.4s, v1.8s, v2.8h
+ uaddl2 v0.2d, v1.4d, v2.4s
+
+// CHECK-ERROR: error: invalid operand for instruction
+// CHECK-ERROR: uaddl2 v0.8h, v1.16h, v2.16b
+// CHECK-ERROR: ^
+// CHECK-ERROR: error: invalid operand for instruction
+// CHECK-ERROR: uaddl2 v0.4s, v1.8s, v2.8h
+// CHECK-ERROR: ^
+// CHECK-ERROR: error: invalid operand for instruction
+// CHECK-ERROR: uaddl2 v0.2d, v1.4d, v2.4s
+// CHECK-ERROR: ^
+
+ ssubl v0.8h, v1.8h, v2.8b
+ ssubl v0.4s, v1.4s, v2.4h
+ ssubl v0.2d, v1.2d, v2.2s
+
+// CHECK-ERROR: error: invalid operand for instruction
+// CHECK-ERROR: ssubl v0.8h, v1.8h, v2.8b
+// CHECK-ERROR: ^
+// CHECK-ERROR: error: invalid operand for instruction
+// CHECK-ERROR: ssubl v0.4s, v1.4s, v2.4h
+// CHECK-ERROR: ^
+// CHECK-ERROR: error: invalid operand for instruction
+// CHECK-ERROR: ssubl v0.2d, v1.2d, v2.2s
+// CHECK-ERROR: ^
+
+ ssubl2 v0.8h, v1.16h, v2.16b
+ ssubl2 v0.4s, v1.8s, v2.8h
+ ssubl2 v0.2d, v1.4d, v2.4s
+
+// CHECK-ERROR: error: invalid operand for instruction
+// CHECK-ERROR: ssubl2 v0.8h, v1.16h, v2.16b
+// CHECK-ERROR: ^
+// CHECK-ERROR: error: invalid operand for instruction
+// CHECK-ERROR: ssubl2 v0.4s, v1.8s, v2.8h
+// CHECK-ERROR: ^
+// CHECK-ERROR: error: invalid operand for instruction
+// CHECK-ERROR: ssubl2 v0.2d, v1.4d, v2.4s
+// CHECK-ERROR: ^
+
+ usubl v0.8h, v1.8h, v2.8b
+ usubl v0.4s, v1.4s, v2.4h
+ usubl v0.2d, v1.2d, v2.2s
+
+// CHECK-ERROR: error: invalid operand for instruction
+// CHECK-ERROR: usubl v0.8h, v1.8h, v2.8b
+// CHECK-ERROR: ^
+// CHECK-ERROR: error: invalid operand for instruction
+// CHECK-ERROR: usubl v0.4s, v1.4s, v2.4h
+// CHECK-ERROR: ^
+// CHECK-ERROR: error: invalid operand for instruction
+// CHECK-ERROR: usubl v0.2d, v1.2d, v2.2s
+// CHECK-ERROR: ^
+
+ usubl2 v0.8h, v1.16h, v2.16b
+ usubl2 v0.4s, v1.8s, v2.8h
+ usubl2 v0.2d, v1.4d, v2.4s
+
+// CHECK-ERROR: error: invalid operand for instruction
+// CHECK-ERROR: usubl2 v0.8h, v1.16h, v2.16b
+// CHECK-ERROR: ^
+// CHECK-ERROR: error: invalid operand for instruction
+// CHECK-ERROR: usubl2 v0.4s, v1.8s, v2.8h
+// CHECK-ERROR: ^
+// CHECK-ERROR: error: invalid operand for instruction
+// CHECK-ERROR: usubl2 v0.2d, v1.4d, v2.4s
+// CHECK-ERROR: ^
+
+ sabal v0.8h, v1.8h, v2.8b
+ sabal v0.4s, v1.4s, v2.4h
+ sabal v0.2d, v1.2d, v2.2s
+
+// CHECK-ERROR: error: invalid operand for instruction
+// CHECK-ERROR: sabal v0.8h, v1.8h, v2.8b
+// CHECK-ERROR: ^
+// CHECK-ERROR: error: invalid operand for instruction
+// CHECK-ERROR: sabal v0.4s, v1.4s, v2.4h
+// CHECK-ERROR: ^
+// CHECK-ERROR: error: invalid operand for instruction
+// CHECK-ERROR: sabal v0.2d, v1.2d, v2.2s
+// CHECK-ERROR: ^
+
+ sabal2 v0.8h, v1.16h, v2.16b
+ sabal2 v0.4s, v1.8s, v2.8h
+ sabal2 v0.2d, v1.4d, v2.4s
+
+// CHECK-ERROR: error: invalid operand for instruction
+// CHECK-ERROR: sabal2 v0.8h, v1.16h, v2.16b
+// CHECK-ERROR: ^
+// CHECK-ERROR: error: invalid operand for instruction
+// CHECK-ERROR: sabal2 v0.4s, v1.8s, v2.8h
+// CHECK-ERROR: ^
+// CHECK-ERROR: error: invalid operand for instruction
+// CHECK-ERROR: sabal2 v0.2d, v1.4d, v2.4s
+// CHECK-ERROR: ^
+
+ uabal v0.8h, v1.8h, v2.8b
+ uabal v0.4s, v1.4s, v2.4h
+ uabal v0.2d, v1.2d, v2.2s
+
+// CHECK-ERROR: error: invalid operand for instruction
+// CHECK-ERROR: uabal v0.8h, v1.8h, v2.8b
+// CHECK-ERROR: ^
+// CHECK-ERROR: error: invalid operand for instruction
+// CHECK-ERROR: uabal v0.4s, v1.4s, v2.4h
+// CHECK-ERROR: ^
+// CHECK-ERROR: error: invalid operand for instruction
+// CHECK-ERROR: uabal v0.2d, v1.2d, v2.2s
+// CHECK-ERROR: ^
+
+ uabal2 v0.8h, v1.16h, v2.16b
+ uabal2 v0.4s, v1.8s, v2.8h
+ uabal2 v0.2d, v1.4d, v2.4s
+
+// CHECK-ERROR: error: invalid operand for instruction
+// CHECK-ERROR: uabal2 v0.8h, v1.16h, v2.16b
+// CHECK-ERROR: ^
+// CHECK-ERROR: error: invalid operand for instruction
+// CHECK-ERROR: uabal2 v0.4s, v1.8s, v2.8h
+// CHECK-ERROR: ^
+// CHECK-ERROR: error: invalid operand for instruction
+// CHECK-ERROR: uabal2 v0.2d, v1.4d, v2.4s
+// CHECK-ERROR: ^
+
+ sabdl v0.8h, v1.8h, v2.8b
+ sabdl v0.4s, v1.4s, v2.4h
+ sabdl v0.2d, v1.2d, v2.2s
+
+// CHECK-ERROR: error: invalid operand for instruction
+// CHECK-ERROR: sabdl v0.8h, v1.8h, v2.8b
+// CHECK-ERROR: ^
+// CHECK-ERROR: error: invalid operand for instruction
+// CHECK-ERROR: sabdl v0.4s, v1.4s, v2.4h
+// CHECK-ERROR: ^
+// CHECK-ERROR: error: invalid operand for instruction
+// CHECK-ERROR: sabdl v0.2d, v1.2d, v2.2s
+// CHECK-ERROR: ^
+
+ sabdl2 v0.8h, v1.16h, v2.16b
+ sabdl2 v0.4s, v1.8s, v2.8h
+ sabdl2 v0.2d, v1.4d, v2.4s
+
+// CHECK-ERROR: error: invalid operand for instruction
+// CHECK-ERROR: sabdl2 v0.8h, v1.16h, v2.16b
+// CHECK-ERROR: ^
+// CHECK-ERROR: error: invalid operand for instruction
+// CHECK-ERROR: sabdl2 v0.4s, v1.8s, v2.8h
+// CHECK-ERROR: ^
+// CHECK-ERROR: error: invalid operand for instruction
+// CHECK-ERROR: sabdl2 v0.2d, v1.4d, v2.4s
+// CHECK-ERROR: ^
+
+ uabdl v0.8h, v1.8h, v2.8b
+ uabdl v0.4s, v1.4s, v2.4h
+ uabdl v0.2d, v1.2d, v2.2s
+
+// CHECK-ERROR: error: invalid operand for instruction
+// CHECK-ERROR: uabdl v0.8h, v1.8h, v2.8b
+// CHECK-ERROR: ^
+// CHECK-ERROR: error: invalid operand for instruction
+// CHECK-ERROR: uabdl v0.4s, v1.4s, v2.4h
+// CHECK-ERROR: ^
+// CHECK-ERROR: error: invalid operand for instruction
+// CHECK-ERROR: uabdl v0.2d, v1.2d, v2.2s
+// CHECK-ERROR: ^
+
+ uabdl2 v0.8h, v1.16h, v2.16b
+ uabdl2 v0.4s, v1.8s, v2.8h
+ uabdl2 v0.2d, v1.4d, v2.4s
+
+// CHECK-ERROR: error: invalid operand for instruction
+// CHECK-ERROR: uabdl2 v0.8h, v1.16h, v2.16b
+// CHECK-ERROR: ^
+// CHECK-ERROR: error: invalid operand for instruction
+// CHECK-ERROR: uabdl2 v0.4s, v1.8s, v2.8h
+// CHECK-ERROR: ^
+// CHECK-ERROR: error: invalid operand for instruction
+// CHECK-ERROR: uabdl2 v0.2d, v1.4d, v2.4s
+// CHECK-ERROR: ^
+
+ smlal v0.8h, v1.8h, v2.8b
+ smlal v0.4s, v1.4s, v2.4h
+ smlal v0.2d, v1.2d, v2.2s
+
+// CHECK-ERROR: error: invalid operand for instruction
+// CHECK-ERROR: smlal v0.8h, v1.8h, v2.8b
+// CHECK-ERROR: ^
+// CHECK-ERROR: error: invalid operand for instruction
+// CHECK-ERROR: smlal v0.4s, v1.4s, v2.4h
+// CHECK-ERROR: ^
+// CHECK-ERROR: error: invalid operand for instruction
+// CHECK-ERROR: smlal v0.2d, v1.2d, v2.2s
+// CHECK-ERROR: ^
+
+ smlal2 v0.8h, v1.16h, v2.16b
+ smlal2 v0.4s, v1.8s, v2.8h
+ smlal2 v0.2d, v1.4d, v2.4s
+
+// CHECK-ERROR: error: invalid operand for instruction
+// CHECK-ERROR: smlal2 v0.8h, v1.16h, v2.16b
+// CHECK-ERROR: ^
+// CHECK-ERROR: error: invalid operand for instruction
+// CHECK-ERROR: smlal2 v0.4s, v1.8s, v2.8h
+// CHECK-ERROR: ^
+// CHECK-ERROR: error: invalid operand for instruction
+// CHECK-ERROR: smlal2 v0.2d, v1.4d, v2.4s
+// CHECK-ERROR: ^
+
+ umlal v0.8h, v1.8h, v2.8b
+ umlal v0.4s, v1.4s, v2.4h
+ umlal v0.2d, v1.2d, v2.2s
+
+// CHECK-ERROR: error: invalid operand for instruction
+// CHECK-ERROR: umlal v0.8h, v1.8h, v2.8b
+// CHECK-ERROR: ^
+// CHECK-ERROR: error: invalid operand for instruction
+// CHECK-ERROR: umlal v0.4s, v1.4s, v2.4h
+// CHECK-ERROR: ^
+// CHECK-ERROR: error: invalid operand for instruction
+// CHECK-ERROR: umlal v0.2d, v1.2d, v2.2s
+// CHECK-ERROR: ^
+
+ umlal2 v0.8h, v1.16h, v2.16b
+ umlal2 v0.4s, v1.8s, v2.8h
+ umlal2 v0.2d, v1.4d, v2.4s
+
+// CHECK-ERROR: error: invalid operand for instruction
+// CHECK-ERROR: umlal2 v0.8h, v1.16h, v2.16b
+// CHECK-ERROR: ^
+// CHECK-ERROR: error: invalid operand for instruction
+// CHECK-ERROR: umlal2 v0.4s, v1.8s, v2.8h
+// CHECK-ERROR: ^
+// CHECK-ERROR: error: invalid operand for instruction
+// CHECK-ERROR: umlal2 v0.2d, v1.4d, v2.4s
+// CHECK-ERROR: ^
+
+ smlsl v0.8h, v1.8h, v2.8b
+ smlsl v0.4s, v1.4s, v2.4h
+ smlsl v0.2d, v1.2d, v2.2s
+
+// CHECK-ERROR: error: invalid operand for instruction
+// CHECK-ERROR: smlsl v0.8h, v1.8h, v2.8b
+// CHECK-ERROR: ^
+// CHECK-ERROR: error: invalid operand for instruction
+// CHECK-ERROR: smlsl v0.4s, v1.4s, v2.4h
+// CHECK-ERROR: ^
+// CHECK-ERROR: error: invalid operand for instruction
+// CHECK-ERROR: smlsl v0.2d, v1.2d, v2.2s
+// CHECK-ERROR: ^
+
+ smlsl2 v0.8h, v1.16h, v2.16b
+ smlsl2 v0.4s, v1.8s, v2.8h
+ smlsl2 v0.2d, v1.4d, v2.4s
+
+// CHECK-ERROR: error: invalid operand for instruction
+// CHECK-ERROR: smlsl2 v0.8h, v1.16h, v2.16b
+// CHECK-ERROR: ^
+// CHECK-ERROR: error: invalid operand for instruction
+// CHECK-ERROR: smlsl2 v0.4s, v1.8s, v2.8h
+// CHECK-ERROR: ^
+// CHECK-ERROR: error: invalid operand for instruction
+// CHECK-ERROR: smlsl2 v0.2d, v1.4d, v2.4s
+// CHECK-ERROR: ^
+
+ umlsl v0.8h, v1.8h, v2.8b
+ umlsl v0.4s, v1.4s, v2.4h
+ umlsl v0.2d, v1.2d, v2.2s
+
+// CHECK-ERROR: error: invalid operand for instruction
+// CHECK-ERROR: umlsl v0.8h, v1.8h, v2.8b
+// CHECK-ERROR: ^
+// CHECK-ERROR: error: invalid operand for instruction
+// CHECK-ERROR: umlsl v0.4s, v1.4s, v2.4h
+// CHECK-ERROR: ^
+// CHECK-ERROR: error: invalid operand for instruction
+// CHECK-ERROR: umlsl v0.2d, v1.2d, v2.2s
+// CHECK-ERROR: ^
+
+ umlsl2 v0.8h, v1.16h, v2.16b
+ umlsl2 v0.4s, v1.8s, v2.8h
+ umlsl2 v0.2d, v1.4d, v2.4s
+
+// CHECK-ERROR: error: invalid operand for instruction
+// CHECK-ERROR: umlsl2 v0.8h, v1.16h, v2.16b
+// CHECK-ERROR: ^
+// CHECK-ERROR: error: invalid operand for instruction
+// CHECK-ERROR: umlsl2 v0.4s, v1.8s, v2.8h
+// CHECK-ERROR: ^
+// CHECK-ERROR: error: invalid operand for instruction
+// CHECK-ERROR: umlsl2 v0.2d, v1.4d, v2.4s
+// CHECK-ERROR: ^
+
+ smull v0.8h, v1.8h, v2.8b
+ smull v0.4s, v1.4s, v2.4h
+ smull v0.2d, v1.2d, v2.2s
+
+// CHECK-ERROR: error: invalid operand for instruction
+// CHECK-ERROR: smull v0.8h, v1.8h, v2.8b
+// CHECK-ERROR: ^
+// CHECK-ERROR: error: invalid operand for instruction
+// CHECK-ERROR: smull v0.4s, v1.4s, v2.4h
+// CHECK-ERROR: ^
+// CHECK-ERROR: error: invalid operand for instruction
+// CHECK-ERROR: smull v0.2d, v1.2d, v2.2s
+// CHECK-ERROR: ^
+
+ smull2 v0.8h, v1.16h, v2.16b
+ smull2 v0.4s, v1.8s, v2.8h
+ smull2 v0.2d, v1.4d, v2.4s
+
+// CHECK-ERROR: error: invalid operand for instruction
+// CHECK-ERROR: smull2 v0.8h, v1.16h, v2.16b
+// CHECK-ERROR: ^
+// CHECK-ERROR: error: invalid operand for instruction
+// CHECK-ERROR: smull2 v0.4s, v1.8s, v2.8h
+// CHECK-ERROR: ^
+// CHECK-ERROR: error: invalid operand for instruction
+// CHECK-ERROR: smull2 v0.2d, v1.4d, v2.4s
+// CHECK-ERROR: ^
+
+ umull v0.8h, v1.8h, v2.8b
+ umull v0.4s, v1.4s, v2.4h
+ umull v0.2d, v1.2d, v2.2s
+
+// CHECK-ERROR: error: invalid operand for instruction
+// CHECK-ERROR: umull v0.8h, v1.8h, v2.8b
+// CHECK-ERROR: ^
+// CHECK-ERROR: error: invalid operand for instruction
+// CHECK-ERROR: umull v0.4s, v1.4s, v2.4h
+// CHECK-ERROR: ^
+// CHECK-ERROR: error: invalid operand for instruction
+// CHECK-ERROR: umull v0.2d, v1.2d, v2.2s
+// CHECK-ERROR: ^
+
+ umull2 v0.8h, v1.16h, v2.16b
+ umull2 v0.4s, v1.8s, v2.8h
+ umull2 v0.2d, v1.4d, v2.4s
+
+// CHECK-ERROR: error: invalid operand for instruction
+// CHECK-ERROR: umull2 v0.8h, v1.16h, v2.16b
+// CHECK-ERROR: ^
+// CHECK-ERROR: error: invalid operand for instruction
+// CHECK-ERROR: umull2 v0.4s, v1.8s, v2.8h
+// CHECK-ERROR: ^
+// CHECK-ERROR: error: invalid operand for instruction
+// CHECK-ERROR: umull2 v0.2d, v1.4d, v2.4s
+// CHECK-ERROR: ^
+
+//------------------------------------------------------------------------------
+// Long - Variant 2
+//------------------------------------------------------------------------------
+
+ sqdmlal v0.4s, v1.4s, v2.4h
+ sqdmlal v0.2d, v1.2d, v2.2s
+
+// CHECK-ERROR: error: invalid operand for instruction
+// CHECK-ERROR: sqdmlal v0.4s, v1.4s, v2.4h
+// CHECK-ERROR: ^
+// CHECK-ERROR: error: invalid operand for instruction
+// CHECK-ERROR: sqdmlal v0.2d, v1.2d, v2.2s
+// CHECK-ERROR: ^
+
+ sqdmlal2 v0.4s, v1.8s, v2.8h
+ sqdmlal2 v0.2d, v1.4d, v2.4s
+
+// CHECK-ERROR: error: invalid operand for instruction
+// CHECK-ERROR: sqdmlal2 v0.4s, v1.8s, v2.8h
+// CHECK-ERROR: ^
+// CHECK-ERROR: error: invalid operand for instruction
+// CHECK-ERROR: sqdmlal2 v0.2d, v1.4d, v2.4s
+// CHECK-ERROR: ^
+
+ // Mismatched vector types
+ sqdmlal v0.8h, v1.8b, v2.8b
+ sqdmlal2 v0.8h, v1.16b, v2.16b
+
+// CHECK-ERROR: error: invalid operand for instruction
+// CHECK-ERROR: sqdmlal v0.8h, v1.8b, v2.8b
+// CHECK-ERROR: ^
+// CHECK-ERROR: error: invalid operand for instruction
+// CHECK-ERROR: sqdmlal2 v0.8h, v1.16b, v2.16b
+// CHECK-ERROR: ^
+
+ sqdmlsl v0.4s, v1.4s, v2.4h
+ sqdmlsl v0.2d, v1.2d, v2.2s
+
+// CHECK-ERROR: error: invalid operand for instruction
+// CHECK-ERROR: sqdmlsl v0.4s, v1.4s, v2.4h
+// CHECK-ERROR: ^
+// CHECK-ERROR: error: invalid operand for instruction
+// CHECK-ERROR: sqdmlsl v0.2d, v1.2d, v2.2s
+// CHECK-ERROR: ^
+
+ sqdmlsl2 v0.4s, v1.8s, v2.8h
+ sqdmlsl2 v0.2d, v1.4d, v2.4s
+
+// CHECK-ERROR: error: invalid operand for instruction
+// CHECK-ERROR: sqdmlsl2 v0.4s, v1.8s, v2.8h
+// CHECK-ERROR: ^
+// CHECK-ERROR: error: invalid operand for instruction
+// CHECK-ERROR: sqdmlsl2 v0.2d, v1.4d, v2.4s
+// CHECK-ERROR: ^
+
+ // Mismatched vector types
+ sqdmlsl v0.8h, v1.8b, v2.8b
+ sqdmlsl2 v0.8h, v1.16b, v2.16b
+
+// CHECK-ERROR: error: invalid operand for instruction
+// CHECK-ERROR: sqdmlsl v0.8h, v1.8b, v2.8b
+// CHECK-ERROR: ^
+// CHECK-ERROR: error: invalid operand for instruction
+// CHECK-ERROR: sqdmlsl2 v0.8h, v1.16b, v2.16b
+// CHECK-ERROR: ^
+
+
+ sqdmull v0.4s, v1.4s, v2.4h
+ sqdmull v0.2d, v1.2d, v2.2s
+
+// CHECK-ERROR: error: invalid operand for instruction
+// CHECK-ERROR: sqdmull v0.4s, v1.4s, v2.4h
+// CHECK-ERROR: ^
+// CHECK-ERROR: error: invalid operand for instruction
+// CHECK-ERROR: sqdmull v0.2d, v1.2d, v2.2s
+// CHECK-ERROR: ^
+
+ sqdmull2 v0.4s, v1.8s, v2.8h
+ sqdmull2 v0.2d, v1.4d, v2.4s
+
+// CHECK-ERROR: error: invalid operand for instruction
+// CHECK-ERROR: sqdmull2 v0.4s, v1.8s, v2.8h
+// CHECK-ERROR: ^
+// CHECK-ERROR: error: invalid operand for instruction
+// CHECK-ERROR: sqdmull2 v0.2d, v1.4d, v2.4s
+// CHECK-ERROR: ^
+
+ // Mismatched vector types
+ sqdmull v0.8h, v1.8b, v2.8b
+ sqdmull2 v0.8h, v1.16b, v2.16b
+
+// CHECK-ERROR: error: invalid operand for instruction
+// CHECK-ERROR: sqdmull v0.8h, v1.8b, v2.8b
+// CHECK-ERROR: ^
+// CHECK-ERROR: error: invalid operand for instruction
+// CHECK-ERROR: sqdmull2 v0.8h, v1.16b, v2.16b
+// CHECK-ERROR: ^
+
+
+//------------------------------------------------------------------------------
+// Long - Variant 3
+//------------------------------------------------------------------------------
+
+ pmull v0.8h, v1.8h, v2.8b
+
+// CHECK-ERROR: error: invalid operand for instruction
+// CHECK-ERROR: pmull v0.8h, v1.8h, v2.8b
+// CHECK-ERROR: ^
+
+ // Mismatched vector types
+ pmull v0.4s, v1.4h, v2.4h
+ pmull v0.2d, v1.2s, v2.2s
+
+// CHECK-ERROR: error: invalid operand for instruction
+// CHECK-ERROR: pmull v0.4s, v1.4h, v2.4h
+// CHECK-ERROR: ^
+// CHECK-ERROR: error: invalid operand for instruction
+// CHECK-ERROR: pmull v0.2d, v1.2s, v2.2s
+// CHECK-ERROR: ^
+
+
+ pmull2 v0.8h, v1.16h, v2.16b
+
+// CHECK-ERROR: error: invalid operand for instruction
+// CHECK-ERROR: pmull2 v0.8h, v1.16h, v2.16b
+// CHECK-ERROR: ^
+
+ // Mismatched vector types
+ pmull2 v0.4s, v1.8h v2.8h
+ pmull2 v0.2d, v1.4s, v2.4s
+
+// CHECK-ERROR: error: expected comma before next operand
+// CHECK-ERROR: pmull2 v0.4s, v1.8h v2.8h
+// CHECK-ERROR: ^
+// CHECK-ERROR: error: invalid operand for instruction
+// CHECK-ERROR: pmull2 v0.2d, v1.4s, v2.4s
+// CHECK-ERROR: ^
+
+//------------------------------------------------------------------------------
+// Widen
+//------------------------------------------------------------------------------
+
+ saddw v0.8h, v1.8h, v2.8h
+ saddw v0.4s, v1.4s, v2.4s
+ saddw v0.2d, v1.2d, v2.2d
+
+// CHECK-ERROR: error: invalid operand for instruction
+// CHECK-ERROR: saddw v0.8h, v1.8h, v2.8h
+// CHECK-ERROR: ^
+// CHECK-ERROR: error: invalid operand for instruction
+// CHECK-ERROR: saddw v0.4s, v1.4s, v2.4s
+// CHECK-ERROR: ^
+// CHECK-ERROR: error: invalid operand for instruction
+// CHECK-ERROR: saddw v0.2d, v1.2d, v2.2d
+// CHECK-ERROR: ^
+
+ saddw2 v0.8h, v1.8h, v2.16h
+ saddw2 v0.4s, v1.4s, v2.8s
+ saddw2 v0.2d, v1.2d, v2.4d
+
+// CHECK-ERROR: error: invalid operand for instruction
+// CHECK-ERROR: saddw2 v0.8h, v1.8h, v2.16h
+// CHECK-ERROR: ^
+// CHECK-ERROR: error: invalid operand for instruction
+// CHECK-ERROR: saddw2 v0.4s, v1.4s, v2.8s
+// CHECK-ERROR: ^
+// CHECK-ERROR: error: invalid operand for instruction
+// CHECK-ERROR: saddw2 v0.2d, v1.2d, v2.4d
+// CHECK-ERROR: ^
+
+ uaddw v0.8h, v1.8h, v2.8h
+ uaddw v0.4s, v1.4s, v2.4s
+ uaddw v0.2d, v1.2d, v2.2d
+
+// CHECK-ERROR: error: invalid operand for instruction
+// CHECK-ERROR: uaddw v0.8h, v1.8h, v2.8h
+// CHECK-ERROR: ^
+// CHECK-ERROR: error: invalid operand for instruction
+// CHECK-ERROR: uaddw v0.4s, v1.4s, v2.4s
+// CHECK-ERROR: ^
+// CHECK-ERROR: error: invalid operand for instruction
+// CHECK-ERROR: uaddw v0.2d, v1.2d, v2.2d
+// CHECK-ERROR: ^
+
+ uaddw2 v0.8h, v1.8h, v2.16h
+ uaddw2 v0.4s, v1.4s, v2.8s
+ uaddw2 v0.2d, v1.2d, v2.4d
+
+// CHECK-ERROR: error: invalid operand for instruction
+// CHECK-ERROR: uaddw2 v0.8h, v1.8h, v2.16h
+// CHECK-ERROR: ^
+// CHECK-ERROR: error: invalid operand for instruction
+// CHECK-ERROR: uaddw2 v0.4s, v1.4s, v2.8s
+// CHECK-ERROR: ^
+// CHECK-ERROR: error: invalid operand for instruction
+// CHECK-ERROR: uaddw2 v0.2d, v1.2d, v2.4d
+// CHECK-ERROR: ^
+
+ ssubw v0.8h, v1.8h, v2.8h
+ ssubw v0.4s, v1.4s, v2.4s
+ ssubw v0.2d, v1.2d, v2.2d
+
+// CHECK-ERROR: error: invalid operand for instruction
+// CHECK-ERROR: ssubw v0.8h, v1.8h, v2.8h
+// CHECK-ERROR: ^
+// CHECK-ERROR: error: invalid operand for instruction
+// CHECK-ERROR: ssubw v0.4s, v1.4s, v2.4s
+// CHECK-ERROR: ^
+// CHECK-ERROR: error: invalid operand for instruction
+// CHECK-ERROR: ssubw v0.2d, v1.2d, v2.2d
+// CHECK-ERROR: ^
+
+ ssubw2 v0.8h, v1.8h, v2.16h
+ ssubw2 v0.4s, v1.4s, v2.8s
+ ssubw2 v0.2d, v1.2d, v2.4d
+
+// CHECK-ERROR: error: invalid operand for instruction
+// CHECK-ERROR: ssubw2 v0.8h, v1.8h, v2.16h
+// CHECK-ERROR: ^
+// CHECK-ERROR: error: invalid operand for instruction
+// CHECK-ERROR: ssubw2 v0.4s, v1.4s, v2.8s
+// CHECK-ERROR: ^
+// CHECK-ERROR: error: invalid operand for instruction
+// CHECK-ERROR: ssubw2 v0.2d, v1.2d, v2.4d
+// CHECK-ERROR: ^
+
+ usubw v0.8h, v1.8h, v2.8h
+ usubw v0.4s, v1.4s, v2.4s
+ usubw v0.2d, v1.2d, v2.2d
+
+// CHECK-ERROR: error: invalid operand for instruction
+// CHECK-ERROR: usubw v0.8h, v1.8h, v2.8h
+// CHECK-ERROR: ^
+// CHECK-ERROR: error: invalid operand for instruction
+// CHECK-ERROR: usubw v0.4s, v1.4s, v2.4s
+// CHECK-ERROR: ^
+// CHECK-ERROR: error: invalid operand for instruction
+// CHECK-ERROR: usubw v0.2d, v1.2d, v2.2d
+// CHECK-ERROR: ^
+
+ usubw2 v0.8h, v1.8h, v2.16h
+ usubw2 v0.4s, v1.4s, v2.8s
+ usubw2 v0.2d, v1.2d, v2.4d
+
+// CHECK-ERROR: error: invalid operand for instruction
+// CHECK-ERROR: usubw2 v0.8h, v1.8h, v2.16h
+// CHECK-ERROR: ^
+// CHECK-ERROR: error: invalid operand for instruction
+// CHECK-ERROR: usubw2 v0.4s, v1.4s, v2.8s
+// CHECK-ERROR: ^
+// CHECK-ERROR: error: invalid operand for instruction
+// CHECK-ERROR: usubw2 v0.2d, v1.2d, v2.4d
+// CHECK-ERROR: ^
+
+//------------------------------------------------------------------------------
+// Narrow
+//------------------------------------------------------------------------------
+
+ addhn v0.8b, v1.8h, v2.8d
+ addhn v0.4h, v1.4s, v2.4h
+ addhn v0.2s, v1.2d, v2.2s
+
+// CHECK-ERROR: error: invalid operand for instruction
+// CHECK-ERROR: addhn v0.8b, v1.8h, v2.8d
+// CHECK-ERROR: ^
+// CHECK-ERROR: error: invalid operand for instruction
+// CHECK-ERROR: addhn v0.4h, v1.4s, v2.4h
+// CHECK-ERROR: ^
+// CHECK-ERROR: error: invalid operand for instruction
+// CHECK-ERROR: addhn v0.2s, v1.2d, v2.2s
+// CHECK-ERROR: ^
+
+ addhn2 v0.16b, v1.8h, v2.8b
+ addhn2 v0.8h, v1.4s, v2.4h
+ addhn2 v0.4s, v1.2d, v2.2s
+
+// CHECK-ERROR: error: invalid operand for instruction
+// CHECK-ERROR: addhn2 v0.16b, v1.8h, v2.8b
+// CHECK-ERROR: ^
+// CHECK-ERROR: error: invalid operand for instruction
+// CHECK-ERROR: addhn2 v0.8h, v1.4s, v2.4h
+// CHECK-ERROR: ^
+// CHECK-ERROR: error: invalid operand for instruction
+// CHECK-ERROR: addhn2 v0.4s, v1.2d, v2.2s
+// CHECK-ERROR: ^
+
+ raddhn v0.8b, v1.8h, v2.8b
+ raddhn v0.4h, v1.4s, v2.4h
+ raddhn v0.2s, v1.2d, v2.2s
+
+// CHECK-ERROR: error: invalid operand for instruction
+// CHECK-ERROR: raddhn v0.8b, v1.8h, v2.8b
+// CHECK-ERROR: ^
+// CHECK-ERROR: error: invalid operand for instruction
+// CHECK-ERROR: raddhn v0.4h, v1.4s, v2.4h
+// CHECK-ERROR: ^
+// CHECK-ERROR: error: invalid operand for instruction
+// CHECK-ERROR: raddhn v0.2s, v1.2d, v2.2s
+// CHECK-ERROR: ^
+
+ raddhn2 v0.16b, v1.8h, v2.8b
+ raddhn2 v0.8h, v1.4s, v2.4h
+ raddhn2 v0.4s, v1.2d, v2.2s
+
+// CHECK-ERROR: error: invalid operand for instruction
+// CHECK-ERROR: raddhn2 v0.16b, v1.8h, v2.8b
+// CHECK-ERROR: ^
+// CHECK-ERROR: error: invalid operand for instruction
+// CHECK-ERROR: raddhn2 v0.8h, v1.4s, v2.4h
+// CHECK-ERROR: ^
+// CHECK-ERROR: error: invalid operand for instruction
+// CHECK-ERROR: raddhn2 v0.4s, v1.2d, v2.2s
+// CHECK-ERROR: ^
+
+ rsubhn v0.8b, v1.8h, v2.8b
+ rsubhn v0.4h, v1.4s, v2.4h
+ rsubhn v0.2s, v1.2d, v2.2s
+
+// CHECK-ERROR: error: invalid operand for instruction
+// CHECK-ERROR: rsubhn v0.8b, v1.8h, v2.8b
+// CHECK-ERROR: ^
+// CHECK-ERROR: error: invalid operand for instruction
+// CHECK-ERROR: rsubhn v0.4h, v1.4s, v2.4h
+// CHECK-ERROR: ^
+// CHECK-ERROR: error: invalid operand for instruction
+// CHECK-ERROR: rsubhn v0.2s, v1.2d, v2.2s
+// CHECK-ERROR: ^
+
+ rsubhn2 v0.16b, v1.8h, v2.8b
+ rsubhn2 v0.8h, v1.4s, v2.4h
+ rsubhn2 v0.4s, v1.2d, v2.2s
+
+// CHECK-ERROR: error: invalid operand for instruction
+// CHECK-ERROR: rsubhn2 v0.16b, v1.8h, v2.8b
+// CHECK-ERROR: ^
+// CHECK-ERROR: error: invalid operand for instruction
+// CHECK-ERROR: rsubhn2 v0.8h, v1.4s, v2.4h
+// CHECK-ERROR: ^
+// CHECK-ERROR: error: invalid operand for instruction
+// CHECK-ERROR: rsubhn2 v0.4s, v1.2d, v2.2s
+// CHECK-ERROR: ^
+
diff --git a/test/MC/Disassembler/AArch64/neon-instructions.txt b/test/MC/Disassembler/AArch64/neon-instructions.txt
index a7029b2..fa9f73f 100644
--- a/test/MC/Disassembler/AArch64/neon-instructions.txt
+++ b/test/MC/Disassembler/AArch64/neon-instructions.txt
@@ -1089,3 +1089,363 @@
0x20,0xfc,0x3d,0x2f
0x20,0xfc,0x3d,0x6f
0x20,0xfc,0x7d,0x6f
+
+
+#------------------------------------------------------------------------------
+# Vector with 3 operands having different data types
+#------------------------------------------------------------------------------
+
+#------------------------------------------------------------------------------
+# Long
+#------------------------------------------------------------------------------
+
+#------------------------------------------------------------------------------
+# Long - Variant 1
+#------------------------------------------------------------------------------
+
+# CHECK: saddl v0.8h, v1.8b, v2.8b
+# CHECK: saddl v0.4s, v1.4h, v2.4h
+# CHECK: saddl v0.2d, v1.2s, v2.2s
+0x20 0x00 0x22 0x0e
+0x20 0x00 0x62 0x0e
+0x20 0x00 0xa2 0x0e
+
+# CHECK: saddl2 v0.4s, v1.8h, v2.8h
+# CHECK: saddl2 v0.8h, v1.16b, v2.16b
+# CHECK: saddl2 v0.2d, v1.4s, v2.4s
+0x20 0x00 0x62 0x4e
+0x20 0x00 0x22 0x4e
+0x20 0x00 0xa2 0x4e
+
+# CHECK: uaddl v0.8h, v1.8b, v2.8b
+# CHECK: uaddl v0.4s, v1.4h, v2.4h
+# CHECK: uaddl v0.2d, v1.2s, v2.2s
+0x20 0x00 0x22 0x2e
+0x20 0x00 0x62 0x2e
+0x20 0x00 0xa2 0x2e
+
+# CHECK: uaddl2 v0.8h, v1.16b, v2.16b
+# CHECK: uaddl2 v0.4s, v1.8h, v2.8h
+# CHECK: uaddl2 v0.2d, v1.4s, v2.4s
+0x20 0x00 0x22 0x6e
+0x20 0x00 0x62 0x6e
+0x20 0x00 0xa2 0x6e
+
+# CHECK: ssubl v0.8h, v1.8b, v2.8b
+# CHECK: ssubl v0.4s, v1.4h, v2.4h
+# CHECK: ssubl v0.2d, v1.2s, v2.2s
+0x20 0x20 0x22 0x0e
+0x20 0x20 0x62 0x0e
+0x20 0x20 0xa2 0x0e
+
+# CHECK: ssubl2 v0.8h, v1.16b, v2.16b
+# CHECK: ssubl2 v0.4s, v1.8h, v2.8h
+# CHECK: ssubl2 v0.2d, v1.4s, v2.4s
+0x20 0x20 0x22 0x4e
+0x20 0x20 0x62 0x4e
+0x20 0x20 0xa2 0x4e
+
+# CHECK: usubl v0.8h, v1.8b, v2.8b
+# CHECK: usubl v0.4s, v1.4h, v2.4h
+# CHECK: usubl v0.2d, v1.2s, v2.2s
+0x20 0x20 0x22 0x2e
+0x20 0x20 0x62 0x2e
+0x20 0x20 0xa2 0x2e
+
+# CHECK: usubl2 v0.8h, v1.16b, v2.16b
+# CHECK: usubl2 v0.4s, v1.8h, v2.8h
+# CHECK: usubl2 v0.2d, v1.4s, v2.4s
+0x20 0x20 0x22 0x6e
+0x20 0x20 0x62 0x6e
+0x20 0x20 0xa2 0x6e
+
+# CHECK: sabal v0.8h, v1.8b, v2.8b
+# CHECK: sabal v0.4s, v1.4h, v2.4h
+# CHECK: sabal v0.2d, v1.2s, v2.2s
+0x20 0x50 0x22 0x0e
+0x20 0x50 0x62 0x0e
+0x20 0x50 0xa2 0x0e
+
+# CHECK: sabal2 v0.8h, v1.16b, v2.16b
+# CHECK: sabal2 v0.4s, v1.8h, v2.8h
+# CHECK: sabal2 v0.2d, v1.4s, v2.4s
+0x20 0x50 0x22 0x4e
+0x20 0x50 0x62 0x4e
+0x20 0x50 0xa2 0x4e
+
+# CHECK: uabal v0.8h, v1.8b, v2.8b
+# CHECK: uabal v0.4s, v1.4h, v2.4h
+# CHECK: uabal v0.2d, v1.2s, v2.2s
+0x20 0x50 0x22 0x2e
+0x20 0x50 0x62 0x2e
+0x20 0x50 0xa2 0x2e
+
+# CHECK: uabal2 v0.8h, v1.16b, v2.16b
+# CHECK: uabal2 v0.4s, v1.8h, v2.8h
+# CHECK: uabal2 v0.2d, v1.4s, v2.4s
+0x20 0x50 0x22 0x6e
+0x20 0x50 0x62 0x6e
+0x20 0x50 0xa2 0x6e
+
+# CHECK: sabdl v0.8h, v1.8b, v2.8b
+# CHECK: sabdl v0.4s, v1.4h, v2.4h
+# CHECK: sabdl v0.2d, v1.2s, v2.2s
+0x20 0x70 0x22 0x0e
+0x20 0x70 0x62 0x0e
+0x20 0x70 0xa2 0x0e
+
+# CHECK: sabdl2 v0.8h, v1.16b, v2.16b
+# CHECK: sabdl2 v0.4s, v1.8h, v2.8h
+# CHECK: sabdl2 v0.2d, v1.4s, v2.4s
+0x20 0x70 0x22 0x4e
+0x20 0x70 0x62 0x4e
+0x20 0x70 0xa2 0x4e
+
+# CHECK: uabdl v0.8h, v1.8b, v2.8b
+# CHECK: uabdl v0.4s, v1.4h, v2.4h
+# CHECK: uabdl v0.2d, v1.2s, v2.2s
+0x20 0x70 0x22 0x2e
+0x20 0x70 0x62 0x2e
+0x20 0x70 0xa2 0x2e
+
+# CHECK: uabdl2 v0.8h, v1.16b, v2.16b
+# CHECK: uabdl2 v0.4s, v1.8h, v2.8h
+# CHECK: uabdl2 v0.2d, v1.4s, v2.4s
+0x20 0x70 0x22 0x6e
+0x20 0x70 0x62 0x6e
+0x20 0x70 0xa2 0x6e
+
+# CHECK: smlal v0.8h, v1.8b, v2.8b
+# CHECK: smlal v0.4s, v1.4h, v2.4h
+# CHECK: smlal v0.2d, v1.2s, v2.2s
+0x20 0x80 0x22 0x0e
+0x20 0x80 0x62 0x0e
+0x20 0x80 0xa2 0x0e
+
+# CHECK: smlal2 v0.8h, v1.16b, v2.16b
+# CHECK: smlal2 v0.4s, v1.8h, v2.8h
+# CHECK: smlal2 v0.2d, v1.4s, v2.4s
+0x20 0x80 0x22 0x4e
+0x20 0x80 0x62 0x4e
+0x20 0x80 0xa2 0x4e
+
+# CHECK: umlal v0.8h, v1.8b, v2.8b
+# CHECK: umlal v0.4s, v1.4h, v2.4h
+# CHECK: umlal v0.2d, v1.2s, v2.2s
+
+0x20 0x80 0x22 0x2e
+0x20 0x80 0x62 0x2e
+0x20 0x80 0xa2 0x2e
+
+# CHECK: umlal2 v0.8h, v1.16b, v2.16b
+# CHECK: umlal2 v0.4s, v1.8h, v2.8h
+# CHECK: umlal2 v0.2d, v1.4s, v2.4s
+0x20 0x80 0x22 0x6e
+0x20 0x80 0x62 0x6e
+0x20 0x80 0xa2 0x6e
+
+# CHECK: smlsl v0.8h, v1.8b, v2.8b
+# CHECK: smlsl v0.4s, v1.4h, v2.4h
+# CHECK: smlsl v0.2d, v1.2s, v2.2s
+0x20 0xa0 0x22 0x0e
+0x20 0xa0 0x62 0x0e
+0x20 0xa0 0xa2 0x0e
+
+# CHECK: smlsl2 v0.8h, v1.16b, v2.16b
+# CHECK: smlsl2 v0.4s, v1.8h, v2.8h
+# CHECK: smlsl2 v0.2d, v1.4s, v2.4s
+0x20 0xa0 0x22 0x4e
+0x20 0xa0 0x62 0x4e
+0x20 0xa0 0xa2 0x4e
+
+# CHECK: umlsl v0.8h, v1.8b, v2.8b
+# CHECK: umlsl v0.4s, v1.4h, v2.4h
+# CHECK: umlsl v0.2d, v1.2s, v2.2s
+0x20 0xa0 0x22 0x2e
+0x20 0xa0 0x62 0x2e
+0x20 0xa0 0xa2 0x2e
+
+# CHECK: umlsl2 v0.8h, v1.16b, v2.16b
+# CHECK: umlsl2 v0.4s, v1.8h, v2.8h
+# CHECK: umlsl2 v0.2d, v1.4s, v2.4s
+0x20 0xa0 0x22 0x6e
+0x20 0xa0 0x62 0x6e
+0x20 0xa0 0xa2 0x6e
+
+# CHECK: smull v0.8h, v1.8b, v2.8b
+# CHECK: smull v0.4s, v1.4h, v2.4h
+# CHECK: smull v0.2d, v1.2s, v2.2s
+0x20 0xc0 0x22 0x0e
+0x20 0xc0 0x62 0x0e
+0x20 0xc0 0xa2 0x0e
+
+# CHECK: smull2 v0.8h, v1.16b, v2.16b
+# CHECK: smull2 v0.4s, v1.8h, v2.8h
+# CHECK: smull2 v0.2d, v1.4s, v2.4s
+0x20 0xc0 0x22 0x4e
+0x20 0xc0 0x62 0x4e
+0x20 0xc0 0xa2 0x4e
+
+# CHECK: umull v0.8h, v1.8b, v2.8b
+# CHECK: umull v0.4s, v1.4h, v2.4h
+# CHECK: umull v0.2d, v1.2s, v2.2s
+0x20 0xc0 0x22 0x2e
+0x20 0xc0 0x62 0x2e
+0x20 0xc0 0xa2 0x2e
+
+# CHECK: umull2 v0.8h, v1.16b, v2.16b
+# CHECK: umull2 v0.4s, v1.8h, v2.8h
+# CHECK: umull2 v0.2d, v1.4s, v2.4s
+0x20 0xc0 0x22 0x6e
+0x20 0xc0 0x62 0x6e
+0x20 0xc0 0xa2 0x6e
+
+#------------------------------------------------------------------------------
+# Long - Variant 2
+#------------------------------------------------------------------------------
+
+# CHECK: sqdmlal v0.4s, v1.4h, v2.4h
+# CHECK: sqdmlal v0.2d, v1.2s, v2.2s
+0x20 0x90 0x62 0x0e
+0x20 0x90 0xa2 0x0e
+
+# CHECK: sqdmlal2 v0.4s, v1.8h, v2.8h
+# CHECK: sqdmlal2 v0.2d, v1.4s, v2.4s
+0x20 0x90 0x62 0x4e
+0x20 0x90 0xa2 0x4e
+
+# CHECK: sqdmlsl v0.4s, v1.4h, v2.4h
+# CHECK: sqdmlsl v0.2d, v1.2s, v2.2s
+0x20 0xb0 0x62 0x0e
+0x20 0xb0 0xa2 0x0e
+
+# CHECK: sqdmlsl2 v0.4s, v1.8h, v2.8h
+# CHECK: sqdmlsl2 v0.2d, v1.4s, v2.4s
+0x20 0xb0 0x62 0x4e
+0x20 0xb0 0xa2 0x4e
+
+# CHECK: sqdmull v0.4s, v1.4h, v2.4h
+# CHECK: sqdmull v0.2d, v1.2s, v2.2s
+0x20 0xd0 0x62 0x0e
+0x20 0xd0 0xa2 0x0e
+
+# CHECK: sqdmull2 v0.4s, v1.8h, v2.8h
+# CHECK: sqdmull2 v0.2d, v1.4s, v2.4s
+0x20 0xd0 0x62 0x4e
+0x20 0xd0 0xa2 0x4e
+
+#------------------------------------------------------------------------------
+# Long - Variant 3
+#------------------------------------------------------------------------------
+
+# CHECK: pmull v0.8h, v1.8b, v2.8b
+0x20 0xe0 0x22 0x0e
+
+# CHECK: pmull2 v0.8h, v1.16b, v2.16b
+0x20 0xe0 0x22 0x4e
+
+#------------------------------------------------------------------------------
+# Widen
+#------------------------------------------------------------------------------
+
+# CHECK: saddw v0.8h, v1.8h, v2.8b
+# CHECK: saddw v0.4s, v1.4s, v2.4h
+# CHECK: saddw v0.2d, v1.2d, v2.2s
+0x20 0x10 0x22 0x0e
+0x20 0x10 0x62 0x0e
+0x20 0x10 0xa2 0x0e
+
+# CHECK: saddw2 v0.8h, v1.8h, v2.16b
+# CHECK: saddw2 v0.4s, v1.4s, v2.8h
+# CHECK: saddw2 v0.2d, v1.2d, v2.4s
+0x20 0x10 0x22 0x4e
+0x20 0x10 0x62 0x4e
+0x20 0x10 0xa2 0x4e
+
+# CHECK: uaddw v0.8h, v1.8h, v2.8b
+# CHECK: uaddw v0.4s, v1.4s, v2.4h
+# CHECK: uaddw v0.2d, v1.2d, v2.2s
+0x20 0x10 0x22 0x2e
+0x20 0x10 0x62 0x2e
+0x20 0x10 0xa2 0x2e
+
+# CHECK: uaddw2 v0.8h, v1.8h, v2.16b
+# CHECK: uaddw2 v0.4s, v1.4s, v2.8h
+# CHECK: uaddw2 v0.2d, v1.2d, v2.4s
+0x20 0x10 0x22 0x6e
+0x20 0x10 0x62 0x6e
+0x20 0x10 0xa2 0x6e
+
+# CHECK: ssubw v0.8h, v1.8h, v2.8b
+# CHECK: ssubw v0.4s, v1.4s, v2.4h
+# CHECK: ssubw v0.2d, v1.2d, v2.2s
+0x20 0x30 0x22 0x0e
+0x20 0x30 0x62 0x0e
+0x20 0x30 0xa2 0x0e
+
+# CHECK: ssubw2 v0.8h, v1.8h, v2.16b
+# CHECK: ssubw2 v0.4s, v1.4s, v2.8h
+# CHECK: ssubw2 v0.2d, v1.2d, v2.4s
+0x20 0x30 0x22 0x4e
+0x20 0x30 0x62 0x4e
+0x20 0x30 0xa2 0x4e
+
+# CHECK: usubw v0.8h, v1.8h, v2.8b
+# CHECK: usubw v0.4s, v1.4s, v2.4h
+# CHECK: usubw v0.2d, v1.2d, v2.2s
+0x20 0x30 0x22 0x2e
+0x20 0x30 0x62 0x2e
+0x20 0x30 0xa2 0x2e
+
+# CHECK: usubw2 v0.8h, v1.8h, v2.16b
+# CHECK: usubw2 v0.4s, v1.4s, v2.8h
+# CHECK: usubw2 v0.2d, v1.2d, v2.4s
+0x20 0x30 0x22 0x6e
+0x20 0x30 0x62 0x6e
+0x20 0x30 0xa2 0x6e
+
+#------------------------------------------------------------------------------
+# Narrow
+#------------------------------------------------------------------------------
+
+# CHECK: addhn v0.8b, v1.8h, v2.8h
+# CHECK: addhn v0.4h, v1.4s, v2.4s
+# CHECK: addhn v0.2s, v1.2d, v2.2d
+0x20 0x40 0x22 0x0e
+0x20 0x40 0x62 0x0e
+0x20 0x40 0xa2 0x0e
+
+# CHECK: addhn2 v0.16b, v1.8h, v2.8h
+# CHECK: addhn2 v0.8h, v1.4s, v2.4s
+# CHECK: addhn2 v0.4s, v1.2d, v2.2d
+0x20 0x40 0x22 0x4e
+0x20 0x40 0x62 0x4e
+0x20 0x40 0xa2 0x4e
+
+# CHECK: raddhn v0.8b, v1.8h, v2.8h
+# CHECK: raddhn v0.4h, v1.4s, v2.4s
+# CHECK: raddhn v0.2s, v1.2d, v2.2d
+0x20 0x40 0x22 0x2e
+0x20 0x40 0x62 0x2e
+0x20 0x40 0xa2 0x2e
+
+# CHECK: raddhn2 v0.16b, v1.8h, v2.8h
+# CHECK: raddhn2 v0.8h, v1.4s, v2.4s
+# CHECK: raddhn2 v0.4s, v1.2d, v2.2d
+0x20 0x40 0x22 0x6e
+0x20 0x40 0x62 0x6e
+0x20 0x40 0xa2 0x6e
+
+# CHECK: rsubhn v0.8b, v1.8h, v2.8h
+# CHECK: rsubhn v0.4h, v1.4s, v2.4s
+# CHECK: rsubhn v0.2s, v1.2d, v2.2d
+0x20 0x60 0x22 0x2e
+0x20 0x60 0x62 0x2e
+0x20 0x60 0xa2 0x2e
+
+# CHECK: rsubhn2 v0.16b, v1.8h, v2.8h
+# CHECK: rsubhn2 v0.8h, v1.4s, v2.4s
+# CHECK: rsubhn2 v0.4s, v1.2d, v2.2d
+0x20 0x60 0x22 0x6e
+0x20 0x60 0x62 0x6e
+0x20 0x60 0xa2 0x6e