29 files changed, 862 insertions, 259 deletions
diff --git a/test/CodeGen/AArch64/neon-add-sub.ll b/test/CodeGen/AArch64/neon-add-sub.ll
index 65ec8a2..566e029 100644
--- a/test/CodeGen/AArch64/neon-add-sub.ll
+++ b/test/CodeGen/AArch64/neon-add-sub.ll
@@ -118,15 +118,3 @@ define <2 x double> @sub2xdouble(<2 x double> %A, <2 x double> %B) {
 	ret <2 x double> %tmp3
 }
 
-define <1 x i64> @add1xi64(<1 x i64> %A, <1 x i64> %B) {
-;CHECK: add {{d[0-31]+}}, {{d[0-31]+}}, {{d[0-31]+}}
-	%tmp3 = add <1 x i64> %A, %B;
-	ret <1 x i64> %tmp3
-}
-
-define <1 x i64> @sub1xi64(<1 x i64> %A, <1 x i64> %B) {
-;CHECK: sub {{d[0-31]+}}, {{d[0-31]+}}, {{d[0-31]+}}
-	%tmp3 = sub <1 x i64> %A, %B;
-	ret <1 x i64> %tmp3
-}
-
diff --git a/test/CodeGen/AArch64/neon-copy.ll b/test/CodeGen/AArch64/neon-copy.ll
index c2854ed..2c50059 100644
--- a/test/CodeGen/AArch64/neon-copy.ll
+++ b/test/CodeGen/AArch64/neon-copy.ll
@@ -146,7 +146,7 @@ define i32 @umovw2s(<2 x i32> %tmp1) {
 }
 
 define i64 @umovx1d(<1 x i64> %tmp1) {
-;CHECK: umov {{x[0-31]+}}, {{v[0-31]+}}.d[0]
+;CHECK: fmov {{x[0-31]+}}, {{d[0-31]+}}
   %tmp3 = extractelement <1 x i64> %tmp1, i32 0
   ret i64 %tmp3
 }
diff --git a/test/CodeGen/AArch64/neon-rounding-shift.ll b/test/CodeGen/AArch64/neon-rounding-shift.ll
index 404e491..5b4ec28 100644
--- a/test/CodeGen/AArch64/neon-rounding-shift.ll
+++ b/test/CodeGen/AArch64/neon-rounding-shift.ll
@@ -102,23 +102,6 @@ define <4 x i32> @test_srshl_v4i32(<4 x i32> %lhs, <4 x i32> %rhs) {
   ret <4 x i32> %tmp1
 }
 
-declare <1 x i64> @llvm.arm.neon.vrshiftu.v1i64(<1 x i64>, <1 x i64>)
-declare <1 x i64> @llvm.arm.neon.vrshifts.v1i64(<1 x i64>, <1 x i64>)
-
-define <1 x i64> @test_urshl_v1i64(<1 x i64> %lhs, <1 x i64> %rhs) {
-; CHECK: test_urshl_v1i64:
-  %tmp1 = call <1 x i64> @llvm.arm.neon.vrshiftu.v1i64(<1 x i64> %lhs, <1 x i64> %rhs)
-; CHECK: urshl d0, d0, d1
-  ret <1 x i64> %tmp1
-}
-
-define <1 x i64> @test_srshl_v1i64(<1 x i64> %lhs, <1 x i64> %rhs) {
-; CHECK: test_srshl_v1i64:
-  %tmp1 = call <1 x i64> @llvm.arm.neon.vrshifts.v1i64(<1 x i64> %lhs, <1 x i64> %rhs)
-; CHECK: srshl d0, d0, d1
-  ret <1 x i64> %tmp1
-}
-
 declare <2 x i64> @llvm.arm.neon.vrshiftu.v2i64(<2 x i64>, <2 x i64>)
 declare <2 x i64> @llvm.arm.neon.vrshifts.v2i64(<2 x i64>, <2 x i64>)
 
diff --git a/test/CodeGen/AArch64/neon-saturating-add-sub.ll b/test/CodeGen/AArch64/neon-saturating-add-sub.ll
index b2fac1f..fc60d90 100644
--- a/test/CodeGen/AArch64/neon-saturating-add-sub.ll
+++ b/test/CodeGen/AArch64/neon-saturating-add-sub.ll
@@ -102,22 +102,7 @@ define <4 x i32> @test_sqadd_v4i32(<4 x i32> %lhs, <4 x i32> %rhs) {
   ret <4 x i32> %tmp1
 }
 
-declare <1 x i64> @llvm.arm.neon.vqaddu.v1i64(<1 x i64>, <1 x i64>)
-declare <1 x i64> @llvm.arm.neon.vqadds.v1i64(<1 x i64>, <1 x i64>)
-
-define <1 x i64> @test_uqadd_v1i64(<1 x i64> %lhs, <1 x i64> %rhs) {
-; CHECK: test_uqadd_v1i64:
-  %tmp1 = call <1 x i64> @llvm.arm.neon.vqaddu.v1i64(<1 x i64> %lhs, <1 x i64> %rhs)
-; CHECK: uqadd d0, d0, d1
-  ret <1 x i64> %tmp1
-}
 
-define <1 x i64> @test_sqadd_v1i64(<1 x i64> %lhs, <1 x i64> %rhs) {
-; CHECK: test_sqadd_v1i64:
-  %tmp1 = call <1 x i64> @llvm.arm.neon.vqadds.v1i64(<1 x i64> %lhs, <1 x i64> %rhs)
-; CHECK: sqadd d0, d0, d1
-  ret <1 x i64> %tmp1
-}
 
 declare <2 x i64> @llvm.arm.neon.vqaddu.v2i64(<2 x i64>, <2 x i64>)
 declare <2 x i64> @llvm.arm.neon.vqadds.v2i64(<2 x i64>, <2 x i64>)
@@ -254,21 +239,3 @@ define <2 x i64> @test_sqsub_v2i64(<2 x i64> %lhs, <2 x i64> %rhs) {
 ; CHECK: sqsub v0.2d, v0.2d, v1.2d
   ret <2 x i64> %tmp1
 }
-
-declare <1 x i64> @llvm.arm.neon.vqsubu.v1i64(<1 x i64>, <1 x i64>)
-declare <1 x i64> @llvm.arm.neon.vqsubs.v1i64(<1 x i64>, <1 x i64>)
-
-define <1 x i64> @test_uqsub_v1i64(<1 x i64> %lhs, <1 x i64> %rhs) {
-; CHECK: test_uqsub_v1i64:
-  %tmp1 = call <1 x i64> @llvm.arm.neon.vqsubu.v1i64(<1 x i64> %lhs, <1 x i64> %rhs)
-; CHECK: uqsub d0, d0, d1
-  ret <1 x i64> %tmp1
-}
-
-define <1 x i64> @test_sqsub_v1i64(<1 x i64> %lhs, <1 x i64> %rhs) {
-; CHECK: test_sqsub_v1i64:
-  %tmp1 = call <1 x i64> @llvm.arm.neon.vqsubs.v1i64(<1 x i64> %lhs, <1 x i64> %rhs)
-; CHECK: sqsub d0, d0, d1
-  ret <1 x i64> %tmp1
-}
-
diff --git a/test/CodeGen/AArch64/neon-saturating-rounding-shift.ll b/test/CodeGen/AArch64/neon-saturating-rounding-shift.ll
index 05d8dfe..d89262c 100644
--- a/test/CodeGen/AArch64/neon-saturating-rounding-shift.ll
+++ b/test/CodeGen/AArch64/neon-saturating-rounding-shift.ll
@@ -102,23 +102,6 @@ define <4 x i32> @test_sqrshl_v4i32(<4 x i32> %lhs, <4 x i32> %rhs) {
   ret <4 x i32> %tmp1
 }
 
-declare <1 x i64> @llvm.arm.neon.vqrshiftu.v1i64(<1 x i64>, <1 x i64>)
-declare <1 x i64> @llvm.arm.neon.vqrshifts.v1i64(<1 x i64>, <1 x i64>)
-
-define <1 x i64> @test_uqrshl_v1i64(<1 x i64> %lhs, <1 x i64> %rhs) {
-; CHECK: test_uqrshl_v1i64:
-  %tmp1 = call <1 x i64> @llvm.arm.neon.vqrshiftu.v1i64(<1 x i64> %lhs, <1 x i64> %rhs)
-; CHECK: uqrshl d0, d0, d1
-  ret <1 x i64> %tmp1
-}
-
-define <1 x i64> @test_sqrshl_v1i64(<1 x i64> %lhs, <1 x i64> %rhs) {
-; CHECK: test_sqrshl_v1i64:
-  %tmp1 = call <1 x i64> @llvm.arm.neon.vqrshifts.v1i64(<1 x i64> %lhs, <1 x i64> %rhs)
-; CHECK: sqrshl d0, d0, d1
-  ret <1 x i64> %tmp1
-}
-
 declare <2 x i64> @llvm.arm.neon.vqrshiftu.v2i64(<2 x i64>, <2 x i64>)
 declare <2 x i64> @llvm.arm.neon.vqrshifts.v2i64(<2 x i64>, <2 x i64>)
 
diff --git a/test/CodeGen/AArch64/neon-saturating-shift.ll b/test/CodeGen/AArch64/neon-saturating-shift.ll
index 3b7f78c..11009fb 100644
--- a/test/CodeGen/AArch64/neon-saturating-shift.ll
+++ b/test/CodeGen/AArch64/neon-saturating-shift.ll
@@ -102,23 +102,6 @@ define <4 x i32> @test_sqshl_v4i32(<4 x i32> %lhs, <4 x i32> %rhs) {
   ret <4 x i32> %tmp1
 }
 
-declare <1 x i64> @llvm.arm.neon.vqshiftu.v1i64(<1 x i64>, <1 x i64>)
-declare <1 x i64> @llvm.arm.neon.vqshifts.v1i64(<1 x i64>, <1 x i64>)
-
-define <1 x i64> @test_uqshl_v1i64(<1 x i64> %lhs, <1 x i64> %rhs) {
-; CHECK: test_uqshl_v1i64:
-  %tmp1 = call <1 x i64> @llvm.arm.neon.vqshiftu.v1i64(<1 x i64> %lhs, <1 x i64> %rhs)
-; CHECK: uqshl d0, d0, d1
-  ret <1 x i64> %tmp1
-}
-
-define <1 x i64> @test_sqshl_v1i64(<1 x i64> %lhs, <1 x i64> %rhs) {
-; CHECK: test_sqshl_v1i64:
-  %tmp1 = call <1 x i64> @llvm.arm.neon.vqshifts.v1i64(<1 x i64> %lhs, <1 x i64> %rhs)
-; CHECK: sqshl d0, d0, d1
-  ret <1 x i64> %tmp1
-}
-
 declare <2 x i64> @llvm.arm.neon.vqshiftu.v2i64(<2 x i64>, <2 x i64>)
 declare <2 x i64> @llvm.arm.neon.vqshifts.v2i64(<2 x i64>, <2 x i64>)
 
diff --git a/test/CodeGen/AArch64/neon-scalar-add-sub.ll b/test/CodeGen/AArch64/neon-scalar-add-sub.ll
new file mode 100644
index 0000000..09ca880
--- /dev/null
+++ b/test/CodeGen/AArch64/neon-scalar-add-sub.ll
@@ -0,0 +1,50 @@
+; RUN: llc < %s -verify-machineinstrs -mtriple=aarch64-none-linux-gnu -mattr=+neon | FileCheck %s
+
+define <1 x i64> @add1xi64(<1 x i64> %A, <1 x i64> %B) {
+;CHECK: add {{d[0-31]+}}, {{d[0-31]+}}, {{d[0-31]+}}
+	%tmp3 = add <1 x i64> %A, %B;
+	ret <1 x i64> %tmp3
+}
+
+define <1 x i64> @sub1xi64(<1 x i64> %A, <1 x i64> %B) {
+;CHECK: sub {{d[0-31]+}}, {{d[0-31]+}}, {{d[0-31]+}}
+	%tmp3 = sub <1 x i64> %A, %B;
+	ret <1 x i64> %tmp3
+}
+
+declare <1 x i64> @llvm.aarch64.neon.vaddds(<1 x i64>, <1 x i64>)
+declare <1 x i64> @llvm.aarch64.neon.vadddu(<1 x i64>, <1 x i64>)
+
+define <1 x i64> @test_add_v1i64(<1 x i64> %lhs, <1 x i64> %rhs) {
+; CHECK: test_add_v1i64:
+  %tmp1 = call <1 x i64> @llvm.aarch64.neon.vaddds(<1 x i64> %lhs, <1 x i64> %rhs)
+; CHECK: add {{d[0-31]+}}, {{d[0-31]+}}, {{d[0-31]+}}
+  ret <1 x i64> %tmp1
+}
+
+define <1 x i64> @test_uadd_v1i64(<1 x i64> %lhs, <1 x i64> %rhs) {
+; CHECK: test_uadd_v1i64:
+  %tmp1 = call <1 x i64> @llvm.aarch64.neon.vadddu(<1 x i64> %lhs, <1 x i64> %rhs)
+;CHECK: add {{d[0-31]+}}, {{d[0-31]+}}, {{d[0-31]+}}
+  ret <1 x i64> %tmp1
+}
+
+declare <1 x i64> @llvm.aarch64.neon.vsubds(<1 x i64>, <1 x i64>)
+declare <1 x i64> @llvm.aarch64.neon.vsubdu(<1 x i64>, <1 x i64>)
+
+define <1 x i64> @test_sub_v1i64(<1 x i64> %lhs, <1 x i64> %rhs) {
+; CHECK: test_sub_v1i64:
+  %tmp1 = call <1 x i64> @llvm.aarch64.neon.vsubds(<1 x i64> %lhs, <1 x i64> %rhs)
+; CHECK: sub {{d[0-31]+}}, {{d[0-31]+}}, {{d[0-31]+}}
+  ret <1 x i64> %tmp1
+}
+
+define <1 x i64> @test_usub_v1i64(<1 x i64> %lhs, <1 x i64> %rhs) {
+; CHECK: test_usub_v1i64:
+  %tmp1 = call <1 x i64> @llvm.aarch64.neon.vsubdu(<1 x i64> %lhs, <1 x i64> %rhs)
+;CHECK: sub {{d[0-31]+}}, {{d[0-31]+}}, {{d[0-31]+}}
+  ret <1 x i64> %tmp1
+}
+
+
+
diff --git a/test/CodeGen/AArch64/neon-scalar-reduce-pairwise.ll b/test/CodeGen/AArch64/neon-scalar-reduce-pairwise.ll
new file mode 100644
index 0000000..309997b
--- /dev/null
+++ b/test/CodeGen/AArch64/neon-scalar-reduce-pairwise.ll
@@ -0,0 +1,103 @@
+; RUN: llc -march=aarch64 -mattr=+neon < %s | FileCheck %s
+
+declare <1 x i64> @llvm.aarch64.neon.vpadd(<2 x i64>)
+
+define <1 x i64> @test_addp_v1i64(<2 x i64> %a) {
+; CHECK: test_addp_v1i64:
+        %val = call <1 x i64> @llvm.aarch64.neon.vpadd(<2 x i64> %a)
+; CHECK: addp d0, v0.2d
+        ret <1 x i64> %val
+}
+
+declare <1 x float> @llvm.aarch64.neon.vpfadd(<2 x float>)
+
+define <1 x float> @test_faddp_v1f32(<2 x float> %a) {
+; CHECK: test_faddp_v1f32:
+        %val = call <1 x float> @llvm.aarch64.neon.vpfadd(<2 x float> %a)
+; CHECK: faddp s0, v0.2s
+        ret <1 x float> %val
+}
+
+declare <1 x double> @llvm.aarch64.neon.vpfaddq(<2 x double>)
+
+define <1 x double> @test_faddp_v1f64(<2 x double> %a) {
+; CHECK: test_faddp_v1f64:
+        %val = call <1 x double> @llvm.aarch64.neon.vpfaddq(<2 x double> %a)
+; CHECK: faddp d0, v0.2d
+        ret <1 x double> %val
+}
+
+
+declare <1 x float> @llvm.aarch64.neon.vpmax(<2 x float>)
+
+define <1 x float> @test_fmaxp_v1f32(<2 x float> %a) {
+; CHECK: test_fmaxp_v1f32:
+        %val = call <1 x float> @llvm.aarch64.neon.vpmax(<2 x float> %a)
+; CHECK: fmaxp s0, v0.2s
+        ret <1 x float> %val
+}
+
+declare <1 x double> @llvm.aarch64.neon.vpmaxq(<2 x double>)
+
+define <1 x double> @test_fmaxp_v1f64(<2 x double> %a) {
+; CHECK: test_fmaxp_v1f64:
+        %val = call <1 x double> @llvm.aarch64.neon.vpmaxq(<2 x double> %a)
+; CHECK: fmaxp d0, v0.2d
+        ret <1 x double> %val
+}
+
+
+declare <1 x float> @llvm.aarch64.neon.vpmin(<2 x float>)
+
+define <1 x float> @test_fminp_v1f32(<2 x float> %a) {
+; CHECK: test_fminp_v1f32:
+        %val = call <1 x float> @llvm.aarch64.neon.vpmin(<2 x float> %a)
+; CHECK: fminp s0, v0.2s
+        ret <1 x float> %val
+}
+
+declare <1 x double> @llvm.aarch64.neon.vpminq(<2 x double>)
+
+define <1 x double> @test_fminp_v1f64(<2 x double> %a) {
+; CHECK: test_fminp_v1f64:
+        %val = call <1 x double> @llvm.aarch64.neon.vpminq(<2 x double> %a)
+; CHECK: fminp d0, v0.2d
+        ret <1 x double> %val
+}
+
+declare <1 x float> @llvm.aarch64.neon.vpfmaxnm(<2 x float>)
+
+define <1 x float> @test_fmaxnmp_v1f32(<2 x float> %a) {
+; CHECK: test_fmaxnmp_v1f32:
+        %val = call <1 x float> @llvm.aarch64.neon.vpfmaxnm(<2 x float> %a)
+; CHECK: fmaxnmp s0, v0.2s
+        ret <1 x float> %val
+}
+
+declare <1 x double> @llvm.aarch64.neon.vpfmaxnmq(<2 x double>)
+
+define <1 x double> @test_fmaxnmp_v1f64(<2 x double> %a) {
+; CHECK: test_fmaxnmp_v1f64:
+        %val = call <1 x double> @llvm.aarch64.neon.vpfmaxnmq(<2 x double> %a)
+; CHECK: fmaxnmp d0, v0.2d
+        ret <1 x double> %val
+}
+
+declare <1 x float> @llvm.aarch64.neon.vpfminnm(<2 x float>)
+
+define <1 x float> @test_fminnmp_v1f32(<2 x float> %a) {
+; CHECK: test_fminnmp_v1f32:
+        %val = call <1 x float> @llvm.aarch64.neon.vpfminnm(<2 x float> %a)
+; CHECK: fminnmp s0, v0.2s
+        ret <1 x float> %val
+}
+
+declare <1 x double> @llvm.aarch64.neon.vpfminnmq(<2 x double>)
+
+define <1 x double> @test_fminnmp_v1f64(<2 x double> %a) {
+; CHECK: test_fminnmp_v1f64:
+        %val = call <1 x double> @llvm.aarch64.neon.vpfminnmq(<2 x double> %a)
+; CHECK: fminnmp d0, v0.2d
+        ret <1 x double> %val
+}
+
diff --git a/test/CodeGen/AArch64/neon-scalar-rounding-shift.ll b/test/CodeGen/AArch64/neon-scalar-rounding-shift.ll
new file mode 100644
index 0000000..83ceb4e
--- /dev/null
+++ b/test/CodeGen/AArch64/neon-scalar-rounding-shift.ll
@@ -0,0 +1,39 @@
+; RUN: llc -mtriple=aarch64-none-linux-gnu -mattr=+neon < %s | FileCheck %s
+
+
+declare <1 x i64> @llvm.arm.neon.vrshiftu.v1i64(<1 x i64>, <1 x i64>)
+declare <1 x i64> @llvm.arm.neon.vrshifts.v1i64(<1 x i64>, <1 x i64>)
+
+define <1 x i64> @test_urshl_v1i64(<1 x i64> %lhs, <1 x i64> %rhs) {
+; CHECK: test_urshl_v1i64:
+  %tmp1 = call <1 x i64> @llvm.arm.neon.vrshiftu.v1i64(<1 x i64> %lhs, <1 x i64> %rhs)
+;CHECK: urshl {{d[0-31]+}}, {{d[0-31]+}}, {{d[0-31]+}}
+  ret <1 x i64> %tmp1
+}
+
+define <1 x i64> @test_srshl_v1i64(<1 x i64> %lhs, <1 x i64> %rhs) {
+; CHECK: test_srshl_v1i64:
+  %tmp1 = call <1 x i64> @llvm.arm.neon.vrshifts.v1i64(<1 x i64> %lhs, <1 x i64> %rhs)
+;CHECK: srshl {{d[0-31]+}}, {{d[0-31]+}}, {{d[0-31]+}}
+  ret <1 x i64> %tmp1
+}
+
+declare <1 x i64> @llvm.aarch64.neon.vrshldu(<1 x i64>, <1 x i64>)
+declare <1 x i64> @llvm.aarch64.neon.vrshlds(<1 x i64>, <1 x i64>)
+
+define <1 x i64> @test_urshl_v1i64_aarch64(<1 x i64> %lhs, <1 x i64> %rhs) {
+; CHECK: test_urshl_v1i64_aarch64:
+  %tmp1 = call <1 x i64> @llvm.aarch64.neon.vrshldu(<1 x i64> %lhs, <1 x i64> %rhs)
+;CHECK: urshl {{d[0-31]+}}, {{d[0-31]+}}, {{d[0-31]+}}
+  ret <1 x i64> %tmp1
+}
+
+define <1 x i64> @test_srshl_v1i64_aarch64(<1 x i64> %lhs, <1 x i64> %rhs) {
+; CHECK: test_srshl_v1i64_aarch64:
+  %tmp1 = call <1 x i64> @llvm.aarch64.neon.vrshlds(<1 x i64> %lhs, <1 x i64> %rhs)
+;CHECK: srshl {{d[0-31]+}}, {{d[0-31]+}}, {{d[0-31]+}}
+  ret <1 x i64> %tmp1
+}
+
+
+
diff --git a/test/CodeGen/AArch64/neon-scalar-saturating-add-sub.ll b/test/CodeGen/AArch64/neon-scalar-saturating-add-sub.ll
new file mode 100644
index 0000000..9e12978
--- /dev/null
+++ b/test/CodeGen/AArch64/neon-scalar-saturating-add-sub.ll
@@ -0,0 +1,171 @@
+; RUN: llc -mtriple=aarch64-none-linux-gnu -mattr=+neon < %s | FileCheck %s
+
+declare <1 x i64> @llvm.arm.neon.vqaddu.v1i64(<1 x i64>, <1 x i64>)
+declare <1 x i64> @llvm.arm.neon.vqadds.v1i64(<1 x i64>, <1 x i64>)
+
+define <1 x i64> @test_uqadd_v1i64(<1 x i64> %lhs, <1 x i64> %rhs) {
+; CHECK: test_uqadd_v1i64:
+  %tmp1 = call <1 x i64> @llvm.arm.neon.vqaddu.v1i64(<1 x i64> %lhs, <1 x i64> %rhs)
+; CHECK: uqadd d0, d0, d1
+  ret <1 x i64> %tmp1
+}
+
+define <1 x i64> @test_sqadd_v1i64(<1 x i64> %lhs, <1 x i64> %rhs) {
+; CHECK: test_sqadd_v1i64:
+  %tmp1 = call <1 x i64> @llvm.arm.neon.vqadds.v1i64(<1 x i64> %lhs, <1 x i64> %rhs)
+; CHECK: sqadd d0, d0, d1
+  ret <1 x i64> %tmp1
+}
+
+declare <1 x i64> @llvm.arm.neon.vqsubu.v1i64(<1 x i64>, <1 x i64>)
+declare <1 x i64> @llvm.arm.neon.vqsubs.v1i64(<1 x i64>, <1 x i64>)
+
+define <1 x i64> @test_uqsub_v1i64(<1 x i64> %lhs, <1 x i64> %rhs) {
+; CHECK: test_uqsub_v1i64:
+  %tmp1 = call <1 x i64> @llvm.arm.neon.vqsubu.v1i64(<1 x i64> %lhs, <1 x i64> %rhs)
+; CHECK: uqsub d0, d0, d1
+  ret <1 x i64> %tmp1
+}
+
+define <1 x i64> @test_sqsub_v1i64(<1 x i64> %lhs, <1 x i64> %rhs) {
+; CHECK: test_sqsub_v1i64:
+  %tmp1 = call <1 x i64> @llvm.arm.neon.vqsubs.v1i64(<1 x i64> %lhs, <1 x i64> %rhs)
+; CHECK: sqsub d0, d0, d1
+  ret <1 x i64> %tmp1
+}
+
+declare <1 x i8> @llvm.aarch64.neon.vqaddu.v1i8(<1 x i8>, <1 x i8>)
+declare <1 x i8> @llvm.aarch64.neon.vqadds.v1i8(<1 x i8>, <1 x i8>)
+
+define <1 x i8> @test_uqadd_v1i8_aarch64(<1 x i8> %lhs, <1 x i8> %rhs) {
+; CHECK: test_uqadd_v1i8_aarch64:
+  %tmp1 = call <1 x i8> @llvm.aarch64.neon.vqaddu.v1i8(<1 x i8> %lhs, <1 x i8> %rhs)
+;CHECK: uqadd {{b[0-31]+}}, {{b[0-31]+}}, {{b[0-31]+}}
+  ret <1 x i8> %tmp1
+}
+
+define <1 x i8> @test_sqadd_v1i8_aarch64(<1 x i8> %lhs, <1 x i8> %rhs) {
+; CHECK: test_sqadd_v1i8_aarch64:
+  %tmp1 = call <1 x i8> @llvm.aarch64.neon.vqadds.v1i8(<1 x i8> %lhs, <1 x i8> %rhs)
+;CHECK: sqadd {{b[0-31]+}}, {{b[0-31]+}}, {{b[0-31]+}}
+  ret <1 x i8> %tmp1
+}
+
+declare <1 x i8> @llvm.aarch64.neon.vqsubu.v1i8(<1 x i8>, <1 x i8>)
+declare <1 x i8> @llvm.aarch64.neon.vqsubs.v1i8(<1 x i8>, <1 x i8>)
+
+define <1 x i8> @test_uqsub_v1i8_aarch64(<1 x i8> %lhs, <1 x i8> %rhs) {
+; CHECK: test_uqsub_v1i8_aarch64:
+  %tmp1 = call <1 x i8> @llvm.aarch64.neon.vqsubu.v1i8(<1 x i8> %lhs, <1 x i8> %rhs)
+;CHECK: uqsub {{b[0-31]+}}, {{b[0-31]+}}, {{b[0-31]+}}
+  ret <1 x i8> %tmp1
+}
+
+define <1 x i8> @test_sqsub_v1i8_aarch64(<1 x i8> %lhs, <1 x i8> %rhs) {
+; CHECK: test_sqsub_v1i8_aarch64:
+  %tmp1 = call <1 x i8> @llvm.aarch64.neon.vqsubs.v1i8(<1 x i8> %lhs, <1 x i8> %rhs)
+;CHECK: sqsub {{b[0-31]+}}, {{b[0-31]+}}, {{b[0-31]+}}
+  ret <1 x i8> %tmp1
+}
+
+declare <1 x i16> @llvm.aarch64.neon.vqaddu.v1i16(<1 x i16>, <1 x i16>)
+declare <1 x i16> @llvm.aarch64.neon.vqadds.v1i16(<1 x i16>, <1 x i16>)
+
+define <1 x i16> @test_uqadd_v1i16_aarch64(<1 x i16> %lhs, <1 x i16> %rhs) {
+; CHECK: test_uqadd_v1i16_aarch64:
+  %tmp1 = call <1 x i16> @llvm.aarch64.neon.vqaddu.v1i16(<1 x i16> %lhs, <1 x i16> %rhs)
+;CHECK: uqadd {{h[0-31]+}}, {{h[0-31]+}}, {{h[0-31]+}}
+  ret <1 x i16> %tmp1
+}
+
+define <1 x i16> @test_sqadd_v1i16_aarch64(<1 x i16> %lhs, <1 x i16> %rhs) {
+; CHECK: test_sqadd_v1i16_aarch64:
+  %tmp1 = call <1 x i16> @llvm.aarch64.neon.vqadds.v1i16(<1 x i16> %lhs, <1 x i16> %rhs)
+;CHECK: sqadd {{h[0-31]+}}, {{h[0-31]+}}, {{h[0-31]+}}
+  ret <1 x i16> %tmp1
+}
+
+declare <1 x i16> @llvm.aarch64.neon.vqsubu.v1i16(<1 x i16>, <1 x i16>)
+declare <1 x i16> @llvm.aarch64.neon.vqsubs.v1i16(<1 x i16>, <1 x i16>)
+
+define <1 x i16> @test_uqsub_v1i16_aarch64(<1 x i16> %lhs, <1 x i16> %rhs) {
+; CHECK: test_uqsub_v1i16_aarch64:
+  %tmp1 = call <1 x i16> @llvm.aarch64.neon.vqsubu.v1i16(<1 x i16> %lhs, <1 x i16> %rhs)
+;CHECK: uqsub {{h[0-31]+}}, {{h[0-31]+}}, {{h[0-31]+}}
+  ret <1 x i16> %tmp1
+}
+
+define <1 x i16> @test_sqsub_v1i16_aarch64(<1 x i16> %lhs, <1 x i16> %rhs) {
+; CHECK: test_sqsub_v1i16_aarch64:
+  %tmp1 = call <1 x i16> @llvm.aarch64.neon.vqsubs.v1i16(<1 x i16> %lhs, <1 x i16> %rhs)
+;CHECK: sqsub {{h[0-31]+}}, {{h[0-31]+}}, {{h[0-31]+}}
+  ret <1 x i16> %tmp1
+}
+
+declare <1 x i32> @llvm.aarch64.neon.vqaddu.v1i32(<1 x i32>, <1 x i32>)
+declare <1 x i32> @llvm.aarch64.neon.vqadds.v1i32(<1 x i32>, <1 x i32>)
+
+define <1 x i32> @test_uqadd_v1i32_aarch64(<1 x i32> %lhs, <1 x i32> %rhs) {
+; CHECK: test_uqadd_v1i32_aarch64:
+  %tmp1 = call <1 x i32> @llvm.aarch64.neon.vqaddu.v1i32(<1 x i32> %lhs, <1 x i32> %rhs)
+;CHECK: uqadd {{s[0-31]+}}, {{s[0-31]+}}, {{s[0-31]+}}
+  ret <1 x i32> %tmp1
+}
+
+define <1 x i32> @test_sqadd_v1i32_aarch64(<1 x i32> %lhs, <1 x i32> %rhs) {
+; CHECK: test_sqadd_v1i32_aarch64:
+  %tmp1 = call <1 x i32> @llvm.aarch64.neon.vqadds.v1i32(<1 x i32> %lhs, <1 x i32> %rhs)
+;CHECK: sqadd {{s[0-31]+}}, {{s[0-31]+}}, {{s[0-31]+}}
+  ret <1 x i32> %tmp1
+}
+
+declare <1 x i32> @llvm.aarch64.neon.vqsubu.v1i32(<1 x i32>, <1 x i32>)
+declare <1 x i32> @llvm.aarch64.neon.vqsubs.v1i32(<1 x i32>, <1 x i32>)
+
+define <1 x i32> @test_uqsub_v1i32_aarch64(<1 x i32> %lhs, <1 x i32> %rhs) {
+; CHECK: test_uqsub_v1i32_aarch64:
+  %tmp1 = call <1 x i32> @llvm.aarch64.neon.vqsubu.v1i32(<1 x i32> %lhs, <1 x i32> %rhs)
+;CHECK: uqsub {{s[0-31]+}}, {{s[0-31]+}}, {{s[0-31]+}}
+  ret <1 x i32> %tmp1
+}
+
+define <1 x i32> @test_sqsub_v1i32_aarch64(<1 x i32> %lhs, <1 x i32> %rhs) {
+; CHECK: test_sqsub_v1i32_aarch64:
+  %tmp1 = call <1 x i32> @llvm.aarch64.neon.vqsubs.v1i32(<1 x i32> %lhs, <1 x i32> %rhs)
+;CHECK: sqsub {{s[0-31]+}}, {{s[0-31]+}}, {{s[0-31]+}}
+  ret <1 x i32> %tmp1
+}
+
+declare <1 x i64> @llvm.aarch64.neon.vqaddu.v1i64(<1 x i64>, <1 x i64>)
+declare <1 x i64> @llvm.aarch64.neon.vqadds.v1i64(<1 x i64>, <1 x i64>)
+
+define <1 x i64> @test_uqadd_v1i64_aarch64(<1 x i64> %lhs, <1 x i64> %rhs) {
+; CHECK: test_uqadd_v1i64_aarch64:
+  %tmp1 = call <1 x i64> @llvm.aarch64.neon.vqaddu.v1i64(<1 x i64> %lhs, <1 x i64> %rhs)
+;CHECK: uqadd {{d[0-31]+}}, {{d[0-31]+}}, {{d[0-31]+}}
+  ret <1 x i64> %tmp1
+}
+
+define <1 x i64> @test_sqadd_v1i64_aarch64(<1 x i64> %lhs, <1 x i64> %rhs) {
+; CHECK: test_sqadd_v1i64_aarch64:
+  %tmp1 = call <1 x i64> @llvm.aarch64.neon.vqadds.v1i64(<1 x i64> %lhs, <1 x i64> %rhs)
+;CHECK: sqadd {{d[0-31]+}}, {{d[0-31]+}}, {{d[0-31]+}}
+  ret <1 x i64> %tmp1
+}
+
+declare <1 x i64> @llvm.aarch64.neon.vqsubu.v1i64(<1 x i64>, <1 x i64>)
+declare <1 x i64> @llvm.aarch64.neon.vqsubs.v1i64(<1 x i64>, <1 x i64>)
+
+define <1 x i64> @test_uqsub_v1i64_aarch64(<1 x i64> %lhs, <1 x i64> %rhs) {
+; CHECK: test_uqsub_v1i64_aarch64:
+  %tmp1 = call <1 x i64> @llvm.aarch64.neon.vqsubu.v1i64(<1 x i64> %lhs, <1 x i64> %rhs)
+;CHECK: uqsub {{d[0-31]+}}, {{d[0-31]+}}, {{d[0-31]+}}
+  ret <1 x i64> %tmp1
+}
+
+define <1 x i64> @test_sqsub_v1i64_aarch64(<1 x i64> %lhs, <1 x i64> %rhs) {
+; CHECK: test_sqsub_v1i64_aarch64:
+  %tmp1 = call <1 x i64> @llvm.aarch64.neon.vqsubs.v1i64(<1 x i64> %lhs, <1 x i64> %rhs)
+;CHECK: sqsub {{d[0-31]+}}, {{d[0-31]+}}, {{d[0-31]+}}
+  ret <1 x i64> %tmp1
+}
diff --git a/test/CodeGen/AArch64/neon-scalar-saturating-rounding-shift.ll b/test/CodeGen/AArch64/neon-scalar-saturating-rounding-shift.ll
new file mode 100644
index 0000000..0fd67df
--- /dev/null
+++ b/test/CodeGen/AArch64/neon-scalar-saturating-rounding-shift.ll
@@ -0,0 +1,94 @@
+; RUN: llc -mtriple=aarch64-none-linux-gnu -mattr=+neon < %s | FileCheck %s
+
+declare <1 x i64> @llvm.arm.neon.vqrshiftu.v1i64(<1 x i64>, <1 x i64>)
+declare <1 x i64> @llvm.arm.neon.vqrshifts.v1i64(<1 x i64>, <1 x i64>)
+
+define <1 x i64> @test_uqrshl_v1i64(<1 x i64> %lhs, <1 x i64> %rhs) {
+; CHECK: test_uqrshl_v1i64:
+  %tmp1 = call <1 x i64> @llvm.arm.neon.vqrshiftu.v1i64(<1 x i64> %lhs, <1 x i64> %rhs)
+;CHECK: uqrshl {{d[0-31]+}}, {{d[0-31]+}}, {{d[0-31]+}}
+
+  ret <1 x i64> %tmp1
+}
+
+define <1 x i64> @test_sqrshl_v1i64(<1 x i64> %lhs, <1 x i64> %rhs) {
+; CHECK: test_sqrshl_v1i64:
+  %tmp1 = call <1 x i64> @llvm.arm.neon.vqrshifts.v1i64(<1 x i64> %lhs, <1 x i64> %rhs)
+;CHECK: sqrshl {{d[0-31]+}}, {{d[0-31]+}}, {{d[0-31]+}}
+  ret <1 x i64> %tmp1
+}
+
+declare <1 x i8> @llvm.aarch64.neon.vqrshlu.v1i8(<1 x i8>, <1 x i8>)
+declare <1 x i8> @llvm.aarch64.neon.vqrshls.v1i8(<1 x i8>, <1 x i8>)
+
+define <1 x i8> @test_uqrshl_v1i8_aarch64(<1 x i8> %lhs, <1 x i8> %rhs) {
+; CHECK: test_uqrshl_v1i8_aarch64:
+  %tmp1 = call <1 x i8> @llvm.aarch64.neon.vqrshlu.v1i8(<1 x i8> %lhs, <1 x i8> %rhs)
+;CHECK: uqrshl {{b[0-31]+}}, {{b[0-31]+}}, {{b[0-31]+}}
+
+  ret <1 x i8> %tmp1
+}
+
+define <1 x i8> @test_sqrshl_v1i8_aarch64(<1 x i8> %lhs, <1 x i8> %rhs) {
+; CHECK: test_sqrshl_v1i8_aarch64:
+  %tmp1 = call <1 x i8> @llvm.aarch64.neon.vqrshls.v1i8(<1 x i8> %lhs, <1 x i8> %rhs)
+;CHECK: sqrshl {{b[0-31]+}}, {{b[0-31]+}}, {{b[0-31]+}}
+  ret <1 x i8> %tmp1
+}
+
+declare <1 x i16> @llvm.aarch64.neon.vqrshlu.v1i16(<1 x i16>, <1 x i16>)
+declare <1 x i16> @llvm.aarch64.neon.vqrshls.v1i16(<1 x i16>, <1 x i16>)
+
+define <1 x i16> @test_uqrshl_v1i16_aarch64(<1 x i16> %lhs, <1 x i16> %rhs) {
+; CHECK: test_uqrshl_v1i16_aarch64:
+  %tmp1 = call <1 x i16> @llvm.aarch64.neon.vqrshlu.v1i16(<1 x i16> %lhs, <1 x i16> %rhs)
+;CHECK: uqrshl {{h[0-31]+}}, {{h[0-31]+}}, {{h[0-31]+}}
+
+  ret <1 x i16> %tmp1
+}
+
+define <1 x i16> @test_sqrshl_v1i16_aarch64(<1 x i16> %lhs, <1 x i16> %rhs) {
+; CHECK: test_sqrshl_v1i16_aarch64:
+  %tmp1 = call <1 x i16> @llvm.aarch64.neon.vqrshls.v1i16(<1 x i16> %lhs, <1 x i16> %rhs)
+;CHECK: sqrshl {{h[0-31]+}}, {{h[0-31]+}}, {{h[0-31]+}}
+  ret <1 x i16> %tmp1
+}
+
+declare <1 x i32> @llvm.aarch64.neon.vqrshlu.v1i32(<1 x i32>, <1 x i32>)
+declare <1 x i32> @llvm.aarch64.neon.vqrshls.v1i32(<1 x i32>, <1 x i32>)
+
+define <1 x i32> @test_uqrshl_v1i32_aarch64(<1 x i32> %lhs, <1 x i32> %rhs) {
+; CHECK: test_uqrshl_v1i32_aarch64:
+  %tmp1 = call <1 x i32> @llvm.aarch64.neon.vqrshlu.v1i32(<1 x i32> %lhs, <1 x i32> %rhs)
+;CHECK: uqrshl {{s[0-31]+}}, {{s[0-31]+}}, {{s[0-31]+}}
+
+  ret <1 x i32> %tmp1
+}
+
+define <1 x i32> @test_sqrshl_v1i32_aarch64(<1 x i32> %lhs, <1 x i32> %rhs) {
+; CHECK: test_sqrshl_v1i32_aarch64:
+  %tmp1 = call <1 x i32> @llvm.aarch64.neon.vqrshls.v1i32(<1 x i32> %lhs, <1 x i32> %rhs)
+;CHECK: sqrshl {{s[0-31]+}}, {{s[0-31]+}}, {{s[0-31]+}}
+  ret <1 x i32> %tmp1
+}
+
+declare <1 x i64> @llvm.aarch64.neon.vqrshlu.v1i64(<1 x i64>, <1 x i64>)
+declare <1 x i64> @llvm.aarch64.neon.vqrshls.v1i64(<1 x i64>, <1 x i64>)
+
+define <1 x i64> @test_uqrshl_v1i64_aarch64(<1 x i64> %lhs, <1 x i64> %rhs) {
+; CHECK: test_uqrshl_v1i64_aarch64:
+  %tmp1 = call <1 x i64> @llvm.aarch64.neon.vqrshlu.v1i64(<1 x i64> %lhs, <1 x i64> %rhs)
+;CHECK: uqrshl {{d[0-31]+}}, {{d[0-31]+}}, {{d[0-31]+}}
+
+  ret <1 x i64> %tmp1
+}
+
+define <1 x i64> @test_sqrshl_v1i64_aarch64(<1 x i64> %lhs, <1 x i64> %rhs) {
+; CHECK: test_sqrshl_v1i64_aarch64:
+  %tmp1 = call <1 x i64> @llvm.aarch64.neon.vqrshls.v1i64(<1 x i64> %lhs, <1 x i64> %rhs)
+;CHECK: sqrshl {{d[0-31]+}}, {{d[0-31]+}}, {{d[0-31]+}}
+  ret <1 x i64> %tmp1
+}
+
+
+
diff --git a/test/CodeGen/AArch64/neon-scalar-saturating-shift.ll b/test/CodeGen/AArch64/neon-scalar-saturating-shift.ll
new file mode 100644
index 0000000..8fdea24
--- /dev/null
+++ b/test/CodeGen/AArch64/neon-scalar-saturating-shift.ll
@@ -0,0 +1,88 @@
+; RUN: llc -mtriple=aarch64-none-linux-gnu -mattr=+neon < %s | FileCheck %s
+
+declare <1 x i64> @llvm.arm.neon.vqshiftu.v1i64(<1 x i64>, <1 x i64>)
+declare <1 x i64> @llvm.arm.neon.vqshifts.v1i64(<1 x i64>, <1 x i64>)
+
+define <1 x i64> @test_uqshl_v1i64(<1 x i64> %lhs, <1 x i64> %rhs) {
+; CHECK: test_uqshl_v1i64:
+  %tmp1 = call <1 x i64> @llvm.arm.neon.vqshiftu.v1i64(<1 x i64> %lhs, <1 x i64> %rhs)
+;CHECK: uqshl {{d[0-31]+}}, {{d[0-31]+}}, {{d[0-31]+}}
+  ret <1 x i64> %tmp1
+}
+
+define <1 x i64> @test_sqshl_v1i64(<1 x i64> %lhs, <1 x i64> %rhs) {
+; CHECK: test_sqshl_v1i64:
+  %tmp1 = call <1 x i64> @llvm.arm.neon.vqshifts.v1i64(<1 x i64> %lhs, <1 x i64> %rhs)
+;CHECK: sqshl {{d[0-31]+}}, {{d[0-31]+}}, {{d[0-31]+}}
+  ret <1 x i64> %tmp1
+}
+
+declare <1 x i8> @llvm.aarch64.neon.vqshlu.v1i8(<1 x i8>, <1 x i8>)
+declare <1 x i8> @llvm.aarch64.neon.vqshls.v1i8(<1 x i8>, <1 x i8>)
+
+define <1 x i8> @test_uqshl_v1i8_aarch64(<1 x i8> %lhs, <1 x i8> %rhs) {
+; CHECK: test_uqshl_v1i8_aarch64:
+  %tmp1 = call <1 x i8> @llvm.aarch64.neon.vqshlu.v1i8(<1 x i8> %lhs, <1 x i8> %rhs)
+;CHECK: uqshl {{b[0-31]+}}, {{b[0-31]+}}, {{b[0-31]+}}
+  ret <1 x i8> %tmp1
+}
+
+define <1 x i8> @test_sqshl_v1i8_aarch64(<1 x i8> %lhs, <1 x i8> %rhs) {
+; CHECK: test_sqshl_v1i8_aarch64:
+  %tmp1 = call <1 x i8> @llvm.aarch64.neon.vqshls.v1i8(<1 x i8> %lhs, <1 x i8> %rhs)
+;CHECK: sqshl {{b[0-31]+}}, {{b[0-31]+}}, {{b[0-31]+}}
+  ret <1 x i8> %tmp1
+}
+
+declare <1 x i16> @llvm.aarch64.neon.vqshlu.v1i16(<1 x i16>, <1 x i16>)
+declare <1 x i16> @llvm.aarch64.neon.vqshls.v1i16(<1 x i16>, <1 x i16>)
+
+define <1 x i16> @test_uqshl_v1i16_aarch64(<1 x i16> %lhs, <1 x i16> %rhs) {
+; CHECK: test_uqshl_v1i16_aarch64:
+  %tmp1 = call <1 x i16> @llvm.aarch64.neon.vqshlu.v1i16(<1 x i16> %lhs, <1 x i16> %rhs)
+;CHECK: uqshl {{h[0-31]+}}, {{h[0-31]+}}, {{h[0-31]+}}
+  ret <1 x i16> %tmp1
+}
+
+define <1 x i16> @test_sqshl_v1i16_aarch64(<1 x i16> %lhs, <1 x i16> %rhs) {
+; CHECK: test_sqshl_v1i16_aarch64:
+  %tmp1 = call <1 x i16> @llvm.aarch64.neon.vqshls.v1i16(<1 x i16> %lhs, <1 x i16> %rhs)
+;CHECK: sqshl {{h[0-31]+}}, {{h[0-31]+}}, {{h[0-31]+}}
+  ret <1 x i16> %tmp1
+}
+
+declare <1 x i32> @llvm.aarch64.neon.vqshlu.v1i32(<1 x i32>, <1 x i32>)
+declare <1 x i32> @llvm.aarch64.neon.vqshls.v1i32(<1 x i32>, <1 x i32>)
+
+define <1 x i32> @test_uqshl_v1i32_aarch64(<1 x i32> %lhs, <1 x i32> %rhs) {
+; CHECK: test_uqshl_v1i32_aarch64:
+  %tmp1 = call <1 x i32> @llvm.aarch64.neon.vqshlu.v1i32(<1 x i32> %lhs, <1 x i32> %rhs)
+;CHECK: uqshl {{s[0-31]+}}, {{s[0-31]+}}, {{s[0-31]+}}
+  ret <1 x i32> %tmp1
+}
+
+define <1 x i32> @test_sqshl_v1i32_aarch64(<1 x i32> %lhs, <1 x i32> %rhs) {
+; CHECK: test_sqshl_v1i32_aarch64:
+  %tmp1 = call <1 x i32> @llvm.aarch64.neon.vqshls.v1i32(<1 x i32> %lhs, <1 x i32> %rhs)
+;CHECK: sqshl {{s[0-31]+}}, {{s[0-31]+}}, {{s[0-31]+}}
+  ret <1 x i32> %tmp1
+}
+
+declare <1 x i64> @llvm.aarch64.neon.vqshlu.v1i64(<1 x i64>, <1 x i64>)
+declare <1 x i64> @llvm.aarch64.neon.vqshls.v1i64(<1 x i64>, <1 x i64>)
+
+define <1 x i64> @test_uqshl_v1i64_aarch64(<1 x i64> %lhs, <1 x i64> %rhs) {
+; CHECK: test_uqshl_v1i64_aarch64:
+  %tmp1 = call <1 x i64> @llvm.aarch64.neon.vqshlu.v1i64(<1 x i64> %lhs, <1 x i64> %rhs)
+;CHECK: uqshl {{d[0-31]+}}, {{d[0-31]+}}, {{d[0-31]+}}
+  ret <1 x i64> %tmp1
+}
+
+define <1 x i64> @test_sqshl_v1i64_aarch64(<1 x i64> %lhs, <1 x i64> %rhs) {
+; CHECK: test_sqshl_v1i64_aarch64:
+  %tmp1 = call <1 x i64> @llvm.aarch64.neon.vqshls.v1i64(<1 x i64> %lhs, <1 x i64> %rhs)
+;CHECK: sqshl {{d[0-31]+}}, {{d[0-31]+}}, {{d[0-31]+}}
+  ret <1 x i64> %tmp1
+}
+
+
diff --git a/test/CodeGen/AArch64/neon-scalar-shift.ll b/test/CodeGen/AArch64/neon-scalar-shift.ll
new file mode 100644
index 0000000..1222be5
--- /dev/null
+++ b/test/CodeGen/AArch64/neon-scalar-shift.ll
@@ -0,0 +1,38 @@
+; RUN: llc < %s -verify-machineinstrs -mtriple=aarch64-none-linux-gnu -mattr=+neon | FileCheck %s
+
+declare <1 x i64> @llvm.arm.neon.vshiftu.v1i64(<1 x i64>, <1 x i64>)
+declare <1 x i64> @llvm.arm.neon.vshifts.v1i64(<1 x i64>, <1 x i64>)
+
+define <1 x i64> @test_ushl_v1i64(<1 x i64> %lhs, <1 x i64> %rhs) {
+; CHECK: test_ushl_v1i64:
+  %tmp1 = call <1 x i64> @llvm.arm.neon.vshiftu.v1i64(<1 x i64> %lhs, <1 x i64> %rhs)
+; CHECK: ushl {{d[0-31]+}}, {{d[0-31]+}}, {{d[0-31]+}}
+
+  ret <1 x i64> %tmp1
+}
+
+define <1 x i64> @test_sshl_v1i64(<1 x i64> %lhs, <1 x i64> %rhs) {
+; CHECK: test_sshl_v1i64:
+  %tmp1 = call <1 x i64> @llvm.arm.neon.vshifts.v1i64(<1 x i64> %lhs, <1 x i64> %rhs)
+; CHECK: sshl {{d[0-31]+}}, {{d[0-31]+}}, {{d[0-31]+}}
+  ret <1 x i64> %tmp1
+}
+
+declare <1 x i64> @llvm.aarch64.neon.vshldu(<1 x i64>, <1 x i64>)
+declare <1 x i64> @llvm.aarch64.neon.vshlds(<1 x i64>, <1 x i64>)
+
+define <1 x i64> @test_ushl_v1i64_aarch64(<1 x i64> %lhs, <1 x i64> %rhs) {
+; CHECK: test_ushl_v1i64_aarch64:
+  %tmp1 = call <1 x i64> @llvm.aarch64.neon.vshldu(<1 x i64> %lhs, <1 x i64> %rhs)
+; CHECK: ushl {{d[0-31]+}}, {{d[0-31]+}}, {{d[0-31]+}}
+  ret <1 x i64> %tmp1
+}
+
+define <1 x i64> @test_sshl_v1i64_aarch64(<1 x i64> %lhs, <1 x i64> %rhs) {
+; CHECK: test_sshl_v1i64_aarch64:
+  %tmp1 = call <1 x i64> @llvm.aarch64.neon.vshlds(<1 x i64> %lhs, <1 x i64> %rhs)
+; CHECK: sshl {{d[0-31]+}}, {{d[0-31]+}}, {{d[0-31]+}}
+  ret <1 x i64> %tmp1
+}
+
+
diff --git a/test/CodeGen/AArch64/neon-shift.ll b/test/CodeGen/AArch64/neon-shift.ll
index 1b8b941..33b04ce 100644
--- a/test/CodeGen/AArch64/neon-shift.ll
+++ b/test/CodeGen/AArch64/neon-shift.ll
@@ -102,23 +102,6 @@ define <4 x i32> @test_sshl_v4i32(<4 x i32> %lhs, <4 x i32> %rhs) {
   ret <4 x i32> %tmp1
 }
 
-declare <1 x i64> @llvm.arm.neon.vshiftu.v1i64(<1 x i64>, <1 x i64>)
-declare <1 x i64> @llvm.arm.neon.vshifts.v1i64(<1 x i64>, <1 x i64>)
-
-define <1 x i64> @test_ushl_v1i64(<1 x i64> %lhs, <1 x i64> %rhs) {
-; CHECK: test_ushl_v1i64:
-  %tmp1 = call <1 x i64> @llvm.arm.neon.vshiftu.v1i64(<1 x i64> %lhs, <1 x i64> %rhs)
-; CHECK: ushl d0, d0, d1
-  ret <1 x i64> %tmp1
-}
-
-define <1 x i64> @test_sshl_v1i64(<1 x i64> %lhs, <1 x i64> %rhs) {
-; CHECK: test_sshl_v1i64:
-  %tmp1 = call <1 x i64> @llvm.arm.neon.vshifts.v1i64(<1 x i64> %lhs, <1 x i64> %rhs)
-; CHECK: sshl d0, d0, d1
-  ret <1 x i64> %tmp1
-}
-
 declare <2 x i64> @llvm.arm.neon.vshiftu.v2i64(<2 x i64>, <2 x i64>)
 declare <2 x i64> @llvm.arm.neon.vshifts.v2i64(<2 x i64>, <2 x i64>)
 
diff --git a/test/MC/AArch64/neon-add-pairwise.s b/test/MC/AArch64/neon-add-pairwise.s
index b586c22..df9938b 100644
--- a/test/MC/AArch64/neon-add-pairwise.s
+++ b/test/MC/AArch64/neon-add-pairwise.s
@@ -32,4 +32,3 @@
 // CHECK: faddp v0.2s, v1.2s, v2.2s       // encoding: [0x20,0xd4,0x22,0x2e]
 // CHECK: faddp v0.4s, v1.4s, v2.4s       // encoding: [0x20,0xd4,0x22,0x6e]
 // CHECK: faddp v0.2d, v1.2d, v2.2d       // encoding: [0x20,0xd4,0x62,0x6e]
-
diff --git a/test/MC/AArch64/neon-add-sub-instructions.s b/test/MC/AArch64/neon-add-sub-instructions.s
index 863798e..68f169b 100644
--- a/test/MC/AArch64/neon-add-sub-instructions.s
+++ b/test/MC/AArch64/neon-add-sub-instructions.s
@@ -64,19 +64,5 @@
 // CHECK: fsub v0.4s, v1.4s, v2.4s       // encoding: [0x20,0xd4,0xa2,0x4e]
 // CHECK: fsub v0.2d, v1.2d, v2.2d       // encoding: [0x20,0xd4,0xe2,0x4e]
 
-//------------------------------------------------------------------------------
-// Scalar Integer Add
-//------------------------------------------------------------------------------
-         add d31, d0, d16
-
-// CHECK: add d31, d0, d16       // encoding: [0x1f,0x84,0xf0,0x5e]
-
-//------------------------------------------------------------------------------
-// Scalar Integer Sub
-//------------------------------------------------------------------------------
-         sub d1, d7, d8
-
-// CHECK: sub d1, d7, d8       // encoding: [0xe1,0x84,0xe8,0x7e]
-
 
 
diff --git a/test/MC/AArch64/neon-diagnostics.s b/test/MC/AArch64/neon-diagnostics.s
index c85db70..ff175a7 100644
--- a/test/MC/AArch64/neon-diagnostics.s
+++ b/test/MC/AArch64/neon-diagnostics.s
@@ -2747,3 +2747,105 @@
 // CHECK-ERROR:        rsubhn2 v0.4s, v1.2d, v2.2s
 // CHECK-ERROR:                                 ^
 
+//----------------------------------------------------------------------
+// Scalar Reduce Add Pairwise (Integer)
+//----------------------------------------------------------------------
+         // invalid vector types
+      addp s0, d1.2d
+      addp d0, d1.2s
+
+// CHECK-ERROR: error: invalid operand for instruction
+// CHECK-ERROR:          addp s0, d1.2d
+// CHECK-ERROR:               ^
+// CHECK-ERROR: error: invalid operand for instruction
+// CHECK-ERROR:          addp d0, d1.2s
+// CHECK-ERROR:                      ^
+
+//----------------------------------------------------------------------
+// Scalar Reduce Add Pairwise (Floating Point)
+//----------------------------------------------------------------------
+         // invalid vector types
+      faddp s0, d1.2d
+      faddp d0, d1.2s
+
+// CHECK-ERROR: error: invalid operand for instruction
+// CHECK-ERROR:          faddp s0, d1.2d
+// CHECK-ERROR:                    ^
+// CHECK-ERROR: error: invalid operand for instruction
+// CHECK-ERROR:          faddp d0, d1.2s
+// CHECK-ERROR:                    ^
+
+//----------------------------------------------------------------------
+// Scalar Reduce Maximum Pairwise (Floating Point)
+//----------------------------------------------------------------------
+         // mismatched and invalid vector types
+      fmaxp s0, v1.2d
+      fmaxp d31, v2.2s
+      fmaxp h3, v2.2s
+
+// CHECK-ERROR: error: invalid operand for instruction
+// CHECK-ERROR:          fmaxp s0, v1.2d
+// CHECK-ERROR:                       ^
+// CHECK-ERROR: error: invalid operand for instruction
+// CHECK-ERROR:          fmaxp d31, v2.2s
+// CHECK-ERROR:                        ^
+// CHECK-ERROR: error: invalid operand for instruction
+// CHECK-ERROR:          fmaxp h3, v2.2s
+// CHECK-ERROR:                ^
+
+
+//----------------------------------------------------------------------
+// Scalar Reduce Minimum Pairwise (Floating Point)
+//----------------------------------------------------------------------
+         // mismatched and invalid vector types
+      fminp s0, v1.4h
+      fminp d31, v2.8h
+      fminp b3, v2.2s
+
+// CHECK-ERROR: error: invalid operand for instruction
+// CHECK-ERROR:          fminp s0, v1.4h
+// CHECK-ERROR:                       ^
+// CHECK-ERROR: error: invalid operand for instruction
+// CHECK-ERROR:          fminp d31, v2.8h
+// CHECK-ERROR:                        ^
+// CHECK-ERROR: error: invalid operand for instruction
+// CHECK-ERROR:          fminp b3, v2.2s
+// CHECK-ERROR:                ^
+
+
+//----------------------------------------------------------------------
+// Scalar Reduce maxNum Pairwise (Floating Point)
+//----------------------------------------------------------------------
+         // mismatched and invalid vector types
+      fmaxnmp s0, v1.8b
+      fmaxnmp d31, v2.16b
+      fmaxnmp v1.2s, v2.2s
+
+// CHECK-ERROR: error: invalid operand for instruction
+// CHECK-ERROR:          fmaxnmp s0, v1.8b
+// CHECK-ERROR:                         ^
+// CHECK-ERROR: error: invalid operand for instruction
+// CHECK-ERROR:          fmaxnmp d31, v2.16b
+// CHECK-ERROR:                          ^
+// CHECK-ERROR: error: too few operands for instruction
+// CHECK-ERROR:          fmaxnmp v1.2s, v2.2s
+// CHECK-ERROR:          ^
+
+//----------------------------------------------------------------------
+// Scalar Reduce minNum Pairwise (Floating Point)
+//----------------------------------------------------------------------
+         // mismatched and invalid vector types
+      fminnmp s0, v1.2d
+      fminnmp d31, v2.4s
+      fminnmp v1.4s, v2.2d
+
+// CHECK-ERROR: error: invalid operand for instruction
+// CHECK-ERROR:          fminnmp s0, v1.2d
+// CHECK-ERROR:                         ^
+// CHECK-ERROR: error: invalid operand for instruction
+// CHECK-ERROR:          fminnmp d31, v2.4s
+// CHECK-ERROR:                          ^
+// CHECK-ERROR: error: invalid operand for instruction
+// CHECK-ERROR:          fminnmp v1.4s, v2.2d
+// CHECK-ERROR:          ^
+
diff --git a/test/MC/AArch64/neon-rounding-shift.s b/test/MC/AArch64/neon-rounding-shift.s
index f3c70d7..e70f766 100644
--- a/test/MC/AArch64/neon-rounding-shift.s
+++ b/test/MC/AArch64/neon-rounding-shift.s
@@ -41,17 +41,5 @@
 // CHECK: urshl v0.4s, v1.4s, v2.4s        // encoding: [0x20,0x54,0xa2,0x6e]
 // CHECK: urshl v0.2d, v1.2d, v2.2d        // encoding: [0x20,0x54,0xe2,0x6e]
 
-//------------------------------------------------------------------------------
-// Scalar Integer Rounding Shift Lef (Signed)
-//------------------------------------------------------------------------------
-         srshl d17, d31, d8
-
-// CHECK: srshl d17, d31, d8      // encoding: [0xf1,0x57,0xe8,0x5e]
-
-//------------------------------------------------------------------------------
-// Scalar Integer Rounding Shift Lef (Unsigned)
-//------------------------------------------------------------------------------
-         urshl d17, d31, d8
 
-// CHECK: urshl d17, d31, d8      // encoding: [0xf1,0x57,0xe8,0x7e]
 
diff --git a/test/MC/AArch64/neon-saturating-add-sub.s b/test/MC/AArch64/neon-saturating-add-sub.s
index 1032ae4..4a7ed10 100644
--- a/test/MC/AArch64/neon-saturating-add-sub.s
+++ b/test/MC/AArch64/neon-saturating-add-sub.s
@@ -79,55 +79,4 @@
 // CHECK: uqsub v0.4s, v1.4s, v2.4s        // encoding: [0x20,0x2c,0xa2,0x6e]
 // CHECK: uqsub v0.2d, v1.2d, v2.2d        // encoding: [0x20,0x2c,0xe2,0x6e]
 
-//------------------------------------------------------------------------------
-// Scalar Integer Saturating Add (Signed)
-//------------------------------------------------------------------------------
-         sqadd b0, b1, b2
-         sqadd h10, h11, h12
-         sqadd s20, s21, s2
-         sqadd d17, d31, d8
-
-// CHECK: sqadd b0, b1, b2        // encoding: [0x20,0x0c,0x22,0x5e]
-// CHECK: sqadd h10, h11, h12     // encoding: [0x6a,0x0d,0x6c,0x5e]
-// CHECK: sqadd s20, s21, s2      // encoding: [0xb4,0x0e,0xa2,0x5e]
-// CHECK: sqadd d17, d31, d8      // encoding: [0xf1,0x0f,0xe8,0x5e]
-
-//------------------------------------------------------------------------------
-// Scalar Integer Saturating Add (Unsigned)
-//------------------------------------------------------------------------------
-         uqadd b0, b1, b2
-         uqadd h10, h11, h12
-         uqadd s20, s21, s2
-         uqadd d17, d31, d8
-
-// CHECK: uqadd b0, b1, b2        // encoding: [0x20,0x0c,0x22,0x7e]
-// CHECK: uqadd h10, h11, h12     // encoding: [0x6a,0x0d,0x6c,0x7e]
-// CHECK: uqadd s20, s21, s2      // encoding: [0xb4,0x0e,0xa2,0x7e]
-// CHECK: uqadd d17, d31, d8      // encoding: [0xf1,0x0f,0xe8,0x7e]
-
-//------------------------------------------------------------------------------
-// Scalar Integer Saturating Sub (Signed)
-//------------------------------------------------------------------------------
-         sqsub b0, b1, b2
-         sqsub h10, h11, h12
-         sqsub s20, s21, s2
-         sqsub d17, d31, d8
-
-// CHECK: sqsub b0, b1, b2        // encoding: [0x20,0x2c,0x22,0x5e]
-// CHECK: sqsub h10, h11, h12     // encoding: [0x6a,0x2d,0x6c,0x5e]
-// CHECK: sqsub s20, s21, s2      // encoding: [0xb4,0x2e,0xa2,0x5e]
-// CHECK: sqsub d17, d31, d8      // encoding: [0xf1,0x2f,0xe8,0x5e]
-
-//------------------------------------------------------------------------------
-// Scalar Integer Saturating Sub (Unsigned)
-//------------------------------------------------------------------------------
-         uqsub b0, b1, b2
-         uqsub h10, h11, h12
-         uqsub s20, s21, s2
-         uqsub d17, d31, d8
-
-// CHECK: uqsub b0, b1, b2        // encoding: [0x20,0x2c,0x22,0x7e]
-// CHECK: uqsub h10, h11, h12     // encoding: [0x6a,0x2d,0x6c,0x7e]
-// CHECK: uqsub s20, s21, s2      // encoding: [0xb4,0x2e,0xa2,0x7e]
-// CHECK: uqsub d17, d31, d8      // encoding: [0xf1,0x2f,0xe8,0x7e]
 
diff --git a/test/MC/AArch64/neon-saturating-rounding-shift.s b/test/MC/AArch64/neon-saturating-rounding-shift.s
index a36e689..9215c1c 100644
--- a/test/MC/AArch64/neon-saturating-rounding-shift.s
+++ b/test/MC/AArch64/neon-saturating-rounding-shift.s
@@ -41,30 +41,3 @@
 // CHECK: uqrshl v0.4s, v1.4s, v2.4s        // encoding: [0x20,0x5c,0xa2,0x6e]
 // CHECK: uqrshl v0.2d, v1.2d, v2.2d        // encoding: [0x20,0x5c,0xe2,0x6e]
 
-//------------------------------------------------------------------------------
-// Scalar Integer Saturating Rounding Shift Lef (Signed)
-//------------------------------------------------------------------------------
-         sqrshl b0, b1, b2
-         sqrshl h10, h11, h12
-         sqrshl s20, s21, s2
-         sqrshl d17, d31, d8
-
-// CHECK: sqrshl b0, b1, b2        // encoding: [0x20,0x5c,0x22,0x5e]
-// CHECK: sqrshl h10, h11, h12     // encoding: [0x6a,0x5d,0x6c,0x5e]
-// CHECK: sqrshl s20, s21, s2      // encoding: [0xb4,0x5e,0xa2,0x5e]
-// CHECK: sqrshl d17, d31, d8      // encoding: [0xf1,0x5f,0xe8,0x5e]
-
-//------------------------------------------------------------------------------
-// Scalar Integer Saturating Rounding Shift Lef (Unsigned)
-//------------------------------------------------------------------------------
-         uqrshl b0, b1, b2
-         uqrshl h10, h11, h12
-         uqrshl s20, s21, s2
-         uqrshl d17, d31, d8
-
-// CHECK: uqrshl b0, b1, b2        // encoding: [0x20,0x5c,0x22,0x7e]
-// CHECK: uqrshl h10, h11, h12     // encoding: [0x6a,0x5d,0x6c,0x7e]
-// CHECK: uqrshl s20, s21, s2      // encoding: [0xb4,0x5e,0xa2,0x7e]
-// CHECK: uqrshl d17, d31, d8      // encoding: [0xf1,0x5f,0xe8,0x7e]
-
-
diff --git a/test/MC/AArch64/neon-saturating-shift.s b/test/MC/AArch64/neon-saturating-shift.s
index 2c8456d..9ae393a 100644
--- a/test/MC/AArch64/neon-saturating-shift.s
+++ b/test/MC/AArch64/neon-saturating-shift.s
@@ -41,29 +41,3 @@
 // CHECK: uqshl v0.4s, v1.4s, v2.4s        // encoding: [0x20,0x4c,0xa2,0x6e]
 // CHECK: uqshl v0.2d, v1.2d, v2.2d        // encoding: [0x20,0x4c,0xe2,0x6e]
 
-//------------------------------------------------------------------------------
-// Scalar Integer Saturating Shift Lef (Signed)
-//------------------------------------------------------------------------------
-         sqshl b0, b1, b2
-         sqshl h10, h11, h12
-         sqshl s20, s21, s2
-         sqshl d17, d31, d8
-
-// CHECK: sqshl b0, b1, b2        // encoding: [0x20,0x4c,0x22,0x5e]
-// CHECK: sqshl h10, h11, h12     // encoding: [0x6a,0x4d,0x6c,0x5e]
-// CHECK: sqshl s20, s21, s2      // encoding: [0xb4,0x4e,0xa2,0x5e]
-// CHECK: sqshl d17, d31, d8      // encoding: [0xf1,0x4f,0xe8,0x5e]
-
-//------------------------------------------------------------------------------
-// Scalar Integer Saturating Shift Lef (Unsigned)
-//------------------------------------------------------------------------------
-         uqshl b0, b1, b2
-         uqshl h10, h11, h12
-         uqshl s20, s21, s2
-         uqshl d17, d31, d8
-
-// CHECK: uqshl b0, b1, b2        // encoding: [0x20,0x4c,0x22,0x7e]
-// CHECK: uqshl h10, h11, h12     // encoding: [0x6a,0x4d,0x6c,0x7e]
-// CHECK: uqshl s20, s21, s2      // encoding: [0xb4,0x4e,0xa2,0x7e]
-// CHECK: uqshl d17, d31, d8      // encoding: [0xf1,0x4f,0xe8,0x7e]
-
diff --git a/test/MC/AArch64/neon-scalar-add-sub.s b/test/MC/AArch64/neon-scalar-add-sub.s
new file mode 100644
index 0000000..0a3eba7
--- /dev/null
+++ b/test/MC/AArch64/neon-scalar-add-sub.s
@@ -0,0 +1,16 @@
+// RUN: llvm-mc -triple aarch64-none-linux-gnu -mattr=+neon -show-encoding < %s | FileCheck %s
+
+//------------------------------------------------------------------------------
+// Scalar Integer Add
+//------------------------------------------------------------------------------
+         add d31, d0, d16
+
+// CHECK: add d31, d0, d16       // encoding: [0x1f,0x84,0xf0,0x5e]
+
+//------------------------------------------------------------------------------
+// Scalar Integer Sub
+//------------------------------------------------------------------------------
+         sub d1, d7, d8
+
+// CHECK: sub d1, d7, d8       // encoding: [0xe1,0x84,0xe8,0x7e]
+
diff --git a/test/MC/AArch64/neon-scalar-reduce-pairwise.s b/test/MC/AArch64/neon-scalar-reduce-pairwise.s
new file mode 100644
index 0000000..403a940
--- /dev/null
+++ b/test/MC/AArch64/neon-scalar-reduce-pairwise.s
@@ -0,0 +1,16 @@
+// RUN: llvm-mc -triple aarch64-none-linux-gnu -mattr=+neon -show-encoding < %s | FileCheck %s
+
+//----------------------------------------------------------------------
+// Scalar Reduce Add Pairwise (Integer)
+//----------------------------------------------------------------------
+      addp d0, v1.2d
+
+// CHECK: addp d0, v1.2d     // encoding: [0x20,0xb8,0xf1,0x5e]
+
+//----------------------------------------------------------------------
+// Scalar Reduce Add Pairwise (Floating Point)
+//----------------------------------------------------------------------
+      faddp d20, v1.2d
+
+// CHECK: faddp d20, v1.2d     // encoding: [0x34,0xd8,0x70,0x7e]
+
diff --git a/test/MC/AArch64/neon-scalar-rounding-shift.s b/test/MC/AArch64/neon-scalar-rounding-shift.s
new file mode 100644
index 0000000..6113e09
--- /dev/null
+++ b/test/MC/AArch64/neon-scalar-rounding-shift.s
@@ -0,0 +1,17 @@
+// RUN: llvm-mc -triple aarch64-none-linux-gnu -mattr=+neon -show-encoding < %s | FileCheck %s
+
+
+//------------------------------------------------------------------------------
+// Scalar Integer Rounding Shift Lef (Signed)
+//------------------------------------------------------------------------------
+         srshl d17, d31, d8
+
+// CHECK: srshl d17, d31, d8      // encoding: [0xf1,0x57,0xe8,0x5e]
+
+//------------------------------------------------------------------------------
+// Scalar Integer Rounding Shift Lef (Unsigned)
+//------------------------------------------------------------------------------
+         urshl d17, d31, d8
+
+// CHECK: urshl d17, d31, d8      // encoding: [0xf1,0x57,0xe8,0x7e]
+
diff --git a/test/MC/AArch64/neon-scalar-saturating-add-sub.s b/test/MC/AArch64/neon-scalar-saturating-add-sub.s
new file mode 100644
index 0000000..fc2d50c
--- /dev/null
+++ b/test/MC/AArch64/neon-scalar-saturating-add-sub.s
@@ -0,0 +1,54 @@
+// RUN: llvm-mc -triple aarch64-none-linux-gnu -mattr=+neon -show-encoding < %s | FileCheck %s
+
+//------------------------------------------------------------------------------
+// Scalar Integer Saturating Add (Signed)
+//------------------------------------------------------------------------------
+         sqadd b0, b1, b2
+         sqadd h10, h11, h12
+         sqadd s20, s21, s2
+         sqadd d17, d31, d8
+
+// CHECK: sqadd b0, b1, b2        // encoding: [0x20,0x0c,0x22,0x5e]
+// CHECK: sqadd h10, h11, h12     // encoding: [0x6a,0x0d,0x6c,0x5e]
+// CHECK: sqadd s20, s21, s2      // encoding: [0xb4,0x0e,0xa2,0x5e]
+// CHECK: sqadd d17, d31, d8      // encoding: [0xf1,0x0f,0xe8,0x5e]
+
+//------------------------------------------------------------------------------
+// Scalar Integer Saturating Add (Unsigned)
+//------------------------------------------------------------------------------
+         uqadd b0, b1, b2
+         uqadd h10, h11, h12
+         uqadd s20, s21, s2
+         uqadd d17, d31, d8
+
+// CHECK: uqadd b0, b1, b2        // encoding: [0x20,0x0c,0x22,0x7e]
+// CHECK: uqadd h10, h11, h12     // encoding: [0x6a,0x0d,0x6c,0x7e]
+// CHECK: uqadd s20, s21, s2      // encoding: [0xb4,0x0e,0xa2,0x7e]
+// CHECK: uqadd d17, d31, d8      // encoding: [0xf1,0x0f,0xe8,0x7e]
+
+//------------------------------------------------------------------------------
+// Scalar Integer Saturating Sub (Signed)
+//------------------------------------------------------------------------------
+         sqsub b0, b1, b2
+         sqsub h10, h11, h12
+         sqsub s20, s21, s2
+         sqsub d17, d31, d8
+
+// CHECK: sqsub b0, b1, b2        // encoding: [0x20,0x2c,0x22,0x5e]
+// CHECK: sqsub h10, h11, h12     // encoding: [0x6a,0x2d,0x6c,0x5e]
+// CHECK: sqsub s20, s21, s2      // encoding: [0xb4,0x2e,0xa2,0x5e]
+// CHECK: sqsub d17, d31, d8      // encoding: [0xf1,0x2f,0xe8,0x5e]
+
+//------------------------------------------------------------------------------
+// Scalar Integer Saturating Sub (Unsigned)
+//------------------------------------------------------------------------------
+         uqsub b0, b1, b2
+         uqsub h10, h11, h12
+         uqsub s20, s21, s2
+         uqsub d17, d31, d8
+
+// CHECK: uqsub b0, b1, b2        // encoding: [0x20,0x2c,0x22,0x7e]
+// CHECK: uqsub h10, h11, h12     // encoding: [0x6a,0x2d,0x6c,0x7e]
+// CHECK: uqsub s20, s21, s2      // encoding: [0xb4,0x2e,0xa2,0x7e]
+// CHECK: uqsub d17, d31, d8      // encoding: [0xf1,0x2f,0xe8,0x7e]
+
diff --git a/test/MC/AArch64/neon-scalar-saturating-rounding-shift.s b/test/MC/AArch64/neon-scalar-saturating-rounding-shift.s
new file mode 100644
index 0000000..b09a589
--- /dev/null
+++ b/test/MC/AArch64/neon-scalar-saturating-rounding-shift.s
@@ -0,0 +1,28 @@
+// RUN: llvm-mc -triple aarch64-none-linux-gnu -mattr=+neon -show-encoding < %s | FileCheck %s
+
+//------------------------------------------------------------------------------
+// Scalar Integer Saturating Rounding Shift Lef (Signed)
+//------------------------------------------------------------------------------
+         sqrshl b0, b1, b2
+         sqrshl h10, h11, h12
+         sqrshl s20, s21, s2
+         sqrshl d17, d31, d8
+
+// CHECK: sqrshl b0, b1, b2        // encoding: [0x20,0x5c,0x22,0x5e]
+// CHECK: sqrshl h10, h11, h12     // encoding: [0x6a,0x5d,0x6c,0x5e]
+// CHECK: sqrshl s20, s21, s2      // encoding: [0xb4,0x5e,0xa2,0x5e]
+// CHECK: sqrshl d17, d31, d8      // encoding: [0xf1,0x5f,0xe8,0x5e]
+
+//------------------------------------------------------------------------------
+// Scalar Integer Saturating Rounding Shift Lef (Unsigned)
+//------------------------------------------------------------------------------
+         uqrshl b0, b1, b2
+         uqrshl h10, h11, h12
+         uqrshl s20, s21, s2
+         uqrshl d17, d31, d8
+
+// CHECK: uqrshl b0, b1, b2        // encoding: [0x20,0x5c,0x22,0x7e]
+// CHECK: uqrshl h10, h11, h12     // encoding: [0x6a,0x5d,0x6c,0x7e]
+// CHECK: uqrshl s20, s21, s2      // encoding: [0xb4,0x5e,0xa2,0x7e]
+// CHECK: uqrshl d17, d31, d8      // encoding: [0xf1,0x5f,0xe8,0x7e]
+
diff --git a/test/MC/AArch64/neon-scalar-saturating-shift.s b/test/MC/AArch64/neon-scalar-saturating-shift.s
new file mode 100644
index 0000000..b53c9f0
--- /dev/null
+++ b/test/MC/AArch64/neon-scalar-saturating-shift.s
@@ -0,0 +1,29 @@
+// RUN: llvm-mc -triple aarch64-none-linux-gnu -mattr=+neon -show-encoding < %s | FileCheck %s
+
+//------------------------------------------------------------------------------
+// Scalar Integer Saturating Shift Lef (Signed)
+//------------------------------------------------------------------------------
+         sqshl b0, b1, b2
+         sqshl h10, h11, h12
+         sqshl s20, s21, s2
+         sqshl d17, d31, d8
+
+// CHECK: sqshl b0, b1, b2        // encoding: [0x20,0x4c,0x22,0x5e]
+// CHECK: sqshl h10, h11, h12     // encoding: [0x6a,0x4d,0x6c,0x5e]
+// CHECK: sqshl s20, s21, s2      // encoding: [0xb4,0x4e,0xa2,0x5e]
+// CHECK: sqshl d17, d31, d8      // encoding: [0xf1,0x4f,0xe8,0x5e]
+
+//------------------------------------------------------------------------------
+// Scalar Integer Saturating Shift Lef (Unsigned)
+//------------------------------------------------------------------------------
+         uqshl b0, b1, b2
+         uqshl h10, h11, h12
+         uqshl s20, s21, s2
+         uqshl d17, d31, d8
+
+// CHECK: uqshl b0, b1, b2        // encoding: [0x20,0x4c,0x22,0x7e]
+// CHECK: uqshl h10, h11, h12     // encoding: [0x6a,0x4d,0x6c,0x7e]
+// CHECK: uqshl s20, s21, s2      // encoding: [0xb4,0x4e,0xa2,0x7e]
+// CHECK: uqshl d17, d31, d8      // encoding: [0xf1,0x4f,0xe8,0x7e]
+
+
diff --git a/test/MC/AArch64/neon-scalar-shift.s b/test/MC/AArch64/neon-scalar-shift.s
new file mode 100644
index 0000000..366840a
--- /dev/null
+++ b/test/MC/AArch64/neon-scalar-shift.s
@@ -0,0 +1,16 @@
+// RUN: llvm-mc -triple aarch64-none-linux-gnu -mattr=+neon -show-encoding < %s | FileCheck %s
+
+//------------------------------------------------------------------------------
+// Scalar Integer Shift Lef (Signed)
+//------------------------------------------------------------------------------
+         sshl d17, d31, d8
+
+// CHECK: sshl d17, d31, d8      // encoding: [0xf1,0x47,0xe8,0x5e]
+
+//------------------------------------------------------------------------------
+// Scalar Integer Shift Lef (Unsigned)
+//------------------------------------------------------------------------------
+         ushl d17, d31, d8
+
+// CHECK: ushl d17, d31, d8      // encoding: [0xf1,0x47,0xe8,0x7e]
+
diff --git a/test/MC/AArch64/neon-shift.s b/test/MC/AArch64/neon-shift.s
index 23d687c..614e6de 100644
--- a/test/MC/AArch64/neon-shift.s
+++ b/test/MC/AArch64/neon-shift.s
@@ -42,20 +42,6 @@
 // CHECK: ushl v0.2d, v1.2d, v2.2d        // encoding: [0x20,0x44,0xe2,0x6e]
 
 //------------------------------------------------------------------------------
-// Scalar Integer Shift Lef (Signed)
-//------------------------------------------------------------------------------
-         sshl d17, d31, d8
-
-// CHECK: sshl d17, d31, d8      // encoding: [0xf1,0x47,0xe8,0x5e]
-
-//------------------------------------------------------------------------------
-// Scalar Integer Shift Lef (Unsigned)
-//------------------------------------------------------------------------------
-         ushl d17, d31, d8
-
-// CHECK: ushl d17, d31, d8      // encoding: [0xf1,0x47,0xe8,0x7e]
-
-//------------------------------------------------------------------------------
 // Vector Integer Shift Left by Immediate
 //------------------------------------------------------------------------------
          shl v0.8b, v1.8b, #3