aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
-rw-r--r--include/llvm/CodeGen/ValueTypes.h91
-rw-r--r--include/llvm/CodeGen/ValueTypes.td73
-rw-r--r--include/llvm/IR/Intrinsics.td3
-rw-r--r--include/llvm/IR/IntrinsicsAArch64.td79
-rw-r--r--lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp3
-rw-r--r--lib/IR/Function.cpp7
-rw-r--r--lib/IR/ValueTypes.cpp6
-rw-r--r--lib/Target/AArch64/AArch64CallingConv.td9
-rw-r--r--lib/Target/AArch64/AArch64ISelLowering.cpp11
-rw-r--r--lib/Target/AArch64/AArch64InstrFormats.td21
-rw-r--r--lib/Target/AArch64/AArch64InstrInfo.td16
-rw-r--r--lib/Target/AArch64/AArch64InstrNEON.td253
-rw-r--r--lib/Target/AArch64/AArch64RegisterInfo.td8
-rw-r--r--test/CodeGen/AArch64/neon-add-sub.ll12
-rw-r--r--test/CodeGen/AArch64/neon-copy.ll2
-rw-r--r--test/CodeGen/AArch64/neon-rounding-shift.ll17
-rw-r--r--test/CodeGen/AArch64/neon-saturating-add-sub.ll33
-rw-r--r--test/CodeGen/AArch64/neon-saturating-rounding-shift.ll17
-rw-r--r--test/CodeGen/AArch64/neon-saturating-shift.ll17
-rw-r--r--test/CodeGen/AArch64/neon-scalar-add-sub.ll50
-rw-r--r--test/CodeGen/AArch64/neon-scalar-reduce-pairwise.ll103
-rw-r--r--test/CodeGen/AArch64/neon-scalar-rounding-shift.ll39
-rw-r--r--test/CodeGen/AArch64/neon-scalar-saturating-add-sub.ll171
-rw-r--r--test/CodeGen/AArch64/neon-scalar-saturating-rounding-shift.ll94
-rw-r--r--test/CodeGen/AArch64/neon-scalar-saturating-shift.ll88
-rw-r--r--test/CodeGen/AArch64/neon-scalar-shift.ll38
-rw-r--r--test/CodeGen/AArch64/neon-shift.ll17
-rw-r--r--test/MC/AArch64/neon-add-pairwise.s1
-rw-r--r--test/MC/AArch64/neon-add-sub-instructions.s14
-rw-r--r--test/MC/AArch64/neon-diagnostics.s102
-rw-r--r--test/MC/AArch64/neon-rounding-shift.s12
-rw-r--r--test/MC/AArch64/neon-saturating-add-sub.s51
-rw-r--r--test/MC/AArch64/neon-saturating-rounding-shift.s27
-rw-r--r--test/MC/AArch64/neon-saturating-shift.s26
-rw-r--r--test/MC/AArch64/neon-scalar-add-sub.s16
-rw-r--r--test/MC/AArch64/neon-scalar-reduce-pairwise.s16
-rw-r--r--test/MC/AArch64/neon-scalar-rounding-shift.s17
-rw-r--r--test/MC/AArch64/neon-scalar-saturating-add-sub.s54
-rw-r--r--test/MC/AArch64/neon-scalar-saturating-rounding-shift.s28
-rw-r--r--test/MC/AArch64/neon-scalar-saturating-shift.s29
-rw-r--r--test/MC/AArch64/neon-scalar-shift.s16
-rw-r--r--test/MC/AArch64/neon-shift.s14
-rw-r--r--utils/TableGen/CodeGenTarget.cpp3
-rw-r--r--utils/TableGen/IntrinsicEmitter.cpp4
44 files changed, 1320 insertions, 388 deletions
diff --git a/include/llvm/CodeGen/ValueTypes.h b/include/llvm/CodeGen/ValueTypes.h
index 82b8d8a..18b324f 100644
--- a/include/llvm/CodeGen/ValueTypes.h
+++ b/include/llvm/CodeGen/ValueTypes.h
@@ -67,41 +67,44 @@ namespace llvm {
v32i1 = 17, // 32 x i1
v64i1 = 18, // 64 x i1
- v2i8 = 19, // 2 x i8
- v4i8 = 20, // 4 x i8
- v8i8 = 21, // 8 x i8
- v16i8 = 22, // 16 x i8
- v32i8 = 23, // 32 x i8
- v64i8 = 24, // 64 x i8
- v1i16 = 25, // 1 x i16
- v2i16 = 26, // 2 x i16
- v4i16 = 27, // 4 x i16
- v8i16 = 28, // 8 x i16
- v16i16 = 29, // 16 x i16
- v32i16 = 30, // 32 x i16
- v1i32 = 31, // 1 x i32
- v2i32 = 32, // 2 x i32
- v4i32 = 33, // 4 x i32
- v8i32 = 34, // 8 x i32
- v16i32 = 35, // 16 x i32
- v1i64 = 36, // 1 x i64
- v2i64 = 37, // 2 x i64
- v4i64 = 38, // 4 x i64
- v8i64 = 39, // 8 x i64
- v16i64 = 40, // 16 x i64
+ v1i8 = 19, // 1 x i8
+ v2i8 = 20, // 2 x i8
+ v4i8 = 21, // 4 x i8
+ v8i8 = 22, // 8 x i8
+ v16i8 = 23, // 16 x i8
+ v32i8 = 24, // 32 x i8
+ v64i8 = 25, // 64 x i8
+ v1i16 = 26, // 1 x i16
+ v2i16 = 27, // 2 x i16
+ v4i16 = 28, // 4 x i16
+ v8i16 = 29, // 8 x i16
+ v16i16 = 30, // 16 x i16
+ v32i16 = 31, // 32 x i16
+ v1i32 = 32, // 1 x i32
+ v2i32 = 33, // 2 x i32
+ v4i32 = 34, // 4 x i32
+ v8i32 = 35, // 8 x i32
+ v16i32 = 36, // 16 x i32
+ v1i64 = 37, // 1 x i64
+ v2i64 = 38, // 2 x i64
+ v4i64 = 39, // 4 x i64
+ v8i64 = 40, // 8 x i64
+ v16i64 = 41, // 16 x i64
FIRST_INTEGER_VECTOR_VALUETYPE = v2i1,
LAST_INTEGER_VECTOR_VALUETYPE = v16i64,
- v2f16 = 41, // 2 x f16
- v8f16 = 42, // 8 x f16
- v2f32 = 43, // 2 x f32
- v4f32 = 44, // 4 x f32
- v8f32 = 45, // 8 x f32
- v16f32 = 46, // 16 x f32
- v2f64 = 47, // 2 x f64
- v4f64 = 48, // 4 x f64
- v8f64 = 49, // 8 x f64
+ v2f16 = 42, // 2 x f16
+ v8f16 = 43, // 8 x f16
+ v1f32 = 44, // 1 x f32
+ v2f32 = 45, // 2 x f32
+ v4f32 = 46, // 4 x f32
+ v8f32 = 47, // 8 x f32
+ v16f32 = 48, // 16 x f32
+ v1f64 = 49, // 1 x f64
+ v2f64 = 50, // 2 x f64
+ v4f64 = 51, // 4 x f64
+ v8f64 = 52, // 8 x f64
FIRST_FP_VECTOR_VALUETYPE = v2f16,
LAST_FP_VECTOR_VALUETYPE = v8f64,
@@ -109,17 +112,17 @@ namespace llvm {
FIRST_VECTOR_VALUETYPE = v2i1,
LAST_VECTOR_VALUETYPE = v8f64,
- x86mmx = 50, // This is an X86 MMX value
+ x86mmx = 53, // This is an X86 MMX value
- Glue = 51, // This glues nodes together during pre-RA sched
+ Glue = 54, // This glues nodes together during pre-RA sched
- isVoid = 52, // This has no value
+ isVoid = 55, // This has no value
- Untyped = 53, // This value takes a register, but has
+ Untyped = 56, // This value takes a register, but has
// unspecified type. The register class
// will be determined by the opcode.
- LAST_VALUETYPE = 54, // This always remains at the end of the list.
+ LAST_VALUETYPE = 57, // This always remains at the end of the list.
// This is the current maximum for LAST_VALUETYPE.
// MVT::MAX_ALLOWED_VALUETYPE is used for asserts and to size bit vectors
@@ -266,6 +269,7 @@ namespace llvm {
case v16i1 :
case v32i1 :
case v64i1: return i1;
+ case v1i8 :
case v2i8 :
case v4i8 :
case v8i8 :
@@ -290,10 +294,12 @@ namespace llvm {
case v16i64: return i64;
case v2f16:
case v8f16: return f16;
+ case v1f32:
case v2f32:
case v4f32:
case v8f32:
case v16f32: return f32;
+ case v1f64:
case v2f64:
case v4f64:
case v8f64: return f64;
@@ -338,9 +344,12 @@ namespace llvm {
case v2f16:
case v2f32:
case v2f64: return 2;
+ case v1i8:
case v1i16:
case v1i32:
- case v1i64: return 1;
+ case v1i64:
+ case v1f32:
+ case v1f64: return 1;
}
}
@@ -363,6 +372,7 @@ namespace llvm {
case v2i1: return 2;
case v4i1: return 4;
case i8 :
+ case v1i8:
case v8i1: return 8;
case i16 :
case f16:
@@ -375,6 +385,7 @@ namespace llvm {
case v4i8:
case v2i16:
case v2f16:
+ case v1f32:
case v1i32: return 32;
case x86mmx:
case f64 :
@@ -384,7 +395,8 @@ namespace llvm {
case v4i16:
case v2i32:
case v1i64:
- case v2f32: return 64;
+ case v2f32:
+ case v1f64: return 64;
case f80 : return 80;
case f128:
case ppcf128:
@@ -494,6 +506,7 @@ namespace llvm {
if (NumElements == 64) return MVT::v64i1;
break;
case MVT::i8:
+ if (NumElements == 1) return MVT::v1i8;
if (NumElements == 2) return MVT::v2i8;
if (NumElements == 4) return MVT::v4i8;
if (NumElements == 8) return MVT::v8i8;
@@ -528,12 +541,14 @@ namespace llvm {
if (NumElements == 8) return MVT::v8f16;
break;
case MVT::f32:
+ if (NumElements == 1) return MVT::v1f32;
if (NumElements == 2) return MVT::v2f32;
if (NumElements == 4) return MVT::v4f32;
if (NumElements == 8) return MVT::v8f32;
if (NumElements == 16) return MVT::v16f32;
break;
case MVT::f64:
+ if (NumElements == 1) return MVT::v1f64;
if (NumElements == 2) return MVT::v2f64;
if (NumElements == 4) return MVT::v4f64;
if (NumElements == 8) return MVT::v8f64;
diff --git a/include/llvm/CodeGen/ValueTypes.td b/include/llvm/CodeGen/ValueTypes.td
index 28ad936..415dbed 100644
--- a/include/llvm/CodeGen/ValueTypes.td
+++ b/include/llvm/CodeGen/ValueTypes.td
@@ -39,44 +39,47 @@ def v8i1 : ValueType<8 , 15>; // 8 x i1 vector value
def v16i1 : ValueType<16, 16>; // 16 x i1 vector value
def v32i1 : ValueType<32 , 17>; // 32 x i1 vector value
def v64i1 : ValueType<64 , 18>; // 64 x i1 vector value
-def v2i8 : ValueType<16 , 19>; // 2 x i8 vector value
-def v4i8 : ValueType<32 , 20>; // 4 x i8 vector value
-def v8i8 : ValueType<64 , 21>; // 8 x i8 vector value
-def v16i8 : ValueType<128, 22>; // 16 x i8 vector value
-def v32i8 : ValueType<256, 23>; // 32 x i8 vector value
-def v64i8 : ValueType<512, 24>; // 64 x i8 vector value
-def v1i16 : ValueType<16 , 25>; // 1 x i16 vector value
-def v2i16 : ValueType<32 , 26>; // 2 x i16 vector value
-def v4i16 : ValueType<64 , 27>; // 4 x i16 vector value
-def v8i16 : ValueType<128, 28>; // 8 x i16 vector value
-def v16i16 : ValueType<256, 29>; // 16 x i16 vector value
-def v32i16 : ValueType<512, 30>; // 32 x i16 vector value
-def v1i32 : ValueType<32 , 31>; // 1 x i32 vector value
-def v2i32 : ValueType<64 , 32>; // 2 x i32 vector value
-def v4i32 : ValueType<128, 33>; // 4 x i32 vector value
-def v8i32 : ValueType<256, 34>; // 8 x i32 vector value
-def v16i32 : ValueType<512, 35>; // 16 x i32 vector value
-def v1i64 : ValueType<64 , 36>; // 1 x i64 vector value
-def v2i64 : ValueType<128, 37>; // 2 x i64 vector value
-def v4i64 : ValueType<256, 38>; // 4 x i64 vector value
-def v8i64 : ValueType<512, 39>; // 8 x i64 vector value
-def v16i64 : ValueType<1024,40>; // 16 x i64 vector value
+def v1i8 : ValueType<8, 19>; // 1 x i8 vector value (1 * 8 = 8 bits)
+def v2i8 : ValueType<16 , 20>; // 2 x i8 vector value
+def v4i8 : ValueType<32 , 21>; // 4 x i8 vector value
+def v8i8 : ValueType<64 , 22>; // 8 x i8 vector value
+def v16i8 : ValueType<128, 23>; // 16 x i8 vector value
+def v32i8 : ValueType<256, 24>; // 32 x i8 vector value
+def v64i8 : ValueType<512, 25>; // 64 x i8 vector value
+def v1i16 : ValueType<16 , 26>; // 1 x i16 vector value
+def v2i16 : ValueType<32 , 27>; // 2 x i16 vector value
+def v4i16 : ValueType<64 , 28>; // 4 x i16 vector value
+def v8i16 : ValueType<128, 29>; // 8 x i16 vector value
+def v16i16 : ValueType<256, 30>; // 16 x i16 vector value
+def v32i16 : ValueType<512, 31>; // 32 x i16 vector value
+def v1i32 : ValueType<32 , 32>; // 1 x i32 vector value
+def v2i32 : ValueType<64 , 33>; // 2 x i32 vector value
+def v4i32 : ValueType<128, 34>; // 4 x i32 vector value
+def v8i32 : ValueType<256, 35>; // 8 x i32 vector value
+def v16i32 : ValueType<512, 36>; // 16 x i32 vector value
+def v1i64 : ValueType<64 , 37>; // 1 x i64 vector value
+def v2i64 : ValueType<128, 38>; // 2 x i64 vector value
+def v4i64 : ValueType<256, 39>; // 4 x i64 vector value
+def v8i64 : ValueType<512, 40>; // 8 x i64 vector value
+def v16i64 : ValueType<1024,41>; // 16 x i64 vector value
-def v2f16 : ValueType<32 , 41>; // 2 x f16 vector value
-def v8f16 : ValueType<128, 42>; // 8 x f16 vector value
-def v2f32 : ValueType<64 , 43>; // 2 x f32 vector value
-def v4f32 : ValueType<128, 44>; // 4 x f32 vector value
-def v8f32 : ValueType<256, 45>; // 8 x f32 vector value
-def v16f32 : ValueType<512, 46>; // 16 x f32 vector value
-def v2f64 : ValueType<128, 47>; // 2 x f64 vector value
-def v4f64 : ValueType<256, 48>; // 4 x f64 vector value
-def v8f64 : ValueType<512, 49>; // 8 x f64 vector value
+def v2f16 : ValueType<32 , 42>; // 2 x f16 vector value
+def v8f16 : ValueType<128, 43>; // 8 x f16 vector value
+def v1f32 : ValueType<32 , 44>; // 1 x f32 vector value
+def v2f32 : ValueType<64 , 45>; // 2 x f32 vector value
+def v4f32 : ValueType<128, 46>; // 4 x f32 vector value
+def v8f32 : ValueType<256, 47>; // 8 x f32 vector value
+def v16f32 : ValueType<512, 48>; // 16 x f32 vector value
+def v1f64 : ValueType<64, 49>; // 1 x f64 vector value
+def v2f64 : ValueType<128, 50>; // 2 x f64 vector value
+def v4f64 : ValueType<256, 51>; // 4 x f64 vector value
+def v8f64 : ValueType<512, 52>; // 8 x f64 vector value
-def x86mmx : ValueType<64 , 50>; // X86 MMX value
-def FlagVT : ValueType<0 , 51>; // Pre-RA sched glue
-def isVoid : ValueType<0 , 52>; // Produces no value
-def untyped: ValueType<8 , 53>; // Produces an untyped value
+def x86mmx : ValueType<64 , 53>; // X86 MMX value
+def FlagVT : ValueType<0 , 54>; // Pre-RA sched glue
+def isVoid : ValueType<0 , 55>; // Produces no value
+def untyped: ValueType<8 , 56>; // Produces an untyped value
def MetadataVT: ValueType<0, 250>; // Metadata
// Pseudo valuetype mapped to the current pointer size to any address space.
diff --git a/include/llvm/IR/Intrinsics.td b/include/llvm/IR/Intrinsics.td
index c7414e0..30cd4be 100644
--- a/include/llvm/IR/Intrinsics.td
+++ b/include/llvm/IR/Intrinsics.td
@@ -140,6 +140,7 @@ def llvm_v8i1_ty : LLVMType<v8i1>; // 8 x i1
def llvm_v16i1_ty : LLVMType<v16i1>; // 16 x i1
def llvm_v32i1_ty : LLVMType<v32i1>; // 32 x i1
def llvm_v64i1_ty : LLVMType<v64i1>; // 64 x i1
+def llvm_v1i8_ty : LLVMType<v1i8>; // 1 x i8
def llvm_v2i8_ty : LLVMType<v2i8>; // 2 x i8
def llvm_v4i8_ty : LLVMType<v4i8>; // 4 x i8
def llvm_v8i8_ty : LLVMType<v8i8>; // 8 x i8
@@ -166,10 +167,12 @@ def llvm_v8i64_ty : LLVMType<v8i64>; // 8 x i64
def llvm_v16i64_ty : LLVMType<v16i64>; // 16 x i64
def llvm_v8f16_ty : LLVMType<v8f16>; // 8 x half (__fp16)
+def llvm_v1f32_ty : LLVMType<v1f32>; // 1 x float
def llvm_v2f32_ty : LLVMType<v2f32>; // 2 x float
def llvm_v4f32_ty : LLVMType<v4f32>; // 4 x float
def llvm_v8f32_ty : LLVMType<v8f32>; // 8 x float
def llvm_v16f32_ty : LLVMType<v16f32>; // 16 x float
+def llvm_v1f64_ty : LLVMType<v1f64>; // 1 x double
def llvm_v2f64_ty : LLVMType<v2f64>; // 2 x double
def llvm_v4f64_ty : LLVMType<v4f64>; // 4 x double
def llvm_v8f64_ty : LLVMType<v8f64>; // 8 x double
diff --git a/include/llvm/IR/IntrinsicsAArch64.td b/include/llvm/IR/IntrinsicsAArch64.td
index 0a71ea4..4f7252d 100644
--- a/include/llvm/IR/IntrinsicsAArch64.td
+++ b/include/llvm/IR/IntrinsicsAArch64.td
@@ -17,12 +17,10 @@
let TargetPrefix = "aarch64" in { // All intrinsics start with "llvm.aarch64.".
// Vector Absolute Compare (Floating Point)
-def int_aarch64_neon_vacgeq : Intrinsic<[llvm_v2i64_ty],
- [llvm_v2f64_ty, llvm_v2f64_ty],
- [IntrNoMem]>;
-def int_aarch64_neon_vacgtq : Intrinsic<[llvm_v2i64_ty],
- [llvm_v2f64_ty, llvm_v2f64_ty],
- [IntrNoMem]>;
+def int_aarch64_neon_vacgeq :
+ Intrinsic<[llvm_v2i64_ty], [llvm_v2f64_ty, llvm_v2f64_ty], [IntrNoMem]>;
+def int_aarch64_neon_vacgtq :
+ Intrinsic<[llvm_v2i64_ty], [llvm_v2f64_ty, llvm_v2f64_ty], [IntrNoMem]>;
// Vector maxNum (Floating Point)
def int_aarch64_neon_vmaxnm : Neon_2Arg_Intrinsic;
@@ -66,4 +64,73 @@ def int_aarch64_neon_vsqshrn : Neon_N2V_Narrow_Intrinsic;
def int_aarch64_neon_vuqshrn : Neon_N2V_Narrow_Intrinsic;
def int_aarch64_neon_vsqrshrn : Neon_N2V_Narrow_Intrinsic;
def int_aarch64_neon_vuqrshrn : Neon_N2V_Narrow_Intrinsic;
+
+// Scalar Add
+def int_aarch64_neon_vaddds :
+ Intrinsic<[llvm_v1i64_ty], [llvm_v1i64_ty, llvm_v1i64_ty], [IntrNoMem]>;
+def int_aarch64_neon_vadddu :
+ Intrinsic<[llvm_v1i64_ty], [llvm_v1i64_ty, llvm_v1i64_ty], [IntrNoMem]>;
+
+// Scalar Saturating Add (Signed, Unsigned)
+def int_aarch64_neon_vqadds : Neon_2Arg_Intrinsic;
+def int_aarch64_neon_vqaddu : Neon_2Arg_Intrinsic;
+
+// Scalar Sub
+def int_aarch64_neon_vsubds :
+ Intrinsic<[llvm_v1i64_ty], [llvm_v1i64_ty, llvm_v1i64_ty], [IntrNoMem]>;
+def int_aarch64_neon_vsubdu :
+ Intrinsic<[llvm_v1i64_ty], [llvm_v1i64_ty, llvm_v1i64_ty], [IntrNoMem]>;
+
+// Scalar Saturating Sub (Signed, Unsigned)
+def int_aarch64_neon_vqsubs : Neon_2Arg_Intrinsic;
+def int_aarch64_neon_vqsubu : Neon_2Arg_Intrinsic;
+
+// Scalar Shift
+// Scalar Shift Left
+def int_aarch64_neon_vshlds :
+ Intrinsic<[llvm_v1i64_ty], [llvm_v1i64_ty, llvm_v1i64_ty], [IntrNoMem]>;
+def int_aarch64_neon_vshldu :
+ Intrinsic<[llvm_v1i64_ty], [llvm_v1i64_ty, llvm_v1i64_ty], [IntrNoMem]>;
+
+// Scalar Saturating Shift Left
+def int_aarch64_neon_vqshls : Neon_2Arg_Intrinsic;
+def int_aarch64_neon_vqshlu : Neon_2Arg_Intrinsic;
+
+// Scalar Rounding Shift Left
+def int_aarch64_neon_vrshlds :
+ Intrinsic<[llvm_v1i64_ty], [llvm_v1i64_ty, llvm_v1i64_ty], [IntrNoMem]>;
+def int_aarch64_neon_vrshldu :
+ Intrinsic<[llvm_v1i64_ty], [llvm_v1i64_ty, llvm_v1i64_ty], [IntrNoMem]>;
+
+// Scalar Saturating Rounding Shift Left
+def int_aarch64_neon_vqrshls : Neon_2Arg_Intrinsic;
+def int_aarch64_neon_vqrshlu : Neon_2Arg_Intrinsic;
+
+// Scalar Reduce Pairwise Add.
+def int_aarch64_neon_vpadd :
+ Intrinsic<[llvm_v1i64_ty], [llvm_v2i64_ty],[IntrNoMem]>;
+def int_aarch64_neon_vpfadd :
+ Intrinsic<[llvm_v1f32_ty], [llvm_v2f32_ty], [IntrNoMem]>;
+def int_aarch64_neon_vpfaddq :
+ Intrinsic<[llvm_v1f64_ty], [llvm_v2f64_ty], [IntrNoMem]>;
+
+// Scalar Reduce Pairwise Floating Point Max/Min.
+def int_aarch64_neon_vpmax :
+ Intrinsic<[llvm_v1f32_ty], [llvm_v2f32_ty], [IntrNoMem]>;
+def int_aarch64_neon_vpmaxq :
+ Intrinsic<[llvm_v1f64_ty], [llvm_v2f64_ty], [IntrNoMem]>;
+def int_aarch64_neon_vpmin :
+ Intrinsic<[llvm_v1f32_ty], [llvm_v2f32_ty], [IntrNoMem]>;
+def int_aarch64_neon_vpminq :
+ Intrinsic<[llvm_v1f64_ty], [llvm_v2f64_ty], [IntrNoMem]>;
+
+// Scalar Reduce Pairwise Floating Point Maxnm/Minnm.
+def int_aarch64_neon_vpfmaxnm :
+ Intrinsic<[llvm_v1f32_ty], [llvm_v2f32_ty], [IntrNoMem]>;
+def int_aarch64_neon_vpfmaxnmq :
+ Intrinsic<[llvm_v1f64_ty], [llvm_v2f64_ty], [IntrNoMem]>;
+def int_aarch64_neon_vpfminnm :
+ Intrinsic<[llvm_v1f32_ty], [llvm_v2f32_ty], [IntrNoMem]>;
+def int_aarch64_neon_vpfminnmq :
+ Intrinsic<[llvm_v1f64_ty], [llvm_v2f64_ty], [IntrNoMem]>;
}
diff --git a/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp b/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp
index 5df36dd..50bd6c7 100644
--- a/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp
+++ b/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp
@@ -919,7 +919,8 @@ SDValue DAGTypeLegalizer::PromoteIntOp_BUILD_VECTOR(SDNode *N) {
// type does not have a strange size (eg: it is not i1).
EVT VecVT = N->getValueType(0);
unsigned NumElts = VecVT.getVectorNumElements();
- assert(!(NumElts & 1) && "Legal vector of one illegal element?");
+ assert(!((NumElts & 1) && (!TLI.isTypeLegal(VecVT))) &&
+ "Legal vector of one illegal element?");
// Promote the inserted value. The type does not need to match the
// vector element type. Check that any extra bits introduced will be
diff --git a/lib/IR/Function.cpp b/lib/IR/Function.cpp
index a64a4fa..f4bf774 100644
--- a/lib/IR/Function.cpp
+++ b/lib/IR/Function.cpp
@@ -453,7 +453,8 @@ enum IIT_Info {
IIT_STRUCT5 = 22,
IIT_EXTEND_VEC_ARG = 23,
IIT_TRUNC_VEC_ARG = 24,
- IIT_ANYPTR = 25
+ IIT_ANYPTR = 25,
+ IIT_V1 = 26
};
@@ -497,6 +498,10 @@ static void DecodeIITType(unsigned &NextElt, ArrayRef<unsigned char> Infos,
case IIT_I64:
OutputTable.push_back(IITDescriptor::get(IITDescriptor::Integer, 64));
return;
+ case IIT_V1:
+ OutputTable.push_back(IITDescriptor::get(IITDescriptor::Vector, 1));
+ DecodeIITType(NextElt, Infos, OutputTable);
+ return;
case IIT_V2:
OutputTable.push_back(IITDescriptor::get(IITDescriptor::Vector, 2));
DecodeIITType(NextElt, Infos, OutputTable);
diff --git a/lib/IR/ValueTypes.cpp b/lib/IR/ValueTypes.cpp
index 5aa4d06..3740050 100644
--- a/lib/IR/ValueTypes.cpp
+++ b/lib/IR/ValueTypes.cpp
@@ -134,6 +134,7 @@ std::string EVT::getEVTString() const {
case MVT::v16i1: return "v16i1";
case MVT::v32i1: return "v32i1";
case MVT::v64i1: return "v64i1";
+ case MVT::v1i8: return "v1i8";
case MVT::v2i8: return "v2i8";
case MVT::v4i8: return "v4i8";
case MVT::v8i8: return "v8i8";
@@ -156,12 +157,14 @@ std::string EVT::getEVTString() const {
case MVT::v4i64: return "v4i64";
case MVT::v8i64: return "v8i64";
case MVT::v16i64: return "v16i64";
+ case MVT::v1f32: return "v1f32";
case MVT::v2f32: return "v2f32";
case MVT::v2f16: return "v2f16";
case MVT::v8f16: return "v8f16";
case MVT::v4f32: return "v4f32";
case MVT::v8f32: return "v8f32";
case MVT::v16f32: return "v16f32";
+ case MVT::v1f64: return "v1f64";
case MVT::v2f64: return "v2f64";
case MVT::v4f64: return "v4f64";
case MVT::v8f64: return "v8f64";
@@ -198,6 +201,7 @@ Type *EVT::getTypeForEVT(LLVMContext &Context) const {
case MVT::v16i1: return VectorType::get(Type::getInt1Ty(Context), 16);
case MVT::v32i1: return VectorType::get(Type::getInt1Ty(Context), 32);
case MVT::v64i1: return VectorType::get(Type::getInt1Ty(Context), 64);
+ case MVT::v1i8: return VectorType::get(Type::getInt8Ty(Context), 1);
case MVT::v2i8: return VectorType::get(Type::getInt8Ty(Context), 2);
case MVT::v4i8: return VectorType::get(Type::getInt8Ty(Context), 4);
case MVT::v8i8: return VectorType::get(Type::getInt8Ty(Context), 8);
@@ -222,10 +226,12 @@ Type *EVT::getTypeForEVT(LLVMContext &Context) const {
case MVT::v16i64: return VectorType::get(Type::getInt64Ty(Context), 16);
case MVT::v2f16: return VectorType::get(Type::getHalfTy(Context), 2);
case MVT::v8f16: return VectorType::get(Type::getHalfTy(Context), 8);
+ case MVT::v1f32: return VectorType::get(Type::getFloatTy(Context), 1);
case MVT::v2f32: return VectorType::get(Type::getFloatTy(Context), 2);
case MVT::v4f32: return VectorType::get(Type::getFloatTy(Context), 4);
case MVT::v8f32: return VectorType::get(Type::getFloatTy(Context), 8);
case MVT::v16f32: return VectorType::get(Type::getFloatTy(Context), 16);
+ case MVT::v1f64: return VectorType::get(Type::getDoubleTy(Context), 1);
case MVT::v2f64: return VectorType::get(Type::getDoubleTy(Context), 2);
case MVT::v4f64: return VectorType::get(Type::getDoubleTy(Context), 4);
case MVT::v8f64: return VectorType::get(Type::getDoubleTy(Context), 8);
diff --git a/lib/Target/AArch64/AArch64CallingConv.td b/lib/Target/AArch64/AArch64CallingConv.td
index bff7eeb..a2a9f3f 100644
--- a/lib/Target/AArch64/AArch64CallingConv.td
+++ b/lib/Target/AArch64/AArch64CallingConv.td
@@ -59,9 +59,9 @@ def CC_A64_APCS : CallingConv<[
// Canonicalise the various types that live in different floating-point
// registers. This makes sense because the PCS does not distinguish Short
// Vectors and Floating-point types.
- CCIfType<[v2i8], CCBitConvertToType<f16>>,
- CCIfType<[v4i8, v2i16], CCBitConvertToType<f32>>,
- CCIfType<[v8i8, v4i16, v2i32, v2f32, v1i64], CCBitConvertToType<f64>>,
+ CCIfType<[v1i16, v2i8], CCBitConvertToType<f16>>,
+ CCIfType<[v1i32, v4i8, v2i16, v1f32], CCBitConvertToType<f32>>,
+ CCIfType<[v8i8, v4i16, v2i32, v2f32, v1i64, v1f64], CCBitConvertToType<f64>>,
CCIfType<[v16i8, v8i16, v4i32, v2i64, v4f32, v2f64],
CCBitConvertToType<f128>>,
@@ -70,7 +70,8 @@ def CC_A64_APCS : CallingConv<[
// argument is allocated to the least significant bits of register
// v[NSRN]. The NSRN is incremented by one. The argument has now been
// allocated."
- CCIfType<[f16], CCAssignToReg<[B0, B1, B2, B3, B4, B5, B6, B7]>>,
+ CCIfType<[v1i8], CCAssignToReg<[B0, B1, B2, B3, B4, B5, B6, B7]>>,
+ CCIfType<[f16], CCAssignToReg<[H0, H1, H2, H3, H4, H5, H6, H7]>>,
CCIfType<[f32], CCAssignToReg<[S0, S1, S2, S3, S4, S5, S6, S7]>>,
CCIfType<[f64], CCAssignToReg<[D0, D1, D2, D3, D4, D5, D6, D7]>>,
CCIfType<[f128], CCAssignToReg<[Q0, Q1, Q2, Q3, Q4, Q5, Q6, Q7]>>,
diff --git a/lib/Target/AArch64/AArch64ISelLowering.cpp b/lib/Target/AArch64/AArch64ISelLowering.cpp
index 8597f07..48f34c0 100644
--- a/lib/Target/AArch64/AArch64ISelLowering.cpp
+++ b/lib/Target/AArch64/AArch64ISelLowering.cpp
@@ -57,6 +57,12 @@ AArch64TargetLowering::AArch64TargetLowering(AArch64TargetMachine &TM)
if (Subtarget->hasNEON()) {
// And the vectors
+ addRegisterClass(MVT::v1i8, &AArch64::FPR8RegClass);
+ addRegisterClass(MVT::v1i16, &AArch64::FPR16RegClass);
+ addRegisterClass(MVT::v1i32, &AArch64::FPR32RegClass);
+ addRegisterClass(MVT::v1i64, &AArch64::FPR64RegClass);
+ addRegisterClass(MVT::v1f32, &AArch64::FPR32RegClass);
+ addRegisterClass(MVT::v1f64, &AArch64::FPR64RegClass);
addRegisterClass(MVT::v8i8, &AArch64::FPR64RegClass);
addRegisterClass(MVT::v4i16, &AArch64::FPR64RegClass);
addRegisterClass(MVT::v2i32, &AArch64::FPR64RegClass);
@@ -274,16 +280,21 @@ AArch64TargetLowering::AArch64TargetLowering(AArch64TargetMachine &TM)
setExceptionSelectorRegister(AArch64::X1);
if (Subtarget->hasNEON()) {
+ setOperationAction(ISD::BUILD_VECTOR, MVT::v1i8, Custom);
setOperationAction(ISD::BUILD_VECTOR, MVT::v8i8, Custom);
setOperationAction(ISD::BUILD_VECTOR, MVT::v16i8, Custom);
+ setOperationAction(ISD::BUILD_VECTOR, MVT::v1i16, Custom);
setOperationAction(ISD::BUILD_VECTOR, MVT::v4i16, Custom);
setOperationAction(ISD::BUILD_VECTOR, MVT::v8i16, Custom);
+ setOperationAction(ISD::BUILD_VECTOR, MVT::v1i32, Custom);
setOperationAction(ISD::BUILD_VECTOR, MVT::v2i32, Custom);
setOperationAction(ISD::BUILD_VECTOR, MVT::v4i32, Custom);
setOperationAction(ISD::BUILD_VECTOR, MVT::v1i64, Custom);
setOperationAction(ISD::BUILD_VECTOR, MVT::v2i64, Custom);
+ setOperationAction(ISD::BUILD_VECTOR, MVT::v1f32, Custom);
setOperationAction(ISD::BUILD_VECTOR, MVT::v2f32, Custom);
setOperationAction(ISD::BUILD_VECTOR, MVT::v4f32, Custom);
+ setOperationAction(ISD::BUILD_VECTOR, MVT::v1f64, Custom);
setOperationAction(ISD::BUILD_VECTOR, MVT::v2f64, Custom);
setOperationAction(ISD::CONCAT_VECTORS, MVT::v2i64, Legal);
diff --git a/lib/Target/AArch64/AArch64InstrFormats.td b/lib/Target/AArch64/AArch64InstrFormats.td
index 735670b..4f48712 100644
--- a/lib/Target/AArch64/AArch64InstrFormats.td
+++ b/lib/Target/AArch64/AArch64InstrFormats.td
@@ -1074,8 +1074,7 @@ class NeonI_2VMisc<bit q, bit u, bits<2> size, bits<5> opcode,
class NeonI_2VShiftImm<bit q, bit u, bits<5> opcode,
dag outs, dag ins, string asmstr,
list<dag> patterns, InstrItinClass itin>
- : A64InstRdn<outs, ins, asmstr, patterns, itin>
-{
+ : A64InstRdn<outs, ins, asmstr, patterns, itin> {
bits<7> Imm;
let Inst{31} = 0b0;
let Inst{30} = q;
@@ -1129,5 +1128,23 @@ class NeonI_insert<bit q, bit op,
// Inherit Rd in 4-0
}
+// Format AdvSIMD scalar pairwise
+class NeonI_ScalarPair<bit u, bits<2> size, bits<5> opcode,
+ dag outs, dag ins, string asmstr,
+ list<dag> patterns, InstrItinClass itin>
+ : A64InstRdn<outs, ins, asmstr, patterns, itin> {
+ let Inst{31} = 0b0;
+ let Inst{30} = 0b1;
+ let Inst{29} = u;
+ let Inst{28-24} = 0b11110;
+ let Inst{23-22} = size;
+ let Inst{21-17} = 0b11000;
+ let Inst{16-12} = opcode;
+ let Inst{11-10} = 0b10;
+
+ // Inherit Rn in 9-5
+ // Inherit Rd in 4-0
+}
+
}
diff --git a/lib/Target/AArch64/AArch64InstrInfo.td b/lib/Target/AArch64/AArch64InstrInfo.td
index fef3019..2332799 100644
--- a/lib/Target/AArch64/AArch64InstrInfo.td
+++ b/lib/Target/AArch64/AArch64InstrInfo.td
@@ -2189,22 +2189,22 @@ def FNMSUBdddd : A64I_fpdp3Impl<"fnmsub", FPR64, f64, 0b01, 0b1, 0b1, fnmsub>;
// Extra patterns for when we're allowed to optimise separate multiplication and
// addition.
let Predicates = [UseFusedMAC] in {
-def : Pat<(fadd FPR32:$Ra, (fmul FPR32:$Rn, FPR32:$Rm)),
+def : Pat<(f32 (fadd FPR32:$Ra, (f32 (fmul FPR32:$Rn, FPR32:$Rm)))),
(FMADDssss FPR32:$Rn, FPR32:$Rm, FPR32:$Ra)>;
-def : Pat<(fsub FPR32:$Ra, (fmul FPR32:$Rn, FPR32:$Rm)),
+def : Pat<(f32 (fsub FPR32:$Ra, (f32 (fmul FPR32:$Rn, FPR32:$Rm)))),
(FMSUBssss FPR32:$Rn, FPR32:$Rm, FPR32:$Ra)>;
-def : Pat<(fsub (fmul FPR32:$Rn, FPR32:$Rm), FPR32:$Ra),
+def : Pat<(f32 (fsub (f32 (fmul FPR32:$Rn, FPR32:$Rm)), FPR32:$Ra)),
(FNMADDssss FPR32:$Rn, FPR32:$Rm, FPR32:$Ra)>;
-def : Pat<(fsub (fneg FPR32:$Ra), (fmul FPR32:$Rn, FPR32:$Rm)),
+def : Pat<(f32 (fsub (f32 (fneg FPR32:$Ra)), (f32 (fmul FPR32:$Rn, FPR32:$Rm)))),
(FNMSUBssss FPR32:$Rn, FPR32:$Rm, FPR32:$Ra)>;
-def : Pat<(fadd FPR64:$Ra, (fmul (f64 FPR64:$Rn), FPR64:$Rm)),
+def : Pat<(f64 (fadd FPR64:$Ra, (f64 (fmul FPR64:$Rn, FPR64:$Rm)))),
(FMADDdddd FPR64:$Rn, FPR64:$Rm, FPR64:$Ra)>;
-def : Pat<(fsub FPR64:$Ra, (fmul (f64 FPR64:$Rn), FPR64:$Rm)),
+def : Pat<(f64 (fsub FPR64:$Ra, (f64 (fmul FPR64:$Rn, FPR64:$Rm)))),
(FMSUBdddd FPR64:$Rn, FPR64:$Rm, FPR64:$Ra)>;
-def : Pat<(fsub (fmul (f64 FPR64:$Rn), FPR64:$Rm), FPR64:$Ra),
+def : Pat<(f64 (fsub (f64 (fmul FPR64:$Rn, FPR64:$Rm)), FPR64:$Ra)),
(FNMADDdddd FPR64:$Rn, FPR64:$Rm, FPR64:$Ra)>;
-def : Pat<(fsub (fneg (f64 FPR64:$Ra)), (fmul FPR64:$Rn, FPR64:$Rm)),
+def : Pat<(f64 (fsub (f64 (fneg FPR64:$Ra)), (f64 (fmul FPR64:$Rn, FPR64:$Rm)))),
(FNMSUBdddd FPR64:$Rn, FPR64:$Rm, FPR64:$Ra)>;
}
diff --git a/lib/Target/AArch64/AArch64InstrNEON.td b/lib/Target/AArch64/AArch64InstrNEON.td
index 5506aff..4bd5a67 100644
--- a/lib/Target/AArch64/AArch64InstrNEON.td
+++ b/lib/Target/AArch64/AArch64InstrNEON.td
@@ -2504,11 +2504,12 @@ defm UABDLvvv : NeonI_3VDL_zext<0b1, 0b0111, "uabdl", int_arm_neon_vabdu, 1>;
multiclass NeonI_Op_High<SDPatternOperator op>
{
def _16B : PatFrag<(ops node:$Rn, node:$Rm),
- (op (Neon_top16B node:$Rn), (Neon_top16B node:$Rm))>;
+ (op (v8i8 (Neon_top16B node:$Rn)), (v8i8 (Neon_top16B node:$Rm)))>;
def _8H : PatFrag<(ops node:$Rn, node:$Rm),
- (op (Neon_top8H node:$Rn), (Neon_top8H node:$Rm))>;
+ (op (v4i16 (Neon_top8H node:$Rn)), (v4i16 (Neon_top8H node:$Rm)))>;
def _4S : PatFrag<(ops node:$Rn, node:$Rm),
- (op (Neon_top4S node:$Rn), (Neon_top4S node:$Rm))>;
+ (op (v2i32 (Neon_top4S node:$Rn)), (v2i32 (Neon_top4S node:$Rm)))>;
+
}
defm NI_sabdl_hi : NeonI_Op_High<int_arm_neon_vabds>;
@@ -2868,9 +2869,25 @@ multiclass NeonI_Scalar3Same_BHSD_sizes<bit u, bits<5> opcode,
}
}
-class Neon_Scalar_D_size_patterns<SDPatternOperator opnode, Instruction INSTD>
- : Pat<(v1i64 (opnode (v1i64 VPR64:$Rn), (v1i64 VPR64:$Rm))),
- (INSTD VPR64:$Rn, VPR64:$Rm)>;
+multiclass Neon_Scalar_D_size_patterns<SDPatternOperator opnode,
+ Instruction INSTD> {
+ def : Pat<(v1i64 (opnode (v1i64 FPR64:$Rn), (v1i64 FPR64:$Rm))),
+ (INSTD FPR64:$Rn, FPR64:$Rm)>;
+}
+
+multiclass Neon_Scalar_BHSD_size_patterns<SDPatternOperator opnode,
+ Instruction INSTB, Instruction INSTH,
+ Instruction INSTS, Instruction INSTD>
+ : Neon_Scalar_D_size_patterns<opnode, INSTD> {
+ def: Pat<(v1i8 (opnode (v1i8 FPR8:$Rn), (v1i8 FPR8:$Rm))),
+ (INSTB FPR8:$Rn, FPR8:$Rm)>;
+
+ def: Pat<(v1i16 (opnode (v1i16 FPR16:$Rn), (v1i16 FPR16:$Rm))),
+ (INSTH FPR16:$Rn, FPR16:$Rm)>;
+
+ def: Pat<(v1i32 (opnode (v1i32 FPR32:$Rn), (v1i32 FPR32:$Rm))),
+ (INSTS FPR32:$Rn, FPR32:$Rm)>;
+}
// Scalar Integer Add
let isCommutable = 1 in {
@@ -2880,9 +2897,15 @@ def ADDddd : NeonI_Scalar3Same_D_size<0b0, 0b10000, "add">;
// Scalar Integer Sub
def SUBddd : NeonI_Scalar3Same_D_size<0b1, 0b10000, "sub">;
-// Pattern for Scalar Integer Add and Sub with D register
-def : Neon_Scalar_D_size_patterns<add, ADDddd>;
-def : Neon_Scalar_D_size_patterns<sub, SUBddd>;
+// Pattern for Scalar Integer Add and Sub with D register only
+defm : Neon_Scalar_D_size_patterns<add, ADDddd>;
+defm : Neon_Scalar_D_size_patterns<sub, SUBddd>;
+
+// Patterns to match llvm.aarch64.* intrinsic for Scalar Add, Sub
+defm : Neon_Scalar_D_size_patterns<int_aarch64_neon_vaddds, ADDddd>;
+defm : Neon_Scalar_D_size_patterns<int_aarch64_neon_vadddu, ADDddd>;
+defm : Neon_Scalar_D_size_patterns<int_aarch64_neon_vsubds, SUBddd>;
+defm : Neon_Scalar_D_size_patterns<int_aarch64_neon_vsubdu, SUBddd>;
// Scalar Integer Saturating Add (Signed, Unsigned)
defm SQADD : NeonI_Scalar3Same_BHSD_sizes<0b0, 0b00001, "sqadd", 1>;
@@ -2892,40 +2915,160 @@ defm UQADD : NeonI_Scalar3Same_BHSD_sizes<0b1, 0b00001, "uqadd", 1>;
defm SQSUB : NeonI_Scalar3Same_BHSD_sizes<0b0, 0b00101, "sqsub", 0>;
defm UQSUB : NeonI_Scalar3Same_BHSD_sizes<0b1, 0b00101, "uqsub", 0>;
-// Patterns for Scalar Integer Saturating Add, Sub with D register only
-def : Neon_Scalar_D_size_patterns<int_arm_neon_vqadds, SQADDddd>;
-def : Neon_Scalar_D_size_patterns<int_arm_neon_vqaddu, UQADDddd>;
-def : Neon_Scalar_D_size_patterns<int_arm_neon_vqsubs, SQSUBddd>;
-def : Neon_Scalar_D_size_patterns<int_arm_neon_vqsubu, UQSUBddd>;
+// Patterns to match llvm.arm.* intrinsic for
+// Scalar Integer Saturating Add, Sub (Signed, Unsigned)
+defm : Neon_Scalar_D_size_patterns<int_arm_neon_vqadds, SQADDddd>;
+defm : Neon_Scalar_D_size_patterns<int_arm_neon_vqaddu, UQADDddd>;
+defm : Neon_Scalar_D_size_patterns<int_arm_neon_vqsubs, SQSUBddd>;
+defm : Neon_Scalar_D_size_patterns<int_arm_neon_vqsubu, UQSUBddd>;
+
+// Patterns to match llvm.aarch64.* intrinsic for
+// Scalar Integer Saturating Add, Sub (Signed, Unsigned)
+defm : Neon_Scalar_BHSD_size_patterns<int_aarch64_neon_vqadds, SQADDbbb, SQADDhhh,
+ SQADDsss, SQADDddd>;
+defm : Neon_Scalar_BHSD_size_patterns<int_aarch64_neon_vqaddu, UQADDbbb, UQADDhhh,
+ UQADDsss, UQADDddd>;
+defm : Neon_Scalar_BHSD_size_patterns<int_aarch64_neon_vqsubs, SQSUBbbb, SQSUBhhh,
+ SQSUBsss, SQSUBddd>;
+defm : Neon_Scalar_BHSD_size_patterns<int_aarch64_neon_vqsubu, UQSUBbbb, UQSUBhhh,
+ UQSUBsss, UQSUBddd>;
// Scalar Integer Shift Left (Signed, Unsigned)
def SSHLddd : NeonI_Scalar3Same_D_size<0b0, 0b01000, "sshl">;
def USHLddd : NeonI_Scalar3Same_D_size<0b1, 0b01000, "ushl">;
+// Patterns to match llvm.arm.* intrinsic for
+// Scalar Integer Shift Left (Signed, Unsigned)
+defm : Neon_Scalar_D_size_patterns<int_arm_neon_vshifts, SSHLddd>;
+defm : Neon_Scalar_D_size_patterns<int_arm_neon_vshiftu, USHLddd>;
+
+// Patterns to match llvm.aarch64.* intrinsic for
+// Scalar Integer Shift Left (Signed, Unsigned)
+defm : Neon_Scalar_D_size_patterns<int_aarch64_neon_vshlds, SSHLddd>;
+defm : Neon_Scalar_D_size_patterns<int_aarch64_neon_vshldu, USHLddd>;
+
// Scalar Integer Saturating Shift Left (Signed, Unsigned)
defm SQSHL: NeonI_Scalar3Same_BHSD_sizes<0b0, 0b01001, "sqshl", 0>;
defm UQSHL: NeonI_Scalar3Same_BHSD_sizes<0b1, 0b01001, "uqshl", 0>;
-// Scalar Integer Rouding Shift Left (Signed, Unsigned)
+// Patterns to match llvm.aarch64.* intrinsic for
+// Scalar Integer Saturating Shift Left (Signed, Unsigned)
+defm : Neon_Scalar_BHSD_size_patterns<int_aarch64_neon_vqshls, SQSHLbbb,
+                                      SQSHLhhh, SQSHLsss, SQSHLddd>;
+defm : Neon_Scalar_BHSD_size_patterns<int_aarch64_neon_vqshlu, UQSHLbbb,
+                                      UQSHLhhh, UQSHLsss, UQSHLddd>;
+
+// Patterns to match llvm.arm.* intrinsic for
+// Scalar Integer Saturating Shift Left (Signed, Unsigned)
+defm : Neon_Scalar_D_size_patterns<int_arm_neon_vqshifts, SQSHLddd>;
+defm : Neon_Scalar_D_size_patterns<int_arm_neon_vqshiftu, UQSHLddd>;
+
+// Scalar Integer Rounding Shift Left (Signed, Unsigned)
def SRSHLddd: NeonI_Scalar3Same_D_size<0b0, 0b01010, "srshl">;
def URSHLddd: NeonI_Scalar3Same_D_size<0b1, 0b01010, "urshl">;
+// Patterns to match llvm.aarch64.* intrinsic for
+// Scalar Integer Rounding Shift Left (Signed, Unsigned)
+defm : Neon_Scalar_D_size_patterns<int_aarch64_neon_vrshlds, SRSHLddd>;
+defm : Neon_Scalar_D_size_patterns<int_aarch64_neon_vrshldu, URSHLddd>;
+
+// Patterns to match llvm.arm.* intrinsic for
+// Scalar Integer Rounding Shift Left (Signed, Unsigned)
+defm : Neon_Scalar_D_size_patterns<int_arm_neon_vrshifts, SRSHLddd>;
+defm : Neon_Scalar_D_size_patterns<int_arm_neon_vrshiftu, URSHLddd>;
+
// Scalar Integer Saturating Rounding Shift Left (Signed, Unsigned)
defm SQRSHL: NeonI_Scalar3Same_BHSD_sizes<0b0, 0b01011, "sqrshl", 0>;
defm UQRSHL: NeonI_Scalar3Same_BHSD_sizes<0b1, 0b01011, "uqrshl", 0>;
-// Patterns for Scalar Integer Shift Lef, Saturating Shift Left,
-// Rounding Shift Left, Rounding Saturating Shift Left with D register only
-def : Neon_Scalar_D_size_patterns<int_arm_neon_vshifts, SSHLddd>;
-def : Neon_Scalar_D_size_patterns<int_arm_neon_vshiftu, USHLddd>;
-def : Neon_Scalar_D_size_patterns<shl, SSHLddd>;
-def : Neon_Scalar_D_size_patterns<shl, USHLddd>;
-def : Neon_Scalar_D_size_patterns<int_arm_neon_vqshifts, SQSHLddd>;
-def : Neon_Scalar_D_size_patterns<int_arm_neon_vqshiftu, UQSHLddd>;
-def : Neon_Scalar_D_size_patterns<int_arm_neon_vrshifts, SRSHLddd>;
-def : Neon_Scalar_D_size_patterns<int_arm_neon_vrshiftu, URSHLddd>;
-def : Neon_Scalar_D_size_patterns<int_arm_neon_vqrshifts, SQRSHLddd>;
-def : Neon_Scalar_D_size_patterns<int_arm_neon_vqrshiftu, UQRSHLddd>;
+// Patterns to match llvm.aarch64.* intrinsic for
+// Scalar Integer Saturating Rounding Shift Left (Signed, Unsigned)
+defm : Neon_Scalar_BHSD_size_patterns<int_aarch64_neon_vqrshls, SQRSHLbbb,
+                                      SQRSHLhhh, SQRSHLsss, SQRSHLddd>;
+defm : Neon_Scalar_BHSD_size_patterns<int_aarch64_neon_vqrshlu, UQRSHLbbb,
+                                      UQRSHLhhh, UQRSHLsss, UQRSHLddd>;
+
+// Patterns to match llvm.arm.* intrinsic for
+// Scalar Integer Saturating Rounding Shift Left (Signed, Unsigned)
+defm : Neon_Scalar_D_size_patterns<int_arm_neon_vqrshifts, SQRSHLddd>;
+defm : Neon_Scalar_D_size_patterns<int_arm_neon_vqrshiftu, UQRSHLddd>;
+
+// Scalar Reduce Pairwise
+
+// Multiclass for scalar pairwise-reduce instructions with a 64-bit result.
+// It emits one record, <prefix>_D_2D, built on NeonI_ScalarPair:
+//   u, size, opcode - forwarded to NeonI_ScalarPair; size is concatenated
+//                     with a constant 1 bit for this form. NOTE(review):
+//                     presumably encoding fields - confirm against the
+//                     NeonI_ScalarPair definition.
+//   asmop           - mnemonic; the operand text " $Rd, $Rn.2d" is appended.
+//   Commutable      - value assigned to isCommutable (defaults to 0).
+multiclass NeonI_ScalarPair_D_sizes<bit u, bit size, bits<5> opcode,
+ string asmop, bit Commutable = 0> {
+ let isCommutable = Commutable in {
+ // Result goes to FPR64; the single source operand is a VPR128 register.
+ // The pattern list is empty, so selection patterns are supplied elsewhere.
+ def _D_2D : NeonI_ScalarPair<u, {size, 0b1}, opcode,
+ (outs FPR64:$Rd), (ins VPR128:$Rn),
+ !strconcat(asmop, " $Rd, $Rn.2d"),
+ [],
+ NoItinerary>;
+ }
+}
+
+// Extends NeonI_ScalarPair_D_sizes with a 32-bit form. In addition to the
+// inherited _D_2D record it emits <prefix>_S_2S, built on NeonI_ScalarPair
+// with size concatenated with a constant 0 bit. All parameters are passed
+// through unchanged to NeonI_ScalarPair_D_sizes.
+multiclass NeonI_ScalarPair_SD_sizes<bit u, bit size, bits<5> opcode,
+ string asmop, bit Commutable = 0>
+ : NeonI_ScalarPair_D_sizes<u, size, opcode, asmop, Commutable> {
+ let isCommutable = Commutable in {
+ // Result goes to FPR32; the single source operand is a VPR64 register,
+ // printed as "$Rn.2s". The pattern list is empty here as well.
+ def _S_2S : NeonI_ScalarPair<u, {size, 0b0}, opcode,
+ (outs FPR32:$Rd), (ins VPR64:$Rn),
+ !strconcat(asmop, " $Rd, $Rn.2s"),
+ [],
+ NoItinerary>;
+ }
+}
+
+// Scalar Reduce Addition Pairwise (Integer)
+// (defined without a selection pattern; see the Pat<> that follows)
+defm ADDPvv : NeonI_ScalarPair_D_sizes<0b0, 0b1, 0b11011, "addp", 0>;
+
+// Pattern to match llvm.aarch64.* intrinsic for
+// Scalar Reduce Addition Pairwise (Integer)
+def : Pat<(v1i64 (int_aarch64_neon_vpadd (v2i64 VPR128:$Rn))),
+ (ADDPvv_D_2D VPR128:$Rn)>;
+
+// Scalar Reduce Addition Pairwise (Floating Point)
+defm FADDPvv : NeonI_ScalarPair_SD_sizes<0b1, 0b0, 0b01101, "faddp", 0>;
+
+// Scalar Reduce Maximum Pairwise (Floating Point)
+defm FMAXPvv : NeonI_ScalarPair_SD_sizes<0b1, 0b0, 0b01111, "fmaxp", 0>;
+
+// Scalar Reduce Minimum Pairwise (Floating Point)
+defm FMINPvv : NeonI_ScalarPair_SD_sizes<0b1, 0b1, 0b01111, "fminp", 0>;
+
+// Scalar Reduce maxNum Pairwise (Floating Point)
+defm FMAXNMPvv : NeonI_ScalarPair_SD_sizes<0b1, 0b0, 0b01100, "fmaxnmp", 0>;
+
+// Scalar Reduce minNum Pairwise (Floating Point)
+defm FMINNMPvv : NeonI_ScalarPair_SD_sizes<0b1, 0b1, 0b01100, "fminnmp", 0>;
+
+// Emits the two selection patterns for a floating-point scalar pairwise
+// reduction:
+//   opnodeS / INSTS - operator taking v2f32 (VPR64) and yielding v1f32.
+//   opnodeD / INSTD - operator taking v2f64 (VPR128) and yielding v1f64.
+// Each pattern maps the operator applied to $Rn onto the given instruction
+// with the same register operand.
+multiclass Neon_ScalarPair_SD_size_patterns<SDPatternOperator opnodeS,
+ SDPatternOperator opnodeD,
+ Instruction INSTS,
+ Instruction INSTD> {
+ def : Pat<(v1f32 (opnodeS (v2f32 VPR64:$Rn))),
+ (INSTS VPR64:$Rn)>;
+ def : Pat<(v1f64 (opnodeD (v2f64 VPR128:$Rn))),
+ (INSTD VPR128:$Rn)>;
+}
+
+// Patterns to match llvm.aarch64.* intrinsic for
+// Scalar Reduce Add, Max, Min, MaxNum, MinNum Pairwise (Floating Point)
+defm : Neon_ScalarPair_SD_size_patterns<int_aarch64_neon_vpfadd,
+ int_aarch64_neon_vpfaddq, FADDPvv_S_2S, FADDPvv_D_2D>;
+
+defm : Neon_ScalarPair_SD_size_patterns<int_aarch64_neon_vpmax,
+ int_aarch64_neon_vpmaxq, FMAXPvv_S_2S, FMAXPvv_D_2D>;
+
+defm : Neon_ScalarPair_SD_size_patterns<int_aarch64_neon_vpmin,
+ int_aarch64_neon_vpminq, FMINPvv_S_2S, FMINPvv_D_2D>;
+
+defm : Neon_ScalarPair_SD_size_patterns<int_aarch64_neon_vpfmaxnm,
+ int_aarch64_neon_vpfmaxnmq, FMAXNMPvv_S_2S, FMAXNMPvv_D_2D>;
+
+defm : Neon_ScalarPair_SD_size_patterns<int_aarch64_neon_vpfminnm,
+ int_aarch64_neon_vpfminnmq, FMINNMPvv_S_2S, FMINNMPvv_D_2D>;
+
//===----------------------------------------------------------------------===//
@@ -2999,6 +3142,14 @@ def : Pat<(v16i8 (bitconvert (v2f64 VPR128:$src))), (v16i8 VPR128:$src)>;
// ...and scalar bitcasts...
+def : Pat<(f16 (bitconvert (v1i16 FPR16:$src))), (f16 FPR16:$src)>;
+def : Pat<(f32 (bitconvert (v1i32 FPR32:$src))), (f32 FPR32:$src)>;
+def : Pat<(f64 (bitconvert (v1i64 FPR64:$src))), (f64 FPR64:$src)>;
+def : Pat<(f32 (bitconvert (v1f32 FPR32:$src))), (f32 FPR32:$src)>;
+def : Pat<(f64 (bitconvert (v1f64 FPR64:$src))), (f64 FPR64:$src)>;
+
+def : Pat<(i64 (bitconvert (v1i64 FPR64:$src))), (FMOVxd $src)>;
+def : Pat<(i32 (bitconvert (v1i32 FPR32:$src))), (FMOVws $src)>;
def : Pat<(v8i8 (bitconvert (v1i64 VPR64:$src))), (v8i8 VPR64:$src)>;
def : Pat<(v4i16 (bitconvert (v1i64 VPR64:$src))), (v4i16 VPR64:$src)>;
@@ -3017,6 +3168,15 @@ def : Pat<(f128 (bitconvert (v2i64 VPR128:$src))), (f128 VPR128:$src)>;
def : Pat<(f128 (bitconvert (v4f32 VPR128:$src))), (f128 VPR128:$src)>;
def : Pat<(f128 (bitconvert (v2f64 VPR128:$src))), (f128 VPR128:$src)>;
+def : Pat<(v1i16 (bitconvert (f16 FPR16:$src))), (v1i16 FPR16:$src)>;
+def : Pat<(v1i32 (bitconvert (f32 FPR32:$src))), (v1i32 FPR32:$src)>;
+def : Pat<(v1i64 (bitconvert (f64 FPR64:$src))), (v1i64 FPR64:$src)>;
+def : Pat<(v1f32 (bitconvert (f32 FPR32:$src))), (v1f32 FPR32:$src)>;
+def : Pat<(v1f64 (bitconvert (f64 FPR64:$src))), (v1f64 FPR64:$src)>;
+
+def : Pat<(v1i64 (bitconvert (i64 GPR64:$src))), (FMOVdx $src)>;
+def : Pat<(v1i32 (bitconvert (i32 GPR32:$src))), (FMOVsw $src)>;
+
def : Pat<(v8i8 (bitconvert (f64 FPR64:$src))), (v8i8 FPR64:$src)>;
def : Pat<(v4i16 (bitconvert (f64 FPR64:$src))), (v4i16 FPR64:$src)>;
def : Pat<(v2i32 (bitconvert (f64 FPR64:$src))), (v2i32 FPR64:$src)>;
@@ -3349,8 +3509,6 @@ def UMOVwh_pattern : Neon_UMOV_pattern<v8i16, v4i16, i32, neon_uimm3_bare,
neon_uimm2_bare, UMOVwh>;
def UMOVws_pattern : Neon_UMOV_pattern<v4i32, v2i32, i32, neon_uimm2_bare,
neon_uimm1_bare, UMOVws>;
-def UMOVxd_pattern : Neon_UMOV_pattern<v2i64, v1i64, i64, neon_uimm1_bare,
- neon_uimm0_bare, UMOVxd>;
def : Pat<(i32 (and
(i32 (vector_extract
@@ -3389,3 +3547,40 @@ def : Pat<(i64 (zext
(UMOVxd (SUBREG_TO_REG (i64 0), VPR64:$Rn, sub_64),
neon_uimm0_bare:$Imm)>;
+// Additional copy patterns for scalar types
+// Element-0 extraction from one-element vectors held in scalar FP registers.
+// v1i8: the FPR8 value is wrapped as a v16i8 with SUBREG_TO_REG (sub_8) and
+// read into a 32-bit result with UMOVwb, index operand 0.
+def : Pat<(i32 (vector_extract (v1i8 FPR8:$Rn), (i64 0))),
+ (UMOVwb (v16i8
+ (SUBREG_TO_REG (i64 0), FPR8:$Rn, sub_8)), (i64 0))>;
+
+// v1i16: same scheme through v8i16 / sub_16 and UMOVwh.
+def : Pat<(i32 (vector_extract (v1i16 FPR16:$Rn), (i64 0))),
+ (UMOVwh (v8i16
+ (SUBREG_TO_REG (i64 0), FPR16:$Rn, sub_16)), (i64 0))>;
+
+// v1i32 -> i32 is selected to FMOVws on the FPR32 operand.
+def : Pat<(i32 (vector_extract (v1i32 FPR32:$Rn), (i64 0))),
+ (FMOVws FPR32:$Rn)>;
+
+// v1i64 -> i64 is selected to FMOVxd on the FPR64 operand.
+def : Pat<(i64 (vector_extract (v1i64 FPR64:$Rn), (i64 0))),
+ (FMOVxd FPR64:$Rn)>;
+
+// v1f64 -> f64: the result is the same FPR64 operand retyped; the output
+// pattern names no instruction.
+def : Pat<(f64 (vector_extract (v1f64 FPR64:$Rn), (i64 0))),
+ (f64 FPR64:$Rn)>;
+
+// v1f32 -> f32: likewise, the FPR32 operand is simply retyped.
+def : Pat<(f32 (vector_extract (v1f32 FPR32:$Rn), (i64 0))),
+ (f32 FPR32:$Rn)>;
+
+// scalar_to_vector from a general-purpose register to a one-element vector.
+// v1i8: INSbw writes $Rn at index 0 of an IMPLICIT_DEF v16i8, then the sub_8
+// subregister is extracted as the v1i8 result.
+def : Pat<(v1i8 (scalar_to_vector GPR32:$Rn)),
+ (v1i8 (EXTRACT_SUBREG (v16i8
+ (INSbw (v16i8 (IMPLICIT_DEF)), $Rn, (i64 0))),
+ sub_8))>;
+
+// v1i16: same scheme using INShw on a v8i16 and the sub_16 subregister.
+def : Pat<(v1i16 (scalar_to_vector GPR32:$Rn)),
+ (v1i16 (EXTRACT_SUBREG (v8i16
+ (INShw (v8i16 (IMPLICIT_DEF)), $Rn, (i64 0))),
+ sub_16))>;
+
+// v1i32 from GPR32 is selected to FMOVsw.
+def : Pat<(v1i32 (scalar_to_vector GPR32:$src)),
+ (FMOVsw $src)>;
+
+// v1i64 from GPR64 is selected to FMOVdx.
+def : Pat<(v1i64 (scalar_to_vector GPR64:$src)),
+ (FMOVdx $src)>;
+
diff --git a/lib/Target/AArch64/AArch64RegisterInfo.td b/lib/Target/AArch64/AArch64RegisterInfo.td
index e0eca23..089cc08 100644
--- a/lib/Target/AArch64/AArch64RegisterInfo.td
+++ b/lib/Target/AArch64/AArch64RegisterInfo.td
@@ -133,19 +133,19 @@ foreach Index = 0-31 in {
}
-def FPR8 : RegisterClass<"AArch64", [i8], 8,
+def FPR8 : RegisterClass<"AArch64", [i8, v1i8], 8,
(sequence "B%u", 0, 31)> {
}
-def FPR16 : RegisterClass<"AArch64", [f16], 16,
+def FPR16 : RegisterClass<"AArch64", [f16, v1i16], 16,
(sequence "H%u", 0, 31)> {
}
-def FPR32 : RegisterClass<"AArch64", [f32], 32,
+def FPR32 : RegisterClass<"AArch64", [f32, v1i32, v1f32], 32,
(sequence "S%u", 0, 31)> {
}
-def FPR64 : RegisterClass<"AArch64", [f64, v2f32, v2i32, v4i16, v8i8, v1i64],
+def FPR64 : RegisterClass<"AArch64", [f64, v2f32, v2i32, v4i16, v8i8, v1i64, v1f64],
64, (sequence "D%u", 0, 31)>;
def FPR128 : RegisterClass<"AArch64",
diff --git a/test/CodeGen/AArch64/neon-add-sub.ll b/test/CodeGen/AArch64/neon-add-sub.ll
index 65ec8a2..566e029 100644
--- a/test/CodeGen/AArch64/neon-add-sub.ll
+++ b/test/CodeGen/AArch64/neon-add-sub.ll
@@ -118,15 +118,3 @@ define <2 x double> @sub2xdouble(<2 x double> %A, <2 x double> %B) {
ret <2 x double> %tmp3
}
-define <1 x i64> @add1xi64(<1 x i64> %A, <1 x i64> %B) {
-;CHECK: add {{d[0-31]+}}, {{d[0-31]+}}, {{d[0-31]+}}
- %tmp3 = add <1 x i64> %A, %B;
- ret <1 x i64> %tmp3
-}
-
-define <1 x i64> @sub1xi64(<1 x i64> %A, <1 x i64> %B) {
-;CHECK: sub {{d[0-31]+}}, {{d[0-31]+}}, {{d[0-31]+}}
- %tmp3 = sub <1 x i64> %A, %B;
- ret <1 x i64> %tmp3
-}
-
diff --git a/test/CodeGen/AArch64/neon-copy.ll b/test/CodeGen/AArch64/neon-copy.ll
index c2854ed..2c50059 100644
--- a/test/CodeGen/AArch64/neon-copy.ll
+++ b/test/CodeGen/AArch64/neon-copy.ll
@@ -146,7 +146,7 @@ define i32 @umovw2s(<2 x i32> %tmp1) {
}
define i64 @umovx1d(<1 x i64> %tmp1) {
-;CHECK: umov {{x[0-31]+}}, {{v[0-31]+}}.d[0]
+;CHECK: fmov {{x[0-9]+}}, {{d[0-9]+}}
%tmp3 = extractelement <1 x i64> %tmp1, i32 0
ret i64 %tmp3
}
diff --git a/test/CodeGen/AArch64/neon-rounding-shift.ll b/test/CodeGen/AArch64/neon-rounding-shift.ll
index 404e491..5b4ec28 100644
--- a/test/CodeGen/AArch64/neon-rounding-shift.ll
+++ b/test/CodeGen/AArch64/neon-rounding-shift.ll
@@ -102,23 +102,6 @@ define <4 x i32> @test_srshl_v4i32(<4 x i32> %lhs, <4 x i32> %rhs) {
ret <4 x i32> %tmp1
}
-declare <1 x i64> @llvm.arm.neon.vrshiftu.v1i64(<1 x i64>, <1 x i64>)
-declare <1 x i64> @llvm.arm.neon.vrshifts.v1i64(<1 x i64>, <1 x i64>)
-
-define <1 x i64> @test_urshl_v1i64(<1 x i64> %lhs, <1 x i64> %rhs) {
-; CHECK: test_urshl_v1i64:
- %tmp1 = call <1 x i64> @llvm.arm.neon.vrshiftu.v1i64(<1 x i64> %lhs, <1 x i64> %rhs)
-; CHECK: urshl d0, d0, d1
- ret <1 x i64> %tmp1
-}
-
-define <1 x i64> @test_srshl_v1i64(<1 x i64> %lhs, <1 x i64> %rhs) {
-; CHECK: test_srshl_v1i64:
- %tmp1 = call <1 x i64> @llvm.arm.neon.vrshifts.v1i64(<1 x i64> %lhs, <1 x i64> %rhs)
-; CHECK: srshl d0, d0, d1
- ret <1 x i64> %tmp1
-}
-
declare <2 x i64> @llvm.arm.neon.vrshiftu.v2i64(<2 x i64>, <2 x i64>)
declare <2 x i64> @llvm.arm.neon.vrshifts.v2i64(<2 x i64>, <2 x i64>)
diff --git a/test/CodeGen/AArch64/neon-saturating-add-sub.ll b/test/CodeGen/AArch64/neon-saturating-add-sub.ll
index b2fac1f..fc60d90 100644
--- a/test/CodeGen/AArch64/neon-saturating-add-sub.ll
+++ b/test/CodeGen/AArch64/neon-saturating-add-sub.ll
@@ -102,22 +102,7 @@ define <4 x i32> @test_sqadd_v4i32(<4 x i32> %lhs, <4 x i32> %rhs) {
ret <4 x i32> %tmp1
}
-declare <1 x i64> @llvm.arm.neon.vqaddu.v1i64(<1 x i64>, <1 x i64>)
-declare <1 x i64> @llvm.arm.neon.vqadds.v1i64(<1 x i64>, <1 x i64>)
-
-define <1 x i64> @test_uqadd_v1i64(<1 x i64> %lhs, <1 x i64> %rhs) {
-; CHECK: test_uqadd_v1i64:
- %tmp1 = call <1 x i64> @llvm.arm.neon.vqaddu.v1i64(<1 x i64> %lhs, <1 x i64> %rhs)
-; CHECK: uqadd d0, d0, d1
- ret <1 x i64> %tmp1
-}
-define <1 x i64> @test_sqadd_v1i64(<1 x i64> %lhs, <1 x i64> %rhs) {
-; CHECK: test_sqadd_v1i64:
- %tmp1 = call <1 x i64> @llvm.arm.neon.vqadds.v1i64(<1 x i64> %lhs, <1 x i64> %rhs)
-; CHECK: sqadd d0, d0, d1
- ret <1 x i64> %tmp1
-}
declare <2 x i64> @llvm.arm.neon.vqaddu.v2i64(<2 x i64>, <2 x i64>)
declare <2 x i64> @llvm.arm.neon.vqadds.v2i64(<2 x i64>, <2 x i64>)
@@ -254,21 +239,3 @@ define <2 x i64> @test_sqsub_v2i64(<2 x i64> %lhs, <2 x i64> %rhs) {
; CHECK: sqsub v0.2d, v0.2d, v1.2d
ret <2 x i64> %tmp1
}
-
-declare <1 x i64> @llvm.arm.neon.vqsubu.v1i64(<1 x i64>, <1 x i64>)
-declare <1 x i64> @llvm.arm.neon.vqsubs.v1i64(<1 x i64>, <1 x i64>)
-
-define <1 x i64> @test_uqsub_v1i64(<1 x i64> %lhs, <1 x i64> %rhs) {
-; CHECK: test_uqsub_v1i64:
- %tmp1 = call <1 x i64> @llvm.arm.neon.vqsubu.v1i64(<1 x i64> %lhs, <1 x i64> %rhs)
-; CHECK: uqsub d0, d0, d1
- ret <1 x i64> %tmp1
-}
-
-define <1 x i64> @test_sqsub_v1i64(<1 x i64> %lhs, <1 x i64> %rhs) {
-; CHECK: test_sqsub_v1i64:
- %tmp1 = call <1 x i64> @llvm.arm.neon.vqsubs.v1i64(<1 x i64> %lhs, <1 x i64> %rhs)
-; CHECK: sqsub d0, d0, d1
- ret <1 x i64> %tmp1
-}
-
diff --git a/test/CodeGen/AArch64/neon-saturating-rounding-shift.ll b/test/CodeGen/AArch64/neon-saturating-rounding-shift.ll
index 05d8dfe..d89262c 100644
--- a/test/CodeGen/AArch64/neon-saturating-rounding-shift.ll
+++ b/test/CodeGen/AArch64/neon-saturating-rounding-shift.ll
@@ -102,23 +102,6 @@ define <4 x i32> @test_sqrshl_v4i32(<4 x i32> %lhs, <4 x i32> %rhs) {
ret <4 x i32> %tmp1
}
-declare <1 x i64> @llvm.arm.neon.vqrshiftu.v1i64(<1 x i64>, <1 x i64>)
-declare <1 x i64> @llvm.arm.neon.vqrshifts.v1i64(<1 x i64>, <1 x i64>)
-
-define <1 x i64> @test_uqrshl_v1i64(<1 x i64> %lhs, <1 x i64> %rhs) {
-; CHECK: test_uqrshl_v1i64:
- %tmp1 = call <1 x i64> @llvm.arm.neon.vqrshiftu.v1i64(<1 x i64> %lhs, <1 x i64> %rhs)
-; CHECK: uqrshl d0, d0, d1
- ret <1 x i64> %tmp1
-}
-
-define <1 x i64> @test_sqrshl_v1i64(<1 x i64> %lhs, <1 x i64> %rhs) {
-; CHECK: test_sqrshl_v1i64:
- %tmp1 = call <1 x i64> @llvm.arm.neon.vqrshifts.v1i64(<1 x i64> %lhs, <1 x i64> %rhs)
-; CHECK: sqrshl d0, d0, d1
- ret <1 x i64> %tmp1
-}
-
declare <2 x i64> @llvm.arm.neon.vqrshiftu.v2i64(<2 x i64>, <2 x i64>)
declare <2 x i64> @llvm.arm.neon.vqrshifts.v2i64(<2 x i64>, <2 x i64>)
diff --git a/test/CodeGen/AArch64/neon-saturating-shift.ll b/test/CodeGen/AArch64/neon-saturating-shift.ll
index 3b7f78c..11009fb 100644
--- a/test/CodeGen/AArch64/neon-saturating-shift.ll
+++ b/test/CodeGen/AArch64/neon-saturating-shift.ll
@@ -102,23 +102,6 @@ define <4 x i32> @test_sqshl_v4i32(<4 x i32> %lhs, <4 x i32> %rhs) {
ret <4 x i32> %tmp1
}
-declare <1 x i64> @llvm.arm.neon.vqshiftu.v1i64(<1 x i64>, <1 x i64>)
-declare <1 x i64> @llvm.arm.neon.vqshifts.v1i64(<1 x i64>, <1 x i64>)
-
-define <1 x i64> @test_uqshl_v1i64(<1 x i64> %lhs, <1 x i64> %rhs) {
-; CHECK: test_uqshl_v1i64:
- %tmp1 = call <1 x i64> @llvm.arm.neon.vqshiftu.v1i64(<1 x i64> %lhs, <1 x i64> %rhs)
-; CHECK: uqshl d0, d0, d1
- ret <1 x i64> %tmp1
-}
-
-define <1 x i64> @test_sqshl_v1i64(<1 x i64> %lhs, <1 x i64> %rhs) {
-; CHECK: test_sqshl_v1i64:
- %tmp1 = call <1 x i64> @llvm.arm.neon.vqshifts.v1i64(<1 x i64> %lhs, <1 x i64> %rhs)
-; CHECK: sqshl d0, d0, d1
- ret <1 x i64> %tmp1
-}
-
declare <2 x i64> @llvm.arm.neon.vqshiftu.v2i64(<2 x i64>, <2 x i64>)
declare <2 x i64> @llvm.arm.neon.vqshifts.v2i64(<2 x i64>, <2 x i64>)
diff --git a/test/CodeGen/AArch64/neon-scalar-add-sub.ll b/test/CodeGen/AArch64/neon-scalar-add-sub.ll
new file mode 100644
index 0000000..09ca880
--- /dev/null
+++ b/test/CodeGen/AArch64/neon-scalar-add-sub.ll
@@ -0,0 +1,50 @@
+; RUN: llc < %s -verify-machineinstrs -mtriple=aarch64-none-linux-gnu -mattr=+neon | FileCheck %s
+
+define <1 x i64> @add1xi64(<1 x i64> %A, <1 x i64> %B) {
+;CHECK: add {{d[0-9]+}}, {{d[0-9]+}}, {{d[0-9]+}}
+ %tmp3 = add <1 x i64> %A, %B;
+ ret <1 x i64> %tmp3
+}
+
+define <1 x i64> @sub1xi64(<1 x i64> %A, <1 x i64> %B) {
+;CHECK: sub {{d[0-9]+}}, {{d[0-9]+}}, {{d[0-9]+}}
+ %tmp3 = sub <1 x i64> %A, %B;
+ ret <1 x i64> %tmp3
+}
+
+declare <1 x i64> @llvm.aarch64.neon.vaddds(<1 x i64>, <1 x i64>)
+declare <1 x i64> @llvm.aarch64.neon.vadddu(<1 x i64>, <1 x i64>)
+
+define <1 x i64> @test_add_v1i64(<1 x i64> %lhs, <1 x i64> %rhs) {
+; CHECK: test_add_v1i64:
+ %tmp1 = call <1 x i64> @llvm.aarch64.neon.vaddds(<1 x i64> %lhs, <1 x i64> %rhs)
+; CHECK: add {{d[0-9]+}}, {{d[0-9]+}}, {{d[0-9]+}}
+ ret <1 x i64> %tmp1
+}
+
+define <1 x i64> @test_uadd_v1i64(<1 x i64> %lhs, <1 x i64> %rhs) {
+; CHECK: test_uadd_v1i64:
+ %tmp1 = call <1 x i64> @llvm.aarch64.neon.vadddu(<1 x i64> %lhs, <1 x i64> %rhs)
+;CHECK: add {{d[0-9]+}}, {{d[0-9]+}}, {{d[0-9]+}}
+ ret <1 x i64> %tmp1
+}
+
+declare <1 x i64> @llvm.aarch64.neon.vsubds(<1 x i64>, <1 x i64>)
+declare <1 x i64> @llvm.aarch64.neon.vsubdu(<1 x i64>, <1 x i64>)
+
+define <1 x i64> @test_sub_v1i64(<1 x i64> %lhs, <1 x i64> %rhs) {
+; CHECK: test_sub_v1i64:
+ %tmp1 = call <1 x i64> @llvm.aarch64.neon.vsubds(<1 x i64> %lhs, <1 x i64> %rhs)
+; CHECK: sub {{d[0-9]+}}, {{d[0-9]+}}, {{d[0-9]+}}
+ ret <1 x i64> %tmp1
+}
+
+define <1 x i64> @test_usub_v1i64(<1 x i64> %lhs, <1 x i64> %rhs) {
+; CHECK: test_usub_v1i64:
+ %tmp1 = call <1 x i64> @llvm.aarch64.neon.vsubdu(<1 x i64> %lhs, <1 x i64> %rhs)
+;CHECK: sub {{d[0-9]+}}, {{d[0-9]+}}, {{d[0-9]+}}
+ ret <1 x i64> %tmp1
+}
+
+
+
diff --git a/test/CodeGen/AArch64/neon-scalar-reduce-pairwise.ll b/test/CodeGen/AArch64/neon-scalar-reduce-pairwise.ll
new file mode 100644
index 0000000..309997b
--- /dev/null
+++ b/test/CodeGen/AArch64/neon-scalar-reduce-pairwise.ll
@@ -0,0 +1,103 @@
+; RUN: llc -mtriple=aarch64-none-linux-gnu -mattr=+neon < %s | FileCheck %s
+
+declare <1 x i64> @llvm.aarch64.neon.vpadd(<2 x i64>)
+
+define <1 x i64> @test_addp_v1i64(<2 x i64> %a) {
+; CHECK: test_addp_v1i64:
+ %val = call <1 x i64> @llvm.aarch64.neon.vpadd(<2 x i64> %a)
+; CHECK: addp d0, v0.2d
+ ret <1 x i64> %val
+}
+
+declare <1 x float> @llvm.aarch64.neon.vpfadd(<2 x float>)
+
+define <1 x float> @test_faddp_v1f32(<2 x float> %a) {
+; CHECK: test_faddp_v1f32:
+ %val = call <1 x float> @llvm.aarch64.neon.vpfadd(<2 x float> %a)
+; CHECK: faddp s0, v0.2s
+ ret <1 x float> %val
+}
+
+declare <1 x double> @llvm.aarch64.neon.vpfaddq(<2 x double>)
+
+define <1 x double> @test_faddp_v1f64(<2 x double> %a) {
+; CHECK: test_faddp_v1f64:
+ %val = call <1 x double> @llvm.aarch64.neon.vpfaddq(<2 x double> %a)
+; CHECK: faddp d0, v0.2d
+ ret <1 x double> %val
+}
+
+
+declare <1 x float> @llvm.aarch64.neon.vpmax(<2 x float>)
+
+define <1 x float> @test_fmaxp_v1f32(<2 x float> %a) {
+; CHECK: test_fmaxp_v1f32:
+ %val = call <1 x float> @llvm.aarch64.neon.vpmax(<2 x float> %a)
+; CHECK: fmaxp s0, v0.2s
+ ret <1 x float> %val
+}
+
+declare <1 x double> @llvm.aarch64.neon.vpmaxq(<2 x double>)
+
+define <1 x double> @test_fmaxp_v1f64(<2 x double> %a) {
+; CHECK: test_fmaxp_v1f64:
+ %val = call <1 x double> @llvm.aarch64.neon.vpmaxq(<2 x double> %a)
+; CHECK: fmaxp d0, v0.2d
+ ret <1 x double> %val
+}
+
+
+declare <1 x float> @llvm.aarch64.neon.vpmin(<2 x float>)
+
+define <1 x float> @test_fminp_v1f32(<2 x float> %a) {
+; CHECK: test_fminp_v1f32:
+ %val = call <1 x float> @llvm.aarch64.neon.vpmin(<2 x float> %a)
+; CHECK: fminp s0, v0.2s
+ ret <1 x float> %val
+}
+
+declare <1 x double> @llvm.aarch64.neon.vpminq(<2 x double>)
+
+define <1 x double> @test_fminp_v1f64(<2 x double> %a) {
+; CHECK: test_fminp_v1f64:
+ %val = call <1 x double> @llvm.aarch64.neon.vpminq(<2 x double> %a)
+; CHECK: fminp d0, v0.2d
+ ret <1 x double> %val
+}
+
+declare <1 x float> @llvm.aarch64.neon.vpfmaxnm(<2 x float>)
+
+define <1 x float> @test_fmaxnmp_v1f32(<2 x float> %a) {
+; CHECK: test_fmaxnmp_v1f32:
+ %val = call <1 x float> @llvm.aarch64.neon.vpfmaxnm(<2 x float> %a)
+; CHECK: fmaxnmp s0, v0.2s
+ ret <1 x float> %val
+}
+
+declare <1 x double> @llvm.aarch64.neon.vpfmaxnmq(<2 x double>)
+
+define <1 x double> @test_fmaxnmp_v1f64(<2 x double> %a) {
+; CHECK: test_fmaxnmp_v1f64:
+ %val = call <1 x double> @llvm.aarch64.neon.vpfmaxnmq(<2 x double> %a)
+; CHECK: fmaxnmp d0, v0.2d
+ ret <1 x double> %val
+}
+
+declare <1 x float> @llvm.aarch64.neon.vpfminnm(<2 x float>)
+
+define <1 x float> @test_fminnmp_v1f32(<2 x float> %a) {
+; CHECK: test_fminnmp_v1f32:
+ %val = call <1 x float> @llvm.aarch64.neon.vpfminnm(<2 x float> %a)
+; CHECK: fminnmp s0, v0.2s
+ ret <1 x float> %val
+}
+
+declare <1 x double> @llvm.aarch64.neon.vpfminnmq(<2 x double>)
+
+define <1 x double> @test_fminnmp_v1f64(<2 x double> %a) {
+; CHECK: test_fminnmp_v1f64:
+ %val = call <1 x double> @llvm.aarch64.neon.vpfminnmq(<2 x double> %a)
+; CHECK: fminnmp d0, v0.2d
+ ret <1 x double> %val
+}
+
diff --git a/test/CodeGen/AArch64/neon-scalar-rounding-shift.ll b/test/CodeGen/AArch64/neon-scalar-rounding-shift.ll
new file mode 100644
index 0000000..83ceb4e
--- /dev/null
+++ b/test/CodeGen/AArch64/neon-scalar-rounding-shift.ll
@@ -0,0 +1,39 @@
+; RUN: llc -mtriple=aarch64-none-linux-gnu -mattr=+neon < %s | FileCheck %s
+
+
+declare <1 x i64> @llvm.arm.neon.vrshiftu.v1i64(<1 x i64>, <1 x i64>)
+declare <1 x i64> @llvm.arm.neon.vrshifts.v1i64(<1 x i64>, <1 x i64>)
+
+define <1 x i64> @test_urshl_v1i64(<1 x i64> %lhs, <1 x i64> %rhs) {
+; CHECK: test_urshl_v1i64:
+ %tmp1 = call <1 x i64> @llvm.arm.neon.vrshiftu.v1i64(<1 x i64> %lhs, <1 x i64> %rhs)
+;CHECK: urshl {{d[0-9]+}}, {{d[0-9]+}}, {{d[0-9]+}}
+ ret <1 x i64> %tmp1
+}
+
+define <1 x i64> @test_srshl_v1i64(<1 x i64> %lhs, <1 x i64> %rhs) {
+; CHECK: test_srshl_v1i64:
+ %tmp1 = call <1 x i64> @llvm.arm.neon.vrshifts.v1i64(<1 x i64> %lhs, <1 x i64> %rhs)
+;CHECK: srshl {{d[0-9]+}}, {{d[0-9]+}}, {{d[0-9]+}}
+ ret <1 x i64> %tmp1
+}
+
+declare <1 x i64> @llvm.aarch64.neon.vrshldu(<1 x i64>, <1 x i64>)
+declare <1 x i64> @llvm.aarch64.neon.vrshlds(<1 x i64>, <1 x i64>)
+
+define <1 x i64> @test_urshl_v1i64_aarch64(<1 x i64> %lhs, <1 x i64> %rhs) {
+; CHECK: test_urshl_v1i64_aarch64:
+ %tmp1 = call <1 x i64> @llvm.aarch64.neon.vrshldu(<1 x i64> %lhs, <1 x i64> %rhs)
+;CHECK: urshl {{d[0-9]+}}, {{d[0-9]+}}, {{d[0-9]+}}
+ ret <1 x i64> %tmp1
+}
+
+define <1 x i64> @test_srshl_v1i64_aarch64(<1 x i64> %lhs, <1 x i64> %rhs) {
+; CHECK: test_srshl_v1i64_aarch64:
+ %tmp1 = call <1 x i64> @llvm.aarch64.neon.vrshlds(<1 x i64> %lhs, <1 x i64> %rhs)
+;CHECK: srshl {{d[0-9]+}}, {{d[0-9]+}}, {{d[0-9]+}}
+ ret <1 x i64> %tmp1
+}
+
+
+
diff --git a/test/CodeGen/AArch64/neon-scalar-saturating-add-sub.ll b/test/CodeGen/AArch64/neon-scalar-saturating-add-sub.ll
new file mode 100644
index 0000000..9e12978
--- /dev/null
+++ b/test/CodeGen/AArch64/neon-scalar-saturating-add-sub.ll
@@ -0,0 +1,171 @@
+; RUN: llc -mtriple=aarch64-none-linux-gnu -mattr=+neon < %s | FileCheck %s
+
+declare <1 x i64> @llvm.arm.neon.vqaddu.v1i64(<1 x i64>, <1 x i64>)
+declare <1 x i64> @llvm.arm.neon.vqadds.v1i64(<1 x i64>, <1 x i64>)
+
+define <1 x i64> @test_uqadd_v1i64(<1 x i64> %lhs, <1 x i64> %rhs) {
+; CHECK: test_uqadd_v1i64:
+ %tmp1 = call <1 x i64> @llvm.arm.neon.vqaddu.v1i64(<1 x i64> %lhs, <1 x i64> %rhs)
+; CHECK: uqadd d0, d0, d1
+ ret <1 x i64> %tmp1
+}
+
+define <1 x i64> @test_sqadd_v1i64(<1 x i64> %lhs, <1 x i64> %rhs) {
+; CHECK: test_sqadd_v1i64:
+ %tmp1 = call <1 x i64> @llvm.arm.neon.vqadds.v1i64(<1 x i64> %lhs, <1 x i64> %rhs)
+; CHECK: sqadd d0, d0, d1
+ ret <1 x i64> %tmp1
+}
+
+declare <1 x i64> @llvm.arm.neon.vqsubu.v1i64(<1 x i64>, <1 x i64>)
+declare <1 x i64> @llvm.arm.neon.vqsubs.v1i64(<1 x i64>, <1 x i64>)
+
+define <1 x i64> @test_uqsub_v1i64(<1 x i64> %lhs, <1 x i64> %rhs) {
+; CHECK: test_uqsub_v1i64:
+ %tmp1 = call <1 x i64> @llvm.arm.neon.vqsubu.v1i64(<1 x i64> %lhs, <1 x i64> %rhs)
+; CHECK: uqsub d0, d0, d1
+ ret <1 x i64> %tmp1
+}
+
+define <1 x i64> @test_sqsub_v1i64(<1 x i64> %lhs, <1 x i64> %rhs) {
+; CHECK: test_sqsub_v1i64:
+ %tmp1 = call <1 x i64> @llvm.arm.neon.vqsubs.v1i64(<1 x i64> %lhs, <1 x i64> %rhs)
+; CHECK: sqsub d0, d0, d1
+ ret <1 x i64> %tmp1
+}
+
+declare <1 x i8> @llvm.aarch64.neon.vqaddu.v1i8(<1 x i8>, <1 x i8>)
+declare <1 x i8> @llvm.aarch64.neon.vqadds.v1i8(<1 x i8>, <1 x i8>)
+
+define <1 x i8> @test_uqadd_v1i8_aarch64(<1 x i8> %lhs, <1 x i8> %rhs) {
+; CHECK: test_uqadd_v1i8_aarch64:
+ %tmp1 = call <1 x i8> @llvm.aarch64.neon.vqaddu.v1i8(<1 x i8> %lhs, <1 x i8> %rhs)
+;CHECK: uqadd {{b[0-9]+}}, {{b[0-9]+}}, {{b[0-9]+}}
+ ret <1 x i8> %tmp1
+}
+
+define <1 x i8> @test_sqadd_v1i8_aarch64(<1 x i8> %lhs, <1 x i8> %rhs) {
+; CHECK: test_sqadd_v1i8_aarch64:
+ %tmp1 = call <1 x i8> @llvm.aarch64.neon.vqadds.v1i8(<1 x i8> %lhs, <1 x i8> %rhs)
+;CHECK: sqadd {{b[0-9]+}}, {{b[0-9]+}}, {{b[0-9]+}}
+ ret <1 x i8> %tmp1
+}
+
+declare <1 x i8> @llvm.aarch64.neon.vqsubu.v1i8(<1 x i8>, <1 x i8>)
+declare <1 x i8> @llvm.aarch64.neon.vqsubs.v1i8(<1 x i8>, <1 x i8>)
+
+define <1 x i8> @test_uqsub_v1i8_aarch64(<1 x i8> %lhs, <1 x i8> %rhs) {
+; CHECK: test_uqsub_v1i8_aarch64:
+ %tmp1 = call <1 x i8> @llvm.aarch64.neon.vqsubu.v1i8(<1 x i8> %lhs, <1 x i8> %rhs)
+;CHECK: uqsub {{b[0-9]+}}, {{b[0-9]+}}, {{b[0-9]+}}
+ ret <1 x i8> %tmp1
+}
+
+define <1 x i8> @test_sqsub_v1i8_aarch64(<1 x i8> %lhs, <1 x i8> %rhs) {
+; CHECK: test_sqsub_v1i8_aarch64:
+ %tmp1 = call <1 x i8> @llvm.aarch64.neon.vqsubs.v1i8(<1 x i8> %lhs, <1 x i8> %rhs)
+;CHECK: sqsub {{b[0-9]+}}, {{b[0-9]+}}, {{b[0-9]+}}
+ ret <1 x i8> %tmp1
+}
+
+declare <1 x i16> @llvm.aarch64.neon.vqaddu.v1i16(<1 x i16>, <1 x i16>)
+declare <1 x i16> @llvm.aarch64.neon.vqadds.v1i16(<1 x i16>, <1 x i16>)
+
+define <1 x i16> @test_uqadd_v1i16_aarch64(<1 x i16> %lhs, <1 x i16> %rhs) {
+; CHECK: test_uqadd_v1i16_aarch64:
+ %tmp1 = call <1 x i16> @llvm.aarch64.neon.vqaddu.v1i16(<1 x i16> %lhs, <1 x i16> %rhs)
+;CHECK: uqadd {{h[0-9]+}}, {{h[0-9]+}}, {{h[0-9]+}}
+ ret <1 x i16> %tmp1
+}
+
+define <1 x i16> @test_sqadd_v1i16_aarch64(<1 x i16> %lhs, <1 x i16> %rhs) {
+; CHECK: test_sqadd_v1i16_aarch64:
+ %tmp1 = call <1 x i16> @llvm.aarch64.neon.vqadds.v1i16(<1 x i16> %lhs, <1 x i16> %rhs)
+;CHECK: sqadd {{h[0-9]+}}, {{h[0-9]+}}, {{h[0-9]+}}
+ ret <1 x i16> %tmp1
+}
+
+declare <1 x i16> @llvm.aarch64.neon.vqsubu.v1i16(<1 x i16>, <1 x i16>)
+declare <1 x i16> @llvm.aarch64.neon.vqsubs.v1i16(<1 x i16>, <1 x i16>)
+
+define <1 x i16> @test_uqsub_v1i16_aarch64(<1 x i16> %lhs, <1 x i16> %rhs) {
+; CHECK: test_uqsub_v1i16_aarch64:
+ %tmp1 = call <1 x i16> @llvm.aarch64.neon.vqsubu.v1i16(<1 x i16> %lhs, <1 x i16> %rhs)
+;CHECK: uqsub {{h[0-9]+}}, {{h[0-9]+}}, {{h[0-9]+}}
+ ret <1 x i16> %tmp1
+}
+
+define <1 x i16> @test_sqsub_v1i16_aarch64(<1 x i16> %lhs, <1 x i16> %rhs) {
+; CHECK: test_sqsub_v1i16_aarch64:
+ %tmp1 = call <1 x i16> @llvm.aarch64.neon.vqsubs.v1i16(<1 x i16> %lhs, <1 x i16> %rhs)
+;CHECK: sqsub {{h[0-9]+}}, {{h[0-9]+}}, {{h[0-9]+}}
+ ret <1 x i16> %tmp1
+}
+
+declare <1 x i32> @llvm.aarch64.neon.vqaddu.v1i32(<1 x i32>, <1 x i32>)
+declare <1 x i32> @llvm.aarch64.neon.vqadds.v1i32(<1 x i32>, <1 x i32>)
+
+define <1 x i32> @test_uqadd_v1i32_aarch64(<1 x i32> %lhs, <1 x i32> %rhs) {
+; CHECK: test_uqadd_v1i32_aarch64:
+ %tmp1 = call <1 x i32> @llvm.aarch64.neon.vqaddu.v1i32(<1 x i32> %lhs, <1 x i32> %rhs)
+;CHECK: uqadd {{s[0-9]+}}, {{s[0-9]+}}, {{s[0-9]+}}
+ ret <1 x i32> %tmp1
+}
+
+define <1 x i32> @test_sqadd_v1i32_aarch64(<1 x i32> %lhs, <1 x i32> %rhs) {
+; CHECK: test_sqadd_v1i32_aarch64:
+ %tmp1 = call <1 x i32> @llvm.aarch64.neon.vqadds.v1i32(<1 x i32> %lhs, <1 x i32> %rhs)
+;CHECK: sqadd {{s[0-9]+}}, {{s[0-9]+}}, {{s[0-9]+}}
+ ret <1 x i32> %tmp1
+}
+
+declare <1 x i32> @llvm.aarch64.neon.vqsubu.v1i32(<1 x i32>, <1 x i32>)
+declare <1 x i32> @llvm.aarch64.neon.vqsubs.v1i32(<1 x i32>, <1 x i32>)
+
+define <1 x i32> @test_uqsub_v1i32_aarch64(<1 x i32> %lhs, <1 x i32> %rhs) {
+; CHECK: test_uqsub_v1i32_aarch64:
+ %tmp1 = call <1 x i32> @llvm.aarch64.neon.vqsubu.v1i32(<1 x i32> %lhs, <1 x i32> %rhs)
+;CHECK: uqsub {{s[0-9]+}}, {{s[0-9]+}}, {{s[0-9]+}}
+ ret <1 x i32> %tmp1
+}
+
+define <1 x i32> @test_sqsub_v1i32_aarch64(<1 x i32> %lhs, <1 x i32> %rhs) {
+; CHECK: test_sqsub_v1i32_aarch64:
+ %tmp1 = call <1 x i32> @llvm.aarch64.neon.vqsubs.v1i32(<1 x i32> %lhs, <1 x i32> %rhs)
+;CHECK: sqsub {{s[0-9]+}}, {{s[0-9]+}}, {{s[0-9]+}}
+ ret <1 x i32> %tmp1
+}
+
+declare <1 x i64> @llvm.aarch64.neon.vqaddu.v1i64(<1 x i64>, <1 x i64>)
+declare <1 x i64> @llvm.aarch64.neon.vqadds.v1i64(<1 x i64>, <1 x i64>)
+
+define <1 x i64> @test_uqadd_v1i64_aarch64(<1 x i64> %lhs, <1 x i64> %rhs) {
+; CHECK: test_uqadd_v1i64_aarch64:
+ %tmp1 = call <1 x i64> @llvm.aarch64.neon.vqaddu.v1i64(<1 x i64> %lhs, <1 x i64> %rhs)
+;CHECK: uqadd {{d[0-9]+}}, {{d[0-9]+}}, {{d[0-9]+}}
+ ret <1 x i64> %tmp1
+}
+
+define <1 x i64> @test_sqadd_v1i64_aarch64(<1 x i64> %lhs, <1 x i64> %rhs) {
+; CHECK: test_sqadd_v1i64_aarch64:
+ %tmp1 = call <1 x i64> @llvm.aarch64.neon.vqadds.v1i64(<1 x i64> %lhs, <1 x i64> %rhs)
+;CHECK: sqadd {{d[0-9]+}}, {{d[0-9]+}}, {{d[0-9]+}}
+ ret <1 x i64> %tmp1
+}
+
+declare <1 x i64> @llvm.aarch64.neon.vqsubu.v1i64(<1 x i64>, <1 x i64>)
+declare <1 x i64> @llvm.aarch64.neon.vqsubs.v1i64(<1 x i64>, <1 x i64>)
+
+define <1 x i64> @test_uqsub_v1i64_aarch64(<1 x i64> %lhs, <1 x i64> %rhs) {
+; CHECK: test_uqsub_v1i64_aarch64:
+ %tmp1 = call <1 x i64> @llvm.aarch64.neon.vqsubu.v1i64(<1 x i64> %lhs, <1 x i64> %rhs)
+;CHECK: uqsub {{d[0-9]+}}, {{d[0-9]+}}, {{d[0-9]+}}
+ ret <1 x i64> %tmp1
+}
+
+define <1 x i64> @test_sqsub_v1i64_aarch64(<1 x i64> %lhs, <1 x i64> %rhs) {
+; CHECK: test_sqsub_v1i64_aarch64:
+ %tmp1 = call <1 x i64> @llvm.aarch64.neon.vqsubs.v1i64(<1 x i64> %lhs, <1 x i64> %rhs)
+;CHECK: sqsub {{d[0-9]+}}, {{d[0-9]+}}, {{d[0-9]+}}
+ ret <1 x i64> %tmp1
+}
diff --git a/test/CodeGen/AArch64/neon-scalar-saturating-rounding-shift.ll b/test/CodeGen/AArch64/neon-scalar-saturating-rounding-shift.ll
new file mode 100644
index 0000000..0fd67df
--- /dev/null
+++ b/test/CodeGen/AArch64/neon-scalar-saturating-rounding-shift.ll
@@ -0,0 +1,94 @@
+; RUN: llc -mtriple=aarch64-none-linux-gnu -mattr=+neon < %s | FileCheck %s
+
+declare <1 x i64> @llvm.arm.neon.vqrshiftu.v1i64(<1 x i64>, <1 x i64>)
+declare <1 x i64> @llvm.arm.neon.vqrshifts.v1i64(<1 x i64>, <1 x i64>)
+
+define <1 x i64> @test_uqrshl_v1i64(<1 x i64> %lhs, <1 x i64> %rhs) {
+; CHECK: test_uqrshl_v1i64:
+ %tmp1 = call <1 x i64> @llvm.arm.neon.vqrshiftu.v1i64(<1 x i64> %lhs, <1 x i64> %rhs)
+;CHECK: uqrshl {{d[0-9]+}}, {{d[0-9]+}}, {{d[0-9]+}}
+
+ ret <1 x i64> %tmp1
+}
+
+define <1 x i64> @test_sqrshl_v1i64(<1 x i64> %lhs, <1 x i64> %rhs) {
+; CHECK: test_sqrshl_v1i64:
+ %tmp1 = call <1 x i64> @llvm.arm.neon.vqrshifts.v1i64(<1 x i64> %lhs, <1 x i64> %rhs)
+;CHECK: sqrshl {{d[0-9]+}}, {{d[0-9]+}}, {{d[0-9]+}}
+ ret <1 x i64> %tmp1
+}
+
+declare <1 x i8> @llvm.aarch64.neon.vqrshlu.v1i8(<1 x i8>, <1 x i8>)
+declare <1 x i8> @llvm.aarch64.neon.vqrshls.v1i8(<1 x i8>, <1 x i8>)
+
+define <1 x i8> @test_uqrshl_v1i8_aarch64(<1 x i8> %lhs, <1 x i8> %rhs) {
+; CHECK: test_uqrshl_v1i8_aarch64:
+ %tmp1 = call <1 x i8> @llvm.aarch64.neon.vqrshlu.v1i8(<1 x i8> %lhs, <1 x i8> %rhs)
+;CHECK: uqrshl {{b[0-9]+}}, {{b[0-9]+}}, {{b[0-9]+}}
+
+ ret <1 x i8> %tmp1
+}
+
+define <1 x i8> @test_sqrshl_v1i8_aarch64(<1 x i8> %lhs, <1 x i8> %rhs) {
+; CHECK: test_sqrshl_v1i8_aarch64:
+ %tmp1 = call <1 x i8> @llvm.aarch64.neon.vqrshls.v1i8(<1 x i8> %lhs, <1 x i8> %rhs)
+;CHECK: sqrshl {{b[0-9]+}}, {{b[0-9]+}}, {{b[0-9]+}}
+ ret <1 x i8> %tmp1
+}
+
+declare <1 x i16> @llvm.aarch64.neon.vqrshlu.v1i16(<1 x i16>, <1 x i16>)
+declare <1 x i16> @llvm.aarch64.neon.vqrshls.v1i16(<1 x i16>, <1 x i16>)
+
+define <1 x i16> @test_uqrshl_v1i16_aarch64(<1 x i16> %lhs, <1 x i16> %rhs) {
+; CHECK: test_uqrshl_v1i16_aarch64:
+ %tmp1 = call <1 x i16> @llvm.aarch64.neon.vqrshlu.v1i16(<1 x i16> %lhs, <1 x i16> %rhs)
+;CHECK: uqrshl {{h[0-9]+}}, {{h[0-9]+}}, {{h[0-9]+}}
+
+ ret <1 x i16> %tmp1
+}
+
+define <1 x i16> @test_sqrshl_v1i16_aarch64(<1 x i16> %lhs, <1 x i16> %rhs) {
+; CHECK: test_sqrshl_v1i16_aarch64:
+ %tmp1 = call <1 x i16> @llvm.aarch64.neon.vqrshls.v1i16(<1 x i16> %lhs, <1 x i16> %rhs)
+;CHECK: sqrshl {{h[0-9]+}}, {{h[0-9]+}}, {{h[0-9]+}}
+ ret <1 x i16> %tmp1
+}
+
+declare <1 x i32> @llvm.aarch64.neon.vqrshlu.v1i32(<1 x i32>, <1 x i32>)
+declare <1 x i32> @llvm.aarch64.neon.vqrshls.v1i32(<1 x i32>, <1 x i32>)
+
+define <1 x i32> @test_uqrshl_v1i32_aarch64(<1 x i32> %lhs, <1 x i32> %rhs) {
+; CHECK: test_uqrshl_v1i32_aarch64:
+ %tmp1 = call <1 x i32> @llvm.aarch64.neon.vqrshlu.v1i32(<1 x i32> %lhs, <1 x i32> %rhs)
+;CHECK: uqrshl {{s[0-9]+}}, {{s[0-9]+}}, {{s[0-9]+}}
+
+ ret <1 x i32> %tmp1
+}
+
+define <1 x i32> @test_sqrshl_v1i32_aarch64(<1 x i32> %lhs, <1 x i32> %rhs) {
+; CHECK: test_sqrshl_v1i32_aarch64:
+ %tmp1 = call <1 x i32> @llvm.aarch64.neon.vqrshls.v1i32(<1 x i32> %lhs, <1 x i32> %rhs)
+;CHECK: sqrshl {{s[0-9]+}}, {{s[0-9]+}}, {{s[0-9]+}}
+ ret <1 x i32> %tmp1
+}
+
+declare <1 x i64> @llvm.aarch64.neon.vqrshlu.v1i64(<1 x i64>, <1 x i64>)
+declare <1 x i64> @llvm.aarch64.neon.vqrshls.v1i64(<1 x i64>, <1 x i64>)
+
+define <1 x i64> @test_uqrshl_v1i64_aarch64(<1 x i64> %lhs, <1 x i64> %rhs) {
+; CHECK: test_uqrshl_v1i64_aarch64:
+ %tmp1 = call <1 x i64> @llvm.aarch64.neon.vqrshlu.v1i64(<1 x i64> %lhs, <1 x i64> %rhs)
+;CHECK: uqrshl {{d[0-9]+}}, {{d[0-9]+}}, {{d[0-9]+}}
+
+ ret <1 x i64> %tmp1
+}
+
+define <1 x i64> @test_sqrshl_v1i64_aarch64(<1 x i64> %lhs, <1 x i64> %rhs) {
+; CHECK: test_sqrshl_v1i64_aarch64:
+ %tmp1 = call <1 x i64> @llvm.aarch64.neon.vqrshls.v1i64(<1 x i64> %lhs, <1 x i64> %rhs)
+;CHECK: sqrshl {{d[0-9]+}}, {{d[0-9]+}}, {{d[0-9]+}}
+ ret <1 x i64> %tmp1
+}
+
+
+
diff --git a/test/CodeGen/AArch64/neon-scalar-saturating-shift.ll b/test/CodeGen/AArch64/neon-scalar-saturating-shift.ll
new file mode 100644
index 0000000..8fdea24
--- /dev/null
+++ b/test/CodeGen/AArch64/neon-scalar-saturating-shift.ll
@@ -0,0 +1,88 @@
+; RUN: llc -mtriple=aarch64-none-linux-gnu -mattr=+neon < %s | FileCheck %s
+
+declare <1 x i64> @llvm.arm.neon.vqshiftu.v1i64(<1 x i64>, <1 x i64>)
+declare <1 x i64> @llvm.arm.neon.vqshifts.v1i64(<1 x i64>, <1 x i64>)
+
+define <1 x i64> @test_uqshl_v1i64(<1 x i64> %lhs, <1 x i64> %rhs) {
+; CHECK: test_uqshl_v1i64:
+ %tmp1 = call <1 x i64> @llvm.arm.neon.vqshiftu.v1i64(<1 x i64> %lhs, <1 x i64> %rhs)
+;CHECK: uqshl {{d[0-9]+}}, {{d[0-9]+}}, {{d[0-9]+}}
+ ret <1 x i64> %tmp1
+}
+
+define <1 x i64> @test_sqshl_v1i64(<1 x i64> %lhs, <1 x i64> %rhs) {
+; CHECK: test_sqshl_v1i64:
+ %tmp1 = call <1 x i64> @llvm.arm.neon.vqshifts.v1i64(<1 x i64> %lhs, <1 x i64> %rhs)
+;CHECK: sqshl {{d[0-9]+}}, {{d[0-9]+}}, {{d[0-9]+}}
+ ret <1 x i64> %tmp1
+}
+
+declare <1 x i8> @llvm.aarch64.neon.vqshlu.v1i8(<1 x i8>, <1 x i8>)
+declare <1 x i8> @llvm.aarch64.neon.vqshls.v1i8(<1 x i8>, <1 x i8>)
+
+define <1 x i8> @test_uqshl_v1i8_aarch64(<1 x i8> %lhs, <1 x i8> %rhs) {
+; CHECK: test_uqshl_v1i8_aarch64:
+ %tmp1 = call <1 x i8> @llvm.aarch64.neon.vqshlu.v1i8(<1 x i8> %lhs, <1 x i8> %rhs)
+;CHECK: uqshl {{b[0-9]+}}, {{b[0-9]+}}, {{b[0-9]+}}
+ ret <1 x i8> %tmp1
+}
+
+define <1 x i8> @test_sqshl_v1i8_aarch64(<1 x i8> %lhs, <1 x i8> %rhs) {
+; CHECK: test_sqshl_v1i8_aarch64:
+ %tmp1 = call <1 x i8> @llvm.aarch64.neon.vqshls.v1i8(<1 x i8> %lhs, <1 x i8> %rhs)
+;CHECK: sqshl {{b[0-9]+}}, {{b[0-9]+}}, {{b[0-9]+}}
+ ret <1 x i8> %tmp1
+}
+
+declare <1 x i16> @llvm.aarch64.neon.vqshlu.v1i16(<1 x i16>, <1 x i16>)
+declare <1 x i16> @llvm.aarch64.neon.vqshls.v1i16(<1 x i16>, <1 x i16>)
+
+define <1 x i16> @test_uqshl_v1i16_aarch64(<1 x i16> %lhs, <1 x i16> %rhs) {
+; CHECK: test_uqshl_v1i16_aarch64:
+ %tmp1 = call <1 x i16> @llvm.aarch64.neon.vqshlu.v1i16(<1 x i16> %lhs, <1 x i16> %rhs)
+;CHECK: uqshl {{h[0-9]+}}, {{h[0-9]+}}, {{h[0-9]+}}
+ ret <1 x i16> %tmp1
+}
+
+define <1 x i16> @test_sqshl_v1i16_aarch64(<1 x i16> %lhs, <1 x i16> %rhs) {
+; CHECK: test_sqshl_v1i16_aarch64:
+ %tmp1 = call <1 x i16> @llvm.aarch64.neon.vqshls.v1i16(<1 x i16> %lhs, <1 x i16> %rhs)
+;CHECK: sqshl {{h[0-9]+}}, {{h[0-9]+}}, {{h[0-9]+}}
+ ret <1 x i16> %tmp1
+}
+
+declare <1 x i32> @llvm.aarch64.neon.vqshlu.v1i32(<1 x i32>, <1 x i32>)
+declare <1 x i32> @llvm.aarch64.neon.vqshls.v1i32(<1 x i32>, <1 x i32>)
+
+define <1 x i32> @test_uqshl_v1i32_aarch64(<1 x i32> %lhs, <1 x i32> %rhs) {
+; CHECK: test_uqshl_v1i32_aarch64:
+ %tmp1 = call <1 x i32> @llvm.aarch64.neon.vqshlu.v1i32(<1 x i32> %lhs, <1 x i32> %rhs)
+;CHECK: uqshl {{s[0-9]+}}, {{s[0-9]+}}, {{s[0-9]+}}
+ ret <1 x i32> %tmp1
+}
+
+define <1 x i32> @test_sqshl_v1i32_aarch64(<1 x i32> %lhs, <1 x i32> %rhs) {
+; CHECK: test_sqshl_v1i32_aarch64:
+ %tmp1 = call <1 x i32> @llvm.aarch64.neon.vqshls.v1i32(<1 x i32> %lhs, <1 x i32> %rhs)
+;CHECK: sqshl {{s[0-9]+}}, {{s[0-9]+}}, {{s[0-9]+}}
+ ret <1 x i32> %tmp1
+}
+
+declare <1 x i64> @llvm.aarch64.neon.vqshlu.v1i64(<1 x i64>, <1 x i64>)
+declare <1 x i64> @llvm.aarch64.neon.vqshls.v1i64(<1 x i64>, <1 x i64>)
+
+define <1 x i64> @test_uqshl_v1i64_aarch64(<1 x i64> %lhs, <1 x i64> %rhs) {
+; CHECK: test_uqshl_v1i64_aarch64:
+ %tmp1 = call <1 x i64> @llvm.aarch64.neon.vqshlu.v1i64(<1 x i64> %lhs, <1 x i64> %rhs)
+;CHECK: uqshl {{d[0-9]+}}, {{d[0-9]+}}, {{d[0-9]+}}
+ ret <1 x i64> %tmp1
+}
+
+define <1 x i64> @test_sqshl_v1i64_aarch64(<1 x i64> %lhs, <1 x i64> %rhs) {
+; CHECK: test_sqshl_v1i64_aarch64:
+ %tmp1 = call <1 x i64> @llvm.aarch64.neon.vqshls.v1i64(<1 x i64> %lhs, <1 x i64> %rhs)
+;CHECK: sqshl {{d[0-9]+}}, {{d[0-9]+}}, {{d[0-9]+}}
+ ret <1 x i64> %tmp1
+}
+
+
diff --git a/test/CodeGen/AArch64/neon-scalar-shift.ll b/test/CodeGen/AArch64/neon-scalar-shift.ll
new file mode 100644
index 0000000..1222be5
--- /dev/null
+++ b/test/CodeGen/AArch64/neon-scalar-shift.ll
@@ -0,0 +1,38 @@
+; RUN: llc < %s -verify-machineinstrs -mtriple=aarch64-none-linux-gnu -mattr=+neon | FileCheck %s
+
+declare <1 x i64> @llvm.arm.neon.vshiftu.v1i64(<1 x i64>, <1 x i64>)
+declare <1 x i64> @llvm.arm.neon.vshifts.v1i64(<1 x i64>, <1 x i64>)
+
+define <1 x i64> @test_ushl_v1i64(<1 x i64> %lhs, <1 x i64> %rhs) {
+; CHECK: test_ushl_v1i64:
+ %tmp1 = call <1 x i64> @llvm.arm.neon.vshiftu.v1i64(<1 x i64> %lhs, <1 x i64> %rhs)
+; CHECK: ushl {{d[0-9]+}}, {{d[0-9]+}}, {{d[0-9]+}}
+
+ ret <1 x i64> %tmp1
+}
+
+define <1 x i64> @test_sshl_v1i64(<1 x i64> %lhs, <1 x i64> %rhs) {
+; CHECK: test_sshl_v1i64:
+ %tmp1 = call <1 x i64> @llvm.arm.neon.vshifts.v1i64(<1 x i64> %lhs, <1 x i64> %rhs)
+; CHECK: sshl {{d[0-9]+}}, {{d[0-9]+}}, {{d[0-9]+}}
+ ret <1 x i64> %tmp1
+}
+
+declare <1 x i64> @llvm.aarch64.neon.vshldu(<1 x i64>, <1 x i64>)
+declare <1 x i64> @llvm.aarch64.neon.vshlds(<1 x i64>, <1 x i64>)
+
+define <1 x i64> @test_ushl_v1i64_aarch64(<1 x i64> %lhs, <1 x i64> %rhs) {
+; CHECK: test_ushl_v1i64_aarch64:
+ %tmp1 = call <1 x i64> @llvm.aarch64.neon.vshldu(<1 x i64> %lhs, <1 x i64> %rhs)
+; CHECK: ushl {{d[0-9]+}}, {{d[0-9]+}}, {{d[0-9]+}}
+ ret <1 x i64> %tmp1
+}
+
+define <1 x i64> @test_sshl_v1i64_aarch64(<1 x i64> %lhs, <1 x i64> %rhs) {
+; CHECK: test_sshl_v1i64_aarch64:
+ %tmp1 = call <1 x i64> @llvm.aarch64.neon.vshlds(<1 x i64> %lhs, <1 x i64> %rhs)
+; CHECK: sshl {{d[0-9]+}}, {{d[0-9]+}}, {{d[0-9]+}}
+ ret <1 x i64> %tmp1
+}
+
+
diff --git a/test/CodeGen/AArch64/neon-shift.ll b/test/CodeGen/AArch64/neon-shift.ll
index 1b8b941..33b04ce 100644
--- a/test/CodeGen/AArch64/neon-shift.ll
+++ b/test/CodeGen/AArch64/neon-shift.ll
@@ -102,23 +102,6 @@ define <4 x i32> @test_sshl_v4i32(<4 x i32> %lhs, <4 x i32> %rhs) {
ret <4 x i32> %tmp1
}
-declare <1 x i64> @llvm.arm.neon.vshiftu.v1i64(<1 x i64>, <1 x i64>)
-declare <1 x i64> @llvm.arm.neon.vshifts.v1i64(<1 x i64>, <1 x i64>)
-
-define <1 x i64> @test_ushl_v1i64(<1 x i64> %lhs, <1 x i64> %rhs) {
-; CHECK: test_ushl_v1i64:
- %tmp1 = call <1 x i64> @llvm.arm.neon.vshiftu.v1i64(<1 x i64> %lhs, <1 x i64> %rhs)
-; CHECK: ushl d0, d0, d1
- ret <1 x i64> %tmp1
-}
-
-define <1 x i64> @test_sshl_v1i64(<1 x i64> %lhs, <1 x i64> %rhs) {
-; CHECK: test_sshl_v1i64:
- %tmp1 = call <1 x i64> @llvm.arm.neon.vshifts.v1i64(<1 x i64> %lhs, <1 x i64> %rhs)
-; CHECK: sshl d0, d0, d1
- ret <1 x i64> %tmp1
-}
-
declare <2 x i64> @llvm.arm.neon.vshiftu.v2i64(<2 x i64>, <2 x i64>)
declare <2 x i64> @llvm.arm.neon.vshifts.v2i64(<2 x i64>, <2 x i64>)
diff --git a/test/MC/AArch64/neon-add-pairwise.s b/test/MC/AArch64/neon-add-pairwise.s
index b586c22..df9938b 100644
--- a/test/MC/AArch64/neon-add-pairwise.s
+++ b/test/MC/AArch64/neon-add-pairwise.s
@@ -32,4 +32,3 @@
// CHECK: faddp v0.2s, v1.2s, v2.2s // encoding: [0x20,0xd4,0x22,0x2e]
// CHECK: faddp v0.4s, v1.4s, v2.4s // encoding: [0x20,0xd4,0x22,0x6e]
// CHECK: faddp v0.2d, v1.2d, v2.2d // encoding: [0x20,0xd4,0x62,0x6e]
-
diff --git a/test/MC/AArch64/neon-add-sub-instructions.s b/test/MC/AArch64/neon-add-sub-instructions.s
index 863798e..68f169b 100644
--- a/test/MC/AArch64/neon-add-sub-instructions.s
+++ b/test/MC/AArch64/neon-add-sub-instructions.s
@@ -64,19 +64,5 @@
// CHECK: fsub v0.4s, v1.4s, v2.4s // encoding: [0x20,0xd4,0xa2,0x4e]
// CHECK: fsub v0.2d, v1.2d, v2.2d // encoding: [0x20,0xd4,0xe2,0x4e]
-//------------------------------------------------------------------------------
-// Scalar Integer Add
-//------------------------------------------------------------------------------
- add d31, d0, d16
-
-// CHECK: add d31, d0, d16 // encoding: [0x1f,0x84,0xf0,0x5e]
-
-//------------------------------------------------------------------------------
-// Scalar Integer Sub
-//------------------------------------------------------------------------------
- sub d1, d7, d8
-
-// CHECK: sub d1, d7, d8 // encoding: [0xe1,0x84,0xe8,0x7e]
-
diff --git a/test/MC/AArch64/neon-diagnostics.s b/test/MC/AArch64/neon-diagnostics.s
index c85db70..ff175a7 100644
--- a/test/MC/AArch64/neon-diagnostics.s
+++ b/test/MC/AArch64/neon-diagnostics.s
@@ -2747,3 +2747,105 @@
// CHECK-ERROR: rsubhn2 v0.4s, v1.2d, v2.2s
// CHECK-ERROR: ^
+//----------------------------------------------------------------------
+// Scalar Reduce Add Pairwise (Integer)
+//----------------------------------------------------------------------
+ // invalid vector types
+ addp s0, d1.2d
+ addp d0, d1.2s
+
+// CHECK-ERROR: error: invalid operand for instruction
+// CHECK-ERROR: addp s0, d1.2d
+// CHECK-ERROR: ^
+// CHECK-ERROR: error: invalid operand for instruction
+// CHECK-ERROR: addp d0, d1.2s
+// CHECK-ERROR: ^
+
+//----------------------------------------------------------------------
+// Scalar Reduce Add Pairwise (Floating Point)
+//----------------------------------------------------------------------
+ // invalid vector types
+ faddp s0, d1.2d
+ faddp d0, d1.2s
+
+// CHECK-ERROR: error: invalid operand for instruction
+// CHECK-ERROR: faddp s0, d1.2d
+// CHECK-ERROR: ^
+// CHECK-ERROR: error: invalid operand for instruction
+// CHECK-ERROR: faddp d0, d1.2s
+// CHECK-ERROR: ^
+
+//----------------------------------------------------------------------
+// Scalar Reduce Maximum Pairwise (Floating Point)
+//----------------------------------------------------------------------
+ // mismatched and invalid vector types
+ fmaxp s0, v1.2d
+ fmaxp d31, v2.2s
+ fmaxp h3, v2.2s
+
+// CHECK-ERROR: error: invalid operand for instruction
+// CHECK-ERROR: fmaxp s0, v1.2d
+// CHECK-ERROR: ^
+// CHECK-ERROR: error: invalid operand for instruction
+// CHECK-ERROR: fmaxp d31, v2.2s
+// CHECK-ERROR: ^
+// CHECK-ERROR: error: invalid operand for instruction
+// CHECK-ERROR: fmaxp h3, v2.2s
+// CHECK-ERROR: ^
+
+
+//----------------------------------------------------------------------
+// Scalar Reduce Minimum Pairwise (Floating Point)
+//----------------------------------------------------------------------
+ // mismatched and invalid vector types
+ fminp s0, v1.4h
+ fminp d31, v2.8h
+ fminp b3, v2.2s
+
+// CHECK-ERROR: error: invalid operand for instruction
+// CHECK-ERROR: fminp s0, v1.4h
+// CHECK-ERROR: ^
+// CHECK-ERROR: error: invalid operand for instruction
+// CHECK-ERROR: fminp d31, v2.8h
+// CHECK-ERROR: ^
+// CHECK-ERROR: error: invalid operand for instruction
+// CHECK-ERROR: fminp b3, v2.2s
+// CHECK-ERROR: ^
+
+
+//----------------------------------------------------------------------
+// Scalar Reduce maxNum Pairwise (Floating Point)
+//----------------------------------------------------------------------
+ // mismatched and invalid vector types
+ fmaxnmp s0, v1.8b
+ fmaxnmp d31, v2.16b
+ fmaxnmp v1.2s, v2.2s
+
+// CHECK-ERROR: error: invalid operand for instruction
+// CHECK-ERROR: fmaxnmp s0, v1.8b
+// CHECK-ERROR: ^
+// CHECK-ERROR: error: invalid operand for instruction
+// CHECK-ERROR: fmaxnmp d31, v2.16b
+// CHECK-ERROR: ^
+// CHECK-ERROR: error: too few operands for instruction
+// CHECK-ERROR: fmaxnmp v1.2s, v2.2s
+// CHECK-ERROR: ^
+
+//----------------------------------------------------------------------
+// Scalar Reduce minNum Pairwise (Floating Point)
+//----------------------------------------------------------------------
+ // mismatched and invalid vector types
+ fminnmp s0, v1.2d
+ fminnmp d31, v2.4s
+ fminnmp v1.4s, v2.2d
+
+// CHECK-ERROR: error: invalid operand for instruction
+// CHECK-ERROR: fminnmp s0, v1.2d
+// CHECK-ERROR: ^
+// CHECK-ERROR: error: invalid operand for instruction
+// CHECK-ERROR: fminnmp d31, v2.4s
+// CHECK-ERROR: ^
+// CHECK-ERROR: error: invalid operand for instruction
+// CHECK-ERROR: fminnmp v1.4s, v2.2d
+// CHECK-ERROR: ^
+
diff --git a/test/MC/AArch64/neon-rounding-shift.s b/test/MC/AArch64/neon-rounding-shift.s
index f3c70d7..e70f766 100644
--- a/test/MC/AArch64/neon-rounding-shift.s
+++ b/test/MC/AArch64/neon-rounding-shift.s
@@ -41,17 +41,5 @@
// CHECK: urshl v0.4s, v1.4s, v2.4s // encoding: [0x20,0x54,0xa2,0x6e]
// CHECK: urshl v0.2d, v1.2d, v2.2d // encoding: [0x20,0x54,0xe2,0x6e]
-//------------------------------------------------------------------------------
-// Scalar Integer Rounding Shift Lef (Signed)
-//------------------------------------------------------------------------------
- srshl d17, d31, d8
-
-// CHECK: srshl d17, d31, d8 // encoding: [0xf1,0x57,0xe8,0x5e]
-
-//------------------------------------------------------------------------------
-// Scalar Integer Rounding Shift Lef (Unsigned)
-//------------------------------------------------------------------------------
- urshl d17, d31, d8
-// CHECK: urshl d17, d31, d8 // encoding: [0xf1,0x57,0xe8,0x7e]
diff --git a/test/MC/AArch64/neon-saturating-add-sub.s b/test/MC/AArch64/neon-saturating-add-sub.s
index 1032ae4..4a7ed10 100644
--- a/test/MC/AArch64/neon-saturating-add-sub.s
+++ b/test/MC/AArch64/neon-saturating-add-sub.s
@@ -79,55 +79,4 @@
// CHECK: uqsub v0.4s, v1.4s, v2.4s // encoding: [0x20,0x2c,0xa2,0x6e]
// CHECK: uqsub v0.2d, v1.2d, v2.2d // encoding: [0x20,0x2c,0xe2,0x6e]
-//------------------------------------------------------------------------------
-// Scalar Integer Saturating Add (Signed)
-//------------------------------------------------------------------------------
- sqadd b0, b1, b2
- sqadd h10, h11, h12
- sqadd s20, s21, s2
- sqadd d17, d31, d8
-
-// CHECK: sqadd b0, b1, b2 // encoding: [0x20,0x0c,0x22,0x5e]
-// CHECK: sqadd h10, h11, h12 // encoding: [0x6a,0x0d,0x6c,0x5e]
-// CHECK: sqadd s20, s21, s2 // encoding: [0xb4,0x0e,0xa2,0x5e]
-// CHECK: sqadd d17, d31, d8 // encoding: [0xf1,0x0f,0xe8,0x5e]
-
-//------------------------------------------------------------------------------
-// Scalar Integer Saturating Add (Unsigned)
-//------------------------------------------------------------------------------
- uqadd b0, b1, b2
- uqadd h10, h11, h12
- uqadd s20, s21, s2
- uqadd d17, d31, d8
-
-// CHECK: uqadd b0, b1, b2 // encoding: [0x20,0x0c,0x22,0x7e]
-// CHECK: uqadd h10, h11, h12 // encoding: [0x6a,0x0d,0x6c,0x7e]
-// CHECK: uqadd s20, s21, s2 // encoding: [0xb4,0x0e,0xa2,0x7e]
-// CHECK: uqadd d17, d31, d8 // encoding: [0xf1,0x0f,0xe8,0x7e]
-
-//------------------------------------------------------------------------------
-// Scalar Integer Saturating Sub (Signed)
-//------------------------------------------------------------------------------
- sqsub b0, b1, b2
- sqsub h10, h11, h12
- sqsub s20, s21, s2
- sqsub d17, d31, d8
-
-// CHECK: sqsub b0, b1, b2 // encoding: [0x20,0x2c,0x22,0x5e]
-// CHECK: sqsub h10, h11, h12 // encoding: [0x6a,0x2d,0x6c,0x5e]
-// CHECK: sqsub s20, s21, s2 // encoding: [0xb4,0x2e,0xa2,0x5e]
-// CHECK: sqsub d17, d31, d8 // encoding: [0xf1,0x2f,0xe8,0x5e]
-
-//------------------------------------------------------------------------------
-// Scalar Integer Saturating Sub (Unsigned)
-//------------------------------------------------------------------------------
- uqsub b0, b1, b2
- uqsub h10, h11, h12
- uqsub s20, s21, s2
- uqsub d17, d31, d8
-
-// CHECK: uqsub b0, b1, b2 // encoding: [0x20,0x2c,0x22,0x7e]
-// CHECK: uqsub h10, h11, h12 // encoding: [0x6a,0x2d,0x6c,0x7e]
-// CHECK: uqsub s20, s21, s2 // encoding: [0xb4,0x2e,0xa2,0x7e]
-// CHECK: uqsub d17, d31, d8 // encoding: [0xf1,0x2f,0xe8,0x7e]
diff --git a/test/MC/AArch64/neon-saturating-rounding-shift.s b/test/MC/AArch64/neon-saturating-rounding-shift.s
index a36e689..9215c1c 100644
--- a/test/MC/AArch64/neon-saturating-rounding-shift.s
+++ b/test/MC/AArch64/neon-saturating-rounding-shift.s
@@ -41,30 +41,3 @@
// CHECK: uqrshl v0.4s, v1.4s, v2.4s // encoding: [0x20,0x5c,0xa2,0x6e]
// CHECK: uqrshl v0.2d, v1.2d, v2.2d // encoding: [0x20,0x5c,0xe2,0x6e]
-//------------------------------------------------------------------------------
-// Scalar Integer Saturating Rounding Shift Lef (Signed)
-//------------------------------------------------------------------------------
- sqrshl b0, b1, b2
- sqrshl h10, h11, h12
- sqrshl s20, s21, s2
- sqrshl d17, d31, d8
-
-// CHECK: sqrshl b0, b1, b2 // encoding: [0x20,0x5c,0x22,0x5e]
-// CHECK: sqrshl h10, h11, h12 // encoding: [0x6a,0x5d,0x6c,0x5e]
-// CHECK: sqrshl s20, s21, s2 // encoding: [0xb4,0x5e,0xa2,0x5e]
-// CHECK: sqrshl d17, d31, d8 // encoding: [0xf1,0x5f,0xe8,0x5e]
-
-//------------------------------------------------------------------------------
-// Scalar Integer Saturating Rounding Shift Lef (Unsigned)
-//------------------------------------------------------------------------------
- uqrshl b0, b1, b2
- uqrshl h10, h11, h12
- uqrshl s20, s21, s2
- uqrshl d17, d31, d8
-
-// CHECK: uqrshl b0, b1, b2 // encoding: [0x20,0x5c,0x22,0x7e]
-// CHECK: uqrshl h10, h11, h12 // encoding: [0x6a,0x5d,0x6c,0x7e]
-// CHECK: uqrshl s20, s21, s2 // encoding: [0xb4,0x5e,0xa2,0x7e]
-// CHECK: uqrshl d17, d31, d8 // encoding: [0xf1,0x5f,0xe8,0x7e]
-
-
diff --git a/test/MC/AArch64/neon-saturating-shift.s b/test/MC/AArch64/neon-saturating-shift.s
index 2c8456d..9ae393a 100644
--- a/test/MC/AArch64/neon-saturating-shift.s
+++ b/test/MC/AArch64/neon-saturating-shift.s
@@ -41,29 +41,3 @@
// CHECK: uqshl v0.4s, v1.4s, v2.4s // encoding: [0x20,0x4c,0xa2,0x6e]
// CHECK: uqshl v0.2d, v1.2d, v2.2d // encoding: [0x20,0x4c,0xe2,0x6e]
-//------------------------------------------------------------------------------
-// Scalar Integer Saturating Shift Lef (Signed)
-//------------------------------------------------------------------------------
- sqshl b0, b1, b2
- sqshl h10, h11, h12
- sqshl s20, s21, s2
- sqshl d17, d31, d8
-
-// CHECK: sqshl b0, b1, b2 // encoding: [0x20,0x4c,0x22,0x5e]
-// CHECK: sqshl h10, h11, h12 // encoding: [0x6a,0x4d,0x6c,0x5e]
-// CHECK: sqshl s20, s21, s2 // encoding: [0xb4,0x4e,0xa2,0x5e]
-// CHECK: sqshl d17, d31, d8 // encoding: [0xf1,0x4f,0xe8,0x5e]
-
-//------------------------------------------------------------------------------
-// Scalar Integer Saturating Shift Lef (Unsigned)
-//------------------------------------------------------------------------------
- uqshl b0, b1, b2
- uqshl h10, h11, h12
- uqshl s20, s21, s2
- uqshl d17, d31, d8
-
-// CHECK: uqshl b0, b1, b2 // encoding: [0x20,0x4c,0x22,0x7e]
-// CHECK: uqshl h10, h11, h12 // encoding: [0x6a,0x4d,0x6c,0x7e]
-// CHECK: uqshl s20, s21, s2 // encoding: [0xb4,0x4e,0xa2,0x7e]
-// CHECK: uqshl d17, d31, d8 // encoding: [0xf1,0x4f,0xe8,0x7e]
-
diff --git a/test/MC/AArch64/neon-scalar-add-sub.s b/test/MC/AArch64/neon-scalar-add-sub.s
new file mode 100644
index 0000000..0a3eba7
--- /dev/null
+++ b/test/MC/AArch64/neon-scalar-add-sub.s
@@ -0,0 +1,16 @@
+// RUN: llvm-mc -triple aarch64-none-linux-gnu -mattr=+neon -show-encoding < %s | FileCheck %s
+
+//------------------------------------------------------------------------------
+// Scalar Integer Add
+//------------------------------------------------------------------------------
+ add d31, d0, d16
+
+// CHECK: add d31, d0, d16 // encoding: [0x1f,0x84,0xf0,0x5e]
+
+//------------------------------------------------------------------------------
+// Scalar Integer Sub
+//------------------------------------------------------------------------------
+ sub d1, d7, d8
+
+// CHECK: sub d1, d7, d8 // encoding: [0xe1,0x84,0xe8,0x7e]
+
diff --git a/test/MC/AArch64/neon-scalar-reduce-pairwise.s b/test/MC/AArch64/neon-scalar-reduce-pairwise.s
new file mode 100644
index 0000000..403a940
--- /dev/null
+++ b/test/MC/AArch64/neon-scalar-reduce-pairwise.s
@@ -0,0 +1,16 @@
+// RUN: llvm-mc -triple aarch64-none-linux-gnu -mattr=+neon -show-encoding < %s | FileCheck %s
+
+//----------------------------------------------------------------------
+// Scalar Reduce Add Pairwise (Integer)
+//----------------------------------------------------------------------
+ addp d0, v1.2d
+
+// CHECK: addp d0, v1.2d // encoding: [0x20,0xb8,0xf1,0x5e]
+
+//----------------------------------------------------------------------
+// Scalar Reduce Add Pairwise (Floating Point)
+//----------------------------------------------------------------------
+ faddp d20, v1.2d
+
+// CHECK: faddp d20, v1.2d // encoding: [0x34,0xd8,0x70,0x7e]
+
diff --git a/test/MC/AArch64/neon-scalar-rounding-shift.s b/test/MC/AArch64/neon-scalar-rounding-shift.s
new file mode 100644
index 0000000..6113e09
--- /dev/null
+++ b/test/MC/AArch64/neon-scalar-rounding-shift.s
@@ -0,0 +1,17 @@
+// RUN: llvm-mc -triple aarch64-none-linux-gnu -mattr=+neon -show-encoding < %s | FileCheck %s
+
+
+//------------------------------------------------------------------------------
+// Scalar Integer Rounding Shift Left (Signed)
+//------------------------------------------------------------------------------
+ srshl d17, d31, d8
+
+// CHECK: srshl d17, d31, d8 // encoding: [0xf1,0x57,0xe8,0x5e]
+
+//------------------------------------------------------------------------------
+// Scalar Integer Rounding Shift Left (Unsigned)
+//------------------------------------------------------------------------------
+ urshl d17, d31, d8
+
+// CHECK: urshl d17, d31, d8 // encoding: [0xf1,0x57,0xe8,0x7e]
+
diff --git a/test/MC/AArch64/neon-scalar-saturating-add-sub.s b/test/MC/AArch64/neon-scalar-saturating-add-sub.s
new file mode 100644
index 0000000..fc2d50c
--- /dev/null
+++ b/test/MC/AArch64/neon-scalar-saturating-add-sub.s
@@ -0,0 +1,54 @@
+// RUN: llvm-mc -triple aarch64-none-linux-gnu -mattr=+neon -show-encoding < %s | FileCheck %s
+
+//------------------------------------------------------------------------------
+// Scalar Integer Saturating Add (Signed)
+//------------------------------------------------------------------------------
+ sqadd b0, b1, b2
+ sqadd h10, h11, h12
+ sqadd s20, s21, s2
+ sqadd d17, d31, d8
+
+// CHECK: sqadd b0, b1, b2 // encoding: [0x20,0x0c,0x22,0x5e]
+// CHECK: sqadd h10, h11, h12 // encoding: [0x6a,0x0d,0x6c,0x5e]
+// CHECK: sqadd s20, s21, s2 // encoding: [0xb4,0x0e,0xa2,0x5e]
+// CHECK: sqadd d17, d31, d8 // encoding: [0xf1,0x0f,0xe8,0x5e]
+
+//------------------------------------------------------------------------------
+// Scalar Integer Saturating Add (Unsigned)
+//------------------------------------------------------------------------------
+ uqadd b0, b1, b2
+ uqadd h10, h11, h12
+ uqadd s20, s21, s2
+ uqadd d17, d31, d8
+
+// CHECK: uqadd b0, b1, b2 // encoding: [0x20,0x0c,0x22,0x7e]
+// CHECK: uqadd h10, h11, h12 // encoding: [0x6a,0x0d,0x6c,0x7e]
+// CHECK: uqadd s20, s21, s2 // encoding: [0xb4,0x0e,0xa2,0x7e]
+// CHECK: uqadd d17, d31, d8 // encoding: [0xf1,0x0f,0xe8,0x7e]
+
+//------------------------------------------------------------------------------
+// Scalar Integer Saturating Sub (Signed)
+//------------------------------------------------------------------------------
+ sqsub b0, b1, b2
+ sqsub h10, h11, h12
+ sqsub s20, s21, s2
+ sqsub d17, d31, d8
+
+// CHECK: sqsub b0, b1, b2 // encoding: [0x20,0x2c,0x22,0x5e]
+// CHECK: sqsub h10, h11, h12 // encoding: [0x6a,0x2d,0x6c,0x5e]
+// CHECK: sqsub s20, s21, s2 // encoding: [0xb4,0x2e,0xa2,0x5e]
+// CHECK: sqsub d17, d31, d8 // encoding: [0xf1,0x2f,0xe8,0x5e]
+
+//------------------------------------------------------------------------------
+// Scalar Integer Saturating Sub (Unsigned)
+//------------------------------------------------------------------------------
+ uqsub b0, b1, b2
+ uqsub h10, h11, h12
+ uqsub s20, s21, s2
+ uqsub d17, d31, d8
+
+// CHECK: uqsub b0, b1, b2 // encoding: [0x20,0x2c,0x22,0x7e]
+// CHECK: uqsub h10, h11, h12 // encoding: [0x6a,0x2d,0x6c,0x7e]
+// CHECK: uqsub s20, s21, s2 // encoding: [0xb4,0x2e,0xa2,0x7e]
+// CHECK: uqsub d17, d31, d8 // encoding: [0xf1,0x2f,0xe8,0x7e]
+
diff --git a/test/MC/AArch64/neon-scalar-saturating-rounding-shift.s b/test/MC/AArch64/neon-scalar-saturating-rounding-shift.s
new file mode 100644
index 0000000..b09a589
--- /dev/null
+++ b/test/MC/AArch64/neon-scalar-saturating-rounding-shift.s
@@ -0,0 +1,28 @@
+// RUN: llvm-mc -triple aarch64-none-linux-gnu -mattr=+neon -show-encoding < %s | FileCheck %s
+
+//------------------------------------------------------------------------------
+// Scalar Integer Saturating Rounding Shift Left (Signed)
+//------------------------------------------------------------------------------
+ sqrshl b0, b1, b2
+ sqrshl h10, h11, h12
+ sqrshl s20, s21, s2
+ sqrshl d17, d31, d8
+
+// CHECK: sqrshl b0, b1, b2 // encoding: [0x20,0x5c,0x22,0x5e]
+// CHECK: sqrshl h10, h11, h12 // encoding: [0x6a,0x5d,0x6c,0x5e]
+// CHECK: sqrshl s20, s21, s2 // encoding: [0xb4,0x5e,0xa2,0x5e]
+// CHECK: sqrshl d17, d31, d8 // encoding: [0xf1,0x5f,0xe8,0x5e]
+
+//------------------------------------------------------------------------------
+// Scalar Integer Saturating Rounding Shift Left (Unsigned)
+//------------------------------------------------------------------------------
+ uqrshl b0, b1, b2
+ uqrshl h10, h11, h12
+ uqrshl s20, s21, s2
+ uqrshl d17, d31, d8
+
+// CHECK: uqrshl b0, b1, b2 // encoding: [0x20,0x5c,0x22,0x7e]
+// CHECK: uqrshl h10, h11, h12 // encoding: [0x6a,0x5d,0x6c,0x7e]
+// CHECK: uqrshl s20, s21, s2 // encoding: [0xb4,0x5e,0xa2,0x7e]
+// CHECK: uqrshl d17, d31, d8 // encoding: [0xf1,0x5f,0xe8,0x7e]
+
diff --git a/test/MC/AArch64/neon-scalar-saturating-shift.s b/test/MC/AArch64/neon-scalar-saturating-shift.s
new file mode 100644
index 0000000..b53c9f0
--- /dev/null
+++ b/test/MC/AArch64/neon-scalar-saturating-shift.s
@@ -0,0 +1,29 @@
+// RUN: llvm-mc -triple aarch64-none-linux-gnu -mattr=+neon -show-encoding < %s | FileCheck %s
+
+//------------------------------------------------------------------------------
+// Scalar Integer Saturating Shift Left (Signed)
+//------------------------------------------------------------------------------
+ sqshl b0, b1, b2
+ sqshl h10, h11, h12
+ sqshl s20, s21, s2
+ sqshl d17, d31, d8
+
+// CHECK: sqshl b0, b1, b2 // encoding: [0x20,0x4c,0x22,0x5e]
+// CHECK: sqshl h10, h11, h12 // encoding: [0x6a,0x4d,0x6c,0x5e]
+// CHECK: sqshl s20, s21, s2 // encoding: [0xb4,0x4e,0xa2,0x5e]
+// CHECK: sqshl d17, d31, d8 // encoding: [0xf1,0x4f,0xe8,0x5e]
+
+//------------------------------------------------------------------------------
+// Scalar Integer Saturating Shift Left (Unsigned)
+//------------------------------------------------------------------------------
+ uqshl b0, b1, b2
+ uqshl h10, h11, h12
+ uqshl s20, s21, s2
+ uqshl d17, d31, d8
+
+// CHECK: uqshl b0, b1, b2 // encoding: [0x20,0x4c,0x22,0x7e]
+// CHECK: uqshl h10, h11, h12 // encoding: [0x6a,0x4d,0x6c,0x7e]
+// CHECK: uqshl s20, s21, s2 // encoding: [0xb4,0x4e,0xa2,0x7e]
+// CHECK: uqshl d17, d31, d8 // encoding: [0xf1,0x4f,0xe8,0x7e]
+
+
diff --git a/test/MC/AArch64/neon-scalar-shift.s b/test/MC/AArch64/neon-scalar-shift.s
new file mode 100644
index 0000000..366840a
--- /dev/null
+++ b/test/MC/AArch64/neon-scalar-shift.s
@@ -0,0 +1,16 @@
+// RUN: llvm-mc -triple aarch64-none-linux-gnu -mattr=+neon -show-encoding < %s | FileCheck %s
+
+//------------------------------------------------------------------------------
+// Scalar Integer Shift Left (Signed)
+//------------------------------------------------------------------------------
+ sshl d17, d31, d8
+
+// CHECK: sshl d17, d31, d8 // encoding: [0xf1,0x47,0xe8,0x5e]
+
+//------------------------------------------------------------------------------
+// Scalar Integer Shift Left (Unsigned)
+//------------------------------------------------------------------------------
+ ushl d17, d31, d8
+
+// CHECK: ushl d17, d31, d8 // encoding: [0xf1,0x47,0xe8,0x7e]
+
diff --git a/test/MC/AArch64/neon-shift.s b/test/MC/AArch64/neon-shift.s
index 23d687c..614e6de 100644
--- a/test/MC/AArch64/neon-shift.s
+++ b/test/MC/AArch64/neon-shift.s
@@ -42,20 +42,6 @@
// CHECK: ushl v0.2d, v1.2d, v2.2d // encoding: [0x20,0x44,0xe2,0x6e]
//------------------------------------------------------------------------------
-// Scalar Integer Shift Lef (Signed)
-//------------------------------------------------------------------------------
- sshl d17, d31, d8
-
-// CHECK: sshl d17, d31, d8 // encoding: [0xf1,0x47,0xe8,0x5e]
-
-//------------------------------------------------------------------------------
-// Scalar Integer Shift Lef (Unsigned)
-//------------------------------------------------------------------------------
- ushl d17, d31, d8
-
-// CHECK: ushl d17, d31, d8 // encoding: [0xf1,0x47,0xe8,0x7e]
-
-//------------------------------------------------------------------------------
// Vector Integer Shift Left by Immediate
//------------------------------------------------------------------------------
shl v0.8b, v1.8b, #3
diff --git a/utils/TableGen/CodeGenTarget.cpp b/utils/TableGen/CodeGenTarget.cpp
index 72fa9ec..c8290da 100644
--- a/utils/TableGen/CodeGenTarget.cpp
+++ b/utils/TableGen/CodeGenTarget.cpp
@@ -75,6 +75,7 @@ std::string llvm::getEnumName(MVT::SimpleValueType T) {
case MVT::v16i1: return "MVT::v16i1";
case MVT::v32i1: return "MVT::v32i1";
case MVT::v64i1: return "MVT::v64i1";
+ case MVT::v1i8: return "MVT::v1i8";
case MVT::v2i8: return "MVT::v2i8";
case MVT::v4i8: return "MVT::v4i8";
case MVT::v8i8: return "MVT::v8i8";
@@ -99,10 +100,12 @@ std::string llvm::getEnumName(MVT::SimpleValueType T) {
case MVT::v16i64: return "MVT::v16i64";
case MVT::v2f16: return "MVT::v2f16";
case MVT::v8f16: return "MVT::v8f16";
+ case MVT::v1f32: return "MVT::v1f32";
case MVT::v2f32: return "MVT::v2f32";
case MVT::v4f32: return "MVT::v4f32";
case MVT::v8f32: return "MVT::v8f32";
case MVT::v16f32: return "MVT::v16f32";
+ case MVT::v1f64: return "MVT::v1f64";
case MVT::v2f64: return "MVT::v2f64";
case MVT::v4f64: return "MVT::v4f64";
case MVT::v8f64: return "MVT::v8f64";
diff --git a/utils/TableGen/IntrinsicEmitter.cpp b/utils/TableGen/IntrinsicEmitter.cpp
index c508795..f6ea69c 100644
--- a/utils/TableGen/IntrinsicEmitter.cpp
+++ b/utils/TableGen/IntrinsicEmitter.cpp
@@ -260,7 +260,8 @@ enum IIT_Info {
IIT_STRUCT5 = 22,
IIT_EXTEND_VEC_ARG = 23,
IIT_TRUNC_VEC_ARG = 24,
- IIT_ANYPTR = 25
+ IIT_ANYPTR = 25,
+ IIT_V1 = 26
};
@@ -350,6 +351,7 @@ static void EncodeFixedType(Record *R, std::vector<unsigned char> &ArgCodes,
EVT VVT = VT;
switch (VVT.getVectorNumElements()) {
default: PrintFatalError("unhandled vector type width in intrinsic!");
+ case 1: Sig.push_back(IIT_V1); break;
case 2: Sig.push_back(IIT_V2); break;
case 4: Sig.push_back(IIT_V4); break;
case 8: Sig.push_back(IIT_V8); break;