aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
-rw-r--r--lib/Target/AArch64/AArch64InstrFormats.td28
-rw-r--r--lib/Target/AArch64/AArch64InstrInfo.td2
-rw-r--r--lib/Target/AArch64/AArch64InstrNEON.td318
-rw-r--r--lib/Target/AArch64/Disassembler/AArch64Disassembler.cpp10
-rw-r--r--test/MC/AArch64/neon-3vdiff.s2
-rw-r--r--test/MC/AArch64/neon-diagnostics.s247
-rw-r--r--test/MC/AArch64/neon-scalar-by-elem-mla.s44
-rw-r--r--test/MC/AArch64/neon-scalar-by-elem-mul.s37
-rw-r--r--test/MC/AArch64/neon-scalar-by-elem-saturating-mla.s46
-rw-r--r--test/MC/AArch64/neon-scalar-by-elem-saturating-mul.s58
-rw-r--r--test/MC/AArch64/neon-scalar-dup.s29
-rw-r--r--test/MC/AArch64/neon-simd-copy.s2
-rw-r--r--test/MC/AArch64/neon-simd-shift.s2
-rw-r--r--test/MC/Disassembler/AArch64/neon-instructions.txt213
14 files changed, 1001 insertions, 37 deletions
diff --git a/lib/Target/AArch64/AArch64InstrFormats.td b/lib/Target/AArch64/AArch64InstrFormats.td
index b0aadfb..2c8cc6b 100644
--- a/lib/Target/AArch64/AArch64InstrFormats.td
+++ b/lib/Target/AArch64/AArch64InstrFormats.td
@@ -1359,5 +1359,33 @@ class NeonI_Crypto_3VSHA<bits<2> size, bits<3> opcode,
// Inherit Rd in 4-0
}
+// Format AdvSIMD scalar x indexed element
+class NeonI_ScalarXIndexedElem<bit u, bit szhi, bit szlo,
+ bits<4> opcode, dag outs, dag ins,
+ string asmstr, list<dag> patterns,
+ InstrItinClass itin>
+ : A64InstRdnm<outs, ins, asmstr, patterns, itin>
+{
+ let Inst{31} = 0b0;
+ let Inst{30} = 0b1;
+ let Inst{29} = u;
+ let Inst{28-24} = 0b11111;
+ let Inst{23} = szhi;
+ let Inst{22} = szlo;
+ // l in Inst{21}
+ // m in Instr{20}
+ // Inherit Rm in 19-16
+ let Inst{15-12} = opcode;
+ // h in Inst{11}
+ let Inst{10} = 0b0;
+ // Inherit Rn in 9-5
+ // Inherit Rd in 4-0
+}
+// Format AdvSIMD scalar copy - insert from element to scalar
+class NeonI_ScalarCopy<dag outs, dag ins, string asmstr,
+ list<dag> patterns, InstrItinClass itin>
+ : NeonI_copy<0b1, 0b0, 0b0000, outs, ins, asmstr, patterns, itin> {
+ let Inst{28} = 0b1;
+}
}
diff --git a/lib/Target/AArch64/AArch64InstrInfo.td b/lib/Target/AArch64/AArch64InstrInfo.td
index ae217f9..23d81fc 100644
--- a/lib/Target/AArch64/AArch64InstrInfo.td
+++ b/lib/Target/AArch64/AArch64InstrInfo.td
@@ -1278,7 +1278,7 @@ def : Pat<(i64 (sext_inreg (anyext i32:$Rn), i1)),
// UBFX makes sense as an implementation of a 64-bit zero-extension too. Could
// use either 64-bit or 32-bit variant, but 32-bit might be more efficient.
-def : Pat<(zext i32:$Rn), (SUBREG_TO_REG (i64 0), (UBFXwwii $Rn, 0, 31),
+def : Pat<(i64 (zext i32:$Rn)), (SUBREG_TO_REG (i64 0), (UBFXwwii $Rn, 0, 31),
sub_32)>;
//===-------------------------------
diff --git a/lib/Target/AArch64/AArch64InstrNEON.td b/lib/Target/AArch64/AArch64InstrNEON.td
index 95e54f2..83bb1fa 100644
--- a/lib/Target/AArch64/AArch64InstrNEON.td
+++ b/lib/Target/AArch64/AArch64InstrNEON.td
@@ -4671,6 +4671,294 @@ defm : Neon_ScalarPair_SD_size_patterns<int_aarch64_neon_vpfmaxnm,
defm : Neon_ScalarPair_SD_size_patterns<int_aarch64_neon_vpfminnm,
int_aarch64_neon_vpfminnmq, FMINNMPvv_S_2S, FMINNMPvv_D_2D>;
+def neon_uimm0_bare : Operand<i64>,
+ ImmLeaf<i64, [{return Imm == 0;}]> {
+ let ParserMatchClass = neon_uimm0_asmoperand;
+ let PrintMethod = "printUImmBareOperand";
+}
+
+def neon_uimm1_bare : Operand<i64>,
+ ImmLeaf<i64, [{(void)Imm; return true;}]> {
+ let ParserMatchClass = neon_uimm1_asmoperand;
+ let PrintMethod = "printUImmBareOperand";
+}
+
+def neon_uimm2_bare : Operand<i64>,
+ ImmLeaf<i64, [{(void)Imm; return true;}]> {
+ let ParserMatchClass = neon_uimm2_asmoperand;
+ let PrintMethod = "printUImmBareOperand";
+}
+
+def neon_uimm3_bare : Operand<i64>,
+ ImmLeaf<i64, [{(void)Imm; return true;}]> {
+ let ParserMatchClass = uimm3_asmoperand;
+ let PrintMethod = "printUImmBareOperand";
+}
+
+def neon_uimm4_bare : Operand<i64>,
+ ImmLeaf<i64, [{(void)Imm; return true;}]> {
+ let ParserMatchClass = uimm4_asmoperand;
+ let PrintMethod = "printUImmBareOperand";
+}
+
+
+// Scalar by element Arithmetic
+
+class NeonI_ScalarXIndexedElemArith<string asmop, bits<4> opcode,
+ string rmlane, bit u, bit szhi, bit szlo,
+ RegisterClass ResFPR, RegisterClass OpFPR,
+ RegisterOperand OpVPR, Operand OpImm>
+ : NeonI_ScalarXIndexedElem<u, szhi, szlo, opcode,
+ (outs ResFPR:$Rd),
+ (ins OpFPR:$Rn, OpVPR:$MRm, OpImm:$Imm),
+ asmop # "\t$Rd, $Rn, $MRm" # rmlane # "[$Imm]",
+ [],
+ NoItinerary> {
+ bits<3> Imm;
+ bits<5> MRm;
+}
+
+class NeonI_ScalarXIndexedElemArith_Constraint_Impl<string asmop, bits<4> opcode,
+ string rmlane,
+ bit u, bit szhi, bit szlo,
+ RegisterClass ResFPR,
+ RegisterClass OpFPR,
+ RegisterOperand OpVPR,
+ Operand OpImm>
+ : NeonI_ScalarXIndexedElem<u, szhi, szlo, opcode,
+ (outs ResFPR:$Rd),
+ (ins ResFPR:$src, OpFPR:$Rn, OpVPR:$MRm, OpImm:$Imm),
+ asmop # "\t$Rd, $Rn, $MRm" # rmlane # "[$Imm]",
+ [],
+ NoItinerary> {
+ let Constraints = "$src = $Rd";
+ bits<3> Imm;
+ bits<5> MRm;
+}
+
+// Scalar Floating Point multiply (scalar, by element)
+def FMULssv_4S : NeonI_ScalarXIndexedElemArith<"fmul",
+ 0b1001, ".s", 0b0, 0b1, 0b0, FPR32, FPR32, VPR128, neon_uimm2_bare> {
+ let Inst{11} = Imm{1}; // h
+ let Inst{21} = Imm{0}; // l
+ let Inst{20-16} = MRm;
+}
+def FMULddv_2D : NeonI_ScalarXIndexedElemArith<"fmul",
+ 0b1001, ".d", 0b0, 0b1, 0b1, FPR64, FPR64, VPR128, neon_uimm1_bare> {
+ let Inst{11} = Imm{0}; // h
+ let Inst{21} = 0b0; // l
+ let Inst{20-16} = MRm;
+}
+
+// Scalar Floating Point multiply extended (scalar, by element)
+def FMULXssv_4S : NeonI_ScalarXIndexedElemArith<"fmulx",
+ 0b1001, ".s", 0b1, 0b1, 0b0, FPR32, FPR32, VPR128, neon_uimm2_bare> {
+ let Inst{11} = Imm{1}; // h
+ let Inst{21} = Imm{0}; // l
+ let Inst{20-16} = MRm;
+}
+def FMULXddv_2D : NeonI_ScalarXIndexedElemArith<"fmulx",
+ 0b1001, ".d", 0b1, 0b1, 0b1, FPR64, FPR64, VPR128, neon_uimm1_bare> {
+ let Inst{11} = Imm{0}; // h
+ let Inst{21} = 0b0; // l
+ let Inst{20-16} = MRm;
+}
+
+// Scalar Floating Point fused multiply-add (scalar, by element)
+def FMLAssv_4S : NeonI_ScalarXIndexedElemArith_Constraint_Impl<"fmla",
+ 0b0001, ".s", 0b0, 0b1, 0b0, FPR32, FPR32, VPR128, neon_uimm2_bare> {
+ let Inst{11} = Imm{1}; // h
+ let Inst{21} = Imm{0}; // l
+ let Inst{20-16} = MRm;
+}
+def FMLAddv_2D : NeonI_ScalarXIndexedElemArith_Constraint_Impl<"fmla",
+ 0b0001, ".d", 0b0, 0b1, 0b1, FPR64, FPR64, VPR128, neon_uimm1_bare> {
+ let Inst{11} = Imm{0}; // h
+ let Inst{21} = 0b0; // l
+ let Inst{20-16} = MRm;
+}
+
+// Scalar Floating Point fused multiply-subtract (scalar, by element)
+def FMLSssv_4S : NeonI_ScalarXIndexedElemArith_Constraint_Impl<"fmls",
+ 0b0101, ".s", 0b0, 0b1, 0b0, FPR32, FPR32, VPR128, neon_uimm2_bare> {
+ let Inst{11} = Imm{1}; // h
+ let Inst{21} = Imm{0}; // l
+ let Inst{20-16} = MRm;
+}
+def FMLSddv_2D : NeonI_ScalarXIndexedElemArith_Constraint_Impl<"fmls",
+ 0b0101, ".d", 0b0, 0b1, 0b1, FPR64, FPR64, VPR128, neon_uimm1_bare> {
+ let Inst{11} = Imm{0}; // h
+ let Inst{21} = 0b0; // l
+ let Inst{20-16} = MRm;
+}
+
+// Scalar Signed saturating doubling multiply-add long (scalar, by element)
+def SQDMLALshv_4H : NeonI_ScalarXIndexedElemArith_Constraint_Impl<"sqdmlal",
+ 0b0011, ".h", 0b0, 0b0, 0b1, FPR32, FPR16, VPR64Lo, neon_uimm2_bare> {
+ let Inst{11} = 0b0; // h
+ let Inst{21} = Imm{1}; // l
+ let Inst{20} = Imm{0}; // m
+ let Inst{19-16} = MRm{3-0};
+}
+def SQDMLALshv_8H : NeonI_ScalarXIndexedElemArith_Constraint_Impl<"sqdmlal",
+ 0b0011, ".h", 0b0, 0b0, 0b1, FPR32, FPR16, VPR128Lo, neon_uimm3_bare> {
+ let Inst{11} = Imm{2}; // h
+ let Inst{21} = Imm{1}; // l
+ let Inst{20} = Imm{0}; // m
+ let Inst{19-16} = MRm{3-0};
+}
+def SQDMLALdsv_2S : NeonI_ScalarXIndexedElemArith_Constraint_Impl<"sqdmlal",
+ 0b0011, ".s", 0b0, 0b1, 0b0, FPR64, FPR32, VPR64, neon_uimm1_bare> {
+ let Inst{11} = 0b0; // h
+ let Inst{21} = Imm{0}; // l
+ let Inst{20-16} = MRm;
+}
+def SQDMLALdsv_4S : NeonI_ScalarXIndexedElemArith_Constraint_Impl<"sqdmlal",
+ 0b0011, ".s", 0b0, 0b1, 0b0, FPR64, FPR32, VPR128, neon_uimm2_bare> {
+ let Inst{11} = Imm{1}; // h
+ let Inst{21} = Imm{0}; // l
+ let Inst{20-16} = MRm;
+}
+
+// Scalar Signed saturating doubling
+// multiply-subtract long (scalar, by element)
+def SQDMLSLshv_4H : NeonI_ScalarXIndexedElemArith_Constraint_Impl<"sqdmlsl",
+ 0b0111, ".h", 0b0, 0b0, 0b1, FPR32, FPR16, VPR64Lo, neon_uimm2_bare> {
+ let Inst{11} = 0b0; // h
+ let Inst{21} = Imm{1}; // l
+ let Inst{20} = Imm{0}; // m
+ let Inst{19-16} = MRm{3-0};
+}
+def SQDMLSLshv_8H : NeonI_ScalarXIndexedElemArith_Constraint_Impl<"sqdmlsl",
+ 0b0111, ".h", 0b0, 0b0, 0b1, FPR32, FPR16, VPR128Lo, neon_uimm3_bare> {
+ let Inst{11} = Imm{2}; // h
+ let Inst{21} = Imm{1}; // l
+ let Inst{20} = Imm{0}; // m
+ let Inst{19-16} = MRm{3-0};
+}
+def SQDMLSLdsv_2S : NeonI_ScalarXIndexedElemArith_Constraint_Impl<"sqdmlsl",
+ 0b0111, ".s", 0b0, 0b1, 0b0, FPR64, FPR32, VPR64, neon_uimm1_bare> {
+ let Inst{11} = 0b0; // h
+ let Inst{21} = Imm{0}; // l
+ let Inst{20-16} = MRm;
+}
+def SQDMLSLdsv_4S : NeonI_ScalarXIndexedElemArith_Constraint_Impl<"sqdmlsl",
+ 0b0111, ".s", 0b0, 0b1, 0b0, FPR64, FPR32, VPR128, neon_uimm2_bare> {
+ let Inst{11} = Imm{1}; // h
+ let Inst{21} = Imm{0}; // l
+ let Inst{20-16} = MRm;
+}
+
+// Scalar Signed saturating doubling multiply long (scalar, by element)
+def SQDMULLshv_4H : NeonI_ScalarXIndexedElemArith<"sqdmull",
+ 0b1011, ".h", 0b0, 0b0, 0b1, FPR32, FPR16, VPR64Lo, neon_uimm2_bare> {
+ let Inst{11} = 0b0; // h
+ let Inst{21} = Imm{1}; // l
+ let Inst{20} = Imm{0}; // m
+ let Inst{19-16} = MRm{3-0};
+}
+def SQDMULLshv_8H : NeonI_ScalarXIndexedElemArith<"sqdmull",
+ 0b1011, ".h", 0b0, 0b0, 0b1, FPR32, FPR16, VPR128Lo, neon_uimm3_bare> {
+ let Inst{11} = Imm{2}; // h
+ let Inst{21} = Imm{1}; // l
+ let Inst{20} = Imm{0}; // m
+ let Inst{19-16} = MRm{3-0};
+}
+def SQDMULLdsv_2S : NeonI_ScalarXIndexedElemArith<"sqdmull",
+ 0b1011, ".s", 0b0, 0b1, 0b0, FPR64, FPR32, VPR64, neon_uimm1_bare> {
+ let Inst{11} = 0b0; // h
+ let Inst{21} = Imm{0}; // l
+ let Inst{20-16} = MRm;
+}
+def SQDMULLdsv_4S : NeonI_ScalarXIndexedElemArith<"sqdmull",
+ 0b1011, ".s", 0b0, 0b1, 0b0, FPR64, FPR32, VPR128, neon_uimm2_bare> {
+ let Inst{11} = Imm{1}; // h
+ let Inst{21} = Imm{0}; // l
+ let Inst{20-16} = MRm;
+}
+
+// Scalar Signed saturating doubling multiply returning
+// high half (scalar, by element)
+def SQDMULHhhv_4H : NeonI_ScalarXIndexedElemArith<"sqdmulh",
+ 0b1100, ".h", 0b0, 0b0, 0b1, FPR16, FPR16, VPR64Lo, neon_uimm2_bare> {
+ let Inst{11} = 0b0; // h
+ let Inst{21} = Imm{1}; // l
+ let Inst{20} = Imm{0}; // m
+ let Inst{19-16} = MRm{3-0};
+}
+def SQDMULHhhv_8H : NeonI_ScalarXIndexedElemArith<"sqdmulh",
+ 0b1100, ".h", 0b0, 0b0, 0b1, FPR16, FPR16, VPR128Lo, neon_uimm3_bare> {
+ let Inst{11} = Imm{2}; // h
+ let Inst{21} = Imm{1}; // l
+ let Inst{20} = Imm{0}; // m
+ let Inst{19-16} = MRm{3-0};
+}
+def SQDMULHssv_2S : NeonI_ScalarXIndexedElemArith<"sqdmulh",
+ 0b1100, ".s", 0b0, 0b1, 0b0, FPR32, FPR32, VPR64, neon_uimm1_bare> {
+ let Inst{11} = 0b0; // h
+ let Inst{21} = Imm{0}; // l
+ let Inst{20-16} = MRm;
+}
+def SQDMULHssv_4S : NeonI_ScalarXIndexedElemArith<"sqdmulh",
+ 0b1100, ".s", 0b0, 0b1, 0b0, FPR32, FPR32, VPR128, neon_uimm2_bare> {
+ let Inst{11} = Imm{1}; // h
+ let Inst{21} = Imm{0}; // l
+ let Inst{20-16} = MRm;
+}
+
+// Scalar Signed saturating rounding doubling multiply
+// returning high half (scalar, by element)
+def SQRDMULHhhv_4H : NeonI_ScalarXIndexedElemArith<"sqrdmulh",
+ 0b1101, ".h", 0b0, 0b0, 0b1, FPR16, FPR16, VPR64Lo, neon_uimm2_bare> {
+ let Inst{11} = 0b0; // h
+ let Inst{21} = Imm{1}; // l
+ let Inst{20} = Imm{0}; // m
+ let Inst{19-16} = MRm{3-0};
+}
+def SQRDMULHhhv_8H : NeonI_ScalarXIndexedElemArith<"sqrdmulh",
+ 0b1101, ".h", 0b0, 0b0, 0b1, FPR16, FPR16, VPR128Lo, neon_uimm3_bare> {
+ let Inst{11} = Imm{2}; // h
+ let Inst{21} = Imm{1}; // l
+ let Inst{20} = Imm{0}; // m
+ let Inst{19-16} = MRm{3-0};
+}
+def SQRDMULHssv_2S : NeonI_ScalarXIndexedElemArith<"sqrdmulh",
+ 0b1101, ".s", 0b0, 0b1, 0b0, FPR32, FPR32, VPR64, neon_uimm1_bare> {
+ let Inst{11} = 0b0; // h
+ let Inst{21} = Imm{0}; // l
+ let Inst{20-16} = MRm;
+}
+def SQRDMULHssv_4S : NeonI_ScalarXIndexedElemArith<"sqrdmulh",
+ 0b1101, ".s", 0b0, 0b1, 0b0, FPR32, FPR32, VPR128, neon_uimm2_bare> {
+ let Inst{11} = Imm{1}; // h
+ let Inst{21} = Imm{0}; // l
+ let Inst{20-16} = MRm;
+}
+
+
+// Scalar Copy - DUP element to scalar
+class NeonI_Scalar_DUP<string asmop, string asmlane,
+ RegisterClass ResRC, RegisterOperand VPRC,
+ Operand OpImm>
+ : NeonI_ScalarCopy<(outs ResRC:$Rd), (ins VPRC:$Rn, OpImm:$Imm),
+ asmop # "\t$Rd, $Rn." # asmlane # "[$Imm]",
+ [],
+ NoItinerary> {
+ bits<4> Imm;
+}
+
+def DUPbv_B : NeonI_Scalar_DUP<"dup", "b", FPR8, VPR128, neon_uimm4_bare> {
+ let Inst{20-16} = {Imm{3}, Imm{2}, Imm{1}, Imm{0}, 0b1};
+}
+def DUPhv_H : NeonI_Scalar_DUP<"dup", "h", FPR16, VPR128, neon_uimm3_bare> {
+ let Inst{20-16} = {Imm{2}, Imm{1}, Imm{0}, 0b1, 0b0};
+}
+def DUPsv_S : NeonI_Scalar_DUP<"dup", "s", FPR32, VPR128, neon_uimm2_bare> {
+ let Inst{20-16} = {Imm{1}, Imm{0}, 0b1, 0b0, 0b0};
+}
+def DUPdv_D : NeonI_Scalar_DUP<"dup", "d", FPR64, VPR128, neon_uimm1_bare> {
+ let Inst{20-16} = {Imm, 0b1, 0b0, 0b0, 0b0};
+}
//===----------------------------------------------------------------------===//
@@ -4792,36 +5080,6 @@ def : Pat<(v2i64 (bitconvert (f128 FPR128:$src))), (v2i64 FPR128:$src)>;
def : Pat<(v4f32 (bitconvert (f128 FPR128:$src))), (v4f32 FPR128:$src)>;
def : Pat<(v2f64 (bitconvert (f128 FPR128:$src))), (v2f64 FPR128:$src)>;
-def neon_uimm0_bare : Operand<i64>,
- ImmLeaf<i64, [{return Imm == 0;}]> {
- let ParserMatchClass = neon_uimm0_asmoperand;
- let PrintMethod = "printUImmBareOperand";
-}
-
-def neon_uimm1_bare : Operand<i64>,
- ImmLeaf<i64, [{(void)Imm; return true;}]> {
- let ParserMatchClass = neon_uimm1_asmoperand;
- let PrintMethod = "printUImmBareOperand";
-}
-
-def neon_uimm2_bare : Operand<i64>,
- ImmLeaf<i64, [{(void)Imm; return true;}]> {
- let ParserMatchClass = neon_uimm2_asmoperand;
- let PrintMethod = "printUImmBareOperand";
-}
-
-def neon_uimm3_bare : Operand<i64>,
- ImmLeaf<i64, [{(void)Imm; return true;}]> {
- let ParserMatchClass = uimm3_asmoperand;
- let PrintMethod = "printUImmBareOperand";
-}
-
-def neon_uimm4_bare : Operand<i64>,
- ImmLeaf<i64, [{(void)Imm; return true;}]> {
- let ParserMatchClass = uimm4_asmoperand;
- let PrintMethod = "printUImmBareOperand";
-}
-
def neon_uimm3 : Operand<i64>,
ImmLeaf<i64, [{(void)Imm; return true;}]> {
let ParserMatchClass = uimm3_asmoperand;
diff --git a/lib/Target/AArch64/Disassembler/AArch64Disassembler.cpp b/lib/Target/AArch64/Disassembler/AArch64Disassembler.cpp
index 38845b6..c4f3062 100644
--- a/lib/Target/AArch64/Disassembler/AArch64Disassembler.cpp
+++ b/lib/Target/AArch64/Disassembler/AArch64Disassembler.cpp
@@ -82,6 +82,8 @@ static DecodeStatus DecodeFPR32RegisterClass(llvm::MCInst &Inst, unsigned RegNo,
uint64_t Address, const void *Decoder);
static DecodeStatus DecodeFPR64RegisterClass(llvm::MCInst &Inst, unsigned RegNo,
uint64_t Address, const void *Decoder);
+static DecodeStatus DecodeFPR64LoRegisterClass(llvm::MCInst &Inst, unsigned RegNo,
+ uint64_t Address, const void *Decoder);
static DecodeStatus DecodeFPR128RegisterClass(llvm::MCInst &Inst,
unsigned RegNo, uint64_t Address,
const void *Decoder);
@@ -379,6 +381,14 @@ DecodeFPR64RegisterClass(llvm::MCInst &Inst, unsigned RegNo,
return MCDisassembler::Success;
}
+static DecodeStatus
+DecodeFPR64LoRegisterClass(llvm::MCInst &Inst, unsigned RegNo,
+ uint64_t Address, const void *Decoder) {
+ if (RegNo > 15)
+ return MCDisassembler::Fail;
+
+ return DecodeFPR64RegisterClass(Inst, RegNo, Address, Decoder);
+}
static DecodeStatus
DecodeFPR128RegisterClass(llvm::MCInst &Inst, unsigned RegNo,
diff --git a/test/MC/AArch64/neon-3vdiff.s b/test/MC/AArch64/neon-3vdiff.s
index 337b94c..1de6909 100644
--- a/test/MC/AArch64/neon-3vdiff.s
+++ b/test/MC/AArch64/neon-3vdiff.s
@@ -1,4 +1,4 @@
-// RUN: llvm-mc -triple=aarch64 -mattr=+neon -show-encoding < %s | FileCheck %s
+// RUN: llvm-mc -triple=aarch64-none-linux-gnu -mattr=+neon -show-encoding < %s | FileCheck %s
// Check that the assembler can handle the documented syntax for AArch64
diff --git a/test/MC/AArch64/neon-diagnostics.s b/test/MC/AArch64/neon-diagnostics.s
index 5ada875..0a2332b 100644
--- a/test/MC/AArch64/neon-diagnostics.s
+++ b/test/MC/AArch64/neon-diagnostics.s
@@ -4667,7 +4667,7 @@
// CHECK-ERROR: error: invalid operand for instruction
// CHECK-ERROR: sqdmlal s17, h27, s12
// CHECK-ERROR: ^
-// CHECK-ERROR: error: invalid operand for instruction
+// CHECK-ERROR: error: too few operands for instruction
// CHECK-ERROR: sqdmlal d19, s24, d12
// CHECK-ERROR: ^
@@ -4681,7 +4681,7 @@
// CHECK-ERROR: error: invalid operand for instruction
// CHECK-ERROR: sqdmlsl s14, h12, s25
// CHECK-ERROR: ^
-// CHECK-ERROR: error: invalid operand for instruction
+// CHECK-ERROR: error: too few operands for instruction
// CHECK-ERROR: sqdmlsl d12, s23, d13
// CHECK-ERROR: ^
@@ -4695,7 +4695,7 @@
// CHECK-ERROR: error: invalid operand for instruction
// CHECK-ERROR: sqdmull s12, h22, s12
// CHECK-ERROR: ^
-// CHECK-ERROR: error: invalid operand for instruction
+// CHECK-ERROR: error: too few operands for instruction
// CHECK-ERROR: sqdmull d15, s22, d12
// CHECK-ERROR: ^
@@ -5687,3 +5687,244 @@
// CHECK-ERROR <stdin>:4341:17: error: invalid operand for instruction
// CHECK-ERROR trn2 v0.1d, v1.1d, v2.1d
// CHECK-ERROR ^
+
+//----------------------------------------------------------------------
+// Floating Point multiply (scalar, by element)
+//----------------------------------------------------------------------
+ // mismatched and invalid vector types
+ fmul s0, s1, v1.h[0]
+ fmul h0, h1, v1.s[0]
+ // invalid lane
+ fmul s2, s29, v10.s[4]
+
+// CHECK-ERROR: error: invalid operand for instruction
+// CHECK-ERROR: fmul s0, s1, v1.h[0]
+// CHECK-ERROR: ^
+// CHECK-ERROR: error: invalid operand for instruction
+// CHECK-ERROR: fmul h0, h1, v1.s[0]
+// CHECK-ERROR: ^
+// CHECK-ERROR: error: lane number incompatible with layout
+// CHECK-ERROR: fmul s2, s29, v10.s[4]
+// CHECK-ERROR: ^
+
+//----------------------------------------------------------------------
+// Floating Point multiply extended (scalar, by element)
+//----------------------------------------------------------------------
+ // mismatched and invalid vector types
+ fmulx d0, d1, v1.b[0]
+ fmulx h0, h1, v1.d[0]
+ // invalid lane
+ fmulx d2, d29, v10.d[3]
+
+// CHECK-ERROR: error: invalid operand for instruction
+// CHECK-ERROR: fmulx d0, d1, v1.b[0]
+// CHECK-ERROR: ^
+// CHECK-ERROR: error: invalid operand for instruction
+// CHECK-ERROR: fmulx h0, h1, v1.d[0]
+// CHECK-ERROR: ^
+// CHECK-ERROR: error: lane number incompatible with layout
+// CHECK-ERROR: fmulx d2, d29, v10.d[3]
+// CHECK-ERROR: ^
+
+//----------------------------------------------------------------------
+// Floating Point fused multiply-add (scalar, by element)
+//----------------------------------------------------------------------
+ // mismatched and invalid vector types
+ fmla b0, b1, v1.b[0]
+ fmla d30, s11, v1.d[1]
+ // invalid lane
+ fmla s16, s22, v16.s[5]
+
+// CHECK-ERROR: error: invalid operand for instruction
+// CHECK-ERROR: fmla b0, b1, v1.b[0]
+// CHECK-ERROR: ^
+// CHECK-ERROR: error: invalid operand for instruction
+// CHECK-ERROR: fmla d30, s11, v1.d[1]
+// CHECK-ERROR: ^
+// CHECK-ERROR: error: lane number incompatible with layout
+// CHECK-ERROR: fmla s16, s22, v16.s[5]
+// CHECK-ERROR: ^
+
+//----------------------------------------------------------------------
+// Floating Point fused multiply-subtract (scalar, by element)
+//----------------------------------------------------------------------
+ // mismatched and invalid vector types
+ fmls s29, h10, v28.s[1]
+ fmls h7, h17, v26.s[2]
+ // invalid lane
+ fmls d16, d22, v16.d[-1]
+
+// CHECK-ERROR: error: invalid operand for instruction
+// CHECK-ERROR: fmls s29, h10, v28.s[1]
+// CHECK-ERROR: ^
+// CHECK-ERROR: error: invalid operand for instruction
+// CHECK-ERROR: fmls h7, h17, v26.s[2]
+// CHECK-ERROR: ^
+// CHECK-ERROR: error: expected lane number
+// CHECK-ERROR: fmls d16, d22, v16.d[-1]
+// CHECK-ERROR: ^
+
+//----------------------------------------------------------------------
+// Scalar Signed saturating doubling multiply-add long
+// (scalar, by element)
+//----------------------------------------------------------------------
+ // mismatched and invalid vector types
+ sqdmlal s0, h0, v0.s[0]
+ sqdmlal s8, s9, v14.s[1]
+ // invalid lane
+ sqdmlal s4, s5, v1.s[5]
+ // invalid vector index
+ sqdmlal s0, h0, v17.h[0]
+
+// CHECK-ERROR: error: invalid operand for instruction
+// CHECK-ERROR: sqdmlal s0, h0, v0.s[0]
+// CHECK-ERROR: ^
+// CHECK-ERROR: error: invalid operand for instruction
+// CHECK-ERROR: sqdmlal s8, s9, v14.s[1]
+// CHECK-ERROR: ^
+// CHECK-ERROR: error: lane number incompatible with layout
+// CHECK-ERROR: sqdmlal s4, s5, v1.s[5]
+// CHECK-ERROR: ^
+// CHECK-ERROR: error: invalid operand for instruction
+// CHECK-ERROR: sqdmlal s0, h0, v17.h[0]
+// CHECK-ERROR: ^
+
+//----------------------------------------------------------------------
+// Scalar Signed saturating doubling multiply-subtract long
+// (scalar, by element)
+//----------------------------------------------------------------------
+ // mismatched and invalid vector types
+ sqdmlsl s1, h1, v1.d[0]
+ sqdmlsl d1, h1, v13.s[0]
+ // invalid lane
+ sqdmlsl d1, s1, v13.s[4]
+ // invalid vector index
+ sqdmlsl s1, h1, v20.h[7]
+
+// CHECK-ERROR: error: invalid operand for instruction
+// CHECK-ERROR: sqdmlsl s1, h1, v1.d[0]
+// CHECK-ERROR: ^
+// CHECK-ERROR: error: invalid operand for instruction
+// CHECK-ERROR: sqdmlsl d1, h1, v13.s[0]
+// CHECK-ERROR: ^
+// CHECK-ERROR: error: lane number incompatible with layout
+// CHECK-ERROR: sqdmlsl d1, s1, v13.s[4]
+// CHECK-ERROR: ^
+// CHECK-ERROR: error: invalid operand for instruction
+// CHECK-ERROR: sqdmlsl s1, h1, v20.h[7]
+// CHECK-ERROR: ^
+
+//----------------------------------------------------------------------
+// Scalar Signed saturating doubling multiply long (scalar, by element)
+//----------------------------------------------------------------------
+ // mismatched and invalid vector types
+ // invalid lane
+ // invalid vector index
+ // mismatched and invalid vector types
+ sqdmull s1, h1, v1.s[1]
+ sqdmull s1, s1, v4.s[0]
+ // invalid lane
+ sqdmull s12, h17, v9.h[9]
+ // invalid vector index
+ sqdmull s1, h1, v16.h[5]
+
+// CHECK-ERROR: error: invalid operand for instruction
+// CHECK-ERROR: sqdmull s1, h1, v1.s[1]
+// CHECK-ERROR: ^
+// CHECK-ERROR: error: invalid operand for instruction
+// CHECK-ERROR: sqdmull s1, s1, v4.s[0]
+// CHECK-ERROR: ^
+// CHECK-ERROR: error: lane number incompatible with layout
+// CHECK-ERROR: sqdmull s12, h17, v9.h[9]
+// CHECK-ERROR: ^
+// CHECK-ERROR: error: invalid operand for instruction
+// CHECK-ERROR: sqdmull s1, h1, v16.h[5]
+// CHECK-ERROR: ^
+
+//----------------------------------------------------------------------
+// Scalar Signed saturating doubling multiply returning
+// high half (scalar, by element)
+//----------------------------------------------------------------------
+ // mismatched and invalid vector types
+ sqdmulh h0, s1, v0.h[0]
+ sqdmulh s25, s26, v27.h[3]
+ // invalid lane
+ sqdmulh s25, s26, v27.s[4]
+ // invalid vector index
+ sqdmulh s0, h1, v30.h[0]
+
+// CHECK-ERROR: error: invalid operand for instruction
+// CHECK-ERROR: sqdmulh h0, s1, v0.h[0]
+// CHECK-ERROR: ^
+// CHECK-ERROR: error: invalid operand for instruction
+// CHECK-ERROR: sqdmulh s25, s26, v27.h[3]
+// CHECK-ERROR: ^
+// CHECK-ERROR: error: lane number incompatible with layout
+// CHECK-ERROR: sqdmulh s25, s26, v27.s[4]
+// CHECK-ERROR: ^
+// CHECK-ERROR: error: invalid operand for instruction
+// CHECK-ERROR: sqdmulh s0, h1, v30.h[0]
+// CHECK-ERROR: ^
+
+//----------------------------------------------------------------------
+// Scalar Signed saturating rounding doubling multiply
+// returning high half (scalar, by element)
+//----------------------------------------------------------------------
+ // mismatched and invalid vector types
+ sqrdmulh h31, h30, v14.s[2]
+ sqrdmulh s5, h6, v7.s[2]
+ // invalid lane
+ sqrdmulh h31, h30, v14.h[9]
+ // invalid vector index
+ sqrdmulh h31, h30, v20.h[4]
+
+// CHECK-ERROR: error: invalid operand for instruction
+// CHECK-ERROR: sqrdmulh h31, h30, v14.s[2]
+// CHECK-ERROR: ^
+// CHECK-ERROR: error: invalid operand for instruction
+// CHECK-ERROR: sqrdmulh s5, h6, v7.s[2]
+// CHECK-ERROR: ^
+// CHECK-ERROR: error: lane number incompatible with layout
+// CHECK-ERROR: sqrdmulh h31, h30, v14.h[9]
+// CHECK-ERROR: ^
+// CHECK-ERROR: error: invalid operand for instruction
+// CHECK-ERROR: sqrdmulh h31, h30, v20.h[4]
+// CHECK-ERROR: ^
+
+//----------------------------------------------------------------------
+// Scalar Duplicate element (scalar)
+//----------------------------------------------------------------------
+ // mismatched and invalid vector types
+ dup b0, v1.d[0]
+ dup h0, v31.b[8]
+ dup s0, v2.h[4]
+ dup d0, v17.s[3]
+ // invalid lane
+ dup d0, v17.d[4]
+ dup s0, v1.s[7]
+ dup h0, v31.h[16]
+ dup b1, v3.b[16]
+// CHECK-ERROR: error: invalid operand for instruction
+// CHECK-ERROR: dup b0, v1.d[0]
+// CHECK-ERROR: ^
+// CHECK-ERROR: error: invalid operand for instruction
+// CHECK-ERROR: dup h0, v31.b[8]
+// CHECK-ERROR: ^
+// CHECK-ERROR: error: invalid operand for instruction
+// CHECK-ERROR: dup s0, v2.h[4]
+// CHECK-ERROR: ^
+// CHECK-ERROR: error: invalid operand for instruction
+// CHECK-ERROR: dup d0, v17.s[3]
+// CHECK-ERROR: ^
+// CHECK-ERROR: error: lane number incompatible with layout
+// CHECK-ERROR: dup d0, v17.d[4]
+// CHECK-ERROR: ^
+// CHECK-ERROR: error: lane number incompatible with layout
+// CHECK-ERROR: dup s0, v1.s[7]
+// CHECK-ERROR: ^
+// CHECK-ERROR: error: lane number incompatible with layout
+// CHECK-ERROR: dup h0, v31.h[16]
+// CHECK-ERROR: ^
+// CHECK-ERROR: error: lane number incompatible with layout
+// CHECK-ERROR: dup b1, v3.b[16]
+// CHECK-ERROR: ^
diff --git a/test/MC/AArch64/neon-scalar-by-elem-mla.s b/test/MC/AArch64/neon-scalar-by-elem-mla.s
new file mode 100644
index 0000000..fec9d12
--- /dev/null
+++ b/test/MC/AArch64/neon-scalar-by-elem-mla.s
@@ -0,0 +1,44 @@
+// RUN: llvm-mc -triple aarch64-none-linux-gnu -mattr=+neon -show-encoding < %s | FileCheck %s
+
+//------------------------------------------------------------------------------
+// Floating Point fused multiply-add (scalar, by element)
+//------------------------------------------------------------------------------
+ fmla s0, s1, v1.s[0]
+ fmla s30, s11, v1.s[1]
+ fmla s4, s5, v7.s[2]
+ fmla s16, s22, v16.s[3]
+ fmla d0, d1, v1.d[0]
+ fmla d30, d11, v1.d[1]
+
+// CHECK: fmla s0, s1, v1.s[0] // encoding: [0x20,0x10,0x81,0x5f]
+// CHECK: fmla s30, s11, v1.s[1] // encoding: [0x7e,0x11,0xa1,0x5f]
+// CHECK: fmla s4, s5, v7.s[2] // encoding: [0xa4,0x18,0x87,0x5f]
+// CHECK: fmla s16, s22, v16.s[3] // encoding: [0xd0,0x1a,0xb0,0x5f]
+// CHECK: fmla d0, d1, v1.d[0] // encoding: [0x20,0x10,0xc1,0x5f]
+// CHECK: fmla d30, d11, v1.d[1] // encoding: [0x7e,0x19,0xc1,0x5f]
+
+//------------------------------------------------------------------------------
+// Floating Point fused multiply-subtract (scalar, by element)
+//------------------------------------------------------------------------------
+
+ fmls s2, s3, v4.s[0]
+ fmls s29, s10, v28.s[1]
+ fmls s5, s12, v23.s[2]
+ fmls s7, s17, v26.s[3]
+ fmls d0, d1, v1.d[0]
+ fmls d30, d11, v1.d[1]
+
+// CHECK: fmls s2, s3, v4.s[0] // encoding: [0x62,0x50,0x84,0x5f]
+// CHECK: fmls s29, s10, v28.s[1] // encoding: [0x5d,0x51,0xbc,0x5f]
+// CHECK: fmls s5, s12, v23.s[2] // encoding: [0x85,0x59,0x97,0x5f]
+// CHECK: fmls s7, s17, v26.s[3] // encoding: [0x27,0x5a,0xba,0x5f]
+// CHECK: fmls d0, d1, v1.d[0] // encoding: [0x20,0x50,0xc1,0x5f]
+// CHECK: fmls d30, d11, v1.d[1] // encoding: [0x7e,0x59,0xc1,0x5f]
+
+
+
+
+
+
+
+
diff --git a/test/MC/AArch64/neon-scalar-by-elem-mul.s b/test/MC/AArch64/neon-scalar-by-elem-mul.s
new file mode 100644
index 0000000..8b8a3f5
--- /dev/null
+++ b/test/MC/AArch64/neon-scalar-by-elem-mul.s
@@ -0,0 +1,37 @@
+// RUN: llvm-mc -triple aarch64-none-linux-gnu -mattr=+neon -show-encoding < %s | FileCheck %s
+
+//------------------------------------------------------------------------------
+// Floating Point multiply (scalar, by element)
+//------------------------------------------------------------------------------
+ fmul s0, s1, v1.s[0]
+ fmul s30, s11, v1.s[1]
+ fmul s4, s5, v7.s[2]
+ fmul s16, s22, v16.s[3]
+ fmul d0, d1, v1.d[0]
+ fmul d30, d11, v1.d[1]
+
+// CHECK: fmul s0, s1, v1.s[0] // encoding: [0x20,0x90,0x81,0x5f]
+// CHECK: fmul s30, s11, v1.s[1] // encoding: [0x7e,0x91,0xa1,0x5f]
+// CHECK: fmul s4, s5, v7.s[2] // encoding: [0xa4,0x98,0x87,0x5f]
+// CHECK: fmul s16, s22, v16.s[3] // encoding: [0xd0,0x9a,0xb0,0x5f]
+// CHECK: fmul d0, d1, v1.d[0] // encoding: [0x20,0x90,0xc1,0x5f]
+// CHECK: fmul d30, d11, v1.d[1] // encoding: [0x7e,0x99,0xc1,0x5f]
+
+
+//------------------------------------------------------------------------------
+// Floating Point multiply extended (scalar, by element)
+//------------------------------------------------------------------------------
+ fmulx s6, s2, v8.s[0]
+ fmulx s7, s3, v13.s[1]
+ fmulx s9, s7, v9.s[2]
+ fmulx s13, s21, v10.s[3]
+ fmulx d15, d9, v7.d[0]
+ fmulx d13, d12, v11.d[1]
+
+// CHECK: fmulx s6, s2, v8.s[0] // encoding: [0x46,0x90,0x88,0x7f]
+// CHECK: fmulx s7, s3, v13.s[1] // encoding: [0x67,0x90,0xad,0x7f]
+// CHECK: fmulx s9, s7, v9.s[2] // encoding: [0xe9,0x98,0x89,0x7f]
+// CHECK: fmulx s13, s21, v10.s[3] // encoding: [0xad,0x9a,0xaa,0x7f]
+// CHECK: fmulx d15, d9, v7.d[0] // encoding: [0x2f,0x91,0xc7,0x7f]
+// CHECK: fmulx d13, d12, v11.d[1] // encoding: [0x8d,0x99,0xcb,0x7f]
+
diff --git a/test/MC/AArch64/neon-scalar-by-elem-saturating-mla.s b/test/MC/AArch64/neon-scalar-by-elem-saturating-mla.s
new file mode 100644
index 0000000..e3d7e05
--- /dev/null
+++ b/test/MC/AArch64/neon-scalar-by-elem-saturating-mla.s
@@ -0,0 +1,46 @@
+// RUN: llvm-mc -triple aarch64-none-linux-gnu -mattr=+neon -show-encoding < %s | FileCheck %s
+
+//-----------------------------------------------------------------------------
+// Signed saturating doubling multiply-add long (scalar, by element)
+//-----------------------------------------------------------------------------
+ sqdmlal s0, h0, v0.h[0]
+ sqdmlal s7, h1, v4.h[3]
+ sqdmlal s11, h16, v8.h[4]
+ sqdmlal s30, h30, v15.h[7]
+ sqdmlal d0, s0, v3.s[0]
+ sqdmlal d30, s30, v30.s[3]
+ sqdmlal d8, s9, v14.s[1]
+
+// CHECK: sqdmlal s0, h0, v0.h[0] // encoding: [0x00,0x30,0x40,0x5f]
+// CHECK: sqdmlal s7, h1, v4.h[3] // encoding: [0x27,0x30,0x74,0x5f]
+// CHECK: sqdmlal s11, h16, v8.h[4] // encoding: [0x0b,0x3a,0x48,0x5f]
+// CHECK: sqdmlal s30, h30, v15.h[7] // encoding: [0xde,0x3b,0x7f,0x5f]
+// CHECK: sqdmlal d0, s0, v3.s[0] // encoding: [0x00,0x30,0x83,0x5f]
+// CHECK: sqdmlal d30, s30, v30.s[3] // encoding: [0xde,0x3b,0xbe,0x5f]
+// CHECK: sqdmlal d8, s9, v14.s[1] // encoding: [0x28,0x31,0xae,0x5f]
+
+//-----------------------------------------------------------------------------
+// Signed saturating doubling multiply-subtract long (scalar, by element)
+//-----------------------------------------------------------------------------
+ sqdmlsl s1, h1, v1.h[0]
+ sqdmlsl s8, h2, v5.h[1]
+ sqdmlsl s12, h13, v14.h[2]
+ sqdmlsl s29, h28, v11.h[7]
+ sqdmlsl d1, s1, v13.s[0]
+ sqdmlsl d31, s31, v31.s[2]
+ sqdmlsl d16, s18, v28.s[3]
+
+// CHECK: sqdmlsl s1, h1, v1.h[0] // encoding: [0x21,0x70,0x41,0x5f]
+// CHECK: sqdmlsl s8, h2, v5.h[1] // encoding: [0x48,0x70,0x55,0x5f]
+// CHECK: sqdmlsl s12, h13, v14.h[2] // encoding: [0xac,0x71,0x6e,0x5f]
+// CHECK: sqdmlsl s29, h28, v11.h[7] // encoding: [0x9d,0x7b,0x7b,0x5f]
+// CHECK: sqdmlsl d1, s1, v13.s[0] // encoding: [0x21,0x70,0x8d,0x5f]
+// CHECK: sqdmlsl d31, s31, v31.s[2] // encoding: [0xff,0x7b,0x9f,0x5f]
+// CHECK: sqdmlsl d16, s18, v28.s[3] // encoding: [0x50,0x7a,0xbc,0x5f]
+
+
+
+
+
+
+
diff --git a/test/MC/AArch64/neon-scalar-by-elem-saturating-mul.s b/test/MC/AArch64/neon-scalar-by-elem-saturating-mul.s
new file mode 100644
index 0000000..8a8405e
--- /dev/null
+++ b/test/MC/AArch64/neon-scalar-by-elem-saturating-mul.s
@@ -0,0 +1,58 @@
+// RUN: llvm-mc -triple aarch64-none-linux-gnu -mattr=+neon -show-encoding < %s | FileCheck %s
+
+//-----------------------------------------------------------------------------
+// Signed saturating doubling multiply long (scalar, by element)
+//-----------------------------------------------------------------------------
+ sqdmull s1, h1, v1.h[1]
+ sqdmull s8, h2, v5.h[2]
+ sqdmull s12, h17, v9.h[3]
+ sqdmull s31, h31, v15.h[7]
+ sqdmull d1, s1, v4.s[0]
+ sqdmull d31, s31, v31.s[3]
+ sqdmull d9, s10, v15.s[0]
+
+
+// CHECK: sqdmull s1, h1, v1.h[1] // encoding: [0x21,0xb0,0x51,0x5f]
+// CHECK: sqdmull s8, h2, v5.h[2] // encoding: [0x48,0xb0,0x65,0x5f]
+// CHECK: sqdmull s12, h17, v9.h[3] // encoding: [0x2c,0xb2,0x79,0x5f]
+// CHECK: sqdmull s31, h31, v15.h[7] // encoding: [0xff,0xbb,0x7f,0x5f]
+// CHECK: sqdmull d1, s1, v4.s[0] // encoding: [0x21,0xb0,0x84,0x5f]
+// CHECK: sqdmull d31, s31, v31.s[3] // encoding: [0xff,0xbb,0xbf,0x5f]
+// CHECK: sqdmull d9, s10, v15.s[0] // encoding: [0x49,0xb1,0x8f,0x5f]
+
+//-----------------------------------------------------------------------------
+// Scalar Signed saturating doubling multiply returning
+// high half (scalar, by element)
+//-----------------------------------------------------------------------------
+ sqdmulh h0, h1, v0.h[0]
+ sqdmulh h10, h11, v10.h[4]
+ sqdmulh h20, h21, v15.h[7]
+ sqdmulh s25, s26, v27.s[3]
+ sqdmulh s2, s6, v7.s[0]
+
+// CHECK: sqdmulh h0, h1, v0.h[0] // encoding: [0x20,0xc0,0x40,0x5f]
+// CHECK: sqdmulh h10, h11, v10.h[4] // encoding: [0x6a,0xc9,0x4a,0x5f]
+// CHECK: sqdmulh h20, h21, v15.h[7] // encoding: [0xb4,0xca,0x7f,0x5f]
+// CHECK: sqdmulh s25, s26, v27.s[3] // encoding: [0x59,0xcb,0xbb,0x5f]
+// CHECK: sqdmulh s2, s6, v7.s[0] // encoding: [0xc2,0xc0,0x87,0x5f]
+
+//-----------------------------------------------------------------------------
+// Signed saturating rounding doubling multiply returning
+// high half (scalar, by element)
+//-----------------------------------------------------------------------------
+ sqrdmulh h31, h30, v14.h[2]
+ sqrdmulh h1, h1, v1.h[4]
+ sqrdmulh h21, h22, v15.h[7]
+ sqrdmulh s5, s6, v7.s[2]
+ sqrdmulh s20, s26, v27.s[1]
+
+// CHECK: sqrdmulh h31, h30, v14.h[2] // encoding: [0xdf,0xd3,0x6e,0x5f]
+// CHECK: sqrdmulh h1, h1, v1.h[4] // encoding: [0x21,0xd8,0x41,0x5f]
+// CHECK: sqrdmulh h21, h22, v15.h[7] // encoding: [0xd5,0xda,0x7f,0x5f]
+// CHECK: sqrdmulh s5, s6, v7.s[2] // encoding: [0xc5,0xd8,0x87,0x5f]
+// CHECK: sqrdmulh s20, s26, v27.s[1] // encoding: [0x54,0xd3,0xbb,0x5f]
+
+
+
+
+
diff --git a/test/MC/AArch64/neon-scalar-dup.s b/test/MC/AArch64/neon-scalar-dup.s
new file mode 100644
index 0000000..64366f2
--- /dev/null
+++ b/test/MC/AArch64/neon-scalar-dup.s
@@ -0,0 +1,29 @@
+// RUN: llvm-mc -triple aarch64-none-linux-gnu -mattr=+neon -show-encoding < %s | FileCheck %s
+
+//------------------------------------------------------------------------------
+// Duplicate element (scalar)
+//------------------------------------------------------------------------------
+ dup b0, v0.b[15]
+ dup b1, v0.b[7]
+ dup b17, v0.b[0]
+ dup h5, v31.h[7]
+ dup h9, v1.h[4]
+ dup h11, v17.h[0]
+ dup s2, v2.s[3]
+ dup s4, v21.s[0]
+ dup s31, v21.s[2]
+ dup d3, v5.d[0]
+ dup d6, v5.d[1]
+
+// CHECK: dup b0, v0.b[15] // encoding: [0x00,0x04,0x1f,0x5e]
+// CHECK: dup b1, v0.b[7] // encoding: [0x01,0x04,0x0f,0x5e]
+// CHECK: dup b17, v0.b[0] // encoding: [0x11,0x04,0x01,0x5e]
+// CHECK: dup h5, v31.h[7] // encoding: [0xe5,0x07,0x1e,0x5e]
+// CHECK: dup h9, v1.h[4] // encoding: [0x29,0x04,0x12,0x5e]
+// CHECK: dup h11, v17.h[0] // encoding: [0x2b,0x06,0x02,0x5e]
+// CHECK: dup s2, v2.s[3] // encoding: [0x42,0x04,0x1c,0x5e]
+// CHECK: dup s4, v21.s[0] // encoding: [0xa4,0x06,0x04,0x5e]
+// CHECK: dup s31, v21.s[2] // encoding: [0xbf,0x06,0x14,0x5e]
+// CHECK: dup d3, v5.d[0] // encoding: [0xa3,0x04,0x08,0x5e]
+// CHECK: dup d6, v5.d[1] // encoding: [0xa6,0x04,0x18,0x5e]
+
diff --git a/test/MC/AArch64/neon-simd-copy.s b/test/MC/AArch64/neon-simd-copy.s
index 7edcc1b..67fe309 100644
--- a/test/MC/AArch64/neon-simd-copy.s
+++ b/test/MC/AArch64/neon-simd-copy.s
@@ -1,4 +1,4 @@
-// RUN: llvm-mc -triple=aarch64 -mattr=+neon -show-encoding < %s | FileCheck %s
+// RUN: llvm-mc -triple=aarch64-none-linux-gnu -mattr=+neon -show-encoding < %s | FileCheck %s
// Check that the assembler can handle the documented syntax for AArch64
diff --git a/test/MC/AArch64/neon-simd-shift.s b/test/MC/AArch64/neon-simd-shift.s
index 9e6e1aa..a164323 100644
--- a/test/MC/AArch64/neon-simd-shift.s
+++ b/test/MC/AArch64/neon-simd-shift.s
@@ -1,4 +1,4 @@
-// RUN: llvm-mc -triple=aarch64 -mattr=+neon -show-encoding < %s | FileCheck %s
+// RUN: llvm-mc -triple=aarch64-none-linux-gnu -mattr=+neon -show-encoding < %s | FileCheck %s
// Check that the assembler can handle the documented syntax for AArch64
diff --git a/test/MC/Disassembler/AArch64/neon-instructions.txt b/test/MC/Disassembler/AArch64/neon-instructions.txt
index 2f53375..b9ea7c1 100644
--- a/test/MC/Disassembler/AArch64/neon-instructions.txt
+++ b/test/MC/Disassembler/AArch64/neon-instructions.txt
@@ -2174,3 +2174,216 @@ G# RUN: llvm-mc -triple aarch64-none-linux-gnu -mattr=+neon -disassemble < %s |
0x28,0x78,0x82,0x0e
0x19,0x78,0x82,0x4e
0x0a,0x78,0xc2,0x4e
+
+#----------------------------------------------------------------------
+# Scalar Floating Point multiply (scalar, by element)
+#----------------------------------------------------------------------
+# CHECK: fmul s0, s1, v1.s[0]
+# CHECK: fmul s0, s1, v1.s[3]
+# CHECK: fmul d0, d1, v1.d[0]
+# CHECK: fmul d0, d1, v1.d[1]
+# CHECK: fmul d15, d15, v15.d[1]
+0x20 0x90 0x81 0x5f
+0x20 0x98 0xa1 0x5f
+0x20 0x90 0xc1 0x5f
+0x20 0x98 0xc1 0x5f
+0xef 0x99 0xcf 0x5f
+
+#----------------------------------------------------------------------
+# Scalar Floating Point multiply extended (scalar, by element)
+#----------------------------------------------------------------------
+# CHECK: fmulx s3, s5, v7.s[0]
+# CHECK: fmulx s3, s5, v7.s[3]
+# CHECK: fmulx s3, s5, v15.s[3]
+# CHECK: fmulx d0, d4, v8.d[0]
+# CHECK: fmulx d0, d4, v8.d[1]
+0xa3 0x90 0x87 0x7f
+0xa3 0x98 0xa7 0x7f
+0xa3 0x98 0xaf 0x7f
+0x80 0x90 0xc8 0x7f
+0x80 0x98 0xc8 0x7f
+
+#----------------------------------------------------------------------
+# Scalar Floating Point fused multiply-add (scalar, by element)
+#----------------------------------------------------------------------
+# CHECK: fmla s0, s1, v1.s[0]
+# CHECK: fmla s0, s1, v1.s[3]
+# CHECK: fmla d0, d1, v1.d[0]
+# CHECK: fmla d0, d1, v1.d[1]
+# CHECK: fmla d15, d15, v15.d[1]
+0x20 0x10 0x81 0x5f
+0x20 0x18 0xa1 0x5f
+0x20 0x10 0xc1 0x5f
+0x20 0x18 0xc1 0x5f
+0xef 0x19 0xcf 0x5f
+
+#----------------------------------------------------------------------
+# Scalar Floating Point fused multiply-sub (scalar, by element)
+#----------------------------------------------------------------------
+# CHECK: fmls s3, s5, v7.s[0]
+# CHECK: fmls s3, s5, v7.s[3]
+# CHECK: fmls s3, s5, v15.s[3]
+# CHECK: fmls d0, d4, v8.d[0]
+# CHECK: fmls d0, d4, v8.d[1]
+0xa3 0x50 0x87 0x5f
+0xa3 0x58 0xa7 0x5f
+0xa3 0x58 0xaf 0x5f
+0x80 0x50 0xc8 0x5f
+0x80 0x58 0xc8 0x5f
+
+#----------------------------------------------------------------------
+# Scalar Signed saturating doubling
+# multiply-add long (scalar, by element)
+#----------------------------------------------------------------------
+# CHECK: sqdmlal s0, h0, v0.h[0]
+# CHECK: sqdmlal s0, h0, v0.h[1]
+# CHECK: sqdmlal s0, h0, v0.h[2]
+# CHECK: sqdmlal s0, h0, v0.h[3]
+# CHECK: sqdmlal s0, h0, v0.h[4]
+# CHECK: sqdmlal s0, h0, v0.h[5]
+# CHECK: sqdmlal s0, h0, v0.h[6]
+# CHECK: sqdmlal s0, h0, v0.h[7]
+# CHECK: sqdmlal d8, s9, v15.s[0]
+# CHECK: sqdmlal d8, s9, v15.s[1]
+# CHECK: sqdmlal d8, s9, v15.s[2]
+# CHECK: sqdmlal d8, s9, v15.s[3]
+0x00 0x30 0x40 0x5f
+0x00 0x30 0x50 0x5f
+0x00 0x30 0x60 0x5f
+0x00 0x30 0x70 0x5f
+0x00 0x38 0x40 0x5f
+0x00 0x38 0x50 0x5f
+0x00 0x38 0x60 0x5f
+0x00 0x38 0x70 0x5f
+0x28 0x31 0x8f 0x5f
+0x28 0x31 0xaf 0x5f
+0x28 0x39 0x8f 0x5f
+0x28 0x39 0xaf 0x5f
+
+#----------------------------------------------------------------------
+# Scalar Signed saturating doubling
+# multiply-sub long (scalar, by element)
+#----------------------------------------------------------------------
+# CHECK: sqdmlsl s0, h0, v0.h[0]
+# CHECK: sqdmlsl s0, h0, v0.h[1]
+# CHECK: sqdmlsl s0, h0, v0.h[2]
+# CHECK: sqdmlsl s0, h0, v0.h[3]
+# CHECK: sqdmlsl s0, h0, v0.h[4]
+# CHECK: sqdmlsl s0, h0, v0.h[5]
+# CHECK: sqdmlsl s0, h0, v0.h[6]
+# CHECK: sqdmlsl s0, h0, v0.h[7]
+# CHECK: sqdmlsl d8, s9, v15.s[0]
+# CHECK: sqdmlsl d8, s9, v15.s[1]
+# CHECK: sqdmlsl d8, s9, v15.s[2]
+# CHECK: sqdmlsl d8, s9, v15.s[3]
+0x00 0x70 0x40 0x5f
+0x00 0x70 0x50 0x5f
+0x00 0x70 0x60 0x5f
+0x00 0x70 0x70 0x5f
+0x00 0x78 0x40 0x5f
+0x00 0x78 0x50 0x5f
+0x00 0x78 0x60 0x5f
+0x00 0x78 0x70 0x5f
+0x28 0x71 0x8f 0x5f
+0x28 0x71 0xaf 0x5f
+0x28 0x79 0x8f 0x5f
+0x28 0x79 0xaf 0x5f
+
+#----------------------------------------------------------------------
+# Scalar Signed saturating doubling multiply long (scalar, by element)
+#----------------------------------------------------------------------
+# CHECK: sqdmull s1, h1, v1.h[0]
+# CHECK: sqdmull s1, h1, v1.h[1]
+# CHECK: sqdmull s1, h1, v1.h[2]
+# CHECK: sqdmull s1, h1, v1.h[3]
+# CHECK: sqdmull s1, h1, v1.h[4]
+# CHECK: sqdmull s1, h1, v1.h[5]
+# CHECK: sqdmull s1, h1, v1.h[6]
+# CHECK: sqdmull s1, h1, v1.h[7]
+# CHECK: sqdmull d1, s1, v4.s[0]
+# CHECK: sqdmull d1, s1, v4.s[1]
+# CHECK: sqdmull d1, s1, v4.s[2]
+# CHECK: sqdmull d1, s1, v4.s[3]
+0x21 0xb0 0x41 0x5f
+0x21 0xb0 0x51 0x5f
+0x21 0xb0 0x61 0x5f
+0x21 0xb0 0x71 0x5f
+0x21 0xb8 0x41 0x5f
+0x21 0xb8 0x51 0x5f
+0x21 0xb8 0x61 0x5f
+0x21 0xb8 0x71 0x5f
+0x21 0xb0 0x84 0x5f
+0x21 0xb0 0xa4 0x5f
+0x21 0xb8 0x84 0x5f
+0x21 0xb8 0xa4 0x5f
+
+#----------------------------------------------------------------------
+# Scalar Signed saturating doubling multiply returning
+# high half (scalar, by element)
+#----------------------------------------------------------------------
+# CHECK: sqdmulh h7, h1, v14.h[0]
+# CHECK: sqdmulh h7, h15, v8.h[1]
+# CHECK: sqdmulh h7, h15, v8.h[2]
+# CHECK: sqdmulh h7, h15, v8.h[3]
+# CHECK: sqdmulh h7, h15, v8.h[4]
+# CHECK: sqdmulh h7, h15, v8.h[5]
+# CHECK: sqdmulh h7, h15, v8.h[6]
+# CHECK: sqdmulh h7, h15, v8.h[7]
+# CHECK: sqdmulh s15, s3, v4.s[0]
+# CHECK: sqdmulh s15, s14, v16.s[1]
+# CHECK: sqdmulh s15, s15, v16.s[2]
+# CHECK: sqdmulh s15, s16, v17.s[3]
+0x27 0xc0 0x4e 0x5f
+0xe7 0xc1 0x58 0x5f
+0xe7 0xc1 0x68 0x5f
+0xe7 0xc1 0x78 0x5f
+0xe7 0xc9 0x48 0x5f
+0xe7 0xc9 0x58 0x5f
+0xe7 0xc9 0x68 0x5f
+0xe7 0xc9 0x78 0x5f
+0x6f 0xc0 0x84 0x5f
+0xcf 0xc1 0xb0 0x5f
+0xef 0xc9 0x90 0x5f
+0x0f 0xca 0xb1 0x5f
+
+#----------------------------------------------------------------------
+# Scalar Signed saturating rounding doubling multiply
+# returning high half (scalar, by element)
+#----------------------------------------------------------------------
+# CHECK: sqrdmulh h7, h1, v14.h[0]
+# CHECK: sqrdmulh h7, h15, v8.h[1]
+# CHECK: sqrdmulh h7, h15, v8.h[2]
+# CHECK: sqrdmulh h7, h15, v8.h[3]
+# CHECK: sqrdmulh h7, h15, v8.h[4]
+# CHECK: sqrdmulh h7, h15, v8.h[5]
+# CHECK: sqrdmulh h7, h15, v8.h[6]
+# CHECK: sqrdmulh h7, h15, v8.h[7]
+# CHECK: sqrdmulh s15, s3, v4.s[0]
+# CHECK: sqrdmulh s15, s14, v16.s[1]
+# CHECK: sqrdmulh s15, s15, v16.s[2]
+# CHECK: sqrdmulh s15, s16, v17.s[3]
+0x27 0xd0 0x4e 0x5f
+0xe7 0xd1 0x58 0x5f
+0xe7 0xd1 0x68 0x5f
+0xe7 0xd1 0x78 0x5f
+0xe7 0xd9 0x48 0x5f
+0xe7 0xd9 0x58 0x5f
+0xe7 0xd9 0x68 0x5f
+0xe7 0xd9 0x78 0x5f
+0x6f 0xd0 0x84 0x5f
+0xcf 0xd1 0xb0 0x5f
+0xef 0xd9 0x90 0x5f
+0x0f 0xda 0xb1 0x5f
+
+#----------------------------------------------------------------------
+#Duplicate element (scalar)
+#----------------------------------------------------------------------
+# CHECK: dup b0, v0.b[15]
+# CHECK: dup h2, v31.h[5]
+# CHECK: dup s17, v2.s[2]
+# CHECK: dup d6, v12.d[1]
+0x00 0x04 0x1f 0x5e
+0xe2 0x07 0x16 0x5e
+0x51 0x04 0x14 0x5e
+0x86 0x05 0x18 0x5e
+