-rw-r--r--  include/llvm/IR/IntrinsicsAArch64.td                      |  40
-rw-r--r--  lib/Target/AArch64/AArch64InstrFormats.td                 |  19
-rw-r--r--  lib/Target/AArch64/AArch64InstrNEON.td                    | 274
-rw-r--r--  lib/Target/AArch64/AsmParser/AArch64AsmParser.cpp         |  31
-rw-r--r--  lib/Target/AArch64/Disassembler/AArch64Disassembler.cpp   |  52
-rw-r--r--  lib/Target/AArch64/MCTargetDesc/AArch64MCCodeEmitter.cpp  |  29
-rw-r--r--  test/CodeGen/AArch64/neon-scalar-shift-imm.ll             | 527
-rw-r--r--  test/MC/AArch64/neon-diagnostics.s                        |  24
-rw-r--r--  test/MC/AArch64/neon-scalar-shift-imm.s                   | 186
-rw-r--r--  test/MC/Disassembler/AArch64/neon-instructions.txt        | 162
10 files changed, 1331 insertions, 13 deletions
diff --git a/include/llvm/IR/IntrinsicsAArch64.td b/include/llvm/IR/IntrinsicsAArch64.td
index 76b9215..2dfe02a 100644
--- a/include/llvm/IR/IntrinsicsAArch64.td
+++ b/include/llvm/IR/IntrinsicsAArch64.td
@@ -220,4 +220,44 @@ def int_aarch64_neon_vqdmlsl : Neon_3Arg_Long_Intrinsic;
// Signed Saturating Doubling Multiply Long
def int_aarch64_neon_vqdmull : Neon_2Arg_Long_Intrinsic;
+
+class Neon_2Arg_ShiftImm_Intrinsic
+ : Intrinsic<[llvm_v1i64_ty], [llvm_v1i64_ty, llvm_i32_ty], [IntrNoMem]>;
+
+class Neon_3Arg_ShiftImm_Intrinsic
+ : Intrinsic<[llvm_v1i64_ty], [llvm_v1i64_ty, llvm_v1i64_ty, llvm_i32_ty],
+ [IntrNoMem]>;
+
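+// Defs using these classes expand to IR intrinsics such as
+//   declare <1 x i64> @llvm.aarch64.neon.vshrds.n(<1 x i64>, i32)
+//   declare <1 x i64> @llvm.aarch64.neon.vsrads.n(<1 x i64>, <1 x i64>, i32)
+// where the trailing i32 operand is the immediate shift amount.
+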
+// Scalar Shift Right (Immediate)
+def int_aarch64_neon_vshrds_n : Neon_2Arg_ShiftImm_Intrinsic;
+def int_aarch64_neon_vshrdu_n : Neon_2Arg_ShiftImm_Intrinsic;
+
+// Scalar Rounding Shift Right (Immediate)
+def int_aarch64_neon_vrshrds_n : Neon_2Arg_ShiftImm_Intrinsic;
+def int_aarch64_neon_vrshrdu_n : Neon_2Arg_ShiftImm_Intrinsic;
+
+// Scalar Shift Right and Accumulate (Immediate)
+def int_aarch64_neon_vsrads_n : Neon_3Arg_ShiftImm_Intrinsic;
+def int_aarch64_neon_vsradu_n : Neon_3Arg_ShiftImm_Intrinsic;
+
+// Scalar Rounding Shift Right and Accumulate (Immediate)
+def int_aarch64_neon_vrsrads_n : Neon_3Arg_ShiftImm_Intrinsic;
+def int_aarch64_neon_vrsradu_n : Neon_3Arg_ShiftImm_Intrinsic;
+
+// Scalar Shift Left (Immediate)
+def int_aarch64_neon_vshld_n : Neon_2Arg_ShiftImm_Intrinsic;
+
+// Scalar Saturating Shift Left (Immediate)
+def int_aarch64_neon_vqshls_n : Neon_N2V_Intrinsic;
+def int_aarch64_neon_vqshlu_n : Neon_N2V_Intrinsic;
+
+// Scalar Signed Saturating Shift Left Unsigned (Immediate)
+def int_aarch64_neon_vqshlus_n : Neon_N2V_Intrinsic;
+
+// Shift Right And Insert (Immediate)
+def int_aarch64_neon_vsrid_n : Neon_2Arg_ShiftImm_Intrinsic;
+
+// Shift Left And Insert (Immediate)
+def int_aarch64_neon_vslid_n : Neon_2Arg_ShiftImm_Intrinsic;
+
}
diff --git a/lib/Target/AArch64/AArch64InstrFormats.td b/lib/Target/AArch64/AArch64InstrFormats.td
index 7a41ce0..44dbc9d 100644
--- a/lib/Target/AArch64/AArch64InstrFormats.td
+++ b/lib/Target/AArch64/AArch64InstrFormats.td
@@ -1234,5 +1234,24 @@ class NeonI_Scalar3Diff<bit u, bits<2> size, bits<4> opcode,
// Inherit Rd in 4-0
}
+// Format AdvSIMD scalar shift by immediate
+
+class NeonI_ScalarShiftImm<bit u, bits<5> opcode,
+ dag outs, dag ins, string asmstr,
+ list<dag> patterns, InstrItinClass itin>
+ : A64InstRdn<outs, ins, asmstr, patterns, itin> {
+ bits<4> Imm4;
+ bits<3> Imm3;
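+ // Imm4 and Imm3 together form the immh:immb field (bits 22-16).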
+ let Inst{31-30} = 0b01;
+ let Inst{29} = u;
+ let Inst{28-23} = 0b111110;
+ let Inst{22-19} = Imm4;
+ let Inst{18-16} = Imm3;
+ let Inst{15-11} = opcode;
+ let Inst{10} = 0b1;
+ // Inherit Rn in 9-5
+ // Inherit Rd in 4-0
+}
+
}
diff --git a/lib/Target/AArch64/AArch64InstrNEON.td b/lib/Target/AArch64/AArch64InstrNEON.td
index b9f83f7..5e58daf 100644
--- a/lib/Target/AArch64/AArch64InstrNEON.td
+++ b/lib/Target/AArch64/AArch64InstrNEON.td
@@ -1450,7 +1450,7 @@ def imm0_63 : Operand<i32> {
let ParserMatchClass = uimm6_asmoperand;
}
-// Shift Right Immediate - A shift right immediate is encoded differently from
+// Shift Right/Left Immediate - A shift immediate is encoded differently from
// other shift immediates. The immh:immb field is encoded like so:
//
// Offset Encoding
@@ -1458,6 +1458,11 @@ def imm0_63 : Operand<i32> {
// 16 immh:immb<6:4> = '001xxxx', <imm> is encoded in immh:immb<3:0>
// 32 immh:immb<6:5> = '01xxxxx', <imm> is encoded in immh:immb<4:0>
// 64 immh:immb<6> = '1xxxxxx', <imm> is encoded in immh:immb<5:0>
+//
+// The shift right immediate amount, in the range 1 to element bits, is
+// computed as 2 * Offset - UInt(immh:immb). The shift left immediate amount,
+// in the range 0 to element bits - 1, is computed as UInt(immh:immb) - Offset.
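+//
+// For example, "sshr d15, d16, #12" encodes immh:immb = 128 - 12 = 0b1110100,
+// and "shl d7, d10, #12" encodes immh:immb = 64 + 12 = 0b1001100.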
+
class shr_imm_asmoperands<string OFFSET> : AsmOperandClass {
let Name = "ShrImm" # OFFSET;
let RenderMethod = "addImmOperands";
@@ -1481,6 +1486,29 @@ def shr_imm16 : shr_imm<"16">;
def shr_imm32 : shr_imm<"32">;
def shr_imm64 : shr_imm<"64">;
+class shl_imm_asmoperands<string OFFSET> : AsmOperandClass {
+ let Name = "ShlImm" # OFFSET;
+ let RenderMethod = "addImmOperands";
+ let DiagnosticType = "ShlImm" # OFFSET;
+}
+
+class shl_imm<string OFFSET> : Operand<i32> {
+ let EncoderMethod = "getShiftLeftImm" # OFFSET;
+ let DecoderMethod = "DecodeShiftLeftImm" # OFFSET;
+ let ParserMatchClass =
+ !cast<AsmOperandClass>("shl_imm" # OFFSET # "_asmoperand");
+}
+
+def shl_imm8_asmoperand : shl_imm_asmoperands<"8">;
+def shl_imm16_asmoperand : shl_imm_asmoperands<"16">;
+def shl_imm32_asmoperand : shl_imm_asmoperands<"32">;
+def shl_imm64_asmoperand : shl_imm_asmoperands<"64">;
+
+def shl_imm8 : shl_imm<"8">;
+def shl_imm16 : shl_imm<"16">;
+def shl_imm32 : shl_imm<"32">;
+def shl_imm64 : shl_imm<"64">;
+
class N2VShift<bit q, bit u, bits<5> opcode, string asmop, string T,
RegisterOperand VPRC, ValueType Ty, Operand ImmTy, SDNode OpNode>
: NeonI_2VShiftImm<q, u, opcode,
@@ -3471,6 +3499,250 @@ multiclass Neon_Scalar2SameMisc_accum_BHSD_size_patterns<
(INSTD FPR64:$Src, FPR64:$Rn)>;
}
+// Scalar Shift By Immediate
+
+class NeonI_ScalarShiftImm_size<bit u, bits<5> opcode, string asmop,
+ RegisterClass FPRC, Operand ImmTy>
+ : NeonI_ScalarShiftImm<u, opcode,
+ (outs FPRC:$Rd), (ins FPRC:$Rn, ImmTy:$Imm),
+ !strconcat(asmop, "\t$Rd, $Rn, $Imm"),
+ [], NoItinerary>;
+
+multiclass NeonI_ScalarShiftRightImm_D_size<bit u, bits<5> opcode,
+ string asmop> {
+ def ddi : NeonI_ScalarShiftImm_size<u, opcode, asmop, FPR64, shr_imm64> {
+ bits<6> Imm;
+ let Inst{22} = 0b1; // immh:immb = 1xxxxxx
+ let Inst{21-16} = Imm;
+ }
+}
+
+multiclass NeonI_ScalarShiftRightImm_BHSD_size<bit u, bits<5> opcode,
+ string asmop>
+ : NeonI_ScalarShiftRightImm_D_size<u, opcode, asmop> {
+ def bbi : NeonI_ScalarShiftImm_size<u, opcode, asmop, FPR8, shr_imm8> {
+ bits<3> Imm;
+ let Inst{22-19} = 0b0001; // immh:immb = 0001xxx
+ let Inst{18-16} = Imm;
+ }
+ def hhi : NeonI_ScalarShiftImm_size<u, opcode, asmop, FPR16, shr_imm16> {
+ bits<4> Imm;
+ let Inst{22-20} = 0b001; // immh:immb = 001xxxx
+ let Inst{19-16} = Imm;
+ }
+ def ssi : NeonI_ScalarShiftImm_size<u, opcode, asmop, FPR32, shr_imm32> {
+ bits<5> Imm;
+ let Inst{22-21} = 0b01; // immh:immb = 01xxxxx
+ let Inst{20-16} = Imm;
+ }
+}
+
+multiclass NeonI_ScalarShiftLeftImm_D_size<bit u, bits<5> opcode,
+ string asmop> {
+ def ddi : NeonI_ScalarShiftImm_size<u, opcode, asmop, FPR64, shl_imm64> {
+ bits<6> Imm;
+ let Inst{22} = 0b1; // immh:immb = 1xxxxxx
+ let Inst{21-16} = Imm;
+ }
+}
+
+multiclass NeonI_ScalarShiftLeftImm_BHSD_size<bit u, bits<5> opcode,
+ string asmop>
+ : NeonI_ScalarShiftLeftImm_D_size<u, opcode, asmop> {
+ def bbi : NeonI_ScalarShiftImm_size<u, opcode, asmop, FPR8, shl_imm8> {
+ bits<3> Imm;
+ let Inst{22-19} = 0b0001; // immh:immb = 0001xxx
+ let Inst{18-16} = Imm;
+ }
+ def hhi : NeonI_ScalarShiftImm_size<u, opcode, asmop, FPR16, shl_imm16> {
+ bits<4> Imm;
+ let Inst{22-20} = 0b001; // immh:immb = 001xxxx
+ let Inst{19-16} = Imm;
+ }
+ def ssi : NeonI_ScalarShiftImm_size<u, opcode, asmop, FPR32, shl_imm32> {
+ bits<5> Imm;
+ let Inst{22-21} = 0b01; // immh:immb = 01xxxxx
+ let Inst{20-16} = Imm;
+ }
+}
+
+class NeonI_ScalarShiftImm_accum_D_size<bit u, bits<5> opcode, string asmop>
+ : NeonI_ScalarShiftImm<u, opcode,
+ (outs FPR64:$Rd), (ins FPR64:$Src, FPR64:$Rn, shr_imm64:$Imm),
+ !strconcat(asmop, "\t$Rd, $Rn, $Imm"),
+ [], NoItinerary> {
+ bits<6> Imm;
+ let Inst{22} = 0b1; // immh:immb = 1xxxxxx
+ let Inst{21-16} = Imm;
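+ // Tie the accumulator input to the destination register.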
+ let Constraints = "$Src = $Rd";
+}
+
+class NeonI_ScalarShiftImm_narrow_size<bit u, bits<5> opcode, string asmop,
+ RegisterClass FPRCD, RegisterClass FPRCS,
+ Operand ImmTy>
+ : NeonI_ScalarShiftImm<u, opcode,
+ (outs FPRCD:$Rd), (ins FPRCS:$Rn, ImmTy:$Imm),
+ !strconcat(asmop, "\t$Rd, $Rn, $Imm"),
+ [], NoItinerary>;
+
+multiclass NeonI_ScalarShiftImm_narrow_HSD_size<bit u, bits<5> opcode,
+ string asmop> {
+ def bhi : NeonI_ScalarShiftImm_narrow_size<u, opcode, asmop, FPR8, FPR16,
+ shr_imm8> {
+ bits<3> Imm;
+ let Inst{22-19} = 0b0001; // immh:immb = 0001xxx
+ let Inst{18-16} = Imm;
+ }
+ def hsi : NeonI_ScalarShiftImm_narrow_size<u, opcode, asmop, FPR16, FPR32,
+ shr_imm16> {
+ bits<4> Imm;
+ let Inst{22-20} = 0b001; // immh:immb = 001xxxx
+ let Inst{19-16} = Imm;
+ }
+ def sdi : NeonI_ScalarShiftImm_narrow_size<u, opcode, asmop, FPR32, FPR64,
+ shr_imm32> {
+ bits<5> Imm;
+ let Inst{22-21} = 0b01; // immh:immb = 01xxxxx
+ let Inst{20-16} = Imm;
+ }
+}
+
+multiclass Neon_ScalarShiftImm_D_size_patterns<SDPatternOperator opnode,
+ Instruction INSTD> {
+ def ddi : Pat<(v1i64 (opnode (v1i64 FPR64:$Rn), (i32 imm:$Imm))),
+ (INSTD FPR64:$Rn, imm:$Imm)>;
+}
+
+multiclass Neon_ScalarShiftImm_BHSD_size_patterns<SDPatternOperator opnode,
+ Instruction INSTB,
+ Instruction INSTH,
+ Instruction INSTS,
+ Instruction INSTD>
+ : Neon_ScalarShiftImm_D_size_patterns<opnode, INSTD> {
+ def bbi : Pat<(v1i8 (opnode (v1i8 FPR8:$Rn), (i32 imm:$Imm))),
+ (INSTB FPR8:$Rn, imm:$Imm)>;
+ def hhi : Pat<(v1i16 (opnode (v1i16 FPR16:$Rn), (i32 imm:$Imm))),
+ (INSTH FPR16:$Rn, imm:$Imm)>;
+ def ssi : Pat<(v1i32 (opnode (v1i32 FPR32:$Rn), (i32 imm:$Imm))),
+ (INSTS FPR32:$Rn, imm:$Imm)>;
+}
+
+class Neon_ScalarShiftImm_accum_D_size_patterns<SDPatternOperator opnode,
+ Instruction INSTD>
+ : Pat<(v1i64 (opnode (v1i64 FPR64:$Src), (v1i64 FPR64:$Rn), (i32 imm:$Imm))),
+ (INSTD FPR64:$Src, FPR64:$Rn, imm:$Imm)>;
+
+multiclass Neon_ScalarShiftImm_narrow_HSD_size_patterns<
+ SDPatternOperator opnode,
+ Instruction INSTH,
+ Instruction INSTS,
+ Instruction INSTD> {
+ def bhi : Pat<(v1i8 (opnode (v1i16 FPR16:$Rn), (i32 imm:$Imm))),
+ (INSTH FPR16:$Rn, imm:$Imm)>;
+ def hsi : Pat<(v1i16 (opnode (v1i32 FPR32:$Rn), (i32 imm:$Imm))),
+ (INSTS FPR32:$Rn, imm:$Imm)>;
+ def sdi : Pat<(v1i32 (opnode (v1i64 FPR64:$Rn), (i32 imm:$Imm))),
+ (INSTD FPR64:$Rn, imm:$Imm)>;
+}
+
+// Scalar Signed Shift Right (Immediate)
+defm SSHR : NeonI_ScalarShiftRightImm_D_size<0b0, 0b00000, "sshr">;
+defm : Neon_ScalarShiftImm_D_size_patterns<int_aarch64_neon_vshrds_n, SSHRddi>;
+
+// Scalar Unsigned Shift Right (Immediate)
+defm USHR : NeonI_ScalarShiftRightImm_D_size<0b1, 0b00000, "ushr">;
+defm : Neon_ScalarShiftImm_D_size_patterns<int_aarch64_neon_vshrdu_n, USHRddi>;
+
+// Scalar Signed Rounding Shift Right (Immediate)
+defm SRSHR : NeonI_ScalarShiftRightImm_D_size<0b0, 0b00100, "srshr">;
+defm : Neon_ScalarShiftImm_D_size_patterns<int_aarch64_neon_vrshrds_n, SRSHRddi>;
+
+// Scalar Unsigned Rounding Shift Right (Immediate)
+defm URSHR : NeonI_ScalarShiftRightImm_D_size<0b1, 0b00100, "urshr">;
+defm : Neon_ScalarShiftImm_D_size_patterns<int_aarch64_neon_vrshrdu_n, URSHRddi>;
+
+// Scalar Signed Shift Right and Accumulate (Immediate)
+def SSRA : NeonI_ScalarShiftImm_accum_D_size<0b0, 0b00010, "ssra">;
+def : Neon_ScalarShiftImm_accum_D_size_patterns<int_aarch64_neon_vsrads_n, SSRA>;
+
+// Scalar Unsigned Shift Right and Accumulate (Immediate)
+def USRA : NeonI_ScalarShiftImm_accum_D_size<0b1, 0b00010, "usra">;
+def : Neon_ScalarShiftImm_accum_D_size_patterns<int_aarch64_neon_vsradu_n, USRA>;
+
+// Scalar Signed Rounding Shift Right and Accumulate (Immediate)
+def SRSRA : NeonI_ScalarShiftImm_accum_D_size<0b0, 0b00110, "srsra">;
+def : Neon_ScalarShiftImm_accum_D_size_patterns<int_aarch64_neon_vrsrads_n, SRSRA>;
+
+// Scalar Unsigned Rounding Shift Right and Accumulate (Immediate)
+def URSRA : NeonI_ScalarShiftImm_accum_D_size<0b1, 0b00110, "ursra">;
+def : Neon_ScalarShiftImm_accum_D_size_patterns<int_aarch64_neon_vrsradu_n, URSRA>;
+
+// Scalar Shift Left (Immediate)
+defm SHL : NeonI_ScalarShiftLeftImm_D_size<0b0, 0b01010, "shl">;
+defm : Neon_ScalarShiftImm_D_size_patterns<int_aarch64_neon_vshld_n, SHLddi>;
+
+// Signed Saturating Shift Left (Immediate)
+defm SQSHL : NeonI_ScalarShiftLeftImm_BHSD_size<0b0, 0b01110, "sqshl">;
+defm : Neon_ScalarShiftImm_BHSD_size_patterns<int_aarch64_neon_vqshls_n,
+ SQSHLbbi, SQSHLhhi,
+ SQSHLssi, SQSHLddi>;
+
+// Unsigned Saturating Shift Left (Immediate)
+defm UQSHL : NeonI_ScalarShiftLeftImm_BHSD_size<0b1, 0b01110, "uqshl">;
+defm : Neon_ScalarShiftImm_BHSD_size_patterns<int_aarch64_neon_vqshlu_n,
+ UQSHLbbi, UQSHLhhi,
+ UQSHLssi, UQSHLddi>;
+
+// Signed Saturating Shift Left Unsigned (Immediate)
+defm SQSHLU : NeonI_ScalarShiftLeftImm_BHSD_size<0b1, 0b01100, "sqshlu">;
+defm : Neon_ScalarShiftImm_BHSD_size_patterns<int_aarch64_neon_vqshlus_n,
+ SQSHLUbbi, SQSHLUhhi,
+ SQSHLUssi, SQSHLUddi>;
+
+// Shift Right And Insert (Immediate)
+defm SRI : NeonI_ScalarShiftRightImm_D_size<0b1, 0b01000, "sri">;
+defm : Neon_ScalarShiftImm_D_size_patterns<int_aarch64_neon_vsrid_n, SRIddi>;
+
+// Shift Left And Insert (Immediate)
+defm SLI : NeonI_ScalarShiftLeftImm_D_size<0b1, 0b01010, "sli">;
+defm : Neon_ScalarShiftImm_D_size_patterns<int_aarch64_neon_vslid_n, SLIddi>;
+
+// Signed Saturating Shift Right Narrow (Immediate)
+defm SQSHRN : NeonI_ScalarShiftImm_narrow_HSD_size<0b0, 0b10010, "sqshrn">;
+defm : Neon_ScalarShiftImm_narrow_HSD_size_patterns<int_aarch64_neon_vsqshrn,
+ SQSHRNbhi, SQSHRNhsi,
+ SQSHRNsdi>;
+
+// Unsigned Saturating Shift Right Narrow (Immediate)
+defm UQSHRN : NeonI_ScalarShiftImm_narrow_HSD_size<0b1, 0b10010, "uqshrn">;
+defm : Neon_ScalarShiftImm_narrow_HSD_size_patterns<int_aarch64_neon_vuqshrn,
+ UQSHRNbhi, UQSHRNhsi,
+ UQSHRNsdi>;
+
+// Signed Saturating Rounded Shift Right Narrow (Immediate)
+defm SQRSHRN : NeonI_ScalarShiftImm_narrow_HSD_size<0b0, 0b10011, "sqrshrn">;
+defm : Neon_ScalarShiftImm_narrow_HSD_size_patterns<int_aarch64_neon_vsqrshrn,
+ SQRSHRNbhi, SQRSHRNhsi,
+ SQRSHRNsdi>;
+
+// Unsigned Saturating Rounded Shift Right Narrow (Immediate)
+defm UQRSHRN : NeonI_ScalarShiftImm_narrow_HSD_size<0b1, 0b10011, "uqrshrn">;
+defm : Neon_ScalarShiftImm_narrow_HSD_size_patterns<int_aarch64_neon_vuqrshrn,
+ UQRSHRNbhi, UQRSHRNhsi,
+ UQRSHRNsdi>;
+
+// Signed Saturating Shift Right Unsigned Narrow (Immediate)
+defm SQSHRUN : NeonI_ScalarShiftImm_narrow_HSD_size<0b1, 0b10000, "sqshrun">;
+defm : Neon_ScalarShiftImm_narrow_HSD_size_patterns<int_aarch64_neon_vsqshrun,
+ SQSHRUNbhi, SQSHRUNhsi,
+ SQSHRUNsdi>;
+
+// Signed Saturating Rounded Shift Right Unsigned Narrow (Immediate)
+defm SQRSHRUN : NeonI_ScalarShiftImm_narrow_HSD_size<0b1, 0b10001, "sqrshrun">;
+defm : Neon_ScalarShiftImm_narrow_HSD_size_patterns<int_aarch64_neon_vsqrshrun,
+ SQRSHRUNbhi, SQRSHRUNhsi,
+ SQRSHRUNsdi>;
+
// Scalar Integer Add
let isCommutable = 1 in {
def ADDddd : NeonI_Scalar3Same_D_size<0b0, 0b10000, "add">;
diff --git a/lib/Target/AArch64/AsmParser/AArch64AsmParser.cpp b/lib/Target/AArch64/AsmParser/AArch64AsmParser.cpp
index 127c7ec..1f7a7d8 100644
--- a/lib/Target/AArch64/AsmParser/AArch64AsmParser.cpp
+++ b/lib/Target/AArch64/AsmParser/AArch64AsmParser.cpp
@@ -698,6 +698,25 @@ public:
bool isShrImm64() const { return isShrFixedWidth(64); }
+ // Return true if the immediate value is in the range [0, w).
+ bool isShlFixedWidth(int w) const {
+ if (!isImm())
+ return false;
+ const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(getImm());
+ if (!CE)
+ return false;
+ int64_t Value = CE->getValue();
+ return Value >= 0 && Value < w;
+ }
+
+ bool isShlImm8() const { return isShlFixedWidth(8); }
+
+ bool isShlImm16() const { return isShlFixedWidth(16); }
+
+ bool isShlImm32() const { return isShlFixedWidth(32); }
+
+ bool isShlImm64() const { return isShlFixedWidth(64); }
+
bool isNeonMovImmShiftLSL() const {
if (!isShiftOrExtend())
return false;
@@ -2480,6 +2499,18 @@ bool AArch64AsmParser::MatchAndEmitInstruction(SMLoc IDLoc, unsigned &Opcode,
case Match_ShrImm64:
return Error(((AArch64Operand *)Operands[ErrorInfo])->getStartLoc(),
"expected integer in range [1, 64]");
+ case Match_ShlImm8:
+ return Error(((AArch64Operand *)Operands[ErrorInfo])->getStartLoc(),
+ "expected integer in range [0, 7]");
+ case Match_ShlImm16:
+ return Error(((AArch64Operand *)Operands[ErrorInfo])->getStartLoc(),
+ "expected integer in range [0, 15]");
+ case Match_ShlImm32:
+ return Error(((AArch64Operand *)Operands[ErrorInfo])->getStartLoc(),
+ "expected integer in range [0, 31]");
+ case Match_ShlImm64:
+ return Error(((AArch64Operand *)Operands[ErrorInfo])->getStartLoc(),
+ "expected integer in range [0, 63]");
}
llvm_unreachable("Implement any new match types added!");
diff --git a/lib/Target/AArch64/Disassembler/AArch64Disassembler.cpp b/lib/Target/AArch64/Disassembler/AArch64Disassembler.cpp
index 9a97fbe..db1da49 100644
--- a/lib/Target/AArch64/Disassembler/AArch64Disassembler.cpp
+++ b/lib/Target/AArch64/Disassembler/AArch64Disassembler.cpp
@@ -141,6 +141,18 @@ static DecodeStatus DecodeShiftRightImm64(MCInst &Inst, unsigned Val,
uint64_t Address,
const void *Decoder);
+static DecodeStatus DecodeShiftLeftImm8(MCInst &Inst, unsigned Val,
+ uint64_t Address, const void *Decoder);
+static DecodeStatus DecodeShiftLeftImm16(MCInst &Inst, unsigned Val,
+ uint64_t Address,
+ const void *Decoder);
+static DecodeStatus DecodeShiftLeftImm32(MCInst &Inst, unsigned Val,
+ uint64_t Address,
+ const void *Decoder);
+static DecodeStatus DecodeShiftLeftImm64(MCInst &Inst, unsigned Val,
+ uint64_t Address,
+ const void *Decoder);
+
template<int RegWidth>
static DecodeStatus DecodeMoveWideImmOperand(llvm::MCInst &Inst,
unsigned FullImm,
@@ -509,6 +521,46 @@ static DecodeStatus DecodeShiftRightImm64(MCInst &Inst, unsigned Val,
return MCDisassembler::Success;
}
+static DecodeStatus DecodeShiftLeftImm8(MCInst &Inst, unsigned Val,
+ uint64_t Address,
+ const void *Decoder) {
+ if (Val > 7)
+ return MCDisassembler::Fail;
+
+ Inst.addOperand(MCOperand::CreateImm(Val));
+ return MCDisassembler::Success;
+}
+
+static DecodeStatus DecodeShiftLeftImm16(MCInst &Inst, unsigned Val,
+ uint64_t Address,
+ const void *Decoder) {
+ if (Val > 15)
+ return MCDisassembler::Fail;
+
+ Inst.addOperand(MCOperand::CreateImm(Val));
+ return MCDisassembler::Success;
+}
+
+static DecodeStatus DecodeShiftLeftImm32(MCInst &Inst, unsigned Val,
+ uint64_t Address,
+ const void *Decoder) {
+ if (Val > 31)
+ return MCDisassembler::Fail;
+
+ Inst.addOperand(MCOperand::CreateImm(Val));
+ return MCDisassembler::Success;
+}
+
+static DecodeStatus DecodeShiftLeftImm64(MCInst &Inst, unsigned Val,
+ uint64_t Address,
+ const void *Decoder) {
+ if (Val > 63)
+ return MCDisassembler::Fail;
+
+ Inst.addOperand(MCOperand::CreateImm(Val));
+ return MCDisassembler::Success;
+}
+
template<int RegWidth>
static DecodeStatus DecodeMoveWideImmOperand(llvm::MCInst &Inst,
unsigned FullImm,
diff --git a/lib/Target/AArch64/MCTargetDesc/AArch64MCCodeEmitter.cpp b/lib/Target/AArch64/MCTargetDesc/AArch64MCCodeEmitter.cpp
index 7bfaecc..b41c566 100644
--- a/lib/Target/AArch64/MCTargetDesc/AArch64MCCodeEmitter.cpp
+++ b/lib/Target/AArch64/MCTargetDesc/AArch64MCCodeEmitter.cpp
@@ -68,6 +68,15 @@ public:
unsigned getShiftRightImm64(const MCInst &MI, unsigned Op,
SmallVectorImpl<MCFixup> &Fixups) const;
+ unsigned getShiftLeftImm8(const MCInst &MI, unsigned Op,
+ SmallVectorImpl<MCFixup> &Fixups) const;
+ unsigned getShiftLeftImm16(const MCInst &MI, unsigned Op,
+ SmallVectorImpl<MCFixup> &Fixups) const;
+ unsigned getShiftLeftImm32(const MCInst &MI, unsigned Op,
+ SmallVectorImpl<MCFixup> &Fixups) const;
+ unsigned getShiftLeftImm64(const MCInst &MI, unsigned Op,
+ SmallVectorImpl<MCFixup> &Fixups) const;
+
// Labels are handled mostly the same way: a symbol is needed, and
// just gets some fixup attached.
template<AArch64::Fixups fixupDesired>
@@ -338,6 +347,26 @@ unsigned AArch64MCCodeEmitter::getShiftRightImm64(
return 64 - MI.getOperand(Op).getImm();
}
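+// For shift-left operands, the value encoded in the low immh:immb bits equals
+// the shift amount itself; the subtractions below wrap modulo the width of the
+// Imm field (3, 4, 5, or 6 bits), so the encoded bits are unchanged.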
+unsigned AArch64MCCodeEmitter::getShiftLeftImm8(
+ const MCInst &MI, unsigned Op, SmallVectorImpl<MCFixup> &Fixups) const {
+ return MI.getOperand(Op).getImm() - 8;
+}
+
+unsigned AArch64MCCodeEmitter::getShiftLeftImm16(
+ const MCInst &MI, unsigned Op, SmallVectorImpl<MCFixup> &Fixups) const {
+ return MI.getOperand(Op).getImm() - 16;
+}
+
+unsigned AArch64MCCodeEmitter::getShiftLeftImm32(
+ const MCInst &MI, unsigned Op, SmallVectorImpl<MCFixup> &Fixups) const {
+ return MI.getOperand(Op).getImm() - 32;
+}
+
+unsigned AArch64MCCodeEmitter::getShiftLeftImm64(
+ const MCInst &MI, unsigned Op, SmallVectorImpl<MCFixup> &Fixups) const {
+ return MI.getOperand(Op).getImm() - 64;
+}
+
template<AArch64::Fixups fixupDesired> unsigned
AArch64MCCodeEmitter::getLabelOpValue(const MCInst &MI,
unsigned OpIdx,
diff --git a/test/CodeGen/AArch64/neon-scalar-shift-imm.ll b/test/CodeGen/AArch64/neon-scalar-shift-imm.ll
new file mode 100644
index 0000000..b11540f
--- /dev/null
+++ b/test/CodeGen/AArch64/neon-scalar-shift-imm.ll
@@ -0,0 +1,527 @@
+; RUN: llc < %s -verify-machineinstrs -mtriple=aarch64-none-linux-gnu -mattr=+neon | FileCheck %s
+
+define i64 @test_vshrd_n_s64(i64 %a) {
+; CHECK: test_vshrd_n_s64
+; CHECK: sshr {{d[0-9]+}}, {{d[0-9]+}}, #63
+entry:
+ %vsshr = insertelement <1 x i64> undef, i64 %a, i32 0
+ %vsshr1 = call <1 x i64> @llvm.aarch64.neon.vshrds.n(<1 x i64> %vsshr, i32 63)
+ %0 = extractelement <1 x i64> %vsshr1, i32 0
+ ret i64 %0
+}
+
+declare <1 x i64> @llvm.aarch64.neon.vshrds.n(<1 x i64>, i32)
+
+define i64 @test_vshrd_n_u64(i64 %a) {
+; CHECK: test_vshrd_n_u64
+; CHECK: ushr {{d[0-9]+}}, {{d[0-9]+}}, #63
+entry:
+ %vushr = insertelement <1 x i64> undef, i64 %a, i32 0
+ %vushr1 = call <1 x i64> @llvm.aarch64.neon.vshrdu.n(<1 x i64> %vushr, i32 63)
+ %0 = extractelement <1 x i64> %vushr1, i32 0
+ ret i64 %0
+}
+
+declare <1 x i64> @llvm.aarch64.neon.vshrdu.n(<1 x i64>, i32)
+
+define i64 @test_vrshrd_n_s64(i64 %a) {
+; CHECK: test_vrshrd_n_s64
+; CHECK: srshr {{d[0-9]+}}, {{d[0-9]+}}, #63
+entry:
+ %vsrshr = insertelement <1 x i64> undef, i64 %a, i32 0
+ %vsrshr1 = call <1 x i64> @llvm.aarch64.neon.vrshrds.n(<1 x i64> %vsrshr, i32 63)
+ %0 = extractelement <1 x i64> %vsrshr1, i32 0
+ ret i64 %0
+}
+
+declare <1 x i64> @llvm.aarch64.neon.vrshrds.n(<1 x i64>, i32)
+
+define i64 @test_vrshrd_n_u64(i64 %a) {
+; CHECK: test_vrshrd_n_u64
+; CHECK: urshr {{d[0-9]+}}, {{d[0-9]+}}, #63
+entry:
+ %vurshr = insertelement <1 x i64> undef, i64 %a, i32 0
+ %vurshr1 = call <1 x i64> @llvm.aarch64.neon.vrshrdu.n(<1 x i64> %vurshr, i32 63)
+ %0 = extractelement <1 x i64> %vurshr1, i32 0
+ ret i64 %0
+}
+
+declare <1 x i64> @llvm.aarch64.neon.vrshrdu.n(<1 x i64>, i32)
+
+define i64 @test_vsrad_n_s64(i64 %a, i64 %b) {
+; CHECK: test_vsrad_n_s64
+; CHECK: ssra {{d[0-9]+}}, {{d[0-9]+}}, #63
+entry:
+ %vssra = insertelement <1 x i64> undef, i64 %a, i32 0
+ %vssra1 = insertelement <1 x i64> undef, i64 %b, i32 0
+ %vssra2 = call <1 x i64> @llvm.aarch64.neon.vsrads.n(<1 x i64> %vssra, <1 x i64> %vssra1, i32 63)
+ %0 = extractelement <1 x i64> %vssra2, i32 0
+ ret i64 %0
+}
+
+declare <1 x i64> @llvm.aarch64.neon.vsrads.n(<1 x i64>, <1 x i64>, i32)
+
+define i64 @test_vsrad_n_u64(i64 %a, i64 %b) {
+; CHECK: test_vsrad_n_u64
+; CHECK: usra {{d[0-9]+}}, {{d[0-9]+}}, #63
+entry:
+ %vusra = insertelement <1 x i64> undef, i64 %a, i32 0
+ %vusra1 = insertelement <1 x i64> undef, i64 %b, i32 0
+ %vusra2 = call <1 x i64> @llvm.aarch64.neon.vsradu.n(<1 x i64> %vusra, <1 x i64> %vusra1, i32 63)
+ %0 = extractelement <1 x i64> %vusra2, i32 0
+ ret i64 %0
+}
+
+declare <1 x i64> @llvm.aarch64.neon.vsradu.n(<1 x i64>, <1 x i64>, i32)
+
+define i64 @test_vrsrad_n_s64(i64 %a, i64 %b) {
+; CHECK: test_vrsrad_n_s64
+; CHECK: srsra {{d[0-9]+}}, {{d[0-9]+}}, #63
+entry:
+ %vsrsra = insertelement <1 x i64> undef, i64 %a, i32 0
+ %vsrsra1 = insertelement <1 x i64> undef, i64 %b, i32 0
+ %vsrsra2 = call <1 x i64> @llvm.aarch64.neon.vrsrads.n(<1 x i64> %vsrsra, <1 x i64> %vsrsra1, i32 63)
+ %0 = extractelement <1 x i64> %vsrsra2, i32 0
+ ret i64 %0
+}
+
+declare <1 x i64> @llvm.aarch64.neon.vrsrads.n(<1 x i64>, <1 x i64>, i32)
+
+define i64 @test_vrsrad_n_u64(i64 %a, i64 %b) {
+; CHECK: test_vrsrad_n_u64
+; CHECK: ursra {{d[0-9]+}}, {{d[0-9]+}}, #63
+entry:
+ %vursra = insertelement <1 x i64> undef, i64 %a, i32 0
+ %vursra1 = insertelement <1 x i64> undef, i64 %b, i32 0
+ %vursra2 = call <1 x i64> @llvm.aarch64.neon.vrsradu.n(<1 x i64> %vursra, <1 x i64> %vursra1, i32 63)
+ %0 = extractelement <1 x i64> %vursra2, i32 0
+ ret i64 %0
+}
+
+declare <1 x i64> @llvm.aarch64.neon.vrsradu.n(<1 x i64>, <1 x i64>, i32)
+
+define i64 @test_vshld_n_s64(i64 %a) {
+; CHECK: test_vshld_n_s64
+; CHECK: shl {{d[0-9]+}}, {{d[0-9]+}}, #63
+entry:
+ %vshl = insertelement <1 x i64> undef, i64 %a, i32 0
+ %vshl1 = call <1 x i64> @llvm.aarch64.neon.vshld.n(<1 x i64> %vshl, i32 63)
+ %0 = extractelement <1 x i64> %vshl1, i32 0
+ ret i64 %0
+}
+
+declare <1 x i64> @llvm.aarch64.neon.vshld.n(<1 x i64>, i32)
+
+define i64 @test_vshld_n_u64(i64 %a) {
+; CHECK: test_vshld_n_u64
+; CHECK: shl {{d[0-9]+}}, {{d[0-9]+}}, #63
+entry:
+ %vshl = insertelement <1 x i64> undef, i64 %a, i32 0
+ %vshl1 = call <1 x i64> @llvm.aarch64.neon.vshld.n(<1 x i64> %vshl, i32 63)
+ %0 = extractelement <1 x i64> %vshl1, i32 0
+ ret i64 %0
+}
+
+define i8 @test_vqshlb_n_s8(i8 %a) {
+; CHECK: test_vqshlb_n_s8
+; CHECK: sqshl {{b[0-9]+}}, {{b[0-9]+}}, #7
+entry:
+ %vsqshl = insertelement <1 x i8> undef, i8 %a, i32 0
+ %vsqshl1 = call <1 x i8> @llvm.aarch64.neon.vqshls.n.v1i8(<1 x i8> %vsqshl, i32 7)
+ %0 = extractelement <1 x i8> %vsqshl1, i32 0
+ ret i8 %0
+}
+
+declare <1 x i8> @llvm.aarch64.neon.vqshls.n.v1i8(<1 x i8>, i32)
+
+define i16 @test_vqshlh_n_s16(i16 %a) {
+; CHECK: test_vqshlh_n_s16
+; CHECK: sqshl {{h[0-9]+}}, {{h[0-9]+}}, #15
+entry:
+ %vsqshl = insertelement <1 x i16> undef, i16 %a, i32 0
+ %vsqshl1 = call <1 x i16> @llvm.aarch64.neon.vqshls.n.v1i16(<1 x i16> %vsqshl, i32 15)
+ %0 = extractelement <1 x i16> %vsqshl1, i32 0
+ ret i16 %0
+}
+
+declare <1 x i16> @llvm.aarch64.neon.vqshls.n.v1i16(<1 x i16>, i32)
+
+define i32 @test_vqshls_n_s32(i32 %a) {
+; CHECK: test_vqshls_n_s32
+; CHECK: sqshl {{s[0-9]+}}, {{s[0-9]+}}, #31
+entry:
+ %vsqshl = insertelement <1 x i32> undef, i32 %a, i32 0
+ %vsqshl1 = call <1 x i32> @llvm.aarch64.neon.vqshls.n.v1i32(<1 x i32> %vsqshl, i32 31)
+ %0 = extractelement <1 x i32> %vsqshl1, i32 0
+ ret i32 %0
+}
+
+declare <1 x i32> @llvm.aarch64.neon.vqshls.n.v1i32(<1 x i32>, i32)
+
+define i64 @test_vqshld_n_s64(i64 %a) {
+; CHECK: test_vqshld_n_s64
+; CHECK: sqshl {{d[0-9]+}}, {{d[0-9]+}}, #63
+entry:
+ %vsqshl = insertelement <1 x i64> undef, i64 %a, i32 0
+ %vsqshl1 = call <1 x i64> @llvm.aarch64.neon.vqshls.n.v1i64(<1 x i64> %vsqshl, i32 63)
+ %0 = extractelement <1 x i64> %vsqshl1, i32 0
+ ret i64 %0
+}
+
+declare <1 x i64> @llvm.aarch64.neon.vqshls.n.v1i64(<1 x i64>, i32)
+
+define i8 @test_vqshlb_n_u8(i8 %a) {
+; CHECK: test_vqshlb_n_u8
+; CHECK: uqshl {{b[0-9]+}}, {{b[0-9]+}}, #7
+entry:
+ %vuqshl = insertelement <1 x i8> undef, i8 %a, i32 0
+ %vuqshl1 = call <1 x i8> @llvm.aarch64.neon.vqshlu.n.v1i8(<1 x i8> %vuqshl, i32 7)
+ %0 = extractelement <1 x i8> %vuqshl1, i32 0
+ ret i8 %0
+}
+
+declare <1 x i8> @llvm.aarch64.neon.vqshlu.n.v1i8(<1 x i8>, i32)
+
+define i16 @test_vqshlh_n_u16(i16 %a) {
+; CHECK: test_vqshlh_n_u16
+; CHECK: uqshl {{h[0-9]+}}, {{h[0-9]+}}, #15
+entry:
+ %vuqshl = insertelement <1 x i16> undef, i16 %a, i32 0
+ %vuqshl1 = call <1 x i16> @llvm.aarch64.neon.vqshlu.n.v1i16(<1 x i16> %vuqshl, i32 15)
+ %0 = extractelement <1 x i16> %vuqshl1, i32 0
+ ret i16 %0
+}
+
+declare <1 x i16> @llvm.aarch64.neon.vqshlu.n.v1i16(<1 x i16>, i32)
+
+define i32 @test_vqshls_n_u32(i32 %a) {
+; CHECK: test_vqshls_n_u32
+; CHECK: uqshl {{s[0-9]+}}, {{s[0-9]+}}, #31
+entry:
+ %vuqshl = insertelement <1 x i32> undef, i32 %a, i32 0
+ %vuqshl1 = call <1 x i32> @llvm.aarch64.neon.vqshlu.n.v1i32(<1 x i32> %vuqshl, i32 31)
+ %0 = extractelement <1 x i32> %vuqshl1, i32 0
+ ret i32 %0
+}
+
+declare <1 x i32> @llvm.aarch64.neon.vqshlu.n.v1i32(<1 x i32>, i32)
+
+define i64 @test_vqshld_n_u64(i64 %a) {
+; CHECK: test_vqshld_n_u64
+; CHECK: uqshl {{d[0-9]+}}, {{d[0-9]+}}, #63
+entry:
+ %vuqshl = insertelement <1 x i64> undef, i64 %a, i32 0
+ %vuqshl1 = call <1 x i64> @llvm.aarch64.neon.vqshlu.n.v1i64(<1 x i64> %vuqshl, i32 63)
+ %0 = extractelement <1 x i64> %vuqshl1, i32 0
+ ret i64 %0
+}
+
+declare <1 x i64> @llvm.aarch64.neon.vqshlu.n.v1i64(<1 x i64>, i32)
+
+define i8 @test_vqshlub_n_s8(i8 %a) {
+; CHECK: test_vqshlub_n_s8
+; CHECK: sqshlu {{b[0-9]+}}, {{b[0-9]+}}, #7
+entry:
+ %vsqshlu = insertelement <1 x i8> undef, i8 %a, i32 0
+ %vsqshlu1 = call <1 x i8> @llvm.aarch64.neon.vqshlus.n.v1i8(<1 x i8> %vsqshlu, i32 7)
+ %0 = extractelement <1 x i8> %vsqshlu1, i32 0
+ ret i8 %0
+}
+
+declare <1 x i8> @llvm.aarch64.neon.vqshlus.n.v1i8(<1 x i8>, i32)
+
+define i16 @test_vqshluh_n_s16(i16 %a) {
+; CHECK: test_vqshluh_n_s16
+; CHECK: sqshlu {{h[0-9]+}}, {{h[0-9]+}}, #15
+entry:
+ %vsqshlu = insertelement <1 x i16> undef, i16 %a, i32 0
+ %vsqshlu1 = call <1 x i16> @llvm.aarch64.neon.vqshlus.n.v1i16(<1 x i16> %vsqshlu, i32 15)
+ %0 = extractelement <1 x i16> %vsqshlu1, i32 0
+ ret i16 %0
+}
+
+declare <1 x i16> @llvm.aarch64.neon.vqshlus.n.v1i16(<1 x i16>, i32)
+
+define i32 @test_vqshlus_n_s32(i32 %a) {
+; CHECK: test_vqshlus_n_s32
+; CHECK: sqshlu {{s[0-9]+}}, {{s[0-9]+}}, #31
+entry:
+ %vsqshlu = insertelement <1 x i32> undef, i32 %a, i32 0
+ %vsqshlu1 = call <1 x i32> @llvm.aarch64.neon.vqshlus.n.v1i32(<1 x i32> %vsqshlu, i32 31)
+ %0 = extractelement <1 x i32> %vsqshlu1, i32 0
+ ret i32 %0
+}
+
+declare <1 x i32> @llvm.aarch64.neon.vqshlus.n.v1i32(<1 x i32>, i32)
+
+define i64 @test_vqshlud_n_s64(i64 %a) {
+; CHECK: test_vqshlud_n_s64
+; CHECK: sqshlu {{d[0-9]+}}, {{d[0-9]+}}, #63
+entry:
+ %vsqshlu = insertelement <1 x i64> undef, i64 %a, i32 0
+ %vsqshlu1 = call <1 x i64> @llvm.aarch64.neon.vqshlus.n.v1i64(<1 x i64> %vsqshlu, i32 63)
+ %0 = extractelement <1 x i64> %vsqshlu1, i32 0
+ ret i64 %0
+}
+
+declare <1 x i64> @llvm.aarch64.neon.vqshlus.n.v1i64(<1 x i64>, i32)
+
+define i64 @test_vsrid_n_s64(i64 %a) {
+; CHECK: test_vsrid_n_s64
+; CHECK: sri {{d[0-9]+}}, {{d[0-9]+}}, #63
+entry:
+ %vsri = insertelement <1 x i64> undef, i64 %a, i32 0
+ %vsri1 = call <1 x i64> @llvm.aarch64.neon.vsrid.n(<1 x i64> %vsri, i32 63)
+ %0 = extractelement <1 x i64> %vsri1, i32 0
+ ret i64 %0
+}
+
+declare <1 x i64> @llvm.aarch64.neon.vsrid.n(<1 x i64>, i32)
+
+define i64 @test_vsrid_n_u64(i64 %a) {
+; CHECK: test_vsrid_n_u64
+; CHECK: sri {{d[0-9]+}}, {{d[0-9]+}}, #63
+entry:
+ %vsri = insertelement <1 x i64> undef, i64 %a, i32 0
+ %vsri1 = call <1 x i64> @llvm.aarch64.neon.vsrid.n(<1 x i64> %vsri, i32 63)
+ %0 = extractelement <1 x i64> %vsri1, i32 0
+ ret i64 %0
+}
+
+define i64 @test_vslid_n_s64(i64 %a) {
+; CHECK: test_vslid_n_s64
+; CHECK: sli {{d[0-9]+}}, {{d[0-9]+}}, #63
+entry:
+ %vsli = insertelement <1 x i64> undef, i64 %a, i32 0
+ %vsli1 = call <1 x i64> @llvm.aarch64.neon.vslid.n(<1 x i64> %vsli, i32 63)
+ %0 = extractelement <1 x i64> %vsli1, i32 0
+ ret i64 %0
+}
+
+declare <1 x i64> @llvm.aarch64.neon.vslid.n(<1 x i64>, i32)
+
+define i64 @test_vslid_n_u64(i64 %a) {
+; CHECK: test_vslid_n_u64
+; CHECK: sli {{d[0-9]+}}, {{d[0-9]+}}, #63
+entry:
+ %vsli = insertelement <1 x i64> undef, i64 %a, i32 0
+ %vsli1 = call <1 x i64> @llvm.aarch64.neon.vslid.n(<1 x i64> %vsli, i32 63)
+ %0 = extractelement <1 x i64> %vsli1, i32 0
+ ret i64 %0
+}
+
+define i8 @test_vqshrnh_n_s16(i16 %a) {
+; CHECK: test_vqshrnh_n_s16
+; CHECK: sqshrn {{b[0-9]+}}, {{h[0-9]+}}, #15
+entry:
+ %vsqshrn = insertelement <1 x i16> undef, i16 %a, i32 0
+ %vsqshrn1 = call <1 x i8> @llvm.aarch64.neon.vsqshrn.v1i8(<1 x i16> %vsqshrn, i32 15)
+ %0 = extractelement <1 x i8> %vsqshrn1, i32 0
+ ret i8 %0
+}
+
+declare <1 x i8> @llvm.aarch64.neon.vsqshrn.v1i8(<1 x i16>, i32)
+
+define i16 @test_vqshrns_n_s32(i32 %a) {
+; CHECK: test_vqshrns_n_s32
+; CHECK: sqshrn {{h[0-9]+}}, {{s[0-9]+}}, #31
+entry:
+ %vsqshrn = insertelement <1 x i32> undef, i32 %a, i32 0
+ %vsqshrn1 = call <1 x i16> @llvm.aarch64.neon.vsqshrn.v1i16(<1 x i32> %vsqshrn, i32 31)
+ %0 = extractelement <1 x i16> %vsqshrn1, i32 0
+ ret i16 %0
+}
+
+declare <1 x i16> @llvm.aarch64.neon.vsqshrn.v1i16(<1 x i32>, i32)
+
+define i32 @test_vqshrnd_n_s64(i64 %a) {
+; CHECK: test_vqshrnd_n_s64
+; CHECK: sqshrn {{s[0-9]+}}, {{d[0-9]+}}, #63
+entry:
+ %vsqshrn = insertelement <1 x i64> undef, i64 %a, i32 0
+ %vsqshrn1 = call <1 x i32> @llvm.aarch64.neon.vsqshrn.v1i32(<1 x i64> %vsqshrn, i32 63)
+ %0 = extractelement <1 x i32> %vsqshrn1, i32 0
+ ret i32 %0
+}
+
+declare <1 x i32> @llvm.aarch64.neon.vsqshrn.v1i32(<1 x i64>, i32)
+
+define i8 @test_vqshrnh_n_u16(i16 %a) {
+; CHECK: test_vqshrnh_n_u16
+; CHECK: uqshrn {{b[0-9]+}}, {{h[0-9]+}}, #15
+entry:
+ %vuqshrn = insertelement <1 x i16> undef, i16 %a, i32 0
+ %vuqshrn1 = call <1 x i8> @llvm.aarch64.neon.vuqshrn.v1i8(<1 x i16> %vuqshrn, i32 15)
+ %0 = extractelement <1 x i8> %vuqshrn1, i32 0
+ ret i8 %0
+}
+
+declare <1 x i8> @llvm.aarch64.neon.vuqshrn.v1i8(<1 x i16>, i32)
+
+define i16 @test_vqshrns_n_u32(i32 %a) {
+; CHECK: test_vqshrns_n_u32
+; CHECK: uqshrn {{h[0-9]+}}, {{s[0-9]+}}, #31
+entry:
+ %vuqshrn = insertelement <1 x i32> undef, i32 %a, i32 0
+ %vuqshrn1 = call <1 x i16> @llvm.aarch64.neon.vuqshrn.v1i16(<1 x i32> %vuqshrn, i32 31)
+ %0 = extractelement <1 x i16> %vuqshrn1, i32 0
+ ret i16 %0
+}
+
+declare <1 x i16> @llvm.aarch64.neon.vuqshrn.v1i16(<1 x i32>, i32)
+
+define i32 @test_vqshrnd_n_u64(i64 %a) {
+; CHECK: test_vqshrnd_n_u64
+; CHECK: uqshrn {{s[0-9]+}}, {{d[0-9]+}}, #63
+entry:
+ %vuqshrn = insertelement <1 x i64> undef, i64 %a, i32 0
+ %vuqshrn1 = call <1 x i32> @llvm.aarch64.neon.vuqshrn.v1i32(<1 x i64> %vuqshrn, i32 63)
+ %0 = extractelement <1 x i32> %vuqshrn1, i32 0
+ ret i32 %0
+}
+
+declare <1 x i32> @llvm.aarch64.neon.vuqshrn.v1i32(<1 x i64>, i32)
+
+define i8 @test_vqrshrnh_n_s16(i16 %a) {
+; CHECK: test_vqrshrnh_n_s16
+; CHECK: sqrshrn {{b[0-9]+}}, {{h[0-9]+}}, #15
+entry:
+ %vsqrshrn = insertelement <1 x i16> undef, i16 %a, i32 0
+ %vsqrshrn1 = call <1 x i8> @llvm.aarch64.neon.vsqrshrn.v1i8(<1 x i16> %vsqrshrn, i32 15)
+ %0 = extractelement <1 x i8> %vsqrshrn1, i32 0
+ ret i8 %0
+}
+
+declare <1 x i8> @llvm.aarch64.neon.vsqrshrn.v1i8(<1 x i16>, i32)
+
+define i16 @test_vqrshrns_n_s32(i32 %a) {
+; CHECK: test_vqrshrns_n_s32
+; CHECK: sqrshrn {{h[0-9]+}}, {{s[0-9]+}}, #31
+entry:
+ %vsqrshrn = insertelement <1 x i32> undef, i32 %a, i32 0
+ %vsqrshrn1 = call <1 x i16> @llvm.aarch64.neon.vsqrshrn.v1i16(<1 x i32> %vsqrshrn, i32 31)
+ %0 = extractelement <1 x i16> %vsqrshrn1, i32 0
+ ret i16 %0
+}
+
+declare <1 x i16> @llvm.aarch64.neon.vsqrshrn.v1i16(<1 x i32>, i32)
+
+define i32 @test_vqrshrnd_n_s64(i64 %a) {
+; CHECK: test_vqrshrnd_n_s64
+; CHECK: sqrshrn {{s[0-9]+}}, {{d[0-9]+}}, #63
+entry:
+ %vsqrshrn = insertelement <1 x i64> undef, i64 %a, i32 0
+ %vsqrshrn1 = call <1 x i32> @llvm.aarch64.neon.vsqrshrn.v1i32(<1 x i64> %vsqrshrn, i32 63)
+ %0 = extractelement <1 x i32> %vsqrshrn1, i32 0
+ ret i32 %0
+}
+
+declare <1 x i32> @llvm.aarch64.neon.vsqrshrn.v1i32(<1 x i64>, i32)
+
+define i8 @test_vqrshrnh_n_u16(i16 %a) {
+; CHECK: test_vqrshrnh_n_u16
+; CHECK: uqrshrn {{b[0-9]+}}, {{h[0-9]+}}, #15
+entry:
+ %vuqrshrn = insertelement <1 x i16> undef, i16 %a, i32 0
+ %vuqrshrn1 = call <1 x i8> @llvm.aarch64.neon.vuqrshrn.v1i8(<1 x i16> %vuqrshrn, i32 15)
+ %0 = extractelement <1 x i8> %vuqrshrn1, i32 0
+ ret i8 %0
+}
+
+declare <1 x i8> @llvm.aarch64.neon.vuqrshrn.v1i8(<1 x i16>, i32)
+
+define i16 @test_vqrshrns_n_u32(i32 %a) {
+; CHECK: test_vqrshrns_n_u32
+; CHECK: uqrshrn {{h[0-9]+}}, {{s[0-9]+}}, #31
+entry:
+ %vuqrshrn = insertelement <1 x i32> undef, i32 %a, i32 0
+ %vuqrshrn1 = call <1 x i16> @llvm.aarch64.neon.vuqrshrn.v1i16(<1 x i32> %vuqrshrn, i32 31)
+ %0 = extractelement <1 x i16> %vuqrshrn1, i32 0
+ ret i16 %0
+}
+
+declare <1 x i16> @llvm.aarch64.neon.vuqrshrn.v1i16(<1 x i32>, i32)
+
+define i32 @test_vqrshrnd_n_u64(i64 %a) {
+; CHECK: test_vqrshrnd_n_u64
+; CHECK: uqrshrn {{s[0-9]+}}, {{d[0-9]+}}, #63
+entry:
+ %vuqrshrn = insertelement <1 x i64> undef, i64 %a, i32 0
+ %vuqrshrn1 = call <1 x i32> @llvm.aarch64.neon.vuqrshrn.v1i32(<1 x i64> %vuqrshrn, i32 63)
+ %0 = extractelement <1 x i32> %vuqrshrn1, i32 0
+ ret i32 %0
+}
+
+declare <1 x i32> @llvm.aarch64.neon.vuqrshrn.v1i32(<1 x i64>, i32)
+
+define i8 @test_vqshrunh_n_s16(i16 %a) {
+; CHECK: test_vqshrunh_n_s16
+; CHECK: sqshrun {{b[0-9]+}}, {{h[0-9]+}}, #15
+entry:
+ %vsqshrun = insertelement <1 x i16> undef, i16 %a, i32 0
+ %vsqshrun1 = call <1 x i8> @llvm.aarch64.neon.vsqshrun.v1i8(<1 x i16> %vsqshrun, i32 15)
+ %0 = extractelement <1 x i8> %vsqshrun1, i32 0
+ ret i8 %0
+}
+
+declare <1 x i8> @llvm.aarch64.neon.vsqshrun.v1i8(<1 x i16>, i32)
+
+define i16 @test_vqshruns_n_s32(i32 %a) {
+; CHECK: test_vqshruns_n_s32
+; CHECK: sqshrun {{h[0-9]+}}, {{s[0-9]+}}, #31
+entry:
+ %vsqshrun = insertelement <1 x i32> undef, i32 %a, i32 0
+ %vsqshrun1 = call <1 x i16> @llvm.aarch64.neon.vsqshrun.v1i16(<1 x i32> %vsqshrun, i32 31)
+ %0 = extractelement <1 x i16> %vsqshrun1, i32 0
+ ret i16 %0
+}
+
+declare <1 x i16> @llvm.aarch64.neon.vsqshrun.v1i16(<1 x i32>, i32)
+
+define i32 @test_vqshrund_n_s64(i64 %a) {
+; CHECK: test_vqshrund_n_s64
+; CHECK: sqshrun {{s[0-9]+}}, {{d[0-9]+}}, #63
+entry:
+ %vsqshrun = insertelement <1 x i64> undef, i64 %a, i32 0
+ %vsqshrun1 = call <1 x i32> @llvm.aarch64.neon.vsqshrun.v1i32(<1 x i64> %vsqshrun, i32 63)
+ %0 = extractelement <1 x i32> %vsqshrun1, i32 0
+ ret i32 %0
+}
+
+declare <1 x i32> @llvm.aarch64.neon.vsqshrun.v1i32(<1 x i64>, i32)
+
+define i8 @test_vqrshrunh_n_s16(i16 %a) {
+; CHECK: test_vqrshrunh_n_s16
+; CHECK: sqrshrun {{b[0-9]+}}, {{h[0-9]+}}, #15
+entry:
+ %vsqrshrun = insertelement <1 x i16> undef, i16 %a, i32 0
+ %vsqrshrun1 = call <1 x i8> @llvm.aarch64.neon.vsqrshrun.v1i8(<1 x i16> %vsqrshrun, i32 15)
+ %0 = extractelement <1 x i8> %vsqrshrun1, i32 0
+ ret i8 %0
+}
+
+declare <1 x i8> @llvm.aarch64.neon.vsqrshrun.v1i8(<1 x i16>, i32)
+
+define i16 @test_vqrshruns_n_s32(i32 %a) {
+; CHECK: test_vqrshruns_n_s32
+; CHECK: sqrshrun {{h[0-9]+}}, {{s[0-9]+}}, #31
+entry:
+ %vsqrshrun = insertelement <1 x i32> undef, i32 %a, i32 0
+ %vsqrshrun1 = call <1 x i16> @llvm.aarch64.neon.vsqrshrun.v1i16(<1 x i32> %vsqrshrun, i32 31)
+ %0 = extractelement <1 x i16> %vsqrshrun1, i32 0
+ ret i16 %0
+}
+
+declare <1 x i16> @llvm.aarch64.neon.vsqrshrun.v1i16(<1 x i32>, i32)
+
+define i32 @test_vqrshrund_n_s64(i64 %a) {
+; CHECK: test_vqrshrund_n_s64
+; CHECK: sqrshrun {{s[0-9]+}}, {{d[0-9]+}}, #63
+entry:
+ %vsqrshrun = insertelement <1 x i64> undef, i64 %a, i32 0
+ %vsqrshrun1 = call <1 x i32> @llvm.aarch64.neon.vsqrshrun.v1i32(<1 x i64> %vsqrshrun, i32 63)
+ %0 = extractelement <1 x i32> %vsqrshrun1, i32 0
+ ret i32 %0
+}
+
+declare <1 x i32> @llvm.aarch64.neon.vsqrshrun.v1i32(<1 x i64>, i32)
diff --git a/test/MC/AArch64/neon-diagnostics.s b/test/MC/AArch64/neon-diagnostics.s
index 28f8e7a..c378ce4 100644
--- a/test/MC/AArch64/neon-diagnostics.s
+++ b/test/MC/AArch64/neon-diagnostics.s
@@ -970,23 +970,23 @@
//----------------------------------------------------------------------
// Mismatched vector types
- sqshl b0, b1, s0
- uqshl h0, h1, b0
- sqshl s0, s1, h0
- uqshl d0, d1, b0
+ sqshl b0, s1, b0
+ uqshl h0, b1, h0
+ sqshl s0, h1, s0
+ uqshl d0, b1, d0
// CHECK-ERROR: error: invalid operand for instruction
-// CHECK-ERROR: sqshl b0, b1, s0
-// CHECK-ERROR: ^
+// CHECK-ERROR: sqshl b0, s1, b0
+// CHECK-ERROR: ^
// CHECK-ERROR: error: invalid operand for instruction
-// CHECK-ERROR: uqshl h0, h1, b0
-// CHECK-ERROR: ^
+// CHECK-ERROR: uqshl h0, b1, h0
+// CHECK-ERROR: ^
// CHECK-ERROR: error: invalid operand for instruction
-// CHECK-ERROR: sqshl s0, s1, h0
-// CHECK-ERROR: ^
+// CHECK-ERROR: sqshl s0, h1, s0
+// CHECK-ERROR: ^
// CHECK-ERROR: error: invalid operand for instruction
-// CHECK-ERROR: uqshl d0, d1, b0
-// CHECK-ERROR: ^
+// CHECK-ERROR: uqshl d0, b1, d0
+// CHECK-ERROR: ^
//----------------------------------------------------------------------
// Scalar Integer Rounding Shift Left (Signed, Unsigned)
diff --git a/test/MC/AArch64/neon-scalar-shift-imm.s b/test/MC/AArch64/neon-scalar-shift-imm.s
new file mode 100644
index 0000000..96cb815
--- /dev/null
+++ b/test/MC/AArch64/neon-scalar-shift-imm.s
@@ -0,0 +1,186 @@
+// RUN: llvm-mc -triple aarch64-none-linux-gnu -mattr=+neon -show-encoding < %s | FileCheck %s
+
+// Check that the assembler can handle the documented syntax for AArch64
+
+//----------------------------------------------------------------------
+// Scalar Signed Shift Right (Immediate)
+//----------------------------------------------------------------------
+ sshr d15, d16, #12
+
+// CHECK: sshr d15, d16, #12 // encoding: [0x0f,0x06,0x74,0x5f]
+
+//----------------------------------------------------------------------
+// Scalar Unsigned Shift Right (Immediate)
+//----------------------------------------------------------------------
+ ushr d10, d17, #18
+
+// CHECK: ushr d10, d17, #18 // encoding: [0x2a,0x06,0x6e,0x7f]
+
+//----------------------------------------------------------------------
+// Scalar Signed Rounding Shift Right (Immediate)
+//----------------------------------------------------------------------
+ srshr d19, d18, #7
+
+// CHECK: srshr d19, d18, #7 // encoding: [0x53,0x26,0x79,0x5f]
+
+//----------------------------------------------------------------------
+// Scalar Unsigned Rounding Shift Right (Immediate)
+//----------------------------------------------------------------------
+ urshr d20, d23, #31
+
+// CHECK: urshr d20, d23, #31 // encoding: [0xf4,0x26,0x61,0x7f]
+
+//----------------------------------------------------------------------
+// Scalar Signed Shift Right and Accumulate (Immediate)
+//----------------------------------------------------------------------
+ ssra d18, d12, #21
+
+// CHECK: ssra d18, d12, #21 // encoding: [0x92,0x15,0x6b,0x5f]
+
+//----------------------------------------------------------------------
+// Scalar Unsigned Shift Right and Accumulate (Immediate)
+//----------------------------------------------------------------------
+ usra d20, d13, #61
+
+// CHECK: usra d20, d13, #61 // encoding: [0xb4,0x15,0x43,0x7f]
+
+//----------------------------------------------------------------------
+// Scalar Signed Rounding Shift Right and Accumulate (Immediate)
+//----------------------------------------------------------------------
+ srsra d15, d11, #19
+
+// CHECK: srsra d15, d11, #19 // encoding: [0x6f,0x35,0x6d,0x5f]
+
+//----------------------------------------------------------------------
+// Scalar Unsigned Rounding Shift Right and Accumulate (Immediate)
+//----------------------------------------------------------------------
+ ursra d18, d10, #13
+
+// CHECK: ursra d18, d10, #13 // encoding: [0x52,0x35,0x73,0x7f]
+
+//----------------------------------------------------------------------
+// Scalar Shift Left (Immediate)
+//----------------------------------------------------------------------
+ shl d7, d10, #12
+
+// CHECK: shl d7, d10, #12 // encoding: [0x47,0x55,0x4c,0x5f]
+
+//----------------------------------------------------------------------
+// Signed Saturating Shift Left (Immediate)
+//----------------------------------------------------------------------
+ sqshl b11, b19, #7
+ sqshl h13, h18, #11
+ sqshl s14, s17, #22
+ sqshl d15, d16, #51
+
+// CHECK: sqshl b11, b19, #7 // encoding: [0x6b,0x76,0x0f,0x5f]
+// CHECK: sqshl h13, h18, #11 // encoding: [0x4d,0x76,0x1b,0x5f]
+// CHECK: sqshl s14, s17, #22 // encoding: [0x2e,0x76,0x36,0x5f]
+// CHECK: sqshl d15, d16, #51 // encoding: [0x0f,0x76,0x73,0x5f]
+
+//----------------------------------------------------------------------
+// Unsigned Saturating Shift Left (Immediate)
+//----------------------------------------------------------------------
+ uqshl b18, b15, #6
+ uqshl h11, h18, #7
+ uqshl s14, s19, #18
+ uqshl d15, d12, #19
+
+// CHECK: uqshl b18, b15, #6 // encoding: [0xf2,0x75,0x0e,0x7f]
+// CHECK: uqshl h11, h18, #7 // encoding: [0x4b,0x76,0x17,0x7f]
+// CHECK: uqshl s14, s19, #18 // encoding: [0x6e,0x76,0x32,0x7f]
+// CHECK: uqshl d15, d12, #19 // encoding: [0x8f,0x75,0x53,0x7f]
+
+//----------------------------------------------------------------------
+// Signed Saturating Shift Left Unsigned (Immediate)
+//----------------------------------------------------------------------
+ sqshlu b15, b18, #6
+ sqshlu h19, h17, #6
+ sqshlu s16, s14, #25
+ sqshlu d11, d13, #32
+
+// CHECK: sqshlu b15, b18, #6 // encoding: [0x4f,0x66,0x0e,0x7f]
+// CHECK: sqshlu h19, h17, #6 // encoding: [0x33,0x66,0x16,0x7f]
+// CHECK: sqshlu s16, s14, #25 // encoding: [0xd0,0x65,0x39,0x7f]
+// CHECK: sqshlu d11, d13, #32 // encoding: [0xab,0x65,0x60,0x7f]
+
+//----------------------------------------------------------------------
+// Shift Right And Insert (Immediate)
+//----------------------------------------------------------------------
+ sri d10, d12, #14
+
+// CHECK: sri d10, d12, #14 // encoding: [0x8a,0x45,0x72,0x7f]
+
+//----------------------------------------------------------------------
+// Shift Left And Insert (Immediate)
+//----------------------------------------------------------------------
+ sli d10, d14, #12
+
+// CHECK: sli d10, d14, #12 // encoding: [0xca,0x55,0x4c,0x7f]
+
+//----------------------------------------------------------------------
+// Signed Saturating Shift Right Narrow (Immediate)
+//----------------------------------------------------------------------
+ sqshrn b10, h15, #5
+ sqshrn h17, s10, #4
+ sqshrn s18, d10, #31
+
+// CHECK: sqshrn b10, h15, #5 // encoding: [0xea,0x95,0x0b,0x5f]
+// CHECK: sqshrn h17, s10, #4 // encoding: [0x51,0x95,0x1c,0x5f]
+// CHECK: sqshrn s18, d10, #31 // encoding: [0x52,0x95,0x21,0x5f]
+
+//----------------------------------------------------------------------
+// Unsigned Saturating Shift Right Narrow (Immediate)
+//----------------------------------------------------------------------
+ uqshrn b12, h10, #7
+ uqshrn h10, s14, #5
+ uqshrn s10, d12, #13
+
+// CHECK: uqshrn b12, h10, #7 // encoding: [0x4c,0x95,0x09,0x7f]
+// CHECK: uqshrn h10, s14, #5 // encoding: [0xca,0x95,0x1b,0x7f]
+// CHECK: uqshrn s10, d12, #13 // encoding: [0x8a,0x95,0x33,0x7f]
+
+//----------------------------------------------------------------------
+// Signed Saturating Rounded Shift Right Narrow (Immediate)
+//----------------------------------------------------------------------
+ sqrshrn b10, h13, #2
+ sqrshrn h15, s10, #6
+ sqrshrn s15, d12, #9
+
+// CHECK: sqrshrn b10, h13, #2 // encoding: [0xaa,0x9d,0x0e,0x5f]
+// CHECK: sqrshrn h15, s10, #6 // encoding: [0x4f,0x9d,0x1a,0x5f]
+// CHECK: sqrshrn s15, d12, #9 // encoding: [0x8f,0x9d,0x37,0x5f]
+
+//----------------------------------------------------------------------
+// Unsigned Saturating Rounded Shift Right Narrow (Immediate)
+//----------------------------------------------------------------------
+ uqrshrn b10, h12, #5
+ uqrshrn h12, s10, #14
+ uqrshrn s10, d10, #25
+
+// CHECK: uqrshrn b10, h12, #5 // encoding: [0x8a,0x9d,0x0b,0x7f]
+// CHECK: uqrshrn h12, s10, #14 // encoding: [0x4c,0x9d,0x12,0x7f]
+// CHECK: uqrshrn s10, d10, #25 // encoding: [0x4a,0x9d,0x27,0x7f]
+
+//----------------------------------------------------------------------
+// Signed Saturating Shift Right Unsigned Narrow (Immediate)
+//----------------------------------------------------------------------
+ sqshrun b15, h10, #7
+ sqshrun h20, s14, #3
+ sqshrun s10, d15, #15
+
+// CHECK: sqshrun b15, h10, #7 // encoding: [0x4f,0x85,0x09,0x7f]
+// CHECK: sqshrun h20, s14, #3 // encoding: [0xd4,0x85,0x1d,0x7f]
+// CHECK: sqshrun s10, d15, #15 // encoding: [0xea,0x85,0x31,0x7f]
+
+//----------------------------------------------------------------------
+// Signed Saturating Rounded Shift Right Unsigned Narrow (Immediate)
+//----------------------------------------------------------------------
+
+ sqrshrun b17, h10, #6
+ sqrshrun h10, s13, #15
+ sqrshrun s22, d16, #31
+
+// CHECK: sqrshrun b17, h10, #6 // encoding: [0x51,0x8d,0x0a,0x7f]
+// CHECK: sqrshrun h10, s13, #15 // encoding: [0xaa,0x8d,0x11,0x7f]
+// CHECK: sqrshrun s22, d16, #31 // encoding: [0x16,0x8e,0x21,0x7f]
diff --git a/test/MC/Disassembler/AArch64/neon-instructions.txt b/test/MC/Disassembler/AArch64/neon-instructions.txt
index 2627b14..c63b65a 100644
--- a/test/MC/Disassembler/AArch64/neon-instructions.txt
+++ b/test/MC/Disassembler/AArch64/neon-instructions.txt
@@ -1793,3 +1793,165 @@
0x52,0x4a,0x21,0x7e
0x34,0x4a,0x61,0x7e
0xd3,0x49,0xa1,0x7e
+
+#----------------------------------------------------------------------
+# Scalar Signed Shift Right (Immediate)
+#----------------------------------------------------------------------
+# CHECK: sshr d15, d16, #12
+0x0f,0x06,0x74,0x5f
+
+#----------------------------------------------------------------------
+# Scalar Unsigned Shift Right (Immediate)
+#----------------------------------------------------------------------
+# CHECK: ushr d10, d17, #18
+0x2a,0x06,0x6e,0x7f
+
+#----------------------------------------------------------------------
+# Scalar Signed Rounding Shift Right (Immediate)
+#----------------------------------------------------------------------
+# CHECK: srshr d19, d18, #7
+0x53,0x26,0x79,0x5f
+
+#----------------------------------------------------------------------
+# Scalar Unsigned Rounding Shift Right (Immediate)
+#----------------------------------------------------------------------
+# CHECK: urshr d20, d23, #31
+0xf4,0x26,0x61,0x7f
+
+#----------------------------------------------------------------------
+# Scalar Signed Shift Right and Accumulate (Immediate)
+#----------------------------------------------------------------------
+# CHECK: ssra d18, d12, #21
+0x92,0x15,0x6b,0x5f
+
+#----------------------------------------------------------------------
+# Scalar Unsigned Shift Right and Accumulate (Immediate)
+#----------------------------------------------------------------------
+# CHECK: usra d20, d13, #61
+0xb4,0x15,0x43,0x7f
+
+#----------------------------------------------------------------------
+# Scalar Signed Rounding Shift Right and Accumulate (Immediate)
+#----------------------------------------------------------------------
+# CHECK: srsra d15, d11, #19
+0x6f,0x35,0x6d,0x5f
+
+#----------------------------------------------------------------------
+# Scalar Unsigned Rounding Shift Right and Accumulate (Immediate)
+#----------------------------------------------------------------------
+# CHECK: ursra d18, d10, #13
+0x52,0x35,0x73,0x7f
+
+#----------------------------------------------------------------------
+# Scalar Shift Left (Immediate)
+#----------------------------------------------------------------------
+# CHECK: shl d7, d10, #12
+0x47,0x55,0x4c,0x5f
+
+#----------------------------------------------------------------------
+# Signed Saturating Shift Left (Immediate)
+#----------------------------------------------------------------------
+# CHECK: sqshl b11, b19, #7
+# CHECK: sqshl h13, h18, #11
+# CHECK: sqshl s14, s17, #22
+# CHECK: sqshl d15, d16, #51
+0x6b,0x76,0x0f,0x5f
+0x4d,0x76,0x1b,0x5f
+0x2e,0x76,0x36,0x5f
+0x0f,0x76,0x73,0x5f
+
+#----------------------------------------------------------------------
+# Unsigned Saturating Shift Left (Immediate)
+#----------------------------------------------------------------------
+# CHECK: uqshl b18, b15, #6
+# CHECK: uqshl h11, h18, #7
+# CHECK: uqshl s14, s19, #18
+# CHECK: uqshl d15, d12, #19
+0xf2,0x75,0x0e,0x7f
+0x4b,0x76,0x17,0x7f
+0x6e,0x76,0x32,0x7f
+0x8f,0x75,0x53,0x7f
+
+#----------------------------------------------------------------------
+# Signed Saturating Shift Left Unsigned (Immediate)
+#----------------------------------------------------------------------
+# CHECK: sqshlu b15, b18, #6
+# CHECK: sqshlu h19, h17, #6
+# CHECK: sqshlu s16, s14, #25
+# CHECK: sqshlu d11, d13, #32
+0x4f,0x66,0x0e,0x7f
+0x33,0x66,0x16,0x7f
+0xd0,0x65,0x39,0x7f
+0xab,0x65,0x60,0x7f
+
+#----------------------------------------------------------------------
+# Shift Right And Insert (Immediate)
+#----------------------------------------------------------------------
+# CHECK: sri d10, d12, #14
+0x8a,0x45,0x72,0x7f
+
+#----------------------------------------------------------------------
+# Shift Left And Insert (Immediate)
+#----------------------------------------------------------------------
+# CHECK: sli d10, d14, #12
+0xca,0x55,0x4c,0x7f
+
+#----------------------------------------------------------------------
+# Signed Saturating Shift Right Narrow (Immediate)
+#----------------------------------------------------------------------
+# CHECK: sqshrn b10, h15, #5
+# CHECK: sqshrn h17, s10, #4
+# CHECK: sqshrn s18, d10, #31
+0xea,0x95,0x0b,0x5f
+0x51,0x95,0x1c,0x5f
+0x52,0x95,0x21,0x5f
+
+#----------------------------------------------------------------------
+# Unsigned Saturating Shift Right Narrow (Immediate)
+#----------------------------------------------------------------------
+# CHECK: uqshrn b12, h10, #7
+# CHECK: uqshrn h10, s14, #5
+# CHECK: uqshrn s10, d12, #13
+0x4c,0x95,0x09,0x7f
+0xca,0x95,0x1b,0x7f
+0x8a,0x95,0x33,0x7f
+
+#----------------------------------------------------------------------
+# Signed Saturating Rounded Shift Right Narrow (Immediate)
+#----------------------------------------------------------------------
+# CHECK: sqrshrn b10, h13, #2
+# CHECK: sqrshrn h15, s10, #6
+# CHECK: sqrshrn s15, d12, #9
+0xaa,0x9d,0x0e,0x5f
+0x4f,0x9d,0x1a,0x5f
+0x8f,0x9d,0x37,0x5f
+
+#----------------------------------------------------------------------
+# Unsigned Saturating Rounded Shift Right Narrow (Immediate)
+#----------------------------------------------------------------------
+# CHECK: uqrshrn b10, h12, #5
+# CHECK: uqrshrn h12, s10, #14
+# CHECK: uqrshrn s10, d10, #25
+0x8a,0x9d,0x0b,0x7f
+0x4c,0x9d,0x12,0x7f
+0x4a,0x9d,0x27,0x7f
+
+#----------------------------------------------------------------------
+# Signed Saturating Shift Right Unsigned Narrow (Immediate)
+#----------------------------------------------------------------------
+# CHECK: sqshrun b15, h10, #7
+# CHECK: sqshrun h20, s14, #3
+# CHECK: sqshrun s10, d15, #15
+0x4f,0x85,0x09,0x7f
+0xd4,0x85,0x1d,0x7f
+0xea,0x85,0x31,0x7f
+
+#----------------------------------------------------------------------
+# Signed Saturating Rounded Shift Right Unsigned Narrow (Immediate)
+#----------------------------------------------------------------------
+# CHECK: sqrshrun b17, h10, #6
+# CHECK: sqrshrun h10, s13, #15
+# CHECK: sqrshrun s22, d16, #31
+0x51,0x8d,0x0a,0x7f
+0xaa,0x8d,0x11,0x7f
+0x16,0x8e,0x21,0x7f