author | Hao Liu <Hao.Liu@arm.com> | 2013-09-04 09:28:24 +0000 |
---|---|---|
committer | Hao Liu <Hao.Liu@arm.com> | 2013-09-04 09:28:24 +0000 |
commit | 19fdc268c316b3b0bdcb2b558449819f4f402d6a (patch) | |
tree | 7e600b5667c314ab009690568492a55b06b90c1b | |
parent | 886631cc2790cc0143966069e613d933914724b4 (diff) | |
Implement AArch64 NEON instructions in AdvSIMD (shift). 24 shift instructions:
sshr, ushr, ssra, usra, srshr, urshr, srsra, ursra, sri, shl, sli, sqshlu, sqshl, uqshl, shrn, sqshrun, rshrn, sqrshrun, sqshrn, uqshrn, sqrshrn, uqrshrn, sshll, ushll
and 4 convert instructions:
scvtf, ucvtf, fcvtzs, fcvtzu
git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@189925 91177308-0d34-0410-b5e6-96231b3b80d8
-rw-r--r-- | include/llvm/IR/IntrinsicsAArch64.td | 28
-rw-r--r-- | include/llvm/Target/TargetSelectionDAG.td | 2
-rw-r--r-- | lib/Target/AArch64/AArch64ISelLowering.cpp | 90
-rw-r--r-- | lib/Target/AArch64/AArch64ISelLowering.h | 8
-rw-r--r-- | lib/Target/AArch64/AArch64InstrNEON.td | 649
-rw-r--r-- | lib/Target/AArch64/AsmParser/AArch64AsmParser.cpp | 31
-rw-r--r-- | lib/Target/AArch64/Disassembler/AArch64Disassembler.cpp | 38
-rw-r--r-- | lib/Target/AArch64/MCTargetDesc/AArch64MCCodeEmitter.cpp | 27
-rw-r--r-- | test/CodeGen/AArch64/neon-simd-shift.ll | 1524
-rw-r--r-- | test/MC/AArch64/neon-diagnostics.s | 726
-rw-r--r-- | test/MC/AArch64/neon-simd-shift.s | 434
-rw-r--r-- | test/MC/Disassembler/AArch64/neon-instructions.txt | 395
12 files changed, 3911 insertions, 41 deletions
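At the source level these patterns are normally reached through the ACLE NEON intrinsics. The following is a minimal C sketch (assuming Clang's arm_neon.h; the demo_* function names are illustrative, and the expected instruction in each comment mirrors the CodeGen tests added in test/CodeGen/AArch64/neon-simd-shift.ll below):

```c
#include <arm_neon.h>

/* Illustrative only: each intrinsic below is expected to select one of the new
   AdvSIMD (shift) instructions, either through a plain IR shift by a splat
   constant or through one of the int_aarch64_neon_* / int_arm_neon_*
   intrinsics that this patch pattern-matches. */
int8x8_t    demo_sshr(int8x8_t v)   { return vshr_n_s8(v, 3);       /* sshr  v0.8b, v0.8b, #3  */ }
uint8x8_t   demo_ushr(uint8x8_t v)  { return vshr_n_u8(v, 3);       /* ushr  v0.8b, v0.8b, #3  */ }
int8x8_t    demo_srshr(int8x8_t v)  { return vrshr_n_s8(v, 3);      /* srshr v0.8b, v0.8b, #3  */ }
int8x8_t    demo_sqshl(int8x8_t v)  { return vqshl_n_s8(v, 3);      /* sqshl v0.8b, v0.8b, #3  */ }
float32x2_t demo_scvtf(int32x2_t v) { return vcvt_n_f32_s32(v, 31); /* scvtf v0.2s, v0.2s, #31 */ }
```

The IR these lower to (ashr/lshr/shl by a splat constant, or calls such as @llvm.aarch64.neon.vsrshr.* and @llvm.arm.neon.vqshifts.*) is what the new PerformShiftCombine/PerformIntrinsicCombine code and TableGen patterns match.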
diff --git a/include/llvm/IR/IntrinsicsAArch64.td b/include/llvm/IR/IntrinsicsAArch64.td index d7b1947..0a71ea4 100644 --- a/include/llvm/IR/IntrinsicsAArch64.td +++ b/include/llvm/IR/IntrinsicsAArch64.td @@ -38,4 +38,32 @@ def int_aarch64_neon_vpminnm : Neon_2Arg_Intrinsic; // Vector Multiply Extended (Floating Point) def int_aarch64_neon_vmulx : Neon_2Arg_Intrinsic; + +class Neon_N2V_Intrinsic + : Intrinsic<[llvm_anyvector_ty], [LLVMMatchType<0>, llvm_i32_ty], + [IntrNoMem]>; +class Neon_N3V_Intrinsic + : Intrinsic<[llvm_anyvector_ty], + [LLVMMatchType<0>, LLVMMatchType<0>, llvm_i32_ty], + [IntrNoMem]>; +class Neon_N2V_Narrow_Intrinsic + : Intrinsic<[llvm_anyvector_ty], + [LLVMExtendedElementVectorType<0>, llvm_i32_ty], + [IntrNoMem]>; + +// Vector rounding shift right by immediate (Signed) +def int_aarch64_neon_vsrshr : Neon_N2V_Intrinsic; +def int_aarch64_neon_vurshr : Neon_N2V_Intrinsic; +def int_aarch64_neon_vsqshlu : Neon_N2V_Intrinsic; + +def int_aarch64_neon_vsri : Neon_N3V_Intrinsic; +def int_aarch64_neon_vsli : Neon_N3V_Intrinsic; + +def int_aarch64_neon_vsqshrun : Neon_N2V_Narrow_Intrinsic; +def int_aarch64_neon_vrshrn : Neon_N2V_Narrow_Intrinsic; +def int_aarch64_neon_vsqrshrun : Neon_N2V_Narrow_Intrinsic; +def int_aarch64_neon_vsqshrn : Neon_N2V_Narrow_Intrinsic; +def int_aarch64_neon_vuqshrn : Neon_N2V_Narrow_Intrinsic; +def int_aarch64_neon_vsqrshrn : Neon_N2V_Narrow_Intrinsic; +def int_aarch64_neon_vuqrshrn : Neon_N2V_Narrow_Intrinsic; } diff --git a/include/llvm/Target/TargetSelectionDAG.td b/include/llvm/Target/TargetSelectionDAG.td index 72963da..d94bdc6 100644 --- a/include/llvm/Target/TargetSelectionDAG.td +++ b/include/llvm/Target/TargetSelectionDAG.td @@ -465,6 +465,8 @@ def vector_extract : SDNode<"ISD::EXTRACT_VECTOR_ELT", SDTypeProfile<1, 2, [SDTCisPtrTy<2>]>, []>; def vector_insert : SDNode<"ISD::INSERT_VECTOR_ELT", SDTypeProfile<1, 3, [SDTCisSameAs<0, 1>, SDTCisPtrTy<3>]>, []>; +def concat_vectors : SDNode<"ISD::CONCAT_VECTORS", + SDTypeProfile<1, 2, [SDTCisVec<0>, SDTCisVec<1>, SDTCisSameAs<1, 2>]>,[]>; // This operator does not do subvector type checking. The ARM // backend, at least, needs it. diff --git a/lib/Target/AArch64/AArch64ISelLowering.cpp b/lib/Target/AArch64/AArch64ISelLowering.cpp index d12302e..b68c43a 100644 --- a/lib/Target/AArch64/AArch64ISelLowering.cpp +++ b/lib/Target/AArch64/AArch64ISelLowering.cpp @@ -77,8 +77,11 @@ AArch64TargetLowering::AArch64TargetLowering(AArch64TargetMachine &TM) setTargetDAGCombine(ISD::AND); setTargetDAGCombine(ISD::SRA); + setTargetDAGCombine(ISD::SRL); setTargetDAGCombine(ISD::SHL); + setTargetDAGCombine(ISD::INTRINSIC_WO_CHAIN); + // AArch64 does not have i1 loads, or much of anything for i1 really. 
setLoadExtAction(ISD::SEXTLOAD, MVT::i1, Promote); setLoadExtAction(ISD::ZEXTLOAD, MVT::i1, Promote); @@ -283,6 +286,8 @@ AArch64TargetLowering::AArch64TargetLowering(AArch64TargetMachine &TM) setOperationAction(ISD::BUILD_VECTOR, MVT::v4f32, Custom); setOperationAction(ISD::BUILD_VECTOR, MVT::v2f64, Custom); + setOperationAction(ISD::CONCAT_VECTORS, MVT::v2i64, Legal); + setOperationAction(ISD::SETCC, MVT::v8i8, Custom); setOperationAction(ISD::SETCC, MVT::v16i8, Custom); setOperationAction(ISD::SETCC, MVT::v4i16, Custom); @@ -834,6 +839,12 @@ const char *AArch64TargetLowering::getTargetNodeName(unsigned Opcode) const { return "AArch64ISD::NEON_CMPZ"; case AArch64ISD::NEON_TST: return "AArch64ISD::NEON_TST"; + case AArch64ISD::NEON_DUPIMM: + return "AArch64ISD::NEON_DUPIMM"; + case AArch64ISD::NEON_QSHLs: + return "AArch64ISD::NEON_QSHLs"; + case AArch64ISD::NEON_QSHLu: + return "AArch64ISD::NEON_QSHLu"; default: return NULL; } @@ -3257,7 +3268,7 @@ static bool getVShiftImm(SDValue Op, unsigned ElementBits, int64_t &Cnt) { /// Check if this is a valid build_vector for the immediate operand of /// a vector shift left operation. That value must be in the range: -/// 0 <= Value < ElementBits for a left shift +/// 0 <= Value < ElementBits static bool isVShiftLImm(SDValue Op, EVT VT, int64_t &Cnt) { assert(VT.isVector() && "vector shift count is not a vector type"); unsigned ElementBits = VT.getVectorElementType().getSizeInBits(); @@ -3266,10 +3277,25 @@ static bool isVShiftLImm(SDValue Op, EVT VT, int64_t &Cnt) { return (Cnt >= 0 && Cnt < ElementBits); } -static SDValue PerformSHLCombine(SDNode *N, TargetLowering::DAGCombinerInfo &DCI, +/// Check if this is a valid build_vector for the immediate operand of a +/// vector shift right operation. The value must be in the range: +/// 1 <= Value <= ElementBits +static bool isVShiftRImm(SDValue Op, EVT VT, int64_t &Cnt) { + assert(VT.isVector() && "vector shift count is not a vector type"); + unsigned ElementBits = VT.getVectorElementType().getSizeInBits(); + if (!getVShiftImm(Op, ElementBits, Cnt)) + return false; + return (Cnt >= 1 && Cnt <= ElementBits); +} + +/// Checks for immediate versions of vector shifts and lowers them. +static SDValue PerformShiftCombine(SDNode *N, + TargetLowering::DAGCombinerInfo &DCI, const AArch64Subtarget *ST) { SelectionDAG &DAG = DCI.DAG; EVT VT = N->getValueType(0); + if (N->getOpcode() == ISD::SRA && (VT == MVT::i32 || VT == MVT::i64)) + return PerformSRACombine(N, DCI); // Nothing to be done for scalar shifts. 
const TargetLowering &TLI = DAG.getTargetLoweringInfo(); @@ -3278,10 +3304,54 @@ static SDValue PerformSHLCombine(SDNode *N, TargetLowering::DAGCombinerInfo &DCI assert(ST->hasNEON() && "unexpected vector shift"); int64_t Cnt; - if (isVShiftLImm(N->getOperand(1), VT, Cnt)) { - SDValue RHS = DAG.getNode(AArch64ISD::NEON_DUPIMM, SDLoc(N->getOperand(0)), - VT, DAG.getConstant(Cnt, MVT::i32)); - return DAG.getNode(ISD::SHL, SDLoc(N), VT, N->getOperand(0), RHS); + + switch (N->getOpcode()) { + default: + llvm_unreachable("unexpected shift opcode"); + + case ISD::SHL: + if (isVShiftLImm(N->getOperand(1), VT, Cnt)) { + SDValue RHS = + DAG.getNode(AArch64ISD::NEON_DUPIMM, SDLoc(N->getOperand(1)), VT, + DAG.getConstant(Cnt, MVT::i32)); + return DAG.getNode(ISD::SHL, SDLoc(N), VT, N->getOperand(0), RHS); + } + break; + + case ISD::SRA: + case ISD::SRL: + if (isVShiftRImm(N->getOperand(1), VT, Cnt)) { + SDValue RHS = + DAG.getNode(AArch64ISD::NEON_DUPIMM, SDLoc(N->getOperand(1)), VT, + DAG.getConstant(Cnt, MVT::i32)); + return DAG.getNode(N->getOpcode(), SDLoc(N), VT, N->getOperand(0), RHS); + } + break; + } + + return SDValue(); +} + +/// ARM-specific DAG combining for intrinsics. +static SDValue PerformIntrinsicCombine(SDNode *N, SelectionDAG &DAG) { + unsigned IntNo = cast<ConstantSDNode>(N->getOperand(0))->getZExtValue(); + + switch (IntNo) { + default: + // Don't do anything for most intrinsics. + break; + + case Intrinsic::arm_neon_vqshifts: + case Intrinsic::arm_neon_vqshiftu: + EVT VT = N->getOperand(1).getValueType(); + int64_t Cnt; + if (!isVShiftLImm(N->getOperand(2), VT, Cnt)) + break; + unsigned VShiftOpc = (IntNo == Intrinsic::arm_neon_vqshifts) + ? AArch64ISD::NEON_QSHLs + : AArch64ISD::NEON_QSHLu; + return DAG.getNode(VShiftOpc, SDLoc(N), N->getValueType(0), + N->getOperand(1), DAG.getConstant(Cnt, MVT::i32)); } return SDValue(); @@ -3294,8 +3364,12 @@ AArch64TargetLowering::PerformDAGCombine(SDNode *N, default: break; case ISD::AND: return PerformANDCombine(N, DCI); case ISD::OR: return PerformORCombine(N, DCI, getSubtarget()); - case ISD::SRA: return PerformSRACombine(N, DCI); - case ISD::SHL: return PerformSHLCombine(N, DCI, getSubtarget()); + case ISD::SHL: + case ISD::SRA: + case ISD::SRL: + return PerformShiftCombine(N, DCI, getSubtarget()); + case ISD::INTRINSIC_WO_CHAIN: + return PerformIntrinsicCombine(N, DCI.DAG); } return SDValue(); } diff --git a/lib/Target/AArch64/AArch64ISelLowering.h b/lib/Target/AArch64/AArch64ISelLowering.h index c9795b2..7c7d038 100644 --- a/lib/Target/AArch64/AArch64ISelLowering.h +++ b/lib/Target/AArch64/AArch64ISelLowering.h @@ -19,7 +19,7 @@ #include "llvm/CodeGen/CallingConvLower.h" #include "llvm/CodeGen/SelectionDAG.h" #include "llvm/Target/TargetLowering.h" - +#include "llvm/IR/Intrinsics.h" namespace llvm { namespace AArch64ISD { @@ -135,7 +135,11 @@ namespace AArch64ISD { NEON_TST, // Operation for the immediate in vector shift - NEON_DUPIMM + NEON_DUPIMM, + + // Vector saturating shift + NEON_QSHLs, + NEON_QSHLu }; } diff --git a/lib/Target/AArch64/AArch64InstrNEON.td b/lib/Target/AArch64/AArch64InstrNEON.td index fb6d654..9712a5a 100644 --- a/lib/Target/AArch64/AArch64InstrNEON.td +++ b/lib/Target/AArch64/AArch64InstrNEON.td @@ -44,6 +44,12 @@ def Neon_tst : SDNode<"AArch64ISD::NEON_TST", SDTypeProfile<1, 2, def Neon_dupImm : SDNode<"AArch64ISD::NEON_DUPIMM", SDTypeProfile<1, 1, [SDTCisVec<0>, SDTCisVT<1, i32>]>>; +def SDTARMVSH : SDTypeProfile<1, 2, [SDTCisVec<0>, SDTCisSameAs<0, 1>, + SDTCisVT<2, i32>]>; +def Neon_sqrshlImm : 
SDNode<"AArch64ISD::NEON_QSHLs", SDTARMVSH>; +def Neon_uqrshlImm : SDNode<"AArch64ISD::NEON_QSHLu", SDTARMVSH>; + + //===----------------------------------------------------------------------===// // Multiclasses //===----------------------------------------------------------------------===// @@ -1413,58 +1419,133 @@ def FMOVvi_2D : NeonI_FMOV_impl<".2d", VPR128, v2f64, fmov64_operand, 0b1, 0b1>; } // Vector Shift (Immediate) - +// Immediate in [0, 63] def imm0_63 : Operand<i32> { let ParserMatchClass = uimm6_asmoperand; } -class N2VShiftLeft<bit q, bit u, bits<5> opcode, string asmop, string T, - RegisterClass VPRC, ValueType Ty, Operand ImmTy> +// Shift Right Immediate - A shift right immediate is encoded differently from +// other shift immediates. The immh:immb field is encoded like so: +// +// Offset Encoding +// 8 immh:immb<6:3> = '0001xxx', <imm> is encoded in immh:immb<2:0> +// 16 immh:immb<6:4> = '001xxxx', <imm> is encoded in immh:immb<3:0> +// 32 immh:immb<6:5> = '01xxxxx', <imm> is encoded in immh:immb<4:0> +// 64 immh:immb<6> = '1xxxxxx', <imm> is encoded in immh:immb<5:0> +class shr_imm_asmoperands<string OFFSET> : AsmOperandClass { + let Name = "ShrImm" # OFFSET; + let RenderMethod = "addImmOperands"; + let DiagnosticType = "ShrImm" # OFFSET; +} + +class shr_imm<string OFFSET> : Operand<i32> { + let EncoderMethod = "getShiftRightImm" # OFFSET; + let DecoderMethod = "DecodeShiftRightImm" # OFFSET; + let ParserMatchClass = + !cast<AsmOperandClass>("shr_imm" # OFFSET # "_asmoperand"); +} + +def shr_imm8_asmoperand : shr_imm_asmoperands<"8">; +def shr_imm16_asmoperand : shr_imm_asmoperands<"16">; +def shr_imm32_asmoperand : shr_imm_asmoperands<"32">; +def shr_imm64_asmoperand : shr_imm_asmoperands<"64">; + +def shr_imm8 : shr_imm<"8">; +def shr_imm16 : shr_imm<"16">; +def shr_imm32 : shr_imm<"32">; +def shr_imm64 : shr_imm<"64">; + +class N2VShift<bit q, bit u, bits<5> opcode, string asmop, string T, + RegisterClass VPRC, ValueType Ty, Operand ImmTy, SDNode OpNode> : NeonI_2VShiftImm<q, u, opcode, (outs VPRC:$Rd), (ins VPRC:$Rn, ImmTy:$Imm), asmop # "\t$Rd." # T # ", $Rn." # T # ", $Imm", [(set (Ty VPRC:$Rd), - (Ty (shl (Ty VPRC:$Rn), + (Ty (OpNode (Ty VPRC:$Rn), (Ty (Neon_dupImm (i32 imm:$Imm))))))], NoItinerary>; multiclass NeonI_N2VShL<bit u, bits<5> opcode, string asmop> { // 64-bit vector types. - def _8B : N2VShiftLeft<0b0, u, opcode, asmop, "8b", VPR64, v8i8, uimm3> { + def _8B : N2VShift<0b0, u, opcode, asmop, "8b", VPR64, v8i8, uimm3, shl> { let Inst{22-19} = 0b0001; // immh:immb = 0001xxx } - def _4H : N2VShiftLeft<0b0, u, opcode, asmop, "4h", VPR64, v4i16, uimm4> { + def _4H : N2VShift<0b0, u, opcode, asmop, "4h", VPR64, v4i16, uimm4, shl> { let Inst{22-20} = 0b001; // immh:immb = 001xxxx } - def _2S : N2VShiftLeft<0b0, u, opcode, asmop, "2s", VPR64, v2i32, uimm5> { + def _2S : N2VShift<0b0, u, opcode, asmop, "2s", VPR64, v2i32, uimm5, shl> { let Inst{22-21} = 0b01; // immh:immb = 01xxxxx } // 128-bit vector types. 
- def _16B : N2VShiftLeft<0b1, u, opcode, asmop, "16b", VPR128, v16i8, uimm3> { + def _16B : N2VShift<0b1, u, opcode, asmop, "16b", VPR128, v16i8, uimm3, shl> { let Inst{22-19} = 0b0001; // immh:immb = 0001xxx } - def _8H : N2VShiftLeft<0b1, u, opcode, asmop, "8h", VPR128, v8i16, uimm4> { + def _8H : N2VShift<0b1, u, opcode, asmop, "8h", VPR128, v8i16, uimm4, shl> { let Inst{22-20} = 0b001; // immh:immb = 001xxxx } - def _4S : N2VShiftLeft<0b1, u, opcode, asmop, "4s", VPR128, v4i32, uimm5> { + def _4S : N2VShift<0b1, u, opcode, asmop, "4s", VPR128, v4i32, uimm5, shl> { let Inst{22-21} = 0b01; // immh:immb = 01xxxxx } - def _2D : N2VShiftLeft<0b1, u, opcode, asmop, "2d", VPR128, v2i64, imm0_63> { + def _2D : N2VShift<0b1, u, opcode, asmop, "2d", VPR128, v2i64, imm0_63, shl> { let Inst{22} = 0b1; // immh:immb = 1xxxxxx } } -def Neon_top16B : PatFrag<(ops node:$in), +multiclass NeonI_N2VShR<bit u, bits<5> opcode, string asmop, SDNode OpNode> { + def _8B : N2VShift<0b0, u, opcode, asmop, "8b", VPR64, v8i8, shr_imm8, + OpNode> { + let Inst{22-19} = 0b0001; + } + + def _4H : N2VShift<0b0, u, opcode, asmop, "4h", VPR64, v4i16, shr_imm16, + OpNode> { + let Inst{22-20} = 0b001; + } + + def _2S : N2VShift<0b0, u, opcode, asmop, "2s", VPR64, v2i32, shr_imm32, + OpNode> { + let Inst{22-21} = 0b01; + } + + def _16B : N2VShift<0b1, u, opcode, asmop, "16b", VPR128, v16i8, shr_imm8, + OpNode> { + let Inst{22-19} = 0b0001; + } + + def _8H : N2VShift<0b1, u, opcode, asmop, "8h", VPR128, v8i16, shr_imm16, + OpNode> { + let Inst{22-20} = 0b001; + } + + def _4S : N2VShift<0b1, u, opcode, asmop, "4s", VPR128, v4i32, shr_imm32, + OpNode> { + let Inst{22-21} = 0b01; + } + + def _2D : N2VShift<0b1, u, opcode, asmop, "2d", VPR128, v2i64, shr_imm64, + OpNode> { + let Inst{22} = 0b1; + } +} + +// Shift left +defm SHLvvi : NeonI_N2VShL<0b0, 0b01010, "shl">; + +// Shift right +defm SSHRvvi : NeonI_N2VShR<0b0, 0b00000, "sshr", sra>; +defm USHRvvi : NeonI_N2VShR<0b1, 0b00000, "ushr", srl>; + +def Neon_top16B : PatFrag<(ops node:$in), (extract_subvector (v16i8 node:$in), (iPTR 8))>; -def Neon_top8H : PatFrag<(ops node:$in), +def Neon_top8H : PatFrag<(ops node:$in), (extract_subvector (v8i16 node:$in), (iPTR 4))>; -def Neon_top4S : PatFrag<(ops node:$in), +def Neon_top4S : PatFrag<(ops node:$in), (extract_subvector (v4i32 node:$in), (iPTR 2))>; class N2VShiftLong<bit q, bit u, bits<5> opcode, string asmop, string DestT, @@ -1474,21 +1555,21 @@ class N2VShiftLong<bit q, bit u, bits<5> opcode, string asmop, string DestT, (ins VPR64:$Rn, ImmTy:$Imm), asmop # "\t$Rd." # DestT # ", $Rn." # SrcT # ", $Imm", [(set (DestTy VPR128:$Rd), - (DestTy (shl + (DestTy (shl (DestTy (ExtOp (SrcTy VPR64:$Rn))), (DestTy (Neon_dupImm (i32 imm:$Imm))))))], NoItinerary>; class N2VShiftLongHigh<bit q, bit u, bits<5> opcode, string asmop, string DestT, - string SrcT, ValueType DestTy, ValueType SrcTy, + string SrcT, ValueType DestTy, ValueType SrcTy, int StartIndex, Operand ImmTy, SDPatternOperator ExtOp, PatFrag getTop> : NeonI_2VShiftImm<q, u, opcode, (outs VPR128:$Rd), (ins VPR128:$Rn, ImmTy:$Imm), asmop # "2\t$Rd." # DestT # ", $Rn." # SrcT # ", $Imm", - [(set (DestTy VPR128:$Rd), + [(set (DestTy VPR128:$Rd), (DestTy (shl - (DestTy (ExtOp + (DestTy (ExtOp (SrcTy (getTop VPR128:$Rn)))), (DestTy (Neon_dupImm (i32 imm:$Imm))))))], NoItinerary>; @@ -1497,33 +1578,33 @@ multiclass NeonI_N2VShLL<string prefix, bit u, bits<5> opcode, string asmop, SDNode ExtOp> { // 64-bit vector types. 
def _8B : N2VShiftLong<0b0, u, opcode, asmop, "8h", "8b", v8i16, v8i8, - uimm3, ExtOp>{ + uimm3, ExtOp> { let Inst{22-19} = 0b0001; // immh:immb = 0001xxx } def _4H : N2VShiftLong<0b0, u, opcode, asmop, "4s", "4h", v4i32, v4i16, - uimm4, ExtOp>{ + uimm4, ExtOp> { let Inst{22-20} = 0b001; // immh:immb = 001xxxx } def _2S : N2VShiftLong<0b0, u, opcode, asmop, "2d", "2s", v2i64, v2i32, - uimm5, ExtOp>{ + uimm5, ExtOp> { let Inst{22-21} = 0b01; // immh:immb = 01xxxxx } // 128-bit vector types def _16B : N2VShiftLongHigh<0b1, u, opcode, asmop, "8h", "16b", - v8i16, v8i8, 8, uimm3, ExtOp, Neon_top16B>{ + v8i16, v8i8, 8, uimm3, ExtOp, Neon_top16B> { let Inst{22-19} = 0b0001; // immh:immb = 0001xxx } def _8H : N2VShiftLongHigh<0b1, u, opcode, asmop, "4s", "8h", - v4i32, v4i16, 4, uimm4, ExtOp, Neon_top8H>{ + v4i32, v4i16, 4, uimm4, ExtOp, Neon_top8H> { let Inst{22-20} = 0b001; // immh:immb = 001xxxx } def _4S : N2VShiftLongHigh<0b1, u, opcode, asmop, "2d", "4s", - v2i64, v2i32, 2, uimm5, ExtOp, Neon_top4S>{ + v2i64, v2i32, 2, uimm5, ExtOp, Neon_top4S> { let Inst{22-21} = 0b01; // immh:immb = 01xxxxx } @@ -1547,13 +1628,521 @@ multiclass NeonI_N2VShLL<string prefix, bit u, bits<5> opcode, string asmop, (!cast<Instruction>(prefix # "_4S") VPR128:$Rn, 0)>; } -// Shift left immediate -defm SHLvvi : NeonI_N2VShL<0b0, 0b01010, "shl">; - -// Shift left long immediate +// Shift left long defm SSHLLvvi : NeonI_N2VShLL<"SSHLLvvi", 0b0, 0b10100, "sshll", sext>; defm USHLLvvi : NeonI_N2VShLL<"USHLLvvi", 0b1, 0b10100, "ushll", zext>; +// Rounding/Saturating shift +class N2VShift_RQ<bit q, bit u, bits<5> opcode, string asmop, string T, + RegisterClass VPRC, ValueType Ty, Operand ImmTy, + SDPatternOperator OpNode> + : NeonI_2VShiftImm<q, u, opcode, + (outs VPRC:$Rd), (ins VPRC:$Rn, ImmTy:$Imm), + asmop # "\t$Rd." # T # ", $Rn." # T # ", $Imm", + [(set (Ty VPRC:$Rd), (Ty (OpNode (Ty VPRC:$Rn), + (i32 imm:$Imm))))], + NoItinerary>; + +// shift right (vector by immediate) +multiclass NeonI_N2VShR_RQ<bit u, bits<5> opcode, string asmop, + SDPatternOperator OpNode> { + def _8B : N2VShift_RQ<0b0, u, opcode, asmop, "8b", VPR64, v8i8, shr_imm8, + OpNode> { + let Inst{22-19} = 0b0001; + } + + def _4H : N2VShift_RQ<0b0, u, opcode, asmop, "4h", VPR64, v4i16, shr_imm16, + OpNode> { + let Inst{22-20} = 0b001; + } + + def _2S : N2VShift_RQ<0b0, u, opcode, asmop, "2s", VPR64, v2i32, shr_imm32, + OpNode> { + let Inst{22-21} = 0b01; + } + + def _16B : N2VShift_RQ<0b1, u, opcode, asmop, "16b", VPR128, v16i8, shr_imm8, + OpNode> { + let Inst{22-19} = 0b0001; + } + + def _8H : N2VShift_RQ<0b1, u, opcode, asmop, "8h", VPR128, v8i16, shr_imm16, + OpNode> { + let Inst{22-20} = 0b001; + } + + def _4S : N2VShift_RQ<0b1, u, opcode, asmop, "4s", VPR128, v4i32, shr_imm32, + OpNode> { + let Inst{22-21} = 0b01; + } + + def _2D : N2VShift_RQ<0b1, u, opcode, asmop, "2d", VPR128, v2i64, shr_imm64, + OpNode> { + let Inst{22} = 0b1; + } +} + +multiclass NeonI_N2VShL_Q<bit u, bits<5> opcode, string asmop, + SDPatternOperator OpNode> { + // 64-bit vector types. + def _8B : N2VShift_RQ<0b0, u, opcode, asmop, "8b", VPR64, v8i8, uimm3, + OpNode> { + let Inst{22-19} = 0b0001; + } + + def _4H : N2VShift_RQ<0b0, u, opcode, asmop, "4h", VPR64, v4i16, uimm4, + OpNode> { + let Inst{22-20} = 0b001; + } + + def _2S : N2VShift_RQ<0b0, u, opcode, asmop, "2s", VPR64, v2i32, uimm5, + OpNode> { + let Inst{22-21} = 0b01; + } + + // 128-bit vector types. 
+ def _16B : N2VShift_RQ<0b1, u, opcode, asmop, "16b", VPR128, v16i8, uimm3, + OpNode> { + let Inst{22-19} = 0b0001; + } + + def _8H : N2VShift_RQ<0b1, u, opcode, asmop, "8h", VPR128, v8i16, uimm4, + OpNode> { + let Inst{22-20} = 0b001; + } + + def _4S : N2VShift_RQ<0b1, u, opcode, asmop, "4s", VPR128, v4i32, uimm5, + OpNode> { + let Inst{22-21} = 0b01; + } + + def _2D : N2VShift_RQ<0b1, u, opcode, asmop, "2d", VPR128, v2i64, imm0_63, + OpNode> { + let Inst{22} = 0b1; + } +} + +// Rounding shift right +defm SRSHRvvi : NeonI_N2VShR_RQ<0b0, 0b00100, "srshr", + int_aarch64_neon_vsrshr>; +defm URSHRvvi : NeonI_N2VShR_RQ<0b1, 0b00100, "urshr", + int_aarch64_neon_vurshr>; + +// Saturating shift left unsigned +defm SQSHLUvvi : NeonI_N2VShL_Q<0b1, 0b01100, "sqshlu", int_aarch64_neon_vsqshlu>; + +// Saturating shift left +defm SQSHLvvi : NeonI_N2VShL_Q<0b0, 0b01110, "sqshl", Neon_sqrshlImm>; +defm UQSHLvvi : NeonI_N2VShL_Q<0b1, 0b01110, "uqshl", Neon_uqrshlImm>; + +class N2VShiftAdd<bit q, bit u, bits<5> opcode, string asmop, string T, + RegisterClass VPRC, ValueType Ty, Operand ImmTy, + SDNode OpNode> + : NeonI_2VShiftImm<q, u, opcode, + (outs VPRC:$Rd), (ins VPRC:$src, VPRC:$Rn, ImmTy:$Imm), + asmop # "\t$Rd." # T # ", $Rn." # T # ", $Imm", + [(set (Ty VPRC:$Rd), (Ty (add (Ty VPRC:$src), + (Ty (OpNode (Ty VPRC:$Rn), + (Ty (Neon_dupImm (i32 imm:$Imm))))))))], + NoItinerary> { + let Constraints = "$src = $Rd"; +} + +// Shift Right accumulate +multiclass NeonI_N2VShRAdd<bit u, bits<5> opcode, string asmop, SDNode OpNode> { + def _8B : N2VShiftAdd<0b0, u, opcode, asmop, "8b", VPR64, v8i8, shr_imm8, + OpNode> { + let Inst{22-19} = 0b0001; + } + + def _4H : N2VShiftAdd<0b0, u, opcode, asmop, "4h", VPR64, v4i16, shr_imm16, + OpNode> { + let Inst{22-20} = 0b001; + } + + def _2S : N2VShiftAdd<0b0, u, opcode, asmop, "2s", VPR64, v2i32, shr_imm32, + OpNode> { + let Inst{22-21} = 0b01; + } + + def _16B : N2VShiftAdd<0b1, u, opcode, asmop, "16b", VPR128, v16i8, shr_imm8, + OpNode> { + let Inst{22-19} = 0b0001; + } + + def _8H : N2VShiftAdd<0b1, u, opcode, asmop, "8h", VPR128, v8i16, shr_imm16, + OpNode> { + let Inst{22-20} = 0b001; + } + + def _4S : N2VShiftAdd<0b1, u, opcode, asmop, "4s", VPR128, v4i32, shr_imm32, + OpNode> { + let Inst{22-21} = 0b01; + } + + def _2D : N2VShiftAdd<0b1, u, opcode, asmop, "2d", VPR128, v2i64, shr_imm64, + OpNode> { + let Inst{22} = 0b1; + } +} + +// Shift right and accumulate +defm SSRAvvi : NeonI_N2VShRAdd<0, 0b00010, "ssra", sra>; +defm USRAvvi : NeonI_N2VShRAdd<1, 0b00010, "usra", srl>; + +// Rounding shift accumulate +class N2VShiftAdd_R<bit q, bit u, bits<5> opcode, string asmop, string T, + RegisterClass VPRC, ValueType Ty, Operand ImmTy, + SDPatternOperator OpNode> + : NeonI_2VShiftImm<q, u, opcode, + (outs VPRC:$Rd), (ins VPRC:$src, VPRC:$Rn, ImmTy:$Imm), + asmop # "\t$Rd." # T # ", $Rn." 
# T # ", $Imm", + [(set (Ty VPRC:$Rd), (Ty (add (Ty VPRC:$src), + (Ty (OpNode (Ty VPRC:$Rn), (i32 imm:$Imm))))))], + NoItinerary> { + let Constraints = "$src = $Rd"; +} + +multiclass NeonI_N2VShRAdd_R<bit u, bits<5> opcode, string asmop, + SDPatternOperator OpNode> { + def _8B : N2VShiftAdd_R<0b0, u, opcode, asmop, "8b", VPR64, v8i8, shr_imm8, + OpNode> { + let Inst{22-19} = 0b0001; + } + + def _4H : N2VShiftAdd_R<0b0, u, opcode, asmop, "4h", VPR64, v4i16, shr_imm16, + OpNode> { + let Inst{22-20} = 0b001; + } + + def _2S : N2VShiftAdd_R<0b0, u, opcode, asmop, "2s", VPR64, v2i32, shr_imm32, + OpNode> { + let Inst{22-21} = 0b01; + } + + def _16B : N2VShiftAdd_R<0b1, u, opcode, asmop, "16b", VPR128, v16i8, shr_imm8, + OpNode> { + let Inst{22-19} = 0b0001; + } + + def _8H : N2VShiftAdd_R<0b1, u, opcode, asmop, "8h", VPR128, v8i16, shr_imm16, + OpNode> { + let Inst{22-20} = 0b001; + } + + def _4S : N2VShiftAdd_R<0b1, u, opcode, asmop, "4s", VPR128, v4i32, shr_imm32, + OpNode> { + let Inst{22-21} = 0b01; + } + + def _2D : N2VShiftAdd_R<0b1, u, opcode, asmop, "2d", VPR128, v2i64, shr_imm64, + OpNode> { + let Inst{22} = 0b1; + } +} + +// Rounding shift right and accumulate +defm SRSRAvvi : NeonI_N2VShRAdd_R<0, 0b00110, "srsra", int_aarch64_neon_vsrshr>; +defm URSRAvvi : NeonI_N2VShRAdd_R<1, 0b00110, "ursra", int_aarch64_neon_vurshr>; + +// Shift insert by immediate +class N2VShiftIns<bit q, bit u, bits<5> opcode, string asmop, string T, + RegisterClass VPRC, ValueType Ty, Operand ImmTy, + SDPatternOperator OpNode> + : NeonI_2VShiftImm<q, u, opcode, + (outs VPRC:$Rd), (ins VPRC:$src, VPRC:$Rn, ImmTy:$Imm), + asmop # "\t$Rd." # T # ", $Rn." # T # ", $Imm", + [(set (Ty VPRC:$Rd), (Ty (OpNode (Ty VPRC:$src), (Ty VPRC:$Rn), + (i32 imm:$Imm))))], + NoItinerary> { + let Constraints = "$src = $Rd"; +} + +// shift left insert (vector by immediate) +multiclass NeonI_N2VShLIns<bit u, bits<5> opcode, string asmop> { + def _8B : N2VShiftIns<0b0, u, opcode, asmop, "8b", VPR64, v8i8, uimm3, + int_aarch64_neon_vsli> { + let Inst{22-19} = 0b0001; + } + + def _4H : N2VShiftIns<0b0, u, opcode, asmop, "4h", VPR64, v4i16, uimm4, + int_aarch64_neon_vsli> { + let Inst{22-20} = 0b001; + } + + def _2S : N2VShiftIns<0b0, u, opcode, asmop, "2s", VPR64, v2i32, uimm5, + int_aarch64_neon_vsli> { + let Inst{22-21} = 0b01; + } + + // 128-bit vector types + def _16B : N2VShiftIns<0b1, u, opcode, asmop, "16b", VPR128, v16i8, uimm3, + int_aarch64_neon_vsli> { + let Inst{22-19} = 0b0001; + } + + def _8H : N2VShiftIns<0b1, u, opcode, asmop, "8h", VPR128, v8i16, uimm4, + int_aarch64_neon_vsli> { + let Inst{22-20} = 0b001; + } + + def _4S : N2VShiftIns<0b1, u, opcode, asmop, "4s", VPR128, v4i32, uimm5, + int_aarch64_neon_vsli> { + let Inst{22-21} = 0b01; + } + + def _2D : N2VShiftIns<0b1, u, opcode, asmop, "2d", VPR128, v2i64, imm0_63, + int_aarch64_neon_vsli> { + let Inst{22} = 0b1; + } +} + +// shift right insert (vector by immediate) +multiclass NeonI_N2VShRIns<bit u, bits<5> opcode, string asmop> { + // 64-bit vector types. 
+ def _8B : N2VShiftIns<0b0, u, opcode, asmop, "8b", VPR64, v8i8, shr_imm8, + int_aarch64_neon_vsri> { + let Inst{22-19} = 0b0001; + } + + def _4H : N2VShiftIns<0b0, u, opcode, asmop, "4h", VPR64, v4i16, shr_imm16, + int_aarch64_neon_vsri> { + let Inst{22-20} = 0b001; + } + + def _2S : N2VShiftIns<0b0, u, opcode, asmop, "2s", VPR64, v2i32, shr_imm32, + int_aarch64_neon_vsri> { + let Inst{22-21} = 0b01; + } + + // 128-bit vector types + def _16B : N2VShiftIns<0b1, u, opcode, asmop, "16b", VPR128, v16i8, shr_imm8, + int_aarch64_neon_vsri> { + let Inst{22-19} = 0b0001; + } + + def _8H : N2VShiftIns<0b1, u, opcode, asmop, "8h", VPR128, v8i16, shr_imm16, + int_aarch64_neon_vsri> { + let Inst{22-20} = 0b001; + } + + def _4S : N2VShiftIns<0b1, u, opcode, asmop, "4s", VPR128, v4i32, shr_imm32, + int_aarch64_neon_vsri> { + let Inst{22-21} = 0b01; + } + + def _2D : N2VShiftIns<0b1, u, opcode, asmop, "2d", VPR128, v2i64, shr_imm64, + int_aarch64_neon_vsri> { + let Inst{22} = 0b1; + } +} + +// Shift left and insert +defm SLIvvi : NeonI_N2VShLIns<0b1, 0b01010, "sli">; + +// Shift right and insert +defm SRIvvi : NeonI_N2VShRIns<0b1, 0b01000, "sri">; + +class N2VShR_Narrow<bit q, bit u, bits<5> opcode, string asmop, string DestT, + string SrcT, Operand ImmTy> + : NeonI_2VShiftImm<q, u, opcode, + (outs VPR64:$Rd), (ins VPR128:$Rn, ImmTy:$Imm), + asmop # "\t$Rd." # DestT # ", $Rn." # SrcT # ", $Imm", + [], NoItinerary>; + +class N2VShR_Narrow_Hi<bit q, bit u, bits<5> opcode, string asmop, string DestT, + string SrcT, Operand ImmTy> + : NeonI_2VShiftImm<q, u, opcode, (outs VPR128:$Rd), + (ins VPR64:$src, VPR128:$Rn, ImmTy:$Imm), + asmop # "\t$Rd." # DestT # ", $Rn." # SrcT # ", $Imm", + [], NoItinerary> { + let Constraints = "$src = $Rd"; +} + +// left long shift by immediate +multiclass NeonI_N2VShR_Narrow<bit u, bits<5> opcode, string asmop> { + def _8B : N2VShR_Narrow<0b0, u, opcode, asmop, "8b", "8h", shr_imm8> { + let Inst{22-19} = 0b0001; + } + + def _4H : N2VShR_Narrow<0b0, u, opcode, asmop, "4h", "4s", shr_imm16> { + let Inst{22-20} = 0b001; + } + + def _2S : N2VShR_Narrow<0b0, u, opcode, asmop, "2s", "2d", shr_imm32> { + let Inst{22-21} = 0b01; + } + + // Shift Narrow High + def _16B : N2VShR_Narrow_Hi<0b1, u, opcode, asmop # "2", "16b", "8h", + shr_imm8> { + let Inst{22-19} = 0b0001; + } + + def _8H : N2VShR_Narrow_Hi<0b1, u, opcode, asmop # "2", "8h", "4s", + shr_imm16> { + let Inst{22-20} = 0b001; + } + + def _4S : N2VShR_Narrow_Hi<0b1, u, opcode, asmop # "2", "4s", "2d", + shr_imm32> { + let Inst{22-21} = 0b01; + } +} + +// Shift right narrow +defm SHRNvvi : NeonI_N2VShR_Narrow<0b0, 0b10000, "shrn">; + +// Shift right narrow (prefix Q is saturating, prefix R is rounding) +defm QSHRUNvvi :NeonI_N2VShR_Narrow<0b1, 0b10000, "sqshrun">; +defm RSHRNvvi : NeonI_N2VShR_Narrow<0b0, 0b10001, "rshrn">; +defm QRSHRUNvvi : NeonI_N2VShR_Narrow<0b1, 0b10001, "sqrshrun">; +defm SQSHRNvvi : NeonI_N2VShR_Narrow<0b0, 0b10010, "sqshrn">; +defm UQSHRNvvi : NeonI_N2VShR_Narrow<0b1, 0b10010, "uqshrn">; +defm SQRSHRNvvi : NeonI_N2VShR_Narrow<0b0, 0b10011, "sqrshrn">; +defm UQRSHRNvvi : NeonI_N2VShR_Narrow<0b1, 0b10011, "uqrshrn">; + +def Neon_combine : PatFrag<(ops node:$Rm, node:$Rn), + (v2i64 (concat_vectors (v1i64 node:$Rm), + (v1i64 node:$Rn)))>; + +def Neon_lshrImm8H : PatFrag<(ops node:$lhs, node:$rhs), + (v8i16 (srl (v8i16 node:$lhs), + (v8i16 (Neon_dupImm (i32 node:$rhs)))))>; +def Neon_lshrImm4S : PatFrag<(ops node:$lhs, node:$rhs), + (v4i32 (srl (v4i32 node:$lhs), + (v4i32 (Neon_dupImm (i32 node:$rhs)))))>; 
+def Neon_lshrImm2D : PatFrag<(ops node:$lhs, node:$rhs), + (v2i64 (srl (v2i64 node:$lhs), + (v2i64 (Neon_dupImm (i32 node:$rhs)))))>; +def Neon_ashrImm8H : PatFrag<(ops node:$lhs, node:$rhs), + (v8i16 (sra (v8i16 node:$lhs), + (v8i16 (Neon_dupImm (i32 node:$rhs)))))>; +def Neon_ashrImm4S : PatFrag<(ops node:$lhs, node:$rhs), + (v4i32 (sra (v4i32 node:$lhs), + (v4i32 (Neon_dupImm (i32 node:$rhs)))))>; +def Neon_ashrImm2D : PatFrag<(ops node:$lhs, node:$rhs), + (v2i64 (sra (v2i64 node:$lhs), + (v2i64 (Neon_dupImm (i32 node:$rhs)))))>; + +// Normal shift right narrow is matched by IR (srl/sra, trunc, concat_vectors) +multiclass Neon_shiftNarrow_patterns<string shr> { + def : Pat<(v8i8 (trunc (!cast<PatFrag>("Neon_" # shr # "Imm8H") VPR128:$Rn, + imm:$Imm))), + (SHRNvvi_8B VPR128:$Rn, imm:$Imm)>; + def : Pat<(v4i16 (trunc (!cast<PatFrag>("Neon_" # shr # "Imm4S") VPR128:$Rn, + imm:$Imm))), + (SHRNvvi_4H VPR128:$Rn, imm:$Imm)>; + def : Pat<(v2i32 (trunc (!cast<PatFrag>("Neon_" # shr # "Imm2D") VPR128:$Rn, + imm:$Imm))), + (SHRNvvi_2S VPR128:$Rn, imm:$Imm)>; + + def : Pat<(Neon_combine (v1i64 VPR64:$src), (v1i64 (bitconvert + (v8i8 (trunc (!cast<PatFrag>("Neon_" # shr # "Imm8H") + VPR128:$Rn, imm:$Imm)))))), + (SHRNvvi_16B VPR64:$src, VPR128:$Rn, imm:$Imm)>; + def : Pat<(Neon_combine (v1i64 VPR64:$src), (v1i64 (bitconvert + (v4i16 (trunc (!cast<PatFrag>("Neon_" # shr # "Imm4S") + VPR128:$Rn, imm:$Imm)))))), + (SHRNvvi_8H VPR64:$src, VPR128:$Rn, imm:$Imm)>; + def : Pat<(Neon_combine (v1i64 VPR64:$src), (v1i64 (bitconvert + (v2i32 (trunc (!cast<PatFrag>("Neon_" # shr # "Imm2D") + VPR128:$Rn, imm:$Imm)))))), + (SHRNvvi_4S VPR64:$src, VPR128:$Rn, imm:$Imm)>; +} + +multiclass Neon_shiftNarrow_QR_patterns<SDPatternOperator op, string prefix> { + def : Pat<(v8i8 (op (v8i16 VPR128:$Rn), imm:$Imm)), + (!cast<Instruction>(prefix # "_8B") VPR128:$Rn, imm:$Imm)>; + def : Pat<(v4i16 (op (v4i32 VPR128:$Rn), imm:$Imm)), + (!cast<Instruction>(prefix # "_4H") VPR128:$Rn, imm:$Imm)>; + def : Pat<(v2i32 (op (v2i64 VPR128:$Rn), imm:$Imm)), + (!cast<Instruction>(prefix # "_2S") VPR128:$Rn, imm:$Imm)>; + + def : Pat<(Neon_combine (v1i64 VPR64:$src), + (v1i64 (bitconvert (v8i8 (op (v8i16 VPR128:$Rn), imm:$Imm))))), + (!cast<Instruction>(prefix # "_16B") + VPR64:$src, VPR128:$Rn, imm:$Imm)>; + def : Pat<(Neon_combine (v1i64 VPR64:$src), + (v1i64 (bitconvert (v4i16 (op (v4i32 VPR128:$Rn), imm:$Imm))))), + (!cast<Instruction>(prefix # "_8H") + VPR64:$src, VPR128:$Rn, imm:$Imm)>; + def : Pat<(Neon_combine (v1i64 VPR64:$src), + (v1i64 (bitconvert (v2i32 (op (v2i64 VPR128:$Rn), imm:$Imm))))), + (!cast<Instruction>(prefix # "_4S") + VPR64:$src, VPR128:$Rn, imm:$Imm)>; +} + +defm : Neon_shiftNarrow_patterns<"lshr">; +defm : Neon_shiftNarrow_patterns<"ashr">; + +defm : Neon_shiftNarrow_QR_patterns<int_aarch64_neon_vsqshrun, "QSHRUNvvi">; +defm : Neon_shiftNarrow_QR_patterns<int_aarch64_neon_vrshrn, "RSHRNvvi">; +defm : Neon_shiftNarrow_QR_patterns<int_aarch64_neon_vsqrshrun, "QRSHRUNvvi">; +defm : Neon_shiftNarrow_QR_patterns<int_aarch64_neon_vsqshrn, "SQSHRNvvi">; +defm : Neon_shiftNarrow_QR_patterns<int_aarch64_neon_vuqshrn, "UQSHRNvvi">; +defm : Neon_shiftNarrow_QR_patterns<int_aarch64_neon_vsqrshrn, "SQRSHRNvvi">; +defm : Neon_shiftNarrow_QR_patterns<int_aarch64_neon_vuqrshrn, "UQRSHRNvvi">; + +// Convert fix-point and float-pointing +class N2VCvt_Fx<bit q, bit u, bits<5> opcode, string asmop, string T, + RegisterClass VPRC, ValueType DestTy, ValueType SrcTy, + Operand ImmTy, SDPatternOperator IntOp> + : 
NeonI_2VShiftImm<q, u, opcode, + (outs VPRC:$Rd), (ins VPRC:$Rn, ImmTy:$Imm), + asmop # "\t$Rd." # T # ", $Rn." # T # ", $Imm", + [(set (DestTy VPRC:$Rd), (DestTy (IntOp (SrcTy VPRC:$Rn), + (i32 imm:$Imm))))], + NoItinerary>; + +multiclass NeonI_N2VCvt_Fx2fp<bit u, bits<5> opcode, string asmop, + SDPatternOperator IntOp> { + def _2S : N2VCvt_Fx<0, u, opcode, asmop, "2s", VPR64, v2f32, v2i32, + shr_imm32, IntOp> { + let Inst{22-21} = 0b01; + } + + def _4S : N2VCvt_Fx<1, u, opcode, asmop, "4s", VPR128, v4f32, v4i32, + shr_imm32, IntOp> { + let Inst{22-21} = 0b01; + } + + def _2D : N2VCvt_Fx<1, u, opcode, asmop, "2d", VPR128, v2f64, v2i64, + shr_imm64, IntOp> { + let Inst{22} = 0b1; + } +} + +multiclass NeonI_N2VCvt_Fp2fx<bit u, bits<5> opcode, string asmop, + SDPatternOperator IntOp> { + def _2S : N2VCvt_Fx<0, u, opcode, asmop, "2s", VPR64, v2i32, v2f32, + shr_imm32, IntOp> { + let Inst{22-21} = 0b01; + } + + def _4S : N2VCvt_Fx<1, u, opcode, asmop, "4s", VPR128, v4i32, v4f32, + shr_imm32, IntOp> { + let Inst{22-21} = 0b01; + } + + def _2D : N2VCvt_Fx<1, u, opcode, asmop, "2d", VPR128, v2i64, v2f64, + shr_imm64, IntOp> { + let Inst{22} = 0b1; + } +} + +// Convert fixed-point to floating-point +defm VCVTxs2f : NeonI_N2VCvt_Fx2fp<0, 0b11100, "scvtf", + int_arm_neon_vcvtfxs2fp>; +defm VCVTxu2f : NeonI_N2VCvt_Fx2fp<1, 0b11100, "ucvtf", + int_arm_neon_vcvtfxu2fp>; + +// Convert floating-point to fixed-point +defm VCVTf2xs : NeonI_N2VCvt_Fp2fx<0, 0b11111, "fcvtzs", + int_arm_neon_vcvtfp2fxs>; +defm VCVTf2xu : NeonI_N2VCvt_Fp2fx<1, 0b11111, "fcvtzu", + int_arm_neon_vcvtfp2fxu>; + // Scalar Arithmetic class NeonI_Scalar3Same_D_size<bit u, bits<5> opcode, string asmop> @@ -1726,6 +2315,10 @@ def : Pat<(v16i8 (bitconvert (v2f64 VPR128:$src))), (v16i8 VPR128:$src)>; // ...and scalar bitcasts... 
+def : Pat<(v8i8 (bitconvert (v1i64 VPR64:$src))), (v8i8 VPR64:$src)>; +def : Pat<(v4i16 (bitconvert (v1i64 VPR64:$src))), (v4i16 VPR64:$src)>; +def : Pat<(v2i32 (bitconvert (v1i64 VPR64:$src))), (v2i32 VPR64:$src)>; + def : Pat<(f64 (bitconvert (v8i8 VPR64:$src))), (f64 (EXTRACT_SUBREG (v8i8 VPR64:$src), sub_64))>; def : Pat<(f64 (bitconvert (v4i16 VPR64:$src))), diff --git a/lib/Target/AArch64/AsmParser/AArch64AsmParser.cpp b/lib/Target/AArch64/AsmParser/AArch64AsmParser.cpp index 43e91ac..68d4be4 100644 --- a/lib/Target/AArch64/AsmParser/AArch64AsmParser.cpp +++ b/lib/Target/AArch64/AsmParser/AArch64AsmParser.cpp @@ -664,6 +664,25 @@ public: return !ShiftExtend.ImplicitAmount && ShiftExtend.Amount <= 4; } + // if 0 < value <= w, return true + bool isShrFixedWidth(int w) const { + if (!isImm()) + return false; + const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(getImm()); + if (!CE) + return false; + int64_t Value = CE->getValue(); + return Value > 0 && Value <= w; + } + + bool isShrImm8() const { return isShrFixedWidth(8); } + + bool isShrImm16() const { return isShrFixedWidth(16); } + + bool isShrImm32() const { return isShrFixedWidth(32); } + + bool isShrImm64() const { return isShrFixedWidth(64); } + bool isNeonMovImmShiftLSL() const { if (!isShiftOrExtend()) return false; @@ -2240,6 +2259,18 @@ bool AArch64AsmParser::MatchAndEmitInstruction(SMLoc IDLoc, unsigned &Opcode, case Match_Width64: return Error(((AArch64Operand*)Operands[ErrorInfo])->getStartLoc(), "expected integer in range [<lsb>, 63]"); + case Match_ShrImm8: + return Error(((AArch64Operand *)Operands[ErrorInfo])->getStartLoc(), + "expected integer in range [1, 8]"); + case Match_ShrImm16: + return Error(((AArch64Operand *)Operands[ErrorInfo])->getStartLoc(), + "expected integer in range [1, 16]"); + case Match_ShrImm32: + return Error(((AArch64Operand *)Operands[ErrorInfo])->getStartLoc(), + "expected integer in range [1, 32]"); + case Match_ShrImm64: + return Error(((AArch64Operand *)Operands[ErrorInfo])->getStartLoc(), + "expected integer in range [1, 64]"); } llvm_unreachable("Implement any new match types added!"); diff --git a/lib/Target/AArch64/Disassembler/AArch64Disassembler.cpp b/lib/Target/AArch64/Disassembler/AArch64Disassembler.cpp index a88a8e8..5b57b50 100644 --- a/lib/Target/AArch64/Disassembler/AArch64Disassembler.cpp +++ b/lib/Target/AArch64/Disassembler/AArch64Disassembler.cpp @@ -113,6 +113,18 @@ static DecodeStatus DecodeFPZeroOperand(llvm::MCInst &Inst, uint64_t Address, const void *Decoder); +static DecodeStatus DecodeShiftRightImm8(MCInst &Inst, unsigned Val, + uint64_t Address, const void *Decoder); +static DecodeStatus DecodeShiftRightImm16(MCInst &Inst, unsigned Val, + uint64_t Address, + const void *Decoder); +static DecodeStatus DecodeShiftRightImm32(MCInst &Inst, unsigned Val, + uint64_t Address, + const void *Decoder); +static DecodeStatus DecodeShiftRightImm64(MCInst &Inst, unsigned Val, + uint64_t Address, + const void *Decoder); + template<int RegWidth> static DecodeStatus DecodeMoveWideImmOperand(llvm::MCInst &Inst, unsigned FullImm, @@ -413,7 +425,33 @@ static DecodeStatus DecodeFPZeroOperand(llvm::MCInst &Inst, return MCDisassembler::Success; } +static DecodeStatus DecodeShiftRightImm8(MCInst &Inst, unsigned Val, + uint64_t Address, + const void *Decoder) { + Inst.addOperand(MCOperand::CreateImm(8 - Val)); + return MCDisassembler::Success; +} + +static DecodeStatus DecodeShiftRightImm16(MCInst &Inst, unsigned Val, + uint64_t Address, + const void *Decoder) { + 
Inst.addOperand(MCOperand::CreateImm(16 - Val)); + return MCDisassembler::Success; +} + +static DecodeStatus DecodeShiftRightImm32(MCInst &Inst, unsigned Val, + uint64_t Address, + const void *Decoder) { + Inst.addOperand(MCOperand::CreateImm(32 - Val)); + return MCDisassembler::Success; +} +static DecodeStatus DecodeShiftRightImm64(MCInst &Inst, unsigned Val, + uint64_t Address, + const void *Decoder) { + Inst.addOperand(MCOperand::CreateImm(64 - Val)); + return MCDisassembler::Success; +} template<int RegWidth> static DecodeStatus DecodeMoveWideImmOperand(llvm::MCInst &Inst, diff --git a/lib/Target/AArch64/MCTargetDesc/AArch64MCCodeEmitter.cpp b/lib/Target/AArch64/MCTargetDesc/AArch64MCCodeEmitter.cpp index b9770b3..7bfaecc 100644 --- a/lib/Target/AArch64/MCTargetDesc/AArch64MCCodeEmitter.cpp +++ b/lib/Target/AArch64/MCTargetDesc/AArch64MCCodeEmitter.cpp @@ -59,6 +59,14 @@ public: unsigned getBitfield64LSLOpValue(const MCInst &MI, unsigned OpIdx, SmallVectorImpl<MCFixup> &Fixups) const; + unsigned getShiftRightImm8(const MCInst &MI, unsigned Op, + SmallVectorImpl<MCFixup> &Fixups) const; + unsigned getShiftRightImm16(const MCInst &MI, unsigned Op, + SmallVectorImpl<MCFixup> &Fixups) const; + unsigned getShiftRightImm32(const MCInst &MI, unsigned Op, + SmallVectorImpl<MCFixup> &Fixups) const; + unsigned getShiftRightImm64(const MCInst &MI, unsigned Op, + SmallVectorImpl<MCFixup> &Fixups) const; // Labels are handled mostly the same way: a symbol is needed, and // just gets some fixup attached. @@ -310,6 +318,25 @@ AArch64MCCodeEmitter::getBitfield64LSLOpValue(const MCInst &MI, unsigned OpIdx, return ((64 - MO.getImm()) & 0x3f) | (63 - MO.getImm()) << 6; } +unsigned AArch64MCCodeEmitter::getShiftRightImm8( + const MCInst &MI, unsigned Op, SmallVectorImpl<MCFixup> &Fixups) const { + return 8 - MI.getOperand(Op).getImm(); +} + +unsigned AArch64MCCodeEmitter::getShiftRightImm16( + const MCInst &MI, unsigned Op, SmallVectorImpl<MCFixup> &Fixups) const { + return 16 - MI.getOperand(Op).getImm(); +} + +unsigned AArch64MCCodeEmitter::getShiftRightImm32( + const MCInst &MI, unsigned Op, SmallVectorImpl<MCFixup> &Fixups) const { + return 32 - MI.getOperand(Op).getImm(); +} + +unsigned AArch64MCCodeEmitter::getShiftRightImm64( + const MCInst &MI, unsigned Op, SmallVectorImpl<MCFixup> &Fixups) const { + return 64 - MI.getOperand(Op).getImm(); +} template<AArch64::Fixups fixupDesired> unsigned AArch64MCCodeEmitter::getLabelOpValue(const MCInst &MI, diff --git a/test/CodeGen/AArch64/neon-simd-shift.ll b/test/CodeGen/AArch64/neon-simd-shift.ll new file mode 100644 index 0000000..19d1b21 --- /dev/null +++ b/test/CodeGen/AArch64/neon-simd-shift.ll @@ -0,0 +1,1524 @@ +; RUN: llc < %s -verify-machineinstrs -mtriple=aarch64-none-linux-gnu -mattr=+neon | FileCheck %s + +define <8 x i8> @test_vshr_n_s8(<8 x i8> %a) { +; CHECK: test_vshr_n_s8 +; CHECK: sshr {{v[0-9]+}}.8b, {{v[0-9]+}}.8b, #3 + %vshr_n = ashr <8 x i8> %a, <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3> + ret <8 x i8> %vshr_n +} + +define <4 x i16> @test_vshr_n_s16(<4 x i16> %a) { +; CHECK: test_vshr_n_s16 +; CHECK: sshr {{v[0-9]+}}.4h, {{v[0-9]+}}.4h, #3 + %vshr_n = ashr <4 x i16> %a, <i16 3, i16 3, i16 3, i16 3> + ret <4 x i16> %vshr_n +} + +define <2 x i32> @test_vshr_n_s32(<2 x i32> %a) { +; CHECK: test_vshr_n_s32 +; CHECK: sshr {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, #3 + %vshr_n = ashr <2 x i32> %a, <i32 3, i32 3> + ret <2 x i32> %vshr_n +} + +define <16 x i8> @test_vshrq_n_s8(<16 x i8> %a) { +; CHECK: test_vshrq_n_s8 +; CHECK: sshr 
{{v[0-9]+}}.16b, {{v[0-9]+}}.16b, #3 + %vshr_n = ashr <16 x i8> %a, <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3> + ret <16 x i8> %vshr_n +} + +define <8 x i16> @test_vshrq_n_s16(<8 x i16> %a) { +; CHECK: test_vshrq_n_s16 +; CHECK: sshr {{v[0-9]+}}.8h, {{v[0-9]+}}.8h, #3 + %vshr_n = ashr <8 x i16> %a, <i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3> + ret <8 x i16> %vshr_n +} + +define <4 x i32> @test_vshrq_n_s32(<4 x i32> %a) { +; CHECK: test_vshrq_n_s32 +; CHECK: sshr {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, #3 + %vshr_n = ashr <4 x i32> %a, <i32 3, i32 3, i32 3, i32 3> + ret <4 x i32> %vshr_n +} + +define <2 x i64> @test_vshrq_n_s64(<2 x i64> %a) { +; CHECK: test_vshrq_n_s64 +; CHECK: sshr {{v[0-9]+}}.2d, {{v[0-9]+}}.2d, #3 + %vshr_n = ashr <2 x i64> %a, <i64 3, i64 3> + ret <2 x i64> %vshr_n +} + +define <8 x i8> @test_vshr_n_u8(<8 x i8> %a) { +; CHECK: test_vshr_n_u8 +; CHECK: ushr {{v[0-9]+}}.8b, {{v[0-9]+}}.8b, #3 + %vshr_n = lshr <8 x i8> %a, <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3> + ret <8 x i8> %vshr_n +} + +define <4 x i16> @test_vshr_n_u16(<4 x i16> %a) { +; CHECK: test_vshr_n_u16 +; CHECK: ushr {{v[0-9]+}}.4h, {{v[0-9]+}}.4h, #3 + %vshr_n = lshr <4 x i16> %a, <i16 3, i16 3, i16 3, i16 3> + ret <4 x i16> %vshr_n +} + +define <2 x i32> @test_vshr_n_u32(<2 x i32> %a) { +; CHECK: test_vshr_n_u32 +; CHECK: ushr {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, #3 + %vshr_n = lshr <2 x i32> %a, <i32 3, i32 3> + ret <2 x i32> %vshr_n +} + +define <16 x i8> @test_vshrq_n_u8(<16 x i8> %a) { +; CHECK: test_vshrq_n_u8 +; CHECK: ushr {{v[0-9]+}}.16b, {{v[0-9]+}}.16b, #3 + %vshr_n = lshr <16 x i8> %a, <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3> + ret <16 x i8> %vshr_n +} + +define <8 x i16> @test_vshrq_n_u16(<8 x i16> %a) { +; CHECK: test_vshrq_n_u16 +; CHECK: ushr {{v[0-9]+}}.8h, {{v[0-9]+}}.8h, #3 + %vshr_n = lshr <8 x i16> %a, <i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3> + ret <8 x i16> %vshr_n +} + +define <4 x i32> @test_vshrq_n_u32(<4 x i32> %a) { +; CHECK: test_vshrq_n_u32 +; CHECK: ushr {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, #3 + %vshr_n = lshr <4 x i32> %a, <i32 3, i32 3, i32 3, i32 3> + ret <4 x i32> %vshr_n +} + +define <2 x i64> @test_vshrq_n_u64(<2 x i64> %a) { +; CHECK: test_vshrq_n_u64 +; CHECK: ushr {{v[0-9]+}}.2d, {{v[0-9]+}}.2d, #3 + %vshr_n = lshr <2 x i64> %a, <i64 3, i64 3> + ret <2 x i64> %vshr_n +} + +define <8 x i8> @test_vsra_n_s8(<8 x i8> %a, <8 x i8> %b) { +; CHECK: test_vsra_n_s8 +; CHECK: ssra {{v[0-9]+}}.8b, {{v[0-9]+}}.8b, #3 + %vsra_n = ashr <8 x i8> %b, <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3> + %1 = add <8 x i8> %vsra_n, %a + ret <8 x i8> %1 +} + +define <4 x i16> @test_vsra_n_s16(<4 x i16> %a, <4 x i16> %b) { +; CHECK: test_vsra_n_s16 +; CHECK: ssra {{v[0-9]+}}.4h, {{v[0-9]+}}.4h, #3 + %vsra_n = ashr <4 x i16> %b, <i16 3, i16 3, i16 3, i16 3> + %1 = add <4 x i16> %vsra_n, %a + ret <4 x i16> %1 +} + +define <2 x i32> @test_vsra_n_s32(<2 x i32> %a, <2 x i32> %b) { +; CHECK: test_vsra_n_s32 +; CHECK: ssra {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, #3 + %vsra_n = ashr <2 x i32> %b, <i32 3, i32 3> + %1 = add <2 x i32> %vsra_n, %a + ret <2 x i32> %1 +} + +define <16 x i8> @test_vsraq_n_s8(<16 x i8> %a, <16 x i8> %b) { +; CHECK: test_vsraq_n_s8 +; CHECK: ssra {{v[0-9]+}}.16b, {{v[0-9]+}}.16b, #3 + %vsra_n = ashr <16 x i8> %b, <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3> + %1 = add <16 x i8> %vsra_n, %a + ret <16 x 
i8> %1 +} + +define <8 x i16> @test_vsraq_n_s16(<8 x i16> %a, <8 x i16> %b) { +; CHECK: test_vsraq_n_s16 +; CHECK: ssra {{v[0-9]+}}.8h, {{v[0-9]+}}.8h, #3 + %vsra_n = ashr <8 x i16> %b, <i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3> + %1 = add <8 x i16> %vsra_n, %a + ret <8 x i16> %1 +} + +define <4 x i32> @test_vsraq_n_s32(<4 x i32> %a, <4 x i32> %b) { +; CHECK: test_vsraq_n_s32 +; CHECK: ssra {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, #3 + %vsra_n = ashr <4 x i32> %b, <i32 3, i32 3, i32 3, i32 3> + %1 = add <4 x i32> %vsra_n, %a + ret <4 x i32> %1 +} + +define <2 x i64> @test_vsraq_n_s64(<2 x i64> %a, <2 x i64> %b) { +; CHECK: test_vsraq_n_s64 +; CHECK: ssra {{v[0-9]+}}.2d, {{v[0-9]+}}.2d, #3 + %vsra_n = ashr <2 x i64> %b, <i64 3, i64 3> + %1 = add <2 x i64> %vsra_n, %a + ret <2 x i64> %1 +} + +define <8 x i8> @test_vsra_n_u8(<8 x i8> %a, <8 x i8> %b) { +; CHECK: test_vsra_n_u8 +; CHECK: usra {{v[0-9]+}}.8b, {{v[0-9]+}}.8b, #3 + %vsra_n = lshr <8 x i8> %b, <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3> + %1 = add <8 x i8> %vsra_n, %a + ret <8 x i8> %1 +} + +define <4 x i16> @test_vsra_n_u16(<4 x i16> %a, <4 x i16> %b) { +; CHECK: test_vsra_n_u16 +; CHECK: usra {{v[0-9]+}}.4h, {{v[0-9]+}}.4h, #3 + %vsra_n = lshr <4 x i16> %b, <i16 3, i16 3, i16 3, i16 3> + %1 = add <4 x i16> %vsra_n, %a + ret <4 x i16> %1 +} + +define <2 x i32> @test_vsra_n_u32(<2 x i32> %a, <2 x i32> %b) { +; CHECK: test_vsra_n_u32 +; CHECK: usra {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, #3 + %vsra_n = lshr <2 x i32> %b, <i32 3, i32 3> + %1 = add <2 x i32> %vsra_n, %a + ret <2 x i32> %1 +} + +define <16 x i8> @test_vsraq_n_u8(<16 x i8> %a, <16 x i8> %b) { +; CHECK: test_vsraq_n_u8 +; CHECK: usra {{v[0-9]+}}.16b, {{v[0-9]+}}.16b, #3 + %vsra_n = lshr <16 x i8> %b, <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3> + %1 = add <16 x i8> %vsra_n, %a + ret <16 x i8> %1 +} + +define <8 x i16> @test_vsraq_n_u16(<8 x i16> %a, <8 x i16> %b) { +; CHECK: test_vsraq_n_u16 +; CHECK: usra {{v[0-9]+}}.8h, {{v[0-9]+}}.8h, #3 + %vsra_n = lshr <8 x i16> %b, <i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3> + %1 = add <8 x i16> %vsra_n, %a + ret <8 x i16> %1 +} + +define <4 x i32> @test_vsraq_n_u32(<4 x i32> %a, <4 x i32> %b) { +; CHECK: test_vsraq_n_u32 +; CHECK: usra {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, #3 + %vsra_n = lshr <4 x i32> %b, <i32 3, i32 3, i32 3, i32 3> + %1 = add <4 x i32> %vsra_n, %a + ret <4 x i32> %1 +} + +define <2 x i64> @test_vsraq_n_u64(<2 x i64> %a, <2 x i64> %b) { +; CHECK: test_vsraq_n_u64 +; CHECK: usra {{v[0-9]+}}.2d, {{v[0-9]+}}.2d, #3 + %vsra_n = lshr <2 x i64> %b, <i64 3, i64 3> + %1 = add <2 x i64> %vsra_n, %a + ret <2 x i64> %1 +} + +define <8 x i8> @test_vrshr_n_s8(<8 x i8> %a) { +; CHECK: test_vrshr_n_s8 +; CHECK: srshr {{v[0-9]+}}.8b, {{v[0-9]+}}.8b, #3 + %vrshr_n = tail call <8 x i8> @llvm.aarch64.neon.vsrshr.v8i8(<8 x i8> %a, i32 3) + ret <8 x i8> %vrshr_n +} + + +define <4 x i16> @test_vrshr_n_s16(<4 x i16> %a) { +; CHECK: test_vrshr_n_s16 +; CHECK: srshr {{v[0-9]+}}.4h, {{v[0-9]+}}.4h, #3 + %vrshr_n = tail call <4 x i16> @llvm.aarch64.neon.vsrshr.v4i16(<4 x i16> %a, i32 3) + ret <4 x i16> %vrshr_n +} + + +define <2 x i32> @test_vrshr_n_s32(<2 x i32> %a) { +; CHECK: test_vrshr_n_s32 +; CHECK: srshr {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, #3 + %vrshr_n = tail call <2 x i32> @llvm.aarch64.neon.vsrshr.v2i32(<2 x i32> %a, i32 3) + ret <2 x i32> %vrshr_n +} + + +define <16 x i8> @test_vrshrq_n_s8(<16 x i8> %a) { +; CHECK: test_vrshrq_n_s8 +; CHECK: srshr {{v[0-9]+}}.16b, 
{{v[0-9]+}}.16b, #3 + %vrshr_n = tail call <16 x i8> @llvm.aarch64.neon.vsrshr.v16i8(<16 x i8> %a, i32 3) + ret <16 x i8> %vrshr_n +} + + +define <8 x i16> @test_vrshrq_n_s16(<8 x i16> %a) { +; CHECK: test_vrshrq_n_s16 +; CHECK: srshr {{v[0-9]+}}.8h, {{v[0-9]+}}.8h, #3 + %vrshr_n = tail call <8 x i16> @llvm.aarch64.neon.vsrshr.v8i16(<8 x i16> %a, i32 3) + ret <8 x i16> %vrshr_n +} + + +define <4 x i32> @test_vrshrq_n_s32(<4 x i32> %a) { +; CHECK: test_vrshrq_n_s32 +; CHECK: srshr {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, #3 + %vrshr_n = tail call <4 x i32> @llvm.aarch64.neon.vsrshr.v4i32(<4 x i32> %a, i32 3) + ret <4 x i32> %vrshr_n +} + + +define <2 x i64> @test_vrshrq_n_s64(<2 x i64> %a) { +; CHECK: test_vrshrq_n_s64 +; CHECK: srshr {{v[0-9]+}}.2d, {{v[0-9]+}}.2d, #3 + %vrshr_n = tail call <2 x i64> @llvm.aarch64.neon.vsrshr.v2i64(<2 x i64> %a, i32 3) + ret <2 x i64> %vrshr_n +} + + +define <8 x i8> @test_vrshr_n_u8(<8 x i8> %a) { +; CHECK: test_vrshr_n_u8 +; CHECK: urshr {{v[0-9]+}}.8b, {{v[0-9]+}}.8b, #3 + %vrshr_n = tail call <8 x i8> @llvm.aarch64.neon.vurshr.v8i8(<8 x i8> %a, i32 3) + ret <8 x i8> %vrshr_n +} + + +define <4 x i16> @test_vrshr_n_u16(<4 x i16> %a) { +; CHECK: test_vrshr_n_u16 +; CHECK: urshr {{v[0-9]+}}.4h, {{v[0-9]+}}.4h, #3 + %vrshr_n = tail call <4 x i16> @llvm.aarch64.neon.vurshr.v4i16(<4 x i16> %a, i32 3) + ret <4 x i16> %vrshr_n +} + + +define <2 x i32> @test_vrshr_n_u32(<2 x i32> %a) { +; CHECK: test_vrshr_n_u32 +; CHECK: urshr {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, #3 + %vrshr_n = tail call <2 x i32> @llvm.aarch64.neon.vurshr.v2i32(<2 x i32> %a, i32 3) + ret <2 x i32> %vrshr_n +} + + +define <16 x i8> @test_vrshrq_n_u8(<16 x i8> %a) { +; CHECK: test_vrshrq_n_u8 +; CHECK: urshr {{v[0-9]+}}.16b, {{v[0-9]+}}.16b, #3 + %vrshr_n = tail call <16 x i8> @llvm.aarch64.neon.vurshr.v16i8(<16 x i8> %a, i32 3) + ret <16 x i8> %vrshr_n +} + + +define <8 x i16> @test_vrshrq_n_u16(<8 x i16> %a) { +; CHECK: test_vrshrq_n_u16 +; CHECK: urshr {{v[0-9]+}}.8h, {{v[0-9]+}}.8h, #3 + %vrshr_n = tail call <8 x i16> @llvm.aarch64.neon.vurshr.v8i16(<8 x i16> %a, i32 3) + ret <8 x i16> %vrshr_n +} + + +define <4 x i32> @test_vrshrq_n_u32(<4 x i32> %a) { +; CHECK: test_vrshrq_n_u32 +; CHECK: urshr {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, #3 + %vrshr_n = tail call <4 x i32> @llvm.aarch64.neon.vurshr.v4i32(<4 x i32> %a, i32 3) + ret <4 x i32> %vrshr_n +} + + +define <2 x i64> @test_vrshrq_n_u64(<2 x i64> %a) { +; CHECK: test_vrshrq_n_u64 +; CHECK: urshr {{v[0-9]+}}.2d, {{v[0-9]+}}.2d, #3 + %vrshr_n = tail call <2 x i64> @llvm.aarch64.neon.vurshr.v2i64(<2 x i64> %a, i32 3) + ret <2 x i64> %vrshr_n +} + + +define <8 x i8> @test_vrsra_n_s8(<8 x i8> %a, <8 x i8> %b) { +; CHECK: test_vrsra_n_s8 +; CHECK: srsra {{v[0-9]+}}.8b, {{v[0-9]+}}.8b, #3 + %1 = tail call <8 x i8> @llvm.aarch64.neon.vsrshr.v8i8(<8 x i8> %b, i32 3) + %vrsra_n = add <8 x i8> %1, %a + ret <8 x i8> %vrsra_n +} + +define <4 x i16> @test_vrsra_n_s16(<4 x i16> %a, <4 x i16> %b) { +; CHECK: test_vrsra_n_s16 +; CHECK: srsra {{v[0-9]+}}.4h, {{v[0-9]+}}.4h, #3 + %1 = tail call <4 x i16> @llvm.aarch64.neon.vsrshr.v4i16(<4 x i16> %b, i32 3) + %vrsra_n = add <4 x i16> %1, %a + ret <4 x i16> %vrsra_n +} + +define <2 x i32> @test_vrsra_n_s32(<2 x i32> %a, <2 x i32> %b) { +; CHECK: test_vrsra_n_s32 +; CHECK: srsra {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, #3 + %1 = tail call <2 x i32> @llvm.aarch64.neon.vsrshr.v2i32(<2 x i32> %b, i32 3) + %vrsra_n = add <2 x i32> %1, %a + ret <2 x i32> %vrsra_n +} + +define <16 x i8> @test_vrsraq_n_s8(<16 x i8> %a, <16 x i8> %b) { +; CHECK: 
test_vrsraq_n_s8 +; CHECK: srsra {{v[0-9]+}}.16b, {{v[0-9]+}}.16b, #3 + %1 = tail call <16 x i8> @llvm.aarch64.neon.vsrshr.v16i8(<16 x i8> %b, i32 3) + %vrsra_n = add <16 x i8> %1, %a + ret <16 x i8> %vrsra_n +} + +define <8 x i16> @test_vrsraq_n_s16(<8 x i16> %a, <8 x i16> %b) { +; CHECK: test_vrsraq_n_s16 +; CHECK: srsra {{v[0-9]+}}.8h, {{v[0-9]+}}.8h, #3 + %1 = tail call <8 x i16> @llvm.aarch64.neon.vsrshr.v8i16(<8 x i16> %b, i32 3) + %vrsra_n = add <8 x i16> %1, %a + ret <8 x i16> %vrsra_n +} + +define <4 x i32> @test_vrsraq_n_s32(<4 x i32> %a, <4 x i32> %b) { +; CHECK: test_vrsraq_n_s32 +; CHECK: srsra {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, #3 + %1 = tail call <4 x i32> @llvm.aarch64.neon.vsrshr.v4i32(<4 x i32> %b, i32 3) + %vrsra_n = add <4 x i32> %1, %a + ret <4 x i32> %vrsra_n +} + +define <2 x i64> @test_vrsraq_n_s64(<2 x i64> %a, <2 x i64> %b) { +; CHECK: test_vrsraq_n_s64 +; CHECK: srsra {{v[0-9]+}}.2d, {{v[0-9]+}}.2d, #3 + %1 = tail call <2 x i64> @llvm.aarch64.neon.vsrshr.v2i64(<2 x i64> %b, i32 3) + %vrsra_n = add <2 x i64> %1, %a + ret <2 x i64> %vrsra_n +} + +define <8 x i8> @test_vrsra_n_u8(<8 x i8> %a, <8 x i8> %b) { +; CHECK: test_vrsra_n_u8 +; CHECK: ursra {{v[0-9]+}}.8b, {{v[0-9]+}}.8b, #3 + %1 = tail call <8 x i8> @llvm.aarch64.neon.vurshr.v8i8(<8 x i8> %b, i32 3) + %vrsra_n = add <8 x i8> %1, %a + ret <8 x i8> %vrsra_n +} + +define <4 x i16> @test_vrsra_n_u16(<4 x i16> %a, <4 x i16> %b) { +; CHECK: test_vrsra_n_u16 +; CHECK: ursra {{v[0-9]+}}.4h, {{v[0-9]+}}.4h, #3 + %1 = tail call <4 x i16> @llvm.aarch64.neon.vurshr.v4i16(<4 x i16> %b, i32 3) + %vrsra_n = add <4 x i16> %1, %a + ret <4 x i16> %vrsra_n +} + +define <2 x i32> @test_vrsra_n_u32(<2 x i32> %a, <2 x i32> %b) { +; CHECK: test_vrsra_n_u32 +; CHECK: ursra {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, #3 + %1 = tail call <2 x i32> @llvm.aarch64.neon.vurshr.v2i32(<2 x i32> %b, i32 3) + %vrsra_n = add <2 x i32> %1, %a + ret <2 x i32> %vrsra_n +} + +define <16 x i8> @test_vrsraq_n_u8(<16 x i8> %a, <16 x i8> %b) { +; CHECK: test_vrsraq_n_u8 +; CHECK: ursra {{v[0-9]+}}.16b, {{v[0-9]+}}.16b, #3 + %1 = tail call <16 x i8> @llvm.aarch64.neon.vurshr.v16i8(<16 x i8> %b, i32 3) + %vrsra_n = add <16 x i8> %1, %a + ret <16 x i8> %vrsra_n +} + +define <8 x i16> @test_vrsraq_n_u16(<8 x i16> %a, <8 x i16> %b) { +; CHECK: test_vrsraq_n_u16 +; CHECK: ursra {{v[0-9]+}}.8h, {{v[0-9]+}}.8h, #3 + %1 = tail call <8 x i16> @llvm.aarch64.neon.vurshr.v8i16(<8 x i16> %b, i32 3) + %vrsra_n = add <8 x i16> %1, %a + ret <8 x i16> %vrsra_n +} + +define <4 x i32> @test_vrsraq_n_u32(<4 x i32> %a, <4 x i32> %b) { +; CHECK: test_vrsraq_n_u32 +; CHECK: ursra {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, #3 + %1 = tail call <4 x i32> @llvm.aarch64.neon.vurshr.v4i32(<4 x i32> %b, i32 3) + %vrsra_n = add <4 x i32> %1, %a + ret <4 x i32> %vrsra_n +} + +define <2 x i64> @test_vrsraq_n_u64(<2 x i64> %a, <2 x i64> %b) { +; CHECK: test_vrsraq_n_u64 +; CHECK: ursra {{v[0-9]+}}.2d, {{v[0-9]+}}.2d, #3 + %1 = tail call <2 x i64> @llvm.aarch64.neon.vurshr.v2i64(<2 x i64> %b, i32 3) + %vrsra_n = add <2 x i64> %1, %a + ret <2 x i64> %vrsra_n +} + +define <8 x i8> @test_vsri_n_s8(<8 x i8> %a, <8 x i8> %b) { +; CHECK: test_vsri_n_s8 +; CHECK: sri {{v[0-9]+}}.8b, {{v[0-9]+}}.8b, #3 + %vsri_n = tail call <8 x i8> @llvm.aarch64.neon.vsri.v8i8(<8 x i8> %a, <8 x i8> %b, i32 3) + ret <8 x i8> %vsri_n +} + + +define <4 x i16> @test_vsri_n_s16(<4 x i16> %a, <4 x i16> %b) { +; CHECK: test_vsri_n_s16 +; CHECK: sri {{v[0-9]+}}.4h, {{v[0-9]+}}.4h, #3 + %vsri = tail call <4 x i16> 
@llvm.aarch64.neon.vsri.v4i16(<4 x i16> %a, <4 x i16> %b, i32 3) + ret <4 x i16> %vsri +} + + +define <2 x i32> @test_vsri_n_s32(<2 x i32> %a, <2 x i32> %b) { +; CHECK: test_vsri_n_s32 +; CHECK: sri {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, #3 + %vsri = tail call <2 x i32> @llvm.aarch64.neon.vsri.v2i32(<2 x i32> %a, <2 x i32> %b, i32 3) + ret <2 x i32> %vsri +} + + +define <16 x i8> @test_vsriq_n_s8(<16 x i8> %a, <16 x i8> %b) { +; CHECK: test_vsriq_n_s8 +; CHECK: sri {{v[0-9]+}}.16b, {{v[0-9]+}}.16b, #3 + %vsri_n = tail call <16 x i8> @llvm.aarch64.neon.vsri.v16i8(<16 x i8> %a, <16 x i8> %b, i32 3) + ret <16 x i8> %vsri_n +} + + +define <8 x i16> @test_vsriq_n_s16(<8 x i16> %a, <8 x i16> %b) { +; CHECK: test_vsriq_n_s16 +; CHECK: sri {{v[0-9]+}}.8h, {{v[0-9]+}}.8h, #3 + %vsri = tail call <8 x i16> @llvm.aarch64.neon.vsri.v8i16(<8 x i16> %a, <8 x i16> %b, i32 3) + ret <8 x i16> %vsri +} + + +define <4 x i32> @test_vsriq_n_s32(<4 x i32> %a, <4 x i32> %b) { +; CHECK: test_vsriq_n_s32 +; CHECK: sri {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, #3 + %vsri = tail call <4 x i32> @llvm.aarch64.neon.vsri.v4i32(<4 x i32> %a, <4 x i32> %b, i32 3) + ret <4 x i32> %vsri +} + + +define <2 x i64> @test_vsriq_n_s64(<2 x i64> %a, <2 x i64> %b) { +; CHECK: test_vsriq_n_s64 +; CHECK: sri {{v[0-9]+}}.2d, {{v[0-9]+}}.2d, #3 + %vsri = tail call <2 x i64> @llvm.aarch64.neon.vsri.v2i64(<2 x i64> %a, <2 x i64> %b, i32 3) + ret <2 x i64> %vsri +} + +define <8 x i8> @test_vsri_n_p8(<8 x i8> %a, <8 x i8> %b) { +; CHECK: test_vsri_n_p8 +; CHECK: sri {{v[0-9]+}}.8b, {{v[0-9]+}}.8b, #3 + %vsri_n = tail call <8 x i8> @llvm.aarch64.neon.vsri.v8i8(<8 x i8> %a, <8 x i8> %b, i32 3) + ret <8 x i8> %vsri_n +} + +define <4 x i16> @test_vsri_n_p16(<4 x i16> %a, <4 x i16> %b) { +; CHECK: test_vsri_n_p16 +; CHECK: sri {{v[0-9]+}}.4h, {{v[0-9]+}}.4h, #15 + %vsri = tail call <4 x i16> @llvm.aarch64.neon.vsri.v4i16(<4 x i16> %a, <4 x i16> %b, i32 15) + ret <4 x i16> %vsri +} + +define <16 x i8> @test_vsriq_n_p8(<16 x i8> %a, <16 x i8> %b) { +; CHECK: test_vsriq_n_p8 +; CHECK: sri {{v[0-9]+}}.16b, {{v[0-9]+}}.16b, #3 + %vsri_n = tail call <16 x i8> @llvm.aarch64.neon.vsri.v16i8(<16 x i8> %a, <16 x i8> %b, i32 3) + ret <16 x i8> %vsri_n +} + +define <8 x i16> @test_vsriq_n_p16(<8 x i16> %a, <8 x i16> %b) { +; CHECK: test_vsriq_n_p16 +; CHECK: sri {{v[0-9]+}}.8h, {{v[0-9]+}}.8h, #15 + %vsri = tail call <8 x i16> @llvm.aarch64.neon.vsri.v8i16(<8 x i16> %a, <8 x i16> %b, i32 15) + ret <8 x i16> %vsri +} + +define <8 x i8> @test_vsli_n_s8(<8 x i8> %a, <8 x i8> %b) { +; CHECK: test_vsli_n_s8 +; CHECK: sli {{v[0-9]+}}.8b, {{v[0-9]+}}.8b, #3 + %vsli_n = tail call <8 x i8> @llvm.aarch64.neon.vsli.v8i8(<8 x i8> %a, <8 x i8> %b, i32 3) + ret <8 x i8> %vsli_n +} + +define <4 x i16> @test_vsli_n_s16(<4 x i16> %a, <4 x i16> %b) { +; CHECK: test_vsli_n_s16 +; CHECK: sli {{v[0-9]+}}.4h, {{v[0-9]+}}.4h, #3 + %vsli = tail call <4 x i16> @llvm.aarch64.neon.vsli.v4i16(<4 x i16> %a, <4 x i16> %b, i32 3) + ret <4 x i16> %vsli +} + +define <2 x i32> @test_vsli_n_s32(<2 x i32> %a, <2 x i32> %b) { +; CHECK: test_vsli_n_s32 +; CHECK: sli {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, #3 + %vsli = tail call <2 x i32> @llvm.aarch64.neon.vsli.v2i32(<2 x i32> %a, <2 x i32> %b, i32 3) + ret <2 x i32> %vsli +} + +define <16 x i8> @test_vsliq_n_s8(<16 x i8> %a, <16 x i8> %b) { +; CHECK: test_vsliq_n_s8 +; CHECK: sli {{v[0-9]+}}.16b, {{v[0-9]+}}.16b, #3 + %vsli_n = tail call <16 x i8> @llvm.aarch64.neon.vsli.v16i8(<16 x i8> %a, <16 x i8> %b, i32 3) + ret <16 x i8> %vsli_n +} + +define <8 x i16> 
@test_vsliq_n_s16(<8 x i16> %a, <8 x i16> %b) { +; CHECK: test_vsliq_n_s16 +; CHECK: sli {{v[0-9]+}}.8h, {{v[0-9]+}}.8h, #3 + %vsli = tail call <8 x i16> @llvm.aarch64.neon.vsli.v8i16(<8 x i16> %a, <8 x i16> %b, i32 3) + ret <8 x i16> %vsli +} + +define <4 x i32> @test_vsliq_n_s32(<4 x i32> %a, <4 x i32> %b) { +; CHECK: test_vsliq_n_s32 +; CHECK: sli {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, #3 + %vsli = tail call <4 x i32> @llvm.aarch64.neon.vsli.v4i32(<4 x i32> %a, <4 x i32> %b, i32 3) + ret <4 x i32> %vsli +} + +define <2 x i64> @test_vsliq_n_s64(<2 x i64> %a, <2 x i64> %b) { +; CHECK: test_vsliq_n_s64 +; CHECK: sli {{v[0-9]+}}.2d, {{v[0-9]+}}.2d, #3 + %vsli = tail call <2 x i64> @llvm.aarch64.neon.vsli.v2i64(<2 x i64> %a, <2 x i64> %b, i32 3) + ret <2 x i64> %vsli +} + +define <8 x i8> @test_vsli_n_p8(<8 x i8> %a, <8 x i8> %b) { +; CHECK: test_vsli_n_p8 +; CHECK: sli {{v[0-9]+}}.8b, {{v[0-9]+}}.8b, #3 + %vsli_n = tail call <8 x i8> @llvm.aarch64.neon.vsli.v8i8(<8 x i8> %a, <8 x i8> %b, i32 3) + ret <8 x i8> %vsli_n +} + +define <4 x i16> @test_vsli_n_p16(<4 x i16> %a, <4 x i16> %b) { +; CHECK: test_vsli_n_p16 +; CHECK: sli {{v[0-9]+}}.4h, {{v[0-9]+}}.4h, #15 + %vsli = tail call <4 x i16> @llvm.aarch64.neon.vsli.v4i16(<4 x i16> %a, <4 x i16> %b, i32 15) + ret <4 x i16> %vsli +} + +define <16 x i8> @test_vsliq_n_p8(<16 x i8> %a, <16 x i8> %b) { +; CHECK: test_vsliq_n_p8 +; CHECK: sli {{v[0-9]+}}.16b, {{v[0-9]+}}.16b, #3 + %vsli_n = tail call <16 x i8> @llvm.aarch64.neon.vsli.v16i8(<16 x i8> %a, <16 x i8> %b, i32 3) + ret <16 x i8> %vsli_n +} + +define <8 x i16> @test_vsliq_n_p16(<8 x i16> %a, <8 x i16> %b) { +; CHECK: test_vsliq_n_p16 +; CHECK: sli {{v[0-9]+}}.8h, {{v[0-9]+}}.8h, #15 + %vsli = tail call <8 x i16> @llvm.aarch64.neon.vsli.v8i16(<8 x i16> %a, <8 x i16> %b, i32 15) + ret <8 x i16> %vsli +} + +define <8 x i8> @test_vqshl_n_s8(<8 x i8> %a) { +; CHECK: test_vqshl_n_s8 +; CHECK: sqshl {{v[0-9]+}}.8b, {{v[0-9]+}}.8b, #3 + %vqshl = tail call <8 x i8> @llvm.arm.neon.vqshifts.v8i8(<8 x i8> %a, <8 x i8> <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>) + ret <8 x i8> %vqshl +} + + +define <4 x i16> @test_vqshl_n_s16(<4 x i16> %a) { +; CHECK: test_vqshl_n_s16 +; CHECK: sqshl {{v[0-9]+}}.4h, {{v[0-9]+}}.4h, #3 + %vqshl = tail call <4 x i16> @llvm.arm.neon.vqshifts.v4i16(<4 x i16> %a, <4 x i16> <i16 3, i16 3, i16 3, i16 3>) + ret <4 x i16> %vqshl +} + + +define <2 x i32> @test_vqshl_n_s32(<2 x i32> %a) { +; CHECK: test_vqshl_n_s32 +; CHECK: sqshl {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, #3 + %vqshl = tail call <2 x i32> @llvm.arm.neon.vqshifts.v2i32(<2 x i32> %a, <2 x i32> <i32 3, i32 3>) + ret <2 x i32> %vqshl +} + + +define <16 x i8> @test_vqshlq_n_s8(<16 x i8> %a) { +; CHECK: test_vqshlq_n_s8 +; CHECK: sqshl {{v[0-9]+}}.16b, {{v[0-9]+}}.16b, #3 + %vqshl_n = tail call <16 x i8> @llvm.arm.neon.vqshifts.v16i8(<16 x i8> %a, <16 x i8> <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>) + ret <16 x i8> %vqshl_n +} + + +define <8 x i16> @test_vqshlq_n_s16(<8 x i16> %a) { +; CHECK: test_vqshlq_n_s16 +; CHECK: sqshl {{v[0-9]+}}.8h, {{v[0-9]+}}.8h, #3 + %vqshl = tail call <8 x i16> @llvm.arm.neon.vqshifts.v8i16(<8 x i16> %a, <8 x i16> <i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3>) + ret <8 x i16> %vqshl +} + + +define <4 x i32> @test_vqshlq_n_s32(<4 x i32> %a) { +; CHECK: test_vqshlq_n_s32 +; CHECK: sqshl {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, #3 + %vqshl = tail call <4 x i32> @llvm.arm.neon.vqshifts.v4i32(<4 x i32> %a, <4 x i32> <i32 3, i32 3, i32 3, i32 3>) + 
ret <4 x i32> %vqshl +} + + +define <2 x i64> @test_vqshlq_n_s64(<2 x i64> %a) { +; CHECK: test_vqshlq_n_s64 +; CHECK: sqshl {{v[0-9]+}}.2d, {{v[0-9]+}}.2d, #3 + %vqshl = tail call <2 x i64> @llvm.arm.neon.vqshifts.v2i64(<2 x i64> %a, <2 x i64> <i64 3, i64 3>) + ret <2 x i64> %vqshl +} + + +define <8 x i8> @test_vqshl_n_u8(<8 x i8> %a) { +; CHECK: test_vqshl_n_u8 +; CHECK: uqshl {{v[0-9]+}}.8b, {{v[0-9]+}}.8b, #3 + %vqshl_n = tail call <8 x i8> @llvm.arm.neon.vqshiftu.v8i8(<8 x i8> %a, <8 x i8> <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>) + ret <8 x i8> %vqshl_n +} + + +define <4 x i16> @test_vqshl_n_u16(<4 x i16> %a) { +; CHECK: test_vqshl_n_u16 +; CHECK: uqshl {{v[0-9]+}}.4h, {{v[0-9]+}}.4h, #3 + %vqshl = tail call <4 x i16> @llvm.arm.neon.vqshiftu.v4i16(<4 x i16> %a, <4 x i16> <i16 3, i16 3, i16 3, i16 3>) + ret <4 x i16> %vqshl +} + + +define <2 x i32> @test_vqshl_n_u32(<2 x i32> %a) { +; CHECK: test_vqshl_n_u32 +; CHECK: uqshl {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, #3 + %vqshl = tail call <2 x i32> @llvm.arm.neon.vqshiftu.v2i32(<2 x i32> %a, <2 x i32> <i32 3, i32 3>) + ret <2 x i32> %vqshl +} + + +define <16 x i8> @test_vqshlq_n_u8(<16 x i8> %a) { +; CHECK: test_vqshlq_n_u8 +; CHECK: uqshl {{v[0-9]+}}.16b, {{v[0-9]+}}.16b, #3 + %vqshl_n = tail call <16 x i8> @llvm.arm.neon.vqshiftu.v16i8(<16 x i8> %a, <16 x i8> <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>) + ret <16 x i8> %vqshl_n +} + + +define <8 x i16> @test_vqshlq_n_u16(<8 x i16> %a) { +; CHECK: test_vqshlq_n_u16 +; CHECK: uqshl {{v[0-9]+}}.8h, {{v[0-9]+}}.8h, #3 + %vqshl = tail call <8 x i16> @llvm.arm.neon.vqshiftu.v8i16(<8 x i16> %a, <8 x i16> <i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3>) + ret <8 x i16> %vqshl +} + + +define <4 x i32> @test_vqshlq_n_u32(<4 x i32> %a) { +; CHECK: test_vqshlq_n_u32 +; CHECK: uqshl {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, #3 + %vqshl = tail call <4 x i32> @llvm.arm.neon.vqshiftu.v4i32(<4 x i32> %a, <4 x i32> <i32 3, i32 3, i32 3, i32 3>) + ret <4 x i32> %vqshl +} + + +define <2 x i64> @test_vqshlq_n_u64(<2 x i64> %a) { +; CHECK: test_vqshlq_n_u64 +; CHECK: uqshl {{v[0-9]+}}.2d, {{v[0-9]+}}.2d, #3 + %vqshl = tail call <2 x i64> @llvm.arm.neon.vqshiftu.v2i64(<2 x i64> %a, <2 x i64> <i64 3, i64 3>) + ret <2 x i64> %vqshl +} + +define <8 x i8> @test_vqshlu_n_s8(<8 x i8> %a) { +; CHECK: test_vqshlu_n_s8 +; CHECK: sqshlu {{v[0-9]+}}.8b, {{v[0-9]+}}.8b, #3 + %vqshlu = tail call <8 x i8> @llvm.aarch64.neon.vsqshlu.v8i8(<8 x i8> %a, i32 3) + ret <8 x i8> %vqshlu +} + + +define <4 x i16> @test_vqshlu_n_s16(<4 x i16> %a) { +; CHECK: test_vqshlu_n_s16 +; CHECK: sqshlu {{v[0-9]+}}.4h, {{v[0-9]+}}.4h, #3 + %vqshlu = tail call <4 x i16> @llvm.aarch64.neon.vsqshlu.v4i16(<4 x i16> %a, i32 3) + ret <4 x i16> %vqshlu +} + + +define <2 x i32> @test_vqshlu_n_s32(<2 x i32> %a) { +; CHECK: test_vqshlu_n_s32 +; CHECK: sqshlu {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, #3 + %vqshlu = tail call <2 x i32> @llvm.aarch64.neon.vsqshlu.v2i32(<2 x i32> %a, i32 3) + ret <2 x i32> %vqshlu +} + + +define <16 x i8> @test_vqshluq_n_s8(<16 x i8> %a) { +; CHECK: test_vqshluq_n_s8 +; CHECK: sqshlu {{v[0-9]+}}.16b, {{v[0-9]+}}.16b, #3 + %vqshlu = tail call <16 x i8> @llvm.aarch64.neon.vsqshlu.v16i8(<16 x i8> %a, i32 3) + ret <16 x i8> %vqshlu +} + + +define <8 x i16> @test_vqshluq_n_s16(<8 x i16> %a) { +; CHECK: test_vqshluq_n_s16 +; CHECK: sqshlu {{v[0-9]+}}.8h, {{v[0-9]+}}.8h, #3 + %vqshlu = tail call <8 x i16> @llvm.aarch64.neon.vsqshlu.v8i16(<8 x i16> %a, i32 3) + ret <8 x i16> %vqshlu +} + + 
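+; A note on the sqshlu tests above and below: sqshlu left-shifts each signed
+; lane and saturates the result to the unsigned range of that lane, so with
+; i8 lanes -1 << 3 saturates to 0, 5 << 3 gives 40, and 100 << 3 (= 800)
+; saturates to 255. Unlike the llvm.arm.neon.vqshifts/vqshiftu calls earlier
+; in this file, which pass the shift amount as a splat constant vector, the
+; llvm.aarch64.neon.vsqshlu intrinsics take it as a plain i32 operand.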
+define <4 x i32> @test_vqshluq_n_s32(<4 x i32> %a) { +; CHECK: test_vqshluq_n_s32 +; CHECK: sqshlu {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, #3 + %vqshlu = tail call <4 x i32> @llvm.aarch64.neon.vsqshlu.v4i32(<4 x i32> %a, i32 3) + ret <4 x i32> %vqshlu +} + + +define <2 x i64> @test_vqshluq_n_s64(<2 x i64> %a) { +; CHECK: test_vqshluq_n_s64 +; CHECK: sqshlu {{v[0-9]+}}.2d, {{v[0-9]+}}.2d, #3 + %vqshlu = tail call <2 x i64> @llvm.aarch64.neon.vsqshlu.v2i64(<2 x i64> %a, i32 3) + ret <2 x i64> %vqshlu +} + + +define <8 x i8> @test_vshrn_n_s16(<8 x i16> %a) { +; CHECK: test_vshrn_n_s16 +; CHECK: shrn {{v[0-9]+}}.8b, {{v[0-9]+}}.8h, #3 + %1 = ashr <8 x i16> %a, <i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3> + %vshrn_n = trunc <8 x i16> %1 to <8 x i8> + ret <8 x i8> %vshrn_n +} + +define <4 x i16> @test_vshrn_n_s32(<4 x i32> %a) { +; CHECK: test_vshrn_n_s32 +; CHECK: shrn {{v[0-9]+}}.4h, {{v[0-9]+}}.4s, #9 + %1 = ashr <4 x i32> %a, <i32 9, i32 9, i32 9, i32 9> + %vshrn_n = trunc <4 x i32> %1 to <4 x i16> + ret <4 x i16> %vshrn_n +} + +define <2 x i32> @test_vshrn_n_s64(<2 x i64> %a) { +; CHECK: test_vshrn_n_s64 +; CHECK: shrn {{v[0-9]+}}.2s, {{v[0-9]+}}.2d, #19 + %1 = ashr <2 x i64> %a, <i64 19, i64 19> + %vshrn_n = trunc <2 x i64> %1 to <2 x i32> + ret <2 x i32> %vshrn_n +} + +define <8 x i8> @test_vshrn_n_u16(<8 x i16> %a) { +; CHECK: test_vshrn_n_u16 +; CHECK: shrn {{v[0-9]+}}.8b, {{v[0-9]+}}.8h, #3 + %1 = lshr <8 x i16> %a, <i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3> + %vshrn_n = trunc <8 x i16> %1 to <8 x i8> + ret <8 x i8> %vshrn_n +} + +define <4 x i16> @test_vshrn_n_u32(<4 x i32> %a) { +; CHECK: test_vshrn_n_u32 +; CHECK: shrn {{v[0-9]+}}.4h, {{v[0-9]+}}.4s, #9 + %1 = lshr <4 x i32> %a, <i32 9, i32 9, i32 9, i32 9> + %vshrn_n = trunc <4 x i32> %1 to <4 x i16> + ret <4 x i16> %vshrn_n +} + +define <2 x i32> @test_vshrn_n_u64(<2 x i64> %a) { +; CHECK: test_vshrn_n_u64 +; CHECK: shrn {{v[0-9]+}}.2s, {{v[0-9]+}}.2d, #19 + %1 = lshr <2 x i64> %a, <i64 19, i64 19> + %vshrn_n = trunc <2 x i64> %1 to <2 x i32> + ret <2 x i32> %vshrn_n +} + +define <16 x i8> @test_vshrn_high_n_s16(<8 x i8> %a, <8 x i16> %b) { +; CHECK: test_vshrn_high_n_s16 +; CHECK: shrn2 {{v[0-9]+}}.16b, {{v[0-9]+}}.8h, #3 + %1 = ashr <8 x i16> %b, <i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3> + %vshrn_n = trunc <8 x i16> %1 to <8 x i8> + %2 = bitcast <8 x i8> %a to <1 x i64> + %3 = bitcast <8 x i8> %vshrn_n to <1 x i64> + %shuffle.i = shufflevector <1 x i64> %2, <1 x i64> %3, <2 x i32> <i32 0, i32 1> + %4 = bitcast <2 x i64> %shuffle.i to <16 x i8> + ret <16 x i8> %4 +} + +define <8 x i16> @test_vshrn_high_n_s32(<4 x i16> %a, <4 x i32> %b) { +; CHECK: test_vshrn_high_n_s32 +; CHECK: shrn2 {{v[0-9]+}}.8h, {{v[0-9]+}}.4s, #9 + %1 = ashr <4 x i32> %b, <i32 9, i32 9, i32 9, i32 9> + %vshrn_n = trunc <4 x i32> %1 to <4 x i16> + %2 = bitcast <4 x i16> %a to <1 x i64> + %3 = bitcast <4 x i16> %vshrn_n to <1 x i64> + %shuffle.i = shufflevector <1 x i64> %2, <1 x i64> %3, <2 x i32> <i32 0, i32 1> + %4 = bitcast <2 x i64> %shuffle.i to <8 x i16> + ret <8 x i16> %4 +} + +define <4 x i32> @test_vshrn_high_n_s64(<2 x i32> %a, <2 x i64> %b) { +; CHECK: test_vshrn_high_n_s64 +; CHECK: shrn2 {{v[0-9]+}}.4s, {{v[0-9]+}}.2d, #19 + %1 = bitcast <2 x i32> %a to <1 x i64> + %2 = ashr <2 x i64> %b, <i64 19, i64 19> + %vshrn_n = trunc <2 x i64> %2 to <2 x i32> + %3 = bitcast <2 x i32> %vshrn_n to <1 x i64> + %shuffle.i = shufflevector <1 x i64> %1, <1 x i64> %3, <2 x i32> <i32 0, i32 1> + %4 = bitcast <2 x i64> %shuffle.i 
to <4 x i32> + ret <4 x i32> %4 +} + +define <16 x i8> @test_vshrn_high_n_u16(<8 x i8> %a, <8 x i16> %b) { +; CHECK: test_vshrn_high_n_u16 +; CHECK: shrn2 {{v[0-9]+}}.16b, {{v[0-9]+}}.8h, #3 + %1 = lshr <8 x i16> %b, <i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3> + %vshrn_n = trunc <8 x i16> %1 to <8 x i8> + %2 = bitcast <8 x i8> %a to <1 x i64> + %3 = bitcast <8 x i8> %vshrn_n to <1 x i64> + %shuffle.i = shufflevector <1 x i64> %2, <1 x i64> %3, <2 x i32> <i32 0, i32 1> + %4 = bitcast <2 x i64> %shuffle.i to <16 x i8> + ret <16 x i8> %4 +} + +define <8 x i16> @test_vshrn_high_n_u32(<4 x i16> %a, <4 x i32> %b) { +; CHECK: test_vshrn_high_n_u32 +; CHECK: shrn2 {{v[0-9]+}}.8h, {{v[0-9]+}}.4s, #9 + %1 = lshr <4 x i32> %b, <i32 9, i32 9, i32 9, i32 9> + %vshrn_n = trunc <4 x i32> %1 to <4 x i16> + %2 = bitcast <4 x i16> %a to <1 x i64> + %3 = bitcast <4 x i16> %vshrn_n to <1 x i64> + %shuffle.i = shufflevector <1 x i64> %2, <1 x i64> %3, <2 x i32> <i32 0, i32 1> + %4 = bitcast <2 x i64> %shuffle.i to <8 x i16> + ret <8 x i16> %4 +} + +define <4 x i32> @test_vshrn_high_n_u64(<2 x i32> %a, <2 x i64> %b) { +; CHECK: test_vshrn_high_n_u64 +; CHECK: shrn2 {{v[0-9]+}}.4s, {{v[0-9]+}}.2d, #19 + %1 = bitcast <2 x i32> %a to <1 x i64> + %2 = lshr <2 x i64> %b, <i64 19, i64 19> + %vshrn_n = trunc <2 x i64> %2 to <2 x i32> + %3 = bitcast <2 x i32> %vshrn_n to <1 x i64> + %shuffle.i = shufflevector <1 x i64> %1, <1 x i64> %3, <2 x i32> <i32 0, i32 1> + %4 = bitcast <2 x i64> %shuffle.i to <4 x i32> + ret <4 x i32> %4 +} + +define <8 x i8> @test_vqshrun_n_s16(<8 x i16> %a) { +; CHECK: test_vqshrun_n_s16 +; CHECK: sqshrun {{v[0-9]+}}.8b, {{v[0-9]+}}.8h, #3 + %vqshrun = tail call <8 x i8> @llvm.aarch64.neon.vsqshrun.v8i8(<8 x i16> %a, i32 3) + ret <8 x i8> %vqshrun +} + + +define <4 x i16> @test_vqshrun_n_s32(<4 x i32> %a) { +; CHECK: test_vqshrun_n_s32 +; CHECK: sqshrun {{v[0-9]+}}.4h, {{v[0-9]+}}.4s, #9 + %vqshrun = tail call <4 x i16> @llvm.aarch64.neon.vsqshrun.v4i16(<4 x i32> %a, i32 9) + ret <4 x i16> %vqshrun +} + +define <2 x i32> @test_vqshrun_n_s64(<2 x i64> %a) { +; CHECK: test_vqshrun_n_s64 +; CHECK: sqshrun {{v[0-9]+}}.2s, {{v[0-9]+}}.2d, #19 + %vqshrun = tail call <2 x i32> @llvm.aarch64.neon.vsqshrun.v2i32(<2 x i64> %a, i32 19) + ret <2 x i32> %vqshrun +} + +define <16 x i8> @test_vqshrun_high_n_s16(<8 x i8> %a, <8 x i16> %b) { +; CHECK: test_vqshrun_high_n_s16 +; CHECK: sqshrun2 {{v[0-9]+}}.16b, {{v[0-9]+}}.8h, #3 + %vqshrun = tail call <8 x i8> @llvm.aarch64.neon.vsqshrun.v8i8(<8 x i16> %b, i32 3) + %1 = bitcast <8 x i8> %a to <1 x i64> + %2 = bitcast <8 x i8> %vqshrun to <1 x i64> + %shuffle.i = shufflevector <1 x i64> %1, <1 x i64> %2, <2 x i32> <i32 0, i32 1> + %3 = bitcast <2 x i64> %shuffle.i to <16 x i8> + ret <16 x i8> %3 +} + +define <8 x i16> @test_vqshrun_high_n_s32(<4 x i16> %a, <4 x i32> %b) { +; CHECK: test_vqshrun_high_n_s32 +; CHECK: sqshrun2 {{v[0-9]+}}.8h, {{v[0-9]+}}.4s, #9 + %vqshrun = tail call <4 x i16> @llvm.aarch64.neon.vsqshrun.v4i16(<4 x i32> %b, i32 9) + %1 = bitcast <4 x i16> %a to <1 x i64> + %2 = bitcast <4 x i16> %vqshrun to <1 x i64> + %shuffle.i = shufflevector <1 x i64> %1, <1 x i64> %2, <2 x i32> <i32 0, i32 1> + %3 = bitcast <2 x i64> %shuffle.i to <8 x i16> + ret <8 x i16> %3 +} + +define <4 x i32> @test_vqshrun_high_n_s64(<2 x i32> %a, <2 x i64> %b) { +; CHECK: test_vqshrun_high_n_s64 +; CHECK: sqshrun2 {{v[0-9]+}}.4s, {{v[0-9]+}}.2d, #19 + %1 = bitcast <2 x i32> %a to <1 x i64> + %vqshrun = tail call <2 x i32> 
@llvm.aarch64.neon.vsqshrun.v2i32(<2 x i64> %b, i32 19) + %2 = bitcast <2 x i32> %vqshrun to <1 x i64> + %shuffle.i = shufflevector <1 x i64> %1, <1 x i64> %2, <2 x i32> <i32 0, i32 1> + %3 = bitcast <2 x i64> %shuffle.i to <4 x i32> + ret <4 x i32> %3 +} + +define <8 x i8> @test_vrshrn_n_s16(<8 x i16> %a) { +; CHECK: test_vrshrn_n_s16 +; CHECK: rshrn {{v[0-9]+}}.8b, {{v[0-9]+}}.8h, #3 + %vrshrn = tail call <8 x i8> @llvm.aarch64.neon.vrshrn.v8i8(<8 x i16> %a, i32 3) + ret <8 x i8> %vrshrn +} + + +define <4 x i16> @test_vrshrn_n_s32(<4 x i32> %a) { +; CHECK: test_vrshrn_n_s32 +; CHECK: rshrn {{v[0-9]+}}.4h, {{v[0-9]+}}.4s, #9 + %vrshrn = tail call <4 x i16> @llvm.aarch64.neon.vrshrn.v4i16(<4 x i32> %a, i32 9) + ret <4 x i16> %vrshrn +} + + +define <2 x i32> @test_vrshrn_n_s64(<2 x i64> %a) { +; CHECK: test_vrshrn_n_s64 +; CHECK: rshrn {{v[0-9]+}}.2s, {{v[0-9]+}}.2d, #19 + %vrshrn = tail call <2 x i32> @llvm.aarch64.neon.vrshrn.v2i32(<2 x i64> %a, i32 19) + ret <2 x i32> %vrshrn +} + +define <16 x i8> @test_vrshrn_high_n_s16(<8 x i8> %a, <8 x i16> %b) { +; CHECK: test_vrshrn_high_n_s16 +; CHECK: rshrn2 {{v[0-9]+}}.16b, {{v[0-9]+}}.8h, #3 + %vrshrn = tail call <8 x i8> @llvm.aarch64.neon.vrshrn.v8i8(<8 x i16> %b, i32 3) + %1 = bitcast <8 x i8> %a to <1 x i64> + %2 = bitcast <8 x i8> %vrshrn to <1 x i64> + %shuffle.i = shufflevector <1 x i64> %1, <1 x i64> %2, <2 x i32> <i32 0, i32 1> + %3 = bitcast <2 x i64> %shuffle.i to <16 x i8> + ret <16 x i8> %3 +} + +define <8 x i16> @test_vrshrn_high_n_s32(<4 x i16> %a, <4 x i32> %b) { +; CHECK: test_vrshrn_high_n_s32 +; CHECK: rshrn2 {{v[0-9]+}}.8h, {{v[0-9]+}}.4s, #9 + %vrshrn = tail call <4 x i16> @llvm.aarch64.neon.vrshrn.v4i16(<4 x i32> %b, i32 9) + %1 = bitcast <4 x i16> %a to <1 x i64> + %2 = bitcast <4 x i16> %vrshrn to <1 x i64> + %shuffle.i = shufflevector <1 x i64> %1, <1 x i64> %2, <2 x i32> <i32 0, i32 1> + %3 = bitcast <2 x i64> %shuffle.i to <8 x i16> + ret <8 x i16> %3 +} + +define <4 x i32> @test_vrshrn_high_n_s64(<2 x i32> %a, <2 x i64> %b) { +; CHECK: test_vrshrn_high_n_s64 +; CHECK: rshrn2 {{v[0-9]+}}.4s, {{v[0-9]+}}.2d, #19 + %1 = bitcast <2 x i32> %a to <1 x i64> + %vrshrn = tail call <2 x i32> @llvm.aarch64.neon.vrshrn.v2i32(<2 x i64> %b, i32 19) + %2 = bitcast <2 x i32> %vrshrn to <1 x i64> + %shuffle.i = shufflevector <1 x i64> %1, <1 x i64> %2, <2 x i32> <i32 0, i32 1> + %3 = bitcast <2 x i64> %shuffle.i to <4 x i32> + ret <4 x i32> %3 +} + +define <8 x i8> @test_vqrshrun_n_s16(<8 x i16> %a) { +; CHECK: test_vqrshrun_n_s16 +; CHECK: sqrshrun {{v[0-9]+}}.8b, {{v[0-9]+}}.8h, #3 + %vqrshrun = tail call <8 x i8> @llvm.aarch64.neon.vsqrshrun.v8i8(<8 x i16> %a, i32 3) + ret <8 x i8> %vqrshrun +} + +define <4 x i16> @test_vqrshrun_n_s32(<4 x i32> %a) { +; CHECK: test_vqrshrun_n_s32 +; CHECK: sqrshrun {{v[0-9]+}}.4h, {{v[0-9]+}}.4s, #9 + %vqrshrun = tail call <4 x i16> @llvm.aarch64.neon.vsqrshrun.v4i16(<4 x i32> %a, i32 9) + ret <4 x i16> %vqrshrun +} + +define <2 x i32> @test_vqrshrun_n_s64(<2 x i64> %a) { +; CHECK: test_vqrshrun_n_s64 +; CHECK: sqrshrun {{v[0-9]+}}.2s, {{v[0-9]+}}.2d, #19 + %vqrshrun = tail call <2 x i32> @llvm.aarch64.neon.vsqrshrun.v2i32(<2 x i64> %a, i32 19) + ret <2 x i32> %vqrshrun +} + +define <16 x i8> @test_vqrshrun_high_n_s16(<8 x i8> %a, <8 x i16> %b) { +; CHECK: test_vqrshrun_high_n_s16 +; CHECK: sqrshrun2 {{v[0-9]+}}.16b, {{v[0-9]+}}.8h, #3 + %vqrshrun = tail call <8 x i8> @llvm.aarch64.neon.vsqrshrun.v8i8(<8 x i16> %b, i32 3) + %1 = bitcast <8 x i8> %a to <1 x i64> + %2 = bitcast <8 x i8> %vqrshrun to 
<1 x i64> + %shuffle.i = shufflevector <1 x i64> %1, <1 x i64> %2, <2 x i32> <i32 0, i32 1> + %3 = bitcast <2 x i64> %shuffle.i to <16 x i8> + ret <16 x i8> %3 +} + +define <8 x i16> @test_vqrshrun_high_n_s32(<4 x i16> %a, <4 x i32> %b) { +; CHECK: test_vqrshrun_high_n_s32 +; CHECK: sqrshrun2 {{v[0-9]+}}.8h, {{v[0-9]+}}.4s, #9 + %vqrshrun = tail call <4 x i16> @llvm.aarch64.neon.vsqrshrun.v4i16(<4 x i32> %b, i32 9) + %1 = bitcast <4 x i16> %a to <1 x i64> + %2 = bitcast <4 x i16> %vqrshrun to <1 x i64> + %shuffle.i = shufflevector <1 x i64> %1, <1 x i64> %2, <2 x i32> <i32 0, i32 1> + %3 = bitcast <2 x i64> %shuffle.i to <8 x i16> + ret <8 x i16> %3 +} + +define <4 x i32> @test_vqrshrun_high_n_s64(<2 x i32> %a, <2 x i64> %b) { +; CHECK: test_vqrshrun_high_n_s64 +; CHECK: sqrshrun2 {{v[0-9]+}}.4s, {{v[0-9]+}}.2d, #19 + %1 = bitcast <2 x i32> %a to <1 x i64> + %vqrshrun = tail call <2 x i32> @llvm.aarch64.neon.vsqrshrun.v2i32(<2 x i64> %b, i32 19) + %2 = bitcast <2 x i32> %vqrshrun to <1 x i64> + %shuffle.i = shufflevector <1 x i64> %1, <1 x i64> %2, <2 x i32> <i32 0, i32 1> + %3 = bitcast <2 x i64> %shuffle.i to <4 x i32> + ret <4 x i32> %3 +} + +define <8 x i8> @test_vqshrn_n_s16(<8 x i16> %a) { +; CHECK: test_vqshrn_n_s16 +; CHECK: sqshrn {{v[0-9]+}}.8b, {{v[0-9]+}}.8h, #3 + %vqshrn = tail call <8 x i8> @llvm.aarch64.neon.vsqshrn.v8i8(<8 x i16> %a, i32 3) + ret <8 x i8> %vqshrn +} + + +define <4 x i16> @test_vqshrn_n_s32(<4 x i32> %a) { +; CHECK: test_vqshrn_n_s32 +; CHECK: sqshrn {{v[0-9]+}}.4h, {{v[0-9]+}}.4s, #9 + %vqshrn = tail call <4 x i16> @llvm.aarch64.neon.vsqshrn.v4i16(<4 x i32> %a, i32 9) + ret <4 x i16> %vqshrn +} + + +define <2 x i32> @test_vqshrn_n_s64(<2 x i64> %a) { +; CHECK: test_vqshrn_n_s64 +; CHECK: sqshrn {{v[0-9]+}}.2s, {{v[0-9]+}}.2d, #19 + %vqshrn = tail call <2 x i32> @llvm.aarch64.neon.vsqshrn.v2i32(<2 x i64> %a, i32 19) + ret <2 x i32> %vqshrn +} + + +define <8 x i8> @test_vqshrn_n_u16(<8 x i16> %a) { +; CHECK: test_vqshrn_n_u16 +; CHECK: uqshrn {{v[0-9]+}}.8b, {{v[0-9]+}}.8h, #3 + %vqshrn = tail call <8 x i8> @llvm.aarch64.neon.vuqshrn.v8i8(<8 x i16> %a, i32 3) + ret <8 x i8> %vqshrn +} + + +define <4 x i16> @test_vqshrn_n_u32(<4 x i32> %a) { +; CHECK: test_vqshrn_n_u32 +; CHECK: uqshrn {{v[0-9]+}}.4h, {{v[0-9]+}}.4s, #9 + %vqshrn = tail call <4 x i16> @llvm.aarch64.neon.vuqshrn.v4i16(<4 x i32> %a, i32 9) + ret <4 x i16> %vqshrn +} + + +define <2 x i32> @test_vqshrn_n_u64(<2 x i64> %a) { +; CHECK: test_vqshrn_n_u64 +; CHECK: uqshrn {{v[0-9]+}}.2s, {{v[0-9]+}}.2d, #19 + %vqshrn = tail call <2 x i32> @llvm.aarch64.neon.vuqshrn.v2i32(<2 x i64> %a, i32 19) + ret <2 x i32> %vqshrn +} + + +define <16 x i8> @test_vqshrn_high_n_s16(<8 x i8> %a, <8 x i16> %b) { +; CHECK: test_vqshrn_high_n_s16 +; CHECK: sqshrn2 {{v[0-9]+}}.16b, {{v[0-9]+}}.8h, #3 + %vqshrn = tail call <8 x i8> @llvm.aarch64.neon.vsqshrn.v8i8(<8 x i16> %b, i32 3) + %1 = bitcast <8 x i8> %a to <1 x i64> + %2 = bitcast <8 x i8> %vqshrn to <1 x i64> + %shuffle.i = shufflevector <1 x i64> %1, <1 x i64> %2, <2 x i32> <i32 0, i32 1> + %3 = bitcast <2 x i64> %shuffle.i to <16 x i8> + ret <16 x i8> %3 +} + +define <8 x i16> @test_vqshrn_high_n_s32(<4 x i16> %a, <4 x i32> %b) { +; CHECK: test_vqshrn_high_n_s32 +; CHECK: sqshrn2 {{v[0-9]+}}.8h, {{v[0-9]+}}.4s, #9 + %vqshrn = tail call <4 x i16> @llvm.aarch64.neon.vsqshrn.v4i16(<4 x i32> %b, i32 9) + %1 = bitcast <4 x i16> %a to <1 x i64> + %2 = bitcast <4 x i16> %vqshrn to <1 x i64> + %shuffle.i = shufflevector <1 x i64> %1, <1 x i64> %2, <2 x i32> <i32 0, i32 1> 
+ %3 = bitcast <2 x i64> %shuffle.i to <8 x i16> + ret <8 x i16> %3 +} + +define <4 x i32> @test_vqshrn_high_n_s64(<2 x i32> %a, <2 x i64> %b) { +; CHECK: test_vqshrn_high_n_s64 +; CHECK: sqshrn2 {{v[0-9]+}}.4s, {{v[0-9]+}}.2d, #19 + %1 = bitcast <2 x i32> %a to <1 x i64> + %vqshrn = tail call <2 x i32> @llvm.aarch64.neon.vsqshrn.v2i32(<2 x i64> %b, i32 19) + %2 = bitcast <2 x i32> %vqshrn to <1 x i64> + %shuffle.i = shufflevector <1 x i64> %1, <1 x i64> %2, <2 x i32> <i32 0, i32 1> + %3 = bitcast <2 x i64> %shuffle.i to <4 x i32> + ret <4 x i32> %3 +} + +define <16 x i8> @test_vqshrn_high_n_u16(<8 x i8> %a, <8 x i16> %b) { +; CHECK: test_vqshrn_high_n_u16 +; CHECK: uqshrn2 {{v[0-9]+}}.16b, {{v[0-9]+}}.8h, #3 + %vqshrn = tail call <8 x i8> @llvm.aarch64.neon.vuqshrn.v8i8(<8 x i16> %b, i32 3) + %1 = bitcast <8 x i8> %a to <1 x i64> + %2 = bitcast <8 x i8> %vqshrn to <1 x i64> + %shuffle.i = shufflevector <1 x i64> %1, <1 x i64> %2, <2 x i32> <i32 0, i32 1> + %3 = bitcast <2 x i64> %shuffle.i to <16 x i8> + ret <16 x i8> %3 +} + +define <8 x i16> @test_vqshrn_high_n_u32(<4 x i16> %a, <4 x i32> %b) { +; CHECK: test_vqshrn_high_n_u32 +; CHECK: uqshrn2 {{v[0-9]+}}.8h, {{v[0-9]+}}.4s, #9 + %vqshrn = tail call <4 x i16> @llvm.aarch64.neon.vuqshrn.v4i16(<4 x i32> %b, i32 9) + %1 = bitcast <4 x i16> %a to <1 x i64> + %2 = bitcast <4 x i16> %vqshrn to <1 x i64> + %shuffle.i = shufflevector <1 x i64> %1, <1 x i64> %2, <2 x i32> <i32 0, i32 1> + %3 = bitcast <2 x i64> %shuffle.i to <8 x i16> + ret <8 x i16> %3 +} + +define <4 x i32> @test_vqshrn_high_n_u64(<2 x i32> %a, <2 x i64> %b) { +; CHECK: test_vqshrn_high_n_u64 +; CHECK: uqshrn2 {{v[0-9]+}}.4s, {{v[0-9]+}}.2d, #19 + %1 = bitcast <2 x i32> %a to <1 x i64> + %vqshrn = tail call <2 x i32> @llvm.aarch64.neon.vuqshrn.v2i32(<2 x i64> %b, i32 19) + %2 = bitcast <2 x i32> %vqshrn to <1 x i64> + %shuffle.i = shufflevector <1 x i64> %1, <1 x i64> %2, <2 x i32> <i32 0, i32 1> + %3 = bitcast <2 x i64> %shuffle.i to <4 x i32> + ret <4 x i32> %3 +} + +define <8 x i8> @test_vqrshrn_n_s16(<8 x i16> %a) { +; CHECK: test_vqrshrn_n_s16 +; CHECK: sqrshrn {{v[0-9]+}}.8b, {{v[0-9]+}}.8h, #3 + %vqrshrn = tail call <8 x i8> @llvm.aarch64.neon.vsqrshrn.v8i8(<8 x i16> %a, i32 3) + ret <8 x i8> %vqrshrn +} + + +define <4 x i16> @test_vqrshrn_n_s32(<4 x i32> %a) { +; CHECK: test_vqrshrn_n_s32 +; CHECK: sqrshrn {{v[0-9]+}}.4h, {{v[0-9]+}}.4s, #9 + %vqrshrn = tail call <4 x i16> @llvm.aarch64.neon.vsqrshrn.v4i16(<4 x i32> %a, i32 9) + ret <4 x i16> %vqrshrn +} + + +define <2 x i32> @test_vqrshrn_n_s64(<2 x i64> %a) { +; CHECK: test_vqrshrn_n_s64 +; CHECK: sqrshrn {{v[0-9]+}}.2s, {{v[0-9]+}}.2d, #19 + %vqrshrn = tail call <2 x i32> @llvm.aarch64.neon.vsqrshrn.v2i32(<2 x i64> %a, i32 19) + ret <2 x i32> %vqrshrn +} + + +define <8 x i8> @test_vqrshrn_n_u16(<8 x i16> %a) { +; CHECK: test_vqrshrn_n_u16 +; CHECK: uqrshrn {{v[0-9]+}}.8b, {{v[0-9]+}}.8h, #3 + %vqrshrn = tail call <8 x i8> @llvm.aarch64.neon.vuqrshrn.v8i8(<8 x i16> %a, i32 3) + ret <8 x i8> %vqrshrn +} + + +define <4 x i16> @test_vqrshrn_n_u32(<4 x i32> %a) { +; CHECK: test_vqrshrn_n_u32 +; CHECK: uqrshrn {{v[0-9]+}}.4h, {{v[0-9]+}}.4s, #9 + %vqrshrn = tail call <4 x i16> @llvm.aarch64.neon.vuqrshrn.v4i16(<4 x i32> %a, i32 9) + ret <4 x i16> %vqrshrn +} + + +define <2 x i32> @test_vqrshrn_n_u64(<2 x i64> %a) { +; CHECK: test_vqrshrn_n_u64 +; CHECK: uqrshrn {{v[0-9]+}}.2s, {{v[0-9]+}}.2d, #19 + %vqrshrn = tail call <2 x i32> @llvm.aarch64.neon.vuqrshrn.v2i32(<2 x i64> %a, i32 19) + ret <2 x i32> %vqrshrn +} + + 
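+; The *_high_n tests in this file all use the same idiom: the narrowed 64-bit
+; result and the existing low half are each bitcast to <1 x i64>, concatenated
+; with a shufflevector into <2 x i64>, and bitcast back to the full 128-bit
+; type. The checks expect that whole sequence to be matched as the "2"
+; (write-to-upper-half) form of the narrowing shift, e.g. sqrshrn2.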
+define <16 x i8> @test_vqrshrn_high_n_s16(<8 x i8> %a, <8 x i16> %b) { +; CHECK: test_vqrshrn_high_n_s16 +; CHECK: sqrshrn2 {{v[0-9]+}}.16b, {{v[0-9]+}}.8h, #3 + %vqrshrn = tail call <8 x i8> @llvm.aarch64.neon.vsqrshrn.v8i8(<8 x i16> %b, i32 3) + %1 = bitcast <8 x i8> %a to <1 x i64> + %2 = bitcast <8 x i8> %vqrshrn to <1 x i64> + %shuffle.i = shufflevector <1 x i64> %1, <1 x i64> %2, <2 x i32> <i32 0, i32 1> + %3 = bitcast <2 x i64> %shuffle.i to <16 x i8> + ret <16 x i8> %3 +} + +define <8 x i16> @test_vqrshrn_high_n_s32(<4 x i16> %a, <4 x i32> %b) { +; CHECK: test_vqrshrn_high_n_s32 +; CHECK: sqrshrn2 {{v[0-9]+}}.8h, {{v[0-9]+}}.4s, #9 + %vqrshrn = tail call <4 x i16> @llvm.aarch64.neon.vsqrshrn.v4i16(<4 x i32> %b, i32 9) + %1 = bitcast <4 x i16> %a to <1 x i64> + %2 = bitcast <4 x i16> %vqrshrn to <1 x i64> + %shuffle.i = shufflevector <1 x i64> %1, <1 x i64> %2, <2 x i32> <i32 0, i32 1> + %3 = bitcast <2 x i64> %shuffle.i to <8 x i16> + ret <8 x i16> %3 +} + +define <4 x i32> @test_vqrshrn_high_n_s64(<2 x i32> %a, <2 x i64> %b) { +; CHECK: test_vqrshrn_high_n_s64 +; CHECK: sqrshrn2 {{v[0-9]+}}.4s, {{v[0-9]+}}.2d, #19 + %1 = bitcast <2 x i32> %a to <1 x i64> + %vqrshrn = tail call <2 x i32> @llvm.aarch64.neon.vsqrshrn.v2i32(<2 x i64> %b, i32 19) + %2 = bitcast <2 x i32> %vqrshrn to <1 x i64> + %shuffle.i = shufflevector <1 x i64> %1, <1 x i64> %2, <2 x i32> <i32 0, i32 1> + %3 = bitcast <2 x i64> %shuffle.i to <4 x i32> + ret <4 x i32> %3 +} + +define <16 x i8> @test_vqrshrn_high_n_u16(<8 x i8> %a, <8 x i16> %b) { +; CHECK: test_vqrshrn_high_n_u16 +; CHECK: uqrshrn2 {{v[0-9]+}}.16b, {{v[0-9]+}}.8h, #3 + %vqrshrn = tail call <8 x i8> @llvm.aarch64.neon.vuqrshrn.v8i8(<8 x i16> %b, i32 3) + %1 = bitcast <8 x i8> %a to <1 x i64> + %2 = bitcast <8 x i8> %vqrshrn to <1 x i64> + %shuffle.i = shufflevector <1 x i64> %1, <1 x i64> %2, <2 x i32> <i32 0, i32 1> + %3 = bitcast <2 x i64> %shuffle.i to <16 x i8> + ret <16 x i8> %3 +} + +define <8 x i16> @test_vqrshrn_high_n_u32(<4 x i16> %a, <4 x i32> %b) { +; CHECK: test_vqrshrn_high_n_u32 +; CHECK: uqrshrn2 {{v[0-9]+}}.8h, {{v[0-9]+}}.4s, #9 + %vqrshrn = tail call <4 x i16> @llvm.aarch64.neon.vuqrshrn.v4i16(<4 x i32> %b, i32 9) + %1 = bitcast <4 x i16> %a to <1 x i64> + %2 = bitcast <4 x i16> %vqrshrn to <1 x i64> + %shuffle.i = shufflevector <1 x i64> %1, <1 x i64> %2, <2 x i32> <i32 0, i32 1> + %3 = bitcast <2 x i64> %shuffle.i to <8 x i16> + ret <8 x i16> %3 +} + +define <4 x i32> @test_vqrshrn_high_n_u64(<2 x i32> %a, <2 x i64> %b) { +; CHECK: test_vqrshrn_high_n_u64 +; CHECK: uqrshrn2 {{v[0-9]+}}.4s, {{v[0-9]+}}.2d, #19 + %1 = bitcast <2 x i32> %a to <1 x i64> + %vqrshrn = tail call <2 x i32> @llvm.aarch64.neon.vuqrshrn.v2i32(<2 x i64> %b, i32 19) + %2 = bitcast <2 x i32> %vqrshrn to <1 x i64> + %shuffle.i = shufflevector <1 x i64> %1, <1 x i64> %2, <2 x i32> <i32 0, i32 1> + %3 = bitcast <2 x i64> %shuffle.i to <4 x i32> + ret <4 x i32> %3 +} + +define <2 x float> @test_vcvt_n_f32_s32(<2 x i32> %a) { +; CHECK: test_vcvt_n_f32_s32 +; CHECK: scvtf {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, #31 + %vcvt = tail call <2 x float> @llvm.arm.neon.vcvtfxs2fp.v2f32.v2i32(<2 x i32> %a, i32 31) + ret <2 x float> %vcvt +} + +define <4 x float> @test_vcvtq_n_f32_s32(<4 x i32> %a) { +; CHECK: test_vcvtq_n_f32_s32 +; CHECK: scvtf {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, #31 + %vcvt = tail call <4 x float> @llvm.arm.neon.vcvtfxs2fp.v4f32.v4i32(<4 x i32> %a, i32 31) + ret <4 x float> %vcvt +} + +define <2 x double> @test_vcvtq_n_f64_s64(<2 x i64> %a) { +; CHECK: 
test_vcvtq_n_f64_s64
+; CHECK: scvtf {{v[0-9]+}}.2d, {{v[0-9]+}}.2d, #50
+ %vcvt = tail call <2 x double> @llvm.arm.neon.vcvtfxs2fp.v2f64.v2i64(<2 x i64> %a, i32 50)
+ ret <2 x double> %vcvt
+}
+
+define <2 x float> @test_vcvt_n_f32_u32(<2 x i32> %a) {
+; CHECK: test_vcvt_n_f32_u32
+; CHECK: ucvtf {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, #31
+ %vcvt = tail call <2 x float> @llvm.arm.neon.vcvtfxu2fp.v2f32.v2i32(<2 x i32> %a, i32 31)
+ ret <2 x float> %vcvt
+}
+
+define <4 x float> @test_vcvtq_n_f32_u32(<4 x i32> %a) {
+; CHECK: test_vcvtq_n_f32_u32
+; CHECK: ucvtf {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, #31
+ %vcvt = tail call <4 x float> @llvm.arm.neon.vcvtfxu2fp.v4f32.v4i32(<4 x i32> %a, i32 31)
+ ret <4 x float> %vcvt
+}
+
+define <2 x double> @test_vcvtq_n_f64_u64(<2 x i64> %a) {
+; CHECK: test_vcvtq_n_f64_u64
+; CHECK: ucvtf {{v[0-9]+}}.2d, {{v[0-9]+}}.2d, #50
+ %vcvt = tail call <2 x double> @llvm.arm.neon.vcvtfxu2fp.v2f64.v2i64(<2 x i64> %a, i32 50)
+ ret <2 x double> %vcvt
+}
+
+define <2 x i32> @test_vcvt_n_s32_f32(<2 x float> %a) {
+; CHECK: test_vcvt_n_s32_f32
+; CHECK: fcvtzs {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, #31
+ %vcvt = tail call <2 x i32> @llvm.arm.neon.vcvtfp2fxs.v2i32.v2f32(<2 x float> %a, i32 31)
+ ret <2 x i32> %vcvt
+}
+
+define <4 x i32> @test_vcvtq_n_s32_f32(<4 x float> %a) {
+; CHECK: test_vcvtq_n_s32_f32
+; CHECK: fcvtzs {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, #31
+ %vcvt = tail call <4 x i32> @llvm.arm.neon.vcvtfp2fxs.v4i32.v4f32(<4 x float> %a, i32 31)
+ ret <4 x i32> %vcvt
+}
+
+define <2 x i64> @test_vcvtq_n_s64_f64(<2 x double> %a) {
+; CHECK: test_vcvtq_n_s64_f64
+; CHECK: fcvtzs {{v[0-9]+}}.2d, {{v[0-9]+}}.2d, #50
+ %vcvt = tail call <2 x i64> @llvm.arm.neon.vcvtfp2fxs.v2i64.v2f64(<2 x double> %a, i32 50)
+ ret <2 x i64> %vcvt
+}
+
+define <2 x i32> @test_vcvt_n_u32_f32(<2 x float> %a) {
+; CHECK: test_vcvt_n_u32_f32
+; CHECK: fcvtzu {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, #31
+ %vcvt = tail call <2 x i32> @llvm.arm.neon.vcvtfp2fxu.v2i32.v2f32(<2 x float> %a, i32 31)
+ ret <2 x i32> %vcvt
+}
+
+define <4 x i32> @test_vcvtq_n_u32_f32(<4 x float> %a) {
+; CHECK: test_vcvtq_n_u32_f32
+; CHECK: fcvtzu {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, #31
+ %vcvt = tail call <4 x i32> @llvm.arm.neon.vcvtfp2fxu.v4i32.v4f32(<4 x float> %a, i32 31)
+ ret <4 x i32> %vcvt
+}
+
+define <2 x i64> @test_vcvtq_n_u64_f64(<2 x double> %a) {
+; CHECK: test_vcvtq_n_u64_f64
+; CHECK: fcvtzu {{v[0-9]+}}.2d, {{v[0-9]+}}.2d, #50
+ %vcvt = tail call <2 x i64> @llvm.arm.neon.vcvtfp2fxu.v2i64.v2f64(<2 x double> %a, i32 50)
+ ret <2 x i64> %vcvt
+}
+
+declare <8 x i8> @llvm.aarch64.neon.vsrshr.v8i8(<8 x i8>, i32)
+
+declare <4 x i16> @llvm.aarch64.neon.vsrshr.v4i16(<4 x i16>, i32)
+
+declare <2 x i32> @llvm.aarch64.neon.vsrshr.v2i32(<2 x i32>, i32)
+
+declare <16 x i8> @llvm.aarch64.neon.vsrshr.v16i8(<16 x i8>, i32)
+
+declare <8 x i16> @llvm.aarch64.neon.vsrshr.v8i16(<8 x i16>, i32)
+
+declare <4 x i32> @llvm.aarch64.neon.vsrshr.v4i32(<4 x i32>, i32)
+
+declare <2 x i64> @llvm.aarch64.neon.vsrshr.v2i64(<2 x i64>, i32)
+
+declare <8 x i8> @llvm.aarch64.neon.vurshr.v8i8(<8 x i8>, i32)
+
+declare <4 x i16> @llvm.aarch64.neon.vurshr.v4i16(<4 x i16>, i32)
+
+declare <2 x i32> @llvm.aarch64.neon.vurshr.v2i32(<2 x i32>, i32)
+
+declare <16 x i8> @llvm.aarch64.neon.vurshr.v16i8(<16 x i8>, i32)
+
+declare <8 x i16> @llvm.aarch64.neon.vurshr.v8i16(<8 x i16>, i32)
+
+declare <4 x i32> @llvm.aarch64.neon.vurshr.v4i32(<4 x i32>, i32)
+
+declare <2 x i64> @llvm.aarch64.neon.vurshr.v2i64(<2 x i64>, i32)
+
+declare <8 x i8>
@llvm.aarch64.neon.vsri.v8i8(<8 x i8>, <8 x i8>, i32) + +declare <4 x i16> @llvm.aarch64.neon.vsri.v4i16(<4 x i16>, <4 x i16>, i32) + +declare <2 x i32> @llvm.aarch64.neon.vsri.v2i32(<2 x i32>, <2 x i32>, i32) + +declare <16 x i8> @llvm.aarch64.neon.vsri.v16i8(<16 x i8>, <16 x i8>, i32) + +declare <8 x i16> @llvm.aarch64.neon.vsri.v8i16(<8 x i16>, <8 x i16>, i32) + +declare <4 x i32> @llvm.aarch64.neon.vsri.v4i32(<4 x i32>, <4 x i32>, i32) + +declare <2 x i64> @llvm.aarch64.neon.vsri.v2i64(<2 x i64>, <2 x i64>, i32) + +declare <8 x i8> @llvm.aarch64.neon.vsli.v8i8(<8 x i8>, <8 x i8>, i32) + +declare <4 x i16> @llvm.aarch64.neon.vsli.v4i16(<4 x i16>, <4 x i16>, i32) + +declare <2 x i32> @llvm.aarch64.neon.vsli.v2i32(<2 x i32>, <2 x i32>, i32) + +declare <16 x i8> @llvm.aarch64.neon.vsli.v16i8(<16 x i8>, <16 x i8>, i32) + +declare <8 x i16> @llvm.aarch64.neon.vsli.v8i16(<8 x i16>, <8 x i16>, i32) + +declare <4 x i32> @llvm.aarch64.neon.vsli.v4i32(<4 x i32>, <4 x i32>, i32) + +declare <2 x i64> @llvm.aarch64.neon.vsli.v2i64(<2 x i64>, <2 x i64>, i32) + +declare <8 x i8> @llvm.aarch64.neon.vsqshlu.v8i8(<8 x i8>, i32) + +declare <4 x i16> @llvm.aarch64.neon.vsqshlu.v4i16(<4 x i16>, i32) + +declare <2 x i32> @llvm.aarch64.neon.vsqshlu.v2i32(<2 x i32>, i32) + +declare <16 x i8> @llvm.aarch64.neon.vsqshlu.v16i8(<16 x i8>, i32) + +declare <8 x i16> @llvm.aarch64.neon.vsqshlu.v8i16(<8 x i16>, i32) + +declare <4 x i32> @llvm.aarch64.neon.vsqshlu.v4i32(<4 x i32>, i32) + +declare <2 x i64> @llvm.aarch64.neon.vsqshlu.v2i64(<2 x i64>, i32) + +declare <8 x i8> @llvm.arm.neon.vqshifts.v8i8(<8 x i8>, <8 x i8>) + +declare <4 x i16> @llvm.arm.neon.vqshifts.v4i16(<4 x i16>, <4 x i16>) + +declare <2 x i32> @llvm.arm.neon.vqshifts.v2i32(<2 x i32>, <2 x i32>) + +declare <16 x i8> @llvm.arm.neon.vqshifts.v16i8(<16 x i8>, <16 x i8>) + +declare <8 x i16> @llvm.arm.neon.vqshifts.v8i16(<8 x i16>, <8 x i16>) + +declare <4 x i32> @llvm.arm.neon.vqshifts.v4i32(<4 x i32>, <4 x i32>) + +declare <2 x i64> @llvm.arm.neon.vqshifts.v2i64(<2 x i64>, <2 x i64>) + +declare <8 x i8> @llvm.arm.neon.vqshiftu.v8i8(<8 x i8>, <8 x i8>) + +declare <4 x i16> @llvm.arm.neon.vqshiftu.v4i16(<4 x i16>, <4 x i16>) + +declare <2 x i32> @llvm.arm.neon.vqshiftu.v2i32(<2 x i32>, <2 x i32>) + +declare <16 x i8> @llvm.arm.neon.vqshiftu.v16i8(<16 x i8>, <16 x i8>) + +declare <8 x i16> @llvm.arm.neon.vqshiftu.v8i16(<8 x i16>, <8 x i16>) + +declare <4 x i32> @llvm.arm.neon.vqshiftu.v4i32(<4 x i32>, <4 x i32>) + +declare <2 x i64> @llvm.arm.neon.vqshiftu.v2i64(<2 x i64>, <2 x i64>) + +declare <8 x i8> @llvm.aarch64.neon.vsqshrun.v8i8(<8 x i16>, i32) + +declare <4 x i16> @llvm.aarch64.neon.vsqshrun.v4i16(<4 x i32>, i32) + +declare <2 x i32> @llvm.aarch64.neon.vsqshrun.v2i32(<2 x i64>, i32) + +declare <8 x i8> @llvm.aarch64.neon.vrshrn.v8i8(<8 x i16>, i32) + +declare <4 x i16> @llvm.aarch64.neon.vrshrn.v4i16(<4 x i32>, i32) + +declare <2 x i32> @llvm.aarch64.neon.vrshrn.v2i32(<2 x i64>, i32) + +declare <8 x i8> @llvm.aarch64.neon.vsqrshrun.v8i8(<8 x i16>, i32) + +declare <4 x i16> @llvm.aarch64.neon.vsqrshrun.v4i16(<4 x i32>, i32) + +declare <2 x i32> @llvm.aarch64.neon.vsqrshrun.v2i32(<2 x i64>, i32) + +declare <8 x i8> @llvm.aarch64.neon.vsqshrn.v8i8(<8 x i16>, i32) + +declare <4 x i16> @llvm.aarch64.neon.vsqshrn.v4i16(<4 x i32>, i32) + +declare <2 x i32> @llvm.aarch64.neon.vsqshrn.v2i32(<2 x i64>, i32) + +declare <8 x i8> @llvm.aarch64.neon.vuqshrn.v8i8(<8 x i16>, i32) + +declare <4 x i16> @llvm.aarch64.neon.vuqshrn.v4i16(<4 x i32>, i32) + +declare <2 
x i32> @llvm.aarch64.neon.vuqshrn.v2i32(<2 x i64>, i32) + +declare <8 x i8> @llvm.aarch64.neon.vsqrshrn.v8i8(<8 x i16>, i32) + +declare <4 x i16> @llvm.aarch64.neon.vsqrshrn.v4i16(<4 x i32>, i32) + +declare <2 x i32> @llvm.aarch64.neon.vsqrshrn.v2i32(<2 x i64>, i32) + +declare <8 x i8> @llvm.aarch64.neon.vuqrshrn.v8i8(<8 x i16>, i32) + +declare <4 x i16> @llvm.aarch64.neon.vuqrshrn.v4i16(<4 x i32>, i32) + +declare <2 x i32> @llvm.aarch64.neon.vuqrshrn.v2i32(<2 x i64>, i32) + +declare <2 x float> @llvm.arm.neon.vcvtfxs2fp.v2f32.v2i32(<2 x i32>, i32) + +declare <4 x float> @llvm.arm.neon.vcvtfxs2fp.v4f32.v4i32(<4 x i32>, i32) + +declare <2 x double> @llvm.arm.neon.vcvtfxs2fp.v2f64.v2i64(<2 x i64>, i32) + +declare <2 x float> @llvm.arm.neon.vcvtfxu2fp.v2f32.v2i32(<2 x i32>, i32) + +declare <4 x float> @llvm.arm.neon.vcvtfxu2fp.v4f32.v4i32(<4 x i32>, i32) + +declare <2 x double> @llvm.arm.neon.vcvtfxu2fp.v2f64.v2i64(<2 x i64>, i32) + +declare <2 x i32> @llvm.arm.neon.vcvtfp2fxs.v2i32.v2f32(<2 x float>, i32) + +declare <4 x i32> @llvm.arm.neon.vcvtfp2fxs.v4i32.v4f32(<4 x float>, i32) + +declare <2 x i64> @llvm.arm.neon.vcvtfp2fxs.v2i64.v2f64(<2 x double>, i32) + +declare <2 x i32> @llvm.arm.neon.vcvtfp2fxu.v2i32.v2f32(<2 x float>, i32) + +declare <4 x i32> @llvm.arm.neon.vcvtfp2fxu.v4i32.v4f32(<4 x float>, i32) + +declare <2 x i64> @llvm.arm.neon.vcvtfp2fxu.v2i64.v2f64(<2 x double>, i32) + diff --git a/test/MC/AArch64/neon-diagnostics.s b/test/MC/AArch64/neon-diagnostics.s index bc54b50..52305f1 100644 --- a/test/MC/AArch64/neon-diagnostics.s +++ b/test/MC/AArch64/neon-diagnostics.s @@ -845,12 +845,12 @@ // Vector Saturating Shift Left (Signed and Unsigned Integer) //---------------------------------------------------------------------- // Mismatched vector types - sqshl v0.2s, v15.2s, v16.2d + sqshl v0.2s, v15.4s, v16.2d uqshl v1.8b, v25.4h, v6.8h // CHECK-ERROR: error: invalid operand for instruction -// CHECK-ERROR: sqshl v0.2s, v15.2s, v16.2d -// CHECK-ERROR: ^ +// CHECK-ERROR: sqshl v0.2s, v15.4s, v16.2d +// CHECK-ERROR: ^ // CHECK-ERROR: error: invalid operand for instruction // CHECK-ERROR: uqshl v1.8b, v25.4h, v6.8h // CHECK-ERROR: ^ @@ -1288,3 +1288,723 @@ // CHECK-ERROR: ushll2 v0.2d, v1.4s, #33 // CHECK-ERROR: ^ + +//------------------------------------------------------------------------------ +// Vector shift right by immediate +//------------------------------------------------------------------------------ + sshr v0.8b, v1.8h, #3 + sshr v0.4h, v1.4s, #3 + sshr v0.2s, v1.2d, #3 + sshr v0.16b, v1.16b, #9 + sshr v0.8h, v1.8h, #17 + sshr v0.4s, v1.4s, #33 + sshr v0.2d, v1.2d, #65 + +// CHECK-ERROR: error: invalid operand for instruction +// CHECK-ERROR: sshr v0.8b, v1.8h, #3 +// CHECK-ERROR: ^ +// CHECK-ERROR: error: invalid operand for instruction +// CHECK-ERROR: sshr v0.4h, v1.4s, #3 +// CHECK-ERROR: ^ +// CHECK-ERROR: error: invalid operand for instruction +// CHECK-ERROR: sshr v0.2s, v1.2d, #3 +// CHECK-ERROR: ^ +// CHECK-ERROR: error: expected integer in range [1, 8] +// CHECK-ERROR: sshr v0.16b, v1.16b, #9 +// CHECK-ERROR: ^ +// CHECK-ERROR: error: expected integer in range [1, 16] +// CHECK-ERROR: sshr v0.8h, v1.8h, #17 +// CHECK-ERROR: ^ +// CHECK-ERROR: error: expected integer in range [1, 32] +// CHECK-ERROR: sshr v0.4s, v1.4s, #33 +// CHECK-ERROR: ^ +// CHECK-ERROR: error: expected integer in range [1, 64] +// CHECK-ERROR: sshr v0.2d, v1.2d, #65 +// CHECK-ERROR: ^ + +//------------------------------------------------------------------------------ +// Vector shift right by 
immediate +//------------------------------------------------------------------------------ + ushr v0.8b, v1.8h, #3 + ushr v0.4h, v1.4s, #3 + ushr v0.2s, v1.2d, #3 + ushr v0.16b, v1.16b, #9 + ushr v0.8h, v1.8h, #17 + ushr v0.4s, v1.4s, #33 + ushr v0.2d, v1.2d, #65 + +// CHECK-ERROR: error: invalid operand for instruction +// CHECK-ERROR: ushr v0.8b, v1.8h, #3 +// CHECK-ERROR: ^ +// CHECK-ERROR: error: invalid operand for instruction +// CHECK-ERROR: ushr v0.4h, v1.4s, #3 +// CHECK-ERROR: ^ +// CHECK-ERROR: error: invalid operand for instruction +// CHECK-ERROR: ushr v0.2s, v1.2d, #3 +// CHECK-ERROR: ^ +// CHECK-ERROR: error: expected integer in range [1, 8] +// CHECK-ERROR: ushr v0.16b, v1.16b, #9 +// CHECK-ERROR: ^ +// CHECK-ERROR: error: expected integer in range [1, 16] +// CHECK-ERROR: ushr v0.8h, v1.8h, #17 +// CHECK-ERROR: ^ +// CHECK-ERROR: error: expected integer in range [1, 32] +// CHECK-ERROR: ushr v0.4s, v1.4s, #33 +// CHECK-ERROR: ^ +// CHECK-ERROR: error: expected integer in range [1, 64] +// CHECK-ERROR: ushr v0.2d, v1.2d, #65 +// CHECK-ERROR: ^ + +//------------------------------------------------------------------------------ +// Vector shift right and accumulate by immediate +//------------------------------------------------------------------------------ + ssra v0.8b, v1.8h, #3 + ssra v0.4h, v1.4s, #3 + ssra v0.2s, v1.2d, #3 + ssra v0.16b, v1.16b, #9 + ssra v0.8h, v1.8h, #17 + ssra v0.4s, v1.4s, #33 + ssra v0.2d, v1.2d, #65 + +// CHECK-ERROR: error: invalid operand for instruction +// CHECK-ERROR: ssra v0.8b, v1.8h, #3 +// CHECK-ERROR: ^ +// CHECK-ERROR: error: invalid operand for instruction +// CHECK-ERROR: ssra v0.4h, v1.4s, #3 +// CHECK-ERROR: ^ +// CHECK-ERROR: error: invalid operand for instruction +// CHECK-ERROR: ssra v0.2s, v1.2d, #3 +// CHECK-ERROR: ^ +// CHECK-ERROR: error: expected integer in range [1, 8] +// CHECK-ERROR: ssra v0.16b, v1.16b, #9 +// CHECK-ERROR: ^ +// CHECK-ERROR: error: expected integer in range [1, 16] +// CHECK-ERROR: ssra v0.8h, v1.8h, #17 +// CHECK-ERROR: ^ +// CHECK-ERROR: error: expected integer in range [1, 32] +// CHECK-ERROR: ssra v0.4s, v1.4s, #33 +// CHECK-ERROR: ^ +// CHECK-ERROR: error: expected integer in range [1, 64] +// CHECK-ERROR: ssra v0.2d, v1.2d, #65 +// CHECK-ERROR: ^ + +//------------------------------------------------------------------------------ +// Vector shift right and accumulate by immediate +//------------------------------------------------------------------------------ + usra v0.8b, v1.8h, #3 + usra v0.4h, v1.4s, #3 + usra v0.2s, v1.2d, #3 + usra v0.16b, v1.16b, #9 + usra v0.8h, v1.8h, #17 + usra v0.4s, v1.4s, #33 + usra v0.2d, v1.2d, #65 + +// CHECK-ERROR: error: invalid operand for instruction +// CHECK-ERROR: usra v0.8b, v1.8h, #3 +// CHECK-ERROR: ^ +// CHECK-ERROR: error: invalid operand for instruction +// CHECK-ERROR: usra v0.4h, v1.4s, #3 +// CHECK-ERROR: ^ +// CHECK-ERROR: error: invalid operand for instruction +// CHECK-ERROR: usra v0.2s, v1.2d, #3 +// CHECK-ERROR: ^ +// CHECK-ERROR: error: expected integer in range [1, 8] +// CHECK-ERROR: usra v0.16b, v1.16b, #9 +// CHECK-ERROR: ^ +// CHECK-ERROR: error: expected integer in range [1, 16] +// CHECK-ERROR: usra v0.8h, v1.8h, #17 +// CHECK-ERROR: ^ +// CHECK-ERROR: error: expected integer in range [1, 32] +// CHECK-ERROR: usra v0.4s, v1.4s, #33 +// CHECK-ERROR: ^ +// CHECK-ERROR: error: expected integer in range [1, 64] +// CHECK-ERROR: usra v0.2d, v1.2d, #65 +// CHECK-ERROR: ^ + 
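+// Note on the expected ranges in this file: the right-shift immediates
+// (sshr, ushr, ssra, usra, srshr, urshr, srsra, ursra, sri) must lie in
+// [1, element-bits], while the left-shift immediates (sli, sqshlu, sqshl,
+// uqshl) must lie in [0, element-bits - 1].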
+//------------------------------------------------------------------------------
+// Vector rounding shift right by immediate
+//------------------------------------------------------------------------------
+ srshr v0.8b, v1.8h, #3
+ srshr v0.4h, v1.4s, #3
+ srshr v0.2s, v1.2d, #3
+ srshr v0.16b, v1.16b, #9
+ srshr v0.8h, v1.8h, #17
+ srshr v0.4s, v1.4s, #33
+ srshr v0.2d, v1.2d, #65
+
+// CHECK-ERROR: error: invalid operand for instruction
+// CHECK-ERROR: srshr v0.8b, v1.8h, #3
+// CHECK-ERROR: ^
+// CHECK-ERROR: error: invalid operand for instruction
+// CHECK-ERROR: srshr v0.4h, v1.4s, #3
+// CHECK-ERROR: ^
+// CHECK-ERROR: error: invalid operand for instruction
+// CHECK-ERROR: srshr v0.2s, v1.2d, #3
+// CHECK-ERROR: ^
+// CHECK-ERROR: error: expected integer in range [1, 8]
+// CHECK-ERROR: srshr v0.16b, v1.16b, #9
+// CHECK-ERROR: ^
+// CHECK-ERROR: error: expected integer in range [1, 16]
+// CHECK-ERROR: srshr v0.8h, v1.8h, #17
+// CHECK-ERROR: ^
+// CHECK-ERROR: error: expected integer in range [1, 32]
+// CHECK-ERROR: srshr v0.4s, v1.4s, #33
+// CHECK-ERROR: ^
+// CHECK-ERROR: error: expected integer in range [1, 64]
+// CHECK-ERROR: srshr v0.2d, v1.2d, #65
+// CHECK-ERROR: ^
+
+//------------------------------------------------------------------------------
+// Vector rounding shift right by immediate
+//------------------------------------------------------------------------------
+ urshr v0.8b, v1.8h, #3
+ urshr v0.4h, v1.4s, #3
+ urshr v0.2s, v1.2d, #3
+ urshr v0.16b, v1.16b, #9
+ urshr v0.8h, v1.8h, #17
+ urshr v0.4s, v1.4s, #33
+ urshr v0.2d, v1.2d, #65
+
+// CHECK-ERROR: error: invalid operand for instruction
+// CHECK-ERROR: urshr v0.8b, v1.8h, #3
+// CHECK-ERROR: ^
+// CHECK-ERROR: error: invalid operand for instruction
+// CHECK-ERROR: urshr v0.4h, v1.4s, #3
+// CHECK-ERROR: ^
+// CHECK-ERROR: error: invalid operand for instruction
+// CHECK-ERROR: urshr v0.2s, v1.2d, #3
+// CHECK-ERROR: ^
+// CHECK-ERROR: error: expected integer in range [1, 8]
+// CHECK-ERROR: urshr v0.16b, v1.16b, #9
+// CHECK-ERROR: ^
+// CHECK-ERROR: error: expected integer in range [1, 16]
+// CHECK-ERROR: urshr v0.8h, v1.8h, #17
+// CHECK-ERROR: ^
+// CHECK-ERROR: error: expected integer in range [1, 32]
+// CHECK-ERROR: urshr v0.4s, v1.4s, #33
+// CHECK-ERROR: ^
+// CHECK-ERROR: error: expected integer in range [1, 64]
+// CHECK-ERROR: urshr v0.2d, v1.2d, #65
+// CHECK-ERROR: ^
+
+//------------------------------------------------------------------------------
+// Vector rounding shift right and accumulate by immediate
+//------------------------------------------------------------------------------
+ srsra v0.8b, v1.8h, #3
+ srsra v0.4h, v1.4s, #3
+ srsra v0.2s, v1.2d, #3
+ srsra v0.16b, v1.16b, #9
+ srsra v0.8h, v1.8h, #17
+ srsra v0.4s, v1.4s, #33
+ srsra v0.2d, v1.2d, #65
+
+// CHECK-ERROR: error: invalid operand for instruction
+// CHECK-ERROR: srsra v0.8b, v1.8h, #3
+// CHECK-ERROR: ^
+// CHECK-ERROR: error: invalid operand for instruction
+// CHECK-ERROR: srsra v0.4h, v1.4s, #3
+// CHECK-ERROR: ^
+// CHECK-ERROR: error: invalid operand for instruction
+// CHECK-ERROR: srsra v0.2s, v1.2d, #3
+// CHECK-ERROR: ^
+// CHECK-ERROR: error: expected integer in range [1, 8]
+// CHECK-ERROR: srsra v0.16b, v1.16b, #9
+// CHECK-ERROR: ^
+// CHECK-ERROR: error: expected integer in range [1, 16]
+// CHECK-ERROR: srsra v0.8h, v1.8h, #17
+// CHECK-ERROR: ^
+// CHECK-ERROR: error: expected integer in range [1, 32]
+// CHECK-ERROR: srsra v0.4s, v1.4s, #33
+// CHECK-ERROR: ^
+// CHECK-ERROR: error:
expected integer in range [1, 64] +// CHECK-ERROR: srsra v0.2d, v1.2d, #65 +// CHECK-ERROR: ^ + +//------------------------------------------------------------------------------ +// Vector rounding shift right and accumulate by immediate +//------------------------------------------------------------------------------ + ursra v0.8b, v1.8h, #3 + ursra v0.4h, v1.4s, #3 + ursra v0.2s, v1.2d, #3 + ursra v0.16b, v1.16b, #9 + ursra v0.8h, v1.8h, #17 + ursra v0.4s, v1.4s, #33 + ursra v0.2d, v1.2d, #65 + +// CHECK-ERROR: error: invalid operand for instruction +// CHECK-ERROR: ursra v0.8b, v1.8h, #3 +// CHECK-ERROR: ^ +// CHECK-ERROR: error: invalid operand for instruction +// CHECK-ERROR: ursra v0.4h, v1.4s, #3 +// CHECK-ERROR: ^ +// CHECK-ERROR: error: invalid operand for instruction +// CHECK-ERROR: ursra v0.2s, v1.2d, #3 +// CHECK-ERROR: ^ +// CHECK-ERROR: error: expected integer in range [1, 8] +// CHECK-ERROR: ursra v0.16b, v1.16b, #9 +// CHECK-ERROR: ^ +// CHECK-ERROR: error: expected integer in range [1, 16] +// CHECK-ERROR: ursra v0.8h, v1.8h, #17 +// CHECK-ERROR: ^ +// CHECK-ERROR: error: expected integer in range [1, 32] +// CHECK-ERROR: ursra v0.4s, v1.4s, #33 +// CHECK-ERROR: ^ +// CHECK-ERROR: error: expected integer in range [1, 64] +// CHECK-ERROR: ursra v0.2d, v1.2d, #65 +// CHECK-ERROR: ^ + +//------------------------------------------------------------------------------ +// Vector shift right and insert by immediate +//------------------------------------------------------------------------------ + sri v0.8b, v1.8h, #3 + sri v0.4h, v1.4s, #3 + sri v0.2s, v1.2d, #3 + sri v0.16b, v1.16b, #9 + sri v0.8h, v1.8h, #17 + sri v0.4s, v1.4s, #33 + sri v0.2d, v1.2d, #65 + +// CHECK-ERROR: error: invalid operand for instruction +// CHECK-ERROR: sri v0.8b, v1.8h, #3 +// CHECK-ERROR: ^ +// CHECK-ERROR: error: invalid operand for instruction +// CHECK-ERROR: sri v0.4h, v1.4s, #3 +// CHECK-ERROR: ^ +// CHECK-ERROR: error: invalid operand for instruction +// CHECK-ERROR: sri v0.2s, v1.2d, #3 +// CHECK-ERROR: ^ +// CHECK-ERROR: error: expected integer in range [1, 8] +// CHECK-ERROR: sri v0.16b, v1.16b, #9 +// CHECK-ERROR: ^ +// CHECK-ERROR: error: expected integer in range [1, 16] +// CHECK-ERROR: sri v0.8h, v1.8h, #17 +// CHECK-ERROR: ^ +// CHECK-ERROR: error: expected integer in range [1, 32] +// CHECK-ERROR: sri v0.4s, v1.4s, #33 +// CHECK-ERROR: ^ +// CHECK-ERROR: error: expected integer in range [1, 64] +// CHECK-ERROR: sri v0.2d, v1.2d, #65 +// CHECK-ERROR: ^ + +//------------------------------------------------------------------------------ +// Vector shift left and insert by immediate +//------------------------------------------------------------------------------ + sli v0.8b, v1.8h, #3 + sli v0.4h, v1.4s, #3 + sli v0.2s, v1.2d, #3 + sli v0.16b, v1.16b, #8 + sli v0.8h, v1.8h, #16 + sli v0.4s, v1.4s, #32 + sli v0.2d, v1.2d, #64 + +// CHECK-ERROR: error: invalid operand for instruction +// CHECK-ERROR: sli v0.8b, v1.8h, #3 +// CHECK-ERROR: ^ +// CHECK-ERROR: error: invalid operand for instruction +// CHECK-ERROR: sli v0.4h, v1.4s, #3 +// CHECK-ERROR: ^ +// CHECK-ERROR: error: invalid operand for instruction +// CHECK-ERROR: sli v0.2s, v1.2d, #3 +// CHECK-ERROR: ^ +// CHECK-ERROR: error: expected integer in range [0, 7] +// CHECK-ERROR: sli v0.16b, v1.16b, #8 +// CHECK-ERROR: ^ +// CHECK-ERROR: error: expected integer in range [0, 15] +// CHECK-ERROR: sli v0.8h, v1.8h, #16 +// CHECK-ERROR: ^ +// CHECK-ERROR: error: expected integer in range [0, 31] +// CHECK-ERROR: sli v0.4s, v1.4s, #32 +// 
CHECK-ERROR: ^ +// CHECK-ERROR: error: expected integer in range [0, 63] +// CHECK-ERROR: sli v0.2d, v1.2d, #64 +// CHECK-ERROR: ^ + +//------------------------------------------------------------------------------ +// Vector saturating shift left unsigned by immediate +//------------------------------------------------------------------------------ + sqshlu v0.8b, v1.8h, #3 + sqshlu v0.4h, v1.4s, #3 + sqshlu v0.2s, v1.2d, #3 + sqshlu v0.16b, v1.16b, #8 + sqshlu v0.8h, v1.8h, #16 + sqshlu v0.4s, v1.4s, #32 + sqshlu v0.2d, v1.2d, #64 + +// CHECK-ERROR: error: invalid operand for instruction +// CHECK-ERROR: sqshlu v0.8b, v1.8h, #3 +// CHECK-ERROR: ^ +// CHECK-ERROR: error: invalid operand for instruction +// CHECK-ERROR: sqshlu v0.4h, v1.4s, #3 +// CHECK-ERROR: ^ +// CHECK-ERROR: error: invalid operand for instruction +// CHECK-ERROR: sqshlu v0.2s, v1.2d, #3 +// CHECK-ERROR: ^ +// CHECK-ERROR: error: expected integer in range [0, 7] +// CHECK-ERROR: sqshlu v0.16b, v1.16b, #8 +// CHECK-ERROR: ^ +// CHECK-ERROR: error: expected integer in range [0, 15] +// CHECK-ERROR: sqshlu v0.8h, v1.8h, #16 +// CHECK-ERROR: ^ +// CHECK-ERROR: error: expected integer in range [0, 31] +// CHECK-ERROR: sqshlu v0.4s, v1.4s, #32 +// CHECK-ERROR: ^ +// CHECK-ERROR: error: expected integer in range [0, 63] +// CHECK-ERROR: sqshlu v0.2d, v1.2d, #64 +// CHECK-ERROR: ^ + +//------------------------------------------------------------------------------ +// Vector saturating shift left by immediate +//------------------------------------------------------------------------------ + sqshl v0.8b, v1.8h, #3 + sqshl v0.4h, v1.4s, #3 + sqshl v0.2s, v1.2d, #3 + sqshl v0.16b, v1.16b, #8 + sqshl v0.8h, v1.8h, #16 + sqshl v0.4s, v1.4s, #32 + sqshl v0.2d, v1.2d, #64 + +// CHECK-ERROR: error: invalid operand for instruction +// CHECK-ERROR: sqshl v0.8b, v1.8h, #3 +// CHECK-ERROR: ^ +// CHECK-ERROR: error: invalid operand for instruction +// CHECK-ERROR: sqshl v0.4h, v1.4s, #3 +// CHECK-ERROR: ^ +// CHECK-ERROR: error: invalid operand for instruction +// CHECK-ERROR: sqshl v0.2s, v1.2d, #3 +// CHECK-ERROR: ^ +// CHECK-ERROR: error: expected integer in range [0, 7] +// CHECK-ERROR: sqshl v0.16b, v1.16b, #8 +// CHECK-ERROR: ^ +// CHECK-ERROR: error: expected integer in range [0, 15] +// CHECK-ERROR: sqshl v0.8h, v1.8h, #16 +// CHECK-ERROR: ^ +// CHECK-ERROR: error: expected integer in range [0, 31] +// CHECK-ERROR: sqshl v0.4s, v1.4s, #32 +// CHECK-ERROR: ^ +// CHECK-ERROR: error: expected integer in range [0, 63] +// CHECK-ERROR: sqshl v0.2d, v1.2d, #64 +// CHECK-ERROR: ^ + +//------------------------------------------------------------------------------ +// Vector saturating shift left by immediate +//------------------------------------------------------------------------------ + uqshl v0.8b, v1.8h, #3 + uqshl v0.4h, v1.4s, #3 + uqshl v0.2s, v1.2d, #3 + uqshl v0.16b, v1.16b, #8 + uqshl v0.8h, v1.8h, #16 + uqshl v0.4s, v1.4s, #32 + uqshl v0.2d, v1.2d, #64 + +// CHECK-ERROR: error: invalid operand for instruction +// CHECK-ERROR: uqshl v0.8b, v1.8h, #3 +// CHECK-ERROR: ^ +// CHECK-ERROR: error: invalid operand for instruction +// CHECK-ERROR: uqshl v0.4h, v1.4s, #3 +// CHECK-ERROR: ^ +// CHECK-ERROR: error: invalid operand for instruction +// CHECK-ERROR: uqshl v0.2s, v1.2d, #3 +// CHECK-ERROR: ^ +// CHECK-ERROR: error: expected integer in range [0, 7] +// CHECK-ERROR: uqshl v0.16b, v1.16b, #8 +// CHECK-ERROR: ^ +// CHECK-ERROR: error: expected integer in range [0, 15] +// CHECK-ERROR: uqshl v0.8h, v1.8h, #16 +// CHECK-ERROR: ^ +// 
CHECK-ERROR: error: expected integer in range [0, 31] +// CHECK-ERROR: uqshl v0.4s, v1.4s, #32 +// CHECK-ERROR: ^ +// CHECK-ERROR: error: expected integer in range [0, 63] +// CHECK-ERROR: uqshl v0.2d, v1.2d, #64 +// CHECK-ERROR: ^ + +//------------------------------------------------------------------------------ +// Vector shift right narrow by immediate +//------------------------------------------------------------------------------ + shrn v0.8b, v1.8b, #3 + shrn v0.4h, v1.4h, #3 + shrn v0.2s, v1.2s, #3 + shrn2 v0.16b, v1.8h, #17 + shrn2 v0.8h, v1.4s, #33 + shrn2 v0.4s, v1.2d, #65 + +// CHECK-ERROR: error: invalid operand for instruction +// CHECK-ERROR: shrn v0.8b, v1.8b, #3 +// CHECK-ERROR: ^ +// CHECK-ERROR: error: invalid operand for instruction +// CHECK-ERROR: shrn v0.4h, v1.4h, #3 +// CHECK-ERROR: ^ +// CHECK-ERROR: error: invalid operand for instruction +// CHECK-ERROR: shrn v0.2s, v1.2s, #3 +// CHECK-ERROR: ^ +// CHECK-ERROR: error: expected integer in range [1, 8] +// CHECK-ERROR: shrn2 v0.16b, v1.8h, #17 +// CHECK-ERROR: ^ +// CHECK-ERROR: error: expected integer in range [1, 16] +// CHECK-ERROR: shrn2 v0.8h, v1.4s, #33 +// CHECK-ERROR: ^ +// CHECK-ERROR: error: expected integer in range [1, 32] +// CHECK-ERROR: shrn2 v0.4s, v1.2d, #65 +// CHECK-ERROR: ^ + +//------------------------------------------------------------------------------ +// Vector saturating shift right unsigned narrow by immediate +//------------------------------------------------------------------------------ + sqshrun v0.8b, v1.8b, #3 + sqshrun v0.4h, v1.4h, #3 + sqshrun v0.2s, v1.2s, #3 + sqshrun2 v0.16b, v1.8h, #17 + sqshrun2 v0.8h, v1.4s, #33 + sqshrun2 v0.4s, v1.2d, #65 + +// CHECK-ERROR: error: invalid operand for instruction +// CHECK-ERROR: sqshrun v0.8b, v1.8b, #3 +// CHECK-ERROR: ^ +// CHECK-ERROR: error: invalid operand for instruction +// CHECK-ERROR: sqshrun v0.4h, v1.4h, #3 +// CHECK-ERROR: ^ +// CHECK-ERROR: error: invalid operand for instruction +// CHECK-ERROR: sqshrun v0.2s, v1.2s, #3 +// CHECK-ERROR: ^ +// CHECK-ERROR: error: expected integer in range [1, 8] +// CHECK-ERROR: sqshrun2 v0.16b, v1.8h, #17 +// CHECK-ERROR: ^ +// CHECK-ERROR: error: expected integer in range [1, 16] +// CHECK-ERROR: sqshrun2 v0.8h, v1.4s, #33 +// CHECK-ERROR: ^ +// CHECK-ERROR: error: expected integer in range [1, 32] +// CHECK-ERROR: sqshrun2 v0.4s, v1.2d, #65 +// CHECK-ERROR: ^ + +//------------------------------------------------------------------------------ +// Vector rounding shift right narrow by immediate +//------------------------------------------------------------------------------ + rshrn v0.8b, v1.8b, #3 + rshrn v0.4h, v1.4h, #3 + rshrn v0.2s, v1.2s, #3 + rshrn2 v0.16b, v1.8h, #17 + rshrn2 v0.8h, v1.4s, #33 + rshrn2 v0.4s, v1.2d, #65 + +// CHECK-ERROR: error: invalid operand for instruction +// CHECK-ERROR: rshrn v0.8b, v1.8b, #3 +// CHECK-ERROR: ^ +// CHECK-ERROR: error: invalid operand for instruction +// CHECK-ERROR: rshrn v0.4h, v1.4h, #3 +// CHECK-ERROR: ^ +// CHECK-ERROR: error: invalid operand for instruction +// CHECK-ERROR: rshrn v0.2s, v1.2s, #3 +// CHECK-ERROR: ^ +// CHECK-ERROR: error: expected integer in range [1, 8] +// CHECK-ERROR: rshrn2 v0.16b, v1.8h, #17 +// CHECK-ERROR: ^ +// CHECK-ERROR: error: expected integer in range [1, 16] +// CHECK-ERROR: rshrn2 v0.8h, v1.4s, #33 +// CHECK-ERROR: ^ +// CHECK-ERROR: error: expected integer in range [1, 32] +// CHECK-ERROR: rshrn2 v0.4s, v1.2d, #65 +// CHECK-ERROR: ^ + 
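+// For the narrowing shifts (shrn, sqshrun, rshrn, sqshrn, uqshrn, sqrshrn,
+// uqrshrn and their "2" forms), the immediate is bounded by the width of the
+// destination element, hence the [1, 8], [1, 16] and [1, 32] ranges expected
+// above and below even though the source elements are twice as wide.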
+//------------------------------------------------------------------------------ +// Vector saturating shift right rounded unsigned narrow by immediate +//------------------------------------------------------------------------------ + sqrshrun v0.8b, v1.8b, #3 + sqrshrun v0.4h, v1.4h, #3 + sqrshrun v0.2s, v1.2s, #3 + sqrshrun2 v0.16b, v1.8h, #17 + sqrshrun2 v0.8h, v1.4s, #33 + sqrshrun2 v0.4s, v1.2d, #65 + +// CHECK-ERROR: error: invalid operand for instruction +// CHECK-ERROR: sqrshrun v0.8b, v1.8b, #3 +// CHECK-ERROR: ^ +// CHECK-ERROR: error: invalid operand for instruction +// CHECK-ERROR: sqrshrun v0.4h, v1.4h, #3 +// CHECK-ERROR: ^ +// CHECK-ERROR: error: invalid operand for instruction +// CHECK-ERROR: sqrshrun v0.2s, v1.2s, #3 +// CHECK-ERROR: ^ +// CHECK-ERROR: error: expected integer in range [1, 8] +// CHECK-ERROR: sqrshrun2 v0.16b, v1.8h, #17 +// CHECK-ERROR: ^ +// CHECK-ERROR: error: expected integer in range [1, 16] +// CHECK-ERROR: sqrshrun2 v0.8h, v1.4s, #33 +// CHECK-ERROR: ^ +// CHECK-ERROR: error: expected integer in range [1, 32] +// CHECK-ERROR: sqrshrun2 v0.4s, v1.2d, #65 +// CHECK-ERROR: ^ + +//------------------------------------------------------------------------------ +// Vector saturating shift right narrow by immediate +//------------------------------------------------------------------------------ + sqshrn v0.8b, v1.8b, #3 + sqshrn v0.4h, v1.4h, #3 + sqshrn v0.2s, v1.2s, #3 + sqshrn2 v0.16b, v1.8h, #17 + sqshrn2 v0.8h, v1.4s, #33 + sqshrn2 v0.4s, v1.2d, #65 + +// CHECK-ERROR: error: invalid operand for instruction +// CHECK-ERROR: sqshrn v0.8b, v1.8b, #3 +// CHECK-ERROR: ^ +// CHECK-ERROR: error: invalid operand for instruction +// CHECK-ERROR: sqshrn v0.4h, v1.4h, #3 +// CHECK-ERROR: ^ +// CHECK-ERROR: error: invalid operand for instruction +// CHECK-ERROR: sqshrn v0.2s, v1.2s, #3 +// CHECK-ERROR: ^ +// CHECK-ERROR: error: expected integer in range [1, 8] +// CHECK-ERROR: sqshrn2 v0.16b, v1.8h, #17 +// CHECK-ERROR: ^ +// CHECK-ERROR: error: expected integer in range [1, 16] +// CHECK-ERROR: sqshrn2 v0.8h, v1.4s, #33 +// CHECK-ERROR: ^ +// CHECK-ERROR: error: expected integer in range [1, 32] +// CHECK-ERROR: sqshrn2 v0.4s, v1.2d, #65 +// CHECK-ERROR: ^ + +//------------------------------------------------------------------------------ +// Vector saturating shift right narrow by immediate +//------------------------------------------------------------------------------ + uqshrn v0.8b, v1.8b, #3 + uqshrn v0.4h, v1.4h, #3 + uqshrn v0.2s, v1.2s, #3 + uqshrn2 v0.16b, v1.8h, #17 + uqshrn2 v0.8h, v1.4s, #33 + uqshrn2 v0.4s, v1.2d, #65 + +// CHECK-ERROR: error: invalid operand for instruction +// CHECK-ERROR: uqshrn v0.8b, v1.8b, #3 +// CHECK-ERROR: ^ +// CHECK-ERROR: error: invalid operand for instruction +// CHECK-ERROR: uqshrn v0.4h, v1.4h, #3 +// CHECK-ERROR: ^ +// CHECK-ERROR: error: invalid operand for instruction +// CHECK-ERROR: uqshrn v0.2s, v1.2s, #3 +// CHECK-ERROR: ^ +// CHECK-ERROR: error: expected integer in range [1, 8] +// CHECK-ERROR: uqshrn2 v0.16b, v1.8h, #17 +// CHECK-ERROR: ^ +// CHECK-ERROR: error: expected integer in range [1, 16] +// CHECK-ERROR: uqshrn2 v0.8h, v1.4s, #33 +// CHECK-ERROR: ^ +// CHECK-ERROR: error: expected integer in range [1, 32] +// CHECK-ERROR: uqshrn2 v0.4s, v1.2d, #65 +// CHECK-ERROR: ^ + +//------------------------------------------------------------------------------ +// Vector saturating shift right rounded narrow by immediate +//------------------------------------------------------------------------------ + 
sqrshrn v0.8b, v1.8b, #3 + sqrshrn v0.4h, v1.4h, #3 + sqrshrn v0.2s, v1.2s, #3 + sqrshrn2 v0.16b, v1.8h, #17 + sqrshrn2 v0.8h, v1.4s, #33 + sqrshrn2 v0.4s, v1.2d, #65 + +// CHECK-ERROR: error: invalid operand for instruction +// CHECK-ERROR: sqrshrn v0.8b, v1.8b, #3 +// CHECK-ERROR: ^ +// CHECK-ERROR: error: invalid operand for instruction +// CHECK-ERROR: sqrshrn v0.4h, v1.4h, #3 +// CHECK-ERROR: ^ +// CHECK-ERROR: error: invalid operand for instruction +// CHECK-ERROR: sqrshrn v0.2s, v1.2s, #3 +// CHECK-ERROR: ^ +// CHECK-ERROR: error: expected integer in range [1, 8] +// CHECK-ERROR: sqrshrn2 v0.16b, v1.8h, #17 +// CHECK-ERROR: ^ +// CHECK-ERROR: error: expected integer in range [1, 16] +// CHECK-ERROR: sqrshrn2 v0.8h, v1.4s, #33 +// CHECK-ERROR: ^ +// CHECK-ERROR: error: expected integer in range [1, 32] +// CHECK-ERROR: sqrshrn2 v0.4s, v1.2d, #65 +// CHECK-ERROR: ^ + +//------------------------------------------------------------------------------ +// Vector saturating shift right rounded narrow by immediate +//------------------------------------------------------------------------------ + uqrshrn v0.8b, v1.8b, #3 + uqrshrn v0.4h, v1.4h, #3 + uqrshrn v0.2s, v1.2s, #3 + uqrshrn2 v0.16b, v1.8h, #17 + uqrshrn2 v0.8h, v1.4s, #33 + uqrshrn2 v0.4s, v1.2d, #65 + +// CHECK-ERROR: error: invalid operand for instruction +// CHECK-ERROR: uqrshrn v0.8b, v1.8b, #3 +// CHECK-ERROR: ^ +// CHECK-ERROR: error: invalid operand for instruction +// CHECK-ERROR: uqrshrn v0.4h, v1.4h, #3 +// CHECK-ERROR: ^ +// CHECK-ERROR: error: invalid operand for instruction +// CHECK-ERROR: uqrshrn v0.2s, v1.2s, #3 +// CHECK-ERROR: ^ +// CHECK-ERROR: error: expected integer in range [1, 8] +// CHECK-ERROR: uqrshrn2 v0.16b, v1.8h, #17 +// CHECK-ERROR: ^ +// CHECK-ERROR: error: expected integer in range [1, 16] +// CHECK-ERROR: uqrshrn2 v0.8h, v1.4s, #33 +// CHECK-ERROR: ^ +// CHECK-ERROR: error: expected integer in range [1, 32] +// CHECK-ERROR: uqrshrn2 v0.4s, v1.2d, #65 +// CHECK-ERROR: ^ + +//------------------------------------------------------------------------------ +// Fixed-point convert to floating-point +//------------------------------------------------------------------------------ + scvtf v0.2s, v1.2d, #3 + scvtf v0.4s, v1.4h, #3 + scvtf v0.2d, v1.2s, #3 + ucvtf v0.2s, v1.2s, #33 + ucvtf v0.4s, v1.4s, #33 + ucvtf v0.2d, v1.2d, #65 + +// CHECK-ERROR: error: invalid operand for instruction +// CHECK-ERROR: scvtf v0.2s, v1.2d, #3 +// CHECK-ERROR: ^ +// CHECK-ERROR: error: invalid operand for instruction +// CHECK-ERROR: scvtf v0.4s, v1.4h, #3 +// CHECK-ERROR: ^ +// CHECK-ERROR: error: invalid operand for instruction +// CHECK-ERROR: scvtf v0.2d, v1.2s, #3 +// CHECK-ERROR: ^ +// CHECK-ERROR: error: expected integer in range [1, 32] +// CHECK-ERROR: ucvtf v0.2s, v1.2s, #33 +// CHECK-ERROR: ^ +// CHECK-ERROR: error: expected integer in range [1, 32] +// CHECK-ERROR: ucvtf v0.4s, v1.4s, #33 +// CHECK-ERROR: ^ +// CHECK-ERROR: error: expected integer in range [1, 64] +// CHECK-ERROR: ucvtf v0.2d, v1.2d, #65 +// CHECK-ERROR: ^ + +//------------------------------------------------------------------------------ +// Floating-point convert to fixed-point +//------------------------------------------------------------------------------ + fcvtzs v0.2s, v1.2d, #3 + fcvtzs v0.4s, v1.4h, #3 + fcvtzs v0.2d, v1.2s, #3 + fcvtzu v0.2s, v1.2s, #33 + fcvtzu v0.4s, v1.4s, #33 + fcvtzu v0.2d, v1.2d, #65 + +// CHECK-ERROR: error: invalid operand for instruction +// CHECK-ERROR: fcvtzs v0.2s, v1.2d, #3 +// CHECK-ERROR: ^ +// 
CHECK-ERROR: error: invalid operand for instruction +// CHECK-ERROR: fcvtzs v0.4s, v1.4h, #3 +// CHECK-ERROR: ^ +// CHECK-ERROR: error: invalid operand for instruction +// CHECK-ERROR: fcvtzs v0.2d, v1.2s, #3 +// CHECK-ERROR: ^ +// CHECK-ERROR: error: expected integer in range [1, 32] +// CHECK-ERROR: fcvtzu v0.2s, v1.2s, #33 +// CHECK-ERROR: ^ +// CHECK-ERROR: error: expected integer in range [1, 32] +// CHECK-ERROR: fcvtzu v0.4s, v1.4s, #33 +// CHECK-ERROR: ^ +// CHECK-ERROR: error: expected integer in range [1, 64] +// CHECK-ERROR: fcvtzu v0.2d, v1.2d, #65 +// CHECK-ERROR: ^ + diff --git a/test/MC/AArch64/neon-simd-shift.s b/test/MC/AArch64/neon-simd-shift.s new file mode 100644 index 0000000..9e6e1aa --- /dev/null +++ b/test/MC/AArch64/neon-simd-shift.s @@ -0,0 +1,434 @@ +// RUN: llvm-mc -triple=aarch64 -mattr=+neon -show-encoding < %s | FileCheck %s + +// Check that the assembler can handle the documented syntax for AArch64 + +//------------------------------------------------------------------------------ +// Vector shift right by immediate +//------------------------------------------------------------------------------ + sshr v0.8b, v1.8b, #3 + sshr v0.4h, v1.4h, #3 + sshr v0.2s, v1.2s, #3 + sshr v0.16b, v1.16b, #3 + sshr v0.8h, v1.8h, #3 + sshr v0.4s, v1.4s, #3 + sshr v0.2d, v1.2d, #3 +// CHECK: sshr v0.8b, v1.8b, #3 // encoding: [0x20,0x04,0x0d,0x0f] +// CHECK: sshr v0.4h, v1.4h, #3 // encoding: [0x20,0x04,0x1d,0x0f] +// CHECK: sshr v0.2s, v1.2s, #3 // encoding: [0x20,0x04,0x3d,0x0f] +// CHECK: sshr v0.16b, v1.16b, #3 // encoding: [0x20,0x04,0x0d,0x4f] +// CHECK: sshr v0.8h, v1.8h, #3 // encoding: [0x20,0x04,0x1d,0x4f] +// CHECK: sshr v0.4s, v1.4s, #3 // encoding: [0x20,0x04,0x3d,0x4f] +// CHECK: sshr v0.2d, v1.2d, #3 // encoding: [0x20,0x04,0x7d,0x4f] + +//------------------------------------------------------------------------------ +// Vector shift right by immediate +//------------------------------------------------------------------------------ + ushr v0.8b, v1.8b, #3 + ushr v0.4h, v1.4h, #3 + ushr v0.2s, v1.2s, #3 + ushr v0.16b, v1.16b, #3 + ushr v0.8h, v1.8h, #3 + ushr v0.4s, v1.4s, #3 + ushr v0.2d, v1.2d, #3 + +// CHECK: ushr v0.8b, v1.8b, #3 // encoding: [0x20,0x04,0x0d,0x2f] +// CHECK: ushr v0.4h, v1.4h, #3 // encoding: [0x20,0x04,0x1d,0x2f] +// CHECK: ushr v0.2s, v1.2s, #3 // encoding: [0x20,0x04,0x3d,0x2f] +// CHECK: ushr v0.16b, v1.16b, #3 // encoding: [0x20,0x04,0x0d,0x6f] +// CHECK: ushr v0.8h, v1.8h, #3 // encoding: [0x20,0x04,0x1d,0x6f] +// CHECK: ushr v0.4s, v1.4s, #3 // encoding: [0x20,0x04,0x3d,0x6f] +// CHECK: ushr v0.2d, v1.2d, #3 // encoding: [0x20,0x04,0x7d,0x6f] + +//------------------------------------------------------------------------------ +// Vector shift right and accumulate by immediate +//------------------------------------------------------------------------------ + ssra v0.8b, v1.8b, #3 + ssra v0.4h, v1.4h, #3 + ssra v0.2s, v1.2s, #3 + ssra v0.16b, v1.16b, #3 + ssra v0.8h, v1.8h, #3 + ssra v0.4s, v1.4s, #3 + ssra v0.2d, v1.2d, #3 + +// CHECK: ssra v0.8b, v1.8b, #3 // encoding: [0x20,0x14,0x0d,0x0f] +// CHECK: ssra v0.4h, v1.4h, #3 // encoding: [0x20,0x14,0x1d,0x0f] +// CHECK: ssra v0.2s, v1.2s, #3 // encoding: [0x20,0x14,0x3d,0x0f] +// CHECK: ssra v0.16b, v1.16b, #3 // encoding: [0x20,0x14,0x0d,0x4f] +// CHECK: ssra v0.8h, v1.8h, #3 // encoding: [0x20,0x14,0x1d,0x4f] +// CHECK: ssra v0.4s, v1.4s, #3 // encoding: [0x20,0x14,0x3d,0x4f] +// CHECK: ssra v0.2d, v1.2d, #3 // encoding: [0x20,0x14,0x7d,0x4f] + 
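The third byte of each encoding above is the 7-bit immh:immb field (instruction bits 22-16; bit 23 is zero for these forms). For the shift-right class it holds 2*esize - shift, which is why #3 shows up as 0x0d, 0x1d, 0x3d and 0x7d across the element sizes. A small sketch under that reading, with a hypothetical helper name rather than the MC code emitter's real API:

```cpp
#include <cassert>

// Sketch of the shift-right immediate packing visible in the encodings above:
// immh:immb (bits 22-16) holds 2*esize - shift. encodeRightShiftImm is an
// illustrative name, not LLVM's.
static unsigned encodeRightShiftImm(unsigned EltBits, unsigned Shift) {
  return 2 * EltBits - Shift; // equals the third listed byte when bit 23 is 0
}

int main() {
  assert(encodeRightShiftImm(8, 3)  == 0x0d); // sshr v0.8b, v1.8b, #3
  assert(encodeRightShiftImm(16, 3) == 0x1d); // sshr v0.4h, v1.4h, #3
  assert(encodeRightShiftImm(32, 3) == 0x3d); // sshr v0.2s, v1.2s, #3
  assert(encodeRightShiftImm(64, 3) == 0x7d); // sshr v0.2d, v1.2d, #3
  return 0;
}
```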
+//------------------------------------------------------------------------------ +// Vector shift right and accumulate by immediate +//------------------------------------------------------------------------------ + usra v0.8b, v1.8b, #3 + usra v0.4h, v1.4h, #3 + usra v0.2s, v1.2s, #3 + usra v0.16b, v1.16b, #3 + usra v0.8h, v1.8h, #3 + usra v0.4s, v1.4s, #3 + usra v0.2d, v1.2d, #3 + +// CHECK: usra v0.8b, v1.8b, #3 // encoding: [0x20,0x14,0x0d,0x2f] +// CHECK: usra v0.4h, v1.4h, #3 // encoding: [0x20,0x14,0x1d,0x2f] +// CHECK: usra v0.2s, v1.2s, #3 // encoding: [0x20,0x14,0x3d,0x2f] +// CHECK: usra v0.16b, v1.16b, #3 // encoding: [0x20,0x14,0x0d,0x6f] +// CHECK: usra v0.8h, v1.8h, #3 // encoding: [0x20,0x14,0x1d,0x6f] +// CHECK: usra v0.4s, v1.4s, #3 // encoding: [0x20,0x14,0x3d,0x6f] +// CHECK: usra v0.2d, v1.2d, #3 // encoding: [0x20,0x14,0x7d,0x6f] + +//------------------------------------------------------------------------------ +// Vector rounding shift right by immediate +//------------------------------------------------------------------------------ + srshr v0.8b, v1.8b, #3 + srshr v0.4h, v1.4h, #3 + srshr v0.2s, v1.2s, #3 + srshr v0.16b, v1.16b, #3 + srshr v0.8h, v1.8h, #3 + srshr v0.4s, v1.4s, #3 + srshr v0.2d, v1.2d, #3 + +// CHECK: srshr v0.8b, v1.8b, #3 // encoding: [0x20,0x24,0x0d,0x0f] +// CHECK: srshr v0.4h, v1.4h, #3 // encoding: [0x20,0x24,0x1d,0x0f] +// CHECK: srshr v0.2s, v1.2s, #3 // encoding: [0x20,0x24,0x3d,0x0f] +// CHECK: srshr v0.16b, v1.16b, #3 // encoding: [0x20,0x24,0x0d,0x4f] +// CHECK: srshr v0.8h, v1.8h, #3 // encoding: [0x20,0x24,0x1d,0x4f] +// CHECK: srshr v0.4s, v1.4s, #3 // encoding: [0x20,0x24,0x3d,0x4f] +// CHECK: srshr v0.2d, v1.2d, #3 // encoding: [0x20,0x24,0x7d,0x4f] + + +//------------------------------------------------------------------------------ +// Vector rounding shift right by immediate +//------------------------------------------------------------------------------ + urshr v0.8b, v1.8b, #3 + urshr v0.4h, v1.4h, #3 + urshr v0.2s, v1.2s, #3 + urshr v0.16b, v1.16b, #3 + urshr v0.8h, v1.8h, #3 + urshr v0.4s, v1.4s, #3 + urshr v0.2d, v1.2d, #3 + +// CHECK: urshr v0.8b, v1.8b, #3 // encoding: [0x20,0x24,0x0d,0x2f] +// CHECK: urshr v0.4h, v1.4h, #3 // encoding: [0x20,0x24,0x1d,0x2f] +// CHECK: urshr v0.2s, v1.2s, #3 // encoding: [0x20,0x24,0x3d,0x2f] +// CHECK: urshr v0.16b, v1.16b, #3 // encoding: [0x20,0x24,0x0d,0x6f] +// CHECK: urshr v0.8h, v1.8h, #3 // encoding: [0x20,0x24,0x1d,0x6f] +// CHECK: urshr v0.4s, v1.4s, #3 // encoding: [0x20,0x24,0x3d,0x6f] +// CHECK: urshr v0.2d, v1.2d, #3 // encoding: [0x20,0x24,0x7d,0x6f] + + +//------------------------------------------------------------------------------ +// Vector rounding shift right and accumulate by immediate +//------------------------------------------------------------------------------ + srsra v0.8b, v1.8b, #3 + srsra v0.4h, v1.4h, #3 + srsra v0.2s, v1.2s, #3 + srsra v0.16b, v1.16b, #3 + srsra v0.8h, v1.8h, #3 + srsra v0.4s, v1.4s, #3 + srsra v0.2d, v1.2d, #3 + +// CHECK: srsra v0.8b, v1.8b, #3 // encoding: [0x20,0x34,0x0d,0x0f] +// CHECK: srsra v0.4h, v1.4h, #3 // encoding: [0x20,0x34,0x1d,0x0f] +// CHECK: srsra v0.2s, v1.2s, #3 // encoding: [0x20,0x34,0x3d,0x0f] +// CHECK: srsra v0.16b, v1.16b, #3 // encoding: [0x20,0x34,0x0d,0x4f] +// CHECK: srsra v0.8h, v1.8h, #3 // encoding: [0x20,0x34,0x1d,0x4f] +// CHECK: srsra v0.4s, v1.4s, #3 // encoding: [0x20,0x34,0x3d,0x4f] +// CHECK: srsra v0.2d, v1.2d, #3 // encoding: [0x20,0x34,0x7d,0x4f] + + 
+//------------------------------------------------------------------------------ +// Vector rounding shift right and accumulate by immediate +//------------------------------------------------------------------------------ + ursra v0.8b, v1.8b, #3 + ursra v0.4h, v1.4h, #3 + ursra v0.2s, v1.2s, #3 + ursra v0.16b, v1.16b, #3 + ursra v0.8h, v1.8h, #3 + ursra v0.4s, v1.4s, #3 + ursra v0.2d, v1.2d, #3 + +// CHECK: ursra v0.8b, v1.8b, #3 // encoding: [0x20,0x34,0x0d,0x2f] +// CHECK: ursra v0.4h, v1.4h, #3 // encoding: [0x20,0x34,0x1d,0x2f] +// CHECK: ursra v0.2s, v1.2s, #3 // encoding: [0x20,0x34,0x3d,0x2f] +// CHECK: ursra v0.16b, v1.16b, #3 // encoding: [0x20,0x34,0x0d,0x6f] +// CHECK: ursra v0.8h, v1.8h, #3 // encoding: [0x20,0x34,0x1d,0x6f] +// CHECK: ursra v0.4s, v1.4s, #3 // encoding: [0x20,0x34,0x3d,0x6f] +// CHECK: ursra v0.2d, v1.2d, #3 // encoding: [0x20,0x34,0x7d,0x6f] + + +//------------------------------------------------------------------------------ +// Vector shift right and insert by immediate +//------------------------------------------------------------------------------ + sri v0.8b, v1.8b, #3 + sri v0.4h, v1.4h, #3 + sri v0.2s, v1.2s, #3 + sri v0.16b, v1.16b, #3 + sri v0.8h, v1.8h, #3 + sri v0.4s, v1.4s, #3 + sri v0.2d, v1.2d, #3 + +// CHECK: sri v0.8b, v1.8b, #3 // encoding: [0x20,0x44,0x0d,0x2f] +// CHECK: sri v0.4h, v1.4h, #3 // encoding: [0x20,0x44,0x1d,0x2f] +// CHECK: sri v0.2s, v1.2s, #3 // encoding: [0x20,0x44,0x3d,0x2f] +// CHECK: sri v0.16b, v1.16b, #3 // encoding: [0x20,0x44,0x0d,0x6f] +// CHECK: sri v0.8h, v1.8h, #3 // encoding: [0x20,0x44,0x1d,0x6f] +// CHECK: sri v0.4s, v1.4s, #3 // encoding: [0x20,0x44,0x3d,0x6f] +// CHECK: sri v0.2d, v1.2d, #3 // encoding: [0x20,0x44,0x7d,0x6f] + + +//------------------------------------------------------------------------------ +// Vector shift left and insert by immediate +//------------------------------------------------------------------------------ + sli v0.8b, v1.8b, #3 + sli v0.4h, v1.4h, #3 + sli v0.2s, v1.2s, #3 + sli v0.16b, v1.16b, #3 + sli v0.8h, v1.8h, #3 + sli v0.4s, v1.4s, #3 + sli v0.2d, v1.2d, #3 + +// CHECK: sli v0.8b, v1.8b, #3 // encoding: [0x20,0x54,0x0b,0x2f] +// CHECK: sli v0.4h, v1.4h, #3 // encoding: [0x20,0x54,0x13,0x2f] +// CHECK: sli v0.2s, v1.2s, #3 // encoding: [0x20,0x54,0x23,0x2f] +// CHECK: sli v0.16b, v1.16b, #3 // encoding: [0x20,0x54,0x0b,0x6f] +// CHECK: sli v0.8h, v1.8h, #3 // encoding: [0x20,0x54,0x13,0x6f] +// CHECK: sli v0.4s, v1.4s, #3 // encoding: [0x20,0x54,0x23,0x6f] +// CHECK: sli v0.2d, v1.2d, #3 // encoding: [0x20,0x54,0x43,0x6f] + +//------------------------------------------------------------------------------ +// Vector saturating shift left unsigned by immediate +//------------------------------------------------------------------------------ + sqshlu v0.8b, v1.8b, #3 + sqshlu v0.4h, v1.4h, #3 + sqshlu v0.2s, v1.2s, #3 + sqshlu v0.16b, v1.16b, #3 + sqshlu v0.8h, v1.8h, #3 + sqshlu v0.4s, v1.4s, #3 + sqshlu v0.2d, v1.2d, #3 + +// CHECK: sqshlu v0.8b, v1.8b, #3 // encoding: [0x20,0x64,0x0b,0x2f] +// CHECK: sqshlu v0.4h, v1.4h, #3 // encoding: [0x20,0x64,0x13,0x2f] +// CHECK: sqshlu v0.2s, v1.2s, #3 // encoding: [0x20,0x64,0x23,0x2f] +// CHECK: sqshlu v0.16b, v1.16b, #3 // encoding: [0x20,0x64,0x0b,0x6f] +// CHECK: sqshlu v0.8h, v1.8h, #3 // encoding: [0x20,0x64,0x13,0x6f] +// CHECK: sqshlu v0.4s, v1.4s, #3 // encoding: [0x20,0x64,0x23,0x6f] +// CHECK: sqshlu v0.2d, v1.2d, #3 // encoding: [0x20,0x64,0x43,0x6f] + + +//------------------------------------------------------------------------------ +// Vector saturating shift left by immediate 
+//------------------------------------------------------------------------------ + sqshl v0.8b, v1.8b, #3 + sqshl v0.4h, v1.4h, #3 + sqshl v0.2s, v1.2s, #3 + sqshl v0.16b, v1.16b, #3 + sqshl v0.8h, v1.8h, #3 + sqshl v0.4s, v1.4s, #3 + sqshl v0.2d, v1.2d, #3 + +// CHECK: sqshl v0.8b, v1.8b, #3 // encoding: [0x20,0x74,0x0b,0x0f] +// CHECK: sqshl v0.4h, v1.4h, #3 // encoding: [0x20,0x74,0x13,0x0f] +// CHECK: sqshl v0.2s, v1.2s, #3 // encoding: [0x20,0x74,0x23,0x0f] +// CHECK: sqshl v0.16b, v1.16b, #3 // encoding: [0x20,0x74,0x0b,0x4f] +// CHECK: sqshl v0.8h, v1.8h, #3 // encoding: [0x20,0x74,0x13,0x4f] +// CHECK: sqshl v0.4s, v1.4s, #3 // encoding: [0x20,0x74,0x23,0x4f] +// CHECK: sqshl v0.2d, v1.2d, #3 // encoding: [0x20,0x74,0x43,0x4f] + + + +//------------------------------------------------------------------------------ +// Vector saturating shift left by immediate +//------------------------------------------------------------------------------ + uqshl v0.8b, v1.8b, #3 + uqshl v0.4h, v1.4h, #3 + uqshl v0.2s, v1.2s, #3 + uqshl v0.16b, v1.16b, #3 + uqshl v0.8h, v1.8h, #3 + uqshl v0.4s, v1.4s, #3 + uqshl v0.2d, v1.2d, #3 + +// CHECK: uqshl v0.8b, v1.8b, #3 // encoding: [0x20,0x74,0x0b,0x2f] +// CHECK: uqshl v0.4h, v1.4h, #3 // encoding: [0x20,0x74,0x13,0x2f] +// CHECK: uqshl v0.2s, v1.2s, #3 // encoding: [0x20,0x74,0x23,0x2f] +// CHECK: uqshl v0.16b, v1.16b, #3 // encoding: [0x20,0x74,0x0b,0x6f] +// CHECK: uqshl v0.8h, v1.8h, #3 // encoding: [0x20,0x74,0x13,0x6f] +// CHECK: uqshl v0.4s, v1.4s, #3 // encoding: [0x20,0x74,0x23,0x6f] +// CHECK: uqshl v0.2d, v1.2d, #3 // encoding: [0x20,0x74,0x43,0x6f] + + +//------------------------------------------------------------------------------ +// Vector shift right narrow by immediate +//------------------------------------------------------------------------------ + shrn v0.8b, v1.8h, #3 + shrn v0.4h, v1.4s, #3 + shrn v0.2s, v1.2d, #3 + shrn2 v0.16b, v1.8h, #3 + shrn2 v0.8h, v1.4s, #3 + shrn2 v0.4s, v1.2d, #3 + +// CHECK: shrn v0.8b, v1.8h, #3 // encoding: [0x20,0x84,0x0d,0x0f] +// CHECK: shrn v0.4h, v1.4s, #3 // encoding: [0x20,0x84,0x1d,0x0f] +// CHECK: shrn v0.2s, v1.2d, #3 // encoding: [0x20,0x84,0x3d,0x0f] +// CHECK: shrn2 v0.16b, v1.8h, #3 // encoding: [0x20,0x84,0x0d,0x4f] +// CHECK: shrn2 v0.8h, v1.4s, #3 // encoding: [0x20,0x84,0x1d,0x4f] +// CHECK: shrn2 v0.4s, v1.2d, #3 // encoding: [0x20,0x84,0x3d,0x4f] + +//------------------------------------------------------------------------------ +// Vector saturating shift right unsigned narrow by immediate +//------------------------------------------------------------------------------ + sqshrun v0.8b, v1.8h, #3 + sqshrun v0.4h, v1.4s, #3 + sqshrun v0.2s, v1.2d, #3 + sqshrun2 v0.16b, v1.8h, #3 + sqshrun2 v0.8h, v1.4s, #3 + sqshrun2 v0.4s, v1.2d, #3 + +// CHECK: sqshrun v0.8b, v1.8h, #3 // encoding: [0x20,0x84,0x0d,0x2f] +// CHECK: sqshrun v0.4h, v1.4s, #3 // encoding: [0x20,0x84,0x1d,0x2f] +// CHECK: sqshrun v0.2s, v1.2d, #3 // encoding: [0x20,0x84,0x3d,0x2f] +// CHECK: sqshrun2 v0.16b, v1.8h, #3 // encoding: [0x20,0x84,0x0d,0x6f] +// CHECK: sqshrun2 v0.8h, v1.4s, #3 // encoding: [0x20,0x84,0x1d,0x6f] +// CHECK: sqshrun2 v0.4s, v1.2d, #3 // encoding: [0x20,0x84,0x3d,0x6f] + +//------------------------------------------------------------------------------ +// Vector rounding shift right narrow by immediate +//------------------------------------------------------------------------------ + rshrn v0.8b, v1.8h, #3 + rshrn v0.4h, v1.4s, #3 + rshrn v0.2s, v1.2d, #3 + rshrn2 v0.16b, v1.8h, #3 + rshrn2 
v0.8h, v1.4s, #3 + rshrn2 v0.4s, v1.2d, #3 + +// CHECK: rshrn v0.8b, v1.8h, #3 // encoding: [0x20,0x8c,0x0d,0x0f] +// CHECK: rshrn v0.4h, v1.4s, #3 // encoding: [0x20,0x8c,0x1d,0x0f] +// CHECK: rshrn v0.2s, v1.2d, #3 // encoding: [0x20,0x8c,0x3d,0x0f] +// CHECK: rshrn2 v0.16b, v1.8h, #3 // encoding: [0x20,0x8c,0x0d,0x4f] +// CHECK: rshrn2 v0.8h, v1.4s, #3 // encoding: [0x20,0x8c,0x1d,0x4f] +// CHECK: rshrn2 v0.4s, v1.2d, #3 // encoding: [0x20,0x8c,0x3d,0x4f] + + +//------------------------------------------------------------------------------ +// Vector saturating shift right rounded unsigned narrow by immediate +//------------------------------------------------------------------------------ + sqrshrun v0.8b, v1.8h, #3 + sqrshrun v0.4h, v1.4s, #3 + sqrshrun v0.2s, v1.2d, #3 + sqrshrun2 v0.16b, v1.8h, #3 + sqrshrun2 v0.8h, v1.4s, #3 + sqrshrun2 v0.4s, v1.2d, #3 + +// CHECK: sqrshrun v0.8b, v1.8h, #3 // encoding: [0x20,0x8c,0x0d,0x2f] +// CHECK: sqrshrun v0.4h, v1.4s, #3 // encoding: [0x20,0x8c,0x1d,0x2f] +// CHECK: sqrshrun v0.2s, v1.2d, #3 // encoding: [0x20,0x8c,0x3d,0x2f] +// CHECK: sqrshrun2 v0.16b, v1.8h, #3 // encoding: [0x20,0x8c,0x0d,0x6f] +// CHECK: sqrshrun2 v0.8h, v1.4s, #3 // encoding: [0x20,0x8c,0x1d,0x6f] +// CHECK: sqrshrun2 v0.4s, v1.2d, #3 // encoding: [0x20,0x8c,0x3d,0x6f] + + +//------------------------------------------------------------------------------ +// Vector saturating shift right narrow by immediate +//------------------------------------------------------------------------------ + sqshrn v0.8b, v1.8h, #3 + sqshrn v0.4h, v1.4s, #3 + sqshrn v0.2s, v1.2d, #3 + sqshrn2 v0.16b, v1.8h, #3 + sqshrn2 v0.8h, v1.4s, #3 + sqshrn2 v0.4s, v1.2d, #3 + +// CHECK: sqshrn v0.8b, v1.8h, #3 // encoding: [0x20,0x94,0x0d,0x0f] +// CHECK: sqshrn v0.4h, v1.4s, #3 // encoding: [0x20,0x94,0x1d,0x0f] +// CHECK: sqshrn v0.2s, v1.2d, #3 // encoding: [0x20,0x94,0x3d,0x0f] +// CHECK: sqshrn2 v0.16b, v1.8h, #3 // encoding: [0x20,0x94,0x0d,0x4f] +// CHECK: sqshrn2 v0.8h, v1.4s, #3 // encoding: [0x20,0x94,0x1d,0x4f] +// CHECK: sqshrn2 v0.4s, v1.2d, #3 // encoding: [0x20,0x94,0x3d,0x4f] + + +//------------------------------------------------------------------------------ +// Vector saturating shift right narrow by immediate +//------------------------------------------------------------------------------ + uqshrn v0.8b, v1.8h, #3 + uqshrn v0.4h, v1.4s, #3 + uqshrn v0.2s, v1.2d, #3 + uqshrn2 v0.16b, v1.8h, #3 + uqshrn2 v0.8h, v1.4s, #3 + uqshrn2 v0.4s, v1.2d, #3 + +// CHECK: uqshrn v0.8b, v1.8h, #3 // encoding: [0x20,0x94,0x0d,0x2f] +// CHECK: uqshrn v0.4h, v1.4s, #3 // encoding: [0x20,0x94,0x1d,0x2f] +// CHECK: uqshrn v0.2s, v1.2d, #3 // encoding: [0x20,0x94,0x3d,0x2f] +// CHECK: uqshrn2 v0.16b, v1.8h, #3 // encoding: [0x20,0x94,0x0d,0x6f] +// CHECK: uqshrn2 v0.8h, v1.4s, #3 // encoding: [0x20,0x94,0x1d,0x6f] +// CHECK: uqshrn2 v0.4s, v1.2d, #3 // encoding: [0x20,0x94,0x3d,0x6f] + +//------------------------------------------------------------------------------ +// Vector saturating shift right rounded narrow by immediate +//------------------------------------------------------------------------------ + sqrshrn v0.8b, v1.8h, #3 + sqrshrn v0.4h, v1.4s, #3 + sqrshrn v0.2s, v1.2d, #3 + sqrshrn2 v0.16b, v1.8h, #3 + sqrshrn2 v0.8h, v1.4s, #3 + sqrshrn2 v0.4s, v1.2d, #3 + +// CHECK: sqrshrn v0.8b, v1.8h, #3 // encoding: [0x20,0x9c,0x0d,0x0f] +// CHECK: sqrshrn v0.4h, v1.4s, #3 // encoding: [0x20,0x9c,0x1d,0x0f] +// CHECK: sqrshrn v0.2s, v1.2d, #3 // encoding: [0x20,0x9c,0x3d,0x0f] +// CHECK: 
sqrshrn2 v0.16b, v1.8h, #3 // encoding: [0x20,0x9c,0x0d,0x4f] +// CHECK: sqrshrn2 v0.8h, v1.4s, #3 // encoding: [0x20,0x9c,0x1d,0x4f] +// CHECK: sqrshrn2 v0.4s, v1.2d, #3 // encoding: [0x20,0x9c,0x3d,0x4f] + + +//------------------------------------------------------------------------------ +// Vector saturating shift right rounded narrow by immediate +//------------------------------------------------------------------------------ + uqrshrn v0.8b, v1.8h, #3 + uqrshrn v0.4h, v1.4s, #3 + uqrshrn v0.2s, v1.2d, #3 + uqrshrn2 v0.16b, v1.8h, #3 + uqrshrn2 v0.8h, v1.4s, #3 + uqrshrn2 v0.4s, v1.2d, #3 + +// CHECK: uqrshrn v0.8b, v1.8h, #3 // encoding: [0x20,0x9c,0x0d,0x2f] +// CHECK: uqrshrn v0.4h, v1.4s, #3 // encoding: [0x20,0x9c,0x1d,0x2f] +// CHECK: uqrshrn v0.2s, v1.2d, #3 // encoding: [0x20,0x9c,0x3d,0x2f] +// CHECK: uqrshrn2 v0.16b, v1.8h, #3 // encoding: [0x20,0x9c,0x0d,0x6f] +// CHECK: uqrshrn2 v0.8h, v1.4s, #3 // encoding: [0x20,0x9c,0x1d,0x6f] +// CHECK: uqrshrn2 v0.4s, v1.2d, #3 // encoding: [0x20,0x9c,0x3d,0x6f] + + +//------------------------------------------------------------------------------ +// Fixed-point convert to floating-point +//------------------------------------------------------------------------------ + scvtf v0.2s, v1.2s, #3 + scvtf v0.4s, v1.4s, #3 + scvtf v0.2d, v1.2d, #3 + ucvtf v0.2s, v1.2s, #3 + ucvtf v0.4s, v1.4s, #3 + ucvtf v0.2d, v1.2d, #3 + +// CHECK: scvtf v0.2s, v1.2s, #3 // encoding: [0x20,0xe4,0x3d,0x0f] +// CHECK: scvtf v0.4s, v1.4s, #3 // encoding: [0x20,0xe4,0x3d,0x4f] +// CHECK: scvtf v0.2d, v1.2d, #3 // encoding: [0x20,0xe4,0x7d,0x4f] +// CHECK: ucvtf v0.2s, v1.2s, #3 // encoding: [0x20,0xe4,0x3d,0x2f] +// CHECK: ucvtf v0.4s, v1.4s, #3 // encoding: [0x20,0xe4,0x3d,0x6f] +// CHECK: ucvtf v0.2d, v1.2d, #3 // encoding: [0x20,0xe4,0x7d,0x6f] + +//------------------------------------------------------------------------------ +// Floating-point convert to fixed-point +//------------------------------------------------------------------------------ + fcvtzs v0.2s, v1.2s, #3 + fcvtzs v0.4s, v1.4s, #3 + fcvtzs v0.2d, v1.2d, #3 + fcvtzu v0.2s, v1.2s, #3 + fcvtzu v0.4s, v1.4s, #3 + fcvtzu v0.2d, v1.2d, #3 + + +// CHECK: fcvtzs v0.2s, v1.2s, #3 // encoding: [0x20,0xfc,0x3d,0x0f] +// CHECK: fcvtzs v0.4s, v1.4s, #3 // encoding: [0x20,0xfc,0x3d,0x4f] +// CHECK: fcvtzs v0.2d, v1.2d, #3 // encoding: [0x20,0xfc,0x7d,0x4f] +// CHECK: fcvtzu v0.2s, v1.2s, #3 // encoding: [0x20,0xfc,0x3d,0x2f] +// CHECK: fcvtzu v0.4s, v1.4s, #3 // encoding: [0x20,0xfc,0x3d,0x6f] +// CHECK: fcvtzu v0.2d, v1.2d, #3 // encoding: [0x20,0xfc,0x7d,0x6f] + diff --git a/test/MC/Disassembler/AArch64/neon-instructions.txt b/test/MC/Disassembler/AArch64/neon-instructions.txt index e599aba..a7029b2 100644 --- a/test/MC/Disassembler/AArch64/neon-instructions.txt +++ b/test/MC/Disassembler/AArch64/neon-instructions.txt @@ -694,3 +694,398 @@ 0x20 0xa4 0x13 0x4f 0x20 0xa4 0x13 0x2f 0x20 0xa4 0x0b 0x6f + +#----------------------------------------------------------------------------- +#Integer shift right (Signed) +#----------------------------------------------------------------------------- +# CHECK: sshr v0.8b, v1.8b, #3 +# CHECK: sshr v0.4h, v1.4h, #3 +# CHECK: sshr v0.2s, v1.2s, #3 +# CHECK: sshr v0.16b, v1.16b, #3 +# CHECK: sshr v0.8h, v1.8h, #3 +# CHECK: sshr v0.4s, v1.4s, #3 +# CHECK: sshr v0.2d, v1.2d, #3 +0x20,0x04,0x0d,0x0f +0x20,0x04,0x1d,0x0f +0x20,0x04,0x3d,0x0f +0x20,0x04,0x0d,0x4f +0x20,0x04,0x1d,0x4f +0x20,0x04,0x3d,0x4f +0x20,0x04,0x7d,0x4f + 
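Each hex line in this disassembler test is the instruction's four bytes, least-significant byte first. A hedged sketch of how the printed operands could be recovered from them: bit 30 is the Q bit (64- vs 128-bit vector), the highest set bit of immh selects the element size, and the right-shift amount is 2*esize - immh:immb. The struct and function below are illustrative only, not the actual AArch64Disassembler callbacks:

```cpp
#include <cassert>
#include <cstdint>

// Illustrative decode of the AdvSIMD shift-by-immediate right-shift forms,
// checked against the byte patterns listed in this test.
struct ShiftImmDecode {
  bool Q;            // true: 128-bit form (e.g. v0.16b), false: 64-bit (v0.8b)
  unsigned EltBits;  // 8, 16, 32 or 64
  unsigned Shift;    // decoded right-shift amount
};

static ShiftImmDecode decodeRightShift(const uint8_t Bytes[4]) {
  uint32_t Insn = Bytes[0] | (Bytes[1] << 8) | (Bytes[2] << 16) |
                  ((uint32_t)Bytes[3] << 24);
  unsigned ImmHB = (Insn >> 16) & 0x7f; // immh:immb, bits 22-16
  unsigned ImmH = ImmHB >> 3;           // immh alone
  unsigned EltBits = 8;                 // highest set immh bit picks esize
  while (ImmH > 1) { ImmH >>= 1; EltBits *= 2; }
  return { ((Insn >> 30) & 1) != 0, EltBits, 2 * EltBits - ImmHB };
}

int main() {
  const uint8_t SshrV0_8b[4] = {0x20, 0x04, 0x0d, 0x0f}; // sshr v0.8b, v1.8b, #3
  const uint8_t SshrV0_2d[4] = {0x20, 0x04, 0x7d, 0x4f}; // sshr v0.2d, v1.2d, #3
  ShiftImmDecode A = decodeRightShift(SshrV0_8b);
  assert(!A.Q && A.EltBits == 8 && A.Shift == 3);
  ShiftImmDecode B = decodeRightShift(SshrV0_2d);
  assert(B.Q && B.EltBits == 64 && B.Shift == 3);
  return 0;
}
```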
+#----------------------------------------------------------------------------- +#Integer shift right (Unsigned) +#----------------------------------------------------------------------------- +# CHECK: ushr v0.8b, v1.8b, #3 +# CHECK: ushr v0.4h, v1.4h, #3 +# CHECK: ushr v0.2s, v1.2s, #3 +# CHECK: ushr v0.16b, v1.16b, #3 +# CHECK: ushr v0.8h, v1.8h, #3 +# CHECK: ushr v0.4s, v1.4s, #3 +# CHECK: ushr v0.2d, v1.2d, #3 +0x20,0x04,0x0d,0x2f +0x20,0x04,0x1d,0x2f +0x20,0x04,0x3d,0x2f +0x20,0x04,0x0d,0x6f +0x20,0x04,0x1d,0x6f +0x20,0x04,0x3d,0x6f +0x20,0x04,0x7d,0x6f + +#----------------------------------------------------------------------------- +#Integer shift right and accumulate (Signed) +#----------------------------------------------------------------------------- +# CHECK: ssra v0.8b, v1.8b, #3 +# CHECK: ssra v0.4h, v1.4h, #3 +# CHECK: ssra v0.2s, v1.2s, #3 +# CHECK: ssra v0.16b, v1.16b, #3 +# CHECK: ssra v0.8h, v1.8h, #3 +# CHECK: ssra v0.4s, v1.4s, #3 +# CHECK: ssra v0.2d, v1.2d, #3 +0x20,0x14,0x0d,0x0f +0x20,0x14,0x1d,0x0f +0x20,0x14,0x3d,0x0f +0x20,0x14,0x0d,0x4f +0x20,0x14,0x1d,0x4f +0x20,0x14,0x3d,0x4f +0x20,0x14,0x7d,0x4f + +#----------------------------------------------------------------------------- +#Integer shift right and accumulate (Unsigned) +#----------------------------------------------------------------------------- +# CHECK: usra v0.8b, v1.8b, #3 +# CHECK: usra v0.4h, v1.4h, #3 +# CHECK: usra v0.2s, v1.2s, #3 +# CHECK: usra v0.16b, v1.16b, #3 +# CHECK: usra v0.8h, v1.8h, #3 +# CHECK: usra v0.4s, v1.4s, #3 +# CHECK: usra v0.2d, v1.2d, #3 +0x20,0x14,0x0d,0x2f +0x20,0x14,0x1d,0x2f +0x20,0x14,0x3d,0x2f +0x20,0x14,0x0d,0x6f +0x20,0x14,0x1d,0x6f +0x20,0x14,0x3d,0x6f +0x20,0x14,0x7d,0x6f + +#----------------------------------------------------------------------------- +#Integer rounding shift right (Signed) +#----------------------------------------------------------------------------- +# CHECK: srshr v0.8b, v1.8b, #3 +# CHECK: srshr v0.4h, v1.4h, #3 +# CHECK: srshr v0.2s, v1.2s, #3 +# CHECK: srshr v0.16b, v1.16b, #3 +# CHECK: srshr v0.8h, v1.8h, #3 +# CHECK: srshr v0.4s, v1.4s, #3 +# CHECK: srshr v0.2d, v1.2d, #3 +0x20,0x24,0x0d,0x0f +0x20,0x24,0x1d,0x0f +0x20,0x24,0x3d,0x0f +0x20,0x24,0x0d,0x4f +0x20,0x24,0x1d,0x4f +0x20,0x24,0x3d,0x4f +0x20,0x24,0x7d,0x4f + +#----------------------------------------------------------------------------- +#Integer rounding shift right (Unsigned) +#----------------------------------------------------------------------------- +# CHECK: urshr v0.8b, v1.8b, #3 +# CHECK: urshr v0.4h, v1.4h, #3 +# CHECK: urshr v0.2s, v1.2s, #3 +# CHECK: urshr v0.16b, v1.16b, #3 +# CHECK: urshr v0.8h, v1.8h, #3 +# CHECK: urshr v0.4s, v1.4s, #3 +# CHECK: urshr v0.2d, v1.2d, #3 +0x20,0x24,0x0d,0x2f +0x20,0x24,0x1d,0x2f +0x20,0x24,0x3d,0x2f +0x20,0x24,0x0d,0x6f +0x20,0x24,0x1d,0x6f +0x20,0x24,0x3d,0x6f +0x20,0x24,0x7d,0x6f + +#----------------------------------------------------------------------------- +#Integer rounding shift right and accumulate (Signed) +#----------------------------------------------------------------------------- +# CHECK: srsra v0.8b, v1.8b, #3 +# CHECK: srsra v0.4h, v1.4h, #3 +# CHECK: srsra v0.2s, v1.2s, #3 +# CHECK: srsra v0.16b, v1.16b, #3 +# CHECK: srsra v0.8h, v1.8h, #3 +# CHECK: srsra v0.4s, v1.4s, #3 +# CHECK: srsra v0.2d, v1.2d, #3 +0x20,0x34,0x0d,0x0f +0x20,0x34,0x1d,0x0f +0x20,0x34,0x3d,0x0f +0x20,0x34,0x0d,0x4f +0x20,0x34,0x1d,0x4f +0x20,0x34,0x3d,0x4f +0x20,0x34,0x7d,0x4f + 
+#----------------------------------------------------------------------------- +#Integer rounding shift right and accumulate (Unsigned) +#----------------------------------------------------------------------------- +# CHECK: ursra v0.8b, v1.8b, #3 +# CHECK: ursra v0.4h, v1.4h, #3 +# CHECK: ursra v0.2s, v1.2s, #3 +# CHECK: ursra v0.16b, v1.16b, #3 +# CHECK: ursra v0.8h, v1.8h, #3 +# CHECK: ursra v0.4s, v1.4s, #3 +# CHECK: ursra v0.2d, v1.2d, #3 +0x20,0x34,0x0d,0x2f +0x20,0x34,0x1d,0x2f +0x20,0x34,0x3d,0x2f +0x20,0x34,0x0d,0x6f +0x20,0x34,0x1d,0x6f +0x20,0x34,0x3d,0x6f +0x20,0x34,0x7d,0x6f + +#----------------------------------------------------------------------------- +#Integer shift right and insert +#----------------------------------------------------------------------------- +# CHECK: sri v0.8b, v1.8b, #3 +# CHECK: sri v0.4h, v1.4h, #3 +# CHECK: sri v0.2s, v1.2s, #3 +# CHECK: sri v0.16b, v1.16b, #3 +# CHECK: sri v0.8h, v1.8h, #3 +# CHECK: sri v0.4s, v1.4s, #3 +# CHECK: sri v0.2d, v1.2d, #3 +0x20,0x44,0x0d,0x2f +0x20,0x44,0x1d,0x2f +0x20,0x44,0x3d,0x2f +0x20,0x44,0x0d,0x6f +0x20,0x44,0x1d,0x6f +0x20,0x44,0x3d,0x6f +0x20,0x44,0x7d,0x6f + +#----------------------------------------------------------------------------- +#Integer shift left and insert +#----------------------------------------------------------------------------- +# CHECK: sli v0.8b, v1.8b, #3 +# CHECK: sli v0.4h, v1.4h, #3 +# CHECK: sli v0.2s, v1.2s, #3 +# CHECK: sli v0.16b, v1.16b, #3 +# CHECK: sli v0.8h, v1.8h, #3 +# CHECK: sli v0.4s, v1.4s, #3 +# CHECK: sli v0.2d, v1.2d, #3 +0x20,0x54,0x0b,0x2f +0x20,0x54,0x13,0x2f +0x20,0x54,0x23,0x2f +0x20,0x54,0x0b,0x6f +0x20,0x54,0x13,0x6f +0x20,0x54,0x23,0x6f +0x20,0x54,0x43,0x6f + +#----------------------------------------------------------------------------- +#Integer saturating shift left unsigned +#----------------------------------------------------------------------------- +# CHECK: sqshlu v0.8b, v1.8b, #3 +# CHECK: sqshlu v0.4h, v1.4h, #3 +# CHECK: sqshlu v0.2s, v1.2s, #3 +# CHECK: sqshlu v0.16b, v1.16b, #3 +# CHECK: sqshlu v0.8h, v1.8h, #3 +# CHECK: sqshlu v0.4s, v1.4s, #3 +# CHECK: sqshlu v0.2d, v1.2d, #3 +0x20,0x64,0x0b,0x2f +0x20,0x64,0x13,0x2f +0x20,0x64,0x23,0x2f +0x20,0x64,0x0b,0x6f +0x20,0x64,0x13,0x6f +0x20,0x64,0x23,0x6f +0x20,0x64,0x43,0x6f + +#----------------------------------------------------------------------------- +#Integer saturating shift left (Signed) +#----------------------------------------------------------------------------- +# CHECK: sqshl v0.8b, v1.8b, #3 +# CHECK: sqshl v0.4h, v1.4h, #3 +# CHECK: sqshl v0.2s, v1.2s, #3 +# CHECK: sqshl v0.16b, v1.16b, #3 +# CHECK: sqshl v0.8h, v1.8h, #3 +# CHECK: sqshl v0.4s, v1.4s, #3 +# CHECK: sqshl v0.2d, v1.2d, #3 +0x20,0x74,0x0b,0x0f +0x20,0x74,0x13,0x0f +0x20,0x74,0x23,0x0f +0x20,0x74,0x0b,0x4f +0x20,0x74,0x13,0x4f +0x20,0x74,0x23,0x4f +0x20,0x74,0x43,0x4f + +#----------------------------------------------------------------------------- +#Integer saturating shift left (Unsigned) +#----------------------------------------------------------------------------- +# CHECK: uqshl v0.8b, v1.8b, #3 +# CHECK: uqshl v0.4h, v1.4h, #3 +# CHECK: uqshl v0.2s, v1.2s, #3 +# CHECK: uqshl v0.16b, v1.16b, #3 +# CHECK: uqshl v0.8h, v1.8h, #3 +# CHECK: uqshl v0.4s, v1.4s, #3 +# CHECK: uqshl v0.2d, v1.2d, #3 +0x20,0x74,0x0b,0x2f +0x20,0x74,0x13,0x2f +0x20,0x74,0x23,0x2f +0x20,0x74,0x0b,0x6f +0x20,0x74,0x13,0x6f +0x20,0x74,0x23,0x6f +0x20,0x74,0x43,0x6f + 
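The sli, sqshlu, sqshl and uqshl bytes above show the complementary left-shift pattern: immh:immb holds esize + shift, which is why #3 appears as 0x0b, 0x13, 0x23 and 0x43 across the element sizes. A one-function sketch under that reading (illustrative name, not LLVM's):

```cpp
#include <cassert>

// Sketch of the left-shift immediate packing visible in the bytes above:
// immh:immb (bits 22-16) holds esize + shift for the shift-left forms.
static unsigned encodeLeftShiftImm(unsigned EltBits, unsigned Shift) {
  return EltBits + Shift;
}

int main() {
  assert(encodeLeftShiftImm(8, 3)  == 0x0b); // sli v0.8b, v1.8b, #3
  assert(encodeLeftShiftImm(16, 3) == 0x13); // sli v0.4h, v1.4h, #3
  assert(encodeLeftShiftImm(32, 3) == 0x23); // sqshl v0.2s, v1.2s, #3
  assert(encodeLeftShiftImm(64, 3) == 0x43); // uqshl v0.2d, v1.2d, #3
  return 0;
}
```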
+#----------------------------------------------------------------------------- +#Integer shift right narrow +#----------------------------------------------------------------------------- +# CHECK: shrn v0.8b, v1.8h, #3 +# CHECK: shrn v0.4h, v1.4s, #3 +# CHECK: shrn v0.2s, v1.2d, #3 +# CHECK: shrn2 v0.16b, v1.8h, #3 +# CHECK: shrn2 v0.8h, v1.4s, #3 +# CHECK: shrn2 v0.4s, v1.2d, #3 +0x20,0x84,0x0d,0x0f +0x20,0x84,0x1d,0x0f +0x20,0x84,0x3d,0x0f +0x20,0x84,0x0d,0x4f +0x20,0x84,0x1d,0x4f +0x20,0x84,0x3d,0x4f + +#----------------------------------------------------------------------------- +#Integer saturating shift right unsigned narrow (Signed) +#----------------------------------------------------------------------------- +# CHECK: sqshrun v0.8b, v1.8h, #3 +# CHECK: sqshrun v0.4h, v1.4s, #3 +# CHECK: sqshrun v0.2s, v1.2d, #3 +# CHECK: sqshrun2 v0.16b, v1.8h, #3 +# CHECK: sqshrun2 v0.8h, v1.4s, #3 +# CHECK: sqshrun2 v0.4s, v1.2d, #3 +0x20,0x84,0x0d,0x2f +0x20,0x84,0x1d,0x2f +0x20,0x84,0x3d,0x2f +0x20,0x84,0x0d,0x6f +0x20,0x84,0x1d,0x6f +0x20,0x84,0x3d,0x6f + +#----------------------------------------------------------------------------- +#Integer rounding shift right narrow +#----------------------------------------------------------------------------- +# CHECK: rshrn v0.8b, v1.8h, #3 +# CHECK: rshrn v0.4h, v1.4s, #3 +# CHECK: rshrn v0.2s, v1.2d, #3 +# CHECK: rshrn2 v0.16b, v1.8h, #3 +# CHECK: rshrn2 v0.8h, v1.4s, #3 +# CHECK: rshrn2 v0.4s, v1.2d, #3 +0x20,0x8c,0x0d,0x0f +0x20,0x8c,0x1d,0x0f +0x20,0x8c,0x3d,0x0f +0x20,0x8c,0x0d,0x4f +0x20,0x8c,0x1d,0x4f +0x20,0x8c,0x3d,0x4f + +#----------------------------------------------------------------------------- +#Integer saturating shift right rounded unsigned narrow (Signed) +#----------------------------------------------------------------------------- +# CHECK: sqrshrun v0.8b, v1.8h, #3 +# CHECK: sqrshrun v0.4h, v1.4s, #3 +# CHECK: sqrshrun v0.2s, v1.2d, #3 +# CHECK: sqrshrun2 v0.16b, v1.8h, #3 +# CHECK: sqrshrun2 v0.8h, v1.4s, #3 +# CHECK: sqrshrun2 v0.4s, v1.2d, #3 +0x20,0x8c,0x0d,0x2f +0x20,0x8c,0x1d,0x2f +0x20,0x8c,0x3d,0x2f +0x20,0x8c,0x0d,0x6f +0x20,0x8c,0x1d,0x6f +0x20,0x8c,0x3d,0x6f + +#----------------------------------------------------------------------------- +#Integer saturating shift right narrow (Signed) +#----------------------------------------------------------------------------- +# CHECK: sqshrn v0.8b, v1.8h, #3 +# CHECK: sqshrn v0.4h, v1.4s, #3 +# CHECK: sqshrn v0.2s, v1.2d, #3 +# CHECK: sqshrn2 v0.16b, v1.8h, #3 +# CHECK: sqshrn2 v0.8h, v1.4s, #3 +# CHECK: sqshrn2 v0.4s, v1.2d, #3 +0x20,0x94,0x0d,0x0f +0x20,0x94,0x1d,0x0f +0x20,0x94,0x3d,0x0f +0x20,0x94,0x0d,0x4f +0x20,0x94,0x1d,0x4f +0x20,0x94,0x3d,0x4f + +#----------------------------------------------------------------------------- +#Integer saturating shift right narrow (Unsigned) +#----------------------------------------------------------------------------- +# CHECK: uqshrn v0.8b, v1.8h, #3 +# CHECK: uqshrn v0.4h, v1.4s, #3 +# CHECK: uqshrn v0.2s, v1.2d, #3 +# CHECK: uqshrn2 v0.16b, v1.8h, #3 +# CHECK: uqshrn2 v0.8h, v1.4s, #3 +# CHECK: uqshrn2 v0.4s, v1.2d, #3 +0x20,0x94,0x0d,0x2f +0x20,0x94,0x1d,0x2f +0x20,0x94,0x3d,0x2f +0x20,0x94,0x0d,0x6f +0x20,0x94,0x1d,0x6f +0x20,0x94,0x3d,0x6f + +#----------------------------------------------------------------------------- +#Integer saturating shift right rounded narrow (Signed) +#----------------------------------------------------------------------------- +# CHECK: sqrshrn v0.8b, v1.8h, #3 +# CHECK: sqrshrn v0.4h, v1.4s, #3 
+# CHECK: sqrshrn v0.2s, v1.2d, #3 +# CHECK: sqrshrn2 v0.16b, v1.8h, #3 +# CHECK: sqrshrn2 v0.8h, v1.4s, #3 +# CHECK: sqrshrn2 v0.4s, v1.2d, #3 +0x20,0x9c,0x0d,0x0f +0x20,0x9c,0x1d,0x0f +0x20,0x9c,0x3d,0x0f +0x20,0x9c,0x0d,0x4f +0x20,0x9c,0x1d,0x4f +0x20,0x9c,0x3d,0x4f + +#----------------------------------------------------------------------------- +#Integer saturating shift right rounded narrow (Unsigned) +#----------------------------------------------------------------------------- +# CHECK: uqrshrn v0.8b, v1.8h, #3 +# CHECK: uqrshrn v0.4h, v1.4s, #3 +# CHECK: uqrshrn v0.2s, v1.2d, #3 +# CHECK: uqrshrn2 v0.16b, v1.8h, #3 +# CHECK: uqrshrn2 v0.8h, v1.4s, #3 +# CHECK: uqrshrn2 v0.4s, v1.2d, #3 +0x20,0x9c,0x0d,0x2f +0x20,0x9c,0x1d,0x2f +0x20,0x9c,0x3d,0x2f +0x20,0x9c,0x0d,0x6f +0x20,0x9c,0x1d,0x6f +0x20,0x9c,0x3d,0x6f + +#----------------------------------------------------------------------------- +#Fixed-point convert to floating-point +#----------------------------------------------------------------------------- +# CHECK: scvtf v0.2s, v1.2s, #3 +# CHECK: scvtf v0.4s, v1.4s, #3 +# CHECK: scvtf v0.2d, v1.2d, #3 +# CHECK: ucvtf v0.2s, v1.2s, #3 +# CHECK: ucvtf v0.4s, v1.4s, #3 +# CHECK: ucvtf v0.2d, v1.2d, #3 + +0x20,0xe4,0x3d,0x0f +0x20,0xe4,0x3d,0x4f +0x20,0xe4,0x7d,0x4f +0x20,0xe4,0x3d,0x2f +0x20,0xe4,0x3d,0x6f +0x20,0xe4,0x7d,0x6f + +#----------------------------------------------------------------------------- +#Floating-point convert to fixed-point +#----------------------------------------------------------------------------- +# CHECK: fcvtzs v0.2s, v1.2s, #3 +# CHECK: fcvtzs v0.4s, v1.4s, #3 +# CHECK: fcvtzs v0.2d, v1.2d, #3 +# CHECK: fcvtzu v0.2s, v1.2s, #3 +# CHECK: fcvtzu v0.4s, v1.4s, #3 +# CHECK: fcvtzu v0.2d, v1.2d, #3 +0x20,0xfc,0x3d,0x0f +0x20,0xfc,0x3d,0x4f +0x20,0xfc,0x7d,0x4f +0x20,0xfc,0x3d,0x2f +0x20,0xfc,0x3d,0x6f +0x20,0xfc,0x7d,0x6f |