From 1d28917dc39f38847f5c69c0a60cd1491430bdad Mon Sep 17 00:00:00 2001 From: Chad Rosier Date: Thu, 31 Oct 2013 19:28:44 +0000 Subject: [AArch64] Add support for NEON scalar shift immediate instructions. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@193790 91177308-0d34-0410-b5e6-96231b3b80d8 --- include/llvm/IR/IntrinsicsAArch64.td | 40 ++ lib/Target/AArch64/AArch64InstrFormats.td | 19 + lib/Target/AArch64/AArch64InstrNEON.td | 274 ++++++++++- lib/Target/AArch64/AsmParser/AArch64AsmParser.cpp | 31 ++ .../AArch64/Disassembler/AArch64Disassembler.cpp | 52 ++ .../AArch64/MCTargetDesc/AArch64MCCodeEmitter.cpp | 29 ++ test/CodeGen/AArch64/neon-scalar-shift-imm.ll | 527 +++++++++++++++++++++ test/MC/AArch64/neon-diagnostics.s | 24 +- test/MC/AArch64/neon-scalar-shift-imm.s | 186 ++++++++ test/MC/Disassembler/AArch64/neon-instructions.txt | 162 +++++++ 10 files changed, 1331 insertions(+), 13 deletions(-) create mode 100644 test/CodeGen/AArch64/neon-scalar-shift-imm.ll create mode 100644 test/MC/AArch64/neon-scalar-shift-imm.s diff --git a/include/llvm/IR/IntrinsicsAArch64.td b/include/llvm/IR/IntrinsicsAArch64.td index 76b9215..2dfe02a 100644 --- a/include/llvm/IR/IntrinsicsAArch64.td +++ b/include/llvm/IR/IntrinsicsAArch64.td @@ -220,4 +220,44 @@ def int_aarch64_neon_vqdmlsl : Neon_3Arg_Long_Intrinsic; // Signed Saturating Doubling Multiply Long def int_aarch64_neon_vqdmull : Neon_2Arg_Long_Intrinsic; + +class Neon_2Arg_ShiftImm_Intrinsic + : Intrinsic<[llvm_v1i64_ty], [llvm_v1i64_ty, llvm_i32_ty], [IntrNoMem]>; + +class Neon_3Arg_ShiftImm_Intrinsic + : Intrinsic<[llvm_v1i64_ty], [llvm_v1i64_ty, llvm_v1i64_ty, llvm_i32_ty], + [IntrNoMem]>; + +// Scalar Shift Right (Immediate) +def int_aarch64_neon_vshrds_n : Neon_2Arg_ShiftImm_Intrinsic; +def int_aarch64_neon_vshrdu_n : Neon_2Arg_ShiftImm_Intrinsic; + +// Scalar Rounding Shift Right (Immediate) +def int_aarch64_neon_vrshrds_n : Neon_2Arg_ShiftImm_Intrinsic; +def int_aarch64_neon_vrshrdu_n : 
Neon_2Arg_ShiftImm_Intrinsic; + +// Scalar Shift Right and Accumulate (Immediate) +def int_aarch64_neon_vsrads_n : Neon_3Arg_ShiftImm_Intrinsic; +def int_aarch64_neon_vsradu_n : Neon_3Arg_ShiftImm_Intrinsic; + +// Scalar Rounding Shift Right and Accumulate (Immediate) +def int_aarch64_neon_vrsrads_n : Neon_3Arg_ShiftImm_Intrinsic; +def int_aarch64_neon_vrsradu_n : Neon_3Arg_ShiftImm_Intrinsic; + +// Scalar Shift Left (Immediate) +def int_aarch64_neon_vshld_n : Neon_2Arg_ShiftImm_Intrinsic; + +// Scalar Saturating Shift Left (Immediate) +def int_aarch64_neon_vqshls_n : Neon_N2V_Intrinsic; +def int_aarch64_neon_vqshlu_n : Neon_N2V_Intrinsic; + +// Scalar Signed Saturating Shift Left Unsigned (Immediate) +def int_aarch64_neon_vqshlus_n : Neon_N2V_Intrinsic; + +// Shift Right And Insert (Immediate) +def int_aarch64_neon_vsrid_n : Neon_2Arg_ShiftImm_Intrinsic; + +// Shift Left And Insert (Immediate) +def int_aarch64_neon_vslid_n : Neon_2Arg_ShiftImm_Intrinsic; + } diff --git a/lib/Target/AArch64/AArch64InstrFormats.td b/lib/Target/AArch64/AArch64InstrFormats.td index 7a41ce0..44dbc9d 100644 --- a/lib/Target/AArch64/AArch64InstrFormats.td +++ b/lib/Target/AArch64/AArch64InstrFormats.td @@ -1234,5 +1234,24 @@ class NeonI_Scalar3Diff size, bits<4> opcode, // Inherit Rd in 4-0 } +// Format AdvSIMD scalar shift by immediate + +class NeonI_ScalarShiftImm opcode, + dag outs, dag ins, string asmstr, + list patterns, InstrItinClass itin> + : A64InstRdn { + bits<4> Imm4; + bits<3> Imm3; + let Inst{31-30} = 0b01; + let Inst{29} = u; + let Inst{28-23} = 0b111110; + let Inst{22-19} = Imm4; + let Inst{18-16} = Imm3; + let Inst{15-11} = opcode; + let Inst{10} = 0b1; + // Inherit Rn in 9-5 + // Inherit Rd in 4-0 +} + } diff --git a/lib/Target/AArch64/AArch64InstrNEON.td b/lib/Target/AArch64/AArch64InstrNEON.td index b9f83f7..5e58daf 100644 --- a/lib/Target/AArch64/AArch64InstrNEON.td +++ b/lib/Target/AArch64/AArch64InstrNEON.td @@ -1450,7 +1450,7 @@ def imm0_63 : Operand { let 
ParserMatchClass = uimm6_asmoperand; } -// Shift Right Immediate - A shift right immediate is encoded differently from +// Shift Right/Left Immediate - A shift immediate is encoded differently from // other shift immediates. The immh:immb field is encoded like so: // // Offset Encoding @@ -1458,6 +1458,11 @@ def imm0_63 : Operand { // 16 immh:immb<6:4> = '001xxxx', is encoded in immh:immb<3:0> // 32 immh:immb<6:5> = '01xxxxx', is encoded in immh:immb<4:0> // 64 immh:immb<6> = '1xxxxxx', is encoded in immh:immb<5:0> +// +// The shift right immediate amount, in the range 1 to element bits, is computed +// as 2 * Offset - UInt(immh:immb). The shift left immediate amount, in the range 0 +// to element bits - 1, is computed as UInt(immh:immb) - Offset. + class shr_imm_asmoperands : AsmOperandClass { let Name = "ShrImm" # OFFSET; let RenderMethod = "addImmOperands"; @@ -1481,6 +1486,29 @@ def shr_imm16 : shr_imm<"16">; def shr_imm32 : shr_imm<"32">; def shr_imm64 : shr_imm<"64">; +class shl_imm_asmoperands : AsmOperandClass { + let Name = "ShlImm" # OFFSET; + let RenderMethod = "addImmOperands"; + let DiagnosticType = "ShlImm" # OFFSET; +} + +class shl_imm : Operand { + let EncoderMethod = "getShiftLeftImm" # OFFSET; + let DecoderMethod = "DecodeShiftLeftImm" # OFFSET; + let ParserMatchClass = + !cast("shl_imm" # OFFSET # "_asmoperand"); +} + +def shl_imm8_asmoperand : shl_imm_asmoperands<"8">; +def shl_imm16_asmoperand : shl_imm_asmoperands<"16">; +def shl_imm32_asmoperand : shl_imm_asmoperands<"32">; +def shl_imm64_asmoperand : shl_imm_asmoperands<"64">; + +def shl_imm8 : shl_imm<"8">; +def shl_imm16 : shl_imm<"16">; +def shl_imm32 : shl_imm<"32">; +def shl_imm64 : shl_imm<"64">; + class N2VShift opcode, string asmop, string T, RegisterOperand VPRC, ValueType Ty, Operand ImmTy, SDNode OpNode> : NeonI_2VShiftImm; } +// Scalar Shift By Immediate + +class NeonI_ScalarShiftImm_size opcode, string asmop, + RegisterClass FPRC, Operand ImmTy> + : NeonI_ScalarShiftImm; + 
+multiclass NeonI_ScalarShiftRightImm_D_size opcode, + string asmop> { + def ddi : NeonI_ScalarShiftImm_size { + bits<6> Imm; + let Inst{22} = 0b1; // immh:immb = 1xxxxxx + let Inst{21-16} = Imm; + } +} + +multiclass NeonI_ScalarShiftRightImm_BHSD_size opcode, + string asmop> + : NeonI_ScalarShiftRightImm_D_size { + def bbi : NeonI_ScalarShiftImm_size { + bits<3> Imm; + let Inst{22-19} = 0b0001; // immh:immb = 0001xxx + let Inst{18-16} = Imm; + } + def hhi : NeonI_ScalarShiftImm_size { + bits<4> Imm; + let Inst{22-20} = 0b001; // immh:immb = 001xxxx + let Inst{19-16} = Imm; + } + def ssi : NeonI_ScalarShiftImm_size { + bits<5> Imm; + let Inst{22-21} = 0b01; // immh:immb = 01xxxxx + let Inst{20-16} = Imm; + } +} + +multiclass NeonI_ScalarShiftLeftImm_D_size opcode, + string asmop> { + def ddi : NeonI_ScalarShiftImm_size { + bits<6> Imm; + let Inst{22} = 0b1; // immh:immb = 1xxxxxx + let Inst{21-16} = Imm; + } +} + +multiclass NeonI_ScalarShiftLeftImm_BHSD_size opcode, + string asmop> + : NeonI_ScalarShiftLeftImm_D_size { + def bbi : NeonI_ScalarShiftImm_size { + bits<3> Imm; + let Inst{22-19} = 0b0001; // immh:immb = 0001xxx + let Inst{18-16} = Imm; + } + def hhi : NeonI_ScalarShiftImm_size { + bits<4> Imm; + let Inst{22-20} = 0b001; // immh:immb = 001xxxx + let Inst{19-16} = Imm; + } + def ssi : NeonI_ScalarShiftImm_size { + bits<5> Imm; + let Inst{22-21} = 0b01; // immh:immb = 01xxxxx + let Inst{20-16} = Imm; + } +} + +class NeonI_ScalarShiftImm_accum_D_size opcode, string asmop> + : NeonI_ScalarShiftImm { + bits<6> Imm; + let Inst{22} = 0b1; // immh:immb = 1xxxxxx + let Inst{21-16} = Imm; + let Constraints = "$Src = $Rd"; +} + +class NeonI_ScalarShiftImm_narrow_size opcode, string asmop, + RegisterClass FPRCD, RegisterClass FPRCS, + Operand ImmTy> + : NeonI_ScalarShiftImm; + +multiclass NeonI_ScalarShiftImm_narrow_HSD_size opcode, + string asmop> { + def bhi : NeonI_ScalarShiftImm_narrow_size { + bits<3> Imm; + let Inst{22-19} = 0b0001; // immh:immb = 0001xxx + 
let Inst{18-16} = Imm; + } + def hsi : NeonI_ScalarShiftImm_narrow_size { + bits<4> Imm; + let Inst{22-20} = 0b001; // immh:immb = 001xxxx + let Inst{19-16} = Imm; + } + def sdi : NeonI_ScalarShiftImm_narrow_size { + bits<5> Imm; + let Inst{22-21} = 0b01; // immh:immb = 01xxxxx + let Inst{20-16} = Imm; + } +} + +multiclass Neon_ScalarShiftImm_D_size_patterns { + def ddi : Pat<(v1i64 (opnode (v1i64 FPR64:$Rn), (i32 imm:$Imm))), + (INSTD FPR64:$Rn, imm:$Imm)>; +} + +multiclass Neon_ScalarShiftImm_BHSD_size_patterns + : Neon_ScalarShiftImm_D_size_patterns { + def bbi : Pat<(v1i8 (opnode (v1i8 FPR8:$Rn), (i32 imm:$Imm))), + (INSTB FPR8:$Rn, imm:$Imm)>; + def hhi : Pat<(v1i16 (opnode (v1i16 FPR16:$Rn), (i32 imm:$Imm))), + (INSTH FPR16:$Rn, imm:$Imm)>; + def ssi : Pat<(v1i32 (opnode (v1i32 FPR32:$Rn), (i32 imm:$Imm))), + (INSTS FPR32:$Rn, imm:$Imm)>; +} + +class Neon_ScalarShiftImm_accum_D_size_patterns + : Pat<(v1i64 (opnode (v1i64 FPR64:$Src), (v1i64 FPR64:$Rn), (i32 imm:$Imm))), + (INSTD FPR64:$Src, FPR64:$Rn, imm:$Imm)>; + +multiclass Neon_ScalarShiftImm_narrow_HSD_size_patterns< + SDPatternOperator opnode, + Instruction INSTH, + Instruction INSTS, + Instruction INSTD> { + def bhi : Pat<(v1i8 (opnode (v1i16 FPR16:$Rn), (i32 imm:$Imm))), + (INSTH FPR16:$Rn, imm:$Imm)>; + def hsi : Pat<(v1i16 (opnode (v1i32 FPR32:$Rn), (i32 imm:$Imm))), + (INSTS FPR32:$Rn, imm:$Imm)>; + def sdi : Pat<(v1i32 (opnode (v1i64 FPR64:$Rn), (i32 imm:$Imm))), + (INSTD FPR64:$Rn, imm:$Imm)>; +} + +// Scalar Signed Shift Right (Immediate) +defm SSHR : NeonI_ScalarShiftRightImm_D_size<0b0, 0b00000, "sshr">; +defm : Neon_ScalarShiftImm_D_size_patterns; + +// Scalar Unsigned Shift Right (Immediate) +defm USHR : NeonI_ScalarShiftRightImm_D_size<0b1, 0b00000, "ushr">; +defm : Neon_ScalarShiftImm_D_size_patterns; + +// Scalar Signed Rounding Shift Right (Immediate) +defm SRSHR : NeonI_ScalarShiftRightImm_D_size<0b0, 0b00100, "srshr">; +defm : Neon_ScalarShiftImm_D_size_patterns; + +// Scalar Unsigned 
Rounding Shift Right (Immediate) +defm URSHR : NeonI_ScalarShiftRightImm_D_size<0b1, 0b00100, "urshr">; +defm : Neon_ScalarShiftImm_D_size_patterns; + +// Scalar Signed Shift Right and Accumulate (Immediate) +def SSRA : NeonI_ScalarShiftImm_accum_D_size<0b0, 0b00010, "ssra">; +def : Neon_ScalarShiftImm_accum_D_size_patterns; + +// Scalar Unsigned Shift Right and Accumulate (Immediate) +def USRA : NeonI_ScalarShiftImm_accum_D_size<0b1, 0b00010, "usra">; +def : Neon_ScalarShiftImm_accum_D_size_patterns; + +// Scalar Signed Rounding Shift Right and Accumulate (Immediate) +def SRSRA : NeonI_ScalarShiftImm_accum_D_size<0b0, 0b00110, "srsra">; +def : Neon_ScalarShiftImm_accum_D_size_patterns; + +// Scalar Unsigned Rounding Shift Right and Accumulate (Immediate) +def URSRA : NeonI_ScalarShiftImm_accum_D_size<0b1, 0b00110, "ursra">; +def : Neon_ScalarShiftImm_accum_D_size_patterns; + +// Scalar Shift Left (Immediate) +defm SHL : NeonI_ScalarShiftLeftImm_D_size<0b0, 0b01010, "shl">; +defm : Neon_ScalarShiftImm_D_size_patterns; + +// Signed Saturating Shift Left (Immediate) +defm SQSHL : NeonI_ScalarShiftLeftImm_BHSD_size<0b0, 0b01110, "sqshl">; +defm : Neon_ScalarShiftImm_BHSD_size_patterns; + +// Unsigned Saturating Shift Left (Immediate) +defm UQSHL : NeonI_ScalarShiftLeftImm_BHSD_size<0b1, 0b01110, "uqshl">; +defm : Neon_ScalarShiftImm_BHSD_size_patterns; + +// Signed Saturating Shift Left Unsigned (Immediate) +defm SQSHLU : NeonI_ScalarShiftLeftImm_BHSD_size<0b1, 0b01100, "sqshlu">; +defm : Neon_ScalarShiftImm_BHSD_size_patterns; + +// Shift Right And Insert (Immediate) +defm SRI : NeonI_ScalarShiftRightImm_D_size<0b1, 0b01000, "sri">; +defm : Neon_ScalarShiftImm_D_size_patterns; + +// Shift Left And Insert (Immediate) +defm SLI : NeonI_ScalarShiftLeftImm_D_size<0b1, 0b01010, "sli">; +defm : Neon_ScalarShiftImm_D_size_patterns; + +// Signed Saturating Shift Right Narrow (Immediate) +defm SQSHRN : NeonI_ScalarShiftImm_narrow_HSD_size<0b0, 0b10010, "sqshrn">; +defm : 
Neon_ScalarShiftImm_narrow_HSD_size_patterns; + +// Unsigned Saturating Shift Right Narrow (Immediate) +defm UQSHRN : NeonI_ScalarShiftImm_narrow_HSD_size<0b1, 0b10010, "uqshrn">; +defm : Neon_ScalarShiftImm_narrow_HSD_size_patterns; + +// Signed Saturating Rounded Shift Right Narrow (Immediate) +defm SQRSHRN : NeonI_ScalarShiftImm_narrow_HSD_size<0b0, 0b10011, "sqrshrn">; +defm : Neon_ScalarShiftImm_narrow_HSD_size_patterns; + +// Unsigned Saturating Rounded Shift Right Narrow (Immediate) +defm UQRSHRN : NeonI_ScalarShiftImm_narrow_HSD_size<0b1, 0b10011, "uqrshrn">; +defm : Neon_ScalarShiftImm_narrow_HSD_size_patterns; + +// Signed Saturating Shift Right Unsigned Narrow (Immediate) +defm SQSHRUN : NeonI_ScalarShiftImm_narrow_HSD_size<0b1, 0b10000, "sqshrun">; +defm : Neon_ScalarShiftImm_narrow_HSD_size_patterns; + +// Signed Saturating Rounded Shift Right Unsigned Narrow (Immediate) +defm SQRSHRUN : NeonI_ScalarShiftImm_narrow_HSD_size<0b1, 0b10001, "sqrshrun">; +defm : Neon_ScalarShiftImm_narrow_HSD_size_patterns; + // Scalar Integer Add let isCommutable = 1 in { def ADDddd : NeonI_Scalar3Same_D_size<0b0, 0b10000, "add">; diff --git a/lib/Target/AArch64/AsmParser/AArch64AsmParser.cpp b/lib/Target/AArch64/AsmParser/AArch64AsmParser.cpp index 127c7ec..1f7a7d8 100644 --- a/lib/Target/AArch64/AsmParser/AArch64AsmParser.cpp +++ b/lib/Target/AArch64/AsmParser/AArch64AsmParser.cpp @@ -698,6 +698,25 @@ public: bool isShrImm64() const { return isShrFixedWidth(64); } + // if 0 <= value < w, return true + bool isShlFixedWidth(int w) const { + if (!isImm()) + return false; + const MCConstantExpr *CE = dyn_cast(getImm()); + if (!CE) + return false; + int64_t Value = CE->getValue(); + return Value >= 0 && Value < w; + } + + bool isShlImm8() const { return isShlFixedWidth(8); } + + bool isShlImm16() const { return isShlFixedWidth(16); } + + bool isShlImm32() const { return isShlFixedWidth(32); } + + bool isShlImm64() const { return isShlFixedWidth(64); } + bool 
isNeonMovImmShiftLSL() const { if (!isShiftOrExtend()) return false; @@ -2480,6 +2499,18 @@ bool AArch64AsmParser::MatchAndEmitInstruction(SMLoc IDLoc, unsigned &Opcode, case Match_ShrImm64: return Error(((AArch64Operand *)Operands[ErrorInfo])->getStartLoc(), "expected integer in range [1, 64]"); + case Match_ShlImm8: + return Error(((AArch64Operand *)Operands[ErrorInfo])->getStartLoc(), + "expected integer in range [0, 7]"); + case Match_ShlImm16: + return Error(((AArch64Operand *)Operands[ErrorInfo])->getStartLoc(), + "expected integer in range [0, 15]"); + case Match_ShlImm32: + return Error(((AArch64Operand *)Operands[ErrorInfo])->getStartLoc(), + "expected integer in range [0, 31]"); + case Match_ShlImm64: + return Error(((AArch64Operand *)Operands[ErrorInfo])->getStartLoc(), + "expected integer in range [0, 63]"); } llvm_unreachable("Implement any new match types added!"); diff --git a/lib/Target/AArch64/Disassembler/AArch64Disassembler.cpp b/lib/Target/AArch64/Disassembler/AArch64Disassembler.cpp index 9a97fbe..db1da49 100644 --- a/lib/Target/AArch64/Disassembler/AArch64Disassembler.cpp +++ b/lib/Target/AArch64/Disassembler/AArch64Disassembler.cpp @@ -141,6 +141,18 @@ static DecodeStatus DecodeShiftRightImm64(MCInst &Inst, unsigned Val, uint64_t Address, const void *Decoder); +static DecodeStatus DecodeShiftLeftImm8(MCInst &Inst, unsigned Val, + uint64_t Address, const void *Decoder); +static DecodeStatus DecodeShiftLeftImm16(MCInst &Inst, unsigned Val, + uint64_t Address, + const void *Decoder); +static DecodeStatus DecodeShiftLeftImm32(MCInst &Inst, unsigned Val, + uint64_t Address, + const void *Decoder); +static DecodeStatus DecodeShiftLeftImm64(MCInst &Inst, unsigned Val, + uint64_t Address, + const void *Decoder); + template static DecodeStatus DecodeMoveWideImmOperand(llvm::MCInst &Inst, unsigned FullImm, @@ -509,6 +521,46 @@ static DecodeStatus DecodeShiftRightImm64(MCInst &Inst, unsigned Val, return MCDisassembler::Success; } +static DecodeStatus 
DecodeShiftLeftImm8(MCInst &Inst, unsigned Val, + uint64_t Address, + const void *Decoder) { + if (Val > 7) + return MCDisassembler::Fail; + + Inst.addOperand(MCOperand::CreateImm(Val)); + return MCDisassembler::Success; +} + +static DecodeStatus DecodeShiftLeftImm16(MCInst &Inst, unsigned Val, + uint64_t Address, + const void *Decoder) { + if (Val > 15) + return MCDisassembler::Fail; + + Inst.addOperand(MCOperand::CreateImm(Val)); + return MCDisassembler::Success; +} + +static DecodeStatus DecodeShiftLeftImm32(MCInst &Inst, unsigned Val, + uint64_t Address, + const void *Decoder) { + if (Val > 31) + return MCDisassembler::Fail; + + Inst.addOperand(MCOperand::CreateImm(Val)); + return MCDisassembler::Success; +} + +static DecodeStatus DecodeShiftLeftImm64(MCInst &Inst, unsigned Val, + uint64_t Address, + const void *Decoder) { + if (Val > 63) + return MCDisassembler::Fail; + + Inst.addOperand(MCOperand::CreateImm(Val)); + return MCDisassembler::Success; +} + template static DecodeStatus DecodeMoveWideImmOperand(llvm::MCInst &Inst, unsigned FullImm, diff --git a/lib/Target/AArch64/MCTargetDesc/AArch64MCCodeEmitter.cpp b/lib/Target/AArch64/MCTargetDesc/AArch64MCCodeEmitter.cpp index 7bfaecc..b41c566 100644 --- a/lib/Target/AArch64/MCTargetDesc/AArch64MCCodeEmitter.cpp +++ b/lib/Target/AArch64/MCTargetDesc/AArch64MCCodeEmitter.cpp @@ -68,6 +68,15 @@ public: unsigned getShiftRightImm64(const MCInst &MI, unsigned Op, SmallVectorImpl &Fixups) const; + unsigned getShiftLeftImm8(const MCInst &MI, unsigned Op, + SmallVectorImpl &Fixups) const; + unsigned getShiftLeftImm16(const MCInst &MI, unsigned Op, + SmallVectorImpl &Fixups) const; + unsigned getShiftLeftImm32(const MCInst &MI, unsigned Op, + SmallVectorImpl &Fixups) const; + unsigned getShiftLeftImm64(const MCInst &MI, unsigned Op, + SmallVectorImpl &Fixups) const; + // Labels are handled mostly the same way: a symbol is needed, and // just gets some fixup attached. 
template @@ -338,6 +347,26 @@ unsigned AArch64MCCodeEmitter::getShiftRightImm64( return 64 - MI.getOperand(Op).getImm(); } +unsigned AArch64MCCodeEmitter::getShiftLeftImm8( + const MCInst &MI, unsigned Op, SmallVectorImpl &Fixups) const { + return MI.getOperand(Op).getImm() - 8; +} + +unsigned AArch64MCCodeEmitter::getShiftLeftImm16( + const MCInst &MI, unsigned Op, SmallVectorImpl &Fixups) const { + return MI.getOperand(Op).getImm() - 16; +} + +unsigned AArch64MCCodeEmitter::getShiftLeftImm32( + const MCInst &MI, unsigned Op, SmallVectorImpl &Fixups) const { + return MI.getOperand(Op).getImm() - 32; +} + +unsigned AArch64MCCodeEmitter::getShiftLeftImm64( + const MCInst &MI, unsigned Op, SmallVectorImpl &Fixups) const { + return MI.getOperand(Op).getImm() - 64; +} + template unsigned AArch64MCCodeEmitter::getLabelOpValue(const MCInst &MI, unsigned OpIdx, diff --git a/test/CodeGen/AArch64/neon-scalar-shift-imm.ll b/test/CodeGen/AArch64/neon-scalar-shift-imm.ll new file mode 100644 index 0000000..b11540f --- /dev/null +++ b/test/CodeGen/AArch64/neon-scalar-shift-imm.ll @@ -0,0 +1,527 @@ +; RUN: llc < %s -verify-machineinstrs -mtriple=aarch64-none-linux-gnu -mattr=+neon | FileCheck %s + +define i64 @test_vshrd_n_s64(i64 %a) { +; CHECK: test_vshrd_n_s64 +; CHECK: sshr {{d[0-9]+}}, {{d[0-9]+}}, #63 +entry: + %vsshr = insertelement <1 x i64> undef, i64 %a, i32 0 + %vsshr1 = call <1 x i64> @llvm.aarch64.neon.vshrds.n(<1 x i64> %vsshr, i32 63) + %0 = extractelement <1 x i64> %vsshr1, i32 0 + ret i64 %0 +} + +declare <1 x i64> @llvm.aarch64.neon.vshrds.n(<1 x i64>, i32) + +define i64 @test_vshrd_n_u64(i64 %a) { +; CHECK: test_vshrd_n_u64 +; CHECK: ushr {{d[0-9]+}}, {{d[0-9]+}}, #63 +entry: + %vushr = insertelement <1 x i64> undef, i64 %a, i32 0 + %vushr1 = call <1 x i64> @llvm.aarch64.neon.vshrdu.n(<1 x i64> %vushr, i32 63) + %0 = extractelement <1 x i64> %vushr1, i32 0 + ret i64 %0 +} + +declare <1 x i64> @llvm.aarch64.neon.vshrdu.n(<1 x i64>, i32) + +define i64 
@test_vrshrd_n_s64(i64 %a) { +; CHECK: test_vrshrd_n_s64 +; CHECK: srshr {{d[0-9]+}}, {{d[0-9]+}}, #63 +entry: + %vsrshr = insertelement <1 x i64> undef, i64 %a, i32 0 + %vsrshr1 = call <1 x i64> @llvm.aarch64.neon.vrshrds.n(<1 x i64> %vsrshr, i32 63) + %0 = extractelement <1 x i64> %vsrshr1, i32 0 + ret i64 %0 +} + +declare <1 x i64> @llvm.aarch64.neon.vrshrds.n(<1 x i64>, i32) + +define i64 @test_vrshrd_n_u64(i64 %a) { +; CHECK: test_vrshrd_n_u64 +; CHECK: urshr {{d[0-9]+}}, {{d[0-9]+}}, #63 +entry: + %vurshr = insertelement <1 x i64> undef, i64 %a, i32 0 + %vurshr1 = call <1 x i64> @llvm.aarch64.neon.vrshrdu.n(<1 x i64> %vurshr, i32 63) + %0 = extractelement <1 x i64> %vurshr1, i32 0 + ret i64 %0 +} + +declare <1 x i64> @llvm.aarch64.neon.vrshrdu.n(<1 x i64>, i32) + +define i64 @test_vsrad_n_s64(i64 %a, i64 %b) { +; CHECK: test_vsrad_n_s64 +; CHECK: ssra {{d[0-9]+}}, {{d[0-9]+}}, #63 +entry: + %vssra = insertelement <1 x i64> undef, i64 %a, i32 0 + %vssra1 = insertelement <1 x i64> undef, i64 %b, i32 0 + %vssra2 = call <1 x i64> @llvm.aarch64.neon.vsrads.n(<1 x i64> %vssra, <1 x i64> %vssra1, i32 63) + %0 = extractelement <1 x i64> %vssra2, i32 0 + ret i64 %0 +} + +declare <1 x i64> @llvm.aarch64.neon.vsrads.n(<1 x i64>, <1 x i64>, i32) + +define i64 @test_vsrad_n_u64(i64 %a, i64 %b) { +; CHECK: test_vsrad_n_u64 +; CHECK: usra {{d[0-9]+}}, {{d[0-9]+}}, #63 +entry: + %vusra = insertelement <1 x i64> undef, i64 %a, i32 0 + %vusra1 = insertelement <1 x i64> undef, i64 %b, i32 0 + %vusra2 = call <1 x i64> @llvm.aarch64.neon.vsradu.n(<1 x i64> %vusra, <1 x i64> %vusra1, i32 63) + %0 = extractelement <1 x i64> %vusra2, i32 0 + ret i64 %0 +} + +declare <1 x i64> @llvm.aarch64.neon.vsradu.n(<1 x i64>, <1 x i64>, i32) + +define i64 @test_vrsrad_n_s64(i64 %a, i64 %b) { +; CHECK: test_vrsrad_n_s64 +; CHECK: srsra {{d[0-9]+}}, {{d[0-9]+}}, #63 +entry: + %vsrsra = insertelement <1 x i64> undef, i64 %a, i32 0 + %vsrsra1 = insertelement <1 x i64> undef, i64 %b, i32 0 + 
%vsrsra2 = call <1 x i64> @llvm.aarch64.neon.vrsrads.n(<1 x i64> %vsrsra, <1 x i64> %vsrsra1, i32 63) + %0 = extractelement <1 x i64> %vsrsra2, i32 0 + ret i64 %0 +} + +declare <1 x i64> @llvm.aarch64.neon.vrsrads.n(<1 x i64>, <1 x i64>, i32) + +define i64 @test_vrsrad_n_u64(i64 %a, i64 %b) { +; CHECK: test_vrsrad_n_u64 +; CHECK: ursra {{d[0-9]+}}, {{d[0-9]+}}, #63 +entry: + %vursra = insertelement <1 x i64> undef, i64 %a, i32 0 + %vursra1 = insertelement <1 x i64> undef, i64 %b, i32 0 + %vursra2 = call <1 x i64> @llvm.aarch64.neon.vrsradu.n(<1 x i64> %vursra, <1 x i64> %vursra1, i32 63) + %0 = extractelement <1 x i64> %vursra2, i32 0 + ret i64 %0 +} + +declare <1 x i64> @llvm.aarch64.neon.vrsradu.n(<1 x i64>, <1 x i64>, i32) + +define i64 @test_vshld_n_s64(i64 %a) { +; CHECK: test_vshld_n_s64 +; CHECK: shl {{d[0-9]+}}, {{d[0-9]+}}, #63 +entry: + %vshl = insertelement <1 x i64> undef, i64 %a, i32 0 + %vshl1 = call <1 x i64> @llvm.aarch64.neon.vshld.n(<1 x i64> %vshl, i32 63) + %0 = extractelement <1 x i64> %vshl1, i32 0 + ret i64 %0 +} + +declare <1 x i64> @llvm.aarch64.neon.vshld.n(<1 x i64>, i32) + +define i64 @test_vshld_n_u64(i64 %a) { +; CHECK: test_vshld_n_u64 +; CHECK: shl {{d[0-9]+}}, {{d[0-9]+}}, #63 +entry: + %vshl = insertelement <1 x i64> undef, i64 %a, i32 0 + %vshl1 = call <1 x i64> @llvm.aarch64.neon.vshld.n(<1 x i64> %vshl, i32 63) + %0 = extractelement <1 x i64> %vshl1, i32 0 + ret i64 %0 +} + +define i8 @test_vqshlb_n_s8(i8 %a) { +; CHECK: test_vqshlb_n_s8 +; CHECK: sqshl {{b[0-9]+}}, {{b[0-9]+}}, #7 +entry: + %vsqshl = insertelement <1 x i8> undef, i8 %a, i32 0 + %vsqshl1 = call <1 x i8> @llvm.aarch64.neon.vqshls.n.v1i8(<1 x i8> %vsqshl, i32 7) + %0 = extractelement <1 x i8> %vsqshl1, i32 0 + ret i8 %0 +} + +declare <1 x i8> @llvm.aarch64.neon.vqshls.n.v1i8(<1 x i8>, i32) + +define i16 @test_vqshlh_n_s16(i16 %a) { +; CHECK: test_vqshlh_n_s16 +; CHECK: sqshl {{h[0-9]+}}, {{h[0-9]+}}, #15 +entry: + %vsqshl = insertelement <1 x i16> undef, i16 %a, 
i32 0 + %vsqshl1 = call <1 x i16> @llvm.aarch64.neon.vqshls.n.v1i16(<1 x i16> %vsqshl, i32 15) + %0 = extractelement <1 x i16> %vsqshl1, i32 0 + ret i16 %0 +} + +declare <1 x i16> @llvm.aarch64.neon.vqshls.n.v1i16(<1 x i16>, i32) + +define i32 @test_vqshls_n_s32(i32 %a) { +; CHECK: test_vqshls_n_s32 +; CHECK: sqshl {{s[0-9]+}}, {{s[0-9]+}}, #31 +entry: + %vsqshl = insertelement <1 x i32> undef, i32 %a, i32 0 + %vsqshl1 = call <1 x i32> @llvm.aarch64.neon.vqshls.n.v1i32(<1 x i32> %vsqshl, i32 31) + %0 = extractelement <1 x i32> %vsqshl1, i32 0 + ret i32 %0 +} + +declare <1 x i32> @llvm.aarch64.neon.vqshls.n.v1i32(<1 x i32>, i32) + +define i64 @test_vqshld_n_s64(i64 %a) { +; CHECK: test_vqshld_n_s64 +; CHECK: sqshl {{d[0-9]+}}, {{d[0-9]+}}, #63 +entry: + %vsqshl = insertelement <1 x i64> undef, i64 %a, i32 0 + %vsqshl1 = call <1 x i64> @llvm.aarch64.neon.vqshls.n.v1i64(<1 x i64> %vsqshl, i32 63) + %0 = extractelement <1 x i64> %vsqshl1, i32 0 + ret i64 %0 +} + +declare <1 x i64> @llvm.aarch64.neon.vqshls.n.v1i64(<1 x i64>, i32) + +define i8 @test_vqshlb_n_u8(i8 %a) { +; CHECK: test_vqshlb_n_u8 +; CHECK: uqshl {{b[0-9]+}}, {{b[0-9]+}}, #7 +entry: + %vuqshl = insertelement <1 x i8> undef, i8 %a, i32 0 + %vuqshl1 = call <1 x i8> @llvm.aarch64.neon.vqshlu.n.v1i8(<1 x i8> %vuqshl, i32 7) + %0 = extractelement <1 x i8> %vuqshl1, i32 0 + ret i8 %0 +} + +declare <1 x i8> @llvm.aarch64.neon.vqshlu.n.v1i8(<1 x i8>, i32) + +define i16 @test_vqshlh_n_u16(i16 %a) { +; CHECK: test_vqshlh_n_u16 +; CHECK: uqshl {{h[0-9]+}}, {{h[0-9]+}}, #15 +entry: + %vuqshl = insertelement <1 x i16> undef, i16 %a, i32 0 + %vuqshl1 = call <1 x i16> @llvm.aarch64.neon.vqshlu.n.v1i16(<1 x i16> %vuqshl, i32 15) + %0 = extractelement <1 x i16> %vuqshl1, i32 0 + ret i16 %0 +} + +declare <1 x i16> @llvm.aarch64.neon.vqshlu.n.v1i16(<1 x i16>, i32) + +define i32 @test_vqshls_n_u32(i32 %a) { +; CHECK: test_vqshls_n_u32 +; CHECK: uqshl {{s[0-9]+}}, {{s[0-9]+}}, #31 +entry: + %vuqshl = insertelement <1 x i32> 
undef, i32 %a, i32 0 + %vuqshl1 = call <1 x i32> @llvm.aarch64.neon.vqshlu.n.v1i32(<1 x i32> %vuqshl, i32 31) + %0 = extractelement <1 x i32> %vuqshl1, i32 0 + ret i32 %0 +} + +declare <1 x i32> @llvm.aarch64.neon.vqshlu.n.v1i32(<1 x i32>, i32) + +define i64 @test_vqshld_n_u64(i64 %a) { +; CHECK: test_vqshld_n_u64 +; CHECK: uqshl {{d[0-9]+}}, {{d[0-9]+}}, #63 +entry: + %vuqshl = insertelement <1 x i64> undef, i64 %a, i32 0 + %vuqshl1 = call <1 x i64> @llvm.aarch64.neon.vqshlu.n.v1i64(<1 x i64> %vuqshl, i32 63) + %0 = extractelement <1 x i64> %vuqshl1, i32 0 + ret i64 %0 +} + +declare <1 x i64> @llvm.aarch64.neon.vqshlu.n.v1i64(<1 x i64>, i32) + +define i8 @test_vqshlub_n_s8(i8 %a) { +; CHECK: test_vqshlub_n_s8 +; CHECK: sqshlu {{b[0-9]+}}, {{b[0-9]+}}, #7 +entry: + %vsqshlu = insertelement <1 x i8> undef, i8 %a, i32 0 + %vsqshlu1 = call <1 x i8> @llvm.aarch64.neon.vqshlus.n.v1i8(<1 x i8> %vsqshlu, i32 7) + %0 = extractelement <1 x i8> %vsqshlu1, i32 0 + ret i8 %0 +} + +declare <1 x i8> @llvm.aarch64.neon.vqshlus.n.v1i8(<1 x i8>, i32) + +define i16 @test_vqshluh_n_s16(i16 %a) { +; CHECK: test_vqshluh_n_s16 +; CHECK: sqshlu {{h[0-9]+}}, {{h[0-9]+}}, #15 +entry: + %vsqshlu = insertelement <1 x i16> undef, i16 %a, i32 0 + %vsqshlu1 = call <1 x i16> @llvm.aarch64.neon.vqshlus.n.v1i16(<1 x i16> %vsqshlu, i32 15) + %0 = extractelement <1 x i16> %vsqshlu1, i32 0 + ret i16 %0 +} + +declare <1 x i16> @llvm.aarch64.neon.vqshlus.n.v1i16(<1 x i16>, i32) + +define i32 @test_vqshlus_n_s32(i32 %a) { +; CHECK: test_vqshlus_n_s32 +; CHECK: sqshlu {{s[0-9]+}}, {{s[0-9]+}}, #31 +entry: + %vsqshlu = insertelement <1 x i32> undef, i32 %a, i32 0 + %vsqshlu1 = call <1 x i32> @llvm.aarch64.neon.vqshlus.n.v1i32(<1 x i32> %vsqshlu, i32 31) + %0 = extractelement <1 x i32> %vsqshlu1, i32 0 + ret i32 %0 +} + +declare <1 x i32> @llvm.aarch64.neon.vqshlus.n.v1i32(<1 x i32>, i32) + +define i64 @test_vqshlud_n_s64(i64 %a) { +; CHECK: test_vqshlud_n_s64 +; CHECK: sqshlu {{d[0-9]+}}, {{d[0-9]+}}, #63 
+entry: + %vsqshlu = insertelement <1 x i64> undef, i64 %a, i32 0 + %vsqshlu1 = call <1 x i64> @llvm.aarch64.neon.vqshlus.n.v1i64(<1 x i64> %vsqshlu, i32 63) + %0 = extractelement <1 x i64> %vsqshlu1, i32 0 + ret i64 %0 +} + +declare <1 x i64> @llvm.aarch64.neon.vqshlus.n.v1i64(<1 x i64>, i32) + +define i64 @test_vsrid_n_s64(i64 %a) { +; CHECK: test_vsrid_n_s64 +; CHECK: sri {{d[0-9]+}}, {{d[0-9]+}}, #63 +entry: + %vsri = insertelement <1 x i64> undef, i64 %a, i32 0 + %vsri1 = call <1 x i64> @llvm.aarch64.neon.vsrid.n(<1 x i64> %vsri, i32 63) + %0 = extractelement <1 x i64> %vsri1, i32 0 + ret i64 %0 +} + +declare <1 x i64> @llvm.aarch64.neon.vsrid.n(<1 x i64>, i32) + +define i64 @test_vsrid_n_u64(i64 %a) { +; CHECK: test_vsrid_n_u64 +; CHECK: sri {{d[0-9]+}}, {{d[0-9]+}}, #63 +entry: + %vsri = insertelement <1 x i64> undef, i64 %a, i32 0 + %vsri1 = call <1 x i64> @llvm.aarch64.neon.vsrid.n(<1 x i64> %vsri, i32 63) + %0 = extractelement <1 x i64> %vsri1, i32 0 + ret i64 %0 +} + +define i64 @test_vslid_n_s64(i64 %a) { +; CHECK: test_vslid_n_s64 +; CHECK: sli {{d[0-9]+}}, {{d[0-9]+}}, #63 +entry: + %vsli = insertelement <1 x i64> undef, i64 %a, i32 0 + %vsli1 = call <1 x i64> @llvm.aarch64.neon.vslid.n(<1 x i64> %vsli, i32 63) + %0 = extractelement <1 x i64> %vsli1, i32 0 + ret i64 %0 +} + +declare <1 x i64> @llvm.aarch64.neon.vslid.n(<1 x i64>, i32) + +define i64 @test_vslid_n_u64(i64 %a) { +; CHECK: test_vslid_n_u64 +; CHECK: sli {{d[0-9]+}}, {{d[0-9]+}}, #63 +entry: + %vsli = insertelement <1 x i64> undef, i64 %a, i32 0 + %vsli1 = call <1 x i64> @llvm.aarch64.neon.vslid.n(<1 x i64> %vsli, i32 63) + %0 = extractelement <1 x i64> %vsli1, i32 0 + ret i64 %0 +} + +define i8 @test_vqshrnh_n_s16(i16 %a) { +; CHECK: test_vqshrnh_n_s16 +; CHECK: sqshrn {{b[0-9]+}}, {{h[0-9]+}}, #15 +entry: + %vsqshrn = insertelement <1 x i16> undef, i16 %a, i32 0 + %vsqshrn1 = call <1 x i8> @llvm.aarch64.neon.vsqshrn.v1i8(<1 x i16> %vsqshrn, i32 15) + %0 = extractelement <1 x i8> 
%vsqshrn1, i32 0 + ret i8 %0 +} + +declare <1 x i8> @llvm.aarch64.neon.vsqshrn.v1i8(<1 x i16>, i32) + +define i16 @test_vqshrns_n_s32(i32 %a) { +; CHECK: test_vqshrns_n_s32 +; CHECK: sqshrn {{h[0-9]+}}, {{s[0-9]+}}, #31 +entry: + %vsqshrn = insertelement <1 x i32> undef, i32 %a, i32 0 + %vsqshrn1 = call <1 x i16> @llvm.aarch64.neon.vsqshrn.v1i16(<1 x i32> %vsqshrn, i32 31) + %0 = extractelement <1 x i16> %vsqshrn1, i32 0 + ret i16 %0 +} + +declare <1 x i16> @llvm.aarch64.neon.vsqshrn.v1i16(<1 x i32>, i32) + +define i32 @test_vqshrnd_n_s64(i64 %a) { +; CHECK: test_vqshrnd_n_s64 +; CHECK: sqshrn {{s[0-9]+}}, {{d[0-9]+}}, #63 +entry: + %vsqshrn = insertelement <1 x i64> undef, i64 %a, i32 0 + %vsqshrn1 = call <1 x i32> @llvm.aarch64.neon.vsqshrn.v1i32(<1 x i64> %vsqshrn, i32 63) + %0 = extractelement <1 x i32> %vsqshrn1, i32 0 + ret i32 %0 +} + +declare <1 x i32> @llvm.aarch64.neon.vsqshrn.v1i32(<1 x i64>, i32) + +define i8 @test_vqshrnh_n_u16(i16 %a) { +; CHECK: test_vqshrnh_n_u16 +; CHECK: uqshrn {{b[0-9]+}}, {{h[0-9]+}}, #15 +entry: + %vuqshrn = insertelement <1 x i16> undef, i16 %a, i32 0 + %vuqshrn1 = call <1 x i8> @llvm.aarch64.neon.vuqshrn.v1i8(<1 x i16> %vuqshrn, i32 15) + %0 = extractelement <1 x i8> %vuqshrn1, i32 0 + ret i8 %0 +} + +declare <1 x i8> @llvm.aarch64.neon.vuqshrn.v1i8(<1 x i16>, i32) + +define i16 @test_vqshrns_n_u32(i32 %a) { +; CHECK: test_vqshrns_n_u32 +; CHECK: uqshrn {{h[0-9]+}}, {{s[0-9]+}}, #31 +entry: + %vuqshrn = insertelement <1 x i32> undef, i32 %a, i32 0 + %vuqshrn1 = call <1 x i16> @llvm.aarch64.neon.vuqshrn.v1i16(<1 x i32> %vuqshrn, i32 31) + %0 = extractelement <1 x i16> %vuqshrn1, i32 0 + ret i16 %0 +} + +declare <1 x i16> @llvm.aarch64.neon.vuqshrn.v1i16(<1 x i32>, i32) + +define i32 @test_vqshrnd_n_u64(i64 %a) { +; CHECK: test_vqshrnd_n_u64 +; CHECK: uqshrn {{s[0-9]+}}, {{d[0-9]+}}, #63 +entry: + %vuqshrn = insertelement <1 x i64> undef, i64 %a, i32 0 + %vuqshrn1 = call <1 x i32> @llvm.aarch64.neon.vuqshrn.v1i32(<1 x i64> 
%vuqshrn, i32 63) + %0 = extractelement <1 x i32> %vuqshrn1, i32 0 + ret i32 %0 +} + +declare <1 x i32> @llvm.aarch64.neon.vuqshrn.v1i32(<1 x i64>, i32) + +define i8 @test_vqrshrnh_n_s16(i16 %a) { +; CHECK: test_vqrshrnh_n_s16 +; CHECK: sqrshrn {{b[0-9]+}}, {{h[0-9]+}}, #15 +entry: + %vsqrshrn = insertelement <1 x i16> undef, i16 %a, i32 0 + %vsqrshrn1 = call <1 x i8> @llvm.aarch64.neon.vsqrshrn.v1i8(<1 x i16> %vsqrshrn, i32 15) + %0 = extractelement <1 x i8> %vsqrshrn1, i32 0 + ret i8 %0 +} + +declare <1 x i8> @llvm.aarch64.neon.vsqrshrn.v1i8(<1 x i16>, i32) + +define i16 @test_vqrshrns_n_s32(i32 %a) { +; CHECK: test_vqrshrns_n_s32 +; CHECK: sqrshrn {{h[0-9]+}}, {{s[0-9]+}}, #31 +entry: + %vsqrshrn = insertelement <1 x i32> undef, i32 %a, i32 0 + %vsqrshrn1 = call <1 x i16> @llvm.aarch64.neon.vsqrshrn.v1i16(<1 x i32> %vsqrshrn, i32 31) + %0 = extractelement <1 x i16> %vsqrshrn1, i32 0 + ret i16 %0 +} + +declare <1 x i16> @llvm.aarch64.neon.vsqrshrn.v1i16(<1 x i32>, i32) + +define i32 @test_vqrshrnd_n_s64(i64 %a) { +; CHECK: test_vqrshrnd_n_s64 +; CHECK: sqrshrn {{s[0-9]+}}, {{d[0-9]+}}, #63 +entry: + %vsqrshrn = insertelement <1 x i64> undef, i64 %a, i32 0 + %vsqrshrn1 = call <1 x i32> @llvm.aarch64.neon.vsqrshrn.v1i32(<1 x i64> %vsqrshrn, i32 63) + %0 = extractelement <1 x i32> %vsqrshrn1, i32 0 + ret i32 %0 +} + +declare <1 x i32> @llvm.aarch64.neon.vsqrshrn.v1i32(<1 x i64>, i32) + +define i8 @test_vqrshrnh_n_u16(i16 %a) { +; CHECK: test_vqrshrnh_n_u16 +; CHECK: uqrshrn {{b[0-9]+}}, {{h[0-9]+}}, #15 +entry: + %vuqrshrn = insertelement <1 x i16> undef, i16 %a, i32 0 + %vuqrshrn1 = call <1 x i8> @llvm.aarch64.neon.vuqrshrn.v1i8(<1 x i16> %vuqrshrn, i32 15) + %0 = extractelement <1 x i8> %vuqrshrn1, i32 0 + ret i8 %0 +} + +declare <1 x i8> @llvm.aarch64.neon.vuqrshrn.v1i8(<1 x i16>, i32) + +define i16 @test_vqrshrns_n_u32(i32 %a) { +; CHECK: test_vqrshrns_n_u32 +; CHECK: uqrshrn {{h[0-9]+}}, {{s[0-9]+}}, #31 +entry: + %vuqrshrn = insertelement <1 x i32> undef, i32 
%a, i32 0 + %vuqrshrn1 = call <1 x i16> @llvm.aarch64.neon.vuqrshrn.v1i16(<1 x i32> %vuqrshrn, i32 31) + %0 = extractelement <1 x i16> %vuqrshrn1, i32 0 + ret i16 %0 +} + +declare <1 x i16> @llvm.aarch64.neon.vuqrshrn.v1i16(<1 x i32>, i32) + +define i32 @test_vqrshrnd_n_u64(i64 %a) { +; CHECK: test_vqrshrnd_n_u64 +; CHECK: uqrshrn {{s[0-9]+}}, {{d[0-9]+}}, #63 +entry: + %vuqrshrn = insertelement <1 x i64> undef, i64 %a, i32 0 + %vuqrshrn1 = call <1 x i32> @llvm.aarch64.neon.vuqrshrn.v1i32(<1 x i64> %vuqrshrn, i32 63) + %0 = extractelement <1 x i32> %vuqrshrn1, i32 0 + ret i32 %0 +} + +declare <1 x i32> @llvm.aarch64.neon.vuqrshrn.v1i32(<1 x i64>, i32) + +define i8 @test_vqshrunh_n_s16(i16 %a) { +; CHECK: test_vqshrunh_n_s16 +; CHECK: sqshrun {{b[0-9]+}}, {{h[0-9]+}}, #15 +entry: + %vsqshrun = insertelement <1 x i16> undef, i16 %a, i32 0 + %vsqshrun1 = call <1 x i8> @llvm.aarch64.neon.vsqshrun.v1i8(<1 x i16> %vsqshrun, i32 15) + %0 = extractelement <1 x i8> %vsqshrun1, i32 0 + ret i8 %0 +} + +declare <1 x i8> @llvm.aarch64.neon.vsqshrun.v1i8(<1 x i16>, i32) + +define i16 @test_vqshruns_n_s32(i32 %a) { +; CHECK: test_vqshruns_n_s32 +; CHECK: sqshrun {{h[0-9]+}}, {{s[0-9]+}}, #31 +entry: + %vsqshrun = insertelement <1 x i32> undef, i32 %a, i32 0 + %vsqshrun1 = call <1 x i16> @llvm.aarch64.neon.vsqshrun.v1i16(<1 x i32> %vsqshrun, i32 31) + %0 = extractelement <1 x i16> %vsqshrun1, i32 0 + ret i16 %0 +} + +declare <1 x i16> @llvm.aarch64.neon.vsqshrun.v1i16(<1 x i32>, i32) + +define i32 @test_vqshrund_n_s64(i64 %a) { +; CHECK: test_vqshrund_n_s64 +; CHECK: sqshrun {{s[0-9]+}}, {{d[0-9]+}}, #63 +entry: + %vsqshrun = insertelement <1 x i64> undef, i64 %a, i32 0 + %vsqshrun1 = call <1 x i32> @llvm.aarch64.neon.vsqshrun.v1i32(<1 x i64> %vsqshrun, i32 63) + %0 = extractelement <1 x i32> %vsqshrun1, i32 0 + ret i32 %0 +} + +declare <1 x i32> @llvm.aarch64.neon.vsqshrun.v1i32(<1 x i64>, i32) + +define i8 @test_vqrshrunh_n_s16(i16 %a) { +; CHECK: test_vqrshrunh_n_s16 +; CHECK: 
sqrshrun {{b[0-9]+}}, {{h[0-9]+}}, #15 +entry: + %vsqrshrun = insertelement <1 x i16> undef, i16 %a, i32 0 + %vsqrshrun1 = call <1 x i8> @llvm.aarch64.neon.vsqrshrun.v1i8(<1 x i16> %vsqrshrun, i32 15) + %0 = extractelement <1 x i8> %vsqrshrun1, i32 0 + ret i8 %0 +} + +declare <1 x i8> @llvm.aarch64.neon.vsqrshrun.v1i8(<1 x i16>, i32) + +define i16 @test_vqrshruns_n_s32(i32 %a) { +; CHECK: test_vqrshruns_n_s32 +; CHECK: sqrshrun {{h[0-9]+}}, {{s[0-9]+}}, #31 +entry: + %vsqrshrun = insertelement <1 x i32> undef, i32 %a, i32 0 + %vsqrshrun1 = call <1 x i16> @llvm.aarch64.neon.vsqrshrun.v1i16(<1 x i32> %vsqrshrun, i32 31) + %0 = extractelement <1 x i16> %vsqrshrun1, i32 0 + ret i16 %0 +} + +declare <1 x i16> @llvm.aarch64.neon.vsqrshrun.v1i16(<1 x i32>, i32) + +define i32 @test_vqrshrund_n_s64(i64 %a) { +; CHECK: test_vqrshrund_n_s64 +; CHECK: sqrshrun {{s[0-9]+}}, {{d[0-9]+}}, #63 +entry: + %vsqrshrun = insertelement <1 x i64> undef, i64 %a, i32 0 + %vsqrshrun1 = call <1 x i32> @llvm.aarch64.neon.vsqrshrun.v1i32(<1 x i64> %vsqrshrun, i32 63) + %0 = extractelement <1 x i32> %vsqrshrun1, i32 0 + ret i32 %0 +} + +declare <1 x i32> @llvm.aarch64.neon.vsqrshrun.v1i32(<1 x i64>, i32) diff --git a/test/MC/AArch64/neon-diagnostics.s b/test/MC/AArch64/neon-diagnostics.s index 28f8e7a..c378ce4 100644 --- a/test/MC/AArch64/neon-diagnostics.s +++ b/test/MC/AArch64/neon-diagnostics.s @@ -970,23 +970,23 @@ //---------------------------------------------------------------------- // Mismatched vector types - sqshl b0, b1, s0 - uqshl h0, h1, b0 - sqshl s0, s1, h0 - uqshl d0, d1, b0 + sqshl b0, s1, b0 + uqshl h0, b1, h0 + sqshl s0, h1, s0 + uqshl d0, b1, d0 // CHECK-ERROR: error: invalid operand for instruction -// CHECK-ERROR: sqshl b0, b1, s0 -// CHECK-ERROR: ^ +// CHECK-ERROR: sqshl b0, s1, b0 +// CHECK-ERROR: ^ // CHECK-ERROR: error: invalid operand for instruction -// CHECK-ERROR: uqshl h0, h1, b0 -// CHECK-ERROR: ^ +// CHECK-ERROR: uqshl h0, b1, h0 +// CHECK-ERROR: ^ // 
CHECK-ERROR: error: invalid operand for instruction -// CHECK-ERROR: sqshl s0, s1, h0 -// CHECK-ERROR: ^ +// CHECK-ERROR: sqshl s0, h1, s0 +// CHECK-ERROR: ^ // CHECK-ERROR: error: invalid operand for instruction -// CHECK-ERROR: uqshl d0, d1, b0 -// CHECK-ERROR: ^ +// CHECK-ERROR: uqshl d0, b1, d0 +// CHECK-ERROR: ^ //---------------------------------------------------------------------- // Scalar Integer Rouding Shift Left (Signed, Unsigned) diff --git a/test/MC/AArch64/neon-scalar-shift-imm.s b/test/MC/AArch64/neon-scalar-shift-imm.s new file mode 100644 index 0000000..96cb815 --- /dev/null +++ b/test/MC/AArch64/neon-scalar-shift-imm.s @@ -0,0 +1,186 @@ +// RUN: llvm-mc -triple aarch64-none-linux-gnu -mattr=+neon -show-encoding < %s | FileCheck %s + +// Check that the assembler can handle the documented syntax for AArch64 + +//---------------------------------------------------------------------- +// Scalar Signed Shift Right (Immediate) +//---------------------------------------------------------------------- + sshr d15, d16, #12 + +// CHECK: sshr d15, d16, #12 // encoding: [0x0f,0x06,0x74,0x5f] + +//---------------------------------------------------------------------- +// Scalar Unsigned Shift Right (Immediate) +//---------------------------------------------------------------------- + ushr d10, d17, #18 + +// CHECK: ushr d10, d17, #18 // encoding: [0x2a,0x06,0x6e,0x7f] + +//---------------------------------------------------------------------- +// Scalar Signed Rounding Shift Right (Immediate) +//---------------------------------------------------------------------- + srshr d19, d18, #7 + +// CHECK: srshr d19, d18, #7 // encoding: [0x53,0x26,0x79,0x5f] + +//---------------------------------------------------------------------- +// Scalar Unsigned Rounding Shift Right (Immediate) +//---------------------------------------------------------------------- + urshr d20, d23, #31 + +// CHECK: urshr d20, d23, #31 // encoding: [0xf4,0x26,0x61,0x7f] + 
+//---------------------------------------------------------------------- +// Scalar Signed Shift Right and Accumulate (Immediate) +//---------------------------------------------------------------------- + ssra d18, d12, #21 + +// CHECK: ssra d18, d12, #21 // encoding: [0x92,0x15,0x6b,0x5f] + +//---------------------------------------------------------------------- +// Scalar Unsigned Shift Right and Accumulate (Immediate) +//---------------------------------------------------------------------- + usra d20, d13, #61 + +// CHECK: usra d20, d13, #61 // encoding: [0xb4,0x15,0x43,0x7f] + +//---------------------------------------------------------------------- +// Scalar Signed Rounding Shift Right and Accumulate (Immediate) +//---------------------------------------------------------------------- + srsra d15, d11, #19 + +// CHECK: srsra d15, d11, #19 // encoding: [0x6f,0x35,0x6d,0x5f] + +//---------------------------------------------------------------------- +// Scalar Unsigned Rounding Shift Right and Accumulate (Immediate) +//---------------------------------------------------------------------- + ursra d18, d10, #13 + +// CHECK: ursra d18, d10, #13 // encoding: [0x52,0x35,0x73,0x7f] + +//---------------------------------------------------------------------- +// Scalar Shift Left (Immediate) +//---------------------------------------------------------------------- + shl d7, d10, #12 + +// CHECK: shl d7, d10, #12 // encoding: [0x47,0x55,0x4c,0x5f] + +//---------------------------------------------------------------------- +// Signed Saturating Shift Left (Immediate) +//---------------------------------------------------------------------- + sqshl b11, b19, #7 + sqshl h13, h18, #11 + sqshl s14, s17, #22 + sqshl d15, d16, #51 + +// CHECK: sqshl b11, b19, #7 // encoding: [0x6b,0x76,0x0f,0x5f] +// CHECK: sqshl h13, h18, #11 // encoding: [0x4d,0x76,0x1b,0x5f] +// CHECK: sqshl s14, s17, #22 // encoding: [0x2e,0x76,0x36,0x5f] +// CHECK: sqshl d15, d16, #51 // encoding: 
[0x0f,0x76,0x73,0x5f] + +//---------------------------------------------------------------------- +// Unsigned Saturating Shift Left (Immediate) +//---------------------------------------------------------------------- + uqshl b18, b15, #6 + uqshl h11, h18, #7 + uqshl s14, s19, #18 + uqshl d15, d12, #19 + +// CHECK: uqshl b18, b15, #6 // encoding: [0xf2,0x75,0x0e,0x7f] +// CHECK: uqshl h11, h18, #7 // encoding: [0x4b,0x76,0x17,0x7f] +// CHECK: uqshl s14, s19, #18 // encoding: [0x6e,0x76,0x32,0x7f] +// CHECK: uqshl d15, d12, #19 // encoding: [0x8f,0x75,0x53,0x7f] + +//---------------------------------------------------------------------- +// Signed Saturating Shift Left Unsigned (Immediate) +//---------------------------------------------------------------------- + sqshlu b15, b18, #6 + sqshlu h19, h17, #6 + sqshlu s16, s14, #25 + sqshlu d11, d13, #32 + +// CHECK: sqshlu b15, b18, #6 // encoding: [0x4f,0x66,0x0e,0x7f] +// CHECK: sqshlu h19, h17, #6 // encoding: [0x33,0x66,0x16,0x7f] +// CHECK: sqshlu s16, s14, #25 // encoding: [0xd0,0x65,0x39,0x7f] +// CHECK: sqshlu d11, d13, #32 // encoding: [0xab,0x65,0x60,0x7f] + +//---------------------------------------------------------------------- +// Shift Right And Insert (Immediate) +//---------------------------------------------------------------------- + sri d10, d12, #14 + +// CHECK: sri d10, d12, #14 // encoding: [0x8a,0x45,0x72,0x7f] + +//---------------------------------------------------------------------- +// Shift Left And Insert (Immediate) +//---------------------------------------------------------------------- + sli d10, d14, #12 + +// CHECK: sli d10, d14, #12 // encoding: [0xca,0x55,0x4c,0x7f] + +//---------------------------------------------------------------------- +// Signed Saturating Shift Right Narrow (Immediate) +//---------------------------------------------------------------------- + sqshrn b10, h15, #5 + sqshrn h17, s10, #4 + sqshrn s18, d10, #31 + +// CHECK: sqshrn b10, h15, #5 // encoding: 
[0xea,0x95,0x0b,0x5f] +// CHECK: sqshrn h17, s10, #4 // encoding: [0x51,0x95,0x1c,0x5f] +// CHECK: sqshrn s18, d10, #31 // encoding: [0x52,0x95,0x21,0x5f] + +//---------------------------------------------------------------------- +// Unsigned Saturating Shift Right Narrow (Immediate) +//---------------------------------------------------------------------- + uqshrn b12, h10, #7 + uqshrn h10, s14, #5 + uqshrn s10, d12, #13 + +// CHECK: uqshrn b12, h10, #7 // encoding: [0x4c,0x95,0x09,0x7f] +// CHECK: uqshrn h10, s14, #5 // encoding: [0xca,0x95,0x1b,0x7f] +// CHECK: uqshrn s10, d12, #13 // encoding: [0x8a,0x95,0x33,0x7f] + +//---------------------------------------------------------------------- +// Signed Saturating Rounded Shift Right Narrow (Immediate) +//---------------------------------------------------------------------- + sqrshrn b10, h13, #2 + sqrshrn h15, s10, #6 + sqrshrn s15, d12, #9 + +// CHECK: sqrshrn b10, h13, #2 // encoding: [0xaa,0x9d,0x0e,0x5f] +// CHECK: sqrshrn h15, s10, #6 // encoding: [0x4f,0x9d,0x1a,0x5f] +// CHECK: sqrshrn s15, d12, #9 // encoding: [0x8f,0x9d,0x37,0x5f] + +//---------------------------------------------------------------------- +// Unsigned Saturating Rounded Shift Right Narrow (Immediate) +//---------------------------------------------------------------------- + uqrshrn b10, h12, #5 + uqrshrn h12, s10, #14 + uqrshrn s10, d10, #25 + +// CHECK: uqrshrn b10, h12, #5 // encoding: [0x8a,0x9d,0x0b,0x7f] +// CHECK: uqrshrn h12, s10, #14 // encoding: [0x4c,0x9d,0x12,0x7f] +// CHECK: uqrshrn s10, d10, #25 // encoding: [0x4a,0x9d,0x27,0x7f] + +//---------------------------------------------------------------------- +// Signed Saturating Shift Right Unsigned Narrow (Immediate) +//---------------------------------------------------------------------- + sqshrun b15, h10, #7 + sqshrun h20, s14, #3 + sqshrun s10, d15, #15 + +// CHECK: sqshrun b15, h10, #7 // encoding: [0x4f,0x85,0x09,0x7f] +// CHECK: sqshrun h20, s14, #3 // encoding: 
[0xd4,0x85,0x1d,0x7f] +// CHECK: sqshrun s10, d15, #15 // encoding: [0xea,0x85,0x31,0x7f] + +//---------------------------------------------------------------------- +// Signed Saturating Rounded Shift Right Unsigned Narrow (Immediate) +//---------------------------------------------------------------------- + + sqrshrun b17, h10, #6 + sqrshrun h10, s13, #15 + sqrshrun s22, d16, #31 + +// CHECK: sqrshrun b17, h10, #6 // encoding: [0x51,0x8d,0x0a,0x7f] +// CHECK: sqrshrun h10, s13, #15 // encoding: [0xaa,0x8d,0x11,0x7f] +// CHECK: sqrshrun s22, d16, #31 // encoding: [0x16,0x8e,0x21,0x7f] diff --git a/test/MC/Disassembler/AArch64/neon-instructions.txt b/test/MC/Disassembler/AArch64/neon-instructions.txt index 2627b14..c63b65a 100644 --- a/test/MC/Disassembler/AArch64/neon-instructions.txt +++ b/test/MC/Disassembler/AArch64/neon-instructions.txt @@ -1793,3 +1793,165 @@ 0x52,0x4a,0x21,0x7e 0x34,0x4a,0x61,0x7e 0xd3,0x49,0xa1,0x7e + +#---------------------------------------------------------------------- +# Scalar Signed Shift Right (Immediate) +#---------------------------------------------------------------------- +# CHECK: sshr d15, d16, #12 +0x0f,0x06,0x74,0x5f + +#---------------------------------------------------------------------- +# Scalar Unsigned Shift Right (Immediate) +#---------------------------------------------------------------------- +# CHECK: ushr d10, d17, #18 +0x2a,0x06,0x6e,0x7f + +#---------------------------------------------------------------------- +# Scalar Signed Rounding Shift Right (Immediate) +#---------------------------------------------------------------------- +# CHECK: srshr d19, d18, #7 +0x53,0x26,0x79,0x5f + +#---------------------------------------------------------------------- +# Scalar Unsigned Rounding Shift Right (Immediate) +#---------------------------------------------------------------------- +# CHECK: urshr d20, d23, #31 +0xf4,0x26,0x61,0x7f + +#---------------------------------------------------------------------- +# 
Scalar Signed Shift Right and Accumulate (Immediate) +#---------------------------------------------------------------------- +# CHECK: ssra d18, d12, #21 +0x92,0x15,0x6b,0x5f + +#---------------------------------------------------------------------- +# Scalar Unsigned Shift Right and Accumulate (Immediate) +#---------------------------------------------------------------------- +# CHECK: usra d20, d13, #61 +0xb4,0x15,0x43,0x7f + +#---------------------------------------------------------------------- +# Scalar Signed Rounding Shift Right and Accumulate (Immediate) +#---------------------------------------------------------------------- +# CHECK: srsra d15, d11, #19 +0x6f,0x35,0x6d,0x5f + +#---------------------------------------------------------------------- +# Scalar Unsigned Rounding Shift Right and Accumulate (Immediate) +#---------------------------------------------------------------------- +# CHECK: ursra d18, d10, #13 +0x52,0x35,0x73,0x7f + +#---------------------------------------------------------------------- +# Scalar Shift Left (Immediate) +#---------------------------------------------------------------------- +# CHECK: shl d7, d10, #12 +0x47,0x55,0x4c,0x5f + +#---------------------------------------------------------------------- +# Signed Saturating Shift Left (Immediate) +#---------------------------------------------------------------------- +# CHECK: sqshl b11, b19, #7 +# CHECK: sqshl h13, h18, #11 +# CHECK: sqshl s14, s17, #22 +# CHECK: sqshl d15, d16, #51 +0x6b,0x76,0x0f,0x5f +0x4d,0x76,0x1b,0x5f +0x2e,0x76,0x36,0x5f +0x0f,0x76,0x73,0x5f + +#---------------------------------------------------------------------- +# Unsigned Saturating Shift Left (Immediate) +#---------------------------------------------------------------------- +# CHECK: uqshl b18, b15, #6 +# CHECK: uqshl h11, h18, #7 +# CHECK: uqshl s14, s19, #18 +# CHECK: uqshl d15, d12, #19 +0xf2,0x75,0x0e,0x7f +0x4b,0x76,0x17,0x7f +0x6e,0x76,0x32,0x7f +0x8f,0x75,0x53,0x7f + 
+#---------------------------------------------------------------------- +# Signed Saturating Shift Left Unsigned (Immediate) +#---------------------------------------------------------------------- +# CHECK: sqshlu b15, b18, #6 +# CHECK: sqshlu h19, h17, #6 +# CHECK: sqshlu s16, s14, #25 +# CHECK: sqshlu d11, d13, #32 +0x4f,0x66,0x0e,0x7f +0x33,0x66,0x16,0x7f +0xd0,0x65,0x39,0x7f +0xab,0x65,0x60,0x7f + +#---------------------------------------------------------------------- +# Shift Right And Insert (Immediate) +#---------------------------------------------------------------------- +# CHECK: sri d10, d12, #14 +0x8a,0x45,0x72,0x7f + +#---------------------------------------------------------------------- +# Shift Left And Insert (Immediate) +#---------------------------------------------------------------------- +# CHECK: sli d10, d14, #12 +0xca,0x55,0x4c,0x7f + +#---------------------------------------------------------------------- +# Signed Saturating Shift Right Narrow (Immediate) +#---------------------------------------------------------------------- +# CHECK: sqshrn b10, h15, #5 +# CHECK: sqshrn h17, s10, #4 +# CHECK: sqshrn s18, d10, #31 +0xea,0x95,0x0b,0x5f +0x51,0x95,0x1c,0x5f +0x52,0x95,0x21,0x5f + +#---------------------------------------------------------------------- +# Unsigned Saturating Shift Right Narrow (Immediate) +#---------------------------------------------------------------------- +# CHECK: uqshrn b12, h10, #7 +# CHECK: uqshrn h10, s14, #5 +# CHECK: uqshrn s10, d12, #13 +0x4c,0x95,0x09,0x7f +0xca,0x95,0x1b,0x7f +0x8a,0x95,0x33,0x7f + +#---------------------------------------------------------------------- +# Signed Saturating Rounded Shift Right Narrow (Immediate) +#---------------------------------------------------------------------- +# CHECK: sqrshrn b10, h13, #2 +# CHECK: sqrshrn h15, s10, #6 +# CHECK: sqrshrn s15, d12, #9 +0xaa,0x9d,0x0e,0x5f +0x4f,0x9d,0x1a,0x5f +0x8f,0x9d,0x37,0x5f + 
+#---------------------------------------------------------------------- +# Unsigned Saturating Rounded Shift Right Narrow (Immediate) +#---------------------------------------------------------------------- +# CHECK: uqrshrn b10, h12, #5 +# CHECK: uqrshrn h12, s10, #14 +# CHECK: uqrshrn s10, d10, #25 +0x8a,0x9d,0x0b,0x7f +0x4c,0x9d,0x12,0x7f +0x4a,0x9d,0x27,0x7f + +#---------------------------------------------------------------------- +# Signed Saturating Shift Right Unsigned Narrow (Immediate) +#---------------------------------------------------------------------- +# CHECK: sqshrun b15, h10, #7 +# CHECK: sqshrun h20, s14, #3 +# CHECK: sqshrun s10, d15, #15 +0x4f,0x85,0x09,0x7f +0xd4,0x85,0x1d,0x7f +0xea,0x85,0x31,0x7f + +#---------------------------------------------------------------------- +# Signed Saturating Rounded Shift Right Unsigned Narrow (Immediate) +#---------------------------------------------------------------------- +# CHECK: sqrshrun b17, h10, #6 +# CHECK: sqrshrun h10, s13, #15 +# CHECK: sqrshrun s22, d16, #31 +0x51,0x8d,0x0a,0x7f +0xaa,0x8d,0x11,0x7f +0x16,0x8e,0x21,0x7f -- cgit v1.1