aboutsummaryrefslogtreecommitdiffstats
path: root/lib
diff options
context:
space:
mode:
authorHao Liu <Hao.Liu@arm.com>2013-09-04 09:28:24 +0000
committerHao Liu <Hao.Liu@arm.com>2013-09-04 09:28:24 +0000
commit19fdc268c316b3b0bdcb2b558449819f4f402d6a (patch)
tree7e600b5667c314ab009690568492a55b06b90c1b /lib
parent886631cc2790cc0143966069e613d933914724b4 (diff)
downloadexternal_llvm-19fdc268c316b3b0bdcb2b558449819f4f402d6a.zip
external_llvm-19fdc268c316b3b0bdcb2b558449819f4f402d6a.tar.gz
external_llvm-19fdc268c316b3b0bdcb2b558449819f4f402d6a.tar.bz2
Inplement aarch64 neon instructions in AdvSIMD(shift). About 24 shift instructions:
sshr,ushr,ssra,usra,srshr,urshr,srsra,ursra,sri,shl,sli,sqshlu,sqshl,uqshl,shrn,sqrshrun,sqshrn,uqshr,sqrshrn,uqrshrn,sshll,ushll and 4 convert instructions: scvtf,ucvtf,fcvtzs,fcvtzu git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@189925 91177308-0d34-0410-b5e6-96231b3b80d8
Diffstat (limited to 'lib')
-rw-r--r--lib/Target/AArch64/AArch64ISelLowering.cpp90
-rw-r--r--lib/Target/AArch64/AArch64ISelLowering.h8
-rw-r--r--lib/Target/AArch64/AArch64InstrNEON.td649
-rw-r--r--lib/Target/AArch64/AsmParser/AArch64AsmParser.cpp31
-rw-r--r--lib/Target/AArch64/Disassembler/AArch64Disassembler.cpp38
-rw-r--r--lib/Target/AArch64/MCTargetDesc/AArch64MCCodeEmitter.cpp27
6 files changed, 805 insertions, 38 deletions
diff --git a/lib/Target/AArch64/AArch64ISelLowering.cpp b/lib/Target/AArch64/AArch64ISelLowering.cpp
index d12302e..b68c43a 100644
--- a/lib/Target/AArch64/AArch64ISelLowering.cpp
+++ b/lib/Target/AArch64/AArch64ISelLowering.cpp
@@ -77,8 +77,11 @@ AArch64TargetLowering::AArch64TargetLowering(AArch64TargetMachine &TM)
setTargetDAGCombine(ISD::AND);
setTargetDAGCombine(ISD::SRA);
+ setTargetDAGCombine(ISD::SRL);
setTargetDAGCombine(ISD::SHL);
+ setTargetDAGCombine(ISD::INTRINSIC_WO_CHAIN);
+
// AArch64 does not have i1 loads, or much of anything for i1 really.
setLoadExtAction(ISD::SEXTLOAD, MVT::i1, Promote);
setLoadExtAction(ISD::ZEXTLOAD, MVT::i1, Promote);
@@ -283,6 +286,8 @@ AArch64TargetLowering::AArch64TargetLowering(AArch64TargetMachine &TM)
setOperationAction(ISD::BUILD_VECTOR, MVT::v4f32, Custom);
setOperationAction(ISD::BUILD_VECTOR, MVT::v2f64, Custom);
+ setOperationAction(ISD::CONCAT_VECTORS, MVT::v2i64, Legal);
+
setOperationAction(ISD::SETCC, MVT::v8i8, Custom);
setOperationAction(ISD::SETCC, MVT::v16i8, Custom);
setOperationAction(ISD::SETCC, MVT::v4i16, Custom);
@@ -834,6 +839,12 @@ const char *AArch64TargetLowering::getTargetNodeName(unsigned Opcode) const {
return "AArch64ISD::NEON_CMPZ";
case AArch64ISD::NEON_TST:
return "AArch64ISD::NEON_TST";
+ case AArch64ISD::NEON_DUPIMM:
+ return "AArch64ISD::NEON_DUPIMM";
+ case AArch64ISD::NEON_QSHLs:
+ return "AArch64ISD::NEON_QSHLs";
+ case AArch64ISD::NEON_QSHLu:
+ return "AArch64ISD::NEON_QSHLu";
default:
return NULL;
}
@@ -3257,7 +3268,7 @@ static bool getVShiftImm(SDValue Op, unsigned ElementBits, int64_t &Cnt) {
/// Check if this is a valid build_vector for the immediate operand of
/// a vector shift left operation. That value must be in the range:
-/// 0 <= Value < ElementBits for a left shift
+/// 0 <= Value < ElementBits
static bool isVShiftLImm(SDValue Op, EVT VT, int64_t &Cnt) {
assert(VT.isVector() && "vector shift count is not a vector type");
unsigned ElementBits = VT.getVectorElementType().getSizeInBits();
@@ -3266,10 +3277,25 @@ static bool isVShiftLImm(SDValue Op, EVT VT, int64_t &Cnt) {
return (Cnt >= 0 && Cnt < ElementBits);
}
-static SDValue PerformSHLCombine(SDNode *N, TargetLowering::DAGCombinerInfo &DCI,
+/// Check if this is a valid build_vector for the immediate operand of a
+/// vector shift right operation. The value must be in the range:
+/// 1 <= Value <= ElementBits
+static bool isVShiftRImm(SDValue Op, EVT VT, int64_t &Cnt) {
+ assert(VT.isVector() && "vector shift count is not a vector type");
+ unsigned ElementBits = VT.getVectorElementType().getSizeInBits();
+ if (!getVShiftImm(Op, ElementBits, Cnt))
+ return false;
+ return (Cnt >= 1 && Cnt <= ElementBits);
+}
+
+/// Checks for immediate versions of vector shifts and lowers them.
+static SDValue PerformShiftCombine(SDNode *N,
+ TargetLowering::DAGCombinerInfo &DCI,
const AArch64Subtarget *ST) {
SelectionDAG &DAG = DCI.DAG;
EVT VT = N->getValueType(0);
+ if (N->getOpcode() == ISD::SRA && (VT == MVT::i32 || VT == MVT::i64))
+ return PerformSRACombine(N, DCI);
// Nothing to be done for scalar shifts.
const TargetLowering &TLI = DAG.getTargetLoweringInfo();
@@ -3278,10 +3304,54 @@ static SDValue PerformSHLCombine(SDNode *N, TargetLowering::DAGCombinerInfo &DCI
assert(ST->hasNEON() && "unexpected vector shift");
int64_t Cnt;
- if (isVShiftLImm(N->getOperand(1), VT, Cnt)) {
- SDValue RHS = DAG.getNode(AArch64ISD::NEON_DUPIMM, SDLoc(N->getOperand(0)),
- VT, DAG.getConstant(Cnt, MVT::i32));
- return DAG.getNode(ISD::SHL, SDLoc(N), VT, N->getOperand(0), RHS);
+
+ switch (N->getOpcode()) {
+ default:
+ llvm_unreachable("unexpected shift opcode");
+
+ case ISD::SHL:
+ if (isVShiftLImm(N->getOperand(1), VT, Cnt)) {
+ SDValue RHS =
+ DAG.getNode(AArch64ISD::NEON_DUPIMM, SDLoc(N->getOperand(1)), VT,
+ DAG.getConstant(Cnt, MVT::i32));
+ return DAG.getNode(ISD::SHL, SDLoc(N), VT, N->getOperand(0), RHS);
+ }
+ break;
+
+ case ISD::SRA:
+ case ISD::SRL:
+ if (isVShiftRImm(N->getOperand(1), VT, Cnt)) {
+ SDValue RHS =
+ DAG.getNode(AArch64ISD::NEON_DUPIMM, SDLoc(N->getOperand(1)), VT,
+ DAG.getConstant(Cnt, MVT::i32));
+ return DAG.getNode(N->getOpcode(), SDLoc(N), VT, N->getOperand(0), RHS);
+ }
+ break;
+ }
+
+ return SDValue();
+}
+
+/// ARM-specific DAG combining for intrinsics.
+static SDValue PerformIntrinsicCombine(SDNode *N, SelectionDAG &DAG) {
+ unsigned IntNo = cast<ConstantSDNode>(N->getOperand(0))->getZExtValue();
+
+ switch (IntNo) {
+ default:
+ // Don't do anything for most intrinsics.
+ break;
+
+ case Intrinsic::arm_neon_vqshifts:
+ case Intrinsic::arm_neon_vqshiftu:
+ EVT VT = N->getOperand(1).getValueType();
+ int64_t Cnt;
+ if (!isVShiftLImm(N->getOperand(2), VT, Cnt))
+ break;
+ unsigned VShiftOpc = (IntNo == Intrinsic::arm_neon_vqshifts)
+ ? AArch64ISD::NEON_QSHLs
+ : AArch64ISD::NEON_QSHLu;
+ return DAG.getNode(VShiftOpc, SDLoc(N), N->getValueType(0),
+ N->getOperand(1), DAG.getConstant(Cnt, MVT::i32));
}
return SDValue();
@@ -3294,8 +3364,12 @@ AArch64TargetLowering::PerformDAGCombine(SDNode *N,
default: break;
case ISD::AND: return PerformANDCombine(N, DCI);
case ISD::OR: return PerformORCombine(N, DCI, getSubtarget());
- case ISD::SRA: return PerformSRACombine(N, DCI);
- case ISD::SHL: return PerformSHLCombine(N, DCI, getSubtarget());
+ case ISD::SHL:
+ case ISD::SRA:
+ case ISD::SRL:
+ return PerformShiftCombine(N, DCI, getSubtarget());
+ case ISD::INTRINSIC_WO_CHAIN:
+ return PerformIntrinsicCombine(N, DCI.DAG);
}
return SDValue();
}
diff --git a/lib/Target/AArch64/AArch64ISelLowering.h b/lib/Target/AArch64/AArch64ISelLowering.h
index c9795b2..7c7d038 100644
--- a/lib/Target/AArch64/AArch64ISelLowering.h
+++ b/lib/Target/AArch64/AArch64ISelLowering.h
@@ -19,7 +19,7 @@
#include "llvm/CodeGen/CallingConvLower.h"
#include "llvm/CodeGen/SelectionDAG.h"
#include "llvm/Target/TargetLowering.h"
-
+#include "llvm/IR/Intrinsics.h"
namespace llvm {
namespace AArch64ISD {
@@ -135,7 +135,11 @@ namespace AArch64ISD {
NEON_TST,
// Operation for the immediate in vector shift
- NEON_DUPIMM
+ NEON_DUPIMM,
+
+ // Vector saturating shift
+ NEON_QSHLs,
+ NEON_QSHLu
};
}
diff --git a/lib/Target/AArch64/AArch64InstrNEON.td b/lib/Target/AArch64/AArch64InstrNEON.td
index fb6d654..9712a5a 100644
--- a/lib/Target/AArch64/AArch64InstrNEON.td
+++ b/lib/Target/AArch64/AArch64InstrNEON.td
@@ -44,6 +44,12 @@ def Neon_tst : SDNode<"AArch64ISD::NEON_TST", SDTypeProfile<1, 2,
def Neon_dupImm : SDNode<"AArch64ISD::NEON_DUPIMM", SDTypeProfile<1, 1,
[SDTCisVec<0>, SDTCisVT<1, i32>]>>;
+def SDTARMVSH : SDTypeProfile<1, 2, [SDTCisVec<0>, SDTCisSameAs<0, 1>,
+ SDTCisVT<2, i32>]>;
+def Neon_sqrshlImm : SDNode<"AArch64ISD::NEON_QSHLs", SDTARMVSH>;
+def Neon_uqrshlImm : SDNode<"AArch64ISD::NEON_QSHLu", SDTARMVSH>;
+
+
//===----------------------------------------------------------------------===//
// Multiclasses
//===----------------------------------------------------------------------===//
@@ -1413,58 +1419,133 @@ def FMOVvi_2D : NeonI_FMOV_impl<".2d", VPR128, v2f64, fmov64_operand, 0b1, 0b1>;
}
// Vector Shift (Immediate)
-
+// Immediate in [0, 63]
def imm0_63 : Operand<i32> {
let ParserMatchClass = uimm6_asmoperand;
}
-class N2VShiftLeft<bit q, bit u, bits<5> opcode, string asmop, string T,
- RegisterClass VPRC, ValueType Ty, Operand ImmTy>
+// Shift Right Immediate - A shift right immediate is encoded differently from
+// other shift immediates. The immh:immb field is encoded like so:
+//
+// Offset Encoding
+// 8 immh:immb<6:3> = '0001xxx', <imm> is encoded in immh:immb<2:0>
+// 16 immh:immb<6:4> = '001xxxx', <imm> is encoded in immh:immb<3:0>
+// 32 immh:immb<6:5> = '01xxxxx', <imm> is encoded in immh:immb<4:0>
+// 64 immh:immb<6> = '1xxxxxx', <imm> is encoded in immh:immb<5:0>
+class shr_imm_asmoperands<string OFFSET> : AsmOperandClass {
+ let Name = "ShrImm" # OFFSET;
+ let RenderMethod = "addImmOperands";
+ let DiagnosticType = "ShrImm" # OFFSET;
+}
+
+class shr_imm<string OFFSET> : Operand<i32> {
+ let EncoderMethod = "getShiftRightImm" # OFFSET;
+ let DecoderMethod = "DecodeShiftRightImm" # OFFSET;
+ let ParserMatchClass =
+ !cast<AsmOperandClass>("shr_imm" # OFFSET # "_asmoperand");
+}
+
+def shr_imm8_asmoperand : shr_imm_asmoperands<"8">;
+def shr_imm16_asmoperand : shr_imm_asmoperands<"16">;
+def shr_imm32_asmoperand : shr_imm_asmoperands<"32">;
+def shr_imm64_asmoperand : shr_imm_asmoperands<"64">;
+
+def shr_imm8 : shr_imm<"8">;
+def shr_imm16 : shr_imm<"16">;
+def shr_imm32 : shr_imm<"32">;
+def shr_imm64 : shr_imm<"64">;
+
+class N2VShift<bit q, bit u, bits<5> opcode, string asmop, string T,
+ RegisterClass VPRC, ValueType Ty, Operand ImmTy, SDNode OpNode>
: NeonI_2VShiftImm<q, u, opcode,
(outs VPRC:$Rd), (ins VPRC:$Rn, ImmTy:$Imm),
asmop # "\t$Rd." # T # ", $Rn." # T # ", $Imm",
[(set (Ty VPRC:$Rd),
- (Ty (shl (Ty VPRC:$Rn),
+ (Ty (OpNode (Ty VPRC:$Rn),
(Ty (Neon_dupImm (i32 imm:$Imm))))))],
NoItinerary>;
multiclass NeonI_N2VShL<bit u, bits<5> opcode, string asmop> {
// 64-bit vector types.
- def _8B : N2VShiftLeft<0b0, u, opcode, asmop, "8b", VPR64, v8i8, uimm3> {
+ def _8B : N2VShift<0b0, u, opcode, asmop, "8b", VPR64, v8i8, uimm3, shl> {
let Inst{22-19} = 0b0001; // immh:immb = 0001xxx
}
- def _4H : N2VShiftLeft<0b0, u, opcode, asmop, "4h", VPR64, v4i16, uimm4> {
+ def _4H : N2VShift<0b0, u, opcode, asmop, "4h", VPR64, v4i16, uimm4, shl> {
let Inst{22-20} = 0b001; // immh:immb = 001xxxx
}
- def _2S : N2VShiftLeft<0b0, u, opcode, asmop, "2s", VPR64, v2i32, uimm5> {
+ def _2S : N2VShift<0b0, u, opcode, asmop, "2s", VPR64, v2i32, uimm5, shl> {
let Inst{22-21} = 0b01; // immh:immb = 01xxxxx
}
// 128-bit vector types.
- def _16B : N2VShiftLeft<0b1, u, opcode, asmop, "16b", VPR128, v16i8, uimm3> {
+ def _16B : N2VShift<0b1, u, opcode, asmop, "16b", VPR128, v16i8, uimm3, shl> {
let Inst{22-19} = 0b0001; // immh:immb = 0001xxx
}
- def _8H : N2VShiftLeft<0b1, u, opcode, asmop, "8h", VPR128, v8i16, uimm4> {
+ def _8H : N2VShift<0b1, u, opcode, asmop, "8h", VPR128, v8i16, uimm4, shl> {
let Inst{22-20} = 0b001; // immh:immb = 001xxxx
}
- def _4S : N2VShiftLeft<0b1, u, opcode, asmop, "4s", VPR128, v4i32, uimm5> {
+ def _4S : N2VShift<0b1, u, opcode, asmop, "4s", VPR128, v4i32, uimm5, shl> {
let Inst{22-21} = 0b01; // immh:immb = 01xxxxx
}
- def _2D : N2VShiftLeft<0b1, u, opcode, asmop, "2d", VPR128, v2i64, imm0_63> {
+ def _2D : N2VShift<0b1, u, opcode, asmop, "2d", VPR128, v2i64, imm0_63, shl> {
let Inst{22} = 0b1; // immh:immb = 1xxxxxx
}
}
-def Neon_top16B : PatFrag<(ops node:$in),
+multiclass NeonI_N2VShR<bit u, bits<5> opcode, string asmop, SDNode OpNode> {
+ def _8B : N2VShift<0b0, u, opcode, asmop, "8b", VPR64, v8i8, shr_imm8,
+ OpNode> {
+ let Inst{22-19} = 0b0001;
+ }
+
+ def _4H : N2VShift<0b0, u, opcode, asmop, "4h", VPR64, v4i16, shr_imm16,
+ OpNode> {
+ let Inst{22-20} = 0b001;
+ }
+
+ def _2S : N2VShift<0b0, u, opcode, asmop, "2s", VPR64, v2i32, shr_imm32,
+ OpNode> {
+ let Inst{22-21} = 0b01;
+ }
+
+ def _16B : N2VShift<0b1, u, opcode, asmop, "16b", VPR128, v16i8, shr_imm8,
+ OpNode> {
+ let Inst{22-19} = 0b0001;
+ }
+
+ def _8H : N2VShift<0b1, u, opcode, asmop, "8h", VPR128, v8i16, shr_imm16,
+ OpNode> {
+ let Inst{22-20} = 0b001;
+ }
+
+ def _4S : N2VShift<0b1, u, opcode, asmop, "4s", VPR128, v4i32, shr_imm32,
+ OpNode> {
+ let Inst{22-21} = 0b01;
+ }
+
+ def _2D : N2VShift<0b1, u, opcode, asmop, "2d", VPR128, v2i64, shr_imm64,
+ OpNode> {
+ let Inst{22} = 0b1;
+ }
+}
+
+// Shift left
+defm SHLvvi : NeonI_N2VShL<0b0, 0b01010, "shl">;
+
+// Shift right
+defm SSHRvvi : NeonI_N2VShR<0b0, 0b00000, "sshr", sra>;
+defm USHRvvi : NeonI_N2VShR<0b1, 0b00000, "ushr", srl>;
+
+def Neon_top16B : PatFrag<(ops node:$in),
(extract_subvector (v16i8 node:$in), (iPTR 8))>;
-def Neon_top8H : PatFrag<(ops node:$in),
+def Neon_top8H : PatFrag<(ops node:$in),
(extract_subvector (v8i16 node:$in), (iPTR 4))>;
-def Neon_top4S : PatFrag<(ops node:$in),
+def Neon_top4S : PatFrag<(ops node:$in),
(extract_subvector (v4i32 node:$in), (iPTR 2))>;
class N2VShiftLong<bit q, bit u, bits<5> opcode, string asmop, string DestT,
@@ -1474,21 +1555,21 @@ class N2VShiftLong<bit q, bit u, bits<5> opcode, string asmop, string DestT,
(ins VPR64:$Rn, ImmTy:$Imm),
asmop # "\t$Rd." # DestT # ", $Rn." # SrcT # ", $Imm",
[(set (DestTy VPR128:$Rd),
- (DestTy (shl
+ (DestTy (shl
(DestTy (ExtOp (SrcTy VPR64:$Rn))),
(DestTy (Neon_dupImm (i32 imm:$Imm))))))],
NoItinerary>;
class N2VShiftLongHigh<bit q, bit u, bits<5> opcode, string asmop, string DestT,
- string SrcT, ValueType DestTy, ValueType SrcTy,
+ string SrcT, ValueType DestTy, ValueType SrcTy,
int StartIndex, Operand ImmTy,
SDPatternOperator ExtOp, PatFrag getTop>
: NeonI_2VShiftImm<q, u, opcode, (outs VPR128:$Rd),
(ins VPR128:$Rn, ImmTy:$Imm),
asmop # "2\t$Rd." # DestT # ", $Rn." # SrcT # ", $Imm",
- [(set (DestTy VPR128:$Rd),
+ [(set (DestTy VPR128:$Rd),
(DestTy (shl
- (DestTy (ExtOp
+ (DestTy (ExtOp
(SrcTy (getTop VPR128:$Rn)))),
(DestTy (Neon_dupImm (i32 imm:$Imm))))))],
NoItinerary>;
@@ -1497,33 +1578,33 @@ multiclass NeonI_N2VShLL<string prefix, bit u, bits<5> opcode, string asmop,
SDNode ExtOp> {
// 64-bit vector types.
def _8B : N2VShiftLong<0b0, u, opcode, asmop, "8h", "8b", v8i16, v8i8,
- uimm3, ExtOp>{
+ uimm3, ExtOp> {
let Inst{22-19} = 0b0001; // immh:immb = 0001xxx
}
def _4H : N2VShiftLong<0b0, u, opcode, asmop, "4s", "4h", v4i32, v4i16,
- uimm4, ExtOp>{
+ uimm4, ExtOp> {
let Inst{22-20} = 0b001; // immh:immb = 001xxxx
}
def _2S : N2VShiftLong<0b0, u, opcode, asmop, "2d", "2s", v2i64, v2i32,
- uimm5, ExtOp>{
+ uimm5, ExtOp> {
let Inst{22-21} = 0b01; // immh:immb = 01xxxxx
}
// 128-bit vector types
def _16B : N2VShiftLongHigh<0b1, u, opcode, asmop, "8h", "16b",
- v8i16, v8i8, 8, uimm3, ExtOp, Neon_top16B>{
+ v8i16, v8i8, 8, uimm3, ExtOp, Neon_top16B> {
let Inst{22-19} = 0b0001; // immh:immb = 0001xxx
}
def _8H : N2VShiftLongHigh<0b1, u, opcode, asmop, "4s", "8h",
- v4i32, v4i16, 4, uimm4, ExtOp, Neon_top8H>{
+ v4i32, v4i16, 4, uimm4, ExtOp, Neon_top8H> {
let Inst{22-20} = 0b001; // immh:immb = 001xxxx
}
def _4S : N2VShiftLongHigh<0b1, u, opcode, asmop, "2d", "4s",
- v2i64, v2i32, 2, uimm5, ExtOp, Neon_top4S>{
+ v2i64, v2i32, 2, uimm5, ExtOp, Neon_top4S> {
let Inst{22-21} = 0b01; // immh:immb = 01xxxxx
}
@@ -1547,13 +1628,521 @@ multiclass NeonI_N2VShLL<string prefix, bit u, bits<5> opcode, string asmop,
(!cast<Instruction>(prefix # "_4S") VPR128:$Rn, 0)>;
}
-// Shift left immediate
-defm SHLvvi : NeonI_N2VShL<0b0, 0b01010, "shl">;
-
-// Shift left long immediate
+// Shift left long
defm SSHLLvvi : NeonI_N2VShLL<"SSHLLvvi", 0b0, 0b10100, "sshll", sext>;
defm USHLLvvi : NeonI_N2VShLL<"USHLLvvi", 0b1, 0b10100, "ushll", zext>;
+// Rounding/Saturating shift
+class N2VShift_RQ<bit q, bit u, bits<5> opcode, string asmop, string T,
+ RegisterClass VPRC, ValueType Ty, Operand ImmTy,
+ SDPatternOperator OpNode>
+ : NeonI_2VShiftImm<q, u, opcode,
+ (outs VPRC:$Rd), (ins VPRC:$Rn, ImmTy:$Imm),
+ asmop # "\t$Rd." # T # ", $Rn." # T # ", $Imm",
+ [(set (Ty VPRC:$Rd), (Ty (OpNode (Ty VPRC:$Rn),
+ (i32 imm:$Imm))))],
+ NoItinerary>;
+
+// shift right (vector by immediate)
+multiclass NeonI_N2VShR_RQ<bit u, bits<5> opcode, string asmop,
+ SDPatternOperator OpNode> {
+ def _8B : N2VShift_RQ<0b0, u, opcode, asmop, "8b", VPR64, v8i8, shr_imm8,
+ OpNode> {
+ let Inst{22-19} = 0b0001;
+ }
+
+ def _4H : N2VShift_RQ<0b0, u, opcode, asmop, "4h", VPR64, v4i16, shr_imm16,
+ OpNode> {
+ let Inst{22-20} = 0b001;
+ }
+
+ def _2S : N2VShift_RQ<0b0, u, opcode, asmop, "2s", VPR64, v2i32, shr_imm32,
+ OpNode> {
+ let Inst{22-21} = 0b01;
+ }
+
+ def _16B : N2VShift_RQ<0b1, u, opcode, asmop, "16b", VPR128, v16i8, shr_imm8,
+ OpNode> {
+ let Inst{22-19} = 0b0001;
+ }
+
+ def _8H : N2VShift_RQ<0b1, u, opcode, asmop, "8h", VPR128, v8i16, shr_imm16,
+ OpNode> {
+ let Inst{22-20} = 0b001;
+ }
+
+ def _4S : N2VShift_RQ<0b1, u, opcode, asmop, "4s", VPR128, v4i32, shr_imm32,
+ OpNode> {
+ let Inst{22-21} = 0b01;
+ }
+
+ def _2D : N2VShift_RQ<0b1, u, opcode, asmop, "2d", VPR128, v2i64, shr_imm64,
+ OpNode> {
+ let Inst{22} = 0b1;
+ }
+}
+
+multiclass NeonI_N2VShL_Q<bit u, bits<5> opcode, string asmop,
+ SDPatternOperator OpNode> {
+ // 64-bit vector types.
+ def _8B : N2VShift_RQ<0b0, u, opcode, asmop, "8b", VPR64, v8i8, uimm3,
+ OpNode> {
+ let Inst{22-19} = 0b0001;
+ }
+
+ def _4H : N2VShift_RQ<0b0, u, opcode, asmop, "4h", VPR64, v4i16, uimm4,
+ OpNode> {
+ let Inst{22-20} = 0b001;
+ }
+
+ def _2S : N2VShift_RQ<0b0, u, opcode, asmop, "2s", VPR64, v2i32, uimm5,
+ OpNode> {
+ let Inst{22-21} = 0b01;
+ }
+
+ // 128-bit vector types.
+ def _16B : N2VShift_RQ<0b1, u, opcode, asmop, "16b", VPR128, v16i8, uimm3,
+ OpNode> {
+ let Inst{22-19} = 0b0001;
+ }
+
+ def _8H : N2VShift_RQ<0b1, u, opcode, asmop, "8h", VPR128, v8i16, uimm4,
+ OpNode> {
+ let Inst{22-20} = 0b001;
+ }
+
+ def _4S : N2VShift_RQ<0b1, u, opcode, asmop, "4s", VPR128, v4i32, uimm5,
+ OpNode> {
+ let Inst{22-21} = 0b01;
+ }
+
+ def _2D : N2VShift_RQ<0b1, u, opcode, asmop, "2d", VPR128, v2i64, imm0_63,
+ OpNode> {
+ let Inst{22} = 0b1;
+ }
+}
+
+// Rounding shift right
+defm SRSHRvvi : NeonI_N2VShR_RQ<0b0, 0b00100, "srshr",
+ int_aarch64_neon_vsrshr>;
+defm URSHRvvi : NeonI_N2VShR_RQ<0b1, 0b00100, "urshr",
+ int_aarch64_neon_vurshr>;
+
+// Saturating shift left unsigned
+defm SQSHLUvvi : NeonI_N2VShL_Q<0b1, 0b01100, "sqshlu", int_aarch64_neon_vsqshlu>;
+
+// Saturating shift left
+defm SQSHLvvi : NeonI_N2VShL_Q<0b0, 0b01110, "sqshl", Neon_sqrshlImm>;
+defm UQSHLvvi : NeonI_N2VShL_Q<0b1, 0b01110, "uqshl", Neon_uqrshlImm>;
+
+class N2VShiftAdd<bit q, bit u, bits<5> opcode, string asmop, string T,
+ RegisterClass VPRC, ValueType Ty, Operand ImmTy,
+ SDNode OpNode>
+ : NeonI_2VShiftImm<q, u, opcode,
+ (outs VPRC:$Rd), (ins VPRC:$src, VPRC:$Rn, ImmTy:$Imm),
+ asmop # "\t$Rd." # T # ", $Rn." # T # ", $Imm",
+ [(set (Ty VPRC:$Rd), (Ty (add (Ty VPRC:$src),
+ (Ty (OpNode (Ty VPRC:$Rn),
+ (Ty (Neon_dupImm (i32 imm:$Imm))))))))],
+ NoItinerary> {
+ let Constraints = "$src = $Rd";
+}
+
+// Shift Right accumulate
+multiclass NeonI_N2VShRAdd<bit u, bits<5> opcode, string asmop, SDNode OpNode> {
+ def _8B : N2VShiftAdd<0b0, u, opcode, asmop, "8b", VPR64, v8i8, shr_imm8,
+ OpNode> {
+ let Inst{22-19} = 0b0001;
+ }
+
+ def _4H : N2VShiftAdd<0b0, u, opcode, asmop, "4h", VPR64, v4i16, shr_imm16,
+ OpNode> {
+ let Inst{22-20} = 0b001;
+ }
+
+ def _2S : N2VShiftAdd<0b0, u, opcode, asmop, "2s", VPR64, v2i32, shr_imm32,
+ OpNode> {
+ let Inst{22-21} = 0b01;
+ }
+
+ def _16B : N2VShiftAdd<0b1, u, opcode, asmop, "16b", VPR128, v16i8, shr_imm8,
+ OpNode> {
+ let Inst{22-19} = 0b0001;
+ }
+
+ def _8H : N2VShiftAdd<0b1, u, opcode, asmop, "8h", VPR128, v8i16, shr_imm16,
+ OpNode> {
+ let Inst{22-20} = 0b001;
+ }
+
+ def _4S : N2VShiftAdd<0b1, u, opcode, asmop, "4s", VPR128, v4i32, shr_imm32,
+ OpNode> {
+ let Inst{22-21} = 0b01;
+ }
+
+ def _2D : N2VShiftAdd<0b1, u, opcode, asmop, "2d", VPR128, v2i64, shr_imm64,
+ OpNode> {
+ let Inst{22} = 0b1;
+ }
+}
+
+// Shift right and accumulate
+defm SSRAvvi : NeonI_N2VShRAdd<0, 0b00010, "ssra", sra>;
+defm USRAvvi : NeonI_N2VShRAdd<1, 0b00010, "usra", srl>;
+
+// Rounding shift accumulate
+class N2VShiftAdd_R<bit q, bit u, bits<5> opcode, string asmop, string T,
+ RegisterClass VPRC, ValueType Ty, Operand ImmTy,
+ SDPatternOperator OpNode>
+ : NeonI_2VShiftImm<q, u, opcode,
+ (outs VPRC:$Rd), (ins VPRC:$src, VPRC:$Rn, ImmTy:$Imm),
+ asmop # "\t$Rd." # T # ", $Rn." # T # ", $Imm",
+ [(set (Ty VPRC:$Rd), (Ty (add (Ty VPRC:$src),
+ (Ty (OpNode (Ty VPRC:$Rn), (i32 imm:$Imm))))))],
+ NoItinerary> {
+ let Constraints = "$src = $Rd";
+}
+
+multiclass NeonI_N2VShRAdd_R<bit u, bits<5> opcode, string asmop,
+ SDPatternOperator OpNode> {
+ def _8B : N2VShiftAdd_R<0b0, u, opcode, asmop, "8b", VPR64, v8i8, shr_imm8,
+ OpNode> {
+ let Inst{22-19} = 0b0001;
+ }
+
+ def _4H : N2VShiftAdd_R<0b0, u, opcode, asmop, "4h", VPR64, v4i16, shr_imm16,
+ OpNode> {
+ let Inst{22-20} = 0b001;
+ }
+
+ def _2S : N2VShiftAdd_R<0b0, u, opcode, asmop, "2s", VPR64, v2i32, shr_imm32,
+ OpNode> {
+ let Inst{22-21} = 0b01;
+ }
+
+ def _16B : N2VShiftAdd_R<0b1, u, opcode, asmop, "16b", VPR128, v16i8, shr_imm8,
+ OpNode> {
+ let Inst{22-19} = 0b0001;
+ }
+
+ def _8H : N2VShiftAdd_R<0b1, u, opcode, asmop, "8h", VPR128, v8i16, shr_imm16,
+ OpNode> {
+ let Inst{22-20} = 0b001;
+ }
+
+ def _4S : N2VShiftAdd_R<0b1, u, opcode, asmop, "4s", VPR128, v4i32, shr_imm32,
+ OpNode> {
+ let Inst{22-21} = 0b01;
+ }
+
+ def _2D : N2VShiftAdd_R<0b1, u, opcode, asmop, "2d", VPR128, v2i64, shr_imm64,
+ OpNode> {
+ let Inst{22} = 0b1;
+ }
+}
+
+// Rounding shift right and accumulate
+defm SRSRAvvi : NeonI_N2VShRAdd_R<0, 0b00110, "srsra", int_aarch64_neon_vsrshr>;
+defm URSRAvvi : NeonI_N2VShRAdd_R<1, 0b00110, "ursra", int_aarch64_neon_vurshr>;
+
+// Shift insert by immediate
+class N2VShiftIns<bit q, bit u, bits<5> opcode, string asmop, string T,
+ RegisterClass VPRC, ValueType Ty, Operand ImmTy,
+ SDPatternOperator OpNode>
+ : NeonI_2VShiftImm<q, u, opcode,
+ (outs VPRC:$Rd), (ins VPRC:$src, VPRC:$Rn, ImmTy:$Imm),
+ asmop # "\t$Rd." # T # ", $Rn." # T # ", $Imm",
+ [(set (Ty VPRC:$Rd), (Ty (OpNode (Ty VPRC:$src), (Ty VPRC:$Rn),
+ (i32 imm:$Imm))))],
+ NoItinerary> {
+ let Constraints = "$src = $Rd";
+}
+
+// shift left insert (vector by immediate)
+multiclass NeonI_N2VShLIns<bit u, bits<5> opcode, string asmop> {
+ def _8B : N2VShiftIns<0b0, u, opcode, asmop, "8b", VPR64, v8i8, uimm3,
+ int_aarch64_neon_vsli> {
+ let Inst{22-19} = 0b0001;
+ }
+
+ def _4H : N2VShiftIns<0b0, u, opcode, asmop, "4h", VPR64, v4i16, uimm4,
+ int_aarch64_neon_vsli> {
+ let Inst{22-20} = 0b001;
+ }
+
+ def _2S : N2VShiftIns<0b0, u, opcode, asmop, "2s", VPR64, v2i32, uimm5,
+ int_aarch64_neon_vsli> {
+ let Inst{22-21} = 0b01;
+ }
+
+ // 128-bit vector types
+ def _16B : N2VShiftIns<0b1, u, opcode, asmop, "16b", VPR128, v16i8, uimm3,
+ int_aarch64_neon_vsli> {
+ let Inst{22-19} = 0b0001;
+ }
+
+ def _8H : N2VShiftIns<0b1, u, opcode, asmop, "8h", VPR128, v8i16, uimm4,
+ int_aarch64_neon_vsli> {
+ let Inst{22-20} = 0b001;
+ }
+
+ def _4S : N2VShiftIns<0b1, u, opcode, asmop, "4s", VPR128, v4i32, uimm5,
+ int_aarch64_neon_vsli> {
+ let Inst{22-21} = 0b01;
+ }
+
+ def _2D : N2VShiftIns<0b1, u, opcode, asmop, "2d", VPR128, v2i64, imm0_63,
+ int_aarch64_neon_vsli> {
+ let Inst{22} = 0b1;
+ }
+}
+
+// shift right insert (vector by immediate)
+multiclass NeonI_N2VShRIns<bit u, bits<5> opcode, string asmop> {
+ // 64-bit vector types.
+ def _8B : N2VShiftIns<0b0, u, opcode, asmop, "8b", VPR64, v8i8, shr_imm8,
+ int_aarch64_neon_vsri> {
+ let Inst{22-19} = 0b0001;
+ }
+
+ def _4H : N2VShiftIns<0b0, u, opcode, asmop, "4h", VPR64, v4i16, shr_imm16,
+ int_aarch64_neon_vsri> {
+ let Inst{22-20} = 0b001;
+ }
+
+ def _2S : N2VShiftIns<0b0, u, opcode, asmop, "2s", VPR64, v2i32, shr_imm32,
+ int_aarch64_neon_vsri> {
+ let Inst{22-21} = 0b01;
+ }
+
+ // 128-bit vector types
+ def _16B : N2VShiftIns<0b1, u, opcode, asmop, "16b", VPR128, v16i8, shr_imm8,
+ int_aarch64_neon_vsri> {
+ let Inst{22-19} = 0b0001;
+ }
+
+ def _8H : N2VShiftIns<0b1, u, opcode, asmop, "8h", VPR128, v8i16, shr_imm16,
+ int_aarch64_neon_vsri> {
+ let Inst{22-20} = 0b001;
+ }
+
+ def _4S : N2VShiftIns<0b1, u, opcode, asmop, "4s", VPR128, v4i32, shr_imm32,
+ int_aarch64_neon_vsri> {
+ let Inst{22-21} = 0b01;
+ }
+
+ def _2D : N2VShiftIns<0b1, u, opcode, asmop, "2d", VPR128, v2i64, shr_imm64,
+ int_aarch64_neon_vsri> {
+ let Inst{22} = 0b1;
+ }
+}
+
+// Shift left and insert
+defm SLIvvi : NeonI_N2VShLIns<0b1, 0b01010, "sli">;
+
+// Shift right and insert
+defm SRIvvi : NeonI_N2VShRIns<0b1, 0b01000, "sri">;
+
+class N2VShR_Narrow<bit q, bit u, bits<5> opcode, string asmop, string DestT,
+ string SrcT, Operand ImmTy>
+ : NeonI_2VShiftImm<q, u, opcode,
+ (outs VPR64:$Rd), (ins VPR128:$Rn, ImmTy:$Imm),
+ asmop # "\t$Rd." # DestT # ", $Rn." # SrcT # ", $Imm",
+ [], NoItinerary>;
+
+class N2VShR_Narrow_Hi<bit q, bit u, bits<5> opcode, string asmop, string DestT,
+ string SrcT, Operand ImmTy>
+ : NeonI_2VShiftImm<q, u, opcode, (outs VPR128:$Rd),
+ (ins VPR64:$src, VPR128:$Rn, ImmTy:$Imm),
+ asmop # "\t$Rd." # DestT # ", $Rn." # SrcT # ", $Imm",
+ [], NoItinerary> {
+ let Constraints = "$src = $Rd";
+}
+
+// left long shift by immediate
+multiclass NeonI_N2VShR_Narrow<bit u, bits<5> opcode, string asmop> {
+ def _8B : N2VShR_Narrow<0b0, u, opcode, asmop, "8b", "8h", shr_imm8> {
+ let Inst{22-19} = 0b0001;
+ }
+
+ def _4H : N2VShR_Narrow<0b0, u, opcode, asmop, "4h", "4s", shr_imm16> {
+ let Inst{22-20} = 0b001;
+ }
+
+ def _2S : N2VShR_Narrow<0b0, u, opcode, asmop, "2s", "2d", shr_imm32> {
+ let Inst{22-21} = 0b01;
+ }
+
+ // Shift Narrow High
+ def _16B : N2VShR_Narrow_Hi<0b1, u, opcode, asmop # "2", "16b", "8h",
+ shr_imm8> {
+ let Inst{22-19} = 0b0001;
+ }
+
+ def _8H : N2VShR_Narrow_Hi<0b1, u, opcode, asmop # "2", "8h", "4s",
+ shr_imm16> {
+ let Inst{22-20} = 0b001;
+ }
+
+ def _4S : N2VShR_Narrow_Hi<0b1, u, opcode, asmop # "2", "4s", "2d",
+ shr_imm32> {
+ let Inst{22-21} = 0b01;
+ }
+}
+
+// Shift right narrow
+defm SHRNvvi : NeonI_N2VShR_Narrow<0b0, 0b10000, "shrn">;
+
+// Shift right narrow (prefix Q is saturating, prefix R is rounding)
+defm QSHRUNvvi :NeonI_N2VShR_Narrow<0b1, 0b10000, "sqshrun">;
+defm RSHRNvvi : NeonI_N2VShR_Narrow<0b0, 0b10001, "rshrn">;
+defm QRSHRUNvvi : NeonI_N2VShR_Narrow<0b1, 0b10001, "sqrshrun">;
+defm SQSHRNvvi : NeonI_N2VShR_Narrow<0b0, 0b10010, "sqshrn">;
+defm UQSHRNvvi : NeonI_N2VShR_Narrow<0b1, 0b10010, "uqshrn">;
+defm SQRSHRNvvi : NeonI_N2VShR_Narrow<0b0, 0b10011, "sqrshrn">;
+defm UQRSHRNvvi : NeonI_N2VShR_Narrow<0b1, 0b10011, "uqrshrn">;
+
+def Neon_combine : PatFrag<(ops node:$Rm, node:$Rn),
+ (v2i64 (concat_vectors (v1i64 node:$Rm),
+ (v1i64 node:$Rn)))>;
+
+def Neon_lshrImm8H : PatFrag<(ops node:$lhs, node:$rhs),
+ (v8i16 (srl (v8i16 node:$lhs),
+ (v8i16 (Neon_dupImm (i32 node:$rhs)))))>;
+def Neon_lshrImm4S : PatFrag<(ops node:$lhs, node:$rhs),
+ (v4i32 (srl (v4i32 node:$lhs),
+ (v4i32 (Neon_dupImm (i32 node:$rhs)))))>;
+def Neon_lshrImm2D : PatFrag<(ops node:$lhs, node:$rhs),
+ (v2i64 (srl (v2i64 node:$lhs),
+ (v2i64 (Neon_dupImm (i32 node:$rhs)))))>;
+def Neon_ashrImm8H : PatFrag<(ops node:$lhs, node:$rhs),
+ (v8i16 (sra (v8i16 node:$lhs),
+ (v8i16 (Neon_dupImm (i32 node:$rhs)))))>;
+def Neon_ashrImm4S : PatFrag<(ops node:$lhs, node:$rhs),
+ (v4i32 (sra (v4i32 node:$lhs),
+ (v4i32 (Neon_dupImm (i32 node:$rhs)))))>;
+def Neon_ashrImm2D : PatFrag<(ops node:$lhs, node:$rhs),
+ (v2i64 (sra (v2i64 node:$lhs),
+ (v2i64 (Neon_dupImm (i32 node:$rhs)))))>;
+
+// Normal shift right narrow is matched by IR (srl/sra, trunc, concat_vectors)
+multiclass Neon_shiftNarrow_patterns<string shr> {
+ def : Pat<(v8i8 (trunc (!cast<PatFrag>("Neon_" # shr # "Imm8H") VPR128:$Rn,
+ imm:$Imm))),
+ (SHRNvvi_8B VPR128:$Rn, imm:$Imm)>;
+ def : Pat<(v4i16 (trunc (!cast<PatFrag>("Neon_" # shr # "Imm4S") VPR128:$Rn,
+ imm:$Imm))),
+ (SHRNvvi_4H VPR128:$Rn, imm:$Imm)>;
+ def : Pat<(v2i32 (trunc (!cast<PatFrag>("Neon_" # shr # "Imm2D") VPR128:$Rn,
+ imm:$Imm))),
+ (SHRNvvi_2S VPR128:$Rn, imm:$Imm)>;
+
+ def : Pat<(Neon_combine (v1i64 VPR64:$src), (v1i64 (bitconvert
+ (v8i8 (trunc (!cast<PatFrag>("Neon_" # shr # "Imm8H")
+ VPR128:$Rn, imm:$Imm)))))),
+ (SHRNvvi_16B VPR64:$src, VPR128:$Rn, imm:$Imm)>;
+ def : Pat<(Neon_combine (v1i64 VPR64:$src), (v1i64 (bitconvert
+ (v4i16 (trunc (!cast<PatFrag>("Neon_" # shr # "Imm4S")
+ VPR128:$Rn, imm:$Imm)))))),
+ (SHRNvvi_8H VPR64:$src, VPR128:$Rn, imm:$Imm)>;
+ def : Pat<(Neon_combine (v1i64 VPR64:$src), (v1i64 (bitconvert
+ (v2i32 (trunc (!cast<PatFrag>("Neon_" # shr # "Imm2D")
+ VPR128:$Rn, imm:$Imm)))))),
+ (SHRNvvi_4S VPR64:$src, VPR128:$Rn, imm:$Imm)>;
+}
+
+multiclass Neon_shiftNarrow_QR_patterns<SDPatternOperator op, string prefix> {
+ def : Pat<(v8i8 (op (v8i16 VPR128:$Rn), imm:$Imm)),
+ (!cast<Instruction>(prefix # "_8B") VPR128:$Rn, imm:$Imm)>;
+ def : Pat<(v4i16 (op (v4i32 VPR128:$Rn), imm:$Imm)),
+ (!cast<Instruction>(prefix # "_4H") VPR128:$Rn, imm:$Imm)>;
+ def : Pat<(v2i32 (op (v2i64 VPR128:$Rn), imm:$Imm)),
+ (!cast<Instruction>(prefix # "_2S") VPR128:$Rn, imm:$Imm)>;
+
+ def : Pat<(Neon_combine (v1i64 VPR64:$src),
+ (v1i64 (bitconvert (v8i8 (op (v8i16 VPR128:$Rn), imm:$Imm))))),
+ (!cast<Instruction>(prefix # "_16B")
+ VPR64:$src, VPR128:$Rn, imm:$Imm)>;
+ def : Pat<(Neon_combine (v1i64 VPR64:$src),
+ (v1i64 (bitconvert (v4i16 (op (v4i32 VPR128:$Rn), imm:$Imm))))),
+ (!cast<Instruction>(prefix # "_8H")
+ VPR64:$src, VPR128:$Rn, imm:$Imm)>;
+ def : Pat<(Neon_combine (v1i64 VPR64:$src),
+ (v1i64 (bitconvert (v2i32 (op (v2i64 VPR128:$Rn), imm:$Imm))))),
+ (!cast<Instruction>(prefix # "_4S")
+ VPR64:$src, VPR128:$Rn, imm:$Imm)>;
+}
+
+defm : Neon_shiftNarrow_patterns<"lshr">;
+defm : Neon_shiftNarrow_patterns<"ashr">;
+
+defm : Neon_shiftNarrow_QR_patterns<int_aarch64_neon_vsqshrun, "QSHRUNvvi">;
+defm : Neon_shiftNarrow_QR_patterns<int_aarch64_neon_vrshrn, "RSHRNvvi">;
+defm : Neon_shiftNarrow_QR_patterns<int_aarch64_neon_vsqrshrun, "QRSHRUNvvi">;
+defm : Neon_shiftNarrow_QR_patterns<int_aarch64_neon_vsqshrn, "SQSHRNvvi">;
+defm : Neon_shiftNarrow_QR_patterns<int_aarch64_neon_vuqshrn, "UQSHRNvvi">;
+defm : Neon_shiftNarrow_QR_patterns<int_aarch64_neon_vsqrshrn, "SQRSHRNvvi">;
+defm : Neon_shiftNarrow_QR_patterns<int_aarch64_neon_vuqrshrn, "UQRSHRNvvi">;
+
+// Convert fix-point and float-pointing
+class N2VCvt_Fx<bit q, bit u, bits<5> opcode, string asmop, string T,
+ RegisterClass VPRC, ValueType DestTy, ValueType SrcTy,
+ Operand ImmTy, SDPatternOperator IntOp>
+ : NeonI_2VShiftImm<q, u, opcode,
+ (outs VPRC:$Rd), (ins VPRC:$Rn, ImmTy:$Imm),
+ asmop # "\t$Rd." # T # ", $Rn." # T # ", $Imm",
+ [(set (DestTy VPRC:$Rd), (DestTy (IntOp (SrcTy VPRC:$Rn),
+ (i32 imm:$Imm))))],
+ NoItinerary>;
+
+multiclass NeonI_N2VCvt_Fx2fp<bit u, bits<5> opcode, string asmop,
+ SDPatternOperator IntOp> {
+ def _2S : N2VCvt_Fx<0, u, opcode, asmop, "2s", VPR64, v2f32, v2i32,
+ shr_imm32, IntOp> {
+ let Inst{22-21} = 0b01;
+ }
+
+ def _4S : N2VCvt_Fx<1, u, opcode, asmop, "4s", VPR128, v4f32, v4i32,
+ shr_imm32, IntOp> {
+ let Inst{22-21} = 0b01;
+ }
+
+ def _2D : N2VCvt_Fx<1, u, opcode, asmop, "2d", VPR128, v2f64, v2i64,
+ shr_imm64, IntOp> {
+ let Inst{22} = 0b1;
+ }
+}
+
+multiclass NeonI_N2VCvt_Fp2fx<bit u, bits<5> opcode, string asmop,
+ SDPatternOperator IntOp> {
+ def _2S : N2VCvt_Fx<0, u, opcode, asmop, "2s", VPR64, v2i32, v2f32,
+ shr_imm32, IntOp> {
+ let Inst{22-21} = 0b01;
+ }
+
+ def _4S : N2VCvt_Fx<1, u, opcode, asmop, "4s", VPR128, v4i32, v4f32,
+ shr_imm32, IntOp> {
+ let Inst{22-21} = 0b01;
+ }
+
+ def _2D : N2VCvt_Fx<1, u, opcode, asmop, "2d", VPR128, v2i64, v2f64,
+ shr_imm64, IntOp> {
+ let Inst{22} = 0b1;
+ }
+}
+
+// Convert fixed-point to floating-point
+defm VCVTxs2f : NeonI_N2VCvt_Fx2fp<0, 0b11100, "scvtf",
+ int_arm_neon_vcvtfxs2fp>;
+defm VCVTxu2f : NeonI_N2VCvt_Fx2fp<1, 0b11100, "ucvtf",
+ int_arm_neon_vcvtfxu2fp>;
+
+// Convert floating-point to fixed-point
+defm VCVTf2xs : NeonI_N2VCvt_Fp2fx<0, 0b11111, "fcvtzs",
+ int_arm_neon_vcvtfp2fxs>;
+defm VCVTf2xu : NeonI_N2VCvt_Fp2fx<1, 0b11111, "fcvtzu",
+ int_arm_neon_vcvtfp2fxu>;
+
// Scalar Arithmetic
class NeonI_Scalar3Same_D_size<bit u, bits<5> opcode, string asmop>
@@ -1726,6 +2315,10 @@ def : Pat<(v16i8 (bitconvert (v2f64 VPR128:$src))), (v16i8 VPR128:$src)>;
// ...and scalar bitcasts...
+def : Pat<(v8i8 (bitconvert (v1i64 VPR64:$src))), (v8i8 VPR64:$src)>;
+def : Pat<(v4i16 (bitconvert (v1i64 VPR64:$src))), (v4i16 VPR64:$src)>;
+def : Pat<(v2i32 (bitconvert (v1i64 VPR64:$src))), (v2i32 VPR64:$src)>;
+
def : Pat<(f64 (bitconvert (v8i8 VPR64:$src))),
(f64 (EXTRACT_SUBREG (v8i8 VPR64:$src), sub_64))>;
def : Pat<(f64 (bitconvert (v4i16 VPR64:$src))),
diff --git a/lib/Target/AArch64/AsmParser/AArch64AsmParser.cpp b/lib/Target/AArch64/AsmParser/AArch64AsmParser.cpp
index 43e91ac..68d4be4 100644
--- a/lib/Target/AArch64/AsmParser/AArch64AsmParser.cpp
+++ b/lib/Target/AArch64/AsmParser/AArch64AsmParser.cpp
@@ -664,6 +664,25 @@ public:
return !ShiftExtend.ImplicitAmount && ShiftExtend.Amount <= 4;
}
+ // if 0 < value <= w, return true
+ bool isShrFixedWidth(int w) const {
+ if (!isImm())
+ return false;
+ const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(getImm());
+ if (!CE)
+ return false;
+ int64_t Value = CE->getValue();
+ return Value > 0 && Value <= w;
+ }
+
+ bool isShrImm8() const { return isShrFixedWidth(8); }
+
+ bool isShrImm16() const { return isShrFixedWidth(16); }
+
+ bool isShrImm32() const { return isShrFixedWidth(32); }
+
+ bool isShrImm64() const { return isShrFixedWidth(64); }
+
bool isNeonMovImmShiftLSL() const {
if (!isShiftOrExtend())
return false;
@@ -2240,6 +2259,18 @@ bool AArch64AsmParser::MatchAndEmitInstruction(SMLoc IDLoc, unsigned &Opcode,
case Match_Width64:
return Error(((AArch64Operand*)Operands[ErrorInfo])->getStartLoc(),
"expected integer in range [<lsb>, 63]");
+ case Match_ShrImm8:
+ return Error(((AArch64Operand *)Operands[ErrorInfo])->getStartLoc(),
+ "expected integer in range [1, 8]");
+ case Match_ShrImm16:
+ return Error(((AArch64Operand *)Operands[ErrorInfo])->getStartLoc(),
+ "expected integer in range [1, 16]");
+ case Match_ShrImm32:
+ return Error(((AArch64Operand *)Operands[ErrorInfo])->getStartLoc(),
+ "expected integer in range [1, 32]");
+ case Match_ShrImm64:
+ return Error(((AArch64Operand *)Operands[ErrorInfo])->getStartLoc(),
+ "expected integer in range [1, 64]");
}
llvm_unreachable("Implement any new match types added!");
diff --git a/lib/Target/AArch64/Disassembler/AArch64Disassembler.cpp b/lib/Target/AArch64/Disassembler/AArch64Disassembler.cpp
index a88a8e8..5b57b50 100644
--- a/lib/Target/AArch64/Disassembler/AArch64Disassembler.cpp
+++ b/lib/Target/AArch64/Disassembler/AArch64Disassembler.cpp
@@ -113,6 +113,18 @@ static DecodeStatus DecodeFPZeroOperand(llvm::MCInst &Inst,
uint64_t Address,
const void *Decoder);
+static DecodeStatus DecodeShiftRightImm8(MCInst &Inst, unsigned Val,
+ uint64_t Address, const void *Decoder);
+static DecodeStatus DecodeShiftRightImm16(MCInst &Inst, unsigned Val,
+ uint64_t Address,
+ const void *Decoder);
+static DecodeStatus DecodeShiftRightImm32(MCInst &Inst, unsigned Val,
+ uint64_t Address,
+ const void *Decoder);
+static DecodeStatus DecodeShiftRightImm64(MCInst &Inst, unsigned Val,
+ uint64_t Address,
+ const void *Decoder);
+
template<int RegWidth>
static DecodeStatus DecodeMoveWideImmOperand(llvm::MCInst &Inst,
unsigned FullImm,
@@ -413,7 +425,33 @@ static DecodeStatus DecodeFPZeroOperand(llvm::MCInst &Inst,
return MCDisassembler::Success;
}
+static DecodeStatus DecodeShiftRightImm8(MCInst &Inst, unsigned Val,
+ uint64_t Address,
+ const void *Decoder) {
+ Inst.addOperand(MCOperand::CreateImm(8 - Val));
+ return MCDisassembler::Success;
+}
+
+static DecodeStatus DecodeShiftRightImm16(MCInst &Inst, unsigned Val,
+ uint64_t Address,
+ const void *Decoder) {
+ Inst.addOperand(MCOperand::CreateImm(16 - Val));
+ return MCDisassembler::Success;
+}
+
+static DecodeStatus DecodeShiftRightImm32(MCInst &Inst, unsigned Val,
+ uint64_t Address,
+ const void *Decoder) {
+ Inst.addOperand(MCOperand::CreateImm(32 - Val));
+ return MCDisassembler::Success;
+}
+static DecodeStatus DecodeShiftRightImm64(MCInst &Inst, unsigned Val,
+ uint64_t Address,
+ const void *Decoder) {
+ Inst.addOperand(MCOperand::CreateImm(64 - Val));
+ return MCDisassembler::Success;
+}
template<int RegWidth>
static DecodeStatus DecodeMoveWideImmOperand(llvm::MCInst &Inst,
diff --git a/lib/Target/AArch64/MCTargetDesc/AArch64MCCodeEmitter.cpp b/lib/Target/AArch64/MCTargetDesc/AArch64MCCodeEmitter.cpp
index b9770b3..7bfaecc 100644
--- a/lib/Target/AArch64/MCTargetDesc/AArch64MCCodeEmitter.cpp
+++ b/lib/Target/AArch64/MCTargetDesc/AArch64MCCodeEmitter.cpp
@@ -59,6 +59,14 @@ public:
unsigned getBitfield64LSLOpValue(const MCInst &MI, unsigned OpIdx,
SmallVectorImpl<MCFixup> &Fixups) const;
+ unsigned getShiftRightImm8(const MCInst &MI, unsigned Op,
+ SmallVectorImpl<MCFixup> &Fixups) const;
+ unsigned getShiftRightImm16(const MCInst &MI, unsigned Op,
+ SmallVectorImpl<MCFixup> &Fixups) const;
+ unsigned getShiftRightImm32(const MCInst &MI, unsigned Op,
+ SmallVectorImpl<MCFixup> &Fixups) const;
+ unsigned getShiftRightImm64(const MCInst &MI, unsigned Op,
+ SmallVectorImpl<MCFixup> &Fixups) const;
// Labels are handled mostly the same way: a symbol is needed, and
// just gets some fixup attached.
@@ -310,6 +318,25 @@ AArch64MCCodeEmitter::getBitfield64LSLOpValue(const MCInst &MI, unsigned OpIdx,
return ((64 - MO.getImm()) & 0x3f) | (63 - MO.getImm()) << 6;
}
+unsigned AArch64MCCodeEmitter::getShiftRightImm8(
+ const MCInst &MI, unsigned Op, SmallVectorImpl<MCFixup> &Fixups) const {
+ return 8 - MI.getOperand(Op).getImm();
+}
+
+unsigned AArch64MCCodeEmitter::getShiftRightImm16(
+ const MCInst &MI, unsigned Op, SmallVectorImpl<MCFixup> &Fixups) const {
+ return 16 - MI.getOperand(Op).getImm();
+}
+
+unsigned AArch64MCCodeEmitter::getShiftRightImm32(
+ const MCInst &MI, unsigned Op, SmallVectorImpl<MCFixup> &Fixups) const {
+ return 32 - MI.getOperand(Op).getImm();
+}
+
+unsigned AArch64MCCodeEmitter::getShiftRightImm64(
+ const MCInst &MI, unsigned Op, SmallVectorImpl<MCFixup> &Fixups) const {
+ return 64 - MI.getOperand(Op).getImm();
+}
template<AArch64::Fixups fixupDesired> unsigned
AArch64MCCodeEmitter::getLabelOpValue(const MCInst &MI,