diff options
Diffstat (limited to 'lib/Target/ARM64/ARM64ISelDAGToDAG.cpp')
| -rw-r--r-- | lib/Target/ARM64/ARM64ISelDAGToDAG.cpp | 2381 |
1 files changed, 2381 insertions, 0 deletions
diff --git a/lib/Target/ARM64/ARM64ISelDAGToDAG.cpp b/lib/Target/ARM64/ARM64ISelDAGToDAG.cpp new file mode 100644 index 0000000..2e234c9 --- /dev/null +++ b/lib/Target/ARM64/ARM64ISelDAGToDAG.cpp @@ -0,0 +1,2381 @@ +//===-- ARM64ISelDAGToDAG.cpp - A dag to dag inst selector for ARM64 ------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file defines an instruction selector for the ARM64 target. +// +//===----------------------------------------------------------------------===// + +#define DEBUG_TYPE "arm64-isel" +#include "ARM64TargetMachine.h" +#include "MCTargetDesc/ARM64AddressingModes.h" +#include "llvm/CodeGen/SelectionDAGISel.h" +#include "llvm/IR/Function.h" // To access function attributes. +#include "llvm/IR/GlobalValue.h" +#include "llvm/IR/Intrinsics.h" +#include "llvm/Support/Debug.h" +#include "llvm/Support/ErrorHandling.h" +#include "llvm/Support/MathExtras.h" +#include "llvm/Support/raw_ostream.h" + +using namespace llvm; + +//===--------------------------------------------------------------------===// +/// ARM64DAGToDAGISel - ARM64 specific code to select ARM64 machine +/// instructions for SelectionDAG operations. +/// +namespace { + +class ARM64DAGToDAGISel : public SelectionDAGISel { + ARM64TargetMachine &TM; + + /// Subtarget - Keep a pointer to the ARM64Subtarget around so that we can + /// make the right decision when generating code for different targets. + const ARM64Subtarget *Subtarget; + + bool ForCodeSize; + +public: + explicit ARM64DAGToDAGISel(ARM64TargetMachine &tm, CodeGenOpt::Level OptLevel) + : SelectionDAGISel(tm, OptLevel), TM(tm), + Subtarget(&TM.getSubtarget<ARM64Subtarget>()), ForCodeSize(false) {} + + virtual const char *getPassName() const { + return "ARM64 Instruction Selection"; + } + + virtual bool runOnMachineFunction(MachineFunction &MF) { + AttributeSet FnAttrs = MF.getFunction()->getAttributes(); + ForCodeSize = + FnAttrs.hasAttribute(AttributeSet::FunctionIndex, + Attribute::OptimizeForSize) || + FnAttrs.hasAttribute(AttributeSet::FunctionIndex, Attribute::MinSize); + return SelectionDAGISel::runOnMachineFunction(MF); + } + + SDNode *Select(SDNode *Node); + + /// SelectInlineAsmMemoryOperand - Implement addressing mode selection for + /// inline asm expressions. + virtual bool SelectInlineAsmMemoryOperand(const SDValue &Op, + char ConstraintCode, + std::vector<SDValue> &OutOps); + + SDNode *SelectMLAV64LaneV128(SDNode *N); + SDNode *SelectMULLV64LaneV128(unsigned IntNo, SDNode *N); + bool SelectArithExtendedRegister(SDValue N, SDValue &Reg, SDValue &Shift); + bool SelectArithImmed(SDValue N, SDValue &Val, SDValue &Shift); + bool SelectNegArithImmed(SDValue N, SDValue &Val, SDValue &Shift); + bool SelectArithShiftedRegister(SDValue N, SDValue &Reg, SDValue &Shift) { + return SelectShiftedRegister(N, false, Reg, Shift); + } + bool SelectLogicalShiftedRegister(SDValue N, SDValue &Reg, SDValue &Shift) { + return SelectShiftedRegister(N, true, Reg, Shift); + } + bool SelectAddrModeIndexed8(SDValue N, SDValue &Base, SDValue &OffImm) { + return SelectAddrModeIndexed(N, 1, Base, OffImm); + } + bool SelectAddrModeIndexed16(SDValue N, SDValue &Base, SDValue &OffImm) { + return SelectAddrModeIndexed(N, 2, Base, OffImm); + } + bool SelectAddrModeIndexed32(SDValue N, SDValue &Base, SDValue &OffImm) { + return SelectAddrModeIndexed(N, 4, Base, OffImm); + } + bool SelectAddrModeIndexed64(SDValue N, SDValue &Base, SDValue &OffImm) { + return SelectAddrModeIndexed(N, 8, Base, OffImm); + } + bool SelectAddrModeIndexed128(SDValue N, SDValue &Base, SDValue &OffImm) { + return SelectAddrModeIndexed(N, 16, Base, OffImm); + } + bool SelectAddrModeUnscaled8(SDValue N, SDValue &Base, SDValue &OffImm) { + return SelectAddrModeUnscaled(N, 1, Base, OffImm); + } + bool SelectAddrModeUnscaled16(SDValue N, SDValue &Base, SDValue &OffImm) { + return SelectAddrModeUnscaled(N, 2, Base, OffImm); + } + bool SelectAddrModeUnscaled32(SDValue N, SDValue &Base, SDValue &OffImm) { + return SelectAddrModeUnscaled(N, 4, Base, OffImm); + } + bool SelectAddrModeUnscaled64(SDValue N, SDValue &Base, SDValue &OffImm) { + return SelectAddrModeUnscaled(N, 8, Base, OffImm); + } + bool SelectAddrModeUnscaled128(SDValue N, SDValue &Base, SDValue &OffImm) { + return SelectAddrModeUnscaled(N, 16, Base, OffImm); + } + + bool SelectAddrModeRO8(SDValue N, SDValue &Base, SDValue &Offset, + SDValue &Imm) { + return SelectAddrModeRO(N, 1, Base, Offset, Imm); + } + bool SelectAddrModeRO16(SDValue N, SDValue &Base, SDValue &Offset, + SDValue &Imm) { + return SelectAddrModeRO(N, 2, Base, Offset, Imm); + } + bool SelectAddrModeRO32(SDValue N, SDValue &Base, SDValue &Offset, + SDValue &Imm) { + return SelectAddrModeRO(N, 4, Base, Offset, Imm); + } + bool SelectAddrModeRO64(SDValue N, SDValue &Base, SDValue &Offset, + SDValue &Imm) { + return SelectAddrModeRO(N, 8, Base, Offset, Imm); + } + bool SelectAddrModeRO128(SDValue N, SDValue &Base, SDValue &Offset, + SDValue &Imm) { + return SelectAddrModeRO(N, 16, Base, Offset, Imm); + } + bool SelectAddrModeNoIndex(SDValue N, SDValue &Val); + + /// Form sequences of consecutive 64/128-bit registers for use in NEON + /// instructions making use of a vector-list (e.g. ldN, tbl). Vecs must have + /// between 1 and 4 elements. If it contains a single element that is returned + /// unchanged; otherwise a REG_SEQUENCE value is returned. + SDValue createDTuple(ArrayRef<SDValue> Vecs); + SDValue createQTuple(ArrayRef<SDValue> Vecs); + + /// Generic helper for the createDTuple/createQTuple + /// functions. Those should almost always be called instead. + SDValue createTuple(ArrayRef<SDValue> Vecs, unsigned RegClassIDs[], + unsigned SubRegs[]); + + SDNode *SelectTable(SDNode *N, unsigned NumVecs, unsigned Opc, bool isExt); + + SDNode *SelectIndexedLoad(SDNode *N, bool &Done); + + SDNode *SelectLoad(SDNode *N, unsigned NumVecs, unsigned Opc, + unsigned SubRegIdx); + SDNode *SelectLoadLane(SDNode *N, unsigned NumVecs, unsigned Opc); + + SDNode *SelectStore(SDNode *N, unsigned NumVecs, unsigned Opc); + SDNode *SelectStoreLane(SDNode *N, unsigned NumVecs, unsigned Opc); + + SDNode *SelectSIMDAddSubNarrowing(unsigned IntNo, SDNode *Node); + SDNode *SelectSIMDXtnNarrowing(unsigned IntNo, SDNode *Node); + + SDNode *SelectAtomic(SDNode *Node, unsigned Op8, unsigned Op16, unsigned Op32, + unsigned Op64); + + SDNode *SelectBitfieldExtractOp(SDNode *N); + SDNode *SelectBitfieldInsertOp(SDNode *N); + + SDNode *SelectLIBM(SDNode *N); + +// Include the pieces autogenerated from the target description. +#include "ARM64GenDAGISel.inc" + +private: + bool SelectShiftedRegister(SDValue N, bool AllowROR, SDValue &Reg, + SDValue &Shift); + bool SelectAddrModeIndexed(SDValue N, unsigned Size, SDValue &Base, + SDValue &OffImm); + bool SelectAddrModeUnscaled(SDValue N, unsigned Size, SDValue &Base, + SDValue &OffImm); + bool SelectAddrModeRO(SDValue N, unsigned Size, SDValue &Base, + SDValue &Offset, SDValue &Imm); + bool isWorthFolding(SDValue V) const; + bool SelectExtendedSHL(SDValue N, unsigned Size, SDValue &Offset, + SDValue &Imm); +}; +} // end anonymous namespace + +/// isIntImmediate - This method tests to see if the node is a constant +/// operand. If so Imm will receive the 32-bit value. +static bool isIntImmediate(const SDNode *N, uint64_t &Imm) { + if (const ConstantSDNode *C = dyn_cast<const ConstantSDNode>(N)) { + Imm = C->getZExtValue(); + return true; + } + return false; +} + +// isIntImmediate - This method tests to see if a constant operand. +// If so Imm will receive the value. +static bool isIntImmediate(SDValue N, uint64_t &Imm) { + return isIntImmediate(N.getNode(), Imm); +} + +// isOpcWithIntImmediate - This method tests to see if the node is a specific +// opcode and that it has a immediate integer right operand. +// If so Imm will receive the 32 bit value. +static bool isOpcWithIntImmediate(const SDNode *N, unsigned Opc, + uint64_t &Imm) { + return N->getOpcode() == Opc && + isIntImmediate(N->getOperand(1).getNode(), Imm); +} + +bool ARM64DAGToDAGISel::SelectAddrModeNoIndex(SDValue N, SDValue &Val) { + EVT ValTy = N.getValueType(); + if (ValTy != MVT::i64) + return false; + Val = N; + return true; +} + +bool ARM64DAGToDAGISel::SelectInlineAsmMemoryOperand( + const SDValue &Op, char ConstraintCode, std::vector<SDValue> &OutOps) { + assert(ConstraintCode == 'm' && "unexpected asm memory constraint"); + // Require the address to be in a register. That is safe for all ARM64 + // variants and it is hard to do anything much smarter without knowing + // how the operand is used. + OutOps.push_back(Op); + return false; +} + +/// SelectArithImmed - Select an immediate value that can be represented as +/// a 12-bit value shifted left by either 0 or 12. If so, return true with +/// Val set to the 12-bit value and Shift set to the shifter operand. +bool ARM64DAGToDAGISel::SelectArithImmed(SDValue N, SDValue &Val, + SDValue &Shift) { + // This function is called from the addsub_shifted_imm ComplexPattern, + // which lists [imm] as the list of opcode it's interested in, however + // we still need to check whether the operand is actually an immediate + // here because the ComplexPattern opcode list is only used in + // root-level opcode matching. + if (!isa<ConstantSDNode>(N.getNode())) + return false; + + uint64_t Immed = cast<ConstantSDNode>(N.getNode())->getZExtValue(); + unsigned ShiftAmt; + + if (Immed >> 12 == 0) { + ShiftAmt = 0; + } else if ((Immed & 0xfff) == 0 && Immed >> 24 == 0) { + ShiftAmt = 12; + Immed = Immed >> 12; + } else + return false; + + unsigned ShVal = ARM64_AM::getShifterImm(ARM64_AM::LSL, ShiftAmt); + Val = CurDAG->getTargetConstant(Immed, MVT::i32); + Shift = CurDAG->getTargetConstant(ShVal, MVT::i32); + return true; +} + +/// SelectNegArithImmed - As above, but negates the value before trying to +/// select it. +bool ARM64DAGToDAGISel::SelectNegArithImmed(SDValue N, SDValue &Val, + SDValue &Shift) { + // This function is called from the addsub_shifted_imm ComplexPattern, + // which lists [imm] as the list of opcode it's interested in, however + // we still need to check whether the operand is actually an immediate + // here because the ComplexPattern opcode list is only used in + // root-level opcode matching. + if (!isa<ConstantSDNode>(N.getNode())) + return false; + + // The immediate operand must be a 24-bit zero-extended immediate. + uint64_t Immed = cast<ConstantSDNode>(N.getNode())->getZExtValue(); + + // This negation is almost always valid, but "cmp wN, #0" and "cmn wN, #0" + // have the opposite effect on the C flag, so this pattern mustn't match under + // those circumstances. + if (Immed == 0) + return false; + + if (N.getValueType() == MVT::i32) + Immed = ~((uint32_t)Immed) + 1; + else + Immed = ~Immed + 1ULL; + if (Immed & 0xFFFFFFFFFF000000ULL) + return false; + + Immed &= 0xFFFFFFULL; + return SelectArithImmed(CurDAG->getConstant(Immed, MVT::i32), Val, Shift); +} + +/// getShiftTypeForNode - Translate a shift node to the corresponding +/// ShiftType value. +static ARM64_AM::ShiftType getShiftTypeForNode(SDValue N) { + switch (N.getOpcode()) { + default: + return ARM64_AM::InvalidShift; + case ISD::SHL: + return ARM64_AM::LSL; + case ISD::SRL: + return ARM64_AM::LSR; + case ISD::SRA: + return ARM64_AM::ASR; + case ISD::ROTR: + return ARM64_AM::ROR; + } +} + +/// \brief Determine wether it is worth to fold V into an extended register. +bool ARM64DAGToDAGISel::isWorthFolding(SDValue V) const { + // it hurts if the a value is used at least twice, unless we are optimizing + // for code size. + if (ForCodeSize || V.hasOneUse()) + return true; + return false; +} + +/// SelectShiftedRegister - Select a "shifted register" operand. If the value +/// is not shifted, set the Shift operand to default of "LSL 0". The logical +/// instructions allow the shifted register to be rotated, but the arithmetic +/// instructions do not. The AllowROR parameter specifies whether ROR is +/// supported. +bool ARM64DAGToDAGISel::SelectShiftedRegister(SDValue N, bool AllowROR, + SDValue &Reg, SDValue &Shift) { + ARM64_AM::ShiftType ShType = getShiftTypeForNode(N); + if (ShType == ARM64_AM::InvalidShift) + return false; + if (!AllowROR && ShType == ARM64_AM::ROR) + return false; + + if (ConstantSDNode *RHS = dyn_cast<ConstantSDNode>(N.getOperand(1))) { + unsigned BitSize = N.getValueType().getSizeInBits(); + unsigned Val = RHS->getZExtValue() & (BitSize - 1); + unsigned ShVal = ARM64_AM::getShifterImm(ShType, Val); + + Reg = N.getOperand(0); + Shift = CurDAG->getTargetConstant(ShVal, MVT::i32); + return isWorthFolding(N); + } + + return false; +} + +/// getExtendTypeForNode - Translate an extend node to the corresponding +/// ExtendType value. +static ARM64_AM::ExtendType getExtendTypeForNode(SDValue N, + bool IsLoadStore = false) { + if (N.getOpcode() == ISD::SIGN_EXTEND || + N.getOpcode() == ISD::SIGN_EXTEND_INREG) { + EVT SrcVT; + if (N.getOpcode() == ISD::SIGN_EXTEND_INREG) + SrcVT = cast<VTSDNode>(N.getOperand(1))->getVT(); + else + SrcVT = N.getOperand(0).getValueType(); + + if (!IsLoadStore && SrcVT == MVT::i8) + return ARM64_AM::SXTB; + else if (!IsLoadStore && SrcVT == MVT::i16) + return ARM64_AM::SXTH; + else if (SrcVT == MVT::i32) + return ARM64_AM::SXTW; + else if (SrcVT == MVT::i64) + return ARM64_AM::SXTX; + + return ARM64_AM::InvalidExtend; + } else if (N.getOpcode() == ISD::ZERO_EXTEND || + N.getOpcode() == ISD::ANY_EXTEND) { + EVT SrcVT = N.getOperand(0).getValueType(); + if (!IsLoadStore && SrcVT == MVT::i8) + return ARM64_AM::UXTB; + else if (!IsLoadStore && SrcVT == MVT::i16) + return ARM64_AM::UXTH; + else if (SrcVT == MVT::i32) + return ARM64_AM::UXTW; + else if (SrcVT == MVT::i64) + return ARM64_AM::UXTX; + + return ARM64_AM::InvalidExtend; + } else if (N.getOpcode() == ISD::AND) { + ConstantSDNode *CSD = dyn_cast<ConstantSDNode>(N.getOperand(1)); + if (!CSD) + return ARM64_AM::InvalidExtend; + uint64_t AndMask = CSD->getZExtValue(); + + switch (AndMask) { + default: + return ARM64_AM::InvalidExtend; + case 0xFF: + return !IsLoadStore ? ARM64_AM::UXTB : ARM64_AM::InvalidExtend; + case 0xFFFF: + return !IsLoadStore ? ARM64_AM::UXTH : ARM64_AM::InvalidExtend; + case 0xFFFFFFFF: + return ARM64_AM::UXTW; + } + } + + return ARM64_AM::InvalidExtend; +} + +// Helper for SelectMLAV64LaneV128 - Recognize high lane extracts. +static bool checkHighLaneIndex(SDNode *DL, SDValue &LaneOp, int &LaneIdx) { + if (DL->getOpcode() != ARM64ISD::DUPLANE16 && + DL->getOpcode() != ARM64ISD::DUPLANE32) + return false; + + SDValue SV = DL->getOperand(0); + if (SV.getOpcode() != ISD::INSERT_SUBVECTOR) + return false; + + SDValue EV = SV.getOperand(1); + if (EV.getOpcode() != ISD::EXTRACT_SUBVECTOR) + return false; + + ConstantSDNode *DLidx = cast<ConstantSDNode>(DL->getOperand(1).getNode()); + ConstantSDNode *EVidx = cast<ConstantSDNode>(EV.getOperand(1).getNode()); + LaneIdx = DLidx->getSExtValue() + EVidx->getSExtValue(); + LaneOp = EV.getOperand(0); + + return true; +} + +// Helper for SelectOpcV64LaneV128 - Recogzine operatinos where one operand is a +// high lane extract. +static bool checkV64LaneV128(SDValue Op0, SDValue Op1, SDValue &StdOp, + SDValue &LaneOp, int &LaneIdx) { + + if (!checkHighLaneIndex(Op0.getNode(), LaneOp, LaneIdx)) { + std::swap(Op0, Op1); + if (!checkHighLaneIndex(Op0.getNode(), LaneOp, LaneIdx)) + return false; + } + StdOp = Op1; + return true; +} + +/// SelectMLAV64LaneV128 - ARM64 supports 64-bit vector MLAs (v4i16 and v2i32) +/// where one multiplicand is a lane in the upper half of a 128-bit vector. +/// Recognize and select this so that we don't emit unnecessary lane extracts. +SDNode *ARM64DAGToDAGISel::SelectMLAV64LaneV128(SDNode *N) { + SDValue Op0 = N->getOperand(0); + SDValue Op1 = N->getOperand(1); + SDValue MLAOp1; // Will hold ordinary multiplicand for MLA. + SDValue MLAOp2; // Will hold lane-accessed multiplicand for MLA. + int LaneIdx = -1; // Will hold the lane index. + + if (Op1.getOpcode() != ISD::MUL || + !checkV64LaneV128(Op1.getOperand(0), Op1.getOperand(1), MLAOp1, MLAOp2, + LaneIdx)) { + std::swap(Op0, Op1); + if (Op1.getOpcode() != ISD::MUL || + !checkV64LaneV128(Op1.getOperand(0), Op1.getOperand(1), MLAOp1, MLAOp2, + LaneIdx)) + return 0; + } + + SDValue LaneIdxVal = CurDAG->getTargetConstant(LaneIdx, MVT::i64); + + SDValue Ops[] = { Op0, MLAOp1, MLAOp2, LaneIdxVal }; + + unsigned MLAOpc = ~0U; + + switch (N->getSimpleValueType(0).SimpleTy) { + default: + llvm_unreachable("Unrecognized MLA."); + case MVT::v4i16: + MLAOpc = ARM64::MLAv4i16_indexed; + break; + case MVT::v2i32: + MLAOpc = ARM64::MLAv2i32_indexed; + break; + } + + return CurDAG->getMachineNode(MLAOpc, SDLoc(N), N->getValueType(0), Ops); +} + +SDNode *ARM64DAGToDAGISel::SelectMULLV64LaneV128(unsigned IntNo, SDNode *N) { + SDValue SMULLOp0; + SDValue SMULLOp1; + int LaneIdx; + + if (!checkV64LaneV128(N->getOperand(1), N->getOperand(2), SMULLOp0, SMULLOp1, + LaneIdx)) + return 0; + + SDValue LaneIdxVal = CurDAG->getTargetConstant(LaneIdx, MVT::i64); + + SDValue Ops[] = { SMULLOp0, SMULLOp1, LaneIdxVal }; + + unsigned SMULLOpc = ~0U; + + if (IntNo == Intrinsic::arm64_neon_smull) { + switch (N->getSimpleValueType(0).SimpleTy) { + default: + llvm_unreachable("Unrecognized SMULL."); + case MVT::v4i32: + SMULLOpc = ARM64::SMULLv4i16_indexed; + break; + case MVT::v2i64: + SMULLOpc = ARM64::SMULLv2i32_indexed; + break; + } + } else if (IntNo == Intrinsic::arm64_neon_umull) { + switch (N->getSimpleValueType(0).SimpleTy) { + default: + llvm_unreachable("Unrecognized SMULL."); + case MVT::v4i32: + SMULLOpc = ARM64::UMULLv4i16_indexed; + break; + case MVT::v2i64: + SMULLOpc = ARM64::UMULLv2i32_indexed; + break; + } + } else + llvm_unreachable("Unrecognized intrinsic."); + + return CurDAG->getMachineNode(SMULLOpc, SDLoc(N), N->getValueType(0), Ops); +} + +/// SelectArithExtendedRegister - Select a "extended register" operand. This +/// operand folds in an extend followed by an optional left shift. +bool ARM64DAGToDAGISel::SelectArithExtendedRegister(SDValue N, SDValue &Reg, + SDValue &Shift) { + unsigned ShiftVal = 0; + ARM64_AM::ExtendType Ext; + + if (N.getOpcode() == ISD::SHL) { + ConstantSDNode *CSD = dyn_cast<ConstantSDNode>(N.getOperand(1)); + if (!CSD) + return false; + ShiftVal = CSD->getZExtValue(); + if ((ShiftVal & 0x3) != ShiftVal) + return false; + + Ext = getExtendTypeForNode(N.getOperand(0)); + if (Ext == ARM64_AM::InvalidExtend) + return false; + + Reg = N.getOperand(0).getOperand(0); + } else { + Ext = getExtendTypeForNode(N); + if (Ext == ARM64_AM::InvalidExtend) + return false; + + Reg = N.getOperand(0); + } + + // ARM64 mandates that the RHS of the operation must use the smallest + // register classs that could contain the size being extended from. Thus, + // if we're folding a (sext i8), we need the RHS to be a GPR32, even though + // there might not be an actual 32-bit value in the program. We can + // (harmlessly) synthesize one by injected an EXTRACT_SUBREG here. + if (Reg.getValueType() == MVT::i64 && Ext != ARM64_AM::UXTX && + Ext != ARM64_AM::SXTX) { + SDValue SubReg = CurDAG->getTargetConstant(ARM64::sub_32, MVT::i32); + MachineSDNode *Node = CurDAG->getMachineNode( + TargetOpcode::EXTRACT_SUBREG, SDLoc(N), MVT::i32, Reg, SubReg); + Reg = SDValue(Node, 0); + } + + Shift = CurDAG->getTargetConstant(getArithExtendImm(Ext, ShiftVal), MVT::i32); + return isWorthFolding(N); +} + +/// SelectAddrModeIndexed - Select a "register plus scaled unsigned 12-bit +/// immediate" address. The "Size" argument is the size in bytes of the memory +/// reference, which determines the scale. +bool ARM64DAGToDAGISel::SelectAddrModeIndexed(SDValue N, unsigned Size, + SDValue &Base, SDValue &OffImm) { + const TargetLowering *TLI = getTargetLowering(); + if (N.getOpcode() == ISD::FrameIndex) { + int FI = cast<FrameIndexSDNode>(N)->getIndex(); + Base = CurDAG->getTargetFrameIndex(FI, TLI->getPointerTy()); + OffImm = CurDAG->getTargetConstant(0, MVT::i64); + return true; + } + + if (N.getOpcode() == ARM64ISD::ADDlow) { + GlobalAddressSDNode *GAN = + dyn_cast<GlobalAddressSDNode>(N.getOperand(1).getNode()); + Base = N.getOperand(0); + OffImm = N.getOperand(1); + if (!GAN) + return true; + + const GlobalValue *GV = GAN->getGlobal(); + unsigned Alignment = GV->getAlignment(); + const DataLayout *DL = TLI->getDataLayout(); + if (Alignment == 0 && !Subtarget->isTargetDarwin()) + Alignment = DL->getABITypeAlignment(GV->getType()->getElementType()); + + if (Alignment >= Size) + return true; + } + + if (CurDAG->isBaseWithConstantOffset(N)) { + if (ConstantSDNode *RHS = dyn_cast<ConstantSDNode>(N.getOperand(1))) { + int64_t RHSC = (int64_t)RHS->getZExtValue(); + unsigned Scale = Log2_32(Size); + if ((RHSC & (Size - 1)) == 0 && RHSC >= 0 && RHSC < (0x1000 << Scale)) { + Base = N.getOperand(0); + if (Base.getOpcode() == ISD::FrameIndex) { + int FI = cast<FrameIndexSDNode>(Base)->getIndex(); + Base = CurDAG->getTargetFrameIndex(FI, TLI->getPointerTy()); + } + OffImm = CurDAG->getTargetConstant(RHSC >> Scale, MVT::i64); + return true; + } + } + } + + // Before falling back to our general case, check if the unscaled + // instructions can handle this. If so, that's preferable. + if (SelectAddrModeUnscaled(N, Size, Base, OffImm)) + return false; + + // Base only. The address will be materialized into a register before + // the memory is accessed. + // add x0, Xbase, #offset + // ldr x0, [x0] + Base = N; + OffImm = CurDAG->getTargetConstant(0, MVT::i64); + return true; +} + +/// SelectAddrModeUnscaled - Select a "register plus unscaled signed 9-bit +/// immediate" address. This should only match when there is an offset that +/// is not valid for a scaled immediate addressing mode. The "Size" argument +/// is the size in bytes of the memory reference, which is needed here to know +/// what is valid for a scaled immediate. +bool ARM64DAGToDAGISel::SelectAddrModeUnscaled(SDValue N, unsigned Size, + SDValue &Base, SDValue &OffImm) { + if (!CurDAG->isBaseWithConstantOffset(N)) + return false; + if (ConstantSDNode *RHS = dyn_cast<ConstantSDNode>(N.getOperand(1))) { + int64_t RHSC = RHS->getSExtValue(); + // If the offset is valid as a scaled immediate, don't match here. + if ((RHSC & (Size - 1)) == 0 && RHSC >= 0 && + RHSC < (0x1000 << Log2_32(Size))) + return false; + if (RHSC >= -256 && RHSC < 256) { + Base = N.getOperand(0); + if (Base.getOpcode() == ISD::FrameIndex) { + int FI = cast<FrameIndexSDNode>(Base)->getIndex(); + const TargetLowering *TLI = getTargetLowering(); + Base = CurDAG->getTargetFrameIndex(FI, TLI->getPointerTy()); + } + OffImm = CurDAG->getTargetConstant(RHSC, MVT::i64); + return true; + } + } + return false; +} + +static SDValue Widen(SelectionDAG *CurDAG, SDValue N) { + SDValue SubReg = CurDAG->getTargetConstant(ARM64::sub_32, MVT::i32); + SDValue ImpDef = SDValue( + CurDAG->getMachineNode(TargetOpcode::IMPLICIT_DEF, SDLoc(N), MVT::i64), + 0); + MachineSDNode *Node = CurDAG->getMachineNode( + TargetOpcode::INSERT_SUBREG, SDLoc(N), MVT::i64, ImpDef, N, SubReg); + return SDValue(Node, 0); +} + +static SDValue WidenIfNeeded(SelectionDAG *CurDAG, SDValue N) { + if (N.getValueType() == MVT::i32) { + return Widen(CurDAG, N); + } + + return N; +} + +/// \brief Check if the given SHL node (\p N), can be used to form an +/// extended register for an addressing mode. +bool ARM64DAGToDAGISel::SelectExtendedSHL(SDValue N, unsigned Size, + SDValue &Offset, SDValue &Imm) { + assert(N.getOpcode() == ISD::SHL && "Invalid opcode."); + ConstantSDNode *CSD = dyn_cast<ConstantSDNode>(N.getOperand(1)); + if (CSD && (CSD->getZExtValue() & 0x7) == CSD->getZExtValue()) { + + ARM64_AM::ExtendType Ext = getExtendTypeForNode(N.getOperand(0), true); + if (Ext == ARM64_AM::InvalidExtend) { + Ext = ARM64_AM::UXTX; + Offset = WidenIfNeeded(CurDAG, N.getOperand(0)); + } else { + Offset = WidenIfNeeded(CurDAG, N.getOperand(0).getOperand(0)); + } + + unsigned LegalShiftVal = Log2_32(Size); + unsigned ShiftVal = CSD->getZExtValue(); + + if (ShiftVal != 0 && ShiftVal != LegalShiftVal) + return false; + + Imm = CurDAG->getTargetConstant( + ARM64_AM::getMemExtendImm(Ext, ShiftVal != 0), MVT::i32); + if (isWorthFolding(N)) + return true; + } + return false; +} + +bool ARM64DAGToDAGISel::SelectAddrModeRO(SDValue N, unsigned Size, + SDValue &Base, SDValue &Offset, + SDValue &Imm) { + if (N.getOpcode() != ISD::ADD) + return false; + SDValue LHS = N.getOperand(0); + SDValue RHS = N.getOperand(1); + + // We don't want to match immediate adds here, because they are better lowered + // to the register-immediate addressing modes. + if (isa<ConstantSDNode>(LHS) || isa<ConstantSDNode>(RHS)) + return false; + + // Check if this particular node is reused in any non-memory related + // operation. If yes, do not try to fold this node into the address + // computation, since the computation will be kept. + const SDNode *Node = N.getNode(); + for (SDNode::use_iterator UI = Node->use_begin(), UE = Node->use_end(); + UI != UE; ++UI) { + if (!isa<MemSDNode>(*UI)) + return false; + } + + // Remember if it is worth folding N when it produces extended register. + bool IsExtendedRegisterWorthFolding = isWorthFolding(N); + + // Try to match a shifted extend on the RHS. + if (IsExtendedRegisterWorthFolding && RHS.getOpcode() == ISD::SHL && + SelectExtendedSHL(RHS, Size, Offset, Imm)) { + Base = LHS; + return true; + } + + // Try to match a shifted extend on the LHS. + if (IsExtendedRegisterWorthFolding && LHS.getOpcode() == ISD::SHL && + SelectExtendedSHL(LHS, Size, Offset, Imm)) { + Base = RHS; + return true; + } + + ARM64_AM::ExtendType Ext = ARM64_AM::UXTX; + // Try to match an unshifted extend on the LHS. + if (IsExtendedRegisterWorthFolding && + (Ext = getExtendTypeForNode(LHS, true)) != ARM64_AM::InvalidExtend) { + Base = RHS; + Offset = WidenIfNeeded(CurDAG, LHS.getOperand(0)); + Imm = CurDAG->getTargetConstant(ARM64_AM::getMemExtendImm(Ext, false), + MVT::i32); + if (isWorthFolding(LHS)) + return true; + } + + // Try to match an unshifted extend on the RHS. + if (IsExtendedRegisterWorthFolding && + (Ext = getExtendTypeForNode(RHS, true)) != ARM64_AM::InvalidExtend) { + Base = LHS; + Offset = WidenIfNeeded(CurDAG, RHS.getOperand(0)); + Imm = CurDAG->getTargetConstant(ARM64_AM::getMemExtendImm(Ext, false), + MVT::i32); + if (isWorthFolding(RHS)) + return true; + } + + // Match any non-shifted, non-extend, non-immediate add expression. + Base = LHS; + Offset = WidenIfNeeded(CurDAG, RHS); + Ext = ARM64_AM::UXTX; + Imm = CurDAG->getTargetConstant(ARM64_AM::getMemExtendImm(Ext, false), + MVT::i32); + // Reg1 + Reg2 is free: no check needed. + return true; +} + +SDValue ARM64DAGToDAGISel::createDTuple(ArrayRef<SDValue> Regs) { + static unsigned RegClassIDs[] = { ARM64::DDRegClassID, ARM64::DDDRegClassID, + ARM64::DDDDRegClassID }; + static unsigned SubRegs[] = { ARM64::dsub0, ARM64::dsub1, + ARM64::dsub2, ARM64::dsub3 }; + + return createTuple(Regs, RegClassIDs, SubRegs); +} + +SDValue ARM64DAGToDAGISel::createQTuple(ArrayRef<SDValue> Regs) { + static unsigned RegClassIDs[] = { ARM64::QQRegClassID, ARM64::QQQRegClassID, + ARM64::QQQQRegClassID }; + static unsigned SubRegs[] = { ARM64::qsub0, ARM64::qsub1, + ARM64::qsub2, ARM64::qsub3 }; + + return createTuple(Regs, RegClassIDs, SubRegs); +} + +SDValue ARM64DAGToDAGISel::createTuple(ArrayRef<SDValue> Regs, + unsigned RegClassIDs[], + unsigned SubRegs[]) { + // There's no special register-class for a vector-list of 1 element: it's just + // a vector. + if (Regs.size() == 1) + return Regs[0]; + + assert(Regs.size() >= 2 && Regs.size() <= 4); + + SDLoc DL(Regs[0].getNode()); + + SmallVector<SDValue, 4> Ops; + + // First operand of REG_SEQUENCE is the desired RegClass. + Ops.push_back( + CurDAG->getTargetConstant(RegClassIDs[Regs.size() - 2], MVT::i32)); + + // Then we get pairs of source & subregister-position for the components. + for (unsigned i = 0; i < Regs.size(); ++i) { + Ops.push_back(Regs[i]); + Ops.push_back(CurDAG->getTargetConstant(SubRegs[i], MVT::i32)); + } + + SDNode *N = + CurDAG->getMachineNode(TargetOpcode::REG_SEQUENCE, DL, MVT::Untyped, Ops); + return SDValue(N, 0); +} + +SDNode *ARM64DAGToDAGISel::SelectTable(SDNode *N, unsigned NumVecs, + unsigned Opc, bool isExt) { + SDLoc dl(N); + EVT VT = N->getValueType(0); + + unsigned ExtOff = isExt; + + // Form a REG_SEQUENCE to force register allocation. + unsigned Vec0Off = ExtOff + 1; + SmallVector<SDValue, 4> Regs(N->op_begin() + Vec0Off, + N->op_begin() + Vec0Off + NumVecs); + SDValue RegSeq = createQTuple(Regs); + + SmallVector<SDValue, 6> Ops; + if (isExt) + Ops.push_back(N->getOperand(1)); + Ops.push_back(RegSeq); + Ops.push_back(N->getOperand(NumVecs + ExtOff + 1)); + return CurDAG->getMachineNode(Opc, dl, VT, Ops); +} + +SDNode *ARM64DAGToDAGISel::SelectIndexedLoad(SDNode *N, bool &Done) { + LoadSDNode *LD = cast<LoadSDNode>(N); + if (LD->isUnindexed()) + return NULL; + EVT VT = LD->getMemoryVT(); + EVT DstVT = N->getValueType(0); + ISD::MemIndexedMode AM = LD->getAddressingMode(); + bool IsPre = AM == ISD::PRE_INC || AM == ISD::PRE_DEC; + + // We're not doing validity checking here. That was done when checking + // if we should mark the load as indexed or not. We're just selecting + // the right instruction. + unsigned Opcode = 0; + + ISD::LoadExtType ExtType = LD->getExtensionType(); + bool InsertTo64 = false; + if (VT == MVT::i64) + Opcode = IsPre ? ARM64::LDRXpre_isel : ARM64::LDRXpost_isel; + else if (VT == MVT::i32) { + if (ExtType == ISD::NON_EXTLOAD) + Opcode = IsPre ? ARM64::LDRWpre_isel : ARM64::LDRWpost_isel; + else if (ExtType == ISD::SEXTLOAD) + Opcode = IsPre ? ARM64::LDRSWpre_isel : ARM64::LDRSWpost_isel; + else { + Opcode = IsPre ? ARM64::LDRWpre_isel : ARM64::LDRWpost_isel; + InsertTo64 = true; + // The result of the load is only i32. It's the subreg_to_reg that makes + // it into an i64. + DstVT = MVT::i32; + } + } else if (VT == MVT::i16) { + if (ExtType == ISD::SEXTLOAD) { + if (DstVT == MVT::i64) + Opcode = IsPre ? ARM64::LDRSHXpre_isel : ARM64::LDRSHXpost_isel; + else + Opcode = IsPre ? ARM64::LDRSHWpre_isel : ARM64::LDRSHWpost_isel; + } else { + Opcode = IsPre ? ARM64::LDRHHpre_isel : ARM64::LDRHHpost_isel; + InsertTo64 = DstVT == MVT::i64; + // The result of the load is only i32. It's the subreg_to_reg that makes + // it into an i64. + DstVT = MVT::i32; + } + } else if (VT == MVT::i8) { + if (ExtType == ISD::SEXTLOAD) { + if (DstVT == MVT::i64) + Opcode = IsPre ? ARM64::LDRSBXpre_isel : ARM64::LDRSBXpost_isel; + else + Opcode = IsPre ? ARM64::LDRSBWpre_isel : ARM64::LDRSBWpost_isel; + } else { + Opcode = IsPre ? ARM64::LDRBBpre_isel : ARM64::LDRBBpost_isel; + InsertTo64 = DstVT == MVT::i64; + // The result of the load is only i32. It's the subreg_to_reg that makes + // it into an i64. + DstVT = MVT::i32; + } + } else if (VT == MVT::f32) { + Opcode = IsPre ? ARM64::LDRSpre_isel : ARM64::LDRSpost_isel; + } else if (VT == MVT::f64) { + Opcode = IsPre ? ARM64::LDRDpre_isel : ARM64::LDRDpost_isel; + } else + return NULL; + SDValue Chain = LD->getChain(); + SDValue Base = LD->getBasePtr(); + ConstantSDNode *OffsetOp = cast<ConstantSDNode>(LD->getOffset()); + int OffsetVal = (int)OffsetOp->getZExtValue(); + SDValue Offset = CurDAG->getTargetConstant(OffsetVal, MVT::i64); + SDValue Ops[] = { Base, Offset, Chain }; + SDNode *Res = CurDAG->getMachineNode(Opcode, SDLoc(N), DstVT, MVT::i64, + MVT::Other, Ops); + // Either way, we're replacing the node, so tell the caller that. + Done = true; + if (InsertTo64) { + SDValue SubReg = CurDAG->getTargetConstant(ARM64::sub_32, MVT::i32); + SDNode *Sub = CurDAG->getMachineNode( + ARM64::SUBREG_TO_REG, SDLoc(N), MVT::i64, + CurDAG->getTargetConstant(0, MVT::i64), SDValue(Res, 0), SubReg); + ReplaceUses(SDValue(N, 0), SDValue(Sub, 0)); + ReplaceUses(SDValue(N, 1), SDValue(Res, 1)); + ReplaceUses(SDValue(N, 2), SDValue(Res, 2)); + return 0; + } + return Res; +} + +SDNode *ARM64DAGToDAGISel::SelectLoad(SDNode *N, unsigned NumVecs, unsigned Opc, + unsigned SubRegIdx) { + SDLoc dl(N); + EVT VT = N->getValueType(0); + SDValue Chain = N->getOperand(0); + + SmallVector<SDValue, 6> Ops; + Ops.push_back(N->getOperand(2)); // Mem operand; + Ops.push_back(Chain); + + std::vector<EVT> ResTys; + ResTys.push_back(MVT::Untyped); + ResTys.push_back(MVT::Other); + + SDNode *Ld = CurDAG->getMachineNode(Opc, dl, ResTys, Ops); + SDValue SuperReg = SDValue(Ld, 0); + + // MachineSDNode::mmo_iterator MemOp = MF->allocateMemRefsArray(1); + // MemOp[0] = cast<MemIntrinsicSDNode>(N)->getMemOperand(); + // cast<MachineSDNode>(Ld)->setMemRefs(MemOp, MemOp + 1); + + switch (NumVecs) { + case 4: + ReplaceUses(SDValue(N, 3), CurDAG->getTargetExtractSubreg(SubRegIdx + 3, dl, + VT, SuperReg)); + // FALLTHROUGH + case 3: + ReplaceUses(SDValue(N, 2), CurDAG->getTargetExtractSubreg(SubRegIdx + 2, dl, + VT, SuperReg)); + // FALLTHROUGH + case 2: + ReplaceUses(SDValue(N, 1), CurDAG->getTargetExtractSubreg(SubRegIdx + 1, dl, + VT, SuperReg)); + ReplaceUses(SDValue(N, 0), + CurDAG->getTargetExtractSubreg(SubRegIdx, dl, VT, SuperReg)); + break; + case 1: + ReplaceUses(SDValue(N, 0), SuperReg); + break; + } + + ReplaceUses(SDValue(N, NumVecs), SDValue(Ld, 1)); + + return 0; +} + +SDNode *ARM64DAGToDAGISel::SelectStore(SDNode *N, unsigned NumVecs, + unsigned Opc) { + SDLoc dl(N); + EVT VT = N->getOperand(2)->getValueType(0); + + // Form a REG_SEQUENCE to force register allocation. + bool Is128Bit = VT.getSizeInBits() == 128; + SmallVector<SDValue, 4> Regs(N->op_begin() + 2, N->op_begin() + 2 + NumVecs); + SDValue RegSeq = Is128Bit ? createQTuple(Regs) : createDTuple(Regs); + + SmallVector<SDValue, 6> Ops; + Ops.push_back(RegSeq); + Ops.push_back(N->getOperand(NumVecs + 2)); + Ops.push_back(N->getOperand(0)); + SDNode *St = CurDAG->getMachineNode(Opc, dl, N->getValueType(0), Ops); + + return St; +} + +/// WidenVector - Given a value in the V64 register class, produce the +/// equivalent value in the V128 register class. +class WidenVector { + SelectionDAG &DAG; + +public: + WidenVector(SelectionDAG &DAG) : DAG(DAG) {} + + SDValue operator()(SDValue V64Reg) { + EVT VT = V64Reg.getValueType(); + unsigned NarrowSize = VT.getVectorNumElements(); + MVT EltTy = VT.getVectorElementType().getSimpleVT(); + MVT WideTy = MVT::getVectorVT(EltTy, 2 * NarrowSize); + SDLoc DL(V64Reg); + + SDValue Undef = + SDValue(DAG.getMachineNode(TargetOpcode::IMPLICIT_DEF, DL, WideTy), 0); + return DAG.getTargetInsertSubreg(ARM64::dsub, DL, WideTy, Undef, V64Reg); + } +}; + +/// NarrowVector - Given a value in the V128 register class, produce the +/// equivalent value in the V64 register class. +static SDValue NarrowVector(SDValue V128Reg, SelectionDAG &DAG) { + EVT VT = V128Reg.getValueType(); + unsigned WideSize = VT.getVectorNumElements(); + MVT EltTy = VT.getVectorElementType().getSimpleVT(); + MVT NarrowTy = MVT::getVectorVT(EltTy, WideSize / 2); + + return DAG.getTargetExtractSubreg(ARM64::dsub, SDLoc(V128Reg), NarrowTy, + V128Reg); +} + +SDNode *ARM64DAGToDAGISel::SelectLoadLane(SDNode *N, unsigned NumVecs, + unsigned Opc) { + SDLoc dl(N); + EVT VT = N->getValueType(0); + bool Narrow = VT.getSizeInBits() == 64; + + // Form a REG_SEQUENCE to force register allocation. + SmallVector<SDValue, 4> Regs(N->op_begin() + 2, N->op_begin() + 2 + NumVecs); + + if (Narrow) + std::transform(Regs.begin(), Regs.end(), Regs.begin(), + WidenVector(*CurDAG)); + + SDValue RegSeq = createQTuple(Regs); + + std::vector<EVT> ResTys; + ResTys.push_back(MVT::Untyped); + ResTys.push_back(MVT::Other); + + unsigned LaneNo = + cast<ConstantSDNode>(N->getOperand(NumVecs + 2))->getZExtValue(); + + SmallVector<SDValue, 6> Ops; + Ops.push_back(RegSeq); + Ops.push_back(CurDAG->getTargetConstant(LaneNo, MVT::i64)); + Ops.push_back(N->getOperand(NumVecs + 3)); + Ops.push_back(N->getOperand(0)); + SDNode *Ld = CurDAG->getMachineNode(Opc, dl, ResTys, Ops); + SDValue SuperReg = SDValue(Ld, 0); + + EVT WideVT = RegSeq.getOperand(1)->getValueType(0); + switch (NumVecs) { + case 4: { + SDValue NV3 = + CurDAG->getTargetExtractSubreg(ARM64::qsub3, dl, WideVT, SuperReg); + if (Narrow) + ReplaceUses(SDValue(N, 3), NarrowVector(NV3, *CurDAG)); + else + ReplaceUses(SDValue(N, 3), NV3); + } + // FALLTHROUGH + case 3: { + SDValue NV2 = + CurDAG->getTargetExtractSubreg(ARM64::qsub2, dl, WideVT, SuperReg); + if (Narrow) + ReplaceUses(SDValue(N, 2), NarrowVector(NV2, *CurDAG)); + else + ReplaceUses(SDValue(N, 2), NV2); + } + // FALLTHROUGH + case 2: { + SDValue NV1 = + CurDAG->getTargetExtractSubreg(ARM64::qsub1, dl, WideVT, SuperReg); + SDValue NV0 = + CurDAG->getTargetExtractSubreg(ARM64::qsub0, dl, WideVT, SuperReg); + if (Narrow) { + ReplaceUses(SDValue(N, 1), NarrowVector(NV1, *CurDAG)); + ReplaceUses(SDValue(N, 0), NarrowVector(NV0, *CurDAG)); + } else { + ReplaceUses(SDValue(N, 1), NV1); + ReplaceUses(SDValue(N, 0), NV0); + } + break; + } + } + + ReplaceUses(SDValue(N, NumVecs), SDValue(Ld, 1)); + + return Ld; +} + +SDNode *ARM64DAGToDAGISel::SelectStoreLane(SDNode *N, unsigned NumVecs, + unsigned Opc) { + SDLoc dl(N); + EVT VT = N->getOperand(2)->getValueType(0); + bool Narrow = VT.getSizeInBits() == 64; + + // Form a REG_SEQUENCE to force register allocation. + SmallVector<SDValue, 4> Regs(N->op_begin() + 2, N->op_begin() + 2 + NumVecs); + + if (Narrow) + std::transform(Regs.begin(), Regs.end(), Regs.begin(), + WidenVector(*CurDAG)); + + SDValue RegSeq = createQTuple(Regs); + + unsigned LaneNo = + cast<ConstantSDNode>(N->getOperand(NumVecs + 2))->getZExtValue(); + + SmallVector<SDValue, 6> Ops; + Ops.push_back(RegSeq); + Ops.push_back(CurDAG->getTargetConstant(LaneNo, MVT::i64)); + Ops.push_back(N->getOperand(NumVecs + 3)); + Ops.push_back(N->getOperand(0)); + SDNode *St = CurDAG->getMachineNode(Opc, dl, MVT::Other, Ops); + + // Transfer memoperands. + MachineSDNode::mmo_iterator MemOp = MF->allocateMemRefsArray(1); + MemOp[0] = cast<MemIntrinsicSDNode>(N)->getMemOperand(); + cast<MachineSDNode>(St)->setMemRefs(MemOp, MemOp + 1); + + return St; +} + +SDNode *ARM64DAGToDAGISel::SelectAtomic(SDNode *Node, unsigned Op8, + unsigned Op16, unsigned Op32, + unsigned Op64) { + // Mostly direct translation to the given operations, except that we preserve + // the AtomicOrdering for use later on. + AtomicSDNode *AN = cast<AtomicSDNode>(Node); + EVT VT = AN->getMemoryVT(); + + unsigned Op; + if (VT == MVT::i8) + Op = Op8; + else if (VT == MVT::i16) + Op = Op16; + else if (VT == MVT::i32) + Op = Op32; + else if (VT == MVT::i64) + Op = Op64; + else + llvm_unreachable("Unexpected atomic operation"); + + SmallVector<SDValue, 4> Ops; + for (unsigned i = 1; i < AN->getNumOperands(); ++i) + Ops.push_back(AN->getOperand(i)); + + Ops.push_back(CurDAG->getTargetConstant(AN->getOrdering(), MVT::i32)); + Ops.push_back(AN->getOperand(0)); // Chain moves to the end + + return CurDAG->SelectNodeTo(Node, Op, AN->getValueType(0), MVT::Other, + &Ops[0], Ops.size()); +} + +static bool isBitfieldExtractOpFromAnd(SelectionDAG *CurDAG, SDNode *N, + unsigned &Opc, SDValue &Opd0, + unsigned &LSB, unsigned &MSB, + unsigned NumberOfIgnoredLowBits, + bool BiggerPattern) { + assert(N->getOpcode() == ISD::AND && + "N must be a AND operation to call this function"); + + EVT VT = N->getValueType(0); + + // Here we can test the type of VT and return false when the type does not + // match, but since it is done prior to that call in the current context + // we turned that into an assert to avoid redundant code. + assert((VT == MVT::i32 || VT == MVT::i64) && + "Type checking must have been done before calling this function"); + + // FIXME: simplify-demanded-bits in DAGCombine will probably have + // changed the AND node to a 32-bit mask operation. We'll have to + // undo that as part of the transform here if we want to catch all + // the opportunities. + // Currently the NumberOfIgnoredLowBits argument helps to recover + // form these situations when matching bigger pattern (bitfield insert). + + // For unsigned extracts, check for a shift right and mask + uint64_t And_imm = 0; + if (!isOpcWithIntImmediate(N, ISD::AND, And_imm)) + return false; + + const SDNode *Op0 = N->getOperand(0).getNode(); + + // Because of simplify-demanded-bits in DAGCombine, the mask may have been + // simplified. Try to undo that + And_imm |= (1 << NumberOfIgnoredLowBits) - 1; + + // The immediate is a mask of the low bits iff imm & (imm+1) == 0 + if (And_imm & (And_imm + 1)) + return false; + + bool ClampMSB = false; + uint64_t Srl_imm = 0; + // Handle the SRL + ANY_EXTEND case. + if (VT == MVT::i64 && Op0->getOpcode() == ISD::ANY_EXTEND && + isOpcWithIntImmediate(Op0->getOperand(0).getNode(), ISD::SRL, Srl_imm)) { + // Extend the incoming operand of the SRL to 64-bit. + Opd0 = Widen(CurDAG, Op0->getOperand(0).getOperand(0)); + // Make sure to clamp the MSB so that we preserve the semantics of the + // original operations. + ClampMSB = true; + } else if (isOpcWithIntImmediate(Op0, ISD::SRL, Srl_imm)) { + Opd0 = Op0->getOperand(0); + } else if (BiggerPattern) { + // Let's pretend a 0 shift right has been performed. + // The resulting code will be at least as good as the original one + // plus it may expose more opportunities for bitfield insert pattern. + // FIXME: Currently we limit this to the bigger pattern, because + // some optimizations expect AND and not UBFM + Opd0 = N->getOperand(0); + } else + return false; + + assert((BiggerPattern || (Srl_imm > 0 && Srl_imm < VT.getSizeInBits())) && + "bad amount in shift node!"); + + LSB = Srl_imm; + MSB = Srl_imm + (VT == MVT::i32 ? CountTrailingOnes_32(And_imm) + : CountTrailingOnes_64(And_imm)) - + 1; + if (ClampMSB) + // Since we're moving the extend before the right shift operation, we need + // to clamp the MSB to make sure we don't shift in undefined bits instead of + // the zeros which would get shifted in with the original right shift + // operation. + MSB = MSB > 31 ? 31 : MSB; + + Opc = VT == MVT::i32 ? ARM64::UBFMWri : ARM64::UBFMXri; + return true; +} + +static bool isOneBitExtractOpFromShr(SDNode *N, unsigned &Opc, SDValue &Opd0, + unsigned &LSB, unsigned &MSB) { + // We are looking for the following pattern which basically extracts a single + // bit from the source value and places it in the LSB of the destination + // value, all other bits of the destination value or set to zero: + // + // Value2 = AND Value, MaskImm + // SRL Value2, ShiftImm + // + // with MaskImm >> ShiftImm == 1. + // + // This gets selected into a single UBFM: + // + // UBFM Value, ShiftImm, ShiftImm + // + + if (N->getOpcode() != ISD::SRL) + return false; + + uint64_t And_mask = 0; + if (!isOpcWithIntImmediate(N->getOperand(0).getNode(), ISD::AND, And_mask)) + return false; + + Opd0 = N->getOperand(0).getOperand(0); + + uint64_t Srl_imm = 0; + if (!isIntImmediate(N->getOperand(1), Srl_imm)) + return false; + + // Check whether we really have a one bit extract here. + if (And_mask >> Srl_imm == 0x1) { + if (N->getValueType(0) == MVT::i32) + Opc = ARM64::UBFMWri; + else + Opc = ARM64::UBFMXri; + + LSB = MSB = Srl_imm; + + return true; + } + + return false; +} + +static bool isBitfieldExtractOpFromShr(SDNode *N, unsigned &Opc, SDValue &Opd0, + unsigned &LSB, unsigned &MSB, + bool BiggerPattern) { + assert((N->getOpcode() == ISD::SRA || N->getOpcode() == ISD::SRL) && + "N must be a SHR/SRA operation to call this function"); + + EVT VT = N->getValueType(0); + + // Here we can test the type of VT and return false when the type does not + // match, but since it is done prior to that call in the current context + // we turned that into an assert to avoid redundant code. + assert((VT == MVT::i32 || VT == MVT::i64) && + "Type checking must have been done before calling this function"); + + // Check for AND + SRL doing a one bit extract. + if (isOneBitExtractOpFromShr(N, Opc, Opd0, LSB, MSB)) + return true; + + // we're looking for a shift of a shift + uint64_t Shl_imm = 0; + if (isOpcWithIntImmediate(N->getOperand(0).getNode(), ISD::SHL, Shl_imm)) { + Opd0 = N->getOperand(0).getOperand(0); + } else if (BiggerPattern) { + // Let's pretend a 0 shift left has been performed. + // FIXME: Currently we limit this to the bigger pattern case, + // because some optimizations expect AND and not UBFM + Opd0 = N->getOperand(0); + } else + return false; + + assert(Shl_imm < VT.getSizeInBits() && "bad amount in shift node!"); + uint64_t Srl_imm = 0; + if (!isIntImmediate(N->getOperand(1), Srl_imm)) + return false; + + assert(Srl_imm > 0 && Srl_imm < VT.getSizeInBits() && + "bad amount in shift node!"); + // Note: The width operand is encoded as width-1. + unsigned Width = VT.getSizeInBits() - Srl_imm - 1; + int sLSB = Srl_imm - Shl_imm; + if (sLSB < 0) + return false; + LSB = sLSB; + MSB = LSB + Width; + // SRA requires a signed extraction + if (VT == MVT::i32) + Opc = N->getOpcode() == ISD::SRA ? ARM64::SBFMWri : ARM64::UBFMWri; + else + Opc = N->getOpcode() == ISD::SRA ? ARM64::SBFMXri : ARM64::UBFMXri; + return true; +} + +static bool isBitfieldExtractOp(SelectionDAG *CurDAG, SDNode *N, unsigned &Opc, + SDValue &Opd0, unsigned &LSB, unsigned &MSB, + unsigned NumberOfIgnoredLowBits = 0, + bool BiggerPattern = false) { + if (N->getValueType(0) != MVT::i32 && N->getValueType(0) != MVT::i64) + return false; + + switch (N->getOpcode()) { + default: + if (!N->isMachineOpcode()) + return false; + break; + case ISD::AND: + return isBitfieldExtractOpFromAnd(CurDAG, N, Opc, Opd0, LSB, MSB, + NumberOfIgnoredLowBits, BiggerPattern); + case ISD::SRL: + case ISD::SRA: + return isBitfieldExtractOpFromShr(N, Opc, Opd0, LSB, MSB, BiggerPattern); + } + + unsigned NOpc = N->getMachineOpcode(); + switch (NOpc) { + default: + return false; + case ARM64::SBFMWri: + case ARM64::UBFMWri: + case ARM64::SBFMXri: + case ARM64::UBFMXri: + Opc = NOpc; + Opd0 = N->getOperand(0); + LSB = cast<ConstantSDNode>(N->getOperand(1).getNode())->getZExtValue(); + MSB = cast<ConstantSDNode>(N->getOperand(2).getNode())->getZExtValue(); + return true; + } + // Unreachable + return false; +} + +SDNode *ARM64DAGToDAGISel::SelectBitfieldExtractOp(SDNode *N) { + unsigned Opc, LSB, MSB; + SDValue Opd0; + if (!isBitfieldExtractOp(CurDAG, N, Opc, Opd0, LSB, MSB)) + return NULL; + + EVT VT = N->getValueType(0); + SDValue Ops[] = { Opd0, CurDAG->getTargetConstant(LSB, VT), + CurDAG->getTargetConstant(MSB, VT) }; + return CurDAG->SelectNodeTo(N, Opc, VT, Ops, 3); +} + +// Is mask a i32 or i64 binary sequence 1..10..0 and +// CountTrailingZeros(mask) == ExpectedTrailingZeros +static bool isHighMask(uint64_t Mask, unsigned ExpectedTrailingZeros, + unsigned NumberOfIgnoredHighBits, EVT VT) { + assert((VT == MVT::i32 || VT == MVT::i64) && + "i32 or i64 mask type expected!"); + + uint64_t ExpectedMask; + if (VT == MVT::i32) { + uint32_t ExpectedMaski32 = ~0 << ExpectedTrailingZeros; + ExpectedMask = ExpectedMaski32; + if (NumberOfIgnoredHighBits) { + uint32_t highMask = ~0 << (32 - NumberOfIgnoredHighBits); + Mask |= highMask; + } + } else { + ExpectedMask = ((uint64_t) ~0) << ExpectedTrailingZeros; + if (NumberOfIgnoredHighBits) + Mask |= ((uint64_t) ~0) << (64 - NumberOfIgnoredHighBits); + } + + return Mask == ExpectedMask; +} + +// Look for bits that will be useful for later uses. +// A bit is consider useless as soon as it is dropped and never used +// before it as been dropped. +// E.g., looking for useful bit of x +// 1. y = x & 0x7 +// 2. z = y >> 2 +// After #1, x useful bits are 0x7, then the useful bits of x, live through +// y. +// After #2, the useful bits of x are 0x4. +// However, if x is used on an unpredicatable instruction, then all its bits +// are useful. +// E.g. +// 1. y = x & 0x7 +// 2. z = y >> 2 +// 3. str x, [@x] +static void getUsefulBits(SDValue Op, APInt &UsefulBits, unsigned Depth = 0); + +static void getUsefulBitsFromAndWithImmediate(SDValue Op, APInt &UsefulBits, + unsigned Depth) { + uint64_t Imm = + cast<const ConstantSDNode>(Op.getOperand(1).getNode())->getZExtValue(); + Imm = ARM64_AM::decodeLogicalImmediate(Imm, UsefulBits.getBitWidth()); + UsefulBits &= APInt(UsefulBits.getBitWidth(), Imm); + getUsefulBits(Op, UsefulBits, Depth + 1); +} + +static void getUsefulBitsFromBitfieldMoveOpd(SDValue Op, APInt &UsefulBits, + uint64_t Imm, uint64_t MSB, + unsigned Depth) { + // inherit the bitwidth value + APInt OpUsefulBits(UsefulBits); + OpUsefulBits = 1; + + if (MSB >= Imm) { + OpUsefulBits = OpUsefulBits.shl(MSB - Imm + 1); + --OpUsefulBits; + // The interesting part will be in the lower part of the result + getUsefulBits(Op, OpUsefulBits, Depth + 1); + // The interesting part was starting at Imm in the argument + OpUsefulBits = OpUsefulBits.shl(Imm); + } else { + OpUsefulBits = OpUsefulBits.shl(MSB + 1); + --OpUsefulBits; + // The interesting part will be shifted in the result + OpUsefulBits = OpUsefulBits.shl(OpUsefulBits.getBitWidth() - Imm); + getUsefulBits(Op, OpUsefulBits, Depth + 1); + // The interesting part was at zero in the argument + OpUsefulBits = OpUsefulBits.lshr(OpUsefulBits.getBitWidth() - Imm); + } + + UsefulBits &= OpUsefulBits; +} + +static void getUsefulBitsFromUBFM(SDValue Op, APInt &UsefulBits, + unsigned Depth) { + uint64_t Imm = + cast<const ConstantSDNode>(Op.getOperand(1).getNode())->getZExtValue(); + uint64_t MSB = + cast<const ConstantSDNode>(Op.getOperand(2).getNode())->getZExtValue(); + + getUsefulBitsFromBitfieldMoveOpd(Op, UsefulBits, Imm, MSB, Depth); +} + +static void getUsefulBitsFromOrWithShiftedReg(SDValue Op, APInt &UsefulBits, + unsigned Depth) { + uint64_t ShiftTypeAndValue = + cast<const ConstantSDNode>(Op.getOperand(2).getNode())->getZExtValue(); + APInt Mask(UsefulBits); + Mask.clearAllBits(); + Mask.flipAllBits(); + + if (ARM64_AM::getShiftType(ShiftTypeAndValue) == ARM64_AM::LSL) { + // Shift Left + uint64_t ShiftAmt = ARM64_AM::getShiftValue(ShiftTypeAndValue); + Mask = Mask.shl(ShiftAmt); + getUsefulBits(Op, Mask, Depth + 1); + Mask = Mask.lshr(ShiftAmt); + } else if (ARM64_AM::getShiftType(ShiftTypeAndValue) == ARM64_AM::LSR) { + // Shift Right + // We do not handle ARM64_AM::ASR, because the sign will change the + // number of useful bits + uint64_t ShiftAmt = ARM64_AM::getShiftValue(ShiftTypeAndValue); + Mask = Mask.lshr(ShiftAmt); + getUsefulBits(Op, Mask, Depth + 1); + Mask = Mask.shl(ShiftAmt); + } else + return; + + UsefulBits &= Mask; +} + +static void getUsefulBitsFromBFM(SDValue Op, SDValue Orig, APInt &UsefulBits, + unsigned Depth) { + uint64_t Imm = + cast<const ConstantSDNode>(Op.getOperand(2).getNode())->getZExtValue(); + uint64_t MSB = + cast<const ConstantSDNode>(Op.getOperand(3).getNode())->getZExtValue(); + + if (Op.getOperand(1) == Orig) + return getUsefulBitsFromBitfieldMoveOpd(Op, UsefulBits, Imm, MSB, Depth); + + APInt OpUsefulBits(UsefulBits); + OpUsefulBits = 1; + + if (MSB >= Imm) { + OpUsefulBits = OpUsefulBits.shl(MSB - Imm + 1); + --OpUsefulBits; + UsefulBits &= ~OpUsefulBits; + getUsefulBits(Op, UsefulBits, Depth + 1); + } else { + OpUsefulBits = OpUsefulBits.shl(MSB + 1); + --OpUsefulBits; + UsefulBits = ~(OpUsefulBits.shl(OpUsefulBits.getBitWidth() - Imm)); + getUsefulBits(Op, UsefulBits, Depth + 1); + } +} + +static void getUsefulBitsForUse(SDNode *UserNode, APInt &UsefulBits, + SDValue Orig, unsigned Depth) { + + // Users of this node should have already been instruction selected + // FIXME: Can we turn that into an assert? + if (!UserNode->isMachineOpcode()) + return; + + switch (UserNode->getMachineOpcode()) { + default: + return; + case ARM64::ANDSWri: + case ARM64::ANDSXri: + case ARM64::ANDWri: + case ARM64::ANDXri: + // We increment Depth only when we call the getUsefulBits + return getUsefulBitsFromAndWithImmediate(SDValue(UserNode, 0), UsefulBits, + Depth); + case ARM64::UBFMWri: + case ARM64::UBFMXri: + return getUsefulBitsFromUBFM(SDValue(UserNode, 0), UsefulBits, Depth); + + case ARM64::ORRWrs: + case ARM64::ORRXrs: + if (UserNode->getOperand(1) != Orig) + return; + return getUsefulBitsFromOrWithShiftedReg(SDValue(UserNode, 0), UsefulBits, + Depth); + case ARM64::BFMWri: + case ARM64::BFMXri: + return getUsefulBitsFromBFM(SDValue(UserNode, 0), Orig, UsefulBits, Depth); + } +} + +static void getUsefulBits(SDValue Op, APInt &UsefulBits, unsigned Depth) { + if (Depth >= 6) + return; + // Initialize UsefulBits + if (!Depth) { + unsigned Bitwidth = Op.getValueType().getScalarType().getSizeInBits(); + // At the beginning, assume every produced bits is useful + UsefulBits = APInt(Bitwidth, 0); + UsefulBits.flipAllBits(); + } + APInt UsersUsefulBits(UsefulBits.getBitWidth(), 0); + + for (SDNode::use_iterator UseIt = Op.getNode()->use_begin(), + UseEnd = Op.getNode()->use_end(); + UseIt != UseEnd; ++UseIt) { + // A use cannot produce useful bits + APInt UsefulBitsForUse = APInt(UsefulBits); + getUsefulBitsForUse(*UseIt, UsefulBitsForUse, Op, Depth); + UsersUsefulBits |= UsefulBitsForUse; + } + // UsefulBits contains the produced bits that are meaningful for the + // current definition, thus a user cannot make a bit meaningful at + // this point + UsefulBits &= UsersUsefulBits; +} + +// Given a OR operation, check if we have the following pattern +// ubfm c, b, imm, imm2 (or something that does the same jobs, see +// isBitfieldExtractOp) +// d = e & mask2 ; where mask is a binary sequence of 1..10..0 and +// countTrailingZeros(mask2) == imm2 - imm + 1 +// f = d | c +// if yes, given reference arguments will be update so that one can replace +// the OR instruction with: +// f = Opc Opd0, Opd1, LSB, MSB ; where Opc is a BFM, LSB = imm, and MSB = imm2 +static bool isBitfieldInsertOpFromOr(SDNode *N, unsigned &Opc, SDValue &Opd0, + SDValue &Opd1, unsigned &LSB, + unsigned &MSB, SelectionDAG *CurDAG) { + assert(N->getOpcode() == ISD::OR && "Expect a OR operation"); + + // Set Opc + EVT VT = N->getValueType(0); + if (VT == MVT::i32) + Opc = ARM64::BFMWri; + else if (VT == MVT::i64) + Opc = ARM64::BFMXri; + else + return false; + + // Because of simplify-demanded-bits in DAGCombine, involved masks may not + // have the expected shape. Try to undo that. + APInt UsefulBits; + getUsefulBits(SDValue(N, 0), UsefulBits); + + unsigned NumberOfIgnoredLowBits = UsefulBits.countTrailingZeros(); + unsigned NumberOfIgnoredHighBits = UsefulBits.countLeadingZeros(); + + // OR is commutative, check both possibilities (does llvm provide a + // way to do that directely, e.g., via code matcher?) + SDValue OrOpd1Val = N->getOperand(1); + SDNode *OrOpd0 = N->getOperand(0).getNode(); + SDNode *OrOpd1 = N->getOperand(1).getNode(); + for (int i = 0; i < 2; + ++i, std::swap(OrOpd0, OrOpd1), OrOpd1Val = N->getOperand(0)) { + unsigned BFXOpc; + // Set Opd1, LSB and MSB arguments by looking for + // c = ubfm b, imm, imm2 + if (!isBitfieldExtractOp(CurDAG, OrOpd0, BFXOpc, Opd1, LSB, MSB, + NumberOfIgnoredLowBits, true)) + continue; + + // Check that the returned opcode is compatible with the pattern, + // i.e., same type and zero extended (U and not S) + if ((BFXOpc != ARM64::UBFMXri && VT == MVT::i64) || + (BFXOpc != ARM64::UBFMWri && VT == MVT::i32)) + continue; + + // Compute the width of the bitfield insertion + int sMSB = MSB - LSB + 1; + // FIXME: This constraints is to catch bitfield insertion we may + // want to widen the pattern if we want to grab general bitfied + // move case + if (sMSB <= 0) + continue; + + // Check the second part of the pattern + EVT VT = OrOpd1->getValueType(0); + if (VT != MVT::i32 && VT != MVT::i64) + continue; + + // Compute the Known Zero for the candidate of the first operand. + // This allows to catch more general case than just looking for + // AND with imm. Indeed, simplify-demanded-bits may have removed + // the AND instruction because it proves it was useless. + APInt KnownZero, KnownOne; + CurDAG->ComputeMaskedBits(OrOpd1Val, KnownZero, KnownOne); + + // Check if there is enough room for the second operand to appear + // in the first one + if (KnownZero.countTrailingOnes() < (unsigned)sMSB) + continue; + + // Set the first operand + uint64_t Imm; + if (isOpcWithIntImmediate(OrOpd1, ISD::AND, Imm) && + isHighMask(Imm, sMSB, NumberOfIgnoredHighBits, VT)) + // In that case, we can eliminate the AND + Opd0 = OrOpd1->getOperand(0); + else + // Maybe the AND has been removed by simplify-demanded-bits + // or is useful because it discards more bits + Opd0 = OrOpd1Val; + + // both parts match + return true; + } + + return false; +} + +SDNode *ARM64DAGToDAGISel::SelectBitfieldInsertOp(SDNode *N) { + if (N->getOpcode() != ISD::OR) + return NULL; + + unsigned Opc; + unsigned LSB, MSB; + SDValue Opd0, Opd1; + + if (!isBitfieldInsertOpFromOr(N, Opc, Opd0, Opd1, LSB, MSB, CurDAG)) + return NULL; + + EVT VT = N->getValueType(0); + SDValue Ops[] = { Opd0, + Opd1, + CurDAG->getTargetConstant(LSB, VT), + CurDAG->getTargetConstant(MSB, VT) }; + return CurDAG->SelectNodeTo(N, Opc, VT, Ops, 4); +} + +SDNode *ARM64DAGToDAGISel::SelectLIBM(SDNode *N) { + EVT VT = N->getValueType(0); + unsigned Variant; + unsigned Opc; + unsigned FRINTXOpcs[] = { ARM64::FRINTXSr, ARM64::FRINTXDr }; + + if (VT == MVT::f32) { + Variant = 0; + } else if (VT == MVT::f64) { + Variant = 1; + } else + return 0; // Unrecognized argument type. Fall back on default codegen. + + // Pick the FRINTX variant needed to set the flags. + unsigned FRINTXOpc = FRINTXOpcs[Variant]; + + switch (N->getOpcode()) { + default: + return 0; // Unrecognized libm ISD node. Fall back on default codegen. + case ISD::FCEIL: { + unsigned FRINTPOpcs[] = { ARM64::FRINTPSr, ARM64::FRINTPDr }; + Opc = FRINTPOpcs[Variant]; + break; + } + case ISD::FFLOOR: { + unsigned FRINTMOpcs[] = { ARM64::FRINTMSr, ARM64::FRINTMDr }; + Opc = FRINTMOpcs[Variant]; + break; + } + case ISD::FTRUNC: { + unsigned FRINTZOpcs[] = { ARM64::FRINTZSr, ARM64::FRINTZDr }; + Opc = FRINTZOpcs[Variant]; + break; + } + case ISD::FROUND: { + unsigned FRINTAOpcs[] = { ARM64::FRINTASr, ARM64::FRINTADr }; + Opc = FRINTAOpcs[Variant]; + break; + } + } + + SDLoc dl(N); + SDValue In = N->getOperand(0); + SmallVector<SDValue, 2> Ops; + Ops.push_back(In); + + if (!TM.Options.UnsafeFPMath) { + SDNode *FRINTX = CurDAG->getMachineNode(FRINTXOpc, dl, VT, MVT::Glue, In); + Ops.push_back(SDValue(FRINTX, 1)); + } + + return CurDAG->getMachineNode(Opc, dl, VT, Ops); +} + +SDNode *ARM64DAGToDAGISel::Select(SDNode *Node) { + // Dump information about the Node being selected + DEBUG(errs() << "Selecting: "); + DEBUG(Node->dump(CurDAG)); + DEBUG(errs() << "\n"); + + // If we have a custom node, we already have selected! + if (Node->isMachineOpcode()) { + DEBUG(errs() << "== "; Node->dump(CurDAG); errs() << "\n"); + Node->setNodeId(-1); + return NULL; + } + + // Few custom selection stuff. + SDNode *ResNode = 0; + EVT VT = Node->getValueType(0); + + switch (Node->getOpcode()) { + default: + break; + + case ISD::ADD: + if (SDNode *I = SelectMLAV64LaneV128(Node)) + return I; + break; + + case ISD::ATOMIC_LOAD_ADD: + return SelectAtomic(Node, ARM64::ATOMIC_LOAD_ADD_I8, + ARM64::ATOMIC_LOAD_ADD_I16, ARM64::ATOMIC_LOAD_ADD_I32, + ARM64::ATOMIC_LOAD_ADD_I64); + case ISD::ATOMIC_LOAD_SUB: + return SelectAtomic(Node, ARM64::ATOMIC_LOAD_SUB_I8, + ARM64::ATOMIC_LOAD_SUB_I16, ARM64::ATOMIC_LOAD_SUB_I32, + ARM64::ATOMIC_LOAD_SUB_I64); + case ISD::ATOMIC_LOAD_AND: + return SelectAtomic(Node, ARM64::ATOMIC_LOAD_AND_I8, + ARM64::ATOMIC_LOAD_AND_I16, ARM64::ATOMIC_LOAD_AND_I32, + ARM64::ATOMIC_LOAD_AND_I64); + case ISD::ATOMIC_LOAD_OR: + return SelectAtomic(Node, ARM64::ATOMIC_LOAD_OR_I8, + ARM64::ATOMIC_LOAD_OR_I16, ARM64::ATOMIC_LOAD_OR_I32, + ARM64::ATOMIC_LOAD_OR_I64); + case ISD::ATOMIC_LOAD_XOR: + return SelectAtomic(Node, ARM64::ATOMIC_LOAD_XOR_I8, + ARM64::ATOMIC_LOAD_XOR_I16, ARM64::ATOMIC_LOAD_XOR_I32, + ARM64::ATOMIC_LOAD_XOR_I64); + case ISD::ATOMIC_LOAD_NAND: + return SelectAtomic( + Node, ARM64::ATOMIC_LOAD_NAND_I8, ARM64::ATOMIC_LOAD_NAND_I16, + ARM64::ATOMIC_LOAD_NAND_I32, ARM64::ATOMIC_LOAD_NAND_I64); + case ISD::ATOMIC_LOAD_MIN: + return SelectAtomic(Node, ARM64::ATOMIC_LOAD_MIN_I8, + ARM64::ATOMIC_LOAD_MIN_I16, ARM64::ATOMIC_LOAD_MIN_I32, + ARM64::ATOMIC_LOAD_MIN_I64); + case ISD::ATOMIC_LOAD_MAX: + return SelectAtomic(Node, ARM64::ATOMIC_LOAD_MAX_I8, + ARM64::ATOMIC_LOAD_MAX_I16, ARM64::ATOMIC_LOAD_MAX_I32, + ARM64::ATOMIC_LOAD_MAX_I64); + case ISD::ATOMIC_LOAD_UMIN: + return SelectAtomic( + Node, ARM64::ATOMIC_LOAD_UMIN_I8, ARM64::ATOMIC_LOAD_UMIN_I16, + ARM64::ATOMIC_LOAD_UMIN_I32, ARM64::ATOMIC_LOAD_UMIN_I64); + case ISD::ATOMIC_LOAD_UMAX: + return SelectAtomic( + Node, ARM64::ATOMIC_LOAD_UMAX_I8, ARM64::ATOMIC_LOAD_UMAX_I16, + ARM64::ATOMIC_LOAD_UMAX_I32, ARM64::ATOMIC_LOAD_UMAX_I64); + case ISD::ATOMIC_SWAP: + return SelectAtomic(Node, ARM64::ATOMIC_SWAP_I8, ARM64::ATOMIC_SWAP_I16, + ARM64::ATOMIC_SWAP_I32, ARM64::ATOMIC_SWAP_I64); + case ISD::ATOMIC_CMP_SWAP: + return SelectAtomic(Node, ARM64::ATOMIC_CMP_SWAP_I8, + ARM64::ATOMIC_CMP_SWAP_I16, ARM64::ATOMIC_CMP_SWAP_I32, + ARM64::ATOMIC_CMP_SWAP_I64); + + case ISD::LOAD: { + // Try to select as an indexed load. Fall through to normal processing + // if we can't. + bool Done = false; + SDNode *I = SelectIndexedLoad(Node, Done); + if (Done) + return I; + break; + } + + case ISD::SRL: + case ISD::AND: + case ISD::SRA: + if (SDNode *I = SelectBitfieldExtractOp(Node)) + return I; + break; + + case ISD::OR: + if (SDNode *I = SelectBitfieldInsertOp(Node)) + return I; + break; + + case ISD::EXTRACT_VECTOR_ELT: { + // Extracting lane zero is a special case where we can just use a plain + // EXTRACT_SUBREG instruction, which will become FMOV. This is easier for + // the rest of the compiler, especially the register allocator and copyi + // propagation, to reason about, so is preferred when it's possible to + // use it. + ConstantSDNode *LaneNode = cast<ConstantSDNode>(Node->getOperand(1)); + // Bail and use the default Select() for non-zero lanes. + if (LaneNode->getZExtValue() != 0) + break; + // If the element type is not the same as the result type, likewise + // bail and use the default Select(), as there's more to do than just + // a cross-class COPY. This catches extracts of i8 and i16 elements + // since they will need an explicit zext. + if (VT != Node->getOperand(0).getValueType().getVectorElementType()) + break; + unsigned SubReg; + switch (Node->getOperand(0) + .getValueType() + .getVectorElementType() + .getSizeInBits()) { + default: + assert(0 && "Unexpected vector element type!"); + case 64: + SubReg = ARM64::dsub; + break; + case 32: + SubReg = ARM64::ssub; + break; + case 16: // FALLTHROUGH + case 8: + llvm_unreachable("unexpected zext-requiring extract element!"); + } + SDValue Extract = CurDAG->getTargetExtractSubreg(SubReg, SDLoc(Node), VT, + Node->getOperand(0)); + DEBUG(dbgs() << "ISEL: Custom selection!\n=> "); + DEBUG(Extract->dumpr(CurDAG)); + DEBUG(dbgs() << "\n"); + return Extract.getNode(); + } + case ISD::Constant: { + // Materialize zero constants as copies from WZR/XZR. This allows + // the coalescer to propagate these into other instructions. + ConstantSDNode *ConstNode = cast<ConstantSDNode>(Node); + if (ConstNode->isNullValue()) { + if (VT == MVT::i32) + return CurDAG->getCopyFromReg(CurDAG->getEntryNode(), SDLoc(Node), + ARM64::WZR, MVT::i32).getNode(); + else if (VT == MVT::i64) + return CurDAG->getCopyFromReg(CurDAG->getEntryNode(), SDLoc(Node), + ARM64::XZR, MVT::i64).getNode(); + } + break; + } + + case ISD::FrameIndex: { + // Selects to ADDXri FI, 0 which in turn will become ADDXri SP, imm. + int FI = cast<FrameIndexSDNode>(Node)->getIndex(); + unsigned Shifter = ARM64_AM::getShifterImm(ARM64_AM::LSL, 0); + const TargetLowering *TLI = getTargetLowering(); + SDValue TFI = CurDAG->getTargetFrameIndex(FI, TLI->getPointerTy()); + SDValue Ops[] = { TFI, CurDAG->getTargetConstant(0, MVT::i32), + CurDAG->getTargetConstant(Shifter, MVT::i32) }; + return CurDAG->SelectNodeTo(Node, ARM64::ADDXri, MVT::i64, Ops, 3); + } + case ISD::INTRINSIC_W_CHAIN: { + unsigned IntNo = cast<ConstantSDNode>(Node->getOperand(1))->getZExtValue(); + switch (IntNo) { + default: + break; + case Intrinsic::arm64_ldxp: { + SDValue MemAddr = Node->getOperand(2); + SDLoc DL(Node); + SDValue Chain = Node->getOperand(0); + + SDNode *Ld = CurDAG->getMachineNode(ARM64::LDXPX, DL, MVT::i64, MVT::i64, + MVT::Other, MemAddr, Chain); + + // Transfer memoperands. + MachineSDNode::mmo_iterator MemOp = MF->allocateMemRefsArray(1); + MemOp[0] = cast<MemIntrinsicSDNode>(Node)->getMemOperand(); + cast<MachineSDNode>(Ld)->setMemRefs(MemOp, MemOp + 1); + return Ld; + } + case Intrinsic::arm64_stxp: { + SDLoc DL(Node); + SDValue Chain = Node->getOperand(0); + SDValue ValLo = Node->getOperand(2); + SDValue ValHi = Node->getOperand(3); + SDValue MemAddr = Node->getOperand(4); + + // Place arguments in the right order. + SmallVector<SDValue, 7> Ops; + Ops.push_back(ValLo); + Ops.push_back(ValHi); + Ops.push_back(MemAddr); + Ops.push_back(Chain); + + SDNode *St = + CurDAG->getMachineNode(ARM64::STXPX, DL, MVT::i32, MVT::Other, Ops); + // Transfer memoperands. + MachineSDNode::mmo_iterator MemOp = MF->allocateMemRefsArray(1); + MemOp[0] = cast<MemIntrinsicSDNode>(Node)->getMemOperand(); + cast<MachineSDNode>(St)->setMemRefs(MemOp, MemOp + 1); + + return St; + } + case Intrinsic::arm64_neon_ld1x2: + if (VT == MVT::v8i8) + return SelectLoad(Node, 2, ARM64::LD1Twov8b, ARM64::dsub0); + else if (VT == MVT::v16i8) + return SelectLoad(Node, 2, ARM64::LD1Twov16b, ARM64::qsub0); + else if (VT == MVT::v4i16) + return SelectLoad(Node, 2, ARM64::LD1Twov4h, ARM64::dsub0); + else if (VT == MVT::v8i16) + return SelectLoad(Node, 2, ARM64::LD1Twov8h, ARM64::qsub0); + else if (VT == MVT::v2i32 || VT == MVT::v2f32) + return SelectLoad(Node, 2, ARM64::LD1Twov2s, ARM64::dsub0); + else if (VT == MVT::v4i32 || VT == MVT::v4f32) + return SelectLoad(Node, 2, ARM64::LD1Twov4s, ARM64::qsub0); + else if (VT == MVT::v1i64 || VT == MVT::v1f64) + return SelectLoad(Node, 2, ARM64::LD1Twov1d, ARM64::dsub0); + else if (VT == MVT::v2i64 || VT == MVT::v2f64) + return SelectLoad(Node, 2, ARM64::LD1Twov2d, ARM64::qsub0); + break; + case Intrinsic::arm64_neon_ld1x3: + if (VT == MVT::v8i8) + return SelectLoad(Node, 3, ARM64::LD1Threev8b, ARM64::dsub0); + else if (VT == MVT::v16i8) + return SelectLoad(Node, 3, ARM64::LD1Threev16b, ARM64::qsub0); + else if (VT == MVT::v4i16) + return SelectLoad(Node, 3, ARM64::LD1Threev4h, ARM64::dsub0); + else if (VT == MVT::v8i16) + return SelectLoad(Node, 3, ARM64::LD1Threev8h, ARM64::qsub0); + else if (VT == MVT::v2i32 || VT == MVT::v2f32) + return SelectLoad(Node, 3, ARM64::LD1Threev2s, ARM64::dsub0); + else if (VT == MVT::v4i32 || VT == MVT::v4f32) + return SelectLoad(Node, 3, ARM64::LD1Threev4s, ARM64::qsub0); + else if (VT == MVT::v1i64 || VT == MVT::v1f64) + return SelectLoad(Node, 3, ARM64::LD1Threev1d, ARM64::dsub0); + else if (VT == MVT::v2i64 || VT == MVT::v2f64) + return SelectLoad(Node, 3, ARM64::LD1Threev2d, ARM64::qsub0); + break; + case Intrinsic::arm64_neon_ld1x4: + if (VT == MVT::v8i8) + return SelectLoad(Node, 4, ARM64::LD1Fourv8b, ARM64::dsub0); + else if (VT == MVT::v16i8) + return SelectLoad(Node, 4, ARM64::LD1Fourv16b, ARM64::qsub0); + else if (VT == MVT::v4i16) + return SelectLoad(Node, 4, ARM64::LD1Fourv4h, ARM64::dsub0); + else if (VT == MVT::v8i16) + return SelectLoad(Node, 4, ARM64::LD1Fourv8h, ARM64::qsub0); + else if (VT == MVT::v2i32 || VT == MVT::v2f32) + return SelectLoad(Node, 4, ARM64::LD1Fourv2s, ARM64::dsub0); + else if (VT == MVT::v4i32 || VT == MVT::v4f32) + return SelectLoad(Node, 4, ARM64::LD1Fourv4s, ARM64::qsub0); + else if (VT == MVT::v1i64 || VT == MVT::v1f64) + return SelectLoad(Node, 4, ARM64::LD1Fourv1d, ARM64::dsub0); + else if (VT == MVT::v2i64 || VT == MVT::v2f64) + return SelectLoad(Node, 4, ARM64::LD1Fourv2d, ARM64::qsub0); + break; + case Intrinsic::arm64_neon_ld2: + if (VT == MVT::v8i8) + return SelectLoad(Node, 2, ARM64::LD2Twov8b, ARM64::dsub0); + else if (VT == MVT::v16i8) + return SelectLoad(Node, 2, ARM64::LD2Twov16b, ARM64::qsub0); + else if (VT == MVT::v4i16) + return SelectLoad(Node, 2, ARM64::LD2Twov4h, ARM64::dsub0); + else if (VT == MVT::v8i16) + return SelectLoad(Node, 2, ARM64::LD2Twov8h, ARM64::qsub0); + else if (VT == MVT::v2i32 || VT == MVT::v2f32) + return SelectLoad(Node, 2, ARM64::LD2Twov2s, ARM64::dsub0); + else if (VT == MVT::v4i32 || VT == MVT::v4f32) + return SelectLoad(Node, 2, ARM64::LD2Twov4s, ARM64::qsub0); + else if (VT == MVT::v1i64 || VT == MVT::v1f64) + return SelectLoad(Node, 2, ARM64::LD1Twov1d, ARM64::dsub0); + else if (VT == MVT::v2i64 || VT == MVT::v2f64) + return SelectLoad(Node, 2, ARM64::LD2Twov2d, ARM64::qsub0); + break; + case Intrinsic::arm64_neon_ld3: + if (VT == MVT::v8i8) + return SelectLoad(Node, 3, ARM64::LD3Threev8b, ARM64::dsub0); + else if (VT == MVT::v16i8) + return SelectLoad(Node, 3, ARM64::LD3Threev16b, ARM64::qsub0); + else if (VT == MVT::v4i16) + return SelectLoad(Node, 3, ARM64::LD3Threev4h, ARM64::dsub0); + else if (VT == MVT::v8i16) + return SelectLoad(Node, 3, ARM64::LD3Threev8h, ARM64::qsub0); + else if (VT == MVT::v2i32 || VT == MVT::v2f32) + return SelectLoad(Node, 3, ARM64::LD3Threev2s, ARM64::dsub0); + else if (VT == MVT::v4i32 || VT == MVT::v4f32) + return SelectLoad(Node, 3, ARM64::LD3Threev4s, ARM64::qsub0); + else if (VT == MVT::v1i64 || VT == MVT::v1f64) + return SelectLoad(Node, 3, ARM64::LD1Threev1d, ARM64::dsub0); + else if (VT == MVT::v2i64 || VT == MVT::v2f64) + return SelectLoad(Node, 3, ARM64::LD3Threev2d, ARM64::qsub0); + break; + case Intrinsic::arm64_neon_ld4: + if (VT == MVT::v8i8) + return SelectLoad(Node, 4, ARM64::LD4Fourv8b, ARM64::dsub0); + else if (VT == MVT::v16i8) + return SelectLoad(Node, 4, ARM64::LD4Fourv16b, ARM64::qsub0); + else if (VT == MVT::v4i16) + return SelectLoad(Node, 4, ARM64::LD4Fourv4h, ARM64::dsub0); + else if (VT == MVT::v8i16) + return SelectLoad(Node, 4, ARM64::LD4Fourv8h, ARM64::qsub0); + else if (VT == MVT::v2i32 || VT == MVT::v2f32) + return SelectLoad(Node, 4, ARM64::LD4Fourv2s, ARM64::dsub0); + else if (VT == MVT::v4i32 || VT == MVT::v4f32) + return SelectLoad(Node, 4, ARM64::LD4Fourv4s, ARM64::qsub0); + else if (VT == MVT::v1i64 || VT == MVT::v1f64) + return SelectLoad(Node, 4, ARM64::LD1Fourv1d, ARM64::dsub0); + else if (VT == MVT::v2i64 || VT == MVT::v2f64) + return SelectLoad(Node, 4, ARM64::LD4Fourv2d, ARM64::qsub0); + break; + case Intrinsic::arm64_neon_ld2r: + if (VT == MVT::v8i8) + return SelectLoad(Node, 2, ARM64::LD2Rv8b, ARM64::dsub0); + else if (VT == MVT::v16i8) + return SelectLoad(Node, 2, ARM64::LD2Rv16b, ARM64::qsub0); + else if (VT == MVT::v4i16) + return SelectLoad(Node, 2, ARM64::LD2Rv4h, ARM64::dsub0); + else if (VT == MVT::v8i16) + return SelectLoad(Node, 2, ARM64::LD2Rv8h, ARM64::qsub0); + else if (VT == MVT::v2i32 || VT == MVT::v2f32) + return SelectLoad(Node, 2, ARM64::LD2Rv2s, ARM64::dsub0); + else if (VT == MVT::v4i32 || VT == MVT::v4f32) + return SelectLoad(Node, 2, ARM64::LD2Rv4s, ARM64::qsub0); + else if (VT == MVT::v1i64 || VT == MVT::v1f64) + return SelectLoad(Node, 2, ARM64::LD2Rv1d, ARM64::dsub0); + else if (VT == MVT::v2i64 || VT == MVT::v2f64) + return SelectLoad(Node, 2, ARM64::LD2Rv2d, ARM64::qsub0); + break; + case Intrinsic::arm64_neon_ld3r: + if (VT == MVT::v8i8) + return SelectLoad(Node, 3, ARM64::LD3Rv8b, ARM64::dsub0); + else if (VT == MVT::v16i8) + return SelectLoad(Node, 3, ARM64::LD3Rv16b, ARM64::qsub0); + else if (VT == MVT::v4i16) + return SelectLoad(Node, 3, ARM64::LD3Rv4h, ARM64::dsub0); + else if (VT == MVT::v8i16) + return SelectLoad(Node, 3, ARM64::LD3Rv8h, ARM64::qsub0); + else if (VT == MVT::v2i32 || VT == MVT::v2f32) + return SelectLoad(Node, 3, ARM64::LD3Rv2s, ARM64::dsub0); + else if (VT == MVT::v4i32 || VT == MVT::v4f32) + return SelectLoad(Node, 3, ARM64::LD3Rv4s, ARM64::qsub0); + else if (VT == MVT::v1i64 || VT == MVT::v1f64) + return SelectLoad(Node, 3, ARM64::LD3Rv1d, ARM64::dsub0); + else if (VT == MVT::v2i64 || VT == MVT::v2f64) + return SelectLoad(Node, 3, ARM64::LD3Rv2d, ARM64::qsub0); + break; + case Intrinsic::arm64_neon_ld4r: + if (VT == MVT::v8i8) + return SelectLoad(Node, 4, ARM64::LD4Rv8b, ARM64::dsub0); + else if (VT == MVT::v16i8) + return SelectLoad(Node, 4, ARM64::LD4Rv16b, ARM64::qsub0); + else if (VT == MVT::v4i16) + return SelectLoad(Node, 4, ARM64::LD4Rv4h, ARM64::dsub0); + else if (VT == MVT::v8i16) + return SelectLoad(Node, 4, ARM64::LD4Rv8h, ARM64::qsub0); + else if (VT == MVT::v2i32 || VT == MVT::v2f32) + return SelectLoad(Node, 4, ARM64::LD4Rv2s, ARM64::dsub0); + else if (VT == MVT::v4i32 || VT == MVT::v4f32) + return SelectLoad(Node, 4, ARM64::LD4Rv4s, ARM64::qsub0); + else if (VT == MVT::v1i64 || VT == MVT::v1f64) + return SelectLoad(Node, 4, ARM64::LD4Rv1d, ARM64::dsub0); + else if (VT == MVT::v2i64 || VT == MVT::v2f64) + return SelectLoad(Node, 4, ARM64::LD4Rv2d, ARM64::qsub0); + break; + case Intrinsic::arm64_neon_ld2lane: + if (VT == MVT::v16i8 || VT == MVT::v8i8) + return SelectLoadLane(Node, 2, ARM64::LD2i8); + else if (VT == MVT::v8i16 || VT == MVT::v4i16) + return SelectLoadLane(Node, 2, ARM64::LD2i16); + else if (VT == MVT::v4i32 || VT == MVT::v2i32 || VT == MVT::v4f32 || + VT == MVT::v2f32) + return SelectLoadLane(Node, 2, ARM64::LD2i32); + else if (VT == MVT::v2i64 || VT == MVT::v1i64 || VT == MVT::v2f64 || + VT == MVT::v1f64) + return SelectLoadLane(Node, 2, ARM64::LD2i64); + break; + case Intrinsic::arm64_neon_ld3lane: + if (VT == MVT::v16i8 || VT == MVT::v8i8) + return SelectLoadLane(Node, 3, ARM64::LD3i8); + else if (VT == MVT::v8i16 || VT == MVT::v4i16) + return SelectLoadLane(Node, 3, ARM64::LD3i16); + else if (VT == MVT::v4i32 || VT == MVT::v2i32 || VT == MVT::v4f32 || + VT == MVT::v2f32) + return SelectLoadLane(Node, 3, ARM64::LD3i32); + else if (VT == MVT::v2i64 || VT == MVT::v1i64 || VT == MVT::v2f64 || + VT == MVT::v1f64) + return SelectLoadLane(Node, 3, ARM64::LD3i64); + break; + case Intrinsic::arm64_neon_ld4lane: + if (VT == MVT::v16i8 || VT == MVT::v8i8) + return SelectLoadLane(Node, 4, ARM64::LD4i8); + else if (VT == MVT::v8i16 || VT == MVT::v4i16) + return SelectLoadLane(Node, 4, ARM64::LD4i16); + else if (VT == MVT::v4i32 || VT == MVT::v2i32 || VT == MVT::v4f32 || + VT == MVT::v2f32) + return SelectLoadLane(Node, 4, ARM64::LD4i32); + else if (VT == MVT::v2i64 || VT == MVT::v1i64 || VT == MVT::v2f64 || + VT == MVT::v1f64) + return SelectLoadLane(Node, 4, ARM64::LD4i64); + break; + } + } break; + case ISD::INTRINSIC_WO_CHAIN: { + unsigned IntNo = cast<ConstantSDNode>(Node->getOperand(0))->getZExtValue(); + switch (IntNo) { + default: + break; + case Intrinsic::arm64_neon_tbl2: + return SelectTable(Node, 2, VT == MVT::v8i8 ? ARM64::TBLv8i8Two + : ARM64::TBLv16i8Two, + false); + case Intrinsic::arm64_neon_tbl3: + return SelectTable(Node, 3, VT == MVT::v8i8 ? ARM64::TBLv8i8Three + : ARM64::TBLv16i8Three, + false); + case Intrinsic::arm64_neon_tbl4: + return SelectTable(Node, 4, VT == MVT::v8i8 ? ARM64::TBLv8i8Four + : ARM64::TBLv16i8Four, + false); + case Intrinsic::arm64_neon_tbx2: + return SelectTable(Node, 2, VT == MVT::v8i8 ? ARM64::TBXv8i8Two + : ARM64::TBXv16i8Two, + true); + case Intrinsic::arm64_neon_tbx3: + return SelectTable(Node, 3, VT == MVT::v8i8 ? ARM64::TBXv8i8Three + : ARM64::TBXv16i8Three, + true); + case Intrinsic::arm64_neon_tbx4: + return SelectTable(Node, 4, VT == MVT::v8i8 ? ARM64::TBXv8i8Four + : ARM64::TBXv16i8Four, + true); + case Intrinsic::arm64_neon_smull: + case Intrinsic::arm64_neon_umull: + if (SDNode *N = SelectMULLV64LaneV128(IntNo, Node)) + return N; + break; + } + break; + } + case ISD::INTRINSIC_VOID: { + unsigned IntNo = cast<ConstantSDNode>(Node->getOperand(1))->getZExtValue(); + if (Node->getNumOperands() >= 3) + VT = Node->getOperand(2)->getValueType(0); + switch (IntNo) { + default: + break; + case Intrinsic::arm64_neon_st1x2: { + if (VT == MVT::v8i8) + return SelectStore(Node, 2, ARM64::ST1Twov8b); + else if (VT == MVT::v16i8) + return SelectStore(Node, 2, ARM64::ST1Twov16b); + else if (VT == MVT::v4i16) + return SelectStore(Node, 2, ARM64::ST1Twov4h); + else if (VT == MVT::v8i16) + return SelectStore(Node, 2, ARM64::ST1Twov8h); + else if (VT == MVT::v2i32 || VT == MVT::v2f32) + return SelectStore(Node, 2, ARM64::ST1Twov2s); + else if (VT == MVT::v4i32 || VT == MVT::v4f32) + return SelectStore(Node, 2, ARM64::ST1Twov4s); + else if (VT == MVT::v2i64 || VT == MVT::v2f64) + return SelectStore(Node, 2, ARM64::ST1Twov2d); + else if (VT == MVT::v1i64 || VT == MVT::v1f64) + return SelectStore(Node, 2, ARM64::ST1Twov1d); + break; + } + case Intrinsic::arm64_neon_st1x3: { + if (VT == MVT::v8i8) + return SelectStore(Node, 3, ARM64::ST1Threev8b); + else if (VT == MVT::v16i8) + return SelectStore(Node, 3, ARM64::ST1Threev16b); + else if (VT == MVT::v4i16) + return SelectStore(Node, 3, ARM64::ST1Threev4h); + else if (VT == MVT::v8i16) + return SelectStore(Node, 3, ARM64::ST1Threev8h); + else if (VT == MVT::v2i32 || VT == MVT::v2f32) + return SelectStore(Node, 3, ARM64::ST1Threev2s); + else if (VT == MVT::v4i32 || VT == MVT::v4f32) + return SelectStore(Node, 3, ARM64::ST1Threev4s); + else if (VT == MVT::v2i64 || VT == MVT::v2f64) + return SelectStore(Node, 3, ARM64::ST1Threev2d); + else if (VT == MVT::v1i64 || VT == MVT::v1f64) + return SelectStore(Node, 3, ARM64::ST1Threev1d); + break; + } + case Intrinsic::arm64_neon_st1x4: { + if (VT == MVT::v8i8) + return SelectStore(Node, 4, ARM64::ST1Fourv8b); + else if (VT == MVT::v16i8) + return SelectStore(Node, 4, ARM64::ST1Fourv16b); + else if (VT == MVT::v4i16) + return SelectStore(Node, 4, ARM64::ST1Fourv4h); + else if (VT == MVT::v8i16) + return SelectStore(Node, 4, ARM64::ST1Fourv8h); + else if (VT == MVT::v2i32 || VT == MVT::v2f32) + return SelectStore(Node, 4, ARM64::ST1Fourv2s); + else if (VT == MVT::v4i32 || VT == MVT::v4f32) + return SelectStore(Node, 4, ARM64::ST1Fourv4s); + else if (VT == MVT::v2i64 || VT == MVT::v2f64) + return SelectStore(Node, 4, ARM64::ST1Fourv2d); + else if (VT == MVT::v1i64 || VT == MVT::v1f64) + return SelectStore(Node, 4, ARM64::ST1Fourv1d); + break; + } + case Intrinsic::arm64_neon_st2: { + if (VT == MVT::v8i8) + return SelectStore(Node, 2, ARM64::ST2Twov8b); + else if (VT == MVT::v16i8) + return SelectStore(Node, 2, ARM64::ST2Twov16b); + else if (VT == MVT::v4i16) + return SelectStore(Node, 2, ARM64::ST2Twov4h); + else if (VT == MVT::v8i16) + return SelectStore(Node, 2, ARM64::ST2Twov8h); + else if (VT == MVT::v2i32 || VT == MVT::v2f32) + return SelectStore(Node, 2, ARM64::ST2Twov2s); + else if (VT == MVT::v4i32 || VT == MVT::v4f32) + return SelectStore(Node, 2, ARM64::ST2Twov4s); + else if (VT == MVT::v2i64 || VT == MVT::v2f64) + return SelectStore(Node, 2, ARM64::ST2Twov2d); + else if (VT == MVT::v1i64 || VT == MVT::v1f64) + return SelectStore(Node, 2, ARM64::ST1Twov1d); + break; + } + case Intrinsic::arm64_neon_st3: { + if (VT == MVT::v8i8) + return SelectStore(Node, 3, ARM64::ST3Threev8b); + else if (VT == MVT::v16i8) + return SelectStore(Node, 3, ARM64::ST3Threev16b); + else if (VT == MVT::v4i16) + return SelectStore(Node, 3, ARM64::ST3Threev4h); + else if (VT == MVT::v8i16) + return SelectStore(Node, 3, ARM64::ST3Threev8h); + else if (VT == MVT::v2i32 || VT == MVT::v2f32) + return SelectStore(Node, 3, ARM64::ST3Threev2s); + else if (VT == MVT::v4i32 || VT == MVT::v4f32) + return SelectStore(Node, 3, ARM64::ST3Threev4s); + else if (VT == MVT::v2i64 || VT == MVT::v2f64) + return SelectStore(Node, 3, ARM64::ST3Threev2d); + else if (VT == MVT::v1i64 || VT == MVT::v1f64) + return SelectStore(Node, 3, ARM64::ST1Threev1d); + break; + } + case Intrinsic::arm64_neon_st4: { + if (VT == MVT::v8i8) + return SelectStore(Node, 4, ARM64::ST4Fourv8b); + else if (VT == MVT::v16i8) + return SelectStore(Node, 4, ARM64::ST4Fourv16b); + else if (VT == MVT::v4i16) + return SelectStore(Node, 4, ARM64::ST4Fourv4h); + else if (VT == MVT::v8i16) + return SelectStore(Node, 4, ARM64::ST4Fourv8h); + else if (VT == MVT::v2i32 || VT == MVT::v2f32) + return SelectStore(Node, 4, ARM64::ST4Fourv2s); + else if (VT == MVT::v4i32 || VT == MVT::v4f32) + return SelectStore(Node, 4, ARM64::ST4Fourv4s); + else if (VT == MVT::v2i64 || VT == MVT::v2f64) + return SelectStore(Node, 4, ARM64::ST4Fourv2d); + else if (VT == MVT::v1i64 || VT == MVT::v1f64) + return SelectStore(Node, 4, ARM64::ST1Fourv1d); + break; + } + case Intrinsic::arm64_neon_st2lane: { + if (VT == MVT::v16i8 || VT == MVT::v8i8) + return SelectStoreLane(Node, 2, ARM64::ST2i8); + else if (VT == MVT::v8i16 || VT == MVT::v4i16) + return SelectStoreLane(Node, 2, ARM64::ST2i16); + else if (VT == MVT::v4i32 || VT == MVT::v2i32 || VT == MVT::v4f32 || + VT == MVT::v2f32) + return SelectStoreLane(Node, 2, ARM64::ST2i32); + else if (VT == MVT::v2i64 || VT == MVT::v1i64 || VT == MVT::v2f64 || + VT == MVT::v1f64) + return SelectStoreLane(Node, 2, ARM64::ST2i64); + break; + } + case Intrinsic::arm64_neon_st3lane: { + if (VT == MVT::v16i8 || VT == MVT::v8i8) + return SelectStoreLane(Node, 3, ARM64::ST3i8); + else if (VT == MVT::v8i16 || VT == MVT::v4i16) + return SelectStoreLane(Node, 3, ARM64::ST3i16); + else if (VT == MVT::v4i32 || VT == MVT::v2i32 || VT == MVT::v4f32 || + VT == MVT::v2f32) + return SelectStoreLane(Node, 3, ARM64::ST3i32); + else if (VT == MVT::v2i64 || VT == MVT::v1i64 || VT == MVT::v2f64 || + VT == MVT::v1f64) + return SelectStoreLane(Node, 3, ARM64::ST3i64); + break; + } + case Intrinsic::arm64_neon_st4lane: { + if (VT == MVT::v16i8 || VT == MVT::v8i8) + return SelectStoreLane(Node, 4, ARM64::ST4i8); + else if (VT == MVT::v8i16 || VT == MVT::v4i16) + return SelectStoreLane(Node, 4, ARM64::ST4i16); + else if (VT == MVT::v4i32 || VT == MVT::v2i32 || VT == MVT::v4f32 || + VT == MVT::v2f32) + return SelectStoreLane(Node, 4, ARM64::ST4i32); + else if (VT == MVT::v2i64 || VT == MVT::v1i64 || VT == MVT::v2f64 || + VT == MVT::v1f64) + return SelectStoreLane(Node, 4, ARM64::ST4i64); + break; + } + } + } + + case ISD::FCEIL: + case ISD::FFLOOR: + case ISD::FTRUNC: + case ISD::FROUND: + if (SDNode *I = SelectLIBM(Node)) + return I; + break; + } + + // Select the default instruction + ResNode = SelectCode(Node); + + DEBUG(errs() << "=> "); + if (ResNode == NULL || ResNode == Node) + DEBUG(Node->dump(CurDAG)); + else + DEBUG(ResNode->dump(CurDAG)); + DEBUG(errs() << "\n"); + + return ResNode; +} + +/// createARM64ISelDag - This pass converts a legalized DAG into a +/// ARM64-specific DAG, ready for instruction scheduling. +FunctionPass *llvm::createARM64ISelDag(ARM64TargetMachine &TM, + CodeGenOpt::Level OptLevel) { + return new ARM64DAGToDAGISel(TM, OptLevel); +} |
