diff options
Diffstat (limited to 'lib/Target/AArch64/AArch64ISelDAGToDAG.cpp')
-rw-r--r-- | lib/Target/AArch64/AArch64ISelDAGToDAG.cpp | 247 |
1 files changed, 152 insertions, 95 deletions
diff --git a/lib/Target/AArch64/AArch64ISelDAGToDAG.cpp b/lib/Target/AArch64/AArch64ISelDAGToDAG.cpp index 3f49fab..87a6d80 100644 --- a/lib/Target/AArch64/AArch64ISelDAGToDAG.cpp +++ b/lib/Target/AArch64/AArch64ISelDAGToDAG.cpp @@ -303,7 +303,7 @@ static AArch64_AM::ShiftExtendType getShiftTypeForNode(SDValue N) { /// \brief Determine wether it is worth to fold V into an extended register. bool AArch64DAGToDAGISel::isWorthFolding(SDValue V) const { - // it hurts if the a value is used at least twice, unless we are optimizing + // it hurts if the value is used at least twice, unless we are optimizing // for code size. if (ForCodeSize || V.hasOneUse()) return true; @@ -777,6 +777,21 @@ bool AArch64DAGToDAGISel::SelectAddrModeWRO(SDValue N, unsigned Size, return false; } +// Check if the given immediate is preferred by ADD. If an immediate can be +// encoded in an ADD, or it can be encoded in an "ADD LSL #12" and can not be +// encoded by one MOVZ, return true. +static bool isPreferredADD(int64_t ImmOff) { + // Constant in [0x0, 0xfff] can be encoded in ADD. + if ((ImmOff & 0xfffffffffffff000LL) == 0x0LL) + return true; + // Check if it can be encoded in an "ADD LSL #12". + if ((ImmOff & 0xffffffffff000fffLL) == 0x0LL) + // As a single MOVZ is faster than a "ADD of LSL #12", ignore such constant. + return (ImmOff & 0xffffffffff00ffffLL) != 0x0LL && + (ImmOff & 0xffffffffffff0fffLL) != 0x0LL; + return false; +} + bool AArch64DAGToDAGISel::SelectAddrModeXRO(SDValue N, unsigned Size, SDValue &Base, SDValue &Offset, SDValue &SignExtend, @@ -786,11 +801,6 @@ bool AArch64DAGToDAGISel::SelectAddrModeXRO(SDValue N, unsigned Size, SDValue LHS = N.getOperand(0); SDValue RHS = N.getOperand(1); - // We don't want to match immediate adds here, because they are better lowered - // to the register-immediate addressing modes. - if (isa<ConstantSDNode>(LHS) || isa<ConstantSDNode>(RHS)) - return false; - // Check if this particular node is reused in any non-memory related // operation. If yes, do not try to fold this node into the address // computation, since the computation will be kept. @@ -800,6 +810,36 @@ bool AArch64DAGToDAGISel::SelectAddrModeXRO(SDValue N, unsigned Size, return false; } + // Watch out if RHS is a wide immediate, it can not be selected into + // [BaseReg+Imm] addressing mode. Also it may not be able to be encoded into + // ADD/SUB. Instead it will use [BaseReg + 0] address mode and generate + // instructions like: + // MOV X0, WideImmediate + // ADD X1, BaseReg, X0 + // LDR X2, [X1, 0] + // For such situation, using [BaseReg, XReg] addressing mode can save one + // ADD/SUB: + // MOV X0, WideImmediate + // LDR X2, [BaseReg, X0] + if (isa<ConstantSDNode>(RHS)) { + int64_t ImmOff = (int64_t)dyn_cast<ConstantSDNode>(RHS)->getZExtValue(); + unsigned Scale = Log2_32(Size); + // Skip the immediate can be seleced by load/store addressing mode. + // Also skip the immediate can be encoded by a single ADD (SUB is also + // checked by using -ImmOff). + if ((ImmOff % Size == 0 && ImmOff >= 0 && ImmOff < (0x1000 << Scale)) || + isPreferredADD(ImmOff) || isPreferredADD(-ImmOff)) + return false; + + SDLoc DL(N.getNode()); + SDValue Ops[] = { RHS }; + SDNode *MOVI = + CurDAG->getMachineNode(AArch64::MOVi64imm, DL, MVT::i64, Ops); + SDValue MOVIV = SDValue(MOVI, 0); + // This ADD of two X register will be selected into [Reg+Reg] mode. + N = CurDAG->getNode(ISD::ADD, DL, MVT::i64, LHS, MOVIV); + } + // Remember if it is worth folding N when it produces extended register. bool IsExtendedRegisterWorthFolding = isWorthFolding(N); @@ -1381,20 +1421,21 @@ static bool isBitfieldExtractOpFromAnd(SelectionDAG *CurDAG, SDNode *N, return true; } -static bool isOneBitExtractOpFromShr(SDNode *N, unsigned &Opc, SDValue &Opd0, - unsigned &LSB, unsigned &MSB) { - // We are looking for the following pattern which basically extracts a single - // bit from the source value and places it in the LSB of the destination - // value, all other bits of the destination value or set to zero: +static bool isSeveralBitsExtractOpFromShr(SDNode *N, unsigned &Opc, + SDValue &Opd0, unsigned &LSB, + unsigned &MSB) { + // We are looking for the following pattern which basically extracts several + // continuous bits from the source value and places it from the LSB of the + // destination value, all other bits of the destination value or set to zero: // // Value2 = AND Value, MaskImm // SRL Value2, ShiftImm // - // with MaskImm >> ShiftImm == 1. + // with MaskImm >> ShiftImm to search for the bit width. // // This gets selected into a single UBFM: // - // UBFM Value, ShiftImm, ShiftImm + // UBFM Value, ShiftImm, BitWide + Srl_imm -1 // if (N->getOpcode() != ISD::SRL) @@ -1410,15 +1451,16 @@ static bool isOneBitExtractOpFromShr(SDNode *N, unsigned &Opc, SDValue &Opd0, if (!isIntImmediate(N->getOperand(1), Srl_imm)) return false; - // Check whether we really have a one bit extract here. - if (And_mask >> Srl_imm == 0x1) { + // Check whether we really have several bits extract here. + unsigned BitWide = 64 - CountLeadingOnes_64(~(And_mask >> Srl_imm)); + if (BitWide && isMask_64(And_mask >> Srl_imm)) { if (N->getValueType(0) == MVT::i32) Opc = AArch64::UBFMWri; else Opc = AArch64::UBFMXri; - LSB = MSB = Srl_imm; - + LSB = Srl_imm; + MSB = BitWide + Srl_imm - 1; return true; } @@ -1439,8 +1481,8 @@ static bool isBitfieldExtractOpFromShr(SDNode *N, unsigned &Opc, SDValue &Opd0, assert((VT == MVT::i32 || VT == MVT::i64) && "Type checking must have been done before calling this function"); - // Check for AND + SRL doing a one bit extract. - if (isOneBitExtractOpFromShr(N, Opc, Opd0, LSB, MSB)) + // Check for AND + SRL doing several bits extract. + if (isSeveralBitsExtractOpFromShr(N, Opc, Opd0, LSB, MSB)) return true; // we're looking for a shift of a shift @@ -2116,7 +2158,9 @@ SDNode *AArch64DAGToDAGISel::Select(SDNode *Node) { case 32: SubReg = AArch64::ssub; break; - case 16: // FALLTHROUGH + case 16: + SubReg = AArch64::hsub; + break; case 8: llvm_unreachable("unexpected zext-requiring extract element!"); } @@ -2204,9 +2248,9 @@ SDNode *AArch64DAGToDAGISel::Select(SDNode *Node) { return SelectLoad(Node, 2, AArch64::LD1Twov8b, AArch64::dsub0); else if (VT == MVT::v16i8) return SelectLoad(Node, 2, AArch64::LD1Twov16b, AArch64::qsub0); - else if (VT == MVT::v4i16) + else if (VT == MVT::v4i16 || VT == MVT::v4f16) return SelectLoad(Node, 2, AArch64::LD1Twov4h, AArch64::dsub0); - else if (VT == MVT::v8i16) + else if (VT == MVT::v8i16 || VT == MVT::v8f16) return SelectLoad(Node, 2, AArch64::LD1Twov8h, AArch64::qsub0); else if (VT == MVT::v2i32 || VT == MVT::v2f32) return SelectLoad(Node, 2, AArch64::LD1Twov2s, AArch64::dsub0); @@ -2222,9 +2266,9 @@ SDNode *AArch64DAGToDAGISel::Select(SDNode *Node) { return SelectLoad(Node, 3, AArch64::LD1Threev8b, AArch64::dsub0); else if (VT == MVT::v16i8) return SelectLoad(Node, 3, AArch64::LD1Threev16b, AArch64::qsub0); - else if (VT == MVT::v4i16) + else if (VT == MVT::v4i16 || VT == MVT::v4f16) return SelectLoad(Node, 3, AArch64::LD1Threev4h, AArch64::dsub0); - else if (VT == MVT::v8i16) + else if (VT == MVT::v8i16 || VT == MVT::v8f16) return SelectLoad(Node, 3, AArch64::LD1Threev8h, AArch64::qsub0); else if (VT == MVT::v2i32 || VT == MVT::v2f32) return SelectLoad(Node, 3, AArch64::LD1Threev2s, AArch64::dsub0); @@ -2240,9 +2284,9 @@ SDNode *AArch64DAGToDAGISel::Select(SDNode *Node) { return SelectLoad(Node, 4, AArch64::LD1Fourv8b, AArch64::dsub0); else if (VT == MVT::v16i8) return SelectLoad(Node, 4, AArch64::LD1Fourv16b, AArch64::qsub0); - else if (VT == MVT::v4i16) + else if (VT == MVT::v4i16 || VT == MVT::v4f16) return SelectLoad(Node, 4, AArch64::LD1Fourv4h, AArch64::dsub0); - else if (VT == MVT::v8i16) + else if (VT == MVT::v8i16 || VT == MVT::v8f16) return SelectLoad(Node, 4, AArch64::LD1Fourv8h, AArch64::qsub0); else if (VT == MVT::v2i32 || VT == MVT::v2f32) return SelectLoad(Node, 4, AArch64::LD1Fourv2s, AArch64::dsub0); @@ -2258,9 +2302,9 @@ SDNode *AArch64DAGToDAGISel::Select(SDNode *Node) { return SelectLoad(Node, 2, AArch64::LD2Twov8b, AArch64::dsub0); else if (VT == MVT::v16i8) return SelectLoad(Node, 2, AArch64::LD2Twov16b, AArch64::qsub0); - else if (VT == MVT::v4i16) + else if (VT == MVT::v4i16 || VT == MVT::v4f16) return SelectLoad(Node, 2, AArch64::LD2Twov4h, AArch64::dsub0); - else if (VT == MVT::v8i16) + else if (VT == MVT::v8i16 || VT == MVT::v8f16) return SelectLoad(Node, 2, AArch64::LD2Twov8h, AArch64::qsub0); else if (VT == MVT::v2i32 || VT == MVT::v2f32) return SelectLoad(Node, 2, AArch64::LD2Twov2s, AArch64::dsub0); @@ -2276,9 +2320,9 @@ SDNode *AArch64DAGToDAGISel::Select(SDNode *Node) { return SelectLoad(Node, 3, AArch64::LD3Threev8b, AArch64::dsub0); else if (VT == MVT::v16i8) return SelectLoad(Node, 3, AArch64::LD3Threev16b, AArch64::qsub0); - else if (VT == MVT::v4i16) + else if (VT == MVT::v4i16 || VT == MVT::v4f16) return SelectLoad(Node, 3, AArch64::LD3Threev4h, AArch64::dsub0); - else if (VT == MVT::v8i16) + else if (VT == MVT::v8i16 || VT == MVT::v8f16) return SelectLoad(Node, 3, AArch64::LD3Threev8h, AArch64::qsub0); else if (VT == MVT::v2i32 || VT == MVT::v2f32) return SelectLoad(Node, 3, AArch64::LD3Threev2s, AArch64::dsub0); @@ -2294,9 +2338,9 @@ SDNode *AArch64DAGToDAGISel::Select(SDNode *Node) { return SelectLoad(Node, 4, AArch64::LD4Fourv8b, AArch64::dsub0); else if (VT == MVT::v16i8) return SelectLoad(Node, 4, AArch64::LD4Fourv16b, AArch64::qsub0); - else if (VT == MVT::v4i16) + else if (VT == MVT::v4i16 || VT == MVT::v4f16) return SelectLoad(Node, 4, AArch64::LD4Fourv4h, AArch64::dsub0); - else if (VT == MVT::v8i16) + else if (VT == MVT::v8i16 || VT == MVT::v8f16) return SelectLoad(Node, 4, AArch64::LD4Fourv8h, AArch64::qsub0); else if (VT == MVT::v2i32 || VT == MVT::v2f32) return SelectLoad(Node, 4, AArch64::LD4Fourv2s, AArch64::dsub0); @@ -2312,9 +2356,9 @@ SDNode *AArch64DAGToDAGISel::Select(SDNode *Node) { return SelectLoad(Node, 2, AArch64::LD2Rv8b, AArch64::dsub0); else if (VT == MVT::v16i8) return SelectLoad(Node, 2, AArch64::LD2Rv16b, AArch64::qsub0); - else if (VT == MVT::v4i16) + else if (VT == MVT::v4i16 || VT == MVT::v4f16) return SelectLoad(Node, 2, AArch64::LD2Rv4h, AArch64::dsub0); - else if (VT == MVT::v8i16) + else if (VT == MVT::v8i16 || VT == MVT::v8f16) return SelectLoad(Node, 2, AArch64::LD2Rv8h, AArch64::qsub0); else if (VT == MVT::v2i32 || VT == MVT::v2f32) return SelectLoad(Node, 2, AArch64::LD2Rv2s, AArch64::dsub0); @@ -2330,9 +2374,9 @@ SDNode *AArch64DAGToDAGISel::Select(SDNode *Node) { return SelectLoad(Node, 3, AArch64::LD3Rv8b, AArch64::dsub0); else if (VT == MVT::v16i8) return SelectLoad(Node, 3, AArch64::LD3Rv16b, AArch64::qsub0); - else if (VT == MVT::v4i16) + else if (VT == MVT::v4i16 || VT == MVT::v4f16) return SelectLoad(Node, 3, AArch64::LD3Rv4h, AArch64::dsub0); - else if (VT == MVT::v8i16) + else if (VT == MVT::v8i16 || VT == MVT::v8f16) return SelectLoad(Node, 3, AArch64::LD3Rv8h, AArch64::qsub0); else if (VT == MVT::v2i32 || VT == MVT::v2f32) return SelectLoad(Node, 3, AArch64::LD3Rv2s, AArch64::dsub0); @@ -2348,9 +2392,9 @@ SDNode *AArch64DAGToDAGISel::Select(SDNode *Node) { return SelectLoad(Node, 4, AArch64::LD4Rv8b, AArch64::dsub0); else if (VT == MVT::v16i8) return SelectLoad(Node, 4, AArch64::LD4Rv16b, AArch64::qsub0); - else if (VT == MVT::v4i16) + else if (VT == MVT::v4i16 || VT == MVT::v4f16) return SelectLoad(Node, 4, AArch64::LD4Rv4h, AArch64::dsub0); - else if (VT == MVT::v8i16) + else if (VT == MVT::v8i16 || VT == MVT::v8f16) return SelectLoad(Node, 4, AArch64::LD4Rv8h, AArch64::qsub0); else if (VT == MVT::v2i32 || VT == MVT::v2f32) return SelectLoad(Node, 4, AArch64::LD4Rv2s, AArch64::dsub0); @@ -2364,7 +2408,8 @@ SDNode *AArch64DAGToDAGISel::Select(SDNode *Node) { case Intrinsic::aarch64_neon_ld2lane: if (VT == MVT::v16i8 || VT == MVT::v8i8) return SelectLoadLane(Node, 2, AArch64::LD2i8); - else if (VT == MVT::v8i16 || VT == MVT::v4i16) + else if (VT == MVT::v8i16 || VT == MVT::v4i16 || VT == MVT::v4f16 || + VT == MVT::v8f16) return SelectLoadLane(Node, 2, AArch64::LD2i16); else if (VT == MVT::v4i32 || VT == MVT::v2i32 || VT == MVT::v4f32 || VT == MVT::v2f32) @@ -2376,7 +2421,8 @@ SDNode *AArch64DAGToDAGISel::Select(SDNode *Node) { case Intrinsic::aarch64_neon_ld3lane: if (VT == MVT::v16i8 || VT == MVT::v8i8) return SelectLoadLane(Node, 3, AArch64::LD3i8); - else if (VT == MVT::v8i16 || VT == MVT::v4i16) + else if (VT == MVT::v8i16 || VT == MVT::v4i16 || VT == MVT::v4f16 || + VT == MVT::v8f16) return SelectLoadLane(Node, 3, AArch64::LD3i16); else if (VT == MVT::v4i32 || VT == MVT::v2i32 || VT == MVT::v4f32 || VT == MVT::v2f32) @@ -2388,7 +2434,8 @@ SDNode *AArch64DAGToDAGISel::Select(SDNode *Node) { case Intrinsic::aarch64_neon_ld4lane: if (VT == MVT::v16i8 || VT == MVT::v8i8) return SelectLoadLane(Node, 4, AArch64::LD4i8); - else if (VT == MVT::v8i16 || VT == MVT::v4i16) + else if (VT == MVT::v8i16 || VT == MVT::v4i16 || VT == MVT::v4f16 || + VT == MVT::v8f16) return SelectLoadLane(Node, 4, AArch64::LD4i16); else if (VT == MVT::v4i32 || VT == MVT::v2i32 || VT == MVT::v4f32 || VT == MVT::v2f32) @@ -2448,9 +2495,9 @@ SDNode *AArch64DAGToDAGISel::Select(SDNode *Node) { return SelectStore(Node, 2, AArch64::ST1Twov8b); else if (VT == MVT::v16i8) return SelectStore(Node, 2, AArch64::ST1Twov16b); - else if (VT == MVT::v4i16) + else if (VT == MVT::v4i16 || VT == MVT::v4f16) return SelectStore(Node, 2, AArch64::ST1Twov4h); - else if (VT == MVT::v8i16) + else if (VT == MVT::v8i16 || VT == MVT::v8f16) return SelectStore(Node, 2, AArch64::ST1Twov8h); else if (VT == MVT::v2i32 || VT == MVT::v2f32) return SelectStore(Node, 2, AArch64::ST1Twov2s); @@ -2467,9 +2514,9 @@ SDNode *AArch64DAGToDAGISel::Select(SDNode *Node) { return SelectStore(Node, 3, AArch64::ST1Threev8b); else if (VT == MVT::v16i8) return SelectStore(Node, 3, AArch64::ST1Threev16b); - else if (VT == MVT::v4i16) + else if (VT == MVT::v4i16 || VT == MVT::v4f16) return SelectStore(Node, 3, AArch64::ST1Threev4h); - else if (VT == MVT::v8i16) + else if (VT == MVT::v8i16 || VT == MVT::v8f16) return SelectStore(Node, 3, AArch64::ST1Threev8h); else if (VT == MVT::v2i32 || VT == MVT::v2f32) return SelectStore(Node, 3, AArch64::ST1Threev2s); @@ -2486,9 +2533,9 @@ SDNode *AArch64DAGToDAGISel::Select(SDNode *Node) { return SelectStore(Node, 4, AArch64::ST1Fourv8b); else if (VT == MVT::v16i8) return SelectStore(Node, 4, AArch64::ST1Fourv16b); - else if (VT == MVT::v4i16) + else if (VT == MVT::v4i16 || VT == MVT::v4f16) return SelectStore(Node, 4, AArch64::ST1Fourv4h); - else if (VT == MVT::v8i16) + else if (VT == MVT::v8i16 || VT == MVT::v8f16) return SelectStore(Node, 4, AArch64::ST1Fourv8h); else if (VT == MVT::v2i32 || VT == MVT::v2f32) return SelectStore(Node, 4, AArch64::ST1Fourv2s); @@ -2505,9 +2552,9 @@ SDNode *AArch64DAGToDAGISel::Select(SDNode *Node) { return SelectStore(Node, 2, AArch64::ST2Twov8b); else if (VT == MVT::v16i8) return SelectStore(Node, 2, AArch64::ST2Twov16b); - else if (VT == MVT::v4i16) + else if (VT == MVT::v4i16 || VT == MVT::v4f16) return SelectStore(Node, 2, AArch64::ST2Twov4h); - else if (VT == MVT::v8i16) + else if (VT == MVT::v8i16 || VT == MVT::v8f16) return SelectStore(Node, 2, AArch64::ST2Twov8h); else if (VT == MVT::v2i32 || VT == MVT::v2f32) return SelectStore(Node, 2, AArch64::ST2Twov2s); @@ -2524,9 +2571,9 @@ SDNode *AArch64DAGToDAGISel::Select(SDNode *Node) { return SelectStore(Node, 3, AArch64::ST3Threev8b); else if (VT == MVT::v16i8) return SelectStore(Node, 3, AArch64::ST3Threev16b); - else if (VT == MVT::v4i16) + else if (VT == MVT::v4i16 || VT == MVT::v4f16) return SelectStore(Node, 3, AArch64::ST3Threev4h); - else if (VT == MVT::v8i16) + else if (VT == MVT::v8i16 || VT == MVT::v8f16) return SelectStore(Node, 3, AArch64::ST3Threev8h); else if (VT == MVT::v2i32 || VT == MVT::v2f32) return SelectStore(Node, 3, AArch64::ST3Threev2s); @@ -2543,9 +2590,9 @@ SDNode *AArch64DAGToDAGISel::Select(SDNode *Node) { return SelectStore(Node, 4, AArch64::ST4Fourv8b); else if (VT == MVT::v16i8) return SelectStore(Node, 4, AArch64::ST4Fourv16b); - else if (VT == MVT::v4i16) + else if (VT == MVT::v4i16 || VT == MVT::v4f16) return SelectStore(Node, 4, AArch64::ST4Fourv4h); - else if (VT == MVT::v8i16) + else if (VT == MVT::v8i16 || VT == MVT::v8f16) return SelectStore(Node, 4, AArch64::ST4Fourv8h); else if (VT == MVT::v2i32 || VT == MVT::v2f32) return SelectStore(Node, 4, AArch64::ST4Fourv2s); @@ -2560,7 +2607,8 @@ SDNode *AArch64DAGToDAGISel::Select(SDNode *Node) { case Intrinsic::aarch64_neon_st2lane: { if (VT == MVT::v16i8 || VT == MVT::v8i8) return SelectStoreLane(Node, 2, AArch64::ST2i8); - else if (VT == MVT::v8i16 || VT == MVT::v4i16) + else if (VT == MVT::v8i16 || VT == MVT::v4i16 || VT == MVT::v4f16 || + VT == MVT::v8f16) return SelectStoreLane(Node, 2, AArch64::ST2i16); else if (VT == MVT::v4i32 || VT == MVT::v2i32 || VT == MVT::v4f32 || VT == MVT::v2f32) @@ -2573,7 +2621,8 @@ SDNode *AArch64DAGToDAGISel::Select(SDNode *Node) { case Intrinsic::aarch64_neon_st3lane: { if (VT == MVT::v16i8 || VT == MVT::v8i8) return SelectStoreLane(Node, 3, AArch64::ST3i8); - else if (VT == MVT::v8i16 || VT == MVT::v4i16) + else if (VT == MVT::v8i16 || VT == MVT::v4i16 || VT == MVT::v4f16 || + VT == MVT::v8f16) return SelectStoreLane(Node, 3, AArch64::ST3i16); else if (VT == MVT::v4i32 || VT == MVT::v2i32 || VT == MVT::v4f32 || VT == MVT::v2f32) @@ -2586,7 +2635,8 @@ SDNode *AArch64DAGToDAGISel::Select(SDNode *Node) { case Intrinsic::aarch64_neon_st4lane: { if (VT == MVT::v16i8 || VT == MVT::v8i8) return SelectStoreLane(Node, 4, AArch64::ST4i8); - else if (VT == MVT::v8i16 || VT == MVT::v4i16) + else if (VT == MVT::v8i16 || VT == MVT::v4i16 || VT == MVT::v4f16 || + VT == MVT::v8f16) return SelectStoreLane(Node, 4, AArch64::ST4i16); else if (VT == MVT::v4i32 || VT == MVT::v2i32 || VT == MVT::v4f32 || VT == MVT::v2f32) @@ -2603,9 +2653,9 @@ SDNode *AArch64DAGToDAGISel::Select(SDNode *Node) { return SelectPostLoad(Node, 2, AArch64::LD2Twov8b_POST, AArch64::dsub0); else if (VT == MVT::v16i8) return SelectPostLoad(Node, 2, AArch64::LD2Twov16b_POST, AArch64::qsub0); - else if (VT == MVT::v4i16) + else if (VT == MVT::v4i16 || VT == MVT::v4f16) return SelectPostLoad(Node, 2, AArch64::LD2Twov4h_POST, AArch64::dsub0); - else if (VT == MVT::v8i16) + else if (VT == MVT::v8i16 || VT == MVT::v8f16) return SelectPostLoad(Node, 2, AArch64::LD2Twov8h_POST, AArch64::qsub0); else if (VT == MVT::v2i32 || VT == MVT::v2f32) return SelectPostLoad(Node, 2, AArch64::LD2Twov2s_POST, AArch64::dsub0); @@ -2622,9 +2672,9 @@ SDNode *AArch64DAGToDAGISel::Select(SDNode *Node) { return SelectPostLoad(Node, 3, AArch64::LD3Threev8b_POST, AArch64::dsub0); else if (VT == MVT::v16i8) return SelectPostLoad(Node, 3, AArch64::LD3Threev16b_POST, AArch64::qsub0); - else if (VT == MVT::v4i16) + else if (VT == MVT::v4i16 || VT == MVT::v4f16) return SelectPostLoad(Node, 3, AArch64::LD3Threev4h_POST, AArch64::dsub0); - else if (VT == MVT::v8i16) + else if (VT == MVT::v8i16 || VT == MVT::v8f16) return SelectPostLoad(Node, 3, AArch64::LD3Threev8h_POST, AArch64::qsub0); else if (VT == MVT::v2i32 || VT == MVT::v2f32) return SelectPostLoad(Node, 3, AArch64::LD3Threev2s_POST, AArch64::dsub0); @@ -2641,9 +2691,9 @@ SDNode *AArch64DAGToDAGISel::Select(SDNode *Node) { return SelectPostLoad(Node, 4, AArch64::LD4Fourv8b_POST, AArch64::dsub0); else if (VT == MVT::v16i8) return SelectPostLoad(Node, 4, AArch64::LD4Fourv16b_POST, AArch64::qsub0); - else if (VT == MVT::v4i16) + else if (VT == MVT::v4i16 || VT == MVT::v4f16) return SelectPostLoad(Node, 4, AArch64::LD4Fourv4h_POST, AArch64::dsub0); - else if (VT == MVT::v8i16) + else if (VT == MVT::v8i16 || VT == MVT::v8f16) return SelectPostLoad(Node, 4, AArch64::LD4Fourv8h_POST, AArch64::qsub0); else if (VT == MVT::v2i32 || VT == MVT::v2f32) return SelectPostLoad(Node, 4, AArch64::LD4Fourv2s_POST, AArch64::dsub0); @@ -2660,9 +2710,9 @@ SDNode *AArch64DAGToDAGISel::Select(SDNode *Node) { return SelectPostLoad(Node, 2, AArch64::LD1Twov8b_POST, AArch64::dsub0); else if (VT == MVT::v16i8) return SelectPostLoad(Node, 2, AArch64::LD1Twov16b_POST, AArch64::qsub0); - else if (VT == MVT::v4i16) + else if (VT == MVT::v4i16 || VT == MVT::v4f16) return SelectPostLoad(Node, 2, AArch64::LD1Twov4h_POST, AArch64::dsub0); - else if (VT == MVT::v8i16) + else if (VT == MVT::v8i16 || VT == MVT::v8f16) return SelectPostLoad(Node, 2, AArch64::LD1Twov8h_POST, AArch64::qsub0); else if (VT == MVT::v2i32 || VT == MVT::v2f32) return SelectPostLoad(Node, 2, AArch64::LD1Twov2s_POST, AArch64::dsub0); @@ -2679,9 +2729,9 @@ SDNode *AArch64DAGToDAGISel::Select(SDNode *Node) { return SelectPostLoad(Node, 3, AArch64::LD1Threev8b_POST, AArch64::dsub0); else if (VT == MVT::v16i8) return SelectPostLoad(Node, 3, AArch64::LD1Threev16b_POST, AArch64::qsub0); - else if (VT == MVT::v4i16) + else if (VT == MVT::v4i16 || VT == MVT::v4f16) return SelectPostLoad(Node, 3, AArch64::LD1Threev4h_POST, AArch64::dsub0); - else if (VT == MVT::v8i16) + else if (VT == MVT::v8i16 || VT == MVT::v8f16) return SelectPostLoad(Node, 3, AArch64::LD1Threev8h_POST, AArch64::qsub0); else if (VT == MVT::v2i32 || VT == MVT::v2f32) return SelectPostLoad(Node, 3, AArch64::LD1Threev2s_POST, AArch64::dsub0); @@ -2698,9 +2748,9 @@ SDNode *AArch64DAGToDAGISel::Select(SDNode *Node) { return SelectPostLoad(Node, 4, AArch64::LD1Fourv8b_POST, AArch64::dsub0); else if (VT == MVT::v16i8) return SelectPostLoad(Node, 4, AArch64::LD1Fourv16b_POST, AArch64::qsub0); - else if (VT == MVT::v4i16) + else if (VT == MVT::v4i16 || VT == MVT::v4f16) return SelectPostLoad(Node, 4, AArch64::LD1Fourv4h_POST, AArch64::dsub0); - else if (VT == MVT::v8i16) + else if (VT == MVT::v8i16 || VT == MVT::v8f16) return SelectPostLoad(Node, 4, AArch64::LD1Fourv8h_POST, AArch64::qsub0); else if (VT == MVT::v2i32 || VT == MVT::v2f32) return SelectPostLoad(Node, 4, AArch64::LD1Fourv2s_POST, AArch64::dsub0); @@ -2717,9 +2767,9 @@ SDNode *AArch64DAGToDAGISel::Select(SDNode *Node) { return SelectPostLoad(Node, 1, AArch64::LD1Rv8b_POST, AArch64::dsub0); else if (VT == MVT::v16i8) return SelectPostLoad(Node, 1, AArch64::LD1Rv16b_POST, AArch64::qsub0); - else if (VT == MVT::v4i16) + else if (VT == MVT::v4i16 || VT == MVT::v4f16) return SelectPostLoad(Node, 1, AArch64::LD1Rv4h_POST, AArch64::dsub0); - else if (VT == MVT::v8i16) + else if (VT == MVT::v8i16 || VT == MVT::v8f16) return SelectPostLoad(Node, 1, AArch64::LD1Rv8h_POST, AArch64::qsub0); else if (VT == MVT::v2i32 || VT == MVT::v2f32) return SelectPostLoad(Node, 1, AArch64::LD1Rv2s_POST, AArch64::dsub0); @@ -2736,9 +2786,9 @@ SDNode *AArch64DAGToDAGISel::Select(SDNode *Node) { return SelectPostLoad(Node, 2, AArch64::LD2Rv8b_POST, AArch64::dsub0); else if (VT == MVT::v16i8) return SelectPostLoad(Node, 2, AArch64::LD2Rv16b_POST, AArch64::qsub0); - else if (VT == MVT::v4i16) + else if (VT == MVT::v4i16 || VT == MVT::v4f16) return SelectPostLoad(Node, 2, AArch64::LD2Rv4h_POST, AArch64::dsub0); - else if (VT == MVT::v8i16) + else if (VT == MVT::v8i16 || VT == MVT::v8f16) return SelectPostLoad(Node, 2, AArch64::LD2Rv8h_POST, AArch64::qsub0); else if (VT == MVT::v2i32 || VT == MVT::v2f32) return SelectPostLoad(Node, 2, AArch64::LD2Rv2s_POST, AArch64::dsub0); @@ -2755,9 +2805,9 @@ SDNode *AArch64DAGToDAGISel::Select(SDNode *Node) { return SelectPostLoad(Node, 3, AArch64::LD3Rv8b_POST, AArch64::dsub0); else if (VT == MVT::v16i8) return SelectPostLoad(Node, 3, AArch64::LD3Rv16b_POST, AArch64::qsub0); - else if (VT == MVT::v4i16) + else if (VT == MVT::v4i16 || VT == MVT::v4f16) return SelectPostLoad(Node, 3, AArch64::LD3Rv4h_POST, AArch64::dsub0); - else if (VT == MVT::v8i16) + else if (VT == MVT::v8i16 || VT == MVT::v8f16) return SelectPostLoad(Node, 3, AArch64::LD3Rv8h_POST, AArch64::qsub0); else if (VT == MVT::v2i32 || VT == MVT::v2f32) return SelectPostLoad(Node, 3, AArch64::LD3Rv2s_POST, AArch64::dsub0); @@ -2774,9 +2824,9 @@ SDNode *AArch64DAGToDAGISel::Select(SDNode *Node) { return SelectPostLoad(Node, 4, AArch64::LD4Rv8b_POST, AArch64::dsub0); else if (VT == MVT::v16i8) return SelectPostLoad(Node, 4, AArch64::LD4Rv16b_POST, AArch64::qsub0); - else if (VT == MVT::v4i16) + else if (VT == MVT::v4i16 || VT == MVT::v4f16) return SelectPostLoad(Node, 4, AArch64::LD4Rv4h_POST, AArch64::dsub0); - else if (VT == MVT::v8i16) + else if (VT == MVT::v8i16 || VT == MVT::v8f16) return SelectPostLoad(Node, 4, AArch64::LD4Rv8h_POST, AArch64::qsub0); else if (VT == MVT::v2i32 || VT == MVT::v2f32) return SelectPostLoad(Node, 4, AArch64::LD4Rv2s_POST, AArch64::dsub0); @@ -2791,7 +2841,8 @@ SDNode *AArch64DAGToDAGISel::Select(SDNode *Node) { case AArch64ISD::LD1LANEpost: { if (VT == MVT::v16i8 || VT == MVT::v8i8) return SelectPostLoadLane(Node, 1, AArch64::LD1i8_POST); - else if (VT == MVT::v8i16 || VT == MVT::v4i16) + else if (VT == MVT::v8i16 || VT == MVT::v4i16 || VT == MVT::v4f16 || + VT == MVT::v8f16) return SelectPostLoadLane(Node, 1, AArch64::LD1i16_POST); else if (VT == MVT::v4i32 || VT == MVT::v2i32 || VT == MVT::v4f32 || VT == MVT::v2f32) @@ -2804,7 +2855,8 @@ SDNode *AArch64DAGToDAGISel::Select(SDNode *Node) { case AArch64ISD::LD2LANEpost: { if (VT == MVT::v16i8 || VT == MVT::v8i8) return SelectPostLoadLane(Node, 2, AArch64::LD2i8_POST); - else if (VT == MVT::v8i16 || VT == MVT::v4i16) + else if (VT == MVT::v8i16 || VT == MVT::v4i16 || VT == MVT::v4f16 || + VT == MVT::v8f16) return SelectPostLoadLane(Node, 2, AArch64::LD2i16_POST); else if (VT == MVT::v4i32 || VT == MVT::v2i32 || VT == MVT::v4f32 || VT == MVT::v2f32) @@ -2817,7 +2869,8 @@ SDNode *AArch64DAGToDAGISel::Select(SDNode *Node) { case AArch64ISD::LD3LANEpost: { if (VT == MVT::v16i8 || VT == MVT::v8i8) return SelectPostLoadLane(Node, 3, AArch64::LD3i8_POST); - else if (VT == MVT::v8i16 || VT == MVT::v4i16) + else if (VT == MVT::v8i16 || VT == MVT::v4i16 || VT == MVT::v4f16 || + VT == MVT::v8f16) return SelectPostLoadLane(Node, 3, AArch64::LD3i16_POST); else if (VT == MVT::v4i32 || VT == MVT::v2i32 || VT == MVT::v4f32 || VT == MVT::v2f32) @@ -2830,7 +2883,8 @@ SDNode *AArch64DAGToDAGISel::Select(SDNode *Node) { case AArch64ISD::LD4LANEpost: { if (VT == MVT::v16i8 || VT == MVT::v8i8) return SelectPostLoadLane(Node, 4, AArch64::LD4i8_POST); - else if (VT == MVT::v8i16 || VT == MVT::v4i16) + else if (VT == MVT::v8i16 || VT == MVT::v4i16 || VT == MVT::v4f16 || + VT == MVT::v8f16) return SelectPostLoadLane(Node, 4, AArch64::LD4i16_POST); else if (VT == MVT::v4i32 || VT == MVT::v2i32 || VT == MVT::v4f32 || VT == MVT::v2f32) @@ -2846,9 +2900,9 @@ SDNode *AArch64DAGToDAGISel::Select(SDNode *Node) { return SelectPostStore(Node, 2, AArch64::ST2Twov8b_POST); else if (VT == MVT::v16i8) return SelectPostStore(Node, 2, AArch64::ST2Twov16b_POST); - else if (VT == MVT::v4i16) + else if (VT == MVT::v4i16 || VT == MVT::v4f16) return SelectPostStore(Node, 2, AArch64::ST2Twov4h_POST); - else if (VT == MVT::v8i16) + else if (VT == MVT::v8i16 || VT == MVT::v8f16) return SelectPostStore(Node, 2, AArch64::ST2Twov8h_POST); else if (VT == MVT::v2i32 || VT == MVT::v2f32) return SelectPostStore(Node, 2, AArch64::ST2Twov2s_POST); @@ -2866,9 +2920,9 @@ SDNode *AArch64DAGToDAGISel::Select(SDNode *Node) { return SelectPostStore(Node, 3, AArch64::ST3Threev8b_POST); else if (VT == MVT::v16i8) return SelectPostStore(Node, 3, AArch64::ST3Threev16b_POST); - else if (VT == MVT::v4i16) + else if (VT == MVT::v4i16 || VT == MVT::v4f16) return SelectPostStore(Node, 3, AArch64::ST3Threev4h_POST); - else if (VT == MVT::v8i16) + else if (VT == MVT::v8i16 || VT == MVT::v8f16) return SelectPostStore(Node, 3, AArch64::ST3Threev8h_POST); else if (VT == MVT::v2i32 || VT == MVT::v2f32) return SelectPostStore(Node, 3, AArch64::ST3Threev2s_POST); @@ -2886,9 +2940,9 @@ SDNode *AArch64DAGToDAGISel::Select(SDNode *Node) { return SelectPostStore(Node, 4, AArch64::ST4Fourv8b_POST); else if (VT == MVT::v16i8) return SelectPostStore(Node, 4, AArch64::ST4Fourv16b_POST); - else if (VT == MVT::v4i16) + else if (VT == MVT::v4i16 || VT == MVT::v4f16) return SelectPostStore(Node, 4, AArch64::ST4Fourv4h_POST); - else if (VT == MVT::v8i16) + else if (VT == MVT::v8i16 || VT == MVT::v8f16) return SelectPostStore(Node, 4, AArch64::ST4Fourv8h_POST); else if (VT == MVT::v2i32 || VT == MVT::v2f32) return SelectPostStore(Node, 4, AArch64::ST4Fourv2s_POST); @@ -2906,9 +2960,9 @@ SDNode *AArch64DAGToDAGISel::Select(SDNode *Node) { return SelectPostStore(Node, 2, AArch64::ST1Twov8b_POST); else if (VT == MVT::v16i8) return SelectPostStore(Node, 2, AArch64::ST1Twov16b_POST); - else if (VT == MVT::v4i16) + else if (VT == MVT::v4i16 || VT == MVT::v4f16) return SelectPostStore(Node, 2, AArch64::ST1Twov4h_POST); - else if (VT == MVT::v8i16) + else if (VT == MVT::v8i16 || VT == MVT::v8f16) return SelectPostStore(Node, 2, AArch64::ST1Twov8h_POST); else if (VT == MVT::v2i32 || VT == MVT::v2f32) return SelectPostStore(Node, 2, AArch64::ST1Twov2s_POST); @@ -2926,9 +2980,9 @@ SDNode *AArch64DAGToDAGISel::Select(SDNode *Node) { return SelectPostStore(Node, 3, AArch64::ST1Threev8b_POST); else if (VT == MVT::v16i8) return SelectPostStore(Node, 3, AArch64::ST1Threev16b_POST); - else if (VT == MVT::v4i16) + else if (VT == MVT::v4i16 || VT == MVT::v4f16) return SelectPostStore(Node, 3, AArch64::ST1Threev4h_POST); - else if (VT == MVT::v8i16) + else if (VT == MVT::v8i16 || VT == MVT::v8f16) return SelectPostStore(Node, 3, AArch64::ST1Threev8h_POST); else if (VT == MVT::v2i32 || VT == MVT::v2f32) return SelectPostStore(Node, 3, AArch64::ST1Threev2s_POST); @@ -2946,9 +3000,9 @@ SDNode *AArch64DAGToDAGISel::Select(SDNode *Node) { return SelectPostStore(Node, 4, AArch64::ST1Fourv8b_POST); else if (VT == MVT::v16i8) return SelectPostStore(Node, 4, AArch64::ST1Fourv16b_POST); - else if (VT == MVT::v4i16) + else if (VT == MVT::v4i16 || VT == MVT::v4f16) return SelectPostStore(Node, 4, AArch64::ST1Fourv4h_POST); - else if (VT == MVT::v8i16) + else if (VT == MVT::v8i16 || VT == MVT::v8f16) return SelectPostStore(Node, 4, AArch64::ST1Fourv8h_POST); else if (VT == MVT::v2i32 || VT == MVT::v2f32) return SelectPostStore(Node, 4, AArch64::ST1Fourv2s_POST); @@ -2964,7 +3018,8 @@ SDNode *AArch64DAGToDAGISel::Select(SDNode *Node) { VT = Node->getOperand(1).getValueType(); if (VT == MVT::v16i8 || VT == MVT::v8i8) return SelectPostStoreLane(Node, 2, AArch64::ST2i8_POST); - else if (VT == MVT::v8i16 || VT == MVT::v4i16) + else if (VT == MVT::v8i16 || VT == MVT::v4i16 || VT == MVT::v4f16 || + VT == MVT::v8f16) return SelectPostStoreLane(Node, 2, AArch64::ST2i16_POST); else if (VT == MVT::v4i32 || VT == MVT::v2i32 || VT == MVT::v4f32 || VT == MVT::v2f32) @@ -2978,7 +3033,8 @@ SDNode *AArch64DAGToDAGISel::Select(SDNode *Node) { VT = Node->getOperand(1).getValueType(); if (VT == MVT::v16i8 || VT == MVT::v8i8) return SelectPostStoreLane(Node, 3, AArch64::ST3i8_POST); - else if (VT == MVT::v8i16 || VT == MVT::v4i16) + else if (VT == MVT::v8i16 || VT == MVT::v4i16 || VT == MVT::v4f16 || + VT == MVT::v8f16) return SelectPostStoreLane(Node, 3, AArch64::ST3i16_POST); else if (VT == MVT::v4i32 || VT == MVT::v2i32 || VT == MVT::v4f32 || VT == MVT::v2f32) @@ -2992,7 +3048,8 @@ SDNode *AArch64DAGToDAGISel::Select(SDNode *Node) { VT = Node->getOperand(1).getValueType(); if (VT == MVT::v16i8 || VT == MVT::v8i8) return SelectPostStoreLane(Node, 4, AArch64::ST4i8_POST); - else if (VT == MVT::v8i16 || VT == MVT::v4i16) + else if (VT == MVT::v8i16 || VT == MVT::v4i16 || VT == MVT::v4f16 || + VT == MVT::v8f16) return SelectPostStoreLane(Node, 4, AArch64::ST4i16_POST); else if (VT == MVT::v4i32 || VT == MVT::v2i32 || VT == MVT::v4f32 || VT == MVT::v2f32) |