diff options
Diffstat (limited to 'lib/Target/ARM/ARMISelLowering.cpp')
| -rw-r--r-- | lib/Target/ARM/ARMISelLowering.cpp | 216 |
1 files changed, 154 insertions, 62 deletions
diff --git a/lib/Target/ARM/ARMISelLowering.cpp b/lib/Target/ARM/ARMISelLowering.cpp index 4ae4af1..fb738cd 100644 --- a/lib/Target/ARM/ARMISelLowering.cpp +++ b/lib/Target/ARM/ARMISelLowering.cpp @@ -506,6 +506,9 @@ ARMTargetLowering::ARMTargetLowering(TargetMachine &TM) setTargetDAGCombine(ISD::VECTOR_SHUFFLE); setTargetDAGCombine(ISD::INSERT_VECTOR_ELT); setTargetDAGCombine(ISD::STORE); + setTargetDAGCombine(ISD::FP_TO_SINT); + setTargetDAGCombine(ISD::FP_TO_UINT); + setTargetDAGCombine(ISD::FDIV); } computeRegisterProperties(); @@ -974,12 +977,12 @@ Sched::Preference ARMTargetLowering::getSchedulingPreference(SDNode *N) const { // Load are scheduled for latency even if there instruction itinerary // is not available. const TargetInstrInfo *TII = getTargetMachine().getInstrInfo(); - const TargetInstrDesc &TID = TII->get(N->getMachineOpcode()); + const MCInstrDesc &MCID = TII->get(N->getMachineOpcode()); - if (TID.getNumDefs() == 0) + if (MCID.getNumDefs() == 0) return Sched::RegPressure; if (!Itins->isEmpty() && - Itins->getOperandCycle(TID.getSchedClass(), 0) > 2) + Itins->getOperandCycle(MCID.getSchedClass(), 0) > 2) return Sched::Latency; return Sched::RegPressure; @@ -5523,7 +5526,7 @@ SDValue combineSelectAndUse(SDNode *N, SDValue Slct, SDValue OtherOp, return SDValue(); } -// AddCombineToVPADDL- For pair-wise add on neon, use the vpaddl instruction +// AddCombineToVPADDL- For pair-wise add on neon, use the vpaddl instruction // (only after legalization). static SDValue AddCombineToVPADDL(SDNode *N, SDValue N0, SDValue N1, TargetLowering::DAGCombinerInfo &DCI, @@ -5554,25 +5557,25 @@ static SDValue AddCombineToVPADDL(SDNode *N, SDValue N0, SDValue N1, SDNode *V = Vec.getNode(); unsigned nextIndex = 0; - // For each operands to the ADD which are BUILD_VECTORs, + // For each operands to the ADD which are BUILD_VECTORs, // check to see if each of their operands are an EXTRACT_VECTOR with // the same vector and appropriate index. for (unsigned i = 0, e = N0->getNumOperands(); i != e; ++i) { if (N0->getOperand(i)->getOpcode() == ISD::EXTRACT_VECTOR_ELT && N1->getOperand(i)->getOpcode() == ISD::EXTRACT_VECTOR_ELT) { - + SDValue ExtVec0 = N0->getOperand(i); SDValue ExtVec1 = N1->getOperand(i); - + // First operand is the vector, verify its the same. if (V != ExtVec0->getOperand(0).getNode() || V != ExtVec1->getOperand(0).getNode()) return SDValue(); - + // Second is the constant, verify its correct. ConstantSDNode *C0 = dyn_cast<ConstantSDNode>(ExtVec0->getOperand(1)); ConstantSDNode *C1 = dyn_cast<ConstantSDNode>(ExtVec1->getOperand(1)); - + // For the constant, we want to see all the even or all the odd. if (!C0 || !C1 || C0->getZExtValue() != nextIndex || C1->getZExtValue() != nextIndex+1) @@ -5580,7 +5583,7 @@ static SDValue AddCombineToVPADDL(SDNode *N, SDValue N0, SDValue N1, // Increment index. nextIndex+=2; - } else + } else return SDValue(); } @@ -5595,7 +5598,7 @@ static SDValue AddCombineToVPADDL(SDNode *N, SDValue N0, SDValue N1, // Input is the vector. Ops.push_back(Vec); - + // Get widened type and narrowed type. MVT widenType; unsigned numElem = VT.getVectorNumElements(); @@ -5624,7 +5627,7 @@ static SDValue PerformADDCombineWithOperands(SDNode *N, SDValue N0, SDValue N1, SDValue Result = AddCombineToVPADDL(N, N0, N1, DCI, Subtarget); if (Result.getNode()) return Result; - + // fold (add (select cc, 0, c), x) -> (select cc, x, (add, x, c)) if (N0.getOpcode() == ISD::SELECT && N0.getNode()->hasOneUse()) { SDValue Result = combineSelectAndUse(N, N0, N1, DCI); @@ -6479,7 +6482,105 @@ static SDValue PerformVDUPLANECombine(SDNode *N, return DCI.DAG.getNode(ISD::BITCAST, N->getDebugLoc(), VT, Op); } -/// getVShiftImm - Check if this is a valid build_vector for the immediate +// isConstVecPow2 - Return true if each vector element is a power of 2, all +// elements are the same constant, C, and Log2(C) ranges from 1 to 32. +static bool isConstVecPow2(SDValue ConstVec, bool isSigned, uint64_t &C) +{ + integerPart cN; + integerPart c0 = 0; + for (unsigned I = 0, E = ConstVec.getValueType().getVectorNumElements(); + I != E; I++) { + ConstantFPSDNode *C = dyn_cast<ConstantFPSDNode>(ConstVec.getOperand(I)); + if (!C) + return false; + + bool isExact; + APFloat APF = C->getValueAPF(); + if (APF.convertToInteger(&cN, 64, isSigned, APFloat::rmTowardZero, &isExact) + != APFloat::opOK || !isExact) + return false; + + c0 = (I == 0) ? cN : c0; + if (!isPowerOf2_64(cN) || c0 != cN || Log2_64(c0) < 1 || Log2_64(c0) > 32) + return false; + } + C = c0; + return true; +} + +/// PerformVCVTCombine - VCVT (floating-point to fixed-point, Advanced SIMD) +/// can replace combinations of VMUL and VCVT (floating-point to integer) +/// when the VMUL has a constant operand that is a power of 2. +/// +/// Example (assume d17 = <float 8.000000e+00, float 8.000000e+00>): +/// vmul.f32 d16, d17, d16 +/// vcvt.s32.f32 d16, d16 +/// becomes: +/// vcvt.s32.f32 d16, d16, #3 +static SDValue PerformVCVTCombine(SDNode *N, + TargetLowering::DAGCombinerInfo &DCI, + const ARMSubtarget *Subtarget) { + SelectionDAG &DAG = DCI.DAG; + SDValue Op = N->getOperand(0); + + if (!Subtarget->hasNEON() || !Op.getValueType().isVector() || + Op.getOpcode() != ISD::FMUL) + return SDValue(); + + uint64_t C; + SDValue N0 = Op->getOperand(0); + SDValue ConstVec = Op->getOperand(1); + bool isSigned = N->getOpcode() == ISD::FP_TO_SINT; + + if (ConstVec.getOpcode() != ISD::BUILD_VECTOR || + !isConstVecPow2(ConstVec, isSigned, C)) + return SDValue(); + + unsigned IntrinsicOpcode = isSigned ? Intrinsic::arm_neon_vcvtfp2fxs : + Intrinsic::arm_neon_vcvtfp2fxu; + return DAG.getNode(ISD::INTRINSIC_WO_CHAIN, N->getDebugLoc(), + N->getValueType(0), + DAG.getConstant(IntrinsicOpcode, MVT::i32), N0, + DAG.getConstant(Log2_64(C), MVT::i32)); +} + +/// PerformVDIVCombine - VCVT (fixed-point to floating-point, Advanced SIMD) +/// can replace combinations of VCVT (integer to floating-point) and VDIV +/// when the VDIV has a constant operand that is a power of 2. +/// +/// Example (assume d17 = <float 8.000000e+00, float 8.000000e+00>): +/// vcvt.f32.s32 d16, d16 +/// vdiv.f32 d16, d17, d16 +/// becomes: +/// vcvt.f32.s32 d16, d16, #3 +static SDValue PerformVDIVCombine(SDNode *N, + TargetLowering::DAGCombinerInfo &DCI, + const ARMSubtarget *Subtarget) { + SelectionDAG &DAG = DCI.DAG; + SDValue Op = N->getOperand(0); + unsigned OpOpcode = Op.getNode()->getOpcode(); + + if (!Subtarget->hasNEON() || !N->getValueType(0).isVector() || + (OpOpcode != ISD::SINT_TO_FP && OpOpcode != ISD::UINT_TO_FP)) + return SDValue(); + + uint64_t C; + SDValue ConstVec = N->getOperand(1); + bool isSigned = OpOpcode == ISD::SINT_TO_FP; + + if (ConstVec.getOpcode() != ISD::BUILD_VECTOR || + !isConstVecPow2(ConstVec, isSigned, C)) + return SDValue(); + + unsigned IntrinsicOpcode = isSigned ? Intrinsic::arm_neon_vcvtfxs2fp : + Intrinsic::arm_neon_vcvtfxu2fp; + return DAG.getNode(ISD::INTRINSIC_WO_CHAIN, N->getDebugLoc(), + Op.getValueType(), + DAG.getConstant(IntrinsicOpcode, MVT::i32), + Op.getOperand(0), DAG.getConstant(Log2_64(C), MVT::i32)); +} + +/// Getvshiftimm - Check if this is a valid build_vector for the immediate /// operand of a vector shift operation, where all the elements of the /// build_vector must have the same constant integer value. static bool getVShiftImm(SDValue Op, unsigned ElementBits, int64_t &Cnt) { @@ -6868,6 +6969,9 @@ SDValue ARMTargetLowering::PerformDAGCombine(SDNode *N, case ISD::INSERT_VECTOR_ELT: return PerformInsertEltCombine(N, DCI); case ISD::VECTOR_SHUFFLE: return PerformVECTOR_SHUFFLECombine(N, DCI.DAG); case ARMISD::VDUPLANE: return PerformVDUPLANECombine(N, DCI); + case ISD::FP_TO_SINT: + case ISD::FP_TO_UINT: return PerformVCVTCombine(N, DCI, Subtarget); + case ISD::FDIV: return PerformVDIVCombine(N, DCI, Subtarget); case ISD::INTRINSIC_WO_CHAIN: return PerformIntrinsicCombine(N, DCI.DAG); case ISD::SHL: case ISD::SRA: @@ -7378,6 +7482,10 @@ ARMTargetLowering::getConstraintType(const std::string &Constraint) const { default: break; case 'l': return C_RegisterClass; case 'w': return C_RegisterClass; + case 'h': return C_RegisterClass; + case 'x': return C_RegisterClass; + case 't': return C_RegisterClass; + case 'j': return C_Other; // Constant for movw. } } else if (Constraint.size() == 2) { switch (Constraint[0]) { @@ -7423,26 +7531,43 @@ ARMTargetLowering::getSingleConstraintMatchWeight( return weight; } -std::pair<unsigned, const TargetRegisterClass*> +typedef std::pair<unsigned, const TargetRegisterClass*> RCPair; +RCPair ARMTargetLowering::getRegForInlineAsmConstraint(const std::string &Constraint, EVT VT) const { if (Constraint.size() == 1) { // GCC ARM Constraint Letters switch (Constraint[0]) { - case 'l': + case 'l': // Low regs or general regs. if (Subtarget->isThumb()) - return std::make_pair(0U, ARM::tGPRRegisterClass); + return RCPair(0U, ARM::tGPRRegisterClass); else - return std::make_pair(0U, ARM::GPRRegisterClass); + return RCPair(0U, ARM::GPRRegisterClass); + case 'h': // High regs or no regs. + if (Subtarget->isThumb()) + return RCPair(0U, ARM::hGPRRegisterClass); + break; case 'r': - return std::make_pair(0U, ARM::GPRRegisterClass); + return RCPair(0U, ARM::GPRRegisterClass); case 'w': if (VT == MVT::f32) - return std::make_pair(0U, ARM::SPRRegisterClass); + return RCPair(0U, ARM::SPRRegisterClass); if (VT.getSizeInBits() == 64) - return std::make_pair(0U, ARM::DPRRegisterClass); + return RCPair(0U, ARM::DPRRegisterClass); if (VT.getSizeInBits() == 128) - return std::make_pair(0U, ARM::QPRRegisterClass); + return RCPair(0U, ARM::QPRRegisterClass); + break; + case 'x': + if (VT == MVT::f32) + return RCPair(0U, ARM::SPR_8RegisterClass); + if (VT.getSizeInBits() == 64) + return RCPair(0U, ARM::DPR_8RegisterClass); + if (VT.getSizeInBits() == 128) + return RCPair(0U, ARM::QPR_8RegisterClass); + break; + case 't': + if (VT == MVT::f32) + return RCPair(0U, ARM::SPRRegisterClass); break; } } @@ -7452,47 +7577,6 @@ ARMTargetLowering::getRegForInlineAsmConstraint(const std::string &Constraint, return TargetLowering::getRegForInlineAsmConstraint(Constraint, VT); } -std::vector<unsigned> ARMTargetLowering:: -getRegClassForInlineAsmConstraint(const std::string &Constraint, - EVT VT) const { - if (Constraint.size() != 1) - return std::vector<unsigned>(); - - switch (Constraint[0]) { // GCC ARM Constraint Letters - default: break; - case 'l': - return make_vector<unsigned>(ARM::R0, ARM::R1, ARM::R2, ARM::R3, - ARM::R4, ARM::R5, ARM::R6, ARM::R7, - 0); - case 'r': - return make_vector<unsigned>(ARM::R0, ARM::R1, ARM::R2, ARM::R3, - ARM::R4, ARM::R5, ARM::R6, ARM::R7, - ARM::R8, ARM::R9, ARM::R10, ARM::R11, - ARM::R12, ARM::LR, 0); - case 'w': - if (VT == MVT::f32) - return make_vector<unsigned>(ARM::S0, ARM::S1, ARM::S2, ARM::S3, - ARM::S4, ARM::S5, ARM::S6, ARM::S7, - ARM::S8, ARM::S9, ARM::S10, ARM::S11, - ARM::S12,ARM::S13,ARM::S14,ARM::S15, - ARM::S16,ARM::S17,ARM::S18,ARM::S19, - ARM::S20,ARM::S21,ARM::S22,ARM::S23, - ARM::S24,ARM::S25,ARM::S26,ARM::S27, - ARM::S28,ARM::S29,ARM::S30,ARM::S31, 0); - if (VT.getSizeInBits() == 64) - return make_vector<unsigned>(ARM::D0, ARM::D1, ARM::D2, ARM::D3, - ARM::D4, ARM::D5, ARM::D6, ARM::D7, - ARM::D8, ARM::D9, ARM::D10,ARM::D11, - ARM::D12,ARM::D13,ARM::D14,ARM::D15, 0); - if (VT.getSizeInBits() == 128) - return make_vector<unsigned>(ARM::Q0, ARM::Q1, ARM::Q2, ARM::Q3, - ARM::Q4, ARM::Q5, ARM::Q6, ARM::Q7, 0); - break; - } - - return std::vector<unsigned>(); -} - /// LowerAsmOperandForConstraint - Lower the specified operand into the Ops /// vector. If it is invalid, don't add anything to Ops. void ARMTargetLowering::LowerAsmOperandForConstraint(SDValue Op, @@ -7507,6 +7591,7 @@ void ARMTargetLowering::LowerAsmOperandForConstraint(SDValue Op, char ConstraintLetter = Constraint[0]; switch (ConstraintLetter) { default: break; + case 'j': case 'I': case 'J': case 'K': case 'L': case 'M': case 'N': case 'O': ConstantSDNode *C = dyn_cast<ConstantSDNode>(Op); @@ -7521,6 +7606,13 @@ void ARMTargetLowering::LowerAsmOperandForConstraint(SDValue Op, return; switch (ConstraintLetter) { + case 'j': + // Constant suitable for movw, must be between 0 and + // 65535. + if (Subtarget->hasV6T2Ops()) + if (CVal >= 0 && CVal <= 65535) + break; + return; case 'I': if (Subtarget->isThumb1Only()) { // This must be a constant between 0 and 255, for ADD |
