aboutsummaryrefslogtreecommitdiffstats
path: root/lib/Target/ARM/ARMISelLowering.cpp
diff options
context:
space:
mode:
Diffstat (limited to 'lib/Target/ARM/ARMISelLowering.cpp')
-rw-r--r--lib/Target/ARM/ARMISelLowering.cpp216
1 files changed, 154 insertions, 62 deletions
diff --git a/lib/Target/ARM/ARMISelLowering.cpp b/lib/Target/ARM/ARMISelLowering.cpp
index 4ae4af1..fb738cd 100644
--- a/lib/Target/ARM/ARMISelLowering.cpp
+++ b/lib/Target/ARM/ARMISelLowering.cpp
@@ -506,6 +506,9 @@ ARMTargetLowering::ARMTargetLowering(TargetMachine &TM)
setTargetDAGCombine(ISD::VECTOR_SHUFFLE);
setTargetDAGCombine(ISD::INSERT_VECTOR_ELT);
setTargetDAGCombine(ISD::STORE);
+ setTargetDAGCombine(ISD::FP_TO_SINT);
+ setTargetDAGCombine(ISD::FP_TO_UINT);
+ setTargetDAGCombine(ISD::FDIV);
}
computeRegisterProperties();
@@ -974,12 +977,12 @@ Sched::Preference ARMTargetLowering::getSchedulingPreference(SDNode *N) const {
// Load are scheduled for latency even if there instruction itinerary
// is not available.
const TargetInstrInfo *TII = getTargetMachine().getInstrInfo();
- const TargetInstrDesc &TID = TII->get(N->getMachineOpcode());
+ const MCInstrDesc &MCID = TII->get(N->getMachineOpcode());
- if (TID.getNumDefs() == 0)
+ if (MCID.getNumDefs() == 0)
return Sched::RegPressure;
if (!Itins->isEmpty() &&
- Itins->getOperandCycle(TID.getSchedClass(), 0) > 2)
+ Itins->getOperandCycle(MCID.getSchedClass(), 0) > 2)
return Sched::Latency;
return Sched::RegPressure;
@@ -5523,7 +5526,7 @@ SDValue combineSelectAndUse(SDNode *N, SDValue Slct, SDValue OtherOp,
return SDValue();
}
-// AddCombineToVPADDL- For pair-wise add on neon, use the vpaddl instruction
+// AddCombineToVPADDL- For pair-wise add on neon, use the vpaddl instruction
// (only after legalization).
static SDValue AddCombineToVPADDL(SDNode *N, SDValue N0, SDValue N1,
TargetLowering::DAGCombinerInfo &DCI,
@@ -5554,25 +5557,25 @@ static SDValue AddCombineToVPADDL(SDNode *N, SDValue N0, SDValue N1,
SDNode *V = Vec.getNode();
unsigned nextIndex = 0;
- // For each operands to the ADD which are BUILD_VECTORs,
+ // For each operands to the ADD which are BUILD_VECTORs,
// check to see if each of their operands are an EXTRACT_VECTOR with
// the same vector and appropriate index.
for (unsigned i = 0, e = N0->getNumOperands(); i != e; ++i) {
if (N0->getOperand(i)->getOpcode() == ISD::EXTRACT_VECTOR_ELT
&& N1->getOperand(i)->getOpcode() == ISD::EXTRACT_VECTOR_ELT) {
-
+
SDValue ExtVec0 = N0->getOperand(i);
SDValue ExtVec1 = N1->getOperand(i);
-
+
// First operand is the vector, verify its the same.
if (V != ExtVec0->getOperand(0).getNode() ||
V != ExtVec1->getOperand(0).getNode())
return SDValue();
-
+
// Second is the constant, verify its correct.
ConstantSDNode *C0 = dyn_cast<ConstantSDNode>(ExtVec0->getOperand(1));
ConstantSDNode *C1 = dyn_cast<ConstantSDNode>(ExtVec1->getOperand(1));
-
+
// For the constant, we want to see all the even or all the odd.
if (!C0 || !C1 || C0->getZExtValue() != nextIndex
|| C1->getZExtValue() != nextIndex+1)
@@ -5580,7 +5583,7 @@ static SDValue AddCombineToVPADDL(SDNode *N, SDValue N0, SDValue N1,
// Increment index.
nextIndex+=2;
- } else
+ } else
return SDValue();
}
@@ -5595,7 +5598,7 @@ static SDValue AddCombineToVPADDL(SDNode *N, SDValue N0, SDValue N1,
// Input is the vector.
Ops.push_back(Vec);
-
+
// Get widened type and narrowed type.
MVT widenType;
unsigned numElem = VT.getVectorNumElements();
@@ -5624,7 +5627,7 @@ static SDValue PerformADDCombineWithOperands(SDNode *N, SDValue N0, SDValue N1,
SDValue Result = AddCombineToVPADDL(N, N0, N1, DCI, Subtarget);
if (Result.getNode())
return Result;
-
+
// fold (add (select cc, 0, c), x) -> (select cc, x, (add, x, c))
if (N0.getOpcode() == ISD::SELECT && N0.getNode()->hasOneUse()) {
SDValue Result = combineSelectAndUse(N, N0, N1, DCI);
@@ -6479,7 +6482,105 @@ static SDValue PerformVDUPLANECombine(SDNode *N,
return DCI.DAG.getNode(ISD::BITCAST, N->getDebugLoc(), VT, Op);
}
-/// getVShiftImm - Check if this is a valid build_vector for the immediate
+// isConstVecPow2 - Return true if each vector element is a power of 2, all
+// elements are the same constant, C, and Log2(C) ranges from 1 to 32.
+static bool isConstVecPow2(SDValue ConstVec, bool isSigned, uint64_t &C)
+{
+ integerPart cN;
+ integerPart c0 = 0;
+ for (unsigned I = 0, E = ConstVec.getValueType().getVectorNumElements();
+ I != E; I++) {
+ ConstantFPSDNode *C = dyn_cast<ConstantFPSDNode>(ConstVec.getOperand(I));
+ if (!C)
+ return false;
+
+ bool isExact;
+ APFloat APF = C->getValueAPF();
+ if (APF.convertToInteger(&cN, 64, isSigned, APFloat::rmTowardZero, &isExact)
+ != APFloat::opOK || !isExact)
+ return false;
+
+ c0 = (I == 0) ? cN : c0;
+ if (!isPowerOf2_64(cN) || c0 != cN || Log2_64(c0) < 1 || Log2_64(c0) > 32)
+ return false;
+ }
+ C = c0;
+ return true;
+}
+
+/// PerformVCVTCombine - VCVT (floating-point to fixed-point, Advanced SIMD)
+/// can replace combinations of VMUL and VCVT (floating-point to integer)
+/// when the VMUL has a constant operand that is a power of 2.
+///
+/// Example (assume d17 = <float 8.000000e+00, float 8.000000e+00>):
+/// vmul.f32 d16, d17, d16
+/// vcvt.s32.f32 d16, d16
+/// becomes:
+/// vcvt.s32.f32 d16, d16, #3
+static SDValue PerformVCVTCombine(SDNode *N,
+ TargetLowering::DAGCombinerInfo &DCI,
+ const ARMSubtarget *Subtarget) {
+ SelectionDAG &DAG = DCI.DAG;
+ SDValue Op = N->getOperand(0);
+
+ if (!Subtarget->hasNEON() || !Op.getValueType().isVector() ||
+ Op.getOpcode() != ISD::FMUL)
+ return SDValue();
+
+ uint64_t C;
+ SDValue N0 = Op->getOperand(0);
+ SDValue ConstVec = Op->getOperand(1);
+ bool isSigned = N->getOpcode() == ISD::FP_TO_SINT;
+
+ if (ConstVec.getOpcode() != ISD::BUILD_VECTOR ||
+ !isConstVecPow2(ConstVec, isSigned, C))
+ return SDValue();
+
+ unsigned IntrinsicOpcode = isSigned ? Intrinsic::arm_neon_vcvtfp2fxs :
+ Intrinsic::arm_neon_vcvtfp2fxu;
+ return DAG.getNode(ISD::INTRINSIC_WO_CHAIN, N->getDebugLoc(),
+ N->getValueType(0),
+ DAG.getConstant(IntrinsicOpcode, MVT::i32), N0,
+ DAG.getConstant(Log2_64(C), MVT::i32));
+}
+
+/// PerformVDIVCombine - VCVT (fixed-point to floating-point, Advanced SIMD)
+/// can replace combinations of VCVT (integer to floating-point) and VDIV
+/// when the VDIV has a constant operand that is a power of 2.
+///
+/// Example (assume d17 = <float 8.000000e+00, float 8.000000e+00>):
+/// vcvt.f32.s32 d16, d16
+/// vdiv.f32 d16, d17, d16
+/// becomes:
+/// vcvt.f32.s32 d16, d16, #3
+static SDValue PerformVDIVCombine(SDNode *N,
+ TargetLowering::DAGCombinerInfo &DCI,
+ const ARMSubtarget *Subtarget) {
+ SelectionDAG &DAG = DCI.DAG;
+ SDValue Op = N->getOperand(0);
+ unsigned OpOpcode = Op.getNode()->getOpcode();
+
+ if (!Subtarget->hasNEON() || !N->getValueType(0).isVector() ||
+ (OpOpcode != ISD::SINT_TO_FP && OpOpcode != ISD::UINT_TO_FP))
+ return SDValue();
+
+ uint64_t C;
+ SDValue ConstVec = N->getOperand(1);
+ bool isSigned = OpOpcode == ISD::SINT_TO_FP;
+
+ if (ConstVec.getOpcode() != ISD::BUILD_VECTOR ||
+ !isConstVecPow2(ConstVec, isSigned, C))
+ return SDValue();
+
+ unsigned IntrinsicOpcode = isSigned ? Intrinsic::arm_neon_vcvtfxs2fp :
+ Intrinsic::arm_neon_vcvtfxu2fp;
+ return DAG.getNode(ISD::INTRINSIC_WO_CHAIN, N->getDebugLoc(),
+ Op.getValueType(),
+ DAG.getConstant(IntrinsicOpcode, MVT::i32),
+ Op.getOperand(0), DAG.getConstant(Log2_64(C), MVT::i32));
+}
+
+/// Getvshiftimm - Check if this is a valid build_vector for the immediate
/// operand of a vector shift operation, where all the elements of the
/// build_vector must have the same constant integer value.
static bool getVShiftImm(SDValue Op, unsigned ElementBits, int64_t &Cnt) {
@@ -6868,6 +6969,9 @@ SDValue ARMTargetLowering::PerformDAGCombine(SDNode *N,
case ISD::INSERT_VECTOR_ELT: return PerformInsertEltCombine(N, DCI);
case ISD::VECTOR_SHUFFLE: return PerformVECTOR_SHUFFLECombine(N, DCI.DAG);
case ARMISD::VDUPLANE: return PerformVDUPLANECombine(N, DCI);
+ case ISD::FP_TO_SINT:
+ case ISD::FP_TO_UINT: return PerformVCVTCombine(N, DCI, Subtarget);
+ case ISD::FDIV: return PerformVDIVCombine(N, DCI, Subtarget);
case ISD::INTRINSIC_WO_CHAIN: return PerformIntrinsicCombine(N, DCI.DAG);
case ISD::SHL:
case ISD::SRA:
@@ -7378,6 +7482,10 @@ ARMTargetLowering::getConstraintType(const std::string &Constraint) const {
default: break;
case 'l': return C_RegisterClass;
case 'w': return C_RegisterClass;
+ case 'h': return C_RegisterClass;
+ case 'x': return C_RegisterClass;
+ case 't': return C_RegisterClass;
+ case 'j': return C_Other; // Constant for movw.
}
} else if (Constraint.size() == 2) {
switch (Constraint[0]) {
@@ -7423,26 +7531,43 @@ ARMTargetLowering::getSingleConstraintMatchWeight(
return weight;
}
-std::pair<unsigned, const TargetRegisterClass*>
+typedef std::pair<unsigned, const TargetRegisterClass*> RCPair;
+RCPair
ARMTargetLowering::getRegForInlineAsmConstraint(const std::string &Constraint,
EVT VT) const {
if (Constraint.size() == 1) {
// GCC ARM Constraint Letters
switch (Constraint[0]) {
- case 'l':
+ case 'l': // Low regs or general regs.
if (Subtarget->isThumb())
- return std::make_pair(0U, ARM::tGPRRegisterClass);
+ return RCPair(0U, ARM::tGPRRegisterClass);
else
- return std::make_pair(0U, ARM::GPRRegisterClass);
+ return RCPair(0U, ARM::GPRRegisterClass);
+ case 'h': // High regs or no regs.
+ if (Subtarget->isThumb())
+ return RCPair(0U, ARM::hGPRRegisterClass);
+ break;
case 'r':
- return std::make_pair(0U, ARM::GPRRegisterClass);
+ return RCPair(0U, ARM::GPRRegisterClass);
case 'w':
if (VT == MVT::f32)
- return std::make_pair(0U, ARM::SPRRegisterClass);
+ return RCPair(0U, ARM::SPRRegisterClass);
if (VT.getSizeInBits() == 64)
- return std::make_pair(0U, ARM::DPRRegisterClass);
+ return RCPair(0U, ARM::DPRRegisterClass);
if (VT.getSizeInBits() == 128)
- return std::make_pair(0U, ARM::QPRRegisterClass);
+ return RCPair(0U, ARM::QPRRegisterClass);
+ break;
+ case 'x':
+ if (VT == MVT::f32)
+ return RCPair(0U, ARM::SPR_8RegisterClass);
+ if (VT.getSizeInBits() == 64)
+ return RCPair(0U, ARM::DPR_8RegisterClass);
+ if (VT.getSizeInBits() == 128)
+ return RCPair(0U, ARM::QPR_8RegisterClass);
+ break;
+ case 't':
+ if (VT == MVT::f32)
+ return RCPair(0U, ARM::SPRRegisterClass);
break;
}
}
@@ -7452,47 +7577,6 @@ ARMTargetLowering::getRegForInlineAsmConstraint(const std::string &Constraint,
return TargetLowering::getRegForInlineAsmConstraint(Constraint, VT);
}
-std::vector<unsigned> ARMTargetLowering::
-getRegClassForInlineAsmConstraint(const std::string &Constraint,
- EVT VT) const {
- if (Constraint.size() != 1)
- return std::vector<unsigned>();
-
- switch (Constraint[0]) { // GCC ARM Constraint Letters
- default: break;
- case 'l':
- return make_vector<unsigned>(ARM::R0, ARM::R1, ARM::R2, ARM::R3,
- ARM::R4, ARM::R5, ARM::R6, ARM::R7,
- 0);
- case 'r':
- return make_vector<unsigned>(ARM::R0, ARM::R1, ARM::R2, ARM::R3,
- ARM::R4, ARM::R5, ARM::R6, ARM::R7,
- ARM::R8, ARM::R9, ARM::R10, ARM::R11,
- ARM::R12, ARM::LR, 0);
- case 'w':
- if (VT == MVT::f32)
- return make_vector<unsigned>(ARM::S0, ARM::S1, ARM::S2, ARM::S3,
- ARM::S4, ARM::S5, ARM::S6, ARM::S7,
- ARM::S8, ARM::S9, ARM::S10, ARM::S11,
- ARM::S12,ARM::S13,ARM::S14,ARM::S15,
- ARM::S16,ARM::S17,ARM::S18,ARM::S19,
- ARM::S20,ARM::S21,ARM::S22,ARM::S23,
- ARM::S24,ARM::S25,ARM::S26,ARM::S27,
- ARM::S28,ARM::S29,ARM::S30,ARM::S31, 0);
- if (VT.getSizeInBits() == 64)
- return make_vector<unsigned>(ARM::D0, ARM::D1, ARM::D2, ARM::D3,
- ARM::D4, ARM::D5, ARM::D6, ARM::D7,
- ARM::D8, ARM::D9, ARM::D10,ARM::D11,
- ARM::D12,ARM::D13,ARM::D14,ARM::D15, 0);
- if (VT.getSizeInBits() == 128)
- return make_vector<unsigned>(ARM::Q0, ARM::Q1, ARM::Q2, ARM::Q3,
- ARM::Q4, ARM::Q5, ARM::Q6, ARM::Q7, 0);
- break;
- }
-
- return std::vector<unsigned>();
-}
-
/// LowerAsmOperandForConstraint - Lower the specified operand into the Ops
/// vector. If it is invalid, don't add anything to Ops.
void ARMTargetLowering::LowerAsmOperandForConstraint(SDValue Op,
@@ -7507,6 +7591,7 @@ void ARMTargetLowering::LowerAsmOperandForConstraint(SDValue Op,
char ConstraintLetter = Constraint[0];
switch (ConstraintLetter) {
default: break;
+ case 'j':
case 'I': case 'J': case 'K': case 'L':
case 'M': case 'N': case 'O':
ConstantSDNode *C = dyn_cast<ConstantSDNode>(Op);
@@ -7521,6 +7606,13 @@ void ARMTargetLowering::LowerAsmOperandForConstraint(SDValue Op,
return;
switch (ConstraintLetter) {
+ case 'j':
+ // Constant suitable for movw, must be between 0 and
+ // 65535.
+ if (Subtarget->hasV6T2Ops())
+ if (CVal >= 0 && CVal <= 65535)
+ break;
+ return;
case 'I':
if (Subtarget->isThumb1Only()) {
// This must be a constant between 0 and 255, for ADD