diff options
Diffstat (limited to 'lib/Target/ARM/ARMBaseInstrInfo.cpp')
-rw-r--r-- | lib/Target/ARM/ARMBaseInstrInfo.cpp | 244 |
1 files changed, 204 insertions, 40 deletions
diff --git a/lib/Target/ARM/ARMBaseInstrInfo.cpp b/lib/Target/ARM/ARMBaseInstrInfo.cpp index 9315348..8bf5475 100644 --- a/lib/Target/ARM/ARMBaseInstrInfo.cpp +++ b/lib/Target/ARM/ARMBaseInstrInfo.cpp @@ -146,7 +146,7 @@ ARMBaseInstrInfo::convertToThreeAddress(MachineFunction::iterator &MFI, unsigned AddrMode = (TSFlags & ARMII::AddrModeMask); const MCInstrDesc &MCID = MI->getDesc(); unsigned NumOps = MCID.getNumOperands(); - bool isLoad = !MCID.mayStore(); + bool isLoad = !MI->mayStore(); const MachineOperand &WB = isLoad ? MI->getOperand(1) : MI->getOperand(0); const MachineOperand &Base = MI->getOperand(2); const MachineOperand &Offset = MI->getOperand(NumOps-3); @@ -439,6 +439,22 @@ ReverseBranchCondition(SmallVectorImpl<MachineOperand> &Cond) const { return false; } +bool ARMBaseInstrInfo::isPredicated(const MachineInstr *MI) const { + if (MI->isBundle()) { + MachineBasicBlock::const_instr_iterator I = MI; + MachineBasicBlock::const_instr_iterator E = MI->getParent()->instr_end(); + while (++I != E && I->isInsideBundle()) { + int PIdx = I->findFirstPredOperandIdx(); + if (PIdx != -1 && I->getOperand(PIdx).getImm() != ARMCC::AL) + return true; + } + return false; + } + + int PIdx = MI->findFirstPredOperandIdx(); + return PIdx != -1 && MI->getOperand(PIdx).getImm() != ARMCC::AL; +} + bool ARMBaseInstrInfo:: PredicateInstruction(MachineInstr *MI, const SmallVectorImpl<MachineOperand> &Pred) const { @@ -491,7 +507,7 @@ bool ARMBaseInstrInfo::DefinesPredicate(MachineInstr *MI, std::vector<MachineOperand> &Pred) const { // FIXME: This confuses implicit_def with optional CPSR def. const MCInstrDesc &MCID = MI->getDesc(); - if (!MCID.getImplicitDefs() && !MCID.hasOptionalDef()) + if (!MCID.getImplicitDefs() && !MI->hasOptionalDef()) return false; bool Found = false; @@ -510,11 +526,10 @@ bool ARMBaseInstrInfo::DefinesPredicate(MachineInstr *MI, /// By default, this returns true for every instruction with a /// PredicateOperand. bool ARMBaseInstrInfo::isPredicable(MachineInstr *MI) const { - const MCInstrDesc &MCID = MI->getDesc(); - if (!MCID.isPredicable()) + if (!MI->isPredicable()) return false; - if ((MCID.TSFlags & ARMII::DomainMask) == ARMII::DomainNEON) { + if ((MI->getDesc().TSFlags & ARMII::DomainMask) == ARMII::DomainNEON) { ARMFunctionInfo *AFI = MI->getParent()->getParent()->getInfo<ARMFunctionInfo>(); return AFI->isThumb2Function(); @@ -548,7 +563,7 @@ unsigned ARMBaseInstrInfo::GetInstSizeInBytes(const MachineInstr *MI) const { return getInlineAsmLength(MI->getOperand(0).getSymbolName(), *MAI); if (MI->isLabel()) return 0; - unsigned Opc = MI->getOpcode(); + unsigned Opc = MI->getOpcode(); switch (Opc) { case TargetOpcode::IMPLICIT_DEF: case TargetOpcode::KILL: @@ -556,6 +571,8 @@ unsigned ARMBaseInstrInfo::GetInstSizeInBytes(const MachineInstr *MI) const { case TargetOpcode::EH_LABEL: case TargetOpcode::DBG_VALUE: return 0; + case TargetOpcode::BUNDLE: + return getInstBundleLength(MI); case ARM::MOVi16_ga_pcrel: case ARM::MOVTi16_ga_pcrel: case ARM::t2MOVi16_ga_pcrel: @@ -593,7 +610,7 @@ unsigned ARMBaseInstrInfo::GetInstSizeInBytes(const MachineInstr *MI) const { ? 1 : ((Opc == ARM::t2TBH_JT) ? 2 : 4); unsigned NumOps = MCID.getNumOperands(); MachineOperand JTOP = - MI->getOperand(NumOps - (MCID.isPredicable() ? 3 : 2)); + MI->getOperand(NumOps - (MI->isPredicable() ? 3 : 2)); unsigned JTI = JTOP.getIndex(); const MachineJumpTableInfo *MJTI = MF->getJumpTableInfo(); assert(MJTI != 0); @@ -622,6 +639,17 @@ unsigned ARMBaseInstrInfo::GetInstSizeInBytes(const MachineInstr *MI) const { return 0; // Not reached } +unsigned ARMBaseInstrInfo::getInstBundleLength(const MachineInstr *MI) const { + unsigned Size = 0; + MachineBasicBlock::const_instr_iterator I = MI; + MachineBasicBlock::const_instr_iterator E = MI->getParent()->instr_end(); + while (++I != E && I->isInsideBundle()) { + assert(!I->isBundle() && "No nested bundle!"); + Size += GetInstSizeInBytes(&*I); + } + return Size; +} + void ARMBaseInstrInfo::copyPhysReg(MachineBasicBlock &MBB, MachineBasicBlock::iterator I, DebugLoc DL, unsigned DestReg, unsigned SrcReg, @@ -845,7 +873,7 @@ ARMBaseInstrInfo::isStoreToStackSlot(const MachineInstr *MI, unsigned ARMBaseInstrInfo::isStoreToStackSlotPostFE(const MachineInstr *MI, int &FrameIndex) const { const MachineMemOperand *Dummy; - return MI->getDesc().mayStore() && hasStoreToStackSlot(MI, Dummy, FrameIndex); + return MI->mayStore() && hasStoreToStackSlot(MI, Dummy, FrameIndex); } void ARMBaseInstrInfo:: @@ -991,7 +1019,7 @@ ARMBaseInstrInfo::isLoadFromStackSlot(const MachineInstr *MI, unsigned ARMBaseInstrInfo::isLoadFromStackSlotPostFE(const MachineInstr *MI, int &FrameIndex) const { const MachineMemOperand *Dummy; - return MI->getDesc().mayLoad() && hasLoadFromStackSlot(MI, Dummy, FrameIndex); + return MI->mayLoad() && hasLoadFromStackSlot(MI, Dummy, FrameIndex); } bool ARMBaseInstrInfo::expandPostRAPseudo(MachineBasicBlock::iterator MI) const{ @@ -1357,7 +1385,7 @@ bool ARMBaseInstrInfo::isSchedulingBoundary(const MachineInstr *MI, return false; // Terminators and labels can't be scheduled around. - if (MI->getDesc().isTerminator() || MI->isLabel()) + if (MI->isTerminator() || MI->isLabel()) return true; // Treat the start of the IT block as a scheduling boundary, but schedule @@ -1762,8 +1790,7 @@ OptimizeCompareInstr(MachineInstr *CmpInstr, unsigned SrcReg, int CmpMask, // Check that CPSR isn't set between the comparison instruction and the one we // want to change. - MachineBasicBlock::const_iterator I = CmpInstr, E = MI, - B = MI->getParent()->begin(); + MachineBasicBlock::iterator I = CmpInstr,E = MI, B = MI->getParent()->begin(); // Early exit if CmpInstr is at the beginning of the BB. if (I == B) return false; @@ -1957,7 +1984,7 @@ bool ARMBaseInstrInfo::FoldImmediate(MachineInstr *UseMI, bool isKill = UseMI->getOperand(OpIdx).isKill(); unsigned NewReg = MRI->createVirtualRegister(MRI->getRegClass(Reg)); AddDefaultCC(AddDefaultPred(BuildMI(*UseMI->getParent(), - *UseMI, UseMI->getDebugLoc(), + UseMI, UseMI->getDebugLoc(), get(NewUseOpc), NewReg) .addReg(Reg1, getKillRegState(isKill)) .addImm(SOImmValV1))); @@ -2332,6 +2359,59 @@ ARMBaseInstrInfo::getOperandLatency(const InstrItineraryData *ItinData, return UseCycle; } +static const MachineInstr *getBundledDefMI(const TargetRegisterInfo *TRI, + const MachineInstr *MI, unsigned Reg, + unsigned &DefIdx, unsigned &Dist) { + Dist = 0; + + MachineBasicBlock::const_iterator I = MI; ++I; + MachineBasicBlock::const_instr_iterator II = + llvm::prior(I.getInstrIterator()); + assert(II->isInsideBundle() && "Empty bundle?"); + + int Idx = -1; + while (II->isInsideBundle()) { + Idx = II->findRegisterDefOperandIdx(Reg, false, true, TRI); + if (Idx != -1) + break; + --II; + ++Dist; + } + + assert(Idx != -1 && "Cannot find bundled definition!"); + DefIdx = Idx; + return II; +} + +static const MachineInstr *getBundledUseMI(const TargetRegisterInfo *TRI, + const MachineInstr *MI, unsigned Reg, + unsigned &UseIdx, unsigned &Dist) { + Dist = 0; + + MachineBasicBlock::const_instr_iterator II = MI; ++II; + assert(II->isInsideBundle() && "Empty bundle?"); + MachineBasicBlock::const_instr_iterator E = MI->getParent()->instr_end(); + + // FIXME: This doesn't properly handle multiple uses. + int Idx = -1; + while (II != E && II->isInsideBundle()) { + Idx = II->findRegisterUseOperandIdx(Reg, false, TRI); + if (Idx != -1) + break; + if (II->getOpcode() != ARM::t2IT) + ++Dist; + ++II; + } + + if (Idx == -1) { + Dist = 0; + return 0; + } + + UseIdx = Idx; + return II; +} + int ARMBaseInstrInfo::getOperandLatency(const InstrItineraryData *ItinData, const MachineInstr *DefMI, unsigned DefIdx, @@ -2340,35 +2420,77 @@ ARMBaseInstrInfo::getOperandLatency(const InstrItineraryData *ItinData, DefMI->isRegSequence() || DefMI->isImplicitDef()) return 1; - const MCInstrDesc &DefMCID = DefMI->getDesc(); if (!ItinData || ItinData->isEmpty()) - return DefMCID.mayLoad() ? 3 : 1; + return DefMI->mayLoad() ? 3 : 1; - const MCInstrDesc &UseMCID = UseMI->getDesc(); + const MCInstrDesc *DefMCID = &DefMI->getDesc(); + const MCInstrDesc *UseMCID = &UseMI->getDesc(); const MachineOperand &DefMO = DefMI->getOperand(DefIdx); - if (DefMO.getReg() == ARM::CPSR) { + unsigned Reg = DefMO.getReg(); + if (Reg == ARM::CPSR) { if (DefMI->getOpcode() == ARM::FMSTAT) { // fpscr -> cpsr stalls over 20 cycles on A8 (and earlier?) return Subtarget.isCortexA9() ? 1 : 20; } // CPSR set and branch can be paired in the same cycle. - if (UseMCID.isBranch()) + if (UseMI->isBranch()) return 0; + + // Otherwise it takes the instruction latency (generally one). + int Latency = getInstrLatency(ItinData, DefMI); + + // For Thumb2 and -Os, prefer scheduling CPSR setting instruction close to + // its uses. Instructions which are otherwise scheduled between them may + // incur a code size penalty (not able to use the CPSR setting 16-bit + // instructions). + if (Latency > 0 && Subtarget.isThumb2()) { + const MachineFunction *MF = DefMI->getParent()->getParent(); + if (MF->getFunction()->hasFnAttr(Attribute::OptimizeForSize)) + --Latency; + } + return Latency; } unsigned DefAlign = DefMI->hasOneMemOperand() ? (*DefMI->memoperands_begin())->getAlignment() : 0; unsigned UseAlign = UseMI->hasOneMemOperand() ? (*UseMI->memoperands_begin())->getAlignment() : 0; - int Latency = getOperandLatency(ItinData, DefMCID, DefIdx, DefAlign, - UseMCID, UseIdx, UseAlign); + + unsigned DefAdj = 0; + if (DefMI->isBundle()) { + DefMI = getBundledDefMI(&getRegisterInfo(), DefMI, Reg, DefIdx, DefAdj); + if (DefMI->isCopyLike() || DefMI->isInsertSubreg() || + DefMI->isRegSequence() || DefMI->isImplicitDef()) + return 1; + DefMCID = &DefMI->getDesc(); + } + unsigned UseAdj = 0; + if (UseMI->isBundle()) { + unsigned NewUseIdx; + const MachineInstr *NewUseMI = getBundledUseMI(&getRegisterInfo(), UseMI, + Reg, NewUseIdx, UseAdj); + if (NewUseMI) { + UseMI = NewUseMI; + UseIdx = NewUseIdx; + UseMCID = &UseMI->getDesc(); + } + } + + int Latency = getOperandLatency(ItinData, *DefMCID, DefIdx, DefAlign, + *UseMCID, UseIdx, UseAlign); + int Adj = DefAdj + UseAdj; + if (Adj) { + Latency -= (int)(DefAdj + UseAdj); + if (Latency < 1) + return 1; + } if (Latency > 1 && (Subtarget.isCortexA8() || Subtarget.isCortexA9())) { // FIXME: Shifter op hack: no shift (i.e. [r +/- r]) or [r + r << 2] // variants are one cycle cheaper. - switch (DefMCID.getOpcode()) { + switch (DefMCID->getOpcode()) { default: break; case ARM::LDRrs: case ARM::LDRBrs: { @@ -2393,7 +2515,7 @@ ARMBaseInstrInfo::getOperandLatency(const InstrItineraryData *ItinData, } if (DefAlign < 8 && Subtarget.isCortexA9()) - switch (DefMCID.getOpcode()) { + switch (DefMCID->getOpcode()) { default: break; case ARM::VLD1q8: case ARM::VLD1q16: @@ -2413,12 +2535,18 @@ ARMBaseInstrInfo::getOperandLatency(const InstrItineraryData *ItinData, case ARM::VLD2q8: case ARM::VLD2q16: case ARM::VLD2q32: - case ARM::VLD2d8_UPD: - case ARM::VLD2d16_UPD: - case ARM::VLD2d32_UPD: - case ARM::VLD2q8_UPD: - case ARM::VLD2q16_UPD: - case ARM::VLD2q32_UPD: + case ARM::VLD2d8wb_fixed: + case ARM::VLD2d16wb_fixed: + case ARM::VLD2d32wb_fixed: + case ARM::VLD2q8wb_fixed: + case ARM::VLD2q16wb_fixed: + case ARM::VLD2q32wb_fixed: + case ARM::VLD2d8wb_register: + case ARM::VLD2d16wb_register: + case ARM::VLD2d32wb_register: + case ARM::VLD2q8wb_register: + case ARM::VLD2q16wb_register: + case ARM::VLD2q32wb_register: case ARM::VLD3d8: case ARM::VLD3d16: case ARM::VLD3d32: @@ -2446,9 +2574,12 @@ ARMBaseInstrInfo::getOperandLatency(const InstrItineraryData *ItinData, case ARM::VLD1DUPq8: case ARM::VLD1DUPq16: case ARM::VLD1DUPq32: - case ARM::VLD1DUPq8_UPD: - case ARM::VLD1DUPq16_UPD: - case ARM::VLD1DUPq32_UPD: + case ARM::VLD1DUPq8wb_fixed: + case ARM::VLD1DUPq16wb_fixed: + case ARM::VLD1DUPq32wb_fixed: + case ARM::VLD1DUPq8wb_register: + case ARM::VLD1DUPq16wb_register: + case ARM::VLD1DUPq32wb_register: case ARM::VLD2DUPd8: case ARM::VLD2DUPd16: case ARM::VLD2DUPd32: @@ -2580,12 +2711,18 @@ ARMBaseInstrInfo::getOperandLatency(const InstrItineraryData *ItinData, case ARM::VLD2q8Pseudo: case ARM::VLD2q16Pseudo: case ARM::VLD2q32Pseudo: - case ARM::VLD2d8Pseudo_UPD: - case ARM::VLD2d16Pseudo_UPD: - case ARM::VLD2d32Pseudo_UPD: - case ARM::VLD2q8Pseudo_UPD: - case ARM::VLD2q16Pseudo_UPD: - case ARM::VLD2q32Pseudo_UPD: + case ARM::VLD2d8PseudoWB_fixed: + case ARM::VLD2d16PseudoWB_fixed: + case ARM::VLD2d32PseudoWB_fixed: + case ARM::VLD2q8PseudoWB_fixed: + case ARM::VLD2q16PseudoWB_fixed: + case ARM::VLD2q32PseudoWB_fixed: + case ARM::VLD2d8PseudoWB_register: + case ARM::VLD2d16PseudoWB_register: + case ARM::VLD2d32PseudoWB_register: + case ARM::VLD2q8PseudoWB_register: + case ARM::VLD2q16PseudoWB_register: + case ARM::VLD2q32PseudoWB_register: case ARM::VLD3d8Pseudo: case ARM::VLD3d16Pseudo: case ARM::VLD3d32Pseudo: @@ -2621,9 +2758,12 @@ ARMBaseInstrInfo::getOperandLatency(const InstrItineraryData *ItinData, case ARM::VLD1DUPq8Pseudo: case ARM::VLD1DUPq16Pseudo: case ARM::VLD1DUPq32Pseudo: - case ARM::VLD1DUPq8Pseudo_UPD: - case ARM::VLD1DUPq16Pseudo_UPD: - case ARM::VLD1DUPq32Pseudo_UPD: + case ARM::VLD1DUPq8PseudoWB_fixed: + case ARM::VLD1DUPq16PseudoWB_fixed: + case ARM::VLD1DUPq32PseudoWB_fixed: + case ARM::VLD1DUPq8PseudoWB_register: + case ARM::VLD1DUPq16PseudoWB_register: + case ARM::VLD1DUPq32PseudoWB_register: case ARM::VLD2DUPd8Pseudo: case ARM::VLD2DUPd16Pseudo: case ARM::VLD2DUPd32Pseudo: @@ -2671,6 +2811,19 @@ ARMBaseInstrInfo::getOperandLatency(const InstrItineraryData *ItinData, return Latency; } +unsigned +ARMBaseInstrInfo::getOutputLatency(const InstrItineraryData *ItinData, + const MachineInstr *DefMI, unsigned DefIdx, + const MachineInstr *DepMI) const { + unsigned Reg = DefMI->getOperand(DefIdx).getReg(); + if (DepMI->readsRegister(Reg, &getRegisterInfo()) || !isPredicated(DepMI)) + return 1; + + // If the second MI is predicated, then there is an implicit use dependency. + return getOperandLatency(ItinData, DefMI, DefIdx, DepMI, + DepMI->getNumOperands()); +} + int ARMBaseInstrInfo::getInstrLatency(const InstrItineraryData *ItinData, const MachineInstr *MI, unsigned *PredCost) const { @@ -2681,6 +2834,17 @@ int ARMBaseInstrInfo::getInstrLatency(const InstrItineraryData *ItinData, if (!ItinData || ItinData->isEmpty()) return 1; + if (MI->isBundle()) { + int Latency = 0; + MachineBasicBlock::const_instr_iterator I = MI; + MachineBasicBlock::const_instr_iterator E = MI->getParent()->instr_end(); + while (++I != E && I->isInsideBundle()) { + if (I->getOpcode() != ARM::t2IT) + Latency += getInstrLatency(ItinData, I, PredCost); + } + return Latency; + } + const MCInstrDesc &MCID = MI->getDesc(); unsigned Class = MCID.getSchedClass(); unsigned UOps = ItinData->Itineraries[Class].NumMicroOps; |