Diffstat (limited to 'lib/Target/ARM/ARMBaseInstrInfo.cpp')
-rw-r--r-- | lib/Target/ARM/ARMBaseInstrInfo.cpp | 92
1 file changed, 57 insertions, 35 deletions
diff --git a/lib/Target/ARM/ARMBaseInstrInfo.cpp b/lib/Target/ARM/ARMBaseInstrInfo.cpp
index f835a4e..47f5bf9 100644
--- a/lib/Target/ARM/ARMBaseInstrInfo.cpp
+++ b/lib/Target/ARM/ARMBaseInstrInfo.cpp
@@ -283,7 +283,7 @@ ARMBaseInstrInfo::AnalyzeBranch(MachineBasicBlock &MBB,MachineBasicBlock *&TBB,
 
   // Walk backwards from the end of the basic block until the branch is
   // analyzed or we give up.
-  while (isPredicated(I) || I->isTerminator()) {
+  while (isPredicated(I) || I->isTerminator() || I->isDebugValue()) {
 
     // Flag to be raised on unanalyzeable instructions. This is useful in cases
     // where we want to clean up on the end of the basic block before we bail
@@ -336,7 +336,7 @@ ARMBaseInstrInfo::AnalyzeBranch(MachineBasicBlock &MBB,MachineBasicBlock *&TBB,
       // If we can modify the function, delete everything below this
       // unconditional branch.
       if (AllowModify) {
-        MachineBasicBlock::iterator DI = llvm::next(I);
+        MachineBasicBlock::iterator DI = std::next(I);
         while (DI != MBB.end()) {
           MachineInstr *InstToDelete = DI;
           ++DI;
@@ -535,6 +535,20 @@ bool ARMBaseInstrInfo::isPredicable(MachineInstr *MI) const {
   return true;
 }
 
+template<> bool IsCPSRDead<MachineInstr>(MachineInstr* MI) {
+  for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) {
+    const MachineOperand &MO = MI->getOperand(i);
+    if (!MO.isReg() || MO.isUndef() || MO.isUse())
+      continue;
+    if (MO.getReg() != ARM::CPSR)
+      continue;
+    if (!MO.isDead())
+      return false;
+  }
+  // all definitions of CPSR are dead
+  return true;
+}
+
 /// FIXME: Works around a gcc miscompilation with -fstrict-aliasing.
 LLVM_ATTRIBUTE_NOINLINE
 static unsigned getNumJTEntries(const std::vector<MachineJumpTableEntry> &JT,
@@ -559,15 +573,10 @@ unsigned ARMBaseInstrInfo::GetInstSizeInBytes(const MachineInstr *MI) const {
   // If this machine instr is an inline asm, measure it.
   if (MI->getOpcode() == ARM::INLINEASM)
     return getInlineAsmLength(MI->getOperand(0).getSymbolName(), *MAI);
-  if (MI->isLabel())
-    return 0;
   unsigned Opc = MI->getOpcode();
   switch (Opc) {
-  case TargetOpcode::IMPLICIT_DEF:
-  case TargetOpcode::KILL:
-  case TargetOpcode::PROLOG_LABEL:
-  case TargetOpcode::EH_LABEL:
-  case TargetOpcode::DBG_VALUE:
+  default:
+    // pseudo-instruction sizes are zero.
     return 0;
   case TargetOpcode::BUNDLE:
     return getInstBundleLength(MI);
@@ -630,9 +639,6 @@ unsigned ARMBaseInstrInfo::GetInstSizeInBytes(const MachineInstr *MI) const {
       ++NumEntries;
     return NumEntries * EntrySize + InstSize;
   }
-  default:
-    // Otherwise, pseudo-instruction sizes are zero.
-    return 0;
   }
 }
 
@@ -1243,6 +1249,7 @@ static unsigned duplicateCPV(MachineFunction &MF, unsigned &CPI) {
 
   unsigned PCLabelId = AFI->createPICLabelUId();
   ARMConstantPoolValue *NewCPV = 0;
+
   // FIXME: The below assumes PIC relocation model and that the function
   // is Thumb mode (t1 or t2). PCAdjustment would be 8 for ARM mode PIC, and
   // zero for non-PIC in ARM or Thumb. The callers are all of thumb LDR
@@ -1325,10 +1332,11 @@ bool ARMBaseInstrInfo::produceSameValue(const MachineInstr *MI0,
       Opcode == ARM::t2LDRpci_pic ||
       Opcode == ARM::tLDRpci ||
       Opcode == ARM::tLDRpci_pic ||
-      Opcode == ARM::MOV_ga_dyn ||
+      Opcode == ARM::LDRLIT_ga_pcrel ||
+      Opcode == ARM::LDRLIT_ga_pcrel_ldr ||
+      Opcode == ARM::tLDRLIT_ga_pcrel ||
       Opcode == ARM::MOV_ga_pcrel ||
       Opcode == ARM::MOV_ga_pcrel_ldr ||
-      Opcode == ARM::t2MOV_ga_dyn ||
       Opcode == ARM::t2MOV_ga_pcrel) {
     if (MI1->getOpcode() != Opcode)
       return false;
@@ -1340,10 +1348,11 @@ bool ARMBaseInstrInfo::produceSameValue(const MachineInstr *MI0,
     if (MO0.getOffset() != MO1.getOffset())
       return false;
 
-    if (Opcode == ARM::MOV_ga_dyn ||
+    if (Opcode == ARM::LDRLIT_ga_pcrel ||
+        Opcode == ARM::LDRLIT_ga_pcrel_ldr ||
+        Opcode == ARM::tLDRLIT_ga_pcrel ||
         Opcode == ARM::MOV_ga_pcrel ||
         Opcode == ARM::MOV_ga_pcrel_ldr ||
-        Opcode == ARM::t2MOV_ga_dyn ||
         Opcode == ARM::t2MOV_ga_pcrel)
       // Ignore the PC labels.
       return MO0.getGlobal() == MO1.getGlobal();
@@ -1534,7 +1543,7 @@ bool ARMBaseInstrInfo::isSchedulingBoundary(const MachineInstr *MI,
     return false;
 
   // Terminators and labels can't be scheduled around.
-  if (MI->isTerminator() || MI->isLabel())
+  if (MI->isTerminator() || MI->isPosition())
     return true;
 
   // Treat the start of the IT block as a scheduling boundary, but schedule
@@ -1857,12 +1866,21 @@ void llvm::emitARMRegPlusImmediate(MachineBasicBlock &MBB,
   }
 }
 
-bool llvm::tryFoldSPUpdateIntoPushPop(MachineFunction &MF,
-                                      MachineInstr *MI,
+static bool isAnySubRegLive(unsigned Reg, const TargetRegisterInfo *TRI,
+                            MachineInstr *MI) {
+  for (MCSubRegIterator Subreg(Reg, TRI, /* IncludeSelf */ true);
+       Subreg.isValid(); ++Subreg)
+    if (MI->getParent()->computeRegisterLiveness(TRI, *Subreg, MI) !=
+        MachineBasicBlock::LQR_Dead)
+      return true;
+  return false;
+}
+bool llvm::tryFoldSPUpdateIntoPushPop(const ARMSubtarget &Subtarget,
+                                      MachineFunction &MF, MachineInstr *MI,
                                       unsigned NumBytes) {
   // This optimisation potentially adds lots of load and store
   // micro-operations, it's only really a great benefit to code-size.
-  if (!MF.getFunction()->hasFnAttribute(Attribute::MinSize))
+  if (!Subtarget.isMinSize())
     return false;
 
   // If only one register is pushed/popped, LLVM can use an LDR/STR
@@ -1911,7 +1929,6 @@ bool llvm::tryFoldSPUpdateIntoPushPop(MachineFunction &MF,
   for (int i = MI->getNumOperands() - 1; i >= RegListIdx; --i)
     RegList.push_back(MI->getOperand(i));
 
-  MachineBasicBlock *MBB = MI->getParent();
   const TargetRegisterInfo *TRI = MF.getRegInfo().getTargetRegisterInfo();
   const MCPhysReg *CSRegs = TRI->getCalleeSavedRegs(&MF);
 
@@ -1932,9 +1949,11 @@ bool llvm::tryFoldSPUpdateIntoPushPop(MachineFunction &MF,
     // registers live within the function we might clobber a return value
     // register; the other way a register can be live here is if it's
     // callee-saved.
+    // TODO: Currently, computeRegisterLiveness() does not report "live" if a
+    // sub reg is live. When computeRegisterLiveness() works for sub reg, it
+    // can replace isAnySubRegLive().
     if (isCalleeSavedRegister(CurReg, CSRegs) ||
-        MBB->computeRegisterLiveness(TRI, CurReg, MI) !=
-        MachineBasicBlock::LQR_Dead) {
+        isAnySubRegLive(CurReg, TRI, MI)) {
      // VFP pops don't allow holes in the register list, so any skip is fatal
      // for our transformation. GPR pops do, so we should just keep looking.
      if (IsVFPPushPop)
@@ -2159,7 +2178,7 @@ static bool isSuitableForMask(MachineInstr *&MI, unsigned SrcReg,
     // Walk down one instruction which is potentially an 'and'.
     const MachineInstr &Copy = *MI;
     MachineBasicBlock::iterator AND(
-      llvm::next(MachineBasicBlock::iterator(MI)));
+      std::next(MachineBasicBlock::iterator(MI)));
     if (AND == MI->getParent()->end()) return false;
     MI = AND;
     return isSuitableForMask(MI, Copy.getOperand(0).getReg(),
@@ -2236,8 +2255,9 @@ optimizeCompareInstr(MachineInstr *CmpInstr, unsigned SrcReg, unsigned SrcReg2,
   if (CmpMask != ~0) {
     if (!isSuitableForMask(MI, SrcReg, CmpMask, false) || isPredicated(MI)) {
       MI = 0;
-      for (MachineRegisterInfo::use_iterator UI = MRI->use_begin(SrcReg),
-           UE = MRI->use_end(); UI != UE; ++UI) {
+      for (MachineRegisterInfo::use_instr_iterator
+           UI = MRI->use_instr_begin(SrcReg), UE = MRI->use_instr_end();
+           UI != UE; ++UI) {
         if (UI->getParent() != CmpInstr->getParent()) continue;
         MachineInstr *PotentialAND = &*UI;
         if (!isSuitableForMask(PotentialAND, SrcReg, CmpMask, true) ||
@@ -2947,7 +2967,7 @@ ARMBaseInstrInfo::getNumMicroOps(const InstrItineraryData *ItinData,
         break;
       }
       return UOps;
-    } else if (Subtarget.isCortexA8()) {
+    } else if (Subtarget.isCortexA8() || Subtarget.isCortexA7()) {
       if (NumRegs < 4)
         return 2;
       // 4 registers would be issued: 2, 2.
@@ -2984,7 +3004,7 @@ ARMBaseInstrInfo::getVLDMDefCycle(const InstrItineraryData *ItinData,
     return ItinData->getOperandCycle(DefClass, DefIdx);
 
   int DefCycle;
-  if (Subtarget.isCortexA8()) {
+  if (Subtarget.isCortexA8() || Subtarget.isCortexA7()) {
     // (regno / 2) + (regno % 2) + 1
     DefCycle = RegNo / 2 + 1;
     if (RegNo % 2)
@@ -3025,7 +3045,7 @@ ARMBaseInstrInfo::getLDMDefCycle(const InstrItineraryData *ItinData,
     return ItinData->getOperandCycle(DefClass, DefIdx);
 
   int DefCycle;
-  if (Subtarget.isCortexA8()) {
+  if (Subtarget.isCortexA8() || Subtarget.isCortexA7()) {
     // 4 registers would be issued: 1, 2, 1.
     // 5 registers would be issued: 1, 2, 2.
     DefCycle = RegNo / 2;
@@ -3059,7 +3079,7 @@ ARMBaseInstrInfo::getVSTMUseCycle(const InstrItineraryData *ItinData,
     return ItinData->getOperandCycle(UseClass, UseIdx);
 
   int UseCycle;
-  if (Subtarget.isCortexA8()) {
+  if (Subtarget.isCortexA8() || Subtarget.isCortexA7()) {
     // (regno / 2) + (regno % 2) + 1
     UseCycle = RegNo / 2 + 1;
     if (RegNo % 2)
@@ -3099,7 +3119,7 @@ ARMBaseInstrInfo::getSTMUseCycle(const InstrItineraryData *ItinData,
     return ItinData->getOperandCycle(UseClass, UseIdx);
 
   int UseCycle;
-  if (Subtarget.isCortexA8()) {
+  if (Subtarget.isCortexA8() || Subtarget.isCortexA7()) {
     UseCycle = RegNo / 2;
     if (UseCycle < 2)
       UseCycle = 2;
@@ -3236,8 +3256,7 @@ static const MachineInstr *getBundledDefMI(const TargetRegisterInfo *TRI,
   Dist = 0;
 
   MachineBasicBlock::const_iterator I = MI; ++I;
-  MachineBasicBlock::const_instr_iterator II =
-    llvm::prior(I.getInstrIterator());
+  MachineBasicBlock::const_instr_iterator II = std::prev(I.getInstrIterator());
   assert(II->isInsideBundle() && "Empty bundle?");
 
   int Idx = -1;
@@ -3290,7 +3309,7 @@ static int adjustDefLatency(const ARMSubtarget &Subtarget,
                             const MachineInstr *DefMI,
                             const MCInstrDesc *DefMCID, unsigned DefAlign) {
   int Adjust = 0;
-  if (Subtarget.isCortexA8() || Subtarget.isLikeA9()) {
+  if (Subtarget.isCortexA8() || Subtarget.isLikeA9() || Subtarget.isCortexA7()) {
     // FIXME: Shifter op hack: no shift (i.e. [r +/- r]) or [r + r << 2]
     // variants are one cycle cheaper.
     switch (DefMCID->getOpcode()) {
@@ -3591,7 +3610,8 @@ ARMBaseInstrInfo::getOperandLatency(const InstrItineraryData *ItinData,
                                 UseMCID, UseIdx, UseAlign);
 
   if (Latency > 1 &&
-      (Subtarget.isCortexA8() || Subtarget.isLikeA9())) {
+      (Subtarget.isCortexA8() || Subtarget.isLikeA9() ||
+       Subtarget.isCortexA7())) {
     // FIXME: Shifter op hack: no shift (i.e. [r +/- r]) or [r + r << 2]
     // variants are one cycle cheaper.
     switch (DefMCID.getOpcode()) {
@@ -3684,6 +3704,7 @@ ARMBaseInstrInfo::getOperandLatency(const InstrItineraryData *ItinData,
     case ARM::VLD3d16Pseudo:
     case ARM::VLD3d32Pseudo:
     case ARM::VLD1d64TPseudo:
+    case ARM::VLD1d64TPseudoWB_fixed:
     case ARM::VLD3d8Pseudo_UPD:
     case ARM::VLD3d16Pseudo_UPD:
     case ARM::VLD3d32Pseudo_UPD:
@@ -3700,6 +3721,7 @@ ARMBaseInstrInfo::getOperandLatency(const InstrItineraryData *ItinData,
     case ARM::VLD4d16Pseudo:
     case ARM::VLD4d32Pseudo:
     case ARM::VLD1d64QPseudo:
+    case ARM::VLD1d64QPseudoWB_fixed:
     case ARM::VLD4d8Pseudo_UPD:
     case ARM::VLD4d16Pseudo_UPD:
     case ARM::VLD4d32Pseudo_UPD:
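
For readers tracing the new IsCPSRDead<MachineInstr> specialization above: it completes a template predicate so that the ARMv8 IT-block eligibility check in ARMFeatures.h can be shared between codegen and the assembler side. The standalone toy model below restates the invariant the loop enforces, using simplified stand-in types (Reg, Operand, Instr are illustrative only, not the real MachineOperand/MachineInstr API): the flags count as dead only when every definition of CPSR in the instruction is marked dead, while register uses and undef operands are ignored.

// Toy model of the IsCPSRDead<MachineInstr> loop; plain C++11, no LLVM
// headers. All types are simplified stand-ins, not the real API.
#include <cassert>
#include <vector>

enum Reg { NoReg, CPSR, R0 };

struct Operand {
  Reg reg;      // which register, if any
  bool isDef;   // operand writes the register (vs. reads it)
  bool isUndef; // register contents are not meaningful here
  bool isDead;  // a def whose result is never read
};

struct Instr {
  std::vector<Operand> operands;
};

// Flags are "dead" only when every *definition* of CPSR is marked dead;
// uses and undef operands are skipped, exactly as in the patch.
static bool isCPSRDead(const Instr &MI) {
  for (const Operand &MO : MI.operands) {
    if (MO.reg == NoReg || MO.isUndef || !MO.isDef)
      continue;
    if (MO.reg != CPSR)
      continue;
    if (!MO.isDead)
      return false;
  }
  return true; // all definitions of CPSR are dead (or there were none)
}

int main() {
  // Something like "adds r0, r0, #1" where the flags are never consumed.
  Instr Adds;
  Adds.operands.push_back({R0,   /*isDef=*/true, false, /*isDead=*/false});
  Adds.operands.push_back({CPSR, /*isDef=*/true, false, /*isDead=*/true});
  assert(isCPSRDead(Adds));

  Adds.operands[1].isDead = false; // now a later instruction reads the flags
  assert(!isCPSRDead(Adds));
  return 0;
}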

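Similarly, the isAnySubRegLive helper added to tryFoldSPUpdateIntoPushPop exists because, as the TODO in that hunk notes, computeRegisterLiveness answers only for the exact register queried: folding an SP update into a VFP pop of a D register would be unsafe if one of its S sub-registers were still live. A minimal sketch of that failure mode follows, with toy registers standing in for the real MCSubRegIterator/TargetRegisterInfo machinery (everything here is illustrative, not LLVM API).

// Why an exact-register liveness query is not enough: plain C++11 sketch
// with toy registers. On ARM VFP, D0 overlaps the two single-precision
// registers S0 and S1.
#include <cassert>
#include <set>
#include <vector>

enum Reg { S0, S1, D0 };

// The register and everything it contains (cf. MCSubRegIterator with
// IncludeSelf=true in the patch).
static std::vector<Reg> subRegsInclusive(Reg R) {
  if (R == D0)
    return {D0, S0, S1};
  return {R};
}

// Stand-in for MachineBasicBlock::computeRegisterLiveness as characterized
// by the TODO: it reports liveness for the exact register only.
static bool isLiveExact(const std::set<Reg> &Live, Reg R) {
  return Live.count(R) != 0;
}

// Mirrors isAnySubRegLive: conservatively live if any sub-register is.
static bool isAnySubRegLive(const std::set<Reg> &Live, Reg R) {
  for (Reg Sub : subRegsInclusive(R))
    if (isLiveExact(Live, Sub))
      return true;
  return false;
}

int main() {
  std::set<Reg> Live = {S0};         // only the low half of D0 is live
  assert(!isLiveExact(Live, D0));    // the exact query would say "dead"...
  assert(isAnySubRegLive(Live, D0)); // ...the sub-register walk disagrees
  return 0;
}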