| author | Stephen Hines <srhines@google.com> | 2013-01-21 13:15:17 -0800 |
|---|---|---|
| committer | Stephen Hines <srhines@google.com> | 2013-01-21 13:15:17 -0800 |
| commit | 059800f9e3fee2852672f846d91a2da14da7783a | |
| tree | a6ef16b7263252ae1b8069295ea9cbbae0d9467d /lib/Target/ARM/ARMBaseInstrInfo.cpp | |
| parent | cbefa15de4821975bb99fc6d74b3bdb42b2df45c | |
| parent | b6714227eda5d499f7667fc865f931126a8dc488 | |
Merge remote-tracking branch 'upstream/master' into merge-llvm
Conflicts:
lib/CodeGen/AsmPrinter/AsmPrinter.cpp
lib/CodeGen/AsmPrinter/AsmPrinterInlineAsm.cpp
lib/MC/MCAssembler.cpp
lib/Support/Atomic.cpp
lib/Support/Memory.cpp
lib/Target/ARM/ARMJITInfo.cpp
Change-Id: Ib339baf88df5b04870c8df1bedcfe1f877ccab8d
Diffstat (limited to 'lib/Target/ARM/ARMBaseInstrInfo.cpp')
| -rw-r--r-- | lib/Target/ARM/ARMBaseInstrInfo.cpp | 654 |
1 file changed, 597 insertions(+), 57 deletions(-)
```diff
diff --git a/lib/Target/ARM/ARMBaseInstrInfo.cpp b/lib/Target/ARM/ARMBaseInstrInfo.cpp
index e2f0d7d..0076910 100644
--- a/lib/Target/ARM/ARMBaseInstrInfo.cpp
+++ b/lib/Target/ARM/ARMBaseInstrInfo.cpp
@@ -18,9 +18,7 @@
 #include "ARMHazardRecognizer.h"
 #include "ARMMachineFunctionInfo.h"
 #include "MCTargetDesc/ARMAddressingModes.h"
-#include "llvm/Constants.h"
-#include "llvm/Function.h"
-#include "llvm/GlobalValue.h"
+#include "llvm/ADT/STLExtras.h"
 #include "llvm/CodeGen/LiveVariables.h"
 #include "llvm/CodeGen/MachineConstantPool.h"
 #include "llvm/CodeGen/MachineFrameInfo.h"
@@ -29,12 +27,14 @@
 #include "llvm/CodeGen/MachineMemOperand.h"
 #include "llvm/CodeGen/MachineRegisterInfo.h"
 #include "llvm/CodeGen/SelectionDAGNodes.h"
+#include "llvm/IR/Constants.h"
+#include "llvm/IR/Function.h"
+#include "llvm/IR/GlobalValue.h"
 #include "llvm/MC/MCAsmInfo.h"
 #include "llvm/Support/BranchProbability.h"
 #include "llvm/Support/CommandLine.h"
 #include "llvm/Support/Debug.h"
 #include "llvm/Support/ErrorHandling.h"
-#include "llvm/ADT/STLExtras.h"
 
 #define GET_INSTRINFO_CTOR
 #include "ARMGenInstrInfo.inc"
@@ -49,6 +49,11 @@ static cl::opt<bool>
 WidenVMOVS("widen-vmovs", cl::Hidden, cl::init(true),
            cl::desc("Widen ARM vmovs to vmovd when possible"));
 
+static cl::opt<unsigned>
+SwiftPartialUpdateClearance("swift-partial-update-clearance",
+                            cl::Hidden, cl::init(12),
+                            cl::desc("Clearance before partial register updates"));
+
 /// ARM_MLxEntry - Record information about MLA / MLS instructions.
 struct ARM_MLxEntry {
   uint16_t MLxOpc;     // MLA / MLS opcode
@@ -101,7 +106,7 @@ CreateTargetHazardRecognizer(const TargetMachine *TM,
     const InstrItineraryData *II = TM->getInstrItineraryData();
     return new ScoreboardHazardRecognizer(II, DAG, "pre-RA-sched");
   }
-  return TargetInstrInfoImpl::CreateTargetHazardRecognizer(TM, DAG);
+  return TargetInstrInfo::CreateTargetHazardRecognizer(TM, DAG);
 }
 
 ScheduleHazardRecognizer *ARMBaseInstrInfo::
@@ -110,7 +115,7 @@ CreateTargetPostRAHazardRecognizer(const InstrItineraryData *II,
   if (Subtarget.isThumb2() || Subtarget.hasVFP2())
     return (ScheduleHazardRecognizer *)
       new ARMHazardRecognizer(II, *this, getRegisterInfo(), Subtarget, DAG);
-  return TargetInstrInfoImpl::CreateTargetPostRAHazardRecognizer(II, DAG);
+  return TargetInstrInfo::CreateTargetPostRAHazardRecognizer(II, DAG);
 }
 
 MachineInstr *
@@ -459,8 +464,9 @@ PredicateInstruction(MachineInstr *MI,
   unsigned Opc = MI->getOpcode();
   if (isUncondBranchOpcode(Opc)) {
     MI->setDesc(get(getMatchingCondBranchOpcode(Opc)));
-    MI->addOperand(MachineOperand::CreateImm(Pred[0].getImm()));
-    MI->addOperand(MachineOperand::CreateReg(Pred[1].getReg(), false));
+    MachineInstrBuilder(*MI->getParent()->getParent(), MI)
+      .addImm(Pred[0].getImm())
+      .addReg(Pred[1].getReg());
     return true;
   }
 
@@ -697,6 +703,8 @@ void ARMBaseInstrInfo::copyPhysReg(MachineBasicBlock &MBB,
     Opc = ARM::VMOVD, BeginIdx = ARM::dsub_0, SubRegs = 3;
   else if (ARM::DQuadRegClass.contains(DestReg, SrcReg))
     Opc = ARM::VMOVD, BeginIdx = ARM::dsub_0, SubRegs = 4;
+  else if (ARM::GPRPairRegClass.contains(DestReg, SrcReg))
+    Opc = ARM::MOVr, BeginIdx = ARM::gsub_0, SubRegs = 2;
 
   else if (ARM::DPairSpcRegClass.contains(DestReg, SrcReg))
     Opc = ARM::VMOVD, BeginIdx = ARM::dsub_0, SubRegs = 2, Spacing = 2;
@@ -786,6 +794,13 @@ storeRegToStackSlot(MachineBasicBlock &MBB, MachineBasicBlock::iterator I,
       AddDefaultPred(BuildMI(MBB, I, DL, get(ARM::VSTRD))
                      .addReg(SrcReg, getKillRegState(isKill))
                      .addFrameIndex(FI).addImm(0).addMemOperand(MMO));
+    } else if (ARM::GPRPairRegClass.hasSubClassEq(RC)) {
+      MachineInstrBuilder MIB =
+        AddDefaultPred(BuildMI(MBB, I, DL, get(ARM::STMIA))
+                       .addFrameIndex(FI))
+        .addMemOperand(MMO);
+      MIB = AddDReg(MIB, SrcReg, ARM::gsub_0, getKillRegState(isKill), TRI);
+      AddDReg(MIB, SrcReg, ARM::gsub_1, 0, TRI);
     } else
       llvm_unreachable("Unknown reg class!");
     break;
@@ -933,6 +948,7 @@ loadRegFromStackSlot(MachineBasicBlock &MBB, MachineBasicBlock::iterator I,
   DebugLoc DL;
   if (I != MBB.end()) DL = I->getDebugLoc();
   MachineFunction &MF = *MBB.getParent();
+  ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>();
   MachineFrameInfo &MFI = *MF.getFrameInfo();
   unsigned Align = MFI.getObjectAlignment(FI);
   MachineMemOperand *MMO =
@@ -958,6 +974,15 @@ loadRegFromStackSlot(MachineBasicBlock &MBB, MachineBasicBlock::iterator I,
     if (ARM::DPRRegClass.hasSubClassEq(RC)) {
       AddDefaultPred(BuildMI(MBB, I, DL, get(ARM::VLDRD), DestReg)
                      .addFrameIndex(FI).addImm(0).addMemOperand(MMO));
+    } else if (ARM::GPRPairRegClass.hasSubClassEq(RC)) {
+      unsigned LdmOpc = AFI->isThumbFunction() ? ARM::t2LDMIA : ARM::LDMIA;
+      MachineInstrBuilder MIB =
+        AddDefaultPred(BuildMI(MBB, I, DL, get(LdmOpc))
+                       .addFrameIndex(FI).addImm(0).addMemOperand(MMO));
+      MIB = AddDReg(MIB, DestReg, ARM::gsub_0, RegState::DefineNoRead, TRI);
+      MIB = AddDReg(MIB, DestReg, ARM::gsub_1, RegState::DefineNoRead, TRI);
+      if (TargetRegisterInfo::isPhysicalRegister(DestReg))
+        MIB.addReg(DestReg, RegState::ImplicitDefine);
     } else
       llvm_unreachable("Unknown reg class!");
     break;
@@ -1130,6 +1155,7 @@ bool ARMBaseInstrInfo::expandPostRAPseudo(MachineBasicBlock::iterator MI) const{
 
   // All clear, widen the COPY.
   DEBUG(dbgs() << "widening:    " << *MI);
+  MachineInstrBuilder MIB(*MI->getParent()->getParent(), MI);
 
   // Get rid of the old <imp-def> of DstRegD.  Leave it if it defines a Q-reg
   // or some other super-register.
@@ -1141,14 +1167,14 @@ bool ARMBaseInstrInfo::expandPostRAPseudo(MachineBasicBlock::iterator MI) const{
   MI->setDesc(get(ARM::VMOVD));
   MI->getOperand(0).setReg(DstRegD);
   MI->getOperand(1).setReg(SrcRegD);
-  AddDefaultPred(MachineInstrBuilder(MI));
+  AddDefaultPred(MIB);
 
   // We are now reading SrcRegD instead of SrcRegS.  This may upset the
   // register scavenger and machine verifier, so we need to indicate that we
   // are reading an undefined value from SrcRegD, but a proper value from
   // SrcRegS.
   MI->getOperand(1).setIsUndef();
-  MachineInstrBuilder(MI).addReg(SrcRegS, RegState::Implicit);
+  MIB.addReg(SrcRegS, RegState::Implicit);
 
   // SrcRegD may actually contain an unrelated value in the ssub_1
   // sub-register.  Don't kill it.  Only kill the ssub_0 sub-register.
@@ -1245,7 +1271,7 @@ reMaterialize(MachineBasicBlock &MBB,
 
 MachineInstr *
 ARMBaseInstrInfo::duplicate(MachineInstr *Orig, MachineFunction &MF) const {
-  MachineInstr *MI = TargetInstrInfoImpl::duplicate(Orig, MF);
+  MachineInstr *MI = TargetInstrInfo::duplicate(Orig, MF);
   switch(Orig->getOpcode()) {
   case ARM::tLDRpci_pic:
   case ARM::t2LDRpci_pic: {
@@ -1349,6 +1375,9 @@ bool ARMBaseInstrInfo::produceSameValue(const MachineInstr *MI0,
 /// only return true if the base pointers are the same and the only differences
 /// between the two addresses is the offset. It also returns the offsets by
 /// reference.
+///
+/// FIXME: remove this in favor of the MachineInstr interface once pre-RA-sched
+/// is permanently disabled.
 bool ARMBaseInstrInfo::areLoadsFromSameBasePtr(SDNode *Load1, SDNode *Load2,
                                                int64_t &Offset1,
                                                int64_t &Offset2) const {
@@ -1389,7 +1418,6 @@ bool ARMBaseInstrInfo::areLoadsFromSameBasePtr(SDNode *Load1, SDNode *Load2,
   case ARM::VLDRD:
   case ARM::VLDRS:
   case ARM::t2LDRi8:
-  case ARM::t2LDRDi8:
   case ARM::t2LDRSHi8:
   case ARM::t2LDRi12:
   case ARM::t2LDRSHi12:
@@ -1424,6 +1452,9 @@ bool ARMBaseInstrInfo::areLoadsFromSameBasePtr(SDNode *Load1, SDNode *Load2,
 /// from the common base address. It returns true if it decides it's desirable
 /// to schedule the two loads together. "NumLoads" is the number of loads that
 /// have already been scheduled after Load1.
+///
+/// FIXME: remove this in favor of the MachineInstr interface once pre-RA-sched
+/// is permanently disabled.
 bool ARMBaseInstrInfo::shouldScheduleLoadsNear(SDNode *Load1, SDNode *Load2,
                                                int64_t Offset1, int64_t Offset2,
                                                unsigned NumLoads) const {
@@ -1528,6 +1559,14 @@ isProfitableToIfCvt(MachineBasicBlock &TMBB,
   return (TCycles + FCycles + TExtra + FExtra) <= UnpredCost;
 }
 
+bool
+ARMBaseInstrInfo::isProfitableToUnpredicate(MachineBasicBlock &TMBB,
+                                            MachineBasicBlock &FMBB) const {
+  // Reduce false anti-dependencies to let Swift's out-of-order execution
+  // engine do its thing.
+  return Subtarget.isSwift();
+}
+
 /// getInstrPredicate - If instruction is predicated, returns its predicate
 /// condition, otherwise returns AL. It also returns the condition code
 /// register by reference.
@@ -1567,7 +1606,7 @@ ARMBaseInstrInfo::commuteInstruction(MachineInstr *MI, bool NewMI) const {
     // MOVCC AL can't be inverted. Shouldn't happen.
     if (CC == ARMCC::AL || PredReg != ARM::CPSR)
       return NULL;
-    MI = TargetInstrInfoImpl::commuteInstruction(MI, NewMI);
+    MI = TargetInstrInfo::commuteInstruction(MI, NewMI);
     if (!MI)
       return NULL;
     // After swapping the MOVCC operands, also invert the condition.
@@ -1576,7 +1615,7 @@ ARMBaseInstrInfo::commuteInstruction(MachineInstr *MI, bool NewMI) const {
     return MI;
   }
   }
-  return TargetInstrInfoImpl::commuteInstruction(MI, NewMI);
+  return TargetInstrInfo::commuteInstruction(MI, NewMI);
 }
 
 /// Identify instructions that can be folded into a MOVCC instruction, and
@@ -1679,7 +1718,7 @@ MachineInstr *ARMBaseInstrInfo::optimizeSelect(MachineInstr *MI,
   // same register as operand 0.
   MachineOperand FalseReg = MI->getOperand(Invert ? 2 : 1);
   FalseReg.setImplicit();
-  NewMI->addOperand(FalseReg);
+  NewMI.addOperand(FalseReg);
   NewMI->tieOperands(0, NewMI->getNumOperands() - 1);
 
   // The caller will erase MI, but not DefMI.
```
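The storeRegToStackSlot/loadRegFromStackSlot hunks above spill a GPRPair by splitting it into its gsub_0/gsub_1 halves and issuing a single STMIA (or LDMIA/t2LDMIA) against the frame slot. A rough standalone model of that decomposition, in plain C++; the pair-to-GPR numbering here is an invented stand-in for LLVM's register enums, not the real API:

```cpp
#include <cstdio>
#include <utility>

// Invented model: pair register P<n> covers GPRs R<2n> and R<2n+1>,
// playing the role of gsub_0 / gsub_1 in the patch.
static std::pair<unsigned, unsigned> pairSubRegs(unsigned pairReg) {
  return {2 * pairReg, 2 * pairReg + 1};
}

// Print the store-multiple a GPRPair spill lowers to: both halves are
// written to the frame slot with one STMIA, as in storeRegToStackSlot.
static void spillPair(unsigned pairReg, int frameIndex) {
  auto [lo, hi] = pairSubRegs(pairReg);
  std::printf("stmia [fi#%d], {r%u, r%u}\n", frameIndex, lo, hi);
}

int main() {
  spillPair(1, 4);  // pair P1 -> "stmia [fi#4], {r2, r3}"
}
```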
```diff
@@ -2342,6 +2381,260 @@ bool ARMBaseInstrInfo::FoldImmediate(MachineInstr *UseMI,
   return true;
 }
 
+static unsigned getNumMicroOpsSwiftLdSt(const InstrItineraryData *ItinData,
+                                        const MachineInstr *MI) {
+  switch (MI->getOpcode()) {
+  default: {
+    const MCInstrDesc &Desc = MI->getDesc();
+    int UOps = ItinData->getNumMicroOps(Desc.getSchedClass());
+    assert(UOps >= 0 && "bad # UOps");
+    return UOps;
+  }
+
+  case ARM::LDRrs:
+  case ARM::LDRBrs:
+  case ARM::STRrs:
+  case ARM::STRBrs: {
+    unsigned ShOpVal = MI->getOperand(3).getImm();
+    bool isSub = ARM_AM::getAM2Op(ShOpVal) == ARM_AM::sub;
+    unsigned ShImm = ARM_AM::getAM2Offset(ShOpVal);
+    if (!isSub &&
+        (ShImm == 0 ||
+         ((ShImm == 1 || ShImm == 2 || ShImm == 3) &&
+          ARM_AM::getAM2ShiftOpc(ShOpVal) == ARM_AM::lsl)))
+      return 1;
+    return 2;
+  }
+
+  case ARM::LDRH:
+  case ARM::STRH: {
+    if (!MI->getOperand(2).getReg())
+      return 1;
+
+    unsigned ShOpVal = MI->getOperand(3).getImm();
+    bool isSub = ARM_AM::getAM2Op(ShOpVal) == ARM_AM::sub;
+    unsigned ShImm = ARM_AM::getAM2Offset(ShOpVal);
+    if (!isSub &&
+        (ShImm == 0 ||
+         ((ShImm == 1 || ShImm == 2 || ShImm == 3) &&
+          ARM_AM::getAM2ShiftOpc(ShOpVal) == ARM_AM::lsl)))
+      return 1;
+    return 2;
+  }
+
+  case ARM::LDRSB:
+  case ARM::LDRSH:
+    return (ARM_AM::getAM3Op(MI->getOperand(3).getImm()) == ARM_AM::sub) ? 3:2;
+
+  case ARM::LDRSB_POST:
+  case ARM::LDRSH_POST: {
+    unsigned Rt = MI->getOperand(0).getReg();
+    unsigned Rm = MI->getOperand(3).getReg();
+    return (Rt == Rm) ? 4 : 3;
+  }
+
+  case ARM::LDR_PRE_REG:
+  case ARM::LDRB_PRE_REG: {
+    unsigned Rt = MI->getOperand(0).getReg();
+    unsigned Rm = MI->getOperand(3).getReg();
+    if (Rt == Rm)
+      return 3;
+    unsigned ShOpVal = MI->getOperand(4).getImm();
+    bool isSub = ARM_AM::getAM2Op(ShOpVal) == ARM_AM::sub;
+    unsigned ShImm = ARM_AM::getAM2Offset(ShOpVal);
+    if (!isSub &&
+        (ShImm == 0 ||
+         ((ShImm == 1 || ShImm == 2 || ShImm == 3) &&
+          ARM_AM::getAM2ShiftOpc(ShOpVal) == ARM_AM::lsl)))
+      return 2;
+    return 3;
+  }
+
+  case ARM::STR_PRE_REG:
+  case ARM::STRB_PRE_REG: {
+    unsigned ShOpVal = MI->getOperand(4).getImm();
+    bool isSub = ARM_AM::getAM2Op(ShOpVal) == ARM_AM::sub;
+    unsigned ShImm = ARM_AM::getAM2Offset(ShOpVal);
+    if (!isSub &&
+        (ShImm == 0 ||
+         ((ShImm == 1 || ShImm == 2 || ShImm == 3) &&
+          ARM_AM::getAM2ShiftOpc(ShOpVal) == ARM_AM::lsl)))
+      return 2;
+    return 3;
+  }
+
+  case ARM::LDRH_PRE:
+  case ARM::STRH_PRE: {
+    unsigned Rt = MI->getOperand(0).getReg();
+    unsigned Rm = MI->getOperand(3).getReg();
+    if (!Rm)
+      return 2;
+    if (Rt == Rm)
+      return 3;
+    return (ARM_AM::getAM3Op(MI->getOperand(4).getImm()) == ARM_AM::sub)
+      ? 3 : 2;
+  }
+
+  case ARM::LDR_POST_REG:
+  case ARM::LDRB_POST_REG:
+  case ARM::LDRH_POST: {
+    unsigned Rt = MI->getOperand(0).getReg();
+    unsigned Rm = MI->getOperand(3).getReg();
+    return (Rt == Rm) ? 3 : 2;
+  }
+
+  case ARM::LDR_PRE_IMM:
+  case ARM::LDRB_PRE_IMM:
+  case ARM::LDR_POST_IMM:
+  case ARM::LDRB_POST_IMM:
+  case ARM::STRB_POST_IMM:
+  case ARM::STRB_POST_REG:
+  case ARM::STRB_PRE_IMM:
+  case ARM::STRH_POST:
+  case ARM::STR_POST_IMM:
+  case ARM::STR_POST_REG:
+  case ARM::STR_PRE_IMM:
+    return 2;
+
+  case ARM::LDRSB_PRE:
+  case ARM::LDRSH_PRE: {
+    unsigned Rm = MI->getOperand(3).getReg();
+    if (Rm == 0)
+      return 3;
+    unsigned Rt = MI->getOperand(0).getReg();
+    if (Rt == Rm)
+      return 4;
+    unsigned ShOpVal = MI->getOperand(4).getImm();
+    bool isSub = ARM_AM::getAM2Op(ShOpVal) == ARM_AM::sub;
+    unsigned ShImm = ARM_AM::getAM2Offset(ShOpVal);
+    if (!isSub &&
+        (ShImm == 0 ||
+         ((ShImm == 1 || ShImm == 2 || ShImm == 3) &&
+          ARM_AM::getAM2ShiftOpc(ShOpVal) == ARM_AM::lsl)))
+      return 3;
+    return 4;
+  }
+
+  case ARM::LDRD: {
+    unsigned Rt = MI->getOperand(0).getReg();
+    unsigned Rn = MI->getOperand(2).getReg();
+    unsigned Rm = MI->getOperand(3).getReg();
+    if (Rm)
+      return (ARM_AM::getAM3Op(MI->getOperand(4).getImm()) == ARM_AM::sub) ?4:3;
+    return (Rt == Rn) ? 3 : 2;
+  }
+
+  case ARM::STRD: {
+    unsigned Rm = MI->getOperand(3).getReg();
+    if (Rm)
+      return (ARM_AM::getAM3Op(MI->getOperand(4).getImm()) == ARM_AM::sub) ?4:3;
+    return 2;
+  }
+
+  case ARM::LDRD_POST:
+  case ARM::t2LDRD_POST:
+    return 3;
+
+  case ARM::STRD_POST:
+  case ARM::t2STRD_POST:
+    return 4;
+
+  case ARM::LDRD_PRE: {
+    unsigned Rt = MI->getOperand(0).getReg();
+    unsigned Rn = MI->getOperand(3).getReg();
+    unsigned Rm = MI->getOperand(4).getReg();
+    if (Rm)
+      return (ARM_AM::getAM3Op(MI->getOperand(5).getImm()) == ARM_AM::sub) ?5:4;
+    return (Rt == Rn) ? 4 : 3;
+  }
+
+  case ARM::t2LDRD_PRE: {
+    unsigned Rt = MI->getOperand(0).getReg();
+    unsigned Rn = MI->getOperand(3).getReg();
+    return (Rt == Rn) ? 4 : 3;
+  }
+
+  case ARM::STRD_PRE: {
+    unsigned Rm = MI->getOperand(4).getReg();
+    if (Rm)
+      return (ARM_AM::getAM3Op(MI->getOperand(5).getImm()) == ARM_AM::sub) ?5:4;
+    return 3;
+  }
+
+  case ARM::t2STRD_PRE:
+    return 3;
+
+  case ARM::t2LDR_POST:
+  case ARM::t2LDRB_POST:
+  case ARM::t2LDRB_PRE:
+  case ARM::t2LDRSBi12:
+  case ARM::t2LDRSBi8:
+  case ARM::t2LDRSBpci:
+  case ARM::t2LDRSBs:
+  case ARM::t2LDRH_POST:
+  case ARM::t2LDRH_PRE:
+  case ARM::t2LDRSBT:
+  case ARM::t2LDRSB_POST:
+  case ARM::t2LDRSB_PRE:
+  case ARM::t2LDRSH_POST:
+  case ARM::t2LDRSH_PRE:
+  case ARM::t2LDRSHi12:
+  case ARM::t2LDRSHi8:
+  case ARM::t2LDRSHpci:
+  case ARM::t2LDRSHs:
+    return 2;
+
+  case ARM::t2LDRDi8: {
+    unsigned Rt = MI->getOperand(0).getReg();
+    unsigned Rn = MI->getOperand(2).getReg();
+    return (Rt == Rn) ? 3 : 2;
+  }
+
+  case ARM::t2STRB_POST:
+  case ARM::t2STRB_PRE:
+  case ARM::t2STRBs:
+  case ARM::t2STRDi8:
+  case ARM::t2STRH_POST:
+  case ARM::t2STRH_PRE:
+  case ARM::t2STRHs:
+  case ARM::t2STR_POST:
+  case ARM::t2STR_PRE:
+  case ARM::t2STRs:
+    return 2;
+  }
+}
+
+// Return the number of 32-bit words loaded by LDM or stored by STM. If this
+// can't be easily determined, return 0 (missing MachineMemOperand).
+//
+// FIXME: The current MachineInstr design does not support relying on machine
+// mem operands to determine the width of a memory access. Instead, we expect
+// the target to provide this information based on the instruction opcode and
+// operands. However, using MachineMemOperand is the best solution now for
+// two reasons:
+//
+// 1) getNumMicroOps tries to infer LDM memory width from the total number of
+// MI operands. This is much more dangerous than using the MachineMemOperand
+// sizes because CodeGen passes can insert/remove optional machine operands. In
+// fact, it's totally incorrect for preRA passes and appears to be wrong for
+// postRA passes as well.
+//
+// 2) getNumLDMAddresses is only used by the scheduling machine model and any
+// machine model that calls this should handle the unknown (zero size) case.
+//
+// Long term, we should require a target hook that verifies MachineMemOperand
+// sizes during MC lowering. That target hook should be local to MC lowering
+// because we can't ensure that it is aware of other MI forms. Doing this will
+// ensure that MachineMemOperands are correctly propagated through all passes.
+unsigned ARMBaseInstrInfo::getNumLDMAddresses(const MachineInstr *MI) const {
+  unsigned Size = 0;
+  for (MachineInstr::mmo_iterator I = MI->memoperands_begin(),
+       E = MI->memoperands_end(); I != E; ++I) {
+    Size += (*I)->getSize();
+  }
+  return Size / 4;
+}
+
 unsigned
 ARMBaseInstrInfo::getNumMicroOps(const InstrItineraryData *ItinData,
                                  const MachineInstr *MI) const {
@@ -2351,8 +2644,12 @@ ARMBaseInstrInfo::getNumMicroOps(const InstrItineraryData *ItinData,
   const MCInstrDesc &Desc = MI->getDesc();
   unsigned Class = Desc.getSchedClass();
   int ItinUOps = ItinData->getNumMicroOps(Class);
-  if (ItinUOps >= 0)
+  if (ItinUOps >= 0) {
+    if (Subtarget.isSwift() && (Desc.mayLoad() || Desc.mayStore()))
+      return getNumMicroOpsSwiftLdSt(ItinData, MI);
+
     return ItinUOps;
+  }
 
   unsigned Opc = MI->getOpcode();
   switch (Opc) {
@@ -2421,7 +2718,43 @@ ARMBaseInstrInfo::getNumMicroOps(const InstrItineraryData *ItinData,
   case ARM::t2STMIA_UPD:
   case ARM::t2STMDB_UPD: {
     unsigned NumRegs = MI->getNumOperands() - Desc.getNumOperands() + 1;
-    if (Subtarget.isCortexA8()) {
+    if (Subtarget.isSwift()) {
+      // rdar://8402126
+      int UOps = 1 + NumRegs;  // One for address computation, one for each ld / st.
+      switch (Opc) {
+      default: break;
+      case ARM::VLDMDIA_UPD:
+      case ARM::VLDMDDB_UPD:
+      case ARM::VLDMSIA_UPD:
+      case ARM::VLDMSDB_UPD:
+      case ARM::VSTMDIA_UPD:
+      case ARM::VSTMDDB_UPD:
+      case ARM::VSTMSIA_UPD:
+      case ARM::VSTMSDB_UPD:
+      case ARM::LDMIA_UPD:
+      case ARM::LDMDA_UPD:
+      case ARM::LDMDB_UPD:
+      case ARM::LDMIB_UPD:
+      case ARM::STMIA_UPD:
+      case ARM::STMDA_UPD:
+      case ARM::STMDB_UPD:
+      case ARM::STMIB_UPD:
+      case ARM::tLDMIA_UPD:
+      case ARM::tSTMIA_UPD:
+      case ARM::t2LDMIA_UPD:
+      case ARM::t2LDMDB_UPD:
+      case ARM::t2STMIA_UPD:
+      case ARM::t2STMDB_UPD:
+        ++UOps;  // One for base register writeback.
+        break;
+      case ARM::LDMIA_RET:
+      case ARM::tPOP_RET:
+      case ARM::t2LDMIA_RET:
+        UOps += 2;  // One for base reg wb, one for write to pc.
+        break;
+      }
+      return UOps;
+    } else if (Subtarget.isCortexA8()) {
       if (NumRegs < 4)
         return 2;
       // 4 registers would be issued: 2, 2.
@@ -2430,7 +2763,7 @@ ARMBaseInstrInfo::getNumMicroOps(const InstrItineraryData *ItinData,
       if (NumRegs % 2)
         ++A8UOps;
       return A8UOps;
-    } else if (Subtarget.isCortexA9()) {
+    } else if (Subtarget.isLikeA9() || Subtarget.isSwift()) {
       int A9UOps = (NumRegs / 2);
       // If there are odd number of registers or if it's not 64-bit aligned,
       // then it takes an extra AGU (Address Generation Unit) cycle.
```
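One rule recurs throughout getNumMicroOpsSwiftLdSt above: a register-offset load or store costs a single micro-op only when the offset register is added (not subtracted) and is either unshifted or shifted left by 1-3 bits; anything else needs an extra micro-op because the offset cannot fold into address generation. A minimal sketch of that recurring predicate in plain C++; the enums are stand-ins for the ARM_AM helpers, not LLVM's actual types:

```cpp
#include <cstdio>

// Hypothetical stand-ins for ARM_AM::AddrOpc / ARM_AM::ShiftOpc.
enum AddrOp { Add, Sub };
enum ShiftOp { Lsl, Lsr, Asr, Ror };

// Mirrors the recurring check in getNumMicroOpsSwiftLdSt: the offset folds
// into the address-generation step (1 uop) only for "add" with no shift or
// with lsl #1..#3; every other form costs a second micro-op.
static unsigned swiftLdStUOps(AddrOp op, ShiftOp shOpc, unsigned shImm) {
  bool foldable = (op == Add) &&
                  (shImm == 0 || (shImm >= 1 && shImm <= 3 && shOpc == Lsl));
  return foldable ? 1 : 2;
}

int main() {
  std::printf("ldr r0, [r1, r2]         -> %u uop(s)\n", swiftLdStUOps(Add, Lsl, 0));
  std::printf("ldr r0, [r1, r2, lsl #2] -> %u uop(s)\n", swiftLdStUOps(Add, Lsl, 2));
  std::printf("ldr r0, [r1, -r2]        -> %u uop(s)\n", swiftLdStUOps(Sub, Lsl, 0));
  std::printf("ldr r0, [r1, r2, lsr #1] -> %u uop(s)\n", swiftLdStUOps(Add, Lsr, 1));
}
```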
```diff
@@ -2463,7 +2796,7 @@ ARMBaseInstrInfo::getVLDMDefCycle(const InstrItineraryData *ItinData,
     DefCycle = RegNo / 2 + 1;
     if (RegNo % 2)
       ++DefCycle;
-  } else if (Subtarget.isCortexA9()) {
+  } else if (Subtarget.isLikeA9() || Subtarget.isSwift()) {
     DefCycle = RegNo;
     bool isSLoad = false;
 
@@ -2507,7 +2840,7 @@ ARMBaseInstrInfo::getLDMDefCycle(const InstrItineraryData *ItinData,
       DefCycle = 1;
     // Result latency is issue cycle + 2: E2.
     DefCycle += 2;
-  } else if (Subtarget.isCortexA9()) {
+  } else if (Subtarget.isLikeA9() || Subtarget.isSwift()) {
     DefCycle = (RegNo / 2);
     // If there are odd number of registers or if it's not 64-bit aligned,
     // then it takes an extra AGU (Address Generation Unit) cycle.
@@ -2538,7 +2871,7 @@ ARMBaseInstrInfo::getVSTMUseCycle(const InstrItineraryData *ItinData,
     UseCycle = RegNo / 2 + 1;
     if (RegNo % 2)
       ++UseCycle;
-  } else if (Subtarget.isCortexA9()) {
+  } else if (Subtarget.isLikeA9() || Subtarget.isSwift()) {
     UseCycle = RegNo;
     bool isSStore = false;
 
@@ -2579,7 +2912,7 @@ ARMBaseInstrInfo::getSTMUseCycle(const InstrItineraryData *ItinData,
       UseCycle = 2;
     // Read in E3.
     UseCycle += 2;
-  } else if (Subtarget.isCortexA9()) {
+  } else if (Subtarget.isLikeA9() || Subtarget.isSwift()) {
     UseCycle = (RegNo / 2);
     // If there are odd number of registers or if it's not 64-bit aligned,
     // then it takes an extra AGU (Address Generation Unit) cycle.
@@ -2764,7 +3097,7 @@ static int adjustDefLatency(const ARMSubtarget &Subtarget,
                             const MachineInstr *DefMI,
                             const MCInstrDesc *DefMCID, unsigned DefAlign) {
   int Adjust = 0;
-  if (Subtarget.isCortexA8() || Subtarget.isCortexA9()) {
+  if (Subtarget.isCortexA8() || Subtarget.isLikeA9()) {
     // FIXME: Shifter op hack: no shift (i.e. [r +/- r]) or [r + r << 2]
     // variants are one cycle cheaper.
     switch (DefMCID->getOpcode()) {
@@ -2789,9 +3122,40 @@ static int adjustDefLatency(const ARMSubtarget &Subtarget,
       break;
     }
     }
+  } else if (Subtarget.isSwift()) {
+    // FIXME: Properly handle all of the latency adjustments for address
+    // writeback.
+    switch (DefMCID->getOpcode()) {
+    default: break;
+    case ARM::LDRrs:
+    case ARM::LDRBrs: {
+      unsigned ShOpVal = DefMI->getOperand(3).getImm();
+      bool isSub = ARM_AM::getAM2Op(ShOpVal) == ARM_AM::sub;
+      unsigned ShImm = ARM_AM::getAM2Offset(ShOpVal);
+      if (!isSub &&
+          (ShImm == 0 ||
+           ((ShImm == 1 || ShImm == 2 || ShImm == 3) &&
+            ARM_AM::getAM2ShiftOpc(ShOpVal) == ARM_AM::lsl)))
+        Adjust -= 2;
+      else if (!isSub &&
+               ShImm == 1 && ARM_AM::getAM2ShiftOpc(ShOpVal) == ARM_AM::lsr)
+        --Adjust;
+      break;
+    }
+    case ARM::t2LDRs:
+    case ARM::t2LDRBs:
+    case ARM::t2LDRHs:
+    case ARM::t2LDRSHs: {
+      // Thumb2 mode: lsl only.
+      unsigned ShAmt = DefMI->getOperand(3).getImm();
+      if (ShAmt == 0 || ShAmt == 1 || ShAmt == 2 || ShAmt == 3)
+        Adjust -= 2;
+      break;
+    }
+    }
   }
 
-  if (DefAlign < 8 && Subtarget.isCortexA9()) {
+  if (DefAlign < 8 && Subtarget.isLikeA9()) {
     switch (DefMCID->getOpcode()) {
     default: break;
     case ARM::VLD1q8:
@@ -2949,7 +3313,7 @@ ARMBaseInstrInfo::getOperandLatency(const InstrItineraryData *ItinData,
     if (Reg == ARM::CPSR) {
       if (DefMI->getOpcode() == ARM::FMSTAT) {
         // fpscr -> cpsr stalls over 20 cycles on A8 (and earlier?)
-        return Subtarget.isCortexA9() ? 1 : 20;
+        return Subtarget.isLikeA9() ? 1 : 20;
       }
 
       // CPSR set and branch can be paired in the same cycle.
@@ -2965,7 +3329,9 @@ ARMBaseInstrInfo::getOperandLatency(const InstrItineraryData *ItinData,
   // instructions).
   if (Latency > 0 && Subtarget.isThumb2()) {
     const MachineFunction *MF = DefMI->getParent()->getParent();
-    if (MF->getFunction()->hasFnAttr(Attribute::OptimizeForSize))
+    if (MF->getFunction()->getAttributes().
+          hasAttribute(AttributeSet::FunctionIndex,
+                       Attribute::OptimizeForSize))
       --Latency;
   }
   return Latency;
@@ -3015,7 +3381,7 @@ ARMBaseInstrInfo::getOperandLatency(const InstrItineraryData *ItinData,
 
   if (!UseNode->isMachineOpcode()) {
     int Latency = ItinData->getOperandCycle(DefMCID.getSchedClass(), DefIdx);
-    if (Subtarget.isCortexA9())
+    if (Subtarget.isLikeA9() || Subtarget.isSwift())
       return Latency <= 2 ? 1 : Latency - 1;
     else
       return Latency <= 3 ? 1 : Latency - 2;
@@ -3032,7 +3398,7 @@ ARMBaseInstrInfo::getOperandLatency(const InstrItineraryData *ItinData,
                                   UseMCID, UseIdx, UseAlign);
 
   if (Latency > 1 &&
-      (Subtarget.isCortexA8() || Subtarget.isCortexA9())) {
+      (Subtarget.isCortexA8() || Subtarget.isLikeA9())) {
     // FIXME: Shifter op hack: no shift (i.e. [r +/- r]) or [r + r << 2]
     // variants are one cycle cheaper.
     switch (DefMCID.getOpcode()) {
@@ -3059,9 +3425,36 @@ ARMBaseInstrInfo::getOperandLatency(const InstrItineraryData *ItinData,
       break;
     }
     }
+  } else if (DefIdx == 0 && Latency > 2 && Subtarget.isSwift()) {
+    // FIXME: Properly handle all of the latency adjustments for address
+    // writeback.
+    switch (DefMCID.getOpcode()) {
+    default: break;
+    case ARM::LDRrs:
+    case ARM::LDRBrs: {
+      unsigned ShOpVal =
+        cast<ConstantSDNode>(DefNode->getOperand(2))->getZExtValue();
+      unsigned ShImm = ARM_AM::getAM2Offset(ShOpVal);
+      if (ShImm == 0 ||
+          ((ShImm == 1 || ShImm == 2 || ShImm == 3) &&
+           ARM_AM::getAM2ShiftOpc(ShOpVal) == ARM_AM::lsl))
+        Latency -= 2;
+      else if (ShImm == 1 && ARM_AM::getAM2ShiftOpc(ShOpVal) == ARM_AM::lsr)
+        --Latency;
+      break;
+    }
+    case ARM::t2LDRs:
+    case ARM::t2LDRBs:
+    case ARM::t2LDRHs:
+    case ARM::t2LDRSHs: {
+      // Thumb2 mode: lsl 0-3 only.
+      Latency -= 2;
+      break;
+    }
+    }
   }
 
-  if (DefAlign < 8 && Subtarget.isCortexA9())
+  if (DefAlign < 8 && Subtarget.isLikeA9())
     switch (DefMCID.getOpcode()) {
     default: break;
     case ARM::VLD1q8:
@@ -3185,18 +3578,6 @@ ARMBaseInstrInfo::getOperandLatency(const InstrItineraryData *ItinData,
   return Latency;
 }
 
-unsigned
-ARMBaseInstrInfo::getOutputLatency(const InstrItineraryData *ItinData,
-                                   const MachineInstr *DefMI, unsigned DefIdx,
-                                   const MachineInstr *DepMI) const {
-  unsigned Reg = DefMI->getOperand(DefIdx).getReg();
-  if (DepMI->readsRegister(Reg, &getRegisterInfo()) || !isPredicated(DepMI))
-    return 1;
-
-  // If the second MI is predicated, then there is an implicit use dependency.
-  return getInstrLatency(ItinData, DefMI);
-}
-
 unsigned ARMBaseInstrInfo::getInstrLatency(const InstrItineraryData *ItinData,
                                            const MachineInstr *MI,
                                            unsigned *PredCost) const {
@@ -3354,9 +3735,9 @@ ARMBaseInstrInfo::getExecutionDomain(const MachineInstr *MI) const {
   if (MI->getOpcode() == ARM::VMOVD && !isPredicated(MI))
     return std::make_pair(ExeVFP, (1<<ExeVFP) | (1<<ExeNEON));
 
-  // Cortex-A9 is particularly picky about mixing the two and wants these
+  // A9-like cores are particularly picky about mixing the two and want these
   // converted.
-  if (Subtarget.isCortexA9() && !isPredicated(MI) &&
+  if (Subtarget.isLikeA9() && !isPredicated(MI) &&
       (MI->getOpcode() == ARM::VMOVRS ||
        MI->getOpcode() == ARM::VMOVSR ||
        MI->getOpcode() == ARM::VMOVS))
@@ -3394,12 +3775,54 @@ static unsigned getCorrespondingDRegAndLane(const TargetRegisterInfo *TRI,
   return DReg;
 }
 
+/// getImplicitSPRUseForDPRUse - Given a use of a DPR register and lane,
+/// set ImplicitSReg to a register number that must be marked as implicit-use
+/// or zero if no register needs to be defined as implicit-use.
+///
+/// If the function cannot determine if an SPR should be marked implicit use
+/// or not, it returns false.
+///
+/// This function handles cases where an instruction is being modified from
+/// taking an SPR to a DPR[Lane]. A use of the DPR is being added, which may
+/// conflict with an earlier def of an SPR corresponding to DPR[Lane^1] (i.e.
+/// the other lane of the DPR).
+///
+/// If the other SPR is defined, an implicit-use of it should be added. Else,
+/// (including the case where the DPR itself is defined), it should not.
+///
+static bool getImplicitSPRUseForDPRUse(const TargetRegisterInfo *TRI,
+                                       MachineInstr *MI,
+                                       unsigned DReg, unsigned Lane,
+                                       unsigned &ImplicitSReg) {
+  // If the DPR is defined or used already, the other SPR lane will be chained
+  // correctly, so there is nothing to be done.
+  if (MI->definesRegister(DReg, TRI) || MI->readsRegister(DReg, TRI)) {
+    ImplicitSReg = 0;
+    return true;
+  }
+
+  // Otherwise we need to go searching to see if the SPR is set explicitly.
+  ImplicitSReg = TRI->getSubReg(DReg,
                                 (Lane & 1) ? ARM::ssub_0 : ARM::ssub_1);
+  MachineBasicBlock::LivenessQueryResult LQR =
+    MI->getParent()->computeRegisterLiveness(TRI, ImplicitSReg, MI);
+
+  if (LQR == MachineBasicBlock::LQR_Live)
+    return true;
+  else if (LQR == MachineBasicBlock::LQR_Unknown)
+    return false;
+
+  // If the register is known not to be live, there is no need to add an
+  // implicit-use.
+  ImplicitSReg = 0;
+  return true;
+}
 
 void
 ARMBaseInstrInfo::setExecutionDomain(MachineInstr *MI, unsigned Domain) const {
   unsigned DstReg, SrcReg, DReg;
   unsigned Lane;
-  MachineInstrBuilder MIB(MI);
+  MachineInstrBuilder MIB(*MI->getParent()->getParent(), MI);
   const TargetRegisterInfo *TRI = &getRegisterInfo();
   switch (MI->getOpcode()) {
     default:
@@ -3451,7 +3874,7 @@ ARMBaseInstrInfo::setExecutionDomain(MachineInstr *MI, unsigned Domain) const {
       // was dead before here.
       MIB.addReg(SrcReg, RegState::Implicit);
       break;
-    case ARM::VMOVSR:
+    case ARM::VMOVSR: {
       if (Domain != ExeNEON)
         break;
       assert(!isPredicated(MI) && "Cannot predicate a VSETLN");
@@ -3462,12 +3885,9 @@ ARMBaseInstrInfo::setExecutionDomain(MachineInstr *MI, unsigned Domain) const {
 
       DReg = getCorrespondingDRegAndLane(TRI, DstReg, Lane);
 
-      // If we insert both a novel <def> and an <undef> on the DReg, we break
-      // any existing dependency chain on the unused lane. Either already being
-      // present means this instruction is in that chain anyway so we can make
-      // the transformation.
-      if (!MI->definesRegister(DReg, TRI) && !MI->readsRegister(DReg, TRI))
-        break;
+      unsigned ImplicitSReg;
+      if (!getImplicitSPRUseForDPRUse(TRI, MI, DReg, Lane, ImplicitSReg))
+        break;
 
       for (unsigned i = MI->getDesc().getNumOperands(); i; --i)
         MI->RemoveOperand(i-1);
@@ -3484,7 +3904,10 @@ ARMBaseInstrInfo::setExecutionDomain(MachineInstr *MI, unsigned Domain) const {
       // The narrower destination must be marked as set to keep previous chains
       // in place.
       MIB.addReg(DstReg, RegState::Define | RegState::Implicit);
+      if (ImplicitSReg != 0)
+        MIB.addReg(ImplicitSReg, RegState::Implicit);
       break;
+    }
     case ARM::VMOVS: {
       if (Domain != ExeNEON)
         break;
@@ -3497,12 +3920,9 @@ ARMBaseInstrInfo::setExecutionDomain(MachineInstr *MI, unsigned Domain) const {
 
       DDst = getCorrespondingDRegAndLane(TRI, DstReg, DstLane);
       DSrc = getCorrespondingDRegAndLane(TRI, SrcReg, SrcLane);
 
-      // If we insert both a novel <def> and an <undef> on the DReg, we break
-      // any existing dependency chain on the unused lane. Either already being
-      // present means this instruction is in that chain anyway so we can make
-      // the transformation.
-      if (!MI->definesRegister(DDst, TRI) && !MI->readsRegister(DDst, TRI))
-        break;
+      unsigned ImplicitSReg;
+      if (!getImplicitSPRUseForDPRUse(TRI, MI, DSrc, SrcLane, ImplicitSReg))
+        break;
 
       for (unsigned i = MI->getDesc().getNumOperands(); i; --i)
        MI->RemoveOperand(i-1);
@@ -3520,6 +3940,8 @@ ARMBaseInstrInfo::setExecutionDomain(MachineInstr *MI, unsigned Domain) const {
       // more, so add them in manually.
       MIB.addReg(DstReg, RegState::Implicit | RegState::Define);
       MIB.addReg(SrcReg, RegState::Implicit);
+      if (ImplicitSReg != 0)
+        MIB.addReg(ImplicitSReg, RegState::Implicit);
       break;
     }
 
@@ -3578,12 +4000,130 @@ ARMBaseInstrInfo::setExecutionDomain(MachineInstr *MI, unsigned Domain) const {
       // As before, the original destination is no longer represented, add it
      // implicitly.
       MIB.addReg(DstReg, RegState::Define | RegState::Implicit);
+      if (ImplicitSReg != 0)
+        MIB.addReg(ImplicitSReg, RegState::Implicit);
       break;
     }
   }
 }
 
+//===----------------------------------------------------------------------===//
+// Partial register updates
+//===----------------------------------------------------------------------===//
+//
+// Swift renames NEON registers with 64-bit granularity. That means any
+// instruction writing an S-reg implicitly reads the containing D-reg. The
+// problem is mostly avoided by translating f32 operations to v2f32 operations
+// on D-registers, but f32 loads are still a problem.
+//
+// These instructions can load an f32 into a NEON register:
+//
+// VLDRS - Only writes S, partial D update.
+// VLD1LNd32 - Writes all D-regs, explicit partial D update, 2 uops.
+// VLD1DUPd32 - Writes all D-regs, no partial reg update, 2 uops.
+//
+// FCONSTD can be used as a dependency-breaking instruction.
+
+
+unsigned ARMBaseInstrInfo::
+getPartialRegUpdateClearance(const MachineInstr *MI,
+                             unsigned OpNum,
+                             const TargetRegisterInfo *TRI) const {
+  // Only Swift has partial register update problems.
+  if (!SwiftPartialUpdateClearance || !Subtarget.isSwift())
+    return 0;
+
+  assert(TRI && "Need TRI instance");
+
+  const MachineOperand &MO = MI->getOperand(OpNum);
+  if (MO.readsReg())
+    return 0;
+  unsigned Reg = MO.getReg();
+  int UseOp = -1;
+
+  switch(MI->getOpcode()) {
+  // Normal instructions writing only an S-register.
+  case ARM::VLDRS:
+  case ARM::FCONSTS:
+  case ARM::VMOVSR:
+    // rdar://problem/8791586
+  case ARM::VMOVv8i8:
+  case ARM::VMOVv4i16:
+  case ARM::VMOVv2i32:
+  case ARM::VMOVv2f32:
+  case ARM::VMOVv1i64:
+    UseOp = MI->findRegisterUseOperandIdx(Reg, false, TRI);
+    break;
+
+  // Explicitly reads the dependency.
+  case ARM::VLD1LNd32:
+    UseOp = 1;
+    break;
+  default:
+    return 0;
+  }
+
+  // If this instruction actually reads a value from Reg, there is no unwanted
+  // dependency.
+  if (UseOp != -1 && MI->getOperand(UseOp).readsReg())
+    return 0;
+
+  // We must be able to clobber the whole D-reg.
+  if (TargetRegisterInfo::isVirtualRegister(Reg)) {
+    // Virtual register must be a foo:ssub_0<def,undef> operand.
+    if (!MO.getSubReg() || MI->readsVirtualRegister(Reg))
+      return 0;
+  } else if (ARM::SPRRegClass.contains(Reg)) {
+    // Physical register: MI must define the full D-reg.
+    unsigned DReg = TRI->getMatchingSuperReg(Reg, ARM::ssub_0,
+                                             &ARM::DPRRegClass);
+    if (!DReg || !MI->definesRegister(DReg, TRI))
+      return 0;
+  }
+
+  // MI has an unwanted D-register dependency.
+  // Avoid defs in the previous N instructions.
+  return SwiftPartialUpdateClearance;
+}
+
+// Break a partial register dependency after getPartialRegUpdateClearance
+// returned non-zero.
+void ARMBaseInstrInfo::
+breakPartialRegDependency(MachineBasicBlock::iterator MI,
+                          unsigned OpNum,
+                          const TargetRegisterInfo *TRI) const {
+  assert(MI && OpNum < MI->getDesc().getNumDefs() && "OpNum is not a def");
+  assert(TRI && "Need TRI instance");
+
+  const MachineOperand &MO = MI->getOperand(OpNum);
+  unsigned Reg = MO.getReg();
+  assert(TargetRegisterInfo::isPhysicalRegister(Reg) &&
+         "Can't break virtual register dependencies.");
+  unsigned DReg = Reg;
+
+  // If MI defines an S-reg, find the corresponding D super-register.
+  if (ARM::SPRRegClass.contains(Reg)) {
+    DReg = ARM::D0 + (Reg - ARM::S0) / 2;
+    assert(TRI->isSuperRegister(Reg, DReg) && "Register enums broken");
+  }
+
+  assert(ARM::DPRRegClass.contains(DReg) && "Can only break D-reg deps");
+  assert(MI->definesRegister(DReg, TRI) && "MI doesn't clobber full D-reg");
+
+  // FIXME: In some cases, VLDRS can be changed to a VLD1DUPd32 which defines
+  // the full D-register by loading the same value to both lanes. The
+  // instruction is micro-coded with 2 uops, so don't do this until we can
+  // properly schedule micro-coded instructions. The dispatcher stalls cause
+  // too big regressions.
+
+  // Insert the dependency-breaking FCONSTD before MI.
+  // 96 is the encoding of 0.5, but the actual value doesn't matter here.
+  AddDefaultPred(BuildMI(*MI->getParent(), MI, MI->getDebugLoc(),
                          get(ARM::FCONSTD), DReg).addImm(96));
+  MI->addRegisterKilled(DReg, TRI, true);
+}
+
 bool ARMBaseInstrInfo::hasNOP() const {
   return (Subtarget.getFeatureBits() & ARM::HasV6T2Ops) != 0;
 }
```
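As a toy model of the two new hooks above: getPartialRegUpdateClearance requests clearance around a partial S-register write (12 instructions by default, per the new -swift-partial-update-clearance option), and the dependency is broken only when the containing D-register was written more recently than that. A self-contained sketch in plain C++, assuming the same S-to-D mapping the patch uses; none of this is LLVM API:

```cpp
#include <cstdio>

// Toy register numbering: S0..S31 map onto D0..D15, two S-regs per D-reg,
// matching "DReg = ARM::D0 + (Reg - ARM::S0) / 2" in breakPartialRegDependency.
static unsigned containingDReg(unsigned sreg) { return sreg / 2; }

// Default taken from the new -swift-partial-update-clearance option.
static const unsigned kClearance = 12;

// Returns true if a dependency-breaking instruction (FCONSTD in the patch)
// should be inserted: the D-reg was written recently enough that a partial
// (S-reg only) write would serialize against the previous value.
static bool shouldBreakDep(unsigned distSinceDRegWrite) {
  return distSinceDRegWrite < kClearance;
}

int main() {
  unsigned s = 5;  // S5 lives in D2
  std::printf("S%u is contained in D%u\n", s, containingDReg(s));
  std::printf("dist 3  -> break dep: %d\n", shouldBreakDep(3));
  std::printf("dist 20 -> break dep: %d\n", shouldBreakDep(20));
}
```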
