aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorEvan Cheng <evan.cheng@apple.com>2011-12-14 20:00:08 +0000
committerEvan Cheng <evan.cheng@apple.com>2011-12-14 20:00:08 +0000
commit020f4106f820648fd7e91956859844a80de13974 (patch)
treecdf6a36ab7bed9a0c468813406c2d3403997e886
parente90ac9bce9aa6de288568df9bf6133c08534ae2f (diff)
downloadexternal_llvm-020f4106f820648fd7e91956859844a80de13974.zip
external_llvm-020f4106f820648fd7e91956859844a80de13974.tar.gz
external_llvm-020f4106f820648fd7e91956859844a80de13974.tar.bz2
Model ARM predicated write as read-mod-write, e.g.
  r0 = mov #0
  r0 = moveq #1
Then the second instruction has an implicit data dependency on the first instruction. Sadly, I have yet to come up with a small test case that demonstrates the post-RA scheduler taking advantage of this.
git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@146583 91177308-0d34-0410-b5e6-96231b3b80d8
-rw-r--r--include/llvm/Target/TargetInstrInfo.h5
-rw-r--r--lib/CodeGen/ScheduleDAGInstrs.cpp4
-rw-r--r--lib/Target/ARM/ARMBaseInstrInfo.cpp55
-rw-r--r--lib/Target/ARM/ARMBaseInstrInfo.h4
4 files changed, 49 insertions, 19 deletions
diff --git a/include/llvm/Target/TargetInstrInfo.h b/include/llvm/Target/TargetInstrInfo.h
index 957a89a..8409229 100644
--- a/include/llvm/Target/TargetInstrInfo.h
+++ b/include/llvm/Target/TargetInstrInfo.h
@@ -652,9 +652,8 @@ public:
/// a given pair of defs which both target the same register. This is usually
/// one.
virtual unsigned getOutputLatency(const InstrItineraryData *ItinData,
- const MachineInstr *DefMI1,
- const MachineInstr *DefMI2,
- unsigned Reg) const {
+ const MachineInstr *DefMI, unsigned DefIdx,
+ const MachineInstr *DepMI) const {
return 1;
}
diff --git a/lib/CodeGen/ScheduleDAGInstrs.cpp b/lib/CodeGen/ScheduleDAGInstrs.cpp
index 47c5339..4418f40 100644
--- a/lib/CodeGen/ScheduleDAGInstrs.cpp
+++ b/lib/CodeGen/ScheduleDAGInstrs.cpp
@@ -281,8 +281,8 @@ void ScheduleDAGInstrs::BuildSchedGraph(AliasAnalysis *AA) {
if (Kind == SDep::Anti)
DefSU->addPred(SDep(SU, Kind, 0, /*Reg=*/Reg));
else {
- unsigned AOLat = TII->getOutputLatency(InstrItins, MI,
- DefSU->getInstr(), Reg);
+ unsigned AOLat = TII->getOutputLatency(InstrItins, MI, j,
+ DefSU->getInstr());
DefSU->addPred(SDep(SU, Kind, AOLat, /*Reg=*/Reg));
}
}
diff --git a/lib/Target/ARM/ARMBaseInstrInfo.cpp b/lib/Target/ARM/ARMBaseInstrInfo.cpp
index 5ee2dc8..8bf5475 100644
--- a/lib/Target/ARM/ARMBaseInstrInfo.cpp
+++ b/lib/Target/ARM/ARMBaseInstrInfo.cpp
@@ -2360,7 +2360,7 @@ ARMBaseInstrInfo::getOperandLatency(const InstrItineraryData *ItinData,
}
static const MachineInstr *getBundledDefMI(const TargetRegisterInfo *TRI,
- const MachineInstr *MI,
+ const MachineInstr *MI, unsigned Reg,
unsigned &DefIdx, unsigned &Dist) {
Dist = 0;
@@ -2370,7 +2370,6 @@ static const MachineInstr *getBundledDefMI(const TargetRegisterInfo *TRI,
assert(II->isInsideBundle() && "Empty bundle?");
int Idx = -1;
- unsigned Reg = MI->getOperand(DefIdx).getReg();
while (II->isInsideBundle()) {
Idx = II->findRegisterDefOperandIdx(Reg, false, true, TRI);
if (Idx != -1)
@@ -2385,7 +2384,7 @@ static const MachineInstr *getBundledDefMI(const TargetRegisterInfo *TRI,
}
static const MachineInstr *getBundledUseMI(const TargetRegisterInfo *TRI,
- const MachineInstr *MI,
+ const MachineInstr *MI, unsigned Reg,
unsigned &UseIdx, unsigned &Dist) {
Dist = 0;
@@ -2395,7 +2394,6 @@ static const MachineInstr *getBundledUseMI(const TargetRegisterInfo *TRI,
// FIXME: This doesn't properly handle multiple uses.
int Idx = -1;
- unsigned Reg = MI->getOperand(UseIdx).getReg();
while (II != E && II->isInsideBundle()) {
Idx = II->findRegisterUseOperandIdx(Reg, false, TRI);
if (Idx != -1)
@@ -2405,7 +2403,11 @@ static const MachineInstr *getBundledUseMI(const TargetRegisterInfo *TRI,
++II;
}
- assert(Idx != -1 && "Cannot find bundled definition!");
+ if (Idx == -1) {
+ Dist = 0;
+ return 0;
+ }
+
UseIdx = Idx;
return II;
}
@@ -2424,7 +2426,8 @@ ARMBaseInstrInfo::getOperandLatency(const InstrItineraryData *ItinData,
const MCInstrDesc *DefMCID = &DefMI->getDesc();
const MCInstrDesc *UseMCID = &UseMI->getDesc();
const MachineOperand &DefMO = DefMI->getOperand(DefIdx);
- if (DefMO.getReg() == ARM::CPSR) {
+ unsigned Reg = DefMO.getReg();
+ if (Reg == ARM::CPSR) {
if (DefMI->getOpcode() == ARM::FMSTAT) {
// fpscr -> cpsr stalls over 20 cycles on A8 (and earlier?)
return Subtarget.isCortexA9() ? 1 : 20;
@@ -2436,11 +2439,16 @@ ARMBaseInstrInfo::getOperandLatency(const InstrItineraryData *ItinData,
// Otherwise it takes the instruction latency (generally one).
int Latency = getInstrLatency(ItinData, DefMI);
- // For Thumb2, prefer scheduling CPSR setting instruction close to its uses.
- // Instructions which are otherwise scheduled between them may incur a code
- // size penalty (not able to use the CPSR setting 16-bit instructions).
- if (Latency > 0 && Subtarget.isThumb2())
- --Latency;
+
+ // For Thumb2 and -Os, prefer scheduling CPSR setting instruction close to
+ // its uses. Instructions which are otherwise scheduled between them may
+ // incur a code size penalty (not able to use the CPSR setting 16-bit
+ // instructions).
+ if (Latency > 0 && Subtarget.isThumb2()) {
+ const MachineFunction *MF = DefMI->getParent()->getParent();
+ if (MF->getFunction()->hasFnAttr(Attribute::OptimizeForSize))
+ --Latency;
+ }
return Latency;
}
@@ -2451,7 +2459,7 @@ ARMBaseInstrInfo::getOperandLatency(const InstrItineraryData *ItinData,
unsigned DefAdj = 0;
if (DefMI->isBundle()) {
- DefMI = getBundledDefMI(&getRegisterInfo(), DefMI, DefIdx, DefAdj);
+ DefMI = getBundledDefMI(&getRegisterInfo(), DefMI, Reg, DefIdx, DefAdj);
if (DefMI->isCopyLike() || DefMI->isInsertSubreg() ||
DefMI->isRegSequence() || DefMI->isImplicitDef())
return 1;
@@ -2459,8 +2467,14 @@ ARMBaseInstrInfo::getOperandLatency(const InstrItineraryData *ItinData,
}
unsigned UseAdj = 0;
if (UseMI->isBundle()) {
- UseMI = getBundledUseMI(&getRegisterInfo(), UseMI, UseIdx, UseAdj);
- UseMCID = &UseMI->getDesc();
+ unsigned NewUseIdx;
+ const MachineInstr *NewUseMI = getBundledUseMI(&getRegisterInfo(), UseMI,
+ Reg, NewUseIdx, UseAdj);
+ if (NewUseMI) {
+ UseMI = NewUseMI;
+ UseIdx = NewUseIdx;
+ UseMCID = &UseMI->getDesc();
+ }
}
int Latency = getOperandLatency(ItinData, *DefMCID, DefIdx, DefAlign,
@@ -2797,6 +2811,19 @@ ARMBaseInstrInfo::getOperandLatency(const InstrItineraryData *ItinData,
return Latency;
}
+unsigned
+ARMBaseInstrInfo::getOutputLatency(const InstrItineraryData *ItinData,
+ const MachineInstr *DefMI, unsigned DefIdx,
+ const MachineInstr *DepMI) const {
+ unsigned Reg = DefMI->getOperand(DefIdx).getReg();
+ if (DepMI->readsRegister(Reg, &getRegisterInfo()) || !isPredicated(DepMI))
+ return 1;
+
+ // If the second MI is predicated, then there is an implicit use dependency.
+ return getOperandLatency(ItinData, DefMI, DefIdx, DepMI,
+ DepMI->getNumOperands());
+}
+
int ARMBaseInstrInfo::getInstrLatency(const InstrItineraryData *ItinData,
const MachineInstr *MI,
unsigned *PredCost) const {
diff --git a/lib/Target/ARM/ARMBaseInstrInfo.h b/lib/Target/ARM/ARMBaseInstrInfo.h
index 4ce7461..68e8208 100644
--- a/lib/Target/ARM/ARMBaseInstrInfo.h
+++ b/lib/Target/ARM/ARMBaseInstrInfo.h
@@ -210,6 +210,10 @@ public:
SDNode *DefNode, unsigned DefIdx,
SDNode *UseNode, unsigned UseIdx) const;
+ virtual unsigned getOutputLatency(const InstrItineraryData *ItinData,
+ const MachineInstr *DefMI, unsigned DefIdx,
+ const MachineInstr *DepMI) const;
+
/// VFP/NEON execution domains.
std::pair<uint16_t, uint16_t>
getExecutionDomain(const MachineInstr *MI) const;