aboutsummaryrefslogtreecommitdiffstats
path: root/lib/CodeGen/RegAllocGreedy.cpp
diff options
context:
space:
mode:
Diffstat (limited to 'lib/CodeGen/RegAllocGreedy.cpp')
-rw-r--r--lib/CodeGen/RegAllocGreedy.cpp643
1 files changed, 425 insertions, 218 deletions
diff --git a/lib/CodeGen/RegAllocGreedy.cpp b/lib/CodeGen/RegAllocGreedy.cpp
index 889bca3..8d06325 100644
--- a/lib/CodeGen/RegAllocGreedy.cpp
+++ b/lib/CodeGen/RegAllocGreedy.cpp
@@ -62,7 +62,6 @@ class RAGreedy : public MachineFunctionPass,
// context
MachineFunction *MF;
- BitVector ReservedRegs;
// analyses
SlotIndexes *Indexes;
@@ -72,6 +71,7 @@ class RAGreedy : public MachineFunctionPass,
MachineLoopRanges *LoopRanges;
EdgeBundles *Bundles;
SpillPlacement *SpillPlacer;
+ LiveDebugVariables *DebugVars;
// state
std::auto_ptr<Spiller> SpillerInstance;
@@ -94,12 +94,13 @@ class RAGreedy : public MachineFunctionPass,
RS_New, ///< Never seen before.
RS_First, ///< First time in the queue.
RS_Second, ///< Second time in the queue.
- RS_Region, ///< Produced by region splitting.
- RS_Block, ///< Produced by per-block splitting.
+ RS_Global, ///< Produced by global splitting.
RS_Local, ///< Produced by local splitting.
RS_Spill ///< Produced by spilling.
};
+ static const char *const StageName[];
+
IndexedMap<unsigned char, VirtReg2IndexFunctor> LRStage;
LiveRangeStage getStage(const LiveInterval &VirtReg) const {
@@ -116,6 +117,15 @@ class RAGreedy : public MachineFunctionPass,
}
}
+ // Eviction. Sometimes an assigned live range can be evicted without
+ // conditions, but other times it must be split after being evicted to avoid
+ // infinite loops.
+ enum CanEvict {
+ CE_Never, ///< Can never evict.
+ CE_Always, ///< Can always evict.
+ CE_WithSplit ///< Can evict only if range is also split or spilled.
+ };
+
// splitting state.
std::auto_ptr<SplitAnalysis> SA;
std::auto_ptr<SplitEditor> SE;
@@ -126,14 +136,17 @@ class RAGreedy : public MachineFunctionPass,
/// All basic blocks where the current register has uses.
SmallVector<SpillPlacement::BlockConstraint, 8> SplitConstraints;
- /// All basic blocks where the current register is live-through and
- /// interference free.
- SmallVector<unsigned, 8> TransparentBlocks;
-
/// Global live range splitting candidate info.
struct GlobalSplitCandidate {
unsigned PhysReg;
BitVector LiveBundles;
+ SmallVector<unsigned, 8> ActiveBlocks;
+
+ void reset(unsigned Reg) {
+ PhysReg = Reg;
+ LiveBundles.clear();
+ ActiveBlocks.clear();
+ }
};
/// Candidate info for for each PhysReg in AllocationOrder.
@@ -141,10 +154,6 @@ class RAGreedy : public MachineFunctionPass,
/// class.
SmallVector<GlobalSplitCandidate, 32> GlobalCand;
- /// For every instruction in SA->UseSlots, store the previous non-copy
- /// instruction.
- SmallVector<SlotIndex, 8> PrevSlot;
-
public:
RAGreedy();
@@ -173,18 +182,21 @@ private:
void LRE_WillShrinkVirtReg(unsigned);
void LRE_DidCloneVirtReg(unsigned, unsigned);
- bool addSplitConstraints(unsigned, float&);
- float calcGlobalSplitCost(unsigned, const BitVector&);
- void splitAroundRegion(LiveInterval&, unsigned, const BitVector&,
+ float calcSpillCost();
+ bool addSplitConstraints(InterferenceCache::Cursor, float&);
+ void addThroughConstraints(InterferenceCache::Cursor, ArrayRef<unsigned>);
+ void growRegion(GlobalSplitCandidate &Cand, InterferenceCache::Cursor);
+ float calcGlobalSplitCost(GlobalSplitCandidate&, InterferenceCache::Cursor);
+ void splitAroundRegion(LiveInterval&, GlobalSplitCandidate&,
SmallVectorImpl<LiveInterval*>&);
void calcGapWeights(unsigned, SmallVectorImpl<float>&);
- SlotIndex getPrevMappedIndex(const MachineInstr*);
- void calcPrevSlots();
- unsigned nextSplitPoint(unsigned);
+ CanEvict canEvict(LiveInterval &A, LiveInterval &B);
bool canEvictInterference(LiveInterval&, unsigned, float&);
+ unsigned tryAssign(LiveInterval&, AllocationOrder&,
+ SmallVectorImpl<LiveInterval*>&);
unsigned tryEvict(LiveInterval&, AllocationOrder&,
- SmallVectorImpl<LiveInterval*>&);
+ SmallVectorImpl<LiveInterval*>&, unsigned = ~0u);
unsigned tryRegionSplit(LiveInterval&, AllocationOrder&,
SmallVectorImpl<LiveInterval*>&);
unsigned tryLocalSplit(LiveInterval&, AllocationOrder&,
@@ -196,6 +208,22 @@ private:
char RAGreedy::ID = 0;
+#ifndef NDEBUG
+const char *const RAGreedy::StageName[] = {
+ "RS_New",
+ "RS_First",
+ "RS_Second",
+ "RS_Global",
+ "RS_Local",
+ "RS_Spill"
+};
+#endif
+
+// Hysteresis to use when comparing floats.
+// This helps stabilize decisions based on float comparisons.
+const float Hysteresis = 0.98f;
+
+
FunctionPass* llvm::createGreedyRegisterAllocator() {
return new RAGreedy();
}
@@ -287,6 +315,7 @@ void RAGreedy::LRE_DidCloneVirtReg(unsigned New, unsigned Old) {
void RAGreedy::releaseMemory() {
SpillerInstance.reset(0);
LRStage.clear();
+ GlobalCand.clear();
RegAllocBase::releaseMemory();
}
@@ -329,28 +358,85 @@ LiveInterval *RAGreedy::dequeue() {
return LI;
}
+
+//===----------------------------------------------------------------------===//
+// Direct Assignment
+//===----------------------------------------------------------------------===//
+
+/// tryAssign - Try to assign VirtReg to an available register.
+unsigned RAGreedy::tryAssign(LiveInterval &VirtReg,
+ AllocationOrder &Order,
+ SmallVectorImpl<LiveInterval*> &NewVRegs) {
+ Order.rewind();
+ unsigned PhysReg;
+ while ((PhysReg = Order.next()))
+ if (!checkPhysRegInterference(VirtReg, PhysReg))
+ break;
+ if (!PhysReg || Order.isHint(PhysReg))
+ return PhysReg;
+
+ // PhysReg is available. Try to evict interference from a cheaper alternative.
+ unsigned Cost = TRI->getCostPerUse(PhysReg);
+
+ // Most registers have 0 additional cost.
+ if (!Cost)
+ return PhysReg;
+
+ DEBUG(dbgs() << PrintReg(PhysReg, TRI) << " is available at cost " << Cost
+ << '\n');
+ unsigned CheapReg = tryEvict(VirtReg, Order, NewVRegs, Cost);
+ return CheapReg ? CheapReg : PhysReg;
+}
+
+
//===----------------------------------------------------------------------===//
// Interference eviction
//===----------------------------------------------------------------------===//
+/// canEvict - determine if A can evict the assigned live range B. The eviction
+/// policy defined by this function together with the allocation order defined
+/// by enqueue() decides which registers ultimately end up being split and
+/// spilled.
+///
+/// This function must define a non-circular relation when it returns CE_Always,
+/// otherwise infinite eviction loops are possible. When evicting a <= RS_Second
+/// range, it is possible to return CE_WithSplit which forces the evicted
+/// register to be split or spilled before it can evict anything again. That
+/// guarantees progress.
+RAGreedy::CanEvict RAGreedy::canEvict(LiveInterval &A, LiveInterval &B) {
+ return A.weight > B.weight ? CE_Always : CE_Never;
+}
+
/// canEvict - Return true if all interferences between VirtReg and PhysReg can
-/// be evicted. Set maxWeight to the maximal spill weight of an interference.
+/// be evicted.
+/// Return false if any interference is heavier than MaxWeight.
+/// On return, set MaxWeight to the maximal spill weight of an interference.
bool RAGreedy::canEvictInterference(LiveInterval &VirtReg, unsigned PhysReg,
float &MaxWeight) {
float Weight = 0;
for (const unsigned *AliasI = TRI->getOverlaps(PhysReg); *AliasI; ++AliasI) {
LiveIntervalUnion::Query &Q = query(VirtReg, *AliasI);
- // If there is 10 or more interferences, chances are one is smaller.
- if (Q.collectInterferingVRegs(10) >= 10)
+ // If there is 10 or more interferences, chances are one is heavier.
+ if (Q.collectInterferingVRegs(10, MaxWeight) >= 10)
return false;
- // Check if any interfering live range is heavier than VirtReg.
- for (unsigned i = 0, e = Q.interferingVRegs().size(); i != e; ++i) {
- LiveInterval *Intf = Q.interferingVRegs()[i];
+ // Check if any interfering live range is heavier than MaxWeight.
+ for (unsigned i = Q.interferingVRegs().size(); i; --i) {
+ LiveInterval *Intf = Q.interferingVRegs()[i - 1];
if (TargetRegisterInfo::isPhysicalRegister(Intf->reg))
return false;
- if (Intf->weight >= VirtReg.weight)
+ if (Intf->weight >= MaxWeight)
return false;
+ switch (canEvict(VirtReg, *Intf)) {
+ case CE_Always:
+ break;
+ case CE_Never:
+ return false;
+ case CE_WithSplit:
+ if (getStage(*Intf) > RS_Second)
+ return false;
+ break;
+ }
Weight = std::max(Weight, Intf->weight);
}
}
@@ -364,21 +450,28 @@ bool RAGreedy::canEvictInterference(LiveInterval &VirtReg, unsigned PhysReg,
/// @return Physreg to assign VirtReg, or 0.
unsigned RAGreedy::tryEvict(LiveInterval &VirtReg,
AllocationOrder &Order,
- SmallVectorImpl<LiveInterval*> &NewVRegs){
+ SmallVectorImpl<LiveInterval*> &NewVRegs,
+ unsigned CostPerUseLimit) {
NamedRegionTimer T("Evict", TimerGroupName, TimePassesIsEnabled);
// Keep track of the lightest single interference seen so far.
- float BestWeight = 0;
+ float BestWeight = HUGE_VALF;
unsigned BestPhys = 0;
Order.rewind();
while (unsigned PhysReg = Order.next()) {
- float Weight = 0;
+ if (TRI->getCostPerUse(PhysReg) >= CostPerUseLimit)
+ continue;
+ // The first use of a register in a function has cost 1.
+ if (CostPerUseLimit == 1 && !MRI->isPhysRegUsed(PhysReg))
+ continue;
+
+ float Weight = BestWeight;
if (!canEvictInterference(VirtReg, PhysReg, Weight))
continue;
// This is an eviction candidate.
- DEBUG(dbgs() << "max " << PrintReg(PhysReg, TRI) << " interference = "
+ DEBUG(dbgs() << PrintReg(PhysReg, TRI) << " interference = "
<< Weight << '\n');
if (BestPhys && Weight >= BestWeight)
continue;
@@ -403,6 +496,11 @@ unsigned RAGreedy::tryEvict(LiveInterval &VirtReg,
unassign(*Intf, VRM->getPhys(Intf->reg));
++NumEvicted;
NewVRegs.push_back(Intf);
+ // Prevent looping by forcing the evicted ranges to be split before they
+ // can evict anything else.
+ if (getStage(*Intf) < RS_Second &&
+ canEvict(VirtReg, *Intf) == CE_WithSplit)
+ LRStage[Intf->reg] = RS_Second;
}
}
return BestPhys;
@@ -417,9 +515,9 @@ unsigned RAGreedy::tryEvict(LiveInterval &VirtReg,
/// interference pattern in Physreg and its aliases. Add the constraints to
/// SpillPlacement and return the static cost of this split in Cost, assuming
/// that all preferences in SplitConstraints are met.
-/// If it is evident that no bundles will be live, abort early and return false.
-bool RAGreedy::addSplitConstraints(unsigned PhysReg, float &Cost) {
- InterferenceCache::Cursor Intf(IntfCache, PhysReg);
+/// Return false if there are no bundles with positive bias.
+bool RAGreedy::addSplitConstraints(InterferenceCache::Cursor Intf,
+ float &Cost) {
ArrayRef<SplitAnalysis::BlockInfo> UseBlocks = SA->getUseBlocks();
// Reset interference dependent info.
@@ -446,7 +544,7 @@ bool RAGreedy::addSplitConstraints(unsigned PhysReg, float &Cost) {
BC.Entry = SpillPlacement::MustSpill, ++Ins;
else if (Intf.first() < BI.FirstUse)
BC.Entry = SpillPlacement::PrefSpill, ++Ins;
- else if (Intf.first() < (BI.LiveThrough ? BI.LastUse : BI.Kill))
+ else if (Intf.first() < BI.LastUse)
++Ins;
}
@@ -456,7 +554,7 @@ bool RAGreedy::addSplitConstraints(unsigned PhysReg, float &Cost) {
BC.Exit = SpillPlacement::MustSpill, ++Ins;
else if (Intf.last() > BI.LastUse)
BC.Exit = SpillPlacement::PrefSpill, ++Ins;
- else if (Intf.last() > (BI.LiveThrough ? BI.FirstUse : BI.Def))
+ else if (Intf.last() > BI.FirstUse)
++Ins;
}
@@ -464,35 +562,41 @@ bool RAGreedy::addSplitConstraints(unsigned PhysReg, float &Cost) {
if (Ins)
StaticCost += Ins * SpillPlacer->getBlockFrequency(BC.Number);
}
+ Cost = StaticCost;
// Add constraints for use-blocks. Note that these are the only constraints
// that may add a positive bias, it is downhill from here.
SpillPlacer->addConstraints(SplitConstraints);
- if (SpillPlacer->getPositiveNodes() == 0)
- return false;
+ return SpillPlacer->scanActiveBundles();
+}
- Cost = StaticCost;
- // Now handle the live-through blocks without uses. These can only add
- // negative bias, so we can abort whenever there are no more positive nodes.
- // Compute constraints for a group of 8 blocks at a time.
+/// addThroughConstraints - Add constraints and links to SpillPlacer from the
+/// live-through blocks in Blocks.
+void RAGreedy::addThroughConstraints(InterferenceCache::Cursor Intf,
+ ArrayRef<unsigned> Blocks) {
const unsigned GroupSize = 8;
SpillPlacement::BlockConstraint BCS[GroupSize];
- unsigned B = 0;
- TransparentBlocks.clear();
+ unsigned TBS[GroupSize];
+ unsigned B = 0, T = 0;
- ArrayRef<unsigned> ThroughBlocks = SA->getThroughBlocks();
- for (unsigned i = 0; i != ThroughBlocks.size(); ++i) {
- unsigned Number = ThroughBlocks[i];
- assert(B < GroupSize && "Array overflow");
- BCS[B].Number = Number;
+ for (unsigned i = 0; i != Blocks.size(); ++i) {
+ unsigned Number = Blocks[i];
Intf.moveToBlock(Number);
if (!Intf.hasInterference()) {
- TransparentBlocks.push_back(Number);
+ assert(T < GroupSize && "Array overflow");
+ TBS[T] = Number;
+ if (++T == GroupSize) {
+ SpillPlacer->addLinks(ArrayRef<unsigned>(TBS, T));
+ T = 0;
+ }
continue;
}
+ assert(B < GroupSize && "Array overflow");
+ BCS[B].Number = Number;
+
// Interference for the live-in value.
if (Intf.first() <= Indexes->getMBBStartIdx(Number))
BCS[B].Entry = SpillPlacement::MustSpill;
@@ -509,30 +613,94 @@ bool RAGreedy::addSplitConstraints(unsigned PhysReg, float &Cost) {
ArrayRef<SpillPlacement::BlockConstraint> Array(BCS, B);
SpillPlacer->addConstraints(Array);
B = 0;
- // Abort early when all hope is lost.
- if (SpillPlacer->getPositiveNodes() == 0)
- return false;
}
}
ArrayRef<SpillPlacement::BlockConstraint> Array(BCS, B);
SpillPlacer->addConstraints(Array);
- if (SpillPlacer->getPositiveNodes() == 0)
- return false;
+ SpillPlacer->addLinks(ArrayRef<unsigned>(TBS, T));
+}
- // There is still some positive bias. Add all the links.
- SpillPlacer->addLinks(TransparentBlocks);
- return true;
+void RAGreedy::growRegion(GlobalSplitCandidate &Cand,
+ InterferenceCache::Cursor Intf) {
+ // Keep track of through blocks that have not been added to SpillPlacer.
+ BitVector Todo = SA->getThroughBlocks();
+ SmallVectorImpl<unsigned> &ActiveBlocks = Cand.ActiveBlocks;
+ unsigned AddedTo = 0;
+#ifndef NDEBUG
+ unsigned Visited = 0;
+#endif
+
+ for (;;) {
+ ArrayRef<unsigned> NewBundles = SpillPlacer->getRecentPositive();
+ if (NewBundles.empty())
+ break;
+ // Find new through blocks in the periphery of PrefRegBundles.
+ for (int i = 0, e = NewBundles.size(); i != e; ++i) {
+ unsigned Bundle = NewBundles[i];
+ // Look at all blocks connected to Bundle in the full graph.
+ ArrayRef<unsigned> Blocks = Bundles->getBlocks(Bundle);
+ for (ArrayRef<unsigned>::iterator I = Blocks.begin(), E = Blocks.end();
+ I != E; ++I) {
+ unsigned Block = *I;
+ if (!Todo.test(Block))
+ continue;
+ Todo.reset(Block);
+ // This is a new through block. Add it to SpillPlacer later.
+ ActiveBlocks.push_back(Block);
+#ifndef NDEBUG
+ ++Visited;
+#endif
+ }
+ }
+ // Any new blocks to add?
+ if (ActiveBlocks.size() > AddedTo) {
+ ArrayRef<unsigned> Add(&ActiveBlocks[AddedTo],
+ ActiveBlocks.size() - AddedTo);
+ addThroughConstraints(Intf, Add);
+ AddedTo = ActiveBlocks.size();
+ }
+ // Perhaps iterating can enable more bundles?
+ SpillPlacer->iterate();
+ }
+ DEBUG(dbgs() << ", v=" << Visited);
}
+/// calcSpillCost - Compute how expensive it would be to split the live range in
+/// SA around all use blocks instead of forming bundle regions.
+float RAGreedy::calcSpillCost() {
+ float Cost = 0;
+ const LiveInterval &LI = SA->getParent();
+ ArrayRef<SplitAnalysis::BlockInfo> UseBlocks = SA->getUseBlocks();
+ for (unsigned i = 0; i != UseBlocks.size(); ++i) {
+ const SplitAnalysis::BlockInfo &BI = UseBlocks[i];
+ unsigned Number = BI.MBB->getNumber();
+ // We normally only need one spill instruction - a load or a store.
+ Cost += SpillPlacer->getBlockFrequency(Number);
+
+ // Unless the value is redefined in the block.
+ if (BI.LiveIn && BI.LiveOut) {
+ SlotIndex Start, Stop;
+ tie(Start, Stop) = Indexes->getMBBRange(Number);
+ LiveInterval::const_iterator I = LI.find(Start);
+ assert(I != LI.end() && "Expected live-in value");
+ // Is there a different live-out value? If so, we need an extra spill
+ // instruction.
+ if (I->end < Stop)
+ Cost += SpillPlacer->getBlockFrequency(Number);
+ }
+ }
+ return Cost;
+}
/// calcGlobalSplitCost - Return the global split cost of following the split
/// pattern in LiveBundles. This cost should be added to the local cost of the
/// interference pattern in SplitConstraints.
///
-float RAGreedy::calcGlobalSplitCost(unsigned PhysReg,
- const BitVector &LiveBundles) {
+float RAGreedy::calcGlobalSplitCost(GlobalSplitCandidate &Cand,
+ InterferenceCache::Cursor Intf) {
float GlobalCost = 0;
+ const BitVector &LiveBundles = Cand.LiveBundles;
ArrayRef<SplitAnalysis::BlockInfo> UseBlocks = SA->getUseBlocks();
for (unsigned i = 0; i != UseBlocks.size(); ++i) {
const SplitAnalysis::BlockInfo &BI = UseBlocks[i];
@@ -549,11 +717,8 @@ float RAGreedy::calcGlobalSplitCost(unsigned PhysReg,
GlobalCost += Ins * SpillPlacer->getBlockFrequency(BC.Number);
}
- InterferenceCache::Cursor Intf(IntfCache, PhysReg);
- ArrayRef<unsigned> ThroughBlocks = SA->getThroughBlocks();
- SplitConstraints.resize(UseBlocks.size() + ThroughBlocks.size());
- for (unsigned i = 0; i != ThroughBlocks.size(); ++i) {
- unsigned Number = ThroughBlocks[i];
+ for (unsigned i = 0, e = Cand.ActiveBlocks.size(); i != e; ++i) {
+ unsigned Number = Cand.ActiveBlocks[i];
bool RegIn = LiveBundles[Bundles->getBundle(Number, 0)];
bool RegOut = LiveBundles[Bundles->getBundle(Number, 1)];
if (!RegIn && !RegOut)
@@ -578,23 +743,25 @@ float RAGreedy::calcGlobalSplitCost(unsigned PhysReg,
/// avoiding interference. The 'stack' interval is the complement constructed by
/// SplitEditor. It will contain the rest.
///
-void RAGreedy::splitAroundRegion(LiveInterval &VirtReg, unsigned PhysReg,
- const BitVector &LiveBundles,
+void RAGreedy::splitAroundRegion(LiveInterval &VirtReg,
+ GlobalSplitCandidate &Cand,
SmallVectorImpl<LiveInterval*> &NewVRegs) {
+ const BitVector &LiveBundles = Cand.LiveBundles;
+
DEBUG({
- dbgs() << "Splitting around region for " << PrintReg(PhysReg, TRI)
+ dbgs() << "Splitting around region for " << PrintReg(Cand.PhysReg, TRI)
<< " with bundles";
for (int i = LiveBundles.find_first(); i>=0; i = LiveBundles.find_next(i))
dbgs() << " EB#" << i;
dbgs() << ".\n";
});
- InterferenceCache::Cursor Intf(IntfCache, PhysReg);
+ InterferenceCache::Cursor Intf(IntfCache, Cand.PhysReg);
LiveRangeEdit LREdit(VirtReg, NewVRegs, this);
SE->reset(LREdit);
// Create the main cross-block interval.
- SE->openIntv();
+ const unsigned MainIntv = SE->openIntv();
// First add all defs that are live out of a block.
ArrayRef<SplitAnalysis::BlockInfo> UseBlocks = SA->getUseBlocks();
@@ -603,6 +770,14 @@ void RAGreedy::splitAroundRegion(LiveInterval &VirtReg, unsigned PhysReg,
bool RegIn = LiveBundles[Bundles->getBundle(BI.MBB->getNumber(), 0)];
bool RegOut = LiveBundles[Bundles->getBundle(BI.MBB->getNumber(), 1)];
+ // Create separate intervals for isolated blocks with multiple uses.
+ if (!RegIn && !RegOut && BI.FirstUse != BI.LastUse) {
+ DEBUG(dbgs() << "BB#" << BI.MBB->getNumber() << " isolated.\n");
+ SE->splitSingleBlock(BI);
+ SE->selectIntv(MainIntv);
+ continue;
+ }
+
// Should the register be live out?
if (!BI.LiveOut || !RegOut)
continue;
@@ -628,7 +803,7 @@ void RAGreedy::splitAroundRegion(LiveInterval &VirtReg, unsigned PhysReg,
DEBUG(dbgs() << ", no interference");
if (!BI.LiveThrough) {
DEBUG(dbgs() << ", not live-through.\n");
- SE->useIntv(SE->enterIntvBefore(BI.Def), Stop);
+ SE->useIntv(SE->enterIntvBefore(BI.FirstUse), Stop);
continue;
}
if (!RegIn) {
@@ -645,10 +820,10 @@ void RAGreedy::splitAroundRegion(LiveInterval &VirtReg, unsigned PhysReg,
// Block has interference.
DEBUG(dbgs() << ", interference to " << Intf.last());
- if (!BI.LiveThrough && Intf.last() <= BI.Def) {
+ if (!BI.LiveThrough && Intf.last() <= BI.FirstUse) {
// The interference doesn't reach the outgoing segment.
- DEBUG(dbgs() << " doesn't affect def from " << BI.Def << '\n');
- SE->useIntv(BI.Def, Stop);
+ DEBUG(dbgs() << " doesn't affect def from " << BI.FirstUse << '\n');
+ SE->useIntv(BI.FirstUse, Stop);
continue;
}
@@ -704,7 +879,7 @@ void RAGreedy::splitAroundRegion(LiveInterval &VirtReg, unsigned PhysReg,
DEBUG(dbgs() << ", no interference");
if (!BI.LiveThrough) {
DEBUG(dbgs() << ", killed in block.\n");
- SE->useIntv(Start, SE->leaveIntvAfter(BI.Kill));
+ SE->useIntv(Start, SE->leaveIntvAfter(BI.LastUse));
continue;
}
if (!RegOut) {
@@ -737,10 +912,10 @@ void RAGreedy::splitAroundRegion(LiveInterval &VirtReg, unsigned PhysReg,
// Block has interference.
DEBUG(dbgs() << ", interference from " << Intf.first());
- if (!BI.LiveThrough && Intf.first() >= BI.Kill) {
+ if (!BI.LiveThrough && Intf.first() >= BI.LastUse) {
// The interference doesn't reach the outgoing segment.
- DEBUG(dbgs() << " doesn't affect kill at " << BI.Kill << '\n');
- SE->useIntv(Start, BI.Kill);
+ DEBUG(dbgs() << " doesn't affect kill at " << BI.LastUse << '\n');
+ SE->useIntv(Start, BI.LastUse);
continue;
}
@@ -766,9 +941,8 @@ void RAGreedy::splitAroundRegion(LiveInterval &VirtReg, unsigned PhysReg,
}
// Handle live-through blocks.
- ArrayRef<unsigned> ThroughBlocks = SA->getThroughBlocks();
- for (unsigned i = 0; i != ThroughBlocks.size(); ++i) {
- unsigned Number = ThroughBlocks[i];
+ for (unsigned i = 0, e = Cand.ActiveBlocks.size(); i != e; ++i) {
+ unsigned Number = Cand.ActiveBlocks[i];
bool RegIn = LiveBundles[Bundles->getBundle(Number, 0)];
bool RegOut = LiveBundles[Bundles->getBundle(Number, 1)];
DEBUG(dbgs() << "Live through BB#" << Number << '\n');
@@ -787,70 +961,113 @@ void RAGreedy::splitAroundRegion(LiveInterval &VirtReg, unsigned PhysReg,
SE->enterIntvAtEnd(*MBB);
}
- SE->closeIntv();
-
- // FIXME: Should we be more aggressive about splitting the stack region into
- // per-block segments? The current approach allows the stack region to
- // separate into connected components. Some components may be allocatable.
- SE->finish();
++NumGlobalSplits;
+ SmallVector<unsigned, 8> IntvMap;
+ SE->finish(&IntvMap);
+ DebugVars->splitRegister(VirtReg.reg, LREdit.regs());
+
+ LRStage.resize(MRI->getNumVirtRegs());
+ unsigned OrigBlocks = SA->getNumLiveBlocks();
+
+ // Sort out the new intervals created by splitting. We get four kinds:
+ // - Remainder intervals should not be split again.
+ // - Candidate intervals can be assigned to Cand.PhysReg.
+ // - Block-local splits are candidates for local splitting.
+ // - DCE leftovers should go back on the queue.
+ for (unsigned i = 0, e = LREdit.size(); i != e; ++i) {
+ unsigned Reg = LREdit.get(i)->reg;
+
+ // Ignore old intervals from DCE.
+ if (LRStage[Reg] != RS_New)
+ continue;
+
+ // Remainder interval. Don't try splitting again, spill if it doesn't
+ // allocate.
+ if (IntvMap[i] == 0) {
+ LRStage[Reg] = RS_Global;
+ continue;
+ }
+
+ // Main interval. Allow repeated splitting as long as the number of live
+ // blocks is strictly decreasing.
+ if (IntvMap[i] == MainIntv) {
+ if (SA->countLiveBlocks(LREdit.get(i)) >= OrigBlocks) {
+ DEBUG(dbgs() << "Main interval covers the same " << OrigBlocks
+ << " blocks as original.\n");
+ // Don't allow repeated splitting as a safe guard against looping.
+ LRStage[Reg] = RS_Global;
+ }
+ continue;
+ }
+
+ // Other intervals are treated as new. This includes local intervals created
+ // for blocks with multiple uses, and anything created by DCE.
+ }
+
if (VerifyEnabled)
MF->verify(this, "After splitting live range around region");
}
unsigned RAGreedy::tryRegionSplit(LiveInterval &VirtReg, AllocationOrder &Order,
SmallVectorImpl<LiveInterval*> &NewVRegs) {
- BitVector LiveBundles, BestBundles;
- float BestCost = 0;
- unsigned BestReg = 0;
+ float BestCost = Hysteresis * calcSpillCost();
+ DEBUG(dbgs() << "Cost of isolating all blocks = " << BestCost << '\n');
+ const unsigned NoCand = ~0u;
+ unsigned BestCand = NoCand;
Order.rewind();
for (unsigned Cand = 0; unsigned PhysReg = Order.next(); ++Cand) {
if (GlobalCand.size() <= Cand)
GlobalCand.resize(Cand+1);
- GlobalCand[Cand].PhysReg = PhysReg;
+ GlobalCand[Cand].reset(PhysReg);
- SpillPlacer->prepare(LiveBundles);
+ SpillPlacer->prepare(GlobalCand[Cand].LiveBundles);
float Cost;
- if (!addSplitConstraints(PhysReg, Cost)) {
- DEBUG(dbgs() << PrintReg(PhysReg, TRI) << "\tno positive bias\n");
+ InterferenceCache::Cursor Intf(IntfCache, PhysReg);
+ if (!addSplitConstraints(Intf, Cost)) {
+ DEBUG(dbgs() << PrintReg(PhysReg, TRI) << "\tno positive bundles\n");
continue;
}
- DEBUG(dbgs() << PrintReg(PhysReg, TRI) << "\tbiased = "
- << SpillPlacer->getPositiveNodes() << ", static = " << Cost);
- if (BestReg && Cost >= BestCost) {
- DEBUG(dbgs() << " worse than " << PrintReg(BestReg, TRI) << '\n');
+ DEBUG(dbgs() << PrintReg(PhysReg, TRI) << "\tstatic = " << Cost);
+ if (Cost >= BestCost) {
+ DEBUG({
+ if (BestCand == NoCand)
+ dbgs() << " worse than no bundles\n";
+ else
+ dbgs() << " worse than "
+ << PrintReg(GlobalCand[BestCand].PhysReg, TRI) << '\n';
+ });
continue;
}
+ growRegion(GlobalCand[Cand], Intf);
SpillPlacer->finish();
// No live bundles, defer to splitSingleBlocks().
- if (!LiveBundles.any()) {
+ if (!GlobalCand[Cand].LiveBundles.any()) {
DEBUG(dbgs() << " no bundles.\n");
continue;
}
- Cost += calcGlobalSplitCost(PhysReg, LiveBundles);
+ Cost += calcGlobalSplitCost(GlobalCand[Cand], Intf);
DEBUG({
dbgs() << ", total = " << Cost << " with bundles";
- for (int i = LiveBundles.find_first(); i>=0; i = LiveBundles.find_next(i))
+ for (int i = GlobalCand[Cand].LiveBundles.find_first(); i>=0;
+ i = GlobalCand[Cand].LiveBundles.find_next(i))
dbgs() << " EB#" << i;
dbgs() << ".\n";
});
- if (!BestReg || Cost < BestCost) {
- BestReg = PhysReg;
- BestCost = 0.98f * Cost; // Prevent rounding effects.
- BestBundles.swap(LiveBundles);
+ if (Cost < BestCost) {
+ BestCand = Cand;
+ BestCost = Hysteresis * Cost; // Prevent rounding effects.
}
}
- if (!BestReg)
+ if (BestCand == NoCand)
return 0;
- splitAroundRegion(VirtReg, BestReg, BestBundles, NewVRegs);
- setStage(NewVRegs.begin(), NewVRegs.end(), RS_Region);
+ splitAroundRegion(VirtReg, GlobalCand[BestCand], NewVRegs);
return 0;
}
@@ -913,47 +1130,6 @@ void RAGreedy::calcGapWeights(unsigned PhysReg,
}
}
-/// getPrevMappedIndex - Return the slot index of the last non-copy instruction
-/// before MI that has a slot index. If MI is the first mapped instruction in
-/// its block, return the block start index instead.
-///
-SlotIndex RAGreedy::getPrevMappedIndex(const MachineInstr *MI) {
- assert(MI && "Missing MachineInstr");
- const MachineBasicBlock *MBB = MI->getParent();
- MachineBasicBlock::const_iterator B = MBB->begin(), I = MI;
- while (I != B)
- if (!(--I)->isDebugValue() && !I->isCopy())
- return Indexes->getInstructionIndex(I);
- return Indexes->getMBBStartIdx(MBB);
-}
-
-/// calcPrevSlots - Fill in the PrevSlot array with the index of the previous
-/// real non-copy instruction for each instruction in SA->UseSlots.
-///
-void RAGreedy::calcPrevSlots() {
- const SmallVectorImpl<SlotIndex> &Uses = SA->UseSlots;
- PrevSlot.clear();
- PrevSlot.reserve(Uses.size());
- for (unsigned i = 0, e = Uses.size(); i != e; ++i) {
- const MachineInstr *MI = Indexes->getInstructionFromIndex(Uses[i]);
- PrevSlot.push_back(getPrevMappedIndex(MI).getDefIndex());
- }
-}
-
-/// nextSplitPoint - Find the next index into SA->UseSlots > i such that it may
-/// be beneficial to split before UseSlots[i].
-///
-/// 0 is always a valid split point
-unsigned RAGreedy::nextSplitPoint(unsigned i) {
- const SmallVectorImpl<SlotIndex> &Uses = SA->UseSlots;
- const unsigned Size = Uses.size();
- assert(i != Size && "No split points after the end");
- // Allow split before i when Uses[i] is not adjacent to the previous use.
- while (++i != Size && PrevSlot[i].getBaseIndex() <= Uses[i-1].getBaseIndex())
- ;
- return i;
-}
-
/// tryLocalSplit - Try to split VirtReg into smaller intervals inside its only
/// basic block.
///
@@ -981,11 +1157,27 @@ unsigned RAGreedy::tryLocalSplit(LiveInterval &VirtReg, AllocationOrder &Order,
dbgs() << '\n';
});
- // For every use, find the previous mapped non-copy instruction.
- // We use this to detect valid split points, and to estimate new interval
- // sizes.
- calcPrevSlots();
+ // Since we allow local split results to be split again, there is a risk of
+ // creating infinite loops. It is tempting to require that the new live
+ // ranges have less instructions than the original. That would guarantee
+ // convergence, but it is too strict. A live range with 3 instructions can be
+ // split 2+3 (including the COPY), and we want to allow that.
+ //
+ // Instead we use these rules:
+ //
+ // 1. Allow any split for ranges with getStage() < RS_Local. (Except for the
+ // noop split, of course).
+ // 2. Require progress be made for ranges with getStage() >= RS_Local. All
+ // the new ranges must have fewer instructions than before the split.
+ // 3. New ranges with the same number of instructions are marked RS_Local,
+ // smaller ranges are marked RS_New.
+ //
+ // These rules allow a 3 -> 2+3 split once, which we need. They also prevent
+ // excessive splitting and infinite loops.
+ //
+ bool ProgressRequired = getStage(VirtReg) >= RS_Local;
+ // Best split candidate.
unsigned BestBefore = NumGaps;
unsigned BestAfter = 0;
float BestDiff = 0;
@@ -1003,13 +1195,11 @@ unsigned RAGreedy::tryLocalSplit(LiveInterval &VirtReg, AllocationOrder &Order,
// The new spill weight must be larger than any gap interference.
// We will split before Uses[SplitBefore] and after Uses[SplitAfter].
- unsigned SplitBefore = 0, SplitAfter = nextSplitPoint(1) - 1;
+ unsigned SplitBefore = 0, SplitAfter = 1;
// MaxGap should always be max(GapWeight[SplitBefore..SplitAfter-1]).
// It is the spill weight that needs to be evicted.
float MaxGap = GapWeight[0];
- for (unsigned i = 1; i != SplitAfter; ++i)
- MaxGap = std::max(MaxGap, GapWeight[i]);
for (;;) {
// Live before/after split?
@@ -1027,41 +1217,31 @@ unsigned RAGreedy::tryLocalSplit(LiveInterval &VirtReg, AllocationOrder &Order,
}
// Should the interval be extended or shrunk?
bool Shrink = true;
- if (MaxGap < HUGE_VALF) {
- // Estimate the new spill weight.
- //
- // Each instruction reads and writes the register, except the first
- // instr doesn't read when !FirstLive, and the last instr doesn't write
- // when !LastLive.
- //
- // We will be inserting copies before and after, so the total number of
- // reads and writes is 2 * EstUses.
- //
- const unsigned EstUses = 2*(SplitAfter - SplitBefore) +
- 2*(LiveBefore + LiveAfter);
- // Try to guess the size of the new interval. This should be trivial,
- // but the slot index of an inserted copy can be a lot smaller than the
- // instruction it is inserted before if there are many dead indexes
- // between them.
- //
- // We measure the distance from the instruction before SplitBefore to
- // get a conservative estimate.
- //
- // The final distance can still be different if inserting copies
- // triggers a slot index renumbering.
+ // How many gaps would the new range have?
+ unsigned NewGaps = LiveBefore + SplitAfter - SplitBefore + LiveAfter;
+
+ // Legally, without causing looping?
+ bool Legal = !ProgressRequired || NewGaps < NumGaps;
+
+ if (Legal && MaxGap < HUGE_VALF) {
+ // Estimate the new spill weight. Each instruction reads or writes the
+ // register. Conservatively assume there are no read-modify-write
+ // instructions.
//
- const float EstWeight = normalizeSpillWeight(blockFreq * EstUses,
- PrevSlot[SplitBefore].distance(Uses[SplitAfter]));
+ // Try to guess the size of the new interval.
+ const float EstWeight = normalizeSpillWeight(blockFreq * (NewGaps + 1),
+ Uses[SplitBefore].distance(Uses[SplitAfter]) +
+ (LiveBefore + LiveAfter)*SlotIndex::InstrDist);
// Would this split be possible to allocate?
// Never allocate all gaps, we wouldn't be making progress.
- float Diff = EstWeight - MaxGap;
- DEBUG(dbgs() << " w=" << EstWeight << " d=" << Diff);
- if (Diff > 0) {
+ DEBUG(dbgs() << " w=" << EstWeight);
+ if (EstWeight * Hysteresis >= MaxGap) {
Shrink = false;
+ float Diff = EstWeight - MaxGap;
if (Diff > BestDiff) {
DEBUG(dbgs() << " (best)");
- BestDiff = Diff;
+ BestDiff = Hysteresis * Diff;
BestBefore = SplitBefore;
BestAfter = SplitAfter;
}
@@ -1070,8 +1250,7 @@ unsigned RAGreedy::tryLocalSplit(LiveInterval &VirtReg, AllocationOrder &Order,
// Try to shrink.
if (Shrink) {
- SplitBefore = nextSplitPoint(SplitBefore);
- if (SplitBefore < SplitAfter) {
+ if (++SplitBefore < SplitAfter) {
DEBUG(dbgs() << " shrink\n");
// Recompute the max when necessary.
if (GapWeight[SplitBefore - 1] >= MaxGap) {
@@ -1091,10 +1270,7 @@ unsigned RAGreedy::tryLocalSplit(LiveInterval &VirtReg, AllocationOrder &Order,
}
DEBUG(dbgs() << " extend\n");
- for (unsigned e = nextSplitPoint(SplitAfter + 1) - 1;
- SplitAfter != e; ++SplitAfter)
- MaxGap = std::max(MaxGap, GapWeight[SplitAfter]);
- continue;
+ MaxGap = std::max(MaxGap, GapWeight[SplitAfter++]);
}
}
@@ -1113,9 +1289,27 @@ unsigned RAGreedy::tryLocalSplit(LiveInterval &VirtReg, AllocationOrder &Order,
SlotIndex SegStart = SE->enterIntvBefore(Uses[BestBefore]);
SlotIndex SegStop = SE->leaveIntvAfter(Uses[BestAfter]);
SE->useIntv(SegStart, SegStop);
- SE->closeIntv();
- SE->finish();
- setStage(NewVRegs.begin(), NewVRegs.end(), RS_Local);
+ SmallVector<unsigned, 8> IntvMap;
+ SE->finish(&IntvMap);
+ DebugVars->splitRegister(VirtReg.reg, LREdit.regs());
+
+ // If the new range has the same number of instructions as before, mark it as
+ // RS_Local so the next split will be forced to make progress. Otherwise,
+ // leave the new intervals as RS_New so they can compete.
+ bool LiveBefore = BestBefore != 0 || BI.LiveIn;
+ bool LiveAfter = BestAfter != NumGaps || BI.LiveOut;
+ unsigned NewGaps = LiveBefore + BestAfter - BestBefore + LiveAfter;
+ if (NewGaps >= NumGaps) {
+ DEBUG(dbgs() << "Tagging non-progress ranges: ");
+ assert(!ProgressRequired && "Didn't make progress when it was required.");
+ LRStage.resize(MRI->getNumVirtRegs());
+ for (unsigned i = 0, e = IntvMap.size(); i != e; ++i)
+ if (IntvMap[i] == 1) {
+ LRStage[LREdit.get(i)->reg] = RS_Local;
+ DEBUG(dbgs() << PrintReg(LREdit.get(i)->reg));
+ }
+ DEBUG(dbgs() << '\n');
+ }
++NumLocalSplits;
return 0;
@@ -1141,30 +1335,36 @@ unsigned RAGreedy::trySplit(LiveInterval &VirtReg, AllocationOrder &Order,
// Don't iterate global splitting.
// Move straight to spilling if this range was produced by a global split.
- LiveRangeStage Stage = getStage(VirtReg);
- if (Stage >= RS_Block)
+ if (getStage(VirtReg) >= RS_Global)
return 0;
SA->analyze(&VirtReg);
- // First try to split around a region spanning multiple blocks.
- if (Stage < RS_Region) {
- unsigned PhysReg = tryRegionSplit(VirtReg, Order, NewVRegs);
- if (PhysReg || !NewVRegs.empty())
+ // FIXME: SplitAnalysis may repair broken live ranges coming from the
+ // coalescer. That may cause the range to become allocatable which means that
+ // tryRegionSplit won't be making progress. This check should be replaced with
+ // an assertion when the coalescer is fixed.
+ if (SA->didRepairRange()) {
+ // VirtReg has changed, so all cached queries are invalid.
+ invalidateVirtRegs();
+ if (unsigned PhysReg = tryAssign(VirtReg, Order, NewVRegs))
return PhysReg;
}
+ // First try to split around a region spanning multiple blocks.
+ unsigned PhysReg = tryRegionSplit(VirtReg, Order, NewVRegs);
+ if (PhysReg || !NewVRegs.empty())
+ return PhysReg;
+
// Then isolate blocks with multiple uses.
- if (Stage < RS_Block) {
- SplitAnalysis::BlockPtrSet Blocks;
- if (SA->getMultiUseBlocks(Blocks)) {
- LiveRangeEdit LREdit(VirtReg, NewVRegs, this);
- SE->reset(LREdit);
- SE->splitSingleBlocks(Blocks);
- setStage(NewVRegs.begin(), NewVRegs.end(), RS_Block);
- if (VerifyEnabled)
- MF->verify(this, "After splitting live range around basic blocks");
- }
+ SplitAnalysis::BlockPtrSet Blocks;
+ if (SA->getMultiUseBlocks(Blocks)) {
+ LiveRangeEdit LREdit(VirtReg, NewVRegs, this);
+ SE->reset(LREdit);
+ SE->splitSingleBlocks(Blocks);
+ setStage(NewVRegs.begin(), NewVRegs.end(), RS_Global);
+ if (VerifyEnabled)
+ MF->verify(this, "After splitting live range around basic blocks");
}
// Don't assign any physregs.
@@ -1179,21 +1379,25 @@ unsigned RAGreedy::trySplit(LiveInterval &VirtReg, AllocationOrder &Order,
unsigned RAGreedy::selectOrSplit(LiveInterval &VirtReg,
SmallVectorImpl<LiveInterval*> &NewVRegs) {
// First try assigning a free register.
- AllocationOrder Order(VirtReg.reg, *VRM, ReservedRegs);
- while (unsigned PhysReg = Order.next()) {
- if (!checkPhysRegInterference(VirtReg, PhysReg))
- return PhysReg;
- }
-
- if (unsigned PhysReg = tryEvict(VirtReg, Order, NewVRegs))
+ AllocationOrder Order(VirtReg.reg, *VRM, RegClassInfo);
+ if (unsigned PhysReg = tryAssign(VirtReg, Order, NewVRegs))
return PhysReg;
+ LiveRangeStage Stage = getStage(VirtReg);
+ DEBUG(dbgs() << StageName[Stage] << '\n');
+
+ // Try to evict a less worthy live range, but only for ranges from the primary
+ // queue. The RS_Second ranges already failed to do this, and they should not
+ // get a second chance until they have been split.
+ if (Stage != RS_Second)
+ if (unsigned PhysReg = tryEvict(VirtReg, Order, NewVRegs))
+ return PhysReg;
+
assert(NewVRegs.empty() && "Cannot append to existing NewVRegs");
// The first time we see a live range, don't try to split or spill.
// Wait until the second time, when all smaller ranges have been allocated.
// This gives a better picture of the interference to split around.
- LiveRangeStage Stage = getStage(VirtReg);
if (Stage == RS_First) {
LRStage[VirtReg.reg] = RS_Second;
DEBUG(dbgs() << "wait for second round\n");
@@ -1201,7 +1405,10 @@ unsigned RAGreedy::selectOrSplit(LiveInterval &VirtReg,
return 0;
}
- assert(Stage < RS_Spill && "Cannot allocate after spilling");
+ // If we couldn't allocate a register from spilling, there is probably some
+ // invalid inline assembly. The base class wil report it.
+ if (Stage >= RS_Spill)
+ return ~0u;
// Try splitting VirtReg or interferences.
unsigned PhysReg = trySplit(VirtReg, Order, NewVRegs);
@@ -1234,12 +1441,12 @@ bool RAGreedy::runOnMachineFunction(MachineFunction &mf) {
RegAllocBase::init(getAnalysis<VirtRegMap>(), getAnalysis<LiveIntervals>());
Indexes = &getAnalysis<SlotIndexes>();
DomTree = &getAnalysis<MachineDominatorTree>();
- ReservedRegs = TRI->getReservedRegs(*MF);
SpillerInstance.reset(createInlineSpiller(*this, *MF, *VRM));
Loops = &getAnalysis<MachineLoopInfo>();
LoopRanges = &getAnalysis<MachineLoopRanges>();
Bundles = &getAnalysis<EdgeBundles>();
SpillPlacer = &getAnalysis<SpillPlacement>();
+ DebugVars = &getAnalysis<LiveDebugVariables>();
SA.reset(new SplitAnalysis(*VRM, *LIS, *Loops));
SE.reset(new SplitEditor(*SA, *LIS, *VRM, *DomTree));
@@ -1258,7 +1465,7 @@ bool RAGreedy::runOnMachineFunction(MachineFunction &mf) {
}
// Write out new DBG_VALUE instructions.
- getAnalysis<LiveDebugVariables>().emitDebugValues(VRM);
+ DebugVars->emitDebugValues(VRM);
// The pass output is in VirtRegMap. Release all the transient data.
releaseMemory();