aboutsummaryrefslogtreecommitdiffstats
path: root/lib/CodeGen/PrologEpilogInserter.cpp
diff options
context:
space:
mode:
authorJohn Mosby <ojomojo@gmail.com>2009-03-27 06:09:40 +0000
committerJohn Mosby <ojomojo@gmail.com>2009-03-27 06:09:40 +0000
commitea4d351fc690bd6558fe9ca61db88ee809f0572f (patch)
tree97273e8725691e7c6b8d51d8599e8a71c90f2936 /lib/CodeGen/PrologEpilogInserter.cpp
parentcb2480d0a8460a40ccad1bc18dafdc07f4f4b312 (diff)
downloadexternal_llvm-ea4d351fc690bd6558fe9ca61db88ee809f0572f.zip
external_llvm-ea4d351fc690bd6558fe9ca61db88ee809f0572f.tar.gz
external_llvm-ea4d351fc690bd6558fe9ca61db88ee809f0572f.tar.bz2
Shrink wrapping in PEI: initial release. Finishing development, enable with --shrink-wrap.
git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@67828 91177308-0d34-0410-b5e6-96231b3b80d8
Diffstat (limited to 'lib/CodeGen/PrologEpilogInserter.cpp')
-rw-r--r--lib/CodeGen/PrologEpilogInserter.cpp956
1 files changed, 913 insertions, 43 deletions
diff --git a/lib/CodeGen/PrologEpilogInserter.cpp b/lib/CodeGen/PrologEpilogInserter.cpp
index 564f9a4..d3e1839 100644
--- a/lib/CodeGen/PrologEpilogInserter.cpp
+++ b/lib/CodeGen/PrologEpilogInserter.cpp
@@ -14,9 +14,34 @@
// This pass must be run after register allocation. After this pass is
// executed, it is illegal to construct MO_FrameIndex operands.
//
+// This pass implements a shrink wrapping variant of prolog/epilog insertion:
+// - Places callee saved register (CSR) spills and restores in the CFG to
+// tightly surround uses so that execution paths that do not use CSRs do not
+// pay the spill/restore penalty.
+//
+// - Avoiding placment of spills/restores in loops: if a CSR is used inside a
+// loop(nest), the spills are placed in the loop preheader, and restores are
+// placed in the loop exit nodes (the successors of the loop _exiting_ nodes).
+//
+// - Covering paths without CSR uses: e.g. if a restore is placed in a join
+// block, a matching spill is added to the end of all immediate predecessor
+// blocks that are not reached by a spill. Similarly for saves placed in
+// branch blocks.
+//
+// Shrink wrapping uses an analysis similar to the one in GVNPRE to determine
+// which basic blocks require callee-saved register save/restore code.
+//
+// This pass uses MachineDominators and MachineLoopInfo. Loop information
+// is used to prevent shrink wrapping of callee-saved register save/restore
+// code into loops.
+//
//===----------------------------------------------------------------------===//
+#define DEBUG_TYPE "shrink-wrap"
+
#include "llvm/CodeGen/Passes.h"
+#include "llvm/CodeGen/MachineDominators.h"
+#include "llvm/CodeGen/MachineLoopInfo.h"
#include "llvm/CodeGen/MachineFunctionPass.h"
#include "llvm/CodeGen/MachineInstr.h"
#include "llvm/CodeGen/MachineFrameInfo.h"
@@ -27,11 +52,24 @@
#include "llvm/Target/TargetRegisterInfo.h"
#include "llvm/Target/TargetFrameInfo.h"
#include "llvm/Target/TargetInstrInfo.h"
+#include "llvm/ADT/SparseBitVector.h"
+#include "llvm/ADT/DenseMap.h"
+#include "llvm/ADT/PostOrderIterator.h"
+#include "llvm/ADT/Statistic.h"
+#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Compiler.h"
+#include "llvm/Support/Debug.h"
#include "llvm/ADT/STLExtras.h"
#include <climits>
+#include <sstream>
+
using namespace llvm;
+// Shrink Wrapping:
+static cl::opt<bool>
+ShrinkWrapping("shrink-wrap",
+ cl::desc("Apply shrink wrapping to callee-saved register spills/restores"));
+
namespace {
struct VISIBILITY_HIDDEN PEI : public MachineFunctionPass {
static char ID;
@@ -42,8 +80,13 @@ namespace {
}
virtual void getAnalysisUsage(AnalysisUsage &AU) const {
- AU.addPreservedID(MachineLoopInfoID);
- AU.addPreservedID(MachineDominatorsID);
+ AU.setPreservesCFG();
+ if (ShrinkWrapping) {
+ AU.addRequired<MachineLoopInfo>();
+ AU.addRequired<MachineDominatorTree>();
+ }
+ AU.addPreserved<MachineLoopInfo>();
+ AU.addPreserved<MachineDominatorTree>();
MachineFunctionPass::getAnalysisUsage(AU);
}
@@ -69,8 +112,16 @@ namespace {
// information and eliminates call frame pseudo instructions.
calculateCalleeSavedRegisters(Fn);
+ // Determine placement of CSR spill/restore code:
+ // - with shrink wrapping, place spills and restores to tightly
+ // enclose regions in the Machine CFG of the function where
+ // they are used. Without shrink wrapping
+ // - default (no shrink wrapping), place all spills in the
+ // entry block, all restores in return blocks.
+ placeCSRSpillsAndRestores(Fn);
+
// Add the code to save and restore the callee saved registers
- saveCalleeSavedRegisters(Fn);
+ insertCSRSpillsAndRestores(Fn);
// Allow the target machine to make final modifications to the function
// before the frame layout is finalized.
@@ -94,7 +145,7 @@ namespace {
delete RS;
return true;
}
-
+
private:
RegScavenger *RS;
@@ -102,22 +153,702 @@ namespace {
// stack frame indexes.
unsigned MinCSFrameIndex, MaxCSFrameIndex;
+ // Analysis info for spill/restore placement.
+ // "CSR": "callee saved register".
+
+ // CSRegSet contains indices into the Callee Saved Register Info
+ // vector built by calculateCalleeSavedRegisters() and accessed
+ // via MF.getFrameInfo()->getCalleeSavedInfo().
+ typedef SparseBitVector<> CSRegSet;
+
+ // CSRegBlockMap maps MachineBasicBlocks to sets of callee
+ // saved register indices.
+ typedef DenseMap<MachineBasicBlock*, CSRegSet> CSRegBlockMap;
+
+ // Set and maps for computing CSR spill/restore placement:
+ // used in function (UsedCSRegs)
+ // used in a basic block (CSRUsed)
+ // anticipatable in a basic block (Antic{In,Out})
+ // available in a basic block (Avail{In,Out})
+ // to be spilled at the entry to a basic block (CSRSave)
+ // to be restored at the end of a basic block (CSRRestore)
+
+ CSRegSet UsedCSRegs;
+ CSRegBlockMap CSRUsed;
+ CSRegBlockMap AnticIn, AnticOut;
+ CSRegBlockMap AvailIn, AvailOut;
+ CSRegBlockMap CSRSave;
+ CSRegBlockMap CSRRestore;
+
+ // Entry and return blocks of the current function.
+ MachineBasicBlock* EntryBlock;
+ SmallVector<MachineBasicBlock*, 4> ReturnBlocks;
+
+ // Flag to control shrink wrapping per-function:
+ // may choose to skip shrink wrapping for certain
+ // functions.
+ bool ShrinkWrapThisFunction;
+
+ bool calculateSets(MachineFunction &Fn);
+ void calculateAnticAvail(MachineFunction &Fn);
+ MachineBasicBlock* moveSpillsOutOfLoops(MachineFunction &Fn,
+ MachineBasicBlock* MBB);
+ void addRestoresForSBranchBlock(MachineFunction &Fn,
+ MachineBasicBlock* MBB);
+ void moveRestoresOutOfLoops(MachineFunction& Fn,
+ MachineBasicBlock* MBB,
+ std::vector<MachineBasicBlock*>& SBLKS);
+ void addSavesForRJoinBlocks(MachineFunction& Fn,
+ std::vector<MachineBasicBlock*>& SBLKS);
+ void placeSpillsAndRestores(MachineFunction &Fn);
+ void placeCSRSpillsAndRestores(MachineFunction &Fn);
void calculateCalleeSavedRegisters(MachineFunction &Fn);
- void saveCalleeSavedRegisters(MachineFunction &Fn);
+ void insertCSRSpillsAndRestores(MachineFunction &Fn);
void calculateFrameObjectOffsets(MachineFunction &Fn);
void replaceFrameIndices(MachineFunction &Fn);
void insertPrologEpilogCode(MachineFunction &Fn);
+
+ // Initialize all shrink wrapping data.
+ void initShrinkWrappingInfo() {
+ UsedCSRegs.clear();
+ CSRUsed.clear();
+ AnticIn.clear();
+ AnticOut.clear();
+ AvailIn.clear();
+ AvailOut.clear();
+ CSRSave.clear();
+ CSRRestore.clear();
+ EntryBlock = 0;
+ if (! ReturnBlocks.empty())
+ ReturnBlocks.clear();
+ ShrinkWrapThisFunction = ShrinkWrapping;
+ }
+
+ // Convienences for dealing with machine loops.
+ MachineBasicBlock* getTopLevelLoopPreheader(MachineLoop* LP) {
+ assert(LP && "Machine loop is NULL.");
+ MachineBasicBlock* PHDR = LP->getLoopPreheader();
+ MachineLoop* PLP = LP->getParentLoop();
+ while (PLP) {
+ PHDR = PLP->getLoopPreheader();
+ PLP = PLP->getParentLoop();
+ }
+ return PHDR;
+ }
+
+ MachineLoop* getTopLevelLoopParent(MachineLoop *LP) {
+ if (LP == 0)
+ return 0;
+ MachineLoop* PLP = LP->getParentLoop();
+ while (PLP) {
+ LP = PLP;
+ PLP = PLP->getParentLoop();
+ }
+ return LP;
+ }
+
+#ifndef NDEBUG
+ // Debugging methods.
+ static std::string getBasicBlockName(const MachineBasicBlock* MBB) {
+ std::ostringstream name;
+ if (MBB) {
+ if (MBB->getBasicBlock())
+ name << MBB->getBasicBlock()->getName();
+ else
+ name << "_MBB_" << MBB->getNumber();
+ }
+ return name.str();
+ }
+
+ static std::string stringifyCSRegSet(const CSRegSet& s,
+ MachineFunction &Fn) {
+ const TargetRegisterInfo* TRI = Fn.getTarget().getRegisterInfo();
+ const std::vector<CalleeSavedInfo> CSI =
+ Fn.getFrameInfo()->getCalleeSavedInfo();
+
+ std::ostringstream srep;
+ if (CSI.size() == 0) {
+ srep << "[]";
+ return srep.str();
+ }
+ srep << "[";
+ CSRegSet::iterator I = s.begin(), E = s.end();
+ if (I != E) {
+ unsigned reg = CSI[*I].getReg();
+ srep << TRI->getName(reg);
+ for (++I; I != E; ++I) {
+ reg = CSI[*I].getReg();
+ srep << ",";
+ srep << TRI->getName(reg);
+ }
+ }
+ srep << "]";
+ return srep.str();
+ }
+
+ static void dumpSet(const CSRegSet& s, MachineFunction &Fn) {
+ DOUT << stringifyCSRegSet(s, Fn) << "\n";
+ }
+#endif
+
};
char PEI::ID = 0;
}
-
/// createPrologEpilogCodeInserter - This function returns a pass that inserts
/// prolog and epilog code, and eliminates abstract frame references.
///
FunctionPass *llvm::createPrologEpilogCodeInserter() { return new PEI(); }
+/// placeCSRSpillsAndRestores - determine which MBBs of the function
+/// need save, restore code for callee-saved registers by doing a DF analysis
+/// similar to the one used in code motion (GVNPRE). This produces maps of MBBs
+/// to sets of registers (CSRs) for saves and restores. MachineLoopInfo
+/// is used to ensure that CSR save/restore code is not placed inside loops.
+/// This function computes the maps of MBBs -> CSRs to spill and restore
+/// in CSRSave, CSRRestore.
+///
+/// If shrink wrapping is not being performed, place all spills in
+/// the entry block, all restores in return blocks. In this case,
+/// CSRSave has a single mapping, CSRRestore has mappings for each
+/// return block.
+///
+void PEI::placeCSRSpillsAndRestores(MachineFunction &Fn) {
+
+#ifndef NDEBUG
+ DOUT << "Place CSR spills/restores for "
+ << Fn.getFunction()->getName() << "\n";
+#endif
+
+ initShrinkWrappingInfo();
+
+ if (calculateSets(Fn))
+ placeSpillsAndRestores(Fn);
+}
+
+/// calculateAnticAvail - helper for computing the data flow
+/// sets required for determining spill/restore placements.
+///
+void PEI::calculateAnticAvail(MachineFunction &Fn) {
+
+ // Calulate Antic{In,Out} and Avail{In,Out} iteratively on the MCFG.
+ bool changed = true;
+ unsigned iterations = 0;
+ while (changed) {
+ changed = false;
+ for (MachineFunction::iterator MBBI = Fn.begin(), MBBE = Fn.end();
+ MBBI != MBBE; ++MBBI) {
+ MachineBasicBlock* MBB = MBBI;
+
+ // AnticOut[MBB] = INTERSECT(AnticIn[S] for S in SUCC(MBB))
+ MachineBasicBlock::succ_iterator SI = MBB->succ_begin(),
+ SE = MBB->succ_end();
+ if (SI != SE) {
+ CSRegSet prevAnticOut = AnticOut[MBB];
+ MachineBasicBlock* SUCC = *SI;
+ AnticOut[MBB] = AnticIn[SUCC];
+ for (++SI; SI != SE; ++SI) {
+ SUCC = *SI;
+ AnticOut[MBB] &= AnticIn[SUCC];
+ }
+ if (prevAnticOut != AnticOut[MBB])
+ changed = true;
+ }
+ // AnticIn[MBB] = CSRUsed[MBB] | AnticOut[MBB];
+ CSRegSet prevAnticIn = AnticIn[MBB];
+ AnticIn[MBB] = CSRUsed[MBB] | AnticOut[MBB];
+ if (prevAnticIn |= AnticIn[MBB])
+ changed = true;
+
+ // AvailIn[MBB] = INTERSECT(AvailOut[S] for S in PRED(MBB))
+ MachineBasicBlock::pred_iterator PI = MBB->pred_begin(),
+ PE = MBB->pred_end();
+ if (PI != PE) {
+ CSRegSet prevAvailIn = AvailIn[MBB];
+ MachineBasicBlock* PRED = *PI;
+ AvailIn[MBB] = AvailOut[PRED];
+ for (++PI; PI != PE; ++PI) {
+ PRED = *PI;
+ AvailIn[MBB] &= AvailOut[PRED];
+ }
+ if (prevAvailIn != AvailIn[MBB])
+ changed = true;
+ }
+ // AvailOut[MBB] = CSRUsed[MBB] | AvailIn[MBB];
+ CSRegSet prevAvailOut = AvailOut[MBB];
+ AvailOut[MBB] = CSRUsed[MBB] | AvailIn[MBB];
+ if (prevAvailOut |= AvailOut[MBB])
+ changed = true;
+ }
+ ++iterations;
+ }
+
+ // EXP
+ AnticIn[EntryBlock].clear();
+ AnticOut[EntryBlock].clear();
+
+#ifndef NDEBUG
+ DOUT << "-----------------------------------------------------------\n";
+ DOUT << "iterations = " << iterations << "\n";
+ DOUT << "-----------------------------------------------------------\n";
+ DOUT << "MBB | ANTIC_IN | ANTIC_OUT | AVAIL_IN | AVAIL_OUT\n";
+ DOUT << "-----------------------------------------------------------\n";
+ for (MachineFunction::iterator MBBI = Fn.begin(), MBBE = Fn.end();
+ MBBI != MBBE; ++MBBI) {
+ MachineBasicBlock* MBB = MBBI;
+
+ DOUT << getBasicBlockName(MBB) << " | "
+ << stringifyCSRegSet(AnticIn[MBB], Fn)
+ << " | "
+ << stringifyCSRegSet(AnticOut[MBB], Fn)
+ << " | "
+ << stringifyCSRegSet(AvailIn[MBB], Fn)
+ << " | "
+ << stringifyCSRegSet(AvailOut[MBB], Fn)
+ << "\n";
+ }
+#endif
+}
+
+/// calculateSets - helper function for placeCSRSpillsAndRestores,
+/// collect the CSRs used in this function, develop the DF sets that
+/// describe the minimal regions in the Machine CFG around which spills,
+/// restores must be placed.
+///
+/// This function decides if shrink wrapping should actually be done:
+/// if all CSR uses are in the entry block, no shrink wrapping is possible,
+/// so ShrinkWrapping is turned off (for the current function) and the
+/// function returns false.
+///
+bool PEI::calculateSets(MachineFunction &Fn) {
+
+ // Sets used to compute spill, restore placement sets.
+ const std::vector<CalleeSavedInfo> CSI =
+ Fn.getFrameInfo()->getCalleeSavedInfo();
+
+ // If no CSRs used, we are done.
+ if (CSI.empty()) {
+#ifndef NDEBUG
+ DOUT << Fn.getFunction()->getName()
+ << " uses no callee-saved registers.\n";
+#endif
+ return false;
+ }
+
+#ifndef NDEBUG
+ DOUT << "-----------------------------------------------------------\n";
+#endif
+
+ const TargetRegisterInfo *TRI = Fn.getTarget().getRegisterInfo();
+ bool allCSRUsesInEntryBlock = true;
+
+ // Initialize UsedCSRegs set, CSRUsed map.
+ // At the same time, put entry block directly into
+ // CSRSave, CSRRestore sets if any CSRs are used.
+ //
+ // Quick exit option (not implemented):
+ // Given N CSR uses in entry block,
+ // revert to default behavior, skip the placement
+ // step and put all saves in entry, restores in
+ // return blocks.
+
+ // Set up entry and return blocks.
+ EntryBlock = Fn.begin();
+ for (MachineFunction::iterator MBB = Fn.begin(), E = Fn.end();
+ MBB != E; ++MBB)
+ if (!MBB->empty() && MBB->back().getDesc().isReturn())
+ ReturnBlocks.push_back(MBB);
+
+ // TODO -- check for a use of a CSR in each imm. successor of EntryBlock,
+ // do not shrink wrap this function if this is the case.
+
+ // If not shrink wrapping (this function) at this point, set bits in
+ // CSR{Save,Restore}[] and UsedCSRegs, then return.
+ if (! ShrinkWrapThisFunction) {
+ for (unsigned inx = 0, e = CSI.size(); inx != e; ++inx) {
+ UsedCSRegs.set(inx);
+ CSRSave[EntryBlock].set(inx);
+ for (unsigned ri = 0, re = ReturnBlocks.size(); ri != re; ++ri)
+ CSRRestore[ReturnBlocks[ri]].set(inx);
+ }
+ return false;
+ }
+
+ // Walk instructions in all MBBs, create basic sets, choose
+ // whether or not to shrink wrap this function.
+ for (MachineFunction::iterator MBBI = Fn.begin(), MBBE = Fn.end();
+ MBBI != MBBE; ++MBBI) {
+ MachineBasicBlock* MBB = MBBI;
+ for (MachineBasicBlock::iterator I = MBB->begin(); I != MBB->end(); ++I) {
+ for (unsigned inx = 0, e = CSI.size(); inx != e; ++inx) {
+ unsigned Reg = CSI[inx].getReg();
+ // If instruction I reads or modifies Reg, add it to UsedCSRegs,
+ // CSRUsed map for the current block.
+ for (unsigned opInx = 0, opEnd = I->getNumOperands();
+ opInx != opEnd; ++opInx) {
+ const MachineOperand &MO = I->getOperand(opInx);
+ if (! (MO.isReg() && (MO.isUse() || MO.isDef())))
+ continue;
+ unsigned MOReg = MO.getReg();
+ if (!MOReg)
+ continue;
+ if (MOReg == Reg ||
+ (TargetRegisterInfo::isPhysicalRegister(MOReg) &&
+ TargetRegisterInfo::isPhysicalRegister(Reg) &&
+ TRI->isSubRegister(MOReg, Reg))) {
+ // CSR Reg is defined/used in block MBB.
+ UsedCSRegs.set(inx);
+ CSRUsed[MBB].set(inx);
+ // Short-circuit analysis for entry, return blocks:
+ // if a CSR is used in the entry block, add it directly
+ // to CSRSave[EntryBlock] and to CSRRestore[R] for R
+ // in ReturnBlocks. Note CSR uses in non-entry blocks.
+ if (ShrinkWrapThisFunction) {
+ if (MBB == EntryBlock) {
+ CSRSave[MBB].set(inx);
+ for (unsigned ri = 0, re = ReturnBlocks.size(); ri != re; ++ri)
+ CSRRestore[ReturnBlocks[ri]].set(inx);
+ } else
+ allCSRUsesInEntryBlock = false;
+ } else {
+ // Not shrink wrapping => ensure saves/restores are correctly
+ // added for entry, return blocks.
+ CSRSave[EntryBlock].set(inx);
+ for (unsigned ri = 0, re = ReturnBlocks.size(); ri != re; ++ri)
+ CSRRestore[ReturnBlocks[ri]].set(inx);
+ }
+ }
+ }
+ }
+ }
+#ifndef NDEBUG
+ DOUT << "CSRUsed[" << getBasicBlockName(MBB) << "] = "
+ << stringifyCSRegSet(CSRUsed[MBB], Fn) << "\n";
+#endif
+ }
+
+#ifndef NDEBUG
+ DOUT << "UsedCSRegs = " << stringifyCSRegSet(UsedCSRegs, Fn) << "\n";
+#endif
+
+ // Early exit:
+ // 1. Not asked to do shrink wrapping => just "place" all spills(restores)
+ // in the entry(return) block(s), already done above.
+ // 2. All CSR uses in entry block => same as case 1, but say we will
+ // not shrink wrap the current function.
+ ShrinkWrapThisFunction = (ShrinkWrapping &&
+ ShrinkWrapThisFunction &&
+ ! allCSRUsesInEntryBlock);
+ if (! ShrinkWrapThisFunction) {
+ return false;
+ }
+
+ calculateAnticAvail(Fn);
+
+ return true;
+}
+
+/// moveSpillsOutOfLoops - helper for placeSpillsAndRestores() which
+/// relocates a spill from a subgraph in a loop to the loop preheader.
+/// Returns the MBB to which saves have been moved, or the given MBB
+/// if it is a branch point.
+///
+MachineBasicBlock* PEI::moveSpillsOutOfLoops(MachineFunction &Fn,
+ MachineBasicBlock* MBB) {
+ if (MBB == 0 || CSRSave[MBB].empty())
+ return 0;
+
+ // Block to which saves are moved.
+ MachineBasicBlock* DEST = 0;
+ MachineLoopInfo &LI = getAnalysis<MachineLoopInfo>();
+
+ if (MachineLoop* LP = LI.getLoopFor(MBB)) {
+ MachineBasicBlock* LPH = getTopLevelLoopPreheader(LP);
+ assert(LPH && "Loop has no top level preheader?");
+
+#ifndef NDEBUG
+ DOUT << "Moving saves of "
+ << stringifyCSRegSet(CSRSave[MBB], Fn)
+ << " from " << getBasicBlockName(MBB)
+ << " to " << getBasicBlockName(LPH) << "\n";
+#endif
+ // Add CSRegSet from MBB to LPH, empty out MBB's CSRegSet.
+ CSRSave[LPH] |= CSRSave[MBB];
+ // If saves moved to entry block, add restores to returns.
+ if (LPH == EntryBlock) {
+ for (unsigned i = 0, e = ReturnBlocks.size(); i != e; ++i)
+ CSRRestore[ReturnBlocks[i]] |= CSRSave[MBB];
+ } else {
+ // Remember where we moved the save so we can add
+ // restores on successor paths if necessary.
+ if (LPH->succ_size() > 1)
+ DEST = LPH;
+ }
+ CSRSave[MBB].clear();
+ } else if (MBB->succ_size() > 1)
+ DEST = MBB;
+ return DEST;
+}
+
+/// addRestoresForSBranchBlock - helper for placeSpillsAndRestores() which
+/// adds restores of CSRs saved in branch point MBBs to the front of any
+/// successor blocks connected to regions with no uses of the saved CSRs.
+///
+void PEI::addRestoresForSBranchBlock(MachineFunction &Fn,
+ MachineBasicBlock* MBB) {
+
+ if (MBB == 0 || CSRSave[MBB].empty() || MBB->succ_size() < 2)
+ return;
+
+ // Add restores of CSRs saved in branch point MBBs to the
+ // front of any succ blocks flowing into regions that
+ // have no uses of MBB's CSRs.
+ bool hasCSRUses = false;
+ for (MachineBasicBlock::succ_iterator SI = MBB->succ_begin(),
+ SE = MBB->succ_end(); SI != SE; ++SI) {
+ MachineBasicBlock* SUCC = *SI;
+ bool needsRestore = false;
+ if (CSRUsed[SUCC].intersects(CSRSave[MBB])) {
+ hasCSRUses = true;
+ continue;
+ }
+ needsRestore = true;
+ for (df_iterator<MachineBasicBlock*> BI = df_begin(SUCC),
+ BE = df_end(SUCC); BI != BE; ++BI) {
+ MachineBasicBlock* SBB = *BI;
+ if (CSRUsed[SBB].intersects(CSRSave[MBB])) {
+ hasCSRUses = true;
+ needsRestore = false;
+ break;
+ }
+ }
+ // Additional restores are needed for SUCC iff there is at least
+ // one CSR use reachable from the successors of MBB and there
+ // are no uses in or below SUCC.
+ if (needsRestore && hasCSRUses) {
+#ifndef NDEBUG
+ DOUT << "MBB " << getBasicBlockName(MBB)
+ << " needs a restore on path to successor "
+ << getBasicBlockName(SUCC) << "\n";
+#endif
+ // Add restores to SUCC for all CSRs saved in MBB...
+ CSRRestore[SUCC] = CSRSave[MBB];
+ }
+ }
+}
+
+/// moveRestoresOutOfLoops - helper for placeSpillsAndRestores() which
+/// relocates restores from a subgraph in a loop to the loop exit blocks.
+/// This function records the MBBs to which restores have been moved in
+/// SBLKS. If no restores are moved, SBLKS contains the input MBB if it
+/// is a join point in the Machine CFG.
+///
+void PEI::moveRestoresOutOfLoops(MachineFunction& Fn,
+ MachineBasicBlock* MBB,
+ std::vector<MachineBasicBlock*>& SBLKS) {
+
+ SBLKS.clear();
+ if (MBB == 0 || CSRRestore[MBB].empty())
+ return;
+
+ MachineLoopInfo &LI = getAnalysis<MachineLoopInfo>();
+
+ if (MachineLoop* LP = LI.getLoopFor(MBB)) {
+ LP = getTopLevelLoopParent(LP);
+ assert(LP && "Loop with no top level parent?");
+
+ SmallVector<MachineBasicBlock*, 4> exitBlocks;
+
+ LP->getExitBlocks(exitBlocks);
+ assert(exitBlocks.size() > 0 &&
+ "Loop has no top level exit blocks?");
+ for (unsigned i = 0, e = exitBlocks.size(); i != e; ++i) {
+ MachineBasicBlock* EXB = exitBlocks[i];
+
+#ifndef NDEBUG
+ DOUT << "Moving restores of "
+ << stringifyCSRegSet(CSRRestore[MBB], Fn)
+ << " from " << getBasicBlockName(MBB)
+ << " to " << getBasicBlockName(EXB) << "\n";
+#endif
+
+ // Add CSRegSet from MBB to LPE, empty out MBB's CSRegSet.
+ CSRRestore[EXB] |= CSRRestore[MBB];
+ if (EXB->pred_size() > 1)
+ SBLKS.push_back(EXB);
+ }
+ CSRRestore[MBB].clear();
+ } else if (MBB->pred_size() > 1)
+ SBLKS.push_back(MBB);
+}
+
+/// addSavesForRJoinBlocks - Add saves of CSRs restored in join point MBBs
+/// to the ends of any pred blocks that flow into MBB from regions that
+/// have no uses of MBB's CSRs.
+///
+void PEI::addSavesForRJoinBlocks(MachineFunction& Fn,
+ std::vector<MachineBasicBlock*>& SBLKS) {
+
+ if (SBLKS.empty())
+ return;
+
+ for (unsigned i = 0, e = SBLKS.size(); i != e; ++i) {
+ MachineBasicBlock* MBB = SBLKS[i];
+ if (MBB->pred_size() > 1) {
+ bool needsSave = false;
+ for (MachineBasicBlock::pred_iterator PI = MBB->pred_begin(),
+ PE = MBB->pred_end(); PI != PE; ++PI) {
+ MachineBasicBlock* PRED = *PI;
+
+ // Walk back up in the CFG from the preds of MBB, look for
+ // a block that uses any CSR that is restored in MBB.
+ if (CSRUsed[PRED].intersects(CSRRestore[MBB]))
+ continue;
+ needsSave = true;
+ for (idf_iterator<MachineBasicBlock*> PPI = idf_begin(PRED),
+ PPE = idf_end(PRED); PPI != PPE; ++PPI) {
+ MachineBasicBlock* PBB = *PPI;
+ if (CSRUsed[PBB].intersects(CSRRestore[MBB])) {
+ needsSave = false;
+ break;
+ }
+ }
+ if (needsSave) {
+ // Add saves to PRED for all CSRs restored in MBB...
+#ifndef NDEBUG
+ DOUT << "MBB " << getBasicBlockName(MBB)
+ << " needs a save on path from predecessor "
+ << getBasicBlockName(PRED) << "\n";
+#endif
+ CSRSave[PRED] = CSRRestore[MBB];
+ }
+ }
+ }
+ }
+}
+
+/// placeSpillsAndRestores - decide which MBBs need spills, restores
+/// of CSRs.
+///
+void PEI::placeSpillsAndRestores(MachineFunction &Fn) {
+
+#ifndef NDEBUG
+ DOUT << "-----------------------------------------------------------\n";
+#endif
+
+ // Calculate CSR{Save,Restore} using Antic, Avail on the Machine-CFG.
+ for (MachineFunction::iterator MBBI = Fn.begin(), MBBE = Fn.end();
+ MBBI != MBBE; ++MBBI) {
+ MachineBasicBlock* MBB = MBBI;
+ // Entry block saves are recorded in UsedCSRegs pass above.
+ if (MBB != EntryBlock) {
+ // Intersect (CSRegs - AnticIn[P]) for all predecessors P of MBB
+ CSRegSet anticInPreds;
+ MachineBasicBlock::pred_iterator PI = MBB->pred_begin(),
+ PE = MBB->pred_end();
+ if (PI != PE) {
+ MachineBasicBlock* PRED = *PI;
+ anticInPreds = UsedCSRegs - AnticIn[PRED];
+ for (++PI; PI != PE; ++PI) {
+ PRED = *PI;
+ // Handle self loop.
+ if (PRED != MBB)
+ anticInPreds &= (UsedCSRegs - AnticIn[PRED]);
+ }
+ }
+ // CSRSave[MBB] = (AnticIn[MBB] - AvailIn[MBB]) & anticInPreds
+ CSRSave[MBB] = (AnticIn[MBB] - AvailIn[MBB]) & anticInPreds;
+
+ // Remove the CSRs that are saved in the entry block
+ if (! CSRSave[MBB].empty() && ! CSRSave[EntryBlock].empty())
+ CSRSave[MBB] = CSRSave[MBB] - CSRSave[EntryBlock];
+
+ // Move saves inside loops to the preheaders of the outermost
+ // containing loops, add restores to blocks reached by saves
+ // placed at branch points where necessary.
+ if (MachineBasicBlock* DESTBB = moveSpillsOutOfLoops(Fn, MBB)) {
+ // Add restores to blocks reached by saves placed at branch
+ // points where necessary.
+ addRestoresForSBranchBlock(Fn, DESTBB);
+ }
+ }
+
+#ifndef NDEBUG
+ if (! CSRSave[MBB].empty())
+ DOUT << "SAVE[" << getBasicBlockName(MBB) << "] = "
+ << stringifyCSRegSet(CSRSave[MBB], Fn) << "\n";
+#endif
+
+ // Compute CSRRestore, which may already be set for return blocks.
+ if (! CSRRestore[MBB].empty() || MBB->pred_size() == 0)
+ continue;
+
+ // Intersect (CSRegs - AvailOut[S]) for all successors S of MBB
+ CSRegSet availOutSucc;
+ MachineBasicBlock::succ_iterator SI = MBB->succ_begin(),
+ SE = MBB->succ_end();
+ if (SI != SE) {
+ MachineBasicBlock* SUCC = *SI;
+ availOutSucc = UsedCSRegs - AvailOut[SUCC];
+ for (++SI; SI != SE; ++SI) {
+ SUCC = *SI;
+ // Handle self loop.
+ if (SUCC != MBB)
+ availOutSucc &= (UsedCSRegs - AvailOut[SUCC]);
+ }
+ } else if (! CSRUsed[MBB].empty()) {
+ // Take care of uses in return blocks (which have no successors).
+ availOutSucc = UsedCSRegs;
+ }
+ // CSRRestore[MBB] = (AvailOut[MBB] - AnticOut[MBB]) & availOutSucc
+ CSRRestore[MBB] = (AvailOut[MBB] - AnticOut[MBB]) & availOutSucc;
+
+ // Remove the CSRs that are restored in the return blocks.
+ // Lest this be confusing, note that:
+ // CSRSave[EntryBlock] == CSRRestore[B] for all B in ReturnBlocks.
+ if (! CSRRestore[MBB].empty() && ! CSRSave[EntryBlock].empty())
+ CSRRestore[MBB] = CSRRestore[MBB] - CSRSave[EntryBlock];
+
+ // Move restores inside loops to the exits of the outermost (top level)
+ // containing loops.
+ std::vector<MachineBasicBlock*> saveBlocks;
+ moveRestoresOutOfLoops(Fn, MBB, saveBlocks);
+
+ // Add saves of CSRs restored in join point MBBs to the ends
+ // of any pred blocks that flow into MBB from regions that
+ // have no uses of MBB's CSRs.
+ addSavesForRJoinBlocks(Fn, saveBlocks);
+
+#ifndef NDEBUG
+ if (! CSRRestore[MBB].empty())
+ DOUT << "RESTORE[" << getBasicBlockName(MBB) << "] = "
+ << stringifyCSRegSet(CSRRestore[MBB], Fn) << "\n";
+#endif
+ }
+
+#ifndef NDEBUG
+ DOUT << "-----------------------------------------------------------\n";
+ DOUT << "Final SAVE, RESTORE:\n";
+ DOUT << "-----------------------------------------------------------\n";
+ for (MachineFunction::iterator MBB = Fn.begin(), E = Fn.end();
+ MBB != E; ++MBB) {
+ if (! CSRSave[MBB].empty()) {
+ DOUT << "SAVE[" << getBasicBlockName(MBB) << "] = "
+ << stringifyCSRegSet(CSRSave[MBB], Fn);
+ if (CSRRestore[MBB].empty())
+ DOUT << "\n";
+ }
+ if (! CSRRestore[MBB].empty()) {
+ if (! CSRSave[MBB].empty())
+ DOUT << " ";
+ DOUT << "RESTORE[" << getBasicBlockName(MBB) << "] = "
+ << stringifyCSRegSet(CSRRestore[MBB], Fn) << "\n";
+ }
+ }
+#endif
+}
+
/// calculateCalleeSavedRegisters - Scan the function for modified callee saved
/// registers. Also calculate the MaxCallFrameSize and HasCalls variables for
/// the function's frame information and eliminates call frame pseudo
@@ -221,7 +952,8 @@ void PEI::calculateCalleeSavedRegisters(MachineFunction &Fn) {
unsigned Align = RC->getAlignment();
unsigned StackAlign = TFI->getStackAlignment();
// We may not be able to sastify the desired alignment specification of
- // the TargetRegisterClass if the stack alignment is smaller. Use the min.
+ // the TargetRegisterClass if the stack alignment is smaller.
+ // Use the min.
Align = std::min(Align, StackAlign);
FrameIdx = FFI->CreateStackObject(RC->getSize(), Align);
if ((unsigned)FrameIdx < MinCSFrameIndex) MinCSFrameIndex = FrameIdx;
@@ -236,41 +968,104 @@ void PEI::calculateCalleeSavedRegisters(MachineFunction &Fn) {
FFI->setCalleeSavedInfo(CSI);
}
-/// saveCalleeSavedRegisters - Insert spill code for any callee saved registers
-/// that are modified in the function.
+/// insertCSRSpillsAndRestores - Insert spill and restore code for
+/// callee saved registers used in the function, handling shrink wrapping.
///
-void PEI::saveCalleeSavedRegisters(MachineFunction &Fn) {
+void PEI::insertCSRSpillsAndRestores(MachineFunction &Fn) {
// Get callee saved register information.
MachineFrameInfo *FFI = Fn.getFrameInfo();
const std::vector<CalleeSavedInfo> &CSI = FFI->getCalleeSavedInfo();
-
+
// Early exit if no callee saved registers are modified!
if (CSI.empty())
return;
const TargetInstrInfo &TII = *Fn.getTarget().getInstrInfo();
+ MachineBasicBlock::iterator I;
+ std::vector<CalleeSavedInfo> blockCSI;
+
+#ifndef NDEBUG
+ DOUT << "Inserting spill/restore code for CSRs in function "
+ << Fn.getFunction()->getName() << "\n";
+#endif
- // Now that we have a stack slot for each register to be saved, insert spill
- // code into the entry block.
- MachineBasicBlock *MBB = Fn.begin();
- MachineBasicBlock::iterator I = MBB->begin();
+ // Insert spills.
+ for (CSRegBlockMap::iterator
+ BI = CSRSave.begin(), BE = CSRSave.end(); BI != BE; ++BI) {
+ MachineBasicBlock* MBB = BI->first;
+ CSRegSet save = BI->second;
- if (!TII.spillCalleeSavedRegisters(*MBB, I, CSI)) {
- for (unsigned i = 0, e = CSI.size(); i != e; ++i) {
- // Add the callee-saved register as live-in. It's killed at the spill.
- MBB->addLiveIn(CSI[i].getReg());
+ if (save.empty())
+ continue;
+
+ if (! ShrinkWrapThisFunction) {
+ // Spill using target interface.
+ I = MBB->begin();
+ if (!TII.spillCalleeSavedRegisters(*MBB, I, CSI)) {
+ for (unsigned i = 0, e = CSI.size(); i != e; ++i) {
+ // Add the callee-saved register as live-in. It's killed at the spill.
+ MBB->addLiveIn(CSI[i].getReg());
+
+ // Insert the spill to the stack frame.
+ TII.storeRegToStackSlot(*MBB, I, CSI[i].getReg(), true,
+ CSI[i].getFrameIdx(), CSI[i].getRegClass());
+ }
+ }
+ } else {
+#ifndef NDEBUG
+ DOUT << "CSRSave[" << getBasicBlockName(MBB) << "] = "
+ << stringifyCSRegSet(save, Fn) << "\n";
+#endif
+
+ blockCSI.clear();
+ for (CSRegSet::iterator RI = save.begin(),
+ RE = save.end(); RI != RE; ++RI) {
+ blockCSI.push_back(CSI[*RI]);
+ }
+ assert(blockCSI.size() > 0 &&
+ "Could not collect callee saved register info");
+
+ // If MBB has no uses of CSRs being saved, this means saves
+ // must be inserted at the _end_.
+ if (! MBB->empty() && ! CSRUsed[MBB].intersects(save)) {
+ I = MBB->end();
+ --I;
+ if (I->getDesc().isCall()) {
+ ++I;
+ } else {
+ MachineBasicBlock::iterator I2 = I;
+ while (I2 != MBB->begin() && (--I2)->getDesc().isTerminator())
+ I = I2;
+ }
+ } else {
+ I = MBB->begin();
+ }
- // Insert the spill to the stack frame.
- TII.storeRegToStackSlot(*MBB, I, CSI[i].getReg(), true,
- CSI[i].getFrameIdx(), CSI[i].getRegClass());
+ // When shrink wrapping, use stack slot stores/loads.
+ for (unsigned i = 0, e = blockCSI.size(); i != e; ++i) {
+ // Add the callee-saved register as live-in.
+ // It's killed at the spill.
+ MBB->addLiveIn(blockCSI[i].getReg());
+
+ // Insert the spill to the stack frame.
+ TII.storeRegToStackSlot(*MBB, I, blockCSI[i].getReg(),
+ true,
+ blockCSI[i].getFrameIdx(),
+ blockCSI[i].getRegClass());
+ }
}
}
-
- // Add code to restore the callee-save registers in each exiting block.
- for (MachineFunction::iterator FI = Fn.begin(), E = Fn.end(); FI != E; ++FI)
- // If last instruction is a return instruction, add an epilogue.
- if (!FI->empty() && FI->back().getDesc().isReturn()) {
- MBB = FI;
+ // Use CSRRestore to add code to restore the callee-saved registers in
+ // each block.
+ for (CSRegBlockMap::iterator
+ BI = CSRRestore.begin(), BE = CSRRestore.end(); BI != BE; ++BI) {
+ MachineBasicBlock* MBB = BI->first;
+ CSRegSet restore = BI->second;
+
+ if (restore.empty())
+ continue;
+ if (! ShrinkWrapThisFunction) {
+ // Restore using target interface.
I = MBB->end(); --I;
// Skip over all terminator instructions, which are part of the return
@@ -283,18 +1078,18 @@ void PEI::saveCalleeSavedRegisters(MachineFunction &Fn) {
MachineBasicBlock::iterator BeforeI = I;
if (!AtStart)
--BeforeI;
-
- // Restore all registers immediately before the return and any terminators
- // that preceed it.
+
+ // Restore all registers immediately before the return and any
+ // terminators that preceed it.
if (!TII.restoreCalleeSavedRegisters(*MBB, I, CSI)) {
for (unsigned i = 0, e = CSI.size(); i != e; ++i) {
TII.loadRegFromStackSlot(*MBB, I, CSI[i].getReg(),
- CSI[i].getFrameIdx(),
- CSI[i].getRegClass());
+ CSI[i].getFrameIdx(),
+ CSI[i].getRegClass());
assert(I != MBB->begin() &&
"loadRegFromStackSlot didn't insert any code!");
- // Insert in reverse order. loadRegFromStackSlot can insert multiple
- // instructions.
+ // Insert in reverse order. loadRegFromStackSlot can insert
+ // multiple instructions.
if (AtStart)
I = MBB->begin();
else {
@@ -303,7 +1098,79 @@ void PEI::saveCalleeSavedRegisters(MachineFunction &Fn) {
}
}
}
+ } else {
+#ifndef NDEBUG
+ DOUT << "CSRRestore[" << getBasicBlockName(MBB) << "] = "
+ << stringifyCSRegSet(restore, Fn) << "\n";
+#endif
+
+ blockCSI.clear();
+ for (CSRegSet::iterator RI = restore.begin(),
+ RE = restore.end(); RI != RE; ++RI) {
+ blockCSI.push_back(CSI[*RI]);
+ }
+ assert(blockCSI.size() > 0 &&
+ "Could not find callee saved register info");
+
+ // If MBB uses no CSRs but has restores, this means
+ // it must have restores inserted at the _beginning_.
+ // N.B. -- not necessary if edge splitting done.
+ if (MBB->empty() || ! CSRUsed[MBB].intersects(restore)) {
+ I = MBB->begin();
+ } else {
+ I = MBB->end();
+ --I;
+
+ // EXP iff spill/restore implemented with push/pop:
+ // append restore to block unless it ends in a
+ // barrier terminator instruction.
+
+ // Skip over all terminator instructions, which are part of the
+ // return sequence.
+ if (I->getDesc().isCall()) {
+ ++I;
+ } else {
+ MachineBasicBlock::iterator I2 = I;
+ while (I2 != MBB->begin() && (--I2)->getDesc().isTerminator())
+ I = I2;
+ }
+ }
+
+ bool AtStart = I == MBB->begin();
+ MachineBasicBlock::iterator BeforeI = I;
+ if (!AtStart)
+ --BeforeI;
+
+#ifndef NDEBUG
+ if (! MBB->empty() && ! CSRUsed[MBB].intersects(restore)) {
+ MachineInstr* MI = BeforeI;
+ DOUT << "adding restore after ";
+ DEBUG(MI->dump());
+ } else {
+ DOUT << "adding restore to beginning of "
+ << getBasicBlockName(MBB) << "\n";
+ }
+#endif
+
+ // Restore all registers immediately before the return and any
+ // terminators that preceed it.
+ for (unsigned i = 0, e = blockCSI.size(); i != e; ++i) {
+ TII.loadRegFromStackSlot(*MBB, I, blockCSI[i].getReg(),
+ blockCSI[i].getFrameIdx(),
+ blockCSI[i].getRegClass());
+ assert(I != MBB->begin() &&
+ "loadRegFromStackSlot didn't insert any code!");
+ // Insert in reverse order. loadRegFromStackSlot can insert
+ // multiple instructions.
+ if (AtStart)
+ I = MBB->begin();
+ else {
+ I = BeforeI;
+ ++I;
+ }
+ }
}
+ }
}
/// AdjustStackOffset - Helper function used to adjust the stack frame offset.
@@ -358,8 +1225,8 @@ void PEI::calculateFrameObjectOffsets(MachineFunction &Fn) {
// If there are fixed sized objects that are preallocated in the local area,
// non-fixed objects can't be allocated right at the start of local area.
- // We currently don't support filling in holes in between fixed sized objects,
- // so we adjust 'Offset' to point to the end of last fixed sized
+ // We currently don't support filling in holes in between fixed sized
+ // objects, so we adjust 'Offset' to point to the end of last fixed sized
// preallocated object.
for (int i = FFI->getObjectIndexBegin(); i != 0; ++i) {
int64_t FixedOff;
@@ -385,8 +1252,8 @@ void PEI::calculateFrameObjectOffsets(MachineFunction &Fn) {
Offset += FFI->getObjectSize(i);
unsigned Align = FFI->getObjectAlignment(i);
- // If the alignment of this object is greater than that of the stack, then
- // increase the stack alignment to match.
+ // If the alignment of this object is greater than that of the stack,
+ // then increase the stack alignment to match.
MaxAlign = std::max(MaxAlign, Align);
// Adjust to alignment boundary
Offset = (Offset+Align-1)/Align*Align;
@@ -397,8 +1264,8 @@ void PEI::calculateFrameObjectOffsets(MachineFunction &Fn) {
int MaxCSFI = MaxCSFrameIndex, MinCSFI = MinCSFrameIndex;
for (int i = MaxCSFI; i >= MinCSFI ; --i) {
unsigned Align = FFI->getObjectAlignment(i);
- // If the alignment of this object is greater than that of the stack, then
- // increase the stack alignment to match.
+ // If the alignment of this object is greater than that of the stack,
+ // then increase the stack alignment to match.
MaxAlign = std::max(MaxAlign, Align);
// Adjust to alignment boundary
Offset = (Offset+Align-1)/Align*Align;
@@ -453,7 +1320,7 @@ void PEI::calculateFrameObjectOffsets(MachineFunction &Fn) {
// offsets will be relative to SP not FP; round up the stack size so this
// works.
if (!RegInfo->targetHandlesStackFrameRounding() &&
- (FFI->hasCalls() || FFI->hasVarSizedObjects() ||
+ (FFI->hasCalls() || FFI->hasVarSizedObjects() ||
(RegInfo->needsStackRealignment(Fn) &&
FFI->getObjectIndexEnd() != 0))) {
// If we have reserved argument space for call sites in the function
@@ -509,9 +1376,11 @@ void PEI::replaceFrameIndices(MachineFunction &Fn) {
int FrameSetupOpcode = TRI.getCallFrameSetupOpcode();
int FrameDestroyOpcode = TRI.getCallFrameDestroyOpcode();
- for (MachineFunction::iterator BB = Fn.begin(), E = Fn.end(); BB != E; ++BB) {
+ for (MachineFunction::iterator BB = Fn.begin(),
+ E = Fn.end(); BB != E; ++BB) {
int SPAdj = 0; // SP offset due to call frame setup / destroy.
if (RS) RS->enterBasicBlock(BB);
+
for (MachineBasicBlock::iterator I = BB->begin(); I != BB->end(); ) {
if (I->getOpcode() == TargetInstrInfo::DECLARE) {
// Ignore it.
@@ -560,6 +1429,7 @@ void PEI::replaceFrameIndices(MachineFunction &Fn) {
// If this instruction has a FrameIndex operand, we need to
// use that target machine register info object to eliminate
// it.
+
TRI.eliminateFrameIndex(MI, SPAdj, RS);
// Reset the iterator if we were at the beginning of the BB.