aboutsummaryrefslogtreecommitdiffstats
path: root/lib/CodeGen/MachineBlockPlacement.cpp
diff options
context:
space:
mode:
Diffstat (limited to 'lib/CodeGen/MachineBlockPlacement.cpp')
-rw-r--r--lib/CodeGen/MachineBlockPlacement.cpp431
1 files changed, 229 insertions, 202 deletions
diff --git a/lib/CodeGen/MachineBlockPlacement.cpp b/lib/CodeGen/MachineBlockPlacement.cpp
index 1b5c1f1..ecc50c9 100644
--- a/lib/CodeGen/MachineBlockPlacement.cpp
+++ b/lib/CodeGen/MachineBlockPlacement.cpp
@@ -33,6 +33,7 @@
#include "llvm/CodeGen/MachineBasicBlock.h"
#include "llvm/CodeGen/MachineBlockFrequencyInfo.h"
#include "llvm/CodeGen/MachineBranchProbabilityInfo.h"
+#include "llvm/CodeGen/MachineDominators.h"
#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/MachineFunctionPass.h"
#include "llvm/CodeGen/MachineLoopInfo.h"
@@ -40,13 +41,14 @@
#include "llvm/Support/Allocator.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Debug.h"
+#include "llvm/Support/raw_ostream.h"
#include "llvm/Target/TargetInstrInfo.h"
#include "llvm/Target/TargetLowering.h"
#include "llvm/Target/TargetSubtargetInfo.h"
#include <algorithm>
using namespace llvm;
-#define DEBUG_TYPE "block-placement2"
+#define DEBUG_TYPE "block-placement"
STATISTIC(NumCondBranches, "Number of conditional branches");
STATISTIC(NumUncondBranches, "Number of uncondittional branches");
@@ -61,11 +63,23 @@ static cl::opt<unsigned> AlignAllBlock("align-all-blocks",
cl::init(0), cl::Hidden);
// FIXME: Find a good default for this flag and remove the flag.
-static cl::opt<unsigned>
-ExitBlockBias("block-placement-exit-block-bias",
- cl::desc("Block frequency percentage a loop exit block needs "
- "over the original exit to be considered the new exit."),
- cl::init(0), cl::Hidden);
+static cl::opt<unsigned> ExitBlockBias(
+ "block-placement-exit-block-bias",
+ cl::desc("Block frequency percentage a loop exit block needs "
+ "over the original exit to be considered the new exit."),
+ cl::init(0), cl::Hidden);
+
+static cl::opt<bool> OutlineOptionalBranches(
+ "outline-optional-branches",
+ cl::desc("Put completely optional branches, i.e. branches with a common "
+ "post dominator, out of line."),
+ cl::init(false), cl::Hidden);
+
+static cl::opt<unsigned> OutlineOptionalThreshold(
+ "outline-optional-threshold",
+ cl::desc("Don't outline optional branches that are a single block with an "
+ "instruction count below this threshold"),
+ cl::init(4), cl::Hidden);
namespace {
class BlockChain;
@@ -107,7 +121,7 @@ public:
/// function. It also registers itself as the chain that block participates
/// in with the BlockToChain mapping.
BlockChain(BlockToChainMapType &BlockToChain, MachineBasicBlock *BB)
- : Blocks(1, BB), BlockToChain(BlockToChain), LoopPredecessors(0) {
+ : Blocks(1, BB), BlockToChain(BlockToChain), LoopPredecessors(0) {
assert(BB && "Cannot create a chain with a null basic block");
BlockToChain[BB] = this;
}
@@ -144,19 +158,18 @@ public:
// Update the incoming blocks to point to this chain, and add them to the
// chain structure.
- for (BlockChain::iterator BI = Chain->begin(), BE = Chain->end();
- BI != BE; ++BI) {
- Blocks.push_back(*BI);
- assert(BlockToChain[*BI] == Chain && "Incoming blocks not in chain");
- BlockToChain[*BI] = this;
+ for (MachineBasicBlock *ChainBB : *Chain) {
+ Blocks.push_back(ChainBB);
+ assert(BlockToChain[ChainBB] == Chain && "Incoming blocks not in chain");
+ BlockToChain[ChainBB] = this;
}
}
#ifndef NDEBUG
/// \brief Dump the blocks in this chain.
LLVM_DUMP_METHOD void dump() {
- for (iterator I = begin(), E = end(); I != E; ++I)
- (*I)->dump();
+ for (MachineBasicBlock *MBB : *this)
+ MBB->dump();
}
#endif // NDEBUG
@@ -188,6 +201,13 @@ class MachineBlockPlacement : public MachineFunctionPass {
/// \brief A handle to the target's lowering info.
const TargetLoweringBase *TLI;
+ /// \brief A handle to the post dominator tree.
+ MachineDominatorTree *MDT;
+
+ /// \brief A set of blocks that are unavoidably execute, i.e. they dominate
+ /// all terminators of the MachineFunction.
+ SmallPtrSet<MachineBasicBlock *, 4> UnavoidableBlocks;
+
/// \brief Allocator and owner of BlockChain structures.
///
/// We build BlockChains lazily while processing the loop structure of
@@ -205,28 +225,26 @@ class MachineBlockPlacement : public MachineFunctionPass {
/// between basic blocks.
DenseMap<MachineBasicBlock *, BlockChain *> BlockToChain;
- void markChainSuccessors(BlockChain &Chain,
- MachineBasicBlock *LoopHeaderBB,
+ void markChainSuccessors(BlockChain &Chain, MachineBasicBlock *LoopHeaderBB,
SmallVectorImpl<MachineBasicBlock *> &BlockWorkList,
const BlockFilterSet *BlockFilter = nullptr);
MachineBasicBlock *selectBestSuccessor(MachineBasicBlock *BB,
BlockChain &Chain,
const BlockFilterSet *BlockFilter);
- MachineBasicBlock *selectBestCandidateBlock(
- BlockChain &Chain, SmallVectorImpl<MachineBasicBlock *> &WorkList,
- const BlockFilterSet *BlockFilter);
- MachineBasicBlock *getFirstUnplacedBlock(
- MachineFunction &F,
- const BlockChain &PlacedChain,
- MachineFunction::iterator &PrevUnplacedBlockIt,
- const BlockFilterSet *BlockFilter);
+ MachineBasicBlock *
+ selectBestCandidateBlock(BlockChain &Chain,
+ SmallVectorImpl<MachineBasicBlock *> &WorkList,
+ const BlockFilterSet *BlockFilter);
+ MachineBasicBlock *
+ getFirstUnplacedBlock(MachineFunction &F, const BlockChain &PlacedChain,
+ MachineFunction::iterator &PrevUnplacedBlockIt,
+ const BlockFilterSet *BlockFilter);
void buildChain(MachineBasicBlock *BB, BlockChain &Chain,
SmallVectorImpl<MachineBasicBlock *> &BlockWorkList,
const BlockFilterSet *BlockFilter = nullptr);
MachineBasicBlock *findBestLoopTop(MachineLoop &L,
const BlockFilterSet &LoopBlockSet);
- MachineBasicBlock *findBestLoopExit(MachineFunction &F,
- MachineLoop &L,
+ MachineBasicBlock *findBestLoopExit(MachineFunction &F, MachineLoop &L,
const BlockFilterSet &LoopBlockSet);
void buildLoopChains(MachineFunction &F, MachineLoop &L);
void rotateLoop(BlockChain &LoopChain, MachineBasicBlock *ExitingBB,
@@ -244,6 +262,7 @@ public:
void getAnalysisUsage(AnalysisUsage &AU) const override {
AU.addRequired<MachineBranchProbabilityInfo>();
AU.addRequired<MachineBlockFrequencyInfo>();
+ AU.addRequired<MachineDominatorTree>();
AU.addRequired<MachineLoopInfo>();
MachineFunctionPass::getAnalysisUsage(AU);
}
@@ -252,12 +271,13 @@ public:
char MachineBlockPlacement::ID = 0;
char &llvm::MachineBlockPlacementID = MachineBlockPlacement::ID;
-INITIALIZE_PASS_BEGIN(MachineBlockPlacement, "block-placement2",
+INITIALIZE_PASS_BEGIN(MachineBlockPlacement, "block-placement",
"Branch Probability Basic Block Placement", false, false)
INITIALIZE_PASS_DEPENDENCY(MachineBranchProbabilityInfo)
INITIALIZE_PASS_DEPENDENCY(MachineBlockFrequencyInfo)
+INITIALIZE_PASS_DEPENDENCY(MachineDominatorTree)
INITIALIZE_PASS_DEPENDENCY(MachineLoopInfo)
-INITIALIZE_PASS_END(MachineBlockPlacement, "block-placement2",
+INITIALIZE_PASS_END(MachineBlockPlacement, "block-placement",
"Branch Probability Basic Block Placement", false, false)
#ifndef NDEBUG
@@ -267,8 +287,8 @@ INITIALIZE_PASS_END(MachineBlockPlacement, "block-placement2",
static std::string getBlockName(MachineBasicBlock *BB) {
std::string Result;
raw_string_ostream OS(Result);
- OS << "BB#" << BB->getNumber()
- << " (derived from LLVM BB '" << BB->getName() << "')";
+ OS << "BB#" << BB->getNumber();
+ OS << " (derived from LLVM BB '" << BB->getName() << "')";
OS.flush();
return Result;
}
@@ -292,26 +312,22 @@ static std::string getBlockNum(MachineBasicBlock *BB) {
/// having one fewer active predecessor. It also adds any successors of this
/// chain which reach the zero-predecessor state to the worklist passed in.
void MachineBlockPlacement::markChainSuccessors(
- BlockChain &Chain,
- MachineBasicBlock *LoopHeaderBB,
+ BlockChain &Chain, MachineBasicBlock *LoopHeaderBB,
SmallVectorImpl<MachineBasicBlock *> &BlockWorkList,
const BlockFilterSet *BlockFilter) {
// Walk all the blocks in this chain, marking their successors as having
// a predecessor placed.
- for (BlockChain::iterator CBI = Chain.begin(), CBE = Chain.end();
- CBI != CBE; ++CBI) {
+ for (MachineBasicBlock *MBB : Chain) {
// Add any successors for which this is the only un-placed in-loop
// predecessor to the worklist as a viable candidate for CFG-neutral
// placement. No subsequent placement of this block will violate the CFG
// shape, so we get to use heuristics to choose a favorable placement.
- for (MachineBasicBlock::succ_iterator SI = (*CBI)->succ_begin(),
- SE = (*CBI)->succ_end();
- SI != SE; ++SI) {
- if (BlockFilter && !BlockFilter->count(*SI))
+ for (MachineBasicBlock *Succ : MBB->successors()) {
+ if (BlockFilter && !BlockFilter->count(Succ))
continue;
- BlockChain &SuccChain = *BlockToChain[*SI];
+ BlockChain &SuccChain = *BlockToChain[Succ];
// Disregard edges within a fixed chain, or edges to the loop header.
- if (&Chain == &SuccChain || *SI == LoopHeaderBB)
+ if (&Chain == &SuccChain || Succ == LoopHeaderBB)
continue;
// This is a cross-chain edge that is within the loop, so decrement the
@@ -331,9 +347,10 @@ void MachineBlockPlacement::markChainSuccessors(
/// very hot successor edges.
///
/// \returns The best successor block found, or null if none are viable.
-MachineBasicBlock *MachineBlockPlacement::selectBestSuccessor(
- MachineBasicBlock *BB, BlockChain &Chain,
- const BlockFilterSet *BlockFilter) {
+MachineBasicBlock *
+MachineBlockPlacement::selectBestSuccessor(MachineBasicBlock *BB,
+ BlockChain &Chain,
+ const BlockFilterSet *BlockFilter) {
const BranchProbability HotProb(4, 5); // 80%
MachineBasicBlock *BestSucc = nullptr;
@@ -363,6 +380,30 @@ MachineBasicBlock *MachineBlockPlacement::selectBestSuccessor(
uint32_t SuccWeight = MBPI->getEdgeWeight(BB, Succ);
BranchProbability SuccProb(SuccWeight / WeightScale, SumWeight);
+ // If we outline optional branches, look whether Succ is unavoidable, i.e.
+ // dominates all terminators of the MachineFunction. If it does, other
+ // successors must be optional. Don't do this for cold branches.
+ if (OutlineOptionalBranches && SuccProb > HotProb.getCompl() &&
+ UnavoidableBlocks.count(Succ) > 0) {
+ auto HasShortOptionalBranch = [&]() {
+ for (MachineBasicBlock *Pred : Succ->predecessors()) {
+ // Check whether there is an unplaced optional branch.
+ if (Pred == Succ || (BlockFilter && !BlockFilter->count(Pred)) ||
+ BlockToChain[Pred] == &Chain)
+ continue;
+ // Check whether the optional branch has exactly one BB.
+ if (Pred->pred_size() > 1 || *Pred->pred_begin() != BB)
+ continue;
+ // Check whether the optional branch is small.
+ if (Pred->size() < OutlineOptionalThreshold)
+ return true;
+ }
+ return false;
+ };
+ if (!HasShortOptionalBranch())
+ return Succ;
+ }
+
// Only consider successors which are either "hot", or wouldn't violate
// any CFG constraints.
if (SuccChain.LoopPredecessors != 0) {
@@ -426,29 +467,26 @@ MachineBasicBlock *MachineBlockPlacement::selectBestCandidateBlock(
// some code complexity) into the loop below.
WorkList.erase(std::remove_if(WorkList.begin(), WorkList.end(),
[&](MachineBasicBlock *BB) {
- return BlockToChain.lookup(BB) == &Chain;
- }),
+ return BlockToChain.lookup(BB) == &Chain;
+ }),
WorkList.end());
MachineBasicBlock *BestBlock = nullptr;
BlockFrequency BestFreq;
- for (SmallVectorImpl<MachineBasicBlock *>::iterator WBI = WorkList.begin(),
- WBE = WorkList.end();
- WBI != WBE; ++WBI) {
- BlockChain &SuccChain = *BlockToChain[*WBI];
+ for (MachineBasicBlock *MBB : WorkList) {
+ BlockChain &SuccChain = *BlockToChain[MBB];
if (&SuccChain == &Chain) {
- DEBUG(dbgs() << " " << getBlockName(*WBI)
- << " -> Already merged!\n");
+ DEBUG(dbgs() << " " << getBlockName(MBB) << " -> Already merged!\n");
continue;
}
assert(SuccChain.LoopPredecessors == 0 && "Found CFG-violating block");
- BlockFrequency CandidateFreq = MBFI->getBlockFreq(*WBI);
- DEBUG(dbgs() << " " << getBlockName(*WBI) << " -> ";
- MBFI->printBlockFreq(dbgs(), CandidateFreq) << " (freq)\n");
+ BlockFrequency CandidateFreq = MBFI->getBlockFreq(MBB);
+ DEBUG(dbgs() << " " << getBlockName(MBB) << " -> ";
+ MBFI->printBlockFreq(dbgs(), CandidateFreq) << " (freq)\n");
if (BestBlock && BestFreq >= CandidateFreq)
continue;
- BestBlock = *WBI;
+ BestBlock = MBB;
BestFreq = CandidateFreq;
}
return BestBlock;
@@ -481,8 +519,7 @@ MachineBasicBlock *MachineBlockPlacement::getFirstUnplacedBlock(
}
void MachineBlockPlacement::buildChain(
- MachineBasicBlock *BB,
- BlockChain &Chain,
+ MachineBasicBlock *BB, BlockChain &Chain,
SmallVectorImpl<MachineBasicBlock *> &BlockWorkList,
const BlockFilterSet *BlockFilter) {
assert(BB);
@@ -509,8 +546,8 @@ void MachineBlockPlacement::buildChain(
BestSucc = selectBestCandidateBlock(Chain, BlockWorkList, BlockFilter);
if (!BestSucc) {
- BestSucc = getFirstUnplacedBlock(F, Chain, PrevUnplacedBlockIt,
- BlockFilter);
+ BestSucc =
+ getFirstUnplacedBlock(F, Chain, PrevUnplacedBlockIt, BlockFilter);
if (!BestSucc)
break;
@@ -523,8 +560,8 @@ void MachineBlockPlacement::buildChain(
// Zero out LoopPredecessors for the successor we're about to merge in case
// we selected a successor that didn't fit naturally into the CFG.
SuccChain.LoopPredecessors = 0;
- DEBUG(dbgs() << "Merging from " << getBlockNum(BB)
- << " to " << getBlockNum(BestSucc) << "\n");
+ DEBUG(dbgs() << "Merging from " << getBlockNum(BB) << " to "
+ << getBlockNum(BestSucc) << "\n");
markChainSuccessors(SuccChain, LoopHeaderBB, BlockWorkList, BlockFilter);
Chain.merge(BestSucc, &SuccChain);
BB = *std::prev(Chain.end());
@@ -554,20 +591,17 @@ MachineBlockPlacement::findBestLoopTop(MachineLoop &L,
if (!LoopBlockSet.count(*HeaderChain.begin()))
return L.getHeader();
- DEBUG(dbgs() << "Finding best loop top for: "
- << getBlockName(L.getHeader()) << "\n");
+ DEBUG(dbgs() << "Finding best loop top for: " << getBlockName(L.getHeader())
+ << "\n");
BlockFrequency BestPredFreq;
MachineBasicBlock *BestPred = nullptr;
- for (MachineBasicBlock::pred_iterator PI = L.getHeader()->pred_begin(),
- PE = L.getHeader()->pred_end();
- PI != PE; ++PI) {
- MachineBasicBlock *Pred = *PI;
+ for (MachineBasicBlock *Pred : L.getHeader()->predecessors()) {
if (!LoopBlockSet.count(Pred))
continue;
DEBUG(dbgs() << " header pred: " << getBlockName(Pred) << ", "
<< Pred->succ_size() << " successors, ";
- MBFI->printBlockFreq(dbgs(), Pred) << " freq\n");
+ MBFI->printBlockFreq(dbgs(), Pred) << " freq\n");
if (Pred->succ_size() > 1)
continue;
@@ -594,15 +628,13 @@ MachineBlockPlacement::findBestLoopTop(MachineLoop &L,
return BestPred;
}
-
/// \brief Find the best loop exiting block for layout.
///
/// This routine implements the logic to analyze the loop looking for the best
/// block to layout at the top of the loop. Typically this is done to maximize
/// fallthrough opportunities.
MachineBasicBlock *
-MachineBlockPlacement::findBestLoopExit(MachineFunction &F,
- MachineLoop &L,
+MachineBlockPlacement::findBestLoopExit(MachineFunction &F, MachineLoop &L,
const BlockFilterSet &LoopBlockSet) {
// We don't want to layout the loop linearly in all cases. If the loop header
// is just a normal basic block in the loop, we want to look for what block
@@ -624,15 +656,13 @@ MachineBlockPlacement::findBestLoopExit(MachineFunction &F,
// blocks where rotating to exit with that block will reach an outer loop.
SmallPtrSet<MachineBasicBlock *, 4> BlocksExitingToOuterLoop;
- DEBUG(dbgs() << "Finding best loop exit for: "
- << getBlockName(L.getHeader()) << "\n");
- for (MachineLoop::block_iterator I = L.block_begin(),
- E = L.block_end();
- I != E; ++I) {
- BlockChain &Chain = *BlockToChain[*I];
+ DEBUG(dbgs() << "Finding best loop exit for: " << getBlockName(L.getHeader())
+ << "\n");
+ for (MachineBasicBlock *MBB : L.getBlocks()) {
+ BlockChain &Chain = *BlockToChain[MBB];
// Ensure that this block is at the end of a chain; otherwise it could be
// mid-way through an inner loop or a successor of an analyzable branch.
- if (*I != *std::prev(Chain.end()))
+ if (MBB != *std::prev(Chain.end()))
continue;
// Now walk the successors. We need to establish whether this has a viable
@@ -646,43 +676,40 @@ MachineBlockPlacement::findBestLoopExit(MachineFunction &F,
// the MBPI analysis, we use the internal weights and manually compute the
// probabilities to avoid quadratic behavior.
uint32_t WeightScale = 0;
- uint32_t SumWeight = MBPI->getSumForBlock(*I, WeightScale);
- for (MachineBasicBlock::succ_iterator SI = (*I)->succ_begin(),
- SE = (*I)->succ_end();
- SI != SE; ++SI) {
- if ((*SI)->isLandingPad())
+ uint32_t SumWeight = MBPI->getSumForBlock(MBB, WeightScale);
+ for (MachineBasicBlock *Succ : MBB->successors()) {
+ if (Succ->isLandingPad())
continue;
- if (*SI == *I)
+ if (Succ == MBB)
continue;
- BlockChain &SuccChain = *BlockToChain[*SI];
+ BlockChain &SuccChain = *BlockToChain[Succ];
// Don't split chains, either this chain or the successor's chain.
if (&Chain == &SuccChain) {
- DEBUG(dbgs() << " exiting: " << getBlockName(*I) << " -> "
- << getBlockName(*SI) << " (chain conflict)\n");
+ DEBUG(dbgs() << " exiting: " << getBlockName(MBB) << " -> "
+ << getBlockName(Succ) << " (chain conflict)\n");
continue;
}
- uint32_t SuccWeight = MBPI->getEdgeWeight(*I, *SI);
- if (LoopBlockSet.count(*SI)) {
- DEBUG(dbgs() << " looping: " << getBlockName(*I) << " -> "
- << getBlockName(*SI) << " (" << SuccWeight << ")\n");
+ uint32_t SuccWeight = MBPI->getEdgeWeight(MBB, Succ);
+ if (LoopBlockSet.count(Succ)) {
+ DEBUG(dbgs() << " looping: " << getBlockName(MBB) << " -> "
+ << getBlockName(Succ) << " (" << SuccWeight << ")\n");
HasLoopingSucc = true;
continue;
}
unsigned SuccLoopDepth = 0;
- if (MachineLoop *ExitLoop = MLI->getLoopFor(*SI)) {
+ if (MachineLoop *ExitLoop = MLI->getLoopFor(Succ)) {
SuccLoopDepth = ExitLoop->getLoopDepth();
if (ExitLoop->contains(&L))
- BlocksExitingToOuterLoop.insert(*I);
+ BlocksExitingToOuterLoop.insert(MBB);
}
BranchProbability SuccProb(SuccWeight / WeightScale, SumWeight);
- BlockFrequency ExitEdgeFreq = MBFI->getBlockFreq(*I) * SuccProb;
- DEBUG(dbgs() << " exiting: " << getBlockName(*I) << " -> "
- << getBlockName(*SI) << " [L:" << SuccLoopDepth
- << "] (";
- MBFI->printBlockFreq(dbgs(), ExitEdgeFreq) << ")\n");
+ BlockFrequency ExitEdgeFreq = MBFI->getBlockFreq(MBB) * SuccProb;
+ DEBUG(dbgs() << " exiting: " << getBlockName(MBB) << " -> "
+ << getBlockName(Succ) << " [L:" << SuccLoopDepth << "] (";
+ MBFI->printBlockFreq(dbgs(), ExitEdgeFreq) << ")\n");
// Note that we bias this toward an existing layout successor to retain
// incoming order in the absence of better information. The exit must have
// a frequency higher than the current exit before we consider breaking
@@ -690,10 +717,10 @@ MachineBlockPlacement::findBestLoopExit(MachineFunction &F,
BranchProbability Bias(100 - ExitBlockBias, 100);
if (!ExitingBB || BestExitLoopDepth < SuccLoopDepth ||
ExitEdgeFreq > BestExitEdgeFreq ||
- ((*I)->isLayoutSuccessor(*SI) &&
+ (MBB->isLayoutSuccessor(Succ) &&
!(ExitEdgeFreq < BestExitEdgeFreq * Bias))) {
BestExitEdgeFreq = ExitEdgeFreq;
- ExitingBB = *I;
+ ExitingBB = MBB;
}
}
@@ -734,12 +761,10 @@ void MachineBlockPlacement::rotateLoop(BlockChain &LoopChain,
MachineBasicBlock *Top = *LoopChain.begin();
bool ViableTopFallthrough = false;
- for (MachineBasicBlock::pred_iterator PI = Top->pred_begin(),
- PE = Top->pred_end();
- PI != PE; ++PI) {
- BlockChain *PredChain = BlockToChain[*PI];
- if (!LoopBlockSet.count(*PI) &&
- (!PredChain || *PI == *std::prev(PredChain->end()))) {
+ for (MachineBasicBlock *Pred : Top->predecessors()) {
+ BlockChain *PredChain = BlockToChain[Pred];
+ if (!LoopBlockSet.count(Pred) &&
+ (!PredChain || Pred == *std::prev(PredChain->end()))) {
ViableTopFallthrough = true;
break;
}
@@ -750,18 +775,16 @@ void MachineBlockPlacement::rotateLoop(BlockChain &LoopChain,
// introduce an unnecessary branch.
if (ViableTopFallthrough) {
MachineBasicBlock *Bottom = *std::prev(LoopChain.end());
- for (MachineBasicBlock::succ_iterator SI = Bottom->succ_begin(),
- SE = Bottom->succ_end();
- SI != SE; ++SI) {
- BlockChain *SuccChain = BlockToChain[*SI];
- if (!LoopBlockSet.count(*SI) &&
- (!SuccChain || *SI == *SuccChain->begin()))
+ for (MachineBasicBlock *Succ : Bottom->successors()) {
+ BlockChain *SuccChain = BlockToChain[Succ];
+ if (!LoopBlockSet.count(Succ) &&
+ (!SuccChain || Succ == *SuccChain->begin()))
return;
}
}
- BlockChain::iterator ExitIt = std::find(LoopChain.begin(), LoopChain.end(),
- ExitingBB);
+ BlockChain::iterator ExitIt =
+ std::find(LoopChain.begin(), LoopChain.end(), ExitingBB);
if (ExitIt == LoopChain.end())
return;
@@ -778,8 +801,8 @@ void MachineBlockPlacement::buildLoopChains(MachineFunction &F,
MachineLoop &L) {
// First recurse through any nested loops, building chains for those inner
// loops.
- for (MachineLoop::iterator LI = L.begin(), LE = L.end(); LI != LE; ++LI)
- buildLoopChains(F, **LI);
+ for (MachineLoop *InnerLoop : L)
+ buildLoopChains(F, *InnerLoop);
SmallVector<MachineBasicBlock *, 16> BlockWorkList;
BlockFilterSet LoopBlockSet(L.block_begin(), L.block_end());
@@ -805,21 +828,16 @@ void MachineBlockPlacement::buildLoopChains(MachineFunction &F,
SmallPtrSet<BlockChain *, 4> UpdatedPreds;
assert(LoopChain.LoopPredecessors == 0);
UpdatedPreds.insert(&LoopChain);
- for (MachineLoop::block_iterator BI = L.block_begin(),
- BE = L.block_end();
- BI != BE; ++BI) {
- BlockChain &Chain = *BlockToChain[*BI];
+ for (MachineBasicBlock *LoopBB : L.getBlocks()) {
+ BlockChain &Chain = *BlockToChain[LoopBB];
if (!UpdatedPreds.insert(&Chain).second)
continue;
assert(Chain.LoopPredecessors == 0);
- for (BlockChain::iterator BCI = Chain.begin(), BCE = Chain.end();
- BCI != BCE; ++BCI) {
- assert(BlockToChain[*BCI] == &Chain);
- for (MachineBasicBlock::pred_iterator PI = (*BCI)->pred_begin(),
- PE = (*BCI)->pred_end();
- PI != PE; ++PI) {
- if (BlockToChain[*PI] == &Chain || !LoopBlockSet.count(*PI))
+ for (MachineBasicBlock *ChainBB : Chain) {
+ assert(BlockToChain[ChainBB] == &Chain);
+ for (MachineBasicBlock *Pred : ChainBB->predecessors()) {
+ if (BlockToChain[Pred] == &Chain || !LoopBlockSet.count(Pred))
continue;
++Chain.LoopPredecessors;
}
@@ -841,29 +859,26 @@ void MachineBlockPlacement::buildLoopChains(MachineFunction &F,
<< " Loop header: " << getBlockName(*L.block_begin()) << "\n"
<< " Chain header: " << getBlockName(*LoopChain.begin()) << "\n";
}
- for (BlockChain::iterator BCI = LoopChain.begin(), BCE = LoopChain.end();
- BCI != BCE; ++BCI) {
- dbgs() << " ... " << getBlockName(*BCI) << "\n";
- if (!LoopBlockSet.erase(*BCI)) {
+ for (MachineBasicBlock *ChainBB : LoopChain) {
+ dbgs() << " ... " << getBlockName(ChainBB) << "\n";
+ if (!LoopBlockSet.erase(ChainBB)) {
// We don't mark the loop as bad here because there are real situations
// where this can occur. For example, with an unanalyzable fallthrough
// from a loop block to a non-loop block or vice versa.
dbgs() << "Loop chain contains a block not contained by the loop!\n"
<< " Loop header: " << getBlockName(*L.block_begin()) << "\n"
<< " Chain header: " << getBlockName(*LoopChain.begin()) << "\n"
- << " Bad block: " << getBlockName(*BCI) << "\n";
+ << " Bad block: " << getBlockName(ChainBB) << "\n";
}
}
if (!LoopBlockSet.empty()) {
BadLoop = true;
- for (BlockFilterSet::iterator LBI = LoopBlockSet.begin(),
- LBE = LoopBlockSet.end();
- LBI != LBE; ++LBI)
+ for (MachineBasicBlock *LoopBB : LoopBlockSet)
dbgs() << "Loop contains blocks never placed into a chain!\n"
<< " Loop header: " << getBlockName(*L.block_begin()) << "\n"
<< " Chain header: " << getBlockName(*LoopChain.begin()) << "\n"
- << " Bad block: " << getBlockName(*LBI) << "\n";
+ << " Bad block: " << getBlockName(LoopBB) << "\n";
}
assert(!BadLoop && "Detected problems with the placement of this loop.");
});
@@ -875,8 +890,8 @@ void MachineBlockPlacement::buildCFGChains(MachineFunction &F) {
SmallVector<MachineOperand, 4> Cond; // For AnalyzeBranch.
for (MachineFunction::iterator FI = F.begin(), FE = F.end(); FI != FE; ++FI) {
MachineBasicBlock *BB = FI;
- BlockChain *Chain
- = new (ChainAllocator.Allocate()) BlockChain(BlockToChain, BB);
+ BlockChain *Chain =
+ new (ChainAllocator.Allocate()) BlockChain(BlockToChain, BB);
// Also, merge any blocks which we cannot reason about and must preserve
// the exact fallthrough behavior for.
for (;;) {
@@ -899,28 +914,44 @@ void MachineBlockPlacement::buildCFGChains(MachineFunction &F) {
}
}
+ if (OutlineOptionalBranches) {
+ // Find the nearest common dominator of all of F's terminators.
+ MachineBasicBlock *Terminator = nullptr;
+ for (MachineBasicBlock &MBB : F) {
+ if (MBB.succ_size() == 0) {
+ if (Terminator == nullptr)
+ Terminator = &MBB;
+ else
+ Terminator = MDT->findNearestCommonDominator(Terminator, &MBB);
+ }
+ }
+
+ // MBBs dominating this common dominator are unavoidable.
+ UnavoidableBlocks.clear();
+ for (MachineBasicBlock &MBB : F) {
+ if (MDT->dominates(&MBB, Terminator)) {
+ UnavoidableBlocks.insert(&MBB);
+ }
+ }
+ }
+
// Build any loop-based chains.
- for (MachineLoopInfo::iterator LI = MLI->begin(), LE = MLI->end(); LI != LE;
- ++LI)
- buildLoopChains(F, **LI);
+ for (MachineLoop *L : *MLI)
+ buildLoopChains(F, *L);
SmallVector<MachineBasicBlock *, 16> BlockWorkList;
SmallPtrSet<BlockChain *, 4> UpdatedPreds;
- for (MachineFunction::iterator FI = F.begin(), FE = F.end(); FI != FE; ++FI) {
- MachineBasicBlock *BB = &*FI;
- BlockChain &Chain = *BlockToChain[BB];
+ for (MachineBasicBlock &MBB : F) {
+ BlockChain &Chain = *BlockToChain[&MBB];
if (!UpdatedPreds.insert(&Chain).second)
continue;
assert(Chain.LoopPredecessors == 0);
- for (BlockChain::iterator BCI = Chain.begin(), BCE = Chain.end();
- BCI != BCE; ++BCI) {
- assert(BlockToChain[*BCI] == &Chain);
- for (MachineBasicBlock::pred_iterator PI = (*BCI)->pred_begin(),
- PE = (*BCI)->pred_end();
- PI != PE; ++PI) {
- if (BlockToChain[*PI] == &Chain)
+ for (MachineBasicBlock *ChainBB : Chain) {
+ assert(BlockToChain[ChainBB] == &Chain);
+ for (MachineBasicBlock *Pred : ChainBB->predecessors()) {
+ if (BlockToChain[Pred] == &Chain)
continue;
++Chain.LoopPredecessors;
}
@@ -940,46 +971,40 @@ void MachineBlockPlacement::buildCFGChains(MachineFunction &F) {
// Crash at the end so we get all of the debugging output first.
bool BadFunc = false;
FunctionBlockSetType FunctionBlockSet;
- for (MachineFunction::iterator FI = F.begin(), FE = F.end(); FI != FE; ++FI)
- FunctionBlockSet.insert(FI);
+ for (MachineBasicBlock &MBB : F)
+ FunctionBlockSet.insert(&MBB);
- for (BlockChain::iterator BCI = FunctionChain.begin(),
- BCE = FunctionChain.end();
- BCI != BCE; ++BCI)
- if (!FunctionBlockSet.erase(*BCI)) {
+ for (MachineBasicBlock *ChainBB : FunctionChain)
+ if (!FunctionBlockSet.erase(ChainBB)) {
BadFunc = true;
dbgs() << "Function chain contains a block not in the function!\n"
- << " Bad block: " << getBlockName(*BCI) << "\n";
+ << " Bad block: " << getBlockName(ChainBB) << "\n";
}
if (!FunctionBlockSet.empty()) {
BadFunc = true;
- for (FunctionBlockSetType::iterator FBI = FunctionBlockSet.begin(),
- FBE = FunctionBlockSet.end();
- FBI != FBE; ++FBI)
+ for (MachineBasicBlock *RemainingBB : FunctionBlockSet)
dbgs() << "Function contains blocks never placed into a chain!\n"
- << " Bad block: " << getBlockName(*FBI) << "\n";
+ << " Bad block: " << getBlockName(RemainingBB) << "\n";
}
assert(!BadFunc && "Detected problems with the block placement.");
});
// Splice the blocks into place.
MachineFunction::iterator InsertPos = F.begin();
- for (BlockChain::iterator BI = FunctionChain.begin(),
- BE = FunctionChain.end();
- BI != BE; ++BI) {
- DEBUG(dbgs() << (BI == FunctionChain.begin() ? "Placing chain "
- : " ... ")
- << getBlockName(*BI) << "\n");
- if (InsertPos != MachineFunction::iterator(*BI))
- F.splice(InsertPos, *BI);
+ for (MachineBasicBlock *ChainBB : FunctionChain) {
+ DEBUG(dbgs() << (ChainBB == *FunctionChain.begin() ? "Placing chain "
+ : " ... ")
+ << getBlockName(ChainBB) << "\n");
+ if (InsertPos != MachineFunction::iterator(ChainBB))
+ F.splice(InsertPos, ChainBB);
else
++InsertPos;
// Update the terminator of the previous block.
- if (BI == FunctionChain.begin())
+ if (ChainBB == *FunctionChain.begin())
continue;
- MachineBasicBlock *PrevBB = std::prev(MachineFunction::iterator(*BI));
+ MachineBasicBlock *PrevBB = std::prev(MachineFunction::iterator(ChainBB));
// FIXME: It would be awesome of updateTerminator would just return rather
// than assert when the branch cannot be analyzed in order to remove this
@@ -989,16 +1014,16 @@ void MachineBlockPlacement::buildCFGChains(MachineFunction &F) {
if (!TII->AnalyzeBranch(*PrevBB, TBB, FBB, Cond)) {
// The "PrevBB" is not yet updated to reflect current code layout, so,
// o. it may fall-through to a block without explict "goto" instruction
- // before layout, and no longer fall-through it after layout; or
+ // before layout, and no longer fall-through it after layout; or
// o. just opposite.
- //
+ //
// AnalyzeBranch() may return erroneous value for FBB when these two
// situations take place. For the first scenario FBB is mistakenly set
// NULL; for the 2nd scenario, the FBB, which is expected to be NULL,
// is mistakenly pointing to "*BI".
//
bool needUpdateBr = true;
- if (!Cond.empty() && (!FBB || FBB == *BI)) {
+ if (!Cond.empty() && (!FBB || FBB == ChainBB)) {
PrevBB->updateTerminator();
needUpdateBr = false;
Cond.clear();
@@ -1018,7 +1043,7 @@ void MachineBlockPlacement::buildCFGChains(MachineFunction &F) {
<< getBlockName(PrevBB) << "\n");
DEBUG(dbgs() << " Edge weight: " << MBPI->getEdgeWeight(PrevBB, FBB)
<< " vs " << MBPI->getEdgeWeight(PrevBB, TBB) << "\n");
- DebugLoc dl; // FIXME: this is nowhere
+ DebugLoc dl; // FIXME: this is nowhere
TII->RemoveBranch(*PrevBB);
TII->InsertBranch(*PrevBB, FBB, TBB, Cond, dl);
needUpdateBr = true;
@@ -1042,29 +1067,30 @@ void MachineBlockPlacement::buildCFGChains(MachineFunction &F) {
if (F.getFunction()->hasFnAttribute(Attribute::OptimizeForSize))
return;
if (FunctionChain.begin() == FunctionChain.end())
- return; // Empty chain.
+ return; // Empty chain.
const BranchProbability ColdProb(1, 5); // 20%
BlockFrequency EntryFreq = MBFI->getBlockFreq(F.begin());
BlockFrequency WeightedEntryFreq = EntryFreq * ColdProb;
- for (BlockChain::iterator BI = std::next(FunctionChain.begin()),
- BE = FunctionChain.end();
- BI != BE; ++BI) {
+ for (MachineBasicBlock *ChainBB : FunctionChain) {
+ if (ChainBB == *FunctionChain.begin())
+ continue;
+
// Don't align non-looping basic blocks. These are unlikely to execute
// enough times to matter in practice. Note that we'll still handle
// unnatural CFGs inside of a natural outer loop (the common case) and
// rotated loops.
- MachineLoop *L = MLI->getLoopFor(*BI);
+ MachineLoop *L = MLI->getLoopFor(ChainBB);
if (!L)
continue;
unsigned Align = TLI->getPrefLoopAlignment(L);
if (!Align)
- continue; // Don't care about loop alignment.
+ continue; // Don't care about loop alignment.
// If the block is cold relative to the function entry don't waste space
// aligning it.
- BlockFrequency Freq = MBFI->getBlockFreq(*BI);
+ BlockFrequency Freq = MBFI->getBlockFreq(ChainBB);
if (Freq < WeightedEntryFreq)
continue;
@@ -1077,12 +1103,13 @@ void MachineBlockPlacement::buildCFGChains(MachineFunction &F) {
// Check for the existence of a non-layout predecessor which would benefit
// from aligning this block.
- MachineBasicBlock *LayoutPred = *std::prev(BI);
+ MachineBasicBlock *LayoutPred =
+ &*std::prev(MachineFunction::iterator(ChainBB));
// Force alignment if all the predecessors are jumps. We already checked
// that the block isn't cold above.
- if (!LayoutPred->isSuccessor(*BI)) {
- (*BI)->setAlignment(Align);
+ if (!LayoutPred->isSuccessor(ChainBB)) {
+ ChainBB->setAlignment(Align);
continue;
}
@@ -1090,10 +1117,11 @@ void MachineBlockPlacement::buildCFGChains(MachineFunction &F) {
// cold relative to the block. When this is true, other predecessors make up
// all of the hot entries into the block and thus alignment is likely to be
// important.
- BranchProbability LayoutProb = MBPI->getEdgeProbability(LayoutPred, *BI);
+ BranchProbability LayoutProb =
+ MBPI->getEdgeProbability(LayoutPred, ChainBB);
BlockFrequency LayoutEdgeFreq = MBFI->getBlockFreq(LayoutPred) * LayoutProb;
if (LayoutEdgeFreq <= (Freq * ColdProb))
- (*BI)->setAlignment(Align);
+ ChainBB->setAlignment(Align);
}
}
@@ -1110,6 +1138,7 @@ bool MachineBlockPlacement::runOnMachineFunction(MachineFunction &F) {
MLI = &getAnalysis<MachineLoopInfo>();
TII = F.getSubtarget().getInstrInfo();
TLI = F.getSubtarget().getTargetLowering();
+ MDT = &getAnalysis<MachineDominatorTree>();
assert(BlockToChain.empty());
buildCFGChains(F);
@@ -1119,9 +1148,8 @@ bool MachineBlockPlacement::runOnMachineFunction(MachineFunction &F) {
if (AlignAllBlock)
// Align all of the blocks in the function to a specific alignment.
- for (MachineFunction::iterator FI = F.begin(), FE = F.end();
- FI != FE; ++FI)
- FI->setAlignment(AlignAllBlock);
+ for (MachineBasicBlock &MBB : F)
+ MBB.setAlignment(AlignAllBlock);
// We always return true as we have no way to track whether the final order
// differs from the original order.
@@ -1176,20 +1204,19 @@ bool MachineBlockPlacementStats::runOnMachineFunction(MachineFunction &F) {
MBPI = &getAnalysis<MachineBranchProbabilityInfo>();
MBFI = &getAnalysis<MachineBlockFrequencyInfo>();
- for (MachineFunction::iterator I = F.begin(), E = F.end(); I != E; ++I) {
- BlockFrequency BlockFreq = MBFI->getBlockFreq(I);
- Statistic &NumBranches = (I->succ_size() > 1) ? NumCondBranches
- : NumUncondBranches;
- Statistic &BranchTakenFreq = (I->succ_size() > 1) ? CondBranchTakenFreq
- : UncondBranchTakenFreq;
- for (MachineBasicBlock::succ_iterator SI = I->succ_begin(),
- SE = I->succ_end();
- SI != SE; ++SI) {
+ for (MachineBasicBlock &MBB : F) {
+ BlockFrequency BlockFreq = MBFI->getBlockFreq(&MBB);
+ Statistic &NumBranches =
+ (MBB.succ_size() > 1) ? NumCondBranches : NumUncondBranches;
+ Statistic &BranchTakenFreq =
+ (MBB.succ_size() > 1) ? CondBranchTakenFreq : UncondBranchTakenFreq;
+ for (MachineBasicBlock *Succ : MBB.successors()) {
// Skip if this successor is a fallthrough.
- if (I->isLayoutSuccessor(*SI))
+ if (MBB.isLayoutSuccessor(Succ))
continue;
- BlockFrequency EdgeFreq = BlockFreq * MBPI->getEdgeProbability(I, *SI);
+ BlockFrequency EdgeFreq =
+ BlockFreq * MBPI->getEdgeProbability(&MBB, Succ);
++NumBranches;
BranchTakenFreq += EdgeFreq.getFrequency();
}