aboutsummaryrefslogtreecommitdiffstats
path: root/lib/Transforms
diff options
context:
space:
mode:
authorCameron Zwarich <zwarich@apple.com>2011-01-17 01:08:59 +0000
committerCameron Zwarich <zwarich@apple.com>2011-01-17 01:08:59 +0000
commitebed6de7b10d20721f5bd30ed3730cadefed7963 (patch)
tree0f000eba9f826b76a7b83cdb2064a2d8009d9f94 /lib/Transforms
parente7a820c208415094723adabd8336d3a036692ea4 (diff)
downloadexternal_llvm-ebed6de7b10d20721f5bd30ed3730cadefed7963.zip
external_llvm-ebed6de7b10d20721f5bd30ed3730cadefed7963.tar.gz
external_llvm-ebed6de7b10d20721f5bd30ed3730cadefed7963.tar.bz2
Eliminate the use of dominance frontiers in PromoteMemToReg. In addition to
eliminating a potentially quadratic data structure, this also gives a 17% speedup when running -scalarrepl on test-suite + SPEC2000 + SPEC2006. My initial experiment gave a greater speedup around 25%, but I moved the dominator tree level computation from dominator tree construction to PromoteMemToReg. Since this approach to computing IDFs has a much lower overhead than the old code using precomputed DFs, it is worth looking at using this new code for the second scalarrepl pass as well. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@123609 91177308-0d34-0410-b5e6-96231b3b80d8
Diffstat (limited to 'lib/Transforms')
-rw-r--r--lib/Transforms/Scalar/ScalarReplAggregates.cpp9
-rw-r--r--lib/Transforms/Utils/Mem2Reg.cpp5
-rw-r--r--lib/Transforms/Utils/PromoteMemoryToRegister.cpp167
3 files changed, 121 insertions, 60 deletions
diff --git a/lib/Transforms/Scalar/ScalarReplAggregates.cpp b/lib/Transforms/Scalar/ScalarReplAggregates.cpp
index a068556..5c90a36 100644
--- a/lib/Transforms/Scalar/ScalarReplAggregates.cpp
+++ b/lib/Transforms/Scalar/ScalarReplAggregates.cpp
@@ -152,7 +152,6 @@ namespace {
// will not alter the CFG, so say so.
virtual void getAnalysisUsage(AnalysisUsage &AU) const {
AU.addRequired<DominatorTree>();
- AU.addRequired<DominanceFrontier>();
AU.setPreservesCFG();
}
};
@@ -180,7 +179,6 @@ char SROA_SSAUp::ID = 0;
INITIALIZE_PASS_BEGIN(SROA_DF, "scalarrepl",
"Scalar Replacement of Aggregates (DF)", false, false)
INITIALIZE_PASS_DEPENDENCY(DominatorTree)
-INITIALIZE_PASS_DEPENDENCY(DominanceFrontier)
INITIALIZE_PASS_END(SROA_DF, "scalarrepl",
"Scalar Replacement of Aggregates (DF)", false, false)
@@ -877,11 +875,8 @@ public:
bool SROA::performPromotion(Function &F) {
std::vector<AllocaInst*> Allocas;
DominatorTree *DT = 0;
- DominanceFrontier *DF = 0;
- if (HasDomFrontiers) {
+ if (HasDomFrontiers)
DT = &getAnalysis<DominatorTree>();
- DF = &getAnalysis<DominanceFrontier>();
- }
BasicBlock &BB = F.getEntryBlock(); // Get the entry node for the function
@@ -900,7 +895,7 @@ bool SROA::performPromotion(Function &F) {
if (Allocas.empty()) break;
if (HasDomFrontiers)
- PromoteMemToReg(Allocas, *DT, *DF);
+ PromoteMemToReg(Allocas, *DT);
else {
SSAUpdater SSA;
for (unsigned i = 0, e = Allocas.size(); i != e; ++i) {
diff --git a/lib/Transforms/Utils/Mem2Reg.cpp b/lib/Transforms/Utils/Mem2Reg.cpp
index 2b1364c..ea2f8fc 100644
--- a/lib/Transforms/Utils/Mem2Reg.cpp
+++ b/lib/Transforms/Utils/Mem2Reg.cpp
@@ -40,7 +40,6 @@ namespace {
//
virtual void getAnalysisUsage(AnalysisUsage &AU) const {
AU.addRequired<DominatorTree>();
- AU.addRequired<DominanceFrontier>();
AU.setPreservesCFG();
// This is a cluster of orthogonal Transforms
AU.addPreserved<UnifyFunctionExitNodes>();
@@ -54,7 +53,6 @@ char PromotePass::ID = 0;
INITIALIZE_PASS_BEGIN(PromotePass, "mem2reg", "Promote Memory to Register",
false, false)
INITIALIZE_PASS_DEPENDENCY(DominatorTree)
-INITIALIZE_PASS_DEPENDENCY(DominanceFrontier)
INITIALIZE_PASS_END(PromotePass, "mem2reg", "Promote Memory to Register",
false, false)
@@ -66,7 +64,6 @@ bool PromotePass::runOnFunction(Function &F) {
bool Changed = false;
DominatorTree &DT = getAnalysis<DominatorTree>();
- DominanceFrontier &DF = getAnalysis<DominanceFrontier>();
while (1) {
Allocas.clear();
@@ -80,7 +77,7 @@ bool PromotePass::runOnFunction(Function &F) {
if (Allocas.empty()) break;
- PromoteMemToReg(Allocas, DT, DF);
+ PromoteMemToReg(Allocas, DT);
NumPromoted += Allocas.size();
Changed = true;
}
diff --git a/lib/Transforms/Utils/PromoteMemoryToRegister.cpp b/lib/Transforms/Utils/PromoteMemoryToRegister.cpp
index d00f870..a4d7c15 100644
--- a/lib/Transforms/Utils/PromoteMemoryToRegister.cpp
+++ b/lib/Transforms/Utils/PromoteMemoryToRegister.cpp
@@ -9,10 +9,19 @@
//
// This file promotes memory references to be register references. It promotes
// alloca instructions which only have loads and stores as uses. An alloca is
-// transformed by using dominator frontiers to place PHI nodes, then traversing
-// the function in depth-first order to rewrite loads and stores as appropriate.
-// This is just the standard SSA construction algorithm to construct "pruned"
-// SSA form.
+// transformed by using iterated dominator frontiers to place PHI nodes, then
+// traversing the function in depth-first order to rewrite loads and stores as
+// appropriate.
+//
+// The algorithm used here is based on:
+//
+// Sreedhar and Gao. A linear time algorithm for placing phi-nodes.
+// In Proceedings of the 22nd ACM SIGPLAN-SIGACT Symposium on Principles of
+// Programming Languages
+// POPL '95. ACM, New York, NY, 62-73.
+//
+// It has been modified to not explicitly use the DJ graph data structure and to
+// directly compute pruned SSA using per-variable liveness information.
//
//===----------------------------------------------------------------------===//
@@ -35,6 +44,7 @@
#include "llvm/ADT/STLExtras.h"
#include "llvm/Support/CFG.h"
#include <algorithm>
+#include <queue>
using namespace llvm;
STATISTIC(NumLocalPromoted, "Number of alloca's promoted within one block");
@@ -179,7 +189,6 @@ namespace {
///
std::vector<AllocaInst*> Allocas;
DominatorTree &DT;
- DominanceFrontier &DF;
DIFactory *DIF;
/// AST - An AliasSetTracker object to update. If null, don't update it.
@@ -217,12 +226,15 @@ namespace {
/// non-determinstic behavior.
DenseMap<BasicBlock*, unsigned> BBNumbers;
+ /// DomLevels - Maps DomTreeNodes to their level in the dominator tree.
+ DenseMap<DomTreeNode*, unsigned> DomLevels;
+
/// BBNumPreds - Lazily compute the number of predecessors a block has.
DenseMap<const BasicBlock*, unsigned> BBNumPreds;
public:
PromoteMem2Reg(const std::vector<AllocaInst*> &A, DominatorTree &dt,
- DominanceFrontier &df, AliasSetTracker *ast)
- : Allocas(A), DT(dt), DF(df), DIF(0), AST(ast) {}
+ AliasSetTracker *ast)
+ : Allocas(A), DT(dt), DIF(0), AST(ast) {}
~PromoteMem2Reg() {
delete DIF;
}
@@ -329,7 +341,7 @@ namespace {
void PromoteMem2Reg::run() {
- Function &F = *DF.getRoot()->getParent();
+ Function &F = *DT.getRoot()->getParent();
if (AST) PointerAllocaValues.resize(Allocas.size());
AllocaDbgDeclares.resize(Allocas.size());
@@ -422,7 +434,26 @@ void PromoteMem2Reg::run() {
continue;
}
}
-
+
+ // If we haven't computed dominator tree levels, do so now.
+ if (DomLevels.empty()) {
+ SmallVector<DomTreeNode*, 32> Worklist;
+
+ DomTreeNode *Root = DT.getRootNode();
+ DomLevels[Root] = 0;
+ Worklist.push_back(Root);
+
+ while (!Worklist.empty()) {
+ DomTreeNode *Node = Worklist.pop_back_val();
+ unsigned ChildLevel = DomLevels[Node] + 1;
+ for (DomTreeNode::iterator CI = Node->begin(), CE = Node->end();
+ CI != CE; ++CI) {
+ DomLevels[*CI] = ChildLevel;
+ Worklist.push_back(*CI);
+ }
+ }
+ }
+
// If we haven't computed a numbering for the BB's in the function, do so
// now.
if (BBNumbers.empty()) {
@@ -657,6 +688,14 @@ ComputeLiveInBlocks(AllocaInst *AI, AllocaInfo &Info,
}
}
+typedef std::pair<DomTreeNode*, unsigned> DomTreeNodePair;
+
+struct DomTreeNodeCompare {
+ bool operator()(const DomTreeNodePair &LHS, const DomTreeNodePair &RHS) {
+ return LHS.second <= RHS.second;
+ }
+};
+
/// DetermineInsertionPoint - At this point, we're committed to promoting the
/// alloca using IDF's, and the standard SSA construction algorithm. Determine
/// which blocks need phi nodes and see if we can optimize out some work by
@@ -673,46 +712,77 @@ void PromoteMem2Reg::DetermineInsertionPoint(AllocaInst *AI, unsigned AllocaNum,
SmallPtrSet<BasicBlock*, 32> LiveInBlocks;
ComputeLiveInBlocks(AI, Info, DefBlocks, LiveInBlocks);
- // Compute the locations where PhiNodes need to be inserted. Look at the
- // dominance frontier of EACH basic-block we have a write in.
- unsigned CurrentVersion = 0;
+ // Use a priority queue keyed on dominator tree level so that inserted nodes
+ // are handled from the bottom of the dominator tree upwards.
+ typedef std::priority_queue<DomTreeNodePair, SmallVector<DomTreeNodePair, 32>,
+ DomTreeNodeCompare> IDFPriorityQueue;
+ IDFPriorityQueue PQ;
+
+ for (unsigned i = 0, e = Info.DefiningBlocks.size(); i != e; ++i) {
+ if (DomTreeNode *Node = DT.getNode(Info.DefiningBlocks[i]))
+ PQ.push(std::make_pair(Node, DomLevels[Node]));
+ }
+
std::vector<std::pair<unsigned, BasicBlock*> > DFBlocks;
- while (!Info.DefiningBlocks.empty()) {
- BasicBlock *BB = Info.DefiningBlocks.back();
- Info.DefiningBlocks.pop_back();
-
- // Look up the DF for this write, add it to defining blocks.
- DominanceFrontier::const_iterator it = DF.find(BB);
- if (it == DF.end()) continue;
-
- const DominanceFrontier::DomSetType &S = it->second;
-
- // In theory we don't need the indirection through the DFBlocks vector.
- // In practice, the order of calling QueuePhiNode would depend on the
- // (unspecified) ordering of basic blocks in the dominance frontier,
- // which would give PHI nodes non-determinstic subscripts. Fix this by
- // processing blocks in order of the occurance in the function.
- for (DominanceFrontier::DomSetType::const_iterator P = S.begin(),
- PE = S.end(); P != PE; ++P) {
- // If the frontier block is not in the live-in set for the alloca, don't
- // bother processing it.
- if (!LiveInBlocks.count(*P))
- continue;
-
- DFBlocks.push_back(std::make_pair(BBNumbers[*P], *P));
- }
-
- // Sort by which the block ordering in the function.
- if (DFBlocks.size() > 1)
- std::sort(DFBlocks.begin(), DFBlocks.end());
-
- for (unsigned i = 0, e = DFBlocks.size(); i != e; ++i) {
- BasicBlock *BB = DFBlocks[i].second;
- if (QueuePhiNode(BB, AllocaNum, CurrentVersion))
- Info.DefiningBlocks.push_back(BB);
+ SmallPtrSet<DomTreeNode*, 32> Visited;
+ SmallVector<DomTreeNode*, 32> Worklist;
+ while (!PQ.empty()) {
+ DomTreeNodePair RootPair = PQ.top();
+ PQ.pop();
+ DomTreeNode *Root = RootPair.first;
+ unsigned RootLevel = RootPair.second;
+
+ // Walk all dominator tree children of Root, inspecting their CFG edges with
+ // targets elsewhere on the dominator tree. Only targets whose level is at
+ // most Root's level are added to the iterated dominance frontier of the
+ // definition set.
+
+ Worklist.clear();
+ Worklist.push_back(Root);
+
+ while (!Worklist.empty()) {
+ DomTreeNode *Node = Worklist.pop_back_val();
+ BasicBlock *BB = Node->getBlock();
+
+ for (succ_iterator SI = succ_begin(BB), SE = succ_end(BB); SI != SE;
+ ++SI) {
+ DomTreeNode *SuccNode = DT.getNode(*SI);
+
+ // Quickly skip all CFG edges that are also dominator tree edges instead
+ // of catching them below.
+ if (SuccNode->getIDom() == Node)
+ continue;
+
+ unsigned SuccLevel = DomLevels[SuccNode];
+ if (SuccLevel > RootLevel)
+ continue;
+
+ if (!Visited.insert(SuccNode))
+ continue;
+
+ BasicBlock *SuccBB = SuccNode->getBlock();
+ if (!LiveInBlocks.count(SuccBB))
+ continue;
+
+ DFBlocks.push_back(std::make_pair(BBNumbers[SuccBB], SuccBB));
+ if (!DefBlocks.count(SuccBB))
+ PQ.push(std::make_pair(SuccNode, SuccLevel));
+ }
+
+ for (DomTreeNode::iterator CI = Node->begin(), CE = Node->end(); CI != CE;
+ ++CI) {
+ if (!Visited.count(*CI))
+ Worklist.push_back(*CI);
+ }
}
- DFBlocks.clear();
}
+
+ if (DFBlocks.size() > 1)
+ std::sort(DFBlocks.begin(), DFBlocks.end());
+
+ unsigned CurrentVersion = 0;
+ for (unsigned i = 0, e = DFBlocks.size(); i != e; ++i)
+ QueuePhiNode(DFBlocks[i].second, AllocaNum, CurrentVersion);
}
/// RewriteSingleStoreAlloca - If there is only a single store to this value,
@@ -1040,10 +1110,9 @@ NextIteration:
/// made to the IR.
///
void llvm::PromoteMemToReg(const std::vector<AllocaInst*> &Allocas,
- DominatorTree &DT, DominanceFrontier &DF,
- AliasSetTracker *AST) {
+ DominatorTree &DT, AliasSetTracker *AST) {
// If there is nothing to do, bail out...
if (Allocas.empty()) return;
- PromoteMem2Reg(Allocas, DT, DF, AST).run();
+ PromoteMem2Reg(Allocas, DT, AST).run();
}