aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorJakob Stoklund Olesen <stoklund@2pi.dk>2013-06-28 22:40:43 +0000
committerJakob Stoklund Olesen <stoklund@2pi.dk>2013-06-28 22:40:43 +0000
commit97be1d608e0fd8f0578342932b3b8116e5028a02 (patch)
tree5ece32ea44d40cf58ec988e29428dac2e81215e9
parent6a636a813f33b46b3271ec8517ee1936a0c92c9f (diff)
downloadexternal_llvm-97be1d608e0fd8f0578342932b3b8116e5028a02.zip
external_llvm-97be1d608e0fd8f0578342932b3b8116e5028a02.tar.gz
external_llvm-97be1d608e0fd8f0578342932b3b8116e5028a02.tar.bz2
Minimize precision loss when computing cyclic probabilities.
Allow block frequencies to exceed 32 bits by using the new BlockFrequency division function. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@185236 91177308-0d34-0410-b5e6-96231b3b80d8
-rw-r--r--include/llvm/Analysis/BlockFrequencyImpl.h84
-rw-r--r--test/Analysis/BlockFrequencyInfo/basic.ll42
2 files changed, 91 insertions, 35 deletions
diff --git a/include/llvm/Analysis/BlockFrequencyImpl.h b/include/llvm/Analysis/BlockFrequencyImpl.h
index c4c1601..c9067a0 100644
--- a/include/llvm/Analysis/BlockFrequencyImpl.h
+++ b/include/llvm/Analysis/BlockFrequencyImpl.h
@@ -85,31 +85,16 @@ class BlockFrequencyImpl {
<< " --> " << Freqs[BB] << "\n");
}
- /// divBlockFreq - Divide BB block frequency by PROB. If Prob = 0 do nothing.
- ///
- void divBlockFreq(BlockT *BB, BranchProbability Prob) {
- uint64_t N = Prob.getNumerator();
- assert(N && "Illegal division by zero!");
- uint64_t D = Prob.getDenominator();
- uint64_t Freq = (Freqs[BB].getFrequency() * D) / N;
-
- // Should we assert it?
- if (Freq > UINT32_MAX)
- Freq = UINT32_MAX;
-
- Freqs[BB] = BlockFrequency(Freq);
- DEBUG(dbgs() << "Frequency(" << getBlockName(BB) << ") /= (" << Prob
- << ") --> " << Freqs[BB] << "\n");
- }
-
// All blocks in postorder.
std::vector<BlockT *> POT;
// Map Block -> Position in reverse-postorder list.
DenseMap<BlockT *, unsigned> RPO;
- // Cycle Probability for each bloch.
- DenseMap<BlockT *, uint32_t> CycleProb;
+ // For each loop header, record the per-iteration probability of exiting the
+ // loop. This is the reciprocal of the expected number of loop iterations.
+ typedef DenseMap<BlockT*, BranchProbability> LoopExitProbMap;
+ LoopExitProbMap LoopExitProb;
// (reverse-)postorder traversal iterators.
typedef typename std::vector<BlockT *>::iterator pot_iterator;
@@ -203,10 +188,13 @@ class BlockFrequencyImpl {
if (!isLoopHead)
return;
- assert(EntryFreq >= CycleProb[BB]);
- uint32_t CProb = CycleProb[BB];
- uint32_t Numerator = EntryFreq - CProb ? EntryFreq - CProb : 1;
- divBlockFreq(BB, BranchProbability(Numerator, EntryFreq));
+ // This block is a loop header, so boost its frequency by the expected
+ // number of loop iterations. The loop blocks will be revisited so they all
+ // get this boost.
+ typename LoopExitProbMap::const_iterator I = LoopExitProb.find(BB);
+ assert(I != LoopExitProb.end() && "Loop header missing from table");
+ Freqs[BB] /= I->second;
+ DEBUG(dbgs() << "Loop header scaled to " << Freqs[BB] << ".\n");
}
/// doLoop - Propagate block frequency down through the loop.
@@ -226,24 +214,50 @@ class BlockFrequencyImpl {
}
// Compute loop's cyclic probability using backedges probabilities.
+ BlockFrequency BackFreq;
for (typename GT::ChildIteratorType
PI = GraphTraits< Inverse<BlockT *> >::child_begin(Head),
PE = GraphTraits< Inverse<BlockT *> >::child_end(Head);
PI != PE; ++PI) {
BlockT *Pred = *PI;
assert(Pred);
- if (isBackedge(Pred, Head)) {
- uint64_t N = getEdgeFreq(Pred, Head).getFrequency();
- uint64_t D = getBlockFreq(Head).getFrequency();
- assert(N <= EntryFreq && "Backedge frequency must be <= EntryFreq!");
- uint64_t Res = (N * EntryFreq) / D;
-
- assert(Res <= UINT32_MAX);
- CycleProb[Head] += (uint32_t) Res;
- DEBUG(dbgs() << " CycleProb[" << getBlockName(Head) << "] += " << Res
- << " --> " << CycleProb[Head] << "\n");
- }
+ if (isBackedge(Pred, Head))
+ BackFreq += getEdgeFreq(Pred, Head);
+ }
+
+ // The cyclic probability is freq(BackEdges) / freq(Head), where freq(Head)
+ // only counts edges entering the loop, not the loop backedges.
+ // The probability of leaving the loop on each iteration is:
+ //
+ // ExitProb = 1 - CyclicProb
+ //
+ // The Expected number of loop iterations is:
+ //
+ // Iterations = 1 / ExitProb
+ //
+ uint64_t D = std::max(getBlockFreq(Head).getFrequency(), 1ull);
+ uint64_t N = std::max(BackFreq.getFrequency(), 1ull);
+ if (N < D)
+ N = D - N;
+ else
+ // We'd expect N < D, but rounding and saturation means that can't be
+ // guaranteed.
+ N = 1;
+
+ // Now ExitProb = N / D, make sure it fits in an i32/i32 fraction.
+ assert(N <= D);
+ if (D > UINT32_MAX) {
+ unsigned Shift = 32 - countLeadingZeros(D);
+ D >>= Shift;
+ N >>= Shift;
+ if (N == 0)
+ N = 1;
}
+ BranchProbability LEP = BranchProbability(N, D);
+ LoopExitProb.insert(std::make_pair(Head, LEP));
+ DEBUG(dbgs() << "LoopExitProb[" << getBlockName(Head) << "] = " << LEP
+ << " from 1 - " << BackFreq << " / " << getBlockFreq(Head)
+ << ".\n");
}
friend class BlockFrequencyInfo;
@@ -258,7 +272,7 @@ class BlockFrequencyImpl {
// Clear everything.
RPO.clear();
POT.clear();
- CycleProb.clear();
+ LoopExitProb.clear();
Freqs.clear();
BlockT *EntryBlock = fn->begin();
diff --git a/test/Analysis/BlockFrequencyInfo/basic.ll b/test/Analysis/BlockFrequencyInfo/basic.ll
index 43dd264..ce29fb5 100644
--- a/test/Analysis/BlockFrequencyInfo/basic.ll
+++ b/test/Analysis/BlockFrequencyInfo/basic.ll
@@ -90,3 +90,45 @@ exit:
}
!1 = metadata !{metadata !"branch_weights", i32 4, i32 4, i32 64, i32 4, i32 4}
+
+; CHECK: Printing analysis {{.*}} for function 'nested_loops'
+; CHECK: entry = 1.0
+; This test doesn't seem to be assigning sensible frequencies to nested loops.
+define void @nested_loops(i32 %a) {
+entry:
+ br label %for.cond1.preheader
+
+for.cond1.preheader:
+ %x.024 = phi i32 [ 0, %entry ], [ %inc12, %for.inc11 ]
+ br label %for.cond4.preheader
+
+for.cond4.preheader:
+ %y.023 = phi i32 [ 0, %for.cond1.preheader ], [ %inc9, %for.inc8 ]
+ %add = add i32 %y.023, %x.024
+ br label %for.body6
+
+for.body6:
+ %z.022 = phi i32 [ 0, %for.cond4.preheader ], [ %inc, %for.body6 ]
+ %add7 = add i32 %add, %z.022
+ tail call void @g(i32 %add7) #2
+ %inc = add i32 %z.022, 1
+ %cmp5 = icmp ugt i32 %inc, %a
+ br i1 %cmp5, label %for.inc8, label %for.body6, !prof !2
+
+for.inc8:
+ %inc9 = add i32 %y.023, 1
+ %cmp2 = icmp ugt i32 %inc9, %a
+ br i1 %cmp2, label %for.inc11, label %for.cond4.preheader, !prof !2
+
+for.inc11:
+ %inc12 = add i32 %x.024, 1
+ %cmp = icmp ugt i32 %inc12, %a
+ br i1 %cmp, label %for.end13, label %for.cond1.preheader, !prof !2
+
+for.end13:
+ ret void
+}
+
+declare void @g(i32) #1
+
+!2 = metadata !{metadata !"branch_weights", i32 1, i32 4000}