author     Stephen Hines <srhines@google.com>    2014-02-11 20:01:10 -0800
committer  Stephen Hines <srhines@google.com>    2014-02-11 20:01:10 -0800
commit     ce9904c6ea8fd669978a8eefb854b330eb9828ff (patch)
tree       2418ee2e96ea220977c8fb74959192036ab5b133 /lib/Analysis
parent     c27b10b198c1d9e9b51f2303994313ec2778edd7 (diff)
parent     dbb832b83351cec97b025b61c26536ef50c3181c (diff)
Merge remote-tracking branch 'upstream/release_34' into merge-20140211
Conflicts:
	lib/Linker/LinkModules.cpp
	lib/Support/Unix/Signals.inc

Change-Id: Ia54f291fa5dc828052d2412736e8495c1282aa64
Diffstat (limited to 'lib/Analysis')
-rw-r--r--  lib/Analysis/AliasSetTracker.cpp                 2
-rw-r--r--  lib/Analysis/Analysis.cpp                       11
-rw-r--r--  lib/Analysis/BasicAliasAnalysis.cpp             99
-rw-r--r--  lib/Analysis/BlockFrequencyInfo.cpp            113
-rw-r--r--  lib/Analysis/BranchProbabilityInfo.cpp          22
-rw-r--r--  lib/Analysis/CFG.cpp                           128
-rw-r--r--  lib/Analysis/CMakeLists.txt                     11
-rw-r--r--  lib/Analysis/CaptureTracking.cpp                 6
-rw-r--r--  lib/Analysis/ConstantFolding.cpp               228
-rw-r--r--  lib/Analysis/CostModel.cpp                     271
-rw-r--r--  lib/Analysis/Delinearization.cpp               133
-rw-r--r--  lib/Analysis/DependenceAnalysis.cpp             76
-rw-r--r--  lib/Analysis/IPA/CallGraph.cpp                 230
-rw-r--r--  lib/Analysis/IPA/CallGraphSCCPass.cpp            2
-rw-r--r--  lib/Analysis/IPA/GlobalsModRef.cpp               2
-rw-r--r--  lib/Analysis/IPA/IPA.cpp                         3
-rw-r--r--  lib/Analysis/IPA/InlineCost.cpp                147
-rw-r--r--  lib/Analysis/InstructionSimplify.cpp            20
-rw-r--r--  lib/Analysis/Lint.cpp                           42
-rw-r--r--  lib/Analysis/LoopInfo.cpp                       34
-rw-r--r--  lib/Analysis/MemoryBuiltins.cpp                 97
-rw-r--r--  lib/Analysis/MemoryDependenceAnalysis.cpp       11
-rw-r--r--  lib/Analysis/PathNumbering.cpp                 521
-rw-r--r--  lib/Analysis/PathProfileInfo.cpp               433
-rw-r--r--  lib/Analysis/PathProfileVerifier.cpp           206
-rw-r--r--  lib/Analysis/ProfileDataLoader.cpp             155
-rw-r--r--  lib/Analysis/ProfileDataLoaderPass.cpp         188
-rw-r--r--  lib/Analysis/ProfileEstimatorPass.cpp          426
-rw-r--r--  lib/Analysis/ProfileInfo.cpp                  1079
-rw-r--r--  lib/Analysis/ProfileInfoLoader.cpp             155
-rw-r--r--  lib/Analysis/ProfileInfoLoaderPass.cpp         267
-rw-r--r--  lib/Analysis/ProfileVerifierPass.cpp           383
-rw-r--r--  lib/Analysis/RegionInfo.cpp                      6
-rw-r--r--  lib/Analysis/ScalarEvolution.cpp              1051
-rw-r--r--  lib/Analysis/ScalarEvolutionExpander.cpp       110
-rw-r--r--  lib/Analysis/ScalarEvolutionNormalization.cpp   18
-rw-r--r--  lib/Analysis/TargetTransformInfo.cpp            46
-rw-r--r--  lib/Analysis/TypeBasedAliasAnalysis.cpp        116
-rw-r--r--  lib/Analysis/ValueTracking.cpp                  43
39 files changed, 2243 insertions, 4648 deletions
diff --git a/lib/Analysis/AliasSetTracker.cpp b/lib/Analysis/AliasSetTracker.cpp
index 5910526..2289c12 100644
--- a/lib/Analysis/AliasSetTracker.cpp
+++ b/lib/Analysis/AliasSetTracker.cpp
@@ -299,7 +299,6 @@ bool AliasSetTracker::add(Value *Ptr, uint64_t Size, const MDNode *TBAAInfo) {
bool AliasSetTracker::add(LoadInst *LI) {
if (LI->getOrdering() > Monotonic) return addUnknown(LI);
AliasSet::AccessType ATy = AliasSet::Refs;
- if (!LI->isUnordered()) ATy = AliasSet::ModRef;
bool NewPtr;
AliasSet &AS = addPointer(LI->getOperand(0),
AA.getTypeStoreSize(LI->getType()),
@@ -312,7 +311,6 @@ bool AliasSetTracker::add(LoadInst *LI) {
bool AliasSetTracker::add(StoreInst *SI) {
if (SI->getOrdering() > Monotonic) return addUnknown(SI);
AliasSet::AccessType ATy = AliasSet::Mods;
- if (!SI->isUnordered()) ATy = AliasSet::ModRef;
bool NewPtr;
Value *Val = SI->getOperand(0);
AliasSet &AS = addPointer(SI->getOperand(1),
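
Illustrative note, not part of the commit: after the early `getOrdering() > Monotonic` bail-out in the two hunks above, the only accesses reaching the deleted `isUnordered()` checks are non-atomic, unordered, monotonic, or volatile ones, so the removed upgrade to ModRef only ever fired for monotonic and volatile accesses. A minimal sketch of the surviving guard, assuming LLVM 3.4-era headers (the helper name is invented):

    // Accesses stronger than Monotonic carry synchronization semantics that a
    // per-pointer alias-set record cannot represent; AliasSetTracker records
    // them as opaque "unknown" instructions instead.
    static bool trackedAsPointerAccess(const LoadInst *LI) {
      return LI->getOrdering() <= Monotonic;
    }
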
diff --git a/lib/Analysis/Analysis.cpp b/lib/Analysis/Analysis.cpp
index 349c417..98f2a55 100644
--- a/lib/Analysis/Analysis.cpp
+++ b/lib/Analysis/Analysis.cpp
@@ -34,6 +34,7 @@ void llvm::initializeAnalysis(PassRegistry &Registry) {
initializeCFGOnlyViewerPass(Registry);
initializeCFGOnlyPrinterPass(Registry);
initializeDependenceAnalysisPass(Registry);
+ initializeDelinearizationPass(Registry);
initializeDominanceFrontierPass(Registry);
initializeDomViewerPass(Registry);
initializeDomPrinterPass(Registry);
@@ -54,16 +55,6 @@ void llvm::initializeAnalysis(PassRegistry &Registry) {
initializeMemoryDependenceAnalysisPass(Registry);
initializeModuleDebugInfoPrinterPass(Registry);
initializePostDominatorTreePass(Registry);
- initializeProfileEstimatorPassPass(Registry);
- initializeNoProfileInfoPass(Registry);
- initializeNoPathProfileInfoPass(Registry);
- initializeProfileInfoAnalysisGroup(Registry);
- initializePathProfileInfoAnalysisGroup(Registry);
- initializeLoaderPassPass(Registry);
- initializePathProfileLoaderPassPass(Registry);
- initializeProfileVerifierPassPass(Registry);
- initializePathProfileVerifierPass(Registry);
- initializeProfileMetadataLoaderPassPass(Registry);
initializeRegionInfoPass(Registry);
initializeRegionViewerPass(Registry);
initializeRegionPrinterPass(Registry);
diff --git a/lib/Analysis/BasicAliasAnalysis.cpp b/lib/Analysis/BasicAliasAnalysis.cpp
index 9fe1362..b2c2011 100644
--- a/lib/Analysis/BasicAliasAnalysis.cpp
+++ b/lib/Analysis/BasicAliasAnalysis.cpp
@@ -122,7 +122,7 @@ static bool isObjectSmallerThan(const Value *V, uint64_t Size,
// question (in this case rewind to p), or
// - just give up. It is up to caller to make sure the pointer is pointing
// to the base address the object.
- //
+ //
// We go for 2nd option for simplicity.
if (!isIdentifiedObject(V))
return false;
@@ -130,7 +130,7 @@ static bool isObjectSmallerThan(const Value *V, uint64_t Size,
// This function needs to use the aligned object size because we allow
// reads a bit past the end given sufficient alignment.
uint64_t ObjectSize = getObjectSize(V, TD, TLI, /*RoundToAlign*/true);
-
+
return ObjectSize != AliasAnalysis::UnknownSize && ObjectSize < Size;
}
@@ -163,7 +163,7 @@ namespace {
EK_SignExt,
EK_ZeroExt
};
-
+
struct VariableGEPIndex {
const Value *V;
ExtensionKind Extension;
@@ -200,7 +200,7 @@ static Value *GetLinearExpression(Value *V, APInt &Scale, APInt &Offset,
Offset = 0;
return V;
}
-
+
if (BinaryOperator *BOp = dyn_cast<BinaryOperator>(V)) {
if (ConstantInt *RHSC = dyn_cast<ConstantInt>(BOp->getOperand(1))) {
switch (BOp->getOpcode()) {
@@ -231,7 +231,7 @@ static Value *GetLinearExpression(Value *V, APInt &Scale, APInt &Offset,
}
}
}
-
+
// Since GEP indices are sign extended anyway, we don't care about the high
// bits of a sign or zero extended value - just scales and offsets. The
// extensions have to be consistent though.
@@ -248,10 +248,10 @@ static Value *GetLinearExpression(Value *V, APInt &Scale, APInt &Offset,
TD, Depth+1);
Scale = Scale.zext(OldWidth);
Offset = Offset.zext(OldWidth);
-
+
return Result;
}
-
+
Scale = 1;
Offset = 0;
return V;
@@ -276,7 +276,7 @@ DecomposeGEPExpression(const Value *V, int64_t &BaseOffs,
const DataLayout *TD) {
// Limit recursion depth to limit compile time in crazy cases.
unsigned MaxLookup = 6;
-
+
BaseOffs = 0;
do {
// See if this is a bitcast or GEP.
@@ -291,7 +291,7 @@ DecomposeGEPExpression(const Value *V, int64_t &BaseOffs,
}
return V;
}
-
+
if (Op->getOpcode() == Instruction::BitCast) {
V = Op->getOperand(0);
continue;
@@ -308,15 +308,14 @@ DecomposeGEPExpression(const Value *V, int64_t &BaseOffs,
V = Simplified;
continue;
}
-
+
return V;
}
-
+
// Don't attempt to analyze GEPs over unsized objects.
- if (!cast<PointerType>(GEPOp->getOperand(0)->getType())
- ->getElementType()->isSized())
+ if (!GEPOp->getOperand(0)->getType()->getPointerElementType()->isSized())
return V;
-
+
// If we are lacking DataLayout information, we can't compute the offets of
// elements computed by GEPs. However, we can handle bitcast equivalent
// GEPs.
@@ -326,7 +325,8 @@ DecomposeGEPExpression(const Value *V, int64_t &BaseOffs,
V = GEPOp->getOperand(0);
continue;
}
-
+
+ unsigned AS = GEPOp->getPointerAddressSpace();
// Walk the indices of the GEP, accumulating them into BaseOff/VarIndices.
gep_type_iterator GTI = gep_type_begin(GEPOp);
for (User::const_op_iterator I = GEPOp->op_begin()+1,
@@ -337,38 +337,37 @@ DecomposeGEPExpression(const Value *V, int64_t &BaseOffs,
// For a struct, add the member offset.
unsigned FieldNo = cast<ConstantInt>(Index)->getZExtValue();
if (FieldNo == 0) continue;
-
+
BaseOffs += TD->getStructLayout(STy)->getElementOffset(FieldNo);
continue;
}
-
+
// For an array/pointer, add the element offset, explicitly scaled.
if (ConstantInt *CIdx = dyn_cast<ConstantInt>(Index)) {
if (CIdx->isZero()) continue;
BaseOffs += TD->getTypeAllocSize(*GTI)*CIdx->getSExtValue();
continue;
}
-
+
uint64_t Scale = TD->getTypeAllocSize(*GTI);
ExtensionKind Extension = EK_NotExtended;
-
+
// If the integer type is smaller than the pointer size, it is implicitly
// sign extended to pointer size.
- unsigned Width = cast<IntegerType>(Index->getType())->getBitWidth();
- if (TD->getPointerSizeInBits() > Width)
+ unsigned Width = Index->getType()->getIntegerBitWidth();
+ if (TD->getPointerSizeInBits(AS) > Width)
Extension = EK_SignExt;
-
+
// Use GetLinearExpression to decompose the index into a C1*V+C2 form.
APInt IndexScale(Width, 0), IndexOffset(Width, 0);
Index = GetLinearExpression(Index, IndexScale, IndexOffset, Extension,
*TD, 0);
-
+
// The GEP index scale ("Scale") scales C1*V+C2, yielding (C1*V+C2)*Scale.
// This gives us an aggregate computation of (C1*Scale)*V + C2*Scale.
BaseOffs += IndexOffset.getSExtValue()*Scale;
Scale *= IndexScale.getSExtValue();
-
-
+
// If we already had an occurrence of this index variable, merge this
// scale into it. For example, we want to handle:
// A[x][x] -> x*16 + x*4 -> x*20
@@ -381,25 +380,25 @@ DecomposeGEPExpression(const Value *V, int64_t &BaseOffs,
break;
}
}
-
+
// Make sure that we have a scale that makes sense for this target's
// pointer size.
- if (unsigned ShiftBits = 64-TD->getPointerSizeInBits()) {
+ if (unsigned ShiftBits = 64 - TD->getPointerSizeInBits(AS)) {
Scale <<= ShiftBits;
Scale = (int64_t)Scale >> ShiftBits;
}
-
+
if (Scale) {
VariableGEPIndex Entry = {Index, Extension,
static_cast<int64_t>(Scale)};
VarIndices.push_back(Entry);
}
}
-
+
// Analyze the base pointer next.
V = GEPOp->getOperand(0);
} while (--MaxLookup);
-
+
// If the chain of expressions is too deep, just return early.
return V;
}
@@ -407,7 +406,7 @@ DecomposeGEPExpression(const Value *V, int64_t &BaseOffs,
/// GetIndexDifference - Dest and Src are the variable indices from two
/// decomposed GetElementPtr instructions GEP1 and GEP2 which have common base
/// pointers. Subtract the GEP2 indices from GEP1 to find the symbolic
-/// difference between the two pointers.
+/// difference between the two pointers.
static void GetIndexDifference(SmallVectorImpl<VariableGEPIndex> &Dest,
const SmallVectorImpl<VariableGEPIndex> &Src) {
if (Src.empty()) return;
@@ -416,12 +415,12 @@ static void GetIndexDifference(SmallVectorImpl<VariableGEPIndex> &Dest,
const Value *V = Src[i].V;
ExtensionKind Extension = Src[i].Extension;
int64_t Scale = Src[i].Scale;
-
+
// Find V in Dest. This is N^2, but pointer indices almost never have more
// than a few variable indexes.
for (unsigned j = 0, e = Dest.size(); j != e; ++j) {
if (Dest[j].V != V || Dest[j].Extension != Extension) continue;
-
+
// If we found it, subtract off Scale V's from the entry in Dest. If it
// goes to zero, remove the entry.
if (Dest[j].Scale != Scale)
@@ -431,7 +430,7 @@ static void GetIndexDifference(SmallVectorImpl<VariableGEPIndex> &Dest,
Scale = 0;
break;
}
-
+
// If we didn't consume this entry, add it to the end of the Dest list.
if (Scale) {
VariableGEPIndex Entry = { V, Extension, -Scale };
@@ -526,7 +525,7 @@ namespace {
return (AliasAnalysis*)this;
return this;
}
-
+
private:
// AliasCache - Track alias queries to guard against recursion.
typedef std::pair<Location, Location> LocPair;
@@ -696,7 +695,7 @@ BasicAliasAnalysis::getModRefInfo(ImmutableCallSite CS,
"AliasAnalysis query involving multiple functions!");
const Value *Object = GetUnderlyingObject(Loc.Ptr, TD);
-
+
// If this is a tail call and Loc.Ptr points to a stack location, we know that
// the tail call cannot access or modify the local stack.
// We cannot exclude byval arguments here; these belong to the caller of
@@ -706,7 +705,7 @@ BasicAliasAnalysis::getModRefInfo(ImmutableCallSite CS,
if (const CallInst *CI = dyn_cast<CallInst>(CS.getInstruction()))
if (CI->isTailCall())
return NoModRef;
-
+
// If the pointer is to a locally allocated object that does not escape,
// then the call can not mod/ref the pointer unless the call takes the pointer
// as an argument, and itself doesn't capture it.
@@ -722,7 +721,7 @@ BasicAliasAnalysis::getModRefInfo(ImmutableCallSite CS,
if (!(*CI)->getType()->isPointerTy() ||
(!CS.doesNotCapture(ArgNo) && !CS.isByValArgument(ArgNo)))
continue;
-
+
// If this is a no-capture pointer argument, see if we can tell that it
// is impossible to alias the pointer we're checking. If not, we have to
// assume that the call could touch the pointer, even though it doesn't
@@ -732,7 +731,7 @@ BasicAliasAnalysis::getModRefInfo(ImmutableCallSite CS,
break;
}
}
-
+
if (!PassedAsArg)
return NoModRef;
}
@@ -821,7 +820,7 @@ BasicAliasAnalysis::getModRefInfo(ImmutableCallSite CS,
}
// We can bound the aliasing properties of memset_pattern16 just as we can
- // for memcpy/memset. This is particularly important because the
+ // for memcpy/memset. This is particularly important because the
// LoopIdiomRecognizer likes to turn loops into calls to memset_pattern16
// whenever possible.
else if (TLI.has(LibFunc::memset_pattern16) &&
@@ -925,22 +924,22 @@ BasicAliasAnalysis::aliasGEP(const GEPOperator *GEP1, uint64_t V1Size,
GEP1VariableIndices.clear();
}
}
-
+
// If we get a No or May, then return it immediately, no amount of analysis
// will improve this situation.
if (BaseAlias != MustAlias) return BaseAlias;
-
+
// Otherwise, we have a MustAlias. Since the base pointers alias each other
// exactly, see if the computed offset from the common pointer tells us
// about the relation of the resulting pointer.
const Value *GEP1BasePtr =
DecomposeGEPExpression(GEP1, GEP1BaseOffset, GEP1VariableIndices, TD);
-
+
int64_t GEP2BaseOffset;
SmallVector<VariableGEPIndex, 4> GEP2VariableIndices;
const Value *GEP2BasePtr =
DecomposeGEPExpression(GEP2, GEP2BaseOffset, GEP2VariableIndices, TD);
-
+
// DecomposeGEPExpression and GetUnderlyingObject should return the
// same result except when DecomposeGEPExpression has no DataLayout.
if (GEP1BasePtr != UnderlyingV1 || GEP2BasePtr != UnderlyingV2) {
@@ -948,12 +947,12 @@ BasicAliasAnalysis::aliasGEP(const GEPOperator *GEP1, uint64_t V1Size,
"DecomposeGEPExpression and GetUnderlyingObject disagree!");
return MayAlias;
}
-
+
// Subtract the GEP2 pointer from the GEP1 pointer to find out their
// symbolic difference.
GEP1BaseOffset -= GEP2BaseOffset;
GetIndexDifference(GEP1VariableIndices, GEP2VariableIndices);
-
+
} else {
// Check to see if these two pointers are related by the getelementptr
// instruction. If one pointer is a GEP with a non-zero index of the other
@@ -975,7 +974,7 @@ BasicAliasAnalysis::aliasGEP(const GEPOperator *GEP1, uint64_t V1Size,
const Value *GEP1BasePtr =
DecomposeGEPExpression(GEP1, GEP1BaseOffset, GEP1VariableIndices, TD);
-
+
// DecomposeGEPExpression and GetUnderlyingObject should return the
// same result except when DecomposeGEPExpression has no DataLayout.
if (GEP1BasePtr != UnderlyingV1) {
@@ -984,7 +983,7 @@ BasicAliasAnalysis::aliasGEP(const GEPOperator *GEP1, uint64_t V1Size,
return MayAlias;
}
}
-
+
// In the two GEP Case, if there is no difference in the offsets of the
// computed pointers, the resultant pointers are a must alias. This
// hapens when we have two lexically identical GEP's (for example).
@@ -1226,7 +1225,7 @@ BasicAliasAnalysis::aliasCheck(const Value *V1, uint64_t V1Size,
if ((isa<ConstantPointerNull>(O2) && isKnownNonNull(O1)) ||
(isa<ConstantPointerNull>(O1) && isKnownNonNull(O2)))
return NoAlias;
-
+
// If one pointer is the result of a call/invoke or load and the other is a
// non-escaping local object within the same function, then we know the
// object couldn't escape to a point where the call could return it.
@@ -1248,7 +1247,7 @@ BasicAliasAnalysis::aliasCheck(const Value *V1, uint64_t V1Size,
if ((V1Size != UnknownSize && isObjectSmallerThan(O2, V1Size, *TD, *TLI)) ||
(V2Size != UnknownSize && isObjectSmallerThan(O1, V2Size, *TD, *TLI)))
return NoAlias;
-
+
// Check the cache before climbing up use-def chains. This also terminates
// otherwise infinitely recursive queries.
LocPair Locs(Location(V1, V1Size, V1TBAAInfo),
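
A worked example, illustrative and not from the commit, of the decomposition DecomposeGEPExpression performs for the `A[x][x] -> x*16 + x*4 -> x*20` case mentioned in its comments:

    ; On a target with 64-bit pointers, decomposing
    ;   %p = getelementptr [10 x [4 x i32]]* %A, i64 0, i64 %x, i64 %x
    ; yields BaseOffs = 0 and a single merged variable index
    ;   { V = %x, Extension = EK_NotExtended, Scale = 16 + 4 = 20 }
    ; i.e. %p addresses %A plus 20*%x bytes, which lets aliasGEP reason about
    ; the symbolic difference of two such pointers.
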
diff --git a/lib/Analysis/BlockFrequencyInfo.cpp b/lib/Analysis/BlockFrequencyInfo.cpp
index 8469556..62f3ab1 100644
--- a/lib/Analysis/BlockFrequencyInfo.cpp
+++ b/lib/Analysis/BlockFrequencyInfo.cpp
@@ -1,4 +1,4 @@
-//=======-------- BlockFrequencyInfo.cpp - Block Frequency Analysis -------=======//
+//=======-------- BlockFrequencyInfo.cpp - Block Frequency Analysis -------===//
//
// The LLVM Compiler Infrastructure
//
@@ -17,14 +17,97 @@
#include "llvm/Analysis/LoopInfo.h"
#include "llvm/Analysis/Passes.h"
#include "llvm/InitializePasses.h"
+#include "llvm/Support/CFG.h"
+#include "llvm/Support/CommandLine.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/GraphWriter.h"
using namespace llvm;
-INITIALIZE_PASS_BEGIN(BlockFrequencyInfo, "block-freq", "Block Frequency Analysis",
- true, true)
+#ifndef NDEBUG
+enum GVDAGType {
+ GVDT_None,
+ GVDT_Fraction,
+ GVDT_Integer
+};
+
+static cl::opt<GVDAGType>
+ViewBlockFreqPropagationDAG("view-block-freq-propagation-dags", cl::Hidden,
+ cl::desc("Pop up a window to show a dag displaying how block "
+ "frequencies propagation through the CFG."),
+ cl::values(
+ clEnumValN(GVDT_None, "none",
+ "do not display graphs."),
+ clEnumValN(GVDT_Fraction, "fraction", "display a graph using the "
+ "fractional block frequency representation."),
+ clEnumValN(GVDT_Integer, "integer", "display a graph using the raw "
+ "integer fractional block frequency representation."),
+ clEnumValEnd));
+
+namespace llvm {
+
+template <>
+struct GraphTraits<BlockFrequencyInfo *> {
+ typedef const BasicBlock NodeType;
+ typedef succ_const_iterator ChildIteratorType;
+ typedef Function::const_iterator nodes_iterator;
+
+ static inline const NodeType *getEntryNode(const BlockFrequencyInfo *G) {
+ return G->getFunction()->begin();
+ }
+ static ChildIteratorType child_begin(const NodeType *N) {
+ return succ_begin(N);
+ }
+ static ChildIteratorType child_end(const NodeType *N) {
+ return succ_end(N);
+ }
+ static nodes_iterator nodes_begin(const BlockFrequencyInfo *G) {
+ return G->getFunction()->begin();
+ }
+ static nodes_iterator nodes_end(const BlockFrequencyInfo *G) {
+ return G->getFunction()->end();
+ }
+};
+
+template<>
+struct DOTGraphTraits<BlockFrequencyInfo*> : public DefaultDOTGraphTraits {
+ explicit DOTGraphTraits(bool isSimple=false) :
+ DefaultDOTGraphTraits(isSimple) {}
+
+ static std::string getGraphName(const BlockFrequencyInfo *G) {
+ return G->getFunction()->getName();
+ }
+
+ std::string getNodeLabel(const BasicBlock *Node,
+ const BlockFrequencyInfo *Graph) {
+ std::string Result;
+ raw_string_ostream OS(Result);
+
+ OS << Node->getName().str() << ":";
+ switch (ViewBlockFreqPropagationDAG) {
+ case GVDT_Fraction:
+ Graph->getBlockFreq(Node).print(OS);
+ break;
+ case GVDT_Integer:
+ OS << Graph->getBlockFreq(Node).getFrequency();
+ break;
+ case GVDT_None:
+ llvm_unreachable("If we are not supposed to render a graph we should "
+ "never reach this point.");
+ }
+
+ return Result;
+ }
+};
+
+} // end namespace llvm
+#endif
+
+INITIALIZE_PASS_BEGIN(BlockFrequencyInfo, "block-freq",
+ "Block Frequency Analysis", true, true)
INITIALIZE_PASS_DEPENDENCY(BranchProbabilityInfo)
-INITIALIZE_PASS_END(BlockFrequencyInfo, "block-freq", "Block Frequency Analysis",
- true, true)
+INITIALIZE_PASS_END(BlockFrequencyInfo, "block-freq",
+ "Block Frequency Analysis", true, true)
char BlockFrequencyInfo::ID = 0;
@@ -46,6 +129,10 @@ void BlockFrequencyInfo::getAnalysisUsage(AnalysisUsage &AU) const {
bool BlockFrequencyInfo::runOnFunction(Function &F) {
BranchProbabilityInfo &BPI = getAnalysis<BranchProbabilityInfo>();
BFI->doFunction(&F, &BPI);
+#ifndef NDEBUG
+ if (ViewBlockFreqPropagationDAG != GVDT_None)
+ view();
+#endif
return false;
}
@@ -56,3 +143,19 @@ void BlockFrequencyInfo::print(raw_ostream &O, const Module *) const {
BlockFrequency BlockFrequencyInfo::getBlockFreq(const BasicBlock *BB) const {
return BFI->getBlockFreq(BB);
}
+
+/// Pop up a ghostview window with the current block frequency propagation
+/// rendered using dot.
+void BlockFrequencyInfo::view() const {
+// This code is only for debugging.
+#ifndef NDEBUG
+ ViewGraph(const_cast<BlockFrequencyInfo *>(this), "BlockFrequencyDAGs");
+#else
+ errs() << "BlockFrequencyInfo::view is only available in debug builds on "
+ "systems with Graphviz or gv!\n";
+#endif // NDEBUG
+}
+
+const Function *BlockFrequencyInfo::getFunction() const {
+ return BFI->Fn;
+}
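
A hypothetical invocation, assuming an assertions-enabled build of opt (the input file name is invented), of the viewer option this file adds; the option is only compiled in under `#ifndef NDEBUG`:

    $ opt -analyze -block-freq -view-block-freq-propagation-dags=integer input.ll
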
diff --git a/lib/Analysis/BranchProbabilityInfo.cpp b/lib/Analysis/BranchProbabilityInfo.cpp
index 7cdf828..86560ca 100644
--- a/lib/Analysis/BranchProbabilityInfo.cpp
+++ b/lib/Analysis/BranchProbabilityInfo.cpp
@@ -398,10 +398,24 @@ bool BranchProbabilityInfo::calcZeroHeuristics(BasicBlock *BB) {
// InstCombine canonicalizes X <= 0 into X < 1.
// X <= 0 -> Unlikely
isProb = false;
- } else if (CV->isAllOnesValue() && CI->getPredicate() == CmpInst::ICMP_SGT) {
- // InstCombine canonicalizes X >= 0 into X > -1.
- // X >= 0 -> Likely
- isProb = true;
+ } else if (CV->isAllOnesValue()) {
+ switch (CI->getPredicate()) {
+ case CmpInst::ICMP_EQ:
+ // X == -1 -> Unlikely
+ isProb = false;
+ break;
+ case CmpInst::ICMP_NE:
+ // X != -1 -> Likely
+ isProb = true;
+ break;
+ case CmpInst::ICMP_SGT:
+ // InstCombine canonicalizes X >= 0 into X > -1.
+ // X >= 0 -> Likely
+ isProb = true;
+ break;
+ default:
+ return false;
+ }
} else {
return false;
}
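
Illustrative IR, not from the commit, showing how the widened heuristic above classifies comparisons against all-ones:

    %a = icmp eq i32 %x, -1   ; X == -1 -> Unlikely
    %b = icmp ne i32 %x, -1   ; X != -1 -> Likely
    %c = icmp sgt i32 %x, -1  ; InstCombine's form of X >= 0 -> Likely
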
diff --git a/lib/Analysis/CFG.cpp b/lib/Analysis/CFG.cpp
index a5ed21a..c3f32d3 100644
--- a/lib/Analysis/CFG.cpp
+++ b/lib/Analysis/CFG.cpp
@@ -116,7 +116,7 @@ bool llvm::isCriticalEdge(const TerminatorInst *TI, unsigned SuccNum,
// LoopInfo contains a mapping from basic block to the innermost loop. Find
// the outermost loop in the loop nest that contains BB.
-static const Loop *getOutermostLoop(LoopInfo *LI, const BasicBlock *BB) {
+static const Loop *getOutermostLoop(const LoopInfo *LI, const BasicBlock *BB) {
const Loop *L = LI->getLoopFor(BB);
if (L) {
while (const Loop *Parent = L->getParentLoop())
@@ -126,60 +126,17 @@ static const Loop *getOutermostLoop(LoopInfo *LI, const BasicBlock *BB) {
}
// True if there is a loop which contains both BB1 and BB2.
-static bool loopContainsBoth(LoopInfo *LI,
+static bool loopContainsBoth(const LoopInfo *LI,
const BasicBlock *BB1, const BasicBlock *BB2) {
const Loop *L1 = getOutermostLoop(LI, BB1);
const Loop *L2 = getOutermostLoop(LI, BB2);
return L1 != NULL && L1 == L2;
}
-static bool isPotentiallyReachableSameBlock(const Instruction *A,
- const Instruction *B,
- LoopInfo *LI) {
- // The same block case is special because it's the only time we're looking
- // within a single block to see which comes first. Once we start looking at
- // multiple blocks, the first instruction of the block is reachable, so we
- // only need to determine reachability between whole blocks.
-
- const BasicBlock *BB = A->getParent();
- // If the block is in a loop then we can reach any instruction in the block
- // from any other instruction in the block by going around the backedge.
- // Check whether we're in a loop (or aren't sure).
-
- // Can't be in a loop if it's the entry block -- the entry block may not
- // have predecessors.
- bool HasLoop = BB != &BB->getParent()->getEntryBlock();
-
- // Can't be in a loop if LoopInfo doesn't know about it.
- if (LI && HasLoop) {
- HasLoop = LI->getLoopFor(BB) != 0;
- }
- if (HasLoop)
- return true;
-
- // Linear scan, start at 'A', see whether we hit 'B' or the end first.
- for (BasicBlock::const_iterator I = A, E = BB->end(); I != E; ++I) {
- if (&*I == B)
- return true;
- }
- return false;
-}
-
-bool llvm::isPotentiallyReachable(const Instruction *A, const Instruction *B,
- DominatorTree *DT, LoopInfo *LI) {
- assert(A->getParent()->getParent() == B->getParent()->getParent() &&
- "This analysis is function-local!");
-
- const BasicBlock *StopBB = B->getParent();
-
- if (A->getParent() == B->getParent())
- return isPotentiallyReachableSameBlock(A, B, LI);
-
- if (A->getParent() == &A->getParent()->getParent()->getEntryBlock())
- return true;
- if (B->getParent() == &A->getParent()->getParent()->getEntryBlock())
- return false;
-
+static bool isPotentiallyReachableInner(SmallVectorImpl<BasicBlock *> &Worklist,
+ BasicBlock *StopBB,
+ const DominatorTree *DT,
+ const LoopInfo *LI) {
// When the stop block is unreachable, it's dominated from everywhere,
// regardless of whether there's a path between the two blocks.
if (DT && !DT->isReachableFromEntry(StopBB))
@@ -188,11 +145,7 @@ bool llvm::isPotentiallyReachable(const Instruction *A, const Instruction *B,
// Limit the number of blocks we visit. The goal is to avoid run-away compile
// times on large CFGs without hampering sensible code. Arbitrarily chosen.
unsigned Limit = 32;
-
SmallSet<const BasicBlock*, 64> Visited;
- SmallVector<BasicBlock*, 32> Worklist;
- Worklist.push_back(const_cast<BasicBlock*>(A->getParent()));
-
do {
BasicBlock *BB = Worklist.pop_back_val();
if (!Visited.insert(BB))
@@ -221,7 +174,72 @@ bool llvm::isPotentiallyReachable(const Instruction *A, const Instruction *B,
}
} while (!Worklist.empty());
- // We have exhaustived all possible paths and are certain that 'To' can not
- // be reached from 'From'.
+ // We have exhausted all possible paths and are certain that 'To' can not be
+ // reached from 'From'.
return false;
}
+
+bool llvm::isPotentiallyReachable(const BasicBlock *A, const BasicBlock *B,
+ const DominatorTree *DT, const LoopInfo *LI) {
+ assert(A->getParent() == B->getParent() &&
+ "This analysis is function-local!");
+
+ SmallVector<BasicBlock*, 32> Worklist;
+ Worklist.push_back(const_cast<BasicBlock*>(A));
+
+ return isPotentiallyReachableInner(Worklist, const_cast<BasicBlock*>(B),
+ DT, LI);
+}
+
+bool llvm::isPotentiallyReachable(const Instruction *A, const Instruction *B,
+ const DominatorTree *DT, const LoopInfo *LI) {
+ assert(A->getParent()->getParent() == B->getParent()->getParent() &&
+ "This analysis is function-local!");
+
+ SmallVector<BasicBlock*, 32> Worklist;
+
+ if (A->getParent() == B->getParent()) {
+ // The same block case is special because it's the only time we're looking
+ // within a single block to see which instruction comes first. Once we
+ // start looking at multiple blocks, the first instruction of the block is
+ // reachable, so we only need to determine reachability between whole
+ // blocks.
+ BasicBlock *BB = const_cast<BasicBlock *>(A->getParent());
+
+ // If the block is in a loop then we can reach any instruction in the block
+ // from any other instruction in the block by going around a backedge.
+ if (LI && LI->getLoopFor(BB) != 0)
+ return true;
+
+ // Linear scan, start at 'A', see whether we hit 'B' or the end first.
+ for (BasicBlock::const_iterator I = A, E = BB->end(); I != E; ++I) {
+ if (&*I == B)
+ return true;
+ }
+
+ // Can't be in a loop if it's the entry block -- the entry block may not
+ // have predecessors.
+ if (BB == &BB->getParent()->getEntryBlock())
+ return false;
+
+ // Otherwise, continue doing the normal per-BB CFG walk.
+ for (succ_iterator I = succ_begin(BB), E = succ_end(BB); I != E; ++I)
+ Worklist.push_back(*I);
+
+ if (Worklist.empty()) {
+ // We've proven that there's no path!
+ return false;
+ }
+ } else {
+ Worklist.push_back(const_cast<BasicBlock*>(A->getParent()));
+ }
+
+ if (A->getParent() == &A->getParent()->getParent()->getEntryBlock())
+ return true;
+ if (B->getParent() == &A->getParent()->getParent()->getEntryBlock())
+ return false;
+
+ return isPotentiallyReachableInner(Worklist,
+ const_cast<BasicBlock*>(B->getParent()),
+ DT, LI);
+}
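
A minimal caller sketch, with invented variable names, of the two overloads that exist after this refactoring; both accept null DominatorTree and LoopInfo pointers:

    // Whole-CFG query: can control flow possibly get from one block to another?
    bool BlockReach = isPotentiallyReachable(FromBB, ToBB, &DT, &LI);
    // Instruction form: performs the same-block linear scan before falling
    // back to the shared worklist walk in isPotentiallyReachableInner.
    bool InstReach = isPotentiallyReachable(FromInst, ToInst, &DT, &LI);
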
diff --git a/lib/Analysis/CMakeLists.txt b/lib/Analysis/CMakeLists.txt
index 94ded34..3624aac 100644
--- a/lib/Analysis/CMakeLists.txt
+++ b/lib/Analysis/CMakeLists.txt
@@ -14,6 +14,7 @@ add_llvm_library(LLVMAnalysis
CostModel.cpp
CodeMetrics.cpp
ConstantFolding.cpp
+ Delinearization.cpp
DependenceAnalysis.cpp
DomPrinter.cpp
DominanceFrontier.cpp
@@ -35,17 +36,7 @@ add_llvm_library(LLVMAnalysis
ModuleDebugInfoPrinter.cpp
NoAliasAnalysis.cpp
PHITransAddr.cpp
- PathNumbering.cpp
- PathProfileInfo.cpp
- PathProfileVerifier.cpp
PostDominators.cpp
- ProfileEstimatorPass.cpp
- ProfileInfo.cpp
- ProfileInfoLoader.cpp
- ProfileInfoLoaderPass.cpp
- ProfileVerifierPass.cpp
- ProfileDataLoader.cpp
- ProfileDataLoaderPass.cpp
PtrUseVisitor.cpp
RegionInfo.cpp
RegionPass.cpp
diff --git a/lib/Analysis/CaptureTracking.cpp b/lib/Analysis/CaptureTracking.cpp
index 9eb76a8..79fab1b 100644
--- a/lib/Analysis/CaptureTracking.cpp
+++ b/lib/Analysis/CaptureTracking.cpp
@@ -146,8 +146,14 @@ void llvm::PointerMayBeCaptured(const Value *V, CaptureTracker *Tracker) {
case Instruction::PHI:
case Instruction::Select:
// The original value is not captured via this if the new value isn't.
+ Count = 0;
for (Instruction::use_iterator UI = I->use_begin(), UE = I->use_end();
UI != UE; ++UI) {
+ // If there are lots of uses, conservatively say that the value
+ // is captured to avoid taking too much compile time.
+ if (Count++ >= Threshold)
+ return Tracker->tooManyUses();
+
Use *U = &UI.getUse();
if (Visited.insert(U))
if (Tracker->shouldExplore(U))
diff --git a/lib/Analysis/ConstantFolding.cpp b/lib/Analysis/ConstantFolding.cpp
index bc0dffc..3d32232 100644
--- a/lib/Analysis/ConstantFolding.cpp
+++ b/lib/Analysis/ConstantFolding.cpp
@@ -224,7 +224,8 @@ static bool IsConstantOffsetFromGlobal(Constant *C, GlobalValue *&GV,
APInt &Offset, const DataLayout &TD) {
// Trivial case, constant is the global.
if ((GV = dyn_cast<GlobalValue>(C))) {
- Offset.clearAllBits();
+ unsigned BitWidth = TD.getPointerTypeSizeInBits(GV->getType());
+ Offset = APInt(BitWidth, 0);
return true;
}
@@ -238,16 +239,23 @@ static bool IsConstantOffsetFromGlobal(Constant *C, GlobalValue *&GV,
return IsConstantOffsetFromGlobal(CE->getOperand(0), GV, Offset, TD);
// i32* getelementptr ([5 x i32]* @a, i32 0, i32 5)
- if (GEPOperator *GEP = dyn_cast<GEPOperator>(CE)) {
- // If the base isn't a global+constant, we aren't either.
- if (!IsConstantOffsetFromGlobal(CE->getOperand(0), GV, Offset, TD))
- return false;
+ GEPOperator *GEP = dyn_cast<GEPOperator>(CE);
+ if (!GEP)
+ return false;
- // Otherwise, add any offset that our operands provide.
- return GEP->accumulateConstantOffset(TD, Offset);
- }
+ unsigned BitWidth = TD.getPointerTypeSizeInBits(GEP->getType());
+ APInt TmpOffset(BitWidth, 0);
- return false;
+ // If the base isn't a global+constant, we aren't either.
+ if (!IsConstantOffsetFromGlobal(CE->getOperand(0), GV, TmpOffset, TD))
+ return false;
+
+ // Otherwise, add any offset that our operands provide.
+ if (!GEP->accumulateConstantOffset(TD, TmpOffset))
+ return false;
+
+ Offset = TmpOffset;
+ return true;
}
/// ReadDataFromGlobal - Recursive helper to read bits out of global. C is the
@@ -324,12 +332,12 @@ static bool ReadDataFromGlobal(Constant *C, uint64_t ByteOffset,
// If we read all of the bytes we needed from this element we're done.
uint64_t NextEltOffset = SL->getElementOffset(Index);
- if (BytesLeft <= NextEltOffset-CurEltOffset-ByteOffset)
+ if (BytesLeft <= NextEltOffset - CurEltOffset - ByteOffset)
return true;
// Move to the next element of the struct.
- CurPtr += NextEltOffset-CurEltOffset-ByteOffset;
- BytesLeft -= NextEltOffset-CurEltOffset-ByteOffset;
+ CurPtr += NextEltOffset - CurEltOffset - ByteOffset;
+ BytesLeft -= NextEltOffset - CurEltOffset - ByteOffset;
ByteOffset = 0;
CurEltOffset = NextEltOffset;
}
@@ -338,7 +346,7 @@ static bool ReadDataFromGlobal(Constant *C, uint64_t ByteOffset,
if (isa<ConstantArray>(C) || isa<ConstantVector>(C) ||
isa<ConstantDataSequential>(C)) {
- Type *EltTy = cast<SequentialType>(C->getType())->getElementType();
+ Type *EltTy = C->getType()->getSequentialElementType();
uint64_t EltSize = TD.getTypeAllocSize(EltTy);
uint64_t Index = ByteOffset / EltSize;
uint64_t Offset = ByteOffset - Index * EltSize;
@@ -346,7 +354,7 @@ static bool ReadDataFromGlobal(Constant *C, uint64_t ByteOffset,
if (ArrayType *AT = dyn_cast<ArrayType>(C->getType()))
NumElts = AT->getNumElements();
else
- NumElts = cast<VectorType>(C->getType())->getNumElements();
+ NumElts = C->getType()->getVectorNumElements();
for (; Index != NumElts; ++Index) {
if (!ReadDataFromGlobal(C->getAggregateElement(Index), Offset, CurPtr,
@@ -367,9 +375,10 @@ static bool ReadDataFromGlobal(Constant *C, uint64_t ByteOffset,
if (ConstantExpr *CE = dyn_cast<ConstantExpr>(C)) {
if (CE->getOpcode() == Instruction::IntToPtr &&
- CE->getOperand(0)->getType() == TD.getIntPtrType(CE->getContext()))
+ CE->getOperand(0)->getType() == TD.getIntPtrType(CE->getType())) {
return ReadDataFromGlobal(CE->getOperand(0), ByteOffset, CurPtr,
BytesLeft, TD);
+ }
}
// Otherwise, unknown initializer type.
@@ -378,26 +387,29 @@ static bool ReadDataFromGlobal(Constant *C, uint64_t ByteOffset,
static Constant *FoldReinterpretLoadFromConstPtr(Constant *C,
const DataLayout &TD) {
- Type *LoadTy = cast<PointerType>(C->getType())->getElementType();
+ PointerType *PTy = cast<PointerType>(C->getType());
+ Type *LoadTy = PTy->getElementType();
IntegerType *IntType = dyn_cast<IntegerType>(LoadTy);
// If this isn't an integer load we can't fold it directly.
if (!IntType) {
+ unsigned AS = PTy->getAddressSpace();
+
// If this is a float/double load, we can try folding it as an int32/64 load
// and then bitcast the result. This can be useful for union cases. Note
// that address spaces don't matter here since we're not going to result in
// an actual new load.
Type *MapTy;
if (LoadTy->isHalfTy())
- MapTy = Type::getInt16PtrTy(C->getContext());
+ MapTy = Type::getInt16PtrTy(C->getContext(), AS);
else if (LoadTy->isFloatTy())
- MapTy = Type::getInt32PtrTy(C->getContext());
+ MapTy = Type::getInt32PtrTy(C->getContext(), AS);
else if (LoadTy->isDoubleTy())
- MapTy = Type::getInt64PtrTy(C->getContext());
+ MapTy = Type::getInt64PtrTy(C->getContext(), AS);
else if (LoadTy->isVectorTy()) {
- MapTy = IntegerType::get(C->getContext(),
- TD.getTypeAllocSizeInBits(LoadTy));
- MapTy = PointerType::getUnqual(MapTy);
+ MapTy = PointerType::getIntNPtrTy(C->getContext(),
+ TD.getTypeAllocSizeInBits(LoadTy),
+ AS);
} else
return 0;
@@ -408,10 +420,11 @@ static Constant *FoldReinterpretLoadFromConstPtr(Constant *C,
}
unsigned BytesLoaded = (IntType->getBitWidth() + 7) / 8;
- if (BytesLoaded > 32 || BytesLoaded == 0) return 0;
+ if (BytesLoaded > 32 || BytesLoaded == 0)
+ return 0;
GlobalValue *GVal;
- APInt Offset(TD.getPointerSizeInBits(), 0);
+ APInt Offset;
if (!IsConstantOffsetFromGlobal(C, GVal, Offset, TD))
return 0;
@@ -422,7 +435,8 @@ static Constant *FoldReinterpretLoadFromConstPtr(Constant *C,
// If we're loading off the beginning of the global, some bytes may be valid,
// but we don't try to handle this.
- if (Offset.isNegative()) return 0;
+ if (Offset.isNegative())
+ return 0;
// If we're not accessing anything in this constant, the result is undefined.
if (Offset.getZExtValue() >=
@@ -439,7 +453,7 @@ static Constant *FoldReinterpretLoadFromConstPtr(Constant *C,
ResultVal = RawBytes[BytesLoaded - 1];
for (unsigned i = 1; i != BytesLoaded; ++i) {
ResultVal <<= 8;
- ResultVal |= RawBytes[BytesLoaded-1-i];
+ ResultVal |= RawBytes[BytesLoaded - 1 - i];
}
} else {
ResultVal = RawBytes[0];
@@ -464,14 +478,17 @@ Constant *llvm::ConstantFoldLoadFromConstPtr(Constant *C,
// If the loaded value isn't a constant expr, we can't handle it.
ConstantExpr *CE = dyn_cast<ConstantExpr>(C);
- if (!CE) return 0;
+ if (!CE)
+ return 0;
if (CE->getOpcode() == Instruction::GetElementPtr) {
- if (GlobalVariable *GV = dyn_cast<GlobalVariable>(CE->getOperand(0)))
- if (GV->isConstant() && GV->hasDefinitiveInitializer())
+ if (GlobalVariable *GV = dyn_cast<GlobalVariable>(CE->getOperand(0))) {
+ if (GV->isConstant() && GV->hasDefinitiveInitializer()) {
if (Constant *V =
ConstantFoldLoadThroughGEPConstantExpr(GV->getInitializer(), CE))
return V;
+ }
+ }
}
// Instead of loading constant c string, use corresponding integer value
@@ -576,13 +593,13 @@ static Constant *SymbolicallyEvaluateBinop(unsigned Opc, Constant *Op0,
// constant. This happens frequently when iterating over a global array.
if (Opc == Instruction::Sub && DL) {
GlobalValue *GV1, *GV2;
- unsigned PtrSize = DL->getPointerSizeInBits();
- unsigned OpSize = DL->getTypeSizeInBits(Op0->getType());
- APInt Offs1(PtrSize, 0), Offs2(PtrSize, 0);
+ APInt Offs1, Offs2;
if (IsConstantOffsetFromGlobal(Op0, GV1, Offs1, *DL))
if (IsConstantOffsetFromGlobal(Op1, GV2, Offs2, *DL) &&
GV1 == GV2) {
+ unsigned OpSize = DL->getTypeSizeInBits(Op0->getType());
+
// (&GV+C1) - (&GV+C2) -> C1-C2, pointer arithmetic cannot overflow.
// PtrToInt may change the bitwidth so we have convert to the right size
// first.
@@ -600,15 +617,18 @@ static Constant *SymbolicallyEvaluateBinop(unsigned Opc, Constant *Op0,
static Constant *CastGEPIndices(ArrayRef<Constant *> Ops,
Type *ResultTy, const DataLayout *TD,
const TargetLibraryInfo *TLI) {
- if (!TD) return 0;
- Type *IntPtrTy = TD->getIntPtrType(ResultTy->getContext());
+ if (!TD)
+ return 0;
+
+ Type *IntPtrTy = TD->getIntPtrType(ResultTy);
bool Any = false;
SmallVector<Constant*, 32> NewIdxs;
for (unsigned i = 1, e = Ops.size(); i != e; ++i) {
if ((i == 1 ||
- !isa<StructType>(GetElementPtrInst::getIndexedType(Ops[0]->getType(),
- Ops.slice(1, i-1)))) &&
+ !isa<StructType>(GetElementPtrInst::getIndexedType(
+ Ops[0]->getType(),
+ Ops.slice(1, i - 1)))) &&
Ops[i]->getType() != IntPtrTy) {
Any = true;
NewIdxs.push_back(ConstantExpr::getCast(CastInst::getCastOpcode(Ops[i],
@@ -619,13 +639,16 @@ static Constant *CastGEPIndices(ArrayRef<Constant *> Ops,
} else
NewIdxs.push_back(Ops[i]);
}
- if (!Any) return 0;
- Constant *C =
- ConstantExpr::getGetElementPtr(Ops[0], NewIdxs);
- if (ConstantExpr *CE = dyn_cast<ConstantExpr>(C))
+ if (!Any)
+ return 0;
+
+ Constant *C = ConstantExpr::getGetElementPtr(Ops[0], NewIdxs);
+ if (ConstantExpr *CE = dyn_cast<ConstantExpr>(C)) {
if (Constant *Folded = ConstantFoldConstantExpression(CE, TD, TLI))
C = Folded;
+ }
+
return C;
}
@@ -640,7 +663,7 @@ static Constant* StripPtrCastKeepAS(Constant* Ptr) {
if (NewPtrTy->getAddressSpace() != OldPtrTy->getAddressSpace()) {
NewPtrTy = NewPtrTy->getElementType()->getPointerTo(
OldPtrTy->getAddressSpace());
- Ptr = ConstantExpr::getBitCast(Ptr, NewPtrTy);
+ Ptr = ConstantExpr::getPointerCast(Ptr, NewPtrTy);
}
return Ptr;
}
@@ -651,11 +674,12 @@ static Constant *SymbolicallyEvaluateGEP(ArrayRef<Constant *> Ops,
Type *ResultTy, const DataLayout *TD,
const TargetLibraryInfo *TLI) {
Constant *Ptr = Ops[0];
- if (!TD || !cast<PointerType>(Ptr->getType())->getElementType()->isSized() ||
+ if (!TD || !Ptr->getType()->getPointerElementType()->isSized() ||
!Ptr->getType()->isPointerTy())
return 0;
- Type *IntPtrTy = TD->getIntPtrType(Ptr->getContext());
+ Type *IntPtrTy = TD->getIntPtrType(Ptr->getType());
+ Type *ResultElementTy = ResultTy->getPointerElementType();
// If this is a constant expr gep that is effectively computing an
// "offsetof", fold it into 'cast int Size to T*' instead of 'gep 0, 0, 12'
@@ -664,8 +688,7 @@ static Constant *SymbolicallyEvaluateGEP(ArrayRef<Constant *> Ops,
// If this is "gep i8* Ptr, (sub 0, V)", fold this as:
// "inttoptr (sub (ptrtoint Ptr), V)"
- if (Ops.size() == 2 &&
- cast<PointerType>(ResultTy)->getElementType()->isIntegerTy(8)) {
+ if (Ops.size() == 2 && ResultElementTy->isIntegerTy(8)) {
ConstantExpr *CE = dyn_cast<ConstantExpr>(Ops[1]);
assert((CE == 0 || CE->getType() == IntPtrTy) &&
"CastGEPIndices didn't canonicalize index types!");
@@ -692,7 +715,7 @@ static Constant *SymbolicallyEvaluateGEP(ArrayRef<Constant *> Ops,
// If this is a GEP of a GEP, fold it all into a single GEP.
while (GEPOperator *GEP = dyn_cast<GEPOperator>(Ptr)) {
- SmallVector<Value *, 4> NestedOps(GEP->op_begin()+1, GEP->op_end());
+ SmallVector<Value *, 4> NestedOps(GEP->op_begin() + 1, GEP->op_end());
// Do not try the incorporate the sub-GEP if some index is not a number.
bool AllConstantInt = true;
@@ -713,12 +736,15 @@ static Constant *SymbolicallyEvaluateGEP(ArrayRef<Constant *> Ops,
// If the base value for this address is a literal integer value, fold the
// getelementptr to the resulting integer value casted to the pointer type.
APInt BasePtr(BitWidth, 0);
- if (ConstantExpr *CE = dyn_cast<ConstantExpr>(Ptr))
- if (CE->getOpcode() == Instruction::IntToPtr)
+ if (ConstantExpr *CE = dyn_cast<ConstantExpr>(Ptr)) {
+ if (CE->getOpcode() == Instruction::IntToPtr) {
if (ConstantInt *Base = dyn_cast<ConstantInt>(CE->getOperand(0)))
BasePtr = Base->getValue().zextOrTrunc(BitWidth);
+ }
+ }
+
if (Ptr->isNullValue() || BasePtr != 0) {
- Constant *C = ConstantInt::get(Ptr->getContext(), Offset+BasePtr);
+ Constant *C = ConstantInt::get(Ptr->getContext(), Offset + BasePtr);
return ConstantExpr::getIntToPtr(C, ResultTy);
}
@@ -728,7 +754,8 @@ static Constant *SymbolicallyEvaluateGEP(ArrayRef<Constant *> Ops,
// Also, this helps GlobalOpt do SROA on GlobalVariables.
Type *Ty = Ptr->getType();
assert(Ty->isPointerTy() && "Forming regular GEP of non-pointer type");
- SmallVector<Constant*, 32> NewIdxs;
+ SmallVector<Constant *, 32> NewIdxs;
+
do {
if (SequentialType *ATy = dyn_cast<SequentialType>(Ty)) {
if (ATy->isPointerTy()) {
@@ -743,7 +770,6 @@ static Constant *SymbolicallyEvaluateGEP(ArrayRef<Constant *> Ops,
// Determine which element of the array the offset points into.
APInt ElemSize(BitWidth, TD->getTypeAllocSize(ATy->getElementType()));
- IntegerType *IntPtrTy = TD->getIntPtrType(Ty->getContext());
if (ElemSize == 0)
// The element size is 0. This may be [0 x Ty]*, so just use a zero
// index for this level and proceed to the next level to see if it can
@@ -778,7 +804,7 @@ static Constant *SymbolicallyEvaluateGEP(ArrayRef<Constant *> Ops,
// We've reached some non-indexable type.
break;
}
- } while (Ty != cast<PointerType>(ResultTy)->getElementType());
+ } while (Ty != ResultElementTy);
// If we haven't used up the entire offset by descending the static
// type, then the offset is pointing into the middle of an indivisible
@@ -787,14 +813,13 @@ static Constant *SymbolicallyEvaluateGEP(ArrayRef<Constant *> Ops,
return 0;
// Create a GEP.
- Constant *C =
- ConstantExpr::getGetElementPtr(Ptr, NewIdxs);
- assert(cast<PointerType>(C->getType())->getElementType() == Ty &&
+ Constant *C = ConstantExpr::getGetElementPtr(Ptr, NewIdxs);
+ assert(C->getType()->getPointerElementType() == Ty &&
"Computed GetElementPtr has unexpected type!");
// If we ended up indexing a member with a type that doesn't match
// the type of what the original indices indexed, add a cast.
- if (Ty != cast<PointerType>(ResultTy)->getElementType())
+ if (Ty != ResultElementTy)
C = FoldBitCast(C, ResultTy, *TD);
return C;
@@ -867,16 +892,18 @@ Constant *llvm::ConstantFoldInstruction(Instruction *I,
if (const LoadInst *LI = dyn_cast<LoadInst>(I))
return ConstantFoldLoadInst(LI, TD);
- if (InsertValueInst *IVI = dyn_cast<InsertValueInst>(I))
+ if (InsertValueInst *IVI = dyn_cast<InsertValueInst>(I)) {
return ConstantExpr::getInsertValue(
cast<Constant>(IVI->getAggregateOperand()),
cast<Constant>(IVI->getInsertedValueOperand()),
IVI->getIndices());
+ }
- if (ExtractValueInst *EVI = dyn_cast<ExtractValueInst>(I))
+ if (ExtractValueInst *EVI = dyn_cast<ExtractValueInst>(I)) {
return ConstantExpr::getExtractValue(
cast<Constant>(EVI->getAggregateOperand()),
EVI->getIndices());
+ }
return ConstantFoldInstOperands(I->getOpcode(), I->getType(), Ops, TD, TLI);
}
@@ -930,9 +957,10 @@ Constant *llvm::ConstantFoldInstOperands(unsigned Opcode, Type *DestTy,
const TargetLibraryInfo *TLI) {
// Handle easy binops first.
if (Instruction::isBinaryOp(Opcode)) {
- if (isa<ConstantExpr>(Ops[0]) || isa<ConstantExpr>(Ops[1]))
+ if (isa<ConstantExpr>(Ops[0]) || isa<ConstantExpr>(Ops[1])) {
if (Constant *C = SymbolicallyEvaluateBinop(Opcode, Ops[0], Ops[1], TD))
return C;
+ }
return ConstantExpr::get(Opcode, Ops[0], Ops[1]);
}
@@ -953,10 +981,11 @@ Constant *llvm::ConstantFoldInstOperands(unsigned Opcode, Type *DestTy,
if (TD && CE->getOpcode() == Instruction::IntToPtr) {
Constant *Input = CE->getOperand(0);
unsigned InWidth = Input->getType()->getScalarSizeInBits();
- if (TD->getPointerSizeInBits() < InWidth) {
+ unsigned PtrWidth = TD->getPointerTypeSizeInBits(CE->getType());
+ if (PtrWidth < InWidth) {
Constant *Mask =
- ConstantInt::get(CE->getContext(), APInt::getLowBitsSet(InWidth,
- TD->getPointerSizeInBits()));
+ ConstantInt::get(CE->getContext(),
+ APInt::getLowBitsSet(InWidth, PtrWidth));
Input = ConstantExpr::getAnd(Input, Mask);
}
// Do a zext or trunc to get to the dest size.
@@ -966,13 +995,22 @@ Constant *llvm::ConstantFoldInstOperands(unsigned Opcode, Type *DestTy,
return ConstantExpr::getCast(Opcode, Ops[0], DestTy);
case Instruction::IntToPtr:
// If the input is a ptrtoint, turn the pair into a ptr to ptr bitcast if
- // the int size is >= the ptr size. This requires knowing the width of a
- // pointer, so it can't be done in ConstantExpr::getCast.
- if (ConstantExpr *CE = dyn_cast<ConstantExpr>(Ops[0]))
- if (TD &&
- TD->getPointerSizeInBits() <= CE->getType()->getScalarSizeInBits() &&
- CE->getOpcode() == Instruction::PtrToInt)
- return FoldBitCast(CE->getOperand(0), DestTy, *TD);
+ // the int size is >= the ptr size and the address spaces are the same.
+ // This requires knowing the width of a pointer, so it can't be done in
+ // ConstantExpr::getCast.
+ if (ConstantExpr *CE = dyn_cast<ConstantExpr>(Ops[0])) {
+ if (TD && CE->getOpcode() == Instruction::PtrToInt) {
+ Constant *SrcPtr = CE->getOperand(0);
+ unsigned SrcPtrSize = TD->getPointerTypeSizeInBits(SrcPtr->getType());
+ unsigned MidIntSize = CE->getType()->getScalarSizeInBits();
+
+ if (MidIntSize >= SrcPtrSize) {
+ unsigned SrcAS = SrcPtr->getType()->getPointerAddressSpace();
+ if (SrcAS == DestTy->getPointerAddressSpace())
+ return FoldBitCast(CE->getOperand(0), DestTy, *TD);
+ }
+ }
+ }
return ConstantExpr::getCast(Opcode, Ops[0], DestTy);
case Instruction::Trunc:
@@ -984,6 +1022,7 @@ Constant *llvm::ConstantFoldInstOperands(unsigned Opcode, Type *DestTy,
case Instruction::SIToFP:
case Instruction::FPToUI:
case Instruction::FPToSI:
+ case Instruction::AddrSpaceCast:
return ConstantExpr::getCast(Opcode, Ops[0], DestTy);
case Instruction::BitCast:
if (TD)
@@ -1024,8 +1063,8 @@ Constant *llvm::ConstantFoldCompareInstOperands(unsigned Predicate,
// around to know if bit truncation is happening.
if (ConstantExpr *CE0 = dyn_cast<ConstantExpr>(Ops0)) {
if (TD && Ops1->isNullValue()) {
- Type *IntPtrTy = TD->getIntPtrType(CE0->getContext());
if (CE0->getOpcode() == Instruction::IntToPtr) {
+ Type *IntPtrTy = TD->getIntPtrType(CE0->getType());
// Convert the integer value to the right size to ensure we get the
// proper extension or truncation.
Constant *C = ConstantExpr::getIntegerCast(CE0->getOperand(0),
@@ -1036,19 +1075,21 @@ Constant *llvm::ConstantFoldCompareInstOperands(unsigned Predicate,
// Only do this transformation if the int is intptrty in size, otherwise
// there is a truncation or extension that we aren't modeling.
- if (CE0->getOpcode() == Instruction::PtrToInt &&
- CE0->getType() == IntPtrTy) {
- Constant *C = CE0->getOperand(0);
- Constant *Null = Constant::getNullValue(C->getType());
- return ConstantFoldCompareInstOperands(Predicate, C, Null, TD, TLI);
+ if (CE0->getOpcode() == Instruction::PtrToInt) {
+ Type *IntPtrTy = TD->getIntPtrType(CE0->getOperand(0)->getType());
+ if (CE0->getType() == IntPtrTy) {
+ Constant *C = CE0->getOperand(0);
+ Constant *Null = Constant::getNullValue(C->getType());
+ return ConstantFoldCompareInstOperands(Predicate, C, Null, TD, TLI);
+ }
}
}
if (ConstantExpr *CE1 = dyn_cast<ConstantExpr>(Ops1)) {
if (TD && CE0->getOpcode() == CE1->getOpcode()) {
- Type *IntPtrTy = TD->getIntPtrType(CE0->getContext());
-
if (CE0->getOpcode() == Instruction::IntToPtr) {
+ Type *IntPtrTy = TD->getIntPtrType(CE0->getType());
+
// Convert the integer value to the right size to ensure we get the
// proper extension or truncation.
Constant *C0 = ConstantExpr::getIntegerCast(CE0->getOperand(0),
@@ -1060,11 +1101,17 @@ Constant *llvm::ConstantFoldCompareInstOperands(unsigned Predicate,
// Only do this transformation if the int is intptrty in size, otherwise
// there is a truncation or extension that we aren't modeling.
- if ((CE0->getOpcode() == Instruction::PtrToInt &&
- CE0->getType() == IntPtrTy &&
- CE0->getOperand(0)->getType() == CE1->getOperand(0)->getType()))
- return ConstantFoldCompareInstOperands(Predicate, CE0->getOperand(0),
- CE1->getOperand(0), TD, TLI);
+ if (CE0->getOpcode() == Instruction::PtrToInt) {
+ Type *IntPtrTy = TD->getIntPtrType(CE0->getOperand(0)->getType());
+ if (CE0->getType() == IntPtrTy &&
+ CE0->getOperand(0)->getType() == CE1->getOperand(0)->getType()) {
+ return ConstantFoldCompareInstOperands(Predicate,
+ CE0->getOperand(0),
+ CE1->getOperand(0),
+ TD,
+ TLI);
+ }
+ }
}
}
@@ -1101,7 +1148,8 @@ Constant *llvm::ConstantFoldLoadThroughGEPConstantExpr(Constant *C,
// addressing.
for (unsigned i = 2, e = CE->getNumOperands(); i != e; ++i) {
C = C->getAggregateElement(CE->getOperand(i));
- if (C == 0) return 0;
+ if (C == 0)
+ return 0;
}
return C;
}
@@ -1116,7 +1164,8 @@ Constant *llvm::ConstantFoldLoadThroughGEPIndices(Constant *C,
// addressing.
for (unsigned i = 0, e = Indices.size(); i != e; ++i) {
C = C->getAggregateElement(Indices[i]);
- if (C == 0) return 0;
+ if (C == 0)
+ return 0;
}
return C;
}
@@ -1128,8 +1177,7 @@ Constant *llvm::ConstantFoldLoadThroughGEPIndices(Constant *C,
/// canConstantFoldCallTo - Return true if its even possible to fold a call to
/// the specified function.
-bool
-llvm::canConstantFoldCallTo(const Function *F) {
+bool llvm::canConstantFoldCallTo(const Function *F) {
switch (F->getIntrinsicID()) {
case Intrinsic::fabs:
case Intrinsic::log:
@@ -1167,7 +1215,8 @@ llvm::canConstantFoldCallTo(const Function *F) {
case 0: break;
}
- if (!F->hasName()) return false;
+ if (!F->hasName())
+ return false;
StringRef Name = F->getName();
// In these cases, the check of the length is required. We don't want to
@@ -1250,7 +1299,7 @@ static Constant *ConstantFoldBinaryFP(double (*NativeFP)(double, double),
static Constant *ConstantFoldConvertToInt(const APFloat &Val,
bool roundTowardZero, Type *Ty) {
// All of these conversion intrinsics form an integer of at most 64bits.
- unsigned ResultWidth = cast<IntegerType>(Ty)->getBitWidth();
+ unsigned ResultWidth = Ty->getIntegerBitWidth();
assert(ResultWidth <= 64 &&
"Can only constant fold conversions to 64 and 32 bit ints");
@@ -1271,7 +1320,8 @@ static Constant *ConstantFoldConvertToInt(const APFloat &Val,
Constant *
llvm::ConstantFoldCall(Function *F, ArrayRef<Constant *> Operands,
const TargetLibraryInfo *TLI) {
- if (!F->hasName()) return 0;
+ if (!F->hasName())
+ return 0;
StringRef Name = F->getName();
Type *Ty = F->getReturnType();
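
A worked example, illustrative only, of the `(&GV+C1) - (&GV+C2) -> C1-C2` fold in SymbolicallyEvaluateBinop, whose offset widths the hunks above now derive from the pointer type itself:

    ; With 64-bit pointers, both operands are @a plus constant byte offsets
    ; (12 and 4), so the whole expression constant-folds to i64 8.
    ;   sub (i64 ptrtoint (i32* getelementptr ([5 x i32]* @a, i64 0, i64 3) to i64),
    ;        i64 ptrtoint (i32* getelementptr ([5 x i32]* @a, i64 0, i64 1) to i64))
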
diff --git a/lib/Analysis/CostModel.cpp b/lib/Analysis/CostModel.cpp
index 927508e..f943258 100644
--- a/lib/Analysis/CostModel.cpp
+++ b/lib/Analysis/CostModel.cpp
@@ -19,6 +19,7 @@
#define CM_NAME "cost-model"
#define DEBUG_TYPE CM_NAME
+#include "llvm/ADT/STLExtras.h"
#include "llvm/Analysis/Passes.h"
#include "llvm/Analysis/TargetTransformInfo.h"
#include "llvm/IR/Function.h"
@@ -26,10 +27,15 @@
#include "llvm/IR/IntrinsicInst.h"
#include "llvm/IR/Value.h"
#include "llvm/Pass.h"
+#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/raw_ostream.h"
using namespace llvm;
+static cl::opt<bool> EnableReduxCost("costmodel-reduxcost", cl::init(false),
+ cl::Hidden,
+ cl::desc("Recognize reduction patterns."));
+
namespace {
class CostModelAnalysis : public FunctionPass {
@@ -105,6 +111,260 @@ static TargetTransformInfo::OperandValueKind getOperandInfo(Value *V) {
return OpInfo;
}
+static bool matchMask(SmallVectorImpl<int> &M1, SmallVectorImpl<int> &M2) {
+ if (M1.size() != M2.size())
+ return false;
+
+ for (unsigned i = 0, e = M1.size(); i != e; ++i)
+ if (M1[i] != M2[i])
+ return false;
+
+ return true;
+}
+
+static bool matchPairwiseShuffleMask(ShuffleVectorInst *SI, bool IsLeft,
+ unsigned Level) {
+ // We don't need a shuffle if we just want to have element 0 in position 0 of
+ // the vector.
+ if (!SI && Level == 0 && IsLeft)
+ return true;
+ else if (!SI)
+ return false;
+
+ SmallVector<int, 32> Mask(SI->getType()->getVectorNumElements(), -1);
+
+ // Build a mask of 0, 2, ... (left) or 1, 3, ... (right) depending on whether
+ // we look at the left or right side.
+ for (unsigned i = 0, e = (1 << Level), val = !IsLeft; i != e; ++i, val += 2)
+ Mask[i] = val;
+
+ SmallVector<int, 16> ActualMask = SI->getShuffleMask();
+ if (!matchMask(Mask, ActualMask))
+ return false;
+
+ return true;
+}
+
+static bool matchPairwiseReductionAtLevel(const BinaryOperator *BinOp,
+ unsigned Level, unsigned NumLevels) {
+ // Match one level of pairwise operations.
+ // %rdx.shuf.0.0 = shufflevector <4 x float> %rdx, <4 x float> undef,
+ // <4 x i32> <i32 0, i32 2 , i32 undef, i32 undef>
+ // %rdx.shuf.0.1 = shufflevector <4 x float> %rdx, <4 x float> undef,
+ // <4 x i32> <i32 1, i32 3, i32 undef, i32 undef>
+ // %bin.rdx.0 = fadd <4 x float> %rdx.shuf.0.0, %rdx.shuf.0.1
+ if (BinOp == 0)
+ return false;
+
+ assert(BinOp->getType()->isVectorTy() && "Expecting a vector type");
+
+ unsigned Opcode = BinOp->getOpcode();
+ Value *L = BinOp->getOperand(0);
+ Value *R = BinOp->getOperand(1);
+
+ ShuffleVectorInst *LS = dyn_cast<ShuffleVectorInst>(L);
+ if (!LS && Level)
+ return false;
+ ShuffleVectorInst *RS = dyn_cast<ShuffleVectorInst>(R);
+ if (!RS && Level)
+ return false;
+
+ // On level 0 we can omit one shufflevector instruction.
+ if (!Level && !RS && !LS)
+ return false;
+
+ // Shuffle inputs must match.
+ Value *NextLevelOpL = LS ? LS->getOperand(0) : 0;
+ Value *NextLevelOpR = RS ? RS->getOperand(0) : 0;
+ Value *NextLevelOp = 0;
+ if (NextLevelOpR && NextLevelOpL) {
+ // If we have two shuffles their operands must match.
+ if (NextLevelOpL != NextLevelOpR)
+ return false;
+
+ NextLevelOp = NextLevelOpL;
+ } else if (Level == 0 && (NextLevelOpR || NextLevelOpL)) {
+ // On the first level we can omit the shufflevector <0, undef,...>. So the
+ // input to the other shufflevector <1, undef> must match with one of the
+ // inputs to the current binary operation.
+ // Example:
+ // %NextLevelOpL = shufflevector %R, <1, undef ...>
+ // %BinOp = fadd %NextLevelOpL, %R
+ if (NextLevelOpL && NextLevelOpL != R)
+ return false;
+ else if (NextLevelOpR && NextLevelOpR != L)
+ return false;
+
+ NextLevelOp = NextLevelOpL ? R : L;
+ } else
+ return false;
+
+ // Check that the next levels binary operation exists and matches with the
+ // current one.
+ BinaryOperator *NextLevelBinOp = 0;
+ if (Level + 1 != NumLevels) {
+ if (!(NextLevelBinOp = dyn_cast<BinaryOperator>(NextLevelOp)))
+ return false;
+ else if (NextLevelBinOp->getOpcode() != Opcode)
+ return false;
+ }
+
+ // Shuffle mask for pairwise operation must match.
+ if (matchPairwiseShuffleMask(LS, true, Level)) {
+ if (!matchPairwiseShuffleMask(RS, false, Level))
+ return false;
+ } else if (matchPairwiseShuffleMask(RS, true, Level)) {
+ if (!matchPairwiseShuffleMask(LS, false, Level))
+ return false;
+ } else
+ return false;
+
+ if (++Level == NumLevels)
+ return true;
+
+ // Match next level.
+ return matchPairwiseReductionAtLevel(NextLevelBinOp, Level, NumLevels);
+}
+
+static bool matchPairwiseReduction(const ExtractElementInst *ReduxRoot,
+ unsigned &Opcode, Type *&Ty) {
+ if (!EnableReduxCost)
+ return false;
+
+ // Need to extract the first element.
+ ConstantInt *CI = dyn_cast<ConstantInt>(ReduxRoot->getOperand(1));
+ unsigned Idx = ~0u;
+ if (CI)
+ Idx = CI->getZExtValue();
+ if (Idx != 0)
+ return false;
+
+ BinaryOperator *RdxStart = dyn_cast<BinaryOperator>(ReduxRoot->getOperand(0));
+ if (!RdxStart)
+ return false;
+
+ Type *VecTy = ReduxRoot->getOperand(0)->getType();
+ unsigned NumVecElems = VecTy->getVectorNumElements();
+ if (!isPowerOf2_32(NumVecElems))
+ return false;
+
+ // We look for a sequence of shuffle,shuffle,add triples like the following
+ // that builds a pairwise reduction tree.
+ //
+ // (X0, X1, X2, X3)
+ // (X0 + X1, X2 + X3, undef, undef)
+ // ((X0 + X1) + (X2 + X3), undef, undef, undef)
+ //
+ // %rdx.shuf.0.0 = shufflevector <4 x float> %rdx, <4 x float> undef,
+ // <4 x i32> <i32 0, i32 2 , i32 undef, i32 undef>
+ // %rdx.shuf.0.1 = shufflevector <4 x float> %rdx, <4 x float> undef,
+ // <4 x i32> <i32 1, i32 3, i32 undef, i32 undef>
+ // %bin.rdx.0 = fadd <4 x float> %rdx.shuf.0.0, %rdx.shuf.0.1
+ // %rdx.shuf.1.0 = shufflevector <4 x float> %bin.rdx.0, <4 x float> undef,
+ // <4 x i32> <i32 0, i32 undef, i32 undef, i32 undef>
+ // %rdx.shuf.1.1 = shufflevector <4 x float> %bin.rdx.0, <4 x float> undef,
+ // <4 x i32> <i32 1, i32 undef, i32 undef, i32 undef>
+ // %bin.rdx8 = fadd <4 x float> %rdx.shuf.1.0, %rdx.shuf.1.1
+ // %r = extractelement <4 x float> %bin.rdx8, i32 0
+ if (!matchPairwiseReductionAtLevel(RdxStart, 0, Log2_32(NumVecElems)))
+ return false;
+
+ Opcode = RdxStart->getOpcode();
+ Ty = VecTy;
+
+ return true;
+}
+
+static std::pair<Value *, ShuffleVectorInst *>
+getShuffleAndOtherOprd(BinaryOperator *B) {
+
+ Value *L = B->getOperand(0);
+ Value *R = B->getOperand(1);
+ ShuffleVectorInst *S = 0;
+
+ if ((S = dyn_cast<ShuffleVectorInst>(L)))
+ return std::make_pair(R, S);
+
+ S = dyn_cast<ShuffleVectorInst>(R);
+ return std::make_pair(L, S);
+}
+
+static bool matchVectorSplittingReduction(const ExtractElementInst *ReduxRoot,
+ unsigned &Opcode, Type *&Ty) {
+ if (!EnableReduxCost)
+ return false;
+
+ // Need to extract the first element.
+ ConstantInt *CI = dyn_cast<ConstantInt>(ReduxRoot->getOperand(1));
+ unsigned Idx = ~0u;
+ if (CI)
+ Idx = CI->getZExtValue();
+ if (Idx != 0)
+ return false;
+
+ BinaryOperator *RdxStart = dyn_cast<BinaryOperator>(ReduxRoot->getOperand(0));
+ if (!RdxStart)
+ return false;
+ unsigned RdxOpcode = RdxStart->getOpcode();
+
+ Type *VecTy = ReduxRoot->getOperand(0)->getType();
+ unsigned NumVecElems = VecTy->getVectorNumElements();
+ if (!isPowerOf2_32(NumVecElems))
+ return false;
+
+  // We look for a sequence of shuffles and adds like the following, matching
+  // one fadd/shufflevector pair at a time.
+ //
+ // %rdx.shuf = shufflevector <4 x float> %rdx, <4 x float> undef,
+ // <4 x i32> <i32 2, i32 3, i32 undef, i32 undef>
+ // %bin.rdx = fadd <4 x float> %rdx, %rdx.shuf
+ // %rdx.shuf7 = shufflevector <4 x float> %bin.rdx, <4 x float> undef,
+ // <4 x i32> <i32 1, i32 undef, i32 undef, i32 undef>
+ // %bin.rdx8 = fadd <4 x float> %bin.rdx, %rdx.shuf7
+ // %r = extractelement <4 x float> %bin.rdx8, i32 0
+
+ unsigned MaskStart = 1;
+ Value *RdxOp = RdxStart;
+ SmallVector<int, 32> ShuffleMask(NumVecElems, 0);
+ unsigned NumVecElemsRemain = NumVecElems;
+  while (NumVecElemsRemain > 1) {
+ // Check for the right reduction operation.
+ BinaryOperator *BinOp;
+ if (!(BinOp = dyn_cast<BinaryOperator>(RdxOp)))
+ return false;
+ if (BinOp->getOpcode() != RdxOpcode)
+ return false;
+
+ Value *NextRdxOp;
+ ShuffleVectorInst *Shuffle;
+ tie(NextRdxOp, Shuffle) = getShuffleAndOtherOprd(BinOp);
+
+    // Check that the current reduction operation and the shuffle use the same
+    // value.
+ if (Shuffle == 0)
+ return false;
+ if (Shuffle->getOperand(0) != NextRdxOp)
+ return false;
+
+    // Check that the shuffle mask matches.
+ for (unsigned j = 0; j != MaskStart; ++j)
+ ShuffleMask[j] = MaskStart + j;
+ // Fill the rest of the mask with -1 for undef.
+ std::fill(&ShuffleMask[MaskStart], ShuffleMask.end(), -1);
+
+ SmallVector<int, 16> Mask = Shuffle->getShuffleMask();
+ if (!matchMask(ShuffleMask, Mask))
+ return false;
+
+ RdxOp = NextRdxOp;
+ NumVecElemsRemain /= 2;
+ MaskStart *= 2;
+ }
+
+ Opcode = RdxOpcode;
+ Ty = VecTy;
+ return true;
+}
+
unsigned CostModelAnalysis::getInstructionCost(const Instruction *I) const {
if (!TTI)
return -1;
@@ -189,6 +449,17 @@ unsigned CostModelAnalysis::getInstructionCost(const Instruction *I) const {
unsigned Idx = -1;
if (CI)
Idx = CI->getZExtValue();
+
+    // Try to match a reduction sequence (a series of shufflevector and vector
+    // adds followed by an extractelement).
+ unsigned ReduxOpCode;
+ Type *ReduxType;
+
+ if (matchVectorSplittingReduction(EEI, ReduxOpCode, ReduxType))
+ return TTI->getReductionCost(ReduxOpCode, ReduxType, false);
+ else if (matchPairwiseReduction(EEI, ReduxOpCode, ReduxType))
+ return TTI->getReductionCost(ReduxOpCode, ReduxType, true);
+
return TTI->getVectorInstrCost(I->getOpcode(),
EEI->getOperand(0)->getType(), Idx);
}
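For reference, here is a complete IR function that the vector-splitting matcher above accepts, assembled from the shapes sketched in its comments (an illustrative sample, not part of this patch; the RUN line assumes the cl::opt behind EnableReduxCost, defined earlier in CostModel.cpp and not visible in this hunk, is named costmodel-reduxcost):

    ; RUN: opt < %s -cost-model -analyze -costmodel-reduxcost=true
    define float @splitting_reduction(<4 x float> %rdx) {
      %rdx.shuf = shufflevector <4 x float> %rdx, <4 x float> undef,
                                <4 x i32> <i32 2, i32 3, i32 undef, i32 undef>
      %bin.rdx = fadd <4 x float> %rdx, %rdx.shuf
      %rdx.shuf7 = shufflevector <4 x float> %bin.rdx, <4 x float> undef,
                                 <4 x i32> <i32 1, i32 undef, i32 undef, i32 undef>
      %bin.rdx8 = fadd <4 x float> %bin.rdx, %rdx.shuf7
      %r = extractelement <4 x float> %bin.rdx8, i32 0
      ret float %r
    }

With the matcher engaged, getInstructionCost prices %r as TTI->getReductionCost(Instruction::FAdd, <4 x float>, /*IsPairwise=*/false) rather than as a plain extractelement; the pairwise matcher does the same with IsPairwise=true for the shuffle/shuffle/fadd tree shown in matchPairwiseReduction's comment.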
diff --git a/lib/Analysis/Delinearization.cpp b/lib/Analysis/Delinearization.cpp
new file mode 100644
index 0000000..3ed0609
--- /dev/null
+++ b/lib/Analysis/Delinearization.cpp
@@ -0,0 +1,133 @@
+//===---- Delinearization.cpp - MultiDimensional Index Delinearization ----===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This implements an analysis pass that tries to delinearize all GEP
+// instructions in all loops using the SCEV analysis functionality. This pass is
+// only used for testing purposes: if your pass needs delinearization, please
+// use the on-demand SCEVAddRecExpr::delinearize() function.
+//
+//===----------------------------------------------------------------------===//
+
+#define DL_NAME "delinearize"
+#define DEBUG_TYPE DL_NAME
+#include "llvm/IR/Constants.h"
+#include "llvm/IR/DerivedTypes.h"
+#include "llvm/IR/Function.h"
+#include "llvm/IR/Instructions.h"
+#include "llvm/IR/LLVMContext.h"
+#include "llvm/Pass.h"
+#include "llvm/IR/Type.h"
+#include "llvm/Analysis/LoopInfo.h"
+#include "llvm/Analysis/Passes.h"
+#include "llvm/Analysis/ScalarEvolution.h"
+#include "llvm/Analysis/ScalarEvolutionExpressions.h"
+#include "llvm/Support/CommandLine.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/InstIterator.h"
+#include "llvm/Support/raw_ostream.h"
+
+using namespace llvm;
+
+namespace {
+
+class Delinearization : public FunctionPass {
+ Delinearization(const Delinearization &); // do not implement
+protected:
+ Function *F;
+ LoopInfo *LI;
+ ScalarEvolution *SE;
+
+public:
+ static char ID; // Pass identification, replacement for typeid
+
+ Delinearization() : FunctionPass(ID) {
+ initializeDelinearizationPass(*PassRegistry::getPassRegistry());
+ }
+ virtual bool runOnFunction(Function &F);
+ virtual void getAnalysisUsage(AnalysisUsage &AU) const;
+ virtual void print(raw_ostream &O, const Module *M = 0) const;
+};
+
+} // end anonymous namespace
+
+void Delinearization::getAnalysisUsage(AnalysisUsage &AU) const {
+ AU.setPreservesAll();
+ AU.addRequired<LoopInfo>();
+ AU.addRequired<ScalarEvolution>();
+}
+
+bool Delinearization::runOnFunction(Function &F) {
+ this->F = &F;
+ SE = &getAnalysis<ScalarEvolution>();
+ LI = &getAnalysis<LoopInfo>();
+ return false;
+}
+
+static Value *getPointerOperand(Instruction &Inst) {
+ if (LoadInst *Load = dyn_cast<LoadInst>(&Inst))
+ return Load->getPointerOperand();
+ else if (StoreInst *Store = dyn_cast<StoreInst>(&Inst))
+ return Store->getPointerOperand();
+ else if (GetElementPtrInst *Gep = dyn_cast<GetElementPtrInst>(&Inst))
+ return Gep->getPointerOperand();
+ return NULL;
+}
+
+void Delinearization::print(raw_ostream &O, const Module *) const {
+ O << "Delinearization on function " << F->getName() << ":\n";
+ for (inst_iterator I = inst_begin(F), E = inst_end(F); I != E; ++I) {
+ Instruction *Inst = &(*I);
+
+ // Only analyze loads and stores.
+ if (!isa<StoreInst>(Inst) && !isa<LoadInst>(Inst) &&
+ !isa<GetElementPtrInst>(Inst))
+ continue;
+
+ const BasicBlock *BB = Inst->getParent();
+ // Delinearize the memory access as analyzed in all the surrounding loops.
+ // Do not analyze memory accesses outside loops.
+ for (Loop *L = LI->getLoopFor(BB); L != NULL; L = L->getParentLoop()) {
+ const SCEV *AccessFn = SE->getSCEVAtScope(getPointerOperand(*Inst), L);
+ const SCEVAddRecExpr *AR = dyn_cast<SCEVAddRecExpr>(AccessFn);
+
+ // Do not try to delinearize memory accesses that are not AddRecs.
+ if (!AR)
+ break;
+
+ O << "AddRec: " << *AR << "\n";
+
+ SmallVector<const SCEV *, 3> Subscripts, Sizes;
+ const SCEV *Res = AR->delinearize(*SE, Subscripts, Sizes);
+ int Size = Subscripts.size();
+ if (Res == AR || Size == 0) {
+ O << "failed to delinearize\n";
+ continue;
+ }
+ O << "Base offset: " << *Res << "\n";
+ O << "ArrayDecl[UnknownSize]";
+ for (int i = 0; i < Size - 1; i++)
+ O << "[" << *Sizes[i] << "]";
+ O << " with elements of " << *Sizes[Size - 1] << " bytes.\n";
+
+ O << "ArrayRef";
+ for (int i = 0; i < Size; i++)
+ O << "[" << *Subscripts[i] << "]";
+ O << "\n";
+ }
+ }
+}
+
+char Delinearization::ID = 0;
+static const char delinearization_name[] = "Delinearization";
+INITIALIZE_PASS_BEGIN(Delinearization, DL_NAME, delinearization_name, true,
+ true)
+INITIALIZE_PASS_DEPENDENCY(LoopInfo)
+INITIALIZE_PASS_END(Delinearization, DL_NAME, delinearization_name, true, true)
+
+FunctionPass *llvm::createDelinearizationPass() { return new Delinearization; }
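To make the printer's output format concrete, consider a linearized two-dimensional access (a hedged sketch; the exact SCEV spellings depend on how the front end lowers the loop nest):

    // C++ source compiled to IR and fed to 'opt -delinearize -analyze':
    void set(long n, long m, double *A) {
      for (long i = 0; i < n; ++i)
        for (long j = 0; j < m; ++j)
          A[i * m + j] = 1.0;   // one linearized subscript
    }

For the store, print() would emit something along these lines, recovering both subscripts and the row size %m from the single access function:

    AddRec: {{%A,+,(8 * %m)}<%for.i>,+,8}<%for.j>
    Base offset: %A
    ArrayDecl[UnknownSize][%m] with elements of 8 bytes.
    ArrayRef[{0,+,1}<%for.i>][{0,+,1}<%for.j>]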
diff --git a/lib/Analysis/DependenceAnalysis.cpp b/lib/Analysis/DependenceAnalysis.cpp
index a0f1a69..3b3e2ef 100644
--- a/lib/Analysis/DependenceAnalysis.cpp
+++ b/lib/Analysis/DependenceAnalysis.cpp
@@ -24,11 +24,11 @@
// Both of these are conservative weaknesses;
// that is, not a source of correctness problems.
//
-// The implementation depends on the GEP instruction to
-// differentiate subscripts. Since Clang linearizes subscripts
-// for most arrays, we give up some precision (though the existing MIV tests
-// will help). We trust that the GEP instruction will eventually be extended.
-// In the meantime, we should explore Maslov's ideas about delinearization.
+// The implementation depends on the GEP instruction to differentiate
+// subscripts. Since Clang linearizes some array subscripts, the dependence
+// analysis uses SCEV->delinearize to recover the representation of multiple
+// subscripts, and thus avoids the more expensive and less precise MIV tests.
+// The delinearization is controlled by the flag -da-delinearize.
//
// We should pay some careful attention to the possibility of integer overflow
// in the implementation of the various tests. This could happen with Add,
@@ -61,6 +61,7 @@
#include "llvm/Analysis/ScalarEvolutionExpressions.h"
#include "llvm/Analysis/ValueTracking.h"
#include "llvm/IR/Operator.h"
+#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/InstIterator.h"
@@ -104,6 +105,10 @@ STATISTIC(BanerjeeApplications, "Banerjee applications");
STATISTIC(BanerjeeIndependence, "Banerjee independence");
STATISTIC(BanerjeeSuccesses, "Banerjee successes");
+static cl::opt<bool>
+Delinearize("da-delinearize", cl::init(false), cl::Hidden, cl::ZeroOrMore,
+ cl::desc("Try to delinearize array references."));
+
//===----------------------------------------------------------------------===//
// basics
@@ -3171,6 +3176,55 @@ void DependenceAnalysis::updateDirection(Dependence::DVEntry &Level,
llvm_unreachable("constraint has unexpected kind");
}
+/// Check if we can delinearize the subscripts. If the SCEVs representing the
+/// source and destination array references are recurrences on a nested loop,
+/// this function flattens the nested recurrences into separate recurrences
+/// for each loop level.
+bool
+DependenceAnalysis::tryDelinearize(const SCEV *SrcSCEV, const SCEV *DstSCEV,
+ SmallVectorImpl<Subscript> &Pair) const {
+ const SCEVAddRecExpr *SrcAR = dyn_cast<SCEVAddRecExpr>(SrcSCEV);
+ const SCEVAddRecExpr *DstAR = dyn_cast<SCEVAddRecExpr>(DstSCEV);
+ if (!SrcAR || !DstAR || !SrcAR->isAffine() || !DstAR->isAffine())
+ return false;
+
+ SmallVector<const SCEV *, 4> SrcSubscripts, DstSubscripts, SrcSizes, DstSizes;
+ SrcAR->delinearize(*SE, SrcSubscripts, SrcSizes);
+ DstAR->delinearize(*SE, DstSubscripts, DstSizes);
+
+ int size = SrcSubscripts.size();
+ int dstSize = DstSubscripts.size();
+ if (size != dstSize || size < 2)
+ return false;
+
+#ifndef NDEBUG
+ DEBUG(errs() << "\nSrcSubscripts: ");
+ for (int i = 0; i < size; i++)
+ DEBUG(errs() << *SrcSubscripts[i]);
+ DEBUG(errs() << "\nDstSubscripts: ");
+ for (int i = 0; i < size; i++)
+ DEBUG(errs() << *DstSubscripts[i]);
+#endif
+
+ // The delinearization transforms a single-subscript MIV dependence test into
+ // a multi-subscript SIV dependence test that is easier to compute. So we
+ // resize Pair to contain as many pairs of subscripts as the delinearization
+ // has found, and then initialize the pairs following the delinearization.
+ Pair.resize(size);
+ for (int i = 0; i < size; ++i) {
+ Pair[i].Src = SrcSubscripts[i];
+ Pair[i].Dst = DstSubscripts[i];
+
+    // FIXME: we should record the bounds SrcSizes[i] and DstSizes[i] that the
+    // delinearization has found, and add these constraints to the dependence
+    // check to avoid memory accesses overflowing from one dimension into
+    // another. This is related to the problem of determining the existence of
+    // data dependences in array accesses using a different number of
+    // subscripts: in C one can access an array A[100][100]; as A[0][9999],
+    // *A[9999], etc.
+ }
+
+ return true;
+}
//===----------------------------------------------------------------------===//
@@ -3280,6 +3334,12 @@ Dependence *DependenceAnalysis::depends(Instruction *Src,
Pair[0].Dst = DstSCEV;
}
+ if (Delinearize && Pairs == 1 && CommonLevels > 1 &&
+ tryDelinearize(Pair[0].Src, Pair[0].Dst, Pair)) {
+    DEBUG(dbgs() << " delinearized GEP\n");
+ Pairs = Pair.size();
+ }
+
for (unsigned P = 0; P < Pairs; ++P) {
Pair[P].Loops.resize(MaxLevels + 1);
Pair[P].GroupLoops.resize(MaxLevels + 1);
@@ -3698,6 +3758,12 @@ const SCEV *DependenceAnalysis::getSplitIteration(const Dependence *Dep,
Pair[0].Dst = DstSCEV;
}
+ if (Delinearize && Pairs == 1 && CommonLevels > 1 &&
+ tryDelinearize(Pair[0].Src, Pair[0].Dst, Pair)) {
+    DEBUG(dbgs() << " delinearized GEP\n");
+ Pairs = Pair.size();
+ }
+
for (unsigned P = 0; P < Pairs; ++P) {
Pair[P].Loops.resize(MaxLevels + 1);
Pair[P].GroupLoops.resize(MaxLevels + 1);
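Concretely (a sketch following the FIXME's notation, not output from this patch): for two accesses A[i*m + j] in a doubly nested loop, tryDelinearize replaces the single linearized MIV subscript pair with one SIV pair per loop level, which the cheaper and more precise SIV tests then handle:

    before:  Pair[0] = ( {{%A,+,(8*%m)}<i>,+,8}<j>, {{%A,+,(8*%m)}<i>,+,8}<j> )
    after:   Pair[0] = ( {0,+,1}<i>, {0,+,1}<i> )   // row subscripts
             Pair[1] = ( {0,+,1}<j>, {0,+,1}<j> )   // column subscripts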
diff --git a/lib/Analysis/IPA/CallGraph.cpp b/lib/Analysis/IPA/CallGraph.cpp
index 7620fd9..f042964 100644
--- a/lib/Analysis/IPA/CallGraph.cpp
+++ b/lib/Analysis/IPA/CallGraph.cpp
@@ -6,11 +6,6 @@
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
-//
-// This file implements the CallGraph class and provides the BasicCallGraph
-// default implementation.
-//
-//===----------------------------------------------------------------------===//
#include "llvm/Analysis/CallGraph.h"
#include "llvm/IR/Instructions.h"
@@ -21,168 +16,92 @@
#include "llvm/Support/raw_ostream.h"
using namespace llvm;
-namespace {
+CallGraph::CallGraph()
+ : ModulePass(ID), Root(0), ExternalCallingNode(0), CallsExternalNode(0) {
+ initializeCallGraphPass(*PassRegistry::getPassRegistry());
+}
-//===----------------------------------------------------------------------===//
-// BasicCallGraph class definition
-//
-class BasicCallGraph : public ModulePass, public CallGraph {
- // Root is root of the call graph, or the external node if a 'main' function
- // couldn't be found.
- //
- CallGraphNode *Root;
-
- // ExternalCallingNode - This node has edges to all external functions and
- // those internal functions that have their address taken.
- CallGraphNode *ExternalCallingNode;
-
- // CallsExternalNode - This node has edges to it from all functions making
- // indirect calls or calling an external function.
- CallGraphNode *CallsExternalNode;
-
-public:
- static char ID; // Class identification, replacement for typeinfo
- BasicCallGraph() : ModulePass(ID), Root(0),
- ExternalCallingNode(0), CallsExternalNode(0) {
- initializeBasicCallGraphPass(*PassRegistry::getPassRegistry());
- }
+void CallGraph::addToCallGraph(Function *F) {
+ CallGraphNode *Node = getOrInsertFunction(F);
- // runOnModule - Compute the call graph for the specified module.
- virtual bool runOnModule(Module &M) {
- CallGraph::initialize(M);
-
- ExternalCallingNode = getOrInsertFunction(0);
- CallsExternalNode = new CallGraphNode(0);
- Root = 0;
-
- // Add every function to the call graph.
- for (Module::iterator I = M.begin(), E = M.end(); I != E; ++I)
- addToCallGraph(I);
-
- // If we didn't find a main function, use the external call graph node
- if (Root == 0) Root = ExternalCallingNode;
-
- return false;
- }
-
- virtual void getAnalysisUsage(AnalysisUsage &AU) const {
- AU.setPreservesAll();
- }
+ // If this function has external linkage, anything could call it.
+ if (!F->hasLocalLinkage()) {
+ ExternalCallingNode->addCalledFunction(CallSite(), Node);
- virtual void print(raw_ostream &OS, const Module *) const {
- OS << "CallGraph Root is: ";
- if (Function *F = getRoot()->getFunction())
- OS << F->getName() << "\n";
- else {
- OS << "<<null function: 0x" << getRoot() << ">>\n";
+ // Found the entry point?
+ if (F->getName() == "main") {
+ if (Root) // Found multiple external mains? Don't pick one.
+ Root = ExternalCallingNode;
+ else
+ Root = Node; // Found a main, keep track of it!
}
-
- CallGraph::print(OS, 0);
}
- virtual void releaseMemory() {
- destroy();
- }
-
- /// getAdjustedAnalysisPointer - This method is used when a pass implements
- /// an analysis interface through multiple inheritance. If needed, it should
- /// override this to adjust the this pointer as needed for the specified pass
- /// info.
- virtual void *getAdjustedAnalysisPointer(AnalysisID PI) {
- if (PI == &CallGraph::ID)
- return (CallGraph*)this;
- return this;
- }
-
- CallGraphNode* getExternalCallingNode() const { return ExternalCallingNode; }
- CallGraphNode* getCallsExternalNode() const { return CallsExternalNode; }
-
- // getRoot - Return the root of the call graph, which is either main, or if
- // main cannot be found, the external node.
- //
- CallGraphNode *getRoot() { return Root; }
- const CallGraphNode *getRoot() const { return Root; }
-
-private:
- //===---------------------------------------------------------------------
- // Implementation of CallGraph construction
- //
-
- // addToCallGraph - Add a function to the call graph, and link the node to all
- // of the functions that it calls.
- //
- void addToCallGraph(Function *F) {
- CallGraphNode *Node = getOrInsertFunction(F);
-
- // If this function has external linkage, anything could call it.
- if (!F->hasLocalLinkage()) {
- ExternalCallingNode->addCalledFunction(CallSite(), Node);
-
- // Found the entry point?
- if (F->getName() == "main") {
- if (Root) // Found multiple external mains? Don't pick one.
- Root = ExternalCallingNode;
- else
- Root = Node; // Found a main, keep track of it!
+ // If this function has its address taken, anything could call it.
+ if (F->hasAddressTaken())
+ ExternalCallingNode->addCalledFunction(CallSite(), Node);
+
+ // If this function is not defined in this translation unit, it could call
+ // anything.
+ if (F->isDeclaration() && !F->isIntrinsic())
+ Node->addCalledFunction(CallSite(), CallsExternalNode);
+
+ // Look for calls by this function.
+ for (Function::iterator BB = F->begin(), BBE = F->end(); BB != BBE; ++BB)
+ for (BasicBlock::iterator II = BB->begin(), IE = BB->end(); II != IE;
+ ++II) {
+ CallSite CS(cast<Value>(II));
+ if (CS) {
+ const Function *Callee = CS.getCalledFunction();
+ if (!Callee)
+ // Indirect calls of intrinsics are not allowed so no need to check.
+ Node->addCalledFunction(CS, CallsExternalNode);
+ else if (!Callee->isIntrinsic())
+ Node->addCalledFunction(CS, getOrInsertFunction(Callee));
}
}
+}
- // If this function has its address taken, anything could call it.
- if (F->hasAddressTaken())
- ExternalCallingNode->addCalledFunction(CallSite(), Node);
-
- // If this function is not defined in this translation unit, it could call
- // anything.
- if (F->isDeclaration() && !F->isIntrinsic())
- Node->addCalledFunction(CallSite(), CallsExternalNode);
-
- // Look for calls by this function.
- for (Function::iterator BB = F->begin(), BBE = F->end(); BB != BBE; ++BB)
- for (BasicBlock::iterator II = BB->begin(), IE = BB->end();
- II != IE; ++II) {
- CallSite CS(cast<Value>(II));
- if (CS) {
- const Function *Callee = CS.getCalledFunction();
- if (!Callee)
- // Indirect calls of intrinsics are not allowed so no need to check.
- Node->addCalledFunction(CS, CallsExternalNode);
- else if (!Callee->isIntrinsic())
- Node->addCalledFunction(CS, getOrInsertFunction(Callee));
- }
- }
- }
+void CallGraph::getAnalysisUsage(AnalysisUsage &AU) const {
+ AU.setPreservesAll();
+}
- //
- // destroy - Release memory for the call graph
- virtual void destroy() {
- /// CallsExternalNode is not in the function map, delete it explicitly.
- if (CallsExternalNode) {
- CallsExternalNode->allReferencesDropped();
- delete CallsExternalNode;
- CallsExternalNode = 0;
- }
- CallGraph::destroy();
- }
-};
+bool CallGraph::runOnModule(Module &M) {
+ Mod = &M;
-} //End anonymous namespace
+ ExternalCallingNode = getOrInsertFunction(0);
+ assert(!CallsExternalNode);
+ CallsExternalNode = new CallGraphNode(0);
+ Root = 0;
-INITIALIZE_ANALYSIS_GROUP(CallGraph, "Call Graph", BasicCallGraph)
-INITIALIZE_AG_PASS(BasicCallGraph, CallGraph, "basiccg",
- "Basic CallGraph Construction", false, true, true)
+ // Add every function to the call graph.
+ for (Module::iterator I = M.begin(), E = M.end(); I != E; ++I)
+ addToCallGraph(I);
-char CallGraph::ID = 0;
-char BasicCallGraph::ID = 0;
+ // If we didn't find a main function, use the external call graph node
+ if (Root == 0)
+ Root = ExternalCallingNode;
-void CallGraph::initialize(Module &M) {
- Mod = &M;
+ return false;
}
-void CallGraph::destroy() {
- if (FunctionMap.empty()) return;
-
- // Reset all node's use counts to zero before deleting them to prevent an
- // assertion from firing.
+INITIALIZE_PASS(CallGraph, "basiccg", "CallGraph Construction", false, true)
+
+char CallGraph::ID = 0;
+
+void CallGraph::releaseMemory() {
+ /// CallsExternalNode is not in the function map, delete it explicitly.
+ if (CallsExternalNode) {
+ CallsExternalNode->allReferencesDropped();
+ delete CallsExternalNode;
+ CallsExternalNode = 0;
+ }
+
+ if (FunctionMap.empty())
+ return;
+
+  // Reset all nodes' use counts to zero before deleting them to prevent an
+  // assertion from firing.
#ifndef NDEBUG
for (FunctionMapTy::iterator I = FunctionMap.begin(), E = FunctionMap.end();
I != E; ++I)
@@ -195,7 +114,14 @@ void CallGraph::destroy() {
FunctionMap.clear();
}
-void CallGraph::print(raw_ostream &OS, Module*) const {
+void CallGraph::print(raw_ostream &OS, const Module*) const {
+ OS << "CallGraph Root is: ";
+ if (Function *F = Root->getFunction())
+ OS << F->getName() << "\n";
+ else {
+ OS << "<<null function: 0x" << Root << ">>\n";
+ }
+
for (CallGraph::const_iterator I = begin(), E = end(); I != E; ++I)
I->second->print(OS);
}
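Since CallGraph is now a concrete ModulePass rather than an analysis group with a BasicCallGraph default, clients require and fetch it like any other pass; GlobalsModRef's switch to INITIALIZE_PASS_DEPENDENCY below is the same change on the producer side. A minimal client sketch against the patched API (hypothetical pass name):

    #include "llvm/Analysis/CallGraph.h"
    #include "llvm/Pass.h"
    #include "llvm/Support/raw_ostream.h"
    using namespace llvm;

    namespace {
    struct CGConsumer : public ModulePass {
      static char ID;
      CGConsumer() : ModulePass(ID) {}
      virtual void getAnalysisUsage(AnalysisUsage &AU) const {
        AU.setPreservesAll();
        AU.addRequired<CallGraph>();  // a plain pass dependency now, no AG machinery
      }
      virtual bool runOnModule(Module &M) {
        CallGraph &CG = getAnalysis<CallGraph>();
        CG.print(errs(), &M);         // note the new const Module* signature
        return false;
      }
    };
    }
    char CGConsumer::ID = 0;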
diff --git a/lib/Analysis/IPA/CallGraphSCCPass.cpp b/lib/Analysis/IPA/CallGraphSCCPass.cpp
index a0d788f..182beca 100644
--- a/lib/Analysis/IPA/CallGraphSCCPass.cpp
+++ b/lib/Analysis/IPA/CallGraphSCCPass.cpp
@@ -22,7 +22,7 @@
#include "llvm/Analysis/CallGraph.h"
#include "llvm/IR/Function.h"
#include "llvm/IR/IntrinsicInst.h"
-#include "llvm/PassManagers.h"
+#include "llvm/IR/LegacyPassManagers.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/Timer.h"
diff --git a/lib/Analysis/IPA/GlobalsModRef.cpp b/lib/Analysis/IPA/GlobalsModRef.cpp
index 92d0d23..7ec4644 100644
--- a/lib/Analysis/IPA/GlobalsModRef.cpp
+++ b/lib/Analysis/IPA/GlobalsModRef.cpp
@@ -189,7 +189,7 @@ char GlobalsModRef::ID = 0;
INITIALIZE_AG_PASS_BEGIN(GlobalsModRef, AliasAnalysis,
"globalsmodref-aa", "Simple mod/ref analysis for globals",
false, true, false)
-INITIALIZE_AG_DEPENDENCY(CallGraph)
+INITIALIZE_PASS_DEPENDENCY(CallGraph)
INITIALIZE_AG_PASS_END(GlobalsModRef, AliasAnalysis,
"globalsmodref-aa", "Simple mod/ref analysis for globals",
false, true, false)
diff --git a/lib/Analysis/IPA/IPA.cpp b/lib/Analysis/IPA/IPA.cpp
index 1c1816d..47357cf 100644
--- a/lib/Analysis/IPA/IPA.cpp
+++ b/lib/Analysis/IPA/IPA.cpp
@@ -19,8 +19,7 @@ using namespace llvm;
/// initializeIPA - Initialize all passes linked into the IPA library.
void llvm::initializeIPA(PassRegistry &Registry) {
- initializeBasicCallGraphPass(Registry);
- initializeCallGraphAnalysisGroup(Registry);
+ initializeCallGraphPass(Registry);
initializeCallGraphPrinterPass(Registry);
initializeCallGraphViewerPass(Registry);
initializeFindUsedTypesPass(Registry);
diff --git a/lib/Analysis/IPA/InlineCost.cpp b/lib/Analysis/IPA/InlineCost.cpp
index 37d73a8..3bc796e 100644
--- a/lib/Analysis/IPA/InlineCost.cpp
+++ b/lib/Analysis/IPA/InlineCost.cpp
@@ -59,6 +59,8 @@ class CallAnalyzer : public InstVisitor<CallAnalyzer, bool> {
bool ExposesReturnsTwice;
bool HasDynamicAlloca;
bool ContainsNoDuplicateCall;
+ bool HasReturn;
+ bool HasIndirectBr;
/// Number of bytes allocated statically by the callee.
uint64_t AllocatedSize;
@@ -132,6 +134,12 @@ class CallAnalyzer : public InstVisitor<CallAnalyzer, bool> {
bool visitExtractValue(ExtractValueInst &I);
bool visitInsertValue(InsertValueInst &I);
bool visitCallSite(CallSite CS);
+ bool visitReturnInst(ReturnInst &RI);
+ bool visitBranchInst(BranchInst &BI);
+ bool visitSwitchInst(SwitchInst &SI);
+ bool visitIndirectBrInst(IndirectBrInst &IBI);
+ bool visitResumeInst(ResumeInst &RI);
+ bool visitUnreachableInst(UnreachableInst &I);
public:
CallAnalyzer(const DataLayout *TD, const TargetTransformInfo &TTI,
@@ -139,12 +147,13 @@ public:
: TD(TD), TTI(TTI), F(Callee), Threshold(Threshold), Cost(0),
IsCallerRecursive(false), IsRecursiveCall(false),
ExposesReturnsTwice(false), HasDynamicAlloca(false),
- ContainsNoDuplicateCall(false), AllocatedSize(0), NumInstructions(0),
- NumVectorInstructions(0), FiftyPercentVectorBonus(0),
- TenPercentVectorBonus(0), VectorBonus(0), NumConstantArgs(0),
- NumConstantOffsetPtrArgs(0), NumAllocaArgs(0), NumConstantPtrCmps(0),
- NumConstantPtrDiffs(0), NumInstructionsSimplified(0),
- SROACostSavings(0), SROACostSavingsLost(0) {}
+ ContainsNoDuplicateCall(false), HasReturn(false), HasIndirectBr(false),
+ AllocatedSize(0), NumInstructions(0), NumVectorInstructions(0),
+ FiftyPercentVectorBonus(0), TenPercentVectorBonus(0), VectorBonus(0),
+ NumConstantArgs(0), NumConstantOffsetPtrArgs(0), NumAllocaArgs(0),
+ NumConstantPtrCmps(0), NumConstantPtrDiffs(0),
+ NumInstructionsSimplified(0), SROACostSavings(0),
+ SROACostSavingsLost(0) {}
bool analyzeCall(CallSite CS);
@@ -704,7 +713,7 @@ bool CallAnalyzer::simplifyCallSite(Function *F, CallSite CS) {
}
bool CallAnalyzer::visitCallSite(CallSite CS) {
- if (CS.isCall() && cast<CallInst>(CS.getInstruction())->canReturnTwice() &&
+ if (CS.hasFnAttr(Attribute::ReturnsTwice) &&
!F.getAttributes().hasAttribute(AttributeSet::FunctionIndex,
Attribute::ReturnsTwice)) {
// This aborts the entire analysis.
@@ -785,6 +794,60 @@ bool CallAnalyzer::visitCallSite(CallSite CS) {
return Base::visitCallSite(CS);
}
+bool CallAnalyzer::visitReturnInst(ReturnInst &RI) {
+ // At least one return instruction will be free after inlining.
+ bool Free = !HasReturn;
+ HasReturn = true;
+ return Free;
+}
+
+bool CallAnalyzer::visitBranchInst(BranchInst &BI) {
+ // We model unconditional branches as essentially free -- they really
+ // shouldn't exist at all, but handling them makes the behavior of the
+ // inliner more regular and predictable. Interestingly, conditional branches
+ // which will fold away are also free.
+ return BI.isUnconditional() || isa<ConstantInt>(BI.getCondition()) ||
+ dyn_cast_or_null<ConstantInt>(
+ SimplifiedValues.lookup(BI.getCondition()));
+}
+
+bool CallAnalyzer::visitSwitchInst(SwitchInst &SI) {
+ // We model unconditional switches as free, see the comments on handling
+ // branches.
+ return isa<ConstantInt>(SI.getCondition()) ||
+ dyn_cast_or_null<ConstantInt>(
+ SimplifiedValues.lookup(SI.getCondition()));
+}
+
+bool CallAnalyzer::visitIndirectBrInst(IndirectBrInst &IBI) {
+  // We never want to inline functions that contain an indirectbr. Inlining
+  // would be incorrect because all the blockaddresses (in static global
+  // initializers for example) would still refer to the original function, and
+  // the indirect jump would jump from the inlined copy of the function into
+  // the original function, which is extremely undefined behavior.
+ // FIXME: This logic isn't really right; we can safely inline functions with
+ // indirectbr's as long as no other function or global references the
+ // blockaddress of a block within the current function. And as a QOI issue,
+ // if someone is using a blockaddress without an indirectbr, and that
+ // reference somehow ends up in another function or global, we probably don't
+ // want to inline this function.
+ HasIndirectBr = true;
+ return false;
+}
+
+bool CallAnalyzer::visitResumeInst(ResumeInst &RI) {
+ // FIXME: It's not clear that a single instruction is an accurate model for
+ // the inline cost of a resume instruction.
+ return false;
+}
+
+bool CallAnalyzer::visitUnreachableInst(UnreachableInst &I) {
+  // FIXME: It might be reasonable to discount the cost of instructions leading
+  // to unreachable as they have the lowest possible impact on both runtime and
+  // code size.
+ return true; // No actual code is needed for unreachable.
+}
+
bool CallAnalyzer::visitInstruction(Instruction &I) {
// Some instructions are free. All of the free intrinsics can also be
// handled by SROA, etc.
@@ -808,8 +871,7 @@ bool CallAnalyzer::visitInstruction(Instruction &I) {
/// construct has been detected. It returns false if inlining is no longer
/// viable, and true if inlining remains viable.
bool CallAnalyzer::analyzeBlock(BasicBlock *BB) {
- for (BasicBlock::iterator I = BB->begin(), E = llvm::prior(BB->end());
- I != E; ++I) {
+ for (BasicBlock::iterator I = BB->begin(), E = BB->end(); I != E; ++I) {
++NumInstructions;
if (isa<ExtractElementInst>(I) || I->getType()->isVectorTy())
++NumVectorInstructions;
@@ -825,7 +887,8 @@ bool CallAnalyzer::analyzeBlock(BasicBlock *BB) {
Cost += InlineConstants::InstrCost;
    // If visiting this instruction detected an uninlinable pattern, abort.
- if (IsRecursiveCall || ExposesReturnsTwice || HasDynamicAlloca)
+ if (IsRecursiveCall || ExposesReturnsTwice || HasDynamicAlloca ||
+ HasIndirectBr)
return false;
// If the caller is a recursive function then we don't want to inline
@@ -989,10 +1052,6 @@ bool CallAnalyzer::analyzeCall(CallSite CS) {
}
}
- // Track whether we've seen a return instruction. The first return
- // instruction is free, as at least one will usually disappear in inlining.
- bool HasReturn = false;
-
// Populate our simplified values by mapping from function arguments to call
// arguments with known important simplifications.
CallSite::arg_iterator CAI = CS.arg_begin();
@@ -1039,33 +1098,11 @@ bool CallAnalyzer::analyzeCall(CallSite CS) {
if (BB->empty())
continue;
- // Handle the terminator cost here where we can track returns and other
- // function-wide constructs.
- TerminatorInst *TI = BB->getTerminator();
-
- // We never want to inline functions that contain an indirectbr. This is
- // incorrect because all the blockaddress's (in static global initializers
- // for example) would be referring to the original function, and this
- // indirect jump would jump from the inlined copy of the function into the
- // original function which is extremely undefined behavior.
- // FIXME: This logic isn't really right; we can safely inline functions
- // with indirectbr's as long as no other function or global references the
- // blockaddress of a block within the current function. And as a QOI issue,
- // if someone is using a blockaddress without an indirectbr, and that
- // reference somehow ends up in another function or global, we probably
- // don't want to inline this function.
- if (isa<IndirectBrInst>(TI))
- return false;
-
- if (!HasReturn && isa<ReturnInst>(TI))
- HasReturn = true;
- else
- Cost += InlineConstants::InstrCost;
-
// Analyze the cost of this block. If we blow through the threshold, this
    // returns false, and we can bail out.
if (!analyzeBlock(BB)) {
- if (IsRecursiveCall || ExposesReturnsTwice || HasDynamicAlloca)
+ if (IsRecursiveCall || ExposesReturnsTwice || HasDynamicAlloca ||
+ HasIndirectBr)
return false;
// If the caller is a recursive function then we don't want to inline
@@ -1078,6 +1115,8 @@ bool CallAnalyzer::analyzeCall(CallSite CS) {
break;
}
+ TerminatorInst *TI = BB->getTerminator();
+
// Add in the live successors by first checking whether we have terminator
// that may be simplified based on the values simplified by this call.
if (BranchInst *BI = dyn_cast<BranchInst>(TI)) {
@@ -1171,6 +1210,22 @@ InlineCost InlineCostAnalysis::getInlineCost(CallSite CS, int Threshold) {
return getInlineCost(CS, CS.getCalledFunction(), Threshold);
}
+/// \brief Test whether two functions either both have or both lack the given
+/// attribute.
+static bool attributeMatches(Function *F1, Function *F2,
+ Attribute::AttrKind Attr) {
+ return F1->hasFnAttribute(Attr) == F2->hasFnAttribute(Attr);
+}
+
+/// \brief Test that there are no attribute conflicts between Caller and Callee
+/// that prevent inlining.
+static bool functionsHaveCompatibleAttributes(Function *Caller,
+ Function *Callee) {
+ return attributeMatches(Caller, Callee, Attribute::SanitizeAddress) &&
+ attributeMatches(Caller, Callee, Attribute::SanitizeMemory) &&
+ attributeMatches(Caller, Callee, Attribute::SanitizeThread);
+}
+
InlineCost InlineCostAnalysis::getInlineCost(CallSite CS, Function *Callee,
int Threshold) {
// Cannot inline indirect calls.
@@ -1179,20 +1234,26 @@ InlineCost InlineCostAnalysis::getInlineCost(CallSite CS, Function *Callee,
// Calls to functions with always-inline attributes should be inlined
// whenever possible.
- if (Callee->getAttributes().hasAttribute(AttributeSet::FunctionIndex,
- Attribute::AlwaysInline)) {
+ if (Callee->hasFnAttribute(Attribute::AlwaysInline)) {
if (isInlineViable(*Callee))
return llvm::InlineCost::getAlways();
return llvm::InlineCost::getNever();
}
+ // Never inline functions with conflicting attributes (unless callee has
+ // always-inline attribute).
+ if (!functionsHaveCompatibleAttributes(CS.getCaller(), Callee))
+ return llvm::InlineCost::getNever();
+
+ // Don't inline this call if the caller has the optnone attribute.
+ if (CS.getCaller()->hasFnAttribute(Attribute::OptimizeNone))
+ return llvm::InlineCost::getNever();
+
// Don't inline functions which can be redefined at link-time to mean
// something else. Don't inline functions marked noinline or call sites
// marked noinline.
if (Callee->mayBeOverridden() ||
- Callee->getAttributes().hasAttribute(AttributeSet::FunctionIndex,
- Attribute::NoInline) ||
- CS.isNoInline())
+ Callee->hasFnAttribute(Attribute::NoInline) || CS.isNoInline())
return llvm::InlineCost::getNever();
DEBUG(llvm::dbgs() << " Analyzing call of " << Callee->getName()
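The new attribute-compatibility rule is easiest to see on a pair of functions that differ only in a sanitizer attribute (an illustrative IR sketch; attribute groups written inline for brevity):

    define void @callee() sanitize_address {
      ret void
    }

    define void @caller() {     ; caller is not ASan-instrumented
      call void @callee()       ; getInlineCost now returns 'never' here
      ret void
    }

Inlining @callee would splice ASan's shadow-memory checks into uninstrumented code (or, in the reverse direction, silently drop instrumentation), which is what functionsHaveCompatibleAttributes guards against.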
diff --git a/lib/Analysis/InstructionSimplify.cpp b/lib/Analysis/InstructionSimplify.cpp
index b275dfe..b867af1 100644
--- a/lib/Analysis/InstructionSimplify.cpp
+++ b/lib/Analysis/InstructionSimplify.cpp
@@ -668,7 +668,8 @@ Value *llvm::SimplifyAddInst(Value *Op0, Value *Op1, bool isNSW, bool isNUW,
/// follow non-inbounds geps. This allows it to remain usable for icmp ult/etc.
/// folding.
static Constant *stripAndComputeConstantOffsets(const DataLayout *TD,
- Value *&V) {
+ Value *&V,
+ bool AllowNonInbounds = false) {
assert(V->getType()->getScalarType()->isPointerTy());
// Without DataLayout, just be conservative for now. Theoretically, more could
@@ -685,7 +686,8 @@ static Constant *stripAndComputeConstantOffsets(const DataLayout *TD,
Visited.insert(V);
do {
if (GEPOperator *GEP = dyn_cast<GEPOperator>(V)) {
- if (!GEP->isInBounds() || !GEP->accumulateConstantOffset(*TD, Offset))
+ if ((!AllowNonInbounds && !GEP->isInBounds()) ||
+ !GEP->accumulateConstantOffset(*TD, Offset))
break;
V = GEP->getPointerOperand();
} else if (Operator::getOpcode(V) == Instruction::BitCast) {
@@ -1737,7 +1739,7 @@ static Constant *computePointerICmp(const DataLayout *TD,
RHS = RHS->stripPointerCasts();
// A non-null pointer is not equal to a null pointer.
- if (llvm::isKnownNonNull(LHS) && isa<ConstantPointerNull>(RHS) &&
+ if (llvm::isKnownNonNull(LHS, TLI) && isa<ConstantPointerNull>(RHS) &&
(Pred == CmpInst::ICMP_EQ || Pred == CmpInst::ICMP_NE))
return ConstantInt::get(GetCompareTy(LHS),
!CmpInst::isTrueWhenEqual(Pred));
@@ -1837,6 +1839,17 @@ static Constant *computePointerICmp(const DataLayout *TD,
return ConstantInt::get(GetCompareTy(LHS),
!CmpInst::isTrueWhenEqual(Pred));
}
+
+  // Even if a non-inbounds GEP occurs along the path we can still optimize
+  // equality comparisons concerning the result. We avoid walking the whole
+  // chain again by starting where the last calls to
+  // stripAndComputeConstantOffsets left off and accumulating the offsets.
+ Constant *LHSNoBound = stripAndComputeConstantOffsets(TD, LHS, true);
+ Constant *RHSNoBound = stripAndComputeConstantOffsets(TD, RHS, true);
+ if (LHS == RHS)
+ return ConstantExpr::getICmp(Pred,
+ ConstantExpr::getAdd(LHSOffset, LHSNoBound),
+ ConstantExpr::getAdd(RHSOffset, RHSNoBound));
}
// Otherwise, fail.
@@ -2946,6 +2959,7 @@ static bool IsIdempotent(Intrinsic::ID ID) {
case Intrinsic::trunc:
case Intrinsic::rint:
case Intrinsic::nearbyint:
+ case Intrinsic::round:
return true;
}
}
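As an example of the new folding (a sketch in the pre-3.5 getelementptr syntax used throughout this file):

    define i1 @gep_eq(i8* %p) {
      %a = getelementptr i8* %p, i64 1   ; deliberately not 'inbounds'
      %b = getelementptr i8* %p, i64 2
      %c = icmp eq i8* %a, %b            ; now folds to false: 1 != 2
      ret i1 %c
    }

Both pointers strip to the same base %p, so the comparison reduces to comparing the accumulated constant offsets; as the comment notes, this is only sound for equality-style predicates, where object bounds are irrelevant.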
diff --git a/lib/Analysis/Lint.cpp b/lib/Analysis/Lint.cpp
index 9393508..ec17f47 100644
--- a/lib/Analysis/Lint.cpp
+++ b/lib/Analysis/Lint.cpp
@@ -207,7 +207,7 @@ void Lint::visitCallSite(CallSite CS) {
&I);
FunctionType *FT = F->getFunctionType();
- unsigned NumActualArgs = unsigned(CS.arg_end()-CS.arg_begin());
+ unsigned NumActualArgs = CS.arg_size();
Assert1(FT->isVarArg() ?
FT->getNumParams() <= NumActualArgs :
@@ -504,14 +504,42 @@ void Lint::visitShl(BinaryOperator &I) {
"Undefined result: Shift count out of range", &I);
}
-static bool isZero(Value *V, DataLayout *TD) {
+static bool isZero(Value *V, DataLayout *DL) {
// Assume undef could be zero.
- if (isa<UndefValue>(V)) return true;
+ if (isa<UndefValue>(V))
+ return true;
+
+ VectorType *VecTy = dyn_cast<VectorType>(V->getType());
+ if (!VecTy) {
+ unsigned BitWidth = V->getType()->getIntegerBitWidth();
+ APInt KnownZero(BitWidth, 0), KnownOne(BitWidth, 0);
+ ComputeMaskedBits(V, KnownZero, KnownOne, DL);
+ return KnownZero.isAllOnesValue();
+ }
+
+ // Per-component check doesn't work with zeroinitializer
+ Constant *C = dyn_cast<Constant>(V);
+ if (!C)
+ return false;
+
+ if (C->isZeroValue())
+ return true;
+
+ // For a vector, KnownZero will only be true if all values are zero, so check
+ // this per component
+ unsigned BitWidth = VecTy->getElementType()->getIntegerBitWidth();
+ for (unsigned I = 0, N = VecTy->getNumElements(); I != N; ++I) {
+ Constant *Elem = C->getAggregateElement(I);
+ if (isa<UndefValue>(Elem))
+ return true;
+
+ APInt KnownZero(BitWidth, 0), KnownOne(BitWidth, 0);
+ ComputeMaskedBits(Elem, KnownZero, KnownOne, DL);
+ if (KnownZero.isAllOnesValue())
+ return true;
+ }
- unsigned BitWidth = cast<IntegerType>(V->getType())->getBitWidth();
- APInt KnownZero(BitWidth, 0), KnownOne(BitWidth, 0);
- ComputeMaskedBits(V, KnownZero, KnownOne, TD);
- return KnownZero.isAllOnesValue();
+ return false;
}
void Lint::visitSDiv(BinaryOperator &I) {
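A minimal case the vector path now catches (sketch):

    %r = sdiv <2 x i32> %x, <i32 7, i32 0>   ; lane 1 has a known-zero divisor,
                                             ; so isZero() returns true and Lint warns

The old code called cast<IntegerType> on the operand type, which asserts on vector types; the new code checks each constant lane individually and, as before, conservatively treats undef as possibly zero.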
diff --git a/lib/Analysis/LoopInfo.cpp b/lib/Analysis/LoopInfo.cpp
index 142ebed..e369633 100644
--- a/lib/Analysis/LoopInfo.cpp
+++ b/lib/Analysis/LoopInfo.cpp
@@ -177,10 +177,6 @@ PHINode *Loop::getCanonicalInductionVariable() const {
/// isLCSSAForm - Return true if the Loop is in LCSSA form
bool Loop::isLCSSAForm(DominatorTree &DT) const {
- // Sort the blocks vector so that we can use binary search to do quick
- // lookups.
- SmallPtrSet<BasicBlock*, 16> LoopBBs(block_begin(), block_end());
-
for (block_iterator BI = block_begin(), E = block_end(); BI != E; ++BI) {
BasicBlock *BB = *BI;
for (BasicBlock::iterator I = BB->begin(), E = BB->end(); I != E;++I)
@@ -196,7 +192,7 @@ bool Loop::isLCSSAForm(DominatorTree &DT) const {
// block they are defined in. Also, blocks not reachable from the
// entry are special; uses in them don't need to go through PHIs.
if (UserBB != BB &&
- !LoopBBs.count(UserBB) &&
+ !contains(UserBB) &&
DT.isReachableFromEntry(UserBB))
return false;
}
@@ -220,12 +216,12 @@ bool Loop::isSafeToClone() const {
// Return false if any loop blocks contain indirectbrs, or there are any calls
// to noduplicate functions.
for (Loop::block_iterator I = block_begin(), E = block_end(); I != E; ++I) {
- if (isa<IndirectBrInst>((*I)->getTerminator())) {
+ if (isa<IndirectBrInst>((*I)->getTerminator()))
return false;
- } else if (const InvokeInst *II = dyn_cast<InvokeInst>((*I)->getTerminator())) {
+
+ if (const InvokeInst *II = dyn_cast<InvokeInst>((*I)->getTerminator()))
if (II->hasFnAttr(Attribute::NoDuplicate))
return false;
- }
for (BasicBlock::iterator BI = (*I)->begin(), BE = (*I)->end(); BI != BE; ++BI) {
if (const CallInst *CI = dyn_cast<CallInst>(BI)) {
@@ -309,15 +305,15 @@ bool Loop::isAnnotatedParallel() const {
if (!II->mayReadOrWriteMemory())
continue;
- if (!II->getMetadata("llvm.mem.parallel_loop_access"))
- return false;
-
// The memory instruction can refer to the loop identifier metadata
// directly or indirectly through another list metadata (in case of
// nested parallel loops). The loop identifier metadata refers to
// itself so we can check both cases with the same routine.
- MDNode *loopIdMD =
- dyn_cast<MDNode>(II->getMetadata("llvm.mem.parallel_loop_access"));
+ MDNode *loopIdMD = II->getMetadata("llvm.mem.parallel_loop_access");
+
+ if (!loopIdMD)
+ return false;
+
bool loopIdMDFound = false;
for (unsigned i = 0, e = loopIdMD->getNumOperands(); i < e; ++i) {
if (loopIdMD->getOperand(i) == desiredLoopIdMetadata) {
@@ -337,9 +333,6 @@ bool Loop::isAnnotatedParallel() const {
/// hasDedicatedExits - Return true if no exit block for the loop
/// has a predecessor that is outside the loop.
bool Loop::hasDedicatedExits() const {
- // Sort the blocks vector so that we can use binary search to do quick
- // lookups.
- SmallPtrSet<BasicBlock *, 16> LoopBBs(block_begin(), block_end());
// Each predecessor of each exit block of a normal loop is contained
// within the loop.
SmallVector<BasicBlock *, 4> ExitBlocks;
@@ -347,7 +340,7 @@ bool Loop::hasDedicatedExits() const {
for (unsigned i = 0, e = ExitBlocks.size(); i != e; ++i)
for (pred_iterator PI = pred_begin(ExitBlocks[i]),
PE = pred_end(ExitBlocks[i]); PI != PE; ++PI)
- if (!LoopBBs.count(*PI))
+ if (!contains(*PI))
return false;
// All the requirements are met.
return true;
@@ -362,11 +355,6 @@ Loop::getUniqueExitBlocks(SmallVectorImpl<BasicBlock *> &ExitBlocks) const {
assert(hasDedicatedExits() &&
"getUniqueExitBlocks assumes the loop has canonical form exits!");
- // Sort the blocks vector so that we can use binary search to do quick
- // lookups.
- SmallVector<BasicBlock *, 128> LoopBBs(block_begin(), block_end());
- std::sort(LoopBBs.begin(), LoopBBs.end());
-
SmallVector<BasicBlock *, 32> switchExitBlocks;
for (block_iterator BI = block_begin(), BE = block_end(); BI != BE; ++BI) {
@@ -376,7 +364,7 @@ Loop::getUniqueExitBlocks(SmallVectorImpl<BasicBlock *> &ExitBlocks) const {
for (succ_iterator I = succ_begin(*BI), E = succ_end(*BI); I != E; ++I) {
      // If the block is inside the loop then it is not an exit block.
- if (std::binary_search(LoopBBs.begin(), LoopBBs.end(), *I))
+ if (contains(*I))
continue;
pred_iterator PI = pred_begin(*I);
diff --git a/lib/Analysis/MemoryBuiltins.cpp b/lib/Analysis/MemoryBuiltins.cpp
index 0f0a1c9..1db0f63 100644
--- a/lib/Analysis/MemoryBuiltins.cpp
+++ b/lib/Analysis/MemoryBuiltins.cpp
@@ -31,12 +31,13 @@
using namespace llvm;
enum AllocType {
- MallocLike = 1<<0, // allocates
- CallocLike = 1<<1, // allocates + bzero
- ReallocLike = 1<<2, // reallocates
- StrDupLike = 1<<3,
+ OpNewLike = 1<<0, // allocates; never returns null
+ MallocLike = 1<<1 | OpNewLike, // allocates; may return null
+ CallocLike = 1<<2, // allocates + bzero
+ ReallocLike = 1<<3, // reallocates
+ StrDupLike = 1<<4,
AllocLike = MallocLike | CallocLike | StrDupLike,
- AnyAlloc = MallocLike | CallocLike | ReallocLike | StrDupLike
+ AnyAlloc = AllocLike | ReallocLike
};
struct AllocFnsTy {
@@ -52,20 +53,20 @@ struct AllocFnsTy {
static const AllocFnsTy AllocationFnData[] = {
{LibFunc::malloc, MallocLike, 1, 0, -1},
{LibFunc::valloc, MallocLike, 1, 0, -1},
- {LibFunc::Znwj, MallocLike, 1, 0, -1}, // new(unsigned int)
+ {LibFunc::Znwj, OpNewLike, 1, 0, -1}, // new(unsigned int)
{LibFunc::ZnwjRKSt9nothrow_t, MallocLike, 2, 0, -1}, // new(unsigned int, nothrow)
- {LibFunc::Znwm, MallocLike, 1, 0, -1}, // new(unsigned long)
+ {LibFunc::Znwm, OpNewLike, 1, 0, -1}, // new(unsigned long)
{LibFunc::ZnwmRKSt9nothrow_t, MallocLike, 2, 0, -1}, // new(unsigned long, nothrow)
- {LibFunc::Znaj, MallocLike, 1, 0, -1}, // new[](unsigned int)
+ {LibFunc::Znaj, OpNewLike, 1, 0, -1}, // new[](unsigned int)
{LibFunc::ZnajRKSt9nothrow_t, MallocLike, 2, 0, -1}, // new[](unsigned int, nothrow)
- {LibFunc::Znam, MallocLike, 1, 0, -1}, // new[](unsigned long)
+ {LibFunc::Znam, OpNewLike, 1, 0, -1}, // new[](unsigned long)
{LibFunc::ZnamRKSt9nothrow_t, MallocLike, 2, 0, -1}, // new[](unsigned long, nothrow)
- {LibFunc::posix_memalign, MallocLike, 3, 2, -1},
{LibFunc::calloc, CallocLike, 2, 0, 1},
{LibFunc::realloc, ReallocLike, 2, 1, -1},
{LibFunc::reallocf, ReallocLike, 2, 1, -1},
{LibFunc::strdup, StrDupLike, 1, -1, -1},
{LibFunc::strndup, StrDupLike, 2, 1, -1}
+ // TODO: Handle "int posix_memalign(void **, size_t, size_t)"
};
@@ -117,7 +118,7 @@ static const AllocFnsTy *getAllocationData(const Value *V, AllocType AllocTy,
return 0;
const AllocFnsTy *FnData = &AllocationFnData[i];
- if ((FnData->AllocTy & AllocTy) == 0)
+ if ((FnData->AllocTy & AllocTy) != FnData->AllocTy)
return 0;
// Check function prototype.
@@ -189,6 +190,13 @@ bool llvm::isReallocLikeFn(const Value *V, const TargetLibraryInfo *TLI,
return getAllocationData(V, ReallocLike, TLI, LookThroughBitCast);
}
+/// \brief Tests if a value is a call or invoke to a library function that
+/// allocates memory and never returns null (such as operator new).
+bool llvm::isOperatorNewLikeFn(const Value *V, const TargetLibraryInfo *TLI,
+ bool LookThroughBitCast) {
+ return getAllocationData(V, OpNewLike, TLI, LookThroughBitCast);
+}
+
/// extractMallocCall - Returns the corresponding CallInst if the instruction
/// is a malloc call. Since CallInst::CreateMalloc() only creates calls, we
/// ignore InvokeInst here.
@@ -197,7 +205,7 @@ const CallInst *llvm::extractMallocCall(const Value *I,
return isMallocLikeFn(I, TLI) ? dyn_cast<CallInst>(I) : 0;
}
-static Value *computeArraySize(const CallInst *CI, const DataLayout *TD,
+static Value *computeArraySize(const CallInst *CI, const DataLayout *DL,
const TargetLibraryInfo *TLI,
bool LookThroughSExt = false) {
if (!CI)
@@ -205,12 +213,12 @@ static Value *computeArraySize(const CallInst *CI, const DataLayout *TD,
// The size of the malloc's result type must be known to determine array size.
Type *T = getMallocAllocatedType(CI, TLI);
- if (!T || !T->isSized() || !TD)
+ if (!T || !T->isSized() || !DL)
return 0;
- unsigned ElementSize = TD->getTypeAllocSize(T);
+ unsigned ElementSize = DL->getTypeAllocSize(T);
if (StructType *ST = dyn_cast<StructType>(T))
- ElementSize = TD->getStructLayout(ST)->getSizeInBytes();
+ ElementSize = DL->getStructLayout(ST)->getSizeInBytes();
// If malloc call's arg can be determined to be a multiple of ElementSize,
// return the multiple. Otherwise, return NULL.
@@ -227,10 +235,10 @@ static Value *computeArraySize(const CallInst *CI, const DataLayout *TD,
/// is a call to malloc whose array size can be determined and the array size
/// is not constant 1. Otherwise, return NULL.
const CallInst *llvm::isArrayMalloc(const Value *I,
- const DataLayout *TD,
+ const DataLayout *DL,
const TargetLibraryInfo *TLI) {
const CallInst *CI = extractMallocCall(I, TLI);
- Value *ArraySize = computeArraySize(CI, TD, TLI);
+ Value *ArraySize = computeArraySize(CI, DL, TLI);
if (ConstantInt *ConstSize = dyn_cast_or_null<ConstantInt>(ArraySize))
if (ConstSize->isOne())
@@ -288,11 +296,11 @@ Type *llvm::getMallocAllocatedType(const CallInst *CI,
/// then return that multiple. For non-array mallocs, the multiple is
/// constant 1. Otherwise, return NULL for mallocs whose array size cannot be
/// determined.
-Value *llvm::getMallocArraySize(CallInst *CI, const DataLayout *TD,
+Value *llvm::getMallocArraySize(CallInst *CI, const DataLayout *DL,
const TargetLibraryInfo *TLI,
bool LookThroughSExt) {
assert(isMallocLikeFn(CI, TLI) && "getMallocArraySize and not malloc call");
- return computeArraySize(CI, TD, TLI, LookThroughSExt);
+ return computeArraySize(CI, DL, TLI, LookThroughSExt);
}
@@ -354,12 +362,12 @@ const CallInst *llvm::isFreeCall(const Value *I, const TargetLibraryInfo *TLI) {
/// object size in Size if successful, and false otherwise.
/// If RoundToAlign is true, then Size is rounded up to the alignment of allocas,
/// byval arguments, and global variables.
-bool llvm::getObjectSize(const Value *Ptr, uint64_t &Size, const DataLayout *TD,
+bool llvm::getObjectSize(const Value *Ptr, uint64_t &Size, const DataLayout *DL,
const TargetLibraryInfo *TLI, bool RoundToAlign) {
- if (!TD)
+ if (!DL)
return false;
- ObjectSizeOffsetVisitor Visitor(TD, TLI, Ptr->getContext(), RoundToAlign);
+ ObjectSizeOffsetVisitor Visitor(DL, TLI, Ptr->getContext(), RoundToAlign);
SizeOffsetType Data = Visitor.compute(const_cast<Value*>(Ptr));
if (!Visitor.bothKnown(Data))
return false;
@@ -386,12 +394,12 @@ APInt ObjectSizeOffsetVisitor::align(APInt Size, uint64_t Align) {
return Size;
}
-ObjectSizeOffsetVisitor::ObjectSizeOffsetVisitor(const DataLayout *TD,
+ObjectSizeOffsetVisitor::ObjectSizeOffsetVisitor(const DataLayout *DL,
const TargetLibraryInfo *TLI,
LLVMContext &Context,
bool RoundToAlign)
-: TD(TD), TLI(TLI), RoundToAlign(RoundToAlign) {
- IntegerType *IntTy = TD->getIntPtrType(Context);
+: DL(DL), TLI(TLI), RoundToAlign(RoundToAlign) {
+ IntegerType *IntTy = DL->getIntPtrType(Context);
IntTyBits = IntTy->getBitWidth();
Zero = APInt::getNullValue(IntTyBits);
}
@@ -434,7 +442,7 @@ SizeOffsetType ObjectSizeOffsetVisitor::visitAllocaInst(AllocaInst &I) {
if (!I.getAllocatedType()->isSized())
return unknown();
- APInt Size(IntTyBits, TD->getTypeAllocSize(I.getAllocatedType()));
+ APInt Size(IntTyBits, DL->getTypeAllocSize(I.getAllocatedType()));
if (!I.isArrayAllocation())
return std::make_pair(align(Size, I.getAlignment()), Zero);
@@ -453,7 +461,7 @@ SizeOffsetType ObjectSizeOffsetVisitor::visitArgument(Argument &A) {
return unknown();
}
PointerType *PT = cast<PointerType>(A.getType());
- APInt Size(IntTyBits, TD->getTypeAllocSize(PT->getElementType()));
+ APInt Size(IntTyBits, DL->getTypeAllocSize(PT->getElementType()));
return std::make_pair(align(Size, A.getParamAlignment()), Zero);
}
@@ -526,7 +534,7 @@ ObjectSizeOffsetVisitor::visitExtractValueInst(ExtractValueInst&) {
SizeOffsetType ObjectSizeOffsetVisitor::visitGEPOperator(GEPOperator &GEP) {
SizeOffsetType PtrData = compute(GEP.getPointerOperand());
APInt Offset(IntTyBits, 0);
- if (!bothKnown(PtrData) || !GEP.accumulateConstantOffset(*TD, Offset))
+ if (!bothKnown(PtrData) || !GEP.accumulateConstantOffset(*DL, Offset))
return unknown();
return std::make_pair(PtrData.first, PtrData.second + Offset);
@@ -542,7 +550,7 @@ SizeOffsetType ObjectSizeOffsetVisitor::visitGlobalVariable(GlobalVariable &GV){
if (!GV.hasDefinitiveInitializer())
return unknown();
- APInt Size(IntTyBits, TD->getTypeAllocSize(GV.getType()->getElementType()));
+ APInt Size(IntTyBits, DL->getTypeAllocSize(GV.getType()->getElementType()));
return std::make_pair(align(Size, GV.getAlignment()), Zero);
}
@@ -578,12 +586,13 @@ SizeOffsetType ObjectSizeOffsetVisitor::visitInstruction(Instruction &I) {
return unknown();
}
-
-ObjectSizeOffsetEvaluator::ObjectSizeOffsetEvaluator(const DataLayout *TD,
- const TargetLibraryInfo *TLI,
- LLVMContext &Context)
-: TD(TD), TLI(TLI), Context(Context), Builder(Context, TargetFolder(TD)) {
- IntTy = TD->getIntPtrType(Context);
+ObjectSizeOffsetEvaluator::ObjectSizeOffsetEvaluator(const DataLayout *DL,
+ const TargetLibraryInfo *TLI,
+ LLVMContext &Context,
+ bool RoundToAlign)
+: DL(DL), TLI(TLI), Context(Context), Builder(Context, TargetFolder(DL)),
+ RoundToAlign(RoundToAlign) {
+ IntTy = DL->getIntPtrType(Context);
Zero = ConstantInt::get(IntTy, 0);
}
@@ -607,7 +616,7 @@ SizeOffsetEvalType ObjectSizeOffsetEvaluator::compute(Value *V) {
}
SizeOffsetEvalType ObjectSizeOffsetEvaluator::compute_(Value *V) {
- ObjectSizeOffsetVisitor Visitor(TD, TLI, Context);
+ ObjectSizeOffsetVisitor Visitor(DL, TLI, Context, RoundToAlign);
SizeOffsetType Const = Visitor.compute(V);
if (Visitor.bothKnown(Const))
return std::make_pair(ConstantInt::get(Context, Const.first),
@@ -626,13 +635,15 @@ SizeOffsetEvalType ObjectSizeOffsetEvaluator::compute_(Value *V) {
if (Instruction *I = dyn_cast<Instruction>(V))
Builder.SetInsertPoint(I);
- // record the pointers that were handled in this run, so that they can be
- // cleaned later if something fails
- SeenVals.insert(V);
-
// now compute the size and offset
SizeOffsetEvalType Result;
- if (GEPOperator *GEP = dyn_cast<GEPOperator>(V)) {
+
+ // Record the pointers that were handled in this run, so that they can be
+ // cleaned later if something fails. We also use this set to break cycles that
+ // can occur in dead code.
+ if (!SeenVals.insert(V)) {
+ Result = unknown();
+ } else if (GEPOperator *GEP = dyn_cast<GEPOperator>(V)) {
Result = visitGEPOperator(*GEP);
} else if (Instruction *I = dyn_cast<Instruction>(V)) {
Result = visit(*I);
@@ -665,7 +676,7 @@ SizeOffsetEvalType ObjectSizeOffsetEvaluator::visitAllocaInst(AllocaInst &I) {
assert(I.isArrayAllocation());
Value *ArraySize = I.getArraySize();
Value *Size = ConstantInt::get(ArraySize->getType(),
- TD->getTypeAllocSize(I.getAllocatedType()));
+ DL->getTypeAllocSize(I.getAllocatedType()));
Size = Builder.CreateMul(Size, ArraySize);
return std::make_pair(Size, Zero);
}
@@ -717,7 +728,7 @@ ObjectSizeOffsetEvaluator::visitGEPOperator(GEPOperator &GEP) {
if (!bothKnown(PtrData))
return unknown();
- Value *Offset = EmitGEPOffset(&Builder, *TD, &GEP, /*NoAssumptions=*/true);
+ Value *Offset = EmitGEPOffset(&Builder, *DL, &GEP, /*NoAssumptions=*/true);
Offset = Builder.CreateAdd(PtrData.second, Offset);
return std::make_pair(PtrData.first, Offset);
}
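One subtlety in the AllocType change above: because MallocLike now contains the OpNewLike bit, the matching test in getAllocationData had to become a subset check, (FnData->AllocTy & AllocTy) != FnData->AllocTy, instead of the old any-intersection test. An illustrative restatement (not patch code):

    // An AllocationFnData entry matches a query iff every bit the entry
    // carries is requested by the query.
    static bool entryMatches(unsigned EntryTy, unsigned QueryTy) {
      return (EntryTy & QueryTy) == EntryTy;
    }
    // entryMatches(OpNewLike, MallocLike)  -> true : operator new still counts
    //   as malloc-like, since MallocLike = 1<<1 | OpNewLike covers its bit.
    // entryMatches(MallocLike, OpNewLike)  -> false: plain malloc may return
    //   null, so isOperatorNewLikeFn() correctly rejects it.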
diff --git a/lib/Analysis/MemoryDependenceAnalysis.cpp b/lib/Analysis/MemoryDependenceAnalysis.cpp
index fe1c874..84ff2ee 100644
--- a/lib/Analysis/MemoryDependenceAnalysis.cpp
+++ b/lib/Analysis/MemoryDependenceAnalysis.cpp
@@ -371,18 +371,19 @@ getPointerDependencyFrom(const AliasAnalysis::Location &MemLoc, bool isLoad,
// Walk backwards through the basic block, looking for dependencies.
while (ScanIt != BB->begin()) {
+ Instruction *Inst = --ScanIt;
+
+ if (IntrinsicInst *II = dyn_cast<IntrinsicInst>(Inst))
+ // Debug intrinsics don't (and can't) cause dependencies.
+ if (isa<DbgInfoIntrinsic>(II)) continue;
+
// Limit the amount of scanning we do so we don't end up with quadratic
// running time on extreme testcases.
--Limit;
if (!Limit)
return MemDepResult::getUnknown();
- Instruction *Inst = --ScanIt;
-
if (IntrinsicInst *II = dyn_cast<IntrinsicInst>(Inst)) {
- // Debug intrinsics don't (and can't) cause dependences.
- if (isa<DbgInfoIntrinsic>(II)) continue;
-
// If we reach a lifetime begin or end marker, then the query ends here
// because the value is undefined.
if (II->getIntrinsicID() == Intrinsic::lifetime_start) {
diff --git a/lib/Analysis/PathNumbering.cpp b/lib/Analysis/PathNumbering.cpp
deleted file mode 100644
index 30d213b..0000000
--- a/lib/Analysis/PathNumbering.cpp
+++ /dev/null
@@ -1,521 +0,0 @@
-//===- PathNumbering.cpp --------------------------------------*- C++ -*---===//
-//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-//
-// Ball-Larus path numbers uniquely identify paths through a directed acyclic
-// graph (DAG) [Ball96]. For a CFG backedges are removed and replaced by phony
-// edges to obtain a DAG, and thus the unique path numbers [Ball96].
-//
-// The purpose of this analysis is to enumerate the edges in a CFG in order
-// to obtain paths from path numbers in a convenient manner. As described in
-// [Ball96] edges can be enumerated such that given a path number by following
-// the CFG and updating the path number, the path is obtained.
-//
-// [Ball96]
-// T. Ball and J. R. Larus. "Efficient Path Profiling."
-// International Symposium on Microarchitecture, pages 46-57, 1996.
-// http://portal.acm.org/citation.cfm?id=243857
-//
-//===----------------------------------------------------------------------===//
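
As a worked illustration of the scheme this file implements (the example is not part of the original source), take a diamond CFG entry -> {then, else} -> exit. A self-contained sketch that assigns edge increments in reverse topological order:

  #include <cstdio>

  // Diamond DAG: 0=entry, 1=then, 2=else, 3=exit. numPaths(exit) = 1; each
  // outgoing edge gets the sum of numPaths of the successors listed before it.
  int main() {
    int succ[4][2] = {{1, 2}, {3, -1}, {3, -1}, {-1, -1}};
    int numPaths[4] = {0, 0, 0, 1};            // exit is the base case
    for (int n = 2; n >= 0; --n) {             // 2, 1, 0 is reverse topological
      int sum = 0;
      for (int i = 0; i < 2 && succ[n][i] >= 0; ++i) {
        printf("weight(%d->%d) = %d\n", n, succ[n][i], sum);
        sum += numPaths[succ[n][i]];
      }
      numPaths[n] = sum;
    }
    printf("paths: %d\n", numPaths[0]);        // 2 paths, numbered 0 and 1
    return 0;
  }

Summing the weights along any entry-to-exit path yields that path's unique number in [0, numPaths(entry)): entry->then->exit sums to 0, entry->else->exit to 1.
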
-#define DEBUG_TYPE "ball-larus-numbering"
-
-#include "llvm/Analysis/PathNumbering.h"
-#include "llvm/IR/Constants.h"
-#include "llvm/IR/DerivedTypes.h"
-#include "llvm/IR/InstrTypes.h"
-#include "llvm/IR/Instructions.h"
-#include "llvm/IR/Module.h"
-#include "llvm/IR/TypeBuilder.h"
-#include "llvm/Pass.h"
-#include "llvm/Support/CFG.h"
-#include "llvm/Support/CommandLine.h"
-#include "llvm/Support/Compiler.h"
-#include "llvm/Support/Debug.h"
-#include "llvm/Support/raw_ostream.h"
-#include <queue>
-#include <sstream>
-#include <stack>
-#include <string>
-#include <utility>
-
-using namespace llvm;
-
-// Are we enabling early termination?
-static cl::opt<bool> ProcessEarlyTermination(
- "path-profile-early-termination", cl::Hidden,
- cl::desc("In path profiling, insert extra instrumentation to account for "
- "unexpected function termination."));
-
-// Returns the basic block for the BallLarusNode
-BasicBlock* BallLarusNode::getBlock() {
- return(_basicBlock);
-}
-
-// Returns the number of paths to the exit starting at the node.
-unsigned BallLarusNode::getNumberPaths() {
- return(_numberPaths);
-}
-
-// Sets the number of paths to the exit starting at the node.
-void BallLarusNode::setNumberPaths(unsigned numberPaths) {
- _numberPaths = numberPaths;
-}
-
-// Gets the NodeColor used in graph algorithms.
-BallLarusNode::NodeColor BallLarusNode::getColor() {
- return(_color);
-}
-
-// Sets the NodeColor used in graph algorithms.
-void BallLarusNode::setColor(BallLarusNode::NodeColor color) {
- _color = color;
-}
-
-// Returns an iterator over predecessor edges. Includes phony and
-// backedges.
-BLEdgeIterator BallLarusNode::predBegin() {
- return(_predEdges.begin());
-}
-
-// Returns the end sentinel for the predecessor iterator.
-BLEdgeIterator BallLarusNode::predEnd() {
- return(_predEdges.end());
-}
-
-// Returns the number of predecessor edges. Includes phony and
-// backedges.
-unsigned BallLarusNode::getNumberPredEdges() {
- return(_predEdges.size());
-}
-
-// Returns an iterator over successor edges. Includes phony and
-// backedges.
-BLEdgeIterator BallLarusNode::succBegin() {
- return(_succEdges.begin());
-}
-
-// Returns the end sentinel for the successor iterator.
-BLEdgeIterator BallLarusNode::succEnd() {
- return(_succEdges.end());
-}
-
-// Returns the number of successor edges. Includes phony and
-// backedges.
-unsigned BallLarusNode::getNumberSuccEdges() {
- return(_succEdges.size());
-}
-
-// Add an edge to the predecessor list.
-void BallLarusNode::addPredEdge(BallLarusEdge* edge) {
- _predEdges.push_back(edge);
-}
-
-// Remove an edge from the predecessor list.
-void BallLarusNode::removePredEdge(BallLarusEdge* edge) {
- removeEdge(_predEdges, edge);
-}
-
-// Add an edge to the successor list.
-void BallLarusNode::addSuccEdge(BallLarusEdge* edge) {
- _succEdges.push_back(edge);
-}
-
-// Remove an edge from the successor list.
-void BallLarusNode::removeSuccEdge(BallLarusEdge* edge) {
- removeEdge(_succEdges, edge);
-}
-
-// Returns the name of the BasicBlock being represented. If BasicBlock
-// is null then returns "<null>". If BasicBlock has no name, then
-// "<unnamed>" is returned. Intended for use with debug output.
-std::string BallLarusNode::getName() {
- std::stringstream name;
-
- if(getBlock() != NULL) {
- if(getBlock()->hasName()) {
- std::string tempName(getBlock()->getName());
- name << tempName.c_str() << " (" << _uid << ")";
- } else
- name << "<unnamed> (" << _uid << ")";
- } else
- name << "<null> (" << _uid << ")";
-
- return name.str();
-}
-
-// Removes an edge from an edgeVector. Used by removePredEdge and
-// removeSuccEdge.
-void BallLarusNode::removeEdge(BLEdgeVector& v, BallLarusEdge* e) {
- // TODO: Avoid linear scan by using a set instead
- for(BLEdgeIterator i = v.begin(),
- end = v.end();
- i != end;
- ++i) {
- if((*i) == e) {
- v.erase(i);
- break;
- }
- }
-}
-
-// Returns the source node of this edge.
-BallLarusNode* BallLarusEdge::getSource() const {
- return(_source);
-}
-
-// Returns the target node of this edge.
-BallLarusNode* BallLarusEdge::getTarget() const {
- return(_target);
-}
-
-// Gets the type of the edge.
-BallLarusEdge::EdgeType BallLarusEdge::getType() const {
- return _edgeType;
-}
-
-// Sets the type of the edge.
-void BallLarusEdge::setType(EdgeType type) {
- _edgeType = type;
-}
-
-// Returns the weight of this edge. Used to decode path numbers to sequences
-// of basic blocks.
-unsigned BallLarusEdge::getWeight() {
- return(_weight);
-}
-
-// Sets the weight of the edge. Used during path numbering.
-void BallLarusEdge::setWeight(unsigned weight) {
- _weight = weight;
-}
-
-// Gets the phony edge originating at the root.
-BallLarusEdge* BallLarusEdge::getPhonyRoot() {
- return _phonyRoot;
-}
-
-// Sets the phony edge originating at the root.
-void BallLarusEdge::setPhonyRoot(BallLarusEdge* phonyRoot) {
- _phonyRoot = phonyRoot;
-}
-
-// Gets the phony edge terminating at the exit.
-BallLarusEdge* BallLarusEdge::getPhonyExit() {
- return _phonyExit;
-}
-
-// Sets the phony edge terminating at the exit.
-void BallLarusEdge::setPhonyExit(BallLarusEdge* phonyExit) {
- _phonyExit = phonyExit;
-}
-
-// Gets the associated real edge if this is a phony edge.
-BallLarusEdge* BallLarusEdge::getRealEdge() {
- return _realEdge;
-}
-
-// Sets the associated real edge if this is a phony edge.
-void BallLarusEdge::setRealEdge(BallLarusEdge* realEdge) {
- _realEdge = realEdge;
-}
-
-// Returns the duplicate number of the edge.
-unsigned BallLarusEdge::getDuplicateNumber() {
- return(_duplicateNumber);
-}
-
-// Initialization that requires virtual functions which are not fully
-// functional in the constructor.
-void BallLarusDag::init() {
- BLBlockNodeMap inDag;
- std::stack<BallLarusNode*> dfsStack;
-
- _root = addNode(&(_function.getEntryBlock()));
- _exit = addNode(NULL);
-
- // start search from root
- dfsStack.push(getRoot());
-
- // dfs to add each bb into the dag
- while(dfsStack.size())
- buildNode(inDag, dfsStack);
-
- // put in the final edge
- addEdge(getExit(),getRoot(),0);
-}
-
-// Frees all memory associated with the DAG.
-BallLarusDag::~BallLarusDag() {
- for(BLEdgeIterator edge = _edges.begin(), end = _edges.end(); edge != end;
- ++edge)
- delete (*edge);
-
- for(BLNodeIterator node = _nodes.begin(), end = _nodes.end(); node != end;
- ++node)
- delete (*node);
-}
-
-// Calculate the path numbers by assigning edge increments as prescribed
-// in Ball-Larus path profiling.
-void BallLarusDag::calculatePathNumbers() {
- BallLarusNode* node;
- std::queue<BallLarusNode*> bfsQueue;
- bfsQueue.push(getExit());
-
- while(bfsQueue.size() > 0) {
- node = bfsQueue.front();
-
- DEBUG(dbgs() << "calculatePathNumbers on " << node->getName() << "\n");
-
- bfsQueue.pop();
- unsigned prevPathNumber = node->getNumberPaths();
- calculatePathNumbersFrom(node);
-
- // Check for DAG splitting
- if( node->getNumberPaths() > 100000000 && node != getRoot() ) {
- // Add new phony edge from the split-node to the DAG's exit
- BallLarusEdge* exitEdge = addEdge(node, getExit(), 0);
- exitEdge->setType(BallLarusEdge::SPLITEDGE_PHONY);
-
- // Counters to handle the possibility of a multi-graph
- BasicBlock* oldTarget = 0;
- unsigned duplicateNumber = 0;
-
- // Iterate through each successor edge, adding phony edges
- for( BLEdgeIterator succ = node->succBegin(), end = node->succEnd();
- succ != end; oldTarget = (*succ)->getTarget()->getBlock(), succ++ ) {
-
- if( (*succ)->getType() == BallLarusEdge::NORMAL ) {
- // is this edge a duplicate?
- if( oldTarget != (*succ)->getTarget()->getBlock() )
- duplicateNumber = 0;
-
- // create the new phony edge: root -> succ
- BallLarusEdge* rootEdge =
- addEdge(getRoot(), (*succ)->getTarget(), duplicateNumber++);
- rootEdge->setType(BallLarusEdge::SPLITEDGE_PHONY);
- rootEdge->setRealEdge(*succ);
-
-        // split on this edge and reference its exit/root phony edges
- (*succ)->setType(BallLarusEdge::SPLITEDGE);
- (*succ)->setPhonyRoot(rootEdge);
- (*succ)->setPhonyExit(exitEdge);
- (*succ)->setWeight(0);
- }
- }
-
- calculatePathNumbersFrom(node);
- }
-
- DEBUG(dbgs() << "prev, new number paths " << prevPathNumber << ", "
- << node->getNumberPaths() << ".\n");
-
- if(prevPathNumber == 0 && node->getNumberPaths() != 0) {
- DEBUG(dbgs() << "node ready : " << node->getName() << "\n");
- for(BLEdgeIterator pred = node->predBegin(), end = node->predEnd();
- pred != end; pred++) {
- if( (*pred)->getType() == BallLarusEdge::BACKEDGE ||
- (*pred)->getType() == BallLarusEdge::SPLITEDGE )
- continue;
-
- BallLarusNode* nextNode = (*pred)->getSource();
- // not yet visited?
- if(nextNode->getNumberPaths() == 0)
- bfsQueue.push(nextNode);
- }
- }
- }
-
- DEBUG(dbgs() << "\tNumber of paths: " << getRoot()->getNumberPaths() << "\n");
-}
-
-// Returns the number of paths for the Dag.
-unsigned BallLarusDag::getNumberOfPaths() {
- return(getRoot()->getNumberPaths());
-}
-
-// Returns the root (i.e. entry) node for the DAG.
-BallLarusNode* BallLarusDag::getRoot() {
- return _root;
-}
-
-// Returns the exit node for the DAG.
-BallLarusNode* BallLarusDag::getExit() {
- return _exit;
-}
-
-// Returns the function for the DAG.
-Function& BallLarusDag::getFunction() {
- return(_function);
-}
-
-// Clears the node colors.
-void BallLarusDag::clearColors(BallLarusNode::NodeColor color) {
- for (BLNodeIterator nodeIt = _nodes.begin(); nodeIt != _nodes.end(); nodeIt++)
- (*nodeIt)->setColor(color);
-}
-
-// Processes one node and its immediate edges for building the DAG.
-void BallLarusDag::buildNode(BLBlockNodeMap& inDag, BLNodeStack& dfsStack) {
- BallLarusNode* currentNode = dfsStack.top();
- BasicBlock* currentBlock = currentNode->getBlock();
-
- if(currentNode->getColor() != BallLarusNode::WHITE) {
- // we have already visited this node
- dfsStack.pop();
- currentNode->setColor(BallLarusNode::BLACK);
- } else {
- // are there any external procedure calls?
- if( ProcessEarlyTermination ) {
- for( BasicBlock::iterator bbCurrent = currentNode->getBlock()->begin(),
- bbEnd = currentNode->getBlock()->end(); bbCurrent != bbEnd;
- bbCurrent++ ) {
- Instruction& instr = *bbCurrent;
- if( instr.getOpcode() == Instruction::Call ) {
- BallLarusEdge* callEdge = addEdge(currentNode, getExit(), 0);
- callEdge->setType(BallLarusEdge::CALLEDGE_PHONY);
- break;
- }
- }
- }
-
- TerminatorInst* terminator = currentNode->getBlock()->getTerminator();
- if(isa<ReturnInst>(terminator) || isa<UnreachableInst>(terminator) ||
- isa<ResumeInst>(terminator))
- addEdge(currentNode, getExit(),0);
-
- currentNode->setColor(BallLarusNode::GRAY);
- inDag[currentBlock] = currentNode;
-
- BasicBlock* oldSuccessor = 0;
- unsigned duplicateNumber = 0;
-
- // iterate through this node's successors
- for(succ_iterator successor = succ_begin(currentBlock),
- succEnd = succ_end(currentBlock); successor != succEnd;
- oldSuccessor = *successor, ++successor ) {
- BasicBlock* succBB = *successor;
-
- // is this edge a duplicate?
- if (oldSuccessor == succBB)
- duplicateNumber++;
- else
- duplicateNumber = 0;
-
- buildEdge(inDag, dfsStack, currentNode, succBB, duplicateNumber);
- }
- }
-}
-
-// Process an edge in the CFG for DAG building.
-void BallLarusDag::buildEdge(BLBlockNodeMap& inDag, std::stack<BallLarusNode*>&
- dfsStack, BallLarusNode* currentNode,
- BasicBlock* succBB, unsigned duplicateCount) {
- BallLarusNode* succNode = inDag[succBB];
-
- if(succNode && succNode->getColor() == BallLarusNode::BLACK) {
- // visited node and forward edge
- addEdge(currentNode, succNode, duplicateCount);
- } else if(succNode && succNode->getColor() == BallLarusNode::GRAY) {
- // visited node and back edge
- DEBUG(dbgs() << "Backedge detected.\n");
- addBackedge(currentNode, succNode, duplicateCount);
- } else {
- BallLarusNode* childNode;
- // not visited node and forward edge
-    if(succNode) // an unvisited node that is a child of a gray node
-      childNode = succNode;
-    else { // an unvisited node that is a child of an unvisited node
- childNode = addNode(succBB);
- inDag[succBB] = childNode;
- }
- addEdge(currentNode, childNode, duplicateCount);
- dfsStack.push(childNode);
- }
-}
-
-// The weight on each edge is the increment required along any path that
-// contains that edge.
-void BallLarusDag::calculatePathNumbersFrom(BallLarusNode* node) {
- if(node == getExit())
-    // The Exit node must be the base case
- node->setNumberPaths(1);
- else {
- unsigned sumPaths = 0;
- BallLarusNode* succNode;
-
- for(BLEdgeIterator succ = node->succBegin(), end = node->succEnd();
- succ != end; succ++) {
- if( (*succ)->getType() == BallLarusEdge::BACKEDGE ||
- (*succ)->getType() == BallLarusEdge::SPLITEDGE )
- continue;
-
- (*succ)->setWeight(sumPaths);
- succNode = (*succ)->getTarget();
-
- if( !succNode->getNumberPaths() )
- return;
- sumPaths += succNode->getNumberPaths();
- }
-
- node->setNumberPaths(sumPaths);
- }
-}
-
-// Allows subclasses to determine which type of Node is created.
-// Override this method to produce subclasses of BallLarusNode if
-// necessary. The destructor of BallLarusDag will call free on each
-// pointer created.
-BallLarusNode* BallLarusDag::createNode(BasicBlock* BB) {
- return( new BallLarusNode(BB) );
-}
-
-// Allows subclasses to determine which type of Edge is created.
-// Override this method to produce subclasses of BallLarusEdge if
-// necessary. The destructor of BallLarusDag will call free on each
-// pointer created.
-BallLarusEdge* BallLarusDag::createEdge(BallLarusNode* source,
- BallLarusNode* target,
- unsigned duplicateCount) {
- return( new BallLarusEdge(source, target, duplicateCount) );
-}
-
-// Proxy to node's constructor. Updates the DAG state.
-BallLarusNode* BallLarusDag::addNode(BasicBlock* BB) {
- BallLarusNode* newNode = createNode(BB);
- _nodes.push_back(newNode);
- return( newNode );
-}
-
-// Proxy to edge's constructor. Updates the DAG state.
-BallLarusEdge* BallLarusDag::addEdge(BallLarusNode* source,
- BallLarusNode* target,
- unsigned duplicateCount) {
- BallLarusEdge* newEdge = createEdge(source, target, duplicateCount);
- _edges.push_back(newEdge);
- source->addSuccEdge(newEdge);
- target->addPredEdge(newEdge);
- return(newEdge);
-}
-
-// Adds a backedge with its phony edges. Updates the DAG state.
-void BallLarusDag::addBackedge(BallLarusNode* source, BallLarusNode* target,
- unsigned duplicateCount) {
- BallLarusEdge* childEdge = addEdge(source, target, duplicateCount);
- childEdge->setType(BallLarusEdge::BACKEDGE);
-
- childEdge->setPhonyRoot(addEdge(getRoot(), target,0));
- childEdge->setPhonyExit(addEdge(source, getExit(),0));
-
- childEdge->getPhonyRoot()->setRealEdge(childEdge);
- childEdge->getPhonyRoot()->setType(BallLarusEdge::BACKEDGE_PHONY);
-
- childEdge->getPhonyExit()->setRealEdge(childEdge);
- childEdge->getPhonyExit()->setType(BallLarusEdge::BACKEDGE_PHONY);
- _backEdges.push_back(childEdge);
-}
diff --git a/lib/Analysis/PathProfileInfo.cpp b/lib/Analysis/PathProfileInfo.cpp
deleted file mode 100644
index bc53221..0000000
--- a/lib/Analysis/PathProfileInfo.cpp
+++ /dev/null
@@ -1,433 +0,0 @@
-//===- PathProfileInfo.cpp ------------------------------------*- C++ -*---===//
-//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-//
-// This file defines the interface used by optimizers to load path profiles,
-// and provides a loader pass which reads a path profile file.
-//
-//===----------------------------------------------------------------------===//
-#define DEBUG_TYPE "path-profile-info"
-
-#include "llvm/Analysis/PathProfileInfo.h"
-#include "llvm/Analysis/Passes.h"
-#include "llvm/Analysis/ProfileInfoTypes.h"
-#include "llvm/IR/Module.h"
-#include "llvm/Pass.h"
-#include "llvm/Support/CommandLine.h"
-#include "llvm/Support/Debug.h"
-#include "llvm/Support/raw_ostream.h"
-#include <cstdio>
-
-using namespace llvm;
-
-// command line option for loading path profiles
-static cl::opt<std::string>
-PathProfileInfoFilename("path-profile-loader-file", cl::init("llvmprof.out"),
- cl::value_desc("filename"),
- cl::desc("Path profile file loaded by -path-profile-loader"), cl::Hidden);
-
-namespace {
- class PathProfileLoaderPass : public ModulePass, public PathProfileInfo {
- public:
- PathProfileLoaderPass() : ModulePass(ID) { }
- ~PathProfileLoaderPass();
-
- // this pass doesn't change anything (only loads information)
- virtual void getAnalysisUsage(AnalysisUsage &AU) const {
- AU.setPreservesAll();
- }
-
- // the full name of the loader pass
- virtual const char* getPassName() const {
- return "Path Profiling Information Loader";
- }
-
- // required since this pass implements multiple inheritance
- virtual void *getAdjustedAnalysisPointer(AnalysisID PI) {
- if (PI == &PathProfileInfo::ID)
- return (PathProfileInfo*)this;
- return this;
- }
-
- // entry point to run the pass
- bool runOnModule(Module &M);
-
- // pass identification
- static char ID;
-
- private:
- // make a reference table to refer to function by number
- void buildFunctionRefs(Module &M);
-
- // process argument info of a program from the input file
- void handleArgumentInfo();
-
- // process path number information from the input file
- void handlePathInfo();
-
- // array of references to the functions in the module
- std::vector<Function*> _functions;
-
- // path profile file handle
- FILE* _file;
-
- // path profile file name
- std::string _filename;
- };
-}
-
-// register PathLoader
-char PathProfileLoaderPass::ID = 0;
-
-INITIALIZE_ANALYSIS_GROUP(PathProfileInfo, "Path Profile Information",
- NoPathProfileInfo)
-INITIALIZE_AG_PASS(PathProfileLoaderPass, PathProfileInfo,
- "path-profile-loader",
- "Load path profile information from file",
- false, true, false)
-
-char &llvm::PathProfileLoaderPassID = PathProfileLoaderPass::ID;
-
-// link PathLoader as a pass, and make it available to the optimizers
-ModulePass *llvm::createPathProfileLoaderPass() {
- return new PathProfileLoaderPass;
-}
-
-// ----------------------------------------------------------------------------
-// PathEdge implementation
-//
-ProfilePathEdge::ProfilePathEdge (BasicBlock* source, BasicBlock* target,
- unsigned duplicateNumber)
- : _source(source), _target(target), _duplicateNumber(duplicateNumber) {}
-
-// ----------------------------------------------------------------------------
-// Path implementation
-//
-
-ProfilePath::ProfilePath (unsigned int number, unsigned int count,
- double countStdDev, PathProfileInfo* ppi)
- : _number(number) , _count(count), _countStdDev(countStdDev), _ppi(ppi) {}
-
-double ProfilePath::getFrequency() const {
- return 100 * double(_count) /
- double(_ppi->_functionPathCounts[_ppi->_currentFunction]);
-}
-
-static BallLarusEdge* getNextEdge (BallLarusNode* node,
- unsigned int pathNumber) {
- BallLarusEdge* best = 0;
-
- for( BLEdgeIterator next = node->succBegin(),
- end = node->succEnd(); next != end; next++ ) {
- if( (*next)->getType() != BallLarusEdge::BACKEDGE && // no backedges
- (*next)->getType() != BallLarusEdge::SPLITEDGE && // no split edges
- (*next)->getWeight() <= pathNumber && // weight must be <= pathNumber
- (!best || (best->getWeight() < (*next)->getWeight())) ) // best one?
- best = *next;
- }
-
- return best;
-}
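
getNextEdge is the standard Ball-Larus decode step: among the usable successor edges, take the one with the largest weight not exceeding the remaining path number. A hedged sketch of the loop it supports, with hypothetical Node/Edge stand-ins for the classes above:

  std::vector<Edge*> decode(Node *Root, Node *Exit, unsigned PathNumber) {
    std::vector<Edge*> Path;
    for (Node *N = Root; N != Exit;) {
      Edge *E = getNextEdge(N, PathNumber); // largest weight <= PathNumber
      PathNumber -= E->Weight;              // consume the increment
      Path.push_back(E);
      N = E->Target;
    }
    return Path;
  }

For the diamond example in PathNumbering.cpp, number 1 first takes the weight-1 edge entry->else (leaving 0), then the weight-0 edge else->exit.
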
-
-ProfilePathEdgeVector* ProfilePath::getPathEdges() const {
- BallLarusNode* currentNode = _ppi->_currentDag->getRoot ();
- unsigned int increment = _number;
- ProfilePathEdgeVector* pev = new ProfilePathEdgeVector;
-
- while (currentNode != _ppi->_currentDag->getExit()) {
- BallLarusEdge* next = getNextEdge(currentNode, increment);
-
- increment -= next->getWeight();
-
- if( next->getType() != BallLarusEdge::BACKEDGE_PHONY &&
- next->getType() != BallLarusEdge::SPLITEDGE_PHONY &&
- next->getTarget() != _ppi->_currentDag->getExit() )
- pev->push_back(ProfilePathEdge(
- next->getSource()->getBlock(),
- next->getTarget()->getBlock(),
- next->getDuplicateNumber()));
-
- if( next->getType() == BallLarusEdge::BACKEDGE_PHONY &&
- next->getTarget() == _ppi->_currentDag->getExit() )
- pev->push_back(ProfilePathEdge(
- next->getRealEdge()->getSource()->getBlock(),
- next->getRealEdge()->getTarget()->getBlock(),
- next->getDuplicateNumber()));
-
- if( next->getType() == BallLarusEdge::SPLITEDGE_PHONY &&
- next->getSource() == _ppi->_currentDag->getRoot() )
- pev->push_back(ProfilePathEdge(
- next->getRealEdge()->getSource()->getBlock(),
- next->getRealEdge()->getTarget()->getBlock(),
- next->getDuplicateNumber()));
-
- // set the new node
- currentNode = next->getTarget();
- }
-
- return pev;
-}
-
-ProfilePathBlockVector* ProfilePath::getPathBlocks() const {
- BallLarusNode* currentNode = _ppi->_currentDag->getRoot ();
- unsigned int increment = _number;
- ProfilePathBlockVector* pbv = new ProfilePathBlockVector;
-
- while (currentNode != _ppi->_currentDag->getExit()) {
- BallLarusEdge* next = getNextEdge(currentNode, increment);
- increment -= next->getWeight();
-
- // add block to the block list if it is a real edge
- if( next->getType() == BallLarusEdge::NORMAL)
- pbv->push_back (currentNode->getBlock());
- // make the back edge the last edge since we are at the end
- else if( next->getTarget() == _ppi->_currentDag->getExit() ) {
- pbv->push_back (currentNode->getBlock());
- pbv->push_back (next->getRealEdge()->getTarget()->getBlock());
- }
-
- // set the new node
- currentNode = next->getTarget();
- }
-
- return pbv;
-}
-
-BasicBlock* ProfilePath::getFirstBlockInPath() const {
- BallLarusNode* root = _ppi->_currentDag->getRoot();
- BallLarusEdge* edge = getNextEdge(root, _number);
-
- if( edge && (edge->getType() == BallLarusEdge::BACKEDGE_PHONY ||
- edge->getType() == BallLarusEdge::SPLITEDGE_PHONY) )
- return edge->getTarget()->getBlock();
-
- return root->getBlock();
-}
-
-// ----------------------------------------------------------------------------
-// PathProfileInfo implementation
-//
-
-// Pass identification
-char llvm::PathProfileInfo::ID = 0;
-
-PathProfileInfo::PathProfileInfo () : _currentDag(0) , _currentFunction(0) {
-}
-
-PathProfileInfo::~PathProfileInfo() {
- if (_currentDag)
- delete _currentDag;
-}
-
-// set the function for which paths are currently being processed
-void PathProfileInfo::setCurrentFunction(Function* F) {
- // Make sure it exists
- if (!F) return;
-
- if (_currentDag)
- delete _currentDag;
-
- _currentFunction = F;
- _currentDag = new BallLarusDag(*F);
- _currentDag->init();
- _currentDag->calculatePathNumbers();
-}
-
-// get the function for which paths are currently being processed
-Function* PathProfileInfo::getCurrentFunction() const {
- return _currentFunction;
-}
-
-// get the entry block of the function
-BasicBlock* PathProfileInfo::getCurrentFunctionEntry() {
- return _currentDag->getRoot()->getBlock();
-}
-
-// return the path based on its number
-ProfilePath* PathProfileInfo::getPath(unsigned int number) {
- return _functionPaths[_currentFunction][number];
-}
-
-// return the number of paths which a function may potentially execute
-unsigned int PathProfileInfo::getPotentialPathCount() {
- return _currentDag ? _currentDag->getNumberOfPaths() : 0;
-}
-
-// return an iterator for the beginning of a function's executed paths
-ProfilePathIterator PathProfileInfo::pathBegin() {
- return _functionPaths[_currentFunction].begin();
-}
-
-// return an iterator for the end of a function's executed paths
-ProfilePathIterator PathProfileInfo::pathEnd() {
- return _functionPaths[_currentFunction].end();
-}
-
-// returns the total number of paths run in the function
-unsigned int PathProfileInfo::pathsRun() {
- return _currentFunction ? _functionPaths[_currentFunction].size() : 0;
-}
-
-// ----------------------------------------------------------------------------
-// PathLoader implementation
-//
-
-// remove all generated paths
-PathProfileLoaderPass::~PathProfileLoaderPass() {
- for( FunctionPathIterator funcNext = _functionPaths.begin(),
- funcEnd = _functionPaths.end(); funcNext != funcEnd; funcNext++)
- for( ProfilePathIterator pathNext = funcNext->second.begin(),
- pathEnd = funcNext->second.end(); pathNext != pathEnd; pathNext++)
- delete pathNext->second;
-}
-
-// entry point of the pass; this loads and parses a file
-bool PathProfileLoaderPass::runOnModule(Module &M) {
- // get the filename and setup the module's function references
- _filename = PathProfileInfoFilename;
- buildFunctionRefs (M);
-
- if (!(_file = fopen(_filename.c_str(), "rb"))) {
- errs () << "error: input '" << _filename << "' file does not exist.\n";
- return false;
- }
-
- ProfilingType profType;
-
- while( fread(&profType, sizeof(ProfilingType), 1, _file) ) {
- switch (profType) {
- case ArgumentInfo:
- handleArgumentInfo ();
- break;
- case PathInfo:
- handlePathInfo ();
- break;
- default:
- errs () << "error: bad path profiling file syntax, " << profType << "\n";
- fclose (_file);
- return false;
- }
- }
-
- fclose (_file);
-
- return true;
-}
-
-// create a reference table for functions defined in the path profile file
-void PathProfileLoaderPass::buildFunctionRefs (Module &M) {
- _functions.push_back(0); // make the 0 index a null pointer
-
- for (Module::iterator F = M.begin(), E = M.end(); F != E; F++) {
- if (F->isDeclaration())
- continue;
- _functions.push_back(F);
- }
-}
-
-// handle command-line argument info in the output file
-void PathProfileLoaderPass::handleArgumentInfo() {
- // get the argument list's length
- unsigned savedArgsLength;
- if( fread(&savedArgsLength, sizeof(unsigned), 1, _file) != 1 ) {
- errs() << "warning: argument info header/data mismatch\n";
- return;
- }
-
- // allocate a buffer, and get the arguments
- char* args = new char[savedArgsLength+1];
- if( fread(args, 1, savedArgsLength, _file) != savedArgsLength )
- errs() << "warning: argument info header/data mismatch\n";
-
- args[savedArgsLength] = '\0';
- argList = std::string(args);
- delete [] args; // cleanup dynamic string
-
- // byte alignment
- if (savedArgsLength & 3)
- fseek(_file, 4-(savedArgsLength&3), SEEK_CUR);
-}
-
-// Handle path profile information in the output file
-void PathProfileLoaderPass::handlePathInfo () {
- // get the number of functions in this profile
- unsigned functionCount;
- if( fread(&functionCount, sizeof(functionCount), 1, _file) != 1 ) {
- errs() << "warning: path info header/data mismatch\n";
- return;
- }
-
- // gather path information for each function
- for (unsigned i = 0; i < functionCount; i++) {
- PathProfileHeader pathHeader;
- if( fread(&pathHeader, sizeof(pathHeader), 1, _file) != 1 ) {
- errs() << "warning: bad header for path function info\n";
- break;
- }
-
- Function* f = _functions[pathHeader.fnNumber];
-
- // dynamically allocate a table to store path numbers
- PathProfileTableEntry* pathTable =
- new PathProfileTableEntry[pathHeader.numEntries];
-
- if( fread(pathTable, sizeof(PathProfileTableEntry),
- pathHeader.numEntries, _file) != pathHeader.numEntries) {
- delete [] pathTable;
- errs() << "warning: path function info header/data mismatch\n";
- return;
- }
-
- // Build a new path for the current function
- unsigned int totalPaths = 0;
- for (unsigned int j = 0; j < pathHeader.numEntries; j++) {
- totalPaths += pathTable[j].pathCounter;
- _functionPaths[f][pathTable[j].pathNumber]
- = new ProfilePath(pathTable[j].pathNumber, pathTable[j].pathCounter,
- 0, this);
- }
-
- _functionPathCounts[f] = totalPaths;
-
- delete [] pathTable;
- }
-}
-
-//===----------------------------------------------------------------------===//
-// NoProfile PathProfileInfo implementation
-//
-
-namespace {
- struct NoPathProfileInfo : public ImmutablePass, public PathProfileInfo {
- static char ID; // Class identification, replacement for typeinfo
- NoPathProfileInfo() : ImmutablePass(ID) {
- initializeNoPathProfileInfoPass(*PassRegistry::getPassRegistry());
- }
-
- /// getAdjustedAnalysisPointer - This method is used when a pass implements
- /// an analysis interface through multiple inheritance. If needed, it
- /// should override this to adjust the this pointer as needed for the
- /// specified pass info.
- virtual void *getAdjustedAnalysisPointer(AnalysisID PI) {
- if (PI == &PathProfileInfo::ID)
- return (PathProfileInfo*)this;
- return this;
- }
-
- virtual const char *getPassName() const {
- return "NoPathProfileInfo";
- }
- };
-} // End of anonymous namespace
-
-char NoPathProfileInfo::ID = 0;
-// Register this pass...
-INITIALIZE_AG_PASS(NoPathProfileInfo, PathProfileInfo, "no-path-profile",
- "No Path Profile Information", false, true, true)
-
-ImmutablePass *llvm::createNoPathProfileInfoPass() { return new NoPathProfileInfo(); }
diff --git a/lib/Analysis/PathProfileVerifier.cpp b/lib/Analysis/PathProfileVerifier.cpp
deleted file mode 100644
index 48d7d05..0000000
--- a/lib/Analysis/PathProfileVerifier.cpp
+++ /dev/null
@@ -1,206 +0,0 @@
-//===- PathProfileVerifier.cpp --------------------------------*- C++ -*---===//
-//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-//
-// This verifier derives an edge profile file from the current path profile
-// information.
-//
-//===----------------------------------------------------------------------===//
-#define DEBUG_TYPE "path-profile-verifier"
-
-#include "llvm/Analysis/Passes.h"
-#include "llvm/Analysis/PathProfileInfo.h"
-#include "llvm/Analysis/ProfileInfoTypes.h"
-#include "llvm/IR/Module.h"
-#include "llvm/Pass.h"
-#include "llvm/Support/CommandLine.h"
-#include "llvm/Support/Debug.h"
-#include "llvm/Support/raw_ostream.h"
-#include <stdio.h>
-
-using namespace llvm;
-
-namespace {
- class PathProfileVerifier : public ModulePass {
- private:
- bool runOnModule(Module &M);
-
- public:
- static char ID; // Pass identification, replacement for typeid
- PathProfileVerifier() : ModulePass(ID) {
- initializePathProfileVerifierPass(*PassRegistry::getPassRegistry());
- }
-
-
- virtual const char *getPassName() const {
- return "Path Profiler Verifier";
- }
-
- // The verifier requires the path profile and edge profile.
- virtual void getAnalysisUsage(AnalysisUsage& AU) const;
- };
-}
-
-static cl::opt<std::string>
-EdgeProfileFilename("path-profile-verifier-file",
- cl::init("edgefrompath.llvmprof.out"),
- cl::value_desc("filename"),
- cl::desc("Edge profile file generated by -path-profile-verifier"),
- cl::Hidden);
-
-char PathProfileVerifier::ID = 0;
-INITIALIZE_PASS(PathProfileVerifier, "path-profile-verifier",
- "Compare the path profile derived edge profile against the "
- "edge profile.", true, true)
-
-ModulePass *llvm::createPathProfileVerifierPass() {
- return new PathProfileVerifier();
-}
-
-// The verifier requires the path profile and edge profile.
-void PathProfileVerifier::getAnalysisUsage(AnalysisUsage& AU) const {
- AU.addRequired<PathProfileInfo>();
- AU.addPreserved<PathProfileInfo>();
-}
-
-typedef std::map<unsigned, unsigned> DuplicateToIndexMap;
-typedef std::map<BasicBlock*,DuplicateToIndexMap> BlockToDuplicateMap;
-typedef std::map<BasicBlock*,BlockToDuplicateMap> NestedBlockToIndexMap;
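
These nested maps index a flat counter array by (source, target, duplicate number); the duplicate dimension distinguishes multi-graph edges, i.e. a terminator listing the same successor more than once. A runnable sketch of the same indexing with ints standing in for BasicBlock*:

  #include <cstdio>
  #include <map>

  int main() {
    std::map<int, std::map<int, std::map<int, unsigned> > > arrayMap;
    unsigned i = 0;
    int succs[3] = {7, 7, 8};               // successor 7 listed twice
    int prev = -1;
    unsigned duplicate = 0;
    for (int s = 0; s < 3; prev = succs[s], ++s) {
      duplicate = (prev == succs[s]) ? duplicate + 1 : 0;
      arrayMap[1][succs[s]][duplicate] = i++;
    }
    // Prints: 1->7#0 at 0, 1->7#1 at 1, 1->8#0 at 2
    printf("1->7#0 at %u, 1->7#1 at %u, 1->8#0 at %u\n",
           arrayMap[1][7][0], arrayMap[1][7][1], arrayMap[1][8][0]);
    return 0;
  }
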
-
-// The verifier iterates through each path to accumulate the
-// total edge frequencies.
-bool PathProfileVerifier::runOnModule (Module &M) {
- PathProfileInfo& pathProfileInfo = getAnalysis<PathProfileInfo>();
-
- // setup a data structure to map path edges which index an
- // array of edge counters
- NestedBlockToIndexMap arrayMap;
- unsigned i = 0;
- for (Module::iterator F = M.begin(), E = M.end(); F != E; ++F) {
- if (F->isDeclaration()) continue;
-
- arrayMap[(BasicBlock*)0][F->begin()][0] = i++;
-
- for (Function::iterator BB = F->begin(), E = F->end(); BB != E; ++BB) {
- TerminatorInst *TI = BB->getTerminator();
-
- unsigned duplicate = 0;
- BasicBlock* prev = 0;
- for (unsigned s = 0, e = TI->getNumSuccessors(); s != e;
- prev = TI->getSuccessor(s), ++s) {
- if (prev == TI->getSuccessor(s))
- duplicate++;
- else duplicate = 0;
-
- arrayMap[BB][TI->getSuccessor(s)][duplicate] = i++;
- }
- }
- }
-
- std::vector<unsigned> edgeArray(i);
-
- // iterate through each path and increment the edge counters as needed
- for (Module::iterator F = M.begin(), E = M.end(); F != E; ++F) {
- if (F->isDeclaration()) continue;
-
- pathProfileInfo.setCurrentFunction(F);
-
- DEBUG(dbgs() << "function '" << F->getName() << "' ran "
- << pathProfileInfo.pathsRun()
- << "/" << pathProfileInfo.getPotentialPathCount()
- << " potential paths\n");
-
- for( ProfilePathIterator nextPath = pathProfileInfo.pathBegin(),
- endPath = pathProfileInfo.pathEnd();
- nextPath != endPath; nextPath++ ) {
- ProfilePath* currentPath = nextPath->second;
-
- ProfilePathEdgeVector* pev = currentPath->getPathEdges();
- DEBUG(dbgs () << "path #" << currentPath->getNumber() << ": "
- << currentPath->getCount() << "\n");
- // setup the entry edge (normally path profiling doesn't care about this)
- if (currentPath->getFirstBlockInPath() == &F->getEntryBlock())
- edgeArray[arrayMap[(BasicBlock*)0][currentPath->getFirstBlockInPath()][0]]
- += currentPath->getCount();
-
- for( ProfilePathEdgeIterator nextEdge = pev->begin(),
- endEdge = pev->end(); nextEdge != endEdge; nextEdge++ ) {
- if (nextEdge != pev->begin())
- DEBUG(dbgs() << " :: ");
-
- BasicBlock* source = nextEdge->getSource();
- BasicBlock* target = nextEdge->getTarget();
- unsigned duplicateNumber = nextEdge->getDuplicateNumber();
- DEBUG(dbgs() << source->getName() << " --{" << duplicateNumber
- << "}--> " << target->getName());
-
- // Ensure all the referenced edges exist
- // TODO: make this a separate function
- if( !arrayMap.count(source) ) {
- errs() << " error [" << F->getName() << "()]: source '"
- << source->getName()
- << "' does not exist in the array map.\n";
- } else if( !arrayMap[source].count(target) ) {
- errs() << " error [" << F->getName() << "()]: target '"
- << target->getName()
- << "' does not exist in the array map.\n";
- } else if( !arrayMap[source][target].count(duplicateNumber) ) {
- errs() << " error [" << F->getName() << "()]: edge "
- << source->getName() << " -> " << target->getName()
- << " duplicate number " << duplicateNumber
- << " does not exist in the array map.\n";
- } else {
- edgeArray[arrayMap[source][target][duplicateNumber]]
- += currentPath->getCount();
- }
- }
-
- DEBUG(errs() << "\n");
-
- delete pev;
- }
- }
-
- std::string errorInfo;
- std::string filename = EdgeProfileFilename;
-
- // Open a handle to the file
- FILE* edgeFile = fopen(filename.c_str(),"wb");
-
- if (!edgeFile) {
- errs() << "error: unable to open file '" << filename << "' for output.\n";
- return false;
- }
-
- errs() << "Generating edge profile '" << filename << "' ...\n";
-
- // write argument info
- unsigned type = ArgumentInfo;
- unsigned num = pathProfileInfo.argList.size();
- int zeros = 0;
-
- fwrite(&type,sizeof(unsigned),1,edgeFile);
- fwrite(&num,sizeof(unsigned),1,edgeFile);
- fwrite(pathProfileInfo.argList.c_str(),1,num,edgeFile);
- if (num&3)
- fwrite(&zeros, 1, 4-(num&3), edgeFile);
-
- type = EdgeInfo;
- num = edgeArray.size();
- fwrite(&type,sizeof(unsigned),1,edgeFile);
- fwrite(&num,sizeof(unsigned),1,edgeFile);
-
- // write each edge to the file
- for( std::vector<unsigned>::iterator s = edgeArray.begin(),
- e = edgeArray.end(); s != e; s++)
- fwrite(&*s, sizeof (unsigned), 1, edgeFile);
-
- fclose (edgeFile);
-
- return true;
-}
diff --git a/lib/Analysis/ProfileDataLoader.cpp b/lib/Analysis/ProfileDataLoader.cpp
deleted file mode 100644
index 3d0a1e2..0000000
--- a/lib/Analysis/ProfileDataLoader.cpp
+++ /dev/null
@@ -1,155 +0,0 @@
-//===- ProfileDataLoader.cpp - Load profile information from disk ---------===//
-//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-//
-// The ProfileDataLoader class is used to load raw profiling data from the dump
-// file.
-//
-//===----------------------------------------------------------------------===//
-
-#include "llvm/Analysis/ProfileDataLoader.h"
-#include "llvm/ADT/ArrayRef.h"
-#include "llvm/ADT/OwningPtr.h"
-#include "llvm/Analysis/ProfileDataTypes.h"
-#include "llvm/IR/InstrTypes.h"
-#include "llvm/IR/Module.h"
-#include "llvm/Support/raw_ostream.h"
-#include "llvm/Support/system_error.h"
-#include <cstdio>
-#include <cstdlib>
-using namespace llvm;
-
-raw_ostream &llvm::operator<<(raw_ostream &O, std::pair<const BasicBlock *,
- const BasicBlock *> E) {
- O << "(";
-
- if (E.first)
- O << E.first->getName();
- else
- O << "0";
-
- O << ",";
-
- if (E.second)
- O << E.second->getName();
- else
- O << "0";
-
- return O << ")";
-}
-
-/// AddCounts - Add 'A' and 'B', accounting for the fact that the value of one
-/// (or both) may not be defined.
-static unsigned AddCounts(unsigned A, unsigned B) {
- // If either value is undefined, use the other.
- // Undefined + undefined = undefined.
- if (A == ProfileDataLoader::Uncounted) return B;
- if (B == ProfileDataLoader::Uncounted) return A;
-
- return A + B;
-}
-
-/// ReadProfilingData - Load 'NumEntries' items of type 'T' from file 'F'
-template <typename T>
-static void ReadProfilingData(const char *ToolName, FILE *F,
- T *Data, size_t NumEntries) {
- // Read in the block of data...
- if (fread(Data, sizeof(T), NumEntries, F) != NumEntries)
- report_fatal_error(Twine(ToolName) + ": Profiling data truncated");
-}
-
-/// ReadProfilingNumEntries - Read how many entries are in this profiling data
-/// packet.
-static unsigned ReadProfilingNumEntries(const char *ToolName, FILE *F,
- bool ShouldByteSwap) {
- unsigned Entry;
- ReadProfilingData<unsigned>(ToolName, F, &Entry, 1);
- return ShouldByteSwap ? ByteSwap_32(Entry) : Entry;
-}
-
-/// ReadProfilingBlock - Read the number of entries in the next profiling data
-/// packet and then accumulate the entries into 'Data'.
-static void ReadProfilingBlock(const char *ToolName, FILE *F,
- bool ShouldByteSwap,
- SmallVectorImpl<unsigned> &Data) {
- // Read the number of entries...
- unsigned NumEntries = ReadProfilingNumEntries(ToolName, F, ShouldByteSwap);
-
- // Read in the data.
- SmallVector<unsigned, 8> TempSpace(NumEntries);
- ReadProfilingData<unsigned>(ToolName, F, TempSpace.data(), NumEntries);
-
- // Make sure we have enough space ...
- if (Data.size() < NumEntries)
- Data.resize(NumEntries, ProfileDataLoader::Uncounted);
-
- // Accumulate the data we just read into the existing data.
- for (unsigned i = 0; i < NumEntries; ++i) {
- unsigned Entry = ShouldByteSwap ? ByteSwap_32(TempSpace[i]) : TempSpace[i];
- Data[i] = AddCounts(Entry, Data[i]);
- }
-}
-
-/// ReadProfilingArgBlock - Read the command line arguments that the program was
-/// run with when the current profiling data packet(s) were generated.
-static void ReadProfilingArgBlock(const char *ToolName, FILE *F,
- bool ShouldByteSwap,
- SmallVectorImpl<std::string> &CommandLines) {
- // Read the number of bytes ...
- unsigned ArgLength = ReadProfilingNumEntries(ToolName, F, ShouldByteSwap);
-
- // Read in the arguments (if there are any to read). Round up the length to
- // the nearest 4-byte multiple.
- SmallVector<char, 8> Args(ArgLength+4);
- if (ArgLength)
- ReadProfilingData<char>(ToolName, F, Args.data(), (ArgLength+3) & ~3);
-
- // Store the arguments.
- CommandLines.push_back(std::string(&Args[0], &Args[ArgLength]));
-}
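
The (ArgLength+3) & ~3 expression rounds the read size up to the next multiple of four, matching the 4-byte padding the profile writer emits. A one-line sketch:

  // roundUp4(6) == 8, roundUp4(8) == 8: consume the writer's padding too.
  static unsigned roundUp4(unsigned n) { return (n + 3) & ~3u; }
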
-
-const unsigned ProfileDataLoader::Uncounted = ~0U;
-
-/// ProfileDataLoader ctor - Read the specified profiling data file, reporting
-/// a fatal error if the file is invalid or broken.
-ProfileDataLoader::ProfileDataLoader(const char *ToolName,
- const std::string &Filename)
- : Filename(Filename) {
- FILE *F = fopen(Filename.c_str(), "rb");
- if (F == 0)
- report_fatal_error(Twine(ToolName) + ": Error opening '" +
- Filename + "': ");
-
- // Keep reading packets until we run out of them.
- unsigned PacketType;
- while (fread(&PacketType, sizeof(unsigned), 1, F) == 1) {
- // If the low eight bits of the packet are zero, we must be dealing with an
- // endianness mismatch. Byteswap all words read from the profiling
- // information. This can happen when the compiler host and target have
- // different endianness.
- bool ShouldByteSwap = (char)PacketType == 0;
- PacketType = ShouldByteSwap ? ByteSwap_32(PacketType) : PacketType;
-
- switch (PacketType) {
- case ArgumentInfo:
- ReadProfilingArgBlock(ToolName, F, ShouldByteSwap, CommandLines);
- break;
-
- case EdgeInfo:
- ReadProfilingBlock(ToolName, F, ShouldByteSwap, EdgeCounts);
- break;
-
- default:
- report_fatal_error(std::string(ToolName)
- + ": Unknown profiling packet type");
- break;
- }
- }
-
- fclose(F);
-}
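
The low-byte heuristic works because packet types are small enumerators: written natively, the low byte of the word is nonzero, while a cross-endian read moves that nonzero byte to the top. A standalone sketch (the value 2 is only an assumed example of a packet type):

  #include <cstdint>
  #include <cstdio>

  static uint32_t bswap32(uint32_t x) {
    return (x >> 24) | ((x >> 8) & 0xFF00) | ((x << 8) & 0xFF0000) | (x << 24);
  }

  int main() {
    uint32_t PacketType = bswap32(2);            // as read on an opposite-endian host
    bool ShouldByteSwap = (char)PacketType == 0; // low byte zero => mismatch
    if (ShouldByteSwap)
      PacketType = bswap32(PacketType);
    printf("swap=%d type=%u\n", ShouldByteSwap, PacketType); // swap=1 type=2
    return 0;
  }
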
diff --git a/lib/Analysis/ProfileDataLoaderPass.cpp b/lib/Analysis/ProfileDataLoaderPass.cpp
deleted file mode 100644
index 2ee0093..0000000
--- a/lib/Analysis/ProfileDataLoaderPass.cpp
+++ /dev/null
@@ -1,188 +0,0 @@
-//===- ProfileDataLoaderPass.cpp - Set branch weight metadata from prof ---===//
-//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-//
-// This pass loads profiling data from a dump file and sets branch weight
-// metadata.
-//
-// TODO: Replace all "profile-metadata-loader" strings with "profile-loader"
-// once ProfileInfo etc. has been removed.
-//
-//===----------------------------------------------------------------------===//
-#define DEBUG_TYPE "profile-metadata-loader"
-#include "llvm/Analysis/Passes.h"
-#include "llvm/ADT/ArrayRef.h"
-#include "llvm/ADT/Statistic.h"
-#include "llvm/Analysis/ProfileDataLoader.h"
-#include "llvm/IR/BasicBlock.h"
-#include "llvm/IR/InstrTypes.h"
-#include "llvm/IR/LLVMContext.h"
-#include "llvm/IR/MDBuilder.h"
-#include "llvm/IR/Metadata.h"
-#include "llvm/IR/Module.h"
-#include "llvm/Pass.h"
-#include "llvm/Support/CFG.h"
-#include "llvm/Support/CommandLine.h"
-#include "llvm/Support/Debug.h"
-#include "llvm/Support/Format.h"
-#include "llvm/Support/raw_ostream.h"
-using namespace llvm;
-
-STATISTIC(NumEdgesRead, "The # of edges read.");
-STATISTIC(NumTermsAnnotated, "The # of terminator instructions annotated.");
-
-static cl::opt<std::string>
-ProfileMetadataFilename("profile-file", cl::init("llvmprof.out"),
- cl::value_desc("filename"),
- cl::desc("Profile file loaded by -profile-metadata-loader"));
-
-namespace {
- /// This pass loads profiling data from a dump file and sets branch weight
- /// metadata.
- class ProfileMetadataLoaderPass : public ModulePass {
- std::string Filename;
- public:
- static char ID; // Class identification, replacement for typeinfo
- explicit ProfileMetadataLoaderPass(const std::string &filename = "")
- : ModulePass(ID), Filename(filename) {
- initializeProfileMetadataLoaderPassPass(*PassRegistry::getPassRegistry());
- if (filename.empty()) Filename = ProfileMetadataFilename;
- }
-
- virtual void getAnalysisUsage(AnalysisUsage &AU) const {
- AU.setPreservesAll();
- }
-
- virtual const char *getPassName() const {
- return "Profile loader";
- }
-
- virtual void readEdge(unsigned, ProfileData&, ProfileData::Edge,
- ArrayRef<unsigned>);
- virtual unsigned matchEdges(Module&, ProfileData&, ArrayRef<unsigned>);
- virtual void setBranchWeightMetadata(Module&, ProfileData&);
-
- virtual bool runOnModule(Module &M);
- };
-} // End of anonymous namespace
-
-char ProfileMetadataLoaderPass::ID = 0;
-INITIALIZE_PASS_BEGIN(ProfileMetadataLoaderPass, "profile-metadata-loader",
- "Load profile information from llvmprof.out", false, true)
-INITIALIZE_PASS_END(ProfileMetadataLoaderPass, "profile-metadata-loader",
- "Load profile information from llvmprof.out", false, true)
-
-char &llvm::ProfileMetadataLoaderPassID = ProfileMetadataLoaderPass::ID;
-
-/// createProfileMetadataLoaderPass - This function returns a Pass that loads
-/// the profiling information for the module from the specified filename,
-/// making it available to the optimizers.
-ModulePass *llvm::createProfileMetadataLoaderPass() {
- return new ProfileMetadataLoaderPass();
-}
-ModulePass *llvm::createProfileMetadataLoaderPass(const std::string &Filename) {
- return new ProfileMetadataLoaderPass(Filename);
-}
-
-/// readEdge - Take the value from a profile counter and assign it to an edge.
-void ProfileMetadataLoaderPass::readEdge(unsigned ReadCount,
- ProfileData &PB, ProfileData::Edge e,
- ArrayRef<unsigned> Counters) {
- if (ReadCount >= Counters.size()) return;
-
- unsigned weight = Counters[ReadCount];
- assert(weight != ProfileDataLoader::Uncounted);
- PB.addEdgeWeight(e, weight);
-
- DEBUG(dbgs() << "-- Read Edge Counter for " << e
- << " (# "<< (ReadCount) << "): "
- << PB.getEdgeWeight(e) << "\n");
-}
-
-/// matchEdges - Link every profile counter with an edge.
-unsigned ProfileMetadataLoaderPass::matchEdges(Module &M, ProfileData &PB,
- ArrayRef<unsigned> Counters) {
- if (Counters.size() == 0) return 0;
-
- unsigned ReadCount = 0;
-
- for (Module::iterator F = M.begin(), E = M.end(); F != E; ++F) {
- if (F->isDeclaration()) continue;
- DEBUG(dbgs() << "Loading edges in '" << F->getName() << "'\n");
- readEdge(ReadCount++, PB, PB.getEdge(0, &F->getEntryBlock()), Counters);
- for (Function::iterator BB = F->begin(), E = F->end(); BB != E; ++BB) {
- TerminatorInst *TI = BB->getTerminator();
- for (unsigned s = 0, e = TI->getNumSuccessors(); s != e; ++s) {
- readEdge(ReadCount++, PB, PB.getEdge(BB,TI->getSuccessor(s)),
- Counters);
- }
- }
- }
-
- return ReadCount;
-}
-
-/// setBranchWeightMetadata - Translate the counter values associated with each
-/// edge into branch weights for each conditional branch (a branch with 2 or
-/// more desinations).
-void ProfileMetadataLoaderPass::setBranchWeightMetadata(Module &M,
- ProfileData &PB) {
- for (Module::iterator F = M.begin(), E = M.end(); F != E; ++F) {
- if (F->isDeclaration()) continue;
- DEBUG(dbgs() << "Setting branch metadata in '" << F->getName() << "'\n");
-
- for (Function::iterator BB = F->begin(), E = F->end(); BB != E; ++BB) {
- TerminatorInst *TI = BB->getTerminator();
- unsigned NumSuccessors = TI->getNumSuccessors();
-
-      // If there is only one successor then we cannot set a branch
-      // probability, as the target is certain.
- if (NumSuccessors < 2) continue;
-
- // Load the weights of all edges leading from this terminator.
- DEBUG(dbgs() << "-- Terminator with " << NumSuccessors
- << " successors:\n");
- SmallVector<uint32_t, 4> Weights(NumSuccessors);
- for (unsigned s = 0 ; s < NumSuccessors ; ++s) {
- ProfileData::Edge edge = PB.getEdge(BB, TI->getSuccessor(s));
- Weights[s] = (uint32_t)PB.getEdgeWeight(edge);
- DEBUG(dbgs() << "---- Edge '" << edge << "' has weight "
- << Weights[s] << "\n");
- }
-
- // Set branch weight metadata. This will set branch probabilities of
- // 100%/0% if that is true of the dynamic execution.
- // BranchProbabilityInfo can account for this when it loads this metadata
-      // (it gives the unexecuted branch a weight of 1 for the purposes of
- // probability calculations).
- MDBuilder MDB(TI->getContext());
- MDNode *Node = MDB.createBranchWeights(Weights);
- TI->setMetadata(LLVMContext::MD_prof, Node);
- NumTermsAnnotated++;
- }
- }
-}
-
-bool ProfileMetadataLoaderPass::runOnModule(Module &M) {
- ProfileDataLoader PDL("profile-data-loader", Filename);
- ProfileData PB;
-
- ArrayRef<unsigned> Counters = PDL.getRawEdgeCounts();
-
- unsigned ReadCount = matchEdges(M, PB, Counters);
-
- if (ReadCount != Counters.size()) {
- errs() << "WARNING: profile information is inconsistent with "
- << "the current program!\n";
- }
- NumEdgesRead = ReadCount;
-
- setBranchWeightMetadata(M, PB);
-
- return ReadCount > 0;
-}
diff --git a/lib/Analysis/ProfileEstimatorPass.cpp b/lib/Analysis/ProfileEstimatorPass.cpp
deleted file mode 100644
index 365b64c..0000000
--- a/lib/Analysis/ProfileEstimatorPass.cpp
+++ /dev/null
@@ -1,426 +0,0 @@
-//===- ProfileEstimatorPass.cpp - LLVM Pass to estimate profile info ------===//
-//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-//
-// This file implements a concrete ProfileInfo provider that estimates
-// profiling information in a very crude and unimaginative way.
-//
-//===----------------------------------------------------------------------===//
-#define DEBUG_TYPE "profile-estimator"
-#include "llvm/Analysis/Passes.h"
-#include "llvm/Analysis/LoopInfo.h"
-#include "llvm/Analysis/ProfileInfo.h"
-#include "llvm/Pass.h"
-#include "llvm/Support/CommandLine.h"
-#include "llvm/Support/Debug.h"
-#include "llvm/Support/Format.h"
-#include "llvm/Support/raw_ostream.h"
-using namespace llvm;
-
-static cl::opt<double>
-LoopWeight(
- "profile-estimator-loop-weight", cl::init(10),
- cl::value_desc("loop-weight"),
- cl::desc("Number of loop executions used for profile-estimator")
-);
-
-namespace {
- class ProfileEstimatorPass : public FunctionPass, public ProfileInfo {
- double ExecCount;
- LoopInfo *LI;
- std::set<BasicBlock*> BBToVisit;
- std::map<Loop*,double> LoopExitWeights;
- std::map<Edge,double> MinimalWeight;
- public:
- static char ID; // Class identification, replacement for typeinfo
- explicit ProfileEstimatorPass(const double execcount = 0)
- : FunctionPass(ID), ExecCount(execcount) {
- initializeProfileEstimatorPassPass(*PassRegistry::getPassRegistry());
- if (execcount == 0) ExecCount = LoopWeight;
- }
-
- virtual void getAnalysisUsage(AnalysisUsage &AU) const {
- AU.setPreservesAll();
- AU.addRequired<LoopInfo>();
- }
-
- virtual const char *getPassName() const {
- return "Profiling information estimator";
- }
-
- /// run - Estimate the profile information from the specified file.
- virtual bool runOnFunction(Function &F);
-
- /// getAdjustedAnalysisPointer - This method is used when a pass implements
- /// an analysis interface through multiple inheritance. If needed, it
- /// should override this to adjust the this pointer as needed for the
- /// specified pass info.
- virtual void *getAdjustedAnalysisPointer(AnalysisID PI) {
- if (PI == &ProfileInfo::ID)
- return (ProfileInfo*)this;
- return this;
- }
-
- virtual void recurseBasicBlock(BasicBlock *BB);
-
- void inline printEdgeWeight(Edge);
- };
-} // End of anonymous namespace
-
-char ProfileEstimatorPass::ID = 0;
-INITIALIZE_AG_PASS_BEGIN(ProfileEstimatorPass, ProfileInfo, "profile-estimator",
- "Estimate profiling information", false, true, false)
-INITIALIZE_PASS_DEPENDENCY(LoopInfo)
-INITIALIZE_AG_PASS_END(ProfileEstimatorPass, ProfileInfo, "profile-estimator",
- "Estimate profiling information", false, true, false)
-
-namespace llvm {
- char &ProfileEstimatorPassID = ProfileEstimatorPass::ID;
-
- FunctionPass *createProfileEstimatorPass() {
- return new ProfileEstimatorPass();
- }
-
- /// createProfileEstimatorPass - This function returns a Pass that estimates
- /// profiling information using the given loop execution count.
- Pass *createProfileEstimatorPass(const unsigned execcount) {
- return new ProfileEstimatorPass(execcount);
- }
-}
-
-static double ignoreMissing(double w) {
- if (w == ProfileInfo::MissingValue) return 0;
- return w;
-}
-
-static void inline printEdgeError(ProfileInfo::Edge e, const char *M) {
- DEBUG(dbgs() << "-- Edge " << e << " is not calculated, " << M << "\n");
-}
-
-void inline ProfileEstimatorPass::printEdgeWeight(Edge E) {
- DEBUG(dbgs() << "-- Weight of Edge " << E << ":"
- << format("%20.20g", getEdgeWeight(E)) << "\n");
-}
-
-// recurseBasicBlock() - This calculates the ProfileInfo estimation for a
-// single block and then recurses into the successors.
-// The algorithm preserves the flow condition, meaning that the sum of the
-// weights of the incoming edges must equal the block weight, which must in
-// turn equal the sum of the weights of the outgoing edges.
-// Since the flow of a block is determined from the current state of the
-// flow, once an edge has a flow assigned this flow is never changed again;
-// otherwise it would be possible to violate the flow condition in another
-// block.
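
Stated as an equation, the flow condition for a block B is sum(in-edges of B) = weight(B) = sum(out-edges of B). A tiny checker sketch with a hypothetical Block type, independent of the pass:

  #include <cassert>
  #include <cmath>
  #include <numeric>
  #include <vector>

  struct Block { double Weight; std::vector<double> In, Out; };

  // Asserts the conservation property the estimator maintains per block.
  void checkFlow(const Block &B) {
    double In = std::accumulate(B.In.begin(), B.In.end(), 0.0);
    double Out = std::accumulate(B.Out.begin(), B.Out.end(), 0.0);
    assert(std::fabs(In - B.Weight) < 1e-9 && std::fabs(Out - B.Weight) < 1e-9);
  }
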
-void ProfileEstimatorPass::recurseBasicBlock(BasicBlock *BB) {
-
- // Break the recursion if this BasicBlock was already visited.
- if (BBToVisit.find(BB) == BBToVisit.end()) return;
-
- // Read the LoopInfo for this block.
- bool BBisHeader = LI->isLoopHeader(BB);
- Loop* BBLoop = LI->getLoopFor(BB);
-
- // To get the block weight, read all incoming edges.
- double BBWeight = 0;
- std::set<BasicBlock*> ProcessedPreds;
- for ( pred_iterator bbi = pred_begin(BB), bbe = pred_end(BB);
- bbi != bbe; ++bbi ) {
- // If this block was not considered already, add weight.
- Edge edge = getEdge(*bbi,BB);
- double w = getEdgeWeight(edge);
- if (ProcessedPreds.insert(*bbi).second) {
- BBWeight += ignoreMissing(w);
- }
-    // If this block is a loop header and the predecessor is contained in this
-    // loop, the edge is a backedge; continue and do not check whether the
-    // value is valid.
- if (BBisHeader && BBLoop->contains(*bbi)) {
- printEdgeError(edge, "but is backedge, continuing");
- continue;
- }
-    // If the edge's value is missing (and this is neither a loop header nor a
-    // backedge), return; this block is currently not estimatable.
- if (w == MissingValue) {
- printEdgeError(edge, "returning");
- return;
- }
- }
- if (getExecutionCount(BB) != MissingValue) {
- BBWeight = getExecutionCount(BB);
- }
-
- // Fetch all necessary information for current block.
- SmallVector<Edge, 8> ExitEdges;
- SmallVector<Edge, 8> Edges;
- if (BBLoop) {
- BBLoop->getExitEdges(ExitEdges);
- }
-
- // If this is a loop header, consider the following:
- // Exactly the flow that is entering this block, must exit this block too. So
- // do the following:
- // *) get all the exit edges, read the flow that is already leaving this
- // loop, remember the edges that do not have any flow on them right now.
-  // (The edges that already have flow on them are most likely exiting edges of
-  // other loops; do not touch those flows, because the previously calculated
-  // loop headers would not be exact anymore.)
- // *) In case there is not a single exiting edge left, create one at the loop
- // latch to prevent the flow from building up in the loop.
- // *) Take the flow that is not leaving the loop already and distribute it on
- // the remaining exiting edges.
- // (This ensures that all flow that enters the loop also leaves it.)
- // *) Increase the flow into the loop by increasing the weight of this block.
- // There is at least one incoming backedge that will bring us this flow later
- // on. (So that the flow condition in this node is valid again.)
- if (BBisHeader) {
- double incoming = BBWeight;
- // Subtract the flow leaving the loop.
- std::set<Edge> ProcessedExits;
- for (SmallVectorImpl<Edge>::iterator ei = ExitEdges.begin(),
- ee = ExitEdges.end(); ei != ee; ++ei) {
- if (ProcessedExits.insert(*ei).second) {
- double w = getEdgeWeight(*ei);
- if (w == MissingValue) {
- Edges.push_back(*ei);
- // Check if there is a necessary minimal weight, if yes, subtract it
- // from weight.
- if (MinimalWeight.find(*ei) != MinimalWeight.end()) {
- incoming -= MinimalWeight[*ei];
- DEBUG(dbgs() << "Reserving " << format("%.20g",MinimalWeight[*ei]) << " at " << (*ei) << "\n");
- }
- } else {
- incoming -= w;
- }
- }
- }
- // If no exit edges, create one:
- if (Edges.size() == 0) {
- BasicBlock *Latch = BBLoop->getLoopLatch();
- if (Latch) {
- Edge edge = getEdge(Latch,0);
- EdgeInformation[BB->getParent()][edge] = BBWeight;
- printEdgeWeight(edge);
- edge = getEdge(Latch, BB);
- EdgeInformation[BB->getParent()][edge] = BBWeight * ExecCount;
- printEdgeWeight(edge);
- }
- }
-
-    // Distribute the remaining weight to the exiting edges. To prevent
-    // fractions from building up and provoking precision problems, the weight
-    // to be distributed is split and rounded; the last edge gets a somewhat
-    // bigger value, but we are close enough for an estimation.
- double fraction = floor(incoming/Edges.size());
- for (SmallVectorImpl<Edge>::iterator ei = Edges.begin(), ee = Edges.end();
- ei != ee; ++ei) {
- double w = 0;
- if (ei != (ee-1)) {
- w = fraction;
- incoming -= fraction;
- } else {
- w = incoming;
- }
- EdgeInformation[BB->getParent()][*ei] += w;
- // Read necessary minimal weight.
- if (MinimalWeight.find(*ei) != MinimalWeight.end()) {
- EdgeInformation[BB->getParent()][*ei] += MinimalWeight[*ei];
- DEBUG(dbgs() << "Additionally " << format("%.20g",MinimalWeight[*ei]) << " at " << (*ei) << "\n");
- }
- printEdgeWeight(*ei);
-
-      // Add minimal weight to the paths to all exit edges; this is used to
-      // ensure that enough flow reaches these edges.
- Path p;
- const BasicBlock *Dest = GetPath(BB, (*ei).first, p, GetPathToDest);
- while (Dest != BB) {
- const BasicBlock *Parent = p.find(Dest)->second;
- Edge e = getEdge(Parent, Dest);
- if (MinimalWeight.find(e) == MinimalWeight.end()) {
- MinimalWeight[e] = 0;
- }
- MinimalWeight[e] += w;
- DEBUG(dbgs() << "Minimal Weight for " << e << ": " << format("%.20g",MinimalWeight[e]) << "\n");
- Dest = Parent;
- }
- }
- // Increase flow into the loop.
- BBWeight *= (ExecCount+1);
- }
-
- BlockInformation[BB->getParent()][BB] = BBWeight;
-  // Up until now we considered only the loop exiting edges; now we have a
-  // definite block weight and must distribute it onto the outgoing edges.
-  // Since there may already be flow attached to some of the edges, read this
-  // flow first and remember the edges that still have no flow attached.
- Edges.clear();
- std::set<BasicBlock*> ProcessedSuccs;
-
- succ_iterator bbi = succ_begin(BB), bbe = succ_end(BB);
- // Also check for (BB,0) edges that may already contain some flow. (But only
- // in case there are no successors.)
- if (bbi == bbe) {
- Edge edge = getEdge(BB,0);
- EdgeInformation[BB->getParent()][edge] = BBWeight;
- printEdgeWeight(edge);
- }
- for ( ; bbi != bbe; ++bbi ) {
- if (ProcessedSuccs.insert(*bbi).second) {
- Edge edge = getEdge(BB,*bbi);
- double w = getEdgeWeight(edge);
- if (w != MissingValue) {
- BBWeight -= getEdgeWeight(edge);
- } else {
- Edges.push_back(edge);
-        // If a minimal weight is necessary, reserve it by subtracting it
-        // from the block weight; it is re-added later on.
- if (MinimalWeight.find(edge) != MinimalWeight.end()) {
- BBWeight -= MinimalWeight[edge];
- DEBUG(dbgs() << "Reserving " << format("%.20g",MinimalWeight[edge]) << " at " << edge << "\n");
- }
- }
- }
- }
-
- double fraction = Edges.size() ? floor(BBWeight/Edges.size()) : 0.0;
-  // Finally we know what flow is still not leaving the block; distribute
-  // this flow onto the empty edges.
- for (SmallVectorImpl<Edge>::iterator ei = Edges.begin(), ee = Edges.end();
- ei != ee; ++ei) {
- if (ei != (ee-1)) {
- EdgeInformation[BB->getParent()][*ei] += fraction;
- BBWeight -= fraction;
- } else {
- EdgeInformation[BB->getParent()][*ei] += BBWeight;
- }
-    // Re-add the minimal necessary weight.
- if (MinimalWeight.find(*ei) != MinimalWeight.end()) {
- EdgeInformation[BB->getParent()][*ei] += MinimalWeight[*ei];
- DEBUG(dbgs() << "Additionally " << format("%.20g",MinimalWeight[*ei]) << " at " << (*ei) << "\n");
- }
- printEdgeWeight(*ei);
- }
-
-  // This block is now visited; mark it before recursing.
- BBToVisit.erase(BB);
-
- // Recurse into successors.
- for (succ_iterator bbi = succ_begin(BB), bbe = succ_end(BB);
- bbi != bbe; ++bbi) {
- recurseBasicBlock(*bbi);
- }
-}
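
The rounding scheme used above when distributing flow is easiest to see in
isolation: every edge but the last receives the floored share, and the last
edge absorbs the remainder, so the total is conserved exactly. A minimal
standalone sketch in plain C++ (illustrative names, not part of this file):

#include <cmath>
#include <cstdio>
#include <vector>

// Distribute 'weight' over 'slots' edges: every edge but the last gets the
// floored share; the last edge absorbs the rounding remainder so that the
// total is conserved exactly.
static std::vector<double> distribute(double weight, unsigned slots) {
  std::vector<double> shares;
  double fraction = std::floor(weight / slots);
  for (unsigned i = 0; i + 1 < slots; ++i) {
    shares.push_back(fraction);
    weight -= fraction;
  }
  shares.push_back(weight); // the last edge gets whatever is left
  return shares;
}

int main() {
  // 10 units over 3 edges -> 3, 3, 4 (the sum stays 10).
  for (double s : distribute(10, 3))
    std::printf("%g\n", s);
}
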
-
-bool ProfileEstimatorPass::runOnFunction(Function &F) {
- if (F.isDeclaration()) return false;
-
- // Fetch LoopInfo and clear ProfileInfo for this function.
- LI = &getAnalysis<LoopInfo>();
- FunctionInformation.erase(&F);
- BlockInformation[&F].clear();
- EdgeInformation[&F].clear();
- BBToVisit.clear();
-
- // Mark all blocks as to visit.
- for (Function::iterator bi = F.begin(), be = F.end(); bi != be; ++bi)
- BBToVisit.insert(bi);
-
- // Clear Minimal Edges.
- MinimalWeight.clear();
-
- DEBUG(dbgs() << "Working on function " << F.getName() << "\n");
-
-  // Since the entry block is the first one and has no predecessors, the edge
-  // (0,entry) is inserted with a starting weight of 2^32.
- BasicBlock *entry = &F.getEntryBlock();
- BlockInformation[&F][entry] = pow(2.0, 32.0);
- Edge edge = getEdge(0,entry);
- EdgeInformation[&F][edge] = BlockInformation[&F][entry];
- printEdgeWeight(edge);
-
-  // Since recurseBasicBlock() may return with blocks that were not fully
-  // estimated, keep calling recurseBasicBlock() until everything is
-  // calculated.
- bool cleanup = false;
- recurseBasicBlock(entry);
- while (BBToVisit.size() > 0 && !cleanup) {
-    // Remember the number of open blocks; this is used later to check
-    // whether progress was made.
- unsigned size = BBToVisit.size();
-
- // Try to calculate all blocks in turn.
- for (std::set<BasicBlock*>::iterator bi = BBToVisit.begin(),
- be = BBToVisit.end(); bi != be; ++bi) {
- recurseBasicBlock(*bi);
- // If at least one block was finished, break because iterator may be
- // invalid.
- if (BBToVisit.size() < size) break;
- }
-
- // If there was not a single block resolved, make some assumptions.
- if (BBToVisit.size() == size) {
- bool found = false;
- for (std::set<BasicBlock*>::iterator BBI = BBToVisit.begin(), BBE = BBToVisit.end();
- (BBI != BBE) && (!found); ++BBI) {
- BasicBlock *BB = *BBI;
-      // Try each predecessor to see whether it can be assumed.
- for (pred_iterator bbi = pred_begin(BB), bbe = pred_end(BB);
- (bbi != bbe) && (!found); ++bbi) {
- Edge e = getEdge(*bbi,BB);
- double w = getEdgeWeight(e);
- // Check that edge from predecessor is still free.
- if (w == MissingValue) {
-          // Check if there is a cycle from this block to the predecessor.
- Path P;
- const BasicBlock *Dest = GetPath(BB, *bbi, P, GetPathToDest);
- if (Dest != *bbi) {
-            // If there is no cycle, just set the edge weight to 0.
- EdgeInformation[&F][e] = 0;
- DEBUG(dbgs() << "Assuming edge weight: ");
- printEdgeWeight(e);
- found = true;
- }
- }
- }
- }
- if (!found) {
- cleanup = true;
- DEBUG(dbgs() << "No assumption possible in Fuction "<<F.getName()<<", setting all to zero\n");
- }
- }
- }
-  // In case there was no safe way to assume edges, as a last measure set
-  // _everything_ to zero.
- if (cleanup) {
- FunctionInformation[&F] = 0;
- BlockInformation[&F].clear();
- EdgeInformation[&F].clear();
- for (Function::const_iterator FI = F.begin(), FE = F.end(); FI != FE; ++FI) {
- const BasicBlock *BB = &(*FI);
- BlockInformation[&F][BB] = 0;
- const_pred_iterator predi = pred_begin(BB), prede = pred_end(BB);
- if (predi == prede) {
- Edge e = getEdge(0,BB);
- setEdgeWeight(e,0);
- }
- for (;predi != prede; ++predi) {
- Edge e = getEdge(*predi,BB);
- setEdgeWeight(e,0);
- }
- succ_const_iterator succi = succ_begin(BB), succe = succ_end(BB);
- if (succi == succe) {
- Edge e = getEdge(BB,0);
- setEdgeWeight(e,0);
- }
- for (;succi != succe; ++succi) {
-        Edge e = getEdge(BB,*succi);
- setEdgeWeight(e,0);
- }
- }
- }
-
- return false;
-}
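
The driver above is an instance of a common fixed-point pattern: sweep the
unresolved set repeatedly, and when a full sweep makes no progress, make one
conservative assumption or zero everything out. A minimal sketch of that
control structure, with hypothetical resolve/assume callbacks standing in for
the pass's edge calculations:

#include <cstdio>
#include <functional>
#include <iterator>
#include <set>

// Sweep 'open' until it is empty. If a full sweep resolves nothing, let
// 'assume' unblock the iteration with one guess; if it cannot, drop
// everything (the equivalent of the 'cleanup' path above).
static void solveToFixedPoint(std::set<int> &open,
                              const std::function<bool(int)> &resolve,
                              const std::function<bool()> &assume) {
  while (!open.empty()) {
    size_t before = open.size();
    for (auto it = open.begin(); it != open.end();)
      it = resolve(*it) ? open.erase(it) : std::next(it);
    if (open.size() == before && !assume()) {
      open.clear();
      return;
    }
  }
}

int main() {
  std::set<int> open = {1, 2, 3};
  int known = 0; // item n becomes resolvable once n-1 is known
  solveToFixedPoint(
      open,
      [&](int n) { return n == known + 1 ? (++known, true) : false; },
      [] { return false; });
  std::printf("unresolved: %zu\n", open.size()); // prints: unresolved: 0
}
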
diff --git a/lib/Analysis/ProfileInfo.cpp b/lib/Analysis/ProfileInfo.cpp
deleted file mode 100644
index 9626a48..0000000
--- a/lib/Analysis/ProfileInfo.cpp
+++ /dev/null
@@ -1,1079 +0,0 @@
-//===- ProfileInfo.cpp - Profile Info Interface ---------------------------===//
-//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-//
-// This file implements the abstract ProfileInfo interface, and the default
-// "no profile" implementation.
-//
-//===----------------------------------------------------------------------===//
-#define DEBUG_TYPE "profile-info"
-#include "llvm/Analysis/ProfileInfo.h"
-#include "llvm/ADT/SmallSet.h"
-#include "llvm/Analysis/Passes.h"
-#include "llvm/CodeGen/MachineBasicBlock.h"
-#include "llvm/CodeGen/MachineFunction.h"
-#include "llvm/Pass.h"
-#include "llvm/Support/CFG.h"
-#include <limits>
-#include <queue>
-#include <set>
-using namespace llvm;
-
-namespace llvm {
- template<> char ProfileInfoT<Function,BasicBlock>::ID = 0;
-}
-
-// Register the ProfileInfo interface, providing a nice name to refer to.
-INITIALIZE_ANALYSIS_GROUP(ProfileInfo, "Profile Information", NoProfileInfo)
-
-namespace llvm {
-
-template <>
-ProfileInfoT<MachineFunction, MachineBasicBlock>::ProfileInfoT() {}
-template <>
-ProfileInfoT<MachineFunction, MachineBasicBlock>::~ProfileInfoT() {}
-
-template <>
-ProfileInfoT<Function, BasicBlock>::ProfileInfoT() {
- MachineProfile = 0;
-}
-template <>
-ProfileInfoT<Function, BasicBlock>::~ProfileInfoT() {
- if (MachineProfile) delete MachineProfile;
-}
-
-template<>
-char ProfileInfoT<MachineFunction, MachineBasicBlock>::ID = 0;
-
-template<>
-const double ProfileInfoT<Function,BasicBlock>::MissingValue = -1;
-
-template<> const
-double ProfileInfoT<MachineFunction, MachineBasicBlock>::MissingValue = -1;
-
-template<> double
-ProfileInfoT<Function,BasicBlock>::getExecutionCount(const BasicBlock *BB) {
- std::map<const Function*, BlockCounts>::iterator J =
- BlockInformation.find(BB->getParent());
- if (J != BlockInformation.end()) {
- BlockCounts::iterator I = J->second.find(BB);
- if (I != J->second.end())
- return I->second;
- }
-
- double Count = MissingValue;
-
- const_pred_iterator PI = pred_begin(BB), PE = pred_end(BB);
-
- // Are there zero predecessors of this block?
- if (PI == PE) {
- Edge e = getEdge(0, BB);
- Count = getEdgeWeight(e);
- } else {
- // Otherwise, if there are predecessors, the execution count of this block is
- // the sum of the edge frequencies from the incoming edges.
- std::set<const BasicBlock*> ProcessedPreds;
- Count = 0;
- for (; PI != PE; ++PI) {
- const BasicBlock *P = *PI;
- if (ProcessedPreds.insert(P).second) {
- double w = getEdgeWeight(getEdge(P, BB));
- if (w == MissingValue) {
- Count = MissingValue;
- break;
- }
- Count += w;
- }
- }
- }
-
- // If the predecessors did not suffice to get block weight, try successors.
- if (Count == MissingValue) {
-
- succ_const_iterator SI = succ_begin(BB), SE = succ_end(BB);
-
- // Are there zero successors of this block?
- if (SI == SE) {
- Edge e = getEdge(BB,0);
- Count = getEdgeWeight(e);
- } else {
- std::set<const BasicBlock*> ProcessedSuccs;
- Count = 0;
- for (; SI != SE; ++SI)
- if (ProcessedSuccs.insert(*SI).second) {
- double w = getEdgeWeight(getEdge(BB, *SI));
- if (w == MissingValue) {
- Count = MissingValue;
- break;
- }
- Count += w;
- }
- }
- }
-
- if (Count != MissingValue) BlockInformation[BB->getParent()][BB] = Count;
- return Count;
-}
-
-template<>
-double ProfileInfoT<MachineFunction, MachineBasicBlock>::
- getExecutionCount(const MachineBasicBlock *MBB) {
- std::map<const MachineFunction*, BlockCounts>::iterator J =
- BlockInformation.find(MBB->getParent());
- if (J != BlockInformation.end()) {
- BlockCounts::iterator I = J->second.find(MBB);
- if (I != J->second.end())
- return I->second;
- }
-
- return MissingValue;
-}
-
-template<>
-double ProfileInfoT<Function,BasicBlock>::getExecutionCount(const Function *F) {
- std::map<const Function*, double>::iterator J =
- FunctionInformation.find(F);
- if (J != FunctionInformation.end())
- return J->second;
-
-  // isDeclaration() is checked here and not at the start of the function to
-  // allow functions without a body to still have an execution count.
- if (F->isDeclaration()) return MissingValue;
-
- double Count = getExecutionCount(&F->getEntryBlock());
- if (Count != MissingValue) FunctionInformation[F] = Count;
- return Count;
-}
-
-template<>
-double ProfileInfoT<MachineFunction, MachineBasicBlock>::
- getExecutionCount(const MachineFunction *MF) {
- std::map<const MachineFunction*, double>::iterator J =
- FunctionInformation.find(MF);
- if (J != FunctionInformation.end())
- return J->second;
-
- double Count = getExecutionCount(&MF->front());
- if (Count != MissingValue) FunctionInformation[MF] = Count;
- return Count;
-}
-
-template<>
-void ProfileInfoT<Function,BasicBlock>::
- setExecutionCount(const BasicBlock *BB, double w) {
- DEBUG(dbgs() << "Creating Block " << BB->getName()
- << " (weight: " << format("%.20g",w) << ")\n");
- BlockInformation[BB->getParent()][BB] = w;
-}
-
-template<>
-void ProfileInfoT<MachineFunction, MachineBasicBlock>::
- setExecutionCount(const MachineBasicBlock *MBB, double w) {
- DEBUG(dbgs() << "Creating Block " << MBB->getBasicBlock()->getName()
- << " (weight: " << format("%.20g",w) << ")\n");
- BlockInformation[MBB->getParent()][MBB] = w;
-}
-
-template<>
-void ProfileInfoT<Function,BasicBlock>::addEdgeWeight(Edge e, double w) {
- double oldw = getEdgeWeight(e);
- assert (oldw != MissingValue && "Adding weight to Edge with no previous weight");
- DEBUG(dbgs() << "Adding to Edge " << e
- << " (new weight: " << format("%.20g",oldw + w) << ")\n");
- EdgeInformation[getFunction(e)][e] = oldw + w;
-}
-
-template<>
-void ProfileInfoT<Function,BasicBlock>::
- addExecutionCount(const BasicBlock *BB, double w) {
- double oldw = getExecutionCount(BB);
- assert (oldw != MissingValue && "Adding weight to Block with no previous weight");
- DEBUG(dbgs() << "Adding to Block " << BB->getName()
- << " (new weight: " << format("%.20g",oldw + w) << ")\n");
- BlockInformation[BB->getParent()][BB] = oldw + w;
-}
-
-template<>
-void ProfileInfoT<Function,BasicBlock>::removeBlock(const BasicBlock *BB) {
- std::map<const Function*, BlockCounts>::iterator J =
- BlockInformation.find(BB->getParent());
- if (J == BlockInformation.end()) return;
-
- DEBUG(dbgs() << "Deleting " << BB->getName() << "\n");
- J->second.erase(BB);
-}
-
-template<>
-void ProfileInfoT<Function,BasicBlock>::removeEdge(Edge e) {
- std::map<const Function*, EdgeWeights>::iterator J =
- EdgeInformation.find(getFunction(e));
- if (J == EdgeInformation.end()) return;
-
- DEBUG(dbgs() << "Deleting" << e << "\n");
- J->second.erase(e);
-}
-
-template<>
-void ProfileInfoT<Function,BasicBlock>::
- replaceEdge(const Edge &oldedge, const Edge &newedge) {
- double w;
- if ((w = getEdgeWeight(newedge)) == MissingValue) {
- w = getEdgeWeight(oldedge);
- DEBUG(dbgs() << "Replacing " << oldedge << " with " << newedge << "\n");
- } else {
- w += getEdgeWeight(oldedge);
- DEBUG(dbgs() << "Adding " << oldedge << " to " << newedge << "\n");
- }
- setEdgeWeight(newedge,w);
- removeEdge(oldedge);
-}
-
-template<>
-const BasicBlock *ProfileInfoT<Function,BasicBlock>::
- GetPath(const BasicBlock *Src, const BasicBlock *Dest,
- Path &P, unsigned Mode) {
- const BasicBlock *BB = 0;
- bool hasFoundPath = false;
-
- std::queue<const BasicBlock *> BFS;
- BFS.push(Src);
-
- while(BFS.size() && !hasFoundPath) {
- BB = BFS.front();
- BFS.pop();
-
- succ_const_iterator Succ = succ_begin(BB), End = succ_end(BB);
- if (Succ == End) {
- P[(const BasicBlock*)0] = BB;
- if (Mode & GetPathToExit) {
- hasFoundPath = true;
- BB = 0;
- }
- }
- for(;Succ != End; ++Succ) {
- if (P.find(*Succ) != P.end()) continue;
- Edge e = getEdge(BB,*Succ);
- if ((Mode & GetPathWithNewEdges) && (getEdgeWeight(e) != MissingValue)) continue;
- P[*Succ] = BB;
- BFS.push(*Succ);
- if ((Mode & GetPathToDest) && *Succ == Dest) {
- hasFoundPath = true;
- BB = *Succ;
- break;
- }
- if ((Mode & GetPathToValue) && (getExecutionCount(*Succ) != MissingValue)) {
- hasFoundPath = true;
- BB = *Succ;
- break;
- }
- }
- }
-
- return BB;
-}
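
GetPath records, for every block it reaches, the block it was reached from;
callers then walk this parent map backwards from the destination. A
standalone sketch of the same pattern over a toy adjacency-list graph
(illustrative names, no LLVM types):

#include <cstdio>
#include <map>
#include <queue>
#include <vector>

using Graph = std::map<int, std::vector<int>>;

// BFS from 'src'; P maps each discovered node to its parent, so a path can
// be reconstructed by walking from 'dst' back to 'src'.
static std::vector<int> bfsPath(const Graph &G, int src, int dst) {
  std::map<int, int> P;
  std::queue<int> Q;
  Q.push(src);
  P[src] = src;
  while (!Q.empty()) {
    int n = Q.front(); Q.pop();
    if (n == dst) break;
    auto it = G.find(n);
    if (it == G.end()) continue;
    for (int s : it->second)
      if (!P.count(s)) { P[s] = n; Q.push(s); }
  }
  std::vector<int> path;
  if (!P.count(dst)) return path;       // destination unreachable
  for (int n = dst; n != src; n = P[n]) // walk the parents back to src
    path.insert(path.begin(), n);
  path.insert(path.begin(), src);
  return path;
}

int main() {
  Graph G = {{0, {1, 2}}, {1, {3}}, {2, {3}}, {3, {}}};
  for (int n : bfsPath(G, 0, 3)) std::printf("%d ", n); // prints: 0 1 3
  std::printf("\n");
}
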
-
-template<>
-void ProfileInfoT<Function,BasicBlock>::
- divertFlow(const Edge &oldedge, const Edge &newedge) {
- DEBUG(dbgs() << "Diverting " << oldedge << " via " << newedge );
-
- // First check if the old edge was taken, if not, just delete it...
- if (getEdgeWeight(oldedge) == 0) {
- removeEdge(oldedge);
- return;
- }
-
- Path P;
- P[newedge.first] = 0;
- P[newedge.second] = newedge.first;
- const BasicBlock *BB = GetPath(newedge.second,oldedge.second,P,GetPathToExit | GetPathToDest);
-
- double w = getEdgeWeight (oldedge);
- DEBUG(dbgs() << ", Weight: " << format("%.20g",w) << "\n");
- do {
- const BasicBlock *Parent = P.find(BB)->second;
- Edge e = getEdge(Parent,BB);
- double oldw = getEdgeWeight(e);
- double oldc = getExecutionCount(e.first);
- setEdgeWeight(e, w+oldw);
- if (Parent != oldedge.first) {
- setExecutionCount(e.first, w+oldc);
- }
- BB = Parent;
- } while (BB != newedge.first);
- removeEdge(oldedge);
-}
-
-/// Replaces all occurrences of RmBB in the ProfilingInfo with DestBB.
-/// This checks all edges of the function the blocks reside in and replaces the
-/// occurrences of RmBB with DestBB.
-template<>
-void ProfileInfoT<Function,BasicBlock>::
- replaceAllUses(const BasicBlock *RmBB, const BasicBlock *DestBB) {
- DEBUG(dbgs() << "Replacing " << RmBB->getName()
- << " with " << DestBB->getName() << "\n");
- const Function *F = DestBB->getParent();
- std::map<const Function*, EdgeWeights>::iterator J =
- EdgeInformation.find(F);
- if (J == EdgeInformation.end()) return;
-
- Edge e, newedge;
- bool erasededge = false;
- EdgeWeights::iterator I = J->second.begin(), E = J->second.end();
- while(I != E) {
- e = (I++)->first;
- bool foundedge = false; bool eraseedge = false;
- if (e.first == RmBB) {
- if (e.second == DestBB) {
- eraseedge = true;
- } else {
- newedge = getEdge(DestBB, e.second);
- foundedge = true;
- }
- }
- if (e.second == RmBB) {
- if (e.first == DestBB) {
- eraseedge = true;
- } else {
- newedge = getEdge(e.first, DestBB);
- foundedge = true;
- }
- }
- if (foundedge) {
- replaceEdge(e, newedge);
- }
- if (eraseedge) {
- if (erasededge) {
- Edge newedge = getEdge(DestBB, DestBB);
- replaceEdge(e, newedge);
- } else {
- removeEdge(e);
- erasededge = true;
- }
- }
- }
-}
-
-/// Splits an edge in the ProfileInfo and redirects flow over NewBB.
-/// Since it is possible that there is more than one edge in the CFG from
-/// FirstBB to SecondBB, it is necessary to redirect the flow proportionally.
-template<>
-void ProfileInfoT<Function,BasicBlock>::splitEdge(const BasicBlock *FirstBB,
- const BasicBlock *SecondBB,
- const BasicBlock *NewBB,
- bool MergeIdenticalEdges) {
- const Function *F = FirstBB->getParent();
- std::map<const Function*, EdgeWeights>::iterator J =
- EdgeInformation.find(F);
- if (J == EdgeInformation.end()) return;
-
- // Generate edges and read current weight.
- Edge e = getEdge(FirstBB, SecondBB);
- Edge n1 = getEdge(FirstBB, NewBB);
- Edge n2 = getEdge(NewBB, SecondBB);
- EdgeWeights &ECs = J->second;
- double w = ECs[e];
-
- int succ_count = 0;
- if (!MergeIdenticalEdges) {
-    // First count the edges from FirstBB to SecondBB; if there is more than
-    // one, only slice out a proportional part for NewBB.
- for(succ_const_iterator BBI = succ_begin(FirstBB), BBE = succ_end(FirstBB);
- BBI != BBE; ++BBI) {
- if (*BBI == SecondBB) succ_count++;
- }
- // When the NewBB is completely new, increment the count by one so that
- // the counts are properly distributed.
- if (getExecutionCount(NewBB) == ProfileInfo::MissingValue) succ_count++;
- } else {
- // When the edges are merged anyway, then redirect all flow.
- succ_count = 1;
- }
-
- // We know now how many edges there are from FirstBB to SecondBB, reroute a
- // proportional part of the edge weight over NewBB.
- double neww = floor(w / succ_count);
- ECs[n1] += neww;
- ECs[n2] += neww;
- BlockInformation[F][NewBB] += neww;
- if (succ_count == 1) {
- ECs.erase(e);
- } else {
- ECs[e] -= neww;
- }
-}
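
As a worked example of the proportional split above: assume two parallel
FirstBB->SecondBB edges, a NewBB with no execution count yet, and
MergeIdenticalEdges off. Then succ_count ends up as 3 and only a third of the
weight is rerouted over NewBB. A minimal numeric sketch (made-up values):

#include <cmath>
#include <cstdio>

int main() {
  double w = 90;          // weight currently on the FirstBB->SecondBB edge
  int parallelEdges = 2;  // identical CFG edges from FirstBB to SecondBB
  int succCount = parallelEdges + 1; // +1 because NewBB had no count yet
  double neww = std::floor(w / succCount);
  std::printf("rerouted over NewBB: %g, left on the old edge: %g\n",
              neww, w - neww); // prints 30 and 60
}
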
-
-template<>
-void ProfileInfoT<Function,BasicBlock>::splitBlock(const BasicBlock *Old,
- const BasicBlock* New) {
- const Function *F = Old->getParent();
- std::map<const Function*, EdgeWeights>::iterator J =
- EdgeInformation.find(F);
- if (J == EdgeInformation.end()) return;
-
- DEBUG(dbgs() << "Splitting " << Old->getName() << " to " << New->getName() << "\n");
-
- std::set<Edge> Edges;
- for (EdgeWeights::iterator ewi = J->second.begin(), ewe = J->second.end();
- ewi != ewe; ++ewi) {
- Edge old = ewi->first;
- if (old.first == Old) {
- Edges.insert(old);
- }
- }
- for (std::set<Edge>::iterator EI = Edges.begin(), EE = Edges.end();
- EI != EE; ++EI) {
- Edge newedge = getEdge(New, EI->second);
- replaceEdge(*EI, newedge);
- }
-
- double w = getExecutionCount(Old);
- setEdgeWeight(getEdge(Old, New), w);
- setExecutionCount(New, w);
-}
-
-template<>
-void ProfileInfoT<Function,BasicBlock>::splitBlock(const BasicBlock *BB,
- const BasicBlock* NewBB,
- BasicBlock *const *Preds,
- unsigned NumPreds) {
- const Function *F = BB->getParent();
- std::map<const Function*, EdgeWeights>::iterator J =
- EdgeInformation.find(F);
- if (J == EdgeInformation.end()) return;
-
- DEBUG(dbgs() << "Splitting " << NumPreds << " Edges from " << BB->getName()
- << " to " << NewBB->getName() << "\n");
-
- // Collect weight that was redirected over NewBB.
- double newweight = 0;
-
- std::set<const BasicBlock *> ProcessedPreds;
-  // For all requested predecessors.
- for (unsigned pred = 0; pred < NumPreds; ++pred) {
- const BasicBlock * Pred = Preds[pred];
- if (ProcessedPreds.insert(Pred).second) {
- // Create edges and read old weight.
- Edge oldedge = getEdge(Pred, BB);
- Edge newedge = getEdge(Pred, NewBB);
-
- // Remember how much weight was redirected.
- newweight += getEdgeWeight(oldedge);
-
- replaceEdge(oldedge,newedge);
- }
- }
-
- Edge newedge = getEdge(NewBB,BB);
- setEdgeWeight(newedge, newweight);
- setExecutionCount(NewBB, newweight);
-}
-
-template<>
-void ProfileInfoT<Function,BasicBlock>::transfer(const Function *Old,
- const Function *New) {
- DEBUG(dbgs() << "Replacing Function " << Old->getName() << " with "
- << New->getName() << "\n");
- std::map<const Function*, EdgeWeights>::iterator J =
- EdgeInformation.find(Old);
- if(J != EdgeInformation.end()) {
- EdgeInformation[New] = J->second;
- }
- EdgeInformation.erase(Old);
- BlockInformation.erase(Old);
- FunctionInformation.erase(Old);
-}
-
-static double readEdgeOrRemember(ProfileInfo::Edge edge, double w,
- ProfileInfo::Edge &tocalc, unsigned &uncalc) {
- if (w == ProfileInfo::MissingValue) {
- tocalc = edge;
- uncalc++;
- return 0;
- } else {
- return w;
- }
-}
-
-template<>
-bool ProfileInfoT<Function,BasicBlock>::
- CalculateMissingEdge(const BasicBlock *BB, Edge &removed,
- bool assumeEmptySelf) {
- Edge edgetocalc;
- unsigned uncalculated = 0;
-
-  // Collect weights of all incoming and outgoing edges; remember edges that
-  // have no value.
- double incount = 0;
- SmallSet<const BasicBlock*,8> pred_visited;
- const_pred_iterator bbi = pred_begin(BB), bbe = pred_end(BB);
- if (bbi==bbe) {
- Edge e = getEdge(0,BB);
- incount += readEdgeOrRemember(e, getEdgeWeight(e) ,edgetocalc,uncalculated);
- }
- for (;bbi != bbe; ++bbi) {
- if (pred_visited.insert(*bbi)) {
- Edge e = getEdge(*bbi,BB);
- incount += readEdgeOrRemember(e, getEdgeWeight(e) ,edgetocalc,uncalculated);
- }
- }
-
- double outcount = 0;
- SmallSet<const BasicBlock*,8> succ_visited;
- succ_const_iterator sbbi = succ_begin(BB), sbbe = succ_end(BB);
- if (sbbi==sbbe) {
- Edge e = getEdge(BB,0);
- if (getEdgeWeight(e) == MissingValue) {
- double w = getExecutionCount(BB);
- if (w != MissingValue) {
- setEdgeWeight(e,w);
- removed = e;
- }
- }
- outcount += readEdgeOrRemember(e, getEdgeWeight(e), edgetocalc, uncalculated);
- }
- for (;sbbi != sbbe; ++sbbi) {
- if (succ_visited.insert(*sbbi)) {
- Edge e = getEdge(BB,*sbbi);
- outcount += readEdgeOrRemember(e, getEdgeWeight(e), edgetocalc, uncalculated);
- }
- }
-
-  // If exactly one edge weight was missing, calculate it and remove it from
-  // the spanning tree.
- if (uncalculated == 0 ) {
- return true;
- } else
- if (uncalculated == 1) {
- if (incount < outcount) {
- EdgeInformation[BB->getParent()][edgetocalc] = outcount-incount;
- } else {
- EdgeInformation[BB->getParent()][edgetocalc] = incount-outcount;
- }
- DEBUG(dbgs() << "--Calc Edge Counter for " << edgetocalc << ": "
- << format("%.20g", getEdgeWeight(edgetocalc)) << "\n");
- removed = edgetocalc;
- return true;
- } else
- if (uncalculated == 2 && assumeEmptySelf && edgetocalc.first == edgetocalc.second && incount == outcount) {
- setEdgeWeight(edgetocalc, incount * 10);
- removed = edgetocalc;
- return true;
- } else {
- return false;
- }
-}
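
CalculateMissingEdge relies on Kirchhoff-style flow conservation: at every
block, the incoming edge weights must sum to the outgoing ones, so a single
unknown edge is determined by the difference of the two sums. A small
standalone sketch of that rule (illustrative names and sentinel):

#include <cmath>
#include <cstdio>
#include <vector>

static const double Missing = -1;

// Solve the one missing weight at a block from sum(in) == sum(out).
// Returns Missing unless exactly one value is unknown.
static double solveMissing(const std::vector<double> &in,
                           const std::vector<double> &out) {
  double inSum = 0, outSum = 0;
  int unknown = 0;
  for (double w : in)  { if (w == Missing) ++unknown; else inSum  += w; }
  for (double w : out) { if (w == Missing) ++unknown; else outSum += w; }
  if (unknown != 1) return Missing;
  return std::fabs(inSum - outSum);
}

int main() {
  // in: 5 and 7; out: 4 and one unknown -> the unknown edge must carry 8.
  std::printf("%g\n", solveMissing({5, 7}, {4, Missing}));
}
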
-
-static void readEdge(ProfileInfo *PI, ProfileInfo::Edge e, double &calcw, std::set<ProfileInfo::Edge> &misscount) {
- double w = PI->getEdgeWeight(e);
- if (w != ProfileInfo::MissingValue) {
- calcw += w;
- } else {
- misscount.insert(e);
- }
-}
-
-template<>
-bool ProfileInfoT<Function,BasicBlock>::EstimateMissingEdges(const BasicBlock *BB) {
- double inWeight = 0;
- std::set<Edge> inMissing;
- std::set<const BasicBlock*> ProcessedPreds;
- const_pred_iterator bbi = pred_begin(BB), bbe = pred_end(BB);
- if (bbi == bbe) {
- readEdge(this,getEdge(0,BB),inWeight,inMissing);
- }
- for( ; bbi != bbe; ++bbi ) {
- if (ProcessedPreds.insert(*bbi).second) {
- readEdge(this,getEdge(*bbi,BB),inWeight,inMissing);
- }
- }
-
- double outWeight = 0;
- std::set<Edge> outMissing;
- std::set<const BasicBlock*> ProcessedSuccs;
- succ_const_iterator sbbi = succ_begin(BB), sbbe = succ_end(BB);
- if (sbbi == sbbe)
- readEdge(this,getEdge(BB,0),outWeight,outMissing);
- for ( ; sbbi != sbbe; ++sbbi ) {
- if (ProcessedSuccs.insert(*sbbi).second) {
- readEdge(this,getEdge(BB,*sbbi),outWeight,outMissing);
- }
- }
-
- double share;
- std::set<Edge>::iterator ei,ee;
- if (inMissing.size() == 0 && outMissing.size() > 0) {
- ei = outMissing.begin();
- ee = outMissing.end();
- share = inWeight/outMissing.size();
- setExecutionCount(BB,inWeight);
- } else
- if (inMissing.size() > 0 && outMissing.size() == 0 && outWeight == 0) {
- ei = inMissing.begin();
- ee = inMissing.end();
- share = 0;
- setExecutionCount(BB,0);
- } else
- if (inMissing.size() == 0 && outMissing.size() == 0) {
- setExecutionCount(BB,outWeight);
- return true;
- } else {
- return false;
- }
- for ( ; ei != ee; ++ei ) {
- setEdgeWeight(*ei,share);
- }
- return true;
-}
-
-template<>
-void ProfileInfoT<Function,BasicBlock>::repair(const Function *F) {
-// if (getExecutionCount(&(F->getEntryBlock())) == 0) {
-// for (Function::const_iterator FI = F->begin(), FE = F->end();
-// FI != FE; ++FI) {
-// const BasicBlock* BB = &(*FI);
-// {
-// const_pred_iterator NBB = pred_begin(BB), End = pred_end(BB);
-// if (NBB == End) {
-// setEdgeWeight(getEdge(0,BB),0);
-// }
-// for(;NBB != End; ++NBB) {
-// setEdgeWeight(getEdge(*NBB,BB),0);
-// }
-// }
-// {
-// succ_const_iterator NBB = succ_begin(BB), End = succ_end(BB);
-// if (NBB == End) {
-// setEdgeWeight(getEdge(0,BB),0);
-// }
-// for(;NBB != End; ++NBB) {
-// setEdgeWeight(getEdge(*NBB,BB),0);
-// }
-// }
-// }
-// return;
-// }
- // The set of BasicBlocks that are still unvisited.
- std::set<const BasicBlock*> Unvisited;
-
- // The set of return edges (Edges with no successors).
- std::set<Edge> ReturnEdges;
- double ReturnWeight = 0;
-
- // First iterate over the whole function and collect:
- // 1) The blocks in this function in the Unvisited set.
- // 2) The return edges in the ReturnEdges set.
- // 3) The flow that is leaving the function already via return edges.
-
- // Data structure for searching the function.
- std::queue<const BasicBlock *> BFS;
- const BasicBlock *BB = &(F->getEntryBlock());
- BFS.push(BB);
- Unvisited.insert(BB);
-
- while (BFS.size()) {
- BB = BFS.front(); BFS.pop();
- succ_const_iterator NBB = succ_begin(BB), End = succ_end(BB);
- if (NBB == End) {
- Edge e = getEdge(BB,0);
- double w = getEdgeWeight(e);
- if (w == MissingValue) {
- // If the return edge has no value, try to read value from block.
- double bw = getExecutionCount(BB);
- if (bw != MissingValue) {
- setEdgeWeight(e,bw);
- ReturnWeight += bw;
- } else {
- // If both return edge and block provide no value, collect edge.
- ReturnEdges.insert(e);
- }
- } else {
- // If the return edge has a proper value, collect it.
- ReturnWeight += w;
- }
- }
- for (;NBB != End; ++NBB) {
- if (Unvisited.insert(*NBB).second) {
- BFS.push(*NBB);
- }
- }
- }
-
- while (Unvisited.size() > 0) {
- unsigned oldUnvisitedCount = Unvisited.size();
- bool FoundPath = false;
-
- // If there is only one edge left, calculate it.
- if (ReturnEdges.size() == 1) {
- ReturnWeight = getExecutionCount(&(F->getEntryBlock())) - ReturnWeight;
-
- Edge e = *ReturnEdges.begin();
- setEdgeWeight(e,ReturnWeight);
- setExecutionCount(e.first,ReturnWeight);
-
- Unvisited.erase(e.first);
- ReturnEdges.erase(e);
- continue;
- }
-
-    // Calculate all blocks where only one edge is missing; this may also
-    // resolve further return edges.
- std::set<const BasicBlock *>::iterator FI = Unvisited.begin(), FE = Unvisited.end();
- while(FI != FE) {
- const BasicBlock *BB = *FI; ++FI;
- Edge e;
- if(CalculateMissingEdge(BB,e,true)) {
- if (BlockInformation[F].find(BB) == BlockInformation[F].end()) {
- setExecutionCount(BB,getExecutionCount(BB));
- }
- Unvisited.erase(BB);
- if (e.first != 0 && e.second == 0) {
- ReturnEdges.erase(e);
- ReturnWeight += getEdgeWeight(e);
- }
- }
- }
- if (oldUnvisitedCount > Unvisited.size()) continue;
-
- // Estimate edge weights by dividing the flow proportionally.
- FI = Unvisited.begin(), FE = Unvisited.end();
- while(FI != FE) {
- const BasicBlock *BB = *FI; ++FI;
- const BasicBlock *Dest = 0;
- bool AllEdgesHaveSameReturn = true;
-      // Check each successor; these must all end up in the same (or an
-      // empty) return block, otherwise it is dangerous to do an estimation
-      // on them.
- for (succ_const_iterator Succ = succ_begin(BB), End = succ_end(BB);
- Succ != End; ++Succ) {
- Path P;
- GetPath(*Succ, 0, P, GetPathToExit);
- if (Dest && Dest != P[(const BasicBlock*)0]) {
- AllEdgesHaveSameReturn = false;
- }
- Dest = P[(const BasicBlock*)0];
- }
- if (AllEdgesHaveSameReturn) {
- if(EstimateMissingEdges(BB)) {
- Unvisited.erase(BB);
- break;
- }
- }
- }
- if (oldUnvisitedCount > Unvisited.size()) continue;
-
-    // Check if there is a path to a block that has a known value and
-    // redirect the flow accordingly.
- FI = Unvisited.begin(), FE = Unvisited.end();
- while(FI != FE && !FoundPath) {
- // Fetch path.
- const BasicBlock *BB = *FI; ++FI;
- Path P;
- const BasicBlock *Dest = GetPath(BB, 0, P, GetPathToValue);
-
- // Calculate incoming flow.
- double iw = 0; unsigned inmissing = 0; unsigned incount = 0; unsigned invalid = 0;
- std::set<const BasicBlock *> Processed;
- for (const_pred_iterator NBB = pred_begin(BB), End = pred_end(BB);
- NBB != End; ++NBB) {
- if (Processed.insert(*NBB).second) {
- Edge e = getEdge(*NBB, BB);
- double ew = getEdgeWeight(e);
- if (ew != MissingValue) {
- iw += ew;
- invalid++;
- } else {
-            // If the path already contains this predecessor, the edge is a
-            // backedge; do not count it as missing.
- if (P.find(*NBB) == P.end())
- inmissing++;
- }
- incount++;
- }
- }
- if (inmissing == incount) continue;
- if (invalid == 0) continue;
-
- // Subtract (already) outgoing flow.
- Processed.clear();
- for (succ_const_iterator NBB = succ_begin(BB), End = succ_end(BB);
- NBB != End; ++NBB) {
- if (Processed.insert(*NBB).second) {
- Edge e = getEdge(BB, *NBB);
- double ew = getEdgeWeight(e);
- if (ew != MissingValue) {
- iw -= ew;
- }
- }
- }
- if (iw < 0) continue;
-
-      // Check whether the receiving end of the path can handle the flow.
- double ow = getExecutionCount(Dest);
- Processed.clear();
- for (succ_const_iterator NBB = succ_begin(BB), End = succ_end(BB);
- NBB != End; ++NBB) {
- if (Processed.insert(*NBB).second) {
- Edge e = getEdge(BB, *NBB);
- double ew = getEdgeWeight(e);
- if (ew != MissingValue) {
- ow -= ew;
- }
- }
- }
- if (ow < 0) continue;
-
- // Determine how much flow shall be used.
- double ew = getEdgeWeight(getEdge(P[Dest],Dest));
- if (ew != MissingValue) {
- ew = ew<ow?ew:ow;
- ew = ew<iw?ew:iw;
- } else {
- if (inmissing == 0)
- ew = iw;
- }
-
- // Create flow.
- if (ew != MissingValue) {
- do {
- Edge e = getEdge(P[Dest],Dest);
- if (getEdgeWeight(e) == MissingValue) {
- setEdgeWeight(e,ew);
- FoundPath = true;
- }
- Dest = P[Dest];
- } while (Dest != BB);
- }
- }
- if (FoundPath) continue;
-
- // Calculate a block with self loop.
- FI = Unvisited.begin(), FE = Unvisited.end();
- while(FI != FE && !FoundPath) {
- const BasicBlock *BB = *FI; ++FI;
- bool SelfEdgeFound = false;
- for (succ_const_iterator NBB = succ_begin(BB), End = succ_end(BB);
- NBB != End; ++NBB) {
- if (*NBB == BB) {
- SelfEdgeFound = true;
- break;
- }
- }
- if (SelfEdgeFound) {
- Edge e = getEdge(BB,BB);
- if (getEdgeWeight(e) == MissingValue) {
- double iw = 0;
- std::set<const BasicBlock *> Processed;
- for (const_pred_iterator NBB = pred_begin(BB), End = pred_end(BB);
- NBB != End; ++NBB) {
- if (Processed.insert(*NBB).second) {
- Edge e = getEdge(*NBB, BB);
- double ew = getEdgeWeight(e);
- if (ew != MissingValue) {
- iw += ew;
- }
- }
- }
- setEdgeWeight(e,iw * 10);
- FoundPath = true;
- }
- }
- }
- if (FoundPath) continue;
-
- // Determine backedges, set them to zero.
- FI = Unvisited.begin(), FE = Unvisited.end();
- while(FI != FE && !FoundPath) {
- const BasicBlock *BB = *FI; ++FI;
- const BasicBlock *Dest = 0;
- Path P;
- bool BackEdgeFound = false;
- for (const_pred_iterator NBB = pred_begin(BB), End = pred_end(BB);
- NBB != End; ++NBB) {
- Dest = GetPath(BB, *NBB, P, GetPathToDest | GetPathWithNewEdges);
- if (Dest == *NBB) {
- BackEdgeFound = true;
- break;
- }
- }
- if (BackEdgeFound) {
- Edge e = getEdge(Dest,BB);
- double w = getEdgeWeight(e);
- if (w == MissingValue) {
- setEdgeWeight(e,0);
- FoundPath = true;
- }
- do {
- Edge e = getEdge(P[Dest], Dest);
- double w = getEdgeWeight(e);
- if (w == MissingValue) {
- setEdgeWeight(e,0);
- FoundPath = true;
- }
- Dest = P[Dest];
- } while (Dest != BB);
- }
- }
- if (FoundPath) continue;
-
- // Channel flow to return block.
- FI = Unvisited.begin(), FE = Unvisited.end();
- while(FI != FE && !FoundPath) {
- const BasicBlock *BB = *FI; ++FI;
-
- Path P;
- const BasicBlock *Dest = GetPath(BB, 0, P, GetPathToExit | GetPathWithNewEdges);
- Dest = P[(const BasicBlock*)0];
- if (!Dest) continue;
-
- if (getEdgeWeight(getEdge(Dest,0)) == MissingValue) {
- // Calculate incoming flow.
- double iw = 0;
- std::set<const BasicBlock *> Processed;
- for (const_pred_iterator NBB = pred_begin(BB), End = pred_end(BB);
- NBB != End; ++NBB) {
- if (Processed.insert(*NBB).second) {
- Edge e = getEdge(*NBB, BB);
- double ew = getEdgeWeight(e);
- if (ew != MissingValue) {
- iw += ew;
- }
- }
- }
- do {
- Edge e = getEdge(P[Dest], Dest);
- double w = getEdgeWeight(e);
- if (w == MissingValue) {
- setEdgeWeight(e,iw);
- FoundPath = true;
- } else {
- assert(0 && "Edge should not have value already!");
- }
- Dest = P[Dest];
- } while (Dest != BB);
- }
- }
- if (FoundPath) continue;
-
- // Speculatively set edges to zero.
- FI = Unvisited.begin(), FE = Unvisited.end();
- while(FI != FE && !FoundPath) {
- const BasicBlock *BB = *FI; ++FI;
-
- for (const_pred_iterator NBB = pred_begin(BB), End = pred_end(BB);
- NBB != End; ++NBB) {
- Edge e = getEdge(*NBB,BB);
- double w = getEdgeWeight(e);
- if (w == MissingValue) {
- setEdgeWeight(e,0);
- FoundPath = true;
- break;
- }
- }
- }
- if (FoundPath) continue;
-
- errs() << "{";
- FI = Unvisited.begin(), FE = Unvisited.end();
- while(FI != FE) {
- const BasicBlock *BB = *FI; ++FI;
- dbgs() << BB->getName();
- if (FI != FE)
- dbgs() << ",";
- }
- errs() << "}";
-
- errs() << "ASSERT: could not repair function";
- assert(0 && "could not repair function");
- }
-
- EdgeWeights J = EdgeInformation[F];
- for (EdgeWeights::iterator EI = J.begin(), EE = J.end(); EI != EE; ++EI) {
- Edge e = EI->first;
-
- bool SuccFound = false;
- if (e.first != 0) {
- succ_const_iterator NBB = succ_begin(e.first), End = succ_end(e.first);
- if (NBB == End) {
- if (0 == e.second) {
- SuccFound = true;
- }
- }
- for (;NBB != End; ++NBB) {
- if (*NBB == e.second) {
- SuccFound = true;
- break;
- }
- }
- if (!SuccFound) {
- removeEdge(e);
- }
- }
- }
-}
-
-raw_ostream& operator<<(raw_ostream &O, const MachineFunction *MF) {
- return O << MF->getFunction()->getName() << "(MF)";
-}
-
-raw_ostream& operator<<(raw_ostream &O, const MachineBasicBlock *MBB) {
- return O << MBB->getBasicBlock()->getName() << "(MB)";
-}
-
-raw_ostream& operator<<(raw_ostream &O, std::pair<const MachineBasicBlock *, const MachineBasicBlock *> E) {
- O << "(";
-
- if (E.first)
- O << E.first;
- else
- O << "0";
-
- O << ",";
-
- if (E.second)
- O << E.second;
- else
- O << "0";
-
- return O << ")";
-}
-
-} // namespace llvm
-
-//===----------------------------------------------------------------------===//
-// NoProfile ProfileInfo implementation
-//
-
-namespace {
- struct NoProfileInfo : public ImmutablePass, public ProfileInfo {
- static char ID; // Class identification, replacement for typeinfo
- NoProfileInfo() : ImmutablePass(ID) {
- initializeNoProfileInfoPass(*PassRegistry::getPassRegistry());
- }
-
- /// getAdjustedAnalysisPointer - This method is used when a pass implements
- /// an analysis interface through multiple inheritance. If needed, it
- /// should override this to adjust the this pointer as needed for the
- /// specified pass info.
- virtual void *getAdjustedAnalysisPointer(AnalysisID PI) {
- if (PI == &ProfileInfo::ID)
- return (ProfileInfo*)this;
- return this;
- }
-
- virtual const char *getPassName() const {
- return "NoProfileInfo";
- }
- };
-} // End of anonymous namespace
-
-char NoProfileInfo::ID = 0;
-// Register this pass...
-INITIALIZE_AG_PASS(NoProfileInfo, ProfileInfo, "no-profile",
- "No Profile Information", false, true, true)
-
-ImmutablePass *llvm::createNoProfileInfoPass() { return new NoProfileInfo(); }
diff --git a/lib/Analysis/ProfileInfoLoader.cpp b/lib/Analysis/ProfileInfoLoader.cpp
deleted file mode 100644
index f1f3e94..0000000
--- a/lib/Analysis/ProfileInfoLoader.cpp
+++ /dev/null
@@ -1,155 +0,0 @@
-//===- ProfileInfoLoader.cpp - Load profile information from disk --------===//
-//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-//
-// The ProfileInfoLoader class is used to load and represent profiling
-// information read in from the dump file.
-//
-//===----------------------------------------------------------------------===//
-
-#include "llvm/Analysis/ProfileInfoLoader.h"
-#include "llvm/Analysis/ProfileInfoTypes.h"
-#include "llvm/IR/InstrTypes.h"
-#include "llvm/IR/Module.h"
-#include "llvm/Support/raw_ostream.h"
-#include <cstdio>
-#include <cstdlib>
-using namespace llvm;
-
-// ByteSwap - Byteswap 'Var' if 'Really' is true.
-//
-static inline unsigned ByteSwap(unsigned Var, bool Really) {
- if (!Really) return Var;
- return ((Var & (255U<< 0U)) << 24U) |
- ((Var & (255U<< 8U)) << 8U) |
- ((Var & (255U<<16U)) >> 8U) |
- ((Var & (255U<<24U)) >> 24U);
-}
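
The endianness trick used with ByteSwap works because every packet type
constant fits in the low byte; if a reader sees those bits as zero, the file
was written with the opposite byte order. A self-contained sketch of
detect-and-swap (the packet type value is made up):

#include <cstdio>

static unsigned byteSwap(unsigned v) {
  return ((v & 0x000000FFu) << 24) | ((v & 0x0000FF00u) << 8) |
         ((v & 0x00FF0000u) >> 8)  | ((v & 0xFF000000u) >> 24);
}

int main() {
  unsigned onDisk = byteSwap(3);       // as if written by the other endianness
  bool shouldSwap = (char)onDisk == 0; // low byte zero -> byte-order mismatch
  unsigned type = shouldSwap ? byteSwap(onDisk) : onDisk;
  std::printf("swap=%d type=%u\n", shouldSwap, type); // prints: swap=1 type=3
}
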
-
-static unsigned AddCounts(unsigned A, unsigned B) {
- // If either value is undefined, use the other.
- if (A == ProfileInfoLoader::Uncounted) return B;
- if (B == ProfileInfoLoader::Uncounted) return A;
- return A + B;
-}
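
AddCounts treats Uncounted as "no data observed" rather than as a huge count,
so merging several profile runs keeps whichever run actually saw the counter.
A tiny sketch of that merge, assuming the same ~0U sentinel:

#include <cstdio>

static const unsigned Uncounted = ~0U;

static unsigned addCounts(unsigned a, unsigned b) {
  if (a == Uncounted) return b; // left run had no data: take the right value
  if (b == Uncounted) return a; // right run had no data: take the left value
  return a + b;                 // both runs observed the counter: accumulate
}

int main() {
  std::printf("%u\n", addCounts(Uncounted, 7)); // 7, not a saturated sum
  std::printf("%u\n", addCounts(5, 7));         // 12
}
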
-
-static void ReadProfilingBlock(const char *ToolName, FILE *F,
- bool ShouldByteSwap,
- std::vector<unsigned> &Data) {
- // Read the number of entries...
- unsigned NumEntries;
- if (fread(&NumEntries, sizeof(unsigned), 1, F) != 1) {
- errs() << ToolName << ": data packet truncated!\n";
- perror(0);
- exit(1);
- }
- NumEntries = ByteSwap(NumEntries, ShouldByteSwap);
-
- // Read the counts...
- std::vector<unsigned> TempSpace(NumEntries);
-
- // Read in the block of data...
- if (fread(&TempSpace[0], sizeof(unsigned)*NumEntries, 1, F) != 1) {
- errs() << ToolName << ": data packet truncated!\n";
- perror(0);
- exit(1);
- }
-
-  // Make sure we have enough space... The space is initialised to -1 to
-  // facilitate the loading of missing values for OptimalEdgeProfiling.
- if (Data.size() < NumEntries)
- Data.resize(NumEntries, ProfileInfoLoader::Uncounted);
-
- // Accumulate the data we just read into the data.
- if (!ShouldByteSwap) {
- for (unsigned i = 0; i != NumEntries; ++i) {
- Data[i] = AddCounts(TempSpace[i], Data[i]);
- }
- } else {
- for (unsigned i = 0; i != NumEntries; ++i) {
- Data[i] = AddCounts(ByteSwap(TempSpace[i], true), Data[i]);
- }
- }
-}
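
The payload layout ReadProfilingBlock expects is just a 32-bit entry count
followed by that many 32-bit counters, in the writer's byte order. A sketch
that writes and re-reads one such block in host byte order (file name and
values are made up, and error handling is omitted):

#include <cstdio>
#include <vector>

int main() {
  const char *path = "counts.bin"; // illustrative scratch file
  std::vector<unsigned> counts = {1, 2, 3};

  FILE *out = std::fopen(path, "wb");
  unsigned n = (unsigned)counts.size();
  std::fwrite(&n, sizeof n, 1, out);                    // entry count
  std::fwrite(counts.data(), sizeof(unsigned), n, out); // the counters
  std::fclose(out);

  FILE *in = std::fopen(path, "rb");
  unsigned m = 0;
  std::fread(&m, sizeof m, 1, in);
  std::vector<unsigned> back(m);
  std::fread(back.data(), sizeof(unsigned), m, in);
  std::fclose(in);
  std::printf("%u entries, first=%u\n", m, back[0]); // 3 entries, first=1
}
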
-
-const unsigned ProfileInfoLoader::Uncounted = ~0U;
-
-// ProfileInfoLoader ctor - Read the specified profiling data file, exiting the
-// program if the file is invalid or broken.
-//
-ProfileInfoLoader::ProfileInfoLoader(const char *ToolName,
- const std::string &Filename)
- : Filename(Filename) {
- FILE *F = fopen(Filename.c_str(), "rb");
- if (F == 0) {
- errs() << ToolName << ": Error opening '" << Filename << "': ";
- perror(0);
- exit(1);
- }
-
- // Keep reading packets until we run out of them.
- unsigned PacketType;
- while (fread(&PacketType, sizeof(unsigned), 1, F) == 1) {
- // If the low eight bits of the packet are zero, we must be dealing with an
- // endianness mismatch. Byteswap all words read from the profiling
- // information.
- bool ShouldByteSwap = (char)PacketType == 0;
- PacketType = ByteSwap(PacketType, ShouldByteSwap);
-
- switch (PacketType) {
- case ArgumentInfo: {
- unsigned ArgLength;
- if (fread(&ArgLength, sizeof(unsigned), 1, F) != 1) {
- errs() << ToolName << ": arguments packet truncated!\n";
- perror(0);
- exit(1);
- }
- ArgLength = ByteSwap(ArgLength, ShouldByteSwap);
-
- // Read in the arguments...
- std::vector<char> Chars(ArgLength+4);
-
- if (ArgLength)
- if (fread(&Chars[0], (ArgLength+3) & ~3, 1, F) != 1) {
- errs() << ToolName << ": arguments packet truncated!\n";
- perror(0);
- exit(1);
- }
- CommandLines.push_back(std::string(&Chars[0], &Chars[ArgLength]));
- break;
- }
-
- case FunctionInfo:
- ReadProfilingBlock(ToolName, F, ShouldByteSwap, FunctionCounts);
- break;
-
- case BlockInfo:
- ReadProfilingBlock(ToolName, F, ShouldByteSwap, BlockCounts);
- break;
-
- case EdgeInfo:
- ReadProfilingBlock(ToolName, F, ShouldByteSwap, EdgeCounts);
- break;
-
- case OptEdgeInfo:
- ReadProfilingBlock(ToolName, F, ShouldByteSwap, OptimalEdgeCounts);
- break;
-
- case BBTraceInfo:
- ReadProfilingBlock(ToolName, F, ShouldByteSwap, BBTrace);
- break;
-
- default:
- errs() << ToolName << ": Unknown packet type #" << PacketType << "!\n";
- exit(1);
- }
- }
-
- fclose(F);
-}
-
diff --git a/lib/Analysis/ProfileInfoLoaderPass.cpp b/lib/Analysis/ProfileInfoLoaderPass.cpp
deleted file mode 100644
index 346f8d6..0000000
--- a/lib/Analysis/ProfileInfoLoaderPass.cpp
+++ /dev/null
@@ -1,267 +0,0 @@
-//===- ProfileInfoLoaderPass.cpp - LLVM Pass to load profile info ---------===//
-//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-//
-// This file implements a concrete implementation of profiling information that
-// loads the information from a profile dump file.
-//
-//===----------------------------------------------------------------------===//
-#define DEBUG_TYPE "profile-loader"
-#include "llvm/Analysis/Passes.h"
-#include "llvm/ADT/SmallSet.h"
-#include "llvm/ADT/Statistic.h"
-#include "llvm/Analysis/ProfileInfo.h"
-#include "llvm/Analysis/ProfileInfoLoader.h"
-#include "llvm/IR/BasicBlock.h"
-#include "llvm/IR/InstrTypes.h"
-#include "llvm/IR/Module.h"
-#include "llvm/Pass.h"
-#include "llvm/Support/CFG.h"
-#include "llvm/Support/CommandLine.h"
-#include "llvm/Support/Debug.h"
-#include "llvm/Support/Format.h"
-#include "llvm/Support/raw_ostream.h"
-#include <set>
-using namespace llvm;
-
-STATISTIC(NumEdgesRead, "The # of edges read.");
-
-static cl::opt<std::string>
-ProfileInfoFilename("profile-info-file", cl::init("llvmprof.out"),
- cl::value_desc("filename"),
- cl::desc("Profile file loaded by -profile-loader"));
-
-namespace {
- class LoaderPass : public ModulePass, public ProfileInfo {
- std::string Filename;
- std::set<Edge> SpanningTree;
- std::set<const BasicBlock*> BBisUnvisited;
- unsigned ReadCount;
- public:
- static char ID; // Class identification, replacement for typeinfo
- explicit LoaderPass(const std::string &filename = "")
- : ModulePass(ID), Filename(filename) {
- initializeLoaderPassPass(*PassRegistry::getPassRegistry());
- if (filename.empty()) Filename = ProfileInfoFilename;
- }
-
- virtual void getAnalysisUsage(AnalysisUsage &AU) const {
- AU.setPreservesAll();
- }
-
- virtual const char *getPassName() const {
- return "Profiling information loader";
- }
-
-    // recurseBasicBlock() - Calculates the edge weights for as many basic
-    // blocks as possible.
- virtual void recurseBasicBlock(const BasicBlock *BB);
- virtual void readEdgeOrRemember(Edge, Edge&, unsigned &, double &);
- virtual void readEdge(ProfileInfo::Edge, std::vector<unsigned>&);
-
- /// getAdjustedAnalysisPointer - This method is used when a pass implements
- /// an analysis interface through multiple inheritance. If needed, it
- /// should override this to adjust the this pointer as needed for the
- /// specified pass info.
- virtual void *getAdjustedAnalysisPointer(AnalysisID PI) {
- if (PI == &ProfileInfo::ID)
- return (ProfileInfo*)this;
- return this;
- }
-
- /// run - Load the profile information from the specified file.
- virtual bool runOnModule(Module &M);
- };
-} // End of anonymous namespace
-
-char LoaderPass::ID = 0;
-INITIALIZE_AG_PASS(LoaderPass, ProfileInfo, "profile-loader",
- "Load profile information from llvmprof.out", false, true, false)
-
-char &llvm::ProfileLoaderPassID = LoaderPass::ID;
-
-ModulePass *llvm::createProfileLoaderPass() { return new LoaderPass(); }
-
-/// createProfileLoaderPass - This function returns a Pass that loads the
-/// profiling information for the module from the specified filename, making it
-/// available to the optimizers.
-Pass *llvm::createProfileLoaderPass(const std::string &Filename) {
- return new LoaderPass(Filename);
-}
-
-void LoaderPass::readEdgeOrRemember(Edge edge, Edge &tocalc,
- unsigned &uncalc, double &count) {
- double w;
- if ((w = getEdgeWeight(edge)) == MissingValue) {
- tocalc = edge;
- uncalc++;
- } else {
- count+=w;
- }
-}
-
-// recurseBasicBlock - Visits all neighbours of a block and then tries to
-// calculate the missing edge values.
-void LoaderPass::recurseBasicBlock(const BasicBlock *BB) {
-
- // break recursion if already visited
- if (BBisUnvisited.find(BB) == BBisUnvisited.end()) return;
- BBisUnvisited.erase(BB);
- if (!BB) return;
-
- for (succ_const_iterator bbi = succ_begin(BB), bbe = succ_end(BB);
- bbi != bbe; ++bbi) {
- recurseBasicBlock(*bbi);
- }
- for (const_pred_iterator bbi = pred_begin(BB), bbe = pred_end(BB);
- bbi != bbe; ++bbi) {
- recurseBasicBlock(*bbi);
- }
-
- Edge tocalc;
- if (CalculateMissingEdge(BB, tocalc)) {
- SpanningTree.erase(tocalc);
- }
-}
-
-void LoaderPass::readEdge(ProfileInfo::Edge e,
- std::vector<unsigned> &ECs) {
- if (ReadCount < ECs.size()) {
- double weight = ECs[ReadCount++];
- if (weight != ProfileInfoLoader::Uncounted) {
-      // Here the data realm changes from the unsigned of the file to the
-      // double of the ProfileInfo. This conversion is safe because we know
-      // that everything that is representable in unsigned is also
-      // representable in double.
- EdgeInformation[getFunction(e)][e] += (double)weight;
-
- DEBUG(dbgs() << "--Read Edge Counter for " << e
- << " (# "<< (ReadCount-1) << "): "
- << (unsigned)getEdgeWeight(e) << "\n");
- } else {
- // This happens only if reading optimal profiling information, not when
- // reading regular profiling information.
- SpanningTree.insert(e);
- }
- }
-}
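
The "safe conversion" comment above holds because a double has a 53-bit
significand, so every 32-bit unsigned counter is representable exactly. A
one-liner check, as a sketch:

#include <cassert>
#include <cstdio>

int main() {
  unsigned max = 0xFFFFFFFFu; // the largest 32-bit count a file can hold
  double d = (double)max;     // the ProfileInfo-side representation
  assert((unsigned)d == max); // round-trips exactly, since 2^32-1 < 2^53
  std::printf("%.1f\n", d);   // prints: 4294967295.0
}
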
-
-bool LoaderPass::runOnModule(Module &M) {
- ProfileInfoLoader PIL("profile-loader", Filename);
-
- EdgeInformation.clear();
- std::vector<unsigned> Counters = PIL.getRawEdgeCounts();
- if (Counters.size() > 0) {
- ReadCount = 0;
- for (Module::iterator F = M.begin(), E = M.end(); F != E; ++F) {
- if (F->isDeclaration()) continue;
- DEBUG(dbgs() << "Working on " << F->getName() << "\n");
- readEdge(getEdge(0,&F->getEntryBlock()), Counters);
- for (Function::iterator BB = F->begin(), E = F->end(); BB != E; ++BB) {
- TerminatorInst *TI = BB->getTerminator();
- for (unsigned s = 0, e = TI->getNumSuccessors(); s != e; ++s) {
- readEdge(getEdge(BB,TI->getSuccessor(s)), Counters);
- }
- }
- }
- if (ReadCount != Counters.size()) {
- errs() << "WARNING: profile information is inconsistent with "
- << "the current program!\n";
- }
- NumEdgesRead = ReadCount;
- }
-
- Counters = PIL.getRawOptimalEdgeCounts();
- if (Counters.size() > 0) {
- ReadCount = 0;
- for (Module::iterator F = M.begin(), E = M.end(); F != E; ++F) {
- if (F->isDeclaration()) continue;
- DEBUG(dbgs() << "Working on " << F->getName() << "\n");
- readEdge(getEdge(0,&F->getEntryBlock()), Counters);
- for (Function::iterator BB = F->begin(), E = F->end(); BB != E; ++BB) {
- TerminatorInst *TI = BB->getTerminator();
- if (TI->getNumSuccessors() == 0) {
- readEdge(getEdge(BB,0), Counters);
- }
- for (unsigned s = 0, e = TI->getNumSuccessors(); s != e; ++s) {
- readEdge(getEdge(BB,TI->getSuccessor(s)), Counters);
- }
- }
- while (SpanningTree.size() > 0) {
-
- unsigned size = SpanningTree.size();
-
- BBisUnvisited.clear();
- for (std::set<Edge>::iterator ei = SpanningTree.begin(),
- ee = SpanningTree.end(); ei != ee; ++ei) {
- BBisUnvisited.insert(ei->first);
- BBisUnvisited.insert(ei->second);
- }
- while (BBisUnvisited.size() > 0) {
- recurseBasicBlock(*BBisUnvisited.begin());
- }
-
- if (SpanningTree.size() == size) {
- DEBUG(dbgs()<<"{");
- for (std::set<Edge>::iterator ei = SpanningTree.begin(),
- ee = SpanningTree.end(); ei != ee; ++ei) {
- DEBUG(dbgs()<< *ei <<",");
- }
- assert(0 && "No edge calculated!");
- }
-
- }
- }
- if (ReadCount != Counters.size()) {
- errs() << "WARNING: profile information is inconsistent with "
- << "the current program!\n";
- }
- NumEdgesRead = ReadCount;
- }
-
- BlockInformation.clear();
- Counters = PIL.getRawBlockCounts();
- if (Counters.size() > 0) {
- ReadCount = 0;
- for (Module::iterator F = M.begin(), E = M.end(); F != E; ++F) {
- if (F->isDeclaration()) continue;
- for (Function::iterator BB = F->begin(), E = F->end(); BB != E; ++BB)
- if (ReadCount < Counters.size())
-          // Here the data realm changes from the unsigned of the file to the
-          // double of the ProfileInfo. This conversion is safe because we
-          // know that everything that is representable in unsigned is also
-          // representable in double.
- BlockInformation[F][BB] = (double)Counters[ReadCount++];
- }
- if (ReadCount != Counters.size()) {
- errs() << "WARNING: profile information is inconsistent with "
- << "the current program!\n";
- }
- }
-
- FunctionInformation.clear();
- Counters = PIL.getRawFunctionCounts();
- if (Counters.size() > 0) {
- ReadCount = 0;
- for (Module::iterator F = M.begin(), E = M.end(); F != E; ++F) {
- if (F->isDeclaration()) continue;
- if (ReadCount < Counters.size())
-      // Here the data realm changes from the unsigned of the file to the
-      // double of the ProfileInfo. This conversion is safe because we know
-      // that everything that is representable in unsigned is also
-      // representable in double.
- FunctionInformation[F] = (double)Counters[ReadCount++];
- }
- if (ReadCount != Counters.size()) {
- errs() << "WARNING: profile information is inconsistent with "
- << "the current program!\n";
- }
- }
-
- return false;
-}
diff --git a/lib/Analysis/ProfileVerifierPass.cpp b/lib/Analysis/ProfileVerifierPass.cpp
deleted file mode 100644
index c8896de..0000000
--- a/lib/Analysis/ProfileVerifierPass.cpp
+++ /dev/null
@@ -1,383 +0,0 @@
-//===- ProfileVerifierPass.cpp - LLVM Pass to estimate profile info -------===//
-//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-//
-// This file implements a pass that checks profiling information for
-// plausibility.
-//
-//===----------------------------------------------------------------------===//
-#define DEBUG_TYPE "profile-verifier"
-#include "llvm/Analysis/Passes.h"
-#include "llvm/Analysis/ProfileInfo.h"
-#include "llvm/IR/Instructions.h"
-#include "llvm/IR/Module.h"
-#include "llvm/Pass.h"
-#include "llvm/Support/CFG.h"
-#include "llvm/Support/CallSite.h"
-#include "llvm/Support/CommandLine.h"
-#include "llvm/Support/Debug.h"
-#include "llvm/Support/Format.h"
-#include "llvm/Support/InstIterator.h"
-#include "llvm/Support/raw_ostream.h"
-#include <set>
-using namespace llvm;
-
-static cl::opt<bool,false>
-ProfileVerifierDisableAssertions("profile-verifier-noassert",
- cl::desc("Disable assertions"));
-
-namespace {
- template<class FType, class BType>
- class ProfileVerifierPassT : public FunctionPass {
-
- struct DetailedBlockInfo {
- const BType *BB;
- double BBWeight;
- double inWeight;
- int inCount;
- double outWeight;
- int outCount;
- };
-
- ProfileInfoT<FType, BType> *PI;
- std::set<const BType*> BBisVisited;
- std::set<const FType*> FisVisited;
- bool DisableAssertions;
-
-    // When debugging is enabled, the verifier prints a whole slew of debug
-    // information; otherwise it is just the assert. These are all the helper
-    // functions.
- bool PrintedDebugTree;
- std::set<const BType*> BBisPrinted;
- void debugEntry(DetailedBlockInfo*);
- void printDebugInfo(const BType *BB);
-
- public:
- static char ID; // Class identification, replacement for typeinfo
-
- explicit ProfileVerifierPassT () : FunctionPass(ID) {
- initializeProfileVerifierPassPass(*PassRegistry::getPassRegistry());
- DisableAssertions = ProfileVerifierDisableAssertions;
- }
- explicit ProfileVerifierPassT (bool da) : FunctionPass(ID),
- DisableAssertions(da) {
- initializeProfileVerifierPassPass(*PassRegistry::getPassRegistry());
- }
-
- void getAnalysisUsage(AnalysisUsage &AU) const {
- AU.setPreservesAll();
- AU.addRequired<ProfileInfoT<FType, BType> >();
- }
-
- const char *getPassName() const {
- return "Profiling information verifier";
- }
-
- /// run - Verify the profile information.
- bool runOnFunction(FType &F);
- void recurseBasicBlock(const BType*);
-
- bool exitReachable(const FType*);
- double ReadOrAssert(typename ProfileInfoT<FType, BType>::Edge);
- void CheckValue(bool, const char*, DetailedBlockInfo*);
- };
-
- typedef ProfileVerifierPassT<Function, BasicBlock> ProfileVerifierPass;
-
- template<class FType, class BType>
- void ProfileVerifierPassT<FType, BType>::printDebugInfo(const BType *BB) {
-
- if (BBisPrinted.find(BB) != BBisPrinted.end()) return;
-
- double BBWeight = PI->getExecutionCount(BB);
- if (BBWeight == ProfileInfoT<FType, BType>::MissingValue) { BBWeight = 0; }
- double inWeight = 0;
- int inCount = 0;
- std::set<const BType*> ProcessedPreds;
- for (const_pred_iterator bbi = pred_begin(BB), bbe = pred_end(BB);
- bbi != bbe; ++bbi ) {
- if (ProcessedPreds.insert(*bbi).second) {
- typename ProfileInfoT<FType, BType>::Edge E = PI->getEdge(*bbi,BB);
- double EdgeWeight = PI->getEdgeWeight(E);
- if (EdgeWeight == ProfileInfoT<FType, BType>::MissingValue) { EdgeWeight = 0; }
- dbgs() << "calculated in-edge " << E << ": "
- << format("%20.20g",EdgeWeight) << "\n";
- inWeight += EdgeWeight;
- inCount++;
- }
- }
- double outWeight = 0;
- int outCount = 0;
- std::set<const BType*> ProcessedSuccs;
- for ( succ_const_iterator bbi = succ_begin(BB), bbe = succ_end(BB);
- bbi != bbe; ++bbi ) {
- if (ProcessedSuccs.insert(*bbi).second) {
- typename ProfileInfoT<FType, BType>::Edge E = PI->getEdge(BB,*bbi);
- double EdgeWeight = PI->getEdgeWeight(E);
- if (EdgeWeight == ProfileInfoT<FType, BType>::MissingValue) { EdgeWeight = 0; }
- dbgs() << "calculated out-edge " << E << ": "
- << format("%20.20g",EdgeWeight) << "\n";
- outWeight += EdgeWeight;
- outCount++;
- }
- }
- dbgs() << "Block " << BB->getName() << " in "
- << BB->getParent()->getName() << ":"
- << "BBWeight=" << format("%20.20g",BBWeight) << ","
- << "inWeight=" << format("%20.20g",inWeight) << ","
- << "inCount=" << inCount << ","
- << "outWeight=" << format("%20.20g",outWeight) << ","
- << "outCount" << outCount << "\n";
-
- // mark as visited and recurse into subnodes
- BBisPrinted.insert(BB);
- for ( succ_const_iterator bbi = succ_begin(BB), bbe = succ_end(BB);
- bbi != bbe; ++bbi ) {
- printDebugInfo(*bbi);
- }
- }
-
- template<class FType, class BType>
- void ProfileVerifierPassT<FType, BType>::debugEntry (DetailedBlockInfo *DI) {
- dbgs() << "TROUBLE: Block " << DI->BB->getName() << " in "
- << DI->BB->getParent()->getName() << ":"
- << "BBWeight=" << format("%20.20g",DI->BBWeight) << ","
- << "inWeight=" << format("%20.20g",DI->inWeight) << ","
- << "inCount=" << DI->inCount << ","
- << "outWeight=" << format("%20.20g",DI->outWeight) << ","
- << "outCount=" << DI->outCount << "\n";
- if (!PrintedDebugTree) {
- PrintedDebugTree = true;
- printDebugInfo(&(DI->BB->getParent()->getEntryBlock()));
- }
- }
-
- // This compares A and B for equality.
- static bool Equals(double A, double B) {
- return A == B;
- }
-
-  // This checks if the function "exit" is reachable from a given function
-  // via calls; this is necessary to check if a profile is valid despite the
-  // counts not fitting exactly.
- template<class FType, class BType>
- bool ProfileVerifierPassT<FType, BType>::exitReachable(const FType *F) {
- if (!F) return false;
-
- if (FisVisited.count(F)) return false;
-
- FType *Exit = F->getParent()->getFunction("exit");
- if (Exit == F) {
- return true;
- }
-
- FisVisited.insert(F);
- bool exits = false;
- for (const_inst_iterator I = inst_begin(F), E = inst_end(F); I != E; ++I) {
- if (const CallInst *CI = dyn_cast<CallInst>(&*I)) {
- FType *F = CI->getCalledFunction();
- if (F) {
- exits |= exitReachable(F);
- } else {
- // This is a call to a pointer, all bets are off...
- exits = true;
- }
- if (exits) break;
- }
- }
- return exits;
- }
-
- #define ASSERTMESSAGE(M) \
- { dbgs() << "ASSERT:" << (M) << "\n"; \
- if (!DisableAssertions) assert(0 && (M)); }
-
- template<class FType, class BType>
- double ProfileVerifierPassT<FType, BType>::ReadOrAssert(typename ProfileInfoT<FType, BType>::Edge E) {
- double EdgeWeight = PI->getEdgeWeight(E);
- if (EdgeWeight == ProfileInfoT<FType, BType>::MissingValue) {
- dbgs() << "Edge " << E << " in Function "
- << ProfileInfoT<FType, BType>::getFunction(E)->getName() << ": ";
- ASSERTMESSAGE("Edge has missing value");
- return 0;
- } else {
- if (EdgeWeight < 0) {
- dbgs() << "Edge " << E << " in Function "
- << ProfileInfoT<FType, BType>::getFunction(E)->getName() << ": ";
- ASSERTMESSAGE("Edge has negative value");
- }
- return EdgeWeight;
- }
- }
-
- template<class FType, class BType>
- void ProfileVerifierPassT<FType, BType>::CheckValue(bool Error,
- const char *Message,
- DetailedBlockInfo *DI) {
- if (Error) {
- DEBUG(debugEntry(DI));
- dbgs() << "Block " << DI->BB->getName() << " in Function "
- << DI->BB->getParent()->getName() << ": ";
- ASSERTMESSAGE(Message);
- }
- return;
- }
-
- // This calculates the Information for a block and then recurses into the
- // successors.
- template<class FType, class BType>
- void ProfileVerifierPassT<FType, BType>::recurseBasicBlock(const BType *BB) {
-
- // Break the recursion by remembering all visited blocks.
- if (BBisVisited.find(BB) != BBisVisited.end()) return;
-
- // Use a data structure to store all the information, this can then be handed
- // to debug printers.
- DetailedBlockInfo DI;
- DI.BB = BB;
- DI.outCount = DI.inCount = 0;
- DI.inWeight = DI.outWeight = 0;
-
- // Read predecessors.
- std::set<const BType*> ProcessedPreds;
- const_pred_iterator bpi = pred_begin(BB), bpe = pred_end(BB);
- // If there are none, check for (0,BB) edge.
- if (bpi == bpe) {
- DI.inWeight += ReadOrAssert(PI->getEdge(0,BB));
- DI.inCount++;
- }
- for (;bpi != bpe; ++bpi) {
- if (ProcessedPreds.insert(*bpi).second) {
- DI.inWeight += ReadOrAssert(PI->getEdge(*bpi,BB));
- DI.inCount++;
- }
- }
-
- // Read successors.
- std::set<const BType*> ProcessedSuccs;
- succ_const_iterator bbi = succ_begin(BB), bbe = succ_end(BB);
-  // If there is a (0,BB) edge, consider it too. (This is done not only when
- // there are no successors, but every time; not every function contains
- // return blocks with no successors (think loop latch as return block)).
- double w = PI->getEdgeWeight(PI->getEdge(BB,0));
- if (w != ProfileInfoT<FType, BType>::MissingValue) {
- DI.outWeight += w;
- DI.outCount++;
- }
- for (;bbi != bbe; ++bbi) {
- if (ProcessedSuccs.insert(*bbi).second) {
- DI.outWeight += ReadOrAssert(PI->getEdge(BB,*bbi));
- DI.outCount++;
- }
- }
-
- // Read block weight.
- DI.BBWeight = PI->getExecutionCount(BB);
- CheckValue(DI.BBWeight == ProfileInfoT<FType, BType>::MissingValue,
- "BasicBlock has missing value", &DI);
- CheckValue(DI.BBWeight < 0,
- "BasicBlock has negative value", &DI);
-
- // Check if this block is a setjmp target.
- bool isSetJmpTarget = false;
- if (DI.outWeight > DI.inWeight) {
- for (typename BType::const_iterator i = BB->begin(), ie = BB->end();
- i != ie; ++i) {
- if (const CallInst *CI = dyn_cast<CallInst>(&*i)) {
- FType *F = CI->getCalledFunction();
- if (F && (F->getName() == "_setjmp")) {
- isSetJmpTarget = true; break;
- }
- }
- }
- }
- // Check if this block is eventually reaching exit.
- bool isExitReachable = false;
- if (DI.inWeight > DI.outWeight) {
- for (typename BType::const_iterator i = BB->begin(), ie = BB->end();
- i != ie; ++i) {
- if (const CallInst *CI = dyn_cast<CallInst>(&*i)) {
- FType *F = CI->getCalledFunction();
- if (F) {
- FisVisited.clear();
- isExitReachable |= exitReachable(F);
- } else {
- // This is a call to a pointer, all bets are off...
- isExitReachable = true;
- }
- if (isExitReachable) break;
- }
- }
- }
-
- if (DI.inCount > 0 && DI.outCount == 0) {
- // If this is a block with no successors.
- if (!isSetJmpTarget) {
- CheckValue(!Equals(DI.inWeight,DI.BBWeight),
- "inWeight and BBWeight do not match", &DI);
- }
- } else if (DI.inCount == 0 && DI.outCount > 0) {
- // If this is a block with no predecessors.
- if (!isExitReachable)
- CheckValue(!Equals(DI.BBWeight,DI.outWeight),
- "BBWeight and outWeight do not match", &DI);
- } else {
- // If this block has successors and predecessors.
- if (DI.inWeight > DI.outWeight && !isExitReachable)
- CheckValue(!Equals(DI.inWeight,DI.outWeight),
- "inWeight and outWeight do not match", &DI);
- if (DI.inWeight < DI.outWeight && !isSetJmpTarget)
- CheckValue(!Equals(DI.inWeight,DI.outWeight),
- "inWeight and outWeight do not match", &DI);
- }
-
-
-  // Mark this block as visited, recurse into successors.
- BBisVisited.insert(BB);
- for ( succ_const_iterator bbi = succ_begin(BB), bbe = succ_end(BB);
- bbi != bbe; ++bbi ) {
- recurseBasicBlock(*bbi);
- }
- }
-
- template<class FType, class BType>
- bool ProfileVerifierPassT<FType, BType>::runOnFunction(FType &F) {
- PI = getAnalysisIfAvailable<ProfileInfoT<FType, BType> >();
- if (!PI)
- ASSERTMESSAGE("No ProfileInfo available");
-
- // Prepare global variables.
- PrintedDebugTree = false;
- BBisVisited.clear();
-
- // Fetch entry block and recurse into it.
- const BType *entry = &F.getEntryBlock();
- recurseBasicBlock(entry);
-
- if (PI->getExecutionCount(&F) != PI->getExecutionCount(entry))
- ASSERTMESSAGE("Function count and entry block count do not match");
-
- return false;
- }
-
- template<class FType, class BType>
- char ProfileVerifierPassT<FType, BType>::ID = 0;
-}
-
-INITIALIZE_PASS_BEGIN(ProfileVerifierPass, "profile-verifier",
- "Verify profiling information", false, true)
-INITIALIZE_AG_DEPENDENCY(ProfileInfo)
-INITIALIZE_PASS_END(ProfileVerifierPass, "profile-verifier",
- "Verify profiling information", false, true)
-
-namespace llvm {
- FunctionPass *createProfileVerifierPass() {
- return new ProfileVerifierPass(ProfileVerifierDisableAssertions);
- }
-}
-
diff --git a/lib/Analysis/RegionInfo.cpp b/lib/Analysis/RegionInfo.cpp
index 8577025..5635688 100644
--- a/lib/Analysis/RegionInfo.cpp
+++ b/lib/Analysis/RegionInfo.cpp
@@ -9,6 +9,7 @@
// Detects single entry single exit regions in the control flow graph.
//===----------------------------------------------------------------------===//
+#define DEBUG_TYPE "region"
#include "llvm/Analysis/RegionInfo.h"
#include "llvm/ADT/PostOrderIterator.h"
#include "llvm/ADT/Statistic.h"
@@ -17,12 +18,9 @@
#include "llvm/Assembly/Writer.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/ErrorHandling.h"
-
-#define DEBUG_TYPE "region"
#include "llvm/Support/Debug.h"
-
-#include <set>
#include <algorithm>
+#include <set>
using namespace llvm;
diff --git a/lib/Analysis/ScalarEvolution.cpp b/lib/Analysis/ScalarEvolution.cpp
index f5d095b..0a02f4e 100644
--- a/lib/Analysis/ScalarEvolution.cpp
+++ b/lib/Analysis/ScalarEvolution.cpp
@@ -2590,55 +2590,39 @@ const SCEV *ScalarEvolution::getUMinExpr(const SCEV *LHS,
return getNotSCEV(getUMaxExpr(getNotSCEV(LHS), getNotSCEV(RHS)));
}
-const SCEV *ScalarEvolution::getSizeOfExpr(Type *AllocTy) {
+const SCEV *ScalarEvolution::getSizeOfExpr(Type *IntTy, Type *AllocTy) {
// If we have DataLayout, we can bypass creating a target-independent
// constant expression and then folding it back into a ConstantInt.
// This is just a compile-time optimization.
if (TD)
- return getConstant(TD->getIntPtrType(getContext()),
- TD->getTypeAllocSize(AllocTy));
+ return getConstant(IntTy, TD->getTypeAllocSize(AllocTy));
Constant *C = ConstantExpr::getSizeOf(AllocTy);
if (ConstantExpr *CE = dyn_cast<ConstantExpr>(C))
if (Constant *Folded = ConstantFoldConstantExpression(CE, TD, TLI))
C = Folded;
Type *Ty = getEffectiveSCEVType(PointerType::getUnqual(AllocTy));
+ assert(Ty == IntTy && "Effective SCEV type doesn't match");
return getTruncateOrZeroExtend(getSCEV(C), Ty);
}
-const SCEV *ScalarEvolution::getAlignOfExpr(Type *AllocTy) {
- Constant *C = ConstantExpr::getAlignOf(AllocTy);
- if (ConstantExpr *CE = dyn_cast<ConstantExpr>(C))
- if (Constant *Folded = ConstantFoldConstantExpression(CE, TD, TLI))
- C = Folded;
- Type *Ty = getEffectiveSCEVType(PointerType::getUnqual(AllocTy));
- return getTruncateOrZeroExtend(getSCEV(C), Ty);
-}
-
-const SCEV *ScalarEvolution::getOffsetOfExpr(StructType *STy,
+const SCEV *ScalarEvolution::getOffsetOfExpr(Type *IntTy,
+ StructType *STy,
unsigned FieldNo) {
// If we have DataLayout, we can bypass creating a target-independent
// constant expression and then folding it back into a ConstantInt.
// This is just a compile-time optimization.
- if (TD)
- return getConstant(TD->getIntPtrType(getContext()),
+ if (TD) {
+ return getConstant(IntTy,
TD->getStructLayout(STy)->getElementOffset(FieldNo));
+ }
Constant *C = ConstantExpr::getOffsetOf(STy, FieldNo);
if (ConstantExpr *CE = dyn_cast<ConstantExpr>(C))
if (Constant *Folded = ConstantFoldConstantExpression(CE, TD, TLI))
C = Folded;
- Type *Ty = getEffectiveSCEVType(PointerType::getUnqual(STy));
- return getTruncateOrZeroExtend(getSCEV(C), Ty);
-}
-const SCEV *ScalarEvolution::getOffsetOfExpr(Type *CTy,
- Constant *FieldNo) {
- Constant *C = ConstantExpr::getOffsetOf(CTy, FieldNo);
- if (ConstantExpr *CE = dyn_cast<ConstantExpr>(C))
- if (Constant *Folded = ConstantFoldConstantExpression(CE, TD, TLI))
- C = Folded;
- Type *Ty = getEffectiveSCEVType(PointerType::getUnqual(CTy));
+ Type *Ty = getEffectiveSCEVType(PointerType::getUnqual(STy));
return getTruncateOrZeroExtend(getSCEV(C), Ty);
}
@@ -2703,12 +2687,15 @@ uint64_t ScalarEvolution::getTypeSizeInBits(Type *Ty) const {
Type *ScalarEvolution::getEffectiveSCEVType(Type *Ty) const {
assert(isSCEVable(Ty) && "Type is not SCEVable!");
- if (Ty->isIntegerTy())
+ if (Ty->isIntegerTy()) {
return Ty;
+ }
// The only other support type is pointer.
assert(Ty->isPointerTy() && "Unexpected non-pointer non-integer type!");
- if (TD) return TD->getIntPtrType(getContext());
+
+ if (TD)
+ return TD->getIntPtrType(Ty);
// Without DataLayout, conservatively assume pointers are 64-bit.
return Type::getInt64Ty(getContext());
@@ -3101,15 +3088,26 @@ const SCEV *ScalarEvolution::createNodeForPHI(PHINode *PN) {
Flags = setFlags(Flags, SCEV::FlagNUW);
if (OBO->hasNoSignedWrap())
Flags = setFlags(Flags, SCEV::FlagNSW);
- } else if (const GEPOperator *GEP =
- dyn_cast<GEPOperator>(BEValueV)) {
+ } else if (GEPOperator *GEP = dyn_cast<GEPOperator>(BEValueV)) {
// If the increment is an inbounds GEP, then we know the address
// space cannot be wrapped around. We cannot make any guarantee
// about signed or unsigned overflow because pointers are
// unsigned but we may have a negative index from the base
- // pointer.
- if (GEP->isInBounds())
+ // pointer. We can guarantee that no unsigned wrap occurs if the
+ // indices form a positive value.
+ if (GEP->isInBounds()) {
Flags = setFlags(Flags, SCEV::FlagNW);
+
+ const SCEV *Ptr = getSCEV(GEP->getPointerOperand());
+ if (isKnownPositive(getMinusSCEV(getSCEV(GEP), Ptr)))
+ Flags = setFlags(Flags, SCEV::FlagNUW);
+ }
+ } else if (const SubOperator *OBO =
+ dyn_cast<SubOperator>(BEValueV)) {
+ if (OBO->hasNoUnsignedWrap())
+ Flags = setFlags(Flags, SCEV::FlagNUW);
+ if (OBO->hasNoSignedWrap())
+ Flags = setFlags(Flags, SCEV::FlagNSW);
}
const SCEV *StartVal = getSCEV(StartValueV);
@@ -3177,18 +3175,18 @@ const SCEV *ScalarEvolution::createNodeForPHI(PHINode *PN) {
/// operations. This allows them to be analyzed by regular SCEV code.
///
const SCEV *ScalarEvolution::createNodeForGEP(GEPOperator *GEP) {
+ Type *IntPtrTy = getEffectiveSCEVType(GEP->getType());
+ Value *Base = GEP->getOperand(0);
+ // Don't attempt to analyze GEPs over unsized objects.
+ if (!Base->getType()->getPointerElementType()->isSized())
+ return getUnknown(GEP);
// Don't blindly transfer the inbounds flag from the GEP instruction to the
// Add expression, because the Instruction may be guarded by control flow
// and the no-overflow bits may not be valid for the expression in any
// context.
- bool isInBounds = GEP->isInBounds();
+ SCEV::NoWrapFlags Wrap = GEP->isInBounds() ? SCEV::FlagNSW : SCEV::FlagAnyWrap;
- Type *IntPtrTy = getEffectiveSCEVType(GEP->getType());
- Value *Base = GEP->getOperand(0);
- // Don't attempt to analyze GEPs over unsized objects.
- if (!cast<PointerType>(Base->getType())->getElementType()->isSized())
- return getUnknown(GEP);
const SCEV *TotalOffset = getConstant(IntPtrTy, 0);
gep_type_iterator GTI = gep_type_begin(GEP);
for (GetElementPtrInst::op_iterator I = llvm::next(GEP->op_begin()),
@@ -3199,21 +3197,19 @@ const SCEV *ScalarEvolution::createNodeForGEP(GEPOperator *GEP) {
if (StructType *STy = dyn_cast<StructType>(*GTI++)) {
// For a struct, add the member offset.
unsigned FieldNo = cast<ConstantInt>(Index)->getZExtValue();
- const SCEV *FieldOffset = getOffsetOfExpr(STy, FieldNo);
+ const SCEV *FieldOffset = getOffsetOfExpr(IntPtrTy, STy, FieldNo);
// Add the field offset to the running total offset.
TotalOffset = getAddExpr(TotalOffset, FieldOffset);
} else {
// For an array, add the element offset, explicitly scaled.
- const SCEV *ElementSize = getSizeOfExpr(*GTI);
+ const SCEV *ElementSize = getSizeOfExpr(IntPtrTy, *GTI);
const SCEV *IndexS = getSCEV(Index);
// Getelementptr indices are signed.
IndexS = getTruncateOrSignExtend(IndexS, IntPtrTy);
// Multiply the index by the element size to compute the element offset.
- const SCEV *LocalOffset = getMulExpr(IndexS, ElementSize,
- isInBounds ? SCEV::FlagNSW :
- SCEV::FlagAnyWrap);
+ const SCEV *LocalOffset = getMulExpr(IndexS, ElementSize, Wrap);
// Add the element offset to the running total offset.
TotalOffset = getAddExpr(TotalOffset, LocalOffset);
@@ -3224,8 +3220,7 @@ const SCEV *ScalarEvolution::createNodeForGEP(GEPOperator *GEP) {
const SCEV *BaseS = getSCEV(Base);
// Add the total offset from all the GEP indices to the base.
- return getAddExpr(BaseS, TotalOffset,
- isInBounds ? SCEV::FlagNSW : SCEV::FlagAnyWrap);
+ return getAddExpr(BaseS, TotalOffset, Wrap);
}
/// GetMinTrailingZeros - Determine the minimum number of zero bits that S is
@@ -4616,25 +4611,17 @@ ScalarEvolution::ComputeExitLimitFromICmp(const Loop *L,
if (EL.hasAnyInfo()) return EL;
break;
}
- case ICmpInst::ICMP_SLT: {
- ExitLimit EL = HowManyLessThans(LHS, RHS, L, true, IsSubExpr);
- if (EL.hasAnyInfo()) return EL;
- break;
- }
- case ICmpInst::ICMP_SGT: {
- ExitLimit EL = HowManyLessThans(getNotSCEV(LHS),
- getNotSCEV(RHS), L, true, IsSubExpr);
- if (EL.hasAnyInfo()) return EL;
- break;
- }
- case ICmpInst::ICMP_ULT: {
- ExitLimit EL = HowManyLessThans(LHS, RHS, L, false, IsSubExpr);
+ case ICmpInst::ICMP_SLT:
+ case ICmpInst::ICMP_ULT: { // while (X < Y)
+ bool IsSigned = Cond == ICmpInst::ICMP_SLT;
+ ExitLimit EL = HowManyLessThans(LHS, RHS, L, IsSigned, IsSubExpr);
if (EL.hasAnyInfo()) return EL;
break;
}
- case ICmpInst::ICMP_UGT: {
- ExitLimit EL = HowManyLessThans(getNotSCEV(LHS),
- getNotSCEV(RHS), L, false, IsSubExpr);
+ case ICmpInst::ICMP_SGT:
+ case ICmpInst::ICMP_UGT: { // while (X > Y)
+ bool IsSigned = Cond == ICmpInst::ICMP_SGT;
+ ExitLimit EL = HowManyGreaterThans(LHS, RHS, L, IsSigned, IsSubExpr);
if (EL.hasAnyInfo()) return EL;
break;
}
@@ -5072,15 +5059,21 @@ const SCEV *ScalarEvolution::ComputeExitCountExhaustively(const Loop *L,
/// original value V is returned.
const SCEV *ScalarEvolution::getSCEVAtScope(const SCEV *V, const Loop *L) {
// Check to see if we've folded this expression at this loop before.
- std::map<const Loop *, const SCEV *> &Values = ValuesAtScopes[V];
- std::pair<std::map<const Loop *, const SCEV *>::iterator, bool> Pair =
- Values.insert(std::make_pair(L, static_cast<const SCEV *>(0)));
- if (!Pair.second)
- return Pair.first->second ? Pair.first->second : V;
-
+  SmallVector<std::pair<const Loop *, const SCEV *>, 2> &Values =
+      ValuesAtScopes[V];
+ for (unsigned u = 0; u < Values.size(); u++) {
+ if (Values[u].first == L)
+ return Values[u].second ? Values[u].second : V;
+ }
+ Values.push_back(std::make_pair(L, static_cast<const SCEV *>(0)));
// Otherwise compute it.
const SCEV *C = computeSCEVAtScope(V, L);
- ValuesAtScopes[V][L] = C;
+  SmallVector<std::pair<const Loop *, const SCEV *>, 2> &Values2 =
+      ValuesAtScopes[V];
+ for (unsigned u = Values2.size(); u > 0; u--) {
+ if (Values2[u - 1].first == L) {
+ Values2[u - 1].second = C;
+ break;
+ }
+ }
return C;
}
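This hunk (and the matching ones for LoopDispositions and BlockDispositions further down) trades a node-based std::map cache for a linear scan over a small vector of pairs, since most SCEVs are queried at only one or two scopes. A minimal generic sketch of the pattern in plain C++; getOrCompute is a made-up name, not LLVM API:

    #include <utility>
    #include <vector>

    // Find-or-compute memo cache: with one or two entries per key, a linear
    // scan over contiguous pairs beats a node-based map in speed and memory.
    template <typename K, typename V, typename Fn>
    V getOrCompute(std::vector<std::pair<K, V> > &Cache, const K &Key,
                   Fn Compute) {
      for (unsigned u = 0; u < Cache.size(); u++)
        if (Cache[u].first == Key)
          return Cache[u].second;
      Cache.push_back(std::make_pair(Key, V())); // placeholder breaks cycles
      V Result = Compute(Key);
      // Compute may recurse into this cache and reallocate the vector; that
      // is why the code above re-finds the slot instead of keeping a
      // reference across the call.
      for (unsigned u = Cache.size(); u > 0; u--)
        if (Cache[u - 1].first == Key) {
          Cache[u - 1].second = Result;
          break;
        }
      return Result;
    }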
@@ -5119,18 +5112,23 @@ static Constant *BuildConstantFromSCEV(const SCEV *V) {
case scAddExpr: {
const SCEVAddExpr *SA = cast<SCEVAddExpr>(V);
if (Constant *C = BuildConstantFromSCEV(SA->getOperand(0))) {
- if (C->getType()->isPointerTy())
- C = ConstantExpr::getBitCast(C, Type::getInt8PtrTy(C->getContext()));
+ if (PointerType *PTy = dyn_cast<PointerType>(C->getType())) {
+ unsigned AS = PTy->getAddressSpace();
+ Type *DestPtrTy = Type::getInt8PtrTy(C->getContext(), AS);
+ C = ConstantExpr::getBitCast(C, DestPtrTy);
+ }
for (unsigned i = 1, e = SA->getNumOperands(); i != e; ++i) {
Constant *C2 = BuildConstantFromSCEV(SA->getOperand(i));
if (!C2) return 0;
// First pointer!
if (!C->getType()->isPointerTy() && C2->getType()->isPointerTy()) {
+ unsigned AS = C2->getType()->getPointerAddressSpace();
std::swap(C, C2);
+ Type *DestPtrTy = Type::getInt8PtrTy(C->getContext(), AS);
// The offsets have been converted to bytes. We can add bytes to an
// i8* by GEP with the byte count in the first index.
- C = ConstantExpr::getBitCast(C,Type::getInt8PtrTy(C->getContext()));
+ C = ConstantExpr::getBitCast(C, DestPtrTy);
}
// Don't bother trying to sum two pointers. We probably can't
@@ -5138,8 +5136,8 @@ static Constant *BuildConstantFromSCEV(const SCEV *V) {
if (C2->getType()->isPointerTy())
return 0;
- if (C->getType()->isPointerTy()) {
- if (cast<PointerType>(C->getType())->getElementType()->isStructTy())
+ if (PointerType *PTy = dyn_cast<PointerType>(C->getType())) {
+ if (PTy->getElementType()->isStructTy())
C2 = ConstantExpr::getIntegerCast(
C2, Type::getInt32Ty(C->getContext()), true);
C = ConstantExpr::getGetElementPtr(C, C2);
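The two bitcast hunks above matter because a bitcast may not change a pointer's address space; the scratch i8* type has to live in the same address space as the constant being cast. Restated as a sketch, with addrspace(3) as an arbitrary example:

    // Before: always i8* in addrspace(0) -- invalid IR when C is a pointer
    // into another address space.
    //   C = ConstantExpr::getBitCast(C, Type::getInt8PtrTy(C->getContext()));
    // After: build the i8* type in the constant's own address space.
    unsigned AS = cast<PointerType>(C->getType())->getAddressSpace();
    Type *DestPtrTy = Type::getInt8PtrTy(C->getContext(), AS); // e.g. i8 addrspace(3)*
    C = ConstantExpr::getBitCast(C, DestPtrTy);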
@@ -6336,45 +6334,72 @@ ScalarEvolution::isImpliedCondOperandsHelper(ICmpInst::Predicate Pred,
return false;
}
-/// getBECount - Subtract the end and start values and divide by the step,
-/// rounding up, to get the number of times the backedge is executed. Return
-/// CouldNotCompute if an intermediate computation overflows.
-const SCEV *ScalarEvolution::getBECount(const SCEV *Start,
- const SCEV *End,
- const SCEV *Step,
- bool NoWrap) {
- assert(!isKnownNegative(Step) &&
- "This code doesn't handle negative strides yet!");
-
- Type *Ty = Start->getType();
-
- // When Start == End, we have an exact BECount == 0. Short-circuit this case
- // here because SCEV may not be able to determine that the unsigned division
- // after rounding is zero.
- if (Start == End)
- return getConstant(Ty, 0);
-
- const SCEV *NegOne = getConstant(Ty, (uint64_t)-1);
- const SCEV *Diff = getMinusSCEV(End, Start);
- const SCEV *RoundUp = getAddExpr(Step, NegOne);
-
- // Add an adjustment to the difference between End and Start so that
- // the division will effectively round up.
- const SCEV *Add = getAddExpr(Diff, RoundUp);
-
- if (!NoWrap) {
- // Check Add for unsigned overflow.
- // TODO: More sophisticated things could be done here.
- Type *WideTy = IntegerType::get(getContext(),
- getTypeSizeInBits(Ty) + 1);
- const SCEV *EDiff = getZeroExtendExpr(Diff, WideTy);
- const SCEV *ERoundUp = getZeroExtendExpr(RoundUp, WideTy);
- const SCEV *OperandExtendedAdd = getAddExpr(EDiff, ERoundUp);
- if (getZeroExtendExpr(Add, WideTy) != OperandExtendedAdd)
- return getCouldNotCompute();
+// Check whether a linear IV with a positive stride can overflow when used in a
+// less-than comparison, knowing the invariant term of the comparison, the
+// stride and the knowledge of NSW/NUW flags on the recurrence.
+bool ScalarEvolution::doesIVOverflowOnLT(const SCEV *RHS, const SCEV *Stride,
+ bool IsSigned, bool NoWrap) {
+ if (NoWrap) return false;
+
+ unsigned BitWidth = getTypeSizeInBits(RHS->getType());
+ const SCEV *One = getConstant(Stride->getType(), 1);
+
+ if (IsSigned) {
+ APInt MaxRHS = getSignedRange(RHS).getSignedMax();
+ APInt MaxValue = APInt::getSignedMaxValue(BitWidth);
+ APInt MaxStrideMinusOne = getSignedRange(getMinusSCEV(Stride, One))
+ .getSignedMax();
+
+ // SMaxRHS + SMaxStrideMinusOne > SMaxValue => overflow!
+ return (MaxValue - MaxStrideMinusOne).slt(MaxRHS);
+ }
+
+ APInt MaxRHS = getUnsignedRange(RHS).getUnsignedMax();
+ APInt MaxValue = APInt::getMaxValue(BitWidth);
+ APInt MaxStrideMinusOne = getUnsignedRange(getMinusSCEV(Stride, One))
+ .getUnsignedMax();
+
+ // UMaxRHS + UMaxStrideMinusOne > UMaxValue => overflow!
+ return (MaxValue - MaxStrideMinusOne).ult(MaxRHS);
+}
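A worked instance of the unsigned branch above, with the width fixed at 8 bits so the numbers are concrete; mayOverflowOnULT is a made-up helper mirroring the check, not LLVM API:

    #include <cassert>
    #include <cstdint>

    // (MaxValue - MaxStrideMinusOne).ult(MaxRHS), specialized to uint8_t with
    // an exactly-known stride: the last value of the IV that still satisfies
    // IV < RHS can be as large as MaxRHS - 1, and one more step of Stride
    // must not wrap past 255.
    static bool mayOverflowOnULT(uint8_t MaxRHS, uint8_t Stride) {
      return static_cast<uint8_t>(255 - (Stride - 1)) < MaxRHS;
    }

    int main() {
      assert(mayOverflowOnULT(250, 10));  // 255 - 9 = 246 < 250: may wrap
      assert(!mayOverflowOnULT(246, 10)); // 246 < 246 is false: cannot wrap
      return 0;
    }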
+
+// Check whether a linear IV with a negative stride can overflow when used in a
+// greater-than comparison, knowing the invariant term of the comparison,
+// the stride and the knowledge of NSW/NUW flags on the recurrence.
+bool ScalarEvolution::doesIVOverflowOnGT(const SCEV *RHS, const SCEV *Stride,
+ bool IsSigned, bool NoWrap) {
+ if (NoWrap) return false;
+
+ unsigned BitWidth = getTypeSizeInBits(RHS->getType());
+ const SCEV *One = getConstant(Stride->getType(), 1);
+
+ if (IsSigned) {
+ APInt MinRHS = getSignedRange(RHS).getSignedMin();
+ APInt MinValue = APInt::getSignedMinValue(BitWidth);
+ APInt MaxStrideMinusOne = getSignedRange(getMinusSCEV(Stride, One))
+ .getSignedMax();
+
+ // SMinRHS - SMaxStrideMinusOne < SMinValue => overflow!
+ return (MinValue + MaxStrideMinusOne).sgt(MinRHS);
}
- return getUDivExpr(Add, Step);
+ APInt MinRHS = getUnsignedRange(RHS).getUnsignedMin();
+ APInt MinValue = APInt::getMinValue(BitWidth);
+ APInt MaxStrideMinusOne = getUnsignedRange(getMinusSCEV(Stride, One))
+ .getUnsignedMax();
+
+ // UMinRHS - UMaxStrideMinusOne < UMinValue => overflow!
+ return (MinValue + MaxStrideMinusOne).ugt(MinRHS);
+}
+
+// Compute the backedge taken count from the interval difference, the
+// stride, and the presence of equality in the comparison.
+const SCEV *ScalarEvolution::computeBECount(const SCEV *Delta, const SCEV *Step,
+ bool Equality) {
+ const SCEV *One = getConstant(Step->getType(), 1);
+ Delta = Equality ? getAddExpr(Delta, Step)
+ : getAddExpr(Delta, getMinusSCEV(Step, One));
+ return getUDivExpr(Delta, Step);
}
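computeBECount is a ceiling division in disguise. A plain-integer sketch, assuming Delta and Step are small enough that the additions cannot overflow (which is exactly what the doesIVOverflow checks above establish):

    // IV <  bound: ceil(Delta / Step)  == (Delta + (Step - 1)) / Step
    // IV <= bound: one extra iteration == (Delta + Step) / Step
    unsigned computeBECountSketch(unsigned Delta, unsigned Step, bool Equality) {
      return Equality ? (Delta + Step) / Step : (Delta + (Step - 1)) / Step;
    }
    // e.g. Delta = 6, Step = 3: '<' gives (6 + 2) / 3 == 2, while '<=' gives
    // (6 + 3) / 3 == 3 -- the equality form buys one more iteration.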
/// HowManyLessThans - Return the number of times a backedge containing the
@@ -6386,119 +6411,144 @@ const SCEV *ScalarEvolution::getBECount(const SCEV *Start,
/// a subexpression that cannot overflow before evaluating true.
ScalarEvolution::ExitLimit
ScalarEvolution::HowManyLessThans(const SCEV *LHS, const SCEV *RHS,
- const Loop *L, bool isSigned,
+ const Loop *L, bool IsSigned,
bool IsSubExpr) {
- // Only handle: "ADDREC < LoopInvariant".
- if (!isLoopInvariant(RHS, L)) return getCouldNotCompute();
+ // We handle only IV < Invariant
+ if (!isLoopInvariant(RHS, L))
+ return getCouldNotCompute();
- const SCEVAddRecExpr *AddRec = dyn_cast<SCEVAddRecExpr>(LHS);
- if (!AddRec || AddRec->getLoop() != L)
+ const SCEVAddRecExpr *IV = dyn_cast<SCEVAddRecExpr>(LHS);
+
+  // Bail out unless LHS is an affine recurrence in this loop.
+ if (!IV || IV->getLoop() != L || !IV->isAffine())
return getCouldNotCompute();
- // Check to see if we have a flag which makes analysis easy.
- bool NoWrap = false;
- if (!IsSubExpr) {
- NoWrap = AddRec->getNoWrapFlags(
- (SCEV::NoWrapFlags)(((isSigned ? SCEV::FlagNSW : SCEV::FlagNUW))
- | SCEV::FlagNW));
- }
- if (AddRec->isAffine()) {
- unsigned BitWidth = getTypeSizeInBits(AddRec->getType());
- const SCEV *Step = AddRec->getStepRecurrence(*this);
+ bool NoWrap = !IsSubExpr &&
+ IV->getNoWrapFlags(IsSigned ? SCEV::FlagNSW : SCEV::FlagNUW);
- if (Step->isZero())
- return getCouldNotCompute();
- if (Step->isOne()) {
- // With unit stride, the iteration never steps past the limit value.
- } else if (isKnownPositive(Step)) {
- // Test whether a positive iteration can step past the limit
- // value and past the maximum value for its type in a single step.
- // Note that it's not sufficient to check NoWrap here, because even
- // though the value after a wrap is undefined, it's not undefined
- // behavior, so if wrap does occur, the loop could either terminate or
- // loop infinitely, but in either case, the loop is guaranteed to
- // iterate at least until the iteration where the wrapping occurs.
- const SCEV *One = getConstant(Step->getType(), 1);
- if (isSigned) {
- APInt Max = APInt::getSignedMaxValue(BitWidth);
- if ((Max - getSignedRange(getMinusSCEV(Step, One)).getSignedMax())
- .slt(getSignedRange(RHS).getSignedMax()))
- return getCouldNotCompute();
- } else {
- APInt Max = APInt::getMaxValue(BitWidth);
- if ((Max - getUnsignedRange(getMinusSCEV(Step, One)).getUnsignedMax())
- .ult(getUnsignedRange(RHS).getUnsignedMax()))
- return getCouldNotCompute();
- }
- } else
- // TODO: Handle negative strides here and below.
- return getCouldNotCompute();
+ const SCEV *Stride = IV->getStepRecurrence(*this);
- // We know the LHS is of the form {n,+,s} and the RHS is some loop-invariant
- // m. So, we count the number of iterations in which {n,+,s} < m is true.
- // Note that we cannot simply return max(m-n,0)/s because it's not safe to
- // treat m-n as signed nor unsigned due to overflow possibility.
-
- // First, we get the value of the LHS in the first iteration: n
- const SCEV *Start = AddRec->getOperand(0);
-
- // Determine the minimum constant start value.
- const SCEV *MinStart = getConstant(isSigned ?
- getSignedRange(Start).getSignedMin() :
- getUnsignedRange(Start).getUnsignedMin());
-
- // If we know that the condition is true in order to enter the loop,
- // then we know that it will run exactly (m-n)/s times. Otherwise, we
- // only know that it will execute (max(m,n)-n)/s times. In both cases,
- // the division must round up.
- const SCEV *End = RHS;
- if (!isLoopEntryGuardedByCond(L,
- isSigned ? ICmpInst::ICMP_SLT :
- ICmpInst::ICMP_ULT,
- getMinusSCEV(Start, Step), RHS))
- End = isSigned ? getSMaxExpr(RHS, Start)
- : getUMaxExpr(RHS, Start);
-
- // Determine the maximum constant end value.
- const SCEV *MaxEnd = getConstant(isSigned ?
- getSignedRange(End).getSignedMax() :
- getUnsignedRange(End).getUnsignedMax());
-
- // If MaxEnd is within a step of the maximum integer value in its type,
- // adjust it down to the minimum value which would produce the same effect.
- // This allows the subsequent ceiling division of (N+(step-1))/step to
- // compute the correct value.
- const SCEV *StepMinusOne = getMinusSCEV(Step,
- getConstant(Step->getType(), 1));
- MaxEnd = isSigned ?
- getSMinExpr(MaxEnd,
- getMinusSCEV(getConstant(APInt::getSignedMaxValue(BitWidth)),
- StepMinusOne)) :
- getUMinExpr(MaxEnd,
- getMinusSCEV(getConstant(APInt::getMaxValue(BitWidth)),
- StepMinusOne));
-
- // Finally, we subtract these two values and divide, rounding up, to get
- // the number of times the backedge is executed.
- const SCEV *BECount = getBECount(Start, End, Step, NoWrap);
-
- // The maximum backedge count is similar, except using the minimum start
- // value and the maximum end value.
- // If we already have an exact constant BECount, use it instead.
- const SCEV *MaxBECount = isa<SCEVConstant>(BECount) ? BECount
- : getBECount(MinStart, MaxEnd, Step, NoWrap);
-
- // If the stride is nonconstant, and NoWrap == true, then
- // getBECount(MinStart, MaxEnd) may not compute. This would result in an
- // exact BECount and invalid MaxBECount, which should be avoided to catch
- // more optimization opportunities.
- if (isa<SCEVCouldNotCompute>(MaxBECount))
- MaxBECount = BECount;
-
- return ExitLimit(BECount, MaxBECount);
- }
+ // Avoid negative or zero stride values
+ if (!isKnownPositive(Stride))
+ return getCouldNotCompute();
- return getCouldNotCompute();
+  // Avoid proven overflow cases: this will ensure that the backedge taken
+  // count will not generate any unsigned overflow. Relaxed no-overflow
+  // conditions exploit NoWrapFlags, allowing us to optimize in the presence
+  // of undefined behavior, as in C.
+ if (!Stride->isOne() && doesIVOverflowOnLT(RHS, Stride, IsSigned, NoWrap))
+ return getCouldNotCompute();
+
+ ICmpInst::Predicate Cond = IsSigned ? ICmpInst::ICMP_SLT
+ : ICmpInst::ICMP_ULT;
+ const SCEV *Start = IV->getStart();
+ const SCEV *End = RHS;
+ if (!isLoopEntryGuardedByCond(L, Cond, getMinusSCEV(Start, Stride), RHS))
+ End = IsSigned ? getSMaxExpr(RHS, Start)
+ : getUMaxExpr(RHS, Start);
+
+ const SCEV *BECount = computeBECount(getMinusSCEV(End, Start), Stride, false);
+
+ APInt MinStart = IsSigned ? getSignedRange(Start).getSignedMin()
+ : getUnsignedRange(Start).getUnsignedMin();
+
+ APInt MinStride = IsSigned ? getSignedRange(Stride).getSignedMin()
+ : getUnsignedRange(Stride).getUnsignedMin();
+
+ unsigned BitWidth = getTypeSizeInBits(LHS->getType());
+ APInt Limit = IsSigned ? APInt::getSignedMaxValue(BitWidth) - (MinStride - 1)
+ : APInt::getMaxValue(BitWidth) - (MinStride - 1);
+
+  // Although End can be a MAX expression, we estimate MaxEnd considering only
+ // the case End = RHS. This is safe because in the other case (End - Start)
+ // is zero, leading to a zero maximum backedge taken count.
+ APInt MaxEnd =
+ IsSigned ? APIntOps::smin(getSignedRange(RHS).getSignedMax(), Limit)
+ : APIntOps::umin(getUnsignedRange(RHS).getUnsignedMax(), Limit);
+
+ const SCEV *MaxBECount = getCouldNotCompute();
+ if (isa<SCEVConstant>(BECount))
+ MaxBECount = BECount;
+ else
+ MaxBECount = computeBECount(getConstant(MaxEnd - MinStart),
+ getConstant(MinStride), false);
+
+ if (isa<SCEVCouldNotCompute>(MaxBECount))
+ MaxBECount = BECount;
+
+ return ExitLimit(BECount, MaxBECount);
+}
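A concrete illustration of why End is clamped to the max of RHS and Start when loop entry is not guarded, using 8-bit unsigned values and a stride of 1; backedgeTaken is a made-up helper mirroring the formula above, not the LLVM API:

    #include <cstdint>

    // For IV = {s,+,1} tested as IV < n: if entry is not guaranteed and
    // s >= n, naively computing n - s wraps to a huge count; clamping End to
    // umax(n, s) makes End - Start == 0, the correct count for a skipped loop.
    static uint8_t backedgeTaken(uint8_t s, uint8_t n) {
      uint8_t End = n > s ? n : s; // umax(RHS, Start)
      return End - s;              // computeBECount with Stride == 1
    }
    // backedgeTaken(10, 3) == 0 (never entered); backedgeTaken(0, 5) == 5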
+
+ScalarEvolution::ExitLimit
+ScalarEvolution::HowManyGreaterThans(const SCEV *LHS, const SCEV *RHS,
+ const Loop *L, bool IsSigned,
+ bool IsSubExpr) {
+ // We handle only IV > Invariant
+ if (!isLoopInvariant(RHS, L))
+ return getCouldNotCompute();
+
+ const SCEVAddRecExpr *IV = dyn_cast<SCEVAddRecExpr>(LHS);
+
+  // Bail out unless LHS is an affine recurrence in this loop.
+ if (!IV || IV->getLoop() != L || !IV->isAffine())
+ return getCouldNotCompute();
+
+ bool NoWrap = !IsSubExpr &&
+ IV->getNoWrapFlags(IsSigned ? SCEV::FlagNSW : SCEV::FlagNUW);
+
+ const SCEV *Stride = getNegativeSCEV(IV->getStepRecurrence(*this));
+
+ // Avoid negative or zero stride values
+ if (!isKnownPositive(Stride))
+ return getCouldNotCompute();
+
+  // Avoid proven overflow cases: this will ensure that the backedge taken
+  // count will not generate any unsigned overflow. Relaxed no-overflow
+  // conditions exploit NoWrapFlags, allowing us to optimize in the presence
+  // of undefined behavior, as in C.
+ if (!Stride->isOne() && doesIVOverflowOnGT(RHS, Stride, IsSigned, NoWrap))
+ return getCouldNotCompute();
+
+ ICmpInst::Predicate Cond = IsSigned ? ICmpInst::ICMP_SGT
+ : ICmpInst::ICMP_UGT;
+
+ const SCEV *Start = IV->getStart();
+ const SCEV *End = RHS;
+ if (!isLoopEntryGuardedByCond(L, Cond, getAddExpr(Start, Stride), RHS))
+ End = IsSigned ? getSMinExpr(RHS, Start)
+ : getUMinExpr(RHS, Start);
+
+ const SCEV *BECount = computeBECount(getMinusSCEV(Start, End), Stride, false);
+
+ APInt MaxStart = IsSigned ? getSignedRange(Start).getSignedMax()
+ : getUnsignedRange(Start).getUnsignedMax();
+
+ APInt MinStride = IsSigned ? getSignedRange(Stride).getSignedMin()
+ : getUnsignedRange(Stride).getUnsignedMin();
+
+ unsigned BitWidth = getTypeSizeInBits(LHS->getType());
+ APInt Limit = IsSigned ? APInt::getSignedMinValue(BitWidth) + (MinStride - 1)
+ : APInt::getMinValue(BitWidth) + (MinStride - 1);
+
+  // Although End can be a MIN expression, we estimate MinEnd considering only
+ // the case End = RHS. This is safe because in the other case (Start - End)
+ // is zero, leading to a zero maximum backedge taken count.
+ APInt MinEnd =
+ IsSigned ? APIntOps::smax(getSignedRange(RHS).getSignedMin(), Limit)
+ : APIntOps::umax(getUnsignedRange(RHS).getUnsignedMin(), Limit);
+
+ const SCEV *MaxBECount = getCouldNotCompute();
+ if (isa<SCEVConstant>(BECount))
+ MaxBECount = BECount;
+ else
+ MaxBECount = computeBECount(getConstant(MaxStart - MinEnd),
+ getConstant(MinStride), false);
+
+ if (isa<SCEVCouldNotCompute>(MaxBECount))
+ MaxBECount = BECount;
+
+ return ExitLimit(BECount, MaxBECount);
}
/// getNumIterationsInRange - Return the number of iterations of this loop that
@@ -6627,7 +6677,534 @@ const SCEV *SCEVAddRecExpr::getNumIterationsInRange(ConstantRange Range,
return SE.getCouldNotCompute();
}
+static const APInt gcd(const SCEVConstant *C1, const SCEVConstant *C2) {
+ APInt A = C1->getValue()->getValue().abs();
+ APInt B = C2->getValue()->getValue().abs();
+ uint32_t ABW = A.getBitWidth();
+ uint32_t BBW = B.getBitWidth();
+
+ if (ABW > BBW)
+ B = B.zext(ABW);
+ else if (ABW < BBW)
+ A = A.zext(BBW);
+
+ return APIntOps::GreatestCommonDivisor(A, B);
+}
+
+static const APInt srem(const SCEVConstant *C1, const SCEVConstant *C2) {
+ APInt A = C1->getValue()->getValue();
+ APInt B = C2->getValue()->getValue();
+ uint32_t ABW = A.getBitWidth();
+ uint32_t BBW = B.getBitWidth();
+
+ if (ABW > BBW)
+ B = B.sext(ABW);
+ else if (ABW < BBW)
+ A = A.sext(BBW);
+
+ return APIntOps::srem(A, B);
+}
+
+static const APInt sdiv(const SCEVConstant *C1, const SCEVConstant *C2) {
+ APInt A = C1->getValue()->getValue();
+ APInt B = C2->getValue()->getValue();
+ uint32_t ABW = A.getBitWidth();
+ uint32_t BBW = B.getBitWidth();
+
+ if (ABW > BBW)
+ B = B.sext(ABW);
+ else if (ABW < BBW)
+ A = A.sext(BBW);
+
+ return APIntOps::sdiv(A, B);
+}
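These three helpers exist because APInt arithmetic asserts that both operands share one bit width: gcd works on absolute values and zero-extends, while the signed rem/div sign-extend. A small usage sketch on raw APInts, assuming only llvm/ADT/APInt.h:

    #include "llvm/ADT/APInt.h"
    using namespace llvm;

    APInt A(32, 12); // 32-bit 12
    APInt B(64, 18); // 64-bit 18
    // Widen the narrower operand first, then take the common divisor.
    APInt G = APIntOps::GreatestCommonDivisor(A.zext(64), B); // == 6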
+
+namespace {
+struct SCEVGCD : public SCEVVisitor<SCEVGCD, const SCEV *> {
+public:
+  // Pattern match Step into Start. When Step is a multiply expression, find
+  // the largest subexpression of Step that appears in Start. When Start is an
+  // add expression, try to match Step in the subexpressions of Start; the
+  // non-matching subexpressions are returned under Remainder.
+ static const SCEV *findGCD(ScalarEvolution &SE, const SCEV *Start,
+ const SCEV *Step, const SCEV **Remainder) {
+ assert(Remainder && "Remainder should not be NULL");
+ SCEVGCD R(SE, Step, SE.getConstant(Step->getType(), 0));
+ const SCEV *Res = R.visit(Start);
+ *Remainder = R.Remainder;
+ return Res;
+ }
+
+ SCEVGCD(ScalarEvolution &S, const SCEV *G, const SCEV *R)
+ : SE(S), GCD(G), Remainder(R) {
+ Zero = SE.getConstant(GCD->getType(), 0);
+ One = SE.getConstant(GCD->getType(), 1);
+ }
+
+ const SCEV *visitConstant(const SCEVConstant *Constant) {
+ if (GCD == Constant || Constant == Zero)
+ return GCD;
+
+ if (const SCEVConstant *CGCD = dyn_cast<SCEVConstant>(GCD)) {
+ const SCEV *Res = SE.getConstant(gcd(Constant, CGCD));
+ if (Res != One)
+ return Res;
+
+ Remainder = SE.getConstant(srem(Constant, CGCD));
+ Constant = cast<SCEVConstant>(SE.getMinusSCEV(Constant, Remainder));
+ Res = SE.getConstant(gcd(Constant, CGCD));
+ return Res;
+ }
+
+ // When GCD is not a constant, it could be that the GCD is an Add, Mul,
+ // AddRec, etc., in which case we want to find out how many times the
+ // Constant divides the GCD: we then return that as the new GCD.
+ const SCEV *Rem = Zero;
+ const SCEV *Res = findGCD(SE, GCD, Constant, &Rem);
+
+ if (Res == One || Rem != Zero) {
+ Remainder = Constant;
+ return One;
+ }
+
+ assert(isa<SCEVConstant>(Res) && "Res should be a constant");
+ Remainder = SE.getConstant(srem(Constant, cast<SCEVConstant>(Res)));
+ return Res;
+ }
+
+ const SCEV *visitTruncateExpr(const SCEVTruncateExpr *Expr) {
+ if (GCD != Expr)
+ Remainder = Expr;
+ return GCD;
+ }
+
+ const SCEV *visitZeroExtendExpr(const SCEVZeroExtendExpr *Expr) {
+ if (GCD != Expr)
+ Remainder = Expr;
+ return GCD;
+ }
+
+ const SCEV *visitSignExtendExpr(const SCEVSignExtendExpr *Expr) {
+ if (GCD != Expr)
+ Remainder = Expr;
+ return GCD;
+ }
+
+ const SCEV *visitAddExpr(const SCEVAddExpr *Expr) {
+ if (GCD == Expr)
+ return GCD;
+
+ for (int i = 0, e = Expr->getNumOperands(); i < e; ++i) {
+ const SCEV *Rem = Zero;
+ const SCEV *Res = findGCD(SE, Expr->getOperand(e - 1 - i), GCD, &Rem);
+
+ // FIXME: There may be ambiguous situations: for instance,
+ // GCD(-4 + (3 * %m), 2 * %m) where 2 divides -4 and %m divides (3 * %m).
+ // The order in which the AddExpr is traversed computes a different GCD
+ // and Remainder.
+ if (Res != One)
+ GCD = Res;
+ if (Rem != Zero)
+ Remainder = SE.getAddExpr(Remainder, Rem);
+ }
+
+ return GCD;
+ }
+
+ const SCEV *visitMulExpr(const SCEVMulExpr *Expr) {
+ if (GCD == Expr)
+ return GCD;
+
+ for (int i = 0, e = Expr->getNumOperands(); i < e; ++i) {
+ if (Expr->getOperand(i) == GCD)
+ return GCD;
+ }
+
+ // If we have not returned yet, it means that GCD is not part of Expr.
+ const SCEV *PartialGCD = One;
+ for (int i = 0, e = Expr->getNumOperands(); i < e; ++i) {
+ const SCEV *Rem = Zero;
+ const SCEV *Res = findGCD(SE, Expr->getOperand(i), GCD, &Rem);
+ if (Rem != Zero)
+ // GCD does not divide Expr->getOperand(i).
+ continue;
+
+ if (Res == GCD)
+ return GCD;
+ PartialGCD = SE.getMulExpr(PartialGCD, Res);
+ if (PartialGCD == GCD)
+ return GCD;
+ }
+
+ if (PartialGCD != One)
+ return PartialGCD;
+
+ Remainder = Expr;
+ const SCEVMulExpr *Mul = dyn_cast<SCEVMulExpr>(GCD);
+ if (!Mul)
+ return PartialGCD;
+
+ // When the GCD is a multiply expression, try to decompose it:
+ // this occurs when Step does not divide the Start expression
+ // as in: {(-4 + (3 * %m)),+,(2 * %m)}
+ for (int i = 0, e = Mul->getNumOperands(); i < e; ++i) {
+ const SCEV *Rem = Zero;
+ const SCEV *Res = findGCD(SE, Expr, Mul->getOperand(i), &Rem);
+ if (Rem == Zero) {
+ Remainder = Rem;
+ return Res;
+ }
+ }
+
+ return PartialGCD;
+ }
+
+ const SCEV *visitUDivExpr(const SCEVUDivExpr *Expr) {
+ if (GCD != Expr)
+ Remainder = Expr;
+ return GCD;
+ }
+
+ const SCEV *visitAddRecExpr(const SCEVAddRecExpr *Expr) {
+ if (GCD == Expr)
+ return GCD;
+
+ if (!Expr->isAffine()) {
+ Remainder = Expr;
+ return GCD;
+ }
+
+ const SCEV *Rem = Zero;
+ const SCEV *Res = findGCD(SE, Expr->getOperand(0), GCD, &Rem);
+ if (Rem != Zero)
+ Remainder = SE.getAddExpr(Remainder, Rem);
+
+ Rem = Zero;
+ Res = findGCD(SE, Expr->getOperand(1), Res, &Rem);
+ if (Rem != Zero) {
+ Remainder = Expr;
+ return GCD;
+ }
+
+ return Res;
+ }
+
+ const SCEV *visitSMaxExpr(const SCEVSMaxExpr *Expr) {
+ if (GCD != Expr)
+ Remainder = Expr;
+ return GCD;
+ }
+
+ const SCEV *visitUMaxExpr(const SCEVUMaxExpr *Expr) {
+ if (GCD != Expr)
+ Remainder = Expr;
+ return GCD;
+ }
+
+ const SCEV *visitUnknown(const SCEVUnknown *Expr) {
+ if (GCD != Expr)
+ Remainder = Expr;
+ return GCD;
+ }
+
+ const SCEV *visitCouldNotCompute(const SCEVCouldNotCompute *Expr) {
+ return One;
+ }
+
+private:
+ ScalarEvolution &SE;
+ const SCEV *GCD, *Remainder, *Zero, *One;
+};
+
+struct SCEVDivision : public SCEVVisitor<SCEVDivision, const SCEV *> {
+public:
+ // Remove from Start all multiples of Step.
+ static const SCEV *divide(ScalarEvolution &SE, const SCEV *Start,
+ const SCEV *Step) {
+ SCEVDivision D(SE, Step);
+ const SCEV *Rem = D.Zero;
+ (void)Rem;
+ // The division is guaranteed to succeed: Step should divide Start with no
+ // remainder.
+ assert(Step == SCEVGCD::findGCD(SE, Start, Step, &Rem) && Rem == D.Zero &&
+ "Step should divide Start with no remainder.");
+ return D.visit(Start);
+ }
+
+ SCEVDivision(ScalarEvolution &S, const SCEV *G) : SE(S), GCD(G) {
+ Zero = SE.getConstant(GCD->getType(), 0);
+ One = SE.getConstant(GCD->getType(), 1);
+ }
+
+ const SCEV *visitConstant(const SCEVConstant *Constant) {
+ if (GCD == Constant)
+ return One;
+
+ if (const SCEVConstant *CGCD = dyn_cast<SCEVConstant>(GCD))
+ return SE.getConstant(sdiv(Constant, CGCD));
+ return Constant;
+ }
+
+ const SCEV *visitTruncateExpr(const SCEVTruncateExpr *Expr) {
+ if (GCD == Expr)
+ return One;
+ return Expr;
+ }
+
+ const SCEV *visitZeroExtendExpr(const SCEVZeroExtendExpr *Expr) {
+ if (GCD == Expr)
+ return One;
+ return Expr;
+ }
+
+ const SCEV *visitSignExtendExpr(const SCEVSignExtendExpr *Expr) {
+ if (GCD == Expr)
+ return One;
+ return Expr;
+ }
+
+ const SCEV *visitAddExpr(const SCEVAddExpr *Expr) {
+ if (GCD == Expr)
+ return One;
+
+ SmallVector<const SCEV *, 2> Operands;
+ for (int i = 0, e = Expr->getNumOperands(); i < e; ++i)
+ Operands.push_back(divide(SE, Expr->getOperand(i), GCD));
+
+ if (Operands.size() == 1)
+ return Operands[0];
+ return SE.getAddExpr(Operands);
+ }
+
+ const SCEV *visitMulExpr(const SCEVMulExpr *Expr) {
+ if (GCD == Expr)
+ return One;
+
+ bool FoundGCDTerm = false;
+ for (int i = 0, e = Expr->getNumOperands(); i < e; ++i)
+ if (Expr->getOperand(i) == GCD)
+ FoundGCDTerm = true;
+
+ SmallVector<const SCEV *, 2> Operands;
+ if (FoundGCDTerm) {
+ FoundGCDTerm = false;
+ for (int i = 0, e = Expr->getNumOperands(); i < e; ++i) {
+ if (FoundGCDTerm)
+ Operands.push_back(Expr->getOperand(i));
+ else if (Expr->getOperand(i) == GCD)
+ FoundGCDTerm = true;
+ else
+ Operands.push_back(Expr->getOperand(i));
+ }
+ } else {
+ FoundGCDTerm = false;
+ const SCEV *PartialGCD = One;
+ for (int i = 0, e = Expr->getNumOperands(); i < e; ++i) {
+ if (PartialGCD == GCD) {
+ Operands.push_back(Expr->getOperand(i));
+ continue;
+ }
+
+ const SCEV *Rem = Zero;
+ const SCEV *Res = SCEVGCD::findGCD(SE, Expr->getOperand(i), GCD, &Rem);
+ if (Rem == Zero) {
+ PartialGCD = SE.getMulExpr(PartialGCD, Res);
+ Operands.push_back(divide(SE, Expr->getOperand(i), GCD));
+ } else {
+ Operands.push_back(Expr->getOperand(i));
+ }
+ }
+ }
+
+ if (Operands.size() == 1)
+ return Operands[0];
+ return SE.getMulExpr(Operands);
+ }
+
+ const SCEV *visitUDivExpr(const SCEVUDivExpr *Expr) {
+ if (GCD == Expr)
+ return One;
+ return Expr;
+ }
+
+ const SCEV *visitAddRecExpr(const SCEVAddRecExpr *Expr) {
+ if (GCD == Expr)
+ return One;
+
+ assert(Expr->isAffine() && "Expr should be affine");
+
+ const SCEV *Start = divide(SE, Expr->getStart(), GCD);
+ const SCEV *Step = divide(SE, Expr->getStepRecurrence(SE), GCD);
+
+ return SE.getAddRecExpr(Start, Step, Expr->getLoop(),
+ Expr->getNoWrapFlags());
+ }
+
+ const SCEV *visitSMaxExpr(const SCEVSMaxExpr *Expr) {
+ if (GCD == Expr)
+ return One;
+ return Expr;
+ }
+
+ const SCEV *visitUMaxExpr(const SCEVUMaxExpr *Expr) {
+ if (GCD == Expr)
+ return One;
+ return Expr;
+ }
+
+ const SCEV *visitUnknown(const SCEVUnknown *Expr) {
+ if (GCD == Expr)
+ return One;
+ return Expr;
+ }
+
+ const SCEV *visitCouldNotCompute(const SCEVCouldNotCompute *Expr) {
+ return Expr;
+ }
+
+private:
+ ScalarEvolution &SE;
+ const SCEV *GCD, *Zero, *One;
+};
+}
+
+/// Splits the SCEV into two vectors of SCEVs representing the subscripts and
+/// sizes of an array access. Returns the remainder of the delinearization,
+/// which is the start offset of the array. The SCEV->delinearize algorithm
+/// computes the multiples of SCEV coefficients: that is, a pattern matching of
+/// subexpressions in the stride and base of a SCEV corresponding to the
+/// computation of a GCD (greatest common divisor) of base and stride. When
+/// SCEV->delinearize fails, it returns the SCEV unchanged.
+///
+/// For example: when analyzing the memory access A[i][j][k] in this loop nest
+///
+/// void foo(long n, long m, long o, double A[n][m][o]) {
+///
+/// for (long i = 0; i < n; i++)
+/// for (long j = 0; j < m; j++)
+/// for (long k = 0; k < o; k++)
+/// A[i][j][k] = 1.0;
+/// }
+///
+/// the delinearization input is the following AddRec SCEV:
+///
+/// AddRec: {{{%A,+,(8 * %m * %o)}<%for.i>,+,(8 * %o)}<%for.j>,+,8}<%for.k>
+///
+/// From this SCEV, we are able to say that the base offset of the access is %A
+/// because it appears as an offset that is not divisible by any of the strides
+/// in the loops:
+///
+/// CHECK: Base offset: %A
+///
+/// and then SCEV->delinearize determines the size of some of the dimensions of
+/// the array, since these are the multiples by which the strides grow:
+///
+/// CHECK: ArrayDecl[UnknownSize][%m][%o] with elements of sizeof(double) bytes.
+///
+/// Note that the outermost dimension remains UnknownSize because there are
+/// no strides that would help identify the size of the last dimension: when
+/// the array has been statically allocated, one could compute the size of that
+/// dimension by dividing the overall size of the array by the size of the
+/// known dimensions: %m * %o * 8.
+///
+/// Finally, delinearize provides the access functions for the array reference
+/// that corresponds to A[i][j][k] in the C testcase above:
+///
+/// CHECK: ArrayRef[{0,+,1}<%for.i>][{0,+,1}<%for.j>][{0,+,1}<%for.k>]
+///
+/// The testcases check the output of a function pass, DelinearizationPass,
+/// which walks through all loads and stores of a function, asks for the SCEV
+/// of each memory access with respect to all enclosing loops, calls
+/// SCEV->delinearize on that, and prints the results.
+const SCEV *
+SCEVAddRecExpr::delinearize(ScalarEvolution &SE,
+ SmallVectorImpl<const SCEV *> &Subscripts,
+ SmallVectorImpl<const SCEV *> &Sizes) const {
+ // Early exit in case this SCEV is not an affine multivariate function.
+ if (!this->isAffine())
+ return this;
+
+ const SCEV *Start = this->getStart();
+ const SCEV *Step = this->getStepRecurrence(SE);
+
+  // Build the SCEV representation of the canonical induction variable in the
+ // loop of this SCEV.
+ const SCEV *Zero = SE.getConstant(this->getType(), 0);
+ const SCEV *One = SE.getConstant(this->getType(), 1);
+ const SCEV *IV =
+ SE.getAddRecExpr(Zero, One, this->getLoop(), this->getNoWrapFlags());
+
+ DEBUG(dbgs() << "(delinearize: " << *this << "\n");
+
+  // Currently we fail to delinearize when the stride of this SCEV is 1. We
+  // could decide not to fail in this case: we could just return 1 for the size
+  // of the subscript, and this same SCEV for the access function.
+ if (Step == One) {
+ DEBUG(dbgs() << "failed to delinearize " << *this << "\n)\n");
+ return this;
+ }
+
+ // Find the GCD and Remainder of the Start and Step coefficients of this SCEV.
+ const SCEV *Remainder = NULL;
+ const SCEV *GCD = SCEVGCD::findGCD(SE, Start, Step, &Remainder);
+
+ DEBUG(dbgs() << "GCD: " << *GCD << "\n");
+ DEBUG(dbgs() << "Remainder: " << *Remainder << "\n");
+
+ // Same remark as above: we currently fail the delinearization, although we
+ // can very well handle this special case.
+ if (GCD == One) {
+ DEBUG(dbgs() << "failed to delinearize " << *this << "\n)\n");
+ return this;
+ }
+
+ // As findGCD computed Remainder, GCD divides "Start - Remainder." The
+ // Quotient is then this SCEV without Remainder, scaled down by the GCD. The
+ // Quotient is what will be used in the next subscript delinearization.
+ const SCEV *Quotient =
+ SCEVDivision::divide(SE, SE.getMinusSCEV(Start, Remainder), GCD);
+ DEBUG(dbgs() << "Quotient: " << *Quotient << "\n");
+
+ const SCEV *Rem;
+ if (const SCEVAddRecExpr *AR = dyn_cast<SCEVAddRecExpr>(Quotient))
+ // Recursively call delinearize on the Quotient until there are no more
+ // multiples that can be recognized.
+ Rem = AR->delinearize(SE, Subscripts, Sizes);
+ else
+ Rem = Quotient;
+
+  // Scale up the canonical induction variable IV by whatever remains from the
+  // Step after division by the GCD: the GCD is the size of the entire sub-array.
+ if (Step != GCD) {
+ Step = SCEVDivision::divide(SE, Step, GCD);
+ IV = SE.getMulExpr(IV, Step);
+ }
+  // The access function in the current subscript is computed as the canonical
+ // induction variable IV (potentially scaled up by the step) and offset by
+ // Rem, the offset of delinearization in the sub-array.
+ const SCEV *Index = SE.getAddExpr(IV, Rem);
+
+ // Record the access function and the size of the current subscript.
+ Subscripts.push_back(Index);
+ Sizes.push_back(GCD);
+
+#ifndef NDEBUG
+ int Size = Sizes.size();
+ DEBUG(dbgs() << "succeeded to delinearize " << *this << "\n");
+ DEBUG(dbgs() << "ArrayDecl[UnknownSize]");
+ for (int i = 0; i < Size - 1; i++)
+ DEBUG(dbgs() << "[" << *Sizes[i] << "]");
+ DEBUG(dbgs() << " with elements of " << *Sizes[Size - 1] << " bytes.\n");
+
+ DEBUG(dbgs() << "ArrayRef");
+ for (int i = 0; i < Size; i++)
+ DEBUG(dbgs() << "[" << *Subscripts[i] << "]");
+ DEBUG(dbgs() << "\n)\n");
+#endif
+
+ return Remainder;
+}
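A hypothetical caller, following the DelinearizationPass description in the comment above; SE and AccessFn are assumed to already hold the ScalarEvolution analysis and the SCEV of a memory access:

    SmallVector<const SCEV *, 3> Subscripts, Sizes;
    if (const SCEVAddRecExpr *AR = dyn_cast<SCEVAddRecExpr>(AccessFn)) {
      const SCEV *BaseOffset = AR->delinearize(SE, Subscripts, Sizes);
      // For the A[i][j][k] example above: BaseOffset is %A, Subscripts holds
      // the three {0,+,1} induction variables, and Sizes holds {%m, %o, 8}.
    }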
//===----------------------------------------------------------------------===//
// SCEVCallbackVH Class Implementation
@@ -6683,7 +7260,7 @@ ScalarEvolution::SCEVCallbackVH::SCEVCallbackVH(Value *V, ScalarEvolution *se)
//===----------------------------------------------------------------------===//
ScalarEvolution::ScalarEvolution()
- : FunctionPass(ID), FirstUnknown(0) {
+  : FunctionPass(ID), ValuesAtScopes(64), LoopDispositions(64),
+    BlockDispositions(64), FirstUnknown(0) {
initializeScalarEvolutionPass(*PassRegistry::getPassRegistry());
}
@@ -6821,14 +7398,21 @@ void ScalarEvolution::print(raw_ostream &OS, const Module *) const {
ScalarEvolution::LoopDisposition
ScalarEvolution::getLoopDisposition(const SCEV *S, const Loop *L) {
- std::map<const Loop *, LoopDisposition> &Values = LoopDispositions[S];
- std::pair<std::map<const Loop *, LoopDisposition>::iterator, bool> Pair =
- Values.insert(std::make_pair(L, LoopVariant));
- if (!Pair.second)
- return Pair.first->second;
-
+  SmallVector<std::pair<const Loop *, LoopDisposition>, 2> &Values =
+      LoopDispositions[S];
+ for (unsigned u = 0; u < Values.size(); u++) {
+ if (Values[u].first == L)
+ return Values[u].second;
+ }
+ Values.push_back(std::make_pair(L, LoopVariant));
LoopDisposition D = computeLoopDisposition(S, L);
- return LoopDispositions[S][L] = D;
+  SmallVector<std::pair<const Loop *, LoopDisposition>, 2> &Values2 =
+      LoopDispositions[S];
+ for (unsigned u = Values2.size(); u > 0; u--) {
+ if (Values2[u - 1].first == L) {
+ Values2[u - 1].second = D;
+ break;
+ }
+ }
+ return D;
}
ScalarEvolution::LoopDisposition
@@ -6920,14 +7504,21 @@ bool ScalarEvolution::hasComputableLoopEvolution(const SCEV *S, const Loop *L) {
ScalarEvolution::BlockDisposition
ScalarEvolution::getBlockDisposition(const SCEV *S, const BasicBlock *BB) {
- std::map<const BasicBlock *, BlockDisposition> &Values = BlockDispositions[S];
- std::pair<std::map<const BasicBlock *, BlockDisposition>::iterator, bool>
- Pair = Values.insert(std::make_pair(BB, DoesNotDominateBlock));
- if (!Pair.second)
- return Pair.first->second;
-
+  SmallVector<std::pair<const BasicBlock *, BlockDisposition>, 2> &Values =
+      BlockDispositions[S];
+ for (unsigned u = 0; u < Values.size(); u++) {
+ if (Values[u].first == BB)
+ return Values[u].second;
+ }
+ Values.push_back(std::make_pair(BB, DoesNotDominateBlock));
BlockDisposition D = computeBlockDisposition(S, BB);
- return BlockDispositions[S][BB] = D;
+  SmallVector<std::pair<const BasicBlock *, BlockDisposition>, 2> &Values2 =
+      BlockDispositions[S];
+ for (unsigned u = Values2.size(); u > 0; u--) {
+ if (Values2[u - 1].first == BB) {
+ Values2[u - 1].second = D;
+ break;
+ }
+ }
+ return D;
}
ScalarEvolution::BlockDisposition
diff --git a/lib/Analysis/ScalarEvolutionExpander.cpp b/lib/Analysis/ScalarEvolutionExpander.cpp
index c434b40..86a557b 100644
--- a/lib/Analysis/ScalarEvolutionExpander.cpp
+++ b/lib/Analysis/ScalarEvolutionExpander.cpp
@@ -14,6 +14,7 @@
//===----------------------------------------------------------------------===//
#include "llvm/Analysis/ScalarEvolutionExpander.h"
+#include "llvm/ADT/SmallSet.h"
#include "llvm/ADT/STLExtras.h"
#include "llvm/Analysis/LoopInfo.h"
#include "llvm/Analysis/TargetTransformInfo.h"
@@ -176,8 +177,8 @@ Value *SCEVExpander::InsertBinop(Instruction::BinaryOps Opcode,
}
// Save the original insertion point so we can restore it when we're done.
- BasicBlock *SaveInsertBB = Builder.GetInsertBlock();
- BasicBlock::iterator SaveInsertPt = Builder.GetInsertPoint();
+ DebugLoc Loc = Builder.GetInsertPoint()->getDebugLoc();
+ BuilderType::InsertPointGuard Guard(Builder);
// Move the insertion point out of as many loops as we can.
while (const Loop *L = SE.LI->getLoopFor(Builder.GetInsertBlock())) {
@@ -191,13 +192,9 @@ Value *SCEVExpander::InsertBinop(Instruction::BinaryOps Opcode,
// If we haven't found this binop, insert it.
Instruction *BO = cast<Instruction>(Builder.CreateBinOp(Opcode, LHS, RHS));
- BO->setDebugLoc(SaveInsertPt->getDebugLoc());
+ BO->setDebugLoc(Loc);
rememberInstruction(BO);
- // Restore the original insert point.
- if (SaveInsertBB)
- restoreInsertPoint(SaveInsertBB, SaveInsertPt);
-
return BO;
}
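The BuilderType::InsertPointGuard introduced above (and reused throughout the remaining hunks in this file) is an RAII helper from IRBuilder. Roughly, as a sketch rather than the real definition (the real guard also saves the debug location):

    class InsertPointGuardSketch {
      IRBuilderBase &B;
      BasicBlock *Block;
      BasicBlock::iterator Point;
    public:
      InsertPointGuardSketch(IRBuilderBase &B)
          : B(B), Block(B.GetInsertBlock()), Point(B.GetInsertPoint()) {}
      // Restores on every exit path, so the early returns in the functions
      // below no longer need manual restoreInsertPoint calls.
      ~InsertPointGuardSketch() { B.SetInsertPoint(Block, Point); }
    };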
@@ -406,6 +403,10 @@ Value *SCEVExpander::expandAddToGEP(const SCEV *const *op_begin,
// without the other.
SplitAddRecs(Ops, Ty, SE);
+ Type *IntPtrTy = SE.TD
+ ? SE.TD->getIntPtrType(PTy)
+ : Type::getInt64Ty(PTy->getContext());
+
// Descend down the pointer's type and attempt to convert the other
// operands into GEP indices, at each level. The first index in a GEP
// indexes into the array implied by the pointer operand; the rest of
@@ -416,7 +417,7 @@ Value *SCEVExpander::expandAddToGEP(const SCEV *const *op_begin,
// array indexing.
SmallVector<const SCEV *, 8> ScaledOps;
if (ElTy->isSized()) {
- const SCEV *ElSize = SE.getSizeOfExpr(ElTy);
+ const SCEV *ElSize = SE.getSizeOfExpr(IntPtrTy, ElTy);
if (!ElSize->isZero()) {
SmallVector<const SCEV *, 8> NewOps;
for (unsigned i = 0, e = Ops.size(); i != e; ++i) {
@@ -548,8 +549,7 @@ Value *SCEVExpander::expandAddToGEP(const SCEV *const *op_begin,
}
// Save the original insertion point so we can restore it when we're done.
- BasicBlock *SaveInsertBB = Builder.GetInsertBlock();
- BasicBlock::iterator SaveInsertPt = Builder.GetInsertPoint();
+ BuilderType::InsertPointGuard Guard(Builder);
// Move the insertion point out of as many loops as we can.
while (const Loop *L = SE.LI->getLoopFor(Builder.GetInsertBlock())) {
@@ -565,16 +565,11 @@ Value *SCEVExpander::expandAddToGEP(const SCEV *const *op_begin,
Value *GEP = Builder.CreateGEP(V, Idx, "uglygep");
rememberInstruction(GEP);
- // Restore the original insert point.
- if (SaveInsertBB)
- restoreInsertPoint(SaveInsertBB, SaveInsertPt);
-
return GEP;
}
// Save the original insertion point so we can restore it when we're done.
- BasicBlock *SaveInsertBB = Builder.GetInsertBlock();
- BasicBlock::iterator SaveInsertPt = Builder.GetInsertPoint();
+ BuilderType::InsertPoint SaveInsertPt = Builder.saveIP();
// Move the insertion point out of as many loops as we can.
while (const Loop *L = SE.LI->getLoopFor(Builder.GetInsertBlock())) {
@@ -610,8 +605,7 @@ Value *SCEVExpander::expandAddToGEP(const SCEV *const *op_begin,
rememberInstruction(GEP);
// Restore the original insert point.
- if (SaveInsertBB)
- restoreInsertPoint(SaveInsertBB, SaveInsertPt);
+ Builder.restoreIP(SaveInsertPt);
return expand(SE.getAddExpr(Ops));
}
@@ -1076,8 +1070,7 @@ SCEVExpander::getAddRecExprPHILiterally(const SCEVAddRecExpr *Normalized,
}
// Save the original insertion point so we can restore it when we're done.
- BasicBlock *SaveInsertBB = Builder.GetInsertBlock();
- BasicBlock::iterator SaveInsertPt = Builder.GetInsertPoint();
+ BuilderType::InsertPointGuard Guard(Builder);
// Another AddRec may need to be recursively expanded below. For example, if
// this AddRec is quadratic, the StepV may itself be an AddRec in this
@@ -1144,10 +1137,6 @@ SCEVExpander::getAddRecExprPHILiterally(const SCEVAddRecExpr *Normalized,
PN->addIncoming(IncV, Pred);
}
- // Restore the original insert point.
- if (SaveInsertBB)
- restoreInsertPoint(SaveInsertBB, SaveInsertPt);
-
// After expanding subexpressions, restore the PostIncLoops set so the caller
// can ensure that IVIncrement dominates the current uses.
PostIncLoops = SavedPostIncLoops;
@@ -1232,19 +1221,19 @@ Value *SCEVExpander::expandAddRecExprLiterally(const SCEVAddRecExpr *S) {
!ExpandTy->isPointerTy() && Step->isNonConstantNegative();
if (useSubtract)
Step = SE.getNegativeSCEV(Step);
- // Expand the step somewhere that dominates the loop header.
- BasicBlock *SaveInsertBB = Builder.GetInsertBlock();
- BasicBlock::iterator SaveInsertPt = Builder.GetInsertPoint();
- Value *StepV = expandCodeFor(Step, IntTy, L->getHeader()->begin());
- // Restore the insertion point to the place where the caller has
- // determined dominates all uses.
- restoreInsertPoint(SaveInsertBB, SaveInsertPt);
+ Value *StepV;
+ {
+ // Expand the step somewhere that dominates the loop header.
+ BuilderType::InsertPointGuard Guard(Builder);
+ StepV = expandCodeFor(Step, IntTy, L->getHeader()->begin());
+ }
Result = expandIVInc(PN, StepV, L, ExpandTy, IntTy, useSubtract);
}
}
// Re-apply any non-loop-dominating scale.
if (PostLoopScale) {
+ assert(S->isAffine() && "Can't linearly scale non-affine recurrences.");
Result = InsertNoopCastOfTo(Result, IntTy);
Result = Builder.CreateMul(Result,
expandCodeFor(PostLoopScale, IntTy));
@@ -1289,16 +1278,14 @@ Value *SCEVExpander::visitAddRecExpr(const SCEVAddRecExpr *S) {
NewOps[i] = SE.getAnyExtendExpr(S->op_begin()[i], CanonicalIV->getType());
Value *V = expand(SE.getAddRecExpr(NewOps, S->getLoop(),
S->getNoWrapFlags(SCEV::FlagNW)));
- BasicBlock *SaveInsertBB = Builder.GetInsertBlock();
- BasicBlock::iterator SaveInsertPt = Builder.GetInsertPoint();
BasicBlock::iterator NewInsertPt =
llvm::next(BasicBlock::iterator(cast<Instruction>(V)));
+ BuilderType::InsertPointGuard Guard(Builder);
while (isa<PHINode>(NewInsertPt) || isa<DbgInfoIntrinsic>(NewInsertPt) ||
isa<LandingPadInst>(NewInsertPt))
++NewInsertPt;
V = expandCodeFor(SE.getTruncateExpr(SE.getUnknown(V), Ty), 0,
NewInsertPt);
- restoreInsertPoint(SaveInsertBB, SaveInsertPt);
return V;
}
@@ -1342,9 +1329,13 @@ Value *SCEVExpander::visitAddRecExpr(const SCEVAddRecExpr *S) {
Header->begin());
rememberInstruction(CanonicalIV);
+ SmallSet<BasicBlock *, 4> PredSeen;
Constant *One = ConstantInt::get(Ty, 1);
for (pred_iterator HPI = HPB; HPI != HPE; ++HPI) {
BasicBlock *HP = *HPI;
+ if (!PredSeen.insert(HP))
+ continue;
+
if (L->contains(HP)) {
// Insert a unit add instruction right before the terminator
// corresponding to the back-edge.
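
The PredSeen set added above deduplicates loop-header predecessors: pred_iterator yields one visit per incoming CFG edge, so a block that branches to the header through several switch cases shows up repeatedly, and adding one unit increment per visit would be wrong. A small illustration of edges versus distinct blocks, under the 3.4-era API where SmallSet::insert returns a bool:

  #include "llvm/ADT/SmallSet.h"
  #include "llvm/Support/CFG.h"
  using namespace llvm;

  // Hypothetical count of incoming edges versus unique predecessor blocks.
  static void countPreds(BasicBlock *BB, unsigned &Edges, unsigned &Blocks) {
    SmallSet<BasicBlock *, 4> Seen;
    Edges = Blocks = 0;
    for (pred_iterator PI = pred_begin(BB), E = pred_end(BB); PI != E; ++PI) {
      ++Edges;               // one per incoming CFG edge
      if (Seen.insert(*PI))  // true only on the first visit of a block
        ++Blocks;            // one per distinct predecessor block
    }
  }

A switch with two cases targeting BB contributes two edges but one block; the continue above skips exactly the second visit.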
@@ -1527,8 +1518,7 @@ Value *SCEVExpander::expand(const SCEV *S) {
if (I != InsertedExpressions.end())
return I->second;
- BasicBlock *SaveInsertBB = Builder.GetInsertBlock();
- BasicBlock::iterator SaveInsertPt = Builder.GetInsertPoint();
+ BuilderType::InsertPointGuard Guard(Builder);
Builder.SetInsertPoint(InsertPt->getParent(), InsertPt);
// Expand the expression into instructions.
@@ -1541,8 +1531,6 @@ Value *SCEVExpander::expand(const SCEV *S) {
// a postinc expansion, it could be reused by a non postinc user, but only if
// its insertion point was already at the head of the loop.
InsertedExpressions[std::make_pair(S, InsertPt)] = V;
-
- restoreInsertPoint(SaveInsertBB, SaveInsertPt);
return V;
}
@@ -1553,10 +1541,6 @@ void SCEVExpander::rememberInstruction(Value *I) {
InsertedValues.insert(I);
}
-void SCEVExpander::restoreInsertPoint(BasicBlock *BB, BasicBlock::iterator I) {
- Builder.SetInsertPoint(BB, I);
-}
-
/// getOrInsertCanonicalInductionVariable - This method returns the
/// canonical induction variable of the specified type for the specified
/// loop (inserting one if there is none). A canonical induction variable
@@ -1572,11 +1556,8 @@ SCEVExpander::getOrInsertCanonicalInductionVariable(const Loop *L,
SE.getConstant(Ty, 1), L, SCEV::FlagAnyWrap);
// Emit code for it.
- BasicBlock *SaveInsertBB = Builder.GetInsertBlock();
- BasicBlock::iterator SaveInsertPt = Builder.GetInsertPoint();
+ BuilderType::InsertPointGuard Guard(Builder);
PHINode *V = cast<PHINode>(expandCodeFor(H, 0, L->getHeader()->begin()));
- if (SaveInsertBB)
- restoreInsertPoint(SaveInsertBB, SaveInsertPt);
return V;
}
@@ -1724,28 +1705,43 @@ namespace {
// Currently, we only allow division by a nonzero constant here. If this is
// inadequate, we could easily allow division by SCEVUnknown by using
// ValueTracking to check isKnownNonZero().
+//
+// We cannot generally expand recurrences unless the step dominates the loop
+// header. The expander handles the special case of affine recurrences by
+// scaling the recurrence outside the loop, but this technique isn't generally
+// applicable. Expanding a nested recurrence outside a loop requires computing
+// binomial coefficients. This could be done, but the recurrence has to be in a
+// perfectly reduced form, which can't be guaranteed.
struct SCEVFindUnsafe {
+ ScalarEvolution &SE;
bool IsUnsafe;
- SCEVFindUnsafe(): IsUnsafe(false) {}
+ SCEVFindUnsafe(ScalarEvolution &se): SE(se), IsUnsafe(false) {}
bool follow(const SCEV *S) {
- const SCEVUDivExpr *D = dyn_cast<SCEVUDivExpr>(S);
- if (!D)
- return true;
- const SCEVConstant *SC = dyn_cast<SCEVConstant>(D->getRHS());
- if (SC && !SC->getValue()->isZero())
- return true;
- IsUnsafe = true;
- return false;
+ if (const SCEVUDivExpr *D = dyn_cast<SCEVUDivExpr>(S)) {
+ const SCEVConstant *SC = dyn_cast<SCEVConstant>(D->getRHS());
+ if (!SC || SC->getValue()->isZero()) {
+ IsUnsafe = true;
+ return false;
+ }
+ }
+ if (const SCEVAddRecExpr *AR = dyn_cast<SCEVAddRecExpr>(S)) {
+ const SCEV *Step = AR->getStepRecurrence(SE);
+ if (!AR->isAffine() && !SE.dominates(Step, AR->getLoop()->getHeader())) {
+ IsUnsafe = true;
+ return false;
+ }
+ }
+ return true;
}
bool isDone() const { return IsUnsafe; }
};
}
namespace llvm {
-bool isSafeToExpand(const SCEV *S) {
- SCEVFindUnsafe Search;
+bool isSafeToExpand(const SCEV *S, ScalarEvolution &SE) {
+ SCEVFindUnsafe Search(SE);
visitAll(S, Search);
return !Search.IsUnsafe;
}
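
With ScalarEvolution threaded through, callers can vet an expression before building an expander. A usage sketch, assuming the 3.4-era SCEVExpander constructor taking (ScalarEvolution &, const char *):

  #include "llvm/Analysis/ScalarEvolutionExpander.h"
  using namespace llvm;

  // Hypothetical caller: expand S at InsertPt only when it contains no
  // division by zero or by a non-constant, and no non-affine recurrence
  // whose step fails to dominate its loop header.
  static Value *tryExpand(const SCEV *S, ScalarEvolution &SE,
                          Instruction *InsertPt) {
    if (!isSafeToExpand(S, SE))
      return 0;
    SCEVExpander Expander(SE, "scevexp");
    return Expander.expandCodeFor(S, S->getType(), InsertPt);
  }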
diff --git a/lib/Analysis/ScalarEvolutionNormalization.cpp b/lib/Analysis/ScalarEvolutionNormalization.cpp
index dd2ed4f..f110616 100644
--- a/lib/Analysis/ScalarEvolutionNormalization.cpp
+++ b/lib/Analysis/ScalarEvolutionNormalization.cpp
@@ -119,11 +119,19 @@ TransformImpl(const SCEV *S, Instruction *User, Value *OperandValToReplace) {
const SCEV *Result = SE.getAddRecExpr(Operands, L, SCEV::FlagAnyWrap);
switch (Kind) {
case NormalizeAutodetect:
- if (IVUseShouldUsePostIncValue(User, OperandValToReplace, L, &DT)) {
- const SCEV *TransformedStep =
- TransformSubExpr(AR->getStepRecurrence(SE),
- User, OperandValToReplace);
- Result = SE.getMinusSCEV(Result, TransformedStep);
+ // Normalize this SCEV by subtracting the expression for the final step.
+ // We only allow affine AddRecs to be normalized, otherwise we would not
+ // be able to correctly denormalize.
+ // e.g. {1,+,3,+,2} == {-2,+,1,+,2} + {3,+,2}
+ // Normalized form: {-2,+,1,+,2}
+ // Denormalized form: {1,+,3,+,2}
+ //
+// However, denormalization would use a different step expression than
+ // normalization (see getPostIncExpr), generating the wrong final
+ // expression: {-2,+,1,+,2} + {1,+,2} => {-1,+,3,+,2}
+ if (AR->isAffine() &&
+ IVUseShouldUsePostIncValue(User, OperandValToReplace, L, &DT)) {
+ Result = SE.getMinusSCEV(Result, AR->getStepRecurrence(SE));
Loops.insert(L);
}
#if 0
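
The example in the comment checks out numerically. {1,+,3,+,2} takes the values 1, 4, 9, 16 (first differences 3, 5, 7, i.e. {3,+,2}); the normalized form {-2,+,1,+,2} takes -2, -1, 2, 7. Denormalizing with the same step {3,+,2} adds 3, 5, 7, 9 and recovers 1, 4, 9, 16, but getPostIncExpr would add {1,+,2}, i.e. 1, 3, 5, 7, yielding -1, 2, 7, 14 = {-1,+,3,+,2} (componentwise, {-2,+,1,+,2} + {1,+,2} = {-1,+,3,+,2}). Only for affine recurrences do the two step expressions coincide, hence the isAffine() guard.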
diff --git a/lib/Analysis/TargetTransformInfo.cpp b/lib/Analysis/TargetTransformInfo.cpp
index 4ad7162..0353295 100644
--- a/lib/Analysis/TargetTransformInfo.cpp
+++ b/lib/Analysis/TargetTransformInfo.cpp
@@ -96,6 +96,11 @@ bool TargetTransformInfo::isLoweredToCall(const Function *F) const {
return PrevTTI->isLoweredToCall(F);
}
+void TargetTransformInfo::getUnrollingPreferences(Loop *L,
+ UnrollingPreferences &UP) const {
+ PrevTTI->getUnrollingPreferences(L, UP);
+}
+
bool TargetTransformInfo::isLegalAddImmediate(int64_t Imm) const {
return PrevTTI->isLegalAddImmediate(Imm);
}
@@ -145,6 +150,10 @@ TargetTransformInfo::getPopcntSupport(unsigned IntTyWidthInBit) const {
return PrevTTI->getPopcntSupport(IntTyWidthInBit);
}
+bool TargetTransformInfo::haveFastSqrt(Type *Ty) const {
+ return PrevTTI->haveFastSqrt(Ty);
+}
+
unsigned TargetTransformInfo::getIntImmCost(const APInt &Imm, Type *Ty) const {
return PrevTTI->getIntImmCost(Imm, Ty);
}
@@ -215,6 +224,11 @@ unsigned TargetTransformInfo::getAddressComputationCost(Type *Tp,
return PrevTTI->getAddressComputationCost(Tp, IsComplex);
}
+unsigned TargetTransformInfo::getReductionCost(unsigned Opcode, Type *Ty,
+ bool IsPairwise) const {
+ return PrevTTI->getReductionCost(Opcode, Ty, IsPairwise);
+}
+
namespace {
struct NoTTI : ImmutablePass, TargetTransformInfo {
@@ -265,26 +279,34 @@ struct NoTTI : ImmutablePass, TargetTransformInfo {
// Otherwise, the default basic cost is used.
return TCC_Basic;
- case Instruction::IntToPtr:
+ case Instruction::IntToPtr: {
+ if (!DL)
+ return TCC_Basic;
+
// An inttoptr cast is free so long as the input is a legal integer type
// which doesn't contain values outside the range of a pointer.
- if (DL && DL->isLegalInteger(OpTy->getScalarSizeInBits()) &&
- OpTy->getScalarSizeInBits() <= DL->getPointerSizeInBits())
+ unsigned OpSize = OpTy->getScalarSizeInBits();
+ if (DL->isLegalInteger(OpSize) &&
+ OpSize <= DL->getPointerTypeSizeInBits(Ty))
return TCC_Free;
// Otherwise it's not a no-op.
return TCC_Basic;
+ }
+ case Instruction::PtrToInt: {
+ if (!DL)
+ return TCC_Basic;
- case Instruction::PtrToInt:
// A ptrtoint cast is free so long as the result is large enough to store
// the pointer, and a legal integer type.
- if (DL && DL->isLegalInteger(Ty->getScalarSizeInBits()) &&
- Ty->getScalarSizeInBits() >= DL->getPointerSizeInBits())
+ unsigned DestSize = Ty->getScalarSizeInBits();
+ if (DL->isLegalInteger(DestSize) &&
+ DestSize >= DL->getPointerTypeSizeInBits(OpTy))
return TCC_Free;
// Otherwise it's not a no-op.
return TCC_Basic;
-
+ }
case Instruction::Trunc:
// trunc to a native type is free (assuming the target has compare and
// shift-right of the same width).
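
The rewritten cost logic keys the free-cast checks to the pointer width of the specific address space via getPointerTypeSizeInBits. A sketch of just the inttoptr predicate, assuming a hypothetical target whose address space 1 uses 32-bit pointers alongside 64-bit default pointers:

  #include "llvm/IR/DataLayout.h"
  using namespace llvm;

  // Free iff the source integer is legal and no wider than pointers in
  // the *destination* pointer type's address space.
  static bool intToPtrIsFree(const DataLayout &DL, Type *OpTy, Type *Ty) {
    unsigned OpSize = OpTy->getScalarSizeInBits();
    return DL.isLegalInteger(OpSize) &&
           OpSize <= DL.getPointerTypeSizeInBits(Ty);
  }

With the old default-address-space getPointerSizeInBits(), an i64-to-addrspace(1)-pointer cast on such a target would wrongly count as free even though a 64-bit value cannot round-trip through a 32-bit pointer.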
@@ -457,6 +479,8 @@ struct NoTTI : ImmutablePass, TargetTransformInfo {
return true;
}
+ void getUnrollingPreferences(Loop *, UnrollingPreferences &) const { }
+
bool isLegalAddImmediate(int64_t Imm) const {
return false;
}
@@ -505,6 +529,10 @@ struct NoTTI : ImmutablePass, TargetTransformInfo {
return PSK_Software;
}
+ bool haveFastSqrt(Type *Ty) const {
+ return false;
+ }
+
unsigned getIntImmCost(const APInt &Imm, Type *Ty) const {
return 1;
}
@@ -569,6 +597,10 @@ struct NoTTI : ImmutablePass, TargetTransformInfo {
unsigned getAddressComputationCost(Type *Tp, bool) const {
return 0;
}
+
+ unsigned getReductionCost(unsigned, Type *, bool) const {
+ return 1;
+ }
};
} // end anonymous namespace
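
Each hook added in this patch follows the same two-part recipe: a forwarding stub on the analysis-group chain (delegating to PrevTTI) plus a conservative default in NoTTI, so a target overrides only what it knows. A sketch of such an override; the struct name is illustrative, pass boilerplate (ID, registration) is omitted, and the only assumed field is UnrollingPreferences::Threshold from the in-tree header:

  // Hypothetical target TTI, 3.4-style (plain virtual methods).
  struct MyTargetTTI : ImmutablePass, TargetTransformInfo {
    virtual void getUnrollingPreferences(Loop *L,
                                         UnrollingPreferences &UP) const {
      UP.Threshold = 200; // this target tolerates more unrolled code
    }
    virtual bool haveFastSqrt(Type *Ty) const {
      return Ty->isFloatTy(); // fast single-precision sqrt only
    }
    virtual unsigned getReductionCost(unsigned Opcode, Type *Ty,
                                      bool IsPairwise) const {
      return IsPairwise ? 2 : 1; // pairwise shuffles cost extra here
    }
  };

Anything not overridden keeps flowing down the chain to NoTTI's defaults above: no unrolling preference changes, no fast sqrt, unit reduction cost.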
diff --git a/lib/Analysis/TypeBasedAliasAnalysis.cpp b/lib/Analysis/TypeBasedAliasAnalysis.cpp
index bbf3c3a..6791d4b 100644
--- a/lib/Analysis/TypeBasedAliasAnalysis.cpp
+++ b/lib/Analysis/TypeBasedAliasAnalysis.cpp
@@ -16,7 +16,12 @@
// typical C/C++ TBAA, but it can also be used to implement custom alias
// analysis behavior for other languages.
//
-// The current metadata format is very simple. TBAA MDNodes have up to
+// Two metadata formats are supported: scalar TBAA and struct-path aware
+// TBAA. Once all test cases are upgraded to struct-path aware TBAA and
+// existing bc files can be auto-upgraded, support for scalar TBAA can be
+// dropped.
+//
+// The scalar TBAA metadata format is very simple. TBAA MDNodes have up to
// three fields, e.g.:
// !0 = metadata !{ metadata !"an example type tree" }
// !1 = metadata !{ metadata !"int", metadata !0 }
@@ -40,6 +45,65 @@
// should return true; see
// http://llvm.org/docs/AliasAnalysis.html#OtherItfs).
//
+// With struct-path aware TBAA, the MDNodes attached to an instruction using
+// "!tbaa" are called path tag nodes.
+//
+// A path tag node has four fields; the last field is optional.
+//
+// The first field is the base type node, which can be a struct type node
+// or a scalar type node. The second field is the access type node, which
+// must be a scalar type node. The third field is the offset into the base type.
+// The last field has the same meaning as the last field of our scalar TBAA:
+// it's an integer which, if equal to 1, indicates that the access is "constant".
+//
+// The struct type node has a name and a list of pairs, one pair for each member
+// of the struct. The first element of each pair is a type node (a struct type
+// node or a scalar type node), specifying the type of the member; the second
+// element of each pair is the offset of the member.
+//
+// Given this example:
+// typedef struct {
+// short s;
+// } A;
+// typedef struct {
+// uint16_t s;
+// A a;
+// } B;
+//
+// For an access to B.a.s, we attach !5 (a path tag node) to the load/store
+// instruction. The base type is !4 (struct B), the access type is !2 (scalar
+// type short) and the offset is 4.
+//
+// !0 = metadata !{metadata !"Simple C/C++ TBAA"}
+// !1 = metadata !{metadata !"omnipotent char", metadata !0} // Scalar type node
+// !2 = metadata !{metadata !"short", metadata !1} // Scalar type node
+// !3 = metadata !{metadata !"A", metadata !2, i64 0} // Struct type node
+// !4 = metadata !{metadata !"B", metadata !2, i64 0, metadata !3, i64 4}
+// // Struct type node
+// !5 = metadata !{metadata !4, metadata !2, i64 4} // Path tag node
+//
+// The struct type nodes and the scalar type nodes form a type DAG.
+// Root (!0)
+// char (!1) -- edge to Root
+// short (!2) -- edge to char
+// A (!3) -- edge with offset 0 to short
+// B (!4) -- edge with offset 0 to short and edge with offset 4 to A
+//
+// To check if two tags (tagX and tagY) can alias, we start from the base type
+// of tagX, follow the edge with the correct offset in the type DAG and adjust
+// the offset until we reach the base type of tagY or until we reach the Root
+// node.
+// If we reach the base type of tagY, compare the adjusted offset with the
+// offset of tagY: return Alias if the offsets are equal, and NoAlias
+// otherwise.
+// If we reach the Root node, repeat the walk starting from the base type of
+// tagY to see if we reach the base type of tagX.
+//
+// If they have different roots, they're part of different potentially
+// unrelated type systems, so we return Alias to be conservative.
+// If neither node is an ancestor of the other and they have the same root,
+// then we say NoAlias.
+//
// TODO: The current metadata format doesn't support struct
// fields. For example:
// struct X {
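
The walk described in the comment above is short to write down. A simplified one-directional sketch reusing the TBAAStructTagNode/TBAAStructTypeNode wrapper names from this file (the real logic, including the symmetric second walk after hitting the root, lives in PathAliases):

  // Walk from TagX's base type toward the root, at each step following the
  // member edge covering Offset and rebasing Offset into that member
  // (getParent adjusts Offset in place).
  static bool reachesWithSameOffset(TBAAStructTagNode TagX,
                                    TBAAStructTagNode TagY) {
    uint64_t Offset = TagX.getOffset();
    TBAAStructTypeNode T(TagX.getBaseType());
    while (T.getNode()) {
      if (T.getNode() == TagY.getBaseType())
        return Offset == TagY.getOffset(); // equal offsets => may alias
      T = T.getParent(Offset);
    }
    return false; // reached the root without meeting TagY's base type
  }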
@@ -71,7 +135,6 @@ using namespace llvm;
// achieved by stripping the !tbaa tags from IR, but this option is sometimes
// more convenient.
static cl::opt<bool> EnableTBAA("enable-tbaa", cl::init(true));
-static cl::opt<bool> EnableStructPathTBAA("struct-path-tbaa", cl::init(false));
namespace {
/// TBAANode - This is a simple wrapper around an MDNode which provides a
@@ -168,8 +231,12 @@ namespace {
if (Node->getNumOperands() < 2)
return TBAAStructTypeNode();
- // Special handling for a scalar type node.
+ // Fast path for a scalar type node and a struct type node with a single
+ // field.
if (Node->getNumOperands() <= 3) {
+ uint64_t Cur = Node->getNumOperands() == 2 ? 0 :
+ cast<ConstantInt>(Node->getOperand(2))->getZExtValue();
+ Offset -= Cur;
MDNode *P = dyn_cast_or_null<MDNode>(Node->getOperand(1));
if (!P)
return TBAAStructTypeNode();
@@ -259,12 +326,21 @@ TypeBasedAliasAnalysis::getAnalysisUsage(AnalysisUsage &AU) const {
AliasAnalysis::getAnalysisUsage(AU);
}
+/// Check the first operand of the tbaa tag node. If it is an MDNode, we treat
+/// it as struct-path aware TBAA format; otherwise, we treat it as scalar TBAA
+/// format.
+static bool isStructPathTBAA(const MDNode *MD) {
+ // Anonymous TBAA root starts with a MDNode and dragonegg uses it as
+  // An anonymous TBAA root starts with an MDNode, and dragonegg uses it as
+ return isa<MDNode>(MD->getOperand(0)) && MD->getNumOperands() >= 3;
+}
+
/// Aliases - Test whether the type represented by A may alias the
/// type represented by B.
bool
TypeBasedAliasAnalysis::Aliases(const MDNode *A,
const MDNode *B) const {
- if (EnableStructPathTBAA)
+ if (isStructPathTBAA(A))
return PathAliases(A, B);
// Keep track of the root node for A and B.
@@ -397,8 +473,8 @@ bool TypeBasedAliasAnalysis::pointsToConstantMemory(const Location &Loc,
// If this is an "immutable" type, we can assume the pointer is pointing
// to constant memory.
- if ((!EnableStructPathTBAA && TBAANode(M).TypeIsImmutable()) ||
- (EnableStructPathTBAA && TBAAStructTagNode(M).TypeIsImmutable()))
+ if ((!isStructPathTBAA(M) && TBAANode(M).TypeIsImmutable()) ||
+ (isStructPathTBAA(M) && TBAAStructTagNode(M).TypeIsImmutable()))
return true;
return AliasAnalysis::pointsToConstantMemory(Loc, OrLocal);
@@ -414,8 +490,8 @@ TypeBasedAliasAnalysis::getModRefBehavior(ImmutableCallSite CS) {
// If this is an "immutable" type, we can assume the call doesn't write
// to memory.
if (const MDNode *M = CS.getInstruction()->getMetadata(LLVMContext::MD_tbaa))
- if ((!EnableStructPathTBAA && TBAANode(M).TypeIsImmutable()) ||
- (EnableStructPathTBAA && TBAAStructTagNode(M).TypeIsImmutable()))
+ if ((!isStructPathTBAA(M) && TBAANode(M).TypeIsImmutable()) ||
+ (isStructPathTBAA(M) && TBAAStructTagNode(M).TypeIsImmutable()))
Min = OnlyReadsMemory;
return ModRefBehavior(AliasAnalysis::getModRefBehavior(CS) & Min);
@@ -458,6 +534,25 @@ TypeBasedAliasAnalysis::getModRefInfo(ImmutableCallSite CS1,
return AliasAnalysis::getModRefInfo(CS1, CS2);
}
+bool MDNode::isTBAAVtableAccess() const {
+ if (!isStructPathTBAA(this)) {
+ if (getNumOperands() < 1) return false;
+ if (MDString *Tag1 = dyn_cast<MDString>(getOperand(0))) {
+ if (Tag1->getString() == "vtable pointer") return true;
+ }
+ return false;
+ }
+
+ // For struct-path aware TBAA, we use the access type of the tag.
+ if (getNumOperands() < 2) return false;
+ MDNode *Tag = cast_or_null<MDNode>(getOperand(1));
+ if (!Tag) return false;
+ if (MDString *Tag1 = dyn_cast<MDString>(Tag->getOperand(0))) {
+ if (Tag1->getString() == "vtable pointer") return true;
+ }
+ return false;
+}
+
MDNode *MDNode::getMostGenericTBAA(MDNode *A, MDNode *B) {
if (!A || !B)
return NULL;
@@ -466,7 +561,8 @@ MDNode *MDNode::getMostGenericTBAA(MDNode *A, MDNode *B) {
return A;
// For struct-path aware TBAA, we use the access type of the tag.
- if (EnableStructPathTBAA) {
+ bool StructPath = isStructPathTBAA(A);
+ if (StructPath) {
A = cast_or_null<MDNode>(A->getOperand(1));
if (!A) return 0;
B = cast_or_null<MDNode>(B->getOperand(1));
@@ -499,7 +595,7 @@ MDNode *MDNode::getMostGenericTBAA(MDNode *A, MDNode *B) {
--IA;
--IB;
}
- if (!EnableStructPathTBAA)
+ if (!StructPath)
return Ret;
if (!Ret)
diff --git a/lib/Analysis/ValueTracking.cpp b/lib/Analysis/ValueTracking.cpp
index 4591af8..e39ee62 100644
--- a/lib/Analysis/ValueTracking.cpp
+++ b/lib/Analysis/ValueTracking.cpp
@@ -15,6 +15,7 @@
#include "llvm/Analysis/ValueTracking.h"
#include "llvm/ADT/SmallPtrSet.h"
#include "llvm/Analysis/InstructionSimplify.h"
+#include "llvm/Analysis/MemoryBuiltins.h"
#include "llvm/IR/Constants.h"
#include "llvm/IR/DataLayout.h"
#include "llvm/IR/GlobalAlias.h"
@@ -39,8 +40,8 @@ const unsigned MaxDepth = 6;
static unsigned getBitWidth(Type *Ty, const DataLayout *TD) {
if (unsigned BitWidth = Ty->getScalarSizeInBits())
return BitWidth;
- assert(isa<PointerType>(Ty) && "Expected a pointer type!");
- return TD ? TD->getPointerSizeInBits() : 0;
+
+ return TD ? TD->getPointerTypeSizeInBits(Ty) : 0;
}
static void ComputeMaskedBitsAddSub(bool Add, Value *Op0, Value *Op1, bool NSW,
@@ -629,9 +630,19 @@ void llvm::ComputeMaskedBits(Value *V, APInt &KnownZero, APInt &KnownOne,
Value *Index = I->getOperand(i);
if (StructType *STy = dyn_cast<StructType>(*GTI)) {
// Handle struct member offset arithmetic.
- if (!TD) return;
- const StructLayout *SL = TD->getStructLayout(STy);
+ if (!TD)
+ return;
+
+      // Handle the case where the index is a vector zeroinitializer.
+ Constant *CIndex = cast<Constant>(Index);
+ if (CIndex->isZeroValue())
+ continue;
+
+ if (CIndex->getType()->isVectorTy())
+ Index = CIndex->getSplatValue();
+
unsigned Idx = cast<ConstantInt>(Index)->getZExtValue();
+ const StructLayout *SL = TD->getStructLayout(STy);
uint64_t Offset = SL->getElementOffset(Idx);
TrailZ = std::min<unsigned>(TrailZ,
countTrailingZeros(Offset));
@@ -749,7 +760,6 @@ void llvm::ComputeMaskedBits(Value *V, APInt &KnownZero, APInt &KnownOne,
KnownZero = APInt::getHighBitsSet(BitWidth, BitWidth - LowBits);
break;
}
- case Intrinsic::x86_sse42_crc32_64_8:
case Intrinsic::x86_sse42_crc32_64_64:
KnownZero = APInt::getHighBitsSet(64, 32);
break;
@@ -1704,20 +1714,24 @@ Value *llvm::FindInsertedValue(Value *V, ArrayRef<unsigned> idx_range,
/// it can be expressed as a base pointer plus a constant offset. Return the
/// base and offset to the caller.
Value *llvm::GetPointerBaseWithConstantOffset(Value *Ptr, int64_t &Offset,
- const DataLayout *TD) {
+ const DataLayout *DL) {
// Without DataLayout, conservatively assume 64-bit offsets, which is
// the widest we support.
- unsigned BitWidth = TD ? TD->getPointerSizeInBits() : 64;
+ unsigned BitWidth = DL ? DL->getPointerTypeSizeInBits(Ptr->getType()) : 64;
APInt ByteOffset(BitWidth, 0);
while (1) {
if (Ptr->getType()->isVectorTy())
break;
if (GEPOperator *GEP = dyn_cast<GEPOperator>(Ptr)) {
- APInt GEPOffset(BitWidth, 0);
- if (TD && !GEP->accumulateConstantOffset(*TD, GEPOffset))
- break;
- ByteOffset += GEPOffset;
+ if (DL) {
+ APInt GEPOffset(BitWidth, 0);
+ if (!GEP->accumulateConstantOffset(*DL, GEPOffset))
+ break;
+
+ ByteOffset += GEPOffset;
+ }
+
Ptr = GEP->getPointerOperand();
} else if (Operator::getOpcode(Ptr) == Instruction::BitCast) {
Ptr = cast<Operator>(Ptr)->getOperand(0);
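
A usage sketch of the helper; after this change the accumulated offset's bit width follows the pointer's own address space (getPointerTypeSizeInBits) rather than the default address space's pointer size:

  #include "llvm/Analysis/ValueTracking.h"
  using namespace llvm;

  // Hypothetical caller: split Ptr into a base plus a constant byte offset.
  static Value *splitPointer(Value *Ptr, const DataLayout *DL,
                             int64_t &ByteOff) {
    ByteOff = 0;
    // Returns Ptr unchanged (ByteOff == 0) when nothing can be stripped,
    // e.g. for vector-of-pointer values.
    return GetPointerBaseWithConstantOffset(Ptr, ByteOff, DL);
  }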
@@ -2050,7 +2064,7 @@ bool llvm::isSafeToSpeculativelyExecute(const Value *V,
/// isKnownNonNull - Return true if we know that the specified value is never
/// null.
-bool llvm::isKnownNonNull(const Value *V) {
+bool llvm::isKnownNonNull(const Value *V, const TargetLibraryInfo *TLI) {
// Alloca never returns null, malloc might.
if (isa<AllocaInst>(V)) return true;
@@ -2061,5 +2075,10 @@ bool llvm::isKnownNonNull(const Value *V) {
// Global values are not null unless extern weak.
if (const GlobalValue *GV = dyn_cast<GlobalValue>(V))
return !GV->hasExternalWeakLinkage();
+
+ // operator new never returns null.
+ if (isOperatorNewLikeFn(V, TLI, /*LookThroughBitCast=*/true))
+ return true;
+
return false;
}
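
A closing usage sketch: with the new TargetLibraryInfo parameter, calls to operator new join allocas and non-extern-weak globals as values known to be non-null:

  #include "llvm/Analysis/ValueTracking.h"
  #include "llvm/Target/TargetLibraryInfo.h"
  using namespace llvm;

  // Hypothetical predicate: a null check on V can be dropped when V is
  // provably non-null (alloca, non-extern-weak global, operator new call).
  static bool canOmitNullCheck(const Value *V, const TargetLibraryInfo *TLI) {
    return isKnownNonNull(V, TLI);
  }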