aboutsummaryrefslogtreecommitdiffstats
path: root/lib/Analysis/MemoryDependenceAnalysis.cpp
diff options
context:
space:
mode:
Diffstat (limited to 'lib/Analysis/MemoryDependenceAnalysis.cpp')
-rw-r--r--lib/Analysis/MemoryDependenceAnalysis.cpp193
1 files changed, 147 insertions, 46 deletions
diff --git a/lib/Analysis/MemoryDependenceAnalysis.cpp b/lib/Analysis/MemoryDependenceAnalysis.cpp
index 9eaf109..187eada 100644
--- a/lib/Analysis/MemoryDependenceAnalysis.cpp
+++ b/lib/Analysis/MemoryDependenceAnalysis.cpp
@@ -18,6 +18,7 @@
#include "llvm/ADT/STLExtras.h"
#include "llvm/ADT/Statistic.h"
#include "llvm/Analysis/AliasAnalysis.h"
+#include "llvm/Analysis/AssumptionTracker.h"
#include "llvm/Analysis/InstructionSimplify.h"
#include "llvm/Analysis/MemoryBuiltins.h"
#include "llvm/Analysis/PHITransAddr.h"
@@ -48,13 +49,17 @@ STATISTIC(NumCacheCompleteNonLocalPtr,
"Number of block queries that were completely cached");
// Limit for the number of instructions to scan in a block.
-static const int BlockScanLimit = 100;
+static const unsigned int BlockScanLimit = 100;
+
+// Limit on the number of memdep results to process.
+static const unsigned int NumResultsLimit = 100;
char MemoryDependenceAnalysis::ID = 0;
// Register this pass...
INITIALIZE_PASS_BEGIN(MemoryDependenceAnalysis, "memdep",
"Memory Dependence Analysis", false, true)
+INITIALIZE_PASS_DEPENDENCY(AssumptionTracker)
INITIALIZE_AG_DEPENDENCY(AliasAnalysis)
INITIALIZE_PASS_END(MemoryDependenceAnalysis, "memdep",
"Memory Dependence Analysis", false, true)
@@ -83,11 +88,13 @@ void MemoryDependenceAnalysis::releaseMemory() {
///
void MemoryDependenceAnalysis::getAnalysisUsage(AnalysisUsage &AU) const {
AU.setPreservesAll();
+ AU.addRequired<AssumptionTracker>();
AU.addRequiredTransitive<AliasAnalysis>();
}
bool MemoryDependenceAnalysis::runOnFunction(Function &) {
AA = &getAnalysis<AliasAnalysis>();
+ AT = &getAnalysis<AssumptionTracker>();
DataLayoutPass *DLP = getAnalysisIfAvailable<DataLayoutPass>();
DL = DLP ? &DLP->getDataLayout() : nullptr;
DominatorTreeWrapperPass *DTWP =
@@ -158,29 +165,32 @@ AliasAnalysis::ModRefResult GetLocation(const Instruction *Inst,
return AliasAnalysis::Mod;
}
- if (const IntrinsicInst *II = dyn_cast<IntrinsicInst>(Inst))
+ if (const IntrinsicInst *II = dyn_cast<IntrinsicInst>(Inst)) {
+ AAMDNodes AAInfo;
+
switch (II->getIntrinsicID()) {
case Intrinsic::lifetime_start:
case Intrinsic::lifetime_end:
case Intrinsic::invariant_start:
+ II->getAAMetadata(AAInfo);
Loc = AliasAnalysis::Location(II->getArgOperand(1),
cast<ConstantInt>(II->getArgOperand(0))
- ->getZExtValue(),
- II->getMetadata(LLVMContext::MD_tbaa));
+ ->getZExtValue(), AAInfo);
// These intrinsics don't really modify the memory, but returning Mod
// will allow them to be handled conservatively.
return AliasAnalysis::Mod;
case Intrinsic::invariant_end:
+ II->getAAMetadata(AAInfo);
Loc = AliasAnalysis::Location(II->getArgOperand(2),
cast<ConstantInt>(II->getArgOperand(1))
- ->getZExtValue(),
- II->getMetadata(LLVMContext::MD_tbaa));
+ ->getZExtValue(), AAInfo);
// These intrinsics don't really modify the memory, but returning Mod
// will allow them to be handled conservatively.
return AliasAnalysis::Mod;
default:
break;
}
+ }
// Otherwise, just do the coarse-grained thing that always works.
if (Inst->mayWriteToMemory())
@@ -367,6 +377,36 @@ getPointerDependencyFrom(const AliasAnalysis::Location &MemLoc, bool isLoad,
int64_t MemLocOffset = 0;
unsigned Limit = BlockScanLimit;
bool isInvariantLoad = false;
+
+ // We must be careful with atomic accesses, as they may allow another thread
+ // to touch this location, cloberring it. We are conservative: if the
+ // QueryInst is not a simple (non-atomic) memory access, we automatically
+ // return getClobber.
+ // If it is simple, we know based on the results of
+ // "Compiler testing via a theory of sound optimisations in the C11/C++11
+ // memory model" in PLDI 2013, that a non-atomic location can only be
+ // clobbered between a pair of a release and an acquire action, with no
+ // access to the location in between.
+ // Here is an example for giving the general intuition behind this rule.
+ // In the following code:
+ // store x 0;
+ // release action; [1]
+ // acquire action; [4]
+ // %val = load x;
+ // It is unsafe to replace %val by 0 because another thread may be running:
+ // acquire action; [2]
+ // store x 42;
+ // release action; [3]
+ // with synchronization from 1 to 2 and from 3 to 4, resulting in %val
+ // being 42. A key property of this program however is that if either
+ // 1 or 4 were missing, there would be a race between the store of 42
+ // either the store of 0 or the load (making the whole progam racy).
+ // The paper mentionned above shows that the same property is respected
+ // by every program that can detect any optimisation of that kind: either
+ // it is racy (undefined) or there is a release followed by an acquire
+ // between the pair of accesses under consideration.
+ bool HasSeenAcquire = false;
+
if (isLoad && QueryInst) {
LoadInst *LI = dyn_cast<LoadInst>(QueryInst);
if (LI && LI->getMetadata(LLVMContext::MD_invariant_load) != nullptr)
@@ -404,10 +444,37 @@ getPointerDependencyFrom(const AliasAnalysis::Location &MemLoc, bool isLoad,
// Values depend on loads if the pointers are must aliased. This means that
// a load depends on another must aliased load from the same value.
+ // One exception is atomic loads: a value can depend on an atomic load that it
+ // does not alias with when this atomic load indicates that another thread may
+ // be accessing the location.
if (LoadInst *LI = dyn_cast<LoadInst>(Inst)) {
// Atomic loads have complications involved.
+ // A Monotonic (or higher) load is OK if the query inst is itself not atomic.
+ // An Acquire (or higher) load sets the HasSeenAcquire flag, so that any
+ // release store will know to return getClobber.
// FIXME: This is overly conservative.
- if (!LI->isUnordered())
+ if (!LI->isUnordered()) {
+ if (!QueryInst)
+ return MemDepResult::getClobber(LI);
+ if (auto *QueryLI = dyn_cast<LoadInst>(QueryInst)) {
+ if (!QueryLI->isSimple())
+ return MemDepResult::getClobber(LI);
+ } else if (auto *QuerySI = dyn_cast<StoreInst>(QueryInst)) {
+ if (!QuerySI->isSimple())
+ return MemDepResult::getClobber(LI);
+ } else if (QueryInst->mayReadOrWriteMemory()) {
+ return MemDepResult::getClobber(LI);
+ }
+
+ if (isAtLeastAcquire(LI->getOrdering()))
+ HasSeenAcquire = true;
+ }
+
+ // FIXME: this is overly conservative.
+ // While volatile access cannot be eliminated, they do not have to clobber
+ // non-aliasing locations, as normal accesses can for example be reordered
+ // with volatile accesses.
+ if (LI->isVolatile())
return MemDepResult::getClobber(LI);
AliasAnalysis::Location LoadLoc = AA->getLocation(LI);
@@ -466,8 +533,32 @@ getPointerDependencyFrom(const AliasAnalysis::Location &MemLoc, bool isLoad,
if (StoreInst *SI = dyn_cast<StoreInst>(Inst)) {
// Atomic stores have complications involved.
+ // A Monotonic store is OK if the query inst is itself not atomic.
+ // A Release (or higher) store further requires that no acquire load
+ // has been seen.
// FIXME: This is overly conservative.
- if (!SI->isUnordered())
+ if (!SI->isUnordered()) {
+ if (!QueryInst)
+ return MemDepResult::getClobber(SI);
+ if (auto *QueryLI = dyn_cast<LoadInst>(QueryInst)) {
+ if (!QueryLI->isSimple())
+ return MemDepResult::getClobber(SI);
+ } else if (auto *QuerySI = dyn_cast<StoreInst>(QueryInst)) {
+ if (!QuerySI->isSimple())
+ return MemDepResult::getClobber(SI);
+ } else if (QueryInst->mayReadOrWriteMemory()) {
+ return MemDepResult::getClobber(SI);
+ }
+
+ if (HasSeenAcquire && isAtLeastRelease(SI->getOrdering()))
+ return MemDepResult::getClobber(SI);
+ }
+
+ // FIXME: this is overly conservative.
+ // While volatile access cannot be eliminated, they do not have to clobber
+ // non-aliasing locations, as normal accesses can for example be reordered
+ // with volatile accesses.
+ if (SI->isVolatile())
return MemDepResult::getClobber(SI);
// If alias analysis can tell that this store is guaranteed to not modify
@@ -685,7 +776,7 @@ MemoryDependenceAnalysis::getNonLocalCallDependency(CallSite QueryCS) {
DirtyBlocks.pop_back();
// Already processed this block?
- if (!Visited.insert(DirtyBB))
+ if (!Visited.insert(DirtyBB).second)
continue;
// Do a binary search to see if we already have an entry for this block in
@@ -775,7 +866,7 @@ getNonLocalPointerDependency(const AliasAnalysis::Location &Loc, bool isLoad,
"Can't get pointer deps of a non-pointer!");
Result.clear();
- PHITransAddr Address(const_cast<Value *>(Loc.Ptr), DL);
+ PHITransAddr Address(const_cast<Value *>(Loc.Ptr), DL, AT);
// This is the set of blocks we've inspected, and the pointer we consider in
// each block. Because of critical edges, we currently bail out if querying
@@ -861,7 +952,7 @@ GetNonLocalInfoForBlock(const AliasAnalysis::Location &Loc,
return Dep;
}
-/// SortNonLocalDepInfoCache - Sort the a NonLocalDepInfo cache, given a certain
+/// SortNonLocalDepInfoCache - Sort the NonLocalDepInfo cache, given a certain
/// number of elements in the array that are already properly ordered. This is
/// optimized for the case when only a few entries are added.
static void
@@ -922,10 +1013,10 @@ getNonLocalPointerDepFromBB(const PHITransAddr &Pointer,
// Set up a temporary NLPI value. If the map doesn't yet have an entry for
// CacheKey, this value will be inserted as the associated value. Otherwise,
// it'll be ignored, and we'll have to check to see if the cached size and
- // tbaa tag are consistent with the current query.
+ // aa tags are consistent with the current query.
NonLocalPointerInfo InitialNLPI;
InitialNLPI.Size = Loc.Size;
- InitialNLPI.TBAATag = Loc.TBAATag;
+ InitialNLPI.AATags = Loc.AATags;
// Get the NLPI for CacheKey, inserting one into the map if it doesn't
// already have one.
@@ -955,21 +1046,21 @@ getNonLocalPointerDepFromBB(const PHITransAddr &Pointer,
SkipFirstBlock);
}
- // If the query's TBAATag is inconsistent with the cached one,
+ // If the query's AATags are inconsistent with the cached one,
// conservatively throw out the cached data and restart the query with
// no tag if needed.
- if (CacheInfo->TBAATag != Loc.TBAATag) {
- if (CacheInfo->TBAATag) {
+ if (CacheInfo->AATags != Loc.AATags) {
+ if (CacheInfo->AATags) {
CacheInfo->Pair = BBSkipFirstBlockPair();
- CacheInfo->TBAATag = nullptr;
+ CacheInfo->AATags = AAMDNodes();
for (NonLocalDepInfo::iterator DI = CacheInfo->NonLocalDeps.begin(),
DE = CacheInfo->NonLocalDeps.end(); DI != DE; ++DI)
if (Instruction *Inst = DI->getResult().getInst())
RemoveFromReverseMap(ReverseNonLocalPtrDeps, Inst, CacheKey);
CacheInfo->NonLocalDeps.clear();
}
- if (Loc.TBAATag)
- return getNonLocalPointerDepFromBB(Pointer, Loc.getWithoutTBAATag(),
+ if (Loc.AATags)
+ return getNonLocalPointerDepFromBB(Pointer, Loc.getWithoutAATags(),
isLoad, StartBB, Result, Visited,
SkipFirstBlock);
}
@@ -1045,6 +1136,25 @@ getNonLocalPointerDepFromBB(const PHITransAddr &Pointer,
while (!Worklist.empty()) {
BasicBlock *BB = Worklist.pop_back_val();
+ // If we do process a large number of blocks it becomes very expensive and
+ // likely it isn't worth worrying about
+ if (Result.size() > NumResultsLimit) {
+ Worklist.clear();
+ // Sort it now (if needed) so that recursive invocations of
+ // getNonLocalPointerDepFromBB and other routines that could reuse the
+ // cache value will only see properly sorted cache arrays.
+ if (Cache && NumSortedEntries != Cache->size()) {
+ SortNonLocalDepInfoCache(*Cache, NumSortedEntries);
+ NumSortedEntries = Cache->size();
+ }
+ // Since we bail out, the "Cache" set won't contain all of the
+ // results for the query. This is ok (we can still use it to accelerate
+ // specific block queries) but we can't do the fastpath "return all
+ // results from the set". Clear out the indicator for this.
+ CacheInfo->Pair = BBSkipFirstBlockPair();
+ return true;
+ }
+
// Skip the first block if we have it.
if (!SkipFirstBlock) {
// Analyze the dependency of *Pointer in FromBB. See if we already have
@@ -1369,14 +1479,11 @@ void MemoryDependenceAnalysis::removeInstruction(Instruction *RemInst) {
ReverseDepMapType::iterator ReverseDepIt = ReverseLocalDeps.find(RemInst);
if (ReverseDepIt != ReverseLocalDeps.end()) {
- SmallPtrSet<Instruction*, 4> &ReverseDeps = ReverseDepIt->second;
// RemInst can't be the terminator if it has local stuff depending on it.
- assert(!ReverseDeps.empty() && !isa<TerminatorInst>(RemInst) &&
+ assert(!ReverseDepIt->second.empty() && !isa<TerminatorInst>(RemInst) &&
"Nothing can locally depend on a terminator");
- for (SmallPtrSet<Instruction*, 4>::iterator I = ReverseDeps.begin(),
- E = ReverseDeps.end(); I != E; ++I) {
- Instruction *InstDependingOnRemInst = *I;
+ for (Instruction *InstDependingOnRemInst : ReverseDepIt->second) {
assert(InstDependingOnRemInst != RemInst &&
"Already removed our local dep info");
@@ -1402,12 +1509,10 @@ void MemoryDependenceAnalysis::removeInstruction(Instruction *RemInst) {
ReverseDepIt = ReverseNonLocalDeps.find(RemInst);
if (ReverseDepIt != ReverseNonLocalDeps.end()) {
- SmallPtrSet<Instruction*, 4> &Set = ReverseDepIt->second;
- for (SmallPtrSet<Instruction*, 4>::iterator I = Set.begin(), E = Set.end();
- I != E; ++I) {
- assert(*I != RemInst && "Already removed NonLocalDep info for RemInst");
+ for (Instruction *I : ReverseDepIt->second) {
+ assert(I != RemInst && "Already removed NonLocalDep info for RemInst");
- PerInstNLInfo &INLD = NonLocalDeps[*I];
+ PerInstNLInfo &INLD = NonLocalDeps[I];
// The information is now dirty!
INLD.second = true;
@@ -1419,7 +1524,7 @@ void MemoryDependenceAnalysis::removeInstruction(Instruction *RemInst) {
DI->setResult(NewDirtyVal);
if (Instruction *NextI = NewDirtyVal.getInst())
- ReverseDepsToAdd.push_back(std::make_pair(NextI, *I));
+ ReverseDepsToAdd.push_back(std::make_pair(NextI, I));
}
}
@@ -1438,12 +1543,9 @@ void MemoryDependenceAnalysis::removeInstruction(Instruction *RemInst) {
ReverseNonLocalPtrDepTy::iterator ReversePtrDepIt =
ReverseNonLocalPtrDeps.find(RemInst);
if (ReversePtrDepIt != ReverseNonLocalPtrDeps.end()) {
- SmallPtrSet<ValueIsLoadPair, 4> &Set = ReversePtrDepIt->second;
SmallVector<std::pair<Instruction*, ValueIsLoadPair>,8> ReversePtrDepsToAdd;
- for (SmallPtrSet<ValueIsLoadPair, 4>::iterator I = Set.begin(),
- E = Set.end(); I != E; ++I) {
- ValueIsLoadPair P = *I;
+ for (ValueIsLoadPair P : ReversePtrDepIt->second) {
assert(P.getPointer() != RemInst &&
"Already removed NonLocalPointerDeps info for RemInst");
@@ -1484,8 +1586,10 @@ void MemoryDependenceAnalysis::removeInstruction(Instruction *RemInst) {
DEBUG(verifyRemoved(RemInst));
}
/// verifyRemoved - Verify that the specified instruction does not occur
-/// in our internal data structures.
+/// in our internal data structures. This function verifies by asserting in
+/// debug builds.
void MemoryDependenceAnalysis::verifyRemoved(Instruction *D) const {
+#ifndef NDEBUG
for (LocalDepMapType::const_iterator I = LocalDeps.begin(),
E = LocalDeps.end(); I != E; ++I) {
assert(I->first != D && "Inst occurs in data structures");
@@ -1514,18 +1618,16 @@ void MemoryDependenceAnalysis::verifyRemoved(Instruction *D) const {
for (ReverseDepMapType::const_iterator I = ReverseLocalDeps.begin(),
E = ReverseLocalDeps.end(); I != E; ++I) {
assert(I->first != D && "Inst occurs in data structures");
- for (SmallPtrSet<Instruction*, 4>::const_iterator II = I->second.begin(),
- EE = I->second.end(); II != EE; ++II)
- assert(*II != D && "Inst occurs in data structures");
+ for (Instruction *Inst : I->second)
+ assert(Inst != D && "Inst occurs in data structures");
}
for (ReverseDepMapType::const_iterator I = ReverseNonLocalDeps.begin(),
E = ReverseNonLocalDeps.end();
I != E; ++I) {
assert(I->first != D && "Inst occurs in data structures");
- for (SmallPtrSet<Instruction*, 4>::const_iterator II = I->second.begin(),
- EE = I->second.end(); II != EE; ++II)
- assert(*II != D && "Inst occurs in data structures");
+ for (Instruction *Inst : I->second)
+ assert(Inst != D && "Inst occurs in data structures");
}
for (ReverseNonLocalPtrDepTy::const_iterator
@@ -1533,11 +1635,10 @@ void MemoryDependenceAnalysis::verifyRemoved(Instruction *D) const {
E = ReverseNonLocalPtrDeps.end(); I != E; ++I) {
assert(I->first != D && "Inst occurs in rev NLPD map");
- for (SmallPtrSet<ValueIsLoadPair, 4>::const_iterator II = I->second.begin(),
- E = I->second.end(); II != E; ++II)
- assert(*II != ValueIsLoadPair(D, false) &&
- *II != ValueIsLoadPair(D, true) &&
+ for (ValueIsLoadPair P : I->second)
+ assert(P != ValueIsLoadPair(D, false) &&
+ P != ValueIsLoadPair(D, true) &&
"Inst occurs in ReverseNonLocalPtrDeps map");
}
-
+#endif
}