author     Chris Lattner <sabre@nondot.org>    2008-12-15 03:35:32 +0000
committer  Chris Lattner <sabre@nondot.org>    2008-12-15 03:35:32 +0000
commit     9e59c64c14cfe55e7cc9086c6bff8cfeecac361e
tree       beb35f47a4da2768af1f2b6aecbcd93c0ea438b4 /lib/Analysis
parent     255dafc5de1e0e15ccebc9f5947330833008374a
Implement initial support for PHI translation in memdep. This means that
memdep keeps track of how PHIs affect the pointer in dep queries, which
allows it to eliminate the load in cases like rle-phi-translate.ll, which
basically end up being:
BB1:
X = load P
br BB3
BB2:
Y = load Q
br BB3
BB3:
R = phi [P] [Q]
load R
turning "load R" into a phi of X/Y. In addition to additional exposed
opportunities, this makes memdep safe in many cases that it wasn't before
(which is required for load PRE) and also makes it substantially more
efficient. For example, consider:
bb1: // has many predecessors.
P = some_operator()
load P
In this example, previously memdep would scan all the predecessors of BB1
to see if they had something that would mustalias P. In some cases (e.g.
test/Transforms/GVN/rle-must-alias.ll) it would actually find them and end
up eliminating something. In many other cases though, it would scan and not
find anything useful. MemDep now stops at a block if the pointer is defined
in that block and cannot be phi translated to predecessors. This causes it
to miss the (rare) cases like rle-must-alias.ll, but makes it faster by not
scanning tons of stuff that is unlikely to be useful. For example, this
speeds up GVN as a whole from 3.928s to 2.448s (a 60% speedup)! IMO, scalar GVN
should be enhanced to simplify the rle-must-alias pointer base anyway, which
would allow the loads to be eliminated.
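To make the new stopping rule concrete, here is a minimal, self-contained C++ sketch of the per-block decision. It uses simplified stand-ins, not LLVM's real classes; every name in it is invented for illustration. A live-in pointer is reused unchanged, a PHI is translated through its incoming values, and anything else defined in the block stops the walk:
#include <cstdio>
#include <map>
#include <string>

// Simplified stand-ins for LLVM's BasicBlock / Value / PHINode (illustration only).
struct BasicBlock { std::string Name; };
struct Value {
  std::string Name;
  BasicBlock *DefiningBlock;  // null if the value is not an instruction
  Value(std::string N, BasicBlock *BB = nullptr) : Name(N), DefiningBlock(BB) {}
  virtual ~Value() = default;
};
struct PHIValue : Value {
  std::map<BasicBlock *, Value *> Incoming;  // predecessor block -> incoming pointer
  using Value::Value;
};

// Decide what pointer to query in predecessor Pred when walking up from CurBB.
// Returns null if translation fails, meaning memdep stops and records a
// clobber at the start of CurBB instead of scanning every predecessor.
Value *PHITranslate(Value *Ptr, BasicBlock *CurBB, BasicBlock *Pred) {
  if (Ptr->DefiningBlock != CurBB)
    return Ptr;  // Live-in pointer: reuse it unchanged in the predecessor.
  if (auto *Phi = dynamic_cast<PHIValue *>(Ptr)) {
    auto It = Phi->Incoming.find(Pred);
    return It != Phi->Incoming.end() ? It->second : nullptr;
  }
  // Defined in this block but not a PHI (e.g. "P = some_operator()"): give up.
  return nullptr;
}

int main() {
  BasicBlock BB1{"bb1"}, BB2{"bb2"}, BB3{"bb3"};
  Value P("P"), Q("Q");
  PHIValue R("R", &BB3);
  R.Incoming = {{&BB1, &P}, {&BB2, &Q}};
  // PHI case: the query for R in bb3 becomes a query for P in bb1 and Q in bb2.
  std::printf("in bb1, query %s\n", PHITranslate(&R, &BB3, &BB1)->Name.c_str());
  // Non-PHI case: a pointer produced inside bb1 cannot be translated, so the
  // walk stops at bb1 (recorded as a clobber) instead of scanning its preds.
  Value Op("some_operator()", &BB1);
  std::printf("%s\n", PHITranslate(&Op, &BB1, &BB2) ? "translated" : "stop at bb1");
}
The real patch makes the same decision with dyn_cast<PHINode> and getIncomingValueForBlock (see the diff below), plus one extra wrinkle: if a predecessor is reached again with a different translated pointer (the critical-edge case), the query also bails out to a clobber.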
In the future, this should be enhanced to phi translate through geps and
bitcasts as well (as indicated by the FIXMEs), making memdep even more powerful.
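As a rough, hypothetical illustration of that FIXME direction (not part of this patch; CastValue, PHITranslateCast, and CastsAvailableInPred are invented for the sketch), gep/bitcast translation could extend the toy PHITranslate above by translating the pointer operand first and then requiring an equivalent cast or gep to already be available in the predecessor:
// Hypothetical extension of the sketch above; not code from this commit.
struct CastValue : Value {
  Value *Operand;  // the pointer operand of the bitcast/gep
  CastValue(std::string N, BasicBlock *BB, Value *Op)
      : Value(N, BB), Operand(Op) {}
};

Value *PHITranslateCast(CastValue *Cast, BasicBlock *CurBB, BasicBlock *Pred,
                        const std::map<Value *, Value *> &CastsAvailableInPred) {
  Value *TransOp = PHITranslate(Cast->Operand, CurBB, Pred);
  if (!TransOp)
    return nullptr;  // The operand itself could not be translated: give up.
  // Succeed only if an equivalent cast/gep of the translated operand already
  // exists in Pred; a fancier version could also insert one there.
  auto It = CastsAvailableInPred.find(TransOp);
  return It != CastsAvailableInPred.end() ? It->second : nullptr;
}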
git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@61022 91177308-0d34-0410-b5e6-96231b3b80d8
Diffstat (limited to 'lib/Analysis')
-rw-r--r--   lib/Analysis/MemoryDependenceAnalysis.cpp | 205
1 file changed, 166 insertions(+), 39 deletions(-)
diff --git a/lib/Analysis/MemoryDependenceAnalysis.cpp b/lib/Analysis/MemoryDependenceAnalysis.cpp
index a9c9c17..1dfa1fa 100644
--- a/lib/Analysis/MemoryDependenceAnalysis.cpp
+++ b/lib/Analysis/MemoryDependenceAnalysis.cpp
@@ -382,8 +382,7 @@ MemoryDependenceAnalysis::getNonLocalCallDependency(CallSite QueryCS) {
 
   // isReadonlyCall - If this is a read-only call, we can be more aggressive.
   bool isReadonlyCall = AA->onlyReadsMemory(QueryCS);
-
-  // Visited checked first, vector in sorted order.
+
   SmallPtrSet<BasicBlock*, 64> Visited;
 
   unsigned NumSortedEntries = Cache.size();
@@ -487,10 +486,16 @@ getNonLocalPointerDependency(Value *Pointer, bool isLoad, BasicBlock *FromBB,
   const Type *EltTy = cast<PointerType>(Pointer->getType())->getElementType();
   uint64_t PointeeSize = TD->getTypeStoreSize(EltTy);
 
-  // While we have blocks to analyze, get their values.
-  SmallPtrSet<BasicBlock*, 64> Visited;
-  getNonLocalPointerDepFromBB(Pointer, PointeeSize, isLoad, FromBB,
-                              Result, Visited);
+  // This is the set of blocks we've inspected, and the pointer we consider in
+  // each block.  Because of critical edges, we currently bail out if querying
+  // a block with multiple different pointers.  This can happen during PHI
+  // translation.
+  DenseMap<BasicBlock*, Value*> Visited;
+  if (!getNonLocalPointerDepFromBB(Pointer, PointeeSize, isLoad, FromBB,
+                                   Result, Visited, true))
+    return;
+  Result.push_back(std::make_pair(FromBB,
+                                  MemDepResult::getClobber(FromBB->begin())));
 }
 
 /// GetNonLocalInfoForBlock - Compute the memdep value for BB with
@@ -566,35 +571,51 @@ GetNonLocalInfoForBlock(Value *Pointer, uint64_t PointeeSize,
 }
 
 
-/// getNonLocalPointerDepFromBB -
-void MemoryDependenceAnalysis::
+/// getNonLocalPointerDepFromBB - Perform a dependency query based on
+/// pointer/pointeesize starting at the end of StartBB.  Add any clobber/def
+/// results to the results vector and keep track of which blocks are visited in
+/// 'Visited'.
+///
+/// This has special behavior for the first block queries (when SkipFirstBlock
+/// is true).  In this special case, it ignores the contents of the specified
+/// block and starts returning dependence info for its predecessors.
+///
+/// This function returns false on success, or true to indicate that it could
+/// not compute dependence information for some reason.  This should be treated
+/// as a clobber dependence on the first instruction in the predecessor block.
+bool MemoryDependenceAnalysis::
 getNonLocalPointerDepFromBB(Value *Pointer, uint64_t PointeeSize,
                             bool isLoad, BasicBlock *StartBB,
                             SmallVectorImpl<NonLocalDepEntry> &Result,
-                            SmallPtrSet<BasicBlock*, 64> &Visited) {
+                            DenseMap<BasicBlock*, Value*> &Visited,
+                            bool SkipFirstBlock) {
+
+  // Look up the cached info for Pointer.
   ValueIsLoadPair CacheKey(Pointer, isLoad);
-  std::pair<BasicBlock*, NonLocalDepInfo> &CacheInfo =
-    NonLocalPointerDeps[CacheKey];
-  NonLocalDepInfo *Cache = &CacheInfo.second;
+  std::pair<BBSkipFirstBlockPair, NonLocalDepInfo> *CacheInfo =
+    &NonLocalPointerDeps[CacheKey];
+  NonLocalDepInfo *Cache = &CacheInfo->second;
 
   // If we have valid cached information for exactly the block we are
   // investigating, just return it with no recomputation.
-  if (CacheInfo.first == StartBB) {
+  if (CacheInfo->first == BBSkipFirstBlockPair(StartBB, SkipFirstBlock)) {
     for (NonLocalDepInfo::iterator I = Cache->begin(), E = Cache->end();
          I != E; ++I)
       if (!I->second.isNonLocal())
         Result.push_back(*I);
     ++NumCacheCompleteNonLocalPtr;
-    return;
+    return false;
   }
 
   // Otherwise, either this is a new block, a block with an invalid cache
   // pointer or one that we're about to invalidate by putting more info into it
   // than its valid cache info.  If empty, the result will be valid cache info,
   // otherwise it isn't.
-  CacheInfo.first = Cache->empty() ? StartBB : 0;
+  if (Cache->empty())
+    CacheInfo->first = BBSkipFirstBlockPair(StartBB, SkipFirstBlock);
+  else
+    CacheInfo->first = BBSkipFirstBlockPair();
 
   SmallVector<BasicBlock*, 32> Worklist;
   Worklist.push_back(StartBB);
@@ -606,23 +627,14 @@ getNonLocalPointerDepFromBB(Value *Pointer, uint64_t PointeeSize,
   // revisit blocks after we insert info for them.
   unsigned NumSortedEntries = Cache->size();
 
-  // SkipFirstBlock - If this is the very first block that we're processing, we
-  // don't want to scan or think about its body, because the client was supposed
-  // to do a local dependence query.  Instead, just start processing it by
-  // adding its predecessors to the worklist and iterating.
-  bool SkipFirstBlock = Visited.empty();
-
   while (!Worklist.empty()) {
     BasicBlock *BB = Worklist.pop_back_val();
 
     // Skip the first block if we have it.
-    if (SkipFirstBlock) {
-      SkipFirstBlock = false;
-    } else {
+    if (!SkipFirstBlock) {
       // Analyze the dependency of *Pointer in FromBB.  See if we already have
       // been here.
-      if (!Visited.insert(BB))
-        continue;
+      assert(Visited.count(BB) && "Should check 'visited' before adding to WL");
 
       // Get the dependency info for Pointer in BB.  If we have cached
       // information, we will use it, otherwise we compute it.
@@ -636,11 +648,123 @@ getNonLocalPointerDepFromBB(Value *Pointer, uint64_t PointeeSize,
       }
     }
 
-    // Otherwise, we have to process all the predecessors of this block to scan
-    // them as well.
-    for (BasicBlock **PI = PredCache->GetPreds(BB); *PI; ++PI) {
-      // TODO: PHI TRANSLATE.
-      Worklist.push_back(*PI);
+    // If 'Pointer' is an instruction defined in this block, then we need to do
+    // phi translation to change it into a value live in the predecessor block.
+    // If phi translation fails, then we can't continue dependence analysis.
+    Instruction *PtrInst = dyn_cast<Instruction>(Pointer);
+    bool NeedsPHITranslation = PtrInst && PtrInst->getParent() == BB;
+
+    // If no PHI translation is needed, just add all the predecessors of this
+    // block to scan them as well.
+    if (!NeedsPHITranslation) {
+      SkipFirstBlock = false;
+      for (BasicBlock **PI = PredCache->GetPreds(BB); *PI; ++PI) {
+        // Verify that we haven't looked at this block yet.
+        std::pair<DenseMap<BasicBlock*,Value*>::iterator, bool>
+          InsertRes = Visited.insert(std::make_pair(*PI, Pointer));
+        if (InsertRes.second) {
+          // First time we've looked at *PI.
+          Worklist.push_back(*PI);
+          continue;
+        }
+
+        // If we have seen this block before, but it was with a different
+        // pointer then we have a phi translation failure and we have to treat
+        // this as a clobber.
+        if (InsertRes.first->second != Pointer)
+          goto PredTranslationFailure;
+      }
+      continue;
+    }
+
+    // If we do need to do phi translation, then there are a bunch of different
+    // cases, because we have to find a Value* live in the predecessor block.  We
+    // know that PtrInst is defined in this block at least.
+
+    // If this is directly a PHI node, just use the incoming values for each
+    // pred as the phi translated version.
+    if (PHINode *PtrPHI = dyn_cast<PHINode>(PtrInst)) {
+      for (BasicBlock **PI = PredCache->GetPreds(BB); *PI; ++PI){
+        BasicBlock *Pred = *PI;
+        Value *PredPtr = PtrPHI->getIncomingValueForBlock(Pred);
+
+        // Check to see if we have already visited this pred block with another
+        // pointer.  If so, we can't do this lookup.  This failure can occur
+        // with PHI translation when a critical edge exists and the PHI node in
+        // the successor translates to a pointer value different than the
+        // pointer the block was first analyzed with.
+        std::pair<DenseMap<BasicBlock*,Value*>::iterator, bool>
+          InsertRes = Visited.insert(std::make_pair(Pred, PredPtr));
+
+        if (!InsertRes.second) {
+          // If the predecessor was visited with PredPtr, then we already did
+          // the analysis and can ignore it.
+          if (InsertRes.first->second == PredPtr)
+            continue;
+
+          // Otherwise, the block was previously analyzed with a different
+          // pointer.  We can't represent the result of this case, so we just
+          // treat this as a phi translation failure.
+          goto PredTranslationFailure;
+        }
+
+        // If we have a problem phi translating, fall through to the code below
+        // to handle the failure condition.
+        if (getNonLocalPointerDepFromBB(PredPtr, PointeeSize, isLoad, Pred,
+                                        Result, Visited))
+          goto PredTranslationFailure;
+      }
+
+      // Refresh the CacheInfo/Cache pointer so that it isn't invalidated.
+      CacheInfo = &NonLocalPointerDeps[CacheKey];
+      Cache = &CacheInfo->second;
+
+      // Since we did phi translation, the "Cache" set won't contain all of the
+      // results for the query.  This is ok (we can still use it to accelerate
+      // specific block queries) but we can't do the fastpath "return all
+      // results from the set"  Clear out the indicator for this.
+      CacheInfo->first = BBSkipFirstBlockPair();
+      SkipFirstBlock = false;
+      continue;
+    }
+
+    // TODO: BITCAST, GEP.
+
+    // cerr << "MEMDEP: Could not PHI translate: " << *Pointer;
+    // if (isa<BitCastInst>(PtrInst) || isa<GetElementPtrInst>(PtrInst))
+    //   cerr << "OP:\t\t\t\t" << *PtrInst->getOperand(0);
+  PredTranslationFailure:
+
+    // Refresh the CacheInfo/Cache pointer so that it isn't invalidated.
+    CacheInfo = &NonLocalPointerDeps[CacheKey];
+    Cache = &CacheInfo->second;
+
+    // Since we did phi translation, the "Cache" set won't contain all of the
+    // results for the query.  This is ok (we can still use it to accelerate
+    // specific block queries) but we can't do the fastpath "return all
+    // results from the set"  Clear out the indicator for this.
+    CacheInfo->first = BBSkipFirstBlockPair();
+
+    // If *nothing* works, mark the pointer as being clobbered by the first
+    // instruction in this block.
+    //
+    // If this is the magic first block, return this as a clobber of the whole
+    // incoming value.  Since we can't phi translate to one of the predecessors,
+    // we have to bail out.
+    if (SkipFirstBlock)
+      return true;
+
+    for (NonLocalDepInfo::reverse_iterator I = Cache->rbegin(); ; ++I) {
+      assert(I != Cache->rend() && "Didn't find current block??");
+      if (I->first != BB)
+        continue;
+
+      assert(I->second.isNonLocal() &&
+             "Should only be here with transparent block");
+      I->second = MemDepResult::getClobber(BB->begin());
+      ReverseNonLocalPtrDeps[BB->begin()].insert(CacheKey.getOpaqueValue());
+      Result.push_back(*I);
+      break;
     }
   }
 
@@ -658,19 +782,22 @@ getNonLocalPointerDepFromBB(Value *Pointer, uint64_t PointeeSize,
       Cache->insert(Entry, Val);
       // FALL THROUGH.
     }
-  case 1: {
+  case 1:
     // One new entry, Just insert the new value at the appropriate position.
-    NonLocalDepEntry Val = Cache->back();
-    Cache->pop_back();
-    NonLocalDepInfo::iterator Entry =
-      std::upper_bound(Cache->begin(), Cache->end(), Val);
-    Cache->insert(Entry, Val);
+    if (Cache->size() != 1) {
+      NonLocalDepEntry Val = Cache->back();
+      Cache->pop_back();
+      NonLocalDepInfo::iterator Entry =
+        std::upper_bound(Cache->begin(), Cache->end(), Val);
+      Cache->insert(Entry, Val);
+    }
    break;
-  }
  default:
    // Added many values, do a full scale sort.
    std::sort(Cache->begin(), Cache->end());
  }
+
+  return false;
}

/// RemoveCachedNonLocalPointerDependencies - If P exists in
@@ -851,7 +978,7 @@ void MemoryDependenceAnalysis::removeInstruction(Instruction *RemInst) {
     NonLocalDepInfo &NLPDI = NonLocalPointerDeps[P].second;

     // The cache is not valid for any specific block anymore.
-    NonLocalPointerDeps[P].first = 0;
+    NonLocalPointerDeps[P].first = BBSkipFirstBlockPair();

     // Update any entries for RemInst to use the instruction after it.
     for (NonLocalDepInfo::iterator DI = NLPDI.begin(), DE = NLPDI.end();