diff options
-rw-r--r-- | include/llvm/Analysis/MemoryDependenceAnalysis.h | 17 | ||||
-rw-r--r-- | lib/Analysis/MemoryDependenceAnalysis.cpp | 205 | ||||
-rw-r--r-- | test/Transforms/GVN/rle-must-alias.ll | 5 | ||||
-rw-r--r-- | test/Transforms/GVN/rle-no-phi-translate.ll | 3 | ||||
-rw-r--r-- | test/Transforms/GVN/rle-phi-translate.ll | 32 |
5 files changed, 218 insertions, 44 deletions
diff --git a/include/llvm/Analysis/MemoryDependenceAnalysis.h b/include/llvm/Analysis/MemoryDependenceAnalysis.h index 8a5b07e..ac92b90 100644 --- a/include/llvm/Analysis/MemoryDependenceAnalysis.h +++ b/include/llvm/Analysis/MemoryDependenceAnalysis.h @@ -156,12 +156,18 @@ namespace llvm { /// ValueIsLoadPair - This is a pair<Value*, bool> where the bool is true if /// the dependence is a read only dependence, false if read/write. typedef PointerIntPair<Value*, 1, bool> ValueIsLoadPair; - + + /// BBSkipFirstBlockPair - This pair is used when caching information for a + /// block. If the pointer is null, the cache value is not a full query that + /// starts at the specified block. If non-null, the bool indicates whether + /// or not the contents of the block was skipped. + typedef PointerIntPair<BasicBlock*, 1, bool> BBSkipFirstBlockPair; + /// CachedNonLocalPointerInfo - This map stores the cached results of doing /// a pointer lookup at the bottom of a block. The key of this map is the /// pointer+isload bit, the value is a list of <bb->result> mappings. - typedef DenseMap<ValueIsLoadPair, - std::pair<BasicBlock*, NonLocalDepInfo> > CachedNonLocalPointerInfo; + typedef DenseMap<ValueIsLoadPair, std::pair<BBSkipFirstBlockPair, + NonLocalDepInfo> > CachedNonLocalPointerInfo; CachedNonLocalPointerInfo NonLocalPointerDeps; // A map from instructions to their non-local pointer dependencies. @@ -259,10 +265,11 @@ namespace llvm { MemDepResult getCallSiteDependencyFrom(CallSite C, bool isReadOnlyCall, BasicBlock::iterator ScanIt, BasicBlock *BB); - void getNonLocalPointerDepFromBB(Value *Pointer, uint64_t Size, + bool getNonLocalPointerDepFromBB(Value *Pointer, uint64_t Size, bool isLoad, BasicBlock *BB, SmallVectorImpl<NonLocalDepEntry> &Result, - SmallPtrSet<BasicBlock*, 64> &Visited); + DenseMap<BasicBlock*, Value*> &Visited, + bool SkipFirstBlock = false); MemDepResult GetNonLocalInfoForBlock(Value *Pointer, uint64_t PointeeSize, bool isLoad, BasicBlock *BB, NonLocalDepInfo *Cache, diff --git a/lib/Analysis/MemoryDependenceAnalysis.cpp b/lib/Analysis/MemoryDependenceAnalysis.cpp index a9c9c17..1dfa1fa 100644 --- a/lib/Analysis/MemoryDependenceAnalysis.cpp +++ b/lib/Analysis/MemoryDependenceAnalysis.cpp @@ -382,8 +382,7 @@ MemoryDependenceAnalysis::getNonLocalCallDependency(CallSite QueryCS) { // isReadonlyCall - If this is a read-only call, we can be more aggressive. bool isReadonlyCall = AA->onlyReadsMemory(QueryCS); - - // Visited checked first, vector in sorted order. + SmallPtrSet<BasicBlock*, 64> Visited; unsigned NumSortedEntries = Cache.size(); @@ -487,10 +486,16 @@ getNonLocalPointerDependency(Value *Pointer, bool isLoad, BasicBlock *FromBB, const Type *EltTy = cast<PointerType>(Pointer->getType())->getElementType(); uint64_t PointeeSize = TD->getTypeStoreSize(EltTy); - // While we have blocks to analyze, get their values. - SmallPtrSet<BasicBlock*, 64> Visited; - getNonLocalPointerDepFromBB(Pointer, PointeeSize, isLoad, FromBB, - Result, Visited); + // This is the set of blocks we've inspected, and the pointer we consider in + // each block. Because of critical edges, we currently bail out if querying + // a block with multiple different pointers. This can happen during PHI + // translation. + DenseMap<BasicBlock*, Value*> Visited; + if (!getNonLocalPointerDepFromBB(Pointer, PointeeSize, isLoad, FromBB, + Result, Visited, true)) + return; + Result.push_back(std::make_pair(FromBB, + MemDepResult::getClobber(FromBB->begin()))); } /// GetNonLocalInfoForBlock - Compute the memdep value for BB with @@ -566,35 +571,51 @@ GetNonLocalInfoForBlock(Value *Pointer, uint64_t PointeeSize, } -/// getNonLocalPointerDepFromBB - -void MemoryDependenceAnalysis:: +/// getNonLocalPointerDepFromBB - Perform a dependency query based on +/// pointer/pointeesize starting at the end of StartBB. Add any clobber/def +/// results to the results vector and keep track of which blocks are visited in +/// 'Visited'. +/// +/// This has special behavior for the first block queries (when SkipFirstBlock +/// is true). In this special case, it ignores the contents of the specified +/// block and starts returning dependence info for its predecessors. +/// +/// This function returns false on success, or true to indicate that it could +/// not compute dependence information for some reason. This should be treated +/// as a clobber dependence on the first instruction in the predecessor block. +bool MemoryDependenceAnalysis:: getNonLocalPointerDepFromBB(Value *Pointer, uint64_t PointeeSize, bool isLoad, BasicBlock *StartBB, SmallVectorImpl<NonLocalDepEntry> &Result, - SmallPtrSet<BasicBlock*, 64> &Visited) { + DenseMap<BasicBlock*, Value*> &Visited, + bool SkipFirstBlock) { + // Look up the cached info for Pointer. ValueIsLoadPair CacheKey(Pointer, isLoad); - std::pair<BasicBlock*, NonLocalDepInfo> &CacheInfo = - NonLocalPointerDeps[CacheKey]; - NonLocalDepInfo *Cache = &CacheInfo.second; + std::pair<BBSkipFirstBlockPair, NonLocalDepInfo> *CacheInfo = + &NonLocalPointerDeps[CacheKey]; + NonLocalDepInfo *Cache = &CacheInfo->second; // If we have valid cached information for exactly the block we are // investigating, just return it with no recomputation. - if (CacheInfo.first == StartBB) { + if (CacheInfo->first == BBSkipFirstBlockPair(StartBB, SkipFirstBlock)) { for (NonLocalDepInfo::iterator I = Cache->begin(), E = Cache->end(); I != E; ++I) if (!I->second.isNonLocal()) Result.push_back(*I); ++NumCacheCompleteNonLocalPtr; - return; + return false; } // Otherwise, either this is a new block, a block with an invalid cache // pointer or one that we're about to invalidate by putting more info into it // than its valid cache info. If empty, the result will be valid cache info, // otherwise it isn't. - CacheInfo.first = Cache->empty() ? StartBB : 0; + if (Cache->empty()) + CacheInfo->first = BBSkipFirstBlockPair(StartBB, SkipFirstBlock); + else + CacheInfo->first = BBSkipFirstBlockPair(); SmallVector<BasicBlock*, 32> Worklist; Worklist.push_back(StartBB); @@ -606,23 +627,14 @@ getNonLocalPointerDepFromBB(Value *Pointer, uint64_t PointeeSize, // revisit blocks after we insert info for them. unsigned NumSortedEntries = Cache->size(); - // SkipFirstBlock - If this is the very first block that we're processing, we - // don't want to scan or think about its body, because the client was supposed - // to do a local dependence query. Instead, just start processing it by - // adding its predecessors to the worklist and iterating. - bool SkipFirstBlock = Visited.empty(); - while (!Worklist.empty()) { BasicBlock *BB = Worklist.pop_back_val(); // Skip the first block if we have it. - if (SkipFirstBlock) { - SkipFirstBlock = false; - } else { + if (!SkipFirstBlock) { // Analyze the dependency of *Pointer in FromBB. See if we already have // been here. - if (!Visited.insert(BB)) - continue; + assert(Visited.count(BB) && "Should check 'visited' before adding to WL"); // Get the dependency info for Pointer in BB. If we have cached // information, we will use it, otherwise we compute it. @@ -636,11 +648,123 @@ getNonLocalPointerDepFromBB(Value *Pointer, uint64_t PointeeSize, } } - // Otherwise, we have to process all the predecessors of this block to scan - // them as well. - for (BasicBlock **PI = PredCache->GetPreds(BB); *PI; ++PI) { - // TODO: PHI TRANSLATE. - Worklist.push_back(*PI); + // If 'Pointer' is an instruction defined in this block, then we need to do + // phi translation to change it into a value live in the predecessor block. + // If phi translation fails, then we can't continue dependence analysis. + Instruction *PtrInst = dyn_cast<Instruction>(Pointer); + bool NeedsPHITranslation = PtrInst && PtrInst->getParent() == BB; + + // If no PHI translation is needed, just add all the predecessors of this + // block to scan them as well. + if (!NeedsPHITranslation) { + SkipFirstBlock = false; + for (BasicBlock **PI = PredCache->GetPreds(BB); *PI; ++PI) { + // Verify that we haven't looked at this block yet. + std::pair<DenseMap<BasicBlock*,Value*>::iterator, bool> + InsertRes = Visited.insert(std::make_pair(*PI, Pointer)); + if (InsertRes.second) { + // First time we've looked at *PI. + Worklist.push_back(*PI); + continue; + } + + // If we have seen this block before, but it was with a different + // pointer then we have a phi translation failure and we have to treat + // this as a clobber. + if (InsertRes.first->second != Pointer) + goto PredTranslationFailure; + } + continue; + } + + // If we do need to do phi translation, then there are a bunch of different + // cases, because we have to find a Value* live in the predecessor block. We + // know that PtrInst is defined in this block at least. + + // If this is directly a PHI node, just use the incoming values for each + // pred as the phi translated version. + if (PHINode *PtrPHI = dyn_cast<PHINode>(PtrInst)) { + for (BasicBlock **PI = PredCache->GetPreds(BB); *PI; ++PI){ + BasicBlock *Pred = *PI; + Value *PredPtr = PtrPHI->getIncomingValueForBlock(Pred); + + // Check to see if we have already visited this pred block with another + // pointer. If so, we can't do this lookup. This failure can occur + // with PHI translation when a critical edge exists and the PHI node in + // the successor translates to a pointer value different than the + // pointer the block was first analyzed with. + std::pair<DenseMap<BasicBlock*,Value*>::iterator, bool> + InsertRes = Visited.insert(std::make_pair(Pred, PredPtr)); + + if (!InsertRes.second) { + // If the predecessor was visited with PredPtr, then we already did + // the analysis and can ignore it. + if (InsertRes.first->second == PredPtr) + continue; + + // Otherwise, the block was previously analyzed with a different + // pointer. We can't represent the result of this case, so we just + // treat this as a phi translation failure. + goto PredTranslationFailure; + } + + // If we have a problem phi translating, fall through to the code below + // to handle the failure condition. + if (getNonLocalPointerDepFromBB(PredPtr, PointeeSize, isLoad, Pred, + Result, Visited)) + goto PredTranslationFailure; + } + + // Refresh the CacheInfo/Cache pointer so that it isn't invalidated. + CacheInfo = &NonLocalPointerDeps[CacheKey]; + Cache = &CacheInfo->second; + + // Since we did phi translation, the "Cache" set won't contain all of the + // results for the query. This is ok (we can still use it to accelerate + // specific block queries) but we can't do the fastpath "return all + // results from the set" Clear out the indicator for this. + CacheInfo->first = BBSkipFirstBlockPair(); + SkipFirstBlock = false; + continue; + } + + // TODO: BITCAST, GEP. + + // cerr << "MEMDEP: Could not PHI translate: " << *Pointer; + // if (isa<BitCastInst>(PtrInst) || isa<GetElementPtrInst>(PtrInst)) + // cerr << "OP:\t\t\t\t" << *PtrInst->getOperand(0); + PredTranslationFailure: + + // Refresh the CacheInfo/Cache pointer so that it isn't invalidated. + CacheInfo = &NonLocalPointerDeps[CacheKey]; + Cache = &CacheInfo->second; + + // Since we did phi translation, the "Cache" set won't contain all of the + // results for the query. This is ok (we can still use it to accelerate + // specific block queries) but we can't do the fastpath "return all + // results from the set" Clear out the indicator for this. + CacheInfo->first = BBSkipFirstBlockPair(); + + // If *nothing* works, mark the pointer as being clobbered by the first + // instruction in this block. + // + // If this is the magic first block, return this as a clobber of the whole + // incoming value. Since we can't phi translate to one of the predecessors, + // we have to bail out. + if (SkipFirstBlock) + return true; + + for (NonLocalDepInfo::reverse_iterator I = Cache->rbegin(); ; ++I) { + assert(I != Cache->rend() && "Didn't find current block??"); + if (I->first != BB) + continue; + + assert(I->second.isNonLocal() && + "Should only be here with transparent block"); + I->second = MemDepResult::getClobber(BB->begin()); + ReverseNonLocalPtrDeps[BB->begin()].insert(CacheKey.getOpaqueValue()); + Result.push_back(*I); + break; } } @@ -658,19 +782,22 @@ getNonLocalPointerDepFromBB(Value *Pointer, uint64_t PointeeSize, Cache->insert(Entry, Val); // FALL THROUGH. } - case 1: { + case 1: // One new entry, Just insert the new value at the appropriate position. - NonLocalDepEntry Val = Cache->back(); - Cache->pop_back(); - NonLocalDepInfo::iterator Entry = - std::upper_bound(Cache->begin(), Cache->end(), Val); - Cache->insert(Entry, Val); + if (Cache->size() != 1) { + NonLocalDepEntry Val = Cache->back(); + Cache->pop_back(); + NonLocalDepInfo::iterator Entry = + std::upper_bound(Cache->begin(), Cache->end(), Val); + Cache->insert(Entry, Val); + } break; - } default: // Added many values, do a full scale sort. std::sort(Cache->begin(), Cache->end()); } + + return false; } /// RemoveCachedNonLocalPointerDependencies - If P exists in @@ -851,7 +978,7 @@ void MemoryDependenceAnalysis::removeInstruction(Instruction *RemInst) { NonLocalDepInfo &NLPDI = NonLocalPointerDeps[P].second; // The cache is not valid for any specific block anymore. - NonLocalPointerDeps[P].first = 0; + NonLocalPointerDeps[P].first = BBSkipFirstBlockPair(); // Update any entries for RemInst to use the instruction after it. for (NonLocalDepInfo::iterator DI = NLPDI.begin(), DE = NLPDI.end(); diff --git a/test/Transforms/GVN/rle-must-alias.ll b/test/Transforms/GVN/rle-must-alias.ll index e507556..7108916 100644 --- a/test/Transforms/GVN/rle-must-alias.ll +++ b/test/Transforms/GVN/rle-must-alias.ll @@ -1,4 +1,9 @@ ; RUN: llvm-as < %s | opt -gvn | llvm-dis | grep {DEAD.rle = phi i32} +; XFAIL: * + +; FIXME: GVN should eliminate the fully redundant %9 GEP which +; allows DEAD to be removed. This is PR3198. + ; The %7 and %4 loads combine to make %DEAD unneeded. target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64-f80:128:128" target triple = "i386-apple-darwin7" diff --git a/test/Transforms/GVN/rle-no-phi-translate.ll b/test/Transforms/GVN/rle-no-phi-translate.ll index b7f0dbc..9ffbe21 100644 --- a/test/Transforms/GVN/rle-no-phi-translate.ll +++ b/test/Transforms/GVN/rle-no-phi-translate.ll @@ -1,4 +1,7 @@ ; RUN: llvm-as < %s | opt -gvn | llvm-dis | grep load +; FIXME: This should be promotable, but memdep/gvn don't track values +; path/edge sensitively enough. + target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64-f80:128:128" target triple = "i386-apple-darwin7" diff --git a/test/Transforms/GVN/rle-phi-translate.ll b/test/Transforms/GVN/rle-phi-translate.ll new file mode 100644 index 0000000..0f9c58f --- /dev/null +++ b/test/Transforms/GVN/rle-phi-translate.ll @@ -0,0 +1,32 @@ +; RUN: llvm-as < %s | opt -gvn | llvm-dis | grep {%cv.rle = phi i32} +; RUN: llvm-as < %s | opt -gvn | llvm-dis | grep {%bv.rle = phi i32} +target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64-f80:128:128" +target triple = "i386-apple-darwin7" + +define i32 @g(i32* %b, i32* %c) nounwind { +entry: + %g = alloca i32 ; <i32*> [#uses=4] + %t1 = icmp eq i32* %b, null ; <i1> [#uses=1] + br i1 %t1, label %bb, label %bb1 + +bb: ; preds = %entry + %t2 = load i32* %c, align 4 ; <i32> [#uses=1] + %t3 = add i32 %t2, 1 ; <i32> [#uses=1] + store i32 %t3, i32* %g, align 4 + br label %bb2 + +bb1: ; preds = %entry + %t5 = load i32* %b, align 4 ; <i32> [#uses=1] + %t6 = add i32 %t5, 1 ; <i32> [#uses=1] + store i32 %t6, i32* %g, align 4 + br label %bb2 + +bb2: ; preds = %bb1, %bb + %c_addr.0 = phi i32* [ %g, %bb1 ], [ %c, %bb ] ; <i32*> [#uses=1] + %b_addr.0 = phi i32* [ %b, %bb1 ], [ %g, %bb ] ; <i32*> [#uses=1] + %cv = load i32* %c_addr.0, align 4 ; <i32> [#uses=1] + %bv = load i32* %b_addr.0, align 4 ; <i32> [#uses=1] + %ret = add i32 %cv, %bv ; <i32> [#uses=1] + ret i32 %ret +} + |