diff options
author | Chris Lattner <sabre@nondot.org> | 2011-05-22 17:39:56 +0000 |
---|---|---|
committer | Chris Lattner <sabre@nondot.org> | 2011-05-22 17:39:56 +0000 |
commit | 4f81b5419295cfc26a1349d6c23a55c6d2a683e1 (patch) | |
tree | b3b8121012a48cca6bc1350a04794771da37ec5d /lib | |
parent | 5649ba70fb39f2fda4791d255ae8bb373071874f (diff) | |
download | external_llvm-4f81b5419295cfc26a1349d6c23a55c6d2a683e1.zip external_llvm-4f81b5419295cfc26a1349d6c23a55c6d2a683e1.tar.gz external_llvm-4f81b5419295cfc26a1349d6c23a55c6d2a683e1.tar.bz2 |
Fix PR9815: I was trying to get out of "generating code and then
failing to form a memset, then having to delete it" but my approximation
isn't safe for self-recurrent loops. Instead of doing a hack, just
do it the right way.
git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@131858 91177308-0d34-0410-b5e6-96231b3b80d8
Diffstat (limited to 'lib')
-rw-r--r-- | lib/Transforms/Scalar/LoopIdiomRecognize.cpp | 110 |
1 files changed, 66 insertions, 44 deletions
diff --git a/lib/Transforms/Scalar/LoopIdiomRecognize.cpp b/lib/Transforms/Scalar/LoopIdiomRecognize.cpp index 08d96ed..dbf6eec 100644 --- a/lib/Transforms/Scalar/LoopIdiomRecognize.cpp +++ b/lib/Transforms/Scalar/LoopIdiomRecognize.cpp @@ -128,11 +128,11 @@ INITIALIZE_PASS_END(LoopIdiomRecognize, "loop-idiom", "Recognize loop idioms", Pass *llvm::createLoopIdiomPass() { return new LoopIdiomRecognize(); } -/// DeleteDeadInstruction - Delete this instruction. Before we do, go through +/// deleteDeadInstruction - Delete this instruction. Before we do, go through /// and zero out all the operands of this instruction. If any of them become /// dead, delete them and the computation tree that feeds them. /// -static void DeleteDeadInstruction(Instruction *I, ScalarEvolution &SE) { +static void deleteDeadInstruction(Instruction *I, ScalarEvolution &SE) { SmallVector<Instruction*, 32> NowDeadInsts; NowDeadInsts.push_back(I); @@ -162,6 +162,14 @@ static void DeleteDeadInstruction(Instruction *I, ScalarEvolution &SE) { } while (!NowDeadInsts.empty()); } +/// deleteIfDeadInstruction - If the specified value is a dead instruction, +/// delete it and any recursively used instructions. +static void deleteIfDeadInstruction(Value *V, ScalarEvolution &SE) { + if (Instruction *I = dyn_cast<Instruction>(V)) + if (isInstructionTriviallyDead(I)) + deleteDeadInstruction(I, SE); +} + bool LoopIdiomRecognize::runOnLoop(Loop *L, LPPassManager &LPM) { CurLoop = L; @@ -454,31 +462,35 @@ processLoopStridedStore(Value *DestPtr, unsigned StoreSize, return false; } - - // Okay, we have a strided store "p[i]" of a splattable value. We can turn - // this into a memset in the loop preheader now if we want. However, this - // would be unsafe to do if there is anything else in the loop that may read - // or write to the aliased location. Check for an alias. 
- if (mayLoopAccessLocation(DestPtr, AliasAnalysis::ModRef, - CurLoop, BECount, - StoreSize, getAnalysis<AliasAnalysis>(), TheStore)) - return false; - - // Okay, everything looks good, insert the memset. - BasicBlock *Preheader = CurLoop->getLoopPreheader(); - - IRBuilder<> Builder(Preheader->getTerminator()); - // The trip count of the loop and the base pointer of the addrec SCEV is // guaranteed to be loop invariant, which means that it should dominate the - // header. Just insert code for it in the preheader. + // header. This allows us to insert code for it in the preheader. + BasicBlock *Preheader = CurLoop->getLoopPreheader(); + IRBuilder<> Builder(Preheader->getTerminator()); SCEVExpander Expander(*SE); - + + // Okay, we have a strided store "p[i]" of a splattable value. We can turn + // this into a memset in the loop preheader now if we want. However, this + // would be unsafe to do if there is anything else in the loop that may read + // or write to the aliased location. Check for any overlap by generating the + // base pointer and checking the region. unsigned AddrSpace = cast<PointerType>(DestPtr->getType())->getAddressSpace(); Value *BasePtr = Expander.expandCodeFor(Ev->getStart(), Builder.getInt8PtrTy(AddrSpace), Preheader->getTerminator()); + + if (mayLoopAccessLocation(BasePtr, AliasAnalysis::ModRef, + CurLoop, BECount, + StoreSize, getAnalysis<AliasAnalysis>(), TheStore)){ + Expander.clear(); + // If we generated new code for the base pointer, clean up. + deleteIfDeadInstruction(BasePtr, *SE); + return false; + } + + // Okay, everything looks good, insert the memset. + // The # stored bytes is (BECount+1)*Size. Expand the trip count out to // pointer size if it isn't already. const Type *IntPtr = TD->getIntPtrType(DestPtr->getContext()); @@ -521,7 +533,7 @@ processLoopStridedStore(Value *DestPtr, unsigned StoreSize, // Okay, the memset has been formed. Zap the original store and anything that // feeds into it. 
- DeleteDeadInstruction(TheStore, *SE); + deleteDeadInstruction(TheStore, *SE); ++NumMemSet; return true; } @@ -539,41 +551,51 @@ processLoopStoreOfLoopLoad(StoreInst *SI, unsigned StoreSize, LoadInst *LI = cast<LoadInst>(SI->getValueOperand()); + // The trip count of the loop and the base pointer of the addrec SCEV is + // guaranteed to be loop invariant, which means that it should dominate the + // header. This allows us to insert code for it in the preheader. + BasicBlock *Preheader = CurLoop->getLoopPreheader(); + IRBuilder<> Builder(Preheader->getTerminator()); + SCEVExpander Expander(*SE); + // Okay, we have a strided store "p[i]" of a loaded value. We can turn // this into a memcpy in the loop preheader now if we want. However, this // would be unsafe to do if there is anything else in the loop that may read - // or write to the stored location (including the load feeding the stores). - // Check for an alias. - if (mayLoopAccessLocation(SI->getPointerOperand(), AliasAnalysis::ModRef, + // or write the memory region we're storing to. This includes the load that + // feeds the stores. Check for an alias by generating the base address and + // checking everything. + Value *StoreBasePtr = + Expander.expandCodeFor(StoreEv->getStart(), + Builder.getInt8PtrTy(SI->getPointerAddressSpace()), + Preheader->getTerminator()); + + if (mayLoopAccessLocation(StoreBasePtr, AliasAnalysis::ModRef, CurLoop, BECount, StoreSize, - getAnalysis<AliasAnalysis>(), SI)) + getAnalysis<AliasAnalysis>(), SI)) { + Expander.clear(); + // If we generated new code for the base pointer, clean up. + deleteIfDeadInstruction(StoreBasePtr, *SE); return false; + } // For a memcpy, we have to make sure that the input array is not being // mutated by the loop. - if (mayLoopAccessLocation(LI->getPointerOperand(), AliasAnalysis::Mod, - CurLoop, BECount, StoreSize, - getAnalysis<AliasAnalysis>(), SI)) - return false; - - // Okay, everything looks good, insert the memcpy. 
- BasicBlock *Preheader = CurLoop->getLoopPreheader(); - - IRBuilder<> Builder(Preheader->getTerminator()); - - // The trip count of the loop and the base pointer of the addrec SCEV is - // guaranteed to be loop invariant, which means that it should dominate the - // header. Just insert code for it in the preheader. - SCEVExpander Expander(*SE); - Value *LoadBasePtr = Expander.expandCodeFor(LoadEv->getStart(), Builder.getInt8PtrTy(LI->getPointerAddressSpace()), Preheader->getTerminator()); - Value *StoreBasePtr = - Expander.expandCodeFor(StoreEv->getStart(), - Builder.getInt8PtrTy(SI->getPointerAddressSpace()), - Preheader->getTerminator()); + + if (mayLoopAccessLocation(LoadBasePtr, AliasAnalysis::Mod, CurLoop, BECount, + StoreSize, getAnalysis<AliasAnalysis>(), SI)) { + Expander.clear(); + // If we generated new code for the base pointer, clean up. + deleteIfDeadInstruction(LoadBasePtr, *SE); + deleteIfDeadInstruction(StoreBasePtr, *SE); + return false; + } + + // Okay, everything is safe, we can transform this! + // The # stored bytes is (BECount+1)*Size. Expand the trip count out to // pointer size if it isn't already. @@ -601,7 +623,7 @@ processLoopStoreOfLoopLoad(StoreInst *SI, unsigned StoreSize, // Okay, the memset has been formed. Zap the original store and anything that // feeds into it. - DeleteDeadInstruction(SI, *SE); + deleteDeadInstruction(SI, *SE); ++NumMemCpy; return true; } |