diff options
author | Chris Lattner <sabre@nondot.org> | 2011-05-22 17:39:56 +0000 |
---|---|---|
committer | Chris Lattner <sabre@nondot.org> | 2011-05-22 17:39:56 +0000 |
commit | 4f81b5419295cfc26a1349d6c23a55c6d2a683e1 (patch) | |
tree | b3b8121012a48cca6bc1350a04794771da37ec5d | |
parent | 5649ba70fb39f2fda4791d255ae8bb373071874f (diff) | |
download | external_llvm-4f81b5419295cfc26a1349d6c23a55c6d2a683e1.zip external_llvm-4f81b5419295cfc26a1349d6c23a55c6d2a683e1.tar.gz external_llvm-4f81b5419295cfc26a1349d6c23a55c6d2a683e1.tar.bz2 |
Fix PR9815: I was trying to get out of "generating code and then
failing to form a memset, then having to delete it" but my approximation
isn't safe for self recurrent loops. Instead of doing a hack, just
do it the right way.
git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@131858 91177308-0d34-0410-b5e6-96231b3b80d8
-rw-r--r-- | lib/Transforms/Scalar/LoopIdiomRecognize.cpp | 110 | ||||
-rw-r--r-- | test/Transforms/LoopIdiom/basic.ll | 37 |
2 files changed, 103 insertions, 44 deletions
diff --git a/lib/Transforms/Scalar/LoopIdiomRecognize.cpp b/lib/Transforms/Scalar/LoopIdiomRecognize.cpp index 08d96ed..dbf6eec 100644 --- a/lib/Transforms/Scalar/LoopIdiomRecognize.cpp +++ b/lib/Transforms/Scalar/LoopIdiomRecognize.cpp @@ -128,11 +128,11 @@ INITIALIZE_PASS_END(LoopIdiomRecognize, "loop-idiom", "Recognize loop idioms", Pass *llvm::createLoopIdiomPass() { return new LoopIdiomRecognize(); } -/// DeleteDeadInstruction - Delete this instruction. Before we do, go through +/// deleteDeadInstruction - Delete this instruction. Before we do, go through /// and zero out all the operands of this instruction. If any of them become /// dead, delete them and the computation tree that feeds them. /// -static void DeleteDeadInstruction(Instruction *I, ScalarEvolution &SE) { +static void deleteDeadInstruction(Instruction *I, ScalarEvolution &SE) { SmallVector<Instruction*, 32> NowDeadInsts; NowDeadInsts.push_back(I); @@ -162,6 +162,14 @@ static void DeleteDeadInstruction(Instruction *I, ScalarEvolution &SE) { } while (!NowDeadInsts.empty()); } +/// deleteIfDeadInstruction - If the specified value is a dead instruction, +/// delete it and any recursively used instructions. +static void deleteIfDeadInstruction(Value *V, ScalarEvolution &SE) { + if (Instruction *I = dyn_cast<Instruction>(V)) + if (isInstructionTriviallyDead(I)) + deleteDeadInstruction(I, SE); +} + bool LoopIdiomRecognize::runOnLoop(Loop *L, LPPassManager &LPM) { CurLoop = L; @@ -454,31 +462,35 @@ processLoopStridedStore(Value *DestPtr, unsigned StoreSize, return false; } - - // Okay, we have a strided store "p[i]" of a splattable value. We can turn - // this into a memset in the loop preheader now if we want. However, this - // would be unsafe to do if there is anything else in the loop that may read - // or write to the aliased location. Check for an alias. 
- if (mayLoopAccessLocation(DestPtr, AliasAnalysis::ModRef, - CurLoop, BECount, - StoreSize, getAnalysis<AliasAnalysis>(), TheStore)) - return false; - - // Okay, everything looks good, insert the memset. - BasicBlock *Preheader = CurLoop->getLoopPreheader(); - - IRBuilder<> Builder(Preheader->getTerminator()); - // The trip count of the loop and the base pointer of the addrec SCEV is // guaranteed to be loop invariant, which means that it should dominate the - // header. Just insert code for it in the preheader. + // header. This allows us to insert code for it in the preheader. + BasicBlock *Preheader = CurLoop->getLoopPreheader(); + IRBuilder<> Builder(Preheader->getTerminator()); SCEVExpander Expander(*SE); - + + // Okay, we have a strided store "p[i]" of a splattable value. We can turn + // this into a memset in the loop preheader now if we want. However, this + // would be unsafe to do if there is anything else in the loop that may read + // or write to the aliased location. Check for any overlap by generating the + // base pointer and checking the region. unsigned AddrSpace = cast<PointerType>(DestPtr->getType())->getAddressSpace(); Value *BasePtr = Expander.expandCodeFor(Ev->getStart(), Builder.getInt8PtrTy(AddrSpace), Preheader->getTerminator()); + + if (mayLoopAccessLocation(BasePtr, AliasAnalysis::ModRef, + CurLoop, BECount, + StoreSize, getAnalysis<AliasAnalysis>(), TheStore)){ + Expander.clear(); + // If we generated new code for the base pointer, clean up. + deleteIfDeadInstruction(BasePtr, *SE); + return false; + } + + // Okay, everything looks good, insert the memset. + // The # stored bytes is (BECount+1)*Size. Expand the trip count out to // pointer size if it isn't already. const Type *IntPtr = TD->getIntPtrType(DestPtr->getContext()); @@ -521,7 +533,7 @@ processLoopStridedStore(Value *DestPtr, unsigned StoreSize, // Okay, the memset has been formed. Zap the original store and anything that // feeds into it. 
- DeleteDeadInstruction(TheStore, *SE); + deleteDeadInstruction(TheStore, *SE); ++NumMemSet; return true; } @@ -539,41 +551,51 @@ processLoopStoreOfLoopLoad(StoreInst *SI, unsigned StoreSize, LoadInst *LI = cast<LoadInst>(SI->getValueOperand()); + // The trip count of the loop and the base pointer of the addrec SCEV is + // guaranteed to be loop invariant, which means that it should dominate the + // header. This allows us to insert code for it in the preheader. + BasicBlock *Preheader = CurLoop->getLoopPreheader(); + IRBuilder<> Builder(Preheader->getTerminator()); + SCEVExpander Expander(*SE); + // Okay, we have a strided store "p[i]" of a loaded value. We can turn // this into a memcpy in the loop preheader now if we want. However, this // would be unsafe to do if there is anything else in the loop that may read - // or write to the stored location (including the load feeding the stores). - // Check for an alias. - if (mayLoopAccessLocation(SI->getPointerOperand(), AliasAnalysis::ModRef, + // or write the memory region we're storing to. This includes the load that + // feeds the stores. Check for an alias by generating the base address and + // checking everything. + Value *StoreBasePtr = + Expander.expandCodeFor(StoreEv->getStart(), + Builder.getInt8PtrTy(SI->getPointerAddressSpace()), + Preheader->getTerminator()); + + if (mayLoopAccessLocation(StoreBasePtr, AliasAnalysis::ModRef, CurLoop, BECount, StoreSize, - getAnalysis<AliasAnalysis>(), SI)) + getAnalysis<AliasAnalysis>(), SI)) { + Expander.clear(); + // If we generated new code for the base pointer, clean up. + deleteIfDeadInstruction(StoreBasePtr, *SE); return false; + } // For a memcpy, we have to make sure that the input array is not being // mutated by the loop. - if (mayLoopAccessLocation(LI->getPointerOperand(), AliasAnalysis::Mod, - CurLoop, BECount, StoreSize, - getAnalysis<AliasAnalysis>(), SI)) - return false; - - // Okay, everything looks good, insert the memcpy. 
- BasicBlock *Preheader = CurLoop->getLoopPreheader(); - - IRBuilder<> Builder(Preheader->getTerminator()); - - // The trip count of the loop and the base pointer of the addrec SCEV is - // guaranteed to be loop invariant, which means that it should dominate the - // header. Just insert code for it in the preheader. - SCEVExpander Expander(*SE); - Value *LoadBasePtr = Expander.expandCodeFor(LoadEv->getStart(), Builder.getInt8PtrTy(LI->getPointerAddressSpace()), Preheader->getTerminator()); - Value *StoreBasePtr = - Expander.expandCodeFor(StoreEv->getStart(), - Builder.getInt8PtrTy(SI->getPointerAddressSpace()), - Preheader->getTerminator()); + + if (mayLoopAccessLocation(LoadBasePtr, AliasAnalysis::Mod, CurLoop, BECount, + StoreSize, getAnalysis<AliasAnalysis>(), SI)) { + Expander.clear(); + // If we generated new code for the base pointer, clean up. + deleteIfDeadInstruction(LoadBasePtr, *SE); + deleteIfDeadInstruction(StoreBasePtr, *SE); + return false; + } + + // Okay, everything is safe, we can transform this! + // The # stored bytes is (BECount+1)*Size. Expand the trip count out to // pointer size if it isn't already. @@ -601,7 +623,7 @@ processLoopStoreOfLoopLoad(StoreInst *SI, unsigned StoreSize, // Okay, the memset has been formed. Zap the original store and anything that // feeds into it. - DeleteDeadInstruction(SI, *SE); + deleteDeadInstruction(SI, *SE); ++NumMemCpy; return true; } diff --git a/test/Transforms/LoopIdiom/basic.ll b/test/Transforms/LoopIdiom/basic.ll index 485114c..9695418 100644 --- a/test/Transforms/LoopIdiom/basic.ll +++ b/test/Transforms/LoopIdiom/basic.ll @@ -347,3 +347,40 @@ for.end: ; preds = %for.body ; CHECK-NOT: store ; CHECK: ret void } + + + +; PR9815 - This is a partial overlap case that cannot be safely transformed +; into a memcpy. 
+@g_50 = global [7 x i32] [i32 0, i32 0, i32 0, i32 0, i32 1, i32 0, i32 0], align 16 + +define i32 @test14() nounwind { +entry: + br label %for.body + +for.body: ; preds = %for.inc, %for.body.lr.ph + %tmp5 = phi i32 [ %inc, %for.body ], [ 0, %entry ] + %add = add nsw i32 %tmp5, 4 + %idxprom = sext i32 %add to i64 + %arrayidx = getelementptr inbounds [7 x i32]* @g_50, i32 0, i64 %idxprom + %tmp2 = load i32* %arrayidx, align 4 + %add4 = add nsw i32 %tmp5, 5 + %idxprom5 = sext i32 %add4 to i64 + %arrayidx6 = getelementptr inbounds [7 x i32]* @g_50, i32 0, i64 %idxprom5 + store i32 %tmp2, i32* %arrayidx6, align 4 + %inc = add nsw i32 %tmp5, 1 + %cmp = icmp slt i32 %inc, 2 + br i1 %cmp, label %for.body, label %for.end + +for.end: ; preds = %for.inc + %tmp8 = load i32* getelementptr inbounds ([7 x i32]* @g_50, i32 0, i64 6), align 4 + ret i32 %tmp8 +; CHECK: @test14 +; CHECK: for.body: +; CHECK: load i32 +; CHECK: store i32 +; CHECK: br i1 %cmp + +} + + |