aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorChris Lattner <sabre@nondot.org>2011-05-22 17:39:56 +0000
committerChris Lattner <sabre@nondot.org>2011-05-22 17:39:56 +0000
commit4f81b5419295cfc26a1349d6c23a55c6d2a683e1 (patch)
treeb3b8121012a48cca6bc1350a04794771da37ec5d
parent5649ba70fb39f2fda4791d255ae8bb373071874f (diff)
downloadexternal_llvm-4f81b5419295cfc26a1349d6c23a55c6d2a683e1.zip
external_llvm-4f81b5419295cfc26a1349d6c23a55c6d2a683e1.tar.gz
external_llvm-4f81b5419295cfc26a1349d6c23a55c6d2a683e1.tar.bz2
Fix PR9815: I was trying to get out of "generating code and then
failing to form a memset, then having to delete it" but my approximation isn't safe for self recurrent loops. Instead of doign a hack, just do it the right way. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@131858 91177308-0d34-0410-b5e6-96231b3b80d8
-rw-r--r--lib/Transforms/Scalar/LoopIdiomRecognize.cpp110
-rw-r--r--test/Transforms/LoopIdiom/basic.ll37
2 files changed, 103 insertions, 44 deletions
diff --git a/lib/Transforms/Scalar/LoopIdiomRecognize.cpp b/lib/Transforms/Scalar/LoopIdiomRecognize.cpp
index 08d96ed..dbf6eec 100644
--- a/lib/Transforms/Scalar/LoopIdiomRecognize.cpp
+++ b/lib/Transforms/Scalar/LoopIdiomRecognize.cpp
@@ -128,11 +128,11 @@ INITIALIZE_PASS_END(LoopIdiomRecognize, "loop-idiom", "Recognize loop idioms",
Pass *llvm::createLoopIdiomPass() { return new LoopIdiomRecognize(); }
-/// DeleteDeadInstruction - Delete this instruction. Before we do, go through
+/// deleteDeadInstruction - Delete this instruction. Before we do, go through
/// and zero out all the operands of this instruction. If any of them become
/// dead, delete them and the computation tree that feeds them.
///
-static void DeleteDeadInstruction(Instruction *I, ScalarEvolution &SE) {
+static void deleteDeadInstruction(Instruction *I, ScalarEvolution &SE) {
SmallVector<Instruction*, 32> NowDeadInsts;
NowDeadInsts.push_back(I);
@@ -162,6 +162,14 @@ static void DeleteDeadInstruction(Instruction *I, ScalarEvolution &SE) {
} while (!NowDeadInsts.empty());
}
+/// deleteIfDeadInstruction - If the specified value is a dead instruction,
+/// delete it and any recursively used instructions.
+static void deleteIfDeadInstruction(Value *V, ScalarEvolution &SE) {
+ if (Instruction *I = dyn_cast<Instruction>(V))
+ if (isInstructionTriviallyDead(I))
+ deleteDeadInstruction(I, SE);
+}
+
bool LoopIdiomRecognize::runOnLoop(Loop *L, LPPassManager &LPM) {
CurLoop = L;
@@ -454,31 +462,35 @@ processLoopStridedStore(Value *DestPtr, unsigned StoreSize,
return false;
}
-
- // Okay, we have a strided store "p[i]" of a splattable value. We can turn
- // this into a memset in the loop preheader now if we want. However, this
- // would be unsafe to do if there is anything else in the loop that may read
- // or write to the aliased location. Check for an alias.
- if (mayLoopAccessLocation(DestPtr, AliasAnalysis::ModRef,
- CurLoop, BECount,
- StoreSize, getAnalysis<AliasAnalysis>(), TheStore))
- return false;
-
- // Okay, everything looks good, insert the memset.
- BasicBlock *Preheader = CurLoop->getLoopPreheader();
-
- IRBuilder<> Builder(Preheader->getTerminator());
-
// The trip count of the loop and the base pointer of the addrec SCEV is
// guaranteed to be loop invariant, which means that it should dominate the
- // header. Just insert code for it in the preheader.
+ // header. This allows us to insert code for it in the preheader.
+ BasicBlock *Preheader = CurLoop->getLoopPreheader();
+ IRBuilder<> Builder(Preheader->getTerminator());
SCEVExpander Expander(*SE);
-
+
+ // Okay, we have a strided store "p[i]" of a splattable value. We can turn
+ // this into a memset in the loop preheader now if we want. However, this
+ // would be unsafe to do if there is anything else in the loop that may read
+ // or write to the aliased location. Check for any overlap by generating the
+ // base pointer and checking the region.
unsigned AddrSpace = cast<PointerType>(DestPtr->getType())->getAddressSpace();
Value *BasePtr =
Expander.expandCodeFor(Ev->getStart(), Builder.getInt8PtrTy(AddrSpace),
Preheader->getTerminator());
+
+ if (mayLoopAccessLocation(BasePtr, AliasAnalysis::ModRef,
+ CurLoop, BECount,
+ StoreSize, getAnalysis<AliasAnalysis>(), TheStore)){
+ Expander.clear();
+ // If we generated new code for the base pointer, clean up.
+ deleteIfDeadInstruction(BasePtr, *SE);
+ return false;
+ }
+
+ // Okay, everything looks good, insert the memset.
+
// The # stored bytes is (BECount+1)*Size. Expand the trip count out to
// pointer size if it isn't already.
const Type *IntPtr = TD->getIntPtrType(DestPtr->getContext());
@@ -521,7 +533,7 @@ processLoopStridedStore(Value *DestPtr, unsigned StoreSize,
// Okay, the memset has been formed. Zap the original store and anything that
// feeds into it.
- DeleteDeadInstruction(TheStore, *SE);
+ deleteDeadInstruction(TheStore, *SE);
++NumMemSet;
return true;
}
@@ -539,41 +551,51 @@ processLoopStoreOfLoopLoad(StoreInst *SI, unsigned StoreSize,
LoadInst *LI = cast<LoadInst>(SI->getValueOperand());
+ // The trip count of the loop and the base pointer of the addrec SCEV is
+ // guaranteed to be loop invariant, which means that it should dominate the
+ // header. This allows us to insert code for it in the preheader.
+ BasicBlock *Preheader = CurLoop->getLoopPreheader();
+ IRBuilder<> Builder(Preheader->getTerminator());
+ SCEVExpander Expander(*SE);
+
// Okay, we have a strided store "p[i]" of a loaded value. We can turn
// this into a memcpy in the loop preheader now if we want. However, this
// would be unsafe to do if there is anything else in the loop that may read
- // or write to the stored location (including the load feeding the stores).
- // Check for an alias.
- if (mayLoopAccessLocation(SI->getPointerOperand(), AliasAnalysis::ModRef,
+ // or write the memory region we're storing to. This includes the load that
+ // feeds the stores. Check for an alias by generating the base address and
+ // checking everything.
+ Value *StoreBasePtr =
+ Expander.expandCodeFor(StoreEv->getStart(),
+ Builder.getInt8PtrTy(SI->getPointerAddressSpace()),
+ Preheader->getTerminator());
+
+ if (mayLoopAccessLocation(StoreBasePtr, AliasAnalysis::ModRef,
CurLoop, BECount, StoreSize,
- getAnalysis<AliasAnalysis>(), SI))
+ getAnalysis<AliasAnalysis>(), SI)) {
+ Expander.clear();
+ // If we generated new code for the base pointer, clean up.
+ deleteIfDeadInstruction(StoreBasePtr, *SE);
return false;
+ }
// For a memcpy, we have to make sure that the input array is not being
// mutated by the loop.
- if (mayLoopAccessLocation(LI->getPointerOperand(), AliasAnalysis::Mod,
- CurLoop, BECount, StoreSize,
- getAnalysis<AliasAnalysis>(), SI))
- return false;
-
- // Okay, everything looks good, insert the memcpy.
- BasicBlock *Preheader = CurLoop->getLoopPreheader();
-
- IRBuilder<> Builder(Preheader->getTerminator());
-
- // The trip count of the loop and the base pointer of the addrec SCEV is
- // guaranteed to be loop invariant, which means that it should dominate the
- // header. Just insert code for it in the preheader.
- SCEVExpander Expander(*SE);
-
Value *LoadBasePtr =
Expander.expandCodeFor(LoadEv->getStart(),
Builder.getInt8PtrTy(LI->getPointerAddressSpace()),
Preheader->getTerminator());
- Value *StoreBasePtr =
- Expander.expandCodeFor(StoreEv->getStart(),
- Builder.getInt8PtrTy(SI->getPointerAddressSpace()),
- Preheader->getTerminator());
+
+ if (mayLoopAccessLocation(LoadBasePtr, AliasAnalysis::Mod, CurLoop, BECount,
+ StoreSize, getAnalysis<AliasAnalysis>(), SI)) {
+ Expander.clear();
+ // If we generated new code for the base pointer, clean up.
+ deleteIfDeadInstruction(LoadBasePtr, *SE);
+ deleteIfDeadInstruction(StoreBasePtr, *SE);
+ return false;
+ }
+
+ // Okay, everything is safe, we can transform this!
+
// The # stored bytes is (BECount+1)*Size. Expand the trip count out to
// pointer size if it isn't already.
@@ -601,7 +623,7 @@ processLoopStoreOfLoopLoad(StoreInst *SI, unsigned StoreSize,
// Okay, the memset has been formed. Zap the original store and anything that
// feeds into it.
- DeleteDeadInstruction(SI, *SE);
+ deleteDeadInstruction(SI, *SE);
++NumMemCpy;
return true;
}
diff --git a/test/Transforms/LoopIdiom/basic.ll b/test/Transforms/LoopIdiom/basic.ll
index 485114c..9695418 100644
--- a/test/Transforms/LoopIdiom/basic.ll
+++ b/test/Transforms/LoopIdiom/basic.ll
@@ -347,3 +347,40 @@ for.end: ; preds = %for.body
; CHECK-NOT: store
; CHECK: ret void
}
+
+
+
+; PR9815 - This is a partial overlap case that cannot be safely transformed
+; into a memcpy.
+@g_50 = global [7 x i32] [i32 0, i32 0, i32 0, i32 0, i32 1, i32 0, i32 0], align 16
+
+define i32 @test14() nounwind {
+entry:
+ br label %for.body
+
+for.body: ; preds = %for.inc, %for.body.lr.ph
+ %tmp5 = phi i32 [ %inc, %for.body ], [ 0, %entry ]
+ %add = add nsw i32 %tmp5, 4
+ %idxprom = sext i32 %add to i64
+ %arrayidx = getelementptr inbounds [7 x i32]* @g_50, i32 0, i64 %idxprom
+ %tmp2 = load i32* %arrayidx, align 4
+ %add4 = add nsw i32 %tmp5, 5
+ %idxprom5 = sext i32 %add4 to i64
+ %arrayidx6 = getelementptr inbounds [7 x i32]* @g_50, i32 0, i64 %idxprom5
+ store i32 %tmp2, i32* %arrayidx6, align 4
+ %inc = add nsw i32 %tmp5, 1
+ %cmp = icmp slt i32 %inc, 2
+ br i1 %cmp, label %for.body, label %for.end
+
+for.end: ; preds = %for.inc
+ %tmp8 = load i32* getelementptr inbounds ([7 x i32]* @g_50, i32 0, i64 6), align 4
+ ret i32 %tmp8
+; CHECK: @test14
+; CHECK: for.body:
+; CHECK: load i32
+; CHECK: store i32
+; CHECK: br i1 %cmp
+
+}
+
+