diff options
-rw-r--r-- | lib/Transforms/Scalar/SROA.cpp | 16 | ||||
-rw-r--r-- | test/Transforms/SROA/basictest.ll | 45 |
2 files changed, 40 insertions, 21 deletions
diff --git a/lib/Transforms/Scalar/SROA.cpp b/lib/Transforms/Scalar/SROA.cpp index 04e350c..c03b37d 100644 --- a/lib/Transforms/Scalar/SROA.cpp +++ b/lib/Transforms/Scalar/SROA.cpp @@ -662,11 +662,14 @@ private: bool Inserted = false; llvm::tie(PMI, Inserted) = MemTransferPartitionMap.insert(std::make_pair(&II, NewIdx)); - if (!Inserted && Offsets.IsSplittable) { + if (Offsets.IsSplittable && + (!Inserted || II.getRawSource() == II.getRawDest())) { // We've found a memory transfer intrinsic which refers to the alloca as - // both a source and dest. We refuse to split these to simplify splitting - // logic. If possible, SROA will still split them into separate allocas - // and then re-analyze. + // both a source and dest. This is detected either by direct equality of + // the operand values, or when we visit the intrinsic twice due to two + // different chains of values leading to it. We refuse to split these to + // simplify splitting logic. If possible, SROA will still split them into + // separate allocas and then re-analyze. Offsets.IsSplittable = false; P.Partitions[PMI->second].IsSplittable = false; P.Partitions[NewIdx].IsSplittable = false; @@ -2228,10 +2231,7 @@ private: // alloca that should be re-examined after rewriting this instruction. if (AllocaInst *AI = dyn_cast<AllocaInst>(OtherPtr->stripInBoundsOffsets())) - // Don't revisit the alloca if both sides of the memory transfer are - // referring to the same alloca. - if (AI != &NewAI) - Pass.Worklist.insert(AI); + Pass.Worklist.insert(AI); if (EmitMemCpy) { Value *OurPtr diff --git a/test/Transforms/SROA/basictest.ll b/test/Transforms/SROA/basictest.ll index f7a6e0e..e31315f 100644 --- a/test/Transforms/SROA/basictest.ll +++ b/test/Transforms/SROA/basictest.ll @@ -856,26 +856,45 @@ entry: ret i8 %result } -%test22.struct = type { i8 } +%PR13916.struct = type { i8 } + +define void @PR13916.1() { +; Ensure that we handle overlapping memcpy intrinsics correctly, especially in +; the case where there is a directly identical value for both source and dest. +; CHECK: @PR13916.1 +; FIXME: We shouldn't leave this alloca around. +; CHECK: alloca +; CHECK: ret void -define void @test22() { -; CHECK: @test22 -; CHECK-NOT: alloca +entry: + %a = alloca i8 + call void @llvm.memcpy.p0i8.p0i8.i32(i8* %a, i8* %a, i32 1, i32 1, i1 false) + %tmp2 = load i8* %a + ret void +} + +define void @PR13916.2() { +; Check whether we continue to handle them correctly when they start off with +; different pointer value chains, but during rewriting we coalesce them into the +; same value. +; CHECK: @PR13916.2 +; FIXME: We shouldn't leave this alloca around. +; CHECK: alloca ; CHECK: ret void -; PR13916 entry: - %A = alloca %test22.struct + %a = alloca %PR13916.struct, align 1 br i1 undef, label %if.then, label %if.end -if.then: ; preds = %entry - %tmp = bitcast %test22.struct* %A to i8* - %tmp1 = bitcast %test22.struct* %A to i8* - call void @llvm.memcpy.p0i8.p0i8.i32(i8* %tmp, i8* %tmp1, i32 1, i32 1, i1 false) - unreachable +if.then: + %tmp0 = bitcast %PR13916.struct* %a to i8* + %tmp1 = bitcast %PR13916.struct* %a to i8* + call void @llvm.memcpy.p0i8.p0i8.i32(i8* %tmp0, i8* %tmp1, i32 1, i32 1, i1 false) + br label %if.end -if.end: ; preds = %entry - %tmp2 = load %test22.struct* %A +if.end: + %gep = getelementptr %PR13916.struct* %a, i32 0, i32 0 + %tmp2 = load i8* %gep ret void } |