diff options
author | Chandler Carruth <chandlerc@gmail.com> | 2012-10-01 12:16:54 +0000 |
---|---|---|
committer | Chandler Carruth <chandlerc@gmail.com> | 2012-10-01 12:16:54 +0000 |
commit | 673850aa2dceb8fe1b7a20b72339bf803af8609f (patch) | |
tree | 36c32dd5a1769c76218f88ba8c118ea451d27d7a | |
parent | 90012586f79895a8104e1e65689041fec52d788d (diff) | |
download | external_llvm-673850aa2dceb8fe1b7a20b72339bf803af8609f.zip external_llvm-673850aa2dceb8fe1b7a20b72339bf803af8609f.tar.gz external_llvm-673850aa2dceb8fe1b7a20b72339bf803af8609f.tar.bz2 |
Fix several issues with alignment. We weren't always accounting for type
alignment requirements of the new alloca. As one consequence which was
reported as a bug by Duncan, we overaligned memcpy calls to ranges of
allocas after they were rewritten to types with lower alignment
requirements. Other consquences are possible, but I don't have any test
cases for them.
git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@164937 91177308-0d34-0410-b5e6-96231b3b80d8
-rw-r--r-- | lib/Transforms/Scalar/SROA.cpp | 63 | ||||
-rw-r--r-- | test/Transforms/SROA/alignment.ll | 31 |
2 files changed, 63 insertions, 31 deletions
diff --git a/lib/Transforms/Scalar/SROA.cpp b/lib/Transforms/Scalar/SROA.cpp index c3f95ad..e9a3c38 100644 --- a/lib/Transforms/Scalar/SROA.cpp +++ b/lib/Transforms/Scalar/SROA.cpp @@ -2145,6 +2145,20 @@ private: return getAdjustedPtr(IRB, TD, &NewAI, Offset, PointerTy, getName("")); } + unsigned getAdjustedAlign(uint64_t Offset) { + unsigned NewAIAlign = NewAI.getAlignment(); + if (!NewAIAlign) + NewAIAlign = TD.getABITypeAlignment(NewAI.getAllocatedType()); + return MinAlign(NewAIAlign, Offset); + } + unsigned getAdjustedAlign() { + return getAdjustedAlign(BeginOffset - NewAllocaBeginOffset); + } + + bool isTypeAlignSufficient(Type *Ty) { + return TD.getABITypeAlignment(Ty) >= getAdjustedAlign(); + } + ConstantInt *getIndex(IRBuilder<> &IRB, uint64_t Offset) { assert(VecTy && "Can only call getIndex when rewriting a vector"); uint64_t RelOffset = Offset - NewAllocaBeginOffset; @@ -2253,9 +2267,8 @@ private: Value *NewPtr = getAdjustedAllocaPtr(IRB, LI.getPointerOperand()->getType()); LI.setOperand(0, NewPtr); - if (LI.getAlignment()) - LI.setAlignment(MinAlign(NewAI.getAlignment(), - BeginOffset - NewAllocaBeginOffset)); + if (LI.getAlignment() || !isTypeAlignSufficient(LI.getType())) + LI.setAlignment(getAdjustedAlign()); DEBUG(dbgs() << " to: " << LI << "\n"); deleteIfTriviallyDead(OldOp); @@ -2307,6 +2320,9 @@ private: Value *NewPtr = getAdjustedAllocaPtr(IRB, SI.getPointerOperand()->getType()); SI.setOperand(1, NewPtr); + if (SI.getAlignment() || + !isTypeAlignSufficient(SI.getValueOperand()->getType())) + SI.setAlignment(getAdjustedAlign()); if (SI.getAlignment()) SI.setAlignment(MinAlign(NewAI.getAlignment(), BeginOffset - NewAllocaBeginOffset)); @@ -2325,14 +2341,8 @@ private: // pointer to the new alloca. if (!isa<Constant>(II.getLength())) { II.setDest(getAdjustedAllocaPtr(IRB, II.getRawDest()->getType())); - Type *CstTy = II.getAlignmentCst()->getType(); - if (!NewAI.getAlignment()) - II.setAlignment(ConstantInt::get(CstTy, 0)); - else - II.setAlignment( - ConstantInt::get(CstTy, MinAlign(NewAI.getAlignment(), - BeginOffset - NewAllocaBeginOffset))); + II.setAlignment(ConstantInt::get(CstTy, getAdjustedAlign())); deleteIfTriviallyDead(OldPtr); return false; @@ -2353,15 +2363,10 @@ private: !TD.isLegalInteger(TD.getTypeSizeInBits(ScalarTy)))) { Type *SizeTy = II.getLength()->getType(); Constant *Size = ConstantInt::get(SizeTy, EndOffset - BeginOffset); - unsigned Align = 1; - if (NewAI.getAlignment()) - Align = MinAlign(NewAI.getAlignment(), - BeginOffset - NewAllocaBeginOffset); - CallInst *New = IRB.CreateMemSet(getAdjustedAllocaPtr(IRB, II.getRawDest()->getType()), - II.getValue(), Size, Align, + II.getValue(), Size, getAdjustedAlign(), II.isVolatile()); (void)New; DEBUG(dbgs() << " to: " << *New << "\n"); @@ -2443,6 +2448,16 @@ private: const AllocaPartitioning::MemTransferOffsets &MTO = P.getMemTransferOffsets(II); + // Compute the relative offset within the transfer. + unsigned IntPtrWidth = TD.getPointerSizeInBits(); + APInt RelOffset(IntPtrWidth, BeginOffset - (IsDest ? MTO.DestBegin + : MTO.SourceBegin)); + + unsigned Align = II.getAlignment(); + if (Align > 1) + Align = MinAlign(RelOffset.zextOrTrunc(64).getZExtValue(), + MinAlign(II.getAlignment(), getAdjustedAlign())); + // For unsplit intrinsics, we simply modify the source and destination // pointers in place. This isn't just an optimization, it is a matter of // correctness. With unsplit intrinsics we may be dealing with transfers @@ -2458,11 +2473,7 @@ private: II.setSource(getAdjustedAllocaPtr(IRB, II.getRawSource()->getType())); Type *CstTy = II.getAlignmentCst()->getType(); - if (II.getAlignment() > 1) - II.setAlignment(ConstantInt::get( - CstTy, MinAlign(II.getAlignment(), - MinAlign(NewAI.getAlignment(), - BeginOffset - NewAllocaBeginOffset)))); + II.setAlignment(ConstantInt::get(CstTy, Align)); DEBUG(dbgs() << " to: " << II << "\n"); deleteIfTriviallyDead(OldOp); @@ -2474,11 +2485,6 @@ private: // memmove with memcpy, and we don't need to worry about all manner of // downsides to splitting and transforming the operations. - // Compute the relative offset within the transfer. - unsigned IntPtrWidth = TD.getPointerSizeInBits(); - APInt RelOffset(IntPtrWidth, BeginOffset - (IsDest ? MTO.DestBegin - : MTO.SourceBegin)); - // If this doesn't map cleanly onto the alloca type, and that type isn't // a single value type, just emit a memcpy. bool EmitMemCpy @@ -2521,11 +2527,6 @@ private: OtherPtr = getAdjustedPtr(IRB, TD, OtherPtr, RelOffset, OtherPtrTy, getName("." + OtherPtr->getName())); - unsigned Align = II.getAlignment(); - if (Align > 1) - Align = MinAlign(RelOffset.zextOrTrunc(64).getZExtValue(), - MinAlign(II.getAlignment(), NewAI.getAlignment())); - // Strip all inbounds GEPs and pointer casts to try to dig out any root // alloca that should be re-examined after rewriting this instruction. if (AllocaInst *AI diff --git a/test/Transforms/SROA/alignment.ll b/test/Transforms/SROA/alignment.ll index 02a6755..192b77e 100644 --- a/test/Transforms/SROA/alignment.ll +++ b/test/Transforms/SROA/alignment.ll @@ -83,3 +83,34 @@ entry: call void @llvm.memcpy.p0i8.p0i8.i32(i8* %b_gep, i8* %x, i32 18, i32 2, i1 false) ret void } + +%struct.S = type { i8, { i64 } } + +define void @test4() { +; This test case triggered very strange alginment behavior with memcpy due to +; strang splitting. Reported by Duncan. +; CHECK: @test4 + +entry: + %D.2113 = alloca %struct.S + %Op = alloca %struct.S + %D.2114 = alloca %struct.S + %gep1 = getelementptr inbounds %struct.S* %Op, i32 0, i32 0 + store i8 0, i8* %gep1, align 8 + %gep2 = getelementptr inbounds %struct.S* %Op, i32 0, i32 1, i32 0 + %cast = bitcast i64* %gep2 to double* + store double 0.000000e+00, double* %cast, align 8 + store i64 0, i64* %gep2, align 8 + %dst1 = bitcast %struct.S* %D.2114 to i8* + %src1 = bitcast %struct.S* %Op to i8* + call void @llvm.memcpy.p0i8.p0i8.i32(i8* %dst1, i8* %src1, i32 16, i32 8, i1 false) + %dst2 = bitcast %struct.S* %D.2113 to i8* + %src2 = bitcast %struct.S* %D.2114 to i8* + call void @llvm.memcpy.p0i8.p0i8.i32(i8* %dst2, i8* %src2, i32 16, i32 8, i1 false) +; We get 3 memcpy calls with various reasons to shrink their alignment to 1. +; CHECK: @llvm.memcpy.p0i8.p0i8.i32(i8* %{{.*}}, i8* %{{.*}}, i32 3, i32 1, i1 false) +; CHECK: @llvm.memcpy.p0i8.p0i8.i32(i8* %{{.*}}, i8* %{{.*}}, i32 8, i32 1, i1 false) +; CHECK: @llvm.memcpy.p0i8.p0i8.i32(i8* %{{.*}}, i8* %{{.*}}, i32 11, i32 1, i1 false) + + ret void +} |