aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorChris Lattner <sabre@nondot.org>2009-01-07 08:11:13 +0000
committerChris Lattner <sabre@nondot.org>2009-01-07 08:11:13 +0000
commitd2fa781169175b827e50953a1d0b7edc6b0c4801 (patch)
treec2421b82d9706cbc476aa1e887390ecbae0edfae
parentd93afec1dbbb1abb3df55e2e007b5f256d09f84a (diff)
downloadexternal_llvm-d2fa781169175b827e50953a1d0b7edc6b0c4801.zip
external_llvm-d2fa781169175b827e50953a1d0b7edc6b0c4801.tar.gz
external_llvm-d2fa781169175b827e50953a1d0b7edc6b0c4801.tar.bz2
Implement the first half of PR3290: if there is a store of an
integer to a (transitive) bitcast of the alloca and if that integer has the full size of the alloca, then it clobbers the whole thing. Handle this by extracting pieces out of the stored integer and filing them away in the SROA'd elements.

This triggers fairly frequently because the CFE uses integers to pass small structs by value and the inliner exposes these. For example, in kimwitu++, I see a bunch of these with i64 stores to "%struct.std::pair<std::_Rb_tree_const_iterator<kc::impl_abstract_phylum*>,bool>". In 176.gcc I see a few i32 stores to "%struct..0anon".

In the testcase, this is a difference between compiling test1 to:

_test1:
	subl $12, %esp
	movl 20(%esp), %eax
	movl %eax, 4(%esp)
	movl 16(%esp), %eax
	movl %eax, (%esp)
	movl (%esp), %eax
	addl 4(%esp), %eax
	addl $12, %esp
	ret

vs:

_test1:
	movl 8(%esp), %eax
	addl 4(%esp), %eax
	ret

The second half of this will be to handle loads of the same form.

git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@61853 91177308-0d34-0410-b5e6-96231b3b80d8
-rw-r--r--lib/Transforms/Scalar/ScalarReplAggregates.cpp138
-rw-r--r--test/Transforms/ScalarRepl/copy-aggregate.ll31
2 files changed, 164 insertions, 5 deletions
diff --git a/lib/Transforms/Scalar/ScalarReplAggregates.cpp b/lib/Transforms/Scalar/ScalarReplAggregates.cpp
index 23684e4..241e0cd 100644
--- a/lib/Transforms/Scalar/ScalarReplAggregates.cpp
+++ b/lib/Transforms/Scalar/ScalarReplAggregates.cpp
@@ -120,7 +120,8 @@ namespace {
void RewriteMemIntrinUserOfAlloca(MemIntrinsic *MI, Instruction *BCInst,
AllocationInst *AI,
SmallVector<AllocaInst*, 32> &NewElts);
-
+ void RewriteStoreUserOfWholeAlloca(StoreInst *SI, AllocationInst *AI,
+ SmallVector<AllocaInst*, 32> &NewElts);
const Type *CanConvertToScalar(Value *V, bool &IsNotTrivial);
void ConvertToScalar(AllocationInst *AI, const Type *Ty);
@@ -586,6 +587,18 @@ void SROA::isSafeUseOfBitCastedAllocation(BitCastInst *BC, AllocationInst *AI,
isSafeUseOfBitCastedAllocation(BCU, AI, Info);
} else if (MemIntrinsic *MI = dyn_cast<MemIntrinsic>(UI)) {
isSafeMemIntrinsicOnAllocation(MI, AI, UI.getOperandNo(), Info);
+ } else if (StoreInst *SI = dyn_cast<StoreInst>(UI)) {
+ // If storing the entire alloca in one chunk through a bitcasted pointer
+ // to integer, we can transform it. This happens (for example) when you
+ // cast a {i32,i32}* to i64* and store through it. This is similar to the
+ // memcpy case and occurs in various "byval" cases and emulated memcpys.
+ if (isa<IntegerType>(SI->getOperand(0)->getType()) &&
+ TD->getABITypeSize(SI->getOperand(0)->getType()) ==
+ TD->getABITypeSize(AI->getType()->getElementType())) {
+ Info.isMemCpyDst = true;
+ continue;
+ }
+ return MarkUnsafe(Info);
} else {
return MarkUnsafe(Info);
}
@@ -603,7 +616,7 @@ void SROA::RewriteBitCastUserOfAlloca(Instruction *BCInst, AllocationInst *AI,
Instruction *User = cast<Instruction>(*UI++);
if (BitCastInst *BCU = dyn_cast<BitCastInst>(User)) {
RewriteBitCastUserOfAlloca(BCU, AI, NewElts);
- BCU->eraseFromParent();
+ if (BCU->use_empty()) BCU->eraseFromParent();
continue;
}
@@ -611,12 +624,17 @@ void SROA::RewriteBitCastUserOfAlloca(Instruction *BCInst, AllocationInst *AI,
// This must be memcpy/memmove/memset of the entire aggregate.
// Split into one per element.
RewriteMemIntrinUserOfAlloca(MI, BCInst, AI, NewElts);
- MI->eraseFromParent();
continue;
}
- // If it's not a mem intrinsic, it must be some other user of a gep of the
- // first pointer. Just leave these alone.
+ if (StoreInst *SI = dyn_cast<StoreInst>(User)) {
+ // This must be a store of the entire alloca from an integer.
+ RewriteStoreUserOfWholeAlloca(SI, AI, NewElts);
+ continue;
+ }
+
+ // Otherwise it must be some other user of a gep of the first pointer. Just
+ // leave these alone.
continue;
}
}
@@ -772,8 +790,118 @@ void SROA::RewriteMemIntrinUserOfAlloca(MemIntrinsic *MI, Instruction *BCInst,
CallInst::Create(TheFn, Ops, Ops + 4, "", MI);
}
}
+ MI->eraseFromParent();
}
+
+/// RewriteStoreUserOfWholeAlloca - We found a store of an integer that
+/// overwrites the entire allocation. Extract out the pieces of the stored
+/// integer, store them individually into NewElts, then erase SI.
+void SROA::RewriteStoreUserOfWholeAlloca(StoreInst *SI,
+ AllocationInst *AI,
+ SmallVector<AllocaInst*, 32> &NewElts){
+ // Extract each element out of the integer according to its structure offset
+ // and store the element value to the individual alloca.
+ Value *SrcVal = SI->getOperand(0);
+ const Type *AllocaEltTy = AI->getType()->getElementType();
+ uint64_t AllocaSizeBits = TD->getABITypeSizeInBits(AllocaEltTy);
+
+ // If this isn't a store of an integer to the whole alloca, it may be a store
+ // to the first element. Just ignore the store in this case (SI is left in
+ // place) and normal SROA will handle it.
+ if (!isa<IntegerType>(SrcVal->getType()) ||
+ TD->getABITypeSizeInBits(SrcVal->getType()) != AllocaSizeBits)
+ return;
+
+ DOUT << "PROMOTING STORE TO WHOLE ALLOCA: " << *AI << *SI;
+
+ // There are two forms here: AI could be an array or struct. Structs take
+ // each offset from the StructLayout; arrays step by the element's ABI size.
+ if (const StructType *EltSTy = dyn_cast<StructType>(AllocaEltTy)) {
+ const StructLayout *Layout = TD->getStructLayout(EltSTy);
+
+ for (unsigned i = 0, e = NewElts.size(); i != e; ++i) {
+ // Bit offset of field i in SrcVal. NOTE(review): assumes Shift < SrcVal's bit width.
+ const Type *FieldTy = EltSTy->getElementType(i);
+ uint64_t Shift = Layout->getElementOffsetInBits(i);
+
+ if (TD->isBigEndian())
+ Shift = AllocaSizeBits-Shift-TD->getABITypeSizeInBits(FieldTy);
+
+ Value *EltVal = SrcVal;
+ if (Shift) {
+ Value *ShiftVal = ConstantInt::get(EltVal->getType(), Shift);
+ EltVal = BinaryOperator::CreateLShr(EltVal, ShiftVal,
+ "sroa.store.elt", SI);
+ }
+
+ // Truncate down to an integer of the field's size in bits.
+ uint64_t FieldSizeBits = TD->getTypeSizeInBits(FieldTy);
+ if (FieldSizeBits != AllocaSizeBits)
+ EltVal = new TruncInst(EltVal, IntegerType::get(FieldSizeBits), "", SI);
+ Value *DestField = NewElts[i];
+ if (EltVal->getType() == FieldTy) {
+ // Storing to an integer field of this size, just do it.
+ } else if (FieldTy->isFloatingPoint() || isa<VectorType>(FieldTy)) {
+ // Bitcast to the right element type (for fp/vector values).
+ EltVal = new BitCastInst(EltVal, FieldTy, "", SI);
+ } else {
+ // Otherwise, bitcast the dest pointer (for aggregates).
+ DestField = new BitCastInst(DestField,
+ PointerType::getUnqual(EltVal->getType()),
+ "", SI);
+ }
+ new StoreInst(EltVal, DestField, SI);
+ }
+
+ } else {
+ const ArrayType *ATy = cast<ArrayType>(AllocaEltTy);
+ const Type *ArrayEltTy = ATy->getElementType();
+ uint64_t ElementOffset = TD->getABITypeSizeInBits(ArrayEltTy); // per-element step, in bits
+ uint64_t ElementSizeBits = TD->getTypeSizeInBits(ArrayEltTy);
+
+ uint64_t Shift;
+
+ if (TD->isBigEndian())
+ Shift = AllocaSizeBits-ElementOffset; // element 0 comes from the high bits
+ else
+ Shift = 0;
+
+ for (unsigned i = 0, e = NewElts.size(); i != e; ++i) {
+
+ Value *EltVal = SrcVal;
+ if (Shift) {
+ Value *ShiftVal = ConstantInt::get(EltVal->getType(), Shift);
+ EltVal = BinaryOperator::CreateLShr(EltVal, ShiftVal,
+ "sroa.store.elt", SI);
+ }
+
+ // Truncate down to an integer of the element's size in bits.
+ if (ElementSizeBits != AllocaSizeBits)
+ EltVal = new TruncInst(EltVal, IntegerType::get(ElementSizeBits),"",SI);
+ Value *DestField = NewElts[i];
+ if (EltVal->getType() == ArrayEltTy) {
+ // Storing to an integer element of this size, just do it.
+ } else if (ArrayEltTy->isFloatingPoint() || isa<VectorType>(ArrayEltTy)) {
+ // Bitcast to the right element type (for fp/vector values).
+ EltVal = new BitCastInst(EltVal, ArrayEltTy, "", SI);
+ } else {
+ // Otherwise, bitcast the dest pointer (for aggregates).
+ DestField = new BitCastInst(DestField,
+ PointerType::getUnqual(EltVal->getType()),
+ "", SI);
+ }
+ new StoreInst(EltVal, DestField, SI);
+
+ if (TD->isBigEndian())
+ Shift -= ElementOffset;
+ else
+ Shift += ElementOffset;
+ }
+ }
+
+ SI->eraseFromParent();
+}
+
/// HasPadding - Return true if the specified type has any structure or
/// alignment padding, false otherwise.
diff --git a/test/Transforms/ScalarRepl/copy-aggregate.ll b/test/Transforms/ScalarRepl/copy-aggregate.ll
new file mode 100644
index 0000000..c3685d0
--- /dev/null
+++ b/test/Transforms/ScalarRepl/copy-aggregate.ll
@@ -0,0 +1,31 @@
+; RUN: llvm-as < %s | opt -scalarrepl | llvm-dis | not grep alloca
+; PR3290
+
+;; Store of an i64 through a bitcast pointer covering the whole {{i32,i32}} alloca.
+define i32 @test1(i64 %V) nounwind {
+ %X = alloca {{i32, i32}}
+ %Y = bitcast {{i32,i32}}* %X to i64*
+ store i64 %V, i64* %Y
+
+ %A = getelementptr {{i32,i32}}* %X, i32 0, i32 0, i32 0
+ %B = getelementptr {{i32,i32}}* %X, i32 0, i32 0, i32 1
+ %a = load i32* %A
+ %b = load i32* %B
+ %c = add i32 %a, %b
+ ret i32 %c
+}
+
+;; Store of an i128 through a bitcast pointer covering the whole {[4 x float]} alloca (struct wrapping an array).
+define float @test2(i128 %V) nounwind {
+ %X = alloca {[4 x float]}
+ %Y = bitcast {[4 x float]}* %X to i128*
+ store i128 %V, i128* %Y
+
+ %A = getelementptr {[4 x float]}* %X, i32 0, i32 0, i32 0
+ %B = getelementptr {[4 x float]}* %X, i32 0, i32 0, i32 3
+ %a = load float* %A
+ %b = load float* %B
+ %c = add float %a, %b
+ ret float %c
+}
+