diff options
-rw-r--r-- | lib/Transforms/Scalar/ScalarReplAggregates.cpp | 101 | ||||
-rw-r--r-- | test/Transforms/ScalarRepl/copy-aggregate.ll | 26 |
2 files changed, 125 insertions, 2 deletions
diff --git a/lib/Transforms/Scalar/ScalarReplAggregates.cpp b/lib/Transforms/Scalar/ScalarReplAggregates.cpp index 241e0cd..041fd49 100644 --- a/lib/Transforms/Scalar/ScalarReplAggregates.cpp +++ b/lib/Transforms/Scalar/ScalarReplAggregates.cpp @@ -122,6 +122,8 @@ namespace { SmallVector<AllocaInst*, 32> &NewElts); void RewriteStoreUserOfWholeAlloca(StoreInst *SI, AllocationInst *AI, SmallVector<AllocaInst*, 32> &NewElts); + void RewriteLoadUserOfWholeAlloca(LoadInst *LI, AllocationInst *AI, + SmallVector<AllocaInst*, 32> &NewElts); const Type *CanConvertToScalar(Value *V, bool &IsNotTrivial); void ConvertToScalar(AllocationInst *AI, const Type *Ty); @@ -599,6 +601,18 @@ void SROA::isSafeUseOfBitCastedAllocation(BitCastInst *BC, AllocationInst *AI, continue; } return MarkUnsafe(Info); + } else if (LoadInst *LI = dyn_cast<LoadInst>(UI)) { + // If loading the entire alloca in one chunk through a bitcasted pointer + // to integer, we can transform it. This happens (for example) when you + // cast a {i32,i32}* to i64* and load through it. This is similar to the + // memcpy case and occurs in various "byval" cases and emulated memcpys. + if (isa<IntegerType>(LI->getType()) && + TD->getABITypeSize(LI->getType()) == + TD->getABITypeSize(AI->getType()->getElementType())) { + Info.isMemCpySrc = true; + continue; + } + return MarkUnsafe(Info); } else { return MarkUnsafe(Info); } @@ -628,15 +642,21 @@ void SROA::RewriteBitCastUserOfAlloca(Instruction *BCInst, AllocationInst *AI, } if (StoreInst *SI = dyn_cast<StoreInst>(User)) { - // This must be a store of the entire alloca from an integer. + // If this is a store of the entire alloca from an integer, rewrite it. RewriteStoreUserOfWholeAlloca(SI, AI, NewElts); continue; } + + if (LoadInst *LI = dyn_cast<LoadInst>(User)) { + // If this is a load of the entire alloca to an integer, rewrite it. + RewriteLoadUserOfWholeAlloca(LI, AI, NewElts); + continue; + } // Otherwise it must be some other user of a gep of the first pointer. Just // leave these alone. continue; - } + } } /// RewriteMemIntrinUserOfAlloca - MI is a memcpy/memset/memmove from or to AI. @@ -902,6 +922,83 @@ void SROA::RewriteStoreUserOfWholeAlloca(StoreInst *SI, SI->eraseFromParent(); } +/// RewriteLoadUserOfWholeAlloca - We found an load of the entire allocation to +/// an integer. Load the individual pieces to form the aggregate value. +void SROA::RewriteLoadUserOfWholeAlloca(LoadInst *LI, AllocationInst *AI, + SmallVector<AllocaInst*, 32> &NewElts) { + // Extract each element out of the NewElts according to its structure offset + // and form the result value. + const Type *AllocaEltTy = AI->getType()->getElementType(); + uint64_t AllocaSizeBits = TD->getABITypeSizeInBits(AllocaEltTy); + + // If this isn't a load of the whole alloca to an integer, it may be a load + // of the first element. Just ignore the load in this case and normal SROA + // will handle it. + if (!isa<IntegerType>(LI->getType()) || + TD->getABITypeSizeInBits(LI->getType()) != AllocaSizeBits) + return; + + DOUT << "PROMOTING LOAD OF WHOLE ALLOCA: " << *AI << *LI; + + // There are two forms here: AI could be an array or struct. Both cases + // have different ways to compute the element offset. + const StructLayout *Layout = 0; + uint64_t ArrayEltBitOffset = 0; + if (const StructType *EltSTy = dyn_cast<StructType>(AllocaEltTy)) { + Layout = TD->getStructLayout(EltSTy); + } else { + const Type *ArrayEltTy = cast<ArrayType>(AllocaEltTy)->getElementType(); + ArrayEltBitOffset = TD->getABITypeSizeInBits(ArrayEltTy); + } + + Value *ResultVal = Constant::getNullValue(LI->getType()); + + for (unsigned i = 0, e = NewElts.size(); i != e; ++i) { + // Load the value from the alloca. If the NewElt is an aggregate, cast + // the pointer to an integer of the same size before doing the load. + Value *SrcField = NewElts[i]; + const Type *FieldTy = + cast<PointerType>(SrcField->getType())->getElementType(); + const IntegerType *FieldIntTy = + IntegerType::get(TD->getTypeSizeInBits(FieldTy)); + if (!isa<IntegerType>(FieldTy) && !FieldTy->isFloatingPoint() && + !isa<VectorType>(FieldTy)) + SrcField = new BitCastInst(SrcField, PointerType::getUnqual(FieldIntTy), + "", LI); + SrcField = new LoadInst(SrcField, "sroa.load.elt", LI); + + // If SrcField is a fp or vector of the right size but that isn't an + // integer type, bitcast to an integer so we can shift it. + if (SrcField->getType() != FieldIntTy) + SrcField = new BitCastInst(SrcField, FieldIntTy, "", LI); + + // Zero extend the field to be the same size as the final alloca so that + // we can shift and insert it. + if (SrcField->getType() != ResultVal->getType()) + SrcField = new ZExtInst(SrcField, ResultVal->getType(), "", LI); + + // Determine the number of bits to shift SrcField. + uint64_t Shift; + if (Layout) // Struct case. + Shift = Layout->getElementOffsetInBits(i); + else // Array case. + Shift = i*ArrayEltBitOffset; + + if (TD->isBigEndian()) + Shift = AllocaSizeBits-Shift-FieldIntTy->getBitWidth(); + + if (Shift) { + Value *ShiftVal = ConstantInt::get(SrcField->getType(), Shift); + SrcField = BinaryOperator::CreateShl(SrcField, ShiftVal, "", LI); + } + + ResultVal = BinaryOperator::CreateOr(SrcField, ResultVal, "", LI); + } + + LI->replaceAllUsesWith(ResultVal); + LI->eraseFromParent(); +} + /// HasPadding - Return true if the specified type has any structure or /// alignment padding, false otherwise. diff --git a/test/Transforms/ScalarRepl/copy-aggregate.ll b/test/Transforms/ScalarRepl/copy-aggregate.ll index c3685d0..4ab17ae 100644 --- a/test/Transforms/ScalarRepl/copy-aggregate.ll +++ b/test/Transforms/ScalarRepl/copy-aggregate.ll @@ -29,3 +29,29 @@ define float @test2(i128 %V) nounwind { ret float %c } +;; Load of whole alloca struct as integer +define i64 @test3(i32 %a, i32 %b) nounwind { + %X = alloca {{i32, i32}} + + %A = getelementptr {{i32,i32}}* %X, i32 0, i32 0, i32 0 + %B = getelementptr {{i32,i32}}* %X, i32 0, i32 0, i32 1 + store i32 %a, i32* %A + store i32 %b, i32* %B + + %Y = bitcast {{i32,i32}}* %X to i64* + %Z = load i64* %Y + ret i64 %Z +} + +;; load of integer from whole struct/array alloca. +define i128 @test4(float %a, float %b) nounwind { + %X = alloca {[4 x float]} + %A = getelementptr {[4 x float]}* %X, i32 0, i32 0, i32 0 + %B = getelementptr {[4 x float]}* %X, i32 0, i32 0, i32 3 + store float %a, float* %A + store float %b, float* %B + + %Y = bitcast {[4 x float]}* %X to i128* + %V = load i128* %Y + ret i128 %V +} |