aboutsummaryrefslogtreecommitdiffstats
path: root/lib/Transforms/Scalar/ScalarReplAggregates.cpp
diff options
context:
space:
mode:
Diffstat (limited to 'lib/Transforms/Scalar/ScalarReplAggregates.cpp')
-rw-r--r--lib/Transforms/Scalar/ScalarReplAggregates.cpp139
1 files changed, 127 insertions, 12 deletions
diff --git a/lib/Transforms/Scalar/ScalarReplAggregates.cpp b/lib/Transforms/Scalar/ScalarReplAggregates.cpp
index bc6035e..1f64ad2 100644
--- a/lib/Transforms/Scalar/ScalarReplAggregates.cpp
+++ b/lib/Transforms/Scalar/ScalarReplAggregates.cpp
@@ -295,12 +295,16 @@ AllocaInst *ConvertToScalarInfo::TryConvert(AllocaInst *AI) {
/// MergeInType - Add the 'In' type to the accumulated vector type (VectorTy)
/// so far at the offset specified by Offset (which is specified in bytes).
///
-/// There are two cases we handle here:
+/// There are three cases we handle here:
/// 1) A union of vector types of the same size and potentially its elements.
/// Here we turn element accesses into insert/extract element operations.
/// This promotes a <4 x float> with a store of float to the third element
/// into a <4 x float> that uses insert element.
-/// 2) A fully general blob of memory, which we turn into some (potentially
+/// 2) A union of vector types with power-of-2 size differences, e.g. a float,
+/// <2 x float> and <4 x float>. Here we turn element accesses into insert
+/// and extract element operations, and <2 x float> accesses into a cast to
+/// <2 x double>, an extract, and a cast back to <2 x float>.
+/// 3) A fully general blob of memory, which we turn into some (potentially
/// large) integer type with extract and insert operations where the loads
/// and stores would mutate the memory. We mark this by setting VectorTy
/// to VoidTy.
@@ -346,18 +350,68 @@ bool ConvertToScalarInfo::MergeInVectorType(const VectorType *VInTy,
// Remember if we saw a vector type.
HadAVector = true;
- if (VInTy->getBitWidth()/8 == AllocaSize && Offset == 0) {
- // If we're storing/loading a vector of the right size, allow it as a
- // vector. If this the first vector we see, remember the type so that
- // we know the element size. If this is a subsequent access, ignore it
- // even if it is a differing type but the same size. Worst case we can
- // bitcast the resultant vectors.
- if (VectorTy == 0)
- VectorTy = VInTy;
+ // TODO: Support nonzero offsets?
+ if (Offset != 0)
+ return false;
+
+ // Only allow vectors that are a power-of-2 away from the size of the alloca.
+ if (!isPowerOf2_64(AllocaSize / (VInTy->getBitWidth() / 8)))
+ return false;
+
+ // If this the first vector we see, remember the type so that we know the
+ // element size.
+ if (!VectorTy) {
+ VectorTy = VInTy;
return true;
}
- return false;
+ unsigned BitWidth = cast<VectorType>(VectorTy)->getBitWidth();
+ unsigned InBitWidth = VInTy->getBitWidth();
+
+ // Vectors of the same size can be converted using a simple bitcast.
+ if (InBitWidth == BitWidth && AllocaSize == (InBitWidth / 8))
+ return true;
+
+ const Type *ElementTy = cast<VectorType>(VectorTy)->getElementType();
+ const Type *InElementTy = cast<VectorType>(VectorTy)->getElementType();
+
+ // Do not allow mixed integer and floating-point accesses from vectors of
+ // different sizes.
+ if (ElementTy->isFloatingPointTy() != InElementTy->isFloatingPointTy())
+ return false;
+
+ if (ElementTy->isFloatingPointTy()) {
+ // Only allow floating-point vectors of different sizes if they have the
+ // same element type.
+ // TODO: This could be loosened a bit, but would anything benefit?
+ if (ElementTy != InElementTy)
+ return false;
+
+ // There are no arbitrary-precision floating-point types, which limits the
+ // number of legal vector types with larger element types that we can form
+ // to bitcast and extract a subvector.
+ // TODO: We could support some more cases with mixed fp128 and double here.
+ if (!(BitWidth == 64 || BitWidth == 128) ||
+ !(InBitWidth == 64 || InBitWidth == 128))
+ return false;
+ } else {
+ assert(ElementTy->isIntegerTy() && "Vector elements must be either integer "
+ "or floating-point.");
+ unsigned BitWidth = ElementTy->getPrimitiveSizeInBits();
+ unsigned InBitWidth = InElementTy->getPrimitiveSizeInBits();
+
+ // Do not allow integer types smaller than a byte or types whose widths are
+ // not a multiple of a byte.
+ if (BitWidth < 8 || InBitWidth < 8 ||
+ BitWidth % 8 != 0 || InBitWidth % 8 != 0)
+ return false;
+ }
+
+ // Pick the largest of the two vector types.
+ if (InBitWidth > BitWidth)
+ VectorTy = VInTy;
+
+ return true;
}
/// CanConvertToScalar - V is a pointer. If we can convert the pointee and all
@@ -586,6 +640,26 @@ void ConvertToScalarInfo::ConvertUsesToScalar(Value *Ptr, AllocaInst *NewAI,
}
}
+/// getScaledElementType - Gets a scaled element type for a partial vector
+/// access of an alloca. The input type must be an integer or float, and
+/// the resulting type must be an integer, float or double.
+static const Type *getScaledElementType(const Type *OldTy, unsigned Scale) {
+ assert((OldTy->isIntegerTy() || OldTy->isFloatTy()) && "Partial vector "
+ "accesses must be scaled from integer or float elements.");
+
+ LLVMContext &Context = OldTy->getContext();
+ unsigned Size = OldTy->getPrimitiveSizeInBits() * Scale;
+
+ if (OldTy->isIntegerTy())
+ return Type::getIntNTy(Context, Size);
+ if (Size == 32)
+ return Type::getFloatTy(Context);
+ if (Size == 64)
+ return Type::getDoubleTy(Context);
+
+ llvm_unreachable("Invalid type for a partial vector access of an alloca!");
+}
+
/// ConvertScalar_ExtractValue - Extract a value of type ToType from an integer
/// or vector value FromVal, extracting the bits from the offset specified by
/// Offset. This returns the value, which is of type ToType.
@@ -606,8 +680,27 @@ ConvertScalar_ExtractValue(Value *FromVal, const Type *ToType,
// If the result alloca is a vector type, this is either an element
// access or a bitcast to another vector type of the same size.
if (const VectorType *VTy = dyn_cast<VectorType>(FromVal->getType())) {
- if (ToType->isVectorTy())
+ if (ToType->isVectorTy()) {
+ if (isPowerOf2_64(AllocaSize / TD.getTypeAllocSize(ToType))) {
+ assert(Offset == 0 && "Can't extract a value of a smaller vector type "
+ "from a nonzero offset.");
+
+ const Type *ToElementTy = cast<VectorType>(ToType)->getElementType();
+ unsigned Scale = AllocaSize / TD.getTypeAllocSize(ToType);
+ const Type *CastElementTy = getScaledElementType(ToElementTy, Scale);
+ unsigned NumCastVectorElements = VTy->getNumElements() / Scale;
+
+ LLVMContext &Context = FromVal->getContext();
+ const Type *CastTy = VectorType::get(CastElementTy,
+ NumCastVectorElements);
+ Value *Cast = Builder.CreateBitCast(FromVal, CastTy, "tmp");
+ Value *Extract = Builder.CreateExtractElement(Cast, ConstantInt::get(
+ Type::getInt32Ty(Context), 0), "tmp");
+ return Builder.CreateBitCast(Extract, ToType, "tmp");
+ }
+
return Builder.CreateBitCast(FromVal, ToType, "tmp");
+ }
// Otherwise it must be an element access.
unsigned Elt = 0;
@@ -728,6 +821,28 @@ ConvertScalar_InsertValue(Value *SV, Value *Old,
if (ValSize == VecSize)
return Builder.CreateBitCast(SV, AllocaType, "tmp");
+ if (SV->getType()->isVectorTy() && isPowerOf2_64(VecSize / ValSize)) {
+ assert(Offset == 0 && "Can't insert a value of a smaller vector type at "
+ "a nonzero offset.");
+
+ const Type *ToElementTy =
+ cast<VectorType>(SV->getType())->getElementType();
+ unsigned Scale = VecSize / ValSize;
+ const Type *CastElementTy = getScaledElementType(ToElementTy, Scale);
+ unsigned NumCastVectorElements = VTy->getNumElements() / Scale;
+
+ LLVMContext &Context = SV->getContext();
+ const Type *OldCastTy = VectorType::get(CastElementTy,
+ NumCastVectorElements);
+ Value *OldCast = Builder.CreateBitCast(Old, OldCastTy, "tmp");
+
+ Value *SVCast = Builder.CreateBitCast(SV, CastElementTy, "tmp");
+ Value *Insert =
+ Builder.CreateInsertElement(OldCast, SVCast, ConstantInt::get(
+ Type::getInt32Ty(Context), 0), "tmp");
+ return Builder.CreateBitCast(Insert, AllocaType, "tmp");
+ }
+
uint64_t EltSize = TD.getTypeAllocSizeInBits(VTy->getElementType());
// Must be an element insertion.