aboutsummaryrefslogtreecommitdiffstats
path: root/lib/Transforms/Scalar
diff options
context:
space:
mode:
Diffstat (limited to 'lib/Transforms/Scalar')
-rw-r--r--lib/Transforms/Scalar/CodeGenPrepare.cpp2
-rw-r--r--lib/Transforms/Scalar/GVN.cpp10
-rw-r--r--lib/Transforms/Scalar/GlobalMerge.cpp83
-rw-r--r--lib/Transforms/Scalar/SROA.cpp431
4 files changed, 306 insertions, 220 deletions
diff --git a/lib/Transforms/Scalar/CodeGenPrepare.cpp b/lib/Transforms/Scalar/CodeGenPrepare.cpp
index d71dd5d..015fd2e 100644
--- a/lib/Transforms/Scalar/CodeGenPrepare.cpp
+++ b/lib/Transforms/Scalar/CodeGenPrepare.cpp
@@ -154,7 +154,7 @@ bool CodeGenPrepare::runOnFunction(Function &F) {
/// This optimization identifies DIV instructions that can be
/// profitably bypassed and carried out with a shorter, faster divide.
- if (TLI && TLI->isSlowDivBypassed()) {
+ if (!OptSize && TLI && TLI->isSlowDivBypassed()) {
const DenseMap<unsigned int, unsigned int> &BypassWidths =
TLI->getBypassSlowDivWidths();
for (Function::iterator I = F.begin(); I != F.end(); I++)
diff --git a/lib/Transforms/Scalar/GVN.cpp b/lib/Transforms/Scalar/GVN.cpp
index c04b447..129af8d 100644
--- a/lib/Transforms/Scalar/GVN.cpp
+++ b/lib/Transforms/Scalar/GVN.cpp
@@ -1714,7 +1714,7 @@ bool GVN::processNonLocalLoad(LoadInst *LI) {
return true;
}
-static void patchReplacementInstruction(Value *Repl, Instruction *I) {
+static void patchReplacementInstruction(Instruction *I, Value *Repl) {
// Patch the replacement so that it is not more restrictive than the value
// being replaced.
BinaryOperator *Op = dyn_cast<BinaryOperator>(I);
@@ -1756,8 +1756,8 @@ static void patchReplacementInstruction(Value *Repl, Instruction *I) {
}
}
-static void patchAndReplaceAllUsesWith(Value *Repl, Instruction *I) {
- patchReplacementInstruction(Repl, I);
+static void patchAndReplaceAllUsesWith(Instruction *I, Value *Repl) {
+ patchReplacementInstruction(I, Repl);
I->replaceAllUsesWith(Repl);
}
@@ -1919,7 +1919,7 @@ bool GVN::processLoad(LoadInst *L) {
}
// Remove it!
- patchAndReplaceAllUsesWith(AvailableVal, L);
+ patchAndReplaceAllUsesWith(L, AvailableVal);
if (DepLI->getType()->getScalarType()->isPointerTy())
MD->invalidateCachedPointerInfo(DepLI);
markInstructionForDeletion(L);
@@ -2260,7 +2260,7 @@ bool GVN::processInstruction(Instruction *I) {
}
// Remove it!
- patchAndReplaceAllUsesWith(repl, I);
+ patchAndReplaceAllUsesWith(I, repl);
if (MD && repl->getType()->getScalarType()->isPointerTy())
MD->invalidateCachedPointerInfo(repl);
markInstructionForDeletion(I);
diff --git a/lib/Transforms/Scalar/GlobalMerge.cpp b/lib/Transforms/Scalar/GlobalMerge.cpp
index 1601a8d..14e463a 100644
--- a/lib/Transforms/Scalar/GlobalMerge.cpp
+++ b/lib/Transforms/Scalar/GlobalMerge.cpp
@@ -53,6 +53,7 @@
#define DEBUG_TYPE "global-merge"
#include "llvm/Transforms/Scalar.h"
+#include "llvm/ADT/SmallPtrSet.h"
#include "llvm/ADT/Statistic.h"
#include "llvm/IR/Attributes.h"
#include "llvm/IR/Constants.h"
@@ -60,14 +61,22 @@
#include "llvm/IR/DerivedTypes.h"
#include "llvm/IR/Function.h"
#include "llvm/IR/GlobalVariable.h"
+#include "llvm/IR/InlineAsm.h"
#include "llvm/IR/Instructions.h"
#include "llvm/IR/Intrinsics.h"
+#include "llvm/IR/IntrinsicInst.h"
#include "llvm/IR/Module.h"
#include "llvm/Pass.h"
+#include "llvm/Support/CommandLine.h"
#include "llvm/Target/TargetLowering.h"
#include "llvm/Target/TargetLoweringObjectFile.h"
using namespace llvm;
+static cl::opt<bool>
+EnableGlobalMergeOnConst("global-merge-on-const", cl::Hidden,
+ cl::desc("Enable global merge pass on constants"),
+ cl::init(false));
+
STATISTIC(NumMerged , "Number of globals merged");
namespace {
class GlobalMerge : public FunctionPass {
@@ -78,6 +87,23 @@ namespace {
bool doMerge(SmallVectorImpl<GlobalVariable*> &Globals,
Module &M, bool isConst, unsigned AddrSpace) const;
+ /// \brief Check if the given variable has been identified as must keep
+ /// \pre setMustKeepGlobalVariables must have been called on the Module that
+ /// contains GV
+ bool isMustKeepGlobalVariable(const GlobalVariable *GV) const {
+ return MustKeepGlobalVariables.count(GV);
+ }
+
+ /// Collect every variables marked as "used" or used in a landing pad
+ /// instruction for this Module.
+ void setMustKeepGlobalVariables(Module &M);
+
+ /// Collect every variables marked as "used"
+ void collectUsedGlobalVariables(Module &M);
+
+ /// Keep track of the GlobalVariable that are marked as "used"
+ SmallPtrSet<const GlobalVariable *, 16> MustKeepGlobalVariables;
+
public:
static char ID; // Pass identification, replacement for typeid.
explicit GlobalMerge(const TargetLowering *tli = 0)
@@ -169,6 +195,46 @@ bool GlobalMerge::doMerge(SmallVectorImpl<GlobalVariable*> &Globals,
return true;
}
+void GlobalMerge::collectUsedGlobalVariables(Module &M) {
+ // Extract global variables from llvm.used array
+ const GlobalVariable *GV = M.getGlobalVariable("llvm.used");
+ if (!GV || !GV->hasInitializer()) return;
+
+ // Should be an array of 'i8*'.
+ const ConstantArray *InitList = dyn_cast<ConstantArray>(GV->getInitializer());
+ if (InitList == 0) return;
+
+ for (unsigned i = 0, e = InitList->getNumOperands(); i != e; ++i)
+ if (const GlobalVariable *G =
+ dyn_cast<GlobalVariable>(InitList->getOperand(i)->stripPointerCasts()))
+ MustKeepGlobalVariables.insert(G);
+}
+
+void GlobalMerge::setMustKeepGlobalVariables(Module &M) {
+ // If we already processed a Module, UsedGlobalVariables may have been
+ // populated. Reset the information for this module.
+ MustKeepGlobalVariables.clear();
+ collectUsedGlobalVariables(M);
+
+ for (Module::iterator IFn = M.begin(), IEndFn = M.end(); IFn != IEndFn;
+ ++IFn) {
+ for (Function::iterator IBB = IFn->begin(), IEndBB = IFn->end();
+ IBB != IEndBB; ++IBB) {
+ // Follow the inwoke link to find the landing pad instruction
+ const InvokeInst *II = dyn_cast<InvokeInst>(IBB->getTerminator());
+ if (!II) continue;
+
+ const LandingPadInst *LPInst = II->getUnwindDest()->getLandingPadInst();
+ // Look for globals in the clauses of the landing pad instruction
+ for (unsigned Idx = 0, NumClauses = LPInst->getNumClauses();
+ Idx != NumClauses; ++Idx)
+ if (const GlobalVariable *GV =
+ dyn_cast<GlobalVariable>(LPInst->getClause(Idx)
+ ->stripPointerCasts()))
+ MustKeepGlobalVariables.insert(GV);
+ }
+ }
+}
bool GlobalMerge::doInitialization(Module &M) {
DenseMap<unsigned, SmallVector<GlobalVariable*, 16> > Globals, ConstGlobals,
@@ -176,6 +242,7 @@ bool GlobalMerge::doInitialization(Module &M) {
const DataLayout *TD = TLI->getDataLayout();
unsigned MaxOffset = TLI->getMaximalGlobalOffset();
bool Changed = false;
+ setMustKeepGlobalVariables(M);
// Grab all non-const globals.
for (Module::global_iterator I = M.global_begin(),
@@ -200,6 +267,12 @@ bool GlobalMerge::doInitialization(Module &M) {
I->getName().startswith(".llvm."))
continue;
+ // Ignore all "required" globals:
+ // - the ones used for EH
+ // - the ones marked with "used" attribute
+ if (isMustKeepGlobalVariable(I))
+ continue;
+
if (TD->getTypeAllocSize(Ty) < MaxOffset) {
if (TargetLoweringObjectFile::getKindForGlobal(I, TLI->getTargetMachine())
.isBSSLocal())
@@ -221,11 +294,11 @@ bool GlobalMerge::doInitialization(Module &M) {
if (I->second.size() > 1)
Changed |= doMerge(I->second, M, false, I->first);
- // FIXME: This currently breaks the EH processing due to way how the
- // typeinfo detection works. We might want to detect the TIs and ignore
- // them in the future.
- // if (ConstGlobals.size() > 1)
- // Changed |= doMerge(ConstGlobals, M, true);
+ if (EnableGlobalMergeOnConst)
+ for (DenseMap<unsigned, SmallVector<GlobalVariable*, 16> >::iterator
+ I = ConstGlobals.begin(), E = ConstGlobals.end(); I != E; ++I)
+ if (I->second.size() > 1)
+ Changed |= doMerge(I->second, M, true, I->first);
return Changed;
}
diff --git a/lib/Transforms/Scalar/SROA.cpp b/lib/Transforms/Scalar/SROA.cpp
index e90fe90..0e57e5c 100644
--- a/lib/Transforms/Scalar/SROA.cpp
+++ b/lib/Transforms/Scalar/SROA.cpp
@@ -69,112 +69,130 @@ static cl::opt<bool>
ForceSSAUpdater("force-ssa-updater", cl::init(false), cl::Hidden);
namespace {
-/// \brief Alloca partitioning representation.
-///
-/// This class represents a partitioning of an alloca into slices, and
-/// information about the nature of uses of each slice of the alloca. The goal
-/// is that this information is sufficient to decide if and how to split the
-/// alloca apart and replace slices with scalars. It is also intended that this
-/// structure can capture the relevant information needed both to decide about
-/// and to enact these transformations.
-class AllocaPartitioning {
-public:
- /// \brief A common base class for representing a half-open byte range.
- struct ByteRange {
- /// \brief The beginning offset of the range.
- uint64_t BeginOffset;
+/// \brief A common base class for representing a half-open byte range.
+struct ByteRange {
+ /// \brief The beginning offset of the range.
+ uint64_t BeginOffset;
- /// \brief The ending offset, not included in the range.
- uint64_t EndOffset;
+ /// \brief The ending offset, not included in the range.
+ uint64_t EndOffset;
- ByteRange() : BeginOffset(), EndOffset() {}
- ByteRange(uint64_t BeginOffset, uint64_t EndOffset)
- : BeginOffset(BeginOffset), EndOffset(EndOffset) {}
+ ByteRange() : BeginOffset(), EndOffset() {}
+ ByteRange(uint64_t BeginOffset, uint64_t EndOffset)
+ : BeginOffset(BeginOffset), EndOffset(EndOffset) {}
- /// \brief Support for ordering ranges.
- ///
- /// This provides an ordering over ranges such that start offsets are
- /// always increasing, and within equal start offsets, the end offsets are
- /// decreasing. Thus the spanning range comes first in a cluster with the
- /// same start position.
- bool operator<(const ByteRange &RHS) const {
- if (BeginOffset < RHS.BeginOffset) return true;
- if (BeginOffset > RHS.BeginOffset) return false;
- if (EndOffset > RHS.EndOffset) return true;
- return false;
- }
+ /// \brief Support for ordering ranges.
+ ///
+ /// This provides an ordering over ranges such that start offsets are
+ /// always increasing, and within equal start offsets, the end offsets are
+ /// decreasing. Thus the spanning range comes first in a cluster with the
+ /// same start position.
+ bool operator<(const ByteRange &RHS) const {
+ if (BeginOffset < RHS.BeginOffset) return true;
+ if (BeginOffset > RHS.BeginOffset) return false;
+ if (EndOffset > RHS.EndOffset) return true;
+ return false;
+ }
- /// \brief Support comparison with a single offset to allow binary searches.
- friend bool operator<(const ByteRange &LHS, uint64_t RHSOffset) {
- return LHS.BeginOffset < RHSOffset;
- }
+ /// \brief Support comparison with a single offset to allow binary searches.
+ friend bool operator<(const ByteRange &LHS, uint64_t RHSOffset) {
+ return LHS.BeginOffset < RHSOffset;
+ }
- friend LLVM_ATTRIBUTE_UNUSED bool operator<(uint64_t LHSOffset,
- const ByteRange &RHS) {
- return LHSOffset < RHS.BeginOffset;
- }
+ friend LLVM_ATTRIBUTE_UNUSED bool operator<(uint64_t LHSOffset,
+ const ByteRange &RHS) {
+ return LHSOffset < RHS.BeginOffset;
+ }
- bool operator==(const ByteRange &RHS) const {
- return BeginOffset == RHS.BeginOffset && EndOffset == RHS.EndOffset;
- }
- bool operator!=(const ByteRange &RHS) const { return !operator==(RHS); }
- };
+ bool operator==(const ByteRange &RHS) const {
+ return BeginOffset == RHS.BeginOffset && EndOffset == RHS.EndOffset;
+ }
+ bool operator!=(const ByteRange &RHS) const { return !operator==(RHS); }
+};
- /// \brief A partition of an alloca.
+/// \brief A partition of an alloca.
+///
+/// This structure represents a contiguous partition of the alloca. These are
+/// formed by examining the uses of the alloca. During formation, they may
+/// overlap but once an AllocaPartitioning is built, the Partitions within it
+/// are all disjoint.
+struct Partition : public ByteRange {
+ /// \brief Whether this partition is splittable into smaller partitions.
///
- /// This structure represents a contiguous partition of the alloca. These are
- /// formed by examining the uses of the alloca. During formation, they may
- /// overlap but once an AllocaPartitioning is built, the Partitions within it
- /// are all disjoint.
- struct Partition : public ByteRange {
- /// \brief Whether this partition is splittable into smaller partitions.
- ///
- /// We flag partitions as splittable when they are formed entirely due to
- /// accesses by trivially splittable operations such as memset and memcpy.
- bool IsSplittable;
-
- /// \brief Test whether a partition has been marked as dead.
- bool isDead() const {
- if (BeginOffset == UINT64_MAX) {
- assert(EndOffset == UINT64_MAX);
- return true;
- }
- return false;
+ /// We flag partitions as splittable when they are formed entirely due to
+ /// accesses by trivially splittable operations such as memset and memcpy.
+ bool IsSplittable;
+
+ /// \brief Test whether a partition has been marked as dead.
+ bool isDead() const {
+ if (BeginOffset == UINT64_MAX) {
+ assert(EndOffset == UINT64_MAX);
+ return true;
}
+ return false;
+ }
- /// \brief Kill a partition.
- /// This is accomplished by setting both its beginning and end offset to
- /// the maximum possible value.
- void kill() {
- assert(!isDead() && "He's Dead, Jim!");
- BeginOffset = EndOffset = UINT64_MAX;
- }
+ /// \brief Kill a partition.
+ /// This is accomplished by setting both its beginning and end offset to
+ /// the maximum possible value.
+ void kill() {
+ assert(!isDead() && "He's Dead, Jim!");
+ BeginOffset = EndOffset = UINT64_MAX;
+ }
- Partition() : ByteRange(), IsSplittable() {}
- Partition(uint64_t BeginOffset, uint64_t EndOffset, bool IsSplittable)
- : ByteRange(BeginOffset, EndOffset), IsSplittable(IsSplittable) {}
- };
+ Partition() : ByteRange(), IsSplittable() {}
+ Partition(uint64_t BeginOffset, uint64_t EndOffset, bool IsSplittable)
+ : ByteRange(BeginOffset, EndOffset), IsSplittable(IsSplittable) {}
+};
+
+/// \brief A particular use of a partition of the alloca.
+///
+/// This structure is used to associate uses of a partition with it. They
+/// mark the range of bytes which are referenced by a particular instruction,
+/// and includes a handle to the user itself and the pointer value in use.
+/// The bounds of these uses are determined by intersecting the bounds of the
+/// memory use itself with a particular partition. As a consequence there is
+/// intentionally overlap between various uses of the same partition.
+class PartitionUse : public ByteRange {
+ /// \brief Combined storage for both the Use* and split state.
+ PointerIntPair<Use*, 1, bool> UsePtrAndIsSplit;
- /// \brief A particular use of a partition of the alloca.
+public:
+ PartitionUse() : ByteRange(), UsePtrAndIsSplit() {}
+ PartitionUse(uint64_t BeginOffset, uint64_t EndOffset, Use *U,
+ bool IsSplit)
+ : ByteRange(BeginOffset, EndOffset), UsePtrAndIsSplit(U, IsSplit) {}
+
+ /// \brief The use in question. Provides access to both user and used value.
///
- /// This structure is used to associate uses of a partition with it. They
- /// mark the range of bytes which are referenced by a particular instruction,
- /// and includes a handle to the user itself and the pointer value in use.
- /// The bounds of these uses are determined by intersecting the bounds of the
- /// memory use itself with a particular partition. As a consequence there is
- /// intentionally overlap between various uses of the same partition.
- struct PartitionUse : public ByteRange {
- /// \brief The use in question. Provides access to both user and used value.
- ///
- /// Note that this may be null if the partition use is *dead*, that is, it
- /// should be ignored.
- Use *U;
+ /// Note that this may be null if the partition use is *dead*, that is, it
+ /// should be ignored.
+ Use *getUse() const { return UsePtrAndIsSplit.getPointer(); }
- PartitionUse() : ByteRange(), U() {}
- PartitionUse(uint64_t BeginOffset, uint64_t EndOffset, Use *U)
- : ByteRange(BeginOffset, EndOffset), U(U) {}
- };
+ /// \brief Set the use for this partition use range.
+ void setUse(Use *U) { UsePtrAndIsSplit.setPointer(U); }
+
+ /// \brief Whether this use is split across multiple partitions.
+ bool isSplit() const { return UsePtrAndIsSplit.getInt(); }
+};
+}
+
+namespace llvm {
+template <> struct isPodLike<Partition> : llvm::true_type {};
+template <> struct isPodLike<PartitionUse> : llvm::true_type {};
+}
+namespace {
+/// \brief Alloca partitioning representation.
+///
+/// This class represents a partitioning of an alloca into slices, and
+/// information about the nature of uses of each slice of the alloca. The goal
+/// is that this information is sufficient to decide if and how to split the
+/// alloca apart and replace slices with scalars. It is also intended that this
+/// structure can capture the relevant information needed both to decide about
+/// and to enact these transformations.
+class AllocaPartitioning {
+public:
/// \brief Construct a partitioning of a particular alloca.
///
/// Construction does most of the work for partitioning the alloca. This
@@ -456,10 +474,10 @@ private:
// Clamp the end offset to the end of the allocation. Note that this is
// formulated to handle even the case where "BeginOffset + Size" overflows.
- // NOTE! This may appear superficially to be something we could ignore
- // entirely, but that is not so! There may be PHI-node uses where some
- // instructions are dead but not others. We can't completely ignore the
- // PHI node, and so have to record at least the information here.
+ // This may appear superficially to be something we could ignore entirely,
+ // but that is not so! There may be widened loads or PHI-node uses where
+ // some instructions are dead but not others. We can't completely ignore
+ // them, and so have to record at least the information here.
assert(AllocSize >= BeginOffset); // Established above.
if (Size > AllocSize - BeginOffset) {
DEBUG(dbgs() << "WARNING: Clamping a " << Size << " byte use @" << Offset
@@ -474,33 +492,17 @@ private:
}
void handleLoadOrStore(Type *Ty, Instruction &I, const APInt &Offset,
- bool IsVolatile) {
- uint64_t Size = DL.getTypeStoreSize(Ty);
-
- // If this memory access can be shown to *statically* extend outside the
- // bounds of of the allocation, it's behavior is undefined, so simply
- // ignore it. Note that this is more strict than the generic clamping
- // behavior of insertUse. We also try to handle cases which might run the
- // risk of overflow.
- // FIXME: We should instead consider the pointer to have escaped if this
- // function is being instrumented for addressing bugs or race conditions.
- if (Offset.isNegative() || Size > AllocSize ||
- Offset.ugt(AllocSize - Size)) {
- DEBUG(dbgs() << "WARNING: Ignoring " << Size << " byte "
- << (isa<LoadInst>(I) ? "load" : "store") << " @" << Offset
- << " which extends past the end of the " << AllocSize
- << " byte alloca:\n"
- << " alloca: " << P.AI << "\n"
- << " use: " << I << "\n");
- return;
- }
-
+ uint64_t Size, bool IsVolatile) {
// We allow splitting of loads and stores where the type is an integer type
- // and which cover the entire alloca. Such integer loads and stores
- // often require decomposition into fine grained loads and stores.
- bool IsSplittable = false;
- if (IntegerType *ITy = dyn_cast<IntegerType>(Ty))
- IsSplittable = !IsVolatile && ITy->getBitWidth() == AllocSize*8;
+ // and cover the entire alloca. This prevents us from splitting over
+ // eagerly.
+ // FIXME: In the great blue eventually, we should eagerly split all integer
+ // loads and stores, and then have a separate step that merges adjacent
+ // alloca partitions into a single partition suitable for integer widening.
+ // Or we should skip the merge step and rely on GVN and other passes to
+ // merge adjacent loads and stores that survive mem2reg.
+ bool IsSplittable =
+ Ty->isIntegerTy() && !IsVolatile && Offset == 0 && Size >= AllocSize;
insertUse(I, Offset, Size, IsSplittable);
}
@@ -512,7 +514,8 @@ private:
if (!IsOffsetKnown)
return PI.setAborted(&LI);
- return handleLoadOrStore(LI.getType(), LI, Offset, LI.isVolatile());
+ uint64_t Size = DL.getTypeStoreSize(LI.getType());
+ return handleLoadOrStore(LI.getType(), LI, Offset, Size, LI.isVolatile());
}
void visitStoreInst(StoreInst &SI) {
@@ -522,9 +525,28 @@ private:
if (!IsOffsetKnown)
return PI.setAborted(&SI);
+ uint64_t Size = DL.getTypeStoreSize(ValOp->getType());
+
+ // If this memory access can be shown to *statically* extend outside the
+ // bounds of of the allocation, it's behavior is undefined, so simply
+ // ignore it. Note that this is more strict than the generic clamping
+ // behavior of insertUse. We also try to handle cases which might run the
+ // risk of overflow.
+ // FIXME: We should instead consider the pointer to have escaped if this
+ // function is being instrumented for addressing bugs or race conditions.
+ if (Offset.isNegative() || Size > AllocSize ||
+ Offset.ugt(AllocSize - Size)) {
+ DEBUG(dbgs() << "WARNING: Ignoring " << Size << " byte store @" << Offset
+ << " which extends past the end of the " << AllocSize
+ << " byte alloca:\n"
+ << " alloca: " << P.AI << "\n"
+ << " use: " << SI << "\n");
+ return;
+ }
+
assert((!SI.isSimple() || ValOp->getType()->isSingleValueType()) &&
"All simple FCA stores should have been pre-split");
- handleLoadOrStore(ValOp->getType(), SI, Offset, SI.isVolatile());
+ handleLoadOrStore(ValOp->getType(), SI, Offset, Size, SI.isVolatile());
}
@@ -795,13 +817,14 @@ private:
EndOffset = AllocSize;
// NB: This only works if we have zero overlapping partitions.
- iterator B = std::lower_bound(P.begin(), P.end(), BeginOffset);
- if (B != P.begin() && llvm::prior(B)->EndOffset > BeginOffset)
- B = llvm::prior(B);
- for (iterator I = B, E = P.end(); I != E && I->BeginOffset < EndOffset;
- ++I) {
+ iterator I = std::lower_bound(P.begin(), P.end(), BeginOffset);
+ if (I != P.begin() && llvm::prior(I)->EndOffset > BeginOffset)
+ I = llvm::prior(I);
+ iterator E = P.end();
+ bool IsSplit = llvm::next(I) != E && llvm::next(I)->BeginOffset < EndOffset;
+ for (; I != E && I->BeginOffset < EndOffset; ++I) {
PartitionUse NewPU(std::max(I->BeginOffset, BeginOffset),
- std::min(I->EndOffset, EndOffset), U);
+ std::min(I->EndOffset, EndOffset), U, IsSplit);
P.use_push_back(I, NewPU);
if (isa<PHINode>(U->getUser()) || isa<SelectInst>(U->getUser()))
P.PHIOrSelectOpMap[U]
@@ -809,20 +832,6 @@ private:
}
}
- void handleLoadOrStore(Type *Ty, Instruction &I, const APInt &Offset) {
- uint64_t Size = DL.getTypeStoreSize(Ty);
-
- // If this memory access can be shown to *statically* extend outside the
- // bounds of of the allocation, it's behavior is undefined, so simply
- // ignore it. Note that this is more strict than the generic clamping
- // behavior of insertUse.
- if (Offset.isNegative() || Size > AllocSize ||
- Offset.ugt(AllocSize - Size))
- return markAsDead(I);
-
- insertUse(I, Offset, Size);
- }
-
void visitBitCastInst(BitCastInst &BC) {
if (BC.use_empty())
return markAsDead(BC);
@@ -839,12 +848,23 @@ private:
void visitLoadInst(LoadInst &LI) {
assert(IsOffsetKnown);
- handleLoadOrStore(LI.getType(), LI, Offset);
+ uint64_t Size = DL.getTypeStoreSize(LI.getType());
+ insertUse(LI, Offset, Size);
}
void visitStoreInst(StoreInst &SI) {
assert(IsOffsetKnown);
- handleLoadOrStore(SI.getOperand(0)->getType(), SI, Offset);
+ uint64_t Size = DL.getTypeStoreSize(SI.getOperand(0)->getType());
+
+ // If this memory access can be shown to *statically* extend outside the
+ // bounds of of the allocation, it's behavior is undefined, so simply
+ // ignore it. Note that this is more strict than the generic clamping
+ // behavior of insertUse.
+ if (Offset.isNegative() || Size > AllocSize ||
+ Offset.ugt(AllocSize - Size))
+ return markAsDead(SI);
+
+ insertUse(SI, Offset, Size);
}
void visitMemSetInst(MemSetInst &II) {
@@ -1089,21 +1109,21 @@ AllocaPartitioning::AllocaPartitioning(const DataLayout &TD, AllocaInst &AI)
Type *AllocaPartitioning::getCommonType(iterator I) const {
Type *Ty = 0;
for (const_use_iterator UI = use_begin(I), UE = use_end(I); UI != UE; ++UI) {
- if (!UI->U)
+ Use *U = UI->getUse();
+ if (!U)
continue; // Skip dead uses.
- if (isa<IntrinsicInst>(*UI->U->getUser()))
+ if (isa<IntrinsicInst>(*U->getUser()))
continue;
if (UI->BeginOffset != I->BeginOffset || UI->EndOffset != I->EndOffset)
continue;
Type *UserTy = 0;
- if (LoadInst *LI = dyn_cast<LoadInst>(UI->U->getUser())) {
+ if (LoadInst *LI = dyn_cast<LoadInst>(U->getUser()))
UserTy = LI->getType();
- } else if (StoreInst *SI = dyn_cast<StoreInst>(UI->U->getUser())) {
+ else if (StoreInst *SI = dyn_cast<StoreInst>(U->getUser()))
UserTy = SI->getValueOperand()->getType();
- } else {
+ else
return 0; // Bail if we have weird uses.
- }
if (IntegerType *ITy = dyn_cast<IntegerType>(UserTy)) {
// If the type is larger than the partition, skip it. We only encounter
@@ -1140,11 +1160,12 @@ void AllocaPartitioning::print(raw_ostream &OS, const_iterator I,
void AllocaPartitioning::printUsers(raw_ostream &OS, const_iterator I,
StringRef Indent) const {
for (const_use_iterator UI = use_begin(I), UE = use_end(I); UI != UE; ++UI) {
- if (!UI->U)
+ if (!UI->getUse())
continue; // Skip dead uses.
OS << Indent << " [" << UI->BeginOffset << "," << UI->EndOffset << ") "
- << "used by: " << *UI->U->getUser() << "\n";
- if (MemTransferInst *II = dyn_cast<MemTransferInst>(UI->U->getUser())) {
+ << "used by: " << *UI->getUse()->getUser() << "\n";
+ if (MemTransferInst *II =
+ dyn_cast<MemTransferInst>(UI->getUse()->getUser())) {
const MemTransferOffsets &MTO = MemTransferInstData.lookup(II);
bool IsDest;
if (!MTO.IsSplittable)
@@ -1375,11 +1396,11 @@ public:
// may be grown during speculation. However, we never need to re-visit the
// new uses, and so we can use the initial size bound.
for (unsigned Idx = 0, Size = P.use_size(PI); Idx != Size; ++Idx) {
- const AllocaPartitioning::PartitionUse &PU = P.getUse(PI, Idx);
- if (!PU.U)
+ const PartitionUse &PU = P.getUse(PI, Idx);
+ if (!PU.getUse())
continue; // Skip dead use.
- visit(cast<Instruction>(PU.U->getUser()));
+ visit(cast<Instruction>(PU.getUse()->getUser()));
}
}
@@ -1523,8 +1544,8 @@ private:
// inside the load.
AllocaPartitioning::use_iterator UI
= P.findPartitionUseForPHIOrSelectOperand(InUse);
- assert(isa<PHINode>(*UI->U->getUser()));
- UI->U = &Load->getOperandUse(Load->getPointerOperandIndex());
+ assert(isa<PHINode>(*UI->getUse()->getUser()));
+ UI->setUse(&Load->getOperandUse(Load->getPointerOperandIndex()));
}
DEBUG(dbgs() << " speculated to: " << *NewPN << "\n");
}
@@ -1571,16 +1592,16 @@ private:
void visitSelectInst(SelectInst &SI) {
DEBUG(dbgs() << " original: " << SI << "\n");
- IRBuilder<> IRB(&SI);
// If the select isn't safe to speculate, just use simple logic to emit it.
SmallVector<LoadInst *, 4> Loads;
if (!isSafeSelectToSpeculate(SI, Loads))
return;
+ IRBuilder<> IRB(&SI);
Use *Ops[2] = { &SI.getOperandUse(1), &SI.getOperandUse(2) };
AllocaPartitioning::iterator PIs[2];
- AllocaPartitioning::PartitionUse PUs[2];
+ PartitionUse PUs[2];
for (unsigned i = 0, e = 2; i != e; ++i) {
PIs[i] = P.findPartitionForPHIOrSelectOperand(Ops[i]);
if (PIs[i] != P.end()) {
@@ -1591,7 +1612,7 @@ private:
PUs[i] = *UI;
// Clear out the use here so that the offsets into the use list remain
// stable but this use is ignored when rewriting.
- UI->U = 0;
+ UI->setUse(0);
}
}
@@ -1623,8 +1644,8 @@ private:
for (unsigned i = 0, e = 2; i != e; ++i) {
if (PIs[i] != P.end()) {
Use *LoadUse = &Loads[i]->getOperandUse(0);
- assert(PUs[i].U->get() == LoadUse->get());
- PUs[i].U = LoadUse;
+ assert(PUs[i].getUse()->get() == LoadUse->get());
+ PUs[i].setUse(LoadUse);
P.use_push_back(PIs[i], PUs[i]);
}
}
@@ -1911,6 +1932,10 @@ static Value *getAdjustedPtr(IRBuilder<> &IRB, const DataLayout &TD,
static bool canConvertValue(const DataLayout &DL, Type *OldTy, Type *NewTy) {
if (OldTy == NewTy)
return true;
+ if (IntegerType *OldITy = dyn_cast<IntegerType>(OldTy))
+ if (IntegerType *NewITy = dyn_cast<IntegerType>(NewTy))
+ if (NewITy->getBitWidth() >= OldITy->getBitWidth())
+ return true;
if (DL.getTypeSizeInBits(NewTy) != DL.getTypeSizeInBits(OldTy))
return false;
if (!NewTy->isSingleValueType() || !OldTy->isSingleValueType())
@@ -1939,6 +1964,10 @@ static Value *convertValue(const DataLayout &DL, IRBuilder<> &IRB, Value *V,
"Value not convertable to type");
if (V->getType() == Ty)
return V;
+ if (IntegerType *OldITy = dyn_cast<IntegerType>(V->getType()))
+ if (IntegerType *NewITy = dyn_cast<IntegerType>(Ty))
+ if (NewITy->getBitWidth() > OldITy->getBitWidth())
+ return IRB.CreateZExt(V, NewITy);
if (V->getType()->isIntegerTy() && Ty->isPointerTy())
return IRB.CreateIntToPtr(V, Ty);
if (V->getType()->isPointerTy() && Ty->isIntegerTy())
@@ -1977,7 +2006,8 @@ static bool isVectorPromotionViable(const DataLayout &TD,
ElementSize /= 8;
for (; I != E; ++I) {
- if (!I->U)
+ Use *U = I->getUse();
+ if (!U)
continue; // Skip dead use.
uint64_t BeginOffset = I->BeginOffset - PartitionBeginOffset;
@@ -1997,24 +2027,24 @@ static bool isVectorPromotionViable(const DataLayout &TD,
= (NumElements == 1) ? Ty->getElementType()
: VectorType::get(Ty->getElementType(), NumElements);
- if (MemIntrinsic *MI = dyn_cast<MemIntrinsic>(I->U->getUser())) {
+ if (MemIntrinsic *MI = dyn_cast<MemIntrinsic>(U->getUser())) {
if (MI->isVolatile())
return false;
- if (MemTransferInst *MTI = dyn_cast<MemTransferInst>(I->U->getUser())) {
+ if (MemTransferInst *MTI = dyn_cast<MemTransferInst>(U->getUser())) {
const AllocaPartitioning::MemTransferOffsets &MTO
= P.getMemTransferOffsets(*MTI);
if (!MTO.IsSplittable)
return false;
}
- } else if (I->U->get()->getType()->getPointerElementType()->isStructTy()) {
+ } else if (U->get()->getType()->getPointerElementType()->isStructTy()) {
// Disable vector promotion when there are loads or stores of an FCA.
return false;
- } else if (LoadInst *LI = dyn_cast<LoadInst>(I->U->getUser())) {
+ } else if (LoadInst *LI = dyn_cast<LoadInst>(U->getUser())) {
if (LI->isVolatile())
return false;
if (!canConvertValue(TD, PartitionTy, LI->getType()))
return false;
- } else if (StoreInst *SI = dyn_cast<StoreInst>(I->U->getUser())) {
+ } else if (StoreInst *SI = dyn_cast<StoreInst>(U->getUser())) {
if (SI->isVolatile())
return false;
if (!canConvertValue(TD, SI->getValueOperand()->getType(), PartitionTy))
@@ -2063,7 +2093,8 @@ static bool isIntegerWideningViable(const DataLayout &TD,
// unsplittable entry (which we may make splittable later).
bool WholeAllocaOp = false;
for (; I != E; ++I) {
- if (!I->U)
+ Use *U = I->getUse();
+ if (!U)
continue; // Skip dead use.
uint64_t RelBegin = I->BeginOffset - AllocBeginOffset;
@@ -2074,7 +2105,7 @@ static bool isIntegerWideningViable(const DataLayout &TD,
if (RelEnd > Size)
return false;
- if (LoadInst *LI = dyn_cast<LoadInst>(I->U->getUser())) {
+ if (LoadInst *LI = dyn_cast<LoadInst>(U->getUser())) {
if (LI->isVolatile())
return false;
if (RelBegin == 0 && RelEnd == Size)
@@ -2089,7 +2120,7 @@ static bool isIntegerWideningViable(const DataLayout &TD,
if (RelBegin != 0 || RelEnd != Size ||
!canConvertValue(TD, AllocaTy, LI->getType()))
return false;
- } else if (StoreInst *SI = dyn_cast<StoreInst>(I->U->getUser())) {
+ } else if (StoreInst *SI = dyn_cast<StoreInst>(U->getUser())) {
Type *ValueTy = SI->getValueOperand()->getType();
if (SI->isVolatile())
return false;
@@ -2105,16 +2136,16 @@ static bool isIntegerWideningViable(const DataLayout &TD,
if (RelBegin != 0 || RelEnd != Size ||
!canConvertValue(TD, ValueTy, AllocaTy))
return false;
- } else if (MemIntrinsic *MI = dyn_cast<MemIntrinsic>(I->U->getUser())) {
+ } else if (MemIntrinsic *MI = dyn_cast<MemIntrinsic>(U->getUser())) {
if (MI->isVolatile() || !isa<Constant>(MI->getLength()))
return false;
- if (MemTransferInst *MTI = dyn_cast<MemTransferInst>(I->U->getUser())) {
+ if (MemTransferInst *MTI = dyn_cast<MemTransferInst>(U->getUser())) {
const AllocaPartitioning::MemTransferOffsets &MTO
= P.getMemTransferOffsets(*MTI);
if (!MTO.IsSplittable)
return false;
}
- } else if (IntrinsicInst *II = dyn_cast<IntrinsicInst>(I->U->getUser())) {
+ } else if (IntrinsicInst *II = dyn_cast<IntrinsicInst>(U->getUser())) {
if (II->getIntrinsicID() != Intrinsic::lifetime_start &&
II->getIntrinsicID() != Intrinsic::lifetime_end)
return false;
@@ -2297,6 +2328,7 @@ class AllocaPartitionRewriter : public InstVisitor<AllocaPartitionRewriter,
// The offset of the partition user currently being rewritten.
uint64_t BeginOffset, EndOffset;
+ bool IsSplit;
Use *OldUse;
Instruction *OldPtr;
@@ -2314,7 +2346,7 @@ public:
NewAllocaEndOffset(NewEndOffset),
NewAllocaTy(NewAI.getAllocatedType()),
VecTy(), ElementTy(), ElementSize(), IntTy(),
- BeginOffset(), EndOffset() {
+ BeginOffset(), EndOffset(), IsSplit(), OldUse(), OldPtr() {
}
/// \brief Visit the users of the alloca partition and rewrite them.
@@ -2336,14 +2368,15 @@ public:
}
bool CanSROA = true;
for (; I != E; ++I) {
- if (!I->U)
+ if (!I->getUse())
continue; // Skip dead uses.
BeginOffset = I->BeginOffset;
EndOffset = I->EndOffset;
- OldUse = I->U;
- OldPtr = cast<Instruction>(I->U->get());
+ IsSplit = I->isSplit();
+ OldUse = I->getUse();
+ OldPtr = cast<Instruction>(OldUse->get());
NamePrefix = (Twine(NewAI.getName()) + "." + Twine(BeginOffset)).str();
- CanSROA &= visit(cast<Instruction>(I->U->getUser()));
+ CanSROA &= visit(cast<Instruction>(OldUse->getUser()));
}
if (VecTy) {
assert(CanSROA);
@@ -2450,29 +2483,12 @@ private:
DEBUG(dbgs() << " original: " << LI << "\n");
Value *OldOp = LI.getOperand(0);
assert(OldOp == OldPtr);
- IRBuilder<> IRB(&LI);
uint64_t Size = EndOffset - BeginOffset;
- bool IsSplitIntLoad = Size < TD.getTypeStoreSize(LI.getType());
- // If this memory access can be shown to *statically* extend outside the
- // bounds of the original allocation it's behavior is undefined. Rather
- // than trying to transform it, just replace it with undef.
- // FIXME: We should do something more clever for functions being
- // instrumented by asan.
- // FIXME: Eventually, once ASan and friends can flush out bugs here, this
- // should be transformed to a load of null making it unreachable.
- uint64_t OldAllocSize = TD.getTypeAllocSize(OldAI.getAllocatedType());
- if (TD.getTypeStoreSize(LI.getType()) > OldAllocSize) {
- LI.replaceAllUsesWith(UndefValue::get(LI.getType()));
- Pass.DeadInsts.insert(&LI);
- deleteIfTriviallyDead(OldOp);
- DEBUG(dbgs() << " to: undef!!\n");
- return true;
- }
-
- Type *TargetTy = IsSplitIntLoad ? Type::getIntNTy(LI.getContext(), Size * 8)
- : LI.getType();
+ IRBuilder<> IRB(&LI);
+ Type *TargetTy = IsSplit ? Type::getIntNTy(LI.getContext(), Size * 8)
+ : LI.getType();
bool IsPtrAdjusted = false;
Value *V;
if (VecTy) {
@@ -2492,16 +2508,15 @@ private:
}
V = convertValue(TD, IRB, V, TargetTy);
- if (IsSplitIntLoad) {
+ if (IsSplit) {
assert(!LI.isVolatile());
assert(LI.getType()->isIntegerTy() &&
"Only integer type loads and stores are split");
+ assert(Size < TD.getTypeStoreSize(LI.getType()) &&
+ "Split load isn't smaller than original load");
assert(LI.getType()->getIntegerBitWidth() ==
TD.getTypeStoreSizeInBits(LI.getType()) &&
"Non-byte-multiple bit width");
- assert(LI.getType()->getIntegerBitWidth() ==
- TD.getTypeAllocSizeInBits(OldAI.getAllocatedType()) &&
- "Only alloca-wide loads can be split and recomposed");
// Move the insertion point just past the load so that we can refer to it.
IRB.SetInsertPoint(llvm::next(BasicBlock::iterator(&LI)));
// Create a placeholder value with the same type as LI to use as the
@@ -2588,14 +2603,12 @@ private:
uint64_t Size = EndOffset - BeginOffset;
if (Size < TD.getTypeStoreSize(V->getType())) {
assert(!SI.isVolatile());
+ assert(IsSplit && "A seemingly split store isn't splittable");
assert(V->getType()->isIntegerTy() &&
"Only integer type loads and stores are split");
assert(V->getType()->getIntegerBitWidth() ==
TD.getTypeStoreSizeInBits(V->getType()) &&
"Non-byte-multiple bit width");
- assert(V->getType()->getIntegerBitWidth() ==
- TD.getTypeAllocSizeInBits(OldAI.getAllocatedType()) &&
- "Only alloca-wide stores can be split and recomposed");
IntegerType *NarrowTy = Type::getIntNTy(SI.getContext(), Size * 8);
V = extractInteger(TD, IRB, V, NarrowTy, BeginOffset,
getName(".extract"));
@@ -3381,7 +3394,7 @@ bool SROA::rewriteAllocaPartition(AllocaInst &AI,
for (AllocaPartitioning::use_iterator UI = P.use_begin(PI),
UE = P.use_end(PI);
UI != UE && !IsLive; ++UI)
- if (UI->U)
+ if (UI->getUse())
IsLive = true;
if (!IsLive)
return false; // No live uses left of this partition.