author     Chandler Carruth <chandlerc@gmail.com>  2013-08-11 02:17:11 +0000
committer  Chandler Carruth <chandlerc@gmail.com>  2013-08-11 02:17:11 +0000
commit     5b854f1ea55601790d9191c9720e77da35095340 (patch)
tree       9c3fdd78bcb539ebc2f33954b8c782898b86805a
parent     37508bb842d9beedd75139a589c6f538f90efbaa (diff)
Re-instate r187323 which fast-tracks promotable allocas as soon as the
SROA-based analysis has enough information. This should work now that both
mem2reg *and* the SSAUpdater-based AllocaPromoter have been updated to be
able to promote the types of allocas that the SROA analysis detects.

I've included tests for the AllocaPromoter that were only possible to write
once we fast-tracked promotable allocas without rewriting them. This
includes a test both for r187347 and r188145.

Original commit log for r187323:
"""
Now that mem2reg understands how to cope with a slightly wider set of uses
of an alloca, we can pre-compute promotability while analyzing an alloca
for splitting in SROA. That lets us short-circuit the common case of a
bunch of trivially promotable allocas.

This cuts 20% to 30% off the run time of SROA for typical
frontend-generated IR sequences I'm seeing. It gets the new SROA to within
20% of ScalarRepl for such code. My current benchmark for these numbers is
PR15412, but it fits the general pattern of IR emitted by Clang so it
should be widely applicable.
"""

git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@188146 91177308-0d34-0410-b5e6-96231b3b80d8
-rw-r--r--  lib/Transforms/Scalar/SROA.cpp  93
1 file changed, 81 insertions, 12 deletions
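As a rough illustration of the property the fast path below keys on, here is a minimal sketch, not taken from this patch, of a standalone "trivially promotable" test: the alloca qualifies only while every use is a direct, non-volatile load or store of the allocated type. The helper names isTriviallyPromotableUse and isTriviallyPromotable are hypothetical, and the sketch is written against a more recent LLVM C++ API (Value::users(), auto) than the 2013 tree this diff applies to; the patch itself computes the equivalent flag incrementally inside SliceBuilder rather than re-walking the users.

#include "llvm/IR/Instructions.h"

using namespace llvm;

// Hypothetical helper: does this single use keep the alloca eligible for
// direct mem2reg promotion?
static bool isTriviallyPromotableUse(const AllocaInst &AI, const User *U) {
  if (const auto *LI = dyn_cast<LoadInst>(U))
    // A whole-alloca, non-volatile load of the allocated type is fine.
    return !LI->isVolatile() && LI->getType() == AI.getAllocatedType();
  if (const auto *SI = dyn_cast<StoreInst>(U))
    // Likewise for stores, as long as the alloca's own address is not the
    // value being stored (that would let the pointer escape).
    return !SI->isVolatile() && SI->getValueOperand() != &AI &&
           SI->getValueOperand()->getType() == AI.getAllocatedType();
  // The patch additionally tunnels through all-zero-index GEPs; this
  // conservative sketch simply rejects everything else (memcpy/memset,
  // PHIs, selects, escaping uses).
  return false;
}

// Hypothetical driver: the whole-alloca form of the per-use check above.
static bool isTriviallyPromotable(const AllocaInst &AI) {
  for (const User *U : AI.users())
    if (!isTriviallyPromotableUse(AI, U))
      return false;
  return true;
}

This is only meant to make the invariant readable; in the patch the corresponding state lives in AllocaSlices::IsAllocaPromotable and is cleared by the relevant SliceBuilder visitors as the existing use walk runs, so no separate pass over the users is needed.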
diff --git a/lib/Transforms/Scalar/SROA.cpp b/lib/Transforms/Scalar/SROA.cpp
index d35c3b5..be3ef6f 100644
--- a/lib/Transforms/Scalar/SROA.cpp
+++ b/lib/Transforms/Scalar/SROA.cpp
@@ -197,6 +197,18 @@ public:
/// \brief Construct the slices of a particular alloca.
AllocaSlices(const DataLayout &DL, AllocaInst &AI);
+ /// \brief Whether we determined during the trivial analysis of the alloca
+ /// that it was immediately promotable with mem2reg.
+ bool isAllocaPromotable() const { return IsAllocaPromotable; }
+
+ /// \brief A list of directly stored values when \c isAllocaPromotable is
+ /// true.
+ ///
+ /// The contents are undefined if the alloca is not trivially promotable.
+ /// This is used to detect other allocas which should be iterated on when
+ /// doing direct promotion.
+ ArrayRef<Value *> getStoredValues() const { return StoredValues; }
+
/// \brief Test whether a pointer to the allocation escapes our analysis.
///
/// If this is true, the slices are never fully built and should be
@@ -253,10 +265,20 @@ private:
class SliceBuilder;
friend class AllocaSlices::SliceBuilder;
-#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
/// \brief Handle to alloca instruction to simplify method interfaces.
AllocaInst &AI;
-#endif
+
+ /// \brief A flag indicating if the alloca is trivially promotable.
+ ///
+ /// While walking the alloca's uses we track when the uses exceed what
+ /// mem2reg can trivially handle. This essentially should match the logic in
+ /// \c isAllocaPromotable but re-using the existing walk of the pointer uses.
+ bool IsAllocaPromotable;
+
+ /// \brief Storage for stored values.
+ ///
+ /// Only used while the alloca is trivially promotable.
+ SmallVector<Value *, 8> StoredValues;
/// \brief The instruction responsible for this alloca not having a known set
/// of slices.
@@ -325,9 +347,9 @@ class AllocaSlices::SliceBuilder : public PtrUseVisitor<SliceBuilder> {
SmallPtrSet<Instruction *, 4> VisitedDeadInsts;
public:
- SliceBuilder(const DataLayout &DL, AllocaInst &AI, AllocaSlices &S)
+ SliceBuilder(const DataLayout &DL, AllocaSlices &S)
: PtrUseVisitor<SliceBuilder>(DL),
- AllocSize(DL.getTypeAllocSize(AI.getAllocatedType())), S(S) {}
+ AllocSize(DL.getTypeAllocSize(S.AI.getAllocatedType())), S(S) {}
private:
void markAsDead(Instruction &I) {
@@ -380,6 +402,15 @@ private:
if (GEPI.use_empty())
return markAsDead(GEPI);
+ // FIXME: mem2reg shouldn't care about the nature of the GEP, but instead
+ // the offsets of the loads. Until then, we short-circuit here for the
+ // promotable case.
+ if (GEPI.hasAllZeroIndices())
+ return Base::enqueueUsers(GEPI);
+
+ // Otherwise, there is something in the GEP, so we disable mem2reg and
+ // accumulate it.
+ S.IsAllocaPromotable = false;
return Base::visitGetElementPtrInst(GEPI);
}
@@ -396,6 +427,13 @@ private:
bool IsSplittable =
Ty->isIntegerTy() && !IsVolatile && Offset == 0 && Size >= AllocSize;
+ // mem2reg can only promote non-volatile loads and stores which exactly
+ // load the alloca (no offset and the right type).
+ if (IsVolatile || Offset != 0 || Ty != S.AI.getAllocatedType())
+ S.IsAllocaPromotable = false;
+ if (S.IsAllocaPromotable)
+ assert(Offset == 0);
+
insertUse(I, Offset, Size, IsSplittable);
}
@@ -436,6 +474,9 @@ private:
return markAsDead(SI);
}
+ if (S.IsAllocaPromotable)
+ S.StoredValues.push_back(ValOp);
+
assert((!SI.isSimple() || ValOp->getType()->isSingleValueType()) &&
"All simple FCA stores should have been pre-split");
handleLoadOrStore(ValOp->getType(), SI, Offset, Size, SI.isVolatile());
@@ -453,6 +494,8 @@ private:
if (!IsOffsetKnown)
return PI.setAborted(&II);
+ S.IsAllocaPromotable = false;
+
insertUse(II, Offset,
Length ? Length->getLimitedValue()
: AllocSize - Offset.getLimitedValue(),
@@ -469,6 +512,8 @@ private:
if (!IsOffsetKnown)
return PI.setAborted(&II);
+ S.IsAllocaPromotable = false;
+
uint64_t RawOffset = Offset.getLimitedValue();
uint64_t Size = Length ? Length->getLimitedValue()
: AllocSize - RawOffset;
@@ -529,6 +574,8 @@ private:
return;
}
+ S.IsAllocaPromotable = false;
+
Base::visitIntrinsicInst(II);
}
@@ -603,6 +650,8 @@ private:
return;
}
+ S.IsAllocaPromotable = false;
+
insertUse(PN, Offset, PHISize);
}
@@ -610,14 +659,18 @@ private:
if (SI.use_empty())
return markAsDead(SI);
if (Value *Result = foldSelectInst(SI)) {
- if (Result == *U)
+ if (Result == *U) {
// If the result of the constant fold will be the pointer, recurse
// through the select as if we had RAUW'ed it.
enqueueUsers(SI);
- else
+
+ // FIXME: mem2reg should support this pattern, but it doesn't.
+ S.IsAllocaPromotable = false;
+ } else {
// Otherwise the operand to the select is dead, and we can replace it
// with undef.
S.DeadOperands.push_back(U);
+ }
return;
}
@@ -644,6 +697,8 @@ private:
return;
}
+ S.IsAllocaPromotable = false;
+
insertUse(SI, Offset, SelectSize);
}
@@ -654,12 +709,8 @@ private:
};
AllocaSlices::AllocaSlices(const DataLayout &DL, AllocaInst &AI)
- :
-#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
- AI(AI),
-#endif
- PointerEscapingInstr(0) {
- SliceBuilder PB(DL, AI, *this);
+ : AI(AI), IsAllocaPromotable(true), PointerEscapingInstr(0) {
+ SliceBuilder PB(DL, *this);
SliceBuilder::PtrInfo PtrI = PB.visitPtr(AI);
if (PtrI.isEscaped() || PtrI.isAborted()) {
// FIXME: We should sink the escape vs. abort info into the caller nicely,
@@ -3339,6 +3390,24 @@ bool SROA::runOnAlloca(AllocaInst &AI) {
if (S.begin() == S.end())
return Changed;
+ // Trivially promotable, don't go through the splitting and rewriting.
+ if (S.isAllocaPromotable()) {
+ DEBUG(dbgs() << " Directly promoting alloca: " << AI << "\n");
+ PromotableAllocas.push_back(&AI);
+
+ // Walk through the stored values quickly here to handle directly
+ // promotable allocas that require iterating on other allocas.
+ ArrayRef<Value *> StoredValues = S.getStoredValues();
+ for (ArrayRef<Value *>::iterator SVI = StoredValues.begin(),
+ SVE = StoredValues.end();
+ SVI != SVE; ++SVI)
+ if ((*SVI)->getType()->isPointerTy())
+ if (AllocaInst *SAI =
+ dyn_cast<AllocaInst>((*SVI)->stripInBoundsOffsets()))
+ PostPromotionWorklist.insert(SAI);
+ return true;
+ }
+
Changed |= splitAlloca(AI, S);
DEBUG(dbgs() << " Speculating PHIs\n");