diff options
author | Chandler Carruth <chandlerc@gmail.com> | 2013-08-11 02:17:11 +0000 |
---|---|---|
committer | Chandler Carruth <chandlerc@gmail.com> | 2013-08-11 02:17:11 +0000 |
commit | 5b854f1ea55601790d9191c9720e77da35095340 (patch) | |
tree | 9c3fdd78bcb539ebc2f33954b8c782898b86805a | |
parent | 37508bb842d9beedd75139a589c6f538f90efbaa (diff) | |
download | external_llvm-5b854f1ea55601790d9191c9720e77da35095340.zip external_llvm-5b854f1ea55601790d9191c9720e77da35095340.tar.gz external_llvm-5b854f1ea55601790d9191c9720e77da35095340.tar.bz2 |
Re-instate r187323 which fast-tracks promotable allocas as soon as the
SROA-based analysis has enough information. This should work now that
both mem2reg *and* the SSAUpdater-based AllocaPromoter have been updated
to be able to promote the types of allocas that the SROA analysis
detects.
I've included tests for the AllocaPromoter that were only possible to
write once we fast-tracked promotable allocas without rewriting them.
This includes a test both for r187347 and r188145.
Original commit log for r187323:
"""
Now that mem2reg understands how to cope with a slightly wider set of uses of
an alloca, we can pre-compute promotability while analyzing an alloca for
splitting in SROA. That lets us short-circuit the common case of a bunch of
trivially promotable allocas. This cuts 20% to 30% off the run time of SROA for
typical frontend-generated IR sequneces I'm seeing. It gets the new SROA to
within 20% of ScalarRepl for such code. My current benchmark for these numbers
is PR15412, but it fits the general pattern of IR emitted by Clang so it should
be widely applicable.
"""
git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@188146 91177308-0d34-0410-b5e6-96231b3b80d8
-rw-r--r-- | lib/Transforms/Scalar/SROA.cpp | 93 |
1 files changed, 81 insertions, 12 deletions
diff --git a/lib/Transforms/Scalar/SROA.cpp b/lib/Transforms/Scalar/SROA.cpp index d35c3b5..be3ef6f 100644 --- a/lib/Transforms/Scalar/SROA.cpp +++ b/lib/Transforms/Scalar/SROA.cpp @@ -197,6 +197,18 @@ public: /// \brief Construct the slices of a particular alloca. AllocaSlices(const DataLayout &DL, AllocaInst &AI); + /// \brief Whether we determined during the trivial analysis of the alloca + /// that it was immediately promotable with mem2reg. + bool isAllocaPromotable() const { return IsAllocaPromotable; } + + /// \brief A list of directly stored values when \c isAllocaPromotable is + /// true. + /// + /// The contents are undefined if the alloca is not trivially promotable. + /// This is used to detect other allocas which should be iterated on when + /// doing direct promotion. + ArrayRef<Value *> getStoredValues() const { return StoredValues; } + /// \brief Test whether a pointer to the allocation escapes our analysis. /// /// If this is true, the slices are never fully built and should be @@ -253,10 +265,20 @@ private: class SliceBuilder; friend class AllocaSlices::SliceBuilder; -#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP) /// \brief Handle to alloca instruction to simplify method interfaces. AllocaInst &AI; -#endif + + /// \brief A flag indicating if the alloca is trivially promotable. + /// + /// While walking the alloca's uses we track when the uses exceed what + /// mem2reg can trivially handle. This essentially should match the logic in + /// \c isAllocaPromotable but re-using the existing walk of the pointer uses. + bool IsAllocaPromotable; + + /// \brief Storage for stored values. + /// + /// Only used while the alloca is trivially promotable. + SmallVector<Value *, 8> StoredValues; /// \brief The instruction responsible for this alloca not having a known set /// of slices. @@ -325,9 +347,9 @@ class AllocaSlices::SliceBuilder : public PtrUseVisitor<SliceBuilder> { SmallPtrSet<Instruction *, 4> VisitedDeadInsts; public: - SliceBuilder(const DataLayout &DL, AllocaInst &AI, AllocaSlices &S) + SliceBuilder(const DataLayout &DL, AllocaSlices &S) : PtrUseVisitor<SliceBuilder>(DL), - AllocSize(DL.getTypeAllocSize(AI.getAllocatedType())), S(S) {} + AllocSize(DL.getTypeAllocSize(S.AI.getAllocatedType())), S(S) {} private: void markAsDead(Instruction &I) { @@ -380,6 +402,15 @@ private: if (GEPI.use_empty()) return markAsDead(GEPI); + // FIXME: mem2reg shouldn't care about the nature of the GEP, but instead + // the offsets of the loads. Until then, we short-circuit here for the + // promotable case. + if (GEPI.hasAllZeroIndices()) + return Base::enqueueUsers(GEPI); + + // Otherwise, there is something in the GEP, so we disable mem2reg and + // accumulate it. + S.IsAllocaPromotable = false; return Base::visitGetElementPtrInst(GEPI); } @@ -396,6 +427,13 @@ private: bool IsSplittable = Ty->isIntegerTy() && !IsVolatile && Offset == 0 && Size >= AllocSize; + // mem2reg can only promote non-volatile loads and stores which exactly + // load the alloca (no offset and the right type). + if (IsVolatile || Offset != 0 || Ty != S.AI.getAllocatedType()) + S.IsAllocaPromotable = false; + if (S.IsAllocaPromotable) + assert(Offset == 0); + insertUse(I, Offset, Size, IsSplittable); } @@ -436,6 +474,9 @@ private: return markAsDead(SI); } + if (S.IsAllocaPromotable) + S.StoredValues.push_back(ValOp); + assert((!SI.isSimple() || ValOp->getType()->isSingleValueType()) && "All simple FCA stores should have been pre-split"); handleLoadOrStore(ValOp->getType(), SI, Offset, Size, SI.isVolatile()); @@ -453,6 +494,8 @@ private: if (!IsOffsetKnown) return PI.setAborted(&II); + S.IsAllocaPromotable = false; + insertUse(II, Offset, Length ? Length->getLimitedValue() : AllocSize - Offset.getLimitedValue(), @@ -469,6 +512,8 @@ private: if (!IsOffsetKnown) return PI.setAborted(&II); + S.IsAllocaPromotable = false; + uint64_t RawOffset = Offset.getLimitedValue(); uint64_t Size = Length ? Length->getLimitedValue() : AllocSize - RawOffset; @@ -529,6 +574,8 @@ private: return; } + S.IsAllocaPromotable = false; + Base::visitIntrinsicInst(II); } @@ -603,6 +650,8 @@ private: return; } + S.IsAllocaPromotable = false; + insertUse(PN, Offset, PHISize); } @@ -610,14 +659,18 @@ private: if (SI.use_empty()) return markAsDead(SI); if (Value *Result = foldSelectInst(SI)) { - if (Result == *U) + if (Result == *U) { // If the result of the constant fold will be the pointer, recurse // through the select as if we had RAUW'ed it. enqueueUsers(SI); - else + + // FIXME: mem2reg should support this pattern, but it doesn't. + S.IsAllocaPromotable = false; + } else { // Otherwise the operand to the select is dead, and we can replace it // with undef. S.DeadOperands.push_back(U); + } return; } @@ -644,6 +697,8 @@ private: return; } + S.IsAllocaPromotable = false; + insertUse(SI, Offset, SelectSize); } @@ -654,12 +709,8 @@ private: }; AllocaSlices::AllocaSlices(const DataLayout &DL, AllocaInst &AI) - : -#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP) - AI(AI), -#endif - PointerEscapingInstr(0) { - SliceBuilder PB(DL, AI, *this); + : AI(AI), IsAllocaPromotable(true), PointerEscapingInstr(0) { + SliceBuilder PB(DL, *this); SliceBuilder::PtrInfo PtrI = PB.visitPtr(AI); if (PtrI.isEscaped() || PtrI.isAborted()) { // FIXME: We should sink the escape vs. abort info into the caller nicely, @@ -3339,6 +3390,24 @@ bool SROA::runOnAlloca(AllocaInst &AI) { if (S.begin() == S.end()) return Changed; + // Trivially promotable, don't go through the splitting and rewriting. + if (S.isAllocaPromotable()) { + DEBUG(dbgs() << " Directly promoting alloca: " << AI << "\n"); + PromotableAllocas.push_back(&AI); + + // Walk through the stored values quickly here to handle directly + // promotable allocas that require iterating on other allocas. + ArrayRef<Value *> StoredValues = S.getStoredValues(); + for (ArrayRef<Value *>::iterator SVI = StoredValues.begin(), + SVE = StoredValues.end(); + SVI != SVE; ++SVI) + if ((*SVI)->getType()->isPointerTy()) + if (AllocaInst *SAI = + dyn_cast<AllocaInst>((*SVI)->stripInBoundsOffsets())) + PostPromotionWorklist.insert(SAI); + return true; + } + Changed |= splitAlloca(AI, S); DEBUG(dbgs() << " Speculating PHIs\n"); |