diff options
Diffstat (limited to 'lib/Transforms/Scalar/LoopStrengthReduce.cpp')
-rw-r--r-- | lib/Transforms/Scalar/LoopStrengthReduce.cpp | 221 |
1 files changed, 140 insertions, 81 deletions
diff --git a/lib/Transforms/Scalar/LoopStrengthReduce.cpp b/lib/Transforms/Scalar/LoopStrengthReduce.cpp index 64d493c..3a4108a 100644 --- a/lib/Transforms/Scalar/LoopStrengthReduce.cpp +++ b/lib/Transforms/Scalar/LoopStrengthReduce.cpp @@ -113,7 +113,7 @@ class RegUseTracker { public: void CountRegister(const SCEV *Reg, size_t LUIdx); void DropRegister(const SCEV *Reg, size_t LUIdx); - void DropUse(size_t LUIdx); + void SwapAndDropUse(size_t LUIdx, size_t LastLUIdx); bool isRegUsedByUsesOtherThan(const SCEV *Reg, size_t LUIdx) const; @@ -152,18 +152,27 @@ RegUseTracker::DropRegister(const SCEV *Reg, size_t LUIdx) { } void -RegUseTracker::DropUse(size_t LUIdx) { - // Remove the use index from every register's use list. +RegUseTracker::SwapAndDropUse(size_t LUIdx, size_t LastLUIdx) { + assert(LUIdx <= LastLUIdx); + + // Update RegUses. The data structure is not optimized for this purpose; + // we must iterate through it and update each of the bit vectors. for (RegUsesTy::iterator I = RegUsesMap.begin(), E = RegUsesMap.end(); - I != E; ++I) - I->second.UsedByIndices.reset(LUIdx); + I != E; ++I) { + SmallBitVector &UsedByIndices = I->second.UsedByIndices; + if (LUIdx < UsedByIndices.size()) + UsedByIndices[LUIdx] = + LastLUIdx < UsedByIndices.size() ? UsedByIndices[LastLUIdx] : 0; + UsedByIndices.resize(std::min(UsedByIndices.size(), LastLUIdx)); + } } bool RegUseTracker::isRegUsedByUsesOtherThan(const SCEV *Reg, size_t LUIdx) const { - if (!RegUsesMap.count(Reg)) return false; - const SmallBitVector &UsedByIndices = - RegUsesMap.find(Reg)->second.UsedByIndices; + RegUsesTy::const_iterator I = RegUsesMap.find(Reg); + if (I == RegUsesMap.end()) + return false; + const SmallBitVector &UsedByIndices = I->second.UsedByIndices; int i = UsedByIndices.find_first(); if (i == -1) return false; if ((size_t)i != LUIdx) return true; @@ -607,7 +616,7 @@ DeleteTriviallyDeadInstructions(SmallVectorImpl<WeakVH> &DeadInsts) { bool Changed = false; while (!DeadInsts.empty()) { - Instruction *I = dyn_cast_or_null<Instruction>(DeadInsts.pop_back_val()); + Instruction *I = dyn_cast_or_null<Instruction>(&*DeadInsts.pop_back_val()); if (I == 0 || !isInstructionTriviallyDead(I)) continue; @@ -644,8 +653,6 @@ public: : NumRegs(0), AddRecCost(0), NumIVMuls(0), NumBaseAdds(0), ImmCost(0), SetupCost(0) {} - unsigned getNumRegs() const { return NumRegs; } - bool operator<(const Cost &Other) const; void Loose(); @@ -721,6 +728,9 @@ void Cost::RateRegister(const SCEV *Reg, (isa<SCEVUnknown>(cast<SCEVAddRecExpr>(Reg)->getStart()) || isa<SCEVConstant>(cast<SCEVAddRecExpr>(Reg)->getStart())))) ++SetupCost; + + NumIVMuls += isa<SCEVMulExpr>(Reg) && + Reg->hasComputableLoopEvolution(L); } /// RatePrimaryRegister - Record this register in the set. If we haven't seen it @@ -755,9 +765,6 @@ void Cost::RateFormula(const Formula &F, return; } RatePrimaryRegister(BaseReg, Regs, L, SE, DT); - - NumIVMuls += isa<SCEVMulExpr>(BaseReg) && - BaseReg->hasComputableLoopEvolution(L); } if (F.BaseRegs.size() > 1) @@ -994,8 +1001,6 @@ public: void DeleteFormula(Formula &F); void RecomputeRegs(size_t LUIdx, RegUseTracker &Reguses); - void check() const; - void print(raw_ostream &OS) const; void dump() const; }; @@ -1258,32 +1263,6 @@ struct UseMapDenseMapInfo { } }; -/// FormulaSorter - This class implements an ordering for formulae which sorts -/// the by their standalone cost. -class FormulaSorter { - /// These two sets are kept empty, so that we compute standalone costs. - DenseSet<const SCEV *> VisitedRegs; - SmallPtrSet<const SCEV *, 16> Regs; - Loop *L; - LSRUse *LU; - ScalarEvolution &SE; - DominatorTree &DT; - -public: - FormulaSorter(Loop *l, LSRUse &lu, ScalarEvolution &se, DominatorTree &dt) - : L(l), LU(&lu), SE(se), DT(dt) {} - - bool operator()(const Formula &A, const Formula &B) { - Cost CostA; - CostA.RateFormula(A, Regs, VisitedRegs, L, LU->Offsets, SE, DT); - Regs.clear(); - Cost CostB; - CostB.RateFormula(B, Regs, VisitedRegs, L, LU->Offsets, SE, DT); - Regs.clear(); - return CostA < CostB; - } -}; - /// LSRInstance - This class holds state for the main loop strength reduction /// logic. class LSRInstance { @@ -1342,7 +1321,7 @@ class LSRInstance { LSRUse::KindType Kind, const Type *AccessTy); - void DeleteUse(LSRUse &LU); + void DeleteUse(LSRUse &LU, size_t LUIdx); LSRUse *FindUseWithSimilarFormula(const Formula &F, const LSRUse &OrigLU); @@ -1368,6 +1347,10 @@ public: void FilterOutUndesirableDedicatedRegisters(); size_t EstimateSearchSpaceComplexity() const; + void NarrowSearchSpaceByDetectingSupersets(); + void NarrowSearchSpaceByCollapsingUnrolledCode(); + void NarrowSearchSpaceByRefilteringUndesirableDedicatedRegisters(); + void NarrowSearchSpaceByPickingWinnerRegs(); void NarrowSearchSpaceUsingHeuristics(); void SolveRecurse(SmallVectorImpl<const Formula *> &Solution, @@ -1922,10 +1905,13 @@ LSRInstance::getUse(const SCEV *&Expr, } /// DeleteUse - Delete the given use from the Uses list. -void LSRInstance::DeleteUse(LSRUse &LU) { +void LSRInstance::DeleteUse(LSRUse &LU, size_t LUIdx) { if (&LU != &Uses.back()) std::swap(LU, Uses.back()); Uses.pop_back(); + + // Update RegUses. + RegUses.SwapAndDropUse(LUIdx, Uses.size()); } /// FindUseWithFormula - Look for a use distinct from OrigLU which is has @@ -1933,33 +1919,41 @@ void LSRInstance::DeleteUse(LSRUse &LU) { LSRUse * LSRInstance::FindUseWithSimilarFormula(const Formula &OrigF, const LSRUse &OrigLU) { - // Search all uses for the formula. This could be more clever. Ignore - // ICmpZero uses because they may contain formulae generated by - // GenerateICmpZeroScales, in which case adding fixup offsets may - // be invalid. + // Search all uses for the formula. This could be more clever. for (size_t LUIdx = 0, NumUses = Uses.size(); LUIdx != NumUses; ++LUIdx) { LSRUse &LU = Uses[LUIdx]; + // Check whether this use is close enough to OrigLU, to see whether it's + // worthwhile looking through its formulae. + // Ignore ICmpZero uses because they may contain formulae generated by + // GenerateICmpZeroScales, in which case adding fixup offsets may + // be invalid. if (&LU != &OrigLU && LU.Kind != LSRUse::ICmpZero && LU.Kind == OrigLU.Kind && OrigLU.AccessTy == LU.AccessTy && LU.WidestFixupType == OrigLU.WidestFixupType && LU.HasFormulaWithSameRegs(OrigF)) { + // Scan through this use's formulae. for (SmallVectorImpl<Formula>::const_iterator I = LU.Formulae.begin(), E = LU.Formulae.end(); I != E; ++I) { const Formula &F = *I; + // Check to see if this formula has the same registers and symbols + // as OrigF. if (F.BaseRegs == OrigF.BaseRegs && F.ScaledReg == OrigF.ScaledReg && F.AM.BaseGV == OrigF.AM.BaseGV && - F.AM.Scale == OrigF.AM.Scale && - LU.Kind) { + F.AM.Scale == OrigF.AM.Scale) { if (F.AM.BaseOffs == 0) return &LU; + // This is the formula where all the registers and symbols matched; + // there aren't going to be any others. Since we declined it, we + // can skip the rest of the formulae and procede to the next LSRUse. break; } } } } + // Nothing looked good. return 0; } @@ -2796,11 +2790,17 @@ LSRInstance::GenerateAllReuseFormulae() { } GenerateCrossUseConstantOffsets(); + + DEBUG(dbgs() << "\n" + "After generating reuse formulae:\n"; + print_uses(dbgs())); } -/// If their are multiple formulae with the same set of registers used +/// If there are multiple formulae with the same set of registers used /// by other uses, pick the best one and delete the others. void LSRInstance::FilterOutUndesirableDedicatedRegisters() { + DenseSet<const SCEV *> VisitedRegs; + SmallPtrSet<const SCEV *, 16> Regs; #ifndef NDEBUG bool ChangedFormulae = false; #endif @@ -2813,7 +2813,6 @@ void LSRInstance::FilterOutUndesirableDedicatedRegisters() { for (size_t LUIdx = 0, NumUses = Uses.size(); LUIdx != NumUses; ++LUIdx) { LSRUse &LU = Uses[LUIdx]; - FormulaSorter Sorter(L, LU, SE, DT); DEBUG(dbgs() << "Filtering for use "; LU.print(dbgs()); dbgs() << '\n'); bool Any = false; @@ -2839,7 +2838,14 @@ void LSRInstance::FilterOutUndesirableDedicatedRegisters() { BestFormulae.insert(std::make_pair(Key, FIdx)); if (!P.second) { Formula &Best = LU.Formulae[P.first->second]; - if (Sorter.operator()(F, Best)) + + Cost CostF; + CostF.RateFormula(F, Regs, VisitedRegs, L, LU.Offsets, SE, DT); + Regs.clear(); + Cost CostBest; + CostBest.RateFormula(Best, Regs, VisitedRegs, L, LU.Offsets, SE, DT); + Regs.clear(); + if (CostF < CostBest) std::swap(F, Best); DEBUG(dbgs() << " Filtering out formula "; F.print(dbgs()); dbgs() << "\n" @@ -2879,7 +2885,7 @@ static const size_t ComplexityLimit = UINT16_MAX; /// this many solutions because it prune the search space, but the pruning /// isn't always sufficient. size_t LSRInstance::EstimateSearchSpaceComplexity() const { - uint32_t Power = 1; + size_t Power = 1; for (SmallVectorImpl<LSRUse>::const_iterator I = Uses.begin(), E = Uses.end(); I != E; ++I) { size_t FSize = I->Formulae.size(); @@ -2894,11 +2900,11 @@ size_t LSRInstance::EstimateSearchSpaceComplexity() const { return Power; } -/// NarrowSearchSpaceUsingHeuristics - If there are an extraordinary number of -/// formulae to choose from, use some rough heuristics to prune down the number -/// of formulae. This keeps the main solver from taking an extraordinary amount -/// of time in some worst-case scenarios. -void LSRInstance::NarrowSearchSpaceUsingHeuristics() { +/// NarrowSearchSpaceByDetectingSupersets - When one formula uses a superset +/// of the registers of another formula, it won't help reduce register +/// pressure (though it may not necessarily hurt register pressure); remove +/// it to simplify the system. +void LSRInstance::NarrowSearchSpaceByDetectingSupersets() { if (EstimateSearchSpaceComplexity() >= ComplexityLimit) { DEBUG(dbgs() << "The search space is too complex.\n"); @@ -2956,7 +2962,12 @@ void LSRInstance::NarrowSearchSpaceUsingHeuristics() { DEBUG(dbgs() << "After pre-selection:\n"; print_uses(dbgs())); } +} +/// NarrowSearchSpaceByCollapsingUnrolledCode - When there are many registers +/// for expressions like A, A+1, A+2, etc., allocate a single register for +/// them. +void LSRInstance::NarrowSearchSpaceByCollapsingUnrolledCode() { if (EstimateSearchSpaceComplexity() >= ComplexityLimit) { DEBUG(dbgs() << "The search space is too complex.\n"); @@ -2981,6 +2992,28 @@ void LSRInstance::NarrowSearchSpaceUsingHeuristics() { LUThatHas->AllFixupsOutsideLoop &= LU.AllFixupsOutsideLoop; + // Update the relocs to reference the new use. + for (SmallVectorImpl<LSRFixup>::iterator I = Fixups.begin(), + E = Fixups.end(); I != E; ++I) { + LSRFixup &Fixup = *I; + if (Fixup.LUIdx == LUIdx) { + Fixup.LUIdx = LUThatHas - &Uses.front(); + Fixup.Offset += F.AM.BaseOffs; + // Add the new offset to LUThatHas' offset list. + if (LUThatHas->Offsets.back() != Fixup.Offset) { + LUThatHas->Offsets.push_back(Fixup.Offset); + if (Fixup.Offset > LUThatHas->MaxOffset) + LUThatHas->MaxOffset = Fixup.Offset; + if (Fixup.Offset < LUThatHas->MinOffset) + LUThatHas->MinOffset = Fixup.Offset; + } + DEBUG(dbgs() << "New fixup has offset " + << Fixup.Offset << '\n'); + } + if (Fixup.LUIdx == NumUses-1) + Fixup.LUIdx = LUIdx; + } + // Delete formulae from the new use which are no longer legal. bool Any = false; for (size_t i = 0, e = LUThatHas->Formulae.size(); i != e; ++i) { @@ -2999,22 +3032,8 @@ void LSRInstance::NarrowSearchSpaceUsingHeuristics() { if (Any) LUThatHas->RecomputeRegs(LUThatHas - &Uses.front(), RegUses); - // Update the relocs to reference the new use. - for (SmallVectorImpl<LSRFixup>::iterator I = Fixups.begin(), - E = Fixups.end(); I != E; ++I) { - LSRFixup &Fixup = *I; - if (Fixup.LUIdx == LUIdx) { - Fixup.LUIdx = LUThatHas - &Uses.front(); - Fixup.Offset += F.AM.BaseOffs; - DEBUG(dbgs() << "New fixup has offset " - << Fixup.Offset << '\n'); - } - if (Fixup.LUIdx == NumUses-1) - Fixup.LUIdx = LUIdx; - } - // Delete the old use. - DeleteUse(LU); + DeleteUse(LU, LUIdx); --LUIdx; --NumUses; break; @@ -3027,7 +3046,30 @@ void LSRInstance::NarrowSearchSpaceUsingHeuristics() { DEBUG(dbgs() << "After pre-selection:\n"; print_uses(dbgs())); } +} + +/// NarrowSearchSpaceByRefilteringUndesirableDedicatedRegisters - Call +/// FilterOutUndesirableDedicatedRegisters again, if necessary, now that +/// we've done more filtering, as it may be able to find more formulae to +/// eliminate. +void LSRInstance::NarrowSearchSpaceByRefilteringUndesirableDedicatedRegisters(){ + if (EstimateSearchSpaceComplexity() >= ComplexityLimit) { + DEBUG(dbgs() << "The search space is too complex.\n"); + + DEBUG(dbgs() << "Narrowing the search space by re-filtering out " + "undesirable dedicated registers.\n"); + + FilterOutUndesirableDedicatedRegisters(); + + DEBUG(dbgs() << "After pre-selection:\n"; + print_uses(dbgs())); + } +} +/// NarrowSearchSpaceByPickingWinnerRegs - Pick a register which seems likely +/// to be profitable, and then in any use which has any reference to that +/// register, delete all formulae which do not reference that register. +void LSRInstance::NarrowSearchSpaceByPickingWinnerRegs() { // With all other options exhausted, loop until the system is simple // enough to handle. SmallPtrSet<const SCEV *, 4> Taken; @@ -3089,6 +3131,17 @@ void LSRInstance::NarrowSearchSpaceUsingHeuristics() { } } +/// NarrowSearchSpaceUsingHeuristics - If there are an extraordinary number of +/// formulae to choose from, use some rough heuristics to prune down the number +/// of formulae. This keeps the main solver from taking an extraordinary amount +/// of time in some worst-case scenarios. +void LSRInstance::NarrowSearchSpaceUsingHeuristics() { + NarrowSearchSpaceByDetectingSupersets(); + NarrowSearchSpaceByCollapsingUnrolledCode(); + NarrowSearchSpaceByRefilteringUndesirableDedicatedRegisters(); + NarrowSearchSpaceByPickingWinnerRegs(); +} + /// SolveRecurse - This is the recursive solver. void LSRInstance::SolveRecurse(SmallVectorImpl<const Formula *> &Solution, Cost &SolutionCost, @@ -3632,10 +3685,6 @@ LSRInstance::LSRInstance(const TargetLowering *tli, Loop *l, Pass *P) // to formulate the values needed for the uses. GenerateAllReuseFormulae(); - DEBUG(dbgs() << "\n" - "After generating reuse formulae:\n"; - print_uses(dbgs())); - FilterOutUndesirableDedicatedRegisters(); NarrowSearchSpaceUsingHeuristics(); @@ -3742,15 +3791,25 @@ private: } char LoopStrengthReduce::ID = 0; -INITIALIZE_PASS(LoopStrengthReduce, "loop-reduce", - "Loop Strength Reduction", false, false); +INITIALIZE_PASS_BEGIN(LoopStrengthReduce, "loop-reduce", + "Loop Strength Reduction", false, false) +INITIALIZE_PASS_DEPENDENCY(DominatorTree) +INITIALIZE_PASS_DEPENDENCY(ScalarEvolution) +INITIALIZE_PASS_DEPENDENCY(IVUsers) +INITIALIZE_PASS_DEPENDENCY(LoopInfo) +INITIALIZE_PASS_DEPENDENCY(LoopSimplify) +INITIALIZE_PASS_END(LoopStrengthReduce, "loop-reduce", + "Loop Strength Reduction", false, false) + Pass *llvm::createLoopStrengthReducePass(const TargetLowering *TLI) { return new LoopStrengthReduce(TLI); } LoopStrengthReduce::LoopStrengthReduce(const TargetLowering *tli) - : LoopPass(ID), TLI(tli) {} + : LoopPass(ID), TLI(tli) { + initializeLoopStrengthReducePass(*PassRegistry::getPassRegistry()); + } void LoopStrengthReduce::getAnalysisUsage(AnalysisUsage &AU) const { // We split critical edges, so we change the CFG. However, we do update |