aboutsummaryrefslogtreecommitdiffstats
path: root/lib/Transforms/Scalar/LoopStrengthReduce.cpp
diff options
context:
space:
mode:
authorStephen Hines <srhines@google.com>2014-05-29 02:49:00 -0700
committerStephen Hines <srhines@google.com>2014-05-29 02:49:00 -0700
commitdce4a407a24b04eebc6a376f8e62b41aaa7b071f (patch)
treedcebc53f2b182f145a2e659393bf9a0472cedf23 /lib/Transforms/Scalar/LoopStrengthReduce.cpp
parent220b921aed042f9e520c26cffd8282a94c66c3d5 (diff)
downloadexternal_llvm-dce4a407a24b04eebc6a376f8e62b41aaa7b071f.zip
external_llvm-dce4a407a24b04eebc6a376f8e62b41aaa7b071f.tar.gz
external_llvm-dce4a407a24b04eebc6a376f8e62b41aaa7b071f.tar.bz2
Update LLVM for 3.5 rebase (r209712).
Change-Id: I149556c940fb7dc92d075273c87ff584f400941f
Diffstat (limited to 'lib/Transforms/Scalar/LoopStrengthReduce.cpp')
-rw-r--r--lib/Transforms/Scalar/LoopStrengthReduce.cpp683
1 files changed, 440 insertions, 243 deletions
diff --git a/lib/Transforms/Scalar/LoopStrengthReduce.cpp b/lib/Transforms/Scalar/LoopStrengthReduce.cpp
index 272a16d..914b56a 100644
--- a/lib/Transforms/Scalar/LoopStrengthReduce.cpp
+++ b/lib/Transforms/Scalar/LoopStrengthReduce.cpp
@@ -53,7 +53,6 @@
//
//===----------------------------------------------------------------------===//
-#define DEBUG_TYPE "loop-reduce"
#include "llvm/Transforms/Scalar.h"
#include "llvm/ADT/DenseSet.h"
#include "llvm/ADT/Hashing.h"
@@ -78,6 +77,8 @@
#include <algorithm>
using namespace llvm;
+#define DEBUG_TYPE "loop-reduce"
+
/// MaxIVUsers is an arbitrary threshold that provides an early opportunitiy for
/// bail out. This threshold is far beyond the number of users that LSR can
/// conceivably solve, so it should not affect generated code, but catches the
@@ -237,7 +238,15 @@ struct Formula {
int64_t Scale;
/// BaseRegs - The list of "base" registers for this use. When this is
- /// non-empty,
+ /// non-empty. The canonical representation of a formula is
+ /// 1. BaseRegs.size > 1 implies ScaledReg != NULL and
+ /// 2. ScaledReg != NULL implies Scale != 1 || !BaseRegs.empty().
+ /// #1 enforces that the scaled register is always used when at least two
+ /// registers are needed by the formula: e.g., reg1 + reg2 is reg1 + 1 * reg2.
+ /// #2 enforces that 1 * reg is reg.
+ /// This invariant can be temporarly broken while building a formula.
+ /// However, every formula inserted into the LSRInstance must be in canonical
+ /// form.
SmallVector<const SCEV *, 4> BaseRegs;
/// ScaledReg - The 'scaled' register for this use. This should be non-null
@@ -250,12 +259,18 @@ struct Formula {
int64_t UnfoldedOffset;
Formula()
- : BaseGV(0), BaseOffset(0), HasBaseReg(false), Scale(0), ScaledReg(0),
- UnfoldedOffset(0) {}
+ : BaseGV(nullptr), BaseOffset(0), HasBaseReg(false), Scale(0),
+ ScaledReg(nullptr), UnfoldedOffset(0) {}
void InitialMatch(const SCEV *S, Loop *L, ScalarEvolution &SE);
- unsigned getNumRegs() const;
+ bool isCanonical() const;
+
+ void Canonicalize();
+
+ bool Unscale();
+
+ size_t getNumRegs() const;
Type *getType() const;
void DeleteBaseReg(const SCEV *&S);
@@ -345,12 +360,58 @@ void Formula::InitialMatch(const SCEV *S, Loop *L, ScalarEvolution &SE) {
BaseRegs.push_back(Sum);
HasBaseReg = true;
}
+ Canonicalize();
+}
+
+/// \brief Check whether or not this formula statisfies the canonical
+/// representation.
+/// \see Formula::BaseRegs.
+bool Formula::isCanonical() const {
+ if (ScaledReg)
+ return Scale != 1 || !BaseRegs.empty();
+ return BaseRegs.size() <= 1;
+}
+
+/// \brief Helper method to morph a formula into its canonical representation.
+/// \see Formula::BaseRegs.
+/// Every formula having more than one base register, must use the ScaledReg
+/// field. Otherwise, we would have to do special cases everywhere in LSR
+/// to treat reg1 + reg2 + ... the same way as reg1 + 1*reg2 + ...
+/// On the other hand, 1*reg should be canonicalized into reg.
+void Formula::Canonicalize() {
+ if (isCanonical())
+ return;
+ // So far we did not need this case. This is easy to implement but it is
+ // useless to maintain dead code. Beside it could hurt compile time.
+ assert(!BaseRegs.empty() && "1*reg => reg, should not be needed.");
+ // Keep the invariant sum in BaseRegs and one of the variant sum in ScaledReg.
+ ScaledReg = BaseRegs.back();
+ BaseRegs.pop_back();
+ Scale = 1;
+ size_t BaseRegsSize = BaseRegs.size();
+ size_t Try = 0;
+ // If ScaledReg is an invariant, try to find a variant expression.
+ while (Try < BaseRegsSize && !isa<SCEVAddRecExpr>(ScaledReg))
+ std::swap(ScaledReg, BaseRegs[Try++]);
+}
+
+/// \brief Get rid of the scale in the formula.
+/// In other words, this method morphes reg1 + 1*reg2 into reg1 + reg2.
+/// \return true if it was possible to get rid of the scale, false otherwise.
+/// \note After this operation the formula may not be in the canonical form.
+bool Formula::Unscale() {
+ if (Scale != 1)
+ return false;
+ Scale = 0;
+ BaseRegs.push_back(ScaledReg);
+ ScaledReg = nullptr;
+ return true;
}
/// getNumRegs - Return the total number of register operands used by this
/// formula. This does not include register uses implied by non-constant
/// addrec strides.
-unsigned Formula::getNumRegs() const {
+size_t Formula::getNumRegs() const {
return !!ScaledReg + BaseRegs.size();
}
@@ -360,7 +421,7 @@ Type *Formula::getType() const {
return !BaseRegs.empty() ? BaseRegs.front()->getType() :
ScaledReg ? ScaledReg->getType() :
BaseGV ? BaseGV->getType() :
- 0;
+ nullptr;
}
/// DeleteBaseReg - Delete the given base reg from the BaseRegs list.
@@ -487,11 +548,11 @@ static const SCEV *getExactSDiv(const SCEV *LHS, const SCEV *RHS,
// Check for a division of a constant by a constant.
if (const SCEVConstant *C = dyn_cast<SCEVConstant>(LHS)) {
if (!RC)
- return 0;
+ return nullptr;
const APInt &LA = C->getValue()->getValue();
const APInt &RA = RC->getValue()->getValue();
if (LA.srem(RA) != 0)
- return 0;
+ return nullptr;
return SE.getConstant(LA.sdiv(RA));
}
@@ -500,16 +561,16 @@ static const SCEV *getExactSDiv(const SCEV *LHS, const SCEV *RHS,
if (IgnoreSignificantBits || isAddRecSExtable(AR, SE)) {
const SCEV *Step = getExactSDiv(AR->getStepRecurrence(SE), RHS, SE,
IgnoreSignificantBits);
- if (!Step) return 0;
+ if (!Step) return nullptr;
const SCEV *Start = getExactSDiv(AR->getStart(), RHS, SE,
IgnoreSignificantBits);
- if (!Start) return 0;
+ if (!Start) return nullptr;
// FlagNW is independent of the start value, step direction, and is
// preserved with smaller magnitude steps.
// FIXME: AR->getNoWrapFlags(SCEV::FlagNW)
return SE.getAddRecExpr(Start, Step, AR->getLoop(), SCEV::FlagAnyWrap);
}
- return 0;
+ return nullptr;
}
// Distribute the sdiv over add operands, if the add doesn't overflow.
@@ -520,12 +581,12 @@ static const SCEV *getExactSDiv(const SCEV *LHS, const SCEV *RHS,
I != E; ++I) {
const SCEV *Op = getExactSDiv(*I, RHS, SE,
IgnoreSignificantBits);
- if (!Op) return 0;
+ if (!Op) return nullptr;
Ops.push_back(Op);
}
return SE.getAddExpr(Ops);
}
- return 0;
+ return nullptr;
}
// Check for a multiply operand that we can pull RHS out of.
@@ -544,13 +605,13 @@ static const SCEV *getExactSDiv(const SCEV *LHS, const SCEV *RHS,
}
Ops.push_back(S);
}
- return Found ? SE.getMulExpr(Ops) : 0;
+ return Found ? SE.getMulExpr(Ops) : nullptr;
}
- return 0;
+ return nullptr;
}
// Otherwise we don't know.
- return 0;
+ return nullptr;
}
/// ExtractImmediate - If S involves the addition of a constant integer value,
@@ -604,7 +665,7 @@ static GlobalValue *ExtractSymbol(const SCEV *&S, ScalarEvolution &SE) {
SCEV::FlagAnyWrap);
return Result;
}
- return 0;
+ return nullptr;
}
/// isAddressUse - Returns true if the specified instruction is using the
@@ -755,12 +816,12 @@ DeleteTriviallyDeadInstructions(SmallVectorImpl<WeakVH> &DeadInsts) {
Value *V = DeadInsts.pop_back_val();
Instruction *I = dyn_cast_or_null<Instruction>(V);
- if (I == 0 || !isInstructionTriviallyDead(I))
+ if (!I || !isInstructionTriviallyDead(I))
continue;
for (User::op_iterator OI = I->op_begin(), E = I->op_end(); OI != E; ++OI)
if (Instruction *U = dyn_cast<Instruction>(*OI)) {
- *OI = 0;
+ *OI = nullptr;
if (U->use_empty())
DeadInsts.push_back(U);
}
@@ -775,9 +836,18 @@ DeleteTriviallyDeadInstructions(SmallVectorImpl<WeakVH> &DeadInsts) {
namespace {
class LSRUse;
}
-// Check if it is legal to fold 2 base registers.
-static bool isLegal2RegAMUse(const TargetTransformInfo &TTI, const LSRUse &LU,
- const Formula &F);
+
+/// \brief Check if the addressing mode defined by \p F is completely
+/// folded in \p LU at isel time.
+/// This includes address-mode folding and special icmp tricks.
+/// This function returns true if \p LU can accommodate what \p F
+/// defines and up to 1 base + 1 scaled + offset.
+/// In other words, if \p F has several base registers, this function may
+/// still return true. Therefore, users still need to account for
+/// additional base registers and/or unfolded offsets to derive an
+/// accurate cost model.
+static bool isAMCompletelyFolded(const TargetTransformInfo &TTI,
+ const LSRUse &LU, const Formula &F);
// Get the cost of the scaling factor used in F for LU.
static unsigned getScalingFactorCost(const TargetTransformInfo &TTI,
const LSRUse &LU, const Formula &F);
@@ -828,7 +898,7 @@ public:
const SmallVectorImpl<int64_t> &Offsets,
ScalarEvolution &SE, DominatorTree &DT,
const LSRUse &LU,
- SmallPtrSet<const SCEV *, 16> *LoserRegs = 0);
+ SmallPtrSet<const SCEV *, 16> *LoserRegs = nullptr);
void print(raw_ostream &OS) const;
void dump() const;
@@ -921,6 +991,7 @@ void Cost::RateFormula(const TargetTransformInfo &TTI,
ScalarEvolution &SE, DominatorTree &DT,
const LSRUse &LU,
SmallPtrSet<const SCEV *, 16> *LoserRegs) {
+ assert(F.isCanonical() && "Cost is accurate only for canonical formula");
// Tally up the registers.
if (const SCEV *ScaledReg = F.ScaledReg) {
if (VisitedRegs.count(ScaledReg)) {
@@ -944,11 +1015,13 @@ void Cost::RateFormula(const TargetTransformInfo &TTI,
}
// Determine how many (unfolded) adds we'll need inside the loop.
- size_t NumBaseParts = F.BaseRegs.size() + (F.UnfoldedOffset != 0);
+ size_t NumBaseParts = F.getNumRegs();
if (NumBaseParts > 1)
// Do not count the base and a possible second register if the target
// allows to fold 2 registers.
- NumBaseAdds += NumBaseParts - (1 + isLegal2RegAMUse(TTI, LU, F));
+ NumBaseAdds +=
+ NumBaseParts - (1 + (F.Scale && isAMCompletelyFolded(TTI, LU, F)));
+ NumBaseAdds += (F.UnfoldedOffset != 0);
// Accumulate non-free scaling amounts.
ScaleCost += getScalingFactorCost(TTI, LU, F);
@@ -1047,7 +1120,8 @@ struct LSRFixup {
}
LSRFixup::LSRFixup()
- : UserInst(0), OperandValToReplace(0), LUIdx(~size_t(0)), Offset(0) {}
+ : UserInst(nullptr), OperandValToReplace(nullptr), LUIdx(~size_t(0)),
+ Offset(0) {}
/// isUseFullyOutsideLoop - Test whether this fixup always uses its
/// value outside of the given loop.
@@ -1183,7 +1257,7 @@ public:
MaxOffset(INT64_MIN),
AllFixupsOutsideLoop(true),
RigidFormula(false),
- WidestFixupType(0) {}
+ WidestFixupType(nullptr) {}
bool HasFormulaWithSameRegs(const Formula &F) const;
bool InsertFormula(const Formula &F);
@@ -1208,7 +1282,10 @@ bool LSRUse::HasFormulaWithSameRegs(const Formula &F) const {
/// InsertFormula - If the given formula has not yet been inserted, add it to
/// the list, and return true. Return false otherwise.
+/// The formula must be in canonical form.
bool LSRUse::InsertFormula(const Formula &F) {
+ assert(F.isCanonical() && "Invalid canonical representation");
+
if (!Formulae.empty() && RigidFormula)
return false;
@@ -1234,6 +1311,8 @@ bool LSRUse::InsertFormula(const Formula &F) {
// Record registers now being used by this use.
Regs.insert(F.BaseRegs.begin(), F.BaseRegs.end());
+ if (F.ScaledReg)
+ Regs.insert(F.ScaledReg);
return true;
}
@@ -1300,12 +1379,10 @@ void LSRUse::dump() const {
}
#endif
-/// isLegalUse - Test whether the use described by AM is "legal", meaning it can
-/// be completely folded into the user instruction at isel time. This includes
-/// address-mode folding and special icmp tricks.
-static bool isLegalUse(const TargetTransformInfo &TTI, LSRUse::KindType Kind,
- Type *AccessTy, GlobalValue *BaseGV, int64_t BaseOffset,
- bool HasBaseReg, int64_t Scale) {
+static bool isAMCompletelyFolded(const TargetTransformInfo &TTI,
+ LSRUse::KindType Kind, Type *AccessTy,
+ GlobalValue *BaseGV, int64_t BaseOffset,
+ bool HasBaseReg, int64_t Scale) {
switch (Kind) {
case LSRUse::Address:
return TTI.isLegalAddressingMode(AccessTy, BaseGV, BaseOffset, HasBaseReg, Scale);
@@ -1356,10 +1433,11 @@ static bool isLegalUse(const TargetTransformInfo &TTI, LSRUse::KindType Kind,
llvm_unreachable("Invalid LSRUse Kind!");
}
-static bool isLegalUse(const TargetTransformInfo &TTI, int64_t MinOffset,
- int64_t MaxOffset, LSRUse::KindType Kind, Type *AccessTy,
- GlobalValue *BaseGV, int64_t BaseOffset, bool HasBaseReg,
- int64_t Scale) {
+static bool isAMCompletelyFolded(const TargetTransformInfo &TTI,
+ int64_t MinOffset, int64_t MaxOffset,
+ LSRUse::KindType Kind, Type *AccessTy,
+ GlobalValue *BaseGV, int64_t BaseOffset,
+ bool HasBaseReg, int64_t Scale) {
// Check for overflow.
if (((int64_t)((uint64_t)BaseOffset + MinOffset) > BaseOffset) !=
(MinOffset > 0))
@@ -1370,9 +1448,41 @@ static bool isLegalUse(const TargetTransformInfo &TTI, int64_t MinOffset,
return false;
MaxOffset = (uint64_t)BaseOffset + MaxOffset;
- return isLegalUse(TTI, Kind, AccessTy, BaseGV, MinOffset, HasBaseReg,
- Scale) &&
- isLegalUse(TTI, Kind, AccessTy, BaseGV, MaxOffset, HasBaseReg, Scale);
+ return isAMCompletelyFolded(TTI, Kind, AccessTy, BaseGV, MinOffset,
+ HasBaseReg, Scale) &&
+ isAMCompletelyFolded(TTI, Kind, AccessTy, BaseGV, MaxOffset,
+ HasBaseReg, Scale);
+}
+
+static bool isAMCompletelyFolded(const TargetTransformInfo &TTI,
+ int64_t MinOffset, int64_t MaxOffset,
+ LSRUse::KindType Kind, Type *AccessTy,
+ const Formula &F) {
+ // For the purpose of isAMCompletelyFolded either having a canonical formula
+ // or a scale not equal to zero is correct.
+ // Problems may arise from non canonical formulae having a scale == 0.
+ // Strictly speaking it would best to just rely on canonical formulae.
+ // However, when we generate the scaled formulae, we first check that the
+ // scaling factor is profitable before computing the actual ScaledReg for
+ // compile time sake.
+ assert((F.isCanonical() || F.Scale != 0));
+ return isAMCompletelyFolded(TTI, MinOffset, MaxOffset, Kind, AccessTy,
+ F.BaseGV, F.BaseOffset, F.HasBaseReg, F.Scale);
+}
+
+/// isLegalUse - Test whether we know how to expand the current formula.
+static bool isLegalUse(const TargetTransformInfo &TTI, int64_t MinOffset,
+ int64_t MaxOffset, LSRUse::KindType Kind, Type *AccessTy,
+ GlobalValue *BaseGV, int64_t BaseOffset, bool HasBaseReg,
+ int64_t Scale) {
+ // We know how to expand completely foldable formulae.
+ return isAMCompletelyFolded(TTI, MinOffset, MaxOffset, Kind, AccessTy, BaseGV,
+ BaseOffset, HasBaseReg, Scale) ||
+ // Or formulae that use a base register produced by a sum of base
+ // registers.
+ (Scale == 1 &&
+ isAMCompletelyFolded(TTI, MinOffset, MaxOffset, Kind, AccessTy,
+ BaseGV, BaseOffset, true, 0));
}
static bool isLegalUse(const TargetTransformInfo &TTI, int64_t MinOffset,
@@ -1382,36 +1492,23 @@ static bool isLegalUse(const TargetTransformInfo &TTI, int64_t MinOffset,
F.BaseOffset, F.HasBaseReg, F.Scale);
}
-static bool isLegal2RegAMUse(const TargetTransformInfo &TTI, const LSRUse &LU,
- const Formula &F) {
- // If F is used as an Addressing Mode, it may fold one Base plus one
- // scaled register. If the scaled register is nil, do as if another
- // element of the base regs is a 1-scaled register.
- // This is possible if BaseRegs has at least 2 registers.
-
- // If this is not an address calculation, this is not an addressing mode
- // use.
- if (LU.Kind != LSRUse::Address)
- return false;
-
- // F is already scaled.
- if (F.Scale != 0)
- return false;
-
- // We need to keep one register for the base and one to scale.
- if (F.BaseRegs.size() < 2)
- return false;
-
- return isLegalUse(TTI, LU.MinOffset, LU.MaxOffset, LU.Kind, LU.AccessTy,
- F.BaseGV, F.BaseOffset, F.HasBaseReg, 1);
- }
+static bool isAMCompletelyFolded(const TargetTransformInfo &TTI,
+ const LSRUse &LU, const Formula &F) {
+ return isAMCompletelyFolded(TTI, LU.MinOffset, LU.MaxOffset, LU.Kind,
+ LU.AccessTy, F.BaseGV, F.BaseOffset, F.HasBaseReg,
+ F.Scale);
+}
static unsigned getScalingFactorCost(const TargetTransformInfo &TTI,
const LSRUse &LU, const Formula &F) {
if (!F.Scale)
return 0;
- assert(isLegalUse(TTI, LU.MinOffset, LU.MaxOffset, LU.Kind,
- LU.AccessTy, F) && "Illegal formula in use.");
+
+ // If the use is not completely folded in that instruction, we will have to
+ // pay an extra cost only for scale != 1.
+ if (!isAMCompletelyFolded(TTI, LU.MinOffset, LU.MaxOffset, LU.Kind,
+ LU.AccessTy, F))
+ return F.Scale != 1;
switch (LU.Kind) {
case LSRUse::Address: {
@@ -1430,12 +1527,10 @@ static unsigned getScalingFactorCost(const TargetTransformInfo &TTI,
return std::max(ScaleCostMinOffset, ScaleCostMaxOffset);
}
case LSRUse::ICmpZero:
- // ICmpZero BaseReg + -1*ScaleReg => ICmp BaseReg, ScaleReg.
- // Therefore, return 0 in case F.Scale == -1.
- return F.Scale != -1;
-
case LSRUse::Basic:
case LSRUse::Special:
+ // The use is completely folded, i.e., everything is folded into the
+ // instruction.
return 0;
}
@@ -1460,7 +1555,8 @@ static bool isAlwaysFoldable(const TargetTransformInfo &TTI,
HasBaseReg = true;
}
- return isLegalUse(TTI, Kind, AccessTy, BaseGV, BaseOffset, HasBaseReg, Scale);
+ return isAMCompletelyFolded(TTI, Kind, AccessTy, BaseGV, BaseOffset,
+ HasBaseReg, Scale);
}
static bool isAlwaysFoldable(const TargetTransformInfo &TTI,
@@ -1485,8 +1581,8 @@ static bool isAlwaysFoldable(const TargetTransformInfo &TTI,
// base and a scale.
int64_t Scale = Kind == LSRUse::ICmpZero ? -1 : 1;
- return isLegalUse(TTI, MinOffset, MaxOffset, Kind, AccessTy, BaseGV,
- BaseOffset, HasBaseReg, Scale);
+ return isAMCompletelyFolded(TTI, MinOffset, MaxOffset, Kind, AccessTy, BaseGV,
+ BaseOffset, HasBaseReg, Scale);
}
namespace {
@@ -1515,7 +1611,7 @@ struct IVChain {
SmallVector<IVInc,1> Incs;
const SCEV *ExprBase;
- IVChain() : ExprBase(0) {}
+ IVChain() : ExprBase(nullptr) {}
IVChain(const IVInc &Head, const SCEV *Base)
: Incs(1, Head), ExprBase(Base) {}
@@ -1642,8 +1738,19 @@ class LSRInstance {
void GenerateReassociations(LSRUse &LU, unsigned LUIdx, Formula Base,
unsigned Depth = 0);
+
+ void GenerateReassociationsImpl(LSRUse &LU, unsigned LUIdx,
+ const Formula &Base, unsigned Depth,
+ size_t Idx, bool IsScaledReg = false);
void GenerateCombinations(LSRUse &LU, unsigned LUIdx, Formula Base);
+ void GenerateSymbolicOffsetsImpl(LSRUse &LU, unsigned LUIdx,
+ const Formula &Base, size_t Idx,
+ bool IsScaledReg = false);
void GenerateSymbolicOffsets(LSRUse &LU, unsigned LUIdx, Formula Base);
+ void GenerateConstantOffsetsImpl(LSRUse &LU, unsigned LUIdx,
+ const Formula &Base,
+ const SmallVectorImpl<int64_t> &Worklist,
+ size_t Idx, bool IsScaledReg = false);
void GenerateConstantOffsets(LSRUse &LU, unsigned LUIdx, Formula Base);
void GenerateICmpZeroScales(LSRUse &LU, unsigned LUIdx, Formula Base);
void GenerateScales(LSRUse &LU, unsigned LUIdx, Formula Base);
@@ -1721,7 +1828,7 @@ void LSRInstance::OptimizeShadowIV() {
IVUsers::const_iterator CandidateUI = UI;
++UI;
Instruction *ShadowUse = CandidateUI->getUser();
- Type *DestTy = 0;
+ Type *DestTy = nullptr;
bool IsSigned = false;
/* If shadow use is a int->float cast then insert a second IV
@@ -1783,7 +1890,7 @@ void LSRInstance::OptimizeShadowIV() {
continue;
/* Initialize new IV, double d = 0.0 in above example. */
- ConstantInt *C = 0;
+ ConstantInt *C = nullptr;
if (Incr->getOperand(0) == PH)
C = dyn_cast<ConstantInt>(Incr->getOperand(1));
else if (Incr->getOperand(1) == PH)
@@ -1905,7 +2012,7 @@ ICmpInst *LSRInstance::OptimizeMax(ICmpInst *Cond, IVStrideUse* &CondUse) {
// for ICMP_ULE here because the comparison would be with zero, which
// isn't interesting.
CmpInst::Predicate Pred = ICmpInst::BAD_ICMP_PREDICATE;
- const SCEVNAryExpr *Max = 0;
+ const SCEVNAryExpr *Max = nullptr;
if (const SCEVSMaxExpr *S = dyn_cast<SCEVSMaxExpr>(BackedgeTakenCount)) {
Pred = ICmpInst::ICMP_SLE;
Max = S;
@@ -1948,7 +2055,7 @@ ICmpInst *LSRInstance::OptimizeMax(ICmpInst *Cond, IVStrideUse* &CondUse) {
// Check the right operand of the select, and remember it, as it will
// be used in the new comparison instruction.
- Value *NewRHS = 0;
+ Value *NewRHS = nullptr;
if (ICmpInst::isTrueWhenEqual(Pred)) {
// Look for n+1, and grab n.
if (AddOperator *BO = dyn_cast<AddOperator>(Sel->getOperand(1)))
@@ -2018,7 +2125,7 @@ LSRInstance::OptimizeLoopTermCond() {
continue;
// Search IVUsesByStride to find Cond's IVUse if there is one.
- IVStrideUse *CondUse = 0;
+ IVStrideUse *CondUse = nullptr;
ICmpInst *Cond = cast<ICmpInst>(TermBr->getCondition());
if (!FindIVUserForCond(Cond, CondUse))
continue;
@@ -2071,12 +2178,12 @@ LSRInstance::OptimizeLoopTermCond() {
// Check for possible scaled-address reuse.
Type *AccessTy = getAccessType(UI->getUser());
int64_t Scale = C->getSExtValue();
- if (TTI.isLegalAddressingMode(AccessTy, /*BaseGV=*/ 0,
+ if (TTI.isLegalAddressingMode(AccessTy, /*BaseGV=*/ nullptr,
/*BaseOffset=*/ 0,
/*HasBaseReg=*/ false, Scale))
goto decline_post_inc;
Scale = -Scale;
- if (TTI.isLegalAddressingMode(AccessTy, /*BaseGV=*/ 0,
+ if (TTI.isLegalAddressingMode(AccessTy, /*BaseGV=*/ nullptr,
/*BaseOffset=*/ 0,
/*HasBaseReg=*/ false, Scale))
goto decline_post_inc;
@@ -2146,23 +2253,25 @@ LSRInstance::reconcileNewOffset(LSRUse &LU, int64_t NewOffset, bool HasBaseReg,
// the uses will have all its uses outside the loop, for example.
if (LU.Kind != Kind)
return false;
+
+ // Check for a mismatched access type, and fall back conservatively as needed.
+ // TODO: Be less conservative when the type is similar and can use the same
+ // addressing modes.
+ if (Kind == LSRUse::Address && AccessTy != LU.AccessTy)
+ NewAccessTy = Type::getVoidTy(AccessTy->getContext());
+
// Conservatively assume HasBaseReg is true for now.
if (NewOffset < LU.MinOffset) {
- if (!isAlwaysFoldable(TTI, Kind, AccessTy, /*BaseGV=*/ 0,
+ if (!isAlwaysFoldable(TTI, Kind, NewAccessTy, /*BaseGV=*/nullptr,
LU.MaxOffset - NewOffset, HasBaseReg))
return false;
NewMinOffset = NewOffset;
} else if (NewOffset > LU.MaxOffset) {
- if (!isAlwaysFoldable(TTI, Kind, AccessTy, /*BaseGV=*/ 0,
+ if (!isAlwaysFoldable(TTI, Kind, NewAccessTy, /*BaseGV=*/nullptr,
NewOffset - LU.MinOffset, HasBaseReg))
return false;
NewMaxOffset = NewOffset;
}
- // Check for a mismatched access type, and fall back conservatively as needed.
- // TODO: Be less conservative when the type is similar and can use the same
- // addressing modes.
- if (Kind == LSRUse::Address && AccessTy != LU.AccessTy)
- NewAccessTy = Type::getVoidTy(AccessTy->getContext());
// Update the use.
LU.MinOffset = NewMinOffset;
@@ -2183,7 +2292,7 @@ LSRInstance::getUse(const SCEV *&Expr,
int64_t Offset = ExtractImmediate(Expr, SE);
// Basic uses can't accept any offset, for example.
- if (!isAlwaysFoldable(TTI, Kind, AccessTy, /*BaseGV=*/ 0,
+ if (!isAlwaysFoldable(TTI, Kind, AccessTy, /*BaseGV=*/ nullptr,
Offset, /*HasBaseReg=*/ true)) {
Expr = Copy;
Offset = 0;
@@ -2267,7 +2376,7 @@ LSRInstance::FindUseWithSimilarFormula(const Formula &OrigF,
}
// Nothing looked good.
- return 0;
+ return nullptr;
}
void LSRInstance::CollectInterestingTypesAndFactors() {
@@ -2385,7 +2494,7 @@ static const SCEV *getExprBase(const SCEV *S) {
default: // uncluding scUnknown.
return S;
case scConstant:
- return 0;
+ return nullptr;
case scTruncate:
return getExprBase(cast<SCEVTruncateExpr>(S)->getOperand());
case scZeroExtend:
@@ -2476,7 +2585,7 @@ isProfitableChain(IVChain &Chain, SmallPtrSet<Instruction*, 4> &Users,
&& SE.getSCEV(Chain.tailUserInst()) == Chain.Incs[0].IncExpr) {
--cost;
}
- const SCEV *LastIncExpr = 0;
+ const SCEV *LastIncExpr = nullptr;
unsigned NumConstIncrements = 0;
unsigned NumVarIncrements = 0;
unsigned NumReusedIncrements = 0;
@@ -2535,7 +2644,7 @@ void LSRInstance::ChainInstruction(Instruction *UserInst, Instruction *IVOper,
// Visit all existing chains. Check if its IVOper can be computed as a
// profitable loop invariant increment from the last link in the Chain.
unsigned ChainIdx = 0, NChains = IVChainVec.size();
- const SCEV *LastIncExpr = 0;
+ const SCEV *LastIncExpr = nullptr;
for (; ChainIdx < NChains; ++ChainIdx) {
IVChain &Chain = IVChainVec[ChainIdx];
@@ -2755,7 +2864,7 @@ static bool canFoldIVIncExpr(const SCEV *IncExpr, Instruction *UserInst,
int64_t IncOffset = IncConst->getValue()->getSExtValue();
if (!isAlwaysFoldable(TTI, LSRUse::Address,
- getAccessType(UserInst), /*BaseGV=*/ 0,
+ getAccessType(UserInst), /*BaseGV=*/ nullptr,
IncOffset, /*HaseBaseReg=*/ false))
return false;
@@ -2773,7 +2882,7 @@ void LSRInstance::GenerateIVChain(const IVChain &Chain, SCEVExpander &Rewriter,
// findIVOperand returns IVOpEnd if it can no longer find a valid IV user.
User::op_iterator IVOpIter = findIVOperand(Head.UserInst->op_begin(),
IVOpEnd, L, SE);
- Value *IVSrc = 0;
+ Value *IVSrc = nullptr;
while (IVOpIter != IVOpEnd) {
IVSrc = getWideOperand(*IVOpIter);
@@ -2800,7 +2909,7 @@ void LSRInstance::GenerateIVChain(const IVChain &Chain, SCEVExpander &Rewriter,
DEBUG(dbgs() << "Generate chain at: " << *IVSrc << "\n");
Type *IVTy = IVSrc->getType();
Type *IntTy = SE.getEffectiveSCEVType(IVTy);
- const SCEV *LeftOverExpr = 0;
+ const SCEV *LeftOverExpr = nullptr;
for (IVChain::const_iterator IncI = Chain.begin(),
IncE = Chain.end(); IncI != IncE; ++IncI) {
@@ -2831,7 +2940,7 @@ void LSRInstance::GenerateIVChain(const IVChain &Chain, SCEVExpander &Rewriter,
TTI)) {
assert(IVTy == IVOper->getType() && "inconsistent IV increment type");
IVSrc = IVOper;
- LeftOverExpr = 0;
+ LeftOverExpr = nullptr;
}
}
Type *OperTy = IncI->IVOperand->getType();
@@ -2886,7 +2995,7 @@ void LSRInstance::CollectFixupsAndInitialFormulae() {
LF.PostIncLoops = UI->getPostIncLoops();
LSRUse::KindType Kind = LSRUse::Basic;
- Type *AccessTy = 0;
+ Type *AccessTy = nullptr;
if (isAddressUse(LF.UserInst, LF.OperandValToReplace)) {
Kind = LSRUse::Address;
AccessTy = getAccessType(LF.UserInst);
@@ -2917,7 +3026,7 @@ void LSRInstance::CollectFixupsAndInitialFormulae() {
if (SE.isLoopInvariant(N, L) && isSafeToExpand(N, SE)) {
// S is normalized, so normalize N before folding it into S
// to keep the result normalized.
- N = TransformForPostIncUse(Normalize, N, CI, 0,
+ N = TransformForPostIncUse(Normalize, N, CI, nullptr,
LF.PostIncLoops, SE, DT);
Kind = LSRUse::ICmpZero;
S = SE.getMinusSCEV(N, S);
@@ -2992,6 +3101,9 @@ void LSRInstance::CountRegisters(const Formula &F, size_t LUIdx) {
/// InsertFormula - If the given formula has not yet been inserted, add it to
/// the list, and return true. Return false otherwise.
bool LSRInstance::InsertFormula(LSRUse &LU, unsigned LUIdx, const Formula &F) {
+ // Do not insert formula that we will not be able to expand.
+ assert(isLegalUse(TTI, LU.MinOffset, LU.MaxOffset, LU.Kind, LU.AccessTy, F) &&
+ "Formula is illegal");
if (!LU.InsertFormula(F))
return false;
@@ -3068,7 +3180,7 @@ LSRInstance::CollectLoopInvariantFixupsAndFormulae() {
LSRFixup &LF = getNewFixup();
LF.UserInst = const_cast<Instruction *>(UserInst);
LF.OperandValToReplace = U;
- std::pair<size_t, int64_t> P = getUse(S, LSRUse::Basic, 0);
+ std::pair<size_t, int64_t> P = getUse(S, LSRUse::Basic, nullptr);
LF.LUIdx = P.first;
LF.Offset = P.second;
LSRUse &LU = Uses[LF.LUIdx];
@@ -3107,7 +3219,7 @@ static const SCEV *CollectSubexprs(const SCEV *S, const SCEVConstant *C,
if (Remainder)
Ops.push_back(C ? SE.getMulExpr(C, Remainder) : Remainder);
}
- return 0;
+ return nullptr;
} else if (const SCEVAddRecExpr *AR = dyn_cast<SCEVAddRecExpr>(S)) {
// Split a non-zero base out of an addrec.
if (AR->getStart()->isZero())
@@ -3119,7 +3231,7 @@ static const SCEV *CollectSubexprs(const SCEV *S, const SCEVConstant *C,
// does not pertain to this loop.
if (Remainder && (AR->getLoop() == L || !isa<SCEVAddRecExpr>(Remainder))) {
Ops.push_back(C ? SE.getMulExpr(C, Remainder) : Remainder);
- Remainder = 0;
+ Remainder = nullptr;
}
if (Remainder != AR->getStart()) {
if (!Remainder)
@@ -3141,90 +3253,110 @@ static const SCEV *CollectSubexprs(const SCEV *S, const SCEVConstant *C,
CollectSubexprs(Mul->getOperand(1), C, Ops, L, SE, Depth+1);
if (Remainder)
Ops.push_back(SE.getMulExpr(C, Remainder));
- return 0;
+ return nullptr;
}
}
return S;
}
-/// GenerateReassociations - Split out subexpressions from adds and the bases of
-/// addrecs.
-void LSRInstance::GenerateReassociations(LSRUse &LU, unsigned LUIdx,
- Formula Base,
- unsigned Depth) {
- // Arbitrarily cap recursion to protect compile time.
- if (Depth >= 3) return;
-
- for (size_t i = 0, e = Base.BaseRegs.size(); i != e; ++i) {
- const SCEV *BaseReg = Base.BaseRegs[i];
+/// \brief Helper function for LSRInstance::GenerateReassociations.
+void LSRInstance::GenerateReassociationsImpl(LSRUse &LU, unsigned LUIdx,
+ const Formula &Base,
+ unsigned Depth, size_t Idx,
+ bool IsScaledReg) {
+ const SCEV *BaseReg = IsScaledReg ? Base.ScaledReg : Base.BaseRegs[Idx];
+ SmallVector<const SCEV *, 8> AddOps;
+ const SCEV *Remainder = CollectSubexprs(BaseReg, nullptr, AddOps, L, SE);
+ if (Remainder)
+ AddOps.push_back(Remainder);
+
+ if (AddOps.size() == 1)
+ return;
- SmallVector<const SCEV *, 8> AddOps;
- const SCEV *Remainder = CollectSubexprs(BaseReg, 0, AddOps, L, SE);
- if (Remainder)
- AddOps.push_back(Remainder);
+ for (SmallVectorImpl<const SCEV *>::const_iterator J = AddOps.begin(),
+ JE = AddOps.end();
+ J != JE; ++J) {
- if (AddOps.size() == 1) continue;
+ // Loop-variant "unknown" values are uninteresting; we won't be able to
+ // do anything meaningful with them.
+ if (isa<SCEVUnknown>(*J) && !SE.isLoopInvariant(*J, L))
+ continue;
- for (SmallVectorImpl<const SCEV *>::const_iterator J = AddOps.begin(),
- JE = AddOps.end(); J != JE; ++J) {
+ // Don't pull a constant into a register if the constant could be folded
+ // into an immediate field.
+ if (isAlwaysFoldable(TTI, SE, LU.MinOffset, LU.MaxOffset, LU.Kind,
+ LU.AccessTy, *J, Base.getNumRegs() > 1))
+ continue;
- // Loop-variant "unknown" values are uninteresting; we won't be able to
- // do anything meaningful with them.
- if (isa<SCEVUnknown>(*J) && !SE.isLoopInvariant(*J, L))
- continue;
+ // Collect all operands except *J.
+ SmallVector<const SCEV *, 8> InnerAddOps(
+ ((const SmallVector<const SCEV *, 8> &)AddOps).begin(), J);
+ InnerAddOps.append(std::next(J),
+ ((const SmallVector<const SCEV *, 8> &)AddOps).end());
+
+ // Don't leave just a constant behind in a register if the constant could
+ // be folded into an immediate field.
+ if (InnerAddOps.size() == 1 &&
+ isAlwaysFoldable(TTI, SE, LU.MinOffset, LU.MaxOffset, LU.Kind,
+ LU.AccessTy, InnerAddOps[0], Base.getNumRegs() > 1))
+ continue;
- // Don't pull a constant into a register if the constant could be folded
- // into an immediate field.
- if (isAlwaysFoldable(TTI, SE, LU.MinOffset, LU.MaxOffset, LU.Kind,
- LU.AccessTy, *J, Base.getNumRegs() > 1))
- continue;
+ const SCEV *InnerSum = SE.getAddExpr(InnerAddOps);
+ if (InnerSum->isZero())
+ continue;
+ Formula F = Base;
- // Collect all operands except *J.
- SmallVector<const SCEV *, 8> InnerAddOps(
- ((const SmallVector<const SCEV *, 8> &)AddOps).begin(), J);
- InnerAddOps.append(std::next(J),
- ((const SmallVector<const SCEV *, 8> &)AddOps).end());
-
- // Don't leave just a constant behind in a register if the constant could
- // be folded into an immediate field.
- if (InnerAddOps.size() == 1 &&
- isAlwaysFoldable(TTI, SE, LU.MinOffset, LU.MaxOffset, LU.Kind,
- LU.AccessTy, InnerAddOps[0], Base.getNumRegs() > 1))
- continue;
+ // Add the remaining pieces of the add back into the new formula.
+ const SCEVConstant *InnerSumSC = dyn_cast<SCEVConstant>(InnerSum);
+ if (InnerSumSC && SE.getTypeSizeInBits(InnerSumSC->getType()) <= 64 &&
+ TTI.isLegalAddImmediate((uint64_t)F.UnfoldedOffset +
+ InnerSumSC->getValue()->getZExtValue())) {
+ F.UnfoldedOffset =
+ (uint64_t)F.UnfoldedOffset + InnerSumSC->getValue()->getZExtValue();
+ if (IsScaledReg)
+ F.ScaledReg = nullptr;
+ else
+ F.BaseRegs.erase(F.BaseRegs.begin() + Idx);
+ } else if (IsScaledReg)
+ F.ScaledReg = InnerSum;
+ else
+ F.BaseRegs[Idx] = InnerSum;
+
+ // Add J as its own register, or an unfolded immediate.
+ const SCEVConstant *SC = dyn_cast<SCEVConstant>(*J);
+ if (SC && SE.getTypeSizeInBits(SC->getType()) <= 64 &&
+ TTI.isLegalAddImmediate((uint64_t)F.UnfoldedOffset +
+ SC->getValue()->getZExtValue()))
+ F.UnfoldedOffset =
+ (uint64_t)F.UnfoldedOffset + SC->getValue()->getZExtValue();
+ else
+ F.BaseRegs.push_back(*J);
+ // We may have changed the number of register in base regs, adjust the
+ // formula accordingly.
+ F.Canonicalize();
+
+ if (InsertFormula(LU, LUIdx, F))
+ // If that formula hadn't been seen before, recurse to find more like
+ // it.
+ GenerateReassociations(LU, LUIdx, LU.Formulae.back(), Depth + 1);
+ }
+}
- const SCEV *InnerSum = SE.getAddExpr(InnerAddOps);
- if (InnerSum->isZero())
- continue;
- Formula F = Base;
+/// GenerateReassociations - Split out subexpressions from adds and the bases of
+/// addrecs.
+void LSRInstance::GenerateReassociations(LSRUse &LU, unsigned LUIdx,
+ Formula Base, unsigned Depth) {
+ assert(Base.isCanonical() && "Input must be in the canonical form");
+ // Arbitrarily cap recursion to protect compile time.
+ if (Depth >= 3)
+ return;
- // Add the remaining pieces of the add back into the new formula.
- const SCEVConstant *InnerSumSC = dyn_cast<SCEVConstant>(InnerSum);
- if (InnerSumSC &&
- SE.getTypeSizeInBits(InnerSumSC->getType()) <= 64 &&
- TTI.isLegalAddImmediate((uint64_t)F.UnfoldedOffset +
- InnerSumSC->getValue()->getZExtValue())) {
- F.UnfoldedOffset = (uint64_t)F.UnfoldedOffset +
- InnerSumSC->getValue()->getZExtValue();
- F.BaseRegs.erase(F.BaseRegs.begin() + i);
- } else
- F.BaseRegs[i] = InnerSum;
-
- // Add J as its own register, or an unfolded immediate.
- const SCEVConstant *SC = dyn_cast<SCEVConstant>(*J);
- if (SC && SE.getTypeSizeInBits(SC->getType()) <= 64 &&
- TTI.isLegalAddImmediate((uint64_t)F.UnfoldedOffset +
- SC->getValue()->getZExtValue()))
- F.UnfoldedOffset = (uint64_t)F.UnfoldedOffset +
- SC->getValue()->getZExtValue();
- else
- F.BaseRegs.push_back(*J);
+ for (size_t i = 0, e = Base.BaseRegs.size(); i != e; ++i)
+ GenerateReassociationsImpl(LU, LUIdx, Base, Depth, i);
- if (InsertFormula(LU, LUIdx, F))
- // If that formula hadn't been seen before, recurse to find more like
- // it.
- GenerateReassociations(LU, LUIdx, LU.Formulae.back(), Depth+1);
- }
- }
+ if (Base.Scale == 1)
+ GenerateReassociationsImpl(LU, LUIdx, Base, Depth,
+ /* Idx */ -1, /* IsScaledReg */ true);
}
/// GenerateCombinations - Generate a formula consisting of all of the
@@ -3232,8 +3364,12 @@ void LSRInstance::GenerateReassociations(LSRUse &LU, unsigned LUIdx,
void LSRInstance::GenerateCombinations(LSRUse &LU, unsigned LUIdx,
Formula Base) {
// This method is only interesting on a plurality of registers.
- if (Base.BaseRegs.size() <= 1) return;
+ if (Base.BaseRegs.size() + (Base.Scale == 1) <= 1)
+ return;
+ // Flatten the representation, i.e., reg1 + 1*reg2 => reg1 + reg2, before
+ // processing the formula.
+ Base.Unscale();
Formula F = Base;
F.BaseRegs.clear();
SmallVector<const SCEV *, 4> Ops;
@@ -3253,29 +3389,87 @@ void LSRInstance::GenerateCombinations(LSRUse &LU, unsigned LUIdx,
// rather than proceed with zero in a register.
if (!Sum->isZero()) {
F.BaseRegs.push_back(Sum);
+ F.Canonicalize();
(void)InsertFormula(LU, LUIdx, F);
}
}
}
+/// \brief Helper function for LSRInstance::GenerateSymbolicOffsets.
+void LSRInstance::GenerateSymbolicOffsetsImpl(LSRUse &LU, unsigned LUIdx,
+ const Formula &Base, size_t Idx,
+ bool IsScaledReg) {
+ const SCEV *G = IsScaledReg ? Base.ScaledReg : Base.BaseRegs[Idx];
+ GlobalValue *GV = ExtractSymbol(G, SE);
+ if (G->isZero() || !GV)
+ return;
+ Formula F = Base;
+ F.BaseGV = GV;
+ if (!isLegalUse(TTI, LU.MinOffset, LU.MaxOffset, LU.Kind, LU.AccessTy, F))
+ return;
+ if (IsScaledReg)
+ F.ScaledReg = G;
+ else
+ F.BaseRegs[Idx] = G;
+ (void)InsertFormula(LU, LUIdx, F);
+}
+
/// GenerateSymbolicOffsets - Generate reuse formulae using symbolic offsets.
void LSRInstance::GenerateSymbolicOffsets(LSRUse &LU, unsigned LUIdx,
Formula Base) {
// We can't add a symbolic offset if the address already contains one.
if (Base.BaseGV) return;
- for (size_t i = 0, e = Base.BaseRegs.size(); i != e; ++i) {
- const SCEV *G = Base.BaseRegs[i];
- GlobalValue *GV = ExtractSymbol(G, SE);
- if (G->isZero() || !GV)
- continue;
+ for (size_t i = 0, e = Base.BaseRegs.size(); i != e; ++i)
+ GenerateSymbolicOffsetsImpl(LU, LUIdx, Base, i);
+ if (Base.Scale == 1)
+ GenerateSymbolicOffsetsImpl(LU, LUIdx, Base, /* Idx */ -1,
+ /* IsScaledReg */ true);
+}
+
+/// \brief Helper function for LSRInstance::GenerateConstantOffsets.
+void LSRInstance::GenerateConstantOffsetsImpl(
+ LSRUse &LU, unsigned LUIdx, const Formula &Base,
+ const SmallVectorImpl<int64_t> &Worklist, size_t Idx, bool IsScaledReg) {
+ const SCEV *G = IsScaledReg ? Base.ScaledReg : Base.BaseRegs[Idx];
+ for (SmallVectorImpl<int64_t>::const_iterator I = Worklist.begin(),
+ E = Worklist.end();
+ I != E; ++I) {
Formula F = Base;
- F.BaseGV = GV;
- if (!isLegalUse(TTI, LU.MinOffset, LU.MaxOffset, LU.Kind, LU.AccessTy, F))
- continue;
- F.BaseRegs[i] = G;
- (void)InsertFormula(LU, LUIdx, F);
+ F.BaseOffset = (uint64_t)Base.BaseOffset - *I;
+ if (isLegalUse(TTI, LU.MinOffset - *I, LU.MaxOffset - *I, LU.Kind,
+ LU.AccessTy, F)) {
+ // Add the offset to the base register.
+ const SCEV *NewG = SE.getAddExpr(SE.getConstant(G->getType(), *I), G);
+ // If it cancelled out, drop the base register, otherwise update it.
+ if (NewG->isZero()) {
+ if (IsScaledReg) {
+ F.Scale = 0;
+ F.ScaledReg = nullptr;
+ } else
+ F.DeleteBaseReg(F.BaseRegs[Idx]);
+ F.Canonicalize();
+ } else if (IsScaledReg)
+ F.ScaledReg = NewG;
+ else
+ F.BaseRegs[Idx] = NewG;
+
+ (void)InsertFormula(LU, LUIdx, F);
+ }
}
+
+ int64_t Imm = ExtractImmediate(G, SE);
+ if (G->isZero() || Imm == 0)
+ return;
+ Formula F = Base;
+ F.BaseOffset = (uint64_t)F.BaseOffset + Imm;
+ if (!isLegalUse(TTI, LU.MinOffset, LU.MaxOffset, LU.Kind, LU.AccessTy, F))
+ return;
+ if (IsScaledReg)
+ F.ScaledReg = G;
+ else
+ F.BaseRegs[Idx] = G;
+ (void)InsertFormula(LU, LUIdx, F);
}
/// GenerateConstantOffsets - Generate reuse formulae using symbolic offsets.
@@ -3288,38 +3482,11 @@ void LSRInstance::GenerateConstantOffsets(LSRUse &LU, unsigned LUIdx,
if (LU.MaxOffset != LU.MinOffset)
Worklist.push_back(LU.MaxOffset);
- for (size_t i = 0, e = Base.BaseRegs.size(); i != e; ++i) {
- const SCEV *G = Base.BaseRegs[i];
-
- for (SmallVectorImpl<int64_t>::const_iterator I = Worklist.begin(),
- E = Worklist.end(); I != E; ++I) {
- Formula F = Base;
- F.BaseOffset = (uint64_t)Base.BaseOffset - *I;
- if (isLegalUse(TTI, LU.MinOffset - *I, LU.MaxOffset - *I, LU.Kind,
- LU.AccessTy, F)) {
- // Add the offset to the base register.
- const SCEV *NewG = SE.getAddExpr(SE.getConstant(G->getType(), *I), G);
- // If it cancelled out, drop the base register, otherwise update it.
- if (NewG->isZero()) {
- std::swap(F.BaseRegs[i], F.BaseRegs.back());
- F.BaseRegs.pop_back();
- } else
- F.BaseRegs[i] = NewG;
-
- (void)InsertFormula(LU, LUIdx, F);
- }
- }
-
- int64_t Imm = ExtractImmediate(G, SE);
- if (G->isZero() || Imm == 0)
- continue;
- Formula F = Base;
- F.BaseOffset = (uint64_t)F.BaseOffset + Imm;
- if (!isLegalUse(TTI, LU.MinOffset, LU.MaxOffset, LU.Kind, LU.AccessTy, F))
- continue;
- F.BaseRegs[i] = G;
- (void)InsertFormula(LU, LUIdx, F);
- }
+ for (size_t i = 0, e = Base.BaseRegs.size(); i != e; ++i)
+ GenerateConstantOffsetsImpl(LU, LUIdx, Base, Worklist, i);
+ if (Base.Scale == 1)
+ GenerateConstantOffsetsImpl(LU, LUIdx, Base, Worklist, /* Idx */ -1,
+ /* IsScaledReg */ true);
}
/// GenerateICmpZeroScales - For ICmpZero, check to see if we can scale up
@@ -3419,7 +3586,11 @@ void LSRInstance::GenerateScales(LSRUse &LU, unsigned LUIdx, Formula Base) {
if (!IntTy) return;
// If this Formula already has a scaled register, we can't add another one.
- if (Base.Scale != 0) return;
+ // Try to unscale the formula to generate a better scale.
+ if (Base.Scale != 0 && !Base.Unscale())
+ return;
+
+ assert(Base.Scale == 0 && "Unscale did not did its job!");
// Check each interesting stride.
for (SmallSetVector<int64_t, 8>::const_iterator
@@ -3460,6 +3631,11 @@ void LSRInstance::GenerateScales(LSRUse &LU, unsigned LUIdx, Formula Base) {
Formula F = Base;
F.ScaledReg = Quotient;
F.DeleteBaseReg(F.BaseRegs[i]);
+ // The canonical representation of 1*reg is reg, which is already in
+ // Base. In that case, do not try to insert the formula, it will be
+ // rejected anyway.
+ if (F.Scale == 1 && F.BaseRegs.empty())
+ continue;
(void)InsertFormula(LU, LUIdx, F);
}
}
@@ -3624,7 +3800,12 @@ void LSRInstance::GenerateCrossUseConstantOffsets() {
// TODO: Use a more targeted data structure.
for (size_t L = 0, LE = LU.Formulae.size(); L != LE; ++L) {
- const Formula &F = LU.Formulae[L];
+ Formula F = LU.Formulae[L];
+ // FIXME: The code for the scaled and unscaled registers looks
+ // very similar but slightly different. Investigate if they
+ // could be merged. That way, we would not have to unscale the
+ // Formula.
+ F.Unscale();
// Use the immediate in the scaled register.
if (F.ScaledReg == OrigReg) {
int64_t Offset = (uint64_t)F.BaseOffset + Imm * (uint64_t)F.Scale;
@@ -3650,6 +3831,7 @@ void LSRInstance::GenerateCrossUseConstantOffsets() {
continue;
// OK, looks good.
+ NewF.Canonicalize();
(void)InsertFormula(LU, LUIdx, NewF);
} else {
// Use the immediate in a base register.
@@ -3683,6 +3865,7 @@ void LSRInstance::GenerateCrossUseConstantOffsets() {
goto skip_formula;
// Ok, looks good.
+ NewF.Canonicalize();
(void)InsertFormula(LU, LUIdx, NewF);
break;
skip_formula:;
@@ -3936,7 +4119,7 @@ void LSRInstance::NarrowSearchSpaceByCollapsingUnrolledCode() {
for (SmallVectorImpl<Formula>::const_iterator I = LU.Formulae.begin(),
E = LU.Formulae.end(); I != E; ++I) {
const Formula &F = *I;
- if (F.BaseOffset == 0 || F.Scale != 0)
+ if (F.BaseOffset == 0 || (F.Scale != 0 && F.Scale != 1))
continue;
LSRUse *LUThatHas = FindUseWithSimilarFormula(F, LU);
@@ -4033,7 +4216,7 @@ void LSRInstance::NarrowSearchSpaceByPickingWinnerRegs() {
// Pick the register which is used by the most LSRUses, which is likely
// to be a good reuse register candidate.
- const SCEV *Best = 0;
+ const SCEV *Best = nullptr;
unsigned BestNum = 0;
for (RegUseTracker::const_iterator I = RegUses.begin(), E = RegUses.end();
I != E; ++I) {
@@ -4130,19 +4313,22 @@ void LSRInstance::SolveRecurse(SmallVectorImpl<const Formula *> &Solution,
E = LU.Formulae.end(); I != E; ++I) {
const Formula &F = *I;
- // Ignore formulae which do not use any of the required registers.
- bool SatisfiedReqReg = true;
+ // Ignore formulae which may not be ideal in terms of register reuse of
+ // ReqRegs. The formula should use all required registers before
+ // introducing new ones.
+ int NumReqRegsToFind = std::min(F.getNumRegs(), ReqRegs.size());
for (SmallSetVector<const SCEV *, 4>::const_iterator J = ReqRegs.begin(),
JE = ReqRegs.end(); J != JE; ++J) {
const SCEV *Reg = *J;
- if ((!F.ScaledReg || F.ScaledReg != Reg) &&
- std::find(F.BaseRegs.begin(), F.BaseRegs.end(), Reg) ==
+ if ((F.ScaledReg && F.ScaledReg == Reg) ||
+ std::find(F.BaseRegs.begin(), F.BaseRegs.end(), Reg) !=
F.BaseRegs.end()) {
- SatisfiedReqReg = false;
- break;
+ --NumReqRegsToFind;
+ if (NumReqRegsToFind == 0)
+ break;
}
}
- if (!SatisfiedReqReg) {
+ if (NumReqRegsToFind != 0) {
// If none of the formulae satisfied the required registers, then we could
// clear ReqRegs and try again. Currently, we simply give up in this case.
continue;
@@ -4240,7 +4426,7 @@ LSRInstance::HoistInsertPosition(BasicBlock::iterator IP,
}
bool AllDominate = true;
- Instruction *BetterPos = 0;
+ Instruction *BetterPos = nullptr;
Instruction *Tentative = IDom->getTerminator();
for (SmallVectorImpl<Instruction *>::const_iterator I = Inputs.begin(),
E = Inputs.end(); I != E; ++I) {
@@ -4379,11 +4565,11 @@ Value *LSRInstance::Expand(const LSRFixup &LF,
LF.UserInst, LF.OperandValToReplace,
Loops, SE, DT);
- Ops.push_back(SE.getUnknown(Rewriter.expandCodeFor(Reg, 0, IP)));
+ Ops.push_back(SE.getUnknown(Rewriter.expandCodeFor(Reg, nullptr, IP)));
}
// Expand the ScaledReg portion.
- Value *ICmpScaledV = 0;
+ Value *ICmpScaledV = nullptr;
if (F.Scale != 0) {
const SCEV *ScaledS = F.ScaledReg;
@@ -4394,25 +4580,34 @@ Value *LSRInstance::Expand(const LSRFixup &LF,
Loops, SE, DT);
if (LU.Kind == LSRUse::ICmpZero) {
- // An interesting way of "folding" with an icmp is to use a negated
- // scale, which we'll implement by inserting it into the other operand
- // of the icmp.
- assert(F.Scale == -1 &&
- "The only scale supported by ICmpZero uses is -1!");
- ICmpScaledV = Rewriter.expandCodeFor(ScaledS, 0, IP);
+ // Expand ScaleReg as if it was part of the base regs.
+ if (F.Scale == 1)
+ Ops.push_back(
+ SE.getUnknown(Rewriter.expandCodeFor(ScaledS, nullptr, IP)));
+ else {
+ // An interesting way of "folding" with an icmp is to use a negated
+ // scale, which we'll implement by inserting it into the other operand
+ // of the icmp.
+ assert(F.Scale == -1 &&
+ "The only scale supported by ICmpZero uses is -1!");
+ ICmpScaledV = Rewriter.expandCodeFor(ScaledS, nullptr, IP);
+ }
} else {
// Otherwise just expand the scaled register and an explicit scale,
// which is expected to be matched as part of the address.
// Flush the operand list to suppress SCEVExpander hoisting address modes.
- if (!Ops.empty() && LU.Kind == LSRUse::Address) {
+ // Unless the addressing mode will not be folded.
+ if (!Ops.empty() && LU.Kind == LSRUse::Address &&
+ isAMCompletelyFolded(TTI, LU, F)) {
Value *FullV = Rewriter.expandCodeFor(SE.getAddExpr(Ops), Ty, IP);
Ops.clear();
Ops.push_back(SE.getUnknown(FullV));
}
- ScaledS = SE.getUnknown(Rewriter.expandCodeFor(ScaledS, 0, IP));
- ScaledS = SE.getMulExpr(ScaledS,
- SE.getConstant(ScaledS->getType(), F.Scale));
+ ScaledS = SE.getUnknown(Rewriter.expandCodeFor(ScaledS, nullptr, IP));
+ if (F.Scale != 1)
+ ScaledS =
+ SE.getMulExpr(ScaledS, SE.getConstant(ScaledS->getType(), F.Scale));
Ops.push_back(ScaledS);
}
}
@@ -4490,7 +4685,9 @@ Value *LSRInstance::Expand(const LSRFixup &LF,
}
CI->setOperand(1, ICmpScaledV);
} else {
- assert(F.Scale == 0 &&
+ // A scale of 1 means that the scale has been expanded as part of the
+ // base regs.
+ assert((F.Scale == 0 || F.Scale == 1) &&
"ICmp does not support folding a global value and "
"a scale at the same time!");
Constant *C = ConstantInt::getSigned(SE.getEffectiveSCEVType(OpTy),
@@ -4531,7 +4728,7 @@ void LSRInstance::RewriteForPHI(PHINode *PN,
Loop *PNLoop = LI.getLoopFor(Parent);
if (!PNLoop || Parent != PNLoop->getHeader()) {
// Split the critical edge.
- BasicBlock *NewBB = 0;
+ BasicBlock *NewBB = nullptr;
if (!Parent->isLandingPad()) {
NewBB = SplitCriticalEdge(BB, Parent, P,
/*MergeIdenticalEdges=*/true,
@@ -4560,7 +4757,7 @@ void LSRInstance::RewriteForPHI(PHINode *PN,
}
std::pair<DenseMap<BasicBlock *, Value *>::iterator, bool> Pair =
- Inserted.insert(std::make_pair(BB, static_cast<Value *>(0)));
+ Inserted.insert(std::make_pair(BB, static_cast<Value *>(nullptr)));
if (!Pair.second)
PN->setIncomingValue(i, Pair.first->second);
else {
@@ -4670,7 +4867,7 @@ LSRInstance::LSRInstance(Loop *L, Pass *P)
DT(P->getAnalysis<DominatorTreeWrapperPass>().getDomTree()),
LI(P->getAnalysis<LoopInfo>()),
TTI(P->getAnalysis<TargetTransformInfo>()), L(L), Changed(false),
- IVIncInsertPos(0) {
+ IVIncInsertPos(nullptr) {
// If LoopSimplify form is not available, stay out of trouble.
if (!L->isLoopSimplifyForm())
return;