Diffstat (limited to 'lib/Transforms/Scalar')
 -rw-r--r--  lib/Transforms/Scalar/CodeGenPrepare.cpp       |   4
 -rw-r--r--  lib/Transforms/Scalar/GVN.cpp                  | 209
 -rw-r--r--  lib/Transforms/Scalar/IndVarSimplify.cpp       | 311
 -rw-r--r--  lib/Transforms/Scalar/LoopStrengthReduce.cpp   | 109
 -rw-r--r--  lib/Transforms/Scalar/LoopUnrollPass.cpp       |   4
 -rw-r--r--  lib/Transforms/Scalar/LoopUnswitch.cpp         | 287
 -rw-r--r--  lib/Transforms/Scalar/ObjCARC.cpp              | 731
 -rw-r--r--  lib/Transforms/Scalar/Reassociate.cpp          |  10
 -rw-r--r--  lib/Transforms/Scalar/SCCP.cpp                 |   4
 -rw-r--r--  lib/Transforms/Scalar/ScalarReplAggregates.cpp |   6
 -rw-r--r--  lib/Transforms/Scalar/SimplifyLibCalls.cpp     |  25
11 files changed, 792 insertions, 908 deletions
diff --git a/lib/Transforms/Scalar/CodeGenPrepare.cpp b/lib/Transforms/Scalar/CodeGenPrepare.cpp
index 020ec57..9a5423f 100644
--- a/lib/Transforms/Scalar/CodeGenPrepare.cpp
+++ b/lib/Transforms/Scalar/CodeGenPrepare.cpp
@@ -567,8 +567,8 @@ bool CodeGenPrepare::OptimizeCallInst(CallInst *CI) {
// happens.
WeakVH IterHandle(CurInstIterator);
- ReplaceAndSimplifyAllUses(CI, RetVal, TLI ? TLI->getTargetData() : 0,
- TLInfo, ModifiedDT ? 0 : DT);
+ replaceAndRecursivelySimplify(CI, RetVal, TLI ? TLI->getTargetData() : 0,
+ TLInfo, ModifiedDT ? 0 : DT);
// If the iterator instruction was recursively deleted, start over at the
// start of the block.
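
A minimal sketch of the guard pattern this hunk sits inside: the WeakVH tracks the current instruction so the caller can tell whether replaceAndRecursivelySimplify recursively deleted it. The BB and SunkAddrs names are assumptions about the surrounding function body, which is not shown in this hunk.

// Sketch only: assumed context around the two lines changed above.
WeakVH IterHandle(CurInstIterator);
replaceAndRecursivelySimplify(CI, RetVal, TLI ? TLI->getTargetData() : 0,
                              TLInfo, ModifiedDT ? 0 : DT);
if (IterHandle != CurInstIterator) {
  // The instruction the iterator referenced was deleted during recursive
  // simplification; restart at the top of the block and drop cached state.
  CurInstIterator = BB->begin();
  SunkAddrs.clear();
}
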
diff --git a/lib/Transforms/Scalar/GVN.cpp b/lib/Transforms/Scalar/GVN.cpp
index ac80c48..fb733ad 100644
--- a/lib/Transforms/Scalar/GVN.cpp
+++ b/lib/Transforms/Scalar/GVN.cpp
@@ -1974,109 +1974,119 @@ unsigned GVN::replaceAllDominatedUsesWith(Value *From, Value *To,
/// dominated by 'Root'. Exploit this, for example by replacing 'LHS' with
/// 'RHS' everywhere in the scope. Returns whether a change was made.
bool GVN::propagateEquality(Value *LHS, Value *RHS, BasicBlock *Root) {
- if (LHS == RHS) return false;
- assert(LHS->getType() == RHS->getType() && "Equal but types differ!");
+ SmallVector<std::pair<Value*, Value*>, 4> Worklist;
+ Worklist.push_back(std::make_pair(LHS, RHS));
+ bool Changed = false;
- // Don't try to propagate equalities between constants.
- if (isa<Constant>(LHS) && isa<Constant>(RHS))
- return false;
+ while (!Worklist.empty()) {
+ std::pair<Value*, Value*> Item = Worklist.pop_back_val();
+ LHS = Item.first; RHS = Item.second;
+
+ if (LHS == RHS) continue;
+ assert(LHS->getType() == RHS->getType() && "Equality but unequal types!");
+
+ // Don't try to propagate equalities between constants.
+ if (isa<Constant>(LHS) && isa<Constant>(RHS)) continue;
- // Prefer a constant on the right-hand side, or an Argument if no constants.
- if (isa<Constant>(LHS) || (isa<Argument>(LHS) && !isa<Constant>(RHS)))
- std::swap(LHS, RHS);
- assert((isa<Argument>(LHS) || isa<Instruction>(LHS)) && "Unexpected value!");
-
- // If there is no obvious reason to prefer the left-hand side over the right-
- // hand side, ensure the longest lived term is on the right-hand side, so the
- // shortest lived term will be replaced by the longest lived. This tends to
- // expose more simplifications.
- uint32_t LVN = VN.lookup_or_add(LHS);
- if ((isa<Argument>(LHS) && isa<Argument>(RHS)) ||
- (isa<Instruction>(LHS) && isa<Instruction>(RHS))) {
- // Move the 'oldest' value to the right-hand side, using the value number as
- // a proxy for age.
- uint32_t RVN = VN.lookup_or_add(RHS);
- if (LVN < RVN) {
+ // Prefer a constant on the right-hand side, or an Argument if no constants.
+ if (isa<Constant>(LHS) || (isa<Argument>(LHS) && !isa<Constant>(RHS)))
std::swap(LHS, RHS);
- LVN = RVN;
+ assert((isa<Argument>(LHS) || isa<Instruction>(LHS)) && "Unexpected value!");
+
+ // If there is no obvious reason to prefer the left-hand side over the right-
+ // hand side, ensure the longest lived term is on the right-hand side, so the
+ // shortest lived term will be replaced by the longest lived. This tends to
+ // expose more simplifications.
+ uint32_t LVN = VN.lookup_or_add(LHS);
+ if ((isa<Argument>(LHS) && isa<Argument>(RHS)) ||
+ (isa<Instruction>(LHS) && isa<Instruction>(RHS))) {
+ // Move the 'oldest' value to the right-hand side, using the value number as
+ // a proxy for age.
+ uint32_t RVN = VN.lookup_or_add(RHS);
+ if (LVN < RVN) {
+ std::swap(LHS, RHS);
+ LVN = RVN;
+ }
+ }
+ assert((!isa<Instruction>(RHS) ||
+ DT->properlyDominates(cast<Instruction>(RHS)->getParent(), Root)) &&
+ "Instruction doesn't dominate scope!");
+
+ // If value numbering later deduces that an instruction in the scope is equal
+ // to 'LHS' then ensure it will be turned into 'RHS'.
+ addToLeaderTable(LVN, RHS, Root);
+
+ // Replace all occurrences of 'LHS' with 'RHS' everywhere in the scope. As
+ // LHS always has at least one use that is not dominated by Root, this will
+ // never do anything if LHS has only one use.
+ if (!LHS->hasOneUse()) {
+ unsigned NumReplacements = replaceAllDominatedUsesWith(LHS, RHS, Root);
+ Changed |= NumReplacements > 0;
+ NumGVNEqProp += NumReplacements;
}
- }
-
- // If value numbering later deduces that an instruction in the scope is equal
- // to 'LHS' then ensure it will be turned into 'RHS'.
- addToLeaderTable(LVN, RHS, Root);
- // Replace all occurrences of 'LHS' with 'RHS' everywhere in the scope. As
- // LHS always has at least one use that is not dominated by Root, this will
- // never do anything if LHS has only one use.
- bool Changed = false;
- if (!LHS->hasOneUse()) {
- unsigned NumReplacements = replaceAllDominatedUsesWith(LHS, RHS, Root);
- Changed |= NumReplacements > 0;
- NumGVNEqProp += NumReplacements;
- }
-
- // Now try to deduce additional equalities from this one. For example, if the
- // known equality was "(A != B)" == "false" then it follows that A and B are
- // equal in the scope. Only boolean equalities with an explicit true or false
- // RHS are currently supported.
- if (!RHS->getType()->isIntegerTy(1))
- // Not a boolean equality - bail out.
- return Changed;
- ConstantInt *CI = dyn_cast<ConstantInt>(RHS);
- if (!CI)
- // RHS neither 'true' nor 'false' - bail out.
- return Changed;
- // Whether RHS equals 'true'. Otherwise it equals 'false'.
- bool isKnownTrue = CI->isAllOnesValue();
- bool isKnownFalse = !isKnownTrue;
-
- // If "A && B" is known true then both A and B are known true. If "A || B"
- // is known false then both A and B are known false.
- Value *A, *B;
- if ((isKnownTrue && match(LHS, m_And(m_Value(A), m_Value(B)))) ||
- (isKnownFalse && match(LHS, m_Or(m_Value(A), m_Value(B))))) {
- Changed |= propagateEquality(A, RHS, Root);
- Changed |= propagateEquality(B, RHS, Root);
- return Changed;
- }
+ // Now try to deduce additional equalities from this one. For example, if the
+ // known equality was "(A != B)" == "false" then it follows that A and B are
+ // equal in the scope. Only boolean equalities with an explicit true or false
+ // RHS are currently supported.
+ if (!RHS->getType()->isIntegerTy(1))
+ // Not a boolean equality - bail out.
+ continue;
+ ConstantInt *CI = dyn_cast<ConstantInt>(RHS);
+ if (!CI)
+ // RHS neither 'true' nor 'false' - bail out.
+ continue;
+ // Whether RHS equals 'true'. Otherwise it equals 'false'.
+ bool isKnownTrue = CI->isAllOnesValue();
+ bool isKnownFalse = !isKnownTrue;
+
+ // If "A && B" is known true then both A and B are known true. If "A || B"
+ // is known false then both A and B are known false.
+ Value *A, *B;
+ if ((isKnownTrue && match(LHS, m_And(m_Value(A), m_Value(B)))) ||
+ (isKnownFalse && match(LHS, m_Or(m_Value(A), m_Value(B))))) {
+ Worklist.push_back(std::make_pair(A, RHS));
+ Worklist.push_back(std::make_pair(B, RHS));
+ continue;
+ }
- // If we are propagating an equality like "(A == B)" == "true" then also
- // propagate the equality A == B. When propagating a comparison such as
- // "(A >= B)" == "true", replace all instances of "A < B" with "false".
- if (ICmpInst *Cmp = dyn_cast<ICmpInst>(LHS)) {
- Value *Op0 = Cmp->getOperand(0), *Op1 = Cmp->getOperand(1);
-
- // If "A == B" is known true, or "A != B" is known false, then replace
- // A with B everywhere in the scope.
- if ((isKnownTrue && Cmp->getPredicate() == CmpInst::ICMP_EQ) ||
- (isKnownFalse && Cmp->getPredicate() == CmpInst::ICMP_NE))
- Changed |= propagateEquality(Op0, Op1, Root);
-
- // If "A >= B" is known true, replace "A < B" with false everywhere.
- CmpInst::Predicate NotPred = Cmp->getInversePredicate();
- Constant *NotVal = ConstantInt::get(Cmp->getType(), isKnownFalse);
- // Since we don't have the instruction "A < B" immediately to hand, work out
- // the value number that it would have and use that to find an appropriate
- // instruction (if any).
- uint32_t NextNum = VN.getNextUnusedValueNumber();
- uint32_t Num = VN.lookup_or_add_cmp(Cmp->getOpcode(), NotPred, Op0, Op1);
- // If the number we were assigned was brand new then there is no point in
- // looking for an instruction realizing it: there cannot be one!
- if (Num < NextNum) {
- Value *NotCmp = findLeader(Root, Num);
- if (NotCmp && isa<Instruction>(NotCmp)) {
- unsigned NumReplacements =
- replaceAllDominatedUsesWith(NotCmp, NotVal, Root);
- Changed |= NumReplacements > 0;
- NumGVNEqProp += NumReplacements;
+ // If we are propagating an equality like "(A == B)" == "true" then also
+ // propagate the equality A == B. When propagating a comparison such as
+ // "(A >= B)" == "true", replace all instances of "A < B" with "false".
+ if (ICmpInst *Cmp = dyn_cast<ICmpInst>(LHS)) {
+ Value *Op0 = Cmp->getOperand(0), *Op1 = Cmp->getOperand(1);
+
+ // If "A == B" is known true, or "A != B" is known false, then replace
+ // A with B everywhere in the scope.
+ if ((isKnownTrue && Cmp->getPredicate() == CmpInst::ICMP_EQ) ||
+ (isKnownFalse && Cmp->getPredicate() == CmpInst::ICMP_NE))
+ Worklist.push_back(std::make_pair(Op0, Op1));
+
+ // If "A >= B" is known true, replace "A < B" with false everywhere.
+ CmpInst::Predicate NotPred = Cmp->getInversePredicate();
+ Constant *NotVal = ConstantInt::get(Cmp->getType(), isKnownFalse);
+ // Since we don't have the instruction "A < B" immediately to hand, work out
+ // the value number that it would have and use that to find an appropriate
+ // instruction (if any).
+ uint32_t NextNum = VN.getNextUnusedValueNumber();
+ uint32_t Num = VN.lookup_or_add_cmp(Cmp->getOpcode(), NotPred, Op0, Op1);
+ // If the number we were assigned was brand new then there is no point in
+ // looking for an instruction realizing it: there cannot be one!
+ if (Num < NextNum) {
+ Value *NotCmp = findLeader(Root, Num);
+ if (NotCmp && isa<Instruction>(NotCmp)) {
+ unsigned NumReplacements =
+ replaceAllDominatedUsesWith(NotCmp, NotVal, Root);
+ Changed |= NumReplacements > 0;
+ NumGVNEqProp += NumReplacements;
+ }
}
- }
- // Ensure that any instruction in scope that gets the "A < B" value number
- // is replaced with false.
- addToLeaderTable(Num, NotVal, Root);
+ // Ensure that any instruction in scope that gets the "A < B" value number
+ // is replaced with false.
+ addToLeaderTable(Num, NotVal, Root);
- return Changed;
+ continue;
+ }
}
return Changed;
@@ -2325,7 +2335,14 @@ bool GVN::performPRE(Function &F) {
CurInst->mayReadFromMemory() || CurInst->mayHaveSideEffects() ||
isa<DbgInfoIntrinsic>(CurInst))
continue;
-
+
+ // Don't do PRE on compares. The PHI would prevent CodeGenPrepare from
+ // sinking the compare again, and it would force the code generator to
+ // move the i1 from processor flags or predicate registers into a general
+ // purpose register.
+ if (isa<CmpInst>(CurInst))
+ continue;
+
// We don't currently value number ANY inline asm calls.
if (CallInst *CallI = dyn_cast<CallInst>(CurInst))
if (CallI->isInlineAsm())
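
The propagateEquality rewrite above replaces self-recursion with an explicit worklist, which bounds stack growth on long chains of derived equalities. A standalone sketch of that transformation, with hypothetical names rather than GVN's real API:

#include <cstdio>
#include <utility>
#include <vector>

// Hypothetical example showing the recursion-to-worklist shape.
static void propagate(int LHS, int RHS) {
  std::vector<std::pair<int, int> > Worklist;
  Worklist.push_back(std::make_pair(LHS, RHS));
  while (!Worklist.empty()) {
    std::pair<int, int> Item = Worklist.back();
    Worklist.pop_back();
    std::printf("visit (%d, %d)\n", Item.first, Item.second);
    // Where the recursive version would call itself on a derived equality,
    // the iterative version pushes it onto the worklist and keeps looping.
    if (Item.first > 0)
      Worklist.push_back(std::make_pair(Item.first - 1, Item.second));
  }
}

int main() { propagate(3, 7); return 0; }
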
diff --git a/lib/Transforms/Scalar/IndVarSimplify.cpp b/lib/Transforms/Scalar/IndVarSimplify.cpp
index 490617a..a9ba657 100644
--- a/lib/Transforms/Scalar/IndVarSimplify.cpp
+++ b/lib/Transforms/Scalar/IndVarSimplify.cpp
@@ -33,7 +33,6 @@
#include "llvm/LLVMContext.h"
#include "llvm/Type.h"
#include "llvm/Analysis/Dominators.h"
-#include "llvm/Analysis/IVUsers.h"
#include "llvm/Analysis/ScalarEvolutionExpander.h"
#include "llvm/Analysis/LoopInfo.h"
#include "llvm/Analysis/LoopPass.h"
@@ -50,18 +49,12 @@
#include "llvm/ADT/Statistic.h"
using namespace llvm;
-STATISTIC(NumRemoved , "Number of aux indvars removed");
STATISTIC(NumWidened , "Number of indvars widened");
-STATISTIC(NumInserted , "Number of canonical indvars added");
STATISTIC(NumReplaced , "Number of exit values replaced");
STATISTIC(NumLFTR , "Number of loop exit tests replaced");
STATISTIC(NumElimExt , "Number of IV sign/zero extends eliminated");
STATISTIC(NumElimIV , "Number of congruent IVs eliminated");
-static cl::opt<bool> EnableIVRewrite(
- "enable-iv-rewrite", cl::Hidden,
- cl::desc("Enable canonical induction variable rewriting"));
-
// Trip count verification can be enabled by default under NDEBUG if we
// implement a strong expression equivalence checker in SCEV. Until then, we
// use the verify-indvars flag, which may assert in some cases.
@@ -71,7 +64,6 @@ static cl::opt<bool> VerifyIndvars(
namespace {
class IndVarSimplify : public LoopPass {
- IVUsers *IU;
LoopInfo *LI;
ScalarEvolution *SE;
DominatorTree *DT;
@@ -82,7 +74,7 @@ namespace {
public:
static char ID; // Pass identification, replacement for typeid
- IndVarSimplify() : LoopPass(ID), IU(0), LI(0), SE(0), DT(0), TD(0),
+ IndVarSimplify() : LoopPass(ID), LI(0), SE(0), DT(0), TD(0),
Changed(false) {
initializeIndVarSimplifyPass(*PassRegistry::getPassRegistry());
}
@@ -95,13 +87,9 @@ namespace {
AU.addRequired<ScalarEvolution>();
AU.addRequiredID(LoopSimplifyID);
AU.addRequiredID(LCSSAID);
- if (EnableIVRewrite)
- AU.addRequired<IVUsers>();
AU.addPreserved<ScalarEvolution>();
AU.addPreservedID(LoopSimplifyID);
AU.addPreservedID(LCSSAID);
- if (EnableIVRewrite)
- AU.addPreserved<IVUsers>();
AU.setPreservesCFG();
}
@@ -119,8 +107,6 @@ namespace {
void RewriteLoopExitValues(Loop *L, SCEVExpander &Rewriter);
- void RewriteIVExpressions(Loop *L, SCEVExpander &Rewriter);
-
Value *LinearFunctionTestReplace(Loop *L, const SCEV *BackedgeTakenCount,
PHINode *IndVar, SCEVExpander &Rewriter);
@@ -136,7 +122,6 @@ INITIALIZE_PASS_DEPENDENCY(LoopInfo)
INITIALIZE_PASS_DEPENDENCY(ScalarEvolution)
INITIALIZE_PASS_DEPENDENCY(LoopSimplify)
INITIALIZE_PASS_DEPENDENCY(LCSSA)
-INITIALIZE_PASS_DEPENDENCY(IVUsers)
INITIALIZE_PASS_END(IndVarSimplify, "indvars",
"Induction Variable Simplification", false, false)
@@ -448,13 +433,6 @@ void IndVarSimplify::HandleFloatingPointIV(Loop *L, PHINode *PN) {
PN->replaceAllUsesWith(Conv);
RecursivelyDeleteTriviallyDeadInstructions(PN);
}
-
- // Add a new IVUsers entry for the newly-created integer PHI.
- if (IU) {
- SmallPtrSet<Loop*, 16> SimplifiedLoopNests;
- IU->AddUsersIfInteresting(NewPHI, SimplifiedLoopNests);
- }
-
Changed = true;
}
@@ -600,124 +578,6 @@ void IndVarSimplify::RewriteLoopExitValues(Loop *L, SCEVExpander &Rewriter) {
}
//===----------------------------------------------------------------------===//
-// Rewrite IV users based on a canonical IV.
-// Only for use with -enable-iv-rewrite.
-//===----------------------------------------------------------------------===//
-
-/// FIXME: It is an extremely bad idea to indvar substitute anything more
-/// complex than affine induction variables. Doing so will put expensive
-/// polynomial evaluations inside of the loop, and the str reduction pass
-/// currently can only reduce affine polynomials. For now just disable
-/// indvar subst on anything more complex than an affine addrec, unless
-/// it can be expanded to a trivial value.
-static bool isSafe(const SCEV *S, const Loop *L, ScalarEvolution *SE) {
- // Loop-invariant values are safe.
- if (SE->isLoopInvariant(S, L)) return true;
-
- // Affine addrecs are safe. Non-affine are not, because LSR doesn't know how
- // to transform them into efficient code.
- if (const SCEVAddRecExpr *AR = dyn_cast<SCEVAddRecExpr>(S))
- return AR->isAffine();
-
- // An add is safe it all its operands are safe.
- if (const SCEVCommutativeExpr *Commutative
- = dyn_cast<SCEVCommutativeExpr>(S)) {
- for (SCEVCommutativeExpr::op_iterator I = Commutative->op_begin(),
- E = Commutative->op_end(); I != E; ++I)
- if (!isSafe(*I, L, SE)) return false;
- return true;
- }
-
- // A cast is safe if its operand is.
- if (const SCEVCastExpr *C = dyn_cast<SCEVCastExpr>(S))
- return isSafe(C->getOperand(), L, SE);
-
- // A udiv is safe if its operands are.
- if (const SCEVUDivExpr *UD = dyn_cast<SCEVUDivExpr>(S))
- return isSafe(UD->getLHS(), L, SE) &&
- isSafe(UD->getRHS(), L, SE);
-
- // SCEVUnknown is always safe.
- if (isa<SCEVUnknown>(S))
- return true;
-
- // Nothing else is safe.
- return false;
-}
-
-void IndVarSimplify::RewriteIVExpressions(Loop *L, SCEVExpander &Rewriter) {
- // Rewrite all induction variable expressions in terms of the canonical
- // induction variable.
- //
- // If there were induction variables of other sizes or offsets, manually
- // add the offsets to the primary induction variable and cast, avoiding
- // the need for the code evaluation methods to insert induction variables
- // of different sizes.
- for (IVUsers::iterator UI = IU->begin(), E = IU->end(); UI != E; ++UI) {
- Value *Op = UI->getOperandValToReplace();
- Type *UseTy = Op->getType();
- Instruction *User = UI->getUser();
-
- // Compute the final addrec to expand into code.
- const SCEV *AR = IU->getReplacementExpr(*UI);
-
- // Evaluate the expression out of the loop, if possible.
- if (!L->contains(UI->getUser())) {
- const SCEV *ExitVal = SE->getSCEVAtScope(AR, L->getParentLoop());
- if (SE->isLoopInvariant(ExitVal, L))
- AR = ExitVal;
- }
-
- // FIXME: It is an extremely bad idea to indvar substitute anything more
- // complex than affine induction variables. Doing so will put expensive
- // polynomial evaluations inside of the loop, and the str reduction pass
- // currently can only reduce affine polynomials. For now just disable
- // indvar subst on anything more complex than an affine addrec, unless
- // it can be expanded to a trivial value.
- if (!isSafe(AR, L, SE))
- continue;
-
- // Determine the insertion point for this user. By default, insert
- // immediately before the user. The SCEVExpander class will automatically
- // hoist loop invariants out of the loop. For PHI nodes, there may be
- // multiple uses, so compute the nearest common dominator for the
- // incoming blocks.
- Instruction *InsertPt = getInsertPointForUses(User, Op, DT);
-
- // Now expand it into actual Instructions and patch it into place.
- Value *NewVal = Rewriter.expandCodeFor(AR, UseTy, InsertPt);
-
- DEBUG(dbgs() << "INDVARS: Rewrote IV '" << *AR << "' " << *Op << '\n'
- << " into = " << *NewVal << "\n");
-
- if (!isValidRewrite(Op, NewVal)) {
- DeadInsts.push_back(NewVal);
- continue;
- }
- // Inform ScalarEvolution that this value is changing. The change doesn't
- // affect its value, but it does potentially affect which use lists the
- // value will be on after the replacement, which affects ScalarEvolution's
- // ability to walk use lists and drop dangling pointers when a value is
- // deleted.
- SE->forgetValue(User);
-
- // Patch the new value into place.
- if (Op->hasName())
- NewVal->takeName(Op);
- if (Instruction *NewValI = dyn_cast<Instruction>(NewVal))
- NewValI->setDebugLoc(User->getDebugLoc());
- User->replaceUsesOfWith(Op, NewVal);
- UI->setOperandValToReplace(NewVal);
-
- ++NumRemoved;
- Changed = true;
-
- // The old value may be dead now.
- DeadInsts.push_back(Op);
- }
-}
-
-//===----------------------------------------------------------------------===//
// IV Widening - Extend the width of an IV to cover its widest uses.
//===----------------------------------------------------------------------===//
@@ -1262,9 +1122,6 @@ static bool isHighCostExpansion(const SCEV *S, BranchInst *BI,
}
}
- if (EnableIVRewrite)
- return false;
-
// Recurse past add expressions, which commonly occur in the
// BackedgeTakenCount. They may already exist in program code, and if not,
// they are not too expensive rematerialize.
@@ -1321,36 +1178,6 @@ static bool canExpandBackedgeTakenCount(Loop *L, ScalarEvolution *SE) {
return true;
}
-/// getBackedgeIVType - Get the widest type used by the loop test after peeking
-/// through Truncs.
-///
-/// TODO: Unnecessary when ForceLFTR is removed.
-static Type *getBackedgeIVType(Loop *L) {
- if (!L->getExitingBlock())
- return 0;
-
- // Can't rewrite non-branch yet.
- BranchInst *BI = dyn_cast<BranchInst>(L->getExitingBlock()->getTerminator());
- if (!BI)
- return 0;
-
- ICmpInst *Cond = dyn_cast<ICmpInst>(BI->getCondition());
- if (!Cond)
- return 0;
-
- Type *Ty = 0;
- for(User::op_iterator OI = Cond->op_begin(), OE = Cond->op_end();
- OI != OE; ++OI) {
- assert((!Ty || Ty == (*OI)->getType()) && "bad icmp operand types");
- TruncInst *Trunc = dyn_cast<TruncInst>(*OI);
- if (!Trunc)
- continue;
-
- return Trunc->getSrcTy();
- }
- return Ty;
-}
-
/// getLoopPhiForCounter - Return the loop header phi IFF IncV adds a loop
/// invariant value to the phi.
static PHINode *getLoopPhiForCounter(Value *IncV, Loop *L, DominatorTree *DT) {
@@ -1619,8 +1446,7 @@ LinearFunctionTestReplace(Loop *L,
// LFTR can ignore IV overflow and truncate to the width of
// BECount. This avoids materializing the add(zext(add)) expression.
- Type *CntTy = !EnableIVRewrite ?
- BackedgeTakenCount->getType() : IndVar->getType();
+ Type *CntTy = BackedgeTakenCount->getType();
const SCEV *IVCount = BackedgeTakenCount;
@@ -1805,8 +1631,6 @@ bool IndVarSimplify::runOnLoop(Loop *L, LPPassManager &LPM) {
if (!L->isLoopSimplifyForm())
return false;
- if (EnableIVRewrite)
- IU = &getAnalysis<IVUsers>();
LI = &getAnalysis<LoopInfo>();
SE = &getAnalysis<ScalarEvolution>();
DT = &getAnalysis<DominatorTree>();
@@ -1833,10 +1657,8 @@ bool IndVarSimplify::runOnLoop(Loop *L, LPPassManager &LPM) {
// attempt to avoid evaluating SCEVs for sign/zero extend operations until
// other expressions involving loop IVs have been evaluated. This helps SCEV
// set no-wrap flags before normalizing sign/zero extension.
- if (!EnableIVRewrite) {
- Rewriter.disableCanonicalMode();
- SimplifyAndExtend(L, Rewriter, LPM);
- }
+ Rewriter.disableCanonicalMode();
+ SimplifyAndExtend(L, Rewriter, LPM);
// Check to see if this loop has a computable loop-invariant execution count.
// If so, this means that we can compute the final value of any expressions
@@ -1847,106 +1669,28 @@ bool IndVarSimplify::runOnLoop(Loop *L, LPPassManager &LPM) {
if (!isa<SCEVCouldNotCompute>(BackedgeTakenCount))
RewriteLoopExitValues(L, Rewriter);
- // Eliminate redundant IV users.
- if (EnableIVRewrite)
- Changed |= simplifyIVUsers(IU, SE, &LPM, DeadInsts);
-
// Eliminate redundant IV cycles.
- if (!EnableIVRewrite)
- NumElimIV += Rewriter.replaceCongruentIVs(L, DT, DeadInsts);
-
- // Compute the type of the largest recurrence expression, and decide whether
- // a canonical induction variable should be inserted.
- Type *LargestType = 0;
- bool NeedCannIV = false;
- bool ExpandBECount = canExpandBackedgeTakenCount(L, SE);
- if (EnableIVRewrite && ExpandBECount) {
- // If we have a known trip count and a single exit block, we'll be
- // rewriting the loop exit test condition below, which requires a
- // canonical induction variable.
- NeedCannIV = true;
- Type *Ty = BackedgeTakenCount->getType();
- if (!EnableIVRewrite) {
- // In this mode, SimplifyIVUsers may have already widened the IV used by
- // the backedge test and inserted a Trunc on the compare's operand. Get
- // the wider type to avoid creating a redundant narrow IV only used by the
- // loop test.
- LargestType = getBackedgeIVType(L);
- }
- if (!LargestType ||
- SE->getTypeSizeInBits(Ty) >
- SE->getTypeSizeInBits(LargestType))
- LargestType = SE->getEffectiveSCEVType(Ty);
- }
- if (EnableIVRewrite) {
- for (IVUsers::const_iterator I = IU->begin(), E = IU->end(); I != E; ++I) {
- NeedCannIV = true;
- Type *Ty =
- SE->getEffectiveSCEVType(I->getOperandValToReplace()->getType());
- if (!LargestType ||
- SE->getTypeSizeInBits(Ty) >
- SE->getTypeSizeInBits(LargestType))
- LargestType = Ty;
- }
- }
-
- // Now that we know the largest of the induction variable expressions
- // in this loop, insert a canonical induction variable of the largest size.
- PHINode *IndVar = 0;
- if (NeedCannIV) {
- // Check to see if the loop already has any canonical-looking induction
- // variables. If any are present and wider than the planned canonical
- // induction variable, temporarily remove them, so that the Rewriter
- // doesn't attempt to reuse them.
- SmallVector<PHINode *, 2> OldCannIVs;
- while (PHINode *OldCannIV = L->getCanonicalInductionVariable()) {
- if (SE->getTypeSizeInBits(OldCannIV->getType()) >
- SE->getTypeSizeInBits(LargestType))
- OldCannIV->removeFromParent();
- else
- break;
- OldCannIVs.push_back(OldCannIV);
- }
-
- IndVar = Rewriter.getOrInsertCanonicalInductionVariable(L, LargestType);
-
- ++NumInserted;
- Changed = true;
- DEBUG(dbgs() << "INDVARS: New CanIV: " << *IndVar << '\n');
+ NumElimIV += Rewriter.replaceCongruentIVs(L, DT, DeadInsts);
- // Now that the official induction variable is established, reinsert
- // any old canonical-looking variables after it so that the IR remains
- // consistent. They will be deleted as part of the dead-PHI deletion at
- // the end of the pass.
- while (!OldCannIVs.empty()) {
- PHINode *OldCannIV = OldCannIVs.pop_back_val();
- OldCannIV->insertBefore(L->getHeader()->getFirstInsertionPt());
- }
- }
- else if (!EnableIVRewrite && ExpandBECount && needsLFTR(L, DT)) {
- IndVar = FindLoopCounter(L, BackedgeTakenCount, SE, DT, TD);
- }
// If we have a trip count expression, rewrite the loop's exit condition
// using it. We can currently only handle loops with a single exit.
- Value *NewICmp = 0;
- if (ExpandBECount && IndVar) {
- // Check preconditions for proper SCEVExpander operation. SCEV does not
- // express SCEVExpander's dependencies, such as LoopSimplify. Instead any
- // pass that uses the SCEVExpander must do it. This does not work well for
- // loop passes because SCEVExpander makes assumptions about all loops, while
- // LoopPassManager only forces the current loop to be simplified.
- //
- // FIXME: SCEV expansion has no way to bail out, so the caller must
- // explicitly check any assumptions made by SCEV. Brittle.
- const SCEVAddRecExpr *AR = dyn_cast<SCEVAddRecExpr>(BackedgeTakenCount);
- if (!AR || AR->getLoop()->getLoopPreheader())
- NewICmp =
- LinearFunctionTestReplace(L, BackedgeTakenCount, IndVar, Rewriter);
+ if (canExpandBackedgeTakenCount(L, SE) && needsLFTR(L, DT)) {
+ PHINode *IndVar = FindLoopCounter(L, BackedgeTakenCount, SE, DT, TD);
+ if (IndVar) {
+ // Check preconditions for proper SCEVExpander operation. SCEV does not
+ // express SCEVExpander's dependencies, such as LoopSimplify. Instead any
+ // pass that uses the SCEVExpander must do it. This does not work well for
+ // loop passes because SCEVExpander makes assumptions about all loops, while
+ // LoopPassManager only forces the current loop to be simplified.
+ //
+ // FIXME: SCEV expansion has no way to bail out, so the caller must
+ // explicitly check any assumptions made by SCEV. Brittle.
+ const SCEVAddRecExpr *AR = dyn_cast<SCEVAddRecExpr>(BackedgeTakenCount);
+ if (!AR || AR->getLoop()->getLoopPreheader())
+ (void)LinearFunctionTestReplace(L, BackedgeTakenCount, IndVar,
+ Rewriter);
+ }
}
- // Rewrite IV-derived expressions.
- if (EnableIVRewrite)
- RewriteIVExpressions(L, Rewriter);
-
// Clear the rewriter cache, because values that are in the rewriter's cache
// can be deleted in the loop below, causing the AssertingVH in the cache to
// trigger.
@@ -1965,16 +1709,6 @@ bool IndVarSimplify::runOnLoop(Loop *L, LPPassManager &LPM) {
// loop may be sunk below the loop to reduce register pressure.
SinkUnusedInvariants(L);
- // For completeness, inform IVUsers of the IV use in the newly-created
- // loop exit test instruction.
- if (IU && NewICmp) {
- ICmpInst *NewICmpInst = dyn_cast<ICmpInst>(NewICmp);
- if (NewICmpInst) {
- SmallPtrSet<Loop*, 16> SimplifiedLoopNests;
- IU->AddUsersIfInteresting(cast<Instruction>(NewICmpInst->getOperand(0)),
- SimplifiedLoopNests);
- }
- }
// Clean up dead instructions.
Changed |= DeleteDeadPHIs(L->getHeader());
// Check a post-condition.
@@ -1984,8 +1718,7 @@ bool IndVarSimplify::runOnLoop(Loop *L, LPPassManager &LPM) {
// Verify that LFTR, and any other change have not interfered with SCEV's
// ability to compute trip count.
#ifndef NDEBUG
- if (!EnableIVRewrite && VerifyIndvars &&
- !isa<SCEVCouldNotCompute>(BackedgeTakenCount)) {
+ if (VerifyIndvars && !isa<SCEVCouldNotCompute>(BackedgeTakenCount)) {
SE->forgetLoop(L);
const SCEV *NewBECount = SE->getBackedgeTakenCount(L);
if (SE->getTypeSizeInBits(BackedgeTakenCount->getType()) <
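
The IndVarSimplify patch deletes every path keyed off the hidden -enable-iv-rewrite flag, including the IVUsers bookkeeping and canonical-IV insertion. For reference, hidden flags of the kind being removed follow this declaration and use pattern (hypothetical flag name; a sketch, not the deleted code):

#include "llvm/Support/CommandLine.h"
using namespace llvm;

// Hypothetical flag, mirroring the shape of the removed EnableIVRewrite.
static cl::opt<bool> EnableLegacyRewrite(
    "enable-legacy-rewrite", cl::Hidden,
    cl::desc("Enable a legacy rewriting code path"));

static bool runLegacyAwareStep() {
  if (EnableLegacyRewrite) {
    // Legacy branch: the kind of conditional this patch deletes wholesale.
    return false;
  }
  // Default behavior, now unconditional after the cleanup.
  return true;
}
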
diff --git a/lib/Transforms/Scalar/LoopStrengthReduce.cpp b/lib/Transforms/Scalar/LoopStrengthReduce.cpp
index 82d918e..fe4700b 100644
--- a/lib/Transforms/Scalar/LoopStrengthReduce.cpp
+++ b/lib/Transforms/Scalar/LoopStrengthReduce.cpp
@@ -77,11 +77,11 @@
#include <algorithm>
using namespace llvm;
-static cl::opt<bool> EnableNested(
- "enable-lsr-nested", cl::Hidden, cl::desc("Enable LSR on nested loops"));
-
-static cl::opt<bool> EnableRetry(
- "enable-lsr-retry", cl::Hidden, cl::desc("Enable LSR retry"));
+/// MaxIVUsers is an arbitrary threshold that provides an early opportunity to
+/// bail out. This threshold is far beyond the number of users that LSR can
+/// conceivably solve, so it should not affect generated code, but catches the
+/// worst cases before LSR burns too much compile time and stack space.
+static const unsigned MaxIVUsers = 200;
// Temporary flag to cleanup congruent phis after LSR phi expansion.
// It's currently disabled until we can determine whether it's truly useful or
@@ -710,8 +710,9 @@ static bool isHighCostExpansion(const SCEV *S,
Value *UVal = U->getValue();
for (Value::use_iterator UI = UVal->use_begin(), UE = UVal->use_end();
UI != UE; ++UI) {
- Instruction *User = cast<Instruction>(*UI);
- if (User->getOpcode() == Instruction::Mul
+ // If U is a constant, it may be used by a ConstantExpr.
+ Instruction *User = dyn_cast<Instruction>(*UI);
+ if (User && User->getOpcode() == Instruction::Mul
&& SE.isSCEVable(User->getType())) {
return SE.getSCEV(User) == Mul;
}
@@ -824,36 +825,20 @@ void Cost::RateRegister(const SCEV *Reg,
const Loop *L,
ScalarEvolution &SE, DominatorTree &DT) {
if (const SCEVAddRecExpr *AR = dyn_cast<SCEVAddRecExpr>(Reg)) {
- if (AR->getLoop() == L)
- AddRecCost += 1; /// TODO: This should be a function of the stride.
-
// If this is an addrec for another loop, don't second-guess its addrec phi
// nodes. LSR isn't currently smart enough to reason about more than one
- // loop at a time. LSR has either already run on inner loops, will not run
- // on other loops, and cannot be expected to change sibling loops. If the
- // AddRec exists, consider it's register free and leave it alone. Otherwise,
- // do not consider this formula at all.
- else if (!EnableNested || L->contains(AR->getLoop()) ||
- (!AR->getLoop()->contains(L) &&
- DT.dominates(L->getHeader(), AR->getLoop()->getHeader()))) {
+ // loop at a time. LSR has already run on inner loops, will not run on outer
+ // loops, and cannot be expected to change sibling loops.
+ if (AR->getLoop() != L) {
+ // If the AddRec exists, consider its register free and leave it alone.
if (isExistingPhi(AR, SE))
return;
- // For !EnableNested, never rewrite IVs in other loops.
- if (!EnableNested) {
- Loose();
- return;
- }
- // If this isn't one of the addrecs that the loop already has, it
- // would require a costly new phi and add. TODO: This isn't
- // precisely modeled right now.
- ++NumBaseAdds;
- if (!Regs.count(AR->getStart())) {
- RateRegister(AR->getStart(), Regs, L, SE, DT);
- if (isLoser())
- return;
- }
+ // Otherwise, do not consider this formula at all.
+ Loose();
+ return;
}
+ AddRecCost += 1; /// TODO: This should be a function of the stride.
// Add the step value register, if it needs one.
// TODO: The non-affine case isn't precisely modeled here.
@@ -1303,10 +1288,19 @@ static bool isLegalUse(const TargetLowering::AddrMode &AM,
// If we have low-level target information, ask the target if it can fold an
// integer immediate on an icmp.
if (AM.BaseOffs != 0) {
- if (TLI) return TLI->isLegalICmpImmediate(-(uint64_t)AM.BaseOffs);
- return false;
+ if (!TLI)
+ return false;
+ // We have one of:
+ // ICmpZero BaseReg + Offset => ICmp BaseReg, -Offset
+ // ICmpZero -1*ScaleReg + Offset => ICmp ScaleReg, Offset
+ // Offs is the ICmp immediate.
+ int64_t Offs = AM.BaseOffs;
+ if (AM.Scale == 0)
+ Offs = -(uint64_t)Offs; // The cast does the right thing with INT64_MIN.
+ return TLI->isLegalICmpImmediate(Offs);
}
+ // ICmpZero BaseReg + -1*ScaleReg => ICmp BaseReg, ScaleReg
return true;
case LSRUse::Basic:
@@ -2193,7 +2187,7 @@ void LSRInstance::CollectInterestingTypesAndFactors() {
do {
const SCEV *S = Worklist.pop_back_val();
if (const SCEVAddRecExpr *AR = dyn_cast<SCEVAddRecExpr>(S)) {
- if (EnableNested || AR->getLoop() == L)
+ if (AR->getLoop() == L)
Strides.insert(AR->getStepRecurrence(SE));
Worklist.push_back(AR->getStart());
} else if (const SCEVAddExpr *Add = dyn_cast<SCEVAddExpr>(S)) {
@@ -2463,7 +2457,7 @@ void LSRInstance::ChainInstruction(Instruction *UserInst, Instruction *IVOper,
if (!isCompatibleIVType(PrevIV, NextIV))
continue;
- // A phi nodes terminates a chain.
+ // A phi node terminates a chain.
if (isa<PHINode>(UserInst)
&& isa<PHINode>(IVChainVec[ChainIdx].back().UserInst))
continue;
@@ -2519,13 +2513,14 @@ void LSRInstance::ChainInstruction(Instruction *UserInst, Instruction *IVOper,
for (Value::use_iterator UseIter = IVOper->use_begin(),
UseEnd = IVOper->use_end(); UseIter != UseEnd; ++UseIter) {
Instruction *OtherUse = dyn_cast<Instruction>(*UseIter);
+ if (!OtherUse || OtherUse == UserInst)
+ continue;
if (SE.isSCEVable(OtherUse->getType())
&& !isa<SCEVUnknown>(SE.getSCEV(OtherUse))
&& IU.isIVUserOrOperand(OtherUse)) {
continue;
}
- if (OtherUse && OtherUse != UserInst)
- NearUsers.insert(OtherUse);
+ NearUsers.insert(OtherUse);
}
// Since this user is part of the chain, it's no longer considered a use
@@ -3986,24 +3981,29 @@ void LSRInstance::SolveRecurse(SmallVectorImpl<const Formula *> &Solution,
if (LU.Regs.count(*I))
ReqRegs.insert(*I);
- bool AnySatisfiedReqRegs = false;
SmallPtrSet<const SCEV *, 16> NewRegs;
Cost NewCost;
-retry:
for (SmallVectorImpl<Formula>::const_iterator I = LU.Formulae.begin(),
E = LU.Formulae.end(); I != E; ++I) {
const Formula &F = *I;
// Ignore formulae which do not use any of the required registers.
+ bool SatisfiedReqReg = true;
for (SmallSetVector<const SCEV *, 4>::const_iterator J = ReqRegs.begin(),
JE = ReqRegs.end(); J != JE; ++J) {
const SCEV *Reg = *J;
if ((!F.ScaledReg || F.ScaledReg != Reg) &&
std::find(F.BaseRegs.begin(), F.BaseRegs.end(), Reg) ==
- F.BaseRegs.end())
- goto skip;
+ F.BaseRegs.end()) {
+ SatisfiedReqReg = false;
+ break;
+ }
+ }
+ if (!SatisfiedReqReg) {
+ // If none of the formulae satisfied the required registers, then we could
+ // clear ReqRegs and try again. Currently, we simply give up in this case.
+ continue;
}
- AnySatisfiedReqRegs = true;
// Evaluate the cost of the current formula. If it's already worse than
// the current best, prune the search at that point.
@@ -4030,18 +4030,6 @@ retry:
}
Workspace.pop_back();
}
- skip:;
- }
-
- if (!EnableRetry && !AnySatisfiedReqRegs)
- return;
-
- // If none of the formulae had all of the required registers, relax the
- // constraint so that we don't exclude all formulae.
- if (!AnySatisfiedReqRegs) {
- assert(!ReqRegs.empty() && "Solver failed even without required registers");
- ReqRegs.clear();
- goto retry;
}
}
@@ -4537,6 +4525,17 @@ LSRInstance::LSRInstance(const TargetLowering *tli, Loop *l, Pass *P)
// If there's no interesting work to be done, bail early.
if (IU.empty()) return;
+ // If there's too much analysis to be done, bail early. We won't be able to
+ // model the problem anyway.
+ unsigned NumUsers = 0;
+ for (IVUsers::const_iterator UI = IU.begin(), E = IU.end(); UI != E; ++UI) {
+ if (++NumUsers > MaxIVUsers) {
+ DEBUG(dbgs() << "LSR skipping loop, too many IV Users in " << *L
+ << "\n");
+ return;
+ }
+ }
+
#ifndef NDEBUG
// All dominating loops must have preheaders, or SCEVExpander may not be able
// to materialize an AddRecExpr whose Start is an outer AddRecExpr.
@@ -4566,7 +4565,7 @@ LSRInstance::LSRInstance(const TargetLowering *tli, Loop *l, Pass *P)
if (IU.empty()) return;
// Skip nested loops until we can model them better with formulae.
- if (!EnableNested && !L->empty()) {
+ if (!L->empty()) {
DEBUG(dbgs() << "LSR skipping outer loop " << *L << "\n");
return;
}
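
One subtlety in the isLegalUse hunk above: the offset is negated through uint64_t ("the cast does the right thing with INT64_MIN") because negating INT64_MIN as a signed value is undefined behavior, while unsigned negation wraps. A tiny standalone check of that claim:

#include <cstdint>
#include <cstdio>

int main() {
  int64_t Offs = INT64_MIN;
  // -Offs on the signed type would overflow (undefined behavior); negating
  // through uint64_t wraps to 2^63, which converts back to INT64_MIN on
  // two's-complement targets.
  int64_t Negated = -(uint64_t)Offs;
  std::printf("%lld\n", (long long)Negated);  // -9223372036854775808
  return 0;
}
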
diff --git a/lib/Transforms/Scalar/LoopUnrollPass.cpp b/lib/Transforms/Scalar/LoopUnrollPass.cpp
index 22dbfe3..09a186f 100644
--- a/lib/Transforms/Scalar/LoopUnrollPass.cpp
+++ b/lib/Transforms/Scalar/LoopUnrollPass.cpp
@@ -197,13 +197,13 @@ bool LoopUnroll::runOnLoop(Loop *L, LPPassManager &LPM) {
}
if (TripCount) {
// Reduce unroll count to be modulo of TripCount for partial unrolling
- Count = CurrentThreshold / LoopSize;
+ Count = Threshold / LoopSize;
while (Count != 0 && TripCount%Count != 0)
Count--;
}
else if (UnrollRuntime) {
// Reduce unroll count to be a lower power-of-two value
- while (Count != 0 && Size > CurrentThreshold) {
+ while (Count != 0 && Size > Threshold) {
Count >>= 1;
Size = LoopSize*Count;
}
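
The LoopUnrollPass fix swaps a stale CurrentThreshold for Threshold in both clamping paths. Extracted as a pair of pure functions, the two strategies look roughly like this (illustrative sketch, not the pass's actual helpers):

#include <cstdio>

static unsigned partialUnrollCount(unsigned Threshold, unsigned LoopSize,
                                   unsigned TripCount) {
  // Partial unrolling: largest count within the size threshold that also
  // evenly divides the trip count (0 means "do not unroll").
  unsigned Count = Threshold / LoopSize;
  while (Count != 0 && TripCount % Count != 0)
    --Count;
  return Count;
}

static unsigned runtimeUnrollCount(unsigned Threshold, unsigned LoopSize,
                                   unsigned Count) {
  // Runtime unrolling: halve the count until the unrolled size fits.
  unsigned Size = LoopSize * Count;
  while (Count != 0 && Size > Threshold) {
    Count >>= 1;
    Size = LoopSize * Count;
  }
  return Count;
}

int main() {
  std::printf("%u %u\n", partialUnrollCount(150, 10, 12),
              runtimeUnrollCount(150, 10, 32));
  return 0;
}
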
diff --git a/lib/Transforms/Scalar/LoopUnswitch.cpp b/lib/Transforms/Scalar/LoopUnswitch.cpp
index 053eb0c..00ecc74 100644
--- a/lib/Transforms/Scalar/LoopUnswitch.cpp
+++ b/lib/Transforms/Scalar/LoopUnswitch.cpp
@@ -64,63 +64,63 @@ STATISTIC(TotalInsts, "Total number of instructions analyzed");
static cl::opt<unsigned>
Threshold("loop-unswitch-threshold", cl::desc("Max loop size to unswitch"),
cl::init(100), cl::Hidden);
-
+
namespace {
-
+
class LUAnalysisCache {
typedef DenseMap<const SwitchInst*, SmallPtrSet<const Value *, 8> >
UnswitchedValsMap;
-
+
typedef UnswitchedValsMap::iterator UnswitchedValsIt;
-
+
struct LoopProperties {
unsigned CanBeUnswitchedCount;
unsigned SizeEstimation;
UnswitchedValsMap UnswitchedVals;
};
-
- // Here we use std::map instead of DenseMap, since we need to keep valid
+
+ // Here we use std::map instead of DenseMap, since we need to keep valid
// LoopProperties pointer for current loop for better performance.
typedef std::map<const Loop*, LoopProperties> LoopPropsMap;
typedef LoopPropsMap::iterator LoopPropsMapIt;
-
+
LoopPropsMap LoopsProperties;
UnswitchedValsMap* CurLoopInstructions;
LoopProperties* CurrentLoopProperties;
-
+
// Max size of code we can produce on remained iterations.
unsigned MaxSize;
-
+
public:
-
+
LUAnalysisCache() :
CurLoopInstructions(NULL), CurrentLoopProperties(NULL),
MaxSize(Threshold)
{}
-
+
// Analyze loop. Check its size, calculate is it possible to unswitch
// it. Returns true if we can unswitch this loop.
bool countLoop(const Loop* L);
-
+
// Clean all data related to given loop.
void forgetLoop(const Loop* L);
-
+
// Mark case value as unswitched.
// Since SI instruction can be partly unswitched, in order to avoid
// extra unswitching in cloned loops keep track all unswitched values.
void setUnswitched(const SwitchInst* SI, const Value* V);
-
+
// Check was this case value unswitched before or not.
bool isUnswitched(const SwitchInst* SI, const Value* V);
-
+
// Clone all loop-unswitch related loop properties.
// Redistribute unswitching quotas.
// Note, that new loop data is stored inside the VMap.
void cloneData(const Loop* NewLoop, const Loop* OldLoop,
const ValueToValueMapTy& VMap);
};
-
+
class LoopUnswitch : public LoopPass {
LoopInfo *LI; // Loop information
LPPassManager *LPM;
@@ -130,7 +130,7 @@ namespace {
std::vector<Loop*> LoopProcessWorklist;
LUAnalysisCache BranchesInfo;
-
+
bool OptimizeForSize;
bool redoLoop;
@@ -138,9 +138,9 @@ namespace {
DominatorTree *DT;
BasicBlock *loopHeader;
BasicBlock *loopPreheader;
-
+
// LoopBlocks contains all of the basic blocks of the loop, including the
- // preheader of the loop, the body of the loop, and the exit blocks of the
+ // preheader of the loop, the body of the loop, and the exit blocks of the
// loop, in that order.
std::vector<BasicBlock*> LoopBlocks;
// NewBlocks contained cloned copy of basic blocks from LoopBlocks.
@@ -148,8 +148,8 @@ namespace {
public:
static char ID; // Pass ID, replacement for typeid
- explicit LoopUnswitch(bool Os = false) :
- LoopPass(ID), OptimizeForSize(Os), redoLoop(false),
+ explicit LoopUnswitch(bool Os = false) :
+ LoopPass(ID), OptimizeForSize(Os), redoLoop(false),
currentLoop(NULL), DT(NULL), loopHeader(NULL),
loopPreheader(NULL) {
initializeLoopUnswitchPass(*PassRegistry::getPassRegistry());
@@ -186,7 +186,7 @@ namespace {
if (I != LoopProcessWorklist.end())
LoopProcessWorklist.erase(I);
}
-
+
void initLoopData() {
loopHeader = currentLoop->getHeader();
loopPreheader = currentLoop->getLoopPreheader();
@@ -205,7 +205,7 @@ namespace {
Constant *Val, bool isEqual);
void EmitPreheaderBranchOnCondition(Value *LIC, Constant *Val,
- BasicBlock *TrueDest,
+ BasicBlock *TrueDest,
BasicBlock *FalseDest,
Instruction *InsertPt);
@@ -222,12 +222,12 @@ namespace {
// Analyze loop. Check its size, calculate is it possible to unswitch
// it. Returns true if we can unswitch this loop.
bool LUAnalysisCache::countLoop(const Loop* L) {
-
+
std::pair<LoopPropsMapIt, bool> InsertRes =
LoopsProperties.insert(std::make_pair(L, LoopProperties()));
-
+
LoopProperties& Props = InsertRes.first->second;
-
+
if (InsertRes.second) {
// New loop.
@@ -235,39 +235,39 @@ bool LUAnalysisCache::countLoop(const Loop* L) {
// expansion, and the number of basic blocks, to avoid loops with
// large numbers of branches which cause loop unswitching to go crazy.
// This is a very ad-hoc heuristic.
-
+
// FIXME: This is overly conservative because it does not take into
// consideration code simplification opportunities and code that can
// be shared by the resultant unswitched loops.
CodeMetrics Metrics;
- for (Loop::block_iterator I = L->block_begin(),
+ for (Loop::block_iterator I = L->block_begin(),
E = L->block_end();
I != E; ++I)
- Metrics.analyzeBasicBlock(*I);
+ Metrics.analyzeBasicBlock(*I);
Props.SizeEstimation = std::min(Metrics.NumInsts, Metrics.NumBlocks * 5);
Props.CanBeUnswitchedCount = MaxSize / (Props.SizeEstimation);
MaxSize -= Props.SizeEstimation * Props.CanBeUnswitchedCount;
- }
-
+ }
+
if (!Props.CanBeUnswitchedCount) {
DEBUG(dbgs() << "NOT unswitching loop %"
<< L->getHeader()->getName() << ", cost too high: "
<< L->getBlocks().size() << "\n");
-
+
return false;
}
-
+
// Be careful. This links are good only before new loop addition.
CurrentLoopProperties = &Props;
CurLoopInstructions = &Props.UnswitchedVals;
-
+
return true;
}
// Clean all data related to given loop.
void LUAnalysisCache::forgetLoop(const Loop* L) {
-
+
LoopPropsMapIt LIt = LoopsProperties.find(L);
if (LIt != LoopsProperties.end()) {
@@ -275,9 +275,9 @@ void LUAnalysisCache::forgetLoop(const Loop* L) {
MaxSize += Props.CanBeUnswitchedCount * Props.SizeEstimation;
LoopsProperties.erase(LIt);
}
-
+
CurrentLoopProperties = NULL;
- CurLoopInstructions = NULL;
+ CurLoopInstructions = NULL;
}
// Mark case value as unswitched.
@@ -289,7 +289,7 @@ void LUAnalysisCache::setUnswitched(const SwitchInst* SI, const Value* V) {
// Check was this case value unswitched before or not.
bool LUAnalysisCache::isUnswitched(const SwitchInst* SI, const Value* V) {
- return (*CurLoopInstructions)[SI].count(V);
+ return (*CurLoopInstructions)[SI].count(V);
}
// Clone all loop-unswitch related loop properties.
@@ -297,20 +297,20 @@ bool LUAnalysisCache::isUnswitched(const SwitchInst* SI, const Value* V) {
// Note, that new loop data is stored inside the VMap.
void LUAnalysisCache::cloneData(const Loop* NewLoop, const Loop* OldLoop,
const ValueToValueMapTy& VMap) {
-
+
LoopProperties& NewLoopProps = LoopsProperties[NewLoop];
LoopProperties& OldLoopProps = *CurrentLoopProperties;
UnswitchedValsMap& Insts = OldLoopProps.UnswitchedVals;
-
+
// Reallocate "can-be-unswitched quota"
--OldLoopProps.CanBeUnswitchedCount;
unsigned Quota = OldLoopProps.CanBeUnswitchedCount;
NewLoopProps.CanBeUnswitchedCount = Quota / 2;
OldLoopProps.CanBeUnswitchedCount = Quota - Quota / 2;
-
+
NewLoopProps.SizeEstimation = OldLoopProps.SizeEstimation;
-
+
// Clone unswitched values info:
// for new loop switches we clone info about values that was
// already unswitched and has redundant successors.
@@ -319,7 +319,7 @@ void LUAnalysisCache::cloneData(const Loop* NewLoop, const Loop* OldLoop,
Value* NewI = VMap.lookup(OldInst);
const SwitchInst* NewInst = cast_or_null<SwitchInst>(NewI);
assert(NewInst && "All instructions that are in SrcBB must be in VMap.");
-
+
NewLoopProps.UnswitchedVals[NewInst] = OldLoopProps.UnswitchedVals[OldInst];
}
}
@@ -333,18 +333,18 @@ INITIALIZE_PASS_DEPENDENCY(LCSSA)
INITIALIZE_PASS_END(LoopUnswitch, "loop-unswitch", "Unswitch loops",
false, false)
-Pass *llvm::createLoopUnswitchPass(bool Os) {
- return new LoopUnswitch(Os);
+Pass *llvm::createLoopUnswitchPass(bool Os) {
+ return new LoopUnswitch(Os);
}
/// FindLIVLoopCondition - Cond is a condition that occurs in L. If it is
/// invariant in the loop, or has an invariant piece, return the invariant.
/// Otherwise, return null.
static Value *FindLIVLoopCondition(Value *Cond, Loop *L, bool &Changed) {
-
+
// We started analyze new instruction, increment scanned instructions counter.
++TotalInsts;
-
+
// We can never unswitch on vector conditions.
if (Cond->getType()->isVectorTy())
return 0;
@@ -369,7 +369,7 @@ static Value *FindLIVLoopCondition(Value *Cond, Loop *L, bool &Changed) {
if (Value *RHS = FindLIVLoopCondition(BO->getOperand(1), L, Changed))
return RHS;
}
-
+
return 0;
}
@@ -394,19 +394,36 @@ bool LoopUnswitch::runOnLoop(Loop *L, LPPassManager &LPM_Ref) {
return Changed;
}
-/// processCurrentLoop - Do actual work and unswitch loop if possible
+/// processCurrentLoop - Do actual work and unswitch loop if possible
/// and profitable.
bool LoopUnswitch::processCurrentLoop() {
bool Changed = false;
initLoopData();
-
+
// If LoopSimplify was unable to form a preheader, don't do any unswitching.
if (!loopPreheader)
return false;
-
+
+ // Loops with indirectbr cannot be cloned.
+ if (!currentLoop->isSafeToClone())
+ return false;
+
+ // Loops with invokes whose unwind edge escapes the loop cannot be
+ // unswitched, because splitting their edges is non-trivial and doesn't
+ // preserve loop-simplify information.
+ for (Loop::block_iterator I = currentLoop->block_begin(),
+ E = currentLoop->block_end(); I != E; ++I)
+ if (const InvokeInst *II = dyn_cast<InvokeInst>((*I)->getTerminator()))
+ if (!currentLoop->contains(II->getUnwindDest()))
+ return false;
+
+ // Without dedicated exits, splitting the exit edge may fail.
+ if (!currentLoop->hasDedicatedExits())
+ return false;
+
LLVMContext &Context = loopHeader->getContext();
-
+
// Probably we reach the quota of branches for this loop. If so
// stop unswitching.
if (!BranchesInfo.countLoop(currentLoop))
@@ -415,7 +432,7 @@ bool LoopUnswitch::processCurrentLoop() {
// Loop over all of the basic blocks in the loop. If we find an interior
// block that is branching on a loop-invariant condition, we can unswitch this
// loop.
- for (Loop::block_iterator I = currentLoop->block_begin(),
+ for (Loop::block_iterator I = currentLoop->block_begin(),
E = currentLoop->block_end(); I != E; ++I) {
TerminatorInst *TI = (*I)->getTerminator();
if (BranchInst *BI = dyn_cast<BranchInst>(TI)) {
@@ -424,24 +441,24 @@ bool LoopUnswitch::processCurrentLoop() {
if (BI->isConditional()) {
// See if this, or some part of it, is loop invariant. If so, we can
// unswitch on it if we desire.
- Value *LoopCond = FindLIVLoopCondition(BI->getCondition(),
+ Value *LoopCond = FindLIVLoopCondition(BI->getCondition(),
currentLoop, Changed);
- if (LoopCond && UnswitchIfProfitable(LoopCond,
+ if (LoopCond && UnswitchIfProfitable(LoopCond,
ConstantInt::getTrue(Context))) {
++NumBranches;
return true;
}
- }
+ }
} else if (SwitchInst *SI = dyn_cast<SwitchInst>(TI)) {
- Value *LoopCond = FindLIVLoopCondition(SI->getCondition(),
+ Value *LoopCond = FindLIVLoopCondition(SI->getCondition(),
currentLoop, Changed);
- unsigned NumCases = SI->getNumCases();
+ unsigned NumCases = SI->getNumCases();
if (LoopCond && NumCases) {
// Find a value to unswitch on:
// FIXME: this should chose the most expensive case!
// FIXME: scan for a case with a non-critical edge?
Constant *UnswitchVal = NULL;
-
+
// Do not process same value again and again.
// At this point we have some cases already unswitched and
// some not yet unswitched. Let's find the first not yet unswitched one.
@@ -453,7 +470,7 @@ bool LoopUnswitch::processCurrentLoop() {
break;
}
}
-
+
if (!UnswitchVal)
continue;
@@ -463,14 +480,14 @@ bool LoopUnswitch::processCurrentLoop() {
}
}
}
-
+
// Scan the instructions to check for unswitchable values.
- for (BasicBlock::iterator BBI = (*I)->begin(), E = (*I)->end();
+ for (BasicBlock::iterator BBI = (*I)->begin(), E = (*I)->end();
BBI != E; ++BBI)
if (SelectInst *SI = dyn_cast<SelectInst>(BBI)) {
- Value *LoopCond = FindLIVLoopCondition(SI->getCondition(),
+ Value *LoopCond = FindLIVLoopCondition(SI->getCondition(),
currentLoop, Changed);
- if (LoopCond && UnswitchIfProfitable(LoopCond,
+ if (LoopCond && UnswitchIfProfitable(LoopCond,
ConstantInt::getTrue(Context))) {
++NumSelects;
return true;
@@ -500,7 +517,7 @@ static bool isTrivialLoopExitBlockHelper(Loop *L, BasicBlock *BB,
ExitBB = BB;
return true;
}
-
+
// Otherwise, this is an unvisited intra-loop node. Check all successors.
for (succ_iterator SI = succ_begin(BB), E = succ_end(BB); SI != E; ++SI) {
// Check to see if the successor is a trivial loop exit.
@@ -513,12 +530,12 @@ static bool isTrivialLoopExitBlockHelper(Loop *L, BasicBlock *BB,
for (BasicBlock::iterator I = BB->begin(), E = BB->end(); I != E; ++I)
if (I->mayHaveSideEffects())
return false;
-
+
return true;
}
/// isTrivialLoopExitBlock - Return true if the specified block unconditionally
-/// leads to an exit from the specified loop, and has no side-effects in the
+/// leads to an exit from the specified loop, and has no side-effects in the
/// process. If so, return the block that is exited to, otherwise return null.
static BasicBlock *isTrivialLoopExitBlock(Loop *L, BasicBlock *BB) {
std::set<BasicBlock*> Visited;
@@ -546,39 +563,39 @@ bool LoopUnswitch::IsTrivialUnswitchCondition(Value *Cond, Constant **Val,
BasicBlock *Header = currentLoop->getHeader();
TerminatorInst *HeaderTerm = Header->getTerminator();
LLVMContext &Context = Header->getContext();
-
+
BasicBlock *LoopExitBB = 0;
if (BranchInst *BI = dyn_cast<BranchInst>(HeaderTerm)) {
// If the header block doesn't end with a conditional branch on Cond, we
// can't handle it.
if (!BI->isConditional() || BI->getCondition() != Cond)
return false;
-
- // Check to see if a successor of the branch is guaranteed to
- // exit through a unique exit block without having any
+
+ // Check to see if a successor of the branch is guaranteed to
+ // exit through a unique exit block without having any
// side-effects. If so, determine the value of Cond that causes it to do
// this.
- if ((LoopExitBB = isTrivialLoopExitBlock(currentLoop,
+ if ((LoopExitBB = isTrivialLoopExitBlock(currentLoop,
BI->getSuccessor(0)))) {
if (Val) *Val = ConstantInt::getTrue(Context);
- } else if ((LoopExitBB = isTrivialLoopExitBlock(currentLoop,
+ } else if ((LoopExitBB = isTrivialLoopExitBlock(currentLoop,
BI->getSuccessor(1)))) {
if (Val) *Val = ConstantInt::getFalse(Context);
}
} else if (SwitchInst *SI = dyn_cast<SwitchInst>(HeaderTerm)) {
// If this isn't a switch on Cond, we can't handle it.
if (SI->getCondition() != Cond) return false;
-
+
// Check to see if a successor of the switch is guaranteed to go to the
- // latch block or exit through a one exit block without having any
+ // latch block or exit through a one exit block without having any
// side-effects. If so, determine the value of Cond that causes it to do
- // this.
+ // this.
// Note that we can't trivially unswitch on the default case or
// on already unswitched cases.
for (SwitchInst::CaseIt i = SI->case_begin(), e = SI->case_end();
i != e; ++i) {
BasicBlock* LoopExitCandidate;
- if ((LoopExitCandidate = isTrivialLoopExitBlock(currentLoop,
+ if ((LoopExitCandidate = isTrivialLoopExitBlock(currentLoop,
i.getCaseSuccessor()))) {
// Okay, we found a trivial case, remember the value that is trivial.
ConstantInt* CaseVal = i.getCaseValue();
@@ -598,9 +615,9 @@ bool LoopUnswitch::IsTrivialUnswitchCondition(Value *Cond, Constant **Val,
// contains phi nodes, this isn't trivial.
if (!LoopExitBB || isa<PHINode>(LoopExitBB->begin()))
return false; // Can't handle this.
-
+
if (LoopExit) *LoopExit = LoopExitBB;
-
+
// We already know that nothing uses any scalar values defined inside of this
// loop. As such, we just have to check to see if this loop will execute any
// side-effecting instructions (e.g. stores, calls, volatile loads) in the
@@ -686,17 +703,17 @@ void LoopUnswitch::EmitPreheaderBranchOnCondition(Value *LIC, Constant *Val,
/// UnswitchTrivialCondition - Given a loop that has a trivial unswitchable
/// condition in it (a cond branch from its header block to its latch block,
-/// where the path through the loop that doesn't execute its body has no
+/// where the path through the loop that doesn't execute its body has no
/// side-effects), unswitch it. This doesn't involve any code duplication, just
/// moving the conditional branch outside of the loop and updating loop info.
-void LoopUnswitch::UnswitchTrivialCondition(Loop *L, Value *Cond,
- Constant *Val,
+void LoopUnswitch::UnswitchTrivialCondition(Loop *L, Value *Cond,
+ Constant *Val,
BasicBlock *ExitBlock) {
DEBUG(dbgs() << "loop-unswitch: Trivial-Unswitch loop %"
<< loopHeader->getName() << " [" << L->getBlocks().size()
<< " blocks] in Function " << L->getHeader()->getParent()->getName()
<< " on cond: " << *Val << " == " << *Cond << "\n");
-
+
// First step, split the preheader, so that we know that there is a safe place
// to insert the conditional branch. We will change loopPreheader to have a
// conditional branch on Cond.
@@ -705,24 +722,24 @@ void LoopUnswitch::UnswitchTrivialCondition(Loop *L, Value *Cond,
// Now that we have a place to insert the conditional branch, create a place
// to branch to: this is the exit block out of the loop that we should
// short-circuit to.
-
+
// Split this block now, so that the loop maintains its exit block, and so
// that the jump from the preheader can execute the contents of the exit block
// without actually branching to it (the exit block should be dominated by the
// loop header, not the preheader).
assert(!L->contains(ExitBlock) && "Exit block is in the loop?");
BasicBlock *NewExit = SplitBlock(ExitBlock, ExitBlock->begin(), this);
-
- // Okay, now we have a position to branch from and a position to branch to,
+
+ // Okay, now we have a position to branch from and a position to branch to,
// insert the new conditional branch.
- EmitPreheaderBranchOnCondition(Cond, Val, NewExit, NewPH,
+ EmitPreheaderBranchOnCondition(Cond, Val, NewExit, NewPH,
loopPreheader->getTerminator());
LPM->deleteSimpleAnalysisValue(loopPreheader->getTerminator(), L);
loopPreheader->getTerminator()->eraseFromParent();
// We need to reprocess this loop, it could be unswitched again.
redoLoop = true;
-
+
// Now that we know that the loop is never entered when this condition is a
// particular value, rewrite the loop with this info. We know that this will
// at least eliminate the old branch.
@@ -732,7 +749,7 @@ void LoopUnswitch::UnswitchTrivialCondition(Loop *L, Value *Cond,
/// SplitExitEdges - Split all of the edges from inside the loop to their exit
/// blocks. Update the appropriate Phi nodes as we do so.
-void LoopUnswitch::SplitExitEdges(Loop *L,
+void LoopUnswitch::SplitExitEdges(Loop *L,
const SmallVector<BasicBlock *, 8> &ExitBlocks){
for (unsigned i = 0, e = ExitBlocks.size(); i != e; ++i) {
@@ -752,10 +769,10 @@ void LoopUnswitch::SplitExitEdges(Loop *L,
}
}
-/// UnswitchNontrivialCondition - We determined that the loop is profitable
-/// to unswitch when LIC equal Val. Split it into loop versions and test the
+/// UnswitchNontrivialCondition - We determined that the loop is profitable
+/// to unswitch when LIC equal Val. Split it into loop versions and test the
/// condition outside of either loop. Return the loops created as Out1/Out2.
-void LoopUnswitch::UnswitchNontrivialCondition(Value *LIC, Constant *Val,
+void LoopUnswitch::UnswitchNontrivialCondition(Value *LIC, Constant *Val,
Loop *L) {
Function *F = loopHeader->getParent();
DEBUG(dbgs() << "loop-unswitch: Unswitching loop %"
@@ -798,7 +815,7 @@ void LoopUnswitch::UnswitchNontrivialCondition(Value *LIC, Constant *Val,
ValueToValueMapTy VMap;
for (unsigned i = 0, e = LoopBlocks.size(); i != e; ++i) {
BasicBlock *NewBB = CloneBasicBlock(LoopBlocks[i], VMap, ".us", F);
-
+
NewBlocks.push_back(NewBB);
VMap[LoopBlocks[i]] = NewBB; // Keep the BB mapping.
LPM->cloneBasicBlockSimpleAnalysis(LoopBlocks[i], NewBB, L);
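
The cloning loop above is the standard CloneBasicBlock/RemapInstruction idiom: clone each block while recording an old-to-new mapping, then rewrite the clones' operands through that mapping. A condensed sketch under the same LLVM 3.x-era API (the helper name cloneAndRemap is ours, not the pass's):

    #include "llvm/Transforms/Utils/Cloning.h"
    #include "llvm/Transforms/Utils/ValueMapper.h"
    #include <vector>
    using namespace llvm;

    static void cloneAndRemap(const std::vector<BasicBlock*> &Blocks,
                              Function *F) {
      ValueToValueMapTy VMap;
      std::vector<BasicBlock*> NewBlocks;
      // Pass 1: clone, and remember which clone replaces which original.
      for (unsigned i = 0, e = Blocks.size(); i != e; ++i) {
        BasicBlock *NewBB = CloneBasicBlock(Blocks[i], VMap, ".copy", F);
        NewBlocks.push_back(NewBB);
        VMap[Blocks[i]] = NewBB;
      }
      // Pass 2: point the clones' operands at other clones, not originals.
      for (unsigned i = 0, e = NewBlocks.size(); i != e; ++i)
        for (BasicBlock::iterator I = NewBlocks[i]->begin(),
             E = NewBlocks[i]->end(); I != E; ++I)
          RemapInstruction(I, VMap,
                           RF_NoModuleLevelChanges | RF_IgnoreMissingEntries);
    }
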
@@ -828,7 +845,7 @@ void LoopUnswitch::UnswitchNontrivialCondition(Value *LIC, Constant *Val,
// The new exit block should be in the same loop as the old one.
if (Loop *ExitBBLoop = LI->getLoopFor(ExitBlocks[i]))
ExitBBLoop->addBasicBlockToLoop(NewExit, LI->getBase());
-
+
assert(NewExit->getTerminator()->getNumSuccessors() == 1 &&
"Exit block should have been split to have one successor!");
BasicBlock *ExitSucc = NewExit->getTerminator()->getSuccessor(0);
@@ -863,7 +880,7 @@ void LoopUnswitch::UnswitchNontrivialCondition(Value *LIC, Constant *Val,
for (BasicBlock::iterator I = NewBlocks[i]->begin(),
E = NewBlocks[i]->end(); I != E; ++I)
RemapInstruction(I, VMap,RF_NoModuleLevelChanges|RF_IgnoreMissingEntries);
-
+
// Rewrite the original preheader to select between versions of the loop.
BranchInst *OldBR = cast<BranchInst>(loopPreheader->getTerminator());
assert(OldBR->isUnconditional() && OldBR->getSuccessor(0) == LoopBlocks[0] &&
@@ -882,7 +899,7 @@ void LoopUnswitch::UnswitchNontrivialCondition(Value *LIC, Constant *Val,
// the condition that we're unswitching on), we don't rewrite the second
// iteration.
WeakVH LICHandle(LIC);
-
+
// Now we rewrite the original code to know that the condition is true and the
// new code to know that the condition is false.
RewriteLoopBodyWithConditionConstant(L, LIC, Val, false);
@@ -897,7 +914,7 @@ void LoopUnswitch::UnswitchNontrivialCondition(Value *LIC, Constant *Val,
/// RemoveFromWorklist - Remove all instances of I from the worklist vector
/// specified.
-static void RemoveFromWorklist(Instruction *I,
+static void RemoveFromWorklist(Instruction *I,
std::vector<Instruction*> &Worklist) {
std::vector<Instruction*>::iterator WI = std::find(Worklist.begin(),
Worklist.end(), I);
@@ -910,7 +927,7 @@ static void RemoveFromWorklist(Instruction *I,
/// ReplaceUsesOfWith - When we find that I really equals V, remove I from the
/// program, replacing all uses with V and update the worklist.
-static void ReplaceUsesOfWith(Instruction *I, Value *V,
+static void ReplaceUsesOfWith(Instruction *I, Value *V,
std::vector<Instruction*> &Worklist,
Loop *L, LPPassManager *LPM) {
DEBUG(dbgs() << "Replace with '" << *V << "': " << *I);
@@ -943,10 +960,10 @@ void LoopUnswitch::RemoveBlockIfDead(BasicBlock *BB,
if (BasicBlock *Pred = BB->getSinglePredecessor()) {
// If it has one pred, fold phi nodes in BB.
while (isa<PHINode>(BB->begin()))
- ReplaceUsesOfWith(BB->begin(),
- cast<PHINode>(BB->begin())->getIncomingValue(0),
+ ReplaceUsesOfWith(BB->begin(),
+ cast<PHINode>(BB->begin())->getIncomingValue(0),
Worklist, L, LPM);
-
+
// If this is the header of a loop and the only pred is the latch, we now
// have an unreachable loop.
if (Loop *L = LI->getLoopFor(BB))
@@ -957,15 +974,15 @@ void LoopUnswitch::RemoveBlockIfDead(BasicBlock *BB,
LPM->deleteSimpleAnalysisValue(Pred->getTerminator(), L);
Pred->getTerminator()->eraseFromParent();
new UnreachableInst(BB->getContext(), Pred);
-
+
// The loop is now broken, remove it from LI.
RemoveLoopFromHierarchy(L);
-
+
// Reprocess the header, which now IS dead.
RemoveBlockIfDead(BB, Worklist, L);
return;
}
-
+
    // If pred ends in an uncond branch, add uncond branch to worklist so that
// the two blocks will get merged.
if (BranchInst *BI = dyn_cast<BranchInst>(Pred->getTerminator()))
@@ -976,11 +993,11 @@ void LoopUnswitch::RemoveBlockIfDead(BasicBlock *BB,
}
DEBUG(dbgs() << "Nuking dead block: " << *BB);
-
+
// Remove the instructions in the basic block from the worklist.
for (BasicBlock::iterator I = BB->begin(), E = BB->end(); I != E; ++I) {
RemoveFromWorklist(I, Worklist);
-
+
    // Anything that uses the instructions in this basic block should have
    // its uses replaced with undefs.
    // If I is not of void type, replaceAllUsesWith undef.
@@ -988,7 +1005,7 @@ void LoopUnswitch::RemoveBlockIfDead(BasicBlock *BB,
if (!I->getType()->isVoidTy())
I->replaceAllUsesWith(UndefValue::get(I->getType()));
}
-
+
// If this is the edge to the header block for a loop, remove the loop and
// promote all subloops.
if (Loop *BBLoop = LI->getLoopFor(BB)) {
@@ -1004,8 +1021,8 @@ void LoopUnswitch::RemoveBlockIfDead(BasicBlock *BB,
// Remove the block from the loop info, which removes it from any loops it
// was in.
LI->removeBlock(BB);
-
-
+
+
// Remove phi node entries in successors for this block.
TerminatorInst *TI = BB->getTerminator();
SmallVector<BasicBlock*, 4> Succs;
@@ -1013,13 +1030,13 @@ void LoopUnswitch::RemoveBlockIfDead(BasicBlock *BB,
Succs.push_back(TI->getSuccessor(i));
TI->getSuccessor(i)->removePredecessor(BB);
}
-
+
  // Unique the successors, removing any duplicate entries.
array_pod_sort(Succs.begin(), Succs.end());
Succs.erase(std::unique(Succs.begin(), Succs.end()), Succs.end());
-
+
// Remove the basic block, including all of the instructions contained in it.
- LPM->deleteSimpleAnalysisValue(BB, L);
+ LPM->deleteSimpleAnalysisValue(BB, L);
BB->eraseFromParent();
// Remove successor blocks here that are not dead, so that we know we only
// have dead blocks in this list. Nondead blocks have a way of becoming dead,
@@ -1037,7 +1054,7 @@ void LoopUnswitch::RemoveBlockIfDead(BasicBlock *BB,
--i;
}
}
-
+
for (unsigned i = 0, e = Succs.size(); i != e; ++i)
RemoveBlockIfDead(Succs[i], Worklist, L);
}
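
The Succs handling above is the usual sort-then-unique idiom for deduplicating a small vector of pointers; a standalone equivalent for reference:

    #include <algorithm>
    #include <vector>

    // Sort by pointer value, then erase adjacent duplicates; equivalent to
    // the array_pod_sort + std::unique pair used on Succs above.
    template <typename T>
    static void sortAndUnique(std::vector<T*> &V) {
      std::sort(V.begin(), V.end());
      V.erase(std::unique(V.begin(), V.end()), V.end());
    }
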
@@ -1060,14 +1077,14 @@ void LoopUnswitch::RewriteLoopBodyWithConditionConstant(Loop *L, Value *LIC,
Constant *Val,
bool IsEqual) {
assert(!isa<Constant>(LIC) && "Why are we unswitching on a constant?");
-
+
// FIXME: Support correlated properties, like:
// for (...)
// if (li1 < li2)
// ...
// if (li1 > li2)
// ...
-
+
// FOLD boolean conditions (X|LIC), (X&LIC). Fold conditional branches,
// selects, switches.
std::vector<Instruction*> Worklist;
@@ -1082,9 +1099,9 @@ void LoopUnswitch::RewriteLoopBodyWithConditionConstant(Loop *L, Value *LIC,
if (IsEqual)
Replacement = Val;
else
- Replacement = ConstantInt::get(Type::getInt1Ty(Val->getContext()),
+ Replacement = ConstantInt::get(Type::getInt1Ty(Val->getContext()),
!cast<ConstantInt>(Val)->getZExtValue());
-
+
for (Value::use_iterator UI = LIC->use_begin(), E = LIC->use_end();
UI != E; ++UI) {
Instruction *U = dyn_cast<Instruction>(*UI);
@@ -1092,15 +1109,15 @@ void LoopUnswitch::RewriteLoopBodyWithConditionConstant(Loop *L, Value *LIC,
continue;
Worklist.push_back(U);
}
-
+
for (std::vector<Instruction*>::iterator UI = Worklist.begin();
UI != Worklist.end(); ++UI)
- (*UI)->replaceUsesOfWith(LIC, Replacement);
-
+ (*UI)->replaceUsesOfWith(LIC, Replacement);
+
SimplifyCode(Worklist, L);
return;
}
-
+
// Otherwise, we don't know the precise value of LIC, but we do know that it
// is certainly NOT "Val". As such, simplify any uses in the loop that we
// can. This case occurs when we unswitch switch statements.
@@ -1112,27 +1129,27 @@ void LoopUnswitch::RewriteLoopBodyWithConditionConstant(Loop *L, Value *LIC,
Worklist.push_back(U);
- // TODO: We could do other simplifications, for example, turning
+ // TODO: We could do other simplifications, for example, turning
// 'icmp eq LIC, Val' -> false.
// If we know that LIC is not Val, use this info to simplify code.
SwitchInst *SI = dyn_cast<SwitchInst>(U);
if (SI == 0 || !isa<ConstantInt>(Val)) continue;
-
+
SwitchInst::CaseIt DeadCase = SI->findCaseValue(cast<ConstantInt>(Val));
// Default case is live for multiple values.
if (DeadCase == SI->case_default()) continue;
-
- // Found a dead case value. Don't remove PHI nodes in the
+
+ // Found a dead case value. Don't remove PHI nodes in the
      // successor if they become single-entry; those PHI nodes may
// be in the Users list.
BasicBlock *Switch = SI->getParent();
BasicBlock *SISucc = DeadCase.getCaseSuccessor();
BasicBlock *Latch = L->getLoopLatch();
-
+
BranchesInfo.setUnswitched(SI, Val);
-
+
if (!SI->findCaseDest(SISucc)) continue; // Edge is critical.
// If the DeadCase successor dominates the loop latch, then the
// transformation isn't safe since it will delete the sole predecessor edge
@@ -1172,7 +1189,7 @@ void LoopUnswitch::RewriteLoopBodyWithConditionConstant(Loop *L, Value *LIC,
if (DT)
DT->addNewBlock(Abort, NewSISucc);
}
-
+
SimplifyCode(Worklist, L);
}
@@ -1193,7 +1210,7 @@ void LoopUnswitch::SimplifyCode(std::vector<Instruction*> &Worklist, Loop *L) {
// Simple DCE.
if (isInstructionTriviallyDead(I)) {
DEBUG(dbgs() << "Remove dead instruction '" << *I);
-
+
// Add uses to the worklist, which may be dead now.
for (unsigned i = 0, e = I->getNumOperands(); i != e; ++i)
if (Instruction *Use = dyn_cast<Instruction>(I->getOperand(i)))
@@ -1225,24 +1242,24 @@ void LoopUnswitch::SimplifyCode(std::vector<Instruction*> &Worklist, Loop *L) {
if (!SinglePred) continue; // Nothing to do.
assert(SinglePred == Pred && "CFG broken");
- DEBUG(dbgs() << "Merging blocks: " << Pred->getName() << " <- "
+ DEBUG(dbgs() << "Merging blocks: " << Pred->getName() << " <- "
<< Succ->getName() << "\n");
-
+
// Resolve any single entry PHI nodes in Succ.
while (PHINode *PN = dyn_cast<PHINode>(Succ->begin()))
ReplaceUsesOfWith(PN, PN->getIncomingValue(0), Worklist, L, LPM);
-
+
// If Succ has any successors with PHI nodes, update them to have
// entries coming from Pred instead of Succ.
Succ->replaceAllUsesWith(Pred);
-
+
// Move all of the successor contents from Succ to Pred.
Pred->getInstList().splice(BI, Succ->getInstList(), Succ->begin(),
Succ->end());
LPM->deleteSimpleAnalysisValue(BI, L);
BI->eraseFromParent();
RemoveFromWorklist(BI, Worklist);
-
+
// Remove Succ from the loop tree.
LI->removeBlock(Succ);
LPM->deleteSimpleAnalysisValue(Succ, L);
@@ -1250,7 +1267,7 @@ void LoopUnswitch::SimplifyCode(std::vector<Instruction*> &Worklist, Loop *L) {
++NumSimplify;
continue;
}
-
+
if (ConstantInt *CB = dyn_cast<ConstantInt>(BI->getCondition())){
// Conditional branch. Turn it into an unconditional branch, then
// remove dead blocks.
diff --git a/lib/Transforms/Scalar/ObjCARC.cpp b/lib/Transforms/Scalar/ObjCARC.cpp
index 9fdea8d..29234da 100644
--- a/lib/Transforms/Scalar/ObjCARC.cpp
+++ b/lib/Transforms/Scalar/ObjCARC.cpp
@@ -162,6 +162,7 @@ namespace {
IC_MoveWeak, ///< objc_moveWeak (derived)
IC_CopyWeak, ///< objc_copyWeak (derived)
IC_DestroyWeak, ///< objc_destroyWeak (derived)
+ IC_StoreStrong, ///< objc_storeStrong (derived)
IC_CallOrUser, ///< could call objc_release and/or "use" pointers
IC_Call, ///< could call objc_release
IC_User, ///< could "use" a pointer
@@ -262,6 +263,7 @@ static InstructionClass GetFunctionClass(const Function *F) {
return StringSwitch<InstructionClass>(F->getName())
.Case("objc_storeWeak", IC_StoreWeak)
.Case("objc_initWeak", IC_InitWeak)
+ .Case("objc_storeStrong", IC_StoreStrong)
.Default(IC_CallOrUser);
// Second argument is i8**.
if (PointerType *Pte1 = dyn_cast<PointerType>(ETy1))
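
GetFunctionClass dispatches on the callee's name via llvm::StringSwitch, which chains Case clauses and falls through to Default. A self-contained sketch of the pattern (the Kind enum here is an illustrative stand-in, not the pass's InstructionClass):

    #include "llvm/ADT/StringSwitch.h"

    enum Kind { KindStoreWeak, KindInitWeak, KindStoreStrong, KindUnknown };

    static Kind classifyByName(llvm::StringRef Name) {
      return llvm::StringSwitch<Kind>(Name)
        .Case("objc_storeWeak", KindStoreWeak)
        .Case("objc_initWeak", KindInitWeak)
        .Case("objc_storeStrong", KindStoreStrong)
        .Default(KindUnknown);
    }
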
@@ -618,22 +620,35 @@ static bool DoesObjCBlockEscape(const Value *BlockPtr) {
const User *UUser = *UI;
// Special - Use by a call (callee or argument) is not considered
// to be an escape.
- if (isa<CallInst>(UUser) || isa<InvokeInst>(UUser))
- continue;
- // Use by an instruction which copies the value is an escape if the
- // result is an escape.
- if (isa<BitCastInst>(UUser) || isa<GetElementPtrInst>(UUser) ||
- isa<PHINode>(UUser) || isa<SelectInst>(UUser)) {
- Worklist.push_back(UUser);
+ switch (GetBasicInstructionClass(UUser)) {
+ case IC_StoreWeak:
+ case IC_InitWeak:
+ case IC_StoreStrong:
+ case IC_Autorelease:
+ case IC_AutoreleaseRV:
+ // These special functions make copies of their pointer arguments.
+ return true;
+ case IC_User:
+ case IC_None:
+ // Use by an instruction which copies the value is an escape if the
+ // result is an escape.
+ if (isa<BitCastInst>(UUser) || isa<GetElementPtrInst>(UUser) ||
+ isa<PHINode>(UUser) || isa<SelectInst>(UUser)) {
+ Worklist.push_back(UUser);
+ continue;
+ }
+ // Use by a load is not an escape.
+ if (isa<LoadInst>(UUser))
+ continue;
+ // Use by a store is not an escape if the use is the address.
+ if (const StoreInst *SI = dyn_cast<StoreInst>(UUser))
+ if (V != SI->getValueOperand())
+ continue;
+ break;
+ default:
+      // Regular calls and other instruction classes are not considered escapes.
continue;
}
- // Use by a load is not an escape.
- if (isa<LoadInst>(UUser))
- continue;
- // Use by a store is not an escape if the use is the address.
- if (const StoreInst *SI = dyn_cast<StoreInst>(UUser))
- if (V != SI->getValueOperand())
- continue;
// Otherwise, conservatively assume an escape.
return true;
}
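
Structurally, the rewritten check is a worklist-based escape analysis: a pointer escapes if any use, or any use of a value copied from it, escapes. A library-free sketch of that skeleton (the Node type and its fields are purely illustrative; the real code classifies each user by InstructionClass as above):

    #include <set>
    #include <vector>

    struct Node {
      bool EscapesDirectly;          // e.g. stored somewhere we can't see
      std::vector<Node*> CopyUsers;  // bitcasts, GEPs, phis, selects, ...
    };

    static bool doesEscape(Node *Root) {
      std::vector<Node*> Worklist(1, Root);
      std::set<Node*> Visited;
      while (!Worklist.empty()) {
        Node *N = Worklist.back();
        Worklist.pop_back();
        if (!Visited.insert(N).second)
          continue;                  // already examined; avoids cycles
        if (N->EscapesDirectly)
          return true;
        for (size_t i = 0; i != N->CopyUsers.size(); ++i)
          Worklist.push_back(N->CopyUsers[i]);
      }
      return false;                  // every transitive use is harmless
    }
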
@@ -883,7 +898,7 @@ bool ObjCARCExpand::runOnFunction(Function &F) {
// These calls return their argument verbatim, as a low-level
// optimization. However, this makes high-level optimizations
// harder. Undo any uses of this optimization that the front-end
- // emitted here. We'll redo them in a later pass.
+ // emitted here. We'll redo them in the contract pass.
Changed = true;
Inst->replaceAllUsesWith(cast<CallInst>(Inst)->getArgOperand(0));
break;
@@ -997,7 +1012,11 @@ bool ObjCARCAPElim::runOnModule(Module &M) {
return false;
// Find the llvm.global_ctors variable, as the first step in
- // identifying the global constructors.
+ // identifying the global constructors. In theory, unnecessary autorelease
+  // pools could occur anywhere, but in practice it's rare. Global
+  // ctors are a place where autorelease pools get inserted automatically,
+  // so it's common for them to be unnecessary, and profitable to
+  // eliminate them.
GlobalVariable *GV = M.getGlobalVariable("llvm.global_ctors");
if (!GV)
return false;
@@ -1014,7 +1033,11 @@ bool ObjCARCAPElim::runOnModule(Module &M) {
Value *Op = *OI;
// llvm.global_ctors is an array of pairs where the second members
// are constructor functions.
- Function *F = cast<Function>(cast<ConstantStruct>(Op)->getOperand(1));
+ Function *F = dyn_cast<Function>(cast<ConstantStruct>(Op)->getOperand(1));
+ // If the user used a constructor function with the wrong signature and
+    // it got bitcasted, skip it.
+ if (!F)
+ continue;
// Only look at function definitions.
if (F->isDeclaration())
continue;
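
At this point in LLVM's history, llvm.global_ctors is an appending array of { i32 priority, void ()* ctor } structs. A sketch of the traversal with the new dyn_cast guard in place (forEachGlobalCtor is our name for it, not an LLVM API):

    #include "llvm/Constants.h"
    #include "llvm/Function.h"
    #include "llvm/Module.h"
    using namespace llvm;

    static void forEachGlobalCtor(Module &M, void (*Visit)(Function *)) {
      GlobalVariable *GV = M.getGlobalVariable("llvm.global_ctors");
      if (!GV || !GV->hasDefinitiveInitializer())
        return;
      ConstantArray *Init = dyn_cast<ConstantArray>(GV->getInitializer());
      if (!Init)
        return;
      for (User::op_iterator OI = Init->op_begin(), OE = Init->op_end();
           OI != OE; ++OI)
        // The second struct member is the constructor. A function that was
        // bitcasted to the expected type fails the dyn_cast and is skipped.
        if (Function *F = dyn_cast<Function>(
              cast<ConstantStruct>(*OI)->getOperand(1)))
          if (!F->isDeclaration())
            Visit(F);
    }
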
@@ -1678,9 +1701,16 @@ namespace {
void CheckForCFGHazards(const BasicBlock *BB,
DenseMap<const BasicBlock *, BBState> &BBStates,
BBState &MyStates) const;
+ bool VisitInstructionBottomUp(Instruction *Inst,
+ BasicBlock *BB,
+ MapVector<Value *, RRInfo> &Retains,
+ BBState &MyStates);
bool VisitBottomUp(BasicBlock *BB,
DenseMap<const BasicBlock *, BBState> &BBStates,
MapVector<Value *, RRInfo> &Retains);
+ bool VisitInstructionTopDown(Instruction *Inst,
+ DenseMap<Value *, RRInfo> &Releases,
+ BBState &MyStates);
bool VisitTopDown(BasicBlock *BB,
DenseMap<const BasicBlock *, BBState> &BBStates,
DenseMap<Value *, RRInfo> &Releases);
@@ -1956,6 +1986,7 @@ namespace {
/// use here.
enum DependenceKind {
NeedsPositiveRetainCount,
+ AutoreleasePoolBoundary,
CanChangeRetainCount,
RetainAutoreleaseDep, ///< Blocks objc_retainAutorelease.
RetainAutoreleaseRVDep, ///< Blocks objc_retainAutoreleaseReturnValue.
@@ -1985,6 +2016,19 @@ Depends(DependenceKind Flavor, Instruction *Inst, const Value *Arg,
}
}
+ case AutoreleasePoolBoundary: {
+ InstructionClass Class = GetInstructionClass(Inst);
+ switch (Class) {
+ case IC_AutoreleasepoolPop:
+ case IC_AutoreleasepoolPush:
+      // These mark the boundaries of an autorelease pool scope.
+ return true;
+ default:
+ // Nothing else does this.
+ return false;
+ }
+ }
+
case CanChangeRetainCount: {
InstructionClass Class = GetInstructionClass(Inst);
switch (Class) {
@@ -2002,6 +2046,7 @@ Depends(DependenceKind Flavor, Instruction *Inst, const Value *Arg,
case RetainAutoreleaseDep:
switch (GetBasicInstructionClass(Inst)) {
case IC_AutoreleasepoolPop:
+ case IC_AutoreleasepoolPush:
// Don't merge an objc_autorelease with an objc_retain inside a different
// autoreleasepool scope.
return true;
@@ -2136,17 +2181,26 @@ ObjCARCOpt::OptimizeRetainCall(Function &F, Instruction *Retain) {
/// return true.
bool
ObjCARCOpt::OptimizeRetainRVCall(Function &F, Instruction *RetainRV) {
- // Check for the argument being from an immediately preceding call.
+ // Check for the argument being from an immediately preceding call or invoke.
Value *Arg = GetObjCArg(RetainRV);
CallSite CS(Arg);
- if (Instruction *Call = CS.getInstruction())
+ if (Instruction *Call = CS.getInstruction()) {
if (Call->getParent() == RetainRV->getParent()) {
BasicBlock::iterator I = Call;
++I;
while (isNoopInstruction(I)) ++I;
if (&*I == RetainRV)
return false;
+ } else if (InvokeInst *II = dyn_cast<InvokeInst>(Call)) {
+ BasicBlock *RetainRVParent = RetainRV->getParent();
+ if (II->getNormalDest() == RetainRVParent) {
+ BasicBlock::iterator I = RetainRVParent->begin();
+ while (isNoopInstruction(I)) ++I;
+ if (&*I == RetainRV)
+ return false;
+ }
}
+ }
// Check for being preceded by an objc_autoreleaseReturnValue on the same
// pointer. In this case, we can delete the pair.
@@ -2232,6 +2286,7 @@ void ObjCARCOpt::OptimizeIndividualCalls(Function &F) {
case IC_DestroyWeak: {
CallInst *CI = cast<CallInst>(Inst);
if (isNullOrUndef(CI->getArgOperand(0))) {
+ Changed = true;
Type *Ty = CI->getArgOperand(0)->getType();
new StoreInst(UndefValue::get(cast<PointerType>(Ty)->getElementType()),
Constant::getNullValue(Ty),
@@ -2247,6 +2302,7 @@ void ObjCARCOpt::OptimizeIndividualCalls(Function &F) {
CallInst *CI = cast<CallInst>(Inst);
if (isNullOrUndef(CI->getArgOperand(0)) ||
isNullOrUndef(CI->getArgOperand(1))) {
+ Changed = true;
Type *Ty = CI->getArgOperand(0)->getType();
new StoreInst(UndefValue::get(cast<PointerType>(Ty)->getElementType()),
Constant::getNullValue(Ty),
@@ -2360,9 +2416,34 @@ void ObjCARCOpt::OptimizeIndividualCalls(Function &F) {
// Check that there is nothing that cares about the reference
// count between the call and the phi.
- FindDependencies(NeedsPositiveRetainCount, Arg,
- Inst->getParent(), Inst,
- DependingInstructions, Visited, PA);
+ switch (Class) {
+ case IC_Retain:
+ case IC_RetainBlock:
+ // These can always be moved up.
+ break;
+ case IC_Release:
+ // These can't be moved across things that care about the retain count.
+ FindDependencies(NeedsPositiveRetainCount, Arg,
+ Inst->getParent(), Inst,
+ DependingInstructions, Visited, PA);
+ break;
+ case IC_Autorelease:
+ // These can't be moved across autorelease pool scope boundaries.
+ FindDependencies(AutoreleasePoolBoundary, Arg,
+ Inst->getParent(), Inst,
+ DependingInstructions, Visited, PA);
+ break;
+ case IC_RetainRV:
+ case IC_AutoreleaseRV:
+ // Don't move these; the RV optimization depends on the autoreleaseRV
+ // being tail called, and the retainRV being immediately after a call
+ // (which might still happen if we get lucky with codegen layout, but
+ // it's not worth taking the chance).
+ continue;
+ default:
+ llvm_unreachable("Invalid dependence flavor");
+ }
+
if (DependingInstructions.size() == 1 &&
*DependingInstructions.begin() == PN) {
Changed = true;
@@ -2516,6 +2597,164 @@ ObjCARCOpt::CheckForCFGHazards(const BasicBlock *BB,
}
bool
+ObjCARCOpt::VisitInstructionBottomUp(Instruction *Inst,
+ BasicBlock *BB,
+ MapVector<Value *, RRInfo> &Retains,
+ BBState &MyStates) {
+ bool NestingDetected = false;
+ InstructionClass Class = GetInstructionClass(Inst);
+ const Value *Arg = 0;
+
+ switch (Class) {
+ case IC_Release: {
+ Arg = GetObjCArg(Inst);
+
+ PtrState &S = MyStates.getPtrBottomUpState(Arg);
+
+    // If we see two releases in a row on the same pointer, make a note;
+    // we'll circle back to revisit it after we've
+ // hopefully eliminated the second release, which may allow us to
+ // eliminate the first release too.
+ // Theoretically we could implement removal of nested retain+release
+ // pairs by making PtrState hold a stack of states, but this is
+ // simple and avoids adding overhead for the non-nested case.
+ if (S.GetSeq() == S_Release || S.GetSeq() == S_MovableRelease)
+ NestingDetected = true;
+
+ S.RRI.clear();
+
+ MDNode *ReleaseMetadata = Inst->getMetadata(ImpreciseReleaseMDKind);
+ S.SetSeq(ReleaseMetadata ? S_MovableRelease : S_Release);
+ S.RRI.ReleaseMetadata = ReleaseMetadata;
+ S.RRI.KnownSafe = S.IsKnownNested() || S.IsKnownIncremented();
+ S.RRI.IsTailCallRelease = cast<CallInst>(Inst)->isTailCall();
+ S.RRI.Calls.insert(Inst);
+
+ S.IncrementRefCount();
+ S.IncrementNestCount();
+ break;
+ }
+ case IC_RetainBlock:
+ // An objc_retainBlock call with just a use may need to be kept,
+ // because it may be copying a block from the stack to the heap.
+ if (!IsRetainBlockOptimizable(Inst))
+ break;
+ // FALLTHROUGH
+ case IC_Retain:
+ case IC_RetainRV: {
+ Arg = GetObjCArg(Inst);
+
+ PtrState &S = MyStates.getPtrBottomUpState(Arg);
+ S.DecrementRefCount();
+ S.SetAtLeastOneRefCount();
+ S.DecrementNestCount();
+
+ switch (S.GetSeq()) {
+ case S_Stop:
+ case S_Release:
+ case S_MovableRelease:
+ case S_Use:
+ S.RRI.ReverseInsertPts.clear();
+ // FALL THROUGH
+ case S_CanRelease:
+ // Don't do retain+release tracking for IC_RetainRV, because it's
+ // better to let it remain as the first instruction after a call.
+ if (Class != IC_RetainRV) {
+ S.RRI.IsRetainBlock = Class == IC_RetainBlock;
+ Retains[Inst] = S.RRI;
+ }
+ S.ClearSequenceProgress();
+ break;
+ case S_None:
+ break;
+ case S_Retain:
+ llvm_unreachable("bottom-up pointer in retain state!");
+ }
+ return NestingDetected;
+ }
+ case IC_AutoreleasepoolPop:
+ // Conservatively, clear MyStates for all known pointers.
+ MyStates.clearBottomUpPointers();
+ return NestingDetected;
+ case IC_AutoreleasepoolPush:
+ case IC_None:
+ // These are irrelevant.
+ return NestingDetected;
+ default:
+ break;
+ }
+
+ // Consider any other possible effects of this instruction on each
+ // pointer being tracked.
+ for (BBState::ptr_iterator MI = MyStates.bottom_up_ptr_begin(),
+ ME = MyStates.bottom_up_ptr_end(); MI != ME; ++MI) {
+ const Value *Ptr = MI->first;
+ if (Ptr == Arg)
+ continue; // Handled above.
+ PtrState &S = MI->second;
+ Sequence Seq = S.GetSeq();
+
+ // Check for possible releases.
+ if (CanAlterRefCount(Inst, Ptr, PA, Class)) {
+ S.DecrementRefCount();
+ switch (Seq) {
+ case S_Use:
+ S.SetSeq(S_CanRelease);
+ continue;
+ case S_CanRelease:
+ case S_Release:
+ case S_MovableRelease:
+ case S_Stop:
+ case S_None:
+ break;
+ case S_Retain:
+ llvm_unreachable("bottom-up pointer in retain state!");
+ }
+ }
+
+ // Check for possible direct uses.
+ switch (Seq) {
+ case S_Release:
+ case S_MovableRelease:
+ if (CanUse(Inst, Ptr, PA, Class)) {
+ assert(S.RRI.ReverseInsertPts.empty());
+ // If this is an invoke instruction, we're scanning it as part of
+ // one of its successor blocks, since we can't insert code after it
+ // in its own block, and we don't want to split critical edges.
+ if (isa<InvokeInst>(Inst))
+ S.RRI.ReverseInsertPts.insert(BB->getFirstInsertionPt());
+ else
+ S.RRI.ReverseInsertPts.insert(llvm::next(BasicBlock::iterator(Inst)));
+ S.SetSeq(S_Use);
+ } else if (Seq == S_Release &&
+ (Class == IC_User || Class == IC_CallOrUser)) {
+ // Non-movable releases depend on any possible objc pointer use.
+ S.SetSeq(S_Stop);
+ assert(S.RRI.ReverseInsertPts.empty());
+ // As above; handle invoke specially.
+ if (isa<InvokeInst>(Inst))
+ S.RRI.ReverseInsertPts.insert(BB->getFirstInsertionPt());
+ else
+ S.RRI.ReverseInsertPts.insert(llvm::next(BasicBlock::iterator(Inst)));
+ }
+ break;
+ case S_Stop:
+ if (CanUse(Inst, Ptr, PA, Class))
+ S.SetSeq(S_Use);
+ break;
+ case S_CanRelease:
+ case S_Use:
+ case S_None:
+ break;
+ case S_Retain:
+ llvm_unreachable("bottom-up pointer in retain state!");
+ }
+ }
+
+ return NestingDetected;
+}
+
+bool
ObjCARCOpt::VisitBottomUp(BasicBlock *BB,
DenseMap<const BasicBlock *, BBState> &BBStates,
MapVector<Value *, RRInfo> &Retains) {
@@ -2560,144 +2799,164 @@ ObjCARCOpt::VisitBottomUp(BasicBlock *BB,
// Visit all the instructions, bottom-up.
for (BasicBlock::iterator I = BB->end(), E = BB->begin(); I != E; --I) {
Instruction *Inst = llvm::prior(I);
- InstructionClass Class = GetInstructionClass(Inst);
- const Value *Arg = 0;
- switch (Class) {
- case IC_Release: {
- Arg = GetObjCArg(Inst);
+ // Invoke instructions are visited as part of their successors (below).
+ if (isa<InvokeInst>(Inst))
+ continue;
+
+ NestingDetected |= VisitInstructionBottomUp(Inst, BB, Retains, MyStates);
+ }
+
+ // If there's a predecessor with an invoke, visit the invoke as
+ // if it were part of this block, since we can't insert code after
+ // an invoke in its own block, and we don't want to split critical
+ // edges.
+ for (pred_iterator PI(BB), PE(BB, false); PI != PE; ++PI) {
+ BasicBlock *Pred = *PI;
+ TerminatorInst *PredTI = cast<TerminatorInst>(&Pred->back());
+ if (isa<InvokeInst>(PredTI))
+ NestingDetected |= VisitInstructionBottomUp(PredTI, BB, Retains, MyStates);
+ }
+
+ return NestingDetected;
+}
+
+bool
+ObjCARCOpt::VisitInstructionTopDown(Instruction *Inst,
+ DenseMap<Value *, RRInfo> &Releases,
+ BBState &MyStates) {
+ bool NestingDetected = false;
+ InstructionClass Class = GetInstructionClass(Inst);
+ const Value *Arg = 0;
+
+ switch (Class) {
+ case IC_RetainBlock:
+ // An objc_retainBlock call with just a use may need to be kept,
+ // because it may be copying a block from the stack to the heap.
+ if (!IsRetainBlockOptimizable(Inst))
+ break;
+ // FALLTHROUGH
+ case IC_Retain:
+ case IC_RetainRV: {
+ Arg = GetObjCArg(Inst);
- PtrState &S = MyStates.getPtrBottomUpState(Arg);
+ PtrState &S = MyStates.getPtrTopDownState(Arg);
- // If we see two releases in a row on the same pointer. If so, make
+ // Don't do retain+release tracking for IC_RetainRV, because it's
+ // better to let it remain as the first instruction after a call.
+ if (Class != IC_RetainRV) {
+      // If we see two retains in a row on the same pointer, make a note;
       // we'll circle back to revisit it after we've
- // hopefully eliminated the second release, which may allow us to
- // eliminate the first release too.
+ // hopefully eliminated the second retain, which may allow us to
+ // eliminate the first retain too.
// Theoretically we could implement removal of nested retain+release
// pairs by making PtrState hold a stack of states, but this is
// simple and avoids adding overhead for the non-nested case.
- if (S.GetSeq() == S_Release || S.GetSeq() == S_MovableRelease)
+ if (S.GetSeq() == S_Retain)
NestingDetected = true;
+ S.SetSeq(S_Retain);
S.RRI.clear();
-
- MDNode *ReleaseMetadata = Inst->getMetadata(ImpreciseReleaseMDKind);
- S.SetSeq(ReleaseMetadata ? S_MovableRelease : S_Release);
- S.RRI.ReleaseMetadata = ReleaseMetadata;
- S.RRI.KnownSafe = S.IsKnownNested() || S.IsKnownIncremented();
- S.RRI.IsTailCallRelease = cast<CallInst>(Inst)->isTailCall();
+ S.RRI.IsRetainBlock = Class == IC_RetainBlock;
+ // Don't check S.IsKnownIncremented() here because it's not
+ // sufficient.
+ S.RRI.KnownSafe = S.IsKnownNested();
S.RRI.Calls.insert(Inst);
+ }
- S.IncrementRefCount();
- S.IncrementNestCount();
+ S.SetAtLeastOneRefCount();
+ S.IncrementRefCount();
+ S.IncrementNestCount();
+ return NestingDetected;
+ }
+ case IC_Release: {
+ Arg = GetObjCArg(Inst);
+
+ PtrState &S = MyStates.getPtrTopDownState(Arg);
+ S.DecrementRefCount();
+ S.DecrementNestCount();
+
+ switch (S.GetSeq()) {
+ case S_Retain:
+ case S_CanRelease:
+ S.RRI.ReverseInsertPts.clear();
+ // FALL THROUGH
+ case S_Use:
+ S.RRI.ReleaseMetadata = Inst->getMetadata(ImpreciseReleaseMDKind);
+ S.RRI.IsTailCallRelease = cast<CallInst>(Inst)->isTailCall();
+ Releases[Inst] = S.RRI;
+ S.ClearSequenceProgress();
+ break;
+ case S_None:
break;
+ case S_Stop:
+ case S_Release:
+ case S_MovableRelease:
+ llvm_unreachable("top-down pointer in release state!");
}
- case IC_RetainBlock:
- // An objc_retainBlock call with just a use may need to be kept,
- // because it may be copying a block from the stack to the heap.
- if (!IsRetainBlockOptimizable(Inst))
- break;
- // FALLTHROUGH
- case IC_Retain:
- case IC_RetainRV: {
- Arg = GetObjCArg(Inst);
+ break;
+ }
+ case IC_AutoreleasepoolPop:
+ // Conservatively, clear MyStates for all known pointers.
+ MyStates.clearTopDownPointers();
+ return NestingDetected;
+ case IC_AutoreleasepoolPush:
+ case IC_None:
+ // These are irrelevant.
+ return NestingDetected;
+ default:
+ break;
+ }
- PtrState &S = MyStates.getPtrBottomUpState(Arg);
+ // Consider any other possible effects of this instruction on each
+ // pointer being tracked.
+ for (BBState::ptr_iterator MI = MyStates.top_down_ptr_begin(),
+ ME = MyStates.top_down_ptr_end(); MI != ME; ++MI) {
+ const Value *Ptr = MI->first;
+ if (Ptr == Arg)
+ continue; // Handled above.
+ PtrState &S = MI->second;
+ Sequence Seq = S.GetSeq();
+
+ // Check for possible releases.
+ if (CanAlterRefCount(Inst, Ptr, PA, Class)) {
S.DecrementRefCount();
- S.SetAtLeastOneRefCount();
- S.DecrementNestCount();
+ switch (Seq) {
+ case S_Retain:
+ S.SetSeq(S_CanRelease);
+ assert(S.RRI.ReverseInsertPts.empty());
+ S.RRI.ReverseInsertPts.insert(Inst);
- switch (S.GetSeq()) {
- case S_Stop:
- case S_Release:
- case S_MovableRelease:
+ // One call can't cause a transition from S_Retain to S_CanRelease
+      // and from S_CanRelease to S_Use. If we've made the first transition,
+ // we're done.
+ continue;
case S_Use:
- S.RRI.ReverseInsertPts.clear();
- // FALL THROUGH
case S_CanRelease:
- // Don't do retain+release tracking for IC_RetainRV, because it's
- // better to let it remain as the first instruction after a call.
- if (Class != IC_RetainRV) {
- S.RRI.IsRetainBlock = Class == IC_RetainBlock;
- Retains[Inst] = S.RRI;
- }
- S.ClearSequenceProgress();
- break;
case S_None:
break;
- case S_Retain:
- llvm_unreachable("bottom-up pointer in retain state!");
- }
- continue;
- }
- case IC_AutoreleasepoolPop:
- // Conservatively, clear MyStates for all known pointers.
- MyStates.clearBottomUpPointers();
- continue;
- case IC_AutoreleasepoolPush:
- case IC_None:
- // These are irrelevant.
- continue;
- default:
- break;
- }
-
- // Consider any other possible effects of this instruction on each
- // pointer being tracked.
- for (BBState::ptr_iterator MI = MyStates.bottom_up_ptr_begin(),
- ME = MyStates.bottom_up_ptr_end(); MI != ME; ++MI) {
- const Value *Ptr = MI->first;
- if (Ptr == Arg)
- continue; // Handled above.
- PtrState &S = MI->second;
- Sequence Seq = S.GetSeq();
-
- // Check for possible releases.
- if (CanAlterRefCount(Inst, Ptr, PA, Class)) {
- S.DecrementRefCount();
- switch (Seq) {
- case S_Use:
- S.SetSeq(S_CanRelease);
- continue;
- case S_CanRelease:
- case S_Release:
- case S_MovableRelease:
- case S_Stop:
- case S_None:
- break;
- case S_Retain:
- llvm_unreachable("bottom-up pointer in retain state!");
- }
- }
-
- // Check for possible direct uses.
- switch (Seq) {
+ case S_Stop:
case S_Release:
case S_MovableRelease:
- if (CanUse(Inst, Ptr, PA, Class)) {
- assert(S.RRI.ReverseInsertPts.empty());
- S.RRI.ReverseInsertPts.insert(Inst);
- S.SetSeq(S_Use);
- } else if (Seq == S_Release &&
- (Class == IC_User || Class == IC_CallOrUser)) {
- // Non-movable releases depend on any possible objc pointer use.
- S.SetSeq(S_Stop);
- assert(S.RRI.ReverseInsertPts.empty());
- S.RRI.ReverseInsertPts.insert(Inst);
- }
- break;
- case S_Stop:
- if (CanUse(Inst, Ptr, PA, Class))
- S.SetSeq(S_Use);
- break;
- case S_CanRelease:
- case S_Use:
- case S_None:
- break;
- case S_Retain:
- llvm_unreachable("bottom-up pointer in retain state!");
+ llvm_unreachable("top-down pointer in release state!");
}
}
+
+ // Check for possible direct uses.
+ switch (Seq) {
+ case S_CanRelease:
+ if (CanUse(Inst, Ptr, PA, Class))
+ S.SetSeq(S_Use);
+ break;
+ case S_Retain:
+ case S_Use:
+ case S_None:
+ break;
+ case S_Stop:
+ case S_Release:
+ case S_MovableRelease:
+ llvm_unreachable("top-down pointer in release state!");
+ }
}
return NestingDetected;
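
The two loops in VisitBottomUp encode an unusual traversal order: invokes are skipped in their own block and instead visited from each successor, because code can't be inserted after an invoke without splitting a critical edge. A condensed sketch of just that ordering, under the same era API (Visit is a hypothetical callback):

    #include "llvm/ADT/STLExtras.h"
    #include "llvm/Instructions.h"
    #include "llvm/Support/CFG.h"
    using namespace llvm;

    static void visitBottomUpOrder(BasicBlock *BB,
                                   void (*Visit)(Instruction *, BasicBlock *)) {
      // Walk BB's instructions in reverse, deferring any invoke.
      for (BasicBlock::iterator I = BB->end(), E = BB->begin(); I != E; --I) {
        Instruction *Inst = llvm::prior(I);
        if (isa<InvokeInst>(Inst))
          continue;                       // visited from the successor side
        Visit(Inst, BB);
      }
      // Treat a predecessor's terminating invoke as if it ended this block.
      for (pred_iterator PI = pred_begin(BB), PE = pred_end(BB); PI != PE; ++PI)
        if (InvokeInst *II = dyn_cast<InvokeInst>((*PI)->getTerminator()))
          Visit(II, BB);
    }
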
@@ -2751,138 +3010,7 @@ ObjCARCOpt::VisitTopDown(BasicBlock *BB,
// Visit all the instructions, top-down.
for (BasicBlock::iterator I = BB->begin(), E = BB->end(); I != E; ++I) {
Instruction *Inst = I;
- InstructionClass Class = GetInstructionClass(Inst);
- const Value *Arg = 0;
-
- switch (Class) {
- case IC_RetainBlock:
- // An objc_retainBlock call with just a use may need to be kept,
- // because it may be copying a block from the stack to the heap.
- if (!IsRetainBlockOptimizable(Inst))
- break;
- // FALLTHROUGH
- case IC_Retain:
- case IC_RetainRV: {
- Arg = GetObjCArg(Inst);
-
- PtrState &S = MyStates.getPtrTopDownState(Arg);
-
- // Don't do retain+release tracking for IC_RetainRV, because it's
- // better to let it remain as the first instruction after a call.
- if (Class != IC_RetainRV) {
- // If we see two retains in a row on the same pointer. If so, make
- // a note, and we'll cicle back to revisit it after we've
- // hopefully eliminated the second retain, which may allow us to
- // eliminate the first retain too.
- // Theoretically we could implement removal of nested retain+release
- // pairs by making PtrState hold a stack of states, but this is
- // simple and avoids adding overhead for the non-nested case.
- if (S.GetSeq() == S_Retain)
- NestingDetected = true;
-
- S.SetSeq(S_Retain);
- S.RRI.clear();
- S.RRI.IsRetainBlock = Class == IC_RetainBlock;
- // Don't check S.IsKnownIncremented() here because it's not
- // sufficient.
- S.RRI.KnownSafe = S.IsKnownNested();
- S.RRI.Calls.insert(Inst);
- }
-
- S.SetAtLeastOneRefCount();
- S.IncrementRefCount();
- S.IncrementNestCount();
- continue;
- }
- case IC_Release: {
- Arg = GetObjCArg(Inst);
-
- PtrState &S = MyStates.getPtrTopDownState(Arg);
- S.DecrementRefCount();
- S.DecrementNestCount();
-
- switch (S.GetSeq()) {
- case S_Retain:
- case S_CanRelease:
- S.RRI.ReverseInsertPts.clear();
- // FALL THROUGH
- case S_Use:
- S.RRI.ReleaseMetadata = Inst->getMetadata(ImpreciseReleaseMDKind);
- S.RRI.IsTailCallRelease = cast<CallInst>(Inst)->isTailCall();
- Releases[Inst] = S.RRI;
- S.ClearSequenceProgress();
- break;
- case S_None:
- break;
- case S_Stop:
- case S_Release:
- case S_MovableRelease:
- llvm_unreachable("top-down pointer in release state!");
- }
- break;
- }
- case IC_AutoreleasepoolPop:
- // Conservatively, clear MyStates for all known pointers.
- MyStates.clearTopDownPointers();
- continue;
- case IC_AutoreleasepoolPush:
- case IC_None:
- // These are irrelevant.
- continue;
- default:
- break;
- }
-
- // Consider any other possible effects of this instruction on each
- // pointer being tracked.
- for (BBState::ptr_iterator MI = MyStates.top_down_ptr_begin(),
- ME = MyStates.top_down_ptr_end(); MI != ME; ++MI) {
- const Value *Ptr = MI->first;
- if (Ptr == Arg)
- continue; // Handled above.
- PtrState &S = MI->second;
- Sequence Seq = S.GetSeq();
-
- // Check for possible releases.
- if (CanAlterRefCount(Inst, Ptr, PA, Class)) {
- S.DecrementRefCount();
- switch (Seq) {
- case S_Retain:
- S.SetSeq(S_CanRelease);
- assert(S.RRI.ReverseInsertPts.empty());
- S.RRI.ReverseInsertPts.insert(Inst);
-
- // One call can't cause a transition from S_Retain to S_CanRelease
- // and S_CanRelease to S_Use. If we've made the first transition,
- // we're done.
- continue;
- case S_Use:
- case S_CanRelease:
- case S_None:
- break;
- case S_Stop:
- case S_Release:
- case S_MovableRelease:
- llvm_unreachable("top-down pointer in release state!");
- }
- }
-
- // Check for possible direct uses.
- switch (Seq) {
- case S_CanRelease:
- if (CanUse(Inst, Ptr, PA, Class))
- S.SetSeq(S_Use);
- break;
- case S_Retain:
- case S_Use:
- case S_None:
- break;
- case S_Stop:
- case S_Release:
- case S_MovableRelease:
- llvm_unreachable("top-down pointer in release state!");
- }
- }
+ NestingDetected |= VisitInstructionTopDown(Inst, Releases, MyStates);
}
CheckForCFGHazards(BB, BBStates, MyStates);
@@ -3032,35 +3160,17 @@ void ObjCARCOpt::MoveCalls(Value *Arg,
for (SmallPtrSet<Instruction *, 2>::const_iterator
PI = RetainsToMove.ReverseInsertPts.begin(),
PE = RetainsToMove.ReverseInsertPts.end(); PI != PE; ++PI) {
- Instruction *LastUse = *PI;
- Instruction *InsertPts[] = { 0, 0, 0 };
- if (InvokeInst *II = dyn_cast<InvokeInst>(LastUse)) {
- // We can't insert code immediately after an invoke instruction, so
- // insert code at the beginning of both successor blocks instead.
- // The invoke's return value isn't available in the unwind block,
- // but our releases will never depend on it, because they must be
- // paired with retains from before the invoke.
- InsertPts[0] = II->getNormalDest()->getFirstInsertionPt();
- if (!II->getMetadata(NoObjCARCExceptionsMDKind))
- InsertPts[1] = II->getUnwindDest()->getFirstInsertionPt();
- } else {
- // Insert code immediately after the last use.
- InsertPts[0] = llvm::next(BasicBlock::iterator(LastUse));
- }
-
- for (Instruction **I = InsertPts; *I; ++I) {
- Instruction *InsertPt = *I;
- Value *MyArg = ArgTy == ParamTy ? Arg :
- new BitCastInst(Arg, ParamTy, "", InsertPt);
- CallInst *Call = CallInst::Create(getReleaseCallee(M), MyArg,
- "", InsertPt);
- // Attach a clang.imprecise_release metadata tag, if appropriate.
- if (MDNode *M = ReleasesToMove.ReleaseMetadata)
- Call->setMetadata(ImpreciseReleaseMDKind, M);
- Call->setDoesNotThrow();
- if (ReleasesToMove.IsTailCallRelease)
- Call->setTailCall();
- }
+ Instruction *InsertPt = *PI;
+ Value *MyArg = ArgTy == ParamTy ? Arg :
+ new BitCastInst(Arg, ParamTy, "", InsertPt);
+ CallInst *Call = CallInst::Create(getReleaseCallee(M), MyArg,
+ "", InsertPt);
+ // Attach a clang.imprecise_release metadata tag, if appropriate.
+ if (MDNode *M = ReleasesToMove.ReleaseMetadata)
+ Call->setMetadata(ImpreciseReleaseMDKind, M);
+ Call->setDoesNotThrow();
+ if (ReleasesToMove.IsTailCallRelease)
+ Call->setTailCall();
}
// Delete the original retain and release calls.
@@ -3080,6 +3190,8 @@ void ObjCARCOpt::MoveCalls(Value *Arg,
}
}
+/// PerformCodePlacement - Identify pairings between the retains and releases,
+/// and delete and/or move them.
bool
ObjCARCOpt::PerformCodePlacement(DenseMap<const BasicBlock *, BBState>
&BBStates,
@@ -3093,6 +3205,7 @@ ObjCARCOpt::PerformCodePlacement(DenseMap<const BasicBlock *, BBState>
SmallVector<Instruction *, 4> NewReleases;
SmallVector<Instruction *, 8> DeadInsts;
+ // Visit each retain.
for (MapVector<Value *, RRInfo>::const_iterator I = Retains.begin(),
E = Retains.end(); I != E; ++I) {
Value *V = I->first;
@@ -3566,6 +3679,7 @@ bool ObjCARCOpt::doInitialization(Module &M) {
if (!EnableARCOpts)
return false;
+ // If nothing in the Module uses ARC, don't do anything.
Run = ModuleHasARC(M);
if (!Run)
return false;
@@ -3900,6 +4014,7 @@ void ObjCARCContract::ContractRelease(Instruction *Release,
}
bool ObjCARCContract::doInitialization(Module &M) {
+ // If nothing in the Module uses ARC, don't do anything.
Run = ModuleHasARC(M);
if (!Run)
return false;
@@ -3975,6 +4090,7 @@ bool ObjCARCContract::runOnFunction(Function &F) {
--BBI;
while (isNoopInstruction(BBI)) --BBI;
if (&*BBI == GetObjCArg(Inst)) {
+ Changed = true;
InlineAsm *IA =
InlineAsm::get(FunctionType::get(Type::getVoidTy(Inst->getContext()),
/*isVarArg=*/false),
@@ -4024,16 +4140,19 @@ bool ObjCARCContract::runOnFunction(Function &F) {
Use &U = UI.getUse();
unsigned OperandNo = UI.getOperandNo();
++UI; // Increment UI now, because we may unlink its element.
- Instruction *UserInst = dyn_cast<Instruction>(U.getUser());
- if (!UserInst)
- continue;
- // FIXME: dominates should return true for unreachable UserInst.
- if (!DT->isReachableFromEntry(UserInst->getParent()) ||
- DT->dominates(Inst, UserInst)) {
+
+ // If the call's return value dominates a use of the call's argument
+ // value, rewrite the use to use the return value. We check for
+ // reachability here because an unreachable call is considered to
+    // trivially dominate itself, which would lead us to rewrite its
+    // argument in terms of its return value, producing infinite loops
+    // in GetObjCArg.
+ if (DT->isReachableFromEntry(U) &&
+ DT->dominates(Inst, U)) {
Changed = true;
Instruction *Replacement = Inst;
Type *UseTy = U.get()->getType();
- if (PHINode *PHI = dyn_cast<PHINode>(UserInst)) {
+ if (PHINode *PHI = dyn_cast<PHINode>(U.getUser())) {
// For PHI nodes, insert the bitcast in the predecessor block.
unsigned ValNo =
PHINode::getIncomingValueNumForOperand(OperandNo);
@@ -4042,6 +4161,9 @@ bool ObjCARCContract::runOnFunction(Function &F) {
if (Replacement->getType() != UseTy)
Replacement = new BitCastInst(Replacement, UseTy, "",
&BB->back());
+ // While we're here, rewrite all edges for this PHI, rather
+ // than just one use at a time, to minimize the number of
+ // bitcasts we emit.
for (unsigned i = 0, e = PHI->getNumIncomingValues();
i != e; ++i)
if (PHI->getIncomingBlock(i) == BB) {
@@ -4054,7 +4176,8 @@ bool ObjCARCContract::runOnFunction(Function &F) {
}
} else {
if (Replacement->getType() != UseTy)
- Replacement = new BitCastInst(Replacement, UseTy, "", UserInst);
+ Replacement = new BitCastInst(Replacement, UseTy, "",
+ cast<Instruction>(U.getUser()));
U.set(Replacement);
}
}
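
The loop above is an instance of a general dominated-use rewrite, shown here in isolation. The reachability test is load-bearing: dominance holds vacuously inside unreachable code, so without it an unreachable call would be rewritten in terms of itself and GetObjCArg would chase the resulting cycle forever. This sketch elides the PHI case, which needs the bitcast placed in the incoming block:

    #include "llvm/Analysis/Dominators.h"
    #include "llvm/Instructions.h"
    using namespace llvm;

    static bool rewriteDominatedUses(Value *Arg, Instruction *Inst,
                                     DominatorTree *DT) {
      bool Changed = false;
      for (Value::use_iterator UI = Arg->use_begin(), UE = Arg->use_end();
           UI != UE; ) {
        Use &U = UI.getUse();
        ++UI;                           // advance first; U may be unlinked
        if (DT->isReachableFromEntry(U) && DT->dominates(Inst, U)) {
          U.set(Inst);                  // use the return value instead
          Changed = true;
        }
      }
      return Changed;
    }
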
diff --git a/lib/Transforms/Scalar/Reassociate.cpp b/lib/Transforms/Scalar/Reassociate.cpp
index 8f98a5b..cb408a1 100644
--- a/lib/Transforms/Scalar/Reassociate.cpp
+++ b/lib/Transforms/Scalar/Reassociate.cpp
@@ -74,7 +74,7 @@ static void PrintOps(Instruction *I, const SmallVectorImpl<ValueEntry> &Ops) {
namespace {
class Reassociate : public FunctionPass {
DenseMap<BasicBlock*, unsigned> RankMap;
- DenseMap<AssertingVH<>, unsigned> ValueRankMap;
+ DenseMap<AssertingVH<Value>, unsigned> ValueRankMap;
SmallVector<WeakVH, 8> RedoInsts;
SmallVector<WeakVH, 8> DeadInsts;
bool MadeChange;
@@ -210,7 +210,7 @@ static BinaryOperator *isReassociableOp(Value *V, unsigned Opcode) {
/// LowerNegateToMultiply - Replace 0-X with X*-1.
///
static Instruction *LowerNegateToMultiply(Instruction *Neg,
- DenseMap<AssertingVH<>, unsigned> &ValueRankMap) {
+ DenseMap<AssertingVH<Value>, unsigned> &ValueRankMap) {
Constant *Cst = Constant::getAllOnesValue(Neg->getType());
Instruction *Res = BinaryOperator::CreateMul(Neg->getOperand(1), Cst, "",Neg);
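
The identity behind LowerNegateToMultiply holds in any two's-complement width: 0 - X and X * -1 agree modulo 2^n, and Constant::getAllOnesValue is exactly that -1. A quick exhaustive check at 8 bits:

    #include <cassert>
    #include <cstdint>

    int main() {
      for (int i = 0; i < 256; ++i) {
        uint8_t X = static_cast<uint8_t>(i);
        uint8_t AllOnes = 0xFF;  // -1 in 8-bit two's complement
        // 0 - X == X * -1 (mod 2^8), for every X.
        assert(static_cast<uint8_t>(0 - X) ==
               static_cast<uint8_t>(X * AllOnes));
      }
      return 0;
    }
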
@@ -492,7 +492,7 @@ static bool ShouldBreakUpSubtract(Instruction *Sub) {
/// only used by an add, transform this into (X+(0-Y)) to promote better
/// reassociation.
static Instruction *BreakUpSubtract(Instruction *Sub,
- DenseMap<AssertingVH<>, unsigned> &ValueRankMap) {
+ DenseMap<AssertingVH<Value>, unsigned> &ValueRankMap) {
// Convert a subtract into an add and a neg instruction. This allows sub
// instructions to be commuted with other add instructions.
//
@@ -517,8 +517,8 @@ static Instruction *BreakUpSubtract(Instruction *Sub,
/// ConvertShiftToMul - If this is a shift of a reassociable multiply or is used
/// by one, change this into a multiply by a constant to assist with further
/// reassociation.
-static Instruction *ConvertShiftToMul(Instruction *Shl,
- DenseMap<AssertingVH<>, unsigned> &ValueRankMap) {
+static Instruction *ConvertShiftToMul(Instruction *Shl,
+ DenseMap<AssertingVH<Value>, unsigned> &ValueRankMap) {
// If an operand of this shift is a reassociable multiply, or if the shift
// is used by a reassociable multiply or add, turn into a multiply.
if (isReassociableOp(Shl->getOperand(0), Instruction::Mul) ||
diff --git a/lib/Transforms/Scalar/SCCP.cpp b/lib/Transforms/Scalar/SCCP.cpp
index 5ce82b9..16b64a5 100644
--- a/lib/Transforms/Scalar/SCCP.cpp
+++ b/lib/Transforms/Scalar/SCCP.cpp
@@ -1925,8 +1925,8 @@ bool IPSCCP::runOnModule(Module &M) {
ReturnsToZap[i]->setOperand(0, UndefValue::get(F->getReturnType()));
}
- // If we inferred constant or undef values for globals variables, we can delete
- // the global and any stores that remain to it.
+  // If we inferred constant or undef values for global variables, we can
+ // delete the global and any stores that remain to it.
const DenseMap<GlobalVariable*, LatticeVal> &TG = Solver.getTrackedGlobals();
for (DenseMap<GlobalVariable*, LatticeVal>::const_iterator I = TG.begin(),
E = TG.end(); I != E; ++I) {
diff --git a/lib/Transforms/Scalar/ScalarReplAggregates.cpp b/lib/Transforms/Scalar/ScalarReplAggregates.cpp
index d36a18f..026fea1 100644
--- a/lib/Transforms/Scalar/ScalarReplAggregates.cpp
+++ b/lib/Transforms/Scalar/ScalarReplAggregates.cpp
@@ -13,7 +13,7 @@
// each member (if possible). Then, if possible, it transforms the individual
// alloca instructions into nice clean scalar SSA form.
//
-// This combines a simple SRoA algorithm with the Mem2Reg algorithm because
+// This combines a simple SRoA algorithm with the Mem2Reg algorithm because they
// often interact, especially for C++ programs. As such, iterating between
// SRoA and Mem2Reg until we run out of things to promote works well.
//
@@ -574,8 +574,8 @@ void ConvertToScalarInfo::ConvertUsesToScalar(Value *Ptr, AllocaInst *NewAI,
// transform it into a store of the expanded constant value.
if (MemSetInst *MSI = dyn_cast<MemSetInst>(User)) {
assert(MSI->getRawDest() == Ptr && "Consistency error!");
- signed SNumBytes = cast<ConstantInt>(MSI->getLength())->getSExtValue();
- if (SNumBytes > 0) {
+ int64_t SNumBytes = cast<ConstantInt>(MSI->getLength())->getSExtValue();
+ if (SNumBytes > 0 && (SNumBytes >> 32) == 0) {
unsigned NumBytes = static_cast<unsigned>(SNumBytes);
unsigned Val = cast<ConstantInt>(MSI->getValue())->getZExtValue();
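
The switch from `signed` to int64_t plus the (SNumBytes >> 32) == 0 guard closes a truncation hole: a memset length just over 2^32 previously narrowed to a small value and produced a wrong-sized store. A small demonstration of both halves (the narrowing result assumes the usual 32-bit-int, two's-complement targets):

    #include <cassert>
    #include <cstdint>

    int main() {
      int64_t SNumBytes = (int64_t(1) << 32) + 8;  // memset of 4 GiB + 8 bytes
      // The old code narrowed to 'signed' right away, silently keeping only 8.
      assert(static_cast<signed>(SNumBytes) == 8);
      // The new code keeps the full width and rejects anything over 32 bits.
      assert(!(SNumBytes > 0 && (SNumBytes >> 32) == 0));
      return 0;
    }
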
diff --git a/lib/Transforms/Scalar/SimplifyLibCalls.cpp b/lib/Transforms/Scalar/SimplifyLibCalls.cpp
index 9c49ec1..f7b6941 100644
--- a/lib/Transforms/Scalar/SimplifyLibCalls.cpp
+++ b/lib/Transforms/Scalar/SimplifyLibCalls.cpp
@@ -1583,21 +1583,16 @@ void SimplifyLibCalls::InitOptimizations() {
Optimizations["llvm.exp2.f64"] = &Exp2;
Optimizations["llvm.exp2.f32"] = &Exp2;
-#ifdef HAVE_FLOORF
- Optimizations["floor"] = &UnaryDoubleFP;
-#endif
-#ifdef HAVE_CEILF
- Optimizations["ceil"] = &UnaryDoubleFP;
-#endif
-#ifdef HAVE_ROUNDF
- Optimizations["round"] = &UnaryDoubleFP;
-#endif
-#ifdef HAVE_RINTF
- Optimizations["rint"] = &UnaryDoubleFP;
-#endif
-#ifdef HAVE_NEARBYINTF
- Optimizations["nearbyint"] = &UnaryDoubleFP;
-#endif
+ if (TLI->has(LibFunc::floor) && TLI->has(LibFunc::floorf))
+ Optimizations["floor"] = &UnaryDoubleFP;
+ if (TLI->has(LibFunc::ceil) && TLI->has(LibFunc::ceilf))
+ Optimizations["ceil"] = &UnaryDoubleFP;
+ if (TLI->has(LibFunc::round) && TLI->has(LibFunc::roundf))
+ Optimizations["round"] = &UnaryDoubleFP;
+ if (TLI->has(LibFunc::rint) && TLI->has(LibFunc::rintf))
+ Optimizations["rint"] = &UnaryDoubleFP;
+ if (TLI->has(LibFunc::nearbyint) && TLI->has(LibFunc::nearbyintf))
+ Optimizations["nearbyint"] = &UnaryDoubleFP;
// Integer Optimizations
Optimizations["ffs"] = &FFS;