aboutsummaryrefslogtreecommitdiffstats
path: root/lib/Transforms/Scalar
diff options
context:
space:
mode:
Diffstat (limited to 'lib/Transforms/Scalar')
-rw-r--r--lib/Transforms/Scalar/CodeGenPrepare.cpp46
-rw-r--r--lib/Transforms/Scalar/DCE.cpp8
-rw-r--r--lib/Transforms/Scalar/DeadStoreElimination.cpp36
-rw-r--r--lib/Transforms/Scalar/EarlyCSE.cpp2
-rw-r--r--lib/Transforms/Scalar/GVN.cpp26
-rw-r--r--lib/Transforms/Scalar/IndVarSimplify.cpp17
-rw-r--r--lib/Transforms/Scalar/JumpThreading.cpp2
-rw-r--r--lib/Transforms/Scalar/LICM.cpp39
-rw-r--r--lib/Transforms/Scalar/LoopIdiomRecognize.cpp24
-rw-r--r--lib/Transforms/Scalar/LoopInstSimplify.cpp2
-rw-r--r--lib/Transforms/Scalar/LoopRotation.cpp65
-rw-r--r--lib/Transforms/Scalar/LoopStrengthReduce.cpp14
-rw-r--r--lib/Transforms/Scalar/ObjCARC.cpp91
-rw-r--r--lib/Transforms/Scalar/SimplifyCFGPass.cpp52
-rw-r--r--lib/Transforms/Scalar/SimplifyLibCalls.cpp154
15 files changed, 387 insertions, 191 deletions
diff --git a/lib/Transforms/Scalar/CodeGenPrepare.cpp b/lib/Transforms/Scalar/CodeGenPrepare.cpp
index a8deda8..5912107 100644
--- a/lib/Transforms/Scalar/CodeGenPrepare.cpp
+++ b/lib/Transforms/Scalar/CodeGenPrepare.cpp
@@ -43,6 +43,7 @@
#include "llvm/Transforms/Utils/AddrModeMatcher.h"
#include "llvm/Transforms/Utils/BasicBlockUtils.h"
#include "llvm/Transforms/Utils/BuildLibCalls.h"
+#include "llvm/Transforms/Utils/BypassSlowDivision.h"
#include "llvm/Transforms/Utils/Local.h"
using namespace llvm;
using namespace llvm::PatternMatch;
@@ -148,7 +149,19 @@ bool CodeGenPrepare::runOnFunction(Function &F) {
PFI = getAnalysisIfAvailable<ProfileInfo>();
OptSize = F.hasFnAttr(Attribute::OptimizeForSize);
- // First pass, eliminate blocks that contain only PHI nodes and an
+ /// This optimization identifies DIV instructions that can be
+ /// profitably bypassed and carried out with a shorter, faster divide.
+ if (TLI && TLI->isSlowDivBypassed()) {
+ const DenseMap<Type *, Type *> &BypassTypeMap = TLI->getBypassSlowDivTypes();
+
+ for (Function::iterator I = F.begin(); I != F.end(); I++) {
+ EverMadeChange |= bypassSlowDivision(F,
+ I,
+ BypassTypeMap);
+ }
+ }
+
+ // Eliminate blocks that contain only PHI nodes and an
// unconditional branch.
EverMadeChange |= EliminateMostlyEmptyBlocks(F);
@@ -988,7 +1001,7 @@ bool CodeGenPrepare::OptimizeMemoryInst(Instruction *MemoryInst, Value *Addr,
WeakVH IterHandle(CurInstIterator);
BasicBlock *BB = CurInstIterator->getParent();
- RecursivelyDeleteTriviallyDeadInstructions(Repl);
+ RecursivelyDeleteTriviallyDeadInstructions(Repl, TLInfo);
if (IterHandle != CurInstIterator) {
// If the iterator instruction was recursively deleted, start over at the
@@ -1174,17 +1187,32 @@ static bool isFormingBranchFromSelectProfitable(SelectInst *SI) {
}
+/// If we have a SelectInst that will likely profit from branch prediction,
+/// turn it into a branch.
bool CodeGenPrepare::OptimizeSelectInst(SelectInst *SI) {
- // If we have a SelectInst that will likely profit from branch prediction,
- // turn it into a branch.
- if (DisableSelectToBranch || OptSize || !TLI ||
- !TLI->isPredictableSelectExpensive())
- return false;
+ bool VectorCond = !SI->getCondition()->getType()->isIntegerTy(1);
- if (!SI->getCondition()->getType()->isIntegerTy(1) ||
- !isFormingBranchFromSelectProfitable(SI))
+ // Can we convert the 'select' to CF ?
+ if (DisableSelectToBranch || OptSize || !TLI || VectorCond)
return false;
+ TargetLowering::SelectSupportKind SelectKind;
+ if (VectorCond)
+ SelectKind = TargetLowering::VectorMaskSelect;
+ else if (SI->getType()->isVectorTy())
+ SelectKind = TargetLowering::ScalarCondVectorVal;
+ else
+ SelectKind = TargetLowering::ScalarValSelect;
+
+ // Do we have efficient codegen support for this kind of 'selects' ?
+ if (TLI->isSelectSupported(SelectKind)) {
+ // We have efficient codegen support for the select instruction.
+ // Check if it is profitable to keep this 'select'.
+ if (!TLI->isPredictableSelectExpensive() ||
+ !isFormingBranchFromSelectProfitable(SI))
+ return false;
+ }
+
ModifiedDT = true;
// First, we split the block containing the select into 2 blocks.
diff --git a/lib/Transforms/Scalar/DCE.cpp b/lib/Transforms/Scalar/DCE.cpp
index 8dbcc23..086f0a1 100644
--- a/lib/Transforms/Scalar/DCE.cpp
+++ b/lib/Transforms/Scalar/DCE.cpp
@@ -22,6 +22,7 @@
#include "llvm/Instruction.h"
#include "llvm/Pass.h"
#include "llvm/Support/InstIterator.h"
+#include "llvm/Target/TargetLibraryInfo.h"
#include "llvm/ADT/Statistic.h"
using namespace llvm;
@@ -38,10 +39,11 @@ namespace {
initializeDeadInstEliminationPass(*PassRegistry::getPassRegistry());
}
virtual bool runOnBasicBlock(BasicBlock &BB) {
+ TargetLibraryInfo *TLI = getAnalysisIfAvailable<TargetLibraryInfo>();
bool Changed = false;
for (BasicBlock::iterator DI = BB.begin(); DI != BB.end(); ) {
Instruction *Inst = DI++;
- if (isInstructionTriviallyDead(Inst)) {
+ if (isInstructionTriviallyDead(Inst, TLI)) {
Inst->eraseFromParent();
Changed = true;
++DIEEliminated;
@@ -87,6 +89,8 @@ char DCE::ID = 0;
INITIALIZE_PASS(DCE, "dce", "Dead Code Elimination", false, false)
bool DCE::runOnFunction(Function &F) {
+ TargetLibraryInfo *TLI = getAnalysisIfAvailable<TargetLibraryInfo>();
+
// Start out with all of the instructions in the worklist...
std::vector<Instruction*> WorkList;
for (inst_iterator i = inst_begin(F), e = inst_end(F); i != e; ++i)
@@ -101,7 +105,7 @@ bool DCE::runOnFunction(Function &F) {
Instruction *I = WorkList.back();
WorkList.pop_back();
- if (isInstructionTriviallyDead(I)) { // If the instruction is dead.
+ if (isInstructionTriviallyDead(I, TLI)) { // If the instruction is dead.
// Loop over all of the values that the instruction uses, if there are
// instructions being used, add them to the worklist, because they might
// go dead after this one is removed.
diff --git a/lib/Transforms/Scalar/DeadStoreElimination.cpp b/lib/Transforms/Scalar/DeadStoreElimination.cpp
index 8b1283f..1ff4329 100644
--- a/lib/Transforms/Scalar/DeadStoreElimination.cpp
+++ b/lib/Transforms/Scalar/DeadStoreElimination.cpp
@@ -106,6 +106,7 @@ FunctionPass *llvm::createDeadStoreEliminationPass() { return new DSE(); }
///
static void DeleteDeadInstruction(Instruction *I,
MemoryDependenceAnalysis &MD,
+ const TargetLibraryInfo *TLI,
SmallSetVector<Value*, 16> *ValueSet = 0) {
SmallVector<Instruction*, 32> NowDeadInsts;
@@ -130,7 +131,7 @@ static void DeleteDeadInstruction(Instruction *I,
if (!Op->use_empty()) continue;
if (Instruction *OpI = dyn_cast<Instruction>(Op))
- if (isInstructionTriviallyDead(OpI))
+ if (isInstructionTriviallyDead(OpI, TLI))
NowDeadInsts.push_back(OpI);
}
@@ -276,7 +277,7 @@ static Value *getStoredPointerOperand(Instruction *I) {
static uint64_t getPointerSize(const Value *V, AliasAnalysis &AA) {
uint64_t Size;
- if (getObjectSize(V, Size, AA.getTargetData()))
+ if (getObjectSize(V, Size, AA.getTargetData(), AA.getTargetLibraryInfo()))
return Size;
return AliasAnalysis::UnknownSize;
}
@@ -454,7 +455,7 @@ bool DSE::runOnBasicBlock(BasicBlock &BB) {
Instruction *Inst = BBI++;
// Handle 'free' calls specially.
- if (CallInst *F = isFreeCall(Inst)) {
+ if (CallInst *F = isFreeCall(Inst, AA->getTargetLibraryInfo())) {
MadeChange |= HandleFree(F);
continue;
}
@@ -483,7 +484,7 @@ bool DSE::runOnBasicBlock(BasicBlock &BB) {
// in case we need it.
WeakVH NextInst(BBI);
- DeleteDeadInstruction(SI, *MD);
+ DeleteDeadInstruction(SI, *MD, AA->getTargetLibraryInfo());
if (NextInst == 0) // Next instruction deleted.
BBI = BB.begin();
@@ -530,7 +531,7 @@ bool DSE::runOnBasicBlock(BasicBlock &BB) {
<< *DepWrite << "\n KILLER: " << *Inst << '\n');
// Delete the store and now-dead instructions that feed it.
- DeleteDeadInstruction(DepWrite, *MD);
+ DeleteDeadInstruction(DepWrite, *MD, AA->getTargetLibraryInfo());
++NumFastStores;
MadeChange = true;
@@ -640,7 +641,7 @@ bool DSE::HandleFree(CallInst *F) {
Instruction *Next = llvm::next(BasicBlock::iterator(Dependency));
// DCE instructions only used to calculate that store
- DeleteDeadInstruction(Dependency, *MD);
+ DeleteDeadInstruction(Dependency, *MD, AA->getTargetLibraryInfo());
++NumFastStores;
MadeChange = true;
@@ -680,7 +681,8 @@ bool DSE::handleEndBlock(BasicBlock &BB) {
// Okay, so these are dead heap objects, but if the pointer never escapes
// then it's leaked by this function anyways.
- else if (isAllocLikeFn(I) && !PointerMayBeCaptured(I, true, true))
+ else if (isAllocLikeFn(I, AA->getTargetLibraryInfo()) &&
+ !PointerMayBeCaptured(I, true, true))
DeadStackObjects.insert(I);
}
@@ -724,7 +726,8 @@ bool DSE::handleEndBlock(BasicBlock &BB) {
dbgs() << '\n');
// DCE instructions only used to calculate that store.
- DeleteDeadInstruction(Dead, *MD, &DeadStackObjects);
+ DeleteDeadInstruction(Dead, *MD, AA->getTargetLibraryInfo(),
+ &DeadStackObjects);
++NumFastStores;
MadeChange = true;
continue;
@@ -732,9 +735,10 @@ bool DSE::handleEndBlock(BasicBlock &BB) {
}
// Remove any dead non-memory-mutating instructions.
- if (isInstructionTriviallyDead(BBI)) {
+ if (isInstructionTriviallyDead(BBI, AA->getTargetLibraryInfo())) {
Instruction *Inst = BBI++;
- DeleteDeadInstruction(Inst, *MD, &DeadStackObjects);
+ DeleteDeadInstruction(Inst, *MD, AA->getTargetLibraryInfo(),
+ &DeadStackObjects);
++NumFastOther;
MadeChange = true;
continue;
@@ -750,7 +754,7 @@ bool DSE::handleEndBlock(BasicBlock &BB) {
if (CallSite CS = cast<Value>(BBI)) {
// Remove allocation function calls from the list of dead stack objects;
// there can't be any references before the definition.
- if (isAllocLikeFn(BBI))
+ if (isAllocLikeFn(BBI, AA->getTargetLibraryInfo()))
DeadStackObjects.remove(BBI);
// If this call does not access memory, it can't be loading any of our
@@ -771,15 +775,15 @@ bool DSE::handleEndBlock(BasicBlock &BB) {
LiveAllocas.push_back(*I);
}
- for (SmallVector<Value*, 8>::iterator I = LiveAllocas.begin(),
- E = LiveAllocas.end(); I != E; ++I)
- DeadStackObjects.remove(*I);
-
// If all of the allocas were clobbered by the call then we're not going
// to find anything else to process.
- if (DeadStackObjects.empty())
+ if (DeadStackObjects.size() == LiveAllocas.size())
break;
+ for (SmallVector<Value*, 8>::iterator I = LiveAllocas.begin(),
+ E = LiveAllocas.end(); I != E; ++I)
+ DeadStackObjects.remove(*I);
+
continue;
}
diff --git a/lib/Transforms/Scalar/EarlyCSE.cpp b/lib/Transforms/Scalar/EarlyCSE.cpp
index 9759549..2627113 100644
--- a/lib/Transforms/Scalar/EarlyCSE.cpp
+++ b/lib/Transforms/Scalar/EarlyCSE.cpp
@@ -374,7 +374,7 @@ bool EarlyCSE::processNode(DomTreeNode *Node) {
Instruction *Inst = I++;
// Dead instructions should just be removed.
- if (isInstructionTriviallyDead(Inst)) {
+ if (isInstructionTriviallyDead(Inst, TLI)) {
DEBUG(dbgs() << "EarlyCSE DCE: " << *Inst << '\n');
Inst->eraseFromParent();
Changed = true;
diff --git a/lib/Transforms/Scalar/GVN.cpp b/lib/Transforms/Scalar/GVN.cpp
index 4822fd0..16ae6ad 100644
--- a/lib/Transforms/Scalar/GVN.cpp
+++ b/lib/Transforms/Scalar/GVN.cpp
@@ -271,16 +271,16 @@ void ValueTable::add(Value *V, uint32_t num) {
valueNumbering.insert(std::make_pair(V, num));
}
-uint32_t ValueTable::lookup_or_add_call(CallInst* C) {
+uint32_t ValueTable::lookup_or_add_call(CallInst *C) {
if (AA->doesNotAccessMemory(C)) {
Expression exp = create_expression(C);
- uint32_t& e = expressionNumbering[exp];
+ uint32_t &e = expressionNumbering[exp];
if (!e) e = nextValueNumber++;
valueNumbering[C] = e;
return e;
} else if (AA->onlyReadsMemory(C)) {
Expression exp = create_expression(C);
- uint32_t& e = expressionNumbering[exp];
+ uint32_t &e = expressionNumbering[exp];
if (!e) {
e = nextValueNumber++;
valueNumbering[C] = e;
@@ -413,7 +413,7 @@ uint32_t ValueTable::lookup_or_add(Value *V) {
case Instruction::LShr:
case Instruction::AShr:
case Instruction::And:
- case Instruction::Or :
+ case Instruction::Or:
case Instruction::Xor:
case Instruction::ICmp:
case Instruction::FCmp:
@@ -632,6 +632,7 @@ INITIALIZE_PASS_DEPENDENCY(TargetLibraryInfo)
INITIALIZE_AG_DEPENDENCY(AliasAnalysis)
INITIALIZE_PASS_END(GVN, "gvn", "Global Value Numbering", false, false)
+#ifndef NDEBUG
void GVN::dump(DenseMap<uint32_t, Value*>& d) {
errs() << "{\n";
for (DenseMap<uint32_t, Value*>::iterator I = d.begin(),
@@ -641,6 +642,7 @@ void GVN::dump(DenseMap<uint32_t, Value*>& d) {
}
errs() << "}\n";
}
+#endif
/// IsValueFullyAvailableInBlock - Return true if we can prove that the value
/// we're analyzing is fully available in the specified block. As we go, keep
@@ -1436,7 +1438,7 @@ bool GVN::processNonLocalLoad(LoadInst *LI) {
Instruction *DepInst = DepInfo.getInst();
// Loading the allocation -> undef.
- if (isa<AllocaInst>(DepInst) || isMallocLikeFn(DepInst) ||
+ if (isa<AllocaInst>(DepInst) || isMallocLikeFn(DepInst, TLI) ||
// Loading immediately after lifetime begin -> undef.
isLifetimeStart(DepInst)) {
ValuesPerBlock.push_back(AvailableValueInBlock::get(DepBB,
@@ -1951,7 +1953,7 @@ bool GVN::processLoad(LoadInst *L) {
// If this load really doesn't depend on anything, then we must be loading an
// undef value. This can happen when loading for a fresh allocation with no
// intervening stores, for example.
- if (isa<AllocaInst>(DepInst) || isMallocLikeFn(DepInst)) {
+ if (isa<AllocaInst>(DepInst) || isMallocLikeFn(DepInst, TLI)) {
L->replaceAllUsesWith(UndefValue::get(L->getType()));
markInstructionForDeletion(L);
++NumGVNLoad;
@@ -2231,12 +2233,20 @@ bool GVN::processInstruction(Instruction *I) {
Value *SwitchCond = SI->getCondition();
BasicBlock *Parent = SI->getParent();
bool Changed = false;
+
+ // Remember how many outgoing edges there are to every successor.
+ SmallDenseMap<BasicBlock *, unsigned, 16> SwitchEdges;
+ for (unsigned i = 0, n = SI->getNumSuccessors(); i != n; ++i)
+ ++SwitchEdges[SI->getSuccessor(i)];
+
for (SwitchInst::CaseIt i = SI->case_begin(), e = SI->case_end();
i != e; ++i) {
BasicBlock *Dst = i.getCaseSuccessor();
- BasicBlockEdge E(Parent, Dst);
- if (E.isSingleEdge())
+ // If there is only a single edge, propagate the case value into it.
+ if (SwitchEdges.lookup(Dst) == 1) {
+ BasicBlockEdge E(Parent, Dst);
Changed |= propagateEquality(SwitchCond, i.getCaseValue(), E);
+ }
}
return Changed;
}
diff --git a/lib/Transforms/Scalar/IndVarSimplify.cpp b/lib/Transforms/Scalar/IndVarSimplify.cpp
index 37f8bdf..c933a17 100644
--- a/lib/Transforms/Scalar/IndVarSimplify.cpp
+++ b/lib/Transforms/Scalar/IndVarSimplify.cpp
@@ -44,6 +44,7 @@
#include "llvm/Transforms/Utils/BasicBlockUtils.h"
#include "llvm/Transforms/Utils/SimplifyIndVar.h"
#include "llvm/Target/TargetData.h"
+#include "llvm/Target/TargetLibraryInfo.h"
#include "llvm/ADT/DenseMap.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/Statistic.h"
@@ -68,6 +69,7 @@ namespace {
ScalarEvolution *SE;
DominatorTree *DT;
TargetData *TD;
+ TargetLibraryInfo *TLI;
SmallVector<WeakVH, 16> DeadInsts;
bool Changed;
@@ -414,11 +416,11 @@ void IndVarSimplify::HandleFloatingPointIV(Loop *L, PHINode *PN) {
// new comparison.
NewCompare->takeName(Compare);
Compare->replaceAllUsesWith(NewCompare);
- RecursivelyDeleteTriviallyDeadInstructions(Compare);
+ RecursivelyDeleteTriviallyDeadInstructions(Compare, TLI);
// Delete the old floating point increment.
Incr->replaceAllUsesWith(UndefValue::get(Incr->getType()));
- RecursivelyDeleteTriviallyDeadInstructions(Incr);
+ RecursivelyDeleteTriviallyDeadInstructions(Incr, TLI);
// If the FP induction variable still has uses, this is because something else
// in the loop uses its value. In order to canonicalize the induction
@@ -431,7 +433,7 @@ void IndVarSimplify::HandleFloatingPointIV(Loop *L, PHINode *PN) {
Value *Conv = new SIToFPInst(NewPHI, PN->getType(), "indvar.conv",
PN->getParent()->getFirstInsertionPt());
PN->replaceAllUsesWith(Conv);
- RecursivelyDeleteTriviallyDeadInstructions(PN);
+ RecursivelyDeleteTriviallyDeadInstructions(PN, TLI);
}
Changed = true;
}
@@ -550,14 +552,14 @@ void IndVarSimplify::RewriteLoopExitValues(Loop *L, SCEVExpander &Rewriter) {
PN->setIncomingValue(i, ExitVal);
// If this instruction is dead now, delete it.
- RecursivelyDeleteTriviallyDeadInstructions(Inst);
+ RecursivelyDeleteTriviallyDeadInstructions(Inst, TLI);
if (NumPreds == 1) {
// Completely replace a single-pred PHI. This is safe, because the
// NewVal won't be variant in the loop, so we don't need an LCSSA phi
// node anymore.
PN->replaceAllUsesWith(ExitVal);
- RecursivelyDeleteTriviallyDeadInstructions(PN);
+ RecursivelyDeleteTriviallyDeadInstructions(PN, TLI);
}
}
if (NumPreds != 1) {
@@ -1697,6 +1699,7 @@ bool IndVarSimplify::runOnLoop(Loop *L, LPPassManager &LPM) {
SE = &getAnalysis<ScalarEvolution>();
DT = &getAnalysis<DominatorTree>();
TD = getAnalysisIfAvailable<TargetData>();
+ TLI = getAnalysisIfAvailable<TargetLibraryInfo>();
DeadInsts.clear();
Changed = false;
@@ -1763,7 +1766,7 @@ bool IndVarSimplify::runOnLoop(Loop *L, LPPassManager &LPM) {
while (!DeadInsts.empty())
if (Instruction *Inst =
dyn_cast_or_null<Instruction>(&*DeadInsts.pop_back_val()))
- RecursivelyDeleteTriviallyDeadInstructions(Inst);
+ RecursivelyDeleteTriviallyDeadInstructions(Inst, TLI);
// The Rewriter may not be used from this point on.
@@ -1772,7 +1775,7 @@ bool IndVarSimplify::runOnLoop(Loop *L, LPPassManager &LPM) {
SinkUnusedInvariants(L);
// Clean up dead instructions.
- Changed |= DeleteDeadPHIs(L->getHeader());
+ Changed |= DeleteDeadPHIs(L->getHeader(), TLI);
// Check a post-condition.
assert(L->isLCSSAForm(*DT) &&
"Indvars did not leave the loop in lcssa form!");
diff --git a/lib/Transforms/Scalar/JumpThreading.cpp b/lib/Transforms/Scalar/JumpThreading.cpp
index dd42c59..20844c6 100644
--- a/lib/Transforms/Scalar/JumpThreading.cpp
+++ b/lib/Transforms/Scalar/JumpThreading.cpp
@@ -1455,7 +1455,7 @@ bool JumpThreading::ThreadEdge(BasicBlock *BB,
// At this point, the IR is fully up to date and consistent. Do a quick scan
// over the new instructions and zap any that are constants or dead. This
// frequently happens because of phi translation.
- SimplifyInstructionsInBlock(NewBB, TD);
+ SimplifyInstructionsInBlock(NewBB, TD, TLI);
// Threaded an edge!
++NumThreads;
diff --git a/lib/Transforms/Scalar/LICM.cpp b/lib/Transforms/Scalar/LICM.cpp
index 0192e92..99bedce 100644
--- a/lib/Transforms/Scalar/LICM.cpp
+++ b/lib/Transforms/Scalar/LICM.cpp
@@ -108,6 +108,9 @@ namespace {
BasicBlock *Preheader; // The preheader block of the current loop...
Loop *CurLoop; // The current loop we are working on...
AliasSetTracker *CurAST; // AliasSet information for the current loop...
+ bool MayThrow; // The current loop contains an instruction which
+ // may throw, thus preventing code motion of
+ // instructions with side effects.
DenseMap<Loop*, AliasSetTracker*> LoopToAliasSetMap;
/// cloneBasicBlockAnalysis - Simple Analysis hook. Clone alias set info.
@@ -240,6 +243,15 @@ bool LICM::runOnLoop(Loop *L, LPPassManager &LPM) {
CurAST->add(*BB); // Incorporate the specified basic block
}
+ MayThrow = false;
+ // TODO: We've already searched for instructions which may throw in subloops.
+ // We may want to reuse this information.
+ for (Loop::block_iterator BB = L->block_begin(), BBE = L->block_end();
+ (BB != BBE) && !MayThrow ; ++BB)
+ for (BasicBlock::iterator I = (*BB)->begin(), E = (*BB)->end();
+ (I != E) && !MayThrow; ++I)
+ MayThrow |= I->mayThrow();
+
// We want to visit all of the instructions in this loop... that are not parts
// of our subloops (they have already had their invariants hoisted out of
// their loop, into this loop, so there is no need to process the BODIES of
@@ -307,7 +319,7 @@ void LICM::SinkRegion(DomTreeNode *N) {
// If the instruction is dead, we would try to sink it because it isn't used
// in the loop, instead, just delete it.
- if (isInstructionTriviallyDead(&I)) {
+ if (isInstructionTriviallyDead(&I, TLI)) {
DEBUG(dbgs() << "LICM deleting dead inst: " << I << '\n');
++II;
CurAST->deleteValue(&I);
@@ -418,17 +430,22 @@ bool LICM::canSinkOrHoistInst(Instruction &I) {
if (!FoundMod) return true;
}
- // FIXME: This should use mod/ref information to see if we can hoist or sink
- // the call.
+ // FIXME: This should use mod/ref information to see if we can hoist or
+ // sink the call.
return false;
}
- // Otherwise these instructions are hoistable/sinkable
- return isa<BinaryOperator>(I) || isa<CastInst>(I) ||
- isa<SelectInst>(I) || isa<GetElementPtrInst>(I) || isa<CmpInst>(I) ||
- isa<InsertElementInst>(I) || isa<ExtractElementInst>(I) ||
- isa<ShuffleVectorInst>(I);
+ // Only these instructions are hoistable/sinkable.
+ bool HoistableKind = (isa<BinaryOperator>(I) || isa<CastInst>(I) ||
+ isa<SelectInst>(I) || isa<GetElementPtrInst>(I) ||
+ isa<CmpInst>(I) || isa<InsertElementInst>(I) ||
+ isa<ExtractElementInst>(I) ||
+ isa<ShuffleVectorInst>(I));
+ if (!HoistableKind)
+ return false;
+
+ return isSafeToExecuteUnconditionally(I);
}
/// isNotUsedInLoop - Return true if the only users of this instruction are
@@ -604,6 +621,12 @@ bool LICM::isSafeToExecuteUnconditionally(Instruction &Inst) {
}
bool LICM::isGuaranteedToExecute(Instruction &Inst) {
+
+ // Somewhere in this loop there is an instruction which may throw and make us
+ // exit the loop.
+ if (MayThrow)
+ return false;
+
// Otherwise we have to check to make sure that the instruction dominates all
// of the exit blocks. If it doesn't, then there is a path out of the loop
// which does not execute this instruction, so we can't hoist it.
diff --git a/lib/Transforms/Scalar/LoopIdiomRecognize.cpp b/lib/Transforms/Scalar/LoopIdiomRecognize.cpp
index ac1082c..a72e288 100644
--- a/lib/Transforms/Scalar/LoopIdiomRecognize.cpp
+++ b/lib/Transforms/Scalar/LoopIdiomRecognize.cpp
@@ -132,7 +132,8 @@ Pass *llvm::createLoopIdiomPass() { return new LoopIdiomRecognize(); }
/// and zero out all the operands of this instruction. If any of them become
/// dead, delete them and the computation tree that feeds them.
///
-static void deleteDeadInstruction(Instruction *I, ScalarEvolution &SE) {
+static void deleteDeadInstruction(Instruction *I, ScalarEvolution &SE,
+ const TargetLibraryInfo *TLI) {
SmallVector<Instruction*, 32> NowDeadInsts;
NowDeadInsts.push_back(I);
@@ -153,7 +154,7 @@ static void deleteDeadInstruction(Instruction *I, ScalarEvolution &SE) {
if (!Op->use_empty()) continue;
if (Instruction *OpI = dyn_cast<Instruction>(Op))
- if (isInstructionTriviallyDead(OpI))
+ if (isInstructionTriviallyDead(OpI, TLI))
NowDeadInsts.push_back(OpI);
}
@@ -164,10 +165,11 @@ static void deleteDeadInstruction(Instruction *I, ScalarEvolution &SE) {
/// deleteIfDeadInstruction - If the specified value is a dead instruction,
/// delete it and any recursively used instructions.
-static void deleteIfDeadInstruction(Value *V, ScalarEvolution &SE) {
+static void deleteIfDeadInstruction(Value *V, ScalarEvolution &SE,
+ const TargetLibraryInfo *TLI) {
if (Instruction *I = dyn_cast<Instruction>(V))
- if (isInstructionTriviallyDead(I))
- deleteDeadInstruction(I, SE);
+ if (isInstructionTriviallyDead(I, TLI))
+ deleteDeadInstruction(I, SE, TLI);
}
bool LoopIdiomRecognize::runOnLoop(Loop *L, LPPassManager &LPM) {
@@ -490,7 +492,7 @@ processLoopStridedStore(Value *DestPtr, unsigned StoreSize,
StoreSize, getAnalysis<AliasAnalysis>(), TheStore)){
Expander.clear();
// If we generated new code for the base pointer, clean up.
- deleteIfDeadInstruction(BasePtr, *SE);
+ deleteIfDeadInstruction(BasePtr, *SE, TLI);
return false;
}
@@ -538,7 +540,7 @@ processLoopStridedStore(Value *DestPtr, unsigned StoreSize,
// Okay, the memset has been formed. Zap the original store and anything that
// feeds into it.
- deleteDeadInstruction(TheStore, *SE);
+ deleteDeadInstruction(TheStore, *SE, TLI);
++NumMemSet;
return true;
}
@@ -579,7 +581,7 @@ processLoopStoreOfLoopLoad(StoreInst *SI, unsigned StoreSize,
getAnalysis<AliasAnalysis>(), SI)) {
Expander.clear();
// If we generated new code for the base pointer, clean up.
- deleteIfDeadInstruction(StoreBasePtr, *SE);
+ deleteIfDeadInstruction(StoreBasePtr, *SE, TLI);
return false;
}
@@ -594,8 +596,8 @@ processLoopStoreOfLoopLoad(StoreInst *SI, unsigned StoreSize,
StoreSize, getAnalysis<AliasAnalysis>(), SI)) {
Expander.clear();
// If we generated new code for the base pointer, clean up.
- deleteIfDeadInstruction(LoadBasePtr, *SE);
- deleteIfDeadInstruction(StoreBasePtr, *SE);
+ deleteIfDeadInstruction(LoadBasePtr, *SE, TLI);
+ deleteIfDeadInstruction(StoreBasePtr, *SE, TLI);
return false;
}
@@ -628,7 +630,7 @@ processLoopStoreOfLoopLoad(StoreInst *SI, unsigned StoreSize,
// Okay, the memset has been formed. Zap the original store and anything that
// feeds into it.
- deleteDeadInstruction(SI, *SE);
+ deleteDeadInstruction(SI, *SE, TLI);
++NumMemCpy;
return true;
}
diff --git a/lib/Transforms/Scalar/LoopInstSimplify.cpp b/lib/Transforms/Scalar/LoopInstSimplify.cpp
index 982400c..f5daa7b 100644
--- a/lib/Transforms/Scalar/LoopInstSimplify.cpp
+++ b/lib/Transforms/Scalar/LoopInstSimplify.cpp
@@ -120,7 +120,7 @@ bool LoopInstSimplify::runOnLoop(Loop *L, LPPassManager &LPM) {
++NumSimplified;
}
}
- LocalChanged |= RecursivelyDeleteTriviallyDeadInstructions(I);
+ LocalChanged |= RecursivelyDeleteTriviallyDeadInstructions(I, TLI);
if (IsSubloopHeader && !isa<PHINode>(I))
break;
diff --git a/lib/Transforms/Scalar/LoopRotation.cpp b/lib/Transforms/Scalar/LoopRotation.cpp
index 7eeb152..abe07aa 100644
--- a/lib/Transforms/Scalar/LoopRotation.cpp
+++ b/lib/Transforms/Scalar/LoopRotation.cpp
@@ -24,6 +24,7 @@
#include "llvm/Transforms/Utils/BasicBlockUtils.h"
#include "llvm/Transforms/Utils/SSAUpdater.h"
#include "llvm/Transforms/Utils/ValueMapper.h"
+#include "llvm/Support/CFG.h"
#include "llvm/Support/Debug.h"
#include "llvm/ADT/Statistic.h"
using namespace llvm;
@@ -256,6 +257,7 @@ bool LoopRotate::rotateLoop(Loop *L) {
return false;
BasicBlock *OrigHeader = L->getHeader();
+ BasicBlock *OrigLatch = L->getLoopLatch();
BranchInst *BI = dyn_cast<BranchInst>(OrigHeader->getTerminator());
if (BI == 0 || BI->isUnconditional())
@@ -267,13 +269,9 @@ bool LoopRotate::rotateLoop(Loop *L) {
if (!L->isLoopExiting(OrigHeader))
return false;
- // Updating PHInodes in loops with multiple exits adds complexity.
- // Keep it simple, and restrict loop rotation to loops with one exit only.
- // In future, lift this restriction and support for multiple exits if
- // required.
- SmallVector<BasicBlock*, 8> ExitBlocks;
- L->getExitBlocks(ExitBlocks);
- if (ExitBlocks.size() > 1)
+ // If the loop latch already contains a branch that leaves the loop then the
+ // loop is already rotated.
+ if (OrigLatch == 0 || L->isLoopExiting(OrigLatch))
return false;
// Check size of original header and reject loop if it is very big.
@@ -286,11 +284,10 @@ bool LoopRotate::rotateLoop(Loop *L) {
// Now, this loop is suitable for rotation.
BasicBlock *OrigPreheader = L->getLoopPreheader();
- BasicBlock *OrigLatch = L->getLoopLatch();
// If the loop could not be converted to canonical form, it must have an
// indirectbr in it, just give up.
- if (OrigPreheader == 0 || OrigLatch == 0)
+ if (OrigPreheader == 0)
return false;
// Anything ScalarEvolution may know about this loop or the PHI nodes
@@ -298,6 +295,8 @@ bool LoopRotate::rotateLoop(Loop *L) {
if (ScalarEvolution *SE = getAnalysisIfAvailable<ScalarEvolution>())
SE->forgetLoop(L);
+ DEBUG(dbgs() << "LoopRotation: rotating "; L->dump());
+
// Find new Loop header. NewHeader is a Header's one and only successor
// that is inside loop. Header's other successor is outside the
// loop. Otherwise loop is not suitable for rotation.
@@ -408,10 +407,19 @@ bool LoopRotate::rotateLoop(Loop *L) {
// Update DominatorTree to reflect the CFG change we just made. Then split
// edges as necessary to preserve LoopSimplify form.
if (DominatorTree *DT = getAnalysisIfAvailable<DominatorTree>()) {
- // Since OrigPreheader now has the conditional branch to Exit block, it is
- // the dominator of Exit.
- DT->changeImmediateDominator(Exit, OrigPreheader);
- DT->changeImmediateDominator(NewHeader, OrigPreheader);
+ // Everything that was dominated by the old loop header is now dominated
+ // by the original loop preheader. Conceptually the header was merged
+ // into the preheader, even though we reuse the actual block as a new
+ // loop latch.
+ DomTreeNode *OrigHeaderNode = DT->getNode(OrigHeader);
+ SmallVector<DomTreeNode *, 8> HeaderChildren(OrigHeaderNode->begin(),
+ OrigHeaderNode->end());
+ DomTreeNode *OrigPreheaderNode = DT->getNode(OrigPreheader);
+ for (unsigned I = 0, E = HeaderChildren.size(); I != E; ++I)
+ DT->changeImmediateDominator(HeaderChildren[I], OrigPreheaderNode);
+
+ assert(DT->getNode(Exit)->getIDom() == OrigPreheaderNode);
+ assert(DT->getNode(NewHeader)->getIDom() == OrigPreheaderNode);
// Update OrigHeader to be dominated by the new header block.
DT->changeImmediateDominator(OrigHeader, OrigLatch);
@@ -440,6 +448,35 @@ bool LoopRotate::rotateLoop(Loop *L) {
// Update OrigHeader to be dominated by the new header block.
DT->changeImmediateDominator(NewHeader, OrigPreheader);
DT->changeImmediateDominator(OrigHeader, OrigLatch);
+
+ // Brute force incremental dominator tree update. Call
+ // findNearestCommonDominator on all CFG predecessors of each child of the
+ // original header.
+ DomTreeNode *OrigHeaderNode = DT->getNode(OrigHeader);
+ SmallVector<DomTreeNode *, 8> HeaderChildren(OrigHeaderNode->begin(),
+ OrigHeaderNode->end());
+ bool Changed;
+ do {
+ Changed = false;
+ for (unsigned I = 0, E = HeaderChildren.size(); I != E; ++I) {
+ DomTreeNode *Node = HeaderChildren[I];
+ BasicBlock *BB = Node->getBlock();
+
+ pred_iterator PI = pred_begin(BB);
+ BasicBlock *NearestDom = *PI;
+ for (pred_iterator PE = pred_end(BB); PI != PE; ++PI)
+ NearestDom = DT->findNearestCommonDominator(NearestDom, *PI);
+
+ // Remember if this changes the DomTree.
+ if (Node->getIDom()->getBlock() != NearestDom) {
+ DT->changeImmediateDominator(BB, NearestDom);
+ Changed = true;
+ }
+ }
+
+ // If the dominator changed, this may have an effect on other
+ // predecessors, continue until we reach a fixpoint.
+ } while (Changed);
}
}
@@ -452,6 +489,8 @@ bool LoopRotate::rotateLoop(Loop *L) {
// emitted code isn't too gross in this common case.
MergeBlockIntoPredecessor(OrigHeader, this);
+ DEBUG(dbgs() << "LoopRotation: into "; L->dump());
+
++NumRotated;
return true;
}
diff --git a/lib/Transforms/Scalar/LoopStrengthReduce.cpp b/lib/Transforms/Scalar/LoopStrengthReduce.cpp
index 0ae7a51..d7495da 100644
--- a/lib/Transforms/Scalar/LoopStrengthReduce.cpp
+++ b/lib/Transforms/Scalar/LoopStrengthReduce.cpp
@@ -121,9 +121,11 @@ void RegSortData::print(raw_ostream &OS) const {
OS << "[NumUses=" << UsedByIndices.count() << ']';
}
+#ifndef NDEBUG
void RegSortData::dump() const {
print(errs()); errs() << '\n';
}
+#endif
namespace {
@@ -414,9 +416,11 @@ void Formula::print(raw_ostream &OS) const {
}
}
+#ifndef NDEBUG
void Formula::dump() const {
print(errs()); errs() << '\n';
}
+#endif
/// isAddRecSExtable - Return true if the given addrec can be sign-extended
/// without changing its value.
@@ -974,9 +978,11 @@ void Cost::print(raw_ostream &OS) const {
OS << ", plus " << SetupCost << " setup cost";
}
+#ifndef NDEBUG
void Cost::dump() const {
print(errs()); errs() << '\n';
}
+#endif
namespace {
@@ -1060,9 +1066,11 @@ void LSRFixup::print(raw_ostream &OS) const {
OS << ", Offset=" << Offset;
}
+#ifndef NDEBUG
void LSRFixup::dump() const {
print(errs()); errs() << '\n';
}
+#endif
namespace {
@@ -1252,9 +1260,11 @@ void LSRUse::print(raw_ostream &OS) const {
OS << ", widest fixup type: " << *WidestFixupType;
}
+#ifndef NDEBUG
void LSRUse::dump() const {
print(errs()); errs() << '\n';
}
+#endif
/// isLegalUse - Test whether the use described by AM is "legal", meaning it can
/// be completely folded into the user instruction at isel time. This includes
@@ -3436,9 +3446,11 @@ void WorkItem::print(raw_ostream &OS) const {
<< " , add offset " << Imm;
}
+#ifndef NDEBUG
void WorkItem::dump() const {
print(errs()); errs() << '\n';
}
+#endif
/// GenerateCrossUseConstantOffsets - Look for registers which are a constant
/// distance apart and try to form reuse opportunities between them.
@@ -4731,9 +4743,11 @@ void LSRInstance::print(raw_ostream &OS) const {
print_uses(OS);
}
+#ifndef NDEBUG
void LSRInstance::dump() const {
print(errs()); errs() << '\n';
}
+#endif
namespace {
diff --git a/lib/Transforms/Scalar/ObjCARC.cpp b/lib/Transforms/Scalar/ObjCARC.cpp
index 3222f20..dce8e8b 100644
--- a/lib/Transforms/Scalar/ObjCARC.cpp
+++ b/lib/Transforms/Scalar/ObjCARC.cpp
@@ -1236,16 +1236,19 @@ bool ProvenanceAnalysis::relatedCheck(const Value *A, const Value *B) {
// An ObjC-Identified object can't alias a load if it is never locally stored.
if (AIsIdentified) {
+ // Check for an obvious escape.
+ if (isa<LoadInst>(B))
+ return isStoredObjCPointer(A);
if (BIsIdentified) {
- // If both pointers have provenance, they can be directly compared.
- if (A != B)
- return false;
- } else {
- if (isa<LoadInst>(B))
- return isStoredObjCPointer(A);
+ // Check for an obvious escape.
+ if (isa<LoadInst>(A))
+ return isStoredObjCPointer(B);
+ // Both pointers are identified and escapes aren't an evident problem.
+ return false;
}
- } else {
- if (BIsIdentified && isa<LoadInst>(A))
+ } else if (BIsIdentified) {
+ // Check for an obvious escape.
+ if (isa<LoadInst>(A))
return isStoredObjCPointer(B);
}
@@ -1381,9 +1384,6 @@ namespace {
/// PtrState - This class summarizes several per-pointer runtime properties
/// which are propogated through the flow graph.
class PtrState {
- /// NestCount - The known minimum level of retain+release nesting.
- unsigned NestCount;
-
/// KnownPositiveRefCount - True if the reference count is known to
/// be incremented.
bool KnownPositiveRefCount;
@@ -1401,7 +1401,7 @@ namespace {
/// TODO: Encapsulate this better.
RRInfo RRI;
- PtrState() : NestCount(0), KnownPositiveRefCount(false), Partial(false),
+ PtrState() : KnownPositiveRefCount(false), Partial(false),
Seq(S_None) {}
void SetKnownPositiveRefCount() {
@@ -1416,18 +1416,6 @@ namespace {
return KnownPositiveRefCount;
}
- void IncrementNestCount() {
- if (NestCount != UINT_MAX) ++NestCount;
- }
-
- void DecrementNestCount() {
- if (NestCount != 0) --NestCount;
- }
-
- bool IsKnownNested() const {
- return NestCount > 0;
- }
-
void SetSeq(Sequence NewSeq) {
Seq = NewSeq;
}
@@ -1454,7 +1442,6 @@ void
PtrState::Merge(const PtrState &Other, bool TopDown) {
Seq = MergeSeqs(Seq, Other.Seq, TopDown);
KnownPositiveRefCount = KnownPositiveRefCount && Other.KnownPositiveRefCount;
- NestCount = std::min(NestCount, Other.NestCount);
// We can't merge a plain objc_retain with an objc_retainBlock.
if (RRI.IsRetainBlock != Other.RRI.IsRetainBlock)
@@ -1868,6 +1855,26 @@ Constant *ObjCARCOpt::getAutoreleaseCallee(Module *M) {
return AutoreleaseCallee;
}
+/// IsPotentialUse - Test whether the given value is possible a
+/// reference-counted pointer, including tests which utilize AliasAnalysis.
+static bool IsPotentialUse(const Value *Op, AliasAnalysis &AA) {
+ // First make the rudimentary check.
+ if (!IsPotentialUse(Op))
+ return false;
+
+ // Objects in constant memory are not reference-counted.
+ if (AA.pointsToConstantMemory(Op))
+ return false;
+
+ // Pointers in constant memory are not pointing to reference-counted objects.
+ if (const LoadInst *LI = dyn_cast<LoadInst>(Op))
+ if (AA.pointsToConstantMemory(LI->getPointerOperand()))
+ return false;
+
+ // Otherwise assume the worst.
+ return true;
+}
+
/// CanAlterRefCount - Test whether the given instruction can result in a
/// reference count modification (positive or negative) for the pointer's
/// object.
@@ -1894,7 +1901,7 @@ CanAlterRefCount(const Instruction *Inst, const Value *Ptr,
for (ImmutableCallSite::arg_iterator I = CS.arg_begin(), E = CS.arg_end();
I != E; ++I) {
const Value *Op = *I;
- if (IsPotentialUse(Op) && PA.related(Ptr, Op))
+ if (IsPotentialUse(Op, *PA.getAA()) && PA.related(Ptr, Op))
return true;
}
return false;
@@ -1919,14 +1926,14 @@ CanUse(const Instruction *Inst, const Value *Ptr, ProvenanceAnalysis &PA,
// Comparing a pointer with null, or any other constant, isn't really a use,
// because we don't care what the pointer points to, or about the values
// of any other dynamic reference-counted pointers.
- if (!IsPotentialUse(ICI->getOperand(1)))
+ if (!IsPotentialUse(ICI->getOperand(1), *PA.getAA()))
return false;
} else if (ImmutableCallSite CS = static_cast<const Value *>(Inst)) {
// For calls, just check the arguments (and not the callee operand).
for (ImmutableCallSite::arg_iterator OI = CS.arg_begin(),
OE = CS.arg_end(); OI != OE; ++OI) {
const Value *Op = *OI;
- if (IsPotentialUse(Op) && PA.related(Ptr, Op))
+ if (IsPotentialUse(Op, *PA.getAA()) && PA.related(Ptr, Op))
return true;
}
return false;
@@ -1936,14 +1943,14 @@ CanUse(const Instruction *Inst, const Value *Ptr, ProvenanceAnalysis &PA,
const Value *Op = GetUnderlyingObjCPtr(SI->getPointerOperand());
// If we can't tell what the underlying object was, assume there is a
// dependence.
- return IsPotentialUse(Op) && PA.related(Op, Ptr);
+ return IsPotentialUse(Op, *PA.getAA()) && PA.related(Op, Ptr);
}
// Check each operand for a match.
for (User::const_op_iterator OI = Inst->op_begin(), OE = Inst->op_end();
OI != OE; ++OI) {
const Value *Op = *OI;
- if (IsPotentialUse(Op) && PA.related(Ptr, Op))
+ if (IsPotentialUse(Op, *PA.getAA()) && PA.related(Ptr, Op))
return true;
}
return false;
@@ -2612,11 +2619,11 @@ ObjCARCOpt::VisitInstructionBottomUp(Instruction *Inst,
MDNode *ReleaseMetadata = Inst->getMetadata(ImpreciseReleaseMDKind);
S.ResetSequenceProgress(ReleaseMetadata ? S_MovableRelease : S_Release);
S.RRI.ReleaseMetadata = ReleaseMetadata;
- S.RRI.KnownSafe = S.IsKnownNested() || S.IsKnownIncremented();
+ S.RRI.KnownSafe = S.IsKnownIncremented();
S.RRI.IsTailCallRelease = cast<CallInst>(Inst)->isTailCall();
S.RRI.Calls.insert(Inst);
- S.IncrementNestCount();
+ S.SetKnownPositiveRefCount();
break;
}
case IC_RetainBlock:
@@ -2631,7 +2638,6 @@ ObjCARCOpt::VisitInstructionBottomUp(Instruction *Inst,
PtrState &S = MyStates.getPtrBottomUpState(Arg);
S.SetKnownPositiveRefCount();
- S.DecrementNestCount();
switch (S.GetSeq()) {
case S_Stop:
@@ -2747,8 +2753,9 @@ ObjCARCOpt::VisitBottomUp(BasicBlock *BB,
// Merge the states from each successor to compute the initial state
// for the current block.
- for (BBState::edge_iterator SI(MyStates.succ_begin()),
- SE(MyStates.succ_end()); SI != SE; ++SI) {
+ BBState::edge_iterator SI(MyStates.succ_begin()),
+ SE(MyStates.succ_end());
+ if (SI != SE) {
const BasicBlock *Succ = *SI;
DenseMap<const BasicBlock *, BBState>::iterator I = BBStates.find(Succ);
assert(I != BBStates.end());
@@ -2760,7 +2767,6 @@ ObjCARCOpt::VisitBottomUp(BasicBlock *BB,
assert(I != BBStates.end());
MyStates.MergeSucc(I->second);
}
- break;
}
// Visit all the instructions, bottom-up.
@@ -2823,12 +2829,11 @@ ObjCARCOpt::VisitInstructionTopDown(Instruction *Inst,
S.ResetSequenceProgress(S_Retain);
S.RRI.IsRetainBlock = Class == IC_RetainBlock;
- // Don't check S.IsKnownIncremented() here because it's not sufficient.
- S.RRI.KnownSafe = S.IsKnownNested();
+ S.RRI.KnownSafe = S.IsKnownIncremented();
S.RRI.Calls.insert(Inst);
}
- S.IncrementNestCount();
+ S.SetKnownPositiveRefCount();
// A retain can be a potential use; procede to the generic checking
// code below.
@@ -2838,7 +2843,7 @@ ObjCARCOpt::VisitInstructionTopDown(Instruction *Inst,
Arg = GetObjCArg(Inst);
PtrState &S = MyStates.getPtrTopDownState(Arg);
- S.DecrementNestCount();
+ S.ClearRefCount();
switch (S.GetSeq()) {
case S_Retain:
@@ -2935,8 +2940,9 @@ ObjCARCOpt::VisitTopDown(BasicBlock *BB,
// Merge the states from each predecessor to compute the initial state
// for the current block.
- for (BBState::edge_iterator PI(MyStates.pred_begin()),
- PE(MyStates.pred_end()); PI != PE; ++PI) {
+ BBState::edge_iterator PI(MyStates.pred_begin()),
+ PE(MyStates.pred_end());
+ if (PI != PE) {
const BasicBlock *Pred = *PI;
DenseMap<const BasicBlock *, BBState>::iterator I = BBStates.find(Pred);
assert(I != BBStates.end());
@@ -2948,7 +2954,6 @@ ObjCARCOpt::VisitTopDown(BasicBlock *BB,
assert(I != BBStates.end());
MyStates.MergePred(I->second);
}
- break;
}
// Visit all the instructions, top-down.
diff --git a/lib/Transforms/Scalar/SimplifyCFGPass.cpp b/lib/Transforms/Scalar/SimplifyCFGPass.cpp
index d13e4ab..6d27db1 100644
--- a/lib/Transforms/Scalar/SimplifyCFGPass.cpp
+++ b/lib/Transforms/Scalar/SimplifyCFGPass.cpp
@@ -59,9 +59,9 @@ FunctionPass *llvm::createCFGSimplificationPass() {
return new CFGSimplifyPass();
}
-/// ChangeToUnreachable - Insert an unreachable instruction before the specified
+/// changeToUnreachable - Insert an unreachable instruction before the specified
/// instruction, making it and the rest of the code in the block dead.
-static void ChangeToUnreachable(Instruction *I, bool UseLLVMTrap) {
+static void changeToUnreachable(Instruction *I, bool UseLLVMTrap) {
BasicBlock *BB = I->getParent();
// Loop over all of the successors, removing BB's entry from any PHI
// nodes.
@@ -87,8 +87,8 @@ static void ChangeToUnreachable(Instruction *I, bool UseLLVMTrap) {
}
}
-/// ChangeToCall - Convert the specified invoke into a normal call.
-static void ChangeToCall(InvokeInst *II) {
+/// changeToCall - Convert the specified invoke into a normal call.
+static void changeToCall(InvokeInst *II) {
SmallVector<Value*, 8> Args(II->op_begin(), II->op_end() - 3);
CallInst *NewCall = CallInst::Create(II->getCalledValue(), Args, "", II);
NewCall->takeName(II);
@@ -105,7 +105,7 @@ static void ChangeToCall(InvokeInst *II) {
II->eraseFromParent();
}
-static bool MarkAliveBlocks(BasicBlock *BB,
+static bool markAliveBlocks(BasicBlock *BB,
SmallPtrSet<BasicBlock*, 128> &Reachable) {
SmallVector<BasicBlock*, 128> Worklist;
@@ -129,7 +129,7 @@ static bool MarkAliveBlocks(BasicBlock *BB,
++BBI;
if (!isa<UnreachableInst>(BBI)) {
// Don't insert a call to llvm.trap right before the unreachable.
- ChangeToUnreachable(BBI, false);
+ changeToUnreachable(BBI, false);
Changed = true;
}
break;
@@ -148,7 +148,7 @@ static bool MarkAliveBlocks(BasicBlock *BB,
if (isa<UndefValue>(Ptr) ||
(isa<ConstantPointerNull>(Ptr) &&
SI->getPointerAddressSpace() == 0)) {
- ChangeToUnreachable(SI, true);
+ changeToUnreachable(SI, true);
Changed = true;
break;
}
@@ -159,7 +159,7 @@ static bool MarkAliveBlocks(BasicBlock *BB,
if (InvokeInst *II = dyn_cast<InvokeInst>(BB->getTerminator())) {
Value *Callee = II->getCalledValue();
if (isa<ConstantPointerNull>(Callee) || isa<UndefValue>(Callee)) {
- ChangeToUnreachable(II, true);
+ changeToUnreachable(II, true);
Changed = true;
} else if (II->doesNotThrow()) {
if (II->use_empty() && II->onlyReadsMemory()) {
@@ -168,7 +168,7 @@ static bool MarkAliveBlocks(BasicBlock *BB,
II->getUnwindDest()->removePredecessor(II->getParent());
II->eraseFromParent();
} else
- ChangeToCall(II);
+ changeToCall(II);
Changed = true;
}
}
@@ -180,12 +180,12 @@ static bool MarkAliveBlocks(BasicBlock *BB,
return Changed;
}
-/// RemoveUnreachableBlocksFromFn - Remove blocks that are not reachable, even
+/// removeUnreachableBlocksFromFn - Remove blocks that are not reachable, even
/// if they are in a dead cycle. Return true if a change was made, false
/// otherwise.
-static bool RemoveUnreachableBlocksFromFn(Function &F) {
+static bool removeUnreachableBlocksFromFn(Function &F) {
SmallPtrSet<BasicBlock*, 128> Reachable;
- bool Changed = MarkAliveBlocks(F.begin(), Reachable);
+ bool Changed = markAliveBlocks(F.begin(), Reachable);
// If there are unreachable blocks in the CFG...
if (Reachable.size() == F.size())
@@ -215,9 +215,9 @@ static bool RemoveUnreachableBlocksFromFn(Function &F) {
return true;
}
-/// MergeEmptyReturnBlocks - If we have more than one empty (other than phi
+/// mergeEmptyReturnBlocks - If we have more than one empty (other than phi
/// node) return blocks, merge them together to promote recursive block merging.
-static bool MergeEmptyReturnBlocks(Function &F) {
+static bool mergeEmptyReturnBlocks(Function &F) {
bool Changed = false;
BasicBlock *RetBlock = 0;
@@ -291,9 +291,9 @@ static bool MergeEmptyReturnBlocks(Function &F) {
return Changed;
}
-/// IterativeSimplifyCFG - Call SimplifyCFG on all the blocks in the function,
+/// iterativelySimplifyCFG - Call SimplifyCFG on all the blocks in the function,
/// iterating until no more changes are made.
-static bool IterativeSimplifyCFG(Function &F, const TargetData *TD) {
+static bool iterativelySimplifyCFG(Function &F, const TargetData *TD) {
bool Changed = false;
bool LocalChange = true;
while (LocalChange) {
@@ -317,24 +317,24 @@ static bool IterativeSimplifyCFG(Function &F, const TargetData *TD) {
//
bool CFGSimplifyPass::runOnFunction(Function &F) {
const TargetData *TD = getAnalysisIfAvailable<TargetData>();
- bool EverChanged = RemoveUnreachableBlocksFromFn(F);
- EverChanged |= MergeEmptyReturnBlocks(F);
- EverChanged |= IterativeSimplifyCFG(F, TD);
+ bool EverChanged = removeUnreachableBlocksFromFn(F);
+ EverChanged |= mergeEmptyReturnBlocks(F);
+ EverChanged |= iterativelySimplifyCFG(F, TD);
// If neither pass changed anything, we're done.
if (!EverChanged) return false;
- // IterativeSimplifyCFG can (rarely) make some loops dead. If this happens,
- // RemoveUnreachableBlocksFromFn is needed to nuke them, which means we should
+ // iterativelySimplifyCFG can (rarely) make some loops dead. If this happens,
+ // removeUnreachableBlocksFromFn is needed to nuke them, which means we should
// iterate between the two optimizations. We structure the code like this to
- // avoid reruning IterativeSimplifyCFG if the second pass of
- // RemoveUnreachableBlocksFromFn doesn't do anything.
- if (!RemoveUnreachableBlocksFromFn(F))
+ // avoid reruning iterativelySimplifyCFG if the second pass of
+ // removeUnreachableBlocksFromFn doesn't do anything.
+ if (!removeUnreachableBlocksFromFn(F))
return true;
do {
- EverChanged = IterativeSimplifyCFG(F, TD);
- EverChanged |= RemoveUnreachableBlocksFromFn(F);
+ EverChanged = iterativelySimplifyCFG(F, TD);
+ EverChanged |= removeUnreachableBlocksFromFn(F);
} while (EverChanged);
return true;
diff --git a/lib/Transforms/Scalar/SimplifyLibCalls.cpp b/lib/Transforms/Scalar/SimplifyLibCalls.cpp
index 3904419..65311fe 100644
--- a/lib/Transforms/Scalar/SimplifyLibCalls.cpp
+++ b/lib/Transforms/Scalar/SimplifyLibCalls.cpp
@@ -28,6 +28,7 @@
#include "llvm/ADT/Statistic.h"
#include "llvm/ADT/StringMap.h"
#include "llvm/Analysis/ValueTracking.h"
+#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/raw_ostream.h"
#include "llvm/Target/TargetData.h"
@@ -38,6 +39,10 @@ using namespace llvm;
STATISTIC(NumSimplified, "Number of library calls simplified");
STATISTIC(NumAnnotated, "Number of attributes added to library functions");
+static cl::opt<bool> UnsafeFPShrink("enable-double-float-shrink", cl::Hidden,
+ cl::init(false),
+ cl::desc("Enable unsafe double to float "
+ "shrinking for math lib calls"));
//===----------------------------------------------------------------------===//
// Optimizer Base Class
//===----------------------------------------------------------------------===//
@@ -893,16 +898,56 @@ struct MemSetOpt : public LibCallOptimization {
//===----------------------------------------------------------------------===//
//===---------------------------------------===//
-// 'cos*' Optimizations
+// Double -> Float Shrinking Optimizations for Unary Functions like 'floor'
+
+struct UnaryDoubleFPOpt : public LibCallOptimization {
+ bool CheckRetType;
+ UnaryDoubleFPOpt(bool CheckReturnType): CheckRetType(CheckReturnType) {}
+ virtual Value *CallOptimizer(Function *Callee, CallInst *CI, IRBuilder<> &B) {
+ FunctionType *FT = Callee->getFunctionType();
+ if (FT->getNumParams() != 1 || !FT->getReturnType()->isDoubleTy() ||
+ !FT->getParamType(0)->isDoubleTy())
+ return 0;
+
+ if (CheckRetType) {
+ // Check if all the uses for function like 'sin' are converted to float.
+ for (Value::use_iterator UseI = CI->use_begin(); UseI != CI->use_end();
+ ++UseI) {
+ FPTruncInst *Cast = dyn_cast<FPTruncInst>(*UseI);
+ if (Cast == 0 || !Cast->getType()->isFloatTy())
+ return 0;
+ }
+ }
+
+ // If this is something like 'floor((double)floatval)', convert to floorf.
+ FPExtInst *Cast = dyn_cast<FPExtInst>(CI->getArgOperand(0));
+ if (Cast == 0 || !Cast->getOperand(0)->getType()->isFloatTy())
+ return 0;
+
+ // floor((double)floatval) -> (double)floorf(floatval)
+ Value *V = Cast->getOperand(0);
+ V = EmitUnaryFloatFnCall(V, Callee->getName(), B, Callee->getAttributes());
+ return B.CreateFPExt(V, B.getDoubleTy());
+ }
+};
+//===---------------------------------------===//
+// 'cos*' Optimizations
struct CosOpt : public LibCallOptimization {
virtual Value *CallOptimizer(Function *Callee, CallInst *CI, IRBuilder<> &B) {
+ Value *Ret = NULL;
+ if (UnsafeFPShrink && Callee->getName() == "cos" &&
+ TLI->has(LibFunc::cosf)) {
+ UnaryDoubleFPOpt UnsafeUnaryDoubleFP(true);
+ Ret = UnsafeUnaryDoubleFP.CallOptimizer(Callee, CI, B);
+ }
+
FunctionType *FT = Callee->getFunctionType();
// Just make sure this has 1 argument of FP type, which matches the
// result type.
if (FT->getNumParams() != 1 || FT->getReturnType() != FT->getParamType(0) ||
!FT->getParamType(0)->isFloatingPointTy())
- return 0;
+ return Ret;
// cos(-x) -> cos(x)
Value *Op1 = CI->getArgOperand(0);
@@ -910,7 +955,7 @@ struct CosOpt : public LibCallOptimization {
BinaryOperator *BinExpr = cast<BinaryOperator>(Op1);
return B.CreateCall(Callee, BinExpr->getOperand(1), "cos");
}
- return 0;
+ return Ret;
}
};
@@ -919,13 +964,20 @@ struct CosOpt : public LibCallOptimization {
struct PowOpt : public LibCallOptimization {
virtual Value *CallOptimizer(Function *Callee, CallInst *CI, IRBuilder<> &B) {
+ Value *Ret = NULL;
+ if (UnsafeFPShrink && Callee->getName() == "pow" &&
+ TLI->has(LibFunc::powf)) {
+ UnaryDoubleFPOpt UnsafeUnaryDoubleFP(true);
+ Ret = UnsafeUnaryDoubleFP.CallOptimizer(Callee, CI, B);
+ }
+
FunctionType *FT = Callee->getFunctionType();
// Just make sure this has 2 arguments of the same FP type, which match the
// result type.
if (FT->getNumParams() != 2 || FT->getReturnType() != FT->getParamType(0) ||
FT->getParamType(0) != FT->getParamType(1) ||
!FT->getParamType(0)->isFloatingPointTy())
- return 0;
+ return Ret;
Value *Op1 = CI->getArgOperand(0), *Op2 = CI->getArgOperand(1);
if (ConstantFP *Op1C = dyn_cast<ConstantFP>(Op1)) {
@@ -936,7 +988,7 @@ struct PowOpt : public LibCallOptimization {
}
ConstantFP *Op2C = dyn_cast<ConstantFP>(Op2);
- if (Op2C == 0) return 0;
+ if (Op2C == 0) return Ret;
if (Op2C->getValueAPF().isZero()) // pow(x, 0.0) -> 1.0
return ConstantFP::get(CI->getType(), 1.0);
@@ -974,12 +1026,19 @@ struct PowOpt : public LibCallOptimization {
struct Exp2Opt : public LibCallOptimization {
virtual Value *CallOptimizer(Function *Callee, CallInst *CI, IRBuilder<> &B) {
+ Value *Ret = NULL;
+ if (UnsafeFPShrink && Callee->getName() == "exp2" &&
+ TLI->has(LibFunc::exp2)) {
+ UnaryDoubleFPOpt UnsafeUnaryDoubleFP(true);
+ Ret = UnsafeUnaryDoubleFP.CallOptimizer(Callee, CI, B);
+ }
+
FunctionType *FT = Callee->getFunctionType();
// Just make sure this has 1 argument of FP type, which matches the
// result type.
if (FT->getNumParams() != 1 || FT->getReturnType() != FT->getParamType(0) ||
!FT->getParamType(0)->isFloatingPointTy())
- return 0;
+ return Ret;
Value *Op = CI->getArgOperand(0);
// Turn exp2(sitofp(x)) -> ldexp(1.0, sext(x)) if sizeof(x) <= 32
@@ -1016,29 +1075,7 @@ struct Exp2Opt : public LibCallOptimization {
return CI;
}
- return 0;
- }
-};
-
-//===---------------------------------------===//
-// Double -> Float Shrinking Optimizations for Unary Functions like 'floor'
-
-struct UnaryDoubleFPOpt : public LibCallOptimization {
- virtual Value *CallOptimizer(Function *Callee, CallInst *CI, IRBuilder<> &B) {
- FunctionType *FT = Callee->getFunctionType();
- if (FT->getNumParams() != 1 || !FT->getReturnType()->isDoubleTy() ||
- !FT->getParamType(0)->isDoubleTy())
- return 0;
-
- // If this is something like 'floor((double)floatval)', convert to floorf.
- FPExtInst *Cast = dyn_cast<FPExtInst>(CI->getArgOperand(0));
- if (Cast == 0 || !Cast->getOperand(0)->getType()->isFloatTy())
- return 0;
-
- // floor((double)floatval) -> (double)floorf(floatval)
- Value *V = Cast->getOperand(0);
- V = EmitUnaryFloatFnCall(V, Callee->getName(), B, Callee->getAttributes());
- return B.CreateFPExt(V, B.getDoubleTy());
+ return Ret;
}
};
@@ -1534,7 +1571,8 @@ namespace {
StrToOpt StrTo; StrSpnOpt StrSpn; StrCSpnOpt StrCSpn; StrStrOpt StrStr;
MemCmpOpt MemCmp; MemCpyOpt MemCpy; MemMoveOpt MemMove; MemSetOpt MemSet;
// Math Library Optimizations
- CosOpt Cos; PowOpt Pow; Exp2Opt Exp2; UnaryDoubleFPOpt UnaryDoubleFP;
+ CosOpt Cos; PowOpt Pow; Exp2Opt Exp2;
+ UnaryDoubleFPOpt UnaryDoubleFP, UnsafeUnaryDoubleFP;
// Integer Optimizations
FFSOpt FFS; AbsOpt Abs; IsDigitOpt IsDigit; IsAsciiOpt IsAscii;
ToAsciiOpt ToAscii;
@@ -1547,10 +1585,13 @@ namespace {
public:
static char ID; // Pass identification
SimplifyLibCalls() : FunctionPass(ID), StrCpy(false), StrCpyChk(true),
- StpCpy(false), StpCpyChk(true) {
+ StpCpy(false), StpCpyChk(true),
+ UnaryDoubleFP(false), UnsafeUnaryDoubleFP(true) {
initializeSimplifyLibCallsPass(*PassRegistry::getPassRegistry());
}
void AddOpt(LibFunc::Func F, LibCallOptimization* Opt);
+ void AddOpt(LibFunc::Func F1, LibFunc::Func F2, LibCallOptimization* Opt);
+
void InitOptimizations();
bool runOnFunction(Function &F);
@@ -1586,6 +1627,12 @@ void SimplifyLibCalls::AddOpt(LibFunc::Func F, LibCallOptimization* Opt) {
Optimizations[TLI->getName(F)] = Opt;
}
+void SimplifyLibCalls::AddOpt(LibFunc::Func F1, LibFunc::Func F2,
+ LibCallOptimization* Opt) {
+ if (TLI->has(F1) && TLI->has(F2))
+ Optimizations[TLI->getName(F1)] = Opt;
+}
+
/// Optimizations - Populate the Optimizations map with all the optimizations
/// we know.
void SimplifyLibCalls::InitOptimizations() {
@@ -1641,20 +1688,37 @@ void SimplifyLibCalls::InitOptimizations() {
Optimizations["llvm.exp2.f64"] = &Exp2;
Optimizations["llvm.exp2.f32"] = &Exp2;
- if (TLI->has(LibFunc::fabs) && TLI->has(LibFunc::fabsf))
- Optimizations["fabs"] = &UnaryDoubleFP;
- if (TLI->has(LibFunc::floor) && TLI->has(LibFunc::floorf))
- Optimizations["floor"] = &UnaryDoubleFP;
- if (TLI->has(LibFunc::ceil) && TLI->has(LibFunc::ceilf))
- Optimizations["ceil"] = &UnaryDoubleFP;
- if (TLI->has(LibFunc::round) && TLI->has(LibFunc::roundf))
- Optimizations["round"] = &UnaryDoubleFP;
- if (TLI->has(LibFunc::rint) && TLI->has(LibFunc::rintf))
- Optimizations["rint"] = &UnaryDoubleFP;
- if (TLI->has(LibFunc::nearbyint) && TLI->has(LibFunc::nearbyintf))
- Optimizations["nearbyint"] = &UnaryDoubleFP;
- if (TLI->has(LibFunc::trunc) && TLI->has(LibFunc::truncf))
- Optimizations["trunc"] = &UnaryDoubleFP;
+ AddOpt(LibFunc::ceil, LibFunc::ceilf, &UnaryDoubleFP);
+ AddOpt(LibFunc::fabs, LibFunc::fabsf, &UnaryDoubleFP);
+ AddOpt(LibFunc::floor, LibFunc::floorf, &UnaryDoubleFP);
+ AddOpt(LibFunc::rint, LibFunc::rintf, &UnaryDoubleFP);
+ AddOpt(LibFunc::round, LibFunc::roundf, &UnaryDoubleFP);
+ AddOpt(LibFunc::nearbyint, LibFunc::nearbyintf, &UnaryDoubleFP);
+ AddOpt(LibFunc::trunc, LibFunc::truncf, &UnaryDoubleFP);
+
+ if(UnsafeFPShrink) {
+ AddOpt(LibFunc::acos, LibFunc::acosf, &UnsafeUnaryDoubleFP);
+ AddOpt(LibFunc::acosh, LibFunc::acoshf, &UnsafeUnaryDoubleFP);
+ AddOpt(LibFunc::asin, LibFunc::asinf, &UnsafeUnaryDoubleFP);
+ AddOpt(LibFunc::asinh, LibFunc::asinhf, &UnsafeUnaryDoubleFP);
+ AddOpt(LibFunc::atan, LibFunc::atanf, &UnsafeUnaryDoubleFP);
+ AddOpt(LibFunc::atanh, LibFunc::atanhf, &UnsafeUnaryDoubleFP);
+ AddOpt(LibFunc::cbrt, LibFunc::cbrtf, &UnsafeUnaryDoubleFP);
+ AddOpt(LibFunc::cosh, LibFunc::coshf, &UnsafeUnaryDoubleFP);
+ AddOpt(LibFunc::exp, LibFunc::expf, &UnsafeUnaryDoubleFP);
+ AddOpt(LibFunc::exp10, LibFunc::exp10f, &UnsafeUnaryDoubleFP);
+ AddOpt(LibFunc::expm1, LibFunc::expm1f, &UnsafeUnaryDoubleFP);
+ AddOpt(LibFunc::log, LibFunc::logf, &UnsafeUnaryDoubleFP);
+ AddOpt(LibFunc::log10, LibFunc::log10f, &UnsafeUnaryDoubleFP);
+ AddOpt(LibFunc::log1p, LibFunc::log1pf, &UnsafeUnaryDoubleFP);
+ AddOpt(LibFunc::log2, LibFunc::log2f, &UnsafeUnaryDoubleFP);
+ AddOpt(LibFunc::logb, LibFunc::logbf, &UnsafeUnaryDoubleFP);
+ AddOpt(LibFunc::sin, LibFunc::sinf, &UnsafeUnaryDoubleFP);
+ AddOpt(LibFunc::sinh, LibFunc::sinhf, &UnsafeUnaryDoubleFP);
+ AddOpt(LibFunc::sqrt, LibFunc::sqrtf, &UnsafeUnaryDoubleFP);
+ AddOpt(LibFunc::tan, LibFunc::tanf, &UnsafeUnaryDoubleFP);
+ AddOpt(LibFunc::tanh, LibFunc::tanhf, &UnsafeUnaryDoubleFP);
+ }
// Integer Optimizations
Optimizations["ffs"] = &FFS;