| author | Stephen Hines <srhines@google.com> | 2012-09-13 19:10:35 -0700 |
|---|---|---|
| committer | Android Git Automerger <android-git-automerger@android.com> | 2012-09-13 19:10:35 -0700 |
| commit | 8f1c32e4f21c4e297f5690acf958a842384ba802 (patch) | |
| tree | 52800183ec2d22164b8f396842142c3a8aab912a /lib/Transforms/Scalar | |
| parent | 828ded66831c0caaeecd2291a6bfb084f373d0e4 (diff) | |
| parent | 1c4ad5ef4fab105f0c8af7edd026e00502fb6279 (diff) | |
| download | external_llvm-8f1c32e4f21c4e297f5690acf958a842384ba802 (.zip, .tar.gz, .tar.bz2) | |
am 1c4ad5ef: Merge branch 'upstream' into merge-2012_09_10
* commit '1c4ad5ef4fab105f0c8af7edd026e00502fb6279': (446 commits)
Revert r163556. Missed updates to tablegen files.
Update function names to conform to guidelines. No functional change intended.
test/CodeGen/X86/ms-inline-asm.ll: Relax for non-darwin x86 targets. '##InlineAsm' could not be seen in other hosts.
[ms-inline asm] Properly emit the asm directives when the AsmPrinterVariant and InlineAsmVariant don't match.
Update test case for Release builds.
Remove redundant semicolons which are null statements.
Disable stack coloring because it makes dragonegg fail bootstrapping.
[ms-inline asm] Pass the correct AsmVariant to the PrintAsmOperand() function and update the printOperand() function accordingly.
[ms-inline asm] Add support for .att_syntax directive.
Enable stack coloring.
Don't attempt to use flags from predicated instructions.
[Object] Extract Elf_Ehdr. Patch by Hemant Kulkarni!
Stack Coloring: Handle the case where END markers come before BEGIN markers properly.
Enhance PR11334 fix to support extload from v2f32/v4f32
Add "blocked" heuristic to the Hexagon MI scheduler.
Fold multiply by 0 or 1 when in UnsafeFPMath mode in SelectionDAG::getNode().
whitespace
Add boolean simplification support from CMOV
Fix an assertion failure when optimising a shufflevector incorrectly into concat_vectors, and a followup bug with SelectionDAG::getNode() creating nodes with invalid types.
Minor cleanup. No functional change.
...
Diffstat (limited to 'lib/Transforms/Scalar')
| -rw-r--r-- | lib/Transforms/Scalar/CodeGenPrepare.cpp | 46 |
| -rw-r--r-- | lib/Transforms/Scalar/DCE.cpp | 8 |
| -rw-r--r-- | lib/Transforms/Scalar/DeadStoreElimination.cpp | 36 |
| -rw-r--r-- | lib/Transforms/Scalar/EarlyCSE.cpp | 2 |
| -rw-r--r-- | lib/Transforms/Scalar/GVN.cpp | 26 |
| -rw-r--r-- | lib/Transforms/Scalar/IndVarSimplify.cpp | 17 |
| -rw-r--r-- | lib/Transforms/Scalar/JumpThreading.cpp | 2 |
| -rw-r--r-- | lib/Transforms/Scalar/LICM.cpp | 39 |
| -rw-r--r-- | lib/Transforms/Scalar/LoopIdiomRecognize.cpp | 24 |
| -rw-r--r-- | lib/Transforms/Scalar/LoopInstSimplify.cpp | 2 |
| -rw-r--r-- | lib/Transforms/Scalar/LoopRotation.cpp | 65 |
| -rw-r--r-- | lib/Transforms/Scalar/LoopStrengthReduce.cpp | 14 |
| -rw-r--r-- | lib/Transforms/Scalar/ObjCARC.cpp | 91 |
| -rw-r--r-- | lib/Transforms/Scalar/SimplifyCFGPass.cpp | 52 |
| -rw-r--r-- | lib/Transforms/Scalar/SimplifyLibCalls.cpp | 154 |
15 files changed, 387 insertions, 191 deletions
diff --git a/lib/Transforms/Scalar/CodeGenPrepare.cpp b/lib/Transforms/Scalar/CodeGenPrepare.cpp
index a8deda8..5912107 100644
--- a/lib/Transforms/Scalar/CodeGenPrepare.cpp
+++ b/lib/Transforms/Scalar/CodeGenPrepare.cpp
@@ -43,6 +43,7 @@
 #include "llvm/Transforms/Utils/AddrModeMatcher.h"
 #include "llvm/Transforms/Utils/BasicBlockUtils.h"
 #include "llvm/Transforms/Utils/BuildLibCalls.h"
+#include "llvm/Transforms/Utils/BypassSlowDivision.h"
 #include "llvm/Transforms/Utils/Local.h"
 using namespace llvm;
 using namespace llvm::PatternMatch;
@@ -148,7 +149,19 @@ bool CodeGenPrepare::runOnFunction(Function &F) {
   PFI = getAnalysisIfAvailable<ProfileInfo>();
   OptSize = F.hasFnAttr(Attribute::OptimizeForSize);
 
-  // First pass, eliminate blocks that contain only PHI nodes and an
+  /// This optimization identifies DIV instructions that can be
+  /// profitably bypassed and carried out with a shorter, faster divide.
+  if (TLI && TLI->isSlowDivBypassed()) {
+    const DenseMap<Type *, Type *> &BypassTypeMap = TLI->getBypassSlowDivTypes();
+
+    for (Function::iterator I = F.begin(); I != F.end(); I++) {
+      EverMadeChange |= bypassSlowDivision(F,
+                                           I,
+                                           BypassTypeMap);
+    }
+  }
+
+  // Eliminate blocks that contain only PHI nodes and an
   // unconditional branch.
   EverMadeChange |= EliminateMostlyEmptyBlocks(F);
 
@@ -988,7 +1001,7 @@ bool CodeGenPrepare::OptimizeMemoryInst(Instruction *MemoryInst, Value *Addr,
   WeakVH IterHandle(CurInstIterator);
   BasicBlock *BB = CurInstIterator->getParent();
 
-  RecursivelyDeleteTriviallyDeadInstructions(Repl);
+  RecursivelyDeleteTriviallyDeadInstructions(Repl, TLInfo);
 
   if (IterHandle != CurInstIterator) {
     // If the iterator instruction was recursively deleted, start over at the
@@ -1174,17 +1187,32 @@ static bool isFormingBranchFromSelectProfitable(SelectInst *SI) {
 }
 
+/// If we have a SelectInst that will likely profit from branch prediction,
+/// turn it into a branch.
 bool CodeGenPrepare::OptimizeSelectInst(SelectInst *SI) {
-  // If we have a SelectInst that will likely profit from branch prediction,
-  // turn it into a branch.
-  if (DisableSelectToBranch || OptSize || !TLI ||
-      !TLI->isPredictableSelectExpensive())
-    return false;
+  bool VectorCond = !SI->getCondition()->getType()->isIntegerTy(1);
 
-  if (!SI->getCondition()->getType()->isIntegerTy(1) ||
-      !isFormingBranchFromSelectProfitable(SI))
+  // Can we convert the 'select' to CF ?
+  if (DisableSelectToBranch || OptSize || !TLI || VectorCond)
     return false;
 
+  TargetLowering::SelectSupportKind SelectKind;
+  if (VectorCond)
+    SelectKind = TargetLowering::VectorMaskSelect;
+  else if (SI->getType()->isVectorTy())
+    SelectKind = TargetLowering::ScalarCondVectorVal;
+  else
+    SelectKind = TargetLowering::ScalarValSelect;
+
+  // Do we have efficient codegen support for this kind of 'selects' ?
+  if (TLI->isSelectSupported(SelectKind)) {
+    // We have efficient codegen support for the select instruction.
+    // Check if it is profitable to keep this 'select'.
+    if (!TLI->isPredictableSelectExpensive() ||
+        !isFormingBranchFromSelectProfitable(SI))
+      return false;
+  }
+
   ModifiedDT = true;
 
   // First, we split the block containing the select into 2 blocks.
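The bypass-slow-division hunk above is easier to see at the source level. Below is a minimal sketch, in plain C++, of the runtime pattern the transformation effectively emits on targets where narrow division is much cheaper than wide division; the function name `divide64` and the 64-to-32 type pair are illustrative assumptions, not part of the commit:

```cpp
#include <cstdint>

// Hypothetical illustration of the code shape bypassSlowDivision creates:
// if both 64-bit operands happen to fit in 32 bits, take the short, fast
// 32-bit divide; otherwise fall back to the full-width (slow) divide.
uint64_t divide64(uint64_t a, uint64_t b) {
  if (((a | b) >> 32) == 0)                      // both values fit in 32 bits
    return uint64_t(uint32_t(a) / uint32_t(b));  // cheap narrow divide
  return a / b;                                  // slow full-width divide
}
```

The pass applies this per basic block, driven by the type map the target supplies through `getBypassSlowDivTypes()`.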
diff --git a/lib/Transforms/Scalar/DCE.cpp b/lib/Transforms/Scalar/DCE.cpp
index 8dbcc23..086f0a1 100644
--- a/lib/Transforms/Scalar/DCE.cpp
+++ b/lib/Transforms/Scalar/DCE.cpp
@@ -22,6 +22,7 @@
 #include "llvm/Instruction.h"
 #include "llvm/Pass.h"
 #include "llvm/Support/InstIterator.h"
+#include "llvm/Target/TargetLibraryInfo.h"
 #include "llvm/ADT/Statistic.h"
 using namespace llvm;
@@ -38,10 +39,11 @@ namespace {
       initializeDeadInstEliminationPass(*PassRegistry::getPassRegistry());
     }
     virtual bool runOnBasicBlock(BasicBlock &BB) {
+      TargetLibraryInfo *TLI = getAnalysisIfAvailable<TargetLibraryInfo>();
       bool Changed = false;
       for (BasicBlock::iterator DI = BB.begin(); DI != BB.end(); ) {
         Instruction *Inst = DI++;
-        if (isInstructionTriviallyDead(Inst)) {
+        if (isInstructionTriviallyDead(Inst, TLI)) {
           Inst->eraseFromParent();
           Changed = true;
           ++DIEEliminated;
@@ -87,6 +89,8 @@ char DCE::ID = 0;
 INITIALIZE_PASS(DCE, "dce", "Dead Code Elimination", false, false)
 
 bool DCE::runOnFunction(Function &F) {
+  TargetLibraryInfo *TLI = getAnalysisIfAvailable<TargetLibraryInfo>();
+
   // Start out with all of the instructions in the worklist...
   std::vector<Instruction*> WorkList;
   for (inst_iterator i = inst_begin(F), e = inst_end(F); i != e; ++i)
@@ -101,7 +105,7 @@ bool DCE::runOnFunction(Function &F) {
     Instruction *I = WorkList.back();
     WorkList.pop_back();
 
-    if (isInstructionTriviallyDead(I)) { // If the instruction is dead.
+    if (isInstructionTriviallyDead(I, TLI)) { // If the instruction is dead.
       // Loop over all of the values that the instruction uses, if there are
       // instructions being used, add them to the worklist, because they might
      // go dead after this one is removed.
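The DCE change only threads `TargetLibraryInfo` into the triviality check; the surrounding worklist algorithm it plugs into is worth seeing in isolation. A self-contained sketch over a toy instruction type (all names here are illustrative, not LLVM's):

```cpp
#include <unordered_set>
#include <vector>

struct Inst {
  std::vector<Inst*> operands;  // values this instruction uses
  int uses = 0;                 // number of remaining users
  bool hasSideEffects = false;  // side-effecting instructions are never dead
};

// Worklist DCE: deleting one dead instruction may make its operands dead,
// so push them back onto the worklist instead of rescanning everything.
void eliminateDeadCode(std::vector<Inst*> worklist) {
  std::unordered_set<Inst*> erased;
  while (!worklist.empty()) {
    Inst *I = worklist.back();
    worklist.pop_back();
    if (erased.count(I) || I->uses != 0 || I->hasSideEffects)
      continue;                       // still live (or already gone)
    for (Inst *Op : I->operands)
      if (--Op->uses == 0)
        worklist.push_back(Op);       // operand may now be dead too
    erased.insert(I);
  }
}
```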
diff --git a/lib/Transforms/Scalar/DeadStoreElimination.cpp b/lib/Transforms/Scalar/DeadStoreElimination.cpp
index 8b1283f..1ff4329 100644
--- a/lib/Transforms/Scalar/DeadStoreElimination.cpp
+++ b/lib/Transforms/Scalar/DeadStoreElimination.cpp
@@ -106,6 +106,7 @@ FunctionPass *llvm::createDeadStoreEliminationPass() { return new DSE(); }
 ///
 static void DeleteDeadInstruction(Instruction *I,
                                   MemoryDependenceAnalysis &MD,
+                                  const TargetLibraryInfo *TLI,
                                   SmallSetVector<Value*, 16> *ValueSet = 0) {
   SmallVector<Instruction*, 32> NowDeadInsts;
@@ -130,7 +131,7 @@ static void DeleteDeadInstruction(Instruction *I,
       if (!Op->use_empty()) continue;
 
       if (Instruction *OpI = dyn_cast<Instruction>(Op))
-        if (isInstructionTriviallyDead(OpI))
+        if (isInstructionTriviallyDead(OpI, TLI))
           NowDeadInsts.push_back(OpI);
     }
@@ -276,7 +277,7 @@ static Value *getStoredPointerOperand(Instruction *I) {
 static uint64_t getPointerSize(const Value *V, AliasAnalysis &AA) {
   uint64_t Size;
-  if (getObjectSize(V, Size, AA.getTargetData()))
+  if (getObjectSize(V, Size, AA.getTargetData(), AA.getTargetLibraryInfo()))
     return Size;
   return AliasAnalysis::UnknownSize;
 }
@@ -454,7 +455,7 @@ bool DSE::runOnBasicBlock(BasicBlock &BB) {
     Instruction *Inst = BBI++;
 
     // Handle 'free' calls specially.
-    if (CallInst *F = isFreeCall(Inst)) {
+    if (CallInst *F = isFreeCall(Inst, AA->getTargetLibraryInfo())) {
       MadeChange |= HandleFree(F);
       continue;
     }
@@ -483,7 +484,7 @@ bool DSE::runOnBasicBlock(BasicBlock &BB) {
         // in case we need it.
         WeakVH NextInst(BBI);
 
-        DeleteDeadInstruction(SI, *MD);
+        DeleteDeadInstruction(SI, *MD, AA->getTargetLibraryInfo());
 
         if (NextInst == 0)  // Next instruction deleted.
           BBI = BB.begin();
@@ -530,7 +531,7 @@ bool DSE::runOnBasicBlock(BasicBlock &BB) {
               << *DepWrite << "\n  KILLER: " << *Inst << '\n');
 
         // Delete the store and now-dead instructions that feed it.
-        DeleteDeadInstruction(DepWrite, *MD);
+        DeleteDeadInstruction(DepWrite, *MD, AA->getTargetLibraryInfo());
         ++NumFastStores;
         MadeChange = true;
@@ -640,7 +641,7 @@ bool DSE::HandleFree(CallInst *F) {
     Instruction *Next = llvm::next(BasicBlock::iterator(Dependency));
 
     // DCE instructions only used to calculate that store
-    DeleteDeadInstruction(Dependency, *MD);
+    DeleteDeadInstruction(Dependency, *MD, AA->getTargetLibraryInfo());
     ++NumFastStores;
     MadeChange = true;
@@ -680,7 +681,8 @@ bool DSE::handleEndBlock(BasicBlock &BB) {
 
     // Okay, so these are dead heap objects, but if the pointer never escapes
     // then it's leaked by this function anyways.
-    else if (isAllocLikeFn(I) && !PointerMayBeCaptured(I, true, true))
+    else if (isAllocLikeFn(I, AA->getTargetLibraryInfo()) &&
+             !PointerMayBeCaptured(I, true, true))
       DeadStackObjects.insert(I);
   }
@@ -724,7 +726,8 @@ bool DSE::handleEndBlock(BasicBlock &BB) {
               dbgs() << '\n');
 
         // DCE instructions only used to calculate that store.
-        DeleteDeadInstruction(Dead, *MD, &DeadStackObjects);
+        DeleteDeadInstruction(Dead, *MD, AA->getTargetLibraryInfo(),
+                              &DeadStackObjects);
         ++NumFastStores;
         MadeChange = true;
         continue;
@@ -732,9 +735,10 @@ bool DSE::handleEndBlock(BasicBlock &BB) {
     }
 
     // Remove any dead non-memory-mutating instructions.
-    if (isInstructionTriviallyDead(BBI)) {
+    if (isInstructionTriviallyDead(BBI, AA->getTargetLibraryInfo())) {
       Instruction *Inst = BBI++;
-      DeleteDeadInstruction(Inst, *MD, &DeadStackObjects);
+      DeleteDeadInstruction(Inst, *MD, AA->getTargetLibraryInfo(),
+                            &DeadStackObjects);
       ++NumFastOther;
       MadeChange = true;
       continue;
@@ -750,7 +754,7 @@ bool DSE::handleEndBlock(BasicBlock &BB) {
     if (CallSite CS = cast<Value>(BBI)) {
       // Remove allocation function calls from the list of dead stack objects;
       // there can't be any references before the definition.
-      if (isAllocLikeFn(BBI))
+      if (isAllocLikeFn(BBI, AA->getTargetLibraryInfo()))
         DeadStackObjects.remove(BBI);
 
       // If this call does not access memory, it can't be loading any of our
@@ -771,15 +775,15 @@ bool DSE::handleEndBlock(BasicBlock &BB) {
           LiveAllocas.push_back(*I);
       }
 
-      for (SmallVector<Value*, 8>::iterator I = LiveAllocas.begin(),
-           E = LiveAllocas.end(); I != E; ++I)
-        DeadStackObjects.remove(*I);
-
       // If all of the allocas were clobbered by the call then we're not going
       // to find anything else to process.
-      if (DeadStackObjects.empty())
+      if (DeadStackObjects.size() == LiveAllocas.size())
         break;
 
+      for (SmallVector<Value*, 8>::iterator I = LiveAllocas.begin(),
+           E = LiveAllocas.end(); I != E; ++I)
+        DeadStackObjects.remove(*I);
+
       continue;
     }
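A recurring theme in this merge is threading `TargetLibraryInfo` through every deadness and allocation query. The reason: a call such as `strlen` or `malloc` is only removable (or recognizable as an allocation) if the target really provides the standard C library function. A toy sketch of that idea, under stated assumptions; the set-based "library info" and function names are stand-ins, not LLVM's API:

```cpp
#include <string>
#include <unordered_set>

// Stand-in for TargetLibraryInfo: which library calls the target guarantees
// to have standard, side-effect-free semantics (e.g. absent -fno-builtin).
struct LibraryInfoSketch {
  std::unordered_set<std::string> knownPure{"strlen", "fabs", "floor"};
  bool isSideEffectFree(const std::string &callee) const {
    return knownPure.count(callee) != 0;
  }
};

// A call with no users is only trivially dead if library info vouches for it;
// without that info we must conservatively keep the call.
bool isTriviallyDeadCall(const std::string &callee, int numUses,
                         const LibraryInfoSketch *TLI) {
  return numUses == 0 && TLI && TLI->isSideEffectFree(callee);
}
```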
diff --git a/lib/Transforms/Scalar/EarlyCSE.cpp b/lib/Transforms/Scalar/EarlyCSE.cpp
index 9759549..2627113 100644
--- a/lib/Transforms/Scalar/EarlyCSE.cpp
+++ b/lib/Transforms/Scalar/EarlyCSE.cpp
@@ -374,7 +374,7 @@ bool EarlyCSE::processNode(DomTreeNode *Node) {
     Instruction *Inst = I++;
 
     // Dead instructions should just be removed.
-    if (isInstructionTriviallyDead(Inst)) {
+    if (isInstructionTriviallyDead(Inst, TLI)) {
       DEBUG(dbgs() << "EarlyCSE DCE: " << *Inst << '\n');
       Inst->eraseFromParent();
       Changed = true;
diff --git a/lib/Transforms/Scalar/GVN.cpp b/lib/Transforms/Scalar/GVN.cpp
index 4822fd0..16ae6ad 100644
--- a/lib/Transforms/Scalar/GVN.cpp
+++ b/lib/Transforms/Scalar/GVN.cpp
@@ -271,16 +271,16 @@ void ValueTable::add(Value *V, uint32_t num) {
   valueNumbering.insert(std::make_pair(V, num));
 }
 
-uint32_t ValueTable::lookup_or_add_call(CallInst* C) {
+uint32_t ValueTable::lookup_or_add_call(CallInst *C) {
   if (AA->doesNotAccessMemory(C)) {
     Expression exp = create_expression(C);
-    uint32_t& e = expressionNumbering[exp];
+    uint32_t &e = expressionNumbering[exp];
     if (!e) e = nextValueNumber++;
     valueNumbering[C] = e;
     return e;
   } else if (AA->onlyReadsMemory(C)) {
     Expression exp = create_expression(C);
-    uint32_t& e = expressionNumbering[exp];
+    uint32_t &e = expressionNumbering[exp];
     if (!e) {
       e = nextValueNumber++;
       valueNumbering[C] = e;
@@ -413,7 +413,7 @@ uint32_t ValueTable::lookup_or_add(Value *V) {
     case Instruction::LShr:
     case Instruction::AShr:
     case Instruction::And:
-    case Instruction::Or :
+    case Instruction::Or:
     case Instruction::Xor:
     case Instruction::ICmp:
     case Instruction::FCmp:
@@ -632,6 +632,7 @@ INITIALIZE_PASS_DEPENDENCY(TargetLibraryInfo)
 INITIALIZE_AG_DEPENDENCY(AliasAnalysis)
 INITIALIZE_PASS_END(GVN, "gvn", "Global Value Numbering", false, false)
 
+#ifndef NDEBUG
 void GVN::dump(DenseMap<uint32_t, Value*>& d) {
   errs() << "{\n";
   for (DenseMap<uint32_t, Value*>::iterator I = d.begin(),
@@ -641,6 +642,7 @@ void GVN::dump(DenseMap<uint32_t, Value*>& d) {
   }
   errs() << "}\n";
 }
+#endif
 
 /// IsValueFullyAvailableInBlock - Return true if we can prove that the value
 /// we're analyzing is fully available in the specified block.  As we go, keep
@@ -1436,7 +1438,7 @@ bool GVN::processNonLocalLoad(LoadInst *LI) {
     Instruction *DepInst = DepInfo.getInst();
 
     // Loading the allocation -> undef.
-    if (isa<AllocaInst>(DepInst) || isMallocLikeFn(DepInst) ||
+    if (isa<AllocaInst>(DepInst) || isMallocLikeFn(DepInst, TLI) ||
         // Loading immediately after lifetime begin -> undef.
         isLifetimeStart(DepInst)) {
       ValuesPerBlock.push_back(AvailableValueInBlock::get(DepBB,
@@ -1951,7 +1953,7 @@ bool GVN::processLoad(LoadInst *L) {
   // If this load really doesn't depend on anything, then we must be loading an
   // undef value.  This can happen when loading for a fresh allocation with no
   // intervening stores, for example.
-  if (isa<AllocaInst>(DepInst) || isMallocLikeFn(DepInst)) {
+  if (isa<AllocaInst>(DepInst) || isMallocLikeFn(DepInst, TLI)) {
     L->replaceAllUsesWith(UndefValue::get(L->getType()));
     markInstructionForDeletion(L);
     ++NumGVNLoad;
@@ -2231,12 +2233,20 @@ bool GVN::processInstruction(Instruction *I) {
     Value *SwitchCond = SI->getCondition();
     BasicBlock *Parent = SI->getParent();
     bool Changed = false;
+
+    // Remember how many outgoing edges there are to every successor.
+    SmallDenseMap<BasicBlock *, unsigned, 16> SwitchEdges;
+    for (unsigned i = 0, n = SI->getNumSuccessors(); i != n; ++i)
+      ++SwitchEdges[SI->getSuccessor(i)];
+
     for (SwitchInst::CaseIt i = SI->case_begin(), e = SI->case_end();
          i != e; ++i) {
       BasicBlock *Dst = i.getCaseSuccessor();
-      BasicBlockEdge E(Parent, Dst);
-      if (E.isSingleEdge())
+      // If there is only a single edge, propagate the case value into it.
+      if (SwitchEdges.lookup(Dst) == 1) {
+        BasicBlockEdge E(Parent, Dst);
         Changed |= propagateEquality(SwitchCond, i.getCaseValue(), E);
+      }
     }
     return Changed;
   }
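The new `SwitchEdges` map handles switches where several case values branch to the same block: the case value may only be propagated into a successor reached by exactly one edge, since at a multi-edge join the condition's value is ambiguous. The counting idiom in standalone form (the `Block` alias is an illustrative stand-in for `BasicBlock*`):

```cpp
#include <unordered_map>
#include <vector>

using Block = int; // stand-in for BasicBlock*

// Count how many outgoing switch edges target each successor block.
std::unordered_map<Block, unsigned>
countSwitchEdges(const std::vector<Block> &successors) {
  std::unordered_map<Block, unsigned> edges;
  for (Block b : successors)
    ++edges[b];
  return edges;
}

// Usage: propagate the case value only along unique edges.
//   auto edges = countSwitchEdges(succs);
//   if (edges[dst] == 1) { /* safe to assume cond == caseValue in dst */ }
```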
diff --git a/lib/Transforms/Scalar/IndVarSimplify.cpp b/lib/Transforms/Scalar/IndVarSimplify.cpp
index 37f8bdf..c933a17 100644
--- a/lib/Transforms/Scalar/IndVarSimplify.cpp
+++ b/lib/Transforms/Scalar/IndVarSimplify.cpp
@@ -44,6 +44,7 @@
 #include "llvm/Transforms/Utils/BasicBlockUtils.h"
 #include "llvm/Transforms/Utils/SimplifyIndVar.h"
 #include "llvm/Target/TargetData.h"
+#include "llvm/Target/TargetLibraryInfo.h"
 #include "llvm/ADT/DenseMap.h"
 #include "llvm/ADT/SmallVector.h"
 #include "llvm/ADT/Statistic.h"
@@ -68,6 +69,7 @@ namespace {
     ScalarEvolution *SE;
     DominatorTree *DT;
     TargetData *TD;
+    TargetLibraryInfo *TLI;
 
     SmallVector<WeakVH, 16> DeadInsts;
     bool Changed;
@@ -414,11 +416,11 @@ void IndVarSimplify::HandleFloatingPointIV(Loop *L, PHINode *PN) {
   // new comparison.
   NewCompare->takeName(Compare);
   Compare->replaceAllUsesWith(NewCompare);
-  RecursivelyDeleteTriviallyDeadInstructions(Compare);
+  RecursivelyDeleteTriviallyDeadInstructions(Compare, TLI);
 
   // Delete the old floating point increment.
   Incr->replaceAllUsesWith(UndefValue::get(Incr->getType()));
-  RecursivelyDeleteTriviallyDeadInstructions(Incr);
+  RecursivelyDeleteTriviallyDeadInstructions(Incr, TLI);
 
   // If the FP induction variable still has uses, this is because something else
   // in the loop uses its value.  In order to canonicalize the induction
@@ -431,7 +433,7 @@ void IndVarSimplify::HandleFloatingPointIV(Loop *L, PHINode *PN) {
     Value *Conv = new SIToFPInst(NewPHI, PN->getType(), "indvar.conv",
                                  PN->getParent()->getFirstInsertionPt());
     PN->replaceAllUsesWith(Conv);
-    RecursivelyDeleteTriviallyDeadInstructions(PN);
+    RecursivelyDeleteTriviallyDeadInstructions(PN, TLI);
   }
   Changed = true;
 }
@@ -550,14 +552,14 @@ void IndVarSimplify::RewriteLoopExitValues(Loop *L, SCEVExpander &Rewriter) {
       PN->setIncomingValue(i, ExitVal);
 
       // If this instruction is dead now, delete it.
-      RecursivelyDeleteTriviallyDeadInstructions(Inst);
+      RecursivelyDeleteTriviallyDeadInstructions(Inst, TLI);
 
       if (NumPreds == 1) {
         // Completely replace a single-pred PHI. This is safe, because the
         // NewVal won't be variant in the loop, so we don't need an LCSSA phi
         // node anymore.
         PN->replaceAllUsesWith(ExitVal);
-        RecursivelyDeleteTriviallyDeadInstructions(PN);
+        RecursivelyDeleteTriviallyDeadInstructions(PN, TLI);
       }
     }
     if (NumPreds != 1) {
@@ -1697,6 +1699,7 @@ bool IndVarSimplify::runOnLoop(Loop *L, LPPassManager &LPM) {
   SE = &getAnalysis<ScalarEvolution>();
   DT = &getAnalysis<DominatorTree>();
   TD = getAnalysisIfAvailable<TargetData>();
+  TLI = getAnalysisIfAvailable<TargetLibraryInfo>();
 
   DeadInsts.clear();
   Changed = false;
@@ -1763,7 +1766,7 @@ bool IndVarSimplify::runOnLoop(Loop *L, LPPassManager &LPM) {
   while (!DeadInsts.empty())
     if (Instruction *Inst =
           dyn_cast_or_null<Instruction>(&*DeadInsts.pop_back_val()))
-      RecursivelyDeleteTriviallyDeadInstructions(Inst);
+      RecursivelyDeleteTriviallyDeadInstructions(Inst, TLI);
 
   // The Rewriter may not be used from this point on.
 
@@ -1772,7 +1775,7 @@ bool IndVarSimplify::runOnLoop(Loop *L, LPPassManager &LPM) {
   SinkUnusedInvariants(L);
 
   // Clean up dead instructions.
-  Changed |= DeleteDeadPHIs(L->getHeader());
+  Changed |= DeleteDeadPHIs(L->getHeader(), TLI);
 
   // Check a post-condition.
   assert(L->isLCSSAForm(*DT) &&
          "Indvars did not leave the loop in lcssa form!");
diff --git a/lib/Transforms/Scalar/JumpThreading.cpp b/lib/Transforms/Scalar/JumpThreading.cpp
index dd42c59..20844c6 100644
--- a/lib/Transforms/Scalar/JumpThreading.cpp
+++ b/lib/Transforms/Scalar/JumpThreading.cpp
@@ -1455,7 +1455,7 @@ bool JumpThreading::ThreadEdge(BasicBlock *BB,
   // At this point, the IR is fully up to date and consistent. Do a quick scan
   // over the new instructions and zap any that are constants or dead. This
   // frequently happens because of phi translation.
-  SimplifyInstructionsInBlock(NewBB, TD);
+  SimplifyInstructionsInBlock(NewBB, TD, TLI);
 
   // Threaded an edge!
   ++NumThreads;
diff --git a/lib/Transforms/Scalar/LICM.cpp b/lib/Transforms/Scalar/LICM.cpp
index 0192e92..99bedce 100644
--- a/lib/Transforms/Scalar/LICM.cpp
+++ b/lib/Transforms/Scalar/LICM.cpp
@@ -108,6 +108,9 @@ namespace {
     BasicBlock *Preheader;    // The preheader block of the current loop...
     Loop *CurLoop;            // The current loop we are working on...
     AliasSetTracker *CurAST;  // AliasSet information for the current loop...
+    bool MayThrow;            // The current loop contains an instruction which
+                              // may throw, thus preventing code motion of
+                              // instructions with side effects.
     DenseMap<Loop*, AliasSetTracker*> LoopToAliasSetMap;
 
     /// cloneBasicBlockAnalysis - Simple Analysis hook. Clone alias set info.
@@ -240,6 +243,15 @@ bool LICM::runOnLoop(Loop *L, LPPassManager &LPM) {
       CurAST->add(*BB);                 // Incorporate the specified basic block
   }
 
+  MayThrow = false;
+  // TODO: We've already searched for instructions which may throw in subloops.
+  // We may want to reuse this information.
+  for (Loop::block_iterator BB = L->block_begin(), BBE = L->block_end();
+       (BB != BBE) && !MayThrow ; ++BB)
+    for (BasicBlock::iterator I = (*BB)->begin(), E = (*BB)->end();
+         (I != E) && !MayThrow; ++I)
+      MayThrow |= I->mayThrow();
+
   // We want to visit all of the instructions in this loop... that are not parts
   // of our subloops (they have already had their invariants hoisted out of
   // their loop, into this loop, so there is no need to process the BODIES of
@@ -307,7 +319,7 @@ void LICM::SinkRegion(DomTreeNode *N) {
 
     // If the instruction is dead, we would try to sink it because it isn't used
     // in the loop, instead, just delete it.
-    if (isInstructionTriviallyDead(&I)) {
+    if (isInstructionTriviallyDead(&I, TLI)) {
       DEBUG(dbgs() << "LICM deleting dead inst: " << I << '\n');
       ++II;
       CurAST->deleteValue(&I);
@@ -418,17 +430,22 @@ bool LICM::canSinkOrHoistInst(Instruction &I) {
       if (!FoundMod) return true;
     }
 
-    // FIXME: This should use mod/ref information to see if we can hoist or sink
-    // the call.
+    // FIXME: This should use mod/ref information to see if we can hoist or
+    // sink the call.
 
     return false;
   }
 
-  // Otherwise these instructions are hoistable/sinkable
-  return isa<BinaryOperator>(I) || isa<CastInst>(I) ||
-         isa<SelectInst>(I) || isa<GetElementPtrInst>(I) || isa<CmpInst>(I) ||
-         isa<InsertElementInst>(I) || isa<ExtractElementInst>(I) ||
-         isa<ShuffleVectorInst>(I);
+  // Only these instructions are hoistable/sinkable.
+  bool HoistableKind = (isa<BinaryOperator>(I) || isa<CastInst>(I) ||
+                        isa<SelectInst>(I) || isa<GetElementPtrInst>(I) ||
+                        isa<CmpInst>(I) || isa<InsertElementInst>(I) ||
+                        isa<ExtractElementInst>(I) ||
+                        isa<ShuffleVectorInst>(I));
+  if (!HoistableKind)
+    return false;
+
+  return isSafeToExecuteUnconditionally(I);
 }
 
 /// isNotUsedInLoop - Return true if the only users of this instruction are
@@ -604,6 +621,12 @@ bool LICM::isSafeToExecuteUnconditionally(Instruction &Inst) {
 }
 
 bool LICM::isGuaranteedToExecute(Instruction &Inst) {
+
+  // Somewhere in this loop there is an instruction which may throw and make us
+  // exit the loop.
+  if (MayThrow)
+    return false;
+
   // Otherwise we have to check to make sure that the instruction dominates all
   // of the exit blocks.  If it doesn't, then there is a path out of the loop
   // which does not execute this instruction, so we can't hoist it.
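The `MayThrow` precomputation encodes a simple rule: an instruction is guaranteed to execute only if no instruction in the loop can throw and exit early, so a single potential throw disables hoisting of side-effecting code. The early-exit scan pattern, standalone and with illustrative types rather than LLVM's:

```cpp
#include <vector>

struct Instr { bool mayThrow; };
using Block = std::vector<Instr>;

// Scan every instruction in the loop, stopping as soon as one potential
// throw is found; one throwing instruction is enough to answer the query
// for the whole loop, so there is no point scanning further.
bool loopMayThrow(const std::vector<Block> &loopBlocks) {
  for (const Block &bb : loopBlocks)
    for (const Instr &in : bb)
      if (in.mayThrow)
        return true;
  return false;
}
```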
diff --git a/lib/Transforms/Scalar/LoopIdiomRecognize.cpp b/lib/Transforms/Scalar/LoopIdiomRecognize.cpp
index ac1082c..a72e288 100644
--- a/lib/Transforms/Scalar/LoopIdiomRecognize.cpp
+++ b/lib/Transforms/Scalar/LoopIdiomRecognize.cpp
@@ -132,7 +132,8 @@ Pass *llvm::createLoopIdiomPass() { return new LoopIdiomRecognize(); }
 /// and zero out all the operands of this instruction.  If any of them become
 /// dead, delete them and the computation tree that feeds them.
 ///
-static void deleteDeadInstruction(Instruction *I, ScalarEvolution &SE) {
+static void deleteDeadInstruction(Instruction *I, ScalarEvolution &SE,
+                                  const TargetLibraryInfo *TLI) {
   SmallVector<Instruction*, 32> NowDeadInsts;
 
   NowDeadInsts.push_back(I);
@@ -153,7 +154,7 @@ static void deleteDeadInstruction(Instruction *I, ScalarEvolution &SE) {
       if (!Op->use_empty()) continue;
 
       if (Instruction *OpI = dyn_cast<Instruction>(Op))
-        if (isInstructionTriviallyDead(OpI))
+        if (isInstructionTriviallyDead(OpI, TLI))
           NowDeadInsts.push_back(OpI);
     }
 
@@ -164,10 +165,11 @@ static void deleteDeadInstruction(Instruction *I, ScalarEvolution &SE) {
 
 /// deleteIfDeadInstruction - If the specified value is a dead instruction,
 /// delete it and any recursively used instructions.
-static void deleteIfDeadInstruction(Value *V, ScalarEvolution &SE) {
+static void deleteIfDeadInstruction(Value *V, ScalarEvolution &SE,
+                                    const TargetLibraryInfo *TLI) {
   if (Instruction *I = dyn_cast<Instruction>(V))
-    if (isInstructionTriviallyDead(I))
-      deleteDeadInstruction(I, SE);
+    if (isInstructionTriviallyDead(I, TLI))
+      deleteDeadInstruction(I, SE, TLI);
 }
 
 bool LoopIdiomRecognize::runOnLoop(Loop *L, LPPassManager &LPM) {
@@ -490,7 +492,7 @@ processLoopStridedStore(Value *DestPtr, unsigned StoreSize,
                             StoreSize, getAnalysis<AliasAnalysis>(), TheStore)){
     Expander.clear();
     // If we generated new code for the base pointer, clean up.
-    deleteIfDeadInstruction(BasePtr, *SE);
+    deleteIfDeadInstruction(BasePtr, *SE, TLI);
     return false;
   }
 
@@ -538,7 +540,7 @@ processLoopStridedStore(Value *DestPtr, unsigned StoreSize,
 
   // Okay, the memset has been formed.  Zap the original store and anything that
   // feeds into it.
-  deleteDeadInstruction(TheStore, *SE);
+  deleteDeadInstruction(TheStore, *SE, TLI);
   ++NumMemSet;
   return true;
 }
@@ -579,7 +581,7 @@ processLoopStoreOfLoopLoad(StoreInst *SI, unsigned StoreSize,
                             getAnalysis<AliasAnalysis>(), SI)) {
     Expander.clear();
     // If we generated new code for the base pointer, clean up.
-    deleteIfDeadInstruction(StoreBasePtr, *SE);
+    deleteIfDeadInstruction(StoreBasePtr, *SE, TLI);
     return false;
   }
 
@@ -594,8 +596,8 @@ processLoopStoreOfLoopLoad(StoreInst *SI, unsigned StoreSize,
                             StoreSize, getAnalysis<AliasAnalysis>(), SI)) {
     Expander.clear();
     // If we generated new code for the base pointer, clean up.
-    deleteIfDeadInstruction(LoadBasePtr, *SE);
-    deleteIfDeadInstruction(StoreBasePtr, *SE);
+    deleteIfDeadInstruction(LoadBasePtr, *SE, TLI);
+    deleteIfDeadInstruction(StoreBasePtr, *SE, TLI);
     return false;
   }
 
@@ -628,7 +630,7 @@ processLoopStoreOfLoopLoad(StoreInst *SI, unsigned StoreSize,
 
   // Okay, the memset has been formed.  Zap the original store and anything that
   // feeds into it.
-  deleteDeadInstruction(SI, *SE);
+  deleteDeadInstruction(SI, *SE, TLI);
   ++NumMemCpy;
   return true;
 }
diff --git a/lib/Transforms/Scalar/LoopInstSimplify.cpp b/lib/Transforms/Scalar/LoopInstSimplify.cpp
index 982400c..f5daa7b 100644
--- a/lib/Transforms/Scalar/LoopInstSimplify.cpp
+++ b/lib/Transforms/Scalar/LoopInstSimplify.cpp
@@ -120,7 +120,7 @@ bool LoopInstSimplify::runOnLoop(Loop *L, LPPassManager &LPM) {
           ++NumSimplified;
         }
       }
-      LocalChanged |= RecursivelyDeleteTriviallyDeadInstructions(I);
+      LocalChanged |= RecursivelyDeleteTriviallyDeadInstructions(I, TLI);
 
       if (IsSubloopHeader && !isa<PHINode>(I))
         break;
diff --git a/lib/Transforms/Scalar/LoopRotation.cpp b/lib/Transforms/Scalar/LoopRotation.cpp
index 7eeb152..abe07aa 100644
--- a/lib/Transforms/Scalar/LoopRotation.cpp
+++ b/lib/Transforms/Scalar/LoopRotation.cpp
@@ -24,6 +24,7 @@
 #include "llvm/Transforms/Utils/BasicBlockUtils.h"
 #include "llvm/Transforms/Utils/SSAUpdater.h"
 #include "llvm/Transforms/Utils/ValueMapper.h"
+#include "llvm/Support/CFG.h"
 #include "llvm/Support/Debug.h"
 #include "llvm/ADT/Statistic.h"
 using namespace llvm;
@@ -256,6 +257,7 @@ bool LoopRotate::rotateLoop(Loop *L) {
     return false;
 
   BasicBlock *OrigHeader = L->getHeader();
+  BasicBlock *OrigLatch = L->getLoopLatch();
 
   BranchInst *BI = dyn_cast<BranchInst>(OrigHeader->getTerminator());
   if (BI == 0 || BI->isUnconditional())
@@ -267,13 +269,9 @@ bool LoopRotate::rotateLoop(Loop *L) {
   if (!L->isLoopExiting(OrigHeader))
     return false;
 
-  // Updating PHInodes in loops with multiple exits adds complexity.
-  // Keep it simple, and restrict loop rotation to loops with one exit only.
-  // In future, lift this restriction and support for multiple exits if
-  // required.
-  SmallVector<BasicBlock*, 8> ExitBlocks;
-  L->getExitBlocks(ExitBlocks);
-  if (ExitBlocks.size() > 1)
+  // If the loop latch already contains a branch that leaves the loop then the
+  // loop is already rotated.
+  if (OrigLatch == 0 || L->isLoopExiting(OrigLatch))
     return false;
 
   // Check size of original header and reject loop if it is very big.
@@ -286,11 +284,10 @@ bool LoopRotate::rotateLoop(Loop *L) {
 
   // Now, this loop is suitable for rotation.
   BasicBlock *OrigPreheader = L->getLoopPreheader();
-  BasicBlock *OrigLatch = L->getLoopLatch();
 
   // If the loop could not be converted to canonical form, it must have an
   // indirectbr in it, just give up.
-  if (OrigPreheader == 0 || OrigLatch == 0)
+  if (OrigPreheader == 0)
     return false;
 
   // Anything ScalarEvolution may know about this loop or the PHI nodes
@@ -298,6 +295,8 @@ bool LoopRotate::rotateLoop(Loop *L) {
   if (ScalarEvolution *SE = getAnalysisIfAvailable<ScalarEvolution>())
     SE->forgetLoop(L);
 
+  DEBUG(dbgs() << "LoopRotation: rotating "; L->dump());
+
   // Find new Loop header. NewHeader is a Header's one and only successor
   // that is inside loop.  Header's other successor is outside the
   // loop.  Otherwise loop is not suitable for rotation.
@@ -408,10 +407,19 @@ bool LoopRotate::rotateLoop(Loop *L) {
   // Update DominatorTree to reflect the CFG change we just made.  Then split
   // edges as necessary to preserve LoopSimplify form.
   if (DominatorTree *DT = getAnalysisIfAvailable<DominatorTree>()) {
-    // Since OrigPreheader now has the conditional branch to Exit block, it is
-    // the dominator of Exit.
-    DT->changeImmediateDominator(Exit, OrigPreheader);
-    DT->changeImmediateDominator(NewHeader, OrigPreheader);
+    // Everything that was dominated by the old loop header is now dominated
+    // by the original loop preheader. Conceptually the header was merged
+    // into the preheader, even though we reuse the actual block as a new
+    // loop latch.
+    DomTreeNode *OrigHeaderNode = DT->getNode(OrigHeader);
+    SmallVector<DomTreeNode *, 8> HeaderChildren(OrigHeaderNode->begin(),
+                                                 OrigHeaderNode->end());
+    DomTreeNode *OrigPreheaderNode = DT->getNode(OrigPreheader);
+    for (unsigned I = 0, E = HeaderChildren.size(); I != E; ++I)
+      DT->changeImmediateDominator(HeaderChildren[I], OrigPreheaderNode);
+
+    assert(DT->getNode(Exit)->getIDom() == OrigPreheaderNode);
+    assert(DT->getNode(NewHeader)->getIDom() == OrigPreheaderNode);
 
     // Update OrigHeader to be dominated by the new header block.
     DT->changeImmediateDominator(OrigHeader, OrigLatch);
@@ -440,6 +448,35 @@ bool LoopRotate::rotateLoop(Loop *L) {
     // Update OrigHeader to be dominated by the new header block.
     DT->changeImmediateDominator(NewHeader, OrigPreheader);
     DT->changeImmediateDominator(OrigHeader, OrigLatch);
+
+    // Brute force incremental dominator tree update. Call
+    // findNearestCommonDominator on all CFG predecessors of each child of the
+    // original header.
+    DomTreeNode *OrigHeaderNode = DT->getNode(OrigHeader);
+    SmallVector<DomTreeNode *, 8> HeaderChildren(OrigHeaderNode->begin(),
+                                                 OrigHeaderNode->end());
+    bool Changed;
+    do {
+      Changed = false;
+      for (unsigned I = 0, E = HeaderChildren.size(); I != E; ++I) {
+        DomTreeNode *Node = HeaderChildren[I];
+        BasicBlock *BB = Node->getBlock();
+
+        pred_iterator PI = pred_begin(BB);
+        BasicBlock *NearestDom = *PI;
+        for (pred_iterator PE = pred_end(BB); PI != PE; ++PI)
+          NearestDom = DT->findNearestCommonDominator(NearestDom, *PI);
+
+        // Remember if this changes the DomTree.
+        if (Node->getIDom()->getBlock() != NearestDom) {
+          DT->changeImmediateDominator(BB, NearestDom);
+          Changed = true;
+        }
+      }
+
+      // If the dominator changed, this may have an effect on other
+      // predecessors, continue until we reach a fixpoint.
+    } while (Changed);
   }
 }
@@ -452,6 +489,8 @@ bool LoopRotate::rotateLoop(Loop *L) {
   // emitted code isn't too gross in this common case.
   MergeBlockIntoPredecessor(OrigHeader, this);
 
+  DEBUG(dbgs() << "LoopRotation: into "; L->dump());
+
   ++NumRotated;
   return true;
 }
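At the source level, rotation turns a top-tested loop into a guarded bottom-tested one, which is why the old single-exit restriction could be replaced by the cheaper "is the latch already exiting?" check. A C++ sketch of the shape change (`work` is a placeholder for the loop body):

```cpp
inline void work(int) {}  // placeholder loop body

// Before rotation: the header both tests the condition and holds the exit;
// the latch unconditionally jumps back to the header.
void before(int n) {
  for (int i = 0; i < n; ++i)  // header: test + exit; latch: back edge
    work(i);
}

// After rotation: the old header's test is copied into the preheader as a
// guard, and the loop body becomes do-while with the exit test in the latch.
// A loop whose latch already exits is "already rotated" and is skipped.
void after(int n) {
  int i = 0;
  if (i < n) {                 // guard: copied header test
    do {
      work(i);
      ++i;
    } while (i < n);           // latch now carries the exit test
  }
}
```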
diff --git a/lib/Transforms/Scalar/LoopStrengthReduce.cpp b/lib/Transforms/Scalar/LoopStrengthReduce.cpp
index 0ae7a51..d7495da 100644
--- a/lib/Transforms/Scalar/LoopStrengthReduce.cpp
+++ b/lib/Transforms/Scalar/LoopStrengthReduce.cpp
@@ -121,9 +121,11 @@ void RegSortData::print(raw_ostream &OS) const {
   OS << "[NumUses=" << UsedByIndices.count() << ']';
 }
 
+#ifndef NDEBUG
 void RegSortData::dump() const {
   print(errs()); errs() << '\n';
 }
+#endif
 
 namespace {
 
@@ -414,9 +416,11 @@ void Formula::print(raw_ostream &OS) const {
   }
 }
 
+#ifndef NDEBUG
 void Formula::dump() const {
   print(errs()); errs() << '\n';
 }
+#endif
 
 /// isAddRecSExtable - Return true if the given addrec can be sign-extended
 /// without changing its value.
@@ -974,9 +978,11 @@ void Cost::print(raw_ostream &OS) const {
     OS << ", plus " << SetupCost << " setup cost";
 }
 
+#ifndef NDEBUG
 void Cost::dump() const {
   print(errs()); errs() << '\n';
 }
+#endif
 
 namespace {
 
@@ -1060,9 +1066,11 @@ void LSRFixup::print(raw_ostream &OS) const {
     OS << ", Offset=" << Offset;
 }
 
+#ifndef NDEBUG
 void LSRFixup::dump() const {
   print(errs()); errs() << '\n';
 }
+#endif
 
 namespace {
 
@@ -1252,9 +1260,11 @@ void LSRUse::print(raw_ostream &OS) const {
     OS << ", widest fixup type: " << *WidestFixupType;
 }
 
+#ifndef NDEBUG
 void LSRUse::dump() const {
   print(errs()); errs() << '\n';
 }
+#endif
 
 /// isLegalUse - Test whether the use described by AM is "legal", meaning it can
 /// be completely folded into the user instruction at isel time.  This includes
@@ -3436,9 +3446,11 @@ void WorkItem::print(raw_ostream &OS) const {
      << " , add offset " << Imm;
 }
 
+#ifndef NDEBUG
 void WorkItem::dump() const {
   print(errs()); errs() << '\n';
 }
+#endif
 
 /// GenerateCrossUseConstantOffsets - Look for registers which are a constant
 /// distance apart and try to form reuse opportunities between them.
@@ -4731,9 +4743,11 @@ void LSRInstance::print(raw_ostream &OS) const {
   print_uses(OS);
 }
 
+#ifndef NDEBUG
 void LSRInstance::dump() const {
   print(errs()); errs() << '\n';
 }
+#endif
 
 namespace {
diff --git a/lib/Transforms/Scalar/ObjCARC.cpp b/lib/Transforms/Scalar/ObjCARC.cpp
index 3222f20..dce8e8b 100644
--- a/lib/Transforms/Scalar/ObjCARC.cpp
+++ b/lib/Transforms/Scalar/ObjCARC.cpp
@@ -1236,16 +1236,19 @@ bool ProvenanceAnalysis::relatedCheck(const Value *A, const Value *B) {
 
   // An ObjC-Identified object can't alias a load if it is never locally stored.
   if (AIsIdentified) {
+    // Check for an obvious escape.
+    if (isa<LoadInst>(B))
+      return isStoredObjCPointer(A);
     if (BIsIdentified) {
-      // If both pointers have provenance, they can be directly compared.
-      if (A != B)
-        return false;
-    } else {
-      if (isa<LoadInst>(B))
-        return isStoredObjCPointer(A);
+      // Check for an obvious escape.
+      if (isa<LoadInst>(A))
+        return isStoredObjCPointer(B);
+      // Both pointers are identified and escapes aren't an evident problem.
+      return false;
     }
-  } else {
-    if (BIsIdentified && isa<LoadInst>(A))
+  } else if (BIsIdentified) {
+    // Check for an obvious escape.
+    if (isa<LoadInst>(A))
       return isStoredObjCPointer(B);
   }
 
@@ -1381,9 +1384,6 @@ namespace {
   /// PtrState - This class summarizes several per-pointer runtime properties
   /// which are propogated through the flow graph.
   class PtrState {
-    /// NestCount - The known minimum level of retain+release nesting.
-    unsigned NestCount;
-
     /// KnownPositiveRefCount - True if the reference count is known to
     /// be incremented.
     bool KnownPositiveRefCount;
@@ -1401,7 +1401,7 @@ namespace {
     /// TODO: Encapsulate this better.
    RRInfo RRI;
 
-    PtrState() : NestCount(0), KnownPositiveRefCount(false), Partial(false),
+    PtrState() : KnownPositiveRefCount(false), Partial(false),
                  Seq(S_None) {}
 
     void SetKnownPositiveRefCount() {
@@ -1416,18 +1416,6 @@ namespace {
       return KnownPositiveRefCount;
     }
 
-    void IncrementNestCount() {
-      if (NestCount != UINT_MAX) ++NestCount;
-    }
-
-    void DecrementNestCount() {
-      if (NestCount != 0) --NestCount;
-    }
-
-    bool IsKnownNested() const {
-      return NestCount > 0;
-    }
-
     void SetSeq(Sequence NewSeq) {
       Seq = NewSeq;
     }
@@ -1454,7 +1442,6 @@ void PtrState::Merge(const PtrState &Other, bool TopDown) {
   Seq = MergeSeqs(Seq, Other.Seq, TopDown);
   KnownPositiveRefCount = KnownPositiveRefCount && Other.KnownPositiveRefCount;
-  NestCount = std::min(NestCount, Other.NestCount);
 
   // We can't merge a plain objc_retain with an objc_retainBlock.
   if (RRI.IsRetainBlock != Other.RRI.IsRetainBlock)
@@ -1868,6 +1855,26 @@ Constant *ObjCARCOpt::getAutoreleaseCallee(Module *M) {
   return AutoreleaseCallee;
 }
 
+/// IsPotentialUse - Test whether the given value is possible a
+/// reference-counted pointer, including tests which utilize AliasAnalysis.
+static bool IsPotentialUse(const Value *Op, AliasAnalysis &AA) {
+  // First make the rudimentary check.
+  if (!IsPotentialUse(Op))
+    return false;
+
+  // Objects in constant memory are not reference-counted.
+  if (AA.pointsToConstantMemory(Op))
+    return false;
+
+  // Pointers in constant memory are not pointing to reference-counted objects.
+  if (const LoadInst *LI = dyn_cast<LoadInst>(Op))
+    if (AA.pointsToConstantMemory(LI->getPointerOperand()))
+      return false;
+
+  // Otherwise assume the worst.
+  return true;
+}
+
 /// CanAlterRefCount - Test whether the given instruction can result in a
 /// reference count modification (positive or negative) for the pointer's
 /// object.
@@ -1894,7 +1901,7 @@ CanAlterRefCount(const Instruction *Inst, const Value *Ptr,
     for (ImmutableCallSite::arg_iterator I = CS.arg_begin(), E = CS.arg_end();
          I != E; ++I) {
       const Value *Op = *I;
-      if (IsPotentialUse(Op) && PA.related(Ptr, Op))
+      if (IsPotentialUse(Op, *PA.getAA()) && PA.related(Ptr, Op))
         return true;
     }
     return false;
@@ -1919,14 +1926,14 @@ CanUse(const Instruction *Inst, const Value *Ptr, ProvenanceAnalysis &PA,
     // Comparing a pointer with null, or any other constant, isn't really a use,
     // because we don't care what the pointer points to, or about the values
     // of any other dynamic reference-counted pointers.
-    if (!IsPotentialUse(ICI->getOperand(1)))
+    if (!IsPotentialUse(ICI->getOperand(1), *PA.getAA()))
       return false;
   } else if (ImmutableCallSite CS = static_cast<const Value *>(Inst)) {
     // For calls, just check the arguments (and not the callee operand).
    for (ImmutableCallSite::arg_iterator OI = CS.arg_begin(),
         OE = CS.arg_end(); OI != OE; ++OI) {
       const Value *Op = *OI;
-      if (IsPotentialUse(Op) && PA.related(Ptr, Op))
+      if (IsPotentialUse(Op, *PA.getAA()) && PA.related(Ptr, Op))
         return true;
     }
     return false;
@@ -1936,14 +1943,14 @@ CanUse(const Instruction *Inst, const Value *Ptr, ProvenanceAnalysis &PA,
     const Value *Op = GetUnderlyingObjCPtr(SI->getPointerOperand());
     // If we can't tell what the underlying object was, assume there is a
     // dependence.
-    return IsPotentialUse(Op) && PA.related(Op, Ptr);
+    return IsPotentialUse(Op, *PA.getAA()) && PA.related(Op, Ptr);
   }
 
   // Check each operand for a match.
   for (User::const_op_iterator OI = Inst->op_begin(), OE = Inst->op_end();
        OI != OE; ++OI) {
     const Value *Op = *OI;
-    if (IsPotentialUse(Op) && PA.related(Ptr, Op))
+    if (IsPotentialUse(Op, *PA.getAA()) && PA.related(Ptr, Op))
       return true;
   }
   return false;
@@ -2612,11 +2619,11 @@ ObjCARCOpt::VisitInstructionBottomUp(Instruction *Inst,
     MDNode *ReleaseMetadata = Inst->getMetadata(ImpreciseReleaseMDKind);
     S.ResetSequenceProgress(ReleaseMetadata ? S_MovableRelease : S_Release);
     S.RRI.ReleaseMetadata = ReleaseMetadata;
-    S.RRI.KnownSafe = S.IsKnownNested() || S.IsKnownIncremented();
+    S.RRI.KnownSafe = S.IsKnownIncremented();
    S.RRI.IsTailCallRelease = cast<CallInst>(Inst)->isTailCall();
     S.RRI.Calls.insert(Inst);
 
-    S.IncrementNestCount();
+    S.SetKnownPositiveRefCount();
     break;
   }
   case IC_RetainBlock:
@@ -2631,7 +2638,6 @@ ObjCARCOpt::VisitInstructionBottomUp(Instruction *Inst,
     PtrState &S = MyStates.getPtrBottomUpState(Arg);
     S.SetKnownPositiveRefCount();
-    S.DecrementNestCount();
 
     switch (S.GetSeq()) {
     case S_Stop:
@@ -2747,8 +2753,9 @@ ObjCARCOpt::VisitBottomUp(BasicBlock *BB,
 
   // Merge the states from each successor to compute the initial state
   // for the current block.
-  for (BBState::edge_iterator SI(MyStates.succ_begin()),
-       SE(MyStates.succ_end()); SI != SE; ++SI) {
+  BBState::edge_iterator SI(MyStates.succ_begin()),
+                         SE(MyStates.succ_end());
+  if (SI != SE) {
     const BasicBlock *Succ = *SI;
     DenseMap<const BasicBlock *, BBState>::iterator I = BBStates.find(Succ);
     assert(I != BBStates.end());
@@ -2760,7 +2767,6 @@ ObjCARCOpt::VisitBottomUp(BasicBlock *BB,
       assert(I != BBStates.end());
       MyStates.MergeSucc(I->second);
     }
-    break;
   }
 
   // Visit all the instructions, bottom-up.
@@ -2823,12 +2829,11 @@ ObjCARCOpt::VisitInstructionTopDown(Instruction *Inst,
       S.ResetSequenceProgress(S_Retain);
       S.RRI.IsRetainBlock = Class == IC_RetainBlock;
-      // Don't check S.IsKnownIncremented() here because it's not sufficient.
-      S.RRI.KnownSafe = S.IsKnownNested();
+      S.RRI.KnownSafe = S.IsKnownIncremented();
       S.RRI.Calls.insert(Inst);
     }
 
-    S.IncrementNestCount();
+    S.SetKnownPositiveRefCount();
 
     // A retain can be a potential use; procede to the generic checking
     // code below.
@@ -2838,7 +2843,7 @@ ObjCARCOpt::VisitInstructionTopDown(Instruction *Inst,
     Arg = GetObjCArg(Inst);
 
     PtrState &S = MyStates.getPtrTopDownState(Arg);
-    S.DecrementNestCount();
+    S.ClearRefCount();
 
     switch (S.GetSeq()) {
     case S_Retain:
@@ -2935,8 +2940,9 @@ ObjCARCOpt::VisitTopDown(BasicBlock *BB,
 
   // Merge the states from each predecessor to compute the initial state
   // for the current block.
-  for (BBState::edge_iterator PI(MyStates.pred_begin()),
-       PE(MyStates.pred_end()); PI != PE; ++PI) {
+  BBState::edge_iterator PI(MyStates.pred_begin()),
+                         PE(MyStates.pred_end());
+  if (PI != PE) {
     const BasicBlock *Pred = *PI;
     DenseMap<const BasicBlock *, BBState>::iterator I = BBStates.find(Pred);
     assert(I != BBStates.end());
@@ -2948,7 +2954,6 @@ ObjCARCOpt::VisitTopDown(BasicBlock *BB,
       assert(I != BBStates.end());
       MyStates.MergePred(I->second);
     }
-    break;
   }
 
   // Visit all the instructions, top-down.
diff --git a/lib/Transforms/Scalar/SimplifyCFGPass.cpp b/lib/Transforms/Scalar/SimplifyCFGPass.cpp
index d13e4ab..6d27db1 100644
--- a/lib/Transforms/Scalar/SimplifyCFGPass.cpp
+++ b/lib/Transforms/Scalar/SimplifyCFGPass.cpp
@@ -59,9 +59,9 @@ FunctionPass *llvm::createCFGSimplificationPass() {
   return new CFGSimplifyPass();
 }
 
-/// ChangeToUnreachable - Insert an unreachable instruction before the specified
+/// changeToUnreachable - Insert an unreachable instruction before the specified
 /// instruction, making it and the rest of the code in the block dead.
-static void ChangeToUnreachable(Instruction *I, bool UseLLVMTrap) {
+static void changeToUnreachable(Instruction *I, bool UseLLVMTrap) {
   BasicBlock *BB = I->getParent();
   // Loop over all of the successors, removing BB's entry from any PHI
   // nodes.
@@ -87,8 +87,8 @@ static void ChangeToUnreachable(Instruction *I, bool UseLLVMTrap) {
   }
 }
 
-/// ChangeToCall - Convert the specified invoke into a normal call.
-static void ChangeToCall(InvokeInst *II) {
+/// changeToCall - Convert the specified invoke into a normal call.
+static void changeToCall(InvokeInst *II) {
   SmallVector<Value*, 8> Args(II->op_begin(), II->op_end() - 3);
   CallInst *NewCall = CallInst::Create(II->getCalledValue(), Args, "", II);
   NewCall->takeName(II);
@@ -105,7 +105,7 @@ static void ChangeToCall(InvokeInst *II) {
   II->eraseFromParent();
 }
 
-static bool MarkAliveBlocks(BasicBlock *BB,
+static bool markAliveBlocks(BasicBlock *BB,
                             SmallPtrSet<BasicBlock*, 128> &Reachable) {
 
   SmallVector<BasicBlock*, 128> Worklist;
@@ -129,7 +129,7 @@ static bool MarkAliveBlocks(BasicBlock *BB,
           ++BBI;
           if (!isa<UnreachableInst>(BBI)) {
             // Don't insert a call to llvm.trap right before the unreachable.
-            ChangeToUnreachable(BBI, false);
+            changeToUnreachable(BBI, false);
             Changed = true;
           }
           break;
@@ -148,7 +148,7 @@ static bool MarkAliveBlocks(BasicBlock *BB,
         if (isa<UndefValue>(Ptr) ||
             (isa<ConstantPointerNull>(Ptr) &&
              SI->getPointerAddressSpace() == 0)) {
-          ChangeToUnreachable(SI, true);
+          changeToUnreachable(SI, true);
           Changed = true;
           break;
         }
@@ -159,7 +159,7 @@ static bool MarkAliveBlocks(BasicBlock *BB,
     if (InvokeInst *II = dyn_cast<InvokeInst>(BB->getTerminator())) {
       Value *Callee = II->getCalledValue();
       if (isa<ConstantPointerNull>(Callee) || isa<UndefValue>(Callee)) {
-        ChangeToUnreachable(II, true);
+        changeToUnreachable(II, true);
         Changed = true;
       } else if (II->doesNotThrow()) {
         if (II->use_empty() && II->onlyReadsMemory()) {
@@ -168,7 +168,7 @@ static bool MarkAliveBlocks(BasicBlock *BB,
           II->getUnwindDest()->removePredecessor(II->getParent());
           II->eraseFromParent();
         } else
-          ChangeToCall(II);
+          changeToCall(II);
         Changed = true;
       }
     }
@@ -180,12 +180,12 @@ static bool MarkAliveBlocks(BasicBlock *BB,
   return Changed;
 }
 
-/// RemoveUnreachableBlocksFromFn - Remove blocks that are not reachable, even
+/// removeUnreachableBlocksFromFn - Remove blocks that are not reachable, even
 /// if they are in a dead cycle.  Return true if a change was made, false
 /// otherwise.
-static bool RemoveUnreachableBlocksFromFn(Function &F) {
+static bool removeUnreachableBlocksFromFn(Function &F) {
   SmallPtrSet<BasicBlock*, 128> Reachable;
-  bool Changed = MarkAliveBlocks(F.begin(), Reachable);
+  bool Changed = markAliveBlocks(F.begin(), Reachable);
 
   // If there are unreachable blocks in the CFG...
   if (Reachable.size() == F.size())
@@ -215,9 +215,9 @@ static bool RemoveUnreachableBlocksFromFn(Function &F) {
   return true;
 }
 
-/// MergeEmptyReturnBlocks - If we have more than one empty (other than phi
+/// mergeEmptyReturnBlocks - If we have more than one empty (other than phi
 /// node) return blocks, merge them together to promote recursive block merging.
-static bool MergeEmptyReturnBlocks(Function &F) {
+static bool mergeEmptyReturnBlocks(Function &F) {
   bool Changed = false;
 
   BasicBlock *RetBlock = 0;
@@ -291,9 +291,9 @@ static bool MergeEmptyReturnBlocks(Function &F) {
   return Changed;
 }
 
-/// IterativeSimplifyCFG - Call SimplifyCFG on all the blocks in the function,
+/// iterativelySimplifyCFG - Call SimplifyCFG on all the blocks in the function,
 /// iterating until no more changes are made.
-static bool IterativeSimplifyCFG(Function &F, const TargetData *TD) {
+static bool iterativelySimplifyCFG(Function &F, const TargetData *TD) {
   bool Changed = false;
   bool LocalChange = true;
   while (LocalChange) {
@@ -317,24 +317,24 @@ static bool IterativeSimplifyCFG(Function &F, const TargetData *TD) {
 //
 bool CFGSimplifyPass::runOnFunction(Function &F) {
   const TargetData *TD = getAnalysisIfAvailable<TargetData>();
-  bool EverChanged = RemoveUnreachableBlocksFromFn(F);
-  EverChanged |= MergeEmptyReturnBlocks(F);
-  EverChanged |= IterativeSimplifyCFG(F, TD);
+  bool EverChanged = removeUnreachableBlocksFromFn(F);
+  EverChanged |= mergeEmptyReturnBlocks(F);
+  EverChanged |= iterativelySimplifyCFG(F, TD);
 
   // If neither pass changed anything, we're done.
   if (!EverChanged) return false;
 
-  // IterativeSimplifyCFG can (rarely) make some loops dead.  If this happens,
-  // RemoveUnreachableBlocksFromFn is needed to nuke them, which means we should
+  // iterativelySimplifyCFG can (rarely) make some loops dead.  If this happens,
+  // removeUnreachableBlocksFromFn is needed to nuke them, which means we should
   // iterate between the two optimizations.  We structure the code like this to
-  // avoid reruning IterativeSimplifyCFG if the second pass of
-  // RemoveUnreachableBlocksFromFn doesn't do anything.
-  if (!RemoveUnreachableBlocksFromFn(F))
+  // avoid reruning iterativelySimplifyCFG if the second pass of
+  // removeUnreachableBlocksFromFn doesn't do anything.
+  if (!removeUnreachableBlocksFromFn(F))
    return true;
 
  do {
-    EverChanged = IterativeSimplifyCFG(F, TD);
-    EverChanged |= RemoveUnreachableBlocksFromFn(F);
+    EverChanged = iterativelySimplifyCFG(F, TD);
+    EverChanged |= removeUnreachableBlocksFromFn(F);
   } while (EverChanged);
 
   return true;
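The driver above alternates the two cleanups to a fixpoint because each can expose work for the other: simplification can kill loops, and removing the resulting unreachable blocks can make more blocks simplifiable. The control-flow pattern, reduced to a standalone sketch with hypothetical function objects standing in for the two passes:

```cpp
#include <functional>

// Run two mutually-enabling cleanups until neither reports a change.
bool runToFixpoint(const std::function<bool()> &simplify,
                   const std::function<bool()> &removeUnreachable) {
  bool everChanged = removeUnreachable();
  everChanged |= simplify();
  if (!everChanged)
    return false;         // nothing to do at all

  bool changed;
  do {
    changed = simplify();
    changed |= removeUnreachable();
  } while (changed);      // each pass may re-enable the other
  return true;
}
```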
diff --git a/lib/Transforms/Scalar/SimplifyLibCalls.cpp b/lib/Transforms/Scalar/SimplifyLibCalls.cpp
index 3904419..65311fe 100644
--- a/lib/Transforms/Scalar/SimplifyLibCalls.cpp
+++ b/lib/Transforms/Scalar/SimplifyLibCalls.cpp
@@ -28,6 +28,7 @@
 #include "llvm/ADT/Statistic.h"
 #include "llvm/ADT/StringMap.h"
 #include "llvm/Analysis/ValueTracking.h"
+#include "llvm/Support/CommandLine.h"
 #include "llvm/Support/Debug.h"
 #include "llvm/Support/raw_ostream.h"
 #include "llvm/Target/TargetData.h"
@@ -38,6 +39,10 @@ using namespace llvm;
 STATISTIC(NumSimplified, "Number of library calls simplified");
 STATISTIC(NumAnnotated, "Number of attributes added to library functions");
 
+static cl::opt<bool> UnsafeFPShrink("enable-double-float-shrink", cl::Hidden,
+                                    cl::init(false),
+                                    cl::desc("Enable unsafe double to float "
+                                             "shrinking for math lib calls"));
 //===----------------------------------------------------------------------===//
 // Optimizer Base Class
 //===----------------------------------------------------------------------===//
@@ -893,16 +898,56 @@ struct MemSetOpt : public LibCallOptimization {
 //===----------------------------------------------------------------------===//
 
 //===---------------------------------------===//
-// 'cos*' Optimizations
+// Double -> Float Shrinking Optimizations for Unary Functions like 'floor'
+
+struct UnaryDoubleFPOpt : public LibCallOptimization {
+  bool CheckRetType;
+  UnaryDoubleFPOpt(bool CheckReturnType): CheckRetType(CheckReturnType) {}
+  virtual Value *CallOptimizer(Function *Callee, CallInst *CI, IRBuilder<> &B) {
+    FunctionType *FT = Callee->getFunctionType();
+    if (FT->getNumParams() != 1 || !FT->getReturnType()->isDoubleTy() ||
+        !FT->getParamType(0)->isDoubleTy())
+      return 0;
+
+    if (CheckRetType) {
+      // Check if all the uses for function like 'sin' are converted to float.
+      for (Value::use_iterator UseI = CI->use_begin(); UseI != CI->use_end();
+           ++UseI) {
+        FPTruncInst *Cast = dyn_cast<FPTruncInst>(*UseI);
+        if (Cast == 0 || !Cast->getType()->isFloatTy())
+          return 0;
+      }
+    }
+
+    // If this is something like 'floor((double)floatval)', convert to floorf.
+    FPExtInst *Cast = dyn_cast<FPExtInst>(CI->getArgOperand(0));
+    if (Cast == 0 || !Cast->getOperand(0)->getType()->isFloatTy())
+      return 0;
+
+    // floor((double)floatval) -> (double)floorf(floatval)
+    Value *V = Cast->getOperand(0);
+    V = EmitUnaryFloatFnCall(V, Callee->getName(), B, Callee->getAttributes());
+    return B.CreateFPExt(V, B.getDoubleTy());
+  }
+};
 
+//===---------------------------------------===//
+// 'cos*' Optimizations
+
 struct CosOpt : public LibCallOptimization {
   virtual Value *CallOptimizer(Function *Callee, CallInst *CI, IRBuilder<> &B) {
+    Value *Ret = NULL;
+    if (UnsafeFPShrink && Callee->getName() == "cos" &&
+        TLI->has(LibFunc::cosf)) {
+      UnaryDoubleFPOpt UnsafeUnaryDoubleFP(true);
+      Ret = UnsafeUnaryDoubleFP.CallOptimizer(Callee, CI, B);
+    }
+
     FunctionType *FT = Callee->getFunctionType();
     // Just make sure this has 1 argument of FP type, which matches the
     // result type.
     if (FT->getNumParams() != 1 || FT->getReturnType() != FT->getParamType(0) ||
         !FT->getParamType(0)->isFloatingPointTy())
-      return 0;
+      return Ret;
 
     // cos(-x) -> cos(x)
     Value *Op1 = CI->getArgOperand(0);
@@ -910,7 +955,7 @@ struct CosOpt : public LibCallOptimization {
       BinaryOperator *BinExpr = cast<BinaryOperator>(Op1);
       return B.CreateCall(Callee, BinExpr->getOperand(1), "cos");
     }
-    return 0;
+    return Ret;
   }
 };
 
@@ -919,13 +964,20 @@ struct CosOpt : public LibCallOptimization {
 
 struct PowOpt : public LibCallOptimization {
   virtual Value *CallOptimizer(Function *Callee, CallInst *CI, IRBuilder<> &B) {
+    Value *Ret = NULL;
+    if (UnsafeFPShrink && Callee->getName() == "pow" &&
+        TLI->has(LibFunc::powf)) {
+      UnaryDoubleFPOpt UnsafeUnaryDoubleFP(true);
+      Ret = UnsafeUnaryDoubleFP.CallOptimizer(Callee, CI, B);
+    }
+
     FunctionType *FT = Callee->getFunctionType();
     // Just make sure this has 2 arguments of the same FP type, which match the
     // result type.
     if (FT->getNumParams() != 2 || FT->getReturnType() != FT->getParamType(0) ||
         FT->getParamType(0) != FT->getParamType(1) ||
         !FT->getParamType(0)->isFloatingPointTy())
-      return 0;
+      return Ret;
 
     Value *Op1 = CI->getArgOperand(0), *Op2 = CI->getArgOperand(1);
     if (ConstantFP *Op1C = dyn_cast<ConstantFP>(Op1)) {
@@ -936,7 +988,7 @@ struct PowOpt : public LibCallOptimization {
     }
 
     ConstantFP *Op2C = dyn_cast<ConstantFP>(Op2);
-    if (Op2C == 0) return 0;
+    if (Op2C == 0) return Ret;
 
     if (Op2C->getValueAPF().isZero())  // pow(x, 0.0) -> 1.0
       return ConstantFP::get(CI->getType(), 1.0);
@@ -974,12 +1026,19 @@ struct PowOpt : public LibCallOptimization {
 
 struct Exp2Opt : public LibCallOptimization {
   virtual Value *CallOptimizer(Function *Callee, CallInst *CI, IRBuilder<> &B) {
+    Value *Ret = NULL;
+    if (UnsafeFPShrink && Callee->getName() == "exp2" &&
+        TLI->has(LibFunc::exp2)) {
+      UnaryDoubleFPOpt UnsafeUnaryDoubleFP(true);
+      Ret = UnsafeUnaryDoubleFP.CallOptimizer(Callee, CI, B);
+    }
+
     FunctionType *FT = Callee->getFunctionType();
     // Just make sure this has 1 argument of FP type, which matches the
     // result type.
     if (FT->getNumParams() != 1 || FT->getReturnType() != FT->getParamType(0) ||
         !FT->getParamType(0)->isFloatingPointTy())
-      return 0;
+      return Ret;
 
     Value *Op = CI->getArgOperand(0);
     // Turn exp2(sitofp(x)) -> ldexp(1.0, sext(x))  if sizeof(x) <= 32
@@ -1016,29 +1075,7 @@ struct Exp2Opt : public LibCallOptimization {
       return CI;
     }
 
-    return 0;
-  }
-};
-
-//===---------------------------------------===//
-// Double -> Float Shrinking Optimizations for Unary Functions like 'floor'
-
-struct UnaryDoubleFPOpt : public LibCallOptimization {
-  virtual Value *CallOptimizer(Function *Callee, CallInst *CI, IRBuilder<> &B) {
-    FunctionType *FT = Callee->getFunctionType();
-    if (FT->getNumParams() != 1 || !FT->getReturnType()->isDoubleTy() ||
-        !FT->getParamType(0)->isDoubleTy())
-      return 0;
-
-    // If this is something like 'floor((double)floatval)', convert to floorf.
-    FPExtInst *Cast = dyn_cast<FPExtInst>(CI->getArgOperand(0));
-    if (Cast == 0 || !Cast->getOperand(0)->getType()->isFloatTy())
-      return 0;
-
-    // floor((double)floatval) -> (double)floorf(floatval)
-    Value *V = Cast->getOperand(0);
-    V = EmitUnaryFloatFnCall(V, Callee->getName(), B, Callee->getAttributes());
-    return B.CreateFPExt(V, B.getDoubleTy());
+    return Ret;
   }
 };
 
@@ -1534,7 +1571,8 @@ namespace {
     StrToOpt StrTo; StrSpnOpt StrSpn; StrCSpnOpt StrCSpn; StrStrOpt StrStr;
     MemCmpOpt MemCmp; MemCpyOpt MemCpy; MemMoveOpt MemMove; MemSetOpt MemSet;
     // Math Library Optimizations
-    CosOpt Cos; PowOpt Pow; Exp2Opt Exp2; UnaryDoubleFPOpt UnaryDoubleFP;
+    CosOpt Cos; PowOpt Pow; Exp2Opt Exp2;
+    UnaryDoubleFPOpt UnaryDoubleFP, UnsafeUnaryDoubleFP;
     // Integer Optimizations
     FFSOpt FFS; AbsOpt Abs; IsDigitOpt IsDigit; IsAsciiOpt IsAscii;
     ToAsciiOpt ToAscii;
@@ -1547,10 +1585,13 @@ namespace {
   public:
     static char ID; // Pass identification
     SimplifyLibCalls() : FunctionPass(ID), StrCpy(false), StrCpyChk(true),
-                         StpCpy(false), StpCpyChk(true) {
+                         StpCpy(false), StpCpyChk(true),
+                         UnaryDoubleFP(false), UnsafeUnaryDoubleFP(true) {
       initializeSimplifyLibCallsPass(*PassRegistry::getPassRegistry());
     }
     void AddOpt(LibFunc::Func F, LibCallOptimization* Opt);
+    void AddOpt(LibFunc::Func F1, LibFunc::Func F2, LibCallOptimization* Opt);
+
     void InitOptimizations();
     bool runOnFunction(Function &F);
@@ -1586,6 +1627,12 @@ void SimplifyLibCalls::AddOpt(LibFunc::Func F, LibCallOptimization* Opt) {
     Optimizations[TLI->getName(F)] = Opt;
 }
 
+void SimplifyLibCalls::AddOpt(LibFunc::Func F1, LibFunc::Func F2,
+                              LibCallOptimization* Opt) {
+  if (TLI->has(F1) && TLI->has(F2))
+    Optimizations[TLI->getName(F1)] = Opt;
+}
+
 /// Optimizations - Populate the Optimizations map with all the optimizations
 /// we know.
 void SimplifyLibCalls::InitOptimizations() {
@@ -1641,20 +1688,37 @@ void SimplifyLibCalls::InitOptimizations() {
   Optimizations["llvm.exp2.f64"] = &Exp2;
   Optimizations["llvm.exp2.f32"] = &Exp2;
 
-  if (TLI->has(LibFunc::fabs) && TLI->has(LibFunc::fabsf))
-    Optimizations["fabs"] = &UnaryDoubleFP;
-  if (TLI->has(LibFunc::floor) && TLI->has(LibFunc::floorf))
-    Optimizations["floor"] = &UnaryDoubleFP;
-  if (TLI->has(LibFunc::ceil) && TLI->has(LibFunc::ceilf))
-    Optimizations["ceil"] = &UnaryDoubleFP;
-  if (TLI->has(LibFunc::round) && TLI->has(LibFunc::roundf))
-    Optimizations["round"] = &UnaryDoubleFP;
-  if (TLI->has(LibFunc::rint) && TLI->has(LibFunc::rintf))
-    Optimizations["rint"] = &UnaryDoubleFP;
-  if (TLI->has(LibFunc::nearbyint) && TLI->has(LibFunc::nearbyintf))
-    Optimizations["nearbyint"] = &UnaryDoubleFP;
-  if (TLI->has(LibFunc::trunc) && TLI->has(LibFunc::truncf))
-    Optimizations["trunc"] = &UnaryDoubleFP;
+  AddOpt(LibFunc::ceil, LibFunc::ceilf, &UnaryDoubleFP);
+  AddOpt(LibFunc::fabs, LibFunc::fabsf, &UnaryDoubleFP);
+  AddOpt(LibFunc::floor, LibFunc::floorf, &UnaryDoubleFP);
+  AddOpt(LibFunc::rint, LibFunc::rintf, &UnaryDoubleFP);
+  AddOpt(LibFunc::round, LibFunc::roundf, &UnaryDoubleFP);
+  AddOpt(LibFunc::nearbyint, LibFunc::nearbyintf, &UnaryDoubleFP);
+  AddOpt(LibFunc::trunc, LibFunc::truncf, &UnaryDoubleFP);
+
+  if(UnsafeFPShrink) {
+    AddOpt(LibFunc::acos, LibFunc::acosf, &UnsafeUnaryDoubleFP);
+    AddOpt(LibFunc::acosh, LibFunc::acoshf, &UnsafeUnaryDoubleFP);
+    AddOpt(LibFunc::asin, LibFunc::asinf, &UnsafeUnaryDoubleFP);
+    AddOpt(LibFunc::asinh, LibFunc::asinhf, &UnsafeUnaryDoubleFP);
+    AddOpt(LibFunc::atan, LibFunc::atanf, &UnsafeUnaryDoubleFP);
+    AddOpt(LibFunc::atanh, LibFunc::atanhf, &UnsafeUnaryDoubleFP);
+    AddOpt(LibFunc::cbrt, LibFunc::cbrtf, &UnsafeUnaryDoubleFP);
+    AddOpt(LibFunc::cosh, LibFunc::coshf, &UnsafeUnaryDoubleFP);
+    AddOpt(LibFunc::exp, LibFunc::expf, &UnsafeUnaryDoubleFP);
+    AddOpt(LibFunc::exp10, LibFunc::exp10f, &UnsafeUnaryDoubleFP);
+    AddOpt(LibFunc::expm1, LibFunc::expm1f, &UnsafeUnaryDoubleFP);
+    AddOpt(LibFunc::log, LibFunc::logf, &UnsafeUnaryDoubleFP);
+    AddOpt(LibFunc::log10, LibFunc::log10f, &UnsafeUnaryDoubleFP);
+    AddOpt(LibFunc::log1p, LibFunc::log1pf, &UnsafeUnaryDoubleFP);
+    AddOpt(LibFunc::log2, LibFunc::log2f, &UnsafeUnaryDoubleFP);
+    AddOpt(LibFunc::logb, LibFunc::logbf, &UnsafeUnaryDoubleFP);
+    AddOpt(LibFunc::sin, LibFunc::sinf, &UnsafeUnaryDoubleFP);
+    AddOpt(LibFunc::sinh, LibFunc::sinhf, &UnsafeUnaryDoubleFP);
+    AddOpt(LibFunc::sqrt, LibFunc::sqrtf, &UnsafeUnaryDoubleFP);
+    AddOpt(LibFunc::tan, LibFunc::tanf, &UnsafeUnaryDoubleFP);
+    AddOpt(LibFunc::tanh, LibFunc::tanhf, &UnsafeUnaryDoubleFP);
+  }
 
   // Integer Optimizations
   Optimizations["ffs"] = &FFS;
