diff options
Diffstat (limited to 'lib/Transforms/Scalar')
-rw-r--r-- | lib/Transforms/Scalar/CodeGenPrepare.cpp | 46 | ||||
-rw-r--r-- | lib/Transforms/Scalar/DCE.cpp | 8 | ||||
-rw-r--r-- | lib/Transforms/Scalar/DeadStoreElimination.cpp | 36 | ||||
-rw-r--r-- | lib/Transforms/Scalar/EarlyCSE.cpp | 2 | ||||
-rw-r--r-- | lib/Transforms/Scalar/GVN.cpp | 26 | ||||
-rw-r--r-- | lib/Transforms/Scalar/IndVarSimplify.cpp | 17 | ||||
-rw-r--r-- | lib/Transforms/Scalar/JumpThreading.cpp | 2 | ||||
-rw-r--r-- | lib/Transforms/Scalar/LICM.cpp | 39 | ||||
-rw-r--r-- | lib/Transforms/Scalar/LoopIdiomRecognize.cpp | 24 | ||||
-rw-r--r-- | lib/Transforms/Scalar/LoopInstSimplify.cpp | 2 | ||||
-rw-r--r-- | lib/Transforms/Scalar/LoopRotation.cpp | 65 | ||||
-rw-r--r-- | lib/Transforms/Scalar/LoopStrengthReduce.cpp | 14 | ||||
-rw-r--r-- | lib/Transforms/Scalar/ObjCARC.cpp | 91 | ||||
-rw-r--r-- | lib/Transforms/Scalar/SimplifyCFGPass.cpp | 52 | ||||
-rw-r--r-- | lib/Transforms/Scalar/SimplifyLibCalls.cpp | 154 |
15 files changed, 387 insertions, 191 deletions
diff --git a/lib/Transforms/Scalar/CodeGenPrepare.cpp b/lib/Transforms/Scalar/CodeGenPrepare.cpp index a8deda8..5912107 100644 --- a/lib/Transforms/Scalar/CodeGenPrepare.cpp +++ b/lib/Transforms/Scalar/CodeGenPrepare.cpp @@ -43,6 +43,7 @@ #include "llvm/Transforms/Utils/AddrModeMatcher.h" #include "llvm/Transforms/Utils/BasicBlockUtils.h" #include "llvm/Transforms/Utils/BuildLibCalls.h" +#include "llvm/Transforms/Utils/BypassSlowDivision.h" #include "llvm/Transforms/Utils/Local.h" using namespace llvm; using namespace llvm::PatternMatch; @@ -148,7 +149,19 @@ bool CodeGenPrepare::runOnFunction(Function &F) { PFI = getAnalysisIfAvailable<ProfileInfo>(); OptSize = F.hasFnAttr(Attribute::OptimizeForSize); - // First pass, eliminate blocks that contain only PHI nodes and an + /// This optimization identifies DIV instructions that can be + /// profitably bypassed and carried out with a shorter, faster divide. + if (TLI && TLI->isSlowDivBypassed()) { + const DenseMap<Type *, Type *> &BypassTypeMap = TLI->getBypassSlowDivTypes(); + + for (Function::iterator I = F.begin(); I != F.end(); I++) { + EverMadeChange |= bypassSlowDivision(F, + I, + BypassTypeMap); + } + } + + // Eliminate blocks that contain only PHI nodes and an // unconditional branch. EverMadeChange |= EliminateMostlyEmptyBlocks(F); @@ -988,7 +1001,7 @@ bool CodeGenPrepare::OptimizeMemoryInst(Instruction *MemoryInst, Value *Addr, WeakVH IterHandle(CurInstIterator); BasicBlock *BB = CurInstIterator->getParent(); - RecursivelyDeleteTriviallyDeadInstructions(Repl); + RecursivelyDeleteTriviallyDeadInstructions(Repl, TLInfo); if (IterHandle != CurInstIterator) { // If the iterator instruction was recursively deleted, start over at the @@ -1174,17 +1187,32 @@ static bool isFormingBranchFromSelectProfitable(SelectInst *SI) { } +/// If we have a SelectInst that will likely profit from branch prediction, +/// turn it into a branch. bool CodeGenPrepare::OptimizeSelectInst(SelectInst *SI) { - // If we have a SelectInst that will likely profit from branch prediction, - // turn it into a branch. - if (DisableSelectToBranch || OptSize || !TLI || - !TLI->isPredictableSelectExpensive()) - return false; + bool VectorCond = !SI->getCondition()->getType()->isIntegerTy(1); - if (!SI->getCondition()->getType()->isIntegerTy(1) || - !isFormingBranchFromSelectProfitable(SI)) + // Can we convert the 'select' to CF ? + if (DisableSelectToBranch || OptSize || !TLI || VectorCond) return false; + TargetLowering::SelectSupportKind SelectKind; + if (VectorCond) + SelectKind = TargetLowering::VectorMaskSelect; + else if (SI->getType()->isVectorTy()) + SelectKind = TargetLowering::ScalarCondVectorVal; + else + SelectKind = TargetLowering::ScalarValSelect; + + // Do we have efficient codegen support for this kind of 'selects' ? + if (TLI->isSelectSupported(SelectKind)) { + // We have efficient codegen support for the select instruction. + // Check if it is profitable to keep this 'select'. + if (!TLI->isPredictableSelectExpensive() || + !isFormingBranchFromSelectProfitable(SI)) + return false; + } + ModifiedDT = true; // First, we split the block containing the select into 2 blocks. diff --git a/lib/Transforms/Scalar/DCE.cpp b/lib/Transforms/Scalar/DCE.cpp index 8dbcc23..086f0a1 100644 --- a/lib/Transforms/Scalar/DCE.cpp +++ b/lib/Transforms/Scalar/DCE.cpp @@ -22,6 +22,7 @@ #include "llvm/Instruction.h" #include "llvm/Pass.h" #include "llvm/Support/InstIterator.h" +#include "llvm/Target/TargetLibraryInfo.h" #include "llvm/ADT/Statistic.h" using namespace llvm; @@ -38,10 +39,11 @@ namespace { initializeDeadInstEliminationPass(*PassRegistry::getPassRegistry()); } virtual bool runOnBasicBlock(BasicBlock &BB) { + TargetLibraryInfo *TLI = getAnalysisIfAvailable<TargetLibraryInfo>(); bool Changed = false; for (BasicBlock::iterator DI = BB.begin(); DI != BB.end(); ) { Instruction *Inst = DI++; - if (isInstructionTriviallyDead(Inst)) { + if (isInstructionTriviallyDead(Inst, TLI)) { Inst->eraseFromParent(); Changed = true; ++DIEEliminated; @@ -87,6 +89,8 @@ char DCE::ID = 0; INITIALIZE_PASS(DCE, "dce", "Dead Code Elimination", false, false) bool DCE::runOnFunction(Function &F) { + TargetLibraryInfo *TLI = getAnalysisIfAvailable<TargetLibraryInfo>(); + // Start out with all of the instructions in the worklist... std::vector<Instruction*> WorkList; for (inst_iterator i = inst_begin(F), e = inst_end(F); i != e; ++i) @@ -101,7 +105,7 @@ bool DCE::runOnFunction(Function &F) { Instruction *I = WorkList.back(); WorkList.pop_back(); - if (isInstructionTriviallyDead(I)) { // If the instruction is dead. + if (isInstructionTriviallyDead(I, TLI)) { // If the instruction is dead. // Loop over all of the values that the instruction uses, if there are // instructions being used, add them to the worklist, because they might // go dead after this one is removed. diff --git a/lib/Transforms/Scalar/DeadStoreElimination.cpp b/lib/Transforms/Scalar/DeadStoreElimination.cpp index 8b1283f..1ff4329 100644 --- a/lib/Transforms/Scalar/DeadStoreElimination.cpp +++ b/lib/Transforms/Scalar/DeadStoreElimination.cpp @@ -106,6 +106,7 @@ FunctionPass *llvm::createDeadStoreEliminationPass() { return new DSE(); } /// static void DeleteDeadInstruction(Instruction *I, MemoryDependenceAnalysis &MD, + const TargetLibraryInfo *TLI, SmallSetVector<Value*, 16> *ValueSet = 0) { SmallVector<Instruction*, 32> NowDeadInsts; @@ -130,7 +131,7 @@ static void DeleteDeadInstruction(Instruction *I, if (!Op->use_empty()) continue; if (Instruction *OpI = dyn_cast<Instruction>(Op)) - if (isInstructionTriviallyDead(OpI)) + if (isInstructionTriviallyDead(OpI, TLI)) NowDeadInsts.push_back(OpI); } @@ -276,7 +277,7 @@ static Value *getStoredPointerOperand(Instruction *I) { static uint64_t getPointerSize(const Value *V, AliasAnalysis &AA) { uint64_t Size; - if (getObjectSize(V, Size, AA.getTargetData())) + if (getObjectSize(V, Size, AA.getTargetData(), AA.getTargetLibraryInfo())) return Size; return AliasAnalysis::UnknownSize; } @@ -454,7 +455,7 @@ bool DSE::runOnBasicBlock(BasicBlock &BB) { Instruction *Inst = BBI++; // Handle 'free' calls specially. - if (CallInst *F = isFreeCall(Inst)) { + if (CallInst *F = isFreeCall(Inst, AA->getTargetLibraryInfo())) { MadeChange |= HandleFree(F); continue; } @@ -483,7 +484,7 @@ bool DSE::runOnBasicBlock(BasicBlock &BB) { // in case we need it. WeakVH NextInst(BBI); - DeleteDeadInstruction(SI, *MD); + DeleteDeadInstruction(SI, *MD, AA->getTargetLibraryInfo()); if (NextInst == 0) // Next instruction deleted. BBI = BB.begin(); @@ -530,7 +531,7 @@ bool DSE::runOnBasicBlock(BasicBlock &BB) { << *DepWrite << "\n KILLER: " << *Inst << '\n'); // Delete the store and now-dead instructions that feed it. - DeleteDeadInstruction(DepWrite, *MD); + DeleteDeadInstruction(DepWrite, *MD, AA->getTargetLibraryInfo()); ++NumFastStores; MadeChange = true; @@ -640,7 +641,7 @@ bool DSE::HandleFree(CallInst *F) { Instruction *Next = llvm::next(BasicBlock::iterator(Dependency)); // DCE instructions only used to calculate that store - DeleteDeadInstruction(Dependency, *MD); + DeleteDeadInstruction(Dependency, *MD, AA->getTargetLibraryInfo()); ++NumFastStores; MadeChange = true; @@ -680,7 +681,8 @@ bool DSE::handleEndBlock(BasicBlock &BB) { // Okay, so these are dead heap objects, but if the pointer never escapes // then it's leaked by this function anyways. - else if (isAllocLikeFn(I) && !PointerMayBeCaptured(I, true, true)) + else if (isAllocLikeFn(I, AA->getTargetLibraryInfo()) && + !PointerMayBeCaptured(I, true, true)) DeadStackObjects.insert(I); } @@ -724,7 +726,8 @@ bool DSE::handleEndBlock(BasicBlock &BB) { dbgs() << '\n'); // DCE instructions only used to calculate that store. - DeleteDeadInstruction(Dead, *MD, &DeadStackObjects); + DeleteDeadInstruction(Dead, *MD, AA->getTargetLibraryInfo(), + &DeadStackObjects); ++NumFastStores; MadeChange = true; continue; @@ -732,9 +735,10 @@ bool DSE::handleEndBlock(BasicBlock &BB) { } // Remove any dead non-memory-mutating instructions. - if (isInstructionTriviallyDead(BBI)) { + if (isInstructionTriviallyDead(BBI, AA->getTargetLibraryInfo())) { Instruction *Inst = BBI++; - DeleteDeadInstruction(Inst, *MD, &DeadStackObjects); + DeleteDeadInstruction(Inst, *MD, AA->getTargetLibraryInfo(), + &DeadStackObjects); ++NumFastOther; MadeChange = true; continue; @@ -750,7 +754,7 @@ bool DSE::handleEndBlock(BasicBlock &BB) { if (CallSite CS = cast<Value>(BBI)) { // Remove allocation function calls from the list of dead stack objects; // there can't be any references before the definition. - if (isAllocLikeFn(BBI)) + if (isAllocLikeFn(BBI, AA->getTargetLibraryInfo())) DeadStackObjects.remove(BBI); // If this call does not access memory, it can't be loading any of our @@ -771,15 +775,15 @@ bool DSE::handleEndBlock(BasicBlock &BB) { LiveAllocas.push_back(*I); } - for (SmallVector<Value*, 8>::iterator I = LiveAllocas.begin(), - E = LiveAllocas.end(); I != E; ++I) - DeadStackObjects.remove(*I); - // If all of the allocas were clobbered by the call then we're not going // to find anything else to process. - if (DeadStackObjects.empty()) + if (DeadStackObjects.size() == LiveAllocas.size()) break; + for (SmallVector<Value*, 8>::iterator I = LiveAllocas.begin(), + E = LiveAllocas.end(); I != E; ++I) + DeadStackObjects.remove(*I); + continue; } diff --git a/lib/Transforms/Scalar/EarlyCSE.cpp b/lib/Transforms/Scalar/EarlyCSE.cpp index 9759549..2627113 100644 --- a/lib/Transforms/Scalar/EarlyCSE.cpp +++ b/lib/Transforms/Scalar/EarlyCSE.cpp @@ -374,7 +374,7 @@ bool EarlyCSE::processNode(DomTreeNode *Node) { Instruction *Inst = I++; // Dead instructions should just be removed. - if (isInstructionTriviallyDead(Inst)) { + if (isInstructionTriviallyDead(Inst, TLI)) { DEBUG(dbgs() << "EarlyCSE DCE: " << *Inst << '\n'); Inst->eraseFromParent(); Changed = true; diff --git a/lib/Transforms/Scalar/GVN.cpp b/lib/Transforms/Scalar/GVN.cpp index 4822fd0..16ae6ad 100644 --- a/lib/Transforms/Scalar/GVN.cpp +++ b/lib/Transforms/Scalar/GVN.cpp @@ -271,16 +271,16 @@ void ValueTable::add(Value *V, uint32_t num) { valueNumbering.insert(std::make_pair(V, num)); } -uint32_t ValueTable::lookup_or_add_call(CallInst* C) { +uint32_t ValueTable::lookup_or_add_call(CallInst *C) { if (AA->doesNotAccessMemory(C)) { Expression exp = create_expression(C); - uint32_t& e = expressionNumbering[exp]; + uint32_t &e = expressionNumbering[exp]; if (!e) e = nextValueNumber++; valueNumbering[C] = e; return e; } else if (AA->onlyReadsMemory(C)) { Expression exp = create_expression(C); - uint32_t& e = expressionNumbering[exp]; + uint32_t &e = expressionNumbering[exp]; if (!e) { e = nextValueNumber++; valueNumbering[C] = e; @@ -413,7 +413,7 @@ uint32_t ValueTable::lookup_or_add(Value *V) { case Instruction::LShr: case Instruction::AShr: case Instruction::And: - case Instruction::Or : + case Instruction::Or: case Instruction::Xor: case Instruction::ICmp: case Instruction::FCmp: @@ -632,6 +632,7 @@ INITIALIZE_PASS_DEPENDENCY(TargetLibraryInfo) INITIALIZE_AG_DEPENDENCY(AliasAnalysis) INITIALIZE_PASS_END(GVN, "gvn", "Global Value Numbering", false, false) +#ifndef NDEBUG void GVN::dump(DenseMap<uint32_t, Value*>& d) { errs() << "{\n"; for (DenseMap<uint32_t, Value*>::iterator I = d.begin(), @@ -641,6 +642,7 @@ void GVN::dump(DenseMap<uint32_t, Value*>& d) { } errs() << "}\n"; } +#endif /// IsValueFullyAvailableInBlock - Return true if we can prove that the value /// we're analyzing is fully available in the specified block. As we go, keep @@ -1436,7 +1438,7 @@ bool GVN::processNonLocalLoad(LoadInst *LI) { Instruction *DepInst = DepInfo.getInst(); // Loading the allocation -> undef. - if (isa<AllocaInst>(DepInst) || isMallocLikeFn(DepInst) || + if (isa<AllocaInst>(DepInst) || isMallocLikeFn(DepInst, TLI) || // Loading immediately after lifetime begin -> undef. isLifetimeStart(DepInst)) { ValuesPerBlock.push_back(AvailableValueInBlock::get(DepBB, @@ -1951,7 +1953,7 @@ bool GVN::processLoad(LoadInst *L) { // If this load really doesn't depend on anything, then we must be loading an // undef value. This can happen when loading for a fresh allocation with no // intervening stores, for example. - if (isa<AllocaInst>(DepInst) || isMallocLikeFn(DepInst)) { + if (isa<AllocaInst>(DepInst) || isMallocLikeFn(DepInst, TLI)) { L->replaceAllUsesWith(UndefValue::get(L->getType())); markInstructionForDeletion(L); ++NumGVNLoad; @@ -2231,12 +2233,20 @@ bool GVN::processInstruction(Instruction *I) { Value *SwitchCond = SI->getCondition(); BasicBlock *Parent = SI->getParent(); bool Changed = false; + + // Remember how many outgoing edges there are to every successor. + SmallDenseMap<BasicBlock *, unsigned, 16> SwitchEdges; + for (unsigned i = 0, n = SI->getNumSuccessors(); i != n; ++i) + ++SwitchEdges[SI->getSuccessor(i)]; + for (SwitchInst::CaseIt i = SI->case_begin(), e = SI->case_end(); i != e; ++i) { BasicBlock *Dst = i.getCaseSuccessor(); - BasicBlockEdge E(Parent, Dst); - if (E.isSingleEdge()) + // If there is only a single edge, propagate the case value into it. + if (SwitchEdges.lookup(Dst) == 1) { + BasicBlockEdge E(Parent, Dst); Changed |= propagateEquality(SwitchCond, i.getCaseValue(), E); + } } return Changed; } diff --git a/lib/Transforms/Scalar/IndVarSimplify.cpp b/lib/Transforms/Scalar/IndVarSimplify.cpp index 37f8bdf..c933a17 100644 --- a/lib/Transforms/Scalar/IndVarSimplify.cpp +++ b/lib/Transforms/Scalar/IndVarSimplify.cpp @@ -44,6 +44,7 @@ #include "llvm/Transforms/Utils/BasicBlockUtils.h" #include "llvm/Transforms/Utils/SimplifyIndVar.h" #include "llvm/Target/TargetData.h" +#include "llvm/Target/TargetLibraryInfo.h" #include "llvm/ADT/DenseMap.h" #include "llvm/ADT/SmallVector.h" #include "llvm/ADT/Statistic.h" @@ -68,6 +69,7 @@ namespace { ScalarEvolution *SE; DominatorTree *DT; TargetData *TD; + TargetLibraryInfo *TLI; SmallVector<WeakVH, 16> DeadInsts; bool Changed; @@ -414,11 +416,11 @@ void IndVarSimplify::HandleFloatingPointIV(Loop *L, PHINode *PN) { // new comparison. NewCompare->takeName(Compare); Compare->replaceAllUsesWith(NewCompare); - RecursivelyDeleteTriviallyDeadInstructions(Compare); + RecursivelyDeleteTriviallyDeadInstructions(Compare, TLI); // Delete the old floating point increment. Incr->replaceAllUsesWith(UndefValue::get(Incr->getType())); - RecursivelyDeleteTriviallyDeadInstructions(Incr); + RecursivelyDeleteTriviallyDeadInstructions(Incr, TLI); // If the FP induction variable still has uses, this is because something else // in the loop uses its value. In order to canonicalize the induction @@ -431,7 +433,7 @@ void IndVarSimplify::HandleFloatingPointIV(Loop *L, PHINode *PN) { Value *Conv = new SIToFPInst(NewPHI, PN->getType(), "indvar.conv", PN->getParent()->getFirstInsertionPt()); PN->replaceAllUsesWith(Conv); - RecursivelyDeleteTriviallyDeadInstructions(PN); + RecursivelyDeleteTriviallyDeadInstructions(PN, TLI); } Changed = true; } @@ -550,14 +552,14 @@ void IndVarSimplify::RewriteLoopExitValues(Loop *L, SCEVExpander &Rewriter) { PN->setIncomingValue(i, ExitVal); // If this instruction is dead now, delete it. - RecursivelyDeleteTriviallyDeadInstructions(Inst); + RecursivelyDeleteTriviallyDeadInstructions(Inst, TLI); if (NumPreds == 1) { // Completely replace a single-pred PHI. This is safe, because the // NewVal won't be variant in the loop, so we don't need an LCSSA phi // node anymore. PN->replaceAllUsesWith(ExitVal); - RecursivelyDeleteTriviallyDeadInstructions(PN); + RecursivelyDeleteTriviallyDeadInstructions(PN, TLI); } } if (NumPreds != 1) { @@ -1697,6 +1699,7 @@ bool IndVarSimplify::runOnLoop(Loop *L, LPPassManager &LPM) { SE = &getAnalysis<ScalarEvolution>(); DT = &getAnalysis<DominatorTree>(); TD = getAnalysisIfAvailable<TargetData>(); + TLI = getAnalysisIfAvailable<TargetLibraryInfo>(); DeadInsts.clear(); Changed = false; @@ -1763,7 +1766,7 @@ bool IndVarSimplify::runOnLoop(Loop *L, LPPassManager &LPM) { while (!DeadInsts.empty()) if (Instruction *Inst = dyn_cast_or_null<Instruction>(&*DeadInsts.pop_back_val())) - RecursivelyDeleteTriviallyDeadInstructions(Inst); + RecursivelyDeleteTriviallyDeadInstructions(Inst, TLI); // The Rewriter may not be used from this point on. @@ -1772,7 +1775,7 @@ bool IndVarSimplify::runOnLoop(Loop *L, LPPassManager &LPM) { SinkUnusedInvariants(L); // Clean up dead instructions. - Changed |= DeleteDeadPHIs(L->getHeader()); + Changed |= DeleteDeadPHIs(L->getHeader(), TLI); // Check a post-condition. assert(L->isLCSSAForm(*DT) && "Indvars did not leave the loop in lcssa form!"); diff --git a/lib/Transforms/Scalar/JumpThreading.cpp b/lib/Transforms/Scalar/JumpThreading.cpp index dd42c59..20844c6 100644 --- a/lib/Transforms/Scalar/JumpThreading.cpp +++ b/lib/Transforms/Scalar/JumpThreading.cpp @@ -1455,7 +1455,7 @@ bool JumpThreading::ThreadEdge(BasicBlock *BB, // At this point, the IR is fully up to date and consistent. Do a quick scan // over the new instructions and zap any that are constants or dead. This // frequently happens because of phi translation. - SimplifyInstructionsInBlock(NewBB, TD); + SimplifyInstructionsInBlock(NewBB, TD, TLI); // Threaded an edge! ++NumThreads; diff --git a/lib/Transforms/Scalar/LICM.cpp b/lib/Transforms/Scalar/LICM.cpp index 0192e92..99bedce 100644 --- a/lib/Transforms/Scalar/LICM.cpp +++ b/lib/Transforms/Scalar/LICM.cpp @@ -108,6 +108,9 @@ namespace { BasicBlock *Preheader; // The preheader block of the current loop... Loop *CurLoop; // The current loop we are working on... AliasSetTracker *CurAST; // AliasSet information for the current loop... + bool MayThrow; // The current loop contains an instruction which + // may throw, thus preventing code motion of + // instructions with side effects. DenseMap<Loop*, AliasSetTracker*> LoopToAliasSetMap; /// cloneBasicBlockAnalysis - Simple Analysis hook. Clone alias set info. @@ -240,6 +243,15 @@ bool LICM::runOnLoop(Loop *L, LPPassManager &LPM) { CurAST->add(*BB); // Incorporate the specified basic block } + MayThrow = false; + // TODO: We've already searched for instructions which may throw in subloops. + // We may want to reuse this information. + for (Loop::block_iterator BB = L->block_begin(), BBE = L->block_end(); + (BB != BBE) && !MayThrow ; ++BB) + for (BasicBlock::iterator I = (*BB)->begin(), E = (*BB)->end(); + (I != E) && !MayThrow; ++I) + MayThrow |= I->mayThrow(); + // We want to visit all of the instructions in this loop... that are not parts // of our subloops (they have already had their invariants hoisted out of // their loop, into this loop, so there is no need to process the BODIES of @@ -307,7 +319,7 @@ void LICM::SinkRegion(DomTreeNode *N) { // If the instruction is dead, we would try to sink it because it isn't used // in the loop, instead, just delete it. - if (isInstructionTriviallyDead(&I)) { + if (isInstructionTriviallyDead(&I, TLI)) { DEBUG(dbgs() << "LICM deleting dead inst: " << I << '\n'); ++II; CurAST->deleteValue(&I); @@ -418,17 +430,22 @@ bool LICM::canSinkOrHoistInst(Instruction &I) { if (!FoundMod) return true; } - // FIXME: This should use mod/ref information to see if we can hoist or sink - // the call. + // FIXME: This should use mod/ref information to see if we can hoist or + // sink the call. return false; } - // Otherwise these instructions are hoistable/sinkable - return isa<BinaryOperator>(I) || isa<CastInst>(I) || - isa<SelectInst>(I) || isa<GetElementPtrInst>(I) || isa<CmpInst>(I) || - isa<InsertElementInst>(I) || isa<ExtractElementInst>(I) || - isa<ShuffleVectorInst>(I); + // Only these instructions are hoistable/sinkable. + bool HoistableKind = (isa<BinaryOperator>(I) || isa<CastInst>(I) || + isa<SelectInst>(I) || isa<GetElementPtrInst>(I) || + isa<CmpInst>(I) || isa<InsertElementInst>(I) || + isa<ExtractElementInst>(I) || + isa<ShuffleVectorInst>(I)); + if (!HoistableKind) + return false; + + return isSafeToExecuteUnconditionally(I); } /// isNotUsedInLoop - Return true if the only users of this instruction are @@ -604,6 +621,12 @@ bool LICM::isSafeToExecuteUnconditionally(Instruction &Inst) { } bool LICM::isGuaranteedToExecute(Instruction &Inst) { + + // Somewhere in this loop there is an instruction which may throw and make us + // exit the loop. + if (MayThrow) + return false; + // Otherwise we have to check to make sure that the instruction dominates all // of the exit blocks. If it doesn't, then there is a path out of the loop // which does not execute this instruction, so we can't hoist it. diff --git a/lib/Transforms/Scalar/LoopIdiomRecognize.cpp b/lib/Transforms/Scalar/LoopIdiomRecognize.cpp index ac1082c..a72e288 100644 --- a/lib/Transforms/Scalar/LoopIdiomRecognize.cpp +++ b/lib/Transforms/Scalar/LoopIdiomRecognize.cpp @@ -132,7 +132,8 @@ Pass *llvm::createLoopIdiomPass() { return new LoopIdiomRecognize(); } /// and zero out all the operands of this instruction. If any of them become /// dead, delete them and the computation tree that feeds them. /// -static void deleteDeadInstruction(Instruction *I, ScalarEvolution &SE) { +static void deleteDeadInstruction(Instruction *I, ScalarEvolution &SE, + const TargetLibraryInfo *TLI) { SmallVector<Instruction*, 32> NowDeadInsts; NowDeadInsts.push_back(I); @@ -153,7 +154,7 @@ static void deleteDeadInstruction(Instruction *I, ScalarEvolution &SE) { if (!Op->use_empty()) continue; if (Instruction *OpI = dyn_cast<Instruction>(Op)) - if (isInstructionTriviallyDead(OpI)) + if (isInstructionTriviallyDead(OpI, TLI)) NowDeadInsts.push_back(OpI); } @@ -164,10 +165,11 @@ static void deleteDeadInstruction(Instruction *I, ScalarEvolution &SE) { /// deleteIfDeadInstruction - If the specified value is a dead instruction, /// delete it and any recursively used instructions. -static void deleteIfDeadInstruction(Value *V, ScalarEvolution &SE) { +static void deleteIfDeadInstruction(Value *V, ScalarEvolution &SE, + const TargetLibraryInfo *TLI) { if (Instruction *I = dyn_cast<Instruction>(V)) - if (isInstructionTriviallyDead(I)) - deleteDeadInstruction(I, SE); + if (isInstructionTriviallyDead(I, TLI)) + deleteDeadInstruction(I, SE, TLI); } bool LoopIdiomRecognize::runOnLoop(Loop *L, LPPassManager &LPM) { @@ -490,7 +492,7 @@ processLoopStridedStore(Value *DestPtr, unsigned StoreSize, StoreSize, getAnalysis<AliasAnalysis>(), TheStore)){ Expander.clear(); // If we generated new code for the base pointer, clean up. - deleteIfDeadInstruction(BasePtr, *SE); + deleteIfDeadInstruction(BasePtr, *SE, TLI); return false; } @@ -538,7 +540,7 @@ processLoopStridedStore(Value *DestPtr, unsigned StoreSize, // Okay, the memset has been formed. Zap the original store and anything that // feeds into it. - deleteDeadInstruction(TheStore, *SE); + deleteDeadInstruction(TheStore, *SE, TLI); ++NumMemSet; return true; } @@ -579,7 +581,7 @@ processLoopStoreOfLoopLoad(StoreInst *SI, unsigned StoreSize, getAnalysis<AliasAnalysis>(), SI)) { Expander.clear(); // If we generated new code for the base pointer, clean up. - deleteIfDeadInstruction(StoreBasePtr, *SE); + deleteIfDeadInstruction(StoreBasePtr, *SE, TLI); return false; } @@ -594,8 +596,8 @@ processLoopStoreOfLoopLoad(StoreInst *SI, unsigned StoreSize, StoreSize, getAnalysis<AliasAnalysis>(), SI)) { Expander.clear(); // If we generated new code for the base pointer, clean up. - deleteIfDeadInstruction(LoadBasePtr, *SE); - deleteIfDeadInstruction(StoreBasePtr, *SE); + deleteIfDeadInstruction(LoadBasePtr, *SE, TLI); + deleteIfDeadInstruction(StoreBasePtr, *SE, TLI); return false; } @@ -628,7 +630,7 @@ processLoopStoreOfLoopLoad(StoreInst *SI, unsigned StoreSize, // Okay, the memset has been formed. Zap the original store and anything that // feeds into it. - deleteDeadInstruction(SI, *SE); + deleteDeadInstruction(SI, *SE, TLI); ++NumMemCpy; return true; } diff --git a/lib/Transforms/Scalar/LoopInstSimplify.cpp b/lib/Transforms/Scalar/LoopInstSimplify.cpp index 982400c..f5daa7b 100644 --- a/lib/Transforms/Scalar/LoopInstSimplify.cpp +++ b/lib/Transforms/Scalar/LoopInstSimplify.cpp @@ -120,7 +120,7 @@ bool LoopInstSimplify::runOnLoop(Loop *L, LPPassManager &LPM) { ++NumSimplified; } } - LocalChanged |= RecursivelyDeleteTriviallyDeadInstructions(I); + LocalChanged |= RecursivelyDeleteTriviallyDeadInstructions(I, TLI); if (IsSubloopHeader && !isa<PHINode>(I)) break; diff --git a/lib/Transforms/Scalar/LoopRotation.cpp b/lib/Transforms/Scalar/LoopRotation.cpp index 7eeb152..abe07aa 100644 --- a/lib/Transforms/Scalar/LoopRotation.cpp +++ b/lib/Transforms/Scalar/LoopRotation.cpp @@ -24,6 +24,7 @@ #include "llvm/Transforms/Utils/BasicBlockUtils.h" #include "llvm/Transforms/Utils/SSAUpdater.h" #include "llvm/Transforms/Utils/ValueMapper.h" +#include "llvm/Support/CFG.h" #include "llvm/Support/Debug.h" #include "llvm/ADT/Statistic.h" using namespace llvm; @@ -256,6 +257,7 @@ bool LoopRotate::rotateLoop(Loop *L) { return false; BasicBlock *OrigHeader = L->getHeader(); + BasicBlock *OrigLatch = L->getLoopLatch(); BranchInst *BI = dyn_cast<BranchInst>(OrigHeader->getTerminator()); if (BI == 0 || BI->isUnconditional()) @@ -267,13 +269,9 @@ bool LoopRotate::rotateLoop(Loop *L) { if (!L->isLoopExiting(OrigHeader)) return false; - // Updating PHInodes in loops with multiple exits adds complexity. - // Keep it simple, and restrict loop rotation to loops with one exit only. - // In future, lift this restriction and support for multiple exits if - // required. - SmallVector<BasicBlock*, 8> ExitBlocks; - L->getExitBlocks(ExitBlocks); - if (ExitBlocks.size() > 1) + // If the loop latch already contains a branch that leaves the loop then the + // loop is already rotated. + if (OrigLatch == 0 || L->isLoopExiting(OrigLatch)) return false; // Check size of original header and reject loop if it is very big. @@ -286,11 +284,10 @@ bool LoopRotate::rotateLoop(Loop *L) { // Now, this loop is suitable for rotation. BasicBlock *OrigPreheader = L->getLoopPreheader(); - BasicBlock *OrigLatch = L->getLoopLatch(); // If the loop could not be converted to canonical form, it must have an // indirectbr in it, just give up. - if (OrigPreheader == 0 || OrigLatch == 0) + if (OrigPreheader == 0) return false; // Anything ScalarEvolution may know about this loop or the PHI nodes @@ -298,6 +295,8 @@ bool LoopRotate::rotateLoop(Loop *L) { if (ScalarEvolution *SE = getAnalysisIfAvailable<ScalarEvolution>()) SE->forgetLoop(L); + DEBUG(dbgs() << "LoopRotation: rotating "; L->dump()); + // Find new Loop header. NewHeader is a Header's one and only successor // that is inside loop. Header's other successor is outside the // loop. Otherwise loop is not suitable for rotation. @@ -408,10 +407,19 @@ bool LoopRotate::rotateLoop(Loop *L) { // Update DominatorTree to reflect the CFG change we just made. Then split // edges as necessary to preserve LoopSimplify form. if (DominatorTree *DT = getAnalysisIfAvailable<DominatorTree>()) { - // Since OrigPreheader now has the conditional branch to Exit block, it is - // the dominator of Exit. - DT->changeImmediateDominator(Exit, OrigPreheader); - DT->changeImmediateDominator(NewHeader, OrigPreheader); + // Everything that was dominated by the old loop header is now dominated + // by the original loop preheader. Conceptually the header was merged + // into the preheader, even though we reuse the actual block as a new + // loop latch. + DomTreeNode *OrigHeaderNode = DT->getNode(OrigHeader); + SmallVector<DomTreeNode *, 8> HeaderChildren(OrigHeaderNode->begin(), + OrigHeaderNode->end()); + DomTreeNode *OrigPreheaderNode = DT->getNode(OrigPreheader); + for (unsigned I = 0, E = HeaderChildren.size(); I != E; ++I) + DT->changeImmediateDominator(HeaderChildren[I], OrigPreheaderNode); + + assert(DT->getNode(Exit)->getIDom() == OrigPreheaderNode); + assert(DT->getNode(NewHeader)->getIDom() == OrigPreheaderNode); // Update OrigHeader to be dominated by the new header block. DT->changeImmediateDominator(OrigHeader, OrigLatch); @@ -440,6 +448,35 @@ bool LoopRotate::rotateLoop(Loop *L) { // Update OrigHeader to be dominated by the new header block. DT->changeImmediateDominator(NewHeader, OrigPreheader); DT->changeImmediateDominator(OrigHeader, OrigLatch); + + // Brute force incremental dominator tree update. Call + // findNearestCommonDominator on all CFG predecessors of each child of the + // original header. + DomTreeNode *OrigHeaderNode = DT->getNode(OrigHeader); + SmallVector<DomTreeNode *, 8> HeaderChildren(OrigHeaderNode->begin(), + OrigHeaderNode->end()); + bool Changed; + do { + Changed = false; + for (unsigned I = 0, E = HeaderChildren.size(); I != E; ++I) { + DomTreeNode *Node = HeaderChildren[I]; + BasicBlock *BB = Node->getBlock(); + + pred_iterator PI = pred_begin(BB); + BasicBlock *NearestDom = *PI; + for (pred_iterator PE = pred_end(BB); PI != PE; ++PI) + NearestDom = DT->findNearestCommonDominator(NearestDom, *PI); + + // Remember if this changes the DomTree. + if (Node->getIDom()->getBlock() != NearestDom) { + DT->changeImmediateDominator(BB, NearestDom); + Changed = true; + } + } + + // If the dominator changed, this may have an effect on other + // predecessors, continue until we reach a fixpoint. + } while (Changed); } } @@ -452,6 +489,8 @@ bool LoopRotate::rotateLoop(Loop *L) { // emitted code isn't too gross in this common case. MergeBlockIntoPredecessor(OrigHeader, this); + DEBUG(dbgs() << "LoopRotation: into "; L->dump()); + ++NumRotated; return true; } diff --git a/lib/Transforms/Scalar/LoopStrengthReduce.cpp b/lib/Transforms/Scalar/LoopStrengthReduce.cpp index 0ae7a51..d7495da 100644 --- a/lib/Transforms/Scalar/LoopStrengthReduce.cpp +++ b/lib/Transforms/Scalar/LoopStrengthReduce.cpp @@ -121,9 +121,11 @@ void RegSortData::print(raw_ostream &OS) const { OS << "[NumUses=" << UsedByIndices.count() << ']'; } +#ifndef NDEBUG void RegSortData::dump() const { print(errs()); errs() << '\n'; } +#endif namespace { @@ -414,9 +416,11 @@ void Formula::print(raw_ostream &OS) const { } } +#ifndef NDEBUG void Formula::dump() const { print(errs()); errs() << '\n'; } +#endif /// isAddRecSExtable - Return true if the given addrec can be sign-extended /// without changing its value. @@ -974,9 +978,11 @@ void Cost::print(raw_ostream &OS) const { OS << ", plus " << SetupCost << " setup cost"; } +#ifndef NDEBUG void Cost::dump() const { print(errs()); errs() << '\n'; } +#endif namespace { @@ -1060,9 +1066,11 @@ void LSRFixup::print(raw_ostream &OS) const { OS << ", Offset=" << Offset; } +#ifndef NDEBUG void LSRFixup::dump() const { print(errs()); errs() << '\n'; } +#endif namespace { @@ -1252,9 +1260,11 @@ void LSRUse::print(raw_ostream &OS) const { OS << ", widest fixup type: " << *WidestFixupType; } +#ifndef NDEBUG void LSRUse::dump() const { print(errs()); errs() << '\n'; } +#endif /// isLegalUse - Test whether the use described by AM is "legal", meaning it can /// be completely folded into the user instruction at isel time. This includes @@ -3436,9 +3446,11 @@ void WorkItem::print(raw_ostream &OS) const { << " , add offset " << Imm; } +#ifndef NDEBUG void WorkItem::dump() const { print(errs()); errs() << '\n'; } +#endif /// GenerateCrossUseConstantOffsets - Look for registers which are a constant /// distance apart and try to form reuse opportunities between them. @@ -4731,9 +4743,11 @@ void LSRInstance::print(raw_ostream &OS) const { print_uses(OS); } +#ifndef NDEBUG void LSRInstance::dump() const { print(errs()); errs() << '\n'; } +#endif namespace { diff --git a/lib/Transforms/Scalar/ObjCARC.cpp b/lib/Transforms/Scalar/ObjCARC.cpp index 3222f20..dce8e8b 100644 --- a/lib/Transforms/Scalar/ObjCARC.cpp +++ b/lib/Transforms/Scalar/ObjCARC.cpp @@ -1236,16 +1236,19 @@ bool ProvenanceAnalysis::relatedCheck(const Value *A, const Value *B) { // An ObjC-Identified object can't alias a load if it is never locally stored. if (AIsIdentified) { + // Check for an obvious escape. + if (isa<LoadInst>(B)) + return isStoredObjCPointer(A); if (BIsIdentified) { - // If both pointers have provenance, they can be directly compared. - if (A != B) - return false; - } else { - if (isa<LoadInst>(B)) - return isStoredObjCPointer(A); + // Check for an obvious escape. + if (isa<LoadInst>(A)) + return isStoredObjCPointer(B); + // Both pointers are identified and escapes aren't an evident problem. + return false; } - } else { - if (BIsIdentified && isa<LoadInst>(A)) + } else if (BIsIdentified) { + // Check for an obvious escape. + if (isa<LoadInst>(A)) return isStoredObjCPointer(B); } @@ -1381,9 +1384,6 @@ namespace { /// PtrState - This class summarizes several per-pointer runtime properties /// which are propogated through the flow graph. class PtrState { - /// NestCount - The known minimum level of retain+release nesting. - unsigned NestCount; - /// KnownPositiveRefCount - True if the reference count is known to /// be incremented. bool KnownPositiveRefCount; @@ -1401,7 +1401,7 @@ namespace { /// TODO: Encapsulate this better. RRInfo RRI; - PtrState() : NestCount(0), KnownPositiveRefCount(false), Partial(false), + PtrState() : KnownPositiveRefCount(false), Partial(false), Seq(S_None) {} void SetKnownPositiveRefCount() { @@ -1416,18 +1416,6 @@ namespace { return KnownPositiveRefCount; } - void IncrementNestCount() { - if (NestCount != UINT_MAX) ++NestCount; - } - - void DecrementNestCount() { - if (NestCount != 0) --NestCount; - } - - bool IsKnownNested() const { - return NestCount > 0; - } - void SetSeq(Sequence NewSeq) { Seq = NewSeq; } @@ -1454,7 +1442,6 @@ void PtrState::Merge(const PtrState &Other, bool TopDown) { Seq = MergeSeqs(Seq, Other.Seq, TopDown); KnownPositiveRefCount = KnownPositiveRefCount && Other.KnownPositiveRefCount; - NestCount = std::min(NestCount, Other.NestCount); // We can't merge a plain objc_retain with an objc_retainBlock. if (RRI.IsRetainBlock != Other.RRI.IsRetainBlock) @@ -1868,6 +1855,26 @@ Constant *ObjCARCOpt::getAutoreleaseCallee(Module *M) { return AutoreleaseCallee; } +/// IsPotentialUse - Test whether the given value is possible a +/// reference-counted pointer, including tests which utilize AliasAnalysis. +static bool IsPotentialUse(const Value *Op, AliasAnalysis &AA) { + // First make the rudimentary check. + if (!IsPotentialUse(Op)) + return false; + + // Objects in constant memory are not reference-counted. + if (AA.pointsToConstantMemory(Op)) + return false; + + // Pointers in constant memory are not pointing to reference-counted objects. + if (const LoadInst *LI = dyn_cast<LoadInst>(Op)) + if (AA.pointsToConstantMemory(LI->getPointerOperand())) + return false; + + // Otherwise assume the worst. + return true; +} + /// CanAlterRefCount - Test whether the given instruction can result in a /// reference count modification (positive or negative) for the pointer's /// object. @@ -1894,7 +1901,7 @@ CanAlterRefCount(const Instruction *Inst, const Value *Ptr, for (ImmutableCallSite::arg_iterator I = CS.arg_begin(), E = CS.arg_end(); I != E; ++I) { const Value *Op = *I; - if (IsPotentialUse(Op) && PA.related(Ptr, Op)) + if (IsPotentialUse(Op, *PA.getAA()) && PA.related(Ptr, Op)) return true; } return false; @@ -1919,14 +1926,14 @@ CanUse(const Instruction *Inst, const Value *Ptr, ProvenanceAnalysis &PA, // Comparing a pointer with null, or any other constant, isn't really a use, // because we don't care what the pointer points to, or about the values // of any other dynamic reference-counted pointers. - if (!IsPotentialUse(ICI->getOperand(1))) + if (!IsPotentialUse(ICI->getOperand(1), *PA.getAA())) return false; } else if (ImmutableCallSite CS = static_cast<const Value *>(Inst)) { // For calls, just check the arguments (and not the callee operand). for (ImmutableCallSite::arg_iterator OI = CS.arg_begin(), OE = CS.arg_end(); OI != OE; ++OI) { const Value *Op = *OI; - if (IsPotentialUse(Op) && PA.related(Ptr, Op)) + if (IsPotentialUse(Op, *PA.getAA()) && PA.related(Ptr, Op)) return true; } return false; @@ -1936,14 +1943,14 @@ CanUse(const Instruction *Inst, const Value *Ptr, ProvenanceAnalysis &PA, const Value *Op = GetUnderlyingObjCPtr(SI->getPointerOperand()); // If we can't tell what the underlying object was, assume there is a // dependence. - return IsPotentialUse(Op) && PA.related(Op, Ptr); + return IsPotentialUse(Op, *PA.getAA()) && PA.related(Op, Ptr); } // Check each operand for a match. for (User::const_op_iterator OI = Inst->op_begin(), OE = Inst->op_end(); OI != OE; ++OI) { const Value *Op = *OI; - if (IsPotentialUse(Op) && PA.related(Ptr, Op)) + if (IsPotentialUse(Op, *PA.getAA()) && PA.related(Ptr, Op)) return true; } return false; @@ -2612,11 +2619,11 @@ ObjCARCOpt::VisitInstructionBottomUp(Instruction *Inst, MDNode *ReleaseMetadata = Inst->getMetadata(ImpreciseReleaseMDKind); S.ResetSequenceProgress(ReleaseMetadata ? S_MovableRelease : S_Release); S.RRI.ReleaseMetadata = ReleaseMetadata; - S.RRI.KnownSafe = S.IsKnownNested() || S.IsKnownIncremented(); + S.RRI.KnownSafe = S.IsKnownIncremented(); S.RRI.IsTailCallRelease = cast<CallInst>(Inst)->isTailCall(); S.RRI.Calls.insert(Inst); - S.IncrementNestCount(); + S.SetKnownPositiveRefCount(); break; } case IC_RetainBlock: @@ -2631,7 +2638,6 @@ ObjCARCOpt::VisitInstructionBottomUp(Instruction *Inst, PtrState &S = MyStates.getPtrBottomUpState(Arg); S.SetKnownPositiveRefCount(); - S.DecrementNestCount(); switch (S.GetSeq()) { case S_Stop: @@ -2747,8 +2753,9 @@ ObjCARCOpt::VisitBottomUp(BasicBlock *BB, // Merge the states from each successor to compute the initial state // for the current block. - for (BBState::edge_iterator SI(MyStates.succ_begin()), - SE(MyStates.succ_end()); SI != SE; ++SI) { + BBState::edge_iterator SI(MyStates.succ_begin()), + SE(MyStates.succ_end()); + if (SI != SE) { const BasicBlock *Succ = *SI; DenseMap<const BasicBlock *, BBState>::iterator I = BBStates.find(Succ); assert(I != BBStates.end()); @@ -2760,7 +2767,6 @@ ObjCARCOpt::VisitBottomUp(BasicBlock *BB, assert(I != BBStates.end()); MyStates.MergeSucc(I->second); } - break; } // Visit all the instructions, bottom-up. @@ -2823,12 +2829,11 @@ ObjCARCOpt::VisitInstructionTopDown(Instruction *Inst, S.ResetSequenceProgress(S_Retain); S.RRI.IsRetainBlock = Class == IC_RetainBlock; - // Don't check S.IsKnownIncremented() here because it's not sufficient. - S.RRI.KnownSafe = S.IsKnownNested(); + S.RRI.KnownSafe = S.IsKnownIncremented(); S.RRI.Calls.insert(Inst); } - S.IncrementNestCount(); + S.SetKnownPositiveRefCount(); // A retain can be a potential use; procede to the generic checking // code below. @@ -2838,7 +2843,7 @@ ObjCARCOpt::VisitInstructionTopDown(Instruction *Inst, Arg = GetObjCArg(Inst); PtrState &S = MyStates.getPtrTopDownState(Arg); - S.DecrementNestCount(); + S.ClearRefCount(); switch (S.GetSeq()) { case S_Retain: @@ -2935,8 +2940,9 @@ ObjCARCOpt::VisitTopDown(BasicBlock *BB, // Merge the states from each predecessor to compute the initial state // for the current block. - for (BBState::edge_iterator PI(MyStates.pred_begin()), - PE(MyStates.pred_end()); PI != PE; ++PI) { + BBState::edge_iterator PI(MyStates.pred_begin()), + PE(MyStates.pred_end()); + if (PI != PE) { const BasicBlock *Pred = *PI; DenseMap<const BasicBlock *, BBState>::iterator I = BBStates.find(Pred); assert(I != BBStates.end()); @@ -2948,7 +2954,6 @@ ObjCARCOpt::VisitTopDown(BasicBlock *BB, assert(I != BBStates.end()); MyStates.MergePred(I->second); } - break; } // Visit all the instructions, top-down. diff --git a/lib/Transforms/Scalar/SimplifyCFGPass.cpp b/lib/Transforms/Scalar/SimplifyCFGPass.cpp index d13e4ab..6d27db1 100644 --- a/lib/Transforms/Scalar/SimplifyCFGPass.cpp +++ b/lib/Transforms/Scalar/SimplifyCFGPass.cpp @@ -59,9 +59,9 @@ FunctionPass *llvm::createCFGSimplificationPass() { return new CFGSimplifyPass(); } -/// ChangeToUnreachable - Insert an unreachable instruction before the specified +/// changeToUnreachable - Insert an unreachable instruction before the specified /// instruction, making it and the rest of the code in the block dead. -static void ChangeToUnreachable(Instruction *I, bool UseLLVMTrap) { +static void changeToUnreachable(Instruction *I, bool UseLLVMTrap) { BasicBlock *BB = I->getParent(); // Loop over all of the successors, removing BB's entry from any PHI // nodes. @@ -87,8 +87,8 @@ static void ChangeToUnreachable(Instruction *I, bool UseLLVMTrap) { } } -/// ChangeToCall - Convert the specified invoke into a normal call. -static void ChangeToCall(InvokeInst *II) { +/// changeToCall - Convert the specified invoke into a normal call. +static void changeToCall(InvokeInst *II) { SmallVector<Value*, 8> Args(II->op_begin(), II->op_end() - 3); CallInst *NewCall = CallInst::Create(II->getCalledValue(), Args, "", II); NewCall->takeName(II); @@ -105,7 +105,7 @@ static void ChangeToCall(InvokeInst *II) { II->eraseFromParent(); } -static bool MarkAliveBlocks(BasicBlock *BB, +static bool markAliveBlocks(BasicBlock *BB, SmallPtrSet<BasicBlock*, 128> &Reachable) { SmallVector<BasicBlock*, 128> Worklist; @@ -129,7 +129,7 @@ static bool MarkAliveBlocks(BasicBlock *BB, ++BBI; if (!isa<UnreachableInst>(BBI)) { // Don't insert a call to llvm.trap right before the unreachable. - ChangeToUnreachable(BBI, false); + changeToUnreachable(BBI, false); Changed = true; } break; @@ -148,7 +148,7 @@ static bool MarkAliveBlocks(BasicBlock *BB, if (isa<UndefValue>(Ptr) || (isa<ConstantPointerNull>(Ptr) && SI->getPointerAddressSpace() == 0)) { - ChangeToUnreachable(SI, true); + changeToUnreachable(SI, true); Changed = true; break; } @@ -159,7 +159,7 @@ static bool MarkAliveBlocks(BasicBlock *BB, if (InvokeInst *II = dyn_cast<InvokeInst>(BB->getTerminator())) { Value *Callee = II->getCalledValue(); if (isa<ConstantPointerNull>(Callee) || isa<UndefValue>(Callee)) { - ChangeToUnreachable(II, true); + changeToUnreachable(II, true); Changed = true; } else if (II->doesNotThrow()) { if (II->use_empty() && II->onlyReadsMemory()) { @@ -168,7 +168,7 @@ static bool MarkAliveBlocks(BasicBlock *BB, II->getUnwindDest()->removePredecessor(II->getParent()); II->eraseFromParent(); } else - ChangeToCall(II); + changeToCall(II); Changed = true; } } @@ -180,12 +180,12 @@ static bool MarkAliveBlocks(BasicBlock *BB, return Changed; } -/// RemoveUnreachableBlocksFromFn - Remove blocks that are not reachable, even +/// removeUnreachableBlocksFromFn - Remove blocks that are not reachable, even /// if they are in a dead cycle. Return true if a change was made, false /// otherwise. -static bool RemoveUnreachableBlocksFromFn(Function &F) { +static bool removeUnreachableBlocksFromFn(Function &F) { SmallPtrSet<BasicBlock*, 128> Reachable; - bool Changed = MarkAliveBlocks(F.begin(), Reachable); + bool Changed = markAliveBlocks(F.begin(), Reachable); // If there are unreachable blocks in the CFG... if (Reachable.size() == F.size()) @@ -215,9 +215,9 @@ static bool RemoveUnreachableBlocksFromFn(Function &F) { return true; } -/// MergeEmptyReturnBlocks - If we have more than one empty (other than phi +/// mergeEmptyReturnBlocks - If we have more than one empty (other than phi /// node) return blocks, merge them together to promote recursive block merging. -static bool MergeEmptyReturnBlocks(Function &F) { +static bool mergeEmptyReturnBlocks(Function &F) { bool Changed = false; BasicBlock *RetBlock = 0; @@ -291,9 +291,9 @@ static bool MergeEmptyReturnBlocks(Function &F) { return Changed; } -/// IterativeSimplifyCFG - Call SimplifyCFG on all the blocks in the function, +/// iterativelySimplifyCFG - Call SimplifyCFG on all the blocks in the function, /// iterating until no more changes are made. -static bool IterativeSimplifyCFG(Function &F, const TargetData *TD) { +static bool iterativelySimplifyCFG(Function &F, const TargetData *TD) { bool Changed = false; bool LocalChange = true; while (LocalChange) { @@ -317,24 +317,24 @@ static bool IterativeSimplifyCFG(Function &F, const TargetData *TD) { // bool CFGSimplifyPass::runOnFunction(Function &F) { const TargetData *TD = getAnalysisIfAvailable<TargetData>(); - bool EverChanged = RemoveUnreachableBlocksFromFn(F); - EverChanged |= MergeEmptyReturnBlocks(F); - EverChanged |= IterativeSimplifyCFG(F, TD); + bool EverChanged = removeUnreachableBlocksFromFn(F); + EverChanged |= mergeEmptyReturnBlocks(F); + EverChanged |= iterativelySimplifyCFG(F, TD); // If neither pass changed anything, we're done. if (!EverChanged) return false; - // IterativeSimplifyCFG can (rarely) make some loops dead. If this happens, - // RemoveUnreachableBlocksFromFn is needed to nuke them, which means we should + // iterativelySimplifyCFG can (rarely) make some loops dead. If this happens, + // removeUnreachableBlocksFromFn is needed to nuke them, which means we should // iterate between the two optimizations. We structure the code like this to - // avoid reruning IterativeSimplifyCFG if the second pass of - // RemoveUnreachableBlocksFromFn doesn't do anything. - if (!RemoveUnreachableBlocksFromFn(F)) + // avoid reruning iterativelySimplifyCFG if the second pass of + // removeUnreachableBlocksFromFn doesn't do anything. + if (!removeUnreachableBlocksFromFn(F)) return true; do { - EverChanged = IterativeSimplifyCFG(F, TD); - EverChanged |= RemoveUnreachableBlocksFromFn(F); + EverChanged = iterativelySimplifyCFG(F, TD); + EverChanged |= removeUnreachableBlocksFromFn(F); } while (EverChanged); return true; diff --git a/lib/Transforms/Scalar/SimplifyLibCalls.cpp b/lib/Transforms/Scalar/SimplifyLibCalls.cpp index 3904419..65311fe 100644 --- a/lib/Transforms/Scalar/SimplifyLibCalls.cpp +++ b/lib/Transforms/Scalar/SimplifyLibCalls.cpp @@ -28,6 +28,7 @@ #include "llvm/ADT/Statistic.h" #include "llvm/ADT/StringMap.h" #include "llvm/Analysis/ValueTracking.h" +#include "llvm/Support/CommandLine.h" #include "llvm/Support/Debug.h" #include "llvm/Support/raw_ostream.h" #include "llvm/Target/TargetData.h" @@ -38,6 +39,10 @@ using namespace llvm; STATISTIC(NumSimplified, "Number of library calls simplified"); STATISTIC(NumAnnotated, "Number of attributes added to library functions"); +static cl::opt<bool> UnsafeFPShrink("enable-double-float-shrink", cl::Hidden, + cl::init(false), + cl::desc("Enable unsafe double to float " + "shrinking for math lib calls")); //===----------------------------------------------------------------------===// // Optimizer Base Class //===----------------------------------------------------------------------===// @@ -893,16 +898,56 @@ struct MemSetOpt : public LibCallOptimization { //===----------------------------------------------------------------------===// //===---------------------------------------===// -// 'cos*' Optimizations +// Double -> Float Shrinking Optimizations for Unary Functions like 'floor' + +struct UnaryDoubleFPOpt : public LibCallOptimization { + bool CheckRetType; + UnaryDoubleFPOpt(bool CheckReturnType): CheckRetType(CheckReturnType) {} + virtual Value *CallOptimizer(Function *Callee, CallInst *CI, IRBuilder<> &B) { + FunctionType *FT = Callee->getFunctionType(); + if (FT->getNumParams() != 1 || !FT->getReturnType()->isDoubleTy() || + !FT->getParamType(0)->isDoubleTy()) + return 0; + + if (CheckRetType) { + // Check if all the uses for function like 'sin' are converted to float. + for (Value::use_iterator UseI = CI->use_begin(); UseI != CI->use_end(); + ++UseI) { + FPTruncInst *Cast = dyn_cast<FPTruncInst>(*UseI); + if (Cast == 0 || !Cast->getType()->isFloatTy()) + return 0; + } + } + + // If this is something like 'floor((double)floatval)', convert to floorf. + FPExtInst *Cast = dyn_cast<FPExtInst>(CI->getArgOperand(0)); + if (Cast == 0 || !Cast->getOperand(0)->getType()->isFloatTy()) + return 0; + + // floor((double)floatval) -> (double)floorf(floatval) + Value *V = Cast->getOperand(0); + V = EmitUnaryFloatFnCall(V, Callee->getName(), B, Callee->getAttributes()); + return B.CreateFPExt(V, B.getDoubleTy()); + } +}; +//===---------------------------------------===// +// 'cos*' Optimizations struct CosOpt : public LibCallOptimization { virtual Value *CallOptimizer(Function *Callee, CallInst *CI, IRBuilder<> &B) { + Value *Ret = NULL; + if (UnsafeFPShrink && Callee->getName() == "cos" && + TLI->has(LibFunc::cosf)) { + UnaryDoubleFPOpt UnsafeUnaryDoubleFP(true); + Ret = UnsafeUnaryDoubleFP.CallOptimizer(Callee, CI, B); + } + FunctionType *FT = Callee->getFunctionType(); // Just make sure this has 1 argument of FP type, which matches the // result type. if (FT->getNumParams() != 1 || FT->getReturnType() != FT->getParamType(0) || !FT->getParamType(0)->isFloatingPointTy()) - return 0; + return Ret; // cos(-x) -> cos(x) Value *Op1 = CI->getArgOperand(0); @@ -910,7 +955,7 @@ struct CosOpt : public LibCallOptimization { BinaryOperator *BinExpr = cast<BinaryOperator>(Op1); return B.CreateCall(Callee, BinExpr->getOperand(1), "cos"); } - return 0; + return Ret; } }; @@ -919,13 +964,20 @@ struct CosOpt : public LibCallOptimization { struct PowOpt : public LibCallOptimization { virtual Value *CallOptimizer(Function *Callee, CallInst *CI, IRBuilder<> &B) { + Value *Ret = NULL; + if (UnsafeFPShrink && Callee->getName() == "pow" && + TLI->has(LibFunc::powf)) { + UnaryDoubleFPOpt UnsafeUnaryDoubleFP(true); + Ret = UnsafeUnaryDoubleFP.CallOptimizer(Callee, CI, B); + } + FunctionType *FT = Callee->getFunctionType(); // Just make sure this has 2 arguments of the same FP type, which match the // result type. if (FT->getNumParams() != 2 || FT->getReturnType() != FT->getParamType(0) || FT->getParamType(0) != FT->getParamType(1) || !FT->getParamType(0)->isFloatingPointTy()) - return 0; + return Ret; Value *Op1 = CI->getArgOperand(0), *Op2 = CI->getArgOperand(1); if (ConstantFP *Op1C = dyn_cast<ConstantFP>(Op1)) { @@ -936,7 +988,7 @@ struct PowOpt : public LibCallOptimization { } ConstantFP *Op2C = dyn_cast<ConstantFP>(Op2); - if (Op2C == 0) return 0; + if (Op2C == 0) return Ret; if (Op2C->getValueAPF().isZero()) // pow(x, 0.0) -> 1.0 return ConstantFP::get(CI->getType(), 1.0); @@ -974,12 +1026,19 @@ struct PowOpt : public LibCallOptimization { struct Exp2Opt : public LibCallOptimization { virtual Value *CallOptimizer(Function *Callee, CallInst *CI, IRBuilder<> &B) { + Value *Ret = NULL; + if (UnsafeFPShrink && Callee->getName() == "exp2" && + TLI->has(LibFunc::exp2)) { + UnaryDoubleFPOpt UnsafeUnaryDoubleFP(true); + Ret = UnsafeUnaryDoubleFP.CallOptimizer(Callee, CI, B); + } + FunctionType *FT = Callee->getFunctionType(); // Just make sure this has 1 argument of FP type, which matches the // result type. if (FT->getNumParams() != 1 || FT->getReturnType() != FT->getParamType(0) || !FT->getParamType(0)->isFloatingPointTy()) - return 0; + return Ret; Value *Op = CI->getArgOperand(0); // Turn exp2(sitofp(x)) -> ldexp(1.0, sext(x)) if sizeof(x) <= 32 @@ -1016,29 +1075,7 @@ struct Exp2Opt : public LibCallOptimization { return CI; } - return 0; - } -}; - -//===---------------------------------------===// -// Double -> Float Shrinking Optimizations for Unary Functions like 'floor' - -struct UnaryDoubleFPOpt : public LibCallOptimization { - virtual Value *CallOptimizer(Function *Callee, CallInst *CI, IRBuilder<> &B) { - FunctionType *FT = Callee->getFunctionType(); - if (FT->getNumParams() != 1 || !FT->getReturnType()->isDoubleTy() || - !FT->getParamType(0)->isDoubleTy()) - return 0; - - // If this is something like 'floor((double)floatval)', convert to floorf. - FPExtInst *Cast = dyn_cast<FPExtInst>(CI->getArgOperand(0)); - if (Cast == 0 || !Cast->getOperand(0)->getType()->isFloatTy()) - return 0; - - // floor((double)floatval) -> (double)floorf(floatval) - Value *V = Cast->getOperand(0); - V = EmitUnaryFloatFnCall(V, Callee->getName(), B, Callee->getAttributes()); - return B.CreateFPExt(V, B.getDoubleTy()); + return Ret; } }; @@ -1534,7 +1571,8 @@ namespace { StrToOpt StrTo; StrSpnOpt StrSpn; StrCSpnOpt StrCSpn; StrStrOpt StrStr; MemCmpOpt MemCmp; MemCpyOpt MemCpy; MemMoveOpt MemMove; MemSetOpt MemSet; // Math Library Optimizations - CosOpt Cos; PowOpt Pow; Exp2Opt Exp2; UnaryDoubleFPOpt UnaryDoubleFP; + CosOpt Cos; PowOpt Pow; Exp2Opt Exp2; + UnaryDoubleFPOpt UnaryDoubleFP, UnsafeUnaryDoubleFP; // Integer Optimizations FFSOpt FFS; AbsOpt Abs; IsDigitOpt IsDigit; IsAsciiOpt IsAscii; ToAsciiOpt ToAscii; @@ -1547,10 +1585,13 @@ namespace { public: static char ID; // Pass identification SimplifyLibCalls() : FunctionPass(ID), StrCpy(false), StrCpyChk(true), - StpCpy(false), StpCpyChk(true) { + StpCpy(false), StpCpyChk(true), + UnaryDoubleFP(false), UnsafeUnaryDoubleFP(true) { initializeSimplifyLibCallsPass(*PassRegistry::getPassRegistry()); } void AddOpt(LibFunc::Func F, LibCallOptimization* Opt); + void AddOpt(LibFunc::Func F1, LibFunc::Func F2, LibCallOptimization* Opt); + void InitOptimizations(); bool runOnFunction(Function &F); @@ -1586,6 +1627,12 @@ void SimplifyLibCalls::AddOpt(LibFunc::Func F, LibCallOptimization* Opt) { Optimizations[TLI->getName(F)] = Opt; } +void SimplifyLibCalls::AddOpt(LibFunc::Func F1, LibFunc::Func F2, + LibCallOptimization* Opt) { + if (TLI->has(F1) && TLI->has(F2)) + Optimizations[TLI->getName(F1)] = Opt; +} + /// Optimizations - Populate the Optimizations map with all the optimizations /// we know. void SimplifyLibCalls::InitOptimizations() { @@ -1641,20 +1688,37 @@ void SimplifyLibCalls::InitOptimizations() { Optimizations["llvm.exp2.f64"] = &Exp2; Optimizations["llvm.exp2.f32"] = &Exp2; - if (TLI->has(LibFunc::fabs) && TLI->has(LibFunc::fabsf)) - Optimizations["fabs"] = &UnaryDoubleFP; - if (TLI->has(LibFunc::floor) && TLI->has(LibFunc::floorf)) - Optimizations["floor"] = &UnaryDoubleFP; - if (TLI->has(LibFunc::ceil) && TLI->has(LibFunc::ceilf)) - Optimizations["ceil"] = &UnaryDoubleFP; - if (TLI->has(LibFunc::round) && TLI->has(LibFunc::roundf)) - Optimizations["round"] = &UnaryDoubleFP; - if (TLI->has(LibFunc::rint) && TLI->has(LibFunc::rintf)) - Optimizations["rint"] = &UnaryDoubleFP; - if (TLI->has(LibFunc::nearbyint) && TLI->has(LibFunc::nearbyintf)) - Optimizations["nearbyint"] = &UnaryDoubleFP; - if (TLI->has(LibFunc::trunc) && TLI->has(LibFunc::truncf)) - Optimizations["trunc"] = &UnaryDoubleFP; + AddOpt(LibFunc::ceil, LibFunc::ceilf, &UnaryDoubleFP); + AddOpt(LibFunc::fabs, LibFunc::fabsf, &UnaryDoubleFP); + AddOpt(LibFunc::floor, LibFunc::floorf, &UnaryDoubleFP); + AddOpt(LibFunc::rint, LibFunc::rintf, &UnaryDoubleFP); + AddOpt(LibFunc::round, LibFunc::roundf, &UnaryDoubleFP); + AddOpt(LibFunc::nearbyint, LibFunc::nearbyintf, &UnaryDoubleFP); + AddOpt(LibFunc::trunc, LibFunc::truncf, &UnaryDoubleFP); + + if(UnsafeFPShrink) { + AddOpt(LibFunc::acos, LibFunc::acosf, &UnsafeUnaryDoubleFP); + AddOpt(LibFunc::acosh, LibFunc::acoshf, &UnsafeUnaryDoubleFP); + AddOpt(LibFunc::asin, LibFunc::asinf, &UnsafeUnaryDoubleFP); + AddOpt(LibFunc::asinh, LibFunc::asinhf, &UnsafeUnaryDoubleFP); + AddOpt(LibFunc::atan, LibFunc::atanf, &UnsafeUnaryDoubleFP); + AddOpt(LibFunc::atanh, LibFunc::atanhf, &UnsafeUnaryDoubleFP); + AddOpt(LibFunc::cbrt, LibFunc::cbrtf, &UnsafeUnaryDoubleFP); + AddOpt(LibFunc::cosh, LibFunc::coshf, &UnsafeUnaryDoubleFP); + AddOpt(LibFunc::exp, LibFunc::expf, &UnsafeUnaryDoubleFP); + AddOpt(LibFunc::exp10, LibFunc::exp10f, &UnsafeUnaryDoubleFP); + AddOpt(LibFunc::expm1, LibFunc::expm1f, &UnsafeUnaryDoubleFP); + AddOpt(LibFunc::log, LibFunc::logf, &UnsafeUnaryDoubleFP); + AddOpt(LibFunc::log10, LibFunc::log10f, &UnsafeUnaryDoubleFP); + AddOpt(LibFunc::log1p, LibFunc::log1pf, &UnsafeUnaryDoubleFP); + AddOpt(LibFunc::log2, LibFunc::log2f, &UnsafeUnaryDoubleFP); + AddOpt(LibFunc::logb, LibFunc::logbf, &UnsafeUnaryDoubleFP); + AddOpt(LibFunc::sin, LibFunc::sinf, &UnsafeUnaryDoubleFP); + AddOpt(LibFunc::sinh, LibFunc::sinhf, &UnsafeUnaryDoubleFP); + AddOpt(LibFunc::sqrt, LibFunc::sqrtf, &UnsafeUnaryDoubleFP); + AddOpt(LibFunc::tan, LibFunc::tanf, &UnsafeUnaryDoubleFP); + AddOpt(LibFunc::tanh, LibFunc::tanhf, &UnsafeUnaryDoubleFP); + } // Integer Optimizations Optimizations["ffs"] = &FFS; |