diff options
Diffstat (limited to 'lib/Transforms/Utils')
-rw-r--r-- | lib/Transforms/Utils/BasicBlockUtils.cpp | 9 | ||||
-rw-r--r-- | lib/Transforms/Utils/BreakCriticalEdges.cpp | 9 | ||||
-rw-r--r-- | lib/Transforms/Utils/CMakeLists.txt | 1 | ||||
-rw-r--r-- | lib/Transforms/Utils/CodeExtractor.cpp | 3 | ||||
-rw-r--r-- | lib/Transforms/Utils/FlattenCFG.cpp | 6 | ||||
-rw-r--r-- | lib/Transforms/Utils/GlobalStatus.cpp | 183 | ||||
-rw-r--r-- | lib/Transforms/Utils/InlineFunction.cpp | 3 | ||||
-rw-r--r-- | lib/Transforms/Utils/LCSSA.cpp | 15 | ||||
-rw-r--r-- | lib/Transforms/Utils/Local.cpp | 208 | ||||
-rw-r--r-- | lib/Transforms/Utils/LoopUnroll.cpp | 8 | ||||
-rw-r--r-- | lib/Transforms/Utils/LowerExpectIntrinsic.cpp | 2 | ||||
-rw-r--r-- | lib/Transforms/Utils/LowerInvoke.cpp | 1 | ||||
-rw-r--r-- | lib/Transforms/Utils/LowerSwitch.cpp | 62 | ||||
-rw-r--r-- | lib/Transforms/Utils/Mem2Reg.cpp | 7 | ||||
-rw-r--r-- | lib/Transforms/Utils/PromoteMemoryToRegister.cpp | 298 | ||||
-rw-r--r-- | lib/Transforms/Utils/SSAUpdater.cpp | 6 | ||||
-rw-r--r-- | lib/Transforms/Utils/SimplifyCFG.cpp | 134 | ||||
-rw-r--r-- | lib/Transforms/Utils/SimplifyLibCalls.cpp | 271 | ||||
-rw-r--r-- | lib/Transforms/Utils/SpecialCaseList.cpp | 97 |
19 files changed, 909 insertions, 414 deletions
diff --git a/lib/Transforms/Utils/BasicBlockUtils.cpp b/lib/Transforms/Utils/BasicBlockUtils.cpp index e17a416..12de9ee 100644 --- a/lib/Transforms/Utils/BasicBlockUtils.cpp +++ b/lib/Transforms/Utils/BasicBlockUtils.cpp @@ -248,7 +248,6 @@ BasicBlock *llvm::SplitEdge(BasicBlock *BB, BasicBlock *Succ, Pass *P) { // If the edge isn't critical, then BB has a single successor or Succ has a // single pred. Split the block. - BasicBlock::iterator SplitPoint; if (BasicBlock *SP = Succ->getSinglePredecessor()) { // If the successor only has a single pred, split the top of the successor // block. @@ -401,8 +400,12 @@ static void UpdatePHINodes(BasicBlock *OrigBB, BasicBlock *NewBB, // If all incoming values for the new PHI would be the same, just don't // make a new PHI. Instead, just remove the incoming values from the old // PHI. - for (unsigned i = 0, e = Preds.size(); i != e; ++i) - PN->removeIncomingValue(Preds[i], false); + for (unsigned i = 0, e = Preds.size(); i != e; ++i) { + // Explicitly check the BB index here to handle duplicates in Preds. + int Idx = PN->getBasicBlockIndex(Preds[i]); + if (Idx >= 0) + PN->removeIncomingValue(Idx, false); + } } else { // If the values coming into the block are not the same, we need a PHI. // Create the new PHI node, insert it into NewBB at the end of the block diff --git a/lib/Transforms/Utils/BreakCriticalEdges.cpp b/lib/Transforms/Utils/BreakCriticalEdges.cpp index 8f3ff96..0e7f7f7 100644 --- a/lib/Transforms/Utils/BreakCriticalEdges.cpp +++ b/lib/Transforms/Utils/BreakCriticalEdges.cpp @@ -22,7 +22,6 @@ #include "llvm/Analysis/CFG.h" #include "llvm/Analysis/Dominators.h" #include "llvm/Analysis/LoopInfo.h" -#include "llvm/Analysis/ProfileInfo.h" #include "llvm/IR/Function.h" #include "llvm/IR/Instructions.h" #include "llvm/IR/Type.h" @@ -45,7 +44,6 @@ namespace { virtual void getAnalysisUsage(AnalysisUsage &AU) const { AU.addPreserved<DominatorTree>(); AU.addPreserved<LoopInfo>(); - AU.addPreserved<ProfileInfo>(); // No loop canonicalization guarantees are broken by this pass. AU.addPreservedID(LoopSimplifyID); @@ -213,10 +211,9 @@ BasicBlock *llvm::SplitCriticalEdge(TerminatorInst *TI, unsigned SuccNum, DominatorTree *DT = P->getAnalysisIfAvailable<DominatorTree>(); LoopInfo *LI = P->getAnalysisIfAvailable<LoopInfo>(); - ProfileInfo *PI = P->getAnalysisIfAvailable<ProfileInfo>(); // If we have nothing to update, just return. - if (DT == 0 && LI == 0 && PI == 0) + if (DT == 0 && LI == 0) return NewBB; // Now update analysis information. Since the only predecessor of NewBB is @@ -369,9 +366,5 @@ BasicBlock *llvm::SplitCriticalEdge(TerminatorInst *TI, unsigned SuccNum, } } - // Update ProfileInfo if it is around. - if (PI) - PI->splitEdge(TIBB, DestBB, NewBB, MergeIdenticalEdges); - return NewBB; } diff --git a/lib/Transforms/Utils/CMakeLists.txt b/lib/Transforms/Utils/CMakeLists.txt index 3648fd6..5afd6b8 100644 --- a/lib/Transforms/Utils/CMakeLists.txt +++ b/lib/Transforms/Utils/CMakeLists.txt @@ -8,6 +8,7 @@ add_llvm_library(LLVMTransformUtils CmpInstAnalysis.cpp CodeExtractor.cpp DemoteRegToStack.cpp + GlobalStatus.cpp InlineFunction.cpp InstructionNamer.cpp IntegerDivision.cpp diff --git a/lib/Transforms/Utils/CodeExtractor.cpp b/lib/Transforms/Utils/CodeExtractor.cpp index 82013f9..6f00864 100644 --- a/lib/Transforms/Utils/CodeExtractor.cpp +++ b/lib/Transforms/Utils/CodeExtractor.cpp @@ -665,8 +665,7 @@ emitCallAndSwitchStatement(Function *newFunction, BasicBlock *codeReplacer, TheSwitch->setCondition(call); TheSwitch->setDefaultDest(TheSwitch->getSuccessor(NumExitBlocks)); // Remove redundant case - SwitchInst::CaseIt ToBeRemoved(TheSwitch, NumExitBlocks-1); - TheSwitch->removeCase(ToBeRemoved); + TheSwitch->removeCase(SwitchInst::CaseIt(TheSwitch, NumExitBlocks-1)); break; } } diff --git a/lib/Transforms/Utils/FlattenCFG.cpp b/lib/Transforms/Utils/FlattenCFG.cpp index 9cbe15d..1da226b 100644 --- a/lib/Transforms/Utils/FlattenCFG.cpp +++ b/lib/Transforms/Utils/FlattenCFG.cpp @@ -266,8 +266,7 @@ bool FlattenCFGOpt::FlattenParallelAndOr(BasicBlock *BB, IRBuilder<> &Builder, BasicBlock *CB; BranchInst *PBI = dyn_cast<BranchInst>(FirstCondBlock->getTerminator()); bool Iteration = true; - BasicBlock *SaveInsertBB = Builder.GetInsertBlock(); - BasicBlock::iterator SaveInsertPt = Builder.GetInsertPoint(); + IRBuilder<>::InsertPointGuard Guard(Builder); Value *PC = PBI->getCondition(); do { @@ -298,7 +297,6 @@ bool FlattenCFGOpt::FlattenParallelAndOr(BasicBlock *BB, IRBuilder<> &Builder, new UnreachableInst(CB->getContext(), CB); } while (Iteration); - Builder.SetInsertPoint(SaveInsertBB, SaveInsertPt); DEBUG(dbgs() << "Use parallel and/or in:\n" << *FirstCondBlock); return true; } @@ -372,7 +370,7 @@ bool FlattenCFGOpt::CompareIfRegionBlock(BasicBlock *Head1, BasicBlock *Head2, /// Check whether \param BB is the merge block of a if-region. If yes, check /// whether there exists an adjacent if-region upstream, the two if-regions -/// contain identical instuctions and can be legally merged. \returns true if +/// contain identical instructions and can be legally merged. \returns true if /// the two if-regions are merged. /// /// From: diff --git a/lib/Transforms/Utils/GlobalStatus.cpp b/lib/Transforms/Utils/GlobalStatus.cpp new file mode 100644 index 0000000..5f0a563 --- /dev/null +++ b/lib/Transforms/Utils/GlobalStatus.cpp @@ -0,0 +1,183 @@ +//===-- GlobalStatus.cpp - Compute status info for globals -----------------==// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// + +#include "llvm/ADT/SmallPtrSet.h" +#include "llvm/IR/BasicBlock.h" +#include "llvm/IR/GlobalVariable.h" +#include "llvm/IR/IntrinsicInst.h" +#include "llvm/Support/CallSite.h" +#include "llvm/Transforms/Utils/GlobalStatus.h" + +using namespace llvm; + +/// Return the stronger of the two ordering. If the two orderings are acquire +/// and release, then return AcquireRelease. +/// +static AtomicOrdering strongerOrdering(AtomicOrdering X, AtomicOrdering Y) { + if (X == Acquire && Y == Release) + return AcquireRelease; + if (Y == Acquire && X == Release) + return AcquireRelease; + return (AtomicOrdering)std::max(X, Y); +} + +/// It is safe to destroy a constant iff it is only used by constants itself. +/// Note that constants cannot be cyclic, so this test is pretty easy to +/// implement recursively. +/// +bool llvm::isSafeToDestroyConstant(const Constant *C) { + if (isa<GlobalValue>(C)) + return false; + + for (Value::const_use_iterator UI = C->use_begin(), E = C->use_end(); UI != E; + ++UI) + if (const Constant *CU = dyn_cast<Constant>(*UI)) { + if (!isSafeToDestroyConstant(CU)) + return false; + } else + return false; + return true; +} + +static bool analyzeGlobalAux(const Value *V, GlobalStatus &GS, + SmallPtrSet<const PHINode *, 16> &PhiUsers) { + for (Value::const_use_iterator UI = V->use_begin(), E = V->use_end(); UI != E; + ++UI) { + const User *U = *UI; + if (const ConstantExpr *CE = dyn_cast<ConstantExpr>(U)) { + GS.HasNonInstructionUser = true; + + // If the result of the constantexpr isn't pointer type, then we won't + // know to expect it in various places. Just reject early. + if (!isa<PointerType>(CE->getType())) + return true; + + if (analyzeGlobalAux(CE, GS, PhiUsers)) + return true; + } else if (const Instruction *I = dyn_cast<Instruction>(U)) { + if (!GS.HasMultipleAccessingFunctions) { + const Function *F = I->getParent()->getParent(); + if (GS.AccessingFunction == 0) + GS.AccessingFunction = F; + else if (GS.AccessingFunction != F) + GS.HasMultipleAccessingFunctions = true; + } + if (const LoadInst *LI = dyn_cast<LoadInst>(I)) { + GS.IsLoaded = true; + // Don't hack on volatile loads. + if (LI->isVolatile()) + return true; + GS.Ordering = strongerOrdering(GS.Ordering, LI->getOrdering()); + } else if (const StoreInst *SI = dyn_cast<StoreInst>(I)) { + // Don't allow a store OF the address, only stores TO the address. + if (SI->getOperand(0) == V) + return true; + + // Don't hack on volatile stores. + if (SI->isVolatile()) + return true; + + GS.Ordering = strongerOrdering(GS.Ordering, SI->getOrdering()); + + // If this is a direct store to the global (i.e., the global is a scalar + // value, not an aggregate), keep more specific information about + // stores. + if (GS.StoredType != GlobalStatus::Stored) { + if (const GlobalVariable *GV = + dyn_cast<GlobalVariable>(SI->getOperand(1))) { + Value *StoredVal = SI->getOperand(0); + + if (Constant *C = dyn_cast<Constant>(StoredVal)) { + if (C->isThreadDependent()) { + // The stored value changes between threads; don't track it. + return true; + } + } + + if (StoredVal == GV->getInitializer()) { + if (GS.StoredType < GlobalStatus::InitializerStored) + GS.StoredType = GlobalStatus::InitializerStored; + } else if (isa<LoadInst>(StoredVal) && + cast<LoadInst>(StoredVal)->getOperand(0) == GV) { + if (GS.StoredType < GlobalStatus::InitializerStored) + GS.StoredType = GlobalStatus::InitializerStored; + } else if (GS.StoredType < GlobalStatus::StoredOnce) { + GS.StoredType = GlobalStatus::StoredOnce; + GS.StoredOnceValue = StoredVal; + } else if (GS.StoredType == GlobalStatus::StoredOnce && + GS.StoredOnceValue == StoredVal) { + // noop. + } else { + GS.StoredType = GlobalStatus::Stored; + } + } else { + GS.StoredType = GlobalStatus::Stored; + } + } + } else if (isa<BitCastInst>(I)) { + if (analyzeGlobalAux(I, GS, PhiUsers)) + return true; + } else if (isa<GetElementPtrInst>(I)) { + if (analyzeGlobalAux(I, GS, PhiUsers)) + return true; + } else if (isa<SelectInst>(I)) { + if (analyzeGlobalAux(I, GS, PhiUsers)) + return true; + } else if (const PHINode *PN = dyn_cast<PHINode>(I)) { + // PHI nodes we can check just like select or GEP instructions, but we + // have to be careful about infinite recursion. + if (PhiUsers.insert(PN)) // Not already visited. + if (analyzeGlobalAux(I, GS, PhiUsers)) + return true; + } else if (isa<CmpInst>(I)) { + GS.IsCompared = true; + } else if (const MemTransferInst *MTI = dyn_cast<MemTransferInst>(I)) { + if (MTI->isVolatile()) + return true; + if (MTI->getArgOperand(0) == V) + GS.StoredType = GlobalStatus::Stored; + if (MTI->getArgOperand(1) == V) + GS.IsLoaded = true; + } else if (const MemSetInst *MSI = dyn_cast<MemSetInst>(I)) { + assert(MSI->getArgOperand(0) == V && "Memset only takes one pointer!"); + if (MSI->isVolatile()) + return true; + GS.StoredType = GlobalStatus::Stored; + } else if (ImmutableCallSite C = I) { + if (!C.isCallee(UI)) + return true; + GS.IsLoaded = true; + } else { + return true; // Any other non-load instruction might take address! + } + } else if (const Constant *C = dyn_cast<Constant>(U)) { + GS.HasNonInstructionUser = true; + // We might have a dead and dangling constant hanging off of here. + if (!isSafeToDestroyConstant(C)) + return true; + } else { + GS.HasNonInstructionUser = true; + // Otherwise must be some other user. + return true; + } + } + + return false; +} + +bool GlobalStatus::analyzeGlobal(const Value *V, GlobalStatus &GS) { + SmallPtrSet<const PHINode *, 16> PhiUsers; + return analyzeGlobalAux(V, GS, PhiUsers); +} + +GlobalStatus::GlobalStatus() + : IsCompared(false), IsLoaded(false), StoredType(NotStored), + StoredOnceValue(0), AccessingFunction(0), + HasMultipleAccessingFunctions(false), HasNonInstructionUser(false), + Ordering(NotAtomic) {} diff --git a/lib/Transforms/Utils/InlineFunction.cpp b/lib/Transforms/Utils/InlineFunction.cpp index dabb67b..d021bce 100644 --- a/lib/Transforms/Utils/InlineFunction.cpp +++ b/lib/Transforms/Utils/InlineFunction.cpp @@ -193,7 +193,8 @@ static bool HandleCallsInBlockInlinedThroughInvoke(BasicBlock *BB, CallInst *CI = dyn_cast<CallInst>(I); // If this call cannot unwind, don't convert it to an invoke. - if (!CI || CI->doesNotThrow()) + // Inline asm calls cannot throw. + if (!CI || CI->doesNotThrow() || isa<InlineAsm>(CI->getCalledValue())) continue; // Convert this function call into an invoke instruction. First, split the diff --git a/lib/Transforms/Utils/LCSSA.cpp b/lib/Transforms/Utils/LCSSA.cpp index 2d1b166..f15e8d5 100644 --- a/lib/Transforms/Utils/LCSSA.cpp +++ b/lib/Transforms/Utils/LCSSA.cpp @@ -55,7 +55,6 @@ namespace { DominatorTree *DT; LoopInfo *LI; ScalarEvolution *SE; - std::vector<BasicBlock*> LoopBlocks; PredIteratorCache PredCache; Loop *L; @@ -82,11 +81,6 @@ namespace { // Check the special guarantees that LCSSA makes. assert(L->isLCSSAForm(*DT) && "LCSSA form not preserved!"); } - - /// inLoop - returns true if the given block is within the current loop - bool inLoop(BasicBlock *B) const { - return std::binary_search(LoopBlocks.begin(), LoopBlocks.end(), B); - } }; } @@ -129,11 +123,6 @@ bool LCSSA::runOnLoop(Loop *TheLoop, LPPassManager &LPM) { if (ExitBlocks.empty()) return false; - // Speed up queries by creating a sorted vector of blocks. - LoopBlocks.clear(); - LoopBlocks.insert(LoopBlocks.end(), L->block_begin(), L->block_end()); - array_pod_sort(LoopBlocks.begin(), LoopBlocks.end()); - // Look at all the instructions in the loop, checking to see if they have uses // outside the loop. If so, rewrite those uses. bool MadeChange = false; @@ -198,7 +187,7 @@ bool LCSSA::ProcessInstruction(Instruction *Inst, if (PHINode *PN = dyn_cast<PHINode>(U)) UserBB = PN->getIncomingBlock(UI); - if (InstBB != UserBB && !inLoop(UserBB)) + if (InstBB != UserBB && !L->contains(UserBB)) UsesToRewrite.push_back(&UI.getUse()); } @@ -244,7 +233,7 @@ bool LCSSA::ProcessInstruction(Instruction *Inst, // If the exit block has a predecessor not within the loop, arrange for // the incoming value use corresponding to that predecessor to be // rewritten in terms of a different LCSSA PHI. - if (!inLoop(*PI)) + if (!L->contains(*PI)) UsesToRewrite.push_back( &PN->getOperandUse( PN->getOperandNumForIncomingValue(PN->getNumIncomingValues()-1))); diff --git a/lib/Transforms/Utils/Local.cpp b/lib/Transforms/Utils/Local.cpp index 08e1808..2768041 100644 --- a/lib/Transforms/Utils/Local.cpp +++ b/lib/Transforms/Utils/Local.cpp @@ -16,10 +16,10 @@ #include "llvm/ADT/DenseMap.h" #include "llvm/ADT/STLExtras.h" #include "llvm/ADT/SmallPtrSet.h" +#include "llvm/ADT/Statistic.h" #include "llvm/Analysis/Dominators.h" #include "llvm/Analysis/InstructionSimplify.h" #include "llvm/Analysis/MemoryBuiltins.h" -#include "llvm/Analysis/ProfileInfo.h" #include "llvm/Analysis/ValueTracking.h" #include "llvm/DIBuilder.h" #include "llvm/DebugInfo.h" @@ -43,6 +43,8 @@ #include "llvm/Support/raw_ostream.h" using namespace llvm; +STATISTIC(NumRemoved, "Number of unreachable basic blocks removed"); + //===----------------------------------------------------------------------===// // Local constant propagation. // @@ -193,33 +195,28 @@ bool llvm::ConstantFoldTerminator(BasicBlock *BB, bool DeleteDeadConditions, // Otherwise, we can fold this switch into a conditional branch // instruction if it has only one non-default destination. SwitchInst::CaseIt FirstCase = SI->case_begin(); - IntegersSubset& Case = FirstCase.getCaseValueEx(); - if (Case.isSingleNumber()) { - // FIXME: Currently work with ConstantInt based numbers. - Value *Cond = Builder.CreateICmpEQ(SI->getCondition(), - Case.getSingleNumber(0).toConstantInt(), - "cond"); - - // Insert the new branch. - BranchInst *NewBr = Builder.CreateCondBr(Cond, - FirstCase.getCaseSuccessor(), - SI->getDefaultDest()); - MDNode* MD = SI->getMetadata(LLVMContext::MD_prof); - if (MD && MD->getNumOperands() == 3) { - ConstantInt *SICase = dyn_cast<ConstantInt>(MD->getOperand(2)); - ConstantInt *SIDef = dyn_cast<ConstantInt>(MD->getOperand(1)); - assert(SICase && SIDef); - // The TrueWeight should be the weight for the single case of SI. - NewBr->setMetadata(LLVMContext::MD_prof, - MDBuilder(BB->getContext()). - createBranchWeights(SICase->getValue().getZExtValue(), - SIDef->getValue().getZExtValue())); - } + Value *Cond = Builder.CreateICmpEQ(SI->getCondition(), + FirstCase.getCaseValue(), "cond"); - // Delete the old switch. - SI->eraseFromParent(); - return true; + // Insert the new branch. + BranchInst *NewBr = Builder.CreateCondBr(Cond, + FirstCase.getCaseSuccessor(), + SI->getDefaultDest()); + MDNode* MD = SI->getMetadata(LLVMContext::MD_prof); + if (MD && MD->getNumOperands() == 3) { + ConstantInt *SICase = dyn_cast<ConstantInt>(MD->getOperand(2)); + ConstantInt *SIDef = dyn_cast<ConstantInt>(MD->getOperand(1)); + assert(SICase && SIDef); + // The TrueWeight should be the weight for the single case of SI. + NewBr->setMetadata(LLVMContext::MD_prof, + MDBuilder(BB->getContext()). + createBranchWeights(SICase->getValue().getZExtValue(), + SIDef->getValue().getZExtValue())); } + + // Delete the old switch. + SI->eraseFromParent(); + return true; } return false; } @@ -415,7 +412,7 @@ bool llvm::SimplifyInstructionsInBlock(BasicBlock *BB, const DataLayout *TD, Instruction *Inst = BI++; WeakVH BIHandle(BI); - if (recursivelySimplifyInstruction(Inst, TD)) { + if (recursivelySimplifyInstruction(Inst, TD, TLI)) { MadeChange = true; if (BIHandle != BI) BI = BB->begin(); @@ -515,11 +512,6 @@ void llvm::MergeBasicBlockIntoOnlyPred(BasicBlock *DestBB, Pass *P) { DT->changeImmediateDominator(DestBB, PredBBIDom); DT->eraseNode(PredBB); } - ProfileInfo *PI = P->getAnalysisIfAvailable<ProfileInfo>(); - if (PI) { - PI->replaceAllUses(PredBB, DestBB); - PI->removeEdge(ProfileInfo::getEdge(PredBB, DestBB)); - } } // Nuke BB. PredBB->eraseFromParent(); @@ -533,7 +525,7 @@ static bool CanMergeValues(Value *First, Value *Second) { } /// CanPropagatePredecessorsForPHIs - Return true if we can fold BB, an -/// almost-empty BB ending in an unconditional branch to Succ, into succ. +/// almost-empty BB ending in an unconditional branch to Succ, into Succ. /// /// Assumption: Succ is the single successor for BB. /// @@ -1053,7 +1045,11 @@ bool llvm::LowerDbgDeclare(Function &F) { for (SmallVectorImpl<DbgDeclareInst *>::iterator I = Dbgs.begin(), E = Dbgs.end(); I != E; ++I) { DbgDeclareInst *DDI = *I; - if (AllocaInst *AI = dyn_cast_or_null<AllocaInst>(DDI->getAddress())) { + AllocaInst *AI = dyn_cast_or_null<AllocaInst>(DDI->getAddress()); + // If this is an alloca for a scalar variable, insert a dbg.value + // at each load and store to the alloca and erase the dbg.declare. + if (AI && !AI->isArrayAllocation()) { + // We only remove the dbg.declare intrinsic if all uses are // converted to dbg.value intrinsics. bool RemoveDDI = true; @@ -1121,33 +1117,153 @@ bool llvm::replaceDbgDeclareForAlloca(AllocaInst *AI, Value *NewAllocaAddress, return true; } -bool llvm::removeUnreachableBlocks(Function &F) { - SmallPtrSet<BasicBlock*, 16> Reachable; +/// changeToUnreachable - Insert an unreachable instruction before the specified +/// instruction, making it and the rest of the code in the block dead. +static void changeToUnreachable(Instruction *I, bool UseLLVMTrap) { + BasicBlock *BB = I->getParent(); + // Loop over all of the successors, removing BB's entry from any PHI + // nodes. + for (succ_iterator SI = succ_begin(BB), SE = succ_end(BB); SI != SE; ++SI) + (*SI)->removePredecessor(BB); + + // Insert a call to llvm.trap right before this. This turns the undefined + // behavior into a hard fail instead of falling through into random code. + if (UseLLVMTrap) { + Function *TrapFn = + Intrinsic::getDeclaration(BB->getParent()->getParent(), Intrinsic::trap); + CallInst *CallTrap = CallInst::Create(TrapFn, "", I); + CallTrap->setDebugLoc(I->getDebugLoc()); + } + new UnreachableInst(I->getContext(), I); + + // All instructions after this are dead. + BasicBlock::iterator BBI = I, BBE = BB->end(); + while (BBI != BBE) { + if (!BBI->use_empty()) + BBI->replaceAllUsesWith(UndefValue::get(BBI->getType())); + BB->getInstList().erase(BBI++); + } +} + +/// changeToCall - Convert the specified invoke into a normal call. +static void changeToCall(InvokeInst *II) { + SmallVector<Value*, 8> Args(II->op_begin(), II->op_end() - 3); + CallInst *NewCall = CallInst::Create(II->getCalledValue(), Args, "", II); + NewCall->takeName(II); + NewCall->setCallingConv(II->getCallingConv()); + NewCall->setAttributes(II->getAttributes()); + NewCall->setDebugLoc(II->getDebugLoc()); + II->replaceAllUsesWith(NewCall); + + // Follow the call by a branch to the normal destination. + BranchInst::Create(II->getNormalDest(), II); + + // Update PHI nodes in the unwind destination + II->getUnwindDest()->removePredecessor(II->getParent()); + II->eraseFromParent(); +} + +static bool markAliveBlocks(BasicBlock *BB, + SmallPtrSet<BasicBlock*, 128> &Reachable) { + SmallVector<BasicBlock*, 128> Worklist; - Worklist.push_back(&F.getEntryBlock()); - Reachable.insert(&F.getEntryBlock()); + Worklist.push_back(BB); + Reachable.insert(BB); + bool Changed = false; do { - BasicBlock *BB = Worklist.pop_back_val(); + BB = Worklist.pop_back_val(); + + // Do a quick scan of the basic block, turning any obviously unreachable + // instructions into LLVM unreachable insts. The instruction combining pass + // canonicalizes unreachable insts into stores to null or undef. + for (BasicBlock::iterator BBI = BB->begin(), E = BB->end(); BBI != E;++BBI){ + if (CallInst *CI = dyn_cast<CallInst>(BBI)) { + if (CI->doesNotReturn()) { + // If we found a call to a no-return function, insert an unreachable + // instruction after it. Make sure there isn't *already* one there + // though. + ++BBI; + if (!isa<UnreachableInst>(BBI)) { + // Don't insert a call to llvm.trap right before the unreachable. + changeToUnreachable(BBI, false); + Changed = true; + } + break; + } + } + + // Store to undef and store to null are undefined and used to signal that + // they should be changed to unreachable by passes that can't modify the + // CFG. + if (StoreInst *SI = dyn_cast<StoreInst>(BBI)) { + // Don't touch volatile stores. + if (SI->isVolatile()) continue; + + Value *Ptr = SI->getOperand(1); + + if (isa<UndefValue>(Ptr) || + (isa<ConstantPointerNull>(Ptr) && + SI->getPointerAddressSpace() == 0)) { + changeToUnreachable(SI, true); + Changed = true; + break; + } + } + } + + // Turn invokes that call 'nounwind' functions into ordinary calls. + if (InvokeInst *II = dyn_cast<InvokeInst>(BB->getTerminator())) { + Value *Callee = II->getCalledValue(); + if (isa<ConstantPointerNull>(Callee) || isa<UndefValue>(Callee)) { + changeToUnreachable(II, true); + Changed = true; + } else if (II->doesNotThrow()) { + if (II->use_empty() && II->onlyReadsMemory()) { + // jump to the normal destination branch. + BranchInst::Create(II->getNormalDest(), II); + II->getUnwindDest()->removePredecessor(II->getParent()); + II->eraseFromParent(); + } else + changeToCall(II); + Changed = true; + } + } + + Changed |= ConstantFoldTerminator(BB, true); for (succ_iterator SI = succ_begin(BB), SE = succ_end(BB); SI != SE; ++SI) if (Reachable.insert(*SI)) Worklist.push_back(*SI); } while (!Worklist.empty()); + return Changed; +} +/// removeUnreachableBlocksFromFn - Remove blocks that are not reachable, even +/// if they are in a dead cycle. Return true if a change was made, false +/// otherwise. +bool llvm::removeUnreachableBlocks(Function &F) { + SmallPtrSet<BasicBlock*, 128> Reachable; + bool Changed = markAliveBlocks(F.begin(), Reachable); + + // If there are unreachable blocks in the CFG... if (Reachable.size() == F.size()) - return false; + return Changed; assert(Reachable.size() < F.size()); - for (Function::iterator I = llvm::next(F.begin()), E = F.end(); I != E; ++I) { - if (Reachable.count(I)) + NumRemoved += F.size()-Reachable.size(); + + // Loop over all of the basic blocks that are not reachable, dropping all of + // their internal references... + for (Function::iterator BB = ++F.begin(), E = F.end(); BB != E; ++BB) { + if (Reachable.count(BB)) continue; - for (succ_iterator SI = succ_begin(I), SE = succ_end(I); SI != SE; ++SI) + for (succ_iterator SI = succ_begin(BB), SE = succ_end(BB); SI != SE; ++SI) if (Reachable.count(*SI)) - (*SI)->removePredecessor(I); - I->dropAllReferences(); + (*SI)->removePredecessor(BB); + BB->dropAllReferences(); } - for (Function::iterator I = llvm::next(F.begin()), E=F.end(); I != E;) + for (Function::iterator I = ++F.begin(); I != F.end();) if (!Reachable.count(I)) I = F.getBasicBlockList().erase(I); else diff --git a/lib/Transforms/Utils/LoopUnroll.cpp b/lib/Transforms/Utils/LoopUnroll.cpp index cb581b3..162807d 100644 --- a/lib/Transforms/Utils/LoopUnroll.cpp +++ b/lib/Transforms/Utils/LoopUnroll.cpp @@ -90,7 +90,8 @@ static BasicBlock *FoldBlockIntoPredecessor(BasicBlock *BB, LoopInfo* LI, // Move all definitions in the successor to the predecessor... OnlyPred->getInstList().splice(OnlyPred->end(), BB->getInstList()); - std::string OldName = BB->getName(); + // OldName will be valid until erased. + StringRef OldName = BB->getName(); // Erase basic block from the function... @@ -102,12 +103,13 @@ static BasicBlock *FoldBlockIntoPredecessor(BasicBlock *BB, LoopInfo* LI, } } LI->removeBlock(BB); - BB->eraseFromParent(); // Inherit predecessor's name if it exists... if (!OldName.empty() && !OnlyPred->hasName()) OnlyPred->setName(OldName); + BB->eraseFromParent(); + return OnlyPred; } @@ -239,8 +241,6 @@ bool llvm::UnrollLoop(Loop *L, unsigned Count, unsigned TripCount, DEBUG(dbgs() << "!\n"); } - std::vector<BasicBlock*> LoopBlocks = L->getBlocks(); - bool ContinueOnTrue = L->contains(BI->getSuccessor(0)); BasicBlock *LoopExit = BI->getSuccessor(ContinueOnTrue); diff --git a/lib/Transforms/Utils/LowerExpectIntrinsic.cpp b/lib/Transforms/Utils/LowerExpectIntrinsic.cpp index 4aee8ff..e017f50 100644 --- a/lib/Transforms/Utils/LowerExpectIntrinsic.cpp +++ b/lib/Transforms/Utils/LowerExpectIntrinsic.cpp @@ -29,7 +29,7 @@ using namespace llvm; -STATISTIC(IfHandled, "Number of 'expect' intrinsic intructions handled"); +STATISTIC(IfHandled, "Number of 'expect' intrinsic instructions handled"); static cl::opt<uint32_t> LikelyBranchWeight("likely-branch-weight", cl::Hidden, cl::init(64), diff --git a/lib/Transforms/Utils/LowerInvoke.cpp b/lib/Transforms/Utils/LowerInvoke.cpp index f66b54d..9799a30 100644 --- a/lib/Transforms/Utils/LowerInvoke.cpp +++ b/lib/Transforms/Utils/LowerInvoke.cpp @@ -346,7 +346,6 @@ splitLiveRangesLiveAcrossInvokes(SmallVectorImpl<InvokeInst*> &Invokes) { // Scan all of the uses and see if the live range is live across an unwind // edge. If we find a use live across an invoke edge, create an alloca // and spill the value. - std::set<InvokeInst*> InvokesWithStoreInserted; // Find all of the blocks that this value is live in. std::set<BasicBlock*> LiveBBs; diff --git a/lib/Transforms/Utils/LowerSwitch.cpp b/lib/Transforms/Utils/LowerSwitch.cpp index 955b853..2d2a8a5 100644 --- a/lib/Transforms/Utils/LowerSwitch.cpp +++ b/lib/Transforms/Utils/LowerSwitch.cpp @@ -66,6 +66,18 @@ namespace { BasicBlock* OrigBlock, BasicBlock* Default); unsigned Clusterify(CaseVector& Cases, SwitchInst *SI); }; + + /// The comparison function for sorting the switch case values in the vector. + /// WARNING: Case ranges should be disjoint! + struct CaseCmp { + bool operator () (const LowerSwitch::CaseRange& C1, + const LowerSwitch::CaseRange& C2) { + + const ConstantInt* CI1 = cast<const ConstantInt>(C1.Low); + const ConstantInt* CI2 = cast<const ConstantInt>(C2.High); + return CI1->getValue().slt(CI2->getValue()); + } + }; } char LowerSwitch::ID = 0; @@ -147,7 +159,7 @@ BasicBlock* LowerSwitch::switchConvert(CaseItr Begin, CaseItr End, Function::iterator FI = OrigBlock; F->getBasicBlockList().insert(++FI, NewNode); - ICmpInst* Comp = new ICmpInst(ICmpInst::ICMP_ULT, + ICmpInst* Comp = new ICmpInst(ICmpInst::ICMP_SLT, Val, Pivot.Low, "Pivot"); NewNode->getInstList().push_back(Comp); BranchInst::Create(LBranch, RBranch, Comp, NewNode); @@ -222,34 +234,40 @@ BasicBlock* LowerSwitch::newLeafBlock(CaseRange& Leaf, Value* Val, // Clusterify - Transform simple list of Cases into list of CaseRange's unsigned LowerSwitch::Clusterify(CaseVector& Cases, SwitchInst *SI) { - - IntegersSubsetToBB TheClusterifier; + unsigned numCmps = 0; // Start with "simple" cases - for (SwitchInst::CaseIt i = SI->case_begin(), e = SI->case_end(); - i != e; ++i) { - BasicBlock *SuccBB = i.getCaseSuccessor(); - IntegersSubset CaseRanges = i.getCaseValueEx(); - TheClusterifier.add(CaseRanges, SuccBB); - } - - TheClusterifier.optimize(); + for (SwitchInst::CaseIt i = SI->case_begin(), e = SI->case_end(); i != e; ++i) + Cases.push_back(CaseRange(i.getCaseValue(), i.getCaseValue(), + i.getCaseSuccessor())); - size_t numCmps = 0; - for (IntegersSubsetToBB::RangeIterator i = TheClusterifier.begin(), - e = TheClusterifier.end(); i != e; ++i, ++numCmps) { - IntegersSubsetToBB::Cluster &C = *i; - - // FIXME: Currently work with ConstantInt based numbers. - // Changing it to APInt based is a pretty heavy for this commit. - Cases.push_back(CaseRange(C.first.getLow().toConstantInt(), - C.first.getHigh().toConstantInt(), C.second)); - if (C.first.isSingleNumber()) + std::sort(Cases.begin(), Cases.end(), CaseCmp()); + + // Merge case into clusters + if (Cases.size()>=2) + for (CaseItr I=Cases.begin(), J=llvm::next(Cases.begin()); J!=Cases.end(); ) { + int64_t nextValue = cast<ConstantInt>(J->Low)->getSExtValue(); + int64_t currentValue = cast<ConstantInt>(I->High)->getSExtValue(); + BasicBlock* nextBB = J->BB; + BasicBlock* currentBB = I->BB; + + // If the two neighboring cases go to the same destination, merge them + // into a single case. + if ((nextValue-currentValue==1) && (currentBB == nextBB)) { + I->High = J->High; + J = Cases.erase(J); + } else { + I = J++; + } + } + + for (CaseItr I=Cases.begin(), E=Cases.end(); I!=E; ++I, ++numCmps) { + if (I->Low != I->High) // A range counts double, since it requires two compares. ++numCmps; } - return numCmps; + return numCmps; } // processSwitchInst - Replace the specified switch instruction with a sequence diff --git a/lib/Transforms/Utils/Mem2Reg.cpp b/lib/Transforms/Utils/Mem2Reg.cpp index ebd7db6..61b3965 100644 --- a/lib/Transforms/Utils/Mem2Reg.cpp +++ b/lib/Transforms/Utils/Mem2Reg.cpp @@ -16,7 +16,6 @@ #include "llvm/Transforms/Scalar.h" #include "llvm/ADT/Statistic.h" #include "llvm/Analysis/Dominators.h" -#include "llvm/IR/DataLayout.h" #include "llvm/IR/Function.h" #include "llvm/IR/Instructions.h" #include "llvm/Transforms/Utils/PromoteMemToReg.h" @@ -28,7 +27,6 @@ STATISTIC(NumPromoted, "Number of alloca's promoted"); namespace { struct PromotePass : public FunctionPass { static char ID; // Pass identification, replacement for typeid - PromotePass() : FunctionPass(ID) { initializePromotePassPass(*PassRegistry::getPassRegistry()); } @@ -64,7 +62,6 @@ bool PromotePass::runOnFunction(Function &F) { bool Changed = false; DominatorTree &DT = getAnalysis<DominatorTree>(); - const DataLayout *DL = getAnalysisIfAvailable<DataLayout>(); while (1) { Allocas.clear(); @@ -73,12 +70,12 @@ bool PromotePass::runOnFunction(Function &F) { // the entry node for (BasicBlock::iterator I = BB.begin(), E = --BB.end(); I != E; ++I) if (AllocaInst *AI = dyn_cast<AllocaInst>(I)) // Is it an alloca? - if (isAllocaPromotable(AI, DL)) + if (isAllocaPromotable(AI)) Allocas.push_back(AI); if (Allocas.empty()) break; - PromoteMemToReg(Allocas, DT, DL); + PromoteMemToReg(Allocas, DT); NumPromoted += Allocas.size(); Changed = true; } diff --git a/lib/Transforms/Utils/PromoteMemoryToRegister.cpp b/lib/Transforms/Utils/PromoteMemoryToRegister.cpp index 6910180..8f6eee3 100644 --- a/lib/Transforms/Utils/PromoteMemoryToRegister.cpp +++ b/lib/Transforms/Utils/PromoteMemoryToRegister.cpp @@ -30,7 +30,6 @@ #include "llvm/ADT/ArrayRef.h" #include "llvm/ADT/DenseMap.h" #include "llvm/ADT/STLExtras.h" -#include "llvm/ADT/SetVector.h" #include "llvm/ADT/SmallPtrSet.h" #include "llvm/ADT/SmallVector.h" #include "llvm/ADT/Statistic.h" @@ -46,7 +45,6 @@ #include "llvm/IR/Instructions.h" #include "llvm/IR/IntrinsicInst.h" #include "llvm/IR/Metadata.h" -#include "llvm/InstVisitor.h" #include "llvm/Support/CFG.h" #include "llvm/Transforms/Utils/Local.h" #include <algorithm> @@ -58,16 +56,56 @@ STATISTIC(NumSingleStore, "Number of alloca's promoted with a single store"); STATISTIC(NumDeadAlloca, "Number of dead alloca's removed"); STATISTIC(NumPHIInsert, "Number of PHI nodes inserted"); -namespace { +bool llvm::isAllocaPromotable(const AllocaInst *AI) { + // FIXME: If the memory unit is of pointer or integer type, we can permit + // assignments to subsections of the memory unit. + + // Only allow direct and non-volatile loads and stores... + for (Value::const_use_iterator UI = AI->use_begin(), UE = AI->use_end(); + UI != UE; ++UI) { // Loop over all of the uses of the alloca + const User *U = *UI; + if (const LoadInst *LI = dyn_cast<LoadInst>(U)) { + // Note that atomic loads can be transformed; atomic semantics do + // not have any meaning for a local alloca. + if (LI->isVolatile()) + return false; + } else if (const StoreInst *SI = dyn_cast<StoreInst>(U)) { + if (SI->getOperand(0) == AI) + return false; // Don't allow a store OF the AI, only INTO the AI. + // Note that atomic stores can be transformed; atomic semantics do + // not have any meaning for a local alloca. + if (SI->isVolatile()) + return false; + } else if (const IntrinsicInst *II = dyn_cast<IntrinsicInst>(U)) { + if (II->getIntrinsicID() != Intrinsic::lifetime_start && + II->getIntrinsicID() != Intrinsic::lifetime_end) + return false; + } else if (const BitCastInst *BCI = dyn_cast<BitCastInst>(U)) { + if (BCI->getType() != Type::getInt8PtrTy(U->getContext())) + return false; + if (!onlyUsedByLifetimeMarkers(BCI)) + return false; + } else if (const GetElementPtrInst *GEPI = dyn_cast<GetElementPtrInst>(U)) { + if (GEPI->getType() != Type::getInt8PtrTy(U->getContext())) + return false; + if (!GEPI->hasAllZeroIndices()) + return false; + if (!onlyUsedByLifetimeMarkers(GEPI)) + return false; + } else { + return false; + } + } -struct AllocaInfo : private InstVisitor<AllocaInfo, bool> { - const DataLayout *DL; + return true; +} + +namespace { +struct AllocaInfo { SmallVector<BasicBlock *, 32> DefiningBlocks; SmallVector<BasicBlock *, 32> UsingBlocks; - SmallVector<Instruction *, 8> DeadInsts; - Type *AllocaTy; StoreInst *OnlyStore; BasicBlock *OnlyBlock; bool OnlyUsedInOneBlock; @@ -75,13 +113,9 @@ struct AllocaInfo : private InstVisitor<AllocaInfo, bool> { Value *AllocaPointerVal; DbgDeclareInst *DbgDeclare; - AllocaInfo(const DataLayout *DL) : DL(DL) {} - void clear() { DefiningBlocks.clear(); UsingBlocks.clear(); - DeadInsts.clear(); - AllocaTy = 0; OnlyStore = 0; OnlyBlock = 0; OnlyUsedInOneBlock = true; @@ -91,116 +125,39 @@ struct AllocaInfo : private InstVisitor<AllocaInfo, bool> { /// Scan the uses of the specified alloca, filling in the AllocaInfo used /// by the rest of the pass to reason about the uses of this alloca. - bool analyzeAlloca(AllocaInst &AI) { + void AnalyzeAlloca(AllocaInst *AI) { clear(); - AllocaTy = AI.getAllocatedType(); - enqueueUsers(AI); - - // Walk queued up uses in the worklist to handle nested uses. - while (!UseWorklist.empty()) { - U = UseWorklist.pop_back_val(); - Instruction &I = *cast<Instruction>(U->getUser()); - if (!visit(I)) - return false; // Propagate failure to promote up. + // As we scan the uses of the alloca instruction, keep track of stores, + // and decide whether all of the loads and stores to the alloca are within + // the same basic block. + for (Value::use_iterator UI = AI->use_begin(), E = AI->use_end(); + UI != E;) { + Instruction *User = cast<Instruction>(*UI++); + + if (StoreInst *SI = dyn_cast<StoreInst>(User)) { + // Remember the basic blocks which define new values for the alloca + DefiningBlocks.push_back(SI->getParent()); + AllocaPointerVal = SI->getOperand(0); + OnlyStore = SI; + } else { + LoadInst *LI = cast<LoadInst>(User); + // Otherwise it must be a load instruction, keep track of variable + // reads. + UsingBlocks.push_back(LI->getParent()); + AllocaPointerVal = LI; + } if (OnlyUsedInOneBlock) { if (OnlyBlock == 0) - OnlyBlock = I.getParent(); - else if (OnlyBlock != I.getParent()) + OnlyBlock = User->getParent(); + else if (OnlyBlock != User->getParent()) OnlyUsedInOneBlock = false; } } - DbgDeclare = FindAllocaDbgDeclare(&AI); - return true; - } - -private: - // Befriend the base class so it can call through private visitor methods. - friend class InstVisitor<AllocaInfo, bool>; - - /// \brief A use pointer that is non-null when visiting uses. - Use *U; - - /// \brief A worklist for recursively visiting all uses of an alloca. - SmallVector<Use *, 8> UseWorklist; - - /// \brief A set for preventing cyclic visitation. - SmallPtrSet<Use *, 8> VisitedUses; - - void enqueueUsers(Instruction &I) { - for (Value::use_iterator UI = I.use_begin(), UE = I.use_end(); UI != UE; - ++UI) - if (VisitedUses.insert(&UI.getUse())) - UseWorklist.push_back(&UI.getUse()); + DbgDeclare = FindAllocaDbgDeclare(AI); } - - bool visitLoadInst(LoadInst &LI) { - if (LI.isVolatile() || LI.getType() != AllocaTy) - return false; - - // Keep track of variable reads. - UsingBlocks.push_back(LI.getParent()); - AllocaPointerVal = &LI; - return true; - } - - bool visitStoreInst(StoreInst &SI) { - if (SI.isVolatile() || SI.getValueOperand() == U->get() || - SI.getValueOperand()->getType() != AllocaTy) - return false; - - // Remember the basic blocks which define new values for the alloca - DefiningBlocks.push_back(SI.getParent()); - AllocaPointerVal = SI.getOperand(0); - OnlyStore = &SI; - return true; - } - - bool visitBitCastInst(BitCastInst &BC) { - if (BC.use_empty()) - DeadInsts.push_back(&BC); - else - enqueueUsers(BC); - return true; - } - - bool visitGetElementPtrInst(GetElementPtrInst &GEPI) { - if (GEPI.use_empty()) { - DeadInsts.push_back(&GEPI); - return true; - } - - enqueueUsers(GEPI); - - return GEPI.hasAllZeroIndices(); - } - - // We can promote through debug info intrinsics as they don't alter the - // value stored in memory. - bool visitDbgInfoIntrinsic(DbgInfoIntrinsic &I) { - DeadInsts.push_back(&I); - return true; - } - - bool visitIntrinsicInst(IntrinsicInst &II) { - switch (II.getIntrinsicID()) { - default: - return false; - - // Lifetime intrinsics don't preclude promoting the memory to a register. - // FIXME: We should use these to promote to undef when outside of a valid - // lifetime. - case Intrinsic::lifetime_start: - case Intrinsic::lifetime_end: - DeadInsts.push_back(&II); - return true; - } - } - - // The fallback is that the alloca cannot be promoted. - bool visitInstruction(Instruction &I) { return false; } }; // Data package used by RenamePass() @@ -278,7 +235,6 @@ struct PromoteMem2Reg { std::vector<AllocaInst *> Allocas; DominatorTree &DT; DIBuilder DIB; - const DataLayout *DL; /// An AliasSetTracker object to update. If null, don't update it. AliasSetTracker *AST; @@ -324,9 +280,9 @@ struct PromoteMem2Reg { public: PromoteMem2Reg(ArrayRef<AllocaInst *> Allocas, DominatorTree &DT, - const DataLayout *DL, AliasSetTracker *AST) + AliasSetTracker *AST) : Allocas(Allocas.begin(), Allocas.end()), DT(DT), - DIB(*DT.getRoot()->getParent()->getParent()), DL(DL), AST(AST) {} + DIB(*DT.getRoot()->getParent()->getParent()), AST(AST) {} void run(); @@ -357,39 +313,27 @@ private: } // end of anonymous namespace -/// \brief Walk a small vector of dead instructions and recursively remove them -/// and subsequently dead instructions. -/// -/// This is only valid to call on dead instructions using an alloca which is -/// promotable, as we leverage that assumption to delete them faster. -static void removeDeadInstructions(AllocaInst *AI, - SmallVectorImpl<Instruction *> &DeadInsts) { - while (!DeadInsts.empty()) { - Instruction *I = DeadInsts.pop_back_val(); - - // Don't delete the alloca itself. - if (I == AI) - continue; +static void removeLifetimeIntrinsicUsers(AllocaInst *AI) { + // Knowing that this alloca is promotable, we know that it's safe to kill all + // instructions except for load and store. - // Note that we open code the deletion algorithm here because we know - // apriori that all of the instructions using an alloca that reaches here - // are trivially dead when their use list becomes empty (The only risk are - // lifetime markers which we specifically want to nuke). By coding it here - // we can skip the triviality test and be more efficient. - // - // Null out all of the instruction's operands to see if any operand becomes - // dead as we go. - for (User::op_iterator OI = I->op_begin(), OE = I->op_end(); OI != OE; - ++OI) { - Instruction *Op = dyn_cast<Instruction>(*OI); - if (!Op) - continue; - - OI->set(0); - if (!Op->use_empty()) - continue; + for (Value::use_iterator UI = AI->use_begin(), UE = AI->use_end(); + UI != UE;) { + Instruction *I = cast<Instruction>(*UI); + ++UI; + if (isa<LoadInst>(I) || isa<StoreInst>(I)) + continue; - DeadInsts.push_back(Op); + if (!I->getType()->isVoidTy()) { + // The only users of this bitcast/GEP instruction are lifetime intrinsics. + // Follow the use/def chain to erase them now instead of leaving it for + // dead code elimination later. + for (Value::use_iterator UI = I->use_begin(), UE = I->use_end(); + UI != UE;) { + Instruction *Inst = cast<Instruction>(*UI); + ++UI; + Inst->eraseFromParent(); + } } I->eraseFromParent(); } @@ -474,6 +418,7 @@ static bool rewriteSingleStoreAlloca(AllocaInst *AI, AllocaInfo &Info, DIBuilder DIB(*AI->getParent()->getParent()->getParent()); ConvertDebugDeclareToDebugValue(DDI, Info.OnlyStore, DIB); DDI->eraseFromParent(); + LBI.deleteValue(DDI); } // Remove the (now dead) store and alloca. Info.OnlyStore->eraseFromParent(); @@ -486,16 +431,6 @@ static bool rewriteSingleStoreAlloca(AllocaInst *AI, AllocaInfo &Info, return true; } -namespace { -/// This is a helper predicate used to search by the first element of a pair. -struct StoreIndexSearchPredicate { - bool operator()(const std::pair<unsigned, StoreInst *> &LHS, - const std::pair<unsigned, StoreInst *> &RHS) { - return LHS.first < RHS.first; - } -}; -} - /// Many allocas are only used within a single basic block. If this is the /// case, avoid traversing the CFG and inserting a lot of potentially useless /// PHI nodes by just performing a single linear pass over the basic block @@ -528,8 +463,7 @@ static void promoteSingleBlockAlloca(AllocaInst *AI, const AllocaInfo &Info, // Sort the stores by their index, making it efficient to do a lookup with a // binary search. - std::sort(StoresByIndex.begin(), StoresByIndex.end(), - StoreIndexSearchPredicate()); + std::sort(StoresByIndex.begin(), StoresByIndex.end(), less_first()); // Walk all of the loads from this alloca, replacing them with the nearest // store above them, if any. @@ -544,7 +478,7 @@ static void promoteSingleBlockAlloca(AllocaInst *AI, const AllocaInfo &Info, StoresByIndexTy::iterator I = std::lower_bound(StoresByIndex.begin(), StoresByIndex.end(), std::make_pair(LoadIdx, static_cast<StoreInst *>(0)), - StoreIndexSearchPredicate()); + less_first()); if (I == StoresByIndex.begin()) // If there is no store before this load, the load takes the undef value. @@ -577,8 +511,10 @@ static void promoteSingleBlockAlloca(AllocaInst *AI, const AllocaInfo &Info, LBI.deleteValue(AI); // The alloca's debuginfo can be removed as well. - if (DbgDeclareInst *DDI = Info.DbgDeclare) + if (DbgDeclareInst *DDI = Info.DbgDeclare) { DDI->eraseFromParent(); + LBI.deleteValue(DDI); + } ++NumLocalPromoted; } @@ -590,23 +526,17 @@ void PromoteMem2Reg::run() { PointerAllocaValues.resize(Allocas.size()); AllocaDbgDeclares.resize(Allocas.size()); - AllocaInfo Info(DL); + AllocaInfo Info; LargeBlockInfo LBI; for (unsigned AllocaNum = 0; AllocaNum != Allocas.size(); ++AllocaNum) { AllocaInst *AI = Allocas[AllocaNum]; + assert(isAllocaPromotable(AI) && "Cannot promote non-promotable alloca!"); assert(AI->getParent()->getParent() == &F && "All allocas should be in the same function, which is same as DF!"); - // Calculate the set of read and write-locations for each alloca. This is - // analogous to finding the 'uses' and 'definitions' of each variable. - bool Good = Info.analyzeAlloca(*AI); - (void)Good; - assert(Good && "Cannot promote non-promotable alloca!"); - - // Nuke all of the dead instructions. - removeDeadInstructions(AI, Info.DeadInsts); + removeLifetimeIntrinsicUsers(AI); if (AI->use_empty()) { // If there are no uses of the alloca, just delete it now. @@ -620,6 +550,10 @@ void PromoteMem2Reg::run() { continue; } + // Calculate the set of read and write-locations for each alloca. This is + // analogous to finding the 'uses' and 'definitions' of each variable. + Info.AnalyzeAlloca(AI); + // If there is only a single store to this value, replace any loads of // it that are directly dominated by the definition with the value stored. if (Info.DefiningBlocks.size() == 1) { @@ -904,16 +838,6 @@ void PromoteMem2Reg::ComputeLiveInBlocks( } } -namespace { -typedef std::pair<DomTreeNode *, unsigned> DomTreeNodePair; - -struct DomTreeNodeCompare { - bool operator()(const DomTreeNodePair &LHS, const DomTreeNodePair &RHS) { - return LHS.second < RHS.second; - } -}; -} // end anonymous namespace - /// At this point, we're committed to promoting the alloca using IDF's, and the /// standard SSA construction algorithm. Determine which blocks need phi nodes /// and see if we can optimize out some work by avoiding insertion of dead phi @@ -931,9 +855,9 @@ void PromoteMem2Reg::DetermineInsertionPoint(AllocaInst *AI, unsigned AllocaNum, // Use a priority queue keyed on dominator tree level so that inserted nodes // are handled from the bottom of the dominator tree upwards. - typedef std::priority_queue<DomTreeNodePair, - SmallVector<DomTreeNodePair, 32>, - DomTreeNodeCompare> IDFPriorityQueue; + typedef std::pair<DomTreeNode *, unsigned> DomTreeNodePair; + typedef std::priority_queue<DomTreeNodePair, SmallVector<DomTreeNodePair, 32>, + less_second> IDFPriorityQueue; IDFPriorityQueue PQ; for (SmallPtrSet<BasicBlock *, 32>::const_iterator I = DefBlocks.begin(), @@ -1145,19 +1069,11 @@ NextIteration: goto NextIteration; } -bool llvm::isAllocaPromotable(const AllocaInst *AI, const DataLayout *DL) { - // We cast away constness because we re-use the non-const analysis that the - // actual promotion routine uses. While it is non-const, it doesn't actually - // mutate anything at this phase, and we discard the non-const results that - // promotion uses to mutate the alloca. - return AllocaInfo(DL).analyzeAlloca(*const_cast<AllocaInst *>(AI)); -} - void llvm::PromoteMemToReg(ArrayRef<AllocaInst *> Allocas, DominatorTree &DT, - const DataLayout *DL, AliasSetTracker *AST) { + AliasSetTracker *AST) { // If there is nothing to do, bail out... if (Allocas.empty()) return; - PromoteMem2Reg(Allocas, DT, DL, AST).run(); + PromoteMem2Reg(Allocas, DT, AST).run(); } diff --git a/lib/Transforms/Utils/SSAUpdater.cpp b/lib/Transforms/Utils/SSAUpdater.cpp index fc85ef3..30adbfa 100644 --- a/lib/Transforms/Utils/SSAUpdater.cpp +++ b/lib/Transforms/Utils/SSAUpdater.cpp @@ -63,7 +63,7 @@ void SSAUpdater::AddAvailableValue(BasicBlock *BB, Value *V) { } static bool IsEquivalentPHI(PHINode *PHI, - DenseMap<BasicBlock*, Value*> &ValueMapping) { + SmallDenseMap<BasicBlock*, Value*, 8> &ValueMapping) { unsigned PHINumValues = PHI->getNumIncomingValues(); if (PHINumValues != ValueMapping.size()) return false; @@ -136,8 +136,8 @@ Value *SSAUpdater::GetValueInMiddleOfBlock(BasicBlock *BB) { // Otherwise, we do need a PHI: check to see if we already have one available // in this block that produces the right value. if (isa<PHINode>(BB->begin())) { - DenseMap<BasicBlock*, Value*> ValueMapping(PredValues.begin(), - PredValues.end()); + SmallDenseMap<BasicBlock*, Value*, 8> ValueMapping(PredValues.begin(), + PredValues.end()); PHINode *SomePHI; for (BasicBlock::iterator It = BB->begin(); (SomePHI = dyn_cast<PHINode>(It)); ++It) { diff --git a/lib/Transforms/Utils/SimplifyCFG.cpp b/lib/Transforms/Utils/SimplifyCFG.cpp index c4c1423..ff50b12 100644 --- a/lib/Transforms/Utils/SimplifyCFG.cpp +++ b/lib/Transforms/Utils/SimplifyCFG.cpp @@ -19,6 +19,7 @@ #include "llvm/ADT/SmallPtrSet.h" #include "llvm/ADT/SmallVector.h" #include "llvm/ADT/Statistic.h" +#include "llvm/Analysis/ConstantFolding.h" #include "llvm/Analysis/InstructionSimplify.h" #include "llvm/Analysis/TargetTransformInfo.h" #include "llvm/Analysis/ValueTracking.h" @@ -475,9 +476,13 @@ Value *SimplifyCFGOpt::isValueEqualityComparison(TerminatorInst *TI) { CV = ICI->getOperand(0); // Unwrap any lossless ptrtoint cast. - if (TD && CV && CV->getType() == TD->getIntPtrType(CV->getContext())) - if (PtrToIntInst *PTII = dyn_cast<PtrToIntInst>(CV)) - CV = PTII->getOperand(0); + if (TD && CV) { + if (PtrToIntInst *PTII = dyn_cast<PtrToIntInst>(CV)) { + Value *Ptr = PTII->getPointerOperand(); + if (PTII->getType() == TD->getIntPtrType(Ptr->getType())) + CV = Ptr; + } + } return CV; } @@ -699,9 +704,10 @@ namespace { }; } -static int ConstantIntSortPredicate(const void *P1, const void *P2) { - const ConstantInt *LHS = *(const ConstantInt*const*)P1; - const ConstantInt *RHS = *(const ConstantInt*const*)P2; +static int ConstantIntSortPredicate(ConstantInt *const *P1, + ConstantInt *const *P2) { + const ConstantInt *LHS = *P1; + const ConstantInt *RHS = *P2; if (LHS->getValue().ult(RHS->getValue())) return 1; if (LHS->getValue() == RHS->getValue()) @@ -924,7 +930,7 @@ bool SimplifyCFGOpt::FoldValueComparisonIntoPredecessors(TerminatorInst *TI, // Convert pointer to int before we switch. if (CV->getType()->isPointerTy()) { assert(TD && "Cannot switch on pointer without DataLayout"); - CV = Builder.CreatePtrToInt(CV, TD->getIntPtrType(CV->getContext()), + CV = Builder.CreatePtrToInt(CV, TD->getIntPtrType(CV->getType()), "magicptr"); } @@ -1556,6 +1562,19 @@ static bool SpeculativelyExecuteBB(BranchInst *BI, BasicBlock *ThenBB) { return true; } +/// \returns True if this block contains a CallInst with the NoDuplicate +/// attribute. +static bool HasNoDuplicateCall(const BasicBlock *BB) { + for (BasicBlock::const_iterator I = BB->begin(), E = BB->end(); I != E; ++I) { + const CallInst *CI = dyn_cast<CallInst>(I); + if (!CI) + continue; + if (CI->cannotDuplicate()) + return true; + } + return false; +} + /// BlockIsSimpleEnoughToThreadThrough - Return true if we can thread a branch /// across this block. static bool BlockIsSimpleEnoughToThreadThrough(BasicBlock *BB) { @@ -1603,6 +1622,8 @@ static bool FoldCondBranchOnPHI(BranchInst *BI, const DataLayout *TD) { // Now we know that this block has multiple preds and two succs. if (!BlockIsSimpleEnoughToThreadThrough(BB)) return false; + if (HasNoDuplicateCall(BB)) return false; + // Okay, this is a simple enough basic block. See if any phi values are // constants. for (unsigned i = 0, e = PN->getNumIncomingValues(); i != e; ++i) { @@ -2069,14 +2090,19 @@ bool llvm::FoldBranchToCommonDest(BranchInst *BI) { // Ensure that any values used in the bonus instruction are also used // by the terminator of the predecessor. This means that those values // must already have been resolved, so we won't be inhibiting the - // out-of-order core by speculating them earlier. - if (BonusInst) { + // out-of-order core by speculating them earlier. We also allow + // instructions that are used by the terminator's condition because it + // exposes more merging opportunities. + bool UsedByBranch = (BonusInst && BonusInst->hasOneUse() && + *BonusInst->use_begin() == Cond); + + if (BonusInst && !UsedByBranch) { // Collect the values used by the bonus inst SmallPtrSet<Value*, 4> UsedValues; for (Instruction::op_iterator OI = BonusInst->op_begin(), OE = BonusInst->op_end(); OI != OE; ++OI) { Value *V = *OI; - if (!isa<Constant>(V)) + if (!isa<Constant>(V) && !isa<Argument>(V)) UsedValues.insert(V); } @@ -2787,7 +2813,7 @@ static bool SimplifyBranchOnICmpChain(BranchInst *BI, const DataLayout *TD, if (CompVal->getType()->isPointerTy()) { assert(TD && "Cannot switch on pointer without DataLayout"); CompVal = Builder.CreatePtrToInt(CompVal, - TD->getIntPtrType(CompVal->getContext()), + TD->getIntPtrType(CompVal->getType()), "magicptr"); } @@ -3160,7 +3186,7 @@ static bool TurnSwitchRangeIntoICmp(SwitchInst *SI, IRBuilder<> &Builder) { /// and use it to remove dead cases. static bool EliminateDeadSwitchCases(SwitchInst *SI) { Value *Cond = SI->getCondition(); - unsigned Bits = cast<IntegerType>(Cond->getType())->getBitWidth(); + unsigned Bits = Cond->getType()->getIntegerBitWidth(); APInt KnownZero(Bits, 0), KnownOne(Bits, 0); ComputeMaskedBits(Cond, KnownZero, KnownOne); @@ -3303,28 +3329,10 @@ static Constant *LookupConstant(Value *V, /// simple instructions such as binary operations where both operands are /// constant or can be replaced by constants from the ConstantPool. Returns the /// resulting constant on success, 0 otherwise. -static Constant *ConstantFold(Instruction *I, - const SmallDenseMap<Value*, Constant*>& ConstantPool) { - if (BinaryOperator *BO = dyn_cast<BinaryOperator>(I)) { - Constant *A = LookupConstant(BO->getOperand(0), ConstantPool); - if (!A) - return 0; - Constant *B = LookupConstant(BO->getOperand(1), ConstantPool); - if (!B) - return 0; - return ConstantExpr::get(BO->getOpcode(), A, B); - } - - if (CmpInst *Cmp = dyn_cast<CmpInst>(I)) { - Constant *A = LookupConstant(I->getOperand(0), ConstantPool); - if (!A) - return 0; - Constant *B = LookupConstant(I->getOperand(1), ConstantPool); - if (!B) - return 0; - return ConstantExpr::getCompare(Cmp->getPredicate(), A, B); - } - +static Constant * +ConstantFold(Instruction *I, + const SmallDenseMap<Value *, Constant *> &ConstantPool, + const DataLayout *DL) { if (SelectInst *Select = dyn_cast<SelectInst>(I)) { Constant *A = LookupConstant(Select->getCondition(), ConstantPool); if (!A) @@ -3336,14 +3344,19 @@ static Constant *ConstantFold(Instruction *I, return 0; } - if (CastInst *Cast = dyn_cast<CastInst>(I)) { - Constant *A = LookupConstant(I->getOperand(0), ConstantPool); - if (!A) + SmallVector<Constant *, 4> COps; + for (unsigned N = 0, E = I->getNumOperands(); N != E; ++N) { + if (Constant *A = LookupConstant(I->getOperand(N), ConstantPool)) + COps.push_back(A); + else return 0; - return ConstantExpr::getCast(Cast->getOpcode(), A, Cast->getDestTy()); } - return 0; + if (CmpInst *Cmp = dyn_cast<CmpInst>(I)) + return ConstantFoldCompareInstOperands(Cmp->getPredicate(), COps[0], + COps[1], DL); + + return ConstantFoldInstOperands(I->getOpcode(), I->getType(), COps, DL); } /// GetCaseResults - Try to determine the resulting constant values in phi nodes @@ -3355,7 +3368,8 @@ GetCaseResults(SwitchInst *SI, ConstantInt *CaseVal, BasicBlock *CaseDest, BasicBlock **CommonDest, - SmallVectorImpl<std::pair<PHINode*,Constant*> > &Res) { + SmallVectorImpl<std::pair<PHINode *, Constant *> > &Res, + const DataLayout *DL) { // The block from which we enter the common destination. BasicBlock *Pred = SI->getParent(); @@ -3374,7 +3388,7 @@ GetCaseResults(SwitchInst *SI, } else if (isa<DbgInfoIntrinsic>(I)) { // Skip debug intrinsic. continue; - } else if (Constant *C = ConstantFold(I, ConstantPool)) { + } else if (Constant *C = ConstantFold(I, ConstantPool, DL)) { // Instruction is side-effect free and constant. ConstantPool.insert(std::make_pair(I, C)); } else { @@ -3698,7 +3712,7 @@ static bool SwitchToLookupTable(SwitchInst *SI, typedef SmallVector<std::pair<PHINode*, Constant*>, 4> ResultsTy; ResultsTy Results; if (!GetCaseResults(SI, CaseVal, CI.getCaseSuccessor(), &CommonDest, - Results)) + Results, TD)) return false; // Append the result from this case to the list for each phi. @@ -3712,7 +3726,7 @@ static bool SwitchToLookupTable(SwitchInst *SI, // Get the resulting values for the default case. SmallVector<std::pair<PHINode*, Constant*>, 4> DefaultResultsList; if (!GetCaseResults(SI, 0, SI->getDefaultDest(), &CommonDest, - DefaultResultsList)) + DefaultResultsList, TD)) return false; for (size_t I = 0, E = DefaultResultsList.size(); I != E; ++I) { PHINode *PHI = DefaultResultsList[I].first; @@ -3733,14 +3747,32 @@ static bool SwitchToLookupTable(SwitchInst *SI, CommonDest->getParent(), CommonDest); - // Check whether the condition value is within the case range, and branch to - // the new BB. + // Compute the table index value. Builder.SetInsertPoint(SI); Value *TableIndex = Builder.CreateSub(SI->getCondition(), MinCaseVal, "switch.tableidx"); - Value *Cmp = Builder.CreateICmpULT(TableIndex, ConstantInt::get( - MinCaseVal->getType(), TableSize)); - Builder.CreateCondBr(Cmp, LookupBB, SI->getDefaultDest()); + + // Compute the maximum table size representable by the integer type we are + // switching upon. + unsigned CaseSize = MinCaseVal->getType()->getPrimitiveSizeInBits(); + uint64_t MaxTableSize = CaseSize > 63? UINT64_MAX : 1ULL << CaseSize; + assert(MaxTableSize >= TableSize && + "It is impossible for a switch to have more entries than the max " + "representable value of its input integer type's size."); + + // If we have a fully covered lookup table, unconditionally branch to the + // lookup table BB. Otherwise, check if the condition value is within the case + // range. If it is so, branch to the new BB. Otherwise branch to SI's default + // destination. + const bool GeneratingCoveredLookupTable = MaxTableSize == TableSize; + if (GeneratingCoveredLookupTable) { + Builder.CreateBr(LookupBB); + SI->getDefaultDest()->removePredecessor(SI->getParent()); + } else { + Value *Cmp = Builder.CreateICmpULT(TableIndex, ConstantInt::get( + MinCaseVal->getType(), TableSize)); + Builder.CreateCondBr(Cmp, LookupBB, SI->getDefaultDest()); + } // Populate the BB that does the lookups. Builder.SetInsertPoint(LookupBB); @@ -3769,9 +3801,11 @@ static bool SwitchToLookupTable(SwitchInst *SI, Builder.CreateBr(CommonDest); // Remove the switch. - for (unsigned i = 0; i < SI->getNumSuccessors(); ++i) { + for (unsigned i = 0, e = SI->getNumSuccessors(); i < e; ++i) { BasicBlock *Succ = SI->getSuccessor(i); - if (Succ == SI->getDefaultDest()) continue; + + if (Succ == SI->getDefaultDest()) + continue; Succ->removePredecessor(SI->getParent()); } SI->eraseFromParent(); diff --git a/lib/Transforms/Utils/SimplifyLibCalls.cpp b/lib/Transforms/Utils/SimplifyLibCalls.cpp index 094c201..15b3e66 100644 --- a/lib/Transforms/Utils/SimplifyLibCalls.cpp +++ b/lib/Transforms/Utils/SimplifyLibCalls.cpp @@ -17,6 +17,7 @@ #include "llvm/Transforms/Utils/SimplifyLibCalls.h" #include "llvm/ADT/SmallString.h" #include "llvm/ADT/StringMap.h" +#include "llvm/ADT/Triple.h" #include "llvm/Analysis/ValueTracking.h" #include "llvm/IR/DataLayout.h" #include "llvm/IR/Function.h" @@ -26,11 +27,16 @@ #include "llvm/IR/LLVMContext.h" #include "llvm/IR/Module.h" #include "llvm/Support/Allocator.h" +#include "llvm/Support/CommandLine.h" #include "llvm/Target/TargetLibraryInfo.h" #include "llvm/Transforms/Utils/BuildLibCalls.h" using namespace llvm; +static cl::opt<bool> +ColdErrorCalls("error-reporting-is-cold", cl::init(true), + cl::Hidden, cl::desc("Treat error-reporting calls as cold")); + /// This class is the abstract base class for the set of optimizations that /// corresponds to one library call. namespace { @@ -118,6 +124,21 @@ static bool callHasFloatingPointArgument(const CallInst *CI) { return false; } +/// \brief Check whether the overloaded unary floating point function +/// corresponing to \a Ty is available. +static bool hasUnaryFloatFn(const TargetLibraryInfo *TLI, Type *Ty, + LibFunc::Func DoubleFn, LibFunc::Func FloatFn, + LibFunc::Func LongDoubleFn) { + switch (Ty->getTypeID()) { + case Type::FloatTyID: + return TLI->has(FloatFn); + case Type::DoubleTyID: + return TLI->has(DoubleFn); + default: + return TLI->has(LongDoubleFn); + } +} + //===----------------------------------------------------------------------===// // Fortified Library Call Optimizations //===----------------------------------------------------------------------===// @@ -477,7 +498,7 @@ struct StrChrOpt : public LibCallOptimization { // Compute the offset, make sure to handle the case when we're searching for // zero (a weird way to spell strlen). - size_t I = CharC->getSExtValue() == 0 ? + size_t I = (0xFF & CharC->getSExtValue()) == 0 ? Str.size() : Str.find(CharC->getSExtValue()); if (I == StringRef::npos) // Didn't find the char. strchr returns null. return Constant::getNullValue(CI->getType()); @@ -513,7 +534,7 @@ struct StrRChrOpt : public LibCallOptimization { } // Compute the offset. - size_t I = CharC->getSExtValue() == 0 ? + size_t I = (0xFF & CharC->getSExtValue()) == 0 ? Str.size() : Str.rfind(CharC->getSExtValue()); if (I == StringRef::npos) // Didn't find the char. Return null. return Constant::getNullValue(CI->getType()); @@ -774,7 +795,7 @@ struct StrPBrkOpt : public LibCallOptimization { // Constant folding. if (HasS1 && HasS2) { size_t I = S1.find_first_of(S2); - if (I == std::string::npos) // No match. + if (I == StringRef::npos) // No match. return Constant::getNullValue(CI->getType()); return B.CreateGEP(CI->getArgOperand(0), B.getInt64(I), "strpbrk"); @@ -912,7 +933,7 @@ struct StrStrOpt : public LibCallOptimization { // If both strings are known, constant fold it. if (HasStr1 && HasStr2) { - std::string::size_type Offset = SearchStr.find(ToFindStr); + size_t Offset = SearchStr.find(ToFindStr); if (Offset == StringRef::npos) // strstr("foo", "bar") -> null return Constant::getNullValue(CI->getType()); @@ -1031,7 +1052,7 @@ struct MemSetOpt : public LibCallOptimization { if (FT->getNumParams() != 3 || FT->getReturnType() != FT->getParamType(0) || !FT->getParamType(0)->isPointerTy() || !FT->getParamType(1)->isIntegerTy() || - FT->getParamType(2) != TD->getIntPtrType(*Context)) + FT->getParamType(2) != TD->getIntPtrType(FT->getParamType(0))) return 0; // memset(p, v, n) -> llvm.memset(p, v, n, 1) @@ -1133,9 +1154,13 @@ struct PowOpt : public UnsafeFPLibCallOptimization { Value *Op1 = CI->getArgOperand(0), *Op2 = CI->getArgOperand(1); if (ConstantFP *Op1C = dyn_cast<ConstantFP>(Op1)) { - if (Op1C->isExactlyValue(1.0)) // pow(1.0, x) -> 1.0 + // pow(1.0, x) -> 1.0 + if (Op1C->isExactlyValue(1.0)) return Op1C; - if (Op1C->isExactlyValue(2.0)) // pow(2.0, x) -> exp2(x) + // pow(2.0, x) -> exp2(x) + if (Op1C->isExactlyValue(2.0) && + hasUnaryFloatFn(TLI, Op1->getType(), LibFunc::exp2, LibFunc::exp2f, + LibFunc::exp2l)) return EmitUnaryFloatFnCall(Op2, "exp2", B, Callee->getAttributes()); } @@ -1145,7 +1170,11 @@ struct PowOpt : public UnsafeFPLibCallOptimization { if (Op2C->getValueAPF().isZero()) // pow(x, 0.0) -> 1.0 return ConstantFP::get(CI->getType(), 1.0); - if (Op2C->isExactlyValue(0.5)) { + if (Op2C->isExactlyValue(0.5) && + hasUnaryFloatFn(TLI, Op2->getType(), LibFunc::sqrt, LibFunc::sqrtf, + LibFunc::sqrtl) && + hasUnaryFloatFn(TLI, Op2->getType(), LibFunc::fabs, LibFunc::fabsf, + LibFunc::fabsl)) { // Expand pow(x, 0.5) to (x == -infinity ? +infinity : fabs(sqrt(x))). // This is faster than calling pow, and still handles negative zero // and negative infinity correctly. @@ -1178,7 +1207,7 @@ struct Exp2Opt : public UnsafeFPLibCallOptimization { virtual Value *callOptimizer(Function *Callee, CallInst *CI, IRBuilder<> &B) { Value *Ret = NULL; if (UnsafeFPShrink && Callee->getName() == "exp2" && - TLI->has(LibFunc::exp2)) { + TLI->has(LibFunc::exp2f)) { UnaryDoubleFPOpt UnsafeUnaryDoubleFP(true); Ret = UnsafeUnaryDoubleFP.callOptimizer(Callee, CI, B); } @@ -1229,6 +1258,155 @@ struct Exp2Opt : public UnsafeFPLibCallOptimization { } }; +struct SinCosPiOpt : public LibCallOptimization { + SinCosPiOpt() {} + + virtual Value *callOptimizer(Function *Callee, CallInst *CI, IRBuilder<> &B) { + // Make sure the prototype is as expected, otherwise the rest of the + // function is probably invalid and likely to abort. + if (!isTrigLibCall(CI)) + return 0; + + Value *Arg = CI->getArgOperand(0); + SmallVector<CallInst *, 1> SinCalls; + SmallVector<CallInst *, 1> CosCalls; + SmallVector<CallInst *, 1> SinCosCalls; + + bool IsFloat = Arg->getType()->isFloatTy(); + + // Look for all compatible sinpi, cospi and sincospi calls with the same + // argument. If there are enough (in some sense) we can make the + // substitution. + for (Value::use_iterator UI = Arg->use_begin(), UE = Arg->use_end(); + UI != UE; ++UI) + classifyArgUse(*UI, CI->getParent(), IsFloat, SinCalls, CosCalls, + SinCosCalls); + + // It's only worthwhile if both sinpi and cospi are actually used. + if (SinCosCalls.empty() && (SinCalls.empty() || CosCalls.empty())) + return 0; + + Value *Sin, *Cos, *SinCos; + insertSinCosCall(B, CI->getCalledFunction(), Arg, IsFloat, Sin, Cos, + SinCos); + + replaceTrigInsts(SinCalls, Sin); + replaceTrigInsts(CosCalls, Cos); + replaceTrigInsts(SinCosCalls, SinCos); + + return 0; + } + + bool isTrigLibCall(CallInst *CI) { + Function *Callee = CI->getCalledFunction(); + FunctionType *FT = Callee->getFunctionType(); + + // We can only hope to do anything useful if we can ignore things like errno + // and floating-point exceptions. + bool AttributesSafe = CI->hasFnAttr(Attribute::NoUnwind) && + CI->hasFnAttr(Attribute::ReadNone); + + // Other than that we need float(float) or double(double) + return AttributesSafe && FT->getNumParams() == 1 && + FT->getReturnType() == FT->getParamType(0) && + (FT->getParamType(0)->isFloatTy() || + FT->getParamType(0)->isDoubleTy()); + } + + void classifyArgUse(Value *Val, BasicBlock *BB, bool IsFloat, + SmallVectorImpl<CallInst *> &SinCalls, + SmallVectorImpl<CallInst *> &CosCalls, + SmallVectorImpl<CallInst *> &SinCosCalls) { + CallInst *CI = dyn_cast<CallInst>(Val); + + if (!CI) + return; + + Function *Callee = CI->getCalledFunction(); + StringRef FuncName = Callee->getName(); + LibFunc::Func Func; + if (!TLI->getLibFunc(FuncName, Func) || !TLI->has(Func) || + !isTrigLibCall(CI)) + return; + + if (IsFloat) { + if (Func == LibFunc::sinpif) + SinCalls.push_back(CI); + else if (Func == LibFunc::cospif) + CosCalls.push_back(CI); + else if (Func == LibFunc::sincospi_stretf) + SinCosCalls.push_back(CI); + } else { + if (Func == LibFunc::sinpi) + SinCalls.push_back(CI); + else if (Func == LibFunc::cospi) + CosCalls.push_back(CI); + else if (Func == LibFunc::sincospi_stret) + SinCosCalls.push_back(CI); + } + } + + void replaceTrigInsts(SmallVectorImpl<CallInst*> &Calls, Value *Res) { + for (SmallVectorImpl<CallInst*>::iterator I = Calls.begin(), + E = Calls.end(); + I != E; ++I) { + LCS->replaceAllUsesWith(*I, Res); + } + } + + void insertSinCosCall(IRBuilder<> &B, Function *OrigCallee, Value *Arg, + bool UseFloat, Value *&Sin, Value *&Cos, + Value *&SinCos) { + Type *ArgTy = Arg->getType(); + Type *ResTy; + StringRef Name; + + Triple T(OrigCallee->getParent()->getTargetTriple()); + if (UseFloat) { + Name = "__sincospi_stretf"; + + assert(T.getArch() != Triple::x86 && "x86 messy and unsupported for now"); + // x86_64 can't use {float, float} since that would be returned in both + // xmm0 and xmm1, which isn't what a real struct would do. + ResTy = T.getArch() == Triple::x86_64 + ? static_cast<Type *>(VectorType::get(ArgTy, 2)) + : static_cast<Type *>(StructType::get(ArgTy, ArgTy, NULL)); + } else { + Name = "__sincospi_stret"; + ResTy = StructType::get(ArgTy, ArgTy, NULL); + } + + Module *M = OrigCallee->getParent(); + Value *Callee = M->getOrInsertFunction(Name, OrigCallee->getAttributes(), + ResTy, ArgTy, NULL); + + if (Instruction *ArgInst = dyn_cast<Instruction>(Arg)) { + // If the argument is an instruction, it must dominate all uses so put our + // sincos call there. + BasicBlock::iterator Loc = ArgInst; + B.SetInsertPoint(ArgInst->getParent(), ++Loc); + } else { + // Otherwise (e.g. for a constant) the beginning of the function is as + // good a place as any. + BasicBlock &EntryBB = B.GetInsertBlock()->getParent()->getEntryBlock(); + B.SetInsertPoint(&EntryBB, EntryBB.begin()); + } + + SinCos = B.CreateCall(Callee, Arg, "sincospi"); + + if (SinCos->getType()->isStructTy()) { + Sin = B.CreateExtractValue(SinCos, 0, "sinpi"); + Cos = B.CreateExtractValue(SinCos, 1, "cospi"); + } else { + Sin = B.CreateExtractElement(SinCos, ConstantInt::get(B.getInt32Ty(), 0), + "sinpi"); + Cos = B.CreateExtractElement(SinCos, ConstantInt::get(B.getInt32Ty(), 1), + "cospi"); + } + } + +}; + //===----------------------------------------------------------------------===// // Integer Library Call Optimizations //===----------------------------------------------------------------------===// @@ -1333,6 +1511,54 @@ struct ToAsciiOpt : public LibCallOptimization { // Formatting and IO Library Call Optimizations //===----------------------------------------------------------------------===// +struct ErrorReportingOpt : public LibCallOptimization { + ErrorReportingOpt(int S = -1) : StreamArg(S) {} + + virtual Value *callOptimizer(Function *Callee, CallInst *CI, IRBuilder<> &) { + // Error reporting calls should be cold, mark them as such. + // This applies even to non-builtin calls: it is only a hint and applies to + // functions that the frontend might not understand as builtins. + + // This heuristic was suggested in: + // Improving Static Branch Prediction in a Compiler + // Brian L. Deitrich, Ben-Chung Cheng, Wen-mei W. Hwu + // Proceedings of PACT'98, Oct. 1998, IEEE + + if (!CI->hasFnAttr(Attribute::Cold) && isReportingError(Callee, CI)) { + CI->addAttribute(AttributeSet::FunctionIndex, Attribute::Cold); + } + + return 0; + } + +protected: + bool isReportingError(Function *Callee, CallInst *CI) { + if (!ColdErrorCalls) + return false; + + if (!Callee || !Callee->isDeclaration()) + return false; + + if (StreamArg < 0) + return true; + + // These functions might be considered cold, but only if their stream + // argument is stderr. + + if (StreamArg >= (int) CI->getNumArgOperands()) + return false; + LoadInst *LI = dyn_cast<LoadInst>(CI->getArgOperand(StreamArg)); + if (!LI) + return false; + GlobalVariable *GV = dyn_cast<GlobalVariable>(LI->getPointerOperand()); + if (!GV || !GV->isDeclaration()) + return false; + return GV->getName() == "stderr"; + } + + int StreamArg; +}; + struct PrintFOpt : public LibCallOptimization { Value *optimizeFixedFormatString(Function *Callee, CallInst *CI, IRBuilder<> &B) { @@ -1361,7 +1587,7 @@ struct PrintFOpt : public LibCallOptimization { // printf("foo\n") --> puts("foo") if (FormatStr[FormatStr.size()-1] == '\n' && - FormatStr.find('%') == std::string::npos) { // no format characters. + FormatStr.find('%') == StringRef::npos) { // No format characters. // Create a string literal with no \n on it. We expect the constant merge // pass to be run after this pass, to merge duplicate strings. FormatStr = FormatStr.drop_back(); @@ -1513,6 +1739,9 @@ struct SPrintFOpt : public LibCallOptimization { struct FPrintFOpt : public LibCallOptimization { Value *optimizeFixedFormatString(Function *Callee, CallInst *CI, IRBuilder<> &B) { + ErrorReportingOpt ER(/* StreamArg = */ 0); + (void) ER.callOptimizer(Callee, CI, B); + // All the optimizations depend on the format string. StringRef FormatStr; if (!getConstantStringInfo(CI->getArgOperand(1), FormatStr)) @@ -1590,6 +1819,9 @@ struct FPrintFOpt : public LibCallOptimization { struct FWriteOpt : public LibCallOptimization { virtual Value *callOptimizer(Function *Callee, CallInst *CI, IRBuilder<> &B) { + ErrorReportingOpt ER(/* StreamArg = */ 3); + (void) ER.callOptimizer(Callee, CI, B); + // Require a pointer, an integer, an integer, a pointer, returning integer. FunctionType *FT = Callee->getFunctionType(); if (FT->getNumParams() != 4 || !FT->getParamType(0)->isPointerTy() || @@ -1623,6 +1855,9 @@ struct FWriteOpt : public LibCallOptimization { struct FPutsOpt : public LibCallOptimization { virtual Value *callOptimizer(Function *Callee, CallInst *CI, IRBuilder<> &B) { + ErrorReportingOpt ER(/* StreamArg = */ 1); + (void) ER.callOptimizer(Callee, CI, B); + // These optimizations require DataLayout. if (!TD) return 0; @@ -1741,6 +1976,7 @@ static MemSetOpt MemSet; // Math library call optimizations. static UnaryDoubleFPOpt UnaryDoubleFP(false); static UnaryDoubleFPOpt UnsafeUnaryDoubleFP(true); +static SinCosPiOpt SinCosPi; // Integer library call optimizations. static FFSOpt FFS; @@ -1750,6 +1986,9 @@ static IsAsciiOpt IsAscii; static ToAsciiOpt ToAscii; // Formatting and IO library call optimizations. +static ErrorReportingOpt ErrorReporting; +static ErrorReportingOpt ErrorReporting0(0); +static ErrorReportingOpt ErrorReporting1(1); static PrintFOpt PrintF; static SPrintFOpt SPrintF; static FPrintFOpt FPrintF; @@ -1825,6 +2064,11 @@ LibCallOptimization *LibCallSimplifierImpl::lookupOptimization(CallInst *CI) { case LibFunc::cos: case LibFunc::cosl: return &Cos; + case LibFunc::sinpif: + case LibFunc::sinpi: + case LibFunc::cospif: + case LibFunc::cospi: + return &SinCosPi; case LibFunc::powf: case LibFunc::pow: case LibFunc::powl: @@ -1859,6 +2103,13 @@ LibCallOptimization *LibCallSimplifierImpl::lookupOptimization(CallInst *CI) { return &FPuts; case LibFunc::puts: return &Puts; + case LibFunc::perror: + return &ErrorReporting; + case LibFunc::vfprintf: + case LibFunc::fiprintf: + return &ErrorReporting0; + case LibFunc::fputc: + return &ErrorReporting1; case LibFunc::ceil: case LibFunc::fabs: case LibFunc::floor: diff --git a/lib/Transforms/Utils/SpecialCaseList.cpp b/lib/Transforms/Utils/SpecialCaseList.cpp index b98cb5b..2ef692c 100644 --- a/lib/Transforms/Utils/SpecialCaseList.cpp +++ b/lib/Transforms/Utils/SpecialCaseList.cpp @@ -49,29 +49,45 @@ struct SpecialCaseList::Entry { } }; -SpecialCaseList::SpecialCaseList(const StringRef Path) { - // Validate and open blacklist file. - if (Path.empty()) return; +SpecialCaseList::SpecialCaseList() : Entries() {} + +SpecialCaseList *SpecialCaseList::create( + const StringRef Path, std::string &Error) { + if (Path.empty()) + return new SpecialCaseList(); OwningPtr<MemoryBuffer> File; if (error_code EC = MemoryBuffer::getFile(Path, File)) { - report_fatal_error("Can't open blacklist file: " + Path + ": " + - EC.message()); + Error = (Twine("Can't open file '") + Path + "': " + EC.message()).str(); + return 0; } + return create(File.get(), Error); +} - init(File.get()); +SpecialCaseList *SpecialCaseList::create( + const MemoryBuffer *MB, std::string &Error) { + OwningPtr<SpecialCaseList> SCL(new SpecialCaseList()); + if (!SCL->parse(MB, Error)) + return 0; + return SCL.take(); } -SpecialCaseList::SpecialCaseList(const MemoryBuffer *MB) { - init(MB); +SpecialCaseList *SpecialCaseList::createOrDie(const StringRef Path) { + std::string Error; + if (SpecialCaseList *SCL = create(Path, Error)) + return SCL; + report_fatal_error(Error); } -void SpecialCaseList::init(const MemoryBuffer *MB) { +bool SpecialCaseList::parse(const MemoryBuffer *MB, std::string &Error) { // Iterate through each line in the blacklist file. SmallVector<StringRef, 16> Lines; SplitString(MB->getBuffer(), Lines, "\n\r"); StringMap<StringMap<std::string> > Regexps; + assert(Entries.empty() && + "parse() should be called on an empty SpecialCaseList"); + int LineNo = 1; for (SmallVectorImpl<StringRef>::iterator I = Lines.begin(), E = Lines.end(); - I != E; ++I) { + I != E; ++I, ++LineNo) { // Ignore empty lines and lines starting with "#" if (I->empty() || I->startswith("#")) continue; @@ -80,7 +96,9 @@ void SpecialCaseList::init(const MemoryBuffer *MB) { StringRef Prefix = SplitLine.first; if (SplitLine.second.empty()) { // Missing ':' in the line. - report_fatal_error("malformed blacklist line: " + SplitLine.first); + Error = (Twine("Malformed line ") + Twine(LineNo) + ": '" + + SplitLine.first + "'").str(); + return false; } std::pair<StringRef, StringRef> SplitRegexp = SplitLine.second.split("="); @@ -113,10 +131,11 @@ void SpecialCaseList::init(const MemoryBuffer *MB) { // Check that the regexp is valid. Regex CheckRE(Regexp); - std::string Error; - if (!CheckRE.isValid(Error)) { - report_fatal_error("malformed blacklist regex: " + SplitLine.second + - ": " + Error); + std::string REError; + if (!CheckRE.isValid(REError)) { + Error = (Twine("Malformed regex in line ") + Twine(LineNo) + ": '" + + SplitLine.second + "': " + REError).str(); + return false; } // Add this regexp into the proper group by its prefix. @@ -135,6 +154,7 @@ void SpecialCaseList::init(const MemoryBuffer *MB) { Entries[I->getKey()][II->getKey()].RegEx = new Regex(II->getValue()); } } + return true; } SpecialCaseList::~SpecialCaseList() { @@ -149,18 +169,12 @@ SpecialCaseList::~SpecialCaseList() { } } -bool SpecialCaseList::findCategory(const Function &F, - StringRef &Category) const { - return findCategory(*F.getParent(), Category) || - findCategory("fun", F.getName(), Category); -} - bool SpecialCaseList::isIn(const Function& F, const StringRef Category) const { return isIn(*F.getParent(), Category) || inSectionCategory("fun", F.getName(), Category); } -static StringRef GetGVTypeString(const GlobalVariable &G) { +static StringRef GetGlobalTypeString(const GlobalValue &G) { // Types of GlobalVariables are always pointer types. Type *GType = G.getType()->getElementType(); // For now we support blacklisting struct types only. @@ -171,46 +185,29 @@ static StringRef GetGVTypeString(const GlobalVariable &G) { return "<unknown type>"; } -bool SpecialCaseList::findCategory(const GlobalVariable &G, - StringRef &Category) const { - return findCategory(*G.getParent(), Category) || - findCategory("global", G.getName(), Category) || - findCategory("type", GetGVTypeString(G), Category); -} - bool SpecialCaseList::isIn(const GlobalVariable &G, const StringRef Category) const { return isIn(*G.getParent(), Category) || inSectionCategory("global", G.getName(), Category) || - inSectionCategory("type", GetGVTypeString(G), Category); + inSectionCategory("type", GetGlobalTypeString(G), Category); } -bool SpecialCaseList::findCategory(const Module &M, StringRef &Category) const { - return findCategory("src", M.getModuleIdentifier(), Category); +bool SpecialCaseList::isIn(const GlobalAlias &GA, + const StringRef Category) const { + if (isIn(*GA.getParent(), Category)) + return true; + + if (isa<FunctionType>(GA.getType()->getElementType())) + return inSectionCategory("fun", GA.getName(), Category); + + return inSectionCategory("global", GA.getName(), Category) || + inSectionCategory("type", GetGlobalTypeString(GA), Category); } bool SpecialCaseList::isIn(const Module &M, const StringRef Category) const { return inSectionCategory("src", M.getModuleIdentifier(), Category); } -bool SpecialCaseList::findCategory(const StringRef Section, - const StringRef Query, - StringRef &Category) const { - StringMap<StringMap<Entry> >::const_iterator I = Entries.find(Section); - if (I == Entries.end()) return false; - - for (StringMap<Entry>::const_iterator II = I->second.begin(), - IE = I->second.end(); - II != IE; ++II) { - if (II->getValue().match(Query)) { - Category = II->first(); - return true; - } - } - - return false; -} - bool SpecialCaseList::inSectionCategory(const StringRef Section, const StringRef Query, const StringRef Category) const { |