diff options
Diffstat (limited to 'lib/Transforms/Scalar/GVN.cpp')
-rw-r--r-- | lib/Transforms/Scalar/GVN.cpp | 516 |
1 files changed, 284 insertions, 232 deletions
diff --git a/lib/Transforms/Scalar/GVN.cpp b/lib/Transforms/Scalar/GVN.cpp index 7dba4e2..73a1f25 100644 --- a/lib/Transforms/Scalar/GVN.cpp +++ b/lib/Transforms/Scalar/GVN.cpp @@ -20,11 +20,12 @@ #include "llvm/ADT/DepthFirstIterator.h" #include "llvm/ADT/Hashing.h" #include "llvm/ADT/MapVector.h" +#include "llvm/ADT/PostOrderIterator.h" #include "llvm/ADT/SetVector.h" #include "llvm/ADT/SmallPtrSet.h" #include "llvm/ADT/Statistic.h" #include "llvm/Analysis/AliasAnalysis.h" -#include "llvm/Analysis/AssumptionTracker.h" +#include "llvm/Analysis/AssumptionCache.h" #include "llvm/Analysis/CFG.h" #include "llvm/Analysis/ConstantFolding.h" #include "llvm/Analysis/InstructionSimplify.h" @@ -44,7 +45,7 @@ #include "llvm/Support/Allocator.h" #include "llvm/Support/CommandLine.h" #include "llvm/Support/Debug.h" -#include "llvm/Target/TargetLibraryInfo.h" +#include "llvm/Analysis/TargetLibraryInfo.h" #include "llvm/Transforms/Utils/BasicBlockUtils.h" #include "llvm/Transforms/Utils/Local.h" #include "llvm/Transforms/Utils/SSAUpdater.h" @@ -457,7 +458,7 @@ uint32_t ValueTable::lookup_or_add(Value *V) { return e; } -/// lookup - Returns the value number of the specified value. Fails if +/// Returns the value number of the specified value. Fails if /// the value has not yet been numbered. uint32_t ValueTable::lookup(Value *V) const { DenseMap<Value*, uint32_t>::const_iterator VI = valueNumbering.find(V); @@ -465,7 +466,7 @@ uint32_t ValueTable::lookup(Value *V) const { return VI->second; } -/// lookup_or_add_cmp - Returns the value number of the given comparison, +/// Returns the value number of the given comparison, /// assigning it a new number if it did not have one before. Useful when /// we deduced the result of a comparison, but don't immediately have an /// instruction realizing that comparison to hand. @@ -478,14 +479,14 @@ uint32_t ValueTable::lookup_or_add_cmp(unsigned Opcode, return e; } -/// clear - Remove all entries from the ValueTable. +/// Remove all entries from the ValueTable. void ValueTable::clear() { valueNumbering.clear(); expressionNumbering.clear(); nextValueNumber = 1; } -/// erase - Remove a value from the value numbering. +/// Remove a value from the value numbering. void ValueTable::erase(Value *V) { valueNumbering.erase(V); } @@ -581,8 +582,8 @@ namespace { return cast<MemIntrinsic>(Val.getPointer()); } - /// MaterializeAdjustedValue - Emit code into this block to adjust the value - /// defined here to the specified type. This handles various coercion cases. + /// Emit code into this block to adjust the value defined here to the + /// specified type. This handles various coercion cases. Value *MaterializeAdjustedValue(Type *LoadTy, GVN &gvn) const; }; @@ -592,12 +593,12 @@ namespace { DominatorTree *DT; const DataLayout *DL; const TargetLibraryInfo *TLI; - AssumptionTracker *AT; + AssumptionCache *AC; SetVector<BasicBlock *> DeadBlocks; ValueTable VN; - /// LeaderTable - A mapping from value numbers to lists of Value*'s that + /// A mapping from value numbers to lists of Value*'s that /// have that value number. Use findLeader to query it. struct LeaderTableEntry { Value *Val; @@ -622,7 +623,7 @@ namespace { bool runOnFunction(Function &F) override; - /// markInstructionForDeletion - This removes the specified instruction from + /// This removes the specified instruction from /// our various maps and marks it for deletion. void markInstructionForDeletion(Instruction *I) { VN.erase(I); @@ -634,8 +635,7 @@ namespace { AliasAnalysis *getAliasAnalysis() const { return VN.getAliasAnalysis(); } MemoryDependenceAnalysis &getMemDep() const { return *MD; } private: - /// addToLeaderTable - Push a new Value to the LeaderTable onto the list for - /// its value number. + /// Push a new Value to the LeaderTable onto the list for its value number. void addToLeaderTable(uint32_t N, Value *V, const BasicBlock *BB) { LeaderTableEntry &Curr = LeaderTable[N]; if (!Curr.Val) { @@ -651,7 +651,7 @@ namespace { Curr.Next = Node; } - /// removeFromLeaderTable - Scan the list of values corresponding to a given + /// Scan the list of values corresponding to a given /// value number, and remove the given instruction if encountered. void removeFromLeaderTable(uint32_t N, Instruction *I, BasicBlock *BB) { LeaderTableEntry* Prev = nullptr; @@ -682,9 +682,9 @@ namespace { // This transformation requires dominator postdominator info void getAnalysisUsage(AnalysisUsage &AU) const override { - AU.addRequired<AssumptionTracker>(); + AU.addRequired<AssumptionCacheTracker>(); AU.addRequired<DominatorTreeWrapperPass>(); - AU.addRequired<TargetLibraryInfo>(); + AU.addRequired<TargetLibraryInfoWrapperPass>(); if (!NoLoads) AU.addRequired<MemoryDependenceAnalysis>(); AU.addRequired<AliasAnalysis>(); @@ -709,6 +709,9 @@ namespace { void dump(DenseMap<uint32_t, Value*> &d); bool iterateOnFunction(Function &F); bool performPRE(Function &F); + bool performScalarPRE(Instruction *I); + bool performScalarPREInsertion(Instruction *Instr, BasicBlock *Pred, + unsigned int ValNo); Value *findLeader(const BasicBlock *BB, uint32_t num); void cleanupGlobalSets(); void verifyRemoved(const Instruction *I) const; @@ -725,16 +728,16 @@ namespace { char GVN::ID = 0; } -// createGVNPass - The public interface to this file... +// The public interface to this file... FunctionPass *llvm::createGVNPass(bool NoLoads) { return new GVN(NoLoads); } INITIALIZE_PASS_BEGIN(GVN, "gvn", "Global Value Numbering", false, false) -INITIALIZE_PASS_DEPENDENCY(AssumptionTracker) +INITIALIZE_PASS_DEPENDENCY(AssumptionCacheTracker) INITIALIZE_PASS_DEPENDENCY(MemoryDependenceAnalysis) INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass) -INITIALIZE_PASS_DEPENDENCY(TargetLibraryInfo) +INITIALIZE_PASS_DEPENDENCY(TargetLibraryInfoWrapperPass) INITIALIZE_AG_DEPENDENCY(AliasAnalysis) INITIALIZE_PASS_END(GVN, "gvn", "Global Value Numbering", false, false) @@ -750,7 +753,7 @@ void GVN::dump(DenseMap<uint32_t, Value*>& d) { } #endif -/// IsValueFullyAvailableInBlock - Return true if we can prove that the value +/// Return true if we can prove that the value /// we're analyzing is fully available in the specified block. As we go, keep /// track of which blocks we know are fully alive in FullyAvailableBlocks. This /// map is actually a tri-state map with the following values: @@ -796,7 +799,7 @@ static bool IsValueFullyAvailableInBlock(BasicBlock *BB, return true; -// SpeculationFailure - If we get here, we found out that this is not, after +// If we get here, we found out that this is not, after // all, a fully-available block. We have a problem if we speculated on this and // used the speculation to mark other blocks as available. SpeculationFailure: @@ -831,8 +834,7 @@ SpeculationFailure: } -/// CanCoerceMustAliasedValueToLoad - Return true if -/// CoerceAvailableValueToLoadType will succeed. +/// Return true if CoerceAvailableValueToLoadType will succeed. static bool CanCoerceMustAliasedValueToLoad(Value *StoredVal, Type *LoadTy, const DataLayout &DL) { @@ -851,7 +853,7 @@ static bool CanCoerceMustAliasedValueToLoad(Value *StoredVal, return true; } -/// CoerceAvailableValueToLoadType - If we saw a store of a value to memory, and +/// If we saw a store of a value to memory, and /// then a load from a must-aliased pointer of a different type, try to coerce /// the stored value. LoadedTy is the type of the load we want to replace and /// InsertPt is the place to insert new instructions. @@ -936,7 +938,7 @@ static Value *CoerceAvailableValueToLoadType(Value *StoredVal, return new BitCastInst(StoredVal, LoadedTy, "bitcast", InsertPt); } -/// AnalyzeLoadFromClobberingWrite - This function is called when we have a +/// This function is called when we have a /// memdep query of a load that ends up being a clobbering memory write (store, /// memset, memcpy, memmove). This means that the write *may* provide bits used /// by the load but we can't be sure because the pointers don't mustalias. @@ -1016,7 +1018,7 @@ static int AnalyzeLoadFromClobberingWrite(Type *LoadTy, Value *LoadPtr, return LoadOffset-StoreOffset; } -/// AnalyzeLoadFromClobberingStore - This function is called when we have a +/// This function is called when we have a /// memdep query of a load that ends up being a clobbering store. static int AnalyzeLoadFromClobberingStore(Type *LoadTy, Value *LoadPtr, StoreInst *DepSI, @@ -1032,7 +1034,7 @@ static int AnalyzeLoadFromClobberingStore(Type *LoadTy, Value *LoadPtr, StorePtr, StoreSize, DL); } -/// AnalyzeLoadFromClobberingLoad - This function is called when we have a +/// This function is called when we have a /// memdep query of a load that ends up being clobbered by another load. See if /// the other load can feed into the second load. static int AnalyzeLoadFromClobberingLoad(Type *LoadTy, Value *LoadPtr, @@ -1108,7 +1110,7 @@ static int AnalyzeLoadFromClobberingMemInst(Type *LoadTy, Value *LoadPtr, } -/// GetStoreValueForLoad - This function is called when we have a +/// This function is called when we have a /// memdep query of a load that ends up being a clobbering store. This means /// that the store provides bits used by the load but we the pointers don't /// mustalias. Check this case to see if there is anything more we can do @@ -1147,7 +1149,7 @@ static Value *GetStoreValueForLoad(Value *SrcVal, unsigned Offset, return CoerceAvailableValueToLoadType(SrcVal, LoadTy, InsertPt, DL); } -/// GetLoadValueForLoad - This function is called when we have a +/// This function is called when we have a /// memdep query of a load that ends up being a clobbering load. This means /// that the load *may* provide bits used by the load but we can't be sure /// because the pointers don't mustalias. Check this case to see if there is @@ -1210,7 +1212,7 @@ static Value *GetLoadValueForLoad(LoadInst *SrcVal, unsigned Offset, } -/// GetMemInstValueForLoad - This function is called when we have a +/// This function is called when we have a /// memdep query of a load that ends up being a clobbering mem intrinsic. static Value *GetMemInstValueForLoad(MemIntrinsic *SrcInst, unsigned Offset, Type *LoadTy, Instruction *InsertPt, @@ -1267,7 +1269,7 @@ static Value *GetMemInstValueForLoad(MemIntrinsic *SrcInst, unsigned Offset, } -/// ConstructSSAForLoadSet - Given a set of loads specified by ValuesPerBlock, +/// Given a set of loads specified by ValuesPerBlock, /// construct SSA form, allowing us to eliminate LI. This returns the value /// that should be used at LI's definition site. static Value *ConstructSSAForLoadSet(LoadInst *LI, @@ -1621,7 +1623,7 @@ bool GVN::PerformLoadPRE(LoadInst *LI, AvailValInBlkVect &ValuesPerBlock, // If all preds have a single successor, then we know it is safe to insert // the load on the pred (?!?), so we can insert code to materialize the // pointer if it is not available. - PHITransAddr Address(LI->getPointerOperand(), DL, AT); + PHITransAddr Address(LI->getPointerOperand(), DL, AC); Value *LoadPtr = nullptr; LoadPtr = Address.PHITranslateWithInsertion(LoadBB, UnavailablePred, *DT, NewInsts); @@ -1702,13 +1704,12 @@ bool GVN::PerformLoadPRE(LoadInst *LI, AvailValInBlkVect &ValuesPerBlock, return true; } -/// processNonLocalLoad - Attempt to eliminate a load whose dependencies are +/// Attempt to eliminate a load whose dependencies are /// non-local by performing PHI construction. bool GVN::processNonLocalLoad(LoadInst *LI) { // Step 1: Find the non-local dependencies of the load. LoadDepVect Deps; - AliasAnalysis::Location Loc = VN.getAliasAnalysis()->getLocation(LI); - MD->getNonLocalPointerDependency(Loc, true, LI->getParent(), Deps); + MD->getNonLocalPointerDependency(LI, Deps); // If we had to process more than one hundred blocks to find the // dependencies, this load isn't worth worrying about. Optimizing @@ -1729,6 +1730,15 @@ bool GVN::processNonLocalLoad(LoadInst *LI) { return false; } + // If this load follows a GEP, see if we can PRE the indices before analyzing. + if (GetElementPtrInst *GEP = dyn_cast<GetElementPtrInst>(LI->getOperand(0))) { + for (GetElementPtrInst::op_iterator OI = GEP->idx_begin(), + OE = GEP->idx_end(); + OI != OE; ++OI) + if (Instruction *I = dyn_cast<Instruction>(OI->get())) + performScalarPRE(I); + } + // Step 2: Analyze the availability of the load AvailValInBlkVect ValuesPerBlock; UnavailBlkVect UnavailableBlocks; @@ -1807,7 +1817,7 @@ static void patchAndReplaceAllUsesWith(Instruction *I, Value *Repl) { I->replaceAllUsesWith(Repl); } -/// processLoad - Attempt to eliminate a load, first by eliminating it +/// Attempt to eliminate a load, first by eliminating it /// locally, and then attempting non-local elimination if that fails. bool GVN::processLoad(LoadInst *L) { if (!MD) @@ -2006,7 +2016,7 @@ bool GVN::processLoad(LoadInst *L) { return false; } -// findLeader - In order to find a leader for a given value number at a +// In order to find a leader for a given value number at a // specific basic block, we first obtain the list of all Values for that number, // and then scan the list to find one whose block dominates the block in // question. This is fast because dominator tree queries consist of only @@ -2034,9 +2044,8 @@ Value *GVN::findLeader(const BasicBlock *BB, uint32_t num) { return Val; } -/// replaceAllDominatedUsesWith - Replace all uses of 'From' with 'To' if the -/// use is dominated by the given basic block. Returns the number of uses that -/// were replaced. +/// Replace all uses of 'From' with 'To' if the use is dominated by the given +/// basic block. Returns the number of uses that were replaced. unsigned GVN::replaceAllDominatedUsesWith(Value *From, Value *To, const BasicBlockEdge &Root) { unsigned Count = 0; @@ -2052,7 +2061,7 @@ unsigned GVN::replaceAllDominatedUsesWith(Value *From, Value *To, return Count; } -/// isOnlyReachableViaThisEdge - There is an edge from 'Src' to 'Dst'. Return +/// There is an edge from 'Src' to 'Dst'. Return /// true if every path from the entry block to 'Dst' passes via this edge. In /// particular 'Dst' must not be reachable via another edge from 'Src'. static bool isOnlyReachableViaThisEdge(const BasicBlockEdge &E, @@ -2069,7 +2078,7 @@ static bool isOnlyReachableViaThisEdge(const BasicBlockEdge &E, return Pred != nullptr; } -/// propagateEquality - The given values are known to be equal in every block +/// The given values are known to be equal in every block /// dominated by 'Root'. Exploit this, for example by replacing 'LHS' with /// 'RHS' everywhere in the scope. Returns whether a change was made. bool GVN::propagateEquality(Value *LHS, Value *RHS, @@ -2096,15 +2105,15 @@ bool GVN::propagateEquality(Value *LHS, Value *RHS, std::swap(LHS, RHS); assert((isa<Argument>(LHS) || isa<Instruction>(LHS)) && "Unexpected value!"); - // If there is no obvious reason to prefer the left-hand side over the right- - // hand side, ensure the longest lived term is on the right-hand side, so the - // shortest lived term will be replaced by the longest lived. This tends to - // expose more simplifications. + // If there is no obvious reason to prefer the left-hand side over the + // right-hand side, ensure the longest lived term is on the right-hand side, + // so the shortest lived term will be replaced by the longest lived. + // This tends to expose more simplifications. uint32_t LVN = VN.lookup_or_add(LHS); if ((isa<Argument>(LHS) && isa<Argument>(RHS)) || (isa<Instruction>(LHS) && isa<Instruction>(RHS))) { - // Move the 'oldest' value to the right-hand side, using the value number as - // a proxy for age. + // Move the 'oldest' value to the right-hand side, using the value number + // as a proxy for age. uint32_t RVN = VN.lookup_or_add(RHS); if (LVN < RVN) { std::swap(LHS, RHS); @@ -2133,10 +2142,10 @@ bool GVN::propagateEquality(Value *LHS, Value *RHS, NumGVNEqProp += NumReplacements; } - // Now try to deduce additional equalities from this one. For example, if the - // known equality was "(A != B)" == "false" then it follows that A and B are - // equal in the scope. Only boolean equalities with an explicit true or false - // RHS are currently supported. + // Now try to deduce additional equalities from this one. For example, if + // the known equality was "(A != B)" == "false" then it follows that A and B + // are equal in the scope. Only boolean equalities with an explicit true or + // false RHS are currently supported. if (!RHS->getType()->isIntegerTy(1)) // Not a boolean equality - bail out. continue; @@ -2161,7 +2170,7 @@ bool GVN::propagateEquality(Value *LHS, Value *RHS, // If we are propagating an equality like "(A == B)" == "true" then also // propagate the equality A == B. When propagating a comparison such as // "(A >= B)" == "true", replace all instances of "A < B" with "false". - if (ICmpInst *Cmp = dyn_cast<ICmpInst>(LHS)) { + if (CmpInst *Cmp = dyn_cast<CmpInst>(LHS)) { Value *Op0 = Cmp->getOperand(0), *Op1 = Cmp->getOperand(1); // If "A == B" is known true, or "A != B" is known false, then replace @@ -2170,12 +2179,28 @@ bool GVN::propagateEquality(Value *LHS, Value *RHS, (isKnownFalse && Cmp->getPredicate() == CmpInst::ICMP_NE)) Worklist.push_back(std::make_pair(Op0, Op1)); + // Handle the floating point versions of equality comparisons too. + if ((isKnownTrue && Cmp->getPredicate() == CmpInst::FCMP_OEQ) || + (isKnownFalse && Cmp->getPredicate() == CmpInst::FCMP_UNE)) { + + // Floating point -0.0 and 0.0 compare equal, so we can only + // propagate values if we know that we have a constant and that + // its value is non-zero. + + // FIXME: We should do this optimization if 'no signed zeros' is + // applicable via an instruction-level fast-math-flag or some other + // indicator that relaxed FP semantics are being used. + + if (isa<ConstantFP>(Op1) && !cast<ConstantFP>(Op1)->isZero()) + Worklist.push_back(std::make_pair(Op0, Op1)); + } + // If "A >= B" is known true, replace "A < B" with false everywhere. CmpInst::Predicate NotPred = Cmp->getInversePredicate(); Constant *NotVal = ConstantInt::get(Cmp->getType(), isKnownFalse); - // Since we don't have the instruction "A < B" immediately to hand, work out - // the value number that it would have and use that to find an appropriate - // instruction (if any). + // Since we don't have the instruction "A < B" immediately to hand, work + // out the value number that it would have and use that to find an + // appropriate instruction (if any). uint32_t NextNum = VN.getNextUnusedValueNumber(); uint32_t Num = VN.lookup_or_add_cmp(Cmp->getOpcode(), NotPred, Op0, Op1); // If the number we were assigned was brand new then there is no point in @@ -2203,7 +2228,7 @@ bool GVN::propagateEquality(Value *LHS, Value *RHS, return Changed; } -/// processInstruction - When calculating availability, handle an instruction +/// When calculating availability, handle an instruction /// by inserting it into the appropriate sets bool GVN::processInstruction(Instruction *I) { // Ignore dbg info intrinsics. @@ -2214,7 +2239,7 @@ bool GVN::processInstruction(Instruction *I) { // to value numbering it. Value numbering often exposes redundancies, for // example if it determines that %y is equal to %x then the instruction // "%z = and i32 %x, %y" becomes "%z = and i32 %x, %x" which we now simplify. - if (Value *V = SimplifyInstruction(I, DL, TLI, DT, AT)) { + if (Value *V = SimplifyInstruction(I, DL, TLI, DT, AC)) { I->replaceAllUsesWith(V); if (MD && V->getType()->getScalarType()->isPointerTy()) MD->invalidateCachedPointerInfo(V); @@ -2334,8 +2359,8 @@ bool GVN::runOnFunction(Function& F) { DT = &getAnalysis<DominatorTreeWrapperPass>().getDomTree(); DataLayoutPass *DLP = getAnalysisIfAvailable<DataLayoutPass>(); DL = DLP ? &DLP->getDataLayout() : nullptr; - AT = &getAnalysis<AssumptionTracker>(); - TLI = &getAnalysis<TargetLibraryInfo>(); + AC = &getAnalysis<AssumptionCacheTracker>().getAssumptionCache(F); + TLI = &getAnalysis<TargetLibraryInfoWrapperPass>().getTLI(); VN.setAliasAnalysis(&getAnalysis<AliasAnalysis>()); VN.setMemDep(MD); VN.setDomTree(DT); @@ -2348,7 +2373,8 @@ bool GVN::runOnFunction(Function& F) { for (Function::iterator FI = F.begin(), FE = F.end(); FI != FE; ) { BasicBlock *BB = FI++; - bool removedBlock = MergeBlockIntoPredecessor(BB, this); + bool removedBlock = MergeBlockIntoPredecessor( + BB, DT, /* LoopInfo */ nullptr, VN.getAliasAnalysis(), MD); if (removedBlock) ++NumGVNBlocks; Changed |= removedBlock; @@ -2431,175 +2457,204 @@ bool GVN::processBlock(BasicBlock *BB) { return ChangedFunction; } -/// performPRE - Perform a purely local form of PRE that looks for diamond -/// control flow patterns and attempts to perform simple PRE at the join point. -bool GVN::performPRE(Function &F) { - bool Changed = false; +// Instantiate an expression in a predecessor that lacked it. +bool GVN::performScalarPREInsertion(Instruction *Instr, BasicBlock *Pred, + unsigned int ValNo) { + // Because we are going top-down through the block, all value numbers + // will be available in the predecessor by the time we need them. Any + // that weren't originally present will have been instantiated earlier + // in this loop. + bool success = true; + for (unsigned i = 0, e = Instr->getNumOperands(); i != e; ++i) { + Value *Op = Instr->getOperand(i); + if (isa<Argument>(Op) || isa<Constant>(Op) || isa<GlobalValue>(Op)) + continue; + + if (Value *V = findLeader(Pred, VN.lookup(Op))) { + Instr->setOperand(i, V); + } else { + success = false; + break; + } + } + + // Fail out if we encounter an operand that is not available in + // the PRE predecessor. This is typically because of loads which + // are not value numbered precisely. + if (!success) + return false; + + Instr->insertBefore(Pred->getTerminator()); + Instr->setName(Instr->getName() + ".pre"); + Instr->setDebugLoc(Instr->getDebugLoc()); + VN.add(Instr, ValNo); + + // Update the availability map to include the new instruction. + addToLeaderTable(ValNo, Instr, Pred); + return true; +} + +bool GVN::performScalarPRE(Instruction *CurInst) { SmallVector<std::pair<Value*, BasicBlock*>, 8> predMap; - for (BasicBlock *CurrentBlock : depth_first(&F.getEntryBlock())) { - // Nothing to PRE in the entry block. - if (CurrentBlock == &F.getEntryBlock()) continue; - // Don't perform PRE on a landing pad. - if (CurrentBlock->isLandingPad()) continue; + if (isa<AllocaInst>(CurInst) || isa<TerminatorInst>(CurInst) || + isa<PHINode>(CurInst) || CurInst->getType()->isVoidTy() || + CurInst->mayReadFromMemory() || CurInst->mayHaveSideEffects() || + isa<DbgInfoIntrinsic>(CurInst)) + return false; - for (BasicBlock::iterator BI = CurrentBlock->begin(), - BE = CurrentBlock->end(); BI != BE; ) { - Instruction *CurInst = BI++; + // Don't do PRE on compares. The PHI would prevent CodeGenPrepare from + // sinking the compare again, and it would force the code generator to + // move the i1 from processor flags or predicate registers into a general + // purpose register. + if (isa<CmpInst>(CurInst)) + return false; - if (isa<AllocaInst>(CurInst) || - isa<TerminatorInst>(CurInst) || isa<PHINode>(CurInst) || - CurInst->getType()->isVoidTy() || - CurInst->mayReadFromMemory() || CurInst->mayHaveSideEffects() || - isa<DbgInfoIntrinsic>(CurInst)) - continue; + // We don't currently value number ANY inline asm calls. + if (CallInst *CallI = dyn_cast<CallInst>(CurInst)) + if (CallI->isInlineAsm()) + return false; - // Don't do PRE on compares. The PHI would prevent CodeGenPrepare from - // sinking the compare again, and it would force the code generator to - // move the i1 from processor flags or predicate registers into a general - // purpose register. - if (isa<CmpInst>(CurInst)) - continue; + uint32_t ValNo = VN.lookup(CurInst); + + // Look for the predecessors for PRE opportunities. We're + // only trying to solve the basic diamond case, where + // a value is computed in the successor and one predecessor, + // but not the other. We also explicitly disallow cases + // where the successor is its own predecessor, because they're + // more complicated to get right. + unsigned NumWith = 0; + unsigned NumWithout = 0; + BasicBlock *PREPred = nullptr; + BasicBlock *CurrentBlock = CurInst->getParent(); + predMap.clear(); + + for (pred_iterator PI = pred_begin(CurrentBlock), PE = pred_end(CurrentBlock); + PI != PE; ++PI) { + BasicBlock *P = *PI; + // We're not interested in PRE where the block is its + // own predecessor, or in blocks with predecessors + // that are not reachable. + if (P == CurrentBlock) { + NumWithout = 2; + break; + } else if (!DT->isReachableFromEntry(P)) { + NumWithout = 2; + break; + } - // We don't currently value number ANY inline asm calls. - if (CallInst *CallI = dyn_cast<CallInst>(CurInst)) - if (CallI->isInlineAsm()) - continue; + Value *predV = findLeader(P, ValNo); + if (!predV) { + predMap.push_back(std::make_pair(static_cast<Value *>(nullptr), P)); + PREPred = P; + ++NumWithout; + } else if (predV == CurInst) { + /* CurInst dominates this predecessor. */ + NumWithout = 2; + break; + } else { + predMap.push_back(std::make_pair(predV, P)); + ++NumWith; + } + } - uint32_t ValNo = VN.lookup(CurInst); - - // Look for the predecessors for PRE opportunities. We're - // only trying to solve the basic diamond case, where - // a value is computed in the successor and one predecessor, - // but not the other. We also explicitly disallow cases - // where the successor is its own predecessor, because they're - // more complicated to get right. - unsigned NumWith = 0; - unsigned NumWithout = 0; - BasicBlock *PREPred = nullptr; - predMap.clear(); - - for (pred_iterator PI = pred_begin(CurrentBlock), - PE = pred_end(CurrentBlock); PI != PE; ++PI) { - BasicBlock *P = *PI; - // We're not interested in PRE where the block is its - // own predecessor, or in blocks with predecessors - // that are not reachable. - if (P == CurrentBlock) { - NumWithout = 2; - break; - } else if (!DT->isReachableFromEntry(P)) { - NumWithout = 2; - break; - } + // Don't do PRE when it might increase code size, i.e. when + // we would need to insert instructions in more than one pred. + if (NumWithout > 1 || NumWith == 0) + return false; - Value* predV = findLeader(P, ValNo); - if (!predV) { - predMap.push_back(std::make_pair(static_cast<Value *>(nullptr), P)); - PREPred = P; - ++NumWithout; - } else if (predV == CurInst) { - /* CurInst dominates this predecessor. */ - NumWithout = 2; - break; - } else { - predMap.push_back(std::make_pair(predV, P)); - ++NumWith; - } - } + // We may have a case where all predecessors have the instruction, + // and we just need to insert a phi node. Otherwise, perform + // insertion. + Instruction *PREInstr = nullptr; - // Don't do PRE when it might increase code size, i.e. when - // we would need to insert instructions in more than one pred. - if (NumWithout != 1 || NumWith == 0) - continue; + if (NumWithout != 0) { + // Don't do PRE across indirect branch. + if (isa<IndirectBrInst>(PREPred->getTerminator())) + return false; - // Don't do PRE across indirect branch. - if (isa<IndirectBrInst>(PREPred->getTerminator())) - continue; + // We can't do PRE safely on a critical edge, so instead we schedule + // the edge to be split and perform the PRE the next time we iterate + // on the function. + unsigned SuccNum = GetSuccessorNumber(PREPred, CurrentBlock); + if (isCriticalEdge(PREPred->getTerminator(), SuccNum)) { + toSplit.push_back(std::make_pair(PREPred->getTerminator(), SuccNum)); + return false; + } + // We need to insert somewhere, so let's give it a shot + PREInstr = CurInst->clone(); + if (!performScalarPREInsertion(PREInstr, PREPred, ValNo)) { + // If we failed insertion, make sure we remove the instruction. + DEBUG(verifyRemoved(PREInstr)); + delete PREInstr; + return false; + } + } - // We can't do PRE safely on a critical edge, so instead we schedule - // the edge to be split and perform the PRE the next time we iterate - // on the function. - unsigned SuccNum = GetSuccessorNumber(PREPred, CurrentBlock); - if (isCriticalEdge(PREPred->getTerminator(), SuccNum)) { - toSplit.push_back(std::make_pair(PREPred->getTerminator(), SuccNum)); - continue; - } + // Either we should have filled in the PRE instruction, or we should + // not have needed insertions. + assert (PREInstr != nullptr || NumWithout == 0); - // Instantiate the expression in the predecessor that lacked it. - // Because we are going top-down through the block, all value numbers - // will be available in the predecessor by the time we need them. Any - // that weren't originally present will have been instantiated earlier - // in this loop. - Instruction *PREInstr = CurInst->clone(); - bool success = true; - for (unsigned i = 0, e = CurInst->getNumOperands(); i != e; ++i) { - Value *Op = PREInstr->getOperand(i); - if (isa<Argument>(Op) || isa<Constant>(Op) || isa<GlobalValue>(Op)) - continue; + ++NumGVNPRE; - if (Value *V = findLeader(PREPred, VN.lookup(Op))) { - PREInstr->setOperand(i, V); - } else { - success = false; - break; - } - } + // Create a PHI to make the value available in this block. + PHINode *Phi = + PHINode::Create(CurInst->getType(), predMap.size(), + CurInst->getName() + ".pre-phi", CurrentBlock->begin()); + for (unsigned i = 0, e = predMap.size(); i != e; ++i) { + if (Value *V = predMap[i].first) + Phi->addIncoming(V, predMap[i].second); + else + Phi->addIncoming(PREInstr, PREPred); + } + + VN.add(Phi, ValNo); + addToLeaderTable(ValNo, Phi, CurrentBlock); + Phi->setDebugLoc(CurInst->getDebugLoc()); + CurInst->replaceAllUsesWith(Phi); + if (Phi->getType()->getScalarType()->isPointerTy()) { + // Because we have added a PHI-use of the pointer value, it has now + // "escaped" from alias analysis' perspective. We need to inform + // AA of this. + for (unsigned ii = 0, ee = Phi->getNumIncomingValues(); ii != ee; ++ii) { + unsigned jj = PHINode::getOperandNumForIncomingValue(ii); + VN.getAliasAnalysis()->addEscapingUse(Phi->getOperandUse(jj)); + } - // Fail out if we encounter an operand that is not available in - // the PRE predecessor. This is typically because of loads which - // are not value numbered precisely. - if (!success) { - DEBUG(verifyRemoved(PREInstr)); - delete PREInstr; - continue; - } + if (MD) + MD->invalidateCachedPointerInfo(Phi); + } + VN.erase(CurInst); + removeFromLeaderTable(ValNo, CurInst, CurrentBlock); - PREInstr->insertBefore(PREPred->getTerminator()); - PREInstr->setName(CurInst->getName() + ".pre"); - PREInstr->setDebugLoc(CurInst->getDebugLoc()); - VN.add(PREInstr, ValNo); - ++NumGVNPRE; - - // Update the availability map to include the new instruction. - addToLeaderTable(ValNo, PREInstr, PREPred); - - // Create a PHI to make the value available in this block. - PHINode* Phi = PHINode::Create(CurInst->getType(), predMap.size(), - CurInst->getName() + ".pre-phi", - CurrentBlock->begin()); - for (unsigned i = 0, e = predMap.size(); i != e; ++i) { - if (Value *V = predMap[i].first) - Phi->addIncoming(V, predMap[i].second); - else - Phi->addIncoming(PREInstr, PREPred); - } + DEBUG(dbgs() << "GVN PRE removed: " << *CurInst << '\n'); + if (MD) + MD->removeInstruction(CurInst); + DEBUG(verifyRemoved(CurInst)); + CurInst->eraseFromParent(); + ++NumGVNInstr; + + return true; +} - VN.add(Phi, ValNo); - addToLeaderTable(ValNo, Phi, CurrentBlock); - Phi->setDebugLoc(CurInst->getDebugLoc()); - CurInst->replaceAllUsesWith(Phi); - if (Phi->getType()->getScalarType()->isPointerTy()) { - // Because we have added a PHI-use of the pointer value, it has now - // "escaped" from alias analysis' perspective. We need to inform - // AA of this. - for (unsigned ii = 0, ee = Phi->getNumIncomingValues(); ii != ee; - ++ii) { - unsigned jj = PHINode::getOperandNumForIncomingValue(ii); - VN.getAliasAnalysis()->addEscapingUse(Phi->getOperandUse(jj)); - } +/// Perform a purely local form of PRE that looks for diamond +/// control flow patterns and attempts to perform simple PRE at the join point. +bool GVN::performPRE(Function &F) { + bool Changed = false; + for (BasicBlock *CurrentBlock : depth_first(&F.getEntryBlock())) { + // Nothing to PRE in the entry block. + if (CurrentBlock == &F.getEntryBlock()) + continue; - if (MD) - MD->invalidateCachedPointerInfo(Phi); - } - VN.erase(CurInst); - removeFromLeaderTable(ValNo, CurInst, CurrentBlock); + // Don't perform PRE on a landing pad. + if (CurrentBlock->isLandingPad()) + continue; - DEBUG(dbgs() << "GVN PRE removed: " << *CurInst << '\n'); - if (MD) MD->removeInstruction(CurInst); - DEBUG(verifyRemoved(CurInst)); - CurInst->eraseFromParent(); - Changed = true; + for (BasicBlock::iterator BI = CurrentBlock->begin(), + BE = CurrentBlock->end(); + BI != BE;) { + Instruction *CurInst = BI++; + Changed = performScalarPRE(CurInst); } } @@ -2612,50 +2667,48 @@ bool GVN::performPRE(Function &F) { /// Split the critical edge connecting the given two blocks, and return /// the block inserted to the critical edge. BasicBlock *GVN::splitCriticalEdges(BasicBlock *Pred, BasicBlock *Succ) { - BasicBlock *BB = SplitCriticalEdge(Pred, Succ, this); + BasicBlock *BB = SplitCriticalEdge( + Pred, Succ, CriticalEdgeSplittingOptions(getAliasAnalysis(), DT)); if (MD) MD->invalidateCachedPredecessors(); return BB; } -/// splitCriticalEdges - Split critical edges found during the previous +/// Split critical edges found during the previous /// iteration that may enable further optimization. bool GVN::splitCriticalEdges() { if (toSplit.empty()) return false; do { std::pair<TerminatorInst*, unsigned> Edge = toSplit.pop_back_val(); - SplitCriticalEdge(Edge.first, Edge.second, this); + SplitCriticalEdge(Edge.first, Edge.second, + CriticalEdgeSplittingOptions(getAliasAnalysis(), DT)); } while (!toSplit.empty()); if (MD) MD->invalidateCachedPredecessors(); return true; } -/// iterateOnFunction - Executes one iteration of GVN +/// Executes one iteration of GVN bool GVN::iterateOnFunction(Function &F) { cleanupGlobalSets(); // Top-down walk of the dominator tree bool Changed = false; -#if 0 - // Needed for value numbering with phi construction to work. - ReversePostOrderTraversal<Function*> RPOT(&F); - for (ReversePostOrderTraversal<Function*>::rpo_iterator RI = RPOT.begin(), - RE = RPOT.end(); RI != RE; ++RI) - Changed |= processBlock(*RI); -#else // Save the blocks this function have before transformation begins. GVN may // split critical edge, and hence may invalidate the RPO/DT iterator. // std::vector<BasicBlock *> BBVect; BBVect.reserve(256); - for (DomTreeNode *X : depth_first(DT->getRootNode())) - BBVect.push_back(X->getBlock()); + // Needed for value numbering with phi construction to work. + ReversePostOrderTraversal<Function *> RPOT(&F); + for (ReversePostOrderTraversal<Function *>::rpo_iterator RI = RPOT.begin(), + RE = RPOT.end(); + RI != RE; ++RI) + BBVect.push_back(*RI); for (std::vector<BasicBlock *>::iterator I = BBVect.begin(), E = BBVect.end(); I != E; I++) Changed |= processBlock(*I); -#endif return Changed; } @@ -2666,7 +2719,7 @@ void GVN::cleanupGlobalSets() { TableAllocator.Reset(); } -/// verifyRemoved - Verify that the specified instruction does not occur in our +/// Verify that the specified instruction does not occur in our /// internal data structures. void GVN::verifyRemoved(const Instruction *Inst) const { VN.verifyRemoved(Inst); @@ -2685,11 +2738,10 @@ void GVN::verifyRemoved(const Instruction *Inst) const { } } -// BB is declared dead, which implied other blocks become dead as well. This -// function is to add all these blocks to "DeadBlocks". For the dead blocks' -// live successors, update their phi nodes by replacing the operands -// corresponding to dead blocks with UndefVal. -// +/// BB is declared dead, which implied other blocks become dead as well. This +/// function is to add all these blocks to "DeadBlocks". For the dead blocks' +/// live successors, update their phi nodes by replacing the operands +/// corresponding to dead blocks with UndefVal. void GVN::addDeadBlock(BasicBlock *BB) { SmallVector<BasicBlock *, 4> NewDead; SmallSetVector<BasicBlock *, 4> DF; |