Diffstat (limited to 'lib/Transforms/Scalar')
21 files changed, 446 insertions, 205 deletions
diff --git a/lib/Transforms/Scalar/CMakeLists.txt b/lib/Transforms/Scalar/CMakeLists.txt index a6f0cf3..d660c72 100644 --- a/lib/Transforms/Scalar/CMakeLists.txt +++ b/lib/Transforms/Scalar/CMakeLists.txt @@ -32,12 +32,3 @@ add_llvm_library(LLVMScalarOpts Sink.cpp TailRecursionElimination.cpp ) - -add_llvm_library_dependencies(LLVMScalarOpts - LLVMAnalysis - LLVMCore - LLVMInstCombine - LLVMSupport - LLVMTarget - LLVMTransformUtils - ) diff --git a/lib/Transforms/Scalar/CodeGenPrepare.cpp b/lib/Transforms/Scalar/CodeGenPrepare.cpp index f8f18b2..f9abfe9 100644 --- a/lib/Transforms/Scalar/CodeGenPrepare.cpp +++ b/lib/Transforms/Scalar/CodeGenPrepare.cpp @@ -26,6 +26,7 @@ #include "llvm/Analysis/InstructionSimplify.h" #include "llvm/Analysis/ProfileInfo.h" #include "llvm/Target/TargetData.h" +#include "llvm/Target/TargetLibraryInfo.h" #include "llvm/Target/TargetLowering.h" #include "llvm/Transforms/Utils/AddrModeMatcher.h" #include "llvm/Transforms/Utils/BasicBlockUtils.h" @@ -69,6 +70,7 @@ namespace { /// TLI - Keep a pointer of a TargetLowering to consult for determining /// transformation profitability. const TargetLowering *TLI; + const TargetLibraryInfo *TLInfo; DominatorTree *DT; ProfileInfo *PFI; @@ -97,6 +99,7 @@ namespace { virtual void getAnalysisUsage(AnalysisUsage &AU) const { AU.addPreserved<DominatorTree>(); AU.addPreserved<ProfileInfo>(); + AU.addRequired<TargetLibraryInfo>(); } private: @@ -116,7 +119,10 @@ namespace { } char CodeGenPrepare::ID = 0; -INITIALIZE_PASS(CodeGenPrepare, "codegenprepare", +INITIALIZE_PASS_BEGIN(CodeGenPrepare, "codegenprepare", + "Optimize for code generation", false, false) +INITIALIZE_PASS_DEPENDENCY(TargetLibraryInfo) +INITIALIZE_PASS_END(CodeGenPrepare, "codegenprepare", "Optimize for code generation", false, false) FunctionPass *llvm::createCodeGenPreparePass(const TargetLowering *TLI) { @@ -127,6 +133,7 @@ bool CodeGenPrepare::runOnFunction(Function &F) { bool EverMadeChange = false; ModifiedDT = false; + TLInfo = &getAnalysis<TargetLibraryInfo>(); DT = getAnalysisIfAvailable<DominatorTree>(); PFI = getAnalysisIfAvailable<ProfileInfo>(); @@ -542,7 +549,7 @@ bool CodeGenPrepare::OptimizeCallInst(CallInst *CI) { WeakVH IterHandle(CurInstIterator); ReplaceAndSimplifyAllUses(CI, RetVal, TLI ? TLI->getTargetData() : 0, - ModifiedDT ? 0 : DT); + TLInfo, ModifiedDT ? 0 : DT); // If the iterator instruction was recursively deleted, start over at the // start of the block. 
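Note on the CodeGenPrepare hunk above: it establishes the pattern most passes in this commit follow — declare TargetLibraryInfo as a required analysis in getAnalysisUsage, switch from the single INITIALIZE_PASS macro to the BEGIN/DEPENDENCY/END form so the dependency is registered first, and fetch the analysis at the top of runOnFunction. A minimal sketch of that pattern, using an invented pass (MyPass and the "mypass" name are illustrative only, not part of this commit):

#include "llvm/Pass.h"
#include "llvm/Function.h"
#include "llvm/Target/TargetLibraryInfo.h"
using namespace llvm;

namespace {
  class MyPass : public FunctionPass {
    const TargetLibraryInfo *TLI;
  public:
    static char ID;
    MyPass() : FunctionPass(ID) {}

    // Declaring the dependency lets the pass manager schedule the
    // TargetLibraryInfo immutable analysis ahead of this pass.
    virtual void getAnalysisUsage(AnalysisUsage &AU) const {
      AU.addRequired<TargetLibraryInfo>();
    }

    virtual bool runOnFunction(Function &F) {
      // A required analysis is always present, so its address can be taken
      // unconditionally -- unlike getAnalysisIfAvailable<TargetData>(),
      // which may return null and must be checked by callers.
      TLI = &getAnalysis<TargetLibraryInfo>();
      return false;
    }
  };
}

char MyPass::ID = 0;
// Once a pass declares dependencies, the three-part macros replace plain
// INITIALIZE_PASS so each dependency is initialized before the pass itself,
// exactly as the CodeGenPrepare hunk above does.
INITIALIZE_PASS_BEGIN(MyPass, "mypass", "Example pass", false, false)
INITIALIZE_PASS_DEPENDENCY(TargetLibraryInfo)
INITIALIZE_PASS_END(MyPass, "mypass", "Example pass", false, false)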
diff --git a/lib/Transforms/Scalar/ConstantProp.cpp b/lib/Transforms/Scalar/ConstantProp.cpp index 664c3f6..5430f62 100644 --- a/lib/Transforms/Scalar/ConstantProp.cpp +++ b/lib/Transforms/Scalar/ConstantProp.cpp @@ -24,6 +24,8 @@ #include "llvm/Constant.h" #include "llvm/Instruction.h" #include "llvm/Pass.h" +#include "llvm/Target/TargetData.h" +#include "llvm/Target/TargetLibraryInfo.h" #include "llvm/Support/InstIterator.h" #include "llvm/ADT/Statistic.h" #include <set> @@ -42,19 +44,22 @@ namespace { virtual void getAnalysisUsage(AnalysisUsage &AU) const { AU.setPreservesCFG(); + AU.addRequired<TargetLibraryInfo>(); } }; } char ConstantPropagation::ID = 0; -INITIALIZE_PASS(ConstantPropagation, "constprop", +INITIALIZE_PASS_BEGIN(ConstantPropagation, "constprop", + "Simple constant propagation", false, false) +INITIALIZE_PASS_DEPENDENCY(TargetLibraryInfo) +INITIALIZE_PASS_END(ConstantPropagation, "constprop", "Simple constant propagation", false, false) FunctionPass *llvm::createConstantPropagationPass() { return new ConstantPropagation(); } - bool ConstantPropagation::runOnFunction(Function &F) { // Initialize the worklist to all of the instructions ready to process... std::set<Instruction*> WorkList; @@ -62,13 +67,15 @@ bool ConstantPropagation::runOnFunction(Function &F) { WorkList.insert(&*i); } bool Changed = false; + TargetData *TD = getAnalysisIfAvailable<TargetData>(); + TargetLibraryInfo *TLI = &getAnalysis<TargetLibraryInfo>(); while (!WorkList.empty()) { Instruction *I = *WorkList.begin(); WorkList.erase(WorkList.begin()); // Get an element from the worklist... if (!I->use_empty()) // Don't muck with dead instructions... - if (Constant *C = ConstantFoldInstruction(I)) { + if (Constant *C = ConstantFoldInstruction(I, TD, TLI)) { // Add all of the users of this instruction to the worklist, they might // be constant propagatable now... for (Value::use_iterator UI = I->use_begin(), UE = I->use_end(); diff --git a/lib/Transforms/Scalar/DeadStoreElimination.cpp b/lib/Transforms/Scalar/DeadStoreElimination.cpp index f5688cb..8729019 100644 --- a/lib/Transforms/Scalar/DeadStoreElimination.cpp +++ b/lib/Transforms/Scalar/DeadStoreElimination.cpp @@ -416,7 +416,7 @@ static OverwriteResult isOverwrite(const AliasAnalysis::Location &Later, // writes to addresses which will definitely be overwritten later if (LaterOff > EarlierOff && LaterOff < int64_t(EarlierOff + Earlier.Size) && - LaterOff + Later.Size >= EarlierOff + Earlier.Size) + int64_t(LaterOff + Later.Size) >= int64_t(EarlierOff + Earlier.Size)) return OverwriteEnd; // Otherwise, they don't completely overlap. 
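The isOverwrite change just above is an integer-promotion fix rather than a cosmetic cast: Later.Size and Earlier.Size are unsigned 64-bit values, so without the casts the signed offsets are converted to unsigned and a negative sum wraps around. A self-contained illustration of the pitfall (the values are invented for the example):

#include <cstdint>
#include <cstdio>

int main() {
  int64_t LaterOff = -8, EarlierOff = 0;    // offsets may be negative
  uint64_t LaterSize = 4, EarlierSize = 16; // Location sizes are unsigned

  // Mixed signed/unsigned arithmetic: -8 + 4 wraps to 0xFFFFFFFFFFFFFFFC,
  // so the all-unsigned comparison wrongly reports that the later store
  // reaches past the end of the earlier one. Prints 1.
  printf("%d\n", LaterOff + LaterSize >= EarlierOff + EarlierSize);

  // With the casts from the patch the comparison is done signed:
  // -4 >= 16 is false, which is the intended answer. Prints 0.
  printf("%d\n",
         int64_t(LaterOff + LaterSize) >= int64_t(EarlierOff + EarlierSize));
  return 0;
}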
@@ -624,6 +624,7 @@ static void FindUnconditionalPreds(SmallVectorImpl<BasicBlock *> &Blocks, BasicBlock *BB, DominatorTree *DT) { for (pred_iterator I = pred_begin(BB), E = pred_end(BB); I != E; ++I) { BasicBlock *Pred = *I; + if (Pred == BB) continue; TerminatorInst *PredTI = Pred->getTerminator(); if (PredTI->getNumSuccessors() != 1) continue; @@ -853,4 +854,3 @@ void DSE::RemoveAccessedObjects(const AliasAnalysis::Location &LoadedLoc, I != E; ++I) DeadStackObjects.erase(*I); } - diff --git a/lib/Transforms/Scalar/EarlyCSE.cpp b/lib/Transforms/Scalar/EarlyCSE.cpp index c0223d2..5241e11 100644 --- a/lib/Transforms/Scalar/EarlyCSE.cpp +++ b/lib/Transforms/Scalar/EarlyCSE.cpp @@ -19,6 +19,7 @@ #include "llvm/Analysis/Dominators.h" #include "llvm/Analysis/InstructionSimplify.h" #include "llvm/Target/TargetData.h" +#include "llvm/Target/TargetLibraryInfo.h" #include "llvm/Transforms/Utils/Local.h" #include "llvm/Support/Debug.h" #include "llvm/Support/RecyclingAllocator.h" @@ -215,6 +216,7 @@ namespace { class EarlyCSE : public FunctionPass { public: const TargetData *TD; + const TargetLibraryInfo *TLI; DominatorTree *DT; typedef RecyclingAllocator<BumpPtrAllocator, ScopedHashTableVal<SimpleValue, Value*> > AllocatorTy; @@ -263,6 +265,7 @@ private: // This transformation requires dominator postdominator info virtual void getAnalysisUsage(AnalysisUsage &AU) const { AU.addRequired<DominatorTree>(); + AU.addRequired<TargetLibraryInfo>(); AU.setPreservesCFG(); } }; @@ -277,6 +280,7 @@ FunctionPass *llvm::createEarlyCSEPass() { INITIALIZE_PASS_BEGIN(EarlyCSE, "early-cse", "Early CSE", false, false) INITIALIZE_PASS_DEPENDENCY(DominatorTree) +INITIALIZE_PASS_DEPENDENCY(TargetLibraryInfo) INITIALIZE_PASS_END(EarlyCSE, "early-cse", "Early CSE", false, false) bool EarlyCSE::processNode(DomTreeNode *Node) { @@ -328,7 +332,7 @@ bool EarlyCSE::processNode(DomTreeNode *Node) { // If the instruction can be simplified (e.g. X+0 = X) then replace it with // its simpler value. - if (Value *V = SimplifyInstruction(Inst, TD, DT)) { + if (Value *V = SimplifyInstruction(Inst, TD, TLI, DT)) { DEBUG(dbgs() << "EarlyCSE Simplify: " << *Inst << " to: " << *V << '\n'); Inst->replaceAllUsesWith(V); Inst->eraseFromParent(); @@ -455,6 +459,7 @@ bool EarlyCSE::processNode(DomTreeNode *Node) { bool EarlyCSE::runOnFunction(Function &F) { TD = getAnalysisIfAvailable<TargetData>(); + TLI = &getAnalysis<TargetLibraryInfo>(); DT = &getAnalysis<DominatorTree>(); // Tables that the pass uses when walking the domtree. 
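On the FindUnconditionalPreds hunk above: a single-block loop — a block whose terminator branches back to itself — appears in its own predecessor list, and DSE's free-handling walk pushes every block returned here back onto its worklist, so without the new Pred == BB test the walk can keep re-enqueueing the same block. A reduced sketch of the fixed shape (simplified; the committed function also consults the dominator tree):

#include "llvm/BasicBlock.h"
#include "llvm/Instructions.h"
#include "llvm/Support/CFG.h"
#include "llvm/ADT/SmallVector.h"
using namespace llvm;

static void findUncondPreds(SmallVectorImpl<BasicBlock *> &Blocks,
                            BasicBlock *BB) {
  for (pred_iterator I = pred_begin(BB), E = pred_end(BB); I != E; ++I) {
    BasicBlock *Pred = *I;
    // A block ending in "br label %self" lists itself as a predecessor;
    // skip it so the caller's worklist cannot cycle on it.
    if (Pred == BB)
      continue;
    // Only predecessors that unconditionally fall into BB are of interest.
    if (Pred->getTerminator()->getNumSuccessors() != 1)
      continue;
    Blocks.push_back(Pred);
  }
}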
diff --git a/lib/Transforms/Scalar/GVN.cpp b/lib/Transforms/Scalar/GVN.cpp index a51cbb6..374fdd7 100644 --- a/lib/Transforms/Scalar/GVN.cpp +++ b/lib/Transforms/Scalar/GVN.cpp @@ -31,6 +31,7 @@ #include "llvm/Analysis/ValueTracking.h" #include "llvm/Assembly/Writer.h" #include "llvm/Target/TargetData.h" +#include "llvm/Target/TargetLibraryInfo.h" #include "llvm/Transforms/Utils/BasicBlockUtils.h" #include "llvm/Transforms/Utils/SSAUpdater.h" #include "llvm/ADT/DenseMap.h" @@ -446,7 +447,8 @@ namespace { MemoryDependenceAnalysis *MD; DominatorTree *DT; const TargetData *TD; - + const TargetLibraryInfo *TLI; + ValueTable VN; /// LeaderTable - A mapping from value numbers to lists of Value*'s that @@ -530,6 +532,7 @@ namespace { // This transformation requires dominator postdominator info virtual void getAnalysisUsage(AnalysisUsage &AU) const { AU.addRequired<DominatorTree>(); + AU.addRequired<TargetLibraryInfo>(); if (!NoLoads) AU.addRequired<MemoryDependenceAnalysis>(); AU.addRequired<AliasAnalysis>(); @@ -568,6 +571,7 @@ FunctionPass *llvm::createGVNPass(bool NoLoads) { INITIALIZE_PASS_BEGIN(GVN, "gvn", "Global Value Numbering", false, false) INITIALIZE_PASS_DEPENDENCY(MemoryDependenceAnalysis) INITIALIZE_PASS_DEPENDENCY(DominatorTree) +INITIALIZE_PASS_DEPENDENCY(TargetLibraryInfo) INITIALIZE_AG_DEPENDENCY(AliasAnalysis) INITIALIZE_PASS_END(GVN, "gvn", "Global Value Numbering", false, false) @@ -2032,7 +2036,7 @@ bool GVN::processInstruction(Instruction *I) { // to value numbering it. Value numbering often exposes redundancies, for // example if it determines that %y is equal to %x then the instruction // "%z = and i32 %x, %y" becomes "%z = and i32 %x, %x" which we now simplify. - if (Value *V = SimplifyInstruction(I, TD, DT)) { + if (Value *V = SimplifyInstruction(I, TD, TLI, DT)) { I->replaceAllUsesWith(V); if (MD && V->getType()->isPointerTy()) MD->invalidateCachedPointerInfo(V); @@ -2134,6 +2138,7 @@ bool GVN::runOnFunction(Function& F) { MD = &getAnalysis<MemoryDependenceAnalysis>(); DT = &getAnalysis<DominatorTree>(); TD = getAnalysisIfAvailable<TargetData>(); + TLI = &getAnalysis<TargetLibraryInfo>(); VN.setAliasAnalysis(&getAnalysis<AliasAnalysis>()); VN.setMemDep(MD); VN.setDomTree(DT); diff --git a/lib/Transforms/Scalar/GlobalMerge.cpp b/lib/Transforms/Scalar/GlobalMerge.cpp index 0772b48..ad8689a 100644 --- a/lib/Transforms/Scalar/GlobalMerge.cpp +++ b/lib/Transforms/Scalar/GlobalMerge.cpp @@ -182,7 +182,7 @@ bool GlobalMerge::doInitialization(Module &M) { continue; // Ignore fancy-aligned globals for now. - unsigned Alignment = I->getAlignment(); + unsigned Alignment = TD->getPreferredAlignment(I); Type *Ty = I->getType()->getElementType(); if (Alignment > TD->getABITypeAlignment(Ty)) continue; diff --git a/lib/Transforms/Scalar/IndVarSimplify.cpp b/lib/Transforms/Scalar/IndVarSimplify.cpp index 1f21108..6d52b22 100644 --- a/lib/Transforms/Scalar/IndVarSimplify.cpp +++ b/lib/Transforms/Scalar/IndVarSimplify.cpp @@ -58,18 +58,16 @@ STATISTIC(NumLFTR , "Number of loop exit tests replaced"); STATISTIC(NumElimExt , "Number of IV sign/zero extends eliminated"); STATISTIC(NumElimIV , "Number of congruent IVs eliminated"); -namespace llvm { - cl::opt<bool> EnableIVRewrite( - "enable-iv-rewrite", cl::Hidden, - cl::desc("Enable canonical induction variable rewriting")); - - // Trip count verification can be enabled by default under NDEBUG if we - // implement a strong expression equivalence checker in SCEV. 
Until then, we - // use the verify-indvars flag, which may assert in some cases. - cl::opt<bool> VerifyIndvars( - "verify-indvars", cl::Hidden, - cl::desc("Verify the ScalarEvolution result after running indvars")); -} +static cl::opt<bool> EnableIVRewrite( + "enable-iv-rewrite", cl::Hidden, + cl::desc("Enable canonical induction variable rewriting")); + +// Trip count verification can be enabled by default under NDEBUG if we +// implement a strong expression equivalence checker in SCEV. Until then, we +// use the verify-indvars flag, which may assert in some cases. +static cl::opt<bool> VerifyIndvars( + "verify-indvars", cl::Hidden, + cl::desc("Verify the ScalarEvolution result after running indvars")); namespace { class IndVarSimplify : public LoopPass { @@ -180,6 +178,11 @@ bool IndVarSimplify::isValidRewrite(Value *FromVal, Value *ToVal) { // base of a recurrence. This handles the case in which SCEV expansion // converts a pointer type recurrence into a nonrecurrent pointer base // indexed by an integer recurrence. + + // If the GEP base pointer is a vector of pointers, abort. + if (!FromPtr->getType()->isPointerTy() || !ToPtr->getType()->isPointerTy()) + return false; + const SCEV *FromBase = SE->getPointerBase(SE->getSCEV(FromPtr)); const SCEV *ToBase = SE->getPointerBase(SE->getSCEV(ToPtr)); if (FromBase == ToBase) @@ -946,9 +949,13 @@ const SCEVAddRecExpr* WidenIV::GetExtendedOperandRecurrence(NarrowIVDefUse DU) { else return 0; + // When creating this AddExpr, don't apply the current operation's NSW or NUW + // flags. This instruction may be guarded by control flow that the no-wrap + // behavior depends on. Non-control-equivalent instructions can be mapped to + // the same SCEV expression, and it would be incorrect to transfer NSW/NUW + // semantics to those operations. const SCEVAddRecExpr *AddRec = dyn_cast<SCEVAddRecExpr>( - SE->getAddExpr(SE->getSCEV(DU.WideDef), ExtendOperExpr, - IsSigned ? SCEV::FlagNSW : SCEV::FlagNUW)); + SE->getAddExpr(SE->getSCEV(DU.WideDef), ExtendOperExpr)); if (!AddRec || AddRec->getLoop() != L) return 0; @@ -1231,7 +1238,11 @@ void IndVarSimplify::SimplifyAndExtend(Loop *L, /// BackedgeTakenInfo. If these expressions have not been reduced, then /// expanding them may incur additional cost (albeit in the loop preheader). static bool isHighCostExpansion(const SCEV *S, BranchInst *BI, + SmallPtrSet<const SCEV*, 8> &Processed, ScalarEvolution *SE) { + if (!Processed.insert(S)) + return false; + // If the backedge-taken count is a UDiv, it's very likely a UDiv that // ScalarEvolution's HowFarToZero or HowManyLessThans produced to compute a // precise expression, rather than a UDiv from the user's code. 
If we can't @@ -1259,7 +1270,7 @@ static bool isHighCostExpansion(const SCEV *S, BranchInst *BI, if (const SCEVAddExpr *Add = dyn_cast<SCEVAddExpr>(S)) { for (SCEVAddExpr::op_iterator I = Add->op_begin(), E = Add->op_end(); I != E; ++I) { - if (isHighCostExpansion(*I, BI, SE)) + if (isHighCostExpansion(*I, BI, Processed, SE)) return true; } return false; @@ -1302,7 +1313,8 @@ static bool canExpandBackedgeTakenCount(Loop *L, ScalarEvolution *SE) { if (!BI) return false; - if (isHighCostExpansion(BackedgeTakenCount, BI, SE)) + SmallPtrSet<const SCEV*, 8> Processed; + if (isHighCostExpansion(BackedgeTakenCount, BI, Processed, SE)) return false; return true; diff --git a/lib/Transforms/Scalar/JumpThreading.cpp b/lib/Transforms/Scalar/JumpThreading.cpp index f410af3..c78db3f 100644 --- a/lib/Transforms/Scalar/JumpThreading.cpp +++ b/lib/Transforms/Scalar/JumpThreading.cpp @@ -24,6 +24,7 @@ #include "llvm/Transforms/Utils/Local.h" #include "llvm/Transforms/Utils/SSAUpdater.h" #include "llvm/Target/TargetData.h" +#include "llvm/Target/TargetLibraryInfo.h" #include "llvm/ADT/DenseMap.h" #include "llvm/ADT/DenseSet.h" #include "llvm/ADT/Statistic.h" @@ -75,6 +76,7 @@ namespace { /// class JumpThreading : public FunctionPass { TargetData *TD; + TargetLibraryInfo *TLI; LazyValueInfo *LVI; #ifdef NDEBUG SmallPtrSet<BasicBlock*, 16> LoopHeaders; @@ -107,6 +109,7 @@ namespace { virtual void getAnalysisUsage(AnalysisUsage &AU) const { AU.addRequired<LazyValueInfo>(); AU.addPreserved<LazyValueInfo>(); + AU.addRequired<TargetLibraryInfo>(); } void FindLoopHeaders(Function &F); @@ -133,6 +136,7 @@ char JumpThreading::ID = 0; INITIALIZE_PASS_BEGIN(JumpThreading, "jump-threading", "Jump Threading", false, false) INITIALIZE_PASS_DEPENDENCY(LazyValueInfo) +INITIALIZE_PASS_DEPENDENCY(TargetLibraryInfo) INITIALIZE_PASS_END(JumpThreading, "jump-threading", "Jump Threading", false, false) @@ -144,6 +148,7 @@ FunctionPass *llvm::createJumpThreadingPass() { return new JumpThreading(); } bool JumpThreading::runOnFunction(Function &F) { DEBUG(dbgs() << "Jump threading on function '" << F.getName() << "'\n"); TD = getAnalysisIfAvailable<TargetData>(); + TLI = &getAnalysis<TargetLibraryInfo>(); LVI = &getAnalysis<LazyValueInfo>(); FindLoopHeaders(F); @@ -674,7 +679,7 @@ bool JumpThreading::ProcessBlock(BasicBlock *BB) { // Run constant folding to see if we can reduce the condition to a simple // constant. if (Instruction *I = dyn_cast<Instruction>(Condition)) { - Value *SimpleVal = ConstantFoldInstruction(I, TD); + Value *SimpleVal = ConstantFoldInstruction(I, TD, TLI); if (SimpleVal) { I->replaceAllUsesWith(SimpleVal); I->eraseFromParent(); @@ -921,8 +926,7 @@ bool JumpThreading::SimplifyPartiallyRedundantLoad(LoadInst *LI) { // Split them out to their own block. UnavailablePred = - SplitBlockPredecessors(LoadBB, &PredsToSplit[0], PredsToSplit.size(), - "thread-pre-split", this); + SplitBlockPredecessors(LoadBB, PredsToSplit, "thread-pre-split", this); } // If the value isn't available in all predecessors, then there will be @@ -1334,8 +1338,7 @@ bool JumpThreading::ThreadEdge(BasicBlock *BB, else { DEBUG(dbgs() << " Factoring out " << PredBBs.size() << " common predecessors.\n"); - PredBB = SplitBlockPredecessors(BB, &PredBBs[0], PredBBs.size(), - ".thr_comm", this); + PredBB = SplitBlockPredecessors(BB, PredBBs, ".thr_comm", this); } // And finally, do it! 
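Stepping back to the isHighCostExpansion change in the IndVarSimplify hunks above: SCEV expressions are DAGs with shared subtrees, so a plain recursion can revisit the same node exponentially often, and threading a visited-set through the recursion bounds the work to one visit per node. A generic illustration of the idiom (the Expr type and cost predicate are invented; only the SmallPtrSet convention — insert() returning false for an already-present element — mirrors the patch):

#include "llvm/ADT/SmallPtrSet.h"
using namespace llvm;

// A toy DAG node standing in for a SCEV expression.
struct Expr {
  const Expr *LHS, *RHS; // may be null for leaves
  bool CostlyOp;         // e.g. the UDiv case in the real code
};

static bool isHighCost(const Expr *E, SmallPtrSet<const Expr *, 8> &Seen) {
  // A node we have fully examined before cannot add new cost, so answer
  // "cheap" without recursing again -- this is the memoization step.
  if (!Seen.insert(E))
    return false;
  if (E->CostlyOp)
    return true;
  if (E->LHS && isHighCost(E->LHS, Seen))
    return true;
  return E->RHS && isHighCost(E->RHS, Seen);
}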
@@ -1479,8 +1482,7 @@ bool JumpThreading::DuplicateCondBranchOnPHIIntoPred(BasicBlock *BB, else { DEBUG(dbgs() << " Factoring out " << PredBBs.size() << " common predecessors.\n"); - PredBB = SplitBlockPredecessors(BB, &PredBBs[0], PredBBs.size(), - ".thr_comm", this); + PredBB = SplitBlockPredecessors(BB, PredBBs, ".thr_comm", this); } // Okay, we decided to do this! Clone all the instructions in BB onto the end diff --git a/lib/Transforms/Scalar/LICM.cpp b/lib/Transforms/Scalar/LICM.cpp index 8098b36..8795cd8 100644 --- a/lib/Transforms/Scalar/LICM.cpp +++ b/lib/Transforms/Scalar/LICM.cpp @@ -43,8 +43,11 @@ #include "llvm/Analysis/LoopInfo.h" #include "llvm/Analysis/LoopPass.h" #include "llvm/Analysis/Dominators.h" +#include "llvm/Analysis/ValueTracking.h" #include "llvm/Transforms/Utils/Local.h" #include "llvm/Transforms/Utils/SSAUpdater.h" +#include "llvm/Target/TargetData.h" +#include "llvm/Target/TargetLibraryInfo.h" #include "llvm/Support/CFG.h" #include "llvm/Support/CommandLine.h" #include "llvm/Support/raw_ostream.h" @@ -84,6 +87,7 @@ namespace { AU.addPreserved<AliasAnalysis>(); AU.addPreserved("scalar-evolution"); AU.addPreservedID(LoopSimplifyID); + AU.addRequired<TargetLibraryInfo>(); } bool doFinalization() { @@ -96,6 +100,9 @@ namespace { LoopInfo *LI; // Current LoopInfo DominatorTree *DT; // Dominator Tree for the current Loop. + TargetData *TD; // TargetData for constant folding. + TargetLibraryInfo *TLI; // TargetLibraryInfo for constant folding. + // State that is updated as we process loops. bool Changed; // Set to true when we change anything. BasicBlock *Preheader; // The preheader block of the current loop... @@ -177,6 +184,7 @@ INITIALIZE_PASS_BEGIN(LICM, "licm", "Loop Invariant Code Motion", false, false) INITIALIZE_PASS_DEPENDENCY(DominatorTree) INITIALIZE_PASS_DEPENDENCY(LoopInfo) INITIALIZE_PASS_DEPENDENCY(LoopSimplify) +INITIALIZE_PASS_DEPENDENCY(TargetLibraryInfo) INITIALIZE_AG_DEPENDENCY(AliasAnalysis) INITIALIZE_PASS_END(LICM, "licm", "Loop Invariant Code Motion", false, false) @@ -194,6 +202,9 @@ bool LICM::runOnLoop(Loop *L, LPPassManager &LPM) { AA = &getAnalysis<AliasAnalysis>(); DT = &getAnalysis<DominatorTree>(); + TD = getAnalysisIfAvailable<TargetData>(); + TLI = &getAnalysis<TargetLibraryInfo>(); + CurAST = new AliasSetTracker(*AA); // Collect Alias info from subloops. for (Loop::iterator LoopItr = L->begin(), LoopItrE = L->end(); @@ -333,7 +344,7 @@ void LICM::HoistRegion(DomTreeNode *N) { // Try constant folding this instruction. If all the operands are // constants, it is technically hoistable, but it would be better to just // fold it. - if (Constant *C = ConstantFoldInstruction(&I)) { + if (Constant *C = ConstantFoldInstruction(&I, TD, TLI)) { DEBUG(dbgs() << "LICM folding inst: " << I << " --> " << *C << '\n'); CurAST->copyValue(&I, C); CurAST->deleteValue(&I); @@ -369,7 +380,7 @@ bool LICM::canSinkOrHoistInst(Instruction &I) { // in the same alias set as something that ends up being modified. if (AA->pointsToConstantMemory(LI->getOperand(0))) return true; - if (LI->getMetadata(LI->getContext().getMDKindID("invariant.load"))) + if (LI->getMetadata("invariant.load")) return true; // Don't hoist loads which have may-aliased stores in loop. @@ -581,7 +592,7 @@ void LICM::hoist(Instruction &I) { /// bool LICM::isSafeToExecuteUnconditionally(Instruction &Inst) { // If it is not a trapping instruction, it is always safe to hoist. 
- if (Inst.isSafeToSpeculativelyExecute()) + if (isSafeToSpeculativelyExecute(&Inst)) return true; return isGuaranteedToExecute(Inst); diff --git a/lib/Transforms/Scalar/LLVMBuild.txt b/lib/Transforms/Scalar/LLVMBuild.txt index 027634d..cee9119 100644 --- a/lib/Transforms/Scalar/LLVMBuild.txt +++ b/lib/Transforms/Scalar/LLVMBuild.txt @@ -21,4 +21,3 @@ name = Scalar parent = Transforms library_name = ScalarOpts required_libraries = Analysis Core InstCombine Support Target TransformUtils - diff --git a/lib/Transforms/Scalar/LoopInstSimplify.cpp b/lib/Transforms/Scalar/LoopInstSimplify.cpp index af25c5c..f0f05e6 100644 --- a/lib/Transforms/Scalar/LoopInstSimplify.cpp +++ b/lib/Transforms/Scalar/LoopInstSimplify.cpp @@ -19,6 +19,7 @@ #include "llvm/Analysis/LoopPass.h" #include "llvm/Support/Debug.h" #include "llvm/Target/TargetData.h" +#include "llvm/Target/TargetLibraryInfo.h" #include "llvm/Transforms/Scalar.h" #include "llvm/Transforms/Utils/Local.h" #include "llvm/ADT/Statistic.h" @@ -43,6 +44,7 @@ namespace { AU.addPreservedID(LoopSimplifyID); AU.addPreservedID(LCSSAID); AU.addPreserved("scalar-evolution"); + AU.addRequired<TargetLibraryInfo>(); } }; } @@ -50,6 +52,7 @@ namespace { char LoopInstSimplify::ID = 0; INITIALIZE_PASS_BEGIN(LoopInstSimplify, "loop-instsimplify", "Simplify instructions in loops", false, false) +INITIALIZE_PASS_DEPENDENCY(TargetLibraryInfo) INITIALIZE_PASS_DEPENDENCY(DominatorTree) INITIALIZE_PASS_DEPENDENCY(LoopInfo) INITIALIZE_PASS_DEPENDENCY(LCSSA) @@ -64,6 +67,7 @@ bool LoopInstSimplify::runOnLoop(Loop *L, LPPassManager &LPM) { DominatorTree *DT = getAnalysisIfAvailable<DominatorTree>(); LoopInfo *LI = &getAnalysis<LoopInfo>(); const TargetData *TD = getAnalysisIfAvailable<TargetData>(); + const TargetLibraryInfo *TLI = &getAnalysis<TargetLibraryInfo>(); SmallVector<BasicBlock*, 8> ExitBlocks; L->getUniqueExitBlocks(ExitBlocks); @@ -104,7 +108,7 @@ bool LoopInstSimplify::runOnLoop(Loop *L, LPPassManager &LPM) { // Don't bother simplifying unused instructions. if (!I->use_empty()) { - Value *V = SimplifyInstruction(I, TD, DT); + Value *V = SimplifyInstruction(I, TD, TLI, DT); if (V && LI->replacementPreservesLCSSAForm(I, V)) { // Mark all uses for resimplification next time round the loop. for (Value::use_iterator UI = I->use_begin(), UE = I->use_end(); diff --git a/lib/Transforms/Scalar/LoopStrengthReduce.cpp b/lib/Transforms/Scalar/LoopStrengthReduce.cpp index 4ae51d5..840614e 100644 --- a/lib/Transforms/Scalar/LoopStrengthReduce.cpp +++ b/lib/Transforms/Scalar/LoopStrengthReduce.cpp @@ -77,19 +77,17 @@ #include <algorithm> using namespace llvm; -namespace llvm { -cl::opt<bool> EnableNested( +static cl::opt<bool> EnableNested( "enable-lsr-nested", cl::Hidden, cl::desc("Enable LSR on nested loops")); -cl::opt<bool> EnableRetry( - "enable-lsr-retry", cl::Hidden, cl::desc("Enable LSR retry")); +static cl::opt<bool> EnableRetry( + "enable-lsr-retry", cl::Hidden, cl::desc("Enable LSR retry")); // Temporary flag to cleanup congruent phis after LSR phi expansion. // It's currently disabled until we can determine whether it's truly useful or // not. The flag should be removed after the v3.0 release. 
-cl::opt<bool> EnablePhiElim( - "enable-lsr-phielim", cl::Hidden, cl::desc("Enable LSR phi elimination")); -} +static cl::opt<bool> EnablePhiElim( + "enable-lsr-phielim", cl::Hidden, cl::desc("Enable LSR phi elimination")); namespace { @@ -636,6 +634,19 @@ static Type *getAccessType(const Instruction *Inst) { return AccessTy; } +/// isExistingPhi - Return true if this AddRec is already a phi in its loop. +static bool isExistingPhi(const SCEVAddRecExpr *AR, ScalarEvolution &SE) { + for (BasicBlock::iterator I = AR->getLoop()->getHeader()->begin(); + PHINode *PN = dyn_cast<PHINode>(I); ++I) { + if (SE.isSCEVable(PN->getType()) && + (SE.getEffectiveSCEVType(PN->getType()) == + SE.getEffectiveSCEVType(AR->getType())) && + SE.getSCEV(PN) == AR) + return true; + } + return false; +} + /// DeleteTriviallyDeadInstructions - If any of the instructions is the /// specified set are trivially dead, delete them and see if this makes any of /// their operands subsequently dead. @@ -705,7 +716,8 @@ public: const DenseSet<const SCEV *> &VisitedRegs, const Loop *L, const SmallVectorImpl<int64_t> &Offsets, - ScalarEvolution &SE, DominatorTree &DT); + ScalarEvolution &SE, DominatorTree &DT, + SmallPtrSet<const SCEV *, 16> *LoserRegs = 0); void print(raw_ostream &OS) const; void dump() const; @@ -718,7 +730,8 @@ private: void RatePrimaryRegister(const SCEV *Reg, SmallPtrSet<const SCEV *, 16> &Regs, const Loop *L, - ScalarEvolution &SE, DominatorTree &DT); + ScalarEvolution &SE, DominatorTree &DT, + SmallPtrSet<const SCEV *, 16> *LoserRegs); }; } @@ -738,18 +751,13 @@ void Cost::RateRegister(const SCEV *Reg, // on other loops, and cannot be expected to change sibling loops. If the // AddRec exists, consider it's register free and leave it alone. Otherwise, // do not consider this formula at all. - // FIXME: why do we need to generate such fomulae? else if (!EnableNested || L->contains(AR->getLoop()) || (!AR->getLoop()->contains(L) && DT.dominates(L->getHeader(), AR->getLoop()->getHeader()))) { - for (BasicBlock::iterator I = AR->getLoop()->getHeader()->begin(); - PHINode *PN = dyn_cast<PHINode>(I); ++I) { - if (SE.isSCEVable(PN->getType()) && - (SE.getEffectiveSCEVType(PN->getType()) == - SE.getEffectiveSCEVType(AR->getType())) && - SE.getSCEV(PN) == AR) - return; - } + if (isExistingPhi(AR, SE)) + return; + + // For !EnableNested, never rewrite IVs in other loops. if (!EnableNested) { Loose(); return; @@ -791,13 +799,22 @@ void Cost::RateRegister(const SCEV *Reg, } /// RatePrimaryRegister - Record this register in the set. If we haven't seen it -/// before, rate it. +/// before, rate it. Optional LoserRegs provides a way to declare any formula +/// that refers to one of those regs an instant loser. void Cost::RatePrimaryRegister(const SCEV *Reg, SmallPtrSet<const SCEV *, 16> &Regs, const Loop *L, - ScalarEvolution &SE, DominatorTree &DT) { - if (Regs.insert(Reg)) + ScalarEvolution &SE, DominatorTree &DT, + SmallPtrSet<const SCEV *, 16> *LoserRegs) { + if (LoserRegs && LoserRegs->count(Reg)) { + Loose(); + return; + } + if (Regs.insert(Reg)) { RateRegister(Reg, Regs, L, SE, DT); + if (isLoser()) + LoserRegs->insert(Reg); + } } void Cost::RateFormula(const Formula &F, @@ -805,14 +822,15 @@ void Cost::RateFormula(const Formula &F, const DenseSet<const SCEV *> &VisitedRegs, const Loop *L, const SmallVectorImpl<int64_t> &Offsets, - ScalarEvolution &SE, DominatorTree &DT) { + ScalarEvolution &SE, DominatorTree &DT, + SmallPtrSet<const SCEV *, 16> *LoserRegs) { // Tally up the registers. 
if (const SCEV *ScaledReg = F.ScaledReg) { if (VisitedRegs.count(ScaledReg)) { Loose(); return; } - RatePrimaryRegister(ScaledReg, Regs, L, SE, DT); + RatePrimaryRegister(ScaledReg, Regs, L, SE, DT, LoserRegs); if (isLoser()) return; } @@ -823,7 +841,7 @@ void Cost::RateFormula(const Formula &F, Loose(); return; } - RatePrimaryRegister(BaseReg, Regs, L, SE, DT); + RatePrimaryRegister(BaseReg, Regs, L, SE, DT, LoserRegs); if (isLoser()) return; } @@ -1105,7 +1123,6 @@ bool LSRUse::InsertFormula(const Formula &F) { Formulae.push_back(F); // Record registers now being used by this use. - if (F.ScaledReg) Regs.insert(F.ScaledReg); Regs.insert(F.BaseRegs.begin(), F.BaseRegs.end()); return true; @@ -1116,7 +1133,6 @@ void LSRUse::DeleteFormula(Formula &F) { if (&F != &Formulae.back()) std::swap(F, Formulae.back()); Formulae.pop_back(); - assert(!Formulae.empty() && "LSRUse has no formulae left!"); } /// RecomputeRegs - Recompute the Regs field, and update RegUses. @@ -1389,7 +1405,6 @@ class LSRInstance { LSRUse *FindUseWithSimilarFormula(const Formula &F, const LSRUse &OrigLU); -public: void InsertInitialFormula(const SCEV *S, LSRUse &LU, size_t LUIdx); void InsertSupplementalFormula(const SCEV *S, LSRUse &LU, size_t LUIdx); void CountRegisters(const Formula &F, size_t LUIdx); @@ -1450,6 +1465,7 @@ public: void ImplementSolution(const SmallVectorImpl<const Formula *> &Solution, Pass *P); +public: LSRInstance(const TargetLowering *tli, Loop *l, Pass *P); bool getChanged() const { return Changed; } @@ -2045,7 +2061,8 @@ void LSRInstance::CollectInterestingTypesAndFactors() { do { const SCEV *S = Worklist.pop_back_val(); if (const SCEVAddRecExpr *AR = dyn_cast<SCEVAddRecExpr>(S)) { - Strides.insert(AR->getStepRecurrence(SE)); + if (EnableNested || AR->getLoop() == L) + Strides.insert(AR->getStepRecurrence(SE)); Worklist.push_back(AR->getStart()); } else if (const SCEVAddExpr *Add = dyn_cast<SCEVAddExpr>(S)) { Worklist.append(Add->op_begin(), Add->op_end()); @@ -2914,6 +2931,7 @@ LSRInstance::GenerateAllReuseFormulae() { void LSRInstance::FilterOutUndesirableDedicatedRegisters() { DenseSet<const SCEV *> VisitedRegs; SmallPtrSet<const SCEV *, 16> Regs; + SmallPtrSet<const SCEV *, 16> LoserRegs; #ifndef NDEBUG bool ChangedFormulae = false; #endif @@ -2933,46 +2951,66 @@ void LSRInstance::FilterOutUndesirableDedicatedRegisters() { FIdx != NumForms; ++FIdx) { Formula &F = LU.Formulae[FIdx]; - SmallVector<const SCEV *, 2> Key; - for (SmallVectorImpl<const SCEV *>::const_iterator J = F.BaseRegs.begin(), - JE = F.BaseRegs.end(); J != JE; ++J) { - const SCEV *Reg = *J; - if (RegUses.isRegUsedByUsesOtherThan(Reg, LUIdx)) - Key.push_back(Reg); + // Some formulas are instant losers. For example, they may depend on + // nonexistent AddRecs from other loops. These need to be filtered + // immediately, otherwise heuristics could choose them over others leading + // to an unsatisfactory solution. Passing LoserRegs into RateFormula here + // avoids the need to recompute this information across formulae using the + // same bad AddRec. Passing LoserRegs is also essential unless we remove + // the corresponding bad register from the Regs set. + Cost CostF; + Regs.clear(); + CostF.RateFormula(F, Regs, VisitedRegs, L, LU.Offsets, SE, DT, + &LoserRegs); + if (CostF.isLoser()) { + // During initial formula generation, undesirable formulae are generated + // by uses within other loops that have some non-trivial address mode or + // use the postinc form of the IV. 
LSR needs to provide these formulae + // as the basis of rediscovering the desired formula that uses an AddRec + // corresponding to the existing phi. Once all formulae have been + // generated, these initial losers may be pruned. + DEBUG(dbgs() << " Filtering loser "; F.print(dbgs()); + dbgs() << "\n"); } - if (F.ScaledReg && - RegUses.isRegUsedByUsesOtherThan(F.ScaledReg, LUIdx)) - Key.push_back(F.ScaledReg); - // Unstable sort by host order ok, because this is only used for - // uniquifying. - std::sort(Key.begin(), Key.end()); - - std::pair<BestFormulaeTy::const_iterator, bool> P = - BestFormulae.insert(std::make_pair(Key, FIdx)); - if (!P.second) { + else { + SmallVector<const SCEV *, 2> Key; + for (SmallVectorImpl<const SCEV *>::const_iterator J = F.BaseRegs.begin(), + JE = F.BaseRegs.end(); J != JE; ++J) { + const SCEV *Reg = *J; + if (RegUses.isRegUsedByUsesOtherThan(Reg, LUIdx)) + Key.push_back(Reg); + } + if (F.ScaledReg && + RegUses.isRegUsedByUsesOtherThan(F.ScaledReg, LUIdx)) + Key.push_back(F.ScaledReg); + // Unstable sort by host order ok, because this is only used for + // uniquifying. + std::sort(Key.begin(), Key.end()); + + std::pair<BestFormulaeTy::const_iterator, bool> P = + BestFormulae.insert(std::make_pair(Key, FIdx)); + if (P.second) + continue; + Formula &Best = LU.Formulae[P.first->second]; - Cost CostF; - CostF.RateFormula(F, Regs, VisitedRegs, L, LU.Offsets, SE, DT); - Regs.clear(); Cost CostBest; - CostBest.RateFormula(Best, Regs, VisitedRegs, L, LU.Offsets, SE, DT); Regs.clear(); + CostBest.RateFormula(Best, Regs, VisitedRegs, L, LU.Offsets, SE, DT); if (CostF < CostBest) std::swap(F, Best); DEBUG(dbgs() << " Filtering out formula "; F.print(dbgs()); dbgs() << "\n" " in favor of formula "; Best.print(dbgs()); dbgs() << '\n'); + } #ifndef NDEBUG - ChangedFormulae = true; + ChangedFormulae = true; #endif - LU.DeleteFormula(F); - --FIdx; - --NumForms; - Any = true; - continue; - } + LU.DeleteFormula(F); + --FIdx; + --NumForms; + Any = true; } // Now that we've filtered out some formulae, recompute the Regs set. diff --git a/lib/Transforms/Scalar/LoopUnrollPass.cpp b/lib/Transforms/Scalar/LoopUnrollPass.cpp index 37f4c2c..22dbfe3 100644 --- a/lib/Transforms/Scalar/LoopUnrollPass.cpp +++ b/lib/Transforms/Scalar/LoopUnrollPass.cpp @@ -40,10 +40,9 @@ UnrollAllowPartial("unroll-allow-partial", cl::init(false), cl::Hidden, cl::desc("Allows loops to be partially unrolled until " "-unroll-threshold loop size is reached.")); -// Temporary flag to be removed in 3.0 static cl::opt<bool> -NoSCEVUnroll("disable-unroll-scev", cl::init(false), cl::Hidden, - cl::desc("Use ScalarEvolution to analyze loop trip counts for unrolling")); +UnrollRuntime("unroll-runtime", cl::ZeroOrMore, cl::init(false), cl::Hidden, + cl::desc("Unroll loops with run-time trip counts")); namespace { class LoopUnroll : public LoopPass { @@ -68,6 +67,10 @@ namespace { // explicit -unroll-threshold). static const unsigned OptSizeUnrollThreshold = 50; + // Default unroll count for loops with run-time trip count if + // -unroll-count is not set + static const unsigned UnrollRuntimeCount = 8; + unsigned CurrentCount; unsigned CurrentThreshold; bool CurrentAllowPartial; @@ -148,23 +151,21 @@ bool LoopUnroll::runOnLoop(Loop *L, LPPassManager &LPM) { // Find trip count and trip multiple if count is not available unsigned TripCount = 0; unsigned TripMultiple = 1; - if (!NoSCEVUnroll) { - // Find "latch trip count". 
UnrollLoop assumes that control cannot exit - // via the loop latch on any iteration prior to TripCount. The loop may exit - // early via an earlier branch. - BasicBlock *LatchBlock = L->getLoopLatch(); - if (LatchBlock) { - TripCount = SE->getSmallConstantTripCount(L, LatchBlock); - TripMultiple = SE->getSmallConstantTripMultiple(L, LatchBlock); - } - } - else { - TripCount = L->getSmallConstantTripCount(); - if (TripCount == 0) - TripMultiple = L->getSmallConstantTripMultiple(); + // Find "latch trip count". UnrollLoop assumes that control cannot exit + // via the loop latch on any iteration prior to TripCount. The loop may exit + // early via an earlier branch. + BasicBlock *LatchBlock = L->getLoopLatch(); + if (LatchBlock) { + TripCount = SE->getSmallConstantTripCount(L, LatchBlock); + TripMultiple = SE->getSmallConstantTripMultiple(L, LatchBlock); } - // Automatically select an unroll count. + // Use a default unroll-count if the user doesn't specify a value + // and the trip count is a run-time value. The default is different + // for run-time or compile-time trip count loops. unsigned Count = CurrentCount; + if (UnrollRuntime && CurrentCount == 0 && TripCount == 0) + Count = UnrollRuntimeCount; + if (Count == 0) { // Conservative heuristic: if we know the trip count, see if we can // completely unroll (subject to the threshold, checked below); otherwise @@ -189,15 +190,23 @@ bool LoopUnroll::runOnLoop(Loop *L, LPPassManager &LPM) { if (TripCount != 1 && Size > Threshold) { DEBUG(dbgs() << " Too large to fully unroll with count: " << Count << " because size: " << Size << ">" << Threshold << "\n"); - if (!CurrentAllowPartial) { + if (!CurrentAllowPartial && !(UnrollRuntime && TripCount == 0)) { DEBUG(dbgs() << " will not try to unroll partially because " << "-unroll-allow-partial not given\n"); return false; } - // Reduce unroll count to be modulo of TripCount for partial unrolling - Count = Threshold / LoopSize; - while (Count != 0 && TripCount%Count != 0) { - Count--; + if (TripCount) { + // Reduce unroll count to be modulo of TripCount for partial unrolling + Count = CurrentThreshold / LoopSize; + while (Count != 0 && TripCount%Count != 0) + Count--; + } + else if (UnrollRuntime) { + // Reduce unroll count to be a lower power-of-two value + while (Count != 0 && Size > CurrentThreshold) { + Count >>= 1; + Size = LoopSize*Count; + } } if (Count < 2) { DEBUG(dbgs() << " could not unroll partially\n"); @@ -208,7 +217,7 @@ bool LoopUnroll::runOnLoop(Loop *L, LPPassManager &LPM) { } // Unroll the loop. - if (!UnrollLoop(L, Count, TripCount, TripMultiple, LI, &LPM)) + if (!UnrollLoop(L, Count, TripCount, UnrollRuntime, TripMultiple, LI, &LPM)) return false; return true; diff --git a/lib/Transforms/Scalar/LoopUnswitch.cpp b/lib/Transforms/Scalar/LoopUnswitch.cpp index 458949c..a2d0e98 100644 --- a/lib/Transforms/Scalar/LoopUnswitch.cpp +++ b/lib/Transforms/Scalar/LoopUnswitch.cpp @@ -71,7 +71,9 @@ namespace { // LoopProcessWorklist - Used to check if second loop needs processing // after RewriteLoopBodyWithConditionConstant rewrites first loop. std::vector<Loop*> LoopProcessWorklist; - SmallPtrSet<Value *,8> UnswitchedVals; + + // FIXME: Consider custom class for this. + std::map<const SwitchInst*, SmallPtrSet<const Value *,8> > UnswitchedVals; bool OptimizeForSize; bool redoLoop; @@ -117,7 +119,15 @@ namespace { private: virtual void releaseMemory() { - UnswitchedVals.clear(); + // We need to forget about all switches in the current loop. 
+ // FIXME: Do it better than enumerating all blocks of code + // and see if it is a switch instruction. + for (Loop::block_iterator I = currentLoop->block_begin(), + E = currentLoop->block_end(); I != E; ++I) { + SwitchInst* SI = dyn_cast<SwitchInst>((*I)->getTerminator()); + if (SI) + UnswitchedVals.erase(SI); + } } /// RemoveLoopFromWorklist - If the specified loop is on the loop worklist, @@ -128,6 +138,12 @@ namespace { if (I != LoopProcessWorklist.end()) LoopProcessWorklist.erase(I); } + + /// For new loop switches we clone info about values that were + /// already unswitched and have redundant successors. + /// Note that new loop data is stored inside the VMap. + void CloneUnswitchedVals(const ValueToValueMapTy& VMap, + const BasicBlock* SrcBB); void initLoopData() { loopHeader = currentLoop->getHeader(); @@ -255,13 +271,25 @@ bool LoopUnswitch::processCurrentLoop() { } else if (SwitchInst *SI = dyn_cast<SwitchInst>(TI)) { Value *LoopCond = FindLIVLoopCondition(SI->getCondition(), currentLoop, Changed); - if (LoopCond && SI->getNumCases() > 1) { + unsigned NumCases = SI->getNumCases(); + if (LoopCond && NumCases > 1) { // Find a value to unswitch on: // FIXME: this should chose the most expensive case! // FIXME: scan for a case with a non-critical edge? - Constant *UnswitchVal = SI->getCaseValue(1); + Constant *UnswitchVal = NULL; + // Do not process the same value again and again. - if (!UnswitchedVals.insert(UnswitchVal)) + // At this point we have some cases already unswitched and + // some not yet unswitched. Let's find the first not yet unswitched one. + for (unsigned i = 1; i < NumCases; ++i) { + Constant* UnswitchValCandidate = SI->getCaseValue(i); + if (!UnswitchedVals[SI].count(UnswitchValCandidate)) { + UnswitchVal = UnswitchValCandidate; + break; + } + } + + if (!UnswitchVal) continue; if (UnswitchIfProfitable(LoopCond, UnswitchVal)) { @@ -287,6 +315,23 @@ bool LoopUnswitch::processCurrentLoop() { return Changed; } +/// For new loop switches we clone info about values that were +/// already unswitched and have redundant successors. +/// Note that new loop data is stored inside the VMap. +void LoopUnswitch::CloneUnswitchedVals(const ValueToValueMapTy& VMap, + const BasicBlock* SrcBB) { + + const SwitchInst* SI = dyn_cast<SwitchInst>(SrcBB->getTerminator()); + if (SI && UnswitchedVals.count(SI)) { + // Don't clone a totally simplified switch. + if (isa<Constant>(SI->getCondition())) + return; + Value* I = VMap.lookup(SI); + assert(I && "All instructions that are in SrcBB must be in VMap."); + UnswitchedVals[cast<SwitchInst>(I)] = UnswitchedVals[SI]; + } +} + /// isTrivialLoopExitBlock - Check to see if all paths from BB exit the /// loop with no side effects (including infinite loops). /// @@ -378,14 +423,25 @@ bool LoopUnswitch::IsTrivialUnswitchCondition(Value *Cond, Constant **Val, // Check to see if a successor of the switch is guaranteed to go to the // latch block or exit through a one exit block without having any // side-effects. If so, determine the value of Cond that causes it to do - // this. Note that we can't trivially unswitch on the default case. + // this. + // Note that we can't trivially unswitch on the default case or + // on already unswitched cases. 
+ for (unsigned i = 1, e = SI->getNumSuccessors(); i != e; ++i) { + BasicBlock* LoopExitCandidate; + if ((LoopExitCandidate = isTrivialLoopExitBlock(currentLoop, SI->getSuccessor(i)))) { // Okay, we found a trivial case, remember the value that is trivial. - if (Val) *Val = SI->getCaseValue(i); + ConstantInt* CaseVal = SI->getCaseValue(i); + + // Check that it was not unswitched before, since already unswitched + // trivial vals look trivial too. + if (UnswitchedVals[SI].count(CaseVal)) + continue; + LoopExitBB = LoopExitCandidate; + if (Val) *Val = CaseVal; break; } + } } // If we didn't find a single unique LoopExit block, or if the loop exit block @@ -447,8 +503,14 @@ bool LoopUnswitch::UnswitchIfProfitable(Value *LoopCond, Constant *Val) { // expansion, and the number of basic blocks, to avoid loops with // large numbers of branches which cause loop unswitching to go crazy. // This is a very ad-hoc heuristic. - if (Metrics.NumInsts > Threshold || - Metrics.NumBlocks * 5 > Threshold || + + unsigned NumUnswitched = + (NumSwitches + NumBranches) + 1 /*take into account current iteration*/; + + unsigned NumInsts = Metrics.NumInsts * NumUnswitched; + unsigned NumBlocks = Metrics.NumBlocks * NumUnswitched; + + if (NumInsts > Threshold || NumBlocks * 5 > Threshold || Metrics.containsIndirectBr || Metrics.isRecursive) { DEBUG(dbgs() << "NOT unswitching loop %" << currentLoop->getHeader()->getName() << ", cost too high: " @@ -565,8 +627,7 @@ void LoopUnswitch::SplitExitEdges(Loop *L, // Although SplitBlockPredecessors doesn't preserve loop-simplify in // general, if we call it on all predecessors of all exits then it does. if (!ExitBlock->isLandingPad()) { - SplitBlockPredecessors(ExitBlock, Preds.data(), Preds.size(), - ".us-lcssa", this); + SplitBlockPredecessors(ExitBlock, Preds, ".us-lcssa", this); } else { SmallVector<BasicBlock*, 2> NewBBs; SplitLandingPadPredecessors(ExitBlock, Preds, ".us-lcssa", ".us-lcssa", NewBBs, this); } @@ -621,6 +682,12 @@ void LoopUnswitch::UnswitchNontrivialCondition(Value *LIC, Constant *Val, ValueToValueMapTy VMap; for (unsigned i = 0, e = LoopBlocks.size(); i != e; ++i) { BasicBlock *NewBB = CloneBasicBlock(LoopBlocks[i], VMap, ".us", F); + + // Inherit simplified switches info for NewBB + // We needn't pass NewBB since its instructions are already contained + // inside the VMap. + CloneUnswitchedVals(VMap, LoopBlocks[i]); + NewBlocks.push_back(NewBB); VMap[LoopBlocks[i]] = NewBB; // Keep the BB mapping. LPM->cloneBasicBlockSimpleAnalysis(LoopBlocks[i], NewBB, L); @@ -907,9 +974,13 @@ void LoopUnswitch::RewriteLoopBodyWithConditionConstant(Loop *L, Value *LIC, Instruction *U = dyn_cast<Instruction>(*UI); if (!U || !L->contains(U)) continue; - U->replaceUsesOfWith(LIC, Replacement); Worklist.push_back(U); } + + for (std::vector<Instruction*>::iterator UI = Worklist.begin(); + UI != Worklist.end(); ++UI) + (*UI)->replaceUsesOfWith(LIC, Replacement); + SimplifyCode(Worklist, L); return; } @@ -942,6 +1013,9 @@ void LoopUnswitch::RewriteLoopBodyWithConditionConstant(Loop *L, Value *LIC, BasicBlock *Switch = SI->getParent(); BasicBlock *SISucc = SI->getSuccessor(DeadCase); BasicBlock *Latch = L->getLoopLatch(); + + UnswitchedVals[SI].insert(Val); + if (!SI->findCaseDest(SISucc)) continue; // Edge is critical. 
// If the DeadCase successor dominates the loop latch, then the // transformation isn't safe since it will delete the sole predecessor edge @@ -1017,7 +1091,7 @@ void LoopUnswitch::SimplifyCode(std::vector<Instruction*> &Worklist, Loop *L) { // See if instruction simplification can hack this up. This is common for // things like "select false, X, Y" after unswitching made the condition be // 'false'. - if (Value *V = SimplifyInstruction(I, 0, DT)) + if (Value *V = SimplifyInstruction(I, 0, 0, DT)) if (LI->replacementPreservesLCSSAForm(I, V)) { ReplaceUsesOfWith(I, V, Worklist, L, LPM); continue; } diff --git a/lib/Transforms/Scalar/MemCpyOptimizer.cpp b/lib/Transforms/Scalar/MemCpyOptimizer.cpp index 9e4f51f..7335626 100644 --- a/lib/Transforms/Scalar/MemCpyOptimizer.cpp +++ b/lib/Transforms/Scalar/MemCpyOptimizer.cpp @@ -147,8 +147,8 @@ struct MemsetRange { } // end anon namespace bool MemsetRange::isProfitableToUseMemset(const TargetData &TD) const { - // If we found more than 8 stores to merge or 64 bytes, use memset. - if (TheStores.size() >= 8 || End-Start >= 64) return true; + // If we found more than 4 stores to merge or 16 bytes, use memset. + if (TheStores.size() >= 4 || End-Start >= 16) return true; // If there is nothing to merge, don't do anything. if (TheStores.size() < 2) return false; diff --git a/lib/Transforms/Scalar/ObjCARC.cpp b/lib/Transforms/Scalar/ObjCARC.cpp index 80f5f01..8e9449f 100644 --- a/lib/Transforms/Scalar/ObjCARC.cpp +++ b/lib/Transforms/Scalar/ObjCARC.cpp @@ -179,9 +179,13 @@ static bool IsPotentialUse(const Value *Op) { Arg->hasNestAttr() || Arg->hasStructRetAttr()) return false; - // Only consider values with pointer types, and not function pointers. + // Only consider values with pointer types. + // It seems intuitive to exclude function pointer types as well, since + // functions are never reference-counted; however, clang occasionally + // bitcasts reference-counted pointers to function-pointer type + // temporarily. PointerType *Ty = dyn_cast<PointerType>(Op->getType()); - if (!Ty || isa<FunctionType>(Ty->getElementType())) + if (!Ty) return false; // Conservatively assume anything else is a potential use. return true; @@ -896,8 +900,9 @@ bool ObjCARCExpand::runOnFunction(Function &F) { #include "llvm/LLVMContext.h" #include "llvm/Support/ErrorHandling.h" #include "llvm/Support/CFG.h" -#include "llvm/ADT/PostOrderIterator.h" #include "llvm/ADT/Statistic.h" +#include "llvm/ADT/SmallPtrSet.h" +#include "llvm/ADT/DenseSet.h" STATISTIC(NumNoops, "Number of no-op objc calls eliminated"); STATISTIC(NumPartialNoops, "Number of partially no-op objc calls eliminated"); @@ -1165,6 +1170,7 @@ namespace { /// Partial - True of we've seen an opportunity for partial RR elimination, /// such as pushing calls into a CFG triangle or into one side of a /// CFG diamond. + /// TODO: Consider moving this to PtrState. bool Partial; /// ReleaseMetadata - If the Calls are objc_release calls and they all have @@ -1251,16 +1257,6 @@ namespace { Seq = NewSeq; } - void SetSeqToRelease(MDNode *M) { - if (Seq == S_None || Seq == S_Use) { - Seq = M ? S_MovableRelease : S_Release; - RRI.ReleaseMetadata = M; - } else if (Seq != S_MovableRelease || RRI.ReleaseMetadata != M) { - Seq = S_Release; - RRI.ReleaseMetadata = 0; - } - } - Sequence GetSeq() const { return Seq; } @@ -1488,7 +1484,7 @@ namespace { /// metadata. 
unsigned ImpreciseReleaseMDKind; - /// CopyOnEscape - The Metadata Kind for clang.arc.copy_on_escape + /// CopyOnEscapeMDKind - The Metadata Kind for clang.arc.copy_on_escape /// metadata. unsigned CopyOnEscapeMDKind; @@ -2255,6 +2251,7 @@ ObjCARCOpt::CheckForCFGHazards(const BasicBlock *BB, // guards against loops in the middle of a sequence. if (SomeSuccHasSame && !AllSuccsHaveSame) S.ClearSequenceProgress(); + break; } case S_CanRelease: { const Value *Arg = I->first; @@ -2289,6 +2286,7 @@ ObjCARCOpt::CheckForCFGHazards(const BasicBlock *BB, // guards against loops in the middle of a sequence. if (SomeSuccHasSame && !AllSuccsHaveSame) S.ClearSequenceProgress(); + break; } } } @@ -2350,8 +2348,11 @@ ObjCARCOpt::VisitBottomUp(BasicBlock *BB, if (S.GetSeq() == S_Release || S.GetSeq() == S_MovableRelease) NestingDetected = true; - S.SetSeqToRelease(Inst->getMetadata(ImpreciseReleaseMDKind)); S.RRI.clear(); + + MDNode *ReleaseMetadata = Inst->getMetadata(ImpreciseReleaseMDKind); + S.SetSeq(ReleaseMetadata ? S_MovableRelease : S_Release); + S.RRI.ReleaseMetadata = ReleaseMetadata; S.RRI.KnownSafe = S.IsKnownNested() || S.IsKnownIncremented(); S.RRI.IsTailCallRelease = cast<CallInst>(Inst)->isTailCall(); S.RRI.Calls.insert(Inst); @@ -2494,18 +2495,16 @@ ObjCARCOpt::VisitTopDown(BasicBlock *BB, if (Pred == BB) continue; DenseMap<const BasicBlock *, BBState>::iterator I = BBStates.find(Pred); - assert(I != BBStates.end()); // If we haven't seen this node yet, then we've found a CFG cycle. // Be optimistic here; it's CheckForCFGHazards' job detect trouble. - if (!I->second.isVisitedTopDown()) + if (I == BBStates.end() || !I->second.isVisitedTopDown()) continue; MyStates.InitFromPred(I->second); while (PI != PE) { Pred = *PI++; if (Pred != BB) { I = BBStates.find(Pred); - assert(I != BBStates.end()); - if (I->second.isVisitedTopDown()) + if (I == BBStates.end() || I->second.isVisitedTopDown()) MyStates.MergePred(I->second); } } @@ -2661,49 +2660,106 @@ ObjCARCOpt::VisitTopDown(BasicBlock *BB, return NestingDetected; } -// Visit - Visit the function both top-down and bottom-up. -bool -ObjCARCOpt::Visit(Function &F, - DenseMap<const BasicBlock *, BBState> &BBStates, - MapVector<Value *, RRInfo> &Retains, - DenseMap<Value *, RRInfo> &Releases) { - // Use reverse-postorder on the reverse CFG for bottom-up, because we - // magically know that loops will be well behaved, i.e. they won't repeatedly - // call retain on a single pointer without doing a release. We can't use - // ReversePostOrderTraversal here because we want to walk up from each - // function exit point. +static void +ComputePostOrders(Function &F, + SmallVectorImpl<BasicBlock *> &PostOrder, + SmallVectorImpl<BasicBlock *> &ReverseCFGPostOrder) { + /// Backedges - Backedges detected in the DFS. These edges will be + /// ignored in the reverse-CFG DFS, so that loops with multiple exits will be + /// traversed in the desired order. + DenseSet<std::pair<BasicBlock *, BasicBlock *> > Backedges; + + /// Visited - The visited set, for doing DFS walks. SmallPtrSet<BasicBlock *, 16> Visited; - SmallVector<std::pair<BasicBlock *, pred_iterator>, 16> Stack; - SmallVector<BasicBlock *, 16> Order; + + // Do DFS, computing the PostOrder. 
+ SmallPtrSet<BasicBlock *, 16> OnStack; + SmallVector<std::pair<BasicBlock *, succ_iterator>, 16> SuccStack; + BasicBlock *EntryBB = &F.getEntryBlock(); + SuccStack.push_back(std::make_pair(EntryBB, succ_begin(EntryBB))); + Visited.insert(EntryBB); + OnStack.insert(EntryBB); + do { + dfs_next_succ: + succ_iterator End = succ_end(SuccStack.back().first); + while (SuccStack.back().second != End) { + BasicBlock *BB = *SuccStack.back().second++; + if (Visited.insert(BB)) { + SuccStack.push_back(std::make_pair(BB, succ_begin(BB))); + OnStack.insert(BB); + goto dfs_next_succ; + } + if (OnStack.count(BB)) + Backedges.insert(std::make_pair(SuccStack.back().first, BB)); + } + OnStack.erase(SuccStack.back().first); + PostOrder.push_back(SuccStack.pop_back_val().first); + } while (!SuccStack.empty()); + + Visited.clear(); + + // Compute the exits, which are the starting points for reverse-CFG DFS. + SmallVector<BasicBlock *, 4> Exits; for (Function::iterator I = F.begin(), E = F.end(); I != E; ++I) { BasicBlock *BB = I; if (BB->getTerminator()->getNumSuccessors() == 0) - Stack.push_back(std::make_pair(BB, pred_begin(BB))); + Exits.push_back(BB); } - while (!Stack.empty()) { - pred_iterator End = pred_end(Stack.back().first); - while (Stack.back().second != End) { - BasicBlock *BB = *Stack.back().second++; - if (Visited.insert(BB)) - Stack.push_back(std::make_pair(BB, pred_begin(BB))); - } - Order.push_back(Stack.pop_back_val().first); + + // Do reverse-CFG DFS, computing the reverse-CFG PostOrder. + SmallVector<std::pair<BasicBlock *, pred_iterator>, 16> PredStack; + for (SmallVectorImpl<BasicBlock *>::iterator I = Exits.begin(), E = Exits.end(); + I != E; ++I) { + BasicBlock *ExitBB = *I; + PredStack.push_back(std::make_pair(ExitBB, pred_begin(ExitBB))); + Visited.insert(ExitBB); + while (!PredStack.empty()) { + reverse_dfs_next_succ: + pred_iterator End = pred_end(PredStack.back().first); + while (PredStack.back().second != End) { + BasicBlock *BB = *PredStack.back().second++; + // Skip backedges detected in the forward-CFG DFS. + if (Backedges.count(std::make_pair(BB, PredStack.back().first))) + continue; + if (Visited.insert(BB)) { + PredStack.push_back(std::make_pair(BB, pred_begin(BB))); + goto reverse_dfs_next_succ; + } + } + ReverseCFGPostOrder.push_back(PredStack.pop_back_val().first); + } } +} + +// Visit - Visit the function both top-down and bottom-up. +bool +ObjCARCOpt::Visit(Function &F, + DenseMap<const BasicBlock *, BBState> &BBStates, + MapVector<Value *, RRInfo> &Retains, + DenseMap<Value *, RRInfo> &Releases) { + + // Use reverse-postorder traversals, because we magically know that loops + // will be well behaved, i.e. they won't repeatedly call retain on a single + // pointer without doing a release. We can't use the ReversePostOrderTraversal + // class here because we want the reverse-CFG postorder to consider each + // function exit point, and we want to ignore selected cycle edges. + SmallVector<BasicBlock *, 16> PostOrder; + SmallVector<BasicBlock *, 16> ReverseCFGPostOrder; + ComputePostOrders(F, PostOrder, ReverseCFGPostOrder); + + // Use reverse-postorder on the reverse CFG for bottom-up. 
bool BottomUpNestingDetected = false; for (SmallVectorImpl<BasicBlock *>::const_reverse_iterator I = - Order.rbegin(), E = Order.rend(); I != E; ++I) { - BasicBlock *BB = *I; - BottomUpNestingDetected |= VisitBottomUp(BB, BBStates, Retains); - } + ReverseCFGPostOrder.rbegin(), E = ReverseCFGPostOrder.rend(); + I != E; ++I) + BottomUpNestingDetected |= VisitBottomUp(*I, BBStates, Retains); - // Use regular reverse-postorder for top-down. + // Use reverse-postorder for top-down. bool TopDownNestingDetected = false; - typedef ReversePostOrderTraversal<Function *> RPOTType; - RPOTType RPOT(&F); - for (RPOTType::rpo_iterator I = RPOT.begin(), E = RPOT.end(); I != E; ++I) { - BasicBlock *BB = *I; - TopDownNestingDetected |= VisitTopDown(BB, BBStates, Releases); - } + for (SmallVectorImpl<BasicBlock *>::const_reverse_iterator I = + PostOrder.rbegin(), E = PostOrder.rend(); + I != E; ++I) + TopDownNestingDetected |= VisitTopDown(*I, BBStates, Releases); return TopDownNestingDetected && BottomUpNestingDetected; } @@ -3139,7 +3195,7 @@ void ObjCARCOpt::OptimizeWeakCalls(Function &F) { UE = Alloca->use_end(); UI != UE; ) { CallInst *UserInst = cast<CallInst>(*UI++); if (!UserInst->use_empty()) - UserInst->replaceAllUsesWith(UserInst->getOperand(1)); + UserInst->replaceAllUsesWith(UserInst->getArgOperand(0)); UserInst->eraseFromParent(); } Alloca->eraseFromParent(); diff --git a/lib/Transforms/Scalar/SCCP.cpp b/lib/Transforms/Scalar/SCCP.cpp index f6762ad..e4cb55c 100644 --- a/lib/Transforms/Scalar/SCCP.cpp +++ b/lib/Transforms/Scalar/SCCP.cpp @@ -28,6 +28,7 @@ #include "llvm/Analysis/ValueTracking.h" #include "llvm/Transforms/Utils/Local.h" #include "llvm/Target/TargetData.h" +#include "llvm/Target/TargetLibraryInfo.h" #include "llvm/Support/CallSite.h" #include "llvm/Support/Debug.h" #include "llvm/Support/ErrorHandling.h" @@ -156,6 +157,7 @@ namespace { /// class SCCPSolver : public InstVisitor<SCCPSolver> { const TargetData *TD; + const TargetLibraryInfo *TLI; SmallPtrSet<BasicBlock*, 8> BBExecutable; // The BBs that are executable. DenseMap<Value*, LatticeVal> ValueState; // The state each value is in. @@ -206,7 +208,8 @@ class SCCPSolver : public InstVisitor<SCCPSolver> { typedef std::pair<BasicBlock*, BasicBlock*> Edge; DenseSet<Edge> KnownFeasibleEdges; public: - SCCPSolver(const TargetData *td) : TD(td) {} + SCCPSolver(const TargetData *td, const TargetLibraryInfo *tli) + : TD(td), TLI(tli) {} /// MarkBlockExecutable - This method can be used by clients to mark all of /// the blocks that are known to be intrinsically live in the processed unit. @@ -1125,7 +1128,7 @@ CallOverdefined: // If we can constant fold this, mark the result of the call as a // constant. - if (Constant *C = ConstantFoldCall(F, Operands)) + if (Constant *C = ConstantFoldCall(F, Operands, TLI)) return markConstant(I, C); } @@ -1517,6 +1520,9 @@ namespace { /// Sparse Conditional Constant Propagator. 
/// struct SCCP : public FunctionPass { + virtual void getAnalysisUsage(AnalysisUsage &AU) const { + AU.addRequired<TargetLibraryInfo>(); + } static char ID; // Pass identification, replacement for typeid SCCP() : FunctionPass(ID) { initializeSCCPPass(*PassRegistry::getPassRegistry()); } @@ -1569,7 +1575,9 @@ static void DeleteInstructionInBlock(BasicBlock *BB) { // bool SCCP::runOnFunction(Function &F) { DEBUG(dbgs() << "SCCP on function '" << F.getName() << "'\n"); - SCCPSolver Solver(getAnalysisIfAvailable<TargetData>()); + const TargetData *TD = getAnalysisIfAvailable<TargetData>(); + const TargetLibraryInfo *TLI = &getAnalysis<TargetLibraryInfo>(); + SCCPSolver Solver(TD, TLI); // Mark the first block of the function as being executable. Solver.MarkBlockExecutable(F.begin()); @@ -1641,6 +1649,9 @@ namespace { /// Constant Propagation. /// struct IPSCCP : public ModulePass { + virtual void getAnalysisUsage(AnalysisUsage &AU) const { + AU.addRequired<TargetLibraryInfo>(); + } static char ID; IPSCCP() : ModulePass(ID) { initializeIPSCCPPass(*PassRegistry::getPassRegistry()); } @@ -1650,7 +1661,11 @@ } // end anonymous namespace char IPSCCP::ID = 0; -INITIALIZE_PASS(IPSCCP, "ipsccp", +INITIALIZE_PASS_BEGIN(IPSCCP, "ipsccp", + "Interprocedural Sparse Conditional Constant Propagation", + false, false) +INITIALIZE_PASS_DEPENDENCY(TargetLibraryInfo) +INITIALIZE_PASS_END(IPSCCP, "ipsccp", "Interprocedural Sparse Conditional Constant Propagation", false, false) @@ -1689,7 +1704,9 @@ static bool AddressIsTaken(const GlobalValue *GV) { } bool IPSCCP::runOnModule(Module &M) { - SCCPSolver Solver(getAnalysisIfAvailable<TargetData>()); + const TargetData *TD = getAnalysisIfAvailable<TargetData>(); + const TargetLibraryInfo *TLI = &getAnalysis<TargetLibraryInfo>(); + SCCPSolver Solver(TD, TLI); // AddressTakenFunctions - This set keeps track of the address-taken functions // that are in the input. As IPSCCP runs through and simplifies code, diff --git a/lib/Transforms/Scalar/ScalarReplAggregates.cpp b/lib/Transforms/Scalar/ScalarReplAggregates.cpp index 4b14efc..bc70c51 100644 --- a/lib/Transforms/Scalar/ScalarReplAggregates.cpp +++ b/lib/Transforms/Scalar/ScalarReplAggregates.cpp @@ -453,6 +453,8 @@ bool ConvertToScalarInfo::CanConvertToScalar(Value *V, uint64_t Offset) { // Compute the offset that this GEP adds to the pointer. SmallVector<Value*, 8> Indices(GEP->op_begin()+1, GEP->op_end()); + if (!GEP->getPointerOperandType()->isPointerTy()) + return false; uint64_t GEPOffset = TD.getIndexedOffset(GEP->getPointerOperandType(), Indices); // See if all uses can be converted. diff --git a/lib/Transforms/Scalar/SimplifyLibCalls.cpp b/lib/Transforms/Scalar/SimplifyLibCalls.cpp index 6e169de..f3184ec 100644 --- a/lib/Transforms/Scalar/SimplifyLibCalls.cpp +++ b/lib/Transforms/Scalar/SimplifyLibCalls.cpp @@ -999,7 +999,7 @@ struct FFSOpt : public LibCallOptimization { Type *ArgType = Op->getType(); Value *F = Intrinsic::getDeclaration(Callee->getParent(), Intrinsic::cttz, ArgType); - Value *V = B.CreateCall(F, Op, "cttz"); + Value *V = B.CreateCall2(F, Op, B.getFalse(), "cttz"); V = B.CreateAdd(V, ConstantInt::get(V->getType(), 1)); V = B.CreateIntCast(V, B.getInt32Ty(), false); @@ -1293,7 +1293,8 @@ struct FWriteOpt : public LibCallOptimization { return ConstantInt::get(CI->getType(), 0); // If this is writing one byte, turn it into fputc. - if (Bytes == 1) { // fwrite(S,1,1,F) -> fputc(S[0],F) + // This optimisation is only valid if the return value is unused. 
+ if (Bytes == 1 && CI->use_empty()) { // fwrite(S,1,1,F) -> fputc(S[0],F) Value *Char = B.CreateLoad(CastToCStr(CI->getArgOperand(0), B), "char"); EmitFPutC(Char, CI->getArgOperand(3), B, TD); return ConstantInt::get(CI->getType(), 1); diff --git a/lib/Transforms/Scalar/Sink.cpp b/lib/Transforms/Scalar/Sink.cpp index c83f56c..ef65c0a 100644 --- a/lib/Transforms/Scalar/Sink.cpp +++ b/lib/Transforms/Scalar/Sink.cpp @@ -18,6 +18,7 @@ #include "llvm/Analysis/Dominators.h" #include "llvm/Analysis/LoopInfo.h" #include "llvm/Analysis/AliasAnalysis.h" +#include "llvm/Analysis/ValueTracking.h" #include "llvm/Assembly/Writer.h" #include "llvm/ADT/Statistic.h" #include "llvm/Support/CFG.h" @@ -240,7 +241,7 @@ bool Sinking::SinkInstruction(Instruction *Inst, if (SuccToSinkTo->getUniquePredecessor() != ParentBlock) { // We cannot sink a load across a critical edge - there may be stores in // other code paths. - if (!Inst->isSafeToSpeculativelyExecute()) { + if (!isSafeToSpeculativelyExecute(Inst)) { DEBUG(dbgs() << " *** PUNTING: Wont sink load along critical edge.\n"); return false; }
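A closing note on the FFSOpt change above: the cttz intrinsic now takes a second i1 operand, is_zero_undef, so the call must be built with CreateCall2; passing getFalse() conservatively requests a defined result even for a zero input. A sketch of the whole ffs(x) lowering for context — only the CreateCall2 line is from this commit, the surrounding lines paraphrase the pre-existing FFSOpt code:

#include "llvm/Module.h"
#include "llvm/Constants.h"
#include "llvm/Intrinsics.h"
#include "llvm/Support/IRBuilder.h"
using namespace llvm;

// ffs(x) -> x != 0 ? cttz(x) + 1 : 0
static Value *lowerFFS(Value *Op, Module *M, IRBuilder<> &B) {
  Type *ArgType = Op->getType();
  Value *F = Intrinsic::getDeclaration(M, Intrinsic::cttz, ArgType);
  // New second operand: is_zero_undef = false keeps cttz(0) defined.
  Value *V = B.CreateCall2(F, Op, B.getFalse(), "cttz");
  V = B.CreateAdd(V, ConstantInt::get(V->getType(), 1));
  V = B.CreateIntCast(V, B.getInt32Ty(), false);
  // ffs(0) must be 0, so select the zero case away explicitly.
  Value *Cond = B.CreateICmpNE(Op, Constant::getNullValue(ArgType));
  return B.CreateSelect(Cond, V, B.getInt32(0));
}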