author     Nowar Gu <nowar100@gmail.com>             2011-07-01 23:28:45 +0800
committer  Nowar Gu <nowar100@gmail.com>             2011-07-01 23:37:27 +0800
commit     53d48080e55bf0c99cb7ca9de5b15a084d7324b5 (patch)
tree       98f4e257a61eebb14933d37ddc16678da0a7069d /lib/Transforms
parent     039a79eb418211573bada57ec3a1edf5a9d6071e (diff)
parent     ed5bc470aab7097c30e5f881158112f7830472f3 (diff)
Merge upstream to r134237 on Fri, 1 July 2011.
Conflicts:
	lib/Target/ARM/ARMCodeEmitter.cpp
Diffstat (limited to 'lib/Transforms')
-rw-r--r--   lib/Transforms/InstCombine/InstCombineSelect.cpp   |   4
-rw-r--r--   lib/Transforms/Scalar/IndVarSimplify.cpp           | 185
-rw-r--r--   lib/Transforms/Scalar/JumpThreading.cpp            |   4
-rw-r--r--   lib/Transforms/Scalar/LoopIdiomRecognize.cpp       |  20
-rw-r--r--   lib/Transforms/Scalar/LoopStrengthReduce.cpp       |   2
-rw-r--r--   lib/Transforms/Scalar/ScalarReplAggregates.cpp     |  52
-rw-r--r--   lib/Transforms/Utils/Local.cpp                     |  24
-rw-r--r--   lib/Transforms/Utils/PromoteMemoryToRegister.cpp   |  17
-rw-r--r--   lib/Transforms/Utils/SimplifyCFG.cpp               |   2
9 files changed, 180 insertions, 130 deletions
diff --git a/lib/Transforms/InstCombine/InstCombineSelect.cpp b/lib/Transforms/InstCombine/InstCombineSelect.cpp
index aeb3c3e..5733c20 100644
--- a/lib/Transforms/InstCombine/InstCombineSelect.cpp
+++ b/lib/Transforms/InstCombine/InstCombineSelect.cpp
@@ -796,7 +796,7 @@ Instruction *InstCombiner::visitSelectInst(SelectInst &SI) {
       // So at this point we know we have (Y -> OtherAddOp):
       //        select C, (add X, Y), (sub X, Z)
       Value *NegVal;  // Compute -Z
-      if (SI.getType()->isFloatingPointTy()) {
+      if (SI.getType()->isFPOrFPVectorTy()) {
        NegVal = Builder->CreateFNeg(SubOp->getOperand(1));
       } else {
        NegVal = Builder->CreateNeg(SubOp->getOperand(1));
@@ -810,7 +810,7 @@ Instruction *InstCombiner::visitSelectInst(SelectInst &SI) {
         Builder->CreateSelect(CondVal, NewTrueOp,
                               NewFalseOp, SI.getName() + ".p");
 
-      if (SI.getType()->isFloatingPointTy())
+      if (SI.getType()->isFPOrFPVectorTy())
         return BinaryOperator::CreateFAdd(SubOp->getOperand(0), NewSel);
       else
         return BinaryOperator::CreateAdd(SubOp->getOperand(0), NewSel);
diff --git a/lib/Transforms/Scalar/IndVarSimplify.cpp b/lib/Transforms/Scalar/IndVarSimplify.cpp
index 1d79339..77642e5 100644
--- a/lib/Transforms/Scalar/IndVarSimplify.cpp
+++ b/lib/Transforms/Scalar/IndVarSimplify.cpp
@@ -52,6 +52,7 @@
 #include "llvm/Analysis/LoopInfo.h"
 #include "llvm/Analysis/LoopPass.h"
 #include "llvm/Support/CFG.h"
+#include "llvm/Support/CommandLine.h"
 #include "llvm/Support/Debug.h"
 #include "llvm/Support/raw_ostream.h"
 #include "llvm/Transforms/Utils/Local.h"
@@ -72,11 +73,9 @@
 STATISTIC(NumElimExt    , "Number of IV sign/zero extends eliminated");
 STATISTIC(NumElimRem    , "Number of IV remainder operations eliminated");
 STATISTIC(NumElimCmp    , "Number of IV comparisons eliminated");
 
-// DisableIVRewrite mode currently affects IVUsers, so is defined in libAnalysis
-// and referenced here.
-namespace llvm {
-  extern bool DisableIVRewrite;
-}
+static cl::opt<bool> DisableIVRewrite(
+  "disable-iv-rewrite", cl::Hidden,
+  cl::desc("Disable canonical induction variable rewriting"));
 
 namespace {
   class IndVarSimplify : public LoopPass {
@@ -86,21 +85,13 @@ namespace {
     DominatorTree   *DT;
     TargetData      *TD;
 
-    PHINode         *CurrIV; // Current IV being simplified.
-
-    // Instructions processed by SimplifyIVUsers for CurrIV.
-    SmallPtrSet<Instruction*,16> Simplified;
-
-    // Use-def pairs if IVUsers waiting to be processed for CurrIV.
-    SmallVector<std::pair<Instruction*, Instruction*>, 8> SimpleIVUsers;
-
     SmallVector<WeakVH, 16> DeadInsts;
     bool Changed;
   public:
 
     static char ID; // Pass identification, replacement for typeid
     IndVarSimplify() : LoopPass(ID), IU(0), LI(0), SE(0), DT(0), TD(0),
-                       CurrIV(0), Changed(false) {
+                       Changed(false) {
       initializeIndVarSimplifyPass(*PassRegistry::getPassRegistry());
     }
@@ -112,7 +103,8 @@ namespace {
       AU.addRequired<ScalarEvolution>();
       AU.addRequiredID(LoopSimplifyID);
       AU.addRequiredID(LCSSAID);
-      AU.addRequired<IVUsers>();
+      if (!DisableIVRewrite)
+        AU.addRequired<IVUsers>();
       AU.addPreserved<ScalarEvolution>();
       AU.addPreservedID(LoopSimplifyID);
       AU.addPreservedID(LCSSAID);
@@ -132,7 +124,6 @@ namespace {
     void EliminateIVRemainder(BinaryOperator *Rem,
                               Value *IVOperand,
                               bool IsSigned);
-    void pushIVUsers(Instruction *Def);
     bool isSimpleIVUser(Instruction *I, const Loop *L);
     void RewriteNonIntegerIVs(Loop *L);
 
@@ -618,8 +609,7 @@ protected:
 
   const SCEVAddRecExpr *GetWideRecurrence(Instruction *NarrowUse);
 
-  Instruction *WidenIVUse(Instruction *NarrowUse,
-                          Instruction *NarrowDef,
+  Instruction *WidenIVUse(Use &NarrowDefUse, Instruction *NarrowDef,
                           Instruction *WideDef);
 };
 } // anonymous namespace
@@ -669,9 +659,11 @@ Instruction *WidenIV::CloneIVUser(Instruction *NarrowUse,
                                       LHS, RHS,
                                       NarrowBO->getName());
     Builder.Insert(WideBO);
-    if (NarrowBO->hasNoUnsignedWrap()) WideBO->setHasNoUnsignedWrap();
-    if (NarrowBO->hasNoSignedWrap()) WideBO->setHasNoSignedWrap();
-
+    if (const OverflowingBinaryOperator *OBO =
+        dyn_cast<OverflowingBinaryOperator>(NarrowBO)) {
+      if (OBO->hasNoUnsignedWrap()) WideBO->setHasNoUnsignedWrap();
+      if (OBO->hasNoSignedWrap()) WideBO->setHasNoSignedWrap();
+    }
     return WideBO;
   }
   llvm_unreachable(0);
@@ -733,9 +725,10 @@ static bool HoistStep(Instruction *IncV, Instruction *InsertPos,
 
 /// WidenIVUse - Determine whether an individual user of the narrow IV can be
 /// widened. If so, return the wide clone of the user.
-Instruction *WidenIV::WidenIVUse(Instruction *NarrowUse,
-                                 Instruction *NarrowDef,
+Instruction *WidenIV::WidenIVUse(Use &NarrowDefUse, Instruction *NarrowDef,
                                  Instruction *WideDef) {
+  Instruction *NarrowUse = cast<Instruction>(NarrowDefUse.getUser());
+
   // To be consistent with IVUsers, stop traversing the def-use chain at
   // inner-loop phis or post-loop phis.
   if (isa<PHINode>(NarrowUse) && LI->getLoopFor(NarrowUse->getParent()) != L)
@@ -753,7 +746,7 @@ Instruction *WidenIV::WidenIVUse(Instruction *NarrowUse,
     unsigned IVWidth = SE->getTypeSizeInBits(WideType);
     if (CastWidth < IVWidth) {
       // The cast isn't as wide as the IV, so insert a Trunc.
-      IRBuilder<> Builder(NarrowUse);
+      IRBuilder<> Builder(NarrowDefUse);
       NewDef = Builder.CreateTrunc(WideDef, NarrowUse->getType());
     }
     else {
@@ -787,11 +780,15 @@ Instruction *WidenIV::WidenIVUse(Instruction *NarrowUse,
     // This user does not evaluate to a recurence after widening, so don't
     // follow it. Instead insert a Trunc to kill off the original use,
     // eventually isolating the original narrow IV so it can be removed.
-    IRBuilder<> Builder(NarrowUse);
+    IRBuilder<> Builder(NarrowDefUse);
     Value *Trunc = Builder.CreateTrunc(WideDef, NarrowDef->getType());
     NarrowUse->replaceUsesOfWith(NarrowDef, Trunc);
     return 0;
   }
+  // We assume that block terminators are not SCEVable.
+  assert(NarrowUse != NarrowUse->getParent()->getTerminator() &&
+         "can't split terminators");
+
   // Reuse the IV increment that SCEVExpander created as long as it dominates
   // NarrowUse.
   Instruction *WideUse = 0;
@@ -885,20 +882,20 @@ PHINode *WidenIV::CreateWideIV(SCEVExpander &Rewriter) {
     NarrowIVUsers.push_back(std::make_pair(&UI.getUse(), WidePhi));
   }
   while (!NarrowIVUsers.empty()) {
-    Use *NarrowDefUse;
+    Use *UsePtr;
     Instruction *WideDef;
-    tie(NarrowDefUse, WideDef) = NarrowIVUsers.pop_back_val();
+    tie(UsePtr, WideDef) = NarrowIVUsers.pop_back_val();
+    Use &NarrowDefUse = *UsePtr;
 
     // Process a def-use edge. This may replace the use, so don't hold a
     // use_iterator across it.
-    Instruction *NarrowDef = cast<Instruction>(NarrowDefUse->get());
-    Instruction *NarrowUse = cast<Instruction>(NarrowDefUse->getUser());
-    Instruction *WideUse = WidenIVUse(NarrowUse, NarrowDef, WideDef);
+    Instruction *NarrowDef = cast<Instruction>(NarrowDefUse.get());
+    Instruction *WideUse = WidenIVUse(NarrowDefUse, NarrowDef, WideDef);
 
     // Follow all def-use edges from the previous narrow use.
     if (WideUse) {
-      for (Value::use_iterator UI = NarrowUse->use_begin(),
-             UE = NarrowUse->use_end(); UI != UE; ++UI) {
+      for (Value::use_iterator UI = NarrowDefUse.getUser()->use_begin(),
+             UE = NarrowDefUse.getUser()->use_end(); UI != UE; ++UI) {
         NarrowIVUsers.push_back(std::make_pair(&UI.getUse(), WideUse));
       }
     }
@@ -1016,12 +1013,13 @@ bool IndVarSimplify::EliminateIVUser(Instruction *UseInst,
 
   // Eliminate any operation that SCEV can prove is an identity function.
   if (!SE->isSCEVable(UseInst->getType()) ||
+      (UseInst->getType() != IVOperand->getType()) ||
      (SE->getSCEV(UseInst) != SE->getSCEV(IVOperand)))
     return false;
 
-  UseInst->replaceAllUsesWith(IVOperand);
-
   DEBUG(dbgs() << "INDVARS: Eliminated identity: " << *UseInst << '\n');
+
+  UseInst->replaceAllUsesWith(IVOperand);
   ++NumElimIdentity;
   Changed = true;
   DeadInsts.push_back(UseInst);
@@ -1030,7 +1028,10 @@ bool IndVarSimplify::EliminateIVUser(Instruction *UseInst,
 
 /// pushIVUsers - Add all uses of Def to the current IV's worklist.
 ///
-void IndVarSimplify::pushIVUsers(Instruction *Def) {
+static void pushIVUsers(
+  Instruction *Def,
+  SmallPtrSet<Instruction*,16> &Simplified,
+  SmallVectorImpl< std::pair<Instruction*,Instruction*> > &SimpleIVUsers) {
   for (Value::use_iterator UI = Def->use_begin(),
          E = Def->use_end(); UI != E; ++UI) {
@@ -1038,7 +1039,9 @@ void IndVarSimplify::pushIVUsers(Instruction *Def) {
 
     // Avoid infinite or exponential worklist processing.
     // Also ensure unique worklist users.
-    if (Simplified.insert(User))
+    // If Def is a LoopPhi, it may not be in the Simplified set, so check for
+    // self edges first.
+    if (User != Def && Simplified.insert(User))
       SimpleIVUsers.push_back(std::make_pair(User, Def));
   }
 }
@@ -1056,6 +1059,10 @@ bool IndVarSimplify::isSimpleIVUser(Instruction *I, const Loop *L) {
   // Get the symbolic expression for this instruction.
   const SCEV *S = SE->getSCEV(I);
 
+  // We assume that terminators are not SCEVable.
+  assert((!S || I != I->getParent()->getTerminator()) &&
+         "can't fold terminators");
+
   // Only consider affine recurrences.
   const SCEVAddRecExpr *AR = dyn_cast<SCEVAddRecExpr>(S);
   if (AR && AR->getLoop() == L)
@@ -1079,50 +1086,75 @@ bool IndVarSimplify::isSimpleIVUser(Instruction *I, const Loop *L) {
 /// Once DisableIVRewrite is default, LSR will be the only client of IVUsers.
 ///
 void IndVarSimplify::SimplifyIVUsersNoRewrite(Loop *L, SCEVExpander &Rewriter) {
-  // Simplification is performed independently for each IV, as represented by a
-  // loop header phi. Each round of simplification first iterates through the
-  // SimplifyIVUsers worklist, then determines whether the current IV should be
-  // widened. Widening adds a new phi to LoopPhis, inducing another round of
-  // simplification on the wide IV.
+  std::map<PHINode *, WideIVInfo> WideIVMap;
+
   SmallVector<PHINode*, 8> LoopPhis;
   for (BasicBlock::iterator I = L->getHeader()->begin(); isa<PHINode>(I); ++I) {
     LoopPhis.push_back(cast<PHINode>(I));
   }
+
+  // Each round of simplification iterates through the SimplifyIVUsers worklist
+  // for all current phis, then determines whether any IVs can be
+  // widened. Widening adds new phis to LoopPhis, inducing another round of
+  // simplification on the wide IVs.
   while (!LoopPhis.empty()) {
-    CurrIV = LoopPhis.pop_back_val();
-    Simplified.clear();
-    assert(SimpleIVUsers.empty() && "expect empty IV users list");
-
-    WideIVInfo WI;
-
-    pushIVUsers(CurrIV);
-
-    while (!SimpleIVUsers.empty()) {
-      Instruction *UseInst, *Operand;
-      tie(UseInst, Operand) = SimpleIVUsers.pop_back_val();
-
-      if (EliminateIVUser(UseInst, Operand)) {
-        pushIVUsers(Operand);
-        continue;
-      }
-      if (CastInst *Cast = dyn_cast<CastInst>(UseInst)) {
-        bool IsSigned = Cast->getOpcode() == Instruction::SExt;
-        if (IsSigned || Cast->getOpcode() == Instruction::ZExt) {
-          CollectExtend(Cast, IsSigned, WI, SE, TD);
+    // Evaluate as many IV expressions as possible before widening any IVs. This
+    // forces SCEV to set no-wrap flags before evaluating sign/zero
+    // extension. The first time SCEV attempts to normalize sign/zero extension,
+    // the result becomes final. So for the most predictable results, we delay
+    // evaluation of sign/zero extend evaluation until needed, and avoid running
+    // other SCEV based analysis prior to SimplifyIVUsersNoRewrite.
+    do {
+      PHINode *CurrIV = LoopPhis.pop_back_val();
+
+      // Information about sign/zero extensions of CurrIV.
+      WideIVInfo WI;
+
+      // Instructions processed by SimplifyIVUsers for CurrIV.
+      SmallPtrSet<Instruction*,16> Simplified;
+
+      // Use-def pairs if IVUsers waiting to be processed for CurrIV.
+      SmallVector<std::pair<Instruction*, Instruction*>, 8> SimpleIVUsers;
+
+      // Push users of the current LoopPhi. In rare cases, pushIVUsers may be
+      // called multiple times for the same LoopPhi. This is the proper thing to
+      // do for loop header phis that use each other.
+      pushIVUsers(CurrIV, Simplified, SimpleIVUsers);
+
+      while (!SimpleIVUsers.empty()) {
+        Instruction *UseInst, *Operand;
+        tie(UseInst, Operand) = SimpleIVUsers.pop_back_val();
+        // Bypass back edges to avoid extra work.
+        if (UseInst == CurrIV) continue;
+
+        if (EliminateIVUser(UseInst, Operand)) {
+          pushIVUsers(Operand, Simplified, SimpleIVUsers);
+          continue;
+        }
+        if (CastInst *Cast = dyn_cast<CastInst>(UseInst)) {
+          bool IsSigned = Cast->getOpcode() == Instruction::SExt;
+          if (IsSigned || Cast->getOpcode() == Instruction::ZExt) {
+            CollectExtend(Cast, IsSigned, WI, SE, TD);
+          }
+          continue;
+        }
+        if (isSimpleIVUser(UseInst, L)) {
+          pushIVUsers(UseInst, Simplified, SimpleIVUsers);
         }
-        continue;
       }
-      if (isSimpleIVUser(UseInst, L)) {
-        pushIVUsers(UseInst);
+      if (WI.WidestNativeType) {
+        WideIVMap[CurrIV] = WI;
       }
-    }
-    if (WI.WidestNativeType) {
-      WidenIV Widener(CurrIV, WI, LI, SE, DT, DeadInsts);
+    } while(!LoopPhis.empty());
+
+    for (std::map<PHINode *, WideIVInfo>::const_iterator I = WideIVMap.begin(),
+           E = WideIVMap.end(); I != E; ++I) {
+      WidenIV Widener(I->first, I->second, LI, SE, DT, DeadInsts);
       if (PHINode *WidePhi = Widener.CreateWideIV(Rewriter)) {
        Changed = true;
        LoopPhis.push_back(WidePhi);
      }
    }
+    WideIVMap.clear();
   }
 }
@@ -1145,8 +1177,6 @@ bool IndVarSimplify::runOnLoop(Loop *L, LPPassManager &LPM) {
   DT = &getAnalysis<DominatorTree>();
   TD = getAnalysisIfAvailable<TargetData>();
 
-  CurrIV = NULL;
-  Simplified.clear();
   DeadInsts.clear();
   Changed = false;
 
@@ -1157,9 +1187,18 @@ bool IndVarSimplify::runOnLoop(Loop *L, LPPassManager &LPM) {
   const SCEV *BackedgeTakenCount = SE->getBackedgeTakenCount(L);
 
   // Create a rewriter object which we'll use to transform the code with.
-  SCEVExpander Rewriter(*SE);
-  if (DisableIVRewrite)
+  SCEVExpander Rewriter(*SE, "indvars");
+
+  // Eliminate redundant IV users.
+  //
+  // Simplification works best when run before other consumers of SCEV. We
+  // attempt to avoid evaluating SCEVs for sign/zero extend operations until
+  // other expressions involving loop IVs have been evaluated. This helps SCEV
+  // set no-wrap flags before normalizing sign/zero extension.
+  if (DisableIVRewrite) {
     Rewriter.disableCanonicalMode();
+    SimplifyIVUsersNoRewrite(L, Rewriter);
+  }
 
   // Check to see if this loop has a computable loop-invariant execution count.
   // If so, this means that we can compute the final value of any expressions
@@ -1171,9 +1210,7 @@ bool IndVarSimplify::runOnLoop(Loop *L, LPPassManager &LPM) {
     RewriteLoopExitValues(L, Rewriter);
 
   // Eliminate redundant IV users.
-  if (DisableIVRewrite)
-    SimplifyIVUsersNoRewrite(L, Rewriter);
-  else
+  if (!DisableIVRewrite)
     SimplifyIVUsers(Rewriter);
 
   // Compute the type of the largest recurrence expression, and decide whether
diff --git a/lib/Transforms/Scalar/JumpThreading.cpp b/lib/Transforms/Scalar/JumpThreading.cpp
index cf18ff0..b500d5b 100644
--- a/lib/Transforms/Scalar/JumpThreading.cpp
+++ b/lib/Transforms/Scalar/JumpThreading.cpp
@@ -600,8 +600,10 @@ static unsigned GetBestDestForJumpOnUndef(BasicBlock *BB) {
   for (unsigned i = 1, e = BBTerm->getNumSuccessors(); i != e; ++i) {
     TestBB = BBTerm->getSuccessor(i);
     unsigned NumPreds = std::distance(pred_begin(TestBB), pred_end(TestBB));
-    if (NumPreds < MinNumPreds)
+    if (NumPreds < MinNumPreds) {
       MinSucc = i;
+      MinNumPreds = NumPreds;
+    }
   }
 
   return MinSucc;
diff --git a/lib/Transforms/Scalar/LoopIdiomRecognize.cpp b/lib/Transforms/Scalar/LoopIdiomRecognize.cpp
index dbf6eec..a7bc0e0 100644
--- a/lib/Transforms/Scalar/LoopIdiomRecognize.cpp
+++ b/lib/Transforms/Scalar/LoopIdiomRecognize.cpp
@@ -167,7 +167,7 @@ static void deleteDeadInstruction(Instruction *I, ScalarEvolution &SE) {
 static void deleteIfDeadInstruction(Value *V, ScalarEvolution &SE) {
   if (Instruction *I = dyn_cast<Instruction>(V))
     if (isInstructionTriviallyDead(I))
-       deleteDeadInstruction(I, SE);
+      deleteDeadInstruction(I, SE);
 }
 
 bool LoopIdiomRecognize::runOnLoop(Loop *L, LPPassManager &LPM) {
@@ -467,8 +467,8 @@ processLoopStridedStore(Value *DestPtr, unsigned StoreSize,
   // header.  This allows us to insert code for it in the preheader.
   BasicBlock *Preheader = CurLoop->getLoopPreheader();
   IRBuilder<> Builder(Preheader->getTerminator());
-  SCEVExpander Expander(*SE);
-
+  SCEVExpander Expander(*SE, "loop-idiom");
+
   // Okay, we have a strided store "p[i]" of a splattable value.  We can turn
   // this into a memset in the loop preheader now if we want.  However, this
   // would be unsafe to do if there is anything else in the loop that may read
@@ -488,7 +488,7 @@ processLoopStridedStore(Value *DestPtr, unsigned StoreSize,
     deleteIfDeadInstruction(BasePtr, *SE);
     return false;
   }
-
+
   // Okay, everything looks good, insert the memset.
 
   // The # stored bytes is (BECount+1)*Size.  Expand the trip count out to
@@ -556,8 +556,8 @@ processLoopStoreOfLoopLoad(StoreInst *SI, unsigned StoreSize,
   // header.  This allows us to insert code for it in the preheader.
   BasicBlock *Preheader = CurLoop->getLoopPreheader();
   IRBuilder<> Builder(Preheader->getTerminator());
-  SCEVExpander Expander(*SE);
-
+  SCEVExpander Expander(*SE, "loop-idiom");
+
   // Okay, we have a strided store "p[i]" of a loaded value.  We can turn
   // this into a memcpy in the loop preheader now if we want.  However, this
   // would be unsafe to do if there is anything else in the loop that may read
@@ -568,7 +568,7 @@ processLoopStoreOfLoopLoad(StoreInst *SI, unsigned StoreSize,
     Expander.expandCodeFor(StoreEv->getStart(),
                            Builder.getInt8PtrTy(SI->getPointerAddressSpace()),
                            Preheader->getTerminator());
-
+
   if (mayLoopAccessLocation(StoreBasePtr, AliasAnalysis::ModRef,
                             CurLoop, BECount, StoreSize,
                             getAnalysis<AliasAnalysis>(), SI)) {
@@ -593,9 +593,9 @@ processLoopStoreOfLoopLoad(StoreInst *SI, unsigned StoreSize,
     deleteIfDeadInstruction(StoreBasePtr, *SE);
     return false;
   }
-
+
   // Okay, everything is safe, we can transform this!
-
+
   // The # stored bytes is (BECount+1)*Size.  Expand the trip count out to
   // pointer size if it isn't already.
@@ -619,7 +619,7 @@ processLoopStoreOfLoopLoad(StoreInst *SI, unsigned StoreSize,
   DEBUG(dbgs() << "  Formed memcpy: " << *NewCall << "\n"
                << "    from load ptr=" << *LoadEv << " at: " << *LI << "\n"
                << "    from store ptr=" << *StoreEv << " at: " << *SI << "\n");
-
+
   // Okay, the memset has been formed.  Zap the original store and anything that
   // feeds into it.
diff --git a/lib/Transforms/Scalar/LoopStrengthReduce.cpp b/lib/Transforms/Scalar/LoopStrengthReduce.cpp
index afa0bf8..c6ca99a 100644
--- a/lib/Transforms/Scalar/LoopStrengthReduce.cpp
+++ b/lib/Transforms/Scalar/LoopStrengthReduce.cpp
@@ -3698,7 +3698,7 @@ LSRInstance::ImplementSolution(const SmallVectorImpl<const Formula *> &Solution,
   // we can remove them after we are done working.
   SmallVector<WeakVH, 16> DeadInsts;
 
-  SCEVExpander Rewriter(SE);
+  SCEVExpander Rewriter(SE, "lsr");
   Rewriter.disableCanonicalMode();
   Rewriter.setIVIncInsertPos(L, IVIncInsertPos);
diff --git a/lib/Transforms/Scalar/ScalarReplAggregates.cpp b/lib/Transforms/Scalar/ScalarReplAggregates.cpp
index 46ac948..87e364d 100644
--- a/lib/Transforms/Scalar/ScalarReplAggregates.cpp
+++ b/lib/Transforms/Scalar/ScalarReplAggregates.cpp
@@ -152,7 +152,8 @@ namespace {
     void RewriteLoadUserOfWholeAlloca(LoadInst *LI, AllocaInst *AI,
                                       SmallVector<AllocaInst*, 32> &NewElts);
 
-    static MemTransferInst *isOnlyCopiedFromConstantGlobal(AllocaInst *AI);
+    static MemTransferInst *isOnlyCopiedFromConstantGlobal(
+        AllocaInst *AI, SmallVector<Instruction*, 4> &ToDelete);
   };
 
   // SROA_DT - SROA that uses DominatorTree.
@@ -1302,7 +1303,7 @@ static bool tryToMakeAllocaBePromotable(AllocaInst *AI, const TargetData *TD) {
       LoadInst *TrueLoad =
         Builder.CreateLoad(SI->getTrueValue(), LI->getName()+".t");
       LoadInst *FalseLoad =
-        Builder.CreateLoad(SI->getFalseValue(), LI->getName()+".t");
+        Builder.CreateLoad(SI->getFalseValue(), LI->getName()+".f");
 
       // Transfer alignment and TBAA info if present.
       TrueLoad->setAlignment(LI->getAlignment());
@@ -1443,8 +1444,8 @@ static bool ShouldAttemptScalarRepl(AllocaInst *AI) {
 
 // performScalarRepl - This algorithm is a simple worklist driven algorithm,
-// which runs on all of the malloc/alloca instructions in the function, removing
-// them if they are only used by getelementptr instructions.
+// which runs on all of the alloca instructions in the function, removing them
+// if they are only used by getelementptr instructions.
 //
 bool SROA::performScalarRepl(Function &F) {
   std::vector<AllocaInst*> WorkList;
@@ -1478,12 +1479,15 @@ bool SROA::performScalarRepl(Function &F) {
     // the constant global instead.  This is commonly produced by the CFE by
     // constructs like "void foo() { int A[] = {1,2,3,4,5,6,7,8,9...}; }" if 'A'
     // is only subsequently read.
-    if (MemTransferInst *TheCopy = isOnlyCopiedFromConstantGlobal(AI)) {
+    SmallVector<Instruction *, 4> ToDelete;
+    if (MemTransferInst *Copy = isOnlyCopiedFromConstantGlobal(AI, ToDelete)) {
       DEBUG(dbgs() << "Found alloca equal to global: " << *AI << '\n');
-      DEBUG(dbgs() << "  memcpy = " << *TheCopy << '\n');
-      Constant *TheSrc = cast<Constant>(TheCopy->getSource());
+      DEBUG(dbgs() << "  memcpy = " << *Copy << '\n');
+      for (unsigned i = 0, e = ToDelete.size(); i != e; ++i)
+        ToDelete[i]->eraseFromParent();
+      Constant *TheSrc = cast<Constant>(Copy->getSource());
       AI->replaceAllUsesWith(ConstantExpr::getBitCast(TheSrc, AI->getType()));
-      TheCopy->eraseFromParent();  // Don't mutate the global.
+      Copy->eraseFromParent();  // Don't mutate the global.
       AI->eraseFromParent();
       ++NumGlobals;
       Changed = true;
@@ -2507,8 +2511,14 @@ static bool PointsToConstantGlobal(Value *V) {
 /// the uses.  If we see a memcpy/memmove that targets an unoffseted pointer to
 /// the alloca, and if the source pointer is a pointer to a constant global, we
 /// can optimize this.
-static bool isOnlyCopiedFromConstantGlobal(Value *V, MemTransferInst *&TheCopy,
-                                           bool isOffset) {
+static bool
+isOnlyCopiedFromConstantGlobal(Value *V, MemTransferInst *&TheCopy,
+                               bool isOffset,
+                               SmallVector<Instruction *, 4> &LifetimeMarkers) {
+  // We track lifetime intrinsics as we encounter them.  If we decide to go
+  // ahead and replace the value with the global, this lets the caller quickly
+  // eliminate the markers.
+
   for (Value::use_iterator UI = V->use_begin(), E = V->use_end(); UI!=E; ++UI) {
     User *U = cast<Instruction>(*UI);
@@ -2520,7 +2530,8 @@ static bool isOnlyCopiedFromConstantGlobal(Value *V, MemTransferInst *&TheCopy,
 
     if (BitCastInst *BCI = dyn_cast<BitCastInst>(U)) {
       // If uses of the bitcast are ok, we are ok.
-      if (!isOnlyCopiedFromConstantGlobal(BCI, TheCopy, isOffset))
+      if (!isOnlyCopiedFromConstantGlobal(BCI, TheCopy, isOffset,
+                                          LifetimeMarkers))
         return false;
       continue;
     }
@@ -2528,7 +2539,8 @@ static bool isOnlyCopiedFromConstantGlobal(Value *V, MemTransferInst *&TheCopy,
       // If the GEP has all zero indices, it doesn't offset the pointer.  If it
       // doesn't, it does.
       if (!isOnlyCopiedFromConstantGlobal(GEP, TheCopy,
-                                          isOffset || !GEP->hasAllZeroIndices()))
+                                          isOffset || !GEP->hasAllZeroIndices(),
+                                          LifetimeMarkers))
         return false;
       continue;
     }
@@ -2554,6 +2566,16 @@ static bool isOnlyCopiedFromConstantGlobal(Value *V, MemTransferInst *&TheCopy,
       continue;
     }
 
+    // Lifetime intrinsics can be handled by the caller.
+    if (IntrinsicInst *II = dyn_cast<IntrinsicInst>(U)) {
+      if (II->getIntrinsicID() == Intrinsic::lifetime_start ||
+          II->getIntrinsicID() == Intrinsic::lifetime_end) {
+        assert(II->use_empty() && "Lifetime markers have no result to use!");
+        LifetimeMarkers.push_back(II);
+        continue;
+      }
+    }
+
     // If this is isn't our memcpy/memmove, reject it as something we can't
     // handle.
     MemTransferInst *MI = dyn_cast<MemTransferInst>(U);
@@ -2590,9 +2612,11 @@ static bool isOnlyCopiedFromConstantGlobal(Value *V, MemTransferInst *&TheCopy,
 /// isOnlyCopiedFromConstantGlobal - Return true if the specified alloca is only
 /// modified by a copy from a constant global.  If we can prove this, we can
 /// replace any uses of the alloca with uses of the global directly.
-MemTransferInst *SROA::isOnlyCopiedFromConstantGlobal(AllocaInst *AI) {
+MemTransferInst *
+SROA::isOnlyCopiedFromConstantGlobal(AllocaInst *AI,
+                                     SmallVector<Instruction*, 4> &ToDelete) {
   MemTransferInst *TheCopy = 0;
-  if (::isOnlyCopiedFromConstantGlobal(AI, TheCopy, false))
+  if (::isOnlyCopiedFromConstantGlobal(AI, TheCopy, false, ToDelete))
     return TheCopy;
   return 0;
 }
diff --git a/lib/Transforms/Utils/Local.cpp b/lib/Transforms/Utils/Local.cpp
index 506e5e8..0f6d9ae 100644
--- a/lib/Transforms/Utils/Local.cpp
+++ b/lib/Transforms/Utils/Local.cpp
@@ -536,9 +536,9 @@ static bool CanPropagatePredecessorsForPHIs(BasicBlock *BB, BasicBlock *Succ) {
 
 /// TryToSimplifyUncondBranchFromEmptyBlock - BB is known to contain an
 /// unconditional branch, and contains no instructions other than PHI nodes,
-/// potential debug intrinsics and the branch.  If possible, eliminate BB by
-/// rewriting all the predecessors to branch to the successor block and return
-/// true.  If we can't transform, return false.
+/// potential side-effect free intrinsics and the branch.  If possible,
+/// eliminate BB by rewriting all the predecessors to branch to the successor
+/// block and return true.  If we can't transform, return false.
 bool llvm::TryToSimplifyUncondBranchFromEmptyBlock(BasicBlock *BB) {
   assert(BB != &BB->getParent()->getEntryBlock() &&
          "TryToSimplifyUncondBranchFromEmptyBlock called on entry block!");
@@ -613,13 +613,15 @@ bool llvm::TryToSimplifyUncondBranchFromEmptyBlock(BasicBlock *BB) {
     }
   }
 
-  while (PHINode *PN = dyn_cast<PHINode>(&BB->front())) {
-    if (Succ->getSinglePredecessor()) {
-      // BB is the only predecessor of Succ, so Succ will end up with exactly
-      // the same predecessors BB had.
-      Succ->getInstList().splice(Succ->begin(),
-                                 BB->getInstList(), BB->begin());
-    } else {
+  if (Succ->getSinglePredecessor()) {
+    // BB is the only predecessor of Succ, so Succ will end up with exactly
+    // the same predecessors BB had.
+
+    // Copy over any phi, debug or lifetime instruction.
+    BB->getTerminator()->eraseFromParent();
+    Succ->getInstList().splice(Succ->getFirstNonPHI(), BB->getInstList());
+  } else {
+    while (PHINode *PN = dyn_cast<PHINode>(&BB->front())) {
       // We explicitly check for such uses in CanPropagatePredecessorsForPHIs.
       assert(PN->use_empty() && "There shouldn't be any uses here!");
       PN->eraseFromParent();
@@ -642,7 +644,7 @@ bool llvm::EliminateDuplicatePHINodes(BasicBlock *BB) {
   bool Changed = false;
 
   // This implementation doesn't currently consider undef operands
-  // specially. Theroetically, two phis which are identical except for
+  // specially. Theoretically, two phis which are identical except for
   // one having an undef where the other doesn't could be collapsed.
 
   // Map from PHI hash values to PHI nodes. If multiple PHIs have
diff --git a/lib/Transforms/Utils/PromoteMemoryToRegister.cpp b/lib/Transforms/Utils/PromoteMemoryToRegister.cpp
index 32d1dcc..e5a00f4 100644
--- a/lib/Transforms/Utils/PromoteMemoryToRegister.cpp
+++ b/lib/Transforms/Utils/PromoteMemoryToRegister.cpp
@@ -38,6 +38,7 @@
 #include "llvm/Analysis/DIBuilder.h"
 #include "llvm/Analysis/Dominators.h"
 #include "llvm/Analysis/InstructionSimplify.h"
+#include "llvm/Analysis/ValueTracking.h"
 #include "llvm/Transforms/Utils/Local.h"
 #include "llvm/ADT/DenseMap.h"
 #include "llvm/ADT/SmallPtrSet.h"
@@ -73,22 +74,6 @@ struct DenseMapInfo<std::pair<BasicBlock*, unsigned> > {
 };
 }
 
-/// onlyUsedByLifetimeMarkers - Return true if the only users of this pointer
-/// are lifetime markers.
-///
-static bool onlyUsedByLifetimeMarkers(const Value *V) {
-  for (Value::const_use_iterator UI = V->use_begin(), UE = V->use_end();
-       UI != UE; ++UI) {
-    const IntrinsicInst *II = dyn_cast<IntrinsicInst>(*UI);
-    if (!II) return false;
-
-    if (II->getIntrinsicID() != Intrinsic::lifetime_start &&
-        II->getIntrinsicID() != Intrinsic::lifetime_end)
-      return false;
-  }
-  return true;
-}
-
 /// isAllocaPromotable - Return true if this alloca is legal for promotion.
 /// This is true if there are only loads and stores to the alloca.
 ///
diff --git a/lib/Transforms/Utils/SimplifyCFG.cpp b/lib/Transforms/Utils/SimplifyCFG.cpp
index 7b93b4a..49726d5 100644
--- a/lib/Transforms/Utils/SimplifyCFG.cpp
+++ b/lib/Transforms/Utils/SimplifyCFG.cpp
@@ -2604,7 +2604,7 @@ bool SimplifyCFGOpt::SimplifyUncondBranch(BranchInst *BI, IRBuilder<> &Builder){
   BasicBlock *BB = BI->getParent();
 
   // If the Terminator is the only non-phi instruction, simplify the block.
-  BasicBlock::iterator I = BB->getFirstNonPHIOrDbg();
+  BasicBlock::iterator I = BB->getFirstNonPHIOrDbgOrLifetime();
   if (I->isTerminator() && BB != &BB->getParent()->getEntryBlock() &&
       TryToSimplifyUncondBranchFromEmptyBlock(BB))
     return true;