| author | Nowar Gu <nowar100@gmail.com> | 2011-06-17 14:29:24 +0800 |
|---|---|---|
| committer | Nowar Gu <nowar100@gmail.com> | 2011-06-20 15:49:07 +0800 |
| commit | 907af0f20f58f2ea26da7ea64e1f094cd6880db7 (patch) | |
| tree | 02007757de416c561df174d582205cebfa582801 | /lib/Analysis |
| parent | 1d4f9a57447faa0142a1d0301e5ce550cfe60c4f (diff) | |
| parent | ec324e5ae44025c6bdb930b78198f30f807e355b (diff) | |
Merge upstream to r133240 on Fri, 17 Jun 2011.
Conflicts:
lib/CodeGen/AsmPrinter/AsmPrinter.cpp
lib/Target/ARM/ARMCodeEmitter.cpp
Diffstat (limited to 'lib/Analysis')
31 files changed, 1403 insertions, 335 deletions
diff --git a/lib/Analysis/AliasAnalysis.cpp b/lib/Analysis/AliasAnalysis.cpp index be02ddb..c189a00 100644 --- a/lib/Analysis/AliasAnalysis.cpp +++ b/lib/Analysis/AliasAnalysis.cpp @@ -86,14 +86,20 @@ AliasAnalysis::getModRefInfo(ImmutableCallSite CS, if (onlyAccessesArgPointees(MRB)) { bool doesAlias = false; - if (doesAccessArgPointees(MRB)) + if (doesAccessArgPointees(MRB)) { + MDNode *CSTag = CS.getInstruction()->getMetadata(LLVMContext::MD_tbaa); for (ImmutableCallSite::arg_iterator AI = CS.arg_begin(), AE = CS.arg_end(); - AI != AE; ++AI) - if (!isNoAlias(Location(*AI), Loc)) { + AI != AE; ++AI) { + const Value *Arg = *AI; + if (!Arg->getType()->isPointerTy()) + continue; + Location CSLoc(Arg, UnknownSize, CSTag); + if (!isNoAlias(CSLoc, Loc)) { doesAlias = true; break; } - + } + } if (!doesAlias) return NoModRef; } @@ -138,13 +144,19 @@ AliasAnalysis::getModRefInfo(ImmutableCallSite CS1, ImmutableCallSite CS2) { // CS2's arguments. if (onlyAccessesArgPointees(CS2B)) { AliasAnalysis::ModRefResult R = NoModRef; - if (doesAccessArgPointees(CS2B)) + if (doesAccessArgPointees(CS2B)) { + MDNode *CS2Tag = CS2.getInstruction()->getMetadata(LLVMContext::MD_tbaa); for (ImmutableCallSite::arg_iterator I = CS2.arg_begin(), E = CS2.arg_end(); I != E; ++I) { - R = ModRefResult((R | getModRefInfo(CS1, *I, UnknownSize)) & Mask); + const Value *Arg = *I; + if (!Arg->getType()->isPointerTy()) + continue; + Location CS2Loc(Arg, UnknownSize, CS2Tag); + R = ModRefResult((R | getModRefInfo(CS1, CS2Loc)) & Mask); if (R == Mask) break; } + } return R; } @@ -152,13 +164,20 @@ AliasAnalysis::getModRefInfo(ImmutableCallSite CS1, ImmutableCallSite CS2) { // any of the memory referenced by CS1's arguments. If not, return NoModRef. if (onlyAccessesArgPointees(CS1B)) { AliasAnalysis::ModRefResult R = NoModRef; - if (doesAccessArgPointees(CS1B)) + if (doesAccessArgPointees(CS1B)) { + MDNode *CS1Tag = CS1.getInstruction()->getMetadata(LLVMContext::MD_tbaa); for (ImmutableCallSite::arg_iterator - I = CS1.arg_begin(), E = CS1.arg_end(); I != E; ++I) - if (getModRefInfo(CS2, *I, UnknownSize) != NoModRef) { + I = CS1.arg_begin(), E = CS1.arg_end(); I != E; ++I) { + const Value *Arg = *I; + if (!Arg->getType()->isPointerTy()) + continue; + Location CS1Loc(Arg, UnknownSize, CS1Tag); + if (getModRefInfo(CS2, CS1Loc) != NoModRef) { R = Mask; break; } + } + } if (R == NoModRef) return R; } diff --git a/lib/Analysis/AliasSetTracker.cpp b/lib/Analysis/AliasSetTracker.cpp index 3a46976..2ed6949 100644 --- a/lib/Analysis/AliasSetTracker.cpp +++ b/lib/Analysis/AliasSetTracker.cpp @@ -602,6 +602,10 @@ void AliasSetTracker::ASTCallbackVH::deleted() { // this now dangles! 
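The AliasAnalysis.cpp hunks above teach getModRefInfo to skip non-pointer call arguments and to query each pointer argument as a Location of unknown size carrying the call site's TBAA tag. Here is a minimal standalone sketch of that loop; Value, MDNode, Location, isNoAlias, and argsMayAliasLoc are plain C++ stand-ins invented for illustration, not LLVM's real classes:

```cpp
#include <cstdint>
#include <vector>

static const uint64_t UnknownSize = ~uint64_t(0);

struct Value { bool IsPointer; };
struct MDNode {};                 // stand-in for a TBAA tag node

struct Location {                 // (Ptr, Size, TBAATag), as in the patch
  const Value *Ptr;
  uint64_t Size;
  const MDNode *TBAATag;
};

// Dummy oracle so the sketch is self-contained; LLVM would consult its
// alias-analysis chain here.
static bool isNoAlias(const Location &, const Location &) { return false; }

// Mirrors the patched loop: skip non-pointer arguments, and query each
// pointer argument as an unknown-size Location tagged with the call
// site's TBAA metadata.
bool argsMayAliasLoc(const std::vector<const Value *> &Args,
                     const MDNode *CSTag, const Location &Loc) {
  for (std::size_t i = 0; i != Args.size(); ++i) {
    const Value *Arg = Args[i];
    if (!Arg->IsPointer)
      continue;                   // the new early-out
    Location CSLoc = {Arg, UnknownSize, CSTag};
    if (!isNoAlias(CSLoc, Loc))
      return true;                // doesAlias = true; break;
  }
  return false;
}
```

The early `continue` and the TBAA tag are the functional change: previously every argument, pointer or not, was wrapped in a default Location with no TBAA information.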
} +void AliasSetTracker::ASTCallbackVH::allUsesReplacedWith(Value *V) { + AST->copyValue(getValPtr(), V); +} + AliasSetTracker::ASTCallbackVH::ASTCallbackVH(Value *V, AliasSetTracker *ast) : CallbackVH(V), AST(ast) {} diff --git a/lib/Analysis/Analysis.cpp b/lib/Analysis/Analysis.cpp index 6ebe100..e57ba78 100644 --- a/lib/Analysis/Analysis.cpp +++ b/lib/Analysis/Analysis.cpp @@ -23,6 +23,7 @@ void llvm::initializeAnalysis(PassRegistry &Registry) { initializeAliasSetPrinterPass(Registry); initializeNoAAPass(Registry); initializeBasicAliasAnalysisPass(Registry); + initializeBranchProbabilityInfoPass(Registry); initializeCFGViewerPass(Registry); initializeCFGPrinterPass(Registry); initializeCFGOnlyViewerPass(Registry); diff --git a/lib/Analysis/BasicAliasAnalysis.cpp b/lib/Analysis/BasicAliasAnalysis.cpp index f7bcd9e..8330ea7 100644 --- a/lib/Analysis/BasicAliasAnalysis.cpp +++ b/lib/Analysis/BasicAliasAnalysis.cpp @@ -281,17 +281,20 @@ DecomposeGEPExpression(const Value *V, int64_t &BaseOffs, continue; } - if (const Instruction *I = dyn_cast<Instruction>(V)) - // TODO: Get a DominatorTree and use it here. - if (const Value *Simplified = - SimplifyInstruction(const_cast<Instruction *>(I), TD)) { - V = Simplified; - continue; - } - const GEPOperator *GEPOp = dyn_cast<GEPOperator>(Op); - if (GEPOp == 0) + if (GEPOp == 0) { + // If it's not a GEP, hand it off to SimplifyInstruction to see if it + // can come up with something. This matches what GetUnderlyingObject does. + if (const Instruction *I = dyn_cast<Instruction>(V)) + // TODO: Get a DominatorTree and use it here. + if (const Value *Simplified = + SimplifyInstruction(const_cast<Instruction *>(I), TD)) { + V = Simplified; + continue; + } + return V; + } // Don't attempt to analyze GEPs over unsized objects. if (!cast<PointerType>(GEPOp->getOperand(0)->getType()) @@ -350,7 +353,7 @@ DecomposeGEPExpression(const Value *V, int64_t &BaseOffs, Scale *= IndexScale.getSExtValue(); - // If we already had an occurrance of this index variable, merge this + // If we already had an occurrence of this index variable, merge this // scale into it. For example, we want to handle: // A[x][x] -> x*16 + x*4 -> x*20 // This also ensures that 'x' only appears in the index list once. @@ -448,7 +451,13 @@ namespace { /// BasicAliasAnalysis - This is the primary alias analysis implementation. struct BasicAliasAnalysis : public ImmutablePass, public AliasAnalysis { static char ID; // Class identification, replacement for typeinfo - BasicAliasAnalysis() : ImmutablePass(ID) { + BasicAliasAnalysis() : ImmutablePass(ID), + // AliasCache rarely has more than 1 or 2 elements, + // so start it off fairly small so that clear() + // doesn't have to tromp through 64 (the default) + // elements on each alias query. This really wants + // something like a SmallDenseMap. 
+ AliasCache(8) { initializeBasicAliasAnalysisPass(*PassRegistry::getPassRegistry()); } @@ -462,12 +471,12 @@ namespace { virtual AliasResult alias(const Location &LocA, const Location &LocB) { - assert(Visited.empty() && "Visited must be cleared after use!"); + assert(AliasCache.empty() && "AliasCache must be cleared after use!"); assert(notDifferentParent(LocA.Ptr, LocB.Ptr) && "BasicAliasAnalysis doesn't support interprocedural queries."); AliasResult Alias = aliasCheck(LocA.Ptr, LocA.Size, LocA.TBAATag, LocB.Ptr, LocB.Size, LocB.TBAATag); - Visited.clear(); + AliasCache.clear(); return Alias; } @@ -503,7 +512,12 @@ namespace { } private: - // Visited - Track instructions visited by a aliasPHI, aliasSelect(), and aliasGEP(). + // AliasCache - Track alias queries to guard against recursion. + typedef std::pair<Location, Location> LocPair; + typedef DenseMap<LocPair, AliasResult> AliasCacheTy; + AliasCacheTy AliasCache; + + // Visited - Track instructions visited by pointsToConstantMemory. SmallPtrSet<const Value*, 16> Visited; // aliasGEP - Provide a bunch of ad-hoc rules to disambiguate a GEP @@ -680,9 +694,12 @@ BasicAliasAnalysis::getModRefInfo(ImmutableCallSite CS, unsigned ArgNo = 0; for (ImmutableCallSite::arg_iterator CI = CS.arg_begin(), CE = CS.arg_end(); CI != CE; ++CI, ++ArgNo) { - // Only look at the no-capture pointer arguments. + // Only look at the no-capture or byval pointer arguments. If this + // pointer were passed to arguments that were neither of these, then it + // couldn't be no-capture. if (!(*CI)->getType()->isPointerTy() || - !CS.paramHasAttr(ArgNo+1, Attribute::NoCapture)) + (!CS.paramHasAttr(ArgNo+1, Attribute::NoCapture) && + !CS.paramHasAttr(ArgNo+1, Attribute::ByVal))) continue; // If this is a no-capture pointer argument, see if we can tell that it @@ -779,6 +796,26 @@ BasicAliasAnalysis::getModRefInfo(ImmutableCallSite CS, return NoModRef; break; } + case Intrinsic::arm_neon_vld1: { + // LLVM's vld1 and vst1 intrinsics currently only support a single + // vector register. + uint64_t Size = + TD ? TD->getTypeStoreSize(II->getType()) : UnknownSize; + if (isNoAlias(Location(II->getArgOperand(0), Size, + II->getMetadata(LLVMContext::MD_tbaa)), + Loc)) + return NoModRef; + break; + } + case Intrinsic::arm_neon_vst1: { + uint64_t Size = + TD ? TD->getTypeStoreSize(II->getArgOperand(1)->getType()) : UnknownSize; + if (isNoAlias(Location(II->getArgOperand(0), Size, + II->getMetadata(LLVMContext::MD_tbaa)), + Loc)) + return NoModRef; + break; + } } // The AliasAnalysis base class has some smarts, lets use them. @@ -796,13 +833,6 @@ BasicAliasAnalysis::aliasGEP(const GEPOperator *GEP1, uint64_t V1Size, const MDNode *V2TBAAInfo, const Value *UnderlyingV1, const Value *UnderlyingV2) { - // If this GEP has been visited before, we're on a use-def cycle. - // Such cycles are only valid when PHI nodes are involved or in unreachable - // code. The visitPHI function catches cycles containing PHIs, but there - // could still be a cycle without PHIs in unreachable code. 
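In the hunk above, the per-query Visited set gives way to an AliasCache keyed by pairs of Locations, and alias() now brackets each top-level query with an emptiness assert and a clear(). A sketch of that lifecycle under simplified types (std::map stands in for LLVM's DenseMap, and aliasCheck is stubbed out):

```cpp
#include <cassert>
#include <cstdint>
#include <map>
#include <utility>

enum AliasResult { NoAlias, MayAlias, PartialAlias, MustAlias };

typedef std::pair<const void *, uint64_t> Loc;   // (pointer, size)
typedef std::pair<Loc, Loc> LocPair;

struct BasicAAModel {
  // The patch sizes the real DenseMap at 8 buckets because clear() walks
  // every bucket and queries rarely cache more than a couple of entries.
  std::map<LocPair, AliasResult> AliasCache;

  // Stub for the recursive worker that populates the cache.
  AliasResult aliasCheck(const Loc &, const Loc &) { return MayAlias; }

  AliasResult alias(const Loc &A, const Loc &B) {
    assert(AliasCache.empty() && "AliasCache must be cleared after use!");
    AliasResult R = aliasCheck(A, B);
    AliasCache.clear();           // one top-level query, one cache lifetime
    return R;
  }
};
```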
- if (!Visited.insert(GEP1)) - return MayAlias; - int64_t GEP1BaseOffset; SmallVector<VariableGEPIndex, 4> GEP1VariableIndices; @@ -883,7 +913,7 @@ BasicAliasAnalysis::aliasGEP(const GEPOperator *GEP1, uint64_t V1Size, if (GEP1BaseOffset == 0 && GEP1VariableIndices.empty()) return MustAlias; - // If there is a difference betwen the pointers, but the difference is + // If there is a difference between the pointers, but the difference is // less than the size of the associated memory object, then we know // that the objects are partially overlapping. if (GEP1BaseOffset != 0 && GEP1VariableIndices.empty()) { @@ -920,7 +950,30 @@ BasicAliasAnalysis::aliasGEP(const GEPOperator *GEP1, uint64_t V1Size, return NoAlias; } - return MayAlias; + // Statically, we can see that the base objects are the same, but the + // pointers have dynamic offsets which we can't resolve. And none of our + // little tricks above worked. + // + // TODO: Returning PartialAlias instead of MayAlias is a mild hack; the + // practical effect of this is protecting TBAA in the case of dynamic + // indices into arrays of unions. An alternative way to solve this would + // be to have clang emit extra metadata for unions and/or union accesses. + // A union-specific solution wouldn't handle the problem for malloc'd + // memory however. + return PartialAlias; +} + +static AliasAnalysis::AliasResult +MergeAliasResults(AliasAnalysis::AliasResult A, AliasAnalysis::AliasResult B) { + // If the results agree, take it. + if (A == B) + return A; + // A mix of PartialAlias and MustAlias is PartialAlias. + if ((A == AliasAnalysis::PartialAlias && B == AliasAnalysis::MustAlias) || + (B == AliasAnalysis::PartialAlias && A == AliasAnalysis::MustAlias)) + return AliasAnalysis::PartialAlias; + // Otherwise, we don't know anything. + return AliasAnalysis::MayAlias; } /// aliasSelect - Provide a bunch of ad-hoc rules to disambiguate a Select @@ -930,13 +983,6 @@ BasicAliasAnalysis::aliasSelect(const SelectInst *SI, uint64_t SISize, const MDNode *SITBAAInfo, const Value *V2, uint64_t V2Size, const MDNode *V2TBAAInfo) { - // If this select has been visited before, we're on a use-def cycle. - // Such cycles are only valid when PHI nodes are involved or in unreachable - // code. The visitPHI function catches cycles containing PHIs, but there - // could still be a cycle without PHIs in unreachable code. - if (!Visited.insert(SI)) - return MayAlias; - // If the values are Selects with the same condition, we can do a more precise // check: just check for aliases between the values on corresponding arms. if (const SelectInst *SI2 = dyn_cast<SelectInst>(V2)) @@ -949,9 +995,7 @@ BasicAliasAnalysis::aliasSelect(const SelectInst *SI, uint64_t SISize, AliasResult ThisAlias = aliasCheck(SI->getFalseValue(), SISize, SITBAAInfo, SI2->getFalseValue(), V2Size, V2TBAAInfo); - if (ThisAlias != Alias) - return MayAlias; - return Alias; + return MergeAliasResults(ThisAlias, Alias); } // If both arms of the Select node NoAlias or MustAlias V2, then returns @@ -961,16 +1005,9 @@ BasicAliasAnalysis::aliasSelect(const SelectInst *SI, uint64_t SISize, if (Alias == MayAlias) return MayAlias; - // If V2 is visited, the recursive case will have been caught in the - // above aliasCheck call, so these subsequent calls to aliasCheck - // don't need to assume that V2 is being visited recursively. 
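MergeAliasResults, introduced above, replaces the old "any disagreement means MayAlias" rule with a small lattice merge so that a PartialAlias/MustAlias mix survives as PartialAlias. Its logic restated as a free-standing function:

```cpp
enum AliasResult { NoAlias, MayAlias, PartialAlias, MustAlias };

// Agreement wins; PartialAlias absorbs MustAlias; anything else is MayAlias.
AliasResult mergeAliasResults(AliasResult A, AliasResult B) {
  if (A == B)
    return A;
  if ((A == PartialAlias && B == MustAlias) ||
      (B == PartialAlias && A == MustAlias))
    return PartialAlias;
  return MayAlias;
}
```

aliasSelect and aliasPHI now fold each arm's result through this merge instead of bailing out to MayAlias on the first mismatch.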
- Visited.erase(V2); - AliasResult ThisAlias = aliasCheck(V2, V2Size, V2TBAAInfo, SI->getFalseValue(), SISize, SITBAAInfo); - if (ThisAlias != Alias) - return MayAlias; - return Alias; + return MergeAliasResults(ThisAlias, Alias); } // aliasPHI - Provide a bunch of ad-hoc rules to disambiguate a PHI instruction @@ -980,10 +1017,6 @@ BasicAliasAnalysis::aliasPHI(const PHINode *PN, uint64_t PNSize, const MDNode *PNTBAAInfo, const Value *V2, uint64_t V2Size, const MDNode *V2TBAAInfo) { - // The PHI node has already been visited, avoid recursion any further. - if (!Visited.insert(PN)) - return MayAlias; - // If the values are PHIs in the same block, we can do a more precise // as well as efficient check: just check for aliases between the values // on corresponding edges. @@ -1000,8 +1033,9 @@ BasicAliasAnalysis::aliasPHI(const PHINode *PN, uint64_t PNSize, aliasCheck(PN->getIncomingValue(i), PNSize, PNTBAAInfo, PN2->getIncomingValueForBlock(PN->getIncomingBlock(i)), V2Size, V2TBAAInfo); - if (ThisAlias != Alias) - return MayAlias; + Alias = MergeAliasResults(ThisAlias, Alias); + if (Alias == MayAlias) + break; } return Alias; } @@ -1032,15 +1066,11 @@ BasicAliasAnalysis::aliasPHI(const PHINode *PN, uint64_t PNSize, for (unsigned i = 1, e = V1Srcs.size(); i != e; ++i) { Value *V = V1Srcs[i]; - // If V2 is visited, the recursive case will have been caught in the - // above aliasCheck call, so these subsequent calls to aliasCheck - // don't need to assume that V2 is being visited recursively. - Visited.erase(V2); - AliasResult ThisAlias = aliasCheck(V2, V2Size, V2TBAAInfo, V, PNSize, PNTBAAInfo); - if (ThisAlias != Alias || ThisAlias == MayAlias) - return MayAlias; + Alias = MergeAliasResults(ThisAlias, Alias); + if (Alias == MayAlias) + break; } return Alias; @@ -1125,6 +1155,17 @@ BasicAliasAnalysis::aliasCheck(const Value *V1, uint64_t V1Size, (V2Size != UnknownSize && isObjectSmallerThan(O1, V2Size, *TD))) return NoAlias; + // Check the cache before climbing up use-def chains. This also terminates + // otherwise infinitely recursive queries. + LocPair Locs(Location(V1, V1Size, V1TBAAInfo), + Location(V2, V2Size, V2TBAAInfo)); + if (V1 > V2) + std::swap(Locs.first, Locs.second); + std::pair<AliasCacheTy::iterator, bool> Pair = + AliasCache.insert(std::make_pair(Locs, MayAlias)); + if (!Pair.second) + return Pair.first->second; + // FIXME: This isn't aggressively handling alias(GEP, PHI) for example: if the // GEP can't simplify, we don't even look at the PHI cases. 
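The aliasCheck hunk above is the heart of the new caching scheme: the location pair is canonicalized by pointer order, and a MayAlias entry is inserted *before* recursing so a query that cycles back to itself hits its own placeholder and terminates. A self-contained sketch of the pattern (names and the std::map are illustrative; LLVM uses DenseMap and its Location type):

```cpp
#include <cstdint>
#include <map>
#include <utility>

enum AliasResult { NoAlias, MayAlias, PartialAlias, MustAlias };
typedef std::pair<const void *, uint64_t> Loc;   // (pointer, size)
typedef std::pair<Loc, Loc> LocPair;

// Stub standing in for the real recursive use-def walk.
static AliasResult expensiveAliasCheck(const Loc &, const Loc &) {
  return NoAlias;
}

AliasResult cachedAliasCheck(std::map<LocPair, AliasResult> &Cache,
                             Loc A, Loc B) {
  // Canonicalize so (A,B) and (B,A) share one slot, mirroring
  // "if (V1 > V2) std::swap(Locs.first, Locs.second)".
  if (A.first > B.first)
    std::swap(A, B);
  LocPair Key(A, B);
  // Seed with MayAlias before recursing; a cyclic query finds it here.
  std::pair<std::map<LocPair, AliasResult>::iterator, bool> Ins =
      Cache.insert(std::make_pair(Key, MayAlias));
  if (!Ins.second)
    return Ins.first->second;
  return Cache[Key] = expensiveAliasCheck(A, B);
}
```

This is also why every early return in aliasCheck becomes `return AliasCache[Locs] = Result;`: the placeholder must be overwritten with the real answer on the way out.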
if (!isa<GEPOperator>(V1) && isa<GEPOperator>(V2)) { @@ -1134,7 +1175,7 @@ BasicAliasAnalysis::aliasCheck(const Value *V1, uint64_t V1Size, } if (const GEPOperator *GV1 = dyn_cast<GEPOperator>(V1)) { AliasResult Result = aliasGEP(GV1, V1Size, V2, V2Size, V2TBAAInfo, O1, O2); - if (Result != MayAlias) return Result; + if (Result != MayAlias) return AliasCache[Locs] = Result; } if (isa<PHINode>(V2) && !isa<PHINode>(V1)) { @@ -1144,7 +1185,7 @@ BasicAliasAnalysis::aliasCheck(const Value *V1, uint64_t V1Size, if (const PHINode *PN = dyn_cast<PHINode>(V1)) { AliasResult Result = aliasPHI(PN, V1Size, V1TBAAInfo, V2, V2Size, V2TBAAInfo); - if (Result != MayAlias) return Result; + if (Result != MayAlias) return AliasCache[Locs] = Result; } if (isa<SelectInst>(V2) && !isa<SelectInst>(V1)) { @@ -1154,7 +1195,7 @@ BasicAliasAnalysis::aliasCheck(const Value *V1, uint64_t V1Size, if (const SelectInst *S1 = dyn_cast<SelectInst>(V1)) { AliasResult Result = aliasSelect(S1, V1Size, V1TBAAInfo, V2, V2Size, V2TBAAInfo); - if (Result != MayAlias) return Result; + if (Result != MayAlias) return AliasCache[Locs] = Result; } // If both pointers are pointing into the same object and one of them @@ -1163,8 +1204,10 @@ BasicAliasAnalysis::aliasCheck(const Value *V1, uint64_t V1Size, if (TD && O1 == O2) if ((V1Size != UnknownSize && isObjectSize(O1, V1Size, *TD)) || (V2Size != UnknownSize && isObjectSize(O2, V2Size, *TD))) - return PartialAlias; + return AliasCache[Locs] = PartialAlias; - return AliasAnalysis::alias(Location(V1, V1Size, V1TBAAInfo), - Location(V2, V2Size, V2TBAAInfo)); + AliasResult Result = + AliasAnalysis::alias(Location(V1, V1Size, V1TBAAInfo), + Location(V2, V2Size, V2TBAAInfo)); + return AliasCache[Locs] = Result; } diff --git a/lib/Analysis/BranchProbabilityInfo.cpp b/lib/Analysis/BranchProbabilityInfo.cpp new file mode 100644 index 0000000..15059c7 --- /dev/null +++ b/lib/Analysis/BranchProbabilityInfo.cpp @@ -0,0 +1,358 @@ +//===-- BranchProbabilityInfo.cpp - Branch Probability Analysis -*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// Loops should be simplified before this analysis. +// +//===----------------------------------------------------------------------===// + +#include "llvm/Instructions.h" +#include "llvm/Analysis/BranchProbabilityInfo.h" +#include "llvm/Support/Debug.h" + +using namespace llvm; + +INITIALIZE_PASS_BEGIN(BranchProbabilityInfo, "branch-prob", + "Branch Probability Analysis", false, true) +INITIALIZE_PASS_DEPENDENCY(LoopInfo) +INITIALIZE_PASS_END(BranchProbabilityInfo, "branch-prob", + "Branch Probability Analysis", false, true) + +char BranchProbabilityInfo::ID = 0; + +namespace { +// Please note that BranchProbabilityAnalysis is not a FunctionPass. +// It is created by BranchProbabilityInfo (which is a FunctionPass), which +// provides a clear interface. Thanks to that, all heuristics and other +// private methods are hidden in the .cpp file. +class BranchProbabilityAnalysis { + + typedef std::pair<BasicBlock *, BasicBlock *> Edge; + + DenseMap<Edge, uint32_t> *Weights; + + BranchProbabilityInfo *BP; + + LoopInfo *LI; + + + // Weights are for internal use only. They are used by heuristics to help to + // estimate edges' probability. Example: + // + // Using "Loop Branch Heuristics" we predict weights of edges for the + // block BB2. + // ... 
+ // | + // V + // BB1<-+ + // | | + // | | (Weight = 128) + // V | + // BB2--+ + // | + // | (Weight = 4) + // V + // BB3 + // + // Probability of the edge BB2->BB1 = 128 / (128 + 4) = 0.9696.. + // Probability of the edge BB2->BB3 = 4 / (128 + 4) = 0.0303.. + + static const uint32_t LBH_TAKEN_WEIGHT = 128; + static const uint32_t LBH_NONTAKEN_WEIGHT = 4; + + // Standard weight value. Used when none of the heuristics set weight for + // the edge. + static const uint32_t NORMAL_WEIGHT = 16; + + // Minimum weight of an edge. Please note, that weight is NEVER 0. + static const uint32_t MIN_WEIGHT = 1; + + // Return TRUE if BB leads directly to a Return Instruction. + static bool isReturningBlock(BasicBlock *BB) { + SmallPtrSet<BasicBlock *, 8> Visited; + + while (true) { + TerminatorInst *TI = BB->getTerminator(); + if (isa<ReturnInst>(TI)) + return true; + + if (TI->getNumSuccessors() > 1) + break; + + // It is unreachable block which we can consider as a return instruction. + if (TI->getNumSuccessors() == 0) + return true; + + Visited.insert(BB); + BB = TI->getSuccessor(0); + + // Stop if cycle is detected. + if (Visited.count(BB)) + return false; + } + + return false; + } + + // Multiply Edge Weight by two. + void incEdgeWeight(BasicBlock *Src, BasicBlock *Dst) { + uint32_t Weight = BP->getEdgeWeight(Src, Dst); + uint32_t MaxWeight = getMaxWeightFor(Src); + + if (Weight * 2 > MaxWeight) + BP->setEdgeWeight(Src, Dst, MaxWeight); + else + BP->setEdgeWeight(Src, Dst, Weight * 2); + } + + // Divide Edge Weight by two. + void decEdgeWeight(BasicBlock *Src, BasicBlock *Dst) { + uint32_t Weight = BP->getEdgeWeight(Src, Dst); + + assert(Weight > 0); + if (Weight / 2 < MIN_WEIGHT) + BP->setEdgeWeight(Src, Dst, MIN_WEIGHT); + else + BP->setEdgeWeight(Src, Dst, Weight / 2); + } + + + uint32_t getMaxWeightFor(BasicBlock *BB) const { + return UINT32_MAX / BB->getTerminator()->getNumSuccessors(); + } + +public: + BranchProbabilityAnalysis(DenseMap<Edge, uint32_t> *W, + BranchProbabilityInfo *BP, LoopInfo *LI) + : Weights(W), BP(BP), LI(LI) { + } + + // Return Heuristics + void calcReturnHeuristics(BasicBlock *BB); + + // Pointer Heuristics + void calcPointerHeuristics(BasicBlock *BB); + + // Loop Branch Heuristics + void calcLoopBranchHeuristics(BasicBlock *BB); + + bool runOnFunction(Function &F); +}; +} // end anonymous namespace + +// Calculate Edge Weights using "Return Heuristics". Predict a successor which +// leads directly to Return Instruction will not be taken. +void BranchProbabilityAnalysis::calcReturnHeuristics(BasicBlock *BB){ + if (BB->getTerminator()->getNumSuccessors() == 1) + return; + + for (succ_iterator I = succ_begin(BB), E = succ_end(BB); I != E; ++I) { + BasicBlock *Succ = *I; + if (isReturningBlock(Succ)) { + decEdgeWeight(BB, Succ); + } + } +} + +// Calculate Edge Weights using "Pointer Heuristics". Predict a comparsion +// between two pointer or pointer and NULL will fail. 
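isReturningBlock, shown above, walks single-successor chains until it proves the block funnels straight into a return. A toy re-implementation over a hypothetical Block type, showing the cycle guard and the "no successors counts as a return" case:

```cpp
#include <set>
#include <vector>

struct Block {
  bool EndsInReturn;
  std::vector<Block *> Succs;   // empty models an unreachable terminator
};

bool leadsStraightToReturn(Block *BB) {
  std::set<Block *> Visited;
  while (true) {
    if (BB->EndsInReturn)
      return true;
    if (BB->Succs.size() > 1)   // a branch point: no longer "direct"
      break;
    if (BB->Succs.empty())
      return true;              // unreachable block, treated like a return
    Visited.insert(BB);
    BB = BB->Succs[0];
    if (Visited.count(BB))
      return false;             // cycle detected, stop
  }
  return false;
}
```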
+void BranchProbabilityAnalysis::calcPointerHeuristics(BasicBlock *BB) { + BranchInst * BI = dyn_cast<BranchInst>(BB->getTerminator()); + if (!BI || !BI->isConditional()) + return; + + Value *Cond = BI->getCondition(); + ICmpInst *CI = dyn_cast<ICmpInst>(Cond); + if (!CI) + return; + + Value *LHS = CI->getOperand(0); + + if (!LHS->getType()->isPointerTy()) + return; + + assert(CI->getOperand(1)->getType()->isPointerTy()); + + BasicBlock *Taken = BI->getSuccessor(0); + BasicBlock *NonTaken = BI->getSuccessor(1); + + // p != 0 -> isProb = true + // p == 0 -> isProb = false + // p != q -> isProb = true + // p == q -> isProb = false; + bool isProb = !CI->isEquality(); + if (!isProb) + std::swap(Taken, NonTaken); + + incEdgeWeight(BB, Taken); + decEdgeWeight(BB, NonTaken); +} + +// Calculate Edge Weights using "Loop Branch Heuristics". Predict backedges +// as taken, exiting edges as not-taken. +void BranchProbabilityAnalysis::calcLoopBranchHeuristics(BasicBlock *BB) { + uint32_t numSuccs = BB->getTerminator()->getNumSuccessors(); + + Loop *L = LI->getLoopFor(BB); + if (!L) + return; + + SmallVector<BasicBlock *, 8> BackEdges; + SmallVector<BasicBlock *, 8> ExitingEdges; + + for (succ_iterator I = succ_begin(BB), E = succ_end(BB); I != E; ++I) { + BasicBlock *Succ = *I; + Loop *SuccL = LI->getLoopFor(Succ); + if (SuccL != L) + ExitingEdges.push_back(Succ); + else if (Succ == L->getHeader()) + BackEdges.push_back(Succ); + } + + if (uint32_t numBackEdges = BackEdges.size()) { + uint32_t backWeight = LBH_TAKEN_WEIGHT / numBackEdges; + if (backWeight < NORMAL_WEIGHT) + backWeight = NORMAL_WEIGHT; + + for (SmallVector<BasicBlock *, 8>::iterator EI = BackEdges.begin(), + EE = BackEdges.end(); EI != EE; ++EI) { + BasicBlock *Back = *EI; + BP->setEdgeWeight(BB, Back, backWeight); + } + } + + uint32_t numExitingEdges = ExitingEdges.size(); + if (uint32_t numNonExitingEdges = numSuccs - numExitingEdges) { + uint32_t exitWeight = LBH_NONTAKEN_WEIGHT / numNonExitingEdges; + if (exitWeight < MIN_WEIGHT) + exitWeight = MIN_WEIGHT; + + for (SmallVector<BasicBlock *, 8>::iterator EI = ExitingEdges.begin(), + EE = ExitingEdges.end(); EI != EE; ++EI) { + BasicBlock *Exiting = *EI; + BP->setEdgeWeight(BB, Exiting, exitWeight); + } + } +} + +bool BranchProbabilityAnalysis::runOnFunction(Function &F) { + + for (Function::iterator I = F.begin(), E = F.end(); I != E; ) { + BasicBlock *BB = I++; + + // Only LBH uses setEdgeWeight method. + calcLoopBranchHeuristics(BB); + + // PH and RH use only incEdgeWeight and decEwdgeWeight methods to + // not efface LBH results. 
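calcPointerHeuristics above boils down to one bit: equality compares of pointers are predicted to fail and inequality compares to succeed, implemented by optionally swapping the successor pair before adjusting weights. A toy version over hypothetical types:

```cpp
#include <utility>

struct Succ { const char *Name; };

// IsInequality: true for p != q / p != 0, false for p == q / p == 0.
// Returns (predicted-taken, predicted-not-taken).
std::pair<Succ, Succ> predictPointerBranch(bool IsInequality,
                                           Succ Taken, Succ NonTaken) {
  bool IsProb = IsInequality;     // "two pointers usually differ"
  if (!IsProb)
    std::swap(Taken, NonTaken);
  return std::make_pair(Taken, NonTaken);  // first is then incEdgeWeight'd,
                                           // second decEdgeWeight'd
}
```

For scale: incEdgeWeight doubles a weight (clamped to UINT32_MAX divided by the successor count) and decEdgeWeight halves it (clamped to MIN_WEIGHT = 1); with the loop-branch weights above, a back edge gets probability 128 / (128 + 4) ≈ 0.97, and isEdgeHot later declares an edge hot when its weight exceeds 4/5 of the block's total.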
+ calcPointerHeuristics(BB); + calcReturnHeuristics(BB); + } + + return false; +} + + +bool BranchProbabilityInfo::runOnFunction(Function &F) { + LoopInfo &LI = getAnalysis<LoopInfo>(); + BranchProbabilityAnalysis BPA(&Weights, this, &LI); + return BPA.runOnFunction(F); +} + +uint32_t BranchProbabilityInfo::getSumForBlock(BasicBlock *BB) const { + uint32_t Sum = 0; + + for (succ_iterator I = succ_begin(BB), E = succ_end(BB); I != E; ++I) { + BasicBlock *Succ = *I; + uint32_t Weight = getEdgeWeight(BB, Succ); + uint32_t PrevSum = Sum; + + Sum += Weight; + assert(Sum > PrevSum); (void) PrevSum; + } + + return Sum; +} + +bool BranchProbabilityInfo::isEdgeHot(BasicBlock *Src, BasicBlock *Dst) const { + // Hot probability is at least 4/5 = 80% + uint32_t Weight = getEdgeWeight(Src, Dst); + uint32_t Sum = getSumForBlock(Src); + + // FIXME: Implement BranchProbability::compare then change this code to + // compare this BranchProbability against a static "hot" BranchProbability. + return (uint64_t)Weight * 5 > (uint64_t)Sum * 4; +} + +BasicBlock *BranchProbabilityInfo::getHotSucc(BasicBlock *BB) const { + uint32_t Sum = 0; + uint32_t MaxWeight = 0; + BasicBlock *MaxSucc = 0; + + for (succ_iterator I = succ_begin(BB), E = succ_end(BB); I != E; ++I) { + BasicBlock *Succ = *I; + uint32_t Weight = getEdgeWeight(BB, Succ); + uint32_t PrevSum = Sum; + + Sum += Weight; + assert(Sum > PrevSum); (void) PrevSum; + + if (Weight > MaxWeight) { + MaxWeight = Weight; + MaxSucc = Succ; + } + } + + // FIXME: Use BranchProbability::compare. + if ((uint64_t)MaxWeight * 5 > (uint64_t)Sum * 4) + return MaxSucc; + + return 0; +} + +// Return edge's weight. If can't find it, return DEFAULT_WEIGHT value. +uint32_t +BranchProbabilityInfo::getEdgeWeight(BasicBlock *Src, BasicBlock *Dst) const { + Edge E(Src, Dst); + DenseMap<Edge, uint32_t>::const_iterator I = Weights.find(E); + + if (I != Weights.end()) + return I->second; + + return DEFAULT_WEIGHT; +} + +void BranchProbabilityInfo::setEdgeWeight(BasicBlock *Src, BasicBlock *Dst, + uint32_t Weight) { + Weights[std::make_pair(Src, Dst)] = Weight; + DEBUG(dbgs() << "set edge " << Src->getNameStr() << " -> " + << Dst->getNameStr() << " weight to " << Weight + << (isEdgeHot(Src, Dst) ? " [is HOT now]\n" : "\n")); +} + + +BranchProbability BranchProbabilityInfo:: +getEdgeProbability(BasicBlock *Src, BasicBlock *Dst) const { + + uint32_t N = getEdgeWeight(Src, Dst); + uint32_t D = getSumForBlock(Src); + + return BranchProbability(N, D); +} + +raw_ostream & +BranchProbabilityInfo::printEdgeProbability(raw_ostream &OS, BasicBlock *Src, + BasicBlock *Dst) const { + + const BranchProbability Prob = getEdgeProbability(Src, Dst); + OS << "edge " << Src->getNameStr() << " -> " << Dst->getNameStr() + << " probability is " << Prob + << (isEdgeHot(Src, Dst) ? 
" [HOT edge]\n" : "\n"); + + return OS; +} diff --git a/lib/Analysis/CMakeLists.txt b/lib/Analysis/CMakeLists.txt index 6be5617..1a975bf 100644 --- a/lib/Analysis/CMakeLists.txt +++ b/lib/Analysis/CMakeLists.txt @@ -6,6 +6,7 @@ add_llvm_library(LLVMAnalysis AliasSetTracker.cpp Analysis.cpp BasicAliasAnalysis.cpp + BranchProbabilityInfo.cpp CFGPrinter.cpp CaptureTracking.cpp ConstantFolding.cpp diff --git a/lib/Analysis/CaptureTracking.cpp b/lib/Analysis/CaptureTracking.cpp index 42a54d9..b2c27d1 100644 --- a/lib/Analysis/CaptureTracking.cpp +++ b/lib/Analysis/CaptureTracking.cpp @@ -17,6 +17,7 @@ //===----------------------------------------------------------------------===// #include "llvm/Analysis/CaptureTracking.h" +#include "llvm/Constants.h" #include "llvm/Instructions.h" #include "llvm/Value.h" #include "llvm/Analysis/AliasAnalysis.h" diff --git a/lib/Analysis/ConstantFolding.cpp b/lib/Analysis/ConstantFolding.cpp index c6aad0c..08a6065 100644 --- a/lib/Analysis/ConstantFolding.cpp +++ b/lib/Analysis/ConstantFolding.cpp @@ -23,6 +23,7 @@ #include "llvm/GlobalVariable.h" #include "llvm/Instructions.h" #include "llvm/Intrinsics.h" +#include "llvm/Operator.h" #include "llvm/Analysis/ValueTracking.h" #include "llvm/Target/TargetData.h" #include "llvm/ADT/SmallVector.h" @@ -1084,7 +1085,7 @@ llvm::canConstantFoldCallTo(const Function *F) { case 'c': return Name == "cos" || Name == "ceil" || Name == "cosf" || Name == "cosh"; case 'e': - return Name == "exp"; + return Name == "exp" || Name == "exp2"; case 'f': return Name == "fabs" || Name == "fmod" || Name == "floor"; case 'l': @@ -1220,6 +1221,12 @@ llvm::ConstantFoldCall(Function *F, case 'e': if (Name == "exp") return ConstantFoldFP(exp, V, Ty); + + if (Name == "exp2") { + // Constant fold exp2(x) as pow(2,x) in case the host doesn't have a + // C99 library. + return ConstantFoldBinaryFP(pow, 2.0, V, Ty); + } break; case 'f': if (Name == "fabs") diff --git a/lib/Analysis/DIBuilder.cpp b/lib/Analysis/DIBuilder.cpp index 108d2d2..ef5d03a 100644 --- a/lib/Analysis/DIBuilder.cpp +++ b/lib/Analysis/DIBuilder.cpp @@ -50,7 +50,11 @@ void DIBuilder::createCompileUnit(unsigned Lang, StringRef Filename, MDString::get(VMContext, Flags), ConstantInt::get(Type::getInt32Ty(VMContext), RunTimeVer) }; - TheCU = DICompileUnit(MDNode::get(VMContext, &Elts[0], array_lengthof(Elts))); + TheCU = DICompileUnit(MDNode::get(VMContext, Elts)); + + // Create a named metadata so that it is easier to find cu in a module. + NamedMDNode *NMD = M.getOrInsertNamedMetadata("llvm.dbg.cu"); + NMD->addOperand(TheCU); } /// createFile - Create a file descriptor to hold debugging information @@ -63,7 +67,7 @@ DIFile DIBuilder::createFile(StringRef Filename, StringRef Directory) { MDString::get(VMContext, Directory), TheCU }; - return DIFile(MDNode::get(VMContext, &Elts[0], array_lengthof(Elts))); + return DIFile(MDNode::get(VMContext, Elts)); } /// createEnumerator - Create a single enumerator value. 
@@ -73,7 +77,7 @@ DIEnumerator DIBuilder::createEnumerator(StringRef Name, uint64_t Val) { MDString::get(VMContext, Name), ConstantInt::get(Type::getInt64Ty(VMContext), Val) }; - return DIEnumerator(MDNode::get(VMContext, &Elts[0], array_lengthof(Elts))); + return DIEnumerator(MDNode::get(VMContext, Elts)); } /// createBasicType - Create debugging information entry for a basic @@ -95,7 +99,7 @@ DIType DIBuilder::createBasicType(StringRef Name, uint64_t SizeInBits, ConstantInt::get(Type::getInt32Ty(VMContext), 0), // Flags; ConstantInt::get(Type::getInt32Ty(VMContext), Encoding) }; - return DIType(MDNode::get(VMContext, &Elts[0], array_lengthof(Elts))); + return DIType(MDNode::get(VMContext, Elts)); } /// createQaulifiedType - Create debugging information entry for a qualified @@ -114,7 +118,7 @@ DIType DIBuilder::createQualifiedType(unsigned Tag, DIType FromTy) { ConstantInt::get(Type::getInt32Ty(VMContext), 0), // Flags FromTy }; - return DIType(MDNode::get(VMContext, &Elts[0], array_lengthof(Elts))); + return DIType(MDNode::get(VMContext, Elts)); } /// createPointerType - Create debugging information entry for a pointer. @@ -133,7 +137,7 @@ DIType DIBuilder::createPointerType(DIType PointeeTy, uint64_t SizeInBits, ConstantInt::get(Type::getInt32Ty(VMContext), 0), // Flags PointeeTy }; - return DIType(MDNode::get(VMContext, &Elts[0], array_lengthof(Elts))); + return DIType(MDNode::get(VMContext, Elts)); } /// createReferenceType - Create debugging information entry for a reference. @@ -151,17 +155,17 @@ DIType DIBuilder::createReferenceType(DIType RTy) { ConstantInt::get(Type::getInt32Ty(VMContext), 0), // Flags RTy }; - return DIType(MDNode::get(VMContext, &Elts[0], array_lengthof(Elts))); + return DIType(MDNode::get(VMContext, Elts)); } /// createTypedef - Create debugging information entry for a typedef. DIType DIBuilder::createTypedef(DIType Ty, StringRef Name, DIFile File, - unsigned LineNo) { + unsigned LineNo, DIDescriptor Context) { // typedefs are encoded in DIDerivedType format. assert(Ty.Verify() && "Invalid typedef type!"); Value *Elts[] = { GetTagConstant(VMContext, dwarf::DW_TAG_typedef), - Ty.getContext(), + Context, MDString::get(VMContext, Name), File, ConstantInt::get(Type::getInt32Ty(VMContext), LineNo), @@ -171,7 +175,7 @@ DIType DIBuilder::createTypedef(DIType Ty, StringRef Name, DIFile File, ConstantInt::get(Type::getInt32Ty(VMContext), 0), // Flags Ty }; - return DIType(MDNode::get(VMContext, &Elts[0], array_lengthof(Elts))); + return DIType(MDNode::get(VMContext, Elts)); } /// createFriend - Create debugging information entry for a 'friend'. @@ -191,7 +195,7 @@ DIType DIBuilder::createFriend(DIType Ty, DIType FriendTy) { ConstantInt::get(Type::getInt32Ty(VMContext), 0), // Flags FriendTy }; - return DIType(MDNode::get(VMContext, &Elts[0], array_lengthof(Elts))); + return DIType(MDNode::get(VMContext, Elts)); } /// createInheritance - Create debugging information entry to establish @@ -211,7 +215,7 @@ DIType DIBuilder::createInheritance(DIType Ty, DIType BaseTy, ConstantInt::get(Type::getInt32Ty(VMContext), Flags), BaseTy }; - return DIType(MDNode::get(VMContext, &Elts[0], array_lengthof(Elts))); + return DIType(MDNode::get(VMContext, Elts)); } /// createMemberType - Create debugging information entry for a member. 
@@ -233,7 +237,36 @@ DIType DIBuilder::createMemberType(StringRef Name, ConstantInt::get(Type::getInt32Ty(VMContext), Flags), Ty }; - return DIType(MDNode::get(VMContext, &Elts[0], array_lengthof(Elts))); + return DIType(MDNode::get(VMContext, Elts)); +} + +/// createObjCIVar - Create debugging information entry for Objective-C +/// instance variable. +DIType DIBuilder::createObjCIVar(StringRef Name, + DIFile File, unsigned LineNumber, + uint64_t SizeInBits, uint64_t AlignInBits, + uint64_t OffsetInBits, unsigned Flags, + DIType Ty, StringRef PropertyName, + StringRef GetterName, StringRef SetterName, + unsigned PropertyAttributes) { + // TAG_member is encoded in DIDerivedType format. + Value *Elts[] = { + GetTagConstant(VMContext, dwarf::DW_TAG_member), + File, // Or TheCU ? Ty ? + MDString::get(VMContext, Name), + File, + ConstantInt::get(Type::getInt32Ty(VMContext), LineNumber), + ConstantInt::get(Type::getInt64Ty(VMContext), SizeInBits), + ConstantInt::get(Type::getInt64Ty(VMContext), AlignInBits), + ConstantInt::get(Type::getInt64Ty(VMContext), OffsetInBits), + ConstantInt::get(Type::getInt32Ty(VMContext), Flags), + Ty, + MDString::get(VMContext, PropertyName), + MDString::get(VMContext, GetterName), + MDString::get(VMContext, SetterName), + ConstantInt::get(Type::getInt32Ty(VMContext), PropertyAttributes) + }; + return DIType(MDNode::get(VMContext, Elts)); } /// createClassType - Create debugging information entry for a class. @@ -260,7 +293,7 @@ DIType DIBuilder::createClassType(DIDescriptor Context, StringRef Name, VTableHoder, TemplateParams }; - return DIType(MDNode::get(VMContext, &Elts[0], array_lengthof(Elts))); + return DIType(MDNode::get(VMContext, Elts)); } /// createTemplateTypeParameter - Create debugging information for template @@ -278,8 +311,7 @@ DIBuilder::createTemplateTypeParameter(DIDescriptor Context, StringRef Name, ConstantInt::get(Type::getInt32Ty(VMContext), LineNo), ConstantInt::get(Type::getInt32Ty(VMContext), ColumnNo) }; - return DITemplateTypeParameter(MDNode::get(VMContext, &Elts[0], - array_lengthof(Elts))); + return DITemplateTypeParameter(MDNode::get(VMContext, Elts)); } /// createTemplateValueParameter - Create debugging information for template @@ -299,8 +331,7 @@ DIBuilder::createTemplateValueParameter(DIDescriptor Context, StringRef Name, ConstantInt::get(Type::getInt32Ty(VMContext), LineNo), ConstantInt::get(Type::getInt32Ty(VMContext), ColumnNo) }; - return DITemplateValueParameter(MDNode::get(VMContext, &Elts[0], - array_lengthof(Elts))); + return DITemplateValueParameter(MDNode::get(VMContext, Elts)); } /// createStructType - Create debugging information entry for a struct. @@ -325,7 +356,7 @@ DIType DIBuilder::createStructType(DIDescriptor Context, StringRef Name, ConstantInt::get(Type::getInt32Ty(VMContext), RunTimeLang), llvm::Constant::getNullValue(Type::getInt32Ty(VMContext)), }; - return DIType(MDNode::get(VMContext, &Elts[0], array_lengthof(Elts))); + return DIType(MDNode::get(VMContext, Elts)); } /// createUnionType - Create debugging information entry for an union. @@ -350,7 +381,7 @@ DIType DIBuilder::createUnionType(DIDescriptor Scope, StringRef Name, ConstantInt::get(Type::getInt32Ty(VMContext), RunTimeLang), llvm::Constant::getNullValue(Type::getInt32Ty(VMContext)), }; - return DIType(MDNode::get(VMContext, &Elts[0], array_lengthof(Elts))); + return DIType(MDNode::get(VMContext, Elts)); } /// createSubroutineType - Create subroutine type. 
@@ -371,7 +402,7 @@ DIType DIBuilder::createSubroutineType(DIFile File, DIArray ParameterTypes) { ConstantInt::get(Type::getInt32Ty(VMContext), 0), llvm::Constant::getNullValue(Type::getInt32Ty(VMContext)), }; - return DIType(MDNode::get(VMContext, &Elts[0], array_lengthof(Elts))); + return DIType(MDNode::get(VMContext, Elts)); } /// createEnumerationType - Create debugging information entry for an @@ -396,7 +427,7 @@ DIType DIBuilder::createEnumerationType(DIDescriptor Scope, StringRef Name, ConstantInt::get(Type::getInt32Ty(VMContext), 0), llvm::Constant::getNullValue(Type::getInt32Ty(VMContext)), }; - MDNode *Node = MDNode::get(VMContext, &Elts[0], array_lengthof(Elts)); + MDNode *Node = MDNode::get(VMContext, Elts); NamedMDNode *NMD = M.getOrInsertNamedMetadata("llvm.dbg.enum"); NMD->addOperand(Node); return DIType(Node); @@ -421,7 +452,7 @@ DIType DIBuilder::createArrayType(uint64_t Size, uint64_t AlignInBits, ConstantInt::get(Type::getInt32Ty(VMContext), 0), llvm::Constant::getNullValue(Type::getInt32Ty(VMContext)), }; - return DIType(MDNode::get(VMContext, &Elts[0], array_lengthof(Elts))); + return DIType(MDNode::get(VMContext, Elts)); } /// createVectorType - Create debugging information entry for a vector. @@ -443,7 +474,7 @@ DIType DIBuilder::createVectorType(uint64_t Size, uint64_t AlignInBits, ConstantInt::get(Type::getInt32Ty(VMContext), 0), llvm::Constant::getNullValue(Type::getInt32Ty(VMContext)), }; - return DIType(MDNode::get(VMContext, &Elts[0], array_lengthof(Elts))); + return DIType(MDNode::get(VMContext, Elts)); } /// createArtificialType - Create a new DIType with "artificial" flag set. @@ -467,7 +498,7 @@ DIType DIBuilder::createArtificialType(DIType Ty) { // Flags are stored at this slot. Elts[8] = ConstantInt::get(Type::getInt32Ty(VMContext), CurFlags); - return DIType(MDNode::get(VMContext, Elts.data(), Elts.size())); + return DIType(MDNode::get(VMContext, Elts)); } /// retainType - Retain DIType in a module even if it is not referenced @@ -483,7 +514,7 @@ DIDescriptor DIBuilder::createUnspecifiedParameter() { Value *Elts[] = { GetTagConstant(VMContext, dwarf::DW_TAG_unspecified_parameters) }; - return DIDescriptor(MDNode::get(VMContext, &Elts[0], 1)); + return DIDescriptor(MDNode::get(VMContext, Elts)); } /// createTemporaryType - Create a temporary forward-declared type. @@ -491,7 +522,7 @@ DIType DIBuilder::createTemporaryType() { // Give the temporary MDNode a tag. It doesn't matter what tag we // use here as long as DIType accepts it. Value *Elts[] = { GetTagConstant(VMContext, DW_TAG_base_type) }; - MDNode *Node = MDNode::getTemporary(VMContext, Elts, array_lengthof(Elts)); + MDNode *Node = MDNode::getTemporary(VMContext, Elts); return DIType(Node); } @@ -505,17 +536,17 @@ DIType DIBuilder::createTemporaryType(DIFile F) { NULL, F }; - MDNode *Node = MDNode::getTemporary(VMContext, Elts, array_lengthof(Elts)); + MDNode *Node = MDNode::getTemporary(VMContext, Elts); return DIType(Node); } /// getOrCreateArray - Get a DIArray, create one if required. 
-DIArray DIBuilder::getOrCreateArray(Value *const *Elements, unsigned NumElements) { - if (NumElements == 0) { +DIArray DIBuilder::getOrCreateArray(ArrayRef<Value *> Elements) { + if (Elements.empty()) { Value *Null = llvm::Constant::getNullValue(Type::getInt32Ty(VMContext)); - return DIArray(MDNode::get(VMContext, &Null, 1)); + return DIArray(MDNode::get(VMContext, Null)); } - return DIArray(MDNode::get(VMContext, Elements, NumElements)); + return DIArray(MDNode::get(VMContext, Elements)); } /// getOrCreateSubrange - Create a descriptor for a value range. This @@ -527,7 +558,7 @@ DISubrange DIBuilder::getOrCreateSubrange(int64_t Lo, int64_t Hi) { ConstantInt::get(Type::getInt64Ty(VMContext), Hi) }; - return DISubrange(MDNode::get(VMContext, &Elts[0], 3)); + return DISubrange(MDNode::get(VMContext, Elts)); } /// createGlobalVariable - Create a new descriptor for the specified global. @@ -548,7 +579,7 @@ createGlobalVariable(StringRef Name, DIFile F, unsigned LineNumber, ConstantInt::get(Type::getInt32Ty(VMContext), 1), /* isDefinition*/ Val }; - MDNode *Node = MDNode::get(VMContext, &Elts[0], array_lengthof(Elts)); + MDNode *Node = MDNode::get(VMContext, Elts); // Create a named metadata so that we do not lose this mdnode. NamedMDNode *NMD = M.getOrInsertNamedMetadata("llvm.dbg.gv"); NMD->addOperand(Node); @@ -575,7 +606,7 @@ createStaticVariable(DIDescriptor Context, StringRef Name, ConstantInt::get(Type::getInt32Ty(VMContext), 1), /* isDefinition*/ Val }; - MDNode *Node = MDNode::get(VMContext, &Elts[0], array_lengthof(Elts)); + MDNode *Node = MDNode::get(VMContext, Elts); // Create a named metadata so that we do not lose this mdnode. NamedMDNode *NMD = M.getOrInsertNamedMetadata("llvm.dbg.gv"); NMD->addOperand(Node); @@ -597,7 +628,7 @@ DIVariable DIBuilder::createLocalVariable(unsigned Tag, DIDescriptor Scope, Ty, ConstantInt::get(Type::getInt32Ty(VMContext), Flags) }; - MDNode *Node = MDNode::get(VMContext, &Elts[0], array_lengthof(Elts)); + MDNode *Node = MDNode::get(VMContext, Elts); if (AlwaysPreserve) { // The optimizer may remove local variable. If there is an interest // to preserve variable info in such situation then stash it in a @@ -620,8 +651,8 @@ DIVariable DIBuilder::createLocalVariable(unsigned Tag, DIDescriptor Scope, DIVariable DIBuilder::createComplexVariable(unsigned Tag, DIDescriptor Scope, StringRef Name, DIFile F, unsigned LineNo, - DIType Ty, Value *const *Addr, - unsigned NumAddr, unsigned ArgNo) { + DIType Ty, ArrayRef<Value *> Addr, + unsigned ArgNo) { SmallVector<Value *, 15> Elts; Elts.push_back(GetTagConstant(VMContext, Tag)); Elts.push_back(Scope); @@ -629,9 +660,10 @@ DIVariable DIBuilder::createComplexVariable(unsigned Tag, DIDescriptor Scope, Elts.push_back(F); Elts.push_back(ConstantInt::get(Type::getInt32Ty(VMContext), (LineNo | (ArgNo << 24)))); Elts.push_back(Ty); - Elts.append(Addr, Addr+NumAddr); + Elts.push_back(llvm::Constant::getNullValue(Type::getInt32Ty(VMContext))); + Elts.append(Addr.begin(), Addr.end()); - return DIVariable(MDNode::get(VMContext, Elts.data(), Elts.size())); + return DIVariable(MDNode::get(VMContext, Elts)); } /// createFunction - Create a new descriptor for the specified function. 
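Most of the DIBuilder churn above is mechanical: MDNode::get, getTemporary, and getOrCreateArray now take an ArrayRef<Value*>, so call sites drop the `&Elts[0], array_lengthof(Elts)` boilerplate. The trick is an implicit conversion from C arrays; a minimal standalone imitation (ArrayRefLike is a hypothetical, stripped-down llvm::ArrayRef):

```cpp
#include <cstddef>

template <typename T> class ArrayRefLike {
  const T *Data;
  std::size_t Length;
public:
  // Implicit conversion from a C array captures the length automatically.
  template <std::size_t N>
  ArrayRefLike(const T (&Arr)[N]) : Data(Arr), Length(N) {}
  const T *begin() const { return Data; }
  const T *end() const { return Data + Length; }
  std::size_t size() const { return Length; }
  bool empty() const { return Length == 0; }
};

std::size_t countElts(ArrayRefLike<int> Elts) { return Elts.size(); }

int main() {
  int Elts[] = {1, 2, 3};
  return countElts(Elts) == 3 ? 0 : 1;  // array converts implicitly
}
```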
@@ -643,7 +675,8 @@ DISubprogram DIBuilder::createFunction(DIDescriptor Context, bool isLocalToUnit, bool isDefinition, unsigned Flags, bool isOptimized, Function *Fn, - MDNode *TParams) { + MDNode *TParams, + MDNode *Decl) { Value *Elts[] = { GetTagConstant(VMContext, dwarf::DW_TAG_subprogram), llvm::Constant::getNullValue(Type::getInt32Ty(VMContext)), @@ -662,9 +695,10 @@ DISubprogram DIBuilder::createFunction(DIDescriptor Context, ConstantInt::get(Type::getInt32Ty(VMContext), Flags), ConstantInt::get(Type::getInt1Ty(VMContext), isOptimized), Fn, - TParams + TParams, + Decl }; - MDNode *Node = MDNode::get(VMContext, &Elts[0], array_lengthof(Elts)); + MDNode *Node = MDNode::get(VMContext, Elts); // Create a named metadata so that we do not lose this mdnode. NamedMDNode *NMD = M.getOrInsertNamedMetadata("llvm.dbg.sp"); @@ -706,7 +740,7 @@ DISubprogram DIBuilder::createMethod(DIDescriptor Context, Fn, TParam, }; - MDNode *Node = MDNode::get(VMContext, &Elts[0], array_lengthof(Elts)); + MDNode *Node = MDNode::get(VMContext, Elts); // Create a named metadata so that we do not lose this mdnode. NamedMDNode *NMD = M.getOrInsertNamedMetadata("llvm.dbg.sp"); @@ -725,7 +759,7 @@ DINameSpace DIBuilder::createNameSpace(DIDescriptor Scope, StringRef Name, File, ConstantInt::get(Type::getInt32Ty(VMContext), LineNo) }; - return DINameSpace(MDNode::get(VMContext, &Elts[0], array_lengthof(Elts))); + return DINameSpace(MDNode::get(VMContext, Elts)); } DILexicalBlock DIBuilder::createLexicalBlock(DIDescriptor Scope, DIFile File, @@ -740,7 +774,7 @@ DILexicalBlock DIBuilder::createLexicalBlock(DIDescriptor Scope, DIFile File, File, ConstantInt::get(Type::getInt32Ty(VMContext), unique_id++) }; - return DILexicalBlock(MDNode::get(VMContext, &Elts[0], array_lengthof(Elts))); + return DILexicalBlock(MDNode::get(VMContext, Elts)); } /// insertDeclare - Insert a new llvm.dbg.declare intrinsic call. @@ -751,7 +785,7 @@ Instruction *DIBuilder::insertDeclare(Value *Storage, DIVariable VarInfo, if (!DeclareFn) DeclareFn = Intrinsic::getDeclaration(&M, Intrinsic::dbg_declare); - Value *Args[] = { MDNode::get(Storage->getContext(), &Storage, 1), VarInfo }; + Value *Args[] = { MDNode::get(Storage->getContext(), Storage), VarInfo }; return CallInst::Create(DeclareFn, Args, Args+2, "", InsertBefore); } @@ -763,7 +797,7 @@ Instruction *DIBuilder::insertDeclare(Value *Storage, DIVariable VarInfo, if (!DeclareFn) DeclareFn = Intrinsic::getDeclaration(&M, Intrinsic::dbg_declare); - Value *Args[] = { MDNode::get(Storage->getContext(), &Storage, 1), VarInfo }; + Value *Args[] = { MDNode::get(Storage->getContext(), Storage), VarInfo }; // If this block already has a terminator then insert this intrinsic // before the terminator. 
@@ -782,7 +816,7 @@ Instruction *DIBuilder::insertDbgValueIntrinsic(Value *V, uint64_t Offset, if (!ValueFn) ValueFn = Intrinsic::getDeclaration(&M, Intrinsic::dbg_value); - Value *Args[] = { MDNode::get(V->getContext(), &V, 1), + Value *Args[] = { MDNode::get(V->getContext(), V), ConstantInt::get(Type::getInt64Ty(V->getContext()), Offset), VarInfo }; return CallInst::Create(ValueFn, Args, Args+3, "", InsertBefore); @@ -797,7 +831,7 @@ Instruction *DIBuilder::insertDbgValueIntrinsic(Value *V, uint64_t Offset, if (!ValueFn) ValueFn = Intrinsic::getDeclaration(&M, Intrinsic::dbg_value); - Value *Args[] = { MDNode::get(V->getContext(), &V, 1), + Value *Args[] = { MDNode::get(V->getContext(), V), ConstantInt::get(Type::getInt64Ty(V->getContext()), Offset), VarInfo }; return CallInst::Create(ValueFn, Args, Args+3, "", InsertAtEnd); diff --git a/lib/Analysis/IPA/CallGraph.cpp b/lib/Analysis/IPA/CallGraph.cpp index 690c4b4..2e79eab 100644 --- a/lib/Analysis/IPA/CallGraph.cpp +++ b/lib/Analysis/IPA/CallGraph.cpp @@ -148,7 +148,7 @@ private: for (BasicBlock::iterator II = BB->begin(), IE = BB->end(); II != IE; ++II) { CallSite CS(cast<Value>(II)); - if (CS && !isa<DbgInfoIntrinsic>(II)) { + if (CS && !isa<IntrinsicInst>(II)) { const Function *Callee = CS.getCalledFunction(); if (Callee) Node->addCalledFunction(CS, getOrInsertFunction(Callee)); diff --git a/lib/Analysis/IPA/CallGraphSCCPass.cpp b/lib/Analysis/IPA/CallGraphSCCPass.cpp index 725ab72..659ffab 100644 --- a/lib/Analysis/IPA/CallGraphSCCPass.cpp +++ b/lib/Analysis/IPA/CallGraphSCCPass.cpp @@ -245,8 +245,8 @@ bool CGPassManager::RefreshCallGraph(CallGraphSCC &CurSCC, for (Function::iterator BB = F->begin(), E = F->end(); BB != E; ++BB) for (BasicBlock::iterator I = BB->begin(), E = BB->end(); I != E; ++I) { - CallSite CS(cast<Value>(I)); - if (!CS || isa<DbgInfoIntrinsic>(I)) continue; + CallSite CS(cast<Value>(I)); + if (!CS || isa<IntrinsicInst>(I)) continue; // If this call site already existed in the callgraph, just verify it // matches up to expectations and remove it from CallSites. diff --git a/lib/Analysis/IPA/FindUsedTypes.cpp b/lib/Analysis/IPA/FindUsedTypes.cpp index 06ae34c..dde2556 100644 --- a/lib/Analysis/IPA/FindUsedTypes.cpp +++ b/lib/Analysis/IPA/FindUsedTypes.cpp @@ -32,7 +32,7 @@ INITIALIZE_PASS(FindUsedTypes, "print-used-types", void FindUsedTypes::IncorporateType(const Type *Ty) { // If ty doesn't already exist in the used types map, add it now, otherwise // return. - if (!UsedTypes.insert(Ty).second) return; // Already contain Ty. + if (!UsedTypes.insert(Ty)) return; // Already contain Ty. // Make sure to add any types this type references now. // @@ -94,7 +94,7 @@ bool FindUsedTypes::runOnModule(Module &m) { // void FindUsedTypes::print(raw_ostream &OS, const Module *M) const { OS << "Types in use by this module:\n"; - for (std::set<const Type *>::const_iterator I = UsedTypes.begin(), + for (SetVector<const Type *>::const_iterator I = UsedTypes.begin(), E = UsedTypes.end(); I != E; ++I) { OS << " "; WriteTypeSymbolic(OS, *I, M); diff --git a/lib/Analysis/IPA/GlobalsModRef.cpp b/lib/Analysis/IPA/GlobalsModRef.cpp index 116aaf4..b226d66 100644 --- a/lib/Analysis/IPA/GlobalsModRef.cpp +++ b/lib/Analysis/IPA/GlobalsModRef.cpp @@ -602,7 +602,7 @@ void GlobalsModRef::addEscapingUse(Use &U) { // For the purposes of this analysis, it is conservatively correct to treat // a newly escaping value equivalently to a deleted one. 
We could perhaps // be more precise by processing the new use and attempting to update our - // saved analysis results to accomodate it. + // saved analysis results to accommodate it. deleteValue(U); AliasAnalysis::addEscapingUse(U); diff --git a/lib/Analysis/IVUsers.cpp b/lib/Analysis/IVUsers.cpp index 2cda791..a0c42f0 100644 --- a/lib/Analysis/IVUsers.cpp +++ b/lib/Analysis/IVUsers.cpp @@ -21,6 +21,7 @@ #include "llvm/Analysis/Dominators.h" #include "llvm/Analysis/LoopPass.h" #include "llvm/Analysis/ScalarEvolutionExpressions.h" +#include "llvm/Support/CommandLine.h" #include "llvm/Target/TargetData.h" #include "llvm/Assembly/Writer.h" #include "llvm/ADT/STLExtras.h" @@ -38,6 +39,15 @@ INITIALIZE_PASS_DEPENDENCY(ScalarEvolution) INITIALIZE_PASS_END(IVUsers, "iv-users", "Induction Variable Users", false, true) +// IVUsers behavior currently depends on this temporary indvars mode. The +// option must be defined upstream from its uses. +namespace llvm { + bool DisableIVRewrite = false; +} +cl::opt<bool, true> DisableIVRewriteOpt( + "disable-iv-rewrite", cl::Hidden, cl::location(llvm::DisableIVRewrite), + cl::desc("Disable canonical induction variable rewriting")); + Pass *llvm::createIVUsersPass() { return new IVUsers(); } @@ -79,7 +89,7 @@ static bool isInteresting(const SCEV *S, const Instruction *I, const Loop *L, /// AddUsersIfInteresting - Inspect the specified instruction. If it is a /// reducible SCEV, recursively add its users to the IVUsesByStride set and /// return true. Otherwise, return false. -bool IVUsers::AddUsersIfInteresting(Instruction *I) { +bool IVUsers::AddUsersIfInteresting(Instruction *I, PHINode *Phi) { if (!SE->isSCEVable(I->getType())) return false; // Void and FP expressions cannot be reduced. @@ -90,6 +100,11 @@ bool IVUsers::AddUsersIfInteresting(Instruction *I) { if (Width > 64 || (TD && !TD->isLegalInteger(Width))) return false; + // We expect Sign/Zero extension to be eliminated from the IR before analyzing + // any downstream uses. + if (DisableIVRewrite && (isa<SExtInst>(I) || isa<ZExtInst>(I))) + return false; + if (!Processed.insert(I)) return true; // Instruction already handled. @@ -121,13 +136,13 @@ bool IVUsers::AddUsersIfInteresting(Instruction *I) { bool AddUserToIVUsers = false; if (LI->getLoopFor(User->getParent()) != L) { if (isa<PHINode>(User) || Processed.count(User) || - !AddUsersIfInteresting(User)) { + !AddUsersIfInteresting(User, Phi)) { DEBUG(dbgs() << "FOUND USER in other loop: " << *User << '\n' << " OF SCEV: " << *ISE << '\n'); AddUserToIVUsers = true; } } else if (Processed.count(User) || - !AddUsersIfInteresting(User)) { + !AddUsersIfInteresting(User, Phi)) { DEBUG(dbgs() << "FOUND USER: " << *User << '\n' << " OF SCEV: " << *ISE << '\n'); AddUserToIVUsers = true; @@ -135,9 +150,11 @@ bool IVUsers::AddUsersIfInteresting(Instruction *I) { if (AddUserToIVUsers) { // Okay, we found a user that we cannot reduce. - IVUses.push_back(new IVStrideUse(this, User, I)); + IVUses.push_back(new IVStrideUse(this, User, I, Phi)); IVStrideUse &NewUse = IVUses.back(); - // Transform the expression into a normalized form. + // Autodetect the post-inc loop set, populating NewUse.PostIncLoops. + // The regular return value here is discarded; instead of recording + // it, we just recompute it when we need it. 
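The -disable-iv-rewrite flag added to IVUsers.cpp above uses cl::opt's external-storage form: the second template argument `true` plus cl::location make the parser write into a plain global rather than into the option object, which is why the comment can say the option "must be defined upstream from its uses" — other code reads llvm::DisableIVRewrite directly. The same lines, isolated with fully qualified names:

```cpp
#include "llvm/Support/CommandLine.h"

namespace llvm {
  bool DisableIVRewrite = false;   // the out-of-line storage
}

// ExternalStorage = true: parsed value lands in llvm::DisableIVRewrite.
static llvm::cl::opt<bool, true> DisableIVRewriteOpt(
    "disable-iv-rewrite", llvm::cl::Hidden,
    llvm::cl::location(llvm::DisableIVRewrite),
    llvm::cl::desc("Disable canonical induction variable rewriting"));
```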
ISE = TransformForPostIncUse(NormalizeAutodetect, ISE, User, I, NewUse.PostIncLoops, @@ -148,8 +165,8 @@ bool IVUsers::AddUsersIfInteresting(Instruction *I) { return true; } -IVStrideUse &IVUsers::AddUser(Instruction *User, Value *Operand) { - IVUses.push_back(new IVStrideUse(this, User, Operand)); +IVStrideUse &IVUsers::AddUser(Instruction *User, Value *Operand, PHINode *Phi) { + IVUses.push_back(new IVStrideUse(this, User, Operand, Phi)); return IVUses.back(); } @@ -177,7 +194,7 @@ bool IVUsers::runOnLoop(Loop *l, LPPassManager &LPM) { // them by stride. Start by finding all of the PHI nodes in the header for // this loop. If they are induction variables, inspect their uses. for (BasicBlock::iterator I = L->getHeader()->begin(); isa<PHINode>(I); ++I) - (void)AddUsersIfInteresting(I); + (void)AddUsersIfInteresting(I, cast<PHINode>(I)); return false; } diff --git a/lib/Analysis/InlineCost.cpp b/lib/Analysis/InlineCost.cpp index 47f91cf..efde598 100644 --- a/lib/Analysis/InlineCost.cpp +++ b/lib/Analysis/InlineCost.cpp @@ -66,21 +66,13 @@ void CodeMetrics::analyzeBasicBlock(const BasicBlock *BB) { ImmutableCallSite CS(cast<Instruction>(II)); - // If this function contains a call to setjmp or _setjmp, never inline - // it. This is a hack because we depend on the user marking their local - // variables as volatile if they are live across a setjmp call, and they - // probably won't do this in callers. if (const Function *F = CS.getCalledFunction()) { // If a function is both internal and has a single use, then it is // extremely likely to get inlined in the future (it was probably // exposed by an interleaved devirtualization pass). if (F->hasInternalLinkage() && F->hasOneUse()) ++NumInlineCandidates; - - if (F->isDeclaration() && - (F->getName() == "setjmp" || F->getName() == "_setjmp")) - callsSetJmp = true; - + // If this call is to function itself, then the function is recursive. // Inlining it into other functions is a bad idea, because this is // basically just a form of loop peeling, and our metrics aren't useful @@ -226,6 +218,13 @@ unsigned CodeMetrics::CountCodeReductionForAlloca(Value *V) { /// analyzeFunction - Fill in the current structure with information gleaned /// from the specified function. void CodeMetrics::analyzeFunction(Function *F) { + // If this function contains a call to setjmp or _setjmp, never inline + // it. This is a hack because we depend on the user marking their local + // variables as volatile if they are live across a setjmp call, and they + // probably won't do this in callers. + if (F->callsFunctionThatReturnsTwice()) + callsSetJmp = true; + // Look at the size of the callee. for (Function::const_iterator BB = F->begin(), E = F->end(); BB != E; ++BB) analyzeBasicBlock(&*BB); @@ -501,7 +500,7 @@ InlineCost InlineCostAnalyzer::getInlineCost(CallSite CS, return InlineCost::getAlways(); if (CalleeFI->Metrics.usesDynamicAlloca) { - // Get infomation about the caller. + // Get information about the caller. FunctionInfo &CallerFI = CachedFunctionInfo[Caller]; // If we haven't calculated this information yet, do so now. @@ -549,7 +548,7 @@ InlineCost InlineCostAnalyzer::getSpecializationCost(Function *Callee, int Cost = 0; - // Look at the orginal size of the callee. Each instruction counts as 5. + // Look at the original size of the callee. Each instruction counts as 5. 
Cost += CalleeFI->Metrics.NumInsts * InlineConstants::InstrCost; // Offset that with the amount of code that can be constant-folded @@ -594,7 +593,7 @@ InlineCostAnalyzer::growCachedCostInfo(Function *Caller, Function *Callee) { CodeMetrics &CallerMetrics = CachedFunctionInfo[Caller].Metrics; // For small functions we prefer to recalculate the cost for better accuracy. - if (CallerMetrics.NumBlocks < 10 || CallerMetrics.NumInsts < 1000) { + if (CallerMetrics.NumBlocks < 10 && CallerMetrics.NumInsts < 1000) { resetCachedCostInfo(Caller); return; } diff --git a/lib/Analysis/InstructionSimplify.cpp b/lib/Analysis/InstructionSimplify.cpp index 9dd5f05..9d78f8b 100644 --- a/lib/Analysis/InstructionSimplify.cpp +++ b/lib/Analysis/InstructionSimplify.cpp @@ -18,6 +18,7 @@ //===----------------------------------------------------------------------===// #define DEBUG_TYPE "instsimplify" +#include "llvm/Operator.h" #include "llvm/ADT/Statistic.h" #include "llvm/Analysis/InstructionSimplify.h" #include "llvm/Analysis/ConstantFolding.h" @@ -900,6 +901,109 @@ Value *llvm::SimplifyFDivInst(Value *Op0, Value *Op1, const TargetData *TD, return ::SimplifyFDivInst(Op0, Op1, TD, DT, RecursionLimit); } +/// SimplifyRem - Given operands for an SRem or URem, see if we can +/// fold the result. If not, this returns null. +static Value *SimplifyRem(Instruction::BinaryOps Opcode, Value *Op0, Value *Op1, + const TargetData *TD, const DominatorTree *DT, + unsigned MaxRecurse) { + if (Constant *C0 = dyn_cast<Constant>(Op0)) { + if (Constant *C1 = dyn_cast<Constant>(Op1)) { + Constant *Ops[] = { C0, C1 }; + return ConstantFoldInstOperands(Opcode, C0->getType(), Ops, 2, TD); + } + } + + // X % undef -> undef + if (match(Op1, m_Undef())) + return Op1; + + // undef % X -> 0 + if (match(Op0, m_Undef())) + return Constant::getNullValue(Op0->getType()); + + // 0 % X -> 0, we don't need to preserve faults! + if (match(Op0, m_Zero())) + return Op0; + + // X % 0 -> undef, we don't need to preserve faults! + if (match(Op1, m_Zero())) + return UndefValue::get(Op0->getType()); + + // X % 1 -> 0 + if (match(Op1, m_One())) + return Constant::getNullValue(Op0->getType()); + + if (Op0->getType()->isIntegerTy(1)) + // It can't be remainder by zero, hence it must be remainder by one. + return Constant::getNullValue(Op0->getType()); + + // X % X -> 0 + if (Op0 == Op1) + return Constant::getNullValue(Op0->getType()); + + // If the operation is with the result of a select instruction, check whether + // operating on either branch of the select always yields the same value. + if (isa<SelectInst>(Op0) || isa<SelectInst>(Op1)) + if (Value *V = ThreadBinOpOverSelect(Opcode, Op0, Op1, TD, DT, MaxRecurse)) + return V; + + // If the operation is with the result of a phi instruction, check whether + // operating on all incoming values of the phi always yields the same value. + if (isa<PHINode>(Op0) || isa<PHINode>(Op1)) + if (Value *V = ThreadBinOpOverPHI(Opcode, Op0, Op1, TD, DT, MaxRecurse)) + return V; + + return 0; +} + +/// SimplifySRemInst - Given operands for an SRem, see if we can +/// fold the result. If not, this returns null. 
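SimplifyRem, added above, is essentially a table of algebraic identities applied before any select/PHI threading. A toy model that makes the undef cases explicit (Val and RemResult are invented stand-ins for LLVM's Value and the folded constants):

```cpp
struct Val {
  bool IsUndef;
  bool IsZero;
  bool IsOne;
  int Id;          // equal non-zero Ids model "the same SSA value"
};

enum RemResult { RemUndef, RemZero, RemUnknown };

RemResult simplifyRem(const Val &Op0, const Val &Op1) {
  if (Op1.IsUndef) return RemUndef;    // X % undef -> undef
  if (Op0.IsUndef) return RemZero;     // undef % X -> 0
  if (Op0.IsZero)  return RemZero;     // 0 % X -> 0, faults not preserved
  if (Op1.IsZero)  return RemUndef;    // X % 0 -> undef, faults not preserved
  if (Op1.IsOne)   return RemZero;     // X % 1 -> 0
  if (Op0.Id != 0 && Op0.Id == Op1.Id)
    return RemZero;                    // X % X -> 0
  return RemUnknown;                   // fall through to select/PHI threading
}
```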
+static Value *SimplifySRemInst(Value *Op0, Value *Op1, const TargetData *TD, + const DominatorTree *DT, unsigned MaxRecurse) { + if (Value *V = SimplifyRem(Instruction::SRem, Op0, Op1, TD, DT, MaxRecurse)) + return V; + + return 0; +} + +Value *llvm::SimplifySRemInst(Value *Op0, Value *Op1, const TargetData *TD, + const DominatorTree *DT) { + return ::SimplifySRemInst(Op0, Op1, TD, DT, RecursionLimit); +} + +/// SimplifyURemInst - Given operands for a URem, see if we can +/// fold the result. If not, this returns null. +static Value *SimplifyURemInst(Value *Op0, Value *Op1, const TargetData *TD, + const DominatorTree *DT, unsigned MaxRecurse) { + if (Value *V = SimplifyRem(Instruction::URem, Op0, Op1, TD, DT, MaxRecurse)) + return V; + + return 0; +} + +Value *llvm::SimplifyURemInst(Value *Op0, Value *Op1, const TargetData *TD, + const DominatorTree *DT) { + return ::SimplifyURemInst(Op0, Op1, TD, DT, RecursionLimit); +} + +static Value *SimplifyFRemInst(Value *Op0, Value *Op1, const TargetData *, + const DominatorTree *, unsigned) { + // undef % X -> undef (the undef could be a snan). + if (match(Op0, m_Undef())) + return Op0; + + // X % undef -> undef + if (match(Op1, m_Undef())) + return Op1; + + return 0; +} + +Value *llvm::SimplifyFRemInst(Value *Op0, Value *Op1, const TargetData *TD, + const DominatorTree *DT) { + return ::SimplifyFRemInst(Op0, Op1, TD, DT, RecursionLimit); +} + /// SimplifyShift - Given operands for an Shl, LShr or AShr, see if we can /// fold the result. If not, this returns null. static Value *SimplifyShift(unsigned Opcode, Value *Op0, Value *Op1, @@ -1272,6 +1376,26 @@ static const Type *GetCompareTy(Value *Op) { return CmpInst::makeCmpResultType(Op->getType()); } +/// ExtractEquivalentCondition - Rummage around inside V looking for something +/// equivalent to the comparison "LHS Pred RHS". Return such a value if found, +/// otherwise return null. Helper function for analyzing max/min idioms. +static Value *ExtractEquivalentCondition(Value *V, CmpInst::Predicate Pred, + Value *LHS, Value *RHS) { + SelectInst *SI = dyn_cast<SelectInst>(V); + if (!SI) + return 0; + CmpInst *Cmp = dyn_cast<CmpInst>(SI->getCondition()); + if (!Cmp) + return 0; + Value *CmpLHS = Cmp->getOperand(0), *CmpRHS = Cmp->getOperand(1); + if (Pred == Cmp->getPredicate() && LHS == CmpLHS && RHS == CmpRHS) + return Cmp; + if (Pred == CmpInst::getSwappedPredicate(Cmp->getPredicate()) && + LHS == CmpRHS && RHS == CmpLHS) + return Cmp; + return 0; +} + /// SimplifyICmpInst - Given operands for an ICmpInst, see if we can /// fold the result. If not, this returns null. static Value *SimplifyICmpInst(unsigned Predicate, Value *LHS, Value *RHS, @@ -1354,46 +1478,48 @@ static Value *SimplifyICmpInst(unsigned Predicate, Value *LHS, Value *RHS, default: assert(false && "Unknown ICmp predicate!"); case ICmpInst::ICMP_ULT: - return ConstantInt::getFalse(LHS->getContext()); + // getNullValue also works for vectors, unlike getFalse. + return Constant::getNullValue(ITy); case ICmpInst::ICMP_UGE: - return ConstantInt::getTrue(LHS->getContext()); + // getAllOnesValue also works for vectors, unlike getTrue. 
+ return ConstantInt::getAllOnesValue(ITy); case ICmpInst::ICMP_EQ: case ICmpInst::ICMP_ULE: if (isKnownNonZero(LHS, TD)) - return ConstantInt::getFalse(LHS->getContext()); + return Constant::getNullValue(ITy); break; case ICmpInst::ICMP_NE: case ICmpInst::ICMP_UGT: if (isKnownNonZero(LHS, TD)) - return ConstantInt::getTrue(LHS->getContext()); + return ConstantInt::getAllOnesValue(ITy); break; case ICmpInst::ICMP_SLT: ComputeSignBit(LHS, LHSKnownNonNegative, LHSKnownNegative, TD); if (LHSKnownNegative) - return ConstantInt::getTrue(LHS->getContext()); + return ConstantInt::getAllOnesValue(ITy); if (LHSKnownNonNegative) - return ConstantInt::getFalse(LHS->getContext()); + return Constant::getNullValue(ITy); break; case ICmpInst::ICMP_SLE: ComputeSignBit(LHS, LHSKnownNonNegative, LHSKnownNegative, TD); if (LHSKnownNegative) - return ConstantInt::getTrue(LHS->getContext()); + return ConstantInt::getAllOnesValue(ITy); if (LHSKnownNonNegative && isKnownNonZero(LHS, TD)) - return ConstantInt::getFalse(LHS->getContext()); + return Constant::getNullValue(ITy); break; case ICmpInst::ICMP_SGE: ComputeSignBit(LHS, LHSKnownNonNegative, LHSKnownNegative, TD); if (LHSKnownNegative) - return ConstantInt::getFalse(LHS->getContext()); + return Constant::getNullValue(ITy); if (LHSKnownNonNegative) - return ConstantInt::getTrue(LHS->getContext()); + return ConstantInt::getAllOnesValue(ITy); break; case ICmpInst::ICMP_SGT: ComputeSignBit(LHS, LHSKnownNonNegative, LHSKnownNegative, TD); if (LHSKnownNegative) - return ConstantInt::getFalse(LHS->getContext()); + return Constant::getNullValue(ITy); if (LHSKnownNonNegative && isKnownNonZero(LHS, TD)) - return ConstantInt::getTrue(LHS->getContext()); + return ConstantInt::getAllOnesValue(ITy); break; } } @@ -1685,7 +1811,8 @@ static Value *SimplifyICmpInst(unsigned Predicate, Value *LHS, Value *RHS, case ICmpInst::ICMP_EQ: case ICmpInst::ICMP_UGT: case ICmpInst::ICMP_UGE: - return ConstantInt::getFalse(RHS->getContext()); + // getNullValue also works for vectors, unlike getFalse. + return Constant::getNullValue(ITy); case ICmpInst::ICMP_SLT: case ICmpInst::ICMP_SLE: ComputeSignBit(LHS, KnownNonNegative, KnownNegative, TD); @@ -1695,7 +1822,8 @@ static Value *SimplifyICmpInst(unsigned Predicate, Value *LHS, Value *RHS, case ICmpInst::ICMP_NE: case ICmpInst::ICMP_ULT: case ICmpInst::ICMP_ULE: - return ConstantInt::getTrue(RHS->getContext()); + // getAllOnesValue also works for vectors, unlike getTrue. + return Constant::getAllOnesValue(ITy); } } if (RBO && match(RBO, m_URem(m_Value(), m_Specific(LHS)))) { @@ -1712,7 +1840,8 @@ static Value *SimplifyICmpInst(unsigned Predicate, Value *LHS, Value *RHS, case ICmpInst::ICMP_NE: case ICmpInst::ICMP_UGT: case ICmpInst::ICMP_UGE: - return ConstantInt::getTrue(RHS->getContext()); + // getAllOnesValue also works for vectors, unlike getTrue. + return Constant::getAllOnesValue(ITy); case ICmpInst::ICMP_SLT: case ICmpInst::ICMP_SLE: ComputeSignBit(RHS, KnownNonNegative, KnownNegative, TD); @@ -1722,7 +1851,8 @@ static Value *SimplifyICmpInst(unsigned Predicate, Value *LHS, Value *RHS, case ICmpInst::ICMP_EQ: case ICmpInst::ICMP_ULT: case ICmpInst::ICMP_ULE: - return ConstantInt::getFalse(RHS->getContext()); + // getNullValue also works for vectors, unlike getFalse. 
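// Rationale for the getNullValue/getAllOnesValue switch in this hunk
// (descriptive note): an icmp on vector operands produces e.g. <4 x i1>, and
// ConstantInt::getTrue/getFalse can only build a scalar i1. For <4 x i1>,
// Constant::getNullValue yields zeroinitializer (all lanes false) and
// getAllOnesValue yields <i1 true, i1 true, i1 true, i1 true> (all lanes
// true), so a single code path now covers scalar and vector compares.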
+ return Constant::getNullValue(ITy); } } @@ -1737,7 +1867,7 @@ static Value *SimplifyICmpInst(unsigned Predicate, Value *LHS, Value *RHS, // fall-through case Instruction::SDiv: case Instruction::AShr: - if (!LBO->isExact() && !RBO->isExact()) + if (!LBO->isExact() || !RBO->isExact()) break; if (Value *V = SimplifyICmpInst(Pred, LBO->getOperand(0), RBO->getOperand(0), TD, DT, MaxRecurse-1)) @@ -1758,6 +1888,194 @@ static Value *SimplifyICmpInst(unsigned Predicate, Value *LHS, Value *RHS, } } + // Simplify comparisons involving max/min. + Value *A, *B; + CmpInst::Predicate P = CmpInst::BAD_ICMP_PREDICATE; + CmpInst::Predicate EqP; // Chosen so that "A == max/min(A,B)" iff "A EqP B". + + // Signed variants on "max(a,b)>=a -> true". + if (match(LHS, m_SMax(m_Value(A), m_Value(B))) && (A == RHS || B == RHS)) { + if (A != RHS) std::swap(A, B); // smax(A, B) pred A. + EqP = CmpInst::ICMP_SGE; // "A == smax(A, B)" iff "A sge B". + // We analyze this as smax(A, B) pred A. + P = Pred; + } else if (match(RHS, m_SMax(m_Value(A), m_Value(B))) && + (A == LHS || B == LHS)) { + if (A != LHS) std::swap(A, B); // A pred smax(A, B). + EqP = CmpInst::ICMP_SGE; // "A == smax(A, B)" iff "A sge B". + // We analyze this as smax(A, B) swapped-pred A. + P = CmpInst::getSwappedPredicate(Pred); + } else if (match(LHS, m_SMin(m_Value(A), m_Value(B))) && + (A == RHS || B == RHS)) { + if (A != RHS) std::swap(A, B); // smin(A, B) pred A. + EqP = CmpInst::ICMP_SLE; // "A == smin(A, B)" iff "A sle B". + // We analyze this as smax(-A, -B) swapped-pred -A. + // Note that we do not need to actually form -A or -B thanks to EqP. + P = CmpInst::getSwappedPredicate(Pred); + } else if (match(RHS, m_SMin(m_Value(A), m_Value(B))) && + (A == LHS || B == LHS)) { + if (A != LHS) std::swap(A, B); // A pred smin(A, B). + EqP = CmpInst::ICMP_SLE; // "A == smin(A, B)" iff "A sle B". + // We analyze this as smax(-A, -B) pred -A. + // Note that we do not need to actually form -A or -B thanks to EqP. + P = Pred; + } + if (P != CmpInst::BAD_ICMP_PREDICATE) { + // Cases correspond to "max(A, B) p A". + switch (P) { + default: + break; + case CmpInst::ICMP_EQ: + case CmpInst::ICMP_SLE: + // Equivalent to "A EqP B". This may be the same as the condition tested + // in the max/min; if so, we can just return that. + if (Value *V = ExtractEquivalentCondition(LHS, EqP, A, B)) + return V; + if (Value *V = ExtractEquivalentCondition(RHS, EqP, A, B)) + return V; + // Otherwise, see if "A EqP B" simplifies. + if (MaxRecurse) + if (Value *V = SimplifyICmpInst(EqP, A, B, TD, DT, MaxRecurse-1)) + return V; + break; + case CmpInst::ICMP_NE: + case CmpInst::ICMP_SGT: { + CmpInst::Predicate InvEqP = CmpInst::getInversePredicate(EqP); + // Equivalent to "A InvEqP B". This may be the same as the condition + // tested in the max/min; if so, we can just return that. + if (Value *V = ExtractEquivalentCondition(LHS, InvEqP, A, B)) + return V; + if (Value *V = ExtractEquivalentCondition(RHS, InvEqP, A, B)) + return V; + // Otherwise, see if "A InvEqP B" simplifies. + if (MaxRecurse) + if (Value *V = SimplifyICmpInst(InvEqP, A, B, TD, DT, MaxRecurse-1)) + return V; + break; + } + case CmpInst::ICMP_SGE: + // Always true. + return Constant::getAllOnesValue(ITy); + case CmpInst::ICMP_SLT: + // Always false. + return Constant::getNullValue(ITy); + } + } + + // Unsigned variants on "max(a,b)>=a -> true". + P = CmpInst::BAD_ICMP_PREDICATE; + if (match(LHS, m_UMax(m_Value(A), m_Value(B))) && (A == RHS || B == RHS)) { + if (A != RHS) std::swap(A, B); // umax(A, B) pred A. 
+ EqP = CmpInst::ICMP_UGE; // "A == umax(A, B)" iff "A uge B". + // We analyze this as umax(A, B) pred A. + P = Pred; + } else if (match(RHS, m_UMax(m_Value(A), m_Value(B))) && + (A == LHS || B == LHS)) { + if (A != LHS) std::swap(A, B); // A pred umax(A, B). + EqP = CmpInst::ICMP_UGE; // "A == umax(A, B)" iff "A uge B". + // We analyze this as umax(A, B) swapped-pred A. + P = CmpInst::getSwappedPredicate(Pred); + } else if (match(LHS, m_UMin(m_Value(A), m_Value(B))) && + (A == RHS || B == RHS)) { + if (A != RHS) std::swap(A, B); // umin(A, B) pred A. + EqP = CmpInst::ICMP_ULE; // "A == umin(A, B)" iff "A ule B". + // We analyze this as umax(-A, -B) swapped-pred -A. + // Note that we do not need to actually form -A or -B thanks to EqP. + P = CmpInst::getSwappedPredicate(Pred); + } else if (match(RHS, m_UMin(m_Value(A), m_Value(B))) && + (A == LHS || B == LHS)) { + if (A != LHS) std::swap(A, B); // A pred umin(A, B). + EqP = CmpInst::ICMP_ULE; // "A == umin(A, B)" iff "A ule B". + // We analyze this as umax(-A, -B) pred -A. + // Note that we do not need to actually form -A or -B thanks to EqP. + P = Pred; + } + if (P != CmpInst::BAD_ICMP_PREDICATE) { + // Cases correspond to "max(A, B) p A". + switch (P) { + default: + break; + case CmpInst::ICMP_EQ: + case CmpInst::ICMP_ULE: + // Equivalent to "A EqP B". This may be the same as the condition tested + // in the max/min; if so, we can just return that. + if (Value *V = ExtractEquivalentCondition(LHS, EqP, A, B)) + return V; + if (Value *V = ExtractEquivalentCondition(RHS, EqP, A, B)) + return V; + // Otherwise, see if "A EqP B" simplifies. + if (MaxRecurse) + if (Value *V = SimplifyICmpInst(EqP, A, B, TD, DT, MaxRecurse-1)) + return V; + break; + case CmpInst::ICMP_NE: + case CmpInst::ICMP_UGT: { + CmpInst::Predicate InvEqP = CmpInst::getInversePredicate(EqP); + // Equivalent to "A InvEqP B". This may be the same as the condition + // tested in the max/min; if so, we can just return that. + if (Value *V = ExtractEquivalentCondition(LHS, InvEqP, A, B)) + return V; + if (Value *V = ExtractEquivalentCondition(RHS, InvEqP, A, B)) + return V; + // Otherwise, see if "A InvEqP B" simplifies. + if (MaxRecurse) + if (Value *V = SimplifyICmpInst(InvEqP, A, B, TD, DT, MaxRecurse-1)) + return V; + break; + } + case CmpInst::ICMP_UGE: + // Always true. + return Constant::getAllOnesValue(ITy); + case CmpInst::ICMP_ULT: + // Always false. + return Constant::getNullValue(ITy); + } + } + + // Variants on "max(x,y) >= min(x,z)". + Value *C, *D; + if (match(LHS, m_SMax(m_Value(A), m_Value(B))) && + match(RHS, m_SMin(m_Value(C), m_Value(D))) && + (A == C || A == D || B == C || B == D)) { + // max(x, ?) pred min(x, ?). + if (Pred == CmpInst::ICMP_SGE) + // Always true. + return Constant::getAllOnesValue(ITy); + if (Pred == CmpInst::ICMP_SLT) + // Always false. + return Constant::getNullValue(ITy); + } else if (match(LHS, m_SMin(m_Value(A), m_Value(B))) && + match(RHS, m_SMax(m_Value(C), m_Value(D))) && + (A == C || A == D || B == C || B == D)) { + // min(x, ?) pred max(x, ?). + if (Pred == CmpInst::ICMP_SLE) + // Always true. + return Constant::getAllOnesValue(ITy); + if (Pred == CmpInst::ICMP_SGT) + // Always false. + return Constant::getNullValue(ITy); + } else if (match(LHS, m_UMax(m_Value(A), m_Value(B))) && + match(RHS, m_UMin(m_Value(C), m_Value(D))) && + (A == C || A == D || B == C || B == D)) { + // max(x, ?) pred min(x, ?). + if (Pred == CmpInst::ICMP_UGE) + // Always true. 
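// Hand-made instance of the unsigned max/min rules above (not from the
// patch): with %m = select (icmp ugt %a, %b), %a, %b, i.e. umax(%a, %b):
//   icmp uge %m, %a  -->  true             ; umax(a,b) uge a always holds
//   icmp ult %m, %a  -->  false
//   icmp eq  %m, %a  -->  icmp uge %a, %b  ; EqP: "a == umax(a,b)" iff "a uge b"
//   icmp ne  %m, %a  -->  icmp ult %a, %b  ; the inverse predicate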
+ return Constant::getAllOnesValue(ITy); + if (Pred == CmpInst::ICMP_ULT) + // Always false. + return Constant::getNullValue(ITy); + } else if (match(LHS, m_UMin(m_Value(A), m_Value(B))) && + match(RHS, m_UMax(m_Value(C), m_Value(D))) && + (A == C || A == D || B == C || B == D)) { + // min(x, ?) pred max(x, ?). + if (Pred == CmpInst::ICMP_ULE) + // Always true. + return Constant::getAllOnesValue(ITy); + if (Pred == CmpInst::ICMP_UGT) + // Always false. + return Constant::getNullValue(ITy); + } + // If the comparison is with the result of a select instruction, check whether // comparing with either branch of the select always yields the same value. if (isa<SelectInst>(LHS) || isa<SelectInst>(RHS)) @@ -1993,6 +2311,9 @@ static Value *SimplifyBinOp(unsigned Opcode, Value *LHS, Value *RHS, case Instruction::SDiv: return SimplifySDivInst(LHS, RHS, TD, DT, MaxRecurse); case Instruction::UDiv: return SimplifyUDivInst(LHS, RHS, TD, DT, MaxRecurse); case Instruction::FDiv: return SimplifyFDivInst(LHS, RHS, TD, DT, MaxRecurse); + case Instruction::SRem: return SimplifySRemInst(LHS, RHS, TD, DT, MaxRecurse); + case Instruction::URem: return SimplifyURemInst(LHS, RHS, TD, DT, MaxRecurse); + case Instruction::FRem: return SimplifyFRemInst(LHS, RHS, TD, DT, MaxRecurse); case Instruction::Shl: return SimplifyShlInst(LHS, RHS, /*isNSW*/false, /*isNUW*/false, TD, DT, MaxRecurse); @@ -2087,6 +2408,15 @@ Value *llvm::SimplifyInstruction(Instruction *I, const TargetData *TD, case Instruction::FDiv: Result = SimplifyFDivInst(I->getOperand(0), I->getOperand(1), TD, DT); break; + case Instruction::SRem: + Result = SimplifySRemInst(I->getOperand(0), I->getOperand(1), TD, DT); + break; + case Instruction::URem: + Result = SimplifyURemInst(I->getOperand(0), I->getOperand(1), TD, DT); + break; + case Instruction::FRem: + Result = SimplifyFRemInst(I->getOperand(0), I->getOperand(1), TD, DT); + break; case Instruction::Shl: Result = SimplifyShlInst(I->getOperand(0), I->getOperand(1), cast<BinaryOperator>(I)->hasNoSignedWrap(), diff --git a/lib/Analysis/LazyValueInfo.cpp b/lib/Analysis/LazyValueInfo.cpp index 9e7da6c..6e27597 100644 --- a/lib/Analysis/LazyValueInfo.cpp +++ b/lib/Analysis/LazyValueInfo.cpp @@ -29,7 +29,6 @@ #include "llvm/ADT/DenseSet.h" #include "llvm/ADT/STLExtras.h" #include <map> -#include <set> #include <stack> using namespace llvm; @@ -268,6 +267,8 @@ public: } // end anonymous namespace. namespace llvm { +raw_ostream &operator<<(raw_ostream &OS, const LVILatticeVal &Val) + LLVM_ATTRIBUTE_USED; raw_ostream &operator<<(raw_ostream &OS, const LVILatticeVal &Val) { if (Val.isUndefined()) return OS << "undefined"; @@ -588,16 +589,18 @@ static bool InstructionDereferencesPointer(Instruction *I, Value *Ptr) { } if (MemIntrinsic *MI = dyn_cast<MemIntrinsic>(I)) { if (MI->isVolatile()) return false; - if (MI->getAddressSpace() != 0) return false; // FIXME: check whether it has a valuerange that excludes zero? 
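// Descriptive note on the address-space guards in this
// InstructionDereferencesPointer hunk: a memory intrinsic with a nonzero
// length actually dereferences its pointer operands, and in address space 0 a
// dereferenced pointer cannot be null. Other address spaces may legally map
// address zero, hence the new getDestAddressSpace() == 0 and
// getSourceAddressSpace() == 0 checks.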
ConstantInt *Len = dyn_cast<ConstantInt>(MI->getLength()); if (!Len || Len->isZero()) return false; - if (MI->getRawDest() == Ptr || MI->getDest() == Ptr) - return true; + if (MI->getDestAddressSpace() == 0) + if (MI->getRawDest() == Ptr || MI->getDest() == Ptr) + return true; if (MemTransferInst *MTI = dyn_cast<MemTransferInst>(MI)) - return MTI->getRawSource() == Ptr || MTI->getSource() == Ptr; + if (MTI->getSourceAddressSpace() == 0) + if (MTI->getRawSource() == Ptr || MTI->getSource() == Ptr) + return true; } return false; } diff --git a/lib/Analysis/Lint.cpp b/lib/Analysis/Lint.cpp index fc7edc0..f130f30 100644 --- a/lib/Analysis/Lint.cpp +++ b/lib/Analysis/Lint.cpp @@ -606,7 +606,7 @@ Value *Lint::findValueImpl(Value *V, bool OffsetOk, Type::getInt64Ty(V->getContext()))) return findValueImpl(CE->getOperand(0), OffsetOk, Visited); } else if (CE->getOpcode() == Instruction::ExtractValue) { - const SmallVector<unsigned, 4> &Indices = CE->getIndices(); + ArrayRef<unsigned> Indices = CE->getIndices(); if (Value *W = FindInsertedValue(CE->getOperand(0), Indices.begin(), Indices.end())) diff --git a/lib/Analysis/Loads.cpp b/lib/Analysis/Loads.cpp index 2ea27fb..c5c676b 100644 --- a/lib/Analysis/Loads.cpp +++ b/lib/Analysis/Loads.cpp @@ -17,6 +17,7 @@ #include "llvm/GlobalAlias.h" #include "llvm/GlobalVariable.h" #include "llvm/IntrinsicInst.h" +#include "llvm/Operator.h" using namespace llvm; /// AreEquivalentAddressValues - Test if A and B will obviously have the same @@ -30,7 +31,7 @@ using namespace llvm; static bool AreEquivalentAddressValues(const Value *A, const Value *B) { // Test if the values are trivially equivalent. if (A == B) return true; - + // Test if the values come from identical arithmetic instructions. // Use isIdenticalToWhenDefined instead of isIdenticalTo because // this function is only used when one address use dominates the @@ -41,7 +42,7 @@ static bool AreEquivalentAddressValues(const Value *A, const Value *B) { if (const Instruction *BI = dyn_cast<Instruction>(B)) if (cast<Instruction>(A)->isIdenticalToWhenDefined(BI)) return true; - + // Otherwise they may not be equivalent. return false; } diff --git a/lib/Analysis/MemDepPrinter.cpp b/lib/Analysis/MemDepPrinter.cpp index 64d215c..2283db0 100644 --- a/lib/Analysis/MemDepPrinter.cpp +++ b/lib/Analysis/MemDepPrinter.cpp @@ -79,8 +79,8 @@ bool MemDepPrinter::runOnFunction(Function &F) { MemDepResult Res = MDA.getDependency(Inst); if (!Res.isNonLocal()) { - assert(Res.isClobber() != Res.isDef() && - "Local dep should be def or clobber!"); + assert((Res.isUnknown() || Res.isClobber() || Res.isDef()) && + "Local dep should be unknown, def or clobber!"); Deps[Inst].insert(std::make_pair(InstAndClobberFlag(Res.getInst(), Res.isClobber()), static_cast<BasicBlock *>(0))); @@ -92,8 +92,9 @@ bool MemDepPrinter::runOnFunction(Function &F) { for (MemoryDependenceAnalysis::NonLocalDepInfo::const_iterator I = NLDI.begin(), E = NLDI.end(); I != E; ++I) { const MemDepResult &Res = I->getResult(); - assert(Res.isClobber() != Res.isDef() && - "Resolved non-local call dep should be def or clobber!"); + assert((Res.isUnknown() || Res.isClobber() || Res.isDef()) && + "Resolved non-local call dep should be unknown, def or " + "clobber!"); InstDeps.insert(std::make_pair(InstAndClobberFlag(Res.getInst(), Res.isClobber()), I->getBB())); @@ -148,16 +149,24 @@ void MemDepPrinter::print(raw_ostream &OS, const Module *M) const { bool isClobber = I->first.getInt(); const BasicBlock *DepBB = I->second; - OS << " " << (isClobber ? 
"Clobber" : " Def"); + OS << " "; + if (!DepInst) + OS << "Unknown"; + else if (isClobber) + OS << "Clobber"; + else + OS << " Def"; if (DepBB) { OS << " in block "; WriteAsOperand(OS, DepBB, /*PrintType=*/false, M); } - OS << " from: "; - if (DepInst == Inst) - OS << "<unspecified>"; - else - DepInst->print(OS); + if (DepInst) { + OS << " from: "; + if (DepInst == Inst) + OS << "<unspecified>"; + else + DepInst->print(OS); + } OS << "\n"; } diff --git a/lib/Analysis/MemoryDependenceAnalysis.cpp b/lib/Analysis/MemoryDependenceAnalysis.cpp index 35043bdd..bba4482 100644 --- a/lib/Analysis/MemoryDependenceAnalysis.cpp +++ b/lib/Analysis/MemoryDependenceAnalysis.cpp @@ -16,6 +16,7 @@ #define DEBUG_TYPE "memdep" #include "llvm/Analysis/MemoryDependenceAnalysis.h" +#include "llvm/Analysis/ValueTracking.h" #include "llvm/Instructions.h" #include "llvm/IntrinsicInst.h" #include "llvm/Function.h" @@ -46,6 +47,11 @@ STATISTIC(NumUncacheNonLocalPtr, STATISTIC(NumCacheCompleteNonLocalPtr, "Number of block queries that were completely cached"); +// Limit for the number of instructions to scan in a block. +// FIXME: Figure out what a sane value is for this. +// (500 is relatively insane.) +static const int BlockScanLimit = 500; + char MemoryDependenceAnalysis::ID = 0; // Register this pass... @@ -179,8 +185,16 @@ AliasAnalysis::ModRefResult GetLocation(const Instruction *Inst, MemDepResult MemoryDependenceAnalysis:: getCallSiteDependencyFrom(CallSite CS, bool isReadOnlyCall, BasicBlock::iterator ScanIt, BasicBlock *BB) { + unsigned Limit = BlockScanLimit; + // Walk backwards through the block, looking for dependencies while (ScanIt != BB->begin()) { + // Limit the amount of scanning we do so we don't end up with quadratic + // running time on extreme testcases. + --Limit; + if (!Limit) + return MemDepResult::getUnknown(); + Instruction *Inst = --ScanIt; // If this inst is a memory op, get the pointer it accessed @@ -214,11 +228,101 @@ getCallSiteDependencyFrom(CallSite CS, bool isReadOnlyCall, } } - // No dependence found. If this is the entry block of the function, it is a - // clobber, otherwise it is non-local. + // No dependence found. If this is the entry block of the function, it is + // unknown, otherwise it is non-local. if (BB != &BB->getParent()->getEntryBlock()) return MemDepResult::getNonLocal(); - return MemDepResult::getClobber(ScanIt); + return MemDepResult::getUnknown(); +} + +/// isLoadLoadClobberIfExtendedToFullWidth - Return true if LI is a load that +/// would fully overlap MemLoc if done as a wider legal integer load. +/// +/// MemLocBase, MemLocOffset are lazily computed here the first time the +/// base/offs of memloc is needed. +static bool +isLoadLoadClobberIfExtendedToFullWidth(const AliasAnalysis::Location &MemLoc, + const Value *&MemLocBase, + int64_t &MemLocOffs, + const LoadInst *LI, + const TargetData *TD) { + // If we have no target data, we can't do this. + if (TD == 0) return false; + + // If we haven't already computed the base/offset of MemLoc, do so now. + if (MemLocBase == 0) + MemLocBase = GetPointerBaseWithConstantOffset(MemLoc.Ptr, MemLocOffs, *TD); + + unsigned Size = MemoryDependenceAnalysis:: + getLoadLoadClobberFullWidthSize(MemLocBase, MemLocOffs, MemLoc.Size, + LI, *TD); + return Size != 0; +} + +/// getLoadLoadClobberFullWidthSize - This is a little bit of analysis that +/// looks at a memory location for a load (specified by MemLocBase, Offs, +/// and Size) and compares it against a load. 
If the specified load could +/// be safely widened to a larger integer load that is 1) still efficient, +/// 2) safe for the target, and 3) would provide the specified memory +/// location value, then this function returns the size in bytes of the +/// load width to use. If not, this returns zero. +unsigned MemoryDependenceAnalysis:: +getLoadLoadClobberFullWidthSize(const Value *MemLocBase, int64_t MemLocOffs, + unsigned MemLocSize, const LoadInst *LI, + const TargetData &TD) { + // We can only extend non-volatile integer loads. + if (!isa<IntegerType>(LI->getType()) || LI->isVolatile()) return 0; + + // Get the base of this load. + int64_t LIOffs = 0; + const Value *LIBase = + GetPointerBaseWithConstantOffset(LI->getPointerOperand(), LIOffs, TD); + + // If the two pointers are not based on the same pointer, we can't tell that + // they are related. + if (LIBase != MemLocBase) return 0; + + // Okay, the two values are based on the same pointer, but returned as + // no-alias. This happens when we have things like two byte loads at "P+1" + // and "P+3". Check to see if increasing the size of the "LI" load up to its + // alignment (or the largest native integer type) will allow us to load all + // the bits required by MemLoc. + + // If MemLoc is before LI, then no widening of LI will help us out. + if (MemLocOffs < LIOffs) return 0; + + // Get the alignment of the load in bytes. We assume that it is safe to load + // any legal integer up to this size without a problem. For example, if we're + // looking at an i8 load on x86-32 that is known 1024 byte aligned, we can + // widen it up to an i32 load. If it is known 2-byte aligned, we can widen it + // to i16. + unsigned LoadAlign = LI->getAlignment(); + + int64_t MemLocEnd = MemLocOffs+MemLocSize; + + // If no amount of rounding up will let MemLoc fit into LI, then bail out. + if (LIOffs+LoadAlign < MemLocEnd) return 0; + + // This is the size of the load to try. Start with the next larger power of + // two. + unsigned NewLoadByteSize = LI->getType()->getPrimitiveSizeInBits()/8U; + NewLoadByteSize = NextPowerOf2(NewLoadByteSize); + + while (1) { + // If this load size is bigger than our known alignment or would not fit + // into a native integer register, then we fail. + if (NewLoadByteSize > LoadAlign || + !TD.fitsInLegalInteger(NewLoadByteSize*8)) + return 0; + + // If a load of this width would include all of MemLoc, then we succeed. + if (LIOffs+NewLoadByteSize >= MemLocEnd) + return NewLoadByteSize; + + NewLoadByteSize <<= 1; + } + + return 0; } /// getPointerDependencyFrom - Return the instruction on which a memory @@ -229,58 +333,39 @@ MemDepResult MemoryDependenceAnalysis:: getPointerDependencyFrom(const AliasAnalysis::Location &MemLoc, bool isLoad, BasicBlock::iterator ScanIt, BasicBlock *BB) { - Value *InvariantTag = 0; + const Value *MemLocBase = 0; + int64_t MemLocOffset = 0; + + unsigned Limit = BlockScanLimit; // Walk backwards through the basic block, looking for dependencies. while (ScanIt != BB->begin()) { + // Limit the amount of scanning we do so we don't end up with quadratic + // running time on extreme testcases. + --Limit; + if (!Limit) + return MemDepResult::getUnknown(); + Instruction *Inst = --ScanIt; - // If we're in an invariant region, no dependencies can be found before - // we pass an invariant-begin marker. - if (InvariantTag == Inst) { - InvariantTag = 0; - continue; - } - if (IntrinsicInst *II = dyn_cast<IntrinsicInst>(Inst)) { // Debug intrinsics don't (and can't) cause dependences. 
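// Worked example for getLoadLoadClobberFullWidthSize above, with made-up
// numbers: LI is "load i8* %P, align 4" at offset 0 from the base, and the
// queried location is the single byte at P+3 (MemLocOffs = 3, MemLocEnd = 4),
// on an x86-32 target:
//   NewLoadByteSize = NextPowerOf2(1) = 2
//   try 2 bytes: 2 <= align 4 and i16 is legal, but 0+2 < 4  -> double it
//   try 4 bytes: 4 <= align 4 and i32 is legal, and 0+4 >= 4 -> return 4
// so the over-aligned i8 load may be widened to an i32 load covering P+3.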
if (isa<DbgInfoIntrinsic>(II)) continue; - // If we pass an invariant-end marker, then we've just entered an - // invariant region and can start ignoring dependencies. - if (II->getIntrinsicID() == Intrinsic::invariant_end) { - // FIXME: This only considers queries directly on the invariant-tagged - // pointer, not on query pointers that are indexed off of them. It'd - // be nice to handle that at some point. - AliasAnalysis::AliasResult R = - AA->alias(AliasAnalysis::Location(II->getArgOperand(2)), MemLoc); - if (R == AliasAnalysis::MustAlias) - InvariantTag = II->getArgOperand(0); - - continue; - } - // If we reach a lifetime begin or end marker, then the query ends here // because the value is undefined. if (II->getIntrinsicID() == Intrinsic::lifetime_start) { // FIXME: This only considers queries directly on the invariant-tagged // pointer, not on query pointers that are indexed off of them. It'd - // be nice to handle that at some point. - AliasAnalysis::AliasResult R = - AA->alias(AliasAnalysis::Location(II->getArgOperand(1)), MemLoc); - if (R == AliasAnalysis::MustAlias) + // be nice to handle that at some point (the right approach is to use + // GetPointerBaseWithConstantOffset). + if (AA->isMustAlias(AliasAnalysis::Location(II->getArgOperand(1)), + MemLoc)) return MemDepResult::getDef(II); continue; } } - // If we're querying on a load and we're in an invariant region, we're done - // at this point. Nothing a load depends on can live in an invariant region. - // - // FIXME: this will prevent us from returning load/load must-aliases, so GVN - // won't remove redundant loads. - if (isLoad && InvariantTag) continue; - // Values depend on loads if the pointers are must aliased. This means that // a load depends on another must aliased load from the same value. if (LoadInst *LI = dyn_cast<LoadInst>(Inst)) { @@ -288,27 +373,57 @@ getPointerDependencyFrom(const AliasAnalysis::Location &MemLoc, bool isLoad, // If we found a pointer, check if it could be the same as our pointer. AliasAnalysis::AliasResult R = AA->alias(LoadLoc, MemLoc); - if (R == AliasAnalysis::NoAlias) - continue; - // May-alias loads don't depend on each other without a dependence. - if (isLoad && R != AliasAnalysis::MustAlias) + if (isLoad) { + if (R == AliasAnalysis::NoAlias) { + // If this is an over-aligned integer load (for example, + // "load i8* %P, align 4") see if it would obviously overlap with the + // queried location if widened to a larger load (e.g. if the queried + // location is 1 byte at P+1). If so, return it as a load/load + // clobber result, allowing the client to decide to widen the load if + // it wants to. + if (const IntegerType *ITy = dyn_cast<IntegerType>(LI->getType())) + if (LI->getAlignment()*8 > ITy->getPrimitiveSizeInBits() && + isLoadLoadClobberIfExtendedToFullWidth(MemLoc, MemLocBase, + MemLocOffset, LI, TD)) + return MemDepResult::getClobber(Inst); + + continue; + } + + // Must aliased loads are defs of each other. + if (R == AliasAnalysis::MustAlias) + return MemDepResult::getDef(Inst); + +#if 0 // FIXME: Temporarily disabled. GVN is cleverly rewriting loads + // in terms of clobbering loads, but since it does this by looking + // at the clobbering load directly, it doesn't know about any + // phi translation that may have happened along the way. + + // If we have a partial alias, then return this as a clobber for the + // client to handle. 
+ if (R == AliasAnalysis::PartialAlias) + return MemDepResult::getClobber(Inst); +#endif + + // Random may-alias loads don't depend on each other without a + // dependence. + continue; + } + + // Stores don't depend on other no-aliased accesses. + if (R == AliasAnalysis::NoAlias) continue; // Stores don't alias loads from read-only memory. - if (!isLoad && AA->pointsToConstantMemory(LoadLoc)) + if (AA->pointsToConstantMemory(LoadLoc)) continue; - // Stores depend on may and must aliased loads, loads depend on must-alias - // loads. + // Stores depend on may/must aliased loads. return MemDepResult::getDef(Inst); } if (StoreInst *SI = dyn_cast<StoreInst>(Inst)) { - // There can't be stores to the value we care about inside an - // invariant region. - if (InvariantTag) continue; - // If alias analysis can tell that this store is guaranteed to not modify // the query pointer, ignore it. Use getModRefInfo to handle cases where // the query pointer points to constant memory etc. @@ -341,8 +456,7 @@ getPointerDependencyFrom(const AliasAnalysis::Location &MemLoc, bool isLoad, (isa<CallInst>(Inst) && extractMallocCall(Inst))) { const Value *AccessPtr = GetUnderlyingObject(MemLoc.Ptr, TD); - if (AccessPtr == Inst || - AA->alias(Inst, 1, AccessPtr, 1) == AliasAnalysis::MustAlias) + if (AccessPtr == Inst || AA->isMustAlias(Inst, AccessPtr)) return MemDepResult::getDef(Inst); continue; } @@ -353,9 +467,6 @@ getPointerDependencyFrom(const AliasAnalysis::Location &MemLoc, bool isLoad, // If the call has no effect on the queried pointer, just ignore it. continue; case AliasAnalysis::Mod: - // If we're in an invariant region, we can ignore calls that ONLY - // modify the pointer. - if (InvariantTag) continue; return MemDepResult::getClobber(Inst); case AliasAnalysis::Ref: // If the call is known to never store to the pointer, and if this is a @@ -368,11 +479,11 @@ getPointerDependencyFrom(const AliasAnalysis::Location &MemLoc, bool isLoad, } } - // No dependence found. If this is the entry block of the function, it is a - // clobber, otherwise it is non-local. + // No dependence found. If this is the entry block of the function, it is + // unknown, otherwise it is non-local. if (BB != &BB->getParent()->getEntryBlock()) return MemDepResult::getNonLocal(); - return MemDepResult::getClobber(ScanIt); + return MemDepResult::getUnknown(); } /// getDependency - Return the instruction on which a memory operation @@ -400,12 +511,12 @@ MemDepResult MemoryDependenceAnalysis::getDependency(Instruction *QueryInst) { // Do the scan. if (BasicBlock::iterator(QueryInst) == QueryParent->begin()) { - // No dependence found. If this is the entry block of the function, it is a - // clobber, otherwise it is non-local. + // No dependence found. If this is the entry block of the function, it is + // unknown, otherwise it is non-local. if (QueryParent != &QueryParent->getParent()->getEntryBlock()) LocalCache = MemDepResult::getNonLocal(); else - LocalCache = MemDepResult::getClobber(QueryInst); + LocalCache = MemDepResult::getUnknown(); } else { AliasAnalysis::Location MemLoc; AliasAnalysis::ModRefResult MR = GetLocation(QueryInst, MemLoc, AA); @@ -413,7 +524,7 @@ MemDepResult MemoryDependenceAnalysis::getDependency(Instruction *QueryInst) { // If we can do a pointer scan, make it happen. 
bool isLoad = !(MR & AliasAnalysis::Mod); if (IntrinsicInst *II = dyn_cast<IntrinsicInst>(QueryInst)) - isLoad |= II->getIntrinsicID() == Intrinsic::lifetime_end; + isLoad |= II->getIntrinsicID() == Intrinsic::lifetime_start; LocalCache = getPointerDependencyFrom(MemLoc, isLoad, ScanPos, QueryParent); @@ -424,7 +535,7 @@ MemDepResult MemoryDependenceAnalysis::getDependency(Instruction *QueryInst) { QueryParent); } else // Non-memory instruction. - LocalCache = MemDepResult::getClobber(--BasicBlock::iterator(ScanPos)); + LocalCache = MemDepResult::getUnknown(); } // Remember the result! @@ -558,10 +669,10 @@ MemoryDependenceAnalysis::getNonLocalCallDependency(CallSite QueryCS) { Dep = getCallSiteDependencyFrom(QueryCS, isReadonlyCall,ScanPos, DirtyBB); } else if (DirtyBB != &DirtyBB->getParent()->getEntryBlock()) { // No dependence found. If this is the entry block of the function, it is - // a clobber, otherwise it is non-local. + // a clobber, otherwise it is unknown. Dep = MemDepResult::getNonLocal(); } else { - Dep = MemDepResult::getClobber(ScanPos); + Dep = MemDepResult::getUnknown(); } // If we had a dirty entry for the block, update it. Otherwise, just add @@ -617,7 +728,7 @@ getNonLocalPointerDependency(const AliasAnalysis::Location &Loc, bool isLoad, return; Result.clear(); Result.push_back(NonLocalDepResult(FromBB, - MemDepResult::getClobber(FromBB->begin()), + MemDepResult::getUnknown(), const_cast<Value *>(Loc.Ptr))); } @@ -679,7 +790,7 @@ GetNonLocalInfoForBlock(const AliasAnalysis::Location &Loc, // If the block has a dependency (i.e. it isn't completely transparent to // the value), remember the reverse association because we just added it // to Cache! - if (Dep.isNonLocal()) + if (Dep.isNonLocal() || Dep.isUnknown()) return Dep; // Keep the ReverseNonLocalPtrDeps map up to date so we can efficiently @@ -853,6 +964,9 @@ getNonLocalPointerDepFromBB(const PHITransAddr &Pointer, SmallVector<BasicBlock*, 32> Worklist; Worklist.push_back(StartBB); + // PredList used inside loop. + SmallVector<std::pair<BasicBlock*, PHITransAddr>, 16> PredList; + // Keep track of the entries that we know are sorted. Previously cached // entries will all be sorted. The entries we add we only sort on demand (we // don't insert every element into its sorted position). We know that we @@ -889,22 +1003,29 @@ getNonLocalPointerDepFromBB(const PHITransAddr &Pointer, // the same Pointer. if (!Pointer.NeedsPHITranslationFromBlock(BB)) { SkipFirstBlock = false; + SmallVector<BasicBlock*, 16> NewBlocks; for (BasicBlock **PI = PredCache->GetPreds(BB); *PI; ++PI) { // Verify that we haven't looked at this block yet. std::pair<DenseMap<BasicBlock*,Value*>::iterator, bool> InsertRes = Visited.insert(std::make_pair(*PI, Pointer.getAddr())); if (InsertRes.second) { // First time we've looked at *PI. - Worklist.push_back(*PI); + NewBlocks.push_back(*PI); continue; } // If we have seen this block before, but it was with a different // pointer then we have a phi translation failure and we have to treat // this as a clobber. - if (InsertRes.first->second != Pointer.getAddr()) + if (InsertRes.first->second != Pointer.getAddr()) { + // Make sure to clean up the Visited map before continuing on to + // PredTranslationFailure. 
+ for (unsigned i = 0; i < NewBlocks.size(); i++) + Visited.erase(NewBlocks[i]); goto PredTranslationFailure; + } } + Worklist.append(NewBlocks.begin(), NewBlocks.end()); continue; } @@ -923,13 +1044,15 @@ getNonLocalPointerDepFromBB(const PHITransAddr &Pointer, NumSortedEntries = Cache->size(); } Cache = 0; - + + PredList.clear(); for (BasicBlock **PI = PredCache->GetPreds(BB); *PI; ++PI) { BasicBlock *Pred = *PI; - + PredList.push_back(std::make_pair(Pred, Pointer)); + // Get the PHI translated pointer in this predecessor. This can fail if // not translatable, in which case the getAddr() returns null. - PHITransAddr PredPointer(Pointer); + PHITransAddr &PredPointer = PredList.back().second; PredPointer.PHITranslateValue(BB, Pred, 0); Value *PredPtrVal = PredPointer.getAddr(); @@ -943,6 +1066,9 @@ getNonLocalPointerDepFromBB(const PHITransAddr &Pointer, InsertRes = Visited.insert(std::make_pair(Pred, PredPtrVal)); if (!InsertRes.second) { + // We found the pred; take it off the list of preds to visit. + PredList.pop_back(); + // If the predecessor was visited with PredPtr, then we already did // the analysis and can ignore it. if (InsertRes.first->second == PredPtrVal) continue; @@ -951,18 +1077,49 @@ getNonLocalPointerDepFromBB(const PHITransAddr &Pointer, // Otherwise, the block was previously analyzed with a different // pointer. We can't represent the result of this case, so we just // treat this as a phi translation failure. + + // Make sure to clean up the Visited map before continuing on to + // PredTranslationFailure. + for (unsigned i = 0; i < PredList.size(); i++) + Visited.erase(PredList[i].first); + goto PredTranslationFailure; } - + } + + // Actually process results here; this needs to be a separate loop to avoid + // calling getNonLocalPointerDepFromBB for blocks we don't want to return + // any results for. (getNonLocalPointerDepFromBB will modify our + // datastructures in ways the code after the PredTranslationFailure label + // doesn't expect.) + for (unsigned i = 0; i < PredList.size(); i++) { + BasicBlock *Pred = PredList[i].first; + PHITransAddr &PredPointer = PredList[i].second; + Value *PredPtrVal = PredPointer.getAddr(); + + bool CanTranslate = true; // If PHI translation was unable to find an available pointer in this // predecessor, then we have to assume that the pointer is clobbered in // that predecessor. We can still do PRE of the load, which would insert // a computation of the pointer in this predecessor. - if (PredPtrVal == 0) { + if (PredPtrVal == 0) + CanTranslate = false; + + // FIXME: it is entirely possible that PHI translating will end up with + // the same value. Consider PHI translating something like: + // X = phi [x, bb1], [y, bb2]. PHI translating for bb1 doesn't *need* + // to recurse here, pedantically speaking. + + // If getNonLocalPointerDepFromBB fails here, that means the cached + // result conflicted with the Visited list; we have to conservatively + // assume it is unknown, but this also does not block PRE of the load. + if (!CanTranslate || + getNonLocalPointerDepFromBB(PredPointer, + Loc.getWithNewPtr(PredPtrVal), + isLoad, Pred, + Result, Visited)) { // Add the entry to the Result list.
- NonLocalDepResult Entry(Pred, - MemDepResult::getClobber(Pred->getTerminator()), - PredPtrVal); + NonLocalDepResult Entry(Pred, MemDepResult::getUnknown(), PredPtrVal); Result.push_back(Entry); // Since we had a phi translation failure, the cache for CacheKey won't @@ -974,19 +1131,6 @@ getNonLocalPointerDepFromBB(const PHITransAddr &Pointer, NLPI.Pair = BBSkipFirstBlockPair(); continue; } - - // FIXME: it is entirely possible that PHI translating will end up with - // the same value. Consider PHI translating something like: - // X = phi [x, bb1], [y, bb2]. PHI translating for bb1 doesn't *need* - // to recurse here, pedantically speaking. - - // If we have a problem phi translating, fall through to the code below - // to handle the failure condition. - if (getNonLocalPointerDepFromBB(PredPointer, - Loc.getWithNewPtr(PredPointer.getAddr()), - isLoad, Pred, - Result, Visited)) - goto PredTranslationFailure; } // Refresh the CacheInfo/Cache pointer so that it isn't invalidated. @@ -1003,6 +1147,9 @@ getNonLocalPointerDepFromBB(const PHITransAddr &Pointer, continue; PredTranslationFailure: + // The following code is "failure"; we can't produce a sane translation + // for the given block. It assumes that we haven't modified any of + // our datastructures while processing the current block. if (Cache == 0) { // Refresh the CacheInfo/Cache pointer if it got invalidated. @@ -1017,8 +1164,7 @@ getNonLocalPointerDepFromBB(const PHITransAddr &Pointer, // results from the set". Clear out the indicator for this. CacheInfo->Pair = BBSkipFirstBlockPair(); - // If *nothing* works, mark the pointer as being clobbered by the first - // instruction in this block. + // If *nothing* works, mark the pointer as unknown. // // If this is the magic first block, return this as a clobber of the whole // incoming value. 
Since we can't phi translate to one of the predecessors, @@ -1033,8 +1179,7 @@ getNonLocalPointerDepFromBB(const PHITransAddr &Pointer, assert(I->getResult().isNonLocal() && "Should only be here with transparent block"); - I->setResult(MemDepResult::getClobber(BB->begin())); - ReverseNonLocalPtrDeps[BB->begin()].insert(CacheKey); + I->setResult(MemDepResult::getUnknown()); Result.push_back(NonLocalDepResult(I->getBB(), I->getResult(), Pointer.getAddr())); break; diff --git a/lib/Analysis/PHITransAddr.cpp b/lib/Analysis/PHITransAddr.cpp index 93da5a4..70dcd0d 100644 --- a/lib/Analysis/PHITransAddr.cpp +++ b/lib/Analysis/PHITransAddr.cpp @@ -12,6 +12,7 @@ //===----------------------------------------------------------------------===// #include "llvm/Analysis/PHITransAddr.h" +#include "llvm/Constants.h" #include "llvm/Instructions.h" #include "llvm/Analysis/Dominators.h" #include "llvm/Analysis/InstructionSimplify.h" diff --git a/lib/Analysis/PathNumbering.cpp b/lib/Analysis/PathNumbering.cpp index 5d3f6bb..7c584da 100644 --- a/lib/Analysis/PathNumbering.cpp +++ b/lib/Analysis/PathNumbering.cpp @@ -38,13 +38,10 @@ #include "llvm/Support/TypeBuilder.h" #include "llvm/Support/raw_ostream.h" -#include <map> #include <queue> -#include <set> #include <stack> #include <string> #include <utility> -#include <vector> #include <sstream> using namespace llvm; @@ -286,7 +283,7 @@ void BallLarusDag::calculatePathNumbers() { BallLarusEdge* exitEdge = addEdge(node, getExit(), 0); exitEdge->setType(BallLarusEdge::SPLITEDGE_PHONY); - // Counters to handle the possibilty of a multi-graph + // Counters to handle the possibility of a multi-graph BasicBlock* oldTarget = 0; unsigned duplicateNumber = 0; diff --git a/lib/Analysis/PathProfileVerifier.cpp b/lib/Analysis/PathProfileVerifier.cpp index c549773..0ae734e 100644 --- a/lib/Analysis/PathProfileVerifier.cpp +++ b/lib/Analysis/PathProfileVerifier.cpp @@ -124,7 +124,7 @@ bool PathProfileVerifier::runOnModule (Module &M) { ProfilePathEdgeVector* pev = currentPath->getPathEdges(); DEBUG(dbgs () << "path #" << currentPath->getNumber() << ": " << currentPath->getCount() << "\n"); - // setup the entry edge (normally path profiling doens't care about this) + // setup the entry edge (normally path profiling doesn't care about this) if (currentPath->getFirstBlockInPath() == &F->getEntryBlock()) edgeArray[arrayMap[0][currentPath->getFirstBlockInPath()][0]] += currentPath->getCount(); diff --git a/lib/Analysis/ProfileEstimatorPass.cpp b/lib/Analysis/ProfileEstimatorPass.cpp index 667ee1c..b594e2b 100644 --- a/lib/Analysis/ProfileEstimatorPass.cpp +++ b/lib/Analysis/ProfileEstimatorPass.cpp @@ -140,7 +140,7 @@ void ProfileEstimatorPass::recurseBasicBlock(BasicBlock *BB) { // loop, thus the edge is a backedge, continue and do not check if the // value is valid. if (BBisHeader && BBLoop->contains(*bbi)) { - printEdgeError(edge, "but is backedge, continueing"); + printEdgeError(edge, "but is backedge, continuing"); continue; } // If the edges value is missing (and this is no loop header, and this is diff --git a/lib/Analysis/ProfileInfo.cpp b/lib/Analysis/ProfileInfo.cpp index 36f211e..173de2c 100644 --- a/lib/Analysis/ProfileInfo.cpp +++ b/lib/Analysis/ProfileInfo.cpp @@ -309,9 +309,9 @@ void ProfileInfoT<Function,BasicBlock>:: removeEdge(oldedge); } -/// Replaces all occurences of RmBB in the ProfilingInfo with DestBB. +/// Replaces all occurrences of RmBB in the ProfilingInfo with DestBB. 
/// This checks all edges of the function the blocks reside in and replaces the -/// occurences of RmBB with DestBB. +/// occurrences of RmBB with DestBB. template<> void ProfileInfoT<Function,BasicBlock>:: replaceAllUses(const BasicBlock *RmBB, const BasicBlock *DestBB) { @@ -812,7 +812,7 @@ void ProfileInfoT<Function,BasicBlock>::repair(const Function *F) { } if (iw < 0) continue; - // Check the recieving end of the path if it can handle the flow. + // Check the receiving end of the path if it can handle the flow. double ow = getExecutionCount(Dest); Processed.clear(); for (succ_const_iterator NBB = succ_begin(BB), End = succ_end(BB); diff --git a/lib/Analysis/ProfileInfoLoader.cpp b/lib/Analysis/ProfileInfoLoader.cpp index 25481b2..eaa38da 100644 --- a/lib/Analysis/ProfileInfoLoader.cpp +++ b/lib/Analysis/ProfileInfoLoader.cpp @@ -19,7 +19,6 @@ #include "llvm/Support/raw_ostream.h" #include <cstdio> #include <cstdlib> -#include <map> using namespace llvm; // ByteSwap - Byteswap 'Var' if 'Really' is true. diff --git a/lib/Analysis/RegionPass.cpp b/lib/Analysis/RegionPass.cpp index 3269dcc..80eda79 100644 --- a/lib/Analysis/RegionPass.cpp +++ b/lib/Analysis/RegionPass.cpp @@ -249,7 +249,7 @@ void RegionPass::assignPassManager(PMStack &PMS, assert (!PMS.empty() && "Unable to create Region Pass Manager"); PMDataManager *PMD = PMS.top(); - // [1] Create new Call Graph Pass Manager + // [1] Create new Region Pass Manager RGPM = new RGPassManager(PMD->getDepth() + 1); RGPM->populateInheritedAnalysis(PMS); diff --git a/lib/Analysis/ScalarEvolution.cpp b/lib/Analysis/ScalarEvolution.cpp index 228974d..025718e 100644 --- a/lib/Analysis/ScalarEvolution.cpp +++ b/lib/Analysis/ScalarEvolution.cpp @@ -1035,6 +1035,93 @@ const SCEV *ScalarEvolution::getZeroExtendExpr(const SCEV *Op, return S; } +// Get the limit of a recurrence such that incrementing by Step cannot cause +// signed overflow as long as the value of the recurrence within the loop does +// not exceed this limit before incrementing. +static const SCEV *getOverflowLimitForStep(const SCEV *Step, + ICmpInst::Predicate *Pred, + ScalarEvolution *SE) { + unsigned BitWidth = SE->getTypeSizeInBits(Step->getType()); + if (SE->isKnownPositive(Step)) { + *Pred = ICmpInst::ICMP_SLT; + return SE->getConstant(APInt::getSignedMinValue(BitWidth) - + SE->getSignedRange(Step).getSignedMax()); + } + if (SE->isKnownNegative(Step)) { + *Pred = ICmpInst::ICMP_SGT; + return SE->getConstant(APInt::getSignedMaxValue(BitWidth) - + SE->getSignedRange(Step).getSignedMin()); + } + return 0; +} + +// The recurrence AR has been shown to have no signed wrap. Typically, if we can +// prove NSW for AR, then we can just as easily prove NSW for its preincrement +// or postincrement sibling. This allows normalizing a sign extended AddRec as +// such: {sext(Step + Start),+,Step} => {Step + sext(Start),+,Step}. As a +// result, the expression "Step + sext(PreIncAR)" is congruent with +// "sext(PostIncAR)" +static const SCEV *getPreStartForSignExtend(const SCEVAddRecExpr *AR, + const Type *Ty, + ScalarEvolution *SE) { + const Loop *L = AR->getLoop(); + const SCEV *Start = AR->getStart(); + const SCEV *Step = AR->getStepRecurrence(*SE); + + // Check for a simple looking step prior to loop entry. + const SCEVAddExpr *SA = dyn_cast<SCEVAddExpr>(Start); + if (!SA || SA->getNumOperands() != 2 || SA->getOperand(0) != Step) + return 0; + + // This is a postinc AR. Check for overflow on the preinc recurrence using the + // same three conditions that getSignExtendExpr checks.
+ + // 1. NSW flags on the step increment. + const SCEV *PreStart = SA->getOperand(1); + const SCEVAddRecExpr *PreAR = dyn_cast<SCEVAddRecExpr>( + SE->getAddRecExpr(PreStart, Step, L, SCEV::FlagAnyWrap)); + + if (PreAR && PreAR->getNoWrapFlags(SCEV::FlagNSW)) + return PreStart; + + // 2. Direct overflow check on the step operation's expression. + unsigned BitWidth = SE->getTypeSizeInBits(AR->getType()); + const Type *WideTy = IntegerType::get(SE->getContext(), BitWidth * 2); + const SCEV *OperandExtendedStart = + SE->getAddExpr(SE->getSignExtendExpr(PreStart, WideTy), + SE->getSignExtendExpr(Step, WideTy)); + if (SE->getSignExtendExpr(Start, WideTy) == OperandExtendedStart) { + // Cache knowledge of PreAR NSW. + if (PreAR) + const_cast<SCEVAddRecExpr *>(PreAR)->setNoWrapFlags(SCEV::FlagNSW); + // FIXME: this optimization needs a unit test + DEBUG(dbgs() << "SCEV: untested prestart overflow check\n"); + return PreStart; + } + + // 3. Loop precondition. + ICmpInst::Predicate Pred; + const SCEV *OverflowLimit = getOverflowLimitForStep(Step, &Pred, SE); + + if (OverflowLimit && + SE->isLoopEntryGuardedByCond(L, Pred, PreStart, OverflowLimit)) { + return PreStart; + } + return 0; +} + +// Get the normalized sign-extended expression for this AddRec's Start. +static const SCEV *getSignExtendAddRecStart(const SCEVAddRecExpr *AR, + const Type *Ty, + ScalarEvolution *SE) { + const SCEV *PreStart = getPreStartForSignExtend(AR, Ty, SE); + if (!PreStart) + return SE->getSignExtendExpr(AR->getStart(), Ty); + + return SE->getAddExpr(SE->getSignExtendExpr(AR->getStepRecurrence(*SE), Ty), + SE->getSignExtendExpr(PreStart, Ty)); +} + const SCEV *ScalarEvolution::getSignExtendExpr(const SCEV *Op, const Type *Ty) { assert(getTypeSizeInBits(Op->getType()) < getTypeSizeInBits(Ty) && @@ -1097,7 +1184,7 @@ const SCEV *ScalarEvolution::getSignExtendExpr(const SCEV *Op, // If we have special knowledge that this addrec won't overflow, // we don't need to do any further analysis. if (AR->getNoWrapFlags(SCEV::FlagNSW)) - return getAddRecExpr(getSignExtendExpr(Start, Ty), + return getAddRecExpr(getSignExtendAddRecStart(AR, Ty, this), getSignExtendExpr(Step, Ty), L, SCEV::FlagNSW); @@ -1133,7 +1220,7 @@ const SCEV *ScalarEvolution::getSignExtendExpr(const SCEV *Op, // Cache knowledge of AR NSW, which is propagated to this AddRec. const_cast<SCEVAddRecExpr *>(AR)->setNoWrapFlags(SCEV::FlagNSW); // Return the expression with the addrec on the outside. - return getAddRecExpr(getSignExtendExpr(Start, Ty), + return getAddRecExpr(getSignExtendAddRecStart(AR, Ty, this), getSignExtendExpr(Step, Ty), L, AR->getNoWrapFlags()); } @@ -1149,7 +1236,7 @@ const SCEV *ScalarEvolution::getSignExtendExpr(const SCEV *Op, // Cache knowledge of AR NSW, which is propagated to this AddRec. const_cast<SCEVAddRecExpr *>(AR)->setNoWrapFlags(SCEV::FlagNSW); // Return the expression with the addrec on the outside. - return getAddRecExpr(getSignExtendExpr(Start, Ty), + return getAddRecExpr(getSignExtendAddRecStart(AR, Ty, this), getZeroExtendExpr(Step, Ty), L, AR->getNoWrapFlags()); } @@ -1159,34 +1246,18 @@ const SCEV *ScalarEvolution::getSignExtendExpr(const SCEV *Op, // the addrec is safe. Also, if the entry is guarded by a comparison // with the start value and the backedge is guarded by a comparison // with the post-inc value, the addrec is safe. 
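// Made-up example of the normalization enabled here: let AR = {x + 4,+,4}<L>,
// a post-increment recurrence whose pre-increment sibling is {x,+,4}. Once
// NSW is established for the sibling, getSignExtendAddRecStart computes the
// widened start as sext(4) + sext(x) instead of sext(x + 4), so
// sext({x,+,4}) and sext({x + 4,+,4}) share the base sext(x) and step
// sext(4), making "Step + sext(PreIncAR)" and "sext(PostIncAR)" visibly
// congruent to SCEV.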
- if (isKnownPositive(Step)) { - const SCEV *N = getConstant(APInt::getSignedMinValue(BitWidth) - - getSignedRange(Step).getSignedMax()); - if (isLoopBackedgeGuardedByCond(L, ICmpInst::ICMP_SLT, AR, N) || - (isLoopEntryGuardedByCond(L, ICmpInst::ICMP_SLT, Start, N) && - isLoopBackedgeGuardedByCond(L, ICmpInst::ICMP_SLT, - AR->getPostIncExpr(*this), N))) { - // Cache knowledge of AR NSW, which is propagated to this AddRec. - const_cast<SCEVAddRecExpr *>(AR)->setNoWrapFlags(SCEV::FlagNSW); - // Return the expression with the addrec on the outside. - return getAddRecExpr(getSignExtendExpr(Start, Ty), - getSignExtendExpr(Step, Ty), - L, AR->getNoWrapFlags()); - } - } else if (isKnownNegative(Step)) { - const SCEV *N = getConstant(APInt::getSignedMaxValue(BitWidth) - - getSignedRange(Step).getSignedMin()); - if (isLoopBackedgeGuardedByCond(L, ICmpInst::ICMP_SGT, AR, N) || - (isLoopEntryGuardedByCond(L, ICmpInst::ICMP_SGT, Start, N) && - isLoopBackedgeGuardedByCond(L, ICmpInst::ICMP_SGT, - AR->getPostIncExpr(*this), N))) { - // Cache knowledge of AR NSW, which is propagated to this AddRec. - const_cast<SCEVAddRecExpr *>(AR)->setNoWrapFlags(SCEV::FlagNSW); - // Return the expression with the addrec on the outside. - return getAddRecExpr(getSignExtendExpr(Start, Ty), - getSignExtendExpr(Step, Ty), - L, AR->getNoWrapFlags()); - } + ICmpInst::Predicate Pred; + const SCEV *OverflowLimit = getOverflowLimitForStep(Step, &Pred, this); + if (OverflowLimit && + (isLoopBackedgeGuardedByCond(L, Pred, AR, OverflowLimit) || + (isLoopEntryGuardedByCond(L, Pred, Start, OverflowLimit) && + isLoopBackedgeGuardedByCond(L, Pred, AR->getPostIncExpr(*this), + OverflowLimit)))) { + // Cache knowledge of AR NSW, then propagate NSW to the wide AddRec. + const_cast<SCEVAddRecExpr *>(AR)->setNoWrapFlags(SCEV::FlagNSW); + return getAddRecExpr(getSignExtendAddRecStart(AR, Ty, this), + getSignExtendExpr(Step, Ty), + L, AR->getNoWrapFlags()); } } } @@ -1882,7 +1953,7 @@ const SCEV *ScalarEvolution::getMulExpr(SmallVectorImpl<const SCEV *> &Ops, // outer mul and the inner addrec are guaranteed to have no overflow. // // No self-wrap cannot be guaranteed after changing the step size, but - // will be infered if either NUW or NSW is true. + // will be inferred if either NUW or NSW is true. Flags = AddRec->getNoWrapFlags(clearFlags(Flags, SCEV::FlagNW)); const SCEV *NewRec = getAddRecExpr(NewOps, AddRecLoop, Flags); @@ -2015,7 +2086,7 @@ const SCEV *ScalarEvolution::getUDivExpr(const SCEV *LHS, } } // (A+B)/C --> (A/C + B/C) if safe and A/C and B/C can be folded. - if (const SCEVAddRecExpr *A = dyn_cast<SCEVAddRecExpr>(LHS)) { + if (const SCEVAddExpr *A = dyn_cast<SCEVAddExpr>(LHS)) { SmallVector<const SCEV *, 4> Operands; for (unsigned i = 0, e = A->getNumOperands(); i != e; ++i) Operands.push_back(getZeroExtendExpr(A->getOperand(i), ExtTy)); @@ -3783,24 +3854,25 @@ ScalarEvolution::getBackedgeTakenInfo(const Loop *L) { // update the value. The temporary CouldNotCompute value tells SCEV // code elsewhere that it shouldn't attempt to request a new // backedge-taken count, which could result in infinite recursion. 
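// Numeric instance of getOverflowLimitForStep (hypothetical i8 values): for a
// step whose signed range has maximum 4, it sets Pred = ICMP_SLT and returns
// SignedMin(i8) - 4 = -128 - 4 = 124 (mod 2^8), i.e. the recurrence cannot
// sign-overflow while its pre-increment value satisfies "value < 124", which
// is exactly "value <= 123 = SignedMax(i8) - 4".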
- std::pair<std::map<const Loop *, BackedgeTakenInfo>::iterator, bool> Pair = + std::pair<DenseMap<const Loop *, BackedgeTakenInfo>::iterator, bool> Pair = BackedgeTakenCounts.insert(std::make_pair(L, getCouldNotCompute())); if (!Pair.second) return Pair.first->second; - BackedgeTakenInfo BECount = ComputeBackedgeTakenCount(L); - if (BECount.Exact != getCouldNotCompute()) { - assert(isLoopInvariant(BECount.Exact, L) && - isLoopInvariant(BECount.Max, L) && + BackedgeTakenInfo Result = getCouldNotCompute(); + BackedgeTakenInfo Computed = ComputeBackedgeTakenCount(L); + if (Computed.Exact != getCouldNotCompute()) { + assert(isLoopInvariant(Computed.Exact, L) && + isLoopInvariant(Computed.Max, L) && "Computed backedge-taken count isn't loop invariant for loop!"); ++NumTripCountsComputed; // Update the value in the map. - Pair.first->second = BECount; + Result = Computed; } else { - if (BECount.Max != getCouldNotCompute()) + if (Computed.Max != getCouldNotCompute()) // Update the value in the map. - Pair.first->second = BECount; + Result = Computed; if (isa<PHINode>(L->getHeader()->begin())) // Only count loops that have phi nodes as not being computable. ++NumTripCountsNotComputed; @@ -3811,7 +3883,7 @@ ScalarEvolution::getBackedgeTakenInfo(const Loop *L) { // conservative estimates made without the benefit of trip count // information. This is similar to the code in forgetLoop, except that // it handles SCEVUnknown PHI nodes specially. - if (BECount.hasAnyInfo()) { + if (Computed.hasAnyInfo()) { SmallVector<Instruction *, 16> Worklist; PushLoopPHIs(L, Worklist); @@ -3842,7 +3914,13 @@ ScalarEvolution::getBackedgeTakenInfo(const Loop *L) { PushDefUseChildren(I, Worklist); } } - return Pair.first->second; + + // Re-lookup the insert position, since the call to + // ComputeBackedgeTakenCount above could result in a + // recursive call to getBackedgeTakenInfo (on a different + // loop), which would invalidate the iterator computed + // earlier. + return BackedgeTakenCounts.find(L)->second = Result; } /// forgetLoop - This method should be called by the client when it has @@ -4426,7 +4504,7 @@ Constant * ScalarEvolution::getConstantEvolutionLoopExitValue(PHINode *PN, const APInt &BEs, const Loop *L) { - std::map<PHINode*, Constant*>::const_iterator I = + DenseMap<PHINode*, Constant*>::const_iterator I = ConstantEvolutionLoopExitValue.find(PN); if (I != ConstantEvolutionLoopExitValue.end()) return I->second; @@ -4694,9 +4772,15 @@ const SCEV *ScalarEvolution::computeSCEVAtScope(const SCEV *V, const Loop *L) { for (++i; i != e; ++i) NewOps.push_back(getSCEVAtScope(AddRec->getOperand(i), L)); - AddRec = cast<SCEVAddRecExpr>( + const SCEV *FoldedRec = getAddRecExpr(NewOps, AddRec->getLoop(), - AddRec->getNoWrapFlags(SCEV::FlagNW))); + AddRec->getNoWrapFlags(SCEV::FlagNW)); + AddRec = dyn_cast<SCEVAddRecExpr>(FoldedRec); + // The addrec may be folded to a nonrecurrence, for example, if the + // induction variable is multiplied by zero after constant folding. Go + // ahead and return the folded value. + if (!AddRec) + return FoldedRec; break; } diff --git a/lib/Analysis/TypeBasedAliasAnalysis.cpp b/lib/Analysis/TypeBasedAliasAnalysis.cpp index 40e18ab..0faf139 100644 --- a/lib/Analysis/TypeBasedAliasAnalysis.cpp +++ b/lib/Analysis/TypeBasedAliasAnalysis.cpp @@ -31,7 +31,7 @@ // // The second field identifies the type's parent node in the tree, or // is null or omitted for a root node. A type is considered to alias -// all of its decendents and all of its ancestors in the tree.
Also, +// all of its descendants and all of its ancestors in the tree. Also, // a type is considered to alias all types in other trees, so that // bitcode produced from multiple front-ends is handled conservatively. // @@ -59,6 +59,7 @@ #include "llvm/Analysis/AliasAnalysis.h" #include "llvm/Analysis/Passes.h" +#include "llvm/Constants.h" #include "llvm/LLVMContext.h" #include "llvm/Module.h" #include "llvm/Metadata.h" diff --git a/lib/Analysis/ValueTracking.cpp b/lib/Analysis/ValueTracking.cpp index a8117e6..dab5aeb 100644 --- a/lib/Analysis/ValueTracking.cpp +++ b/lib/Analysis/ValueTracking.cpp @@ -131,8 +131,18 @@ void llvm::ComputeMaskedBits(Value *V, const APInt &Mask, } return; } + + if (Argument *A = dyn_cast<Argument>(V)) { + // Get alignment information off byval arguments if specified in the IR. + if (A->hasByValAttr()) + if (unsigned Align = A->getParamAlignment()) + KnownZero = Mask & APInt::getLowBitsSet(BitWidth, + CountTrailingZeros_32(Align)); + return; + } - KnownZero.clearAllBits(); KnownOne.clearAllBits(); // Start out not knowing anything. + // Start out not knowing anything. + KnownZero.clearAllBits(); KnownOne.clearAllBits(); if (Depth == MaxDepth || Mask == 0) return; // Limit search depth. @@ -670,6 +680,10 @@ void llvm::ComputeMaskedBits(Value *V, const APInt &Mask, KnownZero = APInt::getHighBitsSet(BitWidth, BitWidth - LowBits); break; } + case Intrinsic::x86_sse42_crc32_64_8: + case Intrinsic::x86_sse42_crc32_64_64: + KnownZero = APInt::getHighBitsSet(64, 32); + break; } } break; @@ -1328,7 +1342,7 @@ static Value *BuildSubAggregate(Value *From, Value* To, const Type *IndexedType, break; } } - // If we succesfully found a value for each of our subaggregates + // If we successfully found a value for each of our subaggregates if (To) return To; } @@ -1757,7 +1771,7 @@ llvm::GetUnderlyingObject(Value *V, const TargetData *TD, unsigned MaxLookup) { } else { // See if InstructionSimplify knows any relevant tricks. if (Instruction *I = dyn_cast<Instruction>(V)) - // TODO: Aquire a DominatorTree and use it. + // TODO: Acquire a DominatorTree and use it. if (Value *Simplified = SimplifyInstruction(I, TD, 0)) { V = Simplified; continue; |