Diffstat (limited to 'lib/Analysis')
30 files changed, 688 insertions, 164 deletions
diff --git a/lib/Analysis/AliasAnalysis.cpp b/lib/Analysis/AliasAnalysis.cpp index 0b0fd50..43db176 100644 --- a/lib/Analysis/AliasAnalysis.cpp +++ b/lib/Analysis/AliasAnalysis.cpp @@ -82,6 +82,23 @@ void AliasAnalysis::addEscapingUse(Use &U) { AA->addEscapingUse(U); } +AliasAnalysis::ModRefResult +AliasAnalysis::getModRefInfo(Instruction *I, ImmutableCallSite Call) { + // We may have two calls + if (auto CS = ImmutableCallSite(I)) { + // Check if the two calls modify the same memory + return getModRefInfo(Call, CS); + } else { + // Otherwise, check if the call modifies or references the + // location this memory access defines. The best we can say + // is that if the call references what this instruction + // defines, it must be clobbered by this location. + const AliasAnalysis::Location DefLoc = AA->getLocation(I); + if (getModRefInfo(Call, DefLoc) != AliasAnalysis::NoModRef) + return AliasAnalysis::ModRef; + } + return AliasAnalysis::NoModRef; +} AliasAnalysis::ModRefResult AliasAnalysis::getModRefInfo(ImmutableCallSite CS, @@ -330,7 +347,7 @@ AliasAnalysis::getModRefInfo(const LoadInst *L, const Location &Loc) { // If the load address doesn't alias the given address, it doesn't read // or write the specified memory. - if (!alias(getLocation(L), Loc)) + if (Loc.Ptr && !alias(getLocation(L), Loc)) return NoModRef; // Otherwise, a load just reads. @@ -343,15 +360,18 @@ AliasAnalysis::getModRefInfo(const StoreInst *S, const Location &Loc) { if (!S->isUnordered()) return ModRef; - // If the store address cannot alias the pointer in question, then the - // specified memory cannot be modified by the store. - if (!alias(getLocation(S), Loc)) - return NoModRef; + if (Loc.Ptr) { + // If the store address cannot alias the pointer in question, then the + // specified memory cannot be modified by the store. + if (!alias(getLocation(S), Loc)) + return NoModRef; - // If the pointer is a pointer to constant memory, then it could not have been - // modified by this store. - if (pointsToConstantMemory(Loc)) - return NoModRef; + // If the pointer is a pointer to constant memory, then it could not have + // been modified by this store. + if (pointsToConstantMemory(Loc)) + return NoModRef; + + } // Otherwise, a store just writes. return Mod; diff --git a/lib/Analysis/AliasAnalysisCounter.cpp b/lib/Analysis/AliasAnalysisCounter.cpp index 5865259..a1bfba1 100644 --- a/lib/Analysis/AliasAnalysisCounter.cpp +++ b/lib/Analysis/AliasAnalysisCounter.cpp @@ -44,7 +44,7 @@ namespace { errs() << " " << Val << " " << Desc << " responses (" << Val*100/Sum << "%)\n"; } - ~AliasAnalysisCounter() { + ~AliasAnalysisCounter() override { unsigned AASum = No+May+Partial+Must; unsigned MRSum = NoMR+JustRef+JustMod+MR; if (AASum + MRSum) { // Print a report if any counted queries occurred... diff --git a/lib/Analysis/AliasAnalysisEvaluator.cpp b/lib/Analysis/AliasAnalysisEvaluator.cpp index fe4bd4c..273eacc 100644 --- a/lib/Analysis/AliasAnalysisEvaluator.cpp +++ b/lib/Analysis/AliasAnalysisEvaluator.cpp @@ -158,7 +158,7 @@ bool AAEval::runOnFunction(Function &F) { if (EvalAAMD && isa<StoreInst>(&*I)) Stores.insert(&*I); Instruction &Inst = *I; - if (CallSite CS = cast<Value>(&Inst)) { + if (auto CS = CallSite(&Inst)) { Value *Callee = CS.getCalledValue(); // Skip actual functions for direct function calls. 
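A minimal sketch of the CallSite idiom this commit converts to, assuming LLVM 3.7-era headers (the helper name is invented for illustration). CallSite(V) is null, and tests false, unless V is actually a CallInst or InvokeInst, so constructing one doubles as the call/invoke check that the old implicit conversion from cast<Value>(...) performed:

    #include "llvm/IR/CallSite.h"
    #include "llvm/IR/IntrinsicInst.h"
    using namespace llvm;

    // Returns true for call/invoke instructions that are not debug
    // intrinsics; a null CallSite means Inst is not a call at all.
    static bool isNonDebugCall(Instruction *Inst) {
      if (auto CS = CallSite(Inst))
        return !isa<DbgInfoIntrinsic>(CS.getInstruction());
      return false;
    }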
if (!isa<Function>(Callee) && isInterestingPointer(Callee)) diff --git a/lib/Analysis/AliasSetTracker.cpp b/lib/Analysis/AliasSetTracker.cpp index 45442b0..4c79b9f 100644 --- a/lib/Analysis/AliasSetTracker.cpp +++ b/lib/Analysis/AliasSetTracker.cpp @@ -187,7 +187,7 @@ bool AliasSet::aliasesUnknownInst(Instruction *Inst, AliasAnalysis &AA) const { return false; for (unsigned i = 0, e = UnknownInsts.size(); i != e; ++i) { - CallSite C1 = getUnknownInst(i), C2 = Inst; + CallSite C1(getUnknownInst(i)), C2(Inst); if (!C1 || !C2 || AA.getModRefInfo(C1, C2) != AliasAnalysis::NoModRef || AA.getModRefInfo(C2, C1) != AliasAnalysis::NoModRef) diff --git a/lib/Analysis/Analysis.cpp b/lib/Analysis/Analysis.cpp index 4549c1e..842ff0a 100644 --- a/lib/Analysis/Analysis.cpp +++ b/lib/Analysis/Analysis.cpp @@ -37,6 +37,7 @@ void llvm::initializeAnalysis(PassRegistry &Registry) { initializeCFLAliasAnalysisPass(Registry); initializeDependenceAnalysisPass(Registry); initializeDelinearizationPass(Registry); + initializeDivergenceAnalysisPass(Registry); initializeDominanceFrontierPass(Registry); initializeDomViewerPass(Registry); initializeDomPrinterPass(Registry); diff --git a/lib/Analysis/Android.mk b/lib/Analysis/Android.mk index 277956c..94a1d2e 100644 --- a/lib/Analysis/Android.mk +++ b/lib/Analysis/Android.mk @@ -22,6 +22,7 @@ analysis_SRC_FILES := \ CostModel.cpp \ Delinearization.cpp \ DependenceAnalysis.cpp \ + DivergenceAnalysis.cpp \ DomPrinter.cpp \ DominanceFrontier.cpp \ IVUsers.cpp \ diff --git a/lib/Analysis/BasicAliasAnalysis.cpp b/lib/Analysis/BasicAliasAnalysis.cpp index be2282f..2767e41 100644 --- a/lib/Analysis/BasicAliasAnalysis.cpp +++ b/lib/Analysis/BasicAliasAnalysis.cpp @@ -932,14 +932,14 @@ aliasSameBasePointerGEPs(const GEPOperator *GEP1, uint64_t V1Size, // Also, check that they all index through arrays. for (unsigned i = 1, e = GEP1->getNumIndices() - 1; i != e; ++i) { if (!isa<ArrayType>(GetElementPtrInst::getIndexedType( - GEP1->getPointerOperandType(), IntermediateIndices))) + GEP1->getSourceElementType(), IntermediateIndices))) return AliasAnalysis::MayAlias; IntermediateIndices.push_back(GEP1->getOperand(i + 1)); } StructType *LastIndexedStruct = dyn_cast<StructType>(GetElementPtrInst::getIndexedType( - GEP1->getPointerOperandType(), IntermediateIndices)); + GEP1->getSourceElementType(), IntermediateIndices)); if (!LastIndexedStruct) return AliasAnalysis::MayAlias; diff --git a/lib/Analysis/BlockFrequencyInfo.cpp b/lib/Analysis/BlockFrequencyInfo.cpp index 37f2fae..3d819eb 100644 --- a/lib/Analysis/BlockFrequencyInfo.cpp +++ b/lib/Analysis/BlockFrequencyInfo.cpp @@ -85,7 +85,7 @@ struct DOTGraphTraits<BlockFrequencyInfo*> : public DefaultDOTGraphTraits { std::string Result; raw_string_ostream OS(Result); - OS << Node->getName().str() << ":"; + OS << Node->getName() << ":"; switch (ViewBlockFreqPropagationDAG) { case GVDT_Fraction: Graph->printBlockFreq(OS, Node); diff --git a/lib/Analysis/BlockFrequencyInfoImpl.cpp b/lib/Analysis/BlockFrequencyInfoImpl.cpp index 278073c..456cee1 100644 --- a/lib/Analysis/BlockFrequencyInfoImpl.cpp +++ b/lib/Analysis/BlockFrequencyInfoImpl.cpp @@ -331,32 +331,35 @@ bool BlockFrequencyInfoImplBase::addLoopSuccessorsToDist( return true; } -/// \brief Get the maximum allowed loop scale. -/// -/// Gives the maximum number of estimated iterations allowed for a loop. Very -/// large numbers cause problems downstream (even within 64-bits). -static Scaled64 getMaxLoopScale() { return Scaled64(1, 12); } - /// \brief Compute the loop scale for a loop. 
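Before the computeLoopScale hunk below, a standalone arithmetic model of the rule it implements may help (plain doubles instead of Scaled64; a sketch, not the LLVM code): the loop scale, i.e. the estimated trip count, is the inverse of the exit mass, and an arbitrary 2^12 now stands in for infinite loops whose exit mass is zero:

    #include <cstdio>

    static double loopScale(double BackedgeMass) {
      const double InfiniteLoopScale = 4096.0; // Scaled64(1, 12) == 2^12
      double ExitMass = 1.0 - BackedgeMass;    // ExitMass == HeadMass - BackedgeMass
      return ExitMass == 0.0 ? InfiniteLoopScale : 1.0 / ExitMass;
    }

    int main() {
      std::printf("%g\n", loopScale(0.9)); // 90% of mass on the backedge -> scale 10
      std::printf("%g\n", loopScale(1.0)); // infinite loop -> arbitrary 4096
    }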
void BlockFrequencyInfoImplBase::computeLoopScale(LoopData &Loop) { // Compute loop scale. DEBUG(dbgs() << "compute-loop-scale: " << getLoopName(Loop) << "\n"); + // Infinite loops need special handling. If we give the back edge an infinite + // mass, they may saturate all the other scales in the function down to 1, + // making all the other region temperatures look exactly the same. Choose an + // arbitrary scale to avoid these issues. + // + // FIXME: An alternate way would be to select a symbolic scale which is later + // replaced to be the maximum of all computed scales plus 1. This would + // appropriately describe the loop as having a large scale, without skewing + // the final frequency computation. + const Scaled64 InifiniteLoopScale(1, 12); + // LoopScale == 1 / ExitMass // ExitMass == HeadMass - BackedgeMass BlockMass ExitMass = BlockMass::getFull() - Loop.BackedgeMass; - // Block scale stores the inverse of the scale. - Loop.Scale = ExitMass.toScaled().inverse(); + // Block scale stores the inverse of the scale. If this is an infinite loop, + // its exit mass will be zero. In this case, use an arbitrary scale for the + // loop scale. + Loop.Scale = + ExitMass.isEmpty() ? InifiniteLoopScale : ExitMass.toScaled().inverse(); DEBUG(dbgs() << " - exit-mass = " << ExitMass << " (" << BlockMass::getFull() << " - " << Loop.BackedgeMass << ")\n" << " - scale = " << Loop.Scale << "\n"); - - if (Loop.Scale > getMaxLoopScale()) { - Loop.Scale = getMaxLoopScale(); - DEBUG(dbgs() << " - reduced-to-max-scale: " << getMaxLoopScale() << "\n"); - } } /// \brief Package up a loop. @@ -424,15 +427,24 @@ static void convertFloatingToInteger(BlockFrequencyInfoImplBase &BFI, const Scaled64 &Min, const Scaled64 &Max) { // Scale the Factor to a size that creates integers. Ideally, integers would // be scaled so that Max == UINT64_MAX so that they can be best - // differentiated. However, the register allocator currently deals poorly - // with large numbers. Instead, push Min up a little from 1 to give some - // room to differentiate small, unequal numbers. - // - // TODO: fix issues downstream so that ScalingFactor can be - // Scaled64(1,64)/Max. - Scaled64 ScalingFactor = Min.inverse(); - if ((Max / Min).lg() < 60) + // differentiated. However, in the presence of large frequency values, small + // frequencies are scaled down to 1, making it impossible to differentiate + // small, unequal numbers. When the spread between Min and Max frequencies + // fits well within MaxBits, we make the scale be at least 8. + const unsigned MaxBits = 64; + const unsigned SpreadBits = (Max / Min).lg(); + Scaled64 ScalingFactor; + if (SpreadBits <= MaxBits - 3) { + // If the values are small enough, make the scaling factor at least 8 to + // allow distinguishing small values. + ScalingFactor = Min.inverse(); ScalingFactor <<= 3; + } else { + // If the values need more than MaxBits to be represented, saturate small + // frequency values down to 1 by using a scaling factor that benefits large + // frequency values. + ScalingFactor = Scaled64(1, MaxBits) / Max; + } // Translate the floats to integers. 
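The same caveat applies to this standalone model of the scaling-factor selection above (doubles instead of Scaled64; a sketch under the assumption MaxBits == 64): when log2(Max/Min) fits within 61 bits, anchoring Min at 8 keeps small unequal frequencies distinguishable; otherwise anchoring Max near 2^64 lets the smallest frequencies saturate to 1:

    #include <cmath>

    static double chooseScalingFactor(double Min, double Max) {
      const unsigned MaxBits = 64;
      const unsigned SpreadBits = (unsigned)std::log2(Max / Min);
      if (SpreadBits <= MaxBits - 3)
        return 8.0 / Min;                    // Min.inverse() <<= 3: Min maps to 8
      return std::ldexp(1.0, MaxBits) / Max; // Scaled64(1, 64) / Max: Max maps to 2^64
    }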
DEBUG(dbgs() << "float-to-int: min = " << Min << ", max = " << Max diff --git a/lib/Analysis/BranchProbabilityInfo.cpp b/lib/Analysis/BranchProbabilityInfo.cpp index 14800f4..8799a71 100644 --- a/lib/Analysis/BranchProbabilityInfo.cpp +++ b/lib/Analysis/BranchProbabilityInfo.cpp @@ -379,6 +379,14 @@ bool BranchProbabilityInfo::calcZeroHeuristics(BasicBlock *BB) { if (!CV) return false; + // If the LHS is the result of AND'ing a value with a single bit bitmask, + // we don't have information about probabilities. + if (Instruction *LHS = dyn_cast<Instruction>(CI->getOperand(0))) + if (LHS->getOpcode() == Instruction::And) + if (ConstantInt *AndRHS = dyn_cast<ConstantInt>(LHS->getOperand(1))) + if (AndRHS->getUniqueInteger().isPowerOf2()) + return false; + bool isProb; if (CV->isZero()) { switch (CI->getPredicate()) { @@ -499,25 +507,23 @@ bool BranchProbabilityInfo::runOnFunction(Function &F) { // Walk the basic blocks in post-order so that we can build up state about // the successors of a block iteratively. - for (po_iterator<BasicBlock *> I = po_begin(&F.getEntryBlock()), - E = po_end(&F.getEntryBlock()); - I != E; ++I) { - DEBUG(dbgs() << "Computing probabilities for " << I->getName() << "\n"); - if (calcUnreachableHeuristics(*I)) + for (auto BB : post_order(&F.getEntryBlock())) { + DEBUG(dbgs() << "Computing probabilities for " << BB->getName() << "\n"); + if (calcUnreachableHeuristics(BB)) continue; - if (calcMetadataWeights(*I)) + if (calcMetadataWeights(BB)) continue; - if (calcColdCallHeuristics(*I)) + if (calcColdCallHeuristics(BB)) continue; - if (calcLoopBranchHeuristics(*I)) + if (calcLoopBranchHeuristics(BB)) continue; - if (calcPointerHeuristics(*I)) + if (calcPointerHeuristics(BB)) continue; - if (calcZeroHeuristics(*I)) + if (calcZeroHeuristics(BB)) continue; - if (calcFloatingPointHeuristics(*I)) + if (calcFloatingPointHeuristics(BB)) continue; - calcInvokeHeuristics(*I); + calcInvokeHeuristics(BB); } PostDominatedByUnreachable.clear(); diff --git a/lib/Analysis/CFGPrinter.cpp b/lib/Analysis/CFGPrinter.cpp index 89787f82..c86f1f5 100644 --- a/lib/Analysis/CFGPrinter.cpp +++ b/lib/Analysis/CFGPrinter.cpp @@ -77,7 +77,7 @@ namespace { } bool runOnFunction(Function &F) override { - std::string Filename = "cfg." + F.getName().str() + ".dot"; + std::string Filename = ("cfg." + F.getName() + ".dot").str(); errs() << "Writing '" << Filename << "'..."; std::error_code EC; @@ -111,7 +111,7 @@ namespace { } bool runOnFunction(Function &F) override { - std::string Filename = "cfg." + F.getName().str() + ".dot"; + std::string Filename = ("cfg." 
+ F.getName() + ".dot").str(); errs() << "Writing '" << Filename << "'..."; std::error_code EC; diff --git a/lib/Analysis/CFLAliasAnalysis.cpp b/lib/Analysis/CFLAliasAnalysis.cpp index 53d748d..3147992 100644 --- a/lib/Analysis/CFLAliasAnalysis.cpp +++ b/lib/Analysis/CFLAliasAnalysis.cpp @@ -161,7 +161,7 @@ struct FunctionHandle : public CallbackVH { assert(CFLAA != nullptr); } - virtual ~FunctionHandle() {} + ~FunctionHandle() override {} void deleted() override { removeSelfFromCache(); } void allUsesReplacedWith(Value *) override { removeSelfFromCache(); } @@ -189,7 +189,7 @@ public: initializeCFLAliasAnalysisPass(*PassRegistry::getPassRegistry()); } - virtual ~CFLAliasAnalysis() {} + ~CFLAliasAnalysis() override {} void getAnalysisUsage(AnalysisUsage &AU) const override { AliasAnalysis::getAnalysisUsage(AU); diff --git a/lib/Analysis/CMakeLists.txt b/lib/Analysis/CMakeLists.txt index ae40321..1335a6d 100644 --- a/lib/Analysis/CMakeLists.txt +++ b/lib/Analysis/CMakeLists.txt @@ -20,6 +20,7 @@ add_llvm_library(LLVMAnalysis ConstantFolding.cpp Delinearization.cpp DependenceAnalysis.cpp + DivergenceAnalysis.cpp DomPrinter.cpp DominanceFrontier.cpp IVUsers.cpp diff --git a/lib/Analysis/ConstantFolding.cpp b/lib/Analysis/ConstantFolding.cpp index 995465d..a85e813 100644 --- a/lib/Analysis/ConstantFolding.cpp +++ b/lib/Analysis/ConstantFolding.cpp @@ -671,8 +671,8 @@ static Constant *SymbolicallyEvaluateBinop(unsigned Opc, Constant *Op0, /// If array indices are not pointer-sized integers, explicitly cast them so /// that they aren't implicitly casted by the getelementptr. -static Constant *CastGEPIndices(ArrayRef<Constant *> Ops, Type *ResultTy, - const DataLayout &DL, +static Constant *CastGEPIndices(Type *SrcTy, ArrayRef<Constant *> Ops, + Type *ResultTy, const DataLayout &DL, const TargetLibraryInfo *TLI) { Type *IntPtrTy = DL.getIntPtrType(ResultTy); @@ -681,8 +681,9 @@ static Constant *CastGEPIndices(ArrayRef<Constant *> Ops, Type *ResultTy, for (unsigned i = 1, e = Ops.size(); i != e; ++i) { if ((i == 1 || !isa<StructType>(GetElementPtrInst::getIndexedType( - Ops[0]->getType(), - Ops.slice(1, i - 1)))) && + cast<PointerType>(Ops[0]->getType()->getScalarType()) + ->getElementType(), + Ops.slice(1, i - 1)))) && Ops[i]->getType() != IntPtrTy) { Any = true; NewIdxs.push_back(ConstantExpr::getCast(CastInst::getCastOpcode(Ops[i], @@ -697,7 +698,7 @@ static Constant *CastGEPIndices(ArrayRef<Constant *> Ops, Type *ResultTy, if (!Any) return nullptr; - Constant *C = ConstantExpr::getGetElementPtr(Ops[0], NewIdxs); + Constant *C = ConstantExpr::getGetElementPtr(SrcTy, Ops[0], NewIdxs); if (ConstantExpr *CE = dyn_cast<ConstantExpr>(C)) { if (Constant *Folded = ConstantFoldConstantExpression(CE, DL, TLI)) C = Folded; @@ -723,7 +724,7 @@ static Constant* StripPtrCastKeepAS(Constant* Ptr) { } /// If we can symbolically evaluate the GEP constant expression, do so. -static Constant *SymbolicallyEvaluateGEP(ArrayRef<Constant *> Ops, +static Constant *SymbolicallyEvaluateGEP(Type *SrcTy, ArrayRef<Constant *> Ops, Type *ResultTy, const DataLayout &DL, const TargetLibraryInfo *TLI) { Constant *Ptr = Ops[0]; @@ -865,7 +866,7 @@ static Constant *SymbolicallyEvaluateGEP(ArrayRef<Constant *> Ops, return nullptr; // Create a GEP. 
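A hedged sketch of the call form this file is migrating to (the helper is invented for illustration): the GEP's source element type is now passed to getGetElementPtr explicitly rather than re-derived from the pointer operand's type, which is the point of this opaque-pointer-oriented API change:

    #include "llvm/ADT/ArrayRef.h"
    #include "llvm/IR/Constants.h"
    #include "llvm/IR/DerivedTypes.h"
    using namespace llvm;

    static Constant *makeTypedGEP(Constant *Ptr, ArrayRef<Constant *> Idxs) {
      // Recover the pointee type once, then state it explicitly.
      Type *SrcTy =
          cast<PointerType>(Ptr->getType()->getScalarType())->getElementType();
      return ConstantExpr::getGetElementPtr(SrcTy, Ptr, Idxs);
    }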
- Constant *C = ConstantExpr::getGetElementPtr(Ptr, NewIdxs); + Constant *C = ConstantExpr::getGetElementPtr(SrcTy, Ptr, NewIdxs); assert(C->getType()->getPointerElementType() == Ty && "Computed GetElementPtr has unexpected type!"); @@ -1085,13 +1086,15 @@ Constant *llvm::ConstantFoldInstOperands(unsigned Opcode, Type *DestTy, return ConstantExpr::getInsertElement(Ops[0], Ops[1], Ops[2]); case Instruction::ShuffleVector: return ConstantExpr::getShuffleVector(Ops[0], Ops[1], Ops[2]); - case Instruction::GetElementPtr: - if (Constant *C = CastGEPIndices(Ops, DestTy, DL, TLI)) + case Instruction::GetElementPtr: { + Type *SrcTy = nullptr; + if (Constant *C = CastGEPIndices(SrcTy, Ops, DestTy, DL, TLI)) return C; - if (Constant *C = SymbolicallyEvaluateGEP(Ops, DestTy, DL, TLI)) + if (Constant *C = SymbolicallyEvaluateGEP(SrcTy, Ops, DestTy, DL, TLI)) return C; - return ConstantExpr::getGetElementPtr(Ops[0], Ops.slice(1)); + return ConstantExpr::getGetElementPtr(SrcTy, Ops[0], Ops.slice(1)); + } } } diff --git a/lib/Analysis/DivergenceAnalysis.cpp b/lib/Analysis/DivergenceAnalysis.cpp new file mode 100644 index 0000000..e5ee295 --- /dev/null +++ b/lib/Analysis/DivergenceAnalysis.cpp @@ -0,0 +1,337 @@ +//===- DivergenceAnalysis.cpp ------ Divergence Analysis ------------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file defines divergence analysis which determines whether a branch in a +// GPU program is divergent. It can help branch optimizations such as jump +// threading and loop unswitching to make better decisions. +// +// GPU programs typically use the SIMD execution model, where multiple threads +// in the same execution group have to execute in lock-step. Therefore, if the +// code contains divergent branches (i.e., threads in a group do not agree on +// which path of the branch to take), the group of threads has to execute all +// the paths from that branch with different subsets of threads enabled until +// they converge at the immediately post-dominating BB of the paths. +// +// Due to this execution model, some optimizations such as jump +// threading and loop unswitching can be unfortunately harmful when performed on +// divergent branches. Therefore, an analysis that computes which branches in a +// GPU program are divergent can help the compiler to selectively run these +// optimizations. +// +// This file defines divergence analysis which computes a conservative but +// non-trivial approximation of all divergent branches in a GPU program. It +// partially implements the approach described in +// +// Divergence Analysis +// Sampaio, Souza, Collange, Pereira +// TOPLAS '13 +// +// The divergence analysis identifies the sources of divergence (e.g., special +// variables that hold the thread ID), and recursively marks variables that are +// data or sync dependent on a source of divergence as divergent. +// +// While data dependency is a well-known concept, the notion of sync dependency +// is worth more explanation. Sync dependence characterizes the control flow +// aspect of the propagation of branch divergence. For example, +// +// %cond = icmp slt i32 %tid, 10 +// br i1 %cond, label %then, label %else +// then: +// br label %merge +// else: +// br label %merge +// merge: +// %a = phi i32 [ 0, %then ], [ 1, %else ] +// +// Suppose %tid holds the thread ID. 
Although %a is not data dependent on %tid +// because %tid is not on its use-def chains, %a is sync dependent on %tid +// because the branch "br i1 %cond" depends on %tid and affects which value %a +// is assigned to. +// +// The current implementation has the following limitations: +// 1. intra-procedural. It conservatively considers the arguments of a +// non-kernel-entry function and the return value of a function call as +// divergent. +// 2. memory as black box. It conservatively considers values loaded from +// generic or local address as divergent. This can be improved by leveraging +// pointer analysis. +//===----------------------------------------------------------------------===// + +#include <vector> +#include "llvm/IR/Dominators.h" +#include "llvm/ADT/DenseSet.h" +#include "llvm/Analysis/Passes.h" +#include "llvm/Analysis/PostDominators.h" +#include "llvm/Analysis/TargetTransformInfo.h" +#include "llvm/IR/Function.h" +#include "llvm/IR/InstIterator.h" +#include "llvm/IR/Instructions.h" +#include "llvm/IR/IntrinsicInst.h" +#include "llvm/IR/Value.h" +#include "llvm/Pass.h" +#include "llvm/Support/CommandLine.h" +#include "llvm/Support/Debug.h" +#include "llvm/Support/raw_ostream.h" +#include "llvm/Transforms/Scalar.h" +using namespace llvm; + +#define DEBUG_TYPE "divergence" + +namespace { +class DivergenceAnalysis : public FunctionPass { +public: + static char ID; + + DivergenceAnalysis() : FunctionPass(ID) { + initializeDivergenceAnalysisPass(*PassRegistry::getPassRegistry()); + } + + void getAnalysisUsage(AnalysisUsage &AU) const override { + AU.addRequired<DominatorTreeWrapperPass>(); + AU.addRequired<PostDominatorTree>(); + AU.setPreservesAll(); + } + + bool runOnFunction(Function &F) override; + + // Print all divergent branches in the function. + void print(raw_ostream &OS, const Module *) const override; + + // Returns true if V is divergent. + bool isDivergent(const Value *V) const { return DivergentValues.count(V); } + // Returns true if V is uniform/non-divergent. + bool isUniform(const Value *V) const { return !isDivergent(V); } + +private: + // Stores all divergent values. + DenseSet<const Value *> DivergentValues; +}; +} // End of anonymous namespace + +// Register this pass. +char DivergenceAnalysis::ID = 0; +INITIALIZE_PASS_BEGIN(DivergenceAnalysis, "divergence", "Divergence Analysis", + false, true) +INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass) +INITIALIZE_PASS_DEPENDENCY(PostDominatorTree) +INITIALIZE_PASS_END(DivergenceAnalysis, "divergence", "Divergence Analysis", + false, true) + +namespace { + +class DivergencePropagator { +public: + DivergencePropagator(Function &F, TargetTransformInfo &TTI, + DominatorTree &DT, PostDominatorTree &PDT, + DenseSet<const Value *> &DV) + : F(F), TTI(TTI), DT(DT), PDT(PDT), DV(DV) {} + void populateWithSourcesOfDivergence(); + void propagate(); + +private: + // A helper function that explores data dependents of V. + void exploreDataDependency(Value *V); + // A helper function that explores sync dependents of TI. + void exploreSyncDependency(TerminatorInst *TI); + // Computes the influence region from Start to End. This region includes all + // basic blocks on any path from Start to End. + void computeInfluenceRegion(BasicBlock *Start, BasicBlock *End, + DenseSet<BasicBlock *> &InfluenceRegion); + // Finds all users of I that are outside the influence region, and add these + // users to Worklist. 
+ void findUsersOutsideInfluenceRegion( + Instruction &I, const DenseSet<BasicBlock *> &InfluenceRegion); + + Function &F; + TargetTransformInfo &TTI; + DominatorTree &DT; + PostDominatorTree &PDT; + std::vector<Value *> Worklist; // Stack for DFS. + DenseSet<const Value *> &DV; // Stores all divergent values. +}; + +void DivergencePropagator::populateWithSourcesOfDivergence() { + Worklist.clear(); + DV.clear(); + for (auto &I : inst_range(F)) { + if (TTI.isSourceOfDivergence(&I)) { + Worklist.push_back(&I); + DV.insert(&I); + } + } + for (auto &Arg : F.args()) { + if (TTI.isSourceOfDivergence(&Arg)) { + Worklist.push_back(&Arg); + DV.insert(&Arg); + } + } +} + +void DivergencePropagator::exploreSyncDependency(TerminatorInst *TI) { + // Propagation rule 1: if branch TI is divergent, all PHINodes in TI's + // immediate post dominator are divergent. This rule handles if-then-else + // patterns. For example, + // + // if (tid < 5) + // a1 = 1; + // else + // a2 = 2; + // a = phi(a1, a2); // sync dependent on (tid < 5) + BasicBlock *ThisBB = TI->getParent(); + BasicBlock *IPostDom = PDT.getNode(ThisBB)->getIDom()->getBlock(); + if (IPostDom == nullptr) + return; + + for (auto I = IPostDom->begin(); isa<PHINode>(I); ++I) { + // A PHINode is uniform if it returns the same value no matter which path is + // taken. + if (!cast<PHINode>(I)->hasConstantValue() && DV.insert(I).second) + Worklist.push_back(I); + } + + // Propagation rule 2: if a value defined in a loop is used outside, the user + // is sync dependent on the condition of the loop exits that dominate the + // user. For example, + // + // int i = 0; + // do { + // i++; + // if (foo(i)) ... // uniform + // } while (i < tid); + // if (bar(i)) ... // divergent + // + // A program may contain unstructured loops. Therefore, we cannot leverage + // LoopInfo, which only recognizes natural loops. + // + // The algorithm used here handles both natural and unstructured loops. Given + // a branch TI, we first compute its influence region, the union of all simple + // paths from TI to its immediate post dominator (IPostDom). Then, we search + // for all the values defined in the influence region but used outside. All + // these users are sync dependent on TI. + DenseSet<BasicBlock *> InfluenceRegion; + computeInfluenceRegion(ThisBB, IPostDom, InfluenceRegion); + // An insight that can speed up the search process is that all the in-region + // values that are used outside must dominate TI. Therefore, instead of + // searching every basic blocks in the influence region, we search all the + // dominators of TI until it is outside the influence region. 
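A standalone model of computeInfluenceRegion, defined further down in this file, may clarify the walk (ints stand in for BasicBlock*, a map for successors(); a sketch, not the LLVM code): flood-fill every block reachable from Start without passing through End:

    #include <map>
    #include <set>
    #include <vector>

    using CFG = std::map<int, std::vector<int>>;

    static std::set<int> influenceRegion(const CFG &Succs, int Start, int End) {
      std::set<int> Region = {Start};
      std::vector<int> Stack = {Start};
      while (!Stack.empty()) {
        int BB = Stack.back();
        Stack.pop_back();
        auto It = Succs.find(BB);
        if (It == Succs.end())
          continue;
        for (int S : It->second)
          // Stop at End; anything else newly reached joins the region.
          if (S != End && Region.insert(S).second)
            Stack.push_back(S);
      }
      return Region;
    }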
+ BasicBlock *InfluencedBB = ThisBB; + while (InfluenceRegion.count(InfluencedBB)) { + for (auto &I : *InfluencedBB) + findUsersOutsideInfluenceRegion(I, InfluenceRegion); + DomTreeNode *IDomNode = DT.getNode(InfluencedBB)->getIDom(); + if (IDomNode == nullptr) + break; + InfluencedBB = IDomNode->getBlock(); + } +} + +void DivergencePropagator::findUsersOutsideInfluenceRegion( + Instruction &I, const DenseSet<BasicBlock *> &InfluenceRegion) { + for (User *U : I.users()) { + Instruction *UserInst = cast<Instruction>(U); + if (!InfluenceRegion.count(UserInst->getParent())) { + if (DV.insert(UserInst).second) + Worklist.push_back(UserInst); + } + } +} + +void DivergencePropagator::computeInfluenceRegion( + BasicBlock *Start, BasicBlock *End, + DenseSet<BasicBlock *> &InfluenceRegion) { + assert(PDT.properlyDominates(End, Start) && + "End does not properly dominate Start"); + std::vector<BasicBlock *> InfluenceStack; + InfluenceStack.push_back(Start); + InfluenceRegion.insert(Start); + while (!InfluenceStack.empty()) { + BasicBlock *BB = InfluenceStack.back(); + InfluenceStack.pop_back(); + for (BasicBlock *Succ : successors(BB)) { + if (End != Succ && InfluenceRegion.insert(Succ).second) + InfluenceStack.push_back(Succ); + } + } +} + +void DivergencePropagator::exploreDataDependency(Value *V) { + // Follow def-use chains of V. + for (User *U : V->users()) { + Instruction *UserInst = cast<Instruction>(U); + if (DV.insert(UserInst).second) + Worklist.push_back(UserInst); + } +} + +void DivergencePropagator::propagate() { + // Traverse the dependency graph using DFS. + while (!Worklist.empty()) { + Value *V = Worklist.back(); + Worklist.pop_back(); + if (TerminatorInst *TI = dyn_cast<TerminatorInst>(V)) { + // Terminators with less than two successors won't introduce sync + // dependency. Ignore them. + if (TI->getNumSuccessors() > 1) + exploreSyncDependency(TI); + } + exploreDataDependency(V); + } +} + +} /// end namespace anonymous + +FunctionPass *llvm::createDivergenceAnalysisPass() { + return new DivergenceAnalysis(); +} + +bool DivergenceAnalysis::runOnFunction(Function &F) { + auto *TTIWP = getAnalysisIfAvailable<TargetTransformInfoWrapperPass>(); + if (TTIWP == nullptr) + return false; + + TargetTransformInfo &TTI = TTIWP->getTTI(F); + // Fast path: if the target does not have branch divergence, we do not mark + // any branch as divergent. + if (!TTI.hasBranchDivergence()) + return false; + + DivergentValues.clear(); + DivergencePropagator DP(F, TTI, + getAnalysis<DominatorTreeWrapperPass>().getDomTree(), + getAnalysis<PostDominatorTree>(), DivergentValues); + DP.populateWithSourcesOfDivergence(); + DP.propagate(); + return false; +} + +void DivergenceAnalysis::print(raw_ostream &OS, const Module *) const { + if (DivergentValues.empty()) + return; + const Value *FirstDivergentValue = *DivergentValues.begin(); + const Function *F; + if (const Argument *Arg = dyn_cast<Argument>(FirstDivergentValue)) { + F = Arg->getParent(); + } else if (const Instruction *I = + dyn_cast<Instruction>(FirstDivergentValue)) { + F = I->getParent()->getParent(); + } else { + llvm_unreachable("Only arguments and instructions can be divergent"); + } + + // Dumps all divergent values in F, arguments and then instructions. + for (auto &Arg : F->args()) { + if (DivergentValues.count(&Arg)) + OS << "DIVERGENT: " << Arg << "\n"; + } + // Iterate instructions using inst_range to ensure a deterministic order. 
+ for (auto &I : inst_range(F)) { + if (DivergentValues.count(&I)) + OS << "DIVERGENT:" << I << "\n"; + } +} diff --git a/lib/Analysis/IPA/CallGraphSCCPass.cpp b/lib/Analysis/IPA/CallGraphSCCPass.cpp index 9d607cc..65ba1c7 100644 --- a/lib/Analysis/IPA/CallGraphSCCPass.cpp +++ b/lib/Analysis/IPA/CallGraphSCCPass.cpp @@ -212,10 +212,13 @@ bool CGPassManager::RefreshCallGraph(CallGraphSCC &CurSCC, // list of the same call. CallSites.count(I->first) || - // If the call edge is not from a call or invoke, then the function - // pass RAUW'd a call with another value. This can happen when - // constant folding happens of well known functions etc. - !CallSite(I->first)) { + // If the call edge is not from a call or invoke, or it is a + // instrinsic call, then the function pass RAUW'd a call with + // another value. This can happen when constant folding happens + // of well known functions etc. + !CallSite(I->first) || + (CallSite(I->first).getCalledFunction() && + CallSite(I->first).getCalledFunction()->isIntrinsic())) { assert(!CheckingMode && "CallGraphSCCPass did not update the CallGraph correctly!"); diff --git a/lib/Analysis/IPA/GlobalsModRef.cpp b/lib/Analysis/IPA/GlobalsModRef.cpp index 2208f32..018ae99 100644 --- a/lib/Analysis/IPA/GlobalsModRef.cpp +++ b/lib/Analysis/IPA/GlobalsModRef.cpp @@ -269,7 +269,7 @@ bool GlobalsModRef::AnalyzeUsesOfPointer(Value *V, } else if (Operator::getOpcode(I) == Instruction::BitCast) { if (AnalyzeUsesOfPointer(I, Readers, Writers, OkayStoreDest)) return true; - } else if (CallSite CS = I) { + } else if (auto CS = CallSite(I)) { // Make sure that this is just the function being called, not that it is // passing into the function. if (!CS.isCallee(&U)) { diff --git a/lib/Analysis/IPA/InlineCost.cpp b/lib/Analysis/IPA/InlineCost.cpp index eeb3b87..cacf70d 100644 --- a/lib/Analysis/IPA/InlineCost.cpp +++ b/lib/Analysis/IPA/InlineCost.cpp @@ -64,6 +64,7 @@ class CallAnalyzer : public InstVisitor<CallAnalyzer, bool> { bool ContainsNoDuplicateCall; bool HasReturn; bool HasIndirectBr; + bool HasFrameEscape; /// Number of bytes allocated statically by the callee. uint64_t AllocatedSize; @@ -148,12 +149,12 @@ public: IsCallerRecursive(false), IsRecursiveCall(false), ExposesReturnsTwice(false), HasDynamicAlloca(false), ContainsNoDuplicateCall(false), HasReturn(false), HasIndirectBr(false), - AllocatedSize(0), NumInstructions(0), NumVectorInstructions(0), - FiftyPercentVectorBonus(0), TenPercentVectorBonus(0), VectorBonus(0), - NumConstantArgs(0), NumConstantOffsetPtrArgs(0), NumAllocaArgs(0), - NumConstantPtrCmps(0), NumConstantPtrDiffs(0), - NumInstructionsSimplified(0), SROACostSavings(0), - SROACostSavingsLost(0) {} + HasFrameEscape(false), AllocatedSize(0), NumInstructions(0), + NumVectorInstructions(0), FiftyPercentVectorBonus(0), + TenPercentVectorBonus(0), VectorBonus(0), NumConstantArgs(0), + NumConstantOffsetPtrArgs(0), NumAllocaArgs(0), NumConstantPtrCmps(0), + NumConstantPtrDiffs(0), NumInstructionsSimplified(0), + SROACostSavings(0), SROACostSavingsLost(0) {} bool analyzeCall(CallSite CS); @@ -743,6 +744,9 @@ bool CallAnalyzer::visitCallSite(CallSite CS) { case Intrinsic::memmove: // SROA can usually chew through these intrinsics, but they aren't free. return false; + case Intrinsic::frameescape: + HasFrameEscape = true; + return false; } } @@ -941,7 +945,7 @@ bool CallAnalyzer::analyzeBlock(BasicBlock *BB, // If the visit this instruction detected an uninlinable pattern, abort. 
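A minimal sketch of the frameescape test that feeds the bail-outs below (assuming the LLVM 3.7-era intrinsic name; it was later renamed localescape): a call site whose callee is llvm.frameescape marks the function as uninlinable without major inliner changes:

    #include "llvm/IR/CallSite.h"
    #include "llvm/IR/Function.h"
    #include "llvm/IR/Intrinsics.h"
    using namespace llvm;

    static bool callsFrameEscape(CallSite CS) {
      Function *Callee = CS.getCalledFunction();
      // Indirect calls have no callee Function; they cannot be frameescape.
      return Callee && Callee->getIntrinsicID() == Intrinsic::frameescape;
    }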
if (IsRecursiveCall || ExposesReturnsTwice || HasDynamicAlloca || - HasIndirectBr) + HasIndirectBr || HasFrameEscape) return false; // If the caller is a recursive function then we don't want to inline @@ -1171,7 +1175,7 @@ bool CallAnalyzer::analyzeCall(CallSite CS) { // returns false, and we can bail on out. if (!analyzeBlock(BB, EphValues)) { if (IsRecursiveCall || ExposesReturnsTwice || HasDynamicAlloca || - HasIndirectBr) + HasIndirectBr || HasFrameEscape) return false; // If the caller is a recursive function then we don't want to inline @@ -1286,16 +1290,18 @@ InlineCost InlineCostAnalysis::getInlineCost(CallSite CS, int Threshold) { /// \brief Test that two functions either have or have not the given attribute /// at the same time. -static bool attributeMatches(Function *F1, Function *F2, - Attribute::AttrKind Attr) { - return F1->hasFnAttribute(Attr) == F2->hasFnAttribute(Attr); +template<typename AttrKind> +static bool attributeMatches(Function *F1, Function *F2, AttrKind Attr) { + return F1->getFnAttribute(Attr) == F2->getFnAttribute(Attr); } /// \brief Test that there are no attribute conflicts between Caller and Callee /// that prevent inlining. static bool functionsHaveCompatibleAttributes(Function *Caller, Function *Callee) { - return attributeMatches(Caller, Callee, Attribute::SanitizeAddress) && + return attributeMatches(Caller, Callee, "target-cpu") && + attributeMatches(Caller, Callee, "target-features") && + attributeMatches(Caller, Callee, Attribute::SanitizeAddress) && attributeMatches(Caller, Callee, Attribute::SanitizeMemory) && attributeMatches(Caller, Callee, Attribute::SanitizeThread); } @@ -1370,6 +1376,13 @@ bool InlineCostAnalysis::isInlineViable(Function &F) { if (!ReturnsTwice && CS.isCall() && cast<CallInst>(CS.getInstruction())->canReturnTwice()) return false; + + // Disallow inlining functions that call @llvm.frameescape. Doing this + // correctly would require major changes to the inliner. + if (CS.getCalledFunction() && + CS.getCalledFunction()->getIntrinsicID() == + llvm::Intrinsic::frameescape) + return false; } } diff --git a/lib/Analysis/InstructionSimplify.cpp b/lib/Analysis/InstructionSimplify.cpp index 99c477d..d45f7bd 100644 --- a/lib/Analysis/InstructionSimplify.cpp +++ b/lib/Analysis/InstructionSimplify.cpp @@ -2978,10 +2978,12 @@ static Value *SimplifyICmpInst(unsigned Predicate, Value *LHS, Value *RHS, // what constant folding can make out of it. Constant *Null = Constant::getNullValue(GLHS->getPointerOperandType()); SmallVector<Value *, 4> IndicesLHS(GLHS->idx_begin(), GLHS->idx_end()); - Constant *NewLHS = ConstantExpr::getGetElementPtr(Null, IndicesLHS); + Constant *NewLHS = ConstantExpr::getGetElementPtr( + GLHS->getSourceElementType(), Null, IndicesLHS); SmallVector<Value *, 4> IndicesRHS(GRHS->idx_begin(), GRHS->idx_end()); - Constant *NewRHS = ConstantExpr::getGetElementPtr(Null, IndicesRHS); + Constant *NewRHS = ConstantExpr::getGetElementPtr( + GLHS->getSourceElementType(), Null, IndicesRHS); return ConstantExpr::getICmp(Pred, NewLHS, NewRHS); } } @@ -3241,17 +3243,18 @@ Value *llvm::SimplifySelectInst(Value *Cond, Value *TrueVal, Value *FalseVal, /// SimplifyGEPInst - Given operands for an GetElementPtrInst, see if we can /// fold the result. If not, this returns null. -static Value *SimplifyGEPInst(ArrayRef<Value *> Ops, const Query &Q, unsigned) { +static Value *SimplifyGEPInst(Type *SrcTy, ArrayRef<Value *> Ops, + const Query &Q, unsigned) { // The type of the GEP pointer operand. 
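On the attributeMatches change above: Function::getFnAttribute has overloads for both Attribute::AttrKind and StringRef keys, each returning an Attribute whose equality compares kind and value, so string attributes like "target-cpu" match only when their values are identical. A sketch of the templated form (same body as the commit, shown standalone under a renamed helper):

    #include "llvm/IR/Function.h"
    using namespace llvm;

    template <typename AttrKind>
    static bool fnAttributeMatches(Function *F1, Function *F2, AttrKind Attr) {
      // Works for fnAttributeMatches(F1, F2, Attribute::SanitizeAddress)
      // and for fnAttributeMatches(F1, F2, "target-cpu") alike.
      return F1->getFnAttribute(Attr) == F2->getFnAttribute(Attr);
    }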
- PointerType *PtrTy = cast<PointerType>(Ops[0]->getType()->getScalarType()); - unsigned AS = PtrTy->getAddressSpace(); + unsigned AS = + cast<PointerType>(Ops[0]->getType()->getScalarType())->getAddressSpace(); // getelementptr P -> P. if (Ops.size() == 1) return Ops[0]; // Compute the (pointer) type returned by the GEP instruction. - Type *LastType = GetElementPtrInst::getIndexedType(PtrTy, Ops.slice(1)); + Type *LastType = GetElementPtrInst::getIndexedType(SrcTy, Ops.slice(1)); Type *GEPTy = PointerType::get(LastType, AS); if (VectorType *VT = dyn_cast<VectorType>(Ops[0]->getType())) GEPTy = VectorType::get(GEPTy, VT->getNumElements()); @@ -3264,7 +3267,7 @@ static Value *SimplifyGEPInst(ArrayRef<Value *> Ops, const Query &Q, unsigned) { if (match(Ops[1], m_Zero())) return Ops[0]; - Type *Ty = PtrTy->getElementType(); + Type *Ty = SrcTy; if (Ty->isSized()) { Value *P; uint64_t C; @@ -3318,14 +3321,17 @@ static Value *SimplifyGEPInst(ArrayRef<Value *> Ops, const Query &Q, unsigned) { if (!isa<Constant>(Ops[i])) return nullptr; - return ConstantExpr::getGetElementPtr(cast<Constant>(Ops[0]), Ops.slice(1)); + return ConstantExpr::getGetElementPtr(SrcTy, cast<Constant>(Ops[0]), + Ops.slice(1)); } Value *llvm::SimplifyGEPInst(ArrayRef<Value *> Ops, const DataLayout &DL, const TargetLibraryInfo *TLI, const DominatorTree *DT, AssumptionCache *AC, const Instruction *CxtI) { - return ::SimplifyGEPInst(Ops, Query(DL, TLI, DT, AC, CxtI), RecursionLimit); + return ::SimplifyGEPInst( + cast<PointerType>(Ops[0]->getType()->getScalarType())->getElementType(), + Ops, Query(DL, TLI, DT, AC, CxtI), RecursionLimit); } /// SimplifyInsertValueInst - Given operands for an InsertValueInst, see if we diff --git a/lib/Analysis/LoopAccessAnalysis.cpp b/lib/Analysis/LoopAccessAnalysis.cpp index 1818e93..724c21f 100644 --- a/lib/Analysis/LoopAccessAnalysis.cpp +++ b/lib/Analysis/LoopAccessAnalysis.cpp @@ -177,6 +177,17 @@ void LoopAccessInfo::RuntimePointerCheck::print( } } +bool LoopAccessInfo::RuntimePointerCheck::needsAnyChecking( + const SmallVectorImpl<int> *PtrPartition) const { + unsigned NumPointers = Pointers.size(); + + for (unsigned I = 0; I < NumPointers; ++I) + for (unsigned J = I + 1; J < NumPointers; ++J) + if (needsChecking(I, J, PtrPartition)) + return true; + return false; +} + namespace { /// \brief Analyses memory accesses in a loop. /// @@ -1033,16 +1044,8 @@ void LoopAccessInfo::analyzeLoop(const ValueToValueMap &Strides) { for (I = Stores.begin(), IE = Stores.end(); I != IE; ++I) { StoreInst *ST = cast<StoreInst>(*I); Value* Ptr = ST->getPointerOperand(); - - if (isUniform(Ptr)) { - emitAnalysis( - LoopAccessReport(ST) - << "write to a loop invariant address could not be vectorized"); - DEBUG(dbgs() << "LAA: We don't allow storing to uniform addresses\n"); - CanVecMem = false; - return; - } - + // Check for store to loop invariant address. + StoreToLoopInvariantAddress |= isUniform(Ptr); // If we did *not* see this pointer before, insert it to the read-write // list. At this phase it is only a 'write' list. 
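A minimal standalone model of the needsAnyChecking helper added above (std::function stands in for the member call needsChecking(I, J, PtrPartition); a sketch, not the LLVM code): one unordered pointer pair that needs a runtime check makes the whole group need checking:

    #include <functional>

    static bool
    needsAnyChecking(unsigned NumPointers,
                     const std::function<bool(unsigned, unsigned)> &NeedsCheck) {
      for (unsigned I = 0; I < NumPointers; ++I)
        for (unsigned J = I + 1; J < NumPointers; ++J)
          if (NeedsCheck(I, J))
            return true;
      return false;
    }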
if (Seen.insert(Ptr).second) { @@ -1211,9 +1214,8 @@ static Instruction *getFirstInst(Instruction *FirstInst, Value *V, std::pair<Instruction *, Instruction *> LoopAccessInfo::addRuntimeCheck( Instruction *Loc, const SmallVectorImpl<int> *PtrPartition) const { - Instruction *tnullptr = nullptr; if (!PtrRtCheck.Need) - return std::pair<Instruction *, Instruction *>(tnullptr, tnullptr); + return std::make_pair(nullptr, nullptr); unsigned NumPointers = PtrRtCheck.Pointers.size(); SmallVector<TrackingVH<Value> , 2> Starts; @@ -1284,6 +1286,9 @@ std::pair<Instruction *, Instruction *> LoopAccessInfo::addRuntimeCheck( } } + if (!MemoryRuntimeCheck) + return std::make_pair(nullptr, nullptr); + // We have to do this trickery because the IRBuilder might fold the check to a // constant expression in which case there is no Instruction anchored in a // the block. @@ -1301,19 +1306,24 @@ LoopAccessInfo::LoopAccessInfo(Loop *L, ScalarEvolution *SE, const ValueToValueMap &Strides) : DepChecker(SE, L), NumComparisons(0), TheLoop(L), SE(SE), DL(DL), TLI(TLI), AA(AA), DT(DT), NumLoads(0), NumStores(0), - MaxSafeDepDistBytes(-1U), CanVecMem(false) { + MaxSafeDepDistBytes(-1U), CanVecMem(false), + StoreToLoopInvariantAddress(false) { if (canAnalyzeLoop()) analyzeLoop(Strides); } void LoopAccessInfo::print(raw_ostream &OS, unsigned Depth) const { if (CanVecMem) { - if (PtrRtCheck.empty()) - OS.indent(Depth) << "Memory dependences are safe\n"; - else + if (PtrRtCheck.Need) OS.indent(Depth) << "Memory dependences are safe with run-time checks\n"; + else + OS.indent(Depth) << "Memory dependences are safe\n"; } + OS.indent(Depth) << "Store to invariant address was " + << (StoreToLoopInvariantAddress ? "" : "not ") + << "found in loop.\n"; + if (Report) OS.indent(Depth) << "Report: " << Report->str() << "\n"; diff --git a/lib/Analysis/MemDepPrinter.cpp b/lib/Analysis/MemDepPrinter.cpp index e1b7b4b..da3b829 100644 --- a/lib/Analysis/MemDepPrinter.cpp +++ b/lib/Analysis/MemDepPrinter.cpp @@ -106,7 +106,7 @@ bool MemDepPrinter::runOnFunction(Function &F) { if (!Res.isNonLocal()) { Deps[Inst].insert(std::make_pair(getInstTypePair(Res), static_cast<BasicBlock *>(nullptr))); - } else if (CallSite CS = cast<Value>(Inst)) { + } else if (auto CS = CallSite(Inst)) { const MemoryDependenceAnalysis::NonLocalDepInfo &NLDI = MDA.getNonLocalCallDependency(CS); diff --git a/lib/Analysis/MemoryDependenceAnalysis.cpp b/lib/Analysis/MemoryDependenceAnalysis.cpp index 716e3e6..84769cb 100644 --- a/lib/Analysis/MemoryDependenceAnalysis.cpp +++ b/lib/Analysis/MemoryDependenceAnalysis.cpp @@ -223,7 +223,7 @@ getCallSiteDependencyFrom(CallSite CS, bool isReadOnlyCall, continue; } - if (CallSite InstCS = cast<Value>(Inst)) { + if (auto InstCS = CallSite(Inst)) { // Debug intrinsics don't cause dependences. if (isa<DbgInfoIntrinsic>(Inst)) continue; // If these two calls do not interfere, look past it. 
@@ -874,23 +874,7 @@ MemoryDependenceAnalysis::getNonLocalCallDependency(CallSite QueryCS) { void MemoryDependenceAnalysis:: getNonLocalPointerDependency(Instruction *QueryInst, SmallVectorImpl<NonLocalDepResult> &Result) { - - auto getLocation = [](AliasAnalysis *AA, Instruction *Inst) { - if (auto *I = dyn_cast<LoadInst>(Inst)) - return AA->getLocation(I); - else if (auto *I = dyn_cast<StoreInst>(Inst)) - return AA->getLocation(I); - else if (auto *I = dyn_cast<VAArgInst>(Inst)) - return AA->getLocation(I); - else if (auto *I = dyn_cast<AtomicCmpXchgInst>(Inst)) - return AA->getLocation(I); - else if (auto *I = dyn_cast<AtomicRMWInst>(Inst)) - return AA->getLocation(I); - else - llvm_unreachable("unsupported memory instruction"); - }; - - const AliasAnalysis::Location Loc = getLocation(AA, QueryInst); + const AliasAnalysis::Location Loc = AA->getLocation(QueryInst); bool isLoad = isa<LoadInst>(QueryInst); BasicBlock *FromBB = QueryInst->getParent(); assert(FromBB); diff --git a/lib/Analysis/ModuleDebugInfoPrinter.cpp b/lib/Analysis/ModuleDebugInfoPrinter.cpp index cbc4700..f2a11cb 100644 --- a/lib/Analysis/ModuleDebugInfoPrinter.cpp +++ b/lib/Analysis/ModuleDebugInfoPrinter.cpp @@ -72,55 +72,53 @@ void ModuleDebugInfoPrinter::print(raw_ostream &O, const Module *M) const { // Printing the nodes directly isn't particularly helpful (since they // reference other nodes that won't be printed, particularly for the // filenames), so just print a few useful things. - for (DICompileUnit CU : Finder.compile_units()) { + for (MDCompileUnit *CU : Finder.compile_units()) { O << "Compile unit: "; - if (const char *Lang = LanguageString(CU.getLanguage())) + if (const char *Lang = dwarf::LanguageString(CU->getSourceLanguage())) O << Lang; else - O << "unknown-language(" << CU.getLanguage() << ")"; - printFile(O, CU.getFilename(), CU.getDirectory()); + O << "unknown-language(" << CU->getSourceLanguage() << ")"; + printFile(O, CU->getFilename(), CU->getDirectory()); O << '\n'; } - for (DISubprogram S : Finder.subprograms()) { - O << "Subprogram: " << S.getName(); - printFile(O, S.getFilename(), S.getDirectory(), S.getLineNumber()); - if (!S.getLinkageName().empty()) - O << " ('" << S.getLinkageName() << "')"; + for (MDSubprogram *S : Finder.subprograms()) { + O << "Subprogram: " << S->getName(); + printFile(O, S->getFilename(), S->getDirectory(), S->getLine()); + if (!S->getLinkageName().empty()) + O << " ('" << S->getLinkageName() << "')"; O << '\n'; } for (DIGlobalVariable GV : Finder.global_variables()) { - O << "Global variable: " << GV.getName(); - printFile(O, GV.getFilename(), GV.getDirectory(), GV.getLineNumber()); - if (!GV.getLinkageName().empty()) - O << " ('" << GV.getLinkageName() << "')"; + O << "Global variable: " << GV->getName(); + printFile(O, GV->getFilename(), GV->getDirectory(), GV->getLine()); + if (!GV->getLinkageName().empty()) + O << " ('" << GV->getLinkageName() << "')"; O << '\n'; } - for (DIType T : Finder.types()) { + for (const MDType *T : Finder.types()) { O << "Type:"; - if (!T.getName().empty()) - O << ' ' << T.getName(); - printFile(O, T.getFilename(), T.getDirectory(), T.getLineNumber()); - if (T.isBasicType()) { - DIBasicType BT(T.get()); + if (!T->getName().empty()) + O << ' ' << T->getName(); + printFile(O, T->getFilename(), T->getDirectory(), T->getLine()); + if (auto *BT = dyn_cast<MDBasicType>(T)) { O << " "; if (const char *Encoding = - dwarf::AttributeEncodingString(BT.getEncoding())) + dwarf::AttributeEncodingString(BT->getEncoding())) O << Encoding; else - O << 
"unknown-encoding(" << BT.getEncoding() << ')'; + O << "unknown-encoding(" << BT->getEncoding() << ')'; } else { O << ' '; - if (const char *Tag = dwarf::TagString(T.getTag())) + if (const char *Tag = dwarf::TagString(T->getTag())) O << Tag; else - O << "unknown-tag(" << T.getTag() << ")"; + O << "unknown-tag(" << T->getTag() << ")"; } - if (T.isCompositeType()) { - DICompositeType CT(T.get()); - if (auto *S = CT.getIdentifier()) + if (auto *CT = dyn_cast<MDCompositeType>(T)) { + if (auto *S = CT->getRawIdentifier()) O << " (identifier: '" << S->getString() << "')"; } O << '\n'; diff --git a/lib/Analysis/RegionPass.cpp b/lib/Analysis/RegionPass.cpp index cd1e944..5e1cdd4 100644 --- a/lib/Analysis/RegionPass.cpp +++ b/lib/Analysis/RegionPass.cpp @@ -199,7 +199,7 @@ public: bool runOnRegion(Region *R, RGPassManager &RGM) override { Out << Banner; - for (const auto &BB : R->blocks()) { + for (const auto *BB : R->blocks()) { if (BB) BB->print(Out); else diff --git a/lib/Analysis/RegionPrinter.cpp b/lib/Analysis/RegionPrinter.cpp index ad83113..d7f5109 100644 --- a/lib/Analysis/RegionPrinter.cpp +++ b/lib/Analysis/RegionPrinter.cpp @@ -123,7 +123,7 @@ struct DOTGraphTraits<RegionInfoPass*> : public DOTGraphTraits<RegionNode*> { const RegionInfo &RI = *static_cast<const RegionInfo*>(R.getRegionInfo()); - for (const auto &BB : R.blocks()) + for (auto *BB : R.blocks()) if (RI.getRegionFor(BB) == &R) O.indent(2 * (depth + 1)) << "Node" << static_cast<const void*>(RI.getTopLevelRegion()->getBBNode(BB)) diff --git a/lib/Analysis/ScalarEvolution.cpp b/lib/Analysis/ScalarEvolution.cpp index 4e713fb..37377f0 100644 --- a/lib/Analysis/ScalarEvolution.cpp +++ b/lib/Analysis/ScalarEvolution.cpp @@ -5690,7 +5690,7 @@ static Constant *BuildConstantFromSCEV(const SCEV *V) { if (PTy->getElementType()->isStructTy()) C2 = ConstantExpr::getIntegerCast( C2, Type::getInt32Ty(C->getContext()), true); - C = ConstantExpr::getGetElementPtr(C, C2); + C = ConstantExpr::getGetElementPtr(PTy->getElementType(), C, C2); } else C = ConstantExpr::getAdd(C, C2); } @@ -6698,6 +6698,65 @@ ScalarEvolution::isLoopBackedgeGuardedByCond(const Loop *L, return true; } + struct ClearWalkingBEDominatingCondsOnExit { + ScalarEvolution &SE; + + explicit ClearWalkingBEDominatingCondsOnExit(ScalarEvolution &SE) + : SE(SE){}; + + ~ClearWalkingBEDominatingCondsOnExit() { + SE.WalkingBEDominatingConds = false; + } + }; + + // We don't want more than one activation of the following loop on the stack + // -- that can lead to O(n!) time complexity. + if (WalkingBEDominatingConds) + return false; + + WalkingBEDominatingConds = true; + ClearWalkingBEDominatingCondsOnExit ClearOnExit(*this); + + // If the loop is not reachable from the entry block, we risk running into an + // infinite loop as we walk up into the dom tree. These loops do not matter + // anyway, so we just return a conservative answer when we see them. 
+ if (!DT->isReachableFromEntry(L->getHeader())) + return false; + + for (DomTreeNode *DTN = (*DT)[Latch], *HeaderDTN = (*DT)[L->getHeader()]; + DTN != HeaderDTN; + DTN = DTN->getIDom()) { + + assert(DTN && "should reach the loop header before reaching the root!"); + + BasicBlock *BB = DTN->getBlock(); + BasicBlock *PBB = BB->getSinglePredecessor(); + if (!PBB) + continue; + + BranchInst *ContinuePredicate = dyn_cast<BranchInst>(PBB->getTerminator()); + if (!ContinuePredicate || !ContinuePredicate->isConditional()) + continue; + + Value *Condition = ContinuePredicate->getCondition(); + + // If we have an edge `E` within the loop body that dominates the only + // latch, the condition guarding `E` also guards the backedge. This + // reasoning works only for loops with a single latch. + + BasicBlockEdge DominatingEdge(PBB, BB); + if (DominatingEdge.isSingleEdge()) { + // We're constructively (and conservatively) enumerating edges within the + // loop body that dominate the latch. The dominator tree better agree + // with us on this: + assert(DT->dominates(DominatingEdge, Latch) && "should be!"); + + if (isImpliedCond(Pred, LHS, RHS, Condition, + BB != ContinuePredicate->getSuccessor(0))) + return true; + } + } + return false; } @@ -7968,8 +8027,8 @@ ScalarEvolution::SCEVCallbackVH::SCEVCallbackVH(Value *V, ScalarEvolution *se) //===----------------------------------------------------------------------===// ScalarEvolution::ScalarEvolution() - : FunctionPass(ID), ValuesAtScopes(64), LoopDispositions(64), - BlockDispositions(64), FirstUnknown(nullptr) { + : FunctionPass(ID), WalkingBEDominatingConds(false), ValuesAtScopes(64), + LoopDispositions(64), BlockDispositions(64), FirstUnknown(nullptr) { initializeScalarEvolutionPass(*PassRegistry::getPassRegistry()); } @@ -8000,6 +8059,7 @@ void ScalarEvolution::releaseMemory() { } assert(PendingLoopPredicates.empty() && "isImpliedCond garbage"); + assert(!WalkingBEDominatingConds && "isLoopBackedgeGuardedByCond garbage!"); BackedgeTakenCounts.clear(); ConstantEvolutionLoopExitValue.clear(); diff --git a/lib/Analysis/ScalarEvolutionExpander.cpp b/lib/Analysis/ScalarEvolutionExpander.cpp index a73ec9e..0bd427b 100644 --- a/lib/Analysis/ScalarEvolutionExpander.cpp +++ b/lib/Analysis/ScalarEvolutionExpander.cpp @@ -23,6 +23,7 @@ #include "llvm/IR/Dominators.h" #include "llvm/IR/IntrinsicInst.h" #include "llvm/IR/LLVMContext.h" +#include "llvm/IR/Module.h" #include "llvm/Support/Debug.h" #include "llvm/Support/raw_ostream.h" @@ -488,7 +489,8 @@ Value *SCEVExpander::expandAddToGEP(const SCEV *const *op_begin, // Fold a GEP with constant operands. if (Constant *CLHS = dyn_cast<Constant>(V)) if (Constant *CRHS = dyn_cast<Constant>(Idx)) - return ConstantExpr::getGetElementPtr(CLHS, CRHS); + return ConstantExpr::getGetElementPtr(Type::getInt8Ty(Ty->getContext()), + CLHS, CRHS); // Do a quick scan to see if we have this GEP nearby. If so, reuse it. unsigned ScanLimit = 6; @@ -523,7 +525,7 @@ Value *SCEVExpander::expandAddToGEP(const SCEV *const *op_begin, } // Emit a GEP. 
- Value *GEP = Builder.CreateGEP(V, Idx, "uglygep"); + Value *GEP = Builder.CreateGEP(Builder.getInt8Ty(), V, Idx, "uglygep"); rememberInstruction(GEP); return GEP; @@ -1803,6 +1805,72 @@ unsigned SCEVExpander::replaceCongruentIVs(Loop *L, const DominatorTree *DT, return NumElim; } +bool SCEVExpander::isHighCostExpansionHelper( + const SCEV *S, Loop *L, SmallPtrSetImpl<const SCEV *> &Processed) { + if (!Processed.insert(S).second) + return false; + + if (auto *UDivExpr = dyn_cast<SCEVUDivExpr>(S)) { + // If the divisor is a power of two and the SCEV type fits in a native + // integer, consider the divison cheap irrespective of whether it occurs in + // the user code since it can be lowered into a right shift. + if (auto *SC = dyn_cast<SCEVConstant>(UDivExpr->getRHS())) + if (SC->getValue()->getValue().isPowerOf2()) { + const DataLayout &DL = + L->getHeader()->getParent()->getParent()->getDataLayout(); + unsigned Width = cast<IntegerType>(UDivExpr->getType())->getBitWidth(); + return DL.isIllegalInteger(Width); + } + + // UDivExpr is very likely a UDiv that ScalarEvolution's HowFarToZero or + // HowManyLessThans produced to compute a precise expression, rather than a + // UDiv from the user's code. If we can't find a UDiv in the code with some + // simple searching, assume the former consider UDivExpr expensive to + // compute. + BasicBlock *ExitingBB = L->getExitingBlock(); + if (!ExitingBB) + return true; + + BranchInst *ExitingBI = dyn_cast<BranchInst>(ExitingBB->getTerminator()); + if (!ExitingBI || !ExitingBI->isConditional()) + return true; + + ICmpInst *OrigCond = dyn_cast<ICmpInst>(ExitingBI->getCondition()); + if (!OrigCond) + return true; + + const SCEV *RHS = SE.getSCEV(OrigCond->getOperand(1)); + RHS = SE.getMinusSCEV(RHS, SE.getConstant(RHS->getType(), 1)); + if (RHS != S) { + const SCEV *LHS = SE.getSCEV(OrigCond->getOperand(0)); + LHS = SE.getMinusSCEV(LHS, SE.getConstant(LHS->getType(), 1)); + if (LHS != S) + return true; + } + } + + // Recurse past add expressions, which commonly occur in the + // BackedgeTakenCount. They may already exist in program code, and if not, + // they are not too expensive rematerialize. + if (const SCEVAddExpr *Add = dyn_cast<SCEVAddExpr>(S)) { + for (SCEVAddExpr::op_iterator I = Add->op_begin(), E = Add->op_end(); + I != E; ++I) { + if (isHighCostExpansionHelper(*I, L, Processed)) + return true; + } + return false; + } + + // HowManyLessThans uses a Max expression whenever the loop is not guarded by + // the exit condition. + if (isa<SCEVSMaxExpr>(S) || isa<SCEVUMaxExpr>(S)) + return true; + + // If we haven't recognized an expensive SCEV pattern, assume it's an + // expression produced by program code. + return false; +} + namespace { // Search for a SCEV subexpression that is not safe to expand. Any expression // that may expand to a !isSafeToSpeculativelyExecute value is unsafe, namely diff --git a/lib/Analysis/TargetLibraryInfo.cpp b/lib/Analysis/TargetLibraryInfo.cpp index 7e574d5..8b378a3 100644 --- a/lib/Analysis/TargetLibraryInfo.cpp +++ b/lib/Analysis/TargetLibraryInfo.cpp @@ -409,9 +409,7 @@ static StringRef sanitizeFunctionName(StringRef funcName) { // Check for \01 prefix that is used to mangle __asm declarations and // strip it if present. 
- if (funcName.front() == '\01') - funcName = funcName.substr(1); - return funcName; + return GlobalValue::getRealLinkageName(funcName); } bool TargetLibraryInfoImpl::getLibFunc(StringRef funcName, diff --git a/lib/Analysis/TargetTransformInfo.cpp b/lib/Analysis/TargetTransformInfo.cpp index f51c7f54..a1519de 100644 --- a/lib/Analysis/TargetTransformInfo.cpp +++ b/lib/Analysis/TargetTransformInfo.cpp @@ -76,6 +76,10 @@ bool TargetTransformInfo::hasBranchDivergence() const { return TTIImpl->hasBranchDivergence(); } +bool TargetTransformInfo::isSourceOfDivergence(const Value *V) const { + return TTIImpl->isSourceOfDivergence(V); +} + bool TargetTransformInfo::isLoweredToCall(const Function *F) const { return TTIImpl->isLoweredToCall(F); } diff --git a/lib/Analysis/ValueTracking.cpp b/lib/Analysis/ValueTracking.cpp index f329e3a..3651301 100644 --- a/lib/Analysis/ValueTracking.cpp +++ b/lib/Analysis/ValueTracking.cpp @@ -694,10 +694,9 @@ static void computeKnownBitsFromAssume(Value *V, APInt &KnownZero, // We're running this loop for once for each value queried resulting in a // runtime of ~O(#assumes * #values). - assert(isa<IntrinsicInst>(I) && - dyn_cast<IntrinsicInst>(I)->getIntrinsicID() == Intrinsic::assume && + assert(I->getCalledFunction()->getIntrinsicID() == Intrinsic::assume && "must be an assume intrinsic"); - + Value *Arg = I->getArgOperand(0); if (Arg == V && isValidAssumeForContext(I, Q)) { @@ -2935,7 +2934,7 @@ bool llvm::isKnownNonNull(const Value *V, const TargetLibraryInfo *TLI) { if (const LoadInst *LI = dyn_cast<LoadInst>(V)) return LI->getMetadata(LLVMContext::MD_nonnull); - if (ImmutableCallSite CS = V) + if (auto CS = ImmutableCallSite(V)) if (CS.isReturnNonNull()) return true; |
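Finally, a hedged sketch of the kind of test a GPU target's isSourceOfDivergence override (wired up above) would perform; the intrinsic choice here is illustrative, and a real override such as NVPTX's covers more cases: per-lane thread-ID reads are what seed the divergence propagation:

    #include "llvm/IR/IntrinsicInst.h"
    using namespace llvm;

    static bool isThreadIdRead(const Value *V) {
      if (const auto *II = dyn_cast<IntrinsicInst>(V))
        switch (II->getIntrinsicID()) {
        case Intrinsic::nvvm_read_ptx_sreg_tid_x: // llvm.nvvm.read.ptx.sreg.tid.x
        case Intrinsic::nvvm_read_ptx_sreg_tid_y:
        case Intrinsic::nvvm_read_ptx_sreg_tid_z:
          return true;
        default:
          break;
        }
      return false;
    }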