Diffstat (limited to 'lib/Analysis')
-rw-r--r--  lib/Analysis/AliasAnalysis.cpp             |  38
-rw-r--r--  lib/Analysis/AliasAnalysisCounter.cpp      |   2
-rw-r--r--  lib/Analysis/AliasAnalysisEvaluator.cpp    |   2
-rw-r--r--  lib/Analysis/AliasSetTracker.cpp           |   2
-rw-r--r--  lib/Analysis/Analysis.cpp                  |   1
-rw-r--r--  lib/Analysis/Android.mk                    |   1
-rw-r--r--  lib/Analysis/BasicAliasAnalysis.cpp        |   4
-rw-r--r--  lib/Analysis/BlockFrequencyInfo.cpp        |   2
-rw-r--r--  lib/Analysis/BlockFrequencyInfoImpl.cpp    |  54
-rw-r--r--  lib/Analysis/BranchProbabilityInfo.cpp     |  30
-rw-r--r--  lib/Analysis/CFGPrinter.cpp                |   4
-rw-r--r--  lib/Analysis/CFLAliasAnalysis.cpp          |   4
-rw-r--r--  lib/Analysis/CMakeLists.txt                |   1
-rw-r--r--  lib/Analysis/ConstantFolding.cpp           |  25
-rw-r--r--  lib/Analysis/DivergenceAnalysis.cpp        | 337
-rw-r--r--  lib/Analysis/IPA/CallGraphSCCPass.cpp      |  11
-rw-r--r--  lib/Analysis/IPA/GlobalsModRef.cpp         |   2
-rw-r--r--  lib/Analysis/IPA/InlineCost.cpp            |  37
-rw-r--r--  lib/Analysis/InstructionSimplify.cpp       |  24
-rw-r--r--  lib/Analysis/LoopAccessAnalysis.cpp        |  42
-rw-r--r--  lib/Analysis/MemDepPrinter.cpp             |   2
-rw-r--r--  lib/Analysis/MemoryDependenceAnalysis.cpp  |  20
-rw-r--r--  lib/Analysis/ModuleDebugInfoPrinter.cpp    |  50
-rw-r--r--  lib/Analysis/RegionPass.cpp                |   2
-rw-r--r--  lib/Analysis/RegionPrinter.cpp             |   2
-rw-r--r--  lib/Analysis/ScalarEvolution.cpp           |  66
-rw-r--r--  lib/Analysis/ScalarEvolutionExpander.cpp   |  72
-rw-r--r--  lib/Analysis/TargetLibraryInfo.cpp         |   4
-rw-r--r--  lib/Analysis/TargetTransformInfo.cpp       |   4
-rw-r--r--  lib/Analysis/ValueTracking.cpp             |   7
30 files changed, 688 insertions(+), 164 deletions(-)
diff --git a/lib/Analysis/AliasAnalysis.cpp b/lib/Analysis/AliasAnalysis.cpp
index 0b0fd50..43db176 100644
--- a/lib/Analysis/AliasAnalysis.cpp
+++ b/lib/Analysis/AliasAnalysis.cpp
@@ -82,6 +82,23 @@ void AliasAnalysis::addEscapingUse(Use &U) {
AA->addEscapingUse(U);
}
+AliasAnalysis::ModRefResult
+AliasAnalysis::getModRefInfo(Instruction *I, ImmutableCallSite Call) {
+ // We may have two calls
+ if (auto CS = ImmutableCallSite(I)) {
+ // Check if the two calls modify the same memory
+ return getModRefInfo(Call, CS);
+ } else {
+ // Otherwise, check if the call modifies or references the
+ // location this memory access defines. The best we can say
+ // is that if the call references what this instruction
+ // defines, it must be clobbered by this location.
+ const AliasAnalysis::Location DefLoc = AA->getLocation(I);
+ if (getModRefInfo(Call, DefLoc) != AliasAnalysis::NoModRef)
+ return AliasAnalysis::ModRef;
+ }
+ return AliasAnalysis::NoModRef;
+}
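A minimal usage sketch of the new overload (client code assumed, not part of this patch; AA, I, and CS are illustrative names):

    // Conservatively treat instruction I as interacting with call site CS
    // unless the new instruction-vs-call query proves NoModRef.
    bool MayInteract =
        AA.getModRefInfo(I, CS) != AliasAnalysis::NoModRef;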
AliasAnalysis::ModRefResult
AliasAnalysis::getModRefInfo(ImmutableCallSite CS,
@@ -330,7 +347,7 @@ AliasAnalysis::getModRefInfo(const LoadInst *L, const Location &Loc) {
// If the load address doesn't alias the given address, it doesn't read
// or write the specified memory.
- if (!alias(getLocation(L), Loc))
+ if (Loc.Ptr && !alias(getLocation(L), Loc))
return NoModRef;
// Otherwise, a load just reads.
@@ -343,15 +360,18 @@ AliasAnalysis::getModRefInfo(const StoreInst *S, const Location &Loc) {
if (!S->isUnordered())
return ModRef;
- // If the store address cannot alias the pointer in question, then the
- // specified memory cannot be modified by the store.
- if (!alias(getLocation(S), Loc))
- return NoModRef;
+ if (Loc.Ptr) {
+ // If the store address cannot alias the pointer in question, then the
+ // specified memory cannot be modified by the store.
+ if (!alias(getLocation(S), Loc))
+ return NoModRef;
- // If the pointer is a pointer to constant memory, then it could not have been
- // modified by this store.
- if (pointsToConstantMemory(Loc))
- return NoModRef;
+ // If the pointer is a pointer to constant memory, then it could not have
+ // been modified by this store.
+ if (pointsToConstantMemory(Loc))
+ return NoModRef;
+
+ }
// Otherwise, a store just writes.
return Mod;
diff --git a/lib/Analysis/AliasAnalysisCounter.cpp b/lib/Analysis/AliasAnalysisCounter.cpp
index 5865259..a1bfba1 100644
--- a/lib/Analysis/AliasAnalysisCounter.cpp
+++ b/lib/Analysis/AliasAnalysisCounter.cpp
@@ -44,7 +44,7 @@ namespace {
errs() << " " << Val << " " << Desc << " responses ("
<< Val*100/Sum << "%)\n";
}
- ~AliasAnalysisCounter() {
+ ~AliasAnalysisCounter() override {
unsigned AASum = No+May+Partial+Must;
unsigned MRSum = NoMR+JustRef+JustMod+MR;
if (AASum + MRSum) { // Print a report if any counted queries occurred...
diff --git a/lib/Analysis/AliasAnalysisEvaluator.cpp b/lib/Analysis/AliasAnalysisEvaluator.cpp
index fe4bd4c..273eacc 100644
--- a/lib/Analysis/AliasAnalysisEvaluator.cpp
+++ b/lib/Analysis/AliasAnalysisEvaluator.cpp
@@ -158,7 +158,7 @@ bool AAEval::runOnFunction(Function &F) {
if (EvalAAMD && isa<StoreInst>(&*I))
Stores.insert(&*I);
Instruction &Inst = *I;
- if (CallSite CS = cast<Value>(&Inst)) {
+ if (auto CS = CallSite(&Inst)) {
Value *Callee = CS.getCalledValue();
// Skip actual functions for direct function calls.
if (!isa<Function>(Callee) && isInterestingPointer(Callee))
diff --git a/lib/Analysis/AliasSetTracker.cpp b/lib/Analysis/AliasSetTracker.cpp
index 45442b0..4c79b9f 100644
--- a/lib/Analysis/AliasSetTracker.cpp
+++ b/lib/Analysis/AliasSetTracker.cpp
@@ -187,7 +187,7 @@ bool AliasSet::aliasesUnknownInst(Instruction *Inst, AliasAnalysis &AA) const {
return false;
for (unsigned i = 0, e = UnknownInsts.size(); i != e; ++i) {
- CallSite C1 = getUnknownInst(i), C2 = Inst;
+ CallSite C1(getUnknownInst(i)), C2(Inst);
if (!C1 || !C2 ||
AA.getModRefInfo(C1, C2) != AliasAnalysis::NoModRef ||
AA.getModRefInfo(C2, C1) != AliasAnalysis::NoModRef)
diff --git a/lib/Analysis/Analysis.cpp b/lib/Analysis/Analysis.cpp
index 4549c1e..842ff0a 100644
--- a/lib/Analysis/Analysis.cpp
+++ b/lib/Analysis/Analysis.cpp
@@ -37,6 +37,7 @@ void llvm::initializeAnalysis(PassRegistry &Registry) {
initializeCFLAliasAnalysisPass(Registry);
initializeDependenceAnalysisPass(Registry);
initializeDelinearizationPass(Registry);
+ initializeDivergenceAnalysisPass(Registry);
initializeDominanceFrontierPass(Registry);
initializeDomViewerPass(Registry);
initializeDomPrinterPass(Registry);
diff --git a/lib/Analysis/Android.mk b/lib/Analysis/Android.mk
index 277956c..94a1d2e 100644
--- a/lib/Analysis/Android.mk
+++ b/lib/Analysis/Android.mk
@@ -22,6 +22,7 @@ analysis_SRC_FILES := \
CostModel.cpp \
Delinearization.cpp \
DependenceAnalysis.cpp \
+ DivergenceAnalysis.cpp \
DomPrinter.cpp \
DominanceFrontier.cpp \
IVUsers.cpp \
diff --git a/lib/Analysis/BasicAliasAnalysis.cpp b/lib/Analysis/BasicAliasAnalysis.cpp
index be2282f..2767e41 100644
--- a/lib/Analysis/BasicAliasAnalysis.cpp
+++ b/lib/Analysis/BasicAliasAnalysis.cpp
@@ -932,14 +932,14 @@ aliasSameBasePointerGEPs(const GEPOperator *GEP1, uint64_t V1Size,
// Also, check that they all index through arrays.
for (unsigned i = 1, e = GEP1->getNumIndices() - 1; i != e; ++i) {
if (!isa<ArrayType>(GetElementPtrInst::getIndexedType(
- GEP1->getPointerOperandType(), IntermediateIndices)))
+ GEP1->getSourceElementType(), IntermediateIndices)))
return AliasAnalysis::MayAlias;
IntermediateIndices.push_back(GEP1->getOperand(i + 1));
}
StructType *LastIndexedStruct =
dyn_cast<StructType>(GetElementPtrInst::getIndexedType(
- GEP1->getPointerOperandType(), IntermediateIndices));
+ GEP1->getSourceElementType(), IntermediateIndices));
if (!LastIndexedStruct)
return AliasAnalysis::MayAlias;
diff --git a/lib/Analysis/BlockFrequencyInfo.cpp b/lib/Analysis/BlockFrequencyInfo.cpp
index 37f2fae..3d819eb 100644
--- a/lib/Analysis/BlockFrequencyInfo.cpp
+++ b/lib/Analysis/BlockFrequencyInfo.cpp
@@ -85,7 +85,7 @@ struct DOTGraphTraits<BlockFrequencyInfo*> : public DefaultDOTGraphTraits {
std::string Result;
raw_string_ostream OS(Result);
- OS << Node->getName().str() << ":";
+ OS << Node->getName() << ":";
switch (ViewBlockFreqPropagationDAG) {
case GVDT_Fraction:
Graph->printBlockFreq(OS, Node);
diff --git a/lib/Analysis/BlockFrequencyInfoImpl.cpp b/lib/Analysis/BlockFrequencyInfoImpl.cpp
index 278073c..456cee1 100644
--- a/lib/Analysis/BlockFrequencyInfoImpl.cpp
+++ b/lib/Analysis/BlockFrequencyInfoImpl.cpp
@@ -331,32 +331,35 @@ bool BlockFrequencyInfoImplBase::addLoopSuccessorsToDist(
return true;
}
-/// \brief Get the maximum allowed loop scale.
-///
-/// Gives the maximum number of estimated iterations allowed for a loop. Very
-/// large numbers cause problems downstream (even within 64-bits).
-static Scaled64 getMaxLoopScale() { return Scaled64(1, 12); }
-
/// \brief Compute the loop scale for a loop.
void BlockFrequencyInfoImplBase::computeLoopScale(LoopData &Loop) {
// Compute loop scale.
DEBUG(dbgs() << "compute-loop-scale: " << getLoopName(Loop) << "\n");
+ // Infinite loops need special handling. If we give the back edge an infinite
+ // mass, they may saturate all the other scales in the function down to 1,
+ // making all the other region temperatures look exactly the same. Choose an
+ // arbitrary scale to avoid these issues.
+ //
+ // FIXME: An alternate way would be to select a symbolic scale which is later
+ // replaced to be the maximum of all computed scales plus 1. This would
+ // appropriately describe the loop as having a large scale, without skewing
+ // the final frequency computation.
+ const Scaled64 InifiniteLoopScale(1, 12);
+
// LoopScale == 1 / ExitMass
// ExitMass == HeadMass - BackedgeMass
BlockMass ExitMass = BlockMass::getFull() - Loop.BackedgeMass;
- // Block scale stores the inverse of the scale.
- Loop.Scale = ExitMass.toScaled().inverse();
+ // Block scale stores the inverse of the scale. If this is an infinite loop,
+ // its exit mass will be zero. In this case, use an arbitrary scale for the
+ // loop scale.
+ Loop.Scale =
+ ExitMass.isEmpty() ? InifiniteLoopScale : ExitMass.toScaled().inverse();
DEBUG(dbgs() << " - exit-mass = " << ExitMass << " (" << BlockMass::getFull()
<< " - " << Loop.BackedgeMass << ")\n"
<< " - scale = " << Loop.Scale << "\n");
-
- if (Loop.Scale > getMaxLoopScale()) {
- Loop.Scale = getMaxLoopScale();
- DEBUG(dbgs() << " - reduced-to-max-scale: " << getMaxLoopScale() << "\n");
- }
}
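Worked numbers for orientation (illustrative, not part of the patch): Scaled64(1, 12) denotes 1 x 2^12, so an infinite loop (ExitMass == 0) is treated as if it iterated 4096 times, while a loop whose exit mass is one eighth of the full mass gets Scale = (1/8)^-1 = 8.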
/// \brief Package up a loop.
@@ -424,15 +427,24 @@ static void convertFloatingToInteger(BlockFrequencyInfoImplBase &BFI,
const Scaled64 &Min, const Scaled64 &Max) {
// Scale the Factor to a size that creates integers. Ideally, integers would
// be scaled so that Max == UINT64_MAX so that they can be best
- // differentiated. However, the register allocator currently deals poorly
- // with large numbers. Instead, push Min up a little from 1 to give some
- // room to differentiate small, unequal numbers.
- //
- // TODO: fix issues downstream so that ScalingFactor can be
- // Scaled64(1,64)/Max.
- Scaled64 ScalingFactor = Min.inverse();
- if ((Max / Min).lg() < 60)
+ // differentiated. However, in the presence of large frequency values, small
+ // frequencies are scaled down to 1, making it impossible to differentiate
+ // small, unequal numbers. When the spread between Min and Max frequencies
+ // fits well within MaxBits, we make the scale be at least 8.
+ const unsigned MaxBits = 64;
+ const unsigned SpreadBits = (Max / Min).lg();
+ Scaled64 ScalingFactor;
+ if (SpreadBits <= MaxBits - 3) {
+ // If the values are small enough, make the scaling factor at least 8 to
+ // allow distinguishing small values.
+ ScalingFactor = Min.inverse();
ScalingFactor <<= 3;
+ } else {
+ // If the values need more than MaxBits to be represented, saturate small
+ // frequency values down to 1 by using a scaling factor that benefits large
+ // frequency values.
+ ScalingFactor = Scaled64(1, MaxBits) / Max;
+ }
// Translate the floats to integers.
DEBUG(dbgs() << "float-to-int: min = " << Min << ", max = " << Max
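Illustrative numbers (assumed, not taken from the patch): with Min = 1 and Max = 2^20 the spread is 20 bits, comfortably under 61, so ScalingFactor = Min.inverse() << 3 = 8 and small, unequal frequencies stay distinguishable after truncation; with Max = 2^62 the spread no longer fits, so ScalingFactor = 2^64 / Max = 4, which keeps Max near UINT64_MAX at the cost of collapsing nearby small frequencies.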
diff --git a/lib/Analysis/BranchProbabilityInfo.cpp b/lib/Analysis/BranchProbabilityInfo.cpp
index 14800f4..8799a71 100644
--- a/lib/Analysis/BranchProbabilityInfo.cpp
+++ b/lib/Analysis/BranchProbabilityInfo.cpp
@@ -379,6 +379,14 @@ bool BranchProbabilityInfo::calcZeroHeuristics(BasicBlock *BB) {
if (!CV)
return false;
+ // If the LHS is the result of AND'ing a value with a single-bit bitmask,
+ // we don't have information about probabilities.
+ if (Instruction *LHS = dyn_cast<Instruction>(CI->getOperand(0)))
+ if (LHS->getOpcode() == Instruction::And)
+ if (ConstantInt *AndRHS = dyn_cast<ConstantInt>(LHS->getOperand(1)))
+ if (AndRHS->getUniqueInteger().isPowerOf2())
+ return false;
+
bool isProb;
if (CV->isZero()) {
switch (CI->getPredicate()) {
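A concrete instance of the pattern the new bail-out covers (illustrative C, not taken from the patch):

    if (flags & 0x8) {   // AND with a power-of-two mask, compared against zero
      ...
    }

Neither outcome of such a test is inherently more likely, so the zero heuristic now declines to guess instead of biasing the branch.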
@@ -499,25 +507,23 @@ bool BranchProbabilityInfo::runOnFunction(Function &F) {
// Walk the basic blocks in post-order so that we can build up state about
// the successors of a block iteratively.
- for (po_iterator<BasicBlock *> I = po_begin(&F.getEntryBlock()),
- E = po_end(&F.getEntryBlock());
- I != E; ++I) {
- DEBUG(dbgs() << "Computing probabilities for " << I->getName() << "\n");
- if (calcUnreachableHeuristics(*I))
+ for (auto BB : post_order(&F.getEntryBlock())) {
+ DEBUG(dbgs() << "Computing probabilities for " << BB->getName() << "\n");
+ if (calcUnreachableHeuristics(BB))
continue;
- if (calcMetadataWeights(*I))
+ if (calcMetadataWeights(BB))
continue;
- if (calcColdCallHeuristics(*I))
+ if (calcColdCallHeuristics(BB))
continue;
- if (calcLoopBranchHeuristics(*I))
+ if (calcLoopBranchHeuristics(BB))
continue;
- if (calcPointerHeuristics(*I))
+ if (calcPointerHeuristics(BB))
continue;
- if (calcZeroHeuristics(*I))
+ if (calcZeroHeuristics(BB))
continue;
- if (calcFloatingPointHeuristics(*I))
+ if (calcFloatingPointHeuristics(BB))
continue;
- calcInvokeHeuristics(*I);
+ calcInvokeHeuristics(BB);
}
PostDominatedByUnreachable.clear();
diff --git a/lib/Analysis/CFGPrinter.cpp b/lib/Analysis/CFGPrinter.cpp
index 89787f82..c86f1f5 100644
--- a/lib/Analysis/CFGPrinter.cpp
+++ b/lib/Analysis/CFGPrinter.cpp
@@ -77,7 +77,7 @@ namespace {
}
bool runOnFunction(Function &F) override {
- std::string Filename = "cfg." + F.getName().str() + ".dot";
+ std::string Filename = ("cfg." + F.getName() + ".dot").str();
errs() << "Writing '" << Filename << "'...";
std::error_code EC;
@@ -111,7 +111,7 @@ namespace {
}
bool runOnFunction(Function &F) override {
- std::string Filename = "cfg." + F.getName().str() + ".dot";
+ std::string Filename = ("cfg." + F.getName() + ".dot").str();
errs() << "Writing '" << Filename << "'...";
std::error_code EC;
diff --git a/lib/Analysis/CFLAliasAnalysis.cpp b/lib/Analysis/CFLAliasAnalysis.cpp
index 53d748d..3147992 100644
--- a/lib/Analysis/CFLAliasAnalysis.cpp
+++ b/lib/Analysis/CFLAliasAnalysis.cpp
@@ -161,7 +161,7 @@ struct FunctionHandle : public CallbackVH {
assert(CFLAA != nullptr);
}
- virtual ~FunctionHandle() {}
+ ~FunctionHandle() override {}
void deleted() override { removeSelfFromCache(); }
void allUsesReplacedWith(Value *) override { removeSelfFromCache(); }
@@ -189,7 +189,7 @@ public:
initializeCFLAliasAnalysisPass(*PassRegistry::getPassRegistry());
}
- virtual ~CFLAliasAnalysis() {}
+ ~CFLAliasAnalysis() override {}
void getAnalysisUsage(AnalysisUsage &AU) const override {
AliasAnalysis::getAnalysisUsage(AU);
diff --git a/lib/Analysis/CMakeLists.txt b/lib/Analysis/CMakeLists.txt
index ae40321..1335a6d 100644
--- a/lib/Analysis/CMakeLists.txt
+++ b/lib/Analysis/CMakeLists.txt
@@ -20,6 +20,7 @@ add_llvm_library(LLVMAnalysis
ConstantFolding.cpp
Delinearization.cpp
DependenceAnalysis.cpp
+ DivergenceAnalysis.cpp
DomPrinter.cpp
DominanceFrontier.cpp
IVUsers.cpp
diff --git a/lib/Analysis/ConstantFolding.cpp b/lib/Analysis/ConstantFolding.cpp
index 995465d..a85e813 100644
--- a/lib/Analysis/ConstantFolding.cpp
+++ b/lib/Analysis/ConstantFolding.cpp
@@ -671,8 +671,8 @@ static Constant *SymbolicallyEvaluateBinop(unsigned Opc, Constant *Op0,
/// If array indices are not pointer-sized integers, explicitly cast them so
/// that they aren't implicitly casted by the getelementptr.
-static Constant *CastGEPIndices(ArrayRef<Constant *> Ops, Type *ResultTy,
- const DataLayout &DL,
+static Constant *CastGEPIndices(Type *SrcTy, ArrayRef<Constant *> Ops,
+ Type *ResultTy, const DataLayout &DL,
const TargetLibraryInfo *TLI) {
Type *IntPtrTy = DL.getIntPtrType(ResultTy);
@@ -681,8 +681,9 @@ static Constant *CastGEPIndices(ArrayRef<Constant *> Ops, Type *ResultTy,
for (unsigned i = 1, e = Ops.size(); i != e; ++i) {
if ((i == 1 ||
!isa<StructType>(GetElementPtrInst::getIndexedType(
- Ops[0]->getType(),
- Ops.slice(1, i - 1)))) &&
+ cast<PointerType>(Ops[0]->getType()->getScalarType())
+ ->getElementType(),
+ Ops.slice(1, i - 1)))) &&
Ops[i]->getType() != IntPtrTy) {
Any = true;
NewIdxs.push_back(ConstantExpr::getCast(CastInst::getCastOpcode(Ops[i],
@@ -697,7 +698,7 @@ static Constant *CastGEPIndices(ArrayRef<Constant *> Ops, Type *ResultTy,
if (!Any)
return nullptr;
- Constant *C = ConstantExpr::getGetElementPtr(Ops[0], NewIdxs);
+ Constant *C = ConstantExpr::getGetElementPtr(SrcTy, Ops[0], NewIdxs);
if (ConstantExpr *CE = dyn_cast<ConstantExpr>(C)) {
if (Constant *Folded = ConstantFoldConstantExpression(CE, DL, TLI))
C = Folded;
@@ -723,7 +724,7 @@ static Constant* StripPtrCastKeepAS(Constant* Ptr) {
}
/// If we can symbolically evaluate the GEP constant expression, do so.
-static Constant *SymbolicallyEvaluateGEP(ArrayRef<Constant *> Ops,
+static Constant *SymbolicallyEvaluateGEP(Type *SrcTy, ArrayRef<Constant *> Ops,
Type *ResultTy, const DataLayout &DL,
const TargetLibraryInfo *TLI) {
Constant *Ptr = Ops[0];
@@ -865,7 +866,7 @@ static Constant *SymbolicallyEvaluateGEP(ArrayRef<Constant *> Ops,
return nullptr;
// Create a GEP.
- Constant *C = ConstantExpr::getGetElementPtr(Ptr, NewIdxs);
+ Constant *C = ConstantExpr::getGetElementPtr(SrcTy, Ptr, NewIdxs);
assert(C->getType()->getPointerElementType() == Ty &&
"Computed GetElementPtr has unexpected type!");
@@ -1085,13 +1086,15 @@ Constant *llvm::ConstantFoldInstOperands(unsigned Opcode, Type *DestTy,
return ConstantExpr::getInsertElement(Ops[0], Ops[1], Ops[2]);
case Instruction::ShuffleVector:
return ConstantExpr::getShuffleVector(Ops[0], Ops[1], Ops[2]);
- case Instruction::GetElementPtr:
- if (Constant *C = CastGEPIndices(Ops, DestTy, DL, TLI))
+ case Instruction::GetElementPtr: {
+ Type *SrcTy = nullptr;
+ if (Constant *C = CastGEPIndices(SrcTy, Ops, DestTy, DL, TLI))
return C;
- if (Constant *C = SymbolicallyEvaluateGEP(Ops, DestTy, DL, TLI))
+ if (Constant *C = SymbolicallyEvaluateGEP(SrcTy, Ops, DestTy, DL, TLI))
return C;
- return ConstantExpr::getGetElementPtr(Ops[0], Ops.slice(1));
+ return ConstantExpr::getGetElementPtr(SrcTy, Ops[0], Ops.slice(1));
+ }
}
}
diff --git a/lib/Analysis/DivergenceAnalysis.cpp b/lib/Analysis/DivergenceAnalysis.cpp
new file mode 100644
index 0000000..e5ee295
--- /dev/null
+++ b/lib/Analysis/DivergenceAnalysis.cpp
@@ -0,0 +1,337 @@
+//===- DivergenceAnalysis.cpp ------ Divergence Analysis ------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file defines divergence analysis which determines whether a branch in a
+// GPU program is divergent. It can help branch optimizations such as jump
+// threading and loop unswitching to make better decisions.
+//
+// GPU programs typically use the SIMD execution model, where multiple threads
+// in the same execution group have to execute in lock-step. Therefore, if the
+// code contains divergent branches (i.e., threads in a group do not agree on
+// which path of the branch to take), the group of threads has to execute all
+// the paths from that branch with different subsets of threads enabled until
+// they converge at the immediately post-dominating BB of the paths.
+//
+// Due to this execution model, some optimizations such as jump
+// threading and loop unswitching can be unfortunately harmful when performed on
+// divergent branches. Therefore, an analysis that computes which branches in a
+// GPU program are divergent can help the compiler to selectively run these
+// optimizations.
+//
+// This file defines divergence analysis which computes a conservative but
+// non-trivial approximation of all divergent branches in a GPU program. It
+// partially implements the approach described in
+//
+// Divergence Analysis
+// Sampaio, Souza, Collange, Pereira
+// TOPLAS '13
+//
+// The divergence analysis identifies the sources of divergence (e.g., special
+// variables that hold the thread ID), and recursively marks variables that are
+// data or sync dependent on a source of divergence as divergent.
+//
+// While data dependency is a well-known concept, the notion of sync dependency
+// is worth more explanation. Sync dependence characterizes the control flow
+// aspect of the propagation of branch divergence. For example,
+//
+// %cond = icmp slt i32 %tid, 10
+// br i1 %cond, label %then, label %else
+// then:
+// br label %merge
+// else:
+// br label %merge
+// merge:
+// %a = phi i32 [ 0, %then ], [ 1, %else ]
+//
+// Suppose %tid holds the thread ID. Although %a is not data dependent on %tid
+// because %tid is not on its use-def chains, %a is sync dependent on %tid
+// because the branch "br i1 %cond" depends on %tid and affects which value %a
+// is assigned to.
+//
+// The current implementation has the following limitations:
+// 1. intra-procedural. It conservatively considers the arguments of a
+// non-kernel-entry function and the return value of a function call as
+// divergent.
+// 2. memory as black box. It conservatively considers values loaded from
+// generic or local address as divergent. This can be improved by leveraging
+// pointer analysis.
+//===----------------------------------------------------------------------===//
+
+#include <vector>
+#include "llvm/IR/Dominators.h"
+#include "llvm/ADT/DenseSet.h"
+#include "llvm/Analysis/Passes.h"
+#include "llvm/Analysis/PostDominators.h"
+#include "llvm/Analysis/TargetTransformInfo.h"
+#include "llvm/IR/Function.h"
+#include "llvm/IR/InstIterator.h"
+#include "llvm/IR/Instructions.h"
+#include "llvm/IR/IntrinsicInst.h"
+#include "llvm/IR/Value.h"
+#include "llvm/Pass.h"
+#include "llvm/Support/CommandLine.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/raw_ostream.h"
+#include "llvm/Transforms/Scalar.h"
+using namespace llvm;
+
+#define DEBUG_TYPE "divergence"
+
+namespace {
+class DivergenceAnalysis : public FunctionPass {
+public:
+ static char ID;
+
+ DivergenceAnalysis() : FunctionPass(ID) {
+ initializeDivergenceAnalysisPass(*PassRegistry::getPassRegistry());
+ }
+
+ void getAnalysisUsage(AnalysisUsage &AU) const override {
+ AU.addRequired<DominatorTreeWrapperPass>();
+ AU.addRequired<PostDominatorTree>();
+ AU.setPreservesAll();
+ }
+
+ bool runOnFunction(Function &F) override;
+
+ // Print all divergent branches in the function.
+ void print(raw_ostream &OS, const Module *) const override;
+
+ // Returns true if V is divergent.
+ bool isDivergent(const Value *V) const { return DivergentValues.count(V); }
+ // Returns true if V is uniform/non-divergent.
+ bool isUniform(const Value *V) const { return !isDivergent(V); }
+
+private:
+ // Stores all divergent values.
+ DenseSet<const Value *> DivergentValues;
+};
+} // End of anonymous namespace
+
+// Register this pass.
+char DivergenceAnalysis::ID = 0;
+INITIALIZE_PASS_BEGIN(DivergenceAnalysis, "divergence", "Divergence Analysis",
+ false, true)
+INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass)
+INITIALIZE_PASS_DEPENDENCY(PostDominatorTree)
+INITIALIZE_PASS_END(DivergenceAnalysis, "divergence", "Divergence Analysis",
+ false, true)
+
+namespace {
+
+class DivergencePropagator {
+public:
+ DivergencePropagator(Function &F, TargetTransformInfo &TTI,
+ DominatorTree &DT, PostDominatorTree &PDT,
+ DenseSet<const Value *> &DV)
+ : F(F), TTI(TTI), DT(DT), PDT(PDT), DV(DV) {}
+ void populateWithSourcesOfDivergence();
+ void propagate();
+
+private:
+ // A helper function that explores data dependents of V.
+ void exploreDataDependency(Value *V);
+ // A helper function that explores sync dependents of TI.
+ void exploreSyncDependency(TerminatorInst *TI);
+ // Computes the influence region from Start to End. This region includes all
+ // basic blocks on any path from Start to End.
+ void computeInfluenceRegion(BasicBlock *Start, BasicBlock *End,
+ DenseSet<BasicBlock *> &InfluenceRegion);
+ // Finds all users of I that are outside the influence region, and adds these
+ // users to Worklist.
+ void findUsersOutsideInfluenceRegion(
+ Instruction &I, const DenseSet<BasicBlock *> &InfluenceRegion);
+
+ Function &F;
+ TargetTransformInfo &TTI;
+ DominatorTree &DT;
+ PostDominatorTree &PDT;
+ std::vector<Value *> Worklist; // Stack for DFS.
+ DenseSet<const Value *> &DV; // Stores all divergent values.
+};
+
+void DivergencePropagator::populateWithSourcesOfDivergence() {
+ Worklist.clear();
+ DV.clear();
+ for (auto &I : inst_range(F)) {
+ if (TTI.isSourceOfDivergence(&I)) {
+ Worklist.push_back(&I);
+ DV.insert(&I);
+ }
+ }
+ for (auto &Arg : F.args()) {
+ if (TTI.isSourceOfDivergence(&Arg)) {
+ Worklist.push_back(&Arg);
+ DV.insert(&Arg);
+ }
+ }
+}
+
+void DivergencePropagator::exploreSyncDependency(TerminatorInst *TI) {
+ // Propagation rule 1: if branch TI is divergent, all PHINodes in TI's
+ // immediate post dominator are divergent. This rule handles if-then-else
+ // patterns. For example,
+ //
+ // if (tid < 5)
+ // a1 = 1;
+ // else
+ // a2 = 2;
+ // a = phi(a1, a2); // sync dependent on (tid < 5)
+ BasicBlock *ThisBB = TI->getParent();
+ BasicBlock *IPostDom = PDT.getNode(ThisBB)->getIDom()->getBlock();
+ if (IPostDom == nullptr)
+ return;
+
+ for (auto I = IPostDom->begin(); isa<PHINode>(I); ++I) {
+ // A PHINode is uniform if it returns the same value no matter which path is
+ // taken.
+ if (!cast<PHINode>(I)->hasConstantValue() && DV.insert(I).second)
+ Worklist.push_back(I);
+ }
+
+ // Propagation rule 2: if a value defined in a loop is used outside, the user
+ // is sync dependent on the condition of the loop exits that dominate the
+ // user. For example,
+ //
+ // int i = 0;
+ // do {
+ // i++;
+ // if (foo(i)) ... // uniform
+ // } while (i < tid);
+ // if (bar(i)) ... // divergent
+ //
+ // A program may contain unstructured loops. Therefore, we cannot leverage
+ // LoopInfo, which only recognizes natural loops.
+ //
+ // The algorithm used here handles both natural and unstructured loops. Given
+ // a branch TI, we first compute its influence region, the union of all simple
+ // paths from TI to its immediate post dominator (IPostDom). Then, we search
+ // for all the values defined in the influence region but used outside. All
+ // these users are sync dependent on TI.
+ DenseSet<BasicBlock *> InfluenceRegion;
+ computeInfluenceRegion(ThisBB, IPostDom, InfluenceRegion);
+ // An insight that can speed up the search process is that all the in-region
+ // values that are used outside must dominate TI. Therefore, instead of
+ // searching every basic block in the influence region, we search all the
+ // dominators of TI until it is outside the influence region.
+ BasicBlock *InfluencedBB = ThisBB;
+ while (InfluenceRegion.count(InfluencedBB)) {
+ for (auto &I : *InfluencedBB)
+ findUsersOutsideInfluenceRegion(I, InfluenceRegion);
+ DomTreeNode *IDomNode = DT.getNode(InfluencedBB)->getIDom();
+ if (IDomNode == nullptr)
+ break;
+ InfluencedBB = IDomNode->getBlock();
+ }
+}
+
+void DivergencePropagator::findUsersOutsideInfluenceRegion(
+ Instruction &I, const DenseSet<BasicBlock *> &InfluenceRegion) {
+ for (User *U : I.users()) {
+ Instruction *UserInst = cast<Instruction>(U);
+ if (!InfluenceRegion.count(UserInst->getParent())) {
+ if (DV.insert(UserInst).second)
+ Worklist.push_back(UserInst);
+ }
+ }
+}
+
+void DivergencePropagator::computeInfluenceRegion(
+ BasicBlock *Start, BasicBlock *End,
+ DenseSet<BasicBlock *> &InfluenceRegion) {
+ assert(PDT.properlyDominates(End, Start) &&
+ "End does not properly dominate Start");
+ std::vector<BasicBlock *> InfluenceStack;
+ InfluenceStack.push_back(Start);
+ InfluenceRegion.insert(Start);
+ while (!InfluenceStack.empty()) {
+ BasicBlock *BB = InfluenceStack.back();
+ InfluenceStack.pop_back();
+ for (BasicBlock *Succ : successors(BB)) {
+ if (End != Succ && InfluenceRegion.insert(Succ).second)
+ InfluenceStack.push_back(Succ);
+ }
+ }
+}
+
+void DivergencePropagator::exploreDataDependency(Value *V) {
+ // Follow def-use chains of V.
+ for (User *U : V->users()) {
+ Instruction *UserInst = cast<Instruction>(U);
+ if (DV.insert(UserInst).second)
+ Worklist.push_back(UserInst);
+ }
+}
+
+void DivergencePropagator::propagate() {
+ // Traverse the dependency graph using DFS.
+ while (!Worklist.empty()) {
+ Value *V = Worklist.back();
+ Worklist.pop_back();
+ if (TerminatorInst *TI = dyn_cast<TerminatorInst>(V)) {
+ // Terminators with less than two successors won't introduce sync
+ // dependency. Ignore them.
+ if (TI->getNumSuccessors() > 1)
+ exploreSyncDependency(TI);
+ }
+ exploreDataDependency(V);
+ }
+}
+
+} // end anonymous namespace
+
+FunctionPass *llvm::createDivergenceAnalysisPass() {
+ return new DivergenceAnalysis();
+}
+
+bool DivergenceAnalysis::runOnFunction(Function &F) {
+ auto *TTIWP = getAnalysisIfAvailable<TargetTransformInfoWrapperPass>();
+ if (TTIWP == nullptr)
+ return false;
+
+ TargetTransformInfo &TTI = TTIWP->getTTI(F);
+ // Fast path: if the target does not have branch divergence, we do not mark
+ // any branch as divergent.
+ if (!TTI.hasBranchDivergence())
+ return false;
+
+ DivergentValues.clear();
+ DivergencePropagator DP(F, TTI,
+ getAnalysis<DominatorTreeWrapperPass>().getDomTree(),
+ getAnalysis<PostDominatorTree>(), DivergentValues);
+ DP.populateWithSourcesOfDivergence();
+ DP.propagate();
+ return false;
+}
+
+void DivergenceAnalysis::print(raw_ostream &OS, const Module *) const {
+ if (DivergentValues.empty())
+ return;
+ const Value *FirstDivergentValue = *DivergentValues.begin();
+ const Function *F;
+ if (const Argument *Arg = dyn_cast<Argument>(FirstDivergentValue)) {
+ F = Arg->getParent();
+ } else if (const Instruction *I =
+ dyn_cast<Instruction>(FirstDivergentValue)) {
+ F = I->getParent()->getParent();
+ } else {
+ llvm_unreachable("Only arguments and instructions can be divergent");
+ }
+
+ // Dumps all divergent values in F, arguments and then instructions.
+ for (auto &Arg : F->args()) {
+ if (DivergentValues.count(&Arg))
+ OS << "DIVERGENT: " << Arg << "\n";
+ }
+ // Iterate instructions using inst_range to ensure a deterministic order.
+ for (auto &I : inst_range(F)) {
+ if (DivergentValues.count(&I))
+ OS << "DIVERGENT:" << I << "\n";
+ }
+}
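Usage note (the invocation is assumed from the pass registration above, not stated in the patch): running opt with -analyze -divergence on a module whose target reports branch divergence prints one line per divergent value, for example

    DIVERGENT: i32 %tid
    DIVERGENT:   %cond = icmp slt i32 %tid, 10

mirroring the two loops in print(); on targets without branch divergence, runOnFunction() returns early and nothing is marked.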
diff --git a/lib/Analysis/IPA/CallGraphSCCPass.cpp b/lib/Analysis/IPA/CallGraphSCCPass.cpp
index 9d607cc..65ba1c7 100644
--- a/lib/Analysis/IPA/CallGraphSCCPass.cpp
+++ b/lib/Analysis/IPA/CallGraphSCCPass.cpp
@@ -212,10 +212,13 @@ bool CGPassManager::RefreshCallGraph(CallGraphSCC &CurSCC,
// list of the same call.
CallSites.count(I->first) ||
- // If the call edge is not from a call or invoke, then the function
- // pass RAUW'd a call with another value. This can happen when
- // constant folding happens of well known functions etc.
- !CallSite(I->first)) {
+ // If the call edge is not from a call or invoke, or it is an
+ // intrinsic call, then the function pass RAUW'd a call with
+ // another value. This can happen when well-known functions are
+ // constant folded, etc.
+ !CallSite(I->first) ||
+ (CallSite(I->first).getCalledFunction() &&
+ CallSite(I->first).getCalledFunction()->isIntrinsic())) {
assert(!CheckingMode &&
"CallGraphSCCPass did not update the CallGraph correctly!");
diff --git a/lib/Analysis/IPA/GlobalsModRef.cpp b/lib/Analysis/IPA/GlobalsModRef.cpp
index 2208f32..018ae99 100644
--- a/lib/Analysis/IPA/GlobalsModRef.cpp
+++ b/lib/Analysis/IPA/GlobalsModRef.cpp
@@ -269,7 +269,7 @@ bool GlobalsModRef::AnalyzeUsesOfPointer(Value *V,
} else if (Operator::getOpcode(I) == Instruction::BitCast) {
if (AnalyzeUsesOfPointer(I, Readers, Writers, OkayStoreDest))
return true;
- } else if (CallSite CS = I) {
+ } else if (auto CS = CallSite(I)) {
// Make sure that this is just the function being called, not that it is
// passing into the function.
if (!CS.isCallee(&U)) {
diff --git a/lib/Analysis/IPA/InlineCost.cpp b/lib/Analysis/IPA/InlineCost.cpp
index eeb3b87..cacf70d 100644
--- a/lib/Analysis/IPA/InlineCost.cpp
+++ b/lib/Analysis/IPA/InlineCost.cpp
@@ -64,6 +64,7 @@ class CallAnalyzer : public InstVisitor<CallAnalyzer, bool> {
bool ContainsNoDuplicateCall;
bool HasReturn;
bool HasIndirectBr;
+ bool HasFrameEscape;
/// Number of bytes allocated statically by the callee.
uint64_t AllocatedSize;
@@ -148,12 +149,12 @@ public:
IsCallerRecursive(false), IsRecursiveCall(false),
ExposesReturnsTwice(false), HasDynamicAlloca(false),
ContainsNoDuplicateCall(false), HasReturn(false), HasIndirectBr(false),
- AllocatedSize(0), NumInstructions(0), NumVectorInstructions(0),
- FiftyPercentVectorBonus(0), TenPercentVectorBonus(0), VectorBonus(0),
- NumConstantArgs(0), NumConstantOffsetPtrArgs(0), NumAllocaArgs(0),
- NumConstantPtrCmps(0), NumConstantPtrDiffs(0),
- NumInstructionsSimplified(0), SROACostSavings(0),
- SROACostSavingsLost(0) {}
+ HasFrameEscape(false), AllocatedSize(0), NumInstructions(0),
+ NumVectorInstructions(0), FiftyPercentVectorBonus(0),
+ TenPercentVectorBonus(0), VectorBonus(0), NumConstantArgs(0),
+ NumConstantOffsetPtrArgs(0), NumAllocaArgs(0), NumConstantPtrCmps(0),
+ NumConstantPtrDiffs(0), NumInstructionsSimplified(0),
+ SROACostSavings(0), SROACostSavingsLost(0) {}
bool analyzeCall(CallSite CS);
@@ -743,6 +744,9 @@ bool CallAnalyzer::visitCallSite(CallSite CS) {
case Intrinsic::memmove:
// SROA can usually chew through these intrinsics, but they aren't free.
return false;
+ case Intrinsic::frameescape:
+ HasFrameEscape = true;
+ return false;
}
}
@@ -941,7 +945,7 @@ bool CallAnalyzer::analyzeBlock(BasicBlock *BB,
// If the visit this instruction detected an uninlinable pattern, abort.
if (IsRecursiveCall || ExposesReturnsTwice || HasDynamicAlloca ||
- HasIndirectBr)
+ HasIndirectBr || HasFrameEscape)
return false;
// If the caller is a recursive function then we don't want to inline
@@ -1171,7 +1175,7 @@ bool CallAnalyzer::analyzeCall(CallSite CS) {
// returns false, and we can bail on out.
if (!analyzeBlock(BB, EphValues)) {
if (IsRecursiveCall || ExposesReturnsTwice || HasDynamicAlloca ||
- HasIndirectBr)
+ HasIndirectBr || HasFrameEscape)
return false;
// If the caller is a recursive function then we don't want to inline
@@ -1286,16 +1290,18 @@ InlineCost InlineCostAnalysis::getInlineCost(CallSite CS, int Threshold) {
/// \brief Test that two functions either have or have not the given attribute
/// at the same time.
-static bool attributeMatches(Function *F1, Function *F2,
- Attribute::AttrKind Attr) {
- return F1->hasFnAttribute(Attr) == F2->hasFnAttribute(Attr);
+template<typename AttrKind>
+static bool attributeMatches(Function *F1, Function *F2, AttrKind Attr) {
+ return F1->getFnAttribute(Attr) == F2->getFnAttribute(Attr);
}
/// \brief Test that there are no attribute conflicts between Caller and Callee
/// that prevent inlining.
static bool functionsHaveCompatibleAttributes(Function *Caller,
Function *Callee) {
- return attributeMatches(Caller, Callee, Attribute::SanitizeAddress) &&
+ return attributeMatches(Caller, Callee, "target-cpu") &&
+ attributeMatches(Caller, Callee, "target-features") &&
+ attributeMatches(Caller, Callee, Attribute::SanitizeAddress) &&
attributeMatches(Caller, Callee, Attribute::SanitizeMemory) &&
attributeMatches(Caller, Callee, Attribute::SanitizeThread);
}
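For example (illustrative attribute strings, not from the patch), a callee carrying "target-features"="+avx2" is no longer considered for inlining into a caller whose string is "+sse2", because the two string attributes compare unequal; two functions that both lack the attribute still match.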
@@ -1370,6 +1376,13 @@ bool InlineCostAnalysis::isInlineViable(Function &F) {
if (!ReturnsTwice && CS.isCall() &&
cast<CallInst>(CS.getInstruction())->canReturnTwice())
return false;
+
+ // Disallow inlining functions that call @llvm.frameescape. Doing this
+ // correctly would require major changes to the inliner.
+ if (CS.getCalledFunction() &&
+ CS.getCalledFunction()->getIntrinsicID() ==
+ llvm::Intrinsic::frameescape)
+ return false;
}
}
diff --git a/lib/Analysis/InstructionSimplify.cpp b/lib/Analysis/InstructionSimplify.cpp
index 99c477d..d45f7bd 100644
--- a/lib/Analysis/InstructionSimplify.cpp
+++ b/lib/Analysis/InstructionSimplify.cpp
@@ -2978,10 +2978,12 @@ static Value *SimplifyICmpInst(unsigned Predicate, Value *LHS, Value *RHS,
// what constant folding can make out of it.
Constant *Null = Constant::getNullValue(GLHS->getPointerOperandType());
SmallVector<Value *, 4> IndicesLHS(GLHS->idx_begin(), GLHS->idx_end());
- Constant *NewLHS = ConstantExpr::getGetElementPtr(Null, IndicesLHS);
+ Constant *NewLHS = ConstantExpr::getGetElementPtr(
+ GLHS->getSourceElementType(), Null, IndicesLHS);
SmallVector<Value *, 4> IndicesRHS(GRHS->idx_begin(), GRHS->idx_end());
- Constant *NewRHS = ConstantExpr::getGetElementPtr(Null, IndicesRHS);
+ Constant *NewRHS = ConstantExpr::getGetElementPtr(
+ GLHS->getSourceElementType(), Null, IndicesRHS);
return ConstantExpr::getICmp(Pred, NewLHS, NewRHS);
}
}
@@ -3241,17 +3243,18 @@ Value *llvm::SimplifySelectInst(Value *Cond, Value *TrueVal, Value *FalseVal,
/// SimplifyGEPInst - Given operands for an GetElementPtrInst, see if we can
/// fold the result. If not, this returns null.
-static Value *SimplifyGEPInst(ArrayRef<Value *> Ops, const Query &Q, unsigned) {
+static Value *SimplifyGEPInst(Type *SrcTy, ArrayRef<Value *> Ops,
+ const Query &Q, unsigned) {
// The type of the GEP pointer operand.
- PointerType *PtrTy = cast<PointerType>(Ops[0]->getType()->getScalarType());
- unsigned AS = PtrTy->getAddressSpace();
+ unsigned AS =
+ cast<PointerType>(Ops[0]->getType()->getScalarType())->getAddressSpace();
// getelementptr P -> P.
if (Ops.size() == 1)
return Ops[0];
// Compute the (pointer) type returned by the GEP instruction.
- Type *LastType = GetElementPtrInst::getIndexedType(PtrTy, Ops.slice(1));
+ Type *LastType = GetElementPtrInst::getIndexedType(SrcTy, Ops.slice(1));
Type *GEPTy = PointerType::get(LastType, AS);
if (VectorType *VT = dyn_cast<VectorType>(Ops[0]->getType()))
GEPTy = VectorType::get(GEPTy, VT->getNumElements());
@@ -3264,7 +3267,7 @@ static Value *SimplifyGEPInst(ArrayRef<Value *> Ops, const Query &Q, unsigned) {
if (match(Ops[1], m_Zero()))
return Ops[0];
- Type *Ty = PtrTy->getElementType();
+ Type *Ty = SrcTy;
if (Ty->isSized()) {
Value *P;
uint64_t C;
@@ -3318,14 +3321,17 @@ static Value *SimplifyGEPInst(ArrayRef<Value *> Ops, const Query &Q, unsigned) {
if (!isa<Constant>(Ops[i]))
return nullptr;
- return ConstantExpr::getGetElementPtr(cast<Constant>(Ops[0]), Ops.slice(1));
+ return ConstantExpr::getGetElementPtr(SrcTy, cast<Constant>(Ops[0]),
+ Ops.slice(1));
}
Value *llvm::SimplifyGEPInst(ArrayRef<Value *> Ops, const DataLayout &DL,
const TargetLibraryInfo *TLI,
const DominatorTree *DT, AssumptionCache *AC,
const Instruction *CxtI) {
- return ::SimplifyGEPInst(Ops, Query(DL, TLI, DT, AC, CxtI), RecursionLimit);
+ return ::SimplifyGEPInst(
+ cast<PointerType>(Ops[0]->getType()->getScalarType())->getElementType(),
+ Ops, Query(DL, TLI, DT, AC, CxtI), RecursionLimit);
}
/// SimplifyInsertValueInst - Given operands for an InsertValueInst, see if we
diff --git a/lib/Analysis/LoopAccessAnalysis.cpp b/lib/Analysis/LoopAccessAnalysis.cpp
index 1818e93..724c21f 100644
--- a/lib/Analysis/LoopAccessAnalysis.cpp
+++ b/lib/Analysis/LoopAccessAnalysis.cpp
@@ -177,6 +177,17 @@ void LoopAccessInfo::RuntimePointerCheck::print(
}
}
+bool LoopAccessInfo::RuntimePointerCheck::needsAnyChecking(
+ const SmallVectorImpl<int> *PtrPartition) const {
+ unsigned NumPointers = Pointers.size();
+
+ for (unsigned I = 0; I < NumPointers; ++I)
+ for (unsigned J = I + 1; J < NumPointers; ++J)
+ if (needsChecking(I, J, PtrPartition))
+ return true;
+ return false;
+}
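A minimal sketch of the intended client pattern (assumed, not part of this patch; LAI, the accessor, and emitRuntimeChecks are illustrative names):

    // Version the loop only if some pointer pair in this partition really
    // needs a run-time check.
    if (LAI.getRuntimePointerCheck()->needsAnyChecking(&PtrPartition))
      emitRuntimeChecks();   // hypothetical caller action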
+
namespace {
/// \brief Analyses memory accesses in a loop.
///
@@ -1033,16 +1044,8 @@ void LoopAccessInfo::analyzeLoop(const ValueToValueMap &Strides) {
for (I = Stores.begin(), IE = Stores.end(); I != IE; ++I) {
StoreInst *ST = cast<StoreInst>(*I);
Value* Ptr = ST->getPointerOperand();
-
- if (isUniform(Ptr)) {
- emitAnalysis(
- LoopAccessReport(ST)
- << "write to a loop invariant address could not be vectorized");
- DEBUG(dbgs() << "LAA: We don't allow storing to uniform addresses\n");
- CanVecMem = false;
- return;
- }
-
+ // Check for store to loop invariant address.
+ StoreToLoopInvariantAddress |= isUniform(Ptr);
// If we did *not* see this pointer before, insert it to the read-write
// list. At this phase it is only a 'write' list.
if (Seen.insert(Ptr).second) {
@@ -1211,9 +1214,8 @@ static Instruction *getFirstInst(Instruction *FirstInst, Value *V,
std::pair<Instruction *, Instruction *> LoopAccessInfo::addRuntimeCheck(
Instruction *Loc, const SmallVectorImpl<int> *PtrPartition) const {
- Instruction *tnullptr = nullptr;
if (!PtrRtCheck.Need)
- return std::pair<Instruction *, Instruction *>(tnullptr, tnullptr);
+ return std::make_pair(nullptr, nullptr);
unsigned NumPointers = PtrRtCheck.Pointers.size();
SmallVector<TrackingVH<Value> , 2> Starts;
@@ -1284,6 +1286,9 @@ std::pair<Instruction *, Instruction *> LoopAccessInfo::addRuntimeCheck(
}
}
+ if (!MemoryRuntimeCheck)
+ return std::make_pair(nullptr, nullptr);
+
// We have to do this trickery because the IRBuilder might fold the check to a
// constant expression in which case there is no Instruction anchored in a
// the block.
@@ -1301,19 +1306,24 @@ LoopAccessInfo::LoopAccessInfo(Loop *L, ScalarEvolution *SE,
const ValueToValueMap &Strides)
: DepChecker(SE, L), NumComparisons(0), TheLoop(L), SE(SE), DL(DL),
TLI(TLI), AA(AA), DT(DT), NumLoads(0), NumStores(0),
- MaxSafeDepDistBytes(-1U), CanVecMem(false) {
+ MaxSafeDepDistBytes(-1U), CanVecMem(false),
+ StoreToLoopInvariantAddress(false) {
if (canAnalyzeLoop())
analyzeLoop(Strides);
}
void LoopAccessInfo::print(raw_ostream &OS, unsigned Depth) const {
if (CanVecMem) {
- if (PtrRtCheck.empty())
- OS.indent(Depth) << "Memory dependences are safe\n";
- else
+ if (PtrRtCheck.Need)
OS.indent(Depth) << "Memory dependences are safe with run-time checks\n";
+ else
+ OS.indent(Depth) << "Memory dependences are safe\n";
}
+ OS.indent(Depth) << "Store to invariant address was "
+ << (StoreToLoopInvariantAddress ? "" : "not ")
+ << "found in loop.\n";
+
if (Report)
OS.indent(Depth) << "Report: " << Report->str() << "\n";
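With these changes, the printed analysis for a vectorizable loop that stores to a loop-invariant address reads roughly as follows (illustrative, assembled from the strings above):

    Memory dependences are safe with run-time checks
    Store to invariant address was found in loop.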
diff --git a/lib/Analysis/MemDepPrinter.cpp b/lib/Analysis/MemDepPrinter.cpp
index e1b7b4b..da3b829 100644
--- a/lib/Analysis/MemDepPrinter.cpp
+++ b/lib/Analysis/MemDepPrinter.cpp
@@ -106,7 +106,7 @@ bool MemDepPrinter::runOnFunction(Function &F) {
if (!Res.isNonLocal()) {
Deps[Inst].insert(std::make_pair(getInstTypePair(Res),
static_cast<BasicBlock *>(nullptr)));
- } else if (CallSite CS = cast<Value>(Inst)) {
+ } else if (auto CS = CallSite(Inst)) {
const MemoryDependenceAnalysis::NonLocalDepInfo &NLDI =
MDA.getNonLocalCallDependency(CS);
diff --git a/lib/Analysis/MemoryDependenceAnalysis.cpp b/lib/Analysis/MemoryDependenceAnalysis.cpp
index 716e3e6..84769cb 100644
--- a/lib/Analysis/MemoryDependenceAnalysis.cpp
+++ b/lib/Analysis/MemoryDependenceAnalysis.cpp
@@ -223,7 +223,7 @@ getCallSiteDependencyFrom(CallSite CS, bool isReadOnlyCall,
continue;
}
- if (CallSite InstCS = cast<Value>(Inst)) {
+ if (auto InstCS = CallSite(Inst)) {
// Debug intrinsics don't cause dependences.
if (isa<DbgInfoIntrinsic>(Inst)) continue;
// If these two calls do not interfere, look past it.
@@ -874,23 +874,7 @@ MemoryDependenceAnalysis::getNonLocalCallDependency(CallSite QueryCS) {
void MemoryDependenceAnalysis::
getNonLocalPointerDependency(Instruction *QueryInst,
SmallVectorImpl<NonLocalDepResult> &Result) {
-
- auto getLocation = [](AliasAnalysis *AA, Instruction *Inst) {
- if (auto *I = dyn_cast<LoadInst>(Inst))
- return AA->getLocation(I);
- else if (auto *I = dyn_cast<StoreInst>(Inst))
- return AA->getLocation(I);
- else if (auto *I = dyn_cast<VAArgInst>(Inst))
- return AA->getLocation(I);
- else if (auto *I = dyn_cast<AtomicCmpXchgInst>(Inst))
- return AA->getLocation(I);
- else if (auto *I = dyn_cast<AtomicRMWInst>(Inst))
- return AA->getLocation(I);
- else
- llvm_unreachable("unsupported memory instruction");
- };
-
- const AliasAnalysis::Location Loc = getLocation(AA, QueryInst);
+ const AliasAnalysis::Location Loc = AA->getLocation(QueryInst);
bool isLoad = isa<LoadInst>(QueryInst);
BasicBlock *FromBB = QueryInst->getParent();
assert(FromBB);
diff --git a/lib/Analysis/ModuleDebugInfoPrinter.cpp b/lib/Analysis/ModuleDebugInfoPrinter.cpp
index cbc4700..f2a11cb 100644
--- a/lib/Analysis/ModuleDebugInfoPrinter.cpp
+++ b/lib/Analysis/ModuleDebugInfoPrinter.cpp
@@ -72,55 +72,53 @@ void ModuleDebugInfoPrinter::print(raw_ostream &O, const Module *M) const {
// Printing the nodes directly isn't particularly helpful (since they
// reference other nodes that won't be printed, particularly for the
// filenames), so just print a few useful things.
- for (DICompileUnit CU : Finder.compile_units()) {
+ for (MDCompileUnit *CU : Finder.compile_units()) {
O << "Compile unit: ";
- if (const char *Lang = LanguageString(CU.getLanguage()))
+ if (const char *Lang = dwarf::LanguageString(CU->getSourceLanguage()))
O << Lang;
else
- O << "unknown-language(" << CU.getLanguage() << ")";
- printFile(O, CU.getFilename(), CU.getDirectory());
+ O << "unknown-language(" << CU->getSourceLanguage() << ")";
+ printFile(O, CU->getFilename(), CU->getDirectory());
O << '\n';
}
- for (DISubprogram S : Finder.subprograms()) {
- O << "Subprogram: " << S.getName();
- printFile(O, S.getFilename(), S.getDirectory(), S.getLineNumber());
- if (!S.getLinkageName().empty())
- O << " ('" << S.getLinkageName() << "')";
+ for (MDSubprogram *S : Finder.subprograms()) {
+ O << "Subprogram: " << S->getName();
+ printFile(O, S->getFilename(), S->getDirectory(), S->getLine());
+ if (!S->getLinkageName().empty())
+ O << " ('" << S->getLinkageName() << "')";
O << '\n';
}
for (DIGlobalVariable GV : Finder.global_variables()) {
- O << "Global variable: " << GV.getName();
- printFile(O, GV.getFilename(), GV.getDirectory(), GV.getLineNumber());
- if (!GV.getLinkageName().empty())
- O << " ('" << GV.getLinkageName() << "')";
+ O << "Global variable: " << GV->getName();
+ printFile(O, GV->getFilename(), GV->getDirectory(), GV->getLine());
+ if (!GV->getLinkageName().empty())
+ O << " ('" << GV->getLinkageName() << "')";
O << '\n';
}
- for (DIType T : Finder.types()) {
+ for (const MDType *T : Finder.types()) {
O << "Type:";
- if (!T.getName().empty())
- O << ' ' << T.getName();
- printFile(O, T.getFilename(), T.getDirectory(), T.getLineNumber());
- if (T.isBasicType()) {
- DIBasicType BT(T.get());
+ if (!T->getName().empty())
+ O << ' ' << T->getName();
+ printFile(O, T->getFilename(), T->getDirectory(), T->getLine());
+ if (auto *BT = dyn_cast<MDBasicType>(T)) {
O << " ";
if (const char *Encoding =
- dwarf::AttributeEncodingString(BT.getEncoding()))
+ dwarf::AttributeEncodingString(BT->getEncoding()))
O << Encoding;
else
- O << "unknown-encoding(" << BT.getEncoding() << ')';
+ O << "unknown-encoding(" << BT->getEncoding() << ')';
} else {
O << ' ';
- if (const char *Tag = dwarf::TagString(T.getTag()))
+ if (const char *Tag = dwarf::TagString(T->getTag()))
O << Tag;
else
- O << "unknown-tag(" << T.getTag() << ")";
+ O << "unknown-tag(" << T->getTag() << ")";
}
- if (T.isCompositeType()) {
- DICompositeType CT(T.get());
- if (auto *S = CT.getIdentifier())
+ if (auto *CT = dyn_cast<MDCompositeType>(T)) {
+ if (auto *S = CT->getRawIdentifier())
O << " (identifier: '" << S->getString() << "')";
}
O << '\n';
diff --git a/lib/Analysis/RegionPass.cpp b/lib/Analysis/RegionPass.cpp
index cd1e944..5e1cdd4 100644
--- a/lib/Analysis/RegionPass.cpp
+++ b/lib/Analysis/RegionPass.cpp
@@ -199,7 +199,7 @@ public:
bool runOnRegion(Region *R, RGPassManager &RGM) override {
Out << Banner;
- for (const auto &BB : R->blocks()) {
+ for (const auto *BB : R->blocks()) {
if (BB)
BB->print(Out);
else
diff --git a/lib/Analysis/RegionPrinter.cpp b/lib/Analysis/RegionPrinter.cpp
index ad83113..d7f5109 100644
--- a/lib/Analysis/RegionPrinter.cpp
+++ b/lib/Analysis/RegionPrinter.cpp
@@ -123,7 +123,7 @@ struct DOTGraphTraits<RegionInfoPass*> : public DOTGraphTraits<RegionNode*> {
const RegionInfo &RI = *static_cast<const RegionInfo*>(R.getRegionInfo());
- for (const auto &BB : R.blocks())
+ for (auto *BB : R.blocks())
if (RI.getRegionFor(BB) == &R)
O.indent(2 * (depth + 1)) << "Node"
<< static_cast<const void*>(RI.getTopLevelRegion()->getBBNode(BB))
diff --git a/lib/Analysis/ScalarEvolution.cpp b/lib/Analysis/ScalarEvolution.cpp
index 4e713fb..37377f0 100644
--- a/lib/Analysis/ScalarEvolution.cpp
+++ b/lib/Analysis/ScalarEvolution.cpp
@@ -5690,7 +5690,7 @@ static Constant *BuildConstantFromSCEV(const SCEV *V) {
if (PTy->getElementType()->isStructTy())
C2 = ConstantExpr::getIntegerCast(
C2, Type::getInt32Ty(C->getContext()), true);
- C = ConstantExpr::getGetElementPtr(C, C2);
+ C = ConstantExpr::getGetElementPtr(PTy->getElementType(), C, C2);
} else
C = ConstantExpr::getAdd(C, C2);
}
@@ -6698,6 +6698,65 @@ ScalarEvolution::isLoopBackedgeGuardedByCond(const Loop *L,
return true;
}
+ struct ClearWalkingBEDominatingCondsOnExit {
+ ScalarEvolution &SE;
+
+ explicit ClearWalkingBEDominatingCondsOnExit(ScalarEvolution &SE)
+ : SE(SE){};
+
+ ~ClearWalkingBEDominatingCondsOnExit() {
+ SE.WalkingBEDominatingConds = false;
+ }
+ };
+
+ // We don't want more than one activation of the following loop on the stack
+ // -- that can lead to O(n!) time complexity.
+ if (WalkingBEDominatingConds)
+ return false;
+
+ WalkingBEDominatingConds = true;
+ ClearWalkingBEDominatingCondsOnExit ClearOnExit(*this);
+
+ // If the loop is not reachable from the entry block, we risk running into an
+ // infinite loop as we walk up into the dom tree. These loops do not matter
+ // anyway, so we just return a conservative answer when we see them.
+ if (!DT->isReachableFromEntry(L->getHeader()))
+ return false;
+
+ for (DomTreeNode *DTN = (*DT)[Latch], *HeaderDTN = (*DT)[L->getHeader()];
+ DTN != HeaderDTN;
+ DTN = DTN->getIDom()) {
+
+ assert(DTN && "should reach the loop header before reaching the root!");
+
+ BasicBlock *BB = DTN->getBlock();
+ BasicBlock *PBB = BB->getSinglePredecessor();
+ if (!PBB)
+ continue;
+
+ BranchInst *ContinuePredicate = dyn_cast<BranchInst>(PBB->getTerminator());
+ if (!ContinuePredicate || !ContinuePredicate->isConditional())
+ continue;
+
+ Value *Condition = ContinuePredicate->getCondition();
+
+ // If we have an edge `E` within the loop body that dominates the only
+ // latch, the condition guarding `E` also guards the backedge. This
+ // reasoning works only for loops with a single latch.
+
+ BasicBlockEdge DominatingEdge(PBB, BB);
+ if (DominatingEdge.isSingleEdge()) {
+ // We're constructively (and conservatively) enumerating edges within the
+ // loop body that dominate the latch. The dominator tree better agree
+ // with us on this:
+ assert(DT->dominates(DominatingEdge, Latch) && "should be!");
+
+ if (isImpliedCond(Pred, LHS, RHS, Condition,
+ BB != ContinuePredicate->getSuccessor(0)))
+ return true;
+ }
+ }
+
return false;
}
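For intuition (illustrative source, not from the patch): in a loop whose body begins with "if (i > m) break;", the fall-through edge into the rest of the body is the only way to reach the latch, so that edge dominates the latch and i <= m holds on every iteration that takes the backedge; the walk above feeds exactly such conditions to isImpliedCond.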
@@ -7968,8 +8027,8 @@ ScalarEvolution::SCEVCallbackVH::SCEVCallbackVH(Value *V, ScalarEvolution *se)
//===----------------------------------------------------------------------===//
ScalarEvolution::ScalarEvolution()
- : FunctionPass(ID), ValuesAtScopes(64), LoopDispositions(64),
- BlockDispositions(64), FirstUnknown(nullptr) {
+ : FunctionPass(ID), WalkingBEDominatingConds(false), ValuesAtScopes(64),
+ LoopDispositions(64), BlockDispositions(64), FirstUnknown(nullptr) {
initializeScalarEvolutionPass(*PassRegistry::getPassRegistry());
}
@@ -8000,6 +8059,7 @@ void ScalarEvolution::releaseMemory() {
}
assert(PendingLoopPredicates.empty() && "isImpliedCond garbage");
+ assert(!WalkingBEDominatingConds && "isLoopBackedgeGuardedByCond garbage!");
BackedgeTakenCounts.clear();
ConstantEvolutionLoopExitValue.clear();
diff --git a/lib/Analysis/ScalarEvolutionExpander.cpp b/lib/Analysis/ScalarEvolutionExpander.cpp
index a73ec9e..0bd427b 100644
--- a/lib/Analysis/ScalarEvolutionExpander.cpp
+++ b/lib/Analysis/ScalarEvolutionExpander.cpp
@@ -23,6 +23,7 @@
#include "llvm/IR/Dominators.h"
#include "llvm/IR/IntrinsicInst.h"
#include "llvm/IR/LLVMContext.h"
+#include "llvm/IR/Module.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/raw_ostream.h"
@@ -488,7 +489,8 @@ Value *SCEVExpander::expandAddToGEP(const SCEV *const *op_begin,
// Fold a GEP with constant operands.
if (Constant *CLHS = dyn_cast<Constant>(V))
if (Constant *CRHS = dyn_cast<Constant>(Idx))
- return ConstantExpr::getGetElementPtr(CLHS, CRHS);
+ return ConstantExpr::getGetElementPtr(Type::getInt8Ty(Ty->getContext()),
+ CLHS, CRHS);
// Do a quick scan to see if we have this GEP nearby. If so, reuse it.
unsigned ScanLimit = 6;
@@ -523,7 +525,7 @@ Value *SCEVExpander::expandAddToGEP(const SCEV *const *op_begin,
}
// Emit a GEP.
- Value *GEP = Builder.CreateGEP(V, Idx, "uglygep");
+ Value *GEP = Builder.CreateGEP(Builder.getInt8Ty(), V, Idx, "uglygep");
rememberInstruction(GEP);
return GEP;
@@ -1803,6 +1805,72 @@ unsigned SCEVExpander::replaceCongruentIVs(Loop *L, const DominatorTree *DT,
return NumElim;
}
+bool SCEVExpander::isHighCostExpansionHelper(
+ const SCEV *S, Loop *L, SmallPtrSetImpl<const SCEV *> &Processed) {
+ if (!Processed.insert(S).second)
+ return false;
+
+ if (auto *UDivExpr = dyn_cast<SCEVUDivExpr>(S)) {
+ // If the divisor is a power of two and the SCEV type fits in a native
+ // integer, consider the division cheap irrespective of whether it occurs in
+ // the user code since it can be lowered into a right shift.
+ if (auto *SC = dyn_cast<SCEVConstant>(UDivExpr->getRHS()))
+ if (SC->getValue()->getValue().isPowerOf2()) {
+ const DataLayout &DL =
+ L->getHeader()->getParent()->getParent()->getDataLayout();
+ unsigned Width = cast<IntegerType>(UDivExpr->getType())->getBitWidth();
+ return DL.isIllegalInteger(Width);
+ }
+
+ // UDivExpr is very likely a UDiv that ScalarEvolution's HowFarToZero or
+ // HowManyLessThans produced to compute a precise expression, rather than a
+ // UDiv from the user's code. If we can't find a UDiv in the code with some
+ // simple searching, assume the former and consider UDivExpr expensive to
+ // compute.
+ BasicBlock *ExitingBB = L->getExitingBlock();
+ if (!ExitingBB)
+ return true;
+
+ BranchInst *ExitingBI = dyn_cast<BranchInst>(ExitingBB->getTerminator());
+ if (!ExitingBI || !ExitingBI->isConditional())
+ return true;
+
+ ICmpInst *OrigCond = dyn_cast<ICmpInst>(ExitingBI->getCondition());
+ if (!OrigCond)
+ return true;
+
+ const SCEV *RHS = SE.getSCEV(OrigCond->getOperand(1));
+ RHS = SE.getMinusSCEV(RHS, SE.getConstant(RHS->getType(), 1));
+ if (RHS != S) {
+ const SCEV *LHS = SE.getSCEV(OrigCond->getOperand(0));
+ LHS = SE.getMinusSCEV(LHS, SE.getConstant(LHS->getType(), 1));
+ if (LHS != S)
+ return true;
+ }
+ }
+
+ // Recurse past add expressions, which commonly occur in the
+ // BackedgeTakenCount. They may already exist in program code, and if not,
+ // they are not too expensive to rematerialize.
+ if (const SCEVAddExpr *Add = dyn_cast<SCEVAddExpr>(S)) {
+ for (SCEVAddExpr::op_iterator I = Add->op_begin(), E = Add->op_end();
+ I != E; ++I) {
+ if (isHighCostExpansionHelper(*I, L, Processed))
+ return true;
+ }
+ return false;
+ }
+
+ // HowManyLessThans uses a Max expression whenever the loop is not guarded by
+ // the exit condition.
+ if (isa<SCEVSMaxExpr>(S) || isa<SCEVUMaxExpr>(S))
+ return true;
+
+ // If we haven't recognized an expensive SCEV pattern, assume it's an
+ // expression produced by program code.
+ return false;
+}
+
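Worked example (illustrative): on a target whose widest legal integer is 64 bits, a backedge-count expression containing an i64 unsigned divide by 8 is treated as cheap because it lowers to a right shift; the same divide on an i128 value makes the helper return true, and so does a divide by a non-power-of-two such as 7 unless a matching comparison is found behind the loop's exit branch.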
namespace {
// Search for a SCEV subexpression that is not safe to expand. Any expression
// that may expand to a !isSafeToSpeculativelyExecute value is unsafe, namely
diff --git a/lib/Analysis/TargetLibraryInfo.cpp b/lib/Analysis/TargetLibraryInfo.cpp
index 7e574d5..8b378a3 100644
--- a/lib/Analysis/TargetLibraryInfo.cpp
+++ b/lib/Analysis/TargetLibraryInfo.cpp
@@ -409,9 +409,7 @@ static StringRef sanitizeFunctionName(StringRef funcName) {
// Check for \01 prefix that is used to mangle __asm declarations and
// strip it if present.
- if (funcName.front() == '\01')
- funcName = funcName.substr(1);
- return funcName;
+ return GlobalValue::getRealLinkageName(funcName);
}
bool TargetLibraryInfoImpl::getLibFunc(StringRef funcName,
diff --git a/lib/Analysis/TargetTransformInfo.cpp b/lib/Analysis/TargetTransformInfo.cpp
index f51c7f54..a1519de 100644
--- a/lib/Analysis/TargetTransformInfo.cpp
+++ b/lib/Analysis/TargetTransformInfo.cpp
@@ -76,6 +76,10 @@ bool TargetTransformInfo::hasBranchDivergence() const {
return TTIImpl->hasBranchDivergence();
}
+bool TargetTransformInfo::isSourceOfDivergence(const Value *V) const {
+ return TTIImpl->isSourceOfDivergence(V);
+}
+
bool TargetTransformInfo::isLoweredToCall(const Function *F) const {
return TTIImpl->isLoweredToCall(F);
}
diff --git a/lib/Analysis/ValueTracking.cpp b/lib/Analysis/ValueTracking.cpp
index f329e3a..3651301 100644
--- a/lib/Analysis/ValueTracking.cpp
+++ b/lib/Analysis/ValueTracking.cpp
@@ -694,10 +694,9 @@ static void computeKnownBitsFromAssume(Value *V, APInt &KnownZero,
// We're running this loop for once for each value queried resulting in a
// runtime of ~O(#assumes * #values).
- assert(isa<IntrinsicInst>(I) &&
- dyn_cast<IntrinsicInst>(I)->getIntrinsicID() == Intrinsic::assume &&
+ assert(I->getCalledFunction()->getIntrinsicID() == Intrinsic::assume &&
"must be an assume intrinsic");
-
+
Value *Arg = I->getArgOperand(0);
if (Arg == V && isValidAssumeForContext(I, Q)) {
@@ -2935,7 +2934,7 @@ bool llvm::isKnownNonNull(const Value *V, const TargetLibraryInfo *TLI) {
if (const LoadInst *LI = dyn_cast<LoadInst>(V))
return LI->getMetadata(LLVMContext::MD_nonnull);
- if (ImmutableCallSite CS = V)
+ if (auto CS = ImmutableCallSite(V))
if (CS.isReturnNonNull())
return true;