author    Stephen Hines <srhines@google.com>  2015-03-23 12:10:34 -0700
committer Stephen Hines <srhines@google.com>  2015-03-23 12:10:34 -0700
commit    ebe69fe11e48d322045d5949c83283927a0d790b (patch)
tree      c92f1907a6b8006628a4b01615f38264d29834ea /lib/Analysis
parent    b7d2e72b02a4cb8034f32f8247a2558d2434e121 (diff)
Update aosp/master LLVM for rebase to r230699.
Change-Id: I2b5be30509658cb8266be782de0ab24f9099f9b9
Diffstat (limited to 'lib/Analysis')
-rw-r--r--  lib/Analysis/AliasAnalysis.cpp                  26
-rw-r--r--  lib/Analysis/Analysis.cpp                        5
-rw-r--r--  lib/Analysis/Android.mk                          6
-rw-r--r--  lib/Analysis/AssumptionCache.cpp               140
-rw-r--r--  lib/Analysis/AssumptionTracker.cpp             110
-rw-r--r--  lib/Analysis/BasicAliasAnalysis.cpp            191
-rw-r--r--  lib/Analysis/BlockFrequencyInfo.cpp              6
-rw-r--r--  lib/Analysis/BlockFrequencyInfoImpl.cpp         21
-rw-r--r--  lib/Analysis/BranchProbabilityInfo.cpp           9
-rw-r--r--  lib/Analysis/CFG.cpp                             2
-rw-r--r--  lib/Analysis/CFLAliasAnalysis.cpp               61
-rw-r--r--  lib/Analysis/CGSCCPassManager.cpp              103
-rw-r--r--  lib/Analysis/CMakeLists.txt                      9
-rw-r--r--  lib/Analysis/CaptureTracking.cpp                 2
-rw-r--r--  lib/Analysis/CodeMetrics.cpp                    26
-rw-r--r--  lib/Analysis/ConstantFolding.cpp                 2
-rw-r--r--  lib/Analysis/CostModel.cpp                       3
-rw-r--r--  lib/Analysis/Delinearization.cpp                 6
-rw-r--r--  lib/Analysis/DependenceAnalysis.cpp              6
-rw-r--r--  lib/Analysis/FunctionTargetTransformInfo.cpp    50
-rw-r--r--  lib/Analysis/IPA/Android.mk                      1
-rw-r--r--  lib/Analysis/IPA/CMakeLists.txt                  1
-rw-r--r--  lib/Analysis/IPA/CallGraphSCCPass.cpp            2
-rw-r--r--  lib/Analysis/IPA/FindUsedTypes.cpp             100
-rw-r--r--  lib/Analysis/IPA/IPA.cpp                         1
-rw-r--r--  lib/Analysis/IPA/InlineCost.cpp                 63
-rw-r--r--  lib/Analysis/IVUsers.cpp                         6
-rw-r--r--  lib/Analysis/InstructionSimplify.cpp           700
-rw-r--r--  lib/Analysis/LLVMBuild.txt                       2
-rw-r--r--  lib/Analysis/LazyCallGraph.cpp                   5
-rw-r--r--  lib/Analysis/LazyValueInfo.cpp                 341
-rw-r--r--  lib/Analysis/LibCallSemantics.cpp               39
-rw-r--r--  lib/Analysis/Lint.cpp                          232
-rw-r--r--  lib/Analysis/Loads.cpp                          14
-rw-r--r--  lib/Analysis/LoopAccessAnalysis.cpp           1396
-rw-r--r--  lib/Analysis/LoopInfo.cpp                       92
-rw-r--r--  lib/Analysis/LoopPass.cpp                        7
-rw-r--r--  lib/Analysis/MemDepPrinter.cpp                  45
-rw-r--r--  lib/Analysis/MemDerefPrinter.cpp                70
-rw-r--r--  lib/Analysis/MemoryBuiltins.cpp                  4
-rw-r--r--  lib/Analysis/MemoryDependenceAnalysis.cpp      129
-rw-r--r--  lib/Analysis/PHITransAddr.cpp                    6
-rw-r--r--  lib/Analysis/RegionInfo.cpp                      2
-rw-r--r--  lib/Analysis/RegionPass.cpp                      3
-rw-r--r--  lib/Analysis/ScalarEvolution.cpp               839
-rw-r--r--  lib/Analysis/ScalarEvolutionExpander.cpp        41
-rw-r--r--  lib/Analysis/ScopedNoAliasAA.cpp                 2
-rw-r--r--  lib/Analysis/TargetLibraryInfo.cpp             810
-rw-r--r--  lib/Analysis/TargetTransformInfo.cpp           629
-rw-r--r--  lib/Analysis/TypeBasedAliasAnalysis.cpp         27
-rw-r--r--  lib/Analysis/ValueTracking.cpp                 389
51 files changed, 4764 insertions(+), 2018 deletions(-)
diff --git a/lib/Analysis/AliasAnalysis.cpp b/lib/Analysis/AliasAnalysis.cpp
index 5171a45..4e95aa0 100644
--- a/lib/Analysis/AliasAnalysis.cpp
+++ b/lib/Analysis/AliasAnalysis.cpp
@@ -27,6 +27,7 @@
#include "llvm/Analysis/AliasAnalysis.h"
#include "llvm/Analysis/CFG.h"
#include "llvm/Analysis/CaptureTracking.h"
+#include "llvm/Analysis/TargetLibraryInfo.h"
#include "llvm/Analysis/ValueTracking.h"
#include "llvm/IR/BasicBlock.h"
#include "llvm/IR/DataLayout.h"
@@ -37,7 +38,6 @@
#include "llvm/IR/LLVMContext.h"
#include "llvm/IR/Type.h"
#include "llvm/Pass.h"
-#include "llvm/Target/TargetLibraryInfo.h"
using namespace llvm;
// Register the AliasAnalysis interface, providing a nice name to refer to.
@@ -465,7 +465,8 @@ AliasAnalysis::~AliasAnalysis() {}
void AliasAnalysis::InitializeAliasAnalysis(Pass *P) {
DataLayoutPass *DLP = P->getAnalysisIfAvailable<DataLayoutPass>();
DL = DLP ? &DLP->getDataLayout() : nullptr;
- TLI = P->getAnalysisIfAvailable<TargetLibraryInfo>();
+ auto *TLIP = P->getAnalysisIfAvailable<TargetLibraryInfoWrapperPass>();
+ TLI = TLIP ? &TLIP->getTLI() : nullptr;
AA = &P->getAnalysis<AliasAnalysis>();
}
@@ -483,21 +484,22 @@ uint64_t AliasAnalysis::getTypeStoreSize(Type *Ty) {
}
/// canBasicBlockModify - Return true if it is possible for execution of the
-/// specified basic block to modify the value pointed to by Ptr.
+/// specified basic block to modify the location Loc.
///
bool AliasAnalysis::canBasicBlockModify(const BasicBlock &BB,
const Location &Loc) {
- return canInstructionRangeModify(BB.front(), BB.back(), Loc);
+ return canInstructionRangeModRef(BB.front(), BB.back(), Loc, Mod);
}
-/// canInstructionRangeModify - Return true if it is possible for the execution
-/// of the specified instructions to modify the value pointed to by Ptr. The
-/// instructions to consider are all of the instructions in the range of [I1,I2]
-/// INCLUSIVE. I1 and I2 must be in the same basic block.
-///
-bool AliasAnalysis::canInstructionRangeModify(const Instruction &I1,
+/// canInstructionRangeModRef - Return true if it is possible for the
+/// execution of the specified instructions to mod/ref (according to the
+/// mode) the location Loc. The instructions to consider are all
+/// of the instructions in the range of [I1,I2] INCLUSIVE.
+/// I1 and I2 must be in the same basic block.
+bool AliasAnalysis::canInstructionRangeModRef(const Instruction &I1,
const Instruction &I2,
- const Location &Loc) {
+ const Location &Loc,
+ const ModRefResult Mode) {
assert(I1.getParent() == I2.getParent() &&
"Instructions not in same basic block!");
BasicBlock::const_iterator I = &I1;
@@ -505,7 +507,7 @@ bool AliasAnalysis::canInstructionRangeModify(const Instruction &I1,
++E; // Convert from inclusive to exclusive range.
for (; I != E; ++I) // Check every instruction in range
- if (getModRefInfo(I, Loc) & Mod)
+ if (getModRefInfo(I, Loc) & Mode)
return true;
return false;
}
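
The renamed query makes the mod/ref mode an explicit argument instead of hard-coding Mod. As an illustration, a caller migrating to the new signature might look like the following minimal sketch (the helper name and the surrounding pass are assumptions, not part of this change):

    // Hypothetical helper: can any instruction in BB write to Loc?
    static bool blockMayWriteTo(llvm::AliasAnalysis &AA,
                                const llvm::BasicBlock &BB,
                                const llvm::AliasAnalysis::Location &Loc) {
      // Old API: AA.canInstructionRangeModify(BB.front(), BB.back(), Loc)
      // New API: the mode (Mod, Ref, or ModRef) is passed explicitly.
      return AA.canInstructionRangeModRef(BB.front(), BB.back(), Loc,
                                          llvm::AliasAnalysis::Mod);
    }

Passing Ref instead asks the symmetric question for reads, which is what the extra Mode parameter buys over the old modify-only entry point.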
diff --git a/lib/Analysis/Analysis.cpp b/lib/Analysis/Analysis.cpp
index f64bf0e..1bfb06d 100644
--- a/lib/Analysis/Analysis.cpp
+++ b/lib/Analysis/Analysis.cpp
@@ -53,8 +53,9 @@ void llvm::initializeAnalysis(PassRegistry &Registry) {
initializeLazyValueInfoPass(Registry);
initializeLibCallAliasAnalysisPass(Registry);
initializeLintPass(Registry);
- initializeLoopInfoPass(Registry);
+ initializeLoopInfoWrapperPassPass(Registry);
initializeMemDepPrinterPass(Registry);
+ initializeMemDerefPrinterPass(Registry);
initializeMemoryDependenceAnalysisPass(Registry);
initializeModuleDebugInfoPrinterPass(Registry);
initializePostDominatorTreePass(Registry);
@@ -65,7 +66,7 @@ void llvm::initializeAnalysis(PassRegistry &Registry) {
initializeRegionOnlyPrinterPass(Registry);
initializeScalarEvolutionPass(Registry);
initializeScalarEvolutionAliasAnalysisPass(Registry);
- initializeTargetTransformInfoAnalysisGroup(Registry);
+ initializeTargetTransformInfoWrapperPassPass(Registry);
initializeTypeBasedAliasAnalysisPass(Registry);
initializeScopedNoAliasAAPass(Registry);
}
diff --git a/lib/Analysis/Android.mk b/lib/Analysis/Android.mk
index 8770fa7..e17b870 100644
--- a/lib/Analysis/Android.mk
+++ b/lib/Analysis/Android.mk
@@ -7,7 +7,7 @@ analysis_SRC_FILES := \
AliasDebugger.cpp \
AliasSetTracker.cpp \
Analysis.cpp \
- AssumptionTracker.cpp \
+ AssumptionCache.cpp \
BasicAliasAnalysis.cpp \
BlockFrequencyInfo.cpp \
BlockFrequencyInfoImpl.cpp \
@@ -24,7 +24,6 @@ analysis_SRC_FILES := \
DependenceAnalysis.cpp \
DomPrinter.cpp \
DominanceFrontier.cpp \
- FunctionTargetTransformInfo.cpp \
IVUsers.cpp \
InstCount.cpp \
InstructionSimplify.cpp \
@@ -37,9 +36,11 @@ analysis_SRC_FILES := \
LibCallSemantics.cpp \
Lint.cpp \
Loads.cpp \
+ LoopAccessAnalysis.cpp \
LoopInfo.cpp \
LoopPass.cpp \
MemDepPrinter.cpp \
+ MemDerefPrinter.cpp \
MemoryBuiltins.cpp \
MemoryDependenceAnalysis.cpp \
ModuleDebugInfoPrinter.cpp \
@@ -56,6 +57,7 @@ analysis_SRC_FILES := \
ScalarEvolutionNormalization.cpp \
ScopedNoAliasAA.cpp \
SparsePropagation.cpp \
+ TargetLibraryInfo.cpp \
TargetTransformInfo.cpp \
Trace.cpp \
TypeBasedAliasAnalysis.cpp \
diff --git a/lib/Analysis/AssumptionCache.cpp b/lib/Analysis/AssumptionCache.cpp
new file mode 100644
index 0000000..f468a43
--- /dev/null
+++ b/lib/Analysis/AssumptionCache.cpp
@@ -0,0 +1,140 @@
+//===- AssumptionCache.cpp - Cache finding @llvm.assume calls -------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains a pass that keeps track of @llvm.assume intrinsics in
+// the functions of a module.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/Analysis/AssumptionCache.h"
+#include "llvm/IR/CallSite.h"
+#include "llvm/IR/Dominators.h"
+#include "llvm/IR/Function.h"
+#include "llvm/IR/Instructions.h"
+#include "llvm/IR/IntrinsicInst.h"
+#include "llvm/IR/PassManager.h"
+#include "llvm/IR/PatternMatch.h"
+#include "llvm/Support/Debug.h"
+using namespace llvm;
+using namespace llvm::PatternMatch;
+
+void AssumptionCache::scanFunction() {
+ assert(!Scanned && "Tried to scan the function twice!");
+ assert(AssumeHandles.empty() && "Already have assumes when scanning!");
+
+ // Go through all instructions in all blocks, add all calls to @llvm.assume
+ // to this cache.
+ for (BasicBlock &B : F)
+ for (Instruction &II : B)
+ if (match(&II, m_Intrinsic<Intrinsic::assume>()))
+ AssumeHandles.push_back(&II);
+
+ // Mark the scan as complete.
+ Scanned = true;
+}
+
+void AssumptionCache::registerAssumption(CallInst *CI) {
+ assert(match(CI, m_Intrinsic<Intrinsic::assume>()) &&
+ "Registered call does not call @llvm.assume");
+
+ // If we haven't scanned the function yet, just drop this assumption. It will
+ // be found when we scan later.
+ if (!Scanned)
+ return;
+
+ AssumeHandles.push_back(CI);
+
+#ifndef NDEBUG
+ assert(CI->getParent() &&
+ "Cannot register @llvm.assume call not in a basic block");
+ assert(&F == CI->getParent()->getParent() &&
+ "Cannot register @llvm.assume call not in this function");
+
+ // We expect the number of assumptions to be small, so in an asserts build
+ // check that we don't accumulate duplicates and that all assumptions point
+ // to the same function.
+ SmallPtrSet<Value *, 16> AssumptionSet;
+ for (auto &VH : AssumeHandles) {
+ if (!VH)
+ continue;
+
+ assert(&F == cast<Instruction>(VH)->getParent()->getParent() &&
+ "Cached assumption not inside this function!");
+ assert(match(cast<CallInst>(VH), m_Intrinsic<Intrinsic::assume>()) &&
+ "Cached something other than a call to @llvm.assume!");
+ assert(AssumptionSet.insert(VH).second &&
+ "Cache contains multiple copies of a call!");
+ }
+#endif
+}
+
+char AssumptionAnalysis::PassID;
+
+PreservedAnalyses AssumptionPrinterPass::run(Function &F,
+ AnalysisManager<Function> *AM) {
+ AssumptionCache &AC = AM->getResult<AssumptionAnalysis>(F);
+
+ OS << "Cached assumptions for function: " << F.getName() << "\n";
+ for (auto &VH : AC.assumptions())
+ if (VH)
+ OS << " " << *cast<CallInst>(VH)->getArgOperand(0) << "\n";
+
+ return PreservedAnalyses::all();
+}
+
+void AssumptionCacheTracker::FunctionCallbackVH::deleted() {
+ auto I = ACT->AssumptionCaches.find_as(cast<Function>(getValPtr()));
+ if (I != ACT->AssumptionCaches.end())
+ ACT->AssumptionCaches.erase(I);
+ // 'this' now dangles!
+}
+
+AssumptionCache &AssumptionCacheTracker::getAssumptionCache(Function &F) {
+ // We probe the function map twice to try and avoid creating a value handle
+ // around the function in common cases. This makes insertion a bit slower,
+ // but if we have to insert we're going to scan the whole function so that
+ // shouldn't matter.
+ auto I = AssumptionCaches.find_as(&F);
+ if (I != AssumptionCaches.end())
+ return *I->second;
+
+ // Ok, build a new cache by scanning the function, insert it and the value
+ // handle into our map, and return the newly populated cache.
+ auto IP = AssumptionCaches.insert(std::make_pair(
+ FunctionCallbackVH(&F, this), llvm::make_unique<AssumptionCache>(F)));
+ assert(IP.second && "Scanning function already in the map?");
+ return *IP.first->second;
+}
+
+void AssumptionCacheTracker::verifyAnalysis() const {
+#ifndef NDEBUG
+ SmallPtrSet<const CallInst *, 4> AssumptionSet;
+ for (const auto &I : AssumptionCaches) {
+ for (auto &VH : I.second->assumptions())
+ if (VH)
+ AssumptionSet.insert(cast<CallInst>(VH));
+
+ for (const BasicBlock &B : cast<Function>(*I.first))
+ for (const Instruction &II : B)
+ if (match(&II, m_Intrinsic<Intrinsic::assume>()))
+ assert(AssumptionSet.count(cast<CallInst>(&II)) &&
+ "Assumption in scanned function not in cache");
+ }
+#endif
+}
+
+AssumptionCacheTracker::AssumptionCacheTracker() : ImmutablePass(ID) {
+ initializeAssumptionCacheTrackerPass(*PassRegistry::getPassRegistry());
+}
+
+AssumptionCacheTracker::~AssumptionCacheTracker() {}
+
+INITIALIZE_PASS(AssumptionCacheTracker, "assumption-cache-tracker",
+ "Assumption Cache Tracker", false, true)
+char AssumptionCacheTracker::ID = 0;
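
Under the legacy pass manager the cache is reached through AssumptionCacheTracker. A minimal sketch of a consuming pass follows; the pass name is hypothetical and the usual INITIALIZE_PASS registration is assumed elsewhere:

    #include "llvm/Analysis/AssumptionCache.h"
    #include "llvm/IR/IntrinsicInst.h"
    #include "llvm/Pass.h"
    using namespace llvm;

    namespace {
    struct UsesAssumptions : FunctionPass {
      static char ID;
      UsesAssumptions() : FunctionPass(ID) {}

      void getAnalysisUsage(AnalysisUsage &AU) const override {
        AU.addRequired<AssumptionCacheTracker>();
        AU.setPreservesAll();
      }

      bool runOnFunction(Function &F) override {
        // The first request triggers the lazy scan for @llvm.assume calls.
        AssumptionCache &AC =
            getAnalysis<AssumptionCacheTracker>().getAssumptionCache(F);
        for (auto &VH : AC.assumptions())
          if (VH) // handles of since-deleted calls are null
            (void)cast<CallInst>(VH)->getArgOperand(0); // the assumed i1
        return false;
      }
    };
    }
    char UsesAssumptions::ID = 0;

Unlike the removed AssumptionTracker, deleting an @llvm.assume call merely nulls its weak handle rather than erasing a set entry, which is why consumers skip null handles.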
diff --git a/lib/Analysis/AssumptionTracker.cpp b/lib/Analysis/AssumptionTracker.cpp
deleted file mode 100644
index 775ce1d..0000000
--- a/lib/Analysis/AssumptionTracker.cpp
+++ /dev/null
@@ -1,110 +0,0 @@
-//===- AssumptionTracker.cpp - Track @llvm.assume -------------------------===//
-//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-//
-// This file contains a pass that keeps track of @llvm.assume intrinsics in
-// the functions of a module.
-//
-//===----------------------------------------------------------------------===//
-
-#include "llvm/Analysis/AssumptionTracker.h"
-#include "llvm/IR/CallSite.h"
-#include "llvm/IR/Dominators.h"
-#include "llvm/IR/Function.h"
-#include "llvm/IR/Instructions.h"
-#include "llvm/IR/IntrinsicInst.h"
-#include "llvm/IR/PatternMatch.h"
-#include "llvm/Support/Debug.h"
-using namespace llvm;
-using namespace llvm::PatternMatch;
-
-void AssumptionTracker::FunctionCallbackVH::deleted() {
- AT->forgetCachedAssumptions(cast<Function>(getValPtr()));
- // 'this' now dangles!
-}
-
-void AssumptionTracker::forgetCachedAssumptions(Function *F) {
- auto I = CachedAssumeCalls.find_as(F);
- if (I != CachedAssumeCalls.end())
- CachedAssumeCalls.erase(I);
-}
-
-void AssumptionTracker::CallCallbackVH::deleted() {
- assert(F && "delete callback called on dummy handle");
- FunctionCallsMap::iterator I = AT->CachedAssumeCalls.find_as(F);
- assert(I != AT->CachedAssumeCalls.end() &&
- "Function cleared from the map without removing the values?");
-
- I->second->erase(*this);
- // 'this' now dangles!
-}
-
-AssumptionTracker::FunctionCallsMap::iterator
-AssumptionTracker::scanFunction(Function *F) {
- auto IP = CachedAssumeCalls.insert(std::make_pair(
- FunctionCallbackVH(F, this), llvm::make_unique<CallHandleSet>()));
- assert(IP.second && "Scanning function already in the map?");
-
- FunctionCallsMap::iterator I = IP.first;
-
- // Go through all instructions in all blocks, add all calls to @llvm.assume
- // to our cache.
- for (BasicBlock &B : *F)
- for (Instruction &II : B)
- if (match(&II, m_Intrinsic<Intrinsic::assume>()))
- I->second->insert(CallCallbackVH(&II, this));
-
- return I;
-}
-
-void AssumptionTracker::verifyAnalysis() const {
-#ifndef NDEBUG
- for (const auto &I : CachedAssumeCalls) {
- for (const BasicBlock &B : cast<Function>(*I.first))
- for (const Instruction &II : B) {
- if (match(&II, m_Intrinsic<Intrinsic::assume>())) {
- assert(I.second->find_as(&II) != I.second->end() &&
- "Assumption in scanned function not in cache");
- }
- }
- }
-#endif
-}
-
-void AssumptionTracker::registerAssumption(CallInst *CI) {
- assert(match(CI, m_Intrinsic<Intrinsic::assume>()) &&
- "Registered call does not call @llvm.assume");
- assert(CI->getParent() &&
- "Cannot register @llvm.assume call not in a basic block");
-
- Function *F = CI->getParent()->getParent();
- assert(F && "Cannot register @llvm.assume call not in a function");
-
- FunctionCallsMap::iterator I = CachedAssumeCalls.find_as(F);
- if (I == CachedAssumeCalls.end()) {
- // If this function has not already been scanned, then don't do anything
- // here. This intrinsic will be found, if it still exists, if the list of
- // assumptions in this function is requested at some later point. This
- // maintains the following invariant: if a function is present in the
- // cache, then its list of assumption intrinsic calls is complete.
- return;
- }
-
- I->second->insert(CallCallbackVH(CI, this));
-}
-
-AssumptionTracker::AssumptionTracker() : ImmutablePass(ID) {
- initializeAssumptionTrackerPass(*PassRegistry::getPassRegistry());
-}
-
-AssumptionTracker::~AssumptionTracker() {}
-
-INITIALIZE_PASS(AssumptionTracker, "assumption-tracker", "Assumption Tracker",
- false, true)
-char AssumptionTracker::ID = 0;
-
diff --git a/lib/Analysis/BasicAliasAnalysis.cpp b/lib/Analysis/BasicAliasAnalysis.cpp
index 9aba0d3..46ca6ee 100644
--- a/lib/Analysis/BasicAliasAnalysis.cpp
+++ b/lib/Analysis/BasicAliasAnalysis.cpp
@@ -17,12 +17,13 @@
#include "llvm/ADT/SmallPtrSet.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/Analysis/AliasAnalysis.h"
-#include "llvm/Analysis/AssumptionTracker.h"
+#include "llvm/Analysis/AssumptionCache.h"
#include "llvm/Analysis/CFG.h"
#include "llvm/Analysis/CaptureTracking.h"
#include "llvm/Analysis/InstructionSimplify.h"
#include "llvm/Analysis/LoopInfo.h"
#include "llvm/Analysis/MemoryBuiltins.h"
+#include "llvm/Analysis/TargetLibraryInfo.h"
#include "llvm/Analysis/ValueTracking.h"
#include "llvm/IR/Constants.h"
#include "llvm/IR/DataLayout.h"
@@ -38,7 +39,6 @@
#include "llvm/IR/Operator.h"
#include "llvm/Pass.h"
#include "llvm/Support/ErrorHandling.h"
-#include "llvm/Target/TargetLibraryInfo.h"
#include <algorithm>
using namespace llvm;
@@ -196,8 +196,7 @@ namespace {
static Value *GetLinearExpression(Value *V, APInt &Scale, APInt &Offset,
ExtensionKind &Extension,
const DataLayout &DL, unsigned Depth,
- AssumptionTracker *AT,
- DominatorTree *DT) {
+ AssumptionCache *AC, DominatorTree *DT) {
assert(V->getType()->isIntegerTy() && "Not an integer value");
// Limit our recursion depth.
@@ -222,24 +221,24 @@ static Value *GetLinearExpression(Value *V, APInt &Scale, APInt &Offset,
case Instruction::Or:
// X|C == X+C if all the bits in C are unset in X. Otherwise we can't
// analyze it.
- if (!MaskedValueIsZero(BOp->getOperand(0), RHSC->getValue(), &DL, 0,
- AT, BOp, DT))
+ if (!MaskedValueIsZero(BOp->getOperand(0), RHSC->getValue(), &DL, 0, AC,
+ BOp, DT))
break;
// FALL THROUGH.
case Instruction::Add:
V = GetLinearExpression(BOp->getOperand(0), Scale, Offset, Extension,
- DL, Depth+1, AT, DT);
+ DL, Depth + 1, AC, DT);
Offset += RHSC->getValue();
return V;
case Instruction::Mul:
V = GetLinearExpression(BOp->getOperand(0), Scale, Offset, Extension,
- DL, Depth+1, AT, DT);
+ DL, Depth + 1, AC, DT);
Offset *= RHSC->getValue();
Scale *= RHSC->getValue();
return V;
case Instruction::Shl:
V = GetLinearExpression(BOp->getOperand(0), Scale, Offset, Extension,
- DL, Depth+1, AT, DT);
+ DL, Depth + 1, AC, DT);
Offset <<= RHSC->getValue().getLimitedValue();
Scale <<= RHSC->getValue().getLimitedValue();
return V;
@@ -259,8 +258,8 @@ static Value *GetLinearExpression(Value *V, APInt &Scale, APInt &Offset,
Offset = Offset.trunc(SmallWidth);
Extension = isa<SExtInst>(V) ? EK_SignExt : EK_ZeroExt;
- Value *Result = GetLinearExpression(CastOp, Scale, Offset, Extension,
- DL, Depth+1, AT, DT);
+ Value *Result = GetLinearExpression(CastOp, Scale, Offset, Extension, DL,
+ Depth + 1, AC, DT);
Scale = Scale.zext(OldWidth);
// We have to sign-extend even if Extension == EK_ZeroExt as we can't
@@ -294,7 +293,7 @@ static const Value *
DecomposeGEPExpression(const Value *V, int64_t &BaseOffs,
SmallVectorImpl<VariableGEPIndex> &VarIndices,
bool &MaxLookupReached, const DataLayout *DL,
- AssumptionTracker *AT, DominatorTree *DT) {
+ AssumptionCache *AC, DominatorTree *DT) {
// Limit recursion depth to limit compile time in crazy cases.
unsigned MaxLookup = MaxLookupSearchDepth;
MaxLookupReached = false;
@@ -325,7 +324,7 @@ DecomposeGEPExpression(const Value *V, int64_t &BaseOffs,
// If it's not a GEP, hand it off to SimplifyInstruction to see if it
// can come up with something. This matches what GetUnderlyingObject does.
if (const Instruction *I = dyn_cast<Instruction>(V))
- // TODO: Get a DominatorTree and AssumptionTracker and use them here
+ // TODO: Get a DominatorTree and AssumptionCache and use them here
// (these are both now available in this function, but this should be
// updated when GetUnderlyingObject is updated). TLI should be
// provided also.
@@ -387,7 +386,7 @@ DecomposeGEPExpression(const Value *V, int64_t &BaseOffs,
// Use GetLinearExpression to decompose the index into a C1*V+C2 form.
APInt IndexScale(Width, 0), IndexOffset(Width, 0);
Index = GetLinearExpression(Index, IndexScale, IndexOffset, Extension,
- *DL, 0, AT, DT);
+ *DL, 0, AC, DT);
// The GEP index scale ("Scale") scales C1*V+C2, yielding (C1*V+C2)*Scale.
// This gives us an aggregate computation of (C1*Scale)*V + C2*Scale.
@@ -468,8 +467,8 @@ namespace {
void getAnalysisUsage(AnalysisUsage &AU) const override {
AU.addRequired<AliasAnalysis>();
- AU.addRequired<AssumptionTracker>();
- AU.addRequired<TargetLibraryInfo>();
+ AU.addRequired<AssumptionCacheTracker>();
+ AU.addRequired<TargetLibraryInfoWrapperPass>();
}
AliasResult alias(const Location &LocA, const Location &LocB) override {
@@ -591,8 +590,8 @@ char BasicAliasAnalysis::ID = 0;
INITIALIZE_AG_PASS_BEGIN(BasicAliasAnalysis, AliasAnalysis, "basicaa",
"Basic Alias Analysis (stateless AA impl)",
false, true, false)
-INITIALIZE_PASS_DEPENDENCY(AssumptionTracker)
-INITIALIZE_PASS_DEPENDENCY(TargetLibraryInfo)
+INITIALIZE_PASS_DEPENDENCY(AssumptionCacheTracker)
+INITIALIZE_PASS_DEPENDENCY(TargetLibraryInfoWrapperPass)
INITIALIZE_AG_PASS_END(BasicAliasAnalysis, AliasAnalysis, "basicaa",
"Basic Alias Analysis (stateless AA impl)",
false, true, false)
@@ -719,7 +718,8 @@ BasicAliasAnalysis::getModRefBehavior(const Function *F) {
if (F->onlyReadsMemory())
Min = OnlyReadsMemory;
- const TargetLibraryInfo &TLI = getAnalysis<TargetLibraryInfo>();
+ const TargetLibraryInfo &TLI =
+ getAnalysis<TargetLibraryInfoWrapperPass>().getTLI();
if (isMemsetPattern16(F, TLI))
Min = OnlyAccessesArgumentPointees;
@@ -731,7 +731,8 @@ AliasAnalysis::Location
BasicAliasAnalysis::getArgLocation(ImmutableCallSite CS, unsigned ArgIdx,
ModRefResult &Mask) {
Location Loc = AliasAnalysis::getArgLocation(CS, ArgIdx, Mask);
- const TargetLibraryInfo &TLI = getAnalysis<TargetLibraryInfo>();
+ const TargetLibraryInfo &TLI =
+ getAnalysis<TargetLibraryInfoWrapperPass>().getTLI();
const IntrinsicInst *II = dyn_cast<IntrinsicInst>(CS.getInstruction());
if (II != nullptr)
switch (II->getIntrinsicID()) {
@@ -889,6 +890,99 @@ BasicAliasAnalysis::getModRefInfo(ImmutableCallSite CS1,
return AliasAnalysis::getModRefInfo(CS1, CS2);
}
+/// \brief Provide ad-hoc rules to disambiguate accesses through two GEP
+/// operators, both having the exact same pointer operand.
+static AliasAnalysis::AliasResult
+aliasSameBasePointerGEPs(const GEPOperator *GEP1, uint64_t V1Size,
+ const GEPOperator *GEP2, uint64_t V2Size,
+ const DataLayout &DL) {
+
+ assert(GEP1->getPointerOperand() == GEP2->getPointerOperand() &&
+ "Expected GEPs with the same pointer operand");
+
+ // Try to determine whether GEP1 and GEP2 index through arrays, into structs,
+ // such that the struct field accesses provably cannot alias.
+ // We also need at least two indices (the pointer, and the struct field).
+ if (GEP1->getNumIndices() != GEP2->getNumIndices() ||
+ GEP1->getNumIndices() < 2)
+ return AliasAnalysis::MayAlias;
+
+ // If we don't know the size of the accesses through both GEPs, we can't
+ // determine whether the struct fields accessed can't alias.
+ if (V1Size == AliasAnalysis::UnknownSize ||
+ V2Size == AliasAnalysis::UnknownSize)
+ return AliasAnalysis::MayAlias;
+
+ ConstantInt *C1 =
+ dyn_cast<ConstantInt>(GEP1->getOperand(GEP1->getNumOperands() - 1));
+ ConstantInt *C2 =
+ dyn_cast<ConstantInt>(GEP2->getOperand(GEP2->getNumOperands() - 1));
+
+ // If the last (struct) indices aren't constants, we can't say anything.
+ // If they're identical, the other indices might also be dynamically
+ // equal, so the GEPs can alias.
+ if (!C1 || !C2 || C1 == C2)
+ return AliasAnalysis::MayAlias;
+
+ // Find the last-indexed type of the GEP, i.e., the type you'd get if
+ // you stripped the last index.
+ // On the way, look at each indexed type. If there's something other
+ // than an array, different indices can lead to different final types.
+ SmallVector<Value *, 8> IntermediateIndices;
+
+ // Insert the first index; we don't need to check the type indexed
+ // through it as it only drops the pointer indirection.
+ assert(GEP1->getNumIndices() > 1 && "Not enough GEP indices to examine");
+ IntermediateIndices.push_back(GEP1->getOperand(1));
+
+ // Insert all the remaining indices but the last one.
+ // Also, check that they all index through arrays.
+ for (unsigned i = 1, e = GEP1->getNumIndices() - 1; i != e; ++i) {
+ if (!isa<ArrayType>(GetElementPtrInst::getIndexedType(
+ GEP1->getPointerOperandType(), IntermediateIndices)))
+ return AliasAnalysis::MayAlias;
+ IntermediateIndices.push_back(GEP1->getOperand(i + 1));
+ }
+
+ StructType *LastIndexedStruct =
+ dyn_cast<StructType>(GetElementPtrInst::getIndexedType(
+ GEP1->getPointerOperandType(), IntermediateIndices));
+
+ if (!LastIndexedStruct)
+ return AliasAnalysis::MayAlias;
+
+ // We know that:
+ // - both GEPs begin indexing from the exact same pointer;
+ // - the last indices in both GEPs are constants, indexing into a struct;
+ // - said indices are different, hence, the pointed-to fields are different;
+ // - both GEPs only index through arrays prior to that.
+ //
+ // This lets us determine that the struct that GEP1 indexes into and the
+ // struct that GEP2 indexes into must either precisely overlap or be
+ // completely disjoint. Because they cannot partially overlap, indexing into
+ // different non-overlapping fields of the struct will never alias.
+
+ // Therefore, the only remaining thing needed to show that both GEPs can't
+ // alias is that the fields are not overlapping.
+ const StructLayout *SL = DL.getStructLayout(LastIndexedStruct);
+ const uint64_t StructSize = SL->getSizeInBytes();
+ const uint64_t V1Off = SL->getElementOffset(C1->getZExtValue());
+ const uint64_t V2Off = SL->getElementOffset(C2->getZExtValue());
+
+ auto EltsDontOverlap = [StructSize](uint64_t V1Off, uint64_t V1Size,
+ uint64_t V2Off, uint64_t V2Size) {
+ return V1Off < V2Off && V1Off + V1Size <= V2Off &&
+ ((V2Off + V2Size <= StructSize) ||
+ (V2Off + V2Size - StructSize <= V1Off));
+ };
+
+ if (EltsDontOverlap(V1Off, V1Size, V2Off, V2Size) ||
+ EltsDontOverlap(V2Off, V2Size, V1Off, V1Size))
+ return AliasAnalysis::NoAlias;
+
+ return AliasAnalysis::MayAlias;
+}
+
/// aliasGEP - Provide a bunch of ad-hoc rules to disambiguate a GEP instruction
/// against another pointer. We know that V1 is a GEP, but we don't know
/// anything about V2. UnderlyingV1 is GetUnderlyingObject(GEP1, DL),
@@ -905,7 +999,22 @@ BasicAliasAnalysis::aliasGEP(const GEPOperator *GEP1, uint64_t V1Size,
bool GEP1MaxLookupReached;
SmallVector<VariableGEPIndex, 4> GEP1VariableIndices;
- AssumptionTracker *AT = &getAnalysis<AssumptionTracker>();
+ // We have to get two AssumptionCaches here because GEP1 and V2 may be from
+ // different functions.
+ // FIXME: This really doesn't make any sense. We get a dominator tree below
+ // that can only refer to a single function. But this function (aliasGEP) is
+ // a method on an immutable pass that can be called when there *isn't*
+ // a single function. The old pass management layer makes this "work", but
+ // this isn't really a clean solution.
+ AssumptionCacheTracker &ACT = getAnalysis<AssumptionCacheTracker>();
+ AssumptionCache *AC1 = nullptr, *AC2 = nullptr;
+ if (auto *GEP1I = dyn_cast<Instruction>(GEP1))
+ AC1 = &ACT.getAssumptionCache(
+ const_cast<Function &>(*GEP1I->getParent()->getParent()));
+ if (auto *I2 = dyn_cast<Instruction>(V2))
+ AC2 = &ACT.getAssumptionCache(
+ const_cast<Function &>(*I2->getParent()->getParent()));
+
DominatorTreeWrapperPass *DTWP =
getAnalysisIfAvailable<DominatorTreeWrapperPass>();
DominatorTree *DT = DTWP ? &DTWP->getDomTree() : nullptr;
@@ -932,11 +1041,11 @@ BasicAliasAnalysis::aliasGEP(const GEPOperator *GEP1, uint64_t V1Size,
bool GEP2MaxLookupReached;
SmallVector<VariableGEPIndex, 4> GEP2VariableIndices;
const Value *GEP2BasePtr =
- DecomposeGEPExpression(GEP2, GEP2BaseOffset, GEP2VariableIndices,
- GEP2MaxLookupReached, DL, AT, DT);
+ DecomposeGEPExpression(GEP2, GEP2BaseOffset, GEP2VariableIndices,
+ GEP2MaxLookupReached, DL, AC2, DT);
const Value *GEP1BasePtr =
- DecomposeGEPExpression(GEP1, GEP1BaseOffset, GEP1VariableIndices,
- GEP1MaxLookupReached, DL, AT, DT);
+ DecomposeGEPExpression(GEP1, GEP1BaseOffset, GEP1VariableIndices,
+ GEP1MaxLookupReached, DL, AC1, DT);
// DecomposeGEPExpression and GetUnderlyingObject should return the
// same result except when DecomposeGEPExpression has no DataLayout.
if (GEP1BasePtr != UnderlyingV1 || GEP2BasePtr != UnderlyingV2) {
@@ -964,15 +1073,15 @@ BasicAliasAnalysis::aliasGEP(const GEPOperator *GEP1, uint64_t V1Size,
// exactly, see if the computed offset from the common pointer tells us
// about the relation of the resulting pointer.
const Value *GEP1BasePtr =
- DecomposeGEPExpression(GEP1, GEP1BaseOffset, GEP1VariableIndices,
- GEP1MaxLookupReached, DL, AT, DT);
+ DecomposeGEPExpression(GEP1, GEP1BaseOffset, GEP1VariableIndices,
+ GEP1MaxLookupReached, DL, AC1, DT);
int64_t GEP2BaseOffset;
bool GEP2MaxLookupReached;
SmallVector<VariableGEPIndex, 4> GEP2VariableIndices;
const Value *GEP2BasePtr =
- DecomposeGEPExpression(GEP2, GEP2BaseOffset, GEP2VariableIndices,
- GEP2MaxLookupReached, DL, AT, DT);
+ DecomposeGEPExpression(GEP2, GEP2BaseOffset, GEP2VariableIndices,
+ GEP2MaxLookupReached, DL, AC2, DT);
// DecomposeGEPExpression and GetUnderlyingObject should return the
// same result except when DecomposeGEPExpression has no DataLayout.
@@ -981,6 +1090,17 @@ BasicAliasAnalysis::aliasGEP(const GEPOperator *GEP1, uint64_t V1Size,
"DecomposeGEPExpression and GetUnderlyingObject disagree!");
return MayAlias;
}
+
+ // If we know the two GEPs are based off of the exact same pointer (and not
+ // just the same underlying object), see if that tells us anything about
+ // the resulting pointers.
+ if (DL && GEP1->getPointerOperand() == GEP2->getPointerOperand()) {
+ AliasResult R = aliasSameBasePointerGEPs(GEP1, V1Size, GEP2, V2Size, *DL);
+ // If we couldn't find anything interesting, don't abandon just yet.
+ if (R != MayAlias)
+ return R;
+ }
+
// If the max search depth is reached the result is undefined
if (GEP2MaxLookupReached || GEP1MaxLookupReached)
return MayAlias;
@@ -1010,8 +1130,8 @@ BasicAliasAnalysis::aliasGEP(const GEPOperator *GEP1, uint64_t V1Size,
return R;
const Value *GEP1BasePtr =
- DecomposeGEPExpression(GEP1, GEP1BaseOffset, GEP1VariableIndices,
- GEP1MaxLookupReached, DL, AT, DT);
+ DecomposeGEPExpression(GEP1, GEP1BaseOffset, GEP1VariableIndices,
+ GEP1MaxLookupReached, DL, AC1, DT);
// DecomposeGEPExpression and GetUnderlyingObject should return the
// same result except when DecomposeGEPExpression has no DataLayout.
@@ -1080,10 +1200,8 @@ BasicAliasAnalysis::aliasGEP(const GEPOperator *GEP1, uint64_t V1Size,
const Value *V = GEP1VariableIndices[i].V;
bool SignKnownZero, SignKnownOne;
- ComputeSignBit(
- const_cast<Value *>(V),
- SignKnownZero, SignKnownOne,
- DL, 0, AT, nullptr, DT);
+ ComputeSignBit(const_cast<Value *>(V), SignKnownZero, SignKnownOne, DL,
+ 0, AC1, nullptr, DT);
// Zero-extension widens the variable, and so forces the sign
// bit to zero.
@@ -1422,7 +1540,8 @@ bool BasicAliasAnalysis::isValueEqualInPotentialCycles(const Value *V,
DominatorTreeWrapperPass *DTWP =
getAnalysisIfAvailable<DominatorTreeWrapperPass>();
DominatorTree *DT = DTWP ? &DTWP->getDomTree() : nullptr;
- LoopInfo *LI = getAnalysisIfAvailable<LoopInfo>();
+ auto *LIWP = getAnalysisIfAvailable<LoopInfoWrapperPass>();
+ LoopInfo *LI = LIWP ? &LIWP->getLoopInfo() : nullptr;
// Make sure that the visited phis cannot reach the Value. This ensures that
// the Values cannot come from different iterations of a potential cycle the
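
Concretely, the new same-base-pointer rule lets BasicAA prove NoAlias for accesses to distinct struct fields reached from the same pointer operand through arbitrary, even unequal, array indices. A minimal source-level sketch with hypothetical names:

    // Both GEPs below share the pointer operand Base, index only through
    // arrays before the final index, and end in different constant struct
    // field indices (0 for A, 1 for B). Any two S objects in the array
    // either coincide exactly or are disjoint, so &Base[I].A and
    // &Base[J].B can never overlap, whatever I and J are.
    struct S { int A; int B; };

    int demo(S *Base, unsigned I, unsigned J) {
      Base[I].A = 1;    // getelementptr %S, %S* %Base, i64 %I, i32 0
      Base[J].B = 2;    // getelementptr %S, %S* %Base, i64 %J, i32 1
      return Base[I].A; // the store to .B provably doesn't clobber this
    }

Without the rule, the differing variable indices I and J would generally have forced a MayAlias answer.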
diff --git a/lib/Analysis/BlockFrequencyInfo.cpp b/lib/Analysis/BlockFrequencyInfo.cpp
index 8ed8e3e..37f2fae 100644
--- a/lib/Analysis/BlockFrequencyInfo.cpp
+++ b/lib/Analysis/BlockFrequencyInfo.cpp
@@ -108,7 +108,7 @@ struct DOTGraphTraits<BlockFrequencyInfo*> : public DefaultDOTGraphTraits {
INITIALIZE_PASS_BEGIN(BlockFrequencyInfo, "block-freq",
"Block Frequency Analysis", true, true)
INITIALIZE_PASS_DEPENDENCY(BranchProbabilityInfo)
-INITIALIZE_PASS_DEPENDENCY(LoopInfo)
+INITIALIZE_PASS_DEPENDENCY(LoopInfoWrapperPass)
INITIALIZE_PASS_END(BlockFrequencyInfo, "block-freq",
"Block Frequency Analysis", true, true)
@@ -123,13 +123,13 @@ BlockFrequencyInfo::~BlockFrequencyInfo() {}
void BlockFrequencyInfo::getAnalysisUsage(AnalysisUsage &AU) const {
AU.addRequired<BranchProbabilityInfo>();
- AU.addRequired<LoopInfo>();
+ AU.addRequired<LoopInfoWrapperPass>();
AU.setPreservesAll();
}
bool BlockFrequencyInfo::runOnFunction(Function &F) {
BranchProbabilityInfo &BPI = getAnalysis<BranchProbabilityInfo>();
- LoopInfo &LI = getAnalysis<LoopInfo>();
+ LoopInfo &LI = getAnalysis<LoopInfoWrapperPass>().getLoopInfo();
if (!BFI)
BFI.reset(new ImplType);
BFI->doFunction(&F, &BPI, &LI);
diff --git a/lib/Analysis/BlockFrequencyInfoImpl.cpp b/lib/Analysis/BlockFrequencyInfoImpl.cpp
index 06b8acd..278073c 100644
--- a/lib/Analysis/BlockFrequencyInfoImpl.cpp
+++ b/lib/Analysis/BlockFrequencyInfoImpl.cpp
@@ -14,6 +14,7 @@
#include "llvm/Analysis/BlockFrequencyInfoImpl.h"
#include "llvm/ADT/SCCIterator.h"
#include "llvm/Support/raw_ostream.h"
+#include <numeric>
using namespace llvm;
using namespace llvm::bfi_detail;
@@ -122,8 +123,12 @@ static void combineWeight(Weight &W, const Weight &OtherW) {
}
assert(W.Type == OtherW.Type);
assert(W.TargetNode == OtherW.TargetNode);
- assert(W.Amount < W.Amount + OtherW.Amount && "Unexpected overflow");
- W.Amount += OtherW.Amount;
+ assert(OtherW.Amount && "Expected non-zero weight");
+ if (W.Amount > W.Amount + OtherW.Amount)
+ // Saturate on overflow.
+ W.Amount = UINT64_MAX;
+ else
+ W.Amount += OtherW.Amount;
}
static void combineWeightsBySorting(WeightList &Weights) {
// Sort so edges to the same node are adjacent.
@@ -206,11 +211,19 @@ void Distribution::normalize() {
Shift = 33 - countLeadingZeros(Total);
// Early exit if nothing needs to be scaled.
- if (!Shift)
+ if (!Shift) {
+ // If we didn't overflow then combineWeights() shouldn't have changed the
+ // sum of the weights, but let's double-check.
+ assert(Total == std::accumulate(Weights.begin(), Weights.end(), UINT64_C(0),
+ [](uint64_t Sum, const Weight &W) {
+ return Sum + W.Amount;
+ }) &&
+ "Expected total to be correct");
return;
+ }
// Recompute the total through accumulation (rather than shifting it) so that
- // it's accurate after shifting.
+ // it's accurate after shifting and any changes combineWeights() made above.
Total = 0;
// Sum the weights to each node and shift right if necessary.
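
The weight-combining change replaces an assert-on-overflow with saturation; the idiom is a plain saturating unsigned add, sketched standalone:

    #include <cstdint>

    // Saturating add, as now used for edge weights: unsigned wraparound
    // is detected by Sum < A, and the result is clamped to UINT64_MAX.
    static uint64_t saturatingAdd(uint64_t A, uint64_t B) {
      uint64_t Sum = A + B;
      return Sum < A ? UINT64_MAX : Sum;
    }

Because saturation can change the total, normalize() now recomputes the sum rather than assuming combineWeights() preserved it, with the added assert double-checking the non-overflow case.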
diff --git a/lib/Analysis/BranchProbabilityInfo.cpp b/lib/Analysis/BranchProbabilityInfo.cpp
index bbd8750..8cd6ea4 100644
--- a/lib/Analysis/BranchProbabilityInfo.cpp
+++ b/lib/Analysis/BranchProbabilityInfo.cpp
@@ -28,7 +28,7 @@ using namespace llvm;
INITIALIZE_PASS_BEGIN(BranchProbabilityInfo, "branch-prob",
"Branch Probability Analysis", false, true)
-INITIALIZE_PASS_DEPENDENCY(LoopInfo)
+INITIALIZE_PASS_DEPENDENCY(LoopInfoWrapperPass)
INITIALIZE_PASS_END(BranchProbabilityInfo, "branch-prob",
"Branch Probability Analysis", false, true)
@@ -196,7 +196,8 @@ bool BranchProbabilityInfo::calcMetadataWeights(BasicBlock *BB) {
SmallVector<uint32_t, 2> Weights;
Weights.reserve(TI->getNumSuccessors());
for (unsigned i = 1, e = WeightsNode->getNumOperands(); i != e; ++i) {
- ConstantInt *Weight = dyn_cast<ConstantInt>(WeightsNode->getOperand(i));
+ ConstantInt *Weight =
+ mdconst::dyn_extract<ConstantInt>(WeightsNode->getOperand(i));
if (!Weight)
return false;
Weights.push_back(
@@ -483,7 +484,7 @@ bool BranchProbabilityInfo::calcInvokeHeuristics(BasicBlock *BB) {
}
void BranchProbabilityInfo::getAnalysisUsage(AnalysisUsage &AU) const {
- AU.addRequired<LoopInfo>();
+ AU.addRequired<LoopInfoWrapperPass>();
AU.setPreservesAll();
}
@@ -491,7 +492,7 @@ bool BranchProbabilityInfo::runOnFunction(Function &F) {
DEBUG(dbgs() << "---- Branch Probability Info : " << F.getName()
<< " ----\n\n");
LastF = &F; // Store the last function we ran on for printing.
- LI = &getAnalysis<LoopInfo>();
+ LI = &getAnalysis<LoopInfoWrapperPass>().getLoopInfo();
assert(PostDominatedByUnreachable.empty());
assert(PostDominatedByColdCall.empty());
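
The weight-extraction change reflects the metadata/value split: MDNode operands are now Metadata, so a ConstantInt must be unwrapped through mdconst rather than dyn_cast'ed directly. A minimal sketch, with the node assumed to come from !prof branch_weights metadata:

    #include "llvm/IR/Constants.h"
    #include "llvm/IR/Metadata.h"
    using namespace llvm;

    // Returns the I-th operand of MD as a branch weight, or 0 if it is
    // not a ConstantInt wrapped in ConstantAsMetadata.
    static uint64_t weightOperand(const MDNode *MD, unsigned I) {
      if (auto *CI = mdconst::dyn_extract<ConstantInt>(MD->getOperand(I)))
        return CI->getZExtValue();
      return 0;
    }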
diff --git a/lib/Analysis/CFG.cpp b/lib/Analysis/CFG.cpp
index 25e7bc0..8ecd70b 100644
--- a/lib/Analysis/CFG.cpp
+++ b/lib/Analysis/CFG.cpp
@@ -27,7 +27,7 @@ using namespace llvm;
void llvm::FindFunctionBackedges(const Function &F,
SmallVectorImpl<std::pair<const BasicBlock*,const BasicBlock*> > &Result) {
const BasicBlock *BB = &F.getEntryBlock();
- if (succ_begin(BB) == succ_end(BB))
+ if (succ_empty(BB))
return;
SmallPtrSet<const BasicBlock*, 8> Visited;
diff --git a/lib/Analysis/CFLAliasAnalysis.cpp b/lib/Analysis/CFLAliasAnalysis.cpp
index 5f1b3d3..82fbfe0 100644
--- a/lib/Analysis/CFLAliasAnalysis.cpp
+++ b/lib/Analysis/CFLAliasAnalysis.cpp
@@ -29,20 +29,21 @@
//===----------------------------------------------------------------------===//
#include "StratifiedSets.h"
-#include "llvm/Analysis/Passes.h"
#include "llvm/ADT/BitVector.h"
#include "llvm/ADT/DenseMap.h"
-#include "llvm/ADT/Optional.h"
#include "llvm/ADT/None.h"
+#include "llvm/ADT/Optional.h"
#include "llvm/Analysis/AliasAnalysis.h"
+#include "llvm/Analysis/Passes.h"
#include "llvm/IR/Constants.h"
#include "llvm/IR/Function.h"
-#include "llvm/IR/Instructions.h"
#include "llvm/IR/InstVisitor.h"
+#include "llvm/IR/Instructions.h"
#include "llvm/IR/ValueHandle.h"
#include "llvm/Pass.h"
#include "llvm/Support/Allocator.h"
#include "llvm/Support/Compiler.h"
+#include "llvm/Support/Debug.h"
#include "llvm/Support/ErrorHandling.h"
#include <algorithm>
#include <cassert>
@@ -51,6 +52,8 @@
using namespace llvm;
+#define DEBUG_TYPE "cfl-aa"
+
// Try to go from a Value* to a Function*. Never returns nullptr.
static Optional<Function *> parentFunctionOfValue(Value *);
@@ -227,10 +230,14 @@ public:
// Comparisons between global variables and other constants should be
// handled by BasicAA.
if (isa<Constant>(LocA.Ptr) && isa<Constant>(LocB.Ptr)) {
- return MayAlias;
+ return AliasAnalysis::alias(LocA, LocB);
}
- return query(LocA, LocB);
+ AliasResult QueryResult = query(LocA, LocB);
+ if (QueryResult == MayAlias)
+ return AliasAnalysis::alias(LocA, LocB);
+
+ return QueryResult;
}
void initializePass() override { InitializeAliasAnalysis(this); }
@@ -295,8 +302,11 @@ public:
}
void visitSelectInst(SelectInst &Inst) {
- auto *Condition = Inst.getCondition();
- Output.push_back(Edge(&Inst, Condition, EdgeType::Assign, AttrNone));
+ // Condition is not processed here (The actual statement producing
+ // the condition result is processed elsewhere). For select, the
+ // condition is evaluated, but not loaded, stored, or assigned
+ // simply as a result of being the condition of a select.
+
auto *TrueVal = Inst.getTrueValue();
Output.push_back(Edge(&Inst, TrueVal, EdgeType::Assign, AttrNone));
auto *FalseVal = Inst.getFalseValue();
@@ -768,13 +778,16 @@ static Optional<StratifiedAttr> valueToAttrIndex(Value *Val) {
return AttrGlobalIndex;
if (auto *Arg = dyn_cast<Argument>(Val))
- if (!Arg->hasNoAliasAttr())
+ // Only pointer arguments should have the argument attribute,
+ // because things can't escape through scalars without us seeing a
+ // cast, and thus, interaction with them doesn't matter.
+ if (!Arg->hasNoAliasAttr() && Arg->getType()->isPointerTy())
return argNumberToAttrIndex(Arg->getArgNo());
return NoneType();
}
static StratifiedAttr argNumberToAttrIndex(unsigned ArgNum) {
- if (ArgNum > AttrMaxNumArgs)
+ if (ArgNum >= AttrMaxNumArgs)
return AttrAllIndex;
return ArgNum + AttrFirstArgIndex;
}
@@ -964,8 +977,10 @@ CFLAliasAnalysis::query(const AliasAnalysis::Location &LocA,
auto MaybeFnA = parentFunctionOfValue(ValA);
auto MaybeFnB = parentFunctionOfValue(ValB);
if (!MaybeFnA.hasValue() && !MaybeFnB.hasValue()) {
- llvm_unreachable("Don't know how to extract the parent function "
- "from values A or B");
+ // The only times this is known to happen are when globals + InlineAsm
+ // are involved
+ DEBUG(dbgs() << "CFLAA: could not extract parent function information.\n");
+ return AliasAnalysis::MayAlias;
}
if (MaybeFnA.hasValue()) {
@@ -991,23 +1006,31 @@ CFLAliasAnalysis::query(const AliasAnalysis::Location &LocA,
auto SetA = *MaybeA;
auto SetB = *MaybeB;
-
- if (SetA.Index == SetB.Index)
- return AliasAnalysis::PartialAlias;
-
auto AttrsA = Sets.getLink(SetA.Index).Attrs;
auto AttrsB = Sets.getLink(SetB.Index).Attrs;
+
// Stratified set attributes are used as markers to signify whether a member
- // of a StratifiedSet (or a member of a set above the current set) has
+ // of a StratifiedSet (or a member of a set above the current set) has
// interacted with either arguments or globals. "Interacted with" meaning
- // its value may be different depending on the value of an argument or
+ // its value may be different depending on the value of an argument or
// global. The thought behind this is that, because arguments and globals
// may alias each other, if AttrsA and AttrsB have touched args/globals,
- // we must conservatively say that they alias. However, if at least one of
- // the sets has no values that could legally be altered by changing the value
+ // we must conservatively say that they alias. However, if at least one of
+ // the sets has no values that could legally be altered by changing the value
// of an argument or global, then we don't have to be as conservative.
if (AttrsA.any() && AttrsB.any())
return AliasAnalysis::MayAlias;
+ // We currently unify things even if the accesses to them may not be in
+ // bounds, so we can't return partial alias here because we don't
+ // know whether the pointer is really within the object or not.
+ // IE Given an out of bounds GEP and an alloca'd pointer, we may
+ // unify the two. We can't return partial alias for this case.
+ // Since we do not currently track enough information to
+ // differentiate
+
+ if (SetA.Index == SetB.Index)
+ return AliasAnalysis::MayAlias;
+
return AliasAnalysis::NoAlias;
}
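
Replacing hard MayAlias returns with calls up to AliasAnalysis::alias() is the legacy AA chaining idiom: an analysis answers only what it is sure of and delegates the rest to the next implementation in the group (ultimately BasicAA). The shape of the idiom, as a fragment that would sit inside an AliasAnalysis subclass (surrounding boilerplate assumed):

    AliasResult alias(const Location &LocA, const Location &LocB) override {
      AliasResult R = query(LocA, LocB); // this analysis's own verdict
      if (R != MayAlias)
        return R;                        // definitive NoAlias/MustAlias
      // "Don't know" falls through to the next analysis in the chain.
      return AliasAnalysis::alias(LocA, LocB);
    }

The same caution motivates demoting the equal-index case from PartialAlias to MayAlias: without bounds tracking, the stratified sets may have unified out-of-bounds GEPs, so PartialAlias would overclaim.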
diff --git a/lib/Analysis/CGSCCPassManager.cpp b/lib/Analysis/CGSCCPassManager.cpp
index 5d1d8a9..4a03002 100644
--- a/lib/Analysis/CGSCCPassManager.cpp
+++ b/lib/Analysis/CGSCCPassManager.cpp
@@ -13,105 +13,10 @@
using namespace llvm;
-static cl::opt<bool>
-DebugPM("debug-cgscc-pass-manager", cl::Hidden,
- cl::desc("Print CGSCC pass management debugging information"));
-
-PreservedAnalyses CGSCCPassManager::run(LazyCallGraph::SCC *C,
- CGSCCAnalysisManager *AM) {
- PreservedAnalyses PA = PreservedAnalyses::all();
-
- if (DebugPM)
- dbgs() << "Starting CGSCC pass manager run.\n";
-
- for (unsigned Idx = 0, Size = Passes.size(); Idx != Size; ++Idx) {
- if (DebugPM)
- dbgs() << "Running CGSCC pass: " << Passes[Idx]->name() << "\n";
-
- PreservedAnalyses PassPA = Passes[Idx]->run(C, AM);
- if (AM)
- AM->invalidate(C, PassPA);
- PA.intersect(std::move(PassPA));
- }
-
- if (DebugPM)
- dbgs() << "Finished CGSCC pass manager run.\n";
-
- return PA;
-}
-
-bool CGSCCAnalysisManager::empty() const {
- assert(CGSCCAnalysisResults.empty() == CGSCCAnalysisResultLists.empty() &&
- "The storage and index of analysis results disagree on how many there "
- "are!");
- return CGSCCAnalysisResults.empty();
-}
-
-void CGSCCAnalysisManager::clear() {
- CGSCCAnalysisResults.clear();
- CGSCCAnalysisResultLists.clear();
-}
-
-CGSCCAnalysisManager::ResultConceptT &
-CGSCCAnalysisManager::getResultImpl(void *PassID, LazyCallGraph::SCC *C) {
- CGSCCAnalysisResultMapT::iterator RI;
- bool Inserted;
- std::tie(RI, Inserted) = CGSCCAnalysisResults.insert(std::make_pair(
- std::make_pair(PassID, C), CGSCCAnalysisResultListT::iterator()));
-
- // If we don't have a cached result for this function, look up the pass and
- // run it to produce a result, which we then add to the cache.
- if (Inserted) {
- CGSCCAnalysisResultListT &ResultList = CGSCCAnalysisResultLists[C];
- ResultList.emplace_back(PassID, lookupPass(PassID).run(C, this));
- RI->second = std::prev(ResultList.end());
- }
-
- return *RI->second->second;
-}
-
-CGSCCAnalysisManager::ResultConceptT *
-CGSCCAnalysisManager::getCachedResultImpl(void *PassID,
- LazyCallGraph::SCC *C) const {
- CGSCCAnalysisResultMapT::const_iterator RI =
- CGSCCAnalysisResults.find(std::make_pair(PassID, C));
- return RI == CGSCCAnalysisResults.end() ? nullptr : &*RI->second->second;
-}
-
-void CGSCCAnalysisManager::invalidateImpl(void *PassID, LazyCallGraph::SCC *C) {
- CGSCCAnalysisResultMapT::iterator RI =
- CGSCCAnalysisResults.find(std::make_pair(PassID, C));
- if (RI == CGSCCAnalysisResults.end())
- return;
-
- CGSCCAnalysisResultLists[C].erase(RI->second);
-}
-
-void CGSCCAnalysisManager::invalidateImpl(LazyCallGraph::SCC *C,
- const PreservedAnalyses &PA) {
- // Clear all the invalidated results associated specifically with this
- // function.
- SmallVector<void *, 8> InvalidatedPassIDs;
- CGSCCAnalysisResultListT &ResultsList = CGSCCAnalysisResultLists[C];
- for (CGSCCAnalysisResultListT::iterator I = ResultsList.begin(),
- E = ResultsList.end();
- I != E;)
- if (I->second->invalidate(C, PA)) {
- InvalidatedPassIDs.push_back(I->first);
- I = ResultsList.erase(I);
- } else {
- ++I;
- }
- while (!InvalidatedPassIDs.empty())
- CGSCCAnalysisResults.erase(
- std::make_pair(InvalidatedPassIDs.pop_back_val(), C));
- CGSCCAnalysisResultLists.erase(C);
-}
-
char CGSCCAnalysisManagerModuleProxy::PassID;
CGSCCAnalysisManagerModuleProxy::Result
-CGSCCAnalysisManagerModuleProxy::run(Module *M) {
+CGSCCAnalysisManagerModuleProxy::run(Module &M) {
assert(CGAM->empty() && "CGSCC analyses ran prior to the module proxy!");
return Result(*CGAM);
}
@@ -123,7 +28,7 @@ CGSCCAnalysisManagerModuleProxy::Result::~Result() {
}
bool CGSCCAnalysisManagerModuleProxy::Result::invalidate(
- Module *M, const PreservedAnalyses &PA) {
+ Module &M, const PreservedAnalyses &PA) {
// If this proxy isn't marked as preserved, then we can't even invalidate
// individual CGSCC analyses, there may be an invalid set of SCC objects in
// the cache making it impossible to incrementally preserve them.
@@ -140,7 +45,7 @@ char ModuleAnalysisManagerCGSCCProxy::PassID;
char FunctionAnalysisManagerCGSCCProxy::PassID;
FunctionAnalysisManagerCGSCCProxy::Result
-FunctionAnalysisManagerCGSCCProxy::run(LazyCallGraph::SCC *C) {
+FunctionAnalysisManagerCGSCCProxy::run(LazyCallGraph::SCC &C) {
assert(FAM->empty() && "Function analyses ran prior to the CGSCC proxy!");
return Result(*FAM);
}
@@ -152,7 +57,7 @@ FunctionAnalysisManagerCGSCCProxy::Result::~Result() {
}
bool FunctionAnalysisManagerCGSCCProxy::Result::invalidate(
- LazyCallGraph::SCC *C, const PreservedAnalyses &PA) {
+ LazyCallGraph::SCC &C, const PreservedAnalyses &PA) {
// If this proxy isn't marked as preserved, then we can't even invalidate
// individual function analyses, there may be an invalid set of Function
// objects in the cache making it impossible to incrementally preserve them.
diff --git a/lib/Analysis/CMakeLists.txt b/lib/Analysis/CMakeLists.txt
index 4e9664f..d840037 100644
--- a/lib/Analysis/CMakeLists.txt
+++ b/lib/Analysis/CMakeLists.txt
@@ -5,7 +5,7 @@ add_llvm_library(LLVMAnalysis
AliasDebugger.cpp
AliasSetTracker.cpp
Analysis.cpp
- AssumptionTracker.cpp
+ AssumptionCache.cpp
BasicAliasAnalysis.cpp
BlockFrequencyInfo.cpp
BlockFrequencyInfoImpl.cpp
@@ -22,7 +22,6 @@ add_llvm_library(LLVMAnalysis
DependenceAnalysis.cpp
DomPrinter.cpp
DominanceFrontier.cpp
- FunctionTargetTransformInfo.cpp
IVUsers.cpp
InstCount.cpp
InstructionSimplify.cpp
@@ -35,9 +34,11 @@ add_llvm_library(LLVMAnalysis
LibCallSemantics.cpp
Lint.cpp
Loads.cpp
+ LoopAccessAnalysis.cpp
LoopInfo.cpp
LoopPass.cpp
MemDepPrinter.cpp
+ MemDerefPrinter.cpp
MemoryBuiltins.cpp
MemoryDependenceAnalysis.cpp
ModuleDebugInfoPrinter.cpp
@@ -53,11 +54,15 @@ add_llvm_library(LLVMAnalysis
ScalarEvolutionExpander.cpp
ScalarEvolutionNormalization.cpp
SparsePropagation.cpp
+ TargetLibraryInfo.cpp
TargetTransformInfo.cpp
Trace.cpp
TypeBasedAliasAnalysis.cpp
ScopedNoAliasAA.cpp
ValueTracking.cpp
+
+ ADDITIONAL_HEADER_DIRS
+ ${LLVM_MAIN_INCLUDE_DIR}/llvm/Analysis
)
add_dependencies(LLVMAnalysis intrinsics_gen)
diff --git a/lib/Analysis/CaptureTracking.cpp b/lib/Analysis/CaptureTracking.cpp
index a271729..5a54754 100644
--- a/lib/Analysis/CaptureTracking.cpp
+++ b/lib/Analysis/CaptureTracking.cpp
@@ -19,8 +19,8 @@
#include "llvm/ADT/SmallSet.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/Analysis/AliasAnalysis.h"
-#include "llvm/Analysis/CaptureTracking.h"
#include "llvm/Analysis/CFG.h"
+#include "llvm/Analysis/CaptureTracking.h"
#include "llvm/IR/CallSite.h"
#include "llvm/IR/Constants.h"
#include "llvm/IR/Dominators.h"
diff --git a/lib/Analysis/CodeMetrics.cpp b/lib/Analysis/CodeMetrics.cpp
index f29e4a2..fa5683c 100644
--- a/lib/Analysis/CodeMetrics.cpp
+++ b/lib/Analysis/CodeMetrics.cpp
@@ -11,7 +11,7 @@
//
//===----------------------------------------------------------------------===//
-#include "llvm/Analysis/AssumptionTracker.h"
+#include "llvm/Analysis/AssumptionCache.h"
#include "llvm/Analysis/CodeMetrics.h"
#include "llvm/Analysis/LoopInfo.h"
#include "llvm/Analysis/TargetTransformInfo.h"
@@ -66,11 +66,16 @@ static void completeEphemeralValues(SmallVector<const Value *, 16> &WorkSet,
}
// Find all ephemeral values.
-void CodeMetrics::collectEphemeralValues(const Loop *L, AssumptionTracker *AT,
- SmallPtrSetImpl<const Value*> &EphValues) {
+void CodeMetrics::collectEphemeralValues(
+ const Loop *L, AssumptionCache *AC,
+ SmallPtrSetImpl<const Value *> &EphValues) {
SmallVector<const Value *, 16> WorkSet;
- for (auto &I : AT->assumptions(L->getHeader()->getParent())) {
+ for (auto &AssumeVH : AC->assumptions()) {
+ if (!AssumeVH)
+ continue;
+ Instruction *I = cast<Instruction>(AssumeVH);
+
// Filter out call sites outside of the loop so we don't do a function's
// worth of work for each of its loops (and, in the common case, ephemeral
// values in the loop are likely due to @llvm.assume calls in the loop).
@@ -83,12 +88,19 @@ void CodeMetrics::collectEphemeralValues(const Loop *L, AssumptionTracker *AT,
completeEphemeralValues(WorkSet, EphValues);
}
-void CodeMetrics::collectEphemeralValues(const Function *F, AssumptionTracker *AT,
- SmallPtrSetImpl<const Value*> &EphValues) {
+void CodeMetrics::collectEphemeralValues(
+ const Function *F, AssumptionCache *AC,
+ SmallPtrSetImpl<const Value *> &EphValues) {
SmallVector<const Value *, 16> WorkSet;
- for (auto &I : AT->assumptions(const_cast<Function*>(F)))
+ for (auto &AssumeVH : AC->assumptions()) {
+ if (!AssumeVH)
+ continue;
+ Instruction *I = cast<Instruction>(AssumeVH);
+ assert(I->getParent()->getParent() == F &&
+ "Found assumption for the wrong function!");
WorkSet.push_back(I);
+ }
completeEphemeralValues(WorkSet, EphValues);
}
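
Since the cache hands back weak value handles rather than a per-function call set, consumers iterate and filter null handles first; a minimal sketch, assuming an AssumptionCache AC is in scope:

    // Count assumptions whose @llvm.assume calls are still alive.
    unsigned Live = 0;
    for (auto &AssumeVH : AC.assumptions())
      if (AssumeVH) // deleted calls leave null handles behind
        ++Live;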
diff --git a/lib/Analysis/ConstantFolding.cpp b/lib/Analysis/ConstantFolding.cpp
index fd8f2ae..fcafb41 100644
--- a/lib/Analysis/ConstantFolding.cpp
+++ b/lib/Analysis/ConstantFolding.cpp
@@ -20,6 +20,7 @@
#include "llvm/ADT/SmallPtrSet.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/StringMap.h"
+#include "llvm/Analysis/TargetLibraryInfo.h"
#include "llvm/Analysis/ValueTracking.h"
#include "llvm/Config/config.h"
#include "llvm/IR/Constants.h"
@@ -33,7 +34,6 @@
#include "llvm/IR/Operator.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/MathExtras.h"
-#include "llvm/Target/TargetLibraryInfo.h"
#include <cerrno>
#include <cmath>
diff --git a/lib/Analysis/CostModel.cpp b/lib/Analysis/CostModel.cpp
index 1b74f8c..b529c1a 100644
--- a/lib/Analysis/CostModel.cpp
+++ b/lib/Analysis/CostModel.cpp
@@ -83,7 +83,8 @@ CostModelAnalysis::getAnalysisUsage(AnalysisUsage &AU) const {
bool
CostModelAnalysis::runOnFunction(Function &F) {
this->F = &F;
- TTI = getAnalysisIfAvailable<TargetTransformInfo>();
+ auto *TTIWP = getAnalysisIfAvailable<TargetTransformInfoWrapperPass>();
+ TTI = TTIWP ? &TTIWP->getTTI(F) : nullptr;
return false;
}
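
TargetTransformInfo is now produced per function by a wrapper pass, which is why the lookup takes F. A hypothetical required-analysis consumer, sketched as a fragment of a FunctionPass that declares AU.addRequired<TargetTransformInfoWrapperPass>():

    bool runOnFunction(Function &F) override {
      const TargetTransformInfo &TTI =
          getAnalysis<TargetTransformInfoWrapperPass>().getTTI(F);
      // A per-function query: the answer can differ across subtargets.
      (void)TTI.getNumberOfRegisters(/*Vector=*/false);
      return false;
    }

CostModelAnalysis itself uses the getAnalysisIfAvailable form above because the wrapper is optional for it.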
diff --git a/lib/Analysis/Delinearization.cpp b/lib/Analysis/Delinearization.cpp
index 9334ceb..d603b7b 100644
--- a/lib/Analysis/Delinearization.cpp
+++ b/lib/Analysis/Delinearization.cpp
@@ -59,14 +59,14 @@ public:
void Delinearization::getAnalysisUsage(AnalysisUsage &AU) const {
AU.setPreservesAll();
- AU.addRequired<LoopInfo>();
+ AU.addRequired<LoopInfoWrapperPass>();
AU.addRequired<ScalarEvolution>();
}
bool Delinearization::runOnFunction(Function &F) {
this->F = &F;
SE = &getAnalysis<ScalarEvolution>();
- LI = &getAnalysis<LoopInfo>();
+ LI = &getAnalysis<LoopInfoWrapperPass>().getLoopInfo();
return false;
}
@@ -141,7 +141,7 @@ char Delinearization::ID = 0;
static const char delinearization_name[] = "Delinearization";
INITIALIZE_PASS_BEGIN(Delinearization, DL_NAME, delinearization_name, true,
true)
-INITIALIZE_PASS_DEPENDENCY(LoopInfo)
+INITIALIZE_PASS_DEPENDENCY(LoopInfoWrapperPass)
INITIALIZE_PASS_END(Delinearization, DL_NAME, delinearization_name, true, true)
FunctionPass *llvm::createDelinearizationPass() { return new Delinearization; }
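
The LoopInfo change repeats a pattern applied throughout this commit: the analysis moved behind LoopInfoWrapperPass for the legacy pass manager, so clients require the wrapper and unwrap it. A minimal sketch of the two touch points in a client pass:

    void getAnalysisUsage(AnalysisUsage &AU) const override {
      AU.addRequired<LoopInfoWrapperPass>(); // was: addRequired<LoopInfo>()
    }
    bool runOnFunction(Function &F) override {
      LoopInfo &LI = getAnalysis<LoopInfoWrapperPass>().getLoopInfo();
      for (Loop *L : LI) // top-level loops of F
        (void)L;
      return false;
    }

The same mechanical substitution appears in the DependenceAnalysis hunks that follow.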
diff --git a/lib/Analysis/DependenceAnalysis.cpp b/lib/Analysis/DependenceAnalysis.cpp
index 092df5c..fda664b 100644
--- a/lib/Analysis/DependenceAnalysis.cpp
+++ b/lib/Analysis/DependenceAnalysis.cpp
@@ -114,7 +114,7 @@ Delinearize("da-delinearize", cl::init(false), cl::Hidden, cl::ZeroOrMore,
INITIALIZE_PASS_BEGIN(DependenceAnalysis, "da",
"Dependence Analysis", true, true)
-INITIALIZE_PASS_DEPENDENCY(LoopInfo)
+INITIALIZE_PASS_DEPENDENCY(LoopInfoWrapperPass)
INITIALIZE_PASS_DEPENDENCY(ScalarEvolution)
INITIALIZE_AG_DEPENDENCY(AliasAnalysis)
INITIALIZE_PASS_END(DependenceAnalysis, "da",
@@ -132,7 +132,7 @@ bool DependenceAnalysis::runOnFunction(Function &F) {
this->F = &F;
AA = &getAnalysis<AliasAnalysis>();
SE = &getAnalysis<ScalarEvolution>();
- LI = &getAnalysis<LoopInfo>();
+ LI = &getAnalysis<LoopInfoWrapperPass>().getLoopInfo();
return false;
}
@@ -145,7 +145,7 @@ void DependenceAnalysis::getAnalysisUsage(AnalysisUsage &AU) const {
AU.setPreservesAll();
AU.addRequiredTransitive<AliasAnalysis>();
AU.addRequiredTransitive<ScalarEvolution>();
- AU.addRequiredTransitive<LoopInfo>();
+ AU.addRequiredTransitive<LoopInfoWrapperPass>();
}
diff --git a/lib/Analysis/FunctionTargetTransformInfo.cpp b/lib/Analysis/FunctionTargetTransformInfo.cpp
deleted file mode 100644
index a686bec..0000000
--- a/lib/Analysis/FunctionTargetTransformInfo.cpp
+++ /dev/null
@@ -1,50 +0,0 @@
-//===- llvm/Analysis/FunctionTargetTransformInfo.h --------------*- C++ -*-===//
-//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-//
-// This pass wraps a TargetTransformInfo in a FunctionPass so that it can
-// forward along the current Function so that we can make target specific
-// decisions based on the particular subtarget specified for each Function.
-//
-//===----------------------------------------------------------------------===//
-
-#include "llvm/InitializePasses.h"
-#include "llvm/Analysis/FunctionTargetTransformInfo.h"
-
-using namespace llvm;
-
-#define DEBUG_TYPE "function-tti"
-static const char ftti_name[] = "Function TargetTransformInfo";
-INITIALIZE_PASS_BEGIN(FunctionTargetTransformInfo, "function_tti", ftti_name, false, true)
-INITIALIZE_AG_DEPENDENCY(TargetTransformInfo)
-INITIALIZE_PASS_END(FunctionTargetTransformInfo, "function_tti", ftti_name, false, true)
-char FunctionTargetTransformInfo::ID = 0;
-
-namespace llvm {
-FunctionPass *createFunctionTargetTransformInfoPass() {
- return new FunctionTargetTransformInfo();
-}
-}
-
-FunctionTargetTransformInfo::FunctionTargetTransformInfo()
- : FunctionPass(ID), Fn(nullptr), TTI(nullptr) {
- initializeFunctionTargetTransformInfoPass(*PassRegistry::getPassRegistry());
-}
-
-void FunctionTargetTransformInfo::getAnalysisUsage(AnalysisUsage &AU) const {
- AU.setPreservesAll();
- AU.addRequired<TargetTransformInfo>();
-}
-
-void FunctionTargetTransformInfo::releaseMemory() {}
-
-bool FunctionTargetTransformInfo::runOnFunction(Function &F) {
- Fn = &F;
- TTI = &getAnalysis<TargetTransformInfo>();
- return false;
-}
diff --git a/lib/Analysis/IPA/Android.mk b/lib/Analysis/IPA/Android.mk
index d56d931..2e5e571 100644
--- a/lib/Analysis/IPA/Android.mk
+++ b/lib/Analysis/IPA/Android.mk
@@ -4,7 +4,6 @@ analysis_ipa_SRC_FILES := \
CallGraph.cpp \
CallGraphSCCPass.cpp \
CallPrinter.cpp \
- FindUsedTypes.cpp \
GlobalsModRef.cpp \
IPA.cpp \
InlineCost.cpp
diff --git a/lib/Analysis/IPA/CMakeLists.txt b/lib/Analysis/IPA/CMakeLists.txt
index 67b4135..6095136 100644
--- a/lib/Analysis/IPA/CMakeLists.txt
+++ b/lib/Analysis/IPA/CMakeLists.txt
@@ -2,7 +2,6 @@ add_llvm_library(LLVMipa
CallGraph.cpp
CallGraphSCCPass.cpp
CallPrinter.cpp
- FindUsedTypes.cpp
GlobalsModRef.cpp
IPA.cpp
InlineCost.cpp
diff --git a/lib/Analysis/IPA/CallGraphSCCPass.cpp b/lib/Analysis/IPA/CallGraphSCCPass.cpp
index 665aa7f..ded1de7 100644
--- a/lib/Analysis/IPA/CallGraphSCCPass.cpp
+++ b/lib/Analysis/IPA/CallGraphSCCPass.cpp
@@ -21,8 +21,8 @@
#include "llvm/Analysis/CallGraph.h"
#include "llvm/IR/Function.h"
#include "llvm/IR/IntrinsicInst.h"
-#include "llvm/IR/LegacyPassManagers.h"
#include "llvm/IR/LLVMContext.h"
+#include "llvm/IR/LegacyPassManagers.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/Timer.h"
diff --git a/lib/Analysis/IPA/FindUsedTypes.cpp b/lib/Analysis/IPA/FindUsedTypes.cpp
deleted file mode 100644
index b37344b..0000000
--- a/lib/Analysis/IPA/FindUsedTypes.cpp
+++ /dev/null
@@ -1,100 +0,0 @@
-//===- FindUsedTypes.cpp - Find all Types used by a module ----------------===//
-//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-//
-// This pass is used to seek out all of the types in use by the program. Note
-// that this analysis explicitly does not include types only used by the symbol
-// table.
-//
-//===----------------------------------------------------------------------===//
-
-#include "llvm/Analysis/FindUsedTypes.h"
-#include "llvm/IR/Constants.h"
-#include "llvm/IR/DerivedTypes.h"
-#include "llvm/IR/InstIterator.h"
-#include "llvm/IR/Module.h"
-#include "llvm/Support/raw_ostream.h"
-using namespace llvm;
-
-char FindUsedTypes::ID = 0;
-INITIALIZE_PASS(FindUsedTypes, "print-used-types",
- "Find Used Types", false, true)
-
-// IncorporateType - Incorporate one type and all of its subtypes into the
-// collection of used types.
-//
-void FindUsedTypes::IncorporateType(Type *Ty) {
- // If ty doesn't already exist in the used types map, add it now, otherwise
- // return.
- if (!UsedTypes.insert(Ty)) return; // Already contain Ty.
-
- // Make sure to add any types this type references now.
- //
- for (Type::subtype_iterator I = Ty->subtype_begin(), E = Ty->subtype_end();
- I != E; ++I)
- IncorporateType(*I);
-}
-
-void FindUsedTypes::IncorporateValue(const Value *V) {
- IncorporateType(V->getType());
-
- // If this is a constant, it could be using other types...
- if (const Constant *C = dyn_cast<Constant>(V)) {
- if (!isa<GlobalValue>(C))
- for (User::const_op_iterator OI = C->op_begin(), OE = C->op_end();
- OI != OE; ++OI)
- IncorporateValue(*OI);
- }
-}
-
-
-// run - This incorporates all types used by the specified module
-//
-bool FindUsedTypes::runOnModule(Module &m) {
- UsedTypes.clear(); // reset if run multiple times...
-
- // Loop over global variables, incorporating their types
- for (Module::const_global_iterator I = m.global_begin(), E = m.global_end();
- I != E; ++I) {
- IncorporateType(I->getType());
- if (I->hasInitializer())
- IncorporateValue(I->getInitializer());
- }
-
- for (Module::iterator MI = m.begin(), ME = m.end(); MI != ME; ++MI) {
- IncorporateType(MI->getType());
- const Function &F = *MI;
-
- // Loop over all of the instructions in the function, adding their return
- // type as well as the types of their operands.
- //
- for (const_inst_iterator II = inst_begin(F), IE = inst_end(F);
- II != IE; ++II) {
- const Instruction &I = *II;
-
- IncorporateType(I.getType()); // Incorporate the type of the instruction
- for (User::const_op_iterator OI = I.op_begin(), OE = I.op_end();
- OI != OE; ++OI)
- IncorporateValue(*OI); // Insert inst operand types as well
- }
- }
-
- return false;
-}
-
-// Print the types found in the module. If the optional Module parameter is
-// passed in, then the types are printed symbolically if possible, using the
-// symbol table from the module.
-//
-void FindUsedTypes::print(raw_ostream &OS, const Module *M) const {
- OS << "Types in use by this module:\n";
- for (SetVector<Type *>::const_iterator I = UsedTypes.begin(),
- E = UsedTypes.end(); I != E; ++I) {
- OS << " " << **I << '\n';
- }
-}
diff --git a/lib/Analysis/IPA/IPA.cpp b/lib/Analysis/IPA/IPA.cpp
index b26c052..806bfb8 100644
--- a/lib/Analysis/IPA/IPA.cpp
+++ b/lib/Analysis/IPA/IPA.cpp
@@ -22,7 +22,6 @@ void llvm::initializeIPA(PassRegistry &Registry) {
initializeCallGraphWrapperPassPass(Registry);
initializeCallGraphPrinterPass(Registry);
initializeCallGraphViewerPass(Registry);
- initializeFindUsedTypesPass(Registry);
initializeGlobalsModRefPass(Registry);
}
diff --git a/lib/Analysis/IPA/InlineCost.cpp b/lib/Analysis/IPA/InlineCost.cpp
index 85db278..cd494ba 100644
--- a/lib/Analysis/IPA/InlineCost.cpp
+++ b/lib/Analysis/IPA/InlineCost.cpp
@@ -17,9 +17,9 @@
#include "llvm/ADT/SmallPtrSet.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/Statistic.h"
-#include "llvm/Analysis/AssumptionTracker.h"
-#include "llvm/Analysis/ConstantFolding.h"
+#include "llvm/Analysis/AssumptionCache.h"
#include "llvm/Analysis/CodeMetrics.h"
+#include "llvm/Analysis/ConstantFolding.h"
#include "llvm/Analysis/InstructionSimplify.h"
#include "llvm/Analysis/TargetTransformInfo.h"
#include "llvm/IR/CallSite.h"
@@ -52,7 +52,7 @@ class CallAnalyzer : public InstVisitor<CallAnalyzer, bool> {
const TargetTransformInfo &TTI;
/// The cache of @llvm.assume intrinsics.
- AssumptionTracker *AT;
+ AssumptionCacheTracker *ACT;
// The called function.
Function &F;
@@ -146,8 +146,8 @@ class CallAnalyzer : public InstVisitor<CallAnalyzer, bool> {
public:
CallAnalyzer(const DataLayout *DL, const TargetTransformInfo &TTI,
- AssumptionTracker *AT, Function &Callee, int Threshold)
- : DL(DL), TTI(TTI), AT(AT), F(Callee), Threshold(Threshold), Cost(0),
+ AssumptionCacheTracker *ACT, Function &Callee, int Threshold)
+ : DL(DL), TTI(TTI), ACT(ACT), F(Callee), Threshold(Threshold), Cost(0),
IsCallerRecursive(false), IsRecursiveCall(false),
ExposesReturnsTwice(false), HasDynamicAlloca(false),
ContainsNoDuplicateCall(false), HasReturn(false), HasIndirectBr(false),
@@ -601,7 +601,13 @@ bool CallAnalyzer::visitBinaryOperator(BinaryOperator &I) {
if (!isa<Constant>(RHS))
if (Constant *SimpleRHS = SimplifiedValues.lookup(RHS))
RHS = SimpleRHS;
- Value *SimpleV = SimplifyBinOp(I.getOpcode(), LHS, RHS, DL);
+ Value *SimpleV = nullptr;
+ if (auto FI = dyn_cast<FPMathOperator>(&I))
+ SimpleV =
+ SimplifyFPBinOp(I.getOpcode(), LHS, RHS, FI->getFastMathFlags(), DL);
+ else
+ SimpleV = SimplifyBinOp(I.getOpcode(), LHS, RHS, DL);
+
if (Constant *C = dyn_cast_or_null<Constant>(SimpleV)) {
SimplifiedValues[&I] = C;
return true;
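
The hunk above now honors fast-math flags when pre-folding operands during inline-cost analysis. A minimal caller sketch of the same dispatch, assuming only the SimplifyFPBinOp/SimplifyBinOp signatures added later in this patch (foldBinOp is a hypothetical name):

  #include "llvm/Analysis/InstructionSimplify.h"
  #include "llvm/IR/Operator.h"
  using namespace llvm;

  static Value *foldBinOp(BinaryOperator &I, Value *L, Value *R,
                          const DataLayout *DL) {
    // FP operators carry fast-math flags; fold with them when present.
    if (auto *FPOp = dyn_cast<FPMathOperator>(&I))
      return SimplifyFPBinOp(I.getOpcode(), L, R, FPOp->getFastMathFlags(), DL);
    // Integer operators take the flag-free path.
    return SimplifyBinOp(I.getOpcode(), L, R, DL);
  }
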
@@ -713,8 +719,7 @@ bool CallAnalyzer::simplifyCallSite(Function *F, CallSite CS) {
bool CallAnalyzer::visitCallSite(CallSite CS) {
if (CS.hasFnAttr(Attribute::ReturnsTwice) &&
- !F.getAttributes().hasAttribute(AttributeSet::FunctionIndex,
- Attribute::ReturnsTwice)) {
+ !F.hasFnAttribute(Attribute::ReturnsTwice)) {
// This aborts the entire analysis.
ExposesReturnsTwice = true;
return false;
@@ -783,7 +788,7 @@ bool CallAnalyzer::visitCallSite(CallSite CS) {
// during devirtualization and so we want to give it a hefty bonus for
// inlining, but cap that bonus in the event that inlining wouldn't pan
// out. Pretend to inline the function, with a custom threshold.
- CallAnalyzer CA(DL, TTI, AT, *F, InlineConstants::IndirectCallThreshold);
+ CallAnalyzer CA(DL, TTI, ACT, *F, InlineConstants::IndirectCallThreshold);
if (CA.analyzeCall(CS)) {
// We were able to inline the indirect call! Subtract the cost from the
// bonus we want to apply, but don't go below zero.
@@ -907,6 +912,25 @@ bool CallAnalyzer::analyzeBlock(BasicBlock *BB,
if (isa<ExtractElementInst>(I) || I->getType()->isVectorTy())
++NumVectorInstructions;
+ // If the instruction is floating point, and the target says this operation is
+ // expensive or the function has the "use-soft-float" attribute, this may
+ // eventually become a library call. Treat the cost as such.
+ if (I->getType()->isFloatingPointTy()) {
+ bool hasSoftFloatAttr = false;
+
+ // If the function has the "use-soft-float" attribute, mark it as expensive.
+ if (F.hasFnAttribute("use-soft-float")) {
+ Attribute Attr = F.getFnAttribute("use-soft-float");
+ StringRef Val = Attr.getValueAsString();
+ if (Val == "true")
+ hasSoftFloatAttr = true;
+ }
+
+ if (TTI.getFPOpCost(I->getType()) == TargetTransformInfo::TCC_Expensive ||
+ hasSoftFloatAttr)
+ Cost += InlineConstants::CallPenalty;
+ }
+
// If the instruction simplified to a constant, there is no cost to this
// instruction. Visit the instructions using our InstVisitor to account for
// all of the per-instruction logic. The visit tree returns true if we
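
A sketch of the string-attribute test used in the hunk above, assuming only that "use-soft-float" carries the literal value "true" (usesSoftFloat is a hypothetical helper):

  #include "llvm/IR/Function.h"
  using namespace llvm;

  static bool usesSoftFloat(const Function &F) {
    // String attributes carry their value as text, not as a boolean flag.
    return F.hasFnAttribute("use-soft-float") &&
           F.getFnAttribute("use-soft-float").getValueAsString() == "true";
  }
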
@@ -1110,7 +1134,7 @@ bool CallAnalyzer::analyzeCall(CallSite CS) {
// the ephemeral values multiple times (and they're completely determined by
// the callee, so this is purely duplicate work).
SmallPtrSet<const Value *, 32> EphValues;
- CodeMetrics::collectEphemeralValues(&F, AT, EphValues);
+ CodeMetrics::collectEphemeralValues(&F, &ACT->getAssumptionCache(F), EphValues);
// The worklist of live basic blocks in the callee *after* inlining. We avoid
// adding basic blocks of the callee which can be proven to be dead for this
@@ -1232,8 +1256,8 @@ void CallAnalyzer::dump() {
INITIALIZE_PASS_BEGIN(InlineCostAnalysis, "inline-cost", "Inline Cost Analysis",
true, true)
-INITIALIZE_AG_DEPENDENCY(TargetTransformInfo)
-INITIALIZE_PASS_DEPENDENCY(AssumptionTracker)
+INITIALIZE_PASS_DEPENDENCY(TargetTransformInfoWrapperPass)
+INITIALIZE_PASS_DEPENDENCY(AssumptionCacheTracker)
INITIALIZE_PASS_END(InlineCostAnalysis, "inline-cost", "Inline Cost Analysis",
true, true)
@@ -1245,14 +1269,14 @@ InlineCostAnalysis::~InlineCostAnalysis() {}
void InlineCostAnalysis::getAnalysisUsage(AnalysisUsage &AU) const {
AU.setPreservesAll();
- AU.addRequired<AssumptionTracker>();
- AU.addRequired<TargetTransformInfo>();
+ AU.addRequired<AssumptionCacheTracker>();
+ AU.addRequired<TargetTransformInfoWrapperPass>();
CallGraphSCCPass::getAnalysisUsage(AU);
}
bool InlineCostAnalysis::runOnSCC(CallGraphSCC &SCC) {
- TTI = &getAnalysis<TargetTransformInfo>();
- AT = &getAnalysis<AssumptionTracker>();
+ TTIWP = &getAnalysis<TargetTransformInfoWrapperPass>();
+ ACT = &getAnalysis<AssumptionCacheTracker>();
return false;
}
@@ -1309,7 +1333,8 @@ InlineCost InlineCostAnalysis::getInlineCost(CallSite CS, Function *Callee,
DEBUG(llvm::dbgs() << " Analyzing call of " << Callee->getName()
<< "...\n");
- CallAnalyzer CA(Callee->getDataLayout(), *TTI, AT, *Callee, Threshold);
+ CallAnalyzer CA(Callee->getDataLayout(), TTIWP->getTTI(*Callee),
+ ACT, *Callee, Threshold);
bool ShouldInline = CA.analyzeCall(CS);
DEBUG(CA.dump());
@@ -1324,9 +1349,7 @@ InlineCost InlineCostAnalysis::getInlineCost(CallSite CS, Function *Callee,
}
bool InlineCostAnalysis::isInlineViable(Function &F) {
- bool ReturnsTwice =
- F.getAttributes().hasAttribute(AttributeSet::FunctionIndex,
- Attribute::ReturnsTwice);
+ bool ReturnsTwice = F.hasFnAttribute(Attribute::ReturnsTwice);
for (Function::iterator BI = F.begin(), BE = F.end(); BI != BE; ++BI) {
// Disallow inlining of functions which contain indirect branches or
// blockaddresses.
diff --git a/lib/Analysis/IVUsers.cpp b/lib/Analysis/IVUsers.cpp
index 6b5f370..140753c 100644
--- a/lib/Analysis/IVUsers.cpp
+++ b/lib/Analysis/IVUsers.cpp
@@ -33,7 +33,7 @@ using namespace llvm;
char IVUsers::ID = 0;
INITIALIZE_PASS_BEGIN(IVUsers, "iv-users",
"Induction Variable Users", false, true)
-INITIALIZE_PASS_DEPENDENCY(LoopInfo)
+INITIALIZE_PASS_DEPENDENCY(LoopInfoWrapperPass)
INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass)
INITIALIZE_PASS_DEPENDENCY(ScalarEvolution)
INITIALIZE_PASS_END(IVUsers, "iv-users",
@@ -241,7 +241,7 @@ IVUsers::IVUsers()
}
void IVUsers::getAnalysisUsage(AnalysisUsage &AU) const {
- AU.addRequired<LoopInfo>();
+ AU.addRequired<LoopInfoWrapperPass>();
AU.addRequired<DominatorTreeWrapperPass>();
AU.addRequired<ScalarEvolution>();
AU.setPreservesAll();
@@ -250,7 +250,7 @@ void IVUsers::getAnalysisUsage(AnalysisUsage &AU) const {
bool IVUsers::runOnLoop(Loop *l, LPPassManager &LPM) {
L = l;
- LI = &getAnalysis<LoopInfo>();
+ LI = &getAnalysis<LoopInfoWrapperPass>().getLoopInfo();
DT = &getAnalysis<DominatorTreeWrapperPass>().getDomTree();
SE = &getAnalysis<ScalarEvolution>();
DataLayoutPass *DLP = getAnalysisIfAvailable<DataLayoutPass>();
diff --git a/lib/Analysis/InstructionSimplify.cpp b/lib/Analysis/InstructionSimplify.cpp
index f151a3a..0cb0982 100644
--- a/lib/Analysis/InstructionSimplify.cpp
+++ b/lib/Analysis/InstructionSimplify.cpp
@@ -20,6 +20,7 @@
#include "llvm/Analysis/InstructionSimplify.h"
#include "llvm/ADT/SetVector.h"
#include "llvm/ADT/Statistic.h"
+#include "llvm/Analysis/AliasAnalysis.h"
#include "llvm/Analysis/ConstantFolding.h"
#include "llvm/Analysis/MemoryBuiltins.h"
#include "llvm/Analysis/ValueTracking.h"
@@ -31,6 +32,7 @@
#include "llvm/IR/Operator.h"
#include "llvm/IR/PatternMatch.h"
#include "llvm/IR/ValueHandle.h"
+#include <algorithm>
using namespace llvm;
using namespace llvm::PatternMatch;
@@ -46,19 +48,21 @@ struct Query {
const DataLayout *DL;
const TargetLibraryInfo *TLI;
const DominatorTree *DT;
- AssumptionTracker *AT;
+ AssumptionCache *AC;
const Instruction *CxtI;
Query(const DataLayout *DL, const TargetLibraryInfo *tli,
- const DominatorTree *dt, AssumptionTracker *at = nullptr,
+ const DominatorTree *dt, AssumptionCache *ac = nullptr,
const Instruction *cxti = nullptr)
- : DL(DL), TLI(tli), DT(dt), AT(at), CxtI(cxti) {}
+ : DL(DL), TLI(tli), DT(dt), AC(ac), CxtI(cxti) {}
};
} // end anonymous namespace
static Value *SimplifyAndInst(Value *, Value *, const Query &, unsigned);
static Value *SimplifyBinOp(unsigned, Value *, Value *, const Query &,
unsigned);
+static Value *SimplifyFPBinOp(unsigned, Value *, Value *, const FastMathFlags &,
+ const Query &, unsigned);
static Value *SimplifyCmpInst(unsigned, Value *, Value *, const Query &,
unsigned);
static Value *SimplifyOrInst(Value *, Value *, const Query &, unsigned);
@@ -581,10 +585,10 @@ static Value *SimplifyAddInst(Value *Op0, Value *Op1, bool isNSW, bool isNUW,
Value *llvm::SimplifyAddInst(Value *Op0, Value *Op1, bool isNSW, bool isNUW,
const DataLayout *DL, const TargetLibraryInfo *TLI,
- const DominatorTree *DT, AssumptionTracker *AT,
+ const DominatorTree *DT, AssumptionCache *AC,
const Instruction *CxtI) {
- return ::SimplifyAddInst(Op0, Op1, isNSW, isNUW,
- Query (DL, TLI, DT, AT, CxtI), RecursionLimit);
+ return ::SimplifyAddInst(Op0, Op1, isNSW, isNUW, Query(DL, TLI, DT, AC, CxtI),
+ RecursionLimit);
}
/// \brief Compute the base pointer and cumulative constant offsets for V.
@@ -683,17 +687,9 @@ static Value *SimplifySubInst(Value *Op0, Value *Op1, bool isNSW, bool isNUW,
if (Op0 == Op1)
return Constant::getNullValue(Op0->getType());
- // X - (0 - Y) -> X if the second sub is NUW.
- // If Y != 0, 0 - Y is a poison value.
- // If Y == 0, 0 - Y simplifies to 0.
- if (BinaryOperator::isNeg(Op1)) {
- if (const auto *BO = dyn_cast<BinaryOperator>(Op1)) {
- assert(BO->getOpcode() == Instruction::Sub &&
- "Expected a subtraction operator!");
- if (BO->hasNoUnsignedWrap())
- return Op0;
- }
- }
+ // 0 - X -> 0 if the sub is NUW.
+ if (isNUW && match(Op0, m_Zero()))
+ return Op0;
// (X + Y) - Z -> X + (Y - Z) or Y + (X - Z) if everything simplifies.
// For example, (X + Y) - Y -> X; (Y + X) - Y -> X
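
Worked case for the new NUW fold above, sketched against the SimplifySubInst signature updated below (foldNUWSubFromZero is a hypothetical name): with NUW set, 0 - X is poison unless X == 0, so the result must equal the zero operand.

  #include "llvm/Analysis/InstructionSimplify.h"
  #include "llvm/IR/Constants.h"
  using namespace llvm;

  // %r = sub nuw i32 0, %x  ==>  i32 0
  static Value *foldNUWSubFromZero(Value *X, const DataLayout *DL) {
    Value *Zero = Constant::getNullValue(X->getType());
    return SimplifySubInst(Zero, X, /*isNSW=*/false, /*isNUW=*/true, DL);
  }
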
@@ -788,10 +784,10 @@ static Value *SimplifySubInst(Value *Op0, Value *Op1, bool isNSW, bool isNUW,
Value *llvm::SimplifySubInst(Value *Op0, Value *Op1, bool isNSW, bool isNUW,
const DataLayout *DL, const TargetLibraryInfo *TLI,
- const DominatorTree *DT, AssumptionTracker *AT,
+ const DominatorTree *DT, AssumptionCache *AC,
const Instruction *CxtI) {
- return ::SimplifySubInst(Op0, Op1, isNSW, isNUW,
- Query (DL, TLI, DT, AT, CxtI), RecursionLimit);
+ return ::SimplifySubInst(Op0, Op1, isNSW, isNUW, Query(DL, TLI, DT, AC, CxtI),
+ RecursionLimit);
}
/// Given operands for an FAdd, see if we can fold the result. If not, this
@@ -966,37 +962,37 @@ static Value *SimplifyMulInst(Value *Op0, Value *Op1, const Query &Q,
}
Value *llvm::SimplifyFAddInst(Value *Op0, Value *Op1, FastMathFlags FMF,
- const DataLayout *DL, const TargetLibraryInfo *TLI,
- const DominatorTree *DT, AssumptionTracker *AT,
- const Instruction *CxtI) {
- return ::SimplifyFAddInst(Op0, Op1, FMF, Query (DL, TLI, DT, AT, CxtI),
+ const DataLayout *DL,
+ const TargetLibraryInfo *TLI,
+ const DominatorTree *DT, AssumptionCache *AC,
+ const Instruction *CxtI) {
+ return ::SimplifyFAddInst(Op0, Op1, FMF, Query(DL, TLI, DT, AC, CxtI),
RecursionLimit);
}
Value *llvm::SimplifyFSubInst(Value *Op0, Value *Op1, FastMathFlags FMF,
- const DataLayout *DL, const TargetLibraryInfo *TLI,
- const DominatorTree *DT, AssumptionTracker *AT,
- const Instruction *CxtI) {
- return ::SimplifyFSubInst(Op0, Op1, FMF, Query (DL, TLI, DT, AT, CxtI),
+ const DataLayout *DL,
+ const TargetLibraryInfo *TLI,
+ const DominatorTree *DT, AssumptionCache *AC,
+ const Instruction *CxtI) {
+ return ::SimplifyFSubInst(Op0, Op1, FMF, Query(DL, TLI, DT, AC, CxtI),
RecursionLimit);
}
-Value *llvm::SimplifyFMulInst(Value *Op0, Value *Op1,
- FastMathFlags FMF,
+Value *llvm::SimplifyFMulInst(Value *Op0, Value *Op1, FastMathFlags FMF,
const DataLayout *DL,
const TargetLibraryInfo *TLI,
- const DominatorTree *DT,
- AssumptionTracker *AT,
+ const DominatorTree *DT, AssumptionCache *AC,
const Instruction *CxtI) {
- return ::SimplifyFMulInst(Op0, Op1, FMF, Query (DL, TLI, DT, AT, CxtI),
+ return ::SimplifyFMulInst(Op0, Op1, FMF, Query(DL, TLI, DT, AC, CxtI),
RecursionLimit);
}
Value *llvm::SimplifyMulInst(Value *Op0, Value *Op1, const DataLayout *DL,
const TargetLibraryInfo *TLI,
- const DominatorTree *DT, AssumptionTracker *AT,
+ const DominatorTree *DT, AssumptionCache *AC,
const Instruction *CxtI) {
- return ::SimplifyMulInst(Op0, Op1, Query (DL, TLI, DT, AT, CxtI),
+ return ::SimplifyMulInst(Op0, Op1, Query(DL, TLI, DT, AC, CxtI),
RecursionLimit);
}
@@ -1017,6 +1013,10 @@ static Value *SimplifyDiv(Instruction::BinaryOps Opcode, Value *Op0, Value *Op1,
if (match(Op1, m_Undef()))
return Op1;
+ // X / 0 -> undef; we don't need to preserve faults!
+ if (match(Op1, m_Zero()))
+ return UndefValue::get(Op1->getType());
+
// undef / X -> 0
if (match(Op0, m_Undef()))
return Constant::getNullValue(Op0->getType());
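
A sketch of the new division-by-zero fold, assuming the SimplifySDivInst signature shown in the next hunk (foldDivByZero is a hypothetical name); since dividing by zero is undefined behavior, the result may be folded to undef rather than preserving the fault:

  #include "llvm/Analysis/InstructionSimplify.h"
  #include "llvm/IR/Constants.h"
  using namespace llvm;

  // %r = sdiv i32 %x, 0  ==>  undef
  static Value *foldDivByZero(Value *X, const DataLayout *DL) {
    Value *Zero = Constant::getNullValue(X->getType());
    return SimplifySDivInst(X, Zero, DL); // returns UndefValue of X's type
  }
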
@@ -1094,10 +1094,9 @@ static Value *SimplifySDivInst(Value *Op0, Value *Op1, const Query &Q,
Value *llvm::SimplifySDivInst(Value *Op0, Value *Op1, const DataLayout *DL,
const TargetLibraryInfo *TLI,
- const DominatorTree *DT,
- AssumptionTracker *AT,
+ const DominatorTree *DT, AssumptionCache *AC,
const Instruction *CxtI) {
- return ::SimplifySDivInst(Op0, Op1, Query (DL, TLI, DT, AT, CxtI),
+ return ::SimplifySDivInst(Op0, Op1, Query(DL, TLI, DT, AC, CxtI),
RecursionLimit);
}
@@ -1113,15 +1112,14 @@ static Value *SimplifyUDivInst(Value *Op0, Value *Op1, const Query &Q,
Value *llvm::SimplifyUDivInst(Value *Op0, Value *Op1, const DataLayout *DL,
const TargetLibraryInfo *TLI,
- const DominatorTree *DT,
- AssumptionTracker *AT,
+ const DominatorTree *DT, AssumptionCache *AC,
const Instruction *CxtI) {
- return ::SimplifyUDivInst(Op0, Op1, Query (DL, TLI, DT, AT, CxtI),
+ return ::SimplifyUDivInst(Op0, Op1, Query(DL, TLI, DT, AC, CxtI),
RecursionLimit);
}
-static Value *SimplifyFDivInst(Value *Op0, Value *Op1, const Query &Q,
- unsigned) {
+static Value *SimplifyFDivInst(Value *Op0, Value *Op1, FastMathFlags FMF,
+ const Query &Q, unsigned) {
// undef / X -> undef (the undef could be a snan).
if (match(Op0, m_Undef()))
return Op0;
@@ -1130,15 +1128,21 @@ static Value *SimplifyFDivInst(Value *Op0, Value *Op1, const Query &Q,
if (match(Op1, m_Undef()))
return Op1;
+ // 0 / X -> 0
+ // Requires that NaNs are off (X could be zero) and signed zeroes are
+ // ignored (X could be positive or negative, so the output sign is unknown).
+ if (FMF.noNaNs() && FMF.noSignedZeros() && match(Op0, m_AnyZero()))
+ return Op0;
+
return nullptr;
}
-Value *llvm::SimplifyFDivInst(Value *Op0, Value *Op1, const DataLayout *DL,
+Value *llvm::SimplifyFDivInst(Value *Op0, Value *Op1, FastMathFlags FMF,
+ const DataLayout *DL,
const TargetLibraryInfo *TLI,
- const DominatorTree *DT,
- AssumptionTracker *AT,
+ const DominatorTree *DT, AssumptionCache *AC,
const Instruction *CxtI) {
- return ::SimplifyFDivInst(Op0, Op1, Query (DL, TLI, DT, AT, CxtI),
+ return ::SimplifyFDivInst(Op0, Op1, FMF, Query(DL, TLI, DT, AC, CxtI),
RecursionLimit);
}
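
A sketch of when the new 0 / X fold fires, using the FMF-taking signature above (foldZeroFDiv is a hypothetical name); both nnan and nsz must be set:

  #include "llvm/Analysis/InstructionSimplify.h"
  #include "llvm/IR/Constants.h"
  #include "llvm/IR/Operator.h"
  using namespace llvm;

  // %r = fdiv nnan nsz float 0.0, %x  ==>  float 0.0
  static Value *foldZeroFDiv(Value *X, const DataLayout *DL) {
    FastMathFlags FMF;
    FMF.setNoNaNs();        // otherwise X could be NaN, and 0/NaN is NaN
    FMF.setNoSignedZeros(); // otherwise the result's sign depends on X
    Value *Zero = ConstantFP::get(X->getType(), 0.0);
    return SimplifyFDivInst(Zero, X, FMF, DL);
  }
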
@@ -1215,10 +1219,9 @@ static Value *SimplifySRemInst(Value *Op0, Value *Op1, const Query &Q,
Value *llvm::SimplifySRemInst(Value *Op0, Value *Op1, const DataLayout *DL,
const TargetLibraryInfo *TLI,
- const DominatorTree *DT,
- AssumptionTracker *AT,
+ const DominatorTree *DT, AssumptionCache *AC,
const Instruction *CxtI) {
- return ::SimplifySRemInst(Op0, Op1, Query (DL, TLI, DT, AT, CxtI),
+ return ::SimplifySRemInst(Op0, Op1, Query(DL, TLI, DT, AC, CxtI),
RecursionLimit);
}
@@ -1234,15 +1237,14 @@ static Value *SimplifyURemInst(Value *Op0, Value *Op1, const Query &Q,
Value *llvm::SimplifyURemInst(Value *Op0, Value *Op1, const DataLayout *DL,
const TargetLibraryInfo *TLI,
- const DominatorTree *DT,
- AssumptionTracker *AT,
+ const DominatorTree *DT, AssumptionCache *AC,
const Instruction *CxtI) {
- return ::SimplifyURemInst(Op0, Op1, Query (DL, TLI, DT, AT, CxtI),
+ return ::SimplifyURemInst(Op0, Op1, Query(DL, TLI, DT, AC, CxtI),
RecursionLimit);
}
-static Value *SimplifyFRemInst(Value *Op0, Value *Op1, const Query &,
- unsigned) {
+static Value *SimplifyFRemInst(Value *Op0, Value *Op1, FastMathFlags FMF,
+ const Query &, unsigned) {
// undef % X -> undef (the undef could be a snan).
if (match(Op0, m_Undef()))
return Op0;
@@ -1251,15 +1253,21 @@ static Value *SimplifyFRemInst(Value *Op0, Value *Op1, const Query &,
if (match(Op1, m_Undef()))
return Op1;
+ // 0 % X -> 0
+ // Requires that NaNs are off (X could be zero) and signed zeroes are
+ // ignored (X could be positive or negative, so the output sign is unknown).
+ if (FMF.noNaNs() && FMF.noSignedZeros() && match(Op0, m_AnyZero()))
+ return Op0;
+
return nullptr;
}
-Value *llvm::SimplifyFRemInst(Value *Op0, Value *Op1, const DataLayout *DL,
+Value *llvm::SimplifyFRemInst(Value *Op0, Value *Op1, FastMathFlags FMF,
+ const DataLayout *DL,
const TargetLibraryInfo *TLI,
- const DominatorTree *DT,
- AssumptionTracker *AT,
+ const DominatorTree *DT, AssumptionCache *AC,
const Instruction *CxtI) {
- return ::SimplifyFRemInst(Op0, Op1, Query (DL, TLI, DT, AT, CxtI),
+ return ::SimplifyFRemInst(Op0, Op1, FMF, Query(DL, TLI, DT, AC, CxtI),
RecursionLimit);
}
@@ -1340,13 +1348,18 @@ static Value *SimplifyRightShift(unsigned Opcode, Value *Op0, Value *Op1,
if (Op0 == Op1)
return Constant::getNullValue(Op0->getType());
+ // undef >> X -> 0
+ // undef >> X -> undef (if it's exact)
+ if (match(Op0, m_Undef()))
+ return isExact ? Op0 : Constant::getNullValue(Op0->getType());
+
// The low bit cannot be shifted out of an exact shift if it is set.
if (isExact) {
unsigned BitWidth = Op0->getType()->getScalarSizeInBits();
APInt Op0KnownZero(BitWidth, 0);
APInt Op0KnownOne(BitWidth, 0);
- computeKnownBits(Op0, Op0KnownZero, Op0KnownOne, Q.DL, /*Depth=*/0, Q.AT, Q.CxtI,
- Q.DT);
+ computeKnownBits(Op0, Op0KnownZero, Op0KnownOne, Q.DL, /*Depth=*/0, Q.AC,
+ Q.CxtI, Q.DT);
if (Op0KnownOne[0])
return Op0;
}
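
Worked cases for the shared right-shift logic above (a sketch in the hunk's own notation):

  //   lshr i32 undef, %n         -->  0      (undef may be chosen to be 0)
  //   lshr exact i32 undef, %n   -->  undef  (exact shifts may propagate undef)
  //   ashr exact i32 %x, %n      -->  %x     when the low bit of %x is known set

The last case holds because an exact shift may not drop set bits, so a set low bit forces the shift amount to be zero.
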
@@ -1362,8 +1375,9 @@ static Value *SimplifyShlInst(Value *Op0, Value *Op1, bool isNSW, bool isNUW,
return V;
// undef << X -> 0
+ // undef << X -> undef (if it's NSW/NUW)
if (match(Op0, m_Undef()))
- return Constant::getNullValue(Op0->getType());
+ return isNSW || isNUW ? Op0 : Constant::getNullValue(Op0->getType());
// (X >> A) << A -> X
Value *X;
@@ -1374,9 +1388,9 @@ static Value *SimplifyShlInst(Value *Op0, Value *Op1, bool isNSW, bool isNUW,
Value *llvm::SimplifyShlInst(Value *Op0, Value *Op1, bool isNSW, bool isNUW,
const DataLayout *DL, const TargetLibraryInfo *TLI,
- const DominatorTree *DT, AssumptionTracker *AT,
+ const DominatorTree *DT, AssumptionCache *AC,
const Instruction *CxtI) {
- return ::SimplifyShlInst(Op0, Op1, isNSW, isNUW, Query (DL, TLI, DT, AT, CxtI),
+ return ::SimplifyShlInst(Op0, Op1, isNSW, isNUW, Query(DL, TLI, DT, AC, CxtI),
RecursionLimit);
}
@@ -1388,10 +1402,6 @@ static Value *SimplifyLShrInst(Value *Op0, Value *Op1, bool isExact,
MaxRecurse))
return V;
- // undef >>l X -> 0
- if (match(Op0, m_Undef()))
- return Constant::getNullValue(Op0->getType());
-
// (X << A) >> A -> X
Value *X;
if (match(Op0, m_NUWShl(m_Value(X), m_Specific(Op1))))
@@ -1403,10 +1413,9 @@ static Value *SimplifyLShrInst(Value *Op0, Value *Op1, bool isExact,
Value *llvm::SimplifyLShrInst(Value *Op0, Value *Op1, bool isExact,
const DataLayout *DL,
const TargetLibraryInfo *TLI,
- const DominatorTree *DT,
- AssumptionTracker *AT,
+ const DominatorTree *DT, AssumptionCache *AC,
const Instruction *CxtI) {
- return ::SimplifyLShrInst(Op0, Op1, isExact, Query (DL, TLI, DT, AT, CxtI),
+ return ::SimplifyLShrInst(Op0, Op1, isExact, Query(DL, TLI, DT, AC, CxtI),
RecursionLimit);
}
@@ -1422,17 +1431,13 @@ static Value *SimplifyAShrInst(Value *Op0, Value *Op1, bool isExact,
if (match(Op0, m_AllOnes()))
return Op0;
- // undef >>a X -> all ones
- if (match(Op0, m_Undef()))
- return Constant::getAllOnesValue(Op0->getType());
-
// (X << A) >> A -> X
Value *X;
if (match(Op0, m_NSWShl(m_Value(X), m_Specific(Op1))))
return X;
// Arithmetic shifting an all-sign-bit value is a no-op.
- unsigned NumSignBits = ComputeNumSignBits(Op0, Q.DL, 0, Q.AT, Q.CxtI, Q.DT);
+ unsigned NumSignBits = ComputeNumSignBits(Op0, Q.DL, 0, Q.AC, Q.CxtI, Q.DT);
if (NumSignBits == Op0->getType()->getScalarSizeInBits())
return Op0;
@@ -1442,19 +1447,63 @@ static Value *SimplifyAShrInst(Value *Op0, Value *Op1, bool isExact,
Value *llvm::SimplifyAShrInst(Value *Op0, Value *Op1, bool isExact,
const DataLayout *DL,
const TargetLibraryInfo *TLI,
- const DominatorTree *DT,
- AssumptionTracker *AT,
+ const DominatorTree *DT, AssumptionCache *AC,
const Instruction *CxtI) {
- return ::SimplifyAShrInst(Op0, Op1, isExact, Query (DL, TLI, DT, AT, CxtI),
+ return ::SimplifyAShrInst(Op0, Op1, isExact, Query(DL, TLI, DT, AC, CxtI),
RecursionLimit);
}
+static Value *simplifyUnsignedRangeCheck(ICmpInst *ZeroICmp,
+ ICmpInst *UnsignedICmp, bool IsAnd) {
+ Value *X, *Y;
+
+ ICmpInst::Predicate EqPred;
+ if (!match(ZeroICmp, m_ICmp(EqPred, m_Value(Y), m_Zero())) ||
+ !ICmpInst::isEquality(EqPred))
+ return nullptr;
+
+ ICmpInst::Predicate UnsignedPred;
+ if (match(UnsignedICmp, m_ICmp(UnsignedPred, m_Value(X), m_Specific(Y))) &&
+ ICmpInst::isUnsigned(UnsignedPred))
+ ;
+ else if (match(UnsignedICmp,
+ m_ICmp(UnsignedPred, m_Value(Y), m_Specific(X))) &&
+ ICmpInst::isUnsigned(UnsignedPred))
+ UnsignedPred = ICmpInst::getSwappedPredicate(UnsignedPred);
+ else
+ return nullptr;
+
+ // X < Y && Y != 0 --> X < Y
+ // X < Y || Y != 0 --> Y != 0
+ if (UnsignedPred == ICmpInst::ICMP_ULT && EqPred == ICmpInst::ICMP_NE)
+ return IsAnd ? UnsignedICmp : ZeroICmp;
+
+ // X >= Y || Y != 0 --> true
+ // X >= Y || Y == 0 --> X >= Y
+ if (UnsignedPred == ICmpInst::ICMP_UGE && !IsAnd) {
+ if (EqPred == ICmpInst::ICMP_NE)
+ return getTrue(UnsignedICmp->getType());
+ return UnsignedICmp;
+ }
+
+ // X < Y && Y == 0 --> false
+ if (UnsignedPred == ICmpInst::ICMP_ULT && EqPred == ICmpInst::ICMP_EQ &&
+ IsAnd)
+ return getFalse(UnsignedICmp->getType());
+
+ return nullptr;
+}
+
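
Summary of the new helper above, in the same notation as its comments (a sketch, not part of the patch); every row rests on the unsigned fact that X <u Y implies Y != 0:

  //   X <u Y  && Y != 0  -->  X <u Y
  //   X <u Y  || Y != 0  -->  Y != 0
  //   X >=u Y || Y != 0  -->  true
  //   X >=u Y || Y == 0  -->  X >=u Y
  //   X <u Y  && Y == 0  -->  false
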
// Simplify (and (icmp ...) (icmp ...)) to false when we can tell that the range
// of possible values cannot be satisfied.
static Value *SimplifyAndOfICmps(ICmpInst *Op0, ICmpInst *Op1) {
ICmpInst::Predicate Pred0, Pred1;
ConstantInt *CI1, *CI2;
Value *V;
+
+ if (Value *X = simplifyUnsignedRangeCheck(Op0, Op1, /*IsAnd=*/true))
+ return X;
+
if (!match(Op0, m_ICmp(Pred0, m_Add(m_Value(V), m_ConstantInt(CI1)),
m_ConstantInt(CI2))))
return nullptr;
@@ -1547,9 +1596,9 @@ static Value *SimplifyAndInst(Value *Op0, Value *Op1, const Query &Q,
// A & (-A) = A if A is a power of two or zero.
if (match(Op0, m_Neg(m_Specific(Op1))) ||
match(Op1, m_Neg(m_Specific(Op0)))) {
- if (isKnownToBeAPowerOfTwo(Op0, /*OrZero*/true, 0, Q.AT, Q.CxtI, Q.DT))
+ if (isKnownToBeAPowerOfTwo(Op0, /*OrZero*/ true, 0, Q.AC, Q.CxtI, Q.DT))
return Op0;
- if (isKnownToBeAPowerOfTwo(Op1, /*OrZero*/true, 0, Q.AT, Q.CxtI, Q.DT))
+ if (isKnownToBeAPowerOfTwo(Op1, /*OrZero*/ true, 0, Q.AC, Q.CxtI, Q.DT))
return Op1;
}
@@ -1596,9 +1645,9 @@ static Value *SimplifyAndInst(Value *Op0, Value *Op1, const Query &Q,
Value *llvm::SimplifyAndInst(Value *Op0, Value *Op1, const DataLayout *DL,
const TargetLibraryInfo *TLI,
- const DominatorTree *DT, AssumptionTracker *AT,
+ const DominatorTree *DT, AssumptionCache *AC,
const Instruction *CxtI) {
- return ::SimplifyAndInst(Op0, Op1, Query (DL, TLI, DT, AT, CxtI),
+ return ::SimplifyAndInst(Op0, Op1, Query(DL, TLI, DT, AC, CxtI),
RecursionLimit);
}
@@ -1608,6 +1657,10 @@ static Value *SimplifyOrOfICmps(ICmpInst *Op0, ICmpInst *Op1) {
ICmpInst::Predicate Pred0, Pred1;
ConstantInt *CI1, *CI2;
Value *V;
+
+ if (Value *X = simplifyUnsignedRangeCheck(Op0, Op1, /*IsAnd=*/false))
+ return X;
+
if (!match(Op0, m_ICmp(Pred0, m_Add(m_Value(V), m_ConstantInt(CI1)),
m_ConstantInt(CI2))))
return nullptr;
@@ -1748,22 +1801,22 @@ static Value *SimplifyOrInst(Value *Op0, Value *Op1, const Query &Q,
if ((C2->getValue() & (C2->getValue() + 1)) == 0 && // C2 == 0+1+
match(A, m_Add(m_Value(V1), m_Value(V2)))) {
// Add commutes, try both ways.
- if (V1 == B && MaskedValueIsZero(V2, C2->getValue(), Q.DL,
- 0, Q.AT, Q.CxtI, Q.DT))
+ if (V1 == B &&
+ MaskedValueIsZero(V2, C2->getValue(), Q.DL, 0, Q.AC, Q.CxtI, Q.DT))
return A;
- if (V2 == B && MaskedValueIsZero(V1, C2->getValue(), Q.DL,
- 0, Q.AT, Q.CxtI, Q.DT))
+ if (V2 == B &&
+ MaskedValueIsZero(V1, C2->getValue(), Q.DL, 0, Q.AC, Q.CxtI, Q.DT))
return A;
}
// Or commutes, try both ways.
if ((C1->getValue() & (C1->getValue() + 1)) == 0 &&
match(B, m_Add(m_Value(V1), m_Value(V2)))) {
// Add commutes, try both ways.
- if (V1 == A && MaskedValueIsZero(V2, C1->getValue(), Q.DL,
- 0, Q.AT, Q.CxtI, Q.DT))
+ if (V1 == A &&
+ MaskedValueIsZero(V2, C1->getValue(), Q.DL, 0, Q.AC, Q.CxtI, Q.DT))
return B;
- if (V2 == A && MaskedValueIsZero(V1, C1->getValue(), Q.DL,
- 0, Q.AT, Q.CxtI, Q.DT))
+ if (V2 == A &&
+ MaskedValueIsZero(V1, C1->getValue(), Q.DL, 0, Q.AC, Q.CxtI, Q.DT))
return B;
}
}
@@ -1780,9 +1833,9 @@ static Value *SimplifyOrInst(Value *Op0, Value *Op1, const Query &Q,
Value *llvm::SimplifyOrInst(Value *Op0, Value *Op1, const DataLayout *DL,
const TargetLibraryInfo *TLI,
- const DominatorTree *DT, AssumptionTracker *AT,
+ const DominatorTree *DT, AssumptionCache *AC,
const Instruction *CxtI) {
- return ::SimplifyOrInst(Op0, Op1, Query (DL, TLI, DT, AT, CxtI),
+ return ::SimplifyOrInst(Op0, Op1, Query(DL, TLI, DT, AC, CxtI),
RecursionLimit);
}
@@ -1837,9 +1890,9 @@ static Value *SimplifyXorInst(Value *Op0, Value *Op1, const Query &Q,
Value *llvm::SimplifyXorInst(Value *Op0, Value *Op1, const DataLayout *DL,
const TargetLibraryInfo *TLI,
- const DominatorTree *DT, AssumptionTracker *AT,
+ const DominatorTree *DT, AssumptionCache *AC,
const Instruction *CxtI) {
- return ::SimplifyXorInst(Op0, Op1, Query (DL, TLI, DT, AT, CxtI),
+ return ::SimplifyXorInst(Op0, Op1, Query(DL, TLI, DT, AC, CxtI),
RecursionLimit);
}
@@ -2015,6 +2068,50 @@ static Constant *computePointerICmp(const DataLayout *DL,
return ConstantExpr::getICmp(Pred,
ConstantExpr::getAdd(LHSOffset, LHSNoBound),
ConstantExpr::getAdd(RHSOffset, RHSNoBound));
+
+ // If one side of the equality comparison must come from a noalias call
+ // (meaning a system memory allocation function), and the other side must
+ // come from a pointer that cannot overlap with dynamically-allocated
+ // memory within the lifetime of the current function (allocas, byval
+ // arguments, globals), then determine the comparison result here.
+ SmallVector<Value *, 8> LHSUObjs, RHSUObjs;
+ GetUnderlyingObjects(LHS, LHSUObjs, DL);
+ GetUnderlyingObjects(RHS, RHSUObjs, DL);
+
+ // Is the set of underlying objects all noalias calls?
+ auto IsNAC = [](SmallVectorImpl<Value *> &Objects) {
+ return std::all_of(Objects.begin(), Objects.end(),
+ [](Value *V){ return isNoAliasCall(V); });
+ };
+
+ // Is the set of underlying objects all things which must be disjoint from
+ // noalias calls? For allocas, we consider only static ones (dynamic
+ // allocas might be transformed into calls to malloc not simultaneously
+ // live with the compared-to allocation). For globals, we exclude symbols
+ // that might be resolved lazily to symbols in another dynamically-loaded
+ // library (and, thus, could be malloc'ed by the implementation).
+ auto IsAllocDisjoint = [](SmallVectorImpl<Value *> &Objects) {
+ return std::all_of(Objects.begin(), Objects.end(),
+ [](Value *V){
+ if (const AllocaInst *AI = dyn_cast<AllocaInst>(V))
+ return AI->getParent() && AI->getParent()->getParent() &&
+ AI->isStaticAlloca();
+ if (const GlobalValue *GV = dyn_cast<GlobalValue>(V))
+ return (GV->hasLocalLinkage() ||
+ GV->hasHiddenVisibility() ||
+ GV->hasProtectedVisibility() ||
+ GV->hasUnnamedAddr()) &&
+ !GV->isThreadLocal();
+ if (const Argument *A = dyn_cast<Argument>(V))
+ return A->hasByValAttr();
+ return false;
+ });
+ };
+
+ if ((IsNAC(LHSUObjs) && IsAllocDisjoint(RHSUObjs)) ||
+ (IsNAC(RHSUObjs) && IsAllocDisjoint(LHSUObjs)))
+ return ConstantInt::get(GetCompareTy(LHS),
+ !CmpInst::isTrueWhenEqual(Pred));
}
// Otherwise, fail.
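
Worked case for the noalias rule above (a sketch): %m comes from a noalias call and %a is a static alloca that stays live across it, so the two pointers cannot be equal:

  //   %m = call noalias i8* @malloc(i64 4)
  //   %a = alloca i8
  //   icmp eq i8* %m, %a  -->  false
  //   icmp ne i8* %m, %a  -->  true
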
@@ -2094,46 +2191,46 @@ static Value *SimplifyICmpInst(unsigned Predicate, Value *LHS, Value *RHS,
return getTrue(ITy);
case ICmpInst::ICMP_EQ:
case ICmpInst::ICMP_ULE:
- if (isKnownNonZero(LHS, Q.DL, 0, Q.AT, Q.CxtI, Q.DT))
+ if (isKnownNonZero(LHS, Q.DL, 0, Q.AC, Q.CxtI, Q.DT))
return getFalse(ITy);
break;
case ICmpInst::ICMP_NE:
case ICmpInst::ICMP_UGT:
- if (isKnownNonZero(LHS, Q.DL, 0, Q.AT, Q.CxtI, Q.DT))
+ if (isKnownNonZero(LHS, Q.DL, 0, Q.AC, Q.CxtI, Q.DT))
return getTrue(ITy);
break;
case ICmpInst::ICMP_SLT:
- ComputeSignBit(LHS, LHSKnownNonNegative, LHSKnownNegative, Q.DL,
- 0, Q.AT, Q.CxtI, Q.DT);
+ ComputeSignBit(LHS, LHSKnownNonNegative, LHSKnownNegative, Q.DL, 0, Q.AC,
+ Q.CxtI, Q.DT);
if (LHSKnownNegative)
return getTrue(ITy);
if (LHSKnownNonNegative)
return getFalse(ITy);
break;
case ICmpInst::ICMP_SLE:
- ComputeSignBit(LHS, LHSKnownNonNegative, LHSKnownNegative, Q.DL,
- 0, Q.AT, Q.CxtI, Q.DT);
+ ComputeSignBit(LHS, LHSKnownNonNegative, LHSKnownNegative, Q.DL, 0, Q.AC,
+ Q.CxtI, Q.DT);
if (LHSKnownNegative)
return getTrue(ITy);
- if (LHSKnownNonNegative && isKnownNonZero(LHS, Q.DL,
- 0, Q.AT, Q.CxtI, Q.DT))
+ if (LHSKnownNonNegative &&
+ isKnownNonZero(LHS, Q.DL, 0, Q.AC, Q.CxtI, Q.DT))
return getFalse(ITy);
break;
case ICmpInst::ICMP_SGE:
- ComputeSignBit(LHS, LHSKnownNonNegative, LHSKnownNegative, Q.DL,
- 0, Q.AT, Q.CxtI, Q.DT);
+ ComputeSignBit(LHS, LHSKnownNonNegative, LHSKnownNegative, Q.DL, 0, Q.AC,
+ Q.CxtI, Q.DT);
if (LHSKnownNegative)
return getFalse(ITy);
if (LHSKnownNonNegative)
return getTrue(ITy);
break;
case ICmpInst::ICMP_SGT:
- ComputeSignBit(LHS, LHSKnownNonNegative, LHSKnownNegative, Q.DL,
- 0, Q.AT, Q.CxtI, Q.DT);
+ ComputeSignBit(LHS, LHSKnownNonNegative, LHSKnownNegative, Q.DL, 0, Q.AC,
+ Q.CxtI, Q.DT);
if (LHSKnownNegative)
return getFalse(ITy);
- if (LHSKnownNonNegative && isKnownNonZero(LHS, Q.DL,
- 0, Q.AT, Q.CxtI, Q.DT))
+ if (LHSKnownNonNegative &&
+ isKnownNonZero(LHS, Q.DL, 0, Q.AC, Q.CxtI, Q.DT))
return getTrue(ITy);
break;
}
@@ -2485,6 +2582,40 @@ static Value *SimplifyICmpInst(unsigned Predicate, Value *LHS, Value *RHS,
}
}
+ // icmp pred (or X, Y), X
+ if (LBO && match(LBO, m_CombineOr(m_Or(m_Value(), m_Specific(RHS)),
+ m_Or(m_Specific(RHS), m_Value())))) {
+ if (Pred == ICmpInst::ICMP_ULT)
+ return getFalse(ITy);
+ if (Pred == ICmpInst::ICMP_UGE)
+ return getTrue(ITy);
+ }
+ // icmp pred X, (or X, Y)
+ if (RBO && match(RBO, m_CombineOr(m_Or(m_Value(), m_Specific(LHS)),
+ m_Or(m_Specific(LHS), m_Value())))) {
+ if (Pred == ICmpInst::ICMP_ULE)
+ return getTrue(ITy);
+ if (Pred == ICmpInst::ICMP_UGT)
+ return getFalse(ITy);
+ }
+
+ // icmp pred (and X, Y), X
+ if (LBO && match(LBO, m_CombineOr(m_And(m_Value(), m_Specific(RHS)),
+ m_And(m_Specific(RHS), m_Value())))) {
+ if (Pred == ICmpInst::ICMP_UGT)
+ return getFalse(ITy);
+ if (Pred == ICmpInst::ICMP_ULE)
+ return getTrue(ITy);
+ }
+ // icmp pred X, (and X, Y)
+ if (RBO && match(RBO, m_CombineOr(m_And(m_Value(), m_Specific(LHS)),
+ m_And(m_Specific(LHS), m_Value())))) {
+ if (Pred == ICmpInst::ICMP_UGE)
+ return getTrue(ITy);
+ if (Pred == ICmpInst::ICMP_ULT)
+ return getFalse(ITy);
+ }
+
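
Spelled out (a sketch), the four blocks above encode the unsigned facts (X | Y) >=u X and (X & Y) <=u X:

  //   icmp ult (or X, Y), X   -->  false     icmp uge (or X, Y), X   -->  true
  //   icmp ugt X, (or X, Y)   -->  false     icmp ule X, (or X, Y)   -->  true
  //   icmp ugt (and X, Y), X  -->  false     icmp ule (and X, Y), X  -->  true
  //   icmp ult X, (and X, Y)  -->  false     icmp uge X, (and X, Y)  -->  true
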
// 0 - (zext X) pred C
if (!CmpInst::isUnsigned(Pred) && match(LHS, m_Neg(m_ZExt(m_Value())))) {
if (ConstantInt *RHSC = dyn_cast<ConstantInt>(RHS)) {
@@ -2515,8 +2646,8 @@ static Value *SimplifyICmpInst(unsigned Predicate, Value *LHS, Value *RHS,
break;
case ICmpInst::ICMP_SGT:
case ICmpInst::ICMP_SGE:
- ComputeSignBit(RHS, KnownNonNegative, KnownNegative, Q.DL,
- 0, Q.AT, Q.CxtI, Q.DT);
+ ComputeSignBit(RHS, KnownNonNegative, KnownNegative, Q.DL, 0, Q.AC,
+ Q.CxtI, Q.DT);
if (!KnownNonNegative)
break;
// fall-through
@@ -2526,8 +2657,8 @@ static Value *SimplifyICmpInst(unsigned Predicate, Value *LHS, Value *RHS,
return getFalse(ITy);
case ICmpInst::ICMP_SLT:
case ICmpInst::ICMP_SLE:
- ComputeSignBit(RHS, KnownNonNegative, KnownNegative, Q.DL,
- 0, Q.AT, Q.CxtI, Q.DT);
+ ComputeSignBit(RHS, KnownNonNegative, KnownNegative, Q.DL, 0, Q.AC,
+ Q.CxtI, Q.DT);
if (!KnownNonNegative)
break;
// fall-through
@@ -2546,8 +2677,8 @@ static Value *SimplifyICmpInst(unsigned Predicate, Value *LHS, Value *RHS,
break;
case ICmpInst::ICMP_SGT:
case ICmpInst::ICMP_SGE:
- ComputeSignBit(LHS, KnownNonNegative, KnownNegative, Q.DL,
- 0, Q.AT, Q.CxtI, Q.DT);
+ ComputeSignBit(LHS, KnownNonNegative, KnownNegative, Q.DL, 0, Q.AC,
+ Q.CxtI, Q.DT);
if (!KnownNonNegative)
break;
// fall-through
@@ -2557,8 +2688,8 @@ static Value *SimplifyICmpInst(unsigned Predicate, Value *LHS, Value *RHS,
return getTrue(ITy);
case ICmpInst::ICMP_SLT:
case ICmpInst::ICMP_SLE:
- ComputeSignBit(LHS, KnownNonNegative, KnownNegative, Q.DL,
- 0, Q.AT, Q.CxtI, Q.DT);
+ ComputeSignBit(LHS, KnownNonNegative, KnownNegative, Q.DL, 0, Q.AC,
+ Q.CxtI, Q.DT);
if (!KnownNonNegative)
break;
// fall-through
@@ -2867,7 +2998,7 @@ static Value *SimplifyICmpInst(unsigned Predicate, Value *LHS, Value *RHS,
uint32_t BitWidth = CI->getBitWidth();
APInt LHSKnownZero(BitWidth, 0);
APInt LHSKnownOne(BitWidth, 0);
- computeKnownBits(LHS, LHSKnownZero, LHSKnownOne, Q.DL, /*Depth=*/0, Q.AT,
+ computeKnownBits(LHS, LHSKnownZero, LHSKnownOne, Q.DL, /*Depth=*/0, Q.AC,
Q.CxtI, Q.DT);
const APInt &RHSVal = CI->getValue();
if (((LHSKnownZero & RHSVal) != 0) || ((LHSKnownOne & ~RHSVal) != 0))
@@ -2895,10 +3026,9 @@ static Value *SimplifyICmpInst(unsigned Predicate, Value *LHS, Value *RHS,
Value *llvm::SimplifyICmpInst(unsigned Predicate, Value *LHS, Value *RHS,
const DataLayout *DL,
const TargetLibraryInfo *TLI,
- const DominatorTree *DT,
- AssumptionTracker *AT,
+ const DominatorTree *DT, AssumptionCache *AC,
Instruction *CxtI) {
- return ::SimplifyICmpInst(Predicate, LHS, RHS, Query (DL, TLI, DT, AT, CxtI),
+ return ::SimplifyICmpInst(Predicate, LHS, RHS, Query(DL, TLI, DT, AC, CxtI),
RecursionLimit);
}
@@ -2936,44 +3066,57 @@ static Value *SimplifyFCmpInst(unsigned Predicate, Value *LHS, Value *RHS,
}
// Handle fcmp with constant RHS
- if (Constant *RHSC = dyn_cast<Constant>(RHS)) {
+ if (ConstantFP *CFP = dyn_cast<ConstantFP>(RHS)) {
// If the constant is a nan, see if we can fold the comparison based on it.
- if (ConstantFP *CFP = dyn_cast<ConstantFP>(RHSC)) {
- if (CFP->getValueAPF().isNaN()) {
- if (FCmpInst::isOrdered(Pred)) // True "if ordered and foo"
+ if (CFP->getValueAPF().isNaN()) {
+ if (FCmpInst::isOrdered(Pred)) // True "if ordered and foo"
+ return ConstantInt::getFalse(CFP->getContext());
+ assert(FCmpInst::isUnordered(Pred) &&
+ "Comparison must be either ordered or unordered!");
+ // True if unordered.
+ return ConstantInt::getTrue(CFP->getContext());
+ }
+ // Check whether the constant is an infinity.
+ if (CFP->getValueAPF().isInfinity()) {
+ if (CFP->getValueAPF().isNegative()) {
+ switch (Pred) {
+ case FCmpInst::FCMP_OLT:
+ // No value is ordered and less than negative infinity.
return ConstantInt::getFalse(CFP->getContext());
- assert(FCmpInst::isUnordered(Pred) &&
- "Comparison must be either ordered or unordered!");
- // True if unordered.
- return ConstantInt::getTrue(CFP->getContext());
- }
- // Check whether the constant is an infinity.
- if (CFP->getValueAPF().isInfinity()) {
- if (CFP->getValueAPF().isNegative()) {
- switch (Pred) {
- case FCmpInst::FCMP_OLT:
- // No value is ordered and less than negative infinity.
- return ConstantInt::getFalse(CFP->getContext());
- case FCmpInst::FCMP_UGE:
- // All values are unordered with or at least negative infinity.
- return ConstantInt::getTrue(CFP->getContext());
- default:
- break;
- }
- } else {
- switch (Pred) {
- case FCmpInst::FCMP_OGT:
- // No value is ordered and greater than infinity.
- return ConstantInt::getFalse(CFP->getContext());
- case FCmpInst::FCMP_ULE:
- // All values are unordered with and at most infinity.
- return ConstantInt::getTrue(CFP->getContext());
- default:
- break;
- }
+ case FCmpInst::FCMP_UGE:
+ // All values are unordered with or at least negative infinity.
+ return ConstantInt::getTrue(CFP->getContext());
+ default:
+ break;
+ }
+ } else {
+ switch (Pred) {
+ case FCmpInst::FCMP_OGT:
+ // No value is ordered and greater than infinity.
+ return ConstantInt::getFalse(CFP->getContext());
+ case FCmpInst::FCMP_ULE:
+ // All values are unordered with and at most infinity.
+ return ConstantInt::getTrue(CFP->getContext());
+ default:
+ break;
}
}
}
+ if (CFP->getValueAPF().isZero()) {
+ switch (Pred) {
+ case FCmpInst::FCMP_UGE:
+ if (CannotBeOrderedLessThanZero(LHS))
+ return ConstantInt::getTrue(CFP->getContext());
+ break;
+ case FCmpInst::FCMP_OLT:
+ // X < 0
+ if (CannotBeOrderedLessThanZero(LHS))
+ return ConstantInt::getFalse(CFP->getContext());
+ break;
+ default:
+ break;
+ }
+ }
}
// If the comparison is with the result of a select instruction, check whether
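
Worked case for the new zero-RHS folds above (a sketch; it assumes CannotBeOrderedLessThanZero recognizes the value, as it does for fabs-like results):

  //   %x = call float @llvm.fabs.f32(float %v)
  //   fcmp uge float %x, 0.0  -->  true    (%x is either NaN or >= +0.0)
  //   fcmp olt float %x, 0.0  -->  false   (%x is never ordered and < 0)
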
@@ -2994,10 +3137,9 @@ static Value *SimplifyFCmpInst(unsigned Predicate, Value *LHS, Value *RHS,
Value *llvm::SimplifyFCmpInst(unsigned Predicate, Value *LHS, Value *RHS,
const DataLayout *DL,
const TargetLibraryInfo *TLI,
- const DominatorTree *DT,
- AssumptionTracker *AT,
+ const DominatorTree *DT, AssumptionCache *AC,
const Instruction *CxtI) {
- return ::SimplifyFCmpInst(Predicate, LHS, RHS, Query (DL, TLI, DT, AT, CxtI),
+ return ::SimplifyFCmpInst(Predicate, LHS, RHS, Query(DL, TLI, DT, AC, CxtI),
RecursionLimit);
}
@@ -3029,17 +3171,71 @@ static Value *SimplifySelectInst(Value *CondVal, Value *TrueVal,
if (isa<UndefValue>(FalseVal)) // select C, X, undef -> X
return TrueVal;
+ const auto *ICI = dyn_cast<ICmpInst>(CondVal);
+ unsigned BitWidth = TrueVal->getType()->getScalarSizeInBits();
+ if (ICI && BitWidth) {
+ ICmpInst::Predicate Pred = ICI->getPredicate();
+ APInt MinSignedValue = APInt::getSignBit(BitWidth);
+ Value *X;
+ const APInt *Y;
+ bool TrueWhenUnset;
+ bool IsBitTest = false;
+ if (ICmpInst::isEquality(Pred) &&
+ match(ICI->getOperand(0), m_And(m_Value(X), m_APInt(Y))) &&
+ match(ICI->getOperand(1), m_Zero())) {
+ IsBitTest = true;
+ TrueWhenUnset = Pred == ICmpInst::ICMP_EQ;
+ } else if (Pred == ICmpInst::ICMP_SLT &&
+ match(ICI->getOperand(1), m_Zero())) {
+ X = ICI->getOperand(0);
+ Y = &MinSignedValue;
+ IsBitTest = true;
+ TrueWhenUnset = false;
+ } else if (Pred == ICmpInst::ICMP_SGT &&
+ match(ICI->getOperand(1), m_AllOnes())) {
+ X = ICI->getOperand(0);
+ Y = &MinSignedValue;
+ IsBitTest = true;
+ TrueWhenUnset = true;
+ }
+ if (IsBitTest) {
+ const APInt *C;
+ // (X & Y) == 0 ? X & ~Y : X --> X
+ // (X & Y) != 0 ? X & ~Y : X --> X & ~Y
+ if (FalseVal == X && match(TrueVal, m_And(m_Specific(X), m_APInt(C))) &&
+ *Y == ~*C)
+ return TrueWhenUnset ? FalseVal : TrueVal;
+ // (X & Y) == 0 ? X : X & ~Y --> X & ~Y
+ // (X & Y) != 0 ? X : X & ~Y --> X
+ if (TrueVal == X && match(FalseVal, m_And(m_Specific(X), m_APInt(C))) &&
+ *Y == ~*C)
+ return TrueWhenUnset ? FalseVal : TrueVal;
+
+ if (Y->isPowerOf2()) {
+ // (X & Y) == 0 ? X | Y : X --> X | Y
+ // (X & Y) != 0 ? X | Y : X --> X
+ if (FalseVal == X && match(TrueVal, m_Or(m_Specific(X), m_APInt(C))) &&
+ *Y == *C)
+ return TrueWhenUnset ? TrueVal : FalseVal;
+ // (X & Y) == 0 ? X : X | Y --> X
+ // (X & Y) != 0 ? X : X | Y --> X | Y
+ if (TrueVal == X && match(FalseVal, m_Or(m_Specific(X), m_APInt(C))) &&
+ *Y == *C)
+ return TrueWhenUnset ? TrueVal : FalseVal;
+ }
+ }
+ }
+
return nullptr;
}
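
Worked cases for the bit-test patterns above with Y = 4 (a sketch in the notation of the hunk's comments); each select folds because both arms compute the same value:

  //   (X & 4) == 0 ? X & ~4 : X  -->  X       (both arms equal X)
  //   (X & 4) == 0 ? X : X & ~4  -->  X & ~4  (both arms equal X & ~4)
  //   (X & 4) == 0 ? X | 4 : X   -->  X | 4   (4 is a power of two)
  //   (X & 4) == 0 ? X : X | 4   -->  X
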
Value *llvm::SimplifySelectInst(Value *Cond, Value *TrueVal, Value *FalseVal,
const DataLayout *DL,
const TargetLibraryInfo *TLI,
- const DominatorTree *DT,
- AssumptionTracker *AT,
+ const DominatorTree *DT, AssumptionCache *AC,
const Instruction *CxtI) {
return ::SimplifySelectInst(Cond, TrueVal, FalseVal,
- Query (DL, TLI, DT, AT, CxtI), RecursionLimit);
+ Query(DL, TLI, DT, AC, CxtI), RecursionLimit);
}
/// SimplifyGEPInst - Given operands for an GetElementPtrInst, see if we can
@@ -3126,9 +3322,9 @@ static Value *SimplifyGEPInst(ArrayRef<Value *> Ops, const Query &Q, unsigned) {
Value *llvm::SimplifyGEPInst(ArrayRef<Value *> Ops, const DataLayout *DL,
const TargetLibraryInfo *TLI,
- const DominatorTree *DT, AssumptionTracker *AT,
+ const DominatorTree *DT, AssumptionCache *AC,
const Instruction *CxtI) {
- return ::SimplifyGEPInst(Ops, Query (DL, TLI, DT, AT, CxtI), RecursionLimit);
+ return ::SimplifyGEPInst(Ops, Query(DL, TLI, DT, AC, CxtI), RecursionLimit);
}
/// SimplifyInsertValueInst - Given operands for an InsertValueInst, see if we
@@ -3160,15 +3356,11 @@ static Value *SimplifyInsertValueInst(Value *Agg, Value *Val,
return nullptr;
}
-Value *llvm::SimplifyInsertValueInst(Value *Agg, Value *Val,
- ArrayRef<unsigned> Idxs,
- const DataLayout *DL,
- const TargetLibraryInfo *TLI,
- const DominatorTree *DT,
- AssumptionTracker *AT,
- const Instruction *CxtI) {
- return ::SimplifyInsertValueInst(Agg, Val, Idxs,
- Query (DL, TLI, DT, AT, CxtI),
+Value *llvm::SimplifyInsertValueInst(
+ Value *Agg, Value *Val, ArrayRef<unsigned> Idxs, const DataLayout *DL,
+ const TargetLibraryInfo *TLI, const DominatorTree *DT, AssumptionCache *AC,
+ const Instruction *CxtI) {
+ return ::SimplifyInsertValueInst(Agg, Val, Idxs, Query(DL, TLI, DT, AC, CxtI),
RecursionLimit);
}
@@ -3215,10 +3407,9 @@ static Value *SimplifyTruncInst(Value *Op, Type *Ty, const Query &Q, unsigned) {
Value *llvm::SimplifyTruncInst(Value *Op, Type *Ty, const DataLayout *DL,
const TargetLibraryInfo *TLI,
- const DominatorTree *DT,
- AssumptionTracker *AT,
+ const DominatorTree *DT, AssumptionCache *AC,
const Instruction *CxtI) {
- return ::SimplifyTruncInst(Op, Ty, Query (DL, TLI, DT, AT, CxtI),
+ return ::SimplifyTruncInst(Op, Ty, Query(DL, TLI, DT, AC, CxtI),
RecursionLimit);
}
@@ -3246,10 +3437,12 @@ static Value *SimplifyBinOp(unsigned Opcode, Value *LHS, Value *RHS,
return SimplifyFMulInst (LHS, RHS, FastMathFlags(), Q, MaxRecurse);
case Instruction::SDiv: return SimplifySDivInst(LHS, RHS, Q, MaxRecurse);
case Instruction::UDiv: return SimplifyUDivInst(LHS, RHS, Q, MaxRecurse);
- case Instruction::FDiv: return SimplifyFDivInst(LHS, RHS, Q, MaxRecurse);
+ case Instruction::FDiv:
+ return SimplifyFDivInst(LHS, RHS, FastMathFlags(), Q, MaxRecurse);
case Instruction::SRem: return SimplifySRemInst(LHS, RHS, Q, MaxRecurse);
case Instruction::URem: return SimplifyURemInst(LHS, RHS, Q, MaxRecurse);
- case Instruction::FRem: return SimplifyFRemInst(LHS, RHS, Q, MaxRecurse);
+ case Instruction::FRem:
+ return SimplifyFRemInst(LHS, RHS, FastMathFlags(), Q, MaxRecurse);
case Instruction::Shl:
return SimplifyShlInst(LHS, RHS, /*isNSW*/false, /*isNUW*/false,
Q, MaxRecurse);
@@ -3289,14 +3482,42 @@ static Value *SimplifyBinOp(unsigned Opcode, Value *LHS, Value *RHS,
}
}
+/// SimplifyFPBinOp - Given operands for a BinaryOperator, see if we can
+/// fold the result. If not, this returns null.
+/// In contrast to SimplifyBinOp, try to use the FastMathFlags when folding
+/// the result. When FastMathFlags are not needed, fall back to SimplifyBinOp.
+static Value *SimplifyFPBinOp(unsigned Opcode, Value *LHS, Value *RHS,
+ const FastMathFlags &FMF, const Query &Q,
+ unsigned MaxRecurse) {
+ switch (Opcode) {
+ case Instruction::FAdd:
+ return SimplifyFAddInst(LHS, RHS, FMF, Q, MaxRecurse);
+ case Instruction::FSub:
+ return SimplifyFSubInst(LHS, RHS, FMF, Q, MaxRecurse);
+ case Instruction::FMul:
+ return SimplifyFMulInst(LHS, RHS, FMF, Q, MaxRecurse);
+ default:
+ return SimplifyBinOp(Opcode, LHS, RHS, Q, MaxRecurse);
+ }
+}
+
Value *llvm::SimplifyBinOp(unsigned Opcode, Value *LHS, Value *RHS,
const DataLayout *DL, const TargetLibraryInfo *TLI,
- const DominatorTree *DT, AssumptionTracker *AT,
+ const DominatorTree *DT, AssumptionCache *AC,
const Instruction *CxtI) {
- return ::SimplifyBinOp(Opcode, LHS, RHS, Query (DL, TLI, DT, AT, CxtI),
+ return ::SimplifyBinOp(Opcode, LHS, RHS, Query(DL, TLI, DT, AC, CxtI),
RecursionLimit);
}
+Value *llvm::SimplifyFPBinOp(unsigned Opcode, Value *LHS, Value *RHS,
+ const FastMathFlags &FMF, const DataLayout *DL,
+ const TargetLibraryInfo *TLI,
+ const DominatorTree *DT, AssumptionCache *AC,
+ const Instruction *CxtI) {
+ return ::SimplifyFPBinOp(Opcode, LHS, RHS, FMF, Query(DL, TLI, DT, AC, CxtI),
+ RecursionLimit);
+}
+
/// SimplifyCmpInst - Given operands for a CmpInst, see if we can
/// fold the result.
static Value *SimplifyCmpInst(unsigned Predicate, Value *LHS, Value *RHS,
@@ -3308,9 +3529,9 @@ static Value *SimplifyCmpInst(unsigned Predicate, Value *LHS, Value *RHS,
Value *llvm::SimplifyCmpInst(unsigned Predicate, Value *LHS, Value *RHS,
const DataLayout *DL, const TargetLibraryInfo *TLI,
- const DominatorTree *DT, AssumptionTracker *AT,
+ const DominatorTree *DT, AssumptionCache *AC,
const Instruction *CxtI) {
- return ::SimplifyCmpInst(Predicate, LHS, RHS, Query (DL, TLI, DT, AT, CxtI),
+ return ::SimplifyCmpInst(Predicate, LHS, RHS, Query(DL, TLI, DT, AC, CxtI),
RecursionLimit);
}
@@ -3384,27 +3605,25 @@ static Value *SimplifyCall(Value *V, IterTy ArgBegin, IterTy ArgEnd,
Value *llvm::SimplifyCall(Value *V, User::op_iterator ArgBegin,
User::op_iterator ArgEnd, const DataLayout *DL,
- const TargetLibraryInfo *TLI,
- const DominatorTree *DT, AssumptionTracker *AT,
- const Instruction *CxtI) {
- return ::SimplifyCall(V, ArgBegin, ArgEnd, Query(DL, TLI, DT, AT, CxtI),
+ const TargetLibraryInfo *TLI, const DominatorTree *DT,
+ AssumptionCache *AC, const Instruction *CxtI) {
+ return ::SimplifyCall(V, ArgBegin, ArgEnd, Query(DL, TLI, DT, AC, CxtI),
RecursionLimit);
}
Value *llvm::SimplifyCall(Value *V, ArrayRef<Value *> Args,
const DataLayout *DL, const TargetLibraryInfo *TLI,
- const DominatorTree *DT, AssumptionTracker *AT,
+ const DominatorTree *DT, AssumptionCache *AC,
const Instruction *CxtI) {
return ::SimplifyCall(V, Args.begin(), Args.end(),
- Query(DL, TLI, DT, AT, CxtI), RecursionLimit);
+ Query(DL, TLI, DT, AC, CxtI), RecursionLimit);
}
/// SimplifyInstruction - See if we can compute a simplified version of this
/// instruction. If not, this returns null.
Value *llvm::SimplifyInstruction(Instruction *I, const DataLayout *DL,
const TargetLibraryInfo *TLI,
- const DominatorTree *DT,
- AssumptionTracker *AT) {
+ const DominatorTree *DT, AssumptionCache *AC) {
Value *Result;
switch (I->getOpcode()) {
@@ -3413,122 +3632,122 @@ Value *llvm::SimplifyInstruction(Instruction *I, const DataLayout *DL,
break;
case Instruction::FAdd:
Result = SimplifyFAddInst(I->getOperand(0), I->getOperand(1),
- I->getFastMathFlags(), DL, TLI, DT, AT, I);
+ I->getFastMathFlags(), DL, TLI, DT, AC, I);
break;
case Instruction::Add:
Result = SimplifyAddInst(I->getOperand(0), I->getOperand(1),
cast<BinaryOperator>(I)->hasNoSignedWrap(),
- cast<BinaryOperator>(I)->hasNoUnsignedWrap(),
- DL, TLI, DT, AT, I);
+ cast<BinaryOperator>(I)->hasNoUnsignedWrap(), DL,
+ TLI, DT, AC, I);
break;
case Instruction::FSub:
Result = SimplifyFSubInst(I->getOperand(0), I->getOperand(1),
- I->getFastMathFlags(), DL, TLI, DT, AT, I);
+ I->getFastMathFlags(), DL, TLI, DT, AC, I);
break;
case Instruction::Sub:
Result = SimplifySubInst(I->getOperand(0), I->getOperand(1),
cast<BinaryOperator>(I)->hasNoSignedWrap(),
- cast<BinaryOperator>(I)->hasNoUnsignedWrap(),
- DL, TLI, DT, AT, I);
+ cast<BinaryOperator>(I)->hasNoUnsignedWrap(), DL,
+ TLI, DT, AC, I);
break;
case Instruction::FMul:
Result = SimplifyFMulInst(I->getOperand(0), I->getOperand(1),
- I->getFastMathFlags(), DL, TLI, DT, AT, I);
+ I->getFastMathFlags(), DL, TLI, DT, AC, I);
break;
case Instruction::Mul:
- Result = SimplifyMulInst(I->getOperand(0), I->getOperand(1),
- DL, TLI, DT, AT, I);
+ Result =
+ SimplifyMulInst(I->getOperand(0), I->getOperand(1), DL, TLI, DT, AC, I);
break;
case Instruction::SDiv:
- Result = SimplifySDivInst(I->getOperand(0), I->getOperand(1),
- DL, TLI, DT, AT, I);
+ Result = SimplifySDivInst(I->getOperand(0), I->getOperand(1), DL, TLI, DT,
+ AC, I);
break;
case Instruction::UDiv:
- Result = SimplifyUDivInst(I->getOperand(0), I->getOperand(1),
- DL, TLI, DT, AT, I);
+ Result = SimplifyUDivInst(I->getOperand(0), I->getOperand(1), DL, TLI, DT,
+ AC, I);
break;
case Instruction::FDiv:
Result = SimplifyFDivInst(I->getOperand(0), I->getOperand(1),
- DL, TLI, DT, AT, I);
+ I->getFastMathFlags(), DL, TLI, DT, AC, I);
break;
case Instruction::SRem:
- Result = SimplifySRemInst(I->getOperand(0), I->getOperand(1),
- DL, TLI, DT, AT, I);
+ Result = SimplifySRemInst(I->getOperand(0), I->getOperand(1), DL, TLI, DT,
+ AC, I);
break;
case Instruction::URem:
- Result = SimplifyURemInst(I->getOperand(0), I->getOperand(1),
- DL, TLI, DT, AT, I);
+ Result = SimplifyURemInst(I->getOperand(0), I->getOperand(1), DL, TLI, DT,
+ AC, I);
break;
case Instruction::FRem:
Result = SimplifyFRemInst(I->getOperand(0), I->getOperand(1),
- DL, TLI, DT, AT, I);
+ I->getFastMathFlags(), DL, TLI, DT, AC, I);
break;
case Instruction::Shl:
Result = SimplifyShlInst(I->getOperand(0), I->getOperand(1),
cast<BinaryOperator>(I)->hasNoSignedWrap(),
- cast<BinaryOperator>(I)->hasNoUnsignedWrap(),
- DL, TLI, DT, AT, I);
+ cast<BinaryOperator>(I)->hasNoUnsignedWrap(), DL,
+ TLI, DT, AC, I);
break;
case Instruction::LShr:
Result = SimplifyLShrInst(I->getOperand(0), I->getOperand(1),
- cast<BinaryOperator>(I)->isExact(),
- DL, TLI, DT, AT, I);
+ cast<BinaryOperator>(I)->isExact(), DL, TLI, DT,
+ AC, I);
break;
case Instruction::AShr:
Result = SimplifyAShrInst(I->getOperand(0), I->getOperand(1),
- cast<BinaryOperator>(I)->isExact(),
- DL, TLI, DT, AT, I);
+ cast<BinaryOperator>(I)->isExact(), DL, TLI, DT,
+ AC, I);
break;
case Instruction::And:
- Result = SimplifyAndInst(I->getOperand(0), I->getOperand(1),
- DL, TLI, DT, AT, I);
+ Result =
+ SimplifyAndInst(I->getOperand(0), I->getOperand(1), DL, TLI, DT, AC, I);
break;
case Instruction::Or:
- Result = SimplifyOrInst(I->getOperand(0), I->getOperand(1), DL, TLI, DT,
- AT, I);
+ Result =
+ SimplifyOrInst(I->getOperand(0), I->getOperand(1), DL, TLI, DT, AC, I);
break;
case Instruction::Xor:
- Result = SimplifyXorInst(I->getOperand(0), I->getOperand(1),
- DL, TLI, DT, AT, I);
+ Result =
+ SimplifyXorInst(I->getOperand(0), I->getOperand(1), DL, TLI, DT, AC, I);
break;
case Instruction::ICmp:
- Result = SimplifyICmpInst(cast<ICmpInst>(I)->getPredicate(),
- I->getOperand(0), I->getOperand(1),
- DL, TLI, DT, AT, I);
+ Result =
+ SimplifyICmpInst(cast<ICmpInst>(I)->getPredicate(), I->getOperand(0),
+ I->getOperand(1), DL, TLI, DT, AC, I);
break;
case Instruction::FCmp:
- Result = SimplifyFCmpInst(cast<FCmpInst>(I)->getPredicate(),
- I->getOperand(0), I->getOperand(1),
- DL, TLI, DT, AT, I);
+ Result =
+ SimplifyFCmpInst(cast<FCmpInst>(I)->getPredicate(), I->getOperand(0),
+ I->getOperand(1), DL, TLI, DT, AC, I);
break;
case Instruction::Select:
Result = SimplifySelectInst(I->getOperand(0), I->getOperand(1),
- I->getOperand(2), DL, TLI, DT, AT, I);
+ I->getOperand(2), DL, TLI, DT, AC, I);
break;
case Instruction::GetElementPtr: {
SmallVector<Value*, 8> Ops(I->op_begin(), I->op_end());
- Result = SimplifyGEPInst(Ops, DL, TLI, DT, AT, I);
+ Result = SimplifyGEPInst(Ops, DL, TLI, DT, AC, I);
break;
}
case Instruction::InsertValue: {
InsertValueInst *IV = cast<InsertValueInst>(I);
Result = SimplifyInsertValueInst(IV->getAggregateOperand(),
IV->getInsertedValueOperand(),
- IV->getIndices(), DL, TLI, DT, AT, I);
+ IV->getIndices(), DL, TLI, DT, AC, I);
break;
}
case Instruction::PHI:
- Result = SimplifyPHINode(cast<PHINode>(I), Query (DL, TLI, DT, AT, I));
+ Result = SimplifyPHINode(cast<PHINode>(I), Query(DL, TLI, DT, AC, I));
break;
case Instruction::Call: {
CallSite CS(cast<CallInst>(I));
- Result = SimplifyCall(CS.getCalledValue(), CS.arg_begin(), CS.arg_end(),
- DL, TLI, DT, AT, I);
+ Result = SimplifyCall(CS.getCalledValue(), CS.arg_begin(), CS.arg_end(), DL,
+ TLI, DT, AC, I);
break;
}
case Instruction::Trunc:
- Result = SimplifyTruncInst(I->getOperand(0), I->getType(), DL, TLI, DT,
- AT, I);
+ Result =
+ SimplifyTruncInst(I->getOperand(0), I->getType(), DL, TLI, DT, AC, I);
break;
}
@@ -3553,7 +3772,7 @@ static bool replaceAndRecursivelySimplifyImpl(Instruction *I, Value *SimpleV,
const DataLayout *DL,
const TargetLibraryInfo *TLI,
const DominatorTree *DT,
- AssumptionTracker *AT) {
+ AssumptionCache *AC) {
bool Simplified = false;
SmallSetVector<Instruction *, 8> Worklist;
@@ -3580,7 +3799,7 @@ static bool replaceAndRecursivelySimplifyImpl(Instruction *I, Value *SimpleV,
I = Worklist[Idx];
// See if this instruction simplifies.
- SimpleV = SimplifyInstruction(I, DL, TLI, DT, AT);
+ SimpleV = SimplifyInstruction(I, DL, TLI, DT, AC);
if (!SimpleV)
continue;
@@ -3603,20 +3822,19 @@ static bool replaceAndRecursivelySimplifyImpl(Instruction *I, Value *SimpleV,
return Simplified;
}
-bool llvm::recursivelySimplifyInstruction(Instruction *I,
- const DataLayout *DL,
+bool llvm::recursivelySimplifyInstruction(Instruction *I, const DataLayout *DL,
const TargetLibraryInfo *TLI,
const DominatorTree *DT,
- AssumptionTracker *AT) {
- return replaceAndRecursivelySimplifyImpl(I, nullptr, DL, TLI, DT, AT);
+ AssumptionCache *AC) {
+ return replaceAndRecursivelySimplifyImpl(I, nullptr, DL, TLI, DT, AC);
}
bool llvm::replaceAndRecursivelySimplify(Instruction *I, Value *SimpleV,
const DataLayout *DL,
const TargetLibraryInfo *TLI,
const DominatorTree *DT,
- AssumptionTracker *AT) {
+ AssumptionCache *AC) {
assert(I != SimpleV && "replaceAndRecursivelySimplify(X,X) is not valid!");
assert(SimpleV && "Must provide a simplified value.");
- return replaceAndRecursivelySimplifyImpl(I, SimpleV, DL, TLI, DT, AT);
+ return replaceAndRecursivelySimplifyImpl(I, SimpleV, DL, TLI, DT, AC);
}
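The hunks above mechanically swap AssumptionTracker for AssumptionCache in every Simplify* entry point. A minimal caller sketch against the updated signatures (F, I, DL, TLI and DT are assumed to come from the enclosing legacy pass; the calls themselves match the ones shown in the hunks):

  AssumptionCache *AC =
      &getAnalysis<AssumptionCacheTracker>().getAssumptionCache(F);
  if (Value *V = SimplifyInstruction(I, DL, TLI, DT, AC))
    // Replace I with V and re-simplify anything that becomes trivial in turn.
    replaceAndRecursivelySimplify(I, V, DL, TLI, DT, AC);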
diff --git a/lib/Analysis/LLVMBuild.txt b/lib/Analysis/LLVMBuild.txt
index a8a8079..3039dde 100644
--- a/lib/Analysis/LLVMBuild.txt
+++ b/lib/Analysis/LLVMBuild.txt
@@ -22,4 +22,4 @@ subdirectories = IPA
type = Library
name = Analysis
parent = Libraries
-required_libraries = Core Support Target
+required_libraries = Core Support
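For reference, the resulting component description after dropping the Target dependency (the [component_0] header line is assumed from the standard LLVMBuild layout; the hunk confirms the remaining lines):

  [component_0]
  type = Library
  name = Analysis
  parent = Libraries
  required_libraries = Core Support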
diff --git a/lib/Analysis/LazyCallGraph.cpp b/lib/Analysis/LazyCallGraph.cpp
index 767da4e..c8d0410 100644
--- a/lib/Analysis/LazyCallGraph.cpp
+++ b/lib/Analysis/LazyCallGraph.cpp
@@ -708,11 +708,11 @@ static void printSCC(raw_ostream &OS, LazyCallGraph::SCC &SCC) {
OS << "\n";
}
-PreservedAnalyses LazyCallGraphPrinterPass::run(Module *M,
+PreservedAnalyses LazyCallGraphPrinterPass::run(Module &M,
ModuleAnalysisManager *AM) {
LazyCallGraph &G = AM->getResult<LazyCallGraphAnalysis>(M);
- OS << "Printing the call graph for module: " << M->getModuleIdentifier()
+ OS << "Printing the call graph for module: " << M.getModuleIdentifier()
<< "\n\n";
SmallPtrSet<LazyCallGraph::Node *, 16> Printed;
@@ -724,5 +724,4 @@ PreservedAnalyses LazyCallGraphPrinterPass::run(Module *M,
printSCC(OS, SCC);
return PreservedAnalyses::all();
-
}
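The run() method now takes the Module by reference, matching the then-new pass manager convention. A sketch of a minimal pass written against the same interface (the class name is illustrative):

  class ModuleNamePrinterPass {
    raw_ostream &OS;

  public:
    explicit ModuleNamePrinterPass(raw_ostream &OS) : OS(OS) {}

    PreservedAnalyses run(Module &M, ModuleAnalysisManager *AM) {
      // Results are pulled from the analysis manager, as above.
      LazyCallGraph &G = AM->getResult<LazyCallGraphAnalysis>(M);
      (void)G;
      OS << "module: " << M.getModuleIdentifier() << "\n";
      return PreservedAnalyses::all();
    }

    static StringRef name() { return "ModuleNamePrinterPass"; }
  };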
diff --git a/lib/Analysis/LazyValueInfo.cpp b/lib/Analysis/LazyValueInfo.cpp
index c712c9f..87c31fd 100644
--- a/lib/Analysis/LazyValueInfo.cpp
+++ b/lib/Analysis/LazyValueInfo.cpp
@@ -1,4 +1,4 @@
-//===- LazyValueInfo.cpp - Value constraint analysis ----------------------===//
+//===- LazyValueInfo.cpp - Value constraint analysis ------------*- C++ -*-===//
//
// The LLVM Compiler Infrastructure
//
@@ -15,8 +15,9 @@
#include "llvm/Analysis/LazyValueInfo.h"
#include "llvm/ADT/DenseSet.h"
#include "llvm/ADT/STLExtras.h"
-#include "llvm/Analysis/AssumptionTracker.h"
+#include "llvm/Analysis/AssumptionCache.h"
#include "llvm/Analysis/ConstantFolding.h"
+#include "llvm/Analysis/TargetLibraryInfo.h"
#include "llvm/Analysis/ValueTracking.h"
#include "llvm/IR/CFG.h"
#include "llvm/IR/ConstantRange.h"
@@ -29,7 +30,6 @@
#include "llvm/IR/ValueHandle.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/raw_ostream.h"
-#include "llvm/Target/TargetLibraryInfo.h"
#include <map>
#include <stack>
using namespace llvm;
@@ -40,8 +40,8 @@ using namespace PatternMatch;
char LazyValueInfo::ID = 0;
INITIALIZE_PASS_BEGIN(LazyValueInfo, "lazy-value-info",
"Lazy Value Information Analysis", false, true)
-INITIALIZE_PASS_DEPENDENCY(AssumptionTracker)
-INITIALIZE_PASS_DEPENDENCY(TargetLibraryInfo)
+INITIALIZE_PASS_DEPENDENCY(AssumptionCacheTracker)
+INITIALIZE_PASS_DEPENDENCY(TargetLibraryInfoWrapperPass)
INITIALIZE_PASS_END(LazyValueInfo, "lazy-value-info",
"Lazy Value Information Analysis", false, true)
@@ -54,8 +54,7 @@ namespace llvm {
// LVILatticeVal
//===----------------------------------------------------------------------===//
-/// LVILatticeVal - This is the information tracked by LazyValueInfo for each
-/// value.
+/// This is the information tracked by LazyValueInfo for each value.
///
/// FIXME: This is basically just for bringup, this can be made a lot more rich
/// in the future.
@@ -63,19 +62,19 @@ namespace llvm {
namespace {
class LVILatticeVal {
enum LatticeValueTy {
- /// undefined - This Value has no known value yet.
+ /// This Value has no known value yet.
undefined,
- /// constant - This Value has a specific constant value.
+ /// This Value has a specific constant value.
constant,
- /// notconstant - This Value is known to not have the specified value.
+
+ /// This Value is known to not have the specified value.
notconstant,
- /// constantrange - The Value falls within this range.
+ /// The Value falls within this range.
constantrange,
- /// overdefined - This value is not known to be constant, and we know that
- /// it has a value.
+ /// This value is not known to be constant, and we know that it has a value.
overdefined
};
@@ -128,7 +127,7 @@ public:
return Range;
}
- /// markOverdefined - Return true if this is a change in status.
+ /// Return true if this is a change in status.
bool markOverdefined() {
if (isOverdefined())
return false;
@@ -136,7 +135,7 @@ public:
return true;
}
- /// markConstant - Return true if this is a change in status.
+ /// Return true if this is a change in status.
bool markConstant(Constant *V) {
assert(V && "Marking constant with NULL");
if (ConstantInt *CI = dyn_cast<ConstantInt>(V))
@@ -152,7 +151,7 @@ public:
return true;
}
- /// markNotConstant - Return true if this is a change in status.
+ /// Return true if this is a change in status.
bool markNotConstant(Constant *V) {
assert(V && "Marking constant with NULL");
if (ConstantInt *CI = dyn_cast<ConstantInt>(V))
@@ -170,7 +169,7 @@ public:
return true;
}
- /// markConstantRange - Return true if this is a change in status.
+ /// Return true if this is a change in status.
bool markConstantRange(const ConstantRange NewR) {
if (isConstantRange()) {
if (NewR.isEmptySet())
@@ -190,7 +189,7 @@ public:
return true;
}
- /// mergeIn - Merge the specified lattice value into this one, updating this
+ /// Merge the specified lattice value into this one, updating this
/// one and returning true if anything changed.
bool mergeIn(const LVILatticeVal &RHS) {
if (RHS.isUndefined() || isOverdefined()) return false;
@@ -298,8 +297,7 @@ raw_ostream &operator<<(raw_ostream &OS, const LVILatticeVal &Val) {
//===----------------------------------------------------------------------===//
namespace {
- /// LVIValueHandle - A callback value handle updates the cache when
- /// values are erased.
+ /// A callback value handle updates the cache when values are erased.
class LazyValueInfoCache;
struct LVIValueHandle : public CallbackVH {
LazyValueInfoCache *Parent;
@@ -315,62 +313,62 @@ namespace {
}
namespace {
- /// LazyValueInfoCache - This is the cache kept by LazyValueInfo which
+ /// This is the cache kept by LazyValueInfo which
/// maintains information about queries across the clients' queries.
class LazyValueInfoCache {
- /// ValueCacheEntryTy - This is all of the cached block information for
- /// exactly one Value*. The entries are sorted by the BasicBlock* of the
+ /// This is all of the cached block information for exactly one Value*.
+ /// The entries are sorted by the BasicBlock* of the
/// entries, allowing us to do a lookup with a binary search.
typedef std::map<AssertingVH<BasicBlock>, LVILatticeVal> ValueCacheEntryTy;
- /// ValueCache - This is all of the cached information for all values,
+ /// This is all of the cached information for all values,
/// mapped from Value* to key information.
std::map<LVIValueHandle, ValueCacheEntryTy> ValueCache;
- /// OverDefinedCache - This tracks, on a per-block basis, the set of
- /// values that are over-defined at the end of that block. This is required
+ /// This tracks, on a per-block basis, the set of values that are
+ /// over-defined at the end of that block. This is required
/// for cache updating.
typedef std::pair<AssertingVH<BasicBlock>, Value*> OverDefinedPairTy;
DenseSet<OverDefinedPairTy> OverDefinedCache;
- /// SeenBlocks - Keep track of all blocks that we have ever seen, so we
+ /// Keep track of all blocks that we have ever seen, so we
/// don't spend time removing unused blocks from our caches.
DenseSet<AssertingVH<BasicBlock> > SeenBlocks;
- /// BlockValueStack - This stack holds the state of the value solver
- /// during a query. It basically emulates the callstack of the naive
+ /// This stack holds the state of the value solver during a query.
+ /// It basically emulates the callstack of the naive
/// recursive value lookup process.
std::stack<std::pair<BasicBlock*, Value*> > BlockValueStack;
+ /// Keeps track of which block-value pairs are in BlockValueStack.
+ DenseSet<std::pair<BasicBlock*, Value*> > BlockValueSet;
+
+ /// Push BV onto BlockValueStack unless it's already in there.
+ /// Returns true on success.
+ bool pushBlockValue(const std::pair<BasicBlock *, Value *> &BV) {
+ if (BlockValueSet.count(BV))
+ return false; // It's already in the stack.
+
+ BlockValueStack.push(BV);
+ BlockValueSet.insert(BV);
+ return true;
+ }
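+
+  // For example, solveBlockValueConstantRange() pushes a missing operand
+  // via pushBlockValue() and returns false; solve() then keeps the current
+  // work item on the stack and revisits it after the operand is resolved,
+  // while BlockValueSet prevents the same pair from being pushed twice.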
+
/// A pointer to the cache of @llvm.assume calls.
- AssumptionTracker *AT;
+ AssumptionCache *AC;
/// An optional DL pointer.
const DataLayout *DL;
/// An optional DT pointer.
DominatorTree *DT;
friend struct LVIValueHandle;
-
- /// OverDefinedCacheUpdater - A helper object that ensures that the
- /// OverDefinedCache is updated whenever solveBlockValue returns.
- struct OverDefinedCacheUpdater {
- LazyValueInfoCache *Parent;
- Value *Val;
- BasicBlock *BB;
- LVILatticeVal &BBLV;
-
- OverDefinedCacheUpdater(Value *V, BasicBlock *B, LVILatticeVal &LV,
- LazyValueInfoCache *P)
- : Parent(P), Val(V), BB(B), BBLV(LV) { }
-
- bool markResult(bool changed) {
- if (changed && BBLV.isOverdefined())
- Parent->OverDefinedCache.insert(std::make_pair(BB, Val));
- return changed;
- }
- };
-
+ void insertResult(Value *Val, BasicBlock *BB, const LVILatticeVal &Result) {
+ SeenBlocks.insert(BB);
+ lookup(Val)[BB] = Result;
+ if (Result.isOverdefined())
+ OverDefinedCache.insert(std::make_pair(BB, Val));
+ }
LVILatticeVal getBlockValue(Value *Val, BasicBlock *BB);
bool getEdgeValue(Value *V, BasicBlock *F, BasicBlock *T,
@@ -398,27 +396,26 @@ namespace {
}
public:
- /// getValueInBlock - This is the query interface to determine the lattice
+ /// This is the query interface to determine the lattice
/// value for the specified Value* at the end of the specified block.
LVILatticeVal getValueInBlock(Value *V, BasicBlock *BB,
Instruction *CxtI = nullptr);
- /// getValueAt - This is the query interface to determine the lattice
+ /// This is the query interface to determine the lattice
/// value for the specified Value* at the specified instruction (generally
/// from an assume intrinsic).
LVILatticeVal getValueAt(Value *V, Instruction *CxtI);
- /// getValueOnEdge - This is the query interface to determine the lattice
+ /// This is the query interface to determine the lattice
/// value for the specified Value* that is true on the specified edge.
LVILatticeVal getValueOnEdge(Value *V, BasicBlock *FromBB,BasicBlock *ToBB,
Instruction *CxtI = nullptr);
- /// threadEdge - This is the update interface to inform the cache that an
- /// edge from PredBB to OldSucc has been threaded to be from PredBB to
- /// NewSucc.
+ /// This is the update interface to inform the cache that an edge from
+ /// PredBB to OldSucc has been threaded to be from PredBB to NewSucc.
void threadEdge(BasicBlock *PredBB,BasicBlock *OldSucc,BasicBlock *NewSucc);
- /// eraseBlock - This is part of the update interface to inform the cache
+ /// This is part of the update interface to inform the cache
/// that a block has been deleted.
void eraseBlock(BasicBlock *BB);
@@ -429,9 +426,9 @@ namespace {
OverDefinedCache.clear();
}
- LazyValueInfoCache(AssumptionTracker *AT,
- const DataLayout *DL = nullptr,
- DominatorTree *DT = nullptr) : AT(AT), DL(DL), DT(DT) {}
+ LazyValueInfoCache(AssumptionCache *AC, const DataLayout *DL = nullptr,
+ DominatorTree *DT = nullptr)
+ : AC(AC), DL(DL), DT(DT) {}
};
} // end anonymous namespace
@@ -439,17 +436,11 @@ void LVIValueHandle::deleted() {
typedef std::pair<AssertingVH<BasicBlock>, Value*> OverDefinedPairTy;
SmallVector<OverDefinedPairTy, 4> ToErase;
- for (DenseSet<OverDefinedPairTy>::iterator
- I = Parent->OverDefinedCache.begin(),
- E = Parent->OverDefinedCache.end();
- I != E; ++I) {
- if (I->second == getValPtr())
- ToErase.push_back(*I);
- }
-
- for (SmallVectorImpl<OverDefinedPairTy>::iterator I = ToErase.begin(),
- E = ToErase.end(); I != E; ++I)
- Parent->OverDefinedCache.erase(*I);
+ for (const OverDefinedPairTy &P : Parent->OverDefinedCache)
+ if (P.second == getValPtr())
+ ToErase.push_back(P);
+ for (const OverDefinedPairTy &P : ToErase)
+ Parent->OverDefinedCache.erase(P);
// This erasure deallocates *this, so it MUST happen after we're done
// using any and all members of *this.
@@ -464,15 +455,11 @@ void LazyValueInfoCache::eraseBlock(BasicBlock *BB) {
SeenBlocks.erase(I);
SmallVector<OverDefinedPairTy, 4> ToErase;
- for (DenseSet<OverDefinedPairTy>::iterator I = OverDefinedCache.begin(),
- E = OverDefinedCache.end(); I != E; ++I) {
- if (I->first == BB)
- ToErase.push_back(*I);
- }
-
- for (SmallVectorImpl<OverDefinedPairTy>::iterator I = ToErase.begin(),
- E = ToErase.end(); I != E; ++I)
- OverDefinedCache.erase(*I);
+ for (const OverDefinedPairTy& P : OverDefinedCache)
+ if (P.first == BB)
+ ToErase.push_back(P);
+ for (const OverDefinedPairTy &P : ToErase)
+ OverDefinedCache.erase(P);
for (std::map<LVIValueHandle, ValueCacheEntryTy>::iterator
I = ValueCache.begin(), E = ValueCache.end(); I != E; ++I)
@@ -482,9 +469,18 @@ void LazyValueInfoCache::eraseBlock(BasicBlock *BB) {
void LazyValueInfoCache::solve() {
while (!BlockValueStack.empty()) {
std::pair<BasicBlock*, Value*> &e = BlockValueStack.top();
+ assert(BlockValueSet.count(e) && "Stack value should be in BlockValueSet!");
+
if (solveBlockValue(e.second, e.first)) {
- assert(BlockValueStack.top() == e);
+ // The work item was completely processed.
+ assert(BlockValueStack.top() == e && "Nothing should have been pushed!");
+ assert(lookup(e.second).count(e.first) && "Result should be in cache!");
+
BlockValueStack.pop();
+ BlockValueSet.erase(e);
+ } else {
+ // More work needs to be done before revisiting.
+ assert(BlockValueStack.top() != e && "Stack should have been pushed!");
}
}
}
@@ -514,43 +510,40 @@ bool LazyValueInfoCache::solveBlockValue(Value *Val, BasicBlock *BB) {
if (isa<Constant>(Val))
return true;
- ValueCacheEntryTy &Cache = lookup(Val);
- SeenBlocks.insert(BB);
- LVILatticeVal &BBLV = Cache[BB];
-
- // OverDefinedCacheUpdater is a helper object that will update
- // the OverDefinedCache for us when this method exits. Make sure to
- // call markResult on it as we exist, passing a bool to indicate if the
- // cache needs updating, i.e. if we have solve a new value or not.
- OverDefinedCacheUpdater ODCacheUpdater(Val, BB, BBLV, this);
-
- if (!BBLV.isUndefined()) {
- DEBUG(dbgs() << " reuse BB '" << BB->getName() << "' val=" << BBLV <<'\n');
-
- // Since we're reusing a cached value here, we don't need to update the
- // OverDefinedCahce. The cache will have been properly updated
- // whenever the cached value was inserted.
- ODCacheUpdater.markResult(false);
+ if (lookup(Val).count(BB)) {
+ // If we have a cached value, use that.
+ DEBUG(dbgs() << " reuse BB '" << BB->getName()
+ << "' val=" << lookup(Val)[BB] << '\n');
+
+ // Since we're reusing a cached value, we don't need to update the
+ // OverDefinedCache. The cache will have been properly updated whenever the
+ // cached value was inserted.
return true;
}
- // Otherwise, this is the first time we're seeing this block. Reset the
- // lattice value to overdefined, so that cycles will terminate and be
- // conservatively correct.
- BBLV.markOverdefined();
+ // Hold off inserting this value into the Cache in case we have to return
+ // false and come back later.
+ LVILatticeVal Res;
Instruction *BBI = dyn_cast<Instruction>(Val);
if (!BBI || BBI->getParent() != BB) {
- return ODCacheUpdater.markResult(solveBlockValueNonLocal(BBLV, Val, BB));
+ if (!solveBlockValueNonLocal(Res, Val, BB))
+ return false;
+ insertResult(Val, BB, Res);
+ return true;
}
if (PHINode *PN = dyn_cast<PHINode>(BBI)) {
- return ODCacheUpdater.markResult(solveBlockValuePHINode(BBLV, PN, BB));
+ if (!solveBlockValuePHINode(Res, PN, BB))
+ return false;
+ insertResult(Val, BB, Res);
+ return true;
}
if (AllocaInst *AI = dyn_cast<AllocaInst>(BBI)) {
- BBLV = LVILatticeVal::getNot(ConstantPointerNull::get(AI->getType()));
- return ODCacheUpdater.markResult(true);
+ Res = LVILatticeVal::getNot(ConstantPointerNull::get(AI->getType()));
+ insertResult(Val, BB, Res);
+ return true;
}
// We can only analyze the definitions of certain classes of instructions
@@ -560,8 +553,9 @@ bool LazyValueInfoCache::solveBlockValue(Value *Val, BasicBlock *BB) {
!BBI->getType()->isIntegerTy()) {
DEBUG(dbgs() << " compute BB '" << BB->getName()
<< "' - overdefined because inst def found.\n");
- BBLV.markOverdefined();
- return ODCacheUpdater.markResult(true);
+ Res.markOverdefined();
+ insertResult(Val, BB, Res);
+ return true;
}
// FIXME: We're currently limited to binops with a constant RHS. This should
@@ -571,11 +565,15 @@ bool LazyValueInfoCache::solveBlockValue(Value *Val, BasicBlock *BB) {
DEBUG(dbgs() << " compute BB '" << BB->getName()
<< "' - overdefined because inst def found.\n");
- BBLV.markOverdefined();
- return ODCacheUpdater.markResult(true);
+ Res.markOverdefined();
+ insertResult(Val, BB, Res);
+ return true;
}
- return ODCacheUpdater.markResult(solveBlockValueConstantRange(BBLV, BBI, BB));
+ if (!solveBlockValueConstantRange(Res, BBI, BB))
+ return false;
+ insertResult(Val, BB, Res);
+ return true;
}
static bool InstructionDereferencesPointer(Instruction *I, Value *Ptr) {
@@ -620,9 +618,8 @@ bool LazyValueInfoCache::solveBlockValueNonLocal(LVILatticeVal &BBLV,
// If 'GetUnderlyingObject' didn't converge, skip it. It won't converge
// inside InstructionDereferencesPointer either.
if (UnderlyingVal == GetUnderlyingObject(UnderlyingVal, nullptr, 1)) {
- for (BasicBlock::iterator BI = BB->begin(), BE = BB->end();
- BI != BE; ++BI) {
- if (InstructionDereferencesPointer(BI, UnderlyingVal)) {
+ for (Instruction &I : *BB) {
+ if (InstructionDereferencesPointer(&I, UnderlyingVal)) {
NotNull = true;
break;
}
@@ -724,16 +721,20 @@ static bool getValueFromFromCondition(Value *Val, ICmpInst *ICI,
LVILatticeVal &Result,
bool isTrueDest = true);
-// If we can determine a constant range for the value Val at the context
+// If we can determine a constant range for the value Val in the context
// provided by the instruction BBI, then merge it into BBLV.
-void LazyValueInfoCache::mergeAssumeBlockValueConstantRange(
- Value *Val, LVILatticeVal &BBLV, Instruction *BBI) {
+void LazyValueInfoCache::mergeAssumeBlockValueConstantRange(Value *Val,
+ LVILatticeVal &BBLV,
+ Instruction *BBI) {
BBI = BBI ? BBI : dyn_cast<Instruction>(Val);
if (!BBI)
return;
- for (auto &I : AT->assumptions(BBI->getParent()->getParent())) {
+ for (auto &AssumeVH : AC->assumptions()) {
+ if (!AssumeVH)
+ continue;
+ auto *I = cast<CallInst>(AssumeVH);
if (!isValidAssumeForContext(I, BBI, DL, DT))
continue;
@@ -755,8 +756,10 @@ bool LazyValueInfoCache::solveBlockValueConstantRange(LVILatticeVal &BBLV,
BasicBlock *BB) {
// Figure out the range of the LHS. If that fails, bail.
if (!hasBlockValue(BBI->getOperand(0), BB)) {
- BlockValueStack.push(std::make_pair(BB, BBI->getOperand(0)));
- return false;
+ if (pushBlockValue(std::make_pair(BB, BBI->getOperand(0))))
+ return false;
+ BBLV.markOverdefined();
+ return true;
}
LVILatticeVal LHSVal = getBlockValue(BBI->getOperand(0), BB);
@@ -881,7 +884,7 @@ static bool getEdgeValueLocal(Value *Val, BasicBlock *BBFrom,
// know that v != 0.
if (BranchInst *BI = dyn_cast<BranchInst>(BBFrom->getTerminator())) {
// If this is a conditional branch and only one successor goes to BBTo, then
- // we maybe able to infer something from the condition.
+ // we may be able to infer something from the condition.
if (BI->isConditional() &&
BI->getSuccessor(0) != BI->getSuccessor(1)) {
bool isTrueDest = BI->getSuccessor(0) == BBTo;
@@ -898,9 +901,9 @@ static bool getEdgeValueLocal(Value *Val, BasicBlock *BBFrom,
// If the condition of the branch is an equality comparison, we may be
// able to infer the value.
- ICmpInst *ICI = dyn_cast<ICmpInst>(BI->getCondition());
- if (getValueFromFromCondition(Val, ICI, Result, isTrueDest))
- return true;
+ if (ICmpInst *ICI = dyn_cast<ICmpInst>(BI->getCondition()))
+ if (getValueFromFromCondition(Val, ICI, Result, isTrueDest))
+ return true;
}
}
@@ -914,8 +917,7 @@ static bool getEdgeValueLocal(Value *Val, BasicBlock *BBFrom,
unsigned BitWidth = Val->getType()->getIntegerBitWidth();
ConstantRange EdgesVals(BitWidth, DefaultCase/*isFullSet*/);
- for (SwitchInst::CaseIt i = SI->case_begin(), e = SI->case_end();
- i != e; ++i) {
+ for (SwitchInst::CaseIt i : SI->cases()) {
ConstantRange EdgeVal(i.getCaseValue()->getValue());
if (DefaultCase) {
// It is possible that the default destination is the destination of
@@ -931,8 +933,8 @@ static bool getEdgeValueLocal(Value *Val, BasicBlock *BBFrom,
return false;
}
-/// \brief Compute the value of Val on the edge BBFrom -> BBTo, or the value at
-/// the basic block if the edge does not constraint Val.
+/// \brief Compute the value of Val on the edge BBFrom -> BBTo or the value at
+/// the basic block if the edge does not constrain Val.
bool LazyValueInfoCache::getEdgeValue(Value *Val, BasicBlock *BBFrom,
BasicBlock *BBTo, LVILatticeVal &Result,
Instruction *CxtI) {
@@ -944,15 +946,17 @@ bool LazyValueInfoCache::getEdgeValue(Value *Val, BasicBlock *BBFrom,
if (getEdgeValueLocal(Val, BBFrom, BBTo, Result)) {
if (!Result.isConstantRange() ||
- Result.getConstantRange().getSingleElement())
+ Result.getConstantRange().getSingleElement())
return true;
// FIXME: this check should be moved to the beginning of the function when
// LVI better supports recursive values. Even for the single value case, we
// can intersect to detect dead code (an empty range).
if (!hasBlockValue(Val, BBFrom)) {
- BlockValueStack.push(std::make_pair(BBFrom, Val));
- return false;
+ if (pushBlockValue(std::make_pair(BBFrom, Val)))
+ return false;
+ Result.markOverdefined();
+ return true;
}
// Try to intersect ranges of the BB and the constraint on the edge.
@@ -971,11 +975,13 @@ bool LazyValueInfoCache::getEdgeValue(Value *Val, BasicBlock *BBFrom,
}
if (!hasBlockValue(Val, BBFrom)) {
- BlockValueStack.push(std::make_pair(BBFrom, Val));
- return false;
+ if (pushBlockValue(std::make_pair(BBFrom, Val)))
+ return false;
+ Result.markOverdefined();
+ return true;
}
- // if we couldn't compute the value on the edge, use the value from the BB
+ // If we couldn't compute the value on the edge, use the value from the BB.
Result = getBlockValue(Val, BBFrom);
mergeAssumeBlockValueConstantRange(Val, Result, BBFrom->getTerminator());
// We can use the context instruction (generically the ultimate instruction
@@ -995,7 +1001,9 @@ LVILatticeVal LazyValueInfoCache::getValueInBlock(Value *V, BasicBlock *BB,
DEBUG(dbgs() << "LVI Getting block end value " << *V << " at '"
<< BB->getName() << "'\n");
- BlockValueStack.push(std::make_pair(BB, V));
+ assert(BlockValueStack.empty() && BlockValueSet.empty());
+ pushBlockValue(std::make_pair(BB, V));
+
solve();
LVILatticeVal Result = getBlockValue(V, BB);
mergeAssumeBlockValueConstantRange(V, Result, CxtI);
@@ -1041,7 +1049,7 @@ void LazyValueInfoCache::threadEdge(BasicBlock *PredBB, BasicBlock *OldSucc,
// we clear their entries from the cache, and allow lazy updating to recompute
// them when needed.
- // The updating process is fairly simple: we need to dropped cached info
+ // The updating process is fairly simple: we need to drop cached info
// for all values that were marked overdefined in OldSucc, and for those same
// values in any successor of OldSucc (except NewSucc) in which they were
// also marked overdefined.
@@ -1049,11 +1057,9 @@ void LazyValueInfoCache::threadEdge(BasicBlock *PredBB, BasicBlock *OldSucc,
worklist.push_back(OldSucc);
DenseSet<Value*> ClearSet;
- for (DenseSet<OverDefinedPairTy>::iterator I = OverDefinedCache.begin(),
- E = OverDefinedCache.end(); I != E; ++I) {
- if (I->first == OldSucc)
- ClearSet.insert(I->second);
- }
+ for (OverDefinedPairTy &P : OverDefinedCache)
+ if (P.first == OldSucc)
+ ClearSet.insert(P.second);
// Use a worklist to perform a depth-first search of OldSucc's successors.
// NOTE: We do not need a visited list since any blocks we have already
@@ -1067,15 +1073,14 @@ void LazyValueInfoCache::threadEdge(BasicBlock *PredBB, BasicBlock *OldSucc,
if (ToUpdate == NewSucc) continue;
bool changed = false;
- for (DenseSet<Value*>::iterator I = ClearSet.begin(), E = ClearSet.end();
- I != E; ++I) {
+ for (Value *V : ClearSet) {
// If a value was marked overdefined in OldSucc, and is here too...
DenseSet<OverDefinedPairTy>::iterator OI =
- OverDefinedCache.find(std::make_pair(ToUpdate, *I));
+ OverDefinedCache.find(std::make_pair(ToUpdate, V));
if (OI == OverDefinedCache.end()) continue;
// Remove it from the caches.
- ValueCacheEntryTy &Entry = ValueCache[LVIValueHandle(*I, this)];
+ ValueCacheEntryTy &Entry = ValueCache[LVIValueHandle(V, this)];
ValueCacheEntryTy::iterator CI = Entry.find(ToUpdate);
assert(CI != Entry.end() && "Couldn't find entry to update?");
@@ -1097,18 +1102,17 @@ void LazyValueInfoCache::threadEdge(BasicBlock *PredBB, BasicBlock *OldSucc,
// LazyValueInfo Impl
//===----------------------------------------------------------------------===//
-/// getCache - This lazily constructs the LazyValueInfoCache.
-static LazyValueInfoCache &getCache(void *&PImpl,
- AssumptionTracker *AT,
+/// This lazily constructs the LazyValueInfoCache.
+static LazyValueInfoCache &getCache(void *&PImpl, AssumptionCache *AC,
const DataLayout *DL = nullptr,
DominatorTree *DT = nullptr) {
if (!PImpl)
- PImpl = new LazyValueInfoCache(AT, DL, DT);
+ PImpl = new LazyValueInfoCache(AC, DL, DT);
return *static_cast<LazyValueInfoCache*>(PImpl);
}
bool LazyValueInfo::runOnFunction(Function &F) {
- AT = &getAnalysis<AssumptionTracker>();
+ AC = &getAnalysis<AssumptionCacheTracker>().getAssumptionCache(F);
DominatorTreeWrapperPass *DTWP =
getAnalysisIfAvailable<DominatorTreeWrapperPass>();
@@ -1116,10 +1120,11 @@ bool LazyValueInfo::runOnFunction(Function &F) {
DataLayoutPass *DLP = getAnalysisIfAvailable<DataLayoutPass>();
DL = DLP ? &DLP->getDataLayout() : nullptr;
- TLI = &getAnalysis<TargetLibraryInfo>();
+
+ TLI = &getAnalysis<TargetLibraryInfoWrapperPass>().getTLI();
if (PImpl)
- getCache(PImpl, AT, DL, DT).clear();
+ getCache(PImpl, AC, DL, DT).clear();
// Fully lazy.
return false;
@@ -1127,14 +1132,14 @@ bool LazyValueInfo::runOnFunction(Function &F) {
void LazyValueInfo::getAnalysisUsage(AnalysisUsage &AU) const {
AU.setPreservesAll();
- AU.addRequired<AssumptionTracker>();
- AU.addRequired<TargetLibraryInfo>();
+ AU.addRequired<AssumptionCacheTracker>();
+ AU.addRequired<TargetLibraryInfoWrapperPass>();
}
void LazyValueInfo::releaseMemory() {
// If the cache was allocated, free it.
if (PImpl) {
- delete &getCache(PImpl, AT);
+ delete &getCache(PImpl, AC);
PImpl = nullptr;
}
}
@@ -1142,8 +1147,8 @@ void LazyValueInfo::releaseMemory() {
Constant *LazyValueInfo::getConstant(Value *V, BasicBlock *BB,
Instruction *CxtI) {
LVILatticeVal Result =
- getCache(PImpl, AT, DL, DT).getValueInBlock(V, BB, CxtI);
-
+ getCache(PImpl, AC, DL, DT).getValueInBlock(V, BB, CxtI);
+
if (Result.isConstant())
return Result.getConstant();
if (Result.isConstantRange()) {
@@ -1154,14 +1159,14 @@ Constant *LazyValueInfo::getConstant(Value *V, BasicBlock *BB,
return nullptr;
}
-/// getConstantOnEdge - Determine whether the specified value is known to be a
+/// Determine whether the specified value is known to be a
/// constant on the specified edge. Return null if not.
Constant *LazyValueInfo::getConstantOnEdge(Value *V, BasicBlock *FromBB,
BasicBlock *ToBB,
Instruction *CxtI) {
LVILatticeVal Result =
- getCache(PImpl, AT, DL, DT).getValueOnEdge(V, FromBB, ToBB, CxtI);
-
+ getCache(PImpl, AC, DL, DT).getValueOnEdge(V, FromBB, ToBB, CxtI);
+
if (Result.isConstant())
return Result.getConstant();
if (Result.isConstantRange()) {
@@ -1239,15 +1244,14 @@ getPredicateResult(unsigned Pred, Constant *C, LVILatticeVal &Result,
return LazyValueInfo::Unknown;
}
-/// getPredicateOnEdge - Determine whether the specified value comparison
-/// with a constant is known to be true or false on the specified CFG edge.
-/// Pred is a CmpInst predicate.
+/// Determine whether the specified value comparison with a constant is known to
+/// be true or false on the specified CFG edge. Pred is a CmpInst predicate.
LazyValueInfo::Tristate
LazyValueInfo::getPredicateOnEdge(unsigned Pred, Value *V, Constant *C,
BasicBlock *FromBB, BasicBlock *ToBB,
Instruction *CxtI) {
LVILatticeVal Result =
- getCache(PImpl, AT, DL, DT).getValueOnEdge(V, FromBB, ToBB, CxtI);
+ getCache(PImpl, AC, DL, DT).getValueOnEdge(V, FromBB, ToBB, CxtI);
return getPredicateResult(Pred, C, Result, DL, TLI);
}
@@ -1255,17 +1259,18 @@ LazyValueInfo::getPredicateOnEdge(unsigned Pred, Value *V, Constant *C,
LazyValueInfo::Tristate
LazyValueInfo::getPredicateAt(unsigned Pred, Value *V, Constant *C,
Instruction *CxtI) {
- LVILatticeVal Result =
- getCache(PImpl, AT, DL, DT).getValueAt(V, CxtI);
+ LVILatticeVal Result = getCache(PImpl, AC, DL, DT).getValueAt(V, CxtI);
return getPredicateResult(Pred, C, Result, DL, TLI);
}
void LazyValueInfo::threadEdge(BasicBlock *PredBB, BasicBlock *OldSucc,
BasicBlock *NewSucc) {
- if (PImpl) getCache(PImpl, AT, DL, DT).threadEdge(PredBB, OldSucc, NewSucc);
+ if (PImpl)
+ getCache(PImpl, AC, DL, DT).threadEdge(PredBB, OldSucc, NewSucc);
}
void LazyValueInfo::eraseBlock(BasicBlock *BB) {
- if (PImpl) getCache(PImpl, AT, DL, DT).eraseBlock(BB);
+ if (PImpl)
+ getCache(PImpl, AC, DL, DT).eraseBlock(BB);
}
diff --git a/lib/Analysis/LibCallSemantics.cpp b/lib/Analysis/LibCallSemantics.cpp
index 23639e7..cf752dd 100644
--- a/lib/Analysis/LibCallSemantics.cpp
+++ b/lib/Analysis/LibCallSemantics.cpp
@@ -15,6 +15,7 @@
#include "llvm/Analysis/LibCallSemantics.h"
#include "llvm/ADT/StringMap.h"
+#include "llvm/ADT/StringSwitch.h"
#include "llvm/IR/Function.h"
using namespace llvm;
@@ -61,3 +62,41 @@ LibCallInfo::getFunctionInfo(const Function *F) const {
return Map->lookup(F->getName());
}
+/// See if the given exception handling personality function is one that we
+/// understand. If so, return a description of it; otherwise return Unknown.
+EHPersonality llvm::classifyEHPersonality(const Value *Pers) {
+ const Function *F = dyn_cast<Function>(Pers->stripPointerCasts());
+ if (!F)
+ return EHPersonality::Unknown;
+ return StringSwitch<EHPersonality>(F->getName())
+ .Case("__gnat_eh_personality", EHPersonality::GNU_Ada)
+ .Case("__gxx_personality_v0", EHPersonality::GNU_CXX)
+ .Case("__gcc_personality_v0", EHPersonality::GNU_C)
+ .Case("__objc_personality_v0", EHPersonality::GNU_ObjC)
+ .Case("__except_handler3", EHPersonality::MSVC_X86SEH)
+ .Case("__except_handler4", EHPersonality::MSVC_X86SEH)
+ .Case("__C_specific_handler", EHPersonality::MSVC_Win64SEH)
+ .Case("__CxxFrameHandler3", EHPersonality::MSVC_CXX)
+ .Default(EHPersonality::Unknown);
+}
+
+bool llvm::isAsynchronousEHPersonality(EHPersonality Pers) {
+  // The two SEH personality functions can catch asynchronous exceptions. We
+  // assume unknown personalities don't catch asynchronous exceptions.
+ switch (Pers) {
+ case EHPersonality::MSVC_X86SEH:
+ case EHPersonality::MSVC_Win64SEH:
+ return true;
+ default: return false;
+ }
+ llvm_unreachable("invalid enum");
+}
+
+bool llvm::canSimplifyInvokeNoUnwind(const InvokeInst *II) {
+ const LandingPadInst *LP = II->getLandingPadInst();
+ EHPersonality Personality = classifyEHPersonality(LP->getPersonalityFn());
+ // We can't simplify any invokes to nounwind functions if the personality
+  // function wants to catch asynchronous exceptions. The nounwind attribute
+  // only implies that the function does not throw synchronous exceptions.
+ return !isAsynchronousEHPersonality(Personality);
+}
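A sketch of how a transform might consult the new helpers before rewriting an invoke of a nounwind callee (II is assumed to be in scope; the conversion step is a hypothetical helper, not part of this patch):

  if (canSimplifyInvokeNoUnwind(II)) {
    // The personality cannot deliver asynchronous exceptions, so nounwind
    // really does mean the landing pad is dead.
    convertInvokeToCall(II); // hypothetical helper
  }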
diff --git a/lib/Analysis/Lint.cpp b/lib/Analysis/Lint.cpp
index 8ee9b8a..874ed0a 100644
--- a/lib/Analysis/Lint.cpp
+++ b/lib/Analysis/Lint.cpp
@@ -36,12 +36,14 @@
#include "llvm/Analysis/Lint.h"
#include "llvm/ADT/STLExtras.h"
+#include "llvm/ADT/SmallSet.h"
#include "llvm/Analysis/AliasAnalysis.h"
-#include "llvm/Analysis/AssumptionTracker.h"
+#include "llvm/Analysis/AssumptionCache.h"
#include "llvm/Analysis/ConstantFolding.h"
#include "llvm/Analysis/InstructionSimplify.h"
#include "llvm/Analysis/Loads.h"
#include "llvm/Analysis/Passes.h"
+#include "llvm/Analysis/TargetLibraryInfo.h"
#include "llvm/Analysis/ValueTracking.h"
#include "llvm/IR/CallSite.h"
#include "llvm/IR/DataLayout.h"
@@ -49,11 +51,10 @@
#include "llvm/IR/Function.h"
#include "llvm/IR/InstVisitor.h"
#include "llvm/IR/IntrinsicInst.h"
+#include "llvm/IR/LegacyPassManager.h"
#include "llvm/Pass.h"
-#include "llvm/PassManager.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/raw_ostream.h"
-#include "llvm/Target/TargetLibraryInfo.h"
using namespace llvm;
namespace {
@@ -73,6 +74,8 @@ namespace {
void visitMemoryReference(Instruction &I, Value *Ptr,
uint64_t Size, unsigned Align,
Type *Ty, unsigned Flags);
+ void visitEHBeginCatch(IntrinsicInst *II);
+ void visitEHEndCatch(IntrinsicInst *II);
void visitCallInst(CallInst &I);
void visitInvokeInst(InvokeInst &I);
@@ -102,7 +105,7 @@ namespace {
public:
Module *Mod;
AliasAnalysis *AA;
- AssumptionTracker *AT;
+ AssumptionCache *AC;
DominatorTree *DT;
const DataLayout *DL;
TargetLibraryInfo *TLI;
@@ -120,8 +123,8 @@ namespace {
void getAnalysisUsage(AnalysisUsage &AU) const override {
AU.setPreservesAll();
AU.addRequired<AliasAnalysis>();
- AU.addRequired<AssumptionTracker>();
- AU.addRequired<TargetLibraryInfo>();
+ AU.addRequired<AssumptionCacheTracker>();
+ AU.addRequired<TargetLibraryInfoWrapperPass>();
AU.addRequired<DominatorTreeWrapperPass>();
}
void print(raw_ostream &O, const Module *M) const override {}
@@ -154,8 +157,8 @@ namespace {
char Lint::ID = 0;
INITIALIZE_PASS_BEGIN(Lint, "lint", "Statically lint-checks LLVM IR",
false, true)
-INITIALIZE_PASS_DEPENDENCY(AssumptionTracker)
-INITIALIZE_PASS_DEPENDENCY(TargetLibraryInfo)
+INITIALIZE_PASS_DEPENDENCY(AssumptionCacheTracker)
+INITIALIZE_PASS_DEPENDENCY(TargetLibraryInfoWrapperPass)
INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass)
INITIALIZE_AG_DEPENDENCY(AliasAnalysis)
INITIALIZE_PASS_END(Lint, "lint", "Statically lint-checks LLVM IR",
@@ -179,11 +182,11 @@ INITIALIZE_PASS_END(Lint, "lint", "Statically lint-checks LLVM IR",
bool Lint::runOnFunction(Function &F) {
Mod = F.getParent();
AA = &getAnalysis<AliasAnalysis>();
- AT = &getAnalysis<AssumptionTracker>();
+ AC = &getAnalysis<AssumptionCacheTracker>().getAssumptionCache(F);
DT = &getAnalysis<DominatorTreeWrapperPass>().getDomTree();
DataLayoutPass *DLP = getAnalysisIfAvailable<DataLayoutPass>();
DL = DLP ? &DLP->getDataLayout() : nullptr;
- TLI = &getAnalysis<TargetLibraryInfo>();
+ TLI = &getAnalysis<TargetLibraryInfoWrapperPass>().getTLI();
visit(F);
dbgs() << MessagesStr.str();
Messages.clear();
@@ -346,6 +349,13 @@ void Lint::visitCallSite(CallSite CS) {
visitMemoryReference(I, CS.getArgument(0), AliasAnalysis::UnknownSize,
0, nullptr, MemRef::Read | MemRef::Write);
break;
+
+ case Intrinsic::eh_begincatch:
+ visitEHBeginCatch(II);
+ break;
+ case Intrinsic::eh_endcatch:
+ visitEHEndCatch(II);
+ break;
}
}
@@ -509,8 +519,190 @@ void Lint::visitShl(BinaryOperator &I) {
"Undefined result: Shift count out of range", &I);
}
+static bool
+allPredsCameFromLandingPad(BasicBlock *BB,
+ SmallSet<BasicBlock *, 4> &VisitedBlocks) {
+ VisitedBlocks.insert(BB);
+ if (BB->isLandingPad())
+ return true;
+ // If we find a block with no predecessors, the search failed.
+ if (pred_empty(BB))
+ return false;
+ for (BasicBlock *Pred : predecessors(BB)) {
+ if (VisitedBlocks.count(Pred))
+ continue;
+ if (!allPredsCameFromLandingPad(Pred, VisitedBlocks))
+ return false;
+ }
+ return true;
+}
+
+static bool
+allSuccessorsReachEndCatch(BasicBlock *BB, BasicBlock::iterator InstBegin,
+ IntrinsicInst **SecondBeginCatch,
+ SmallSet<BasicBlock *, 4> &VisitedBlocks) {
+ VisitedBlocks.insert(BB);
+ for (BasicBlock::iterator I = InstBegin, E = BB->end(); I != E; ++I) {
+ IntrinsicInst *IC = dyn_cast<IntrinsicInst>(I);
+ if (IC && IC->getIntrinsicID() == Intrinsic::eh_endcatch)
+ return true;
+ // If we find another begincatch while looking for an endcatch,
+ // that's also an error.
+ if (IC && IC->getIntrinsicID() == Intrinsic::eh_begincatch) {
+ *SecondBeginCatch = IC;
+ return false;
+ }
+ }
+
+ // If we reach a block with no successors while searching, the
+ // search has failed.
+ if (succ_empty(BB))
+ return false;
+ // Otherwise, search all of the successors.
+ for (BasicBlock *Succ : successors(BB)) {
+ if (VisitedBlocks.count(Succ))
+ continue;
+ if (!allSuccessorsReachEndCatch(Succ, Succ->begin(), SecondBeginCatch,
+ VisitedBlocks))
+ return false;
+ }
+ return true;
+}
+
+void Lint::visitEHBeginCatch(IntrinsicInst *II) {
+ // The checks in this function make a potentially dubious assumption about
+ // the CFG, namely that any block involved in a catch is only used for the
+ // catch. This will very likely be true of IR generated by a front end,
+ // but it may cease to be true, for example, if the IR is run through a
+ // pass which combines similar blocks.
+ //
+  // In general, if we encounter a block that isn't dominated by the catch
+ // block while we are searching the catch block's successors for a call
+ // to end catch intrinsic, then it is possible that it will be legal for
+ // a path through this block to never reach a call to llvm.eh.endcatch.
+ // An analogous statement could be made about our search for a landing
+ // pad among the catch block's predecessors.
+ //
+ // What is actually required is that no path is possible at runtime that
+ // reaches a call to llvm.eh.begincatch without having previously visited
+ // a landingpad instruction and that no path is possible at runtime that
+ // calls llvm.eh.begincatch and does not subsequently call llvm.eh.endcatch
+ // (mentally adjusting for the fact that in reality these calls will be
+ // removed before code generation).
+ //
+ // Because this is a lint check, we take a pessimistic approach and warn if
+ // the control flow is potentially incorrect.
+
+ SmallSet<BasicBlock *, 4> VisitedBlocks;
+ BasicBlock *CatchBB = II->getParent();
+
+ // The begin catch must occur in a landing pad block or all paths
+ // to it must have come from a landing pad.
+ Assert1(allPredsCameFromLandingPad(CatchBB, VisitedBlocks),
+ "llvm.eh.begincatch may be reachable without passing a landingpad",
+ II);
+
+ // Reset the visited block list.
+ VisitedBlocks.clear();
+
+ IntrinsicInst *SecondBeginCatch = nullptr;
+
+  // This has to run before the asserts below; otherwise SecondBeginCatch
+  // could never be set and the first assert could never fire.
+ bool EndCatchFound = allSuccessorsReachEndCatch(
+ CatchBB, std::next(static_cast<BasicBlock::iterator>(II)),
+ &SecondBeginCatch, VisitedBlocks);
+ Assert2(
+ SecondBeginCatch == nullptr,
+ "llvm.eh.begincatch may be called a second time before llvm.eh.endcatch",
+ II, SecondBeginCatch);
+ Assert1(EndCatchFound,
+ "Some paths from llvm.eh.begincatch may not reach llvm.eh.endcatch",
+ II);
+}
+
+static bool allPredCameFromBeginCatch(
+ BasicBlock *BB, BasicBlock::reverse_iterator InstRbegin,
+ IntrinsicInst **SecondEndCatch, SmallSet<BasicBlock *, 4> &VisitedBlocks) {
+ VisitedBlocks.insert(BB);
+ // Look for a begincatch in this block.
+ for (BasicBlock::reverse_iterator RI = InstRbegin, RE = BB->rend(); RI != RE;
+ ++RI) {
+ IntrinsicInst *IC = dyn_cast<IntrinsicInst>(&*RI);
+ if (IC && IC->getIntrinsicID() == Intrinsic::eh_begincatch)
+ return true;
+ // If we find another end catch before we find a begin catch, that's
+ // an error.
+ if (IC && IC->getIntrinsicID() == Intrinsic::eh_endcatch) {
+ *SecondEndCatch = IC;
+ return false;
+ }
+ // If we encounter a landingpad instruction, the search failed.
+ if (isa<LandingPadInst>(*RI))
+ return false;
+ }
+  // If, while searching, we find a block with no predecessors,
+ // the search failed.
+ if (pred_empty(BB))
+ return false;
+ // Search any predecessors we haven't seen before.
+ for (BasicBlock *Pred : predecessors(BB)) {
+ if (VisitedBlocks.count(Pred))
+ continue;
+ if (!allPredCameFromBeginCatch(Pred, Pred->rbegin(), SecondEndCatch,
+ VisitedBlocks))
+ return false;
+ }
+ return true;
+}
+
+void Lint::visitEHEndCatch(IntrinsicInst *II) {
+ // The check in this function makes a potentially dubious assumption about
+ // the CFG, namely that any block involved in a catch is only used for the
+ // catch. This will very likely be true of IR generated by a front end,
+ // but it may cease to be true, for example, if the IR is run through a
+ // pass which combines similar blocks.
+ //
+  // In general, if we encounter a block that isn't post-dominated by the
+ // end catch block while we are searching the end catch block's predecessors
+ // for a call to the begin catch intrinsic, then it is possible that it will
+ // be legal for a path to reach the end catch block without ever having
+ // called llvm.eh.begincatch.
+ //
+ // What is actually required is that no path is possible at runtime that
+ // reaches a call to llvm.eh.endcatch without having previously visited
+ // a call to llvm.eh.begincatch (mentally adjusting for the fact that in
+ // reality these calls will be removed before code generation).
+ //
+ // Because this is a lint check, we take a pessimistic approach and warn if
+ // the control flow is potentially incorrect.
+
+ BasicBlock *EndCatchBB = II->getParent();
+
+  // All paths to the end catch call must pass through a begin catch call.
+
+  // If llvm.eh.begincatch wasn't called in the current block, we recursively
+  // look for it in predecessors via allPredCameFromBeginCatch.
+ SmallSet<BasicBlock *, 4> VisitedBlocks;
+ IntrinsicInst *SecondEndCatch = nullptr;
+
+  // This has to run before the asserts below; otherwise SecondEndCatch
+  // could never be set and the first assert could never fire.
+ bool BeginCatchFound =
+ allPredCameFromBeginCatch(EndCatchBB, BasicBlock::reverse_iterator(II),
+ &SecondEndCatch, VisitedBlocks);
+ Assert2(
+ SecondEndCatch == nullptr,
+ "llvm.eh.endcatch may be called a second time after llvm.eh.begincatch",
+ II, SecondEndCatch);
+ Assert1(
+ BeginCatchFound,
+ "llvm.eh.endcatch may be reachable without passing llvm.eh.begincatch",
+ II);
+}
+
static bool isZero(Value *V, const DataLayout *DL, DominatorTree *DT,
- AssumptionTracker *AT) {
+ AssumptionCache *AC) {
// Assume undef could be zero.
if (isa<UndefValue>(V))
return true;
@@ -519,8 +711,8 @@ static bool isZero(Value *V, const DataLayout *DL, DominatorTree *DT,
if (!VecTy) {
unsigned BitWidth = V->getType()->getIntegerBitWidth();
APInt KnownZero(BitWidth, 0), KnownOne(BitWidth, 0);
- computeKnownBits(V, KnownZero, KnownOne, DL,
- 0, AT, dyn_cast<Instruction>(V), DT);
+ computeKnownBits(V, KnownZero, KnownOne, DL, 0, AC,
+ dyn_cast<Instruction>(V), DT);
return KnownZero.isAllOnesValue();
}
@@ -550,22 +742,22 @@ static bool isZero(Value *V, const DataLayout *DL, DominatorTree *DT,
}
void Lint::visitSDiv(BinaryOperator &I) {
- Assert1(!isZero(I.getOperand(1), DL, DT, AT),
+ Assert1(!isZero(I.getOperand(1), DL, DT, AC),
"Undefined behavior: Division by zero", &I);
}
void Lint::visitUDiv(BinaryOperator &I) {
- Assert1(!isZero(I.getOperand(1), DL, DT, AT),
+ Assert1(!isZero(I.getOperand(1), DL, DT, AC),
"Undefined behavior: Division by zero", &I);
}
void Lint::visitSRem(BinaryOperator &I) {
- Assert1(!isZero(I.getOperand(1), DL, DT, AT),
+ Assert1(!isZero(I.getOperand(1), DL, DT, AC),
"Undefined behavior: Division by zero", &I);
}
void Lint::visitURem(BinaryOperator &I) {
- Assert1(!isZero(I.getOperand(1), DL, DT, AT),
+ Assert1(!isZero(I.getOperand(1), DL, DT, AC),
"Undefined behavior: Division by zero", &I);
}
@@ -686,7 +878,7 @@ Value *Lint::findValueImpl(Value *V, bool OffsetOk,
// As a last resort, try SimplifyInstruction or constant folding.
if (Instruction *Inst = dyn_cast<Instruction>(V)) {
- if (Value *W = SimplifyInstruction(Inst, DL, TLI, DT, AT))
+ if (Value *W = SimplifyInstruction(Inst, DL, TLI, DT, AC))
return findValueImpl(W, OffsetOk, Visited);
} else if (ConstantExpr *CE = dyn_cast<ConstantExpr>(V)) {
if (Value *W = ConstantFoldConstantExpression(CE, DL, TLI))
@@ -711,7 +903,7 @@ void llvm::lintFunction(const Function &f) {
Function &F = const_cast<Function&>(f);
assert(!F.isDeclaration() && "Cannot lint external functions");
- FunctionPassManager FPM(F.getParent());
+ legacy::FunctionPassManager FPM(F.getParent());
Lint *V = new Lint();
FPM.add(V);
FPM.run(F);
@@ -720,7 +912,7 @@ void llvm::lintFunction(const Function &f) {
/// lintModule - Check a module for errors, printing messages on stderr.
///
void llvm::lintModule(const Module &M) {
- PassManager PM;
+ legacy::PassManager PM;
Lint *V = new Lint();
PM.add(V);
PM.run(const_cast<Module&>(M));
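The isZero() hunk shows the migrated computeKnownBits() signature, with the AssumptionCache in the old tracker's position. The same call pattern in isolation, assuming an integer-typed V and analyses from the enclosing pass:

  unsigned BitWidth = V->getType()->getIntegerBitWidth();
  APInt KnownZero(BitWidth, 0), KnownOne(BitWidth, 0);
  computeKnownBits(V, KnownZero, KnownOne, DL, /*Depth=*/0, AC,
                   dyn_cast<Instruction>(V), DT);
  bool ProvablyZero = KnownZero.isAllOnesValue(); // all bits known zero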
diff --git a/lib/Analysis/Loads.cpp b/lib/Analysis/Loads.cpp
index bb0d60e..5042eb9 100644
--- a/lib/Analysis/Loads.cpp
+++ b/lib/Analysis/Loads.cpp
@@ -176,8 +176,13 @@ Value *llvm::FindAvailableLoadedValue(Value *Ptr, BasicBlock *ScanBB,
Type *AccessTy = cast<PointerType>(Ptr->getType())->getElementType();
- // If we're using alias analysis to disambiguate get the size of *Ptr.
- uint64_t AccessSize = AA ? AA->getTypeStoreSize(AccessTy) : 0;
+ // Try to get the DataLayout for this module. This may be null, in which case
+ // the optimizations will be limited.
+ const DataLayout *DL = ScanBB->getDataLayout();
+
+ // Try to get the store size for the type.
+ uint64_t AccessSize = DL ? DL->getTypeStoreSize(AccessTy)
+ : AA ? AA->getTypeStoreSize(AccessTy) : 0;
Value *StrippedPtr = Ptr->stripPointerCasts();
@@ -202,7 +207,7 @@ Value *llvm::FindAvailableLoadedValue(Value *Ptr, BasicBlock *ScanBB,
if (LoadInst *LI = dyn_cast<LoadInst>(Inst))
if (AreEquivalentAddressValues(
LI->getPointerOperand()->stripPointerCasts(), StrippedPtr) &&
- CastInst::isBitCastable(LI->getType(), AccessTy)) {
+ CastInst::isBitOrNoopPointerCastable(LI->getType(), AccessTy, DL)) {
if (AATags)
LI->getAAMetadata(*AATags);
return LI;
@@ -214,7 +219,8 @@ Value *llvm::FindAvailableLoadedValue(Value *Ptr, BasicBlock *ScanBB,
// (This is true even if the store is volatile or atomic, although
// those cases are unlikely.)
if (AreEquivalentAddressValues(StorePtr, StrippedPtr) &&
- CastInst::isBitCastable(SI->getValueOperand()->getType(), AccessTy)) {
+ CastInst::isBitOrNoopPointerCastable(SI->getValueOperand()->getType(),
+ AccessTy, DL)) {
if (AATags)
SI->getAAMetadata(*AATags);
return SI->getOperand(0);
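For context, a sketch of a FindAvailableLoadedValue() call site that benefits from the new no-op pointer-cast check (LI and AA are assumed to be in scope; the type guard keeps the sketch conservative, since clients normally insert a cast when the types differ):

  BasicBlock::iterator ScanFrom = LI;
  Value *Avail = FindAvailableLoadedValue(LI->getPointerOperand(),
                                          LI->getParent(), ScanFrom,
                                          /*MaxInstsToScan=*/6, AA);
  if (Avail && Avail->getType() == LI->getType()) {
    LI->replaceAllUsesWith(Avail);
    LI->eraseFromParent();
  }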
diff --git a/lib/Analysis/LoopAccessAnalysis.cpp b/lib/Analysis/LoopAccessAnalysis.cpp
new file mode 100644
index 0000000..7bedd40
--- /dev/null
+++ b/lib/Analysis/LoopAccessAnalysis.cpp
@@ -0,0 +1,1396 @@
+//===- LoopAccessAnalysis.cpp - Loop Access Analysis Implementation ------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// The implementation of the loop memory dependence analysis that was
+// originally developed for the loop vectorizer.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/Analysis/LoopAccessAnalysis.h"
+#include "llvm/Analysis/LoopInfo.h"
+#include "llvm/Analysis/ScalarEvolutionExpander.h"
+#include "llvm/Analysis/ValueTracking.h"
+#include "llvm/IR/DiagnosticInfo.h"
+#include "llvm/IR/Dominators.h"
+#include "llvm/IR/IRBuilder.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Transforms/Utils/VectorUtils.h"
+using namespace llvm;
+
+#define DEBUG_TYPE "loop-accesses"
+
+static cl::opt<unsigned, true>
+VectorizationFactor("force-vector-width", cl::Hidden,
+ cl::desc("Sets the SIMD width. Zero is autoselect."),
+ cl::location(VectorizerParams::VectorizationFactor));
+unsigned VectorizerParams::VectorizationFactor;
+
+static cl::opt<unsigned, true>
+VectorizationInterleave("force-vector-interleave", cl::Hidden,
+ cl::desc("Sets the vectorization interleave count. "
+ "Zero is autoselect."),
+ cl::location(
+ VectorizerParams::VectorizationInterleave));
+unsigned VectorizerParams::VectorizationInterleave;
+
+static cl::opt<unsigned, true> RuntimeMemoryCheckThreshold(
+ "runtime-memory-check-threshold", cl::Hidden,
+ cl::desc("When performing memory disambiguation checks at runtime do not "
+ "generate more than this number of comparisons (default = 8)."),
+ cl::location(VectorizerParams::RuntimeMemoryCheckThreshold), cl::init(8));
+unsigned VectorizerParams::RuntimeMemoryCheckThreshold;
+
+/// Maximum SIMD width.
+const unsigned VectorizerParams::MaxVectorWidth = 64;
+
+bool VectorizerParams::isInterleaveForced() {
+ return ::VectorizationInterleave.getNumOccurrences() > 0;
+}
+
+void LoopAccessReport::emitAnalysis(const LoopAccessReport &Message,
+ const Function *TheFunction,
+ const Loop *TheLoop,
+ const char *PassName) {
+ DebugLoc DL = TheLoop->getStartLoc();
+ if (const Instruction *I = Message.getInstr())
+ DL = I->getDebugLoc();
+ emitOptimizationRemarkAnalysis(TheFunction->getContext(), PassName,
+ *TheFunction, DL, Message.str());
+}
+
+Value *llvm::stripIntegerCast(Value *V) {
+ if (CastInst *CI = dyn_cast<CastInst>(V))
+ if (CI->getOperand(0)->getType()->isIntegerTy())
+ return CI->getOperand(0);
+ return V;
+}
+
+const SCEV *llvm::replaceSymbolicStrideSCEV(ScalarEvolution *SE,
+ const ValueToValueMap &PtrToStride,
+ Value *Ptr, Value *OrigPtr) {
+
+ const SCEV *OrigSCEV = SE->getSCEV(Ptr);
+
+ // If there is an entry in the map return the SCEV of the pointer with the
+ // symbolic stride replaced by one.
+ ValueToValueMap::const_iterator SI =
+ PtrToStride.find(OrigPtr ? OrigPtr : Ptr);
+ if (SI != PtrToStride.end()) {
+ Value *StrideVal = SI->second;
+
+ // Strip casts.
+ StrideVal = stripIntegerCast(StrideVal);
+
+ // Replace symbolic stride by one.
+ Value *One = ConstantInt::get(StrideVal->getType(), 1);
+ ValueToValueMap RewriteMap;
+ RewriteMap[StrideVal] = One;
+
+ const SCEV *ByOne =
+ SCEVParameterRewriter::rewrite(OrigSCEV, *SE, RewriteMap, true);
+ DEBUG(dbgs() << "LAA: Replacing SCEV: " << *OrigSCEV << " by: " << *ByOne
+ << "\n");
+ return ByOne;
+ }
+
+ // Otherwise, just return the SCEV of the original pointer.
+ return SE->getSCEV(Ptr);
+}
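+
+// For example, for i32 accesses strided by a symbolic %s, rewriting %s to 1
+// turns a pointer SCEV such as {%p,+,(4 * %s)} into {%p,+,4}, which later
+// phases can prove consecutive.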
+
+void LoopAccessInfo::RuntimePointerCheck::insert(
+ ScalarEvolution *SE, Loop *Lp, Value *Ptr, bool WritePtr, unsigned DepSetId,
+ unsigned ASId, const ValueToValueMap &Strides) {
+ // Get the stride replaced scev.
+ const SCEV *Sc = replaceSymbolicStrideSCEV(SE, Strides, Ptr);
+ const SCEVAddRecExpr *AR = dyn_cast<SCEVAddRecExpr>(Sc);
+ assert(AR && "Invalid addrec expression");
+ const SCEV *Ex = SE->getBackedgeTakenCount(Lp);
+ const SCEV *ScEnd = AR->evaluateAtIteration(Ex, *SE);
+ Pointers.push_back(Ptr);
+ Starts.push_back(AR->getStart());
+ Ends.push_back(ScEnd);
+ IsWritePtr.push_back(WritePtr);
+ DependencySetId.push_back(DepSetId);
+ AliasSetId.push_back(ASId);
+}
+
+bool LoopAccessInfo::RuntimePointerCheck::needsChecking(unsigned I,
+ unsigned J) const {
+ // No need to check if two readonly pointers intersect.
+ if (!IsWritePtr[I] && !IsWritePtr[J])
+ return false;
+
+ // Only need to check pointers between two different dependency sets.
+ if (DependencySetId[I] == DependencySetId[J])
+ return false;
+
+ // Only need to check pointers in the same alias set.
+ if (AliasSetId[I] != AliasSetId[J])
+ return false;
+
+ return true;
+}
+
+void LoopAccessInfo::RuntimePointerCheck::print(raw_ostream &OS,
+ unsigned Depth) const {
+ unsigned NumPointers = Pointers.size();
+ if (NumPointers == 0)
+ return;
+
+ OS.indent(Depth) << "Run-time memory checks:\n";
+ unsigned N = 0;
+ for (unsigned I = 0; I < NumPointers; ++I)
+ for (unsigned J = I + 1; J < NumPointers; ++J)
+ if (needsChecking(I, J)) {
+ OS.indent(Depth) << N++ << ":\n";
+ OS.indent(Depth + 2) << *Pointers[I] << "\n";
+ OS.indent(Depth + 2) << *Pointers[J] << "\n";
+ }
+}
+
+namespace {
+/// \brief Analyses memory accesses in a loop.
+///
+/// Checks whether run time pointer checks are needed and builds sets for data
+/// dependence checking.
+class AccessAnalysis {
+public:
+ /// \brief Read or write access location.
+ typedef PointerIntPair<Value *, 1, bool> MemAccessInfo;
+ typedef SmallPtrSet<MemAccessInfo, 8> MemAccessInfoSet;
+
+ /// \brief Set of potential dependent memory accesses.
+ typedef EquivalenceClasses<MemAccessInfo> DepCandidates;
+
+ AccessAnalysis(const DataLayout *Dl, AliasAnalysis *AA, DepCandidates &DA) :
+ DL(Dl), AST(*AA), DepCands(DA), IsRTCheckNeeded(false) {}
+
+ /// \brief Register a load and whether it is only read from.
+ void addLoad(AliasAnalysis::Location &Loc, bool IsReadOnly) {
+ Value *Ptr = const_cast<Value*>(Loc.Ptr);
+ AST.add(Ptr, AliasAnalysis::UnknownSize, Loc.AATags);
+ Accesses.insert(MemAccessInfo(Ptr, false));
+ if (IsReadOnly)
+ ReadOnlyPtr.insert(Ptr);
+ }
+
+ /// \brief Register a store.
+ void addStore(AliasAnalysis::Location &Loc) {
+ Value *Ptr = const_cast<Value*>(Loc.Ptr);
+ AST.add(Ptr, AliasAnalysis::UnknownSize, Loc.AATags);
+ Accesses.insert(MemAccessInfo(Ptr, true));
+ }
+
+ /// \brief Check whether we can check the pointers at runtime for
+ /// non-intersection.
+ bool canCheckPtrAtRT(LoopAccessInfo::RuntimePointerCheck &RtCheck,
+ unsigned &NumComparisons, ScalarEvolution *SE,
+ Loop *TheLoop, const ValueToValueMap &Strides,
+ bool ShouldCheckStride = false);
+
+ /// \brief Goes over all memory accesses, checks whether a RT check is needed
+ /// and builds sets of dependent accesses.
+ void buildDependenceSets() {
+ processMemAccesses();
+ }
+
+ bool isRTCheckNeeded() { return IsRTCheckNeeded; }
+
+ bool isDependencyCheckNeeded() { return !CheckDeps.empty(); }
+ void resetDepChecks() { CheckDeps.clear(); }
+
+ MemAccessInfoSet &getDependenciesToCheck() { return CheckDeps; }
+
+private:
+ typedef SetVector<MemAccessInfo> PtrAccessSet;
+
+  /// \brief Go over all memory accesses and check whether runtime pointer
+  /// checks are needed and build sets of dependency check candidates.
+ void processMemAccesses();
+
+ /// Set of all accesses.
+ PtrAccessSet Accesses;
+
+ /// Set of accesses that need a further dependence check.
+ MemAccessInfoSet CheckDeps;
+
+ /// Set of pointers that are read only.
+ SmallPtrSet<Value*, 16> ReadOnlyPtr;
+
+ const DataLayout *DL;
+
+ /// An alias set tracker to partition the access set by underlying object and
+  /// intrinsic property (such as TBAA metadata).
+ AliasSetTracker AST;
+
+ /// Sets of potentially dependent accesses - members of one set share an
+  /// underlying pointer. The set "CheckDeps" identifies which sets really need a
+ /// dependence check.
+ DepCandidates &DepCands;
+
+ bool IsRTCheckNeeded;
+};
+
+} // end anonymous namespace
+
+/// \brief Check whether a pointer can participate in a runtime bounds check.
+static bool hasComputableBounds(ScalarEvolution *SE,
+ const ValueToValueMap &Strides, Value *Ptr) {
+ const SCEV *PtrScev = replaceSymbolicStrideSCEV(SE, Strides, Ptr);
+ const SCEVAddRecExpr *AR = dyn_cast<SCEVAddRecExpr>(PtrScev);
+ if (!AR)
+ return false;
+
+ return AR->isAffine();
+}
+
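+// For intuition, two hypothetical input loops (not from this change; assumes
+// 4-byte floats -- the SCEV notation in the comments is informal):
+//
+//   void affine(float *A, int N) {
+//     for (int i = 0; i < N; ++i)
+//       A[i] = 0.f;      // &A[i] = {A,+,4}<loop>: affine AddRec, bounds OK
+//   }
+//   void nonAffine(float *A, int *B, int N) {
+//     for (int i = 0; i < N; ++i)
+//       A[B[i]] = 0.f;   // &A[B[i]] is not an AddRec of the loop: no bounds
+//   }
+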
+/// \brief Check the stride of the pointer and ensure that it does not wrap in
+/// the address space.
+static int isStridedPtr(ScalarEvolution *SE, const DataLayout *DL, Value *Ptr,
+ const Loop *Lp, const ValueToValueMap &StridesMap);
+
+bool AccessAnalysis::canCheckPtrAtRT(
+ LoopAccessInfo::RuntimePointerCheck &RtCheck, unsigned &NumComparisons,
+ ScalarEvolution *SE, Loop *TheLoop, const ValueToValueMap &StridesMap,
+ bool ShouldCheckStride) {
+ // Find pointers with computable bounds. We are going to use this information
+ // to place a runtime bound check.
+ bool CanDoRT = true;
+
+ bool IsDepCheckNeeded = isDependencyCheckNeeded();
+ NumComparisons = 0;
+
+  // We assign a consecutive id to accesses from different alias sets.
+  // Accesses between different groups don't need to be checked.
+ unsigned ASId = 1;
+ for (auto &AS : AST) {
+ unsigned NumReadPtrChecks = 0;
+ unsigned NumWritePtrChecks = 0;
+
+    // We assign consecutive ids to accesses from different dependence sets.
+ // Accesses within the same set don't need a runtime check.
+ unsigned RunningDepId = 1;
+ DenseMap<Value *, unsigned> DepSetId;
+
+ for (auto A : AS) {
+ Value *Ptr = A.getValue();
+ bool IsWrite = Accesses.count(MemAccessInfo(Ptr, true));
+ MemAccessInfo Access(Ptr, IsWrite);
+
+ if (IsWrite)
+ ++NumWritePtrChecks;
+ else
+ ++NumReadPtrChecks;
+
+ if (hasComputableBounds(SE, StridesMap, Ptr) &&
+ // When we run after a failing dependency check we have to make sure we
+ // don't have wrapping pointers.
+ (!ShouldCheckStride ||
+ isStridedPtr(SE, DL, Ptr, TheLoop, StridesMap) == 1)) {
+ // The id of the dependence set.
+ unsigned DepId;
+
+ if (IsDepCheckNeeded) {
+ Value *Leader = DepCands.getLeaderValue(Access).getPointer();
+ unsigned &LeaderId = DepSetId[Leader];
+ if (!LeaderId)
+ LeaderId = RunningDepId++;
+ DepId = LeaderId;
+ } else
+ // Each access has its own dependence set.
+ DepId = RunningDepId++;
+
+ RtCheck.insert(SE, TheLoop, Ptr, IsWrite, DepId, ASId, StridesMap);
+
+ DEBUG(dbgs() << "LAA: Found a runtime check ptr:" << *Ptr << '\n');
+ } else {
+ CanDoRT = false;
+ }
+ }
+
+ if (IsDepCheckNeeded && CanDoRT && RunningDepId == 2)
+ NumComparisons += 0; // Only one dependence set.
+ else {
+ NumComparisons += (NumWritePtrChecks * (NumReadPtrChecks +
+ NumWritePtrChecks - 1));
+ }
+
+ ++ASId;
+ }
+
+ // If the pointers that we would use for the bounds comparison have different
+ // address spaces, assume the values aren't directly comparable, so we can't
+ // use them for the runtime check. We also have to assume they could
+ // overlap. In the future there should be metadata for whether address spaces
+ // are disjoint.
+ unsigned NumPointers = RtCheck.Pointers.size();
+ for (unsigned i = 0; i < NumPointers; ++i) {
+ for (unsigned j = i + 1; j < NumPointers; ++j) {
+ // Only need to check pointers between two different dependency sets.
+ if (RtCheck.DependencySetId[i] == RtCheck.DependencySetId[j])
+ continue;
+ // Only need to check pointers in the same alias set.
+ if (RtCheck.AliasSetId[i] != RtCheck.AliasSetId[j])
+ continue;
+
+ Value *PtrI = RtCheck.Pointers[i];
+ Value *PtrJ = RtCheck.Pointers[j];
+
+ unsigned ASi = PtrI->getType()->getPointerAddressSpace();
+ unsigned ASj = PtrJ->getType()->getPointerAddressSpace();
+ if (ASi != ASj) {
+ DEBUG(dbgs() << "LAA: Runtime check would require comparison between"
+ " different address spaces\n");
+ return false;
+ }
+ }
+ }
+
+ return CanDoRT;
+}
+
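+// A sketch of the per-alias-set counting above (hypothetical helper, not
+// LLVM API): M writes and N reads need M * (N + M - 1) pair checks, because
+// read/read pairs never require a comparison.
+static inline unsigned pairChecksExample(unsigned NumWrites,
+                                         unsigned NumReads) {
+  return NumWrites * (NumReads + NumWrites - 1);
+}
+// e.g. 2 writes and 3 reads => 2 * (3 + 2 - 1) = 8 runtime comparisons.
+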
+void AccessAnalysis::processMemAccesses() {
+  // We process the set twice: first we process read-write pointers, then we
+ // process read-only pointers. This allows us to skip dependence tests for
+ // read-only pointers.
+
+ DEBUG(dbgs() << "LAA: Processing memory accesses...\n");
+ DEBUG(dbgs() << " AST: "; AST.dump());
+ DEBUG(dbgs() << "LAA: Accesses:\n");
+ DEBUG({
+ for (auto A : Accesses)
+ dbgs() << "\t" << *A.getPointer() << " (" <<
+ (A.getInt() ? "write" : (ReadOnlyPtr.count(A.getPointer()) ?
+ "read-only" : "read")) << ")\n";
+ });
+
+ // The AliasSetTracker has nicely partitioned our pointers by metadata
+ // compatibility and potential for underlying-object overlap. As a result, we
+ // only need to check for potential pointer dependencies within each alias
+ // set.
+ for (auto &AS : AST) {
+    // Note that both the alias-set tracker and the alias sets themselves use
+    // linked lists internally, so the iteration order here is deterministic
+ // (matching the original instruction order within each set).
+
+ bool SetHasWrite = false;
+
+ // Map of pointers to last access encountered.
+ typedef DenseMap<Value*, MemAccessInfo> UnderlyingObjToAccessMap;
+ UnderlyingObjToAccessMap ObjToLastAccess;
+
+ // Set of access to check after all writes have been processed.
+ PtrAccessSet DeferredAccesses;
+
+ // Iterate over each alias set twice, once to process read/write pointers,
+ // and then to process read-only pointers.
+ for (int SetIteration = 0; SetIteration < 2; ++SetIteration) {
+ bool UseDeferred = SetIteration > 0;
+ PtrAccessSet &S = UseDeferred ? DeferredAccesses : Accesses;
+
+ for (auto AV : AS) {
+ Value *Ptr = AV.getValue();
+
+ // For a single memory access in AliasSetTracker, Accesses may contain
+ // both read and write, and they both need to be handled for CheckDeps.
+ for (auto AC : S) {
+ if (AC.getPointer() != Ptr)
+ continue;
+
+ bool IsWrite = AC.getInt();
+
+ // If we're using the deferred access set, then it contains only
+ // reads.
+ bool IsReadOnlyPtr = ReadOnlyPtr.count(Ptr) && !IsWrite;
+ if (UseDeferred && !IsReadOnlyPtr)
+ continue;
+ // Otherwise, the pointer must be in the PtrAccessSet, either as a
+ // read or a write.
+ assert(((IsReadOnlyPtr && UseDeferred) || IsWrite ||
+ S.count(MemAccessInfo(Ptr, false))) &&
+ "Alias-set pointer not in the access set?");
+
+ MemAccessInfo Access(Ptr, IsWrite);
+ DepCands.insert(Access);
+
+ // Memorize read-only pointers for later processing and skip them in
+ // the first round (they need to be checked after we have seen all
+          // write pointers). Note: we also mark pointers that are not
+ // consecutive as "read-only" pointers (so that we check
+ // "a[b[i]] +="). Hence, we need the second check for "!IsWrite".
+ if (!UseDeferred && IsReadOnlyPtr) {
+ DeferredAccesses.insert(Access);
+ continue;
+ }
+
+          // If this is a write - check other reads and writes for conflicts. If
+          // this is a read, only check other writes for conflicts (but only if
+          // there is no other write to the ptr - this is an optimization to
+          // catch "a[i] = a[i] + ..." without having to do a dependence check).
+ if ((IsWrite || IsReadOnlyPtr) && SetHasWrite) {
+ CheckDeps.insert(Access);
+ IsRTCheckNeeded = true;
+ }
+
+ if (IsWrite)
+ SetHasWrite = true;
+
+ // Create sets of pointers connected by a shared alias set and
+ // underlying object.
+ typedef SmallVector<Value *, 16> ValueVector;
+ ValueVector TempObjects;
+ GetUnderlyingObjects(Ptr, TempObjects, DL);
+ for (Value *UnderlyingObj : TempObjects) {
+ UnderlyingObjToAccessMap::iterator Prev =
+ ObjToLastAccess.find(UnderlyingObj);
+ if (Prev != ObjToLastAccess.end())
+ DepCands.unionSets(Access, Prev->second);
+
+ ObjToLastAccess[UnderlyingObj] = Access;
+ }
+ }
+ }
+ }
+ }
+}
+
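+// Hypothetical input for the two-round scheme above (assuming alias analysis
+// cannot disambiguate the three pointers, so they share one alias set):
+//
+//   void f(float *a, float *b, float *c, int n) {
+//     for (int i = 0; i < n; ++i)
+//       a[i] = b[i] + c[i];
+//   }
+//
+// Round one sees the write to "a" and sets SetHasWrite; the read-only
+// pointers "b" and "c" are deferred to round two, where they are added to
+// CheckDeps because a write in the same alias set has already been seen.
+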
+namespace {
+/// \brief Checks memory dependences among accesses to the same underlying
+/// object to determine whether vectorization is legal or not (and at
+/// which vectorization factor).
+///
+/// This class works under the assumption that we already checked that memory
+/// locations with different underlying pointers are "must-not alias".
+/// We use the ScalarEvolution framework to symbolically evaluate access
+/// functions pairs. Since we currently don't restructure the loop we can rely
+/// on the program order of memory accesses to determine their safety.
+/// At the moment we will only deem accesses as safe for:
+/// * A negative constant distance assuming program order.
+///
+/// Safe: tmp = a[i + 1]; OR a[i + 1] = x;
+/// a[i] = tmp; y = a[i];
+///
+/// The latter case is safe because later checks guarantee that there can't
+/// be a cycle through a phi node (that is, we check that "x" and "y" are not
+/// the same variable: a header phi can only be an induction or a reduction, a
+/// reduction can't have a memory sink, an induction can't have a memory
+/// source). This is important and must not be violated (or we have to
+/// resort to checking for cycles through memory).
+///
+/// * A positive constant distance assuming program order that is bigger
+/// than the biggest memory access.
+///
+/// tmp = a[i] OR b[i] = x
+/// a[i+2] = tmp y = b[i+2];
+///
+/// Safe distance: 2 x sizeof(a[0]), and 2 x sizeof(b[0]), respectively.
+///
+/// * Zero distances and all accesses have the same size.
+///
+class MemoryDepChecker {
+public:
+ typedef PointerIntPair<Value *, 1, bool> MemAccessInfo;
+ typedef SmallPtrSet<MemAccessInfo, 8> MemAccessInfoSet;
+
+ MemoryDepChecker(ScalarEvolution *Se, const DataLayout *Dl, const Loop *L)
+ : SE(Se), DL(Dl), InnermostLoop(L), AccessIdx(0),
+ ShouldRetryWithRuntimeCheck(false) {}
+
+ /// \brief Register the location (instructions are given increasing numbers)
+ /// of a write access.
+ void addAccess(StoreInst *SI) {
+ Value *Ptr = SI->getPointerOperand();
+ Accesses[MemAccessInfo(Ptr, true)].push_back(AccessIdx);
+ InstMap.push_back(SI);
+ ++AccessIdx;
+ }
+
+ /// \brief Register the location (instructions are given increasing numbers)
+  /// of a read access.
+ void addAccess(LoadInst *LI) {
+ Value *Ptr = LI->getPointerOperand();
+ Accesses[MemAccessInfo(Ptr, false)].push_back(AccessIdx);
+ InstMap.push_back(LI);
+ ++AccessIdx;
+ }
+
+ /// \brief Check whether the dependencies between the accesses are safe.
+ ///
+ /// Only checks sets with elements in \p CheckDeps.
+ bool areDepsSafe(AccessAnalysis::DepCandidates &AccessSets,
+ MemAccessInfoSet &CheckDeps, const ValueToValueMap &Strides);
+
+ /// \brief The maximum number of bytes of a vector register we can vectorize
+ /// the accesses safely with.
+ unsigned getMaxSafeDepDistBytes() { return MaxSafeDepDistBytes; }
+
+  /// \brief In some cases when the dependency check fails we can still
+ /// vectorize the loop with a dynamic array access check.
+ bool shouldRetryWithRuntimeCheck() { return ShouldRetryWithRuntimeCheck; }
+
+private:
+ ScalarEvolution *SE;
+ const DataLayout *DL;
+ const Loop *InnermostLoop;
+
+ /// \brief Maps access locations (ptr, read/write) to program order.
+ DenseMap<MemAccessInfo, std::vector<unsigned> > Accesses;
+
+ /// \brief Memory access instructions in program order.
+ SmallVector<Instruction *, 16> InstMap;
+
+ /// \brief The program order index to be used for the next instruction.
+ unsigned AccessIdx;
+
+ // We can access this many bytes in parallel safely.
+ unsigned MaxSafeDepDistBytes;
+
+ /// \brief If we see a non-constant dependence distance we can still try to
+ /// vectorize this loop with runtime checks.
+ bool ShouldRetryWithRuntimeCheck;
+
+ /// \brief Check whether there is a plausible dependence between the two
+ /// accesses.
+ ///
+ /// Access \p A must happen before \p B in program order. The two indices
+ /// identify the index into the program order map.
+ ///
+ /// This function checks whether there is a plausible dependence (or the
+ /// absence of such can't be proved) between the two accesses. If there is a
+ /// plausible dependence but the dependence distance is bigger than one
+ /// element access it records this distance in \p MaxSafeDepDistBytes (if this
+ /// distance is smaller than any other distance encountered so far).
+ /// Otherwise, this function returns true signaling a possible dependence.
+ bool isDependent(const MemAccessInfo &A, unsigned AIdx,
+ const MemAccessInfo &B, unsigned BIdx,
+ const ValueToValueMap &Strides);
+
+ /// \brief Check whether the data dependence could prevent store-load
+ /// forwarding.
+ bool couldPreventStoreLoadForward(unsigned Distance, unsigned TypeByteSize);
+};
+
+} // end anonymous namespace
+
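+// How the program-order indices line up (illustrative): addAccess hands the
+// next AccessIdx to each load or store in visitation order, e.g.
+//
+//   store a[i]   -> AccessIdx 0
+//   load  b[i]   -> AccessIdx 1
+//   store c[i]   -> AccessIdx 2
+//
+// so isDependent is always queried with the earlier index first (AIdx < BIdx).
+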
+static bool isInBoundsGep(Value *Ptr) {
+ if (GetElementPtrInst *GEP = dyn_cast<GetElementPtrInst>(Ptr))
+ return GEP->isInBounds();
+ return false;
+}
+
+/// \brief Check whether the access through \p Ptr has a constant stride.
+static int isStridedPtr(ScalarEvolution *SE, const DataLayout *DL, Value *Ptr,
+ const Loop *Lp, const ValueToValueMap &StridesMap) {
+ const Type *Ty = Ptr->getType();
+ assert(Ty->isPointerTy() && "Unexpected non-ptr");
+
+ // Make sure that the pointer does not point to aggregate types.
+ const PointerType *PtrTy = cast<PointerType>(Ty);
+ if (PtrTy->getElementType()->isAggregateType()) {
+ DEBUG(dbgs() << "LAA: Bad stride - Not a pointer to a scalar type"
+ << *Ptr << "\n");
+ return 0;
+ }
+
+ const SCEV *PtrScev = replaceSymbolicStrideSCEV(SE, StridesMap, Ptr);
+
+ const SCEVAddRecExpr *AR = dyn_cast<SCEVAddRecExpr>(PtrScev);
+ if (!AR) {
+ DEBUG(dbgs() << "LAA: Bad stride - Not an AddRecExpr pointer "
+ << *Ptr << " SCEV: " << *PtrScev << "\n");
+ return 0;
+ }
+
+  // The access function must stride over the innermost loop.
+  if (Lp != AR->getLoop()) {
+    DEBUG(dbgs() << "LAA: Bad stride - Not striding over innermost loop " <<
+          *Ptr << " SCEV: " << *PtrScev << "\n");
+    return 0;
+  }
+
+ // The address calculation must not wrap. Otherwise, a dependence could be
+ // inverted.
+  // An inbounds getelementptr that is an AddRec with a unit stride
+  // cannot wrap by definition. The unit stride requirement is checked later.
+  // A getelementptr without an inbounds attribute and unit stride would have
+ // to access the pointer value "0" which is undefined behavior in address
+ // space 0, therefore we can also vectorize this case.
+ bool IsInBoundsGEP = isInBoundsGep(Ptr);
+ bool IsNoWrapAddRec = AR->getNoWrapFlags(SCEV::NoWrapMask);
+ bool IsInAddressSpaceZero = PtrTy->getAddressSpace() == 0;
+ if (!IsNoWrapAddRec && !IsInBoundsGEP && !IsInAddressSpaceZero) {
+ DEBUG(dbgs() << "LAA: Bad stride - Pointer may wrap in the address space "
+ << *Ptr << " SCEV: " << *PtrScev << "\n");
+ return 0;
+ }
+
+ // Check the step is constant.
+ const SCEV *Step = AR->getStepRecurrence(*SE);
+
+ // Calculate the pointer stride and check if it is consecutive.
+ const SCEVConstant *C = dyn_cast<SCEVConstant>(Step);
+ if (!C) {
+ DEBUG(dbgs() << "LAA: Bad stride - Not a constant strided " << *Ptr <<
+ " SCEV: " << *PtrScev << "\n");
+ return 0;
+ }
+
+ int64_t Size = DL->getTypeAllocSize(PtrTy->getElementType());
+ const APInt &APStepVal = C->getValue()->getValue();
+
+ // Huge step value - give up.
+ if (APStepVal.getBitWidth() > 64)
+ return 0;
+
+ int64_t StepVal = APStepVal.getSExtValue();
+
+ // Strided access.
+ int64_t Stride = StepVal / Size;
+ int64_t Rem = StepVal % Size;
+ if (Rem)
+ return 0;
+
+ // If the SCEV could wrap but we have an inbounds gep with a unit stride we
+ // know we can't "wrap around the address space". In case of address space
+ // zero we know that this won't happen without triggering undefined behavior.
+ if (!IsNoWrapAddRec && (IsInBoundsGEP || IsInAddressSpaceZero) &&
+ Stride != 1 && Stride != -1)
+ return 0;
+
+ return Stride;
+}
+
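+// The stride arithmetic above, restated as a hypothetical helper (not LLVM
+// API): a step that is not a whole number of elements yields stride 0.
+static inline int64_t strideOfExample(int64_t StepBytes, int64_t ElemBytes) {
+  if (StepBytes % ElemBytes)
+    return 0;                  // non-element-sized step: rejected
+  return StepBytes / ElemBytes;
+}
+// For a `double *` advanced three elements per iteration,
+// strideOfExample(24, 8) == 3; a 20-byte step over 8-byte elements leaves
+// remainder 4 and yields 0 (non-consecutive).
+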
+bool MemoryDepChecker::couldPreventStoreLoadForward(unsigned Distance,
+ unsigned TypeByteSize) {
+ // If loads occur at a distance that is not a multiple of a feasible vector
+ // factor store-load forwarding does not take place.
+  // Positive dependences might cause trouble because vectorizing them might
+ // prevent store-load forwarding making vectorized code run a lot slower.
+ // a[i] = a[i-3] ^ a[i-8];
+ // The stores to a[i:i+1] don't align with the stores to a[i-3:i-2] and
+ // hence on your typical architecture store-load forwarding does not take
+ // place. Vectorizing in such cases does not make sense.
+ // Store-load forwarding distance.
+ const unsigned NumCyclesForStoreLoadThroughMemory = 8*TypeByteSize;
+ // Maximum vector factor.
+ unsigned MaxVFWithoutSLForwardIssues =
+ VectorizerParams::MaxVectorWidth * TypeByteSize;
+  if (MaxSafeDepDistBytes < MaxVFWithoutSLForwardIssues)
+ MaxVFWithoutSLForwardIssues = MaxSafeDepDistBytes;
+
+ for (unsigned vf = 2*TypeByteSize; vf <= MaxVFWithoutSLForwardIssues;
+ vf *= 2) {
+ if (Distance % vf && Distance / vf < NumCyclesForStoreLoadThroughMemory) {
+      MaxVFWithoutSLForwardIssues = (vf >>= 1);
+ break;
+ }
+ }
+
+  if (MaxVFWithoutSLForwardIssues < 2*TypeByteSize) {
+ DEBUG(dbgs() << "LAA: Distance " << Distance <<
+ " that could cause a store-load forwarding conflict\n");
+ return true;
+ }
+
+ if (MaxVFWithoutSLForwardIssues < MaxSafeDepDistBytes &&
+ MaxVFWithoutSLForwardIssues !=
+ VectorizerParams::MaxVectorWidth * TypeByteSize)
+ MaxSafeDepDistBytes = MaxVFWithoutSLForwardIssues;
+ return false;
+}
+
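+// Worked example for the check above (assumed 4-byte ints): in
+// `a[i] = a[i-3] ^ a[i-8]` the smaller dependence distance is 12 bytes. At
+// vf = 8 bytes, 12 % 8 != 0 and 12 / 8 = 1 < 32 store-load cycles, so the
+// cap drops to 4 bytes -- below the minimum useful 2 * 4 = 8 bytes -- and
+// the function reports a store-load forwarding conflict.
+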
+bool MemoryDepChecker::isDependent(const MemAccessInfo &A, unsigned AIdx,
+ const MemAccessInfo &B, unsigned BIdx,
+ const ValueToValueMap &Strides) {
+  assert(AIdx < BIdx && "Must pass arguments in program order");
+
+ Value *APtr = A.getPointer();
+ Value *BPtr = B.getPointer();
+ bool AIsWrite = A.getInt();
+ bool BIsWrite = B.getInt();
+
+ // Two reads are independent.
+ if (!AIsWrite && !BIsWrite)
+ return false;
+
+ // We cannot check pointers in different address spaces.
+ if (APtr->getType()->getPointerAddressSpace() !=
+ BPtr->getType()->getPointerAddressSpace())
+ return true;
+
+ const SCEV *AScev = replaceSymbolicStrideSCEV(SE, Strides, APtr);
+ const SCEV *BScev = replaceSymbolicStrideSCEV(SE, Strides, BPtr);
+
+ int StrideAPtr = isStridedPtr(SE, DL, APtr, InnermostLoop, Strides);
+ int StrideBPtr = isStridedPtr(SE, DL, BPtr, InnermostLoop, Strides);
+
+ const SCEV *Src = AScev;
+ const SCEV *Sink = BScev;
+
+ // If the induction step is negative we have to invert source and sink of the
+ // dependence.
+ if (StrideAPtr < 0) {
+ //Src = BScev;
+ //Sink = AScev;
+ std::swap(APtr, BPtr);
+ std::swap(Src, Sink);
+ std::swap(AIsWrite, BIsWrite);
+ std::swap(AIdx, BIdx);
+ std::swap(StrideAPtr, StrideBPtr);
+ }
+
+ const SCEV *Dist = SE->getMinusSCEV(Sink, Src);
+
+ DEBUG(dbgs() << "LAA: Src Scev: " << *Src << "Sink Scev: " << *Sink
+ << "(Induction step: " << StrideAPtr << ")\n");
+ DEBUG(dbgs() << "LAA: Distance for " << *InstMap[AIdx] << " to "
+ << *InstMap[BIdx] << ": " << *Dist << "\n");
+
+ // Need consecutive accesses. We don't want to vectorize
+ // "A[B[i]] += ..." and similar code or pointer arithmetic that could wrap in
+ // the address space.
+  if (!StrideAPtr || !StrideBPtr || StrideAPtr != StrideBPtr) {
+ DEBUG(dbgs() << "Non-consecutive pointer access\n");
+ return true;
+ }
+
+ const SCEVConstant *C = dyn_cast<SCEVConstant>(Dist);
+ if (!C) {
+ DEBUG(dbgs() << "LAA: Dependence because of non-constant distance\n");
+ ShouldRetryWithRuntimeCheck = true;
+ return true;
+ }
+
+ Type *ATy = APtr->getType()->getPointerElementType();
+ Type *BTy = BPtr->getType()->getPointerElementType();
+ unsigned TypeByteSize = DL->getTypeAllocSize(ATy);
+
+ // Negative distances are not plausible dependencies.
+ const APInt &Val = C->getValue()->getValue();
+ if (Val.isNegative()) {
+ bool IsTrueDataDependence = (AIsWrite && !BIsWrite);
+ if (IsTrueDataDependence &&
+ (couldPreventStoreLoadForward(Val.abs().getZExtValue(), TypeByteSize) ||
+ ATy != BTy))
+ return true;
+
+ DEBUG(dbgs() << "LAA: Dependence is negative: NoDep\n");
+ return false;
+ }
+
+ // Write to the same location with the same size.
+ // Could be improved to assert type sizes are the same (i32 == float, etc).
+ if (Val == 0) {
+ if (ATy == BTy)
+ return false;
+ DEBUG(dbgs() << "LAA: Zero dependence difference but different types\n");
+ return true;
+ }
+
+ assert(Val.isStrictlyPositive() && "Expect a positive value");
+
+ if (ATy != BTy) {
+ DEBUG(dbgs() <<
+ "LAA: ReadWrite-Write positive dependency with different types\n");
+ return true;
+ }
+
+ unsigned Distance = (unsigned) Val.getZExtValue();
+
+ // Bail out early if passed-in parameters make vectorization not feasible.
+ unsigned ForcedFactor = (VectorizerParams::VectorizationFactor ?
+ VectorizerParams::VectorizationFactor : 1);
+ unsigned ForcedUnroll = (VectorizerParams::VectorizationInterleave ?
+ VectorizerParams::VectorizationInterleave : 1);
+
+ // The distance must be bigger than the size needed for a vectorized version
+ // of the operation and the size of the vectorized operation must not be
+  // bigger than the current maximum size.
+ if (Distance < 2*TypeByteSize ||
+ 2*TypeByteSize > MaxSafeDepDistBytes ||
+ Distance < TypeByteSize * ForcedUnroll * ForcedFactor) {
+ DEBUG(dbgs() << "LAA: Failure because of Positive distance "
+ << Val.getSExtValue() << '\n');
+ return true;
+ }
+
+ // Positive distance bigger than max vectorization factor.
+ MaxSafeDepDistBytes = Distance < MaxSafeDepDistBytes ?
+ Distance : MaxSafeDepDistBytes;
+
+ bool IsTrueDataDependence = (!AIsWrite && BIsWrite);
+ if (IsTrueDataDependence &&
+ couldPreventStoreLoadForward(Distance, TypeByteSize))
+ return true;
+
+ DEBUG(dbgs() << "LAA: Positive distance " << Val.getSExtValue() <<
+ " with max VF = " << MaxSafeDepDistBytes / TypeByteSize << '\n');
+
+ return false;
+}
+
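+// Worked example (assumed 4-byte elements): for the pattern
+//   tmp = a[i]; a[i+2] = tmp;
+// the distance is 2 * 4 = 8 bytes, which passes Distance >= 2*TypeByteSize,
+// so MaxSafeDepDistBytes becomes 8 and the maximum safe VF is 8 / 4 = 2
+// elements. A one-element distance (4 bytes) would fail the same test and be
+// reported as an unsafe dependence.
+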
+bool MemoryDepChecker::areDepsSafe(AccessAnalysis::DepCandidates &AccessSets,
+ MemAccessInfoSet &CheckDeps,
+ const ValueToValueMap &Strides) {
+
+ MaxSafeDepDistBytes = -1U;
+ while (!CheckDeps.empty()) {
+ MemAccessInfo CurAccess = *CheckDeps.begin();
+
+ // Get the relevant memory access set.
+ EquivalenceClasses<MemAccessInfo>::iterator I =
+ AccessSets.findValue(AccessSets.getLeaderValue(CurAccess));
+
+ // Check accesses within this set.
+ EquivalenceClasses<MemAccessInfo>::member_iterator AI, AE;
+ AI = AccessSets.member_begin(I), AE = AccessSets.member_end();
+
+ // Check every access pair.
+ while (AI != AE) {
+ CheckDeps.erase(*AI);
+ EquivalenceClasses<MemAccessInfo>::member_iterator OI = std::next(AI);
+ while (OI != AE) {
+ // Check every accessing instruction pair in program order.
+ for (std::vector<unsigned>::iterator I1 = Accesses[*AI].begin(),
+ I1E = Accesses[*AI].end(); I1 != I1E; ++I1)
+ for (std::vector<unsigned>::iterator I2 = Accesses[*OI].begin(),
+ I2E = Accesses[*OI].end(); I2 != I2E; ++I2) {
+ if (*I1 < *I2 && isDependent(*AI, *I1, *OI, *I2, Strides))
+ return false;
+ if (*I2 < *I1 && isDependent(*OI, *I2, *AI, *I1, Strides))
+ return false;
+ }
+ ++OI;
+ }
+      ++AI;
+ }
+ }
+ return true;
+}
+
+bool LoopAccessInfo::canAnalyzeLoop() {
+ // We can only analyze innermost loops.
+ if (!TheLoop->empty()) {
+ emitAnalysis(LoopAccessReport() << "loop is not the innermost loop");
+ return false;
+ }
+
+ // We must have a single backedge.
+ if (TheLoop->getNumBackEdges() != 1) {
+ emitAnalysis(
+ LoopAccessReport() <<
+ "loop control flow is not understood by analyzer");
+ return false;
+ }
+
+ // We must have a single exiting block.
+ if (!TheLoop->getExitingBlock()) {
+ emitAnalysis(
+ LoopAccessReport() <<
+ "loop control flow is not understood by analyzer");
+ return false;
+ }
+
+  // We only handle bottom-tested loops, i.e., loops in which the condition is
+ // checked at the end of each iteration. With that we can assume that all
+ // instructions in the loop are executed the same number of times.
+ if (TheLoop->getExitingBlock() != TheLoop->getLoopLatch()) {
+ emitAnalysis(
+ LoopAccessReport() <<
+ "loop control flow is not understood by analyzer");
+ return false;
+ }
+
+ // We need to have a loop header.
+ DEBUG(dbgs() << "LAA: Found a loop: " <<
+ TheLoop->getHeader()->getName() << '\n');
+
+ // ScalarEvolution needs to be able to find the exit count.
+ const SCEV *ExitCount = SE->getBackedgeTakenCount(TheLoop);
+ if (ExitCount == SE->getCouldNotCompute()) {
+ emitAnalysis(LoopAccessReport() <<
+ "could not determine number of loop iterations");
+ DEBUG(dbgs() << "LAA: SCEV could not compute the loop exit count.\n");
+ return false;
+ }
+
+ return true;
+}
+
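+// Source-level shapes this gate accepts and rejects (illustrative only;
+// the decision is made on the IR after earlier transforms):
+//
+//   for (int i = 0; i < n; ++i)       // innermost, one backedge,
+//     a[i] = i;                       // bottom-tested: analyzable
+//
+//   for (int i = 0; i < n; ++i) {
+//     if (a[i] < 0)
+//       return;                       // second exit: no single exiting
+//     a[i] = i;                       // block, so the loop is rejected
+//   }
+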
+void LoopAccessInfo::analyzeLoop(const ValueToValueMap &Strides) {
+
+ typedef SmallVector<Value*, 16> ValueVector;
+ typedef SmallPtrSet<Value*, 16> ValueSet;
+
+ // Holds the Load and Store *instructions*.
+ ValueVector Loads;
+ ValueVector Stores;
+
+ // Holds all the different accesses in the loop.
+ unsigned NumReads = 0;
+ unsigned NumReadWrites = 0;
+
+ PtrRtCheck.Pointers.clear();
+ PtrRtCheck.Need = false;
+
+ const bool IsAnnotatedParallel = TheLoop->isAnnotatedParallel();
+ MemoryDepChecker DepChecker(SE, DL, TheLoop);
+
+ // For each block.
+ for (Loop::block_iterator bb = TheLoop->block_begin(),
+ be = TheLoop->block_end(); bb != be; ++bb) {
+
+ // Scan the BB and collect legal loads and stores.
+ for (BasicBlock::iterator it = (*bb)->begin(), e = (*bb)->end(); it != e;
+ ++it) {
+
+ // If this is a load, save it. If this instruction can read from memory
+ // but is not a load, then we quit. Notice that we don't handle function
+ // calls that read or write.
+ if (it->mayReadFromMemory()) {
+ // Many math library functions read the rounding mode. We will only
+ // vectorize a loop if it contains known function calls that don't set
+ // the flag. Therefore, it is safe to ignore this read from memory.
+ CallInst *Call = dyn_cast<CallInst>(it);
+ if (Call && getIntrinsicIDForCall(Call, TLI))
+ continue;
+
+ LoadInst *Ld = dyn_cast<LoadInst>(it);
+ if (!Ld || (!Ld->isSimple() && !IsAnnotatedParallel)) {
+ emitAnalysis(LoopAccessReport(Ld)
+ << "read with atomic ordering or volatile read");
+ DEBUG(dbgs() << "LAA: Found a non-simple load.\n");
+ CanVecMem = false;
+ return;
+ }
+ NumLoads++;
+ Loads.push_back(Ld);
+ DepChecker.addAccess(Ld);
+ continue;
+ }
+
+ // Save 'store' instructions. Abort if other instructions write to memory.
+ if (it->mayWriteToMemory()) {
+ StoreInst *St = dyn_cast<StoreInst>(it);
+ if (!St) {
+ emitAnalysis(LoopAccessReport(it) <<
+ "instruction cannot be vectorized");
+ CanVecMem = false;
+ return;
+ }
+ if (!St->isSimple() && !IsAnnotatedParallel) {
+ emitAnalysis(LoopAccessReport(St)
+ << "write with atomic ordering or volatile write");
+ DEBUG(dbgs() << "LAA: Found a non-simple store.\n");
+ CanVecMem = false;
+ return;
+ }
+ NumStores++;
+ Stores.push_back(St);
+ DepChecker.addAccess(St);
+ }
+ } // Next instr.
+ } // Next block.
+
+ // Now we have two lists that hold the loads and the stores.
+ // Next, we find the pointers that they use.
+
+ // Check if we see any stores. If there are no stores, then we don't
+ // care if the pointers are *restrict*.
+ if (!Stores.size()) {
+ DEBUG(dbgs() << "LAA: Found a read-only loop!\n");
+ CanVecMem = true;
+ return;
+ }
+
+ AccessAnalysis::DepCandidates DependentAccesses;
+ AccessAnalysis Accesses(DL, AA, DependentAccesses);
+
+ // Holds the analyzed pointers. We don't want to call GetUnderlyingObjects
+ // multiple times on the same object. If the ptr is accessed twice, once
+ // for read and once for write, it will only appear once (on the write
+ // list). This is okay, since we are going to check for conflicts between
+ // writes and between reads and writes, but not between reads and reads.
+ ValueSet Seen;
+
+ ValueVector::iterator I, IE;
+ for (I = Stores.begin(), IE = Stores.end(); I != IE; ++I) {
+ StoreInst *ST = cast<StoreInst>(*I);
+ Value* Ptr = ST->getPointerOperand();
+
+ if (isUniform(Ptr)) {
+ emitAnalysis(
+ LoopAccessReport(ST)
+ << "write to a loop invariant address could not be vectorized");
+ DEBUG(dbgs() << "LAA: We don't allow storing to uniform addresses\n");
+ CanVecMem = false;
+ return;
+ }
+
+ // If we did *not* see this pointer before, insert it to the read-write
+ // list. At this phase it is only a 'write' list.
+ if (Seen.insert(Ptr).second) {
+ ++NumReadWrites;
+
+ AliasAnalysis::Location Loc = AA->getLocation(ST);
+ // The TBAA metadata could have a control dependency on the predication
+ // condition, so we cannot rely on it when determining whether or not we
+ // need runtime pointer checks.
+ if (blockNeedsPredication(ST->getParent(), TheLoop, DT))
+ Loc.AATags.TBAA = nullptr;
+
+ Accesses.addStore(Loc);
+ }
+ }
+
+ if (IsAnnotatedParallel) {
+ DEBUG(dbgs()
+ << "LAA: A loop annotated parallel, ignore memory dependency "
+ << "checks.\n");
+ CanVecMem = true;
+ return;
+ }
+
+ for (I = Loads.begin(), IE = Loads.end(); I != IE; ++I) {
+ LoadInst *LD = cast<LoadInst>(*I);
+ Value* Ptr = LD->getPointerOperand();
+ // If we did *not* see this pointer before, insert it to the
+ // read list. If we *did* see it before, then it is already in
+ // the read-write list. This allows us to vectorize expressions
+    // such as "A[i] += x", because the address of A[i] is a read-write
+ // pointer. This only works if the index of A[i] is consecutive.
+ // If the address of i is unknown (for example A[B[i]]) then we may
+ // read a few words, modify, and write a few words, and some of the
+ // words may be written to the same address.
+ bool IsReadOnlyPtr = false;
+ if (Seen.insert(Ptr).second ||
+ !isStridedPtr(SE, DL, Ptr, TheLoop, Strides)) {
+ ++NumReads;
+ IsReadOnlyPtr = true;
+ }
+
+ AliasAnalysis::Location Loc = AA->getLocation(LD);
+ // The TBAA metadata could have a control dependency on the predication
+ // condition, so we cannot rely on it when determining whether or not we
+ // need runtime pointer checks.
+ if (blockNeedsPredication(LD->getParent(), TheLoop, DT))
+ Loc.AATags.TBAA = nullptr;
+
+ Accesses.addLoad(Loc, IsReadOnlyPtr);
+ }
+
+ // If we write (or read-write) to a single destination and there are no
+  // other reads in this loop, then it is safe to vectorize.
+ if (NumReadWrites == 1 && NumReads == 0) {
+ DEBUG(dbgs() << "LAA: Found a write-only loop!\n");
+ CanVecMem = true;
+ return;
+ }
+
+ // Build dependence sets and check whether we need a runtime pointer bounds
+ // check.
+ Accesses.buildDependenceSets();
+ bool NeedRTCheck = Accesses.isRTCheckNeeded();
+
+ // Find pointers with computable bounds. We are going to use this information
+ // to place a runtime bound check.
+ unsigned NumComparisons = 0;
+ bool CanDoRT = false;
+ if (NeedRTCheck)
+ CanDoRT = Accesses.canCheckPtrAtRT(PtrRtCheck, NumComparisons, SE, TheLoop,
+ Strides);
+
+ DEBUG(dbgs() << "LAA: We need to do " << NumComparisons <<
+ " pointer comparisons.\n");
+
+  // If we only have one set of dependences to check pointers among, we don't
+ // need a runtime check.
+ if (NumComparisons == 0 && NeedRTCheck)
+ NeedRTCheck = false;
+
+ // Check that we did not collect too many pointers or found an unsizeable
+ // pointer.
+ if (!CanDoRT || NumComparisons > RuntimeMemoryCheckThreshold) {
+ PtrRtCheck.reset();
+ CanDoRT = false;
+ }
+
+ if (CanDoRT) {
+ DEBUG(dbgs() << "LAA: We can perform a memory runtime check if needed.\n");
+ }
+
+ if (NeedRTCheck && !CanDoRT) {
+ emitAnalysis(LoopAccessReport() << "cannot identify array bounds");
+ DEBUG(dbgs() << "LAA: We can't vectorize because we can't find " <<
+ "the array bounds.\n");
+ PtrRtCheck.reset();
+ CanVecMem = false;
+ return;
+ }
+
+ PtrRtCheck.Need = NeedRTCheck;
+
+ CanVecMem = true;
+ if (Accesses.isDependencyCheckNeeded()) {
+ DEBUG(dbgs() << "LAA: Checking memory dependencies\n");
+ CanVecMem = DepChecker.areDepsSafe(
+ DependentAccesses, Accesses.getDependenciesToCheck(), Strides);
+ MaxSafeDepDistBytes = DepChecker.getMaxSafeDepDistBytes();
+
+ if (!CanVecMem && DepChecker.shouldRetryWithRuntimeCheck()) {
+ DEBUG(dbgs() << "LAA: Retrying with memory checks\n");
+ NeedRTCheck = true;
+
+ // Clear the dependency checks. We assume they are not needed.
+ Accesses.resetDepChecks();
+
+ PtrRtCheck.reset();
+ PtrRtCheck.Need = true;
+
+ CanDoRT = Accesses.canCheckPtrAtRT(PtrRtCheck, NumComparisons, SE,
+ TheLoop, Strides, true);
+ // Check that we did not collect too many pointers or found an unsizeable
+ // pointer.
+ if (!CanDoRT || NumComparisons > RuntimeMemoryCheckThreshold) {
+ if (!CanDoRT && NumComparisons > 0)
+ emitAnalysis(LoopAccessReport()
+ << "cannot check memory dependencies at runtime");
+ else
+ emitAnalysis(LoopAccessReport()
+ << NumComparisons << " exceeds limit of "
+ << RuntimeMemoryCheckThreshold
+ << " dependent memory operations checked at runtime");
+ DEBUG(dbgs() << "LAA: Can't vectorize with memory checks\n");
+ PtrRtCheck.reset();
+ CanVecMem = false;
+ return;
+ }
+
+ CanVecMem = true;
+ }
+ }
+
+ if (!CanVecMem)
+ emitAnalysis(LoopAccessReport() <<
+ "unsafe dependent memory operations in loop");
+
+ DEBUG(dbgs() << "LAA: We" << (NeedRTCheck ? "" : " don't") <<
+ " need a runtime memory check.\n");
+}
+
+bool LoopAccessInfo::blockNeedsPredication(BasicBlock *BB, Loop *TheLoop,
+ DominatorTree *DT) {
+ assert(TheLoop->contains(BB) && "Unknown block used");
+
+ // Blocks that do not dominate the latch need predication.
+ BasicBlock* Latch = TheLoop->getLoopLatch();
+ return !DT->dominates(BB, Latch);
+}
+
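+// Illustration: in the loop below the guarded store sits in a block that
+// does not dominate the latch, so it needs predication -- which is also why
+// analyzeLoop above drops TBAA metadata for such accesses when deciding on
+// runtime pointer checks.
+//
+//   for (int i = 0; i < n; ++i)
+//     if (c[i])
+//       a[i] = 0;
+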
+void LoopAccessInfo::emitAnalysis(LoopAccessReport &Message) {
+ assert(!Report && "Multiple reports generated");
+ Report = Message;
+}
+
+bool LoopAccessInfo::isUniform(Value *V) const {
+ return (SE->isLoopInvariant(SE->getSCEV(V), TheLoop));
+}
+
+// FIXME: this function is currently a duplicate of the one in
+// LoopVectorize.cpp.
+static Instruction *getFirstInst(Instruction *FirstInst, Value *V,
+ Instruction *Loc) {
+ if (FirstInst)
+ return FirstInst;
+ if (Instruction *I = dyn_cast<Instruction>(V))
+ return I->getParent() == Loc->getParent() ? I : nullptr;
+ return nullptr;
+}
+
+std::pair<Instruction *, Instruction *>
+LoopAccessInfo::addRuntimeCheck(Instruction *Loc) const {
+ Instruction *tnullptr = nullptr;
+ if (!PtrRtCheck.Need)
+ return std::pair<Instruction *, Instruction *>(tnullptr, tnullptr);
+
+ unsigned NumPointers = PtrRtCheck.Pointers.size();
+ SmallVector<TrackingVH<Value> , 2> Starts;
+ SmallVector<TrackingVH<Value> , 2> Ends;
+
+ LLVMContext &Ctx = Loc->getContext();
+ SCEVExpander Exp(*SE, "induction");
+ Instruction *FirstInst = nullptr;
+
+ for (unsigned i = 0; i < NumPointers; ++i) {
+ Value *Ptr = PtrRtCheck.Pointers[i];
+ const SCEV *Sc = SE->getSCEV(Ptr);
+
+ if (SE->isLoopInvariant(Sc, TheLoop)) {
+ DEBUG(dbgs() << "LAA: Adding RT check for a loop invariant ptr:" <<
+ *Ptr <<"\n");
+ Starts.push_back(Ptr);
+ Ends.push_back(Ptr);
+ } else {
+ DEBUG(dbgs() << "LAA: Adding RT check for range:" << *Ptr << '\n');
+ unsigned AS = Ptr->getType()->getPointerAddressSpace();
+
+ // Use this type for pointer arithmetic.
+ Type *PtrArithTy = Type::getInt8PtrTy(Ctx, AS);
+
+ Value *Start = Exp.expandCodeFor(PtrRtCheck.Starts[i], PtrArithTy, Loc);
+ Value *End = Exp.expandCodeFor(PtrRtCheck.Ends[i], PtrArithTy, Loc);
+ Starts.push_back(Start);
+ Ends.push_back(End);
+ }
+ }
+
+ IRBuilder<> ChkBuilder(Loc);
+ // Our instructions might fold to a constant.
+ Value *MemoryRuntimeCheck = nullptr;
+ for (unsigned i = 0; i < NumPointers; ++i) {
+ for (unsigned j = i+1; j < NumPointers; ++j) {
+ if (!PtrRtCheck.needsChecking(i, j))
+ continue;
+
+ unsigned AS0 = Starts[i]->getType()->getPointerAddressSpace();
+ unsigned AS1 = Starts[j]->getType()->getPointerAddressSpace();
+
+ assert((AS0 == Ends[j]->getType()->getPointerAddressSpace()) &&
+ (AS1 == Ends[i]->getType()->getPointerAddressSpace()) &&
+ "Trying to bounds check pointers with different address spaces");
+
+ Type *PtrArithTy0 = Type::getInt8PtrTy(Ctx, AS0);
+ Type *PtrArithTy1 = Type::getInt8PtrTy(Ctx, AS1);
+
+ Value *Start0 = ChkBuilder.CreateBitCast(Starts[i], PtrArithTy0, "bc");
+ Value *Start1 = ChkBuilder.CreateBitCast(Starts[j], PtrArithTy1, "bc");
+ Value *End0 = ChkBuilder.CreateBitCast(Ends[i], PtrArithTy1, "bc");
+ Value *End1 = ChkBuilder.CreateBitCast(Ends[j], PtrArithTy0, "bc");
+
+ Value *Cmp0 = ChkBuilder.CreateICmpULE(Start0, End1, "bound0");
+ FirstInst = getFirstInst(FirstInst, Cmp0, Loc);
+ Value *Cmp1 = ChkBuilder.CreateICmpULE(Start1, End0, "bound1");
+ FirstInst = getFirstInst(FirstInst, Cmp1, Loc);
+ Value *IsConflict = ChkBuilder.CreateAnd(Cmp0, Cmp1, "found.conflict");
+ FirstInst = getFirstInst(FirstInst, IsConflict, Loc);
+ if (MemoryRuntimeCheck) {
+ IsConflict = ChkBuilder.CreateOr(MemoryRuntimeCheck, IsConflict,
+ "conflict.rdx");
+ FirstInst = getFirstInst(FirstInst, IsConflict, Loc);
+ }
+ MemoryRuntimeCheck = IsConflict;
+ }
+ }
+
+ // We have to do this trickery because the IRBuilder might fold the check to a
+  // constant expression in which case there is no Instruction anchored in
+  // the block.
+ Instruction *Check = BinaryOperator::CreateAnd(MemoryRuntimeCheck,
+ ConstantInt::getTrue(Ctx));
+ ChkBuilder.Insert(Check, "memcheck.conflict");
+ FirstInst = getFirstInst(FirstInst, Check, Loc);
+ return std::make_pair(FirstInst, Check);
+}
+
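+// The per-pair check emitted above is a classic interval-overlap test; a
+// hypothetical restatement (not LLVM API) mirroring the two CreateICmpULE
+// calls: two ranges conflict iff neither start lies past the other's end.
+static inline bool mayConflictExample(const char *StartA, const char *EndA,
+                                      const char *StartB, const char *EndB) {
+  return StartA <= EndB && StartB <= EndA;
+}
+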
+LoopAccessInfo::LoopAccessInfo(Loop *L, ScalarEvolution *SE,
+ const DataLayout *DL,
+ const TargetLibraryInfo *TLI, AliasAnalysis *AA,
+ DominatorTree *DT,
+ const ValueToValueMap &Strides)
+ : TheLoop(L), SE(SE), DL(DL), TLI(TLI), AA(AA), DT(DT), NumLoads(0),
+ NumStores(0), MaxSafeDepDistBytes(-1U), CanVecMem(false) {
+ if (canAnalyzeLoop())
+ analyzeLoop(Strides);
+}
+
+void LoopAccessInfo::print(raw_ostream &OS, unsigned Depth) const {
+ if (CanVecMem) {
+ if (PtrRtCheck.empty())
+ OS.indent(Depth) << "Memory dependences are safe\n";
+ else
+ OS.indent(Depth) << "Memory dependences are safe with run-time checks\n";
+ }
+
+ if (Report)
+ OS.indent(Depth) << "Report: " << Report->str() << "\n";
+
+ // FIXME: Print unsafe dependences
+
+  // List the pairs of accesses that need run-time checks to prove
+  // independence.
+ PtrRtCheck.print(OS, Depth);
+ OS << "\n";
+}
+
+const LoopAccessInfo &
+LoopAccessAnalysis::getInfo(Loop *L, const ValueToValueMap &Strides) {
+ auto &LAI = LoopAccessInfoMap[L];
+
+#ifndef NDEBUG
+ assert((!LAI || LAI->NumSymbolicStrides == Strides.size()) &&
+ "Symbolic strides changed for loop");
+#endif
+
+ if (!LAI) {
+ LAI = llvm::make_unique<LoopAccessInfo>(L, SE, DL, TLI, AA, DT, Strides);
+#ifndef NDEBUG
+ LAI->NumSymbolicStrides = Strides.size();
+#endif
+ }
+ return *LAI.get();
+}
+
+void LoopAccessAnalysis::print(raw_ostream &OS, const Module *M) const {
+ LoopAccessAnalysis &LAA = *const_cast<LoopAccessAnalysis *>(this);
+
+ LoopInfo *LI = &getAnalysis<LoopInfoWrapperPass>().getLoopInfo();
+ ValueToValueMap NoSymbolicStrides;
+
+ for (Loop *TopLevelLoop : *LI)
+ for (Loop *L : depth_first(TopLevelLoop)) {
+ OS.indent(2) << L->getHeader()->getName() << ":\n";
+ auto &LAI = LAA.getInfo(L, NoSymbolicStrides);
+ LAI.print(OS, 4);
+ }
+}
+
+bool LoopAccessAnalysis::runOnFunction(Function &F) {
+ SE = &getAnalysis<ScalarEvolution>();
+ DL = F.getParent()->getDataLayout();
+ auto *TLIP = getAnalysisIfAvailable<TargetLibraryInfoWrapperPass>();
+ TLI = TLIP ? &TLIP->getTLI() : nullptr;
+ AA = &getAnalysis<AliasAnalysis>();
+ DT = &getAnalysis<DominatorTreeWrapperPass>().getDomTree();
+
+ return false;
+}
+
+void LoopAccessAnalysis::getAnalysisUsage(AnalysisUsage &AU) const {
+ AU.addRequired<ScalarEvolution>();
+ AU.addRequired<AliasAnalysis>();
+ AU.addRequired<DominatorTreeWrapperPass>();
+ AU.addRequired<LoopInfoWrapperPass>();
+
+ AU.setPreservesAll();
+}
+
+char LoopAccessAnalysis::ID = 0;
+static const char laa_name[] = "Loop Access Analysis";
+#define LAA_NAME "loop-accesses"
+
+INITIALIZE_PASS_BEGIN(LoopAccessAnalysis, LAA_NAME, laa_name, false, true)
+INITIALIZE_AG_DEPENDENCY(AliasAnalysis)
+INITIALIZE_PASS_DEPENDENCY(ScalarEvolution)
+INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass)
+INITIALIZE_PASS_DEPENDENCY(LoopInfoWrapperPass)
+INITIALIZE_PASS_END(LoopAccessAnalysis, LAA_NAME, laa_name, false, true)
+
+namespace llvm {
+ Pass *createLAAPass() {
+ return new LoopAccessAnalysis();
+ }
+}
diff --git a/lib/Analysis/LoopInfo.cpp b/lib/Analysis/LoopInfo.cpp
index b1f62c4..95f6eb0 100644
--- a/lib/Analysis/LoopInfo.cpp
+++ b/lib/Analysis/LoopInfo.cpp
@@ -26,6 +26,7 @@
#include "llvm/IR/Instructions.h"
#include "llvm/IR/LLVMContext.h"
#include "llvm/IR/Metadata.h"
+#include "llvm/IR/PassManager.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Debug.h"
#include <algorithm>
@@ -45,11 +46,6 @@ static cl::opt<bool,true>
VerifyLoopInfoX("verify-loop-info", cl::location(VerifyLoopInfo),
cl::desc("Verify loop info (time consuming)"));
-char LoopInfo::ID = 0;
-INITIALIZE_PASS_BEGIN(LoopInfo, "loops", "Natural Loop Information", true, true)
-INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass)
-INITIALIZE_PASS_END(LoopInfo, "loops", "Natural Loop Information", true, true)
-
// Loop identifier metadata name.
static const char *const LoopMDName = "llvm.loop";
@@ -609,15 +605,6 @@ Loop *UnloopUpdater::getNearestLoop(BasicBlock *BB, Loop *BBLoop) {
return NearLoop;
}
-//===----------------------------------------------------------------------===//
-// LoopInfo implementation
-//
-bool LoopInfo::runOnFunction(Function &) {
- releaseMemory();
- LI.Analyze(getAnalysis<DominatorTreeWrapperPass>().getDomTree());
- return false;
-}
-
/// updateUnloop - The last backedge has been removed from a loop--now the
/// "unloop". Find a new parent for the blocks contained within unloop and
/// update the loop tree. We don't necessarily have valid dominators at this
@@ -631,7 +618,8 @@ void LoopInfo::updateUnloop(Loop *Unloop) {
if (!Unloop->getParentLoop()) {
// Since BBLoop had no parent, Unloop blocks are no longer in a loop.
for (Loop::block_iterator I = Unloop->block_begin(),
- E = Unloop->block_end(); I != E; ++I) {
+ E = Unloop->block_end();
+ I != E; ++I) {
// Don't reparent blocks in subloops.
if (getLoopFor(*I) != Unloop)
@@ -639,21 +627,21 @@ void LoopInfo::updateUnloop(Loop *Unloop) {
// Blocks no longer have a parent but are still referenced by Unloop until
// the Unloop object is deleted.
- LI.changeLoopFor(*I, nullptr);
+ changeLoopFor(*I, nullptr);
}
// Remove the loop from the top-level LoopInfo object.
- for (LoopInfo::iterator I = LI.begin();; ++I) {
- assert(I != LI.end() && "Couldn't find loop");
+ for (iterator I = begin();; ++I) {
+ assert(I != end() && "Couldn't find loop");
if (*I == Unloop) {
- LI.removeLoop(I);
+ removeLoop(I);
break;
}
}
// Move all of the subloops to the top-level.
while (!Unloop->empty())
- LI.addTopLevelLoop(Unloop->removeChildLoop(std::prev(Unloop->end())));
+ addTopLevelLoop(Unloop->removeChildLoop(std::prev(Unloop->end())));
return;
}
@@ -680,35 +668,59 @@ void LoopInfo::updateUnloop(Loop *Unloop) {
}
}
-void LoopInfo::verifyAnalysis() const {
- // LoopInfo is a FunctionPass, but verifying every loop in the function
- // each time verifyAnalysis is called is very expensive. The
- // -verify-loop-info option can enable this. In order to perform some
- // checking by default, LoopPass has been taught to call verifyLoop
- // manually during loop pass sequences.
+char LoopAnalysis::PassID;
+
+LoopInfo LoopAnalysis::run(Function &F, AnalysisManager<Function> *AM) {
+ // FIXME: Currently we create a LoopInfo from scratch for every function.
+ // This may prove to be too wasteful due to deallocating and re-allocating
+  // memory each time for the underlying map and vector data structures. At some
+ // point it may prove worthwhile to use a freelist and recycle LoopInfo
+ // objects. I don't want to add that kind of complexity until the scope of
+ // the problem is better understood.
+ LoopInfo LI;
+ LI.Analyze(AM->getResult<DominatorTreeAnalysis>(F));
+ return std::move(LI);
+}
+
+PreservedAnalyses LoopPrinterPass::run(Function &F,
+ AnalysisManager<Function> *AM) {
+ AM->getResult<LoopAnalysis>(F).print(OS);
+ return PreservedAnalyses::all();
+}
- if (!VerifyLoopInfo) return;
+//===----------------------------------------------------------------------===//
+// LoopInfo implementation
+//
- DenseSet<const Loop*> Loops;
- for (iterator I = begin(), E = end(); I != E; ++I) {
- assert(!(*I)->getParentLoop() && "Top-level loop has a parent!");
- (*I)->verifyLoopNest(&Loops);
- }
+char LoopInfoWrapperPass::ID = 0;
+INITIALIZE_PASS_BEGIN(LoopInfoWrapperPass, "loops", "Natural Loop Information",
+ true, true)
+INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass)
+INITIALIZE_PASS_END(LoopInfoWrapperPass, "loops", "Natural Loop Information",
+ true, true)
- // Verify that blocks are mapped to valid loops.
- for (DenseMap<BasicBlock*, Loop*>::const_iterator I = LI.BBMap.begin(),
- E = LI.BBMap.end(); I != E; ++I) {
- assert(Loops.count(I->second) && "orphaned loop");
- assert(I->second->contains(I->first) && "orphaned block");
- }
+bool LoopInfoWrapperPass::runOnFunction(Function &) {
+ releaseMemory();
+ LI.Analyze(getAnalysis<DominatorTreeWrapperPass>().getDomTree());
+ return false;
+}
+
+void LoopInfoWrapperPass::verifyAnalysis() const {
+ // LoopInfoWrapperPass is a FunctionPass, but verifying every loop in the
+ // function each time verifyAnalysis is called is very expensive. The
+ // -verify-loop-info option can enable this. In order to perform some
+ // checking by default, LoopPass has been taught to call verifyLoop manually
+ // during loop pass sequences.
+ if (VerifyLoopInfo)
+ LI.verify();
}
-void LoopInfo::getAnalysisUsage(AnalysisUsage &AU) const {
+void LoopInfoWrapperPass::getAnalysisUsage(AnalysisUsage &AU) const {
AU.setPreservesAll();
AU.addRequired<DominatorTreeWrapperPass>();
}
-void LoopInfo::print(raw_ostream &OS, const Module*) const {
+void LoopInfoWrapperPass::print(raw_ostream &OS, const Module *) const {
LI.print(OS);
}
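+
+// Minimal sketch of consuming the new pass-manager analysis introduced above
+// (MyLoopCounter is hypothetical; its signature mirrors LoopPrinterPass::run):
+//
+//   struct MyLoopCounter {
+//     PreservedAnalyses run(Function &F, AnalysisManager<Function> *AM) {
+//       LoopInfo &LI = AM->getResult<LoopAnalysis>(F);
+//       unsigned TopLevel = 0;
+//       for (Loop *L : LI) {
+//         (void)L;
+//         ++TopLevel;              // counts top-level loops only
+//       }
+//       (void)TopLevel;
+//       return PreservedAnalyses::all();
+//     }
+//   };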
diff --git a/lib/Analysis/LoopPass.cpp b/lib/Analysis/LoopPass.cpp
index 190abc7..a99c949 100644
--- a/lib/Analysis/LoopPass.cpp
+++ b/lib/Analysis/LoopPass.cpp
@@ -187,14 +187,15 @@ static void addLoopIntoQueue(Loop *L, std::deque<Loop *> &LQ) {
void LPPassManager::getAnalysisUsage(AnalysisUsage &Info) const {
// LPPassManager needs LoopInfo. In the long term LoopInfo class will
// become part of LPPassManager.
- Info.addRequired<LoopInfo>();
+ Info.addRequired<LoopInfoWrapperPass>();
Info.setPreservesAll();
}
/// run - Execute all of the passes scheduled for execution. Keep track of
/// whether any of the passes modifies the function, and if so, return true.
bool LPPassManager::runOnFunction(Function &F) {
- LI = &getAnalysis<LoopInfo>();
+ auto &LIWP = getAnalysis<LoopInfoWrapperPass>();
+ LI = &LIWP.getLoopInfo();
bool Changed = false;
// Collect inherited analysis from Module level pass manager.
@@ -262,7 +263,7 @@ bool LPPassManager::runOnFunction(Function &F) {
// loop in the function every time. That level of checking can be
// enabled with the -verify-loop-info option.
{
- TimeRegion PassTimer(getPassTimer(LI));
+ TimeRegion PassTimer(getPassTimer(&LIWP));
CurrentLoop->verifyLoop();
}
diff --git a/lib/Analysis/MemDepPrinter.cpp b/lib/Analysis/MemDepPrinter.cpp
index 10da3d5..e1b7b4b 100644
--- a/lib/Analysis/MemDepPrinter.cpp
+++ b/lib/Analysis/MemDepPrinter.cpp
@@ -92,13 +92,12 @@ const char *const MemDepPrinter::DepTypeStr[]
bool MemDepPrinter::runOnFunction(Function &F) {
this->F = &F;
- AliasAnalysis &AA = getAnalysis<AliasAnalysis>();
MemoryDependenceAnalysis &MDA = getAnalysis<MemoryDependenceAnalysis>();
// All this code uses non-const interfaces because MemDep is not
// const-friendly, though nothing is actually modified.
- for (inst_iterator I = inst_begin(F), E = inst_end(F); I != E; ++I) {
- Instruction *Inst = &*I;
+ for (auto &I : inst_range(F)) {
+ Instruction *Inst = &I;
if (!Inst->mayReadFromMemory() && !Inst->mayWriteToMemory())
continue;
@@ -119,30 +118,9 @@ bool MemDepPrinter::runOnFunction(Function &F) {
}
} else {
SmallVector<NonLocalDepResult, 4> NLDI;
- if (LoadInst *LI = dyn_cast<LoadInst>(Inst)) {
- if (!LI->isUnordered()) {
- // FIXME: Handle atomic/volatile loads.
- Deps[Inst].insert(std::make_pair(getInstTypePair(nullptr, Unknown),
- static_cast<BasicBlock *>(nullptr)));
- continue;
- }
- AliasAnalysis::Location Loc = AA.getLocation(LI);
- MDA.getNonLocalPointerDependency(Loc, true, LI->getParent(), NLDI);
- } else if (StoreInst *SI = dyn_cast<StoreInst>(Inst)) {
- if (!SI->isUnordered()) {
- // FIXME: Handle atomic/volatile stores.
- Deps[Inst].insert(std::make_pair(getInstTypePair(nullptr, Unknown),
- static_cast<BasicBlock *>(nullptr)));
- continue;
- }
- AliasAnalysis::Location Loc = AA.getLocation(SI);
- MDA.getNonLocalPointerDependency(Loc, false, SI->getParent(), NLDI);
- } else if (VAArgInst *VI = dyn_cast<VAArgInst>(Inst)) {
- AliasAnalysis::Location Loc = AA.getLocation(VI);
- MDA.getNonLocalPointerDependency(Loc, false, VI->getParent(), NLDI);
- } else {
- llvm_unreachable("Unknown memory instruction!");
- }
+ assert( (isa<LoadInst>(Inst) || isa<StoreInst>(Inst) ||
+ isa<VAArgInst>(Inst)) && "Unknown memory instruction!");
+ MDA.getNonLocalPointerDependency(Inst, NLDI);
DepSet &InstDeps = Deps[Inst];
for (SmallVectorImpl<NonLocalDepResult>::const_iterator
@@ -157,8 +135,8 @@ bool MemDepPrinter::runOnFunction(Function &F) {
}
void MemDepPrinter::print(raw_ostream &OS, const Module *M) const {
- for (const_inst_iterator I = inst_begin(*F), E = inst_end(*F); I != E; ++I) {
- const Instruction *Inst = &*I;
+ for (const auto &I : inst_range(*F)) {
+ const Instruction *Inst = &I;
DepSetMap::const_iterator DI = Deps.find(Inst);
if (DI == Deps.end())
@@ -166,11 +144,10 @@ void MemDepPrinter::print(raw_ostream &OS, const Module *M) const {
const DepSet &InstDeps = DI->second;
- for (DepSet::const_iterator I = InstDeps.begin(), E = InstDeps.end();
- I != E; ++I) {
- const Instruction *DepInst = I->first.getPointer();
- DepType type = I->first.getInt();
- const BasicBlock *DepBB = I->second;
+ for (const auto &I : InstDeps) {
+ const Instruction *DepInst = I.first.getPointer();
+ DepType type = I.first.getInt();
+ const BasicBlock *DepBB = I.second;
OS << " ";
OS << DepTypeStr[type];
diff --git a/lib/Analysis/MemDerefPrinter.cpp b/lib/Analysis/MemDerefPrinter.cpp
new file mode 100644
index 0000000..531d75e
--- /dev/null
+++ b/lib/Analysis/MemDerefPrinter.cpp
@@ -0,0 +1,70 @@
+//===- MemDerefPrinter.cpp - Printer for isDereferenceablePointer ---------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/Analysis/Passes.h"
+#include "llvm/ADT/SetVector.h"
+#include "llvm/Analysis/MemoryDependenceAnalysis.h"
+#include "llvm/IR/CallSite.h"
+#include "llvm/IR/DataLayout.h"
+#include "llvm/IR/InstIterator.h"
+#include "llvm/IR/LLVMContext.h"
+#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/raw_ostream.h"
+using namespace llvm;
+
+namespace {
+ struct MemDerefPrinter : public FunctionPass {
+ SmallVector<Value *, 4> Vec;
+
+    static char ID; // Pass identification, replacement for typeid
+ MemDerefPrinter() : FunctionPass(ID) {
+ initializeMemDerefPrinterPass(*PassRegistry::getPassRegistry());
+ }
+ void getAnalysisUsage(AnalysisUsage &AU) const override {
+ AU.addRequired<DataLayoutPass>();
+ AU.setPreservesAll();
+ }
+ bool runOnFunction(Function &F) override;
+ void print(raw_ostream &OS, const Module * = nullptr) const override;
+ void releaseMemory() override {
+ Vec.clear();
+ }
+ };
+}
+
+char MemDerefPrinter::ID = 0;
+INITIALIZE_PASS_BEGIN(MemDerefPrinter, "print-memderefs",
+ "Memory Dereferenciblity of pointers in function", false, true)
+INITIALIZE_PASS_DEPENDENCY(DataLayoutPass)
+INITIALIZE_PASS_END(MemDerefPrinter, "print-memderefs",
+ "Memory Dereferenciblity of pointers in function", false, true)
+
+FunctionPass *llvm::createMemDerefPrinter() {
+ return new MemDerefPrinter();
+}
+
+bool MemDerefPrinter::runOnFunction(Function &F) {
+ const DataLayout *DL = &getAnalysis<DataLayoutPass>().getDataLayout();
+  for (auto &I : inst_range(F)) {
+ if (LoadInst *LI = dyn_cast<LoadInst>(&I)) {
+ Value *PO = LI->getPointerOperand();
+ if (PO->isDereferenceablePointer(DL))
+ Vec.push_back(PO);
+ }
+ }
+ return false;
+}
+
+void MemDerefPrinter::print(raw_ostream &OS, const Module *M) const {
+ OS << "The following are dereferenceable:\n";
+  for (auto &V : Vec) {
+ V->print(OS);
+ OS << "\n\n";
+ }
+}
diff --git a/lib/Analysis/MemoryBuiltins.cpp b/lib/Analysis/MemoryBuiltins.cpp
index 08b41fe..6108af3 100644
--- a/lib/Analysis/MemoryBuiltins.cpp
+++ b/lib/Analysis/MemoryBuiltins.cpp
@@ -15,6 +15,7 @@
#include "llvm/Analysis/MemoryBuiltins.h"
#include "llvm/ADT/STLExtras.h"
#include "llvm/ADT/Statistic.h"
+#include "llvm/Analysis/TargetLibraryInfo.h"
#include "llvm/Analysis/ValueTracking.h"
#include "llvm/IR/DataLayout.h"
#include "llvm/IR/GlobalVariable.h"
@@ -25,7 +26,6 @@
#include "llvm/Support/Debug.h"
#include "llvm/Support/MathExtras.h"
#include "llvm/Support/raw_ostream.h"
-#include "llvm/Target/TargetLibraryInfo.h"
#include "llvm/Transforms/Utils/Local.h"
using namespace llvm;
@@ -319,7 +319,7 @@ const CallInst *llvm::isFreeCall(const Value *I, const TargetLibraryInfo *TLI) {
if (!CI || isa<IntrinsicInst>(CI))
return nullptr;
Function *Callee = CI->getCalledFunction();
- if (Callee == nullptr || !Callee->isDeclaration())
+ if (Callee == nullptr)
return nullptr;
StringRef FnName = Callee->getName();
diff --git a/lib/Analysis/MemoryDependenceAnalysis.cpp b/lib/Analysis/MemoryDependenceAnalysis.cpp
index 187eada..6d38863 100644
--- a/lib/Analysis/MemoryDependenceAnalysis.cpp
+++ b/lib/Analysis/MemoryDependenceAnalysis.cpp
@@ -18,7 +18,7 @@
#include "llvm/ADT/STLExtras.h"
#include "llvm/ADT/Statistic.h"
#include "llvm/Analysis/AliasAnalysis.h"
-#include "llvm/Analysis/AssumptionTracker.h"
+#include "llvm/Analysis/AssumptionCache.h"
#include "llvm/Analysis/InstructionSimplify.h"
#include "llvm/Analysis/MemoryBuiltins.h"
#include "llvm/Analysis/PHITransAddr.h"
@@ -59,7 +59,7 @@ char MemoryDependenceAnalysis::ID = 0;
// Register this pass...
INITIALIZE_PASS_BEGIN(MemoryDependenceAnalysis, "memdep",
"Memory Dependence Analysis", false, true)
-INITIALIZE_PASS_DEPENDENCY(AssumptionTracker)
+INITIALIZE_PASS_DEPENDENCY(AssumptionCacheTracker)
INITIALIZE_AG_DEPENDENCY(AliasAnalysis)
INITIALIZE_PASS_END(MemoryDependenceAnalysis, "memdep",
"Memory Dependence Analysis", false, true)
@@ -82,19 +82,17 @@ void MemoryDependenceAnalysis::releaseMemory() {
PredCache->clear();
}
-
-
/// getAnalysisUsage - Does not modify anything. It uses Alias Analysis.
///
void MemoryDependenceAnalysis::getAnalysisUsage(AnalysisUsage &AU) const {
AU.setPreservesAll();
- AU.addRequired<AssumptionTracker>();
+ AU.addRequired<AssumptionCacheTracker>();
AU.addRequiredTransitive<AliasAnalysis>();
}
-bool MemoryDependenceAnalysis::runOnFunction(Function &) {
+bool MemoryDependenceAnalysis::runOnFunction(Function &F) {
AA = &getAnalysis<AliasAnalysis>();
- AT = &getAnalysis<AssumptionTracker>();
+ AC = &getAnalysis<AssumptionCacheTracker>().getAssumptionCache(F);
DataLayoutPass *DLP = getAnalysisIfAvailable<DataLayoutPass>();
DL = DLP ? &DLP->getDataLayout() : nullptr;
DominatorTreeWrapperPass *DTWP =
@@ -300,8 +298,7 @@ getLoadLoadClobberFullWidthSize(const Value *MemLocBase, int64_t MemLocOffs,
// Load widening is hostile to ThreadSanitizer: it may cause false positives
// or make the reports more cryptic (access sizes are wrong).
- if (LI->getParent()->getParent()->getAttributes().
- hasAttribute(AttributeSet::FunctionIndex, Attribute::SanitizeThread))
+ if (LI->getParent()->getParent()->hasFnAttribute(Attribute::SanitizeThread))
return 0;
// Get the base of this load.
@@ -346,9 +343,9 @@ getLoadLoadClobberFullWidthSize(const Value *MemLocBase, int64_t MemLocOffs,
!DL.fitsInLegalInteger(NewLoadByteSize*8))
return 0;
- if (LIOffs+NewLoadByteSize > MemLocEnd &&
- LI->getParent()->getParent()->getAttributes().
- hasAttribute(AttributeSet::FunctionIndex, Attribute::SanitizeAddress))
+ if (LIOffs + NewLoadByteSize > MemLocEnd &&
+ LI->getParent()->getParent()->hasFnAttribute(
+ Attribute::SanitizeAddress))
// We will be reading past the location accessed by the original program.
// While this is safe in a regular build, Address Safety analysis tools
// may start reporting false warnings. So, don't do widening.
@@ -362,6 +359,17 @@ getLoadLoadClobberFullWidthSize(const Value *MemLocBase, int64_t MemLocOffs,
}
}
+static bool isVolatile(Instruction *Inst) {
+ if (LoadInst *LI = dyn_cast<LoadInst>(Inst))
+ return LI->isVolatile();
+ else if (StoreInst *SI = dyn_cast<StoreInst>(Inst))
+ return SI->isVolatile();
+ else if (AtomicCmpXchgInst *AI = dyn_cast<AtomicCmpXchgInst>(Inst))
+ return AI->isVolatile();
+ return false;
+}
+
+
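Assumed-illustrative behavior of the helper just added: only loads, stores, and cmpxchg consult their volatile bit; every other instruction (an atomicrmw, say) falls through and reports false.

// 'LI' and 'RMW' are hypothetical Instruction pointers, for illustration only.
bool VolLoad = isVolatile(LI);   // LoadInst -> LI->isVolatile()
bool VolRMW  = isVolatile(RMW);  // AtomicRMWInst is not matched -> false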
/// getPointerDependencyFrom - Return the instruction on which a memory
/// location depends. If isLoad is true, this routine ignores may-aliases with
/// read-only operations. If isLoad is false, this routine ignores may-aliases
@@ -448,12 +456,26 @@ getPointerDependencyFrom(const AliasAnalysis::Location &MemLoc, bool isLoad,
// does not alias with when this atomic load indicates that another thread may
// be accessing the location.
if (LoadInst *LI = dyn_cast<LoadInst>(Inst)) {
+
+ // While volatile accesses cannot be eliminated, they do not have to clobber
+ // non-aliasing locations; normal accesses, for example, can be safely
+ // reordered with volatile accesses.
+ if (LI->isVolatile()) {
+ if (!QueryInst)
+ // Original QueryInst *may* be volatile
+ return MemDepResult::getClobber(LI);
+ if (isVolatile(QueryInst))
+ // Ordering required if QueryInst is itself volatile
+ return MemDepResult::getClobber(LI);
+ // Otherwise, volatile doesn't imply any special ordering
+ }
+
// Atomic loads have complications involved.
// A Monotonic (or higher) load is OK if the query inst is itself not atomic.
// An Acquire (or higher) load sets the HasSeenAcquire flag, so that any
// release store will know to return getClobber.
// FIXME: This is overly conservative.
- if (!LI->isUnordered()) {
+ if (LI->isAtomic() && LI->getOrdering() > Unordered) {
if (!QueryInst)
return MemDepResult::getClobber(LI);
if (auto *QueryLI = dyn_cast<LoadInst>(QueryInst)) {
@@ -470,13 +492,6 @@ getPointerDependencyFrom(const AliasAnalysis::Location &MemLoc, bool isLoad,
HasSeenAcquire = true;
}
- // FIXME: this is overly conservative.
- // While volatile access cannot be eliminated, they do not have to clobber
- // non-aliasing locations, as normal accesses can for example be reordered
- // with volatile accesses.
- if (LI->isVolatile())
- return MemDepResult::getClobber(LI);
-
AliasAnalysis::Location LoadLoc = AA->getLocation(LI);
// If we found a pointer, check if it could be the same as our pointer.
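A minimal C++ illustration of the rule encoded above (the variables are invented for the example): the volatile access itself must stay, but it no longer clobbers a non-aliasing, non-volatile access, so the two loads below may be reordered.

volatile int Device; // the volatile location
int Plain;           // an unrelated, non-aliasing location

int ReadBoth() {
  int A = Device; // volatile load: cannot be eliminated
  int B = Plain;  // no longer reported as clobbered by the volatile load,
                  // so this load is free to move across it
  return A + B;
}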
@@ -859,21 +874,65 @@ MemoryDependenceAnalysis::getNonLocalCallDependency(CallSite QueryCS) {
/// own block.
///
void MemoryDependenceAnalysis::
-getNonLocalPointerDependency(const AliasAnalysis::Location &Loc, bool isLoad,
- BasicBlock *FromBB,
+getNonLocalPointerDependency(Instruction *QueryInst,
SmallVectorImpl<NonLocalDepResult> &Result) {
+
+ auto getLocation = [](AliasAnalysis *AA, Instruction *Inst) {
+ if (auto *I = dyn_cast<LoadInst>(Inst))
+ return AA->getLocation(I);
+ else if (auto *I = dyn_cast<StoreInst>(Inst))
+ return AA->getLocation(I);
+ else if (auto *I = dyn_cast<VAArgInst>(Inst))
+ return AA->getLocation(I);
+ else if (auto *I = dyn_cast<AtomicCmpXchgInst>(Inst))
+ return AA->getLocation(I);
+ else if (auto *I = dyn_cast<AtomicRMWInst>(Inst))
+ return AA->getLocation(I);
+ else
+ llvm_unreachable("unsupported memory instruction");
+ };
+
+ const AliasAnalysis::Location Loc = getLocation(AA, QueryInst);
+ bool isLoad = isa<LoadInst>(QueryInst);
+ BasicBlock *FromBB = QueryInst->getParent();
+ assert(FromBB);
+
assert(Loc.Ptr->getType()->isPointerTy() &&
"Can't get pointer deps of a non-pointer!");
Result.clear();
+
+ // This routine does not expect to deal with volatile instructions.
+ // Doing so would require piping the QueryInst all the way through.
+ // TODO: volatiles can't be elided, but they can be reordered with other
+ // non-volatile accesses.
+
+ // We currently give up on any instruction which is ordered, but we do handle
+ // atomic instructions which are unordered.
+ // TODO: Handle ordered instructions
+ auto isOrdered = [](Instruction *Inst) {
+ if (LoadInst *LI = dyn_cast<LoadInst>(Inst)) {
+ return !LI->isUnordered();
+ } else if (StoreInst *SI = dyn_cast<StoreInst>(Inst)) {
+ return !SI->isUnordered();
+ }
+ return false;
+ };
+ if (isVolatile(QueryInst) || isOrdered(QueryInst)) {
+ Result.push_back(NonLocalDepResult(FromBB,
+ MemDepResult::getUnknown(),
+ const_cast<Value *>(Loc.Ptr)));
+ return;
+ }
+
- PHITransAddr Address(const_cast<Value *>(Loc.Ptr), DL, AT);
+ PHITransAddr Address(const_cast<Value *>(Loc.Ptr), DL, AC);
// This is the set of blocks we've inspected, and the pointer we consider in
// each block. Because of critical edges, we currently bail out if querying
// a block with multiple different pointers. This can happen during PHI
// translation.
DenseMap<BasicBlock*, Value*> Visited;
- if (!getNonLocalPointerDepFromBB(Address, Loc, isLoad, FromBB,
+ if (!getNonLocalPointerDepFromBB(QueryInst, Address, Loc, isLoad, FromBB,
Result, Visited, true))
return;
Result.clear();
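A hedged sketch of the new call-site shape ('MD' and 'LI' are illustrative names): callers now hand over the query instruction itself, and the routine derives the memory location, the isLoad flag, and the starting block from it.

SmallVector<NonLocalDepResult, 16> Deps;
MD->getNonLocalPointerDependency(LI, Deps); // was: (Loc, isLoad, FromBB, Deps)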
@@ -887,7 +946,8 @@ getNonLocalPointerDependency(const AliasAnalysis::Location &Loc, bool isLoad,
/// lookup (which may use dirty cache info if available). If we do a lookup,
/// add the result to the cache.
MemDepResult MemoryDependenceAnalysis::
-GetNonLocalInfoForBlock(const AliasAnalysis::Location &Loc,
+GetNonLocalInfoForBlock(Instruction *QueryInst,
+ const AliasAnalysis::Location &Loc,
bool isLoad, BasicBlock *BB,
NonLocalDepInfo *Cache, unsigned NumSortedEntries) {
@@ -928,7 +988,8 @@ GetNonLocalInfoForBlock(const AliasAnalysis::Location &Loc,
}
// Scan the block for the dependency.
- MemDepResult Dep = getPointerDependencyFrom(Loc, isLoad, ScanPos, BB);
+ MemDepResult Dep = getPointerDependencyFrom(Loc, isLoad, ScanPos, BB,
+ QueryInst);
// If we had a dirty entry for the block, update it. Otherwise, just add
// a new entry.
@@ -1001,7 +1062,8 @@ SortNonLocalDepInfoCache(MemoryDependenceAnalysis::NonLocalDepInfo &Cache,
/// not compute dependence information for some reason. This should be treated
/// as a clobber dependence on the first instruction in the predecessor block.
bool MemoryDependenceAnalysis::
-getNonLocalPointerDepFromBB(const PHITransAddr &Pointer,
+getNonLocalPointerDepFromBB(Instruction *QueryInst,
+ const PHITransAddr &Pointer,
const AliasAnalysis::Location &Loc,
bool isLoad, BasicBlock *StartBB,
SmallVectorImpl<NonLocalDepResult> &Result,
@@ -1040,7 +1102,7 @@ getNonLocalPointerDepFromBB(const PHITransAddr &Pointer,
} else if (CacheInfo->Size > Loc.Size) {
// This query's Size is less than the cached one. Conservatively restart
// the query using the greater size.
- return getNonLocalPointerDepFromBB(Pointer,
+ return getNonLocalPointerDepFromBB(QueryInst, Pointer,
Loc.getWithNewSize(CacheInfo->Size),
isLoad, StartBB, Result, Visited,
SkipFirstBlock);
@@ -1060,7 +1122,8 @@ getNonLocalPointerDepFromBB(const PHITransAddr &Pointer,
CacheInfo->NonLocalDeps.clear();
}
if (Loc.AATags)
- return getNonLocalPointerDepFromBB(Pointer, Loc.getWithoutAATags(),
+ return getNonLocalPointerDepFromBB(QueryInst,
+ Pointer, Loc.getWithoutAATags(),
isLoad, StartBB, Result, Visited,
SkipFirstBlock);
}
@@ -1145,7 +1208,6 @@ getNonLocalPointerDepFromBB(const PHITransAddr &Pointer,
// cache value will only see properly sorted cache arrays.
if (Cache && NumSortedEntries != Cache->size()) {
SortNonLocalDepInfoCache(*Cache, NumSortedEntries);
- NumSortedEntries = Cache->size();
}
// Since we bail out, the "Cache" set won't contain all of the
// results for the query. This is ok (we can still use it to accelerate
@@ -1164,7 +1226,8 @@ getNonLocalPointerDepFromBB(const PHITransAddr &Pointer,
// Get the dependency info for Pointer in BB. If we have cached
// information, we will use it, otherwise we compute it.
DEBUG(AssertSorted(*Cache, NumSortedEntries));
- MemDepResult Dep = GetNonLocalInfoForBlock(Loc, isLoad, BB, Cache,
+ MemDepResult Dep = GetNonLocalInfoForBlock(QueryInst,
+ Loc, isLoad, BB, Cache,
NumSortedEntries);
// If we got a Def or Clobber, add this to the list of results.
@@ -1298,7 +1361,7 @@ getNonLocalPointerDepFromBB(const PHITransAddr &Pointer,
// result conflicted with the Visited list; we have to conservatively
// assume it is unknown, but this also does not block PRE of the load.
if (!CanTranslate ||
- getNonLocalPointerDepFromBB(PredPointer,
+ getNonLocalPointerDepFromBB(QueryInst, PredPointer,
Loc.getWithNewPtr(PredPtrVal),
isLoad, Pred,
Result, Visited)) {
@@ -1361,7 +1424,7 @@ getNonLocalPointerDepFromBB(const PHITransAddr &Pointer,
if (I->getBB() != BB)
continue;
- assert(I->getResult().isNonLocal() &&
+ assert((I->getResult().isNonLocal() || !DT->isReachableFromEntry(BB)) &&
"Should only be here with transparent block");
I->setResult(MemDepResult::getUnknown());
Result.push_back(NonLocalDepResult(I->getBB(), I->getResult(),
diff --git a/lib/Analysis/PHITransAddr.cpp b/lib/Analysis/PHITransAddr.cpp
index b3d060a..a534418 100644
--- a/lib/Analysis/PHITransAddr.cpp
+++ b/lib/Analysis/PHITransAddr.cpp
@@ -228,7 +228,7 @@ Value *PHITransAddr::PHITranslateSubExpr(Value *V, BasicBlock *CurBB,
return GEP;
// Simplify the GEP to handle 'gep x, 0' -> x etc.
- if (Value *V = SimplifyGEPInst(GEPOps, DL, TLI, DT, AT)) {
+ if (Value *V = SimplifyGEPInst(GEPOps, DL, TLI, DT, AC)) {
for (unsigned i = 0, e = GEPOps.size(); i != e; ++i)
RemoveInstInputs(GEPOps[i], InstInputs);
@@ -283,7 +283,7 @@ Value *PHITransAddr::PHITranslateSubExpr(Value *V, BasicBlock *CurBB,
}
// See if the add simplifies away.
- if (Value *Res = SimplifyAddInst(LHS, RHS, isNSW, isNUW, DL, TLI, DT, AT)) {
+ if (Value *Res = SimplifyAddInst(LHS, RHS, isNSW, isNUW, DL, TLI, DT, AC)) {
// If we simplified the operands, the LHS is no longer an input, but Res
// is.
RemoveInstInputs(LHS, InstInputs);
@@ -369,7 +369,7 @@ InsertPHITranslatedSubExpr(Value *InVal, BasicBlock *CurBB,
SmallVectorImpl<Instruction*> &NewInsts) {
// See if we have a version of this value already available and dominating
// PredBB. If so, there is no need to insert a new instance of it.
- PHITransAddr Tmp(InVal, DL, AT);
+ PHITransAddr Tmp(InVal, DL, AC);
if (!Tmp.PHITranslateValue(CurBB, PredBB, &DT))
return Tmp.getAddr();
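A hedged sketch of how a legacy pass now obtains the per-function AssumptionCache that replaces AssumptionTracker throughout this patch ('Ptr' and 'DL' are assumed to be in scope):

// Inside some Pass::runOnFunction(Function &F):
AssumptionCache *AC =
    &getAnalysis<AssumptionCacheTracker>().getAssumptionCache(F);
PHITransAddr Addr(Ptr, DL, AC); // the cache then feeds PHI translation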
diff --git a/lib/Analysis/RegionInfo.cpp b/lib/Analysis/RegionInfo.cpp
index 08ebf0d..8cd8534 100644
--- a/lib/Analysis/RegionInfo.cpp
+++ b/lib/Analysis/RegionInfo.cpp
@@ -10,10 +10,10 @@
//===----------------------------------------------------------------------===//
#include "llvm/Analysis/RegionInfo.h"
-#include "llvm/Analysis/RegionInfoImpl.h"
#include "llvm/ADT/PostOrderIterator.h"
#include "llvm/ADT/Statistic.h"
#include "llvm/Analysis/LoopInfo.h"
+#include "llvm/Analysis/RegionInfoImpl.h"
#include "llvm/Analysis/RegionIterator.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Debug.h"
diff --git a/lib/Analysis/RegionPass.cpp b/lib/Analysis/RegionPass.cpp
index de34b72..6fa7b2e 100644
--- a/lib/Analysis/RegionPass.cpp
+++ b/lib/Analysis/RegionPass.cpp
@@ -15,9 +15,8 @@
//===----------------------------------------------------------------------===//
#include "llvm/Analysis/RegionPass.h"
#include "llvm/Analysis/RegionIterator.h"
-#include "llvm/Support/Timer.h"
-
#include "llvm/Support/Debug.h"
+#include "llvm/Support/Timer.h"
using namespace llvm;
#define DEBUG_TYPE "regionpassmgr"
diff --git a/lib/Analysis/ScalarEvolution.cpp b/lib/Analysis/ScalarEvolution.cpp
index 68549ef..9e4eb11 100644
--- a/lib/Analysis/ScalarEvolution.cpp
+++ b/lib/Analysis/ScalarEvolution.cpp
@@ -63,11 +63,12 @@
#include "llvm/ADT/STLExtras.h"
#include "llvm/ADT/SmallPtrSet.h"
#include "llvm/ADT/Statistic.h"
-#include "llvm/Analysis/AssumptionTracker.h"
+#include "llvm/Analysis/AssumptionCache.h"
#include "llvm/Analysis/ConstantFolding.h"
#include "llvm/Analysis/InstructionSimplify.h"
#include "llvm/Analysis/LoopInfo.h"
#include "llvm/Analysis/ScalarEvolutionExpressions.h"
+#include "llvm/Analysis/TargetLibraryInfo.h"
#include "llvm/Analysis/ValueTracking.h"
#include "llvm/IR/ConstantRange.h"
#include "llvm/IR/Constants.h"
@@ -87,7 +88,6 @@
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/MathExtras.h"
#include "llvm/Support/raw_ostream.h"
-#include "llvm/Target/TargetLibraryInfo.h"
#include <algorithm>
using namespace llvm;
@@ -116,10 +116,10 @@ VerifySCEV("verify-scev",
INITIALIZE_PASS_BEGIN(ScalarEvolution, "scalar-evolution",
"Scalar Evolution Analysis", false, true)
-INITIALIZE_PASS_DEPENDENCY(AssumptionTracker)
-INITIALIZE_PASS_DEPENDENCY(LoopInfo)
+INITIALIZE_PASS_DEPENDENCY(AssumptionCacheTracker)
+INITIALIZE_PASS_DEPENDENCY(LoopInfoWrapperPass)
INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass)
-INITIALIZE_PASS_DEPENDENCY(TargetLibraryInfo)
+INITIALIZE_PASS_DEPENDENCY(TargetLibraryInfoWrapperPass)
INITIALIZE_PASS_END(ScalarEvolution, "scalar-evolution",
"Scalar Evolution Analysis", false, true)
char ScalarEvolution::ID = 0;
@@ -675,62 +675,6 @@ static void GroupByComplexity(SmallVectorImpl<const SCEV *> &Ops,
}
}
-static const APInt srem(const SCEVConstant *C1, const SCEVConstant *C2) {
- APInt A = C1->getValue()->getValue();
- APInt B = C2->getValue()->getValue();
- uint32_t ABW = A.getBitWidth();
- uint32_t BBW = B.getBitWidth();
-
- if (ABW > BBW)
- B = B.sext(ABW);
- else if (ABW < BBW)
- A = A.sext(BBW);
-
- return APIntOps::srem(A, B);
-}
-
-static const APInt sdiv(const SCEVConstant *C1, const SCEVConstant *C2) {
- APInt A = C1->getValue()->getValue();
- APInt B = C2->getValue()->getValue();
- uint32_t ABW = A.getBitWidth();
- uint32_t BBW = B.getBitWidth();
-
- if (ABW > BBW)
- B = B.sext(ABW);
- else if (ABW < BBW)
- A = A.sext(BBW);
-
- return APIntOps::sdiv(A, B);
-}
-
-static const APInt urem(const SCEVConstant *C1, const SCEVConstant *C2) {
- APInt A = C1->getValue()->getValue();
- APInt B = C2->getValue()->getValue();
- uint32_t ABW = A.getBitWidth();
- uint32_t BBW = B.getBitWidth();
-
- if (ABW > BBW)
- B = B.zext(ABW);
- else if (ABW < BBW)
- A = A.zext(BBW);
-
- return APIntOps::urem(A, B);
-}
-
-static const APInt udiv(const SCEVConstant *C1, const SCEVConstant *C2) {
- APInt A = C1->getValue()->getValue();
- APInt B = C2->getValue()->getValue();
- uint32_t ABW = A.getBitWidth();
- uint32_t BBW = B.getBitWidth();
-
- if (ABW > BBW)
- B = B.zext(ABW);
- else if (ABW < BBW)
- A = A.zext(BBW);
-
- return APIntOps::udiv(A, B);
-}
-
namespace {
struct FindSCEVSize {
int Size;
@@ -757,8 +701,7 @@ static inline int sizeOfSCEV(const SCEV *S) {
namespace {
-template <typename Derived>
-struct SCEVDivision : public SCEVVisitor<Derived, void> {
+struct SCEVDivision : public SCEVVisitor<SCEVDivision, void> {
public:
// Computes the Quotient and Remainder of the division of Numerator by
// Denominator.
@@ -767,7 +710,7 @@ public:
const SCEV **Remainder) {
assert(Numerator && Denominator && "Uninitialized SCEV");
- Derived D(SE, Numerator, Denominator);
+ SCEVDivision D(SE, Numerator, Denominator);
// Check for the trivial case here to avoid having to check for it in the
// rest of the code.
@@ -819,6 +762,27 @@ public:
void visitUnknown(const SCEVUnknown *Numerator) {}
void visitCouldNotCompute(const SCEVCouldNotCompute *Numerator) {}
+ void visitConstant(const SCEVConstant *Numerator) {
+ if (const SCEVConstant *D = dyn_cast<SCEVConstant>(Denominator)) {
+ APInt NumeratorVal = Numerator->getValue()->getValue();
+ APInt DenominatorVal = D->getValue()->getValue();
+ uint32_t NumeratorBW = NumeratorVal.getBitWidth();
+ uint32_t DenominatorBW = DenominatorVal.getBitWidth();
+
+ if (NumeratorBW > DenominatorBW)
+ DenominatorVal = DenominatorVal.sext(NumeratorBW);
+ else if (NumeratorBW < DenominatorBW)
+ NumeratorVal = NumeratorVal.sext(DenominatorBW);
+
+ APInt QuotientVal(NumeratorVal.getBitWidth(), 0);
+ APInt RemainderVal(NumeratorVal.getBitWidth(), 0);
+ APInt::sdivrem(NumeratorVal, DenominatorVal, QuotientVal, RemainderVal);
+ Quotient = SE.getConstant(QuotientVal);
+ Remainder = SE.getConstant(RemainderVal);
+ return;
+ }
+ }
+
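A self-contained check of the sign-extend-then-sdivrem behavior above; the widths and values are arbitrary:

#include "llvm/ADT/APInt.h"
using llvm::APInt;

void SDivRemExample() {
  APInt Num(8, -7, /*isSigned=*/true); // i8  -7
  APInt Den(16, 2);                    // i16  2
  Num = Num.sext(16);                  // widths must agree, as in visitConstant
  APInt Q(16, 0), R(16, 0);
  APInt::sdivrem(Num, Den, Q, R);      // Q == -3, R == -1 (truncating division)
}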
void visitAddRecExpr(const SCEVAddRecExpr *Numerator) {
const SCEV *StartQ, *StartR, *StepQ, *StepR;
assert(Numerator->isAffine() && "Numerator should be affine");
@@ -956,37 +920,6 @@ private:
ScalarEvolution &SE;
const SCEV *Denominator, *Quotient, *Remainder, *Zero, *One;
-
- friend struct SCEVSDivision;
- friend struct SCEVUDivision;
-};
-
-struct SCEVSDivision : public SCEVDivision<SCEVSDivision> {
- SCEVSDivision(ScalarEvolution &S, const SCEV *Numerator,
- const SCEV *Denominator)
- : SCEVDivision(S, Numerator, Denominator) {}
-
- void visitConstant(const SCEVConstant *Numerator) {
- if (const SCEVConstant *D = dyn_cast<SCEVConstant>(Denominator)) {
- Quotient = SE.getConstant(sdiv(Numerator, D));
- Remainder = SE.getConstant(srem(Numerator, D));
- return;
- }
- }
-};
-
-struct SCEVUDivision : public SCEVDivision<SCEVUDivision> {
- SCEVUDivision(ScalarEvolution &S, const SCEV *Numerator,
- const SCEV *Denominator)
- : SCEVDivision(S, Numerator, Denominator) {}
-
- void visitConstant(const SCEVConstant *Numerator) {
- if (const SCEVConstant *D = dyn_cast<SCEVConstant>(Denominator)) {
- Quotient = SE.getConstant(udiv(Numerator, D));
- Remainder = SE.getConstant(urem(Numerator, D));
- return;
- }
- }
};
}
@@ -1215,6 +1148,183 @@ const SCEV *ScalarEvolution::getTruncateExpr(const SCEV *Op,
return S;
}
+// Get the limit of a recurrence such that incrementing by Step cannot cause
+// signed overflow as long as the value of the recurrence within the
+// loop does not exceed this limit before incrementing.
+static const SCEV *getSignedOverflowLimitForStep(const SCEV *Step,
+ ICmpInst::Predicate *Pred,
+ ScalarEvolution *SE) {
+ unsigned BitWidth = SE->getTypeSizeInBits(Step->getType());
+ if (SE->isKnownPositive(Step)) {
+ *Pred = ICmpInst::ICMP_SLT;
+ return SE->getConstant(APInt::getSignedMinValue(BitWidth) -
+ SE->getSignedRange(Step).getSignedMax());
+ }
+ if (SE->isKnownNegative(Step)) {
+ *Pred = ICmpInst::ICMP_SGT;
+ return SE->getConstant(APInt::getSignedMaxValue(BitWidth) -
+ SE->getSignedRange(Step).getSignedMin());
+ }
+ return nullptr;
+}
+
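A worked instance of the signed limit (numbers invented): for an i8 recurrence whose step has a signed max of 3, the limit is -128 - 3, which wraps to 125; the increment is then overflow-free while the recurrence is SLT 125, i.e. at most 124, and 124 + 3 == 127 == SINT_MAX.

APInt Limit = APInt::getSignedMinValue(8) - APInt(8, 3); // == 125 in i8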
+// Get the limit of a recurrence such that incrementing by Step cannot cause
+// unsigned overflow as long as the value of the recurrence within the loop does
+// not exceed this limit before incrementing.
+static const SCEV *getUnsignedOverflowLimitForStep(const SCEV *Step,
+ ICmpInst::Predicate *Pred,
+ ScalarEvolution *SE) {
+ unsigned BitWidth = SE->getTypeSizeInBits(Step->getType());
+ *Pred = ICmpInst::ICMP_ULT;
+
+ return SE->getConstant(APInt::getMinValue(BitWidth) -
+ SE->getUnsignedRange(Step).getUnsignedMax());
+}
+
+namespace {
+
+struct ExtendOpTraitsBase {
+ typedef const SCEV *(ScalarEvolution::*GetExtendExprTy)(const SCEV *, Type *);
+};
+
+// Used to make code generic over signed and unsigned overflow.
+template <typename ExtendOp> struct ExtendOpTraits {
+ // Members present:
+ //
+ // static const SCEV::NoWrapFlags WrapType;
+ //
+ // static const ExtendOpTraitsBase::GetExtendExprTy GetExtendExpr;
+ //
+ // static const SCEV *getOverflowLimitForStep(const SCEV *Step,
+ // ICmpInst::Predicate *Pred,
+ // ScalarEvolution *SE);
+};
+
+template <>
+struct ExtendOpTraits<SCEVSignExtendExpr> : public ExtendOpTraitsBase {
+ static const SCEV::NoWrapFlags WrapType = SCEV::FlagNSW;
+
+ static const GetExtendExprTy GetExtendExpr;
+
+ static const SCEV *getOverflowLimitForStep(const SCEV *Step,
+ ICmpInst::Predicate *Pred,
+ ScalarEvolution *SE) {
+ return getSignedOverflowLimitForStep(Step, Pred, SE);
+ }
+};
+
+const ExtendOpTraitsBase::GetExtendExprTy ExtendOpTraits<
+ SCEVSignExtendExpr>::GetExtendExpr = &ScalarEvolution::getSignExtendExpr;
+
+template <>
+struct ExtendOpTraits<SCEVZeroExtendExpr> : public ExtendOpTraitsBase {
+ static const SCEV::NoWrapFlags WrapType = SCEV::FlagNUW;
+
+ static const GetExtendExprTy GetExtendExpr;
+
+ static const SCEV *getOverflowLimitForStep(const SCEV *Step,
+ ICmpInst::Predicate *Pred,
+ ScalarEvolution *SE) {
+ return getUnsignedOverflowLimitForStep(Step, Pred, SE);
+ }
+};
+
+const ExtendOpTraitsBase::GetExtendExprTy ExtendOpTraits<
+ SCEVZeroExtendExpr>::GetExtendExpr = &ScalarEvolution::getZeroExtendExpr;
+}
+
+// The recurrence AR has been shown to have no signed/unsigned wrap or something
+// close to it. Typically, if we can prove NSW/NUW for AR, then we can just as
+// easily prove NSW/NUW for its preincrement or postincrement sibling. This
+// allows normalizing a sign/zero extended AddRec as such:
+//   {sext/zext(Step + Start),+,Step} => {(Step + sext/zext(Start)),+,Step}
+// As a result, the expression "Step + sext/zext(PreIncAR)" is congruent
+// with "sext/zext(PostIncAR)".
+template <typename ExtendOpTy>
+static const SCEV *getPreStartForExtend(const SCEVAddRecExpr *AR, Type *Ty,
+ ScalarEvolution *SE) {
+ auto WrapType = ExtendOpTraits<ExtendOpTy>::WrapType;
+ auto GetExtendExpr = ExtendOpTraits<ExtendOpTy>::GetExtendExpr;
+
+ const Loop *L = AR->getLoop();
+ const SCEV *Start = AR->getStart();
+ const SCEV *Step = AR->getStepRecurrence(*SE);
+
+ // Check for a simple looking step prior to loop entry.
+ const SCEVAddExpr *SA = dyn_cast<SCEVAddExpr>(Start);
+ if (!SA)
+ return nullptr;
+
+ // Create an AddExpr for "PreStart" after subtracting Step. Full SCEV
+ // subtraction is expensive. For this purpose, perform a quick and dirty
+ // difference, by checking for Step in the operand list.
+ SmallVector<const SCEV *, 4> DiffOps;
+ for (const SCEV *Op : SA->operands())
+ if (Op != Step)
+ DiffOps.push_back(Op);
+
+ if (DiffOps.size() == SA->getNumOperands())
+ return nullptr;
+
+ // Try to prove `WrapType` (SCEV::FlagNSW or SCEV::FlagNUW) on `PreStart` +
+ // `Step`:
+
+ // 1. NSW/NUW flags on the step increment.
+ const SCEV *PreStart = SE->getAddExpr(DiffOps, SA->getNoWrapFlags());
+ const SCEVAddRecExpr *PreAR = dyn_cast<SCEVAddRecExpr>(
+ SE->getAddRecExpr(PreStart, Step, L, SCEV::FlagAnyWrap));
+
+ // "{S,+,X} is <nsw>/<nuw>" and "the backedge is taken at least once" implies
+ // "S+X does not sign/unsign-overflow".
+ //
+
+ const SCEV *BECount = SE->getBackedgeTakenCount(L);
+ if (PreAR && PreAR->getNoWrapFlags(WrapType) &&
+ !isa<SCEVCouldNotCompute>(BECount) && SE->isKnownPositive(BECount))
+ return PreStart;
+
+ // 2. Direct overflow check on the step operation's expression.
+ unsigned BitWidth = SE->getTypeSizeInBits(AR->getType());
+ Type *WideTy = IntegerType::get(SE->getContext(), BitWidth * 2);
+ const SCEV *OperandExtendedStart =
+ SE->getAddExpr((SE->*GetExtendExpr)(PreStart, WideTy),
+ (SE->*GetExtendExpr)(Step, WideTy));
+ if ((SE->*GetExtendExpr)(Start, WideTy) == OperandExtendedStart) {
+ if (PreAR && AR->getNoWrapFlags(WrapType)) {
+ // If we know `AR` == {`PreStart`+`Step`,+,`Step`} is `WrapType` (FlagNSW
+ // or FlagNUW) and that `PreStart` + `Step` is `WrapType` too, then
+ // `PreAR` == {`PreStart`,+,`Step`} is also `WrapType`. Cache this fact.
+ const_cast<SCEVAddRecExpr *>(PreAR)->setNoWrapFlags(WrapType);
+ }
+ return PreStart;
+ }
+
+ // 3. Loop precondition.
+ ICmpInst::Predicate Pred;
+ const SCEV *OverflowLimit =
+ ExtendOpTraits<ExtendOpTy>::getOverflowLimitForStep(Step, &Pred, SE);
+
+ if (OverflowLimit &&
+ SE->isLoopEntryGuardedByCond(L, Pred, PreStart, OverflowLimit)) {
+ return PreStart;
+ }
+ return nullptr;
+}
+
+// Get the normalized zero or sign extended expression for this AddRec's Start.
+template <typename ExtendOpTy>
+static const SCEV *getExtendAddRecStart(const SCEVAddRecExpr *AR, Type *Ty,
+ ScalarEvolution *SE) {
+ auto GetExtendExpr = ExtendOpTraits<ExtendOpTy>::GetExtendExpr;
+
+ const SCEV *PreStart = getPreStartForExtend<ExtendOpTy>(AR, Ty, SE);
+ if (!PreStart)
+ return (SE->*GetExtendExpr)(AR->getStart(), Ty);
+
+ return SE->getAddExpr((SE->*GetExtendExpr)(AR->getStepRecurrence(*SE), Ty),
+ (SE->*GetExtendExpr)(PreStart, Ty));
+}
+
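Sketching the normalization the two templates implement, on an invented recurrence: if Start == PreStart + Step and the pre-increment recurrence is known not to wrap, then

  sext({PreStart + Step,+,Step}) == sext(Step) + sext({PreStart,+,Step})

so the extended AddRec is rebuilt as {sext(Step) + sext(PreStart),+,sext(Step)}; the zext case is identical with FlagNUW in place of FlagNSW.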
const SCEV *ScalarEvolution::getZeroExtendExpr(const SCEV *Op,
Type *Ty) {
assert(getTypeSizeInBits(Op->getType()) < getTypeSizeInBits(Ty) &&
@@ -1268,9 +1378,9 @@ const SCEV *ScalarEvolution::getZeroExtendExpr(const SCEV *Op,
// If we have special knowledge that this addrec won't overflow,
// we don't need to do any further analysis.
if (AR->getNoWrapFlags(SCEV::FlagNUW))
- return getAddRecExpr(getZeroExtendExpr(Start, Ty),
- getZeroExtendExpr(Step, Ty),
- L, AR->getNoWrapFlags());
+ return getAddRecExpr(
+ getExtendAddRecStart<SCEVZeroExtendExpr>(AR, Ty, this),
+ getZeroExtendExpr(Step, Ty), L, AR->getNoWrapFlags());
// Check whether the backedge-taken count is SCEVCouldNotCompute.
// Note that this serves two purposes: It filters out loops that are
@@ -1307,9 +1417,9 @@ const SCEV *ScalarEvolution::getZeroExtendExpr(const SCEV *Op,
// Cache knowledge of AR NUW, which is propagated to this AddRec.
const_cast<SCEVAddRecExpr *>(AR)->setNoWrapFlags(SCEV::FlagNUW);
// Return the expression with the addrec on the outside.
- return getAddRecExpr(getZeroExtendExpr(Start, Ty),
- getZeroExtendExpr(Step, Ty),
- L, AR->getNoWrapFlags());
+ return getAddRecExpr(
+ getExtendAddRecStart<SCEVZeroExtendExpr>(AR, Ty, this),
+ getZeroExtendExpr(Step, Ty), L, AR->getNoWrapFlags());
}
// Similar to above, only this time treat the step value as signed.
// This covers loops that count down.
@@ -1322,9 +1432,9 @@ const SCEV *ScalarEvolution::getZeroExtendExpr(const SCEV *Op,
// Negative step causes unsigned wrap, but it still can't self-wrap.
const_cast<SCEVAddRecExpr *>(AR)->setNoWrapFlags(SCEV::FlagNW);
// Return the expression with the addrec on the outside.
- return getAddRecExpr(getZeroExtendExpr(Start, Ty),
- getSignExtendExpr(Step, Ty),
- L, AR->getNoWrapFlags());
+ return getAddRecExpr(
+ getExtendAddRecStart<SCEVZeroExtendExpr>(AR, Ty, this),
+ getSignExtendExpr(Step, Ty), L, AR->getNoWrapFlags());
}
}
@@ -1342,9 +1452,9 @@ const SCEV *ScalarEvolution::getZeroExtendExpr(const SCEV *Op,
// Cache knowledge of AR NUW, which is propagated to this AddRec.
const_cast<SCEVAddRecExpr *>(AR)->setNoWrapFlags(SCEV::FlagNUW);
// Return the expression with the addrec on the outside.
- return getAddRecExpr(getZeroExtendExpr(Start, Ty),
- getZeroExtendExpr(Step, Ty),
- L, AR->getNoWrapFlags());
+ return getAddRecExpr(
+ getExtendAddRecStart<SCEVZeroExtendExpr>(AR, Ty, this),
+ getZeroExtendExpr(Step, Ty), L, AR->getNoWrapFlags());
}
} else if (isKnownNegative(Step)) {
const SCEV *N = getConstant(APInt::getMaxValue(BitWidth) -
@@ -1357,9 +1467,9 @@ const SCEV *ScalarEvolution::getZeroExtendExpr(const SCEV *Op,
// Negative step causes unsigned wrap, but it still can't self-wrap.
const_cast<SCEVAddRecExpr *>(AR)->setNoWrapFlags(SCEV::FlagNW);
// Return the expression with the addrec on the outside.
- return getAddRecExpr(getZeroExtendExpr(Start, Ty),
- getSignExtendExpr(Step, Ty),
- L, AR->getNoWrapFlags());
+ return getAddRecExpr(
+ getExtendAddRecStart<SCEVZeroExtendExpr>(AR, Ty, this),
+ getSignExtendExpr(Step, Ty), L, AR->getNoWrapFlags());
}
}
}
@@ -1374,104 +1484,6 @@ const SCEV *ScalarEvolution::getZeroExtendExpr(const SCEV *Op,
return S;
}
-// Get the limit of a recurrence such that incrementing by Step cannot cause
-// signed overflow as long as the value of the recurrence within the loop does
-// not exceed this limit before incrementing.
-static const SCEV *getOverflowLimitForStep(const SCEV *Step,
- ICmpInst::Predicate *Pred,
- ScalarEvolution *SE) {
- unsigned BitWidth = SE->getTypeSizeInBits(Step->getType());
- if (SE->isKnownPositive(Step)) {
- *Pred = ICmpInst::ICMP_SLT;
- return SE->getConstant(APInt::getSignedMinValue(BitWidth) -
- SE->getSignedRange(Step).getSignedMax());
- }
- if (SE->isKnownNegative(Step)) {
- *Pred = ICmpInst::ICMP_SGT;
- return SE->getConstant(APInt::getSignedMaxValue(BitWidth) -
- SE->getSignedRange(Step).getSignedMin());
- }
- return nullptr;
-}
-
-// The recurrence AR has been shown to have no signed wrap. Typically, if we can
-// prove NSW for AR, then we can just as easily prove NSW for its preincrement
-// or postincrement sibling. This allows normalizing a sign extended AddRec as
-// such: {sext(Step + Start),+,Step} => {(Step + sext(Start),+,Step} As a
-// result, the expression "Step + sext(PreIncAR)" is congruent with
-// "sext(PostIncAR)"
-static const SCEV *getPreStartForSignExtend(const SCEVAddRecExpr *AR,
- Type *Ty,
- ScalarEvolution *SE) {
- const Loop *L = AR->getLoop();
- const SCEV *Start = AR->getStart();
- const SCEV *Step = AR->getStepRecurrence(*SE);
-
- // Check for a simple looking step prior to loop entry.
- const SCEVAddExpr *SA = dyn_cast<SCEVAddExpr>(Start);
- if (!SA)
- return nullptr;
-
- // Create an AddExpr for "PreStart" after subtracting Step. Full SCEV
- // subtraction is expensive. For this purpose, perform a quick and dirty
- // difference, by checking for Step in the operand list.
- SmallVector<const SCEV *, 4> DiffOps;
- for (const SCEV *Op : SA->operands())
- if (Op != Step)
- DiffOps.push_back(Op);
-
- if (DiffOps.size() == SA->getNumOperands())
- return nullptr;
-
- // This is a postinc AR. Check for overflow on the preinc recurrence using the
- // same three conditions that getSignExtendedExpr checks.
-
- // 1. NSW flags on the step increment.
- const SCEV *PreStart = SE->getAddExpr(DiffOps, SA->getNoWrapFlags());
- const SCEVAddRecExpr *PreAR = dyn_cast<SCEVAddRecExpr>(
- SE->getAddRecExpr(PreStart, Step, L, SCEV::FlagAnyWrap));
-
- if (PreAR && PreAR->getNoWrapFlags(SCEV::FlagNSW))
- return PreStart;
-
- // 2. Direct overflow check on the step operation's expression.
- unsigned BitWidth = SE->getTypeSizeInBits(AR->getType());
- Type *WideTy = IntegerType::get(SE->getContext(), BitWidth * 2);
- const SCEV *OperandExtendedStart =
- SE->getAddExpr(SE->getSignExtendExpr(PreStart, WideTy),
- SE->getSignExtendExpr(Step, WideTy));
- if (SE->getSignExtendExpr(Start, WideTy) == OperandExtendedStart) {
- // Cache knowledge of PreAR NSW.
- if (PreAR)
- const_cast<SCEVAddRecExpr *>(PreAR)->setNoWrapFlags(SCEV::FlagNSW);
- // FIXME: this optimization needs a unit test
- DEBUG(dbgs() << "SCEV: untested prestart overflow check\n");
- return PreStart;
- }
-
- // 3. Loop precondition.
- ICmpInst::Predicate Pred;
- const SCEV *OverflowLimit = getOverflowLimitForStep(Step, &Pred, SE);
-
- if (OverflowLimit &&
- SE->isLoopEntryGuardedByCond(L, Pred, PreStart, OverflowLimit)) {
- return PreStart;
- }
- return nullptr;
-}
-
-// Get the normalized sign-extended expression for this AddRec's Start.
-static const SCEV *getSignExtendAddRecStart(const SCEVAddRecExpr *AR,
- Type *Ty,
- ScalarEvolution *SE) {
- const SCEV *PreStart = getPreStartForSignExtend(AR, Ty, SE);
- if (!PreStart)
- return SE->getSignExtendExpr(AR->getStart(), Ty);
-
- return SE->getAddExpr(SE->getSignExtendExpr(AR->getStepRecurrence(*SE), Ty),
- SE->getSignExtendExpr(PreStart, Ty));
-}
-
const SCEV *ScalarEvolution::getSignExtendExpr(const SCEV *Op,
Type *Ty) {
assert(getTypeSizeInBits(Op->getType()) < getTypeSizeInBits(Ty) &&
@@ -1550,9 +1562,9 @@ const SCEV *ScalarEvolution::getSignExtendExpr(const SCEV *Op,
// If we have special knowledge that this addrec won't overflow,
// we don't need to do any further analysis.
if (AR->getNoWrapFlags(SCEV::FlagNSW))
- return getAddRecExpr(getSignExtendAddRecStart(AR, Ty, this),
- getSignExtendExpr(Step, Ty),
- L, SCEV::FlagNSW);
+ return getAddRecExpr(
+ getExtendAddRecStart<SCEVSignExtendExpr>(AR, Ty, this),
+ getSignExtendExpr(Step, Ty), L, SCEV::FlagNSW);
// Check whether the backedge-taken count is SCEVCouldNotCompute.
// Note that this serves two purposes: It filters out loops that are
@@ -1589,9 +1601,9 @@ const SCEV *ScalarEvolution::getSignExtendExpr(const SCEV *Op,
// Cache knowledge of AR NSW, which is propagated to this AddRec.
const_cast<SCEVAddRecExpr *>(AR)->setNoWrapFlags(SCEV::FlagNSW);
// Return the expression with the addrec on the outside.
- return getAddRecExpr(getSignExtendAddRecStart(AR, Ty, this),
- getSignExtendExpr(Step, Ty),
- L, AR->getNoWrapFlags());
+ return getAddRecExpr(
+ getExtendAddRecStart<SCEVSignExtendExpr>(AR, Ty, this),
+ getSignExtendExpr(Step, Ty), L, AR->getNoWrapFlags());
}
// Similar to above, only this time treat the step value as unsigned.
// This covers loops that count up with an unsigned step.
@@ -1600,12 +1612,20 @@ const SCEV *ScalarEvolution::getSignExtendExpr(const SCEV *Op,
getMulExpr(WideMaxBECount,
getZeroExtendExpr(Step, WideTy)));
if (SAdd == OperandExtendedAdd) {
- // Cache knowledge of AR NSW, which is propagated to this AddRec.
- const_cast<SCEVAddRecExpr *>(AR)->setNoWrapFlags(SCEV::FlagNSW);
+ // If AR wraps around then
+ //
+ // abs(Step) * MaxBECount > unsigned-max(AR->getType())
+ // => SAdd != OperandExtendedAdd
+ //
+ // Thus (AR is not NW => SAdd != OperandExtendedAdd) <=>
+ // (SAdd == OperandExtendedAdd => AR is NW)
+
+ const_cast<SCEVAddRecExpr *>(AR)->setNoWrapFlags(SCEV::FlagNW);
+
// Return the expression with the addrec on the outside.
- return getAddRecExpr(getSignExtendAddRecStart(AR, Ty, this),
- getZeroExtendExpr(Step, Ty),
- L, AR->getNoWrapFlags());
+ return getAddRecExpr(
+ getExtendAddRecStart<SCEVSignExtendExpr>(AR, Ty, this),
+ getZeroExtendExpr(Step, Ty), L, AR->getNoWrapFlags());
}
}
@@ -1614,7 +1634,8 @@ const SCEV *ScalarEvolution::getSignExtendExpr(const SCEV *Op,
// with the start value and the backedge is guarded by a comparison
// with the post-inc value, the addrec is safe.
ICmpInst::Predicate Pred;
- const SCEV *OverflowLimit = getOverflowLimitForStep(Step, &Pred, this);
+ const SCEV *OverflowLimit =
+ getSignedOverflowLimitForStep(Step, &Pred, this);
if (OverflowLimit &&
(isLoopBackedgeGuardedByCond(L, Pred, AR, OverflowLimit) ||
(isLoopEntryGuardedByCond(L, Pred, Start, OverflowLimit) &&
@@ -1622,9 +1643,9 @@ const SCEV *ScalarEvolution::getSignExtendExpr(const SCEV *Op,
OverflowLimit)))) {
// Cache knowledge of AR NSW, then propagate NSW to the wide AddRec.
const_cast<SCEVAddRecExpr *>(AR)->setNoWrapFlags(SCEV::FlagNSW);
- return getAddRecExpr(getSignExtendAddRecStart(AR, Ty, this),
- getSignExtendExpr(Step, Ty),
- L, AR->getNoWrapFlags());
+ return getAddRecExpr(
+ getExtendAddRecStart<SCEVSignExtendExpr>(AR, Ty, this),
+ getSignExtendExpr(Step, Ty), L, AR->getNoWrapFlags());
}
}
// If Start and Step are constants, check if we can apply this
@@ -1804,6 +1825,36 @@ namespace {
};
}
+// We're trying to construct a SCEV of type `Type' with `Ops' as operands and
+// `OldFlags' as can't-wrap behavior. Infer a more aggressive set of
+// can't-overflow flags for the operation if possible.
+static SCEV::NoWrapFlags
+StrengthenNoWrapFlags(ScalarEvolution *SE, SCEVTypes Type,
+ const SmallVectorImpl<const SCEV *> &Ops,
+ SCEV::NoWrapFlags OldFlags) {
+ using namespace std::placeholders;
+
+ bool CanAnalyze =
+ Type == scAddExpr || Type == scAddRecExpr || Type == scMulExpr;
+ (void)CanAnalyze;
+ assert(CanAnalyze && "don't call from other places!");
+
+ int SignOrUnsignMask = SCEV::FlagNUW | SCEV::FlagNSW;
+ SCEV::NoWrapFlags SignOrUnsignWrap =
+ ScalarEvolution::maskFlags(OldFlags, SignOrUnsignMask);
+
+ // If FlagNSW is true and all the operands are non-negative, infer FlagNUW.
+ auto IsKnownNonNegative =
+ std::bind(std::mem_fn(&ScalarEvolution::isKnownNonNegative), SE, _1);
+
+ if (SignOrUnsignWrap == SCEV::FlagNSW &&
+ std::all_of(Ops.begin(), Ops.end(), IsKnownNonNegative))
+ return ScalarEvolution::setFlags(OldFlags,
+ (SCEV::NoWrapFlags)SignOrUnsignMask);
+
+ return OldFlags;
+}
+
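Why nsw plus non-negative operands implies nuw (a worked i8 case, values invented): if a, b >= 0 and a + b has no signed overflow, then a + b <= 127, which also fits in unsigned i8, so the add cannot wrap unsigned either. The std::bind/std::mem_fn pairing above merely adapts isKnownNonNegative into a unary predicate for std::all_of; an equivalent lambda would be:

auto IsKnownNonNegative = [SE](const SCEV *S) {
  return SE->isKnownNonNegative(S);
};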
/// getAddExpr - Get a canonical add expression, or something simpler if
/// possible.
const SCEV *ScalarEvolution::getAddExpr(SmallVectorImpl<const SCEV *> &Ops,
@@ -1819,20 +1870,7 @@ const SCEV *ScalarEvolution::getAddExpr(SmallVectorImpl<const SCEV *> &Ops,
"SCEVAddExpr operand types don't match!");
#endif
- // If FlagNSW is true and all the operands are non-negative, infer FlagNUW.
- // And vice-versa.
- int SignOrUnsignMask = SCEV::FlagNUW | SCEV::FlagNSW;
- SCEV::NoWrapFlags SignOrUnsignWrap = maskFlags(Flags, SignOrUnsignMask);
- if (SignOrUnsignWrap && (SignOrUnsignWrap != SignOrUnsignMask)) {
- bool All = true;
- for (SmallVectorImpl<const SCEV *>::const_iterator I = Ops.begin(),
- E = Ops.end(); I != E; ++I)
- if (!isKnownNonNegative(*I)) {
- All = false;
- break;
- }
- if (All) Flags = setFlags(Flags, (SCEV::NoWrapFlags)SignOrUnsignMask);
- }
+ Flags = StrengthenNoWrapFlags(this, scAddExpr, Ops, Flags);
// Sort by complexity, this groups all similar expression types together.
GroupByComplexity(Ops, LI);
@@ -2207,6 +2245,24 @@ static uint64_t Choose(uint64_t n, uint64_t k, bool &Overflow) {
return r;
}
+/// Determine if any of the operands in this SCEV are a constant or if
+/// any of the add or multiply expressions in this SCEV contain a constant.
+static bool containsConstantSomewhere(const SCEV *StartExpr) {
+ SmallVector<const SCEV *, 4> Ops;
+ Ops.push_back(StartExpr);
+ while (!Ops.empty()) {
+ const SCEV *CurrentExpr = Ops.pop_back_val();
+ if (isa<SCEVConstant>(*CurrentExpr))
+ return true;
+
+ if (isa<SCEVAddExpr>(*CurrentExpr) || isa<SCEVMulExpr>(*CurrentExpr)) {
+ const auto *CurrentNAry = cast<SCEVNAryExpr>(CurrentExpr);
+ Ops.append(CurrentNAry->op_begin(), CurrentNAry->op_end());
+ }
+ }
+ return false;
+}
+
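A worked case for the extended C1*(C2+V) distribution (operands invented): the new walk finds a constant nested below the add, so

  2 * (%a + (3 * %b))  ==>  (2 * %a) + (2 * (3 * %b))  ==>  (2 * %a) + (6 * %b)

where the constant 3 sits inside the nested multiply, which the old check (a constant literally as Add's first operand) could not see.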
/// getMulExpr - Get a canonical multiply expression, or something simpler if
/// possible.
const SCEV *ScalarEvolution::getMulExpr(SmallVectorImpl<const SCEV *> &Ops,
@@ -2222,20 +2278,7 @@ const SCEV *ScalarEvolution::getMulExpr(SmallVectorImpl<const SCEV *> &Ops,
"SCEVMulExpr operand types don't match!");
#endif
- // If FlagNSW is true and all the operands are non-negative, infer FlagNUW.
- // And vice-versa.
- int SignOrUnsignMask = SCEV::FlagNUW | SCEV::FlagNSW;
- SCEV::NoWrapFlags SignOrUnsignWrap = maskFlags(Flags, SignOrUnsignMask);
- if (SignOrUnsignWrap && (SignOrUnsignWrap != SignOrUnsignMask)) {
- bool All = true;
- for (SmallVectorImpl<const SCEV *>::const_iterator I = Ops.begin(),
- E = Ops.end(); I != E; ++I)
- if (!isKnownNonNegative(*I)) {
- All = false;
- break;
- }
- if (All) Flags = setFlags(Flags, (SCEV::NoWrapFlags)SignOrUnsignMask);
- }
+ Flags = StrengthenNoWrapFlags(this, scMulExpr, Ops, Flags);
// Sort by complexity, this groups all similar expression types together.
GroupByComplexity(Ops, LI);
@@ -2246,11 +2289,13 @@ const SCEV *ScalarEvolution::getMulExpr(SmallVectorImpl<const SCEV *> &Ops,
// C1*(C2+V) -> C1*C2 + C1*V
if (Ops.size() == 2)
- if (const SCEVAddExpr *Add = dyn_cast<SCEVAddExpr>(Ops[1]))
- if (Add->getNumOperands() == 2 &&
- isa<SCEVConstant>(Add->getOperand(0)))
- return getAddExpr(getMulExpr(LHSC, Add->getOperand(0)),
- getMulExpr(LHSC, Add->getOperand(1)));
+ if (const SCEVAddExpr *Add = dyn_cast<SCEVAddExpr>(Ops[1]))
+ // If any of Add's ops are Adds or Muls with a constant,
+ // apply this transformation as well.
+ if (Add->getNumOperands() == 2)
+ if (containsConstantSomewhere(Add))
+ return getAddExpr(getMulExpr(LHSC, Add->getOperand(0)),
+ getMulExpr(LHSC, Add->getOperand(1)));
++Idx;
while (const SCEVConstant *RHSC = dyn_cast<SCEVConstant>(Ops[Idx])) {
@@ -2699,20 +2744,7 @@ ScalarEvolution::getAddRecExpr(SmallVectorImpl<const SCEV *> &Operands,
// meaningful BE count at this point (and if we don't, we'd be stuck
// with a SCEVCouldNotCompute as the cached BE count).
- // If FlagNSW is true and all the operands are non-negative, infer FlagNUW.
- // And vice-versa.
- int SignOrUnsignMask = SCEV::FlagNUW | SCEV::FlagNSW;
- SCEV::NoWrapFlags SignOrUnsignWrap = maskFlags(Flags, SignOrUnsignMask);
- if (SignOrUnsignWrap && (SignOrUnsignWrap != SignOrUnsignMask)) {
- bool All = true;
- for (SmallVectorImpl<const SCEV *>::const_iterator I = Operands.begin(),
- E = Operands.end(); I != E; ++I)
- if (!isKnownNonNegative(*I)) {
- All = false;
- break;
- }
- if (All) Flags = setFlags(Flags, (SCEV::NoWrapFlags)SignOrUnsignMask);
- }
+ Flags = StrengthenNoWrapFlags(this, scAddRecExpr, Operands, Flags);
// Canonicalize nested AddRecs in by nesting them in order of loop depth.
if (const SCEVAddRecExpr *NestedAR = dyn_cast<SCEVAddRecExpr>(Operands[0])) {
@@ -3209,8 +3241,9 @@ const SCEV *ScalarEvolution::getMinusSCEV(const SCEV *LHS, const SCEV *RHS,
if (LHS == RHS)
return getConstant(LHS->getType(), 0);
- // X - Y --> X + -Y
- return getAddExpr(LHS, getNegativeSCEV(RHS), Flags);
+ // X - Y --> X + -Y.
+ // X -(nsw || nuw) Y --> X + -Y.
+ return getAddExpr(LHS, getNegativeSCEV(RHS));
}
/// getTruncateOrZeroExtend - Return a SCEV corresponding to a conversion of the
@@ -3516,12 +3549,10 @@ const SCEV *ScalarEvolution::createNodeForPHI(PHINode *PN) {
if (isKnownPositive(getMinusSCEV(getSCEV(GEP), Ptr)))
Flags = setFlags(Flags, SCEV::FlagNUW);
}
- } else if (const SubOperator *OBO =
- dyn_cast<SubOperator>(BEValueV)) {
- if (OBO->hasNoUnsignedWrap())
- Flags = setFlags(Flags, SCEV::FlagNUW);
- if (OBO->hasNoSignedWrap())
- Flags = setFlags(Flags, SCEV::FlagNSW);
+
+ // We cannot transfer nuw and nsw flags from subtraction
+ // operations -- sub nuw X, Y is not the same as add nuw X, -Y
+ // for instance.
}
const SCEV *StartVal = getSCEV(StartValueV);
@@ -3577,7 +3608,7 @@ const SCEV *ScalarEvolution::createNodeForPHI(PHINode *PN) {
// PHI's incoming blocks are in a different loop, in which case doing so
// risks breaking LCSSA form. Instcombine would normally zap these, but
// it doesn't have DominatorTree information, so it may miss cases.
- if (Value *V = SimplifyInstruction(PN, DL, TLI, DT, AT))
+ if (Value *V = SimplifyInstruction(PN, DL, TLI, DT, AC))
if (LI->replacementPreservesLCSSAForm(PN, V))
return getSCEV(V);
@@ -3709,7 +3740,7 @@ ScalarEvolution::GetMinTrailingZeros(const SCEV *S) {
// For a SCEVUnknown, ask ValueTracking.
unsigned BitWidth = getTypeSizeInBits(U->getType());
APInt Zeros(BitWidth, 0), Ones(BitWidth, 0);
- computeKnownBits(U->getValue(), Zeros, Ones, DL, 0, AT, nullptr, DT);
+ computeKnownBits(U->getValue(), Zeros, Ones, DL, 0, AC, nullptr, DT);
return Zeros.countTrailingOnes();
}
@@ -3729,8 +3760,10 @@ static Optional<ConstantRange> GetRangeFromMetadata(Value *V) {
assert(NumRanges >= 1);
for (unsigned i = 0; i < NumRanges; ++i) {
- ConstantInt *Lower = cast<ConstantInt>(MD->getOperand(2*i + 0));
- ConstantInt *Upper = cast<ConstantInt>(MD->getOperand(2*i + 1));
+ ConstantInt *Lower =
+ mdconst::extract<ConstantInt>(MD->getOperand(2 * i + 0));
+ ConstantInt *Upper =
+ mdconst::extract<ConstantInt>(MD->getOperand(2 * i + 1));
ConstantRange Range(Lower->getValue(), Upper->getValue());
TotalRange = TotalRange.unionWith(Range);
}
@@ -3878,7 +3911,7 @@ ScalarEvolution::getUnsignedRange(const SCEV *S) {
// For a SCEVUnknown, ask ValueTracking.
APInt Zeros(BitWidth, 0), Ones(BitWidth, 0);
- computeKnownBits(U->getValue(), Zeros, Ones, DL, 0, AT, nullptr, DT);
+ computeKnownBits(U->getValue(), Zeros, Ones, DL, 0, AC, nullptr, DT);
if (Ones == ~Zeros + 1)
return setUnsignedRange(U, ConservativeResult);
return setUnsignedRange(U,
@@ -4035,7 +4068,7 @@ ScalarEvolution::getSignedRange(const SCEV *S) {
// For a SCEVUnknown, ask ValueTracking.
if (!U->getValue()->getType()->isIntegerTy() && !DL)
return setSignedRange(U, ConservativeResult);
- unsigned NS = ComputeNumSignBits(U->getValue(), DL, 0, AT, nullptr, DT);
+ unsigned NS = ComputeNumSignBits(U->getValue(), DL, 0, AC, nullptr, DT);
if (NS <= 1)
return setSignedRange(U, ConservativeResult);
return setSignedRange(U, ConservativeResult.intersectWith(
@@ -4142,8 +4175,8 @@ const SCEV *ScalarEvolution::createSCEV(Value *V) {
unsigned TZ = A.countTrailingZeros();
unsigned BitWidth = A.getBitWidth();
APInt KnownZero(BitWidth, 0), KnownOne(BitWidth, 0);
- computeKnownBits(U->getOperand(0), KnownZero, KnownOne, DL,
- 0, AT, nullptr, DT);
+ computeKnownBits(U->getOperand(0), KnownZero, KnownOne, DL, 0, AC,
+ nullptr, DT);
APInt EffectiveMask =
APInt::getLowBitsSet(BitWidth, BitWidth - LZ - TZ).shl(TZ);
@@ -4334,9 +4367,10 @@ const SCEV *ScalarEvolution::createSCEV(Value *V) {
case ICmpInst::ICMP_SGE:
// a >s b ? a+x : b+x -> smax(a, b)+x
// a >s b ? b+x : a+x -> smin(a, b)+x
- if (LHS->getType() == U->getType()) {
- const SCEV *LS = getSCEV(LHS);
- const SCEV *RS = getSCEV(RHS);
+ if (getTypeSizeInBits(LHS->getType()) <=
+ getTypeSizeInBits(U->getType())) {
+ const SCEV *LS = getNoopOrSignExtend(getSCEV(LHS), U->getType());
+ const SCEV *RS = getNoopOrSignExtend(getSCEV(RHS), U->getType());
const SCEV *LA = getSCEV(U->getOperand(1));
const SCEV *RA = getSCEV(U->getOperand(2));
const SCEV *LDiff = getMinusSCEV(LA, LS);
@@ -4357,9 +4391,10 @@ const SCEV *ScalarEvolution::createSCEV(Value *V) {
case ICmpInst::ICMP_UGE:
// a >u b ? a+x : b+x -> umax(a, b)+x
// a >u b ? b+x : a+x -> umin(a, b)+x
- if (LHS->getType() == U->getType()) {
- const SCEV *LS = getSCEV(LHS);
- const SCEV *RS = getSCEV(RHS);
+ if (getTypeSizeInBits(LHS->getType()) <=
+ getTypeSizeInBits(U->getType())) {
+ const SCEV *LS = getNoopOrZeroExtend(getSCEV(LHS), U->getType());
+ const SCEV *RS = getNoopOrZeroExtend(getSCEV(RHS), U->getType());
const SCEV *LA = getSCEV(U->getOperand(1));
const SCEV *RA = getSCEV(U->getOperand(2));
const SCEV *LDiff = getMinusSCEV(LA, LS);
@@ -4374,11 +4409,11 @@ const SCEV *ScalarEvolution::createSCEV(Value *V) {
break;
case ICmpInst::ICMP_NE:
// n != 0 ? n+x : 1+x -> umax(n, 1)+x
- if (LHS->getType() == U->getType() &&
- isa<ConstantInt>(RHS) &&
- cast<ConstantInt>(RHS)->isZero()) {
- const SCEV *One = getConstant(LHS->getType(), 1);
- const SCEV *LS = getSCEV(LHS);
+ if (getTypeSizeInBits(LHS->getType()) <=
+ getTypeSizeInBits(U->getType()) &&
+ isa<ConstantInt>(RHS) && cast<ConstantInt>(RHS)->isZero()) {
+ const SCEV *One = getConstant(U->getType(), 1);
+ const SCEV *LS = getNoopOrZeroExtend(getSCEV(LHS), U->getType());
const SCEV *LA = getSCEV(U->getOperand(1));
const SCEV *RA = getSCEV(U->getOperand(2));
const SCEV *LDiff = getMinusSCEV(LA, LS);
@@ -4389,11 +4424,11 @@ const SCEV *ScalarEvolution::createSCEV(Value *V) {
break;
case ICmpInst::ICMP_EQ:
// n == 0 ? 1+x : n+x -> umax(n, 1)+x
- if (LHS->getType() == U->getType() &&
- isa<ConstantInt>(RHS) &&
- cast<ConstantInt>(RHS)->isZero()) {
- const SCEV *One = getConstant(LHS->getType(), 1);
- const SCEV *LS = getSCEV(LHS);
+ if (getTypeSizeInBits(LHS->getType()) <=
+ getTypeSizeInBits(U->getType()) &&
+ isa<ConstantInt>(RHS) && cast<ConstantInt>(RHS)->isZero()) {
+ const SCEV *One = getConstant(U->getType(), 1);
+ const SCEV *LS = getNoopOrZeroExtend(getSCEV(LHS), U->getType());
const SCEV *LA = getSCEV(U->getOperand(1));
const SCEV *RA = getSCEV(U->getOperand(2));
const SCEV *LDiff = getMinusSCEV(LA, One);
@@ -6138,15 +6173,18 @@ ScalarEvolution::HowFarToZero(const SCEV *V, const Loop *L, bool ControlsExit) {
return ExitLimit(Distance, MaxBECount);
}
- // If the step exactly divides the distance then unsigned divide computes the
- // backedge count.
- const SCEV *Q, *R;
- ScalarEvolution &SE = *const_cast<ScalarEvolution *>(this);
- SCEVUDivision::divide(SE, Distance, Step, &Q, &R);
- if (R->isZero()) {
- const SCEV *Exact =
- getUDivExactExpr(Distance, CountDown ? getNegativeSCEV(Step) : Step);
- return ExitLimit(Exact, Exact);
+ // As a special case, handle the instance where Step is a positive power of
+ // two. In this case, determining whether Step divides Distance evenly can be
+ // done by counting and comparing the number of trailing zeros of Step and
+ // Distance.
+ if (!CountDown) {
+ const APInt &StepV = StepC->getValue()->getValue();
+ // StepV.isPowerOf2() returns true if StepV is a positive power of two. It
+ // also returns true if StepV is maximally negative (e.g., INT_MIN), but
+ // that case does not arise here, as this code is guarded by !CountDown.
+ if (StepV.isPowerOf2() &&
+ GetMinTrailingZeros(Distance) >= StepV.countTrailingZeros())
+ return getUDivExactExpr(Distance, Step);
}
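A worked instance of the power-of-two fast path (numbers invented): with Step == 8 (countTrailingZeros == 3) and a Distance known to be a multiple of 8, e.g. 24 == 0b11000 (GetMinTrailingZeros >= 3), the divisibility test passes and the exact trip count is udiv(24, 8) == 3.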
// If the condition controls loop exit (the loop exits only if the expression
@@ -6671,7 +6709,10 @@ ScalarEvolution::isLoopBackedgeGuardedByCond(const Loop *L,
return true;
// Check conditions due to any @llvm.assume intrinsics.
- for (auto &CI : AT->assumptions(F)) {
+ for (auto &AssumeVH : AC->assumptions()) {
+ if (!AssumeVH)
+ continue;
+ auto *CI = cast<CallInst>(AssumeVH);
if (!DT->dominates(CI, Latch->getTerminator()))
continue;
@@ -6716,7 +6757,10 @@ ScalarEvolution::isLoopEntryGuardedByCond(const Loop *L,
}
// Check conditions due to any @llvm.assume intrinsics.
- for (auto &CI : AT->assumptions(F)) {
+ for (auto &AssumeVH : AC->assumptions()) {
+ if (!AssumeVH)
+ continue;
+ auto *CI = cast<CallInst>(AssumeVH);
if (!DT->dominates(CI, L->getHeader()))
continue;
@@ -6927,6 +6971,85 @@ bool ScalarEvolution::isImpliedCondOperands(ICmpInst::Predicate Pred,
getNotSCEV(FoundLHS));
}
+
+/// If Expr computes ~A, return A; otherwise return nullptr.
+static const SCEV *MatchNotExpr(const SCEV *Expr) {
+ const SCEVAddExpr *Add = dyn_cast<SCEVAddExpr>(Expr);
+ if (!Add || Add->getNumOperands() != 2) return nullptr;
+
+ const SCEVConstant *AddLHS = dyn_cast<SCEVConstant>(Add->getOperand(0));
+ if (!(AddLHS && AddLHS->getValue()->getValue().isAllOnesValue()))
+ return nullptr;
+
+ const SCEVMulExpr *AddRHS = dyn_cast<SCEVMulExpr>(Add->getOperand(1));
+ if (!AddRHS || AddRHS->getNumOperands() != 2) return nullptr;
+
+ const SCEVConstant *MulLHS = dyn_cast<SCEVConstant>(AddRHS->getOperand(0));
+ if (!(MulLHS && MulLHS->getValue()->getValue().isAllOnesValue()))
+ return nullptr;
+
+ return AddRHS->getOperand(1);
+}
+
+
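The pattern above keys off SCEV's canonical spelling of bitwise-not (operand invented): ~A is represented as (-1 + (-1 * A)), so the helper peels an add whose first operand is all-ones and whose second is a multiply by all-ones, yielding A:

  not(%x) == (-1 + (-1 * %x)) -- MatchNotExpr returns %x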
+/// Is MaybeMaxExpr an SMax or UMax of Candidate and some other values?
+template<typename MaxExprType>
+static bool IsMaxConsistingOf(const SCEV *MaybeMaxExpr,
+ const SCEV *Candidate) {
+ const MaxExprType *MaxExpr = dyn_cast<MaxExprType>(MaybeMaxExpr);
+ if (!MaxExpr) return false;
+
+ auto It = std::find(MaxExpr->op_begin(), MaxExpr->op_end(), Candidate);
+ return It != MaxExpr->op_end();
+}
+
+
+/// Is MaybeMinExpr an SMin or UMin of Candidate and some other values?
+template<typename MaxExprType>
+static bool IsMinConsistingOf(ScalarEvolution &SE,
+ const SCEV *MaybeMinExpr,
+ const SCEV *Candidate) {
+ const SCEV *MaybeMaxExpr = MatchNotExpr(MaybeMinExpr);
+ if (!MaybeMaxExpr)
+ return false;
+
+ return IsMaxConsistingOf<MaxExprType>(MaybeMaxExpr, SE.getNotSCEV(Candidate));
+}
+
+
+/// Is LHS `Pred` RHS true by virtue of LHS or RHS being a Min or Max
+/// expression?
+static bool IsKnownPredicateViaMinOrMax(ScalarEvolution &SE,
+ ICmpInst::Predicate Pred,
+ const SCEV *LHS, const SCEV *RHS) {
+ switch (Pred) {
+ default:
+ return false;
+
+ case ICmpInst::ICMP_SGE:
+ std::swap(LHS, RHS);
+ // fall through
+ case ICmpInst::ICMP_SLE:
+ return
+ // min(A, ...) <= A
+ IsMinConsistingOf<SCEVSMaxExpr>(SE, LHS, RHS) ||
+ // A <= max(A, ...)
+ IsMaxConsistingOf<SCEVSMaxExpr>(RHS, LHS);
+
+ case ICmpInst::ICMP_UGE:
+ std::swap(LHS, RHS);
+ // fall through
+ case ICmpInst::ICMP_ULE:
+ return
+ // min(A, ...) <= A
+ IsMinConsistingOf<SCEVUMaxExpr>(SE, LHS, RHS) ||
+ // A <= max(A, ...)
+ IsMaxConsistingOf<SCEVUMaxExpr>(RHS, LHS);
+ }
+
+ llvm_unreachable("covered switch fell through?!");
+}
+
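The identity behind the min cases (a sketch, not part of the patch): SCEV has no dedicated min nodes, so smin(A, B) is encoded as ~smax(~A, ~B), and likewise umin via umax. Proving smin(A, B) <= A therefore reduces to un-notting the expression and finding ~A among the smax operands, which is exactly what IsMinConsistingOf does:

  smin(%a, %b) == ~smax(~%a, ~%b), and smin(%a, %b) <= %a holds for any %b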
/// isImpliedCondOperandsHelper - Test whether the condition described by
/// Pred, LHS, and RHS is true whenever the condition described by Pred,
/// FoundLHS, and FoundRHS is true.
@@ -6935,6 +7058,12 @@ ScalarEvolution::isImpliedCondOperandsHelper(ICmpInst::Predicate Pred,
const SCEV *LHS, const SCEV *RHS,
const SCEV *FoundLHS,
const SCEV *FoundRHS) {
+ auto IsKnownPredicateFull =
+ [this](ICmpInst::Predicate Pred, const SCEV *LHS, const SCEV *RHS) {
+ return isKnownPredicateWithRanges(Pred, LHS, RHS) ||
+ IsKnownPredicateViaMinOrMax(*this, Pred, LHS, RHS);
+ };
+
switch (Pred) {
default: llvm_unreachable("Unexpected ICmpInst::Predicate value!");
case ICmpInst::ICMP_EQ:
@@ -6944,26 +7073,26 @@ ScalarEvolution::isImpliedCondOperandsHelper(ICmpInst::Predicate Pred,
break;
case ICmpInst::ICMP_SLT:
case ICmpInst::ICMP_SLE:
- if (isKnownPredicateWithRanges(ICmpInst::ICMP_SLE, LHS, FoundLHS) &&
- isKnownPredicateWithRanges(ICmpInst::ICMP_SGE, RHS, FoundRHS))
+ if (IsKnownPredicateFull(ICmpInst::ICMP_SLE, LHS, FoundLHS) &&
+ IsKnownPredicateFull(ICmpInst::ICMP_SGE, RHS, FoundRHS))
return true;
break;
case ICmpInst::ICMP_SGT:
case ICmpInst::ICMP_SGE:
- if (isKnownPredicateWithRanges(ICmpInst::ICMP_SGE, LHS, FoundLHS) &&
- isKnownPredicateWithRanges(ICmpInst::ICMP_SLE, RHS, FoundRHS))
+ if (IsKnownPredicateFull(ICmpInst::ICMP_SGE, LHS, FoundLHS) &&
+ IsKnownPredicateFull(ICmpInst::ICMP_SLE, RHS, FoundRHS))
return true;
break;
case ICmpInst::ICMP_ULT:
case ICmpInst::ICMP_ULE:
- if (isKnownPredicateWithRanges(ICmpInst::ICMP_ULE, LHS, FoundLHS) &&
- isKnownPredicateWithRanges(ICmpInst::ICMP_UGE, RHS, FoundRHS))
+ if (IsKnownPredicateFull(ICmpInst::ICMP_ULE, LHS, FoundLHS) &&
+ IsKnownPredicateFull(ICmpInst::ICMP_UGE, RHS, FoundRHS))
return true;
break;
case ICmpInst::ICMP_UGT:
case ICmpInst::ICMP_UGE:
- if (isKnownPredicateWithRanges(ICmpInst::ICMP_UGE, LHS, FoundLHS) &&
- isKnownPredicateWithRanges(ICmpInst::ICMP_ULE, RHS, FoundRHS))
+ if (IsKnownPredicateFull(ICmpInst::ICMP_UGE, LHS, FoundLHS) &&
+ IsKnownPredicateFull(ICmpInst::ICMP_ULE, RHS, FoundRHS))
return true;
break;
}
@@ -6971,8 +7100,8 @@ ScalarEvolution::isImpliedCondOperandsHelper(ICmpInst::Predicate Pred,
return false;
}
-// Verify if an linear IV with positive stride can overflow when in a
-// less-than comparison, knowing the invariant term of the comparison, the
+// Verify if a linear IV with positive stride can overflow when in a
+// less-than comparison, knowing the invariant term of the comparison, the
// stride and the knowledge of NSW/NUW flags on the recurrence.
bool ScalarEvolution::doesIVOverflowOnLT(const SCEV *RHS, const SCEV *Stride,
bool IsSigned, bool NoWrap) {
@@ -7000,7 +7129,7 @@ bool ScalarEvolution::doesIVOverflowOnLT(const SCEV *RHS, const SCEV *Stride,
return (MaxValue - MaxStrideMinusOne).ult(MaxRHS);
}
-// Verify if an linear IV with negative stride can overflow when in a
+// Verify if a linear IV with negative stride can overflow when in a
+// Verify if an linear IV with negative stride can overflow when in a
// greater-than comparison, knowing the invariant term of the comparison,
// the stride and the knowledge of NSW/NUW flags on the recurrence.
bool ScalarEvolution::doesIVOverflowOnGT(const SCEV *RHS, const SCEV *Stride,
@@ -7031,7 +7160,7 @@ bool ScalarEvolution::doesIVOverflowOnGT(const SCEV *RHS, const SCEV *Stride,
// Compute the backedge taken count knowing the interval difference, the
// stride and presence of the equality in the comparison.
-const SCEV *ScalarEvolution::computeBECount(const SCEV *Delta, const SCEV *Step,
+const SCEV *ScalarEvolution::computeBECount(const SCEV *Delta, const SCEV *Step,
+const SCEV *ScalarEvolution::computeBECount(const SCEV *Delta, const SCEV *Step,
bool Equality) {
const SCEV *One = getConstant(Step->getType(), 1);
Delta = Equality ? getAddExpr(Delta, Step)
@@ -7071,7 +7200,7 @@ ScalarEvolution::HowManyLessThans(const SCEV *LHS, const SCEV *RHS,
// Avoid proven overflow cases: this will ensure that the backedge taken count
// will not generate any unsigned overflow. Relaxed no-overflow conditions
- // exploit NoWrapFlags, allowing to optimize in presence of undefined
+ // exploit NoWrapFlags, allowing to optimize in presence of undefined
+ // exploit NoWrapFlags, allowing to optimize in presence of undefined
// behaviors like the case of C language.
if (!Stride->isOne() && doesIVOverflowOnLT(RHS, Stride, IsSigned, NoWrap))
return getCouldNotCompute();
@@ -7151,7 +7280,7 @@ ScalarEvolution::HowManyGreaterThans(const SCEV *LHS, const SCEV *RHS,
// Avoid proven overflow cases: this will ensure that the backedge taken count
// will not generate any unsigned overflow. Relaxed no-overflow conditions
- // exploit NoWrapFlags, allowing to optimize in presence of undefined
+ // exploit NoWrapFlags, allowing to optimize in presence of undefined
+ // exploit NoWrapFlags, allowing to optimize in presence of undefined
// behaviors like the case of C language.
if (!Stride->isOne() && doesIVOverflowOnGT(RHS, Stride, IsSigned, NoWrap))
return getCouldNotCompute();
@@ -7199,7 +7328,7 @@ ScalarEvolution::HowManyGreaterThans(const SCEV *LHS, const SCEV *RHS,
if (isa<SCEVConstant>(BECount))
MaxBECount = BECount;
else
- MaxBECount = computeBECount(getConstant(MaxStart - MinEnd),
+ MaxBECount = computeBECount(getConstant(MaxStart - MinEnd),
+ MaxBECount = computeBECount(getConstant(MaxStart - MinEnd),
getConstant(MinStride), false);
if (isa<SCEVCouldNotCompute>(MaxBECount))
@@ -7457,7 +7586,7 @@ static bool findArrayDimensionsRec(ScalarEvolution &SE,
for (const SCEV *&Term : Terms) {
// Normalize the terms before the next call to findArrayDimensionsRec.
const SCEV *Q, *R;
- SCEVSDivision::divide(SE, Term, Step, &Q, &R);
+ SCEVDivision::divide(SE, Term, Step, &Q, &R);
// Bail out when GCD does not evenly divide one of the terms.
if (!R->isZero())
@@ -7594,7 +7723,7 @@ void ScalarEvolution::findArrayDimensions(SmallVectorImpl<const SCEV *> &Terms,
// Divide all terms by the element size.
for (const SCEV *&Term : Terms) {
const SCEV *Q, *R;
- SCEVSDivision::divide(SE, Term, ElementSize, &Q, &R);
+ SCEVDivision::divide(SE, Term, ElementSize, &Q, &R);
Term = Q;
}
@@ -7641,7 +7770,7 @@ void SCEVAddRecExpr::computeAccessFunctions(
int Last = Sizes.size() - 1;
for (int i = Last; i >= 0; i--) {
const SCEV *Q, *R;
- SCEVSDivision::divide(SE, Res, Sizes[i], &Q, &R);
+ SCEVDivision::divide(SE, Res, Sizes[i], &Q, &R);
DEBUG({
dbgs() << "Res: " << *Res << "\n";
@@ -7825,11 +7954,11 @@ ScalarEvolution::ScalarEvolution()
bool ScalarEvolution::runOnFunction(Function &F) {
this->F = &F;
- AT = &getAnalysis<AssumptionTracker>();
- LI = &getAnalysis<LoopInfo>();
+ AC = &getAnalysis<AssumptionCacheTracker>().getAssumptionCache(F);
+ LI = &getAnalysis<LoopInfoWrapperPass>().getLoopInfo();
DataLayoutPass *DLP = getAnalysisIfAvailable<DataLayoutPass>();
DL = DLP ? &DLP->getDataLayout() : nullptr;
- TLI = &getAnalysis<TargetLibraryInfo>();
+ TLI = &getAnalysis<TargetLibraryInfoWrapperPass>().getTLI();
DT = &getAnalysis<DominatorTreeWrapperPass>().getDomTree();
return false;
}
@@ -7866,10 +7995,10 @@ void ScalarEvolution::releaseMemory() {
void ScalarEvolution::getAnalysisUsage(AnalysisUsage &AU) const {
AU.setPreservesAll();
- AU.addRequired<AssumptionTracker>();
- AU.addRequiredTransitive<LoopInfo>();
+ AU.addRequired<AssumptionCacheTracker>();
+ AU.addRequiredTransitive<LoopInfoWrapperPass>();
AU.addRequiredTransitive<DominatorTreeWrapperPass>();
- AU.addRequired<TargetLibraryInfo>();
+ AU.addRequired<TargetLibraryInfoWrapperPass>();
}
bool ScalarEvolution::hasLoopInvariantBackedgeTakenCount(const Loop *L) {
@@ -7960,17 +8089,17 @@ void ScalarEvolution::print(raw_ostream &OS, const Module *) const {
ScalarEvolution::LoopDisposition
ScalarEvolution::getLoopDisposition(const SCEV *S, const Loop *L) {
- SmallVector<std::pair<const Loop *, LoopDisposition>, 2> &Values = LoopDispositions[S];
- for (unsigned u = 0; u < Values.size(); u++) {
- if (Values[u].first == L)
- return Values[u].second;
+ auto &Values = LoopDispositions[S];
+ for (auto &V : Values) {
+ if (V.getPointer() == L)
+ return V.getInt();
}
- Values.push_back(std::make_pair(L, LoopVariant));
+ Values.emplace_back(L, LoopVariant);
LoopDisposition D = computeLoopDisposition(S, L);
- SmallVector<std::pair<const Loop *, LoopDisposition>, 2> &Values2 = LoopDispositions[S];
- for (unsigned u = Values2.size(); u > 0; u--) {
- if (Values2[u - 1].first == L) {
- Values2[u - 1].second = D;
+ auto &Values2 = LoopDispositions[S];
+ for (auto &V : make_range(Values2.rbegin(), Values2.rend())) {
+ if (V.getPointer() == L) {
+ V.setInt(D);
break;
}
}
@@ -8066,17 +8195,17 @@ bool ScalarEvolution::hasComputableLoopEvolution(const SCEV *S, const Loop *L) {
ScalarEvolution::BlockDisposition
ScalarEvolution::getBlockDisposition(const SCEV *S, const BasicBlock *BB) {
- SmallVector<std::pair<const BasicBlock *, BlockDisposition>, 2> &Values = BlockDispositions[S];
- for (unsigned u = 0; u < Values.size(); u++) {
- if (Values[u].first == BB)
- return Values[u].second;
+ auto &Values = BlockDispositions[S];
+ for (auto &V : Values) {
+ if (V.getPointer() == BB)
+ return V.getInt();
}
- Values.push_back(std::make_pair(BB, DoesNotDominateBlock));
+ Values.emplace_back(BB, DoesNotDominateBlock);
BlockDisposition D = computeBlockDisposition(S, BB);
- SmallVector<std::pair<const BasicBlock *, BlockDisposition>, 2> &Values2 = BlockDispositions[S];
- for (unsigned u = Values2.size(); u > 0; u--) {
- if (Values2[u - 1].first == BB) {
- Values2[u - 1].second = D;
+ auto &Values2 = BlockDispositions[S];
+ for (auto &V : make_range(Values2.rbegin(), Values2.rend())) {
+ if (V.getPointer() == BB) {
+ V.setInt(D);
break;
}
}
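
Both disposition caches above move from SmallVector<std::pair<...>> entries to
elements with getPointer()/getInt() accessors, i.e. LLVM's PointerIntPair,
which stores the small disposition enum in the alignment bits of the key
pointer and shrinks each entry to a single word. A hand-rolled sketch of the
underlying trick (simplified; the real class checks alignment statically):

    #include <cassert>
    #include <cstdint>
    #include <cstdio>

    // A sufficiently aligned pointer has zero low bits, so a small integer
    // can be packed into them and masked back out on access.
    template <typename T, unsigned Bits> class PtrIntPair {
      uintptr_t Val = 0;
      static constexpr uintptr_t Mask = (uintptr_t(1) << Bits) - 1;

    public:
      PtrIntPair(T *P, unsigned I) { set(P, I); }
      void set(T *P, unsigned I) {
        uintptr_t Raw = reinterpret_cast<uintptr_t>(P);
        assert((Raw & Mask) == 0 && "pointer not aligned enough");
        assert(I <= Mask && "int value too wide");
        Val = Raw | I;
      }
      T *getPointer() const { return reinterpret_cast<T *>(Val & ~Mask); }
      unsigned getInt() const { return unsigned(Val & Mask); }
      void setInt(unsigned I) { Val = (Val & ~Mask) | I; }
    };

    int main() {
      alignas(8) int Dummy = 0;
      PtrIntPair<int, 2> P(&Dummy, 1); // two bits cover a small enum
      P.setInt(2);
      printf("%p %u\n", (void *)P.getPointer(), P.getInt());
    }
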
diff --git a/lib/Analysis/ScalarEvolutionExpander.cpp b/lib/Analysis/ScalarEvolutionExpander.cpp
index bee3685..2625cf3 100644
--- a/lib/Analysis/ScalarEvolutionExpander.cpp
+++ b/lib/Analysis/ScalarEvolutionExpander.cpp
@@ -1063,6 +1063,34 @@ static bool canBeCheaplyTransformed(ScalarEvolution &SE,
return false;
}
+static bool IsIncrementNSW(ScalarEvolution &SE, const SCEVAddRecExpr *AR) {
+ if (!isa<IntegerType>(AR->getType()))
+ return false;
+
+ unsigned BitWidth = cast<IntegerType>(AR->getType())->getBitWidth();
+ Type *WideTy = IntegerType::get(AR->getType()->getContext(), BitWidth * 2);
+ const SCEV *Step = AR->getStepRecurrence(SE);
+ const SCEV *OpAfterExtend = SE.getAddExpr(SE.getSignExtendExpr(Step, WideTy),
+ SE.getSignExtendExpr(AR, WideTy));
+ const SCEV *ExtendAfterOp =
+ SE.getSignExtendExpr(SE.getAddExpr(AR, Step), WideTy);
+ return ExtendAfterOp == OpAfterExtend;
+}
+
+static bool IsIncrementNUW(ScalarEvolution &SE, const SCEVAddRecExpr *AR) {
+ if (!isa<IntegerType>(AR->getType()))
+ return false;
+
+ unsigned BitWidth = cast<IntegerType>(AR->getType())->getBitWidth();
+ Type *WideTy = IntegerType::get(AR->getType()->getContext(), BitWidth * 2);
+ const SCEV *Step = AR->getStepRecurrence(SE);
+ const SCEV *OpAfterExtend = SE.getAddExpr(SE.getZeroExtendExpr(Step, WideTy),
+ SE.getZeroExtendExpr(AR, WideTy));
+ const SCEV *ExtendAfterOp =
+ SE.getZeroExtendExpr(SE.getAddExpr(AR, Step), WideTy);
+ return ExtendAfterOp == OpAfterExtend;
+}
+
/// getAddRecExprPHILiterally - Helper for expandAddRecExprLiterally. Expand
/// the base addrec, which is the addrec without any non-loop-dominating
/// values, and return the PHI.
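
IsIncrementNSW and IsIncrementNUW encode a standard no-wrap proof: an N-bit
addition cannot wrap exactly when extending to 2N bits commutes with it, so
SCEV is asked whether sext(AR + Step) and sext(AR) + sext(Step) fold to the
same expression (and likewise with zext for the unsigned case). The same
identity on concrete 8-bit values, as a standalone sketch:

    #include <cstdint>
    #include <cstdio>

    // A signed 8-bit add wraps exactly when sign extension to 16 bits does
    // not commute with the addition.
    static bool addIsNSW(int8_t A, int8_t B) {
      int16_t OpAfterExtend = int16_t(A) + int16_t(B);    // extend, then add
      uint8_t Wrapped = uint8_t(uint8_t(A) + uint8_t(B)); // low 8 result bits
      int16_t ExtendAfterOp = Wrapped <= 127 ? Wrapped : Wrapped - 256; // sext
      return OpAfterExtend == ExtendAfterOp;
    }

    int main() {
      printf("%d\n", addIsNSW(100, 20)); // 1: 120 fits in int8_t
      printf("%d\n", addIsNSW(100, 30)); // 0: 130 wraps to -126
    }
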
@@ -1188,6 +1216,12 @@ SCEVExpander::getAddRecExprPHILiterally(const SCEVAddRecExpr *Normalized,
// Expand the step somewhere that dominates the loop header.
Value *StepV = expandCodeFor(Step, IntTy, L->getHeader()->begin());
+ // The no-wrap behavior proved by IsIncrement(NUW|NSW) is only applicable if
+ // we actually do emit an addition. It does not apply if we emit a
+ // subtraction.
+ bool IncrementIsNUW = !useSubtract && IsIncrementNUW(SE, Normalized);
+ bool IncrementIsNSW = !useSubtract && IsIncrementNSW(SE, Normalized);
+
// Create the PHI.
BasicBlock *Header = L->getHeader();
Builder.SetInsertPoint(Header, Header->begin());
@@ -1213,10 +1247,11 @@ SCEVExpander::getAddRecExprPHILiterally(const SCEVAddRecExpr *Normalized,
IVIncInsertPos : Pred->getTerminator();
Builder.SetInsertPoint(InsertPos);
Value *IncV = expandIVInc(PN, StepV, L, ExpandTy, IntTy, useSubtract);
+
if (isa<OverflowingBinaryOperator>(IncV)) {
- if (Normalized->getNoWrapFlags(SCEV::FlagNUW))
+ if (IncrementIsNUW)
cast<BinaryOperator>(IncV)->setHasNoUnsignedWrap();
- if (Normalized->getNoWrapFlags(SCEV::FlagNSW))
+ if (IncrementIsNSW)
cast<BinaryOperator>(IncV)->setHasNoSignedWrap();
}
PN->addIncoming(IncV, Pred);
@@ -1711,7 +1746,7 @@ unsigned SCEVExpander::replaceCongruentIVs(Loop *L, const DominatorTree *DT,
// Fold constant phis. They may be congruent to other constant phis and
// would confuse the logic below that expects proper IVs.
- if (Value *V = SimplifyInstruction(Phi, SE.DL, SE.TLI, SE.DT, SE.AT)) {
+ if (Value *V = SimplifyInstruction(Phi, SE.DL, SE.TLI, SE.DT, SE.AC)) {
Phi->replaceAllUsesWith(V);
DeadInsts.push_back(Phi);
++NumElim;
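
The IncrementIsNUW/IncrementIsNSW guard added in the previous hunk exists
because a no-wrap proof for the addition `iv + step` does not transfer to the
subtraction form that expandIVInc sometimes emits: the same value computed as
`iv - (-step)` can wrap even when the addition provably does not. On 8-bit
unsigned values, for example (standalone sketch):

    #include <cstdint>
    #include <cstdio>

    int main() {
      uint8_t IV = 10, Step = 5;
      uint8_t Add = uint8_t(IV + Step);    // 15, no unsigned wrap
      uint8_t NegStep = uint8_t(-Step);    // 251
      uint8_t Sub = uint8_t(IV - NegStep); // same bits (15), but computing
      // 10 - 251 wraps below zero, so tagging the sub `nuw` would be wrong.
      printf("%u %u\n", Add, Sub); // 15 15
    }
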
diff --git a/lib/Analysis/ScopedNoAliasAA.cpp b/lib/Analysis/ScopedNoAliasAA.cpp
index f6c300a..c6ea3af 100644
--- a/lib/Analysis/ScopedNoAliasAA.cpp
+++ b/lib/Analysis/ScopedNoAliasAA.cpp
@@ -33,8 +33,8 @@
//===----------------------------------------------------------------------===//
#include "llvm/ADT/SmallPtrSet.h"
-#include "llvm/Analysis/Passes.h"
#include "llvm/Analysis/AliasAnalysis.h"
+#include "llvm/Analysis/Passes.h"
#include "llvm/IR/Constants.h"
#include "llvm/IR/LLVMContext.h"
#include "llvm/IR/Metadata.h"
diff --git a/lib/Analysis/TargetLibraryInfo.cpp b/lib/Analysis/TargetLibraryInfo.cpp
new file mode 100644
index 0000000..91041fc
--- /dev/null
+++ b/lib/Analysis/TargetLibraryInfo.cpp
@@ -0,0 +1,810 @@
+//===-- TargetLibraryInfo.cpp - Runtime library information ----------------==//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements the TargetLibraryInfo class.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/Analysis/TargetLibraryInfo.h"
+#include "llvm/ADT/Triple.h"
+using namespace llvm;
+
+const char* TargetLibraryInfoImpl::StandardNames[LibFunc::NumLibFuncs] =
+ {
+ "_IO_getc",
+ "_IO_putc",
+ "_ZdaPv",
+ "_ZdaPvRKSt9nothrow_t",
+ "_ZdaPvj",
+ "_ZdaPvm",
+ "_ZdlPv",
+ "_ZdlPvRKSt9nothrow_t",
+ "_ZdlPvj",
+ "_ZdlPvm",
+ "_Znaj",
+ "_ZnajRKSt9nothrow_t",
+ "_Znam",
+ "_ZnamRKSt9nothrow_t",
+ "_Znwj",
+ "_ZnwjRKSt9nothrow_t",
+ "_Znwm",
+ "_ZnwmRKSt9nothrow_t",
+ "__cospi",
+ "__cospif",
+ "__cxa_atexit",
+ "__cxa_guard_abort",
+ "__cxa_guard_acquire",
+ "__cxa_guard_release",
+ "__isoc99_scanf",
+ "__isoc99_sscanf",
+ "__memcpy_chk",
+ "__memmove_chk",
+ "__memset_chk",
+ "__sincospi_stret",
+ "__sincospif_stret",
+ "__sinpi",
+ "__sinpif",
+ "__sqrt_finite",
+ "__sqrtf_finite",
+ "__sqrtl_finite",
+ "__stpcpy_chk",
+ "__stpncpy_chk",
+ "__strcpy_chk",
+ "__strdup",
+ "__strncpy_chk",
+ "__strndup",
+ "__strtok_r",
+ "abs",
+ "access",
+ "acos",
+ "acosf",
+ "acosh",
+ "acoshf",
+ "acoshl",
+ "acosl",
+ "asin",
+ "asinf",
+ "asinh",
+ "asinhf",
+ "asinhl",
+ "asinl",
+ "atan",
+ "atan2",
+ "atan2f",
+ "atan2l",
+ "atanf",
+ "atanh",
+ "atanhf",
+ "atanhl",
+ "atanl",
+ "atof",
+ "atoi",
+ "atol",
+ "atoll",
+ "bcmp",
+ "bcopy",
+ "bzero",
+ "calloc",
+ "cbrt",
+ "cbrtf",
+ "cbrtl",
+ "ceil",
+ "ceilf",
+ "ceill",
+ "chmod",
+ "chown",
+ "clearerr",
+ "closedir",
+ "copysign",
+ "copysignf",
+ "copysignl",
+ "cos",
+ "cosf",
+ "cosh",
+ "coshf",
+ "coshl",
+ "cosl",
+ "ctermid",
+ "exp",
+ "exp10",
+ "exp10f",
+ "exp10l",
+ "exp2",
+ "exp2f",
+ "exp2l",
+ "expf",
+ "expl",
+ "expm1",
+ "expm1f",
+ "expm1l",
+ "fabs",
+ "fabsf",
+ "fabsl",
+ "fclose",
+ "fdopen",
+ "feof",
+ "ferror",
+ "fflush",
+ "ffs",
+ "ffsl",
+ "ffsll",
+ "fgetc",
+ "fgetpos",
+ "fgets",
+ "fileno",
+ "fiprintf",
+ "flockfile",
+ "floor",
+ "floorf",
+ "floorl",
+ "fmax",
+ "fmaxf",
+ "fmaxl",
+ "fmin",
+ "fminf",
+ "fminl",
+ "fmod",
+ "fmodf",
+ "fmodl",
+ "fopen",
+ "fopen64",
+ "fprintf",
+ "fputc",
+ "fputs",
+ "fread",
+ "free",
+ "frexp",
+ "frexpf",
+ "frexpl",
+ "fscanf",
+ "fseek",
+ "fseeko",
+ "fseeko64",
+ "fsetpos",
+ "fstat",
+ "fstat64",
+ "fstatvfs",
+ "fstatvfs64",
+ "ftell",
+ "ftello",
+ "ftello64",
+ "ftrylockfile",
+ "funlockfile",
+ "fwrite",
+ "getc",
+ "getc_unlocked",
+ "getchar",
+ "getenv",
+ "getitimer",
+ "getlogin_r",
+ "getpwnam",
+ "gets",
+ "gettimeofday",
+ "htonl",
+ "htons",
+ "iprintf",
+ "isascii",
+ "isdigit",
+ "labs",
+ "lchown",
+ "ldexp",
+ "ldexpf",
+ "ldexpl",
+ "llabs",
+ "log",
+ "log10",
+ "log10f",
+ "log10l",
+ "log1p",
+ "log1pf",
+ "log1pl",
+ "log2",
+ "log2f",
+ "log2l",
+ "logb",
+ "logbf",
+ "logbl",
+ "logf",
+ "logl",
+ "lstat",
+ "lstat64",
+ "malloc",
+ "memalign",
+ "memccpy",
+ "memchr",
+ "memcmp",
+ "memcpy",
+ "memmove",
+ "memrchr",
+ "memset",
+ "memset_pattern16",
+ "mkdir",
+ "mktime",
+ "modf",
+ "modff",
+ "modfl",
+ "nearbyint",
+ "nearbyintf",
+ "nearbyintl",
+ "ntohl",
+ "ntohs",
+ "open",
+ "open64",
+ "opendir",
+ "pclose",
+ "perror",
+ "popen",
+ "posix_memalign",
+ "pow",
+ "powf",
+ "powl",
+ "pread",
+ "printf",
+ "putc",
+ "putchar",
+ "puts",
+ "pwrite",
+ "qsort",
+ "read",
+ "readlink",
+ "realloc",
+ "reallocf",
+ "realpath",
+ "remove",
+ "rename",
+ "rewind",
+ "rint",
+ "rintf",
+ "rintl",
+ "rmdir",
+ "round",
+ "roundf",
+ "roundl",
+ "scanf",
+ "setbuf",
+ "setitimer",
+ "setvbuf",
+ "sin",
+ "sinf",
+ "sinh",
+ "sinhf",
+ "sinhl",
+ "sinl",
+ "siprintf",
+ "snprintf",
+ "sprintf",
+ "sqrt",
+ "sqrtf",
+ "sqrtl",
+ "sscanf",
+ "stat",
+ "stat64",
+ "statvfs",
+ "statvfs64",
+ "stpcpy",
+ "stpncpy",
+ "strcasecmp",
+ "strcat",
+ "strchr",
+ "strcmp",
+ "strcoll",
+ "strcpy",
+ "strcspn",
+ "strdup",
+ "strlen",
+ "strncasecmp",
+ "strncat",
+ "strncmp",
+ "strncpy",
+ "strndup",
+ "strnlen",
+ "strpbrk",
+ "strrchr",
+ "strspn",
+ "strstr",
+ "strtod",
+ "strtof",
+ "strtok",
+ "strtok_r",
+ "strtol",
+ "strtold",
+ "strtoll",
+ "strtoul",
+ "strtoull",
+ "strxfrm",
+ "system",
+ "tan",
+ "tanf",
+ "tanh",
+ "tanhf",
+ "tanhl",
+ "tanl",
+ "times",
+ "tmpfile",
+ "tmpfile64",
+ "toascii",
+ "trunc",
+ "truncf",
+ "truncl",
+ "uname",
+ "ungetc",
+ "unlink",
+ "unsetenv",
+ "utime",
+ "utimes",
+ "valloc",
+ "vfprintf",
+ "vfscanf",
+ "vprintf",
+ "vscanf",
+ "vsnprintf",
+ "vsprintf",
+ "vsscanf",
+ "write"
+ };
+
+static bool hasSinCosPiStret(const Triple &T) {
+ // Only Darwin variants have _stret versions of combined trig functions.
+ if (!T.isOSDarwin())
+ return false;
+
+ // The ABI is rather complicated on x86, so don't do anything special there.
+ if (T.getArch() == Triple::x86)
+ return false;
+
+ if (T.isMacOSX() && T.isMacOSXVersionLT(10, 9))
+ return false;
+
+ if (T.isiOS() && T.isOSVersionLT(7, 0))
+ return false;
+
+ return true;
+}
+
+/// initialize - Initialize the set of available library functions based on the
+/// specified target triple. This should be carefully written so that a missing
+/// target triple gets a sane set of defaults.
+static void initialize(TargetLibraryInfoImpl &TLI, const Triple &T,
+ const char **StandardNames) {
+#ifndef NDEBUG
+ // Verify that the StandardNames array is in alphabetical order.
+ for (unsigned F = 1; F < LibFunc::NumLibFuncs; ++F) {
+ if (strcmp(StandardNames[F-1], StandardNames[F]) >= 0)
+ llvm_unreachable("TargetLibraryInfoImpl function names must be sorted");
+ }
+#endif // !NDEBUG
+
+  // There are no library implementations of memcpy and memset for AMD GPUs,
+  // and these can be difficult to lower in the backend.
+ if (T.getArch() == Triple::r600 ||
+ T.getArch() == Triple::amdgcn) {
+ TLI.setUnavailable(LibFunc::memcpy);
+ TLI.setUnavailable(LibFunc::memset);
+ TLI.setUnavailable(LibFunc::memset_pattern16);
+ return;
+ }
+
+ // memset_pattern16 is only available on iOS 3.0 and Mac OS X 10.5 and later.
+ if (T.isMacOSX()) {
+ if (T.isMacOSXVersionLT(10, 5))
+ TLI.setUnavailable(LibFunc::memset_pattern16);
+ } else if (T.isiOS()) {
+ if (T.isOSVersionLT(3, 0))
+ TLI.setUnavailable(LibFunc::memset_pattern16);
+ } else {
+ TLI.setUnavailable(LibFunc::memset_pattern16);
+ }
+
+ if (!hasSinCosPiStret(T)) {
+ TLI.setUnavailable(LibFunc::sinpi);
+ TLI.setUnavailable(LibFunc::sinpif);
+ TLI.setUnavailable(LibFunc::cospi);
+ TLI.setUnavailable(LibFunc::cospif);
+ TLI.setUnavailable(LibFunc::sincospi_stret);
+ TLI.setUnavailable(LibFunc::sincospif_stret);
+ }
+
+ if (T.isMacOSX() && T.getArch() == Triple::x86 &&
+ !T.isMacOSXVersionLT(10, 7)) {
+ // x86-32 OSX has a scheme where fwrite and fputs (and some other functions
+ // we don't care about) have two versions; on recent OSX, the one we want
+ // has a $UNIX2003 suffix. The two implementations are identical except
+ // for the return value in some edge cases. However, we don't want to
+ // generate code that depends on the old symbols.
+ TLI.setAvailableWithName(LibFunc::fwrite, "fwrite$UNIX2003");
+ TLI.setAvailableWithName(LibFunc::fputs, "fputs$UNIX2003");
+ }
+
+ // iprintf and friends are only available on XCore and TCE.
+ if (T.getArch() != Triple::xcore && T.getArch() != Triple::tce) {
+ TLI.setUnavailable(LibFunc::iprintf);
+ TLI.setUnavailable(LibFunc::siprintf);
+ TLI.setUnavailable(LibFunc::fiprintf);
+ }
+
+ if (T.isOSWindows() && !T.isOSCygMing()) {
+ // Win32 does not support long double
+ TLI.setUnavailable(LibFunc::acosl);
+ TLI.setUnavailable(LibFunc::asinl);
+ TLI.setUnavailable(LibFunc::atanl);
+ TLI.setUnavailable(LibFunc::atan2l);
+ TLI.setUnavailable(LibFunc::ceill);
+ TLI.setUnavailable(LibFunc::copysignl);
+ TLI.setUnavailable(LibFunc::cosl);
+ TLI.setUnavailable(LibFunc::coshl);
+ TLI.setUnavailable(LibFunc::expl);
+ TLI.setUnavailable(LibFunc::fabsf); // Win32 and Win64 both lack fabsf
+ TLI.setUnavailable(LibFunc::fabsl);
+ TLI.setUnavailable(LibFunc::floorl);
+ TLI.setUnavailable(LibFunc::fmaxl);
+ TLI.setUnavailable(LibFunc::fminl);
+ TLI.setUnavailable(LibFunc::fmodl);
+ TLI.setUnavailable(LibFunc::frexpl);
+ TLI.setUnavailable(LibFunc::ldexpf);
+ TLI.setUnavailable(LibFunc::ldexpl);
+ TLI.setUnavailable(LibFunc::logl);
+ TLI.setUnavailable(LibFunc::modfl);
+ TLI.setUnavailable(LibFunc::powl);
+ TLI.setUnavailable(LibFunc::sinl);
+ TLI.setUnavailable(LibFunc::sinhl);
+ TLI.setUnavailable(LibFunc::sqrtl);
+ TLI.setUnavailable(LibFunc::tanl);
+ TLI.setUnavailable(LibFunc::tanhl);
+
+ // Win32 only has C89 math
+ TLI.setUnavailable(LibFunc::acosh);
+ TLI.setUnavailable(LibFunc::acoshf);
+ TLI.setUnavailable(LibFunc::acoshl);
+ TLI.setUnavailable(LibFunc::asinh);
+ TLI.setUnavailable(LibFunc::asinhf);
+ TLI.setUnavailable(LibFunc::asinhl);
+ TLI.setUnavailable(LibFunc::atanh);
+ TLI.setUnavailable(LibFunc::atanhf);
+ TLI.setUnavailable(LibFunc::atanhl);
+ TLI.setUnavailable(LibFunc::cbrt);
+ TLI.setUnavailable(LibFunc::cbrtf);
+ TLI.setUnavailable(LibFunc::cbrtl);
+ TLI.setUnavailable(LibFunc::exp2);
+ TLI.setUnavailable(LibFunc::exp2f);
+ TLI.setUnavailable(LibFunc::exp2l);
+ TLI.setUnavailable(LibFunc::expm1);
+ TLI.setUnavailable(LibFunc::expm1f);
+ TLI.setUnavailable(LibFunc::expm1l);
+ TLI.setUnavailable(LibFunc::log2);
+ TLI.setUnavailable(LibFunc::log2f);
+ TLI.setUnavailable(LibFunc::log2l);
+ TLI.setUnavailable(LibFunc::log1p);
+ TLI.setUnavailable(LibFunc::log1pf);
+ TLI.setUnavailable(LibFunc::log1pl);
+ TLI.setUnavailable(LibFunc::logb);
+ TLI.setUnavailable(LibFunc::logbf);
+ TLI.setUnavailable(LibFunc::logbl);
+ TLI.setUnavailable(LibFunc::nearbyint);
+ TLI.setUnavailable(LibFunc::nearbyintf);
+ TLI.setUnavailable(LibFunc::nearbyintl);
+ TLI.setUnavailable(LibFunc::rint);
+ TLI.setUnavailable(LibFunc::rintf);
+ TLI.setUnavailable(LibFunc::rintl);
+ TLI.setUnavailable(LibFunc::round);
+ TLI.setUnavailable(LibFunc::roundf);
+ TLI.setUnavailable(LibFunc::roundl);
+ TLI.setUnavailable(LibFunc::trunc);
+ TLI.setUnavailable(LibFunc::truncf);
+ TLI.setUnavailable(LibFunc::truncl);
+
+ // Win32 provides some C99 math with mangled names
+ TLI.setAvailableWithName(LibFunc::copysign, "_copysign");
+
+ if (T.getArch() == Triple::x86) {
+ // Win32 on x86 implements single-precision math functions as macros
+ TLI.setUnavailable(LibFunc::acosf);
+ TLI.setUnavailable(LibFunc::asinf);
+ TLI.setUnavailable(LibFunc::atanf);
+ TLI.setUnavailable(LibFunc::atan2f);
+ TLI.setUnavailable(LibFunc::ceilf);
+ TLI.setUnavailable(LibFunc::copysignf);
+ TLI.setUnavailable(LibFunc::cosf);
+ TLI.setUnavailable(LibFunc::coshf);
+ TLI.setUnavailable(LibFunc::expf);
+ TLI.setUnavailable(LibFunc::floorf);
+ TLI.setUnavailable(LibFunc::fminf);
+ TLI.setUnavailable(LibFunc::fmaxf);
+ TLI.setUnavailable(LibFunc::fmodf);
+ TLI.setUnavailable(LibFunc::logf);
+ TLI.setUnavailable(LibFunc::powf);
+ TLI.setUnavailable(LibFunc::sinf);
+ TLI.setUnavailable(LibFunc::sinhf);
+ TLI.setUnavailable(LibFunc::sqrtf);
+ TLI.setUnavailable(LibFunc::tanf);
+ TLI.setUnavailable(LibFunc::tanhf);
+ }
+
+    // Win32 does *not* provide these functions, but they are
+ // generally available on POSIX-compliant systems:
+ TLI.setUnavailable(LibFunc::access);
+ TLI.setUnavailable(LibFunc::bcmp);
+ TLI.setUnavailable(LibFunc::bcopy);
+ TLI.setUnavailable(LibFunc::bzero);
+ TLI.setUnavailable(LibFunc::chmod);
+ TLI.setUnavailable(LibFunc::chown);
+ TLI.setUnavailable(LibFunc::closedir);
+ TLI.setUnavailable(LibFunc::ctermid);
+ TLI.setUnavailable(LibFunc::fdopen);
+ TLI.setUnavailable(LibFunc::ffs);
+ TLI.setUnavailable(LibFunc::fileno);
+ TLI.setUnavailable(LibFunc::flockfile);
+ TLI.setUnavailable(LibFunc::fseeko);
+ TLI.setUnavailable(LibFunc::fstat);
+ TLI.setUnavailable(LibFunc::fstatvfs);
+ TLI.setUnavailable(LibFunc::ftello);
+ TLI.setUnavailable(LibFunc::ftrylockfile);
+ TLI.setUnavailable(LibFunc::funlockfile);
+ TLI.setUnavailable(LibFunc::getc_unlocked);
+ TLI.setUnavailable(LibFunc::getitimer);
+ TLI.setUnavailable(LibFunc::getlogin_r);
+ TLI.setUnavailable(LibFunc::getpwnam);
+ TLI.setUnavailable(LibFunc::gettimeofday);
+ TLI.setUnavailable(LibFunc::htonl);
+ TLI.setUnavailable(LibFunc::htons);
+ TLI.setUnavailable(LibFunc::lchown);
+ TLI.setUnavailable(LibFunc::lstat);
+ TLI.setUnavailable(LibFunc::memccpy);
+ TLI.setUnavailable(LibFunc::mkdir);
+ TLI.setUnavailable(LibFunc::ntohl);
+ TLI.setUnavailable(LibFunc::ntohs);
+ TLI.setUnavailable(LibFunc::open);
+ TLI.setUnavailable(LibFunc::opendir);
+ TLI.setUnavailable(LibFunc::pclose);
+ TLI.setUnavailable(LibFunc::popen);
+ TLI.setUnavailable(LibFunc::pread);
+ TLI.setUnavailable(LibFunc::pwrite);
+ TLI.setUnavailable(LibFunc::read);
+ TLI.setUnavailable(LibFunc::readlink);
+ TLI.setUnavailable(LibFunc::realpath);
+ TLI.setUnavailable(LibFunc::rmdir);
+ TLI.setUnavailable(LibFunc::setitimer);
+ TLI.setUnavailable(LibFunc::stat);
+ TLI.setUnavailable(LibFunc::statvfs);
+ TLI.setUnavailable(LibFunc::stpcpy);
+ TLI.setUnavailable(LibFunc::stpncpy);
+ TLI.setUnavailable(LibFunc::strcasecmp);
+ TLI.setUnavailable(LibFunc::strncasecmp);
+ TLI.setUnavailable(LibFunc::times);
+ TLI.setUnavailable(LibFunc::uname);
+ TLI.setUnavailable(LibFunc::unlink);
+ TLI.setUnavailable(LibFunc::unsetenv);
+ TLI.setUnavailable(LibFunc::utime);
+ TLI.setUnavailable(LibFunc::utimes);
+ TLI.setUnavailable(LibFunc::write);
+
+    // Win32 does *not* provide these functions, but they are
+ // specified by C99:
+ TLI.setUnavailable(LibFunc::atoll);
+ TLI.setUnavailable(LibFunc::frexpf);
+ TLI.setUnavailable(LibFunc::llabs);
+ }
+
+ switch (T.getOS()) {
+ case Triple::MacOSX:
+ // exp10 and exp10f are not available on OS X until 10.9 and iOS until 7.0
+ // and their names are __exp10 and __exp10f. exp10l is not available on
+ // OS X or iOS.
+ TLI.setUnavailable(LibFunc::exp10l);
+ if (T.isMacOSXVersionLT(10, 9)) {
+ TLI.setUnavailable(LibFunc::exp10);
+ TLI.setUnavailable(LibFunc::exp10f);
+ } else {
+ TLI.setAvailableWithName(LibFunc::exp10, "__exp10");
+ TLI.setAvailableWithName(LibFunc::exp10f, "__exp10f");
+ }
+ break;
+ case Triple::IOS:
+ TLI.setUnavailable(LibFunc::exp10l);
+ if (T.isOSVersionLT(7, 0)) {
+ TLI.setUnavailable(LibFunc::exp10);
+ TLI.setUnavailable(LibFunc::exp10f);
+ } else {
+ TLI.setAvailableWithName(LibFunc::exp10, "__exp10");
+ TLI.setAvailableWithName(LibFunc::exp10f, "__exp10f");
+ }
+ break;
+ case Triple::Linux:
+    // exp10, exp10f, and exp10l are available on Linux (GLIBC) but are
+    // extremely buggy prior to glibc version 2.18. Until that version is
+    // widely deployed or we have a reasonable detection strategy, we cannot
+    // use exp10 reliably on Linux.
+ //
+ // Fall through to disable all of them.
+ default:
+ TLI.setUnavailable(LibFunc::exp10);
+ TLI.setUnavailable(LibFunc::exp10f);
+ TLI.setUnavailable(LibFunc::exp10l);
+ }
+
+ // ffsl is available on at least Darwin, Mac OS X, iOS, FreeBSD, and
+ // Linux (GLIBC):
+ // http://developer.apple.com/library/mac/#documentation/Darwin/Reference/ManPages/man3/ffsl.3.html
+ // http://svn.freebsd.org/base/user/eri/pf45/head/lib/libc/string/ffsl.c
+ // http://www.gnu.org/software/gnulib/manual/html_node/ffsl.html
+ switch (T.getOS()) {
+ case Triple::Darwin:
+ case Triple::MacOSX:
+ case Triple::IOS:
+ case Triple::FreeBSD:
+ case Triple::Linux:
+ break;
+ default:
+ TLI.setUnavailable(LibFunc::ffsl);
+ }
+
+ // ffsll is available on at least FreeBSD and Linux (GLIBC):
+ // http://svn.freebsd.org/base/user/eri/pf45/head/lib/libc/string/ffsll.c
+ // http://www.gnu.org/software/gnulib/manual/html_node/ffsll.html
+ switch (T.getOS()) {
+ case Triple::FreeBSD:
+ case Triple::Linux:
+ break;
+ default:
+ TLI.setUnavailable(LibFunc::ffsll);
+ }
+
+ // The following functions are available on at least Linux:
+ if (!T.isOSLinux()) {
+ TLI.setUnavailable(LibFunc::dunder_strdup);
+ TLI.setUnavailable(LibFunc::dunder_strtok_r);
+ TLI.setUnavailable(LibFunc::dunder_isoc99_scanf);
+ TLI.setUnavailable(LibFunc::dunder_isoc99_sscanf);
+ TLI.setUnavailable(LibFunc::under_IO_getc);
+ TLI.setUnavailable(LibFunc::under_IO_putc);
+ TLI.setUnavailable(LibFunc::memalign);
+ TLI.setUnavailable(LibFunc::fopen64);
+ TLI.setUnavailable(LibFunc::fseeko64);
+ TLI.setUnavailable(LibFunc::fstat64);
+ TLI.setUnavailable(LibFunc::fstatvfs64);
+ TLI.setUnavailable(LibFunc::ftello64);
+ TLI.setUnavailable(LibFunc::lstat64);
+ TLI.setUnavailable(LibFunc::open64);
+ TLI.setUnavailable(LibFunc::stat64);
+ TLI.setUnavailable(LibFunc::statvfs64);
+ TLI.setUnavailable(LibFunc::tmpfile64);
+ }
+}
+
+TargetLibraryInfoImpl::TargetLibraryInfoImpl() {
+ // Default to everything being available.
+ memset(AvailableArray, -1, sizeof(AvailableArray));
+
+ initialize(*this, Triple(), StandardNames);
+}
+
+TargetLibraryInfoImpl::TargetLibraryInfoImpl(const Triple &T) {
+ // Default to everything being available.
+ memset(AvailableArray, -1, sizeof(AvailableArray));
+
+ initialize(*this, T, StandardNames);
+}
+
+TargetLibraryInfoImpl::TargetLibraryInfoImpl(const TargetLibraryInfoImpl &TLI)
+ : CustomNames(TLI.CustomNames) {
+ memcpy(AvailableArray, TLI.AvailableArray, sizeof(AvailableArray));
+}
+
+TargetLibraryInfoImpl::TargetLibraryInfoImpl(TargetLibraryInfoImpl &&TLI)
+ : CustomNames(std::move(TLI.CustomNames)) {
+ std::move(std::begin(TLI.AvailableArray), std::end(TLI.AvailableArray),
+ AvailableArray);
+}
+
+TargetLibraryInfoImpl &TargetLibraryInfoImpl::operator=(const TargetLibraryInfoImpl &TLI) {
+ CustomNames = TLI.CustomNames;
+ memcpy(AvailableArray, TLI.AvailableArray, sizeof(AvailableArray));
+ return *this;
+}
+
+TargetLibraryInfoImpl &TargetLibraryInfoImpl::operator=(TargetLibraryInfoImpl &&TLI) {
+ CustomNames = std::move(TLI.CustomNames);
+ std::move(std::begin(TLI.AvailableArray), std::end(TLI.AvailableArray),
+ AvailableArray);
+ return *this;
+}
+
+namespace {
+struct StringComparator {
+ /// Compare two strings and return true if LHS is lexicographically less than
+ /// RHS. Requires that RHS doesn't contain any zero bytes.
+ bool operator()(const char *LHS, StringRef RHS) const {
+ // Compare prefixes with strncmp. If prefixes match we know that LHS is
+    // greater than or equal to RHS, as RHS can't contain any '\0'.
+ return std::strncmp(LHS, RHS.data(), RHS.size()) < 0;
+ }
+
+ // Provided for compatibility with MSVC's debug mode.
+ bool operator()(StringRef LHS, const char *RHS) const { return LHS < RHS; }
+ bool operator()(StringRef LHS, StringRef RHS) const { return LHS < RHS; }
+ bool operator()(const char *LHS, const char *RHS) const {
+ return std::strcmp(LHS, RHS) < 0;
+ }
+};
+}
+
+bool TargetLibraryInfoImpl::getLibFunc(StringRef funcName,
+ LibFunc::Func &F) const {
+ const char **Start = &StandardNames[0];
+ const char **End = &StandardNames[LibFunc::NumLibFuncs];
+
+  // Filter out empty names and names containing null bytes; those can't be
+  // in our table.
+ if (funcName.empty() || funcName.find('\0') != StringRef::npos)
+ return false;
+
+  // Check for the \01 prefix that is used to mangle __asm declarations and
+  // strip it if present.
+ if (funcName.front() == '\01')
+ funcName = funcName.substr(1);
+ const char **I = std::lower_bound(Start, End, funcName, StringComparator());
+ if (I != End && *I == funcName) {
+ F = (LibFunc::Func)(I - Start);
+ return true;
+ }
+ return false;
+}
+
+void TargetLibraryInfoImpl::disableAllFunctions() {
+ memset(AvailableArray, 0, sizeof(AvailableArray));
+}
+
+TargetLibraryInfo TargetLibraryAnalysis::run(Module &M) {
+ if (PresetInfoImpl)
+ return TargetLibraryInfo(*PresetInfoImpl);
+
+ return TargetLibraryInfo(lookupInfoImpl(Triple(M.getTargetTriple())));
+}
+
+TargetLibraryInfo TargetLibraryAnalysis::run(Function &F) {
+ if (PresetInfoImpl)
+ return TargetLibraryInfo(*PresetInfoImpl);
+
+ return TargetLibraryInfo(
+ lookupInfoImpl(Triple(F.getParent()->getTargetTriple())));
+}
+
+TargetLibraryInfoImpl &TargetLibraryAnalysis::lookupInfoImpl(Triple T) {
+ std::unique_ptr<TargetLibraryInfoImpl> &Impl =
+ Impls[T.normalize()];
+ if (!Impl)
+ Impl.reset(new TargetLibraryInfoImpl(T));
+
+ return *Impl;
+}
+
+
+TargetLibraryInfoWrapperPass::TargetLibraryInfoWrapperPass()
+ : ImmutablePass(ID), TLIImpl(), TLI(TLIImpl) {
+ initializeTargetLibraryInfoWrapperPassPass(*PassRegistry::getPassRegistry());
+}
+
+TargetLibraryInfoWrapperPass::TargetLibraryInfoWrapperPass(const Triple &T)
+ : ImmutablePass(ID), TLIImpl(T), TLI(TLIImpl) {
+ initializeTargetLibraryInfoWrapperPassPass(*PassRegistry::getPassRegistry());
+}
+
+TargetLibraryInfoWrapperPass::TargetLibraryInfoWrapperPass(
+ const TargetLibraryInfoImpl &TLIImpl)
+ : ImmutablePass(ID), TLIImpl(TLIImpl), TLI(this->TLIImpl) {
+ initializeTargetLibraryInfoWrapperPassPass(*PassRegistry::getPassRegistry());
+}
+
+char TargetLibraryAnalysis::PassID;
+
+// Register the basic pass.
+INITIALIZE_PASS(TargetLibraryInfoWrapperPass, "targetlibinfo",
+ "Target Library Information", false, true)
+char TargetLibraryInfoWrapperPass::ID = 0;
+
+void TargetLibraryInfoWrapperPass::anchor() {}
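
The lookup scheme used by getLibFunc above depends on the LibFunc::Func
enumerators being declared in exactly the alphabetical order of StandardNames
(the NDEBUG-guarded loop in initialize enforces the sort), so the result of a
binary search over the table is itself the enum value. A toy reduction with a
hypothetical three-entry table:

    #include <algorithm>
    #include <cstdio>
    #include <cstring>

    // The enumerators mirror the sorted name table, so lower_bound's index
    // *is* the enum value.
    enum Func { F_memcpy, F_memset, F_strlen, NumFuncs };
    static const char *Names[NumFuncs] = {"memcpy", "memset", "strlen"};

    static bool getFunc(const char *Name, Func &F) {
      const char **End = Names + NumFuncs;
      const char **I = std::lower_bound(
          Names, End, Name,
          [](const char *L, const char *R) { return std::strcmp(L, R) < 0; });
      if (I != End && std::strcmp(*I, Name) == 0) {
        F = Func(I - Names);
        return true;
      }
      return false;
    }

    int main() {
      Func F = NumFuncs;
      bool Found = getFunc("memset", F);
      printf("%d %d\n", Found, F); // 1 1
    }
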
diff --git a/lib/Analysis/TargetTransformInfo.cpp b/lib/Analysis/TargetTransformInfo.cpp
index c1ffb9d..7ff29b0 100644
--- a/lib/Analysis/TargetTransformInfo.cpp
+++ b/lib/Analysis/TargetTransformInfo.cpp
@@ -8,11 +8,13 @@
//===----------------------------------------------------------------------===//
#include "llvm/Analysis/TargetTransformInfo.h"
+#include "llvm/Analysis/TargetTransformInfoImpl.h"
#include "llvm/IR/CallSite.h"
#include "llvm/IR/DataLayout.h"
#include "llvm/IR/Instruction.h"
#include "llvm/IR/Instructions.h"
#include "llvm/IR/IntrinsicInst.h"
+#include "llvm/IR/Module.h"
#include "llvm/IR/Operator.h"
#include "llvm/Support/ErrorHandling.h"
@@ -20,623 +22,290 @@ using namespace llvm;
#define DEBUG_TYPE "tti"
-// Setup the analysis group to manage the TargetTransformInfo passes.
-INITIALIZE_ANALYSIS_GROUP(TargetTransformInfo, "Target Information", NoTTI)
-char TargetTransformInfo::ID = 0;
-
-TargetTransformInfo::~TargetTransformInfo() {
+namespace {
+/// \brief No-op implementation of the TTI interface using the utility base
+/// classes.
+///
+/// This is used when no target specific information is available.
+struct NoTTIImpl : TargetTransformInfoImplCRTPBase<NoTTIImpl> {
+ explicit NoTTIImpl(const DataLayout *DL)
+ : TargetTransformInfoImplCRTPBase<NoTTIImpl>(DL) {}
+};
}
-void TargetTransformInfo::pushTTIStack(Pass *P) {
- TopTTI = this;
- PrevTTI = &P->getAnalysis<TargetTransformInfo>();
+TargetTransformInfo::TargetTransformInfo(const DataLayout *DL)
+ : TTIImpl(new Model<NoTTIImpl>(NoTTIImpl(DL))) {}
- // Walk up the chain and update the top TTI pointer.
- for (TargetTransformInfo *PTTI = PrevTTI; PTTI; PTTI = PTTI->PrevTTI)
- PTTI->TopTTI = this;
-}
+TargetTransformInfo::~TargetTransformInfo() {}
+
+TargetTransformInfo::TargetTransformInfo(TargetTransformInfo &&Arg)
+ : TTIImpl(std::move(Arg.TTIImpl)) {}
-void TargetTransformInfo::getAnalysisUsage(AnalysisUsage &AU) const {
- AU.addRequired<TargetTransformInfo>();
+TargetTransformInfo &TargetTransformInfo::operator=(TargetTransformInfo &&RHS) {
+ TTIImpl = std::move(RHS.TTIImpl);
+ return *this;
}
unsigned TargetTransformInfo::getOperationCost(unsigned Opcode, Type *Ty,
Type *OpTy) const {
- return PrevTTI->getOperationCost(Opcode, Ty, OpTy);
-}
-
-unsigned TargetTransformInfo::getGEPCost(
- const Value *Ptr, ArrayRef<const Value *> Operands) const {
- return PrevTTI->getGEPCost(Ptr, Operands);
+ return TTIImpl->getOperationCost(Opcode, Ty, OpTy);
}
unsigned TargetTransformInfo::getCallCost(FunctionType *FTy,
int NumArgs) const {
- return PrevTTI->getCallCost(FTy, NumArgs);
-}
-
-unsigned TargetTransformInfo::getCallCost(const Function *F,
- int NumArgs) const {
- return PrevTTI->getCallCost(F, NumArgs);
-}
-
-unsigned TargetTransformInfo::getCallCost(
- const Function *F, ArrayRef<const Value *> Arguments) const {
- return PrevTTI->getCallCost(F, Arguments);
+ return TTIImpl->getCallCost(FTy, NumArgs);
}
-unsigned TargetTransformInfo::getIntrinsicCost(
- Intrinsic::ID IID, Type *RetTy, ArrayRef<Type *> ParamTys) const {
- return PrevTTI->getIntrinsicCost(IID, RetTy, ParamTys);
+unsigned
+TargetTransformInfo::getCallCost(const Function *F,
+ ArrayRef<const Value *> Arguments) const {
+ return TTIImpl->getCallCost(F, Arguments);
}
-unsigned TargetTransformInfo::getIntrinsicCost(
- Intrinsic::ID IID, Type *RetTy, ArrayRef<const Value *> Arguments) const {
- return PrevTTI->getIntrinsicCost(IID, RetTy, Arguments);
+unsigned
+TargetTransformInfo::getIntrinsicCost(Intrinsic::ID IID, Type *RetTy,
+ ArrayRef<const Value *> Arguments) const {
+ return TTIImpl->getIntrinsicCost(IID, RetTy, Arguments);
}
unsigned TargetTransformInfo::getUserCost(const User *U) const {
- return PrevTTI->getUserCost(U);
+ return TTIImpl->getUserCost(U);
}
bool TargetTransformInfo::hasBranchDivergence() const {
- return PrevTTI->hasBranchDivergence();
+ return TTIImpl->hasBranchDivergence();
}
bool TargetTransformInfo::isLoweredToCall(const Function *F) const {
- return PrevTTI->isLoweredToCall(F);
+ return TTIImpl->isLoweredToCall(F);
}
-void
-TargetTransformInfo::getUnrollingPreferences(const Function *F, Loop *L,
- UnrollingPreferences &UP) const {
- PrevTTI->getUnrollingPreferences(F, L, UP);
+void TargetTransformInfo::getUnrollingPreferences(
+ Loop *L, UnrollingPreferences &UP) const {
+ return TTIImpl->getUnrollingPreferences(L, UP);
}
bool TargetTransformInfo::isLegalAddImmediate(int64_t Imm) const {
- return PrevTTI->isLegalAddImmediate(Imm);
+ return TTIImpl->isLegalAddImmediate(Imm);
}
bool TargetTransformInfo::isLegalICmpImmediate(int64_t Imm) const {
- return PrevTTI->isLegalICmpImmediate(Imm);
+ return TTIImpl->isLegalICmpImmediate(Imm);
}
bool TargetTransformInfo::isLegalAddressingMode(Type *Ty, GlobalValue *BaseGV,
int64_t BaseOffset,
bool HasBaseReg,
int64_t Scale) const {
- return PrevTTI->isLegalAddressingMode(Ty, BaseGV, BaseOffset, HasBaseReg,
+ return TTIImpl->isLegalAddressingMode(Ty, BaseGV, BaseOffset, HasBaseReg,
Scale);
}
+bool TargetTransformInfo::isLegalMaskedStore(Type *DataType,
+ int Consecutive) const {
+ return TTIImpl->isLegalMaskedStore(DataType, Consecutive);
+}
+
+bool TargetTransformInfo::isLegalMaskedLoad(Type *DataType,
+ int Consecutive) const {
+ return TTIImpl->isLegalMaskedLoad(DataType, Consecutive);
+}
+
int TargetTransformInfo::getScalingFactorCost(Type *Ty, GlobalValue *BaseGV,
int64_t BaseOffset,
bool HasBaseReg,
int64_t Scale) const {
- return PrevTTI->getScalingFactorCost(Ty, BaseGV, BaseOffset, HasBaseReg,
+ return TTIImpl->getScalingFactorCost(Ty, BaseGV, BaseOffset, HasBaseReg,
Scale);
}
bool TargetTransformInfo::isTruncateFree(Type *Ty1, Type *Ty2) const {
- return PrevTTI->isTruncateFree(Ty1, Ty2);
+ return TTIImpl->isTruncateFree(Ty1, Ty2);
+}
+
+bool TargetTransformInfo::isProfitableToHoist(Instruction *I) const {
+ return TTIImpl->isProfitableToHoist(I);
}
bool TargetTransformInfo::isTypeLegal(Type *Ty) const {
- return PrevTTI->isTypeLegal(Ty);
+ return TTIImpl->isTypeLegal(Ty);
}
unsigned TargetTransformInfo::getJumpBufAlignment() const {
- return PrevTTI->getJumpBufAlignment();
+ return TTIImpl->getJumpBufAlignment();
}
unsigned TargetTransformInfo::getJumpBufSize() const {
- return PrevTTI->getJumpBufSize();
+ return TTIImpl->getJumpBufSize();
}
bool TargetTransformInfo::shouldBuildLookupTables() const {
- return PrevTTI->shouldBuildLookupTables();
+ return TTIImpl->shouldBuildLookupTables();
}
TargetTransformInfo::PopcntSupportKind
TargetTransformInfo::getPopcntSupport(unsigned IntTyWidthInBit) const {
- return PrevTTI->getPopcntSupport(IntTyWidthInBit);
+ return TTIImpl->getPopcntSupport(IntTyWidthInBit);
}
bool TargetTransformInfo::haveFastSqrt(Type *Ty) const {
- return PrevTTI->haveFastSqrt(Ty);
+ return TTIImpl->haveFastSqrt(Ty);
+}
+
+unsigned TargetTransformInfo::getFPOpCost(Type *Ty) const {
+ return TTIImpl->getFPOpCost(Ty);
}
unsigned TargetTransformInfo::getIntImmCost(const APInt &Imm, Type *Ty) const {
- return PrevTTI->getIntImmCost(Imm, Ty);
+ return TTIImpl->getIntImmCost(Imm, Ty);
}
-unsigned TargetTransformInfo::getIntImmCost(unsigned Opc, unsigned Idx,
+unsigned TargetTransformInfo::getIntImmCost(unsigned Opcode, unsigned Idx,
const APInt &Imm, Type *Ty) const {
- return PrevTTI->getIntImmCost(Opc, Idx, Imm, Ty);
+ return TTIImpl->getIntImmCost(Opcode, Idx, Imm, Ty);
}
unsigned TargetTransformInfo::getIntImmCost(Intrinsic::ID IID, unsigned Idx,
const APInt &Imm, Type *Ty) const {
- return PrevTTI->getIntImmCost(IID, Idx, Imm, Ty);
+ return TTIImpl->getIntImmCost(IID, Idx, Imm, Ty);
}
unsigned TargetTransformInfo::getNumberOfRegisters(bool Vector) const {
- return PrevTTI->getNumberOfRegisters(Vector);
+ return TTIImpl->getNumberOfRegisters(Vector);
}
unsigned TargetTransformInfo::getRegisterBitWidth(bool Vector) const {
- return PrevTTI->getRegisterBitWidth(Vector);
+ return TTIImpl->getRegisterBitWidth(Vector);
}
unsigned TargetTransformInfo::getMaxInterleaveFactor() const {
- return PrevTTI->getMaxInterleaveFactor();
+ return TTIImpl->getMaxInterleaveFactor();
}
unsigned TargetTransformInfo::getArithmeticInstrCost(
- unsigned Opcode, Type *Ty, OperandValueKind Op1Info,
- OperandValueKind Op2Info, OperandValueProperties Opd1PropInfo,
+ unsigned Opcode, Type *Ty, OperandValueKind Opd1Info,
+ OperandValueKind Opd2Info, OperandValueProperties Opd1PropInfo,
OperandValueProperties Opd2PropInfo) const {
- return PrevTTI->getArithmeticInstrCost(Opcode, Ty, Op1Info, Op2Info,
+ return TTIImpl->getArithmeticInstrCost(Opcode, Ty, Opd1Info, Opd2Info,
Opd1PropInfo, Opd2PropInfo);
}
-unsigned TargetTransformInfo::getShuffleCost(ShuffleKind Kind, Type *Tp,
+unsigned TargetTransformInfo::getShuffleCost(ShuffleKind Kind, Type *Ty,
int Index, Type *SubTp) const {
- return PrevTTI->getShuffleCost(Kind, Tp, Index, SubTp);
+ return TTIImpl->getShuffleCost(Kind, Ty, Index, SubTp);
}
unsigned TargetTransformInfo::getCastInstrCost(unsigned Opcode, Type *Dst,
Type *Src) const {
- return PrevTTI->getCastInstrCost(Opcode, Dst, Src);
+ return TTIImpl->getCastInstrCost(Opcode, Dst, Src);
}
unsigned TargetTransformInfo::getCFInstrCost(unsigned Opcode) const {
- return PrevTTI->getCFInstrCost(Opcode);
+ return TTIImpl->getCFInstrCost(Opcode);
}
unsigned TargetTransformInfo::getCmpSelInstrCost(unsigned Opcode, Type *ValTy,
Type *CondTy) const {
- return PrevTTI->getCmpSelInstrCost(Opcode, ValTy, CondTy);
+ return TTIImpl->getCmpSelInstrCost(Opcode, ValTy, CondTy);
}
unsigned TargetTransformInfo::getVectorInstrCost(unsigned Opcode, Type *Val,
unsigned Index) const {
- return PrevTTI->getVectorInstrCost(Opcode, Val, Index);
+ return TTIImpl->getVectorInstrCost(Opcode, Val, Index);
}
unsigned TargetTransformInfo::getMemoryOpCost(unsigned Opcode, Type *Src,
unsigned Alignment,
unsigned AddressSpace) const {
- return PrevTTI->getMemoryOpCost(Opcode, Src, Alignment, AddressSpace);
- ;
+ return TTIImpl->getMemoryOpCost(Opcode, Src, Alignment, AddressSpace);
}
unsigned
-TargetTransformInfo::getIntrinsicInstrCost(Intrinsic::ID ID,
- Type *RetTy,
+TargetTransformInfo::getMaskedMemoryOpCost(unsigned Opcode, Type *Src,
+ unsigned Alignment,
+ unsigned AddressSpace) const {
+ return TTIImpl->getMaskedMemoryOpCost(Opcode, Src, Alignment, AddressSpace);
+}
+
+unsigned
+TargetTransformInfo::getIntrinsicInstrCost(Intrinsic::ID ID, Type *RetTy,
ArrayRef<Type *> Tys) const {
- return PrevTTI->getIntrinsicInstrCost(ID, RetTy, Tys);
+ return TTIImpl->getIntrinsicInstrCost(ID, RetTy, Tys);
}
unsigned TargetTransformInfo::getNumberOfParts(Type *Tp) const {
- return PrevTTI->getNumberOfParts(Tp);
+ return TTIImpl->getNumberOfParts(Tp);
}
unsigned TargetTransformInfo::getAddressComputationCost(Type *Tp,
bool IsComplex) const {
- return PrevTTI->getAddressComputationCost(Tp, IsComplex);
+ return TTIImpl->getAddressComputationCost(Tp, IsComplex);
}
unsigned TargetTransformInfo::getReductionCost(unsigned Opcode, Type *Ty,
- bool IsPairwise) const {
- return PrevTTI->getReductionCost(Opcode, Ty, IsPairwise);
+ bool IsPairwiseForm) const {
+ return TTIImpl->getReductionCost(Opcode, Ty, IsPairwiseForm);
}
-unsigned TargetTransformInfo::getCostOfKeepingLiveOverCall(ArrayRef<Type*> Tys)
- const {
- return PrevTTI->getCostOfKeepingLiveOverCall(Tys);
+unsigned
+TargetTransformInfo::getCostOfKeepingLiveOverCall(ArrayRef<Type *> Tys) const {
+ return TTIImpl->getCostOfKeepingLiveOverCall(Tys);
}
-namespace {
+bool TargetTransformInfo::getTgtMemIntrinsic(IntrinsicInst *Inst,
+ MemIntrinsicInfo &Info) const {
+ return TTIImpl->getTgtMemIntrinsic(Inst, Info);
+}
-struct NoTTI final : ImmutablePass, TargetTransformInfo {
- const DataLayout *DL;
-
- NoTTI() : ImmutablePass(ID), DL(nullptr) {
- initializeNoTTIPass(*PassRegistry::getPassRegistry());
- }
-
- void initializePass() override {
- // Note that this subclass is special, and must *not* call initializeTTI as
- // it does not chain.
- TopTTI = this;
- PrevTTI = nullptr;
- DataLayoutPass *DLP = getAnalysisIfAvailable<DataLayoutPass>();
- DL = DLP ? &DLP->getDataLayout() : nullptr;
- }
-
- void getAnalysisUsage(AnalysisUsage &AU) const override {
- // Note that this subclass is special, and must *not* call
- // TTI::getAnalysisUsage as it breaks the recursion.
- }
-
- /// Pass identification.
- static char ID;
-
- /// Provide necessary pointer adjustments for the two base classes.
- void *getAdjustedAnalysisPointer(const void *ID) override {
- if (ID == &TargetTransformInfo::ID)
- return (TargetTransformInfo*)this;
- return this;
- }
-
- unsigned getOperationCost(unsigned Opcode, Type *Ty,
- Type *OpTy) const override {
- switch (Opcode) {
- default:
- // By default, just classify everything as 'basic'.
- return TCC_Basic;
-
- case Instruction::GetElementPtr:
- llvm_unreachable("Use getGEPCost for GEP operations!");
-
- case Instruction::BitCast:
- assert(OpTy && "Cast instructions must provide the operand type");
- if (Ty == OpTy || (Ty->isPointerTy() && OpTy->isPointerTy()))
- // Identity and pointer-to-pointer casts are free.
- return TCC_Free;
-
- // Otherwise, the default basic cost is used.
- return TCC_Basic;
-
- case Instruction::IntToPtr: {
- if (!DL)
- return TCC_Basic;
-
- // An inttoptr cast is free so long as the input is a legal integer type
- // which doesn't contain values outside the range of a pointer.
- unsigned OpSize = OpTy->getScalarSizeInBits();
- if (DL->isLegalInteger(OpSize) &&
- OpSize <= DL->getPointerTypeSizeInBits(Ty))
- return TCC_Free;
-
- // Otherwise it's not a no-op.
- return TCC_Basic;
- }
- case Instruction::PtrToInt: {
- if (!DL)
- return TCC_Basic;
-
- // A ptrtoint cast is free so long as the result is large enough to store
- // the pointer, and a legal integer type.
- unsigned DestSize = Ty->getScalarSizeInBits();
- if (DL->isLegalInteger(DestSize) &&
- DestSize >= DL->getPointerTypeSizeInBits(OpTy))
- return TCC_Free;
-
- // Otherwise it's not a no-op.
- return TCC_Basic;
- }
- case Instruction::Trunc:
- // trunc to a native type is free (assuming the target has compare and
- // shift-right of the same width).
- if (DL && DL->isLegalInteger(DL->getTypeSizeInBits(Ty)))
- return TCC_Free;
-
- return TCC_Basic;
- }
- }
-
- unsigned getGEPCost(const Value *Ptr,
- ArrayRef<const Value *> Operands) const override {
- // In the basic model, we just assume that all-constant GEPs will be folded
- // into their uses via addressing modes.
- for (unsigned Idx = 0, Size = Operands.size(); Idx != Size; ++Idx)
- if (!isa<Constant>(Operands[Idx]))
- return TCC_Basic;
-
- return TCC_Free;
- }
-
- unsigned getCallCost(FunctionType *FTy, int NumArgs = -1) const override
- {
- assert(FTy && "FunctionType must be provided to this routine.");
-
- // The target-independent implementation just measures the size of the
- // function by approximating that each argument will take on average one
- // instruction to prepare.
-
- if (NumArgs < 0)
- // Set the argument number to the number of explicit arguments in the
- // function.
- NumArgs = FTy->getNumParams();
-
- return TCC_Basic * (NumArgs + 1);
- }
-
- unsigned getCallCost(const Function *F, int NumArgs = -1) const override
- {
- assert(F && "A concrete function must be provided to this routine.");
-
- if (NumArgs < 0)
- // Set the argument number to the number of explicit arguments in the
- // function.
- NumArgs = F->arg_size();
-
- if (Intrinsic::ID IID = (Intrinsic::ID)F->getIntrinsicID()) {
- FunctionType *FTy = F->getFunctionType();
- SmallVector<Type *, 8> ParamTys(FTy->param_begin(), FTy->param_end());
- return TopTTI->getIntrinsicCost(IID, FTy->getReturnType(), ParamTys);
- }
-
- if (!TopTTI->isLoweredToCall(F))
- return TCC_Basic; // Give a basic cost if it will be lowered directly.
-
- return TopTTI->getCallCost(F->getFunctionType(), NumArgs);
- }
-
- unsigned getCallCost(const Function *F,
- ArrayRef<const Value *> Arguments) const override {
- // Simply delegate to generic handling of the call.
- // FIXME: We should use instsimplify or something else to catch calls which
- // will constant fold with these arguments.
- return TopTTI->getCallCost(F, Arguments.size());
- }
-
- unsigned getIntrinsicCost(Intrinsic::ID IID, Type *RetTy,
- ArrayRef<Type *> ParamTys) const override {
- switch (IID) {
- default:
- // Intrinsics rarely (if ever) have normal argument setup constraints.
- // Model them as having a basic instruction cost.
- // FIXME: This is wrong for libc intrinsics.
- return TCC_Basic;
-
- case Intrinsic::annotation:
- case Intrinsic::assume:
- case Intrinsic::dbg_declare:
- case Intrinsic::dbg_value:
- case Intrinsic::invariant_start:
- case Intrinsic::invariant_end:
- case Intrinsic::lifetime_start:
- case Intrinsic::lifetime_end:
- case Intrinsic::objectsize:
- case Intrinsic::ptr_annotation:
- case Intrinsic::var_annotation:
- // These intrinsics don't actually represent code after lowering.
- return TCC_Free;
- }
- }
-
- unsigned
- getIntrinsicCost(Intrinsic::ID IID, Type *RetTy,
- ArrayRef<const Value *> Arguments) const override {
- // Delegate to the generic intrinsic handling code. This mostly provides an
- // opportunity for targets to (for example) special case the cost of
- // certain intrinsics based on constants used as arguments.
- SmallVector<Type *, 8> ParamTys;
- ParamTys.reserve(Arguments.size());
- for (unsigned Idx = 0, Size = Arguments.size(); Idx != Size; ++Idx)
- ParamTys.push_back(Arguments[Idx]->getType());
- return TopTTI->getIntrinsicCost(IID, RetTy, ParamTys);
- }
-
- unsigned getUserCost(const User *U) const override {
- if (isa<PHINode>(U))
- return TCC_Free; // Model all PHI nodes as free.
-
- if (const GEPOperator *GEP = dyn_cast<GEPOperator>(U)) {
- SmallVector<const Value *, 4> Indices(GEP->idx_begin(), GEP->idx_end());
- return TopTTI->getGEPCost(GEP->getPointerOperand(), Indices);
- }
-
- if (ImmutableCallSite CS = U) {
- const Function *F = CS.getCalledFunction();
- if (!F) {
- // Just use the called value type.
- Type *FTy = CS.getCalledValue()->getType()->getPointerElementType();
- return TopTTI->getCallCost(cast<FunctionType>(FTy), CS.arg_size());
- }
-
- SmallVector<const Value *, 8> Arguments(CS.arg_begin(), CS.arg_end());
- return TopTTI->getCallCost(F, Arguments);
- }
-
- if (const CastInst *CI = dyn_cast<CastInst>(U)) {
- // Result of a cmp instruction is often extended (to be used by other
- // cmp instructions, logical or return instructions). These are usually
- // nop on most sane targets.
- if (isa<CmpInst>(CI->getOperand(0)))
- return TCC_Free;
- }
-
- // Otherwise delegate to the fully generic implementations.
- return getOperationCost(Operator::getOpcode(U), U->getType(),
- U->getNumOperands() == 1 ?
- U->getOperand(0)->getType() : nullptr);
- }
-
- bool hasBranchDivergence() const override { return false; }
-
- bool isLoweredToCall(const Function *F) const override {
- // FIXME: These should almost certainly not be handled here, and instead
- // handled with the help of TLI or the target itself. This was largely
- // ported from existing analysis heuristics here so that such refactorings
- // can take place in the future.
-
- if (F->isIntrinsic())
- return false;
-
- if (F->hasLocalLinkage() || !F->hasName())
- return true;
-
- StringRef Name = F->getName();
-
- // These will all likely lower to a single selection DAG node.
- if (Name == "copysign" || Name == "copysignf" || Name == "copysignl" ||
- Name == "fabs" || Name == "fabsf" || Name == "fabsl" || Name == "sin" ||
- Name == "fmin" || Name == "fminf" || Name == "fminl" ||
- Name == "fmax" || Name == "fmaxf" || Name == "fmaxl" ||
- Name == "sinf" || Name == "sinl" || Name == "cos" || Name == "cosf" ||
- Name == "cosl" || Name == "sqrt" || Name == "sqrtf" || Name == "sqrtl")
- return false;
-
- // These are all likely to be optimized into something smaller.
- if (Name == "pow" || Name == "powf" || Name == "powl" || Name == "exp2" ||
- Name == "exp2l" || Name == "exp2f" || Name == "floor" || Name ==
- "floorf" || Name == "ceil" || Name == "round" || Name == "ffs" ||
- Name == "ffsl" || Name == "abs" || Name == "labs" || Name == "llabs")
- return false;
-
- return true;
- }
-
- void getUnrollingPreferences(const Function *, Loop *,
- UnrollingPreferences &) const override {}
-
- bool isLegalAddImmediate(int64_t Imm) const override {
- return false;
- }
-
- bool isLegalICmpImmediate(int64_t Imm) const override {
- return false;
- }
-
- bool isLegalAddressingMode(Type *Ty, GlobalValue *BaseGV, int64_t BaseOffset,
- bool HasBaseReg, int64_t Scale) const override
- {
- // Guess that reg+reg addressing is allowed. This heuristic is taken from
- // the implementation of LSR.
- return !BaseGV && BaseOffset == 0 && Scale <= 1;
- }
-
- int getScalingFactorCost(Type *Ty, GlobalValue *BaseGV, int64_t BaseOffset,
- bool HasBaseReg, int64_t Scale) const override {
- // Guess that all legal addressing mode are free.
- if(isLegalAddressingMode(Ty, BaseGV, BaseOffset, HasBaseReg, Scale))
- return 0;
- return -1;
- }
-
- bool isTruncateFree(Type *Ty1, Type *Ty2) const override {
- return false;
- }
-
- bool isTypeLegal(Type *Ty) const override {
- return false;
- }
-
- unsigned getJumpBufAlignment() const override {
- return 0;
- }
-
- unsigned getJumpBufSize() const override {
- return 0;
- }
-
- bool shouldBuildLookupTables() const override {
- return true;
- }
-
- PopcntSupportKind
- getPopcntSupport(unsigned IntTyWidthInBit) const override {
- return PSK_Software;
- }
-
- bool haveFastSqrt(Type *Ty) const override {
- return false;
- }
-
- unsigned getIntImmCost(const APInt &Imm, Type *Ty) const override {
- return TCC_Basic;
- }
-
- unsigned getIntImmCost(unsigned Opcode, unsigned Idx, const APInt &Imm,
- Type *Ty) const override {
- return TCC_Free;
- }
-
- unsigned getIntImmCost(Intrinsic::ID IID, unsigned Idx, const APInt &Imm,
- Type *Ty) const override {
- return TCC_Free;
- }
-
- unsigned getNumberOfRegisters(bool Vector) const override {
- return 8;
- }
-
- unsigned getRegisterBitWidth(bool Vector) const override {
- return 32;
- }
-
- unsigned getMaxInterleaveFactor() const override {
- return 1;
- }
-
- unsigned getArithmeticInstrCost(unsigned Opcode, Type *Ty, OperandValueKind,
- OperandValueKind, OperandValueProperties,
- OperandValueProperties) const override {
- return 1;
- }
-
- unsigned getShuffleCost(ShuffleKind Kind, Type *Ty,
- int Index = 0, Type *SubTp = nullptr) const override {
- return 1;
- }
-
- unsigned getCastInstrCost(unsigned Opcode, Type *Dst,
- Type *Src) const override {
- return 1;
- }
-
- unsigned getCFInstrCost(unsigned Opcode) const override {
- return 1;
- }
-
- unsigned getCmpSelInstrCost(unsigned Opcode, Type *ValTy,
- Type *CondTy = nullptr) const override {
- return 1;
- }
-
- unsigned getVectorInstrCost(unsigned Opcode, Type *Val,
- unsigned Index = -1) const override {
- return 1;
- }
-
- unsigned getMemoryOpCost(unsigned Opcode, Type *Src, unsigned Alignment,
- unsigned AddressSpace) const override {
- return 1;
- }
-
- unsigned getIntrinsicInstrCost(Intrinsic::ID ID, Type *RetTy,
- ArrayRef<Type*> Tys) const override {
- return 1;
- }
-
- unsigned getNumberOfParts(Type *Tp) const override {
- return 0;
- }
-
- unsigned getAddressComputationCost(Type *Tp, bool) const override {
- return 0;
- }
-
- unsigned getReductionCost(unsigned, Type *, bool) const override {
- return 1;
- }
-
- unsigned getCostOfKeepingLiveOverCall(ArrayRef<Type*> Tys) const override {
- return 0;
- }
+Value *TargetTransformInfo::getOrCreateResultFromMemIntrinsic(
+ IntrinsicInst *Inst, Type *ExpectedType) const {
+ return TTIImpl->getOrCreateResultFromMemIntrinsic(Inst, ExpectedType);
+}
-};
+TargetTransformInfo::Concept::~Concept() {}
+
+TargetIRAnalysis::TargetIRAnalysis() : TTICallback(&getDefaultTTI) {}
-} // end anonymous namespace
+TargetIRAnalysis::TargetIRAnalysis(
+ std::function<Result(Function &)> TTICallback)
+ : TTICallback(TTICallback) {}
+
+TargetIRAnalysis::Result TargetIRAnalysis::run(Function &F) {
+ return TTICallback(F);
+}
-INITIALIZE_AG_PASS(NoTTI, TargetTransformInfo, "notti",
- "No target information", true, true, true)
-char NoTTI::ID = 0;
+char TargetIRAnalysis::PassID;
+
+TargetIRAnalysis::Result TargetIRAnalysis::getDefaultTTI(Function &F) {
+ return Result(F.getParent()->getDataLayout());
+}
+
+// Register the basic pass.
+INITIALIZE_PASS(TargetTransformInfoWrapperPass, "tti",
+ "Target Transform Information", false, true)
+char TargetTransformInfoWrapperPass::ID = 0;
+
+void TargetTransformInfoWrapperPass::anchor() {}
+
+TargetTransformInfoWrapperPass::TargetTransformInfoWrapperPass()
+ : ImmutablePass(ID) {
+ initializeTargetTransformInfoWrapperPassPass(
+ *PassRegistry::getPassRegistry());
+}
+
+TargetTransformInfoWrapperPass::TargetTransformInfoWrapperPass(
+ TargetIRAnalysis TIRA)
+ : ImmutablePass(ID), TIRA(std::move(TIRA)) {
+ initializeTargetTransformInfoWrapperPassPass(
+ *PassRegistry::getPassRegistry());
+}
+
+TargetTransformInfo &TargetTransformInfoWrapperPass::getTTI(Function &F) {
+ TTI = TIRA.run(F);
+ return *TTI;
+}
-ImmutablePass *llvm::createNoTargetTransformInfoPass() {
- return new NoTTI();
+ImmutablePass *
+llvm::createTargetTransformInfoWrapperPass(TargetIRAnalysis TIRA) {
+ return new TargetTransformInfoWrapperPass(std::move(TIRA));
}
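
The new TargetTransformInfo drops the old analysis-group chaining (PrevTTI /
TopTTI) in favor of value-type type erasure: the TTIImpl member visible above
is a pointer to an inner abstract Concept, and Model<NoTTIImpl> wraps the
concrete no-op implementation behind it. A standalone sketch of that pattern
(names shortened, a single method shown, not the real interface):

    #include <cstdio>
    #include <memory>
    #include <utility>

    class TTI {
      struct Concept { // abstract interface, one entry per TTI hook
        virtual ~Concept() = default;
        virtual unsigned getOperationCost(unsigned Opcode) = 0;
      };
      template <typename T> struct Model final : Concept {
        T Impl;
        explicit Model(T I) : Impl(std::move(I)) {}
        unsigned getOperationCost(unsigned Opcode) override {
          return Impl.getOperationCost(Opcode);
        }
      };
      std::unique_ptr<Concept> Impl;

    public:
      template <typename T>
      explicit TTI(T I) : Impl(new Model<T>(std::move(I))) {}
      unsigned getOperationCost(unsigned Opcode) {
        return Impl->getOperationCost(Opcode);
      }
    };

    struct NoOpImpl { // stand-in for NoTTIImpl: everything costs "basic"
      unsigned getOperationCost(unsigned) { return 1; }
    };

    int main() {
      TTI T(NoOpImpl{});
      printf("%u\n", T.getOperationCost(13)); // 1
    }
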
diff --git a/lib/Analysis/TypeBasedAliasAnalysis.cpp b/lib/Analysis/TypeBasedAliasAnalysis.cpp
index f347eb5..ff89558 100644
--- a/lib/Analysis/TypeBasedAliasAnalysis.cpp
+++ b/lib/Analysis/TypeBasedAliasAnalysis.cpp
@@ -167,7 +167,7 @@ namespace {
bool TypeIsImmutable() const {
if (Node->getNumOperands() < 3)
return false;
- ConstantInt *CI = dyn_cast<ConstantInt>(Node->getOperand(2));
+ ConstantInt *CI = mdconst::dyn_extract<ConstantInt>(Node->getOperand(2));
if (!CI)
return false;
return CI->getValue()[0];
@@ -194,7 +194,7 @@ namespace {
return dyn_cast_or_null<MDNode>(Node->getOperand(1));
}
uint64_t getOffset() const {
- return cast<ConstantInt>(Node->getOperand(2))->getZExtValue();
+ return mdconst::extract<ConstantInt>(Node->getOperand(2))->getZExtValue();
}
/// TypeIsImmutable - Test if this TBAAStructTagNode represents a type for
/// objects which are not modified (by any means) in the context where this
@@ -202,7 +202,7 @@ namespace {
bool TypeIsImmutable() const {
if (Node->getNumOperands() < 4)
return false;
- ConstantInt *CI = dyn_cast<ConstantInt>(Node->getOperand(3));
+ ConstantInt *CI = mdconst::dyn_extract<ConstantInt>(Node->getOperand(3));
if (!CI)
return false;
return CI->getValue()[0];
@@ -233,8 +233,10 @@ namespace {
// Fast path for a scalar type node and a struct type node with a single
// field.
if (Node->getNumOperands() <= 3) {
- uint64_t Cur = Node->getNumOperands() == 2 ? 0 :
- cast<ConstantInt>(Node->getOperand(2))->getZExtValue();
+ uint64_t Cur = Node->getNumOperands() == 2
+ ? 0
+ : mdconst::extract<ConstantInt>(Node->getOperand(2))
+ ->getZExtValue();
Offset -= Cur;
MDNode *P = dyn_cast_or_null<MDNode>(Node->getOperand(1));
if (!P)
@@ -246,8 +248,8 @@ namespace {
// the current offset is bigger than the given offset.
unsigned TheIdx = 0;
for (unsigned Idx = 1; Idx < Node->getNumOperands(); Idx += 2) {
- uint64_t Cur = cast<ConstantInt>(Node->getOperand(Idx + 1))->
- getZExtValue();
+ uint64_t Cur = mdconst::extract<ConstantInt>(Node->getOperand(Idx + 1))
+ ->getZExtValue();
if (Cur > Offset) {
assert(Idx >= 3 &&
"TBAAStructTypeNode::getParent should have an offset match!");
@@ -258,8 +260,8 @@ namespace {
// Move along the last field.
if (TheIdx == 0)
TheIdx = Node->getNumOperands() - 2;
- uint64_t Cur = cast<ConstantInt>(Node->getOperand(TheIdx + 1))->
- getZExtValue();
+ uint64_t Cur = mdconst::extract<ConstantInt>(Node->getOperand(TheIdx + 1))
+ ->getZExtValue();
Offset -= Cur;
MDNode *P = dyn_cast_or_null<MDNode>(Node->getOperand(TheIdx));
if (!P)
@@ -608,7 +610,8 @@ MDNode *MDNode::getMostGenericTBAA(MDNode *A, MDNode *B) {
return nullptr;
// We need to convert from a type node to a tag node.
Type *Int64 = IntegerType::get(A->getContext(), 64);
- Value *Ops[3] = { Ret, Ret, ConstantInt::get(Int64, 0) };
+ Metadata *Ops[3] = {Ret, Ret,
+ ConstantAsMetadata::get(ConstantInt::get(Int64, 0))};
return MDNode::get(A->getContext(), Ops);
}
@@ -620,8 +623,8 @@ void Instruction::getAAMetadata(AAMDNodes &N, bool Merge) const {
N.TBAA = getMetadata(LLVMContext::MD_tbaa);
if (Merge)
- N.Scope =
- MDNode::intersect(N.Scope, getMetadata(LLVMContext::MD_alias_scope));
+ N.Scope = MDNode::getMostGenericAliasScope(
+ N.Scope, getMetadata(LLVMContext::MD_alias_scope));
else
N.Scope = getMetadata(LLVMContext::MD_alias_scope);
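
The mechanical substitutions in this file track the LLVM 3.6 metadata/value
split: MDNode operands are now Metadata* rather than Value*, and constants
appear wrapped in ConstantAsMetadata, so a plain cast<ConstantInt> no longer
applies and the unwrapping mdconst::extract is used instead. A minimal sketch
against 3.6-era headers (assumed API, shown for illustration only):

    #include "llvm/IR/Constants.h"
    #include "llvm/IR/LLVMContext.h"
    #include "llvm/IR/Metadata.h"
    #include "llvm/IR/Type.h"
    #include <cstdio>
    using namespace llvm;

    int main() {
      LLVMContext Ctx;
      // Constants enter metadata through the ConstantAsMetadata wrapper.
      Metadata *Ops[] = {
          ConstantAsMetadata::get(ConstantInt::get(Type::getInt64Ty(Ctx), 42))};
      MDNode *N = MDNode::get(Ctx, Ops);
      // cast<ConstantInt>(N->getOperand(0)) no longer compiles; unwrap:
      ConstantInt *CI = mdconst::extract<ConstantInt>(N->getOperand(0));
      printf("%llu\n", (unsigned long long)CI->getZExtValue()); // 42
    }
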
diff --git a/lib/Analysis/ValueTracking.cpp b/lib/Analysis/ValueTracking.cpp
index e9bbf83..0458d28 100644
--- a/lib/Analysis/ValueTracking.cpp
+++ b/lib/Analysis/ValueTracking.cpp
@@ -13,8 +13,8 @@
//===----------------------------------------------------------------------===//
#include "llvm/Analysis/ValueTracking.h"
-#include "llvm/Analysis/AssumptionTracker.h"
#include "llvm/ADT/SmallPtrSet.h"
+#include "llvm/Analysis/AssumptionCache.h"
#include "llvm/Analysis/InstructionSimplify.h"
#include "llvm/Analysis/MemoryBuiltins.h"
#include "llvm/IR/CallSite.h"
@@ -65,16 +65,16 @@ namespace {
// figuring out if we can use it.
struct Query {
ExclInvsSet ExclInvs;
- AssumptionTracker *AT;
+ AssumptionCache *AC;
const Instruction *CxtI;
const DominatorTree *DT;
- Query(AssumptionTracker *AT = nullptr, const Instruction *CxtI = nullptr,
+ Query(AssumptionCache *AC = nullptr, const Instruction *CxtI = nullptr,
const DominatorTree *DT = nullptr)
- : AT(AT), CxtI(CxtI), DT(DT) {}
+ : AC(AC), CxtI(CxtI), DT(DT) {}
Query(const Query &Q, const Value *NewExcl)
- : ExclInvs(Q.ExclInvs), AT(Q.AT), CxtI(Q.CxtI), DT(Q.DT) {
+ : ExclInvs(Q.ExclInvs), AC(Q.AC), CxtI(Q.CxtI), DT(Q.DT) {
ExclInvs.insert(NewExcl);
}
};
@@ -102,10 +102,10 @@ static void computeKnownBits(Value *V, APInt &KnownZero, APInt &KnownOne,
void llvm::computeKnownBits(Value *V, APInt &KnownZero, APInt &KnownOne,
const DataLayout *TD, unsigned Depth,
- AssumptionTracker *AT, const Instruction *CxtI,
+ AssumptionCache *AC, const Instruction *CxtI,
const DominatorTree *DT) {
::computeKnownBits(V, KnownZero, KnownOne, TD, Depth,
- Query(AT, safeCxtI(V, CxtI), DT));
+ Query(AC, safeCxtI(V, CxtI), DT));
}
static void ComputeSignBit(Value *V, bool &KnownZero, bool &KnownOne,
@@ -114,52 +114,50 @@ static void ComputeSignBit(Value *V, bool &KnownZero, bool &KnownOne,
void llvm::ComputeSignBit(Value *V, bool &KnownZero, bool &KnownOne,
const DataLayout *TD, unsigned Depth,
- AssumptionTracker *AT, const Instruction *CxtI,
+ AssumptionCache *AC, const Instruction *CxtI,
const DominatorTree *DT) {
::ComputeSignBit(V, KnownZero, KnownOne, TD, Depth,
- Query(AT, safeCxtI(V, CxtI), DT));
+ Query(AC, safeCxtI(V, CxtI), DT));
}
static bool isKnownToBeAPowerOfTwo(Value *V, bool OrZero, unsigned Depth,
const Query &Q);
bool llvm::isKnownToBeAPowerOfTwo(Value *V, bool OrZero, unsigned Depth,
- AssumptionTracker *AT,
- const Instruction *CxtI,
+ AssumptionCache *AC, const Instruction *CxtI,
const DominatorTree *DT) {
return ::isKnownToBeAPowerOfTwo(V, OrZero, Depth,
- Query(AT, safeCxtI(V, CxtI), DT));
+ Query(AC, safeCxtI(V, CxtI), DT));
}
static bool isKnownNonZero(Value *V, const DataLayout *TD, unsigned Depth,
const Query &Q);
bool llvm::isKnownNonZero(Value *V, const DataLayout *TD, unsigned Depth,
- AssumptionTracker *AT, const Instruction *CxtI,
+ AssumptionCache *AC, const Instruction *CxtI,
const DominatorTree *DT) {
- return ::isKnownNonZero(V, TD, Depth, Query(AT, safeCxtI(V, CxtI), DT));
+ return ::isKnownNonZero(V, TD, Depth, Query(AC, safeCxtI(V, CxtI), DT));
}
static bool MaskedValueIsZero(Value *V, const APInt &Mask,
const DataLayout *TD, unsigned Depth,
const Query &Q);
-bool llvm::MaskedValueIsZero(Value *V, const APInt &Mask,
- const DataLayout *TD, unsigned Depth,
- AssumptionTracker *AT, const Instruction *CxtI,
- const DominatorTree *DT) {
+bool llvm::MaskedValueIsZero(Value *V, const APInt &Mask, const DataLayout *TD,
+ unsigned Depth, AssumptionCache *AC,
+ const Instruction *CxtI, const DominatorTree *DT) {
return ::MaskedValueIsZero(V, Mask, TD, Depth,
- Query(AT, safeCxtI(V, CxtI), DT));
+ Query(AC, safeCxtI(V, CxtI), DT));
}
static unsigned ComputeNumSignBits(Value *V, const DataLayout *TD,
unsigned Depth, const Query &Q);
unsigned llvm::ComputeNumSignBits(Value *V, const DataLayout *TD,
- unsigned Depth, AssumptionTracker *AT,
+ unsigned Depth, AssumptionCache *AC,
const Instruction *CxtI,
const DominatorTree *DT) {
- return ::ComputeNumSignBits(V, TD, Depth, Query(AT, safeCxtI(V, CxtI), DT));
+ return ::ComputeNumSignBits(V, TD, Depth, Query(AC, safeCxtI(V, CxtI), DT));
}
static void computeKnownBitsAddSub(bool Add, Value *Op0, Value *Op1, bool NSW,
@@ -312,8 +310,10 @@ void llvm::computeKnownBitsFromRangeMetadata(const MDNode &Ranges,
// Use the high end of the ranges to find leading zeros.
unsigned MinLeadingZeros = BitWidth;
for (unsigned i = 0; i < NumRanges; ++i) {
- ConstantInt *Lower = cast<ConstantInt>(Ranges.getOperand(2*i + 0));
- ConstantInt *Upper = cast<ConstantInt>(Ranges.getOperand(2*i + 1));
+ ConstantInt *Lower =
+ mdconst::extract<ConstantInt>(Ranges.getOperand(2 * i + 0));
+ ConstantInt *Upper =
+ mdconst::extract<ConstantInt>(Ranges.getOperand(2 * i + 1));
ConstantRange Range(Lower->getValue(), Upper->getValue());
if (Range.isWrappedSet())
MinLeadingZeros = 0; // -1 has no zeros
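
The extraction change above feeds the same arithmetic as before: the high end of each range bounds how many leading zeros are guaranteed. A stand-alone sketch of that bound on plain 32-bit values (not the LLVM code itself):

    #include <cassert>
    #include <cstdint>

    // Half-open [Lower, Upper) range on an unsigned 32-bit value: every value
    // is <= Upper - 1, so it has at least the leading zeros of Upper - 1.
    // Wrapped or empty ranges guarantee nothing, matching MinLeadingZeros = 0.
    static unsigned leadingZerosFromRange(uint32_t Lower, uint32_t Upper) {
      if (Upper <= Lower)
        return 0;
      uint32_t Max = Upper - 1;
      unsigned N = 0;
      for (uint32_t Bit = 1u << 31; Bit && !(Max & Bit); Bit >>= 1)
        ++N;
      return N;
    }

    int main() {
      assert(leadingZerosFromRange(0, 10) == 28); // values 0..9 fit in 4 bits
      return 0;
    }
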
@@ -480,18 +480,31 @@ static void computeKnownBitsFromAssume(Value *V, APInt &KnownZero,
unsigned Depth, const Query &Q) {
// Use of assumptions is context-sensitive. If we don't have a context, we
// cannot use them!
- if (!Q.AT || !Q.CxtI)
+ if (!Q.AC || !Q.CxtI)
return;
unsigned BitWidth = KnownZero.getBitWidth();
- Function *F = const_cast<Function*>(Q.CxtI->getParent()->getParent());
- for (auto &CI : Q.AT->assumptions(F)) {
- CallInst *I = CI;
+ for (auto &AssumeVH : Q.AC->assumptions()) {
+ if (!AssumeVH)
+ continue;
+ CallInst *I = cast<CallInst>(AssumeVH);
+ assert(I->getParent()->getParent() == Q.CxtI->getParent()->getParent() &&
+ "Got assumption for the wrong function!");
if (Q.ExclInvs.count(I))
continue;
- if (match(I, m_Intrinsic<Intrinsic::assume>(m_Specific(V))) &&
+ // Warning: This loop can end up being somewhat performance sensitive.
+ // We run this loop once for each value queried, resulting in a
+ // runtime of ~O(#assumes * #values).
+
+ assert(isa<IntrinsicInst>(I) &&
+ cast<IntrinsicInst>(I)->getIntrinsicID() == Intrinsic::assume &&
+ "must be an assume intrinsic");
+
+ Value *Arg = I->getArgOperand(0);
+
+ if (Arg == V &&
isValidAssumeForContext(I, Q, DL)) {
assert(BitWidth == 1 && "assume operand is not i1?");
KnownZero.clearAllBits();
@@ -499,6 +512,10 @@ static void computeKnownBitsFromAssume(Value *V, APInt &KnownZero,
return;
}
+ // The remaining tests are all recursive, so bail out if we hit the limit.
+ if (Depth == MaxDepth)
+ continue;
+
Value *A, *B;
auto m_V = m_CombineOr(m_Specific(V),
m_CombineOr(m_PtrToInt(m_Specific(V)),
@@ -507,16 +524,15 @@ static void computeKnownBitsFromAssume(Value *V, APInt &KnownZero,
CmpInst::Predicate Pred;
ConstantInt *C;
// assume(v = a)
- if (match(I, m_Intrinsic<Intrinsic::assume>(
- m_c_ICmp(Pred, m_V, m_Value(A)))) &&
+ if (match(Arg, m_c_ICmp(Pred, m_V, m_Value(A))) &&
Pred == ICmpInst::ICMP_EQ && isValidAssumeForContext(I, Q, DL)) {
APInt RHSKnownZero(BitWidth, 0), RHSKnownOne(BitWidth, 0);
computeKnownBits(A, RHSKnownZero, RHSKnownOne, DL, Depth+1, Query(Q, I));
KnownZero |= RHSKnownZero;
KnownOne |= RHSKnownOne;
// assume(v & b = a)
- } else if (match(I, m_Intrinsic<Intrinsic::assume>(
- m_c_ICmp(Pred, m_c_And(m_V, m_Value(B)), m_Value(A)))) &&
+ } else if (match(Arg, m_c_ICmp(Pred, m_c_And(m_V, m_Value(B)),
+ m_Value(A))) &&
Pred == ICmpInst::ICMP_EQ && isValidAssumeForContext(I, Q, DL)) {
APInt RHSKnownZero(BitWidth, 0), RHSKnownOne(BitWidth, 0);
computeKnownBits(A, RHSKnownZero, RHSKnownOne, DL, Depth+1, Query(Q, I));
@@ -528,9 +544,8 @@ static void computeKnownBitsFromAssume(Value *V, APInt &KnownZero,
KnownZero |= RHSKnownZero & MaskKnownOne;
KnownOne |= RHSKnownOne & MaskKnownOne;
// assume(~(v & b) = a)
- } else if (match(I, m_Intrinsic<Intrinsic::assume>(
- m_c_ICmp(Pred, m_Not(m_c_And(m_V, m_Value(B))),
- m_Value(A)))) &&
+ } else if (match(Arg, m_c_ICmp(Pred, m_Not(m_c_And(m_V, m_Value(B))),
+ m_Value(A))) &&
Pred == ICmpInst::ICMP_EQ && isValidAssumeForContext(I, Q, DL)) {
APInt RHSKnownZero(BitWidth, 0), RHSKnownOne(BitWidth, 0);
computeKnownBits(A, RHSKnownZero, RHSKnownOne, DL, Depth+1, Query(Q, I));
@@ -542,8 +557,8 @@ static void computeKnownBitsFromAssume(Value *V, APInt &KnownZero,
KnownZero |= RHSKnownOne & MaskKnownOne;
KnownOne |= RHSKnownZero & MaskKnownOne;
// assume(v | b = a)
- } else if (match(I, m_Intrinsic<Intrinsic::assume>(
- m_c_ICmp(Pred, m_c_Or(m_V, m_Value(B)), m_Value(A)))) &&
+ } else if (match(Arg, m_c_ICmp(Pred, m_c_Or(m_V, m_Value(B)),
+ m_Value(A))) &&
Pred == ICmpInst::ICMP_EQ && isValidAssumeForContext(I, Q, DL)) {
APInt RHSKnownZero(BitWidth, 0), RHSKnownOne(BitWidth, 0);
computeKnownBits(A, RHSKnownZero, RHSKnownOne, DL, Depth+1, Query(Q, I));
@@ -555,9 +570,8 @@ static void computeKnownBitsFromAssume(Value *V, APInt &KnownZero,
KnownZero |= RHSKnownZero & BKnownZero;
KnownOne |= RHSKnownOne & BKnownZero;
// assume(~(v | b) = a)
- } else if (match(I, m_Intrinsic<Intrinsic::assume>(
- m_c_ICmp(Pred, m_Not(m_c_Or(m_V, m_Value(B))),
- m_Value(A)))) &&
+ } else if (match(Arg, m_c_ICmp(Pred, m_Not(m_c_Or(m_V, m_Value(B))),
+ m_Value(A))) &&
Pred == ICmpInst::ICMP_EQ && isValidAssumeForContext(I, Q, DL)) {
APInt RHSKnownZero(BitWidth, 0), RHSKnownOne(BitWidth, 0);
computeKnownBits(A, RHSKnownZero, RHSKnownOne, DL, Depth+1, Query(Q, I));
@@ -569,8 +583,8 @@ static void computeKnownBitsFromAssume(Value *V, APInt &KnownZero,
KnownZero |= RHSKnownOne & BKnownZero;
KnownOne |= RHSKnownZero & BKnownZero;
// assume(v ^ b = a)
- } else if (match(I, m_Intrinsic<Intrinsic::assume>(
- m_c_ICmp(Pred, m_c_Xor(m_V, m_Value(B)), m_Value(A)))) &&
+ } else if (match(Arg, m_c_ICmp(Pred, m_c_Xor(m_V, m_Value(B)),
+ m_Value(A))) &&
Pred == ICmpInst::ICMP_EQ && isValidAssumeForContext(I, Q, DL)) {
APInt RHSKnownZero(BitWidth, 0), RHSKnownOne(BitWidth, 0);
computeKnownBits(A, RHSKnownZero, RHSKnownOne, DL, Depth+1, Query(Q, I));
@@ -585,9 +599,8 @@ static void computeKnownBitsFromAssume(Value *V, APInt &KnownZero,
KnownZero |= RHSKnownOne & BKnownOne;
KnownOne |= RHSKnownZero & BKnownOne;
// assume(~(v ^ b) = a)
- } else if (match(I, m_Intrinsic<Intrinsic::assume>(
- m_c_ICmp(Pred, m_Not(m_c_Xor(m_V, m_Value(B))),
- m_Value(A)))) &&
+ } else if (match(Arg, m_c_ICmp(Pred, m_Not(m_c_Xor(m_V, m_Value(B))),
+ m_Value(A))) &&
Pred == ICmpInst::ICMP_EQ && isValidAssumeForContext(I, Q, DL)) {
APInt RHSKnownZero(BitWidth, 0), RHSKnownOne(BitWidth, 0);
computeKnownBits(A, RHSKnownZero, RHSKnownOne, DL, Depth+1, Query(Q, I));
@@ -602,9 +615,8 @@ static void computeKnownBitsFromAssume(Value *V, APInt &KnownZero,
KnownZero |= RHSKnownZero & BKnownOne;
KnownOne |= RHSKnownOne & BKnownOne;
// assume(v << c = a)
- } else if (match(I, m_Intrinsic<Intrinsic::assume>(
- m_c_ICmp(Pred, m_Shl(m_V, m_ConstantInt(C)),
- m_Value(A)))) &&
+ } else if (match(Arg, m_c_ICmp(Pred, m_Shl(m_V, m_ConstantInt(C)),
+ m_Value(A))) &&
Pred == ICmpInst::ICMP_EQ && isValidAssumeForContext(I, Q, DL)) {
APInt RHSKnownZero(BitWidth, 0), RHSKnownOne(BitWidth, 0);
computeKnownBits(A, RHSKnownZero, RHSKnownOne, DL, Depth+1, Query(Q, I));
@@ -613,9 +625,8 @@ static void computeKnownBitsFromAssume(Value *V, APInt &KnownZero,
KnownZero |= RHSKnownZero.lshr(C->getZExtValue());
KnownOne |= RHSKnownOne.lshr(C->getZExtValue());
// assume(~(v << c) = a)
- } else if (match(I, m_Intrinsic<Intrinsic::assume>(
- m_c_ICmp(Pred, m_Not(m_Shl(m_V, m_ConstantInt(C))),
- m_Value(A)))) &&
+ } else if (match(Arg, m_c_ICmp(Pred, m_Not(m_Shl(m_V, m_ConstantInt(C))),
+ m_Value(A))) &&
Pred == ICmpInst::ICMP_EQ && isValidAssumeForContext(I, Q, DL)) {
APInt RHSKnownZero(BitWidth, 0), RHSKnownOne(BitWidth, 0);
computeKnownBits(A, RHSKnownZero, RHSKnownOne, DL, Depth+1, Query(Q, I));
@@ -624,11 +635,11 @@ static void computeKnownBitsFromAssume(Value *V, APInt &KnownZero,
KnownZero |= RHSKnownOne.lshr(C->getZExtValue());
KnownOne |= RHSKnownZero.lshr(C->getZExtValue());
// assume(v >> c = a)
- } else if (match(I, m_Intrinsic<Intrinsic::assume>(
- m_c_ICmp(Pred, m_CombineOr(m_LShr(m_V, m_ConstantInt(C)),
+ } else if (match(Arg,
+ m_c_ICmp(Pred, m_CombineOr(m_LShr(m_V, m_ConstantInt(C)),
m_AShr(m_V,
m_ConstantInt(C))),
- m_Value(A)))) &&
+ m_Value(A))) &&
Pred == ICmpInst::ICMP_EQ && isValidAssumeForContext(I, Q, DL)) {
APInt RHSKnownZero(BitWidth, 0), RHSKnownOne(BitWidth, 0);
computeKnownBits(A, RHSKnownZero, RHSKnownOne, DL, Depth+1, Query(Q, I));
@@ -637,11 +648,10 @@ static void computeKnownBitsFromAssume(Value *V, APInt &KnownZero,
KnownZero |= RHSKnownZero << C->getZExtValue();
KnownOne |= RHSKnownOne << C->getZExtValue();
// assume(~(v >> c) = a)
- } else if (match(I, m_Intrinsic<Intrinsic::assume>(
- m_c_ICmp(Pred, m_Not(m_CombineOr(
+ } else if (match(Arg, m_c_ICmp(Pred, m_Not(m_CombineOr(
m_LShr(m_V, m_ConstantInt(C)),
m_AShr(m_V, m_ConstantInt(C)))),
- m_Value(A)))) &&
+ m_Value(A))) &&
Pred == ICmpInst::ICMP_EQ && isValidAssumeForContext(I, Q, DL)) {
APInt RHSKnownZero(BitWidth, 0), RHSKnownOne(BitWidth, 0);
computeKnownBits(A, RHSKnownZero, RHSKnownOne, DL, Depth+1, Query(Q, I));
@@ -650,8 +660,7 @@ static void computeKnownBitsFromAssume(Value *V, APInt &KnownZero,
KnownZero |= RHSKnownOne << C->getZExtValue();
KnownOne |= RHSKnownZero << C->getZExtValue();
// assume(v >=_s c) where c is non-negative
- } else if (match(I, m_Intrinsic<Intrinsic::assume>(
- m_ICmp(Pred, m_V, m_Value(A)))) &&
+ } else if (match(Arg, m_ICmp(Pred, m_V, m_Value(A))) &&
Pred == ICmpInst::ICMP_SGE &&
isValidAssumeForContext(I, Q, DL)) {
APInt RHSKnownZero(BitWidth, 0), RHSKnownOne(BitWidth, 0);
@@ -662,8 +671,7 @@ static void computeKnownBitsFromAssume(Value *V, APInt &KnownZero,
KnownZero |= APInt::getSignBit(BitWidth);
}
// assume(v >_s c) where c is at least -1.
- } else if (match(I, m_Intrinsic<Intrinsic::assume>(
- m_ICmp(Pred, m_V, m_Value(A)))) &&
+ } else if (match(Arg, m_ICmp(Pred, m_V, m_Value(A))) &&
Pred == ICmpInst::ICMP_SGT &&
isValidAssumeForContext(I, Q, DL)) {
APInt RHSKnownZero(BitWidth, 0), RHSKnownOne(BitWidth, 0);
@@ -674,8 +682,7 @@ static void computeKnownBitsFromAssume(Value *V, APInt &KnownZero,
KnownZero |= APInt::getSignBit(BitWidth);
}
// assume(v <=_s c) where c is negative
- } else if (match(I, m_Intrinsic<Intrinsic::assume>(
- m_ICmp(Pred, m_V, m_Value(A)))) &&
+ } else if (match(Arg, m_ICmp(Pred, m_V, m_Value(A))) &&
Pred == ICmpInst::ICMP_SLE &&
isValidAssumeForContext(I, Q, DL)) {
APInt RHSKnownZero(BitWidth, 0), RHSKnownOne(BitWidth, 0);
@@ -686,8 +693,7 @@ static void computeKnownBitsFromAssume(Value *V, APInt &KnownZero,
KnownOne |= APInt::getSignBit(BitWidth);
}
// assume(v <_s c) where c is non-positive
- } else if (match(I, m_Intrinsic<Intrinsic::assume>(
- m_ICmp(Pred, m_V, m_Value(A)))) &&
+ } else if (match(Arg, m_ICmp(Pred, m_V, m_Value(A))) &&
Pred == ICmpInst::ICMP_SLT &&
isValidAssumeForContext(I, Q, DL)) {
APInt RHSKnownZero(BitWidth, 0), RHSKnownOne(BitWidth, 0);
@@ -698,8 +704,7 @@ static void computeKnownBitsFromAssume(Value *V, APInt &KnownZero,
KnownOne |= APInt::getSignBit(BitWidth);
}
// assume(v <=_u c)
- } else if (match(I, m_Intrinsic<Intrinsic::assume>(
- m_ICmp(Pred, m_V, m_Value(A)))) &&
+ } else if (match(Arg, m_ICmp(Pred, m_V, m_Value(A))) &&
Pred == ICmpInst::ICMP_ULE &&
isValidAssumeForContext(I, Q, DL)) {
APInt RHSKnownZero(BitWidth, 0), RHSKnownOne(BitWidth, 0);
@@ -709,8 +714,7 @@ static void computeKnownBitsFromAssume(Value *V, APInt &KnownZero,
KnownZero |=
APInt::getHighBitsSet(BitWidth, RHSKnownZero.countLeadingOnes());
// assume(v <_u c)
- } else if (match(I, m_Intrinsic<Intrinsic::assume>(
- m_ICmp(Pred, m_V, m_Value(A)))) &&
+ } else if (match(Arg, m_ICmp(Pred, m_V, m_Value(A))) &&
Pred == ICmpInst::ICMP_ULT &&
isValidAssumeForContext(I, Q, DL)) {
APInt RHSKnownZero(BitWidth, 0), RHSKnownOne(BitWidth, 0);
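
Each rewritten case above now matches only the assume's condition (Arg) and then merges what is known. The merge itself, reduced to plain masks for the assume(v == a) case; KnownBits32 is a hypothetical stand-in for the two APInt accumulators:

    #include <cassert>
    #include <cstdint>

    // Known-bits facts are a pair of masks: bits proven zero, bits proven one.
    struct KnownBits32 {
      uint32_t Zero, One;
    };

    int main() {
      // Suppose assume(v == a) holds and a is known to be ...?01 in binary.
      KnownBits32 A{/*Zero=*/0x2, /*One=*/0x1};
      KnownBits32 V{0, 0}; // nothing known about v yet
      // Everything proven about a transfers to v, as in
      // KnownZero |= RHSKnownZero; KnownOne |= RHSKnownOne;
      V.Zero |= A.Zero;
      V.One |= A.One;
      assert((V.Zero & V.One) == 0 && "a bit cannot be proven both 0 and 1");
      assert(V.Zero == 0x2 && V.One == 0x1);
      return 0;
    }
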
@@ -790,22 +794,11 @@ void computeKnownBits(Value *V, APInt &KnownZero, APInt &KnownOne,
return;
}
- // A weak GlobalAlias is totally unknown. A non-weak GlobalAlias has
- // the bits of its aliasee.
- if (GlobalAlias *GA = dyn_cast<GlobalAlias>(V)) {
- if (GA->mayBeOverridden()) {
- KnownZero.clearAllBits(); KnownOne.clearAllBits();
- } else {
- computeKnownBits(GA->getAliasee(), KnownZero, KnownOne, TD, Depth+1, Q);
- }
- return;
- }
-
// The address of an aligned GlobalValue has trailing zeros.
- if (GlobalValue *GV = dyn_cast<GlobalValue>(V)) {
- unsigned Align = GV->getAlignment();
+ if (auto *GO = dyn_cast<GlobalObject>(V)) {
+ unsigned Align = GO->getAlignment();
if (Align == 0 && TD) {
- if (GlobalVariable *GVar = dyn_cast<GlobalVariable>(GV)) {
+ if (auto *GVar = dyn_cast<GlobalVariable>(GO)) {
Type *ObjectType = GVar->getType()->getElementType();
if (ObjectType->isSized()) {
// If the object is defined in the current Module, we'll be giving
@@ -839,6 +832,9 @@ void computeKnownBits(Value *V, APInt &KnownZero, APInt &KnownOne,
if (Align)
KnownZero = APInt::getLowBitsSet(BitWidth, countTrailingZeros(Align));
+ else
+ KnownZero.clearAllBits();
+ KnownOne.clearAllBits();
// Don't give up yet... there might be an assumption that provides more
// information...
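
The alignment fact used above, checked on concrete numbers: an object aligned to 2^k bytes has k low address bits known zero, which is what APInt::getLowBitsSet(BitWidth, countTrailingZeros(Align)) records. A minimal sketch:

    #include <cassert>
    #include <cstdint>

    int main() {
      unsigned Align = 16; // a power of two, as LLVM alignments are
      unsigned TrailingZeros = 0;
      for (unsigned A = Align; A > 1; A >>= 1)
        ++TrailingZeros;
      // The low TrailingZeros bits of any 16-byte-aligned address are zero.
      uint64_t KnownZero = (uint64_t(1) << TrailingZeros) - 1;
      assert(TrailingZeros == 4 && KnownZero == 0xF);
      return 0;
    }
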
@@ -849,8 +845,18 @@ void computeKnownBits(Value *V, APInt &KnownZero, APInt &KnownOne,
// Start out not knowing anything.
KnownZero.clearAllBits(); KnownOne.clearAllBits();
+ // Limit search depth.
+ // All recursive calls that increase depth must come after this.
if (Depth == MaxDepth)
- return; // Limit search depth.
+ return;
+
+ // A weak GlobalAlias is totally unknown. A non-weak GlobalAlias has
+ // the bits of its aliasee.
+ if (GlobalAlias *GA = dyn_cast<GlobalAlias>(V)) {
+ if (!GA->mayBeOverridden())
+ computeKnownBits(GA->getAliasee(), KnownZero, KnownOne, TD, Depth + 1, Q);
+ return;
+ }
// Check whether a nearby assume intrinsic can determine some known bits.
computeKnownBitsFromAssume(V, KnownZero, KnownOne, TD, Depth, Q);
@@ -1507,8 +1513,10 @@ static bool rangeMetadataExcludesValue(MDNode* Ranges,
const unsigned NumRanges = Ranges->getNumOperands() / 2;
assert(NumRanges >= 1);
for (unsigned i = 0; i < NumRanges; ++i) {
- ConstantInt *Lower = cast<ConstantInt>(Ranges->getOperand(2*i + 0));
- ConstantInt *Upper = cast<ConstantInt>(Ranges->getOperand(2*i + 1));
+ ConstantInt *Lower =
+ mdconst::extract<ConstantInt>(Ranges->getOperand(2 * i + 0));
+ ConstantInt *Upper =
+ mdconst::extract<ConstantInt>(Ranges->getOperand(2 * i + 1));
ConstantRange Range(Lower->getValue(), Upper->getValue());
if (Range.contains(Value))
return false;
@@ -1764,7 +1772,7 @@ unsigned ComputeNumSignBits(Value *V, const DataLayout *TD,
if (Tmp == 1) return 1; // Early out.
// Special case decrementing a value (ADD X, -1):
- if (ConstantInt *CRHS = dyn_cast<ConstantInt>(U->getOperand(1)))
+ if (const auto *CRHS = dyn_cast<Constant>(U->getOperand(1)))
if (CRHS->isAllOnesValue()) {
APInt KnownZero(TyBits, 0), KnownOne(TyBits, 0);
computeKnownBits(U->getOperand(0), KnownZero, KnownOne, TD, Depth+1, Q);
@@ -1789,7 +1797,7 @@ unsigned ComputeNumSignBits(Value *V, const DataLayout *TD,
if (Tmp2 == 1) return 1;
// Handle NEG.
- if (ConstantInt *CLHS = dyn_cast<ConstantInt>(U->getOperand(0)))
+ if (const auto *CLHS = dyn_cast<Constant>(U->getOperand(0)))
if (CLHS->isNullValue()) {
APInt KnownZero(TyBits, 0), KnownOne(TyBits, 0);
computeKnownBits(U->getOperand(1), KnownZero, KnownOne, TD, Depth+1, Q);
@@ -1814,13 +1822,16 @@ unsigned ComputeNumSignBits(Value *V, const DataLayout *TD,
case Instruction::PHI: {
PHINode *PN = cast<PHINode>(U);
+ unsigned NumIncomingValues = PN->getNumIncomingValues();
// Don't analyze large in-degree PHIs.
- if (PN->getNumIncomingValues() > 4) break;
+ if (NumIncomingValues > 4) break;
+ // Unreachable blocks may have zero-operand PHI nodes.
+ if (NumIncomingValues == 0) break;
// Take the minimum of all incoming values. This can't infinitely loop
// because of our depth threshold.
Tmp = ComputeNumSignBits(PN->getIncomingValue(0), TD, Depth+1, Q);
- for (unsigned i = 1, e = PN->getNumIncomingValues(); i != e; ++i) {
+ for (unsigned i = 1, e = NumIncomingValues; i != e; ++i) {
if (Tmp == 1) return Tmp;
Tmp = std::min(Tmp,
ComputeNumSignBits(PN->getIncomingValue(i), TD,
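
The PHI rule above takes the minimum sign-bit count over the incoming values, since the merged value is only as constrained as its least constrained input. The same computation on plain 32-bit constants (a sketch, not the LLVM helper):

    #include <algorithm>
    #include <cassert>
    #include <cstdint>

    // Redundant sign bits: copies of the sign bit at the top, including it.
    static unsigned numSignBits(int32_t V) {
      uint32_t U = (uint32_t)V;
      uint32_t Sign = U >> 31;
      unsigned N = 1;
      for (int Bit = 30; Bit >= 0 && ((U >> Bit) & 1) == Sign; --Bit)
        ++N;
      return N;
    }

    int main() {
      unsigned A = numSignBits(-1); // 32: every bit is the sign bit
      unsigned B = numSignBits(5);  // 29: 5 needs 4 bits including the sign
      assert(A == 32 && B == 29);
      // A PHI merging -1 and 5 can only promise the smaller count.
      assert(std::min(A, B) == 29);
      return 0;
    }
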
@@ -1989,8 +2000,11 @@ bool llvm::CannotBeNegativeZero(const Value *V, unsigned Depth) {
if (const ConstantFP *CFP = dyn_cast<ConstantFP>(V))
return !CFP->getValueAPF().isNegZero();
+ // FIXME: Magic number! At the least, this should be given a name because it's
+ // used similarly in CannotBeOrderedLessThanZero(). A better fix may be to
+ // expose it as a parameter, so it can be used for testing / experimenting.
if (Depth == 6)
- return 1; // Limit search depth.
+ return false; // Limit search depth.
const Operator *I = dyn_cast<Operator>(V);
if (!I) return false;
@@ -2033,6 +2047,62 @@ bool llvm::CannotBeNegativeZero(const Value *V, unsigned Depth) {
return false;
}
+bool llvm::CannotBeOrderedLessThanZero(const Value *V, unsigned Depth) {
+ if (const ConstantFP *CFP = dyn_cast<ConstantFP>(V))
+ return !CFP->getValueAPF().isNegative() || CFP->getValueAPF().isZero();
+
+ // FIXME: Magic number! At the least, this should be given a name because it's
+ // used similarly in CannotBeNegativeZero(). A better fix may be to
+ // expose it as a parameter, so it can be used for testing / experimenting.
+ if (Depth == 6)
+ return false; // Limit search depth.
+
+ const Operator *I = dyn_cast<Operator>(V);
+ if (!I) return false;
+
+ switch (I->getOpcode()) {
+ default: break;
+ case Instruction::FMul:
+ // x*x is always non-negative or a NaN.
+ if (I->getOperand(0) == I->getOperand(1))
+ return true;
+ // Fall through
+ case Instruction::FAdd:
+ case Instruction::FDiv:
+ case Instruction::FRem:
+ return CannotBeOrderedLessThanZero(I->getOperand(0), Depth+1) &&
+ CannotBeOrderedLessThanZero(I->getOperand(1), Depth+1);
+ case Instruction::FPExt:
+ case Instruction::FPTrunc:
+ // Widening/narrowing never change sign.
+ return CannotBeOrderedLessThanZero(I->getOperand(0), Depth+1);
+ case Instruction::Call:
+ if (const IntrinsicInst *II = dyn_cast<IntrinsicInst>(I))
+ switch (II->getIntrinsicID()) {
+ default: break;
+ case Intrinsic::exp:
+ case Intrinsic::exp2:
+ case Intrinsic::fabs:
+ case Intrinsic::sqrt:
+ return true;
+ case Intrinsic::powi:
+ if (ConstantInt *CI = dyn_cast<ConstantInt>(I->getOperand(1))) {
+ // powi(x,n) is non-negative if n is even.
+ if (CI->getBitWidth() <= 64 && CI->getSExtValue() % 2u == 0)
+ return true;
+ }
+ return CannotBeOrderedLessThanZero(I->getOperand(0), Depth+1);
+ case Intrinsic::fma:
+ case Intrinsic::fmuladd:
+ // x*x+y is non-negative if y is non-negative.
+ return I->getOperand(0) == I->getOperand(1) &&
+ CannotBeOrderedLessThanZero(I->getOperand(2), Depth+1);
+ }
+ break;
+ }
+ return false;
+}
+
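
Numeric spot-checks of the facts the new CannotBeOrderedLessThanZero() relies on (plain doubles, not the LLVM API):

    #include <cassert>
    #include <cmath>

    int main() {
      double X = -3.5;
      assert(X * X >= 0.0);               // x*x is non-negative (or NaN)
      assert(std::fabs(X) >= 0.0);        // fabs never orders below zero
      assert(std::pow(X, 2.0) >= 0.0);    // powi(x, n) with even n
      assert(std::fma(X, X, 1.0) >= 0.0); // x*x + y with non-negative y
      return 0;
    }
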
/// If the specified value can be set by repeating the same byte in memory,
/// return the i8 value that it is represented with. This is
/// true for all i8 values obviously, but is also true for i32 0, i32 -1,
@@ -2057,26 +2127,16 @@ Value *llvm::isBytewiseValue(Value *V) {
// Don't handle long double formats, which have strange constraints.
}
- // We can handle constant integers that are power of two in size and a
- // multiple of 8 bits.
+ // We can handle constant integers that are multiple of 8 bits.
if (ConstantInt *CI = dyn_cast<ConstantInt>(V)) {
- unsigned Width = CI->getBitWidth();
- if (isPowerOf2_32(Width) && Width > 8) {
- // We can handle this value if the recursive binary decomposition is the
- // same at all levels.
- APInt Val = CI->getValue();
- APInt Val2;
- while (Val.getBitWidth() != 8) {
- unsigned NextWidth = Val.getBitWidth()/2;
- Val2 = Val.lshr(NextWidth);
- Val2 = Val2.trunc(Val.getBitWidth()/2);
- Val = Val.trunc(Val.getBitWidth()/2);
-
- // If the top/bottom halves aren't the same, reject it.
- if (Val != Val2)
- return nullptr;
- }
- return ConstantInt::get(V->getContext(), Val);
+ if (CI->getBitWidth() % 8 == 0) {
+ assert(CI->getBitWidth() > 8 && "8 bits should be handled above!");
+
+ // We can check that all bytes of an integer are equal by making use of a
+ // little trick: rotate by 8 and check if it's still the same value.
+ if (CI->getValue() != CI->getValue().rotl(8))
+ return nullptr;
+ return ConstantInt::get(V->getContext(), CI->getValue().trunc(8));
}
}
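
The rotate-by-8 trick above, demonstrated on plain 32-bit values: a value consists of one repeated byte exactly when rotating it by 8 bits leaves it unchanged, because the rotation lines every byte up with its neighbor. A minimal sketch:

    #include <cassert>
    #include <cstdint>

    static bool isRepeatedByte(uint32_t V) {
      uint32_t Rot = (V << 8) | (V >> 24); // rotate left by 8
      return Rot == V;
    }

    int main() {
      assert(isRepeatedByte(0x00000000));
      assert(isRepeatedByte(0xFFFFFFFF));
      assert(isRepeatedByte(0xABABABAB));
      assert(!isRepeatedByte(0xABABAB00));
      return 0;
    }
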
@@ -2474,7 +2534,7 @@ llvm::GetUnderlyingObject(Value *V, const DataLayout *TD, unsigned MaxLookup) {
} else {
// See if InstructionSimplify knows any relevant tricks.
if (Instruction *I = dyn_cast<Instruction>(V))
- // TODO: Acquire a DominatorTree and AssumptionTracker and use them.
+ // TODO: Acquire a DominatorTree and AssumptionCache and use them.
if (Value *Simplified = SimplifyInstruction(I, TD, nullptr)) {
V = Simplified;
continue;
@@ -2556,20 +2616,20 @@ bool llvm::isSafeToSpeculativelyExecute(const Value *V,
case Instruction::SDiv:
case Instruction::SRem: {
// x / y is undefined if y == 0 or x == INT_MIN and y == -1
- const APInt *X, *Y;
- if (match(Inst->getOperand(1), m_APInt(Y))) {
- if (*Y != 0) {
- if (*Y == -1) {
- // The numerator can't be MinSignedValue if the denominator is -1.
- if (match(Inst->getOperand(0), m_APInt(X)))
- return !Y->isMinSignedValue();
- // The numerator *might* be MinSignedValue.
- return false;
- }
- // The denominator is not 0 or -1, it's safe to proceed.
- return true;
- }
- }
+ const APInt *Numerator, *Denominator;
+ if (!match(Inst->getOperand(1), m_APInt(Denominator)))
+ return false;
+ // We cannot hoist this division if the denominator is 0.
+ if (*Denominator == 0)
+ return false;
+ // It's safe to hoist if the denominator is not 0 or -1.
+ if (*Denominator != -1)
+ return true;
+ // At this point we know that the denominator is -1. It is safe to hoist as
+ // long as we know that the numerator is not INT_MIN.
+ if (match(Inst->getOperand(0), m_APInt(Numerator)))
+ return !Numerator->isMinSignedValue();
+ // The numerator *might* be MinSignedValue.
return false;
}
case Instruction::Load: {
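
The restructured sdiv/srem test above, restated on plain ints (a sketch of the decision only; real speculation safety is decided on LLVM IR):

    #include <cassert>
    #include <climits>

    static bool safeToSpeculateSDiv(int Num, int Den, bool NumIsKnown) {
      if (Den == 0)
        return false;          // division by zero traps
      if (Den != -1)
        return true;           // nonzero, non -1 denominators never trap
      if (NumIsKnown)
        return Num != INT_MIN; // INT_MIN / -1 overflows and can trap
      return false;            // the numerator *might* be INT_MIN
    }

    int main() {
      assert(!safeToSpeculateSDiv(7, 0, true));
      assert(safeToSpeculateSDiv(7, 3, true));
      assert(!safeToSpeculateSDiv(INT_MIN, -1, true));
      assert(!safeToSpeculateSDiv(0, -1, false)); // unknown numerator
      return 0;
    }
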
@@ -2668,3 +2728,82 @@ bool llvm::isKnownNonNull(const Value *V, const TargetLibraryInfo *TLI) {
return false;
}
+
+OverflowResult llvm::computeOverflowForUnsignedMul(Value *LHS, Value *RHS,
+ const DataLayout *DL,
+ AssumptionCache *AC,
+ const Instruction *CxtI,
+ const DominatorTree *DT) {
+ // Multiplying n * m significant bits yields a result of n + m significant
+ // bits. If the total number of significant bits does not exceed the
+ // result bit width (minus 1), there is no overflow.
+ // This means if we have enough leading zero bits in the operands
+ // we can guarantee that the result does not overflow.
+ // Ref: "Hacker's Delight" by Henry Warren
+ unsigned BitWidth = LHS->getType()->getScalarSizeInBits();
+ APInt LHSKnownZero(BitWidth, 0);
+ APInt LHSKnownOne(BitWidth, 0);
+ APInt RHSKnownZero(BitWidth, 0);
+ APInt RHSKnownOne(BitWidth, 0);
+ computeKnownBits(LHS, LHSKnownZero, LHSKnownOne, DL, /*Depth=*/0, AC, CxtI,
+ DT);
+ computeKnownBits(RHS, RHSKnownZero, RHSKnownOne, DL, /*Depth=*/0, AC, CxtI,
+ DT);
+ // Note that underestimating the number of zero bits gives a more
+ // conservative answer.
+ unsigned ZeroBits = LHSKnownZero.countLeadingOnes() +
+ RHSKnownZero.countLeadingOnes();
+ // First handle the easy case: if we have enough zero bits there's
+ // definitely no overflow.
+ if (ZeroBits >= BitWidth)
+ return OverflowResult::NeverOverflows;
+
+ // Get the largest possible values for each operand.
+ APInt LHSMax = ~LHSKnownZero;
+ APInt RHSMax = ~RHSKnownZero;
+
+ // We know the multiply operation doesn't overflow if the maximum values for
+ // each operand will not overflow after we multiply them together.
+ bool MaxOverflow;
+ LHSMax.umul_ov(RHSMax, MaxOverflow);
+ if (!MaxOverflow)
+ return OverflowResult::NeverOverflows;
+
+ // We know it always overflows if multiplying the smallest possible values for
+ // the operands also results in overflow.
+ bool MinOverflow;
+ LHSKnownOne.umul_ov(RHSKnownOne, MinOverflow);
+ if (MinOverflow)
+ return OverflowResult::AlwaysOverflows;
+
+ return OverflowResult::MayOverflow;
+}
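
The leading-zeros bound above, on concrete 32-bit values: if the operands' leading zeros sum to at least the bit width, the product has at most 32 significant bits and cannot wrap. A sketch; the analysis applies the same test to proven known-zero bits rather than actual values:

    #include <cassert>
    #include <cstdint>

    static unsigned clz32(uint32_t V) {
      unsigned N = 0;
      for (uint32_t Bit = 1u << 31; Bit && !(V & Bit); Bit >>= 1)
        ++N;
      return N;
    }

    static bool mulCannotOverflow32(uint32_t A, uint32_t B) {
      return clz32(A) + clz32(B) >= 32;
    }

    int main() {
      assert(mulCannotOverflow32(0xFFFF, 0xFFFF)); // 16 + 16 leading zeros
      // 15 + 16 = 31 leading zeros: the bound alone cannot rule out a wrap,
      // even though this particular product (0xFFFF0000) happens to fit.
      assert(!mulCannotOverflow32(0x10000, 0xFFFF));
      return 0;
    }
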
+
+OverflowResult llvm::computeOverflowForUnsignedAdd(Value *LHS, Value *RHS,
+ const DataLayout *DL,
+ AssumptionCache *AC,
+ const Instruction *CxtI,
+ const DominatorTree *DT) {
+ bool LHSKnownNonNegative, LHSKnownNegative;
+ ComputeSignBit(LHS, LHSKnownNonNegative, LHSKnownNegative, DL, /*Depth=*/0,
+ AC, CxtI, DT);
+ if (LHSKnownNonNegative || LHSKnownNegative) {
+ bool RHSKnownNonNegative, RHSKnownNegative;
+ ComputeSignBit(RHS, RHSKnownNonNegative, RHSKnownNegative, DL, /*Depth=*/0,
+ AC, CxtI, DT);
+
+ if (LHSKnownNegative && RHSKnownNegative) {
+ // The sign bit is set in both cases: this MUST overflow.
+ // Both addends are at least half the unsigned range, so the sum wraps.
+ return OverflowResult::AlwaysOverflows;
+ }
+
+ if (LHSKnownNonNegative && RHSKnownNonNegative) {
+ // The sign bit is clear in both cases: this CANNOT overflow.
+ // Both addends are below half the unsigned range, so the sum fits.
+ return OverflowResult::NeverOverflows;
+ }
+ }
+
+ return OverflowResult::MayOverflow;
+}
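
The sign-bit argument above, checked on concrete 32-bit values; the analysis applies the same rule to proven sign bits rather than actual ones:

    #include <cassert>
    #include <cstdint>

    enum class Overflow { Never, Always, May };

    static Overflow addOverflow32(uint32_t A, uint32_t B) {
      bool ATop = A >> 31, BTop = B >> 31;
      if (ATop && BTop)
        return Overflow::Always; // both >= 2^31, so A + B >= 2^32
      if (!ATop && !BTop)
        return Overflow::Never;  // both < 2^31, so A + B <= 2^32 - 2
      return Overflow::May;      // mixed: depends on the low bits
    }

    int main() {
      assert(addOverflow32(0x80000000, 0x80000000) == Overflow::Always);
      assert(addOverflow32(0x7FFFFFFF, 0x7FFFFFFF) == Overflow::Never);
      assert(addOverflow32(0x80000000, 0x7FFFFFFF) == Overflow::May);
      return 0;
    }
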