diff options
author | Stephen Hines <srhines@google.com> | 2014-05-29 02:49:00 -0700 |
---|---|---|
committer | Stephen Hines <srhines@google.com> | 2014-05-29 02:49:00 -0700 |
commit | dce4a407a24b04eebc6a376f8e62b41aaa7b071f (patch) | |
tree | dcebc53f2b182f145a2e659393bf9a0472cedf23 /lib/Analysis | |
parent | 220b921aed042f9e520c26cffd8282a94c66c3d5 (diff) | |
download | external_llvm-dce4a407a24b04eebc6a376f8e62b41aaa7b071f.zip external_llvm-dce4a407a24b04eebc6a376f8e62b41aaa7b071f.tar.gz external_llvm-dce4a407a24b04eebc6a376f8e62b41aaa7b071f.tar.bz2 |
Update LLVM for 3.5 rebase (r209712).
Change-Id: I149556c940fb7dc92d075273c87ff584f400941f
Diffstat (limited to 'lib/Analysis')
51 files changed, 3369 insertions, 1277 deletions
diff --git a/lib/Analysis/AliasAnalysis.cpp b/lib/Analysis/AliasAnalysis.cpp index 9583bbe..57237e5 100644 --- a/lib/Analysis/AliasAnalysis.cpp +++ b/lib/Analysis/AliasAnalysis.cpp @@ -473,7 +473,7 @@ AliasAnalysis::~AliasAnalysis() {} /// void AliasAnalysis::InitializeAliasAnalysis(Pass *P) { DataLayoutPass *DLP = P->getAnalysisIfAvailable<DataLayoutPass>(); - DL = DLP ? &DLP->getDataLayout() : 0; + DL = DLP ? &DLP->getDataLayout() : nullptr; TLI = P->getAnalysisIfAvailable<TargetLibraryInfo>(); AA = &P->getAnalysis<AliasAnalysis>(); } diff --git a/lib/Analysis/AliasAnalysisCounter.cpp b/lib/Analysis/AliasAnalysisCounter.cpp index 2e3bc55..b860914 100644 --- a/lib/Analysis/AliasAnalysisCounter.cpp +++ b/lib/Analysis/AliasAnalysisCounter.cpp @@ -126,7 +126,7 @@ AliasAnalysis::AliasResult AliasAnalysisCounter::alias(const Location &LocA, const Location &LocB) { AliasResult R = getAnalysis<AliasAnalysis>().alias(LocA, LocB); - const char *AliasString = 0; + const char *AliasString = nullptr; switch (R) { case NoAlias: No++; AliasString = "No alias"; break; case MayAlias: May++; AliasString = "May alias"; break; @@ -152,7 +152,7 @@ AliasAnalysisCounter::getModRefInfo(ImmutableCallSite CS, const Location &Loc) { ModRefResult R = getAnalysis<AliasAnalysis>().getModRefInfo(CS, Loc); - const char *MRString = 0; + const char *MRString = nullptr; switch (R) { case NoModRef: NoMR++; MRString = "NoModRef"; break; case Ref: JustRef++; MRString = "JustRef"; break; diff --git a/lib/Analysis/AliasSetTracker.cpp b/lib/Analysis/AliasSetTracker.cpp index ab1005e..a45fe23 100644 --- a/lib/Analysis/AliasSetTracker.cpp +++ b/lib/Analysis/AliasSetTracker.cpp @@ -72,16 +72,16 @@ void AliasSet::mergeSetIn(AliasSet &AS, AliasSetTracker &AST) { AS.PtrList->setPrevInList(PtrListEnd); PtrListEnd = AS.PtrListEnd; - AS.PtrList = 0; + AS.PtrList = nullptr; AS.PtrListEnd = &AS.PtrList; - assert(*AS.PtrListEnd == 0 && "End of list is not null?"); + assert(*AS.PtrListEnd == nullptr && "End of list is not null?"); } } void AliasSetTracker::removeAliasSet(AliasSet *AS) { if (AliasSet *Fwd = AS->Forward) { Fwd->dropRef(*this); - AS->Forward = 0; + AS->Forward = nullptr; } AliasSets.erase(AS); } @@ -115,10 +115,10 @@ void AliasSet::addPointer(AliasSetTracker &AST, PointerRec &Entry, Entry.updateSizeAndTBAAInfo(Size, TBAAInfo); // Add it to the end of the list... - assert(*PtrListEnd == 0 && "End of list is not null?"); + assert(*PtrListEnd == nullptr && "End of list is not null?"); *PtrListEnd = &Entry; PtrListEnd = Entry.setPrevInList(PtrListEnd); - assert(*PtrListEnd == 0 && "End of list is not null?"); + assert(*PtrListEnd == nullptr && "End of list is not null?"); addRef(); // Entry points to alias set. } @@ -217,11 +217,11 @@ void AliasSetTracker::clear() { AliasSet *AliasSetTracker::findAliasSetForPointer(const Value *Ptr, uint64_t Size, const MDNode *TBAAInfo) { - AliasSet *FoundSet = 0; + AliasSet *FoundSet = nullptr; for (iterator I = begin(), E = end(); I != E; ++I) { if (I->Forward || !I->aliasesPointer(Ptr, Size, TBAAInfo, AA)) continue; - if (FoundSet == 0) { // If this is the first alias set ptr can go into. + if (!FoundSet) { // If this is the first alias set ptr can go into. FoundSet = I; // Remember it. } else { // Otherwise, we must merge the sets. FoundSet->mergeSetIn(*I, *this); // Merge in contents. @@ -245,12 +245,12 @@ bool AliasSetTracker::containsPointer(Value *Ptr, uint64_t Size, AliasSet *AliasSetTracker::findAliasSetForUnknownInst(Instruction *Inst) { - AliasSet *FoundSet = 0; + AliasSet *FoundSet = nullptr; for (iterator I = begin(), E = end(); I != E; ++I) { if (I->Forward || !I->aliasesUnknownInst(Inst, AA)) continue; - if (FoundSet == 0) // If this is the first alias set ptr can go into. + if (!FoundSet) // If this is the first alias set ptr can go into. FoundSet = I; // Remember it. else if (!I->Forward) // Otherwise, we must merge the sets. FoundSet->mergeSetIn(*I, *this); // Merge in contents. diff --git a/lib/Analysis/Analysis.cpp b/lib/Analysis/Analysis.cpp index c960123..01c1c7e 100644 --- a/lib/Analysis/Analysis.cpp +++ b/lib/Analysis/Analysis.cpp @@ -73,7 +73,7 @@ void LLVMInitializeAnalysis(LLVMPassRegistryRef R) { LLVMBool LLVMVerifyModule(LLVMModuleRef M, LLVMVerifierFailureAction Action, char **OutMessages) { - raw_ostream *DebugOS = Action != LLVMReturnStatusAction ? &errs() : 0; + raw_ostream *DebugOS = Action != LLVMReturnStatusAction ? &errs() : nullptr; std::string Messages; raw_string_ostream MsgsOS(Messages); @@ -94,7 +94,8 @@ LLVMBool LLVMVerifyModule(LLVMModuleRef M, LLVMVerifierFailureAction Action, LLVMBool LLVMVerifyFunction(LLVMValueRef Fn, LLVMVerifierFailureAction Action) { LLVMBool Result = verifyFunction( - *unwrap<Function>(Fn), Action != LLVMReturnStatusAction ? &errs() : 0); + *unwrap<Function>(Fn), Action != LLVMReturnStatusAction ? &errs() + : nullptr); if (Action == LLVMAbortProcessAction && Result) report_fatal_error("Broken function found, compilation aborted!"); diff --git a/lib/Analysis/Android.mk b/lib/Analysis/Android.mk index 76eee74..a8fef77 100644 --- a/lib/Analysis/Android.mk +++ b/lib/Analysis/Android.mk @@ -9,6 +9,7 @@ analysis_SRC_FILES := \ Analysis.cpp \ BasicAliasAnalysis.cpp \ BlockFrequencyInfo.cpp \ + BlockFrequencyInfoImpl.cpp \ BranchProbabilityInfo.cpp \ CFG.cpp \ CFGPrinter.cpp \ diff --git a/lib/Analysis/BasicAliasAnalysis.cpp b/lib/Analysis/BasicAliasAnalysis.cpp index e267374..fe90b84 100644 --- a/lib/Analysis/BasicAliasAnalysis.cpp +++ b/lib/Analysis/BasicAliasAnalysis.cpp @@ -298,7 +298,7 @@ DecomposeGEPExpression(const Value *V, int64_t &BaseOffs, do { // See if this is a bitcast or GEP. const Operator *Op = dyn_cast<Operator>(V); - if (Op == 0) { + if (!Op) { // The only non-operator case we can handle are GlobalAliases. if (const GlobalAlias *GA = dyn_cast<GlobalAlias>(V)) { if (!GA->mayBeOverridden()) { @@ -315,7 +315,7 @@ DecomposeGEPExpression(const Value *V, int64_t &BaseOffs, } const GEPOperator *GEPOp = dyn_cast<GEPOperator>(Op); - if (GEPOp == 0) { + if (!GEPOp) { // If it's not a GEP, hand it off to SimplifyInstruction to see if it // can come up with something. This matches what GetUnderlyingObject does. if (const Instruction *I = dyn_cast<Instruction>(V)) @@ -336,7 +336,7 @@ DecomposeGEPExpression(const Value *V, int64_t &BaseOffs, // If we are lacking DataLayout information, we can't compute the offets of // elements computed by GEPs. However, we can handle bitcast equivalent // GEPs. - if (DL == 0) { + if (!DL) { if (!GEPOp->hasAllZeroIndices()) return V; V = GEPOp->getOperand(0); @@ -433,7 +433,7 @@ static const Function *getParent(const Value *V) { if (const Argument *arg = dyn_cast<Argument>(V)) return arg->getParent(); - return NULL; + return nullptr; } static bool notDifferentParent(const Value *O1, const Value *O2) { @@ -753,7 +753,7 @@ BasicAliasAnalysis::getModRefInfo(ImmutableCallSite CS, // Finally, handle specific knowledge of intrinsics. const IntrinsicInst *II = dyn_cast<IntrinsicInst>(CS.getInstruction()); - if (II != 0) + if (II != nullptr) switch (II->getIntrinsicID()) { default: break; case Intrinsic::memcpy: @@ -868,21 +868,6 @@ BasicAliasAnalysis::getModRefInfo(ImmutableCallSite CS, return ModRefResult(AliasAnalysis::getModRefInfo(CS, Loc) & Min); } -static bool areVarIndicesEqual(SmallVectorImpl<VariableGEPIndex> &Indices1, - SmallVectorImpl<VariableGEPIndex> &Indices2) { - unsigned Size1 = Indices1.size(); - unsigned Size2 = Indices2.size(); - - if (Size1 != Size2) - return false; - - for (unsigned I = 0; I != Size1; ++I) - if (Indices1[I] != Indices2[I]) - return false; - - return true; -} - /// aliasGEP - Provide a bunch of ad-hoc rules to disambiguate a GEP instruction /// against another pointer. We know that V1 is a GEP, but we don't know /// anything about V2. UnderlyingV1 is GetUnderlyingObject(GEP1, DL), @@ -904,8 +889,8 @@ BasicAliasAnalysis::aliasGEP(const GEPOperator *GEP1, uint64_t V1Size, // derived pointer. if (const GEPOperator *GEP2 = dyn_cast<GEPOperator>(V2)) { // Do the base pointers alias? - AliasResult BaseAlias = aliasCheck(UnderlyingV1, UnknownSize, 0, - UnderlyingV2, UnknownSize, 0); + AliasResult BaseAlias = aliasCheck(UnderlyingV1, UnknownSize, nullptr, + UnderlyingV2, UnknownSize, nullptr); // Check for geps of non-aliasing underlying pointers where the offsets are // identical. @@ -929,8 +914,8 @@ BasicAliasAnalysis::aliasGEP(const GEPOperator *GEP1, uint64_t V1Size, // DecomposeGEPExpression and GetUnderlyingObject should return the // same result except when DecomposeGEPExpression has no DataLayout. if (GEP1BasePtr != UnderlyingV1 || GEP2BasePtr != UnderlyingV2) { - assert(DL == 0 && - "DecomposeGEPExpression and GetUnderlyingObject disagree!"); + assert(!DL && + "DecomposeGEPExpression and GetUnderlyingObject disagree!"); return MayAlias; } // If the max search depth is reached the result is undefined @@ -939,7 +924,7 @@ BasicAliasAnalysis::aliasGEP(const GEPOperator *GEP1, uint64_t V1Size, // Same offsets. if (GEP1BaseOffset == GEP2BaseOffset && - areVarIndicesEqual(GEP1VariableIndices, GEP2VariableIndices)) + GEP1VariableIndices == GEP2VariableIndices) return NoAlias; GEP1VariableIndices.clear(); } @@ -966,7 +951,7 @@ BasicAliasAnalysis::aliasGEP(const GEPOperator *GEP1, uint64_t V1Size, // DecomposeGEPExpression and GetUnderlyingObject should return the // same result except when DecomposeGEPExpression has no DataLayout. if (GEP1BasePtr != UnderlyingV1 || GEP2BasePtr != UnderlyingV2) { - assert(DL == 0 && + assert(!DL && "DecomposeGEPExpression and GetUnderlyingObject disagree!"); return MayAlias; } @@ -988,7 +973,7 @@ BasicAliasAnalysis::aliasGEP(const GEPOperator *GEP1, uint64_t V1Size, if (V1Size == UnknownSize && V2Size == UnknownSize) return MayAlias; - AliasResult R = aliasCheck(UnderlyingV1, UnknownSize, 0, + AliasResult R = aliasCheck(UnderlyingV1, UnknownSize, nullptr, V2, V2Size, V2TBAAInfo); if (R != MustAlias) // If V2 may alias GEP base pointer, conservatively returns MayAlias. @@ -1005,7 +990,7 @@ BasicAliasAnalysis::aliasGEP(const GEPOperator *GEP1, uint64_t V1Size, // DecomposeGEPExpression and GetUnderlyingObject should return the // same result except when DecomposeGEPExpression has no DataLayout. if (GEP1BasePtr != UnderlyingV1) { - assert(DL == 0 && + assert(!DL && "DecomposeGEPExpression and GetUnderlyingObject disagree!"); return MayAlias; } @@ -1371,7 +1356,7 @@ bool BasicAliasAnalysis::isValueEqualInPotentialCycles(const Value *V, // Use dominance or loop info if available. DominatorTreeWrapperPass *DTWP = getAnalysisIfAvailable<DominatorTreeWrapperPass>(); - DominatorTree *DT = DTWP ? &DTWP->getDomTree() : 0; + DominatorTree *DT = DTWP ? &DTWP->getDomTree() : nullptr; LoopInfo *LI = getAnalysisIfAvailable<LoopInfo>(); // Make sure that the visited phis cannot reach the Value. This ensures that diff --git a/lib/Analysis/BlockFrequencyInfo.cpp b/lib/Analysis/BlockFrequencyInfo.cpp index 63049a5..8ed8e3e 100644 --- a/lib/Analysis/BlockFrequencyInfo.cpp +++ b/lib/Analysis/BlockFrequencyInfo.cpp @@ -1,4 +1,4 @@ -//=======-------- BlockFrequencyInfo.cpp - Block Frequency Analysis -------===// +//===- BlockFrequencyInfo.cpp - Block Frequency Analysis ------------------===// // // The LLVM Compiler Infrastructure // @@ -12,7 +12,7 @@ //===----------------------------------------------------------------------===// #include "llvm/Analysis/BlockFrequencyInfo.h" -#include "llvm/Analysis/BlockFrequencyImpl.h" +#include "llvm/Analysis/BlockFrequencyInfoImpl.h" #include "llvm/Analysis/BranchProbabilityInfo.h" #include "llvm/Analysis/LoopInfo.h" #include "llvm/Analysis/Passes.h" @@ -24,6 +24,8 @@ using namespace llvm; +#define DEBUG_TYPE "block-freq" + #ifndef NDEBUG enum GVDAGType { GVDT_None, @@ -106,6 +108,7 @@ struct DOTGraphTraits<BlockFrequencyInfo*> : public DefaultDOTGraphTraits { INITIALIZE_PASS_BEGIN(BlockFrequencyInfo, "block-freq", "Block Frequency Analysis", true, true) INITIALIZE_PASS_DEPENDENCY(BranchProbabilityInfo) +INITIALIZE_PASS_DEPENDENCY(LoopInfo) INITIALIZE_PASS_END(BlockFrequencyInfo, "block-freq", "Block Frequency Analysis", true, true) @@ -120,14 +123,16 @@ BlockFrequencyInfo::~BlockFrequencyInfo() {} void BlockFrequencyInfo::getAnalysisUsage(AnalysisUsage &AU) const { AU.addRequired<BranchProbabilityInfo>(); + AU.addRequired<LoopInfo>(); AU.setPreservesAll(); } bool BlockFrequencyInfo::runOnFunction(Function &F) { BranchProbabilityInfo &BPI = getAnalysis<BranchProbabilityInfo>(); + LoopInfo &LI = getAnalysis<LoopInfo>(); if (!BFI) BFI.reset(new ImplType); - BFI->doFunction(&F, &BPI); + BFI->doFunction(&F, &BPI, &LI); #ifndef NDEBUG if (ViewBlockFreqPropagationDAG != GVDT_None) view(); @@ -158,7 +163,7 @@ void BlockFrequencyInfo::view() const { } const Function *BlockFrequencyInfo::getFunction() const { - return BFI ? BFI->Fn : nullptr; + return BFI ? BFI->getFunction() : nullptr; } raw_ostream &BlockFrequencyInfo:: diff --git a/lib/Analysis/BlockFrequencyInfoImpl.cpp b/lib/Analysis/BlockFrequencyInfoImpl.cpp new file mode 100644 index 0000000..87d93a4 --- /dev/null +++ b/lib/Analysis/BlockFrequencyInfoImpl.cpp @@ -0,0 +1,995 @@ +//===- BlockFrequencyImplInfo.cpp - Block Frequency Info Implementation ---===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// Loops should be simplified before this analysis. +// +//===----------------------------------------------------------------------===// + +#include "llvm/Analysis/BlockFrequencyInfoImpl.h" +#include "llvm/ADT/APFloat.h" +#include "llvm/ADT/SCCIterator.h" +#include "llvm/Support/raw_ostream.h" +#include <deque> + +using namespace llvm; +using namespace llvm::bfi_detail; + +#define DEBUG_TYPE "block-freq" + +//===----------------------------------------------------------------------===// +// +// UnsignedFloat implementation. +// +//===----------------------------------------------------------------------===// +#ifndef _MSC_VER +const int32_t UnsignedFloatBase::MaxExponent; +const int32_t UnsignedFloatBase::MinExponent; +#endif + +static void appendDigit(std::string &Str, unsigned D) { + assert(D < 10); + Str += '0' + D % 10; +} + +static void appendNumber(std::string &Str, uint64_t N) { + while (N) { + appendDigit(Str, N % 10); + N /= 10; + } +} + +static bool doesRoundUp(char Digit) { + switch (Digit) { + case '5': + case '6': + case '7': + case '8': + case '9': + return true; + default: + return false; + } +} + +static std::string toStringAPFloat(uint64_t D, int E, unsigned Precision) { + assert(E >= UnsignedFloatBase::MinExponent); + assert(E <= UnsignedFloatBase::MaxExponent); + + // Find a new E, but don't let it increase past MaxExponent. + int LeadingZeros = UnsignedFloatBase::countLeadingZeros64(D); + int NewE = std::min(UnsignedFloatBase::MaxExponent, E + 63 - LeadingZeros); + int Shift = 63 - (NewE - E); + assert(Shift <= LeadingZeros); + assert(Shift == LeadingZeros || NewE == UnsignedFloatBase::MaxExponent); + D <<= Shift; + E = NewE; + + // Check for a denormal. + unsigned AdjustedE = E + 16383; + if (!(D >> 63)) { + assert(E == UnsignedFloatBase::MaxExponent); + AdjustedE = 0; + } + + // Build the float and print it. + uint64_t RawBits[2] = {D, AdjustedE}; + APFloat Float(APFloat::x87DoubleExtended, APInt(80, RawBits)); + SmallVector<char, 24> Chars; + Float.toString(Chars, Precision, 0); + return std::string(Chars.begin(), Chars.end()); +} + +static std::string stripTrailingZeros(const std::string &Float) { + size_t NonZero = Float.find_last_not_of('0'); + assert(NonZero != std::string::npos && "no . in floating point string"); + + if (Float[NonZero] == '.') + ++NonZero; + + return Float.substr(0, NonZero + 1); +} + +std::string UnsignedFloatBase::toString(uint64_t D, int16_t E, int Width, + unsigned Precision) { + if (!D) + return "0.0"; + + // Canonicalize exponent and digits. + uint64_t Above0 = 0; + uint64_t Below0 = 0; + uint64_t Extra = 0; + int ExtraShift = 0; + if (E == 0) { + Above0 = D; + } else if (E > 0) { + if (int Shift = std::min(int16_t(countLeadingZeros64(D)), E)) { + D <<= Shift; + E -= Shift; + + if (!E) + Above0 = D; + } + } else if (E > -64) { + Above0 = D >> -E; + Below0 = D << (64 + E); + } else if (E > -120) { + Below0 = D >> (-E - 64); + Extra = D << (128 + E); + ExtraShift = -64 - E; + } + + // Fall back on APFloat for very small and very large numbers. + if (!Above0 && !Below0) + return toStringAPFloat(D, E, Precision); + + // Append the digits before the decimal. + std::string Str; + size_t DigitsOut = 0; + if (Above0) { + appendNumber(Str, Above0); + DigitsOut = Str.size(); + } else + appendDigit(Str, 0); + std::reverse(Str.begin(), Str.end()); + + // Return early if there's nothing after the decimal. + if (!Below0) + return Str + ".0"; + + // Append the decimal and beyond. + Str += '.'; + uint64_t Error = UINT64_C(1) << (64 - Width); + + // We need to shift Below0 to the right to make space for calculating + // digits. Save the precision we're losing in Extra. + Extra = (Below0 & 0xf) << 56 | (Extra >> 8); + Below0 >>= 4; + size_t SinceDot = 0; + size_t AfterDot = Str.size(); + do { + if (ExtraShift) { + --ExtraShift; + Error *= 5; + } else + Error *= 10; + + Below0 *= 10; + Extra *= 10; + Below0 += (Extra >> 60); + Extra = Extra & (UINT64_MAX >> 4); + appendDigit(Str, Below0 >> 60); + Below0 = Below0 & (UINT64_MAX >> 4); + if (DigitsOut || Str.back() != '0') + ++DigitsOut; + ++SinceDot; + } while (Error && (Below0 << 4 | Extra >> 60) >= Error / 2 && + (!Precision || DigitsOut <= Precision || SinceDot < 2)); + + // Return early for maximum precision. + if (!Precision || DigitsOut <= Precision) + return stripTrailingZeros(Str); + + // Find where to truncate. + size_t Truncate = + std::max(Str.size() - (DigitsOut - Precision), AfterDot + 1); + + // Check if there's anything to truncate. + if (Truncate >= Str.size()) + return stripTrailingZeros(Str); + + bool Carry = doesRoundUp(Str[Truncate]); + if (!Carry) + return stripTrailingZeros(Str.substr(0, Truncate)); + + // Round with the first truncated digit. + for (std::string::reverse_iterator I(Str.begin() + Truncate), E = Str.rend(); + I != E; ++I) { + if (*I == '.') + continue; + if (*I == '9') { + *I = '0'; + continue; + } + + ++*I; + Carry = false; + break; + } + + // Add "1" in front if we still need to carry. + return stripTrailingZeros(std::string(Carry, '1') + Str.substr(0, Truncate)); +} + +raw_ostream &UnsignedFloatBase::print(raw_ostream &OS, uint64_t D, int16_t E, + int Width, unsigned Precision) { + return OS << toString(D, E, Width, Precision); +} + +void UnsignedFloatBase::dump(uint64_t D, int16_t E, int Width) { + print(dbgs(), D, E, Width, 0) << "[" << Width << ":" << D << "*2^" << E + << "]"; +} + +static std::pair<uint64_t, int16_t> +getRoundedFloat(uint64_t N, bool ShouldRound, int64_t Shift) { + if (ShouldRound) + if (!++N) + // Rounding caused an overflow. + return std::make_pair(UINT64_C(1), Shift + 64); + return std::make_pair(N, Shift); +} + +std::pair<uint64_t, int16_t> UnsignedFloatBase::divide64(uint64_t Dividend, + uint64_t Divisor) { + // Input should be sanitized. + assert(Divisor); + assert(Dividend); + + // Minimize size of divisor. + int16_t Shift = 0; + if (int Zeros = countTrailingZeros(Divisor)) { + Shift -= Zeros; + Divisor >>= Zeros; + } + + // Check for powers of two. + if (Divisor == 1) + return std::make_pair(Dividend, Shift); + + // Maximize size of dividend. + if (int Zeros = countLeadingZeros64(Dividend)) { + Shift -= Zeros; + Dividend <<= Zeros; + } + + // Start with the result of a divide. + uint64_t Quotient = Dividend / Divisor; + Dividend %= Divisor; + + // Continue building the quotient with long division. + // + // TODO: continue with largers digits. + while (!(Quotient >> 63) && Dividend) { + // Shift Dividend, and check for overflow. + bool IsOverflow = Dividend >> 63; + Dividend <<= 1; + --Shift; + + // Divide. + bool DoesDivide = IsOverflow || Divisor <= Dividend; + Quotient = (Quotient << 1) | uint64_t(DoesDivide); + Dividend -= DoesDivide ? Divisor : 0; + } + + // Round. + if (Dividend >= getHalf(Divisor)) + if (!++Quotient) + // Rounding caused an overflow in Quotient. + return std::make_pair(UINT64_C(1), Shift + 64); + + return getRoundedFloat(Quotient, Dividend >= getHalf(Divisor), Shift); +} + +std::pair<uint64_t, int16_t> UnsignedFloatBase::multiply64(uint64_t L, + uint64_t R) { + // Separate into two 32-bit digits (U.L). + uint64_t UL = L >> 32, LL = L & UINT32_MAX, UR = R >> 32, LR = R & UINT32_MAX; + + // Compute cross products. + uint64_t P1 = UL * UR, P2 = UL * LR, P3 = LL * UR, P4 = LL * LR; + + // Sum into two 64-bit digits. + uint64_t Upper = P1, Lower = P4; + auto addWithCarry = [&](uint64_t N) { + uint64_t NewLower = Lower + (N << 32); + Upper += (N >> 32) + (NewLower < Lower); + Lower = NewLower; + }; + addWithCarry(P2); + addWithCarry(P3); + + // Check whether the upper digit is empty. + if (!Upper) + return std::make_pair(Lower, 0); + + // Shift as little as possible to maximize precision. + unsigned LeadingZeros = countLeadingZeros64(Upper); + int16_t Shift = 64 - LeadingZeros; + if (LeadingZeros) + Upper = Upper << LeadingZeros | Lower >> Shift; + bool ShouldRound = Shift && (Lower & UINT64_C(1) << (Shift - 1)); + return getRoundedFloat(Upper, ShouldRound, Shift); +} + +//===----------------------------------------------------------------------===// +// +// BlockMass implementation. +// +//===----------------------------------------------------------------------===// +UnsignedFloat<uint64_t> BlockMass::toFloat() const { + if (isFull()) + return UnsignedFloat<uint64_t>(1, 0); + return UnsignedFloat<uint64_t>(getMass() + 1, -64); +} + +void BlockMass::dump() const { print(dbgs()); } + +static char getHexDigit(int N) { + assert(N < 16); + if (N < 10) + return '0' + N; + return 'a' + N - 10; +} +raw_ostream &BlockMass::print(raw_ostream &OS) const { + for (int Digits = 0; Digits < 16; ++Digits) + OS << getHexDigit(Mass >> (60 - Digits * 4) & 0xf); + return OS; +} + +//===----------------------------------------------------------------------===// +// +// BlockFrequencyInfoImpl implementation. +// +//===----------------------------------------------------------------------===// +namespace { + +typedef BlockFrequencyInfoImplBase::BlockNode BlockNode; +typedef BlockFrequencyInfoImplBase::Distribution Distribution; +typedef BlockFrequencyInfoImplBase::Distribution::WeightList WeightList; +typedef BlockFrequencyInfoImplBase::Float Float; +typedef BlockFrequencyInfoImplBase::LoopData LoopData; +typedef BlockFrequencyInfoImplBase::Weight Weight; +typedef BlockFrequencyInfoImplBase::FrequencyData FrequencyData; + +/// \brief Dithering mass distributer. +/// +/// This class splits up a single mass into portions by weight, dithering to +/// spread out error. No mass is lost. The dithering precision depends on the +/// precision of the product of \a BlockMass and \a BranchProbability. +/// +/// The distribution algorithm follows. +/// +/// 1. Initialize by saving the sum of the weights in \a RemWeight and the +/// mass to distribute in \a RemMass. +/// +/// 2. For each portion: +/// +/// 1. Construct a branch probability, P, as the portion's weight divided +/// by the current value of \a RemWeight. +/// 2. Calculate the portion's mass as \a RemMass times P. +/// 3. Update \a RemWeight and \a RemMass at each portion by subtracting +/// the current portion's weight and mass. +struct DitheringDistributer { + uint32_t RemWeight; + BlockMass RemMass; + + DitheringDistributer(Distribution &Dist, const BlockMass &Mass); + + BlockMass takeMass(uint32_t Weight); +}; +} + +DitheringDistributer::DitheringDistributer(Distribution &Dist, + const BlockMass &Mass) { + Dist.normalize(); + RemWeight = Dist.Total; + RemMass = Mass; +} + +BlockMass DitheringDistributer::takeMass(uint32_t Weight) { + assert(Weight && "invalid weight"); + assert(Weight <= RemWeight); + BlockMass Mass = RemMass * BranchProbability(Weight, RemWeight); + + // Decrement totals (dither). + RemWeight -= Weight; + RemMass -= Mass; + return Mass; +} + +void Distribution::add(const BlockNode &Node, uint64_t Amount, + Weight::DistType Type) { + assert(Amount && "invalid weight of 0"); + uint64_t NewTotal = Total + Amount; + + // Check for overflow. It should be impossible to overflow twice. + bool IsOverflow = NewTotal < Total; + assert(!(DidOverflow && IsOverflow) && "unexpected repeated overflow"); + DidOverflow |= IsOverflow; + + // Update the total. + Total = NewTotal; + + // Save the weight. + Weight W; + W.TargetNode = Node; + W.Amount = Amount; + W.Type = Type; + Weights.push_back(W); +} + +static void combineWeight(Weight &W, const Weight &OtherW) { + assert(OtherW.TargetNode.isValid()); + if (!W.Amount) { + W = OtherW; + return; + } + assert(W.Type == OtherW.Type); + assert(W.TargetNode == OtherW.TargetNode); + assert(W.Amount < W.Amount + OtherW.Amount && "Unexpected overflow"); + W.Amount += OtherW.Amount; +} +static void combineWeightsBySorting(WeightList &Weights) { + // Sort so edges to the same node are adjacent. + std::sort(Weights.begin(), Weights.end(), + [](const Weight &L, + const Weight &R) { return L.TargetNode < R.TargetNode; }); + + // Combine adjacent edges. + WeightList::iterator O = Weights.begin(); + for (WeightList::const_iterator I = O, L = O, E = Weights.end(); I != E; + ++O, (I = L)) { + *O = *I; + + // Find the adjacent weights to the same node. + for (++L; L != E && I->TargetNode == L->TargetNode; ++L) + combineWeight(*O, *L); + } + + // Erase extra entries. + Weights.erase(O, Weights.end()); + return; +} +static void combineWeightsByHashing(WeightList &Weights) { + // Collect weights into a DenseMap. + typedef DenseMap<BlockNode::IndexType, Weight> HashTable; + HashTable Combined(NextPowerOf2(2 * Weights.size())); + for (const Weight &W : Weights) + combineWeight(Combined[W.TargetNode.Index], W); + + // Check whether anything changed. + if (Weights.size() == Combined.size()) + return; + + // Fill in the new weights. + Weights.clear(); + Weights.reserve(Combined.size()); + for (const auto &I : Combined) + Weights.push_back(I.second); +} +static void combineWeights(WeightList &Weights) { + // Use a hash table for many successors to keep this linear. + if (Weights.size() > 128) { + combineWeightsByHashing(Weights); + return; + } + + combineWeightsBySorting(Weights); +} +static uint64_t shiftRightAndRound(uint64_t N, int Shift) { + assert(Shift >= 0); + assert(Shift < 64); + if (!Shift) + return N; + return (N >> Shift) + (UINT64_C(1) & N >> (Shift - 1)); +} +void Distribution::normalize() { + // Early exit for termination nodes. + if (Weights.empty()) + return; + + // Only bother if there are multiple successors. + if (Weights.size() > 1) + combineWeights(Weights); + + // Early exit when combined into a single successor. + if (Weights.size() == 1) { + Total = 1; + Weights.front().Amount = 1; + return; + } + + // Determine how much to shift right so that the total fits into 32-bits. + // + // If we shift at all, shift by 1 extra. Otherwise, the lower limit of 1 + // for each weight can cause a 32-bit overflow. + int Shift = 0; + if (DidOverflow) + Shift = 33; + else if (Total > UINT32_MAX) + Shift = 33 - countLeadingZeros(Total); + + // Early exit if nothing needs to be scaled. + if (!Shift) + return; + + // Recompute the total through accumulation (rather than shifting it) so that + // it's accurate after shifting. + Total = 0; + + // Sum the weights to each node and shift right if necessary. + for (Weight &W : Weights) { + // Scale down below UINT32_MAX. Since Shift is larger than necessary, we + // can round here without concern about overflow. + assert(W.TargetNode.isValid()); + W.Amount = std::max(UINT64_C(1), shiftRightAndRound(W.Amount, Shift)); + assert(W.Amount <= UINT32_MAX); + + // Update the total. + Total += W.Amount; + } + assert(Total <= UINT32_MAX); +} + +void BlockFrequencyInfoImplBase::clear() { + // Swap with a default-constructed std::vector, since std::vector<>::clear() + // does not actually clear heap storage. + std::vector<FrequencyData>().swap(Freqs); + std::vector<WorkingData>().swap(Working); + Loops.clear(); +} + +/// \brief Clear all memory not needed downstream. +/// +/// Releases all memory not used downstream. In particular, saves Freqs. +static void cleanup(BlockFrequencyInfoImplBase &BFI) { + std::vector<FrequencyData> SavedFreqs(std::move(BFI.Freqs)); + BFI.clear(); + BFI.Freqs = std::move(SavedFreqs); +} + +bool BlockFrequencyInfoImplBase::addToDist(Distribution &Dist, + const LoopData *OuterLoop, + const BlockNode &Pred, + const BlockNode &Succ, + uint64_t Weight) { + if (!Weight) + Weight = 1; + + auto isLoopHeader = [&OuterLoop](const BlockNode &Node) { + return OuterLoop && OuterLoop->isHeader(Node); + }; + + BlockNode Resolved = Working[Succ.Index].getResolvedNode(); + +#ifndef NDEBUG + auto debugSuccessor = [&](const char *Type) { + dbgs() << " =>" + << " [" << Type << "] weight = " << Weight; + if (!isLoopHeader(Resolved)) + dbgs() << ", succ = " << getBlockName(Succ); + if (Resolved != Succ) + dbgs() << ", resolved = " << getBlockName(Resolved); + dbgs() << "\n"; + }; + (void)debugSuccessor; +#endif + + if (isLoopHeader(Resolved)) { + DEBUG(debugSuccessor("backedge")); + Dist.addBackedge(OuterLoop->getHeader(), Weight); + return true; + } + + if (Working[Resolved.Index].getContainingLoop() != OuterLoop) { + DEBUG(debugSuccessor(" exit ")); + Dist.addExit(Resolved, Weight); + return true; + } + + if (Resolved < Pred) { + if (!isLoopHeader(Pred)) { + // If OuterLoop is an irreducible loop, we can't actually handle this. + assert((!OuterLoop || !OuterLoop->isIrreducible()) && + "unhandled irreducible control flow"); + + // Irreducible backedge. Abort. + DEBUG(debugSuccessor("abort!!!")); + return false; + } + + // If "Pred" is a loop header, then this isn't really a backedge; rather, + // OuterLoop must be irreducible. These false backedges can come only from + // secondary loop headers. + assert(OuterLoop && OuterLoop->isIrreducible() && !isLoopHeader(Resolved) && + "unhandled irreducible control flow"); + } + + DEBUG(debugSuccessor(" local ")); + Dist.addLocal(Resolved, Weight); + return true; +} + +bool BlockFrequencyInfoImplBase::addLoopSuccessorsToDist( + const LoopData *OuterLoop, LoopData &Loop, Distribution &Dist) { + // Copy the exit map into Dist. + for (const auto &I : Loop.Exits) + if (!addToDist(Dist, OuterLoop, Loop.getHeader(), I.first, + I.second.getMass())) + // Irreducible backedge. + return false; + + return true; +} + +/// \brief Get the maximum allowed loop scale. +/// +/// Gives the maximum number of estimated iterations allowed for a loop. Very +/// large numbers cause problems downstream (even within 64-bits). +static Float getMaxLoopScale() { return Float(1, 12); } + +/// \brief Compute the loop scale for a loop. +void BlockFrequencyInfoImplBase::computeLoopScale(LoopData &Loop) { + // Compute loop scale. + DEBUG(dbgs() << "compute-loop-scale: " << getLoopName(Loop) << "\n"); + + // LoopScale == 1 / ExitMass + // ExitMass == HeadMass - BackedgeMass + BlockMass ExitMass = BlockMass::getFull() - Loop.BackedgeMass; + + // Block scale stores the inverse of the scale. + Loop.Scale = ExitMass.toFloat().inverse(); + + DEBUG(dbgs() << " - exit-mass = " << ExitMass << " (" << BlockMass::getFull() + << " - " << Loop.BackedgeMass << ")\n" + << " - scale = " << Loop.Scale << "\n"); + + if (Loop.Scale > getMaxLoopScale()) { + Loop.Scale = getMaxLoopScale(); + DEBUG(dbgs() << " - reduced-to-max-scale: " << getMaxLoopScale() << "\n"); + } +} + +/// \brief Package up a loop. +void BlockFrequencyInfoImplBase::packageLoop(LoopData &Loop) { + DEBUG(dbgs() << "packaging-loop: " << getLoopName(Loop) << "\n"); + + // Clear the subloop exits to prevent quadratic memory usage. + for (const BlockNode &M : Loop.Nodes) { + if (auto *Loop = Working[M.Index].getPackagedLoop()) + Loop->Exits.clear(); + DEBUG(dbgs() << " - node: " << getBlockName(M.Index) << "\n"); + } + Loop.IsPackaged = true; +} + +void BlockFrequencyInfoImplBase::distributeMass(const BlockNode &Source, + LoopData *OuterLoop, + Distribution &Dist) { + BlockMass Mass = Working[Source.Index].getMass(); + DEBUG(dbgs() << " => mass: " << Mass << "\n"); + + // Distribute mass to successors as laid out in Dist. + DitheringDistributer D(Dist, Mass); + +#ifndef NDEBUG + auto debugAssign = [&](const BlockNode &T, const BlockMass &M, + const char *Desc) { + dbgs() << " => assign " << M << " (" << D.RemMass << ")"; + if (Desc) + dbgs() << " [" << Desc << "]"; + if (T.isValid()) + dbgs() << " to " << getBlockName(T); + dbgs() << "\n"; + }; + (void)debugAssign; +#endif + + for (const Weight &W : Dist.Weights) { + // Check for a local edge (non-backedge and non-exit). + BlockMass Taken = D.takeMass(W.Amount); + if (W.Type == Weight::Local) { + Working[W.TargetNode.Index].getMass() += Taken; + DEBUG(debugAssign(W.TargetNode, Taken, nullptr)); + continue; + } + + // Backedges and exits only make sense if we're processing a loop. + assert(OuterLoop && "backedge or exit outside of loop"); + + // Check for a backedge. + if (W.Type == Weight::Backedge) { + OuterLoop->BackedgeMass += Taken; + DEBUG(debugAssign(BlockNode(), Taken, "back")); + continue; + } + + // This must be an exit. + assert(W.Type == Weight::Exit); + OuterLoop->Exits.push_back(std::make_pair(W.TargetNode, Taken)); + DEBUG(debugAssign(W.TargetNode, Taken, "exit")); + } +} + +static void convertFloatingToInteger(BlockFrequencyInfoImplBase &BFI, + const Float &Min, const Float &Max) { + // Scale the Factor to a size that creates integers. Ideally, integers would + // be scaled so that Max == UINT64_MAX so that they can be best + // differentiated. However, the register allocator currently deals poorly + // with large numbers. Instead, push Min up a little from 1 to give some + // room to differentiate small, unequal numbers. + // + // TODO: fix issues downstream so that ScalingFactor can be Float(1,64)/Max. + Float ScalingFactor = Min.inverse(); + if ((Max / Min).lg() < 60) + ScalingFactor <<= 3; + + // Translate the floats to integers. + DEBUG(dbgs() << "float-to-int: min = " << Min << ", max = " << Max + << ", factor = " << ScalingFactor << "\n"); + for (size_t Index = 0; Index < BFI.Freqs.size(); ++Index) { + Float Scaled = BFI.Freqs[Index].Floating * ScalingFactor; + BFI.Freqs[Index].Integer = std::max(UINT64_C(1), Scaled.toInt<uint64_t>()); + DEBUG(dbgs() << " - " << BFI.getBlockName(Index) << ": float = " + << BFI.Freqs[Index].Floating << ", scaled = " << Scaled + << ", int = " << BFI.Freqs[Index].Integer << "\n"); + } +} + +/// \brief Unwrap a loop package. +/// +/// Visits all the members of a loop, adjusting their BlockData according to +/// the loop's pseudo-node. +static void unwrapLoop(BlockFrequencyInfoImplBase &BFI, LoopData &Loop) { + DEBUG(dbgs() << "unwrap-loop-package: " << BFI.getLoopName(Loop) + << ": mass = " << Loop.Mass << ", scale = " << Loop.Scale + << "\n"); + Loop.Scale *= Loop.Mass.toFloat(); + Loop.IsPackaged = false; + DEBUG(dbgs() << " => combined-scale = " << Loop.Scale << "\n"); + + // Propagate the head scale through the loop. Since members are visited in + // RPO, the head scale will be updated by the loop scale first, and then the + // final head scale will be used for updated the rest of the members. + for (const BlockNode &N : Loop.Nodes) { + const auto &Working = BFI.Working[N.Index]; + Float &F = Working.isAPackage() ? Working.getPackagedLoop()->Scale + : BFI.Freqs[N.Index].Floating; + Float New = Loop.Scale * F; + DEBUG(dbgs() << " - " << BFI.getBlockName(N) << ": " << F << " => " << New + << "\n"); + F = New; + } +} + +void BlockFrequencyInfoImplBase::unwrapLoops() { + // Set initial frequencies from loop-local masses. + for (size_t Index = 0; Index < Working.size(); ++Index) + Freqs[Index].Floating = Working[Index].Mass.toFloat(); + + for (LoopData &Loop : Loops) + unwrapLoop(*this, Loop); +} + +void BlockFrequencyInfoImplBase::finalizeMetrics() { + // Unwrap loop packages in reverse post-order, tracking min and max + // frequencies. + auto Min = Float::getLargest(); + auto Max = Float::getZero(); + for (size_t Index = 0; Index < Working.size(); ++Index) { + // Update min/max scale. + Min = std::min(Min, Freqs[Index].Floating); + Max = std::max(Max, Freqs[Index].Floating); + } + + // Convert to integers. + convertFloatingToInteger(*this, Min, Max); + + // Clean up data structures. + cleanup(*this); + + // Print out the final stats. + DEBUG(dump()); +} + +BlockFrequency +BlockFrequencyInfoImplBase::getBlockFreq(const BlockNode &Node) const { + if (!Node.isValid()) + return 0; + return Freqs[Node.Index].Integer; +} +Float +BlockFrequencyInfoImplBase::getFloatingBlockFreq(const BlockNode &Node) const { + if (!Node.isValid()) + return Float::getZero(); + return Freqs[Node.Index].Floating; +} + +std::string +BlockFrequencyInfoImplBase::getBlockName(const BlockNode &Node) const { + return std::string(); +} +std::string +BlockFrequencyInfoImplBase::getLoopName(const LoopData &Loop) const { + return getBlockName(Loop.getHeader()) + (Loop.isIrreducible() ? "**" : "*"); +} + +raw_ostream & +BlockFrequencyInfoImplBase::printBlockFreq(raw_ostream &OS, + const BlockNode &Node) const { + return OS << getFloatingBlockFreq(Node); +} + +raw_ostream & +BlockFrequencyInfoImplBase::printBlockFreq(raw_ostream &OS, + const BlockFrequency &Freq) const { + Float Block(Freq.getFrequency(), 0); + Float Entry(getEntryFreq(), 0); + + return OS << Block / Entry; +} + +void IrreducibleGraph::addNodesInLoop(const BFIBase::LoopData &OuterLoop) { + Start = OuterLoop.getHeader(); + Nodes.reserve(OuterLoop.Nodes.size()); + for (auto N : OuterLoop.Nodes) + addNode(N); + indexNodes(); +} +void IrreducibleGraph::addNodesInFunction() { + Start = 0; + for (uint32_t Index = 0; Index < BFI.Working.size(); ++Index) + if (!BFI.Working[Index].isPackaged()) + addNode(Index); + indexNodes(); +} +void IrreducibleGraph::indexNodes() { + for (auto &I : Nodes) + Lookup[I.Node.Index] = &I; +} +void IrreducibleGraph::addEdge(IrrNode &Irr, const BlockNode &Succ, + const BFIBase::LoopData *OuterLoop) { + if (OuterLoop && OuterLoop->isHeader(Succ)) + return; + auto L = Lookup.find(Succ.Index); + if (L == Lookup.end()) + return; + IrrNode &SuccIrr = *L->second; + Irr.Edges.push_back(&SuccIrr); + SuccIrr.Edges.push_front(&Irr); + ++SuccIrr.NumIn; +} + +namespace llvm { +template <> struct GraphTraits<IrreducibleGraph> { + typedef bfi_detail::IrreducibleGraph GraphT; + + typedef const GraphT::IrrNode NodeType; + typedef GraphT::IrrNode::iterator ChildIteratorType; + + static const NodeType *getEntryNode(const GraphT &G) { + return G.StartIrr; + } + static ChildIteratorType child_begin(NodeType *N) { return N->succ_begin(); } + static ChildIteratorType child_end(NodeType *N) { return N->succ_end(); } +}; +} + +/// \brief Find extra irreducible headers. +/// +/// Find entry blocks and other blocks with backedges, which exist when \c G +/// contains irreducible sub-SCCs. +static void findIrreducibleHeaders( + const BlockFrequencyInfoImplBase &BFI, + const IrreducibleGraph &G, + const std::vector<const IrreducibleGraph::IrrNode *> &SCC, + LoopData::NodeList &Headers, LoopData::NodeList &Others) { + // Map from nodes in the SCC to whether it's an entry block. + SmallDenseMap<const IrreducibleGraph::IrrNode *, bool, 8> InSCC; + + // InSCC also acts the set of nodes in the graph. Seed it. + for (const auto *I : SCC) + InSCC[I] = false; + + for (auto I = InSCC.begin(), E = InSCC.end(); I != E; ++I) { + auto &Irr = *I->first; + for (const auto *P : make_range(Irr.pred_begin(), Irr.pred_end())) { + if (InSCC.count(P)) + continue; + + // This is an entry block. + I->second = true; + Headers.push_back(Irr.Node); + DEBUG(dbgs() << " => entry = " << BFI.getBlockName(Irr.Node) << "\n"); + break; + } + } + assert(Headers.size() >= 2 && "Should be irreducible"); + if (Headers.size() == InSCC.size()) { + // Every block is a header. + std::sort(Headers.begin(), Headers.end()); + return; + } + + // Look for extra headers from irreducible sub-SCCs. + for (const auto &I : InSCC) { + // Entry blocks are already headers. + if (I.second) + continue; + + auto &Irr = *I.first; + for (const auto *P : make_range(Irr.pred_begin(), Irr.pred_end())) { + // Skip forward edges. + if (P->Node < Irr.Node) + continue; + + // Skip predecessors from entry blocks. These can have inverted + // ordering. + if (InSCC.lookup(P)) + continue; + + // Store the extra header. + Headers.push_back(Irr.Node); + DEBUG(dbgs() << " => extra = " << BFI.getBlockName(Irr.Node) << "\n"); + break; + } + if (Headers.back() == Irr.Node) + // Added this as a header. + continue; + + // This is not a header. + Others.push_back(Irr.Node); + DEBUG(dbgs() << " => other = " << BFI.getBlockName(Irr.Node) << "\n"); + } + std::sort(Headers.begin(), Headers.end()); + std::sort(Others.begin(), Others.end()); +} + +static void createIrreducibleLoop( + BlockFrequencyInfoImplBase &BFI, const IrreducibleGraph &G, + LoopData *OuterLoop, std::list<LoopData>::iterator Insert, + const std::vector<const IrreducibleGraph::IrrNode *> &SCC) { + // Translate the SCC into RPO. + DEBUG(dbgs() << " - found-scc\n"); + + LoopData::NodeList Headers; + LoopData::NodeList Others; + findIrreducibleHeaders(BFI, G, SCC, Headers, Others); + + auto Loop = BFI.Loops.emplace(Insert, OuterLoop, Headers.begin(), + Headers.end(), Others.begin(), Others.end()); + + // Update loop hierarchy. + for (const auto &N : Loop->Nodes) + if (BFI.Working[N.Index].isLoopHeader()) + BFI.Working[N.Index].Loop->Parent = &*Loop; + else + BFI.Working[N.Index].Loop = &*Loop; +} + +iterator_range<std::list<LoopData>::iterator> +BlockFrequencyInfoImplBase::analyzeIrreducible( + const IrreducibleGraph &G, LoopData *OuterLoop, + std::list<LoopData>::iterator Insert) { + assert((OuterLoop == nullptr) == (Insert == Loops.begin())); + auto Prev = OuterLoop ? std::prev(Insert) : Loops.end(); + + for (auto I = scc_begin(G); !I.isAtEnd(); ++I) { + if (I->size() < 2) + continue; + + // Translate the SCC into RPO. + createIrreducibleLoop(*this, G, OuterLoop, Insert, *I); + } + + if (OuterLoop) + return make_range(std::next(Prev), Insert); + return make_range(Loops.begin(), Insert); +} + +void +BlockFrequencyInfoImplBase::updateLoopWithIrreducible(LoopData &OuterLoop) { + OuterLoop.Exits.clear(); + OuterLoop.BackedgeMass = BlockMass::getEmpty(); + auto O = OuterLoop.Nodes.begin() + 1; + for (auto I = O, E = OuterLoop.Nodes.end(); I != E; ++I) + if (!Working[I->Index].isPackaged()) + *O++ = *I; + OuterLoop.Nodes.erase(O, OuterLoop.Nodes.end()); +} diff --git a/lib/Analysis/BranchProbabilityInfo.cpp b/lib/Analysis/BranchProbabilityInfo.cpp index b901c54..bbd8750 100644 --- a/lib/Analysis/BranchProbabilityInfo.cpp +++ b/lib/Analysis/BranchProbabilityInfo.cpp @@ -11,7 +11,6 @@ // //===----------------------------------------------------------------------===// -#define DEBUG_TYPE "branch-prob" #include "llvm/Analysis/BranchProbabilityInfo.h" #include "llvm/ADT/PostOrderIterator.h" #include "llvm/Analysis/LoopInfo.h" @@ -25,6 +24,8 @@ using namespace llvm; +#define DEBUG_TYPE "branch-prob" + INITIALIZE_PASS_BEGIN(BranchProbabilityInfo, "branch-prob", "Branch Probability Analysis", false, true) INITIALIZE_PASS_DEPENDENCY(LoopInfo) @@ -322,6 +323,9 @@ bool BranchProbabilityInfo::calcLoopBranchHeuristics(BasicBlock *BB) { InEdges.push_back(I.getSuccessorIndex()); } + if (BackEdges.empty() && ExitingEdges.empty()) + return false; + if (uint32_t numBackEdges = BackEdges.size()) { uint32_t backWeight = LBH_TAKEN_WEIGHT / numBackEdges; if (backWeight < NORMAL_WEIGHT) @@ -557,7 +561,7 @@ isEdgeHot(const BasicBlock *Src, const BasicBlock *Dst) const { BasicBlock *BranchProbabilityInfo::getHotSucc(BasicBlock *BB) const { uint32_t Sum = 0; uint32_t MaxWeight = 0; - BasicBlock *MaxSucc = 0; + BasicBlock *MaxSucc = nullptr; for (succ_iterator I = succ_begin(BB), E = succ_end(BB); I != E; ++I) { BasicBlock *Succ = *I; @@ -577,7 +581,7 @@ BasicBlock *BranchProbabilityInfo::getHotSucc(BasicBlock *BB) const { if (BranchProbability(MaxWeight, Sum) > BranchProbability(4, 5)) return MaxSucc; - return 0; + return nullptr; } /// Get the raw edge weight for the edge. If can't find it, return @@ -594,11 +598,9 @@ getEdgeWeight(const BasicBlock *Src, unsigned IndexInSuccessors) const { return DEFAULT_WEIGHT; } -uint32_t -BranchProbabilityInfo:: -getEdgeWeight(const BasicBlock *Src, succ_const_iterator Dst) const { - size_t index = std::distance(succ_begin(Src), Dst); - return getEdgeWeight(Src, index); +uint32_t BranchProbabilityInfo::getEdgeWeight(const BasicBlock *Src, + succ_const_iterator Dst) const { + return getEdgeWeight(Src, Dst.getSuccessorIndex()); } /// Get the raw edge weight calculated for the block pair. This returns the sum diff --git a/lib/Analysis/CFG.cpp b/lib/Analysis/CFG.cpp index 6963760..8ef5302 100644 --- a/lib/Analysis/CFG.cpp +++ b/lib/Analysis/CFG.cpp @@ -123,7 +123,7 @@ static bool loopContainsBoth(const LoopInfo *LI, const BasicBlock *BB1, const BasicBlock *BB2) { const Loop *L1 = getOutermostLoop(LI, BB1); const Loop *L2 = getOutermostLoop(LI, BB2); - return L1 != NULL && L1 == L2; + return L1 != nullptr && L1 == L2; } static bool isPotentiallyReachableInner(SmallVectorImpl<BasicBlock *> &Worklist, @@ -133,7 +133,7 @@ static bool isPotentiallyReachableInner(SmallVectorImpl<BasicBlock *> &Worklist, // When the stop block is unreachable, it's dominated from everywhere, // regardless of whether there's a path between the two blocks. if (DT && !DT->isReachableFromEntry(StopBB)) - DT = 0; + DT = nullptr; // Limit the number of blocks we visit. The goal is to avoid run-away compile // times on large CFGs without hampering sensible code. Arbitrarily chosen. @@ -156,7 +156,7 @@ static bool isPotentiallyReachableInner(SmallVectorImpl<BasicBlock *> &Worklist, return true; } - if (const Loop *Outer = LI ? getOutermostLoop(LI, BB) : 0) { + if (const Loop *Outer = LI ? getOutermostLoop(LI, BB) : nullptr) { // All blocks in a single loop are reachable from all other blocks. From // any of these blocks, we can skip directly to the exits of the loop, // ignoring any other blocks inside the loop body. @@ -200,7 +200,7 @@ bool llvm::isPotentiallyReachable(const Instruction *A, const Instruction *B, // If the block is in a loop then we can reach any instruction in the block // from any other instruction in the block by going around a backedge. - if (LI && LI->getLoopFor(BB) != 0) + if (LI && LI->getLoopFor(BB) != nullptr) return true; // Linear scan, start at 'A', see whether we hit 'B' or the end first. diff --git a/lib/Analysis/CFGPrinter.cpp b/lib/Analysis/CFGPrinter.cpp index 537d6d1..c2c19d6 100644 --- a/lib/Analysis/CFGPrinter.cpp +++ b/lib/Analysis/CFGPrinter.cpp @@ -19,6 +19,7 @@ #include "llvm/Analysis/CFGPrinter.h" #include "llvm/Pass.h" +#include "llvm/Support/FileSystem.h" using namespace llvm; namespace { @@ -33,7 +34,7 @@ namespace { return false; } - void print(raw_ostream &OS, const Module* = 0) const override {} + void print(raw_ostream &OS, const Module* = nullptr) const override {} void getAnalysisUsage(AnalysisUsage &AU) const override { AU.setPreservesAll(); @@ -56,7 +57,7 @@ namespace { return false; } - void print(raw_ostream &OS, const Module* = 0) const override {} + void print(raw_ostream &OS, const Module* = nullptr) const override {} void getAnalysisUsage(AnalysisUsage &AU) const override { AU.setPreservesAll(); @@ -90,7 +91,7 @@ namespace { return false; } - void print(raw_ostream &OS, const Module* = 0) const override {} + void print(raw_ostream &OS, const Module* = nullptr) const override {} void getAnalysisUsage(AnalysisUsage &AU) const override { AU.setPreservesAll(); @@ -123,7 +124,7 @@ namespace { errs() << "\n"; return false; } - void print(raw_ostream &OS, const Module* = 0) const override {} + void print(raw_ostream &OS, const Module* = nullptr) const override {} void getAnalysisUsage(AnalysisUsage &AU) const override { AU.setPreservesAll(); @@ -147,8 +148,8 @@ void Function::viewCFG() const { /// viewCFGOnly - This function is meant for use from the debugger. It works /// just like viewCFG, but it does not include the contents of basic blocks -/// into the nodes, just the label. If you are only interested in the CFG t -/// his can make the graph smaller. +/// into the nodes, just the label. If you are only interested in the CFG +/// this can make the graph smaller. /// void Function::viewCFGOnly() const { ViewGraph(this, "cfg" + getName(), true); diff --git a/lib/Analysis/CGSCCPassManager.cpp b/lib/Analysis/CGSCCPassManager.cpp new file mode 100644 index 0000000..5d1d8a9 --- /dev/null +++ b/lib/Analysis/CGSCCPassManager.cpp @@ -0,0 +1,167 @@ +//===- CGSCCPassManager.cpp - Managing & running CGSCC passes -------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// + +#include "llvm/Analysis/CGSCCPassManager.h" +#include "llvm/Support/CommandLine.h" +#include "llvm/Support/Debug.h" + +using namespace llvm; + +static cl::opt<bool> +DebugPM("debug-cgscc-pass-manager", cl::Hidden, + cl::desc("Print CGSCC pass management debugging information")); + +PreservedAnalyses CGSCCPassManager::run(LazyCallGraph::SCC *C, + CGSCCAnalysisManager *AM) { + PreservedAnalyses PA = PreservedAnalyses::all(); + + if (DebugPM) + dbgs() << "Starting CGSCC pass manager run.\n"; + + for (unsigned Idx = 0, Size = Passes.size(); Idx != Size; ++Idx) { + if (DebugPM) + dbgs() << "Running CGSCC pass: " << Passes[Idx]->name() << "\n"; + + PreservedAnalyses PassPA = Passes[Idx]->run(C, AM); + if (AM) + AM->invalidate(C, PassPA); + PA.intersect(std::move(PassPA)); + } + + if (DebugPM) + dbgs() << "Finished CGSCC pass manager run.\n"; + + return PA; +} + +bool CGSCCAnalysisManager::empty() const { + assert(CGSCCAnalysisResults.empty() == CGSCCAnalysisResultLists.empty() && + "The storage and index of analysis results disagree on how many there " + "are!"); + return CGSCCAnalysisResults.empty(); +} + +void CGSCCAnalysisManager::clear() { + CGSCCAnalysisResults.clear(); + CGSCCAnalysisResultLists.clear(); +} + +CGSCCAnalysisManager::ResultConceptT & +CGSCCAnalysisManager::getResultImpl(void *PassID, LazyCallGraph::SCC *C) { + CGSCCAnalysisResultMapT::iterator RI; + bool Inserted; + std::tie(RI, Inserted) = CGSCCAnalysisResults.insert(std::make_pair( + std::make_pair(PassID, C), CGSCCAnalysisResultListT::iterator())); + + // If we don't have a cached result for this function, look up the pass and + // run it to produce a result, which we then add to the cache. + if (Inserted) { + CGSCCAnalysisResultListT &ResultList = CGSCCAnalysisResultLists[C]; + ResultList.emplace_back(PassID, lookupPass(PassID).run(C, this)); + RI->second = std::prev(ResultList.end()); + } + + return *RI->second->second; +} + +CGSCCAnalysisManager::ResultConceptT * +CGSCCAnalysisManager::getCachedResultImpl(void *PassID, + LazyCallGraph::SCC *C) const { + CGSCCAnalysisResultMapT::const_iterator RI = + CGSCCAnalysisResults.find(std::make_pair(PassID, C)); + return RI == CGSCCAnalysisResults.end() ? nullptr : &*RI->second->second; +} + +void CGSCCAnalysisManager::invalidateImpl(void *PassID, LazyCallGraph::SCC *C) { + CGSCCAnalysisResultMapT::iterator RI = + CGSCCAnalysisResults.find(std::make_pair(PassID, C)); + if (RI == CGSCCAnalysisResults.end()) + return; + + CGSCCAnalysisResultLists[C].erase(RI->second); +} + +void CGSCCAnalysisManager::invalidateImpl(LazyCallGraph::SCC *C, + const PreservedAnalyses &PA) { + // Clear all the invalidated results associated specifically with this + // function. + SmallVector<void *, 8> InvalidatedPassIDs; + CGSCCAnalysisResultListT &ResultsList = CGSCCAnalysisResultLists[C]; + for (CGSCCAnalysisResultListT::iterator I = ResultsList.begin(), + E = ResultsList.end(); + I != E;) + if (I->second->invalidate(C, PA)) { + InvalidatedPassIDs.push_back(I->first); + I = ResultsList.erase(I); + } else { + ++I; + } + while (!InvalidatedPassIDs.empty()) + CGSCCAnalysisResults.erase( + std::make_pair(InvalidatedPassIDs.pop_back_val(), C)); + CGSCCAnalysisResultLists.erase(C); +} + +char CGSCCAnalysisManagerModuleProxy::PassID; + +CGSCCAnalysisManagerModuleProxy::Result +CGSCCAnalysisManagerModuleProxy::run(Module *M) { + assert(CGAM->empty() && "CGSCC analyses ran prior to the module proxy!"); + return Result(*CGAM); +} + +CGSCCAnalysisManagerModuleProxy::Result::~Result() { + // Clear out the analysis manager if we're being destroyed -- it means we + // didn't even see an invalidate call when we got invalidated. + CGAM->clear(); +} + +bool CGSCCAnalysisManagerModuleProxy::Result::invalidate( + Module *M, const PreservedAnalyses &PA) { + // If this proxy isn't marked as preserved, then we can't even invalidate + // individual CGSCC analyses, there may be an invalid set of SCC objects in + // the cache making it impossible to incrementally preserve them. + // Just clear the entire manager. + if (!PA.preserved(ID())) + CGAM->clear(); + + // Return false to indicate that this result is still a valid proxy. + return false; +} + +char ModuleAnalysisManagerCGSCCProxy::PassID; + +char FunctionAnalysisManagerCGSCCProxy::PassID; + +FunctionAnalysisManagerCGSCCProxy::Result +FunctionAnalysisManagerCGSCCProxy::run(LazyCallGraph::SCC *C) { + assert(FAM->empty() && "Function analyses ran prior to the CGSCC proxy!"); + return Result(*FAM); +} + +FunctionAnalysisManagerCGSCCProxy::Result::~Result() { + // Clear out the analysis manager if we're being destroyed -- it means we + // didn't even see an invalidate call when we got invalidated. + FAM->clear(); +} + +bool FunctionAnalysisManagerCGSCCProxy::Result::invalidate( + LazyCallGraph::SCC *C, const PreservedAnalyses &PA) { + // If this proxy isn't marked as preserved, then we can't even invalidate + // individual function analyses, there may be an invalid set of Function + // objects in the cache making it impossible to incrementally preserve them. + // Just clear the entire manager. + if (!PA.preserved(ID())) + FAM->clear(); + + // Return false to indicate that this result is still a valid proxy. + return false; +} + +char CGSCCAnalysisManagerFunctionProxy::PassID; diff --git a/lib/Analysis/CMakeLists.txt b/lib/Analysis/CMakeLists.txt index c6d4573..b546789 100644 --- a/lib/Analysis/CMakeLists.txt +++ b/lib/Analysis/CMakeLists.txt @@ -7,9 +7,11 @@ add_llvm_library(LLVMAnalysis Analysis.cpp BasicAliasAnalysis.cpp BlockFrequencyInfo.cpp + BlockFrequencyInfoImpl.cpp BranchProbabilityInfo.cpp CFG.cpp CFGPrinter.cpp + CGSCCPassManager.cpp CaptureTracking.cpp CostModel.cpp CodeMetrics.cpp diff --git a/lib/Analysis/ConstantFolding.cpp b/lib/Analysis/ConstantFolding.cpp index 782acfa..0ac1cb5 100644 --- a/lib/Analysis/ConstantFolding.cpp +++ b/lib/Analysis/ConstantFolding.cpp @@ -56,7 +56,7 @@ static Constant *FoldBitCast(Constant *C, Type *DestTy, // Handle a vector->integer cast. if (IntegerType *IT = dyn_cast<IntegerType>(DestTy)) { VectorType *VTy = dyn_cast<VectorType>(C->getType()); - if (VTy == 0) + if (!VTy) return ConstantExpr::getBitCast(C, DestTy); unsigned NumSrcElts = VTy->getNumElements(); @@ -73,7 +73,7 @@ static Constant *FoldBitCast(Constant *C, Type *DestTy, } ConstantDataVector *CDV = dyn_cast<ConstantDataVector>(C); - if (CDV == 0) + if (!CDV) return ConstantExpr::getBitCast(C, DestTy); // Now that we know that the input value is a vector of integers, just shift @@ -93,7 +93,7 @@ static Constant *FoldBitCast(Constant *C, Type *DestTy, // The code below only handles casts to vectors currently. VectorType *DestVTy = dyn_cast<VectorType>(DestTy); - if (DestVTy == 0) + if (!DestVTy) return ConstantExpr::getBitCast(C, DestTy); // If this is a scalar -> vector cast, convert the input into a <1 x scalar> @@ -411,32 +411,32 @@ static Constant *FoldReinterpretLoadFromConstPtr(Constant *C, TD.getTypeAllocSizeInBits(LoadTy), AS); } else - return 0; + return nullptr; C = FoldBitCast(C, MapTy, TD); if (Constant *Res = FoldReinterpretLoadFromConstPtr(C, TD)) return FoldBitCast(Res, LoadTy, TD); - return 0; + return nullptr; } unsigned BytesLoaded = (IntType->getBitWidth() + 7) / 8; if (BytesLoaded > 32 || BytesLoaded == 0) - return 0; + return nullptr; GlobalValue *GVal; APInt Offset; if (!IsConstantOffsetFromGlobal(C, GVal, Offset, TD)) - return 0; + return nullptr; GlobalVariable *GV = dyn_cast<GlobalVariable>(GVal); if (!GV || !GV->isConstant() || !GV->hasDefinitiveInitializer() || !GV->getInitializer()->getType()->isSized()) - return 0; + return nullptr; // If we're loading off the beginning of the global, some bytes may be valid, // but we don't try to handle this. if (Offset.isNegative()) - return 0; + return nullptr; // If we're not accessing anything in this constant, the result is undefined. if (Offset.getZExtValue() >= @@ -446,7 +446,7 @@ static Constant *FoldReinterpretLoadFromConstPtr(Constant *C, unsigned char RawBytes[32] = {0}; if (!ReadDataFromGlobal(GV->getInitializer(), Offset.getZExtValue(), RawBytes, BytesLoaded, TD)) - return 0; + return nullptr; APInt ResultVal = APInt(IntType->getBitWidth(), 0); if (TD.isLittleEndian()) { @@ -466,6 +466,52 @@ static Constant *FoldReinterpretLoadFromConstPtr(Constant *C, return ConstantInt::get(IntType->getContext(), ResultVal); } +static Constant *ConstantFoldLoadThroughBitcast(ConstantExpr *CE, + const DataLayout *DL) { + if (!DL) + return nullptr; + auto *DestPtrTy = dyn_cast<PointerType>(CE->getType()); + if (!DestPtrTy) + return nullptr; + Type *DestTy = DestPtrTy->getElementType(); + + Constant *C = ConstantFoldLoadFromConstPtr(CE->getOperand(0), DL); + if (!C) + return nullptr; + + do { + Type *SrcTy = C->getType(); + + // If the type sizes are the same and a cast is legal, just directly + // cast the constant. + if (DL->getTypeSizeInBits(DestTy) == DL->getTypeSizeInBits(SrcTy)) { + Instruction::CastOps Cast = Instruction::BitCast; + // If we are going from a pointer to int or vice versa, we spell the cast + // differently. + if (SrcTy->isIntegerTy() && DestTy->isPointerTy()) + Cast = Instruction::IntToPtr; + else if (SrcTy->isPointerTy() && DestTy->isIntegerTy()) + Cast = Instruction::PtrToInt; + + if (CastInst::castIsValid(Cast, C, DestTy)) + return ConstantExpr::getCast(Cast, C, DestTy); + } + + // If this isn't an aggregate type, there is nothing we can do to drill down + // and find a bitcastable constant. + if (!SrcTy->isAggregateType()) + return nullptr; + + // We're simulating a load through a pointer that was bitcast to point to + // a different type, so we can try to walk down through the initial + // elements of an aggregate to see if some part of th e aggregate is + // castable to implement the "load" semantic model. + C = C->getAggregateElement(0u); + } while (C); + + return nullptr; +} + /// ConstantFoldLoadFromConstPtr - Return the value that a load from C would /// produce if it is constant and determinable. If this is not determinable, /// return null. @@ -479,7 +525,7 @@ Constant *llvm::ConstantFoldLoadFromConstPtr(Constant *C, // If the loaded value isn't a constant expr, we can't handle it. ConstantExpr *CE = dyn_cast<ConstantExpr>(C); if (!CE) - return 0; + return nullptr; if (CE->getOpcode() == Instruction::GetElementPtr) { if (GlobalVariable *GV = dyn_cast<GlobalVariable>(CE->getOperand(0))) { @@ -491,6 +537,10 @@ Constant *llvm::ConstantFoldLoadFromConstPtr(Constant *C, } } + if (CE->getOpcode() == Instruction::BitCast) + if (Constant *LoadedC = ConstantFoldLoadThroughBitcast(CE, TD)) + return LoadedC; + // Instead of loading constant c string, use corresponding integer value // directly if string length is small enough. StringRef Str; @@ -542,16 +592,16 @@ Constant *llvm::ConstantFoldLoadFromConstPtr(Constant *C, // Try hard to fold loads from bitcasted strange and non-type-safe things. if (TD) return FoldReinterpretLoadFromConstPtr(CE, *TD); - return 0; + return nullptr; } static Constant *ConstantFoldLoadInst(const LoadInst *LI, const DataLayout *TD){ - if (LI->isVolatile()) return 0; + if (LI->isVolatile()) return nullptr; if (Constant *C = dyn_cast<Constant>(LI->getOperand(0))) return ConstantFoldLoadFromConstPtr(C, TD); - return 0; + return nullptr; } /// SymbolicallyEvaluateBinop - One of Op0/Op1 is a constant expression. @@ -571,8 +621,8 @@ static Constant *SymbolicallyEvaluateBinop(unsigned Opc, Constant *Op0, unsigned BitWidth = DL->getTypeSizeInBits(Op0->getType()->getScalarType()); APInt KnownZero0(BitWidth, 0), KnownOne0(BitWidth, 0); APInt KnownZero1(BitWidth, 0), KnownOne1(BitWidth, 0); - ComputeMaskedBits(Op0, KnownZero0, KnownOne0, DL); - ComputeMaskedBits(Op1, KnownZero1, KnownOne1, DL); + computeKnownBits(Op0, KnownZero0, KnownOne0, DL); + computeKnownBits(Op1, KnownZero1, KnownOne1, DL); if ((KnownOne1 | KnownZero0).isAllOnesValue()) { // All the bits of Op0 that the 'and' could be masking are already zero. return Op0; @@ -608,7 +658,7 @@ static Constant *SymbolicallyEvaluateBinop(unsigned Opc, Constant *Op0, } } - return 0; + return nullptr; } /// CastGEPIndices - If array indices are not pointer-sized integers, @@ -618,7 +668,7 @@ static Constant *CastGEPIndices(ArrayRef<Constant *> Ops, Type *ResultTy, const DataLayout *TD, const TargetLibraryInfo *TLI) { if (!TD) - return 0; + return nullptr; Type *IntPtrTy = TD->getIntPtrType(ResultTy); @@ -641,7 +691,7 @@ static Constant *CastGEPIndices(ArrayRef<Constant *> Ops, } if (!Any) - return 0; + return nullptr; Constant *C = ConstantExpr::getGetElementPtr(Ops[0], NewIdxs); if (ConstantExpr *CE = dyn_cast<ConstantExpr>(C)) { @@ -676,7 +726,7 @@ static Constant *SymbolicallyEvaluateGEP(ArrayRef<Constant *> Ops, Constant *Ptr = Ops[0]; if (!TD || !Ptr->getType()->getPointerElementType()->isSized() || !Ptr->getType()->isPointerTy()) - return 0; + return nullptr; Type *IntPtrTy = TD->getIntPtrType(Ptr->getType()); Type *ResultElementTy = ResultTy->getPointerElementType(); @@ -690,7 +740,7 @@ static Constant *SymbolicallyEvaluateGEP(ArrayRef<Constant *> Ops, // "inttoptr (sub (ptrtoint Ptr), V)" if (Ops.size() == 2 && ResultElementTy->isIntegerTy(8)) { ConstantExpr *CE = dyn_cast<ConstantExpr>(Ops[1]); - assert((CE == 0 || CE->getType() == IntPtrTy) && + assert((!CE || CE->getType() == IntPtrTy) && "CastGEPIndices didn't canonicalize index types!"); if (CE && CE->getOpcode() == Instruction::Sub && CE->getOperand(0)->isNullValue()) { @@ -702,7 +752,7 @@ static Constant *SymbolicallyEvaluateGEP(ArrayRef<Constant *> Ops, return Res; } } - return 0; + return nullptr; } unsigned BitWidth = TD->getTypeSizeInBits(IntPtrTy); @@ -765,7 +815,7 @@ static Constant *SymbolicallyEvaluateGEP(ArrayRef<Constant *> Ops, // Only handle pointers to sized types, not pointers to functions. if (!ATy->getElementType()->isSized()) - return 0; + return nullptr; } // Determine which element of the array the offset points into. @@ -810,7 +860,7 @@ static Constant *SymbolicallyEvaluateGEP(ArrayRef<Constant *> Ops, // type, then the offset is pointing into the middle of an indivisible // member, so we can't simplify it. if (Offset != 0) - return 0; + return nullptr; // Create a GEP. Constant *C = ConstantExpr::getGetElementPtr(Ptr, NewIdxs); @@ -841,7 +891,7 @@ Constant *llvm::ConstantFoldInstruction(Instruction *I, const TargetLibraryInfo *TLI) { // Handle PHI nodes quickly here... if (PHINode *PN = dyn_cast<PHINode>(I)) { - Constant *CommonValue = 0; + Constant *CommonValue = nullptr; for (unsigned i = 0, e = PN->getNumIncomingValues(); i != e; ++i) { Value *Incoming = PN->getIncomingValue(i); @@ -854,14 +904,14 @@ Constant *llvm::ConstantFoldInstruction(Instruction *I, // If the incoming value is not a constant, then give up. Constant *C = dyn_cast<Constant>(Incoming); if (!C) - return 0; + return nullptr; // Fold the PHI's operands. if (ConstantExpr *NewC = dyn_cast<ConstantExpr>(C)) C = ConstantFoldConstantExpression(NewC, TD, TLI); // If the incoming value is a different constant to // the one we saw previously, then give up. if (CommonValue && C != CommonValue) - return 0; + return nullptr; CommonValue = C; } @@ -876,7 +926,7 @@ Constant *llvm::ConstantFoldInstruction(Instruction *I, for (User::op_iterator i = I->op_begin(), e = I->op_end(); i != e; ++i) { Constant *Op = dyn_cast<Constant>(*i); if (!Op) - return 0; // All operands not constant! + return nullptr; // All operands not constant! // Fold the Instruction's operands. if (ConstantExpr *NewCE = dyn_cast<ConstantExpr>(Op)) @@ -966,14 +1016,14 @@ Constant *llvm::ConstantFoldInstOperands(unsigned Opcode, Type *DestTy, } switch (Opcode) { - default: return 0; + default: return nullptr; case Instruction::ICmp: case Instruction::FCmp: llvm_unreachable("Invalid for compares"); case Instruction::Call: if (Function *F = dyn_cast<Function>(Ops.back())) if (canConstantFoldCallTo(F)) return ConstantFoldCall(F, Ops.slice(0, Ops.size() - 1), TLI); - return 0; + return nullptr; case Instruction::PtrToInt: // If the input is a inttoptr, eliminate the pair. This requires knowing // the width of a pointer, so it can't be done in ConstantExpr::getCast. @@ -1142,14 +1192,14 @@ Constant *llvm::ConstantFoldCompareInstOperands(unsigned Predicate, Constant *llvm::ConstantFoldLoadThroughGEPConstantExpr(Constant *C, ConstantExpr *CE) { if (!CE->getOperand(1)->isNullValue()) - return 0; // Do not allow stepping over the value! + return nullptr; // Do not allow stepping over the value! // Loop over all of the operands, tracking down which value we are // addressing. for (unsigned i = 2, e = CE->getNumOperands(); i != e; ++i) { C = C->getAggregateElement(CE->getOperand(i)); - if (C == 0) - return 0; + if (!C) + return nullptr; } return C; } @@ -1164,8 +1214,8 @@ Constant *llvm::ConstantFoldLoadThroughGEPIndices(Constant *C, // addressing. for (unsigned i = 0, e = Indices.size(); i != e; ++i) { C = C->getAggregateElement(Indices[i]); - if (C == 0) - return 0; + if (!C) + return nullptr; } return C; } @@ -1270,7 +1320,7 @@ static Constant *ConstantFoldFP(double (*NativeFP)(double), double V, V = NativeFP(V); if (sys::llvm_fenv_testexcept()) { sys::llvm_fenv_clearexcept(); - return 0; + return nullptr; } return GetConstantFoldFPValue(V, Ty); @@ -1282,7 +1332,7 @@ static Constant *ConstantFoldBinaryFP(double (*NativeFP)(double, double), V = NativeFP(V, W); if (sys::llvm_fenv_testexcept()) { sys::llvm_fenv_clearexcept(); - return 0; + return nullptr; } return GetConstantFoldFPValue(V, Ty); @@ -1311,7 +1361,7 @@ static Constant *ConstantFoldConvertToInt(const APFloat &Val, /*isSigned=*/true, mode, &isExact); if (status != APFloat::opOK && status != APFloat::opInexact) - return 0; + return nullptr; return ConstantInt::get(Ty, UIntVal, /*isSigned=*/true); } @@ -1345,7 +1395,7 @@ static Constant *ConstantFoldScalarCall(StringRef Name, unsigned IntrinsicID, } if (!Ty->isHalfTy() && !Ty->isFloatTy() && !Ty->isDoubleTy()) - return 0; + return nullptr; if (IntrinsicID == Intrinsic::round) { APFloat V = Op->getValueAPF(); @@ -1357,7 +1407,7 @@ static Constant *ConstantFoldScalarCall(StringRef Name, unsigned IntrinsicID, /// likely to be aborted with an exception anyway, and some host libms /// have known errors raising exceptions. if (Op->getValueAPF().isNaN() || Op->getValueAPF().isInfinity()) - return 0; + return nullptr; /// Currently APFloat versions of these functions do not exist, so we use /// the host native double versions. Float versions are not called @@ -1396,7 +1446,7 @@ static Constant *ConstantFoldScalarCall(StringRef Name, unsigned IntrinsicID, } if (!TLI) - return 0; + return nullptr; switch (Name[0]) { case 'a': @@ -1467,7 +1517,7 @@ static Constant *ConstantFoldScalarCall(StringRef Name, unsigned IntrinsicID, default: break; } - return 0; + return nullptr; } if (ConstantInt *Op = dyn_cast<ConstantInt>(Operands[0])) { @@ -1491,7 +1541,7 @@ static Constant *ConstantFoldScalarCall(StringRef Name, unsigned IntrinsicID, return ConstantFP::get(Ty->getContext(), Val); } default: - return 0; + return nullptr; } } @@ -1523,21 +1573,21 @@ static Constant *ConstantFoldScalarCall(StringRef Name, unsigned IntrinsicID, if (isa<UndefValue>(Operands[0])) { if (IntrinsicID == Intrinsic::bswap) return Operands[0]; - return 0; + return nullptr; } - return 0; + return nullptr; } if (Operands.size() == 2) { if (ConstantFP *Op1 = dyn_cast<ConstantFP>(Operands[0])) { if (!Ty->isHalfTy() && !Ty->isFloatTy() && !Ty->isDoubleTy()) - return 0; + return nullptr; double Op1V = getValueAsDouble(Op1); if (ConstantFP *Op2 = dyn_cast<ConstantFP>(Operands[1])) { if (Op2->getType() != Op1->getType()) - return 0; + return nullptr; double Op2V = getValueAsDouble(Op2); if (IntrinsicID == Intrinsic::pow) { @@ -1550,7 +1600,7 @@ static Constant *ConstantFoldScalarCall(StringRef Name, unsigned IntrinsicID, return ConstantFP::get(Ty->getContext(), V1); } if (!TLI) - return 0; + return nullptr; if (Name == "pow" && TLI->has(LibFunc::pow)) return ConstantFoldBinaryFP(pow, Op1V, Op2V, Ty); if (Name == "fmod" && TLI->has(LibFunc::fmod)) @@ -1571,7 +1621,7 @@ static Constant *ConstantFoldScalarCall(StringRef Name, unsigned IntrinsicID, APFloat((double)std::pow((double)Op1V, (int)Op2C->getZExtValue()))); } - return 0; + return nullptr; } if (ConstantInt *Op1 = dyn_cast<ConstantInt>(Operands[0])) { @@ -1624,13 +1674,13 @@ static Constant *ConstantFoldScalarCall(StringRef Name, unsigned IntrinsicID, } } - return 0; + return nullptr; } - return 0; + return nullptr; } if (Operands.size() != 3) - return 0; + return nullptr; if (const ConstantFP *Op1 = dyn_cast<ConstantFP>(Operands[0])) { if (const ConstantFP *Op2 = dyn_cast<ConstantFP>(Operands[1])) { @@ -1646,14 +1696,14 @@ static Constant *ConstantFoldScalarCall(StringRef Name, unsigned IntrinsicID, if (s != APFloat::opInvalidOp) return ConstantFP::get(Ty->getContext(), V); - return 0; + return nullptr; } } } } } - return 0; + return nullptr; } static Constant *ConstantFoldVectorCall(StringRef Name, unsigned IntrinsicID, @@ -1690,7 +1740,7 @@ Constant * llvm::ConstantFoldCall(Function *F, ArrayRef<Constant *> Operands, const TargetLibraryInfo *TLI) { if (!F->hasName()) - return 0; + return nullptr; StringRef Name = F->getName(); Type *Ty = F->getReturnType(); diff --git a/lib/Analysis/CostModel.cpp b/lib/Analysis/CostModel.cpp index b49211d..780b1aa 100644 --- a/lib/Analysis/CostModel.cpp +++ b/lib/Analysis/CostModel.cpp @@ -17,8 +17,6 @@ // //===----------------------------------------------------------------------===// -#define CM_NAME "cost-model" -#define DEBUG_TYPE CM_NAME #include "llvm/ADT/STLExtras.h" #include "llvm/Analysis/Passes.h" #include "llvm/Analysis/TargetTransformInfo.h" @@ -32,6 +30,9 @@ #include "llvm/Support/raw_ostream.h" using namespace llvm; +#define CM_NAME "cost-model" +#define DEBUG_TYPE CM_NAME + static cl::opt<bool> EnableReduxCost("costmodel-reduxcost", cl::init(false), cl::Hidden, cl::desc("Recognize reduction patterns.")); @@ -41,7 +42,7 @@ namespace { public: static char ID; // Class identification, replacement for typeinfo - CostModelAnalysis() : FunctionPass(ID), F(0), TTI(0) { + CostModelAnalysis() : FunctionPass(ID), F(nullptr), TTI(nullptr) { initializeCostModelAnalysisPass( *PassRegistry::getPassRegistry()); } @@ -101,24 +102,13 @@ static TargetTransformInfo::OperandValueKind getOperandInfo(Value *V) { // Check for a splat of a constant or for a non uniform vector of constants. if (isa<ConstantVector>(V) || isa<ConstantDataVector>(V)) { OpInfo = TargetTransformInfo::OK_NonUniformConstantValue; - if (cast<Constant>(V)->getSplatValue() != NULL) + if (cast<Constant>(V)->getSplatValue() != nullptr) OpInfo = TargetTransformInfo::OK_UniformConstantValue; } return OpInfo; } -static bool matchMask(SmallVectorImpl<int> &M1, SmallVectorImpl<int> &M2) { - if (M1.size() != M2.size()) - return false; - - for (unsigned i = 0, e = M1.size(); i != e; ++i) - if (M1[i] != M2[i]) - return false; - - return true; -} - static bool matchPairwiseShuffleMask(ShuffleVectorInst *SI, bool IsLeft, unsigned Level) { // We don't need a shuffle if we just want to have element 0 in position 0 of @@ -136,7 +126,7 @@ static bool matchPairwiseShuffleMask(ShuffleVectorInst *SI, bool IsLeft, Mask[i] = val; SmallVector<int, 16> ActualMask = SI->getShuffleMask(); - if (!matchMask(Mask, ActualMask)) + if (Mask != ActualMask) return false; return true; @@ -150,7 +140,7 @@ static bool matchPairwiseReductionAtLevel(const BinaryOperator *BinOp, // %rdx.shuf.0.1 = shufflevector <4 x float> %rdx, <4 x float> undef, // <4 x i32> <i32 1, i32 3, i32 undef, i32 undef> // %bin.rdx.0 = fadd <4 x float> %rdx.shuf.0.0, %rdx.shuf.0.1 - if (BinOp == 0) + if (BinOp == nullptr) return false; assert(BinOp->getType()->isVectorTy() && "Expecting a vector type"); @@ -171,9 +161,9 @@ static bool matchPairwiseReductionAtLevel(const BinaryOperator *BinOp, return false; // Shuffle inputs must match. - Value *NextLevelOpL = LS ? LS->getOperand(0) : 0; - Value *NextLevelOpR = RS ? RS->getOperand(0) : 0; - Value *NextLevelOp = 0; + Value *NextLevelOpL = LS ? LS->getOperand(0) : nullptr; + Value *NextLevelOpR = RS ? RS->getOperand(0) : nullptr; + Value *NextLevelOp = nullptr; if (NextLevelOpR && NextLevelOpL) { // If we have two shuffles their operands must match. if (NextLevelOpL != NextLevelOpR) @@ -198,7 +188,7 @@ static bool matchPairwiseReductionAtLevel(const BinaryOperator *BinOp, // Check that the next levels binary operation exists and matches with the // current one. - BinaryOperator *NextLevelBinOp = 0; + BinaryOperator *NextLevelBinOp = nullptr; if (Level + 1 != NumLevels) { if (!(NextLevelBinOp = dyn_cast<BinaryOperator>(NextLevelOp))) return false; @@ -277,7 +267,7 @@ getShuffleAndOtherOprd(BinaryOperator *B) { Value *L = B->getOperand(0); Value *R = B->getOperand(1); - ShuffleVectorInst *S = 0; + ShuffleVectorInst *S = nullptr; if ((S = dyn_cast<ShuffleVectorInst>(L))) return std::make_pair(R, S); @@ -337,7 +327,7 @@ static bool matchVectorSplittingReduction(const ExtractElementInst *ReduxRoot, std::tie(NextRdxOp, Shuffle) = getShuffleAndOtherOprd(BinOp); // Check the current reduction operation and the shuffle use the same value. - if (Shuffle == 0) + if (Shuffle == nullptr) return false; if (Shuffle->getOperand(0) != NextRdxOp) return false; @@ -349,7 +339,7 @@ static bool matchVectorSplittingReduction(const ExtractElementInst *ReduxRoot, std::fill(&ShuffleMask[MaskStart], ShuffleMask.end(), -1); SmallVector<int, 16> Mask = Shuffle->getShuffleMask(); - if (!matchMask(ShuffleMask, Mask)) + if (ShuffleMask != Mask) return false; RdxOp = NextRdxOp; @@ -478,7 +468,7 @@ unsigned CostModelAnalysis::getInstructionCost(const Instruction *I) const { if (NumVecElems == Mask.size() && isReverseVectorMask(Mask)) return TTI->getShuffleCost(TargetTransformInfo::SK_Reverse, VecTypOp0, 0, - 0); + nullptr); return -1; } case Instruction::Call: diff --git a/lib/Analysis/Delinearization.cpp b/lib/Analysis/Delinearization.cpp index fd4a2f0..9334ceb 100644 --- a/lib/Analysis/Delinearization.cpp +++ b/lib/Analysis/Delinearization.cpp @@ -14,8 +14,6 @@ // //===----------------------------------------------------------------------===// -#define DL_NAME "delinearize" -#define DEBUG_TYPE DL_NAME #include "llvm/IR/Constants.h" #include "llvm/Analysis/LoopInfo.h" #include "llvm/Analysis/Passes.h" @@ -34,6 +32,9 @@ using namespace llvm; +#define DL_NAME "delinearize" +#define DEBUG_TYPE DL_NAME + namespace { class Delinearization : public FunctionPass { @@ -51,7 +52,7 @@ public: } bool runOnFunction(Function &F) override; void getAnalysisUsage(AnalysisUsage &AU) const override; - void print(raw_ostream &O, const Module *M = 0) const override; + void print(raw_ostream &O, const Module *M = nullptr) const override; }; } // end anonymous namespace @@ -76,7 +77,7 @@ static Value *getPointerOperand(Instruction &Inst) { return Store->getPointerOperand(); else if (GetElementPtrInst *Gep = dyn_cast<GetElementPtrInst>(&Inst)) return Gep->getPointerOperand(); - return NULL; + return nullptr; } void Delinearization::print(raw_ostream &O, const Module *) const { @@ -92,25 +93,38 @@ void Delinearization::print(raw_ostream &O, const Module *) const { const BasicBlock *BB = Inst->getParent(); // Delinearize the memory access as analyzed in all the surrounding loops. // Do not analyze memory accesses outside loops. - for (Loop *L = LI->getLoopFor(BB); L != NULL; L = L->getParentLoop()) { + for (Loop *L = LI->getLoopFor(BB); L != nullptr; L = L->getParentLoop()) { const SCEV *AccessFn = SE->getSCEVAtScope(getPointerOperand(*Inst), L); + + const SCEVUnknown *BasePointer = + dyn_cast<SCEVUnknown>(SE->getPointerBase(AccessFn)); + // Do not delinearize if we cannot find the base pointer. + if (!BasePointer) + break; + AccessFn = SE->getMinusSCEV(AccessFn, BasePointer); const SCEVAddRecExpr *AR = dyn_cast<SCEVAddRecExpr>(AccessFn); // Do not try to delinearize memory accesses that are not AddRecs. if (!AR) break; + + O << "\n"; + O << "Inst:" << *Inst << "\n"; + O << "In Loop with Header: " << L->getHeader()->getName() << "\n"; O << "AddRec: " << *AR << "\n"; SmallVector<const SCEV *, 3> Subscripts, Sizes; - const SCEV *Res = AR->delinearize(*SE, Subscripts, Sizes); - int Size = Subscripts.size(); - if (Res == AR || Size == 0) { + AR->delinearize(*SE, Subscripts, Sizes, SE->getElementSize(Inst)); + if (Subscripts.size() == 0 || Sizes.size() == 0 || + Subscripts.size() != Sizes.size()) { O << "failed to delinearize\n"; continue; } - O << "Base offset: " << *Res << "\n"; + + O << "Base offset: " << *BasePointer << "\n"; O << "ArrayDecl[UnknownSize]"; + int Size = Subscripts.size(); for (int i = 0; i < Size - 1; i++) O << "[" << *Sizes[i] << "]"; O << " with elements of " << *Sizes[Size - 1] << " bytes.\n"; diff --git a/lib/Analysis/DependenceAnalysis.cpp b/lib/Analysis/DependenceAnalysis.cpp index ff98611..d0784f1 100644 --- a/lib/Analysis/DependenceAnalysis.cpp +++ b/lib/Analysis/DependenceAnalysis.cpp @@ -51,8 +51,6 @@ // // //===----------------------------------------------------------------------===// -#define DEBUG_TYPE "da" - #include "llvm/Analysis/DependenceAnalysis.h" #include "llvm/ADT/Statistic.h" #include "llvm/Analysis/AliasAnalysis.h" @@ -69,6 +67,8 @@ using namespace llvm; +#define DEBUG_TYPE "da" + //===----------------------------------------------------------------------===// // statistics @@ -234,7 +234,7 @@ FullDependence::FullDependence(Instruction *Source, Levels(CommonLevels), LoopIndependent(PossiblyLoopIndependent) { Consistent = true; - DV = CommonLevels ? new DVEntry[CommonLevels] : NULL; + DV = CommonLevels ? new DVEntry[CommonLevels] : nullptr; } // The rest are simple getters that hide the implementation. @@ -658,7 +658,7 @@ Value *getPointerOperand(Instruction *I) { if (StoreInst *SI = dyn_cast<StoreInst>(I)) return SI->getPointerOperand(); llvm_unreachable("Value is not load or store instruction"); - return 0; + return nullptr; } @@ -932,7 +932,7 @@ const SCEV *DependenceAnalysis::collectUpperBound(const Loop *L, const SCEV *UB = SE->getBackedgeTakenCount(L); return SE->getNoopOrZeroExtend(UB, T); } - return NULL; + return nullptr; } @@ -943,7 +943,7 @@ const SCEVConstant *DependenceAnalysis::collectConstantUpperBound(const Loop *L, ) const { if (const SCEV *UB = collectUpperBound(L, T)) return dyn_cast<SCEVConstant>(UB); - return NULL; + return nullptr; } @@ -2194,7 +2194,7 @@ const SCEVConstant *getConstantPart(const SCEVMulExpr *Product) { if (const SCEVConstant *Constant = dyn_cast<SCEVConstant>(Product->getOperand(Op))) return Constant; } - return NULL; + return nullptr; } @@ -2646,8 +2646,8 @@ void DependenceAnalysis::findBoundsALL(CoefficientInfo *A, CoefficientInfo *B, BoundInfo *Bound, unsigned K) const { - Bound[K].Lower[Dependence::DVEntry::ALL] = NULL; // Default value = -infinity. - Bound[K].Upper[Dependence::DVEntry::ALL] = NULL; // Default value = +infinity. + Bound[K].Lower[Dependence::DVEntry::ALL] = nullptr; // Default value = -infinity. + Bound[K].Upper[Dependence::DVEntry::ALL] = nullptr; // Default value = +infinity. if (Bound[K].Iterations) { Bound[K].Lower[Dependence::DVEntry::ALL] = SE->getMulExpr(SE->getMinusSCEV(A[K].NegPart, B[K].PosPart), @@ -2687,8 +2687,8 @@ void DependenceAnalysis::findBoundsEQ(CoefficientInfo *A, CoefficientInfo *B, BoundInfo *Bound, unsigned K) const { - Bound[K].Lower[Dependence::DVEntry::EQ] = NULL; // Default value = -infinity. - Bound[K].Upper[Dependence::DVEntry::EQ] = NULL; // Default value = +infinity. + Bound[K].Lower[Dependence::DVEntry::EQ] = nullptr; // Default value = -infinity. + Bound[K].Upper[Dependence::DVEntry::EQ] = nullptr; // Default value = +infinity. if (Bound[K].Iterations) { const SCEV *Delta = SE->getMinusSCEV(A[K].Coeff, B[K].Coeff); const SCEV *NegativePart = getNegativePart(Delta); @@ -2729,8 +2729,8 @@ void DependenceAnalysis::findBoundsLT(CoefficientInfo *A, CoefficientInfo *B, BoundInfo *Bound, unsigned K) const { - Bound[K].Lower[Dependence::DVEntry::LT] = NULL; // Default value = -infinity. - Bound[K].Upper[Dependence::DVEntry::LT] = NULL; // Default value = +infinity. + Bound[K].Lower[Dependence::DVEntry::LT] = nullptr; // Default value = -infinity. + Bound[K].Upper[Dependence::DVEntry::LT] = nullptr; // Default value = +infinity. if (Bound[K].Iterations) { const SCEV *Iter_1 = SE->getMinusSCEV(Bound[K].Iterations, @@ -2776,8 +2776,8 @@ void DependenceAnalysis::findBoundsGT(CoefficientInfo *A, CoefficientInfo *B, BoundInfo *Bound, unsigned K) const { - Bound[K].Lower[Dependence::DVEntry::GT] = NULL; // Default value = -infinity. - Bound[K].Upper[Dependence::DVEntry::GT] = NULL; // Default value = +infinity. + Bound[K].Lower[Dependence::DVEntry::GT] = nullptr; // Default value = -infinity. + Bound[K].Upper[Dependence::DVEntry::GT] = nullptr; // Default value = +infinity. if (Bound[K].Iterations) { const SCEV *Iter_1 = SE->getMinusSCEV(Bound[K].Iterations, @@ -2829,7 +2829,7 @@ DependenceAnalysis::collectCoeffInfo(const SCEV *Subscript, CI[K].Coeff = Zero; CI[K].PosPart = Zero; CI[K].NegPart = Zero; - CI[K].Iterations = NULL; + CI[K].Iterations = nullptr; } while (const SCEVAddRecExpr *AddRec = dyn_cast<SCEVAddRecExpr>(Subscript)) { const Loop *L = AddRec->getLoop(); @@ -2872,7 +2872,7 @@ const SCEV *DependenceAnalysis::getLowerBound(BoundInfo *Bound) const { if (Bound[K].Lower[Bound[K].Direction]) Sum = SE->getAddExpr(Sum, Bound[K].Lower[Bound[K].Direction]); else - Sum = NULL; + Sum = nullptr; } return Sum; } @@ -2888,7 +2888,7 @@ const SCEV *DependenceAnalysis::getUpperBound(BoundInfo *Bound) const { if (Bound[K].Upper[Bound[K].Direction]) Sum = SE->getAddExpr(Sum, Bound[K].Upper[Bound[K].Direction]); else - Sum = NULL; + Sum = nullptr; } return Sum; } @@ -3148,12 +3148,12 @@ void DependenceAnalysis::updateDirection(Dependence::DVEntry &Level, } else if (CurConstraint.isLine()) { Level.Scalar = false; - Level.Distance = NULL; + Level.Distance = nullptr; // direction should be accurate } else if (CurConstraint.isPoint()) { Level.Scalar = false; - Level.Distance = NULL; + Level.Distance = nullptr; unsigned NewDirection = Dependence::DVEntry::NONE; if (!isKnownPredicate(CmpInst::ICMP_NE, CurConstraint.getY(), @@ -3180,59 +3180,55 @@ void DependenceAnalysis::updateDirection(Dependence::DVEntry &Level, /// source and destination array references are recurrences on a nested loop, /// this function flattens the nested recurrences into separate recurrences /// for each loop level. -bool -DependenceAnalysis::tryDelinearize(const SCEV *SrcSCEV, const SCEV *DstSCEV, - SmallVectorImpl<Subscript> &Pair) const { +bool DependenceAnalysis::tryDelinearize(const SCEV *SrcSCEV, + const SCEV *DstSCEV, + SmallVectorImpl<Subscript> &Pair, + const SCEV *ElementSize) const { + const SCEVUnknown *SrcBase = + dyn_cast<SCEVUnknown>(SE->getPointerBase(SrcSCEV)); + const SCEVUnknown *DstBase = + dyn_cast<SCEVUnknown>(SE->getPointerBase(DstSCEV)); + + if (!SrcBase || !DstBase || SrcBase != DstBase) + return false; + + SrcSCEV = SE->getMinusSCEV(SrcSCEV, SrcBase); + DstSCEV = SE->getMinusSCEV(DstSCEV, DstBase); + const SCEVAddRecExpr *SrcAR = dyn_cast<SCEVAddRecExpr>(SrcSCEV); const SCEVAddRecExpr *DstAR = dyn_cast<SCEVAddRecExpr>(DstSCEV); if (!SrcAR || !DstAR || !SrcAR->isAffine() || !DstAR->isAffine()) return false; - SmallVector<const SCEV *, 4> SrcSubscripts, DstSubscripts, SrcSizes, DstSizes; - const SCEV *RemainderS = SrcAR->delinearize(*SE, SrcSubscripts, SrcSizes); - const SCEV *RemainderD = DstAR->delinearize(*SE, DstSubscripts, DstSizes); + // First step: collect parametric terms in both array references. + SmallVector<const SCEV *, 4> Terms; + SrcAR->collectParametricTerms(*SE, Terms); + DstAR->collectParametricTerms(*SE, Terms); - int size = SrcSubscripts.size(); - // Fail when there is only a subscript: that's a linearized access function. - if (size < 2) - return false; - - int dstSize = DstSubscripts.size(); - // Fail when the number of subscripts in Src and Dst differ. - if (size != dstSize) - return false; + // Second step: find subscript sizes. + SmallVector<const SCEV *, 4> Sizes; + SE->findArrayDimensions(Terms, Sizes, ElementSize); - // Fail when the size of any of the subscripts in Src and Dst differs: the - // dependence analysis assumes that elements in the same array have same size. - // SCEV delinearization does not have a context based on which it would decide - // globally the size of subscripts that would best fit all the array accesses. - for (int i = 0; i < size; ++i) - if (SrcSizes[i] != DstSizes[i]) - return false; + // Third step: compute the access functions for each subscript. + SmallVector<const SCEV *, 4> SrcSubscripts, DstSubscripts; + SrcAR->computeAccessFunctions(*SE, SrcSubscripts, Sizes); + DstAR->computeAccessFunctions(*SE, DstSubscripts, Sizes); - // When the difference in remainders is different than a constant it might be - // that the base address of the arrays is not the same. - const SCEV *DiffRemainders = SE->getMinusSCEV(RemainderS, RemainderD); - if (!isa<SCEVConstant>(DiffRemainders)) + // Fail when there is only a subscript: that's a linearized access function. + if (SrcSubscripts.size() < 2 || DstSubscripts.size() < 2 || + SrcSubscripts.size() != DstSubscripts.size()) return false; - // Normalize the last dimension: integrate the size of the "scalar dimension" - // and the remainder of the delinearization. - DstSubscripts[size-1] = SE->getMulExpr(DstSubscripts[size-1], - DstSizes[size-1]); - SrcSubscripts[size-1] = SE->getMulExpr(SrcSubscripts[size-1], - SrcSizes[size-1]); - SrcSubscripts[size-1] = SE->getAddExpr(SrcSubscripts[size-1], RemainderS); - DstSubscripts[size-1] = SE->getAddExpr(DstSubscripts[size-1], RemainderD); + int size = SrcSubscripts.size(); -#ifndef NDEBUG - DEBUG(errs() << "\nSrcSubscripts: "); - for (int i = 0; i < size; i++) - DEBUG(errs() << *SrcSubscripts[i]); - DEBUG(errs() << "\nDstSubscripts: "); - for (int i = 0; i < size; i++) - DEBUG(errs() << *DstSubscripts[i]); -#endif + DEBUG({ + dbgs() << "\nSrcSubscripts: "; + for (int i = 0; i < size; i++) + dbgs() << *SrcSubscripts[i]; + dbgs() << "\nDstSubscripts: "; + for (int i = 0; i < size; i++) + dbgs() << *DstSubscripts[i]; + }); // The delinearization transforms a single-subscript MIV dependence test into // a multi-subscript SIV dependence test that is easier to compute. So we @@ -3290,7 +3286,7 @@ Dependence *DependenceAnalysis::depends(Instruction *Src, if ((!Src->mayReadFromMemory() && !Src->mayWriteToMemory()) || (!Dst->mayReadFromMemory() && !Dst->mayWriteToMemory())) // if both instructions don't reference memory, there's no dependence - return NULL; + return nullptr; if (!isLoadOrStore(Src) || !isLoadOrStore(Dst)) { // can only analyze simple loads and stores, i.e., no calls, invokes, etc. @@ -3310,7 +3306,7 @@ Dependence *DependenceAnalysis::depends(Instruction *Src, case AliasAnalysis::NoAlias: // If the objects noalias, they are distinct, accesses are independent. DEBUG(dbgs() << "no alias\n"); - return NULL; + return nullptr; case AliasAnalysis::MustAlias: break; // The underlying objects alias; test accesses for dependence. } @@ -3363,7 +3359,7 @@ Dependence *DependenceAnalysis::depends(Instruction *Src, } if (Delinearize && Pairs == 1 && CommonLevels > 1 && - tryDelinearize(Pair[0].Src, Pair[0].Dst, Pair)) { + tryDelinearize(Pair[0].Src, Pair[0].Dst, Pair, SE->getElementSize(Src))) { DEBUG(dbgs() << " delinerized GEP\n"); Pairs = Pair.size(); } @@ -3505,26 +3501,26 @@ Dependence *DependenceAnalysis::depends(Instruction *Src, case Subscript::ZIV: DEBUG(dbgs() << ", ZIV\n"); if (testZIV(Pair[SI].Src, Pair[SI].Dst, Result)) - return NULL; + return nullptr; break; case Subscript::SIV: { DEBUG(dbgs() << ", SIV\n"); unsigned Level; - const SCEV *SplitIter = NULL; + const SCEV *SplitIter = nullptr; if (testSIV(Pair[SI].Src, Pair[SI].Dst, Level, Result, NewConstraint, SplitIter)) - return NULL; + return nullptr; break; } case Subscript::RDIV: DEBUG(dbgs() << ", RDIV\n"); if (testRDIV(Pair[SI].Src, Pair[SI].Dst, Result)) - return NULL; + return nullptr; break; case Subscript::MIV: DEBUG(dbgs() << ", MIV\n"); if (testMIV(Pair[SI].Src, Pair[SI].Dst, Pair[SI].Loops, Result)) - return NULL; + return nullptr; break; default: llvm_unreachable("subscript has unexpected classification"); @@ -3558,16 +3554,16 @@ Dependence *DependenceAnalysis::depends(Instruction *Src, DEBUG(dbgs() << "testing subscript " << SJ << ", SIV\n"); // SJ is an SIV subscript that's part of the current coupled group unsigned Level; - const SCEV *SplitIter = NULL; + const SCEV *SplitIter = nullptr; DEBUG(dbgs() << "SIV\n"); if (testSIV(Pair[SJ].Src, Pair[SJ].Dst, Level, Result, NewConstraint, SplitIter)) - return NULL; + return nullptr; ConstrainedLevels.set(Level); if (intersectConstraints(&Constraints[Level], &NewConstraint)) { if (Constraints[Level].isEmpty()) { ++DeltaIndependence; - return NULL; + return nullptr; } Changed = true; } @@ -3593,7 +3589,7 @@ Dependence *DependenceAnalysis::depends(Instruction *Src, case Subscript::ZIV: DEBUG(dbgs() << "ZIV\n"); if (testZIV(Pair[SJ].Src, Pair[SJ].Dst, Result)) - return NULL; + return nullptr; Mivs.reset(SJ); break; case Subscript::SIV: @@ -3616,7 +3612,7 @@ Dependence *DependenceAnalysis::depends(Instruction *Src, if (Pair[SJ].Classification == Subscript::RDIV) { DEBUG(dbgs() << "RDIV test\n"); if (testRDIV(Pair[SJ].Src, Pair[SJ].Dst, Result)) - return NULL; + return nullptr; // I don't yet understand how to propagate RDIV results Mivs.reset(SJ); } @@ -3629,7 +3625,7 @@ Dependence *DependenceAnalysis::depends(Instruction *Src, if (Pair[SJ].Classification == Subscript::MIV) { DEBUG(dbgs() << "MIV test\n"); if (testMIV(Pair[SJ].Src, Pair[SJ].Dst, Pair[SJ].Loops, Result)) - return NULL; + return nullptr; } else llvm_unreachable("expected only MIV subscripts at this point"); @@ -3641,7 +3637,7 @@ Dependence *DependenceAnalysis::depends(Instruction *Src, SJ >= 0; SJ = ConstrainedLevels.find_next(SJ)) { updateDirection(Result.DV[SJ - 1], Constraints[SJ]); if (Result.DV[SJ - 1].Direction == Dependence::DVEntry::NONE) - return NULL; + return nullptr; } } } @@ -3676,11 +3672,11 @@ Dependence *DependenceAnalysis::depends(Instruction *Src, } } if (AllEqual) - return NULL; + return nullptr; } FullDependence *Final = new FullDependence(Result); - Result.DV = NULL; + Result.DV = nullptr; return Final; } @@ -3787,7 +3783,7 @@ const SCEV *DependenceAnalysis::getSplitIteration(const Dependence *Dep, } if (Delinearize && Pairs == 1 && CommonLevels > 1 && - tryDelinearize(Pair[0].Src, Pair[0].Dst, Pair)) { + tryDelinearize(Pair[0].Src, Pair[0].Dst, Pair, SE->getElementSize(Src))) { DEBUG(dbgs() << " delinerized GEP\n"); Pairs = Pair.size(); } @@ -3853,11 +3849,11 @@ const SCEV *DependenceAnalysis::getSplitIteration(const Dependence *Dep, switch (Pair[SI].Classification) { case Subscript::SIV: { unsigned Level; - const SCEV *SplitIter = NULL; + const SCEV *SplitIter = nullptr; (void) testSIV(Pair[SI].Src, Pair[SI].Dst, Level, Result, NewConstraint, SplitIter); if (Level == SplitLevel) { - assert(SplitIter != NULL); + assert(SplitIter != nullptr); return SplitIter; } break; @@ -3892,7 +3888,7 @@ const SCEV *DependenceAnalysis::getSplitIteration(const Dependence *Dep, for (int SJ = Sivs.find_first(); SJ >= 0; SJ = Sivs.find_next(SJ)) { // SJ is an SIV subscript that's part of the current coupled group unsigned Level; - const SCEV *SplitIter = NULL; + const SCEV *SplitIter = nullptr; (void) testSIV(Pair[SJ].Src, Pair[SJ].Dst, Level, Result, NewConstraint, SplitIter); if (Level == SplitLevel && SplitIter) @@ -3933,5 +3929,5 @@ const SCEV *DependenceAnalysis::getSplitIteration(const Dependence *Dep, } } llvm_unreachable("somehow reached end of routine"); - return NULL; + return nullptr; } diff --git a/lib/Analysis/DominanceFrontier.cpp b/lib/Analysis/DominanceFrontier.cpp index f0787f1..74594f8 100644 --- a/lib/Analysis/DominanceFrontier.cpp +++ b/lib/Analysis/DominanceFrontier.cpp @@ -40,12 +40,12 @@ const DominanceFrontier::DomSetType & DominanceFrontier::calculate(const DominatorTree &DT, const DomTreeNode *Node) { BasicBlock *BB = Node->getBlock(); - DomSetType *Result = NULL; + DomSetType *Result = nullptr; std::vector<DFCalculateWorkObject> workList; SmallPtrSet<BasicBlock *, 32> visited; - workList.push_back(DFCalculateWorkObject(BB, NULL, Node, NULL)); + workList.push_back(DFCalculateWorkObject(BB, nullptr, Node, nullptr)); do { DFCalculateWorkObject *currentW = &workList.back(); assert (currentW && "Missing work object."); diff --git a/lib/Analysis/IPA/CallGraph.cpp b/lib/Analysis/IPA/CallGraph.cpp index f43675b..caec253 100644 --- a/lib/Analysis/IPA/CallGraph.cpp +++ b/lib/Analysis/IPA/CallGraph.cpp @@ -21,14 +21,14 @@ using namespace llvm; // CallGraph::CallGraph(Module &M) - : M(M), Root(0), ExternalCallingNode(getOrInsertFunction(0)), - CallsExternalNode(new CallGraphNode(0)) { + : M(M), Root(nullptr), ExternalCallingNode(getOrInsertFunction(nullptr)), + CallsExternalNode(new CallGraphNode(nullptr)) { // Add every function to the call graph. for (Module::iterator I = M.begin(), E = M.end(); I != E; ++I) addToCallGraph(I); // If we didn't find a main function, use the external call graph node - if (Root == 0) + if (!Root) Root = ExternalCallingNode; } @@ -210,7 +210,7 @@ void CallGraphNode::removeOneAbstractEdgeTo(CallGraphNode *Callee) { for (CalledFunctionsVector::iterator I = CalledFunctions.begin(); ; ++I) { assert(I != CalledFunctions.end() && "Cannot find callee to remove!"); CallRecord &CR = *I; - if (CR.second == Callee && CR.first == 0) { + if (CR.second == Callee && CR.first == nullptr) { Callee->DropRef(); *I = CalledFunctions.back(); CalledFunctions.pop_back(); @@ -267,7 +267,7 @@ INITIALIZE_PASS(CallGraphWrapperPass, "basiccg", "CallGraph Construction", char CallGraphWrapperPass::ID = 0; -void CallGraphWrapperPass::releaseMemory() { G.reset(0); } +void CallGraphWrapperPass::releaseMemory() { G.reset(nullptr); } void CallGraphWrapperPass::print(raw_ostream &OS, const Module *) const { if (!G) { @@ -280,7 +280,7 @@ void CallGraphWrapperPass::print(raw_ostream &OS, const Module *) const { } #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP) -void CallGraphWrapperPass::dump() const { print(dbgs(), 0); } +void CallGraphWrapperPass::dump() const { print(dbgs(), nullptr); } #endif // Enuse that users of CallGraph.h also link with this file diff --git a/lib/Analysis/IPA/CallGraphSCCPass.cpp b/lib/Analysis/IPA/CallGraphSCCPass.cpp index aafc085..bfab744 100644 --- a/lib/Analysis/IPA/CallGraphSCCPass.cpp +++ b/lib/Analysis/IPA/CallGraphSCCPass.cpp @@ -15,7 +15,6 @@ // //===----------------------------------------------------------------------===// -#define DEBUG_TYPE "cgscc-passmgr" #include "llvm/Analysis/CallGraphSCCPass.h" #include "llvm/ADT/SCCIterator.h" #include "llvm/ADT/Statistic.h" @@ -23,12 +22,15 @@ #include "llvm/IR/Function.h" #include "llvm/IR/IntrinsicInst.h" #include "llvm/IR/LegacyPassManagers.h" +#include "llvm/IR/LLVMContext.h" #include "llvm/Support/CommandLine.h" #include "llvm/Support/Debug.h" #include "llvm/Support/Timer.h" #include "llvm/Support/raw_ostream.h" using namespace llvm; +#define DEBUG_TYPE "cgscc-passmgr" + static cl::opt<unsigned> MaxIterations("max-cg-scc-iterations", cl::ReallyHidden, cl::init(4)); @@ -112,7 +114,7 @@ bool CGPassManager::RunPassOnSCC(Pass *P, CallGraphSCC &CurSCC, bool Changed = false; PMDataManager *PM = P->getAsPMDataManager(); - if (PM == 0) { + if (!PM) { CallGraphSCCPass *CGSP = (CallGraphSCCPass*)P; if (!CallGraphUpToDate) { DevirtualizedCall |= RefreshCallGraph(CurSCC, CG, false); @@ -144,8 +146,11 @@ bool CGPassManager::RunPassOnSCC(Pass *P, CallGraphSCC &CurSCC, I != E; ++I) { if (Function *F = (*I)->getFunction()) { dumpPassInfo(P, EXECUTION_MSG, ON_FUNCTION_MSG, F->getName()); - TimeRegion PassTimer(getPassTimer(FPP)); - Changed |= FPP->runOnFunction(*F); + { + TimeRegion PassTimer(getPassTimer(FPP)); + Changed |= FPP->runOnFunction(*F); + } + F->getContext().yield(); } } @@ -190,7 +195,7 @@ bool CGPassManager::RefreshCallGraph(CallGraphSCC &CurSCC, SCCIdx != E; ++SCCIdx, ++FunctionNo) { CallGraphNode *CGN = *SCCIdx; Function *F = CGN->getFunction(); - if (F == 0 || F->isDeclaration()) continue; + if (!F || F->isDeclaration()) continue; // Walk the function body looking for call sites. Sync up the call sites in // CGN with those actually in the function. @@ -203,7 +208,7 @@ bool CGPassManager::RefreshCallGraph(CallGraphSCC &CurSCC, for (CallGraphNode::iterator I = CGN->begin(), E = CGN->end(); I != E; ) { // If this call site is null, then the function pass deleted the call // entirely and the WeakVH nulled it out. - if (I->first == 0 || + if (!I->first || // If we've already seen this call site, then the FunctionPass RAUW'd // one call with another, which resulted in two "uses" in the edge // list of the same call. @@ -217,7 +222,7 @@ bool CGPassManager::RefreshCallGraph(CallGraphSCC &CurSCC, "CallGraphSCCPass did not update the CallGraph correctly!"); // If this was an indirect call site, count it. - if (I->second->getFunction() == 0) + if (!I->second->getFunction()) ++NumIndirectRemoved; else ++NumDirectRemoved; @@ -273,7 +278,7 @@ bool CGPassManager::RefreshCallGraph(CallGraphSCC &CurSCC, // site could be turned direct), don't reject it in checking mode, and // don't tweak it to be more precise. if (CheckingMode && CS.getCalledFunction() && - ExistingNode->getFunction() == 0) + ExistingNode->getFunction() == nullptr) continue; assert(!CheckingMode && @@ -286,7 +291,7 @@ bool CGPassManager::RefreshCallGraph(CallGraphSCC &CurSCC, CalleeNode = CG.getOrInsertFunction(Callee); // Keep track of whether we turned an indirect call into a direct // one. - if (ExistingNode->getFunction() == 0) { + if (!ExistingNode->getFunction()) { DevirtualizedCall = true; DEBUG(dbgs() << " CGSCCPASSMGR: Devirtualized call to '" << Callee->getName() << "'\n"); @@ -434,8 +439,8 @@ bool CGPassManager::runOnModule(Module &M) { while (!CGI.isAtEnd()) { // Copy the current SCC and increment past it so that the pass can hack // on the SCC if it wants to without invalidating our iterator. - std::vector<CallGraphNode*> &NodeVec = *CGI; - CurSCC.initialize(&NodeVec[0], &NodeVec[0]+NodeVec.size()); + const std::vector<CallGraphNode *> &NodeVec = *CGI; + CurSCC.initialize(NodeVec.data(), NodeVec.data() + NodeVec.size()); ++CGI; // At the top level, we run all the passes in this pass manager on the diff --git a/lib/Analysis/IPA/GlobalsModRef.cpp b/lib/Analysis/IPA/GlobalsModRef.cpp index f4097e4..607c068 100644 --- a/lib/Analysis/IPA/GlobalsModRef.cpp +++ b/lib/Analysis/IPA/GlobalsModRef.cpp @@ -14,7 +14,6 @@ // //===----------------------------------------------------------------------===// -#define DEBUG_TYPE "globalsmodref-aa" #include "llvm/Analysis/Passes.h" #include "llvm/ADT/SCCIterator.h" #include "llvm/ADT/Statistic.h" @@ -33,6 +32,8 @@ #include <set> using namespace llvm; +#define DEBUG_TYPE "globalsmodref-aa" + STATISTIC(NumNonAddrTakenGlobalVars, "Number of global vars without address taken"); STATISTIC(NumNonAddrTakenFunctions,"Number of functions without address taken"); @@ -177,14 +178,14 @@ namespace { FunctionInfo.find(F); if (I != FunctionInfo.end()) return &I->second; - return 0; + return nullptr; } void AnalyzeGlobals(Module &M); void AnalyzeCallGraph(CallGraph &CG, Module &M); bool AnalyzeUsesOfPointer(Value *V, std::vector<Function*> &Readers, std::vector<Function*> &Writers, - GlobalValue *OkayStoreDest = 0); + GlobalValue *OkayStoreDest = nullptr); bool AnalyzeIndirectGlobalMemory(GlobalValue *GV); }; } @@ -358,7 +359,7 @@ void GlobalsModRef::AnalyzeCallGraph(CallGraph &CG, Module &M) { // We do a bottom-up SCC traversal of the call graph. In other words, we // visit all callees before callers (leaf-first). for (scc_iterator<CallGraph*> I = scc_begin(&CG); !I.isAtEnd(); ++I) { - std::vector<CallGraphNode *> &SCC = *I; + const std::vector<CallGraphNode *> &SCC = *I; assert(!SCC.empty() && "SCC with no functions?"); if (!SCC[0]->getFunction()) { @@ -410,10 +411,8 @@ void GlobalsModRef::AnalyzeCallGraph(CallGraph &CG, Module &M) { FunctionEffect |= CalleeFR->FunctionEffect; // Incorporate callee's effects on globals into our info. - for (std::map<const GlobalValue*, unsigned>::iterator GI = - CalleeFR->GlobalInfo.begin(), E = CalleeFR->GlobalInfo.end(); - GI != E; ++GI) - FR.GlobalInfo[GI->first] |= GI->second; + for (const auto &G : CalleeFR->GlobalInfo) + FR.GlobalInfo[G.first] |= G.second; FR.MayReadAnyGlobal |= CalleeFR->MayReadAnyGlobal; } else { // Can't say anything about it. However, if it is inside our SCC, @@ -492,8 +491,8 @@ GlobalsModRef::alias(const Location &LocA, if (GV1 || GV2) { // If the global's address is taken, pretend we don't know it's a pointer to // the global. - if (GV1 && !NonAddressTakenGlobals.count(GV1)) GV1 = 0; - if (GV2 && !NonAddressTakenGlobals.count(GV2)) GV2 = 0; + if (GV1 && !NonAddressTakenGlobals.count(GV1)) GV1 = nullptr; + if (GV2 && !NonAddressTakenGlobals.count(GV2)) GV2 = nullptr; // If the two pointers are derived from two different non-addr-taken // globals, or if one is and the other isn't, we know these can't alias. @@ -507,7 +506,7 @@ GlobalsModRef::alias(const Location &LocA, // These pointers may be based on the memory owned by an indirect global. If // so, we may be able to handle this. First check to see if the base pointer // is a direct load from an indirect global. - GV1 = GV2 = 0; + GV1 = GV2 = nullptr; if (const LoadInst *LI = dyn_cast<LoadInst>(UV1)) if (GlobalVariable *GV = dyn_cast<GlobalVariable>(LI->getOperand(0))) if (IndirectGlobals.count(GV)) diff --git a/lib/Analysis/IPA/InlineCost.cpp b/lib/Analysis/IPA/InlineCost.cpp index 8dafc1c..66f3f8e 100644 --- a/lib/Analysis/IPA/InlineCost.cpp +++ b/lib/Analysis/IPA/InlineCost.cpp @@ -11,7 +11,6 @@ // //===----------------------------------------------------------------------===// -#define DEBUG_TYPE "inline-cost" #include "llvm/Analysis/InlineCost.h" #include "llvm/ADT/STLExtras.h" #include "llvm/ADT/SetVector.h" @@ -34,6 +33,8 @@ using namespace llvm; +#define DEBUG_TYPE "inline-cost" + STATISTIC(NumCallsAnalyzed, "Number of call sites analyzed"); namespace { @@ -97,9 +98,6 @@ class CallAnalyzer : public InstVisitor<CallAnalyzer, bool> { void disableSROA(Value *V); void accumulateSROACost(DenseMap<Value *, int>::iterator CostIt, int InstructionCost); - bool handleSROACandidate(bool IsSROAValid, - DenseMap<Value *, int>::iterator CostIt, - int InstructionCost); bool isGEPOffsetConstant(GetElementPtrInst &GEP); bool accumulateGEPOffset(GEPOperator &GEP, APInt &Offset); bool simplifyCallSite(Function *F, CallSite CS); @@ -225,21 +223,6 @@ void CallAnalyzer::accumulateSROACost(DenseMap<Value *, int>::iterator CostIt, SROACostSavings += InstructionCost; } -/// \brief Helper for the common pattern of handling a SROA candidate. -/// Either accumulates the cost savings if the SROA remains valid, or disables -/// SROA for the candidate. -bool CallAnalyzer::handleSROACandidate(bool IsSROAValid, - DenseMap<Value *, int>::iterator CostIt, - int InstructionCost) { - if (IsSROAValid) { - accumulateSROACost(CostIt, InstructionCost); - return true; - } - - disableSROA(CostIt); - return false; -} - /// \brief Check whether a GEP's indices are all constant. /// /// Respects any simplified values known during the analysis of this callsite. @@ -287,8 +270,17 @@ bool CallAnalyzer::accumulateGEPOffset(GEPOperator &GEP, APInt &Offset) { } bool CallAnalyzer::visitAlloca(AllocaInst &I) { - // FIXME: Check whether inlining will turn a dynamic alloca into a static + // Check whether inlining will turn a dynamic alloca into a static // alloca, and handle that case. + if (I.isArrayAllocation()) { + if (Constant *Size = SimplifiedValues.lookup(I.getArraySize())) { + ConstantInt *AllocSize = dyn_cast<ConstantInt>(Size); + assert(AllocSize && "Allocation size not a constant int?"); + Type *Ty = I.getAllocatedType(); + AllocatedSize += Ty->getPrimitiveSizeInBits() * AllocSize->getZExtValue(); + return Base::visitAlloca(I); + } + } // Accumulate the allocated size. if (I.isStaticAlloca()) { @@ -816,9 +808,29 @@ bool CallAnalyzer::visitBranchInst(BranchInst &BI) { bool CallAnalyzer::visitSwitchInst(SwitchInst &SI) { // We model unconditional switches as free, see the comments on handling // branches. - return isa<ConstantInt>(SI.getCondition()) || - dyn_cast_or_null<ConstantInt>( - SimplifiedValues.lookup(SI.getCondition())); + if (isa<ConstantInt>(SI.getCondition())) + return true; + if (Value *V = SimplifiedValues.lookup(SI.getCondition())) + if (isa<ConstantInt>(V)) + return true; + + // Otherwise, we need to accumulate a cost proportional to the number of + // distinct successor blocks. This fan-out in the CFG cannot be represented + // for free even if we can represent the core switch as a jumptable that + // takes a single instruction. + // + // NB: We convert large switches which are just used to initialize large phi + // nodes to lookup tables instead in simplify-cfg, so this shouldn't prevent + // inlining those. It will prevent inlining in cases where the optimization + // does not (yet) fire. + SmallPtrSet<BasicBlock *, 8> SuccessorBlocks; + SuccessorBlocks.insert(SI.getDefaultDest()); + for (auto I = SI.case_begin(), E = SI.case_end(); I != E; ++I) + SuccessorBlocks.insert(I.getCaseSuccessor()); + // Add cost corresponding to the number of distinct destinations. The first + // we model as free because of fallthrough. + Cost += (SuccessorBlocks.size() - 1) * InlineConstants::InstrCost; + return false; } bool CallAnalyzer::visitIndirectBrInst(IndirectBrInst &IBI) { @@ -934,7 +946,7 @@ bool CallAnalyzer::analyzeBlock(BasicBlock *BB) { /// no constant offsets applied. ConstantInt *CallAnalyzer::stripAndComputeInBoundsConstantOffsets(Value *&V) { if (!DL || !V->getType()->isPointerTy()) - return 0; + return nullptr; unsigned IntPtrWidth = DL->getPointerSizeInBits(); APInt Offset = APInt::getNullValue(IntPtrWidth); @@ -946,7 +958,7 @@ ConstantInt *CallAnalyzer::stripAndComputeInBoundsConstantOffsets(Value *&V) { do { if (GEPOperator *GEP = dyn_cast<GEPOperator>(V)) { if (!GEP->isInBounds() || !accumulateGEPOffset(*GEP, Offset)) - return 0; + return nullptr; V = GEP->getPointerOperand(); } else if (Operator::getOpcode(V) == Instruction::BitCast) { V = cast<Operator>(V)->getOperand(0); @@ -1247,7 +1259,7 @@ InlineCost InlineCostAnalysis::getInlineCost(CallSite CS, Function *Callee, // Calls to functions with always-inline attributes should be inlined // whenever possible. - if (Callee->hasFnAttribute(Attribute::AlwaysInline)) { + if (CS.hasFnAttr(Attribute::AlwaysInline)) { if (isInlineViable(*Callee)) return llvm::InlineCost::getAlways(); return llvm::InlineCost::getNever(); diff --git a/lib/Analysis/IVUsers.cpp b/lib/Analysis/IVUsers.cpp index 5317a47..c819bd3 100644 --- a/lib/Analysis/IVUsers.cpp +++ b/lib/Analysis/IVUsers.cpp @@ -12,7 +12,6 @@ // //===----------------------------------------------------------------------===// -#define DEBUG_TYPE "iv-users" #include "llvm/Analysis/IVUsers.h" #include "llvm/ADT/STLExtras.h" #include "llvm/Analysis/LoopPass.h" @@ -29,6 +28,8 @@ #include <algorithm> using namespace llvm; +#define DEBUG_TYPE "iv-users" + char IVUsers::ID = 0; INITIALIZE_PASS_BEGIN(IVUsers, "iv-users", "Induction Variable Users", false, true) @@ -84,7 +85,7 @@ static bool isInteresting(const SCEV *S, const Instruction *I, const Loop *L, static bool isSimplifiedLoopNest(BasicBlock *BB, const DominatorTree *DT, const LoopInfo *LI, SmallPtrSet<Loop*,16> &SimpleLoopNests) { - Loop *NearestLoop = 0; + Loop *NearestLoop = nullptr; for (DomTreeNode *Rung = DT->getNode(BB); Rung; Rung = Rung->getIDom()) { BasicBlock *DomBB = Rung->getBlock(); @@ -253,7 +254,7 @@ bool IVUsers::runOnLoop(Loop *l, LPPassManager &LPM) { DT = &getAnalysis<DominatorTreeWrapperPass>().getDomTree(); SE = &getAnalysis<ScalarEvolution>(); DataLayoutPass *DLP = getAnalysisIfAvailable<DataLayoutPass>(); - DL = DLP ? &DLP->getDataLayout() : 0; + DL = DLP ? &DLP->getDataLayout() : nullptr; // Find all uses of induction variables in this loop, and categorize // them by stride. Start by finding all of the PHI nodes in the header for @@ -329,16 +330,16 @@ static const SCEVAddRecExpr *findAddRecForLoop(const SCEV *S, const Loop *L) { I != E; ++I) if (const SCEVAddRecExpr *AR = findAddRecForLoop(*I, L)) return AR; - return 0; + return nullptr; } - return 0; + return nullptr; } const SCEV *IVUsers::getStride(const IVStrideUse &IU, const Loop *L) const { if (const SCEVAddRecExpr *AR = findAddRecForLoop(getExpr(IU), L)) return AR->getStepRecurrence(*SE); - return 0; + return nullptr; } void IVStrideUse::transformToPostInc(const Loop *L) { diff --git a/lib/Analysis/InstCount.cpp b/lib/Analysis/InstCount.cpp index 3d05556..de2b9c0 100644 --- a/lib/Analysis/InstCount.cpp +++ b/lib/Analysis/InstCount.cpp @@ -11,7 +11,6 @@ // //===----------------------------------------------------------------------===// -#define DEBUG_TYPE "instcount" #include "llvm/Analysis/Passes.h" #include "llvm/ADT/Statistic.h" #include "llvm/IR/Function.h" @@ -22,6 +21,8 @@ #include "llvm/Support/raw_ostream.h" using namespace llvm; +#define DEBUG_TYPE "instcount" + STATISTIC(TotalInsts , "Number of instructions (of all types)"); STATISTIC(TotalBlocks, "Number of basic blocks"); STATISTIC(TotalFuncs , "Number of non-external functions"); @@ -47,7 +48,7 @@ namespace { void visitInstruction(Instruction &I) { errs() << "Instruction Count does not know about " << I; - llvm_unreachable(0); + llvm_unreachable(nullptr); } public: static char ID; // Pass identification, replacement for typeid diff --git a/lib/Analysis/InstructionSimplify.cpp b/lib/Analysis/InstructionSimplify.cpp index d8d8a09..3684fda 100644 --- a/lib/Analysis/InstructionSimplify.cpp +++ b/lib/Analysis/InstructionSimplify.cpp @@ -17,7 +17,6 @@ // //===----------------------------------------------------------------------===// -#define DEBUG_TYPE "instsimplify" #include "llvm/Analysis/InstructionSimplify.h" #include "llvm/ADT/SetVector.h" #include "llvm/ADT/Statistic.h" @@ -35,6 +34,8 @@ using namespace llvm; using namespace llvm::PatternMatch; +#define DEBUG_TYPE "instsimplify" + enum { RecursionLimit = 3 }; STATISTIC(NumExpand, "Number of expansions"); @@ -131,7 +132,7 @@ static Value *ExpandBinOp(unsigned Opcode, Value *LHS, Value *RHS, Instruction::BinaryOps OpcodeToExpand = (Instruction::BinaryOps)OpcToExpand; // Recursion is always used, so bail out at once if we already hit the limit. if (!MaxRecurse--) - return 0; + return nullptr; // Check whether the expression has the form "(A op' B) op C". if (BinaryOperator *Op0 = dyn_cast<BinaryOperator>(LHS)) @@ -179,7 +180,7 @@ static Value *ExpandBinOp(unsigned Opcode, Value *LHS, Value *RHS, } } - return 0; + return nullptr; } /// FactorizeBinOp - Simplify "LHS Opcode RHS" by factorizing out a common term @@ -192,14 +193,14 @@ static Value *FactorizeBinOp(unsigned Opcode, Value *LHS, Value *RHS, Instruction::BinaryOps OpcodeToExtract = (Instruction::BinaryOps)OpcToExtract; // Recursion is always used, so bail out at once if we already hit the limit. if (!MaxRecurse--) - return 0; + return nullptr; BinaryOperator *Op0 = dyn_cast<BinaryOperator>(LHS); BinaryOperator *Op1 = dyn_cast<BinaryOperator>(RHS); if (!Op0 || Op0->getOpcode() != OpcodeToExtract || !Op1 || Op1->getOpcode() != OpcodeToExtract) - return 0; + return nullptr; // The expression has the form "(A op' B) op (C op' D)". Value *A = Op0->getOperand(0), *B = Op0->getOperand(1); @@ -251,7 +252,7 @@ static Value *FactorizeBinOp(unsigned Opcode, Value *LHS, Value *RHS, } } - return 0; + return nullptr; } /// SimplifyAssociativeBinOp - Generic simplifications for associative binary @@ -263,7 +264,7 @@ static Value *SimplifyAssociativeBinOp(unsigned Opc, Value *LHS, Value *RHS, // Recursion is always used, so bail out at once if we already hit the limit. if (!MaxRecurse--) - return 0; + return nullptr; BinaryOperator *Op0 = dyn_cast<BinaryOperator>(LHS); BinaryOperator *Op1 = dyn_cast<BinaryOperator>(RHS); @@ -308,7 +309,7 @@ static Value *SimplifyAssociativeBinOp(unsigned Opc, Value *LHS, Value *RHS, // The remaining transforms require commutativity as well as associativity. if (!Instruction::isCommutative(Opcode)) - return 0; + return nullptr; // Transform: "(A op B) op C" ==> "(C op A) op B" if it simplifies completely. if (Op0 && Op0->getOpcode() == Opcode) { @@ -348,7 +349,7 @@ static Value *SimplifyAssociativeBinOp(unsigned Opc, Value *LHS, Value *RHS, } } - return 0; + return nullptr; } /// ThreadBinOpOverSelect - In the case of a binary operation with a select @@ -359,7 +360,7 @@ static Value *ThreadBinOpOverSelect(unsigned Opcode, Value *LHS, Value *RHS, const Query &Q, unsigned MaxRecurse) { // Recursion is always used, so bail out at once if we already hit the limit. if (!MaxRecurse--) - return 0; + return nullptr; SelectInst *SI; if (isa<SelectInst>(LHS)) { @@ -420,7 +421,7 @@ static Value *ThreadBinOpOverSelect(unsigned Opcode, Value *LHS, Value *RHS, } } - return 0; + return nullptr; } /// ThreadCmpOverSelect - In the case of a comparison with a select instruction, @@ -432,7 +433,7 @@ static Value *ThreadCmpOverSelect(CmpInst::Predicate Pred, Value *LHS, unsigned MaxRecurse) { // Recursion is always used, so bail out at once if we already hit the limit. if (!MaxRecurse--) - return 0; + return nullptr; // Make sure the select is on the LHS. if (!isa<SelectInst>(LHS)) { @@ -456,7 +457,7 @@ static Value *ThreadCmpOverSelect(CmpInst::Predicate Pred, Value *LHS, // It didn't simplify. However if "cmp TV, RHS" is equal to the select // condition then we can replace it with 'true'. Otherwise give up. if (!isSameCompare(Cond, Pred, TV, RHS)) - return 0; + return nullptr; TCmp = getTrue(Cond->getType()); } @@ -470,7 +471,7 @@ static Value *ThreadCmpOverSelect(CmpInst::Predicate Pred, Value *LHS, // It didn't simplify. However if "cmp FV, RHS" is equal to the select // condition then we can replace it with 'false'. Otherwise give up. if (!isSameCompare(Cond, Pred, FV, RHS)) - return 0; + return nullptr; FCmp = getFalse(Cond->getType()); } @@ -482,7 +483,7 @@ static Value *ThreadCmpOverSelect(CmpInst::Predicate Pred, Value *LHS, // The remaining cases only make sense if the select condition has the same // type as the result of the comparison, so bail out if this is not so. if (Cond->getType()->isVectorTy() != RHS->getType()->isVectorTy()) - return 0; + return nullptr; // If the false value simplified to false, then the result of the compare // is equal to "Cond && TCmp". This also catches the case when the false // value simplified to false and the true value to true, returning "Cond". @@ -502,7 +503,7 @@ static Value *ThreadCmpOverSelect(CmpInst::Predicate Pred, Value *LHS, Q, MaxRecurse)) return V; - return 0; + return nullptr; } /// ThreadBinOpOverPHI - In the case of a binary operation with an operand that @@ -513,24 +514,24 @@ static Value *ThreadBinOpOverPHI(unsigned Opcode, Value *LHS, Value *RHS, const Query &Q, unsigned MaxRecurse) { // Recursion is always used, so bail out at once if we already hit the limit. if (!MaxRecurse--) - return 0; + return nullptr; PHINode *PI; if (isa<PHINode>(LHS)) { PI = cast<PHINode>(LHS); // Bail out if RHS and the phi may be mutually interdependent due to a loop. if (!ValueDominatesPHI(RHS, PI, Q.DT)) - return 0; + return nullptr; } else { assert(isa<PHINode>(RHS) && "No PHI instruction operand!"); PI = cast<PHINode>(RHS); // Bail out if LHS and the phi may be mutually interdependent due to a loop. if (!ValueDominatesPHI(LHS, PI, Q.DT)) - return 0; + return nullptr; } // Evaluate the BinOp on the incoming phi values. - Value *CommonValue = 0; + Value *CommonValue = nullptr; for (unsigned i = 0, e = PI->getNumIncomingValues(); i != e; ++i) { Value *Incoming = PI->getIncomingValue(i); // If the incoming value is the phi node itself, it can safely be skipped. @@ -541,7 +542,7 @@ static Value *ThreadBinOpOverPHI(unsigned Opcode, Value *LHS, Value *RHS, // If the operation failed to simplify, or simplified to a different value // to previously, then give up. if (!V || (CommonValue && V != CommonValue)) - return 0; + return nullptr; CommonValue = V; } @@ -556,7 +557,7 @@ static Value *ThreadCmpOverPHI(CmpInst::Predicate Pred, Value *LHS, Value *RHS, const Query &Q, unsigned MaxRecurse) { // Recursion is always used, so bail out at once if we already hit the limit. if (!MaxRecurse--) - return 0; + return nullptr; // Make sure the phi is on the LHS. if (!isa<PHINode>(LHS)) { @@ -568,10 +569,10 @@ static Value *ThreadCmpOverPHI(CmpInst::Predicate Pred, Value *LHS, Value *RHS, // Bail out if RHS and the phi may be mutually interdependent due to a loop. if (!ValueDominatesPHI(RHS, PI, Q.DT)) - return 0; + return nullptr; // Evaluate the BinOp on the incoming phi values. - Value *CommonValue = 0; + Value *CommonValue = nullptr; for (unsigned i = 0, e = PI->getNumIncomingValues(); i != e; ++i) { Value *Incoming = PI->getIncomingValue(i); // If the incoming value is the phi node itself, it can safely be skipped. @@ -580,7 +581,7 @@ static Value *ThreadCmpOverPHI(CmpInst::Predicate Pred, Value *LHS, Value *RHS, // If the operation failed to simplify, or simplified to a different value // to previously, then give up. if (!V || (CommonValue && V != CommonValue)) - return 0; + return nullptr; CommonValue = V; } @@ -613,7 +614,7 @@ static Value *SimplifyAddInst(Value *Op0, Value *Op1, bool isNSW, bool isNUW, // X + (Y - X) -> Y // (Y - X) + X -> Y // Eg: X + -X -> 0 - Value *Y = 0; + Value *Y = nullptr; if (match(Op1, m_Sub(m_Value(Y), m_Specific(Op0))) || match(Op0, m_Sub(m_Value(Y), m_Specific(Op1)))) return Y; @@ -647,7 +648,7 @@ static Value *SimplifyAddInst(Value *Op0, Value *Op1, bool isNSW, bool isNUW, // "A+B" and "A+C" thus gains nothing, but costs compile time. Similarly // for threading over phi nodes. - return 0; + return nullptr; } Value *llvm::SimplifyAddInst(Value *Op0, Value *Op1, bool isNSW, bool isNUW, @@ -720,7 +721,7 @@ static Constant *computePointerDifference(const DataLayout *DL, // If LHS and RHS are not related via constant offsets to the same base // value, there is nothing we can do here. if (LHS != RHS) - return 0; + return nullptr; // Otherwise, the difference of LHS - RHS can be computed as: // LHS - RHS @@ -755,14 +756,14 @@ static Value *SimplifySubInst(Value *Op0, Value *Op1, bool isNSW, bool isNUW, // (X*2) - X -> X // (X<<1) - X -> X - Value *X = 0; + Value *X = nullptr; if (match(Op0, m_Mul(m_Specific(Op1), m_ConstantInt<2>())) || match(Op0, m_Shl(m_Specific(Op1), m_One()))) return Op1; // (X + Y) - Z -> X + (Y - Z) or Y + (X - Z) if everything simplifies. // For example, (X + Y) - Y -> X; (Y + X) - Y -> X - Value *Y = 0, *Z = Op1; + Value *Y = nullptr, *Z = Op1; if (MaxRecurse && match(Op0, m_Add(m_Value(X), m_Value(Y)))) { // (X + Y) - Z // See if "V === Y - Z" simplifies. if (Value *V = SimplifyBinOp(Instruction::Sub, Y, Z, Q, MaxRecurse-1)) @@ -853,7 +854,7 @@ static Value *SimplifySubInst(Value *Op0, Value *Op1, bool isNSW, bool isNUW, // "A-B" and "A-C" thus gains nothing, but costs compile time. Similarly // for threading over phi nodes. - return 0; + return nullptr; } Value *llvm::SimplifySubInst(Value *Op0, Value *Op1, bool isNSW, bool isNUW, @@ -890,7 +891,7 @@ static Value *SimplifyFAddInst(Value *Op0, Value *Op1, FastMathFlags FMF, // fadd [nnan ninf] X, (fsub [nnan ninf] 0, X) ==> 0 // where nnan and ninf have to occur at least once somewhere in this // expression - Value *SubOp = 0; + Value *SubOp = nullptr; if (match(Op1, m_FSub(m_AnyZero(), m_Specific(Op0)))) SubOp = Op1; else if (match(Op0, m_FSub(m_AnyZero(), m_Specific(Op1)))) @@ -902,7 +903,7 @@ static Value *SimplifyFAddInst(Value *Op0, Value *Op1, FastMathFlags FMF, return Constant::getNullValue(Op0->getType()); } - return 0; + return nullptr; } /// Given operands for an FSub, see if we can fold the result. If not, this @@ -939,7 +940,7 @@ static Value *SimplifyFSubInst(Value *Op0, Value *Op1, FastMathFlags FMF, if (FMF.noNaNs() && FMF.noInfs() && Op0 == Op1) return Constant::getNullValue(Op0->getType()); - return 0; + return nullptr; } /// Given the operands for an FMul, see if we can fold the result @@ -966,7 +967,7 @@ static Value *SimplifyFMulInst(Value *Op0, Value *Op1, if (FMF.noNaNs() && FMF.noSignedZeros() && match(Op1, m_AnyZero())) return Op1; - return 0; + return nullptr; } /// SimplifyMulInst - Given operands for a Mul, see if we can @@ -997,7 +998,7 @@ static Value *SimplifyMulInst(Value *Op0, Value *Op1, const Query &Q, return Op0; // (X / Y) * Y -> X if the division is exact. - Value *X = 0; + Value *X = nullptr; if (match(Op0, m_Exact(m_IDiv(m_Value(X), m_Specific(Op1)))) || // (X / Y) * Y match(Op1, m_Exact(m_IDiv(m_Value(X), m_Specific(Op0))))) // Y * (X / Y) return X; @@ -1031,7 +1032,7 @@ static Value *SimplifyMulInst(Value *Op0, Value *Op1, const Query &Q, MaxRecurse)) return V; - return 0; + return nullptr; } Value *llvm::SimplifyFAddInst(Value *Op0, Value *Op1, FastMathFlags FMF, @@ -1098,7 +1099,7 @@ static Value *SimplifyDiv(Instruction::BinaryOps Opcode, Value *Op0, Value *Op1, return ConstantInt::get(Op0->getType(), 1); // (X * Y) / Y -> X if the multiplication does not overflow. - Value *X = 0, *Y = 0; + Value *X = nullptr, *Y = nullptr; if (match(Op0, m_Mul(m_Value(X), m_Value(Y))) && (X == Op1 || Y == Op1)) { if (Y != Op1) std::swap(X, Y); // Ensure expression is (X * Y) / Y, Y = Op1 OverflowingBinaryOperator *Mul = cast<OverflowingBinaryOperator>(Op0); @@ -1129,7 +1130,7 @@ static Value *SimplifyDiv(Instruction::BinaryOps Opcode, Value *Op0, Value *Op1, if (Value *V = ThreadBinOpOverPHI(Opcode, Op0, Op1, Q, MaxRecurse)) return V; - return 0; + return nullptr; } /// SimplifySDivInst - Given operands for an SDiv, see if we can @@ -1139,7 +1140,7 @@ static Value *SimplifySDivInst(Value *Op0, Value *Op1, const Query &Q, if (Value *V = SimplifyDiv(Instruction::SDiv, Op0, Op1, Q, MaxRecurse)) return V; - return 0; + return nullptr; } Value *llvm::SimplifySDivInst(Value *Op0, Value *Op1, const DataLayout *DL, @@ -1155,7 +1156,7 @@ static Value *SimplifyUDivInst(Value *Op0, Value *Op1, const Query &Q, if (Value *V = SimplifyDiv(Instruction::UDiv, Op0, Op1, Q, MaxRecurse)) return V; - return 0; + return nullptr; } Value *llvm::SimplifyUDivInst(Value *Op0, Value *Op1, const DataLayout *DL, @@ -1174,7 +1175,7 @@ static Value *SimplifyFDivInst(Value *Op0, Value *Op1, const Query &Q, if (match(Op1, m_Undef())) return Op1; - return 0; + return nullptr; } Value *llvm::SimplifyFDivInst(Value *Op0, Value *Op1, const DataLayout *DL, @@ -1234,7 +1235,7 @@ static Value *SimplifyRem(Instruction::BinaryOps Opcode, Value *Op0, Value *Op1, if (Value *V = ThreadBinOpOverPHI(Opcode, Op0, Op1, Q, MaxRecurse)) return V; - return 0; + return nullptr; } /// SimplifySRemInst - Given operands for an SRem, see if we can @@ -1244,7 +1245,7 @@ static Value *SimplifySRemInst(Value *Op0, Value *Op1, const Query &Q, if (Value *V = SimplifyRem(Instruction::SRem, Op0, Op1, Q, MaxRecurse)) return V; - return 0; + return nullptr; } Value *llvm::SimplifySRemInst(Value *Op0, Value *Op1, const DataLayout *DL, @@ -1260,7 +1261,7 @@ static Value *SimplifyURemInst(Value *Op0, Value *Op1, const Query &Q, if (Value *V = SimplifyRem(Instruction::URem, Op0, Op1, Q, MaxRecurse)) return V; - return 0; + return nullptr; } Value *llvm::SimplifyURemInst(Value *Op0, Value *Op1, const DataLayout *DL, @@ -1279,7 +1280,7 @@ static Value *SimplifyFRemInst(Value *Op0, Value *Op1, const Query &, if (match(Op1, m_Undef())) return Op1; - return 0; + return nullptr; } Value *llvm::SimplifyFRemInst(Value *Op0, Value *Op1, const DataLayout *DL, @@ -1350,7 +1351,7 @@ static Value *SimplifyShift(unsigned Opcode, Value *Op0, Value *Op1, if (Value *V = ThreadBinOpOverPHI(Opcode, Op0, Op1, Q, MaxRecurse)) return V; - return 0; + return nullptr; } /// SimplifyShlInst - Given operands for an Shl, see if we can @@ -1368,7 +1369,7 @@ static Value *SimplifyShlInst(Value *Op0, Value *Op1, bool isNSW, bool isNUW, Value *X; if (match(Op0, m_Exact(m_Shr(m_Value(X), m_Specific(Op1))))) return X; - return 0; + return nullptr; } Value *llvm::SimplifyShlInst(Value *Op0, Value *Op1, bool isNSW, bool isNUW, @@ -1399,7 +1400,7 @@ static Value *SimplifyLShrInst(Value *Op0, Value *Op1, bool isExact, cast<OverflowingBinaryOperator>(Op0)->hasNoUnsignedWrap()) return X; - return 0; + return nullptr; } Value *llvm::SimplifyLShrInst(Value *Op0, Value *Op1, bool isExact, @@ -1435,7 +1436,7 @@ static Value *SimplifyAShrInst(Value *Op0, Value *Op1, bool isExact, cast<OverflowingBinaryOperator>(Op0)->hasNoSignedWrap()) return X; - return 0; + return nullptr; } Value *llvm::SimplifyAShrInst(Value *Op0, Value *Op1, bool isExact, @@ -1483,7 +1484,7 @@ static Value *SimplifyAndInst(Value *Op0, Value *Op1, const Query &Q, return Constant::getNullValue(Op0->getType()); // (A | ?) & A = A - Value *A = 0, *B = 0; + Value *A = nullptr, *B = nullptr; if (match(Op0, m_Or(m_Value(A), m_Value(B))) && (A == Op1 || B == Op1)) return Op1; @@ -1536,7 +1537,7 @@ static Value *SimplifyAndInst(Value *Op0, Value *Op1, const Query &Q, MaxRecurse)) return V; - return 0; + return nullptr; } Value *llvm::SimplifyAndInst(Value *Op0, Value *Op1, const DataLayout *DL, @@ -1582,7 +1583,7 @@ static Value *SimplifyOrInst(Value *Op0, Value *Op1, const Query &Q, return Constant::getAllOnesValue(Op0->getType()); // (A & ?) | A = A - Value *A = 0, *B = 0; + Value *A = nullptr, *B = nullptr; if (match(Op0, m_And(m_Value(A), m_Value(B))) && (A == Op1 || B == Op1)) return Op1; @@ -1630,7 +1631,7 @@ static Value *SimplifyOrInst(Value *Op0, Value *Op1, const Query &Q, if (Value *V = ThreadBinOpOverPHI(Instruction::Or, Op0, Op1, Q, MaxRecurse)) return V; - return 0; + return nullptr; } Value *llvm::SimplifyOrInst(Value *Op0, Value *Op1, const DataLayout *DL, @@ -1690,7 +1691,7 @@ static Value *SimplifyXorInst(Value *Op0, Value *Op1, const Query &Q, // "A^B" and "A^C" thus gains nothing, but costs compile time. Similarly // for threading over phi nodes. - return 0; + return nullptr; } Value *llvm::SimplifyXorInst(Value *Op0, Value *Op1, const DataLayout *DL, @@ -1710,17 +1711,17 @@ static Value *ExtractEquivalentCondition(Value *V, CmpInst::Predicate Pred, Value *LHS, Value *RHS) { SelectInst *SI = dyn_cast<SelectInst>(V); if (!SI) - return 0; + return nullptr; CmpInst *Cmp = dyn_cast<CmpInst>(SI->getCondition()); if (!Cmp) - return 0; + return nullptr; Value *CmpLHS = Cmp->getOperand(0), *CmpRHS = Cmp->getOperand(1); if (Pred == Cmp->getPredicate() && LHS == CmpLHS && RHS == CmpRHS) return Cmp; if (Pred == CmpInst::getSwappedPredicate(Cmp->getPredicate()) && LHS == CmpRHS && RHS == CmpLHS) return Cmp; - return 0; + return nullptr; } // A significant optimization not implemented here is assuming that alloca @@ -1768,7 +1769,7 @@ static Constant *computePointerICmp(const DataLayout *DL, // We can only fold certain predicates on pointer comparisons. switch (Pred) { default: - return 0; + return nullptr; // Equality comaprisons are easy to fold. case CmpInst::ICMP_EQ: @@ -1874,7 +1875,7 @@ static Constant *computePointerICmp(const DataLayout *DL, } // Otherwise, fail. - return 0; + return nullptr; } /// SimplifyICmpInst - Given operands for an ICmpInst, see if we can @@ -2000,7 +2001,7 @@ static Value *SimplifyICmpInst(unsigned Predicate, Value *LHS, Value *RHS, // Many binary operators with constant RHS have easy to compute constant // range. Use them to check whether the comparison is a tautology. - uint32_t Width = CI->getBitWidth(); + unsigned Width = CI->getBitWidth(); APInt Lower = APInt(Width, 0); APInt Upper = APInt(Width, 0); ConstantInt *CI2; @@ -2019,6 +2020,10 @@ static Value *SimplifyICmpInst(unsigned Predicate, Value *LHS, Value *RHS, APInt NegOne = APInt::getAllOnesValue(Width); if (!CI2->isZero()) Upper = NegOne.udiv(CI2->getValue()) + 1; + } else if (match(LHS, m_SDiv(m_ConstantInt(CI2), m_Value()))) { + // 'sdiv CI2, x' produces [-|CI2|, |CI2|]. + Upper = CI2->getValue().abs() + 1; + Lower = (-Upper) + 1; } else if (match(LHS, m_SDiv(m_Value(), m_ConstantInt(CI2)))) { // 'sdiv x, CI2' produces [INT_MIN / CI2, INT_MAX / CI2]. APInt IntMin = APInt::getSignedMinValue(Width); @@ -2033,6 +2038,13 @@ static Value *SimplifyICmpInst(unsigned Predicate, Value *LHS, Value *RHS, APInt NegOne = APInt::getAllOnesValue(Width); if (CI2->getValue().ult(Width)) Upper = NegOne.lshr(CI2->getValue()) + 1; + } else if (match(LHS, m_LShr(m_ConstantInt(CI2), m_Value()))) { + // 'lshr CI2, x' produces [CI2 >> (Width-1), CI2]. + unsigned ShiftAmount = Width - 1; + if (!CI2->isZero() && cast<BinaryOperator>(LHS)->isExact()) + ShiftAmount = CI2->getValue().countTrailingZeros(); + Lower = CI2->getValue().lshr(ShiftAmount); + Upper = CI2->getValue() + 1; } else if (match(LHS, m_AShr(m_Value(), m_ConstantInt(CI2)))) { // 'ashr x, CI2' produces [INT_MIN >> CI2, INT_MAX >> CI2]. APInt IntMin = APInt::getSignedMinValue(Width); @@ -2041,6 +2053,19 @@ static Value *SimplifyICmpInst(unsigned Predicate, Value *LHS, Value *RHS, Lower = IntMin.ashr(CI2->getValue()); Upper = IntMax.ashr(CI2->getValue()) + 1; } + } else if (match(LHS, m_AShr(m_ConstantInt(CI2), m_Value()))) { + unsigned ShiftAmount = Width - 1; + if (!CI2->isZero() && cast<BinaryOperator>(LHS)->isExact()) + ShiftAmount = CI2->getValue().countTrailingZeros(); + if (CI2->isNegative()) { + // 'ashr CI2, x' produces [CI2, CI2 >> (Width-1)] + Lower = CI2->getValue(); + Upper = CI2->getValue().ashr(ShiftAmount) + 1; + } else { + // 'ashr CI2, x' produces [CI2 >> (Width-1), CI2] + Lower = CI2->getValue().ashr(ShiftAmount); + Upper = CI2->getValue() + 1; + } } else if (match(LHS, m_Or(m_Value(), m_ConstantInt(CI2)))) { // 'or x, CI2' produces [CI2, UINT_MAX]. Lower = CI2->getValue(); @@ -2221,7 +2246,7 @@ static Value *SimplifyICmpInst(unsigned Predicate, Value *LHS, Value *RHS, BinaryOperator *RBO = dyn_cast<BinaryOperator>(RHS); if (MaxRecurse && (LBO || RBO)) { // Analyze the case when either LHS or RHS is an add instruction. - Value *A = 0, *B = 0, *C = 0, *D = 0; + Value *A = nullptr, *B = nullptr, *C = nullptr, *D = nullptr; // LHS = A + B (or A and B are null); RHS = C + D (or C and D are null). bool NoLHSWrapProblem = false, NoRHSWrapProblem = false; if (LBO && LBO->getOpcode() == Instruction::Add) { @@ -2279,6 +2304,28 @@ static Value *SimplifyICmpInst(unsigned Predicate, Value *LHS, Value *RHS, } } + // 0 - (zext X) pred C + if (!CmpInst::isUnsigned(Pred) && match(LHS, m_Neg(m_ZExt(m_Value())))) { + if (ConstantInt *RHSC = dyn_cast<ConstantInt>(RHS)) { + if (RHSC->getValue().isStrictlyPositive()) { + if (Pred == ICmpInst::ICMP_SLT) + return ConstantInt::getTrue(RHSC->getContext()); + if (Pred == ICmpInst::ICMP_SGE) + return ConstantInt::getFalse(RHSC->getContext()); + if (Pred == ICmpInst::ICMP_EQ) + return ConstantInt::getFalse(RHSC->getContext()); + if (Pred == ICmpInst::ICMP_NE) + return ConstantInt::getTrue(RHSC->getContext()); + } + if (RHSC->getValue().isNonNegative()) { + if (Pred == ICmpInst::ICMP_SLE) + return ConstantInt::getTrue(RHSC->getContext()); + if (Pred == ICmpInst::ICMP_SGT) + return ConstantInt::getFalse(RHSC->getContext()); + } + } + } + // icmp pred (urem X, Y), Y if (LBO && match(LBO, m_URem(m_Value(), m_Specific(RHS)))) { bool KnownNonNegative, KnownNegative; @@ -2605,7 +2652,7 @@ static Value *SimplifyICmpInst(unsigned Predicate, Value *LHS, Value *RHS, if (Value *V = ThreadCmpOverPHI(Pred, LHS, RHS, Q, MaxRecurse)) return V; - return 0; + return nullptr; } Value *llvm::SimplifyICmpInst(unsigned Predicate, Value *LHS, Value *RHS, @@ -2702,7 +2749,7 @@ static Value *SimplifyFCmpInst(unsigned Predicate, Value *LHS, Value *RHS, if (Value *V = ThreadCmpOverPHI(Pred, LHS, RHS, Q, MaxRecurse)) return V; - return 0; + return nullptr; } Value *llvm::SimplifyFCmpInst(unsigned Predicate, Value *LHS, Value *RHS, @@ -2741,7 +2788,7 @@ static Value *SimplifySelectInst(Value *CondVal, Value *TrueVal, if (isa<UndefValue>(FalseVal)) // select C, X, undef -> X return TrueVal; - return 0; + return nullptr; } Value *llvm::SimplifySelectInst(Value *Cond, Value *TrueVal, Value *FalseVal, @@ -2786,7 +2833,7 @@ static Value *SimplifyGEPInst(ArrayRef<Value *> Ops, const Query &Q, unsigned) { // Check to see if this is constant foldable. for (unsigned i = 0, e = Ops.size(); i != e; ++i) if (!isa<Constant>(Ops[i])) - return 0; + return nullptr; return ConstantExpr::getGetElementPtr(cast<Constant>(Ops[0]), Ops.slice(1)); } @@ -2823,7 +2870,7 @@ static Value *SimplifyInsertValueInst(Value *Agg, Value *Val, return Agg; } - return 0; + return nullptr; } Value *llvm::SimplifyInsertValueInst(Value *Agg, Value *Val, @@ -2839,7 +2886,7 @@ Value *llvm::SimplifyInsertValueInst(Value *Agg, Value *Val, static Value *SimplifyPHINode(PHINode *PN, const Query &Q) { // If all of the PHI's incoming values are the same then replace the PHI node // with the common value. - Value *CommonValue = 0; + Value *CommonValue = nullptr; bool HasUndefInput = false; for (unsigned i = 0, e = PN->getNumIncomingValues(); i != e; ++i) { Value *Incoming = PN->getIncomingValue(i); @@ -2851,7 +2898,7 @@ static Value *SimplifyPHINode(PHINode *PN, const Query &Q) { continue; } if (CommonValue && Incoming != CommonValue) - return 0; // Not the same, bail out. + return nullptr; // Not the same, bail out. CommonValue = Incoming; } @@ -2864,7 +2911,7 @@ static Value *SimplifyPHINode(PHINode *PN, const Query &Q) { // instruction, we cannot return X as the result of the PHI node unless it // dominates the PHI block. if (HasUndefInput) - return ValueDominatesPHI(CommonValue, PN, Q.DT) ? CommonValue : 0; + return ValueDominatesPHI(CommonValue, PN, Q.DT) ? CommonValue : nullptr; return CommonValue; } @@ -2873,7 +2920,7 @@ static Value *SimplifyTruncInst(Value *Op, Type *Ty, const Query &Q, unsigned) { if (Constant *C = dyn_cast<Constant>(Op)) return ConstantFoldInstOperands(Instruction::Trunc, Ty, C, Q.DL, Q.TLI); - return 0; + return nullptr; } Value *llvm::SimplifyTruncInst(Value *Op, Type *Ty, const DataLayout *DL, @@ -2945,7 +2992,7 @@ static Value *SimplifyBinOp(unsigned Opcode, Value *LHS, Value *RHS, if (Value *V = ThreadBinOpOverPHI(Opcode, LHS, RHS, Q, MaxRecurse)) return V; - return 0; + return nullptr; } } @@ -2992,7 +3039,7 @@ static Value *SimplifyIntrinsic(Intrinsic::ID IID, IterTy ArgBegin, IterTy ArgEn const Query &Q, unsigned MaxRecurse) { // Perform idempotent optimizations if (!IsIdempotent(IID)) - return 0; + return nullptr; // Unary Ops if (std::distance(ArgBegin, ArgEnd) == 1) @@ -3000,7 +3047,7 @@ static Value *SimplifyIntrinsic(Intrinsic::ID IID, IterTy ArgBegin, IterTy ArgEn if (II->getIntrinsicID() == IID) return II; - return 0; + return nullptr; } template <typename IterTy> @@ -3017,7 +3064,7 @@ static Value *SimplifyCall(Value *V, IterTy ArgBegin, IterTy ArgEnd, Function *F = dyn_cast<Function>(V); if (!F) - return 0; + return nullptr; if (unsigned IID = F->getIntrinsicID()) if (Value *Ret = @@ -3025,14 +3072,14 @@ static Value *SimplifyCall(Value *V, IterTy ArgBegin, IterTy ArgEnd, return Ret; if (!canConstantFoldCallTo(F)) - return 0; + return nullptr; SmallVector<Constant *, 4> ConstantArgs; ConstantArgs.reserve(ArgEnd - ArgBegin); for (IterTy I = ArgBegin, E = ArgEnd; I != E; ++I) { Constant *C = dyn_cast<Constant>(*I); if (!C) - return 0; + return nullptr; ConstantArgs.push_back(C); } @@ -3247,7 +3294,7 @@ bool llvm::recursivelySimplifyInstruction(Instruction *I, const DataLayout *DL, const TargetLibraryInfo *TLI, const DominatorTree *DT) { - return replaceAndRecursivelySimplifyImpl(I, 0, DL, TLI, DT); + return replaceAndRecursivelySimplifyImpl(I, nullptr, DL, TLI, DT); } bool llvm::replaceAndRecursivelySimplify(Instruction *I, Value *SimpleV, diff --git a/lib/Analysis/IntervalPartition.cpp b/lib/Analysis/IntervalPartition.cpp index 2e259b1..a0583e8 100644 --- a/lib/Analysis/IntervalPartition.cpp +++ b/lib/Analysis/IntervalPartition.cpp @@ -29,7 +29,7 @@ void IntervalPartition::releaseMemory() { delete Intervals[i]; IntervalMap.clear(); Intervals.clear(); - RootInterval = 0; + RootInterval = nullptr; } void IntervalPartition::print(raw_ostream &O, const Module*) const { diff --git a/lib/Analysis/LazyCallGraph.cpp b/lib/Analysis/LazyCallGraph.cpp index ea213f2..e073616 100644 --- a/lib/Analysis/LazyCallGraph.cpp +++ b/lib/Analysis/LazyCallGraph.cpp @@ -8,19 +8,22 @@ //===----------------------------------------------------------------------===// #include "llvm/Analysis/LazyCallGraph.h" -#include "llvm/ADT/SCCIterator.h" +#include "llvm/ADT/STLExtras.h" #include "llvm/IR/CallSite.h" #include "llvm/IR/InstVisitor.h" #include "llvm/IR/Instructions.h" #include "llvm/IR/PassManager.h" +#include "llvm/Support/Debug.h" #include "llvm/Support/raw_ostream.h" using namespace llvm; +#define DEBUG_TYPE "lcg" + static void findCallees( SmallVectorImpl<Constant *> &Worklist, SmallPtrSetImpl<Constant *> &Visited, SmallVectorImpl<PointerUnion<Function *, LazyCallGraph::Node *>> &Callees, - SmallPtrSetImpl<Function *> &CalleeSet) { + DenseMap<Function *, size_t> &CalleeIndexMap) { while (!Worklist.empty()) { Constant *C = Worklist.pop_back_val(); @@ -35,8 +38,12 @@ static void findCallees( // alias. Then a test of the address of the weak function against the new // strong definition's address would be an effective way to determine the // safety of optimizing a direct call edge. - if (!F->isDeclaration() && CalleeSet.insert(F)) + if (!F->isDeclaration() && + CalleeIndexMap.insert(std::make_pair(F, Callees.size())).second) { + DEBUG(dbgs() << " Added callable function: " << F->getName() + << "\n"); Callees.push_back(F); + } continue; } @@ -46,7 +53,11 @@ static void findCallees( } } -LazyCallGraph::Node::Node(LazyCallGraph &G, Function &F) : G(G), F(F) { +LazyCallGraph::Node::Node(LazyCallGraph &G, Function &F) + : G(&G), F(F), DFSNumber(0), LowLink(0) { + DEBUG(dbgs() << " Adding functions called by '" << F.getName() + << "' to the graph.\n"); + SmallVector<Constant *, 16> Worklist; SmallPtrSet<Constant *, 16> Visited; // Find all the potential callees in this function. First walk the @@ -61,36 +72,41 @@ LazyCallGraph::Node::Node(LazyCallGraph &G, Function &F) : G(G), F(F) { // We've collected all the constant (and thus potentially function or // function containing) operands to all of the instructions in the function. // Process them (recursively) collecting every function found. - findCallees(Worklist, Visited, Callees, CalleeSet); + findCallees(Worklist, Visited, Callees, CalleeIndexMap); } -LazyCallGraph::Node::Node(LazyCallGraph &G, const Node &OtherN) - : G(G), F(OtherN.F), CalleeSet(OtherN.CalleeSet) { - // Loop over the other node's callees, adding the Function*s to our list - // directly, and recursing to add the Node*s. - Callees.reserve(OtherN.Callees.size()); - for (const auto &OtherCallee : OtherN.Callees) - if (Function *Callee = OtherCallee.dyn_cast<Function *>()) - Callees.push_back(Callee); - else - Callees.push_back(G.copyInto(*OtherCallee.get<Node *>())); +void LazyCallGraph::Node::insertEdgeInternal(Function &Callee) { + if (Node *N = G->lookup(Callee)) + return insertEdgeInternal(*N); + + CalleeIndexMap.insert(std::make_pair(&Callee, Callees.size())); + Callees.push_back(&Callee); } -LazyCallGraph::Node::Node(LazyCallGraph &G, Node &&OtherN) - : G(G), F(OtherN.F), Callees(std::move(OtherN.Callees)), - CalleeSet(std::move(OtherN.CalleeSet)) { - // Loop over our Callees. They've been moved from another node, but we need - // to move the Node*s to live under our bump ptr allocator. - for (auto &Callee : Callees) - if (Node *ChildN = Callee.dyn_cast<Node *>()) - Callee = G.moveInto(std::move(*ChildN)); +void LazyCallGraph::Node::insertEdgeInternal(Node &CalleeN) { + CalleeIndexMap.insert(std::make_pair(&CalleeN.getFunction(), Callees.size())); + Callees.push_back(&CalleeN); } -LazyCallGraph::LazyCallGraph(Module &M) : M(M) { +void LazyCallGraph::Node::removeEdgeInternal(Function &Callee) { + auto IndexMapI = CalleeIndexMap.find(&Callee); + assert(IndexMapI != CalleeIndexMap.end() && + "Callee not in the callee set for this caller?"); + + Callees[IndexMapI->second] = nullptr; + CalleeIndexMap.erase(IndexMapI); +} + +LazyCallGraph::LazyCallGraph(Module &M) : NextDFSNumber(0) { + DEBUG(dbgs() << "Building CG for module: " << M.getModuleIdentifier() + << "\n"); for (Function &F : M) if (!F.isDeclaration() && !F.hasLocalLinkage()) - if (EntryNodeSet.insert(&F)) + if (EntryIndexMap.insert(std::make_pair(&F, EntryNodes.size())).second) { + DEBUG(dbgs() << " Adding '" << F.getName() + << "' to entry set of the graph.\n"); EntryNodes.push_back(&F); + } // Now add entry nodes for functions reachable via initializers to globals. SmallVector<Constant *, 16> Worklist; @@ -100,51 +116,568 @@ LazyCallGraph::LazyCallGraph(Module &M) : M(M) { if (Visited.insert(GV.getInitializer())) Worklist.push_back(GV.getInitializer()); - findCallees(Worklist, Visited, EntryNodes, EntryNodeSet); -} + DEBUG(dbgs() << " Adding functions referenced by global initializers to the " + "entry set.\n"); + findCallees(Worklist, Visited, EntryNodes, EntryIndexMap); -LazyCallGraph::LazyCallGraph(const LazyCallGraph &G) - : M(G.M), EntryNodeSet(G.EntryNodeSet) { - EntryNodes.reserve(G.EntryNodes.size()); - for (const auto &EntryNode : G.EntryNodes) - if (Function *Callee = EntryNode.dyn_cast<Function *>()) - EntryNodes.push_back(Callee); + for (auto &Entry : EntryNodes) { + assert(!Entry.isNull() && + "We can't have removed edges before we finish the constructor!"); + if (Function *F = Entry.dyn_cast<Function *>()) + SCCEntryNodes.push_back(F); else - EntryNodes.push_back(copyInto(*EntryNode.get<Node *>())); + SCCEntryNodes.push_back(&Entry.get<Node *>()->getFunction()); + } } -// FIXME: This would be crazy simpler if BumpPtrAllocator were movable without -// invalidating any of the allocated memory. We should make that be the case at -// some point and delete this. LazyCallGraph::LazyCallGraph(LazyCallGraph &&G) - : M(G.M), EntryNodes(std::move(G.EntryNodes)), - EntryNodeSet(std::move(G.EntryNodeSet)) { - // Loop over our EntryNodes. They've been moved from another graph, so we - // need to move the Node*s to live under our bump ptr allocator. We can just - // do this in-place. - for (auto &Entry : EntryNodes) - if (Node *EntryN = Entry.dyn_cast<Node *>()) - Entry = moveInto(std::move(*EntryN)); + : BPA(std::move(G.BPA)), NodeMap(std::move(G.NodeMap)), + EntryNodes(std::move(G.EntryNodes)), + EntryIndexMap(std::move(G.EntryIndexMap)), SCCBPA(std::move(G.SCCBPA)), + SCCMap(std::move(G.SCCMap)), LeafSCCs(std::move(G.LeafSCCs)), + DFSStack(std::move(G.DFSStack)), + SCCEntryNodes(std::move(G.SCCEntryNodes)), + NextDFSNumber(G.NextDFSNumber) { + updateGraphPtrs(); +} + +LazyCallGraph &LazyCallGraph::operator=(LazyCallGraph &&G) { + BPA = std::move(G.BPA); + NodeMap = std::move(G.NodeMap); + EntryNodes = std::move(G.EntryNodes); + EntryIndexMap = std::move(G.EntryIndexMap); + SCCBPA = std::move(G.SCCBPA); + SCCMap = std::move(G.SCCMap); + LeafSCCs = std::move(G.LeafSCCs); + DFSStack = std::move(G.DFSStack); + SCCEntryNodes = std::move(G.SCCEntryNodes); + NextDFSNumber = G.NextDFSNumber; + updateGraphPtrs(); + return *this; } -LazyCallGraph::Node *LazyCallGraph::insertInto(Function &F, Node *&MappedN) { - return new (MappedN = BPA.Allocate()) Node(*this, F); +void LazyCallGraph::SCC::insert(Node &N) { + N.DFSNumber = N.LowLink = -1; + Nodes.push_back(&N); + G->SCCMap[&N] = this; } -LazyCallGraph::Node *LazyCallGraph::copyInto(const Node &OtherN) { - Node *&N = NodeMap[&OtherN.F]; - if (N) - return N; +bool LazyCallGraph::SCC::isDescendantOf(const SCC &C) const { + // Walk up the parents of this SCC and verify that we eventually find C. + SmallVector<const SCC *, 4> AncestorWorklist; + AncestorWorklist.push_back(this); + do { + const SCC *AncestorC = AncestorWorklist.pop_back_val(); + if (AncestorC->isChildOf(C)) + return true; + for (const SCC *ParentC : AncestorC->ParentSCCs) + AncestorWorklist.push_back(ParentC); + } while (!AncestorWorklist.empty()); - return new (N = BPA.Allocate()) Node(*this, OtherN); + return false; } -LazyCallGraph::Node *LazyCallGraph::moveInto(Node &&OtherN) { - Node *&N = NodeMap[&OtherN.F]; - if (N) - return N; +void LazyCallGraph::SCC::insertIntraSCCEdge(Node &CallerN, Node &CalleeN) { + // First insert it into the caller. + CallerN.insertEdgeInternal(CalleeN); + + assert(G->SCCMap.lookup(&CallerN) == this && "Caller must be in this SCC."); + assert(G->SCCMap.lookup(&CalleeN) == this && "Callee must be in this SCC."); - return new (N = BPA.Allocate()) Node(*this, std::move(OtherN)); + // Nothing changes about this SCC or any other. +} + +void LazyCallGraph::SCC::insertOutgoingEdge(Node &CallerN, Node &CalleeN) { + // First insert it into the caller. + CallerN.insertEdgeInternal(CalleeN); + + assert(G->SCCMap.lookup(&CallerN) == this && "Caller must be in this SCC."); + + SCC &CalleeC = *G->SCCMap.lookup(&CalleeN); + assert(&CalleeC != this && "Callee must not be in this SCC."); + assert(CalleeC.isDescendantOf(*this) && + "Callee must be a descendant of the Caller."); + + // The only change required is to add this SCC to the parent set of the callee. + CalleeC.ParentSCCs.insert(this); +} + +SmallVector<LazyCallGraph::SCC *, 1> +LazyCallGraph::SCC::insertIncomingEdge(Node &CallerN, Node &CalleeN) { + // First insert it into the caller. + CallerN.insertEdgeInternal(CalleeN); + + assert(G->SCCMap.lookup(&CalleeN) == this && "Callee must be in this SCC."); + + SCC &CallerC = *G->SCCMap.lookup(&CallerN); + assert(&CallerC != this && "Caller must not be in this SCC."); + assert(CallerC.isDescendantOf(*this) && + "Caller must be a descendant of the Callee."); + + // The algorithm we use for merging SCCs based on the cycle introduced here + // is to walk the SCC inverted DAG formed by the parent SCC sets. The inverse + // graph has the same cycle properties as the actual DAG of the SCCs, and + // when forming SCCs lazily by a DFS, the bottom of the graph won't exist in + // many cases which should prune the search space. + // + // FIXME: We can get this pruning behavior even after the incremental SCC + // formation by leaving behind (conservative) DFS numberings in the nodes, + // and pruning the search with them. These would need to be cleverly updated + // during the removal of intra-SCC edges, but could be preserved + // conservatively. + + // The set of SCCs that are connected to the caller, and thus will + // participate in the merged connected component. + SmallPtrSet<SCC *, 8> ConnectedSCCs; + ConnectedSCCs.insert(this); + ConnectedSCCs.insert(&CallerC); + + // We build up a DFS stack of the parents chains. + SmallVector<std::pair<SCC *, SCC::parent_iterator>, 8> DFSSCCs; + SmallPtrSet<SCC *, 8> VisitedSCCs; + int ConnectedDepth = -1; + SCC *C = this; + parent_iterator I = parent_begin(), E = parent_end(); + for (;;) { + while (I != E) { + SCC &ParentSCC = *I++; + + // If we have already processed this parent SCC, skip it, and remember + // whether it was connected so we don't have to check the rest of the + // stack. This also handles when we reach a child of the 'this' SCC (the + // callee) which terminates the search. + if (ConnectedSCCs.count(&ParentSCC)) { + ConnectedDepth = std::max<int>(ConnectedDepth, DFSSCCs.size()); + continue; + } + if (VisitedSCCs.count(&ParentSCC)) + continue; + + // We fully explore the depth-first space, adding nodes to the connected + // set only as we pop them off, so "recurse" by rotating to the parent. + DFSSCCs.push_back(std::make_pair(C, I)); + C = &ParentSCC; + I = ParentSCC.parent_begin(); + E = ParentSCC.parent_end(); + } + + // If we've found a connection anywhere below this point on the stack (and + // thus up the parent graph from the caller), the current node needs to be + // added to the connected set now that we've processed all of its parents. + if ((int)DFSSCCs.size() == ConnectedDepth) { + --ConnectedDepth; // We're finished with this connection. + ConnectedSCCs.insert(C); + } else { + // Otherwise remember that its parents don't ever connect. + assert(ConnectedDepth < (int)DFSSCCs.size() && + "Cannot have a connected depth greater than the DFS depth!"); + VisitedSCCs.insert(C); + } + + if (DFSSCCs.empty()) + break; // We've walked all the parents of the caller transitively. + + // Pop off the prior node and position to unwind the depth first recursion. + std::tie(C, I) = DFSSCCs.pop_back_val(); + E = C->parent_end(); + } + + // Now that we have identified all of the SCCs which need to be merged into + // a connected set with the inserted edge, merge all of them into this SCC. + // FIXME: This operation currently creates ordering stability problems + // because we don't use stably ordered containers for the parent SCCs or the + // connected SCCs. + unsigned NewNodeBeginIdx = Nodes.size(); + for (SCC *C : ConnectedSCCs) { + if (C == this) + continue; + for (SCC *ParentC : C->ParentSCCs) + if (!ConnectedSCCs.count(ParentC)) + ParentSCCs.insert(ParentC); + C->ParentSCCs.clear(); + + for (Node *N : *C) { + for (Node &ChildN : *N) { + SCC &ChildC = *G->SCCMap.lookup(&ChildN); + if (&ChildC != C) + ChildC.ParentSCCs.erase(C); + } + G->SCCMap[N] = this; + Nodes.push_back(N); + } + C->Nodes.clear(); + } + for (auto I = Nodes.begin() + NewNodeBeginIdx, E = Nodes.end(); I != E; ++I) + for (Node &ChildN : **I) { + SCC &ChildC = *G->SCCMap.lookup(&ChildN); + if (&ChildC != this) + ChildC.ParentSCCs.insert(this); + } + + // We return the list of SCCs which were merged so that callers can + // invalidate any data they have associated with those SCCs. Note that these + // SCCs are no longer in an interesting state (they are totally empty) but + // the pointers will remain stable for the life of the graph itself. + return SmallVector<SCC *, 1>(ConnectedSCCs.begin(), ConnectedSCCs.end()); +} + +void LazyCallGraph::SCC::removeInterSCCEdge(Node &CallerN, Node &CalleeN) { + // First remove it from the node. + CallerN.removeEdgeInternal(CalleeN.getFunction()); + + assert(G->SCCMap.lookup(&CallerN) == this && + "The caller must be a member of this SCC."); + + SCC &CalleeC = *G->SCCMap.lookup(&CalleeN); + assert(&CalleeC != this && + "This API only supports the rmoval of inter-SCC edges."); + + assert(std::find(G->LeafSCCs.begin(), G->LeafSCCs.end(), this) == + G->LeafSCCs.end() && + "Cannot have a leaf SCC caller with a different SCC callee."); + + bool HasOtherCallToCalleeC = false; + bool HasOtherCallOutsideSCC = false; + for (Node *N : *this) { + for (Node &OtherCalleeN : *N) { + SCC &OtherCalleeC = *G->SCCMap.lookup(&OtherCalleeN); + if (&OtherCalleeC == &CalleeC) { + HasOtherCallToCalleeC = true; + break; + } + if (&OtherCalleeC != this) + HasOtherCallOutsideSCC = true; + } + if (HasOtherCallToCalleeC) + break; + } + // Because the SCCs form a DAG, deleting such an edge cannot change the set + // of SCCs in the graph. However, it may cut an edge of the SCC DAG, making + // the caller no longer a parent of the callee. Walk the other call edges + // in the caller to tell. + if (!HasOtherCallToCalleeC) { + bool Removed = CalleeC.ParentSCCs.erase(this); + (void)Removed; + assert(Removed && + "Did not find the caller SCC in the callee SCC's parent list!"); + + // It may orphan an SCC if it is the last edge reaching it, but that does + // not violate any invariants of the graph. + if (CalleeC.ParentSCCs.empty()) + DEBUG(dbgs() << "LCG: Update removing " << CallerN.getFunction().getName() + << " -> " << CalleeN.getFunction().getName() + << " edge orphaned the callee's SCC!\n"); + } + + // It may make the Caller SCC a leaf SCC. + if (!HasOtherCallOutsideSCC) + G->LeafSCCs.push_back(this); +} + +void LazyCallGraph::SCC::internalDFS( + SmallVectorImpl<std::pair<Node *, Node::iterator>> &DFSStack, + SmallVectorImpl<Node *> &PendingSCCStack, Node *N, + SmallVectorImpl<SCC *> &ResultSCCs) { + Node::iterator I = N->begin(); + N->LowLink = N->DFSNumber = 1; + int NextDFSNumber = 2; + for (;;) { + assert(N->DFSNumber != 0 && "We should always assign a DFS number " + "before processing a node."); + + // We simulate recursion by popping out of the nested loop and continuing. + Node::iterator E = N->end(); + while (I != E) { + Node &ChildN = *I; + if (SCC *ChildSCC = G->SCCMap.lookup(&ChildN)) { + // Check if we have reached a node in the new (known connected) set of + // this SCC. If so, the entire stack is necessarily in that set and we + // can re-start. + if (ChildSCC == this) { + insert(*N); + while (!PendingSCCStack.empty()) + insert(*PendingSCCStack.pop_back_val()); + while (!DFSStack.empty()) + insert(*DFSStack.pop_back_val().first); + return; + } + + // If this child isn't currently in this SCC, no need to process it. + // However, we do need to remove this SCC from its SCC's parent set. + ChildSCC->ParentSCCs.erase(this); + ++I; + continue; + } + + if (ChildN.DFSNumber == 0) { + // Mark that we should start at this child when next this node is the + // top of the stack. We don't start at the next child to ensure this + // child's lowlink is reflected. + DFSStack.push_back(std::make_pair(N, I)); + + // Continue, resetting to the child node. + ChildN.LowLink = ChildN.DFSNumber = NextDFSNumber++; + N = &ChildN; + I = ChildN.begin(); + E = ChildN.end(); + continue; + } + + // Track the lowest link of the children, if any are still in the stack. + // Any child not on the stack will have a LowLink of -1. + assert(ChildN.LowLink != 0 && + "Low-link must not be zero with a non-zero DFS number."); + if (ChildN.LowLink >= 0 && ChildN.LowLink < N->LowLink) + N->LowLink = ChildN.LowLink; + ++I; + } + + if (N->LowLink == N->DFSNumber) { + ResultSCCs.push_back(G->formSCC(N, PendingSCCStack)); + if (DFSStack.empty()) + return; + } else { + // At this point we know that N cannot ever be an SCC root. Its low-link + // is not its dfs-number, and we've processed all of its children. It is + // just sitting here waiting until some node further down the stack gets + // low-link == dfs-number and pops it off as well. Move it to the pending + // stack which is pulled into the next SCC to be formed. + PendingSCCStack.push_back(N); + + assert(!DFSStack.empty() && "We shouldn't have an empty stack!"); + } + + N = DFSStack.back().first; + I = DFSStack.back().second; + DFSStack.pop_back(); + } +} + +SmallVector<LazyCallGraph::SCC *, 1> +LazyCallGraph::SCC::removeIntraSCCEdge(Node &CallerN, + Node &CalleeN) { + // First remove it from the node. + CallerN.removeEdgeInternal(CalleeN.getFunction()); + + // We return a list of the resulting *new* SCCs in postorder. + SmallVector<SCC *, 1> ResultSCCs; + + // Direct recursion doesn't impact the SCC graph at all. + if (&CallerN == &CalleeN) + return ResultSCCs; + + // The worklist is every node in the original SCC. + SmallVector<Node *, 1> Worklist; + Worklist.swap(Nodes); + for (Node *N : Worklist) { + // The nodes formerly in this SCC are no longer in any SCC. + N->DFSNumber = 0; + N->LowLink = 0; + G->SCCMap.erase(N); + } + assert(Worklist.size() > 1 && "We have to have at least two nodes to have an " + "edge between them that is within the SCC."); + + // The callee can already reach every node in this SCC (by definition). It is + // the only node we know will stay inside this SCC. Everything which + // transitively reaches Callee will also remain in the SCC. To model this we + // incrementally add any chain of nodes which reaches something in the new + // node set to the new node set. This short circuits one side of the Tarjan's + // walk. + insert(CalleeN); + + // We're going to do a full mini-Tarjan's walk using a local stack here. + SmallVector<std::pair<Node *, Node::iterator>, 4> DFSStack; + SmallVector<Node *, 4> PendingSCCStack; + do { + Node *N = Worklist.pop_back_val(); + if (N->DFSNumber == 0) + internalDFS(DFSStack, PendingSCCStack, N, ResultSCCs); + + assert(DFSStack.empty() && "Didn't flush the entire DFS stack!"); + assert(PendingSCCStack.empty() && "Didn't flush all pending SCC nodes!"); + } while (!Worklist.empty()); + + // Now we need to reconnect the current SCC to the graph. + bool IsLeafSCC = true; + for (Node *N : Nodes) { + for (Node &ChildN : *N) { + SCC &ChildSCC = *G->SCCMap.lookup(&ChildN); + if (&ChildSCC == this) + continue; + ChildSCC.ParentSCCs.insert(this); + IsLeafSCC = false; + } + } +#ifndef NDEBUG + if (!ResultSCCs.empty()) + assert(!IsLeafSCC && "This SCC cannot be a leaf as we have split out new " + "SCCs by removing this edge."); + if (!std::any_of(G->LeafSCCs.begin(), G->LeafSCCs.end(), + [&](SCC *C) { return C == this; })) + assert(!IsLeafSCC && "This SCC cannot be a leaf as it already had child " + "SCCs before we removed this edge."); +#endif + // If this SCC stopped being a leaf through this edge removal, remove it from + // the leaf SCC list. + if (!IsLeafSCC && !ResultSCCs.empty()) + G->LeafSCCs.erase(std::remove(G->LeafSCCs.begin(), G->LeafSCCs.end(), this), + G->LeafSCCs.end()); + + // Return the new list of SCCs. + return ResultSCCs; +} + +void LazyCallGraph::insertEdge(Node &CallerN, Function &Callee) { + assert(SCCMap.empty() && DFSStack.empty() && + "This method cannot be called after SCCs have been formed!"); + + return CallerN.insertEdgeInternal(Callee); +} + +void LazyCallGraph::removeEdge(Node &CallerN, Function &Callee) { + assert(SCCMap.empty() && DFSStack.empty() && + "This method cannot be called after SCCs have been formed!"); + + return CallerN.removeEdgeInternal(Callee); +} + +LazyCallGraph::Node &LazyCallGraph::insertInto(Function &F, Node *&MappedN) { + return *new (MappedN = BPA.Allocate()) Node(*this, F); +} + +void LazyCallGraph::updateGraphPtrs() { + // Process all nodes updating the graph pointers. + { + SmallVector<Node *, 16> Worklist; + for (auto &Entry : EntryNodes) + if (Node *EntryN = Entry.dyn_cast<Node *>()) + Worklist.push_back(EntryN); + + while (!Worklist.empty()) { + Node *N = Worklist.pop_back_val(); + N->G = this; + for (auto &Callee : N->Callees) + if (!Callee.isNull()) + if (Node *CalleeN = Callee.dyn_cast<Node *>()) + Worklist.push_back(CalleeN); + } + } + + // Process all SCCs updating the graph pointers. + { + SmallVector<SCC *, 16> Worklist(LeafSCCs.begin(), LeafSCCs.end()); + + while (!Worklist.empty()) { + SCC *C = Worklist.pop_back_val(); + C->G = this; + Worklist.insert(Worklist.end(), C->ParentSCCs.begin(), + C->ParentSCCs.end()); + } + } +} + +LazyCallGraph::SCC *LazyCallGraph::formSCC(Node *RootN, + SmallVectorImpl<Node *> &NodeStack) { + // The tail of the stack is the new SCC. Allocate the SCC and pop the stack + // into it. + SCC *NewSCC = new (SCCBPA.Allocate()) SCC(*this); + + while (!NodeStack.empty() && NodeStack.back()->DFSNumber > RootN->DFSNumber) { + assert(NodeStack.back()->LowLink >= RootN->LowLink && + "We cannot have a low link in an SCC lower than its root on the " + "stack!"); + NewSCC->insert(*NodeStack.pop_back_val()); + } + NewSCC->insert(*RootN); + + // A final pass over all edges in the SCC (this remains linear as we only + // do this once when we build the SCC) to connect it to the parent sets of + // its children. + bool IsLeafSCC = true; + for (Node *SCCN : NewSCC->Nodes) + for (Node &SCCChildN : *SCCN) { + SCC &ChildSCC = *SCCMap.lookup(&SCCChildN); + if (&ChildSCC == NewSCC) + continue; + ChildSCC.ParentSCCs.insert(NewSCC); + IsLeafSCC = false; + } + + // For the SCCs where we fine no child SCCs, add them to the leaf list. + if (IsLeafSCC) + LeafSCCs.push_back(NewSCC); + + return NewSCC; +} + +LazyCallGraph::SCC *LazyCallGraph::getNextSCCInPostOrder() { + Node *N; + Node::iterator I; + if (!DFSStack.empty()) { + N = DFSStack.back().first; + I = DFSStack.back().second; + DFSStack.pop_back(); + } else { + // If we've handled all candidate entry nodes to the SCC forest, we're done. + do { + if (SCCEntryNodes.empty()) + return nullptr; + + N = &get(*SCCEntryNodes.pop_back_val()); + } while (N->DFSNumber != 0); + I = N->begin(); + N->LowLink = N->DFSNumber = 1; + NextDFSNumber = 2; + } + + for (;;) { + assert(N->DFSNumber != 0 && "We should always assign a DFS number " + "before placing a node onto the stack."); + + Node::iterator E = N->end(); + while (I != E) { + Node &ChildN = *I; + if (ChildN.DFSNumber == 0) { + // Mark that we should start at this child when next this node is the + // top of the stack. We don't start at the next child to ensure this + // child's lowlink is reflected. + DFSStack.push_back(std::make_pair(N, N->begin())); + + // Recurse onto this node via a tail call. + assert(!SCCMap.count(&ChildN) && + "Found a node with 0 DFS number but already in an SCC!"); + ChildN.LowLink = ChildN.DFSNumber = NextDFSNumber++; + N = &ChildN; + I = ChildN.begin(); + E = ChildN.end(); + continue; + } + + // Track the lowest link of the children, if any are still in the stack. + assert(ChildN.LowLink != 0 && + "Low-link must not be zero with a non-zero DFS number."); + if (ChildN.LowLink >= 0 && ChildN.LowLink < N->LowLink) + N->LowLink = ChildN.LowLink; + ++I; + } + + if (N->LowLink == N->DFSNumber) + // Form the new SCC out of the top of the DFS stack. + return formSCC(N, PendingSCCStack); + + // At this point we know that N cannot ever be an SCC root. Its low-link + // is not its dfs-number, and we've processed all of its children. It is + // just sitting here waiting until some node further down the stack gets + // low-link == dfs-number and pops it off as well. Move it to the pending + // stack which is pulled into the next SCC to be formed. + PendingSCCStack.push_back(N); + + assert(!DFSStack.empty() && "We never found a viable root!"); + N = DFSStack.back().first; + I = DFSStack.back().second; + DFSStack.pop_back(); + } } char LazyCallGraphAnalysis::PassID; @@ -154,9 +687,9 @@ LazyCallGraphPrinterPass::LazyCallGraphPrinterPass(raw_ostream &OS) : OS(OS) {} static void printNodes(raw_ostream &OS, LazyCallGraph::Node &N, SmallPtrSetImpl<LazyCallGraph::Node *> &Printed) { // Recurse depth first through the nodes. - for (LazyCallGraph::Node *ChildN : N) - if (Printed.insert(ChildN)) - printNodes(OS, *ChildN, Printed); + for (LazyCallGraph::Node &ChildN : N) + if (Printed.insert(&ChildN)) + printNodes(OS, ChildN, Printed); OS << " Call edges in function: " << N.getFunction().getName() << "\n"; for (LazyCallGraph::iterator I = N.begin(), E = N.end(); I != E; ++I) @@ -165,6 +698,16 @@ static void printNodes(raw_ostream &OS, LazyCallGraph::Node &N, OS << "\n"; } +static void printSCC(raw_ostream &OS, LazyCallGraph::SCC &SCC) { + ptrdiff_t SCCSize = std::distance(SCC.begin(), SCC.end()); + OS << " SCC with " << SCCSize << " functions:\n"; + + for (LazyCallGraph::Node *N : SCC) + OS << " " << N->getFunction().getName() << "\n"; + + OS << "\n"; +} + PreservedAnalyses LazyCallGraphPrinterPass::run(Module *M, ModuleAnalysisManager *AM) { LazyCallGraph &G = AM->getResult<LazyCallGraphAnalysis>(M); @@ -173,9 +716,13 @@ PreservedAnalyses LazyCallGraphPrinterPass::run(Module *M, << "\n\n"; SmallPtrSet<LazyCallGraph::Node *, 16> Printed; - for (LazyCallGraph::Node *N : G) - if (Printed.insert(N)) - printNodes(OS, *N, Printed); + for (LazyCallGraph::Node &N : G) + if (Printed.insert(&N)) + printNodes(OS, N, Printed); + + for (LazyCallGraph::SCC &SCC : G.postorder_sccs()) + printSCC(OS, SCC); return PreservedAnalyses::all(); + } diff --git a/lib/Analysis/LazyValueInfo.cpp b/lib/Analysis/LazyValueInfo.cpp index 3d6c583..9f919f7 100644 --- a/lib/Analysis/LazyValueInfo.cpp +++ b/lib/Analysis/LazyValueInfo.cpp @@ -12,7 +12,6 @@ // //===----------------------------------------------------------------------===// -#define DEBUG_TYPE "lazy-value-info" #include "llvm/Analysis/LazyValueInfo.h" #include "llvm/ADT/DenseSet.h" #include "llvm/ADT/STLExtras.h" @@ -34,6 +33,8 @@ using namespace llvm; using namespace PatternMatch; +#define DEBUG_TYPE "lazy-value-info" + char LazyValueInfo::ID = 0; INITIALIZE_PASS_BEGIN(LazyValueInfo, "lazy-value-info", "Lazy Value Information Analysis", false, true) @@ -82,7 +83,7 @@ class LVILatticeVal { ConstantRange Range; public: - LVILatticeVal() : Tag(undefined), Val(0), Range(1, true) {} + LVILatticeVal() : Tag(undefined), Val(nullptr), Range(1, true) {} static LVILatticeVal get(Constant *C) { LVILatticeVal Res; @@ -516,7 +517,7 @@ bool LazyValueInfoCache::solveBlockValue(Value *Val, BasicBlock *BB) { BBLV.markOverdefined(); Instruction *BBI = dyn_cast<Instruction>(Val); - if (BBI == 0 || BBI->getParent() != BB) { + if (!BBI || BBI->getParent() != BB) { return ODCacheUpdater.markResult(solveBlockValueNonLocal(BBLV, Val, BB)); } @@ -595,7 +596,7 @@ bool LazyValueInfoCache::solveBlockValueNonLocal(LVILatticeVal &BBLV, Value *UnderlyingVal = GetUnderlyingObject(Val); // If 'GetUnderlyingObject' didn't converge, skip it. It won't converge // inside InstructionDereferencesPointer either. - if (UnderlyingVal == GetUnderlyingObject(UnderlyingVal, NULL, 1)) { + if (UnderlyingVal == GetUnderlyingObject(UnderlyingVal, nullptr, 1)) { for (BasicBlock::iterator BI = BB->begin(), BE = BB->end(); BI != BE; ++BI) { if (InstructionDereferencesPointer(BI, UnderlyingVal)) { @@ -813,7 +814,7 @@ static bool getEdgeValueLocal(Value *Val, BasicBlock *BBFrom, // Recognize the range checking idiom that InstCombine produces. // (X-C1) u< C2 --> [C1, C1+C2) - ConstantInt *NegOffset = 0; + ConstantInt *NegOffset = nullptr; if (ICI->getPredicate() == ICmpInst::ICMP_ULT) match(ICI->getOperand(0), m_Add(m_Specific(Val), m_ConstantInt(NegOffset))); @@ -1014,7 +1015,7 @@ bool LazyValueInfo::runOnFunction(Function &F) { getCache(PImpl).clear(); DataLayoutPass *DLP = getAnalysisIfAvailable<DataLayoutPass>(); - DL = DLP ? &DLP->getDataLayout() : 0; + DL = DLP ? &DLP->getDataLayout() : nullptr; TLI = &getAnalysis<TargetLibraryInfo>(); // Fully lazy. @@ -1030,7 +1031,7 @@ void LazyValueInfo::releaseMemory() { // If the cache was allocated, free it. if (PImpl) { delete &getCache(PImpl); - PImpl = 0; + PImpl = nullptr; } } @@ -1044,7 +1045,7 @@ Constant *LazyValueInfo::getConstant(Value *V, BasicBlock *BB) { if (const APInt *SingleVal = CR.getSingleElement()) return ConstantInt::get(V->getContext(), *SingleVal); } - return 0; + return nullptr; } /// getConstantOnEdge - Determine whether the specified value is known to be a @@ -1060,7 +1061,7 @@ Constant *LazyValueInfo::getConstantOnEdge(Value *V, BasicBlock *FromBB, if (const APInt *SingleVal = CR.getSingleElement()) return ConstantInt::get(V->getContext(), *SingleVal); } - return 0; + return nullptr; } /// getPredicateOnEdge - Determine whether the specified value comparison @@ -1072,7 +1073,7 @@ LazyValueInfo::getPredicateOnEdge(unsigned Pred, Value *V, Constant *C, LVILatticeVal Result = getCache(PImpl).getValueOnEdge(V, FromBB, ToBB); // If we know the value is a constant, evaluate the conditional. - Constant *Res = 0; + Constant *Res = nullptr; if (Result.isConstant()) { Res = ConstantFoldCompareInstOperands(Pred, Result.getConstant(), C, DL, TLI); diff --git a/lib/Analysis/LibCallAliasAnalysis.cpp b/lib/Analysis/LibCallAliasAnalysis.cpp index fefa516..016f8c5 100644 --- a/lib/Analysis/LibCallAliasAnalysis.cpp +++ b/lib/Analysis/LibCallAliasAnalysis.cpp @@ -54,7 +54,7 @@ LibCallAliasAnalysis::AnalyzeLibCallDetails(const LibCallFunctionInfo *FI, // if we have detailed info and if 'P' is any of the locations we know // about. const LibCallFunctionInfo::LocationMRInfo *Details = FI->LocationDetails; - if (Details == 0) + if (Details == nullptr) return MRInfo; // If the details array is of the 'DoesNot' kind, we only know something if diff --git a/lib/Analysis/LibCallSemantics.cpp b/lib/Analysis/LibCallSemantics.cpp index 0592ccb..7d4e254 100644 --- a/lib/Analysis/LibCallSemantics.cpp +++ b/lib/Analysis/LibCallSemantics.cpp @@ -46,11 +46,11 @@ LibCallInfo::getFunctionInfo(const Function *F) const { /// If this is the first time we are querying for this info, lazily construct /// the StringMap to index it. - if (Map == 0) { + if (!Map) { Impl = Map = new StringMap<const LibCallFunctionInfo*>(); const LibCallFunctionInfo *Array = getFunctionInfoArray(); - if (Array == 0) return 0; + if (!Array) return nullptr; // We now have the array of entries. Populate the StringMap. for (unsigned i = 0; Array[i].Name; ++i) diff --git a/lib/Analysis/Lint.cpp b/lib/Analysis/Lint.cpp index b2182b1..b14f329 100644 --- a/lib/Analysis/Lint.cpp +++ b/lib/Analysis/Lint.cpp @@ -137,8 +137,8 @@ namespace { // that failed. This provides a nice place to put a breakpoint if you want // to see why something is not correct. void CheckFailed(const Twine &Message, - const Value *V1 = 0, const Value *V2 = 0, - const Value *V3 = 0, const Value *V4 = 0) { + const Value *V1 = nullptr, const Value *V2 = nullptr, + const Value *V3 = nullptr, const Value *V4 = nullptr) { MessagesStr << Message.str() << "\n"; WriteValue(V1); WriteValue(V2); @@ -177,7 +177,7 @@ bool Lint::runOnFunction(Function &F) { AA = &getAnalysis<AliasAnalysis>(); DT = &getAnalysis<DominatorTreeWrapperPass>().getDomTree(); DataLayoutPass *DLP = getAnalysisIfAvailable<DataLayoutPass>(); - DL = DLP ? &DLP->getDataLayout() : 0; + DL = DLP ? &DLP->getDataLayout() : nullptr; TLI = &getAnalysis<TargetLibraryInfo>(); visit(F); dbgs() << MessagesStr.str(); @@ -199,7 +199,7 @@ void Lint::visitCallSite(CallSite CS) { Value *Callee = CS.getCalledValue(); visitMemoryReference(I, Callee, AliasAnalysis::UnknownSize, - 0, 0, MemRef::Callee); + 0, nullptr, MemRef::Callee); if (Function *F = dyn_cast<Function>(findValue(Callee, /*OffsetOk=*/false))) { Assert1(CS.getCallingConv() == F->getCallingConv(), @@ -275,10 +275,10 @@ void Lint::visitCallSite(CallSite CS) { MemCpyInst *MCI = cast<MemCpyInst>(&I); // TODO: If the size is known, use it. visitMemoryReference(I, MCI->getDest(), AliasAnalysis::UnknownSize, - MCI->getAlignment(), 0, + MCI->getAlignment(), nullptr, MemRef::Write); visitMemoryReference(I, MCI->getSource(), AliasAnalysis::UnknownSize, - MCI->getAlignment(), 0, + MCI->getAlignment(), nullptr, MemRef::Read); // Check that the memcpy arguments don't overlap. The AliasAnalysis API @@ -299,10 +299,10 @@ void Lint::visitCallSite(CallSite CS) { MemMoveInst *MMI = cast<MemMoveInst>(&I); // TODO: If the size is known, use it. visitMemoryReference(I, MMI->getDest(), AliasAnalysis::UnknownSize, - MMI->getAlignment(), 0, + MMI->getAlignment(), nullptr, MemRef::Write); visitMemoryReference(I, MMI->getSource(), AliasAnalysis::UnknownSize, - MMI->getAlignment(), 0, + MMI->getAlignment(), nullptr, MemRef::Read); break; } @@ -310,7 +310,7 @@ void Lint::visitCallSite(CallSite CS) { MemSetInst *MSI = cast<MemSetInst>(&I); // TODO: If the size is known, use it. visitMemoryReference(I, MSI->getDest(), AliasAnalysis::UnknownSize, - MSI->getAlignment(), 0, + MSI->getAlignment(), nullptr, MemRef::Write); break; } @@ -321,17 +321,17 @@ void Lint::visitCallSite(CallSite CS) { &I); visitMemoryReference(I, CS.getArgument(0), AliasAnalysis::UnknownSize, - 0, 0, MemRef::Read | MemRef::Write); + 0, nullptr, MemRef::Read | MemRef::Write); break; case Intrinsic::vacopy: visitMemoryReference(I, CS.getArgument(0), AliasAnalysis::UnknownSize, - 0, 0, MemRef::Write); + 0, nullptr, MemRef::Write); visitMemoryReference(I, CS.getArgument(1), AliasAnalysis::UnknownSize, - 0, 0, MemRef::Read); + 0, nullptr, MemRef::Read); break; case Intrinsic::vaend: visitMemoryReference(I, CS.getArgument(0), AliasAnalysis::UnknownSize, - 0, 0, MemRef::Read | MemRef::Write); + 0, nullptr, MemRef::Read | MemRef::Write); break; case Intrinsic::stackrestore: @@ -339,7 +339,7 @@ void Lint::visitCallSite(CallSite CS) { // stack pointer, which the compiler may read from or write to // at any time, so check it for both readability and writeability. visitMemoryReference(I, CS.getArgument(0), AliasAnalysis::UnknownSize, - 0, 0, MemRef::Read | MemRef::Write); + 0, nullptr, MemRef::Read | MemRef::Write); break; } } @@ -513,7 +513,7 @@ static bool isZero(Value *V, const DataLayout *DL) { if (!VecTy) { unsigned BitWidth = V->getType()->getIntegerBitWidth(); APInt KnownZero(BitWidth, 0), KnownOne(BitWidth, 0); - ComputeMaskedBits(V, KnownZero, KnownOne, DL); + computeKnownBits(V, KnownZero, KnownOne, DL); return KnownZero.isAllOnesValue(); } @@ -534,7 +534,7 @@ static bool isZero(Value *V, const DataLayout *DL) { return true; APInt KnownZero(BitWidth, 0), KnownOne(BitWidth, 0); - ComputeMaskedBits(Elem, KnownZero, KnownOne, DL); + computeKnownBits(Elem, KnownZero, KnownOne, DL); if (KnownZero.isAllOnesValue()) return true; } @@ -572,13 +572,13 @@ void Lint::visitAllocaInst(AllocaInst &I) { } void Lint::visitVAArgInst(VAArgInst &I) { - visitMemoryReference(I, I.getOperand(0), AliasAnalysis::UnknownSize, 0, 0, - MemRef::Read | MemRef::Write); + visitMemoryReference(I, I.getOperand(0), AliasAnalysis::UnknownSize, 0, + nullptr, MemRef::Read | MemRef::Write); } void Lint::visitIndirectBrInst(IndirectBrInst &I) { - visitMemoryReference(I, I.getAddress(), AliasAnalysis::UnknownSize, 0, 0, - MemRef::Branchee); + visitMemoryReference(I, I.getAddress(), AliasAnalysis::UnknownSize, 0, + nullptr, MemRef::Branchee); Assert1(I.getNumDestinations() != 0, "Undefined behavior: indirectbr with no destinations", &I); diff --git a/lib/Analysis/Loads.cpp b/lib/Analysis/Loads.cpp index 0902a39..005d309 100644 --- a/lib/Analysis/Loads.cpp +++ b/lib/Analysis/Loads.cpp @@ -62,7 +62,7 @@ bool llvm::isSafeToLoadUnconditionally(Value *V, Instruction *ScanFrom, if (ByteOffset < 0) // out of bounds return false; - Type *BaseType = 0; + Type *BaseType = nullptr; unsigned BaseAlign = 0; if (const AllocaInst *AI = dyn_cast<AllocaInst>(Base)) { // An alloca is safe to load from as load as it is suitably aligned. @@ -161,7 +161,7 @@ Value *llvm::FindAvailableLoadedValue(Value *Ptr, BasicBlock *ScanBB, ScanFrom++; // Don't scan huge blocks. - if (MaxInstsToScan-- == 0) return 0; + if (MaxInstsToScan-- == 0) return nullptr; --ScanFrom; // If this is a load of Ptr, the loaded value is available. @@ -198,7 +198,7 @@ Value *llvm::FindAvailableLoadedValue(Value *Ptr, BasicBlock *ScanBB, // Otherwise the store that may or may not alias the pointer, bail out. ++ScanFrom; - return 0; + return nullptr; } // If this is some other instruction that may clobber Ptr, bail out. @@ -211,11 +211,11 @@ Value *llvm::FindAvailableLoadedValue(Value *Ptr, BasicBlock *ScanBB, // May modify the pointer, bail out. ++ScanFrom; - return 0; + return nullptr; } } // Got to the start of the block, we didn't find it, but are done for this // block. - return 0; + return nullptr; } diff --git a/lib/Analysis/LoopInfo.cpp b/lib/Analysis/LoopInfo.cpp index b38672e..46c0eaa 100644 --- a/lib/Analysis/LoopInfo.cpp +++ b/lib/Analysis/LoopInfo.cpp @@ -141,21 +141,21 @@ bool Loop::makeLoopInvariant(Instruction *I, bool &Changed, PHINode *Loop::getCanonicalInductionVariable() const { BasicBlock *H = getHeader(); - BasicBlock *Incoming = 0, *Backedge = 0; + BasicBlock *Incoming = nullptr, *Backedge = nullptr; pred_iterator PI = pred_begin(H); assert(PI != pred_end(H) && "Loop must have at least one backedge!"); Backedge = *PI++; - if (PI == pred_end(H)) return 0; // dead loop + if (PI == pred_end(H)) return nullptr; // dead loop Incoming = *PI++; - if (PI != pred_end(H)) return 0; // multiple backedges? + if (PI != pred_end(H)) return nullptr; // multiple backedges? if (contains(Incoming)) { if (contains(Backedge)) - return 0; + return nullptr; std::swap(Incoming, Backedge); } else if (!contains(Backedge)) - return 0; + return nullptr; // Loop over all of the PHI nodes, looking for a canonical indvar. for (BasicBlock::iterator I = H->begin(); isa<PHINode>(I); ++I) { @@ -171,7 +171,7 @@ PHINode *Loop::getCanonicalInductionVariable() const { if (CI->equalsInt(1)) return PN; } - return 0; + return nullptr; } /// isLCSSAForm - Return true if the Loop is in LCSSA form @@ -232,7 +232,7 @@ bool Loop::isSafeToClone() const { } MDNode *Loop::getLoopID() const { - MDNode *LoopID = 0; + MDNode *LoopID = nullptr; if (isLoopSimplifyForm()) { LoopID = getLoopLatch()->getTerminator()->getMetadata(LoopMDName); } else { @@ -241,7 +241,7 @@ MDNode *Loop::getLoopID() const { BasicBlock *H = getHeader(); for (block_iterator I = block_begin(), IE = block_end(); I != IE; ++I) { TerminatorInst *TI = (*I)->getTerminator(); - MDNode *MD = 0; + MDNode *MD = nullptr; // Check if this terminator branches to the loop header. for (unsigned i = 0, ie = TI->getNumSuccessors(); i != ie; ++i) { @@ -251,17 +251,17 @@ MDNode *Loop::getLoopID() const { } } if (!MD) - return 0; + return nullptr; if (!LoopID) LoopID = MD; else if (MD != LoopID) - return 0; + return nullptr; } } if (!LoopID || LoopID->getNumOperands() == 0 || LoopID->getOperand(0) != LoopID) - return 0; + return nullptr; return LoopID; } @@ -402,7 +402,7 @@ BasicBlock *Loop::getUniqueExitBlock() const { getUniqueExitBlocks(UniqueExitBlocks); if (UniqueExitBlocks.size() == 1) return UniqueExitBlocks[0]; - return 0; + return nullptr; } #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP) @@ -548,7 +548,7 @@ Loop *UnloopUpdater::getNearestLoop(BasicBlock *BB, Loop *BBLoop) { // is considered uninitialized. Loop *NearLoop = BBLoop; - Loop *Subloop = 0; + Loop *Subloop = nullptr; if (NearLoop != Unloop && Unloop->contains(NearLoop)) { Subloop = NearLoop; // Find the subloop ancestor that is directly contained within Unloop. @@ -564,7 +564,7 @@ Loop *UnloopUpdater::getNearestLoop(BasicBlock *BB, Loop *BBLoop) { succ_iterator I = succ_begin(BB), E = succ_end(BB); if (I == E) { assert(!Subloop && "subloop blocks must have a successor"); - NearLoop = 0; // unloop blocks may now exit the function. + NearLoop = nullptr; // unloop blocks may now exit the function. } for (; I != E; ++I) { if (*I == BB) @@ -637,7 +637,7 @@ void LoopInfo::updateUnloop(Loop *Unloop) { // Blocks no longer have a parent but are still referenced by Unloop until // the Unloop object is deleted. - LI.changeLoopFor(*I, 0); + LI.changeLoopFor(*I, nullptr); } // Remove the loop from the top-level LoopInfo object. diff --git a/lib/Analysis/LoopPass.cpp b/lib/Analysis/LoopPass.cpp index 38e753f..8df18e7 100644 --- a/lib/Analysis/LoopPass.cpp +++ b/lib/Analysis/LoopPass.cpp @@ -15,10 +15,13 @@ #include "llvm/Analysis/LoopPass.h" #include "llvm/IR/IRPrintingPasses.h" +#include "llvm/IR/LLVMContext.h" #include "llvm/Support/Debug.h" #include "llvm/Support/Timer.h" using namespace llvm; +#define DEBUG_TYPE "loop-pass-manager" + namespace { /// PrintLoopPass - Print a Function corresponding to a Loop. @@ -61,8 +64,8 @@ LPPassManager::LPPassManager() : FunctionPass(ID), PMDataManager() { skipThisLoop = false; redoThisLoop = false; - LI = NULL; - CurrentLoop = NULL; + LI = nullptr; + CurrentLoop = nullptr; } /// Delete loop from the loop queue and loop hierarchy (LoopInfo). @@ -251,6 +254,8 @@ bool LPPassManager::runOnFunction(Function &F) { // Then call the regular verifyAnalysis functions. verifyPreservedAnalysis(P); + + F.getContext().yield(); } removeNotPreservedAnalysis(P); diff --git a/lib/Analysis/MemDepPrinter.cpp b/lib/Analysis/MemDepPrinter.cpp index bc1dc69..10da3d5 100644 --- a/lib/Analysis/MemDepPrinter.cpp +++ b/lib/Analysis/MemDepPrinter.cpp @@ -46,7 +46,7 @@ namespace { bool runOnFunction(Function &F) override; - void print(raw_ostream &OS, const Module * = 0) const override; + void print(raw_ostream &OS, const Module * = nullptr) const override; void getAnalysisUsage(AnalysisUsage &AU) const override { AU.addRequiredTransitive<AliasAnalysis>(); @@ -56,7 +56,7 @@ namespace { void releaseMemory() override { Deps.clear(); - F = 0; + F = nullptr; } private: @@ -106,7 +106,7 @@ bool MemDepPrinter::runOnFunction(Function &F) { MemDepResult Res = MDA.getDependency(Inst); if (!Res.isNonLocal()) { Deps[Inst].insert(std::make_pair(getInstTypePair(Res), - static_cast<BasicBlock *>(0))); + static_cast<BasicBlock *>(nullptr))); } else if (CallSite CS = cast<Value>(Inst)) { const MemoryDependenceAnalysis::NonLocalDepInfo &NLDI = MDA.getNonLocalCallDependency(CS); @@ -122,8 +122,8 @@ bool MemDepPrinter::runOnFunction(Function &F) { if (LoadInst *LI = dyn_cast<LoadInst>(Inst)) { if (!LI->isUnordered()) { // FIXME: Handle atomic/volatile loads. - Deps[Inst].insert(std::make_pair(getInstTypePair(0, Unknown), - static_cast<BasicBlock *>(0))); + Deps[Inst].insert(std::make_pair(getInstTypePair(nullptr, Unknown), + static_cast<BasicBlock *>(nullptr))); continue; } AliasAnalysis::Location Loc = AA.getLocation(LI); @@ -131,8 +131,8 @@ bool MemDepPrinter::runOnFunction(Function &F) { } else if (StoreInst *SI = dyn_cast<StoreInst>(Inst)) { if (!SI->isUnordered()) { // FIXME: Handle atomic/volatile stores. - Deps[Inst].insert(std::make_pair(getInstTypePair(0, Unknown), - static_cast<BasicBlock *>(0))); + Deps[Inst].insert(std::make_pair(getInstTypePair(nullptr, Unknown), + static_cast<BasicBlock *>(nullptr))); continue; } AliasAnalysis::Location Loc = AA.getLocation(SI); diff --git a/lib/Analysis/MemoryBuiltins.cpp b/lib/Analysis/MemoryBuiltins.cpp index 1dba323..64d339f 100644 --- a/lib/Analysis/MemoryBuiltins.cpp +++ b/lib/Analysis/MemoryBuiltins.cpp @@ -12,7 +12,6 @@ // //===----------------------------------------------------------------------===// -#define DEBUG_TYPE "memory-builtins" #include "llvm/Analysis/MemoryBuiltins.h" #include "llvm/ADT/STLExtras.h" #include "llvm/ADT/Statistic.h" @@ -30,6 +29,8 @@ #include "llvm/Transforms/Utils/Local.h" using namespace llvm; +#define DEBUG_TYPE "memory-builtins" + enum AllocType { OpNewLike = 1<<0, // allocates; never returns null MallocLike = 1<<1 | OpNewLike, // allocates; may return null @@ -76,14 +77,14 @@ static Function *getCalledFunction(const Value *V, bool LookThroughBitCast) { CallSite CS(const_cast<Value*>(V)); if (!CS.getInstruction()) - return 0; + return nullptr; if (CS.isNoBuiltin()) - return 0; + return nullptr; Function *Callee = CS.getCalledFunction(); if (!Callee || !Callee->isDeclaration()) - return 0; + return nullptr; return Callee; } @@ -94,17 +95,17 @@ static const AllocFnsTy *getAllocationData(const Value *V, AllocType AllocTy, bool LookThroughBitCast = false) { // Skip intrinsics if (isa<IntrinsicInst>(V)) - return 0; + return nullptr; Function *Callee = getCalledFunction(V, LookThroughBitCast); if (!Callee) - return 0; + return nullptr; // Make sure that the function is available. StringRef FnName = Callee->getName(); LibFunc::Func TLIFn; if (!TLI || !TLI->getLibFunc(FnName, TLIFn) || !TLI->has(TLIFn)) - return 0; + return nullptr; unsigned i = 0; bool found = false; @@ -115,11 +116,11 @@ static const AllocFnsTy *getAllocationData(const Value *V, AllocType AllocTy, } } if (!found) - return 0; + return nullptr; const AllocFnsTy *FnData = &AllocationFnData[i]; if ((FnData->AllocTy & AllocTy) != FnData->AllocTy) - return 0; + return nullptr; // Check function prototype. int FstParam = FnData->FstParam; @@ -135,7 +136,7 @@ static const AllocFnsTy *getAllocationData(const Value *V, AllocType AllocTy, FTy->getParamType(SndParam)->isIntegerTy(32) || FTy->getParamType(SndParam)->isIntegerTy(64))) return FnData; - return 0; + return nullptr; } static bool hasNoAliasAttr(const Value *V, bool LookThroughBitCast) { @@ -202,19 +203,19 @@ bool llvm::isOperatorNewLikeFn(const Value *V, const TargetLibraryInfo *TLI, /// ignore InvokeInst here. const CallInst *llvm::extractMallocCall(const Value *I, const TargetLibraryInfo *TLI) { - return isMallocLikeFn(I, TLI) ? dyn_cast<CallInst>(I) : 0; + return isMallocLikeFn(I, TLI) ? dyn_cast<CallInst>(I) : nullptr; } static Value *computeArraySize(const CallInst *CI, const DataLayout *DL, const TargetLibraryInfo *TLI, bool LookThroughSExt = false) { if (!CI) - return 0; + return nullptr; // The size of the malloc's result type must be known to determine array size. Type *T = getMallocAllocatedType(CI, TLI); if (!T || !T->isSized() || !DL) - return 0; + return nullptr; unsigned ElementSize = DL->getTypeAllocSize(T); if (StructType *ST = dyn_cast<StructType>(T)) @@ -223,12 +224,12 @@ static Value *computeArraySize(const CallInst *CI, const DataLayout *DL, // If malloc call's arg can be determined to be a multiple of ElementSize, // return the multiple. Otherwise, return NULL. Value *MallocArg = CI->getArgOperand(0); - Value *Multiple = 0; + Value *Multiple = nullptr; if (ComputeMultiple(MallocArg, ElementSize, Multiple, LookThroughSExt)) return Multiple; - return 0; + return nullptr; } /// isArrayMalloc - Returns the corresponding CallInst if the instruction @@ -245,7 +246,7 @@ const CallInst *llvm::isArrayMalloc(const Value *I, return CI; // CI is a non-array malloc or we can't figure out that it is an array malloc. - return 0; + return nullptr; } /// getMallocType - Returns the PointerType resulting from the malloc call. @@ -257,7 +258,7 @@ PointerType *llvm::getMallocType(const CallInst *CI, const TargetLibraryInfo *TLI) { assert(isMallocLikeFn(CI, TLI) && "getMallocType and not malloc call"); - PointerType *MallocType = 0; + PointerType *MallocType = nullptr; unsigned NumOfBitCastUses = 0; // Determine if CallInst has a bitcast use. @@ -277,7 +278,7 @@ PointerType *llvm::getMallocType(const CallInst *CI, return cast<PointerType>(CI->getType()); // Type could not be determined. - return 0; + return nullptr; } /// getMallocAllocatedType - Returns the Type allocated by malloc call. @@ -288,7 +289,7 @@ PointerType *llvm::getMallocType(const CallInst *CI, Type *llvm::getMallocAllocatedType(const CallInst *CI, const TargetLibraryInfo *TLI) { PointerType *PT = getMallocType(CI, TLI); - return PT ? PT->getElementType() : 0; + return PT ? PT->getElementType() : nullptr; } /// getMallocArraySize - Returns the array size of a malloc call. If the @@ -308,7 +309,7 @@ Value *llvm::getMallocArraySize(CallInst *CI, const DataLayout *DL, /// is a calloc call. const CallInst *llvm::extractCallocCall(const Value *I, const TargetLibraryInfo *TLI) { - return isCallocLikeFn(I, TLI) ? cast<CallInst>(I) : 0; + return isCallocLikeFn(I, TLI) ? cast<CallInst>(I) : nullptr; } @@ -316,15 +317,15 @@ const CallInst *llvm::extractCallocCall(const Value *I, const CallInst *llvm::isFreeCall(const Value *I, const TargetLibraryInfo *TLI) { const CallInst *CI = dyn_cast<CallInst>(I); if (!CI || isa<IntrinsicInst>(CI)) - return 0; + return nullptr; Function *Callee = CI->getCalledFunction(); - if (Callee == 0 || !Callee->isDeclaration()) - return 0; + if (Callee == nullptr || !Callee->isDeclaration()) + return nullptr; StringRef FnName = Callee->getName(); LibFunc::Func TLIFn; if (!TLI || !TLI->getLibFunc(FnName, TLIFn) || !TLI->has(TLIFn)) - return 0; + return nullptr; unsigned ExpectedNumParams; if (TLIFn == LibFunc::free || @@ -335,18 +336,18 @@ const CallInst *llvm::isFreeCall(const Value *I, const TargetLibraryInfo *TLI) { TLIFn == LibFunc::ZdaPvRKSt9nothrow_t) // delete[](void*, nothrow) ExpectedNumParams = 2; else - return 0; + return nullptr; // Check free prototype. // FIXME: workaround for PR5130, this will be obsolete when a nobuiltin // attribute will exist. FunctionType *FTy = Callee->getFunctionType(); if (!FTy->getReturnType()->isVoidTy()) - return 0; + return nullptr; if (FTy->getNumParams() != ExpectedNumParams) - return 0; + return nullptr; if (FTy->getParamType(0) != Type::getInt8PtrTy(Callee->getContext())) - return 0; + return nullptr; return CI; } diff --git a/lib/Analysis/MemoryDependenceAnalysis.cpp b/lib/Analysis/MemoryDependenceAnalysis.cpp index 015ded1..9eaf109 100644 --- a/lib/Analysis/MemoryDependenceAnalysis.cpp +++ b/lib/Analysis/MemoryDependenceAnalysis.cpp @@ -14,7 +14,6 @@ // //===----------------------------------------------------------------------===// -#define DEBUG_TYPE "memdep" #include "llvm/Analysis/MemoryDependenceAnalysis.h" #include "llvm/ADT/STLExtras.h" #include "llvm/ADT/Statistic.h" @@ -33,6 +32,8 @@ #include "llvm/Support/Debug.h" using namespace llvm; +#define DEBUG_TYPE "memdep" + STATISTIC(NumCacheNonLocal, "Number of fully cached non-local responses"); STATISTIC(NumCacheDirtyNonLocal, "Number of dirty cached non-local responses"); STATISTIC(NumUncacheNonLocal, "Number of uncached non-local responses"); @@ -88,10 +89,10 @@ void MemoryDependenceAnalysis::getAnalysisUsage(AnalysisUsage &AU) const { bool MemoryDependenceAnalysis::runOnFunction(Function &) { AA = &getAnalysis<AliasAnalysis>(); DataLayoutPass *DLP = getAnalysisIfAvailable<DataLayoutPass>(); - DL = DLP ? &DLP->getDataLayout() : 0; + DL = DLP ? &DLP->getDataLayout() : nullptr; DominatorTreeWrapperPass *DTWP = getAnalysisIfAvailable<DominatorTreeWrapperPass>(); - DT = DTWP ? &DTWP->getDomTree() : 0; + DT = DTWP ? &DTWP->getDomTree() : nullptr; if (!PredCache) PredCache.reset(new PredIteratorCache()); return false; @@ -261,10 +262,10 @@ isLoadLoadClobberIfExtendedToFullWidth(const AliasAnalysis::Location &MemLoc, const LoadInst *LI, const DataLayout *DL) { // If we have no target data, we can't do this. - if (DL == 0) return false; + if (!DL) return false; // If we haven't already computed the base/offset of MemLoc, do so now. - if (MemLocBase == 0) + if (!MemLocBase) MemLocBase = GetPointerBaseWithConstantOffset(MemLoc.Ptr, MemLocOffs, DL); unsigned Size = MemoryDependenceAnalysis:: @@ -362,13 +363,13 @@ getPointerDependencyFrom(const AliasAnalysis::Location &MemLoc, bool isLoad, BasicBlock::iterator ScanIt, BasicBlock *BB, Instruction *QueryInst) { - const Value *MemLocBase = 0; + const Value *MemLocBase = nullptr; int64_t MemLocOffset = 0; unsigned Limit = BlockScanLimit; bool isInvariantLoad = false; if (isLoad && QueryInst) { LoadInst *LI = dyn_cast<LoadInst>(QueryInst); - if (LI && LI->getMetadata(LLVMContext::MD_invariant_load) != 0) + if (LI && LI->getMetadata(LLVMContext::MD_invariant_load) != nullptr) isInvariantLoad = true; } @@ -696,7 +697,7 @@ MemoryDependenceAnalysis::getNonLocalCallDependency(CallSite QueryCS) { if (Entry != Cache.begin() && std::prev(Entry)->getBB() == DirtyBB) --Entry; - NonLocalDepEntry *ExistingResult = 0; + NonLocalDepEntry *ExistingResult = nullptr; if (Entry != Cache.begin()+NumSortedEntries && Entry->getBB() == DirtyBB) { // If we already have an entry, and if it isn't already dirty, the block @@ -807,7 +808,7 @@ GetNonLocalInfoForBlock(const AliasAnalysis::Location &Loc, if (Entry != Cache->begin() && (Entry-1)->getBB() == BB) --Entry; - NonLocalDepEntry *ExistingResult = 0; + NonLocalDepEntry *ExistingResult = nullptr; if (Entry != Cache->begin()+NumSortedEntries && Entry->getBB() == BB) ExistingResult = &*Entry; @@ -960,7 +961,7 @@ getNonLocalPointerDepFromBB(const PHITransAddr &Pointer, if (CacheInfo->TBAATag != Loc.TBAATag) { if (CacheInfo->TBAATag) { CacheInfo->Pair = BBSkipFirstBlockPair(); - CacheInfo->TBAATag = 0; + CacheInfo->TBAATag = nullptr; for (NonLocalDepInfo::iterator DI = CacheInfo->NonLocalDeps.begin(), DE = CacheInfo->NonLocalDeps.end(); DI != DE; ++DI) if (Instruction *Inst = DI->getResult().getInst()) @@ -1116,7 +1117,7 @@ getNonLocalPointerDepFromBB(const PHITransAddr &Pointer, SortNonLocalDepInfoCache(*Cache, NumSortedEntries); NumSortedEntries = Cache->size(); } - Cache = 0; + Cache = nullptr; PredList.clear(); for (BasicBlock **PI = PredCache->GetPreds(BB); *PI; ++PI) { @@ -1126,7 +1127,7 @@ getNonLocalPointerDepFromBB(const PHITransAddr &Pointer, // Get the PHI translated pointer in this predecessor. This can fail if // not translatable, in which case the getAddr() returns null. PHITransAddr &PredPointer = PredList.back().second; - PredPointer.PHITranslateValue(BB, Pred, 0); + PredPointer.PHITranslateValue(BB, Pred, nullptr); Value *PredPtrVal = PredPointer.getAddr(); @@ -1175,7 +1176,7 @@ getNonLocalPointerDepFromBB(const PHITransAddr &Pointer, // predecessor, then we have to assume that the pointer is clobbered in // that predecessor. We can still do PRE of the load, which would insert // a computation of the pointer in this predecessor. - if (PredPtrVal == 0) + if (!PredPtrVal) CanTranslate = false; // FIXME: it is entirely possible that PHI translating will end up with @@ -1224,7 +1225,7 @@ getNonLocalPointerDepFromBB(const PHITransAddr &Pointer, // for the given block. It assumes that we haven't modified any of // our datastructures while processing the current block. - if (Cache == 0) { + if (!Cache) { // Refresh the CacheInfo/Cache pointer if it got invalidated. CacheInfo = &NonLocalPointerDeps[CacheKey]; Cache = &CacheInfo->NonLocalDeps; @@ -1279,7 +1280,7 @@ RemoveCachedNonLocalPointerDependencies(ValueIsLoadPair P) { for (unsigned i = 0, e = PInfo.size(); i != e; ++i) { Instruction *Target = PInfo[i].getResult().getInst(); - if (Target == 0) continue; // Ignore non-local dep results. + if (!Target) continue; // Ignore non-local dep results. assert(Target->getParent() == PInfo[i].getBB()); // Eliminating the dirty entry from 'Cache', so update the reverse info. diff --git a/lib/Analysis/NoAliasAnalysis.cpp b/lib/Analysis/NoAliasAnalysis.cpp index 0c119d6..4e11e50 100644 --- a/lib/Analysis/NoAliasAnalysis.cpp +++ b/lib/Analysis/NoAliasAnalysis.cpp @@ -36,7 +36,7 @@ namespace { // Note: NoAA does not call InitializeAliasAnalysis because it's // special and does not support chaining. DataLayoutPass *DLP = getAnalysisIfAvailable<DataLayoutPass>(); - DL = DLP ? &DLP->getDataLayout() : 0; + DL = DLP ? &DLP->getDataLayout() : nullptr; } AliasResult alias(const Location &LocA, const Location &LocB) override { diff --git a/lib/Analysis/PHITransAddr.cpp b/lib/Analysis/PHITransAddr.cpp index ad3685a..bfe8642 100644 --- a/lib/Analysis/PHITransAddr.cpp +++ b/lib/Analysis/PHITransAddr.cpp @@ -43,7 +43,7 @@ static bool CanPHITrans(Instruction *Inst) { #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP) void PHITransAddr::dump() const { - if (Addr == 0) { + if (!Addr) { dbgs() << "PHITransAddr: null\n"; return; } @@ -58,7 +58,7 @@ static bool VerifySubExpr(Value *Expr, SmallVectorImpl<Instruction*> &InstInputs) { // If this is a non-instruction value, there is nothing to do. Instruction *I = dyn_cast<Instruction>(Expr); - if (I == 0) return true; + if (!I) return true; // If it's an instruction, it is either in Tmp or its operands recursively // are. @@ -90,7 +90,7 @@ static bool VerifySubExpr(Value *Expr, /// structure is valid, it returns true. If invalid, it prints errors and /// returns false. bool PHITransAddr::Verify() const { - if (Addr == 0) return true; + if (!Addr) return true; SmallVector<Instruction*, 8> Tmp(InstInputs.begin(), InstInputs.end()); @@ -116,14 +116,14 @@ bool PHITransAddr::IsPotentiallyPHITranslatable() const { // If the input value is not an instruction, or if it is not defined in CurBB, // then we don't need to phi translate it. Instruction *Inst = dyn_cast<Instruction>(Addr); - return Inst == 0 || CanPHITrans(Inst); + return !Inst || CanPHITrans(Inst); } static void RemoveInstInputs(Value *V, SmallVectorImpl<Instruction*> &InstInputs) { Instruction *I = dyn_cast<Instruction>(V); - if (I == 0) return; + if (!I) return; // If the instruction is in the InstInputs list, remove it. SmallVectorImpl<Instruction*>::iterator Entry = @@ -147,7 +147,7 @@ Value *PHITransAddr::PHITranslateSubExpr(Value *V, BasicBlock *CurBB, const DominatorTree *DT) { // If this is a non-instruction value, it can't require PHI translation. Instruction *Inst = dyn_cast<Instruction>(V); - if (Inst == 0) return V; + if (!Inst) return V; // Determine whether 'Inst' is an input to our PHI translatable expression. bool isInput = std::count(InstInputs.begin(), InstInputs.end(), Inst); @@ -173,7 +173,7 @@ Value *PHITransAddr::PHITranslateSubExpr(Value *V, BasicBlock *CurBB, // If this is a non-phi value, and it is analyzable, we can incorporate it // into the expression by making all instruction operands be inputs. if (!CanPHITrans(Inst)) - return 0; + return nullptr; // All instruction operands are now inputs (and of course, they may also be // defined in this block, so they may need to be phi translated themselves. @@ -187,9 +187,9 @@ Value *PHITransAddr::PHITranslateSubExpr(Value *V, BasicBlock *CurBB, // operands need to be phi translated, and if so, reconstruct it. if (CastInst *Cast = dyn_cast<CastInst>(Inst)) { - if (!isSafeToSpeculativelyExecute(Cast)) return 0; + if (!isSafeToSpeculativelyExecute(Cast)) return nullptr; Value *PHIIn = PHITranslateSubExpr(Cast->getOperand(0), CurBB, PredBB, DT); - if (PHIIn == 0) return 0; + if (!PHIIn) return nullptr; if (PHIIn == Cast->getOperand(0)) return Cast; @@ -209,7 +209,7 @@ Value *PHITransAddr::PHITranslateSubExpr(Value *V, BasicBlock *CurBB, (!DT || DT->dominates(CastI->getParent(), PredBB))) return CastI; } - return 0; + return nullptr; } // Handle getelementptr with at least one PHI translatable operand. @@ -218,7 +218,7 @@ Value *PHITransAddr::PHITranslateSubExpr(Value *V, BasicBlock *CurBB, bool AnyChanged = false; for (unsigned i = 0, e = GEP->getNumOperands(); i != e; ++i) { Value *GEPOp = PHITranslateSubExpr(GEP->getOperand(i), CurBB, PredBB, DT); - if (GEPOp == 0) return 0; + if (!GEPOp) return nullptr; AnyChanged |= GEPOp != GEP->getOperand(i); GEPOps.push_back(GEPOp); @@ -253,7 +253,7 @@ Value *PHITransAddr::PHITranslateSubExpr(Value *V, BasicBlock *CurBB, return GEPI; } } - return 0; + return nullptr; } // Handle add with a constant RHS. @@ -265,7 +265,7 @@ Value *PHITransAddr::PHITranslateSubExpr(Value *V, BasicBlock *CurBB, bool isNUW = cast<BinaryOperator>(Inst)->hasNoUnsignedWrap(); Value *LHS = PHITranslateSubExpr(Inst->getOperand(0), CurBB, PredBB, DT); - if (LHS == 0) return 0; + if (!LHS) return nullptr; // If the PHI translated LHS is an add of a constant, fold the immediates. if (BinaryOperator *BOp = dyn_cast<BinaryOperator>(LHS)) @@ -304,11 +304,11 @@ Value *PHITransAddr::PHITranslateSubExpr(Value *V, BasicBlock *CurBB, return BO; } - return 0; + return nullptr; } // Otherwise, we failed. - return 0; + return nullptr; } @@ -326,10 +326,10 @@ bool PHITransAddr::PHITranslateValue(BasicBlock *CurBB, BasicBlock *PredBB, // Make sure the value is live in the predecessor. if (Instruction *Inst = dyn_cast_or_null<Instruction>(Addr)) if (!DT->dominates(Inst->getParent(), PredBB)) - Addr = 0; + Addr = nullptr; } - return Addr == 0; + return Addr == nullptr; } /// PHITranslateWithInsertion - PHI translate this value into the specified @@ -354,7 +354,7 @@ PHITranslateWithInsertion(BasicBlock *CurBB, BasicBlock *PredBB, // If not, destroy any intermediate instructions inserted. while (NewInsts.size() != NISize) NewInsts.pop_back_val()->eraseFromParent(); - return 0; + return nullptr; } @@ -379,10 +379,10 @@ InsertPHITranslatedSubExpr(Value *InVal, BasicBlock *CurBB, // Handle cast of PHI translatable value. if (CastInst *Cast = dyn_cast<CastInst>(Inst)) { - if (!isSafeToSpeculativelyExecute(Cast)) return 0; + if (!isSafeToSpeculativelyExecute(Cast)) return nullptr; Value *OpVal = InsertPHITranslatedSubExpr(Cast->getOperand(0), CurBB, PredBB, DT, NewInsts); - if (OpVal == 0) return 0; + if (!OpVal) return nullptr; // Otherwise insert a cast at the end of PredBB. CastInst *New = CastInst::Create(Cast->getOpcode(), @@ -400,7 +400,7 @@ InsertPHITranslatedSubExpr(Value *InVal, BasicBlock *CurBB, for (unsigned i = 0, e = GEP->getNumOperands(); i != e; ++i) { Value *OpVal = InsertPHITranslatedSubExpr(GEP->getOperand(i), CurBB, PredBB, DT, NewInsts); - if (OpVal == 0) return 0; + if (!OpVal) return nullptr; GEPOps.push_back(OpVal); } @@ -436,5 +436,5 @@ InsertPHITranslatedSubExpr(Value *InVal, BasicBlock *CurBB, } #endif - return 0; + return nullptr; } diff --git a/lib/Analysis/PostDominators.cpp b/lib/Analysis/PostDominators.cpp index f23833a..6d92909 100644 --- a/lib/Analysis/PostDominators.cpp +++ b/lib/Analysis/PostDominators.cpp @@ -11,8 +11,6 @@ // //===----------------------------------------------------------------------===// -#define DEBUG_TYPE "postdomtree" - #include "llvm/Analysis/PostDominators.h" #include "llvm/ADT/DepthFirstIterator.h" #include "llvm/ADT/SetOperations.h" @@ -22,6 +20,8 @@ #include "llvm/Support/GenericDomTreeConstruction.h" using namespace llvm; +#define DEBUG_TYPE "postdomtree" + //===----------------------------------------------------------------------===// // PostDominatorTree Implementation //===----------------------------------------------------------------------===// diff --git a/lib/Analysis/RegionInfo.cpp b/lib/Analysis/RegionInfo.cpp index f4da598..7f88ae1 100644 --- a/lib/Analysis/RegionInfo.cpp +++ b/lib/Analysis/RegionInfo.cpp @@ -9,7 +9,6 @@ // Detects single entry single exit regions in the control flow graph. //===----------------------------------------------------------------------===// -#define DEBUG_TYPE "region" #include "llvm/Analysis/RegionInfo.h" #include "llvm/ADT/PostOrderIterator.h" #include "llvm/ADT/Statistic.h" @@ -19,10 +18,13 @@ #include "llvm/Support/Debug.h" #include "llvm/Support/ErrorHandling.h" #include <algorithm> +#include <iterator> #include <set> using namespace llvm; +#define DEBUG_TYPE "region" + // Always verify if expensive checking is enabled. #ifdef XDEBUG static bool VerifyRegionInfo = true; @@ -62,9 +64,6 @@ Region::~Region() { // Only clean the cache for this Region. Caches of child Regions will be // cleaned when the child Regions are deleted. BBNodeMap.clear(); - - for (iterator I = begin(), E = end(); I != E; ++I) - delete *I; } void Region::replaceEntry(BasicBlock *BB) { @@ -88,7 +87,7 @@ void Region::replaceEntryRecursive(BasicBlock *NewEntry) { R->replaceEntry(NewEntry); for (Region::const_iterator RI = R->begin(), RE = R->end(); RI != RE; ++RI) if ((*RI)->getEntry() == OldEntry) - RegionQueue.push_back(*RI); + RegionQueue.push_back(RI->get()); } } @@ -104,7 +103,7 @@ void Region::replaceExitRecursive(BasicBlock *NewExit) { R->replaceExit(NewExit); for (Region::const_iterator RI = R->begin(), RE = R->end(); RI != RE; ++RI) if ((*RI)->getExit() == OldExit) - RegionQueue.push_back(*RI); + RegionQueue.push_back(RI->get()); } } @@ -128,8 +127,8 @@ bool Region::contains(const Loop *L) const { // BBs that are not part of any loop are element of the Loop // described by the NULL pointer. This loop is not part of any region, // except if the region describes the whole function. - if (L == 0) - return getExit() == 0; + if (!L) + return getExit() == nullptr; if (!contains(L->getHeader())) return false; @@ -147,7 +146,7 @@ bool Region::contains(const Loop *L) const { Loop *Region::outermostLoopInRegion(Loop *L) const { if (!contains(L)) - return 0; + return nullptr; while (L && contains(L->getParentLoop())) { L = L->getParentLoop(); @@ -165,14 +164,14 @@ Loop *Region::outermostLoopInRegion(LoopInfo *LI, BasicBlock* BB) const { BasicBlock *Region::getEnteringBlock() const { BasicBlock *entry = getEntry(); BasicBlock *Pred; - BasicBlock *enteringBlock = 0; + BasicBlock *enteringBlock = nullptr; for (pred_iterator PI = pred_begin(entry), PE = pred_end(entry); PI != PE; ++PI) { Pred = *PI; if (DT->getNode(Pred) && !contains(Pred)) { if (enteringBlock) - return 0; + return nullptr; enteringBlock = Pred; } @@ -184,17 +183,17 @@ BasicBlock *Region::getEnteringBlock() const { BasicBlock *Region::getExitingBlock() const { BasicBlock *exit = getExit(); BasicBlock *Pred; - BasicBlock *exitingBlock = 0; + BasicBlock *exitingBlock = nullptr; if (!exit) - return 0; + return nullptr; for (pred_iterator PI = pred_begin(exit), PE = pred_end(exit); PI != PE; ++PI) { Pred = *PI; if (contains(Pred)) { if (exitingBlock) - return 0; + return nullptr; exitingBlock = Pred; } @@ -295,7 +294,7 @@ Region* Region::getSubRegionNode(BasicBlock *BB) const { Region *R = RI->getRegionFor(BB); if (!R || R == this) - return 0; + return nullptr; // If we pass the BB out of this region, that means our code is broken. assert(contains(R) && "BB not in current region!"); @@ -304,7 +303,7 @@ Region* Region::getSubRegionNode(BasicBlock *BB) const { R = R->getParent(); if (R->getEntry() != BB) - return 0; + return nullptr; return R; } @@ -333,18 +332,20 @@ RegionNode* Region::getNode(BasicBlock *BB) const { void Region::transferChildrenTo(Region *To) { for (iterator I = begin(), E = end(); I != E; ++I) { (*I)->parent = To; - To->children.push_back(*I); + To->children.push_back(std::move(*I)); } children.clear(); } void Region::addSubRegion(Region *SubRegion, bool moveChildren) { - assert(SubRegion->parent == 0 && "SubRegion already has a parent!"); - assert(std::find(begin(), end(), SubRegion) == children.end() - && "Subregion already exists!"); + assert(!SubRegion->parent && "SubRegion already has a parent!"); + assert(std::find_if(begin(), end(), [&](const std::unique_ptr<Region> &R) { + return R.get() == SubRegion; + }) == children.end() && + "Subregion already exists!"); SubRegion->parent = this; - children.push_back(SubRegion); + children.push_back(std::unique_ptr<Region>(SubRegion)); if (!moveChildren) return; @@ -360,23 +361,27 @@ void Region::addSubRegion(Region *SubRegion, bool moveChildren) { RI->setRegionFor(BB, SubRegion); } - std::vector<Region*> Keep; + std::vector<std::unique_ptr<Region>> Keep; for (iterator I = begin(), E = end(); I != E; ++I) - if (SubRegion->contains(*I) && *I != SubRegion) { - SubRegion->children.push_back(*I); + if (SubRegion->contains(I->get()) && I->get() != SubRegion) { (*I)->parent = SubRegion; + SubRegion->children.push_back(std::move(*I)); } else - Keep.push_back(*I); + Keep.push_back(std::move(*I)); children.clear(); - children.insert(children.begin(), Keep.begin(), Keep.end()); + children.insert(children.begin(), + std::move_iterator<RegionSet::iterator>(Keep.begin()), + std::move_iterator<RegionSet::iterator>(Keep.end())); } Region *Region::removeSubRegion(Region *Child) { assert(Child->parent == this && "Child is not a child of this region!"); - Child->parent = 0; - RegionSet::iterator I = std::find(children.begin(), children.end(), Child); + Child->parent = nullptr; + RegionSet::iterator I = std::find_if( + children.begin(), children.end(), + [&](const std::unique_ptr<Region> &R) { return R.get() == Child; }); assert(I != children.end() && "Region does not exit. Unable to remove."); children.erase(children.begin()+(I-begin())); return Child; @@ -385,7 +390,7 @@ Region *Region::removeSubRegion(Region *Child) { unsigned Region::getDepth() const { unsigned Depth = 0; - for (Region *R = parent; R != 0; R = R->parent) + for (Region *R = parent; R != nullptr; R = R->parent) ++Depth; return Depth; @@ -395,12 +400,12 @@ Region *Region::getExpandedRegion() const { unsigned NumSuccessors = exit->getTerminator()->getNumSuccessors(); if (NumSuccessors == 0) - return NULL; + return nullptr; for (pred_iterator PI = pred_begin(getExit()), PE = pred_end(getExit()); PI != PE; ++PI) if (!DT->dominates(getEntry(), *PI)) - return NULL; + return nullptr; Region *R = RI->getRegionFor(exit); @@ -408,7 +413,7 @@ Region *Region::getExpandedRegion() const { if (exit->getTerminator()->getNumSuccessors() == 1) return new Region(getEntry(), *succ_begin(exit), RI, DT); else - return NULL; + return nullptr; } while (R->getParent() && R->getParent()->getEntry() == exit) @@ -418,7 +423,7 @@ Region *Region::getExpandedRegion() const { for (pred_iterator PI = pred_begin(getExit()), PE = pred_end(getExit()); PI != PE; ++PI) if (!DT->dominates(R->getExit(), *PI)) - return NULL; + return nullptr; return new Region(getEntry(), R->getExit(), RI, DT); } @@ -577,7 +582,7 @@ Region *RegionInfo::createRegion(BasicBlock *entry, BasicBlock *exit) { assert(entry && exit && "entry and exit must not be null!"); if (isTrivialRegion(entry, exit)) - return 0; + return nullptr; Region *region = new Region(entry, exit, this, DT); BBtoRegion.insert(std::make_pair(entry, region)); @@ -600,7 +605,7 @@ void RegionInfo::findRegionsWithEntry(BasicBlock *entry, BBtoBBMap *ShortCut) { if (!N) return; - Region *lastRegion= 0; + Region *lastRegion= nullptr; BasicBlock *lastExit = entry; // As only a BasicBlock that postdominates entry can finish a region, walk the @@ -680,12 +685,12 @@ void RegionInfo::releaseMemory() { BBtoRegion.clear(); if (TopLevelRegion) delete TopLevelRegion; - TopLevelRegion = 0; + TopLevelRegion = nullptr; } RegionInfo::RegionInfo() : FunctionPass(ID) { initializeRegionInfoPass(*PassRegistry::getPassRegistry()); - TopLevelRegion = 0; + TopLevelRegion = nullptr; } RegionInfo::~RegionInfo() { @@ -710,7 +715,7 @@ bool RegionInfo::runOnFunction(Function &F) { PDT = &getAnalysis<PostDominatorTree>(); DF = &getAnalysis<DominanceFrontier>(); - TopLevelRegion = new Region(&F.getEntryBlock(), 0, this, DT, 0); + TopLevelRegion = new Region(&F.getEntryBlock(), nullptr, this, DT, nullptr); updateStatistics(TopLevelRegion); Calculate(F); @@ -744,7 +749,7 @@ void RegionInfo::verifyAnalysis() const { Region *RegionInfo::getRegionFor(BasicBlock *BB) const { BBtoRegionMap::const_iterator I= BBtoRegion.find(BB); - return I != BBtoRegion.end() ? I->second : 0; + return I != BBtoRegion.end() ? I->second : nullptr; } void RegionInfo::setRegionFor(BasicBlock *BB, Region *R) { @@ -756,7 +761,7 @@ Region *RegionInfo::operator[](BasicBlock *BB) const { } BasicBlock *RegionInfo::getMaxRegionExit(BasicBlock *BB) const { - BasicBlock *Exit = NULL; + BasicBlock *Exit = nullptr; while (true) { // Get largest region that starts at BB. diff --git a/lib/Analysis/RegionPass.cpp b/lib/Analysis/RegionPass.cpp index 12d7ca3..3c7798f 100644 --- a/lib/Analysis/RegionPass.cpp +++ b/lib/Analysis/RegionPass.cpp @@ -17,10 +17,11 @@ #include "llvm/Analysis/RegionIterator.h" #include "llvm/Support/Timer.h" -#define DEBUG_TYPE "regionpassmgr" #include "llvm/Support/Debug.h" using namespace llvm; +#define DEBUG_TYPE "regionpassmgr" + //===----------------------------------------------------------------------===// // RGPassManager // @@ -31,15 +32,15 @@ RGPassManager::RGPassManager() : FunctionPass(ID), PMDataManager() { skipThisRegion = false; redoThisRegion = false; - RI = NULL; - CurrentRegion = NULL; + RI = nullptr; + CurrentRegion = nullptr; } // Recurse through all subregions and all regions into RQ. -static void addRegionIntoQueue(Region *R, std::deque<Region *> &RQ) { - RQ.push_back(R); - for (Region::iterator I = R->begin(), E = R->end(); I != E; ++I) - addRegionIntoQueue(*I, RQ); +static void addRegionIntoQueue(Region &R, std::deque<Region *> &RQ) { + RQ.push_back(&R); + for (const auto &E : R) + addRegionIntoQueue(*E, RQ); } /// Pass Manager itself does not invalidate any analysis info. @@ -57,7 +58,7 @@ bool RGPassManager::runOnFunction(Function &F) { // Collect inherited analysis from Module level pass manager. populateInheritedAnalysis(TPM->activeStack); - addRegionIntoQueue(RI->getTopLevelRegion(), RQ); + addRegionIntoQueue(*RI->getTopLevelRegion(), RQ); if (RQ.empty()) // No regions, skip calling finalizers return false; @@ -185,7 +186,6 @@ private: public: static char ID; - PrintRegionPass() : RegionPass(ID), Out(dbgs()) {} PrintRegionPass(const std::string &B, raw_ostream &o) : RegionPass(ID), Banner(B), Out(o) {} diff --git a/lib/Analysis/RegionPrinter.cpp b/lib/Analysis/RegionPrinter.cpp index 6467f47..893210a 100644 --- a/lib/Analysis/RegionPrinter.cpp +++ b/lib/Analysis/RegionPrinter.cpp @@ -98,31 +98,31 @@ struct DOTGraphTraits<RegionInfo*> : public DOTGraphTraits<RegionNode*> { // Print the cluster of the subregions. This groups the single basic blocks // and adds a different background color for each group. - static void printRegionCluster(const Region *R, GraphWriter<RegionInfo*> &GW, + static void printRegionCluster(const Region &R, GraphWriter<RegionInfo*> &GW, unsigned depth = 0) { raw_ostream &O = GW.getOStream(); - O.indent(2 * depth) << "subgraph cluster_" << static_cast<const void*>(R) + O.indent(2 * depth) << "subgraph cluster_" << static_cast<const void*>(&R) << " {\n"; O.indent(2 * (depth + 1)) << "label = \"\";\n"; - if (!onlySimpleRegions || R->isSimple()) { + if (!onlySimpleRegions || R.isSimple()) { O.indent(2 * (depth + 1)) << "style = filled;\n"; O.indent(2 * (depth + 1)) << "color = " - << ((R->getDepth() * 2 % 12) + 1) << "\n"; + << ((R.getDepth() * 2 % 12) + 1) << "\n"; } else { O.indent(2 * (depth + 1)) << "style = solid;\n"; O.indent(2 * (depth + 1)) << "color = " - << ((R->getDepth() * 2 % 12) + 2) << "\n"; + << ((R.getDepth() * 2 % 12) + 2) << "\n"; } - for (Region::const_iterator RI = R->begin(), RE = R->end(); RI != RE; ++RI) - printRegionCluster(*RI, GW, depth + 1); + for (Region::const_iterator RI = R.begin(), RE = R.end(); RI != RE; ++RI) + printRegionCluster(**RI, GW, depth + 1); - RegionInfo *RI = R->getRegionInfo(); + RegionInfo *RI = R.getRegionInfo(); - for (const auto &BB : R->blocks()) - if (RI->getRegionFor(BB) == R) + for (const auto &BB : R.blocks()) + if (RI->getRegionFor(BB) == &R) O.indent(2 * (depth + 1)) << "Node" << static_cast<const void*>(RI->getTopLevelRegion()->getBBNode(BB)) << ";\n"; @@ -134,7 +134,7 @@ struct DOTGraphTraits<RegionInfo*> : public DOTGraphTraits<RegionNode*> { GraphWriter<RegionInfo*> &GW) { raw_ostream &O = GW.getOStream(); O << "\tcolorscheme = \"paired12\"\n"; - printRegionCluster(RI->getTopLevelRegion(), GW, 4); + printRegionCluster(*RI->getTopLevelRegion(), GW, 4); } }; } //end namespace llvm diff --git a/lib/Analysis/ScalarEvolution.cpp b/lib/Analysis/ScalarEvolution.cpp index 08de621..42a7aa2 100644 --- a/lib/Analysis/ScalarEvolution.cpp +++ b/lib/Analysis/ScalarEvolution.cpp @@ -58,7 +58,6 @@ // //===----------------------------------------------------------------------===// -#define DEBUG_TYPE "scalar-evolution" #include "llvm/Analysis/ScalarEvolution.h" #include "llvm/ADT/STLExtras.h" #include "llvm/ADT/SmallPtrSet.h" @@ -89,6 +88,8 @@ #include <algorithm> using namespace llvm; +#define DEBUG_TYPE "scalar-evolution" + STATISTIC(NumArrayLenItCounts, "Number of trip counts computed with array length"); STATISTIC(NumTripCountsComputed, @@ -182,7 +183,7 @@ void SCEV::print(raw_ostream &OS) const { case scUMaxExpr: case scSMaxExpr: { const SCEVNAryExpr *NAry = cast<SCEVNAryExpr>(this); - const char *OpStr = 0; + const char *OpStr = nullptr; switch (NAry->getSCEVType()) { case scAddExpr: OpStr = " + "; break; case scMulExpr: OpStr = " * "; break; @@ -312,7 +313,7 @@ const SCEV *ScalarEvolution::getConstant(ConstantInt *V) { FoldingSetNodeID ID; ID.AddInteger(scConstant); ID.AddPointer(V); - void *IP = 0; + void *IP = nullptr; if (const SCEV *S = UniqueSCEVs.FindNodeOrInsertPos(ID, IP)) return S; SCEV *S = new (SCEVAllocator) SCEVConstant(ID.Intern(SCEVAllocator), V); UniqueSCEVs.InsertNode(S, IP); @@ -365,7 +366,7 @@ void SCEVUnknown::deleted() { SE->UniqueSCEVs.RemoveNode(this); // Release the value. - setValPtr(0); + setValPtr(nullptr); } void SCEVUnknown::allUsesReplacedWith(Value *New) { @@ -829,7 +830,7 @@ const SCEV *ScalarEvolution::getTruncateExpr(const SCEV *Op, ID.AddInteger(scTruncate); ID.AddPointer(Op); ID.AddPointer(Ty); - void *IP = 0; + void *IP = nullptr; if (const SCEV *S = UniqueSCEVs.FindNodeOrInsertPos(ID, IP)) return S; // Fold if the operand is constant. @@ -919,7 +920,7 @@ const SCEV *ScalarEvolution::getZeroExtendExpr(const SCEV *Op, ID.AddInteger(scZeroExtend); ID.AddPointer(Op); ID.AddPointer(Ty); - void *IP = 0; + void *IP = nullptr; if (const SCEV *S = UniqueSCEVs.FindNodeOrInsertPos(ID, IP)) return S; // zext(trunc(x)) --> zext(x) or x or trunc(x) @@ -1072,7 +1073,7 @@ static const SCEV *getOverflowLimitForStep(const SCEV *Step, return SE->getConstant(APInt::getSignedMaxValue(BitWidth) - SE->getSignedRange(Step).getSignedMin()); } - return 0; + return nullptr; } // The recurrence AR has been shown to have no signed wrap. Typically, if we can @@ -1091,19 +1092,18 @@ static const SCEV *getPreStartForSignExtend(const SCEVAddRecExpr *AR, // Check for a simple looking step prior to loop entry. const SCEVAddExpr *SA = dyn_cast<SCEVAddExpr>(Start); if (!SA) - return 0; + return nullptr; // Create an AddExpr for "PreStart" after subtracting Step. Full SCEV // subtraction is expensive. For this purpose, perform a quick and dirty // difference, by checking for Step in the operand list. SmallVector<const SCEV *, 4> DiffOps; - for (SCEVAddExpr::op_iterator I = SA->op_begin(), E = SA->op_end(); - I != E; ++I) { - if (*I != Step) - DiffOps.push_back(*I); - } + for (const SCEV *Op : SA->operands()) + if (Op != Step) + DiffOps.push_back(Op); + if (DiffOps.size() == SA->getNumOperands()) - return 0; + return nullptr; // This is a postinc AR. Check for overflow on the preinc recurrence using the // same three conditions that getSignExtendedExpr checks. @@ -1139,7 +1139,7 @@ static const SCEV *getPreStartForSignExtend(const SCEVAddRecExpr *AR, SE->isLoopEntryGuardedByCond(L, Pred, PreStart, OverflowLimit)) { return PreStart; } - return 0; + return nullptr; } // Get the normalized sign-extended expression for this AddRec's Start. @@ -1181,7 +1181,7 @@ const SCEV *ScalarEvolution::getSignExtendExpr(const SCEV *Op, ID.AddInteger(scSignExtend); ID.AddPointer(Op); ID.AddPointer(Ty); - void *IP = 0; + void *IP = nullptr; if (const SCEV *S = UniqueSCEVs.FindNodeOrInsertPos(ID, IP)) return S; // If the input value is provably positive, build a zext instead. @@ -1201,6 +1201,23 @@ const SCEV *ScalarEvolution::getSignExtendExpr(const SCEV *Op, return getTruncateOrSignExtend(X, Ty); } + // sext(C1 + (C2 * x)) --> C1 + sext(C2 * x) if C1 < C2 + if (auto SA = dyn_cast<SCEVAddExpr>(Op)) { + if (SA->getNumOperands() == 2) { + auto SC1 = dyn_cast<SCEVConstant>(SA->getOperand(0)); + auto SMul = dyn_cast<SCEVMulExpr>(SA->getOperand(1)); + if (SMul && SC1) { + if (auto SC2 = dyn_cast<SCEVConstant>(SMul->getOperand(0))) { + const APInt &C1 = SC1->getValue()->getValue(); + const APInt &C2 = SC2->getValue()->getValue(); + if (C1.isStrictlyPositive() && C2.isStrictlyPositive() && + C2.ugt(C1) && C2.isPowerOf2()) + return getAddExpr(getSignExtendExpr(SC1, Ty), + getSignExtendExpr(SMul, Ty)); + } + } + } + } // If the input value is a chrec scev, and we can prove that the value // did not overflow the old, smaller, value, we can sign extend all of the // operands (often constants). This allows analysis of something like @@ -1292,6 +1309,22 @@ const SCEV *ScalarEvolution::getSignExtendExpr(const SCEV *Op, L, AR->getNoWrapFlags()); } } + // If Start and Step are constants, check if we can apply this + // transformation: + // sext{C1,+,C2} --> C1 + sext{0,+,C2} if C1 < C2 + auto SC1 = dyn_cast<SCEVConstant>(Start); + auto SC2 = dyn_cast<SCEVConstant>(Step); + if (SC1 && SC2) { + const APInt &C1 = SC1->getValue()->getValue(); + const APInt &C2 = SC2->getValue()->getValue(); + if (C1.isStrictlyPositive() && C2.isStrictlyPositive() && C2.ugt(C1) && + C2.isPowerOf2()) { + Start = getSignExtendExpr(Start, Ty); + const SCEV *NewAR = getAddRecExpr(getConstant(AR->getType(), 0), Step, + L, AR->getNoWrapFlags()); + return getAddExpr(Start, getSignExtendExpr(NewAR, Ty)); + } + } } // The cast wasn't folded; create an explicit cast node. @@ -1340,9 +1373,8 @@ const SCEV *ScalarEvolution::getAnyExtendExpr(const SCEV *Op, // Force the cast to be folded into the operands of an addrec. if (const SCEVAddRecExpr *AR = dyn_cast<SCEVAddRecExpr>(Op)) { SmallVector<const SCEV *, 4> Ops; - for (SCEVAddRecExpr::op_iterator I = AR->op_begin(), E = AR->op_end(); - I != E; ++I) - Ops.push_back(getAnyExtendExpr(*I, Ty)); + for (const SCEV *Op : AR->operands()) + Ops.push_back(getAnyExtendExpr(Op, Ty)); return getAddRecExpr(Ops, AR->getLoop(), SCEV::FlagNW); } @@ -1811,7 +1843,7 @@ const SCEV *ScalarEvolution::getAddExpr(SmallVectorImpl<const SCEV *> &Ops, ID.AddInteger(scAddExpr); for (unsigned i = 0, e = Ops.size(); i != e; ++i) ID.AddPointer(Ops[i]); - void *IP = 0; + void *IP = nullptr; SCEVAddExpr *S = static_cast<SCEVAddExpr *>(UniqueSCEVs.FindNodeOrInsertPos(ID, IP)); if (!S) { @@ -2105,7 +2137,7 @@ const SCEV *ScalarEvolution::getMulExpr(SmallVectorImpl<const SCEV *> &Ops, ID.AddInteger(scMulExpr); for (unsigned i = 0, e = Ops.size(); i != e; ++i) ID.AddPointer(Ops[i]); - void *IP = 0; + void *IP = nullptr; SCEVMulExpr *S = static_cast<SCEVMulExpr *>(UniqueSCEVs.FindNodeOrInsertPos(ID, IP)); if (!S) { @@ -2230,7 +2262,7 @@ const SCEV *ScalarEvolution::getUDivExpr(const SCEV *LHS, ID.AddInteger(scUDivExpr); ID.AddPointer(LHS); ID.AddPointer(RHS); - void *IP = 0; + void *IP = nullptr; if (const SCEV *S = UniqueSCEVs.FindNodeOrInsertPos(ID, IP)) return S; SCEV *S = new (SCEVAllocator) SCEVUDivExpr(ID.Intern(SCEVAllocator), LHS, RHS); @@ -2425,7 +2457,7 @@ ScalarEvolution::getAddRecExpr(SmallVectorImpl<const SCEV *> &Operands, for (unsigned i = 0, e = Operands.size(); i != e; ++i) ID.AddPointer(Operands[i]); ID.AddPointer(L); - void *IP = 0; + void *IP = nullptr; SCEVAddRecExpr *S = static_cast<SCEVAddRecExpr *>(UniqueSCEVs.FindNodeOrInsertPos(ID, IP)); if (!S) { @@ -2533,7 +2565,7 @@ ScalarEvolution::getSMaxExpr(SmallVectorImpl<const SCEV *> &Ops) { ID.AddInteger(scSMaxExpr); for (unsigned i = 0, e = Ops.size(); i != e; ++i) ID.AddPointer(Ops[i]); - void *IP = 0; + void *IP = nullptr; if (const SCEV *S = UniqueSCEVs.FindNodeOrInsertPos(ID, IP)) return S; const SCEV **O = SCEVAllocator.Allocate<const SCEV *>(Ops.size()); std::uninitialized_copy(Ops.begin(), Ops.end(), O); @@ -2637,7 +2669,7 @@ ScalarEvolution::getUMaxExpr(SmallVectorImpl<const SCEV *> &Ops) { ID.AddInteger(scUMaxExpr); for (unsigned i = 0, e = Ops.size(); i != e; ++i) ID.AddPointer(Ops[i]); - void *IP = 0; + void *IP = nullptr; if (const SCEV *S = UniqueSCEVs.FindNodeOrInsertPos(ID, IP)) return S; const SCEV **O = SCEVAllocator.Allocate<const SCEV *>(Ops.size()); std::uninitialized_copy(Ops.begin(), Ops.end(), O); @@ -2704,7 +2736,7 @@ const SCEV *ScalarEvolution::getUnknown(Value *V) { FoldingSetNodeID ID; ID.AddInteger(scUnknown); ID.AddPointer(V); - void *IP = 0; + void *IP = nullptr; if (SCEV *S = UniqueSCEVs.FindNodeOrInsertPos(ID, IP)) { assert(cast<SCEVUnknown>(S)->getValue() == V && "Stale SCEVUnknown in uniquing map!"); @@ -3010,7 +3042,7 @@ const SCEV *ScalarEvolution::getPointerBase(const SCEV *V) { return getPointerBase(Cast->getOperand()); } else if (const SCEVNAryExpr *NAry = dyn_cast<SCEVNAryExpr>(V)) { - const SCEV *PtrOp = 0; + const SCEV *PtrOp = nullptr; for (SCEVNAryExpr::op_iterator I = NAry->op_begin(), E = NAry->op_end(); I != E; ++I) { if ((*I)->getType()->isPointerTy()) { @@ -3090,20 +3122,20 @@ const SCEV *ScalarEvolution::createNodeForPHI(PHINode *PN) { // The loop may have multiple entrances or multiple exits; we can analyze // this phi as an addrec if it has a unique entry value and a unique // backedge value. - Value *BEValueV = 0, *StartValueV = 0; + Value *BEValueV = nullptr, *StartValueV = nullptr; for (unsigned i = 0, e = PN->getNumIncomingValues(); i != e; ++i) { Value *V = PN->getIncomingValue(i); if (L->contains(PN->getIncomingBlock(i))) { if (!BEValueV) { BEValueV = V; } else if (BEValueV != V) { - BEValueV = 0; + BEValueV = nullptr; break; } } else if (!StartValueV) { StartValueV = V; } else if (StartValueV != V) { - StartValueV = 0; + StartValueV = nullptr; break; } } @@ -3363,7 +3395,7 @@ ScalarEvolution::GetMinTrailingZeros(const SCEV *S) { // For a SCEVUnknown, ask ValueTracking. unsigned BitWidth = getTypeSizeInBits(U->getType()); APInt Zeros(BitWidth, 0), Ones(BitWidth, 0); - ComputeMaskedBits(U->getValue(), Zeros, Ones); + computeKnownBits(U->getValue(), Zeros, Ones); return Zeros.countTrailingOnes(); } @@ -3502,7 +3534,7 @@ ScalarEvolution::getUnsignedRange(const SCEV *S) { if (const SCEVUnknown *U = dyn_cast<SCEVUnknown>(S)) { // For a SCEVUnknown, ask ValueTracking. APInt Zeros(BitWidth, 0), Ones(BitWidth, 0); - ComputeMaskedBits(U->getValue(), Zeros, Ones, DL); + computeKnownBits(U->getValue(), Zeros, Ones, DL); if (Ones == ~Zeros + 1) return setUnsignedRange(U, ConservativeResult); return setUnsignedRange(U, @@ -3755,13 +3787,13 @@ const SCEV *ScalarEvolution::createSCEV(Value *V) { // Instcombine's ShrinkDemandedConstant may strip bits out of // constants, obscuring what would otherwise be a low-bits mask. - // Use ComputeMaskedBits to compute what ShrinkDemandedConstant + // Use computeKnownBits to compute what ShrinkDemandedConstant // knew about to reconstruct a low-bits mask value. unsigned LZ = A.countLeadingZeros(); unsigned TZ = A.countTrailingZeros(); unsigned BitWidth = A.getBitWidth(); APInt KnownZero(BitWidth, 0), KnownOne(BitWidth, 0); - ComputeMaskedBits(U->getOperand(0), KnownZero, KnownOne, DL); + computeKnownBits(U->getOperand(0), KnownZero, KnownOne, DL); APInt EffectiveMask = APInt::getLowBitsSet(BitWidth, BitWidth - LZ - TZ).shl(TZ); @@ -4316,9 +4348,9 @@ ScalarEvolution::BackedgeTakenInfo::getExact(ScalarEvolution *SE) const { if (!ExitNotTaken.ExitingBlock) return SE->getCouldNotCompute(); assert(ExitNotTaken.ExactNotTaken && "uninitialized not-taken info"); - const SCEV *BECount = 0; + const SCEV *BECount = nullptr; for (const ExitNotTakenInfo *ENT = &ExitNotTaken; - ENT != 0; ENT = ENT->getNextExit()) { + ENT != nullptr; ENT = ENT->getNextExit()) { assert(ENT->ExactNotTaken != SE->getCouldNotCompute() && "bad exit SCEV"); @@ -4336,7 +4368,7 @@ const SCEV * ScalarEvolution::BackedgeTakenInfo::getExact(BasicBlock *ExitingBlock, ScalarEvolution *SE) const { for (const ExitNotTakenInfo *ENT = &ExitNotTaken; - ENT != 0; ENT = ENT->getNextExit()) { + ENT != nullptr; ENT = ENT->getNextExit()) { if (ENT->ExitingBlock == ExitingBlock) return ENT->ExactNotTaken; @@ -4359,7 +4391,7 @@ bool ScalarEvolution::BackedgeTakenInfo::hasOperand(const SCEV *S, return false; for (const ExitNotTakenInfo *ENT = &ExitNotTaken; - ENT != 0; ENT = ENT->getNextExit()) { + ENT != nullptr; ENT = ENT->getNextExit()) { if (ENT->ExactNotTaken != SE->getCouldNotCompute() && SE->hasOperand(ENT->ExactNotTaken, S)) { @@ -4398,8 +4430,8 @@ ScalarEvolution::BackedgeTakenInfo::BackedgeTakenInfo( /// clear - Invalidate this result and free the ExitNotTakenInfo array. void ScalarEvolution::BackedgeTakenInfo::clear() { - ExitNotTaken.ExitingBlock = 0; - ExitNotTaken.ExactNotTaken = 0; + ExitNotTaken.ExitingBlock = nullptr; + ExitNotTaken.ExactNotTaken = nullptr; delete[] ExitNotTaken.getNextExit(); } @@ -4410,38 +4442,63 @@ ScalarEvolution::ComputeBackedgeTakenCount(const Loop *L) { SmallVector<BasicBlock *, 8> ExitingBlocks; L->getExitingBlocks(ExitingBlocks); - // Examine all exits and pick the most conservative values. - const SCEV *MaxBECount = getCouldNotCompute(); + SmallVector<std::pair<BasicBlock *, const SCEV *>, 4> ExitCounts; bool CouldComputeBECount = true; BasicBlock *Latch = L->getLoopLatch(); // may be NULL. - const SCEV *LatchMaxCount = 0; - SmallVector<std::pair<BasicBlock *, const SCEV *>, 4> ExitCounts; + const SCEV *MustExitMaxBECount = nullptr; + const SCEV *MayExitMaxBECount = nullptr; + + // Compute the ExitLimit for each loop exit. Use this to populate ExitCounts + // and compute maxBECount. for (unsigned i = 0, e = ExitingBlocks.size(); i != e; ++i) { - ExitLimit EL = ComputeExitLimit(L, ExitingBlocks[i]); + BasicBlock *ExitBB = ExitingBlocks[i]; + ExitLimit EL = ComputeExitLimit(L, ExitBB); + + // 1. For each exit that can be computed, add an entry to ExitCounts. + // CouldComputeBECount is true only if all exits can be computed. if (EL.Exact == getCouldNotCompute()) // We couldn't compute an exact value for this exit, so // we won't be able to compute an exact value for the loop. CouldComputeBECount = false; else - ExitCounts.push_back(std::make_pair(ExitingBlocks[i], EL.Exact)); - - if (MaxBECount == getCouldNotCompute()) - MaxBECount = EL.Max; - else if (EL.Max != getCouldNotCompute()) { - // We cannot take the "min" MaxBECount, because non-unit stride loops may - // skip some loop tests. Taking the max over the exits is sufficiently - // conservative. TODO: We could do better taking into consideration - // non-latch exits that dominate the latch. - if (EL.MustExit && ExitingBlocks[i] == Latch) - LatchMaxCount = EL.Max; - else - MaxBECount = getUMaxFromMismatchedTypes(MaxBECount, EL.Max); + ExitCounts.push_back(std::make_pair(ExitBB, EL.Exact)); + + // 2. Derive the loop's MaxBECount from each exit's max number of + // non-exiting iterations. Partition the loop exits into two kinds: + // LoopMustExits and LoopMayExits. + // + // A LoopMustExit meets two requirements: + // + // (a) Its ExitLimit.MustExit flag must be set which indicates that the exit + // test condition cannot be skipped (the tested variable has unit stride or + // the test is less-than or greater-than, rather than a strict inequality). + // + // (b) It must dominate the loop latch, hence must be tested on every loop + // iteration. + // + // If any computable LoopMustExit is found, then MaxBECount is the minimum + // EL.Max of computable LoopMustExits. Otherwise, MaxBECount is + // conservatively the maximum EL.Max, where CouldNotCompute is considered + // greater than any computable EL.Max. + if (EL.MustExit && EL.Max != getCouldNotCompute() && Latch && + DT->dominates(ExitBB, Latch)) { + if (!MustExitMaxBECount) + MustExitMaxBECount = EL.Max; + else { + MustExitMaxBECount = + getUMinFromMismatchedTypes(MustExitMaxBECount, EL.Max); + } + } else if (MayExitMaxBECount != getCouldNotCompute()) { + if (!MayExitMaxBECount || EL.Max == getCouldNotCompute()) + MayExitMaxBECount = EL.Max; + else { + MayExitMaxBECount = + getUMaxFromMismatchedTypes(MayExitMaxBECount, EL.Max); + } } } - // Be more precise in the easy case of a loop latch that must exit. - if (LatchMaxCount) { - MaxBECount = getUMinFromMismatchedTypes(MaxBECount, LatchMaxCount); - } + const SCEV *MaxBECount = MustExitMaxBECount ? MustExitMaxBECount : + (MayExitMaxBECount ? MayExitMaxBECount : getCouldNotCompute()); return BackedgeTakenInfo(ExitCounts, CouldComputeBECount, MaxBECount); } @@ -4454,7 +4511,7 @@ ScalarEvolution::ComputeExitLimit(const Loop *L, BasicBlock *ExitingBlock) { // exit at this block and remember the exit block and whether all other targets // lead to the loop header. bool MustExecuteLoopHeader = true; - BasicBlock *Exit = 0; + BasicBlock *Exit = nullptr; for (succ_iterator SI = succ_begin(ExitingBlock), SE = succ_end(ExitingBlock); SI != SE; ++SI) if (!L->contains(*SI)) { @@ -4800,7 +4857,7 @@ ScalarEvolution::ComputeLoadConstantCompareExitLimit( return getCouldNotCompute(); // Okay, we allow one non-constant index into the GEP instruction. - Value *VarIdx = 0; + Value *VarIdx = nullptr; std::vector<Constant*> Indexes; unsigned VarIdxNum = 0; for (unsigned i = 2, e = GEP->getNumOperands(); i != e; ++i) @@ -4810,7 +4867,7 @@ ScalarEvolution::ComputeLoadConstantCompareExitLimit( if (VarIdx) return getCouldNotCompute(); // Multiple non-constant idx's. VarIdx = GEP->getOperand(i); VarIdxNum = i-2; - Indexes.push_back(0); + Indexes.push_back(nullptr); } // Loop-invariant loads may be a byproduct of loop optimization. Skip them. @@ -4841,7 +4898,7 @@ ScalarEvolution::ComputeLoadConstantCompareExitLimit( Constant *Result = ConstantFoldLoadThroughGEPIndices(GV->getInitializer(), Indexes); - if (Result == 0) break; // Cannot compute! + if (!Result) break; // Cannot compute! // Evaluate the condition for this iteration. Result = ConstantExpr::getICmp(predicate, Result, RHS); @@ -4902,14 +4959,14 @@ getConstantEvolvingPHIOperands(Instruction *UseInst, const Loop *L, // Otherwise, we can evaluate this instruction if all of its operands are // constant or derived from a PHI node themselves. - PHINode *PHI = 0; + PHINode *PHI = nullptr; for (Instruction::op_iterator OpI = UseInst->op_begin(), OpE = UseInst->op_end(); OpI != OpE; ++OpI) { if (isa<Constant>(*OpI)) continue; Instruction *OpInst = dyn_cast<Instruction>(*OpI); - if (!OpInst || !canConstantEvolve(OpInst, L)) return 0; + if (!OpInst || !canConstantEvolve(OpInst, L)) return nullptr; PHINode *P = dyn_cast<PHINode>(OpInst); if (!P) @@ -4923,8 +4980,10 @@ getConstantEvolvingPHIOperands(Instruction *UseInst, const Loop *L, P = getConstantEvolvingPHIOperands(OpInst, L, PHIMap); PHIMap[OpInst] = P; } - if (P == 0) return 0; // Not evolving from PHI - if (PHI && PHI != P) return 0; // Evolving from multiple different PHIs. + if (!P) + return nullptr; // Not evolving from PHI + if (PHI && PHI != P) + return nullptr; // Evolving from multiple different PHIs. PHI = P; } // This is a expression evolving from a constant PHI! @@ -4938,7 +4997,7 @@ getConstantEvolvingPHIOperands(Instruction *UseInst, const Loop *L, /// constraints, return null. static PHINode *getConstantEvolvingPHI(Value *V, const Loop *L) { Instruction *I = dyn_cast<Instruction>(V); - if (I == 0 || !canConstantEvolve(I, L)) return 0; + if (!I || !canConstantEvolve(I, L)) return nullptr; if (PHINode *PN = dyn_cast<PHINode>(I)) { return PN; @@ -4960,18 +5019,18 @@ static Constant *EvaluateExpression(Value *V, const Loop *L, // Convenient constant check, but redundant for recursive calls. if (Constant *C = dyn_cast<Constant>(V)) return C; Instruction *I = dyn_cast<Instruction>(V); - if (!I) return 0; + if (!I) return nullptr; if (Constant *C = Vals.lookup(I)) return C; // An instruction inside the loop depends on a value outside the loop that we // weren't given a mapping for, or a value such as a call inside the loop. - if (!canConstantEvolve(I, L)) return 0; + if (!canConstantEvolve(I, L)) return nullptr; // An unmapped PHI can be due to a branch or another loop inside this loop, // or due to this not being the initial iteration through a loop where we // couldn't compute the evolution of this particular PHI last time. - if (isa<PHINode>(I)) return 0; + if (isa<PHINode>(I)) return nullptr; std::vector<Constant*> Operands(I->getNumOperands()); @@ -4979,12 +5038,12 @@ static Constant *EvaluateExpression(Value *V, const Loop *L, Instruction *Operand = dyn_cast<Instruction>(I->getOperand(i)); if (!Operand) { Operands[i] = dyn_cast<Constant>(I->getOperand(i)); - if (!Operands[i]) return 0; + if (!Operands[i]) return nullptr; continue; } Constant *C = EvaluateExpression(Operand, L, Vals, DL, TLI); Vals[Operand] = C; - if (!C) return 0; + if (!C) return nullptr; Operands[i] = C; } @@ -5013,7 +5072,7 @@ ScalarEvolution::getConstantEvolutionLoopExitValue(PHINode *PN, return I->second; if (BEs.ugt(MaxBruteForceIterations)) - return ConstantEvolutionLoopExitValue[PN] = 0; // Not going to evaluate it. + return ConstantEvolutionLoopExitValue[PN] = nullptr; // Not going to evaluate it. Constant *&RetVal = ConstantEvolutionLoopExitValue[PN]; @@ -5025,22 +5084,22 @@ ScalarEvolution::getConstantEvolutionLoopExitValue(PHINode *PN, // entry must be a constant (coming in from outside of the loop), and the // second must be derived from the same PHI. bool SecondIsBackedge = L->contains(PN->getIncomingBlock(1)); - PHINode *PHI = 0; + PHINode *PHI = nullptr; for (BasicBlock::iterator I = Header->begin(); (PHI = dyn_cast<PHINode>(I)); ++I) { Constant *StartCST = dyn_cast<Constant>(PHI->getIncomingValue(!SecondIsBackedge)); - if (StartCST == 0) continue; + if (!StartCST) continue; CurrentIterVals[PHI] = StartCST; } if (!CurrentIterVals.count(PN)) - return RetVal = 0; + return RetVal = nullptr; Value *BEValue = PN->getIncomingValue(SecondIsBackedge); // Execute the loop symbolically to determine the exit value. if (BEs.getActiveBits() >= 32) - return RetVal = 0; // More than 2^32-1 iterations?? Not doing it! + return RetVal = nullptr; // More than 2^32-1 iterations?? Not doing it! unsigned NumIterations = BEs.getZExtValue(); // must be in range unsigned IterationNum = 0; @@ -5053,8 +5112,8 @@ ScalarEvolution::getConstantEvolutionLoopExitValue(PHINode *PN, DenseMap<Instruction *, Constant *> NextIterVals; Constant *NextPHI = EvaluateExpression(BEValue, L, CurrentIterVals, DL, TLI); - if (NextPHI == 0) - return 0; // Couldn't evaluate! + if (!NextPHI) + return nullptr; // Couldn't evaluate! NextIterVals[PN] = NextPHI; bool StoppedEvolving = NextPHI == CurrentIterVals[PN]; @@ -5101,7 +5160,7 @@ const SCEV *ScalarEvolution::ComputeExitCountExhaustively(const Loop *L, Value *Cond, bool ExitWhen) { PHINode *PN = getConstantEvolvingPHI(Cond, L); - if (PN == 0) return getCouldNotCompute(); + if (!PN) return getCouldNotCompute(); // If the loop is canonicalized, the PHI will have exactly two entries. // That's the only form we support here. @@ -5114,12 +5173,12 @@ const SCEV *ScalarEvolution::ComputeExitCountExhaustively(const Loop *L, // One entry must be a constant (coming in from outside of the loop), and the // second must be derived from the same PHI. bool SecondIsBackedge = L->contains(PN->getIncomingBlock(1)); - PHINode *PHI = 0; + PHINode *PHI = nullptr; for (BasicBlock::iterator I = Header->begin(); (PHI = dyn_cast<PHINode>(I)); ++I) { Constant *StartCST = dyn_cast<Constant>(PHI->getIncomingValue(!SecondIsBackedge)); - if (StartCST == 0) continue; + if (!StartCST) continue; CurrentIterVals[PHI] = StartCST; } if (!CurrentIterVals.count(PN)) @@ -5189,7 +5248,7 @@ const SCEV *ScalarEvolution::getSCEVAtScope(const SCEV *V, const Loop *L) { if (Values[u].first == L) return Values[u].second ? Values[u].second : V; } - Values.push_back(std::make_pair(L, static_cast<const SCEV *>(0))); + Values.push_back(std::make_pair(L, static_cast<const SCEV *>(nullptr))); // Otherwise compute it. const SCEV *C = computeSCEVAtScope(V, L); SmallVector<std::pair<const Loop *, const SCEV *>, 2> &Values2 = ValuesAtScopes[V]; @@ -5243,7 +5302,7 @@ static Constant *BuildConstantFromSCEV(const SCEV *V) { } for (unsigned i = 1, e = SA->getNumOperands(); i != e; ++i) { Constant *C2 = BuildConstantFromSCEV(SA->getOperand(i)); - if (!C2) return 0; + if (!C2) return nullptr; // First pointer! if (!C->getType()->isPointerTy() && C2->getType()->isPointerTy()) { @@ -5258,7 +5317,7 @@ static Constant *BuildConstantFromSCEV(const SCEV *V) { // Don't bother trying to sum two pointers. We probably can't // statically compute a load that results from it anyway. if (C2->getType()->isPointerTy()) - return 0; + return nullptr; if (PointerType *PTy = dyn_cast<PointerType>(C->getType())) { if (PTy->getElementType()->isStructTy()) @@ -5276,10 +5335,10 @@ static Constant *BuildConstantFromSCEV(const SCEV *V) { const SCEVMulExpr *SM = cast<SCEVMulExpr>(V); if (Constant *C = BuildConstantFromSCEV(SM->getOperand(0))) { // Don't bother with pointers at all. - if (C->getType()->isPointerTy()) return 0; + if (C->getType()->isPointerTy()) return nullptr; for (unsigned i = 1, e = SM->getNumOperands(); i != e; ++i) { Constant *C2 = BuildConstantFromSCEV(SM->getOperand(i)); - if (!C2 || C2->getType()->isPointerTy()) return 0; + if (!C2 || C2->getType()->isPointerTy()) return nullptr; C = ConstantExpr::getMul(C, C2); } return C; @@ -5298,7 +5357,7 @@ static Constant *BuildConstantFromSCEV(const SCEV *V) { case scUMaxExpr: break; // TODO: smax, umax. } - return 0; + return nullptr; } const SCEV *ScalarEvolution::computeSCEVAtScope(const SCEV *V, const Loop *L) { @@ -5365,7 +5424,7 @@ const SCEV *ScalarEvolution::computeSCEVAtScope(const SCEV *V, const Loop *L) { // Check to see if getSCEVAtScope actually made an improvement. if (MadeImprovement) { - Constant *C = 0; + Constant *C = nullptr; if (const CmpInst *CI = dyn_cast<CmpInst>(I)) C = ConstantFoldCompareInstOperands(CI->getPredicate(), Operands[0], Operands[1], DL, @@ -5697,7 +5756,7 @@ ScalarEvolution::HowFarToZero(const SCEV *V, const Loop *L, bool IsSubExpr) { // to 0, it must be counting down to equal 0. Consequently, N = Start / -Step. // We have not yet seen any such cases. const SCEVConstant *StepC = dyn_cast<SCEVConstant>(Step); - if (StepC == 0 || StepC->getValue()->equalsInt(0)) + if (!StepC || StepC->getValue()->equalsInt(0)) return getCouldNotCompute(); // For positive steps (counting up until unsigned overflow): @@ -6136,18 +6195,30 @@ bool ScalarEvolution::isKnownPredicate(ICmpInst::Predicate Pred, // If LHS or RHS is an addrec, check to see if the condition is true in // every iteration of the loop. - if (const SCEVAddRecExpr *AR = dyn_cast<SCEVAddRecExpr>(LHS)) - if (isLoopEntryGuardedByCond( - AR->getLoop(), Pred, AR->getStart(), RHS) && - isLoopBackedgeGuardedByCond( - AR->getLoop(), Pred, AR->getPostIncExpr(*this), RHS)) - return true; - if (const SCEVAddRecExpr *AR = dyn_cast<SCEVAddRecExpr>(RHS)) - if (isLoopEntryGuardedByCond( - AR->getLoop(), Pred, LHS, AR->getStart()) && - isLoopBackedgeGuardedByCond( - AR->getLoop(), Pred, LHS, AR->getPostIncExpr(*this))) - return true; + // If LHS and RHS are both addrec, both conditions must be true in + // every iteration of the loop. + const SCEVAddRecExpr *LAR = dyn_cast<SCEVAddRecExpr>(LHS); + const SCEVAddRecExpr *RAR = dyn_cast<SCEVAddRecExpr>(RHS); + bool LeftGuarded = false; + bool RightGuarded = false; + if (LAR) { + const Loop *L = LAR->getLoop(); + if (isLoopEntryGuardedByCond(L, Pred, LAR->getStart(), RHS) && + isLoopBackedgeGuardedByCond(L, Pred, LAR->getPostIncExpr(*this), RHS)) { + if (!RAR) return true; + LeftGuarded = true; + } + } + if (RAR) { + const Loop *L = RAR->getLoop(); + if (isLoopEntryGuardedByCond(L, Pred, LHS, RAR->getStart()) && + isLoopBackedgeGuardedByCond(L, Pred, LHS, RAR->getPostIncExpr(*this))) { + if (!LAR) return true; + RightGuarded = true; + } + } + if (LeftGuarded && RightGuarded) + return true; // Otherwise see what can be done with known constant ranges. return isKnownPredicateWithRanges(Pred, LHS, RHS); @@ -6814,6 +6885,105 @@ const SCEV *SCEVAddRecExpr::getNumIterationsInRange(ConstantRange Range, return SE.getCouldNotCompute(); } +namespace { +struct FindUndefs { + bool Found; + FindUndefs() : Found(false) {} + + bool follow(const SCEV *S) { + if (const SCEVUnknown *C = dyn_cast<SCEVUnknown>(S)) { + if (isa<UndefValue>(C->getValue())) + Found = true; + } else if (const SCEVConstant *C = dyn_cast<SCEVConstant>(S)) { + if (isa<UndefValue>(C->getValue())) + Found = true; + } + + // Keep looking if we haven't found it yet. + return !Found; + } + bool isDone() const { + // Stop recursion if we have found an undef. + return Found; + } +}; +} + +// Return true when S contains at least an undef value. +static inline bool +containsUndefs(const SCEV *S) { + FindUndefs F; + SCEVTraversal<FindUndefs> ST(F); + ST.visitAll(S); + + return F.Found; +} + +namespace { +// Collect all steps of SCEV expressions. +struct SCEVCollectStrides { + ScalarEvolution &SE; + SmallVectorImpl<const SCEV *> &Strides; + + SCEVCollectStrides(ScalarEvolution &SE, SmallVectorImpl<const SCEV *> &S) + : SE(SE), Strides(S) {} + + bool follow(const SCEV *S) { + if (const SCEVAddRecExpr *AR = dyn_cast<SCEVAddRecExpr>(S)) + Strides.push_back(AR->getStepRecurrence(SE)); + return true; + } + bool isDone() const { return false; } +}; + +// Collect all SCEVUnknown and SCEVMulExpr expressions. +struct SCEVCollectTerms { + SmallVectorImpl<const SCEV *> &Terms; + + SCEVCollectTerms(SmallVectorImpl<const SCEV *> &T) + : Terms(T) {} + + bool follow(const SCEV *S) { + if (isa<SCEVUnknown>(S) || isa<SCEVMulExpr>(S)) { + if (!containsUndefs(S)) + Terms.push_back(S); + + // Stop recursion: once we collected a term, do not walk its operands. + return false; + } + + // Keep looking. + return true; + } + bool isDone() const { return false; } +}; +} + +/// Find parametric terms in this SCEVAddRecExpr. +void SCEVAddRecExpr::collectParametricTerms( + ScalarEvolution &SE, SmallVectorImpl<const SCEV *> &Terms) const { + SmallVector<const SCEV *, 4> Strides; + SCEVCollectStrides StrideCollector(SE, Strides); + visitAll(this, StrideCollector); + + DEBUG({ + dbgs() << "Strides:\n"; + for (const SCEV *S : Strides) + dbgs() << *S << "\n"; + }); + + for (const SCEV *S : Strides) { + SCEVCollectTerms TermCollector(Terms); + visitAll(S, TermCollector); + } + + DEBUG({ + dbgs() << "Terms:\n"; + for (const SCEV *T : Terms) + dbgs() << *T << "\n"; + }); +} + static const APInt srem(const SCEVConstant *C1, const SCEVConstant *C2) { APInt A = C1->getValue()->getValue(); APInt B = C2->getValue()->getValue(); @@ -6843,351 +7013,479 @@ static const APInt sdiv(const SCEVConstant *C1, const SCEVConstant *C2) { } namespace { -struct SCEVGCD : public SCEVVisitor<SCEVGCD, const SCEV *> { -public: - // Pattern match Step into Start. When Step is a multiply expression, find - // the largest subexpression of Step that appears in Start. When Start is an - // add expression, try to match Step in the subexpressions of Start, non - // matching subexpressions are returned under Remainder. - static const SCEV *findGCD(ScalarEvolution &SE, const SCEV *Start, - const SCEV *Step, const SCEV **Remainder) { - assert(Remainder && "Remainder should not be NULL"); - SCEVGCD R(SE, Step, SE.getConstant(Step->getType(), 0)); - const SCEV *Res = R.visit(Start); - *Remainder = R.Remainder; - return Res; - } +struct FindSCEVSize { + int Size; + FindSCEVSize() : Size(0) {} - SCEVGCD(ScalarEvolution &S, const SCEV *G, const SCEV *R) - : SE(S), GCD(G), Remainder(R) { - Zero = SE.getConstant(GCD->getType(), 0); - One = SE.getConstant(GCD->getType(), 1); + bool follow(const SCEV *S) { + ++Size; + // Keep looking at all operands of S. + return true; } + bool isDone() const { + return false; + } +}; +} - const SCEV *visitConstant(const SCEVConstant *Constant) { - if (GCD == Constant || Constant == Zero) - return GCD; +// Returns the size of the SCEV S. +static inline int sizeOfSCEV(const SCEV *S) { + FindSCEVSize F; + SCEVTraversal<FindSCEVSize> ST(F); + ST.visitAll(S); + return F.Size; +} - if (const SCEVConstant *CGCD = dyn_cast<SCEVConstant>(GCD)) { - const SCEV *Res = SE.getConstant(gcd(Constant, CGCD)); - if (Res != One) - return Res; +namespace { - Remainder = SE.getConstant(srem(Constant, CGCD)); - Constant = cast<SCEVConstant>(SE.getMinusSCEV(Constant, Remainder)); - Res = SE.getConstant(gcd(Constant, CGCD)); - return Res; +struct SCEVDivision : public SCEVVisitor<SCEVDivision, void> { +public: + // Computes the Quotient and Remainder of the division of Numerator by + // Denominator. + static void divide(ScalarEvolution &SE, const SCEV *Numerator, + const SCEV *Denominator, const SCEV **Quotient, + const SCEV **Remainder) { + assert(Numerator && Denominator && "Uninitialized SCEV"); + + SCEVDivision D(SE, Numerator, Denominator); + + // Check for the trivial case here to avoid having to check for it in the + // rest of the code. + if (Numerator == Denominator) { + *Quotient = D.One; + *Remainder = D.Zero; + return; } - // When GCD is not a constant, it could be that the GCD is an Add, Mul, - // AddRec, etc., in which case we want to find out how many times the - // Constant divides the GCD: we then return that as the new GCD. - const SCEV *Rem = Zero; - const SCEV *Res = findGCD(SE, GCD, Constant, &Rem); + if (Numerator->isZero()) { + *Quotient = D.Zero; + *Remainder = D.Zero; + return; + } - if (Res == One || Rem != Zero) { - Remainder = Constant; - return One; + // Split the Denominator when it is a product. + if (const SCEVMulExpr *T = dyn_cast<const SCEVMulExpr>(Denominator)) { + const SCEV *Q, *R; + *Quotient = Numerator; + for (const SCEV *Op : T->operands()) { + divide(SE, *Quotient, Op, &Q, &R); + *Quotient = Q; + + // Bail out when the Numerator is not divisible by one of the terms of + // the Denominator. + if (!R->isZero()) { + *Quotient = D.Zero; + *Remainder = Numerator; + return; + } + } + *Remainder = D.Zero; + return; } - assert(isa<SCEVConstant>(Res) && "Res should be a constant"); - Remainder = SE.getConstant(srem(Constant, cast<SCEVConstant>(Res))); - return Res; + D.visit(Numerator); + *Quotient = D.Quotient; + *Remainder = D.Remainder; + } + + SCEVDivision(ScalarEvolution &S, const SCEV *Numerator, const SCEV *Denominator) + : SE(S), Denominator(Denominator) { + Zero = SE.getConstant(Denominator->getType(), 0); + One = SE.getConstant(Denominator->getType(), 1); + + // By default, we don't know how to divide Expr by Denominator. + // Providing the default here simplifies the rest of the code. + Quotient = Zero; + Remainder = Numerator; + } + + // Except in the trivial case described above, we do not know how to divide + // Expr by Denominator for the following functions with empty implementation. + void visitTruncateExpr(const SCEVTruncateExpr *Numerator) {} + void visitZeroExtendExpr(const SCEVZeroExtendExpr *Numerator) {} + void visitSignExtendExpr(const SCEVSignExtendExpr *Numerator) {} + void visitUDivExpr(const SCEVUDivExpr *Numerator) {} + void visitSMaxExpr(const SCEVSMaxExpr *Numerator) {} + void visitUMaxExpr(const SCEVUMaxExpr *Numerator) {} + void visitUnknown(const SCEVUnknown *Numerator) {} + void visitCouldNotCompute(const SCEVCouldNotCompute *Numerator) {} + + void visitConstant(const SCEVConstant *Numerator) { + if (const SCEVConstant *D = dyn_cast<SCEVConstant>(Denominator)) { + Quotient = SE.getConstant(sdiv(Numerator, D)); + Remainder = SE.getConstant(srem(Numerator, D)); + return; + } } - const SCEV *visitTruncateExpr(const SCEVTruncateExpr *Expr) { - if (GCD != Expr) - Remainder = Expr; - return GCD; + void visitAddRecExpr(const SCEVAddRecExpr *Numerator) { + const SCEV *StartQ, *StartR, *StepQ, *StepR; + assert(Numerator->isAffine() && "Numerator should be affine"); + divide(SE, Numerator->getStart(), Denominator, &StartQ, &StartR); + divide(SE, Numerator->getStepRecurrence(SE), Denominator, &StepQ, &StepR); + Quotient = SE.getAddRecExpr(StartQ, StepQ, Numerator->getLoop(), + Numerator->getNoWrapFlags()); + Remainder = SE.getAddRecExpr(StartR, StepR, Numerator->getLoop(), + Numerator->getNoWrapFlags()); } - const SCEV *visitZeroExtendExpr(const SCEVZeroExtendExpr *Expr) { - if (GCD != Expr) - Remainder = Expr; - return GCD; - } + void visitAddExpr(const SCEVAddExpr *Numerator) { + SmallVector<const SCEV *, 2> Qs, Rs; + Type *Ty = Denominator->getType(); - const SCEV *visitSignExtendExpr(const SCEVSignExtendExpr *Expr) { - if (GCD != Expr) - Remainder = Expr; - return GCD; - } + for (const SCEV *Op : Numerator->operands()) { + const SCEV *Q, *R; + divide(SE, Op, Denominator, &Q, &R); - const SCEV *visitAddExpr(const SCEVAddExpr *Expr) { - if (GCD == Expr) - return GCD; + // Bail out if types do not match. + if (Ty != Q->getType() || Ty != R->getType()) { + Quotient = Zero; + Remainder = Numerator; + return; + } - for (int i = 0, e = Expr->getNumOperands(); i < e; ++i) { - const SCEV *Rem = Zero; - const SCEV *Res = findGCD(SE, Expr->getOperand(e - 1 - i), GCD, &Rem); + Qs.push_back(Q); + Rs.push_back(R); + } - // FIXME: There may be ambiguous situations: for instance, - // GCD(-4 + (3 * %m), 2 * %m) where 2 divides -4 and %m divides (3 * %m). - // The order in which the AddExpr is traversed computes a different GCD - // and Remainder. - if (Res != One) - GCD = Res; - if (Rem != Zero) - Remainder = SE.getAddExpr(Remainder, Rem); + if (Qs.size() == 1) { + Quotient = Qs[0]; + Remainder = Rs[0]; + return; } - return GCD; + Quotient = SE.getAddExpr(Qs); + Remainder = SE.getAddExpr(Rs); } - const SCEV *visitMulExpr(const SCEVMulExpr *Expr) { - if (GCD == Expr) - return GCD; + void visitMulExpr(const SCEVMulExpr *Numerator) { + SmallVector<const SCEV *, 2> Qs; + Type *Ty = Denominator->getType(); - for (int i = 0, e = Expr->getNumOperands(); i < e; ++i) { - if (Expr->getOperand(i) == GCD) - return GCD; - } + bool FoundDenominatorTerm = false; + for (const SCEV *Op : Numerator->operands()) { + // Bail out if types do not match. + if (Ty != Op->getType()) { + Quotient = Zero; + Remainder = Numerator; + return; + } + + if (FoundDenominatorTerm) { + Qs.push_back(Op); + continue; + } - // If we have not returned yet, it means that GCD is not part of Expr. - const SCEV *PartialGCD = One; - for (int i = 0, e = Expr->getNumOperands(); i < e; ++i) { - const SCEV *Rem = Zero; - const SCEV *Res = findGCD(SE, Expr->getOperand(i), GCD, &Rem); - if (Rem != Zero) - // GCD does not divide Expr->getOperand(i). + // Check whether Denominator divides one of the product operands. + const SCEV *Q, *R; + divide(SE, Op, Denominator, &Q, &R); + if (!R->isZero()) { + Qs.push_back(Op); continue; + } - if (Res == GCD) - return GCD; - PartialGCD = SE.getMulExpr(PartialGCD, Res); - if (PartialGCD == GCD) - return GCD; - } - - if (PartialGCD != One) - return PartialGCD; - - Remainder = Expr; - const SCEVMulExpr *Mul = dyn_cast<SCEVMulExpr>(GCD); - if (!Mul) - return PartialGCD; - - // When the GCD is a multiply expression, try to decompose it: - // this occurs when Step does not divide the Start expression - // as in: {(-4 + (3 * %m)),+,(2 * %m)} - for (int i = 0, e = Mul->getNumOperands(); i < e; ++i) { - const SCEV *Rem = Zero; - const SCEV *Res = findGCD(SE, Expr, Mul->getOperand(i), &Rem); - if (Rem == Zero) { - Remainder = Rem; - return Res; + // Bail out if types do not match. + if (Ty != Q->getType()) { + Quotient = Zero; + Remainder = Numerator; + return; } + + FoundDenominatorTerm = true; + Qs.push_back(Q); } - return PartialGCD; - } + if (FoundDenominatorTerm) { + Remainder = Zero; + if (Qs.size() == 1) + Quotient = Qs[0]; + else + Quotient = SE.getMulExpr(Qs); + return; + } - const SCEV *visitUDivExpr(const SCEVUDivExpr *Expr) { - if (GCD != Expr) - Remainder = Expr; - return GCD; + if (!isa<SCEVUnknown>(Denominator)) { + Quotient = Zero; + Remainder = Numerator; + return; + } + + // The Remainder is obtained by replacing Denominator by 0 in Numerator. + ValueToValueMap RewriteMap; + RewriteMap[cast<SCEVUnknown>(Denominator)->getValue()] = + cast<SCEVConstant>(Zero)->getValue(); + Remainder = SCEVParameterRewriter::rewrite(Numerator, SE, RewriteMap, true); + + // Quotient is (Numerator - Remainder) divided by Denominator. + const SCEV *Q, *R; + const SCEV *Diff = SE.getMinusSCEV(Numerator, Remainder); + if (sizeOfSCEV(Diff) > sizeOfSCEV(Numerator)) { + // This SCEV does not seem to simplify: fail the division here. + Quotient = Zero; + Remainder = Numerator; + return; + } + divide(SE, Diff, Denominator, &Q, &R); + assert(R == Zero && + "(Numerator - Remainder) should evenly divide Denominator"); + Quotient = Q; } - const SCEV *visitAddRecExpr(const SCEVAddRecExpr *Expr) { - if (GCD == Expr) - return GCD; +private: + ScalarEvolution &SE; + const SCEV *Denominator, *Quotient, *Remainder, *Zero, *One; +}; +} - if (!Expr->isAffine()) { - Remainder = Expr; - return GCD; - } +static bool findArrayDimensionsRec(ScalarEvolution &SE, + SmallVectorImpl<const SCEV *> &Terms, + SmallVectorImpl<const SCEV *> &Sizes) { + int Last = Terms.size() - 1; + const SCEV *Step = Terms[Last]; - const SCEV *Rem = Zero; - const SCEV *Res = findGCD(SE, Expr->getOperand(0), GCD, &Rem); - if (Rem != Zero) - Remainder = SE.getAddExpr(Remainder, Rem); + // End of recursion. + if (Last == 0) { + if (const SCEVMulExpr *M = dyn_cast<SCEVMulExpr>(Step)) { + SmallVector<const SCEV *, 2> Qs; + for (const SCEV *Op : M->operands()) + if (!isa<SCEVConstant>(Op)) + Qs.push_back(Op); - Rem = Zero; - Res = findGCD(SE, Expr->getOperand(1), Res, &Rem); - if (Rem != Zero) { - Remainder = Expr; - return GCD; + Step = SE.getMulExpr(Qs); } - return Res; + Sizes.push_back(Step); + return true; } - const SCEV *visitSMaxExpr(const SCEVSMaxExpr *Expr) { - if (GCD != Expr) - Remainder = Expr; - return GCD; - } + for (const SCEV *&Term : Terms) { + // Normalize the terms before the next call to findArrayDimensionsRec. + const SCEV *Q, *R; + SCEVDivision::divide(SE, Term, Step, &Q, &R); - const SCEV *visitUMaxExpr(const SCEVUMaxExpr *Expr) { - if (GCD != Expr) - Remainder = Expr; - return GCD; - } + // Bail out when GCD does not evenly divide one of the terms. + if (!R->isZero()) + return false; - const SCEV *visitUnknown(const SCEVUnknown *Expr) { - if (GCD != Expr) - Remainder = Expr; - return GCD; + Term = Q; } - const SCEV *visitCouldNotCompute(const SCEVCouldNotCompute *Expr) { - return One; - } + // Remove all SCEVConstants. + Terms.erase(std::remove_if(Terms.begin(), Terms.end(), [](const SCEV *E) { + return isa<SCEVConstant>(E); + }), + Terms.end()); -private: - ScalarEvolution &SE; - const SCEV *GCD, *Remainder, *Zero, *One; -}; + if (Terms.size() > 0) + if (!findArrayDimensionsRec(SE, Terms, Sizes)) + return false; -struct SCEVDivision : public SCEVVisitor<SCEVDivision, const SCEV *> { -public: - // Remove from Start all multiples of Step. - static const SCEV *divide(ScalarEvolution &SE, const SCEV *Start, - const SCEV *Step) { - SCEVDivision D(SE, Step); - const SCEV *Rem = D.Zero; - (void)Rem; - // The division is guaranteed to succeed: Step should divide Start with no - // remainder. - assert(Step == SCEVGCD::findGCD(SE, Start, Step, &Rem) && Rem == D.Zero && - "Step should divide Start with no remainder."); - return D.visit(Start); - } + Sizes.push_back(Step); + return true; +} + +namespace { +struct FindParameter { + bool FoundParameter; + FindParameter() : FoundParameter(false) {} - SCEVDivision(ScalarEvolution &S, const SCEV *G) : SE(S), GCD(G) { - Zero = SE.getConstant(GCD->getType(), 0); - One = SE.getConstant(GCD->getType(), 1); + bool follow(const SCEV *S) { + if (isa<SCEVUnknown>(S)) { + FoundParameter = true; + // Stop recursion: we found a parameter. + return false; + } + // Keep looking. + return true; } + bool isDone() const { + // Stop recursion if we have found a parameter. + return FoundParameter; + } +}; +} - const SCEV *visitConstant(const SCEVConstant *Constant) { - if (GCD == Constant) - return One; +// Returns true when S contains at least a SCEVUnknown parameter. +static inline bool +containsParameters(const SCEV *S) { + FindParameter F; + SCEVTraversal<FindParameter> ST(F); + ST.visitAll(S); - if (const SCEVConstant *CGCD = dyn_cast<SCEVConstant>(GCD)) - return SE.getConstant(sdiv(Constant, CGCD)); - return Constant; - } + return F.FoundParameter; +} - const SCEV *visitTruncateExpr(const SCEVTruncateExpr *Expr) { - if (GCD == Expr) - return One; - return Expr; - } +// Returns true when one of the SCEVs of Terms contains a SCEVUnknown parameter. +static inline bool +containsParameters(SmallVectorImpl<const SCEV *> &Terms) { + for (const SCEV *T : Terms) + if (containsParameters(T)) + return true; + return false; +} - const SCEV *visitZeroExtendExpr(const SCEVZeroExtendExpr *Expr) { - if (GCD == Expr) - return One; - return Expr; - } +// Return the number of product terms in S. +static inline int numberOfTerms(const SCEV *S) { + if (const SCEVMulExpr *Expr = dyn_cast<SCEVMulExpr>(S)) + return Expr->getNumOperands(); + return 1; +} - const SCEV *visitSignExtendExpr(const SCEVSignExtendExpr *Expr) { - if (GCD == Expr) - return One; - return Expr; - } +static const SCEV *removeConstantFactors(ScalarEvolution &SE, const SCEV *T) { + if (isa<SCEVConstant>(T)) + return nullptr; - const SCEV *visitAddExpr(const SCEVAddExpr *Expr) { - if (GCD == Expr) - return One; + if (isa<SCEVUnknown>(T)) + return T; - SmallVector<const SCEV *, 2> Operands; - for (int i = 0, e = Expr->getNumOperands(); i < e; ++i) - Operands.push_back(divide(SE, Expr->getOperand(i), GCD)); + if (const SCEVMulExpr *M = dyn_cast<SCEVMulExpr>(T)) { + SmallVector<const SCEV *, 2> Factors; + for (const SCEV *Op : M->operands()) + if (!isa<SCEVConstant>(Op)) + Factors.push_back(Op); - if (Operands.size() == 1) - return Operands[0]; - return SE.getAddExpr(Operands); + return SE.getMulExpr(Factors); } - const SCEV *visitMulExpr(const SCEVMulExpr *Expr) { - if (GCD == Expr) - return One; + return T; +} - bool FoundGCDTerm = false; - for (int i = 0, e = Expr->getNumOperands(); i < e; ++i) - if (Expr->getOperand(i) == GCD) - FoundGCDTerm = true; +/// Return the size of an element read or written by Inst. +const SCEV *ScalarEvolution::getElementSize(Instruction *Inst) { + Type *Ty; + if (StoreInst *Store = dyn_cast<StoreInst>(Inst)) + Ty = Store->getValueOperand()->getType(); + else if (LoadInst *Load = dyn_cast<LoadInst>(Inst)) + Ty = Load->getPointerOperand()->getType(); + else + return nullptr; - SmallVector<const SCEV *, 2> Operands; - if (FoundGCDTerm) { - FoundGCDTerm = false; - for (int i = 0, e = Expr->getNumOperands(); i < e; ++i) { - if (FoundGCDTerm) - Operands.push_back(Expr->getOperand(i)); - else if (Expr->getOperand(i) == GCD) - FoundGCDTerm = true; - else - Operands.push_back(Expr->getOperand(i)); - } - } else { - const SCEV *PartialGCD = One; - for (int i = 0, e = Expr->getNumOperands(); i < e; ++i) { - if (PartialGCD == GCD) { - Operands.push_back(Expr->getOperand(i)); - continue; - } + Type *ETy = getEffectiveSCEVType(PointerType::getUnqual(Ty)); + return getSizeOfExpr(ETy, Ty); +} - const SCEV *Rem = Zero; - const SCEV *Res = SCEVGCD::findGCD(SE, Expr->getOperand(i), GCD, &Rem); - if (Rem == Zero) { - PartialGCD = SE.getMulExpr(PartialGCD, Res); - Operands.push_back(divide(SE, Expr->getOperand(i), GCD)); - } else { - Operands.push_back(Expr->getOperand(i)); - } - } - } +/// Second step of delinearization: compute the array dimensions Sizes from the +/// set of Terms extracted from the memory access function of this SCEVAddRec. +void ScalarEvolution::findArrayDimensions(SmallVectorImpl<const SCEV *> &Terms, + SmallVectorImpl<const SCEV *> &Sizes, + const SCEV *ElementSize) const { - if (Operands.size() == 1) - return Operands[0]; - return SE.getMulExpr(Operands); - } + if (Terms.size() < 1) + return; - const SCEV *visitUDivExpr(const SCEVUDivExpr *Expr) { - if (GCD == Expr) - return One; - return Expr; - } + // Early return when Terms do not contain parameters: we do not delinearize + // non parametric SCEVs. + if (!containsParameters(Terms)) + return; - const SCEV *visitAddRecExpr(const SCEVAddRecExpr *Expr) { - if (GCD == Expr) - return One; + DEBUG({ + dbgs() << "Terms:\n"; + for (const SCEV *T : Terms) + dbgs() << *T << "\n"; + }); - assert(Expr->isAffine() && "Expr should be affine"); + // Remove duplicates. + std::sort(Terms.begin(), Terms.end()); + Terms.erase(std::unique(Terms.begin(), Terms.end()), Terms.end()); - const SCEV *Start = divide(SE, Expr->getStart(), GCD); - const SCEV *Step = divide(SE, Expr->getStepRecurrence(SE), GCD); + // Put larger terms first. + std::sort(Terms.begin(), Terms.end(), [](const SCEV *LHS, const SCEV *RHS) { + return numberOfTerms(LHS) > numberOfTerms(RHS); + }); - return SE.getAddRecExpr(Start, Step, Expr->getLoop(), - Expr->getNoWrapFlags()); - } + ScalarEvolution &SE = *const_cast<ScalarEvolution *>(this); - const SCEV *visitSMaxExpr(const SCEVSMaxExpr *Expr) { - if (GCD == Expr) - return One; - return Expr; + // Divide all terms by the element size. + for (const SCEV *&Term : Terms) { + const SCEV *Q, *R; + SCEVDivision::divide(SE, Term, ElementSize, &Q, &R); + Term = Q; } - const SCEV *visitUMaxExpr(const SCEVUMaxExpr *Expr) { - if (GCD == Expr) - return One; - return Expr; - } + SmallVector<const SCEV *, 4> NewTerms; - const SCEV *visitUnknown(const SCEVUnknown *Expr) { - if (GCD == Expr) - return One; - return Expr; + // Remove constant factors. + for (const SCEV *T : Terms) + if (const SCEV *NewT = removeConstantFactors(SE, T)) + NewTerms.push_back(NewT); + + DEBUG({ + dbgs() << "Terms after sorting:\n"; + for (const SCEV *T : NewTerms) + dbgs() << *T << "\n"; + }); + + if (NewTerms.empty() || + !findArrayDimensionsRec(SE, NewTerms, Sizes)) { + Sizes.clear(); + return; } - const SCEV *visitCouldNotCompute(const SCEVCouldNotCompute *Expr) { - return Expr; + // The last element to be pushed into Sizes is the size of an element. + Sizes.push_back(ElementSize); + + DEBUG({ + dbgs() << "Sizes:\n"; + for (const SCEV *S : Sizes) + dbgs() << *S << "\n"; + }); +} + +/// Third step of delinearization: compute the access functions for the +/// Subscripts based on the dimensions in Sizes. +void SCEVAddRecExpr::computeAccessFunctions( + ScalarEvolution &SE, SmallVectorImpl<const SCEV *> &Subscripts, + SmallVectorImpl<const SCEV *> &Sizes) const { + + // Early exit in case this SCEV is not an affine multivariate function. + if (Sizes.empty() || !this->isAffine()) + return; + + const SCEV *Res = this; + int Last = Sizes.size() - 1; + for (int i = Last; i >= 0; i--) { + const SCEV *Q, *R; + SCEVDivision::divide(SE, Res, Sizes[i], &Q, &R); + + DEBUG({ + dbgs() << "Res: " << *Res << "\n"; + dbgs() << "Sizes[i]: " << *Sizes[i] << "\n"; + dbgs() << "Res divided by Sizes[i]:\n"; + dbgs() << "Quotient: " << *Q << "\n"; + dbgs() << "Remainder: " << *R << "\n"; + }); + + Res = Q; + + // Do not record the last subscript corresponding to the size of elements in + // the array. + if (i == Last) { + + // Bail out if the remainder is too complex. + if (isa<SCEVAddRecExpr>(R)) { + Subscripts.clear(); + Sizes.clear(); + return; + } + + continue; + } + + // Record the access function for the current subscript. + Subscripts.push_back(R); } -private: - ScalarEvolution &SE; - const SCEV *GCD, *Zero, *One; -}; + // Also push in last position the remainder of the last division: it will be + // the access function of the innermost dimension. + Subscripts.push_back(Res); + + std::reverse(Subscripts.begin(), Subscripts.end()); + + DEBUG({ + dbgs() << "Subscripts:\n"; + for (const SCEV *S : Subscripts) + dbgs() << *S << "\n"; + }); } /// Splits the SCEV into two vectors of SCEVs representing the subscripts and @@ -7239,84 +7537,40 @@ private: /// asking for the SCEV of the memory access with respect to all enclosing /// loops, calling SCEV->delinearize on that and printing the results. -const SCEV * -SCEVAddRecExpr::delinearize(ScalarEvolution &SE, - SmallVectorImpl<const SCEV *> &Subscripts, - SmallVectorImpl<const SCEV *> &Sizes) const { - // Early exit in case this SCEV is not an affine multivariate function. - if (!this->isAffine()) - return this; - - const SCEV *Start = this->getStart(); - const SCEV *Step = this->getStepRecurrence(SE); - - // Build the SCEV representation of the canonical induction variable in the - // loop of this SCEV. - const SCEV *Zero = SE.getConstant(this->getType(), 0); - const SCEV *One = SE.getConstant(this->getType(), 1); - const SCEV *IV = - SE.getAddRecExpr(Zero, One, this->getLoop(), this->getNoWrapFlags()); - - DEBUG(dbgs() << "(delinearize: " << *this << "\n"); - - // When the stride of this SCEV is 1, do not compute the GCD: the size of this - // subscript is 1, and this same SCEV for the access function. - const SCEV *Remainder = Zero; - const SCEV *GCD = One; - - // Find the GCD and Remainder of the Start and Step coefficients of this SCEV. - if (Step != One && !Step->isAllOnesValue()) - GCD = SCEVGCD::findGCD(SE, Start, Step, &Remainder); - - DEBUG(dbgs() << "GCD: " << *GCD << "\n"); - DEBUG(dbgs() << "Remainder: " << *Remainder << "\n"); - - const SCEV *Quotient = Start; - if (GCD != One && !GCD->isAllOnesValue()) - // As findGCD computed Remainder, GCD divides "Start - Remainder." The - // Quotient is then this SCEV without Remainder, scaled down by the GCD. The - // Quotient is what will be used in the next subscript delinearization. - Quotient = SCEVDivision::divide(SE, SE.getMinusSCEV(Start, Remainder), GCD); - - DEBUG(dbgs() << "Quotient: " << *Quotient << "\n"); - - const SCEV *Rem = Quotient; - if (const SCEVAddRecExpr *AR = dyn_cast<SCEVAddRecExpr>(Quotient)) - // Recursively call delinearize on the Quotient until there are no more - // multiples that can be recognized. - Rem = AR->delinearize(SE, Subscripts, Sizes); - - // Scale up the canonical induction variable IV by whatever remains from the - // Step after division by the GCD: the GCD is the size of all the sub-array. - if (Step != One && !Step->isAllOnesValue() && GCD != One && - !GCD->isAllOnesValue() && Step != GCD) { - Step = SCEVDivision::divide(SE, Step, GCD); - IV = SE.getMulExpr(IV, Step); - } - // The access function in the current subscript is computed as the canonical - // induction variable IV (potentially scaled up by the step) and offset by - // Rem, the offset of delinearization in the sub-array. - const SCEV *Index = SE.getAddExpr(IV, Rem); - - // Record the access function and the size of the current subscript. - Subscripts.push_back(Index); - Sizes.push_back(GCD); +void SCEVAddRecExpr::delinearize(ScalarEvolution &SE, + SmallVectorImpl<const SCEV *> &Subscripts, + SmallVectorImpl<const SCEV *> &Sizes, + const SCEV *ElementSize) const { + // First step: collect parametric terms. + SmallVector<const SCEV *, 4> Terms; + collectParametricTerms(SE, Terms); -#ifndef NDEBUG - int Size = Sizes.size(); - DEBUG(dbgs() << "succeeded to delinearize " << *this << "\n"); - DEBUG(dbgs() << "ArrayDecl[UnknownSize]"); - for (int i = 0; i < Size - 1; i++) - DEBUG(dbgs() << "[" << *Sizes[i] << "]"); - DEBUG(dbgs() << " with elements of " << *Sizes[Size - 1] << " bytes.\n"); - - DEBUG(dbgs() << "ArrayRef"); - for (int i = 0; i < Size; i++) - DEBUG(dbgs() << "[" << *Subscripts[i] << "]"); - DEBUG(dbgs() << "\n)\n"); -#endif + if (Terms.empty()) + return; + + // Second step: find subscript sizes. + SE.findArrayDimensions(Terms, Sizes, ElementSize); + + if (Sizes.empty()) + return; + + // Third step: compute the access functions for each subscript. + computeAccessFunctions(SE, Subscripts, Sizes); + + if (Subscripts.empty()) + return; + + DEBUG({ + dbgs() << "succeeded to delinearize " << *this << "\n"; + dbgs() << "ArrayDecl[UnknownSize]"; + for (const SCEV *S : Sizes) + dbgs() << "[" << *S << "]"; - return Remainder; + dbgs() << "\nArrayRef"; + for (const SCEV *S : Subscripts) + dbgs() << "[" << *S << "]"; + dbgs() << "\n"; + }); } //===----------------------------------------------------------------------===// @@ -7368,7 +7622,8 @@ ScalarEvolution::SCEVCallbackVH::SCEVCallbackVH(Value *V, ScalarEvolution *se) //===----------------------------------------------------------------------===// ScalarEvolution::ScalarEvolution() - : FunctionPass(ID), ValuesAtScopes(64), LoopDispositions(64), BlockDispositions(64), FirstUnknown(0) { + : FunctionPass(ID), ValuesAtScopes(64), LoopDispositions(64), + BlockDispositions(64), FirstUnknown(nullptr) { initializeScalarEvolutionPass(*PassRegistry::getPassRegistry()); } @@ -7376,7 +7631,7 @@ bool ScalarEvolution::runOnFunction(Function &F) { this->F = &F; LI = &getAnalysis<LoopInfo>(); DataLayoutPass *DLP = getAnalysisIfAvailable<DataLayoutPass>(); - DL = DLP ? &DLP->getDataLayout() : 0; + DL = DLP ? &DLP->getDataLayout() : nullptr; TLI = &getAnalysis<TargetLibraryInfo>(); DT = &getAnalysis<DominatorTreeWrapperPass>().getDomTree(); return false; @@ -7387,7 +7642,7 @@ void ScalarEvolution::releaseMemory() { // destructors, so that they release their references to their values. for (SCEVUnknown *U = FirstUnknown; U; U = U->Next) U->~SCEVUnknown(); - FirstUnknown = 0; + FirstUnknown = nullptr; ValueExprMap.clear(); diff --git a/lib/Analysis/ScalarEvolutionAliasAnalysis.cpp b/lib/Analysis/ScalarEvolutionAliasAnalysis.cpp index 7be6aca..6933f74 100644 --- a/lib/Analysis/ScalarEvolutionAliasAnalysis.cpp +++ b/lib/Analysis/ScalarEvolutionAliasAnalysis.cpp @@ -34,7 +34,7 @@ namespace { public: static char ID; // Class identification, replacement for typeinfo - ScalarEvolutionAliasAnalysis() : FunctionPass(ID), SE(0) { + ScalarEvolutionAliasAnalysis() : FunctionPass(ID), SE(nullptr) { initializeScalarEvolutionAliasAnalysisPass( *PassRegistry::getPassRegistry()); } @@ -102,7 +102,7 @@ ScalarEvolutionAliasAnalysis::GetBaseValue(const SCEV *S) { return U->getValue(); } // No Identified object found. - return 0; + return nullptr; } AliasAnalysis::AliasResult @@ -162,10 +162,10 @@ ScalarEvolutionAliasAnalysis::alias(const Location &LocA, if ((AO && AO != LocA.Ptr) || (BO && BO != LocB.Ptr)) if (alias(Location(AO ? AO : LocA.Ptr, AO ? +UnknownSize : LocA.Size, - AO ? 0 : LocA.TBAATag), + AO ? nullptr : LocA.TBAATag), Location(BO ? BO : LocB.Ptr, BO ? +UnknownSize : LocB.Size, - BO ? 0 : LocB.TBAATag)) == NoAlias) + BO ? nullptr : LocB.TBAATag)) == NoAlias) return NoAlias; // Forward the query to the next analysis. diff --git a/lib/Analysis/ScalarEvolutionExpander.cpp b/lib/Analysis/ScalarEvolutionExpander.cpp index fb3d595..b507043 100644 --- a/lib/Analysis/ScalarEvolutionExpander.cpp +++ b/lib/Analysis/ScalarEvolutionExpander.cpp @@ -44,7 +44,7 @@ Value *SCEVExpander::ReuseOrCreateCast(Value *V, Type *Ty, // not allowed to move it. BasicBlock::iterator BIP = Builder.GetInsertPoint(); - Instruction *Ret = NULL; + Instruction *Ret = nullptr; // Check to see if there is already a cast! for (User *U : V->users()) @@ -627,21 +627,21 @@ static const Loop *PickMostRelevantLoop(const Loop *A, const Loop *B, const Loop *SCEVExpander::getRelevantLoop(const SCEV *S) { // Test whether we've already computed the most relevant loop for this SCEV. std::pair<DenseMap<const SCEV *, const Loop *>::iterator, bool> Pair = - RelevantLoops.insert(std::make_pair(S, static_cast<const Loop *>(0))); + RelevantLoops.insert(std::make_pair(S, nullptr)); if (!Pair.second) return Pair.first->second; if (isa<SCEVConstant>(S)) // A constant has no relevant loops. - return 0; + return nullptr; if (const SCEVUnknown *U = dyn_cast<SCEVUnknown>(S)) { if (const Instruction *I = dyn_cast<Instruction>(U->getValue())) return Pair.first->second = SE.LI->getLoopFor(I->getParent()); // A non-instruction has no relevant loops. - return 0; + return nullptr; } if (const SCEVNAryExpr *N = dyn_cast<SCEVNAryExpr>(S)) { - const Loop *L = 0; + const Loop *L = nullptr; if (const SCEVAddRecExpr *AR = dyn_cast<SCEVAddRecExpr>(S)) L = AR->getLoop(); for (SCEVNAryExpr::op_iterator I = N->op_begin(), E = N->op_end(); @@ -716,7 +716,7 @@ Value *SCEVExpander::visitAddExpr(const SCEVAddExpr *S) { // Emit instructions to add all the operands. Hoist as much as possible // out of loops, and form meaningful getelementptrs where possible. - Value *Sum = 0; + Value *Sum = nullptr; for (SmallVectorImpl<std::pair<const Loop *, const SCEV *> >::iterator I = OpsAndLoops.begin(), E = OpsAndLoops.end(); I != E; ) { const Loop *CurLoop = I->first; @@ -784,7 +784,7 @@ Value *SCEVExpander::visitMulExpr(const SCEVMulExpr *S) { // Emit instructions to mul all the operands. Hoist as much as possible // out of loops. - Value *Prod = 0; + Value *Prod = nullptr; for (SmallVectorImpl<std::pair<const Loop *, const SCEV *> >::iterator I = OpsAndLoops.begin(), E = OpsAndLoops.end(); I != E; ) { const SCEV *Op = I->second; @@ -892,18 +892,18 @@ Instruction *SCEVExpander::getIVIncOperand(Instruction *IncV, Instruction *InsertPos, bool allowScale) { if (IncV == InsertPos) - return NULL; + return nullptr; switch (IncV->getOpcode()) { default: - return NULL; + return nullptr; // Check for a simple Add/Sub or GEP of a loop invariant step. case Instruction::Add: case Instruction::Sub: { Instruction *OInst = dyn_cast<Instruction>(IncV->getOperand(1)); if (!OInst || SE.DT->dominates(OInst, InsertPos)) return dyn_cast<Instruction>(IncV->getOperand(0)); - return NULL; + return nullptr; } case Instruction::BitCast: return dyn_cast<Instruction>(IncV->getOperand(0)); @@ -914,7 +914,7 @@ Instruction *SCEVExpander::getIVIncOperand(Instruction *IncV, continue; if (Instruction *OInst = dyn_cast<Instruction>(*I)) { if (!SE.DT->dominates(OInst, InsertPos)) - return NULL; + return nullptr; } if (allowScale) { // allow any kind of GEP as long as it can be hoisted. @@ -925,11 +925,11 @@ Instruction *SCEVExpander::getIVIncOperand(Instruction *IncV, // have 2 operands. i1* is used by the expander to represent an // address-size element. if (IncV->getNumOperands() != 2) - return NULL; + return nullptr; unsigned AS = cast<PointerType>(IncV->getType())->getAddressSpace(); if (IncV->getType() != Type::getInt1PtrTy(SE.getContext(), AS) && IncV->getType() != Type::getInt8PtrTy(SE.getContext(), AS)) - return NULL; + return nullptr; break; } return dyn_cast<Instruction>(IncV->getOperand(0)); @@ -1077,9 +1077,9 @@ SCEVExpander::getAddRecExprPHILiterally(const SCEVAddRecExpr *Normalized, // Reuse a previously-inserted PHI, if present. BasicBlock *LatchBlock = L->getLoopLatch(); if (LatchBlock) { - PHINode *AddRecPhiMatch = 0; - Instruction *IncV = 0; - TruncTy = 0; + PHINode *AddRecPhiMatch = nullptr; + Instruction *IncV = nullptr; + TruncTy = nullptr; InvertStep = false; // Only try partially matching scevs that need truncation and/or @@ -1120,7 +1120,7 @@ SCEVExpander::getAddRecExprPHILiterally(const SCEVAddRecExpr *Normalized, // Stop if we have found an exact match SCEV. if (IsMatchingSCEV) { IncV = TempIncV; - TruncTy = 0; + TruncTy = nullptr; InvertStep = false; AddRecPhiMatch = PN; break; @@ -1243,13 +1243,13 @@ Value *SCEVExpander::expandAddRecExprLiterally(const SCEVAddRecExpr *S) { PostIncLoopSet Loops; Loops.insert(L); Normalized = - cast<SCEVAddRecExpr>(TransformForPostIncUse(Normalize, S, 0, 0, - Loops, SE, *SE.DT)); + cast<SCEVAddRecExpr>(TransformForPostIncUse(Normalize, S, nullptr, + nullptr, Loops, SE, *SE.DT)); } // Strip off any non-loop-dominating component from the addrec start. const SCEV *Start = Normalized->getStart(); - const SCEV *PostLoopOffset = 0; + const SCEV *PostLoopOffset = nullptr; if (!SE.properlyDominates(Start, L->getHeader())) { PostLoopOffset = Start; Start = SE.getConstant(Normalized->getType(), 0); @@ -1261,7 +1261,7 @@ Value *SCEVExpander::expandAddRecExprLiterally(const SCEVAddRecExpr *S) { // Strip off any non-loop-dominating component from the addrec step. const SCEV *Step = Normalized->getStepRecurrence(SE); - const SCEV *PostLoopScale = 0; + const SCEV *PostLoopScale = nullptr; if (!SE.dominates(Step, L->getHeader())) { PostLoopScale = Step; Step = SE.getConstant(Normalized->getType(), 1); @@ -1276,7 +1276,7 @@ Value *SCEVExpander::expandAddRecExprLiterally(const SCEVAddRecExpr *S) { Type *ExpandTy = PostLoopScale ? IntTy : STy; // In some cases, we decide to reuse an existing phi node but need to truncate // it and/or invert the step. - Type *TruncTy = 0; + Type *TruncTy = nullptr; bool InvertStep = false; PHINode *PN = getAddRecExprPHILiterally(Normalized, L, ExpandTy, IntTy, TruncTy, InvertStep); @@ -1372,7 +1372,7 @@ Value *SCEVExpander::visitAddRecExpr(const SCEVAddRecExpr *S) { const Loop *L = S->getLoop(); // First check for an existing canonical IV in a suitable type. - PHINode *CanonicalIV = 0; + PHINode *CanonicalIV = nullptr; if (PHINode *PN = L->getCanonicalInductionVariable()) if (SE.getTypeSizeInBits(PN->getType()) >= SE.getTypeSizeInBits(Ty)) CanonicalIV = PN; @@ -1393,7 +1393,7 @@ Value *SCEVExpander::visitAddRecExpr(const SCEVAddRecExpr *S) { while (isa<PHINode>(NewInsertPt) || isa<DbgInfoIntrinsic>(NewInsertPt) || isa<LandingPadInst>(NewInsertPt)) ++NewInsertPt; - V = expandCodeFor(SE.getTruncateExpr(SE.getUnknown(V), Ty), 0, + V = expandCodeFor(SE.getTruncateExpr(SE.getUnknown(V), Ty), nullptr, NewInsertPt); return V; } @@ -1666,7 +1666,8 @@ SCEVExpander::getOrInsertCanonicalInductionVariable(const Loop *L, // Emit code for it. BuilderType::InsertPointGuard Guard(Builder); - PHINode *V = cast<PHINode>(expandCodeFor(H, 0, L->getHeader()->begin())); + PHINode *V = cast<PHINode>(expandCodeFor(H, nullptr, + L->getHeader()->begin())); return V; } diff --git a/lib/Analysis/ScalarEvolutionNormalization.cpp b/lib/Analysis/ScalarEvolutionNormalization.cpp index 1e4c0bd..e9db295 100644 --- a/lib/Analysis/ScalarEvolutionNormalization.cpp +++ b/lib/Analysis/ScalarEvolutionNormalization.cpp @@ -113,7 +113,7 @@ TransformImpl(const SCEV *S, Instruction *User, Value *OperandValToReplace) { // Transform each operand. for (SCEVNAryExpr::op_iterator I = AR->op_begin(), E = AR->op_end(); I != E; ++I) { - Operands.push_back(TransformSubExpr(*I, LUser, 0)); + Operands.push_back(TransformSubExpr(*I, LUser, nullptr)); } // Conservatively use AnyWrap until/unless we need FlagNW. const SCEV *Result = SE.getAddRecExpr(Operands, L, SCEV::FlagAnyWrap); diff --git a/lib/Analysis/SparsePropagation.cpp b/lib/Analysis/SparsePropagation.cpp index 87a4fa4..edd82f5 100644 --- a/lib/Analysis/SparsePropagation.cpp +++ b/lib/Analysis/SparsePropagation.cpp @@ -12,7 +12,6 @@ // //===----------------------------------------------------------------------===// -#define DEBUG_TYPE "sparseprop" #include "llvm/Analysis/SparsePropagation.h" #include "llvm/IR/Constants.h" #include "llvm/IR/Function.h" @@ -21,6 +20,8 @@ #include "llvm/Support/raw_ostream.h" using namespace llvm; +#define DEBUG_TYPE "sparseprop" + //===----------------------------------------------------------------------===// // AbstractLatticeFunction Implementation //===----------------------------------------------------------------------===// @@ -147,7 +148,7 @@ void SparseSolver::getFeasibleSuccessors(TerminatorInst &TI, return; Constant *C = LatticeFunc->GetConstant(BCValue, BI->getCondition(), *this); - if (C == 0 || !isa<ConstantInt>(C)) { + if (!C || !isa<ConstantInt>(C)) { // Non-constant values can go either way. Succs[0] = Succs[1] = true; return; @@ -189,7 +190,7 @@ void SparseSolver::getFeasibleSuccessors(TerminatorInst &TI, return; Constant *C = LatticeFunc->GetConstant(SCValue, SI.getCondition(), *this); - if (C == 0 || !isa<ConstantInt>(C)) { + if (!C || !isa<ConstantInt>(C)) { // All destinations are executable! Succs.assign(TI.getNumSuccessors(), true); return; diff --git a/lib/Analysis/TargetTransformInfo.cpp b/lib/Analysis/TargetTransformInfo.cpp index 04d09f1..cdb0b79 100644 --- a/lib/Analysis/TargetTransformInfo.cpp +++ b/lib/Analysis/TargetTransformInfo.cpp @@ -7,7 +7,6 @@ // //===----------------------------------------------------------------------===// -#define DEBUG_TYPE "tti" #include "llvm/Analysis/TargetTransformInfo.h" #include "llvm/IR/CallSite.h" #include "llvm/IR/DataLayout.h" @@ -19,6 +18,8 @@ using namespace llvm; +#define DEBUG_TYPE "tti" + // Setup the analysis group to manage the TargetTransformInfo passes. INITIALIZE_ANALYSIS_GROUP(TargetTransformInfo, "Target Information", NoTTI) char TargetTransformInfo::ID = 0; @@ -234,7 +235,7 @@ namespace { struct NoTTI final : ImmutablePass, TargetTransformInfo { const DataLayout *DL; - NoTTI() : ImmutablePass(ID), DL(0) { + NoTTI() : ImmutablePass(ID), DL(nullptr) { initializeNoTTIPass(*PassRegistry::getPassRegistry()); } @@ -242,9 +243,9 @@ struct NoTTI final : ImmutablePass, TargetTransformInfo { // Note that this subclass is special, and must *not* call initializeTTI as // it does not chain. TopTTI = this; - PrevTTI = 0; + PrevTTI = nullptr; DataLayoutPass *DLP = getAnalysisIfAvailable<DataLayoutPass>(); - DL = DLP ? &DLP->getDataLayout() : 0; + DL = DLP ? &DLP->getDataLayout() : nullptr; } virtual void getAnalysisUsage(AnalysisUsage &AU) const override { @@ -443,7 +444,7 @@ struct NoTTI final : ImmutablePass, TargetTransformInfo { // Otherwise delegate to the fully generic implementations. return getOperationCost(Operator::getOpcode(U), U->getType(), U->getNumOperands() == 1 ? - U->getOperand(0)->getType() : 0); + U->getOperand(0)->getType() : nullptr); } bool hasBranchDivergence() const override { return false; } @@ -567,7 +568,7 @@ struct NoTTI final : ImmutablePass, TargetTransformInfo { } unsigned getShuffleCost(ShuffleKind Kind, Type *Ty, - int Index = 0, Type *SubTp = 0) const override { + int Index = 0, Type *SubTp = nullptr) const override { return 1; } @@ -581,7 +582,7 @@ struct NoTTI final : ImmutablePass, TargetTransformInfo { } unsigned getCmpSelInstrCost(unsigned Opcode, Type *ValTy, - Type *CondTy = 0) const override { + Type *CondTy = nullptr) const override { return 1; } diff --git a/lib/Analysis/TypeBasedAliasAnalysis.cpp b/lib/Analysis/TypeBasedAliasAnalysis.cpp index 05daf18..f36f6f8 100644 --- a/lib/Analysis/TypeBasedAliasAnalysis.cpp +++ b/lib/Analysis/TypeBasedAliasAnalysis.cpp @@ -144,7 +144,7 @@ namespace { const MDNode *Node; public: - TBAANode() : Node(0) {} + TBAANode() : Node(nullptr) {} explicit TBAANode(const MDNode *N) : Node(N) {} /// getNode - Get the MDNode for this TBAANode. @@ -182,7 +182,6 @@ namespace { const MDNode *Node; public: - TBAAStructTagNode() : Node(0) {} explicit TBAAStructTagNode(const MDNode *N) : Node(N) {} /// Get the MDNode for this TBAAStructTagNode. @@ -218,7 +217,7 @@ namespace { const MDNode *Node; public: - TBAAStructTypeNode() : Node(0) {} + TBAAStructTypeNode() : Node(nullptr) {} explicit TBAAStructTypeNode(const MDNode *N) : Node(N) {} /// Get the MDNode for this TBAAStructTypeNode. @@ -340,7 +339,8 @@ static bool isStructPathTBAA(const MDNode *MD) { bool TypeBasedAliasAnalysis::Aliases(const MDNode *A, const MDNode *B) const { - if (isStructPathTBAA(A)) + // Make sure that both MDNodes are struct-path aware. + if (isStructPathTBAA(A) && isStructPathTBAA(B)) return PathAliases(A, B); // Keep track of the root node for A and B. @@ -386,6 +386,10 @@ TypeBasedAliasAnalysis::Aliases(const MDNode *A, bool TypeBasedAliasAnalysis::PathAliases(const MDNode *A, const MDNode *B) const { + // Verify that both input nodes are struct-path aware. + assert(isStructPathTBAA(A) && "MDNode A is not struct-path aware."); + assert(isStructPathTBAA(B) && "MDNode B is not struct-path aware."); + // Keep track of the root node for A and B. TBAAStructTypeNode RootA, RootB; TBAAStructTagNode TagA(A), TagB(B); @@ -555,38 +559,40 @@ bool MDNode::isTBAAVtableAccess() const { MDNode *MDNode::getMostGenericTBAA(MDNode *A, MDNode *B) { if (!A || !B) - return NULL; + return nullptr; if (A == B) return A; // For struct-path aware TBAA, we use the access type of the tag. - bool StructPath = isStructPathTBAA(A); + bool StructPath = isStructPathTBAA(A) && isStructPathTBAA(B); if (StructPath) { A = cast_or_null<MDNode>(A->getOperand(1)); - if (!A) return 0; + if (!A) return nullptr; B = cast_or_null<MDNode>(B->getOperand(1)); - if (!B) return 0; + if (!B) return nullptr; } SmallVector<MDNode *, 4> PathA; MDNode *T = A; while (T) { PathA.push_back(T); - T = T->getNumOperands() >= 2 ? cast_or_null<MDNode>(T->getOperand(1)) : 0; + T = T->getNumOperands() >= 2 ? cast_or_null<MDNode>(T->getOperand(1)) + : nullptr; } SmallVector<MDNode *, 4> PathB; T = B; while (T) { PathB.push_back(T); - T = T->getNumOperands() >= 2 ? cast_or_null<MDNode>(T->getOperand(1)) : 0; + T = T->getNumOperands() >= 2 ? cast_or_null<MDNode>(T->getOperand(1)) + : nullptr; } int IA = PathA.size() - 1; int IB = PathB.size() - 1; - MDNode *Ret = 0; + MDNode *Ret = nullptr; while (IA >= 0 && IB >=0) { if (PathA[IA] == PathB[IB]) Ret = PathA[IA]; @@ -599,7 +605,7 @@ MDNode *MDNode::getMostGenericTBAA(MDNode *A, MDNode *B) { return Ret; if (!Ret) - return 0; + return nullptr; // We need to convert from a type node to a tag node. Type *Int64 = IntegerType::get(A->getContext(), 64); Value *Ops[3] = { Ret, Ret, ConstantInt::get(Int64, 0) }; diff --git a/lib/Analysis/ValueTracking.cpp b/lib/Analysis/ValueTracking.cpp index 72617a0..4f48753 100644 --- a/lib/Analysis/ValueTracking.cpp +++ b/lib/Analysis/ValueTracking.cpp @@ -16,6 +16,7 @@ #include "llvm/ADT/SmallPtrSet.h" #include "llvm/Analysis/InstructionSimplify.h" #include "llvm/Analysis/MemoryBuiltins.h" +#include "llvm/IR/CallSite.h" #include "llvm/IR/ConstantRange.h" #include "llvm/IR/Constants.h" #include "llvm/IR/DataLayout.h" @@ -44,10 +45,10 @@ static unsigned getBitWidth(Type *Ty, const DataLayout *TD) { return TD ? TD->getPointerTypeSizeInBits(Ty) : 0; } -static void ComputeMaskedBitsAddSub(bool Add, Value *Op0, Value *Op1, bool NSW, - APInt &KnownZero, APInt &KnownOne, - APInt &KnownZero2, APInt &KnownOne2, - const DataLayout *TD, unsigned Depth) { +static void computeKnownBitsAddSub(bool Add, Value *Op0, Value *Op1, bool NSW, + APInt &KnownZero, APInt &KnownOne, + APInt &KnownZero2, APInt &KnownOne2, + const DataLayout *TD, unsigned Depth) { if (!Add) { if (ConstantInt *CLHS = dyn_cast<ConstantInt>(Op0)) { // We know that the top bits of C-X are clear if X contains less bits @@ -58,7 +59,7 @@ static void ComputeMaskedBitsAddSub(bool Add, Value *Op0, Value *Op1, bool NSW, unsigned NLZ = (CLHS->getValue()+1).countLeadingZeros(); // NLZ can't be BitWidth with no sign bit APInt MaskV = APInt::getHighBitsSet(BitWidth, NLZ+1); - llvm::ComputeMaskedBits(Op1, KnownZero2, KnownOne2, TD, Depth+1); + llvm::computeKnownBits(Op1, KnownZero2, KnownOne2, TD, Depth+1); // If all of the MaskV bits are known to be zero, then we know the // output top bits are zero, because we now know that the output is @@ -79,13 +80,10 @@ static void ComputeMaskedBitsAddSub(bool Add, Value *Op0, Value *Op1, bool NSW, // result. For an add, this works with either operand. For a subtract, // this only works if the known zeros are in the right operand. APInt LHSKnownZero(BitWidth, 0), LHSKnownOne(BitWidth, 0); - llvm::ComputeMaskedBits(Op0, LHSKnownZero, LHSKnownOne, TD, Depth+1); - assert((LHSKnownZero & LHSKnownOne) == 0 && - "Bits known to be one AND zero?"); + llvm::computeKnownBits(Op0, LHSKnownZero, LHSKnownOne, TD, Depth+1); unsigned LHSKnownZeroOut = LHSKnownZero.countTrailingOnes(); - llvm::ComputeMaskedBits(Op1, KnownZero2, KnownOne2, TD, Depth+1); - assert((KnownZero2 & KnownOne2) == 0 && "Bits known to be one AND zero?"); + llvm::computeKnownBits(Op1, KnownZero2, KnownOne2, TD, Depth+1); unsigned RHSKnownZeroOut = KnownZero2.countTrailingOnes(); // Determine which operand has more trailing zeros, and use that @@ -130,15 +128,13 @@ static void ComputeMaskedBitsAddSub(bool Add, Value *Op0, Value *Op1, bool NSW, } } -static void ComputeMaskedBitsMul(Value *Op0, Value *Op1, bool NSW, - APInt &KnownZero, APInt &KnownOne, - APInt &KnownZero2, APInt &KnownOne2, - const DataLayout *TD, unsigned Depth) { +static void computeKnownBitsMul(Value *Op0, Value *Op1, bool NSW, + APInt &KnownZero, APInt &KnownOne, + APInt &KnownZero2, APInt &KnownOne2, + const DataLayout *TD, unsigned Depth) { unsigned BitWidth = KnownZero.getBitWidth(); - ComputeMaskedBits(Op1, KnownZero, KnownOne, TD, Depth+1); - ComputeMaskedBits(Op0, KnownZero2, KnownOne2, TD, Depth+1); - assert((KnownZero & KnownOne) == 0 && "Bits known to be one AND zero?"); - assert((KnownZero2 & KnownOne2) == 0 && "Bits known to be one AND zero?"); + computeKnownBits(Op1, KnownZero, KnownOne, TD, Depth+1); + computeKnownBits(Op0, KnownZero2, KnownOne2, TD, Depth+1); bool isKnownNegative = false; bool isKnownNonNegative = false; @@ -192,7 +188,7 @@ static void ComputeMaskedBitsMul(Value *Op0, Value *Op1, bool NSW, KnownOne.setBit(BitWidth - 1); } -void llvm::computeMaskedBitsLoad(const MDNode &Ranges, APInt &KnownZero) { +void llvm::computeKnownBitsLoad(const MDNode &Ranges, APInt &KnownZero) { unsigned BitWidth = KnownZero.getBitWidth(); unsigned NumRanges = Ranges.getNumOperands() / 2; assert(NumRanges >= 1); @@ -211,8 +207,9 @@ void llvm::computeMaskedBitsLoad(const MDNode &Ranges, APInt &KnownZero) { KnownZero = APInt::getHighBitsSet(BitWidth, MinLeadingZeros); } -/// ComputeMaskedBits - Determine which of the bits are known to be either zero -/// or one and return them in the KnownZero/KnownOne bit sets. + +/// Determine which bits of V are known to be either zero or one and return +/// them in the KnownZero/KnownOne bit sets. /// /// NOTE: we cannot consider 'undef' to be "IsZero" here. The problem is that /// we cannot optimize based on the assumption that it is zero without changing @@ -226,8 +223,8 @@ void llvm::computeMaskedBitsLoad(const MDNode &Ranges, APInt &KnownZero) { /// where V is a vector, known zero, and known one values are the /// same width as the vector element, and the bit is set only if it is true /// for all of the elements in the vector. -void llvm::ComputeMaskedBits(Value *V, APInt &KnownZero, APInt &KnownOne, - const DataLayout *TD, unsigned Depth) { +void llvm::computeKnownBits(Value *V, APInt &KnownZero, APInt &KnownOne, + const DataLayout *TD, unsigned Depth) { assert(V && "No Value?"); assert(Depth <= MaxDepth && "Limit Search Depth"); unsigned BitWidth = KnownZero.getBitWidth(); @@ -241,7 +238,7 @@ void llvm::ComputeMaskedBits(Value *V, APInt &KnownZero, APInt &KnownOne, V->getType()->getScalarSizeInBits() == BitWidth) && KnownZero.getBitWidth() == BitWidth && KnownOne.getBitWidth() == BitWidth && - "V, Mask, KnownOne and KnownZero should have same BitWidth"); + "V, KnownOne and KnownZero should have same BitWidth"); if (ConstantInt *CI = dyn_cast<ConstantInt>(V)) { // We know all of the bits for a constant! @@ -303,7 +300,7 @@ void llvm::ComputeMaskedBits(Value *V, APInt &KnownZero, APInt &KnownOne, if (GA->mayBeOverridden()) { KnownZero.clearAllBits(); KnownOne.clearAllBits(); } else { - ComputeMaskedBits(GA->getAliasee(), KnownZero, KnownOne, TD, Depth+1); + computeKnownBits(GA->getAliasee(), KnownZero, KnownOne, TD, Depth+1); } return; } @@ -341,49 +338,43 @@ void llvm::ComputeMaskedBits(Value *V, APInt &KnownZero, APInt &KnownOne, default: break; case Instruction::Load: if (MDNode *MD = cast<LoadInst>(I)->getMetadata(LLVMContext::MD_range)) - computeMaskedBitsLoad(*MD, KnownZero); - return; + computeKnownBitsLoad(*MD, KnownZero); + break; case Instruction::And: { // If either the LHS or the RHS are Zero, the result is zero. - ComputeMaskedBits(I->getOperand(1), KnownZero, KnownOne, TD, Depth+1); - ComputeMaskedBits(I->getOperand(0), KnownZero2, KnownOne2, TD, Depth+1); - assert((KnownZero & KnownOne) == 0 && "Bits known to be one AND zero?"); - assert((KnownZero2 & KnownOne2) == 0 && "Bits known to be one AND zero?"); + computeKnownBits(I->getOperand(1), KnownZero, KnownOne, TD, Depth+1); + computeKnownBits(I->getOperand(0), KnownZero2, KnownOne2, TD, Depth+1); // Output known-1 bits are only known if set in both the LHS & RHS. KnownOne &= KnownOne2; // Output known-0 are known to be clear if zero in either the LHS | RHS. KnownZero |= KnownZero2; - return; + break; } case Instruction::Or: { - ComputeMaskedBits(I->getOperand(1), KnownZero, KnownOne, TD, Depth+1); - ComputeMaskedBits(I->getOperand(0), KnownZero2, KnownOne2, TD, Depth+1); - assert((KnownZero & KnownOne) == 0 && "Bits known to be one AND zero?"); - assert((KnownZero2 & KnownOne2) == 0 && "Bits known to be one AND zero?"); + computeKnownBits(I->getOperand(1), KnownZero, KnownOne, TD, Depth+1); + computeKnownBits(I->getOperand(0), KnownZero2, KnownOne2, TD, Depth+1); // Output known-0 bits are only known if clear in both the LHS & RHS. KnownZero &= KnownZero2; // Output known-1 are known to be set if set in either the LHS | RHS. KnownOne |= KnownOne2; - return; + break; } case Instruction::Xor: { - ComputeMaskedBits(I->getOperand(1), KnownZero, KnownOne, TD, Depth+1); - ComputeMaskedBits(I->getOperand(0), KnownZero2, KnownOne2, TD, Depth+1); - assert((KnownZero & KnownOne) == 0 && "Bits known to be one AND zero?"); - assert((KnownZero2 & KnownOne2) == 0 && "Bits known to be one AND zero?"); + computeKnownBits(I->getOperand(1), KnownZero, KnownOne, TD, Depth+1); + computeKnownBits(I->getOperand(0), KnownZero2, KnownOne2, TD, Depth+1); // Output known-0 bits are known if clear or set in both the LHS & RHS. APInt KnownZeroOut = (KnownZero & KnownZero2) | (KnownOne & KnownOne2); // Output known-1 are known to be set if set in only one of the LHS, RHS. KnownOne = (KnownZero & KnownOne2) | (KnownOne & KnownZero2); KnownZero = KnownZeroOut; - return; + break; } case Instruction::Mul: { bool NSW = cast<OverflowingBinaryOperator>(I)->hasNoSignedWrap(); - ComputeMaskedBitsMul(I->getOperand(0), I->getOperand(1), NSW, + computeKnownBitsMul(I->getOperand(0), I->getOperand(1), NSW, KnownZero, KnownOne, KnownZero2, KnownOne2, TD, Depth); break; } @@ -391,42 +382,40 @@ void llvm::ComputeMaskedBits(Value *V, APInt &KnownZero, APInt &KnownOne, // For the purposes of computing leading zeros we can conservatively // treat a udiv as a logical right shift by the power of 2 known to // be less than the denominator. - ComputeMaskedBits(I->getOperand(0), KnownZero2, KnownOne2, TD, Depth+1); + computeKnownBits(I->getOperand(0), KnownZero2, KnownOne2, TD, Depth+1); unsigned LeadZ = KnownZero2.countLeadingOnes(); KnownOne2.clearAllBits(); KnownZero2.clearAllBits(); - ComputeMaskedBits(I->getOperand(1), KnownZero2, KnownOne2, TD, Depth+1); + computeKnownBits(I->getOperand(1), KnownZero2, KnownOne2, TD, Depth+1); unsigned RHSUnknownLeadingOnes = KnownOne2.countLeadingZeros(); if (RHSUnknownLeadingOnes != BitWidth) LeadZ = std::min(BitWidth, LeadZ + BitWidth - RHSUnknownLeadingOnes - 1); KnownZero = APInt::getHighBitsSet(BitWidth, LeadZ); - return; + break; } case Instruction::Select: - ComputeMaskedBits(I->getOperand(2), KnownZero, KnownOne, TD, Depth+1); - ComputeMaskedBits(I->getOperand(1), KnownZero2, KnownOne2, TD, + computeKnownBits(I->getOperand(2), KnownZero, KnownOne, TD, Depth+1); + computeKnownBits(I->getOperand(1), KnownZero2, KnownOne2, TD, Depth+1); - assert((KnownZero & KnownOne) == 0 && "Bits known to be one AND zero?"); - assert((KnownZero2 & KnownOne2) == 0 && "Bits known to be one AND zero?"); // Only known if known in both the LHS and RHS. KnownOne &= KnownOne2; KnownZero &= KnownZero2; - return; + break; case Instruction::FPTrunc: case Instruction::FPExt: case Instruction::FPToUI: case Instruction::FPToSI: case Instruction::SIToFP: case Instruction::UIToFP: - return; // Can't work with floating point. + break; // Can't work with floating point. case Instruction::PtrToInt: case Instruction::IntToPtr: // We can't handle these if we don't know the pointer size. - if (!TD) return; + if (!TD) break; // FALL THROUGH and handle them the same as zext/trunc. case Instruction::ZExt: case Instruction::Trunc: { @@ -439,19 +428,19 @@ void llvm::ComputeMaskedBits(Value *V, APInt &KnownZero, APInt &KnownOne, SrcBitWidth = TD->getTypeSizeInBits(SrcTy->getScalarType()); } else { SrcBitWidth = SrcTy->getScalarSizeInBits(); - if (!SrcBitWidth) return; + if (!SrcBitWidth) break; } assert(SrcBitWidth && "SrcBitWidth can't be zero"); KnownZero = KnownZero.zextOrTrunc(SrcBitWidth); KnownOne = KnownOne.zextOrTrunc(SrcBitWidth); - ComputeMaskedBits(I->getOperand(0), KnownZero, KnownOne, TD, Depth+1); + computeKnownBits(I->getOperand(0), KnownZero, KnownOne, TD, Depth+1); KnownZero = KnownZero.zextOrTrunc(BitWidth); KnownOne = KnownOne.zextOrTrunc(BitWidth); // Any top bits are known to be zero. if (BitWidth > SrcBitWidth) KnownZero |= APInt::getHighBitsSet(BitWidth, BitWidth - SrcBitWidth); - return; + break; } case Instruction::BitCast: { Type *SrcTy = I->getOperand(0)->getType(); @@ -459,8 +448,8 @@ void llvm::ComputeMaskedBits(Value *V, APInt &KnownZero, APInt &KnownOne, // TODO: For now, not handling conversions like: // (bitcast i64 %x to <2 x i32>) !I->getType()->isVectorTy()) { - ComputeMaskedBits(I->getOperand(0), KnownZero, KnownOne, TD, Depth+1); - return; + computeKnownBits(I->getOperand(0), KnownZero, KnownOne, TD, Depth+1); + break; } break; } @@ -470,8 +459,7 @@ void llvm::ComputeMaskedBits(Value *V, APInt &KnownZero, APInt &KnownOne, KnownZero = KnownZero.trunc(SrcBitWidth); KnownOne = KnownOne.trunc(SrcBitWidth); - ComputeMaskedBits(I->getOperand(0), KnownZero, KnownOne, TD, Depth+1); - assert((KnownZero & KnownOne) == 0 && "Bits known to be one AND zero?"); + computeKnownBits(I->getOperand(0), KnownZero, KnownOne, TD, Depth+1); KnownZero = KnownZero.zext(BitWidth); KnownOne = KnownOne.zext(BitWidth); @@ -481,18 +469,17 @@ void llvm::ComputeMaskedBits(Value *V, APInt &KnownZero, APInt &KnownOne, KnownZero |= APInt::getHighBitsSet(BitWidth, BitWidth - SrcBitWidth); else if (KnownOne[SrcBitWidth-1]) // Input sign bit known set KnownOne |= APInt::getHighBitsSet(BitWidth, BitWidth - SrcBitWidth); - return; + break; } case Instruction::Shl: // (shl X, C1) & C2 == 0 iff (X & C2 >>u C1) == 0 if (ConstantInt *SA = dyn_cast<ConstantInt>(I->getOperand(1))) { uint64_t ShiftAmt = SA->getLimitedValue(BitWidth); - ComputeMaskedBits(I->getOperand(0), KnownZero, KnownOne, TD, Depth+1); - assert((KnownZero & KnownOne) == 0 && "Bits known to be one AND zero?"); + computeKnownBits(I->getOperand(0), KnownZero, KnownOne, TD, Depth+1); KnownZero <<= ShiftAmt; KnownOne <<= ShiftAmt; KnownZero |= APInt::getLowBitsSet(BitWidth, ShiftAmt); // low bits known 0 - return; + break; } break; case Instruction::LShr: @@ -502,13 +489,12 @@ void llvm::ComputeMaskedBits(Value *V, APInt &KnownZero, APInt &KnownOne, uint64_t ShiftAmt = SA->getLimitedValue(BitWidth); // Unsigned shift right. - ComputeMaskedBits(I->getOperand(0), KnownZero,KnownOne, TD, Depth+1); - assert((KnownZero & KnownOne) == 0 && "Bits known to be one AND zero?"); + computeKnownBits(I->getOperand(0), KnownZero,KnownOne, TD, Depth+1); KnownZero = APIntOps::lshr(KnownZero, ShiftAmt); KnownOne = APIntOps::lshr(KnownOne, ShiftAmt); // high bits known zero. KnownZero |= APInt::getHighBitsSet(BitWidth, ShiftAmt); - return; + break; } break; case Instruction::AShr: @@ -518,8 +504,7 @@ void llvm::ComputeMaskedBits(Value *V, APInt &KnownZero, APInt &KnownOne, uint64_t ShiftAmt = SA->getLimitedValue(BitWidth-1); // Signed shift right. - ComputeMaskedBits(I->getOperand(0), KnownZero, KnownOne, TD, Depth+1); - assert((KnownZero & KnownOne) == 0 && "Bits known to be one AND zero?"); + computeKnownBits(I->getOperand(0), KnownZero, KnownOne, TD, Depth+1); KnownZero = APIntOps::lshr(KnownZero, ShiftAmt); KnownOne = APIntOps::lshr(KnownOne, ShiftAmt); @@ -528,19 +513,19 @@ void llvm::ComputeMaskedBits(Value *V, APInt &KnownZero, APInt &KnownOne, KnownZero |= HighBits; else if (KnownOne[BitWidth-ShiftAmt-1]) // New bits are known one. KnownOne |= HighBits; - return; + break; } break; case Instruction::Sub: { bool NSW = cast<OverflowingBinaryOperator>(I)->hasNoSignedWrap(); - ComputeMaskedBitsAddSub(false, I->getOperand(0), I->getOperand(1), NSW, + computeKnownBitsAddSub(false, I->getOperand(0), I->getOperand(1), NSW, KnownZero, KnownOne, KnownZero2, KnownOne2, TD, Depth); break; } case Instruction::Add: { bool NSW = cast<OverflowingBinaryOperator>(I)->hasNoSignedWrap(); - ComputeMaskedBitsAddSub(true, I->getOperand(0), I->getOperand(1), NSW, + computeKnownBitsAddSub(true, I->getOperand(0), I->getOperand(1), NSW, KnownZero, KnownOne, KnownZero2, KnownOne2, TD, Depth); break; @@ -550,7 +535,7 @@ void llvm::ComputeMaskedBits(Value *V, APInt &KnownZero, APInt &KnownOne, APInt RA = Rem->getValue().abs(); if (RA.isPowerOf2()) { APInt LowBits = RA - 1; - ComputeMaskedBits(I->getOperand(0), KnownZero2, KnownOne2, TD, Depth+1); + computeKnownBits(I->getOperand(0), KnownZero2, KnownOne2, TD, Depth+1); // The low bits of the first operand are unchanged by the srem. KnownZero = KnownZero2 & LowBits; @@ -574,8 +559,8 @@ void llvm::ComputeMaskedBits(Value *V, APInt &KnownZero, APInt &KnownOne, // remainder is zero. if (KnownZero.isNonNegative()) { APInt LHSKnownZero(BitWidth, 0), LHSKnownOne(BitWidth, 0); - ComputeMaskedBits(I->getOperand(0), LHSKnownZero, LHSKnownOne, TD, - Depth+1); + computeKnownBits(I->getOperand(0), LHSKnownZero, LHSKnownOne, TD, + Depth+1); // If it's known zero, our sign bit is also zero. if (LHSKnownZero.isNegative()) KnownZero.setBit(BitWidth - 1); @@ -587,9 +572,8 @@ void llvm::ComputeMaskedBits(Value *V, APInt &KnownZero, APInt &KnownOne, APInt RA = Rem->getValue(); if (RA.isPowerOf2()) { APInt LowBits = (RA - 1); - ComputeMaskedBits(I->getOperand(0), KnownZero, KnownOne, TD, - Depth+1); - assert((KnownZero & KnownOne) == 0 && "Bits known to be one AND zero?"); + computeKnownBits(I->getOperand(0), KnownZero, KnownOne, TD, + Depth+1); KnownZero |= ~LowBits; KnownOne &= LowBits; break; @@ -598,8 +582,8 @@ void llvm::ComputeMaskedBits(Value *V, APInt &KnownZero, APInt &KnownOne, // Since the result is less than or equal to either operand, any leading // zero bits in either operand must also exist in the result. - ComputeMaskedBits(I->getOperand(0), KnownZero, KnownOne, TD, Depth+1); - ComputeMaskedBits(I->getOperand(1), KnownZero2, KnownOne2, TD, Depth+1); + computeKnownBits(I->getOperand(0), KnownZero, KnownOne, TD, Depth+1); + computeKnownBits(I->getOperand(1), KnownZero2, KnownOne2, TD, Depth+1); unsigned Leaders = std::max(KnownZero.countLeadingOnes(), KnownZero2.countLeadingOnes()); @@ -622,8 +606,8 @@ void llvm::ComputeMaskedBits(Value *V, APInt &KnownZero, APInt &KnownOne, // Analyze all of the subscripts of this getelementptr instruction // to determine if we can prove known low zero bits. APInt LocalKnownZero(BitWidth, 0), LocalKnownOne(BitWidth, 0); - ComputeMaskedBits(I->getOperand(0), LocalKnownZero, LocalKnownOne, TD, - Depth+1); + computeKnownBits(I->getOperand(0), LocalKnownZero, LocalKnownOne, TD, + Depth+1); unsigned TrailZ = LocalKnownZero.countTrailingOnes(); gep_type_iterator GTI = gep_type_begin(I); @@ -631,8 +615,10 @@ void llvm::ComputeMaskedBits(Value *V, APInt &KnownZero, APInt &KnownOne, Value *Index = I->getOperand(i); if (StructType *STy = dyn_cast<StructType>(*GTI)) { // Handle struct member offset arithmetic. - if (!TD) - return; + if (!TD) { + TrailZ = 0; + break; + } // Handle case when index is vector zeroinitializer Constant *CIndex = cast<Constant>(Index); @@ -650,11 +636,14 @@ void llvm::ComputeMaskedBits(Value *V, APInt &KnownZero, APInt &KnownOne, } else { // Handle array index arithmetic. Type *IndexedTy = GTI.getIndexedType(); - if (!IndexedTy->isSized()) return; + if (!IndexedTy->isSized()) { + TrailZ = 0; + break; + } unsigned GEPOpiBits = Index->getType()->getScalarSizeInBits(); uint64_t TypeSize = TD ? TD->getTypeAllocSize(IndexedTy) : 1; LocalKnownZero = LocalKnownOne = APInt(GEPOpiBits, 0); - ComputeMaskedBits(Index, LocalKnownZero, LocalKnownOne, TD, Depth+1); + computeKnownBits(Index, LocalKnownZero, LocalKnownOne, TD, Depth+1); TrailZ = std::min(TrailZ, unsigned(countTrailingZeros(TypeSize) + LocalKnownZero.countTrailingOnes())); @@ -696,11 +685,11 @@ void llvm::ComputeMaskedBits(Value *V, APInt &KnownZero, APInt &KnownOne, break; // Ok, we have a PHI of the form L op= R. Check for low // zero bits. - ComputeMaskedBits(R, KnownZero2, KnownOne2, TD, Depth+1); + computeKnownBits(R, KnownZero2, KnownOne2, TD, Depth+1); // We need to take the minimum number of known bits APInt KnownZero3(KnownZero), KnownOne3(KnownOne); - ComputeMaskedBits(L, KnownZero3, KnownOne3, TD, Depth+1); + computeKnownBits(L, KnownZero3, KnownOne3, TD, Depth+1); KnownZero = APInt::getLowBitsSet(BitWidth, std::min(KnownZero2.countTrailingOnes(), @@ -712,7 +701,7 @@ void llvm::ComputeMaskedBits(Value *V, APInt &KnownZero, APInt &KnownOne, // Unreachable blocks may have zero-operand PHI nodes. if (P->getNumIncomingValues() == 0) - return; + break; // Otherwise take the unions of the known bit sets of the operands, // taking conservative care to avoid excessive recursion. @@ -731,8 +720,8 @@ void llvm::ComputeMaskedBits(Value *V, APInt &KnownZero, APInt &KnownOne, KnownOne2 = APInt(BitWidth, 0); // Recurse, but cap the recursion to one level, because we don't // want to waste time spinning around in loops. - ComputeMaskedBits(P->getIncomingValue(i), KnownZero2, KnownOne2, TD, - MaxDepth-1); + computeKnownBits(P->getIncomingValue(i), KnownZero2, KnownOne2, TD, + MaxDepth-1); KnownZero &= KnownZero2; KnownOne &= KnownOne2; // If all bits have been ruled out, there's no need to check @@ -776,30 +765,32 @@ void llvm::ComputeMaskedBits(Value *V, APInt &KnownZero, APInt &KnownOne, default: break; case Intrinsic::uadd_with_overflow: case Intrinsic::sadd_with_overflow: - ComputeMaskedBitsAddSub(true, II->getArgOperand(0), - II->getArgOperand(1), false, KnownZero, - KnownOne, KnownZero2, KnownOne2, TD, Depth); + computeKnownBitsAddSub(true, II->getArgOperand(0), + II->getArgOperand(1), false, KnownZero, + KnownOne, KnownZero2, KnownOne2, TD, Depth); break; case Intrinsic::usub_with_overflow: case Intrinsic::ssub_with_overflow: - ComputeMaskedBitsAddSub(false, II->getArgOperand(0), - II->getArgOperand(1), false, KnownZero, - KnownOne, KnownZero2, KnownOne2, TD, Depth); + computeKnownBitsAddSub(false, II->getArgOperand(0), + II->getArgOperand(1), false, KnownZero, + KnownOne, KnownZero2, KnownOne2, TD, Depth); break; case Intrinsic::umul_with_overflow: case Intrinsic::smul_with_overflow: - ComputeMaskedBitsMul(II->getArgOperand(0), II->getArgOperand(1), - false, KnownZero, KnownOne, - KnownZero2, KnownOne2, TD, Depth); + computeKnownBitsMul(II->getArgOperand(0), II->getArgOperand(1), + false, KnownZero, KnownOne, + KnownZero2, KnownOne2, TD, Depth); break; } } } } + + assert((KnownZero & KnownOne) == 0 && "Bits known to be one AND zero?"); } /// ComputeSignBit - Determine whether the sign bit is known to be zero or -/// one. Convenience wrapper around ComputeMaskedBits. +/// one. Convenience wrapper around computeKnownBits. void llvm::ComputeSignBit(Value *V, bool &KnownZero, bool &KnownOne, const DataLayout *TD, unsigned Depth) { unsigned BitWidth = getBitWidth(V->getType(), TD); @@ -810,7 +801,7 @@ void llvm::ComputeSignBit(Value *V, bool &KnownZero, bool &KnownOne, } APInt ZeroBits(BitWidth, 0); APInt OneBits(BitWidth, 0); - ComputeMaskedBits(V, ZeroBits, OneBits, TD, Depth); + computeKnownBits(V, ZeroBits, OneBits, TD, Depth); KnownOne = OneBits[BitWidth - 1]; KnownZero = ZeroBits[BitWidth - 1]; } @@ -842,7 +833,7 @@ bool llvm::isKnownToBeAPowerOfTwo(Value *V, bool OrZero, unsigned Depth) { if (Depth++ == MaxDepth) return false; - Value *X = 0, *Y = 0; + Value *X = nullptr, *Y = nullptr; // A shift of a power of two is a power of two or zero. if (OrZero && (match(V, m_Shl(m_Value(X), m_Value())) || match(V, m_Shr(m_Value(X), m_Value())))) @@ -882,10 +873,10 @@ bool llvm::isKnownToBeAPowerOfTwo(Value *V, bool OrZero, unsigned Depth) { unsigned BitWidth = V->getType()->getScalarSizeInBits(); APInt LHSZeroBits(BitWidth, 0), LHSOneBits(BitWidth, 0); - ComputeMaskedBits(X, LHSZeroBits, LHSOneBits, 0, Depth); + computeKnownBits(X, LHSZeroBits, LHSOneBits, nullptr, Depth); APInt RHSZeroBits(BitWidth, 0), RHSOneBits(BitWidth, 0); - ComputeMaskedBits(Y, RHSZeroBits, RHSOneBits, 0, Depth); + computeKnownBits(Y, RHSZeroBits, RHSOneBits, nullptr, Depth); // If i8 V is a power of two or zero: // ZeroBits: 1 1 1 0 1 1 1 1 // ~ZeroBits: 0 0 0 1 0 0 0 0 @@ -1005,7 +996,7 @@ bool llvm::isKnownNonZero(Value *V, const DataLayout *TD, unsigned Depth) { unsigned BitWidth = getBitWidth(V->getType()->getScalarType(), TD); // X | Y != 0 if X != 0 or Y != 0. - Value *X = 0, *Y = 0; + Value *X = nullptr, *Y = nullptr; if (match(V, m_Or(m_Value(X), m_Value(Y)))) return isKnownNonZero(X, TD, Depth) || isKnownNonZero(Y, TD, Depth); @@ -1023,7 +1014,7 @@ bool llvm::isKnownNonZero(Value *V, const DataLayout *TD, unsigned Depth) { APInt KnownZero(BitWidth, 0); APInt KnownOne(BitWidth, 0); - ComputeMaskedBits(X, KnownZero, KnownOne, TD, Depth); + computeKnownBits(X, KnownZero, KnownOne, TD, Depth); if (KnownOne[0]) return true; } @@ -1065,12 +1056,12 @@ bool llvm::isKnownNonZero(Value *V, const DataLayout *TD, unsigned Depth) { APInt Mask = APInt::getSignedMaxValue(BitWidth); // The sign bit of X is set. If some other bit is set then X is not equal // to INT_MIN. - ComputeMaskedBits(X, KnownZero, KnownOne, TD, Depth); + computeKnownBits(X, KnownZero, KnownOne, TD, Depth); if ((KnownOne & Mask) != 0) return true; // The sign bit of Y is set. If some other bit is set then Y is not equal // to INT_MIN. - ComputeMaskedBits(Y, KnownZero, KnownOne, TD, Depth); + computeKnownBits(Y, KnownZero, KnownOne, TD, Depth); if ((KnownOne & Mask) != 0) return true; } @@ -1100,7 +1091,7 @@ bool llvm::isKnownNonZero(Value *V, const DataLayout *TD, unsigned Depth) { if (!BitWidth) return false; APInt KnownZero(BitWidth, 0); APInt KnownOne(BitWidth, 0); - ComputeMaskedBits(V, KnownZero, KnownOne, TD, Depth); + computeKnownBits(V, KnownZero, KnownOne, TD, Depth); return KnownOne != 0; } @@ -1116,8 +1107,7 @@ bool llvm::isKnownNonZero(Value *V, const DataLayout *TD, unsigned Depth) { bool llvm::MaskedValueIsZero(Value *V, const APInt &Mask, const DataLayout *TD, unsigned Depth) { APInt KnownZero(Mask.getBitWidth(), 0), KnownOne(Mask.getBitWidth(), 0); - ComputeMaskedBits(V, KnownZero, KnownOne, TD, Depth); - assert((KnownZero & KnownOne) == 0 && "Bits known to be one AND zero?"); + computeKnownBits(V, KnownZero, KnownOne, TD, Depth); return (KnownZero & Mask) == Mask; } @@ -1142,7 +1132,7 @@ unsigned llvm::ComputeNumSignBits(Value *V, const DataLayout *TD, unsigned Tmp, Tmp2; unsigned FirstAnswer = 1; - // Note that ConstantInt is handled by the general ComputeMaskedBits case + // Note that ConstantInt is handled by the general computeKnownBits case // below. if (Depth == 6) @@ -1187,7 +1177,7 @@ unsigned llvm::ComputeNumSignBits(Value *V, const DataLayout *TD, FirstAnswer = std::min(Tmp, Tmp2); // We computed what we know about the sign bits as our first // answer. Now proceed to the generic code that uses - // ComputeMaskedBits, and pick whichever answer is better. + // computeKnownBits, and pick whichever answer is better. } break; @@ -1207,7 +1197,7 @@ unsigned llvm::ComputeNumSignBits(Value *V, const DataLayout *TD, if (ConstantInt *CRHS = dyn_cast<ConstantInt>(U->getOperand(1))) if (CRHS->isAllOnesValue()) { APInt KnownZero(TyBits, 0), KnownOne(TyBits, 0); - ComputeMaskedBits(U->getOperand(0), KnownZero, KnownOne, TD, Depth+1); + computeKnownBits(U->getOperand(0), KnownZero, KnownOne, TD, Depth+1); // If the input is known to be 0 or 1, the output is 0/-1, which is all // sign bits set. @@ -1232,7 +1222,7 @@ unsigned llvm::ComputeNumSignBits(Value *V, const DataLayout *TD, if (ConstantInt *CLHS = dyn_cast<ConstantInt>(U->getOperand(0))) if (CLHS->isNullValue()) { APInt KnownZero(TyBits, 0), KnownOne(TyBits, 0); - ComputeMaskedBits(U->getOperand(1), KnownZero, KnownOne, TD, Depth+1); + computeKnownBits(U->getOperand(1), KnownZero, KnownOne, TD, Depth+1); // If the input is known to be 0 or 1, the output is 0/-1, which is all // sign bits set. if ((KnownZero | APInt(TyBits, 1)).isAllOnesValue()) @@ -1278,7 +1268,7 @@ unsigned llvm::ComputeNumSignBits(Value *V, const DataLayout *TD, // use this information. APInt KnownZero(TyBits, 0), KnownOne(TyBits, 0); APInt Mask; - ComputeMaskedBits(V, KnownZero, KnownOne, TD, Depth); + computeKnownBits(V, KnownZero, KnownOne, TD, Depth); if (KnownZero.isNegative()) { // sign bit is 0 Mask = KnownZero; @@ -1364,7 +1354,7 @@ bool llvm::ComputeMultiple(Value *V, unsigned Base, Value *&Multiple, Op1 = ConstantInt::get(V->getContext(), API); } - Value *Mul0 = NULL; + Value *Mul0 = nullptr; if (ComputeMultiple(Op0, Base, Mul0, LookThroughSExt, Depth+1)) { if (Constant *Op1C = dyn_cast<Constant>(Op1)) if (Constant *MulC = dyn_cast<Constant>(Mul0)) { @@ -1388,7 +1378,7 @@ bool llvm::ComputeMultiple(Value *V, unsigned Base, Value *&Multiple, } } - Value *Mul1 = NULL; + Value *Mul1 = nullptr; if (ComputeMultiple(Op1, Base, Mul1, LookThroughSExt, Depth+1)) { if (Constant *Op0C = dyn_cast<Constant>(Op0)) if (Constant *MulC = dyn_cast<Constant>(Mul1)) { @@ -1432,7 +1422,7 @@ bool llvm::CannotBeNegativeZero(const Value *V, unsigned Depth) { return 1; // Limit search depth. const Operator *I = dyn_cast<Operator>(V); - if (I == 0) return false; + if (!I) return false; // Check if the nsz fast-math flag is set if (const FPMathOperator *FPO = dyn_cast<FPMathOperator>(I)) @@ -1513,7 +1503,7 @@ Value *llvm::isBytewiseValue(Value *V) { // If the top/bottom halves aren't the same, reject it. if (Val != Val2) - return 0; + return nullptr; } return ConstantInt::get(V->getContext(), Val); } @@ -1525,11 +1515,11 @@ Value *llvm::isBytewiseValue(Value *V) { Value *Elt = CA->getElementAsConstant(0); Value *Val = isBytewiseValue(Elt); if (!Val) - return 0; + return nullptr; for (unsigned I = 1, E = CA->getNumElements(); I != E; ++I) if (CA->getElementAsConstant(I) != Elt) - return 0; + return nullptr; return Val; } @@ -1540,7 +1530,7 @@ Value *llvm::isBytewiseValue(Value *V) { // %c = or i16 %a, %b // but until there is an example that actually needs this, it doesn't seem // worth worrying about. - return 0; + return nullptr; } @@ -1590,7 +1580,7 @@ static Value *BuildSubAggregate(Value *From, Value* To, Type *IndexedType, Value *V = FindInsertedValue(From, Idxs); if (!V) - return NULL; + return nullptr; // Insert the value in the new (sub) aggregrate return llvm::InsertValueInst::Create(To, V, makeArrayRef(Idxs).slice(IdxSkip), @@ -1641,7 +1631,7 @@ Value *llvm::FindInsertedValue(Value *V, ArrayRef<unsigned> idx_range, if (Constant *C = dyn_cast<Constant>(V)) { C = C->getAggregateElement(idx_range[0]); - if (C == 0) return 0; + if (!C) return nullptr; return FindInsertedValue(C, idx_range.slice(1), InsertBefore); } @@ -1654,7 +1644,7 @@ Value *llvm::FindInsertedValue(Value *V, ArrayRef<unsigned> idx_range, if (req_idx == idx_range.end()) { // We can't handle this without inserting insertvalues if (!InsertBefore) - return 0; + return nullptr; // The requested index identifies a part of a nested aggregate. Handle // this specially. For example, @@ -1708,7 +1698,7 @@ Value *llvm::FindInsertedValue(Value *V, ArrayRef<unsigned> idx_range, } // Otherwise, we don't know (such as, extracting from a function return value // or load instruction) - return 0; + return nullptr; } /// GetPointerBaseWithConstantOffset - Analyze the specified pointer to see if @@ -1769,13 +1759,13 @@ bool llvm::getConstantStringInfo(const Value *V, StringRef &Str, // Make sure the index-ee is a pointer to array of i8. PointerType *PT = cast<PointerType>(GEP->getOperand(0)->getType()); ArrayType *AT = dyn_cast<ArrayType>(PT->getElementType()); - if (AT == 0 || !AT->getElementType()->isIntegerTy(8)) + if (!AT || !AT->getElementType()->isIntegerTy(8)) return false; // Check to make sure that the first operand of the GEP is an integer and // has value 0 so that we are sure we're indexing into the initializer. const ConstantInt *FirstIdx = dyn_cast<ConstantInt>(GEP->getOperand(1)); - if (FirstIdx == 0 || !FirstIdx->isZero()) + if (!FirstIdx || !FirstIdx->isZero()) return false; // If the second index isn't a ConstantInt, then this is a variable index @@ -1807,7 +1797,7 @@ bool llvm::getConstantStringInfo(const Value *V, StringRef &Str, // Must be a Constant Array const ConstantDataArray *Array = dyn_cast<ConstantDataArray>(GV->getInitializer()); - if (Array == 0 || !Array->isString()) + if (!Array || !Array->isString()) return false; // Get the number of elements in the array @@ -1913,7 +1903,7 @@ llvm::GetUnderlyingObject(Value *V, const DataLayout *TD, unsigned MaxLookup) { // See if InstructionSimplify knows any relevant tricks. if (Instruction *I = dyn_cast<Instruction>(V)) // TODO: Acquire a DominatorTree and use it. - if (Value *Simplified = SimplifyInstruction(I, TD, 0)) { + if (Value *Simplified = SimplifyInstruction(I, TD, nullptr)) { V = Simplified; continue; } @@ -2001,7 +1991,7 @@ bool llvm::isSafeToSpeculativelyExecute(const Value *V, return false; APInt KnownZero(BitWidth, 0); APInt KnownOne(BitWidth, 0); - ComputeMaskedBits(Op, KnownZero, KnownOne, TD); + computeKnownBits(Op, KnownZero, KnownOne, TD); return !!KnownZero; } case Instruction::Load: { @@ -2076,14 +2066,18 @@ bool llvm::isKnownNonNull(const Value *V, const TargetLibraryInfo *TLI) { // Alloca never returns null, malloc might. if (isa<AllocaInst>(V)) return true; - // A byval or inalloca argument is never null. + // A byval, inalloca, or nonnull argument is never null. if (const Argument *A = dyn_cast<Argument>(V)) - return A->hasByValOrInAllocaAttr(); + return A->hasByValOrInAllocaAttr() || A->hasNonNullAttr(); // Global values are not null unless extern weak. if (const GlobalValue *GV = dyn_cast<GlobalValue>(V)) return !GV->hasExternalWeakLinkage(); + if (ImmutableCallSite CS = V) + if (CS.paramHasAttr(0, Attribute::NonNull)) + return true; + // operator new never returns null. if (isOperatorNewLikeFn(V, TLI, /*LookThroughBitCast=*/true)) return true; |