author    Stephen Hines <srhines@google.com>  2014-05-29 02:49:00 -0700
committer Stephen Hines <srhines@google.com>  2014-05-29 02:49:00 -0700
commit    dce4a407a24b04eebc6a376f8e62b41aaa7b071f (patch)
tree      dcebc53f2b182f145a2e659393bf9a0472cedf23 /lib/Transforms/Scalar
parent    220b921aed042f9e520c26cffd8282a94c66c3d5 (diff)
Update LLVM for 3.5 rebase (r209712).
Change-Id: I149556c940fb7dc92d075273c87ff584f400941f
Diffstat (limited to 'lib/Transforms/Scalar')
39 files changed, 1847 insertions, 805 deletions
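Two mechanical cleanups account for most of the churn below: each pass now defines `DEBUG_TYPE` *after* its `#include` block and `using namespace llvm;` (so the file-local macro can no longer leak into, or collide with, headers), and null-pointer constants written as `0` or `NULL` become C++11 `nullptr`. A minimal standalone sketch of both patterns, using a hypothetical `MyPass` rather than code from this tree:

```cpp
#include <cstdio>  // stands in for the pass's real #include block

// DEBUG_TYPE is defined after all includes (and any using-directives),
// so no header sees, or clashes with, this file-local macro.
#define DEBUG_TYPE "mypass"

namespace {
struct MyPass {
  int *Cache;
  // Null members are written as nullptr instead of 0 or NULL (C++11).
  MyPass() : Cache(nullptr) {}
};
} // end anonymous namespace

int main() {
  MyPass P;
  std::printf("[%s] cache=%p\n", DEBUG_TYPE, static_cast<void *>(P.Cache));
  return 0;
}
```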
diff --git a/lib/Transforms/Scalar/ADCE.cpp b/lib/Transforms/Scalar/ADCE.cpp index fa8b598..1a3a4aa 100644 --- a/lib/Transforms/Scalar/ADCE.cpp +++ b/lib/Transforms/Scalar/ADCE.cpp @@ -14,7 +14,6 @@ // //===----------------------------------------------------------------------===// -#define DEBUG_TYPE "adce" #include "llvm/Transforms/Scalar.h" #include "llvm/ADT/DepthFirstIterator.h" #include "llvm/ADT/SmallPtrSet.h" @@ -28,6 +27,8 @@ #include "llvm/Pass.h" using namespace llvm; +#define DEBUG_TYPE "adce" + STATISTIC(NumRemoved, "Number of instructions removed"); namespace { diff --git a/lib/Transforms/Scalar/Android.mk b/lib/Transforms/Scalar/Android.mk index 3894f93..079cc86 100644 --- a/lib/Transforms/Scalar/Android.mk +++ b/lib/Transforms/Scalar/Android.mk @@ -32,6 +32,7 @@ transforms_scalar_SRC_FILES := \ Scalar.cpp \ Scalarizer.cpp \ ScalarReplAggregates.cpp \ + SeparateConstOffsetFromGEP.cpp \ SimplifyCFGPass.cpp \ Sink.cpp \ StructurizeCFG.cpp \ @@ -60,11 +61,6 @@ include $(CLEAR_VARS) LOCAL_SRC_FILES := $(transforms_scalar_SRC_FILES) LOCAL_MODULE:= libLLVMScalarOpts -# Override the default optimization level to work around a SIGSEGV -# on x86 target builds for SROA.cpp. -# Bug: 8047767 -LOCAL_CFLAGS_x86 += -O1 - LOCAL_MODULE_TAGS := optional include $(LLVM_DEVICE_BUILD_MK) diff --git a/lib/Transforms/Scalar/CMakeLists.txt b/lib/Transforms/Scalar/CMakeLists.txt index 27434c1..3ad1488 100644 --- a/lib/Transforms/Scalar/CMakeLists.txt +++ b/lib/Transforms/Scalar/CMakeLists.txt @@ -5,19 +5,19 @@ add_llvm_library(LLVMScalarOpts CorrelatedValuePropagation.cpp DCE.cpp DeadStoreElimination.cpp - Scalarizer.cpp EarlyCSE.cpp - GlobalMerge.cpp + FlattenCFGPass.cpp GVN.cpp + GlobalMerge.cpp IndVarSimplify.cpp JumpThreading.cpp LICM.cpp LoopDeletion.cpp LoopIdiomRecognize.cpp LoopInstSimplify.cpp + LoopRerollPass.cpp LoopRotation.cpp LoopStrengthReduce.cpp - LoopRerollPass.cpp LoopUnrollPass.cpp LoopUnswitch.cpp LowerAtomic.cpp @@ -25,13 +25,14 @@ add_llvm_library(LLVMScalarOpts PartiallyInlineLibCalls.cpp Reassociate.cpp Reg2Mem.cpp - SampleProfile.cpp SCCP.cpp SROA.cpp + SampleProfile.cpp Scalar.cpp ScalarReplAggregates.cpp + Scalarizer.cpp + SeparateConstOffsetFromGEP.cpp SimplifyCFGPass.cpp - FlattenCFGPass.cpp Sink.cpp StructurizeCFG.cpp TailRecursionElimination.cpp diff --git a/lib/Transforms/Scalar/ConstantHoisting.cpp b/lib/Transforms/Scalar/ConstantHoisting.cpp index 57a1521..763d02b 100644 --- a/lib/Transforms/Scalar/ConstantHoisting.cpp +++ b/lib/Transforms/Scalar/ConstantHoisting.cpp @@ -33,7 +33,6 @@ // %0 = load i64* inttoptr (i64 big_constant to i64*) //===----------------------------------------------------------------------===// -#define DEBUG_TYPE "consthoist" #include "llvm/Transforms/Scalar.h" #include "llvm/ADT/SmallSet.h" #include "llvm/ADT/SmallVector.h" @@ -44,9 +43,12 @@ #include "llvm/IR/IntrinsicInst.h" #include "llvm/Pass.h" #include "llvm/Support/Debug.h" +#include <tuple> using namespace llvm; +#define DEBUG_TYPE "consthoist" + STATISTIC(NumConstantsHoisted, "Number of constants hoisted"); STATISTIC(NumConstantsRebased, "Number of constants rebased"); @@ -117,7 +119,8 @@ class ConstantHoisting : public FunctionPass { SmallVector<ConstantInfo, 8> ConstantVec; public: static char ID; // Pass identification, replacement for typeid - ConstantHoisting() : FunctionPass(ID), TTI(0), DT(0), Entry(0) { + ConstantHoisting() : FunctionPass(ID), TTI(nullptr), DT(nullptr), + Entry(nullptr) { initializeConstantHoistingPass(*PassRegistry::getPassRegistry()); } @@ -206,7 +209,16 
@@ bool ConstantHoisting::runOnFunction(Function &Fn) { /// \brief Find the constant materialization insertion point. Instruction *ConstantHoisting::findMatInsertPt(Instruction *Inst, unsigned Idx) const { - // The simple and common case. + // If the operand is a cast instruction, then we have to materialize the + // constant before the cast instruction. + if (Idx != ~0U) { + Value *Opnd = Inst->getOperand(Idx); + if (auto CastInst = dyn_cast<Instruction>(Opnd)) + if (CastInst->isCast()) + return CastInst; + } + + // The simple and common case. This also includes constant expressions. if (!isa<PHINode>(Inst) && !isa<LandingPadInst>(Inst)) return Inst; @@ -228,7 +240,7 @@ findConstantInsertionPoint(const ConstantInfo &ConstInfo) const { SmallPtrSet<BasicBlock *, 8> BBs; for (auto const &RCI : ConstInfo.RebasedConstants) for (auto const &U : RCI.Uses) - BBs.insert(U.Inst->getParent()); + BBs.insert(findMatInsertPt(U.Inst, U.OpndIdx)->getParent()); if (BBs.count(Entry)) return &Entry->front(); @@ -487,8 +499,8 @@ void ConstantHoisting::emitBaseConstants(Instruction *Base, Constant *Offset, ClonedCastInst->insertAfter(CastInst); // Use the same debug location as the original cast instruction. ClonedCastInst->setDebugLoc(CastInst->getDebugLoc()); - DEBUG(dbgs() << "Clone instruction: " << *ClonedCastInst << '\n' - << "To : " << *CastInst << '\n'); + DEBUG(dbgs() << "Clone instruction: " << *CastInst << '\n' + << "To : " << *ClonedCastInst << '\n'); } DEBUG(dbgs() << "Update: " << *ConstUser.Inst << '\n'); diff --git a/lib/Transforms/Scalar/ConstantProp.cpp b/lib/Transforms/Scalar/ConstantProp.cpp index 7045b36..dd51ce1 100644 --- a/lib/Transforms/Scalar/ConstantProp.cpp +++ b/lib/Transforms/Scalar/ConstantProp.cpp @@ -18,7 +18,6 @@ // //===----------------------------------------------------------------------===// -#define DEBUG_TYPE "constprop" #include "llvm/Transforms/Scalar.h" #include "llvm/ADT/Statistic.h" #include "llvm/Analysis/ConstantFolding.h" @@ -31,6 +30,8 @@ #include <set> using namespace llvm; +#define DEBUG_TYPE "constprop" + STATISTIC(NumInstKilled, "Number of instructions killed"); namespace { @@ -68,7 +69,7 @@ bool ConstantPropagation::runOnFunction(Function &F) { } bool Changed = false; DataLayoutPass *DLP = getAnalysisIfAvailable<DataLayoutPass>(); - const DataLayout *DL = DLP ? &DLP->getDataLayout() : 0; + const DataLayout *DL = DLP ? 
&DLP->getDataLayout() : nullptr; TargetLibraryInfo *TLI = &getAnalysis<TargetLibraryInfo>(); while (!WorkList.empty()) { diff --git a/lib/Transforms/Scalar/CorrelatedValuePropagation.cpp b/lib/Transforms/Scalar/CorrelatedValuePropagation.cpp index 0490767..0829462 100644 --- a/lib/Transforms/Scalar/CorrelatedValuePropagation.cpp +++ b/lib/Transforms/Scalar/CorrelatedValuePropagation.cpp @@ -11,7 +11,6 @@ // //===----------------------------------------------------------------------===// -#define DEBUG_TYPE "correlated-value-propagation" #include "llvm/Transforms/Scalar.h" #include "llvm/ADT/Statistic.h" #include "llvm/Analysis/InstructionSimplify.h" @@ -26,6 +25,8 @@ #include "llvm/Transforms/Utils/Local.h" using namespace llvm; +#define DEBUG_TYPE "correlated-value-propagation" + STATISTIC(NumPhis, "Number of phis propagated"); STATISTIC(NumSelects, "Number of selects propagated"); STATISTIC(NumMemAccess, "Number of memory access targets propagated"); @@ -138,7 +139,7 @@ bool CorrelatedValuePropagation::processPHI(PHINode *P) { } bool CorrelatedValuePropagation::processMemAccess(Instruction *I) { - Value *Pointer = 0; + Value *Pointer = nullptr; if (LoadInst *L = dyn_cast<LoadInst>(I)) Pointer = L->getPointerOperand(); else diff --git a/lib/Transforms/Scalar/DCE.cpp b/lib/Transforms/Scalar/DCE.cpp index 8377fd9..99fac75 100644 --- a/lib/Transforms/Scalar/DCE.cpp +++ b/lib/Transforms/Scalar/DCE.cpp @@ -16,7 +16,6 @@ // //===----------------------------------------------------------------------===// -#define DEBUG_TYPE "dce" #include "llvm/Transforms/Scalar.h" #include "llvm/ADT/Statistic.h" #include "llvm/IR/InstIterator.h" @@ -26,6 +25,8 @@ #include "llvm/Transforms/Utils/Local.h" using namespace llvm; +#define DEBUG_TYPE "dce" + STATISTIC(DIEEliminated, "Number of insts removed by DIE pass"); STATISTIC(DCEEliminated, "Number of insts removed"); diff --git a/lib/Transforms/Scalar/DeadStoreElimination.cpp b/lib/Transforms/Scalar/DeadStoreElimination.cpp index f54c00d..3af8ee7 100644 --- a/lib/Transforms/Scalar/DeadStoreElimination.cpp +++ b/lib/Transforms/Scalar/DeadStoreElimination.cpp @@ -15,7 +15,6 @@ // //===----------------------------------------------------------------------===// -#define DEBUG_TYPE "dse" #include "llvm/Transforms/Scalar.h" #include "llvm/ADT/STLExtras.h" #include "llvm/ADT/SetVector.h" @@ -38,6 +37,8 @@ #include "llvm/Transforms/Utils/Local.h" using namespace llvm; +#define DEBUG_TYPE "dse" + STATISTIC(NumFastStores, "Number of stores deleted"); STATISTIC(NumFastOther , "Number of other instrs removed"); @@ -49,7 +50,7 @@ namespace { const TargetLibraryInfo *TLI; static char ID; // Pass identification, replacement for typeid - DSE() : FunctionPass(ID), AA(0), MD(0), DT(0) { + DSE() : FunctionPass(ID), AA(nullptr), MD(nullptr), DT(nullptr) { initializeDSEPass(*PassRegistry::getPassRegistry()); } @@ -69,7 +70,7 @@ namespace { if (DT->isReachableFromEntry(I)) Changed |= runOnBasicBlock(*I); - AA = 0; MD = 0; DT = 0; + AA = nullptr; MD = nullptr; DT = nullptr; return Changed; } @@ -111,9 +112,9 @@ FunctionPass *llvm::createDeadStoreEliminationPass() { return new DSE(); } /// If ValueSet is non-null, remove any deleted instructions from it as well. 
/// static void DeleteDeadInstruction(Instruction *I, - MemoryDependenceAnalysis &MD, - const TargetLibraryInfo *TLI, - SmallSetVector<Value*, 16> *ValueSet = 0) { + MemoryDependenceAnalysis &MD, + const TargetLibraryInfo *TLI, + SmallSetVector<Value*, 16> *ValueSet = nullptr) { SmallVector<Instruction*, 32> NowDeadInsts; NowDeadInsts.push_back(I); @@ -131,7 +132,7 @@ static void DeleteDeadInstruction(Instruction *I, for (unsigned op = 0, e = DeadInst->getNumOperands(); op != e; ++op) { Value *Op = DeadInst->getOperand(op); - DeadInst->setOperand(op, 0); + DeadInst->setOperand(op, nullptr); // If this operand just became dead, add it to the NowDeadInsts list. if (!Op->use_empty()) continue; @@ -203,13 +204,13 @@ getLocForWrite(Instruction *Inst, AliasAnalysis &AA) { // If we don't have target data around, an unknown size in Location means // that we should use the size of the pointee type. This isn't valid for // memset/memcpy, which writes more than an i8. - if (Loc.Size == AliasAnalysis::UnknownSize && DL == 0) + if (Loc.Size == AliasAnalysis::UnknownSize && DL == nullptr) return AliasAnalysis::Location(); return Loc; } IntrinsicInst *II = dyn_cast<IntrinsicInst>(Inst); - if (II == 0) return AliasAnalysis::Location(); + if (!II) return AliasAnalysis::Location(); switch (II->getIntrinsicID()) { default: return AliasAnalysis::Location(); // Unhandled intrinsic. @@ -217,7 +218,7 @@ getLocForWrite(Instruction *Inst, AliasAnalysis &AA) { // If we don't have target data around, an unknown size in Location means // that we should use the size of the pointee type. This isn't valid for // init.trampoline, which writes more than an i8. - if (DL == 0) return AliasAnalysis::Location(); + if (!DL) return AliasAnalysis::Location(); // FIXME: We don't know the size of the trampoline, so we can't really // handle it here. @@ -359,7 +360,7 @@ static OverwriteResult isOverwrite(const AliasAnalysis::Location &Later, // If we have no DataLayout information around, then the size of the store // is inferrable from the pointee type. If they are the same type, then // we know that the store is safe. - if (DL == 0 && Later.Ptr->getType() == Earlier.Ptr->getType()) + if (DL == nullptr && Later.Ptr->getType() == Earlier.Ptr->getType()) return OverwriteComplete; return OverwriteUnknown; @@ -373,7 +374,7 @@ static OverwriteResult isOverwrite(const AliasAnalysis::Location &Later, // Otherwise, we have to have size information, and the later store has to be // larger than the earlier one. if (Later.Size == AliasAnalysis::UnknownSize || - Earlier.Size == AliasAnalysis::UnknownSize || DL == 0) + Earlier.Size == AliasAnalysis::UnknownSize || DL == nullptr) return OverwriteUnknown; // Check to see if the later store is to the entire object (either a global, @@ -461,7 +462,7 @@ static bool isPossibleSelfRead(Instruction *Inst, // Self reads can only happen for instructions that read memory. Get the // location read. AliasAnalysis::Location InstReadLoc = getLocForRead(Inst, AA); - if (InstReadLoc.Ptr == 0) return false; // Not a reading instruction. + if (!InstReadLoc.Ptr) return false; // Not a reading instruction. // If the read and written loc obviously don't alias, it isn't a read. if (AA.isNoAlias(InstReadLoc, InstStoreLoc)) return false; @@ -528,7 +529,7 @@ bool DSE::runOnBasicBlock(BasicBlock &BB) { DeleteDeadInstruction(SI, *MD, TLI); - if (NextInst == 0) // Next instruction deleted. + if (!NextInst) // Next instruction deleted. BBI = BB.begin(); else if (BBI != BB.begin()) // Revisit this instruction if possible. 
--BBI; @@ -543,7 +544,7 @@ bool DSE::runOnBasicBlock(BasicBlock &BB) { AliasAnalysis::Location Loc = getLocForWrite(Inst, *AA); // If we didn't get a useful location, fail. - if (Loc.Ptr == 0) + if (!Loc.Ptr) continue; while (InstDep.isDef() || InstDep.isClobber()) { @@ -557,7 +558,7 @@ bool DSE::runOnBasicBlock(BasicBlock &BB) { Instruction *DepWrite = InstDep.getInst(); AliasAnalysis::Location DepLoc = getLocForWrite(DepWrite, *AA); // If we didn't get a useful location, or if it isn't a size, bail out. - if (DepLoc.Ptr == 0) + if (!DepLoc.Ptr) break; // If we find a write that is a) removable (i.e., non-volatile), b) is diff --git a/lib/Transforms/Scalar/EarlyCSE.cpp b/lib/Transforms/Scalar/EarlyCSE.cpp index af2c3d1..735f5c1 100644 --- a/lib/Transforms/Scalar/EarlyCSE.cpp +++ b/lib/Transforms/Scalar/EarlyCSE.cpp @@ -12,7 +12,6 @@ // //===----------------------------------------------------------------------===// -#define DEBUG_TYPE "early-cse" #include "llvm/Transforms/Scalar.h" #include "llvm/ADT/Hashing.h" #include "llvm/ADT/ScopedHashTable.h" @@ -29,6 +28,8 @@ #include <vector> using namespace llvm; +#define DEBUG_TYPE "early-cse" + STATISTIC(NumSimplify, "Number of instructions simplified or DCE'd"); STATISTIC(NumCSE, "Number of instructions CSE'd"); STATISTIC(NumCSELoad, "Number of load instructions CSE'd"); @@ -207,7 +208,7 @@ namespace { return false; CallInst *CI = dyn_cast<CallInst>(Inst); - if (CI == 0 || !CI->onlyReadsMemory()) + if (!CI || !CI->onlyReadsMemory()) return false; return true; } @@ -405,14 +406,14 @@ bool EarlyCSE::processNode(DomTreeNode *Node) { // have invalidated the live-out memory values of our parent value. For now, // just be conservative and invalidate memory if this block has multiple // predecessors. - if (BB->getSinglePredecessor() == 0) + if (!BB->getSinglePredecessor()) ++CurrentGeneration; /// LastStore - Keep track of the last non-volatile store that we saw... for /// as long as there in no instruction that reads memory. If we see a store /// to the same location, we delete the dead store. This zaps trivial dead /// stores which can occur in bitfield code among other things. - StoreInst *LastStore = 0; + StoreInst *LastStore = nullptr; bool Changed = false; @@ -462,7 +463,7 @@ bool EarlyCSE::processNode(DomTreeNode *Node) { if (LoadInst *LI = dyn_cast<LoadInst>(Inst)) { // Ignore volatile loads. if (!LI->isSimple()) { - LastStore = 0; + LastStore = nullptr; continue; } @@ -470,7 +471,7 @@ bool EarlyCSE::processNode(DomTreeNode *Node) { // generation, replace this instruction. std::pair<Value*, unsigned> InVal = AvailableLoads->lookup(Inst->getOperand(0)); - if (InVal.first != 0 && InVal.second == CurrentGeneration) { + if (InVal.first != nullptr && InVal.second == CurrentGeneration) { DEBUG(dbgs() << "EarlyCSE CSE LOAD: " << *Inst << " to: " << *InVal.first << '\n'); if (!Inst->use_empty()) Inst->replaceAllUsesWith(InVal.first); @@ -483,20 +484,20 @@ bool EarlyCSE::processNode(DomTreeNode *Node) { // Otherwise, remember that we have this instruction. AvailableLoads->insert(Inst->getOperand(0), std::pair<Value*, unsigned>(Inst, CurrentGeneration)); - LastStore = 0; + LastStore = nullptr; continue; } // If this instruction may read from memory, forget LastStore. if (Inst->mayReadFromMemory()) - LastStore = 0; + LastStore = nullptr; // If this is a read-only call, process it. if (CallValue::canHandle(Inst)) { // If we have an available version of this call, and if it is the right // generation, replace this instruction. 
std::pair<Value*, unsigned> InVal = AvailableCalls->lookup(Inst); - if (InVal.first != 0 && InVal.second == CurrentGeneration) { + if (InVal.first != nullptr && InVal.second == CurrentGeneration) { DEBUG(dbgs() << "EarlyCSE CSE CALL: " << *Inst << " to: " << *InVal.first << '\n'); if (!Inst->use_empty()) Inst->replaceAllUsesWith(InVal.first); @@ -528,7 +529,7 @@ bool EarlyCSE::processNode(DomTreeNode *Node) { LastStore->eraseFromParent(); Changed = true; ++NumDSE; - LastStore = 0; + LastStore = nullptr; continue; } @@ -558,7 +559,7 @@ bool EarlyCSE::runOnFunction(Function &F) { std::vector<StackNode *> nodesToProcess; DataLayoutPass *DLP = getAnalysisIfAvailable<DataLayoutPass>(); - DL = DLP ? &DLP->getDataLayout() : 0; + DL = DLP ? &DLP->getDataLayout() : nullptr; TLI = &getAnalysis<TargetLibraryInfo>(); DT = &getAnalysis<DominatorTreeWrapperPass>().getDomTree(); diff --git a/lib/Transforms/Scalar/FlattenCFGPass.cpp b/lib/Transforms/Scalar/FlattenCFGPass.cpp index e7f2564..0430c18 100644 --- a/lib/Transforms/Scalar/FlattenCFGPass.cpp +++ b/lib/Transforms/Scalar/FlattenCFGPass.cpp @@ -11,7 +11,6 @@ // //===----------------------------------------------------------------------===// -#define DEBUG_TYPE "flattencfg" #include "llvm/Transforms/Scalar.h" #include "llvm/Analysis/AliasAnalysis.h" #include "llvm/IR/CFG.h" @@ -19,6 +18,8 @@ #include "llvm/Transforms/Utils/Local.h" using namespace llvm; +#define DEBUG_TYPE "flattencfg" + namespace { struct FlattenCFGPass : public FunctionPass { static char ID; // Pass identification, replacement for typeid diff --git a/lib/Transforms/Scalar/GVN.cpp b/lib/Transforms/Scalar/GVN.cpp index 33c387c..6d07ddd 100644 --- a/lib/Transforms/Scalar/GVN.cpp +++ b/lib/Transforms/Scalar/GVN.cpp @@ -15,11 +15,11 @@ // //===----------------------------------------------------------------------===// -#define DEBUG_TYPE "gvn" #include "llvm/Transforms/Scalar.h" #include "llvm/ADT/DenseMap.h" #include "llvm/ADT/DepthFirstIterator.h" #include "llvm/ADT/Hashing.h" +#include "llvm/ADT/MapVector.h" #include "llvm/ADT/SetVector.h" #include "llvm/ADT/SmallPtrSet.h" #include "llvm/ADT/Statistic.h" @@ -50,6 +50,8 @@ using namespace llvm; using namespace PatternMatch; +#define DEBUG_TYPE "gvn" + STATISTIC(NumGVNInstr, "Number of instructions deleted"); STATISTIC(NumGVNLoad, "Number of loads deleted"); STATISTIC(NumGVNPRE, "Number of instructions PRE'd"); @@ -213,13 +215,13 @@ Expression ValueTable::create_cmp_expression(unsigned Opcode, } Expression ValueTable::create_extractvalue_expression(ExtractValueInst *EI) { - assert(EI != 0 && "Not an ExtractValueInst?"); + assert(EI && "Not an ExtractValueInst?"); Expression e; e.type = EI->getType(); e.opcode = 0; IntrinsicInst *I = dyn_cast<IntrinsicInst>(EI->getAggregateOperand()); - if (I != 0 && EI->getNumIndices() == 1 && *EI->idx_begin() == 0 ) { + if (I != nullptr && EI->getNumIndices() == 1 && *EI->idx_begin() == 0 ) { // EI might be an extract from one of our recognised intrinsics. If it // is we'll synthesize a semantically equivalent expression instead on // an extract value expression. @@ -327,7 +329,7 @@ uint32_t ValueTable::lookup_or_add_call(CallInst *C) { const MemoryDependenceAnalysis::NonLocalDepInfo &deps = MD->getNonLocalCallDependency(CallSite(C)); // FIXME: Move the checking logic to MemDep! - CallInst* cdep = 0; + CallInst* cdep = nullptr; // Check to see if we have a single dominating call instruction that is // identical to C. 
@@ -338,8 +340,8 @@ uint32_t ValueTable::lookup_or_add_call(CallInst *C) { // We don't handle non-definitions. If we already have a call, reject // instruction dependencies. - if (!I->getResult().isDef() || cdep != 0) { - cdep = 0; + if (!I->getResult().isDef() || cdep != nullptr) { + cdep = nullptr; break; } @@ -350,7 +352,7 @@ uint32_t ValueTable::lookup_or_add_call(CallInst *C) { continue; } - cdep = 0; + cdep = nullptr; break; } @@ -551,7 +553,7 @@ namespace { static AvailableValueInBlock getUndef(BasicBlock *BB) { AvailableValueInBlock Res; Res.BB = BB; - Res.Val.setPointer(0); + Res.Val.setPointer(nullptr); Res.Val.setInt(UndefVal); Res.Offset = 0; return Res; @@ -611,7 +613,7 @@ namespace { public: static char ID; // Pass identification, replacement for typeid explicit GVN(bool noloads = false) - : FunctionPass(ID), NoLoads(noloads), MD(0) { + : FunctionPass(ID), NoLoads(noloads), MD(nullptr) { initializeGVNPass(*PassRegistry::getPassRegistry()); } @@ -649,7 +651,7 @@ namespace { /// removeFromLeaderTable - Scan the list of values corresponding to a given /// value number, and remove the given instruction if encountered. void removeFromLeaderTable(uint32_t N, Instruction *I, BasicBlock *BB) { - LeaderTableEntry* Prev = 0; + LeaderTableEntry* Prev = nullptr; LeaderTableEntry* Curr = &LeaderTable[N]; while (Curr->Val != I || Curr->BB != BB) { @@ -661,8 +663,8 @@ namespace { Prev->Next = Curr->Next; } else { if (!Curr->Next) { - Curr->Val = 0; - Curr->BB = 0; + Curr->Val = nullptr; + Curr->BB = nullptr; } else { LeaderTableEntry* Next = Curr->Next; Curr->Val = Next->Val; @@ -855,7 +857,7 @@ static Value *CoerceAvailableValueToLoadType(Value *StoredVal, Instruction *InsertPt, const DataLayout &DL) { if (!CanCoerceMustAliasedValueToLoad(StoredVal, LoadedTy, DL)) - return 0; + return nullptr; // If this is already the right type, just return it. Type *StoredValTy = StoredVal->getType(); @@ -1060,7 +1062,7 @@ static int AnalyzeLoadFromClobberingMemInst(Type *LoadTy, Value *LoadPtr, const DataLayout &DL) { // If the mem operation is a non-constant size, we can't handle it. ConstantInt *SizeCst = dyn_cast<ConstantInt>(MI->getLength()); - if (SizeCst == 0) return -1; + if (!SizeCst) return -1; uint64_t MemSizeInBits = SizeCst->getZExtValue()*8; // If this is memset, we just need to see if the offset is valid in the size @@ -1075,10 +1077,10 @@ static int AnalyzeLoadFromClobberingMemInst(Type *LoadTy, Value *LoadPtr, MemTransferInst *MTI = cast<MemTransferInst>(MI); Constant *Src = dyn_cast<Constant>(MTI->getSource()); - if (Src == 0) return -1; + if (!Src) return -1; GlobalVariable *GV = dyn_cast<GlobalVariable>(GetUnderlyingObject(Src, &DL)); - if (GV == 0 || !GV->isConstant()) return -1; + if (!GV || !GV->isConstant()) return -1; // See if the access is within the bounds of the transfer. int Offset = AnalyzeLoadFromClobberingWrite(LoadTy, LoadPtr, @@ -1420,8 +1422,7 @@ void GVN::AnalyzeLoadAvailability(LoadInst *LI, LoadDepVect &Deps, // If this is a clobber and L is the first instruction in its block, then // we have the first instruction in the entry block. 
if (DepLI != LI && Address && DL) { - int Offset = AnalyzeLoadFromClobberingLoad(LI->getType(), - LI->getPointerOperand(), + int Offset = AnalyzeLoadFromClobberingLoad(LI->getType(), Address, DepLI, *DL); if (Offset != -1) { @@ -1469,8 +1470,8 @@ void GVN::AnalyzeLoadAvailability(LoadInst *LI, LoadDepVect &Deps, if (S->getValueOperand()->getType() != LI->getType()) { // If the stored value is larger or equal to the loaded value, we can // reuse it. - if (DL == 0 || !CanCoerceMustAliasedValueToLoad(S->getValueOperand(), - LI->getType(), *DL)) { + if (!DL || !CanCoerceMustAliasedValueToLoad(S->getValueOperand(), + LI->getType(), *DL)) { UnavailableBlocks.push_back(DepBB); continue; } @@ -1486,7 +1487,7 @@ void GVN::AnalyzeLoadAvailability(LoadInst *LI, LoadDepVect &Deps, if (LD->getType() != LI->getType()) { // If the stored value is larger or equal to the loaded value, we can // reuse it. - if (DL == 0 || !CanCoerceMustAliasedValueToLoad(LD, LI->getType(),*DL)){ + if (!DL || !CanCoerceMustAliasedValueToLoad(LD, LI->getType(),*DL)) { UnavailableBlocks.push_back(DepBB); continue; } @@ -1539,7 +1540,7 @@ bool GVN::PerformLoadPRE(LoadInst *LI, AvailValInBlkVect &ValuesPerBlock, // Check to see how many predecessors have the loaded value fully // available. - DenseMap<BasicBlock*, Value*> PredLoads; + MapVector<BasicBlock *, Value *> PredLoads; DenseMap<BasicBlock*, char> FullyAvailableBlocks; for (unsigned i = 0, e = ValuesPerBlock.size(); i != e; ++i) FullyAvailableBlocks[ValuesPerBlock[i].BB] = true; @@ -1553,7 +1554,6 @@ bool GVN::PerformLoadPRE(LoadInst *LI, AvailValInBlkVect &ValuesPerBlock, if (IsValueFullyAvailableInBlock(Pred, FullyAvailableBlocks, 0)) { continue; } - PredLoads[Pred] = 0; if (Pred->getTerminator()->getNumSuccessors() != 1) { if (isa<IndirectBrInst>(Pred->getTerminator())) { @@ -1570,11 +1570,14 @@ bool GVN::PerformLoadPRE(LoadInst *LI, AvailValInBlkVect &ValuesPerBlock, } CriticalEdgePred.push_back(Pred); + } else { + // Only add the predecessors that will not be split for now. + PredLoads[Pred] = nullptr; } } // Decide whether PRE is profitable for this load. - unsigned NumUnavailablePreds = PredLoads.size(); + unsigned NumUnavailablePreds = PredLoads.size() + CriticalEdgePred.size(); assert(NumUnavailablePreds != 0 && "Fully available value should already be eliminated!"); @@ -1586,12 +1589,10 @@ bool GVN::PerformLoadPRE(LoadInst *LI, AvailValInBlkVect &ValuesPerBlock, return false; // Split critical edges, and update the unavailable predecessors accordingly. - for (SmallVectorImpl<BasicBlock *>::iterator I = CriticalEdgePred.begin(), - E = CriticalEdgePred.end(); I != E; I++) { - BasicBlock *OrigPred = *I; + for (BasicBlock *OrigPred : CriticalEdgePred) { BasicBlock *NewPred = splitCriticalEdges(OrigPred, LoadBB); - PredLoads.erase(OrigPred); - PredLoads[NewPred] = 0; + assert(!PredLoads.count(OrigPred) && "Split edges shouldn't be in map!"); + PredLoads[NewPred] = nullptr; DEBUG(dbgs() << "Split critical edge " << OrigPred->getName() << "->" << LoadBB->getName() << '\n'); } @@ -1599,9 +1600,8 @@ bool GVN::PerformLoadPRE(LoadInst *LI, AvailValInBlkVect &ValuesPerBlock, // Check if the load can safely be moved to all the unavailable predecessors. 
bool CanDoPRE = true; SmallVector<Instruction*, 8> NewInsts; - for (DenseMap<BasicBlock*, Value*>::iterator I = PredLoads.begin(), - E = PredLoads.end(); I != E; ++I) { - BasicBlock *UnavailablePred = I->first; + for (auto &PredLoad : PredLoads) { + BasicBlock *UnavailablePred = PredLoad.first; // Do PHI translation to get its value in the predecessor if necessary. The // returned pointer (if non-null) is guaranteed to dominate UnavailablePred. @@ -1610,20 +1610,20 @@ bool GVN::PerformLoadPRE(LoadInst *LI, AvailValInBlkVect &ValuesPerBlock, // the load on the pred (?!?), so we can insert code to materialize the // pointer if it is not available. PHITransAddr Address(LI->getPointerOperand(), DL); - Value *LoadPtr = 0; + Value *LoadPtr = nullptr; LoadPtr = Address.PHITranslateWithInsertion(LoadBB, UnavailablePred, *DT, NewInsts); // If we couldn't find or insert a computation of this phi translated value, // we fail PRE. - if (LoadPtr == 0) { + if (!LoadPtr) { DEBUG(dbgs() << "COULDN'T INSERT PHI TRANSLATED VALUE OF: " << *LI->getPointerOperand() << "\n"); CanDoPRE = false; break; } - I->second = LoadPtr; + PredLoad.second = LoadPtr; } if (!CanDoPRE) { @@ -1632,8 +1632,8 @@ bool GVN::PerformLoadPRE(LoadInst *LI, AvailValInBlkVect &ValuesPerBlock, if (MD) MD->removeInstruction(I); I->eraseFromParent(); } - // HINT:Don't revert the edge-splitting as following transformation may - // also need to split these critial edges. + // HINT: Don't revert the edge-splitting as following transformation may + // also need to split these critical edges. return !CriticalEdgePred.empty(); } @@ -1654,10 +1654,9 @@ bool GVN::PerformLoadPRE(LoadInst *LI, AvailValInBlkVect &ValuesPerBlock, VN.lookup_or_add(NewInsts[i]); } - for (DenseMap<BasicBlock*, Value*>::iterator I = PredLoads.begin(), - E = PredLoads.end(); I != E; ++I) { - BasicBlock *UnavailablePred = I->first; - Value *LoadPtr = I->second; + for (const auto &PredLoad : PredLoads) { + BasicBlock *UnavailablePred = PredLoad.first; + Value *LoadPtr = PredLoad.second; Instruction *NewLoad = new LoadInst(LoadPtr, LI->getName()+".pre", false, LI->getAlignment(), @@ -1776,7 +1775,7 @@ static void patchReplacementInstruction(Instruction *I, Value *Repl) { MDNode *ReplMD = Metadata[i].second; switch(Kind) { default: - ReplInst->setMetadata(Kind, NULL); // Remove unknown metadata + ReplInst->setMetadata(Kind, nullptr); // Remove unknown metadata break; case LLVMContext::MD_dbg: llvm_unreachable("getAllMetadataOtherThanDebugLoc returned a MD_dbg"); @@ -1832,7 +1831,7 @@ bool GVN::processLoad(LoadInst *L) { // a common base + constant offset, and if the previous store (or memset) // completely covers this load. This sort of thing can happen in bitfield // access code. 
- Value *AvailVal = 0; + Value *AvailVal = nullptr; if (StoreInst *DepSI = dyn_cast<StoreInst>(Dep.getInst())) { int Offset = AnalyzeLoadFromClobberingStore(L->getType(), L->getPointerOperand(), @@ -1920,7 +1919,7 @@ bool GVN::processLoad(LoadInst *L) { if (DL) { StoredVal = CoerceAvailableValueToLoadType(StoredVal, L->getType(), L, *DL); - if (StoredVal == 0) + if (!StoredVal) return false; DEBUG(dbgs() << "GVN COERCED STORE:\n" << *DepSI << '\n' << *StoredVal @@ -1949,7 +1948,7 @@ bool GVN::processLoad(LoadInst *L) { if (DL) { AvailableVal = CoerceAvailableValueToLoadType(DepLI, L->getType(), L, *DL); - if (AvailableVal == 0) + if (!AvailableVal) return false; DEBUG(dbgs() << "GVN COERCED LOAD:\n" << *DepLI << "\n" << *AvailableVal @@ -1999,9 +1998,9 @@ bool GVN::processLoad(LoadInst *L) { // a few comparisons of DFS numbers. Value *GVN::findLeader(const BasicBlock *BB, uint32_t num) { LeaderTableEntry Vals = LeaderTable[num]; - if (!Vals.Val) return 0; + if (!Vals.Val) return nullptr; - Value *Val = 0; + Value *Val = nullptr; if (DT->dominates(Vals.BB, BB)) { Val = Vals.Val; if (isa<Constant>(Val)) return Val; @@ -2052,7 +2051,7 @@ static bool isOnlyReachableViaThisEdge(const BasicBlockEdge &E, const BasicBlock *Src = E.getStart(); assert((!Pred || Pred == Src) && "No edge between these basic blocks!"); (void)Src; - return Pred != 0; + return Pred != nullptr; } /// propagateEquality - The given values are known to be equal in every block @@ -2296,7 +2295,7 @@ bool GVN::processInstruction(Instruction *I) { // Perform fast-path value-number based elimination of values inherited from // dominators. Value *repl = findLeader(I->getParent(), Num); - if (repl == 0) { + if (!repl) { // Failure, just remember this instance for future use. addToLeaderTable(Num, I, I->getParent()); return false; @@ -2319,7 +2318,7 @@ bool GVN::runOnFunction(Function& F) { MD = &getAnalysis<MemoryDependenceAnalysis>(); DT = &getAnalysis<DominatorTreeWrapperPass>().getDomTree(); DataLayoutPass *DLP = getAnalysisIfAvailable<DataLayoutPass>(); - DL = DLP ? &DLP->getDataLayout() : 0; + DL = DLP ? &DLP->getDataLayout() : nullptr; TLI = &getAnalysis<TargetLibraryInfo>(); VN.setAliasAnalysis(&getAnalysis<AliasAnalysis>()); VN.setMemDep(MD); @@ -2421,10 +2420,7 @@ bool GVN::processBlock(BasicBlock *BB) { bool GVN::performPRE(Function &F) { bool Changed = false; SmallVector<std::pair<Value*, BasicBlock*>, 8> predMap; - for (df_iterator<BasicBlock*> DI = df_begin(&F.getEntryBlock()), - DE = df_end(&F.getEntryBlock()); DI != DE; ++DI) { - BasicBlock *CurrentBlock = *DI; - + for (BasicBlock *CurrentBlock : depth_first(&F.getEntryBlock())) { // Nothing to PRE in the entry block. if (CurrentBlock == &F.getEntryBlock()) continue; @@ -2464,7 +2460,7 @@ bool GVN::performPRE(Function &F) { // more complicated to get right. 
unsigned NumWith = 0; unsigned NumWithout = 0; - BasicBlock *PREPred = 0; + BasicBlock *PREPred = nullptr; predMap.clear(); for (pred_iterator PI = pred_begin(CurrentBlock), @@ -2482,8 +2478,8 @@ bool GVN::performPRE(Function &F) { } Value* predV = findLeader(P, ValNo); - if (predV == 0) { - predMap.push_back(std::make_pair(static_cast<Value *>(0), P)); + if (!predV) { + predMap.push_back(std::make_pair(static_cast<Value *>(nullptr), P)); PREPred = P; ++NumWithout; } else if (predV == CurInst) { @@ -2637,9 +2633,8 @@ bool GVN::iterateOnFunction(Function &F) { // std::vector<BasicBlock *> BBVect; BBVect.reserve(256); - for (df_iterator<DomTreeNode*> DI = df_begin(DT->getRootNode()), - DE = df_end(DT->getRootNode()); DI != DE; ++DI) - BBVect.push_back(DI->getBlock()); + for (DomTreeNode *x : depth_first(DT->getRootNode())) + BBVect.push_back(x->getBlock()); for (std::vector<BasicBlock *>::iterator I = BBVect.begin(), E = BBVect.end(); I != E; I++) diff --git a/lib/Transforms/Scalar/GlobalMerge.cpp b/lib/Transforms/Scalar/GlobalMerge.cpp index 8ffd64b..990d067 100644 --- a/lib/Transforms/Scalar/GlobalMerge.cpp +++ b/lib/Transforms/Scalar/GlobalMerge.cpp @@ -51,7 +51,6 @@ // note that we saved 2 registers here almostly "for free". // ===---------------------------------------------------------------------===// -#define DEBUG_TYPE "global-merge" #include "llvm/Transforms/Scalar.h" #include "llvm/ADT/SmallPtrSet.h" #include "llvm/ADT/Statistic.h" @@ -70,6 +69,8 @@ #include "llvm/Target/TargetLoweringObjectFile.h" using namespace llvm; +#define DEBUG_TYPE "global-merge" + cl::opt<bool> EnableGlobalMerge("global-merge", cl::Hidden, cl::desc("Enable global merge pass"), @@ -107,7 +108,7 @@ namespace { public: static char ID; // Pass identification, replacement for typeid. 
- explicit GlobalMerge(const TargetMachine *TM = 0) + explicit GlobalMerge(const TargetMachine *TM = nullptr) : FunctionPass(ID), TM(TM) { initializeGlobalMergePass(*PassRegistry::getPassRegistry()); } @@ -173,7 +174,8 @@ bool GlobalMerge::doMerge(SmallVectorImpl<GlobalVariable*> &Globals, GlobalVariable *MergedGV = new GlobalVariable(M, MergedTy, isConst, GlobalValue::InternalLinkage, MergedInit, "_MergedGlobals", - 0, GlobalVariable::NotThreadLocal, + nullptr, + GlobalVariable::NotThreadLocal, AddrSpace); for (size_t k = i; k < j; ++k) { Constant *Idx[2] = { diff --git a/lib/Transforms/Scalar/IndVarSimplify.cpp b/lib/Transforms/Scalar/IndVarSimplify.cpp index 7537632..e83a5c4 100644 --- a/lib/Transforms/Scalar/IndVarSimplify.cpp +++ b/lib/Transforms/Scalar/IndVarSimplify.cpp @@ -24,7 +24,6 @@ // //===----------------------------------------------------------------------===// -#define DEBUG_TYPE "indvars" #include "llvm/Transforms/Scalar.h" #include "llvm/ADT/DenseMap.h" #include "llvm/ADT/SmallVector.h" @@ -50,6 +49,8 @@ #include "llvm/Transforms/Utils/SimplifyIndVar.h" using namespace llvm; +#define DEBUG_TYPE "indvars" + STATISTIC(NumWidened , "Number of indvars widened"); STATISTIC(NumReplaced , "Number of exit values replaced"); STATISTIC(NumLFTR , "Number of loop exit tests replaced"); @@ -79,8 +80,8 @@ namespace { public: static char ID; // Pass identification, replacement for typeid - IndVarSimplify() : LoopPass(ID), LI(0), SE(0), DT(0), DL(0), - Changed(false) { + IndVarSimplify() : LoopPass(ID), LI(nullptr), SE(nullptr), DT(nullptr), + DL(nullptr), Changed(false) { initializeIndVarSimplifyPass(*PassRegistry::getPassRegistry()); } @@ -196,7 +197,7 @@ static Instruction *getInsertPointForUses(Instruction *User, Value *Def, if (!PHI) return User; - Instruction *InsertPt = 0; + Instruction *InsertPt = nullptr; for (unsigned i = 0, e = PHI->getNumIncomingValues(); i != e; ++i) { if (PHI->getIncomingValue(i) != Def) continue; @@ -257,13 +258,13 @@ void IndVarSimplify::HandleFloatingPointIV(Loop *L, PHINode *PN) { // an add or increment value can not be represented by an integer. BinaryOperator *Incr = dyn_cast<BinaryOperator>(PN->getIncomingValue(BackEdge)); - if (Incr == 0 || Incr->getOpcode() != Instruction::FAdd) return; + if (Incr == nullptr || Incr->getOpcode() != Instruction::FAdd) return; // If this is not an add of the PHI with a constantfp, or if the constant fp // is not an integer, bail out. ConstantFP *IncValueVal = dyn_cast<ConstantFP>(Incr->getOperand(1)); int64_t IncValue; - if (IncValueVal == 0 || Incr->getOperand(0) != PN || + if (IncValueVal == nullptr || Incr->getOperand(0) != PN || !ConvertToSInt(IncValueVal->getValueAPF(), IncValue)) return; @@ -280,7 +281,7 @@ void IndVarSimplify::HandleFloatingPointIV(Loop *L, PHINode *PN) { FCmpInst *Compare = dyn_cast<FCmpInst>(U1); if (!Compare) Compare = dyn_cast<FCmpInst>(U2); - if (Compare == 0 || !Compare->hasOneUse() || + if (!Compare || !Compare->hasOneUse() || !isa<BranchInst>(Compare->user_back())) return; @@ -301,7 +302,7 @@ void IndVarSimplify::HandleFloatingPointIV(Loop *L, PHINode *PN) { // transform it. ConstantFP *ExitValueVal = dyn_cast<ConstantFP>(Compare->getOperand(1)); int64_t ExitValue; - if (ExitValueVal == 0 || + if (ExitValueVal == nullptr || !ConvertToSInt(ExitValueVal->getValueAPF(), ExitValue)) return; @@ -651,7 +652,8 @@ namespace { Type *WidestNativeType; // Widest integer type created [sz]ext bool IsSigned; // Was an sext user seen before a zext? 
- WideIVInfo() : NarrowIV(0), WidestNativeType(0), IsSigned(false) {} + WideIVInfo() : NarrowIV(nullptr), WidestNativeType(nullptr), + IsSigned(false) {} }; } @@ -693,7 +695,7 @@ struct NarrowIVDefUse { Instruction *NarrowUse; Instruction *WideDef; - NarrowIVDefUse(): NarrowDef(0), NarrowUse(0), WideDef(0) {} + NarrowIVDefUse(): NarrowDef(nullptr), NarrowUse(nullptr), WideDef(nullptr) {} NarrowIVDefUse(Instruction *ND, Instruction *NU, Instruction *WD): NarrowDef(ND), NarrowUse(NU), WideDef(WD) {} @@ -736,9 +738,9 @@ public: L(LI->getLoopFor(OrigPhi->getParent())), SE(SEv), DT(DTree), - WidePhi(0), - WideInc(0), - WideIncExpr(0), + WidePhi(nullptr), + WideInc(nullptr), + WideIncExpr(nullptr), DeadInsts(DI) { assert(L->getHeader() == OrigPhi->getParent() && "Phi must be an IV"); } @@ -793,7 +795,7 @@ Instruction *WidenIV::CloneIVUser(NarrowIVDefUse DU) { unsigned Opcode = DU.NarrowUse->getOpcode(); switch (Opcode) { default: - return 0; + return nullptr; case Instruction::Add: case Instruction::Mul: case Instruction::UDiv: @@ -838,14 +840,14 @@ Instruction *WidenIV::CloneIVUser(NarrowIVDefUse DU) { const SCEVAddRecExpr* WidenIV::GetExtendedOperandRecurrence(NarrowIVDefUse DU) { // Handle the common case of add<nsw/nuw> if (DU.NarrowUse->getOpcode() != Instruction::Add) - return 0; + return nullptr; // One operand (NarrowDef) has already been extended to WideDef. Now determine // if extending the other will lead to a recurrence. unsigned ExtendOperIdx = DU.NarrowUse->getOperand(0) == DU.NarrowDef ? 1 : 0; assert(DU.NarrowUse->getOperand(1-ExtendOperIdx) == DU.NarrowDef && "bad DU"); - const SCEV *ExtendOperExpr = 0; + const SCEV *ExtendOperExpr = nullptr; const OverflowingBinaryOperator *OBO = cast<OverflowingBinaryOperator>(DU.NarrowUse); if (IsSigned && OBO->hasNoSignedWrap()) @@ -855,7 +857,7 @@ const SCEVAddRecExpr* WidenIV::GetExtendedOperandRecurrence(NarrowIVDefUse DU) { ExtendOperExpr = SE->getZeroExtendExpr( SE->getSCEV(DU.NarrowUse->getOperand(ExtendOperIdx)), WideType); else - return 0; + return nullptr; // When creating this AddExpr, don't apply the current operations NSW or NUW // flags. This instruction may be guarded by control flow that the no-wrap @@ -866,7 +868,7 @@ const SCEVAddRecExpr* WidenIV::GetExtendedOperandRecurrence(NarrowIVDefUse DU) { SE->getAddExpr(SE->getSCEV(DU.WideDef), ExtendOperExpr)); if (!AddRec || AddRec->getLoop() != L) - return 0; + return nullptr; return AddRec; } @@ -877,14 +879,14 @@ const SCEVAddRecExpr* WidenIV::GetExtendedOperandRecurrence(NarrowIVDefUse DU) { /// recurrence. Otherwise return NULL. const SCEVAddRecExpr *WidenIV::GetWideRecurrence(Instruction *NarrowUse) { if (!SE->isSCEVable(NarrowUse->getType())) - return 0; + return nullptr; const SCEV *NarrowExpr = SE->getSCEV(NarrowUse); if (SE->getTypeSizeInBits(NarrowExpr->getType()) >= SE->getTypeSizeInBits(WideType)) { // NarrowUse implicitly widens its operand. e.g. a gep with a narrow // index. So don't follow this use. - return 0; + return nullptr; } const SCEV *WideExpr = IsSigned ? 
@@ -892,7 +894,7 @@ const SCEVAddRecExpr *WidenIV::GetWideRecurrence(Instruction *NarrowUse) { SE->getZeroExtendExpr(NarrowExpr, WideType); const SCEVAddRecExpr *AddRec = dyn_cast<SCEVAddRecExpr>(WideExpr); if (!AddRec || AddRec->getLoop() != L) - return 0; + return nullptr; return AddRec; } @@ -930,7 +932,7 @@ Instruction *WidenIV::WidenIVUse(NarrowIVDefUse DU, SCEVExpander &Rewriter) { DEBUG(dbgs() << "INDVARS: Widen lcssa phi " << *UsePhi << " to " << *WidePhi << "\n"); } - return 0; + return nullptr; } } // Our raison d'etre! Eliminate sign and zero extension. @@ -968,7 +970,7 @@ Instruction *WidenIV::WidenIVUse(NarrowIVDefUse DU, SCEVExpander &Rewriter) { // push the uses of WideDef here. // No further widening is needed. The deceased [sz]ext had done it for us. - return 0; + return nullptr; } // Does this user itself evaluate to a recurrence after widening? @@ -981,7 +983,7 @@ Instruction *WidenIV::WidenIVUse(NarrowIVDefUse DU, SCEVExpander &Rewriter) { // follow it. Instead insert a Trunc to kill off the original use, // eventually isolating the original narrow IV so it can be removed. truncateIVUse(DU, DT); - return 0; + return nullptr; } // Assume block terminators cannot evaluate to a recurrence. We can't to // insert a Trunc after a terminator if there happens to be a critical edge. @@ -990,14 +992,14 @@ Instruction *WidenIV::WidenIVUse(NarrowIVDefUse DU, SCEVExpander &Rewriter) { // Reuse the IV increment that SCEVExpander created as long as it dominates // NarrowUse. - Instruction *WideUse = 0; + Instruction *WideUse = nullptr; if (WideAddRec == WideIncExpr && Rewriter.hoistIVInc(WideInc, DU.NarrowUse)) WideUse = WideInc; else { WideUse = CloneIVUser(DU); if (!WideUse) - return 0; + return nullptr; } // Evaluation of WideAddRec ensured that the narrow expression could be // extended outside the loop without overflow. This suggests that the wide use @@ -1008,7 +1010,7 @@ Instruction *WidenIV::WidenIVUse(NarrowIVDefUse DU, SCEVExpander &Rewriter) { DEBUG(dbgs() << "Wide use expression mismatch: " << *WideUse << ": " << *SE->getSCEV(WideUse) << " != " << *WideAddRec << "\n"); DeadInsts.push_back(WideUse); - return 0; + return nullptr; } // Returning WideUse pushes it on the worklist. @@ -1043,7 +1045,7 @@ PHINode *WidenIV::CreateWideIV(SCEVExpander &Rewriter) { // Is this phi an induction variable? const SCEVAddRecExpr *AddRec = dyn_cast<SCEVAddRecExpr>(SE->getSCEV(OrigPhi)); if (!AddRec) - return NULL; + return nullptr; // Widen the induction variable expression. const SCEV *WideIVExpr = IsSigned ? @@ -1056,7 +1058,7 @@ PHINode *WidenIV::CreateWideIV(SCEVExpander &Rewriter) { // Can the IV be extended outside the loop without overflow? AddRec = dyn_cast<SCEVAddRecExpr>(WideIVExpr); if (!AddRec || AddRec->getLoop() != L) - return NULL; + return nullptr; // An AddRec must have loop-invariant operands. 
Since this AddRec is // materialized by a loop header phi, the expression cannot have any post-loop @@ -1282,7 +1284,7 @@ static bool canExpandBackedgeTakenCount(Loop *L, ScalarEvolution *SE) { static PHINode *getLoopPhiForCounter(Value *IncV, Loop *L, DominatorTree *DT) { Instruction *IncI = dyn_cast<Instruction>(IncV); if (!IncI) - return 0; + return nullptr; switch (IncI->getOpcode()) { case Instruction::Add: @@ -1293,17 +1295,17 @@ static PHINode *getLoopPhiForCounter(Value *IncV, Loop *L, DominatorTree *DT) { if (IncI->getNumOperands() == 2) break; default: - return 0; + return nullptr; } PHINode *Phi = dyn_cast<PHINode>(IncI->getOperand(0)); if (Phi && Phi->getParent() == L->getHeader()) { if (isLoopInvariant(IncI->getOperand(1), L, DT)) return Phi; - return 0; + return nullptr; } if (IncI->getOpcode() == Instruction::GetElementPtr) - return 0; + return nullptr; // Allow add/sub to be commuted. Phi = dyn_cast<PHINode>(IncI->getOperand(1)); @@ -1311,7 +1313,7 @@ static PHINode *getLoopPhiForCounter(Value *IncV, Loop *L, DominatorTree *DT) { if (isLoopInvariant(IncI->getOperand(0), L, DT)) return Phi; } - return 0; + return nullptr; } /// Return the compare guarding the loop latch, or NULL for unrecognized tests. @@ -1321,7 +1323,7 @@ static ICmpInst *getLoopTest(Loop *L) { BasicBlock *LatchBlock = L->getLoopLatch(); // Don't bother with LFTR if the loop is not properly simplified. if (!LatchBlock) - return 0; + return nullptr; BranchInst *BI = dyn_cast<BranchInst>(L->getExitingBlock()->getTerminator()); assert(BI && "expected exit branch"); @@ -1446,8 +1448,8 @@ FindLoopCounter(Loop *L, const SCEV *BECount, cast<BranchInst>(L->getExitingBlock()->getTerminator())->getCondition(); // Loop over all of the PHI nodes, looking for a simple counter. - PHINode *BestPhi = 0; - const SCEV *BestInit = 0; + PHINode *BestPhi = nullptr; + const SCEV *BestInit = nullptr; BasicBlock *LatchBlock = L->getLoopLatch(); assert(LatchBlock && "needsLFTR should guarantee a loop latch"); @@ -1571,7 +1573,7 @@ static Value *genLoopLimit(PHINode *IndVar, const SCEV *IVCount, Loop *L, // IVInit integer and IVCount pointer would only occur if a canonical IV // were generated on top of case #2, which is not expected. - const SCEV *IVLimit = 0; + const SCEV *IVLimit = nullptr; // For unit stride, IVCount = Start + BECount with 2's complement overflow. // For non-zero Start, compute IVCount here. if (AR->getStart()->isZero()) @@ -1813,7 +1815,7 @@ bool IndVarSimplify::runOnLoop(Loop *L, LPPassManager &LPM) { SE = &getAnalysis<ScalarEvolution>(); DT = &getAnalysis<DominatorTreeWrapperPass>().getDomTree(); DataLayoutPass *DLP = getAnalysisIfAvailable<DataLayoutPass>(); - DL = DLP ? &DLP->getDataLayout() : 0; + DL = DLP ? 
&DLP->getDataLayout() : nullptr; TLI = getAnalysisIfAvailable<TargetLibraryInfo>(); DeadInsts.clear(); diff --git a/lib/Transforms/Scalar/JumpThreading.cpp b/lib/Transforms/Scalar/JumpThreading.cpp index 067deb7..230a381 100644 --- a/lib/Transforms/Scalar/JumpThreading.cpp +++ b/lib/Transforms/Scalar/JumpThreading.cpp @@ -11,7 +11,6 @@ // //===----------------------------------------------------------------------===// -#define DEBUG_TYPE "jump-threading" #include "llvm/Transforms/Scalar.h" #include "llvm/ADT/DenseMap.h" #include "llvm/ADT/DenseSet.h" @@ -38,6 +37,8 @@ #include "llvm/Transforms/Utils/SSAUpdater.h" using namespace llvm; +#define DEBUG_TYPE "jump-threading" + STATISTIC(NumThreads, "Number of jumps threaded"); STATISTIC(NumFolds, "Number of terminators folded"); STATISTIC(NumDupes, "Number of branch blocks duplicated to eliminate phi"); @@ -153,7 +154,7 @@ bool JumpThreading::runOnFunction(Function &F) { DEBUG(dbgs() << "Jump threading on function '" << F.getName() << "'\n"); DataLayoutPass *DLP = getAnalysisIfAvailable<DataLayoutPass>(); - DL = DLP ? &DLP->getDataLayout() : 0; + DL = DLP ? &DLP->getDataLayout() : nullptr; TLI = &getAnalysis<TargetLibraryInfo>(); LVI = &getAnalysis<LazyValueInfo>(); @@ -308,7 +309,7 @@ void JumpThreading::FindLoopHeaders(Function &F) { /// Returns null if Val is null or not an appropriate constant. static Constant *getKnownConstant(Value *Val, ConstantPreference Preference) { if (!Val) - return 0; + return nullptr; // Undef is "known" enough. if (UndefValue *U = dyn_cast<UndefValue>(Val)) @@ -352,7 +353,7 @@ ComputeValueKnownInPredecessors(Value *V, BasicBlock *BB, PredValueInfo &Result, // If V is a non-instruction value, or an instruction in a different block, // then it can't be derived from a PHI. Instruction *I = dyn_cast<Instruction>(V); - if (I == 0 || I->getParent() != BB) { + if (!I || I->getParent() != BB) { // Okay, if this is a live-in value, see if it has a known value at the end // of any of our predecessors. @@ -495,7 +496,7 @@ ComputeValueKnownInPredecessors(Value *V, BasicBlock *BB, PredValueInfo &Result, Value *RHS = Cmp->getOperand(1)->DoPHITranslation(BB, PredBB); Value *Res = SimplifyCmpInst(Cmp->getPredicate(), LHS, RHS, DL); - if (Res == 0) { + if (!Res) { if (!isa<Constant>(RHS)) continue; @@ -581,7 +582,7 @@ ComputeValueKnownInPredecessors(Value *V, BasicBlock *BB, PredValueInfo &Result, // Either operand will do, so be sure to pick the one that's a known // constant. // FIXME: Do this more cleverly if both values are known constants? - KnownCond = (TrueVal != 0); + KnownCond = (TrueVal != nullptr); } // See if the select has a known constant value for this predecessor. @@ -737,7 +738,7 @@ bool JumpThreading::ProcessBlock(BasicBlock *BB) { Instruction *CondInst = dyn_cast<Instruction>(Condition); // All the rest of our checks depend on the condition being an instruction. - if (CondInst == 0) { + if (!CondInst) { // FIXME: Unify this with code below. if (ProcessThreadableEdges(Condition, BB, Preference)) return true; @@ -890,7 +891,7 @@ bool JumpThreading::SimplifyPartiallyRedundantLoad(LoadInst *LI) { SmallPtrSet<BasicBlock*, 8> PredsScanned; typedef SmallVector<std::pair<BasicBlock*, Value*>, 8> AvailablePredsTy; AvailablePredsTy AvailablePreds; - BasicBlock *OneUnavailablePred = 0; + BasicBlock *OneUnavailablePred = nullptr; // If we got here, the loaded value is transparent through to the start of the // block. Check to see if it is available in any of the predecessor blocks. 
@@ -904,16 +905,16 @@ bool JumpThreading::SimplifyPartiallyRedundantLoad(LoadInst *LI) { // Scan the predecessor to see if the value is available in the pred. BBIt = PredBB->end(); - MDNode *ThisTBAATag = 0; + MDNode *ThisTBAATag = nullptr; Value *PredAvailable = FindAvailableLoadedValue(LoadedPtr, PredBB, BBIt, 6, - 0, &ThisTBAATag); + nullptr, &ThisTBAATag); if (!PredAvailable) { OneUnavailablePred = PredBB; continue; } // If tbaa tags disagree or are not present, forget about them. - if (TBAATag != ThisTBAATag) TBAATag = 0; + if (TBAATag != ThisTBAATag) TBAATag = nullptr; // If so, this load is partially redundant. Remember this info so that we // can create a PHI node. @@ -929,7 +930,7 @@ bool JumpThreading::SimplifyPartiallyRedundantLoad(LoadInst *LI) { // predecessor, we want to insert a merge block for those common predecessors. // This ensures that we only have to insert one reload, thus not increasing // code size. - BasicBlock *UnavailablePred = 0; + BasicBlock *UnavailablePred = nullptr; // If there is exactly one predecessor where the value is unavailable, the // already computed 'OneUnavailablePred' block is it. If it ends in an @@ -996,7 +997,7 @@ bool JumpThreading::SimplifyPartiallyRedundantLoad(LoadInst *LI) { BasicBlock *P = *PI; AvailablePredsTy::iterator I = std::lower_bound(AvailablePreds.begin(), AvailablePreds.end(), - std::make_pair(P, (Value*)0)); + std::make_pair(P, (Value*)nullptr)); assert(I != AvailablePreds.end() && I->first == P && "Didn't find entry for predecessor!"); @@ -1103,7 +1104,7 @@ bool JumpThreading::ProcessThreadableEdges(Value *Cond, BasicBlock *BB, SmallPtrSet<BasicBlock*, 16> SeenPreds; SmallVector<std::pair<BasicBlock*, BasicBlock*>, 16> PredToDestList; - BasicBlock *OnlyDest = 0; + BasicBlock *OnlyDest = nullptr; BasicBlock *MultipleDestSentinel = (BasicBlock*)(intptr_t)~0ULL; for (unsigned i = 0, e = PredValues.size(); i != e; ++i) { @@ -1120,7 +1121,7 @@ bool JumpThreading::ProcessThreadableEdges(Value *Cond, BasicBlock *BB, BasicBlock *DestBB; if (isa<UndefValue>(Val)) - DestBB = 0; + DestBB = nullptr; else if (BranchInst *BI = dyn_cast<BranchInst>(BB->getTerminator())) DestBB = BI->getSuccessor(cast<ConstantInt>(Val)->isZero()); else if (SwitchInst *SI = dyn_cast<SwitchInst>(BB->getTerminator())) { @@ -1171,7 +1172,7 @@ bool JumpThreading::ProcessThreadableEdges(Value *Cond, BasicBlock *BB, // If the threadable edges are branching on an undefined value, we get to pick // the destination that these predecessors should get to. - if (MostPopularDest == 0) + if (!MostPopularDest) MostPopularDest = BB->getTerminator()-> getSuccessor(GetBestDestForJumpOnUndef(BB)); @@ -1273,7 +1274,7 @@ bool JumpThreading::ProcessBranchOnXOR(BinaryOperator *BO) { } // Determine which value to split on, true, false, or undef if neither. - ConstantInt *SplitVal = 0; + ConstantInt *SplitVal = nullptr; if (NumTrue > NumFalse) SplitVal = ConstantInt::getTrue(BB->getContext()); else if (NumTrue != 0 || NumFalse != 0) @@ -1294,7 +1295,7 @@ bool JumpThreading::ProcessBranchOnXOR(BinaryOperator *BO) { // help us. However, we can just replace the LHS or RHS with the constant. if (BlocksToFoldInto.size() == cast<PHINode>(BB->front()).getNumIncomingValues()) { - if (SplitVal == 0) { + if (!SplitVal) { // If all preds provide undef, just nuke the xor, because it is undef too. 
BO->replaceAllUsesWith(UndefValue::get(BO->getType())); BO->eraseFromParent(); @@ -1531,7 +1532,7 @@ bool JumpThreading::DuplicateCondBranchOnPHIIntoPred(BasicBlock *BB, // can just clone the bits from BB into the end of the new PredBB. BranchInst *OldPredBranch = dyn_cast<BranchInst>(PredBB->getTerminator()); - if (OldPredBranch == 0 || !OldPredBranch->isUnconditional()) { + if (!OldPredBranch || !OldPredBranch->isUnconditional()) { PredBB = SplitEdge(PredBB, BB, this); OldPredBranch = cast<BranchInst>(PredBB->getTerminator()); } diff --git a/lib/Transforms/Scalar/LICM.cpp b/lib/Transforms/Scalar/LICM.cpp index b69f2dc..0a8d16f 100644 --- a/lib/Transforms/Scalar/LICM.cpp +++ b/lib/Transforms/Scalar/LICM.cpp @@ -30,7 +30,6 @@ // //===----------------------------------------------------------------------===// -#define DEBUG_TYPE "licm" #include "llvm/Transforms/Scalar.h" #include "llvm/ADT/Statistic.h" #include "llvm/Analysis/AliasAnalysis.h" @@ -60,6 +59,8 @@ #include <algorithm> using namespace llvm; +#define DEBUG_TYPE "licm" + STATISTIC(NumSunk , "Number of instructions sunk out of loop"); STATISTIC(NumHoisted , "Number of instructions hoisted out of loop"); STATISTIC(NumMovedLoads, "Number of load insts hoisted or sunk"); @@ -223,7 +224,7 @@ bool LICM::runOnLoop(Loop *L, LPPassManager &LPM) { DT = &getAnalysis<DominatorTreeWrapperPass>().getDomTree(); DataLayoutPass *DLP = getAnalysisIfAvailable<DataLayoutPass>(); - DL = DLP ? &DLP->getDataLayout() : 0; + DL = DLP ? &DLP->getDataLayout() : nullptr; TLI = &getAnalysis<TargetLibraryInfo>(); assert(L->isLCSSAForm(*DT) && "Loop is not in LCSSA form."); @@ -315,8 +316,8 @@ bool LICM::runOnLoop(Loop *L, LPPassManager &LPM) { "Parent loop not left in LCSSA form after LICM!"); // Clear out loops state information for the next iteration - CurLoop = 0; - Preheader = 0; + CurLoop = nullptr; + Preheader = nullptr; // If this loop is nested inside of another one, save the alias information // for when we process the outer loop. @@ -334,7 +335,7 @@ bool LICM::runOnLoop(Loop *L, LPPassManager &LPM) { /// iteration. /// void LICM::SinkRegion(DomTreeNode *N) { - assert(N != 0 && "Null dominator tree node?"); + assert(N != nullptr && "Null dominator tree node?"); BasicBlock *BB = N->getBlock(); // If this subregion is not in the top level loop at all, exit. @@ -381,7 +382,7 @@ void LICM::SinkRegion(DomTreeNode *N) { /// before uses, allowing us to hoist a loop body in one pass without iteration. /// void LICM::HoistRegion(DomTreeNode *N) { - assert(N != 0 && "Null dominator tree node?"); + assert(N != nullptr && "Null dominator tree node?"); BasicBlock *BB = N->getBlock(); // If this subregion is not in the top level loop at all, exit. @@ -774,7 +775,7 @@ void LICM::PromoteAliasSet(AliasSet &AS, // We start with an alignment of one and try to find instructions that allow // us to prove better alignment. unsigned Alignment = 1; - MDNode *TBAATag = 0; + MDNode *TBAATag = nullptr; // Check that all of the pointers in the alias set have the same type. 
We // cannot (yet) promote a memory location that is loaded and stored in diff --git a/lib/Transforms/Scalar/LoopDeletion.cpp b/lib/Transforms/Scalar/LoopDeletion.cpp index 9a520c8..5ab686a 100644 --- a/lib/Transforms/Scalar/LoopDeletion.cpp +++ b/lib/Transforms/Scalar/LoopDeletion.cpp @@ -14,7 +14,6 @@ // //===----------------------------------------------------------------------===// -#define DEBUG_TYPE "loop-delete" #include "llvm/Transforms/Scalar.h" #include "llvm/ADT/SmallVector.h" #include "llvm/ADT/Statistic.h" @@ -23,6 +22,8 @@ #include "llvm/IR/Dominators.h" using namespace llvm; +#define DEBUG_TYPE "loop-delete" + STATISTIC(NumDeleted, "Number of loops deleted"); namespace { diff --git a/lib/Transforms/Scalar/LoopIdiomRecognize.cpp b/lib/Transforms/Scalar/LoopIdiomRecognize.cpp index e5e8b84..26a83df 100644 --- a/lib/Transforms/Scalar/LoopIdiomRecognize.cpp +++ b/lib/Transforms/Scalar/LoopIdiomRecognize.cpp @@ -41,7 +41,6 @@ // //===----------------------------------------------------------------------===// -#define DEBUG_TYPE "loop-idiom" #include "llvm/Transforms/Scalar.h" #include "llvm/ADT/Statistic.h" #include "llvm/Analysis/AliasAnalysis.h" @@ -61,6 +60,8 @@ #include "llvm/Transforms/Utils/Local.h" using namespace llvm; +#define DEBUG_TYPE "loop-idiom" + STATISTIC(NumMemSet, "Number of memset's formed from loop stores"); STATISTIC(NumMemCpy, "Number of memcpy's formed from loop load+stores"); @@ -114,7 +115,7 @@ namespace { Value *matchCondition (BranchInst *Br, BasicBlock *NonZeroTarget) const; /// Return true iff the idiom is detected in the loop. and 1) \p CntInst - /// is set to the instruction counting the pupulation bit. 2) \p CntPhi + /// is set to the instruction counting the population bit. 2) \p CntPhi /// is set to the corresponding phi node. 3) \p Var is set to the value /// whose population bits are being counted. bool detectIdiom @@ -138,7 +139,7 @@ namespace { static char ID; explicit LoopIdiomRecognize() : LoopPass(ID) { initializeLoopIdiomRecognizePass(*PassRegistry::getPassRegistry()); - DL = 0; DT = 0; SE = 0; TLI = 0; TTI = 0; + DL = nullptr; DT = nullptr; SE = nullptr; TLI = nullptr; TTI = nullptr; } bool runOnLoop(Loop *L, LPPassManager &LPM) override; @@ -182,7 +183,7 @@ namespace { if (DL) return DL; DataLayoutPass *DLP = getAnalysisIfAvailable<DataLayoutPass>(); - DL = DLP ? &DLP->getDataLayout() : 0; + DL = DLP ? &DLP->getDataLayout() : nullptr; return DL; } @@ -247,7 +248,7 @@ static void deleteDeadInstruction(Instruction *I, ScalarEvolution &SE, for (unsigned op = 0, e = DeadInst->getNumOperands(); op != e; ++op) { Value *Op = DeadInst->getOperand(op); - DeadInst->setOperand(op, 0); + DeadInst->setOperand(op, nullptr); // If this operand just became dead, add it to the NowDeadInsts list. if (!Op->use_empty()) continue; @@ -292,9 +293,9 @@ bool LIRUtil::isAlmostEmpty(BasicBlock *BB) { BasicBlock *LIRUtil::getPrecondBb(BasicBlock *PreHead) { if (BasicBlock *BB = PreHead->getSinglePredecessor()) { BranchInst *Br = getBranch(BB); - return Br && Br->isConditional() ? BB : 0; + return Br && Br->isConditional() ? 
BB : nullptr; } - return 0; + return nullptr; } //===----------------------------------------------------------------------===// @@ -304,7 +305,7 @@ BasicBlock *LIRUtil::getPrecondBb(BasicBlock *PreHead) { //===----------------------------------------------------------------------===// NclPopcountRecognize::NclPopcountRecognize(LoopIdiomRecognize &TheLIR): - LIR(TheLIR), CurLoop(TheLIR.getLoop()), PreCondBB(0) { + LIR(TheLIR), CurLoop(TheLIR.getLoop()), PreCondBB(nullptr) { } bool NclPopcountRecognize::preliminaryScreen() { @@ -341,25 +342,25 @@ bool NclPopcountRecognize::preliminaryScreen() { return true; } -Value *NclPopcountRecognize::matchCondition (BranchInst *Br, - BasicBlock *LoopEntry) const { +Value *NclPopcountRecognize::matchCondition(BranchInst *Br, + BasicBlock *LoopEntry) const { if (!Br || !Br->isConditional()) - return 0; + return nullptr; ICmpInst *Cond = dyn_cast<ICmpInst>(Br->getCondition()); if (!Cond) - return 0; + return nullptr; ConstantInt *CmpZero = dyn_cast<ConstantInt>(Cond->getOperand(1)); if (!CmpZero || !CmpZero->isZero()) - return 0; + return nullptr; ICmpInst::Predicate Pred = Cond->getPredicate(); if ((Pred == ICmpInst::ICMP_NE && Br->getSuccessor(0) == LoopEntry) || (Pred == ICmpInst::ICMP_EQ && Br->getSuccessor(1) == LoopEntry)) return Cond->getOperand(0); - return 0; + return nullptr; } bool NclPopcountRecognize::detectIdiom(Instruction *&CntInst, @@ -390,9 +391,9 @@ bool NclPopcountRecognize::detectIdiom(Instruction *&CntInst, Value *VarX1, *VarX0; PHINode *PhiX, *CountPhi; - DefX2 = CountInst = 0; - VarX1 = VarX0 = 0; - PhiX = CountPhi = 0; + DefX2 = CountInst = nullptr; + VarX1 = VarX0 = nullptr; + PhiX = CountPhi = nullptr; LoopEntry = *(CurLoop->block_begin()); // step 1: Check if the loop-back branch is in desirable form. @@ -439,7 +440,7 @@ bool NclPopcountRecognize::detectIdiom(Instruction *&CntInst, // step 4: Find the instruction which count the population: cnt2 = cnt1 + 1 { - CountInst = NULL; + CountInst = nullptr; for (BasicBlock::iterator Iter = LoopEntry->getFirstNonPHI(), IterE = LoopEntry->end(); Iter != IterE; Iter++) { Instruction *Inst = Iter; @@ -744,7 +745,7 @@ bool LoopIdiomRecognize::runOnLoopBlock(BasicBlock *BB, const SCEV *BECount, // If processing the store invalidated our iterator, start over from the // top of the block. - if (InstPtr == 0) + if (!InstPtr) I = BB->begin(); continue; } @@ -757,7 +758,7 @@ bool LoopIdiomRecognize::runOnLoopBlock(BasicBlock *BB, const SCEV *BECount, // If processing the memset invalidated our iterator, start over from the // top of the block. - if (InstPtr == 0) + if (!InstPtr) I = BB->begin(); continue; } @@ -784,7 +785,7 @@ bool LoopIdiomRecognize::processLoopStore(StoreInst *SI, const SCEV *BECount) { // random store we can't handle. const SCEVAddRecExpr *StoreEv = dyn_cast<SCEVAddRecExpr>(SE->getSCEV(StorePtr)); - if (StoreEv == 0 || StoreEv->getLoop() != CurLoop || !StoreEv->isAffine()) + if (!StoreEv || StoreEv->getLoop() != CurLoop || !StoreEv->isAffine()) return false; // Check to see if the stride matches the size of the store. 
If so, then we @@ -792,7 +793,7 @@ bool LoopIdiomRecognize::processLoopStore(StoreInst *SI, const SCEV *BECount) { unsigned StoreSize = (unsigned)SizeInBits >> 3; const SCEVConstant *Stride = dyn_cast<SCEVConstant>(StoreEv->getOperand(1)); - if (Stride == 0 || StoreSize != Stride->getValue()->getValue()) { + if (!Stride || StoreSize != Stride->getValue()->getValue()) { // TODO: Could also handle negative stride here someday, that will require // the validity check in mayLoopAccessLocation to be updated though. // Enable this to print exact negative strides. @@ -841,7 +842,7 @@ processLoopMemSet(MemSetInst *MSI, const SCEV *BECount) { // loop, which indicates a strided store. If we have something else, it's a // random store we can't handle. const SCEVAddRecExpr *Ev = dyn_cast<SCEVAddRecExpr>(SE->getSCEV(Pointer)); - if (Ev == 0 || Ev->getLoop() != CurLoop || !Ev->isAffine()) + if (!Ev || Ev->getLoop() != CurLoop || !Ev->isAffine()) return false; // Reject memsets that are so large that they overflow an unsigned. @@ -855,7 +856,7 @@ processLoopMemSet(MemSetInst *MSI, const SCEV *BECount) { // TODO: Could also handle negative stride here someday, that will require the // validity check in mayLoopAccessLocation to be updated though. - if (Stride == 0 || MSI->getLength() != Stride->getValue()) + if (!Stride || MSI->getLength() != Stride->getValue()) return false; return processLoopStridedStore(Pointer, (unsigned)SizeInBytes, @@ -908,23 +909,23 @@ static Constant *getMemSetPatternValue(Value *V, const DataLayout &DL) { // array. We could theoretically do a store to an alloca or something, but // that doesn't seem worthwhile. Constant *C = dyn_cast<Constant>(V); - if (C == 0) return 0; + if (!C) return nullptr; // Only handle simple values that are a power of two bytes in size. uint64_t Size = DL.getTypeSizeInBits(V->getType()); if (Size == 0 || (Size & 7) || (Size & (Size-1))) - return 0; + return nullptr; // Don't care enough about darwin/ppc to implement this. if (DL.isBigEndian()) - return 0; + return nullptr; // Convert to size in bytes. Size /= 8; // TODO: If CI is larger than 16-bytes, we can try slicing it in half to see // if the top and bottom are the same (e.g. for vectors and large integers). - if (Size > 16) return 0; + if (Size > 16) return nullptr; // If the constant is exactly 16 bytes, just use it. if (Size == 16) return C; @@ -949,7 +950,7 @@ processLoopStridedStore(Value *DestPtr, unsigned StoreSize, // are stored. A store of i32 0x01020304 can never be turned into a memset, // but it can be turned into memset_pattern if the target supports it. Value *SplatValue = isBytewiseValue(StoredVal); - Constant *PatternValue = 0; + Constant *PatternValue = nullptr; unsigned DestAS = DestPtr->getType()->getPointerAddressSpace(); @@ -960,13 +961,13 @@ processLoopStridedStore(Value *DestPtr, unsigned StoreSize, // promote the memset. CurLoop->isLoopInvariant(SplatValue)) { // Keep and use SplatValue. - PatternValue = 0; + PatternValue = nullptr; } else if (DestAS == 0 && TLI->has(LibFunc::memset_pattern16) && (PatternValue = getMemSetPatternValue(StoredVal, *DL))) { // Don't create memset_pattern16s with address spaces. // It looks like we can use PatternValue! - SplatValue = 0; + SplatValue = nullptr; } else { // Otherwise, this isn't an idiom we can transform. For example, we can't // do anything with a 3-byte store. 
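To make the splat-versus-pattern distinction above concrete, here is a minimal C++ sketch of the two loop shapes this code targets (the function names are illustrative, not part of the patch):

    // Every stored byte is identical, so the whole loop can be collapsed
    // into memset(p, 0, n).
    void zero(char *p, unsigned n) {
      for (unsigned i = 0; i != n; ++i)
        p[i] = 0;
    }

    // Not bytewise-splattable, but the stored value repeats with a constant
    // 16-byte pattern, so targets providing memset_pattern16 can rewrite it
    // as memset_pattern16(q, &pattern, n * sizeof(int)).
    void fill(int *q, unsigned n) {
      for (unsigned i = 0; i != n; ++i)
        q[i] = 0x01020304;
    }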
@@ -1033,7 +1034,7 @@ processLoopStridedStore(Value *DestPtr, unsigned StoreSize, Int8PtrTy, Int8PtrTy, IntPtr, - (void*)0); + (void*)nullptr); // Otherwise we should form a memset_pattern16. PatternValue is known to be // a constant array of 16 bytes. Plop the value into a mergeable global. diff --git a/lib/Transforms/Scalar/LoopInstSimplify.cpp b/lib/Transforms/Scalar/LoopInstSimplify.cpp index 263ba93..ab1a939 100644 --- a/lib/Transforms/Scalar/LoopInstSimplify.cpp +++ b/lib/Transforms/Scalar/LoopInstSimplify.cpp @@ -11,7 +11,6 @@ // //===----------------------------------------------------------------------===// -#define DEBUG_TYPE "loop-instsimplify" #include "llvm/Transforms/Scalar.h" #include "llvm/ADT/STLExtras.h" #include "llvm/ADT/Statistic.h" @@ -26,6 +25,8 @@ #include "llvm/Transforms/Utils/Local.h" using namespace llvm; +#define DEBUG_TYPE "loop-instsimplify" + STATISTIC(NumSimplified, "Number of redundant instructions simplified"); namespace { @@ -70,10 +71,10 @@ bool LoopInstSimplify::runOnLoop(Loop *L, LPPassManager &LPM) { DominatorTreeWrapperPass *DTWP = getAnalysisIfAvailable<DominatorTreeWrapperPass>(); - DominatorTree *DT = DTWP ? &DTWP->getDomTree() : 0; + DominatorTree *DT = DTWP ? &DTWP->getDomTree() : nullptr; LoopInfo *LI = &getAnalysis<LoopInfo>(); DataLayoutPass *DLP = getAnalysisIfAvailable<DataLayoutPass>(); - const DataLayout *DL = DLP ? &DLP->getDataLayout() : 0; + const DataLayout *DL = DLP ? &DLP->getDataLayout() : nullptr; const TargetLibraryInfo *TLI = &getAnalysis<TargetLibraryInfo>(); SmallVector<BasicBlock*, 8> ExitBlocks; @@ -126,7 +127,15 @@ bool LoopInstSimplify::runOnLoop(Loop *L, LPPassManager &LPM) { ++NumSimplified; } } - LocalChanged |= RecursivelyDeleteTriviallyDeadInstructions(I, TLI); + bool res = RecursivelyDeleteTriviallyDeadInstructions(I, TLI); + if (res) { + // RecursivelyDeleteTriviallyDeadInstructions can remove + // more than one instruction, so simply incrementing the + // iterator does not work. When instructions get deleted, + // re-iterate instead. + BI = BB->begin(); BE = BB->end(); + LocalChanged |= res; + } if (IsSubloopHeader && !isa<PHINode>(I)) break; diff --git a/lib/Transforms/Scalar/LoopRerollPass.cpp b/lib/Transforms/Scalar/LoopRerollPass.cpp index 81c1e42..8b5e036 100644 --- a/lib/Transforms/Scalar/LoopRerollPass.cpp +++ b/lib/Transforms/Scalar/LoopRerollPass.cpp @@ -11,7 +11,6 @@ // //===----------------------------------------------------------------------===// -#define DEBUG_TYPE "loop-reroll" #include "llvm/Transforms/Scalar.h" #include "llvm/ADT/STLExtras.h" #include "llvm/ADT/SmallSet.h" @@ -36,6 +35,8 @@ using namespace llvm; +#define DEBUG_TYPE "loop-reroll" + STATISTIC(NumRerolledLoops, "Number of rerolled loops"); static cl::opt<unsigned> @@ -945,7 +946,7 @@ bool LoopReroll::reroll(Instruction *IV, Loop *L, BasicBlock *Header, bool InReduction = Reductions.isPairInSame(J1, J2); if (!(InReduction && J1->isAssociative())) { - bool Swapped = false, SomeOpMatched = false;; + bool Swapped = false, SomeOpMatched = false; for (unsigned j = 0; j < J1->getNumOperands() && !MatchFailed; ++j) { Value *Op2 = J2->getOperand(j); @@ -1133,7 +1134,7 @@ bool LoopReroll::runOnLoop(Loop *L, LPPassManager &LPM) { SE = &getAnalysis<ScalarEvolution>(); TLI = &getAnalysis<TargetLibraryInfo>(); DataLayoutPass *DLP = getAnalysisIfAvailable<DataLayoutPass>(); - DL = DLP ? &DLP->getDataLayout() : 0; + DL = DLP ?
&DLP->getDataLayout() : nullptr; DT = &getAnalysis<DominatorTreeWrapperPass>().getDomTree(); BasicBlock *Header = L->getHeader(); diff --git a/lib/Transforms/Scalar/LoopRotation.cpp b/lib/Transforms/Scalar/LoopRotation.cpp index fde6bac..2ce5831 100644 --- a/lib/Transforms/Scalar/LoopRotation.cpp +++ b/lib/Transforms/Scalar/LoopRotation.cpp @@ -11,7 +11,6 @@ // //===----------------------------------------------------------------------===// -#define DEBUG_TYPE "loop-rotate" #include "llvm/Transforms/Scalar.h" #include "llvm/ADT/Statistic.h" #include "llvm/Analysis/CodeMetrics.h" @@ -24,6 +23,7 @@ #include "llvm/IR/Dominators.h" #include "llvm/IR/Function.h" #include "llvm/IR/IntrinsicInst.h" +#include "llvm/Support/CommandLine.h" #include "llvm/Support/Debug.h" #include "llvm/Transforms/Utils/BasicBlockUtils.h" #include "llvm/Transforms/Utils/Local.h" @@ -31,7 +31,11 @@ #include "llvm/Transforms/Utils/ValueMapper.h" using namespace llvm; -#define MAX_HEADER_SIZE 16 +#define DEBUG_TYPE "loop-rotate" + +static cl::opt<unsigned> +DefaultRotationThreshold("rotation-max-header-size", cl::init(16), cl::Hidden, + cl::desc("The default maximum header size for automatic loop rotation")); STATISTIC(NumRotated, "Number of loops rotated"); namespace { @@ -39,8 +43,12 @@ namespace { class LoopRotate : public LoopPass { public: static char ID; // Pass ID, replacement for typeid - LoopRotate() : LoopPass(ID) { + LoopRotate(int SpecifiedMaxHeaderSize = -1) : LoopPass(ID) { initializeLoopRotatePass(*PassRegistry::getPassRegistry()); + if (SpecifiedMaxHeaderSize == -1) + MaxHeaderSize = DefaultRotationThreshold; + else + MaxHeaderSize = unsigned(SpecifiedMaxHeaderSize); } // LCSSA form makes instruction renaming easier. @@ -61,6 +69,7 @@ namespace { bool rotateLoop(Loop *L, bool SimplifiedLatch); private: + unsigned MaxHeaderSize; LoopInfo *LI; const TargetTransformInfo *TTI; }; @@ -74,7 +83,9 @@ INITIALIZE_PASS_DEPENDENCY(LoopSimplify) INITIALIZE_PASS_DEPENDENCY(LCSSA) INITIALIZE_PASS_END(LoopRotate, "loop-rotate", "Rotate Loops", false, false) -Pass *llvm::createLoopRotatePass() { return new LoopRotate(); } +Pass *llvm::createLoopRotatePass(int MaxHeaderSize) { + return new LoopRotate(MaxHeaderSize); +} /// Rotate Loop L as many times as possible. Return true if /// the loop is rotated at least once. @@ -82,6 +93,9 @@ bool LoopRotate::runOnLoop(Loop *L, LPPassManager &LPM) { if (skipOptnoneFunction(L)) return false; + // Save the loop metadata. + MDNode *LoopMD = L->getLoopID(); + LI = &getAnalysis<LoopInfo>(); TTI = &getAnalysis<TargetTransformInfo>(); @@ -96,6 +110,12 @@ bool LoopRotate::runOnLoop(Loop *L, LPPassManager &LPM) { MadeChange = true; SimplifiedLatch = false; } + + // Restore the loop metadata. + // NB! We presume LoopRotation DOESN'T ADD its own metadata. + if ((MadeChange || SimplifiedLatch) && LoopMD) + L->setLoopID(LoopMD); + return MadeChange; } @@ -281,7 +301,7 @@ bool LoopRotate::rotateLoop(Loop *L, bool SimplifiedLatch) { BasicBlock *OrigLatch = L->getLoopLatch(); BranchInst *BI = dyn_cast<BranchInst>(OrigHeader->getTerminator()); - if (BI == 0 || BI->isUnconditional()) + if (!BI || BI->isUnconditional()) return false; // If the loop header is not one of the loop exiting blocks then @@ -292,7 +312,7 @@ bool LoopRotate::rotateLoop(Loop *L, bool SimplifiedLatch) { // If the loop latch already contains a branch that leaves the loop then the // loop is already rotated. 
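// Illustration (not part of the patch): rotating the top-tested loop
//   while (cond) { body; }
// produces the guarded, bottom-tested form
//   if (cond) { do { body; } while (cond); }
// at the cost of duplicating the header; the rotation-max-header-size
// option introduced above bounds the size of that duplicated header.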
- if (OrigLatch == 0) + if (!OrigLatch) return false; // Rotate if either the loop latch does *not* exit the loop, or if the loop @@ -310,7 +330,7 @@ bool LoopRotate::rotateLoop(Loop *L, bool SimplifiedLatch) { << " instructions: "; L->dump()); return false; } - if (Metrics.NumInsts > MAX_HEADER_SIZE) + if (Metrics.NumInsts > MaxHeaderSize) return false; } @@ -319,7 +339,7 @@ bool LoopRotate::rotateLoop(Loop *L, bool SimplifiedLatch) { // If the loop could not be converted to canonical form, it must have an // indirectbr in it, just give up. - if (OrigPreheader == 0) + if (!OrigPreheader) return false; // Anything ScalarEvolution may know about this loop or the PHI nodes diff --git a/lib/Transforms/Scalar/LoopStrengthReduce.cpp b/lib/Transforms/Scalar/LoopStrengthReduce.cpp index 272a16d..914b56a 100644 --- a/lib/Transforms/Scalar/LoopStrengthReduce.cpp +++ b/lib/Transforms/Scalar/LoopStrengthReduce.cpp @@ -53,7 +53,6 @@ // //===----------------------------------------------------------------------===// -#define DEBUG_TYPE "loop-reduce" #include "llvm/Transforms/Scalar.h" #include "llvm/ADT/DenseSet.h" #include "llvm/ADT/Hashing.h" @@ -78,6 +77,8 @@ #include <algorithm> using namespace llvm; +#define DEBUG_TYPE "loop-reduce" + /// MaxIVUsers is an arbitrary threshold that provides an early opportunity for /// bail out. This threshold is far beyond the number of users that LSR can /// conceivably solve, so it should not affect generated code, but catches the @@ -237,7 +238,15 @@ struct Formula { int64_t Scale; /// BaseRegs - The list of "base" registers for this use. When this is - /// non-empty, + /// non-empty. The canonical representation of a formula is + /// 1. BaseRegs.size > 1 implies ScaledReg != NULL and + /// 2. ScaledReg != NULL implies Scale != 1 || !BaseRegs.empty(). + /// #1 enforces that the scaled register is always used when at least two + /// registers are needed by the formula: e.g., reg1 + reg2 is reg1 + 1 * reg2. + /// #2 enforces that 1 * reg is reg. + /// This invariant can be temporarily broken while building a formula. + /// However, every formula inserted into the LSRInstance must be in canonical + /// form. SmallVector<const SCEV *, 4> BaseRegs; /// ScaledReg - The 'scaled' register for this use. This should be non-null @@ -250,12 +259,18 @@ struct Formula { int64_t UnfoldedOffset; Formula() - : BaseGV(0), BaseOffset(0), HasBaseReg(false), Scale(0), ScaledReg(0), - UnfoldedOffset(0) {} + : BaseGV(nullptr), BaseOffset(0), HasBaseReg(false), Scale(0), + ScaledReg(nullptr), UnfoldedOffset(0) {} void InitialMatch(const SCEV *S, Loop *L, ScalarEvolution &SE); - unsigned getNumRegs() const; + bool isCanonical() const; + + void Canonicalize(); + + bool Unscale(); + + size_t getNumRegs() const; Type *getType() const; void DeleteBaseReg(const SCEV *&S); @@ -345,12 +360,58 @@ void Formula::InitialMatch(const SCEV *S, Loop *L, ScalarEvolution &SE) { BaseRegs.push_back(Sum); HasBaseReg = true; } + Canonicalize(); +} + +/// \brief Check whether or not this formula satisfies the canonical +/// representation. +/// \see Formula::BaseRegs. +bool Formula::isCanonical() const { + if (ScaledReg) + return Scale != 1 || !BaseRegs.empty(); + return BaseRegs.size() <= 1; +} + +/// \brief Helper method to morph a formula into its canonical representation. +/// \see Formula::BaseRegs. +/// Every formula having more than one base register must use the ScaledReg +/// field. Otherwise, we would have to do special cases everywhere in LSR +/// to treat reg1 + reg2 + ...
the same way as reg1 + 1*reg2 + ... +/// On the other hand, 1*reg should be canonicalized into reg. +void Formula::Canonicalize() { + if (isCanonical()) + return; + // So far we did not need this case. This is easy to implement but it is + // useless to maintain dead code. Besides, it could hurt compile time. + assert(!BaseRegs.empty() && "1*reg => reg, should not be needed."); + // Keep the invariant sum in BaseRegs and one of the variant sums in ScaledReg. + ScaledReg = BaseRegs.back(); + BaseRegs.pop_back(); + Scale = 1; + size_t BaseRegsSize = BaseRegs.size(); + size_t Try = 0; + // If ScaledReg is an invariant, try to find a variant expression. + while (Try < BaseRegsSize && !isa<SCEVAddRecExpr>(ScaledReg)) + std::swap(ScaledReg, BaseRegs[Try++]); +} + +/// \brief Get rid of the scale in the formula. +/// In other words, this method morphs reg1 + 1*reg2 into reg1 + reg2. +/// \return true if it was possible to get rid of the scale, false otherwise. +/// \note After this operation the formula may not be in the canonical form. +bool Formula::Unscale() { + if (Scale != 1) + return false; + Scale = 0; + BaseRegs.push_back(ScaledReg); + ScaledReg = nullptr; + return true; } /// getNumRegs - Return the total number of register operands used by this /// formula. This does not include register uses implied by non-constant /// addrec strides. -unsigned Formula::getNumRegs() const { +size_t Formula::getNumRegs() const { return !!ScaledReg + BaseRegs.size(); } @@ -360,7 +421,7 @@ Type *Formula::getType() const { return !BaseRegs.empty() ? BaseRegs.front()->getType() : ScaledReg ? ScaledReg->getType() : BaseGV ? BaseGV->getType() : - 0; + nullptr; } /// DeleteBaseReg - Delete the given base reg from the BaseRegs list. @@ -487,11 +548,11 @@ static const SCEV *getExactSDiv(const SCEV *LHS, const SCEV *RHS, // Check for a division of a constant by a constant. if (const SCEVConstant *C = dyn_cast<SCEVConstant>(LHS)) { if (!RC) - return 0; + return nullptr; const APInt &LA = C->getValue()->getValue(); const APInt &RA = RC->getValue()->getValue(); if (LA.srem(RA) != 0) - return 0; + return nullptr; return SE.getConstant(LA.sdiv(RA)); } @@ -500,16 +561,16 @@ static const SCEV *getExactSDiv(const SCEV *LHS, const SCEV *RHS, if (IgnoreSignificantBits || isAddRecSExtable(AR, SE)) { const SCEV *Step = getExactSDiv(AR->getStepRecurrence(SE), RHS, SE, IgnoreSignificantBits); - if (!Step) return 0; + if (!Step) return nullptr; const SCEV *Start = getExactSDiv(AR->getStart(), RHS, SE, IgnoreSignificantBits); - if (!Start) return 0; + if (!Start) return nullptr; // FlagNW is independent of the start value, step direction, and is // preserved with smaller magnitude steps. // FIXME: AR->getNoWrapFlags(SCEV::FlagNW) return SE.getAddRecExpr(Start, Step, AR->getLoop(), SCEV::FlagAnyWrap); } - return 0; + return nullptr; } // Distribute the sdiv over add operands, if the add doesn't overflow. @@ -520,12 +581,12 @@ static const SCEV *getExactSDiv(const SCEV *LHS, const SCEV *RHS, I != E; ++I) { const SCEV *Op = getExactSDiv(*I, RHS, SE, IgnoreSignificantBits); - if (!Op) return 0; + if (!Op) return nullptr; Ops.push_back(Op); } return SE.getAddExpr(Ops); } - return 0; + return nullptr; } // Check for a multiply operand that we can pull RHS out of. @@ -544,13 +605,13 @@ static const SCEV *getExactSDiv(const SCEV *LHS, const SCEV *RHS, } Ops.push_back(S); } - return Found ? SE.getMulExpr(Ops) : 0; + return Found ? SE.getMulExpr(Ops) : nullptr; } - return 0; + return nullptr; } // Otherwise we don't know.
- return 0; + return nullptr; } /// ExtractImmediate - If S involves the addition of a constant integer value, @@ -604,7 +665,7 @@ static GlobalValue *ExtractSymbol(const SCEV *&S, ScalarEvolution &SE) { SCEV::FlagAnyWrap); return Result; } - return 0; + return nullptr; } /// isAddressUse - Returns true if the specified instruction is using the @@ -755,12 +816,12 @@ DeleteTriviallyDeadInstructions(SmallVectorImpl<WeakVH> &DeadInsts) { Value *V = DeadInsts.pop_back_val(); Instruction *I = dyn_cast_or_null<Instruction>(V); - if (I == 0 || !isInstructionTriviallyDead(I)) + if (!I || !isInstructionTriviallyDead(I)) continue; for (User::op_iterator OI = I->op_begin(), E = I->op_end(); OI != E; ++OI) if (Instruction *U = dyn_cast<Instruction>(*OI)) { - *OI = 0; + *OI = nullptr; if (U->use_empty()) DeadInsts.push_back(U); } @@ -775,9 +836,18 @@ DeleteTriviallyDeadInstructions(SmallVectorImpl<WeakVH> &DeadInsts) { namespace { class LSRUse; } -// Check if it is legal to fold 2 base registers. -static bool isLegal2RegAMUse(const TargetTransformInfo &TTI, const LSRUse &LU, - const Formula &F); + +/// \brief Check if the addressing mode defined by \p F is completely +/// folded in \p LU at isel time. +/// This includes address-mode folding and special icmp tricks. +/// This function returns true if \p LU can accommodate what \p F +/// defines and up to 1 base + 1 scaled + offset. +/// In other words, if \p F has several base registers, this function may +/// still return true. Therefore, users still need to account for +/// additional base registers and/or unfolded offsets to derive an +/// accurate cost model. +static bool isAMCompletelyFolded(const TargetTransformInfo &TTI, + const LSRUse &LU, const Formula &F); // Get the cost of the scaling factor used in F for LU. static unsigned getScalingFactorCost(const TargetTransformInfo &TTI, const LSRUse &LU, const Formula &F); @@ -828,7 +898,7 @@ public: const SmallVectorImpl<int64_t> &Offsets, ScalarEvolution &SE, DominatorTree &DT, const LSRUse &LU, - SmallPtrSet<const SCEV *, 16> *LoserRegs = 0); + SmallPtrSet<const SCEV *, 16> *LoserRegs = nullptr); void print(raw_ostream &OS) const; void dump() const; @@ -921,6 +991,7 @@ void Cost::RateFormula(const TargetTransformInfo &TTI, ScalarEvolution &SE, DominatorTree &DT, const LSRUse &LU, SmallPtrSet<const SCEV *, 16> *LoserRegs) { + assert(F.isCanonical() && "Cost is accurate only for canonical formula"); // Tally up the registers. if (const SCEV *ScaledReg = F.ScaledReg) { if (VisitedRegs.count(ScaledReg)) { @@ -944,11 +1015,13 @@ void Cost::RateFormula(const TargetTransformInfo &TTI, } // Determine how many (unfolded) adds we'll need inside the loop. - size_t NumBaseParts = F.BaseRegs.size() + (F.UnfoldedOffset != 0); + size_t NumBaseParts = F.getNumRegs(); if (NumBaseParts > 1) // Do not count the base and a possible second register if the target // allows to fold 2 registers. - NumBaseAdds += NumBaseParts - (1 + isLegal2RegAMUse(TTI, LU, F)); + NumBaseAdds += + NumBaseParts - (1 + (F.Scale && isAMCompletelyFolded(TTI, LU, F))); + NumBaseAdds += (F.UnfoldedOffset != 0); // Accumulate non-free scaling amounts. ScaleCost += getScalingFactorCost(TTI, LU, F); @@ -1047,7 +1120,8 @@ struct LSRFixup { } LSRFixup::LSRFixup() - : UserInst(0), OperandValToReplace(0), LUIdx(~size_t(0)), Offset(0) {} + : UserInst(nullptr), OperandValToReplace(nullptr), LUIdx(~size_t(0)), + Offset(0) {} /// isUseFullyOutsideLoop - Test whether this fixup always uses its /// value outside of the given loop. 
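To spell out the canonical Formula shape that the new isCanonical/Canonicalize/Unscale trio maintains, here are a few hypothetical formulae (illustrative only, not from the patch):

    reg1           ->  BaseRegs = {reg1}, ScaledReg = null, Scale = 0
    reg1 + reg2    ->  BaseRegs = {reg1}, ScaledReg = reg2, Scale = 1
    reg1 + 4*reg2  ->  BaseRegs = {reg1}, ScaledReg = reg2, Scale = 4
    1*reg1         ->  not canonical; Unscale() folds it back to plain reg1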
@@ -1183,7 +1257,7 @@ public: MaxOffset(INT64_MIN), AllFixupsOutsideLoop(true), RigidFormula(false), - WidestFixupType(0) {} + WidestFixupType(nullptr) {} bool HasFormulaWithSameRegs(const Formula &F) const; bool InsertFormula(const Formula &F); @@ -1208,7 +1282,10 @@ bool LSRUse::HasFormulaWithSameRegs(const Formula &F) const { /// InsertFormula - If the given formula has not yet been inserted, add it to /// the list, and return true. Return false otherwise. +/// The formula must be in canonical form. bool LSRUse::InsertFormula(const Formula &F) { + assert(F.isCanonical() && "Invalid canonical representation"); + if (!Formulae.empty() && RigidFormula) return false; @@ -1234,6 +1311,8 @@ bool LSRUse::InsertFormula(const Formula &F) { // Record registers now being used by this use. Regs.insert(F.BaseRegs.begin(), F.BaseRegs.end()); + if (F.ScaledReg) + Regs.insert(F.ScaledReg); return true; } @@ -1300,12 +1379,10 @@ void LSRUse::dump() const { } #endif -/// isLegalUse - Test whether the use described by AM is "legal", meaning it can -/// be completely folded into the user instruction at isel time. This includes -/// address-mode folding and special icmp tricks. -static bool isLegalUse(const TargetTransformInfo &TTI, LSRUse::KindType Kind, - Type *AccessTy, GlobalValue *BaseGV, int64_t BaseOffset, - bool HasBaseReg, int64_t Scale) { +static bool isAMCompletelyFolded(const TargetTransformInfo &TTI, + LSRUse::KindType Kind, Type *AccessTy, + GlobalValue *BaseGV, int64_t BaseOffset, + bool HasBaseReg, int64_t Scale) { switch (Kind) { case LSRUse::Address: return TTI.isLegalAddressingMode(AccessTy, BaseGV, BaseOffset, HasBaseReg, Scale); @@ -1356,10 +1433,11 @@ static bool isLegalUse(const TargetTransformInfo &TTI, LSRUse::KindType Kind, llvm_unreachable("Invalid LSRUse Kind!"); } -static bool isLegalUse(const TargetTransformInfo &TTI, int64_t MinOffset, - int64_t MaxOffset, LSRUse::KindType Kind, Type *AccessTy, - GlobalValue *BaseGV, int64_t BaseOffset, bool HasBaseReg, - int64_t Scale) { +static bool isAMCompletelyFolded(const TargetTransformInfo &TTI, + int64_t MinOffset, int64_t MaxOffset, + LSRUse::KindType Kind, Type *AccessTy, + GlobalValue *BaseGV, int64_t BaseOffset, + bool HasBaseReg, int64_t Scale) { // Check for overflow. if (((int64_t)((uint64_t)BaseOffset + MinOffset) > BaseOffset) != (MinOffset > 0)) @@ -1370,9 +1448,41 @@ static bool isLegalUse(const TargetTransformInfo &TTI, int64_t MinOffset, return false; MaxOffset = (uint64_t)BaseOffset + MaxOffset; - return isLegalUse(TTI, Kind, AccessTy, BaseGV, MinOffset, HasBaseReg, - Scale) && - isLegalUse(TTI, Kind, AccessTy, BaseGV, MaxOffset, HasBaseReg, Scale); + return isAMCompletelyFolded(TTI, Kind, AccessTy, BaseGV, MinOffset, + HasBaseReg, Scale) && + isAMCompletelyFolded(TTI, Kind, AccessTy, BaseGV, MaxOffset, + HasBaseReg, Scale); +} + +static bool isAMCompletelyFolded(const TargetTransformInfo &TTI, + int64_t MinOffset, int64_t MaxOffset, + LSRUse::KindType Kind, Type *AccessTy, + const Formula &F) { + // For the purpose of isAMCompletelyFolded either having a canonical formula + // or a scale not equal to zero is correct. + // Problems may arise from non-canonical formulae having a scale == 0. + // Strictly speaking it would be best to just rely on canonical formulae. + // However, when we generate the scaled formulae, we first check that the + // scaling factor is profitable before computing the actual ScaledReg for + // compile time's sake.
+ assert((F.isCanonical() || F.Scale != 0)); + return isAMCompletelyFolded(TTI, MinOffset, MaxOffset, Kind, AccessTy, + F.BaseGV, F.BaseOffset, F.HasBaseReg, F.Scale); +} + +/// isLegalUse - Test whether we know how to expand the current formula. +static bool isLegalUse(const TargetTransformInfo &TTI, int64_t MinOffset, + int64_t MaxOffset, LSRUse::KindType Kind, Type *AccessTy, + GlobalValue *BaseGV, int64_t BaseOffset, bool HasBaseReg, + int64_t Scale) { + // We know how to expand completely foldable formulae. + return isAMCompletelyFolded(TTI, MinOffset, MaxOffset, Kind, AccessTy, BaseGV, + BaseOffset, HasBaseReg, Scale) || + // Or formulae that use a base register produced by a sum of base + // registers. + (Scale == 1 && + isAMCompletelyFolded(TTI, MinOffset, MaxOffset, Kind, AccessTy, + BaseGV, BaseOffset, true, 0)); } static bool isLegalUse(const TargetTransformInfo &TTI, int64_t MinOffset, @@ -1382,36 +1492,23 @@ static bool isLegalUse(const TargetTransformInfo &TTI, int64_t MinOffset, F.BaseOffset, F.HasBaseReg, F.Scale); } -static bool isLegal2RegAMUse(const TargetTransformInfo &TTI, const LSRUse &LU, - const Formula &F) { - // If F is used as an Addressing Mode, it may fold one Base plus one - // scaled register. If the scaled register is nil, do as if another - // element of the base regs is a 1-scaled register. - // This is possible if BaseRegs has at least 2 registers. - - // If this is not an address calculation, this is not an addressing mode - // use. - if (LU.Kind != LSRUse::Address) - return false; - - // F is already scaled. - if (F.Scale != 0) - return false; - - // We need to keep one register for the base and one to scale. - if (F.BaseRegs.size() < 2) - return false; - - return isLegalUse(TTI, LU.MinOffset, LU.MaxOffset, LU.Kind, LU.AccessTy, - F.BaseGV, F.BaseOffset, F.HasBaseReg, 1); - } +static bool isAMCompletelyFolded(const TargetTransformInfo &TTI, + const LSRUse &LU, const Formula &F) { + return isAMCompletelyFolded(TTI, LU.MinOffset, LU.MaxOffset, LU.Kind, + LU.AccessTy, F.BaseGV, F.BaseOffset, F.HasBaseReg, + F.Scale); +} static unsigned getScalingFactorCost(const TargetTransformInfo &TTI, const LSRUse &LU, const Formula &F) { if (!F.Scale) return 0; - assert(isLegalUse(TTI, LU.MinOffset, LU.MaxOffset, LU.Kind, - LU.AccessTy, F) && "Illegal formula in use."); + + // If the use is not completely folded in that instruction, we will have to + // pay an extra cost only for scale != 1. + if (!isAMCompletelyFolded(TTI, LU.MinOffset, LU.MaxOffset, LU.Kind, + LU.AccessTy, F)) + return F.Scale != 1; switch (LU.Kind) { case LSRUse::Address: { @@ -1430,12 +1527,10 @@ static unsigned getScalingFactorCost(const TargetTransformInfo &TTI, return std::max(ScaleCostMinOffset, ScaleCostMaxOffset); } case LSRUse::ICmpZero: - // ICmpZero BaseReg + -1*ScaleReg => ICmp BaseReg, ScaleReg. - // Therefore, return 0 in case F.Scale == -1. - return F.Scale != -1; - case LSRUse::Basic: case LSRUse::Special: + // The use is completely folded, i.e., everything is folded into the + // instruction. return 0; } @@ -1460,7 +1555,8 @@ static bool isAlwaysFoldable(const TargetTransformInfo &TTI, HasBaseReg = true; } - return isLegalUse(TTI, Kind, AccessTy, BaseGV, BaseOffset, HasBaseReg, Scale); + return isAMCompletelyFolded(TTI, Kind, AccessTy, BaseGV, BaseOffset, + HasBaseReg, Scale); } static bool isAlwaysFoldable(const TargetTransformInfo &TTI, @@ -1485,8 +1581,8 @@ static bool isAlwaysFoldable(const TargetTransformInfo &TTI, // base and a scale. int64_t Scale = Kind == LSRUse::ICmpZero ? 
-1 : 1; - return isLegalUse(TTI, MinOffset, MaxOffset, Kind, AccessTy, BaseGV, - BaseOffset, HasBaseReg, Scale); + return isAMCompletelyFolded(TTI, MinOffset, MaxOffset, Kind, AccessTy, BaseGV, + BaseOffset, HasBaseReg, Scale); } namespace { @@ -1515,7 +1611,7 @@ struct IVChain { SmallVector<IVInc,1> Incs; const SCEV *ExprBase; - IVChain() : ExprBase(0) {} + IVChain() : ExprBase(nullptr) {} IVChain(const IVInc &Head, const SCEV *Base) : Incs(1, Head), ExprBase(Base) {} @@ -1642,8 +1738,19 @@ class LSRInstance { void GenerateReassociations(LSRUse &LU, unsigned LUIdx, Formula Base, unsigned Depth = 0); + + void GenerateReassociationsImpl(LSRUse &LU, unsigned LUIdx, + const Formula &Base, unsigned Depth, + size_t Idx, bool IsScaledReg = false); void GenerateCombinations(LSRUse &LU, unsigned LUIdx, Formula Base); + void GenerateSymbolicOffsetsImpl(LSRUse &LU, unsigned LUIdx, + const Formula &Base, size_t Idx, + bool IsScaledReg = false); void GenerateSymbolicOffsets(LSRUse &LU, unsigned LUIdx, Formula Base); + void GenerateConstantOffsetsImpl(LSRUse &LU, unsigned LUIdx, + const Formula &Base, + const SmallVectorImpl<int64_t> &Worklist, + size_t Idx, bool IsScaledReg = false); void GenerateConstantOffsets(LSRUse &LU, unsigned LUIdx, Formula Base); void GenerateICmpZeroScales(LSRUse &LU, unsigned LUIdx, Formula Base); void GenerateScales(LSRUse &LU, unsigned LUIdx, Formula Base); @@ -1721,7 +1828,7 @@ void LSRInstance::OptimizeShadowIV() { IVUsers::const_iterator CandidateUI = UI; ++UI; Instruction *ShadowUse = CandidateUI->getUser(); - Type *DestTy = 0; + Type *DestTy = nullptr; bool IsSigned = false; /* If shadow use is a int->float cast then insert a second IV @@ -1783,7 +1890,7 @@ void LSRInstance::OptimizeShadowIV() { continue; /* Initialize new IV, double d = 0.0 in above example. */ - ConstantInt *C = 0; + ConstantInt *C = nullptr; if (Incr->getOperand(0) == PH) C = dyn_cast<ConstantInt>(Incr->getOperand(1)); else if (Incr->getOperand(1) == PH) @@ -1905,7 +2012,7 @@ ICmpInst *LSRInstance::OptimizeMax(ICmpInst *Cond, IVStrideUse* &CondUse) { // for ICMP_ULE here because the comparison would be with zero, which // isn't interesting. CmpInst::Predicate Pred = ICmpInst::BAD_ICMP_PREDICATE; - const SCEVNAryExpr *Max = 0; + const SCEVNAryExpr *Max = nullptr; if (const SCEVSMaxExpr *S = dyn_cast<SCEVSMaxExpr>(BackedgeTakenCount)) { Pred = ICmpInst::ICMP_SLE; Max = S; @@ -1948,7 +2055,7 @@ ICmpInst *LSRInstance::OptimizeMax(ICmpInst *Cond, IVStrideUse* &CondUse) { // Check the right operand of the select, and remember it, as it will // be used in the new comparison instruction. - Value *NewRHS = 0; + Value *NewRHS = nullptr; if (ICmpInst::isTrueWhenEqual(Pred)) { // Look for n+1, and grab n. if (AddOperator *BO = dyn_cast<AddOperator>(Sel->getOperand(1))) @@ -2018,7 +2125,7 @@ LSRInstance::OptimizeLoopTermCond() { continue; // Search IVUsesByStride to find Cond's IVUse if there is one. - IVStrideUse *CondUse = 0; + IVStrideUse *CondUse = nullptr; ICmpInst *Cond = cast<ICmpInst>(TermBr->getCondition()); if (!FindIVUserForCond(Cond, CondUse)) continue; @@ -2071,12 +2178,12 @@ LSRInstance::OptimizeLoopTermCond() { // Check for possible scaled-address reuse. 
Type *AccessTy = getAccessType(UI->getUser()); int64_t Scale = C->getSExtValue(); - if (TTI.isLegalAddressingMode(AccessTy, /*BaseGV=*/ 0, + if (TTI.isLegalAddressingMode(AccessTy, /*BaseGV=*/ nullptr, /*BaseOffset=*/ 0, /*HasBaseReg=*/ false, Scale)) goto decline_post_inc; Scale = -Scale; - if (TTI.isLegalAddressingMode(AccessTy, /*BaseGV=*/ 0, + if (TTI.isLegalAddressingMode(AccessTy, /*BaseGV=*/ nullptr, /*BaseOffset=*/ 0, /*HasBaseReg=*/ false, Scale)) goto decline_post_inc; @@ -2146,23 +2253,25 @@ LSRInstance::reconcileNewOffset(LSRUse &LU, int64_t NewOffset, bool HasBaseReg, // the uses will have all its uses outside the loop, for example. if (LU.Kind != Kind) return false; + + // Check for a mismatched access type, and fall back conservatively as needed. + // TODO: Be less conservative when the type is similar and can use the same + // addressing modes. + if (Kind == LSRUse::Address && AccessTy != LU.AccessTy) + NewAccessTy = Type::getVoidTy(AccessTy->getContext()); + // Conservatively assume HasBaseReg is true for now. if (NewOffset < LU.MinOffset) { - if (!isAlwaysFoldable(TTI, Kind, AccessTy, /*BaseGV=*/ 0, + if (!isAlwaysFoldable(TTI, Kind, NewAccessTy, /*BaseGV=*/nullptr, LU.MaxOffset - NewOffset, HasBaseReg)) return false; NewMinOffset = NewOffset; } else if (NewOffset > LU.MaxOffset) { - if (!isAlwaysFoldable(TTI, Kind, AccessTy, /*BaseGV=*/ 0, + if (!isAlwaysFoldable(TTI, Kind, NewAccessTy, /*BaseGV=*/nullptr, NewOffset - LU.MinOffset, HasBaseReg)) return false; NewMaxOffset = NewOffset; } - // Check for a mismatched access type, and fall back conservatively as needed. - // TODO: Be less conservative when the type is similar and can use the same - // addressing modes. - if (Kind == LSRUse::Address && AccessTy != LU.AccessTy) - NewAccessTy = Type::getVoidTy(AccessTy->getContext()); // Update the use. LU.MinOffset = NewMinOffset; @@ -2183,7 +2292,7 @@ LSRInstance::getUse(const SCEV *&Expr, int64_t Offset = ExtractImmediate(Expr, SE); // Basic uses can't accept any offset, for example. - if (!isAlwaysFoldable(TTI, Kind, AccessTy, /*BaseGV=*/ 0, + if (!isAlwaysFoldable(TTI, Kind, AccessTy, /*BaseGV=*/ nullptr, Offset, /*HasBaseReg=*/ true)) { Expr = Copy; Offset = 0; @@ -2267,7 +2376,7 @@ LSRInstance::FindUseWithSimilarFormula(const Formula &OrigF, } // Nothing looked good. - return 0; + return nullptr; } void LSRInstance::CollectInterestingTypesAndFactors() { @@ -2385,7 +2494,7 @@ static const SCEV *getExprBase(const SCEV *S) { default: // uncluding scUnknown. return S; case scConstant: - return 0; + return nullptr; case scTruncate: return getExprBase(cast<SCEVTruncateExpr>(S)->getOperand()); case scZeroExtend: @@ -2476,7 +2585,7 @@ isProfitableChain(IVChain &Chain, SmallPtrSet<Instruction*, 4> &Users, && SE.getSCEV(Chain.tailUserInst()) == Chain.Incs[0].IncExpr) { --cost; } - const SCEV *LastIncExpr = 0; + const SCEV *LastIncExpr = nullptr; unsigned NumConstIncrements = 0; unsigned NumVarIncrements = 0; unsigned NumReusedIncrements = 0; @@ -2535,7 +2644,7 @@ void LSRInstance::ChainInstruction(Instruction *UserInst, Instruction *IVOper, // Visit all existing chains. Check if its IVOper can be computed as a // profitable loop invariant increment from the last link in the Chain. 
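// A sketch (not from the patch) of what such a chain looks like: given
//   p1 = phi(base, p3);  p2 = p1 + 4;  p3 = p2 + 4;
// the increments p2 and p3 form a chain rooted at p1, and reusing p2 to
// compute p3 is cheaper than expanding a fresh addrec for every user.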
unsigned ChainIdx = 0, NChains = IVChainVec.size(); - const SCEV *LastIncExpr = 0; + const SCEV *LastIncExpr = nullptr; for (; ChainIdx < NChains; ++ChainIdx) { IVChain &Chain = IVChainVec[ChainIdx]; @@ -2755,7 +2864,7 @@ static bool canFoldIVIncExpr(const SCEV *IncExpr, Instruction *UserInst, int64_t IncOffset = IncConst->getValue()->getSExtValue(); if (!isAlwaysFoldable(TTI, LSRUse::Address, - getAccessType(UserInst), /*BaseGV=*/ 0, + getAccessType(UserInst), /*BaseGV=*/ nullptr, IncOffset, /*HaseBaseReg=*/ false)) return false; @@ -2773,7 +2882,7 @@ void LSRInstance::GenerateIVChain(const IVChain &Chain, SCEVExpander &Rewriter, // findIVOperand returns IVOpEnd if it can no longer find a valid IV user. User::op_iterator IVOpIter = findIVOperand(Head.UserInst->op_begin(), IVOpEnd, L, SE); - Value *IVSrc = 0; + Value *IVSrc = nullptr; while (IVOpIter != IVOpEnd) { IVSrc = getWideOperand(*IVOpIter); @@ -2800,7 +2909,7 @@ void LSRInstance::GenerateIVChain(const IVChain &Chain, SCEVExpander &Rewriter, DEBUG(dbgs() << "Generate chain at: " << *IVSrc << "\n"); Type *IVTy = IVSrc->getType(); Type *IntTy = SE.getEffectiveSCEVType(IVTy); - const SCEV *LeftOverExpr = 0; + const SCEV *LeftOverExpr = nullptr; for (IVChain::const_iterator IncI = Chain.begin(), IncE = Chain.end(); IncI != IncE; ++IncI) { @@ -2831,7 +2940,7 @@ void LSRInstance::GenerateIVChain(const IVChain &Chain, SCEVExpander &Rewriter, TTI)) { assert(IVTy == IVOper->getType() && "inconsistent IV increment type"); IVSrc = IVOper; - LeftOverExpr = 0; + LeftOverExpr = nullptr; } } Type *OperTy = IncI->IVOperand->getType(); @@ -2886,7 +2995,7 @@ void LSRInstance::CollectFixupsAndInitialFormulae() { LF.PostIncLoops = UI->getPostIncLoops(); LSRUse::KindType Kind = LSRUse::Basic; - Type *AccessTy = 0; + Type *AccessTy = nullptr; if (isAddressUse(LF.UserInst, LF.OperandValToReplace)) { Kind = LSRUse::Address; AccessTy = getAccessType(LF.UserInst); @@ -2917,7 +3026,7 @@ void LSRInstance::CollectFixupsAndInitialFormulae() { if (SE.isLoopInvariant(N, L) && isSafeToExpand(N, SE)) { // S is normalized, so normalize N before folding it into S // to keep the result normalized. - N = TransformForPostIncUse(Normalize, N, CI, 0, + N = TransformForPostIncUse(Normalize, N, CI, nullptr, LF.PostIncLoops, SE, DT); Kind = LSRUse::ICmpZero; S = SE.getMinusSCEV(N, S); @@ -2992,6 +3101,9 @@ void LSRInstance::CountRegisters(const Formula &F, size_t LUIdx) { /// InsertFormula - If the given formula has not yet been inserted, add it to /// the list, and return true. Return false otherwise. bool LSRInstance::InsertFormula(LSRUse &LU, unsigned LUIdx, const Formula &F) { + // Do not insert formula that we will not be able to expand. + assert(isLegalUse(TTI, LU.MinOffset, LU.MaxOffset, LU.Kind, LU.AccessTy, F) && + "Formula is illegal"); if (!LU.InsertFormula(F)) return false; @@ -3068,7 +3180,7 @@ LSRInstance::CollectLoopInvariantFixupsAndFormulae() { LSRFixup &LF = getNewFixup(); LF.UserInst = const_cast<Instruction *>(UserInst); LF.OperandValToReplace = U; - std::pair<size_t, int64_t> P = getUse(S, LSRUse::Basic, 0); + std::pair<size_t, int64_t> P = getUse(S, LSRUse::Basic, nullptr); LF.LUIdx = P.first; LF.Offset = P.second; LSRUse &LU = Uses[LF.LUIdx]; @@ -3107,7 +3219,7 @@ static const SCEV *CollectSubexprs(const SCEV *S, const SCEVConstant *C, if (Remainder) Ops.push_back(C ? SE.getMulExpr(C, Remainder) : Remainder); } - return 0; + return nullptr; } else if (const SCEVAddRecExpr *AR = dyn_cast<SCEVAddRecExpr>(S)) { // Split a non-zero base out of an addrec. 
if (AR->getStart()->isZero()) @@ -3119,7 +3231,7 @@ static const SCEV *CollectSubexprs(const SCEV *S, const SCEVConstant *C, // does not pertain to this loop. if (Remainder && (AR->getLoop() == L || !isa<SCEVAddRecExpr>(Remainder))) { Ops.push_back(C ? SE.getMulExpr(C, Remainder) : Remainder); - Remainder = 0; + Remainder = nullptr; } if (Remainder != AR->getStart()) { if (!Remainder) @@ -3141,90 +3253,110 @@ static const SCEV *CollectSubexprs(const SCEV *S, const SCEVConstant *C, CollectSubexprs(Mul->getOperand(1), C, Ops, L, SE, Depth+1); if (Remainder) Ops.push_back(SE.getMulExpr(C, Remainder)); - return 0; + return nullptr; } } return S; } -/// GenerateReassociations - Split out subexpressions from adds and the bases of -/// addrecs. -void LSRInstance::GenerateReassociations(LSRUse &LU, unsigned LUIdx, - Formula Base, - unsigned Depth) { - // Arbitrarily cap recursion to protect compile time. - if (Depth >= 3) return; - - for (size_t i = 0, e = Base.BaseRegs.size(); i != e; ++i) { - const SCEV *BaseReg = Base.BaseRegs[i]; +/// \brief Helper function for LSRInstance::GenerateReassociations. +void LSRInstance::GenerateReassociationsImpl(LSRUse &LU, unsigned LUIdx, + const Formula &Base, + unsigned Depth, size_t Idx, + bool IsScaledReg) { + const SCEV *BaseReg = IsScaledReg ? Base.ScaledReg : Base.BaseRegs[Idx]; + SmallVector<const SCEV *, 8> AddOps; + const SCEV *Remainder = CollectSubexprs(BaseReg, nullptr, AddOps, L, SE); + if (Remainder) + AddOps.push_back(Remainder); + + if (AddOps.size() == 1) + return; - SmallVector<const SCEV *, 8> AddOps; - const SCEV *Remainder = CollectSubexprs(BaseReg, 0, AddOps, L, SE); - if (Remainder) - AddOps.push_back(Remainder); + for (SmallVectorImpl<const SCEV *>::const_iterator J = AddOps.begin(), + JE = AddOps.end(); + J != JE; ++J) { - if (AddOps.size() == 1) continue; + // Loop-variant "unknown" values are uninteresting; we won't be able to + // do anything meaningful with them. + if (isa<SCEVUnknown>(*J) && !SE.isLoopInvariant(*J, L)) + continue; - for (SmallVectorImpl<const SCEV *>::const_iterator J = AddOps.begin(), - JE = AddOps.end(); J != JE; ++J) { + // Don't pull a constant into a register if the constant could be folded + // into an immediate field. + if (isAlwaysFoldable(TTI, SE, LU.MinOffset, LU.MaxOffset, LU.Kind, + LU.AccessTy, *J, Base.getNumRegs() > 1)) + continue; - // Loop-variant "unknown" values are uninteresting; we won't be able to - // do anything meaningful with them. - if (isa<SCEVUnknown>(*J) && !SE.isLoopInvariant(*J, L)) - continue; + // Collect all operands except *J. + SmallVector<const SCEV *, 8> InnerAddOps( + ((const SmallVector<const SCEV *, 8> &)AddOps).begin(), J); + InnerAddOps.append(std::next(J), + ((const SmallVector<const SCEV *, 8> &)AddOps).end()); + + // Don't leave just a constant behind in a register if the constant could + // be folded into an immediate field. + if (InnerAddOps.size() == 1 && + isAlwaysFoldable(TTI, SE, LU.MinOffset, LU.MaxOffset, LU.Kind, + LU.AccessTy, InnerAddOps[0], Base.getNumRegs() > 1)) + continue; - // Don't pull a constant into a register if the constant could be folded - // into an immediate field. - if (isAlwaysFoldable(TTI, SE, LU.MinOffset, LU.MaxOffset, LU.Kind, - LU.AccessTy, *J, Base.getNumRegs() > 1)) - continue; + const SCEV *InnerSum = SE.getAddExpr(InnerAddOps); + if (InnerSum->isZero()) + continue; + Formula F = Base; - // Collect all operands except *J. 
- SmallVector<const SCEV *, 8> InnerAddOps( - ((const SmallVector<const SCEV *, 8> &)AddOps).begin(), J); - InnerAddOps.append(std::next(J), - ((const SmallVector<const SCEV *, 8> &)AddOps).end()); - - // Don't leave just a constant behind in a register if the constant could - // be folded into an immediate field. - if (InnerAddOps.size() == 1 && - isAlwaysFoldable(TTI, SE, LU.MinOffset, LU.MaxOffset, LU.Kind, - LU.AccessTy, InnerAddOps[0], Base.getNumRegs() > 1)) - continue; + // Add the remaining pieces of the add back into the new formula. + const SCEVConstant *InnerSumSC = dyn_cast<SCEVConstant>(InnerSum); + if (InnerSumSC && SE.getTypeSizeInBits(InnerSumSC->getType()) <= 64 && + TTI.isLegalAddImmediate((uint64_t)F.UnfoldedOffset + + InnerSumSC->getValue()->getZExtValue())) { + F.UnfoldedOffset = + (uint64_t)F.UnfoldedOffset + InnerSumSC->getValue()->getZExtValue(); + if (IsScaledReg) + F.ScaledReg = nullptr; + else + F.BaseRegs.erase(F.BaseRegs.begin() + Idx); + } else if (IsScaledReg) + F.ScaledReg = InnerSum; + else + F.BaseRegs[Idx] = InnerSum; + + // Add J as its own register, or an unfolded immediate. + const SCEVConstant *SC = dyn_cast<SCEVConstant>(*J); + if (SC && SE.getTypeSizeInBits(SC->getType()) <= 64 && + TTI.isLegalAddImmediate((uint64_t)F.UnfoldedOffset + + SC->getValue()->getZExtValue())) + F.UnfoldedOffset = + (uint64_t)F.UnfoldedOffset + SC->getValue()->getZExtValue(); + else + F.BaseRegs.push_back(*J); + // We may have changed the number of registers in base regs; adjust the + // formula accordingly. + F.Canonicalize(); + + if (InsertFormula(LU, LUIdx, F)) + // If that formula hadn't been seen before, recurse to find more like + // it. + GenerateReassociations(LU, LUIdx, LU.Formulae.back(), Depth + 1); + } +} - const SCEV *InnerSum = SE.getAddExpr(InnerAddOps); - if (InnerSum->isZero()) - continue; - Formula F = Base; +/// GenerateReassociations - Split out subexpressions from adds and the bases of +/// addrecs. +void LSRInstance::GenerateReassociations(LSRUse &LU, unsigned LUIdx, + Formula Base, unsigned Depth) { + assert(Base.isCanonical() && "Input must be in the canonical form"); + // Arbitrarily cap recursion to protect compile time. + if (Depth >= 3) + return; - // Add the remaining pieces of the add back into the new formula. - const SCEVConstant *InnerSumSC = dyn_cast<SCEVConstant>(InnerSum); - if (InnerSumSC && - SE.getTypeSizeInBits(InnerSumSC->getType()) <= 64 && - TTI.isLegalAddImmediate((uint64_t)F.UnfoldedOffset + - InnerSumSC->getValue()->getZExtValue())) { - F.UnfoldedOffset = (uint64_t)F.UnfoldedOffset + - InnerSumSC->getValue()->getZExtValue(); - F.BaseRegs.erase(F.BaseRegs.begin() + i); - } else - F.BaseRegs[i] = InnerSum; - - // Add J as its own register, or an unfolded immediate. - const SCEVConstant *SC = dyn_cast<SCEVConstant>(*J); - if (SC && SE.getTypeSizeInBits(SC->getType()) <= 64 && - TTI.isLegalAddImmediate((uint64_t)F.UnfoldedOffset + - SC->getValue()->getZExtValue())) - F.UnfoldedOffset = (uint64_t)F.UnfoldedOffset + - SC->getValue()->getZExtValue(); - else - F.BaseRegs.push_back(*J); + for (size_t i = 0, e = Base.BaseRegs.size(); i != e; ++i) + GenerateReassociationsImpl(LU, LUIdx, Base, Depth, i); - if (InsertFormula(LU, LUIdx, F)) - // If that formula hadn't been seen before, recurse to find more like - // it.
- GenerateReassociations(LU, LUIdx, LU.Formulae.back(), Depth+1); - } - } + if (Base.Scale == 1) + GenerateReassociationsImpl(LU, LUIdx, Base, Depth, + /* Idx */ -1, /* IsScaledReg */ true); } /// GenerateCombinations - Generate a formula consisting of all of the @@ -3232,8 +3364,12 @@ void LSRInstance::GenerateReassociations(LSRUse &LU, unsigned LUIdx, void LSRInstance::GenerateCombinations(LSRUse &LU, unsigned LUIdx, Formula Base) { // This method is only interesting on a plurality of registers. - if (Base.BaseRegs.size() <= 1) return; + if (Base.BaseRegs.size() + (Base.Scale == 1) <= 1) + return; + // Flatten the representation, i.e., reg1 + 1*reg2 => reg1 + reg2, before + // processing the formula. + Base.Unscale(); Formula F = Base; F.BaseRegs.clear(); SmallVector<const SCEV *, 4> Ops; @@ -3253,29 +3389,87 @@ void LSRInstance::GenerateCombinations(LSRUse &LU, unsigned LUIdx, // rather than proceed with zero in a register. if (!Sum->isZero()) { F.BaseRegs.push_back(Sum); + F.Canonicalize(); (void)InsertFormula(LU, LUIdx, F); } } } +/// \brief Helper function for LSRInstance::GenerateSymbolicOffsets. +void LSRInstance::GenerateSymbolicOffsetsImpl(LSRUse &LU, unsigned LUIdx, + const Formula &Base, size_t Idx, + bool IsScaledReg) { + const SCEV *G = IsScaledReg ? Base.ScaledReg : Base.BaseRegs[Idx]; + GlobalValue *GV = ExtractSymbol(G, SE); + if (G->isZero() || !GV) + return; + Formula F = Base; + F.BaseGV = GV; + if (!isLegalUse(TTI, LU.MinOffset, LU.MaxOffset, LU.Kind, LU.AccessTy, F)) + return; + if (IsScaledReg) + F.ScaledReg = G; + else + F.BaseRegs[Idx] = G; + (void)InsertFormula(LU, LUIdx, F); +} + /// GenerateSymbolicOffsets - Generate reuse formulae using symbolic offsets. void LSRInstance::GenerateSymbolicOffsets(LSRUse &LU, unsigned LUIdx, Formula Base) { // We can't add a symbolic offset if the address already contains one. if (Base.BaseGV) return; - for (size_t i = 0, e = Base.BaseRegs.size(); i != e; ++i) { - const SCEV *G = Base.BaseRegs[i]; - GlobalValue *GV = ExtractSymbol(G, SE); - if (G->isZero() || !GV) - continue; + for (size_t i = 0, e = Base.BaseRegs.size(); i != e; ++i) + GenerateSymbolicOffsetsImpl(LU, LUIdx, Base, i); + if (Base.Scale == 1) + GenerateSymbolicOffsetsImpl(LU, LUIdx, Base, /* Idx */ -1, + /* IsScaledReg */ true); +} + +/// \brief Helper function for LSRInstance::GenerateConstantOffsets. +void LSRInstance::GenerateConstantOffsetsImpl( + LSRUse &LU, unsigned LUIdx, const Formula &Base, + const SmallVectorImpl<int64_t> &Worklist, size_t Idx, bool IsScaledReg) { + const SCEV *G = IsScaledReg ? Base.ScaledReg : Base.BaseRegs[Idx]; + for (SmallVectorImpl<int64_t>::const_iterator I = Worklist.begin(), + E = Worklist.end(); + I != E; ++I) { Formula F = Base; - F.BaseGV = GV; - if (!isLegalUse(TTI, LU.MinOffset, LU.MaxOffset, LU.Kind, LU.AccessTy, F)) - continue; - F.BaseRegs[i] = G; - (void)InsertFormula(LU, LUIdx, F); + F.BaseOffset = (uint64_t)Base.BaseOffset - *I; + if (isLegalUse(TTI, LU.MinOffset - *I, LU.MaxOffset - *I, LU.Kind, + LU.AccessTy, F)) { + // Add the offset to the base register. + const SCEV *NewG = SE.getAddExpr(SE.getConstant(G->getType(), *I), G); + // If it cancelled out, drop the base register, otherwise update it. 
+ if (NewG->isZero()) { + if (IsScaledReg) { + F.Scale = 0; + F.ScaledReg = nullptr; + } else + F.DeleteBaseReg(F.BaseRegs[Idx]); + F.Canonicalize(); + } else if (IsScaledReg) + F.ScaledReg = NewG; + else + F.BaseRegs[Idx] = NewG; + + (void)InsertFormula(LU, LUIdx, F); + } } + + int64_t Imm = ExtractImmediate(G, SE); + if (G->isZero() || Imm == 0) + return; + Formula F = Base; + F.BaseOffset = (uint64_t)F.BaseOffset + Imm; + if (!isLegalUse(TTI, LU.MinOffset, LU.MaxOffset, LU.Kind, LU.AccessTy, F)) + return; + if (IsScaledReg) + F.ScaledReg = G; + else + F.BaseRegs[Idx] = G; + (void)InsertFormula(LU, LUIdx, F); } /// GenerateConstantOffsets - Generate reuse formulae using symbolic offsets. @@ -3288,38 +3482,11 @@ void LSRInstance::GenerateConstantOffsets(LSRUse &LU, unsigned LUIdx, if (LU.MaxOffset != LU.MinOffset) Worklist.push_back(LU.MaxOffset); - for (size_t i = 0, e = Base.BaseRegs.size(); i != e; ++i) { - const SCEV *G = Base.BaseRegs[i]; - - for (SmallVectorImpl<int64_t>::const_iterator I = Worklist.begin(), - E = Worklist.end(); I != E; ++I) { - Formula F = Base; - F.BaseOffset = (uint64_t)Base.BaseOffset - *I; - if (isLegalUse(TTI, LU.MinOffset - *I, LU.MaxOffset - *I, LU.Kind, - LU.AccessTy, F)) { - // Add the offset to the base register. - const SCEV *NewG = SE.getAddExpr(SE.getConstant(G->getType(), *I), G); - // If it cancelled out, drop the base register, otherwise update it. - if (NewG->isZero()) { - std::swap(F.BaseRegs[i], F.BaseRegs.back()); - F.BaseRegs.pop_back(); - } else - F.BaseRegs[i] = NewG; - - (void)InsertFormula(LU, LUIdx, F); - } - } - - int64_t Imm = ExtractImmediate(G, SE); - if (G->isZero() || Imm == 0) - continue; - Formula F = Base; - F.BaseOffset = (uint64_t)F.BaseOffset + Imm; - if (!isLegalUse(TTI, LU.MinOffset, LU.MaxOffset, LU.Kind, LU.AccessTy, F)) - continue; - F.BaseRegs[i] = G; - (void)InsertFormula(LU, LUIdx, F); - } + for (size_t i = 0, e = Base.BaseRegs.size(); i != e; ++i) + GenerateConstantOffsetsImpl(LU, LUIdx, Base, Worklist, i); + if (Base.Scale == 1) + GenerateConstantOffsetsImpl(LU, LUIdx, Base, Worklist, /* Idx */ -1, + /* IsScaledReg */ true); } /// GenerateICmpZeroScales - For ICmpZero, check to see if we can scale up @@ -3419,7 +3586,11 @@ void LSRInstance::GenerateScales(LSRUse &LU, unsigned LUIdx, Formula Base) { if (!IntTy) return; // If this Formula already has a scaled register, we can't add another one. - if (Base.Scale != 0) return; + // Try to unscale the formula to generate a better scale. + if (Base.Scale != 0 && !Base.Unscale()) + return; + + assert(Base.Scale == 0 && "Unscale did not do its job!"); // Check each interesting stride. for (SmallSetVector<int64_t, 8>::const_iterator @@ -3460,6 +3631,11 @@ void LSRInstance::GenerateScales(LSRUse &LU, unsigned LUIdx, Formula Base) { Formula F = Base; F.ScaledReg = Quotient; F.DeleteBaseReg(F.BaseRegs[i]); + // The canonical representation of 1*reg is reg, which is already in + // Base. In that case, do not try to insert the formula; it will be + // rejected anyway. + if (F.Scale == 1 && F.BaseRegs.empty()) + continue; (void)InsertFormula(LU, LUIdx, F); } } @@ -3624,7 +3800,12 @@ void LSRInstance::GenerateCrossUseConstantOffsets() { // TODO: Use a more targeted data structure. for (size_t L = 0, LE = LU.Formulae.size(); L != LE; ++L) { - const Formula &F = LU.Formulae[L]; + Formula F = LU.Formulae[L]; + // FIXME: The code for the scaled and unscaled registers looks + // very similar but slightly different. Investigate if they + // could be merged.
That way, we would not have to unscale the + // Formula. + F.Unscale(); // Use the immediate in the scaled register. if (F.ScaledReg == OrigReg) { int64_t Offset = (uint64_t)F.BaseOffset + Imm * (uint64_t)F.Scale; @@ -3650,6 +3831,7 @@ void LSRInstance::GenerateCrossUseConstantOffsets() { continue; // OK, looks good. + NewF.Canonicalize(); (void)InsertFormula(LU, LUIdx, NewF); } else { // Use the immediate in a base register. @@ -3683,6 +3865,7 @@ void LSRInstance::GenerateCrossUseConstantOffsets() { goto skip_formula; // Ok, looks good. + NewF.Canonicalize(); (void)InsertFormula(LU, LUIdx, NewF); break; skip_formula:; @@ -3936,7 +4119,7 @@ void LSRInstance::NarrowSearchSpaceByCollapsingUnrolledCode() { for (SmallVectorImpl<Formula>::const_iterator I = LU.Formulae.begin(), E = LU.Formulae.end(); I != E; ++I) { const Formula &F = *I; - if (F.BaseOffset == 0 || F.Scale != 0) + if (F.BaseOffset == 0 || (F.Scale != 0 && F.Scale != 1)) continue; LSRUse *LUThatHas = FindUseWithSimilarFormula(F, LU); @@ -4033,7 +4216,7 @@ void LSRInstance::NarrowSearchSpaceByPickingWinnerRegs() { // Pick the register which is used by the most LSRUses, which is likely // to be a good reuse register candidate. - const SCEV *Best = 0; + const SCEV *Best = nullptr; unsigned BestNum = 0; for (RegUseTracker::const_iterator I = RegUses.begin(), E = RegUses.end(); I != E; ++I) { @@ -4130,19 +4313,22 @@ void LSRInstance::SolveRecurse(SmallVectorImpl<const Formula *> &Solution, E = LU.Formulae.end(); I != E; ++I) { const Formula &F = *I; - // Ignore formulae which do not use any of the required registers. - bool SatisfiedReqReg = true; + // Ignore formulae which may not be ideal in terms of register reuse of + // ReqRegs. The formula should use all required registers before + // introducing new ones. + int NumReqRegsToFind = std::min(F.getNumRegs(), ReqRegs.size()); for (SmallSetVector<const SCEV *, 4>::const_iterator J = ReqRegs.begin(), JE = ReqRegs.end(); J != JE; ++J) { const SCEV *Reg = *J; - if ((!F.ScaledReg || F.ScaledReg != Reg) && - std::find(F.BaseRegs.begin(), F.BaseRegs.end(), Reg) == + if ((F.ScaledReg && F.ScaledReg == Reg) || + std::find(F.BaseRegs.begin(), F.BaseRegs.end(), Reg) != F.BaseRegs.end()) { - SatisfiedReqReg = false; - break; + --NumReqRegsToFind; + if (NumReqRegsToFind == 0) + break; } } - if (!SatisfiedReqReg) { + if (NumReqRegsToFind != 0) { // If none of the formulae satisfied the required registers, then we could // clear ReqRegs and try again. Currently, we simply give up in this case. continue; @@ -4240,7 +4426,7 @@ LSRInstance::HoistInsertPosition(BasicBlock::iterator IP, } bool AllDominate = true; - Instruction *BetterPos = 0; + Instruction *BetterPos = nullptr; Instruction *Tentative = IDom->getTerminator(); for (SmallVectorImpl<Instruction *>::const_iterator I = Inputs.begin(), E = Inputs.end(); I != E; ++I) { @@ -4379,11 +4565,11 @@ Value *LSRInstance::Expand(const LSRFixup &LF, LF.UserInst, LF.OperandValToReplace, Loops, SE, DT); - Ops.push_back(SE.getUnknown(Rewriter.expandCodeFor(Reg, 0, IP))); + Ops.push_back(SE.getUnknown(Rewriter.expandCodeFor(Reg, nullptr, IP))); } // Expand the ScaledReg portion. 
- Value *ICmpScaledV = 0; + Value *ICmpScaledV = nullptr; if (F.Scale != 0) { const SCEV *ScaledS = F.ScaledReg; @@ -4394,25 +4580,34 @@ Value *LSRInstance::Expand(const LSRFixup &LF, Loops, SE, DT); if (LU.Kind == LSRUse::ICmpZero) { - // An interesting way of "folding" with an icmp is to use a negated - // scale, which we'll implement by inserting it into the other operand - // of the icmp. - assert(F.Scale == -1 && - "The only scale supported by ICmpZero uses is -1!"); - ICmpScaledV = Rewriter.expandCodeFor(ScaledS, 0, IP); + // Expand ScaleReg as if it was part of the base regs. + if (F.Scale == 1) + Ops.push_back( + SE.getUnknown(Rewriter.expandCodeFor(ScaledS, nullptr, IP))); + else { + // An interesting way of "folding" with an icmp is to use a negated + // scale, which we'll implement by inserting it into the other operand + // of the icmp. + assert(F.Scale == -1 && + "The only scale supported by ICmpZero uses is -1!"); + ICmpScaledV = Rewriter.expandCodeFor(ScaledS, nullptr, IP); + } } else { // Otherwise just expand the scaled register and an explicit scale, // which is expected to be matched as part of the address. // Flush the operand list to suppress SCEVExpander hoisting address modes. - if (!Ops.empty() && LU.Kind == LSRUse::Address) { + // Unless the addressing mode will not be folded. + if (!Ops.empty() && LU.Kind == LSRUse::Address && + isAMCompletelyFolded(TTI, LU, F)) { Value *FullV = Rewriter.expandCodeFor(SE.getAddExpr(Ops), Ty, IP); Ops.clear(); Ops.push_back(SE.getUnknown(FullV)); } - ScaledS = SE.getUnknown(Rewriter.expandCodeFor(ScaledS, 0, IP)); - ScaledS = SE.getMulExpr(ScaledS, - SE.getConstant(ScaledS->getType(), F.Scale)); + ScaledS = SE.getUnknown(Rewriter.expandCodeFor(ScaledS, nullptr, IP)); + if (F.Scale != 1) + ScaledS = + SE.getMulExpr(ScaledS, SE.getConstant(ScaledS->getType(), F.Scale)); Ops.push_back(ScaledS); } } @@ -4490,7 +4685,9 @@ Value *LSRInstance::Expand(const LSRFixup &LF, } CI->setOperand(1, ICmpScaledV); } else { - assert(F.Scale == 0 && + // A scale of 1 means that the scale has been expanded as part of the + // base regs. + assert((F.Scale == 0 || F.Scale == 1) && "ICmp does not support folding a global value and " "a scale at the same time!"); Constant *C = ConstantInt::getSigned(SE.getEffectiveSCEVType(OpTy), @@ -4531,7 +4728,7 @@ void LSRInstance::RewriteForPHI(PHINode *PN, Loop *PNLoop = LI.getLoopFor(Parent); if (!PNLoop || Parent != PNLoop->getHeader()) { // Split the critical edge. - BasicBlock *NewBB = 0; + BasicBlock *NewBB = nullptr; if (!Parent->isLandingPad()) { NewBB = SplitCriticalEdge(BB, Parent, P, /*MergeIdenticalEdges=*/true, @@ -4560,7 +4757,7 @@ void LSRInstance::RewriteForPHI(PHINode *PN, } std::pair<DenseMap<BasicBlock *, Value *>::iterator, bool> Pair = - Inserted.insert(std::make_pair(BB, static_cast<Value *>(0))); + Inserted.insert(std::make_pair(BB, static_cast<Value *>(nullptr))); if (!Pair.second) PN->setIncomingValue(i, Pair.first->second); else { @@ -4670,7 +4867,7 @@ LSRInstance::LSRInstance(Loop *L, Pass *P) DT(P->getAnalysis<DominatorTreeWrapperPass>().getDomTree()), LI(P->getAnalysis<LoopInfo>()), TTI(P->getAnalysis<TargetTransformInfo>()), L(L), Changed(false), - IVIncInsertPos(0) { + IVIncInsertPos(nullptr) { // If LoopSimplify form is not available, stay out of trouble. 
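The ICmpZero expansion above leans on a small identity: with a scale of -1, the scaled register can be moved to the icmp's other operand instead of being multiplied. A hand-written check of that identity in plain C++ (nothing here is LLVM API):

#include <cassert>
#include <cstdint>

// base + (-1)*reg == 0 is the same predicate as base == reg, so the
// expander can feed `reg` into the icmp's other operand and emit no mul.
bool naive(int64_t base, int64_t reg) { return base + -1 * reg == 0; }
bool folded(int64_t base, int64_t reg) { return base == reg; }

int main() {
  for (int64_t b = -3; b <= 3; ++b)
    for (int64_t r = -3; r <= 3; ++r)
      assert(naive(b, r) == folded(b, r));
}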
if (!L->isLoopSimplifyForm()) return; diff --git a/lib/Transforms/Scalar/LoopUnrollPass.cpp b/lib/Transforms/Scalar/LoopUnrollPass.cpp index ecd350b..fc28fd2 100644 --- a/lib/Transforms/Scalar/LoopUnrollPass.cpp +++ b/lib/Transforms/Scalar/LoopUnrollPass.cpp @@ -12,7 +12,6 @@ // counts of loops easily. //===----------------------------------------------------------------------===// -#define DEBUG_TYPE "loop-unroll" #include "llvm/Transforms/Scalar.h" #include "llvm/Analysis/CodeMetrics.h" #include "llvm/Analysis/LoopPass.h" @@ -29,6 +28,8 @@ using namespace llvm; +#define DEBUG_TYPE "loop-unroll" + static cl::opt<unsigned> UnrollThreshold("unroll-threshold", cl::init(150), cl::Hidden, cl::desc("The cut-off point for automatic loop unrolling")); @@ -237,9 +238,12 @@ bool LoopUnroll::runOnLoop(Loop *L, LPPassManager &LPM) { return false; } uint64_t Size = (uint64_t)LoopSize*Count; - if (TripCount != 1 && Size > Threshold) { - DEBUG(dbgs() << " Too large to fully unroll with count: " << Count - << " because size: " << Size << ">" << Threshold << "\n"); + if (TripCount != 1 && + (Size > Threshold || (Count != TripCount && Size > PartialThreshold))) { + if (Size > Threshold) + DEBUG(dbgs() << " Too large to fully unroll with count: " << Count + << " because size: " << Size << ">" << Threshold << "\n"); + bool AllowPartial = UserAllowPartial ? CurrentAllowPartial : UP.Partial; if (!AllowPartial && !(Runtime && TripCount == 0)) { DEBUG(dbgs() << " will not try to unroll partially because " diff --git a/lib/Transforms/Scalar/LoopUnswitch.cpp b/lib/Transforms/Scalar/LoopUnswitch.cpp index 5954f4a..977c53a 100644 --- a/lib/Transforms/Scalar/LoopUnswitch.cpp +++ b/lib/Transforms/Scalar/LoopUnswitch.cpp @@ -26,7 +26,6 @@ // //===----------------------------------------------------------------------===// -#define DEBUG_TYPE "loop-unswitch" #include "llvm/Transforms/Scalar.h" #include "llvm/ADT/STLExtras.h" #include "llvm/ADT/SmallPtrSet.h" @@ -53,6 +52,8 @@ #include <set> using namespace llvm; +#define DEBUG_TYPE "loop-unswitch" + STATISTIC(NumBranches, "Number of branches unswitched"); STATISTIC(NumSwitches, "Number of switches unswitched"); STATISTIC(NumSelects , "Number of selects unswitched"); @@ -96,7 +97,7 @@ namespace { public: LUAnalysisCache() : - CurLoopInstructions(0), CurrentLoopProperties(0), + CurLoopInstructions(nullptr), CurrentLoopProperties(nullptr), MaxSize(Threshold) {} @@ -151,8 +152,8 @@ namespace { static char ID; // Pass ID, replacement for typeid explicit LoopUnswitch(bool Os = false) : LoopPass(ID), OptimizeForSize(Os), redoLoop(false), - currentLoop(0), DT(0), loopHeader(0), - loopPreheader(0) { + currentLoop(nullptr), DT(nullptr), loopHeader(nullptr), + loopPreheader(nullptr) { initializeLoopUnswitchPass(*PassRegistry::getPassRegistry()); } @@ -180,15 +181,6 @@ namespace { BranchesInfo.forgetLoop(currentLoop); } - /// RemoveLoopFromWorklist - If the specified loop is on the loop worklist, - /// remove it. 
- void RemoveLoopFromWorklist(Loop *L) { - std::vector<Loop*>::iterator I = std::find(LoopProcessWorklist.begin(), - LoopProcessWorklist.end(), L); - if (I != LoopProcessWorklist.end()) - LoopProcessWorklist.erase(I); - } - void initLoopData() { loopHeader = currentLoop->getHeader(); loopPreheader = currentLoop->getLoopPreheader(); @@ -212,9 +204,8 @@ namespace { Instruction *InsertPt); void SimplifyCode(std::vector<Instruction*> &Worklist, Loop *L); - void RemoveLoopFromHierarchy(Loop *L); - bool IsTrivialUnswitchCondition(Value *Cond, Constant **Val = 0, - BasicBlock **LoopExit = 0); + bool IsTrivialUnswitchCondition(Value *Cond, Constant **Val = nullptr, + BasicBlock **LoopExit = nullptr); }; } @@ -283,8 +274,8 @@ void LUAnalysisCache::forgetLoop(const Loop *L) { LoopsProperties.erase(LIt); } - CurrentLoopProperties = 0; - CurLoopInstructions = 0; + CurrentLoopProperties = nullptr; + CurLoopInstructions = nullptr; } // Mark case value as unswitched. @@ -355,10 +346,10 @@ static Value *FindLIVLoopCondition(Value *Cond, Loop *L, bool &Changed) { // We can never unswitch on vector conditions. if (Cond->getType()->isVectorTy()) - return 0; + return nullptr; // Constants should be folded, not unswitched on! - if (isa<Constant>(Cond)) return 0; + if (isa<Constant>(Cond)) return nullptr; // TODO: Handle: br (VARIANT|INVARIANT). @@ -378,7 +369,7 @@ static Value *FindLIVLoopCondition(Value *Cond, Loop *L, bool &Changed) { return RHS; } - return 0; + return nullptr; } bool LoopUnswitch::runOnLoop(Loop *L, LPPassManager &LPM_Ref) { @@ -389,7 +380,7 @@ bool LoopUnswitch::runOnLoop(Loop *L, LPPassManager &LPM_Ref) { LPM = &LPM_Ref; DominatorTreeWrapperPass *DTWP = getAnalysisIfAvailable<DominatorTreeWrapperPass>(); - DT = DTWP ? &DTWP->getDomTree() : 0; + DT = DTWP ? &DTWP->getDomTree() : nullptr; currentLoop = L; Function *F = currentLoop->getHeader()->getParent(); bool Changed = false; @@ -461,7 +452,7 @@ bool LoopUnswitch::processCurrentLoop() { // Find a value to unswitch on: // FIXME: this should chose the most expensive case! // FIXME: scan for a case with a non-critical edge? - Constant *UnswitchVal = 0; + Constant *UnswitchVal = nullptr; // Do not process same value again and again. // At this point we have some cases already unswitched and @@ -518,7 +509,7 @@ static bool isTrivialLoopExitBlockHelper(Loop *L, BasicBlock *BB, if (!L->contains(BB)) { // Otherwise, this is a loop exit, this is fine so long as this is the // first exit. - if (ExitBB != 0) return false; + if (ExitBB) return false; ExitBB = BB; return true; } @@ -545,10 +536,10 @@ static bool isTrivialLoopExitBlockHelper(Loop *L, BasicBlock *BB, static BasicBlock *isTrivialLoopExitBlock(Loop *L, BasicBlock *BB) { std::set<BasicBlock*> Visited; Visited.insert(L->getHeader()); // Branches to header make infinite loops. - BasicBlock *ExitBB = 0; + BasicBlock *ExitBB = nullptr; if (isTrivialLoopExitBlockHelper(L, BB, ExitBB, Visited)) return ExitBB; - return 0; + return nullptr; } /// IsTrivialUnswitchCondition - Check to see if this unswitch condition is @@ -569,7 +560,7 @@ bool LoopUnswitch::IsTrivialUnswitchCondition(Value *Cond, Constant **Val, TerminatorInst *HeaderTerm = Header->getTerminator(); LLVMContext &Context = Header->getContext(); - BasicBlock *LoopExitBB = 0; + BasicBlock *LoopExitBB = nullptr; if (BranchInst *BI = dyn_cast<BranchInst>(HeaderTerm)) { // If the header block doesn't end with a conditional branch on Cond, we // can't handle it. 
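FindLIVLoopCondition, shown in this hunk with its returns converted to nullptr, searches a condition built from and/or for a loop-invariant piece to unswitch on. A stand-in for that walk, with an invented Cond node type and the invariance test passed in as a callback:

#include <functional>

// Leaves have IsAndOr == false; and/or nodes own two sub-conditions.
// Nodes are assumed non-null.
struct Cond {
  bool IsAndOr = false;
  const Cond *LHS = nullptr;
  const Cond *RHS = nullptr;
};

// Return the whole condition if it is invariant; otherwise look inside
// and/or operands, mirroring the recursion in the pass.
const Cond *findInvariant(const Cond *C,
                          const std::function<bool(const Cond *)> &Inv) {
  if (Inv(C))
    return C;
  if (!C->IsAndOr)
    return nullptr;
  if (const Cond *L = findInvariant(C->LHS, Inv))
    return L;
  return findInvariant(C->RHS, Inv);
}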
@@ -639,8 +630,8 @@ bool LoopUnswitch::IsTrivialUnswitchCondition(Value *Cond, Constant **Val, /// unswitch the loop, reprocess the pieces, then return true. bool LoopUnswitch::UnswitchIfProfitable(Value *LoopCond, Constant *Val) { Function *F = loopHeader->getParent(); - Constant *CondVal = 0; - BasicBlock *ExitBlock = 0; + Constant *CondVal = nullptr; + BasicBlock *ExitBlock = nullptr; if (IsTrivialUnswitchCondition(LoopCond, &CondVal, &ExitBlock)) { // If the condition is trivial, always unswitch. There is no code growth @@ -948,17 +939,6 @@ static void ReplaceUsesOfWith(Instruction *I, Value *V, ++NumSimplify; } -/// RemoveLoopFromHierarchy - We have discovered that the specified loop has -/// become unwrapped, either because the backedge was deleted, or because the -/// edge into the header was removed. If the edge into the header from the -/// latch block was removed, the loop is unwrapped but subloops are still alive, -/// so they just reparent loops. If the loops are actually dead, they will be -/// removed later. -void LoopUnswitch::RemoveLoopFromHierarchy(Loop *L) { - LPM->deleteLoopFromQueue(L); - RemoveLoopFromWorklist(L); -} - // RewriteLoopBodyWithConditionConstant - We know either that the value LIC has // the value specified by Val in the specified loop, or we know it does NOT have // that value. Rewrite any uses of LIC or of properties correlated to it. @@ -1020,7 +1000,7 @@ void LoopUnswitch::RewriteLoopBodyWithConditionConstant(Loop *L, Value *LIC, // If we know that LIC is not Val, use this info to simplify code. SwitchInst *SI = dyn_cast<SwitchInst>(UI); - if (SI == 0 || !isa<ConstantInt>(Val)) continue; + if (!SI || !isa<ConstantInt>(Val)) continue; SwitchInst::CaseIt DeadCase = SI->findCaseValue(cast<ConstantInt>(Val)); // Default case is live for multiple values. 
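RewriteLoopBodyWithConditionConstant, which the hunk above is editing, exploits the fact that each unswitched copy of the loop knows the condition's value. Written out by hand at the source level (an illustration of the transformation, not compiler output):

// Before unswitching: the invariant test runs on every iteration.
void before(int *a, int n, bool flag) {
  for (int i = 0; i < n; ++i)
    a[i] = flag ? a[i] * 2 : a[i] + 1;
}

// After: one copy per value of `flag`, each with the test folded away.
void after(int *a, int n, bool flag) {
  if (flag)
    for (int i = 0; i < n; ++i) a[i] = a[i] * 2;
  else
    for (int i = 0; i < n; ++i) a[i] = a[i] + 1;
}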
diff --git a/lib/Transforms/Scalar/LowerAtomic.cpp b/lib/Transforms/Scalar/LowerAtomic.cpp index 7c0a623..4251ac4 100644 --- a/lib/Transforms/Scalar/LowerAtomic.cpp +++ b/lib/Transforms/Scalar/LowerAtomic.cpp @@ -12,7 +12,6 @@ // //===----------------------------------------------------------------------===// -#define DEBUG_TYPE "loweratomic" #include "llvm/Transforms/Scalar.h" #include "llvm/IR/Function.h" #include "llvm/IR/IRBuilder.h" @@ -20,6 +19,8 @@ #include "llvm/Pass.h" using namespace llvm; +#define DEBUG_TYPE "loweratomic" + static bool LowerAtomicCmpXchgInst(AtomicCmpXchgInst *CXI) { IRBuilder<> Builder(CXI->getParent(), CXI); Value *Ptr = CXI->getPointerOperand(); @@ -42,7 +43,7 @@ static bool LowerAtomicRMWInst(AtomicRMWInst *RMWI) { Value *Val = RMWI->getValOperand(); LoadInst *Orig = Builder.CreateLoad(Ptr); - Value *Res = NULL; + Value *Res = nullptr; switch (RMWI->getOperation()) { default: llvm_unreachable("Unexpected RMW operation"); diff --git a/lib/Transforms/Scalar/MemCpyOptimizer.cpp b/lib/Transforms/Scalar/MemCpyOptimizer.cpp index 2603c96..b6bc792 100644 --- a/lib/Transforms/Scalar/MemCpyOptimizer.cpp +++ b/lib/Transforms/Scalar/MemCpyOptimizer.cpp @@ -12,7 +12,6 @@ // //===----------------------------------------------------------------------===// -#define DEBUG_TYPE "memcpyopt" #include "llvm/Transforms/Scalar.h" #include "llvm/ADT/SmallVector.h" #include "llvm/ADT/Statistic.h" @@ -33,6 +32,8 @@ #include <list> using namespace llvm; +#define DEBUG_TYPE "memcpyopt" + STATISTIC(NumMemCpyInstr, "Number of memcpy instructions deleted"); STATISTIC(NumMemSetInfer, "Number of memsets inferred"); STATISTIC(NumMoveToCpy, "Number of memmoves converted to memcpy"); @@ -49,7 +50,7 @@ static int64_t GetOffsetFromIndex(const GEPOperator *GEP, unsigned Idx, int64_t Offset = 0; for (unsigned i = Idx, e = GEP->getNumOperands(); i != e; ++i, ++GTI) { ConstantInt *OpC = dyn_cast<ConstantInt>(GEP->getOperand(i)); - if (OpC == 0) + if (!OpC) return VariableIdxFound = true; if (OpC->isZero()) continue; // No offset. @@ -89,12 +90,12 @@ static bool IsPointerOffset(Value *Ptr1, Value *Ptr2, int64_t &Offset, // If one pointer is a GEP and the other isn't, then see if the GEP is a // constant offset from the base, as in "P" and "gep P, 1". - if (GEP1 && GEP2 == 0 && GEP1->getOperand(0)->stripPointerCasts() == Ptr2) { + if (GEP1 && !GEP2 && GEP1->getOperand(0)->stripPointerCasts() == Ptr2) { Offset = -GetOffsetFromIndex(GEP1, 1, VariableIdxFound, TD); return !VariableIdxFound; } - if (GEP2 && GEP1 == 0 && GEP2->getOperand(0)->stripPointerCasts() == Ptr1) { + if (GEP2 && !GEP1 && GEP2->getOperand(0)->stripPointerCasts() == Ptr1) { Offset = GetOffsetFromIndex(GEP2, 1, VariableIdxFound, TD); return !VariableIdxFound; } @@ -317,9 +318,9 @@ namespace { static char ID; // Pass identification, replacement for typeid MemCpyOpt() : FunctionPass(ID) { initializeMemCpyOptPass(*PassRegistry::getPassRegistry()); - MD = 0; - TLI = 0; - DL = 0; + MD = nullptr; + TLI = nullptr; + DL = nullptr; } bool runOnFunction(Function &F) override; @@ -373,7 +374,7 @@ INITIALIZE_PASS_END(MemCpyOpt, "memcpyopt", "MemCpy Optimization", /// attempts to merge them together into a memcpy/memset. Instruction *MemCpyOpt::tryMergingIntoMemset(Instruction *StartInst, Value *StartPtr, Value *ByteVal) { - if (DL == 0) return 0; + if (!DL) return nullptr; // Okay, so we now have a single store that can be splatable. Scan to find // all subsequent stores of the same value to offset from the same pointer. 
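The scan that follows the comment above collects stores of one splat value into contiguous ranges, and each sufficiently large range becomes a single memset. The range bookkeeping in miniature, assuming distinct one-byte stores; the real MemsetRanges also tracks store widths, overlap, and alignment:

#include <algorithm>
#include <cstdint>
#include <utility>
#include <vector>

// Merge one-byte stores of the same value into (start, length) runs.
std::vector<std::pair<int64_t, int64_t>> mergeRuns(std::vector<int64_t> Offs) {
  std::sort(Offs.begin(), Offs.end());
  std::vector<std::pair<int64_t, int64_t>> Runs;
  for (int64_t O : Offs) {
    if (!Runs.empty() && Runs.back().first + Runs.back().second == O)
      ++Runs.back().second;   // adjacent: extend the current run
    else
      Runs.push_back({O, 1}); // gap: start a new run
  }
  return Runs; // each run is one memset(dst + start, val, length)
}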
@@ -426,7 +427,7 @@ Instruction *MemCpyOpt::tryMergingIntoMemset(Instruction *StartInst, // If we have no ranges, then we just had a single store with nothing that // could be merged in. This is a very common case of course. if (Ranges.empty()) - return 0; + return nullptr; // If we had at least one store that could be merged in, add the starting // store as well. We try to avoid this unless there is at least something @@ -440,7 +441,7 @@ Instruction *MemCpyOpt::tryMergingIntoMemset(Instruction *StartInst, // Now that we have full information about ranges, loop over the ranges and // emit memset's for anything big enough to be worthwhile. - Instruction *AMemSet = 0; + Instruction *AMemSet = nullptr; for (MemsetRanges::const_iterator I = Ranges.begin(), E = Ranges.end(); I != E; ++I) { const MemsetRange &Range = *I; @@ -491,7 +492,7 @@ Instruction *MemCpyOpt::tryMergingIntoMemset(Instruction *StartInst, bool MemCpyOpt::processStore(StoreInst *SI, BasicBlock::iterator &BBI) { if (!SI->isSimple()) return false; - if (DL == 0) return false; + if (!DL) return false; // Detect cases where we're performing call slot forwarding, but // happen to be using a load-store pair to implement it, rather than @@ -500,7 +501,7 @@ bool MemCpyOpt::processStore(StoreInst *SI, BasicBlock::iterator &BBI) { if (LI->isSimple() && LI->hasOneUse() && LI->getParent() == SI->getParent()) { MemDepResult ldep = MD->getDependency(LI); - CallInst *C = 0; + CallInst *C = nullptr; if (ldep.isClobber() && !isa<MemCpyInst>(ldep.getInst())) C = dyn_cast<CallInst>(ldep.getInst()); @@ -512,7 +513,7 @@ bool MemCpyOpt::processStore(StoreInst *SI, BasicBlock::iterator &BBI) { for (BasicBlock::iterator I = --BasicBlock::iterator(SI), E = C; I != E; --I) { if (AA.getModRefInfo(&*I, StoreLoc) != AliasAnalysis::NoModRef) { - C = 0; + C = nullptr; break; } } @@ -603,7 +604,7 @@ bool MemCpyOpt::performCallSlotOptzn(Instruction *cpy, return false; // Check that all of src is copied to dest. - if (DL == 0) return false; + if (!DL) return false; ConstantInt *srcArraySize = dyn_cast<ConstantInt>(srcAlloca->getArraySize()); if (!srcArraySize) @@ -846,7 +847,7 @@ bool MemCpyOpt::processMemCpy(MemCpyInst *M) { // The optimizations after this point require the memcpy size. ConstantInt *CopySize = dyn_cast<ConstantInt>(M->getLength()); - if (CopySize == 0) return false; + if (!CopySize) return false; // The are three possible optimizations we can do for memcpy: // a) memcpy-memcpy xform which exposes redundance for DSE. @@ -929,7 +930,7 @@ bool MemCpyOpt::processMemMove(MemMoveInst *M) { /// processByValArgument - This is called on every byval argument in call sites. bool MemCpyOpt::processByValArgument(CallSite CS, unsigned ArgNo) { - if (DL == 0) return false; + if (!DL) return false; // Find out what feeds this byval argument. Value *ByValArg = CS.getArgument(ArgNo); @@ -946,13 +947,13 @@ bool MemCpyOpt::processByValArgument(CallSite CS, unsigned ArgNo) { // a memcpy, see if we can byval from the source of the memcpy instead of the // result. MemCpyInst *MDep = dyn_cast<MemCpyInst>(DepInfo.getInst()); - if (MDep == 0 || MDep->isVolatile() || + if (!MDep || MDep->isVolatile() || ByValArg->stripPointerCasts() != MDep->getDest()) return false; // The length of the memcpy must be larger or equal to the size of the byval. ConstantInt *C1 = dyn_cast<ConstantInt>(MDep->getLength()); - if (C1 == 0 || C1->getValue().getZExtValue() < ByValSize) + if (!C1 || C1->getValue().getZExtValue() < ByValSize) return false; // Get the alignment of the byval. 
If the call doesn't specify the alignment, @@ -1043,7 +1044,7 @@ bool MemCpyOpt::runOnFunction(Function &F) { bool MadeChange = false; MD = &getAnalysis<MemoryDependenceAnalysis>(); DataLayoutPass *DLP = getAnalysisIfAvailable<DataLayoutPass>(); - DL = DLP ? &DLP->getDataLayout() : 0; + DL = DLP ? &DLP->getDataLayout() : nullptr; TLI = &getAnalysis<TargetLibraryInfo>(); // If we don't have at least memset and memcpy, there is little point of doing @@ -1058,6 +1059,6 @@ bool MemCpyOpt::runOnFunction(Function &F) { MadeChange = true; } - MD = 0; + MD = nullptr; return MadeChange; } diff --git a/lib/Transforms/Scalar/PartiallyInlineLibCalls.cpp b/lib/Transforms/Scalar/PartiallyInlineLibCalls.cpp index 2f19935..7cce89e 100644 --- a/lib/Transforms/Scalar/PartiallyInlineLibCalls.cpp +++ b/lib/Transforms/Scalar/PartiallyInlineLibCalls.cpp @@ -13,7 +13,6 @@ // //===----------------------------------------------------------------------===// -#define DEBUG_TYPE "partially-inline-libcalls" #include "llvm/Analysis/TargetTransformInfo.h" #include "llvm/IR/IRBuilder.h" #include "llvm/IR/Intrinsics.h" @@ -25,6 +24,8 @@ using namespace llvm; +#define DEBUG_TYPE "partially-inline-libcalls" + namespace { class PartiallyInlineLibCalls : public FunctionPass { public: diff --git a/lib/Transforms/Scalar/Reassociate.cpp b/lib/Transforms/Scalar/Reassociate.cpp index b6b4d97..986d6a4 100644 --- a/lib/Transforms/Scalar/Reassociate.cpp +++ b/lib/Transforms/Scalar/Reassociate.cpp @@ -20,7 +20,6 @@ // //===----------------------------------------------------------------------===// -#define DEBUG_TYPE "reassociate" #include "llvm/Transforms/Scalar.h" #include "llvm/ADT/DenseMap.h" #include "llvm/ADT/PostOrderIterator.h" @@ -42,6 +41,8 @@ #include <algorithm> using namespace llvm; +#define DEBUG_TYPE "reassociate" + STATISTIC(NumChanged, "Number of insts reassociated"); STATISTIC(NumAnnihil, "Number of expr tree annihilated"); STATISTIC(NumFactor , "Number of multiplies factored"); @@ -122,14 +123,14 @@ namespace { public: XorOpnd(Value *V); - bool isInvalid() const { return SymbolicPart == 0; } + bool isInvalid() const { return SymbolicPart == nullptr; } bool isOrExpr() const { return isOr; } Value *getValue() const { return OrigVal; } Value *getSymbolicPart() const { return SymbolicPart; } unsigned getSymbolicRank() const { return SymbolicRank; } const APInt &getConstPart() const { return ConstPart; } - void Invalidate() { SymbolicPart = OrigVal = 0; } + void Invalidate() { SymbolicPart = OrigVal = nullptr; } void setSymbolicRank(unsigned R) { SymbolicRank = R; } // Sort the XorOpnd-Pointer in ascending order of symbolic-value-rank. @@ -236,7 +237,7 @@ static BinaryOperator *isReassociableOp(Value *V, unsigned Opcode) { if (V->hasOneUse() && isa<Instruction>(V) && cast<Instruction>(V)->getOpcode() == Opcode) return cast<BinaryOperator>(V); - return 0; + return nullptr; } static bool isUnmovableInstruction(Instruction *I) { @@ -284,7 +285,7 @@ void Reassociate::BuildRankMap(Function &F) { unsigned Reassociate::getRank(Value *V) { Instruction *I = dyn_cast<Instruction>(V); - if (I == 0) { + if (!I) { if (isa<Argument>(V)) return ValueRankMap[V]; // Function argument. return 0; // Otherwise it's a global or constant, rank 0. } @@ -705,7 +706,7 @@ void Reassociate::RewriteExprTree(BinaryOperator *I, // ExpressionChanged - Non-null if the rewritten expression differs from the // original in some non-trivial way, requiring the clearing of optional flags. 
// Flags are cleared from the operator in ExpressionChanged up to I inclusive. - BinaryOperator *ExpressionChanged = 0; + BinaryOperator *ExpressionChanged = nullptr; for (unsigned i = 0; ; ++i) { // The last operation (which comes earliest in the IR) is special as both // operands will come from Ops, rather than just one with the other being @@ -995,7 +996,7 @@ static Value *EmitAddTreeOfValues(Instruction *I, /// remove Factor from the tree and return the new tree. Value *Reassociate::RemoveFactorFromExpression(Value *V, Value *Factor) { BinaryOperator *BO = isReassociableOp(V, Instruction::Mul); - if (!BO) return 0; + if (!BO) return nullptr; SmallVector<RepeatedValue, 8> Tree; MadeChange |= LinearizeExprTree(BO, Tree); @@ -1029,7 +1030,7 @@ Value *Reassociate::RemoveFactorFromExpression(Value *V, Value *Factor) { if (!FoundFactor) { // Make sure to restore the operands to the expression tree. RewriteExprTree(BO, Factors); - return 0; + return nullptr; } BasicBlock::iterator InsertPt = BO; ++InsertPt; @@ -1114,7 +1115,7 @@ static Value *OptimizeAndOrXor(unsigned Opcode, ++NumAnnihil; } } - return 0; + return nullptr; } /// Helper funciton of CombineXorOpnd(). It creates a bitwise-and @@ -1135,7 +1136,7 @@ static Value *createAndInstr(Instruction *InsertBefore, Value *Opnd, } return Opnd; } - return 0; + return nullptr; } // Helper function of OptimizeXor(). It tries to simplify "Opnd1 ^ ConstOpnd" @@ -1261,7 +1262,7 @@ Value *Reassociate::OptimizeXor(Instruction *I, return V; if (Ops.size() == 1) - return 0; + return nullptr; SmallVector<XorOpnd, 8> Opnds; SmallVector<XorOpnd*, 8> OpndPtrs; @@ -1294,7 +1295,7 @@ Value *Reassociate::OptimizeXor(Instruction *I, std::stable_sort(OpndPtrs.begin(), OpndPtrs.end(), XorOpnd::PtrSortFunctor()); // Step 3: Combine adjacent operands - XorOpnd *PrevOpnd = 0; + XorOpnd *PrevOpnd = nullptr; bool Changed = false; for (unsigned i = 0, e = Opnds.size(); i < e; i++) { XorOpnd *CurrOpnd = OpndPtrs[i]; @@ -1328,7 +1329,7 @@ Value *Reassociate::OptimizeXor(Instruction *I, PrevOpnd = CurrOpnd; } else { CurrOpnd->Invalidate(); - PrevOpnd = 0; + PrevOpnd = nullptr; } Changed = true; } @@ -1358,7 +1359,7 @@ Value *Reassociate::OptimizeXor(Instruction *I, } } - return 0; + return nullptr; } /// OptimizeAdd - Optimize a series of operands to an 'add' instruction. This @@ -1445,7 +1446,7 @@ Value *Reassociate::OptimizeAdd(Instruction *I, // Keep track of each multiply we see, to avoid triggering on (X*4)+(X*4) // where they are actually the same multiply. unsigned MaxOcc = 0; - Value *MaxOccVal = 0; + Value *MaxOccVal = nullptr; for (unsigned i = 0, e = Ops.size(); i != e; ++i) { BinaryOperator *BOp = isReassociableOp(Ops[i].Op, Instruction::Mul); if (!BOp) @@ -1543,7 +1544,7 @@ Value *Reassociate::OptimizeAdd(Instruction *I, Ops.insert(Ops.begin(), ValueEntry(getRank(V2), V2)); } - return 0; + return nullptr; } /// \brief Build up a vector of value/power pairs factoring a product. @@ -1688,14 +1689,14 @@ Value *Reassociate::OptimizeMul(BinaryOperator *I, // We can only optimize the multiplies when there is a chain of more than // three, such that a balanced tree might require fewer total multiplies. if (Ops.size() < 4) - return 0; + return nullptr; // Try to turn linear trees of multiplies without other uses of the // intermediate stages into minimal multiply DAGs with perfect sub-expression // re-use. SmallVector<Factor, 4> Factors; if (!collectMultiplyFactors(Ops, Factors)) - return 0; // All distinct factors, so nothing left for us to do. 
+ return nullptr; // All distinct factors, so nothing left for us to do. IRBuilder<> Builder(I); Value *V = buildMinimalMultiplyDAG(Builder, Factors); @@ -1704,14 +1705,14 @@ Value *Reassociate::OptimizeMul(BinaryOperator *I, ValueEntry NewEntry = ValueEntry(getRank(V), V); Ops.insert(std::lower_bound(Ops.begin(), Ops.end(), NewEntry), NewEntry); - return 0; + return nullptr; } Value *Reassociate::OptimizeExpression(BinaryOperator *I, SmallVectorImpl<ValueEntry> &Ops) { // Now that we have the linearized expression tree, try to optimize it. // Start by folding any constants that we found. - Constant *Cst = 0; + Constant *Cst = nullptr; unsigned Opcode = I->getOpcode(); while (!Ops.empty() && isa<Constant>(Ops.back().Op)) { Constant *C = cast<Constant>(Ops.pop_back_val().Op); @@ -1761,7 +1762,7 @@ Value *Reassociate::OptimizeExpression(BinaryOperator *I, if (Ops.size() != NumOps) return OptimizeExpression(I, Ops); - return 0; + return nullptr; } /// EraseInst - Zap the given instruction, adding interesting operands to the diff --git a/lib/Transforms/Scalar/Reg2Mem.cpp b/lib/Transforms/Scalar/Reg2Mem.cpp index d9809ce..b6023e2 100644 --- a/lib/Transforms/Scalar/Reg2Mem.cpp +++ b/lib/Transforms/Scalar/Reg2Mem.cpp @@ -16,7 +16,6 @@ // //===----------------------------------------------------------------------===// -#define DEBUG_TYPE "reg2mem" #include "llvm/Transforms/Scalar.h" #include "llvm/ADT/Statistic.h" #include "llvm/IR/BasicBlock.h" @@ -30,6 +29,8 @@ #include <list> using namespace llvm; +#define DEBUG_TYPE "reg2mem" + STATISTIC(NumRegsDemoted, "Number of registers demoted"); STATISTIC(NumPhisDemoted, "Number of phi-nodes demoted"); diff --git a/lib/Transforms/Scalar/SCCP.cpp b/lib/Transforms/Scalar/SCCP.cpp index b8f10e9..feeb231 100644 --- a/lib/Transforms/Scalar/SCCP.cpp +++ b/lib/Transforms/Scalar/SCCP.cpp @@ -17,7 +17,6 @@ // //===----------------------------------------------------------------------===// -#define DEBUG_TYPE "sccp" #include "llvm/Transforms/Scalar.h" #include "llvm/ADT/DenseMap.h" #include "llvm/ADT/DenseSet.h" @@ -42,6 +41,8 @@ #include <algorithm> using namespace llvm; +#define DEBUG_TYPE "sccp" + STATISTIC(NumInstRemoved, "Number of instructions removed"); STATISTIC(NumDeadBlocks , "Number of basic blocks unreachable"); @@ -81,7 +82,7 @@ class LatticeVal { } public: - LatticeVal() : Val(0, undefined) {} + LatticeVal() : Val(nullptr, undefined) {} bool isUndefined() const { return getLatticeValue() == undefined; } bool isConstant() const { @@ -133,7 +134,7 @@ public: ConstantInt *getConstantInt() const { if (isConstant()) return dyn_cast<ConstantInt>(getConstant()); - return 0; + return nullptr; } void markForcedConstant(Constant *V) { @@ -403,7 +404,7 @@ private: if (Constant *C = dyn_cast<Constant>(V)) { Constant *Elt = C->getAggregateElement(i); - if (Elt == 0) + if (!Elt) LV.markOverdefined(); // Unknown sort of constant. else if (isa<UndefValue>(Elt)) ; // Undef values remain undefined. @@ -522,7 +523,7 @@ void SCCPSolver::getFeasibleSuccessors(TerminatorInst &TI, LatticeVal BCValue = getValueState(BI->getCondition()); ConstantInt *CI = BCValue.getConstantInt(); - if (CI == 0) { + if (!CI) { // Overdefined condition variables, and branches on unfoldable constant // conditions, mean the branch could go either way. 
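The LatticeVal class being modernized above is a three-level lattice: undefined, a known constant, or overdefined. Its merge rule, sketched with an int standing in for the Constant pointer (the State and mergeIn names are invented for illustration):

// undefined < constant < overdefined; merging can only move upward.
struct Lattice {
  enum State { Undefined, Constant, Overdefined } St = Undefined;
  int Val = 0; // meaningful only when St == Constant

  bool mergeIn(const Lattice &O) { // returns true if *this changed
    if (O.St == Undefined || St == Overdefined)
      return false;
    if (St == Undefined) {
      *this = O;
      return true;
    }
    if (O.St == Constant && O.Val == Val)
      return false;            // agreeing constants stay constant
    St = Overdefined;          // conflict, or an overdefined input
    return true;
  }
};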
if (!BCValue.isUndefined()) @@ -549,7 +550,7 @@ void SCCPSolver::getFeasibleSuccessors(TerminatorInst &TI, LatticeVal SCValue = getValueState(SI->getCondition()); ConstantInt *CI = SCValue.getConstantInt(); - if (CI == 0) { // Overdefined or undefined condition? + if (!CI) { // Overdefined or undefined condition? // All destinations are executable! if (!SCValue.isUndefined()) Succs.assign(TI.getNumSuccessors(), true); @@ -594,7 +595,7 @@ bool SCCPSolver::isEdgeFeasible(BasicBlock *From, BasicBlock *To) { // Overdefined condition variables mean the branch could go either way, // undef conditions mean that neither edge is feasible yet. ConstantInt *CI = BCValue.getConstantInt(); - if (CI == 0) + if (!CI) return !BCValue.isUndefined(); // Constant condition variables mean the branch can only go a single way. @@ -612,7 +613,7 @@ bool SCCPSolver::isEdgeFeasible(BasicBlock *From, BasicBlock *To) { LatticeVal SCValue = getValueState(SI->getCondition()); ConstantInt *CI = SCValue.getConstantInt(); - if (CI == 0) + if (!CI) return !SCValue.isUndefined(); return SI->findCaseValue(CI).getCaseSuccessor() == To; @@ -626,7 +627,7 @@ bool SCCPSolver::isEdgeFeasible(BasicBlock *From, BasicBlock *To) { #ifndef NDEBUG dbgs() << "Unknown terminator instruction: " << *TI << '\n'; #endif - llvm_unreachable(0); + llvm_unreachable(nullptr); } // visit Implementations - Something changed in this instruction, either an @@ -667,7 +668,7 @@ void SCCPSolver::visitPHINode(PHINode &PN) { // constant. If they are constant and don't agree, the PHI is overdefined. // If there are no executable operands, the PHI remains undefined. // - Constant *OperandVal = 0; + Constant *OperandVal = nullptr; for (unsigned i = 0, e = PN.getNumIncomingValues(); i != e; ++i) { LatticeVal IV = getValueState(PN.getIncomingValue(i)); if (IV.isUndefined()) continue; // Doesn't influence PHI node. @@ -678,7 +679,7 @@ void SCCPSolver::visitPHINode(PHINode &PN) { if (IV.isOverdefined()) // PHI node becomes overdefined! return markOverdefined(&PN); - if (OperandVal == 0) { // Grab the first value. + if (!OperandVal) { // Grab the first value. OperandVal = IV.getConstant(); continue; } @@ -774,7 +775,7 @@ void SCCPSolver::visitExtractValueInst(ExtractValueInst &EVI) { void SCCPSolver::visitInsertValueInst(InsertValueInst &IVI) { StructType *STy = dyn_cast<StructType>(IVI.getType()); - if (STy == 0) + if (!STy) return markOverdefined(&IVI); // If this has more than one index, we can't handle it, drive all results to @@ -862,7 +863,7 @@ void SCCPSolver::visitBinaryOperator(Instruction &I) { // If this is an AND or OR with 0 or -1, it doesn't matter that the other // operand is overdefined. if (I.getOpcode() == Instruction::And || I.getOpcode() == Instruction::Or) { - LatticeVal *NonOverdefVal = 0; + LatticeVal *NonOverdefVal = nullptr; if (!V1State.isOverdefined()) NonOverdefVal = &V1State; else if (!V2State.isOverdefined()) @@ -1081,7 +1082,7 @@ void SCCPSolver::visitCallSite(CallSite CS) { // The common case is that we aren't tracking the callee, either because we // are not doing interprocedural analysis or the callee is indirect, or is // external. Handle these cases first. - if (F == 0 || F->isDeclaration()) { + if (!F || F->isDeclaration()) { CallOverdefined: // Void return and not tracking callee, just bail. 
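getFeasibleSuccessors and isEdgeFeasible, both touched here, reduce to one small table for a two-way branch: an undefined condition makes no edge feasible yet, a constant picks one edge, and anything else enables both. The same decision over a toy lattice enum:

#include <utility>

enum class LV { Undefined, ConstTrue, ConstFalse, Overdefined };

// (true-edge feasible, false-edge feasible) for `br i1 %cond, T, F`.
std::pair<bool, bool> feasibleSuccs(LV Cond) {
  switch (Cond) {
  case LV::Undefined:   return {false, false}; // neither edge known yet
  case LV::ConstTrue:   return {true, false};
  case LV::ConstFalse:  return {false, true};
  case LV::Overdefined: return {true, true};   // could go either way
  }
  return {false, false}; // unreachable
}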
if (I->getType()->isVoidTy()) return; @@ -1555,7 +1556,7 @@ bool SCCP::runOnFunction(Function &F) { DEBUG(dbgs() << "SCCP on function '" << F.getName() << "'\n"); const DataLayoutPass *DLP = getAnalysisIfAvailable<DataLayoutPass>(); - const DataLayout *DL = DLP ? &DLP->getDataLayout() : 0; + const DataLayout *DL = DLP ? &DLP->getDataLayout() : nullptr; const TargetLibraryInfo *TLI = &getAnalysis<TargetLibraryInfo>(); SCCPSolver Solver(DL, TLI); @@ -1684,7 +1685,7 @@ static bool AddressIsTaken(const GlobalValue *GV) { bool IPSCCP::runOnModule(Module &M) { DataLayoutPass *DLP = getAnalysisIfAvailable<DataLayoutPass>(); - const DataLayout *DL = DLP ? &DLP->getDataLayout() : 0; + const DataLayout *DL = DLP ? &DLP->getDataLayout() : nullptr; const TargetLibraryInfo *TLI = &getAnalysis<TargetLibraryInfo>(); SCCPSolver Solver(DL, TLI); diff --git a/lib/Transforms/Scalar/SROA.cpp b/lib/Transforms/Scalar/SROA.cpp index ed5e618..04bf4f8 100644 --- a/lib/Transforms/Scalar/SROA.cpp +++ b/lib/Transforms/Scalar/SROA.cpp @@ -23,7 +23,6 @@ /// //===----------------------------------------------------------------------===// -#define DEBUG_TYPE "sroa" #include "llvm/Transforms/Scalar.h" #include "llvm/ADT/STLExtras.h" #include "llvm/ADT/SetVector.h" @@ -64,6 +63,8 @@ using namespace llvm; +#define DEBUG_TYPE "sroa" + STATISTIC(NumAllocasAnalyzed, "Number of allocas analyzed for replacement"); STATISTIC(NumAllocaPartitions, "Number of alloca partitions formed"); STATISTIC(MaxPartitionsPerAlloca, "Maximum number of partitions per alloca"); @@ -159,8 +160,8 @@ public: Use *getUse() const { return UseAndIsSplittable.getPointer(); } - bool isDead() const { return getUse() == 0; } - void kill() { UseAndIsSplittable.setPointer(0); } + bool isDead() const { return getUse() == nullptr; } + void kill() { UseAndIsSplittable.setPointer(nullptr); } /// \brief Support for ordering ranges. /// @@ -320,7 +321,7 @@ static Value *foldSelectInst(SelectInst &SI) { if (SI.getOperand(1) == SI.getOperand(2)) return SI.getOperand(1); - return 0; + return nullptr; } /// \brief Builder for the alloca slices. @@ -642,7 +643,7 @@ private: Uses.push_back(std::make_pair(I, cast<Instruction>(U))); } while (!Uses.empty()); - return 0; + return nullptr; } void visitPHINode(PHINode &PN) { @@ -724,7 +725,7 @@ AllocaSlices::AllocaSlices(const DataLayout &DL, AllocaInst &AI) #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP) AI(AI), #endif - PointerEscapingInstr(0) { + PointerEscapingInstr(nullptr) { SliceBuilder PB(DL, AI, *this); SliceBuilder::PtrInfo PtrI = PB.visitPtr(AI); if (PtrI.isEscaped() || PtrI.isAborted()) { @@ -873,7 +874,7 @@ public: for (SmallVectorImpl<DbgValueInst *>::const_iterator I = DVIs.begin(), E = DVIs.end(); I != E; ++I) { DbgValueInst *DVI = *I; - Value *Arg = 0; + Value *Arg = nullptr; if (StoreInst *SI = dyn_cast<StoreInst>(Inst)) { // If an argument is zero extended then use argument directly. The ZExt // may be zapped by an optimization pass in future. 
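foldSelectInst, updated above to return nullptr, collapses a select whose condition is a constant or whose arms agree. The same decision over plain values, with std::optional standing in for "no fold possible":

#include <optional>

// Returns the folded operand, or nullopt when the select must remain.
std::optional<int> foldSelect(std::optional<bool> KnownCond, int TVal,
                              int FVal) {
  if (KnownCond)
    return *KnownCond ? TVal : FVal; // constant condition picks an arm
  if (TVal == FVal)
    return TVal;                     // both arms identical
  return std::nullopt;
}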
@@ -969,7 +970,7 @@ class SROA : public FunctionPass { public: SROA(bool RequiresDomTree = true) : FunctionPass(ID), RequiresDomTree(RequiresDomTree), - C(0), DL(0), DT(0) { + C(nullptr), DL(nullptr), DT(nullptr) { initializeSROAPass(*PassRegistry::getPassRegistry()); } bool runOnFunction(Function &F) override; @@ -1011,9 +1012,9 @@ INITIALIZE_PASS_END(SROA, "sroa", "Scalar Replacement Of Aggregates", static Type *findCommonType(AllocaSlices::const_iterator B, AllocaSlices::const_iterator E, uint64_t EndOffset) { - Type *Ty = 0; + Type *Ty = nullptr; bool TyIsCommon = true; - IntegerType *ITy = 0; + IntegerType *ITy = nullptr; // Note that we need to look at *every* alloca slice's Use to ensure we // always get consistent results regardless of the order of slices. @@ -1024,7 +1025,7 @@ static Type *findCommonType(AllocaSlices::const_iterator B, if (I->beginOffset() != B->beginOffset() || I->endOffset() != EndOffset) continue; - Type *UserTy = 0; + Type *UserTy = nullptr; if (LoadInst *LI = dyn_cast<LoadInst>(U->getUser())) { UserTy = LI->getType(); } else if (StoreInst *SI = dyn_cast<StoreInst>(U->getUser())) { @@ -1074,7 +1075,7 @@ static Type *findCommonType(AllocaSlices::const_iterator B, /// FIXME: This should be hoisted into a generic utility, likely in /// Transforms/Util/Local.h static bool isSafePHIToSpeculate(PHINode &PN, - const DataLayout *DL = 0) { + const DataLayout *DL = nullptr) { // For now, we can only do this promotion if the load is in the same block // as the PHI, and if there are no stores between the phi and load. // TODO: Allow recursive phi users. @@ -1084,7 +1085,7 @@ static bool isSafePHIToSpeculate(PHINode &PN, bool HaveLoad = false; for (User *U : PN.users()) { LoadInst *LI = dyn_cast<LoadInst>(U); - if (LI == 0 || !LI->isSimple()) + if (!LI || !LI->isSimple()) return false; // For now we only allow loads in the same block as the PHI. This is @@ -1191,7 +1192,8 @@ static void speculatePHINodeLoads(PHINode &PN) { /// /// We can do this to a select if its only uses are loads and if the operand /// to the select can be loaded unconditionally. -static bool isSafeSelectToSpeculate(SelectInst &SI, const DataLayout *DL = 0) { +static bool isSafeSelectToSpeculate(SelectInst &SI, + const DataLayout *DL = nullptr) { Value *TValue = SI.getTrueValue(); Value *FValue = SI.getFalseValue(); bool TDerefable = TValue->isDereferenceablePointer(); @@ -1199,7 +1201,7 @@ static bool isSafeSelectToSpeculate(SelectInst &SI, const DataLayout *DL = 0) { for (User *U : SI.users()) { LoadInst *LI = dyn_cast<LoadInst>(U); - if (LI == 0 || !LI->isSimple()) + if (!LI || !LI->isSimple()) return false; // Both operands to the select need to be dereferencable, either @@ -1332,19 +1334,21 @@ static Value *getNaturalGEPRecursively(IRBuilderTy &IRB, const DataLayout &DL, // We can't recurse through pointer types. if (Ty->isPointerTy()) - return 0; + return nullptr; // We try to analyze GEPs over vectors here, but note that these GEPs are // extremely poorly defined currently. The long-term goal is to remove GEPing // over a vector from the IR completely. if (VectorType *VecTy = dyn_cast<VectorType>(Ty)) { unsigned ElementSizeInBits = DL.getTypeSizeInBits(VecTy->getScalarType()); - if (ElementSizeInBits % 8) - return 0; // GEPs over non-multiple of 8 size vector elements are invalid. + if (ElementSizeInBits % 8 != 0) { + // GEPs over non-multiple of 8 size vector elements are invalid. 
+ return nullptr; + } APInt ElementSize(Offset.getBitWidth(), ElementSizeInBits / 8); APInt NumSkippedElements = Offset.sdiv(ElementSize); if (NumSkippedElements.ugt(VecTy->getNumElements())) - return 0; + return nullptr; Offset -= NumSkippedElements * ElementSize; Indices.push_back(IRB.getInt(NumSkippedElements)); return getNaturalGEPRecursively(IRB, DL, Ptr, VecTy->getElementType(), @@ -1356,7 +1360,7 @@ static Value *getNaturalGEPRecursively(IRBuilderTy &IRB, const DataLayout &DL, APInt ElementSize(Offset.getBitWidth(), DL.getTypeAllocSize(ElementTy)); APInt NumSkippedElements = Offset.sdiv(ElementSize); if (NumSkippedElements.ugt(ArrTy->getNumElements())) - return 0; + return nullptr; Offset -= NumSkippedElements * ElementSize; Indices.push_back(IRB.getInt(NumSkippedElements)); @@ -1366,17 +1370,17 @@ static Value *getNaturalGEPRecursively(IRBuilderTy &IRB, const DataLayout &DL, StructType *STy = dyn_cast<StructType>(Ty); if (!STy) - return 0; + return nullptr; const StructLayout *SL = DL.getStructLayout(STy); uint64_t StructOffset = Offset.getZExtValue(); if (StructOffset >= SL->getSizeInBytes()) - return 0; + return nullptr; unsigned Index = SL->getElementContainingOffset(StructOffset); Offset -= APInt(Offset.getBitWidth(), SL->getElementOffset(Index)); Type *ElementTy = STy->getElementType(Index); if (Offset.uge(DL.getTypeAllocSize(ElementTy))) - return 0; // The offset points into alignment padding. + return nullptr; // The offset points into alignment padding. Indices.push_back(IRB.getInt32(Index)); return getNaturalGEPRecursively(IRB, DL, Ptr, ElementTy, Offset, TargetTy, @@ -1402,14 +1406,14 @@ static Value *getNaturalGEPWithOffset(IRBuilderTy &IRB, const DataLayout &DL, // Don't consider any GEPs through an i8* as natural unless the TargetTy is // an i8. if (Ty == IRB.getInt8PtrTy(Ty->getAddressSpace()) && TargetTy->isIntegerTy(8)) - return 0; + return nullptr; Type *ElementTy = Ty->getElementType(); if (!ElementTy->isSized()) - return 0; // We can't GEP through an unsized element. + return nullptr; // We can't GEP through an unsized element. APInt ElementSize(Offset.getBitWidth(), DL.getTypeAllocSize(ElementTy)); if (ElementSize == 0) - return 0; // Zero-length arrays can't help us build a natural GEP. + return nullptr; // Zero-length arrays can't help us build a natural GEP. APInt NumSkippedElements = Offset.sdiv(ElementSize); Offset -= NumSkippedElements * ElementSize; @@ -1445,11 +1449,11 @@ static Value *getAdjustedPtr(IRBuilderTy &IRB, const DataLayout &DL, Value *Ptr, // We may end up computing an offset pointer that has the wrong type. If we // never are able to compute one directly that has the correct type, we'll // fall back to it, so keep it around here. - Value *OffsetPtr = 0; + Value *OffsetPtr = nullptr; // Remember any i8 pointer we come across to re-use if we need to do a raw // byte offset. - Value *Int8Ptr = 0; + Value *Int8Ptr = nullptr; APInt Int8PtrOffset(Offset.getBitWidth(), 0); Type *TargetTy = PointerTy->getPointerElementType(); @@ -2043,14 +2047,14 @@ public: NewAllocaBeginOffset(NewAllocaBeginOffset), NewAllocaEndOffset(NewAllocaEndOffset), NewAllocaTy(NewAI.getAllocatedType()), - VecTy(IsVectorPromotable ? cast<VectorType>(NewAllocaTy) : 0), - ElementTy(VecTy ? VecTy->getElementType() : 0), + VecTy(IsVectorPromotable ? cast<VectorType>(NewAllocaTy) : nullptr), + ElementTy(VecTy ? VecTy->getElementType() : nullptr), ElementSize(VecTy ? DL.getTypeSizeInBits(ElementTy) / 8 : 0), IntTy(IsIntegerPromotable ? 
Type::getIntNTy( NewAI.getContext(), DL.getTypeSizeInBits(NewAI.getAllocatedType())) - : 0), + : nullptr), BeginOffset(), EndOffset(), IsSplittable(), IsSplit(), OldUse(), OldPtr(), PHIUsers(PHIUsers), SelectUsers(SelectUsers), IRB(NewAI.getContext(), ConstantFolder()) { @@ -2144,7 +2148,7 @@ private: /// /// You can optionally pass a type to this routine and if that type's ABI /// alignment is itself suitable, this will return zero. - unsigned getSliceAlign(Type *Ty = 0) { + unsigned getSliceAlign(Type *Ty = nullptr) { unsigned NewAIAlign = NewAI.getAlignment(); if (!NewAIAlign) NewAIAlign = DL.getABITypeAlignment(NewAI.getAllocatedType()); @@ -2594,7 +2598,7 @@ private: unsigned EndIndex = VecTy ? getIndex(NewEndOffset) : 0; unsigned NumElements = EndIndex - BeginIndex; IntegerType *SubIntTy - = IntTy ? Type::getIntNTy(IntTy->getContext(), Size*8) : 0; + = IntTy ? Type::getIntNTy(IntTy->getContext(), Size*8) : nullptr; // Reset the other pointer type to match the register type we're going to // use, but using the address space of the original other pointer. @@ -2992,22 +2996,22 @@ static Type *getTypePartition(const DataLayout &DL, Type *Ty, return stripAggregateTypeWrapping(DL, Ty); if (Offset > DL.getTypeAllocSize(Ty) || (DL.getTypeAllocSize(Ty) - Offset) < Size) - return 0; + return nullptr; if (SequentialType *SeqTy = dyn_cast<SequentialType>(Ty)) { // We can't partition pointers... if (SeqTy->isPointerTy()) - return 0; + return nullptr; Type *ElementTy = SeqTy->getElementType(); uint64_t ElementSize = DL.getTypeAllocSize(ElementTy); uint64_t NumSkippedElements = Offset / ElementSize; if (ArrayType *ArrTy = dyn_cast<ArrayType>(SeqTy)) { if (NumSkippedElements >= ArrTy->getNumElements()) - return 0; + return nullptr; } else if (VectorType *VecTy = dyn_cast<VectorType>(SeqTy)) { if (NumSkippedElements >= VecTy->getNumElements()) - return 0; + return nullptr; } Offset -= NumSkippedElements * ElementSize; @@ -3015,7 +3019,7 @@ static Type *getTypePartition(const DataLayout &DL, Type *Ty, if (Offset > 0 || Size < ElementSize) { // Bail if the partition ends in a different array element. if ((Offset + Size) > ElementSize) - return 0; + return nullptr; // Recurse through the element type trying to peel off offset bytes. return getTypePartition(DL, ElementTy, Offset, Size); } @@ -3026,20 +3030,20 @@ static Type *getTypePartition(const DataLayout &DL, Type *Ty, assert(Size > ElementSize); uint64_t NumElements = Size / ElementSize; if (NumElements * ElementSize != Size) - return 0; + return nullptr; return ArrayType::get(ElementTy, NumElements); } StructType *STy = dyn_cast<StructType>(Ty); if (!STy) - return 0; + return nullptr; const StructLayout *SL = DL.getStructLayout(STy); if (Offset >= SL->getSizeInBytes()) - return 0; + return nullptr; uint64_t EndOffset = Offset + Size; if (EndOffset > SL->getSizeInBytes()) - return 0; + return nullptr; unsigned Index = SL->getElementContainingOffset(Offset); Offset -= SL->getElementOffset(Index); @@ -3047,12 +3051,12 @@ static Type *getTypePartition(const DataLayout &DL, Type *Ty, Type *ElementTy = STy->getElementType(Index); uint64_t ElementSize = DL.getTypeAllocSize(ElementTy); if (Offset >= ElementSize) - return 0; // The offset points into alignment padding. + return nullptr; // The offset points into alignment padding. // See if any partition must be contained by the element. 
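getTypePartition, whose early exits are being converted to nullptr above, asks whether a byte range lands exactly inside one element of an aggregate, recursing when it does and giving up when the range straddles elements or points into padding. The containment test alone, over a flat (offset, size) field layout with no LLVM types:

#include <cstdint>
#include <utility>
#include <vector>

// Return the index of the single field containing [Offset, Offset+Size),
// or -1 where the real code returns nullptr: the range straddles fields
// or sits in padding.
int containingField(const std::vector<std::pair<uint64_t, uint64_t>> &Fields,
                    uint64_t Offset, uint64_t Size) {
  for (int i = 0, e = (int)Fields.size(); i != e; ++i) {
    uint64_t Begin = Fields[i].first;
    uint64_t End = Begin + Fields[i].second;
    if (Offset >= Begin && Offset + Size <= End)
      return i; // fits: recurse into this field's own type next
  }
  return -1;
}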
if (Offset > 0 || Size < ElementSize) { if ((Offset + Size) > ElementSize) - return 0; + return nullptr; return getTypePartition(DL, ElementTy, Offset, Size); } assert(Offset == 0); @@ -3065,14 +3069,14 @@ static Type *getTypePartition(const DataLayout &DL, Type *Ty, if (EndOffset < SL->getSizeInBytes()) { unsigned EndIndex = SL->getElementContainingOffset(EndOffset); if (Index == EndIndex) - return 0; // Within a single element and its padding. + return nullptr; // Within a single element and its padding. // Don't try to form "natural" types if the elements don't line up with the // expected size. // FIXME: We could potentially recurse down through the last element in the // sub-struct to find a natural end point. if (SL->getElementOffset(EndIndex) != EndOffset) - return 0; + return nullptr; assert(Index < EndIndex); EE = STy->element_begin() + EndIndex; @@ -3083,7 +3087,7 @@ static Type *getTypePartition(const DataLayout &DL, Type *Ty, STy->isPacked()); const StructLayout *SubSL = DL.getStructLayout(SubTy); if (Size != SubSL->getSizeInBytes()) - return 0; // The sub-struct doesn't have quite the size needed. + return nullptr; // The sub-struct doesn't have quite the size needed. return SubTy; } @@ -3108,7 +3112,7 @@ bool SROA::rewritePartition(AllocaInst &AI, AllocaSlices &S, // Try to compute a friendly type for this partition of the alloca. This // won't always succeed, in which case we fall back to a legal integer type // or an i8 array of an appropriate size. - Type *SliceTy = 0; + Type *SliceTy = nullptr; if (Type *CommonUseTy = findCommonType(B, E, EndOffset)) if (DL->getTypeAllocSize(CommonUseTy) >= SliceSize) SliceTy = CommonUseTy; @@ -3155,7 +3159,7 @@ bool SROA::rewritePartition(AllocaInst &AI, AllocaSlices &S, // the alloca's alignment unconstrained. if (Alignment <= DL->getABITypeAlignment(SliceTy)) Alignment = 0; - NewAI = new AllocaInst(SliceTy, 0, Alignment, + NewAI = new AllocaInst(SliceTy, nullptr, Alignment, AI.getName() + ".sroa." + Twine(B - S.begin()), &AI); ++NumNewAllocas; } @@ -3494,7 +3498,7 @@ void SROA::deleteDeadInstructions(SmallPtrSet<AllocaInst*, 4> &DeletedAllocas) { for (Use &Operand : I->operands()) if (Instruction *U = dyn_cast<Instruction>(Operand)) { // Zero out the operand and see if it becomes trivially dead. - Operand = 0; + Operand = nullptr; if (isInstructionTriviallyDead(U)) DeadInsts.insert(U); } @@ -3612,7 +3616,7 @@ bool SROA::runOnFunction(Function &F) { DL = &DLP->getDataLayout(); DominatorTreeWrapperPass *DTWP = getAnalysisIfAvailable<DominatorTreeWrapperPass>(); - DT = DTWP ? &DTWP->getDomTree() : 0; + DT = DTWP ? &DTWP->getDomTree() : nullptr; BasicBlock &EntryBB = F.getEntryBlock(); for (BasicBlock::iterator I = EntryBB.begin(), E = std::prev(EntryBB.end()); diff --git a/lib/Transforms/Scalar/SampleProfile.cpp b/lib/Transforms/Scalar/SampleProfile.cpp index 20d6daa..8e557aa 100644 --- a/lib/Transforms/Scalar/SampleProfile.cpp +++ b/lib/Transforms/Scalar/SampleProfile.cpp @@ -22,8 +22,6 @@ // //===----------------------------------------------------------------------===// -#define DEBUG_TYPE "sample-profile" - #include "llvm/Transforms/Scalar.h" #include "llvm/ADT/DenseMap.h" #include "llvm/ADT/SmallPtrSet.h" @@ -54,6 +52,8 @@ using namespace llvm; +#define DEBUG_TYPE "sample-profile" + // Command line option to specify the file to read samples from. This is // mainly used for debugging. 
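deleteDeadInstructions above is the classic deletion cascade: zeroing out a dead instruction's operands may leave their definitions without users, which queues them for deletion in turn. The worklist pattern with a toy use-count map in place of IR:

#include <map>
#include <vector>

// Operands[I] lists what instruction I uses; UseCount[I] counts its users.
void cascadeDelete(int Dead, std::map<int, std::vector<int>> &Operands,
                   std::map<int, int> &UseCount) {
  std::vector<int> Work{Dead};
  while (!Work.empty()) {
    int I = Work.back();
    Work.pop_back();
    for (int Op : Operands[I])
      if (--UseCount[Op] == 0) // lost its last user: now trivially dead
        Work.push_back(Op);
    Operands.erase(I);
  }
}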
static cl::opt<std::string> SampleProfileFile( @@ -120,8 +120,8 @@ typedef DenseMap<BasicBlock *, SmallVector<BasicBlock *, 8>> BlockEdgeMap; class SampleFunctionProfile { public: SampleFunctionProfile() - : TotalSamples(0), TotalHeadSamples(0), HeaderLineno(0), DT(0), PDT(0), - LI(0), Ctx(0) {} + : TotalSamples(0), TotalHeadSamples(0), HeaderLineno(0), DT(nullptr), + PDT(nullptr), LI(nullptr), Ctx(nullptr) {} unsigned getFunctionLoc(Function &F); bool emitAnnotations(Function &F, DominatorTree *DomTree, @@ -315,7 +315,7 @@ protected: /// \brief Name of the profile file to load. StringRef Filename; - /// \brief Flag indicating whether the profile input loaded succesfully. + /// \brief Flag indicating whether the profile input loaded successfully. bool ProfileIsValid; }; } diff --git a/lib/Transforms/Scalar/Scalar.cpp b/lib/Transforms/Scalar/Scalar.cpp index e950eba..f8f828c 100644 --- a/lib/Transforms/Scalar/Scalar.cpp +++ b/lib/Transforms/Scalar/Scalar.cpp @@ -64,6 +64,7 @@ void llvm::initializeScalarOpts(PassRegistry &Registry) { initializeStructurizeCFGPass(Registry); initializeSinkingPass(Registry); initializeTailCallElimPass(Registry); + initializeSeparateConstOffsetFromGEPPass(Registry); } void LLVMInitializeScalarOpts(LLVMPassRegistryRef R) { @@ -181,6 +182,7 @@ void LLVMAddDemoteMemoryToRegisterPass(LLVMPassManagerRef PM) { void LLVMAddVerifierPass(LLVMPassManagerRef PM) { unwrap(PM)->add(createVerifierPass()); + // FIXME: should this also add createDebugInfoVerifierPass()? } void LLVMAddCorrelatedValuePropagationPass(LLVMPassManagerRef PM) { diff --git a/lib/Transforms/Scalar/ScalarReplAggregates.cpp b/lib/Transforms/Scalar/ScalarReplAggregates.cpp index e7b5ab2..58192fc 100644 --- a/lib/Transforms/Scalar/ScalarReplAggregates.cpp +++ b/lib/Transforms/Scalar/ScalarReplAggregates.cpp @@ -19,7 +19,6 @@ // //===----------------------------------------------------------------------===// -#define DEBUG_TYPE "scalarrepl" #include "llvm/Transforms/Scalar.h" #include "llvm/ADT/SetVector.h" #include "llvm/ADT/SmallVector.h" @@ -52,6 +51,8 @@ #include "llvm/Transforms/Utils/SSAUpdater.h" using namespace llvm; +#define DEBUG_TYPE "scalarrepl" + STATISTIC(NumReplaced, "Number of allocas broken up"); STATISTIC(NumPromoted, "Number of allocas promoted"); STATISTIC(NumAdjusted, "Number of scalar allocas adjusted to allow promotion"); @@ -304,7 +305,7 @@ public: explicit ConvertToScalarInfo(unsigned Size, const DataLayout &DL, unsigned SLT) : AllocaSize(Size), DL(DL), ScalarLoadThreshold(SLT), IsNotTrivial(false), - ScalarKind(Unknown), VectorTy(0), HadNonMemTransferAccess(false), + ScalarKind(Unknown), VectorTy(nullptr), HadNonMemTransferAccess(false), HadDynamicAccess(false) { } AllocaInst *TryConvert(AllocaInst *AI); @@ -332,8 +333,8 @@ private: AllocaInst *ConvertToScalarInfo::TryConvert(AllocaInst *AI) { // If we can't convert this scalar, or if mem2reg can trivially do it, bail // out. - if (!CanConvertToScalar(AI, 0, 0) || !IsNotTrivial) - return 0; + if (!CanConvertToScalar(AI, 0, nullptr) || !IsNotTrivial) + return nullptr; // If an alloca has only memset / memcpy uses, it may still have an Unknown // ScalarKind. Treat it as an Integer below. @@ -361,23 +362,24 @@ AllocaInst *ConvertToScalarInfo::TryConvert(AllocaInst *AI) { // Do not convert to scalar integer if the alloca size exceeds the // scalar load threshold. 
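The scalar replacement pass whose state is being modernized above is easiest to picture at the source level: an aggregate alloca that never escapes is split into independent scalars, which mem2reg can then promote to SSA values. A hand-written before/after, not compiler output:

struct Pair { int a, b; };

int before() {
  Pair p;      // one aggregate alloca
  p.a = 1;
  p.b = 2;
  return p.a + p.b;
}

int after() {
  int p_a = 1; // split into scalar "allocas", each promotable to SSA
  int p_b = 2;
  return p_a + p_b;
}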
if (BitWidth > ScalarLoadThreshold) - return 0; + return nullptr; if ((ScalarKind == ImplicitVector || ScalarKind == Integer) && !HadNonMemTransferAccess && !DL.fitsInLegalInteger(BitWidth)) - return 0; + return nullptr; // Dynamic accesses on integers aren't yet supported. They need us to shift // by a dynamic amount which could be difficult to work out as we might not // know whether to use a left or right shift. if (ScalarKind == Integer && HadDynamicAccess) - return 0; + return nullptr; DEBUG(dbgs() << "CONVERT TO SCALAR INTEGER: " << *AI << "\n"); // Create and insert the integer alloca. NewTy = IntegerType::get(AI->getContext(), BitWidth); } - AllocaInst *NewAI = new AllocaInst(NewTy, 0, "", AI->getParent()->begin()); - ConvertUsesToScalar(AI, NewAI, 0, 0); + AllocaInst *NewAI = new AllocaInst(NewTy, nullptr, "", + AI->getParent()->begin()); + ConvertUsesToScalar(AI, NewAI, 0, nullptr); return NewAI; } @@ -508,7 +510,7 @@ bool ConvertToScalarInfo::CanConvertToScalar(Value *V, uint64_t Offset, // Compute the offset that this GEP adds to the pointer. SmallVector<Value*, 8> Indices(GEP->op_begin()+1, GEP->op_end()); - Value *GEPNonConstantIdx = 0; + Value *GEPNonConstantIdx = nullptr; if (!GEP->hasAllConstantIndices()) { if (!isa<VectorType>(PtrTy->getElementType())) return false; @@ -564,7 +566,7 @@ bool ConvertToScalarInfo::CanConvertToScalar(Value *V, uint64_t Offset, if (NonConstantIdx) return false; ConstantInt *Len = dyn_cast<ConstantInt>(MTI->getLength()); - if (Len == 0 || Len->getZExtValue() != AllocaSize || Offset != 0) + if (!Len || Len->getZExtValue() != AllocaSize || Offset != 0) return false; IsNotTrivial = true; // Can't be mem2reg'd. @@ -608,7 +610,7 @@ void ConvertToScalarInfo::ConvertUsesToScalar(Value *Ptr, AllocaInst *NewAI, if (GetElementPtrInst *GEP = dyn_cast<GetElementPtrInst>(User)) { // Compute the offset that this GEP adds to the pointer. 
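As the comment above says, the offset a GEP adds is a sum of index times element-size terms, and the GEPNonConstantIdx path handles the case where one index is not a constant. The constant part of that computation, simplified to parallel index and size vectors of equal length (an assumption of this sketch):

#include <cstdint>
#include <optional>
#include <vector>

// Each index scales its level's element size; a missing (non-constant)
// index means the total offset is not a compile-time constant.
std::optional<int64_t>
gepConstOffset(const std::vector<std::optional<int64_t>> &Indices,
               const std::vector<int64_t> &EltSizes) {
  int64_t Off = 0;
  for (size_t i = 0; i != Indices.size(); ++i) {
    if (!Indices[i])
      return std::nullopt;
    Off += *Indices[i] * EltSizes[i];
  }
  return Off;
}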
SmallVector<Value*, 8> Indices(GEP->op_begin()+1, GEP->op_end()); - Value* GEPNonConstantIdx = 0; + Value* GEPNonConstantIdx = nullptr; if (!GEP->hasAllConstantIndices()) { assert(!NonConstantIdx && "Dynamic GEP reading from dynamic GEP unsupported"); @@ -671,7 +673,7 @@ void ConvertToScalarInfo::ConvertUsesToScalar(Value *Ptr, AllocaInst *NewAI, Instruction *Old = Builder.CreateLoad(NewAI, NewAI->getName()+".in"); Value *New = ConvertScalar_InsertValue( ConstantInt::get(User->getContext(), APVal), - Old, Offset, 0, Builder); + Old, Offset, nullptr, Builder); Builder.CreateStore(New, NewAI); // If the load we just inserted is now dead, then the memset overwrote @@ -809,7 +811,7 @@ ConvertScalar_ExtractValue(Value *FromVal, Type *ToType, for (unsigned i = 0, e = ST->getNumElements(); i != e; ++i) { Value *Elt = ConvertScalar_ExtractValue(FromVal, ST->getElementType(i), Offset+Layout.getElementOffsetInBits(i), - 0, Builder); + nullptr, Builder); Res = Builder.CreateInsertValue(Res, Elt, i); } return Res; @@ -822,7 +824,8 @@ ConvertScalar_ExtractValue(Value *FromVal, Type *ToType, Value *Res = UndefValue::get(AT); for (unsigned i = 0, e = AT->getNumElements(); i != e; ++i) { Value *Elt = ConvertScalar_ExtractValue(FromVal, AT->getElementType(), - Offset+i*EltSize, 0, Builder); + Offset+i*EltSize, nullptr, + Builder); Res = Builder.CreateInsertValue(Res, Elt, i); } return Res; @@ -938,7 +941,7 @@ ConvertScalar_InsertValue(Value *SV, Value *Old, Value *Elt = Builder.CreateExtractValue(SV, i); Old = ConvertScalar_InsertValue(Elt, Old, Offset+Layout.getElementOffsetInBits(i), - 0, Builder); + nullptr, Builder); } return Old; } @@ -949,7 +952,8 @@ ConvertScalar_InsertValue(Value *SV, Value *Old, uint64_t EltSize = DL.getTypeAllocSizeInBits(AT->getElementType()); for (unsigned i = 0, e = AT->getNumElements(); i != e; ++i) { Value *Elt = Builder.CreateExtractValue(SV, i); - Old = ConvertScalar_InsertValue(Elt, Old, Offset+i*EltSize, 0, Builder); + Old = ConvertScalar_InsertValue(Elt, Old, Offset+i*EltSize, nullptr, + Builder); } return Old; } @@ -1024,7 +1028,7 @@ bool SROA::runOnFunction(Function &F) { return false; DataLayoutPass *DLP = getAnalysisIfAvailable<DataLayoutPass>(); - DL = DLP ? &DLP->getDataLayout() : 0; + DL = DLP ? &DLP->getDataLayout() : nullptr; bool Changed = performPromotion(F); @@ -1054,7 +1058,7 @@ class AllocaPromoter : public LoadAndStorePromoter { public: AllocaPromoter(const SmallVectorImpl<Instruction*> &Insts, SSAUpdater &S, DIBuilder *DB) - : LoadAndStorePromoter(Insts, S), AI(0), DIB(DB) {} + : LoadAndStorePromoter(Insts, S), AI(nullptr), DIB(DB) {} void run(AllocaInst *AI, const SmallVectorImpl<Instruction*> &Insts) { // Remember which alloca we're promoting (for isInstInList). @@ -1100,7 +1104,7 @@ public: for (SmallVectorImpl<DbgValueInst *>::const_iterator I = DVIs.begin(), E = DVIs.end(); I != E; ++I) { DbgValueInst *DVI = *I; - Value *Arg = NULL; + Value *Arg = nullptr; if (StoreInst *SI = dyn_cast<StoreInst>(Inst)) { // If an argument is zero extended then use argument directly. The ZExt // may be zapped by an optimization pass in future. @@ -1143,7 +1147,7 @@ static bool isSafeSelectToSpeculate(SelectInst *SI, const DataLayout *DL) { for (User *U : SI->users()) { LoadInst *LI = dyn_cast<LoadInst>(U); - if (LI == 0 || !LI->isSimple()) return false; + if (!LI || !LI->isSimple()) return false; // Both operands to the select need to be dereferencable, either absolutely // (e.g. allocas) or at this point because we can see other accesses to it. 
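isSafeSelectToSpeculate, in the hunk above, guards the rewrite of a load of a selected pointer into a select of two loads, which is legal only because both pointers are known dereferenceable. The rewrite by hand at the source level:

// load (select c, p, q)  ==>  select c, (load p), (load q)
int loadOfSelect(bool c, int *p, int *q) {
  return *(c ? p : q); // one conditional load
}

int selectOfLoads(bool c, int *p, int *q) {
  int pv = *p, qv = *q; // both loads executed speculatively
  return c ? pv : qv;   // then a simple select
}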
@@ -1183,7 +1187,7 @@ static bool isSafePHIToSpeculate(PHINode *PN, const DataLayout *DL) { unsigned MaxAlign = 0; for (User *U : PN->users()) { LoadInst *LI = dyn_cast<LoadInst>(U); - if (LI == 0 || !LI->isSimple()) return false; + if (!LI || !LI->isSimple()) return false; // For now we only allow loads in the same block as the PHI. This is a // common case that happens when instcombine merges two loads through a PHI. @@ -1380,7 +1384,7 @@ static bool tryToMakeAllocaBePromotable(AllocaInst *AI, const DataLayout *DL) { for (unsigned i = 0, e = PN->getNumIncomingValues(); i != e; ++i) { BasicBlock *Pred = PN->getIncomingBlock(i); LoadInst *&Load = InsertedLoads[Pred]; - if (Load == 0) { + if (!Load) { Load = new LoadInst(PN->getIncomingValue(i), PN->getName() + "." + Pred->getName(), Pred->getTerminator()); @@ -1400,7 +1404,7 @@ static bool tryToMakeAllocaBePromotable(AllocaInst *AI, const DataLayout *DL) { bool SROA::performPromotion(Function &F) { std::vector<AllocaInst*> Allocas; - DominatorTree *DT = 0; + DominatorTree *DT = nullptr; if (HasDomTree) DT = &getAnalysis<DominatorTreeWrapperPass>().getDomTree(); @@ -1537,7 +1541,7 @@ void SROA::DoScalarReplacement(AllocaInst *AI, if (StructType *ST = dyn_cast<StructType>(AI->getAllocatedType())) { ElementAllocas.reserve(ST->getNumContainedTypes()); for (unsigned i = 0, e = ST->getNumContainedTypes(); i != e; ++i) { - AllocaInst *NA = new AllocaInst(ST->getContainedType(i), 0, + AllocaInst *NA = new AllocaInst(ST->getContainedType(i), nullptr, AI->getAlignment(), AI->getName() + "." + Twine(i), AI); ElementAllocas.push_back(NA); @@ -1548,7 +1552,7 @@ void SROA::DoScalarReplacement(AllocaInst *AI, ElementAllocas.reserve(AT->getNumElements()); Type *ElTy = AT->getElementType(); for (unsigned i = 0, e = AT->getNumElements(); i != e; ++i) { - AllocaInst *NA = new AllocaInst(ElTy, 0, AI->getAlignment(), + AllocaInst *NA = new AllocaInst(ElTy, nullptr, AI->getAlignment(), AI->getName() + "." + Twine(i), AI); ElementAllocas.push_back(NA); WorkList.push_back(NA); // Add to worklist for recursive processing @@ -1577,7 +1581,7 @@ void SROA::DeleteDeadInstructions() { // Zero out the operand and see if it becomes trivially dead. // (But, don't add allocas to the dead instruction list -- they are // already on the worklist and will be deleted separately.) - *OI = 0; + *OI = nullptr; if (isInstructionTriviallyDead(U) && !isa<AllocaInst>(U)) DeadInsts.push_back(U); } @@ -1604,12 +1608,10 @@ void SROA::isSafeForScalarRepl(Instruction *I, uint64_t Offset, isSafeForScalarRepl(GEPI, GEPOffset, Info); } else if (MemIntrinsic *MI = dyn_cast<MemIntrinsic>(User)) { ConstantInt *Length = dyn_cast<ConstantInt>(MI->getLength()); - if (Length == 0) - return MarkUnsafe(Info, User); - if (Length->isNegative()) + if (!Length || Length->isNegative()) return MarkUnsafe(Info, User); - isSafeMemAccess(Offset, Length->getZExtValue(), 0, + isSafeMemAccess(Offset, Length->getZExtValue(), nullptr, U.getOperandNo() == 0, Info, MI, true /*AllowWholeAccess*/); } else if (LoadInst *LI = dyn_cast<LoadInst>(User)) { @@ -1744,12 +1746,12 @@ static bool isHomogeneousAggregate(Type *T, unsigned &NumElts, Type *&EltTy) { if (ArrayType *AT = dyn_cast<ArrayType>(T)) { NumElts = AT->getNumElements(); - EltTy = (NumElts == 0 ? 0 : AT->getElementType()); + EltTy = (NumElts == 0 ? nullptr : AT->getElementType()); return true; } if (StructType *ST = dyn_cast<StructType>(T)) { NumElts = ST->getNumContainedTypes(); - EltTy = (NumElts == 0 ? 0 : ST->getContainedType(0)); + EltTy = (NumElts == 0 ? 
nullptr : ST->getContainedType(0)); for (unsigned n = 1; n < NumElts; ++n) { if (ST->getContainedType(n) != EltTy) return false; @@ -2038,7 +2040,7 @@ void SROA::RewriteGEP(GetElementPtrInst *GEPI, AllocaInst *AI, uint64_t Offset, // In this case, it must be the last GEP operand which is dynamic so keep that // aside until we've found the constant GEP offset then add it back in at the // end. - Value* NonConstantIdx = 0; + Value* NonConstantIdx = nullptr; if (!GEPI->hasAllConstantIndices()) NonConstantIdx = Indices.pop_back_val(); Offset += DL->getIndexedOffset(GEPI->getPointerOperandType(), Indices); @@ -2108,7 +2110,8 @@ void SROA::RewriteLifetimeIntrinsic(IntrinsicInst *II, AllocaInst *AI, if (NewOffset) { // Splice the first element and index 'NewOffset' bytes in. SROA will // split the alloca again later. - Value *V = Builder.CreateBitCast(NewElts[Idx], Builder.getInt8PtrTy()); + unsigned AS = AI->getType()->getAddressSpace(); + Value *V = Builder.CreateBitCast(NewElts[Idx], Builder.getInt8PtrTy(AS)); V = Builder.CreateGEP(V, Builder.getInt64(NewOffset)); IdxTy = NewElts[Idx]->getAllocatedType(); @@ -2155,7 +2158,7 @@ SROA::RewriteMemIntrinUserOfAlloca(MemIntrinsic *MI, Instruction *Inst, // appropriate type. The "Other" pointer is the pointer that goes to memory // that doesn't have anything to do with the alloca that we are promoting. For // memset, this Value* stays null. - Value *OtherPtr = 0; + Value *OtherPtr = nullptr; unsigned MemAlignment = MI->getAlignment(); if (MemTransferInst *MTI = dyn_cast<MemTransferInst>(MI)) { // memmove/memcopy if (Inst == MTI->getRawDest()) @@ -2207,7 +2210,7 @@ SROA::RewriteMemIntrinUserOfAlloca(MemIntrinsic *MI, Instruction *Inst, for (unsigned i = 0, e = NewElts.size(); i != e; ++i) { // If this is a memcpy/memmove, emit a GEP of the other element address. - Value *OtherElt = 0; + Value *OtherElt = nullptr; unsigned OtherEltAlign = MemAlignment; if (OtherPtr) { @@ -2449,7 +2452,7 @@ SROA::RewriteLoadUserOfWholeAlloca(LoadInst *LI, AllocaInst *AI, // There are two forms here: AI could be an array or struct. Both cases // have different ways to compute the element offset. - const StructLayout *Layout = 0; + const StructLayout *Layout = nullptr; uint64_t ArrayEltBitOffset = 0; if (StructType *EltSTy = dyn_cast<StructType>(AllocaEltTy)) { Layout = DL->getStructLayout(EltSTy); diff --git a/lib/Transforms/Scalar/Scalarizer.cpp b/lib/Transforms/Scalar/Scalarizer.cpp index 006375c..7a73f11 100644 --- a/lib/Transforms/Scalar/Scalarizer.cpp +++ b/lib/Transforms/Scalar/Scalarizer.cpp @@ -14,7 +14,6 @@ // //===----------------------------------------------------------------------===// -#define DEBUG_TYPE "scalarizer" #include "llvm/ADT/STLExtras.h" #include "llvm/IR/IRBuilder.h" #include "llvm/IR/InstVisitor.h" @@ -25,6 +24,8 @@ using namespace llvm; +#define DEBUG_TYPE "scalarizer" + namespace { // Used to store the scattered form of a vector. typedef SmallVector<Value *, 8> ValueVector; @@ -48,7 +49,7 @@ public: // insert them before BBI in BB. If Cache is nonnull, use it to cache // the results. Scatterer(BasicBlock *bb, BasicBlock::iterator bbi, Value *v, - ValueVector *cachePtr = 0); + ValueVector *cachePtr = nullptr); // Return component I, creating a new Value for it if necessary. Value *operator[](unsigned I); @@ -101,7 +102,7 @@ struct BinarySplitter { // Information about a load or store that we're scalarizing. 
struct VectorLayout { - VectorLayout() : VecTy(0), ElemTy(0), VecAlign(0), ElemSize(0) {} + VectorLayout() : VecTy(nullptr), ElemTy(nullptr), VecAlign(0), ElemSize(0) {} // Return the alignment of element I. uint64_t getElemAlign(unsigned I) { @@ -186,9 +187,9 @@ Scatterer::Scatterer(BasicBlock *bb, BasicBlock::iterator bbi, Value *v, Ty = PtrTy->getElementType(); Size = Ty->getVectorNumElements(); if (!CachePtr) - Tmp.resize(Size, 0); + Tmp.resize(Size, nullptr); else if (CachePtr->empty()) - CachePtr->resize(Size, 0); + CachePtr->resize(Size, nullptr); else assert(Size == CachePtr->size() && "Inconsistent vector sizes"); } @@ -241,7 +242,7 @@ bool Scalarizer::doInitialization(Module &M) { bool Scalarizer::runOnFunction(Function &F) { DataLayoutPass *DLP = getAnalysisIfAvailable<DataLayoutPass>(); - DL = DLP ? &DLP->getDataLayout() : 0; + DL = DLP ? &DLP->getDataLayout() : nullptr; for (Function::iterator BBI = F.begin(), BBE = F.end(); BBI != BBE; ++BBI) { BasicBlock *BB = BBI; for (BasicBlock::iterator II = BB->begin(), IE = BB->end(); II != IE;) { diff --git a/lib/Transforms/Scalar/SeparateConstOffsetFromGEP.cpp b/lib/Transforms/Scalar/SeparateConstOffsetFromGEP.cpp new file mode 100644 index 0000000..b8529e1 --- /dev/null +++ b/lib/Transforms/Scalar/SeparateConstOffsetFromGEP.cpp @@ -0,0 +1,623 @@ +//===-- SeparateConstOffsetFromGEP.cpp - ------------------------*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// Loop unrolling may create many similar GEPs for array accesses. +// e.g., a 2-level loop +// +// float a[32][32]; // global variable +// +// for (int i = 0; i < 2; ++i) { +// for (int j = 0; j < 2; ++j) { +// ... +// ... = a[x + i][y + j]; +// ... +// } +// } +// +// will probably be unrolled to: +// +// gep %a, 0, %x, %y; load +// gep %a, 0, %x, %y + 1; load +// gep %a, 0, %x + 1, %y; load +// gep %a, 0, %x + 1, %y + 1; load +// +// LLVM's GVN does not use partial redundancy elimination yet, and is thus +// unable to reuse (gep %a, 0, %x, %y). As a result, this misoptimization incurs +// significant slowdown in targets with limited addressing modes. For instance, +// because the PTX target does not support the reg+reg addressing mode, the +// NVPTX backend emits PTX code that literally computes the pointer address of +// each GEP, wasting tons of registers. It emits the following PTX for the +// first load and similar PTX for other loads. +// +// mov.u32 %r1, %x; +// mov.u32 %r2, %y; +// mul.wide.u32 %rl2, %r1, 128; +// mov.u64 %rl3, a; +// add.s64 %rl4, %rl3, %rl2; +// mul.wide.u32 %rl5, %r2, 4; +// add.s64 %rl6, %rl4, %rl5; +// ld.global.f32 %f1, [%rl6]; +// +// To reduce the register pressure, the optimization implemented in this file +// merges the common part of a group of GEPs, so we can compute each pointer +// address by adding a simple offset to the common part, saving many registers. +// +// It works by splitting each GEP into a variadic base and a constant offset. +// The variadic base can be computed once and reused by multiple GEPs, and the +// constant offsets can be nicely folded into the reg+immediate addressing mode +// (supported by most targets) without using any extra register. 
+// +// For instance, we transform the four GEPs and four loads in the above example +// into: +// +// base = gep a, 0, x, y +// load base +// load base + 1 * sizeof(float) +// load base + 32 * sizeof(float) +// load base + 33 * sizeof(float) +// +// Given the transformed IR, a backend that supports the reg+immediate +// addressing mode can easily fold the pointer arithmetic into the loads. For +// example, the NVPTX backend can easily fold the pointer arithmetic into the +// ld.global.f32 instructions, and the resultant PTX uses far fewer registers. +// +// mov.u32 %r1, %tid.x; +// mov.u32 %r2, %tid.y; +// mul.wide.u32 %rl2, %r1, 128; +// mov.u64 %rl3, a; +// add.s64 %rl4, %rl3, %rl2; +// mul.wide.u32 %rl5, %r2, 4; +// add.s64 %rl6, %rl4, %rl5; +// ld.global.f32 %f1, [%rl6]; // so far the same as unoptimized PTX +// ld.global.f32 %f2, [%rl6+4]; // much better +// ld.global.f32 %f3, [%rl6+128]; // much better +// ld.global.f32 %f4, [%rl6+132]; // much better +// +//===----------------------------------------------------------------------===// + +#include "llvm/Analysis/TargetTransformInfo.h" +#include "llvm/Analysis/ValueTracking.h" +#include "llvm/IR/Constants.h" +#include "llvm/IR/DataLayout.h" +#include "llvm/IR/Instructions.h" +#include "llvm/IR/LLVMContext.h" +#include "llvm/IR/Module.h" +#include "llvm/IR/Operator.h" +#include "llvm/Support/CommandLine.h" +#include "llvm/Support/raw_ostream.h" +#include "llvm/Transforms/Scalar.h" + +using namespace llvm; + +static cl::opt<bool> DisableSeparateConstOffsetFromGEP( + "disable-separate-const-offset-from-gep", cl::init(false), + cl::desc("Do not separate the constant offset from a GEP instruction"), + cl::Hidden); + +namespace { + +/// \brief A helper class for separating a constant offset from a GEP index. +/// +/// In real programs, a GEP index may be more complicated than a simple addition +/// of a value and a constant integer that can be trivially split off. For +/// example, to split ((a << 3) | 5) + b, we need to search deeper for the +/// constant offset, so that we can separate the index into (a << 3) + b and 5. +/// +/// Therefore, this class looks into the expression that computes a given GEP +/// index, and tries to find a constant integer that can be hoisted to the +/// outermost level of the expression as an addition. Not every constant in an +/// expression can jump out; e.g., we cannot transform (b * (a + 5)) to (b * a + +/// 5), nor (3 * (a + 5)) to (3 * a + 5). In the latter case, however, +/// -instcombine has probably already optimized (3 * (a + 5)) to (3 * a + 15). +class ConstantOffsetExtractor { + public: + /// Extracts a constant offset from the given GEP index. It outputs the + /// numeric value of the extracted constant offset (0 if it fails), and a + /// new index representing the remainder (equal to the original index minus + /// the constant offset). + /// \p Idx The given GEP index + /// \p NewIdx The new index to replace + /// \p DL The data layout of the module + /// \p IP Calculating the new index requires new instructions. IP indicates + /// where to insert them (typically right before the GEP). + static int64_t Extract(Value *Idx, Value *&NewIdx, const DataLayout *DL, + Instruction *IP); + /// Looks for a constant offset without extracting it. The meaning of the + /// arguments and the return value are the same as Extract.
+ static int64_t Find(Value *Idx, const DataLayout *DL); + + private: + ConstantOffsetExtractor(const DataLayout *Layout, Instruction *InsertionPt) + : DL(Layout), IP(InsertionPt) {} + /// Searches the expression that computes V for a constant offset. If the + /// search is successful, updates UserChain as a path from V to the constant + /// offset. + int64_t find(Value *V); + /// A helper function to look into both operands of a binary operator U. + /// \p IsSub Whether U is a sub operator. If so, we need to negate the + /// constant offset at some point. + int64_t findInEitherOperand(User *U, bool IsSub); + /// After finding the constant offset and how it is reached from the GEP + /// index, we build a new index which is a clone of the old one except the + /// constant offset is removed. For example, given (a + (b + 5)) and knowing + /// the constant offset is 5, this function returns (a + b). + /// + /// We cannot simply change the constant to zero because the expression that + /// computes the index or its intermediate result may be used by others. + Value *rebuildWithoutConstantOffset(); + /// A helper function for rebuildWithoutConstantOffset that rebuilds the direct + /// user (U) of the constant offset (C). + Value *rebuildLeafWithoutConstantOffset(User *U, Value *C); + /// Returns a clone of U with the first occurrence of From replaced by To. + Value *cloneAndReplace(User *U, Value *From, Value *To); + + /// Returns true if LHS and RHS have no bits in common, i.e., LHS & RHS == 0. + bool NoCommonBits(Value *LHS, Value *RHS) const; + /// Computes which bits are known to be one or zero. + /// \p KnownOne Mask of all bits that are known to be one. + /// \p KnownZero Mask of all bits that are known to be zero. + void ComputeKnownBits(Value *V, APInt &KnownOne, APInt &KnownZero) const; + /// Finds the first use of Used in U. Returns -1 if not found. + static unsigned FindFirstUse(User *U, Value *Used); + /// Returns whether OPC (sext or zext) can be distributed to the operands of + /// BO. e.g., sext can be distributed to the operands of an "add nsw" because + /// sext (add nsw a, b) == add nsw (sext a), (sext b). + static bool Distributable(unsigned OPC, BinaryOperator *BO); + + /// The path from the constant offset to the old GEP index. e.g., if the GEP + /// index is "a * b + (c + 5)", then after running function find, UserChain[0] + /// will be the constant 5, UserChain[1] will be the subexpression "c + 5", and + /// UserChain[2] will be the entire expression "a * b + (c + 5)". + /// + /// This path helps rebuildWithoutConstantOffset rebuild the new GEP index. + SmallVector<User *, 8> UserChain; + /// The data layout of the module. Used in ComputeKnownBits. + const DataLayout *DL; + Instruction *IP; /// Insertion position of cloned instructions. +}; + +/// \brief A pass that tries to split every GEP in the function into a variadic +/// base and a constant offset. It is a FunctionPass because searching for the +/// constant offset may inspect other basic blocks.
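+/// +/// In rough outline: for each candidate GEP, accumulateByteOffset sums the +/// constant byte offsets hidden in the indices; if the sum is non-zero and the +/// target reports the resulting reg+immediate addressing mode as legal, +/// splitGEP strips the constants from the indices and appends one trailing GEP +/// (or an i8* "uglygep") that adds the byte offset back.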
+class SeparateConstOffsetFromGEP : public FunctionPass { + public: + static char ID; + SeparateConstOffsetFromGEP() : FunctionPass(ID) { + initializeSeparateConstOffsetFromGEPPass(*PassRegistry::getPassRegistry()); + } + + void getAnalysisUsage(AnalysisUsage &AU) const override { + AU.addRequired<DataLayoutPass>(); + AU.addRequired<TargetTransformInfo>(); + } + bool runOnFunction(Function &F) override; + + private: + /// Tries to split the given GEP into a variadic base and a constant offset, + /// and returns true if the splitting succeeds. + bool splitGEP(GetElementPtrInst *GEP); + /// Finds the constant offset within each index, and accumulates them. This + /// function only inspects the GEP without changing it. The output + /// NeedsExtraction indicates whether we can extract a non-zero constant + /// offset from any index. + int64_t accumulateByteOffset(GetElementPtrInst *GEP, const DataLayout *DL, + bool &NeedsExtraction); +}; +} // anonymous namespace + +char SeparateConstOffsetFromGEP::ID = 0; +INITIALIZE_PASS_BEGIN( + SeparateConstOffsetFromGEP, "separate-const-offset-from-gep", + "Split GEPs to a variadic base and a constant offset for better CSE", false, + false) +INITIALIZE_AG_DEPENDENCY(TargetTransformInfo) +INITIALIZE_PASS_DEPENDENCY(DataLayoutPass) +INITIALIZE_PASS_END( + SeparateConstOffsetFromGEP, "separate-const-offset-from-gep", + "Split GEPs to a variadic base and a constant offset for better CSE", false, + false) + +FunctionPass *llvm::createSeparateConstOffsetFromGEPPass() { + return new SeparateConstOffsetFromGEP(); +} + +bool ConstantOffsetExtractor::Distributable(unsigned OPC, BinaryOperator *BO) { + assert(OPC == Instruction::SExt || OPC == Instruction::ZExt); + + // sext (add/sub nsw A, B) == add/sub nsw (sext A), (sext B) + // zext (add/sub nuw A, B) == add/sub nuw (zext A), (zext B) + if (BO->getOpcode() == Instruction::Add || + BO->getOpcode() == Instruction::Sub) { + return (OPC == Instruction::SExt && BO->hasNoSignedWrap()) || + (OPC == Instruction::ZExt && BO->hasNoUnsignedWrap()); + } + + // sext/zext (and/or/xor A, B) == and/or/xor (sext/zext A), (sext/zext B) + // -instcombine also leverages this invariant to do the reverse + // transformation to reduce integer casts. + return BO->getOpcode() == Instruction::And || + BO->getOpcode() == Instruction::Or || + BO->getOpcode() == Instruction::Xor; +} + +int64_t ConstantOffsetExtractor::findInEitherOperand(User *U, bool IsSub) { + assert(U->getNumOperands() == 2); + int64_t ConstantOffset = find(U->getOperand(0)); + // If we found a constant offset in the left operand, stop and return that. + // This shortcut might cause us to miss opportunities of combining the + // constant offsets in both operands, e.g., (a + 4) + (b + 5) => (a + b) + 9. + // However, such cases are probably already handled by -instcombine, + // given this pass runs after the standard optimizations. + if (ConstantOffset != 0) return ConstantOffset; + ConstantOffset = find(U->getOperand(1)); + // If U is a sub operator, negate the constant offset found in the right + // operand. + return IsSub ? -ConstantOffset : ConstantOffset; +} + +int64_t ConstantOffsetExtractor::find(Value *V) { + // TODO(jingyue): We can even trace into integer/pointer casts, such as + // inttoptr, ptrtoint, bitcast, and addrspacecast. We choose to handle only + // integers because it gives good enough results for our benchmarks. 
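+ // Walk-through on the class comment's example ((a << 3) | 5) + b: we recurse + // into the add, then into the or (whose operands share no common bits), and + // hit the ConstantInt 5. The result is 5, and UserChain is built up as + // [5, (a << 3) | 5, ((a << 3) | 5) + b].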
+ assert(V->getType()->isIntegerTy()); + + User *U = dyn_cast<User>(V); + // We cannot do much with Values that are not a User, such as BasicBlock and + // MDNode. + if (U == nullptr) return 0; + + int64_t ConstantOffset = 0; + if (ConstantInt *CI = dyn_cast<ConstantInt>(U)) { + // Hooray, we found it! + ConstantOffset = CI->getSExtValue(); + } else if (Operator *O = dyn_cast<Operator>(U)) { + // The GEP index may be more complicated than a simple addition of a + // variable and a constant. Therefore, we trace into subexpressions for more + // hoisting opportunities. + switch (O->getOpcode()) { + case Instruction::Add: { + ConstantOffset = findInEitherOperand(U, false); + break; + } + case Instruction::Sub: { + ConstantOffset = findInEitherOperand(U, true); + break; + } + case Instruction::Or: { + // If LHS and RHS don't have common bits, (LHS | RHS) is equivalent to + // (LHS + RHS). + if (NoCommonBits(U->getOperand(0), U->getOperand(1))) + ConstantOffset = findInEitherOperand(U, false); + break; + } + case Instruction::SExt: + case Instruction::ZExt: { + // We trace into sext/zext if the operator can be distributed to its + // operand. e.g., we can trace into "sext (add nsw a, 5)" and + // extract the constant 5, because + // sext (add nsw a, 5) == add nsw (sext a), 5 + if (BinaryOperator *BO = dyn_cast<BinaryOperator>(U->getOperand(0))) { + if (Distributable(O->getOpcode(), BO)) + ConstantOffset = find(U->getOperand(0)); + } + break; + } + } + } + // If we found a non-zero constant offset, add it to the path for the future + // transformation (rebuildWithoutConstantOffset). Zero is a valid constant + // offset, but doesn't help this optimization. + if (ConstantOffset != 0) + UserChain.push_back(U); + return ConstantOffset; +} + +unsigned ConstantOffsetExtractor::FindFirstUse(User *U, Value *Used) { + for (unsigned I = 0, E = U->getNumOperands(); I < E; ++I) { + if (U->getOperand(I) == Used) + return I; + } + return -1; +} + +Value *ConstantOffsetExtractor::cloneAndReplace(User *U, Value *From, + Value *To) { + // Finds in U the first use of From. It is safe to ignore later occurrences + // of From, because findInEitherOperand similarly stops searching the right + // operand when the first operand has a non-zero constant offset. + unsigned OpNo = FindFirstUse(U, From); + assert(OpNo != (unsigned)-1 && "UserChain wasn't built correctly"); + + // ConstantOffsetExtractor::find only follows Operators (i.e., Instructions + // and ConstantExprs). Therefore, U is either an Instruction or a + // ConstantExpr. + if (Instruction *I = dyn_cast<Instruction>(U)) { + Instruction *Clone = I->clone(); + Clone->setOperand(OpNo, To); + Clone->insertBefore(IP); + return Clone; + } + // cast<Constant>(To) is safe because a ConstantExpr only uses Constants. + return cast<ConstantExpr>(U) + ->getWithOperandReplaced(OpNo, cast<Constant>(To)); +} + +Value *ConstantOffsetExtractor::rebuildLeafWithoutConstantOffset(User *U, + Value *C) { + assert(U->getNumOperands() <= 2 && + "We didn't trace into any operator with more than 2 operands"); + // If U has only one operand which is the constant offset, removing the + // constant offset leaves U as a null value.
+ if (U->getNumOperands() == 1) + return Constant::getNullValue(U->getType()); + + // U->getNumOperands() == 2 + unsigned OpNo = FindFirstUse(U, C); // U->getOperand(OpNo) == C + assert(OpNo < 2 && "UserChain wasn't built correctly"); + Value *TheOther = U->getOperand(1 - OpNo); // The other operand of U + // If U = C - X, removing C makes U = -X; otherwise U will simply be X. + if (!isa<SubOperator>(U) || OpNo == 1) + return TheOther; + if (isa<ConstantExpr>(U)) + return ConstantExpr::getNeg(cast<Constant>(TheOther)); + return BinaryOperator::CreateNeg(TheOther, "", IP); +} + +Value *ConstantOffsetExtractor::rebuildWithoutConstantOffset() { + assert(UserChain.size() > 0 && "you at least found a constant, right?"); + // Start with the constant and go up through UserChain, each time building a + // clone of the subexpression but with the constant removed. + // e.g., to build a clone of (a + (b + (c + 5))) but with the 5 removed, we + // first build c, then (b + c), and finally (a + (b + c)). + // + // Fast path: if the GEP index is a constant, simply return 0. + if (UserChain.size() == 1) + return ConstantInt::get(UserChain[0]->getType(), 0); + + Value *Remainder = + rebuildLeafWithoutConstantOffset(UserChain[1], UserChain[0]); + for (size_t I = 2; I < UserChain.size(); ++I) + Remainder = cloneAndReplace(UserChain[I], UserChain[I - 1], Remainder); + return Remainder; +} + +int64_t ConstantOffsetExtractor::Extract(Value *Idx, Value *&NewIdx, + const DataLayout *DL, + Instruction *IP) { + ConstantOffsetExtractor Extractor(DL, IP); + // Find a non-zero constant offset first. + int64_t ConstantOffset = Extractor.find(Idx); + if (ConstantOffset == 0) + return 0; + // Then rebuild a new index with the constant removed. + NewIdx = Extractor.rebuildWithoutConstantOffset(); + return ConstantOffset; +} + +int64_t ConstantOffsetExtractor::Find(Value *Idx, const DataLayout *DL) { + return ConstantOffsetExtractor(DL, nullptr).find(Idx); +} + +void ConstantOffsetExtractor::ComputeKnownBits(Value *V, APInt &KnownOne, + APInt &KnownZero) const { + IntegerType *IT = cast<IntegerType>(V->getType()); + KnownOne = APInt(IT->getBitWidth(), 0); + KnownZero = APInt(IT->getBitWidth(), 0); + llvm::computeKnownBits(V, KnownZero, KnownOne, DL, 0); +} + +bool ConstantOffsetExtractor::NoCommonBits(Value *LHS, Value *RHS) const { + assert(LHS->getType() == RHS->getType() && + "LHS and RHS should have the same type"); + APInt LHSKnownOne, LHSKnownZero, RHSKnownOne, RHSKnownZero; + ComputeKnownBits(LHS, LHSKnownOne, LHSKnownZero); + ComputeKnownBits(RHS, RHSKnownOne, RHSKnownZero); + return (LHSKnownZero | RHSKnownZero).isAllOnesValue(); +} + +int64_t SeparateConstOffsetFromGEP::accumulateByteOffset( + GetElementPtrInst *GEP, const DataLayout *DL, bool &NeedsExtraction) { + NeedsExtraction = false; + int64_t AccumulativeByteOffset = 0; + gep_type_iterator GTI = gep_type_begin(*GEP); + for (unsigned I = 1, E = GEP->getNumOperands(); I != E; ++I, ++GTI) { + if (isa<SequentialType>(*GTI)) { + // Tries to extract a constant offset from this GEP index. + int64_t ConstantOffset = + ConstantOffsetExtractor::Find(GEP->getOperand(I), DL); + if (ConstantOffset != 0) { + NeedsExtraction = true; + // A GEP may have multiple indices. We accumulate the extracted + // constant offset to a byte offset, and later offset the remainder of + // the original GEP with this byte offset.
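+ // For example, with float a[32][32] as in the file comment, a GEP whose + // indices are (0, x + 1, y + 2) accumulates 1 * 128 + 2 * 4 = 136 bytes: + // each row of [32 x float] is 128 bytes and each float is 4 bytes.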
+ AccumulativeByteOffset += + ConstantOffset * DL->getTypeAllocSize(GTI.getIndexedType()); + } + } + return AccumulativeByteOffset; +} + +bool SeparateConstOffsetFromGEP::splitGEP(GetElementPtrInst *GEP) { + // Skip vector GEPs. + if (GEP->getType()->isVectorTy()) + return false; + + // The backend can already nicely handle the case where all indices are + // constant. + if (GEP->hasAllConstantIndices()) + return false; + + bool Changed = false; + + // Shortcuts integer casts. Eliminating these explicit casts can make + // subsequent optimizations more obvious: ConstantOffsetExtractor needn't + // trace into these casts. + if (GEP->isInBounds()) { + // Doing this to inbounds GEPs is safe because their indices are guaranteed + // to be non-negative and in bounds. + gep_type_iterator GTI = gep_type_begin(*GEP); + for (unsigned I = 1, E = GEP->getNumOperands(); I != E; ++I, ++GTI) { + if (isa<SequentialType>(*GTI)) { + if (Operator *O = dyn_cast<Operator>(GEP->getOperand(I))) { + if (O->getOpcode() == Instruction::SExt || + O->getOpcode() == Instruction::ZExt) { + GEP->setOperand(I, O->getOperand(0)); + Changed = true; + } + } + } + } + } + + const DataLayout *DL = &getAnalysis<DataLayoutPass>().getDataLayout(); + bool NeedsExtraction; + int64_t AccumulativeByteOffset = + accumulateByteOffset(GEP, DL, NeedsExtraction); + + if (!NeedsExtraction) + return Changed; + // Before really splitting the GEP, check whether the backend supports the + // addressing mode we are about to produce. If not, this splitting probably + // won't be beneficial. + TargetTransformInfo &TTI = getAnalysis<TargetTransformInfo>(); + if (!TTI.isLegalAddressingMode(GEP->getType()->getElementType(), + /*BaseGV=*/nullptr, AccumulativeByteOffset, + /*HasBaseReg=*/true, /*Scale=*/0)) { + return Changed; + } + + // Remove the constant offset in each GEP index. The resultant GEP computes + // the variadic base. + gep_type_iterator GTI = gep_type_begin(*GEP); + for (unsigned I = 1, E = GEP->getNumOperands(); I != E; ++I, ++GTI) { + if (isa<SequentialType>(*GTI)) { + Value *NewIdx = nullptr; + // Tries to extract a constant offset from this GEP index. + int64_t ConstantOffset = + ConstantOffsetExtractor::Extract(GEP->getOperand(I), NewIdx, DL, GEP); + if (ConstantOffset != 0) { + assert(NewIdx != nullptr && + "ConstantOffset != 0 implies NewIdx is set"); + GEP->setOperand(I, NewIdx); + // Clear the inbounds attribute because the new index may be out of + // bounds. e.g., + // + // b = add i64 a, 5 + // addr = gep inbounds float* p, i64 b + // + // is transformed to: + // + // addr2 = gep float* p, i64 a + // addr = gep float* addr2, i64 5 + // + // If a is -4, although the old index b is in bounds, the new index a is + // out of bounds. http://llvm.org/docs/LangRef.html#id181 says "if the + // inbounds keyword is not present, the offsets are added to the base + // address with silently-wrapping two's complement arithmetic". + // Therefore, the final code will be semantically equivalent. + // + // TODO(jingyue): do some range analysis to keep as many inbounds as + // possible. GEPs with inbounds are friendlier to alias analysis. + GEP->setIsInBounds(false); + Changed = true; + } + } + } + + // Offsets the base with the accumulative byte offset. + // + // %gep ; the base + // ... %gep ... + // + // => add the offset + // + // %gep2 ; clone of %gep + // %new.gep = gep %gep2, <offset / sizeof(*%gep)> + // %gep ; will be removed + // ... %gep ...
+ // + // => replace all uses of %gep with %new.gep and remove %gep + // + // %gep2 ; clone of %gep + // %new.gep = gep %gep2, <offset / sizeof(*%gep)> + // ... %new.gep ... + // + // If AccumulativeByteOffset is not a multiple of sizeof(*%gep), we emit an + // uglygep (http://llvm.org/docs/GetElementPtr.html#what-s-an-uglygep): + // bitcast %gep2 to i8*, add the offset, and bitcast the result back to the + // type of %gep. + // + // %gep2 ; clone of %gep + // %0 = bitcast %gep2 to i8* + // %uglygep = gep %0, <offset> + // %new.gep = bitcast %uglygep to <type of %gep> + // ... %new.gep ... + Instruction *NewGEP = GEP->clone(); + NewGEP->insertBefore(GEP); + + Type *IntPtrTy = DL->getIntPtrType(GEP->getType()); + uint64_t ElementTypeSizeOfGEP = + DL->getTypeAllocSize(GEP->getType()->getElementType()); + if (AccumulativeByteOffset % ElementTypeSizeOfGEP == 0) { + // Very likely. As long as %gep is naturally aligned, the byte offset we + // extracted should be a multiple of sizeof(*%gep). + // Per ANSI C standard, signed / unsigned = unsigned. Therefore, we + // cast ElementTypeSizeOfGEP to signed. + int64_t Index = + AccumulativeByteOffset / static_cast<int64_t>(ElementTypeSizeOfGEP); + NewGEP = GetElementPtrInst::Create( + NewGEP, ConstantInt::get(IntPtrTy, Index, true), GEP->getName(), GEP); + } else { + // Unlikely but possible. For example, + // #pragma pack(1) + // struct S { + // int a[3]; + // int64 b[8]; + // }; + // #pragma pack() + // + // Suppose the gep before extraction is &s[i + 1].b[j + 3]. After + // extraction, it becomes &s[i].b[j] and AccumulativeByteOffset is + // sizeof(S) + 3 * sizeof(int64) = 100, which is not a multiple of + // sizeof(int64). + // + // Emit an uglygep in this case. + Type *I8PtrTy = Type::getInt8PtrTy(GEP->getContext(), + GEP->getPointerAddressSpace()); + NewGEP = new BitCastInst(NewGEP, I8PtrTy, "", GEP); + NewGEP = GetElementPtrInst::Create( + NewGEP, ConstantInt::get(IntPtrTy, AccumulativeByteOffset, true), + "uglygep", GEP); + if (GEP->getType() != I8PtrTy) + NewGEP = new BitCastInst(NewGEP, GEP->getType(), GEP->getName(), GEP); + } + + GEP->replaceAllUsesWith(NewGEP); + GEP->eraseFromParent(); + + return true; +} + +bool SeparateConstOffsetFromGEP::runOnFunction(Function &F) { + if (DisableSeparateConstOffsetFromGEP) + return false; + + bool Changed = false; + for (Function::iterator B = F.begin(), BE = F.end(); B != BE; ++B) { + for (BasicBlock::iterator I = B->begin(), IE = B->end(); I != IE; ) { + if (GetElementPtrInst *GEP = dyn_cast<GetElementPtrInst>(I++)) { + Changed |= splitGEP(GEP); + } + // No need to split GEP ConstantExprs because all their indices are + // already constant.
+ } + } + return Changed; +} diff --git a/lib/Transforms/Scalar/SimplifyCFGPass.cpp b/lib/Transforms/Scalar/SimplifyCFGPass.cpp index ceae5a7..5d5606b 100644 --- a/lib/Transforms/Scalar/SimplifyCFGPass.cpp +++ b/lib/Transforms/Scalar/SimplifyCFGPass.cpp @@ -21,7 +21,6 @@ // //===----------------------------------------------------------------------===// -#define DEBUG_TYPE "simplifycfg" #include "llvm/Transforms/Scalar.h" #include "llvm/ADT/SmallPtrSet.h" #include "llvm/ADT/SmallVector.h" @@ -38,6 +37,8 @@ #include "llvm/Transforms/Utils/Local.h" using namespace llvm; +#define DEBUG_TYPE "simplifycfg" + STATISTIC(NumSimpl, "Number of blocks simplified"); namespace { @@ -71,7 +72,7 @@ FunctionPass *llvm::createCFGSimplificationPass() { static bool mergeEmptyReturnBlocks(Function &F) { bool Changed = false; - BasicBlock *RetBlock = 0; + BasicBlock *RetBlock = nullptr; // Scan all the blocks in the function, looking for empty return blocks. for (Function::iterator BBI = F.begin(), E = F.end(); BBI != E; ) { @@ -79,7 +80,7 @@ static bool mergeEmptyReturnBlocks(Function &F) { // Only look at return blocks. ReturnInst *Ret = dyn_cast<ReturnInst>(BB.getTerminator()); - if (Ret == 0) continue; + if (!Ret) continue; // Only look at the block if it is empty or the only other thing in it is a // single PHI node that is the operand to the return. @@ -98,7 +99,7 @@ static bool mergeEmptyReturnBlocks(Function &F) { } // If this is the first returning block, remember it and keep going. - if (RetBlock == 0) { + if (!RetBlock) { RetBlock = &BB; continue; } @@ -119,7 +120,7 @@ static bool mergeEmptyReturnBlocks(Function &F) { // If the canonical return block has no PHI node, create one now. PHINode *RetBlockPHI = dyn_cast<PHINode>(RetBlock->begin()); - if (RetBlockPHI == 0) { + if (!RetBlockPHI) { Value *InVal = cast<ReturnInst>(RetBlock->getTerminator())->getOperand(0); pred_iterator PB = pred_begin(RetBlock), PE = pred_end(RetBlock); RetBlockPHI = PHINode::Create(Ret->getOperand(0)->getType(), @@ -173,7 +174,7 @@ bool CFGSimplifyPass::runOnFunction(Function &F) { const TargetTransformInfo &TTI = getAnalysis<TargetTransformInfo>(); DataLayoutPass *DLP = getAnalysisIfAvailable<DataLayoutPass>(); - const DataLayout *DL = DLP ? &DLP->getDataLayout() : 0; + const DataLayout *DL = DLP ? &DLP->getDataLayout() : nullptr; bool EverChanged = removeUnreachableBlocks(F); EverChanged |= mergeEmptyReturnBlocks(F); EverChanged |= iterativelySimplifyCFG(F, TTI, DL); diff --git a/lib/Transforms/Scalar/Sink.cpp b/lib/Transforms/Scalar/Sink.cpp index 4107374..482c33a 100644 --- a/lib/Transforms/Scalar/Sink.cpp +++ b/lib/Transforms/Scalar/Sink.cpp @@ -12,7 +12,6 @@ // //===----------------------------------------------------------------------===// -#define DEBUG_TYPE "sink" #include "llvm/Transforms/Scalar.h" #include "llvm/ADT/Statistic.h" #include "llvm/Analysis/AliasAnalysis.h" @@ -25,6 +24,8 @@ #include "llvm/Support/raw_ostream.h" using namespace llvm; +#define DEBUG_TYPE "sink" + STATISTIC(NumSunk, "Number of instructions sunk"); STATISTIC(NumSinkIter, "Number of sinking iterations"); @@ -203,7 +204,7 @@ bool Sinking::IsAcceptableTarget(Instruction *Inst, // Don't sink instructions into a loop. 
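// Doing so would execute the instruction on every loop iteration // instead of at most once.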
Loop *succ = LI->getLoopFor(SuccToSinkTo); Loop *cur = LI->getLoopFor(Inst->getParent()); - if (succ != 0 && succ != cur) + if (succ != nullptr && succ != cur) return false; } @@ -237,14 +238,14 @@ bool Sinking::SinkInstruction(Instruction *Inst, // SuccToSinkTo - This is the successor to sink this instruction to, once we // decide. - BasicBlock *SuccToSinkTo = 0; + BasicBlock *SuccToSinkTo = nullptr; // Instructions can only be sunk if all their uses are in blocks // dominated by one of the successors. // Look at all the postdominators and see if we can sink it in one. DomTreeNode *DTN = DT->getNode(Inst->getParent()); for (DomTreeNode::iterator I = DTN->begin(), E = DTN->end(); - I != E && SuccToSinkTo == 0; ++I) { + I != E && SuccToSinkTo == nullptr; ++I) { BasicBlock *Candidate = (*I)->getBlock(); if ((*I)->getIDom()->getBlock() == Inst->getParent() && IsAcceptableTarget(Inst, Candidate)) @@ -254,13 +255,13 @@ bool Sinking::SinkInstruction(Instruction *Inst, // If no suitable postdominator was found, look at all the successors and // decide which one we should sink to, if any. for (succ_iterator I = succ_begin(Inst->getParent()), - E = succ_end(Inst->getParent()); I != E && SuccToSinkTo == 0; ++I) { + E = succ_end(Inst->getParent()); I != E && !SuccToSinkTo; ++I) { if (IsAcceptableTarget(Inst, *I)) SuccToSinkTo = *I; } // If we couldn't find a block to sink to, ignore this instruction. - if (SuccToSinkTo == 0) + if (!SuccToSinkTo) return false; DEBUG(dbgs() << "Sink" << *Inst << " ("; diff --git a/lib/Transforms/Scalar/StructurizeCFG.cpp b/lib/Transforms/Scalar/StructurizeCFG.cpp index 8fd2268..7b77ae1 100644 --- a/lib/Transforms/Scalar/StructurizeCFG.cpp +++ b/lib/Transforms/Scalar/StructurizeCFG.cpp @@ -7,7 +7,6 @@ // //===----------------------------------------------------------------------===// -#define DEBUG_TYPE "structurizecfg" #include "llvm/Transforms/Scalar.h" #include "llvm/ADT/MapVector.h" #include "llvm/ADT/SCCIterator.h" @@ -21,6 +20,8 @@ using namespace llvm; using namespace llvm::PatternMatch; +#define DEBUG_TYPE "structurizecfg" + namespace { // Definition of the complex types used in this pass. @@ -64,14 +65,14 @@ public: /// \brief Start a new query NearestCommonDominator(DominatorTree *DomTree) { DT = DomTree; - Result = 0; + Result = nullptr; } /// \brief Add BB to the resulting dominator void addBlock(BasicBlock *BB, bool Remember = true) { DomTreeNode *Node = DT->getNode(BB); - if (Result == 0) { + if (!Result) { unsigned Numbering = 0; for (;Node;Node = Node->getIDom()) IndexMap[Node] = ++Numbering; @@ -279,7 +280,7 @@ bool StructurizeCFG::doInitialization(Region *R, RGPassManager &RGM) { void StructurizeCFG::orderNodes() { scc_iterator<Region *> I = scc_begin(ParentRegion); for (Order.clear(); !I.isAtEnd(); ++I) { - std::vector<RegionNode *> &Nodes = *I; + const std::vector<RegionNode *> &Nodes = *I; Order.append(Nodes.begin(), Nodes.end()); } } @@ -453,10 +454,7 @@ void StructurizeCFG::insertConditions(bool Loops) { Value *Default = Loops ? 
BoolTrue : BoolFalse; SSAUpdater PhiInserter; - for (BranchVector::iterator I = Conds.begin(), - E = Conds.end(); I != E; ++I) { - - BranchInst *Term = *I; + for (BranchInst *Term : Conds) { assert(Term->isConditional()); BasicBlock *Parent = Term->getParent(); @@ -472,7 +470,7 @@ void StructurizeCFG::insertConditions(bool Loops) { NearestCommonDominator Dominator(DT); Dominator.addBlock(Parent, false); - Value *ParentValue = 0; + Value *ParentValue = nullptr; for (BBPredicates::iterator PI = Preds.begin(), PE = Preds.end(); PI != PE; ++PI) { @@ -591,7 +589,7 @@ void StructurizeCFG::changeExit(RegionNode *Node, BasicBlock *NewExit, if (Node->isSubRegion()) { Region *SubRegion = Node->getNodeAs<Region>(); BasicBlock *OldExit = SubRegion->getExit(); - BasicBlock *Dominator = 0; + BasicBlock *Dominator = nullptr; // Find all the edges from the sub region to the exit for (pred_iterator I = pred_begin(OldExit), E = pred_end(OldExit); @@ -678,7 +676,8 @@ BasicBlock *StructurizeCFG::needPostfix(BasicBlock *Flow, /// \brief Set the previous node void StructurizeCFG::setPrevNode(BasicBlock *BB) { - PrevNode = ParentRegion->contains(BB) ? ParentRegion->getBBNode(BB) : 0; + PrevNode = ParentRegion->contains(BB) ? ParentRegion->getBBNode(BB) + : nullptr; } /// \brief Does BB dominate all the predicates of Node ? @@ -699,7 +698,7 @@ bool StructurizeCFG::isPredictableTrue(RegionNode *Node) { bool Dominated = false; // Regionentry is always true - if (PrevNode == 0) + if (!PrevNode) return true; for (BBPredicates::iterator I = Preds.begin(), E = Preds.end(); @@ -806,11 +805,11 @@ void StructurizeCFG::createFlow() { Conditions.clear(); LoopConds.clear(); - PrevNode = 0; + PrevNode = nullptr; Visited.clear(); while (!Order.empty()) { - handleLoops(EntryDominatesExit, 0); + handleLoops(EntryDominatesExit, nullptr); } if (PrevNode) diff --git a/lib/Transforms/Scalar/TailRecursionElimination.cpp b/lib/Transforms/Scalar/TailRecursionElimination.cpp index 6d02777..05b9892 100644 --- a/lib/Transforms/Scalar/TailRecursionElimination.cpp +++ b/lib/Transforms/Scalar/TailRecursionElimination.cpp @@ -50,12 +50,12 @@ // //===----------------------------------------------------------------------===// -#define DEBUG_TYPE "tailcallelim" #include "llvm/Transforms/Scalar.h" #include "llvm/ADT/STLExtras.h" #include "llvm/ADT/SmallPtrSet.h" #include "llvm/ADT/Statistic.h" #include "llvm/Analysis/CaptureTracking.h" +#include "llvm/Analysis/CFG.h" #include "llvm/Analysis/InlineCost.h" #include "llvm/Analysis/InstructionSimplify.h" #include "llvm/Analysis/Loads.h" @@ -64,6 +64,7 @@ #include "llvm/IR/CallSite.h" #include "llvm/IR/Constants.h" #include "llvm/IR/DerivedTypes.h" +#include "llvm/IR/DiagnosticInfo.h" #include "llvm/IR/Function.h" #include "llvm/IR/Instructions.h" #include "llvm/IR/IntrinsicInst.h" @@ -76,6 +77,8 @@ #include "llvm/Transforms/Utils/Local.h" using namespace llvm; +#define DEBUG_TYPE "tailcallelim" + STATISTIC(NumEliminated, "Number of tail calls removed"); STATISTIC(NumRetDuped, "Number of return duplicated"); STATISTIC(NumAccumAdded, "Number of accumulators introduced"); @@ -94,6 +97,9 @@ namespace { bool runOnFunction(Function &F) override; private: + bool runTRE(Function &F); + bool markTails(Function &F, bool &AllCallsAreTailCalls); + CallInst *FindTRECandidate(Instruction *I, bool CannotTailCallElimCallsMarkedTail); bool EliminateRecursiveTailCall(CallInst *CI, ReturnInst *Ret, @@ -131,55 +137,255 @@ void TailCallElim::getAnalysisUsage(AnalysisUsage &AU) const { 
AU.addRequired<TargetTransformInfo>(); } -/// CanTRE - Scan the specified basic block for alloca instructions. -/// If it contains any that are variable-sized or not in the entry block, -/// returns false. -static bool CanTRE(AllocaInst *AI) { - // Because of PR962, we don't TRE allocas outside the entry block. - - // If this alloca is in the body of the function, or if it is a variable - // sized allocation, we cannot tail call eliminate calls marked 'tail' - // with this mechanism. - BasicBlock *BB = AI->getParent(); - return BB == &BB->getParent()->getEntryBlock() && - isa<ConstantInt>(AI->getArraySize()); +/// \brief Scan the specified function for alloca instructions. +/// If it contains any dynamic allocas, returns false. +static bool CanTRE(Function &F) { + // Because of PR962, we don't TRE dynamic allocas. + for (auto &BB : F) { + for (auto &I : BB) { + if (AllocaInst *AI = dyn_cast<AllocaInst>(&I)) { + if (!AI->isStaticAlloca()) + return false; + } + } + } + + return true; } -namespace { -struct AllocaCaptureTracker : public CaptureTracker { - AllocaCaptureTracker() : Captured(false) {} +bool TailCallElim::runOnFunction(Function &F) { + if (skipOptnoneFunction(F)) + return false; - void tooManyUses() override { Captured = true; } + bool AllCallsAreTailCalls = false; + bool Modified = markTails(F, AllCallsAreTailCalls); + if (AllCallsAreTailCalls) + Modified |= runTRE(F); + return Modified; +} - bool shouldExplore(const Use *U) override { - Value *V = U->getUser(); - if (isa<CallInst>(V) || isa<InvokeInst>(V)) - UsesAlloca.insert(V); - return true; +namespace { +struct AllocaDerivedValueTracker { + // Start at a root value and walk its use-def chain to mark calls that use the + // value or a derived value in AllocaUsers, and places where it may escape in + // EscapePoints. + void walk(Value *Root) { + SmallVector<Use *, 32> Worklist; + SmallPtrSet<Use *, 32> Visited; + + auto AddUsesToWorklist = [&](Value *V) { + for (auto &U : V->uses()) { + if (!Visited.insert(&U)) + continue; + Worklist.push_back(&U); + } + }; + + AddUsesToWorklist(Root); + + while (!Worklist.empty()) { + Use *U = Worklist.pop_back_val(); + Instruction *I = cast<Instruction>(U->getUser()); + + switch (I->getOpcode()) { + case Instruction::Call: + case Instruction::Invoke: { + CallSite CS(I); + bool IsNocapture = !CS.isCallee(U) && + CS.doesNotCapture(CS.getArgumentNo(U)); + callUsesLocalStack(CS, IsNocapture); + if (IsNocapture) { + // If the alloca-derived argument is passed in as nocapture, then it + // can't propagate to the call's return. That would be capturing. + continue; + } + break; + } + case Instruction::Load: { + // The result of a load is not alloca-derived (unless an alloca has + // otherwise escaped, but this is a local analysis). + continue; + } + case Instruction::Store: { + if (U->getOperandNo() == 0) + EscapePoints.insert(I); + continue; // Stores have no users to analyze. + } + case Instruction::BitCast: + case Instruction::GetElementPtr: + case Instruction::PHI: + case Instruction::Select: + case Instruction::AddrSpaceCast: + break; + default: + EscapePoints.insert(I); + break; + } + + AddUsesToWorklist(I); + } } - bool captured(const Use *U) override { - if (isa<ReturnInst>(U->getUser())) - return false; - Captured = true; - return true; + void callUsesLocalStack(CallSite CS, bool IsNocapture) { + // Add it to the list of alloca users. If it's already there, skip further + // processing. 
+ if (!AllocaUsers.insert(CS.getInstruction())) + return; + + // If it's nocapture then it can't capture the alloca. + if (IsNocapture) + return; + + // If it can write to memory, it can leak the alloca value. + if (!CS.onlyReadsMemory()) + EscapePoints.insert(CS.getInstruction()); } - bool Captured; - SmallPtrSet<const Value *, 16> UsesAlloca; + SmallPtrSet<Instruction *, 32> AllocaUsers; + SmallPtrSet<Instruction *, 32> EscapePoints; }; -} // end anonymous namespace +} -bool TailCallElim::runOnFunction(Function &F) { - if (skipOptnoneFunction(F)) +bool TailCallElim::markTails(Function &F, bool &AllCallsAreTailCalls) { + if (F.callsFunctionThatReturnsTwice()) return false; + AllCallsAreTailCalls = true; + + // The local stack holds all alloca instructions and all byval arguments. + AllocaDerivedValueTracker Tracker; + for (Argument &Arg : F.args()) { + if (Arg.hasByValAttr()) + Tracker.walk(&Arg); + } + for (auto &BB : F) { + for (auto &I : BB) + if (AllocaInst *AI = dyn_cast<AllocaInst>(&I)) + Tracker.walk(AI); + } + bool Modified = false; + + // Track whether a block is reachable after an alloca has escaped. Blocks that + // contain the escaping instruction will be marked as being visited without an + // escaped alloca, since that is how the block began. + enum VisitType { + UNVISITED, + UNESCAPED, + ESCAPED + }; + DenseMap<BasicBlock *, VisitType> Visited; + + // We propagate the fact that an alloca has escaped from block to successor. + // Visit the blocks that are propagating the escapedness first. To do this, we + // maintain two worklists. + SmallVector<BasicBlock *, 32> WorklistUnescaped, WorklistEscaped; + + // We may enter a block and visit it thinking that no alloca has escaped yet, + // then see an escape point and go back around a loop edge and come back to + // the same block twice. Because of this, we defer setting tail on calls when + // we first encounter them in a block. Every entry in this list does not + // statically use an alloca via use-def chain analysis, but may find an alloca + // through other means if the block turns out to be reachable after an escape + // point. + SmallVector<CallInst *, 32> DeferredTails; + + BasicBlock *BB = &F.getEntryBlock(); + VisitType Escaped = UNESCAPED; + do { + for (auto &I : *BB) { + if (Tracker.EscapePoints.count(&I)) + Escaped = ESCAPED; + + CallInst *CI = dyn_cast<CallInst>(&I); + if (!CI || CI->isTailCall()) + continue; + + if (CI->doesNotAccessMemory()) { + // A call to a readnone function whose arguments are all things computed + // outside this function can be marked tail. Even if you stored the + // alloca address into a global, a readnone function can't load the + // global anyhow. + // + // Note that this runs whether we know an alloca has escaped or not. If + // it has, then we can't trust Tracker.AllocaUsers to be accurate. 
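+ // Illustrative case (names hypothetical): given a readnone callee g, in + // { int x; escape(&x); g(42); } + // the call g(42) can still be marked tail, since a constant argument + // cannot reach the escaped alloca.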
+ bool SafeToTail = true; + for (auto &Arg : CI->arg_operands()) { + if (isa<Constant>(Arg.get())) + continue; + if (Argument *A = dyn_cast<Argument>(Arg.get())) + if (!A->hasByValAttr()) + continue; + SafeToTail = false; + break; + } + if (SafeToTail) { + emitOptimizationRemark( + F.getContext(), "tailcallelim", F, CI->getDebugLoc(), + "marked this readnone call a tail call candidate"); + CI->setTailCall(); + Modified = true; + continue; + } + } + + if (Escaped == UNESCAPED && !Tracker.AllocaUsers.count(CI)) { + DeferredTails.push_back(CI); + } else { + AllCallsAreTailCalls = false; + } + } + + for (auto *SuccBB : make_range(succ_begin(BB), succ_end(BB))) { + auto &State = Visited[SuccBB]; + if (State < Escaped) { + State = Escaped; + if (State == ESCAPED) + WorklistEscaped.push_back(SuccBB); + else + WorklistUnescaped.push_back(SuccBB); + } + } + + if (!WorklistEscaped.empty()) { + BB = WorklistEscaped.pop_back_val(); + Escaped = ESCAPED; + } else { + BB = nullptr; + while (!WorklistUnescaped.empty()) { + auto *NextBB = WorklistUnescaped.pop_back_val(); + if (Visited[NextBB] == UNESCAPED) { + BB = NextBB; + Escaped = UNESCAPED; + break; + } + } + } + } while (BB); + + for (CallInst *CI : DeferredTails) { + if (Visited[CI->getParent()] != ESCAPED) { + // If the escape point was part way through the block, calls after the + // escape point wouldn't have been put into DeferredTails. + emitOptimizationRemark(F.getContext(), "tailcallelim", F, + CI->getDebugLoc(), + "marked this call a tail call candidate"); + CI->setTailCall(); + Modified = true; + } else { + AllCallsAreTailCalls = false; + } + } + + return Modified; +} + +bool TailCallElim::runTRE(Function &F) { // If this function is a varargs function, we won't be able to PHI the args // right, so don't even try to convert it... if (F.getFunctionType()->isVarArg()) return false; TTI = &getAnalysis<TargetTransformInfo>(); - BasicBlock *OldEntry = 0; + BasicBlock *OldEntry = nullptr; bool TailCallsAreMarkedTail = false; SmallVector<PHINode*, 8> ArgumentPHIs; bool MadeChange = false; @@ -188,39 +394,23 @@ bool TailCallElim::runOnFunction(Function &F) { // marked with the 'tail' attribute, because doing so would cause the stack // size to increase (real TRE would deallocate variable sized allocas, TRE // doesn't). - bool CanTRETailMarkedCall = true; - - // Find calls that can be marked tail. - AllocaCaptureTracker ACT; - for (Function::iterator BB = F.begin(), EE = F.end(); BB != EE; ++BB) { - for (BasicBlock::iterator I = BB->begin(), E = BB->end(); I != E; ++I) { - if (AllocaInst *AI = dyn_cast<AllocaInst>(I)) { - CanTRETailMarkedCall &= CanTRE(AI); - PointerMayBeCaptured(AI, &ACT); - // If any allocas are captured, exit. - if (ACT.Captured) - return false; - } - } - } + bool CanTRETailMarkedCall = CanTRE(F); - // Second pass, change any tail recursive calls to loops. + // Change any tail recursive calls to loops. // // FIXME: The code generator produces really bad code when an 'escaping // alloca' is changed from being a static alloca to being a dynamic alloca. // Until this is resolved, disable this transformation if that would ever // happen. This bug is PR962.
- if (ACT.UsesAlloca.empty()) { - for (Function::iterator BB = F.begin(), E = F.end(); BB != E; ++BB) { - if (ReturnInst *Ret = dyn_cast<ReturnInst>(BB->getTerminator())) { - bool Change = ProcessReturningBlock(Ret, OldEntry, TailCallsAreMarkedTail, - ArgumentPHIs, !CanTRETailMarkedCall); - if (!Change && BB->getFirstNonPHIOrDbg() == Ret) - Change = FoldReturnAndProcessPred(BB, Ret, OldEntry, - TailCallsAreMarkedTail, ArgumentPHIs, - !CanTRETailMarkedCall); - MadeChange |= Change; - } + for (Function::iterator BB = F.begin(), E = F.end(); BB != E; ++BB) { + if (ReturnInst *Ret = dyn_cast<ReturnInst>(BB->getTerminator())) { + bool Change = ProcessReturningBlock(Ret, OldEntry, TailCallsAreMarkedTail, + ArgumentPHIs, !CanTRETailMarkedCall); + if (!Change && BB->getFirstNonPHIOrDbg() == Ret) + Change = FoldReturnAndProcessPred(BB, Ret, OldEntry, + TailCallsAreMarkedTail, ArgumentPHIs, + !CanTRETailMarkedCall); + MadeChange |= Change; } } @@ -229,34 +419,13 @@ bool TailCallElim::runOnFunction(Function &F) { // with themselves. Check to see if we did and clean up our mess if so. This // occurs when a function passes an argument straight through to its tail // call. - if (!ArgumentPHIs.empty()) { - for (unsigned i = 0, e = ArgumentPHIs.size(); i != e; ++i) { - PHINode *PN = ArgumentPHIs[i]; - - // If the PHI Node is a dynamic constant, replace it with the value it is. - if (Value *PNV = SimplifyInstruction(PN)) { - PN->replaceAllUsesWith(PNV); - PN->eraseFromParent(); - } - } - } + for (unsigned i = 0, e = ArgumentPHIs.size(); i != e; ++i) { + PHINode *PN = ArgumentPHIs[i]; - // At this point, we know that the function does not have any captured - // allocas. If additionally the function does not call setjmp, mark all calls - // in the function that do not access stack memory with the tail keyword. This - // implies ensuring that there does not exist any path from a call that takes - // in an alloca but does not capture it and the call which we wish to mark - // with "tail". - if (!F.callsFunctionThatReturnsTwice()) { - for (Function::iterator BB = F.begin(), E = F.end(); BB != E; ++BB) { - for (BasicBlock::iterator I = BB->begin(), E = BB->end(); I != E; ++I) { - if (CallInst *CI = dyn_cast<CallInst>(I)) { - if (!ACT.UsesAlloca.count(CI)) { - CI->setTailCall(); - MadeChange = true; - } - } - } + // If the PHI Node is a dynamic constant, replace it with the value it is. + if (Value *PNV = SimplifyInstruction(PN)) { + PN->replaceAllUsesWith(PNV); + PN->eraseFromParent(); } } @@ -343,11 +512,11 @@ static bool isDynamicConstant(Value *V, CallInst *CI, ReturnInst *RI) { // static Value *getCommonReturnValue(ReturnInst *IgnoreRI, CallInst *CI) { Function *F = CI->getParent()->getParent(); - Value *ReturnedValue = 0; + Value *ReturnedValue = nullptr; for (Function::iterator BBI = F->begin(), E = F->end(); BBI != E; ++BBI) { ReturnInst *RI = dyn_cast<ReturnInst>(BBI->getTerminator()); - if (RI == 0 || RI == IgnoreRI) continue; + if (RI == nullptr || RI == IgnoreRI) continue; // We can only perform this transformation if the value returned is // evaluatable at the start of the initial invocation of the function, @@ -355,10 +524,10 @@ static Value *getCommonReturnValue(ReturnInst *IgnoreRI, CallInst *CI) { // Value *RetOp = RI->getOperand(0); if (!isDynamicConstant(RetOp, CI, RI)) - return 0; + return nullptr; if (ReturnedValue && RetOp != ReturnedValue) - return 0; // Cannot transform if differing values are returned. + return nullptr; // Cannot transform if differing values are returned. 
ReturnedValue = RetOp; } return ReturnedValue; @@ -370,18 +539,18 @@ static Value *getCommonReturnValue(ReturnInst *IgnoreRI, CallInst *CI) { /// Value *TailCallElim::CanTransformAccumulatorRecursion(Instruction *I, CallInst *CI) { - if (!I->isAssociative() || !I->isCommutative()) return 0; + if (!I->isAssociative() || !I->isCommutative()) return nullptr; assert(I->getNumOperands() == 2 && "Associative/commutative operations should have 2 args!"); // Exactly one operand should be the result of the call instruction. if ((I->getOperand(0) == CI && I->getOperand(1) == CI) || (I->getOperand(0) != CI && I->getOperand(1) != CI)) - return 0; + return nullptr; // The only user of this instruction we allow is a single return instruction. if (!I->hasOneUse() || !isa<ReturnInst>(I->user_back())) - return 0; + return nullptr; // Ok, now we have to check all of the other return instructions in this // function. If they return non-constants or differing values, then we cannot @@ -402,11 +571,11 @@ TailCallElim::FindTRECandidate(Instruction *TI, Function *F = BB->getParent(); if (&BB->front() == TI) // Make sure there is something before the terminator. - return 0; + return nullptr; // Scan backwards from the return, checking to see if there is a tail call in // this block. If so, set CI to it. - CallInst *CI = 0; + CallInst *CI = nullptr; BasicBlock::iterator BBI = TI; while (true) { CI = dyn_cast<CallInst>(BBI); @@ -414,14 +583,14 @@ TailCallElim::FindTRECandidate(Instruction *TI, break; if (BBI == BB->begin()) - return 0; // Didn't find a potential tail call. + return nullptr; // Didn't find a potential tail call. --BBI; } // If this call is marked as a tail call, and if there are dynamic allocas in // the function, we cannot perform this optimization. if (CI->isTailCall() && CannotTailCallElimCallsMarkedTail) - return 0; + return nullptr; // As a special case, detect code like this: // double fabs(double f) { return __builtin_fabs(f); } // a 'fabs' call @@ -441,7 +610,7 @@ TailCallElim::FindTRECandidate(Instruction *TI, for (; I != E && FI != FE; ++I, ++FI) if (*I != &*FI) break; if (I == E && FI == FE) - return 0; + return nullptr; } return CI; @@ -462,8 +631,8 @@ bool TailCallElim::EliminateRecursiveTailCall(CallInst *CI, ReturnInst *Ret, // which is different to the constant returned by other return instructions // (which is recorded in AccumulatorRecursionEliminationInitVal). This is a // special case of accumulator recursion, the operation being "return C". - Value *AccumulatorRecursionEliminationInitVal = 0; - Instruction *AccumulatorRecursionInstr = 0; + Value *AccumulatorRecursionEliminationInitVal = nullptr; + Instruction *AccumulatorRecursionInstr = nullptr; // Ok, we found a potential tail call. We can currently only transform the // tail call if all of the instructions between the call and the return are @@ -493,8 +662,8 @@ bool TailCallElim::EliminateRecursiveTailCall(CallInst *CI, ReturnInst *Ret, // accumulator recursion variable eliminated. if (Ret->getNumOperands() == 1 && Ret->getReturnValue() != CI && !isa<UndefValue>(Ret->getReturnValue()) && - AccumulatorRecursionEliminationInitVal == 0 && - !getCommonReturnValue(0, CI)) { + AccumulatorRecursionEliminationInitVal == nullptr && + !getCommonReturnValue(nullptr, CI)) { // One case remains that we are able to handle: the current return // instruction returns a constant, and all other return instructions // return a different constant. 
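// (For contrast, the common accumulator-recursion shape is // int fact(int n) { return n < 2 ? 1 : n * fact(n - 1); } // where the multiply is the associative and commutative operation that // CanTransformAccumulatorRecursion turns into a loop accumulator.)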
@@ -510,9 +679,12 @@ bool TailCallElim::EliminateRecursiveTailCall(CallInst *CI, ReturnInst *Ret, BasicBlock *BB = Ret->getParent(); Function *F = BB->getParent(); + emitOptimizationRemark(F->getContext(), "tailcallelim", *F, CI->getDebugLoc(), + "transforming tail recursion to loop"); + // OK! We can transform this tail call. If this is the first one found, // create the new entry block, allowing us to branch back to the old entry. - if (OldEntry == 0) { + if (!OldEntry) { OldEntry = &F->getEntryBlock(); BasicBlock *NewEntry = BasicBlock::Create(F->getContext(), "", F, OldEntry); NewEntry->takeName(OldEntry); |