aboutsummaryrefslogtreecommitdiffstats
path: root/lib/Transforms/Scalar
diff options
context:
space:
mode:
authorStephen Hines <srhines@google.com>2014-05-29 02:49:00 -0700
committerStephen Hines <srhines@google.com>2014-05-29 02:49:00 -0700
commitdce4a407a24b04eebc6a376f8e62b41aaa7b071f (patch)
treedcebc53f2b182f145a2e659393bf9a0472cedf23 /lib/Transforms/Scalar
parent220b921aed042f9e520c26cffd8282a94c66c3d5 (diff)
downloadexternal_llvm-dce4a407a24b04eebc6a376f8e62b41aaa7b071f.zip
external_llvm-dce4a407a24b04eebc6a376f8e62b41aaa7b071f.tar.gz
external_llvm-dce4a407a24b04eebc6a376f8e62b41aaa7b071f.tar.bz2
Update LLVM for 3.5 rebase (r209712).
Change-Id: I149556c940fb7dc92d075273c87ff584f400941f
Diffstat (limited to 'lib/Transforms/Scalar')
-rw-r--r--lib/Transforms/Scalar/ADCE.cpp3
-rw-r--r--lib/Transforms/Scalar/Android.mk6
-rw-r--r--lib/Transforms/Scalar/CMakeLists.txt11
-rw-r--r--lib/Transforms/Scalar/ConstantHoisting.cpp24
-rw-r--r--lib/Transforms/Scalar/ConstantProp.cpp5
-rw-r--r--lib/Transforms/Scalar/CorrelatedValuePropagation.cpp5
-rw-r--r--lib/Transforms/Scalar/DCE.cpp3
-rw-r--r--lib/Transforms/Scalar/DeadStoreElimination.cpp33
-rw-r--r--lib/Transforms/Scalar/EarlyCSE.cpp23
-rw-r--r--lib/Transforms/Scalar/FlattenCFGPass.cpp3
-rw-r--r--lib/Transforms/Scalar/GVN.cpp115
-rw-r--r--lib/Transforms/Scalar/GlobalMerge.cpp8
-rw-r--r--lib/Transforms/Scalar/IndVarSimplify.cpp80
-rw-r--r--lib/Transforms/Scalar/JumpThreading.cpp39
-rw-r--r--lib/Transforms/Scalar/LICM.cpp15
-rw-r--r--lib/Transforms/Scalar/LoopDeletion.cpp3
-rw-r--r--lib/Transforms/Scalar/LoopIdiomRecognize.cpp65
-rw-r--r--lib/Transforms/Scalar/LoopInstSimplify.cpp17
-rw-r--r--lib/Transforms/Scalar/LoopRerollPass.cpp7
-rw-r--r--lib/Transforms/Scalar/LoopRotation.cpp36
-rw-r--r--lib/Transforms/Scalar/LoopStrengthReduce.cpp683
-rw-r--r--lib/Transforms/Scalar/LoopUnrollPass.cpp12
-rw-r--r--lib/Transforms/Scalar/LoopUnswitch.cpp62
-rw-r--r--lib/Transforms/Scalar/LowerAtomic.cpp5
-rw-r--r--lib/Transforms/Scalar/MemCpyOptimizer.cpp41
-rw-r--r--lib/Transforms/Scalar/PartiallyInlineLibCalls.cpp3
-rw-r--r--lib/Transforms/Scalar/Reassociate.cpp43
-rw-r--r--lib/Transforms/Scalar/Reg2Mem.cpp3
-rw-r--r--lib/Transforms/Scalar/SCCP.cpp33
-rw-r--r--lib/Transforms/Scalar/SROA.cpp106
-rw-r--r--lib/Transforms/Scalar/SampleProfile.cpp10
-rw-r--r--lib/Transforms/Scalar/Scalar.cpp2
-rw-r--r--lib/Transforms/Scalar/ScalarReplAggregates.cpp79
-rw-r--r--lib/Transforms/Scalar/Scalarizer.cpp13
-rw-r--r--lib/Transforms/Scalar/SeparateConstOffsetFromGEP.cpp623
-rw-r--r--lib/Transforms/Scalar/SimplifyCFGPass.cpp13
-rw-r--r--lib/Transforms/Scalar/Sink.cpp13
-rw-r--r--lib/Transforms/Scalar/StructurizeCFG.cpp27
-rw-r--r--lib/Transforms/Scalar/TailRecursionElimination.cpp380
39 files changed, 1847 insertions, 805 deletions
diff --git a/lib/Transforms/Scalar/ADCE.cpp b/lib/Transforms/Scalar/ADCE.cpp
index fa8b598..1a3a4aa 100644
--- a/lib/Transforms/Scalar/ADCE.cpp
+++ b/lib/Transforms/Scalar/ADCE.cpp
@@ -14,7 +14,6 @@
//
//===----------------------------------------------------------------------===//
-#define DEBUG_TYPE "adce"
#include "llvm/Transforms/Scalar.h"
#include "llvm/ADT/DepthFirstIterator.h"
#include "llvm/ADT/SmallPtrSet.h"
@@ -28,6 +27,8 @@
#include "llvm/Pass.h"
using namespace llvm;
+#define DEBUG_TYPE "adce"
+
STATISTIC(NumRemoved, "Number of instructions removed");
namespace {
diff --git a/lib/Transforms/Scalar/Android.mk b/lib/Transforms/Scalar/Android.mk
index 3894f93..079cc86 100644
--- a/lib/Transforms/Scalar/Android.mk
+++ b/lib/Transforms/Scalar/Android.mk
@@ -32,6 +32,7 @@ transforms_scalar_SRC_FILES := \
Scalar.cpp \
Scalarizer.cpp \
ScalarReplAggregates.cpp \
+ SeparateConstOffsetFromGEP.cpp \
SimplifyCFGPass.cpp \
Sink.cpp \
StructurizeCFG.cpp \
@@ -60,11 +61,6 @@ include $(CLEAR_VARS)
LOCAL_SRC_FILES := $(transforms_scalar_SRC_FILES)
LOCAL_MODULE:= libLLVMScalarOpts
-# Override the default optimization level to work around a SIGSEGV
-# on x86 target builds for SROA.cpp.
-# Bug: 8047767
-LOCAL_CFLAGS_x86 += -O1
-
LOCAL_MODULE_TAGS := optional
include $(LLVM_DEVICE_BUILD_MK)
diff --git a/lib/Transforms/Scalar/CMakeLists.txt b/lib/Transforms/Scalar/CMakeLists.txt
index 27434c1..3ad1488 100644
--- a/lib/Transforms/Scalar/CMakeLists.txt
+++ b/lib/Transforms/Scalar/CMakeLists.txt
@@ -5,19 +5,19 @@ add_llvm_library(LLVMScalarOpts
CorrelatedValuePropagation.cpp
DCE.cpp
DeadStoreElimination.cpp
- Scalarizer.cpp
EarlyCSE.cpp
- GlobalMerge.cpp
+ FlattenCFGPass.cpp
GVN.cpp
+ GlobalMerge.cpp
IndVarSimplify.cpp
JumpThreading.cpp
LICM.cpp
LoopDeletion.cpp
LoopIdiomRecognize.cpp
LoopInstSimplify.cpp
+ LoopRerollPass.cpp
LoopRotation.cpp
LoopStrengthReduce.cpp
- LoopRerollPass.cpp
LoopUnrollPass.cpp
LoopUnswitch.cpp
LowerAtomic.cpp
@@ -25,13 +25,14 @@ add_llvm_library(LLVMScalarOpts
PartiallyInlineLibCalls.cpp
Reassociate.cpp
Reg2Mem.cpp
- SampleProfile.cpp
SCCP.cpp
SROA.cpp
+ SampleProfile.cpp
Scalar.cpp
ScalarReplAggregates.cpp
+ Scalarizer.cpp
+ SeparateConstOffsetFromGEP.cpp
SimplifyCFGPass.cpp
- FlattenCFGPass.cpp
Sink.cpp
StructurizeCFG.cpp
TailRecursionElimination.cpp
diff --git a/lib/Transforms/Scalar/ConstantHoisting.cpp b/lib/Transforms/Scalar/ConstantHoisting.cpp
index 57a1521..763d02b 100644
--- a/lib/Transforms/Scalar/ConstantHoisting.cpp
+++ b/lib/Transforms/Scalar/ConstantHoisting.cpp
@@ -33,7 +33,6 @@
// %0 = load i64* inttoptr (i64 big_constant to i64*)
//===----------------------------------------------------------------------===//
-#define DEBUG_TYPE "consthoist"
#include "llvm/Transforms/Scalar.h"
#include "llvm/ADT/SmallSet.h"
#include "llvm/ADT/SmallVector.h"
@@ -44,9 +43,12 @@
#include "llvm/IR/IntrinsicInst.h"
#include "llvm/Pass.h"
#include "llvm/Support/Debug.h"
+#include <tuple>
using namespace llvm;
+#define DEBUG_TYPE "consthoist"
+
STATISTIC(NumConstantsHoisted, "Number of constants hoisted");
STATISTIC(NumConstantsRebased, "Number of constants rebased");
@@ -117,7 +119,8 @@ class ConstantHoisting : public FunctionPass {
SmallVector<ConstantInfo, 8> ConstantVec;
public:
static char ID; // Pass identification, replacement for typeid
- ConstantHoisting() : FunctionPass(ID), TTI(0), DT(0), Entry(0) {
+ ConstantHoisting() : FunctionPass(ID), TTI(nullptr), DT(nullptr),
+ Entry(nullptr) {
initializeConstantHoistingPass(*PassRegistry::getPassRegistry());
}
@@ -206,7 +209,16 @@ bool ConstantHoisting::runOnFunction(Function &Fn) {
/// \brief Find the constant materialization insertion point.
Instruction *ConstantHoisting::findMatInsertPt(Instruction *Inst,
unsigned Idx) const {
- // The simple and common case.
+ // If the operand is a cast instruction, then we have to materialize the
+ // constant before the cast instruction.
+ if (Idx != ~0U) {
+ Value *Opnd = Inst->getOperand(Idx);
+ if (auto CastInst = dyn_cast<Instruction>(Opnd))
+ if (CastInst->isCast())
+ return CastInst;
+ }
+
+ // The simple and common case. This also includes constant expressions.
if (!isa<PHINode>(Inst) && !isa<LandingPadInst>(Inst))
return Inst;
@@ -228,7 +240,7 @@ findConstantInsertionPoint(const ConstantInfo &ConstInfo) const {
SmallPtrSet<BasicBlock *, 8> BBs;
for (auto const &RCI : ConstInfo.RebasedConstants)
for (auto const &U : RCI.Uses)
- BBs.insert(U.Inst->getParent());
+ BBs.insert(findMatInsertPt(U.Inst, U.OpndIdx)->getParent());
if (BBs.count(Entry))
return &Entry->front();
@@ -487,8 +499,8 @@ void ConstantHoisting::emitBaseConstants(Instruction *Base, Constant *Offset,
ClonedCastInst->insertAfter(CastInst);
// Use the same debug location as the original cast instruction.
ClonedCastInst->setDebugLoc(CastInst->getDebugLoc());
- DEBUG(dbgs() << "Clone instruction: " << *ClonedCastInst << '\n'
- << "To : " << *CastInst << '\n');
+ DEBUG(dbgs() << "Clone instruction: " << *CastInst << '\n'
+ << "To : " << *ClonedCastInst << '\n');
}
DEBUG(dbgs() << "Update: " << *ConstUser.Inst << '\n');
diff --git a/lib/Transforms/Scalar/ConstantProp.cpp b/lib/Transforms/Scalar/ConstantProp.cpp
index 7045b36..dd51ce1 100644
--- a/lib/Transforms/Scalar/ConstantProp.cpp
+++ b/lib/Transforms/Scalar/ConstantProp.cpp
@@ -18,7 +18,6 @@
//
//===----------------------------------------------------------------------===//
-#define DEBUG_TYPE "constprop"
#include "llvm/Transforms/Scalar.h"
#include "llvm/ADT/Statistic.h"
#include "llvm/Analysis/ConstantFolding.h"
@@ -31,6 +30,8 @@
#include <set>
using namespace llvm;
+#define DEBUG_TYPE "constprop"
+
STATISTIC(NumInstKilled, "Number of instructions killed");
namespace {
@@ -68,7 +69,7 @@ bool ConstantPropagation::runOnFunction(Function &F) {
}
bool Changed = false;
DataLayoutPass *DLP = getAnalysisIfAvailable<DataLayoutPass>();
- const DataLayout *DL = DLP ? &DLP->getDataLayout() : 0;
+ const DataLayout *DL = DLP ? &DLP->getDataLayout() : nullptr;
TargetLibraryInfo *TLI = &getAnalysis<TargetLibraryInfo>();
while (!WorkList.empty()) {
diff --git a/lib/Transforms/Scalar/CorrelatedValuePropagation.cpp b/lib/Transforms/Scalar/CorrelatedValuePropagation.cpp
index 0490767..0829462 100644
--- a/lib/Transforms/Scalar/CorrelatedValuePropagation.cpp
+++ b/lib/Transforms/Scalar/CorrelatedValuePropagation.cpp
@@ -11,7 +11,6 @@
//
//===----------------------------------------------------------------------===//
-#define DEBUG_TYPE "correlated-value-propagation"
#include "llvm/Transforms/Scalar.h"
#include "llvm/ADT/Statistic.h"
#include "llvm/Analysis/InstructionSimplify.h"
@@ -26,6 +25,8 @@
#include "llvm/Transforms/Utils/Local.h"
using namespace llvm;
+#define DEBUG_TYPE "correlated-value-propagation"
+
STATISTIC(NumPhis, "Number of phis propagated");
STATISTIC(NumSelects, "Number of selects propagated");
STATISTIC(NumMemAccess, "Number of memory access targets propagated");
@@ -138,7 +139,7 @@ bool CorrelatedValuePropagation::processPHI(PHINode *P) {
}
bool CorrelatedValuePropagation::processMemAccess(Instruction *I) {
- Value *Pointer = 0;
+ Value *Pointer = nullptr;
if (LoadInst *L = dyn_cast<LoadInst>(I))
Pointer = L->getPointerOperand();
else
diff --git a/lib/Transforms/Scalar/DCE.cpp b/lib/Transforms/Scalar/DCE.cpp
index 8377fd9..99fac75 100644
--- a/lib/Transforms/Scalar/DCE.cpp
+++ b/lib/Transforms/Scalar/DCE.cpp
@@ -16,7 +16,6 @@
//
//===----------------------------------------------------------------------===//
-#define DEBUG_TYPE "dce"
#include "llvm/Transforms/Scalar.h"
#include "llvm/ADT/Statistic.h"
#include "llvm/IR/InstIterator.h"
@@ -26,6 +25,8 @@
#include "llvm/Transforms/Utils/Local.h"
using namespace llvm;
+#define DEBUG_TYPE "dce"
+
STATISTIC(DIEEliminated, "Number of insts removed by DIE pass");
STATISTIC(DCEEliminated, "Number of insts removed");
diff --git a/lib/Transforms/Scalar/DeadStoreElimination.cpp b/lib/Transforms/Scalar/DeadStoreElimination.cpp
index f54c00d..3af8ee7 100644
--- a/lib/Transforms/Scalar/DeadStoreElimination.cpp
+++ b/lib/Transforms/Scalar/DeadStoreElimination.cpp
@@ -15,7 +15,6 @@
//
//===----------------------------------------------------------------------===//
-#define DEBUG_TYPE "dse"
#include "llvm/Transforms/Scalar.h"
#include "llvm/ADT/STLExtras.h"
#include "llvm/ADT/SetVector.h"
@@ -38,6 +37,8 @@
#include "llvm/Transforms/Utils/Local.h"
using namespace llvm;
+#define DEBUG_TYPE "dse"
+
STATISTIC(NumFastStores, "Number of stores deleted");
STATISTIC(NumFastOther , "Number of other instrs removed");
@@ -49,7 +50,7 @@ namespace {
const TargetLibraryInfo *TLI;
static char ID; // Pass identification, replacement for typeid
- DSE() : FunctionPass(ID), AA(0), MD(0), DT(0) {
+ DSE() : FunctionPass(ID), AA(nullptr), MD(nullptr), DT(nullptr) {
initializeDSEPass(*PassRegistry::getPassRegistry());
}
@@ -69,7 +70,7 @@ namespace {
if (DT->isReachableFromEntry(I))
Changed |= runOnBasicBlock(*I);
- AA = 0; MD = 0; DT = 0;
+ AA = nullptr; MD = nullptr; DT = nullptr;
return Changed;
}
@@ -111,9 +112,9 @@ FunctionPass *llvm::createDeadStoreEliminationPass() { return new DSE(); }
/// If ValueSet is non-null, remove any deleted instructions from it as well.
///
static void DeleteDeadInstruction(Instruction *I,
- MemoryDependenceAnalysis &MD,
- const TargetLibraryInfo *TLI,
- SmallSetVector<Value*, 16> *ValueSet = 0) {
+ MemoryDependenceAnalysis &MD,
+ const TargetLibraryInfo *TLI,
+ SmallSetVector<Value*, 16> *ValueSet = nullptr) {
SmallVector<Instruction*, 32> NowDeadInsts;
NowDeadInsts.push_back(I);
@@ -131,7 +132,7 @@ static void DeleteDeadInstruction(Instruction *I,
for (unsigned op = 0, e = DeadInst->getNumOperands(); op != e; ++op) {
Value *Op = DeadInst->getOperand(op);
- DeadInst->setOperand(op, 0);
+ DeadInst->setOperand(op, nullptr);
// If this operand just became dead, add it to the NowDeadInsts list.
if (!Op->use_empty()) continue;
@@ -203,13 +204,13 @@ getLocForWrite(Instruction *Inst, AliasAnalysis &AA) {
// If we don't have target data around, an unknown size in Location means
// that we should use the size of the pointee type. This isn't valid for
// memset/memcpy, which writes more than an i8.
- if (Loc.Size == AliasAnalysis::UnknownSize && DL == 0)
+ if (Loc.Size == AliasAnalysis::UnknownSize && DL == nullptr)
return AliasAnalysis::Location();
return Loc;
}
IntrinsicInst *II = dyn_cast<IntrinsicInst>(Inst);
- if (II == 0) return AliasAnalysis::Location();
+ if (!II) return AliasAnalysis::Location();
switch (II->getIntrinsicID()) {
default: return AliasAnalysis::Location(); // Unhandled intrinsic.
@@ -217,7 +218,7 @@ getLocForWrite(Instruction *Inst, AliasAnalysis &AA) {
// If we don't have target data around, an unknown size in Location means
// that we should use the size of the pointee type. This isn't valid for
// init.trampoline, which writes more than an i8.
- if (DL == 0) return AliasAnalysis::Location();
+ if (!DL) return AliasAnalysis::Location();
// FIXME: We don't know the size of the trampoline, so we can't really
// handle it here.
@@ -359,7 +360,7 @@ static OverwriteResult isOverwrite(const AliasAnalysis::Location &Later,
// If we have no DataLayout information around, then the size of the store
// is inferrable from the pointee type. If they are the same type, then
// we know that the store is safe.
- if (DL == 0 && Later.Ptr->getType() == Earlier.Ptr->getType())
+ if (DL == nullptr && Later.Ptr->getType() == Earlier.Ptr->getType())
return OverwriteComplete;
return OverwriteUnknown;
@@ -373,7 +374,7 @@ static OverwriteResult isOverwrite(const AliasAnalysis::Location &Later,
// Otherwise, we have to have size information, and the later store has to be
// larger than the earlier one.
if (Later.Size == AliasAnalysis::UnknownSize ||
- Earlier.Size == AliasAnalysis::UnknownSize || DL == 0)
+ Earlier.Size == AliasAnalysis::UnknownSize || DL == nullptr)
return OverwriteUnknown;
// Check to see if the later store is to the entire object (either a global,
@@ -461,7 +462,7 @@ static bool isPossibleSelfRead(Instruction *Inst,
// Self reads can only happen for instructions that read memory. Get the
// location read.
AliasAnalysis::Location InstReadLoc = getLocForRead(Inst, AA);
- if (InstReadLoc.Ptr == 0) return false; // Not a reading instruction.
+ if (!InstReadLoc.Ptr) return false; // Not a reading instruction.
// If the read and written loc obviously don't alias, it isn't a read.
if (AA.isNoAlias(InstReadLoc, InstStoreLoc)) return false;
@@ -528,7 +529,7 @@ bool DSE::runOnBasicBlock(BasicBlock &BB) {
DeleteDeadInstruction(SI, *MD, TLI);
- if (NextInst == 0) // Next instruction deleted.
+ if (!NextInst) // Next instruction deleted.
BBI = BB.begin();
else if (BBI != BB.begin()) // Revisit this instruction if possible.
--BBI;
@@ -543,7 +544,7 @@ bool DSE::runOnBasicBlock(BasicBlock &BB) {
AliasAnalysis::Location Loc = getLocForWrite(Inst, *AA);
// If we didn't get a useful location, fail.
- if (Loc.Ptr == 0)
+ if (!Loc.Ptr)
continue;
while (InstDep.isDef() || InstDep.isClobber()) {
@@ -557,7 +558,7 @@ bool DSE::runOnBasicBlock(BasicBlock &BB) {
Instruction *DepWrite = InstDep.getInst();
AliasAnalysis::Location DepLoc = getLocForWrite(DepWrite, *AA);
// If we didn't get a useful location, or if it isn't a size, bail out.
- if (DepLoc.Ptr == 0)
+ if (!DepLoc.Ptr)
break;
// If we find a write that is a) removable (i.e., non-volatile), b) is
diff --git a/lib/Transforms/Scalar/EarlyCSE.cpp b/lib/Transforms/Scalar/EarlyCSE.cpp
index af2c3d1..735f5c1 100644
--- a/lib/Transforms/Scalar/EarlyCSE.cpp
+++ b/lib/Transforms/Scalar/EarlyCSE.cpp
@@ -12,7 +12,6 @@
//
//===----------------------------------------------------------------------===//
-#define DEBUG_TYPE "early-cse"
#include "llvm/Transforms/Scalar.h"
#include "llvm/ADT/Hashing.h"
#include "llvm/ADT/ScopedHashTable.h"
@@ -29,6 +28,8 @@
#include <vector>
using namespace llvm;
+#define DEBUG_TYPE "early-cse"
+
STATISTIC(NumSimplify, "Number of instructions simplified or DCE'd");
STATISTIC(NumCSE, "Number of instructions CSE'd");
STATISTIC(NumCSELoad, "Number of load instructions CSE'd");
@@ -207,7 +208,7 @@ namespace {
return false;
CallInst *CI = dyn_cast<CallInst>(Inst);
- if (CI == 0 || !CI->onlyReadsMemory())
+ if (!CI || !CI->onlyReadsMemory())
return false;
return true;
}
@@ -405,14 +406,14 @@ bool EarlyCSE::processNode(DomTreeNode *Node) {
// have invalidated the live-out memory values of our parent value. For now,
// just be conservative and invalidate memory if this block has multiple
// predecessors.
- if (BB->getSinglePredecessor() == 0)
+ if (!BB->getSinglePredecessor())
++CurrentGeneration;
/// LastStore - Keep track of the last non-volatile store that we saw... for
/// as long as there in no instruction that reads memory. If we see a store
/// to the same location, we delete the dead store. This zaps trivial dead
/// stores which can occur in bitfield code among other things.
- StoreInst *LastStore = 0;
+ StoreInst *LastStore = nullptr;
bool Changed = false;
@@ -462,7 +463,7 @@ bool EarlyCSE::processNode(DomTreeNode *Node) {
if (LoadInst *LI = dyn_cast<LoadInst>(Inst)) {
// Ignore volatile loads.
if (!LI->isSimple()) {
- LastStore = 0;
+ LastStore = nullptr;
continue;
}
@@ -470,7 +471,7 @@ bool EarlyCSE::processNode(DomTreeNode *Node) {
// generation, replace this instruction.
std::pair<Value*, unsigned> InVal =
AvailableLoads->lookup(Inst->getOperand(0));
- if (InVal.first != 0 && InVal.second == CurrentGeneration) {
+ if (InVal.first != nullptr && InVal.second == CurrentGeneration) {
DEBUG(dbgs() << "EarlyCSE CSE LOAD: " << *Inst << " to: "
<< *InVal.first << '\n');
if (!Inst->use_empty()) Inst->replaceAllUsesWith(InVal.first);
@@ -483,20 +484,20 @@ bool EarlyCSE::processNode(DomTreeNode *Node) {
// Otherwise, remember that we have this instruction.
AvailableLoads->insert(Inst->getOperand(0),
std::pair<Value*, unsigned>(Inst, CurrentGeneration));
- LastStore = 0;
+ LastStore = nullptr;
continue;
}
// If this instruction may read from memory, forget LastStore.
if (Inst->mayReadFromMemory())
- LastStore = 0;
+ LastStore = nullptr;
// If this is a read-only call, process it.
if (CallValue::canHandle(Inst)) {
// If we have an available version of this call, and if it is the right
// generation, replace this instruction.
std::pair<Value*, unsigned> InVal = AvailableCalls->lookup(Inst);
- if (InVal.first != 0 && InVal.second == CurrentGeneration) {
+ if (InVal.first != nullptr && InVal.second == CurrentGeneration) {
DEBUG(dbgs() << "EarlyCSE CSE CALL: " << *Inst << " to: "
<< *InVal.first << '\n');
if (!Inst->use_empty()) Inst->replaceAllUsesWith(InVal.first);
@@ -528,7 +529,7 @@ bool EarlyCSE::processNode(DomTreeNode *Node) {
LastStore->eraseFromParent();
Changed = true;
++NumDSE;
- LastStore = 0;
+ LastStore = nullptr;
continue;
}
@@ -558,7 +559,7 @@ bool EarlyCSE::runOnFunction(Function &F) {
std::vector<StackNode *> nodesToProcess;
DataLayoutPass *DLP = getAnalysisIfAvailable<DataLayoutPass>();
- DL = DLP ? &DLP->getDataLayout() : 0;
+ DL = DLP ? &DLP->getDataLayout() : nullptr;
TLI = &getAnalysis<TargetLibraryInfo>();
DT = &getAnalysis<DominatorTreeWrapperPass>().getDomTree();
diff --git a/lib/Transforms/Scalar/FlattenCFGPass.cpp b/lib/Transforms/Scalar/FlattenCFGPass.cpp
index e7f2564..0430c18 100644
--- a/lib/Transforms/Scalar/FlattenCFGPass.cpp
+++ b/lib/Transforms/Scalar/FlattenCFGPass.cpp
@@ -11,7 +11,6 @@
//
//===----------------------------------------------------------------------===//
-#define DEBUG_TYPE "flattencfg"
#include "llvm/Transforms/Scalar.h"
#include "llvm/Analysis/AliasAnalysis.h"
#include "llvm/IR/CFG.h"
@@ -19,6 +18,8 @@
#include "llvm/Transforms/Utils/Local.h"
using namespace llvm;
+#define DEBUG_TYPE "flattencfg"
+
namespace {
struct FlattenCFGPass : public FunctionPass {
static char ID; // Pass identification, replacement for typeid
diff --git a/lib/Transforms/Scalar/GVN.cpp b/lib/Transforms/Scalar/GVN.cpp
index 33c387c..6d07ddd 100644
--- a/lib/Transforms/Scalar/GVN.cpp
+++ b/lib/Transforms/Scalar/GVN.cpp
@@ -15,11 +15,11 @@
//
//===----------------------------------------------------------------------===//
-#define DEBUG_TYPE "gvn"
#include "llvm/Transforms/Scalar.h"
#include "llvm/ADT/DenseMap.h"
#include "llvm/ADT/DepthFirstIterator.h"
#include "llvm/ADT/Hashing.h"
+#include "llvm/ADT/MapVector.h"
#include "llvm/ADT/SetVector.h"
#include "llvm/ADT/SmallPtrSet.h"
#include "llvm/ADT/Statistic.h"
@@ -50,6 +50,8 @@
using namespace llvm;
using namespace PatternMatch;
+#define DEBUG_TYPE "gvn"
+
STATISTIC(NumGVNInstr, "Number of instructions deleted");
STATISTIC(NumGVNLoad, "Number of loads deleted");
STATISTIC(NumGVNPRE, "Number of instructions PRE'd");
@@ -213,13 +215,13 @@ Expression ValueTable::create_cmp_expression(unsigned Opcode,
}
Expression ValueTable::create_extractvalue_expression(ExtractValueInst *EI) {
- assert(EI != 0 && "Not an ExtractValueInst?");
+ assert(EI && "Not an ExtractValueInst?");
Expression e;
e.type = EI->getType();
e.opcode = 0;
IntrinsicInst *I = dyn_cast<IntrinsicInst>(EI->getAggregateOperand());
- if (I != 0 && EI->getNumIndices() == 1 && *EI->idx_begin() == 0 ) {
+ if (I != nullptr && EI->getNumIndices() == 1 && *EI->idx_begin() == 0 ) {
// EI might be an extract from one of our recognised intrinsics. If it
// is we'll synthesize a semantically equivalent expression instead on
// an extract value expression.
@@ -327,7 +329,7 @@ uint32_t ValueTable::lookup_or_add_call(CallInst *C) {
const MemoryDependenceAnalysis::NonLocalDepInfo &deps =
MD->getNonLocalCallDependency(CallSite(C));
// FIXME: Move the checking logic to MemDep!
- CallInst* cdep = 0;
+ CallInst* cdep = nullptr;
// Check to see if we have a single dominating call instruction that is
// identical to C.
@@ -338,8 +340,8 @@ uint32_t ValueTable::lookup_or_add_call(CallInst *C) {
// We don't handle non-definitions. If we already have a call, reject
// instruction dependencies.
- if (!I->getResult().isDef() || cdep != 0) {
- cdep = 0;
+ if (!I->getResult().isDef() || cdep != nullptr) {
+ cdep = nullptr;
break;
}
@@ -350,7 +352,7 @@ uint32_t ValueTable::lookup_or_add_call(CallInst *C) {
continue;
}
- cdep = 0;
+ cdep = nullptr;
break;
}
@@ -551,7 +553,7 @@ namespace {
static AvailableValueInBlock getUndef(BasicBlock *BB) {
AvailableValueInBlock Res;
Res.BB = BB;
- Res.Val.setPointer(0);
+ Res.Val.setPointer(nullptr);
Res.Val.setInt(UndefVal);
Res.Offset = 0;
return Res;
@@ -611,7 +613,7 @@ namespace {
public:
static char ID; // Pass identification, replacement for typeid
explicit GVN(bool noloads = false)
- : FunctionPass(ID), NoLoads(noloads), MD(0) {
+ : FunctionPass(ID), NoLoads(noloads), MD(nullptr) {
initializeGVNPass(*PassRegistry::getPassRegistry());
}
@@ -649,7 +651,7 @@ namespace {
/// removeFromLeaderTable - Scan the list of values corresponding to a given
/// value number, and remove the given instruction if encountered.
void removeFromLeaderTable(uint32_t N, Instruction *I, BasicBlock *BB) {
- LeaderTableEntry* Prev = 0;
+ LeaderTableEntry* Prev = nullptr;
LeaderTableEntry* Curr = &LeaderTable[N];
while (Curr->Val != I || Curr->BB != BB) {
@@ -661,8 +663,8 @@ namespace {
Prev->Next = Curr->Next;
} else {
if (!Curr->Next) {
- Curr->Val = 0;
- Curr->BB = 0;
+ Curr->Val = nullptr;
+ Curr->BB = nullptr;
} else {
LeaderTableEntry* Next = Curr->Next;
Curr->Val = Next->Val;
@@ -855,7 +857,7 @@ static Value *CoerceAvailableValueToLoadType(Value *StoredVal,
Instruction *InsertPt,
const DataLayout &DL) {
if (!CanCoerceMustAliasedValueToLoad(StoredVal, LoadedTy, DL))
- return 0;
+ return nullptr;
// If this is already the right type, just return it.
Type *StoredValTy = StoredVal->getType();
@@ -1060,7 +1062,7 @@ static int AnalyzeLoadFromClobberingMemInst(Type *LoadTy, Value *LoadPtr,
const DataLayout &DL) {
// If the mem operation is a non-constant size, we can't handle it.
ConstantInt *SizeCst = dyn_cast<ConstantInt>(MI->getLength());
- if (SizeCst == 0) return -1;
+ if (!SizeCst) return -1;
uint64_t MemSizeInBits = SizeCst->getZExtValue()*8;
// If this is memset, we just need to see if the offset is valid in the size
@@ -1075,10 +1077,10 @@ static int AnalyzeLoadFromClobberingMemInst(Type *LoadTy, Value *LoadPtr,
MemTransferInst *MTI = cast<MemTransferInst>(MI);
Constant *Src = dyn_cast<Constant>(MTI->getSource());
- if (Src == 0) return -1;
+ if (!Src) return -1;
GlobalVariable *GV = dyn_cast<GlobalVariable>(GetUnderlyingObject(Src, &DL));
- if (GV == 0 || !GV->isConstant()) return -1;
+ if (!GV || !GV->isConstant()) return -1;
// See if the access is within the bounds of the transfer.
int Offset = AnalyzeLoadFromClobberingWrite(LoadTy, LoadPtr,
@@ -1420,8 +1422,7 @@ void GVN::AnalyzeLoadAvailability(LoadInst *LI, LoadDepVect &Deps,
// If this is a clobber and L is the first instruction in its block, then
// we have the first instruction in the entry block.
if (DepLI != LI && Address && DL) {
- int Offset = AnalyzeLoadFromClobberingLoad(LI->getType(),
- LI->getPointerOperand(),
+ int Offset = AnalyzeLoadFromClobberingLoad(LI->getType(), Address,
DepLI, *DL);
if (Offset != -1) {
@@ -1469,8 +1470,8 @@ void GVN::AnalyzeLoadAvailability(LoadInst *LI, LoadDepVect &Deps,
if (S->getValueOperand()->getType() != LI->getType()) {
// If the stored value is larger or equal to the loaded value, we can
// reuse it.
- if (DL == 0 || !CanCoerceMustAliasedValueToLoad(S->getValueOperand(),
- LI->getType(), *DL)) {
+ if (!DL || !CanCoerceMustAliasedValueToLoad(S->getValueOperand(),
+ LI->getType(), *DL)) {
UnavailableBlocks.push_back(DepBB);
continue;
}
@@ -1486,7 +1487,7 @@ void GVN::AnalyzeLoadAvailability(LoadInst *LI, LoadDepVect &Deps,
if (LD->getType() != LI->getType()) {
// If the stored value is larger or equal to the loaded value, we can
// reuse it.
- if (DL == 0 || !CanCoerceMustAliasedValueToLoad(LD, LI->getType(),*DL)){
+ if (!DL || !CanCoerceMustAliasedValueToLoad(LD, LI->getType(),*DL)) {
UnavailableBlocks.push_back(DepBB);
continue;
}
@@ -1539,7 +1540,7 @@ bool GVN::PerformLoadPRE(LoadInst *LI, AvailValInBlkVect &ValuesPerBlock,
// Check to see how many predecessors have the loaded value fully
// available.
- DenseMap<BasicBlock*, Value*> PredLoads;
+ MapVector<BasicBlock *, Value *> PredLoads;
DenseMap<BasicBlock*, char> FullyAvailableBlocks;
for (unsigned i = 0, e = ValuesPerBlock.size(); i != e; ++i)
FullyAvailableBlocks[ValuesPerBlock[i].BB] = true;
@@ -1553,7 +1554,6 @@ bool GVN::PerformLoadPRE(LoadInst *LI, AvailValInBlkVect &ValuesPerBlock,
if (IsValueFullyAvailableInBlock(Pred, FullyAvailableBlocks, 0)) {
continue;
}
- PredLoads[Pred] = 0;
if (Pred->getTerminator()->getNumSuccessors() != 1) {
if (isa<IndirectBrInst>(Pred->getTerminator())) {
@@ -1570,11 +1570,14 @@ bool GVN::PerformLoadPRE(LoadInst *LI, AvailValInBlkVect &ValuesPerBlock,
}
CriticalEdgePred.push_back(Pred);
+ } else {
+ // Only add the predecessors that will not be split for now.
+ PredLoads[Pred] = nullptr;
}
}
// Decide whether PRE is profitable for this load.
- unsigned NumUnavailablePreds = PredLoads.size();
+ unsigned NumUnavailablePreds = PredLoads.size() + CriticalEdgePred.size();
assert(NumUnavailablePreds != 0 &&
"Fully available value should already be eliminated!");
@@ -1586,12 +1589,10 @@ bool GVN::PerformLoadPRE(LoadInst *LI, AvailValInBlkVect &ValuesPerBlock,
return false;
// Split critical edges, and update the unavailable predecessors accordingly.
- for (SmallVectorImpl<BasicBlock *>::iterator I = CriticalEdgePred.begin(),
- E = CriticalEdgePred.end(); I != E; I++) {
- BasicBlock *OrigPred = *I;
+ for (BasicBlock *OrigPred : CriticalEdgePred) {
BasicBlock *NewPred = splitCriticalEdges(OrigPred, LoadBB);
- PredLoads.erase(OrigPred);
- PredLoads[NewPred] = 0;
+ assert(!PredLoads.count(OrigPred) && "Split edges shouldn't be in map!");
+ PredLoads[NewPred] = nullptr;
DEBUG(dbgs() << "Split critical edge " << OrigPred->getName() << "->"
<< LoadBB->getName() << '\n');
}
@@ -1599,9 +1600,8 @@ bool GVN::PerformLoadPRE(LoadInst *LI, AvailValInBlkVect &ValuesPerBlock,
// Check if the load can safely be moved to all the unavailable predecessors.
bool CanDoPRE = true;
SmallVector<Instruction*, 8> NewInsts;
- for (DenseMap<BasicBlock*, Value*>::iterator I = PredLoads.begin(),
- E = PredLoads.end(); I != E; ++I) {
- BasicBlock *UnavailablePred = I->first;
+ for (auto &PredLoad : PredLoads) {
+ BasicBlock *UnavailablePred = PredLoad.first;
// Do PHI translation to get its value in the predecessor if necessary. The
// returned pointer (if non-null) is guaranteed to dominate UnavailablePred.
@@ -1610,20 +1610,20 @@ bool GVN::PerformLoadPRE(LoadInst *LI, AvailValInBlkVect &ValuesPerBlock,
// the load on the pred (?!?), so we can insert code to materialize the
// pointer if it is not available.
PHITransAddr Address(LI->getPointerOperand(), DL);
- Value *LoadPtr = 0;
+ Value *LoadPtr = nullptr;
LoadPtr = Address.PHITranslateWithInsertion(LoadBB, UnavailablePred,
*DT, NewInsts);
// If we couldn't find or insert a computation of this phi translated value,
// we fail PRE.
- if (LoadPtr == 0) {
+ if (!LoadPtr) {
DEBUG(dbgs() << "COULDN'T INSERT PHI TRANSLATED VALUE OF: "
<< *LI->getPointerOperand() << "\n");
CanDoPRE = false;
break;
}
- I->second = LoadPtr;
+ PredLoad.second = LoadPtr;
}
if (!CanDoPRE) {
@@ -1632,8 +1632,8 @@ bool GVN::PerformLoadPRE(LoadInst *LI, AvailValInBlkVect &ValuesPerBlock,
if (MD) MD->removeInstruction(I);
I->eraseFromParent();
}
- // HINT:Don't revert the edge-splitting as following transformation may
- // also need to split these critial edges.
+ // HINT: Don't revert the edge-splitting as following transformation may
+ // also need to split these critical edges.
return !CriticalEdgePred.empty();
}
@@ -1654,10 +1654,9 @@ bool GVN::PerformLoadPRE(LoadInst *LI, AvailValInBlkVect &ValuesPerBlock,
VN.lookup_or_add(NewInsts[i]);
}
- for (DenseMap<BasicBlock*, Value*>::iterator I = PredLoads.begin(),
- E = PredLoads.end(); I != E; ++I) {
- BasicBlock *UnavailablePred = I->first;
- Value *LoadPtr = I->second;
+ for (const auto &PredLoad : PredLoads) {
+ BasicBlock *UnavailablePred = PredLoad.first;
+ Value *LoadPtr = PredLoad.second;
Instruction *NewLoad = new LoadInst(LoadPtr, LI->getName()+".pre", false,
LI->getAlignment(),
@@ -1776,7 +1775,7 @@ static void patchReplacementInstruction(Instruction *I, Value *Repl) {
MDNode *ReplMD = Metadata[i].second;
switch(Kind) {
default:
- ReplInst->setMetadata(Kind, NULL); // Remove unknown metadata
+ ReplInst->setMetadata(Kind, nullptr); // Remove unknown metadata
break;
case LLVMContext::MD_dbg:
llvm_unreachable("getAllMetadataOtherThanDebugLoc returned a MD_dbg");
@@ -1832,7 +1831,7 @@ bool GVN::processLoad(LoadInst *L) {
// a common base + constant offset, and if the previous store (or memset)
// completely covers this load. This sort of thing can happen in bitfield
// access code.
- Value *AvailVal = 0;
+ Value *AvailVal = nullptr;
if (StoreInst *DepSI = dyn_cast<StoreInst>(Dep.getInst())) {
int Offset = AnalyzeLoadFromClobberingStore(L->getType(),
L->getPointerOperand(),
@@ -1920,7 +1919,7 @@ bool GVN::processLoad(LoadInst *L) {
if (DL) {
StoredVal = CoerceAvailableValueToLoadType(StoredVal, L->getType(),
L, *DL);
- if (StoredVal == 0)
+ if (!StoredVal)
return false;
DEBUG(dbgs() << "GVN COERCED STORE:\n" << *DepSI << '\n' << *StoredVal
@@ -1949,7 +1948,7 @@ bool GVN::processLoad(LoadInst *L) {
if (DL) {
AvailableVal = CoerceAvailableValueToLoadType(DepLI, L->getType(),
L, *DL);
- if (AvailableVal == 0)
+ if (!AvailableVal)
return false;
DEBUG(dbgs() << "GVN COERCED LOAD:\n" << *DepLI << "\n" << *AvailableVal
@@ -1999,9 +1998,9 @@ bool GVN::processLoad(LoadInst *L) {
// a few comparisons of DFS numbers.
Value *GVN::findLeader(const BasicBlock *BB, uint32_t num) {
LeaderTableEntry Vals = LeaderTable[num];
- if (!Vals.Val) return 0;
+ if (!Vals.Val) return nullptr;
- Value *Val = 0;
+ Value *Val = nullptr;
if (DT->dominates(Vals.BB, BB)) {
Val = Vals.Val;
if (isa<Constant>(Val)) return Val;
@@ -2052,7 +2051,7 @@ static bool isOnlyReachableViaThisEdge(const BasicBlockEdge &E,
const BasicBlock *Src = E.getStart();
assert((!Pred || Pred == Src) && "No edge between these basic blocks!");
(void)Src;
- return Pred != 0;
+ return Pred != nullptr;
}
/// propagateEquality - The given values are known to be equal in every block
@@ -2296,7 +2295,7 @@ bool GVN::processInstruction(Instruction *I) {
// Perform fast-path value-number based elimination of values inherited from
// dominators.
Value *repl = findLeader(I->getParent(), Num);
- if (repl == 0) {
+ if (!repl) {
// Failure, just remember this instance for future use.
addToLeaderTable(Num, I, I->getParent());
return false;
@@ -2319,7 +2318,7 @@ bool GVN::runOnFunction(Function& F) {
MD = &getAnalysis<MemoryDependenceAnalysis>();
DT = &getAnalysis<DominatorTreeWrapperPass>().getDomTree();
DataLayoutPass *DLP = getAnalysisIfAvailable<DataLayoutPass>();
- DL = DLP ? &DLP->getDataLayout() : 0;
+ DL = DLP ? &DLP->getDataLayout() : nullptr;
TLI = &getAnalysis<TargetLibraryInfo>();
VN.setAliasAnalysis(&getAnalysis<AliasAnalysis>());
VN.setMemDep(MD);
@@ -2421,10 +2420,7 @@ bool GVN::processBlock(BasicBlock *BB) {
bool GVN::performPRE(Function &F) {
bool Changed = false;
SmallVector<std::pair<Value*, BasicBlock*>, 8> predMap;
- for (df_iterator<BasicBlock*> DI = df_begin(&F.getEntryBlock()),
- DE = df_end(&F.getEntryBlock()); DI != DE; ++DI) {
- BasicBlock *CurrentBlock = *DI;
-
+ for (BasicBlock *CurrentBlock : depth_first(&F.getEntryBlock())) {
// Nothing to PRE in the entry block.
if (CurrentBlock == &F.getEntryBlock()) continue;
@@ -2464,7 +2460,7 @@ bool GVN::performPRE(Function &F) {
// more complicated to get right.
unsigned NumWith = 0;
unsigned NumWithout = 0;
- BasicBlock *PREPred = 0;
+ BasicBlock *PREPred = nullptr;
predMap.clear();
for (pred_iterator PI = pred_begin(CurrentBlock),
@@ -2482,8 +2478,8 @@ bool GVN::performPRE(Function &F) {
}
Value* predV = findLeader(P, ValNo);
- if (predV == 0) {
- predMap.push_back(std::make_pair(static_cast<Value *>(0), P));
+ if (!predV) {
+ predMap.push_back(std::make_pair(static_cast<Value *>(nullptr), P));
PREPred = P;
++NumWithout;
} else if (predV == CurInst) {
@@ -2637,9 +2633,8 @@ bool GVN::iterateOnFunction(Function &F) {
//
std::vector<BasicBlock *> BBVect;
BBVect.reserve(256);
- for (df_iterator<DomTreeNode*> DI = df_begin(DT->getRootNode()),
- DE = df_end(DT->getRootNode()); DI != DE; ++DI)
- BBVect.push_back(DI->getBlock());
+ for (DomTreeNode *x : depth_first(DT->getRootNode()))
+ BBVect.push_back(x->getBlock());
for (std::vector<BasicBlock *>::iterator I = BBVect.begin(), E = BBVect.end();
I != E; I++)
diff --git a/lib/Transforms/Scalar/GlobalMerge.cpp b/lib/Transforms/Scalar/GlobalMerge.cpp
index 8ffd64b..990d067 100644
--- a/lib/Transforms/Scalar/GlobalMerge.cpp
+++ b/lib/Transforms/Scalar/GlobalMerge.cpp
@@ -51,7 +51,6 @@
// note that we saved 2 registers here almostly "for free".
// ===---------------------------------------------------------------------===//
-#define DEBUG_TYPE "global-merge"
#include "llvm/Transforms/Scalar.h"
#include "llvm/ADT/SmallPtrSet.h"
#include "llvm/ADT/Statistic.h"
@@ -70,6 +69,8 @@
#include "llvm/Target/TargetLoweringObjectFile.h"
using namespace llvm;
+#define DEBUG_TYPE "global-merge"
+
cl::opt<bool>
EnableGlobalMerge("global-merge", cl::Hidden,
cl::desc("Enable global merge pass"),
@@ -107,7 +108,7 @@ namespace {
public:
static char ID; // Pass identification, replacement for typeid.
- explicit GlobalMerge(const TargetMachine *TM = 0)
+ explicit GlobalMerge(const TargetMachine *TM = nullptr)
: FunctionPass(ID), TM(TM) {
initializeGlobalMergePass(*PassRegistry::getPassRegistry());
}
@@ -173,7 +174,8 @@ bool GlobalMerge::doMerge(SmallVectorImpl<GlobalVariable*> &Globals,
GlobalVariable *MergedGV = new GlobalVariable(M, MergedTy, isConst,
GlobalValue::InternalLinkage,
MergedInit, "_MergedGlobals",
- 0, GlobalVariable::NotThreadLocal,
+ nullptr,
+ GlobalVariable::NotThreadLocal,
AddrSpace);
for (size_t k = i; k < j; ++k) {
Constant *Idx[2] = {
diff --git a/lib/Transforms/Scalar/IndVarSimplify.cpp b/lib/Transforms/Scalar/IndVarSimplify.cpp
index 7537632..e83a5c4 100644
--- a/lib/Transforms/Scalar/IndVarSimplify.cpp
+++ b/lib/Transforms/Scalar/IndVarSimplify.cpp
@@ -24,7 +24,6 @@
//
//===----------------------------------------------------------------------===//
-#define DEBUG_TYPE "indvars"
#include "llvm/Transforms/Scalar.h"
#include "llvm/ADT/DenseMap.h"
#include "llvm/ADT/SmallVector.h"
@@ -50,6 +49,8 @@
#include "llvm/Transforms/Utils/SimplifyIndVar.h"
using namespace llvm;
+#define DEBUG_TYPE "indvars"
+
STATISTIC(NumWidened , "Number of indvars widened");
STATISTIC(NumReplaced , "Number of exit values replaced");
STATISTIC(NumLFTR , "Number of loop exit tests replaced");
@@ -79,8 +80,8 @@ namespace {
public:
static char ID; // Pass identification, replacement for typeid
- IndVarSimplify() : LoopPass(ID), LI(0), SE(0), DT(0), DL(0),
- Changed(false) {
+ IndVarSimplify() : LoopPass(ID), LI(nullptr), SE(nullptr), DT(nullptr),
+ DL(nullptr), Changed(false) {
initializeIndVarSimplifyPass(*PassRegistry::getPassRegistry());
}
@@ -196,7 +197,7 @@ static Instruction *getInsertPointForUses(Instruction *User, Value *Def,
if (!PHI)
return User;
- Instruction *InsertPt = 0;
+ Instruction *InsertPt = nullptr;
for (unsigned i = 0, e = PHI->getNumIncomingValues(); i != e; ++i) {
if (PHI->getIncomingValue(i) != Def)
continue;
@@ -257,13 +258,13 @@ void IndVarSimplify::HandleFloatingPointIV(Loop *L, PHINode *PN) {
// an add or increment value can not be represented by an integer.
BinaryOperator *Incr =
dyn_cast<BinaryOperator>(PN->getIncomingValue(BackEdge));
- if (Incr == 0 || Incr->getOpcode() != Instruction::FAdd) return;
+ if (Incr == nullptr || Incr->getOpcode() != Instruction::FAdd) return;
// If this is not an add of the PHI with a constantfp, or if the constant fp
// is not an integer, bail out.
ConstantFP *IncValueVal = dyn_cast<ConstantFP>(Incr->getOperand(1));
int64_t IncValue;
- if (IncValueVal == 0 || Incr->getOperand(0) != PN ||
+ if (IncValueVal == nullptr || Incr->getOperand(0) != PN ||
!ConvertToSInt(IncValueVal->getValueAPF(), IncValue))
return;
@@ -280,7 +281,7 @@ void IndVarSimplify::HandleFloatingPointIV(Loop *L, PHINode *PN) {
FCmpInst *Compare = dyn_cast<FCmpInst>(U1);
if (!Compare)
Compare = dyn_cast<FCmpInst>(U2);
- if (Compare == 0 || !Compare->hasOneUse() ||
+ if (!Compare || !Compare->hasOneUse() ||
!isa<BranchInst>(Compare->user_back()))
return;
@@ -301,7 +302,7 @@ void IndVarSimplify::HandleFloatingPointIV(Loop *L, PHINode *PN) {
// transform it.
ConstantFP *ExitValueVal = dyn_cast<ConstantFP>(Compare->getOperand(1));
int64_t ExitValue;
- if (ExitValueVal == 0 ||
+ if (ExitValueVal == nullptr ||
!ConvertToSInt(ExitValueVal->getValueAPF(), ExitValue))
return;
@@ -651,7 +652,8 @@ namespace {
Type *WidestNativeType; // Widest integer type created [sz]ext
bool IsSigned; // Was an sext user seen before a zext?
- WideIVInfo() : NarrowIV(0), WidestNativeType(0), IsSigned(false) {}
+ WideIVInfo() : NarrowIV(nullptr), WidestNativeType(nullptr),
+ IsSigned(false) {}
};
}
@@ -693,7 +695,7 @@ struct NarrowIVDefUse {
Instruction *NarrowUse;
Instruction *WideDef;
- NarrowIVDefUse(): NarrowDef(0), NarrowUse(0), WideDef(0) {}
+ NarrowIVDefUse(): NarrowDef(nullptr), NarrowUse(nullptr), WideDef(nullptr) {}
NarrowIVDefUse(Instruction *ND, Instruction *NU, Instruction *WD):
NarrowDef(ND), NarrowUse(NU), WideDef(WD) {}
@@ -736,9 +738,9 @@ public:
L(LI->getLoopFor(OrigPhi->getParent())),
SE(SEv),
DT(DTree),
- WidePhi(0),
- WideInc(0),
- WideIncExpr(0),
+ WidePhi(nullptr),
+ WideInc(nullptr),
+ WideIncExpr(nullptr),
DeadInsts(DI) {
assert(L->getHeader() == OrigPhi->getParent() && "Phi must be an IV");
}
@@ -793,7 +795,7 @@ Instruction *WidenIV::CloneIVUser(NarrowIVDefUse DU) {
unsigned Opcode = DU.NarrowUse->getOpcode();
switch (Opcode) {
default:
- return 0;
+ return nullptr;
case Instruction::Add:
case Instruction::Mul:
case Instruction::UDiv:
@@ -838,14 +840,14 @@ Instruction *WidenIV::CloneIVUser(NarrowIVDefUse DU) {
const SCEVAddRecExpr* WidenIV::GetExtendedOperandRecurrence(NarrowIVDefUse DU) {
// Handle the common case of add<nsw/nuw>
if (DU.NarrowUse->getOpcode() != Instruction::Add)
- return 0;
+ return nullptr;
// One operand (NarrowDef) has already been extended to WideDef. Now determine
// if extending the other will lead to a recurrence.
unsigned ExtendOperIdx = DU.NarrowUse->getOperand(0) == DU.NarrowDef ? 1 : 0;
assert(DU.NarrowUse->getOperand(1-ExtendOperIdx) == DU.NarrowDef && "bad DU");
- const SCEV *ExtendOperExpr = 0;
+ const SCEV *ExtendOperExpr = nullptr;
const OverflowingBinaryOperator *OBO =
cast<OverflowingBinaryOperator>(DU.NarrowUse);
if (IsSigned && OBO->hasNoSignedWrap())
@@ -855,7 +857,7 @@ const SCEVAddRecExpr* WidenIV::GetExtendedOperandRecurrence(NarrowIVDefUse DU) {
ExtendOperExpr = SE->getZeroExtendExpr(
SE->getSCEV(DU.NarrowUse->getOperand(ExtendOperIdx)), WideType);
else
- return 0;
+ return nullptr;
// When creating this AddExpr, don't apply the current operations NSW or NUW
// flags. This instruction may be guarded by control flow that the no-wrap
@@ -866,7 +868,7 @@ const SCEVAddRecExpr* WidenIV::GetExtendedOperandRecurrence(NarrowIVDefUse DU) {
SE->getAddExpr(SE->getSCEV(DU.WideDef), ExtendOperExpr));
if (!AddRec || AddRec->getLoop() != L)
- return 0;
+ return nullptr;
return AddRec;
}
@@ -877,14 +879,14 @@ const SCEVAddRecExpr* WidenIV::GetExtendedOperandRecurrence(NarrowIVDefUse DU) {
/// recurrence. Otherwise return NULL.
const SCEVAddRecExpr *WidenIV::GetWideRecurrence(Instruction *NarrowUse) {
if (!SE->isSCEVable(NarrowUse->getType()))
- return 0;
+ return nullptr;
const SCEV *NarrowExpr = SE->getSCEV(NarrowUse);
if (SE->getTypeSizeInBits(NarrowExpr->getType())
>= SE->getTypeSizeInBits(WideType)) {
// NarrowUse implicitly widens its operand. e.g. a gep with a narrow
// index. So don't follow this use.
- return 0;
+ return nullptr;
}
const SCEV *WideExpr = IsSigned ?
@@ -892,7 +894,7 @@ const SCEVAddRecExpr *WidenIV::GetWideRecurrence(Instruction *NarrowUse) {
SE->getZeroExtendExpr(NarrowExpr, WideType);
const SCEVAddRecExpr *AddRec = dyn_cast<SCEVAddRecExpr>(WideExpr);
if (!AddRec || AddRec->getLoop() != L)
- return 0;
+ return nullptr;
return AddRec;
}
@@ -930,7 +932,7 @@ Instruction *WidenIV::WidenIVUse(NarrowIVDefUse DU, SCEVExpander &Rewriter) {
DEBUG(dbgs() << "INDVARS: Widen lcssa phi " << *UsePhi
<< " to " << *WidePhi << "\n");
}
- return 0;
+ return nullptr;
}
}
// Our raison d'etre! Eliminate sign and zero extension.
@@ -968,7 +970,7 @@ Instruction *WidenIV::WidenIVUse(NarrowIVDefUse DU, SCEVExpander &Rewriter) {
// push the uses of WideDef here.
// No further widening is needed. The deceased [sz]ext had done it for us.
- return 0;
+ return nullptr;
}
// Does this user itself evaluate to a recurrence after widening?
@@ -981,7 +983,7 @@ Instruction *WidenIV::WidenIVUse(NarrowIVDefUse DU, SCEVExpander &Rewriter) {
// follow it. Instead insert a Trunc to kill off the original use,
// eventually isolating the original narrow IV so it can be removed.
truncateIVUse(DU, DT);
- return 0;
+ return nullptr;
}
// Assume block terminators cannot evaluate to a recurrence. We can't to
// insert a Trunc after a terminator if there happens to be a critical edge.
@@ -990,14 +992,14 @@ Instruction *WidenIV::WidenIVUse(NarrowIVDefUse DU, SCEVExpander &Rewriter) {
// Reuse the IV increment that SCEVExpander created as long as it dominates
// NarrowUse.
- Instruction *WideUse = 0;
+ Instruction *WideUse = nullptr;
if (WideAddRec == WideIncExpr
&& Rewriter.hoistIVInc(WideInc, DU.NarrowUse))
WideUse = WideInc;
else {
WideUse = CloneIVUser(DU);
if (!WideUse)
- return 0;
+ return nullptr;
}
// Evaluation of WideAddRec ensured that the narrow expression could be
// extended outside the loop without overflow. This suggests that the wide use
@@ -1008,7 +1010,7 @@ Instruction *WidenIV::WidenIVUse(NarrowIVDefUse DU, SCEVExpander &Rewriter) {
DEBUG(dbgs() << "Wide use expression mismatch: " << *WideUse
<< ": " << *SE->getSCEV(WideUse) << " != " << *WideAddRec << "\n");
DeadInsts.push_back(WideUse);
- return 0;
+ return nullptr;
}
// Returning WideUse pushes it on the worklist.
@@ -1043,7 +1045,7 @@ PHINode *WidenIV::CreateWideIV(SCEVExpander &Rewriter) {
// Is this phi an induction variable?
const SCEVAddRecExpr *AddRec = dyn_cast<SCEVAddRecExpr>(SE->getSCEV(OrigPhi));
if (!AddRec)
- return NULL;
+ return nullptr;
// Widen the induction variable expression.
const SCEV *WideIVExpr = IsSigned ?
@@ -1056,7 +1058,7 @@ PHINode *WidenIV::CreateWideIV(SCEVExpander &Rewriter) {
// Can the IV be extended outside the loop without overflow?
AddRec = dyn_cast<SCEVAddRecExpr>(WideIVExpr);
if (!AddRec || AddRec->getLoop() != L)
- return NULL;
+ return nullptr;
// An AddRec must have loop-invariant operands. Since this AddRec is
// materialized by a loop header phi, the expression cannot have any post-loop
@@ -1282,7 +1284,7 @@ static bool canExpandBackedgeTakenCount(Loop *L, ScalarEvolution *SE) {
static PHINode *getLoopPhiForCounter(Value *IncV, Loop *L, DominatorTree *DT) {
Instruction *IncI = dyn_cast<Instruction>(IncV);
if (!IncI)
- return 0;
+ return nullptr;
switch (IncI->getOpcode()) {
case Instruction::Add:
@@ -1293,17 +1295,17 @@ static PHINode *getLoopPhiForCounter(Value *IncV, Loop *L, DominatorTree *DT) {
if (IncI->getNumOperands() == 2)
break;
default:
- return 0;
+ return nullptr;
}
PHINode *Phi = dyn_cast<PHINode>(IncI->getOperand(0));
if (Phi && Phi->getParent() == L->getHeader()) {
if (isLoopInvariant(IncI->getOperand(1), L, DT))
return Phi;
- return 0;
+ return nullptr;
}
if (IncI->getOpcode() == Instruction::GetElementPtr)
- return 0;
+ return nullptr;
// Allow add/sub to be commuted.
Phi = dyn_cast<PHINode>(IncI->getOperand(1));
@@ -1311,7 +1313,7 @@ static PHINode *getLoopPhiForCounter(Value *IncV, Loop *L, DominatorTree *DT) {
if (isLoopInvariant(IncI->getOperand(0), L, DT))
return Phi;
}
- return 0;
+ return nullptr;
}
/// Return the compare guarding the loop latch, or NULL for unrecognized tests.
@@ -1321,7 +1323,7 @@ static ICmpInst *getLoopTest(Loop *L) {
BasicBlock *LatchBlock = L->getLoopLatch();
// Don't bother with LFTR if the loop is not properly simplified.
if (!LatchBlock)
- return 0;
+ return nullptr;
BranchInst *BI = dyn_cast<BranchInst>(L->getExitingBlock()->getTerminator());
assert(BI && "expected exit branch");
@@ -1446,8 +1448,8 @@ FindLoopCounter(Loop *L, const SCEV *BECount,
cast<BranchInst>(L->getExitingBlock()->getTerminator())->getCondition();
// Loop over all of the PHI nodes, looking for a simple counter.
- PHINode *BestPhi = 0;
- const SCEV *BestInit = 0;
+ PHINode *BestPhi = nullptr;
+ const SCEV *BestInit = nullptr;
BasicBlock *LatchBlock = L->getLoopLatch();
assert(LatchBlock && "needsLFTR should guarantee a loop latch");
@@ -1571,7 +1573,7 @@ static Value *genLoopLimit(PHINode *IndVar, const SCEV *IVCount, Loop *L,
// IVInit integer and IVCount pointer would only occur if a canonical IV
// were generated on top of case #2, which is not expected.
- const SCEV *IVLimit = 0;
+ const SCEV *IVLimit = nullptr;
// For unit stride, IVCount = Start + BECount with 2's complement overflow.
// For non-zero Start, compute IVCount here.
if (AR->getStart()->isZero())
@@ -1813,7 +1815,7 @@ bool IndVarSimplify::runOnLoop(Loop *L, LPPassManager &LPM) {
SE = &getAnalysis<ScalarEvolution>();
DT = &getAnalysis<DominatorTreeWrapperPass>().getDomTree();
DataLayoutPass *DLP = getAnalysisIfAvailable<DataLayoutPass>();
- DL = DLP ? &DLP->getDataLayout() : 0;
+ DL = DLP ? &DLP->getDataLayout() : nullptr;
TLI = getAnalysisIfAvailable<TargetLibraryInfo>();
DeadInsts.clear();
diff --git a/lib/Transforms/Scalar/JumpThreading.cpp b/lib/Transforms/Scalar/JumpThreading.cpp
index 067deb7..230a381 100644
--- a/lib/Transforms/Scalar/JumpThreading.cpp
+++ b/lib/Transforms/Scalar/JumpThreading.cpp
@@ -11,7 +11,6 @@
//
//===----------------------------------------------------------------------===//
-#define DEBUG_TYPE "jump-threading"
#include "llvm/Transforms/Scalar.h"
#include "llvm/ADT/DenseMap.h"
#include "llvm/ADT/DenseSet.h"
@@ -38,6 +37,8 @@
#include "llvm/Transforms/Utils/SSAUpdater.h"
using namespace llvm;
+#define DEBUG_TYPE "jump-threading"
+
STATISTIC(NumThreads, "Number of jumps threaded");
STATISTIC(NumFolds, "Number of terminators folded");
STATISTIC(NumDupes, "Number of branch blocks duplicated to eliminate phi");
@@ -153,7 +154,7 @@ bool JumpThreading::runOnFunction(Function &F) {
DEBUG(dbgs() << "Jump threading on function '" << F.getName() << "'\n");
DataLayoutPass *DLP = getAnalysisIfAvailable<DataLayoutPass>();
- DL = DLP ? &DLP->getDataLayout() : 0;
+ DL = DLP ? &DLP->getDataLayout() : nullptr;
TLI = &getAnalysis<TargetLibraryInfo>();
LVI = &getAnalysis<LazyValueInfo>();
@@ -308,7 +309,7 @@ void JumpThreading::FindLoopHeaders(Function &F) {
/// Returns null if Val is null or not an appropriate constant.
static Constant *getKnownConstant(Value *Val, ConstantPreference Preference) {
if (!Val)
- return 0;
+ return nullptr;
// Undef is "known" enough.
if (UndefValue *U = dyn_cast<UndefValue>(Val))
@@ -352,7 +353,7 @@ ComputeValueKnownInPredecessors(Value *V, BasicBlock *BB, PredValueInfo &Result,
// If V is a non-instruction value, or an instruction in a different block,
// then it can't be derived from a PHI.
Instruction *I = dyn_cast<Instruction>(V);
- if (I == 0 || I->getParent() != BB) {
+ if (!I || I->getParent() != BB) {
// Okay, if this is a live-in value, see if it has a known value at the end
// of any of our predecessors.
@@ -495,7 +496,7 @@ ComputeValueKnownInPredecessors(Value *V, BasicBlock *BB, PredValueInfo &Result,
Value *RHS = Cmp->getOperand(1)->DoPHITranslation(BB, PredBB);
Value *Res = SimplifyCmpInst(Cmp->getPredicate(), LHS, RHS, DL);
- if (Res == 0) {
+ if (!Res) {
if (!isa<Constant>(RHS))
continue;
@@ -581,7 +582,7 @@ ComputeValueKnownInPredecessors(Value *V, BasicBlock *BB, PredValueInfo &Result,
// Either operand will do, so be sure to pick the one that's a known
// constant.
// FIXME: Do this more cleverly if both values are known constants?
- KnownCond = (TrueVal != 0);
+ KnownCond = (TrueVal != nullptr);
}
// See if the select has a known constant value for this predecessor.
@@ -737,7 +738,7 @@ bool JumpThreading::ProcessBlock(BasicBlock *BB) {
Instruction *CondInst = dyn_cast<Instruction>(Condition);
// All the rest of our checks depend on the condition being an instruction.
- if (CondInst == 0) {
+ if (!CondInst) {
// FIXME: Unify this with code below.
if (ProcessThreadableEdges(Condition, BB, Preference))
return true;
@@ -890,7 +891,7 @@ bool JumpThreading::SimplifyPartiallyRedundantLoad(LoadInst *LI) {
SmallPtrSet<BasicBlock*, 8> PredsScanned;
typedef SmallVector<std::pair<BasicBlock*, Value*>, 8> AvailablePredsTy;
AvailablePredsTy AvailablePreds;
- BasicBlock *OneUnavailablePred = 0;
+ BasicBlock *OneUnavailablePred = nullptr;
// If we got here, the loaded value is transparent through to the start of the
// block. Check to see if it is available in any of the predecessor blocks.
@@ -904,16 +905,16 @@ bool JumpThreading::SimplifyPartiallyRedundantLoad(LoadInst *LI) {
// Scan the predecessor to see if the value is available in the pred.
BBIt = PredBB->end();
- MDNode *ThisTBAATag = 0;
+ MDNode *ThisTBAATag = nullptr;
Value *PredAvailable = FindAvailableLoadedValue(LoadedPtr, PredBB, BBIt, 6,
- 0, &ThisTBAATag);
+ nullptr, &ThisTBAATag);
if (!PredAvailable) {
OneUnavailablePred = PredBB;
continue;
}
// If tbaa tags disagree or are not present, forget about them.
- if (TBAATag != ThisTBAATag) TBAATag = 0;
+ if (TBAATag != ThisTBAATag) TBAATag = nullptr;
// If so, this load is partially redundant. Remember this info so that we
// can create a PHI node.
@@ -929,7 +930,7 @@ bool JumpThreading::SimplifyPartiallyRedundantLoad(LoadInst *LI) {
// predecessor, we want to insert a merge block for those common predecessors.
// This ensures that we only have to insert one reload, thus not increasing
// code size.
- BasicBlock *UnavailablePred = 0;
+ BasicBlock *UnavailablePred = nullptr;
// If there is exactly one predecessor where the value is unavailable, the
// already computed 'OneUnavailablePred' block is it. If it ends in an
@@ -996,7 +997,7 @@ bool JumpThreading::SimplifyPartiallyRedundantLoad(LoadInst *LI) {
BasicBlock *P = *PI;
AvailablePredsTy::iterator I =
std::lower_bound(AvailablePreds.begin(), AvailablePreds.end(),
- std::make_pair(P, (Value*)0));
+ std::make_pair(P, (Value*)nullptr));
assert(I != AvailablePreds.end() && I->first == P &&
"Didn't find entry for predecessor!");
@@ -1103,7 +1104,7 @@ bool JumpThreading::ProcessThreadableEdges(Value *Cond, BasicBlock *BB,
SmallPtrSet<BasicBlock*, 16> SeenPreds;
SmallVector<std::pair<BasicBlock*, BasicBlock*>, 16> PredToDestList;
- BasicBlock *OnlyDest = 0;
+ BasicBlock *OnlyDest = nullptr;
BasicBlock *MultipleDestSentinel = (BasicBlock*)(intptr_t)~0ULL;
for (unsigned i = 0, e = PredValues.size(); i != e; ++i) {
@@ -1120,7 +1121,7 @@ bool JumpThreading::ProcessThreadableEdges(Value *Cond, BasicBlock *BB,
BasicBlock *DestBB;
if (isa<UndefValue>(Val))
- DestBB = 0;
+ DestBB = nullptr;
else if (BranchInst *BI = dyn_cast<BranchInst>(BB->getTerminator()))
DestBB = BI->getSuccessor(cast<ConstantInt>(Val)->isZero());
else if (SwitchInst *SI = dyn_cast<SwitchInst>(BB->getTerminator())) {
@@ -1171,7 +1172,7 @@ bool JumpThreading::ProcessThreadableEdges(Value *Cond, BasicBlock *BB,
// If the threadable edges are branching on an undefined value, we get to pick
// the destination that these predecessors should get to.
- if (MostPopularDest == 0)
+ if (!MostPopularDest)
MostPopularDest = BB->getTerminator()->
getSuccessor(GetBestDestForJumpOnUndef(BB));
@@ -1273,7 +1274,7 @@ bool JumpThreading::ProcessBranchOnXOR(BinaryOperator *BO) {
}
// Determine which value to split on, true, false, or undef if neither.
- ConstantInt *SplitVal = 0;
+ ConstantInt *SplitVal = nullptr;
if (NumTrue > NumFalse)
SplitVal = ConstantInt::getTrue(BB->getContext());
else if (NumTrue != 0 || NumFalse != 0)
@@ -1294,7 +1295,7 @@ bool JumpThreading::ProcessBranchOnXOR(BinaryOperator *BO) {
// help us. However, we can just replace the LHS or RHS with the constant.
if (BlocksToFoldInto.size() ==
cast<PHINode>(BB->front()).getNumIncomingValues()) {
- if (SplitVal == 0) {
+ if (!SplitVal) {
// If all preds provide undef, just nuke the xor, because it is undef too.
BO->replaceAllUsesWith(UndefValue::get(BO->getType()));
BO->eraseFromParent();
@@ -1531,7 +1532,7 @@ bool JumpThreading::DuplicateCondBranchOnPHIIntoPred(BasicBlock *BB,
// can just clone the bits from BB into the end of the new PredBB.
BranchInst *OldPredBranch = dyn_cast<BranchInst>(PredBB->getTerminator());
- if (OldPredBranch == 0 || !OldPredBranch->isUnconditional()) {
+ if (!OldPredBranch || !OldPredBranch->isUnconditional()) {
PredBB = SplitEdge(PredBB, BB, this);
OldPredBranch = cast<BranchInst>(PredBB->getTerminator());
}
diff --git a/lib/Transforms/Scalar/LICM.cpp b/lib/Transforms/Scalar/LICM.cpp
index b69f2dc..0a8d16f 100644
--- a/lib/Transforms/Scalar/LICM.cpp
+++ b/lib/Transforms/Scalar/LICM.cpp
@@ -30,7 +30,6 @@
//
//===----------------------------------------------------------------------===//
-#define DEBUG_TYPE "licm"
#include "llvm/Transforms/Scalar.h"
#include "llvm/ADT/Statistic.h"
#include "llvm/Analysis/AliasAnalysis.h"
@@ -60,6 +59,8 @@
#include <algorithm>
using namespace llvm;
+#define DEBUG_TYPE "licm"
+
STATISTIC(NumSunk , "Number of instructions sunk out of loop");
STATISTIC(NumHoisted , "Number of instructions hoisted out of loop");
STATISTIC(NumMovedLoads, "Number of load insts hoisted or sunk");
@@ -223,7 +224,7 @@ bool LICM::runOnLoop(Loop *L, LPPassManager &LPM) {
DT = &getAnalysis<DominatorTreeWrapperPass>().getDomTree();
DataLayoutPass *DLP = getAnalysisIfAvailable<DataLayoutPass>();
- DL = DLP ? &DLP->getDataLayout() : 0;
+ DL = DLP ? &DLP->getDataLayout() : nullptr;
TLI = &getAnalysis<TargetLibraryInfo>();
assert(L->isLCSSAForm(*DT) && "Loop is not in LCSSA form.");
@@ -315,8 +316,8 @@ bool LICM::runOnLoop(Loop *L, LPPassManager &LPM) {
"Parent loop not left in LCSSA form after LICM!");
// Clear out loops state information for the next iteration
- CurLoop = 0;
- Preheader = 0;
+ CurLoop = nullptr;
+ Preheader = nullptr;
// If this loop is nested inside of another one, save the alias information
// for when we process the outer loop.
@@ -334,7 +335,7 @@ bool LICM::runOnLoop(Loop *L, LPPassManager &LPM) {
/// iteration.
///
void LICM::SinkRegion(DomTreeNode *N) {
- assert(N != 0 && "Null dominator tree node?");
+ assert(N != nullptr && "Null dominator tree node?");
BasicBlock *BB = N->getBlock();
// If this subregion is not in the top level loop at all, exit.
@@ -381,7 +382,7 @@ void LICM::SinkRegion(DomTreeNode *N) {
/// before uses, allowing us to hoist a loop body in one pass without iteration.
///
void LICM::HoistRegion(DomTreeNode *N) {
- assert(N != 0 && "Null dominator tree node?");
+ assert(N != nullptr && "Null dominator tree node?");
BasicBlock *BB = N->getBlock();
// If this subregion is not in the top level loop at all, exit.
@@ -774,7 +775,7 @@ void LICM::PromoteAliasSet(AliasSet &AS,
// We start with an alignment of one and try to find instructions that allow
// us to prove better alignment.
unsigned Alignment = 1;
- MDNode *TBAATag = 0;
+ MDNode *TBAATag = nullptr;
// Check that all of the pointers in the alias set have the same type. We
// cannot (yet) promote a memory location that is loaded and stored in
diff --git a/lib/Transforms/Scalar/LoopDeletion.cpp b/lib/Transforms/Scalar/LoopDeletion.cpp
index 9a520c8..5ab686a 100644
--- a/lib/Transforms/Scalar/LoopDeletion.cpp
+++ b/lib/Transforms/Scalar/LoopDeletion.cpp
@@ -14,7 +14,6 @@
//
//===----------------------------------------------------------------------===//
-#define DEBUG_TYPE "loop-delete"
#include "llvm/Transforms/Scalar.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/Statistic.h"
@@ -23,6 +22,8 @@
#include "llvm/IR/Dominators.h"
using namespace llvm;
+#define DEBUG_TYPE "loop-delete"
+
STATISTIC(NumDeleted, "Number of loops deleted");
namespace {
diff --git a/lib/Transforms/Scalar/LoopIdiomRecognize.cpp b/lib/Transforms/Scalar/LoopIdiomRecognize.cpp
index e5e8b84..26a83df 100644
--- a/lib/Transforms/Scalar/LoopIdiomRecognize.cpp
+++ b/lib/Transforms/Scalar/LoopIdiomRecognize.cpp
@@ -41,7 +41,6 @@
//
//===----------------------------------------------------------------------===//
-#define DEBUG_TYPE "loop-idiom"
#include "llvm/Transforms/Scalar.h"
#include "llvm/ADT/Statistic.h"
#include "llvm/Analysis/AliasAnalysis.h"
@@ -61,6 +60,8 @@
#include "llvm/Transforms/Utils/Local.h"
using namespace llvm;
+#define DEBUG_TYPE "loop-idiom"
+
STATISTIC(NumMemSet, "Number of memset's formed from loop stores");
STATISTIC(NumMemCpy, "Number of memcpy's formed from loop load+stores");
@@ -114,7 +115,7 @@ namespace {
Value *matchCondition (BranchInst *Br, BasicBlock *NonZeroTarget) const;
/// Return true iff the idiom is detected in the loop. and 1) \p CntInst
- /// is set to the instruction counting the pupulation bit. 2) \p CntPhi
+ /// is set to the instruction counting the population bit. 2) \p CntPhi
/// is set to the corresponding phi node. 3) \p Var is set to the value
/// whose population bits are being counted.
bool detectIdiom
@@ -138,7 +139,7 @@ namespace {
static char ID;
explicit LoopIdiomRecognize() : LoopPass(ID) {
initializeLoopIdiomRecognizePass(*PassRegistry::getPassRegistry());
- DL = 0; DT = 0; SE = 0; TLI = 0; TTI = 0;
+ DL = nullptr; DT = nullptr; SE = nullptr; TLI = nullptr; TTI = nullptr;
}
bool runOnLoop(Loop *L, LPPassManager &LPM) override;
@@ -182,7 +183,7 @@ namespace {
if (DL)
return DL;
DataLayoutPass *DLP = getAnalysisIfAvailable<DataLayoutPass>();
- DL = DLP ? &DLP->getDataLayout() : 0;
+ DL = DLP ? &DLP->getDataLayout() : nullptr;
return DL;
}
@@ -247,7 +248,7 @@ static void deleteDeadInstruction(Instruction *I, ScalarEvolution &SE,
for (unsigned op = 0, e = DeadInst->getNumOperands(); op != e; ++op) {
Value *Op = DeadInst->getOperand(op);
- DeadInst->setOperand(op, 0);
+ DeadInst->setOperand(op, nullptr);
// If this operand just became dead, add it to the NowDeadInsts list.
if (!Op->use_empty()) continue;
@@ -292,9 +293,9 @@ bool LIRUtil::isAlmostEmpty(BasicBlock *BB) {
BasicBlock *LIRUtil::getPrecondBb(BasicBlock *PreHead) {
if (BasicBlock *BB = PreHead->getSinglePredecessor()) {
BranchInst *Br = getBranch(BB);
- return Br && Br->isConditional() ? BB : 0;
+ return Br && Br->isConditional() ? BB : nullptr;
}
- return 0;
+ return nullptr;
}
//===----------------------------------------------------------------------===//
@@ -304,7 +305,7 @@ BasicBlock *LIRUtil::getPrecondBb(BasicBlock *PreHead) {
//===----------------------------------------------------------------------===//
NclPopcountRecognize::NclPopcountRecognize(LoopIdiomRecognize &TheLIR):
- LIR(TheLIR), CurLoop(TheLIR.getLoop()), PreCondBB(0) {
+ LIR(TheLIR), CurLoop(TheLIR.getLoop()), PreCondBB(nullptr) {
}
bool NclPopcountRecognize::preliminaryScreen() {
@@ -341,25 +342,25 @@ bool NclPopcountRecognize::preliminaryScreen() {
return true;
}
-Value *NclPopcountRecognize::matchCondition (BranchInst *Br,
- BasicBlock *LoopEntry) const {
+Value *NclPopcountRecognize::matchCondition(BranchInst *Br,
+ BasicBlock *LoopEntry) const {
if (!Br || !Br->isConditional())
- return 0;
+ return nullptr;
ICmpInst *Cond = dyn_cast<ICmpInst>(Br->getCondition());
if (!Cond)
- return 0;
+ return nullptr;
ConstantInt *CmpZero = dyn_cast<ConstantInt>(Cond->getOperand(1));
if (!CmpZero || !CmpZero->isZero())
- return 0;
+ return nullptr;
ICmpInst::Predicate Pred = Cond->getPredicate();
if ((Pred == ICmpInst::ICMP_NE && Br->getSuccessor(0) == LoopEntry) ||
(Pred == ICmpInst::ICMP_EQ && Br->getSuccessor(1) == LoopEntry))
return Cond->getOperand(0);
- return 0;
+ return nullptr;
}
bool NclPopcountRecognize::detectIdiom(Instruction *&CntInst,
@@ -390,9 +391,9 @@ bool NclPopcountRecognize::detectIdiom(Instruction *&CntInst,
Value *VarX1, *VarX0;
PHINode *PhiX, *CountPhi;
- DefX2 = CountInst = 0;
- VarX1 = VarX0 = 0;
- PhiX = CountPhi = 0;
+ DefX2 = CountInst = nullptr;
+ VarX1 = VarX0 = nullptr;
+ PhiX = CountPhi = nullptr;
LoopEntry = *(CurLoop->block_begin());
// step 1: Check if the loop-back branch is in desirable form.
@@ -439,7 +440,7 @@ bool NclPopcountRecognize::detectIdiom(Instruction *&CntInst,
// step 4: Find the instruction which count the population: cnt2 = cnt1 + 1
{
- CountInst = NULL;
+ CountInst = nullptr;
for (BasicBlock::iterator Iter = LoopEntry->getFirstNonPHI(),
IterE = LoopEntry->end(); Iter != IterE; Iter++) {
Instruction *Inst = Iter;
@@ -744,7 +745,7 @@ bool LoopIdiomRecognize::runOnLoopBlock(BasicBlock *BB, const SCEV *BECount,
// If processing the store invalidated our iterator, start over from the
// top of the block.
- if (InstPtr == 0)
+ if (!InstPtr)
I = BB->begin();
continue;
}
@@ -757,7 +758,7 @@ bool LoopIdiomRecognize::runOnLoopBlock(BasicBlock *BB, const SCEV *BECount,
// If processing the memset invalidated our iterator, start over from the
// top of the block.
- if (InstPtr == 0)
+ if (!InstPtr)
I = BB->begin();
continue;
}
@@ -784,7 +785,7 @@ bool LoopIdiomRecognize::processLoopStore(StoreInst *SI, const SCEV *BECount) {
// random store we can't handle.
const SCEVAddRecExpr *StoreEv =
dyn_cast<SCEVAddRecExpr>(SE->getSCEV(StorePtr));
- if (StoreEv == 0 || StoreEv->getLoop() != CurLoop || !StoreEv->isAffine())
+ if (!StoreEv || StoreEv->getLoop() != CurLoop || !StoreEv->isAffine())
return false;
// Check to see if the stride matches the size of the store. If so, then we
@@ -792,7 +793,7 @@ bool LoopIdiomRecognize::processLoopStore(StoreInst *SI, const SCEV *BECount) {
unsigned StoreSize = (unsigned)SizeInBits >> 3;
const SCEVConstant *Stride = dyn_cast<SCEVConstant>(StoreEv->getOperand(1));
- if (Stride == 0 || StoreSize != Stride->getValue()->getValue()) {
+ if (!Stride || StoreSize != Stride->getValue()->getValue()) {
// TODO: Could also handle negative stride here someday, that will require
// the validity check in mayLoopAccessLocation to be updated though.
// Enable this to print exact negative strides.
@@ -841,7 +842,7 @@ processLoopMemSet(MemSetInst *MSI, const SCEV *BECount) {
// loop, which indicates a strided store. If we have something else, it's a
// random store we can't handle.
const SCEVAddRecExpr *Ev = dyn_cast<SCEVAddRecExpr>(SE->getSCEV(Pointer));
- if (Ev == 0 || Ev->getLoop() != CurLoop || !Ev->isAffine())
+ if (!Ev || Ev->getLoop() != CurLoop || !Ev->isAffine())
return false;
// Reject memsets that are so large that they overflow an unsigned.
@@ -855,7 +856,7 @@ processLoopMemSet(MemSetInst *MSI, const SCEV *BECount) {
// TODO: Could also handle negative stride here someday, that will require the
// validity check in mayLoopAccessLocation to be updated though.
- if (Stride == 0 || MSI->getLength() != Stride->getValue())
+ if (!Stride || MSI->getLength() != Stride->getValue())
return false;
return processLoopStridedStore(Pointer, (unsigned)SizeInBytes,
@@ -908,23 +909,23 @@ static Constant *getMemSetPatternValue(Value *V, const DataLayout &DL) {
// array. We could theoretically do a store to an alloca or something, but
// that doesn't seem worthwhile.
Constant *C = dyn_cast<Constant>(V);
- if (C == 0) return 0;
+ if (!C) return nullptr;
// Only handle simple values that are a power of two bytes in size.
uint64_t Size = DL.getTypeSizeInBits(V->getType());
if (Size == 0 || (Size & 7) || (Size & (Size-1)))
- return 0;
+ return nullptr;
// Don't care enough about darwin/ppc to implement this.
if (DL.isBigEndian())
- return 0;
+ return nullptr;
// Convert to size in bytes.
Size /= 8;
// TODO: If CI is larger than 16-bytes, we can try slicing it in half to see
// if the top and bottom are the same (e.g. for vectors and large integers).
- if (Size > 16) return 0;
+ if (Size > 16) return nullptr;
// If the constant is exactly 16 bytes, just use it.
if (Size == 16) return C;
@@ -949,7 +950,7 @@ processLoopStridedStore(Value *DestPtr, unsigned StoreSize,
// are stored. A store of i32 0x01020304 can never be turned into a memset,
// but it can be turned into memset_pattern if the target supports it.
Value *SplatValue = isBytewiseValue(StoredVal);
- Constant *PatternValue = 0;
+ Constant *PatternValue = nullptr;
unsigned DestAS = DestPtr->getType()->getPointerAddressSpace();
@@ -960,13 +961,13 @@ processLoopStridedStore(Value *DestPtr, unsigned StoreSize,
// promote the memset.
CurLoop->isLoopInvariant(SplatValue)) {
// Keep and use SplatValue.
- PatternValue = 0;
+ PatternValue = nullptr;
} else if (DestAS == 0 &&
TLI->has(LibFunc::memset_pattern16) &&
(PatternValue = getMemSetPatternValue(StoredVal, *DL))) {
// Don't create memset_pattern16s with address spaces.
// It looks like we can use PatternValue!
- SplatValue = 0;
+ SplatValue = nullptr;
} else {
// Otherwise, this isn't an idiom we can transform. For example, we can't
// do anything with a 3-byte store.
@@ -1033,7 +1034,7 @@ processLoopStridedStore(Value *DestPtr, unsigned StoreSize,
Int8PtrTy,
Int8PtrTy,
IntPtr,
- (void*)0);
+ (void*)nullptr);
// Otherwise we should form a memset_pattern16. PatternValue is known to be
// an constant array of 16-bytes. Plop the value into a mergable global.
diff --git a/lib/Transforms/Scalar/LoopInstSimplify.cpp b/lib/Transforms/Scalar/LoopInstSimplify.cpp
index 263ba93..ab1a939 100644
--- a/lib/Transforms/Scalar/LoopInstSimplify.cpp
+++ b/lib/Transforms/Scalar/LoopInstSimplify.cpp
@@ -11,7 +11,6 @@
//
//===----------------------------------------------------------------------===//
-#define DEBUG_TYPE "loop-instsimplify"
#include "llvm/Transforms/Scalar.h"
#include "llvm/ADT/STLExtras.h"
#include "llvm/ADT/Statistic.h"
@@ -26,6 +25,8 @@
#include "llvm/Transforms/Utils/Local.h"
using namespace llvm;
+#define DEBUG_TYPE "loop-instsimplify"
+
STATISTIC(NumSimplified, "Number of redundant instructions simplified");
namespace {
@@ -70,10 +71,10 @@ bool LoopInstSimplify::runOnLoop(Loop *L, LPPassManager &LPM) {
DominatorTreeWrapperPass *DTWP =
getAnalysisIfAvailable<DominatorTreeWrapperPass>();
- DominatorTree *DT = DTWP ? &DTWP->getDomTree() : 0;
+ DominatorTree *DT = DTWP ? &DTWP->getDomTree() : nullptr;
LoopInfo *LI = &getAnalysis<LoopInfo>();
DataLayoutPass *DLP = getAnalysisIfAvailable<DataLayoutPass>();
- const DataLayout *DL = DLP ? &DLP->getDataLayout() : 0;
+ const DataLayout *DL = DLP ? &DLP->getDataLayout() : nullptr;
const TargetLibraryInfo *TLI = &getAnalysis<TargetLibraryInfo>();
SmallVector<BasicBlock*, 8> ExitBlocks;
@@ -126,7 +127,15 @@ bool LoopInstSimplify::runOnLoop(Loop *L, LPPassManager &LPM) {
++NumSimplified;
}
}
- LocalChanged |= RecursivelyDeleteTriviallyDeadInstructions(I, TLI);
+ bool res = RecursivelyDeleteTriviallyDeadInstructions(I, TLI);
+ if (res) {
+ // RecursivelyDeleteTriviallyDeadInstruction can remove
+ // more than one instruction, so simply incrementing the
+ // iterator does not work. When instructions get deleted
+ // re-iterate instead.
+ BI = BB->begin(); BE = BB->end();
+ LocalChanged |= res;
+ }
if (IsSubloopHeader && !isa<PHINode>(I))
break;
diff --git a/lib/Transforms/Scalar/LoopRerollPass.cpp b/lib/Transforms/Scalar/LoopRerollPass.cpp
index 81c1e42..8b5e036 100644
--- a/lib/Transforms/Scalar/LoopRerollPass.cpp
+++ b/lib/Transforms/Scalar/LoopRerollPass.cpp
@@ -11,7 +11,6 @@
//
//===----------------------------------------------------------------------===//
-#define DEBUG_TYPE "loop-reroll"
#include "llvm/Transforms/Scalar.h"
#include "llvm/ADT/STLExtras.h"
#include "llvm/ADT/SmallSet.h"
@@ -36,6 +35,8 @@
using namespace llvm;
+#define DEBUG_TYPE "loop-reroll"
+
STATISTIC(NumRerolledLoops, "Number of rerolled loops");
static cl::opt<unsigned>
@@ -945,7 +946,7 @@ bool LoopReroll::reroll(Instruction *IV, Loop *L, BasicBlock *Header,
bool InReduction = Reductions.isPairInSame(J1, J2);
if (!(InReduction && J1->isAssociative())) {
- bool Swapped = false, SomeOpMatched = false;;
+ bool Swapped = false, SomeOpMatched = false;
for (unsigned j = 0; j < J1->getNumOperands() && !MatchFailed; ++j) {
Value *Op2 = J2->getOperand(j);
@@ -1133,7 +1134,7 @@ bool LoopReroll::runOnLoop(Loop *L, LPPassManager &LPM) {
SE = &getAnalysis<ScalarEvolution>();
TLI = &getAnalysis<TargetLibraryInfo>();
DataLayoutPass *DLP = getAnalysisIfAvailable<DataLayoutPass>();
- DL = DLP ? &DLP->getDataLayout() : 0;
+ DL = DLP ? &DLP->getDataLayout() : nullptr;
DT = &getAnalysis<DominatorTreeWrapperPass>().getDomTree();
BasicBlock *Header = L->getHeader();
diff --git a/lib/Transforms/Scalar/LoopRotation.cpp b/lib/Transforms/Scalar/LoopRotation.cpp
index fde6bac..2ce5831 100644
--- a/lib/Transforms/Scalar/LoopRotation.cpp
+++ b/lib/Transforms/Scalar/LoopRotation.cpp
@@ -11,7 +11,6 @@
//
//===----------------------------------------------------------------------===//
-#define DEBUG_TYPE "loop-rotate"
#include "llvm/Transforms/Scalar.h"
#include "llvm/ADT/Statistic.h"
#include "llvm/Analysis/CodeMetrics.h"
@@ -24,6 +23,7 @@
#include "llvm/IR/Dominators.h"
#include "llvm/IR/Function.h"
#include "llvm/IR/IntrinsicInst.h"
+#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Debug.h"
#include "llvm/Transforms/Utils/BasicBlockUtils.h"
#include "llvm/Transforms/Utils/Local.h"
@@ -31,7 +31,11 @@
#include "llvm/Transforms/Utils/ValueMapper.h"
using namespace llvm;
-#define MAX_HEADER_SIZE 16
+#define DEBUG_TYPE "loop-rotate"
+
+static cl::opt<unsigned>
+DefaultRotationThreshold("rotation-max-header-size", cl::init(16), cl::Hidden,
+ cl::desc("The default maximum header size for automatic loop rotation"));
STATISTIC(NumRotated, "Number of loops rotated");
namespace {
@@ -39,8 +43,12 @@ namespace {
class LoopRotate : public LoopPass {
public:
static char ID; // Pass ID, replacement for typeid
- LoopRotate() : LoopPass(ID) {
+ LoopRotate(int SpecifiedMaxHeaderSize = -1) : LoopPass(ID) {
initializeLoopRotatePass(*PassRegistry::getPassRegistry());
+ if (SpecifiedMaxHeaderSize == -1)
+ MaxHeaderSize = DefaultRotationThreshold;
+ else
+ MaxHeaderSize = unsigned(SpecifiedMaxHeaderSize);
}
// LCSSA form makes instruction renaming easier.
@@ -61,6 +69,7 @@ namespace {
bool rotateLoop(Loop *L, bool SimplifiedLatch);
private:
+ unsigned MaxHeaderSize;
LoopInfo *LI;
const TargetTransformInfo *TTI;
};
@@ -74,7 +83,9 @@ INITIALIZE_PASS_DEPENDENCY(LoopSimplify)
INITIALIZE_PASS_DEPENDENCY(LCSSA)
INITIALIZE_PASS_END(LoopRotate, "loop-rotate", "Rotate Loops", false, false)
-Pass *llvm::createLoopRotatePass() { return new LoopRotate(); }
+Pass *llvm::createLoopRotatePass(int MaxHeaderSize) {
+ return new LoopRotate(MaxHeaderSize);
+}
/// Rotate Loop L as many times as possible. Return true if
/// the loop is rotated at least once.
@@ -82,6 +93,9 @@ bool LoopRotate::runOnLoop(Loop *L, LPPassManager &LPM) {
if (skipOptnoneFunction(L))
return false;
+ // Save the loop metadata.
+ MDNode *LoopMD = L->getLoopID();
+
LI = &getAnalysis<LoopInfo>();
TTI = &getAnalysis<TargetTransformInfo>();
@@ -96,6 +110,12 @@ bool LoopRotate::runOnLoop(Loop *L, LPPassManager &LPM) {
MadeChange = true;
SimplifiedLatch = false;
}
+
+ // Restore the loop metadata.
+ // NB! We presume LoopRotation DOESN'T ADD its own metadata.
+ if ((MadeChange || SimplifiedLatch) && LoopMD)
+ L->setLoopID(LoopMD);
+
return MadeChange;
}
@@ -281,7 +301,7 @@ bool LoopRotate::rotateLoop(Loop *L, bool SimplifiedLatch) {
BasicBlock *OrigLatch = L->getLoopLatch();
BranchInst *BI = dyn_cast<BranchInst>(OrigHeader->getTerminator());
- if (BI == 0 || BI->isUnconditional())
+ if (!BI || BI->isUnconditional())
return false;
// If the loop header is not one of the loop exiting blocks then
@@ -292,7 +312,7 @@ bool LoopRotate::rotateLoop(Loop *L, bool SimplifiedLatch) {
// If the loop latch already contains a branch that leaves the loop then the
// loop is already rotated.
- if (OrigLatch == 0)
+ if (!OrigLatch)
return false;
// Rotate if either the loop latch does *not* exit the loop, or if the loop
@@ -310,7 +330,7 @@ bool LoopRotate::rotateLoop(Loop *L, bool SimplifiedLatch) {
<< " instructions: "; L->dump());
return false;
}
- if (Metrics.NumInsts > MAX_HEADER_SIZE)
+ if (Metrics.NumInsts > MaxHeaderSize)
return false;
}
@@ -319,7 +339,7 @@ bool LoopRotate::rotateLoop(Loop *L, bool SimplifiedLatch) {
// If the loop could not be converted to canonical form, it must have an
// indirectbr in it, just give up.
- if (OrigPreheader == 0)
+ if (!OrigPreheader)
return false;
// Anything ScalarEvolution may know about this loop or the PHI nodes
diff --git a/lib/Transforms/Scalar/LoopStrengthReduce.cpp b/lib/Transforms/Scalar/LoopStrengthReduce.cpp
index 272a16d..914b56a 100644
--- a/lib/Transforms/Scalar/LoopStrengthReduce.cpp
+++ b/lib/Transforms/Scalar/LoopStrengthReduce.cpp
@@ -53,7 +53,6 @@
//
//===----------------------------------------------------------------------===//
-#define DEBUG_TYPE "loop-reduce"
#include "llvm/Transforms/Scalar.h"
#include "llvm/ADT/DenseSet.h"
#include "llvm/ADT/Hashing.h"
@@ -78,6 +77,8 @@
#include <algorithm>
using namespace llvm;
+#define DEBUG_TYPE "loop-reduce"
+
/// MaxIVUsers is an arbitrary threshold that provides an early opportunitiy for
/// bail out. This threshold is far beyond the number of users that LSR can
/// conceivably solve, so it should not affect generated code, but catches the
@@ -237,7 +238,15 @@ struct Formula {
int64_t Scale;
/// BaseRegs - The list of "base" registers for this use. When this is
- /// non-empty,
+ /// non-empty. The canonical representation of a formula is
+ /// 1. BaseRegs.size > 1 implies ScaledReg != NULL and
+ /// 2. ScaledReg != NULL implies Scale != 1 || !BaseRegs.empty().
+ /// #1 enforces that the scaled register is always used when at least two
+ /// registers are needed by the formula: e.g., reg1 + reg2 is reg1 + 1 * reg2.
+ /// #2 enforces that 1 * reg is reg.
+ /// This invariant can be temporarly broken while building a formula.
+ /// However, every formula inserted into the LSRInstance must be in canonical
+ /// form.
SmallVector<const SCEV *, 4> BaseRegs;
/// ScaledReg - The 'scaled' register for this use. This should be non-null
@@ -250,12 +259,18 @@ struct Formula {
int64_t UnfoldedOffset;
Formula()
- : BaseGV(0), BaseOffset(0), HasBaseReg(false), Scale(0), ScaledReg(0),
- UnfoldedOffset(0) {}
+ : BaseGV(nullptr), BaseOffset(0), HasBaseReg(false), Scale(0),
+ ScaledReg(nullptr), UnfoldedOffset(0) {}
void InitialMatch(const SCEV *S, Loop *L, ScalarEvolution &SE);
- unsigned getNumRegs() const;
+ bool isCanonical() const;
+
+ void Canonicalize();
+
+ bool Unscale();
+
+ size_t getNumRegs() const;
Type *getType() const;
void DeleteBaseReg(const SCEV *&S);
@@ -345,12 +360,58 @@ void Formula::InitialMatch(const SCEV *S, Loop *L, ScalarEvolution &SE) {
BaseRegs.push_back(Sum);
HasBaseReg = true;
}
+ Canonicalize();
+}
+
+/// \brief Check whether or not this formula statisfies the canonical
+/// representation.
+/// \see Formula::BaseRegs.
+bool Formula::isCanonical() const {
+ if (ScaledReg)
+ return Scale != 1 || !BaseRegs.empty();
+ return BaseRegs.size() <= 1;
+}
+
+/// \brief Helper method to morph a formula into its canonical representation.
+/// \see Formula::BaseRegs.
+/// Every formula having more than one base register, must use the ScaledReg
+/// field. Otherwise, we would have to do special cases everywhere in LSR
+/// to treat reg1 + reg2 + ... the same way as reg1 + 1*reg2 + ...
+/// On the other hand, 1*reg should be canonicalized into reg.
+void Formula::Canonicalize() {
+ if (isCanonical())
+ return;
+ // So far we did not need this case. This is easy to implement but it is
+ // useless to maintain dead code. Beside it could hurt compile time.
+ assert(!BaseRegs.empty() && "1*reg => reg, should not be needed.");
+ // Keep the invariant sum in BaseRegs and one of the variant sum in ScaledReg.
+ ScaledReg = BaseRegs.back();
+ BaseRegs.pop_back();
+ Scale = 1;
+ size_t BaseRegsSize = BaseRegs.size();
+ size_t Try = 0;
+ // If ScaledReg is an invariant, try to find a variant expression.
+ while (Try < BaseRegsSize && !isa<SCEVAddRecExpr>(ScaledReg))
+ std::swap(ScaledReg, BaseRegs[Try++]);
+}
+
+/// \brief Get rid of the scale in the formula.
+/// In other words, this method morphes reg1 + 1*reg2 into reg1 + reg2.
+/// \return true if it was possible to get rid of the scale, false otherwise.
+/// \note After this operation the formula may not be in the canonical form.
+bool Formula::Unscale() {
+ if (Scale != 1)
+ return false;
+ Scale = 0;
+ BaseRegs.push_back(ScaledReg);
+ ScaledReg = nullptr;
+ return true;
}
/// getNumRegs - Return the total number of register operands used by this
/// formula. This does not include register uses implied by non-constant
/// addrec strides.
-unsigned Formula::getNumRegs() const {
+size_t Formula::getNumRegs() const {
return !!ScaledReg + BaseRegs.size();
}
@@ -360,7 +421,7 @@ Type *Formula::getType() const {
return !BaseRegs.empty() ? BaseRegs.front()->getType() :
ScaledReg ? ScaledReg->getType() :
BaseGV ? BaseGV->getType() :
- 0;
+ nullptr;
}
/// DeleteBaseReg - Delete the given base reg from the BaseRegs list.
@@ -487,11 +548,11 @@ static const SCEV *getExactSDiv(const SCEV *LHS, const SCEV *RHS,
// Check for a division of a constant by a constant.
if (const SCEVConstant *C = dyn_cast<SCEVConstant>(LHS)) {
if (!RC)
- return 0;
+ return nullptr;
const APInt &LA = C->getValue()->getValue();
const APInt &RA = RC->getValue()->getValue();
if (LA.srem(RA) != 0)
- return 0;
+ return nullptr;
return SE.getConstant(LA.sdiv(RA));
}
@@ -500,16 +561,16 @@ static const SCEV *getExactSDiv(const SCEV *LHS, const SCEV *RHS,
if (IgnoreSignificantBits || isAddRecSExtable(AR, SE)) {
const SCEV *Step = getExactSDiv(AR->getStepRecurrence(SE), RHS, SE,
IgnoreSignificantBits);
- if (!Step) return 0;
+ if (!Step) return nullptr;
const SCEV *Start = getExactSDiv(AR->getStart(), RHS, SE,
IgnoreSignificantBits);
- if (!Start) return 0;
+ if (!Start) return nullptr;
// FlagNW is independent of the start value, step direction, and is
// preserved with smaller magnitude steps.
// FIXME: AR->getNoWrapFlags(SCEV::FlagNW)
return SE.getAddRecExpr(Start, Step, AR->getLoop(), SCEV::FlagAnyWrap);
}
- return 0;
+ return nullptr;
}
// Distribute the sdiv over add operands, if the add doesn't overflow.
@@ -520,12 +581,12 @@ static const SCEV *getExactSDiv(const SCEV *LHS, const SCEV *RHS,
I != E; ++I) {
const SCEV *Op = getExactSDiv(*I, RHS, SE,
IgnoreSignificantBits);
- if (!Op) return 0;
+ if (!Op) return nullptr;
Ops.push_back(Op);
}
return SE.getAddExpr(Ops);
}
- return 0;
+ return nullptr;
}
// Check for a multiply operand that we can pull RHS out of.
@@ -544,13 +605,13 @@ static const SCEV *getExactSDiv(const SCEV *LHS, const SCEV *RHS,
}
Ops.push_back(S);
}
- return Found ? SE.getMulExpr(Ops) : 0;
+ return Found ? SE.getMulExpr(Ops) : nullptr;
}
- return 0;
+ return nullptr;
}
// Otherwise we don't know.
- return 0;
+ return nullptr;
}
/// ExtractImmediate - If S involves the addition of a constant integer value,
@@ -604,7 +665,7 @@ static GlobalValue *ExtractSymbol(const SCEV *&S, ScalarEvolution &SE) {
SCEV::FlagAnyWrap);
return Result;
}
- return 0;
+ return nullptr;
}
/// isAddressUse - Returns true if the specified instruction is using the
@@ -755,12 +816,12 @@ DeleteTriviallyDeadInstructions(SmallVectorImpl<WeakVH> &DeadInsts) {
Value *V = DeadInsts.pop_back_val();
Instruction *I = dyn_cast_or_null<Instruction>(V);
- if (I == 0 || !isInstructionTriviallyDead(I))
+ if (!I || !isInstructionTriviallyDead(I))
continue;
for (User::op_iterator OI = I->op_begin(), E = I->op_end(); OI != E; ++OI)
if (Instruction *U = dyn_cast<Instruction>(*OI)) {
- *OI = 0;
+ *OI = nullptr;
if (U->use_empty())
DeadInsts.push_back(U);
}
@@ -775,9 +836,18 @@ DeleteTriviallyDeadInstructions(SmallVectorImpl<WeakVH> &DeadInsts) {
namespace {
class LSRUse;
}
-// Check if it is legal to fold 2 base registers.
-static bool isLegal2RegAMUse(const TargetTransformInfo &TTI, const LSRUse &LU,
- const Formula &F);
+
+/// \brief Check if the addressing mode defined by \p F is completely
+/// folded in \p LU at isel time.
+/// This includes address-mode folding and special icmp tricks.
+/// This function returns true if \p LU can accommodate what \p F
+/// defines and up to 1 base + 1 scaled + offset.
+/// In other words, if \p F has several base registers, this function may
+/// still return true. Therefore, users still need to account for
+/// additional base registers and/or unfolded offsets to derive an
+/// accurate cost model.
+static bool isAMCompletelyFolded(const TargetTransformInfo &TTI,
+ const LSRUse &LU, const Formula &F);
// Get the cost of the scaling factor used in F for LU.
static unsigned getScalingFactorCost(const TargetTransformInfo &TTI,
const LSRUse &LU, const Formula &F);
@@ -828,7 +898,7 @@ public:
const SmallVectorImpl<int64_t> &Offsets,
ScalarEvolution &SE, DominatorTree &DT,
const LSRUse &LU,
- SmallPtrSet<const SCEV *, 16> *LoserRegs = 0);
+ SmallPtrSet<const SCEV *, 16> *LoserRegs = nullptr);
void print(raw_ostream &OS) const;
void dump() const;
@@ -921,6 +991,7 @@ void Cost::RateFormula(const TargetTransformInfo &TTI,
ScalarEvolution &SE, DominatorTree &DT,
const LSRUse &LU,
SmallPtrSet<const SCEV *, 16> *LoserRegs) {
+ assert(F.isCanonical() && "Cost is accurate only for canonical formula");
// Tally up the registers.
if (const SCEV *ScaledReg = F.ScaledReg) {
if (VisitedRegs.count(ScaledReg)) {
@@ -944,11 +1015,13 @@ void Cost::RateFormula(const TargetTransformInfo &TTI,
}
// Determine how many (unfolded) adds we'll need inside the loop.
- size_t NumBaseParts = F.BaseRegs.size() + (F.UnfoldedOffset != 0);
+ size_t NumBaseParts = F.getNumRegs();
if (NumBaseParts > 1)
// Do not count the base and a possible second register if the target
// allows to fold 2 registers.
- NumBaseAdds += NumBaseParts - (1 + isLegal2RegAMUse(TTI, LU, F));
+ NumBaseAdds +=
+ NumBaseParts - (1 + (F.Scale && isAMCompletelyFolded(TTI, LU, F)));
+ NumBaseAdds += (F.UnfoldedOffset != 0);
// Accumulate non-free scaling amounts.
ScaleCost += getScalingFactorCost(TTI, LU, F);
@@ -1047,7 +1120,8 @@ struct LSRFixup {
}
LSRFixup::LSRFixup()
- : UserInst(0), OperandValToReplace(0), LUIdx(~size_t(0)), Offset(0) {}
+ : UserInst(nullptr), OperandValToReplace(nullptr), LUIdx(~size_t(0)),
+ Offset(0) {}
/// isUseFullyOutsideLoop - Test whether this fixup always uses its
/// value outside of the given loop.
@@ -1183,7 +1257,7 @@ public:
MaxOffset(INT64_MIN),
AllFixupsOutsideLoop(true),
RigidFormula(false),
- WidestFixupType(0) {}
+ WidestFixupType(nullptr) {}
bool HasFormulaWithSameRegs(const Formula &F) const;
bool InsertFormula(const Formula &F);
@@ -1208,7 +1282,10 @@ bool LSRUse::HasFormulaWithSameRegs(const Formula &F) const {
/// InsertFormula - If the given formula has not yet been inserted, add it to
/// the list, and return true. Return false otherwise.
+/// The formula must be in canonical form.
bool LSRUse::InsertFormula(const Formula &F) {
+ assert(F.isCanonical() && "Invalid canonical representation");
+
if (!Formulae.empty() && RigidFormula)
return false;
@@ -1234,6 +1311,8 @@ bool LSRUse::InsertFormula(const Formula &F) {
// Record registers now being used by this use.
Regs.insert(F.BaseRegs.begin(), F.BaseRegs.end());
+ if (F.ScaledReg)
+ Regs.insert(F.ScaledReg);
return true;
}
@@ -1300,12 +1379,10 @@ void LSRUse::dump() const {
}
#endif
-/// isLegalUse - Test whether the use described by AM is "legal", meaning it can
-/// be completely folded into the user instruction at isel time. This includes
-/// address-mode folding and special icmp tricks.
-static bool isLegalUse(const TargetTransformInfo &TTI, LSRUse::KindType Kind,
- Type *AccessTy, GlobalValue *BaseGV, int64_t BaseOffset,
- bool HasBaseReg, int64_t Scale) {
+static bool isAMCompletelyFolded(const TargetTransformInfo &TTI,
+ LSRUse::KindType Kind, Type *AccessTy,
+ GlobalValue *BaseGV, int64_t BaseOffset,
+ bool HasBaseReg, int64_t Scale) {
switch (Kind) {
case LSRUse::Address:
return TTI.isLegalAddressingMode(AccessTy, BaseGV, BaseOffset, HasBaseReg, Scale);
@@ -1356,10 +1433,11 @@ static bool isLegalUse(const TargetTransformInfo &TTI, LSRUse::KindType Kind,
llvm_unreachable("Invalid LSRUse Kind!");
}
-static bool isLegalUse(const TargetTransformInfo &TTI, int64_t MinOffset,
- int64_t MaxOffset, LSRUse::KindType Kind, Type *AccessTy,
- GlobalValue *BaseGV, int64_t BaseOffset, bool HasBaseReg,
- int64_t Scale) {
+static bool isAMCompletelyFolded(const TargetTransformInfo &TTI,
+ int64_t MinOffset, int64_t MaxOffset,
+ LSRUse::KindType Kind, Type *AccessTy,
+ GlobalValue *BaseGV, int64_t BaseOffset,
+ bool HasBaseReg, int64_t Scale) {
// Check for overflow.
if (((int64_t)((uint64_t)BaseOffset + MinOffset) > BaseOffset) !=
(MinOffset > 0))
@@ -1370,9 +1448,41 @@ static bool isLegalUse(const TargetTransformInfo &TTI, int64_t MinOffset,
return false;
MaxOffset = (uint64_t)BaseOffset + MaxOffset;
- return isLegalUse(TTI, Kind, AccessTy, BaseGV, MinOffset, HasBaseReg,
- Scale) &&
- isLegalUse(TTI, Kind, AccessTy, BaseGV, MaxOffset, HasBaseReg, Scale);
+ return isAMCompletelyFolded(TTI, Kind, AccessTy, BaseGV, MinOffset,
+ HasBaseReg, Scale) &&
+ isAMCompletelyFolded(TTI, Kind, AccessTy, BaseGV, MaxOffset,
+ HasBaseReg, Scale);
+}
+
+static bool isAMCompletelyFolded(const TargetTransformInfo &TTI,
+ int64_t MinOffset, int64_t MaxOffset,
+ LSRUse::KindType Kind, Type *AccessTy,
+ const Formula &F) {
+ // For the purpose of isAMCompletelyFolded either having a canonical formula
+ // or a scale not equal to zero is correct.
+ // Problems may arise from non canonical formulae having a scale == 0.
+ // Strictly speaking it would best to just rely on canonical formulae.
+ // However, when we generate the scaled formulae, we first check that the
+ // scaling factor is profitable before computing the actual ScaledReg for
+ // compile time sake.
+ assert((F.isCanonical() || F.Scale != 0));
+ return isAMCompletelyFolded(TTI, MinOffset, MaxOffset, Kind, AccessTy,
+ F.BaseGV, F.BaseOffset, F.HasBaseReg, F.Scale);
+}
+
+/// isLegalUse - Test whether we know how to expand the current formula.
+static bool isLegalUse(const TargetTransformInfo &TTI, int64_t MinOffset,
+ int64_t MaxOffset, LSRUse::KindType Kind, Type *AccessTy,
+ GlobalValue *BaseGV, int64_t BaseOffset, bool HasBaseReg,
+ int64_t Scale) {
+ // We know how to expand completely foldable formulae.
+ return isAMCompletelyFolded(TTI, MinOffset, MaxOffset, Kind, AccessTy, BaseGV,
+ BaseOffset, HasBaseReg, Scale) ||
+ // Or formulae that use a base register produced by a sum of base
+ // registers.
+ (Scale == 1 &&
+ isAMCompletelyFolded(TTI, MinOffset, MaxOffset, Kind, AccessTy,
+ BaseGV, BaseOffset, true, 0));
}
static bool isLegalUse(const TargetTransformInfo &TTI, int64_t MinOffset,
@@ -1382,36 +1492,23 @@ static bool isLegalUse(const TargetTransformInfo &TTI, int64_t MinOffset,
F.BaseOffset, F.HasBaseReg, F.Scale);
}
-static bool isLegal2RegAMUse(const TargetTransformInfo &TTI, const LSRUse &LU,
- const Formula &F) {
- // If F is used as an Addressing Mode, it may fold one Base plus one
- // scaled register. If the scaled register is nil, do as if another
- // element of the base regs is a 1-scaled register.
- // This is possible if BaseRegs has at least 2 registers.
-
- // If this is not an address calculation, this is not an addressing mode
- // use.
- if (LU.Kind != LSRUse::Address)
- return false;
-
- // F is already scaled.
- if (F.Scale != 0)
- return false;
-
- // We need to keep one register for the base and one to scale.
- if (F.BaseRegs.size() < 2)
- return false;
-
- return isLegalUse(TTI, LU.MinOffset, LU.MaxOffset, LU.Kind, LU.AccessTy,
- F.BaseGV, F.BaseOffset, F.HasBaseReg, 1);
- }
+static bool isAMCompletelyFolded(const TargetTransformInfo &TTI,
+ const LSRUse &LU, const Formula &F) {
+ return isAMCompletelyFolded(TTI, LU.MinOffset, LU.MaxOffset, LU.Kind,
+ LU.AccessTy, F.BaseGV, F.BaseOffset, F.HasBaseReg,
+ F.Scale);
+}
static unsigned getScalingFactorCost(const TargetTransformInfo &TTI,
const LSRUse &LU, const Formula &F) {
if (!F.Scale)
return 0;
- assert(isLegalUse(TTI, LU.MinOffset, LU.MaxOffset, LU.Kind,
- LU.AccessTy, F) && "Illegal formula in use.");
+
+ // If the use is not completely folded in that instruction, we will have to
+ // pay an extra cost only for scale != 1.
+ if (!isAMCompletelyFolded(TTI, LU.MinOffset, LU.MaxOffset, LU.Kind,
+ LU.AccessTy, F))
+ return F.Scale != 1;
switch (LU.Kind) {
case LSRUse::Address: {
@@ -1430,12 +1527,10 @@ static unsigned getScalingFactorCost(const TargetTransformInfo &TTI,
return std::max(ScaleCostMinOffset, ScaleCostMaxOffset);
}
case LSRUse::ICmpZero:
- // ICmpZero BaseReg + -1*ScaleReg => ICmp BaseReg, ScaleReg.
- // Therefore, return 0 in case F.Scale == -1.
- return F.Scale != -1;
-
case LSRUse::Basic:
case LSRUse::Special:
+ // The use is completely folded, i.e., everything is folded into the
+ // instruction.
return 0;
}
@@ -1460,7 +1555,8 @@ static bool isAlwaysFoldable(const TargetTransformInfo &TTI,
HasBaseReg = true;
}
- return isLegalUse(TTI, Kind, AccessTy, BaseGV, BaseOffset, HasBaseReg, Scale);
+ return isAMCompletelyFolded(TTI, Kind, AccessTy, BaseGV, BaseOffset,
+ HasBaseReg, Scale);
}
static bool isAlwaysFoldable(const TargetTransformInfo &TTI,
@@ -1485,8 +1581,8 @@ static bool isAlwaysFoldable(const TargetTransformInfo &TTI,
// base and a scale.
int64_t Scale = Kind == LSRUse::ICmpZero ? -1 : 1;
- return isLegalUse(TTI, MinOffset, MaxOffset, Kind, AccessTy, BaseGV,
- BaseOffset, HasBaseReg, Scale);
+ return isAMCompletelyFolded(TTI, MinOffset, MaxOffset, Kind, AccessTy, BaseGV,
+ BaseOffset, HasBaseReg, Scale);
}
namespace {
@@ -1515,7 +1611,7 @@ struct IVChain {
SmallVector<IVInc,1> Incs;
const SCEV *ExprBase;
- IVChain() : ExprBase(0) {}
+ IVChain() : ExprBase(nullptr) {}
IVChain(const IVInc &Head, const SCEV *Base)
: Incs(1, Head), ExprBase(Base) {}
@@ -1642,8 +1738,19 @@ class LSRInstance {
void GenerateReassociations(LSRUse &LU, unsigned LUIdx, Formula Base,
unsigned Depth = 0);
+
+ void GenerateReassociationsImpl(LSRUse &LU, unsigned LUIdx,
+ const Formula &Base, unsigned Depth,
+ size_t Idx, bool IsScaledReg = false);
void GenerateCombinations(LSRUse &LU, unsigned LUIdx, Formula Base);
+ void GenerateSymbolicOffsetsImpl(LSRUse &LU, unsigned LUIdx,
+ const Formula &Base, size_t Idx,
+ bool IsScaledReg = false);
void GenerateSymbolicOffsets(LSRUse &LU, unsigned LUIdx, Formula Base);
+ void GenerateConstantOffsetsImpl(LSRUse &LU, unsigned LUIdx,
+ const Formula &Base,
+ const SmallVectorImpl<int64_t> &Worklist,
+ size_t Idx, bool IsScaledReg = false);
void GenerateConstantOffsets(LSRUse &LU, unsigned LUIdx, Formula Base);
void GenerateICmpZeroScales(LSRUse &LU, unsigned LUIdx, Formula Base);
void GenerateScales(LSRUse &LU, unsigned LUIdx, Formula Base);
@@ -1721,7 +1828,7 @@ void LSRInstance::OptimizeShadowIV() {
IVUsers::const_iterator CandidateUI = UI;
++UI;
Instruction *ShadowUse = CandidateUI->getUser();
- Type *DestTy = 0;
+ Type *DestTy = nullptr;
bool IsSigned = false;
/* If shadow use is a int->float cast then insert a second IV
@@ -1783,7 +1890,7 @@ void LSRInstance::OptimizeShadowIV() {
continue;
/* Initialize new IV, double d = 0.0 in above example. */
- ConstantInt *C = 0;
+ ConstantInt *C = nullptr;
if (Incr->getOperand(0) == PH)
C = dyn_cast<ConstantInt>(Incr->getOperand(1));
else if (Incr->getOperand(1) == PH)
@@ -1905,7 +2012,7 @@ ICmpInst *LSRInstance::OptimizeMax(ICmpInst *Cond, IVStrideUse* &CondUse) {
// for ICMP_ULE here because the comparison would be with zero, which
// isn't interesting.
CmpInst::Predicate Pred = ICmpInst::BAD_ICMP_PREDICATE;
- const SCEVNAryExpr *Max = 0;
+ const SCEVNAryExpr *Max = nullptr;
if (const SCEVSMaxExpr *S = dyn_cast<SCEVSMaxExpr>(BackedgeTakenCount)) {
Pred = ICmpInst::ICMP_SLE;
Max = S;
@@ -1948,7 +2055,7 @@ ICmpInst *LSRInstance::OptimizeMax(ICmpInst *Cond, IVStrideUse* &CondUse) {
// Check the right operand of the select, and remember it, as it will
// be used in the new comparison instruction.
- Value *NewRHS = 0;
+ Value *NewRHS = nullptr;
if (ICmpInst::isTrueWhenEqual(Pred)) {
// Look for n+1, and grab n.
if (AddOperator *BO = dyn_cast<AddOperator>(Sel->getOperand(1)))
@@ -2018,7 +2125,7 @@ LSRInstance::OptimizeLoopTermCond() {
continue;
// Search IVUsesByStride to find Cond's IVUse if there is one.
- IVStrideUse *CondUse = 0;
+ IVStrideUse *CondUse = nullptr;
ICmpInst *Cond = cast<ICmpInst>(TermBr->getCondition());
if (!FindIVUserForCond(Cond, CondUse))
continue;
@@ -2071,12 +2178,12 @@ LSRInstance::OptimizeLoopTermCond() {
// Check for possible scaled-address reuse.
Type *AccessTy = getAccessType(UI->getUser());
int64_t Scale = C->getSExtValue();
- if (TTI.isLegalAddressingMode(AccessTy, /*BaseGV=*/ 0,
+ if (TTI.isLegalAddressingMode(AccessTy, /*BaseGV=*/ nullptr,
/*BaseOffset=*/ 0,
/*HasBaseReg=*/ false, Scale))
goto decline_post_inc;
Scale = -Scale;
- if (TTI.isLegalAddressingMode(AccessTy, /*BaseGV=*/ 0,
+ if (TTI.isLegalAddressingMode(AccessTy, /*BaseGV=*/ nullptr,
/*BaseOffset=*/ 0,
/*HasBaseReg=*/ false, Scale))
goto decline_post_inc;
@@ -2146,23 +2253,25 @@ LSRInstance::reconcileNewOffset(LSRUse &LU, int64_t NewOffset, bool HasBaseReg,
// the uses will have all its uses outside the loop, for example.
if (LU.Kind != Kind)
return false;
+
+ // Check for a mismatched access type, and fall back conservatively as needed.
+ // TODO: Be less conservative when the type is similar and can use the same
+ // addressing modes.
+ if (Kind == LSRUse::Address && AccessTy != LU.AccessTy)
+ NewAccessTy = Type::getVoidTy(AccessTy->getContext());
+
// Conservatively assume HasBaseReg is true for now.
if (NewOffset < LU.MinOffset) {
- if (!isAlwaysFoldable(TTI, Kind, AccessTy, /*BaseGV=*/ 0,
+ if (!isAlwaysFoldable(TTI, Kind, NewAccessTy, /*BaseGV=*/nullptr,
LU.MaxOffset - NewOffset, HasBaseReg))
return false;
NewMinOffset = NewOffset;
} else if (NewOffset > LU.MaxOffset) {
- if (!isAlwaysFoldable(TTI, Kind, AccessTy, /*BaseGV=*/ 0,
+ if (!isAlwaysFoldable(TTI, Kind, NewAccessTy, /*BaseGV=*/nullptr,
NewOffset - LU.MinOffset, HasBaseReg))
return false;
NewMaxOffset = NewOffset;
}
- // Check for a mismatched access type, and fall back conservatively as needed.
- // TODO: Be less conservative when the type is similar and can use the same
- // addressing modes.
- if (Kind == LSRUse::Address && AccessTy != LU.AccessTy)
- NewAccessTy = Type::getVoidTy(AccessTy->getContext());
// Update the use.
LU.MinOffset = NewMinOffset;
@@ -2183,7 +2292,7 @@ LSRInstance::getUse(const SCEV *&Expr,
int64_t Offset = ExtractImmediate(Expr, SE);
// Basic uses can't accept any offset, for example.
- if (!isAlwaysFoldable(TTI, Kind, AccessTy, /*BaseGV=*/ 0,
+ if (!isAlwaysFoldable(TTI, Kind, AccessTy, /*BaseGV=*/ nullptr,
Offset, /*HasBaseReg=*/ true)) {
Expr = Copy;
Offset = 0;
@@ -2267,7 +2376,7 @@ LSRInstance::FindUseWithSimilarFormula(const Formula &OrigF,
}
// Nothing looked good.
- return 0;
+ return nullptr;
}
void LSRInstance::CollectInterestingTypesAndFactors() {
@@ -2385,7 +2494,7 @@ static const SCEV *getExprBase(const SCEV *S) {
default: // uncluding scUnknown.
return S;
case scConstant:
- return 0;
+ return nullptr;
case scTruncate:
return getExprBase(cast<SCEVTruncateExpr>(S)->getOperand());
case scZeroExtend:
@@ -2476,7 +2585,7 @@ isProfitableChain(IVChain &Chain, SmallPtrSet<Instruction*, 4> &Users,
&& SE.getSCEV(Chain.tailUserInst()) == Chain.Incs[0].IncExpr) {
--cost;
}
- const SCEV *LastIncExpr = 0;
+ const SCEV *LastIncExpr = nullptr;
unsigned NumConstIncrements = 0;
unsigned NumVarIncrements = 0;
unsigned NumReusedIncrements = 0;
@@ -2535,7 +2644,7 @@ void LSRInstance::ChainInstruction(Instruction *UserInst, Instruction *IVOper,
// Visit all existing chains. Check if its IVOper can be computed as a
// profitable loop invariant increment from the last link in the Chain.
unsigned ChainIdx = 0, NChains = IVChainVec.size();
- const SCEV *LastIncExpr = 0;
+ const SCEV *LastIncExpr = nullptr;
for (; ChainIdx < NChains; ++ChainIdx) {
IVChain &Chain = IVChainVec[ChainIdx];
@@ -2755,7 +2864,7 @@ static bool canFoldIVIncExpr(const SCEV *IncExpr, Instruction *UserInst,
int64_t IncOffset = IncConst->getValue()->getSExtValue();
if (!isAlwaysFoldable(TTI, LSRUse::Address,
- getAccessType(UserInst), /*BaseGV=*/ 0,
+ getAccessType(UserInst), /*BaseGV=*/ nullptr,
IncOffset, /*HaseBaseReg=*/ false))
return false;
@@ -2773,7 +2882,7 @@ void LSRInstance::GenerateIVChain(const IVChain &Chain, SCEVExpander &Rewriter,
// findIVOperand returns IVOpEnd if it can no longer find a valid IV user.
User::op_iterator IVOpIter = findIVOperand(Head.UserInst->op_begin(),
IVOpEnd, L, SE);
- Value *IVSrc = 0;
+ Value *IVSrc = nullptr;
while (IVOpIter != IVOpEnd) {
IVSrc = getWideOperand(*IVOpIter);
@@ -2800,7 +2909,7 @@ void LSRInstance::GenerateIVChain(const IVChain &Chain, SCEVExpander &Rewriter,
DEBUG(dbgs() << "Generate chain at: " << *IVSrc << "\n");
Type *IVTy = IVSrc->getType();
Type *IntTy = SE.getEffectiveSCEVType(IVTy);
- const SCEV *LeftOverExpr = 0;
+ const SCEV *LeftOverExpr = nullptr;
for (IVChain::const_iterator IncI = Chain.begin(),
IncE = Chain.end(); IncI != IncE; ++IncI) {
@@ -2831,7 +2940,7 @@ void LSRInstance::GenerateIVChain(const IVChain &Chain, SCEVExpander &Rewriter,
TTI)) {
assert(IVTy == IVOper->getType() && "inconsistent IV increment type");
IVSrc = IVOper;
- LeftOverExpr = 0;
+ LeftOverExpr = nullptr;
}
}
Type *OperTy = IncI->IVOperand->getType();
@@ -2886,7 +2995,7 @@ void LSRInstance::CollectFixupsAndInitialFormulae() {
LF.PostIncLoops = UI->getPostIncLoops();
LSRUse::KindType Kind = LSRUse::Basic;
- Type *AccessTy = 0;
+ Type *AccessTy = nullptr;
if (isAddressUse(LF.UserInst, LF.OperandValToReplace)) {
Kind = LSRUse::Address;
AccessTy = getAccessType(LF.UserInst);
@@ -2917,7 +3026,7 @@ void LSRInstance::CollectFixupsAndInitialFormulae() {
if (SE.isLoopInvariant(N, L) && isSafeToExpand(N, SE)) {
// S is normalized, so normalize N before folding it into S
// to keep the result normalized.
- N = TransformForPostIncUse(Normalize, N, CI, 0,
+ N = TransformForPostIncUse(Normalize, N, CI, nullptr,
LF.PostIncLoops, SE, DT);
Kind = LSRUse::ICmpZero;
S = SE.getMinusSCEV(N, S);
@@ -2992,6 +3101,9 @@ void LSRInstance::CountRegisters(const Formula &F, size_t LUIdx) {
/// InsertFormula - If the given formula has not yet been inserted, add it to
/// the list, and return true. Return false otherwise.
bool LSRInstance::InsertFormula(LSRUse &LU, unsigned LUIdx, const Formula &F) {
+ // Do not insert formula that we will not be able to expand.
+ assert(isLegalUse(TTI, LU.MinOffset, LU.MaxOffset, LU.Kind, LU.AccessTy, F) &&
+ "Formula is illegal");
if (!LU.InsertFormula(F))
return false;
@@ -3068,7 +3180,7 @@ LSRInstance::CollectLoopInvariantFixupsAndFormulae() {
LSRFixup &LF = getNewFixup();
LF.UserInst = const_cast<Instruction *>(UserInst);
LF.OperandValToReplace = U;
- std::pair<size_t, int64_t> P = getUse(S, LSRUse::Basic, 0);
+ std::pair<size_t, int64_t> P = getUse(S, LSRUse::Basic, nullptr);
LF.LUIdx = P.first;
LF.Offset = P.second;
LSRUse &LU = Uses[LF.LUIdx];
@@ -3107,7 +3219,7 @@ static const SCEV *CollectSubexprs(const SCEV *S, const SCEVConstant *C,
if (Remainder)
Ops.push_back(C ? SE.getMulExpr(C, Remainder) : Remainder);
}
- return 0;
+ return nullptr;
} else if (const SCEVAddRecExpr *AR = dyn_cast<SCEVAddRecExpr>(S)) {
// Split a non-zero base out of an addrec.
if (AR->getStart()->isZero())
@@ -3119,7 +3231,7 @@ static const SCEV *CollectSubexprs(const SCEV *S, const SCEVConstant *C,
// does not pertain to this loop.
if (Remainder && (AR->getLoop() == L || !isa<SCEVAddRecExpr>(Remainder))) {
Ops.push_back(C ? SE.getMulExpr(C, Remainder) : Remainder);
- Remainder = 0;
+ Remainder = nullptr;
}
if (Remainder != AR->getStart()) {
if (!Remainder)
@@ -3141,90 +3253,110 @@ static const SCEV *CollectSubexprs(const SCEV *S, const SCEVConstant *C,
CollectSubexprs(Mul->getOperand(1), C, Ops, L, SE, Depth+1);
if (Remainder)
Ops.push_back(SE.getMulExpr(C, Remainder));
- return 0;
+ return nullptr;
}
}
return S;
}
-/// GenerateReassociations - Split out subexpressions from adds and the bases of
-/// addrecs.
-void LSRInstance::GenerateReassociations(LSRUse &LU, unsigned LUIdx,
- Formula Base,
- unsigned Depth) {
- // Arbitrarily cap recursion to protect compile time.
- if (Depth >= 3) return;
-
- for (size_t i = 0, e = Base.BaseRegs.size(); i != e; ++i) {
- const SCEV *BaseReg = Base.BaseRegs[i];
+/// \brief Helper function for LSRInstance::GenerateReassociations.
+void LSRInstance::GenerateReassociationsImpl(LSRUse &LU, unsigned LUIdx,
+ const Formula &Base,
+ unsigned Depth, size_t Idx,
+ bool IsScaledReg) {
+ const SCEV *BaseReg = IsScaledReg ? Base.ScaledReg : Base.BaseRegs[Idx];
+ SmallVector<const SCEV *, 8> AddOps;
+ const SCEV *Remainder = CollectSubexprs(BaseReg, nullptr, AddOps, L, SE);
+ if (Remainder)
+ AddOps.push_back(Remainder);
+
+ if (AddOps.size() == 1)
+ return;
- SmallVector<const SCEV *, 8> AddOps;
- const SCEV *Remainder = CollectSubexprs(BaseReg, 0, AddOps, L, SE);
- if (Remainder)
- AddOps.push_back(Remainder);
+ for (SmallVectorImpl<const SCEV *>::const_iterator J = AddOps.begin(),
+ JE = AddOps.end();
+ J != JE; ++J) {
- if (AddOps.size() == 1) continue;
+ // Loop-variant "unknown" values are uninteresting; we won't be able to
+ // do anything meaningful with them.
+ if (isa<SCEVUnknown>(*J) && !SE.isLoopInvariant(*J, L))
+ continue;
- for (SmallVectorImpl<const SCEV *>::const_iterator J = AddOps.begin(),
- JE = AddOps.end(); J != JE; ++J) {
+ // Don't pull a constant into a register if the constant could be folded
+ // into an immediate field.
+ if (isAlwaysFoldable(TTI, SE, LU.MinOffset, LU.MaxOffset, LU.Kind,
+ LU.AccessTy, *J, Base.getNumRegs() > 1))
+ continue;
- // Loop-variant "unknown" values are uninteresting; we won't be able to
- // do anything meaningful with them.
- if (isa<SCEVUnknown>(*J) && !SE.isLoopInvariant(*J, L))
- continue;
+ // Collect all operands except *J.
+ SmallVector<const SCEV *, 8> InnerAddOps(
+ ((const SmallVector<const SCEV *, 8> &)AddOps).begin(), J);
+ InnerAddOps.append(std::next(J),
+ ((const SmallVector<const SCEV *, 8> &)AddOps).end());
+
+ // Don't leave just a constant behind in a register if the constant could
+ // be folded into an immediate field.
+ if (InnerAddOps.size() == 1 &&
+ isAlwaysFoldable(TTI, SE, LU.MinOffset, LU.MaxOffset, LU.Kind,
+ LU.AccessTy, InnerAddOps[0], Base.getNumRegs() > 1))
+ continue;
- // Don't pull a constant into a register if the constant could be folded
- // into an immediate field.
- if (isAlwaysFoldable(TTI, SE, LU.MinOffset, LU.MaxOffset, LU.Kind,
- LU.AccessTy, *J, Base.getNumRegs() > 1))
- continue;
+ const SCEV *InnerSum = SE.getAddExpr(InnerAddOps);
+ if (InnerSum->isZero())
+ continue;
+ Formula F = Base;
- // Collect all operands except *J.
- SmallVector<const SCEV *, 8> InnerAddOps(
- ((const SmallVector<const SCEV *, 8> &)AddOps).begin(), J);
- InnerAddOps.append(std::next(J),
- ((const SmallVector<const SCEV *, 8> &)AddOps).end());
-
- // Don't leave just a constant behind in a register if the constant could
- // be folded into an immediate field.
- if (InnerAddOps.size() == 1 &&
- isAlwaysFoldable(TTI, SE, LU.MinOffset, LU.MaxOffset, LU.Kind,
- LU.AccessTy, InnerAddOps[0], Base.getNumRegs() > 1))
- continue;
+ // Add the remaining pieces of the add back into the new formula.
+ const SCEVConstant *InnerSumSC = dyn_cast<SCEVConstant>(InnerSum);
+ if (InnerSumSC && SE.getTypeSizeInBits(InnerSumSC->getType()) <= 64 &&
+ TTI.isLegalAddImmediate((uint64_t)F.UnfoldedOffset +
+ InnerSumSC->getValue()->getZExtValue())) {
+ F.UnfoldedOffset =
+ (uint64_t)F.UnfoldedOffset + InnerSumSC->getValue()->getZExtValue();
+ if (IsScaledReg)
+ F.ScaledReg = nullptr;
+ else
+ F.BaseRegs.erase(F.BaseRegs.begin() + Idx);
+ } else if (IsScaledReg)
+ F.ScaledReg = InnerSum;
+ else
+ F.BaseRegs[Idx] = InnerSum;
+
+ // Add J as its own register, or an unfolded immediate.
+ const SCEVConstant *SC = dyn_cast<SCEVConstant>(*J);
+ if (SC && SE.getTypeSizeInBits(SC->getType()) <= 64 &&
+ TTI.isLegalAddImmediate((uint64_t)F.UnfoldedOffset +
+ SC->getValue()->getZExtValue()))
+ F.UnfoldedOffset =
+ (uint64_t)F.UnfoldedOffset + SC->getValue()->getZExtValue();
+ else
+ F.BaseRegs.push_back(*J);
+ // We may have changed the number of register in base regs, adjust the
+ // formula accordingly.
+ F.Canonicalize();
+
+ if (InsertFormula(LU, LUIdx, F))
+ // If that formula hadn't been seen before, recurse to find more like
+ // it.
+ GenerateReassociations(LU, LUIdx, LU.Formulae.back(), Depth + 1);
+ }
+}
- const SCEV *InnerSum = SE.getAddExpr(InnerAddOps);
- if (InnerSum->isZero())
- continue;
- Formula F = Base;
+/// GenerateReassociations - Split out subexpressions from adds and the bases of
+/// addrecs.
+void LSRInstance::GenerateReassociations(LSRUse &LU, unsigned LUIdx,
+ Formula Base, unsigned Depth) {
+ assert(Base.isCanonical() && "Input must be in the canonical form");
+ // Arbitrarily cap recursion to protect compile time.
+ if (Depth >= 3)
+ return;
- // Add the remaining pieces of the add back into the new formula.
- const SCEVConstant *InnerSumSC = dyn_cast<SCEVConstant>(InnerSum);
- if (InnerSumSC &&
- SE.getTypeSizeInBits(InnerSumSC->getType()) <= 64 &&
- TTI.isLegalAddImmediate((uint64_t)F.UnfoldedOffset +
- InnerSumSC->getValue()->getZExtValue())) {
- F.UnfoldedOffset = (uint64_t)F.UnfoldedOffset +
- InnerSumSC->getValue()->getZExtValue();
- F.BaseRegs.erase(F.BaseRegs.begin() + i);
- } else
- F.BaseRegs[i] = InnerSum;
-
- // Add J as its own register, or an unfolded immediate.
- const SCEVConstant *SC = dyn_cast<SCEVConstant>(*J);
- if (SC && SE.getTypeSizeInBits(SC->getType()) <= 64 &&
- TTI.isLegalAddImmediate((uint64_t)F.UnfoldedOffset +
- SC->getValue()->getZExtValue()))
- F.UnfoldedOffset = (uint64_t)F.UnfoldedOffset +
- SC->getValue()->getZExtValue();
- else
- F.BaseRegs.push_back(*J);
+ for (size_t i = 0, e = Base.BaseRegs.size(); i != e; ++i)
+ GenerateReassociationsImpl(LU, LUIdx, Base, Depth, i);
- if (InsertFormula(LU, LUIdx, F))
- // If that formula hadn't been seen before, recurse to find more like
- // it.
- GenerateReassociations(LU, LUIdx, LU.Formulae.back(), Depth+1);
- }
- }
+ if (Base.Scale == 1)
+ GenerateReassociationsImpl(LU, LUIdx, Base, Depth,
+ /* Idx */ -1, /* IsScaledReg */ true);
}
/// GenerateCombinations - Generate a formula consisting of all of the
@@ -3232,8 +3364,12 @@ void LSRInstance::GenerateReassociations(LSRUse &LU, unsigned LUIdx,
void LSRInstance::GenerateCombinations(LSRUse &LU, unsigned LUIdx,
Formula Base) {
// This method is only interesting on a plurality of registers.
- if (Base.BaseRegs.size() <= 1) return;
+ if (Base.BaseRegs.size() + (Base.Scale == 1) <= 1)
+ return;
+ // Flatten the representation, i.e., reg1 + 1*reg2 => reg1 + reg2, before
+ // processing the formula.
+ Base.Unscale();
Formula F = Base;
F.BaseRegs.clear();
SmallVector<const SCEV *, 4> Ops;
@@ -3253,29 +3389,87 @@ void LSRInstance::GenerateCombinations(LSRUse &LU, unsigned LUIdx,
// rather than proceed with zero in a register.
if (!Sum->isZero()) {
F.BaseRegs.push_back(Sum);
+ F.Canonicalize();
(void)InsertFormula(LU, LUIdx, F);
}
}
}
+/// \brief Helper function for LSRInstance::GenerateSymbolicOffsets.
+void LSRInstance::GenerateSymbolicOffsetsImpl(LSRUse &LU, unsigned LUIdx,
+ const Formula &Base, size_t Idx,
+ bool IsScaledReg) {
+ const SCEV *G = IsScaledReg ? Base.ScaledReg : Base.BaseRegs[Idx];
+ GlobalValue *GV = ExtractSymbol(G, SE);
+ if (G->isZero() || !GV)
+ return;
+ Formula F = Base;
+ F.BaseGV = GV;
+ if (!isLegalUse(TTI, LU.MinOffset, LU.MaxOffset, LU.Kind, LU.AccessTy, F))
+ return;
+ if (IsScaledReg)
+ F.ScaledReg = G;
+ else
+ F.BaseRegs[Idx] = G;
+ (void)InsertFormula(LU, LUIdx, F);
+}
+
/// GenerateSymbolicOffsets - Generate reuse formulae using symbolic offsets.
void LSRInstance::GenerateSymbolicOffsets(LSRUse &LU, unsigned LUIdx,
Formula Base) {
// We can't add a symbolic offset if the address already contains one.
if (Base.BaseGV) return;
- for (size_t i = 0, e = Base.BaseRegs.size(); i != e; ++i) {
- const SCEV *G = Base.BaseRegs[i];
- GlobalValue *GV = ExtractSymbol(G, SE);
- if (G->isZero() || !GV)
- continue;
+ for (size_t i = 0, e = Base.BaseRegs.size(); i != e; ++i)
+ GenerateSymbolicOffsetsImpl(LU, LUIdx, Base, i);
+ if (Base.Scale == 1)
+ GenerateSymbolicOffsetsImpl(LU, LUIdx, Base, /* Idx */ -1,
+ /* IsScaledReg */ true);
+}
+
+/// \brief Helper function for LSRInstance::GenerateConstantOffsets.
+void LSRInstance::GenerateConstantOffsetsImpl(
+ LSRUse &LU, unsigned LUIdx, const Formula &Base,
+ const SmallVectorImpl<int64_t> &Worklist, size_t Idx, bool IsScaledReg) {
+ const SCEV *G = IsScaledReg ? Base.ScaledReg : Base.BaseRegs[Idx];
+ for (SmallVectorImpl<int64_t>::const_iterator I = Worklist.begin(),
+ E = Worklist.end();
+ I != E; ++I) {
Formula F = Base;
- F.BaseGV = GV;
- if (!isLegalUse(TTI, LU.MinOffset, LU.MaxOffset, LU.Kind, LU.AccessTy, F))
- continue;
- F.BaseRegs[i] = G;
- (void)InsertFormula(LU, LUIdx, F);
+ F.BaseOffset = (uint64_t)Base.BaseOffset - *I;
+ if (isLegalUse(TTI, LU.MinOffset - *I, LU.MaxOffset - *I, LU.Kind,
+ LU.AccessTy, F)) {
+ // Add the offset to the base register.
+ const SCEV *NewG = SE.getAddExpr(SE.getConstant(G->getType(), *I), G);
+ // If it cancelled out, drop the base register, otherwise update it.
+ if (NewG->isZero()) {
+ if (IsScaledReg) {
+ F.Scale = 0;
+ F.ScaledReg = nullptr;
+ } else
+ F.DeleteBaseReg(F.BaseRegs[Idx]);
+ F.Canonicalize();
+ } else if (IsScaledReg)
+ F.ScaledReg = NewG;
+ else
+ F.BaseRegs[Idx] = NewG;
+
+ (void)InsertFormula(LU, LUIdx, F);
+ }
}
+
+ int64_t Imm = ExtractImmediate(G, SE);
+ if (G->isZero() || Imm == 0)
+ return;
+ Formula F = Base;
+ F.BaseOffset = (uint64_t)F.BaseOffset + Imm;
+ if (!isLegalUse(TTI, LU.MinOffset, LU.MaxOffset, LU.Kind, LU.AccessTy, F))
+ return;
+ if (IsScaledReg)
+ F.ScaledReg = G;
+ else
+ F.BaseRegs[Idx] = G;
+ (void)InsertFormula(LU, LUIdx, F);
}
/// GenerateConstantOffsets - Generate reuse formulae using symbolic offsets.
@@ -3288,38 +3482,11 @@ void LSRInstance::GenerateConstantOffsets(LSRUse &LU, unsigned LUIdx,
if (LU.MaxOffset != LU.MinOffset)
Worklist.push_back(LU.MaxOffset);
- for (size_t i = 0, e = Base.BaseRegs.size(); i != e; ++i) {
- const SCEV *G = Base.BaseRegs[i];
-
- for (SmallVectorImpl<int64_t>::const_iterator I = Worklist.begin(),
- E = Worklist.end(); I != E; ++I) {
- Formula F = Base;
- F.BaseOffset = (uint64_t)Base.BaseOffset - *I;
- if (isLegalUse(TTI, LU.MinOffset - *I, LU.MaxOffset - *I, LU.Kind,
- LU.AccessTy, F)) {
- // Add the offset to the base register.
- const SCEV *NewG = SE.getAddExpr(SE.getConstant(G->getType(), *I), G);
- // If it cancelled out, drop the base register, otherwise update it.
- if (NewG->isZero()) {
- std::swap(F.BaseRegs[i], F.BaseRegs.back());
- F.BaseRegs.pop_back();
- } else
- F.BaseRegs[i] = NewG;
-
- (void)InsertFormula(LU, LUIdx, F);
- }
- }
-
- int64_t Imm = ExtractImmediate(G, SE);
- if (G->isZero() || Imm == 0)
- continue;
- Formula F = Base;
- F.BaseOffset = (uint64_t)F.BaseOffset + Imm;
- if (!isLegalUse(TTI, LU.MinOffset, LU.MaxOffset, LU.Kind, LU.AccessTy, F))
- continue;
- F.BaseRegs[i] = G;
- (void)InsertFormula(LU, LUIdx, F);
- }
+ for (size_t i = 0, e = Base.BaseRegs.size(); i != e; ++i)
+ GenerateConstantOffsetsImpl(LU, LUIdx, Base, Worklist, i);
+ if (Base.Scale == 1)
+ GenerateConstantOffsetsImpl(LU, LUIdx, Base, Worklist, /* Idx */ -1,
+ /* IsScaledReg */ true);
}
/// GenerateICmpZeroScales - For ICmpZero, check to see if we can scale up
@@ -3419,7 +3586,11 @@ void LSRInstance::GenerateScales(LSRUse &LU, unsigned LUIdx, Formula Base) {
if (!IntTy) return;
// If this Formula already has a scaled register, we can't add another one.
- if (Base.Scale != 0) return;
+ // Try to unscale the formula to generate a better scale.
+ if (Base.Scale != 0 && !Base.Unscale())
+ return;
+
+ assert(Base.Scale == 0 && "Unscale did not did its job!");
// Check each interesting stride.
for (SmallSetVector<int64_t, 8>::const_iterator
@@ -3460,6 +3631,11 @@ void LSRInstance::GenerateScales(LSRUse &LU, unsigned LUIdx, Formula Base) {
Formula F = Base;
F.ScaledReg = Quotient;
F.DeleteBaseReg(F.BaseRegs[i]);
+ // The canonical representation of 1*reg is reg, which is already in
+ // Base. In that case, do not try to insert the formula, it will be
+ // rejected anyway.
+ if (F.Scale == 1 && F.BaseRegs.empty())
+ continue;
(void)InsertFormula(LU, LUIdx, F);
}
}
@@ -3624,7 +3800,12 @@ void LSRInstance::GenerateCrossUseConstantOffsets() {
// TODO: Use a more targeted data structure.
for (size_t L = 0, LE = LU.Formulae.size(); L != LE; ++L) {
- const Formula &F = LU.Formulae[L];
+ Formula F = LU.Formulae[L];
+ // FIXME: The code for the scaled and unscaled registers looks
+ // very similar but slightly different. Investigate if they
+ // could be merged. That way, we would not have to unscale the
+ // Formula.
+ F.Unscale();
// Use the immediate in the scaled register.
if (F.ScaledReg == OrigReg) {
int64_t Offset = (uint64_t)F.BaseOffset + Imm * (uint64_t)F.Scale;
@@ -3650,6 +3831,7 @@ void LSRInstance::GenerateCrossUseConstantOffsets() {
continue;
// OK, looks good.
+ NewF.Canonicalize();
(void)InsertFormula(LU, LUIdx, NewF);
} else {
// Use the immediate in a base register.
@@ -3683,6 +3865,7 @@ void LSRInstance::GenerateCrossUseConstantOffsets() {
goto skip_formula;
// Ok, looks good.
+ NewF.Canonicalize();
(void)InsertFormula(LU, LUIdx, NewF);
break;
skip_formula:;
@@ -3936,7 +4119,7 @@ void LSRInstance::NarrowSearchSpaceByCollapsingUnrolledCode() {
for (SmallVectorImpl<Formula>::const_iterator I = LU.Formulae.begin(),
E = LU.Formulae.end(); I != E; ++I) {
const Formula &F = *I;
- if (F.BaseOffset == 0 || F.Scale != 0)
+ if (F.BaseOffset == 0 || (F.Scale != 0 && F.Scale != 1))
continue;
LSRUse *LUThatHas = FindUseWithSimilarFormula(F, LU);
@@ -4033,7 +4216,7 @@ void LSRInstance::NarrowSearchSpaceByPickingWinnerRegs() {
// Pick the register which is used by the most LSRUses, which is likely
// to be a good reuse register candidate.
- const SCEV *Best = 0;
+ const SCEV *Best = nullptr;
unsigned BestNum = 0;
for (RegUseTracker::const_iterator I = RegUses.begin(), E = RegUses.end();
I != E; ++I) {
@@ -4130,19 +4313,22 @@ void LSRInstance::SolveRecurse(SmallVectorImpl<const Formula *> &Solution,
E = LU.Formulae.end(); I != E; ++I) {
const Formula &F = *I;
- // Ignore formulae which do not use any of the required registers.
- bool SatisfiedReqReg = true;
+ // Ignore formulae which may not be ideal in terms of register reuse of
+ // ReqRegs. The formula should use all required registers before
+ // introducing new ones.
+ int NumReqRegsToFind = std::min(F.getNumRegs(), ReqRegs.size());
for (SmallSetVector<const SCEV *, 4>::const_iterator J = ReqRegs.begin(),
JE = ReqRegs.end(); J != JE; ++J) {
const SCEV *Reg = *J;
- if ((!F.ScaledReg || F.ScaledReg != Reg) &&
- std::find(F.BaseRegs.begin(), F.BaseRegs.end(), Reg) ==
+ if ((F.ScaledReg && F.ScaledReg == Reg) ||
+ std::find(F.BaseRegs.begin(), F.BaseRegs.end(), Reg) !=
F.BaseRegs.end()) {
- SatisfiedReqReg = false;
- break;
+ --NumReqRegsToFind;
+ if (NumReqRegsToFind == 0)
+ break;
}
}
- if (!SatisfiedReqReg) {
+ if (NumReqRegsToFind != 0) {
// If none of the formulae satisfied the required registers, then we could
// clear ReqRegs and try again. Currently, we simply give up in this case.
continue;
@@ -4240,7 +4426,7 @@ LSRInstance::HoistInsertPosition(BasicBlock::iterator IP,
}
bool AllDominate = true;
- Instruction *BetterPos = 0;
+ Instruction *BetterPos = nullptr;
Instruction *Tentative = IDom->getTerminator();
for (SmallVectorImpl<Instruction *>::const_iterator I = Inputs.begin(),
E = Inputs.end(); I != E; ++I) {
@@ -4379,11 +4565,11 @@ Value *LSRInstance::Expand(const LSRFixup &LF,
LF.UserInst, LF.OperandValToReplace,
Loops, SE, DT);
- Ops.push_back(SE.getUnknown(Rewriter.expandCodeFor(Reg, 0, IP)));
+ Ops.push_back(SE.getUnknown(Rewriter.expandCodeFor(Reg, nullptr, IP)));
}
// Expand the ScaledReg portion.
- Value *ICmpScaledV = 0;
+ Value *ICmpScaledV = nullptr;
if (F.Scale != 0) {
const SCEV *ScaledS = F.ScaledReg;
@@ -4394,25 +4580,34 @@ Value *LSRInstance::Expand(const LSRFixup &LF,
Loops, SE, DT);
if (LU.Kind == LSRUse::ICmpZero) {
- // An interesting way of "folding" with an icmp is to use a negated
- // scale, which we'll implement by inserting it into the other operand
- // of the icmp.
- assert(F.Scale == -1 &&
- "The only scale supported by ICmpZero uses is -1!");
- ICmpScaledV = Rewriter.expandCodeFor(ScaledS, 0, IP);
+ // Expand ScaleReg as if it was part of the base regs.
+ if (F.Scale == 1)
+ Ops.push_back(
+ SE.getUnknown(Rewriter.expandCodeFor(ScaledS, nullptr, IP)));
+ else {
+ // An interesting way of "folding" with an icmp is to use a negated
+ // scale, which we'll implement by inserting it into the other operand
+ // of the icmp.
+ assert(F.Scale == -1 &&
+ "The only scale supported by ICmpZero uses is -1!");
+ ICmpScaledV = Rewriter.expandCodeFor(ScaledS, nullptr, IP);
+ }
} else {
// Otherwise just expand the scaled register and an explicit scale,
// which is expected to be matched as part of the address.
// Flush the operand list to suppress SCEVExpander hoisting address modes.
- if (!Ops.empty() && LU.Kind == LSRUse::Address) {
+ // Unless the addressing mode will not be folded.
+ if (!Ops.empty() && LU.Kind == LSRUse::Address &&
+ isAMCompletelyFolded(TTI, LU, F)) {
Value *FullV = Rewriter.expandCodeFor(SE.getAddExpr(Ops), Ty, IP);
Ops.clear();
Ops.push_back(SE.getUnknown(FullV));
}
- ScaledS = SE.getUnknown(Rewriter.expandCodeFor(ScaledS, 0, IP));
- ScaledS = SE.getMulExpr(ScaledS,
- SE.getConstant(ScaledS->getType(), F.Scale));
+ ScaledS = SE.getUnknown(Rewriter.expandCodeFor(ScaledS, nullptr, IP));
+ if (F.Scale != 1)
+ ScaledS =
+ SE.getMulExpr(ScaledS, SE.getConstant(ScaledS->getType(), F.Scale));
Ops.push_back(ScaledS);
}
}
@@ -4490,7 +4685,9 @@ Value *LSRInstance::Expand(const LSRFixup &LF,
}
CI->setOperand(1, ICmpScaledV);
} else {
- assert(F.Scale == 0 &&
+ // A scale of 1 means that the scale has been expanded as part of the
+ // base regs.
+ assert((F.Scale == 0 || F.Scale == 1) &&
"ICmp does not support folding a global value and "
"a scale at the same time!");
Constant *C = ConstantInt::getSigned(SE.getEffectiveSCEVType(OpTy),
@@ -4531,7 +4728,7 @@ void LSRInstance::RewriteForPHI(PHINode *PN,
Loop *PNLoop = LI.getLoopFor(Parent);
if (!PNLoop || Parent != PNLoop->getHeader()) {
// Split the critical edge.
- BasicBlock *NewBB = 0;
+ BasicBlock *NewBB = nullptr;
if (!Parent->isLandingPad()) {
NewBB = SplitCriticalEdge(BB, Parent, P,
/*MergeIdenticalEdges=*/true,
@@ -4560,7 +4757,7 @@ void LSRInstance::RewriteForPHI(PHINode *PN,
}
std::pair<DenseMap<BasicBlock *, Value *>::iterator, bool> Pair =
- Inserted.insert(std::make_pair(BB, static_cast<Value *>(0)));
+ Inserted.insert(std::make_pair(BB, static_cast<Value *>(nullptr)));
if (!Pair.second)
PN->setIncomingValue(i, Pair.first->second);
else {
@@ -4670,7 +4867,7 @@ LSRInstance::LSRInstance(Loop *L, Pass *P)
DT(P->getAnalysis<DominatorTreeWrapperPass>().getDomTree()),
LI(P->getAnalysis<LoopInfo>()),
TTI(P->getAnalysis<TargetTransformInfo>()), L(L), Changed(false),
- IVIncInsertPos(0) {
+ IVIncInsertPos(nullptr) {
// If LoopSimplify form is not available, stay out of trouble.
if (!L->isLoopSimplifyForm())
return;
diff --git a/lib/Transforms/Scalar/LoopUnrollPass.cpp b/lib/Transforms/Scalar/LoopUnrollPass.cpp
index ecd350b..fc28fd2 100644
--- a/lib/Transforms/Scalar/LoopUnrollPass.cpp
+++ b/lib/Transforms/Scalar/LoopUnrollPass.cpp
@@ -12,7 +12,6 @@
// counts of loops easily.
//===----------------------------------------------------------------------===//
-#define DEBUG_TYPE "loop-unroll"
#include "llvm/Transforms/Scalar.h"
#include "llvm/Analysis/CodeMetrics.h"
#include "llvm/Analysis/LoopPass.h"
@@ -29,6 +28,8 @@
using namespace llvm;
+#define DEBUG_TYPE "loop-unroll"
+
static cl::opt<unsigned>
UnrollThreshold("unroll-threshold", cl::init(150), cl::Hidden,
cl::desc("The cut-off point for automatic loop unrolling"));
@@ -237,9 +238,12 @@ bool LoopUnroll::runOnLoop(Loop *L, LPPassManager &LPM) {
return false;
}
uint64_t Size = (uint64_t)LoopSize*Count;
- if (TripCount != 1 && Size > Threshold) {
- DEBUG(dbgs() << " Too large to fully unroll with count: " << Count
- << " because size: " << Size << ">" << Threshold << "\n");
+ if (TripCount != 1 &&
+ (Size > Threshold || (Count != TripCount && Size > PartialThreshold))) {
+ if (Size > Threshold)
+ DEBUG(dbgs() << " Too large to fully unroll with count: " << Count
+ << " because size: " << Size << ">" << Threshold << "\n");
+
bool AllowPartial = UserAllowPartial ? CurrentAllowPartial : UP.Partial;
if (!AllowPartial && !(Runtime && TripCount == 0)) {
DEBUG(dbgs() << " will not try to unroll partially because "
diff --git a/lib/Transforms/Scalar/LoopUnswitch.cpp b/lib/Transforms/Scalar/LoopUnswitch.cpp
index 5954f4a..977c53a 100644
--- a/lib/Transforms/Scalar/LoopUnswitch.cpp
+++ b/lib/Transforms/Scalar/LoopUnswitch.cpp
@@ -26,7 +26,6 @@
//
//===----------------------------------------------------------------------===//
-#define DEBUG_TYPE "loop-unswitch"
#include "llvm/Transforms/Scalar.h"
#include "llvm/ADT/STLExtras.h"
#include "llvm/ADT/SmallPtrSet.h"
@@ -53,6 +52,8 @@
#include <set>
using namespace llvm;
+#define DEBUG_TYPE "loop-unswitch"
+
STATISTIC(NumBranches, "Number of branches unswitched");
STATISTIC(NumSwitches, "Number of switches unswitched");
STATISTIC(NumSelects , "Number of selects unswitched");
@@ -96,7 +97,7 @@ namespace {
public:
LUAnalysisCache() :
- CurLoopInstructions(0), CurrentLoopProperties(0),
+ CurLoopInstructions(nullptr), CurrentLoopProperties(nullptr),
MaxSize(Threshold)
{}
@@ -151,8 +152,8 @@ namespace {
static char ID; // Pass ID, replacement for typeid
explicit LoopUnswitch(bool Os = false) :
LoopPass(ID), OptimizeForSize(Os), redoLoop(false),
- currentLoop(0), DT(0), loopHeader(0),
- loopPreheader(0) {
+ currentLoop(nullptr), DT(nullptr), loopHeader(nullptr),
+ loopPreheader(nullptr) {
initializeLoopUnswitchPass(*PassRegistry::getPassRegistry());
}
@@ -180,15 +181,6 @@ namespace {
BranchesInfo.forgetLoop(currentLoop);
}
- /// RemoveLoopFromWorklist - If the specified loop is on the loop worklist,
- /// remove it.
- void RemoveLoopFromWorklist(Loop *L) {
- std::vector<Loop*>::iterator I = std::find(LoopProcessWorklist.begin(),
- LoopProcessWorklist.end(), L);
- if (I != LoopProcessWorklist.end())
- LoopProcessWorklist.erase(I);
- }
-
void initLoopData() {
loopHeader = currentLoop->getHeader();
loopPreheader = currentLoop->getLoopPreheader();
@@ -212,9 +204,8 @@ namespace {
Instruction *InsertPt);
void SimplifyCode(std::vector<Instruction*> &Worklist, Loop *L);
- void RemoveLoopFromHierarchy(Loop *L);
- bool IsTrivialUnswitchCondition(Value *Cond, Constant **Val = 0,
- BasicBlock **LoopExit = 0);
+ bool IsTrivialUnswitchCondition(Value *Cond, Constant **Val = nullptr,
+ BasicBlock **LoopExit = nullptr);
};
}
@@ -283,8 +274,8 @@ void LUAnalysisCache::forgetLoop(const Loop *L) {
LoopsProperties.erase(LIt);
}
- CurrentLoopProperties = 0;
- CurLoopInstructions = 0;
+ CurrentLoopProperties = nullptr;
+ CurLoopInstructions = nullptr;
}
// Mark case value as unswitched.
@@ -355,10 +346,10 @@ static Value *FindLIVLoopCondition(Value *Cond, Loop *L, bool &Changed) {
// We can never unswitch on vector conditions.
if (Cond->getType()->isVectorTy())
- return 0;
+ return nullptr;
// Constants should be folded, not unswitched on!
- if (isa<Constant>(Cond)) return 0;
+ if (isa<Constant>(Cond)) return nullptr;
// TODO: Handle: br (VARIANT|INVARIANT).
@@ -378,7 +369,7 @@ static Value *FindLIVLoopCondition(Value *Cond, Loop *L, bool &Changed) {
return RHS;
}
- return 0;
+ return nullptr;
}
bool LoopUnswitch::runOnLoop(Loop *L, LPPassManager &LPM_Ref) {
@@ -389,7 +380,7 @@ bool LoopUnswitch::runOnLoop(Loop *L, LPPassManager &LPM_Ref) {
LPM = &LPM_Ref;
DominatorTreeWrapperPass *DTWP =
getAnalysisIfAvailable<DominatorTreeWrapperPass>();
- DT = DTWP ? &DTWP->getDomTree() : 0;
+ DT = DTWP ? &DTWP->getDomTree() : nullptr;
currentLoop = L;
Function *F = currentLoop->getHeader()->getParent();
bool Changed = false;
@@ -461,7 +452,7 @@ bool LoopUnswitch::processCurrentLoop() {
// Find a value to unswitch on:
// FIXME: this should chose the most expensive case!
// FIXME: scan for a case with a non-critical edge?
- Constant *UnswitchVal = 0;
+ Constant *UnswitchVal = nullptr;
// Do not process same value again and again.
// At this point we have some cases already unswitched and
@@ -518,7 +509,7 @@ static bool isTrivialLoopExitBlockHelper(Loop *L, BasicBlock *BB,
if (!L->contains(BB)) {
// Otherwise, this is a loop exit, this is fine so long as this is the
// first exit.
- if (ExitBB != 0) return false;
+ if (ExitBB) return false;
ExitBB = BB;
return true;
}
@@ -545,10 +536,10 @@ static bool isTrivialLoopExitBlockHelper(Loop *L, BasicBlock *BB,
static BasicBlock *isTrivialLoopExitBlock(Loop *L, BasicBlock *BB) {
std::set<BasicBlock*> Visited;
Visited.insert(L->getHeader()); // Branches to header make infinite loops.
- BasicBlock *ExitBB = 0;
+ BasicBlock *ExitBB = nullptr;
if (isTrivialLoopExitBlockHelper(L, BB, ExitBB, Visited))
return ExitBB;
- return 0;
+ return nullptr;
}
/// IsTrivialUnswitchCondition - Check to see if this unswitch condition is
@@ -569,7 +560,7 @@ bool LoopUnswitch::IsTrivialUnswitchCondition(Value *Cond, Constant **Val,
TerminatorInst *HeaderTerm = Header->getTerminator();
LLVMContext &Context = Header->getContext();
- BasicBlock *LoopExitBB = 0;
+ BasicBlock *LoopExitBB = nullptr;
if (BranchInst *BI = dyn_cast<BranchInst>(HeaderTerm)) {
// If the header block doesn't end with a conditional branch on Cond, we
// can't handle it.
@@ -639,8 +630,8 @@ bool LoopUnswitch::IsTrivialUnswitchCondition(Value *Cond, Constant **Val,
/// unswitch the loop, reprocess the pieces, then return true.
bool LoopUnswitch::UnswitchIfProfitable(Value *LoopCond, Constant *Val) {
Function *F = loopHeader->getParent();
- Constant *CondVal = 0;
- BasicBlock *ExitBlock = 0;
+ Constant *CondVal = nullptr;
+ BasicBlock *ExitBlock = nullptr;
if (IsTrivialUnswitchCondition(LoopCond, &CondVal, &ExitBlock)) {
// If the condition is trivial, always unswitch. There is no code growth
@@ -948,17 +939,6 @@ static void ReplaceUsesOfWith(Instruction *I, Value *V,
++NumSimplify;
}
-/// RemoveLoopFromHierarchy - We have discovered that the specified loop has
-/// become unwrapped, either because the backedge was deleted, or because the
-/// edge into the header was removed. If the edge into the header from the
-/// latch block was removed, the loop is unwrapped but subloops are still alive,
-/// so they just reparent loops. If the loops are actually dead, they will be
-/// removed later.
-void LoopUnswitch::RemoveLoopFromHierarchy(Loop *L) {
- LPM->deleteLoopFromQueue(L);
- RemoveLoopFromWorklist(L);
-}
-
// RewriteLoopBodyWithConditionConstant - We know either that the value LIC has
// the value specified by Val in the specified loop, or we know it does NOT have
// that value. Rewrite any uses of LIC or of properties correlated to it.
@@ -1020,7 +1000,7 @@ void LoopUnswitch::RewriteLoopBodyWithConditionConstant(Loop *L, Value *LIC,
// If we know that LIC is not Val, use this info to simplify code.
SwitchInst *SI = dyn_cast<SwitchInst>(UI);
- if (SI == 0 || !isa<ConstantInt>(Val)) continue;
+ if (!SI || !isa<ConstantInt>(Val)) continue;
SwitchInst::CaseIt DeadCase = SI->findCaseValue(cast<ConstantInt>(Val));
// Default case is live for multiple values.
diff --git a/lib/Transforms/Scalar/LowerAtomic.cpp b/lib/Transforms/Scalar/LowerAtomic.cpp
index 7c0a623..4251ac4 100644
--- a/lib/Transforms/Scalar/LowerAtomic.cpp
+++ b/lib/Transforms/Scalar/LowerAtomic.cpp
@@ -12,7 +12,6 @@
//
//===----------------------------------------------------------------------===//
-#define DEBUG_TYPE "loweratomic"
#include "llvm/Transforms/Scalar.h"
#include "llvm/IR/Function.h"
#include "llvm/IR/IRBuilder.h"
@@ -20,6 +19,8 @@
#include "llvm/Pass.h"
using namespace llvm;
+#define DEBUG_TYPE "loweratomic"
+
static bool LowerAtomicCmpXchgInst(AtomicCmpXchgInst *CXI) {
IRBuilder<> Builder(CXI->getParent(), CXI);
Value *Ptr = CXI->getPointerOperand();
@@ -42,7 +43,7 @@ static bool LowerAtomicRMWInst(AtomicRMWInst *RMWI) {
Value *Val = RMWI->getValOperand();
LoadInst *Orig = Builder.CreateLoad(Ptr);
- Value *Res = NULL;
+ Value *Res = nullptr;
switch (RMWI->getOperation()) {
default: llvm_unreachable("Unexpected RMW operation");
diff --git a/lib/Transforms/Scalar/MemCpyOptimizer.cpp b/lib/Transforms/Scalar/MemCpyOptimizer.cpp
index 2603c96..b6bc792 100644
--- a/lib/Transforms/Scalar/MemCpyOptimizer.cpp
+++ b/lib/Transforms/Scalar/MemCpyOptimizer.cpp
@@ -12,7 +12,6 @@
//
//===----------------------------------------------------------------------===//
-#define DEBUG_TYPE "memcpyopt"
#include "llvm/Transforms/Scalar.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/Statistic.h"
@@ -33,6 +32,8 @@
#include <list>
using namespace llvm;
+#define DEBUG_TYPE "memcpyopt"
+
STATISTIC(NumMemCpyInstr, "Number of memcpy instructions deleted");
STATISTIC(NumMemSetInfer, "Number of memsets inferred");
STATISTIC(NumMoveToCpy, "Number of memmoves converted to memcpy");
@@ -49,7 +50,7 @@ static int64_t GetOffsetFromIndex(const GEPOperator *GEP, unsigned Idx,
int64_t Offset = 0;
for (unsigned i = Idx, e = GEP->getNumOperands(); i != e; ++i, ++GTI) {
ConstantInt *OpC = dyn_cast<ConstantInt>(GEP->getOperand(i));
- if (OpC == 0)
+ if (!OpC)
return VariableIdxFound = true;
if (OpC->isZero()) continue; // No offset.
@@ -89,12 +90,12 @@ static bool IsPointerOffset(Value *Ptr1, Value *Ptr2, int64_t &Offset,
// If one pointer is a GEP and the other isn't, then see if the GEP is a
// constant offset from the base, as in "P" and "gep P, 1".
- if (GEP1 && GEP2 == 0 && GEP1->getOperand(0)->stripPointerCasts() == Ptr2) {
+ if (GEP1 && !GEP2 && GEP1->getOperand(0)->stripPointerCasts() == Ptr2) {
Offset = -GetOffsetFromIndex(GEP1, 1, VariableIdxFound, TD);
return !VariableIdxFound;
}
- if (GEP2 && GEP1 == 0 && GEP2->getOperand(0)->stripPointerCasts() == Ptr1) {
+ if (GEP2 && !GEP1 && GEP2->getOperand(0)->stripPointerCasts() == Ptr1) {
Offset = GetOffsetFromIndex(GEP2, 1, VariableIdxFound, TD);
return !VariableIdxFound;
}
@@ -317,9 +318,9 @@ namespace {
static char ID; // Pass identification, replacement for typeid
MemCpyOpt() : FunctionPass(ID) {
initializeMemCpyOptPass(*PassRegistry::getPassRegistry());
- MD = 0;
- TLI = 0;
- DL = 0;
+ MD = nullptr;
+ TLI = nullptr;
+ DL = nullptr;
}
bool runOnFunction(Function &F) override;
@@ -373,7 +374,7 @@ INITIALIZE_PASS_END(MemCpyOpt, "memcpyopt", "MemCpy Optimization",
/// attempts to merge them together into a memcpy/memset.
Instruction *MemCpyOpt::tryMergingIntoMemset(Instruction *StartInst,
Value *StartPtr, Value *ByteVal) {
- if (DL == 0) return 0;
+ if (!DL) return nullptr;
// Okay, so we now have a single store that can be splatable. Scan to find
// all subsequent stores of the same value to offset from the same pointer.
@@ -426,7 +427,7 @@ Instruction *MemCpyOpt::tryMergingIntoMemset(Instruction *StartInst,
// If we have no ranges, then we just had a single store with nothing that
// could be merged in. This is a very common case of course.
if (Ranges.empty())
- return 0;
+ return nullptr;
// If we had at least one store that could be merged in, add the starting
// store as well. We try to avoid this unless there is at least something
@@ -440,7 +441,7 @@ Instruction *MemCpyOpt::tryMergingIntoMemset(Instruction *StartInst,
// Now that we have full information about ranges, loop over the ranges and
// emit memset's for anything big enough to be worthwhile.
- Instruction *AMemSet = 0;
+ Instruction *AMemSet = nullptr;
for (MemsetRanges::const_iterator I = Ranges.begin(), E = Ranges.end();
I != E; ++I) {
const MemsetRange &Range = *I;
@@ -491,7 +492,7 @@ Instruction *MemCpyOpt::tryMergingIntoMemset(Instruction *StartInst,
bool MemCpyOpt::processStore(StoreInst *SI, BasicBlock::iterator &BBI) {
if (!SI->isSimple()) return false;
- if (DL == 0) return false;
+ if (!DL) return false;
// Detect cases where we're performing call slot forwarding, but
// happen to be using a load-store pair to implement it, rather than
@@ -500,7 +501,7 @@ bool MemCpyOpt::processStore(StoreInst *SI, BasicBlock::iterator &BBI) {
if (LI->isSimple() && LI->hasOneUse() &&
LI->getParent() == SI->getParent()) {
MemDepResult ldep = MD->getDependency(LI);
- CallInst *C = 0;
+ CallInst *C = nullptr;
if (ldep.isClobber() && !isa<MemCpyInst>(ldep.getInst()))
C = dyn_cast<CallInst>(ldep.getInst());
@@ -512,7 +513,7 @@ bool MemCpyOpt::processStore(StoreInst *SI, BasicBlock::iterator &BBI) {
for (BasicBlock::iterator I = --BasicBlock::iterator(SI),
E = C; I != E; --I) {
if (AA.getModRefInfo(&*I, StoreLoc) != AliasAnalysis::NoModRef) {
- C = 0;
+ C = nullptr;
break;
}
}
@@ -603,7 +604,7 @@ bool MemCpyOpt::performCallSlotOptzn(Instruction *cpy,
return false;
// Check that all of src is copied to dest.
- if (DL == 0) return false;
+ if (!DL) return false;
ConstantInt *srcArraySize = dyn_cast<ConstantInt>(srcAlloca->getArraySize());
if (!srcArraySize)
@@ -846,7 +847,7 @@ bool MemCpyOpt::processMemCpy(MemCpyInst *M) {
// The optimizations after this point require the memcpy size.
ConstantInt *CopySize = dyn_cast<ConstantInt>(M->getLength());
- if (CopySize == 0) return false;
+ if (!CopySize) return false;
// The are three possible optimizations we can do for memcpy:
// a) memcpy-memcpy xform which exposes redundance for DSE.
@@ -929,7 +930,7 @@ bool MemCpyOpt::processMemMove(MemMoveInst *M) {
/// processByValArgument - This is called on every byval argument in call sites.
bool MemCpyOpt::processByValArgument(CallSite CS, unsigned ArgNo) {
- if (DL == 0) return false;
+ if (!DL) return false;
// Find out what feeds this byval argument.
Value *ByValArg = CS.getArgument(ArgNo);
@@ -946,13 +947,13 @@ bool MemCpyOpt::processByValArgument(CallSite CS, unsigned ArgNo) {
// a memcpy, see if we can byval from the source of the memcpy instead of the
// result.
MemCpyInst *MDep = dyn_cast<MemCpyInst>(DepInfo.getInst());
- if (MDep == 0 || MDep->isVolatile() ||
+ if (!MDep || MDep->isVolatile() ||
ByValArg->stripPointerCasts() != MDep->getDest())
return false;
// The length of the memcpy must be larger or equal to the size of the byval.
ConstantInt *C1 = dyn_cast<ConstantInt>(MDep->getLength());
- if (C1 == 0 || C1->getValue().getZExtValue() < ByValSize)
+ if (!C1 || C1->getValue().getZExtValue() < ByValSize)
return false;
// Get the alignment of the byval. If the call doesn't specify the alignment,
@@ -1043,7 +1044,7 @@ bool MemCpyOpt::runOnFunction(Function &F) {
bool MadeChange = false;
MD = &getAnalysis<MemoryDependenceAnalysis>();
DataLayoutPass *DLP = getAnalysisIfAvailable<DataLayoutPass>();
- DL = DLP ? &DLP->getDataLayout() : 0;
+ DL = DLP ? &DLP->getDataLayout() : nullptr;
TLI = &getAnalysis<TargetLibraryInfo>();
// If we don't have at least memset and memcpy, there is little point of doing
@@ -1058,6 +1059,6 @@ bool MemCpyOpt::runOnFunction(Function &F) {
MadeChange = true;
}
- MD = 0;
+ MD = nullptr;
return MadeChange;
}
diff --git a/lib/Transforms/Scalar/PartiallyInlineLibCalls.cpp b/lib/Transforms/Scalar/PartiallyInlineLibCalls.cpp
index 2f19935..7cce89e 100644
--- a/lib/Transforms/Scalar/PartiallyInlineLibCalls.cpp
+++ b/lib/Transforms/Scalar/PartiallyInlineLibCalls.cpp
@@ -13,7 +13,6 @@
//
//===----------------------------------------------------------------------===//
-#define DEBUG_TYPE "partially-inline-libcalls"
#include "llvm/Analysis/TargetTransformInfo.h"
#include "llvm/IR/IRBuilder.h"
#include "llvm/IR/Intrinsics.h"
@@ -25,6 +24,8 @@
using namespace llvm;
+#define DEBUG_TYPE "partially-inline-libcalls"
+
namespace {
class PartiallyInlineLibCalls : public FunctionPass {
public:
diff --git a/lib/Transforms/Scalar/Reassociate.cpp b/lib/Transforms/Scalar/Reassociate.cpp
index b6b4d97..986d6a4 100644
--- a/lib/Transforms/Scalar/Reassociate.cpp
+++ b/lib/Transforms/Scalar/Reassociate.cpp
@@ -20,7 +20,6 @@
//
//===----------------------------------------------------------------------===//
-#define DEBUG_TYPE "reassociate"
#include "llvm/Transforms/Scalar.h"
#include "llvm/ADT/DenseMap.h"
#include "llvm/ADT/PostOrderIterator.h"
@@ -42,6 +41,8 @@
#include <algorithm>
using namespace llvm;
+#define DEBUG_TYPE "reassociate"
+
STATISTIC(NumChanged, "Number of insts reassociated");
STATISTIC(NumAnnihil, "Number of expr tree annihilated");
STATISTIC(NumFactor , "Number of multiplies factored");
@@ -122,14 +123,14 @@ namespace {
public:
XorOpnd(Value *V);
- bool isInvalid() const { return SymbolicPart == 0; }
+ bool isInvalid() const { return SymbolicPart == nullptr; }
bool isOrExpr() const { return isOr; }
Value *getValue() const { return OrigVal; }
Value *getSymbolicPart() const { return SymbolicPart; }
unsigned getSymbolicRank() const { return SymbolicRank; }
const APInt &getConstPart() const { return ConstPart; }
- void Invalidate() { SymbolicPart = OrigVal = 0; }
+ void Invalidate() { SymbolicPart = OrigVal = nullptr; }
void setSymbolicRank(unsigned R) { SymbolicRank = R; }
// Sort the XorOpnd-Pointer in ascending order of symbolic-value-rank.
@@ -236,7 +237,7 @@ static BinaryOperator *isReassociableOp(Value *V, unsigned Opcode) {
if (V->hasOneUse() && isa<Instruction>(V) &&
cast<Instruction>(V)->getOpcode() == Opcode)
return cast<BinaryOperator>(V);
- return 0;
+ return nullptr;
}
static bool isUnmovableInstruction(Instruction *I) {
@@ -284,7 +285,7 @@ void Reassociate::BuildRankMap(Function &F) {
unsigned Reassociate::getRank(Value *V) {
Instruction *I = dyn_cast<Instruction>(V);
- if (I == 0) {
+ if (!I) {
if (isa<Argument>(V)) return ValueRankMap[V]; // Function argument.
return 0; // Otherwise it's a global or constant, rank 0.
}
@@ -705,7 +706,7 @@ void Reassociate::RewriteExprTree(BinaryOperator *I,
// ExpressionChanged - Non-null if the rewritten expression differs from the
// original in some non-trivial way, requiring the clearing of optional flags.
// Flags are cleared from the operator in ExpressionChanged up to I inclusive.
- BinaryOperator *ExpressionChanged = 0;
+ BinaryOperator *ExpressionChanged = nullptr;
for (unsigned i = 0; ; ++i) {
// The last operation (which comes earliest in the IR) is special as both
// operands will come from Ops, rather than just one with the other being
@@ -995,7 +996,7 @@ static Value *EmitAddTreeOfValues(Instruction *I,
/// remove Factor from the tree and return the new tree.
Value *Reassociate::RemoveFactorFromExpression(Value *V, Value *Factor) {
BinaryOperator *BO = isReassociableOp(V, Instruction::Mul);
- if (!BO) return 0;
+ if (!BO) return nullptr;
SmallVector<RepeatedValue, 8> Tree;
MadeChange |= LinearizeExprTree(BO, Tree);
@@ -1029,7 +1030,7 @@ Value *Reassociate::RemoveFactorFromExpression(Value *V, Value *Factor) {
if (!FoundFactor) {
// Make sure to restore the operands to the expression tree.
RewriteExprTree(BO, Factors);
- return 0;
+ return nullptr;
}
BasicBlock::iterator InsertPt = BO; ++InsertPt;
@@ -1114,7 +1115,7 @@ static Value *OptimizeAndOrXor(unsigned Opcode,
++NumAnnihil;
}
}
- return 0;
+ return nullptr;
}
/// Helper funciton of CombineXorOpnd(). It creates a bitwise-and
@@ -1135,7 +1136,7 @@ static Value *createAndInstr(Instruction *InsertBefore, Value *Opnd,
}
return Opnd;
}
- return 0;
+ return nullptr;
}
// Helper function of OptimizeXor(). It tries to simplify "Opnd1 ^ ConstOpnd"
@@ -1261,7 +1262,7 @@ Value *Reassociate::OptimizeXor(Instruction *I,
return V;
if (Ops.size() == 1)
- return 0;
+ return nullptr;
SmallVector<XorOpnd, 8> Opnds;
SmallVector<XorOpnd*, 8> OpndPtrs;
@@ -1294,7 +1295,7 @@ Value *Reassociate::OptimizeXor(Instruction *I,
std::stable_sort(OpndPtrs.begin(), OpndPtrs.end(), XorOpnd::PtrSortFunctor());
// Step 3: Combine adjacent operands
- XorOpnd *PrevOpnd = 0;
+ XorOpnd *PrevOpnd = nullptr;
bool Changed = false;
for (unsigned i = 0, e = Opnds.size(); i < e; i++) {
XorOpnd *CurrOpnd = OpndPtrs[i];
@@ -1328,7 +1329,7 @@ Value *Reassociate::OptimizeXor(Instruction *I,
PrevOpnd = CurrOpnd;
} else {
CurrOpnd->Invalidate();
- PrevOpnd = 0;
+ PrevOpnd = nullptr;
}
Changed = true;
}
@@ -1358,7 +1359,7 @@ Value *Reassociate::OptimizeXor(Instruction *I,
}
}
- return 0;
+ return nullptr;
}
/// OptimizeAdd - Optimize a series of operands to an 'add' instruction. This
@@ -1445,7 +1446,7 @@ Value *Reassociate::OptimizeAdd(Instruction *I,
// Keep track of each multiply we see, to avoid triggering on (X*4)+(X*4)
// where they are actually the same multiply.
unsigned MaxOcc = 0;
- Value *MaxOccVal = 0;
+ Value *MaxOccVal = nullptr;
for (unsigned i = 0, e = Ops.size(); i != e; ++i) {
BinaryOperator *BOp = isReassociableOp(Ops[i].Op, Instruction::Mul);
if (!BOp)
@@ -1543,7 +1544,7 @@ Value *Reassociate::OptimizeAdd(Instruction *I,
Ops.insert(Ops.begin(), ValueEntry(getRank(V2), V2));
}
- return 0;
+ return nullptr;
}
/// \brief Build up a vector of value/power pairs factoring a product.
@@ -1688,14 +1689,14 @@ Value *Reassociate::OptimizeMul(BinaryOperator *I,
// We can only optimize the multiplies when there is a chain of more than
// three, such that a balanced tree might require fewer total multiplies.
if (Ops.size() < 4)
- return 0;
+ return nullptr;
// Try to turn linear trees of multiplies without other uses of the
// intermediate stages into minimal multiply DAGs with perfect sub-expression
// re-use.
SmallVector<Factor, 4> Factors;
if (!collectMultiplyFactors(Ops, Factors))
- return 0; // All distinct factors, so nothing left for us to do.
+ return nullptr; // All distinct factors, so nothing left for us to do.
IRBuilder<> Builder(I);
Value *V = buildMinimalMultiplyDAG(Builder, Factors);
@@ -1704,14 +1705,14 @@ Value *Reassociate::OptimizeMul(BinaryOperator *I,
ValueEntry NewEntry = ValueEntry(getRank(V), V);
Ops.insert(std::lower_bound(Ops.begin(), Ops.end(), NewEntry), NewEntry);
- return 0;
+ return nullptr;
}
Value *Reassociate::OptimizeExpression(BinaryOperator *I,
SmallVectorImpl<ValueEntry> &Ops) {
// Now that we have the linearized expression tree, try to optimize it.
// Start by folding any constants that we found.
- Constant *Cst = 0;
+ Constant *Cst = nullptr;
unsigned Opcode = I->getOpcode();
while (!Ops.empty() && isa<Constant>(Ops.back().Op)) {
Constant *C = cast<Constant>(Ops.pop_back_val().Op);
@@ -1761,7 +1762,7 @@ Value *Reassociate::OptimizeExpression(BinaryOperator *I,
if (Ops.size() != NumOps)
return OptimizeExpression(I, Ops);
- return 0;
+ return nullptr;
}
/// EraseInst - Zap the given instruction, adding interesting operands to the
diff --git a/lib/Transforms/Scalar/Reg2Mem.cpp b/lib/Transforms/Scalar/Reg2Mem.cpp
index d9809ce..b6023e2 100644
--- a/lib/Transforms/Scalar/Reg2Mem.cpp
+++ b/lib/Transforms/Scalar/Reg2Mem.cpp
@@ -16,7 +16,6 @@
//
//===----------------------------------------------------------------------===//
-#define DEBUG_TYPE "reg2mem"
#include "llvm/Transforms/Scalar.h"
#include "llvm/ADT/Statistic.h"
#include "llvm/IR/BasicBlock.h"
@@ -30,6 +29,8 @@
#include <list>
using namespace llvm;
+#define DEBUG_TYPE "reg2mem"
+
STATISTIC(NumRegsDemoted, "Number of registers demoted");
STATISTIC(NumPhisDemoted, "Number of phi-nodes demoted");
diff --git a/lib/Transforms/Scalar/SCCP.cpp b/lib/Transforms/Scalar/SCCP.cpp
index b8f10e9..feeb231 100644
--- a/lib/Transforms/Scalar/SCCP.cpp
+++ b/lib/Transforms/Scalar/SCCP.cpp
@@ -17,7 +17,6 @@
//
//===----------------------------------------------------------------------===//
-#define DEBUG_TYPE "sccp"
#include "llvm/Transforms/Scalar.h"
#include "llvm/ADT/DenseMap.h"
#include "llvm/ADT/DenseSet.h"
@@ -42,6 +41,8 @@
#include <algorithm>
using namespace llvm;
+#define DEBUG_TYPE "sccp"
+
STATISTIC(NumInstRemoved, "Number of instructions removed");
STATISTIC(NumDeadBlocks , "Number of basic blocks unreachable");
@@ -81,7 +82,7 @@ class LatticeVal {
}
public:
- LatticeVal() : Val(0, undefined) {}
+ LatticeVal() : Val(nullptr, undefined) {}
bool isUndefined() const { return getLatticeValue() == undefined; }
bool isConstant() const {
@@ -133,7 +134,7 @@ public:
ConstantInt *getConstantInt() const {
if (isConstant())
return dyn_cast<ConstantInt>(getConstant());
- return 0;
+ return nullptr;
}
void markForcedConstant(Constant *V) {
@@ -403,7 +404,7 @@ private:
if (Constant *C = dyn_cast<Constant>(V)) {
Constant *Elt = C->getAggregateElement(i);
- if (Elt == 0)
+ if (!Elt)
LV.markOverdefined(); // Unknown sort of constant.
else if (isa<UndefValue>(Elt))
; // Undef values remain undefined.
@@ -522,7 +523,7 @@ void SCCPSolver::getFeasibleSuccessors(TerminatorInst &TI,
LatticeVal BCValue = getValueState(BI->getCondition());
ConstantInt *CI = BCValue.getConstantInt();
- if (CI == 0) {
+ if (!CI) {
// Overdefined condition variables, and branches on unfoldable constant
// conditions, mean the branch could go either way.
if (!BCValue.isUndefined())
@@ -549,7 +550,7 @@ void SCCPSolver::getFeasibleSuccessors(TerminatorInst &TI,
LatticeVal SCValue = getValueState(SI->getCondition());
ConstantInt *CI = SCValue.getConstantInt();
- if (CI == 0) { // Overdefined or undefined condition?
+ if (!CI) { // Overdefined or undefined condition?
// All destinations are executable!
if (!SCValue.isUndefined())
Succs.assign(TI.getNumSuccessors(), true);
@@ -594,7 +595,7 @@ bool SCCPSolver::isEdgeFeasible(BasicBlock *From, BasicBlock *To) {
// Overdefined condition variables mean the branch could go either way,
// undef conditions mean that neither edge is feasible yet.
ConstantInt *CI = BCValue.getConstantInt();
- if (CI == 0)
+ if (!CI)
return !BCValue.isUndefined();
// Constant condition variables mean the branch can only go a single way.
@@ -612,7 +613,7 @@ bool SCCPSolver::isEdgeFeasible(BasicBlock *From, BasicBlock *To) {
LatticeVal SCValue = getValueState(SI->getCondition());
ConstantInt *CI = SCValue.getConstantInt();
- if (CI == 0)
+ if (!CI)
return !SCValue.isUndefined();
return SI->findCaseValue(CI).getCaseSuccessor() == To;
@@ -626,7 +627,7 @@ bool SCCPSolver::isEdgeFeasible(BasicBlock *From, BasicBlock *To) {
#ifndef NDEBUG
dbgs() << "Unknown terminator instruction: " << *TI << '\n';
#endif
- llvm_unreachable(0);
+ llvm_unreachable(nullptr);
}
// visit Implementations - Something changed in this instruction, either an
@@ -667,7 +668,7 @@ void SCCPSolver::visitPHINode(PHINode &PN) {
// constant. If they are constant and don't agree, the PHI is overdefined.
// If there are no executable operands, the PHI remains undefined.
//
- Constant *OperandVal = 0;
+ Constant *OperandVal = nullptr;
for (unsigned i = 0, e = PN.getNumIncomingValues(); i != e; ++i) {
LatticeVal IV = getValueState(PN.getIncomingValue(i));
if (IV.isUndefined()) continue; // Doesn't influence PHI node.
@@ -678,7 +679,7 @@ void SCCPSolver::visitPHINode(PHINode &PN) {
if (IV.isOverdefined()) // PHI node becomes overdefined!
return markOverdefined(&PN);
- if (OperandVal == 0) { // Grab the first value.
+ if (!OperandVal) { // Grab the first value.
OperandVal = IV.getConstant();
continue;
}
@@ -774,7 +775,7 @@ void SCCPSolver::visitExtractValueInst(ExtractValueInst &EVI) {
void SCCPSolver::visitInsertValueInst(InsertValueInst &IVI) {
StructType *STy = dyn_cast<StructType>(IVI.getType());
- if (STy == 0)
+ if (!STy)
return markOverdefined(&IVI);
// If this has more than one index, we can't handle it, drive all results to
@@ -862,7 +863,7 @@ void SCCPSolver::visitBinaryOperator(Instruction &I) {
// If this is an AND or OR with 0 or -1, it doesn't matter that the other
// operand is overdefined.
if (I.getOpcode() == Instruction::And || I.getOpcode() == Instruction::Or) {
- LatticeVal *NonOverdefVal = 0;
+ LatticeVal *NonOverdefVal = nullptr;
if (!V1State.isOverdefined())
NonOverdefVal = &V1State;
else if (!V2State.isOverdefined())
@@ -1081,7 +1082,7 @@ void SCCPSolver::visitCallSite(CallSite CS) {
// The common case is that we aren't tracking the callee, either because we
// are not doing interprocedural analysis or the callee is indirect, or is
// external. Handle these cases first.
- if (F == 0 || F->isDeclaration()) {
+ if (!F || F->isDeclaration()) {
CallOverdefined:
// Void return and not tracking callee, just bail.
if (I->getType()->isVoidTy()) return;
@@ -1555,7 +1556,7 @@ bool SCCP::runOnFunction(Function &F) {
DEBUG(dbgs() << "SCCP on function '" << F.getName() << "'\n");
const DataLayoutPass *DLP = getAnalysisIfAvailable<DataLayoutPass>();
- const DataLayout *DL = DLP ? &DLP->getDataLayout() : 0;
+ const DataLayout *DL = DLP ? &DLP->getDataLayout() : nullptr;
const TargetLibraryInfo *TLI = &getAnalysis<TargetLibraryInfo>();
SCCPSolver Solver(DL, TLI);
@@ -1684,7 +1685,7 @@ static bool AddressIsTaken(const GlobalValue *GV) {
bool IPSCCP::runOnModule(Module &M) {
DataLayoutPass *DLP = getAnalysisIfAvailable<DataLayoutPass>();
- const DataLayout *DL = DLP ? &DLP->getDataLayout() : 0;
+ const DataLayout *DL = DLP ? &DLP->getDataLayout() : nullptr;
const TargetLibraryInfo *TLI = &getAnalysis<TargetLibraryInfo>();
SCCPSolver Solver(DL, TLI);
diff --git a/lib/Transforms/Scalar/SROA.cpp b/lib/Transforms/Scalar/SROA.cpp
index ed5e618..04bf4f8 100644
--- a/lib/Transforms/Scalar/SROA.cpp
+++ b/lib/Transforms/Scalar/SROA.cpp
@@ -23,7 +23,6 @@
///
//===----------------------------------------------------------------------===//
-#define DEBUG_TYPE "sroa"
#include "llvm/Transforms/Scalar.h"
#include "llvm/ADT/STLExtras.h"
#include "llvm/ADT/SetVector.h"
@@ -64,6 +63,8 @@
using namespace llvm;
+#define DEBUG_TYPE "sroa"
+
STATISTIC(NumAllocasAnalyzed, "Number of allocas analyzed for replacement");
STATISTIC(NumAllocaPartitions, "Number of alloca partitions formed");
STATISTIC(MaxPartitionsPerAlloca, "Maximum number of partitions per alloca");
@@ -159,8 +160,8 @@ public:
Use *getUse() const { return UseAndIsSplittable.getPointer(); }
- bool isDead() const { return getUse() == 0; }
- void kill() { UseAndIsSplittable.setPointer(0); }
+ bool isDead() const { return getUse() == nullptr; }
+ void kill() { UseAndIsSplittable.setPointer(nullptr); }
/// \brief Support for ordering ranges.
///
@@ -320,7 +321,7 @@ static Value *foldSelectInst(SelectInst &SI) {
if (SI.getOperand(1) == SI.getOperand(2))
return SI.getOperand(1);
- return 0;
+ return nullptr;
}
/// \brief Builder for the alloca slices.
@@ -642,7 +643,7 @@ private:
Uses.push_back(std::make_pair(I, cast<Instruction>(U)));
} while (!Uses.empty());
- return 0;
+ return nullptr;
}
void visitPHINode(PHINode &PN) {
@@ -724,7 +725,7 @@ AllocaSlices::AllocaSlices(const DataLayout &DL, AllocaInst &AI)
#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
AI(AI),
#endif
- PointerEscapingInstr(0) {
+ PointerEscapingInstr(nullptr) {
SliceBuilder PB(DL, AI, *this);
SliceBuilder::PtrInfo PtrI = PB.visitPtr(AI);
if (PtrI.isEscaped() || PtrI.isAborted()) {
@@ -873,7 +874,7 @@ public:
for (SmallVectorImpl<DbgValueInst *>::const_iterator I = DVIs.begin(),
E = DVIs.end(); I != E; ++I) {
DbgValueInst *DVI = *I;
- Value *Arg = 0;
+ Value *Arg = nullptr;
if (StoreInst *SI = dyn_cast<StoreInst>(Inst)) {
// If an argument is zero extended then use argument directly. The ZExt
// may be zapped by an optimization pass in future.
@@ -969,7 +970,7 @@ class SROA : public FunctionPass {
public:
SROA(bool RequiresDomTree = true)
: FunctionPass(ID), RequiresDomTree(RequiresDomTree),
- C(0), DL(0), DT(0) {
+ C(nullptr), DL(nullptr), DT(nullptr) {
initializeSROAPass(*PassRegistry::getPassRegistry());
}
bool runOnFunction(Function &F) override;
@@ -1011,9 +1012,9 @@ INITIALIZE_PASS_END(SROA, "sroa", "Scalar Replacement Of Aggregates",
static Type *findCommonType(AllocaSlices::const_iterator B,
AllocaSlices::const_iterator E,
uint64_t EndOffset) {
- Type *Ty = 0;
+ Type *Ty = nullptr;
bool TyIsCommon = true;
- IntegerType *ITy = 0;
+ IntegerType *ITy = nullptr;
// Note that we need to look at *every* alloca slice's Use to ensure we
// always get consistent results regardless of the order of slices.
@@ -1024,7 +1025,7 @@ static Type *findCommonType(AllocaSlices::const_iterator B,
if (I->beginOffset() != B->beginOffset() || I->endOffset() != EndOffset)
continue;
- Type *UserTy = 0;
+ Type *UserTy = nullptr;
if (LoadInst *LI = dyn_cast<LoadInst>(U->getUser())) {
UserTy = LI->getType();
} else if (StoreInst *SI = dyn_cast<StoreInst>(U->getUser())) {
@@ -1074,7 +1075,7 @@ static Type *findCommonType(AllocaSlices::const_iterator B,
/// FIXME: This should be hoisted into a generic utility, likely in
/// Transforms/Util/Local.h
static bool isSafePHIToSpeculate(PHINode &PN,
- const DataLayout *DL = 0) {
+ const DataLayout *DL = nullptr) {
// For now, we can only do this promotion if the load is in the same block
// as the PHI, and if there are no stores between the phi and load.
// TODO: Allow recursive phi users.
@@ -1084,7 +1085,7 @@ static bool isSafePHIToSpeculate(PHINode &PN,
bool HaveLoad = false;
for (User *U : PN.users()) {
LoadInst *LI = dyn_cast<LoadInst>(U);
- if (LI == 0 || !LI->isSimple())
+ if (!LI || !LI->isSimple())
return false;
// For now we only allow loads in the same block as the PHI. This is
@@ -1191,7 +1192,8 @@ static void speculatePHINodeLoads(PHINode &PN) {
///
/// We can do this to a select if its only uses are loads and if the operand
/// to the select can be loaded unconditionally.
-static bool isSafeSelectToSpeculate(SelectInst &SI, const DataLayout *DL = 0) {
+static bool isSafeSelectToSpeculate(SelectInst &SI,
+ const DataLayout *DL = nullptr) {
Value *TValue = SI.getTrueValue();
Value *FValue = SI.getFalseValue();
bool TDerefable = TValue->isDereferenceablePointer();
@@ -1199,7 +1201,7 @@ static bool isSafeSelectToSpeculate(SelectInst &SI, const DataLayout *DL = 0) {
for (User *U : SI.users()) {
LoadInst *LI = dyn_cast<LoadInst>(U);
- if (LI == 0 || !LI->isSimple())
+ if (!LI || !LI->isSimple())
return false;
// Both operands to the select need to be dereferencable, either
@@ -1332,19 +1334,21 @@ static Value *getNaturalGEPRecursively(IRBuilderTy &IRB, const DataLayout &DL,
// We can't recurse through pointer types.
if (Ty->isPointerTy())
- return 0;
+ return nullptr;
// We try to analyze GEPs over vectors here, but note that these GEPs are
// extremely poorly defined currently. The long-term goal is to remove GEPing
// over a vector from the IR completely.
if (VectorType *VecTy = dyn_cast<VectorType>(Ty)) {
unsigned ElementSizeInBits = DL.getTypeSizeInBits(VecTy->getScalarType());
- if (ElementSizeInBits % 8)
- return 0; // GEPs over non-multiple of 8 size vector elements are invalid.
+ if (ElementSizeInBits % 8 != 0) {
+ // GEPs over non-multiple of 8 size vector elements are invalid.
+ return nullptr;
+ }
APInt ElementSize(Offset.getBitWidth(), ElementSizeInBits / 8);
APInt NumSkippedElements = Offset.sdiv(ElementSize);
if (NumSkippedElements.ugt(VecTy->getNumElements()))
- return 0;
+ return nullptr;
Offset -= NumSkippedElements * ElementSize;
Indices.push_back(IRB.getInt(NumSkippedElements));
return getNaturalGEPRecursively(IRB, DL, Ptr, VecTy->getElementType(),
@@ -1356,7 +1360,7 @@ static Value *getNaturalGEPRecursively(IRBuilderTy &IRB, const DataLayout &DL,
APInt ElementSize(Offset.getBitWidth(), DL.getTypeAllocSize(ElementTy));
APInt NumSkippedElements = Offset.sdiv(ElementSize);
if (NumSkippedElements.ugt(ArrTy->getNumElements()))
- return 0;
+ return nullptr;
Offset -= NumSkippedElements * ElementSize;
Indices.push_back(IRB.getInt(NumSkippedElements));
@@ -1366,17 +1370,17 @@ static Value *getNaturalGEPRecursively(IRBuilderTy &IRB, const DataLayout &DL,
StructType *STy = dyn_cast<StructType>(Ty);
if (!STy)
- return 0;
+ return nullptr;
const StructLayout *SL = DL.getStructLayout(STy);
uint64_t StructOffset = Offset.getZExtValue();
if (StructOffset >= SL->getSizeInBytes())
- return 0;
+ return nullptr;
unsigned Index = SL->getElementContainingOffset(StructOffset);
Offset -= APInt(Offset.getBitWidth(), SL->getElementOffset(Index));
Type *ElementTy = STy->getElementType(Index);
if (Offset.uge(DL.getTypeAllocSize(ElementTy)))
- return 0; // The offset points into alignment padding.
+ return nullptr; // The offset points into alignment padding.
Indices.push_back(IRB.getInt32(Index));
return getNaturalGEPRecursively(IRB, DL, Ptr, ElementTy, Offset, TargetTy,
@@ -1402,14 +1406,14 @@ static Value *getNaturalGEPWithOffset(IRBuilderTy &IRB, const DataLayout &DL,
// Don't consider any GEPs through an i8* as natural unless the TargetTy is
// an i8.
if (Ty == IRB.getInt8PtrTy(Ty->getAddressSpace()) && TargetTy->isIntegerTy(8))
- return 0;
+ return nullptr;
Type *ElementTy = Ty->getElementType();
if (!ElementTy->isSized())
- return 0; // We can't GEP through an unsized element.
+ return nullptr; // We can't GEP through an unsized element.
APInt ElementSize(Offset.getBitWidth(), DL.getTypeAllocSize(ElementTy));
if (ElementSize == 0)
- return 0; // Zero-length arrays can't help us build a natural GEP.
+ return nullptr; // Zero-length arrays can't help us build a natural GEP.
APInt NumSkippedElements = Offset.sdiv(ElementSize);
Offset -= NumSkippedElements * ElementSize;
@@ -1445,11 +1449,11 @@ static Value *getAdjustedPtr(IRBuilderTy &IRB, const DataLayout &DL, Value *Ptr,
// We may end up computing an offset pointer that has the wrong type. If we
// never are able to compute one directly that has the correct type, we'll
// fall back to it, so keep it around here.
- Value *OffsetPtr = 0;
+ Value *OffsetPtr = nullptr;
// Remember any i8 pointer we come across to re-use if we need to do a raw
// byte offset.
- Value *Int8Ptr = 0;
+ Value *Int8Ptr = nullptr;
APInt Int8PtrOffset(Offset.getBitWidth(), 0);
Type *TargetTy = PointerTy->getPointerElementType();
@@ -2043,14 +2047,14 @@ public:
NewAllocaBeginOffset(NewAllocaBeginOffset),
NewAllocaEndOffset(NewAllocaEndOffset),
NewAllocaTy(NewAI.getAllocatedType()),
- VecTy(IsVectorPromotable ? cast<VectorType>(NewAllocaTy) : 0),
- ElementTy(VecTy ? VecTy->getElementType() : 0),
+ VecTy(IsVectorPromotable ? cast<VectorType>(NewAllocaTy) : nullptr),
+ ElementTy(VecTy ? VecTy->getElementType() : nullptr),
ElementSize(VecTy ? DL.getTypeSizeInBits(ElementTy) / 8 : 0),
IntTy(IsIntegerPromotable
? Type::getIntNTy(
NewAI.getContext(),
DL.getTypeSizeInBits(NewAI.getAllocatedType()))
- : 0),
+ : nullptr),
BeginOffset(), EndOffset(), IsSplittable(), IsSplit(), OldUse(),
OldPtr(), PHIUsers(PHIUsers), SelectUsers(SelectUsers),
IRB(NewAI.getContext(), ConstantFolder()) {
@@ -2144,7 +2148,7 @@ private:
///
/// You can optionally pass a type to this routine and if that type's ABI
/// alignment is itself suitable, this will return zero.
- unsigned getSliceAlign(Type *Ty = 0) {
+ unsigned getSliceAlign(Type *Ty = nullptr) {
unsigned NewAIAlign = NewAI.getAlignment();
if (!NewAIAlign)
NewAIAlign = DL.getABITypeAlignment(NewAI.getAllocatedType());
@@ -2594,7 +2598,7 @@ private:
unsigned EndIndex = VecTy ? getIndex(NewEndOffset) : 0;
unsigned NumElements = EndIndex - BeginIndex;
IntegerType *SubIntTy
- = IntTy ? Type::getIntNTy(IntTy->getContext(), Size*8) : 0;
+ = IntTy ? Type::getIntNTy(IntTy->getContext(), Size*8) : nullptr;
// Reset the other pointer type to match the register type we're going to
// use, but using the address space of the original other pointer.
@@ -2992,22 +2996,22 @@ static Type *getTypePartition(const DataLayout &DL, Type *Ty,
return stripAggregateTypeWrapping(DL, Ty);
if (Offset > DL.getTypeAllocSize(Ty) ||
(DL.getTypeAllocSize(Ty) - Offset) < Size)
- return 0;
+ return nullptr;
if (SequentialType *SeqTy = dyn_cast<SequentialType>(Ty)) {
// We can't partition pointers...
if (SeqTy->isPointerTy())
- return 0;
+ return nullptr;
Type *ElementTy = SeqTy->getElementType();
uint64_t ElementSize = DL.getTypeAllocSize(ElementTy);
uint64_t NumSkippedElements = Offset / ElementSize;
if (ArrayType *ArrTy = dyn_cast<ArrayType>(SeqTy)) {
if (NumSkippedElements >= ArrTy->getNumElements())
- return 0;
+ return nullptr;
} else if (VectorType *VecTy = dyn_cast<VectorType>(SeqTy)) {
if (NumSkippedElements >= VecTy->getNumElements())
- return 0;
+ return nullptr;
}
Offset -= NumSkippedElements * ElementSize;
@@ -3015,7 +3019,7 @@ static Type *getTypePartition(const DataLayout &DL, Type *Ty,
if (Offset > 0 || Size < ElementSize) {
// Bail if the partition ends in a different array element.
if ((Offset + Size) > ElementSize)
- return 0;
+ return nullptr;
// Recurse through the element type trying to peel off offset bytes.
return getTypePartition(DL, ElementTy, Offset, Size);
}
@@ -3026,20 +3030,20 @@ static Type *getTypePartition(const DataLayout &DL, Type *Ty,
assert(Size > ElementSize);
uint64_t NumElements = Size / ElementSize;
if (NumElements * ElementSize != Size)
- return 0;
+ return nullptr;
return ArrayType::get(ElementTy, NumElements);
}
StructType *STy = dyn_cast<StructType>(Ty);
if (!STy)
- return 0;
+ return nullptr;
const StructLayout *SL = DL.getStructLayout(STy);
if (Offset >= SL->getSizeInBytes())
- return 0;
+ return nullptr;
uint64_t EndOffset = Offset + Size;
if (EndOffset > SL->getSizeInBytes())
- return 0;
+ return nullptr;
unsigned Index = SL->getElementContainingOffset(Offset);
Offset -= SL->getElementOffset(Index);
@@ -3047,12 +3051,12 @@ static Type *getTypePartition(const DataLayout &DL, Type *Ty,
Type *ElementTy = STy->getElementType(Index);
uint64_t ElementSize = DL.getTypeAllocSize(ElementTy);
if (Offset >= ElementSize)
- return 0; // The offset points into alignment padding.
+ return nullptr; // The offset points into alignment padding.
// See if any partition must be contained by the element.
if (Offset > 0 || Size < ElementSize) {
if ((Offset + Size) > ElementSize)
- return 0;
+ return nullptr;
return getTypePartition(DL, ElementTy, Offset, Size);
}
assert(Offset == 0);
@@ -3065,14 +3069,14 @@ static Type *getTypePartition(const DataLayout &DL, Type *Ty,
if (EndOffset < SL->getSizeInBytes()) {
unsigned EndIndex = SL->getElementContainingOffset(EndOffset);
if (Index == EndIndex)
- return 0; // Within a single element and its padding.
+ return nullptr; // Within a single element and its padding.
// Don't try to form "natural" types if the elements don't line up with the
// expected size.
// FIXME: We could potentially recurse down through the last element in the
// sub-struct to find a natural end point.
if (SL->getElementOffset(EndIndex) != EndOffset)
- return 0;
+ return nullptr;
assert(Index < EndIndex);
EE = STy->element_begin() + EndIndex;
@@ -3083,7 +3087,7 @@ static Type *getTypePartition(const DataLayout &DL, Type *Ty,
STy->isPacked());
const StructLayout *SubSL = DL.getStructLayout(SubTy);
if (Size != SubSL->getSizeInBytes())
- return 0; // The sub-struct doesn't have quite the size needed.
+ return nullptr; // The sub-struct doesn't have quite the size needed.
return SubTy;
}
@@ -3108,7 +3112,7 @@ bool SROA::rewritePartition(AllocaInst &AI, AllocaSlices &S,
// Try to compute a friendly type for this partition of the alloca. This
// won't always succeed, in which case we fall back to a legal integer type
// or an i8 array of an appropriate size.
- Type *SliceTy = 0;
+ Type *SliceTy = nullptr;
if (Type *CommonUseTy = findCommonType(B, E, EndOffset))
if (DL->getTypeAllocSize(CommonUseTy) >= SliceSize)
SliceTy = CommonUseTy;
@@ -3155,7 +3159,7 @@ bool SROA::rewritePartition(AllocaInst &AI, AllocaSlices &S,
// the alloca's alignment unconstrained.
if (Alignment <= DL->getABITypeAlignment(SliceTy))
Alignment = 0;
- NewAI = new AllocaInst(SliceTy, 0, Alignment,
+ NewAI = new AllocaInst(SliceTy, nullptr, Alignment,
AI.getName() + ".sroa." + Twine(B - S.begin()), &AI);
++NumNewAllocas;
}
@@ -3494,7 +3498,7 @@ void SROA::deleteDeadInstructions(SmallPtrSet<AllocaInst*, 4> &DeletedAllocas) {
for (Use &Operand : I->operands())
if (Instruction *U = dyn_cast<Instruction>(Operand)) {
// Zero out the operand and see if it becomes trivially dead.
- Operand = 0;
+ Operand = nullptr;
if (isInstructionTriviallyDead(U))
DeadInsts.insert(U);
}
@@ -3612,7 +3616,7 @@ bool SROA::runOnFunction(Function &F) {
DL = &DLP->getDataLayout();
DominatorTreeWrapperPass *DTWP =
getAnalysisIfAvailable<DominatorTreeWrapperPass>();
- DT = DTWP ? &DTWP->getDomTree() : 0;
+ DT = DTWP ? &DTWP->getDomTree() : nullptr;
BasicBlock &EntryBB = F.getEntryBlock();
for (BasicBlock::iterator I = EntryBB.begin(), E = std::prev(EntryBB.end());
diff --git a/lib/Transforms/Scalar/SampleProfile.cpp b/lib/Transforms/Scalar/SampleProfile.cpp
index 20d6daa..8e557aa 100644
--- a/lib/Transforms/Scalar/SampleProfile.cpp
+++ b/lib/Transforms/Scalar/SampleProfile.cpp
@@ -22,8 +22,6 @@
//
//===----------------------------------------------------------------------===//
-#define DEBUG_TYPE "sample-profile"
-
#include "llvm/Transforms/Scalar.h"
#include "llvm/ADT/DenseMap.h"
#include "llvm/ADT/SmallPtrSet.h"
@@ -54,6 +52,8 @@
using namespace llvm;
+#define DEBUG_TYPE "sample-profile"
+
// Command line option to specify the file to read samples from. This is
// mainly used for debugging.
static cl::opt<std::string> SampleProfileFile(
@@ -120,8 +120,8 @@ typedef DenseMap<BasicBlock *, SmallVector<BasicBlock *, 8>> BlockEdgeMap;
class SampleFunctionProfile {
public:
SampleFunctionProfile()
- : TotalSamples(0), TotalHeadSamples(0), HeaderLineno(0), DT(0), PDT(0),
- LI(0), Ctx(0) {}
+ : TotalSamples(0), TotalHeadSamples(0), HeaderLineno(0), DT(nullptr),
+ PDT(nullptr), LI(nullptr), Ctx(nullptr) {}
unsigned getFunctionLoc(Function &F);
bool emitAnnotations(Function &F, DominatorTree *DomTree,
@@ -315,7 +315,7 @@ protected:
/// \brief Name of the profile file to load.
StringRef Filename;
- /// \brief Flag indicating whether the profile input loaded succesfully.
+ /// \brief Flag indicating whether the profile input loaded successfully.
bool ProfileIsValid;
};
}
diff --git a/lib/Transforms/Scalar/Scalar.cpp b/lib/Transforms/Scalar/Scalar.cpp
index e950eba..f8f828c 100644
--- a/lib/Transforms/Scalar/Scalar.cpp
+++ b/lib/Transforms/Scalar/Scalar.cpp
@@ -64,6 +64,7 @@ void llvm::initializeScalarOpts(PassRegistry &Registry) {
initializeStructurizeCFGPass(Registry);
initializeSinkingPass(Registry);
initializeTailCallElimPass(Registry);
+ initializeSeparateConstOffsetFromGEPPass(Registry);
}
void LLVMInitializeScalarOpts(LLVMPassRegistryRef R) {
@@ -181,6 +182,7 @@ void LLVMAddDemoteMemoryToRegisterPass(LLVMPassManagerRef PM) {
void LLVMAddVerifierPass(LLVMPassManagerRef PM) {
unwrap(PM)->add(createVerifierPass());
+ // FIXME: should this also add createDebugInfoVerifierPass()?
}
void LLVMAddCorrelatedValuePropagationPass(LLVMPassManagerRef PM) {
diff --git a/lib/Transforms/Scalar/ScalarReplAggregates.cpp b/lib/Transforms/Scalar/ScalarReplAggregates.cpp
index e7b5ab2..58192fc 100644
--- a/lib/Transforms/Scalar/ScalarReplAggregates.cpp
+++ b/lib/Transforms/Scalar/ScalarReplAggregates.cpp
@@ -19,7 +19,6 @@
//
//===----------------------------------------------------------------------===//
-#define DEBUG_TYPE "scalarrepl"
#include "llvm/Transforms/Scalar.h"
#include "llvm/ADT/SetVector.h"
#include "llvm/ADT/SmallVector.h"
@@ -52,6 +51,8 @@
#include "llvm/Transforms/Utils/SSAUpdater.h"
using namespace llvm;
+#define DEBUG_TYPE "scalarrepl"
+
STATISTIC(NumReplaced, "Number of allocas broken up");
STATISTIC(NumPromoted, "Number of allocas promoted");
STATISTIC(NumAdjusted, "Number of scalar allocas adjusted to allow promotion");
@@ -304,7 +305,7 @@ public:
explicit ConvertToScalarInfo(unsigned Size, const DataLayout &DL,
unsigned SLT)
: AllocaSize(Size), DL(DL), ScalarLoadThreshold(SLT), IsNotTrivial(false),
- ScalarKind(Unknown), VectorTy(0), HadNonMemTransferAccess(false),
+ ScalarKind(Unknown), VectorTy(nullptr), HadNonMemTransferAccess(false),
HadDynamicAccess(false) { }
AllocaInst *TryConvert(AllocaInst *AI);
@@ -332,8 +333,8 @@ private:
AllocaInst *ConvertToScalarInfo::TryConvert(AllocaInst *AI) {
// If we can't convert this scalar, or if mem2reg can trivially do it, bail
// out.
- if (!CanConvertToScalar(AI, 0, 0) || !IsNotTrivial)
- return 0;
+ if (!CanConvertToScalar(AI, 0, nullptr) || !IsNotTrivial)
+ return nullptr;
// If an alloca has only memset / memcpy uses, it may still have an Unknown
// ScalarKind. Treat it as an Integer below.
@@ -361,23 +362,24 @@ AllocaInst *ConvertToScalarInfo::TryConvert(AllocaInst *AI) {
// Do not convert to scalar integer if the alloca size exceeds the
// scalar load threshold.
if (BitWidth > ScalarLoadThreshold)
- return 0;
+ return nullptr;
if ((ScalarKind == ImplicitVector || ScalarKind == Integer) &&
!HadNonMemTransferAccess && !DL.fitsInLegalInteger(BitWidth))
- return 0;
+ return nullptr;
// Dynamic accesses on integers aren't yet supported. They need us to shift
// by a dynamic amount which could be difficult to work out as we might not
// know whether to use a left or right shift.
if (ScalarKind == Integer && HadDynamicAccess)
- return 0;
+ return nullptr;
DEBUG(dbgs() << "CONVERT TO SCALAR INTEGER: " << *AI << "\n");
// Create and insert the integer alloca.
NewTy = IntegerType::get(AI->getContext(), BitWidth);
}
- AllocaInst *NewAI = new AllocaInst(NewTy, 0, "", AI->getParent()->begin());
- ConvertUsesToScalar(AI, NewAI, 0, 0);
+ AllocaInst *NewAI = new AllocaInst(NewTy, nullptr, "",
+ AI->getParent()->begin());
+ ConvertUsesToScalar(AI, NewAI, 0, nullptr);
return NewAI;
}
@@ -508,7 +510,7 @@ bool ConvertToScalarInfo::CanConvertToScalar(Value *V, uint64_t Offset,
// Compute the offset that this GEP adds to the pointer.
SmallVector<Value*, 8> Indices(GEP->op_begin()+1, GEP->op_end());
- Value *GEPNonConstantIdx = 0;
+ Value *GEPNonConstantIdx = nullptr;
if (!GEP->hasAllConstantIndices()) {
if (!isa<VectorType>(PtrTy->getElementType()))
return false;
@@ -564,7 +566,7 @@ bool ConvertToScalarInfo::CanConvertToScalar(Value *V, uint64_t Offset,
if (NonConstantIdx)
return false;
ConstantInt *Len = dyn_cast<ConstantInt>(MTI->getLength());
- if (Len == 0 || Len->getZExtValue() != AllocaSize || Offset != 0)
+ if (!Len || Len->getZExtValue() != AllocaSize || Offset != 0)
return false;
IsNotTrivial = true; // Can't be mem2reg'd.
@@ -608,7 +610,7 @@ void ConvertToScalarInfo::ConvertUsesToScalar(Value *Ptr, AllocaInst *NewAI,
if (GetElementPtrInst *GEP = dyn_cast<GetElementPtrInst>(User)) {
// Compute the offset that this GEP adds to the pointer.
SmallVector<Value*, 8> Indices(GEP->op_begin()+1, GEP->op_end());
- Value* GEPNonConstantIdx = 0;
+ Value* GEPNonConstantIdx = nullptr;
if (!GEP->hasAllConstantIndices()) {
assert(!NonConstantIdx &&
"Dynamic GEP reading from dynamic GEP unsupported");
@@ -671,7 +673,7 @@ void ConvertToScalarInfo::ConvertUsesToScalar(Value *Ptr, AllocaInst *NewAI,
Instruction *Old = Builder.CreateLoad(NewAI, NewAI->getName()+".in");
Value *New = ConvertScalar_InsertValue(
ConstantInt::get(User->getContext(), APVal),
- Old, Offset, 0, Builder);
+ Old, Offset, nullptr, Builder);
Builder.CreateStore(New, NewAI);
// If the load we just inserted is now dead, then the memset overwrote
@@ -809,7 +811,7 @@ ConvertScalar_ExtractValue(Value *FromVal, Type *ToType,
for (unsigned i = 0, e = ST->getNumElements(); i != e; ++i) {
Value *Elt = ConvertScalar_ExtractValue(FromVal, ST->getElementType(i),
Offset+Layout.getElementOffsetInBits(i),
- 0, Builder);
+ nullptr, Builder);
Res = Builder.CreateInsertValue(Res, Elt, i);
}
return Res;
@@ -822,7 +824,8 @@ ConvertScalar_ExtractValue(Value *FromVal, Type *ToType,
Value *Res = UndefValue::get(AT);
for (unsigned i = 0, e = AT->getNumElements(); i != e; ++i) {
Value *Elt = ConvertScalar_ExtractValue(FromVal, AT->getElementType(),
- Offset+i*EltSize, 0, Builder);
+ Offset+i*EltSize, nullptr,
+ Builder);
Res = Builder.CreateInsertValue(Res, Elt, i);
}
return Res;
@@ -938,7 +941,7 @@ ConvertScalar_InsertValue(Value *SV, Value *Old,
Value *Elt = Builder.CreateExtractValue(SV, i);
Old = ConvertScalar_InsertValue(Elt, Old,
Offset+Layout.getElementOffsetInBits(i),
- 0, Builder);
+ nullptr, Builder);
}
return Old;
}
@@ -949,7 +952,8 @@ ConvertScalar_InsertValue(Value *SV, Value *Old,
uint64_t EltSize = DL.getTypeAllocSizeInBits(AT->getElementType());
for (unsigned i = 0, e = AT->getNumElements(); i != e; ++i) {
Value *Elt = Builder.CreateExtractValue(SV, i);
- Old = ConvertScalar_InsertValue(Elt, Old, Offset+i*EltSize, 0, Builder);
+ Old = ConvertScalar_InsertValue(Elt, Old, Offset+i*EltSize, nullptr,
+ Builder);
}
return Old;
}
@@ -1024,7 +1028,7 @@ bool SROA::runOnFunction(Function &F) {
return false;
DataLayoutPass *DLP = getAnalysisIfAvailable<DataLayoutPass>();
- DL = DLP ? &DLP->getDataLayout() : 0;
+ DL = DLP ? &DLP->getDataLayout() : nullptr;
bool Changed = performPromotion(F);
@@ -1054,7 +1058,7 @@ class AllocaPromoter : public LoadAndStorePromoter {
public:
AllocaPromoter(const SmallVectorImpl<Instruction*> &Insts, SSAUpdater &S,
DIBuilder *DB)
- : LoadAndStorePromoter(Insts, S), AI(0), DIB(DB) {}
+ : LoadAndStorePromoter(Insts, S), AI(nullptr), DIB(DB) {}
void run(AllocaInst *AI, const SmallVectorImpl<Instruction*> &Insts) {
// Remember which alloca we're promoting (for isInstInList).
@@ -1100,7 +1104,7 @@ public:
for (SmallVectorImpl<DbgValueInst *>::const_iterator I = DVIs.begin(),
E = DVIs.end(); I != E; ++I) {
DbgValueInst *DVI = *I;
- Value *Arg = NULL;
+ Value *Arg = nullptr;
if (StoreInst *SI = dyn_cast<StoreInst>(Inst)) {
// If an argument is zero extended then use argument directly. The ZExt
// may be zapped by an optimization pass in future.
@@ -1143,7 +1147,7 @@ static bool isSafeSelectToSpeculate(SelectInst *SI, const DataLayout *DL) {
for (User *U : SI->users()) {
LoadInst *LI = dyn_cast<LoadInst>(U);
- if (LI == 0 || !LI->isSimple()) return false;
+ if (!LI || !LI->isSimple()) return false;
// Both operands to the select need to be dereferencable, either absolutely
// (e.g. allocas) or at this point because we can see other accesses to it.
@@ -1183,7 +1187,7 @@ static bool isSafePHIToSpeculate(PHINode *PN, const DataLayout *DL) {
unsigned MaxAlign = 0;
for (User *U : PN->users()) {
LoadInst *LI = dyn_cast<LoadInst>(U);
- if (LI == 0 || !LI->isSimple()) return false;
+ if (!LI || !LI->isSimple()) return false;
// For now we only allow loads in the same block as the PHI. This is a
// common case that happens when instcombine merges two loads through a PHI.
@@ -1380,7 +1384,7 @@ static bool tryToMakeAllocaBePromotable(AllocaInst *AI, const DataLayout *DL) {
for (unsigned i = 0, e = PN->getNumIncomingValues(); i != e; ++i) {
BasicBlock *Pred = PN->getIncomingBlock(i);
LoadInst *&Load = InsertedLoads[Pred];
- if (Load == 0) {
+ if (!Load) {
Load = new LoadInst(PN->getIncomingValue(i),
PN->getName() + "." + Pred->getName(),
Pred->getTerminator());
@@ -1400,7 +1404,7 @@ static bool tryToMakeAllocaBePromotable(AllocaInst *AI, const DataLayout *DL) {
bool SROA::performPromotion(Function &F) {
std::vector<AllocaInst*> Allocas;
- DominatorTree *DT = 0;
+ DominatorTree *DT = nullptr;
if (HasDomTree)
DT = &getAnalysis<DominatorTreeWrapperPass>().getDomTree();
@@ -1537,7 +1541,7 @@ void SROA::DoScalarReplacement(AllocaInst *AI,
if (StructType *ST = dyn_cast<StructType>(AI->getAllocatedType())) {
ElementAllocas.reserve(ST->getNumContainedTypes());
for (unsigned i = 0, e = ST->getNumContainedTypes(); i != e; ++i) {
- AllocaInst *NA = new AllocaInst(ST->getContainedType(i), 0,
+ AllocaInst *NA = new AllocaInst(ST->getContainedType(i), nullptr,
AI->getAlignment(),
AI->getName() + "." + Twine(i), AI);
ElementAllocas.push_back(NA);
@@ -1548,7 +1552,7 @@ void SROA::DoScalarReplacement(AllocaInst *AI,
ElementAllocas.reserve(AT->getNumElements());
Type *ElTy = AT->getElementType();
for (unsigned i = 0, e = AT->getNumElements(); i != e; ++i) {
- AllocaInst *NA = new AllocaInst(ElTy, 0, AI->getAlignment(),
+ AllocaInst *NA = new AllocaInst(ElTy, nullptr, AI->getAlignment(),
AI->getName() + "." + Twine(i), AI);
ElementAllocas.push_back(NA);
WorkList.push_back(NA); // Add to worklist for recursive processing
@@ -1577,7 +1581,7 @@ void SROA::DeleteDeadInstructions() {
// Zero out the operand and see if it becomes trivially dead.
// (But, don't add allocas to the dead instruction list -- they are
// already on the worklist and will be deleted separately.)
- *OI = 0;
+ *OI = nullptr;
if (isInstructionTriviallyDead(U) && !isa<AllocaInst>(U))
DeadInsts.push_back(U);
}
@@ -1604,12 +1608,10 @@ void SROA::isSafeForScalarRepl(Instruction *I, uint64_t Offset,
isSafeForScalarRepl(GEPI, GEPOffset, Info);
} else if (MemIntrinsic *MI = dyn_cast<MemIntrinsic>(User)) {
ConstantInt *Length = dyn_cast<ConstantInt>(MI->getLength());
- if (Length == 0)
- return MarkUnsafe(Info, User);
- if (Length->isNegative())
+ if (!Length || Length->isNegative())
return MarkUnsafe(Info, User);
- isSafeMemAccess(Offset, Length->getZExtValue(), 0,
+ isSafeMemAccess(Offset, Length->getZExtValue(), nullptr,
U.getOperandNo() == 0, Info, MI,
true /*AllowWholeAccess*/);
} else if (LoadInst *LI = dyn_cast<LoadInst>(User)) {
@@ -1744,12 +1746,12 @@ static bool isHomogeneousAggregate(Type *T, unsigned &NumElts,
Type *&EltTy) {
if (ArrayType *AT = dyn_cast<ArrayType>(T)) {
NumElts = AT->getNumElements();
- EltTy = (NumElts == 0 ? 0 : AT->getElementType());
+ EltTy = (NumElts == 0 ? nullptr : AT->getElementType());
return true;
}
if (StructType *ST = dyn_cast<StructType>(T)) {
NumElts = ST->getNumContainedTypes();
- EltTy = (NumElts == 0 ? 0 : ST->getContainedType(0));
+ EltTy = (NumElts == 0 ? nullptr : ST->getContainedType(0));
for (unsigned n = 1; n < NumElts; ++n) {
if (ST->getContainedType(n) != EltTy)
return false;
@@ -2038,7 +2040,7 @@ void SROA::RewriteGEP(GetElementPtrInst *GEPI, AllocaInst *AI, uint64_t Offset,
// In this case, it must be the last GEP operand which is dynamic so keep that
// aside until we've found the constant GEP offset then add it back in at the
// end.
- Value* NonConstantIdx = 0;
+ Value* NonConstantIdx = nullptr;
if (!GEPI->hasAllConstantIndices())
NonConstantIdx = Indices.pop_back_val();
Offset += DL->getIndexedOffset(GEPI->getPointerOperandType(), Indices);
@@ -2108,7 +2110,8 @@ void SROA::RewriteLifetimeIntrinsic(IntrinsicInst *II, AllocaInst *AI,
if (NewOffset) {
// Splice the first element and index 'NewOffset' bytes in. SROA will
// split the alloca again later.
- Value *V = Builder.CreateBitCast(NewElts[Idx], Builder.getInt8PtrTy());
+ unsigned AS = AI->getType()->getAddressSpace();
+ Value *V = Builder.CreateBitCast(NewElts[Idx], Builder.getInt8PtrTy(AS));
V = Builder.CreateGEP(V, Builder.getInt64(NewOffset));
IdxTy = NewElts[Idx]->getAllocatedType();
@@ -2155,7 +2158,7 @@ SROA::RewriteMemIntrinUserOfAlloca(MemIntrinsic *MI, Instruction *Inst,
// appropriate type. The "Other" pointer is the pointer that goes to memory
// that doesn't have anything to do with the alloca that we are promoting. For
// memset, this Value* stays null.
- Value *OtherPtr = 0;
+ Value *OtherPtr = nullptr;
unsigned MemAlignment = MI->getAlignment();
if (MemTransferInst *MTI = dyn_cast<MemTransferInst>(MI)) { // memmove/memcopy
if (Inst == MTI->getRawDest())
@@ -2207,7 +2210,7 @@ SROA::RewriteMemIntrinUserOfAlloca(MemIntrinsic *MI, Instruction *Inst,
for (unsigned i = 0, e = NewElts.size(); i != e; ++i) {
// If this is a memcpy/memmove, emit a GEP of the other element address.
- Value *OtherElt = 0;
+ Value *OtherElt = nullptr;
unsigned OtherEltAlign = MemAlignment;
if (OtherPtr) {
@@ -2449,7 +2452,7 @@ SROA::RewriteLoadUserOfWholeAlloca(LoadInst *LI, AllocaInst *AI,
// There are two forms here: AI could be an array or struct. Both cases
// have different ways to compute the element offset.
- const StructLayout *Layout = 0;
+ const StructLayout *Layout = nullptr;
uint64_t ArrayEltBitOffset = 0;
if (StructType *EltSTy = dyn_cast<StructType>(AllocaEltTy)) {
Layout = DL->getStructLayout(EltSTy);
diff --git a/lib/Transforms/Scalar/Scalarizer.cpp b/lib/Transforms/Scalar/Scalarizer.cpp
index 006375c..7a73f11 100644
--- a/lib/Transforms/Scalar/Scalarizer.cpp
+++ b/lib/Transforms/Scalar/Scalarizer.cpp
@@ -14,7 +14,6 @@
//
//===----------------------------------------------------------------------===//
-#define DEBUG_TYPE "scalarizer"
#include "llvm/ADT/STLExtras.h"
#include "llvm/IR/IRBuilder.h"
#include "llvm/IR/InstVisitor.h"
@@ -25,6 +24,8 @@
using namespace llvm;
+#define DEBUG_TYPE "scalarizer"
+
namespace {
// Used to store the scattered form of a vector.
typedef SmallVector<Value *, 8> ValueVector;
@@ -48,7 +49,7 @@ public:
// insert them before BBI in BB. If Cache is nonnull, use it to cache
// the results.
Scatterer(BasicBlock *bb, BasicBlock::iterator bbi, Value *v,
- ValueVector *cachePtr = 0);
+ ValueVector *cachePtr = nullptr);
// Return component I, creating a new Value for it if necessary.
Value *operator[](unsigned I);
@@ -101,7 +102,7 @@ struct BinarySplitter {
// Information about a load or store that we're scalarizing.
struct VectorLayout {
- VectorLayout() : VecTy(0), ElemTy(0), VecAlign(0), ElemSize(0) {}
+ VectorLayout() : VecTy(nullptr), ElemTy(nullptr), VecAlign(0), ElemSize(0) {}
// Return the alignment of element I.
uint64_t getElemAlign(unsigned I) {
@@ -186,9 +187,9 @@ Scatterer::Scatterer(BasicBlock *bb, BasicBlock::iterator bbi, Value *v,
Ty = PtrTy->getElementType();
Size = Ty->getVectorNumElements();
if (!CachePtr)
- Tmp.resize(Size, 0);
+ Tmp.resize(Size, nullptr);
else if (CachePtr->empty())
- CachePtr->resize(Size, 0);
+ CachePtr->resize(Size, nullptr);
else
assert(Size == CachePtr->size() && "Inconsistent vector sizes");
}
@@ -241,7 +242,7 @@ bool Scalarizer::doInitialization(Module &M) {
bool Scalarizer::runOnFunction(Function &F) {
DataLayoutPass *DLP = getAnalysisIfAvailable<DataLayoutPass>();
- DL = DLP ? &DLP->getDataLayout() : 0;
+ DL = DLP ? &DLP->getDataLayout() : nullptr;
for (Function::iterator BBI = F.begin(), BBE = F.end(); BBI != BBE; ++BBI) {
BasicBlock *BB = BBI;
for (BasicBlock::iterator II = BB->begin(), IE = BB->end(); II != IE;) {
diff --git a/lib/Transforms/Scalar/SeparateConstOffsetFromGEP.cpp b/lib/Transforms/Scalar/SeparateConstOffsetFromGEP.cpp
new file mode 100644
index 0000000..b8529e1
--- /dev/null
+++ b/lib/Transforms/Scalar/SeparateConstOffsetFromGEP.cpp
@@ -0,0 +1,623 @@
+//===-- SeparateConstOffsetFromGEP.cpp - ------------------------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// Loop unrolling may create many similar GEPs for array accesses.
+// e.g., a 2-level loop
+//
+// float a[32][32]; // global variable
+//
+// for (int i = 0; i < 2; ++i) {
+// for (int j = 0; j < 2; ++j) {
+// ...
+// ... = a[x + i][y + j];
+// ...
+// }
+// }
+//
+// will probably be unrolled to:
+//
+// gep %a, 0, %x, %y; load
+// gep %a, 0, %x, %y + 1; load
+// gep %a, 0, %x + 1, %y; load
+// gep %a, 0, %x + 1, %y + 1; load
+//
+// LLVM's GVN does not use partial redundancy elimination yet, and is thus
+// unable to reuse (gep %a, 0, %x, %y). As a result, this misoptimization incurs
+// significant slowdown in targets with limited addressing modes. For instance,
+// because the PTX target does not support the reg+reg addressing mode, the
+// NVPTX backend emits PTX code that literally computes the pointer address of
+// each GEP, wasting tons of registers. It emits the following PTX for the
+// first load and similar PTX for other loads.
+//
+// mov.u32 %r1, %x;
+// mov.u32 %r2, %y;
+// mul.wide.u32 %rl2, %r1, 128;
+// mov.u64 %rl3, a;
+// add.s64 %rl4, %rl3, %rl2;
+// mul.wide.u32 %rl5, %r2, 4;
+// add.s64 %rl6, %rl4, %rl5;
+// ld.global.f32 %f1, [%rl6];
+//
+// To reduce the register pressure, the optimization implemented in this file
+// merges the common part of a group of GEPs, so we can compute each pointer
+// address by adding a simple offset to the common part, saving many registers.
+//
+// It works by splitting each GEP into a variadic base and a constant offset.
+// The variadic base can be computed once and reused by multiple GEPs, and the
+// constant offsets can be nicely folded into the reg+immediate addressing mode
+// (supported by most targets) without using any extra register.
+//
+// For instance, we transform the four GEPs and four loads in the above example
+// into:
+//
+// base = gep a, 0, x, y
+// load base
+// laod base + 1 * sizeof(float)
+// load base + 32 * sizeof(float)
+// load base + 33 * sizeof(float)
+//
+// Given the transformed IR, a backend that supports the reg+immediate
+// addressing mode can easily fold the pointer arithmetics into the loads. For
+// example, the NVPTX backend can easily fold the pointer arithmetics into the
+// ld.global.f32 instructions, and the resultant PTX uses much fewer registers.
+//
+// mov.u32 %r1, %tid.x;
+// mov.u32 %r2, %tid.y;
+// mul.wide.u32 %rl2, %r1, 128;
+// mov.u64 %rl3, a;
+// add.s64 %rl4, %rl3, %rl2;
+// mul.wide.u32 %rl5, %r2, 4;
+// add.s64 %rl6, %rl4, %rl5;
+// ld.global.f32 %f1, [%rl6]; // so far the same as unoptimized PTX
+// ld.global.f32 %f2, [%rl6+4]; // much better
+// ld.global.f32 %f3, [%rl6+128]; // much better
+// ld.global.f32 %f4, [%rl6+132]; // much better
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/Analysis/TargetTransformInfo.h"
+#include "llvm/Analysis/ValueTracking.h"
+#include "llvm/IR/Constants.h"
+#include "llvm/IR/DataLayout.h"
+#include "llvm/IR/Instructions.h"
+#include "llvm/IR/LLVMContext.h"
+#include "llvm/IR/Module.h"
+#include "llvm/IR/Operator.h"
+#include "llvm/Support/CommandLine.h"
+#include "llvm/Support/raw_ostream.h"
+#include "llvm/Transforms/Scalar.h"
+
+using namespace llvm;
+
+static cl::opt<bool> DisableSeparateConstOffsetFromGEP(
+ "disable-separate-const-offset-from-gep", cl::init(false),
+ cl::desc("Do not separate the constant offset from a GEP instruction"),
+ cl::Hidden);
+
+namespace {
+
+/// \brief A helper class for separating a constant offset from a GEP index.
+///
+/// In real programs, a GEP index may be more complicated than a simple addition
+/// of something and a constant integer which can be trivially splitted. For
+/// example, to split ((a << 3) | 5) + b, we need to search deeper for the
+/// constant offset, so that we can separate the index to (a << 3) + b and 5.
+///
+/// Therefore, this class looks into the expression that computes a given GEP
+/// index, and tries to find a constant integer that can be hoisted to the
+/// outermost level of the expression as an addition. Not every constant in an
+/// expression can jump out. e.g., we cannot transform (b * (a + 5)) to (b * a +
+/// 5); nor can we transform (3 * (a + 5)) to (3 * a + 5), however in this case,
+/// -instcombine probably already optimized (3 * (a + 5)) to (3 * a + 15).
+class ConstantOffsetExtractor {
+ public:
+ /// Extracts a constant offset from the given GEP index. It outputs the
+ /// numeric value of the extracted constant offset (0 if failed), and a
+ /// new index representing the remainder (equal to the original index minus
+ /// the constant offset).
+ /// \p Idx The given GEP index
+ /// \p NewIdx The new index to replace
+ /// \p DL The datalayout of the module
+ /// \p IP Calculating the new index requires new instructions. IP indicates
+ /// where to insert them (typically right before the GEP).
+ static int64_t Extract(Value *Idx, Value *&NewIdx, const DataLayout *DL,
+ Instruction *IP);
+ /// Looks for a constant offset without extracting it. The meaning of the
+ /// arguments and the return value are the same as Extract.
+ static int64_t Find(Value *Idx, const DataLayout *DL);
+
+ private:
+ ConstantOffsetExtractor(const DataLayout *Layout, Instruction *InsertionPt)
+ : DL(Layout), IP(InsertionPt) {}
+ /// Searches the expression that computes V for a constant offset. If the
+ /// searching is successful, update UserChain as a path from V to the constant
+ /// offset.
+ int64_t find(Value *V);
+ /// A helper function to look into both operands of a binary operator U.
+ /// \p IsSub Whether U is a sub operator. If so, we need to negate the
+ /// constant offset at some point.
+ int64_t findInEitherOperand(User *U, bool IsSub);
+ /// After finding the constant offset and how it is reached from the GEP
+ /// index, we build a new index which is a clone of the old one except the
+ /// constant offset is removed. For example, given (a + (b + 5)) and knowning
+ /// the constant offset is 5, this function returns (a + b).
+ ///
+ /// We cannot simply change the constant to zero because the expression that
+ /// computes the index or its intermediate result may be used by others.
+ Value *rebuildWithoutConstantOffset();
+ // A helper function for rebuildWithoutConstantOffset that rebuilds the direct
+ // user (U) of the constant offset (C).
+ Value *rebuildLeafWithoutConstantOffset(User *U, Value *C);
+ /// Returns a clone of U except the first occurrence of From with To.
+ Value *cloneAndReplace(User *U, Value *From, Value *To);
+
+ /// Returns true if LHS and RHS have no bits in common, i.e., LHS | RHS == 0.
+ bool NoCommonBits(Value *LHS, Value *RHS) const;
+ /// Computes which bits are known to be one or zero.
+ /// \p KnownOne Mask of all bits that are known to be one.
+ /// \p KnownZero Mask of all bits that are known to be zero.
+ void ComputeKnownBits(Value *V, APInt &KnownOne, APInt &KnownZero) const;
+ /// Finds the first use of Used in U. Returns -1 if not found.
+ static unsigned FindFirstUse(User *U, Value *Used);
+ /// Returns whether OPC (sext or zext) can be distributed to the operands of
+ /// BO. e.g., sext can be distributed to the operands of an "add nsw" because
+ /// sext (add nsw a, b) == add nsw (sext a), (sext b).
+ static bool Distributable(unsigned OPC, BinaryOperator *BO);
+
+ /// The path from the constant offset to the old GEP index. e.g., if the GEP
+ /// index is "a * b + (c + 5)". After running function find, UserChain[0] will
+ /// be the constant 5, UserChain[1] will be the subexpression "c + 5", and
+ /// UserChain[2] will be the entire expression "a * b + (c + 5)".
+ ///
+ /// This path helps rebuildWithoutConstantOffset rebuild the new GEP index.
+ SmallVector<User *, 8> UserChain;
+ /// The data layout of the module. Used in ComputeKnownBits.
+ const DataLayout *DL;
+ Instruction *IP; /// Insertion position of cloned instructions.
+};
+
+/// \brief A pass that tries to split every GEP in the function into a variadic
+/// base and a constant offset. It is a FunctionPass because searching for the
+/// constant offset may inspect other basic blocks.
+class SeparateConstOffsetFromGEP : public FunctionPass {
+ public:
+ static char ID;
+ SeparateConstOffsetFromGEP() : FunctionPass(ID) {
+ initializeSeparateConstOffsetFromGEPPass(*PassRegistry::getPassRegistry());
+ }
+
+ void getAnalysisUsage(AnalysisUsage &AU) const override {
+ AU.addRequired<DataLayoutPass>();
+ AU.addRequired<TargetTransformInfo>();
+ }
+ bool runOnFunction(Function &F) override;
+
+ private:
+ /// Tries to split the given GEP into a variadic base and a constant offset,
+ /// and returns true if the splitting succeeds.
+ bool splitGEP(GetElementPtrInst *GEP);
+ /// Finds the constant offset within each index, and accumulates them. This
+ /// function only inspects the GEP without changing it. The output
+ /// NeedsExtraction indicates whether we can extract a non-zero constant
+ /// offset from any index.
+ int64_t accumulateByteOffset(GetElementPtrInst *GEP, const DataLayout *DL,
+ bool &NeedsExtraction);
+};
+} // anonymous namespace
+
+char SeparateConstOffsetFromGEP::ID = 0;
+INITIALIZE_PASS_BEGIN(
+ SeparateConstOffsetFromGEP, "separate-const-offset-from-gep",
+ "Split GEPs to a variadic base and a constant offset for better CSE", false,
+ false)
+INITIALIZE_AG_DEPENDENCY(TargetTransformInfo)
+INITIALIZE_PASS_DEPENDENCY(DataLayoutPass)
+INITIALIZE_PASS_END(
+ SeparateConstOffsetFromGEP, "separate-const-offset-from-gep",
+ "Split GEPs to a variadic base and a constant offset for better CSE", false,
+ false)
+
+FunctionPass *llvm::createSeparateConstOffsetFromGEPPass() {
+ return new SeparateConstOffsetFromGEP();
+}
+
+bool ConstantOffsetExtractor::Distributable(unsigned OPC, BinaryOperator *BO) {
+ assert(OPC == Instruction::SExt || OPC == Instruction::ZExt);
+
+ // sext (add/sub nsw A, B) == add/sub nsw (sext A), (sext B)
+ // zext (add/sub nuw A, B) == add/sub nuw (zext A), (zext B)
+ if (BO->getOpcode() == Instruction::Add ||
+ BO->getOpcode() == Instruction::Sub) {
+ return (OPC == Instruction::SExt && BO->hasNoSignedWrap()) ||
+ (OPC == Instruction::ZExt && BO->hasNoUnsignedWrap());
+ }
+
+ // sext/zext (and/or/xor A, B) == and/or/xor (sext/zext A), (sext/zext B)
+ // -instcombine also leverages this invariant to do the reverse
+ // transformation to reduce integer casts.
+ return BO->getOpcode() == Instruction::And ||
+ BO->getOpcode() == Instruction::Or ||
+ BO->getOpcode() == Instruction::Xor;
+}
+
+int64_t ConstantOffsetExtractor::findInEitherOperand(User *U, bool IsSub) {
+ assert(U->getNumOperands() == 2);
+ int64_t ConstantOffset = find(U->getOperand(0));
+ // If we found a constant offset in the left operand, stop and return that.
+ // This shortcut might cause us to miss opportunities of combining the
+ // constant offsets in both operands, e.g., (a + 4) + (b + 5) => (a + b) + 9.
+ // However, such cases are probably already handled by -instcombine,
+ // given this pass runs after the standard optimizations.
+ if (ConstantOffset != 0) return ConstantOffset;
+ ConstantOffset = find(U->getOperand(1));
+ // If U is a sub operator, negate the constant offset found in the right
+ // operand.
+ return IsSub ? -ConstantOffset : ConstantOffset;
+}
+
+int64_t ConstantOffsetExtractor::find(Value *V) {
+ // TODO(jingyue): We can even trace into integer/pointer casts, such as
+ // inttoptr, ptrtoint, bitcast, and addrspacecast. We choose to handle only
+ // integers because it gives good enough results for our benchmarks.
+ assert(V->getType()->isIntegerTy());
+
+ User *U = dyn_cast<User>(V);
+ // We cannot do much with Values that are not a User, such as BasicBlock and
+ // MDNode.
+ if (U == nullptr) return 0;
+
+ int64_t ConstantOffset = 0;
+ if (ConstantInt *CI = dyn_cast<ConstantInt>(U)) {
+ // Hooray, we found it!
+ ConstantOffset = CI->getSExtValue();
+ } else if (Operator *O = dyn_cast<Operator>(U)) {
+ // The GEP index may be more complicated than a simple addition of a
+ // varaible and a constant. Therefore, we trace into subexpressions for more
+ // hoisting opportunities.
+ switch (O->getOpcode()) {
+ case Instruction::Add: {
+ ConstantOffset = findInEitherOperand(U, false);
+ break;
+ }
+ case Instruction::Sub: {
+ ConstantOffset = findInEitherOperand(U, true);
+ break;
+ }
+ case Instruction::Or: {
+ // If LHS and RHS don't have common bits, (LHS | RHS) is equivalent to
+ // (LHS + RHS).
+ if (NoCommonBits(U->getOperand(0), U->getOperand(1)))
+ ConstantOffset = findInEitherOperand(U, false);
+ break;
+ }
+ case Instruction::SExt:
+ case Instruction::ZExt: {
+ // We trace into sext/zext if the operator can be distributed to its
+ // operand. e.g., we can transform into "sext (add nsw a, 5)" and
+ // extract constant 5, because
+ // sext (add nsw a, 5) == add nsw (sext a), 5
+ if (BinaryOperator *BO = dyn_cast<BinaryOperator>(U->getOperand(0))) {
+ if (Distributable(O->getOpcode(), BO))
+ ConstantOffset = find(U->getOperand(0));
+ }
+ break;
+ }
+ }
+ }
+ // If we found a non-zero constant offset, adds it to the path for future
+ // transformation (rebuildWithoutConstantOffset). Zero is a valid constant
+ // offset, but doesn't help this optimization.
+ if (ConstantOffset != 0)
+ UserChain.push_back(U);
+ return ConstantOffset;
+}
+
+unsigned ConstantOffsetExtractor::FindFirstUse(User *U, Value *Used) {
+ for (unsigned I = 0, E = U->getNumOperands(); I < E; ++I) {
+ if (U->getOperand(I) == Used)
+ return I;
+ }
+ return -1;
+}
+
+Value *ConstantOffsetExtractor::cloneAndReplace(User *U, Value *From,
+ Value *To) {
+ // Finds in U the first use of From. It is safe to ignore future occurrences
+ // of From, because findInEitherOperand similarly stops searching the right
+ // operand when the first operand has a non-zero constant offset.
+ unsigned OpNo = FindFirstUse(U, From);
+ assert(OpNo != (unsigned)-1 && "UserChain wasn't built correctly");
+
+ // ConstantOffsetExtractor::find only follows Operators (i.e., Instructions
+ // and ConstantExprs). Therefore, U is either an Instruction or a
+ // ConstantExpr.
+ if (Instruction *I = dyn_cast<Instruction>(U)) {
+ Instruction *Clone = I->clone();
+ Clone->setOperand(OpNo, To);
+ Clone->insertBefore(IP);
+ return Clone;
+ }
+ // cast<Constant>(To) is safe because a ConstantExpr only uses Constants.
+ return cast<ConstantExpr>(U)
+ ->getWithOperandReplaced(OpNo, cast<Constant>(To));
+}
+
+Value *ConstantOffsetExtractor::rebuildLeafWithoutConstantOffset(User *U,
+ Value *C) {
+ assert(U->getNumOperands() <= 2 &&
+ "We didn't trace into any operator with more than 2 operands");
+ // If U has only one operand which is the constant offset, removing the
+ // constant offset leaves U as a null value.
+ if (U->getNumOperands() == 1)
+ return Constant::getNullValue(U->getType());
+
+ // U->getNumOperands() == 2
+ unsigned OpNo = FindFirstUse(U, C); // U->getOperand(OpNo) == C
+ assert(OpNo < 2 && "UserChain wasn't built correctly");
+ Value *TheOther = U->getOperand(1 - OpNo); // The other operand of U
+ // If U = C - X, removing C makes U = -X; otherwise U will simply be X.
+ if (!isa<SubOperator>(U) || OpNo == 1)
+ return TheOther;
+ if (isa<ConstantExpr>(U))
+ return ConstantExpr::getNeg(cast<Constant>(TheOther));
+ return BinaryOperator::CreateNeg(TheOther, "", IP);
+}
+
+Value *ConstantOffsetExtractor::rebuildWithoutConstantOffset() {
+ assert(UserChain.size() > 0 && "you at least found a constant, right?");
+ // Start with the constant and go up through UserChain, each time building a
+ // clone of the subexpression but with the constant removed.
+ // e.g., to build a clone of (a + (b + (c + 5)) but with the 5 removed, we
+ // first c, then (b + c), and finally (a + (b + c)).
+ //
+ // Fast path: if the GEP index is a constant, simply returns 0.
+ if (UserChain.size() == 1)
+ return ConstantInt::get(UserChain[0]->getType(), 0);
+
+ Value *Remainder =
+ rebuildLeafWithoutConstantOffset(UserChain[1], UserChain[0]);
+ for (size_t I = 2; I < UserChain.size(); ++I)
+ Remainder = cloneAndReplace(UserChain[I], UserChain[I - 1], Remainder);
+ return Remainder;
+}
+
+int64_t ConstantOffsetExtractor::Extract(Value *Idx, Value *&NewIdx,
+ const DataLayout *DL,
+ Instruction *IP) {
+ ConstantOffsetExtractor Extractor(DL, IP);
+ // Find a non-zero constant offset first.
+ int64_t ConstantOffset = Extractor.find(Idx);
+ if (ConstantOffset == 0)
+ return 0;
+ // Then rebuild a new index with the constant removed.
+ NewIdx = Extractor.rebuildWithoutConstantOffset();
+ return ConstantOffset;
+}
+
+int64_t ConstantOffsetExtractor::Find(Value *Idx, const DataLayout *DL) {
+ return ConstantOffsetExtractor(DL, nullptr).find(Idx);
+}
+
+void ConstantOffsetExtractor::ComputeKnownBits(Value *V, APInt &KnownOne,
+ APInt &KnownZero) const {
+ IntegerType *IT = cast<IntegerType>(V->getType());
+ KnownOne = APInt(IT->getBitWidth(), 0);
+ KnownZero = APInt(IT->getBitWidth(), 0);
+ llvm::computeKnownBits(V, KnownZero, KnownOne, DL, 0);
+}
+
+bool ConstantOffsetExtractor::NoCommonBits(Value *LHS, Value *RHS) const {
+ assert(LHS->getType() == RHS->getType() &&
+ "LHS and RHS should have the same type");
+ APInt LHSKnownOne, LHSKnownZero, RHSKnownOne, RHSKnownZero;
+ ComputeKnownBits(LHS, LHSKnownOne, LHSKnownZero);
+ ComputeKnownBits(RHS, RHSKnownOne, RHSKnownZero);
+ return (LHSKnownZero | RHSKnownZero).isAllOnesValue();
+}
+
+int64_t SeparateConstOffsetFromGEP::accumulateByteOffset(
+ GetElementPtrInst *GEP, const DataLayout *DL, bool &NeedsExtraction) {
+ NeedsExtraction = false;
+ int64_t AccumulativeByteOffset = 0;
+ gep_type_iterator GTI = gep_type_begin(*GEP);
+ for (unsigned I = 1, E = GEP->getNumOperands(); I != E; ++I, ++GTI) {
+ if (isa<SequentialType>(*GTI)) {
+ // Tries to extract a constant offset from this GEP index.
+ int64_t ConstantOffset =
+ ConstantOffsetExtractor::Find(GEP->getOperand(I), DL);
+ if (ConstantOffset != 0) {
+ NeedsExtraction = true;
+ // A GEP may have multiple indices. We accumulate the extracted
+ // constant offset to a byte offset, and later offset the remainder of
+ // the original GEP with this byte offset.
+ AccumulativeByteOffset +=
+ ConstantOffset * DL->getTypeAllocSize(GTI.getIndexedType());
+ }
+ }
+ }
+ return AccumulativeByteOffset;
+}
+
+bool SeparateConstOffsetFromGEP::splitGEP(GetElementPtrInst *GEP) {
+ // Skip vector GEPs.
+ if (GEP->getType()->isVectorTy())
+ return false;
+
+ // The backend can already nicely handle the case where all indices are
+ // constant.
+ if (GEP->hasAllConstantIndices())
+ return false;
+
+ bool Changed = false;
+
+ // Shortcuts integer casts. Eliminating these explicit casts can make
+ // subsequent optimizations more obvious: ConstantOffsetExtractor needn't
+ // trace into these casts.
+ if (GEP->isInBounds()) {
+ // Doing this to inbounds GEPs is safe because their indices are guaranteed
+ // to be non-negative and in bounds.
+ gep_type_iterator GTI = gep_type_begin(*GEP);
+ for (unsigned I = 1, E = GEP->getNumOperands(); I != E; ++I, ++GTI) {
+ if (isa<SequentialType>(*GTI)) {
+ if (Operator *O = dyn_cast<Operator>(GEP->getOperand(I))) {
+ if (O->getOpcode() == Instruction::SExt ||
+ O->getOpcode() == Instruction::ZExt) {
+ GEP->setOperand(I, O->getOperand(0));
+ Changed = true;
+ }
+ }
+ }
+ }
+ }
+
+ const DataLayout *DL = &getAnalysis<DataLayoutPass>().getDataLayout();
+ bool NeedsExtraction;
+ int64_t AccumulativeByteOffset =
+ accumulateByteOffset(GEP, DL, NeedsExtraction);
+
+ if (!NeedsExtraction)
+ return Changed;
+ // Before really splitting the GEP, check whether the backend supports the
+ // addressing mode we are about to produce. If no, this splitting probably
+ // won't be beneficial.
+ TargetTransformInfo &TTI = getAnalysis<TargetTransformInfo>();
+ if (!TTI.isLegalAddressingMode(GEP->getType()->getElementType(),
+ /*BaseGV=*/nullptr, AccumulativeByteOffset,
+ /*HasBaseReg=*/true, /*Scale=*/0)) {
+ return Changed;
+ }
+
+ // Remove the constant offset in each GEP index. The resultant GEP computes
+ // the variadic base.
+ gep_type_iterator GTI = gep_type_begin(*GEP);
+ for (unsigned I = 1, E = GEP->getNumOperands(); I != E; ++I, ++GTI) {
+ if (isa<SequentialType>(*GTI)) {
+ Value *NewIdx = nullptr;
+ // Tries to extract a constant offset from this GEP index.
+ int64_t ConstantOffset =
+ ConstantOffsetExtractor::Extract(GEP->getOperand(I), NewIdx, DL, GEP);
+ if (ConstantOffset != 0) {
+ assert(NewIdx != nullptr &&
+ "ConstantOffset != 0 implies NewIdx is set");
+ GEP->setOperand(I, NewIdx);
+ // Clear the inbounds attribute because the new index may be off-bound.
+ // e.g.,
+ //
+ // b = add i64 a, 5
+ // addr = gep inbounds float* p, i64 b
+ //
+ // is transformed to:
+ //
+ // addr2 = gep float* p, i64 a
+ // addr = gep float* addr2, i64 5
+ //
+ // If a is -4, although the old index b is in bounds, the new index a is
+ // off-bound. http://llvm.org/docs/LangRef.html#id181 says "if the
+ // inbounds keyword is not present, the offsets are added to the base
+ // address with silently-wrapping two's complement arithmetic".
+ // Therefore, the final code will be a semantically equivalent.
+ //
+ // TODO(jingyue): do some range analysis to keep as many inbounds as
+ // possible. GEPs with inbounds are more friendly to alias analysis.
+ GEP->setIsInBounds(false);
+ Changed = true;
+ }
+ }
+ }
+
+ // Offsets the base with the accumulative byte offset.
+ //
+ // %gep ; the base
+ // ... %gep ...
+ //
+ // => add the offset
+ //
+ // %gep2 ; clone of %gep
+ // %new.gep = gep %gep2, <offset / sizeof(*%gep)>
+ // %gep ; will be removed
+ // ... %gep ...
+ //
+ // => replace all uses of %gep with %new.gep and remove %gep
+ //
+ // %gep2 ; clone of %gep
+ // %new.gep = gep %gep2, <offset / sizeof(*%gep)>
+ // ... %new.gep ...
+ //
+ // If AccumulativeByteOffset is not a multiple of sizeof(*%gep), we emit an
+ // uglygep (http://llvm.org/docs/GetElementPtr.html#what-s-an-uglygep):
+ // bitcast %gep2 to i8*, add the offset, and bitcast the result back to the
+ // type of %gep.
+ //
+ // %gep2 ; clone of %gep
+ // %0 = bitcast %gep2 to i8*
+ // %uglygep = gep %0, <offset>
+ // %new.gep = bitcast %uglygep to <type of %gep>
+ // ... %new.gep ...
+ Instruction *NewGEP = GEP->clone();
+ NewGEP->insertBefore(GEP);
+
+ Type *IntPtrTy = DL->getIntPtrType(GEP->getType());
+ uint64_t ElementTypeSizeOfGEP =
+ DL->getTypeAllocSize(GEP->getType()->getElementType());
+ if (AccumulativeByteOffset % ElementTypeSizeOfGEP == 0) {
+ // Very likely. As long as %gep is natually aligned, the byte offset we
+ // extracted should be a multiple of sizeof(*%gep).
+ // Per ANSI C standard, signed / unsigned = unsigned. Therefore, we
+ // cast ElementTypeSizeOfGEP to signed.
+ int64_t Index =
+ AccumulativeByteOffset / static_cast<int64_t>(ElementTypeSizeOfGEP);
+ NewGEP = GetElementPtrInst::Create(
+ NewGEP, ConstantInt::get(IntPtrTy, Index, true), GEP->getName(), GEP);
+ } else {
+ // Unlikely but possible. For example,
+ // #pragma pack(1)
+ // struct S {
+ // int a[3];
+ // int64 b[8];
+ // };
+ // #pragma pack()
+ //
+ // Suppose the gep before extraction is &s[i + 1].b[j + 3]. After
+ // extraction, it becomes &s[i].b[j] and AccumulativeByteOffset is
+ // sizeof(S) + 3 * sizeof(int64) = 100, which is not a multiple of
+ // sizeof(int64).
+ //
+ // Emit an uglygep in this case.
+ Type *I8PtrTy = Type::getInt8PtrTy(GEP->getContext(),
+ GEP->getPointerAddressSpace());
+ NewGEP = new BitCastInst(NewGEP, I8PtrTy, "", GEP);
+ NewGEP = GetElementPtrInst::Create(
+ NewGEP, ConstantInt::get(IntPtrTy, AccumulativeByteOffset, true),
+ "uglygep", GEP);
+ if (GEP->getType() != I8PtrTy)
+ NewGEP = new BitCastInst(NewGEP, GEP->getType(), GEP->getName(), GEP);
+ }
+
+ GEP->replaceAllUsesWith(NewGEP);
+ GEP->eraseFromParent();
+
+ return true;
+}
+
+bool SeparateConstOffsetFromGEP::runOnFunction(Function &F) {
+ if (DisableSeparateConstOffsetFromGEP)
+ return false;
+
+ bool Changed = false;
+ for (Function::iterator B = F.begin(), BE = F.end(); B != BE; ++B) {
+ for (BasicBlock::iterator I = B->begin(), IE = B->end(); I != IE; ) {
+ if (GetElementPtrInst *GEP = dyn_cast<GetElementPtrInst>(I++)) {
+ Changed |= splitGEP(GEP);
+ }
+ // No need to split GEP ConstantExprs because all its indices are constant
+ // already.
+ }
+ }
+ return Changed;
+}
diff --git a/lib/Transforms/Scalar/SimplifyCFGPass.cpp b/lib/Transforms/Scalar/SimplifyCFGPass.cpp
index ceae5a7..5d5606b 100644
--- a/lib/Transforms/Scalar/SimplifyCFGPass.cpp
+++ b/lib/Transforms/Scalar/SimplifyCFGPass.cpp
@@ -21,7 +21,6 @@
//
//===----------------------------------------------------------------------===//
-#define DEBUG_TYPE "simplifycfg"
#include "llvm/Transforms/Scalar.h"
#include "llvm/ADT/SmallPtrSet.h"
#include "llvm/ADT/SmallVector.h"
@@ -38,6 +37,8 @@
#include "llvm/Transforms/Utils/Local.h"
using namespace llvm;
+#define DEBUG_TYPE "simplifycfg"
+
STATISTIC(NumSimpl, "Number of blocks simplified");
namespace {
@@ -71,7 +72,7 @@ FunctionPass *llvm::createCFGSimplificationPass() {
static bool mergeEmptyReturnBlocks(Function &F) {
bool Changed = false;
- BasicBlock *RetBlock = 0;
+ BasicBlock *RetBlock = nullptr;
// Scan all the blocks in the function, looking for empty return blocks.
for (Function::iterator BBI = F.begin(), E = F.end(); BBI != E; ) {
@@ -79,7 +80,7 @@ static bool mergeEmptyReturnBlocks(Function &F) {
// Only look at return blocks.
ReturnInst *Ret = dyn_cast<ReturnInst>(BB.getTerminator());
- if (Ret == 0) continue;
+ if (!Ret) continue;
// Only look at the block if it is empty or the only other thing in it is a
// single PHI node that is the operand to the return.
@@ -98,7 +99,7 @@ static bool mergeEmptyReturnBlocks(Function &F) {
}
// If this is the first returning block, remember it and keep going.
- if (RetBlock == 0) {
+ if (!RetBlock) {
RetBlock = &BB;
continue;
}
@@ -119,7 +120,7 @@ static bool mergeEmptyReturnBlocks(Function &F) {
// If the canonical return block has no PHI node, create one now.
PHINode *RetBlockPHI = dyn_cast<PHINode>(RetBlock->begin());
- if (RetBlockPHI == 0) {
+ if (!RetBlockPHI) {
Value *InVal = cast<ReturnInst>(RetBlock->getTerminator())->getOperand(0);
pred_iterator PB = pred_begin(RetBlock), PE = pred_end(RetBlock);
RetBlockPHI = PHINode::Create(Ret->getOperand(0)->getType(),
@@ -173,7 +174,7 @@ bool CFGSimplifyPass::runOnFunction(Function &F) {
const TargetTransformInfo &TTI = getAnalysis<TargetTransformInfo>();
DataLayoutPass *DLP = getAnalysisIfAvailable<DataLayoutPass>();
- const DataLayout *DL = DLP ? &DLP->getDataLayout() : 0;
+ const DataLayout *DL = DLP ? &DLP->getDataLayout() : nullptr;
bool EverChanged = removeUnreachableBlocks(F);
EverChanged |= mergeEmptyReturnBlocks(F);
EverChanged |= iterativelySimplifyCFG(F, TTI, DL);
diff --git a/lib/Transforms/Scalar/Sink.cpp b/lib/Transforms/Scalar/Sink.cpp
index 4107374..482c33a 100644
--- a/lib/Transforms/Scalar/Sink.cpp
+++ b/lib/Transforms/Scalar/Sink.cpp
@@ -12,7 +12,6 @@
//
//===----------------------------------------------------------------------===//
-#define DEBUG_TYPE "sink"
#include "llvm/Transforms/Scalar.h"
#include "llvm/ADT/Statistic.h"
#include "llvm/Analysis/AliasAnalysis.h"
@@ -25,6 +24,8 @@
#include "llvm/Support/raw_ostream.h"
using namespace llvm;
+#define DEBUG_TYPE "sink"
+
STATISTIC(NumSunk, "Number of instructions sunk");
STATISTIC(NumSinkIter, "Number of sinking iterations");
@@ -203,7 +204,7 @@ bool Sinking::IsAcceptableTarget(Instruction *Inst,
// Don't sink instructions into a loop.
Loop *succ = LI->getLoopFor(SuccToSinkTo);
Loop *cur = LI->getLoopFor(Inst->getParent());
- if (succ != 0 && succ != cur)
+ if (succ != nullptr && succ != cur)
return false;
}
@@ -237,14 +238,14 @@ bool Sinking::SinkInstruction(Instruction *Inst,
// SuccToSinkTo - This is the successor to sink this instruction to, once we
// decide.
- BasicBlock *SuccToSinkTo = 0;
+ BasicBlock *SuccToSinkTo = nullptr;
// Instructions can only be sunk if all their uses are in blocks
// dominated by one of the successors.
// Look at all the postdominators and see if we can sink it in one.
DomTreeNode *DTN = DT->getNode(Inst->getParent());
for (DomTreeNode::iterator I = DTN->begin(), E = DTN->end();
- I != E && SuccToSinkTo == 0; ++I) {
+ I != E && SuccToSinkTo == nullptr; ++I) {
BasicBlock *Candidate = (*I)->getBlock();
if ((*I)->getIDom()->getBlock() == Inst->getParent() &&
IsAcceptableTarget(Inst, Candidate))
@@ -254,13 +255,13 @@ bool Sinking::SinkInstruction(Instruction *Inst,
// If no suitable postdominator was found, look at all the successors and
// decide which one we should sink to, if any.
for (succ_iterator I = succ_begin(Inst->getParent()),
- E = succ_end(Inst->getParent()); I != E && SuccToSinkTo == 0; ++I) {
+ E = succ_end(Inst->getParent()); I != E && !SuccToSinkTo; ++I) {
if (IsAcceptableTarget(Inst, *I))
SuccToSinkTo = *I;
}
// If we couldn't find a block to sink to, ignore this instruction.
- if (SuccToSinkTo == 0)
+ if (!SuccToSinkTo)
return false;
DEBUG(dbgs() << "Sink" << *Inst << " (";
diff --git a/lib/Transforms/Scalar/StructurizeCFG.cpp b/lib/Transforms/Scalar/StructurizeCFG.cpp
index 8fd2268..7b77ae1 100644
--- a/lib/Transforms/Scalar/StructurizeCFG.cpp
+++ b/lib/Transforms/Scalar/StructurizeCFG.cpp
@@ -7,7 +7,6 @@
//
//===----------------------------------------------------------------------===//
-#define DEBUG_TYPE "structurizecfg"
#include "llvm/Transforms/Scalar.h"
#include "llvm/ADT/MapVector.h"
#include "llvm/ADT/SCCIterator.h"
@@ -21,6 +20,8 @@
using namespace llvm;
using namespace llvm::PatternMatch;
+#define DEBUG_TYPE "structurizecfg"
+
namespace {
// Definition of the complex types used in this pass.
@@ -64,14 +65,14 @@ public:
/// \brief Start a new query
NearestCommonDominator(DominatorTree *DomTree) {
DT = DomTree;
- Result = 0;
+ Result = nullptr;
}
/// \brief Add BB to the resulting dominator
void addBlock(BasicBlock *BB, bool Remember = true) {
DomTreeNode *Node = DT->getNode(BB);
- if (Result == 0) {
+ if (!Result) {
unsigned Numbering = 0;
for (;Node;Node = Node->getIDom())
IndexMap[Node] = ++Numbering;
@@ -279,7 +280,7 @@ bool StructurizeCFG::doInitialization(Region *R, RGPassManager &RGM) {
void StructurizeCFG::orderNodes() {
scc_iterator<Region *> I = scc_begin(ParentRegion);
for (Order.clear(); !I.isAtEnd(); ++I) {
- std::vector<RegionNode *> &Nodes = *I;
+ const std::vector<RegionNode *> &Nodes = *I;
Order.append(Nodes.begin(), Nodes.end());
}
}
@@ -453,10 +454,7 @@ void StructurizeCFG::insertConditions(bool Loops) {
Value *Default = Loops ? BoolTrue : BoolFalse;
SSAUpdater PhiInserter;
- for (BranchVector::iterator I = Conds.begin(),
- E = Conds.end(); I != E; ++I) {
-
- BranchInst *Term = *I;
+ for (BranchInst *Term : Conds) {
assert(Term->isConditional());
BasicBlock *Parent = Term->getParent();
@@ -472,7 +470,7 @@ void StructurizeCFG::insertConditions(bool Loops) {
NearestCommonDominator Dominator(DT);
Dominator.addBlock(Parent, false);
- Value *ParentValue = 0;
+ Value *ParentValue = nullptr;
for (BBPredicates::iterator PI = Preds.begin(), PE = Preds.end();
PI != PE; ++PI) {
@@ -591,7 +589,7 @@ void StructurizeCFG::changeExit(RegionNode *Node, BasicBlock *NewExit,
if (Node->isSubRegion()) {
Region *SubRegion = Node->getNodeAs<Region>();
BasicBlock *OldExit = SubRegion->getExit();
- BasicBlock *Dominator = 0;
+ BasicBlock *Dominator = nullptr;
// Find all the edges from the sub region to the exit
for (pred_iterator I = pred_begin(OldExit), E = pred_end(OldExit);
@@ -678,7 +676,8 @@ BasicBlock *StructurizeCFG::needPostfix(BasicBlock *Flow,
/// \brief Set the previous node
void StructurizeCFG::setPrevNode(BasicBlock *BB) {
- PrevNode = ParentRegion->contains(BB) ? ParentRegion->getBBNode(BB) : 0;
+ PrevNode = ParentRegion->contains(BB) ? ParentRegion->getBBNode(BB)
+ : nullptr;
}
/// \brief Does BB dominate all the predicates of Node ?
@@ -699,7 +698,7 @@ bool StructurizeCFG::isPredictableTrue(RegionNode *Node) {
bool Dominated = false;
// Regionentry is always true
- if (PrevNode == 0)
+ if (!PrevNode)
return true;
for (BBPredicates::iterator I = Preds.begin(), E = Preds.end();
@@ -806,11 +805,11 @@ void StructurizeCFG::createFlow() {
Conditions.clear();
LoopConds.clear();
- PrevNode = 0;
+ PrevNode = nullptr;
Visited.clear();
while (!Order.empty()) {
- handleLoops(EntryDominatesExit, 0);
+ handleLoops(EntryDominatesExit, nullptr);
}
if (PrevNode)
diff --git a/lib/Transforms/Scalar/TailRecursionElimination.cpp b/lib/Transforms/Scalar/TailRecursionElimination.cpp
index 6d02777..05b9892 100644
--- a/lib/Transforms/Scalar/TailRecursionElimination.cpp
+++ b/lib/Transforms/Scalar/TailRecursionElimination.cpp
@@ -50,12 +50,12 @@
//
//===----------------------------------------------------------------------===//
-#define DEBUG_TYPE "tailcallelim"
#include "llvm/Transforms/Scalar.h"
#include "llvm/ADT/STLExtras.h"
#include "llvm/ADT/SmallPtrSet.h"
#include "llvm/ADT/Statistic.h"
#include "llvm/Analysis/CaptureTracking.h"
+#include "llvm/Analysis/CFG.h"
#include "llvm/Analysis/InlineCost.h"
#include "llvm/Analysis/InstructionSimplify.h"
#include "llvm/Analysis/Loads.h"
@@ -64,6 +64,7 @@
#include "llvm/IR/CallSite.h"
#include "llvm/IR/Constants.h"
#include "llvm/IR/DerivedTypes.h"
+#include "llvm/IR/DiagnosticInfo.h"
#include "llvm/IR/Function.h"
#include "llvm/IR/Instructions.h"
#include "llvm/IR/IntrinsicInst.h"
@@ -76,6 +77,8 @@
#include "llvm/Transforms/Utils/Local.h"
using namespace llvm;
+#define DEBUG_TYPE "tailcallelim"
+
STATISTIC(NumEliminated, "Number of tail calls removed");
STATISTIC(NumRetDuped, "Number of return duplicated");
STATISTIC(NumAccumAdded, "Number of accumulators introduced");
@@ -94,6 +97,9 @@ namespace {
bool runOnFunction(Function &F) override;
private:
+ bool runTRE(Function &F);
+ bool markTails(Function &F, bool &AllCallsAreTailCalls);
+
CallInst *FindTRECandidate(Instruction *I,
bool CannotTailCallElimCallsMarkedTail);
bool EliminateRecursiveTailCall(CallInst *CI, ReturnInst *Ret,
@@ -131,55 +137,255 @@ void TailCallElim::getAnalysisUsage(AnalysisUsage &AU) const {
AU.addRequired<TargetTransformInfo>();
}
-/// CanTRE - Scan the specified basic block for alloca instructions.
-/// If it contains any that are variable-sized or not in the entry block,
-/// returns false.
-static bool CanTRE(AllocaInst *AI) {
- // Because of PR962, we don't TRE allocas outside the entry block.
-
- // If this alloca is in the body of the function, or if it is a variable
- // sized allocation, we cannot tail call eliminate calls marked 'tail'
- // with this mechanism.
- BasicBlock *BB = AI->getParent();
- return BB == &BB->getParent()->getEntryBlock() &&
- isa<ConstantInt>(AI->getArraySize());
+/// \brief Scan the specified function for alloca instructions.
+/// If it contains any dynamic allocas, returns false.
+static bool CanTRE(Function &F) {
+ // Because of PR962, we don't TRE dynamic allocas.
+ for (auto &BB : F) {
+ for (auto &I : BB) {
+ if (AllocaInst *AI = dyn_cast<AllocaInst>(&I)) {
+ if (!AI->isStaticAlloca())
+ return false;
+ }
+ }
+ }
+
+ return true;
}
-namespace {
-struct AllocaCaptureTracker : public CaptureTracker {
- AllocaCaptureTracker() : Captured(false) {}
+bool TailCallElim::runOnFunction(Function &F) {
+ if (skipOptnoneFunction(F))
+ return false;
- void tooManyUses() override { Captured = true; }
+ bool AllCallsAreTailCalls = false;
+ bool Modified = markTails(F, AllCallsAreTailCalls);
+ if (AllCallsAreTailCalls)
+ Modified |= runTRE(F);
+ return Modified;
+}
- bool shouldExplore(const Use *U) override {
- Value *V = U->getUser();
- if (isa<CallInst>(V) || isa<InvokeInst>(V))
- UsesAlloca.insert(V);
- return true;
+namespace {
+struct AllocaDerivedValueTracker {
+ // Start at a root value and walk its use-def chain to mark calls that use the
+ // value or a derived value in AllocaUsers, and places where it may escape in
+ // EscapePoints.
+ void walk(Value *Root) {
+ SmallVector<Use *, 32> Worklist;
+ SmallPtrSet<Use *, 32> Visited;
+
+ auto AddUsesToWorklist = [&](Value *V) {
+ for (auto &U : V->uses()) {
+ if (!Visited.insert(&U))
+ continue;
+ Worklist.push_back(&U);
+ }
+ };
+
+ AddUsesToWorklist(Root);
+
+ while (!Worklist.empty()) {
+ Use *U = Worklist.pop_back_val();
+ Instruction *I = cast<Instruction>(U->getUser());
+
+ switch (I->getOpcode()) {
+ case Instruction::Call:
+ case Instruction::Invoke: {
+ CallSite CS(I);
+ bool IsNocapture = !CS.isCallee(U) &&
+ CS.doesNotCapture(CS.getArgumentNo(U));
+ callUsesLocalStack(CS, IsNocapture);
+ if (IsNocapture) {
+ // If the alloca-derived argument is passed in as nocapture, then it
+ // can't propagate to the call's return. That would be capturing.
+ continue;
+ }
+ break;
+ }
+ case Instruction::Load: {
+ // The result of a load is not alloca-derived (unless an alloca has
+ // otherwise escaped, but this is a local analysis).
+ continue;
+ }
+ case Instruction::Store: {
+ if (U->getOperandNo() == 0)
+ EscapePoints.insert(I);
+ continue; // Stores have no users to analyze.
+ }
+ case Instruction::BitCast:
+ case Instruction::GetElementPtr:
+ case Instruction::PHI:
+ case Instruction::Select:
+ case Instruction::AddrSpaceCast:
+ break;
+ default:
+ EscapePoints.insert(I);
+ break;
+ }
+
+ AddUsesToWorklist(I);
+ }
}
- bool captured(const Use *U) override {
- if (isa<ReturnInst>(U->getUser()))
- return false;
- Captured = true;
- return true;
+ void callUsesLocalStack(CallSite CS, bool IsNocapture) {
+ // Add it to the list of alloca users. If it's already there, skip further
+ // processing.
+ if (!AllocaUsers.insert(CS.getInstruction()))
+ return;
+
+ // If it's nocapture then it can't capture the alloca.
+ if (IsNocapture)
+ return;
+
+ // If it can write to memory, it can leak the alloca value.
+ if (!CS.onlyReadsMemory())
+ EscapePoints.insert(CS.getInstruction());
}
- bool Captured;
- SmallPtrSet<const Value *, 16> UsesAlloca;
+ SmallPtrSet<Instruction *, 32> AllocaUsers;
+ SmallPtrSet<Instruction *, 32> EscapePoints;
};
-} // end anonymous namespace
+}
-bool TailCallElim::runOnFunction(Function &F) {
- if (skipOptnoneFunction(F))
+bool TailCallElim::markTails(Function &F, bool &AllCallsAreTailCalls) {
+ if (F.callsFunctionThatReturnsTwice())
return false;
+ AllCallsAreTailCalls = true;
+
+ // The local stack holds all alloca instructions and all byval arguments.
+ AllocaDerivedValueTracker Tracker;
+ for (Argument &Arg : F.args()) {
+ if (Arg.hasByValAttr())
+ Tracker.walk(&Arg);
+ }
+ for (auto &BB : F) {
+ for (auto &I : BB)
+ if (AllocaInst *AI = dyn_cast<AllocaInst>(&I))
+ Tracker.walk(AI);
+ }
+ bool Modified = false;
+
+ // Track whether a block is reachable after an alloca has escaped. Blocks that
+ // contain the escaping instruction will be marked as being visited without an
+ // escaped alloca, since that is how the block began.
+ enum VisitType {
+ UNVISITED,
+ UNESCAPED,
+ ESCAPED
+ };
+ DenseMap<BasicBlock *, VisitType> Visited;
+
+ // We propagate the fact that an alloca has escaped from block to successor.
+ // Visit the blocks that are propagating the escapedness first. To do this, we
+ // maintain two worklists.
+ SmallVector<BasicBlock *, 32> WorklistUnescaped, WorklistEscaped;
+
+ // We may enter a block and visit it thinking that no alloca has escaped yet,
+ // then see an escape point and go back around a loop edge and come back to
+ // the same block twice. Because of this, we defer setting tail on calls when
+ // we first encounter them in a block. Every entry in this list does not
+ // statically use an alloca via use-def chain analysis, but may find an alloca
+ // through other means if the block turns out to be reachable after an escape
+ // point.
+ SmallVector<CallInst *, 32> DeferredTails;
+
+ BasicBlock *BB = &F.getEntryBlock();
+ VisitType Escaped = UNESCAPED;
+ do {
+ for (auto &I : *BB) {
+ if (Tracker.EscapePoints.count(&I))
+ Escaped = ESCAPED;
+
+ CallInst *CI = dyn_cast<CallInst>(&I);
+ if (!CI || CI->isTailCall())
+ continue;
+
+ if (CI->doesNotAccessMemory()) {
+ // A call to a readnone function whose arguments are all things computed
+ // outside this function can be marked tail. Even if you stored the
+ // alloca address into a global, a readnone function can't load the
+ // global anyhow.
+ //
+ // Note that this runs whether we know an alloca has escaped or not. If
+ // it has, then we can't trust Tracker.AllocaUsers to be accurate.
+ bool SafeToTail = true;
+ for (auto &Arg : CI->arg_operands()) {
+ if (isa<Constant>(Arg.getUser()))
+ continue;
+ if (Argument *A = dyn_cast<Argument>(Arg.getUser()))
+ if (!A->hasByValAttr())
+ continue;
+ SafeToTail = false;
+ break;
+ }
+ if (SafeToTail) {
+ emitOptimizationRemark(
+ F.getContext(), "tailcallelim", F, CI->getDebugLoc(),
+ "marked this readnone call a tail call candidate");
+ CI->setTailCall();
+ Modified = true;
+ continue;
+ }
+ }
+
+ if (Escaped == UNESCAPED && !Tracker.AllocaUsers.count(CI)) {
+ DeferredTails.push_back(CI);
+ } else {
+ AllCallsAreTailCalls = false;
+ }
+ }
+
+ for (auto *SuccBB : make_range(succ_begin(BB), succ_end(BB))) {
+ auto &State = Visited[SuccBB];
+ if (State < Escaped) {
+ State = Escaped;
+ if (State == ESCAPED)
+ WorklistEscaped.push_back(SuccBB);
+ else
+ WorklistUnescaped.push_back(SuccBB);
+ }
+ }
+
+ if (!WorklistEscaped.empty()) {
+ BB = WorklistEscaped.pop_back_val();
+ Escaped = ESCAPED;
+ } else {
+ BB = nullptr;
+ while (!WorklistUnescaped.empty()) {
+ auto *NextBB = WorklistUnescaped.pop_back_val();
+ if (Visited[NextBB] == UNESCAPED) {
+ BB = NextBB;
+ Escaped = UNESCAPED;
+ break;
+ }
+ }
+ }
+ } while (BB);
+
+ for (CallInst *CI : DeferredTails) {
+ if (Visited[CI->getParent()] != ESCAPED) {
+ // If the escape point was part way through the block, calls after the
+ // escape point wouldn't have been put into DeferredTails.
+ emitOptimizationRemark(F.getContext(), "tailcallelim", F,
+ CI->getDebugLoc(),
+ "marked this call a tail call candidate");
+ CI->setTailCall();
+ Modified = true;
+ } else {
+ AllCallsAreTailCalls = false;
+ }
+ }
+
+ return Modified;
+}
+
+bool TailCallElim::runTRE(Function &F) {
// If this function is a varargs function, we won't be able to PHI the args
// right, so don't even try to convert it...
if (F.getFunctionType()->isVarArg()) return false;
TTI = &getAnalysis<TargetTransformInfo>();
- BasicBlock *OldEntry = 0;
+ BasicBlock *OldEntry = nullptr;
bool TailCallsAreMarkedTail = false;
SmallVector<PHINode*, 8> ArgumentPHIs;
bool MadeChange = false;
@@ -188,39 +394,23 @@ bool TailCallElim::runOnFunction(Function &F) {
// marked with the 'tail' attribute, because doing so would cause the stack
// size to increase (real TRE would deallocate variable sized allocas, TRE
// doesn't).
- bool CanTRETailMarkedCall = true;
-
- // Find calls that can be marked tail.
- AllocaCaptureTracker ACT;
- for (Function::iterator BB = F.begin(), EE = F.end(); BB != EE; ++BB) {
- for (BasicBlock::iterator I = BB->begin(), E = BB->end(); I != E; ++I) {
- if (AllocaInst *AI = dyn_cast<AllocaInst>(I)) {
- CanTRETailMarkedCall &= CanTRE(AI);
- PointerMayBeCaptured(AI, &ACT);
- // If any allocas are captured, exit.
- if (ACT.Captured)
- return false;
- }
- }
- }
+ bool CanTRETailMarkedCall = CanTRE(F);
- // Second pass, change any tail recursive calls to loops.
+ // Change any tail recursive calls to loops.
//
// FIXME: The code generator produces really bad code when an 'escaping
// alloca' is changed from being a static alloca to being a dynamic alloca.
// Until this is resolved, disable this transformation if that would ever
// happen. This bug is PR962.
- if (ACT.UsesAlloca.empty()) {
- for (Function::iterator BB = F.begin(), E = F.end(); BB != E; ++BB) {
- if (ReturnInst *Ret = dyn_cast<ReturnInst>(BB->getTerminator())) {
- bool Change = ProcessReturningBlock(Ret, OldEntry, TailCallsAreMarkedTail,
- ArgumentPHIs, !CanTRETailMarkedCall);
- if (!Change && BB->getFirstNonPHIOrDbg() == Ret)
- Change = FoldReturnAndProcessPred(BB, Ret, OldEntry,
- TailCallsAreMarkedTail, ArgumentPHIs,
- !CanTRETailMarkedCall);
- MadeChange |= Change;
- }
+ for (Function::iterator BB = F.begin(), E = F.end(); BB != E; ++BB) {
+ if (ReturnInst *Ret = dyn_cast<ReturnInst>(BB->getTerminator())) {
+ bool Change = ProcessReturningBlock(Ret, OldEntry, TailCallsAreMarkedTail,
+ ArgumentPHIs, !CanTRETailMarkedCall);
+ if (!Change && BB->getFirstNonPHIOrDbg() == Ret)
+ Change = FoldReturnAndProcessPred(BB, Ret, OldEntry,
+ TailCallsAreMarkedTail, ArgumentPHIs,
+ !CanTRETailMarkedCall);
+ MadeChange |= Change;
}
}
@@ -229,34 +419,13 @@ bool TailCallElim::runOnFunction(Function &F) {
// with themselves. Check to see if we did and clean up our mess if so. This
// occurs when a function passes an argument straight through to its tail
// call.
- if (!ArgumentPHIs.empty()) {
- for (unsigned i = 0, e = ArgumentPHIs.size(); i != e; ++i) {
- PHINode *PN = ArgumentPHIs[i];
-
- // If the PHI Node is a dynamic constant, replace it with the value it is.
- if (Value *PNV = SimplifyInstruction(PN)) {
- PN->replaceAllUsesWith(PNV);
- PN->eraseFromParent();
- }
- }
- }
+ for (unsigned i = 0, e = ArgumentPHIs.size(); i != e; ++i) {
+ PHINode *PN = ArgumentPHIs[i];
- // At this point, we know that the function does not have any captured
- // allocas. If additionally the function does not call setjmp, mark all calls
- // in the function that do not access stack memory with the tail keyword. This
- // implies ensuring that there does not exist any path from a call that takes
- // in an alloca but does not capture it and the call which we wish to mark
- // with "tail".
- if (!F.callsFunctionThatReturnsTwice()) {
- for (Function::iterator BB = F.begin(), E = F.end(); BB != E; ++BB) {
- for (BasicBlock::iterator I = BB->begin(), E = BB->end(); I != E; ++I) {
- if (CallInst *CI = dyn_cast<CallInst>(I)) {
- if (!ACT.UsesAlloca.count(CI)) {
- CI->setTailCall();
- MadeChange = true;
- }
- }
- }
+ // If the PHI Node is a dynamic constant, replace it with the value it is.
+ if (Value *PNV = SimplifyInstruction(PN)) {
+ PN->replaceAllUsesWith(PNV);
+ PN->eraseFromParent();
}
}
@@ -343,11 +512,11 @@ static bool isDynamicConstant(Value *V, CallInst *CI, ReturnInst *RI) {
//
static Value *getCommonReturnValue(ReturnInst *IgnoreRI, CallInst *CI) {
Function *F = CI->getParent()->getParent();
- Value *ReturnedValue = 0;
+ Value *ReturnedValue = nullptr;
for (Function::iterator BBI = F->begin(), E = F->end(); BBI != E; ++BBI) {
ReturnInst *RI = dyn_cast<ReturnInst>(BBI->getTerminator());
- if (RI == 0 || RI == IgnoreRI) continue;
+ if (RI == nullptr || RI == IgnoreRI) continue;
// We can only perform this transformation if the value returned is
// evaluatable at the start of the initial invocation of the function,
@@ -355,10 +524,10 @@ static Value *getCommonReturnValue(ReturnInst *IgnoreRI, CallInst *CI) {
//
Value *RetOp = RI->getOperand(0);
if (!isDynamicConstant(RetOp, CI, RI))
- return 0;
+ return nullptr;
if (ReturnedValue && RetOp != ReturnedValue)
- return 0; // Cannot transform if differing values are returned.
+ return nullptr; // Cannot transform if differing values are returned.
ReturnedValue = RetOp;
}
return ReturnedValue;
@@ -370,18 +539,18 @@ static Value *getCommonReturnValue(ReturnInst *IgnoreRI, CallInst *CI) {
///
Value *TailCallElim::CanTransformAccumulatorRecursion(Instruction *I,
CallInst *CI) {
- if (!I->isAssociative() || !I->isCommutative()) return 0;
+ if (!I->isAssociative() || !I->isCommutative()) return nullptr;
assert(I->getNumOperands() == 2 &&
"Associative/commutative operations should have 2 args!");
// Exactly one operand should be the result of the call instruction.
if ((I->getOperand(0) == CI && I->getOperand(1) == CI) ||
(I->getOperand(0) != CI && I->getOperand(1) != CI))
- return 0;
+ return nullptr;
// The only user of this instruction we allow is a single return instruction.
if (!I->hasOneUse() || !isa<ReturnInst>(I->user_back()))
- return 0;
+ return nullptr;
// Ok, now we have to check all of the other return instructions in this
// function. If they return non-constants or differing values, then we cannot
@@ -402,11 +571,11 @@ TailCallElim::FindTRECandidate(Instruction *TI,
Function *F = BB->getParent();
if (&BB->front() == TI) // Make sure there is something before the terminator.
- return 0;
+ return nullptr;
// Scan backwards from the return, checking to see if there is a tail call in
// this block. If so, set CI to it.
- CallInst *CI = 0;
+ CallInst *CI = nullptr;
BasicBlock::iterator BBI = TI;
while (true) {
CI = dyn_cast<CallInst>(BBI);
@@ -414,14 +583,14 @@ TailCallElim::FindTRECandidate(Instruction *TI,
break;
if (BBI == BB->begin())
- return 0; // Didn't find a potential tail call.
+ return nullptr; // Didn't find a potential tail call.
--BBI;
}
// If this call is marked as a tail call, and if there are dynamic allocas in
// the function, we cannot perform this optimization.
if (CI->isTailCall() && CannotTailCallElimCallsMarkedTail)
- return 0;
+ return nullptr;
// As a special case, detect code like this:
// double fabs(double f) { return __builtin_fabs(f); } // a 'fabs' call
@@ -441,7 +610,7 @@ TailCallElim::FindTRECandidate(Instruction *TI,
for (; I != E && FI != FE; ++I, ++FI)
if (*I != &*FI) break;
if (I == E && FI == FE)
- return 0;
+ return nullptr;
}
return CI;
@@ -462,8 +631,8 @@ bool TailCallElim::EliminateRecursiveTailCall(CallInst *CI, ReturnInst *Ret,
// which is different to the constant returned by other return instructions
// (which is recorded in AccumulatorRecursionEliminationInitVal). This is a
// special case of accumulator recursion, the operation being "return C".
- Value *AccumulatorRecursionEliminationInitVal = 0;
- Instruction *AccumulatorRecursionInstr = 0;
+ Value *AccumulatorRecursionEliminationInitVal = nullptr;
+ Instruction *AccumulatorRecursionInstr = nullptr;
// Ok, we found a potential tail call. We can currently only transform the
// tail call if all of the instructions between the call and the return are
@@ -493,8 +662,8 @@ bool TailCallElim::EliminateRecursiveTailCall(CallInst *CI, ReturnInst *Ret,
// accumulator recursion variable eliminated.
if (Ret->getNumOperands() == 1 && Ret->getReturnValue() != CI &&
!isa<UndefValue>(Ret->getReturnValue()) &&
- AccumulatorRecursionEliminationInitVal == 0 &&
- !getCommonReturnValue(0, CI)) {
+ AccumulatorRecursionEliminationInitVal == nullptr &&
+ !getCommonReturnValue(nullptr, CI)) {
// One case remains that we are able to handle: the current return
// instruction returns a constant, and all other return instructions
// return a different constant.
@@ -510,9 +679,12 @@ bool TailCallElim::EliminateRecursiveTailCall(CallInst *CI, ReturnInst *Ret,
BasicBlock *BB = Ret->getParent();
Function *F = BB->getParent();
+ emitOptimizationRemark(F->getContext(), "tailcallelim", *F, CI->getDebugLoc(),
+ "transforming tail recursion to loop");
+
// OK! We can transform this tail call. If this is the first one found,
// create the new entry block, allowing us to branch back to the old entry.
- if (OldEntry == 0) {
+ if (!OldEntry) {
OldEntry = &F->getEntryBlock();
BasicBlock *NewEntry = BasicBlock::Create(F->getContext(), "", F, OldEntry);
NewEntry->takeName(OldEntry);